Hello to all in forum,
Maybe you experts could help me with this complex comparison, please.
I need to search for the numbers from file1 in column 3 of file2 and, if found, compare columns 4 to NF
of file2 with the lines in file3. If columns 4 to NF of file2 match any of the lines
in file3, then print columns 3 to NF of file2 and add ",C" at the end. If columns 4 to NF don't match
any of the lines in file3, print columns 3 to NF of file2, adding ",D" at the end.
file1
547680
575210
804270
123989
623989
221209
file2
1|4501892|547680|1|2|30|73491|12|34|1
2|4788930|575210|1|2|30|73472|12|34|1
3|6793773|804270|1|2|30|73420
4|6673724|123989|1|2|30|73001|12|34|1
5|8099821|333722|1|30|73473|10|34|1
6|7889200|623989|1|2|30|73001|12|45|1
7|8882662|221209|1|2|30|83002|12|34|1
file3
1|2|30|73472|12|34|1
1|2|30|73001|12|34|1
1|2|30|83002|12|34|1
Desired output:
547680|1|2|30|73491|12|34|1,D
575210|1|2|30|73472|12|34|1,C
804270|1|2|30|73420,D
123989|1|2|30|73001|12|34|1,C
623989|1|2|30|73001|12|45|1,D
221209|1|2|30|83002|12|34|1,C
Thanks in advance for any help.
pamu
September 5, 2013, 2:16am
2
Try
# Tag file2 rows by looking up their 7th field among the 4th fields of file3:
# matched rows are printed with ",C", all remaining rows with ",D".
# The END block indexes B[i]; with a bare B (array used as a scalar) awk
# rejects the program.
# NOTE(review): A is filled from file1 but never read, so file1 does not
# filter the output; rows are matched on field 7 alone rather than on the
# whole of columns 4..NF, and file2 rows sharing a 7th field overwrite each
# other in B.
awk -F \| '
# file1: collect the keys (not consulted by the rules below).
f == 1 { A[$1]++ }

# file2: blank columns 1 and 2 (the record is rebuilt with OFS, leaving a
# leading "||") and remember the row under its 7th field.
f == 2 { $1 = $2 = ""; B[$7] = $0 }

# file3: a stored row whose key equals this line 4th field is printed as C
# and removed, so that END only sees the rows that were never matched.
f == 3 {
  if (B[$4]) {
    sub("\\|\\|", "", B[$4])   # drop the empty leading columns
    print B[$4] ",C"
    delete B[$4]
  }
}

# Whatever is still stored was not matched by any file3 line: print as D.
# The emptiness test skips entries created by the B[$4] lookups above.
# for-in order is unspecified, so the D lines come out in arbitrary order.
END {
  for (i in B) {
    if (B[i]) {
      sub("\\|\\|", "", B[i])
      print B[i] ",D"
    }
  }
}
' OFS=\| f=1 file1 f=2 file2 f=3 file3
575210|1|2|30|73472|12|34|1,C
623989|1|2|30|73001|12|45|1,C
221209|1|2|30|83002|12|34|1,C
804270|1|2|30|73420,D
333722|1|30|73473|10|34|1,D
547680|1|2|30|73491|12|34|1,D
Another version:
awk '
# Every line read before f is set (the file1 keys and the file3 lines) goes
# into one shared lookup table.
!f { seen[$0]; next }

# file2 rows: keep column 3 as the key, then blank columns 1-3 and strip the
# three separators left at the front of the rebuilt record.
{
  id = $3
  $1 = $2 = $3 = ""
  sub(/\|\|\|/, "")
}

# Only rows whose key is in the table are printed: C when the stripped
# record is itself in the table, D otherwise.
id in seen {
  verdict = ($0 in seen) ? "C" : "D"
  print id, $0 "," verdict
}
' FS=\| OFS=\| file1 file3 f=1 file2
MR.bean
September 5, 2013, 3:02am
4
Another one
bash-3.2$ cat f1
547680
575210
804270
123989
623989
221209
bash-3.2$ cat f2
1|4501892|547680|1|2|30|73491|12|34|1
2|4788930|575210|1|2|30|73472|12|34|1
3|6793773|804270|1|2|30|73420
4|6673724|123989|1|2|30|73001|12|34|1
5|8099821|333722|1|30|73473|10|34|1
6|7889200|623989|1|2|30|73001|12|45|1
7|8882662|221209|1|2|30|83002|12|34|1
bash-3.2$
bash-3.2$ cat f3
1|2|30|73472|12|34|1
1|2|30|73001|12|34|1
1|2|30|83002|12|34|1
bash-3.2$
bash-3.2$ awk -F'|' '
# Flag each f2 row whose column 3 is listed in f1: ",C" when columns 4..NF
# are exactly one of the f3 lines, ",D" otherwise. Rows keep the order of f2.
# Plain array lookups are used instead of a join/cut pipeline (join needs
# both inputs sorted on the join field, which this data is not) and instead
# of an unanchored regex scan per f3 line (which would also accept f3 lines
# that are only a substring of the row).

src == 1 { want[$1]; next }     # f1: keys to look for in column 3 of f2
src == 2 { tails[$0]; next }    # f3: complete lines to compare against

($3 in want) {
  # Rebuild columns 4..NF with the original separator.
  rest = ""
  for (i = 4; i <= NF; i++)
    rest = rest (i > 4 ? "|" : "") $i
  # A row with no column 4 prints the bare key, without a trailing separator.
  print $3 (NF > 3 ? "|" rest : "") "," ((rest in tails) ? "C" : "D")
}
' src=1 f1 src=2 f3 src=3 f2
547680|1|2|30|73491|12|34|1,D
575210|1|2|30|73472|12|34|1,C
804270|1|2|30|73420,D
123989|1|2|30|73001|12|34|1,C
623989|1|2|30|73001|12|45|1,D
221209|1|2|30|83002|12|34|1,C
Hello Pamu, Scrutinizer, Mr.Bean
Many thanks! All 3 solutions work correctly.
awk -F"[|]" '
# file1: one wanted key per line.
FILENAME == "file1" { wanted[$1] = 1; next }

# file3: complete lines that columns 4..NF of a row are compared with.
FILENAME == "file3" { known[$0] = 1; next }

# Any other input (file2): only rows whose third column is a wanted key.
wanted[$3] == 1 {
  # Join columns 4..NF back together with "|".
  str = $4
  i = 5
  while (i <= NF) {
    str = str "|" $i
    i++
  }
  # C when the joined columns equal a file3 line, D otherwise.
  suffix = (known[str] == 1) ? ",C" : ",D"
  print $3 "|" str suffix
}
' file1 file3 file2