Hello guys,
I need a script that gets the common lines from two files (tab-separated columns), with the criterion that if the first two columns match, I keep the maximum value of the 5th column. The 3rd and 4th columns should come from the row that has the highest value in the 5th column.
Sample input:
file1:
111 222 ABC PQR 0.1
333 444 xxx yyy 0.5
555 666 PQR DEF 0.4
file2:
111 222 abc xyz 0.7
555 666 def pqr 0.3
777 888 rst mno 0.4
sample output:
111 222 abc xyz 0.7
555 666 PQR DEF 0.4
This is being done for all the files in the same format in a directory. I have a script, but it does not handle the condition for the 3rd and 4th columns.
# Asker's current attempt: while reading the first input it stores column 3 under the
# key "col1 col2"; for later rows whose key was stored it prints col1, col2 and the
# stored value when that value is greater than the row's column 3, else the row as is.
# It compares column 3 (not column 5) and never carries over columns 3-4 of the winning
# row. No input files are named on this command line as posted.
awk 'NR==FNR{a[$1" "$2]=$3;next;}($1" "$2 in a){if(a[$1" "$2] > $3) print $1, $2,a[$1" "$2]; else print;}'
Please help. Thanks in advance.
# Print the rows whose (column 1, column 2) key occurs in both file1 and file2,
# keeping for each key the complete row that has the larger 5th column
# (file1 wins ties). Rows are printed verbatim, so the separators of the input
# are preserved, and they come out in the order the keys appear in file2.
awk '
# First file: remember the full row and its numeric 5th column per key.
# If a key repeats inside the first file, keep the row with the larger value.
NR == FNR {
    k = $1 SUBSEP $2
    if (!(k in row) || $5 + 0 > val[k]) { row[k] = $0; val[k] = $5 + 0 }
    next
}
# Second file: only keys already stored from the first file are common.
($1 SUBSEP $2) in row {
    k = $1 SUBSEP $2
    if (!(k in common)) { common[k]; order[++n] = k }   # record first-seen order
    if ($5 + 0 > val[k]) { val[k] = $5 + 0; row[k] = $0 }
}
# Arrays must be indexed by key here; printing a bare array name is an error.
END { for (j = 1; j <= n; j++) print row[order[j]] }
' file1 file2
# cat file1
111 222 ABC PQR 0.1
333 444 xxx yyy 0.5
555 666 PQR DEF 0.4
# cat file2
111 222 abc xyz 0.7
555 666 def pqr 0.3
777 888 rst mno 0.4
# ./justdoit
111 222 abc xyz 0.7
555 666 PQR DEF 0.4
## justdoit ##
#!/bin/bash
#
# justdoit - print the rows common to two tab-separated files.
#
# Two rows are "common" when their first two columns are equal. For every such
# row of FILE1 (in FILE1 order) the script prints whichever of the two rows has
# the larger last column (the 5th column in the documented layout); FILE1 wins
# ties. The result is written to ./justtmp, as before, and then shown on stdout.
#
# Usage: ./justdoit [FILE1 [FILE2]]      (defaults: file1 file2)
#
# Compared with the earlier sed/bc version this handles files of any length
# (FILE2 is no longer limited to three rows, FILE1 is read to the end),
# compares the values as real numbers instead of truncating value*1000, and
# needs no tmpx/tmpX scratch files.

#######################################
# Write the winning row for every FILE1 row whose key is also in FILE2.
# Arguments: $1 - FILE1, $2 - FILE2
# Outputs:   selected rows, verbatim, on stdout
# Returns:   exit status of awk
#######################################
common_max_rows() {
  local first=$1 second=$2

  # FILE2 is read first so FILE1 can be streamed and printed in its own order.
  awk -F'\t' '
    # FILE2: keep, per key, the row with the largest last column.
    NR == FNR {
      if (NF >= 3) {
        k = $1 FS $2
        if (!(k in best) || $NF + 0 > score[k]) { best[k] = $0; score[k] = $NF + 0 }
      }
      next
    }
    # FILE1: for a shared key print the better of the two rows.
    NF >= 3 {
      k = $1 FS $2
      if (k in best) {
        if (score[k] > $NF + 0) print best[k]
        else print $0
      }
    }
  ' "$second" "$first"
}

#######################################
# Entry point: validate the inputs, build ./justtmp and display it.
# Arguments: $1 - FILE1 (default file1), $2 - FILE2 (default file2)
# Returns:   0 on success, non-zero if an input is unreadable or awk fails
#######################################
main() {
  local first=${1:-file1} second=${2:-file2} f

  for f in "$first" "$second"; do
    if [[ ! -r "$f" ]]; then
      printf 'justdoit: cannot read %s\n' "$f" >&2
      return 1
    fi
  done

  common_max_rows "$first" "$second" > justtmp || return
  cat justtmp
}

main "$@"