In the below I am trying to use awk
to match all the $13
values in input
, which is tab-delimited
,
that are in $1
of gene
which is just a single column of text.
However only the line with the greatest $9
value in input
needs to be printed.
So in the example below all the MECP2 and LTBP1
lines in input
are found, but since line 2 MECP2
and line 5 LTBP1
have the greatest $9
value they are printed.
The awk
does run but the output is not desired and I'm not sure if this is the best way. Thank you :).
gene
MECP2
LTPB1
GJB2
input
219 NM_001110792 chrX - 153295685 153363188 153295817 153363122 3 153295685,153297657,153363060, 153296901,153298008,153363188, 0 MECP2 cmpl cmpl 2,2,0,
219 NM_004992 chrX - 153287263 153363188 153295817 153357667 4 153287263,153297657,153357641,153363060, 153296901,153298008,153357765,153363188, 0 MECP2 cmpl cmpl 2,2,0,-1,
1 NM_000627 chr2 + 33359663 33624575 33359804 33623612 30 33359663,33411922,33413643,33442618,33447146,33468728,33477743,33482350,33484654,33487788,33488360,33498722,33500031,33500867,33505105,33518226,33525517,33526588,33534500,33540210,33567904,33572433,33585663,33586495,33588456,33589282,33590399,33614250,33622199,33623430, 33360027,33412147,33413918,33442721,33447218,33468851,33477911,33482578,33484677,33487888,33488459,33498848,33500157,33500990,33505225,33518349,33525640,33526711,33534623,33540336,33568030,33572577,33585846,33586582,33588585,33589423,33590570,33614373,33622349,33624575, 0 LTBP1 cmpl cmpl 0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1 NM_001166264 chr2 + 33359663 33624575 33359804 33623612 29 33359663,33411922,33413643,33442618,33447146,33468728,33477743,33482350,33484654,33487788,33488360,33498722,33500031,33500867,33505105,33518226,33525517,33526588,33534500,33540210,33572433,33585663,33586495,33588456,33589282,33590399,33614250,33622199,33623430, 33360027,33412147,33413918,33442721,33447218,33468851,33477911,33482578,33484677,33487888,33488459,33498848,33500157,33500990,33505225,33518349,33525640,33526711,33534623,33540336,33572577,33585846,33586582,33588585,33589423,33590570,33614373,33622349,33624575, 0 LTBP1 cmpl cmpl 0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1 NM_206943 chr2 + 33172368 33624575 33172391 33623612 34 33172368,33173941,33245975,33335648,33359859,33411922,33413643,33442618,33447146,33468728,33477743,33482350,33484654,33487788,33488360,33498722,33500031,33500867,33505105,33518226,33525517,33526588,33534500,33540210,33567904,33572433,33585663,33586495,33588456,33589282,33590399,33614250,33622199,33623430, 33172885,33174012,33246273,33335818,33360027,33412147,33413918,33442721,33447218,33468851,33477911,33482578,33484677,33487888,33488459,33498848,33500157,33500990,33505225,33518349,33525640,33526711,33534623,33540336,33568030,33572577,33585846,33586582,33588585,33589423,33590570,33614373,33622349,33624575, 0 LTBP1 cmpl cmpl 0,2,1,2,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1 NM_001166265 chr2 + 33359663 33624575 33359804 33623612 30 33359663,33411922,33413643,33442618,33447146,33468728,33477743,33482509,33484654,33487788,33488360,33498722,33500031,33500867,33505105,33518226,33525517,33526588,33534500,33540210,33567904,33572433,33585663,33586495,33588456,33589282,33590399,33614250,33622199,33623430, 33360027,33412147,33413918,33442721,33447218,33468851,33477911,33482578,33484677,33487888,33488459,33498848,33500157,33500990,33505225,33518349,33525640,33526711,33534623,33540336,33568030,33572577,33585846,33586582,33588585,33589423,33590570,33614373,33622349,33624575, 0 LTBP1 cmpl cmpl 0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1 NM_001166265 chr2 + 33359663 33624575 33359804 33623612 30 33359663,33411922,33413643,33442618,33447146,33468728,33477743,33482509,33484654,33487788,33488360,33498722,33500031,33500867,33505105,33518226,33525517,33526588,33534500,33540210,33567904,33572433,33585663,33586495,33588456,33589282,33590399,33614250,33622199,33623430, 33360027,33412147,33413918,33442721,33447218,33468851,33477911,33482578,33484677,33487888,33488459,33498848,33500157,33500990,33505225,33518349,33525640,33526711,33534623,33540336,33568030,33572577,33585846,33586582,33588585,33589423,33590570,33614373,33622349,33624575, 0 LTBP1 cmpl cmpl 0,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
desired output line 2 and line 5
219 NM_004992 chrX - 153287263 153363188 153295817 153357667 4 153287263,153297657,153357641,153363060, 153296901,153298008,153357765,153363188, 0 MECP2 cmpl cmpl 2,2,0,-1,
1 NM_206943 chr2 + 33172368 33624575 33172391 33623612 34 33172368,33173941,33245975,33335648,33359859,33411922,33413643,33442618,33447146,33468728,33477743,33482350,33484654,33487788,33488360,33498722,33500031,33500867,33505105,33518226,33525517,33526588,33534500,33540210,33567904,33572433,33585663,33586495,33588456,33589282,33590399,33614250,33622199,33623430, 33172885,33174012,33246273,33335818,33360027,33412147,33413918,33442721,33447218,33468851,33477911,33482578,33484677,33487888,33488459,33498848,33500157,33500990,33505225,33518349,33525640,33526711,33534623,33540336,33568030,33572577,33585846,33586582,33588585,33589423,33590570,33614373,33622349,33624575, 0 LTBP1 cmpl cmpl 0,2,1,2,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
awk
awk -F'\t' 'NR==FNR{A[$1];next}$13 in A && $9>a {a=$9; b=$0} END {print b}' gene input > out
current output skipping MECP2 line
1 NM_206943 chr2 + 33172368 33624575 33172391 33623612 34 33172368,33173941,33245975,33335648,33359859,33411922,33413643,33442618,33447146,33468728,33477743,33482350,33484654,33487788,33488360,33498722,33500031,33500867,33505105,33518226,33525517,33526588,33534500,33540210,33567904,33572433,33585663,33586495,33588456,33589282,33590399,33614250,33622199,33623430, 33172885,33174012,33246273,33335818,33360027,33412147,33413918,33442721,33447218,33468851,33477911,33482578,33484677,33487888,33488459,33498848,33500157,33500990,33505225,33518349,33525640,33526711,33534623,33540336,33568030,33572577,33585846,33586582,33588585,33589423,33590570,33614373,33622349,33624575, 0 LTBP1 cmpl cmpl 0,2,1,2,1,1,1,0,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
---------- Post updated at 01:40 PM ---------- Previous update was at 08:34 AM ----------
This seems to work:
awk '{if ($9>max[$13]) # combine all matching $13 and grad the max value of $9
{max[$13]=$9; val[$13]=$0}# read them into variable
}
END {for (i in val) print val}' input > out # print variable with line
Thank you :).