In the below perl
code I am using tags within each line to extract certain information. The tags that are used are:
STB
>0.8 is STRAND BIAS otherwise GOOD
FDP
is the second number
GO
towards the end of the line is looked up in a hash and the returned value is output; in the first line that value is 2/2, i.e. hom
column6
is then divided by 33 to give the last number (in line one it is 455.945/33
or 13)
The current output is perfect for most cases, but not all of them. In line 1, which did not work, STB
has two values in it separated by a ,
. That is not the case in most lines, but I cannot seem to get the desired output. Thank you :).
input
chr9 140053158 . TGGGGGC TGGGG,TGGGGC 455.945 PASS AF=0,1;AO=21,60;DP=126;FAO=0,124;FDP=124;FR=.;FRO=0;FSAF=0,80;FSAR=0,44;FSRF=0;FSRR=0;FWDB=0.0104498,0.231579;FXX=0.0158718;HRUN=5,5;LEN=2,1;MLLD=9.50728,12.1416;OALT=-,-;OID=.,.;OMAPALT=TGGGG,TGGGGC;OPOS=140053163,140053159;OREF=GC,G;PB=0.5,0.5;PBP=1,1;QD=14.7079;RBI=0.0306624,0.23266;REFB=-0.0212019,-0.024401;REVB=0.0288268,0.0223937;RO=25;SAF=21,41;SAR=0,19;SRF=17;SRR=8;SSEN=0.326531,0.326531;SSEP=0,0;SSSB=0.814959,0.00209983;STB=0.5,0.5;STBP=1,1;TYPE=del,del;VARB=-0.0378589,0.00692405;ANN=GRIN1 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 2/2:81:126:124:25:0:21,60:0,124:0,1:0,19:21,41:17:8:0,44:0,80:0:0
chr1 949597 . C T 629.899 PASS AF=0.4375;AO=513;DP=1095;FAO=175;FDP=400;FR=.;FRO=225;FSAF=77;FSAR=98;FSRF=118;FSRR=107;FWDB=-0.00642053;FXX=0;HRUN=1;LEN=1;MLLD=188.973;OALT=T;OID=.;OMAPALT=T;OPOS=949597;OREF=C;PB=0.5;PBP=1;QD=6.29899;RBI=0.0194428;REFB=0.00779203;REVB=-0.0183521;RO=579;SAF=226;SAR=287;SRF=306;SRR=273;SSEN=0;SSEP=0;SSSB=-0.0851561;STB=0.547637;STBP=0.084;TYPE=snp;VARB=-0.010388;ANN=ISG15 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:629:1095:400:579:225:513:175:0.4375:287:226:306:273:98:77:118:107
chr1 949654 . A G 765.255 PASS AF=0.4775;AO=496;DP=1115;FAO=191;FDP=400;FR=.;FRO=209;FSAF=80;FSAR=111;FSRF=101;FSRR=108;FWDB=-0.00182381;FXX=0;HRUN=1;LEN=1;MLLD=130.022;OALT=G;OID=.;OMAPALT=G;OPOS=949654;OREF=A;PB=0.5;PBP=1;QD=7.65255;RBI=0.0126329;REFB=-0.00552621;REVB=-0.0125005;RO=617;SAF=242;SAR=254;SRF=309;SRR=308;SSEN=0;SSEP=0;SSSB=-0.0129692;STB=0.534175;STBP=0.184;TYPE=snp;VARB=0.00480316;ANN=ISG15 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:765:1115:400:617:209:496:191:0.4775:254:242:309:308:111:80:101:108
current output
chr9 140053158 . TGGGGGC TGGGG,TGGGGC 455.945 PASS AF=0,1;AO=21,60;DP=126;FAO=0,124;FDP=124;FR=.;FRO=0;FSAF=0,80;FSAR=0,44;FSRF=0;FSRR=0;FWDB=0.0104498,0.231579;FXX=0.0158718;HRUN=5,5;LEN=2,1;MLLD=9.50728,12.1416;OALT=-,-;OID=.,.;OMAPALT=TGGGG,TGGGGC;OPOS=140053163,140053159;OREF=GC,G;PB=0.5,0.5;PBP=1,1;QD=14.7079;RBI=0.0306624,0.23266;REFB=-0.0212019,-0.024401;REVB=0.0288268,0.0223937;RO=25;SAF=21,41;SAR=0,19;SRF=17;SRR=8;SSEN=0.326531,0.326531;SSEP=0,0;SSSB=0.814959,0.00209983;STB=0.5,0.5;STBP=1,1;TYPE=del,del;VARB=-0.0378589,0.00692405;ANN=GRIN1 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 2/2:81:126:124:25:0:21,60:0,124:0,1:0,19:21,41:17:8:0,44:0,80:0:0
chr1 949597 . C T 629.899 PASS AF=0.4375;AO=513;DP=1095;FAO=175;FDP=400;FR=.;FRO=225;FSAF=77;FSAR=98;FSRF=118;FSRR=107;FWDB=-0.00642053;FXX=0;HRUN=1;LEN=1;MLLD=188.973;OALT=T;OID=.;OMAPALT=T;OPOS=949597;OREF=C;PB=0.5;PBP=1;QD=6.29899;RBI=0.0194428;REFB=0.00779203;REVB=-0.0183521;RO=579;SAF=226;SAR=287;SRF=306;SRR=273;SSEN=0;SSEP=0;SSSB=-0.0851561;STB=0.547637;STBP=0.084;TYPE=snp;VARB=-0.010388;ANN=ISG15 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:629:1095:400:579:225:513:175:0.4375:287:226:306:273:98:77:118:107 GOOD 400 het 19
chr1 949654 . A G 765.255 PASS AF=0.4775;AO=496;DP=1115;FAO=191;FDP=400;FR=.;FRO=209;FSAF=80;FSAR=111;FSRF=101;FSRR=108;FWDB=-0.00182381;FXX=0;HRUN=1;LEN=1;MLLD=130.022;OALT=G;OID=.;OMAPALT=G;OPOS=949654;OREF=A;PB=0.5;PBP=1;QD=7.65255;RBI=0.0126329;REFB=-0.00552621;REVB=-0.0125005;RO=617;SAF=242;SAR=254;SRF=309;SRR=308;SSEN=0;SSEP=0;SSSB=-0.0129692;STB=0.534175;STBP=0.184;TYPE=snp;VARB=0.00480316;ANN=ISG15 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:765:1115:400:617:209:496:191:0.4775:254:242:309:308:111:80:101:108 GOOD 400 het 23
desired output
chr9 140053158 . TGGGGGC TGGGG,TGGGGC 455.945 PASS AF=0,1;AO=21,60;DP=126;FAO=0,124;FDP=124;FR=.;FRO=0;FSAF=0,80;FSAR=0,44;FSRF=0;FSRR=0;FWDB=0.0104498,0.231579;FXX=0.0158718;HRUN=5,5;LEN=2,1;MLLD=9.50728,12.1416;OALT=-,-;OID=.,.;OMAPALT=TGGGG,TGGGGC;OPOS=140053163,140053159;OREF=GC,G;PB=0.5,0.5;PBP=1,1;QD=14.7079;RBI=0.0306624,0.23266;REFB=-0.0212019,-0.024401;REVB=0.0288268,0.0223937;RO=25;SAF=21,41;SAR=0,19;SRF=17;SRR=8;SSEN=0.326531,0.326531;SSEP=0,0;SSSB=0.814959,0.00209983;STB=0.5,0.5;STBP=1,1;TYPE=del,del;VARB=-0.0378589,0.00692405;ANN=GRIN1 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 2/2:81:126:124:25:0:21,60:0,124:0,1:0,19:21,41:17:8:0,44:0,80:0:0 GOOD 127 hom 13
chr1 949597 . C T 629.899 PASS AF=0.4375;AO=513;DP=1095;FAO=175;FDP=400;FR=.;FRO=225;FSAF=77;FSAR=98;FSRF=118;FSRR=107;FWDB=-0.00642053;FXX=0;HRUN=1;LEN=1;MLLD=188.973;OALT=T;OID=.;OMAPALT=T;OPOS=949597;OREF=C;PB=0.5;PBP=1;QD=6.29899;RBI=0.0194428;REFB=0.00779203;REVB=-0.0183521;RO=579;SAF=226;SAR=287;SRF=306;SRR=273;SSEN=0;SSEP=0;SSSB=-0.0851561;STB=0.547637;STBP=0.084;TYPE=snp;VARB=-0.010388;ANN=ISG15 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:629:1095:400:579:225:513:175:0.4375:287:226:306:273:98:77:118:107 GOOD 400 het 19
chr1 949654 . A G 765.255 PASS AF=0.4775;AO=496;DP=1115;FAO=191;FDP=400;FR=.;FRO=209;FSAF=80;FSAR=111;FSRF=101;FSRR=108;FWDB=-0.00182381;FXX=0;HRUN=1;LEN=1;MLLD=130.022;OALT=G;OID=.;OMAPALT=G;OPOS=949654;OREF=A;PB=0.5;PBP=1;QD=7.65255;RBI=0.0126329;REFB=-0.00552621;REVB=-0.0125005;RO=617;SAF=242;SAR=254;SRF=309;SRR=308;SSEN=0;SSEP=0;SSSB=-0.0129692;STB=0.534175;STBP=0.184;TYPE=snp;VARB=0.00480316;ANN=ISG15 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:765:1115:400:617:209:496:191:0.4775:254:242:309:308:111:80:101:108 GOOD 400 het 23
perl
perl -plae '
# For every non-header line that carries FDP, STB and a genotype, append four
# tab-separated columns: strand-bias verdict, FDP, zygosity, and column 6
# divided by 33 (rounded to the nearest integer). Other lines print unchanged.
#
# Genotype -> zygosity lookup table, built once before the first line is read.
BEGIN { %h = qw(0/0 hom 0/1 het 1/1 hom 1/2 het 2/2 hom) }

# STB holds one value per ALT allele, so multi-allelic records look like
# "STB=0.5,0.5;". Capture the whole comma-separated list rather than a single
# decimal; this also accepts an integer value such as "STB=1;".
# The greedy .* before the genotype group makes it pick the LAST n/n on the
# line, i.e. the genotype in the sample column.
if (/^[^#].*FDP=(\d+);.*STB=([\d.,]+);.*([0-2]\/[0-2])/) {
    # Copy the captures right away so later regex work (split) cannot clobber them.
    my ($fdp, $stb, $gt) = ($1, $2, $3);

    # The record is biased as soon as any single allele reaches the 0.8 threshold.
    my $biased = grep { $_ >= 0.8 } split /,/, $stb;

    # Leading "" yields the tab that separates the original line from the new columns.
    # $F[5] is the sixth whitespace-separated field, provided by -a.
    $_ .= join "\t", "",
        ($biased ? "STRAND BIAS" : "GOOD"),
        $fdp,
        $h{$gt},
        int($F[5] / 33 + 0.5);
}' input