I am trying to add a condition to the below perl
that will capture the GT
tag and place a specific string in the last field of each line. The problem is that the GT
value used is not right after the tag rather it is a few fields away. The values should always be 0/1
or 1/2
and are in bold in the input
. 0/1=het
and 1/2=hom
. Thank you :).
input
##
##
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##
##
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
chr1 9324670 . A G 672.016 PASS AF=0.528369;AO=148;DP=281;FAO=149;FDP=282;FR=.;FRO=133;FSAF=59;FSAR=90;FSRF=60;FSRR=73;FWDB=0.00343606;FXX=0;HRUN=1;LEN=1;MLLD=155.207;OALT=G;OID=.;OMAPALT=G;OPOS=9324670;OREF=A;PB=0.5;PBP=1;QD=9.53214;RBI=0.00594431;REFB=-0.0181827;REVB=0.00485061;RO=130;SAF=59;SAR=89;SRF=57;SRR=73;SSEN=0;SSEP=0;SSSB=-0.0352973;STB=0.526882;STBP=0.323;TYPE=snp;VARB=0.0184938;ANN=H6PD GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:527:281:282:130:133:148:149:0.528369:89:59:57:73:90:59:60:73 GOOD 282 reads
chr1 10318652 . C G 360.217 PASS AF=0.566929;AO=72;DP=129;FAO=72;FDP=127;FR=.;FRO=55;FSAF=36;FSAR=36;FSRF=31;FSRR=24;FWDB=0.00760676;FXX=0.0155027;HRUN=2;LEN=1;MLLD=115.62;OALT=G;OID=.;OMAPALT=G;OPOS=10318652;OREF=C;PB=0.5;PBP=1;QD=11.3454;RBI=0.0125905;REFB=-0.0312889;REVB=-0.0100329;RO=55;SAF=36;SAR=36;SRF=31;SRR=24;SSEN=0;SSEP=0;SSSB=-0.0505108;STB=0.527551;STBP=0.492;TYPE=snp;VARB=0.0181889;ANN=KIF1B GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 1/1:203:129:127:55:55:72:72:0.566929:36:36:31:24:36:36:31:24 GOOD 127 reads
chr1 10355834 . C T 504.995 PASS AF=0.456747;AO=132;DP=290;FAO=132;FDP=289;FR=.;FRO=157;FSAF=56;FSAR=76;FSRF=85;FSRR=72;FWDB=0.0634576;FXX=0.00344816;HRUN=2;LEN=1;MLLD=58.7971;OALT=T;OID=.;OMAPALT=T;OPOS=10355834;OREF=C;PB=0.5;PBP=1;QD=6.98956;RBI=0.0644815;REFB=0.00590624;REVB=-0.0114455;RO=156;SAF=56;SAR=76;SRF=85;SRR=71;SSEN=0;SSEP=0;SSSB=-0.118565;STB=0.563872;STBP=0.047;TYPE=snp;VARB=-0.00678859;ANN=KIF1B GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:504:290:289:156:157:132:132:0.456747:76:56:85:71:76:56:85:72 GOOD 289 reads
chr1 11090916 . C A 1855.57 PASS AF=1;AO=193;DP=193;FAO=194;FDP=194;FR=.;FRO=0;FSAF=96;FSAR=98;FSRF=0;FSRR=0;FWDB=0.0243175;FXX=0;HRUN=1;LEN=1;MLLD=207.14;OALT=A;OID=.;OMAPALT=A;OPOS=11090916;OREF=C;PB=0.5;PBP=1;QD=38.2592;RBI=0.048476;REFB=0;REVB=0.0419355;RO=0;SAF=95;SAR=98;SRF=0;SRR=0;SSEN=0;SSEP=0;SSSB=4.54468e-08;STB=0.5;STBP=1;TYPE=snp;VARB=3.87852e-05;ANN=MASP2 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 1/1:88:193:194:0:0:193:194:1:98:95:0:0:98:96:0:0 GOOD 194 reads
chr1 213068404 . T C 70.4374 PASS AF=0.435897;AO=17;DP=39;FAO=17;FDP=39;FR=.;FRO=22;FSAF=16;FSAR=1;FSRF=11;FSRR=11;FWDB=-0.000611735;FXX=0;HRUN=1;LEN=1;MLLD=243.519;OALT=C;OID=.;OMAPALT=C;OPOS=213068404;OREF=T;PB=0.5;PBP=1;QD=7.22435;RBI=0.023516;REFB=0.00205571;REVB=0.023508;RO=22;SAF=16;SAR=1;SRF=11;SRR=11;SSEN=0;SSEP=0;SSSB=0.630793;STB=0.87614;STBP=0.001;TYPE=snp;VARB=-0.00167889;ANN=FLVCR1 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:70:39:39:22:22:17:17:0.435897:1:16:11:11:1:16:11:11 STRAND BIAS 39 reads
desired output
##
##
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
##
##
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE
chr1 9324670 . A G 672.016 PASS AF=0.528369;AO=148;DP=281;FAO=149;FDP=282;FR=.;FRO=133;FSAF=59;FSAR=90;FSRF=60;FSRR=73;FWDB=0.00343606;FXX=0;HRUN=1;LEN=1;MLLD=155.207;OALT=G;OID=.;OMAPALT=G;OPOS=9324670;OREF=A;PB=0.5;PBP=1;QD=9.53214;RBI=0.00594431;REFB=-0.0181827;REVB=0.00485061;RO=130;SAF=59;SAR=89;SRF=57;SRR=73;SSEN=0;SSEP=0;SSSB=-0.0352973;STB=0.526882;STBP=0.323;TYPE=snp;VARB=0.0184938;ANN=H6PD GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:527:281:282:130:133:148:149:0.528369:89:59:57:73:90:59:60:73 GOOD 282 reads het
chr1 10318652 . C G 360.217 PASS AF=0.566929;AO=72;DP=129;FAO=72;FDP=127;FR=.;FRO=55;FSAF=36;FSAR=36;FSRF=31;FSRR=24;FWDB=0.00760676;FXX=0.0155027;HRUN=2;LEN=1;MLLD=115.62;OALT=G;OID=.;OMAPALT=G;OPOS=10318652;OREF=C;PB=0.5;PBP=1;QD=11.3454;RBI=0.0125905;REFB=-0.0312889;REVB=-0.0100329;RO=55;SAF=36;SAR=36;SRF=31;SRR=24;SSEN=0;SSEP=0;SSSB=-0.0505108;STB=0.527551;STBP=0.492;TYPE=snp;VARB=0.0181889;ANN=KIF1B GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 1/1:203:129:127:55:55:72:72:0.566929:36:36:31:24:36:36:31:24 GOOD 127 reads hom
chr1 10355834 . C T 504.995 PASS AF=0.456747;AO=132;DP=290;FAO=132;FDP=289;FR=.;FRO=157;FSAF=56;FSAR=76;FSRF=85;FSRR=72;FWDB=0.0634576;FXX=0.00344816;HRUN=2;LEN=1;MLLD=58.7971;OALT=T;OID=.;OMAPALT=T;OPOS=10355834;OREF=C;PB=0.5;PBP=1;QD=6.98956;RBI=0.0644815;REFB=0.00590624;REVB=-0.0114455;RO=156;SAF=56;SAR=76;SRF=85;SRR=71;SSEN=0;SSEP=0;SSSB=-0.118565;STB=0.563872;STBP=0.047;TYPE=snp;VARB=-0.00678859;ANN=KIF1B GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:504:290:289:156:157:132:132:0.456747:76:56:85:71:76:56:85:72 GOOD 289 reads het
chr1 11090916 . C A 1855.57 PASS AF=1;AO=193;DP=193;FAO=194;FDP=194;FR=.;FRO=0;FSAF=96;FSAR=98;FSRF=0;FSRR=0;FWDB=0.0243175;FXX=0;HRUN=1;LEN=1;MLLD=207.14;OALT=A;OID=.;OMAPALT=A;OPOS=11090916;OREF=C;PB=0.5;PBP=1;QD=38.2592;RBI=0.048476;REFB=0;REVB=0.0419355;RO=0;SAF=95;SAR=98;SRF=0;SRR=0;SSEN=0;SSEP=0;SSSB=4.54468e-08;STB=0.5;STBP=1;TYPE=snp;VARB=3.87852e-05;ANN=MASP2 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 1/1:88:193:194:0:0:193:194:1:98:95:0:0:98:96:0:0 GOOD 194 reads hom
chr1 213068404 . T C 70.4374 PASS AF=0.435897;AO=17;DP=39;FAO=17;FDP=39;FR=.;FRO=22;FSAF=16;FSAR=1;FSRF=11;FSRR=11;FWDB=-0.000611735;FXX=0;HRUN=1;LEN=1;MLLD=243.519;OALT=C;OID=.;OMAPALT=C;OPOS=213068404;OREF=T;PB=0.5;PBP=1;QD=7.22435;RBI=0.023516;REFB=0.00205571;REVB=0.023508;RO=22;SAF=16;SAR=1;SRF=11;SRR=11;SSEN=0;SSEP=0;SSSB=0.630793;STB=0.87614;STBP=0.001;TYPE=snp;VARB=-0.00167889;ANN=FLVCR1 GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR 0/1:70:39:39:22:22:17:17:0.435897:1:16:11:11:1:16:11:11 STRAND BIAS 39 reads het
perl
perl -ple '/^[^#].*FDP=(\d+);.*STB=(\d+\.\d+);.*GT=(\d+); ($9:*)= 0/1?="het",= 1/1?="hom"/ and $_.=($2 >= 0.8?" STRAND BIAS ":" GOOD ").$1." reads"' input > results.txt