In the below perl
I am trying to extract and print the values AF1=
, the GT
value, and F[5]
or QUAL
diveded by 33 (rounded to the nearest whole #). The GT
value is at the end after the GT:PL
so all the possibilities are read into a hash h
, then depending on the value that is in the line the coresponding het ot hom
results. The command does execute but the file
is not changed. I know many of the regex
in the command are not used but I need to keep the same format (this is a special case scenario). Thank you :).
file
##bcftools_callVersion=1.8+htslib-1.8
##bcftools_callCommand=call -c -; Date=Thu May 16 12:25:46 2019
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT xxxx
chr1 235972435 . T C 195.009 . DP=744;VDB=0.27972;SGB=-0.693147;RPB=0.506757;MQB=0.997295;MQSB=0.616175;BQB=0.810181;MQ0F=0;AF1=0.5;AC1=1;DP4=218,151,192,179;MQ=58;FQ=188.502;PV4=0.0462442,1,0.32459,1 GT:PL 0/1:225,0,216
chr2 220022998 . G . 283.236 . DP=456;SGB=-0.379885;RPB=1;MQB=1;MQSB=0.840495;BQB=1;MQ0F=0;AF1=0;AC1=0;DP4=245,209,1,0;MQ=59;FQ=-281.989;PV4=1,0.0010322,1,4.68871e-09 GT:PL 0/0:0
chr3 128205860 . G C 221.999 . DP=353;VDB=0.00961268;SGB=-0.693147;RPB=1;MQB=1;MQSB=0.685507;BQB=1;MQ0F=0;AF1=1;AC1=2;DP4=0,1,147,176;MQ=57;FQ=-281.989;PV4=1,1,1,0.30647 GT:PL 1/1:255,255,0
chr5 35871190 . G A 170.009 . DP=173;VDB=0.000529499;SGB=-0.693147;RPB=0.605358;MQB=0.998833;MQSB=0.99703;BQB=0.113906;MQ0F=0;AF1=0.5;AC1=1;DP4=41,52,35,39;MQ=55;FQ=172.502;PV4=0.754883,1,1,1 GT:PL 0/1:200,0,209
chr5 77412011 . A G 161.009 . DP=293;VDB=0.0010866;SGB=-0.693147;RPB=0.990047;MQB=0.923303;MQSB=0.967232;BQB=0.000232632;MQ0F=0;AF1=0.5;AC1=1;DP4=83,74,71,50;MQ=57;FQ=164.015;PV4=0.394406,2.23136e-05,1,1 GT:PL 0/1:191,0,225
chr7 66459269 . C . 136.006 . DP=316;MQSB=0.314206;MQ0F=0.00316456;AF1=0;AC1=0;DP4=122,176,0,0;MQ=12;FQ=-281.989 GT:PL 0/0:0
perl
perl -plae '
BEGIN{ %h = qw(0/0 hom 0/1 het 1/1 hom 1/2 het 2/2 hom) } /^[^#].*AF1=(\d+\.?\d*);.*FDP=(\d+);.*FSAF=(\d+);.*FSAR=(\d+);.*HRUN=(\d+);.*STB=(\d+\.?\d*)[,;].*([0-2]\/[0-2])/ and $_ .= join "\t", ("", sprintf("%1.3f",$9), $h{$7}, int($F[5]/33+0.5))' file
desired tab-delimited
##bcftools_callVersion=1.8+htslib-1.8
##bcftools_callCommand=call -c -; Date=Thu May 16 12:25:46 2019
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT xxxx
chr1 235972435 . T C 195.009 . DP=744;VDB=0.27972;SGB=-0.693147;RPB=0.506757;MQB=0.997295;MQSB=0.616175;BQB=0.810181;MQ0F=0;AF1=0.5;AC1=1;DP4=218,151,192,179;MQ=58;FQ=188.502;PV4=0.0462442,1,0.32459,1 GT:PL 0/1:225,0,216 0.50 het 6
chr2 220022998 . G . 283.236 . DP=456;SGB=-0.379885;RPB=1;MQB=1;MQSB=0.840495;BQB=1;MQ0F=0;AF1=0;AC1=0;DP4=245,209,1,0;MQ=59;FQ=-281.989;PV4=1,0.0010322,1,4.68871e-09 GT:PL 0/0:0 0.00 hom 9
chr3 128205860 . G C 221.999 . DP=353;VDB=0.00961268;SGB=-0.693147;RPB=1;MQB=1;MQSB=0.685507;BQB=1;MQ0F=0;AF1=1;AC1=2;DP4=0,1,147,176;MQ=57;FQ=-281.989;PV4=1,1,1,0.30647 GT:PL 1/1:255,255,0 1.00 hom 7
chr5 35871190 . G A 170.009 . DP=173;VDB=0.000529499;SGB=-0.693147;RPB=0.605358;MQB=0.998833;MQSB=0.99703;BQB=0.113906;MQ0F=0;AF1=0.5;AC1=1;DP4=41,52,35,39;MQ=55;FQ=172.502;PV4=0.754883,1,1,1 GT:PL 0/1:200,0,209 0.50 het 5
chr5 77412011 . A G 161.009 . DP=293;VDB=0.0010866;SGB=-0.693147;RPB=0.990047;MQB=0.923303;MQSB=0.967232;BQB=0.000232632;MQ0F=0;AF1=0.5;AC1=1;DP4=83,74,71,50;MQ=57;FQ=164.015;PV4=0.394406,2.23136e-05,1,1 GT:PL 0/1:191,0,225
chr7 66459269 . C . 136.006 . DP=316;MQSB=0.314206;MQ0F=0.00316456;AF1=0;AC1=0;DP4=122,176,0,0;MQ=12;FQ=-281.989 GT:PL 0/0:0 0.50 hom 4