awk to add lines with symbol to output file

The awk below executes and produces output that is close, except that all the lines that start with a # are removed. Some lines start with one #, others with two or three. After the script adds the
ID= prefixes to the fields below the pattern in the awk, I cannot seem to add the # lines back to the output. Thank you :).

awk

awk -v OFS="\t" '/^#CHROM/{00-0000-xx r=NR }r && NR>r{ $11="ID1="$11; $12="ID2="$12; $13="ID3="$13; $14="ID4="$14; print }' file

file

##INFO=<ID=ANN,Number=1,Type=Integer,Description="My custom annotation">
##source_20170530.1=vcf-annotate(r953) -a /home/cmccabe/Desktop/NGS/bed/vcf/annotations.bed.gz -d key=INFO,ID=ANN,Number=1,Type=Integer,Description=My custom annotation -c CHROM,FROM,TO,INFO/ANN
##INFO=<ID=,Number=A,Type=Float,Description="Variant quality">
###INFO=<ID=,Number=A,Type=Float,Description="Variant quality">
###INFO=<ID=ID1,Type=Integer,Description="Variant quality">
###INFO=<ID=ID2,Type=String,Description="Reads">
###INFO=<ID=ID3,Type=String,Description="Zygosity">
###INFO=<ID=ID4,Type=Integer,Description="Score">
#CHROM    POS    ID    REF    ALT    QUAL    FILTER    INFO    FORMAT    00-0000-xx
chr1    948846    .    T    TA    529.927    PASS    AF=0.970874;AO=97;DP=106;FAO=100;FDP=103;FR=.;FRO=3;FSAF=52;FSAR=48;FSRF=3;FSRR=0;FWDB=-0.0127942;FXX=0.00961446;HRUN=1;LEN=1;MLLD=26.521;OALT=A;OID=.;OMAPALT=TA;OPOS=948847;OREF=-;PB=.;PBP=.;QD=20.5797;RBI=0.0732214;REFB=0.0962764;REVB=0.0720949;RO=7;SAF=49;SAR=48;SRF=6;SRR=1;SSEN=0;SSEP=0;SSSB=-0.0448565;STB=0.514016;STBP=0.111;TYPE=ins;VARB=-0.0047395    GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT    1/1:90:106:103:7:3:97:100:0.970874:48:49:6:1:48:52:3:0:1    GOOD    103    hom    16
chr1    948870    .    C    G    279.296    PASS    AF=0.482014;AO=67;DP=139;FAO=67;FDP=139;FR=.,REALIGNEDx0.4964;FRO=72;FSAF=34;FSAR=33;FSRF=34;FSRR=38;FWDB=-0.000997446;FXX=0;HRUN=2;LEN=1;MLLD=60.2134;OALT=G;OID=.;OMAPALT=G;OPOS=948870;OREF=C;PB=.;PBP=.;QD=8.0373;RBI=0.00460624;REFB=-0.0184382;REVB=0.00449694;RO=72;SAF=34;SAR=33;SRF=34;SRR=38;SSEN=0;SSEP=0;SSSB=0.0329868;STB=0.518243;STBP=0.7;TYPE=snp;VARB=0.0213678    GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT    0/1:279:139:139:72:72:67:67:0.482014:33:34:34:38:33:34:34:38:1    GOOD    139    het    8

current output

chr1     948846    .    T    TA    529.927    PASS     AF=0.970874;AO=97;DP=106;FAO=100;FDP=103;FR=.;FRO=3;FSAF=52;FSAR=48;FSRF=3;FSRR=0;FWDB=-0.0127942;FXX=0.00961446;HRUN=1;LEN=1;MLLD=26.521;OALT=A;OID=.;OMAPALT=TA;OPOS=948847;OREF=-;PB=.;PBP=.;QD=20.5797;RBI=0.0732214;REFB=0.0962764;REVB=0.0720949;RO=7;SAF=49;SAR=48;SRF=6;SRR=1;SSEN=0;SSEP=0;SSSB=-0.0448565;STB=0.514016;STBP=0.111;TYPE=ins;VARB=-0.0047395     GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT     1/1:90:106:103:7:3:97:100:0.970874:48:49:6:1:48:52:3:0:1    ID1=GOOD    ID2=103    ID3=hom    ID4=16
chr1     948870    .    C    G    279.296    PASS     AF=0.482014;AO=67;DP=139;FAO=67;FDP=139;FR=.,REALIGNEDx0.4964;FRO=72;FSAF=34;FSAR=33;FSRF=34;FSRR=38;FWDB=-0.000997446;FXX=0;HRUN=2;LEN=1;MLLD=60.2134;OALT=G;OID=.;OMAPALT=G;OPOS=948870;OREF=C;PB=.;PBP=.;QD=8.0373;RBI=0.00460624;REFB=-0.0184382;REVB=0.00449694;RO=72;SAF=34;SAR=33;SRF=34;SRR=38;SSEN=0;SSEP=0;SSSB=0.0329868;STB=0.518243;STBP=0.7;TYPE=snp;VARB=0.0213678     GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT     0/1:279:139:139:72:72:67:67:0.482014:33:34:34:38:33:34:34:38:1    ID1=GOOD    ID2=139    ID3=het    ID4=8

desired output

##INFO=<ID=ANN,Number=1,Type=Integer,Description="My custom annotation">
##source_20170530.1=vcf-annotate(r953) -a /home/cmccabe/Desktop/NGS/bed/vcf/annotations.bed.gz -d key=INFO,ID=ANN,Number=1,Type=Integer,Description=My custom annotation -c CHROM,FROM,TO,INFO/ANN
##INFO=<ID=,Number=A,Type=Float,Description="Variant quality">
###INFO=<ID=,Number=A,Type=Float,Description="Variant quality">
###INFO=<ID=ID1,Type=Integer,Description="Variant quality">
###INFO=<ID=ID2,Type=String,Description="Reads">
###INFO=<ID=ID3,Type=String,Description="Zygosity">
###INFO=<ID=ID4,Type=Integer,Description="Score">
#CHROM    POS    ID    REF    ALT    QUAL    FILTER    INFO    FORMAT    00-0000-xx
chr1    948846    .    T    TA    529.927    PASS    AF=0.970874;AO=97;DP=106;FAO=100;FDP=103;FR=.;FRO=3;FSAF=52;FSAR=48;FSRF=3;FSRR=0;FWDB=-0.0127942;FXX=0.00961446;HRUN=1;LEN=1;MLLD=26.521;OALT=A;OID=.;OMAPALT=TA;OPOS=948847;OREF=-;PB=.;PBP=.;QD=20.5797;RBI=0.0732214;REFB=0.0962764;REVB=0.0720949;RO=7;SAF=49;SAR=48;SRF=6;SRR=1;SSEN=0;SSEP=0;SSSB=-0.0448565;STB=0.514016;STBP=0.111;TYPE=ins;VARB=-0.0047395    GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT    1/1:90:106:103:7:3:97:100:0.970874:48:49:6:1:48:52:3:0:1    ID1=GOOD    ID2=103    ID3=hom    ID4=16
chr1    948870    .    C    G    279.296    PASS    AF=0.482014;AO=67;DP=139;FAO=67;FDP=139;FR=.,REALIGNEDx0.4964;FRO=72;FSAF=34;FSAR=33;FSRF=34;FSRR=38;FWDB=-0.000997446;FXX=0;HRUN=2;LEN=1;MLLD=60.2134;OALT=G;OID=.;OMAPALT=G;OPOS=948870;OREF=C;PB=.;PBP=.;QD=8.0373;RBI=0.00460624;REFB=-0.0184382;REVB=0.00449694;RO=72;SAF=34;SAR=33;SRF=34;SRR=38;SSEN=0;SSEP=0;SSSB=0.0329868;STB=0.518243;STBP=0.7;TYPE=snp;VARB=0.0213678    GT:GQ:DP:FDP:RO:FRO:AO:FAO:AF:SAR:SAF:SRF:SRR:FSAR:FSAF:FSRF:FSRR:QT    0/1:279:139:139:72:72:67:67:0.482014:33:34:34:38:33:34:34:38:1    ID1=GOOD    ID2=139    ID3=het    ID4=8

Try this adaptation:

awk -v OFS="\t" '!/^#/{for(i=11; i<=14; i++) $i="ID" i "=" $i}1'  file
1 Like

Thank you very much :).

Hello cmccabe,

This is the same code as Scrutinizer's; the only difference is that the following code will add the strings ID1= , ID2= , ID3= and ID4= to the output.

awk -v OFS="\t" '!/^#/{for(i=11; i<=14; i++) $i="ID" ++q "=" $i;q=""}1'   Input_file

Thanks,
R. Singh

2 Likes

Ravinder is correct. It can also be corrected like so:

awk -v OFS="\t" '!/^#/{for(i=11; i<=14; i++) $i="ID" i-10 "=" $i}1' file
1 Like

Thank you both :).