I added a couple parameters in the script to pass from the for
loop. It does look like the correct files are being passed to the script, however the output is not correct. Commenting out the printf
statements causes syntax errors
awk: cmd. line:9: $1, $4, c[$1, $4], m[$1, $4, c[$1, $4]],
awk: cmd. line:9: ^ syntax error
awk: cmd. line:9: $1, $4, c[$1, $4], m[$1, $4, c[$1, $4]],
awk: cmd. line:9: ^ syntax error
awk: cmd. line:9: $1, $4, c[$1, $4], m[$1, $4, c[$1, $4]],
awk: cmd. line:9: ^ syntax error
awk: cmd. line:10: $1, $4, c[$1, $4], M[$1, $4, c[$1, $4]])
awk: cmd. line:10: ^ syntax error
awk: cmd. line:10: $1, $4, c[$1, $4], M[$1, $4, c[$1, $4]])
awk: cmd. line:10: ^ syntax error
awk: cmd. line:16: i, m[$1, $4, i],
awk: cmd. line:16: ^ syntax error
awk: cmd. line:16: i, m[$1, $4, i],
awk: cmd. line:16: ^ syntax error
awk: cmd. line:17: i, M[$1, $4, i],
I guess I am not understanding if the parameters are being passed from the for
loop what else I am missing. I added the code that allows the operands to be controlled by the parameters passed by the for
loop. Thank you :).
for file in /home/cmccabe/folder/less/*.txt ; do
bname=$(basename "$file")
pref=${bname%%_*.txt}
#echo "file:\"$file\" bname:\"$bname\" pref:\"$pref\""
#echo "output will be directed to:\"/home/cmccabe/folder/less/${pref}_output.txt\""
bash -x /home/cmccabe/folder/less/exon.sh /home/cmccabe/folder/less/all_cdsV2 "$file" > /home/cmccabe/folder/less/${pref}_output.txt
done
exon.sh
#!/bin/sh
awk -v d=$# '
BEGIN { FS = "[\t_]"
OFS = "\t"
}
FNR == NR {
m[$1, $4, ++c[$1, $4]] = $2 + 0
M[$1, $4, c[$1, $4]] = $3 + 0
if(d) printf("m[%s,%s,%d]=%s,M[%s,%s,%d]=%s\n",
$1, $4, c[$1, $4], m[$1, $4, c[$1, $4]],
$1, $4, c[$1, $4], M[$1, $4, c[$1, $4]])
next
}
{ if(d) printf("FNR=%d:\"%s\"\n",FNR,$0)
for(i = 1; i <= c[$1, $4]; i++) {
if(d) printf("m[%d]=%d,M[%d]=%d,$2=%d\n",
i, m[$1, $4, i],
i, M[$1, $4, i],
$2)
if(m[$1, $4, i] <= $2 && $2 <= M[$1, $4, i]) {
$5 = "exon"
break
} else {if(m[$1, $4, i] > $2 + 0) {
if(m[$1, $4, i] - 10 <= $2 + 0) {
$5 = "splicing"
break
} else {$5 = "intron"
break
}
}
}
}
if(i > c[$1, $4])
$5 = "intron"
}
1' "$1" "$2"
using the bash -x
+ awk -v d=2 '
BEGIN { FS = "[\t_]"
OFS = "\t"
}
FNR == NR {
m[$1, $4, ++c[$1, $4]] = $2 + 0
M[$1, $4, c[$1, $4]] = $3 + 0
if(d) printf("m[%s,%s,%d]=%s,M[%s,%s,%d]=%s\n",
$1, $4, c[$1, $4], m[$1, $4, c[$1, $4]],
$1, $4, c[$1, $4], M[$1, $4, c[$1, $4]])
next
}
{ if(d) printf("FNR=%d:\"%s\"\n",FNR,$0)
for(i = 1; i <= c[$1, $4]; i++) {
if(d) printf("m[%d]=%d,M[%d]=%d,$2=%d\n",
i, m[$1, $4, i],
i, M[$1, $4, i],
$2)
if(m[$1, $4, i] <= $2 && $2 <= M[$1, $4, i]) {
$5 = "exon"
break
} else {if(m[$1, $4, i] > $2 + 0) {
if(m[$1, $4, i] - 10 <= $2 + 0) {
$5 = "splicing"
break
} else {$5 = "intron"
break
}
}
}
}
if(i > c[$1, $4])
$5 = "intron"
}
1' /home/cmccabe/folder/less/all_cdsV2 /home/cmccabe/folder/less/11-1111_regions.txt
output
00-0000_output.txt (only a few lines)
m[chr1,ADC,1]=33547850,M[chr1,ADC,1]=33547955
m[chr1,ADC,2]=33549554,M[chr1,ADC,2]=33549728
m[chr1,ADC,3]=33557650,M[chr1,ADC,3]=33557823
11-1111_output.txt
m[chr1,ADC,1]=33547850,M[chr1,ADC,1]=33547955
m[chr1,ADC,2]=33549554,M[chr1,ADC,2]=33549728
m[chr1,ADC,3]=33557650,M[chr1,ADC,3]=33557823