awk to calculate difference of split and sum the difference

In the awk below I am trying to calculate the difference $3-$2 for each line, grouping the lines by the part of $4 before the first _ (underscore), and print that value in $13.
I think the awk will do that, and I have added comments. What I am not sure of is how to add a line or lines that will sum the $13 values within each matching group and put that total in $14 on the first line of each matching array[1]. The file is tab-delimited and is a subset of the actual data, but all of the data are in the same format. Thank you :).

file

chr12	9220418	9220435	A2M_cds_0_0_chr12_9220419_r	0	-	.	-1	-1		.	0
chr12	9220778	9220820	A2M_cds_1_0_chr12_9220779_r	0	-	.	-1	-1		.	0
chr12	9241795	9241847	A2M_cds_14_0_chr12_9241796_r	0	-	.	-1	-1		.	0
chr12	9262909	9262930	A2M_cds_31_0_chr12_9262910_r	0	-	.	-1	-1		.	0
chr12	9264754	9264807	A2M_cds_32_0_chr12_9264755_r	0	-	.	-1	-1		.	0
chr12	53708877	53708924	AAAS_cds_11_0_chr12_53708878_r	0	-	.	-1	-1		.	0
chr12	53709510	53709566	AAAS_cds_13_0_chr12_53709511_r	0	-	.	-1	-1		.	0

desired output

chr12	9220418	9220435	A2M_cds_0_0_chr12_9220419_r	0	-	.	-1	-1		.	0	17	185
chr12	9220778	9220820	A2M_cds_1_0_chr12_9220779_r	0	-	.	-1	-1		.	0	42	
chr12	9241795	9241847	A2M_cds_14_0_chr12_9241796_r	0	-	.	-1	-1		.	0	52	
chr12	9262909	9262930	A2M_cds_31_0_chr12_9262910_r	0	-	.	-1	-1		.	0	21	
chr12	9264754	9264807	A2M_cds_32_0_chr12_9264755_r	0	-	.	-1	-1		.	0	53	
chr12	53708877	53708924	AAAS_cds_11_0_chr12_53708878_r	0	-	.	-1	-1		.	0	47	103
chr12	53709510	53709566	AAAS_cds_13_0_chr12_53709511_r	0	-	.	-1	-1		.	0	56	

awk

# Append the interval length ($3 - $2) to every row of the tab-delimited
# input "file" as a new last column ($13 when the input has 12 columns).
#
# Fixes relative to the first attempt:
#   * "print" is a statement, so it cannot be used inside an && expression;
#   * the membership test used an array that was never filled;
#   * $13 does not exist in the input, so it must be computed, not printed;
#   * with a single input file FNR==NR is always true, so the guard is dropped.
awk '
BEGIN { FS = OFS = "\t" }     # read and write tab-delimited fields
{
    print $0, ($3 - $2)       # original row followed by end minus start
}' file
# Two passes over the same tab-delimited file (it is named twice on the
# command line):
#   pass 1 - remember each row length ($3 - $2) by line number and add it to
#            the running total of its group;
#   pass 2 - print each row followed by its length, and by the group total on
#            the first row of the group (an empty field on the other rows).
# A group is identified by the first two "_"-separated pieces of $4.
awk '
BEGIN { FS = "\t"; OFS = "\t" }

# Runs for every row in both passes: build the group key.
{
    split($4, piece, "_")
    group = piece[1] SUBSEP piece[2]
}

# Pass 1: NR and FNR only agree while the first copy of the file is read.
NR == FNR {
    len[FNR] = $3 - $2
    total[group] += len[FNR]
    next
}

# Pass 2: emit the row, its length, and the total once per group.
{
    if (seen[group]++)
        print $0, len[FNR], ""
    else
        print $0, len[FNR], total[group]
}
' file file
1 Like

Thank you very much :slight_smile: