Convert data into tabular matrix

Hi There,

I want to convert the following data into tabular matrix, based on column 4th and 5th, and output the column 10th value

chr1    2804449    2804450    NACpG_1    window1    +    chr1    2804443    2804475    1
chr1    2804450    2804451    NACpG_1    window2    +    chr1    2804443    2804475    1
chr1    2804451    2804452    NACpG_1    window3    +    chr1    2804443    2804475    1
chr1    2804452    2804453    NACpG_1    window4    +    chr1    2804443    2804475    1
chr1    2804453    2804454    NACpG_1    window5    +    chr1    2804443    2804475    1
chr1    2804454    2804455    NACpG_1    window6    +    chr1    2804443    2804475    1
chr1    2804455    2804456    NACpG_1    window7    +    chr1    2804443    2804475    1
chr1    2804456    2804457    NACpG_1    window8    +    chr1    2804443    2804475    1
chr1    2804457    2804458    NACpG_1    window9    +    chr1    2804443    2804475    1
chr1    2804458    2804459    NACpG_1    window10    +    chr1    2804443    2804475    1
chr1    47235698    47235699    NACpG_2    window10    -    chr1    47235697    47235699    0.90
chr1    47235699    47235700    NACpG_2    window9    -    chr1    47235699    47235703    0.11
chr1    47235700    47235701    NACpG_2    window8    -    chr1    47235699    47235703    0.17
chr1    47235701    47235702    NACpG_2    window7    -    chr1    47235699    47235703    0.17
chr1    47235702    47235703    NACpG_2    window6    -    chr1    47235699    47235703    0.97
chr1    47235703    47235704    NACpG_2    window5    -    chr1    47235703    47235705    0.98
chr1    47235704    47235705    NACpG_2    window4    -    chr1    47235703    47235705    0.98
chr1    47235705    47235706    NACpG_2    window3    -    chr1    47235705    47235709    0.29
chr1    47235706    47235707    NACpG_2    window2    -    chr1    47235705    47235709    0.29
chr1    47235707    47235708    NACpG_2    window1    -    chr1    47235705    47235709    1
chr1    56927295    56927296    NANoCpG_3    window10    -    .    -1    -1    0
chr1    56927296    56927297    NANoCpG_3    window9    -    .    -1    -1    0
chr1    56927297    56927298    NANoCpG_3    window8    -    .    -1    -1    0
chr1    56927298    56927299    NANoCpG_3    window7    -    .    -1    -1    0
chr1    56927299    56927300    NANoCpG_3    window6    -    .    -1    -1    0
chr1    56927300    56927301    NANoCpG_3    window5    -    .    -1    -1    0
chr1    56927301    56927302    NANoCpG_3    window4    -    .    -1    -1    0
chr1    56927302    56927303    NANoCpG_3    window3    -    .    -1    -1    1
chr1    56927303    56927304    NANoCpG_3    window2    -    .    -1    -1    1
chr1    56927304    56927305    NANoCpG_3    window1    -    .    -1    -1    1

Desired output format

    window1 window2 window3 window4 window5 window6 window7 window8 window9 window10
NACpG_1    1    1    1    1    1    1    1    1    1    1
NACpG_2    1    0.29    0.29    0.98    0.98    0.97    0.17    0.17    0.11    0.9
NANoCpG_3    1    1    1    0    0    0    0    0    0        

Previously, i was doing it using perl. As the current output is from command-line, i wanted to convert this output into required format using awk.

I tried, but did not get far:

cat Test | awk '{ f1[$4]++; f2[$5]++; } END { ORS=" ";print "      "; for ( i in f2) { print i }; "\n"}'

Any help will be appreciated. Thanks in advance.

Hello ChiragNepal,

Welcome to forums, could you please code tags for commands/codes/inputs used by you in your posts as per forum rules.
Following may help you in same.

awk 'BEGIN{print "window1 window2 window3 window4 window5 window6 window7 window8 window9 window10"}{Y[$4]++;X[$4]=$NF OFS X[$4]} END{for(i in X){;print i OFS X}}' Input_file

Output will be as follows.

window1 window2 window3 window4 window5 window6 window7 window8 window9 window10
NACpG_1 1 1 1 1 1 1 1 1 1 1
NANoCpG_3 1 1 1 0 0 0 0 0 0 0
NACpG_2 1 0.29 0.29 0.98 0.98 0.97 0.17 0.17 0.11 0.90

Thanks,
R. Singh

1 Like

Thank you RavinderSingh !!

Please control your use of code tags!

You'll have to remove the DOS <CR> line terminators before any of these solutions work.
Try also

awk     '       {LN[$4]; HD[$5]; MX[$4,$5]=$NF}
         END    {               printf "%10s", ""; for (i in HD) printf "%10s", i; print "";
                 for (j in LN) {printf "%10s",j;   for (i in HD) printf "%10s", MX[j,i]; print ""}
                }
        ' file
            window10   window1   window2   window3   window4   window5   window6   window7   window8   window9
   NACpG_1         1         1         1         1         1         1         1         1         1         1
   NACpG_2      0.90         1      0.29      0.29      0.98      0.98      0.97      0.17      0.17      0.11
 NANoCpG_3         0         1         1         1         0         0         0         0         0         0
1 Like