Category and count with awk

aydj · August 7, 2015, 1:59pm

I want to categorize and count the records as shown below:

Input file:

A1 G1 C1 F1
A2 G1 C1 F1
A3 G1 C1 F2
A4 G1 C2 F2
A7 G1 C2 F2
A8 G1 C2 F3
A11 G1 C2 F3
A23 G1 C2 F3
B4 G1 C2 F3
AC4 G2 C3 F4
B6 G2 C4 F4
BB5 G2 C4 F4
A25 G2 C5 F4
B13 G2 C5 F5
D12 G2 C5 F5
D2 G2 C5 F5
B89 G2 C5 F6
B44 G2 C5 F6

Desired Output:

Total            : 18
               G1 : 9
               G2 : 9
               F1 : 2
               F2 : 3
               F3 : 4
               F4 : 4
               F5 : 3
               F6 : 2

G1[9]

F1(2)
C1=A1,A2

F2(3)
C1=A3
C2=A4,A7

F3(4)
C2=A8,A11,A23,B4

G2[9]

F4(4)
C4=B6,BB5
C5=A25
C3=AC4

F5(3)
C5=B13,D12,D2

F6(2)
C5=B89,B44

I have tried:

#!/usr/bin/ksh
# Summarise file.txt, whose records have four whitespace-separated fields.
# Prints the record count, a count per distinct field-2 value, a count per
# distinct field-4 value, and then a nested listing of field-1 values.
awk '
    # Per record: tally field 2 and field 4, and collect field-1 values
    # (comma-joined) under a key made of fields 4, 3 and 2 joined by FS.
    {
      D[$2]++
      A[$4]++
      B[$4 FS $3 FS $2] = B[$4 FS $3 FS $2] ? B[$4 FS $3 FS $2] "," $1 : $1
    }
    END {
      # Summary header. The array subscripts [i] are required: a bare
      # array name in scalar context is a fatal error in awk.
      printf "%20s%-3s \n", "Total            : ", NR
      for (i in D)
        printf "%20s%-3s \n", i" : ", D[i]
      for (i in A)
        printf "%20s%-3s \n", i" : ", A[i]
      print " "

      # Nested listing. Every field-4 value is printed under every
      # field-2 value, whether or not any record combines the two,
      # which is why headings with nothing below them show up.
      for (k in D) {
        print k"["D[k]"] "
        for (i in A) {
          print i"("A[i]")"
          for (j in B) {
            # Recover the three key parts: X[1]=field 4, X[2]=field 3,
            # X[3]=field 2 (split uses FS, the same separator as the key).
            split(j, X)
            if (X[3] == k && X[1] == i)
              print X[2]"="B[j]
          }
          print ""
        }
      }
    }
' file.txt

But I get:

Total            : 18
               G1 : 9
               G2 : 9
               F1 : 2
               F2 : 3
               F3 : 4
               F4 : 4
               F5 : 3
               F6 : 2

G1[9]
F1(2)
C1=A1,A2

F2(3)
C1=A3
C2=A4,A7

F3(4)
C2=A8,A11,A23,B4

F4(4)

F5(3)

F6(2)

G2[9]
F1(2)

F2(3)

F3(4)

F4(4)
C4=B6,BB5
C5=A25
C3=AC4

F5(3)
C5=B13,D12,D2

F6(2)
C5=B89,B44

How do I get rid of F4, F5 and F6 from the G1 category, and F1, F2 and F3 from the G2 category?

bakunin · August 8, 2015, 2:51am

You seem to have exactly the problem described here.

I hope this helps.

bakunin

Scrutinizer · August 8, 2015, 2:58am

After some reformatting and outlining (it helps!) I came up with the following quick fix:

#!/usr/bin/ksh
# Summarise file.txt, whose records have four whitespace-separated fields.
# Prints the record count, a count per distinct field-2 value, a count per
# distinct field-4 value, and then, for each field-2 value, only those
# field-4 headings that actually have entries under it.
awk '
    # Per record: tally field 2 and field 4, remember each field-3 value,
    # and collect field-1 values (comma-joined) under the key ($4,$3,$2).
    {
      D[$2]++
      A[$4]++
      C[$3]
      # Test key membership rather than the stored value, so that a
      # field-1 value of 0 or an empty one is appended to, not replaced.
      if (($4,$3,$2) in B)
        B[$4,$3,$2] = B[$4,$3,$2] "," $1
      else
        B[$4,$3,$2] = $1
    }
    END {
      # Summary header. The array subscripts [i] are required: a bare
      # array name in scalar context is a fatal error in awk.
      printf "%20s%-3s \n", "Total            : ", NR
      for (i in D)
        printf "%20s%-3s \n", i" : ", D[i]
      for (i in A)
        printf "%20s%-3s \n", i" : ", A[i]
      print " "

      for (k in D) {
        # Appending RS emits an extra blank line after the heading.
        print k"["D[k]"]" RS
        for (i in A) {
          entries = 0
          for (j in C) {
            if ((i,j,k) in B) {
              # Print the field-4 heading once, just before its first entry.
              if (!entries++)
                print i"("A[i]")"
              print j"="B[i,j,k]
            }
          }
          # Close the section with a blank line only if it printed anything.
          if (entries)
            print ""
        }
      }
    }
' file.txt

Which hopefully will get you going again..

aydj · August 8, 2015, 1:22pm

Thanks, that solves it.