Use awk to count and rearrange entries

aydj · January 21, 2014, 7:25am

How can I use awk to count the occurrence of field 2 and rearrange the output like below:

Input:

OA1 FM AA
OA0 FM CC
ON0 FM CC
FN1 FN BB
OY1 FN BB
OY2 FN CC
OY3 FN CC
YT0 FM AA
KW1 FN CC
KW3 FM BB
YT4 FM AA
FN2 FT BB
OA3 FT AA
ON7 FM BB

output:

FM(7)
AA=OA1,YT0,YT4
CC=OA0,ON0
BB=KW3,ON7

FN(5)
BB=FN1,OY1
CC=OY2,OY3,KW1

FT(2)
BB=FN2
AA=OA3

Akshay_Hegde · January 21, 2014, 8:02am

If order doesn't matter then this would be fine

$ awk '{
          A[$2]++
          B[$2 FS $3] = B[$2 FS $3] ? B[$2 FS $3] "," $1 : $1
       }
    END{   
            for(i in A){
                                   print i"("A")"
                    for(j in B){
                                   split(j,X)
                                   if(X[1]==i)
                                   print X[2]"="B[j]              
                               }
                                   print ""
                       }
       }' file

FM(7)
AA=OA1,YT0,YT4
BB=KW3,ON7
CC=OA0,ON0

FN(5)
BB=FN1,OY1
CC=OY2,OY3,KW1

FT(2)
AA=OA3
BB=FN2

aydj · January 21, 2014, 8:26am

Thank you for your effort, but I am still not getting the desired output.

Akshay_Hegde · January 21, 2014, 8:30am

Hai aydi I just updated did you try ?

RavinderSingh13 · January 21, 2014, 8:32am

Hello,

Following may also help.

awk '/FM/ {i++};
        /FN/ {j++};
        /FT/ {k++};
        {
                if($3 ~ f && $2 =="FM")
                {a=a","$1}
                 {f="AA"; g="BB"; h="CC"}
                if($3 ~ g && $2 =="FM")
                 {b=b","$1}
                 {f="AA"; g="BB"; h="CC"}
                if($3 ~ h && $2 =="FM")
                {c=c","$1}
                 {f="AA"; g="BB"; h="CC"}

                if($3 ~ f && $2 =="FN")
                {a1=a1","$1}
                 {f="AA"; g="BB"; h="CC"}
                if($3 ~ g && $2 =="FN")
                 {b1=b1","$1}
                 {f="AA"; g="BB"; h="CC"}
                if($3 ~ h && $2 =="FN")
                {c1=c1","$1}
                 {f="AA"; g="BB"; h="CC"}

                if($3 ~ f && $2 =="FT")
                {a2=a2","$1}
                 {f="AA"; g="BB"; h="CC"}
                if($3 ~ g && $2 =="FT")
                 {b2=b2","$1}
                 {f="AA"; g="BB"; h="CC"}
                if($3 ~ h && $2 =="FT")
                {c2=c2","$1}
                 {f="AA"; g="BB"; h="CC"}
 
        }
END { print"FM= "i"\nAA= " a"\nBB= "b"\nCC= "c"\nFN= "j"\nAA= " a1"\nBB= "b1"\nCC= "c1"\nFT= "k"\nAA= " a2"\nBB= "b2"\nCC= "c2}' check_data_range_details | sed 's/\= \,/\=/g'

Output will be as follows.

FM= 7
AA=OA1,YT0,YT4
BB=KW3,ON7
CC=OA0,ON0
FN= 6
AA=
BB=FN1,OY1
CC=OY2,OY3,KW1
FT= 2
AA=OA3
BB=FN2
CC=

Thanks,
R. Singh

aydj · January 21, 2014, 8:34am

Yeah, I used the edited version

Akshay_Hegde · January 21, 2014, 8:35am

Okay..

aydj · January 21, 2014, 8:36am

Thank you for your effort, but the categories in the file are many and not constant, so I cant do a find and replace within the script

Akshay_Hegde · January 21, 2014, 8:38am

aydi except order remaining seems to be fine.. please do check

aydj · January 21, 2014, 8:54am

The Order is very Important

RudiC · January 21, 2014, 9:26am

Way more clumsy than I intended when I started, but it will keep the order of occurrences of $2. Try

awk     '!A[$2]++       {IX[++MAX]=$2}
                        {B[$2 FS $3]=B[$2 FS $3]","$1}
         END            {for (i=1; i<=MAX; i++) {T1= IX
                                                 print T1 "(" A[T1] ")"
                                                 for (Y in B) if (Y ~ "^"T1)
                                                                {T2=B[Y]
                                                                 sub (T1" ", "", Y)
                                                                 sub (/^,/, "", T2)
                                                                 print Y"="T2}
                                                 print ""}
                        }
        ' file
FM(7)
AA=OA1,YT0,YT4
BB=KW3,ON7
CC=OA0,ON0

FN(5)
BB=FN1,OY1
CC=OY2,OY3,KW1

FT(2)
AA=OA3
BB=FN2

Akshay_Hegde · January 21, 2014, 9:36am

@RudiC : in first tag he wants CC=OA0,ON0 to come first

Here is a code which takes care about order

awk '  {
          A[$2]++
          B[$2 FS $3] = B[$2 FS $3] ? B[$2 FS $3] "," $1 : $1
          if(!tmp[$2 FS $3]++)order[++c]=$2 FS $3
       }

function orderp(subs,mains){
                             for(j in B){
                                        if(j==subs){
                                                    split(j,X)
                                                    if(X[1]==mains)
                                                    print X[2]"="B[j]
                                                   }           
                                        }
                           }

    END{  
               for(i in A){
                                   print i"("A")"
                                   for(m=1;m<=c;m++){ orderp(order[m],i) }
                                   print ""
                          }
       }
   ' file

FM(7)
AA=OA1,YT0,YT4
CC=OA0,ON0
BB=KW3,ON7

FN(5)
BB=FN1,OY1
CC=OY2,OY3,KW1

FT(2)
BB=FN2
AA=OA3

aydj · January 21, 2014, 9:45am

Its working!!!!!!!!!!!!

RudiC · January 21, 2014, 9:49am

@Akshay Hegde: That one might spoil the order of $2, couldn't it?

Akshay_Hegde · January 21, 2014, 9:54am

I need to think... my mind is just unset