Group By in Unix

Hi,
I have a file with header, data, and trailer records:

Head|currentdate|EOF
Data|AAA|BBB|CCC|DDD|EEE|Source1
Data|AAA|BBB|CCC|DDD|EEE|Source1
Data|AAA|BBB|CCC|DDD|EEE|Source2
Data|AAA|BBB|CCC|DDD|EEE|Source2
Data|AAA|BBB|CCC|DDD|EEE|Source2
End|rec|EOF

Now I need the count of only the "Data" records (5 records in the above example),
grouped by the source system. My output should be:

Source1 2
Source2 3
How can I achieve this functionality in UNIX? Your help will be highly appreciated.

 
# Keep only the data records, extract the 7th '|'-separated field (the source
# system), then sort the values and count each distinct one.
grep '^Data|' file | cut -d'|' -f 7 | sort | uniq -c

Get the data lines, cut out the system field, sort them and count each value. Someone once did a sort of SQL in shell, years ago. Now you can get JDBC tools for flat files that let you query.

or:

# Print only the lines starting with "Data", with everything up to the last '|' removed.
sed -n 's/^Data.*|//p' file | sort | uniq -c
# Alternative: delete the first and last lines, then strip everything up to the last '|'.
sed '1d;$d;s/.*|//' file | sort | uniq -c

awk:

# Tally the last field of every record that has more than three fields (the
# data records), then print each distinct value followed by its count.
awk -F'|' 'NF>3{A[$NF]++}END{for(i in A) print i,A[i]}' file

I wrote an aggregator in C:

# Reduce each "Data" line to the text after its last '|' and pipe the values to aggsx with -l.
sed -n 's/^Data.*|//p' file | aggsx -l
 
$ aggsx --help
Usage:
aggsx [ -b ] [ -l ] [ -p <prefix> ] [ -u ] [ -d ] [ -h ]
Computes the count distinct, count null, min, count of min, max,
count of max, average (mean) of not null values if numeric,
median of not null values, largest of the most popular values,
count of that most popular value.
If -l is present, first prints out all values in order and their counts,
null last, but no aggregates.
If -b is present, prints out like -l and then prints aggregates.
If -p is present, the aggregate is prefixed with '<prefix>|'.
If -u is present, just immediately prints out unique values.
If -d is present, just immediately prints out duplicated values.
If -h is present, prefixes values line with header line:
CtD|CtN|Min|CtMin|Max|CtMax|Avg|Med|MPop|CtMPop
 
$ cat mysrc/aggsx.c
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
/*
 * File-scope state shared by fmv() and main().
 *
 * vl and vct are parallel arrays: vl[k] is a distinct input value and vct[k]
 * is the number of times it was seen.  fmv() keeps vl ordered by strcmp().
 */
static  long double     sum = 0.0 ;     /* sum of value * count, later divided by nvc */
static  long double     nval ;          /* result of the latest strtod() call */
static  unsigned long   lct = 0 ;       /* input lines read */
static  unsigned long   nct = 0 ;       /* lines whose value is "<null>" */
static  unsigned long   ll2 ;           /* counter used by main() while locating the median */
static  unsigned long   nvc = 0 ;       /* occurrences of values that parsed as numbers */
static  unsigned long   dct = 0 ;       /* distinct values stored in vl / vct */
static  unsigned long   act = 0 ;       /* allocated capacity of vl / vct, in elements */
static  unsigned long   ll ;            /* loop index over the value table */
static  unsigned long   mpc = 0 ;       /* highest occurrence count found so far */
static  unsigned long   *vct = NULL ;   /* value counts */
static  unsigned long   *lp ;           /* cursor into vct */
static  char            **vl = NULL ;   /* value list */
static  char            **cpp ;         /* cursor into vl */
static  char            *cp ;           /* scratch: scan position / current value */
static  char            *cp2 ;          /* scratch: first non-blank char, strtod() end pointer */
static  char            *cp3 ;          /* scratch: last non-blank char of the line */
static  char            *me = "" ;      /* median value */
static  char            *mp = "" ;      /* most popular value */
static  char            *pfx = NULL ;   /* argument of the -p option, if given */
static  int             i ;             /* argv index */
static  int             d = 0 ; /* -d option */
static  int             u = 0 ; /* -u option */
static  int             l = 0 ; /* -l option */
static  int             b = 0 ; /* -b option */
static  int             num = 1 ;       /* cleared once a non-empty value fails to parse as a number */
static  int             lfm ;   /* line feed missing */
static  char            buf[66000] ;    /* input line buffer; reused to format the average */
static  void            fmv( char *val )
{
        unsigned long   cv ;
        unsigned long   cl = 0 ;
        unsigned long   ch ;
        int             r ;
        char            **cf ;
        char            **ct ;
        char            **ce ;
        unsigned long   *lf ;
        unsigned long   *lt ;
        if ( dct )
                for ( cl = 0, ch = dct - 1 ; cl <= ch ; )
                {
                        cv = ( ch + cl ) >> 1 ;
                        r = strcmp( val, vl[cv] );
                        if ( r > 0 )
                        {
                                cl = cv + 1 ;
                        }
                        else if ( r < 0 )
                        {
                                if ( cv )
                                        ch = cv - 1 ;
                                else
                                        break ;
                        }
                        else
                        {
                                lt = vct + cv ;
                                *lt += 1 ;
                                if ( d
                                  && *lt == 2 ) /* report dups */
                                {
                                        if ( 0 > printf( "%s\n", val )
                                          || fflush( stdout ) )
                                        {
                                                if ( ferror( stdout ) )
                                                {
                                                        perror( "stdout" );
                                                        exit( 1 );
                                                }
                                                exit( 0 );
                                        }
                                }
                                return ;
                        }
                }
        if ( u ) /* report unique */
        {
                if ( 0 > printf( "%s\n", val )
                  || fflush( stdout ) )
                {
                        if ( ferror( stdout ) )
                        {
                                perror( "stdout" );
                                exit( 1 );
                        }
                        exit( 0 );
                }
        }
        cv = dct ;
        if ( ++dct > act )
        {
                act += 1024 ;
                if ( !( vl = realloc( vl, act * sizeof( char* ) ) ) )
                {
                        perror( "realloc()" );
                        exit( 1 );
                }
                if ( !( vct = realloc( vct, act * sizeof( long ) ) ) )
                {
                        perror( "realloc()" );
                        exit( 1 );
                }
        }
        for ( ce = vl + cl,
                cf = ( ( ct = vl + cv ) - 1 ),
                lf = ( ( lt = vct + cv ) - 1 ) ;
              ct > ce ;
              cf--, ct--, lf--, lt-- )
        {
                *ct = *cf ;
                *lt = *lf ;
        }
        *lt = 1 ;
        if ( !( *ct = malloc( strlen( val ) + 1 ) ) )
        {
                perror( "malloc()" );
                exit( 1 );
        }
        strcpy( *ct, val );
        return ;
}
/*
 * Buffer size passed to setvbuf().  PIPE_MAX is not defined on every
 * platform, so fall back to the standard BUFSIZ when it is missing.
 */
#ifdef PIPE_MAX
#define AGGSX_IOBUF     PIPE_MAX
#else
#define AGGSX_IOBUF     BUFSIZ
#endif

/* Print the usage text on stderr and exit with status 1. */
static  void            usage( void )
{
        fputs(
"Usage:\n"
"\n"
"aggsx [ -b ] [ -l ] [ -p <prefix> ] [ -u ] [ -d ] [ -h ]\n"
"\n"
"Computes the count distinct, count null, min, count of min, max,\n"
"count of max, average (mean) of not null values if numeric,\n"
"median of not null values, largest of the most popular values,\n"
"count of that most popular value.\n"
"\n"
"If -l is present, first prints out all values in order and their counts,\n"
"null last, but no aggregates.\n"
"If -b is present, prints out like -l and then prints aggregates.\n"
"If -p is present, the aggregate is prefixed with '<prefix>|'.\n"
"If -u is present, just immediately prints out unique values.\n"
"If -d is present, just immediately prints out duplicated values.\n"
"If -h is present, prefixes values line with header line:\n"
"CtD|CtN|Min|CtMin|Max|CtMax|Avg|Med|MPop|CtMPop|Ct\n"
"\n"                    , stderr );
        exit( 1 );
}

/*
 * Terminate after a failed write to stdout: status 1 with a diagnostic when
 * the stream's error flag is set, status 0 otherwise.
 */
static  void            out_failed( void )
{
        if ( ferror( stdout ) )
        {
                perror( "stdout" );
                exit( 1 );
        }
        exit( 0 );
}

/*
 * aggsx - aggregate the values read from stdin, one value per line.
 *
 * Each line is stripped of leading and trailing blanks, tabs and carriage
 * returns.  The literal "<null>" is counted as a null; every other value
 * (including the empty string) is handed to fmv().  Depending on the options
 * the program then prints the value/count list, the aggregate line, or both.
 *
 * Exit status: 0 on success, 1 on a usage error, an over-long input line,
 * or an I/O failure.
 */
int main( int argc, char **argv ){
        unsigned long   median_rank ;   /* 1-based rank of the median value */

        setvbuf( stdin, NULL, _IOFBF, AGGSX_IOBUF );
        setvbuf( stdout, NULL, _IOFBF, AGGSX_IOBUF );

        for ( i = 1 ; i < argc ; i++ )
        {
                if ( !strcmp( argv[i], "-b" ) )
                {
                        b = 1 ;
                }
                else if ( !strcmp( argv[i], "-l" ) )
                {
                        l = 1 ;
                }
                else if ( !strcmp( argv[i], "-p" ) )
                {
                        /* -p needs a following argument */
                        if ( ++i >= argc )
                        {
                                usage();
                        }
                        pfx = argv[i] ;
                }
                else if ( !strcmp( argv[i], "-u" ) )
                {
                        u = 1 ;
                }
                else if ( !strcmp( argv[i], "-d" ) )
                {
                        d = 1 ;
                }
                else if ( !strcmp( argv[i], "-h" ) )
                {
                        /* the header is written as soon as the option is seen */
                        fputs(
"CtD|CtN|Min|CtMin|Max|CtMax|Avg|Med|MPop|CtMPop|Ct\n",
                                stdout );
                }
                else
                {
                        usage();
                }
        }

        while ( fgets( buf, sizeof( buf ), stdin ) )
        {
                lct++ ;
                cp2 = cp3 = NULL ;
                lfm = 1 ;
                for ( cp = buf ; *cp ; cp++ )
                {
                        if ( *cp == '\n' )
                        {
                                lfm = 0 ;
                        }
                        else if ( *cp != '\r'
                               && *cp != ' '
                               && *cp != '\t' )
                        {
                                if ( !cp2 )
                                {
                                        cp2 = cp ;      /* first significant char */
                                }
                                cp3 = cp ;              /* last significant char */
                        }
                }
                /*
                 * No newline means the line overflowed buf, unless the input
                 * simply ended without a final newline.
                 */
                if ( lfm
                  && !feof( stdin ) )
                {
                        fprintf( stderr, "\nFatal: Data line %lu too long!\n",
                                                lct );
                        exit( 1 );
                }
                if ( cp3 )
                {
                        cp3[1] = '\0' ;
                        cp = cp2 ;
                }
                /* for an all-blank line cp rests on the terminator: "" */
                if ( strcmp( cp, "<null>" ) )
                {
                        fmv( cp );
                }
                else
                {
                        nct++ ;
                }
        }
        if ( ferror( stdin ) )
        {
                perror( "stdin" );
                exit( 1 );
        }

        /* -u / -d output was already produced by fmv() */
        if ( u
          || d )
                exit( 0 );

        if ( l || b )
        {
                for ( ll = 0 ; ll < dct ; ll++ )
                {
                        if ( 0 > printf( "%lu\t%s\n", vct[ll], vl[ll] ) )
                        {
                                out_failed();
                        }
                }
                if ( nct
                  && 0 > printf( "%lu\t%s\n", nct, "<null>" ) )
                {
                        out_failed();
                }
                if ( !b )
                {
                        if ( fflush( stdout ) )
                        {
                                out_failed();
                        }
                        exit( 0 );
                }
        }

        /*
         * One pass over the sorted table.  The median is the value holding
         * rank floor(n/2)+1 among the n non-null lines; ll2 counts the
         * occurrences that precede the current value.
         */
        median_rank = ( ( lct - nct ) >> 1 ) + 1 ;
        for ( ll = 0, ll2 = 0 ; ll < dct ; ll++ )
        {
                cp = vl[ll] ;
                if ( vct[ll] >= mpc )   /* >= keeps the largest on ties */
                {
                        mpc = vct[ll] ;
                        mp = cp ;
                }
                if ( *cp
                  && num )
                {
                        errno = 0 ;
                        nval = strtod( cp, &cp2 );
                        if ( errno              /* underflow or overflow */
                          || ( cp2 == cp )      /* didn't like the characters */
                          || *cp2 )             /* didn't like some */
                        {
                                num = 0 ;
                        }
                        else
                        {
                                sum += ( nval * vct[ll] ) ;
                                nvc += vct[ll] ;
                        }
                }
                if ( ll2 < median_rank )
                {
                        me = cp ;
                }
                ll2 += vct[ll] ;
        }

        if ( num
          && nvc )
        {
                sum /= nvc ;
                snprintf( buf, sizeof( buf ), "%.20LG", sum );
        }
        else
        {
                strcpy( buf, "N/A" );
        }

        if ( ( pfx
            && 0 > printf( "%s|", pfx ) )
          || 0 > printf( "%lu|%lu|%s|%lu|%s|%lu|%s|%s|%s|%lu|%lu\n",
                                dct, nct,
                                ( dct ? vl[0] : "" ),
                                ( dct ? vct[0] : 0UL ),
                                ( dct ? vl[dct - 1] : "" ),
                                ( dct ? vct[dct - 1] : 0UL ),
                                buf, me, mp, mpc, lct )
          || fflush( stdout ) )
        {
                out_failed();
        }
        exit( 0 );
}