Counting average data per hour

Hi, I have a log like this:

I want to process the data so that it becomes:

Can anybody help me?

Try this:

awk -F'[ :]' '
# Average the RSS and PCPU columns per "date hour" key.
# With FS="[ :]" the time HH:MM:SS is split too, so $1 is the date and
# $2 is the hour; the value columns are addressed from the end of the record.
BEGIN {
    printf "Date Hour average_RSS average_PCPU\n"
}
# NR > 1 skips the header line of the log.
NR > 1 {
    key = $1 " " $2
    # Per-key numeric sums: correct even when the hours are not contiguous
    # in the input, and no precision is lost to number-to-string conversion.
    rss[key] += $(NF-2)
    cpu[key] += $NF
    cnt[key]++
}
END {
    # The traversal order of "for (key in ...)" is unspecified in awk;
    # pipe the output through sort if a fixed order is needed.
    # Change %.1f to e.g. %.4f for more decimal places.
    for (key in cnt)
        printf "%s %.1f %.1f\n", key, rss[key] / cnt[key], cpu[key] / cnt[key]
}' filename

Output:

Hi Dennis,

I think the average_PCPU for the date 20091116 at hour 08 should be 22.1818.

Change the line below in the code to the precision you need, e.g. use %.4f instead of %.1f:

printf "%s %.1f %.1f\n", i,a[1]/a[3],a[2]/a[3]; }

awk -F'[ :]' '
# Average RSS ($(NF-2)) and PCPU ($NF) per date and hour.
BEGIN { print "Date Hour average_RSS average_PCPU" }
# Skip the header line; all three updates belong to the same guarded action.
NR > 1 {
    # $1 is YYYYMMDD; reformat it as MM-DD-YYYY instead of hard-coding one day.
    key = substr($1, 5, 2) "-" substr($1, 7, 2) "-" substr($1, 1, 4) " " $2
    rss[key] += $(NF-2)
    cpu[key] += $NF
    count[key]++
}
END {
    # Traversal order of "for (key in ...)" is unspecified in awk.
    for (key in count)
        printf "%s %.4f %.4f\n", key, rss[key] / count[key], cpu[key] / count[key]
}' urfile

Date Hour average_RSS average_PCPU
11-16-2009 07 1342177.0000 19.7727
11-16-2009 08 1380012.0909 22.1818
11-16-2009 09 1403367.0000 26.7500

Python, if you have it

#!/usr/bin/env python
"""Print the average RSS and PCPU per hour of a process log.

The log is read from the file named "file"; its first line is treated as a
header and skipped.  Each remaining line is expected to start with
"YYYYMMDD HH:MM:SS".  Runs unchanged on Python 2 and Python 3.
"""

import re


def average_per_hour(lines):
    """Return per-hour averages for an iterable of log data lines.

    Each line is split on single spaces and colons, so the first two fields
    are the date and the hour.  The value columns are counted from the end
    of the line: RSS is taken as the third-from-last field and PCPU as the
    last one (assumption about the log layout -- confirm against the real
    log).  Blank lines are ignored.

    Returns a list of ``(label, average_rss, average_pcpu)`` tuples sorted
    by date and hour, where ``label`` is ``"DD-MM-YYYY HH"`` and both
    averages are floats.

    Raises ``ValueError`` if a value column is not numeric and
    ``IndexError`` if a non-blank line has fewer than three fields.
    """
    totals = {}
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        fields = re.split("[ :]", line)
        # Keying on the raw (YYYYMMDD, HH) pair keeps the sort chronological.
        entry = totals.setdefault((fields[0], fields[1]), [0.0, 0.0, 0])
        entry[0] += float(fields[-3])
        entry[1] += float(fields[-1])
        entry[2] += 1

    averages = []
    for (stamp, hour), (rss_sum, pcpu_sum, count) in sorted(totals.items()):
        label = "%s-%s-%s %s" % (stamp[6:8], stamp[4:6], stamp[:4], hour)
        # Sums are floats, so this is true division on Python 2 as well.
        averages.append((label, rss_sum / count, pcpu_sum / count))
    return averages


def main(path="file"):
    """Read the log at *path* and print one averaged line per hour."""
    with open(path) as log:
        next(log, None)  # skip the header line
        for label, avg_rss, avg_pcpu in average_per_hour(log):
            print("%s %.4f %.4f" % (label, avg_rss, avg_pcpu))


if __name__ == "__main__":
    main()

output

# python script.py 
16-11-2009 09 1682302 1403367
16-11-2009 08 1667231 1380012
16-11-2009 07 1646110 1342177

# cat awk.script
# Average the RSS and PCPU columns per date and hour.
#
# Records are split on single spaces and colons, so $1 is the date and $2
# the hour of a "YYYYMMDD HH:MM:SS" timestamp; RSS is read from $(NF-2) and
# PCPU from $NF.  A group is printed as soon as the date/hour key changes,
# so the input must already be grouped by hour (e.g. in chronological order).
BEGIN {
        FS = "[ :]"
}

# The first line of the log is a header: replace it with our own.
NR == 1 {
        print "Date    Time average_RSS average_PCPU"
        next
}

{
        key = $1 OFS $2
        # Key changed: flush the finished group and start a new one.
        if (n > 0 && key != prev) {
                printf "%s %.4f %.4f\n", prev, rss_sum / n, cpu_sum / n
                rss_sum = cpu_sum = n = 0
        }
        prev = key
        rss_sum += $(NF-2)
        cpu_sum += $NF
        n++
}

END {
        # Flush the last group; the guard avoids a division by zero when the
        # input is empty or contains only the header line.
        if (n > 0)
                printf "%s %.4f %.4f\n", prev, rss_sum / n, cpu_sum / n
}
# awk -f awk.script LOG.FILE
Date    Time average_RSS average_PCPU
20091116 07 1342177.0000 19.7727
20091116 08 1380012.0909 22.1818
20091116 09 1403367.0000 26.7500