I have an input file in which some rows share the same values in the 1st, 2nd and 3rd columns, while the 4th and 5th columns differ. For each group of rows with identical values in columns 1, 2 and 3, I would like to print the mean of the 4th column along with all the values from the 5th column.
awk '
# Group input lines by the combination of fields 1 to 3. For every group,
# print the key, the mean of field 4 (3 decimals) and the list of
# "field5 field4" pairs seen for that key.
# Input is read from stdin (or from files appended after the closing quote).
# NOTE: do not use apostrophes in these comments, the program is single-quoted.

# skip blank lines so they do not create a bogus group with an empty key
NF == 0 { next }

{
    # group key: fields 1 to 3 joined by single spaces
    ind = $1 " " $2 " " $3

    # running total of field 4 for this key
    t[ind] += $4

    # number of lines seen for this key
    n[ind]++

    # append the pair "field5 field4" exactly once per line; the separator
    # is added only when the key already has pairs, so there is no leading space
    s[ind] = ((ind in s) ? s[ind] " " : "") $5 " " $4
}

# when all lines have been processed, walk the keys and print the averages
# (the traversal order of "for (i in t)" is unspecified in awk)
END {
    for (i in t)
        printf "%s %.3f %s\n", i, t[i] / n[i], s[i]
}'