awk for histogram

I have a single file that looks like this:

1.62816
1.62816
0.86941
0.86941
0.731465
0.731465
1.03174
1.03174
0.769444
0.769444
0.981181
0.981181
1.14681
1.14681
1.00511
1.00511
1.20385
1.20385
0.0340752
0.0340752

I am trying to plot a probability distribution/histogram, so I'd like to divide this data into bins of equal width based on the maximum and minimum values, and output the number of items in each bin, so that my final output data would look like:

Range  Midpoint  No_of_data_in_range
0-0.02     0.01      2
0.02-0.04 0.03     7

I have tried using something like:

awk '{if($1>= 0.0 && $1 <=0.1) {print }}' out.dat

but it doesn't seem to work well, as it is too manual. Could someone here help me out with this?

[edit] working on it.

$ awk '
# Histogram of column 1 using fixed-width bins of size BINSIZE.
# BINSIZE is supplied by the BINSIZE=... operand placed before the data file.
# Output: one line per bin, from the lowest to the highest occupied bin
# (empty bins in between included), formatted as "low-high<TAB>count".
NF {
        # Blank lines are skipped (NF is 0) so they are not counted as 0.
        # Bin index is floor(value/BINSIZE). int() truncates toward zero,
        # so step down by one when the scaled value is negative and fractional.
        SCALED=$1*(1/BINSIZE);
        BIN=int(SCALED);
        if(BIN>SCALED) BIN--;
        DATA[BIN]++;
        # Use an explicit record counter to detect the first value; testing
        # !MIN or !MAX would mistake a legitimate bin index of 0 for "unset".
        if((SEEN==0)||(MIN>BIN)) MIN=BIN;
        if((SEEN==0)||(MAX<BIN)) MAX=BIN;
        SEEN++;
 }
END {
        # With no data there are no bins to report.
        if(SEEN==0) exit;
        for(BIN=MIN; BIN<=MAX; BIN++)
                printf("%+2.5f-%+2.5f\t%d\n", (BIN*BINSIZE), (BIN*BINSIZE)+(BINSIZE-0.00001), DATA[BIN]);
}' BINSIZE=0.02 datafile

+0.02000-+0.03999       2
+0.04000-+0.05999       0
+0.06000-+0.07999       0
+0.08000-+0.09999       0
+0.10000-+0.11999       0
+0.12000-+0.13999       0
+0.14000-+0.15999       0
+0.16000-+0.17999       0
+0.18000-+0.19999       0
+0.20000-+0.21999       0
+0.22000-+0.23999       0
+0.24000-+0.25999       0
+0.26000-+0.27999       0
+0.28000-+0.29999       0
+0.30000-+0.31999       0
+0.32000-+0.33999       0
+0.34000-+0.35999       0
+0.36000-+0.37999       0
+0.38000-+0.39999       0
+0.40000-+0.41999       0
+0.42000-+0.43999       0
+0.44000-+0.45999       0
+0.46000-+0.47999       0
+0.48000-+0.49999       0
+0.50000-+0.51999       0
+0.52000-+0.53999       0
+0.54000-+0.55999       0
+0.56000-+0.57999       0
+0.58000-+0.59999       0
+0.60000-+0.61999       0
+0.62000-+0.63999       0
+0.64000-+0.65999       0
+0.66000-+0.67999       0
+0.68000-+0.69999       0
+0.70000-+0.71999       0
+0.72000-+0.73999       2
+0.74000-+0.75999       0
+0.76000-+0.77999       2
+0.78000-+0.79999       0
+0.80000-+0.81999       0
+0.82000-+0.83999       0
+0.84000-+0.85999       0
+0.86000-+0.87999       2
+0.88000-+0.89999       0
+0.90000-+0.91999       0
+0.92000-+0.93999       0
+0.94000-+0.95999       0
+0.96000-+0.97999       0
+0.98000-+0.99999       2
+1.00000-+1.01999       2
+1.02000-+1.03999       2
+1.04000-+1.05999       0
+1.06000-+1.07999       0
+1.08000-+1.09999       0
+1.10000-+1.11999       0
+1.12000-+1.13999       0
+1.14000-+1.15999       2
+1.16000-+1.17999       0
+1.18000-+1.19999       0
+1.20000-+1.21999       2
+1.22000-+1.23999       0
+1.24000-+1.25999       0
+1.26000-+1.27999       0
+1.28000-+1.29999       0
+1.30000-+1.31999       0
+1.32000-+1.33999       0
+1.34000-+1.35999       0
+1.36000-+1.37999       0
+1.38000-+1.39999       0
+1.40000-+1.41999       0
+1.42000-+1.43999       0
+1.44000-+1.45999       0
+1.46000-+1.47999       0
+1.48000-+1.49999       0
+1.50000-+1.51999       0
+1.52000-+1.53999       0
+1.54000-+1.55999       0
+1.56000-+1.57999       0
+1.58000-+1.59999       0
+1.60000-+1.61999       0
+1.62000-+1.63999       2

$
1 Like

0.01 was just an example. Not from the data. I'll try your solution now