Given this data file -
$
$
$ cat input.txt
10-06-2006 17:09:28,1880,1862,1865,1866,1879,1881,1879,1879
10-06-2006 17:19:28,1885,1883,1884,1884,1648,1648,1648,1648
10-06-2006 17:29:28,1853,1844,1845,1845,1879,1878,1879,1879
10-06-2006 17:39:28,1912,1910,1914,1914,1847,1847,1847,1847
10-06-2006 17:49:28,1871,1855,1859,1858,1842,1843,1842,1842
10-06-2006 17:59:28,1907,1895,1898,1898,1789,1789,1789,1789
10-07-2006 18:09:28,2235,2233,2235,2236,2078,2078,2078,2078
10-07-2006 18:19:28,2094,2090,2094,2094,1932,1931,1932,1932
10-07-2006 18:29:28,1999,1977,1981,1981,1928,1929,1928,1928
10-08-2006 18:39:28,2061,2041,2062,2061,2005,2005,2005,2005
10-08-2006 18:49:28,2204,2207,2195,2193,2034,2034,2034,2034
10-08-2006 18:59:28,2354,2343,2347,2349,2143,2143,2141,2141
$
$
here's a Perl program that does the number crunching for individual days/months/years -
$
$
$ cat -n process_input.pl
1 #!perl -w
2 use strict;
3
4 # Sum every numeric column of input.txt. With no date entered the sums
5 # are grouped per day; otherwise only the lines for the requested day
6 # (YYYYMMDD), month (YYYYMM) or year (YYYY) are totalled.
7
8 my %counts;    # key (date or date prefix) => ref to array of column sums
9 my $len = -1;  # highest column index seen in any processed line
10
11 # capture user input; end-of-input is treated like an empty answer
12 print "Enter day (YYYYMMDD), month (YYYYMM) or year (YYYY).\n";
13 print "Press RETURN to process entire file. => ";
14 my $datevalue = <STDIN>;
15 $datevalue = "" unless defined $datevalue;
16 chomp $datevalue;
17
18 # start processing the input file
19 my $file = "input.txt";
20 open(my $fh, '<', $file) or die "Can't open $file: $!";
21 while (my $line = <$fh>) {
22     # Skip lines that do not look like "MM-DD-YYYY <time>,n1,n2,...".
23     # After a failed match $1..$4 still hold the values of the previous
24     # successful match, so without this check a blank or malformed line
25     # would add the previous line's numbers a second time.
26     next unless $line =~ /^(\d+)-(\d+)-(\d+) .*?,(.*)$/;
27     my ($month, $day, $year, $numbers) = ($1, $2, $3, $4);
28
29     # set up the key for the hash "%counts"
30     my $key;
31     if ($datevalue eq "" or $datevalue eq "$year$month$day") {
32         $key = "$year$month$day";
33     } elsif ($datevalue eq "$year$month") {
34         $key = "$year$month";
35     } elsif ($datevalue eq $year) {
36         $key = $year;
37     } else {
38         next;
39     }
40
41     # load the numbers to the right of the timestamp into @x
42     my @x = split(/,/, $numbers);
43     # remember the widest line so no column is lost when printing
44     $len = $#x if $#x > $len;
45
46     # add each number to the running sum of its column; the array behind
47     # $counts{$key} is created automatically the first time a key is seen
48     $counts{$key}[$_] += $x[$_] for 0 .. $#x;
49 }
50 close($fh) or die "Can't close $file: $!";
51
52 # now simply iterate through the %counts hash and print the sums:
53 # one header line with the keys, then one line per column
54 my @keys = sort keys %counts;
55 print join("\t", @keys), "\n";
56 for my $i (0 .. $len) {
57     # a key whose lines were all shorter than $i+1 columns has no sum here
58     print join("\t",
59         map { defined $counts{$_}[$i] ? $counts{$_}[$i] : 0 } @keys), "\n";
60 }
$
$
And the tests follow -
$
$
$ # (1) Test for contents of the entire file
$ perl process_input.pl
Enter day (YYYYMMDD), month (YYYYMM) or year (YYYY).
Press RETURN to process entire file. =>
20061006 20061007 20061008
11308 6328 6619
11249 6300 6591
11265 6310 6604
11265 6311 6603
10884 5938 6182
10886 5938 6182
10884 5938 6180
10884 5938 6180
$
$ # (2) Test for individual days
$ perl process_input.pl
Enter day (YYYYMMDD), month (YYYYMM) or year (YYYY).
Press RETURN to process entire file. => 20061006
20061006
11308
11249
11265
11265
10884
10886
10884
10884
$
$ perl process_input.pl
Enter day (YYYYMMDD), month (YYYYMM) or year (YYYY).
Press RETURN to process entire file. => 20061007
20061007
6328
6300
6310
6311
5938
5938
5938
5938
$
$ perl process_input.pl
Enter day (YYYYMMDD), month (YYYYMM) or year (YYYY).
Press RETURN to process entire file. => 20061008
20061008
6619
6591
6604
6603
6182
6182
6180
6180
$
$ # (3) Test for the month that exists in data file
$ perl process_input.pl
Enter day (YYYYMMDD), month (YYYYMM) or year (YYYY).
Press RETURN to process entire file. => 200610
200610
24255
24140
24179
24179
23004
23006
23002
23002
$
$ # (4) Test for the year that exists in data file
$ perl process_input.pl
Enter day (YYYYMMDD), month (YYYYMM) or year (YYYY).
Press RETURN to process entire file. => 2006
2006
24255
24140
24179
24179
23004
23006
23002
23002
$
$ # (5) Test for non-existent day, month and year
$ perl process_input.pl
Enter day (YYYYMMDD), month (YYYYMM) or year (YYYY).
Press RETURN to process entire file. => 20061022
$
$ perl process_input.pl
Enter day (YYYYMMDD), month (YYYYMM) or year (YYYY).
Press RETURN to process entire file. => 200611
$
$ perl process_input.pl
Enter day (YYYYMMDD), month (YYYYMM) or year (YYYY).
Press RETURN to process entire file. => 2007
$
$
Hope that helps,
tyler_durden