ust3
February 12, 2014, 8:51pm
1
Hi All ,
I have the script below. I tried to modify it to meet my requirement but could not get it to work. Could someone help? Many thanks.
================================================================================================
# For every line of file.txt (split into STR and NAME), count how often STR
# occurs in each *.log file and print a table with one column per log file.
while read STR NAME; do
Total=0
MyString="$STR"
# GetData prints one "<Mon> <YYYY> <count>?" record per *.log file for the
# current MyString; the trailing '?' is the record terminator used by the
# reader loop further down.
GetData () {
# Backslash-escape regex metacharacters in MyString so the result can be
# spliced into the awk /.../ pattern below.
AwkMyString="$( sed -e 's/[\/*?.+{}]/\\&/g' -e 's/[[]/\\&/g' -e 's/[]]/\\&/g' <<< "${MyString}" )"
for File in *.log;do
# Six characters of the file name starting at offset 4, plus "01", are handed
# to date --date to produce a "Mon YYYY" label (e.g. aaaa201401.log).
Date="$(date "+%b %Y" --date="${File:4:6}01")"
# Count every field matching the escaped string and emit "<Date> <count>?".
awk 'BEGIN{C=0}{for (i=1;i<=NF;i++) if ( $i ~ /'${AwkMyString}'/ ){C++}} END{printf "%s %s?","'"${Date}"'",C}' "$File"
done
}
# NOTE(review): Count is first assigned by the read loop below, and File is only
# assigned inside GetData, which is invoked through a command substitution.
# Both therefore look unset here on the first pass - confirm where these two
# lines were meant to go.
echo "No. of \"${MyString}\" in $(date "+%b %Y" --date="${File:4:6}01") = $Count"
Total=$(( $Count + $Total ))
# Split GetData's output on '?' into month, year and count and append them to
# the table arrays.
# NOTE(review): TableDate and TableCount are only ever appended to in this
# block, so entries from earlier file.txt lines remain in them.
while read -d '?' Month Year Count;do
TableDate+=("${Month} ${Year}")
TableCount+=("${Count}")
done <<< $(GetData)
# Header row: an empty leading cell, one cell per collected date, then "Total".
printf "%10s" "|"
for i in $(seq 0 $(( ${#TableDate[@]} - 1 )) );do
printf "%s |" "${TableDate[$i]}"
done
printf " Total |\n"
# Separator row: seq 0..(count+1) prints count+2 "=========|" segments.
for i in $(seq 0 $(( ${#TableCount[@]} +1 )) );do
printf "=========|"
done
printf "\n"
# Data row: an empty leading cell, then each count followed by a closing "|".
# The two %*s widths add up to about 10 characters per cell (presumably to
# centre the number - verify against the desired layout).
printf "%10s" "|"
for i in $(seq 0 $(( ${#TableCount[@]} - 1 )) );do
printf "%*s%*s" $(( $((11 + ${#TableCount[$i]}))/2 )) "${TableCount[$i]}" $(( $((10 - ${#TableCount[$i]}))/2 )) "|"
done
# NOTE(review): the Total variable is computed above but never printed here.
printf "\n"
echo "======="
done < file.txt
================================================================================================
the files are as below
#vi file.txt
aaa 111
bbb 222
#vi aaaa201401.log
aaa 111
aaa 111
bbb 222
bbb 222
#vi aaaa201402.log
aaa 111
aaa 111
aaa 111
aaa 111
bbb 222
bbb 222
bbb 222
bbb 222
bbb 222
the result when I run the above script is as below
#my_script
No. of "aaa" in Feb 2014 =
|Jan 2014 |Feb 2014 | Total |
=========|=========|=========|=========|
| 2 | 4 |
=======
No. of "bbb" in Feb 2014 =
|Jan 2014 |Feb 2014 |Jan 2014 |Feb 2014 | Total |
=========|=========|=========|=========|=========|=========|
| 2 | 4 | 2 | 5 |
=======
What I would like to do is list the number of occurrences of each string for each month, then calculate the total across all months.
As you can see, the output data is duplicated and the Total amount is missing. The output I would like is shown below. Could you advise how to change the script? Many thanks.
#my_script
No. of "aaa" in Feb 2014 =
|Jan 2014 |Feb 2014 | Total |
|=========|=========|=========|
| 2 | 4 | 6
No. of "bbb" in Feb 2014 =
|Jan 2014 |Feb 2014 | Total |
|=========|=========|=========|
| 2 | 5 | 7
ctsgnb
February 13, 2014, 4:33pm
2
You can get the needed data in a lazy way (i have no doubt it may be improved) but you can go with something like :
# Count lines per "<string> <YYYYMM>" pair: grep prefixes each line with its
# file name, sed reduces that prefix to the digits of the name, and awk tallies
# on field 2 (the string) plus field 1 (the YYYYMM stamp).
grep -E "^" aaa*.log | sed 's/\.log:/ /;s/^[^0-9]*//' | awk '{a[$2" "$1]++}END{for(i in a) print i, a[i]}' | sort -k 1
or
$ grep -E "^" aaa*.log | sed 's/\(..\).log:/ \1 /;s/^[^0-9]*//' | uniq -c | awk '{a[$4" "$2]+=$1;print}END{for(i in a)print "Tot",i,"=",a[i]}'
2 2014 01 aaa 111
2 2014 01 bbb 222
4 2014 02 aaa 111
5 2014 02 bbb 222
Tot aaa 2014 = 6
Tot bbb 2014 = 7
The formatting is then just a matter of cosmetics.
In your above script you could replace:
printf "\n"
echo "======="
done < file.txt
with:
LineTotal=$(($(IFS=+; echo "${TableCount[*]}")))
printf "%*s%*s\n" $(( $((11 + ${#LineTotal}))/2 )) "$LineTotal" $(( $((10 - ${#LineTotal}))/2 )) "|"
done < file.txt
ust3
February 13, 2014, 8:30pm
4
chubler_xl:
In your above script you could replace:
printf "\n"
echo "======="
done < file.txt
with:
LineTotal=$(($(IFS=+; echo "${TableCount[*]}")))
printf "%*s%*s\n" $(( $((11 + ${#LineTotal}))/2 )) "$LineTotal" $(( $((10 - ${#LineTotal}))/2 )) "|"
done < file.txt
thanks reply ,
I replaced the code as your advice , the result is as below
No. of "aaa" in Feb 2014 =
|Jan 2014 |Feb 2014 | Total |
=========|=========|=========|=========|
| 2 | 4 | 6 |
No. of "bbb" in Feb 2014 =
|Jan 2014 |Feb 2014 |Jan 2014 |Feb 2014 | Total |
=========|=========|=========|=========|=========|=========|
| 2 | 4 | 2 | 5 | 13 |
However ,the above data is duplicated , the output should be
No. of "aaa" in Feb 2014 =
|Jan 2014 |Feb 2014 | Total |
|=========|=========|=========|
| 2 | 4 | 6 |
No. of "bbb" in Feb 2014 =
|Jan 2014 |Feb 2014 | Total |
|=========|=========|=========|
| 2 | 5 | 7 |
Could advise how to modify it ? thanks
Sorry I didn't pickup on that existing bug in your script.
Change:
Total=$(( $Count + $Total ))
while read -d '?' Month Year Count;do
To:
Total=$(( $Count + $Total ))
TableDate=()
TableCount=()
while read -d '?' Month Year Count;do
ust33
February 13, 2014, 10:56pm
6
thanks reply ,
The current output is as below , it works fine .
No. of "aaa" in Feb 2014 =
|Jan 2014 |Feb 2014 | Total |
=========|=========|=========|=========|
| 2 | 4 | 6 |
No. of "bbb" in Feb 2014 =
|Jan 2014 |Feb 2014 | Total |
=========|=========|=========|=========|
| 2 | 5 | 7 |
One more question: if I would like the output changed to the layout below, could you advise how to modify the script? Many thanks.
Month |Jan 2014 |Feb 2014 | Total |
=========|=========|=========|=========|
111 | 2 | 4 | 6 |
=========|=========|=========|=========
222 | 2 | 5 | 7 |
ust3
March 1, 2014, 6:59am
7
Hi All ,
Would you help on my last post ? very thanks
Here I think a complete re-write is best:
# Print the global Table array as one table row followed by a rule line.
# Globals: Table (read) - the cells of the row.
# Outputs: two lines on stdout: each cell right-aligned to 8 characters and
#          terminated by " |", then one "=========|" segment per cell.
print_row() {
  local cells rule
  # printf reuses its format for every argument, so one call renders all cells.
  printf -v cells '%8s |' "${Table[@]}"
  # %.0s consumes a cell but prints nothing, yielding one rule segment per cell.
  printf -v rule '=====%.0s====|' "${Table[@]}"
  printf '%s\n%s\n' "$cells" "$rule"
}
# Build the header row: "Month", one "Mon YYYY" label per log file, "Total".
# ${File:4:6} takes six characters starting at offset 4 of the file name (the
# 201401 in aaaa201401.log); with "01" appended, date --date formats it as
# e.g. "Jan 2014". print_row is the row printer defined alongside this script.
Table=( "Month" )
for File in *.log; do
  Table+=( "$(date "+%b %Y" --date="${File:4:6}01")" )
done
Table+=( "Total" )
print_row

# One table row per "STR NAME" line of file.txt.
# read -r keeps backslashes in the input verbatim.
while read -r STR NAME; do
  # awk scans all log files in one pass: f is bumped on the first record of
  # each file, C[f] counts matching lines in file number f and T is the grand
  # total. The command substitution is deliberately unquoted so that every
  # number becomes its own Table element.
  Table=( "$NAME" $(awk -v S="$STR" -v N="$NAME" '
    FNR==1 { f++ }
    $1==S && $2==N { T++; C[f]++ }
    END {
      # C[i] (not the bare array name) is the per-file count; unset entries
      # print as 0 under %d.
      for (i=1; i<=f; i++) printf " %d", C[i]
      printf " %d\n", T
    }' *.log ) )
  print_row
done < file.txt
ust3
March 3, 2014, 1:29am
9
thanks Chubler XL 's great job .
Maybe my question is still not clear.
What I would like to count is the number of occurrences of the first string on each line of file.txt. The file.txt is as below.
aaa 111
bbb 222
therefore , I would like to count the number of the string "aaa" and "bbb" in *.log , the "111" and "222" will be shown on the report only.
in aaaa201401.log , there are two "aaa" , two "bbb" ;
in aaaa201402.log , there are four "aaa" , five "bbb"
The desired report should be as below.
=========|=========|=========|=========|
Month |Jan 2014 |Feb 2014 | Total |
=========|=========|=========|=========|
111 | 2 | 4 | 6 |
=========|=========|=========|=========|
222 | 2 | 5 | 7 |
=========|=========|=========|=========|
There is a bit should be modify , would you please help ? thanks
That output is what is produced by the code above, using your input files from post #1 .
If this is wrong, please build new input files show the problem.
ust3
March 3, 2014, 4:37am
11
I have rebuilt the log files, so it should be clearer now.
What I hope to do is check how many times each string ("aaa" and "bbb") occurs in each file.
#vi file.txt
aaa this is description file 1
bbb this is description file 2
#vi aaaa201401.log
aaa dfasf ffd
aaa dfags gf
bbb xdfas gfs
bbb dfasf gfsd
bbb dfasgf
#vi aaaa201402.log
aaa fsdafhgh
aaa fdasf hgh
aaa fdasfas hgdh
aaa fsdafasf 45252
bbb fdsafa trt
bbb dfasfaf 543
bbb ghsghsgf gfs
bbb gfsg gsg
bbb gdsf gfsg
in aaaa201401.log , there are two "aaa" , three "bbb" ;
in aaaa201402.log , there are four "aaa" , five "bbb"
the desired report should be as below.
===============================|=========|=========|=========|
Month |Jan 2014 |Feb 2014 | Total |
===============================|=========|=========|=========|
this is description file 1 | 2 | 4 | 6 |
===============================|=========|=========|=========|
this is description file 2 | 3 | 5 | 8 |
===============================|=========|=========|=========|
If you have groff on your system this might be an easier way to print the ascii table:
# Emit a tbl(1) table description on stdout and render it through groff.
# Layout: a header line of centred columns and a data line with one
# left-aligned column followed by right-aligned ones; there is one column for
# the description, one per *.log file and one for the total.
(
  printf ".TS\nallbox;\nc c"
  # %.0s consumes each file name without printing it: one " c" per log file.
  printf "%.0s c" *.log
  printf "\nl r"
  printf "%.0s r" *.log
  printf ".\n Month"
  # Header cells: six characters at offset 4 of the file name plus "01" are
  # formatted by date --date as "Mon YYYY".
  for File in *.log; do
    printf "\t%s" "$(date "+%b %Y" --date="${File:4:6}01")"
  done
  printf "\tTotal\n"
  # One data row per "STR NAME" line of file.txt; read -r keeps backslashes.
  while read -r STR NAME; do
    printf "%s" "$NAME"
    # f numbers the log files, C[f] counts lines whose first field equals S
    # in file f, T is the total over all files.
    awk -v S="$STR" '
      FNR==1 { f++ }
      $1==S { T++; C[f]++ }
      END {
        # C[i] (not the bare array name) is the per-file count.
        for (i=1; i<=f; i++) printf "\t%d", C[i]
        printf "\t%d\n", T
      }' *.log
  done < file.txt
  echo ".TE"
) | tbl | groff -T ascii | grep -v '^$'
Output:
+---------------------------+----------+----------+-------+
| Month | Jan 2014 | Feb 2014 | Total |
+---------------------------+----------+----------+-------+
|this is description file 1 | 2 | 4 | 6 |
+---------------------------+----------+----------+-------+
|this is description file 2 | 3 | 5 | 8 |
+---------------------------+----------+----------+-------+
---------- Post updated at 04:42 AM ---------- Previous update was at 03:29 AM ----------
If you're using an xterm (or similar) you could try groff -T utf8
instead of groff -T ascii
:
Month Jan 2014 Feb 2014 Total
this is description file 1 2 4 6
this is description file 2 3 5 8
ust3
March 4, 2014, 9:07pm
13
chubler_xl:
If you have groff on your system this might be an easier way to print the ascii table:
# Emit a tbl(1) table description on stdout and render it through groff.
# Layout: a header line of centred columns and a data line with one
# left-aligned column followed by right-aligned ones; there is one column for
# the description, one per *.log file and one for the total.
(
  printf ".TS\nallbox;\nc c"
  # %.0s consumes each file name without printing it: one " c" per log file.
  printf "%.0s c" *.log
  printf "\nl r"
  printf "%.0s r" *.log
  printf ".\n Month"
  # Header cells: six characters at offset 4 of the file name plus "01" are
  # formatted by date --date as "Mon YYYY".
  for File in *.log; do
    printf "\t%s" "$(date "+%b %Y" --date="${File:4:6}01")"
  done
  printf "\tTotal\n"
  # One data row per "STR NAME" line of file.txt; read -r keeps backslashes.
  while read -r STR NAME; do
    printf "%s" "$NAME"
    # f numbers the log files, C[f] counts lines whose first field equals S
    # in file f, T is the total over all files.
    awk -v S="$STR" '
      FNR==1 { f++ }
      $1==S { T++; C[f]++ }
      END {
        # C[i] (not the bare array name) is the per-file count.
        for (i=1; i<=f; i++) printf "\t%d", C[i]
        printf "\t%d\n", T
      }' *.log
  done < file.txt
  echo ".TE"
) | tbl | groff -T ascii | grep -v '^$'
Output:
+---------------------------+----------+----------+-------+
| Month | Jan 2014 | Feb 2014 | Total |
+---------------------------+----------+----------+-------+
|this is description file 1 | 2 | 4 | 6 |
+---------------------------+----------+----------+-------+
|this is description file 2 | 3 | 5 | 8 |
+---------------------------+----------+----------+-------+
---------- Post updated at 04:42 AM ---------- Previous update was at 03:29 AM ----------
If you're using an xterm (or similar) you could try groff -T utf8
instead of groff -T ascii
:
Month Jan 2014 Feb 2014 Total
this is description file 1 2 4 6
this is description file 2 3 5 8
Hi Chubler_XL ,
Very thanks for your work , I hope this is my last question .
I would like to count every occurrence of the string "aaa", even when it is not surrounded by spaces. For example, with the log below the count for "aaa" should still be 2. How should I modify the script? Thanks.
XXaaaYY dfasf ffd
CCaaaDD dfags gf
bbb xdfas gfs
bbb dfasf gfsd
bbb dfasgf
replace $1==S{T++;C[f]++}
with $1~S{T++;C[f]++}
ust3
March 6, 2014, 1:20am
15
Hi XL ,
If the string has special characters, how should they be handled? Thanks.
#vi file.txt
aaa\\. this is description file 1
bbb=? this is description file 2
#vi aaaa201401.log
aaa\\. dfasf ffd
aaa\\. dfags gf
bbb=? xdfas gfs
bbb=? dfasf gfsd
bbb=? dfasgf
How about this replacement loop:
# One output row per "STR NAME" line of file.txt: the description followed by
# a tab-separated count per log file and the total.
# read -r keeps backslashes in STR verbatim.
while read -r STR NAME; do
  printf "%s" "$NAME"
  # STR is handed to awk through the environment rather than -v, because awk
  # applies backslash-escape processing to -v assignments and would alter
  # strings such as aaa\\. ; ENVIRON delivers the bytes unchanged.
  # index() then does a literal substring test, so regex metacharacters in
  # STR need no escaping.
  S="$STR" awk '
    BEGIN { S = ENVIRON["S"] }
    FNR==1 { f++ }
    index($1, S) { T++; C[f]++ }
    END {
      # C[i] (not the bare array name) is the count for log file number i.
      for (i=1; i<=f; i++) printf "\t%d", C[i]
      printf "\t%d\n", T
    }' *.log
done < file.txt
ust3
March 6, 2014, 9:06pm
17
thanks XL ,
It works very well, but the code still cannot handle space characters. If the line contains spaces (as in the example below), it cannot find the occurrences of the strings "aaa" and "bbb"; if I remove the spaces, it works fine. Could you please advise how to change it? Thanks.
#vi aaaa201401.log
ZZZ YYY XXXaaa\\. dfasf ffd
ZZZ YYY XXXaaa\\. dfags gf
ZZZ YYY XXXbbb=? xdfas gfs
ZZZ YYY XXXbbb=? dfasf gfsd
ZZZ YYY XXXbbb=? dfasgf
Space would be OK if we can get a distinction between the code and description.
Currently the program cannot determine what is code and what is description:
Code=ZZZ
Desc=YYY XXXaaa\\. this is description file 1
Code=ZZZ YYY
Desc=XXXaaa\\. this is description file 1
Code=ZZZ YYY XXXaaa\\.
Desc=this is description file 1
Could we change the file.txt to have a Tab character between the two fields eg:
ZZZ YYY XXXaaa\\.<tab>this is description file 1
Where <tab>
above represents a single ascii character #9 or ctrl-I:
# One output row per line of file.txt, whose two fields (search string and
# description) are separated by a single tab so that the search string may
# itself contain spaces. read -r keeps backslashes verbatim.
while IFS=$'\t' read -r STR NAME; do
  printf "%s" "$NAME"
  # STR is handed to awk through the environment rather than -v, because awk
  # applies backslash-escape processing to -v assignments and would alter
  # strings such as aaa\\. ; ENVIRON delivers the bytes unchanged.
  # index($0, S) is a literal substring test against the whole line.
  S="$STR" awk '
    BEGIN { S = ENVIRON["S"] }
    FNR==1 { f++ }
    index($0, S) { T++; C[f]++ }
    END {
      # C[i] (not the bare array name) is the count for log file number i.
      for (i=1; i<=f; i++) printf "\t%d", C[i]
      printf "\t%d\n", T
    }' *.log
done < file.txt
ust3
March 7, 2014, 1:40am
19
chubler_xl:
Space would be OK if we can get a distinction between the code and description.
Currently the program cannot determine what is code and what is description:
Code=ZZZ
Desc=YYY XXXaaa\\. this is description file 1
Code=ZZZ YYY
Desc=XXXaaa\\. this is description file 1
Code=ZZZ YYY XXXaaa\\.
Desc=this is description file 1
Could we change the file.txt to have a Tab character between the two fields eg:
ZZZ YYY XXXaaa\\.<tab>this is description file 1
Where <tab>
above represents a single ascii character #9 or ctrl-I:
# One output row per line of file.txt, whose two fields (search string and
# description) are separated by a single tab so that the search string may
# itself contain spaces. read -r keeps backslashes verbatim.
while IFS=$'\t' read -r STR NAME; do
  printf "%s" "$NAME"
  # STR is handed to awk through the environment rather than -v, because awk
  # applies backslash-escape processing to -v assignments and would alter
  # strings such as aaa\\. ; ENVIRON delivers the bytes unchanged.
  # index($0, S) is a literal substring test against the whole line.
  S="$STR" awk '
    BEGIN { S = ENVIRON["S"] }
    FNR==1 { f++ }
    index($0, S) { T++; C[f]++ }
    END {
      # C[i] (not the bare array name) is the count for log file number i.
      for (i=1; i<=f; i++) printf "\t%d", C[i]
      printf "\t%d\n", T
    }' *.log
done < file.txt
it should be ok , thanks a lot