OK as promised here is a typical script that uses awk to process a csv file created from an excel spreadsheet. It performs various validation checks on the files and directories involved.
The input file called "file_validation.csv": -
WORK ORDER,DESCRIPTION,PROJECT,RAISED BY,STATUS,ASSIGNED TO,DATE RAISED,DATE COMPLETED
WO_102,Automate delivery of desktop apps to testers,Metering,wateruchav,new,atkinsb,03/06/2007,13/06/2007
WO_105,Create logging script for desktop installations,Metering,patelb,completed,lanem,05/06/2007,12/06/2007
WO_106,Create tool to automatically deploy patches to metering environments,Metering,smithc,completed,atkinsb,08/06/2007,21/06/2007
WO_107,Create tool to gather metrics for the tools team work load,Jupiter,atkinsb,new,atkinsb,11/06/2007,28/06/2007
The script to process it called "file_validation": -
#! /bin/ksh
#############################################################################################################
##
## Name - file_validation
## Author - Bradley Atkins
## Description - Example code to illustrate typical file validation
## techniques in a ksh shell script that uses awk to process a csv file
## created from an excel spreadsheet.
## Date - 03/12/2009
## Args -
## Environment -
## Return - 1 Error
## 0 Success
##
#############################################################################################################
##-----------------------------------------------
## Functions
##-----------------------------------------------
tidyupexit()
{
[[ -d /tmp/${DATE}_muse ]] && rm -rf /tmp/${DATE}_muse ## Use the full path rather than $TMP_DIR
[[ -n $2 ]] && echo $2
exit $1
}
get_metrics()
{
##-----------------------------------------------
## Return the requsted metrics
##-----------------------------------------------
[[ $# -ne 4 ]] && tidyupexit 1 "Usage error, get_metrics()"
FILE=$1
[[ -r $FILE ]] || tidyupexit 1 "File not found / readable. get_metrics()"
FIELD=$2
TARGET=$3
QUERY_TYPE=$4
[[ $QUERY_TYPE == +([0-9]) ]] || tidyupexit 1 "None numeric query type"
print $(nawk -F, -v f=$FIELD -v t=$TARGET -v qt=$QUERY_TYPE '
( (qt == 1) && (tolower($f) == tolower(t)) )
( (qt == 2) && (tolower($f) == tolower(t)) )
' $FILE | wc -l)
}
[[ $# -ne 1 ]] && tidyupexit 1 "Usage error. Incorrect parameter count, 1 expected <csv file>"
##-----------------------------------------------
## Initialise our variables etc
##-----------------------------------------------
MSCRIPTNAME=file_validation
DATE=$(date +'%Y%m%d')
TMP_DIR=/tmp/${DATE}_muse
RESULTS_DIR=~/${DATE}_muse.results
CSV_FILE=$1
[[ -r $CSV_FILE ]] || tidyupexit 1 "Input file not found / readable ($CSV_FILE)"
typeset cWORK_ORDER=1 cDESCRIPTION=2 cPROJECT=3 cRAISED_BY=4 cSTATUS=5 cASSIGNED_TO=6 cDATE_RAISED=7 cDATE_COMPLETED=8 \
DEVELOPERS="atkinsb lanem patelb" HEADER TMPSTR qtDEV=1 qtSTA=2 NUMBER TOTAL_NUMBER STATUS_STRINGS
STATUS_STRINGS="new completed in_progress code_review uat"
##-----------------------------------------------
## Create our temporary files etc
##-----------------------------------------------
mkdir -p $TMP_DIR || tidyupexit 1 "Failed to create temp directory"
TMPFILE1=$TMP_DIR/muse.${MSCRIPTNAME}.tmp1
SWAPFILE=$TMP_DIR/muse.${MSCRIPTNAME}.tmp2
##-----------------------------------------------
## Create our output files
##-----------------------------------------------
mkdir -p $RESULTS_DIR || tidyupexit 1 "Failed to create results directory"
DEV_CSV=${RESULTS_DIR}/developers.csv
STA_CSV=${RESULTS_DIR}/status.csv
##-----------------------------------------------
## Process the CSV file and get our metrics
##-----------------------------------------------
HEADER="DEVELOPER TOTALS" ##
Developer Metrics
NUMBER=0
TOTAL_NUMBER=0
TMPSTR=""
for D in $DEVELOPERS;do
typeset -u HEADER=${HEADER}","$D
NUMBER=$(get_metrics $CSV_FILE $cASSIGNED_TO $D $qtDEV)
TMPSTR=${TMPSTR}","$NUMBER
TOTAL_NUMBER=$(( TOTAL_NUMBER + NUMBER ))
done
echo $HEADER > $DEV_CSV
echo ${TOTAL_NUMBER}${TMPSTR} >> $DEV_CSV
HEADER="STATUS TOTALS" ## Status Metrics
NUMBER=0
TOTAL_NUMBER=0
TMPSTR=""
for S in $STATUS_STRINGS;do
typeset -u HEADER=${HEADER}","$S
NUMBER=$(get_metrics $CSV_FILE $cSTATUS $S $qtSTA)
TMPSTR=${TMPSTR}","$NUMBER
TOTAL_NUMBER=$(( TOTAL_NUMBER + NUMBER ))
done
echo $HEADER > $STA_CSV
echo ${TOTAL_NUMBER}${TMPSTR} >> $STA_CSV
tidyupexit 0
Be sure to mess around with it before asking me any questions, it is much more satisfying to figure it out for yourself.
If you don't know how to trace the code while it is running let me know.
Enjoy