Hi all, I have a file with records that look something like this,
"Transaction ID",Date,Email,"Card Type",Amount,"NETBANX Ref","Root Ref","Transaction Type","Merchant Ref",Status,"Interface ID","Interface Name","User ID"
nnnnnnnnn,"21 Nov 2011 00:10:47",someone@hotmail.co.uk,"Visa Debit",nnnn,d8rkf93jspe840fj,,"Immediate Bill",n-nnnnnnnn-nnnnnnnnnnnnnnnn-n-aaa,n,nnnnn,,system
nnnnnnnnn,"21 Nov 2011 13:46:14","None Given","Visa Debit",nnnn,nanananananananana,nanananananananana,Refund,,n,nnnnn,,aaaaaaaa-24aaaaaa-e
I need to reformat this file to look like this,
nnnnnnnnn,21/11/2011,2011-11-21 00:10:47.000000,someone@hotmail.co.uk,"Visa Debit",nnnn,nananananananana,,"Immediate Bill",n,nnnnnnnn,nnnnnnnnnnnnnnnn,n,CSS,n-nnnnnnnn-nnnnnnnnnnnnnnnn-n-CSS,n,nnnnn,nnnnn,system,,n,,
nnnnnnnnn,21/11/2011,2011-11-21 13:46:14.000000,"None Given","Visa Debit",nnnnn,nanananananananana,nanananananananana,Refund,0,00000000,0000000000000000,0," "," ",n,nnnnn," ",stockley-24nnn-e,,0,,
I currently have a piece of code that splits the file into the 2 record types and removes the lines containing the phrase "Transaction ID" in field 1, like this,
# Split /netbanx/netbanx.txt into two work files, one input line at a time:
#   - lines whose comma-field 9 contains no letter or digit are appended to
#     /netbanx/netbanx_noMref.tmp;
#   - all other lines, except those whose field 1 contains "Transaction ID"
#     (the header row), are appended to /netbanx/netbanx2.tmp with field 9
#     broken into its "-"-separated pieces.
# Fields are split on every comma; quoted values are not treated specially.
# Every input line starts several processes (echo, awk, grep, sed).
cat /netbanx/netbanx.txt | while read LINE
do
# Comma-field 9 of the line (the "Merchant Ref" column in the header row).
# $LINE is unquoted here, so the shell word-splits it before echo sees it.
field9=`echo $LINE | awk -F"," '{ print $9 }'`
# grep's exit status is 0 only when field 9 holds at least one letter or digit.
echo $field9 | grep '[A-Za-z0-9]' > /dev/null
if [ $? -ne 0 ];then
# Field 9 is blank: keep the line as it is, apart from the sed substitution.
# NOTE(review): presumably ^M stands for a literal carriage return typed as
# Ctrl-V Ctrl-M; written as the two characters caret and M, the pattern would
# instead match a capital M at the start of the line -- confirm.
echo "$LINE" | sed 's/^M//g' >> /netbanx/netbanx_noMref.tmp
else
# Field 9 is populated: drop the line if field 1 contains "Transaction ID".
field1=`echo $LINE | awk -F"," '{ print $1 }'`
echo $field1 | grep "Transaction ID" > /dev/null
if [ $? -ne 0 ];then
# Append the untouched field 9 as an extra trailing field, then have awk
# replace field 9 with its first five "-"-separated pieces joined by commas.
# NOTE(review): FS is assigned inside an action rather than with -F or in a
# BEGIN block. POSIX awk applies a changed FS from the next record onward, so
# whether this single record is split on commas depends on the awk in use --
# confirm on the target system.
# NOTE(review): as shown, the sed expression is broken across two lines right
# after the "s"; this looks like a line wrap from pasting -- confirm that the
# real script has 's/^M//g' on one line.
echo $LINE","$field9 | awk '{FS=","} {OFS=","} split ($9,aa,"-") {$9=aa[1]","aa[2]","aa[3]","aa[4]","aa[5] } {print}' | sed 's
/^M//g' >> /netbanx/netbanx2.tmp
fi
fi
done
Once this has been done the noMref file is reformatted using this code,
# Reformat each line of /netbanx/netbanx_noMref.tmp (records with a blank
# field 9) and append the result to /netbanx/netbanx_noMref.txt.
# Output layout: field1..field21 followed by two empty trailing fields.
# Each input field is extracted by its own echo | awk pipeline, so one input
# line starts a few dozen processes.
cat /netbanx/netbanx_noMref.tmp | while read LINE
do
# Input fields 1-8 map to output fields 1 and 4-9; input field 2 (the date,
# with its double quotes removed) is held in field3tmp for conversion below.
field1=`echo $LINE | awk -F"," '{ print $1 }'`
field3tmp=`echo $LINE | awk -F"," '{ print $2 }' | sed 's/"//g'`
field4=`echo $LINE | awk -F"," '{ print $3 }'`
field5=`echo $LINE | awk -F"," '{ print $4 }'`
field6=`echo $LINE | awk -F"," '{ print $5 }'`
field7=`echo $LINE | awk -F"," '{ print $6 }'`
field8=`echo $LINE | awk -F"," '{ print $7 }'`
field9=`echo $LINE | awk -F"," '{ print $8 }'`
# Input field 9 is blank for these records, so output fields 10-13 are fixed
# zero fillers and fields 14-15 are a double-quoted single space (" ").
field10='0'
field11='00000000'
field12='0000000000000000'
field13='0'
field14="\" "\"
field15="\" "\"
# Input fields 10-13 map to output fields 16-19; input field 12 is checked
# for blankness further down before it becomes field18.
field16=`echo $LINE | awk -F"," '{ print $10 }'`
field17=`echo $LINE | awk -F"," '{ print $11 }'`
field18tmp=`echo $LINE | awk -F"," '{ print $12 }'`
field19=`echo $LINE | awk -F"," '{ print $13 }'`
field20=''
field21='0'
# Take the date apart on whitespace: day, month name, year, time.
# typeset -RZ2 is a ksh attribute: right-justify and zero-fill to two characters.
typeset -RZ2 day=`echo $field3tmp | awk '{ print $1 }'`
tmpmonth=`echo $field3tmp | awk '{ print $2 }'`
# Look up the month: search netbanx_datamart_load.sh for "#<month name>" and
# take the second whitespace-separated word of the matching line.
# NOTE(review): presumably that file carries comment lines pairing each month
# name with its number; it is not shown here, and the path is relative to the
# current directory -- confirm.
month=`cat netbanx_datamart_load.sh | grep "#$tmpmonth" | awk '{ print $2 }'`
year=`echo $field3tmp | awk '{ print $3 }'`
# Only the first five characters of the time are kept.
timestamp=`echo $field3tmp | awk '{ print $4 }' | cut -c1-5`
field2=`echo $day/$month/$year`
#field3=`echo $day/$month/$year $timestamp`
# ":00.000000" is appended to the five-character time.
# NOTE(review): if the time is HH:MM:SS, the input seconds are discarded and
# written as 00 -- confirm that this is intended.
field3=`echo $year-$month-$day $timestamp:00.000000`
# field18 is input field 12 when it holds a letter or digit, otherwise a
# double-quoted single space.
echo $field18tmp | grep '[A-Za-z0-9]' > /dev/null
if [ $? -ne 0 ];then
field18="\" "\"
else
field18=$field18tmp
fi
# NOTE(review): as shown, the quoted string continues onto the next line, which
# would put a line break after field15 in the output; this looks like a line
# wrap from pasting -- confirm that the real script has it on one line.
echo "$field1,$field2,$field3,$field4,$field5,$field6,$field7,$field8,$field9,$field10,$field11,$field12,$field13,$field14,$field15,
$field16,$field17,$field18,$field19,$field20,$field21,," >> /netbanx/netbanx_noMref.txt
done
and the netbanx2.tmp file is reformatted using this code,
# Reformat each line of /netbanx/netbanx2.tmp (records with a populated
# field 9) and append the result to /netbanx/netbanx2.txt.
# Output layout: field1..field21 followed by two empty trailing fields.
# Each input field is extracted by its own echo | awk pipeline, so one input
# line starts a few dozen processes.
cat /netbanx/netbanx2.tmp | while read LINE
do
# Input fields 1-8 map to output fields 1 and 4-9; input field 2 (the date,
# with its double quotes removed) is held in field3tmp for conversion below.
field1=`echo $LINE | awk -F"," '{ print $1 }'`
field3tmp=`echo $LINE | awk -F"," '{ print $2 }' | sed 's/"//g'`
field4=`echo $LINE | awk -F"," '{ print $3 }'`
field5=`echo $LINE | awk -F"," '{ print $4 }'`
field6=`echo $LINE | awk -F"," '{ print $5 }'`
field7=`echo $LINE | awk -F"," '{ print $6 }'`
field8=`echo $LINE | awk -F"," '{ print $7 }'`
field9=`echo $LINE | awk -F"," '{ print $8 }'`
# Input fields 9-13 become output fields 10-14 and input field 18 becomes
# output field 15.
# NOTE(review): this assumes each netbanx2.tmp line carries the five pieces of
# the original field 9 in fields 9-13 and the unsplit value in field 18, which
# is what the split step is meant to produce -- confirm.
field10=`echo $LINE | awk -F"," '{ print $9 }'`
field11=`echo $LINE | awk -F"," '{ print $10 }'`
field12=`echo $LINE | awk -F"," '{ print $11 }'`
field13=`echo $LINE | awk -F"," '{ print $12 }'`
field14=`echo $LINE | awk -F"," '{ print $13 }'`
field15=`echo $LINE | awk -F"," '{ print $18 }'`
# Input fields 14-17 feed output fields 16-19.
field16=`echo $LINE | awk -F"," '{ print $14 }'`
field17tmp=`echo $LINE | awk -F"," '{ print $15 }'`
# This value of field18 is always replaced by the if/else further down.
field18=`echo $LINE | awk -F"," '{ print $16 }'`
field19=`echo $LINE | awk -F"," '{ print $17 }'`
field20=''
field21='0'
# Take the date apart on whitespace: day, month name, year, time.
# typeset -RZ2 is a ksh attribute: right-justify and zero-fill to two characters.
typeset -RZ2 day=`echo $field3tmp | awk '{ print $1 }'`
tmpmonth=`echo $field3tmp | awk '{ print $2 }'`
# Look up the month: search netbanx_datamart_load.sh for "#<month name>" and
# take the second whitespace-separated word of the matching line.
# NOTE(review): presumably that file carries comment lines pairing each month
# name with its number; it is not shown here, and the path is relative to the
# current directory -- confirm.
month=`cat netbanx_datamart_load.sh | grep "#$tmpmonth" | awk '{ print $2 }'`
year=`echo $field3tmp | awk '{ print $3 }'`
# Only the first five characters of the time are kept.
timestamp=`echo $field3tmp | awk '{ print $4 }' | cut -c1-5`
field2=`echo $day/$month/$year`
# ":00.000000" is appended to the five-character time.
# NOTE(review): if the time is HH:MM:SS, the input seconds are discarded and
# written as 00 -- confirm that this is intended.
field3=`echo $year-$month-$day $timestamp:00.000000`
# field18 becomes input field 15 when that holds a letter or digit, otherwise
# a double-quoted single space.
# NOTE(review): field17 is never assigned in this loop (only field17tmp is),
# yet $field17 is printed below, so it takes whatever value the variable already
# has in the shell. Also, the test is on field17tmp but the result is stored in
# field18. Confirm whether field17 and field18 are meant to be set this way.
echo $field17tmp | grep '[A-Za-z0-9]' > /dev/null
if [ $? -ne 0 ];then
field18="\" "\"
else
field18=$field17tmp
fi
# NOTE(review): as shown, the quoted string continues onto the next line, which
# would put a line break after field15 in the output; this looks like a line
# wrap from pasting -- confirm that the real script has it on one line.
echo "$field1,$field2,$field3,$field4,$field5,$field6,$field7,$field8,$field9,$field10,$field11,$field12,$field13,$field14,$field15,
$field16,$field17,$field18,$field19,$field20,$field21,," >> /netbanx/netbanx2.txt
done
The 2 output files are then merged together at the end.
This script works perfectly but it runs for about 40 minutes with an input file containing about 13000 lines. The worst offending parts are the file split (7 mins) and the second reformat (30 mins), although that is because most of the records are split into the netbanx2.tmp file.
I really need to make this script more efficient, both in run time and cpu usage, so any assistance anyone can give would be very much appreciated.
I could do this more efficiently in perl but unfortunately for various reasons this needs to be in shell script.