Hi I have a script which extracts the table from HTML and convert it into .csv.
But the problem in the script is if we have 2 tables in HTMl . it takes only the first table.
Please help me what changes i need to do in the script to make it read the complete HTML page.
Script is as below:
#!bin/ksh
timestamp=$(date +%d_%m_%y-%T )
export REPORT_PATH=/tefuser5/tef/acw/migwrk1/Informatica/9.5.1/server/infa_shared/Phase4_WLS/bin/reports/COUNT_REPORTS/Work
#mv /tefuser5/tef/acw/migwrk1/Informatica/9.5.1/server/infa_shared/Phase4_WLS/bin/reports/COUNT_REPORTS/Work/ABP_EXP_WRLN.csv /tefuser5/tef/acw/migwrk1/Informatica/9.5.1/server/infa_shared/Phase4_WLS/bin/reports/COUNT_REPORTS/Work/Archive/ABP_EXP_WRLN.csv"_$timestamp"
cd $REPORT_PATH
#rm ABP_EXP_WRLN.csv
#cd $REPORT_PATH
#rm -f *.csv 2>/dev/null
for HTML_F in *.HTML
do
echo "converting $HTML_F file to csv.."
dos2unix $HTML_F 1>/dev/null 0>/dev/null 2>/dev/null
l=0
j=0
k=0
rm -f xyz.csv 2>/dev/null
rm -f abc.csv 2>/dev/null
while IFS='' read -r line || [[ -n "$line" ]];
# awk '/<TABLE/ {CNT++; if (CNT == 2) P = 1}; P; /<\/TABLE/ {P = 0}'
do
# echo "$line"
#awk '{/<TABLE/}'
if [[ "$line" == \<BR\>\<TABLE\ \ width\=* || $j -ge 2 ]]; then
let j=$j+1
if [ $j -ge 2 ]; then
#echo "reached in 1st if"
echo "$line" | grep -i '</TD>' 1>/dev/null 2>/dev/null
if [ $? -eq 0 ]; then
#echo "reached in 2nd if"
tmp=${line#*\"\>}
#echo "$tmp"
res=${tmp%%\ \<\/TD\>*}
echo "$res" >> abc.csv
else
:
fi
else
:
fi
else
:
fi
#echo "$line"
if [[ "$line" == *TABLE\>* ]]; then
#echo "end of 1st table"
let k=$k+1
fi
if [ $k -eq 2 ]; then
echo "$HTML_F is ending.."
break
fi
done < "$HTML_F"
while read a
do
if [ $l -eq 4 ]; then
l=0
echo "$a" >> xyz.csv
else
let l=$l+1
echo "$a" | tr -s '\n' ',' >> xyz.csv
fi
done < abc.csv
rm -f abc.csv
tmpfname=`basename $HTML_F .HTML`
rm -f $tmpfname.csv 2>/dev/null
mv xyz.csv $tmpfname.csv
#printf "\n\n\n\n\n\n\n,,THIS IS END OF FILE,," >> $tmpfname.csv
#printf "\n\n" >> $tmpfname.csv
dos2unix $tmpfname.csv 1>/dev/null 0>/dev/null 2>/dev/null
chr=`echo $tmpfname.csv|cut -d'_' -f1`
echo "$chr $tmpfname.csv"
#chkDiff $chr $tmpfname.csv
done
HTML page is as below:
<html>
<body>
<b><br>Running Date: </b>11-JAN-2019 03:07</br>
<h2> Schema mapping and info </h2>
<BR><TABLE width="100%" class="x1h" cellpadding="1" cellspacing="0" border="5">
<TR>
<b><td class="x3w" bgcolor="#808080" width="4%"> No </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> Exp Schema e </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> Export Tables </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> Imp Schema </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> Import Tables </TD>
<b><td class="x3w" bgcolor="#808080" width="5%"> Diff </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#E3E4FA">1 </TD>
<b><td class="x3w" bgcolor="#E3E4FA">FVT4 </TD>
<b><td class="x3w" bgcolor="#E3E4FA">54 </TD>
<b><td class="x3w" bgcolor="#E3E4FA">PRDCUSTO </TD>
<b><td class="x3w" bgcolor="#E3E4FA">54 </TD>
<b><td class="x3w" bgcolor="#E3E4FA"> </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#E3E4FA">1 </TD>
<b><td class="x3w" bgcolor="#E3E4FA">FVT4 </TD>
<b><td class="x3w" bgcolor="#E3E4FA">56 </TD>
<b><td class="x3w" bgcolor="#E3E4FA">All Imp Schema</TD>
<b><td class="x3w" bgcolor="#E3E4FA">54 </TD>
<b><td class="x3w" bgcolor="#FF0000">2 </TD></TR>
</TABLE>
<h2> Missing Tables on ImpLogs </h2>
<h3> TABLE_NAME :NAME_DATA </h3>
<h3> TABLE_NAME :WHITE_LIST_MIG </h3>
<h2> Table Rows Comparison </h2>
<BR><TABLE width="100%" class="x1h" cellpadding="1" cellspacing="0" border="5">
<TR>
<b><td class="x3w" bgcolor="#808080" width="4%"> No </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> TABLE NAME </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> Exported Rows </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> Imported Rows </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> Diff Rows </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#E3E4FA">1 </TD>
<b><td class="x3w" bgcolor="#E3E4FA">NAME_DATA </TD>
<b><td class="x3w" bgcolor="#E3E4FA">24760 </TD>
<b><td class="x3w" bgcolor="#E3E4FA"> </TD>
<b><td class="x3w" bgcolor="#FF0000">Not exist on Imp </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#E3E4FA">2 </TD>
<b><td class="x3w" bgcolor="#E3E4FA">WHITE_LIST_MIG </TD>
<b><td class="x3w" bgcolor="#E3E4FA">12912 </TD>
<b><td class="x3w" bgcolor="#E3E4FA"> </TD>
<b><td class="x3w" bgcolor="#FF0000">Not exist on Imp </TD></TR>
</TABLE>
<h3> Imp and Exp logs Missmatch </h3>
<h2> All Exp , Imp logs Info </h2>
<BR><TABLE width="100%" class="x1h" cellpadding="1" cellspacing="0" border="5">
<TR>
<b><td class="x3w" bgcolor="#808080" width="4%"> No </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> TABLE NAME </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> Exported Rows </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> Imported Rows </TD>
<b><td class="x3w" bgcolor="#808080" width="20%"> Diff Rows </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">1 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">ADDRESS_DATA </TD>
<b><td class="x3w" bgcolor="#BDD7EE">13753 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">13753 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">2 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">ADDRESS_NAME_LINK </TD>
<b><td class="x3w" bgcolor="#BDD7EE">68715 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">68715 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">3 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AGREEMENT </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">4 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AGREEMENT_RESOURCE </TD>
<b><td class="x3w" bgcolor="#BDD7EE">29979 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">29979 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">5 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AGR_RES_HISTORY </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">6 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_ACCOUNT </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">7 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_ADDRESS_NAME </TD>
<b><td class="x3w" bgcolor="#BDD7EE">25824 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">25824 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">8 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_AGED_TRIAL_BALANCE </TD>
<b><td class="x3w" bgcolor="#BDD7EE">18780 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">18780 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">9 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_BILLING_ARRANGEMENT </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">10 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_CHARGES </TD>
<b><td class="x3w" bgcolor="#BDD7EE">18069 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">18069 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">11 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_CHARGE_GROUP </TD>
<b><td class="x3w" bgcolor="#BDD7EE">18069 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">18069 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">12 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_CREDIT_DEBIT_LINK </TD>
<b><td class="x3w" bgcolor="#BDD7EE">11032 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">11032 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">13 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_CUSTOMER_CREDIT </TD>
<b><td class="x3w" bgcolor="#BDD7EE">359 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">359 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">14 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_INVOICE </TD>
<b><td class="x3w" bgcolor="#BDD7EE">18428 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">18428 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">15 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_JGL_CONTROL </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">16 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_PAYMENT </TD>
<b><td class="x3w" bgcolor="#BDD7EE">8439 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">8439 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">17 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_PAYMENT_DETAILS </TD>
<b><td class="x3w" bgcolor="#BDD7EE">8439 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">8439 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">18 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_PAY_CHANNEL </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">19 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_PROOF_AND_BALANCE </TD>
<b><td class="x3w" bgcolor="#BDD7EE">4 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">4 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">20 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_TAX_ITEM </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">21 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_TRANSACTION_LOG </TD>
<b><td class="x3w" bgcolor="#BDD7EE">26867 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">26867 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">22 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">AR1_UNAPPLIED_CREDIT </TD>
<b><td class="x3w" bgcolor="#BDD7EE">711 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">711 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">23 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_ACTIVITY_HISTORY </TD>
<b><td class="x3w" bgcolor="#BDD7EE">30928 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">30928 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">24 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_BILL_STATEMENT </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">25 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_BLNG_ARRANGEMENT </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">26 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_CHARGE </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">27 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_CHARGE_REQUEST </TD>
<b><td class="x3w" bgcolor="#BDD7EE">1966 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">1966 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">28 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_CUSTOMER </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">29 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_CUSTOMER_INFO </TD>
<b><td class="x3w" bgcolor="#BDD7EE">55803 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">55803 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">30 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_CYCLE_CUSTOMERS </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">31 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_CYC_PAYER_POP </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">32 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_DOCUMENT </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">33 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_INVOICE </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">34 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_INV_CHARGE_REL </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">17269 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">35 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_PAY_CHANNEL </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">12912 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">36 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_RC_RATES </TD>
<b><td class="x3w" bgcolor="#BDD7EE">30928 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">30928 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">37 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_TAX </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">38 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL1_TAX_ITEM </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>
<b><td class="x3w" bgcolor="#BDD7EE">39 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">BL9_PROVINCIAL_PCP </TD>
<b><td class="x3w" bgcolor="#BDD7EE">18259 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">18259 </TD>
<b><td class="x3w" bgcolor="#BDD7EE">0 </TD></TR>
<tr>