Thanks for the reply. Could you please point me to a tutorial that explains the functionality used in your command, such as the array 'a[$1]', 'in a', and so on?
I tried to write a script for the column-comparison problem.
If needed, it can be customized further. For example, I could add a "-F" (field separator) parameter so that characters other than a comma can be used; in this script the field (column) separator is a comma, as in your examples.
As far as I understand, you want to compare the second column of the two files, so run the script with the compare column set to 2. The compare-column parameter is 'c'.
The command and its results are shown below.
The script can also be used for other comparison examples.
If the files below contain lines with the same columns, the script asks whether to add them to the results or not.
If we use column 2 for the comparison, the script behaves as follows.
file1 has a line with only one column ("9"), so the script fails on that line whenever a column other than the first is chosen. file1 also has a line whose second column is not a number (and therefore cannot be compared)! In addition, there are multiple lines with the same compare value, so the script asks whether to add them or not.
# ./justdoit c=2
['file1'] contains non-digit chars or columns!
['file2'] contains non-digit chars or columns!
['file1'] file, ['"11","NOTNUMBER","BBBBBB","CCCCCC"'] line, ['2'] numbered ['NOTNUMBER'] column, has contain non-digit chars!!
sed: -e expression #1, char 31: invalid reference \1 on `s' command's RHS
Check your values.. file ['file1'], line ['"9"'], compare column ['2']
"SED will give error!! And the result is partially false!!!"
sed: -e expression #1, char 31: invalid reference \1 on `s' command's RHS
sed: -e expression #1, char 31: invalid reference \1 on `s' command's RHS
Multiple lines have same number of columns for compare column ['9900000003'] !!
"4","9900000003","AAAAAAA"
"4","9900000003","AAAAAAA"
Multiple lines add to results [y/n]? y
--> Showing compare results..
==================================
"4","9900000003","AAAAAAA"
"4","9900000003","AAAAAAA"
"20","9900000007","1","A","2"
"3","9900000001","1","A","2"
"3","9900000002"
"17","9900000005","1","A","2","CCCCCC"
"13","9900000004","0","A","2"
=================================
Of course, these examples were reproduced for the demonstration.
The script may also produce wrong results in unforeseen cases, for input variations that I did not think of.
#!/bin/bash
## justdoit##
## SED column comparison ##
#######################################
# Determine the largest number of quoted, comma-separated columns found on
# any line of file1 or file2 (both read from the current directory).
# Globals:
#   maxcol (written) - largest column count over both files
#   filec  (written) - filec[0] is the count for file1, filec[1] for file2
# Arguments:
#   none (anything passed is ignored)
#######################################
maxcolfind()
{
  local col='"[^ ]*"'
  local file newcol count
  local z=0

  filec=()
  maxcol=0
  for file in file1 file2; do
    count=0
    newcol=$col
    # Extend the pattern by one column per round until no line of the file
    # begins with that many columns; the number of successful rounds is the
    # file's column count.
    while [[ -n $(sed -n "/^$newcol/p" "$file") ]]; do
      count=$((count + 1))
      newcol="$newcol,$col"
    done
    # z must keep counting across files so each file gets its own slot.
    filec[z]=$count
    z=$((z + 1))
    if ((count > maxcol)); then
      maxcol=$count
    fi
  done
}
# Run the column count now; it sets the global maxcol that the code below
# reads. The function never reads its arguments, so the "" has no effect.
maxcolfind ""
#######################################
# Print a warning for each of file1 / file2 (in the current directory) that
# has at least one line containing a character other than 0-9.
# Arguments:
#   none (anything passed is ignored)
# Outputs:
#   one warning line per affected file on stdout
#######################################
nondigitsfind()
{
  local offending
  for file in file1 file2; do
    # Collect every line that holds a non-digit character.
    offending=$(sed -n '/[^0-9]/p' "$file")
    if [[ -z $offending ]]; then
      continue
    fi
    echo "['$file'] contains non-digit chars or columns!"
  done
}
# Informational pre-check: report which input files contain non-digit
# characters. It does not stop the script.
nondigitsfind ""

# Iteration budgets for the later loops: one pass per possible column count.
countcompare=$maxcol; countcomparereset=$maxcol; listresults=$maxcol

# col is only shown in the usage text; charforrhs is the regex for a single
# quoted alphanumeric column.
col='"[^ ]*"'
charforrhs='"[A-Za-z0-9][A-Za-z0-9]*"'

# Parse the compare column from "c=N" (a bare "N" is accepted as well, as it
# was before). The value is checked numerically against maxcol rather than
# with a sed character class such as [1-N], which cannot express the range
# once N has two digits and is an invalid range when N is 0.
compcol=
compcol_re='^(c=)?([1-9][0-9]*)$'
if [[ ${1-} =~ $compcol_re ]]; then
  requested=${BASH_REMATCH[2]}
  # Compare lengths first so an absurdly long number cannot overflow the
  # arithmetic comparison.
  if (( ${#requested} <= ${#maxcol} && requested <= maxcol )); then
    compcol=$requested
  fi
fi
if [[ -z $compcol ]]; then
echo "You entered an invalid value for compare column!!
You have max $maxcol --> $col <-- string!!
Usage $0 c=[1-$maxcol] (c--> Compare Which Column)"
exit 1
fi

# compcolarr[k] is a full-line pattern of (maxcol - k) comma-separated
# columns: index 0 holds the widest pattern, index maxcol-1 a single column.
compcolarr=()
pattern=$charforrhs
for (( k = maxcol - 1; k >= 0; k-- )); do
  compcolarr[k]=$pattern
  pattern="$pattern,$charforrhs"
done
# Compare phase: walk both input files and collect every distinct, digits-only
# value of the chosen compare column ($compcol) into addedarr.
# The result file newout is truncated here before anything is collected.
>newout
# x indexes compcolarr (the full-line column patterns prepared earlier in the
# script); first=ok marks that nothing has been stored in addedarr yet.
# count is incremented and reset below but never read in this section.
x=0;count=0 ; first=ok
for j in file1 file2
do
# One pass per pattern in compcolarr; countcompare is the remaining budget.
while [ $(( countcompare -= 1 )) -ge 0 ] ;
do
# Skip this pattern entirely when no line of the file matches it as a whole.
if [[ $(sed -n "/^${compcolarr[x]}$/p" $j) ]] ; then
# NOTE: the unquoted command substitution splits the file content on
# whitespace, so each "line" here is really a whitespace-separated word.
for i in $(sed "" $j)
do
if [[ $(echo $i|sed -n "/^${compcolarr[x]}$/p") ]] ; then
# Wrap the $compcol-th column sub-pattern in \( \) so the column value can be
# pulled out as \1. If the pattern has fewer columns than $compcol, nothing is
# wrapped and the sed calls below fail with an invalid \1 reference.
findcompcol=$(echo ${compcolarr[x]}|sed 's/\[A-Za-z0-9]\[A-Za-z0-9]\*/\\(&\\)/'$compcol' ' )
# Dry run of the extraction: the normal output is discarded, only sed's error
# text is shown, and the exit status is checked right after.
(echo $i|sed "s/$findcompcol/\1/") 2>&1 >/dev/null ## test columns for compare values according to file content
if [ $? != 0 ] ; then
echo "Check your values.. file ['$j'], line ['$i'], compare column ['$compcol'] "
echo "\"SED will give error!! And the result is partially false!!!\" "
fi
# Accept the line only when the extracted column holds no non-digit character
# (an empty extraction, e.g. after a sed error, also passes this test).
if [[ ! $(echo $i|sed "s/$findcompcol/\1/"|sed -n '/[^0-9]/p') ]] ; then
okline="$i" ; isstring=no
else
falsecompcol=$(echo $i|sed "s/$findcompcol/\1/")
echo "['$j'] file, ['$i'] line, ['$((compcol))'] numbered ['$falsecompcol'] column, has contain non-digit chars!!"
isstring=yes ;
fi
# For a usable line, remember the column value as the candidate key.
if [ ! $isstring = "yes" ] ; then
comparr=$(echo $okline|sed "s/$findcompcol/\1/" )
isadded=no ;
fi
fi
# NOTE: this check sits outside the pattern-match test above, so it also runs
# for lines that did not match and then reuses comparr / isadded / isstring
# left over from an earlier line.
if [ ! $isadded = "yes" ] && [ ! $isstring = "yes" ] ; then
if [ "$first" = "ok" ] ; then
# Very first key: store it without searching addedarr.
addedarr=(${addedarr[@]} $comparr ) ; first=notok ; isadded=yes
else
# Linear search so each key is stored only once.
for k in ${addedarr[@]}
do
if [[ $comparr = $k ]] ; then
isadded=yes ;break
fi
done
if [ ! "$isadded" = "yes" ] ; then
addedarr=(${addedarr[@]} $comparr )
fi
fi
fi
done
fi
((x++)) ; ((count++))
done
# Restore the pattern budget and index before processing the next file.
countcompare=$countcomparereset ;x=0; count=0
done
# Listing phase: for every key collected in addedarr, find the lines of
# file1/file2 that contain it, keep those matching the first (widest) column
# pattern that matches at all, append them to newout, and finally print newout.
l=0 ; listreset=$listresults
for i in "${addedarr[@]}"
do
  # Every line of either file that contains the key anywhere on the line.
  candidates=$(sed -n "/$i/p" file1 file2)
  while [ $(( listresults -= 1 )) -ge 0 ]
  do
    # Lines among the candidates that match the current full-line pattern.
    addthis=$(printf '%s\n' "$candidates" | sed -n "/^${compcolarr[l]}$/p")
    if [[ -n $addthis ]] ; then
      # Count the matching lines with sed's '$=' (number of the last line).
      # The full count is needed: looking only at its first digit would make
      # 10-19 duplicates look like a single line and skip the question.
      dupcount=$(printf '%s\n' "$addthis" | sed -n "/$i/p" | sed -n '$=')
      if [[ $dupcount != 1 ]] ; then
        echo "Multiple lines have same number of columns for compare column ['$i'] !!"
        echo "$addthis"
        read -r -p "Multiple lines add to results [y/n]? " ch
        # Quoted comparison: an empty or multi-word answer simply means "no".
        if [[ $ch == y ]] ; then
          printf '%s\n' "$addthis" >>newout
        fi
      else
        printf '%s\n' "$addthis" >>newout
      fi
      # Only the first pattern that matches is used for this key.
      break
    fi
    ((l++))
  done
  # Restore the pattern budget and index for the next key.
  listresults=$listreset ; l=0
done
# cat instead of a pager: the file content is only being captured here.
echo "--> Showing compare results..
==================================
$(cat newout)
================================="