Since the files are sorted, try reading up on the Unix "comm" command. I assume that the second file doesn't actually contain the leading space characters that we see in the post.
# cat file2
TeST 34TesTesT
wwwis ttesSt TEESSSTT Test
# ./compare file1 file2 -c=1
FS parameter is missing! FS is setting the -> 'space'
Will be comparing with case sensitive [y/n]? y
====================================================================
Comparing process will be with ''' case sensitive ''' !OK
'file1' && 'file2' Compare Process is Completed!!
====================================================================
********* Results for 1. column to for file1 file **********
====================================================================
Line(s) "Differences"
2 >> xxtest
====================================================================
Line(s) "Duplicates"
1 << test
************************ End of Results ****************************
# ./compare -F'//' file5 file6 -c=5
Will be comparing with case sensitive [y/n]? n
FS -> '//' maybe cause occur false results!!
FS cannot be found file 'file6' -> 3.line !!
FS checking will not continue for remaining lines.
====================================================================
Comparing process will be with ''' non case sensitive ''' !OK
'file5' && 'file6' Compare Process is Completed!!
====================================================================
********* Results for 5. column to for file5 file **********
====================================================================
Line(s) "Differences"
2 >> /xxli
====================================================================
Line(s) "Duplicates"
No Duplicates Found!!
************************ End of Results ****************************
# ./compare -F'//' file5 file6 -c=8
Will be comparing with case sensitive [y/n]? n
FS -> '//' maybe cause occur false results!!
FS cannot be found file 'file6' -> 3.line !!
FS checking will not continue for remaining lines.
====================================================================
Comparing process will be with ''' non case sensitive ''' !OK
'file5' && 'file6' Compare Process is Completed!!
====================================================================
********* Results for 8. column to for file5 file **********
====================================================================
Line(s) "Differences"
3 >> /
====================================================================
Line(s) "Duplicates"
No Duplicates Found!!
************************ End of Results ****************************
# ./compare -F'//' file5 file6 -c=4
Will be comparing with case sensitive [y/n]? y
FS -> '//' maybe cause occur false results!!
FS cannot be found file 'file6' -> 3.line !!
FS checking will not continue for remaining lines.
====================================================================
Comparing process will be with ''' case sensitive ''' !OK
'file5' && 'file6' Compare Process is Completed!!
====================================================================
********* Results for 4. column to for file5 file **********
====================================================================
Line(s) "Differences"
4 >> fli
====================================================================
Line(s) "Duplicates"
3 << DdLi
5 << sssli
************************ End of Results ****************************
And here is the code:
#!/bin/bash
## justdoit ##
## SED column comparison v2 @unix.com ##
## Probably it has some bugs!! ##
## Please notify me of errors and negative issues
## or wanted additional features ## @ygemici ##
# exitt
# Print the two usage examples on stdout and terminate the script with
# exit status 1.
# Globals: maxcol (read)    - highest selectable column, shown in the hint;
#          maxcolor (written) - the text actually printed, '?' when maxcol
#                               has not been determined yet.
exitt() {
  # Start from the known maximum and fall back to '?' when it is empty.
  maxcolor=$maxcol
  [ -z $maxcol ] && maxcolor='?'

  echo "Usage e.g -> '$0 -F='\' file1 file2 -c=[1-$maxcolor]' (c-> Compare Column) "
  echo "Usage e.g -> '$0 file1 file2 -c=[1-$maxcolor]' (FS is automatically synchronized to a 'space') "

  exit 1
}
# casematch
# Ask on the terminal whether the comparison should be case sensitive.
# Globals written:
#   cs  - the raw answer that was read
#   eee - 1 when the answer is y, ye or yes; 0 for anything else
#   ee  - status sentence printed later with the results (the backslashed
#         quotes are stored literally in the string)
casematch() {
  read -p "Will be comparing with case sensitive [y/n]? " cs

  if [[ $cs == y || $cs == ye || $cs == yes ]]; then
    eee=1
    ee="Comparing process will be with \'\'\' case sensitive \'\'\' !OK "
  else
    eee=0
    ee="Comparing process will be with \'\'\' non case sensitive \'\'\' !OK "
  fi
}
# fscontrol COUNT
# Accept only an argument count of 3 or 4; for any other number hand over
# to exitt, which prints the usage text.
fscontrol() {
  # Each guard returns as soon as its numeric test does not succeed, which
  # is exactly when the count is one of the accepted values.
  [ $1 -ne 3 ] || return 0
  [ $1 -ne 4 ] || return 0
  exitt
}
# fsset
# Rewrite the global FS so that it can be embedded in sed expressions that
# use "/" as their delimiter: the sed substitution below prefixes each
# character it matches with a backslash.
# Globals: FS (read and written).
fsset() {
  # printf instead of echo: echo would treat a separator such as "-n" or
  # "-e" as one of its own options and print nothing.
  FS=$(printf '%s\n' "$FS" | sed 's/[\/]/\\&/g')
}
# fstest
# Verify that the field separator occurs at least once in each input file.
# A message is printed for every file without a match; if any file failed,
# a blank line and the usage text follow and the script exits with 1.
# Globals: file1, file2, FS, FSX (read); file, exit (written).
fstest() {
  local hits

  for file in $file1 $file2; do
    hits=$(sed -n "/$FS/p" $file)
    [[ -n $hits ]] && continue
    echo "FS(field separator) -> '$FSX' cannot be found in $file file!!"
    exit=ok
  done

  [ "$exit" = "ok" ] || return 0

  echo
  exitt
  exit 1
}
# fschk
# Walk through the numbered copies of both input files and warn at the
# first line of each file that does not start with
# "<line number> <column><separator>". Checking of that file stops there.
# Each inspected line is first copied to ${file}chk.
# Globals: file1, file2, col, FS, FSX (read); file, lmaxx, i, chk (written).
fschk() {
  for file in ${file1}numtmp ${file2}numtmp; do
    lmaxx=$(sed -n '$=' $file)

    for ((i = 1; i <= lmaxx; i++)); do
      sed -n "$i p" $file > ${file}chk
      chk=$(sed -n "/^[0-9][0-9]* $col$FS/p" ${file}chk)

      # A non-empty result means this line is fine; look at the next one.
      [ -z "$chk" ] || continue

      echo "FS -> '$FSX' maybe cause occur false results!!"
      echo "FS cannot be found file '${file%%numtmp}' -> $i.line !!"
      echo -e "FS checking will not continue for remaining lines.\n"
      break
    done
  done
}
# maxcolfind
# Determine the largest number of columns found on any line of the two
# input files. A line's column count is the number of pieces obtained by
# splitting it at every match of the sed pattern in FS.
# Globals read:    file1, file2, FS
# Globals written: maxcol  - afterwards $maxcol is the overall maximum
#                            (element 1 still holds the count for file2)
#                  xxfile  - the file owning that maximum (file2 on a tie)
#                  file, ix
#
# The previous version kept the running maximum in a helper variable that
# became empty after any one-column line, which made every later numeric
# test fail; it also re-read the file once per line. Each file is now read
# once and every file starts with a maximum of 1.
maxcolfind() {
  local line cols

  maxcol=()
  ix=0
  for file in $file1 $file2; do
    maxcol[ix]=1
    # "|| [[ -n $line ]]" keeps a final line that lacks a trailing newline.
    while IFS= read -r line || [[ -n $line ]]; do
      cols=$(printf '%s\n' "$line" | sed "s/$FS/\n/g" | sed -n '$=')
      if [ "${cols:-1}" -gt "${maxcol[ix]}" ]; then
        maxcol[ix]=$cols
      fi
    done <"$file"
    ix=1
  done

  if [ "${maxcol[0]:-0}" -gt "${maxcol[1]:-0}" ]; then
    maxcol=${maxcol[0]}
    xxfile=$file1
  else
    maxcol=${maxcol[1]}
    xxfile=$file2
  fi
}
# addlnums
# 1. For each input file write ${file}numtmp: every line prefixed with its
#    line number and a space; lines that were empty are dropped.
# 2. Make sure the two marker strings do not occur anywhere in those
#    numbered files, appending _1, _2, ... to the base name until a free
#    one is found.
# Globals: file1, file2 (read); file, sedundef, sedsp, u, v (written).
addlnums() {
  for file in $file1 $file2; do
    sed '=' $file |
      sed -n 'N;s/\n/ /;/^[0-9][0-9]* $/d;p' >${file}numtmp
  done

  until [[ -z $(sed -n "/$sedundef/p" ${file1}numtmp ${file2}numtmp) ]]; do
    ((u++))
    sedundef="X_undef_X_${u}"
  done

  until [[ -z $(sed -n "/$sedsp/p" ${file1}numtmp ${file2}numtmp) ]]; do
    ((v++))
    sedsp="X_sp_X_${v}"
  done
}
# compcoltst LIMIT TOTAL
# Validate the requested compare column (global compcol): it has to be a
# single character in the range 1..LIMIT. On failure report TOTAL as the
# number of available columns, name the file stored in xxfile and hand
# over to exitt.
compcoltst() {
  local accepted

  accepted=$(echo "$compcol"|sed -n "/^[1-$1]$/p")
  [[ -n $accepted ]] && return 0

  echo "You entered an invalid value for compare column!!"
  echo "You have max $2 columns in '$xxfile' !!"
  echo
  exitt
}
# Script entry: basic set-up before the arguments are examined.
# col is a regex fragment matching a run of non-space characters; fschk
# places it in front of the separator when it inspects a line.
col='[^ ]*'
# Number of command-line arguments; reused by the argument dispatch below.
c=$#
# Leave with the usage text unless exactly 3 or 4 arguments were given.
fscontrol $c
# ftest FILE_A FILE_B FLAG
# Sanity-check the command line before any work is done; every failure
# prints a message and ends in exitt (usage text).
# Globals read: exptest (all arguments as one string), compt (the last
# argument). Globals written: fft, ex.
# The third parameter is passed by the callers but is not read here.
ftest() {
# The pattern -*c=* matches any text that contains a "c", because both the
# dashes and the equals signs are optional.
if [[ ! $(echo "$exptest"|sed -n '/-*c=*/p') ]] ; then
echo -e "Compare column is missing!!\n";exitt
# Same loose pattern, applied to the last argument only. The substitution
# is unquoted, so its words are handed to "[" individually.
elif [ ! $(echo "$compt"|sed -n '/-*c=*/p') ] ; then
echo "Compare column is must be specified as the last parameter!!"
exitt
fi
# Require something after the c/= part. NOTE(review): the bracket
# expression also contains a literal "(" next to the range 1-9; that looks
# unintended - confirm.
fft=$(echo "$exptest"|sed -n '/-*c=*\([(1-9]\)/p')
if [ -z "$fft" ] ; then
echo "Compare column number is unspecified!!"
exitt
fi
# Both inputs must be regular files. ex collects the failures so that both
# messages can be shown before leaving. The operands are unquoted.
if [ ! -f $1 ] ; then
echo "'$1' file is not a regular file";ex=1
fi
if [ ! -f $2 ] ; then
echo "'$2' file is not a regular file";ex=1
fi
if [[ $ex -eq 1 ]] ; then
exitt
fi
}
# Argument dispatch on the argument count c.
# In both arms: exptest holds all arguments, compt the last one (obtained
# by eval-ing "$<count>"), FSX keeps the separator as typed for messages,
# and maxcolx caps the selectable column at 9 because the validation
# pattern [1-N] can only describe a single character.
case $c in
# Three arguments: FILE1 FILE2 -c=N; the separator defaults to one space.
3)exptest=$@;compt=$(eval echo "\$$#");ftest $1 $2 "";file1=$1;file2=$2;FS=" "
echo "FS parameter is missing! FS is setting the -> 'space' "
FSX="$FS";fstest;maxcolfind
if [ $maxcol -gt 9 ];then maxcolx=9;
else maxcolx=$maxcol;fi
# Reduce the third argument to the bare column digit; when the pattern
# does not match, the argument stays as it is and compcoltst rejects it.
compcol=$(echo "$3"|sed "s/-*c=*\\([1-$maxcolx]\\)/\\1/")
compcoltst $maxcolx $maxcol;sedsp=X_sp_X;sedundef="X_undef_X";;
# Four arguments: -F<sep> FILE1 FILE2 -c=N. The separator is what remains
# of the first argument once the first "F", the dashes directly before it
# and the equals signs directly after it are removed.
4)exptest=$@;compt=$(eval echo "\$$#");ftest $2 $3 "y";file1=$2;file2=$3;FS=$(echo "$1"|sed 's/-*F=*//')
if [ "$FS" = "" ] ; then
echo -e "FS is undefined!!\nFS setting the -> 'space' \n";FS=" ";fi
# fsset rewrites FS for use inside sed expressions; FSX keeps the raw form.
FSX="$FS";fsset;fstest;maxcolfind;
if [ $maxcol -gt 9 ];then maxcolx=9
else maxcolx=$maxcol;fi
compcol=$(echo "$4"|sed "s/-*c=*\\([1-$maxcolx]\\)/\\1/")
compcoltst $maxcolx $maxcol;sedsp=X_sp_X;sedundef="X_undef_X";;
esac
# Ask for the case mode, create the numbered copies of both files and
# warn about lines on which the separator is missing.
casematch;addlnums;fschk
# calculate COLUMNS
# Prepare the helper strings used to pad short lines and to pick out the
# compare column.
# Globals read:    FS, sedundef, col, maxcol, compcol
# Globals written:
#   undefinedar[n] - "<FS><sedundef>" repeated n+1 times (padding for a
#                    line that is n+1 columns short)
#   fsgroup[n]     - n+1 separators joined by ".*"
#   sedforrhs      - sed pattern ".*<FS>.*<FS>...*" with one ".*" per
#                    column, where the compcol-th ".*" is wrapped in
#                    \( \) so that it can be referenced as a group
#   k, colx, charforrhs, removel, removefull
#
# The trailing separator of the pattern is removed as literal text. The
# earlier version removed it with a sed regex built by escaping FS a second
# time, which produced a non-matching or invalid expression as soon as FS
# contained "/" or regex syntax.
calculate() {
  local rhs

  k=0
  colx=$1
  undefinedar=()
  removel=()
  removefull=()
  fsgroup=()
  charforrhs="$col"

  undefinedar[0]="${FS}$sedundef"
  fsgroup[0]="$FS"
  # One additional element per remaining column.
  while [ $(( colx -= 1 )) -ge 1 ]; do
    ((k++))
    undefinedar[k]="${undefinedar[k-1]}${undefinedar[0]}"
    fsgroup[k]="${fsgroup[k-1]}.*${fsgroup[0]}"
  done

  rhs=".*${fsgroup[maxcol-1]}"
  # Quoted pattern: FS is removed as a plain suffix, never as a glob/regex.
  rhs=${rhs%"$FS"}
  # Wrap the compcol-th ".*" in a capture group.
  sedforrhs=$(printf '%s\n' "$rhs" | sed "s|\.\*|\\\(&\\\)|$compcol")
}
# Build the padding strings and the column-selecting sed pattern for the
# widest line that was found.
calculate $maxcol
# createlst FILE
# Produce ${FILE}lst from ${FILE}numtmp so that every line written carries
# exactly $maxcol columns: complete lines are copied, shorter lines get the
# matching padding from undefinedar appended. A line with more than
# $maxcol columns is not written at all.
# Globals read: FSX, FS, sedundef, maxcol, undefinedar.
# Globals written: file, x, a, c, i.
createlst() {
file=$1;x=0
# Pick a follow-up character "a" when the raw separator FSX matches one of
# the two sed patterns below (the first is written for backslashes, the
# second for slashes). "a" is not reset here, so a value left over from an
# earlier call or from elsewhere stays in effect.
# NOTE(review): the second pattern has an unescaped "/" inside the bracket
# expression; confirm that the sed in use accepts it.
if [ $(echo "$FSX"|sed -n '/^[\]*$/p') ] ; then
a='\\'
elif [ $(echo "$FSX"|sed -n '/^[/]*$/p') ] ; then
a='\/'
fi
if [ "$a" != "" ] ; then
# As long as a separator is directly followed by that character, insert
# the $sedundef marker between the separator and the run of characters.
while [[ $(sed -n '/'$FS''$a'/p' ${file}numtmp) ]] ; do
sed 's/\('$FS'\)\('$a'\+\)/\1'$sedundef'\2/g ' ${file}numtmp>${file}lsttmp
mv -f ${file}lsttmp ${file}numtmp
done
# Drop a separator that ends a line.
sed "s/$FS\$//" ${file}numtmp>${file}lsttmp
mv -f ${file}lsttmp ${file}numtmp
fi
# Start with an empty list file.
>${file}lst
c=$(sed -n '$=' ${file}numtmp)
for((i=1;i<=c;i++));do
# Column count of line i, taken after the leading line number is removed.
x=$(sed -n "$i p" ${file}numtmp|sed "s/^[0-9]* //;s/$FS/\n/g"|sed -n '$=')
if [ $x -eq $maxcol ] ; then
sed -n "$i s/.*/&/p" ${file}numtmp >>${file}lst
elif [ $x -lt $maxcol ] ; then
# undefinedar[n] holds n+1 "<separator><marker>" pairs, i.e. exactly the
# maxcol-x columns that are missing.
sed -n "$i s/.*/&${undefinedar[maxcol-x-1]}/p" ${file}numtmp >>${file}lst
fi
done
}
# Build the padded list file for each input. The loop variable is the
# global "file", so after the loop it still names the second input.
for file in $file1 $file2 ; do
createlst $file
done
# _process_records LISTFILE
# Helper for process: print one "<line number> <value>" record per line of
# LISTFILE, where <value> is the compare column selected by $sedforrhs.
# Runs of whitespace inside a value are reduced to single spaces and a
# value equal to the $sedundef marker (a padded, non-existent column) is
# skipped. Values are never glob-expanded.
_process_records() {
  local line
  local -a words

  sed "s/^\([0-9][0-9]*\) $sedforrhs/\1 \2/" "$1" |
    while IFS= read -r line; do
      read -r -a words <<<"$line"
      [ ${#words[@]} -gt 0 ] || continue
      [[ "${words[*]:1}" == "$sedundef" ]] && continue
      printf '%s %s\n' "${words[0]}" "${words[*]:1}"
    done
}

# process
# Compare the selected column of ${file1}lst against ${file2}lst.
# Every value of the first file that also occurs in the second file is
# appended to the global array dupp, every other value to the global array
# diff; entries have the form "<line number> <value>" with the first
# occurrence of the $sedundef marker removed.
# Globals read:    file1, file2, sedforrhs, sedundef, eee
# Globals written: reff, refff (the records of both files), dupp, diff
#
# eee selects the comparison: 0 ignores letter case, anything else compares
# exactly. Values are compared as plain strings, not as patterns.
#
# Fixes over the previous version: an accidentally repeated group of lines
# left two "if" statements unclosed; the case flag was applied the wrong
# way round; values were expanded as file-name patterns and matched as
# regular expressions; an empty column was compared as its line number.
process() {
  local record value other found x y
  local nocase_was_on=0
  local -a cleaned=()

  reff=()
  refff=()

  # Load the first file; the marker is stripped here, before nocasematch
  # may be switched on, so that the removal stays an exact match.
  while IFS= read -r record; do
    reff[${#reff[@]}]=$record
    cleaned[${#cleaned[@]}]=${record/"$sedundef"/}
  done < <(_process_records "${file1}lst")

  while IFS= read -r record; do
    refff[${#refff[@]}]=$record
  done < <(_process_records "${file2}lst")

  shopt -q nocasematch && nocase_was_on=1
  [[ $eee == 0 ]] && shopt -s nocasematch

  for ((x = 0; x < ${#reff[@]}; x++)); do
    value=${reff[x]#* }
    found=0
    for ((y = 0; y < ${#refff[@]}; y++)); do
      other=${refff[y]#* }
      if [[ $value == "$other" ]]; then
        found=1
        break
      fi
    done
    if ((found)); then
      dupp[${#dupp[@]}]=${cleaned[x]}
    else
      diff[${#diff[@]}]=${cleaned[x]}
    fi
  done

  # Leave the shell option the way it was found.
  ((nocase_was_on)) || shopt -u nocasematch
}
# write
# Print the result report on stdout: a header with the case mode and the
# file names, then the "Differences" and the "Duplicates" sections.
# Globals read:    dupp, diff (arrays of "<line number> <value>" entries),
#                  ee, file1, file2, compcol
# Globals written: lastdup, lastdif (the text of the two sections)
#
# Each entry is shown as "<line number> << <value>" (duplicates) or
# "<line number> >> <value>" (differences), separated by two tabs.
# Fixes over the previous version: the loops printed the first array
# element once per iteration (the subscript was missing) and ran one step
# too far; entries are now printed verbatim instead of being passed
# through "echo -e", and no scratch files are needed any more.
write() {
  local tab=$'\t'
  local rule="===================================================================="

  if [ ${#dupp[@]} -eq 0 ]; then
    lastdup="No Duplicates Found!!"
  else
    lastdup=$(printf '%s\n' "${dupp[@]}" | sed "s/ / <<${tab}${tab}/")
  fi

  if [ ${#diff[@]} -eq 0 ]; then
    lastdif="No Differences Found!!"
  else
    lastdif=$(printf '%s\n' "${diff[@]}" | sed "s/ / >>${tab}${tab}/")
  fi

  # Neither list has an entry: the chosen column held no values at all.
  if [ ${#dupp[@]} -eq 0 ] && [ ${#diff[@]} -eq 0 ]; then
    lastdif="${tab} $compcol. column is NULL!!"
    lastdup="${tab} $compcol. column is NULL!!"
  fi

  echo "$rule"
  # ee stores its quotes backslash-escaped; eval turns them into plain
  # quotes. NOTE(review): eval is acceptable only while ee is set from the
  # script's own fixed strings.
  eval echo $ee
  printf '\n%s\n' "'$file1' && '$file2' Compare Process is Completed!!"
  printf '%s\n' "$rule"
  printf '%s\n' "********* Results for $compcol. column to for $file1 file **********"
  printf '%s\n' "$rule"
  printf 'Line(s) \t"Differences" \n%s\n' "$lastdif"
  printf '%s\n' "$rule"
  printf 'Line(s) \t"Duplicates" \n%s\n' "$lastdup"
  printf '%s\n\n' "************************ End of Results ****************************"
}
# remtmp
# Delete the scratch files that were created next to each input file
# (suffixes lst, lsttmp, _dupp, _diff, numtmp and numtmpchk). Missing files
# are ignored.
# Globals: file1, file2 (read); file (written).
remtmp() {
  local suffix

  for file in $file1 $file2; do
    for suffix in lst lsttmp _dupp _diff numtmp numtmpchk; do
      rm -f ${file}${suffix}
    done
  done
}
# Final stage: compare the chosen column, print the report and remove the
# scratch files.
process;write;remtmp