Compare two files using shell script

Hi
I want to compare two files and I need the output to show only the differences.

here the files
file1

achilles
aedxbepo
aedxbwdm01
aedxbwdm02
albedo
amarice
ambrister
anakin
anton
argon
artephius
asgard
avatar
aymara
bach
baldur
baltar
bancroft
bee
beethoven
beknods1 / yogi
beknoepo
benosds01 / yakky
bittem
bluestreak
blume
bobientje
boemel
bolder
boobie
boomer
bragi
brockman
brown
burton
caillou
casper
chalmers
checkov
chopin
cibinensis
citrinitas
cliffjumper
coen
coot
copernicus
coppola
dandy
defraitdsrv01 / s3 fra
defraitdsrv02 / s3 fea backup
defrargdb
dido
django
dodi
dqmkt
dualla
duck
egret
eira
elodie
emile
epius
esbcns3srv01
esmcvs3svr01
fattony
fedexukftp
filiberke
filiberke
fortran
fozzie
freyr
frisco
frusi
fulmar
galactica
galileo
galileo
galvatron
gawin
geber
gonzo
grutte
gurani
hector
hecuba
heimdall
helium
hermod
hildorf
hod
honir
horst
horton
hotherus
iene
inbomwdm01
inca
itmhqlnxapp02
itmhqlnxapp03
itmhqlnxapp04
itmhqlnxapw01
itmhqlnxcedi1
itmhqlnxcedi2
itmhqlnxdim01
itmhqlnxdim02
itmhqlnxhblu
itmhqlnxs303
itmxplnxapw02
itmxplnxs302
itreksjed
jackson
janssen
janssen
jazz
jommeke
jord
kent
kir00097
krypton
kubrick
kwak
lalo
larousse
lessley
louise / pngemea04
lovejoy
lucas
mallard
manush
mcclure
mccoy
mercury.emea.fedex.com
miene
milhouse
mimir
ming
moleman
molly
morton
mozart
mungalia
murdo
mutte
nabucco
natch
navi
nessa
nigredo
nimbus
nleinwww1 hp
nowa
octaaf
odilon
oran
osthanes
perceptor
petrel
phantom
pinkas
pintail
pngemea01
pngemea02
porter
priam
prowl
pryscilla
puffin
pumyra
rembrandt
roslin
rubedo
ruby
s3primary
s3secondary
sansiro
scotty
seagull
sharita
shrek
sidious
skjold
skua
snoid
solero
song
spielberg
spo00004
sunstreak
suzy
synesius
talon
tancredus
tang
tarantino
thetis
tidiamh
tolkien
tolkien
trypticon
tupi
twister
tyr
uhura
verdi
vidar
wagner
wheeljack
wigeon
windcharger
xenon
zichen
zolder
zosimos
zuse
zuse
zussen

and Files2 as follows

         achilles
             aedxbwdm01
             albedo
             amarice
             ambrister
             anakin
             anton
             asgard
             avatar
             aymara
             bach
             baldur
             baltar
             bancroft
             bee
             beethoven
             beknoepo
             bluestreak
             blume
             bobientje
             bolder
             boobie
             boomer
             bragi
             brockman
             brown
             burton
             caillou
             chalmers
             chopin
             cibinensis
             citrinitas
             cliffjumper
             coen
             coot
             copernicus
             coppola
             dandy
             defrargdb
             denebola
             dido
             django
             dodi
             dqmkt
             droes
             dualla
             duck
             egret
             eira
             emeads
             emile
             epius
             esbcns3svr01
             expo
             fattony
             fedexukftp
             fergus
             filiberke
             fortran
             fozzie
             freyr
             frisco
             frusi
             galactica
             galileo
             galvatron
             gawin
             geber
             gonzo
             gridhouse
             gridman
             grutte
             hector
             hecuba
             heimdall
             hermod
             hibbert
             hildorf
             hod
             honir
             horst
             horton
             huffer
             iene
             inca
             itmhqlnxapw01
             itmhqlnxcedi1
             itmhqlnxcedi2
             itmhqlnxhblu01
             itmhqlnxs303
             itmxplnxapw02
             itmxplnxs302
             itreksjed
             jackson
             janssen
             jazz
             jommeke
             jord
             kent
             kubrick
             lalo
             larousse
             lessley
             lorna
             louise
             lovejoy
             lucas
             manush
             mcclure
             mercury
             miene
             milou
             mimir
             ming
             moleman
             molly
             momfer
             morton
             mozart
             mungalia
             murdo
             mutte
             nabucco
             navi
             nessa
             nigredo
             nimbus
             octaaf
             oculus
             oran
             orion
             osthanes.eema.fedex.com
             paracelsus
             perceptor
             phantom
             pinkas
             porter
             priam
             propus
             prowl
             pryscilla
             pumyra
             raket
             rembrandt
             roslin
             rubedo
             sharita
             shrek
             sidious
             skjold
             solero
             spielberg
             sunstreak
             suzy
             synesius
             talon
             tancredus
             tarantino
             thelma
             thetis
             tidiamh
             trypticon
             tupi
             twister
             tyr
             vidar
             wagner
             wheeljack
             windcharger
             zeus
             zichen
             zoef
             zolder
             zosimos
             zussen
        

Kindly let me know a script to find the differences between the above two files ....

diff file1 file2

I know this, but I want only the differences; if both files have the same line, I don't want it in the output.

# Print the lines of f2 whose first field never occurs as a first field in f1.
# Membership is tested with "in", so a first field of 0 is treated like any other value.
awk 'NR==FNR{A[$1];next}$1!=""{$1=$1;if(!($1 in A))print}' f1 f2

Shorter:

# Same report as above: f2 lines whose first field is absent from f1 (blank lines are skipped).
awk 'NR==FNR{A[$1];next}{$1=$1}$1!=""&&!($1 in A)' f1 f2

As the files are sorted try reading up on the unix "comm" command. I assume that the second file doesn't have those leading space characters which we see in the post.

Venikathir,
Try with below code

rm -f newfile.txt
# Collect every line of 1.txt that does not occur in 2.txt (ignoring case).
# -F takes the line literally instead of as a regular expression and -w
# rejects partial-word hits, so "bee" is no longer hidden by "beethoven".
while read -r line
do
  # An empty pattern would match every line of 2.txt, so skip blank lines.
  [ -n "$line" ] || continue
  if ! grep -qiwF -- "$line" 2.txt ; then
    printf '%s\n' "$line" >> newfile.txt
  fi
done <1.txt

Cheers
Harish

tukuyomi

thnx for the help it works fine for me

you can use sdiff command also

Hi all,
I have run into another issue:
the comparison is case sensitive.
How can I avoid that?

Not tested, but try this:

# Case-insensitive variant: the array is keyed by the lower-cased first field,
# so the lookup for an f2 line succeeds whatever the letter case in either file.
awk 'NR==FNR{A[tolower($1)];next}{$1=$1}$1!=""&&!(tolower($1) in A)' f1 f2

You could also try a script; it's more fun :b:

# cat file1
test test22
xxtest TTestT TESt
# cat file2
TeST 34TesTesT
wwwis ttesSt TEESSSTT Test
# ./compare file1 file2 -c=1
FS parameter is missing! FS is setting the -> 'space'
Will be comparing with case sensitive [y/n]? y
====================================================================
Comparing process will be with ''' case sensitive ''' !OK
 
                'file1' && 'file2' Compare Process is Completed!!
     ====================================================================
     ********* Results for 1. column to for file1 file **********
     ====================================================================
Line(s)         "Differences"
2 >>            xxtest
     ====================================================================
Line(s)         "Duplicates"
1 <<            test
     ************************ End of Results ****************************
 

Let's look at another one..

# cat file5
55li///ddccli///aabbli
xxli///zxxli///////xxli
aali//////DdLi/////////
wwli///ggli////fli
nrli//////sssli
# cat file6
xxli/////////////////zxxli///////////ttli
yyli/////////aali/////bbli
ccali
zzzli//////DDli
//////SSSli
# ./compare -F'//' file5 file6 -c=5
Will be comparing with case sensitive [y/n]? n
FS -> '//' maybe cause occur false results!!
FS cannot be found file 'file6' -> 3.line !!
FS checking will not continue for remaining lines.
====================================================================
Comparing process will be with ''' non case sensitive ''' !OK
 
                'file5' && 'file6' Compare Process is Completed!!
     ====================================================================
     ********* Results for 5. column to for file5 file **********
     ====================================================================
Line(s)         "Differences"
2 >>            /xxli
     ====================================================================
Line(s)         "Duplicates"
No Duplicates Found!!
     ************************ End of Results ****************************
 
# ./compare -F'//' file5 file6 -c=8
Will be comparing with case sensitive [y/n]? n
FS -> '//' maybe cause occur false results!!
FS cannot be found file 'file6' -> 3.line !!
FS checking will not continue for remaining lines.
====================================================================
Comparing process will be with ''' non case sensitive ''' !OK
 
                'file5' && 'file6' Compare Process is Completed!!
     ====================================================================
     ********* Results for 8. column to for file5 file **********
     ====================================================================
Line(s)         "Differences"
3 >>            /
     ====================================================================
Line(s)         "Duplicates"
No Duplicates Found!!
     ************************ End of Results ****************************
 
# ./compare -F'//' file5 file6 -c=4
Will be comparing with case sensitive [y/n]? y
FS -> '//' maybe cause occur false results!!
FS cannot be found file 'file6' -> 3.line !!
FS checking will not continue for remaining lines.
====================================================================
Comparing process will be with ''' case sensitive ''' !OK
 
                'file5' && 'file6' Compare Process is Completed!!
     ====================================================================
     ********* Results for 4. column to for file5 file **********
     ====================================================================
Line(s)         "Differences"
4 >>            fli
     ====================================================================
Line(s)         "Duplicates"
3 <<            DdLi
5 <<            sssli
     ************************ End of Results ****************************
 

and the code :wink:

#!/bin/bash
## justdoit ##
## SED column comparison v2 @unix.com ##
## Probably it has some bugs!! ##
## Please notify me of errors and negative issues
## or wanted additional features ##  @ygemici ##
 
exitt() {
  # Print both supported invocation forms on stdout and terminate with status 1.
  # The upper bound shown for -c is $maxcol when it is set and non-empty,
  # otherwise a literal '?'.
  maxcolor=${maxcol:-?}
  printf '%s\n' \
    "Usage e.g -> '$0 -F='\' file1 file2 -c=[1-$maxcolor]' (c-> Compare Column) " \
    "Usage e.g -> '$0 file1 file2 -c=[1-$maxcolor]' (FS is automatically synchronized to a 'space') "
  exit 1
}
 
casematch() {
  # Read the comparison-mode answer from stdin into cs.
  # Sets eee to 1 for the answers y, ye or yes and to 0 for anything else,
  # and stores the matching banner text in ee (the quotes are
  # backslash-escaped because the banner is printed with "eval echo").
  read -p "Will be comparing with case sensitive [y/n]? " cs
  if [[ $cs == y || $cs == ye || $cs == yes ]]; then
    eee=1
    ee="Comparing process will be with \'\'\' case sensitive \'\'\' !OK "
  else
    eee=0
    ee="Comparing process will be with \'\'\' non case sensitive \'\'\' !OK "
  fi
}
 
fscontrol() {
  # $1 is the number of command-line arguments.
  # Counts of 3 or 4 are accepted; any other number goes to exitt.
  [ $1 -ne 3 ] || return 0
  [ $1 -ne 4 ] || return 0
  exitt
}
 
fsset() {
  # Rewrite the global FS in place: every character matched by the sed
  # bracket expression [\/] gets a backslash put in front of it, so the
  # separator can be embedded in /-delimited sed expressions.
  local escaped
  escaped=$(echo "$FS" | sed 's/[\/]/\\&/g')
  FS=$escaped
}
 
fstest() {
  # Check that the separator pattern $FS matches at least one line in each
  # input file ($file1, $file2). A file without any match is reported using
  # the unescaped separator $FSX; if any file failed, a blank line and the
  # usage text follow and the script exits with status 1.
  local hits
  for file in $file1 $file2; do
    hits=$(sed -n "/$FS/p" $file)
    if [[ -n $hits ]]; then
      continue
    fi
    echo "FS(field separator) -> '$FSX' cannot be found in $file file!!"
    exit=ok
  done
  if [ "$exit" != "ok" ]; then
    return 0
  fi
  echo
  exitt
  exit 1
}
 
fschk() {
  # Walk each line-numbered work file (${file1}numtmp, ${file2}numtmp) line
  # by line. Every line is copied to ${file}chk and must look like
  # "<number> <field><FS>"; the first line of a file that does not is
  # reported and the scan of that file stops there.
  local total lineno probe
  for file in ${file1}numtmp ${file2}numtmp; do
    total=$(sed -n '$=' $file)
    lineno=1
    while (( lineno <= total )); do
      sed -n "$lineno p" $file > ${file}chk
      probe=$(sed -n "/^[0-9][0-9]* $col$FS/p" ${file}chk)
      if [[ -z $probe ]]; then
        echo "FS -> '$FSX' maybe cause occur false results!!"
        echo "FS cannot be found file '${file%%numtmp}' -> $lineno.line !!"
        echo -e "FS checking will not continue for remaining lines.\n"
        break
      fi
      (( lineno++ ))
    done
  done
}
 
maxcolfind() {
  # Find the largest number of FS-separated columns on any line of the two
  # input files.
  # Globals read: file1, file2, FS (sed pattern, already escaped for /.../)
  # Globals set:  maxcol - the larger of the two per-file maxima (at least 1)
  #               xxfile - the file that maximum came from; file2 on a tie
  #               file   - loop variable, left pointing at file2
  # Each file is read once. The running maximum starts at 1 and is only ever
  # replaced by a larger count, so a one-column line can no longer blank it.
  local -a widest=(1 1)
  local slot=0 line cols
  for file in "$file1" "$file2"; do
    while IFS= read -r line || [[ -n $line ]]; do
      # Turn every separator into a newline; the line count is the column count.
      cols=$(printf '%s\n' "$line" | sed "s/$FS/\n/g" | sed -n '$=')
      if (( ${cols:-0} > widest[slot] )); then
        widest[slot]=$cols
      fi
    done < "$file"
    slot=1
  done
  if (( widest[0] > widest[1] )); then
    maxcol=${widest[0]}
    xxfile=$file1
  else
    maxcol=${widest[1]}
    xxfile=$file2
  fi
}
 
addlnums() {
  # Write ${file}numtmp for both inputs: every line is prefixed with its
  # line number and one space, and lines that were empty are dropped.
  for file in $file1 $file2; do
    sed '=' $file \
      | sed -n 'N;s/\n/ /;/^[0-9][0-9]* $/d;p' \
      > ${file}numtmp
  done

  # The marker strings must not occur in the data: while a marker is found
  # in either work file, append the next counter value and test again.
  until [[ -z $(sed -n "/$sedundef/p" ${file1}numtmp ${file2}numtmp) ]]; do
    u=$(( u + 1 ))
    sedundef="X_undef_X_${u}"
  done
  until [[ -z $(sed -n "/$sedsp/p" ${file1}numtmp ${file2}numtmp) ]]; do
    v=$(( v + 1 ))
    sedsp="X_sp_X_${v}"
  done
}
 
compcoltst() {
  # Validate the global compcol against the one-digit range 1..$1.
  #   $1 - highest digit accepted
  #   $2 - column count quoted in the error message
  # On failure the two messages, a blank line and the usage text are printed.
  local accepted
  accepted=$(echo "$compcol" | sed -n "/^[1-$1]$/p")
  if [[ -n $accepted ]]; then
    return 0
  fi
  echo "You entered an invalid value for compare column!!"
  echo "You have max $2 columns in '$xxfile' !!"
  echo
  exitt
}
 
# Pattern for one space-free field; fschk places it in front of FS in its sed address.
col='[^ ]*'
# Number of command-line arguments; fscontrol sends any count other than 3 or 4 to the usage text.
c=$#
fscontrol $c
 
ftest() {
# Validate the command line before any processing starts.
#   $1, $2 - the two input paths; each must pass the -f (regular file) test
#   A third argument is passed by the callers but is not read here.
# Reads the globals exptest (all arguments) and compt (the last argument).
# Every failure path prints a message and calls exitt.
# NOTE(review): in /-*c=*/ both "-*" and "=*" may match nothing, so the
# pattern is satisfied by any text that contains a "c" -- confirm intent.
if [[ ! $(echo "$exptest"|sed -n '/-*c=*/p') ]] ; then
 echo -e "Compare column is missing!!\n";exitt
elif [ ! $(echo "$compt"|sed -n '/-*c=*/p') ] ; then
 echo "Compare column is must be specified as the last parameter!!"
 exitt
fi
# The column option must be followed by one character from the set "(" or 1-9.
fft=$(echo "$exptest"|sed -n '/-*c=*\([(1-9]\)/p')
if [ -z "$fft" ] ; then
 echo "Compare column number is unspecified!!"
 exitt
fi
# Report every missing file first (ex=1), then leave once through exitt.
if [ ! -f $1 ] ; then
 echo "'$1' file is not a regular file";ex=1
fi
if [ ! -f $2 ] ; then
 echo "'$2' file is not a regular file";ex=1
fi
if [[ $ex -eq 1 ]] ; then
 exitt
fi
}
 
# Dispatch on the argument count in $c. Both arms set file1, file2, FS, FSX,
# compcol and the initial marker strings sedsp / sedundef.
case $c in
# Three arguments: file1 file2 <column option>. No separator was given, so FS
# becomes a single space. compt receives the last positional parameter via eval.
3)exptest=$@;compt=$(eval echo "\$$#");ftest $1 $2 "";file1=$1;file2=$2;FS=" "
echo "FS parameter is missing! FS is setting the -> 'space' "
FSX="$FS";fstest;maxcolfind
# compcol is checked against the one-digit bracket range [1-$maxcolx], so the bound is capped at 9.
if [ $maxcol -gt 9 ];then maxcolx=9;
else maxcolx=$maxcol;fi
compcol=$(echo "$3"|sed "s/-*c=*\\([1-$maxcolx]\\)/\\1/")
compcoltst $maxcolx $maxcol;sedsp=X_sp_X;sedundef="X_undef_X";;
# Four arguments: <separator option> file1 file2 <column option>. The leading
# "-" and "F"/"=" characters are stripped from $1 to obtain FS; an empty result
# falls back to a space. FSX keeps the unescaped separator, fsset escapes FS.
4)exptest=$@;compt=$(eval echo "\$$#");ftest $2 $3 "y";file1=$2;file2=$3;FS=$(echo "$1"|sed 's/-*F=*//')
if [ "$FS" = "" ] ; then
echo -e "FS is undefined!!\nFS setting the -> 'space' \n";FS=" ";fi
FSX="$FS";fsset;fstest;maxcolfind;
if [ $maxcol -gt 9 ];then maxcolx=9
else maxcolx=$maxcol;fi
compcol=$(echo "$4"|sed "s/-*c=*\\([1-$maxcolx]\\)/\\1/")
compcoltst $maxcolx $maxcol;sedsp=X_sp_X;sedundef="X_undef_X";;
esac
 
# Ask for the comparison mode, build the line-numbered work files, then warn about lines lacking the separator.
casematch;addlnums;fschk
 
calculate() {
# Build the helper strings used to pad short lines and to pick out the
# compare column.
#   $1 - number of columns (the caller passes $maxcol)
# Sets:
#   undefinedar[k] - the pair "<FS><sedundef>" repeated k+1 times
#   fsgroup[k]     - k+1 copies of FS joined by ".*"
#   FSR            - FS passed once more through the [\/] escaping sed
#   sedforrhs      - ".*" + fsgroup[maxcol-1] with the trailing FSR removed
#                    and the compcol-th ".*" wrapped in \( \)
# removel, removefull and charforrhs are assigned here but not referenced
# anywhere else in the visible script.
k=0;colx=$1
undefinedar=();removel=();removefull=()
charforrhs="$col"
undefinedar[k]="${FS}$sedundef"
fsgroup[k]="$FS"
# colx is decremented before each test, so the loop body runs $1 - 1 times.
while [ $(( colx -= 1 )) -ge 1 ] ; do
((k++))
undefinedar[k]="${undefinedar[k-1]}${undefinedar[0]}"
fsgroup[k]="${fsgroup[k-1]}.*${fsgroup[0]}"
done
FSR=$(echo "$FS"|sed 's/[\/]/\\&/g')
# The trailing number on the second s command selects which ".*" occurrence is wrapped.
sedforrhs=$(echo ".*${fsgroup[maxcol-1]}"|sed "s/$FSR$//;s|\.\*|\\\(&\\\)|$compcol")
}
 
# Build the padding strings and the column-capturing sed pattern for maxcol columns.
calculate $maxcol
 
createlst() {
# Turn ${file}numtmp into ${file}lst, in which every kept line has exactly
# maxcol columns.
#   $1 - input file name; it is stored in the global "file"
# Reads FS, FSX, sedundef, maxcol and undefinedar; rewrites ${file}numtmp in
# place and uses ${file}lsttmp as scratch space.
file=$1;x=0
# When the raw separator FSX consists only of backslashes or only of
# slashes, "a" holds that character in escaped form.
# NOTE(review): "a" is not reset here, so a value left by an earlier call is reused.
if [ $(echo "$FSX"|sed -n '/^[\]*$/p') ] ; then
a='\\'
elif [ $(echo "$FSX"|sed -n '/^[/]*$/p') ] ; then
a='\/'
fi
if [ "$a" != "" ] ; then
# While a separator is directly followed by that character, insert the
# sedundef marker between the two.
while [[ $(sed -n '/'$FS''$a'/p' ${file}numtmp) ]] ; do
sed 's/\('$FS'\)\('$a'\+\)/\1'$sedundef'\2/g ' ${file}numtmp>${file}lsttmp
mv -f ${file}lsttmp ${file}numtmp
done
 
# Drop a separator left at the very end of a line.
sed "s/$FS\$//" ${file}numtmp>${file}lsttmp
mv -f ${file}lsttmp ${file}numtmp
fi
 
>${file}lst
# c now holds the line count of the work file (it held the argument count earlier).
c=$(sed -n '$=' ${file}numtmp)
for((i=1;i<=c;i++));do
# x = number of columns on line i once the leading line number is removed.
x=$(sed -n "$i p" ${file}numtmp|sed "s/^[0-9]* //;s/$FS/\n/g"|sed -n '$=')
# Full-width lines are copied unchanged, shorter ones get the missing
# "<FS><sedundef>" pairs appended; a line with more than maxcol columns is
# not written at all.
if [ $x -eq $maxcol ] ; then
sed -n "$i s/.*/&/p" ${file}numtmp >>${file}lst
elif [ $x -lt $maxcol ] ; then
sed -n "$i s/.*/&${undefinedar[maxcol-x-1]}/p" ${file}numtmp >>${file}lst
fi
done
}
 
# Produce the padded ${file}lst work file for each input.
for file in $file1 $file2 ; do
createlst $file
done
 
process() {
  # Compare the selected column of the two padded work files.
  # Globals read: file1, file2, file (prefix of the scratch file), sedforrhs,
  #               sedsp, sedundef, eee
  # Globals set:  reff / refff - "<line> <value>" entries of file1 / file2
  #               dupp - file1 entries whose value also occurs in file2
  #               diff - file1 entries whose value occurs nowhere in file2
  # Entries whose value equals the $sedundef padding marker are ignored.
  # NOTE(review): eee=1 is chosen by the "case sensitive" answer, yet that
  # branch matches with sed's I (ignore-case) flag while eee=0 compares the
  # strings exactly. The mapping is kept as found -- confirm which is intended.

  # Reduce every line to "<number><sedsp> <value>". The unquoted expansion
  # deliberately word-splits the result into the arrays.
  ff=($(sed "s/^\([0-9][0-9]*\) $sedforrhs/\1$sedsp \2/" "${file1}lst"))
  fff=($(sed "s/^\([0-9][0-9]*\) $sedforrhs/\1$sedsp \2/" "${file2}lst"))

  # Re-join the words into one "<number> <value>" line per entry (file1).
  re=0
  >"${file}lsttmp"
  echo "${ff[@]}" |
    sed 's/\([0-9][0-9]*'"$sedsp"'\) /\n\1/g;s/'"$sedsp"'/ /g;' |
    sed '/^$/d' |
    while read -r cc; do
      echo "$cc" >>"${file}lsttmp"
    done
  while read -r refv; do
    if [ "${refv#* }" != "$sedundef" ]; then
      reff[re]="$refv"
      ((re++))
    fi
  done <"${file}lsttmp"

  # Same for file2.
  re=0
  >"${file}lsttmp"
  echo "${fff[@]}" |
    sed 's/\([0-9][0-9]*'"$sedsp"'\) /\n\1/g;s/'"$sedsp"'/ /g;' |
    sed '/^$/d' |
    while read -r cc; do
      echo "$cc" >>"${file}lsttmp"
    done
  while read -r reffv; do
    if [[ "${reffv#* }" != "$sedundef" ]]; then
      refff[re]="$reffv"
      ((re++))
    fi
  done <"${file}lsttmp"

  # Classify every file1 entry. isnot counts the file2 entries that did not
  # match; when it reaches the size of refff the value is unique to file1.
  for ((x = 0; x < ${#reff[@]}; x++)); do
    isnot=0
    i="${reff[x]}"
    for ((y = 0; y < ${#refff[@]}; y++)); do
      k="${refff[y]}"
      if [ $eee -eq 1 ]; then
        # The file2 value becomes a sed address, so it is escaped first.
        kk=$(echo "${k#* }" | sed 's/[\/]/\\&/g')
        if [[ $(echo "${i#* }" | sed -n "/^$kk$/Ip") ]]; then
          dupp=("${dupp[@]}" "$(echo "$i" | sed 's/'"$sedundef"'//')")
          break
        else
          ((isnot++))
        fi
      else
        if [[ "${i#* }" = "${k#* }" ]]; then
          dupp=("${dupp[@]}" "$(echo "$i" | sed 's/'"$sedundef"'//')")
          break
        else
          ((isnot++))
        fi
      fi
    done
    if [ ${#refff[@]} -eq $isnot ]; then
      diff=("${diff[@]}" "$(echo "$i" | sed 's/'"$sedundef"'//')")
    fi
  done
}
 
write() {
  # Print the final report.
  # Globals read: dupp, diff (arrays of "<line> <value>" entries), file
  #               (prefix of the two scratch files), file1, file2, compcol,
  #               ee (banner text)
  # Every entry is listed on its own line: the first space of an entry is
  # replaced by " <<" (duplicates) or " >>" (differences) plus two tabs.
  # When both arrays are empty the column is reported as NULL in both sections.
  >"${file}_dupp"
  if [ ${#dupp[@]} -eq 0 ]; then
    lastdup="No Duplicates Found!!"
  else
    # Index with i so each entry is written once, not element 0 every time.
    for ((i = 0; i < ${#dupp[@]}; i++)); do
      echo "${dupp[i]}" >>"${file}_dupp"
    done
    lastdup=$(echo "$(<"${file}_dupp")" | sed 's/ / <<\t\t/')
  fi

  >"${file}_diff"
  if [ ${#diff[@]} -eq 0 ]; then
    lastdif="No Differences Found!!"
  else
    for ((i = 0; i < ${#diff[@]}; i++)); do
      echo "${diff[i]}" >>"${file}_diff"
    done
    lastdif=$(echo "$(<"${file}_diff")" | sed 's/ / >>\t\t/')
  fi

  if [ ${#dupp[@]} -eq 0 ] && [ ${#diff[@]} -eq 0 ]; then
    lastdif="\t $compcol. column is NULL!!"
    lastdup="\t $compcol. column is NULL!!"
  fi

  echo "===================================================================="
  # ee holds backslash-escaped quotes; eval turns them into plain quotes.
  # ee is only ever set to one of two fixed strings by casematch.
  eval echo $ee
  echo -e "\n\
                '$file1' && '$file2' Compare Process is Completed!!
     ====================================================================\n\
     ********* Results for $compcol. column to for $file1 file **********\n\
     ====================================================================\n\
Line(s)    \t\"Differences\" \n$lastdif\n\
     ====================================================================\n\
Line(s)    \t\"Duplicates\"  \n$lastdup\n\
     ************************ End of Results ****************************\n"
}
 
remtmp() {
  # Delete the work files this script creates next to each input file.
  # Missing files are ignored (rm -f).
  local suffix
  for file in $file1 $file2; do
    for suffix in lst lsttmp _dupp _diff numtmp numtmpchk; do
      rm -f ${file}${suffix}
    done
  done
}
 
# Run the comparison, print the report, then delete the work files.
process;write;remtmp
 

regards
ygemici