Hi guys,
I wrote the following function to compare two csv files column by column.
However, sometimes the input needs to be sorted before passing it to awk.
I can do this by changing the awk arguments, but I would like to make sorting optional if possible. The code below doesn't work because the process substitutions stored as text in the s_args variable are never evaluated by the shell. Is there any other way to achieve this?
function csvdiff() {
if [ "$1" == "-s" ] ; then
local s_args="<(sort $2) <(sort $3)"
shift
fi
awk '
NR==FNR { file1[FNR]=$0; next }
FNR==(h?h:1) {
if (!D) {
char["\t"]=gsub(/\t/, "\t")
char[","]=gsub(/,/, ",")
char["|"]=gsub(/\|/, "|")
char[";"]=gsub(/;/, ";")
for (c in char) {
if (char[c] >= max) {
max=char[c]
delim=c
}
}
max=0
}
else delim=D
if (h != 0 || h == "") {
split($0, a, delim)
for (i=1;i<=length(a);i++) {
header=a
}
next
}
}
FNR>(h?h:0) && file1[FNR] {
split(file1[FNR], a, delim)
split($0, b, delim)
for (i=1;i<=length(a);i++) {
if (a != b) {
diff++
if (col == i) printf "%-1s%s%s\n", a, OFS, b
}
}
}
END {
if (FNR != length(file1)) {
print "error: number of lines do not match: " length(file1), FNR
exit
}
if (!col) {
if (length(diff)) {
print "column differences:"
for (i in diff) {
print i " " header ": " diff
}
}
else {
print "no differences found"
}
}
}' OFS="\t" ${s_args-$@}
}
Thanks