Variable input to awk script

Hi guys,

I wrote the following function to compare two csv files column by column.
However, sometimes the input needs to be sorted before passing it to awk.

I can do this by changing the awk arguments, but I would like to make this variable if possible. The below doesn't work since the s_args variable isn't evaluated. Is there any other way to achieve this?

#######################################
# Compare two delimited text files column by column and report, per column,
# how many rows differ.
#
# Usage: csvdiff [-s] [awk-var=value ...] file1 file2
#   -s      sort both files before comparing; the last two arguments are
#           taken as the files, anything before them is handed to awk as-is
#   h=N     line number of the header row (default 1; h=0 means no header)
#   D=SEP   field separator; when omitted it is guessed on line h (or line 1)
#           of file2 from tab, comma, pipe and semicolon, whichever is most
#           frequent
#   col=N   print the differing value pairs of column N instead of the summary
# Outputs:  summary or value pairs on stdout
# Returns:  exit status of awk; 2 when -s is given fewer than two files
# NOTE: -s sorts every line, the header row included, so the header is no
#       longer guaranteed to be on line h afterwards.
#######################################
csvdiff() {
  local sort_inputs=0
  if [[ "${1-}" == "-s" ]]; then
    sort_inputs=1
    shift
  fi

  # The awk program must not contain single quotes (it lives in one).
  local -r awk_prog='
    # First file: buffer every line by line number and remember the count.
    NR == FNR { file1[FNR] = $0; n1 = FNR; next }

    # Header line of the second file: settle the delimiter and, unless h=0
    # was given, store the column names.
    FNR == (h ? h : 1) {
      if (!D) {
        # gsub returns the number of replacements, i.e. the number of
        # occurrences; the replacement text leaves the line unchanged.
        char["\t"] = gsub(/\t/, "\t")
        char[","] = gsub(/,/, ",")
        char["|"] = gsub(/\|/, "|")
        char[";"] = gsub(/;/, ";")
        for (c in char) {
          if (char[c] >= max) {
            max = char[c]
            delim = c
          }
        }
        max = 0
      } else {
        delim = D
      }

      if (h != 0 || h == "") {
        n = split($0, a, delim)
        for (i = 1; i <= n; i++) {
          header[i] = a[i]
        }
        next
      }
    }

    # Data lines of the second file. The "in" test avoids creating empty
    # file1 entries for lines that only exist in the second file.
    FNR > (h ? h : 0) && (FNR in file1) {
      na = split(file1[FNR], a, delim)
      nb = split($0, b, delim)
      n = (na > nb) ? na : nb
      for (i = 1; i <= n; i++) {
        if (a[i] != b[i]) {
          diff[i]++
          ndiff++
          if (i > maxcol) maxcol = i
          if (col == i) printf "%-1s%s%s\n", a[i], OFS, b[i]
        }
      }
    }

    END {
      if (FNR != n1) {
        print "error: number of lines do not match: " n1, FNR
        exit
      }
      if (!col) {
        if (ndiff) {
          print "column differences:"
          # Walk the columns in numeric order so the report is stable.
          for (i = 1; i <= maxcol; i++) {
            if (i in diff) print i " " header[i] ": " diff[i]
          }
        } else {
          print "no differences found"
        }
      }
    }'

  if (( sort_inputs )); then
    if (( $# < 2 )); then
      printf 'csvdiff: -s requires two input files\n' >&2
      return 2
    fi
    local -a passthru=("${@:1:$#-2}")
    local first_file="${@: -2:1}"
    local second_file="${@: -1}"
    # Process substitution is shell syntax, so it has to be written out in
    # the command itself; a string holding "<(sort ...)" would reach awk as
    # plain words.
    awk "$awk_prog" OFS="\t" "${passthru[@]}" \
      <(sort -- "$first_file") <(sort -- "$second_file")
  else
    awk "$awk_prog" OFS="\t" "$@"
  fi
}

Thanks

You could try `eval`. This method should be used with care: you should know EXACTLY what you are evaluating, as you may otherwise end up running unwanted or even malicious code without any control over it.

1 Like

Are $2 and $3 supposed to be filenames or space separated parameter lists?

Here is one bash solution using the -v option of printf:

#!/bin/bash
# Demo: echo the argument lists as given; with a leading -s, echo the words
# of the first two lists sorted instead (each list sorted on its own).
function csvdiff {
   case "$1" in
      -s)
         local s_args first_sorted second_sorted
         shift

         # $1 and $2 stay unquoted on purpose: word splitting turns each
         # list into one word per line for sort; tr joins the lines again.
         first_sorted=$(printf '%s\n' $1 | sort | tr '\n' ' ')
         second_sorted=$(printf '%s\n' $2 | sort | tr '\n' ' ')

         # printf -v writes the formatted text straight into s_args.
         printf -v s_args '%s%s' "$first_sorted" "$second_sorted"
         ;;
   esac
   echo "${s_args:-$@}"
}

# Run the demo twice on the same two lists: once as given, once with -s.
printf '%s\n' 'unsorted'
csvdiff "7 2 1 8 9" "5 1 3 7 4"
printf '%s\n' 'sorted'
csvdiff -s "7 2 1 8 9" "5 1 3 7 4"

output:

unsorted
7 2 1 8 9 5 1 3 7 4
sorted
1 2 7 8 9 1 3 4 5 7 
1 Like

Thanks guys,

Not sure if it's the best solution, but in the end I chose to create another nested function to run the awk command:

# Run the awk comparison (program elided in this excerpt) on two files.
# Arguments: [-s] file1 file2
#   -s  sort both files before handing them to awk; without -s every
#       argument is passed to awk unchanged
function csvdiff() {
  # Helper holding the single awk invocation, so the sorted and the unsorted
  # call share one copy of the program.
  function _csvdiff() {
    awk '
      ...
      }' OFS="\t" "$@"
  }
  
  if [[ "$1" == "-s" ]]; then
    # Process substitution is written out in the command itself so the shell
    # actually runs the two sorts and passes awk their output as files.
    _csvdiff <(sort -- "$2") <(sort -- "$3")
  else
    # Quoted so file names containing whitespace stay single arguments.
    _csvdiff "$@"
  fi
  
  # Remove the helper again; it is only meant to exist during this call.
  unset -f _csvdiff
}

This seems to work fine. Thanks for the input though! Both solutions would have worked, although I'm reading in files and not space separated lists.