# Join the large detail file against two small lookup files (plans and jobs).
# Reads:  "$workfile3" (plans), "$workfile2x" (jobs), "$workfile1ns" (detail);
#         all three are comma-separated.
# Writes: "$workfile4" and "$workfile5"; progress lines for the two lookup
#         files are printed to stdout.
# NOTE: awk processes backslash escapes in -v values, so the file names are
#       assumed not to contain backslashes.
awk -v wrk1="$workfile1ns" -v wrk2x="$workfile2x" -v wrk4="$workfile4" \
    -v wrk5="$workfile5" -v planf="$workfile3" '
# Set the separator before any record is read. Assigning FS inside a rule
# only affects records read afterwards, so the first record of the first
# file would otherwise be split on whitespace.
BEGIN { FS = "," }

# put all plans in an array
FILENAME == planf {
    ua1_pl[$1] = $2
    print "### 40a ## Set plan "$1" to "$2
}

# put all jobs in an array
FILENAME == wrk2x {
    j_num[$1]   = $2
    j_yr[$1]    = $3
    j_mon[$1]   = $4
    j_month[$1] = $5    # stored, but not read anywhere in this program
    print "### 40b ## Set job "$1"="$2" to month/year "$4"/"$3
}

# put all the data together
FILENAME == wrk1 {
    # check on plan data; the "in" test avoids creating an empty array
    # element for every unknown code in the large file
    if (($1 in ua1_pl) && ua1_pl[$1] > "") {
        w_plan = ua1_pl[$1]
    } else {
        w_plan = "***UA1 CODE UNDEFINED ["$1"]"
        # print "### 40c ## Undefined UA1 code "$1","$2
    }

    # check on job number data; clear the values first so a record with no
    # matching job does not inherit the job data of the previous record
    w_jnum = w_jyr = w_jmon = ""
    if (($2 in j_num) && j_num[$2] > "") {
        w_jnum = j_num[$2]
        w_jyr  = j_yr[$2]
        w_jmon = j_mon[$2]
    }

    # write out all the variables
    print $1","$2","w_plan","w_jnum","w_jyr","w_jmon > wrk4
    print ","w_jnum","w_jyr","w_jmon","w_plan > wrk5
}' "$workfile3" "$workfile2x" "$workfile1ns"
I have to read through over a million records, and match up against a couple of smaller files. Thus, my thought to load the two smaller files into arrays and then read through the entire third file. I do array lookups on each record of the third file; thus creating one file containing all the important details.
In the example above, since I forced the FS to be "," I suppose I could simply change the awk -v ... to awk -F"," -v ... and be all set. But, what if the files had different delimiters? Where would I put the BEGIN before the FS="," commands?
assuming ALL 3 files have different FieldSeparators - adjust the FS assignments in the 'BEGIN' accordingly:
# Join the large detail file against two small lookup files (plans and jobs),
# where each input file may use a different field separator.
# Reads:  "$workfile3" (plans), "$workfile2x" (jobs), "$workfile1ns" (detail).
# Writes: "$workfile4" and "$workfile5"; progress lines for the two lookup
#         files are printed to stdout.
# NOTE: awk processes backslash escapes in -v values, so the file names are
#       assumed not to contain backslashes.
awk -v wrk1="$workfile1ns" -v wrk2x="$workfile2x" -v wrk4="$workfile4" \
    -v wrk5="$workfile5" -v planf="$workfile3" '
BEGIN {
    # one separator per input file; adjust as needed
    FSf1 = ","    # detail file (wrk1)
    FSf2 = "|"    # jobs file   (wrk2x)
    FSf3 = "#"    # plans file  (planf)
}

# On the first record of each file, switch FS to the separator of that file.
FNR == 1 {
    if (FILENAME == planf) FS = FSf3
    if (FILENAME == wrk2x) FS = FSf2
    if (FILENAME == wrk1)  FS = FSf1
    # The current record was already split with the previous FS. Assigning
    # $0 to itself re-splits it with the new FS. ($1=$1 would NOT do that;
    # it only rebuilds $0 from the old fields joined by OFS.)
    $0 = $0
}

# put all plans in an array
FILENAME == planf {
    ua1_pl[$1] = $2
    print "### 40a ## Set plan "$1" to "$2
}

# put all jobs in an array
FILENAME == wrk2x {
    j_num[$1]   = $2
    j_yr[$1]    = $3
    j_mon[$1]   = $4
    j_month[$1] = $5    # stored, but not read anywhere in this program
    print "### 40b ## Set job "$1"="$2" to month/year "$4"/"$3
}

# put all the data together
FILENAME == wrk1 {
    # check on plan data; the "in" test avoids creating an empty array
    # element for every unknown code in the large file
    if (($1 in ua1_pl) && ua1_pl[$1] > "") {
        w_plan = ua1_pl[$1]
    } else {
        w_plan = "***UA1 CODE UNDEFINED ["$1"]"
        # print "### 40c ## Undefined UA1 code "$1","$2
    }

    # check on job number data; clear the values first so a record with no
    # matching job does not inherit the job data of the previous record
    w_jnum = w_jyr = w_jmon = ""
    if (($2 in j_num) && j_num[$2] > "") {
        w_jnum = j_num[$2]
        w_jyr  = j_yr[$2]
        w_jmon = j_mon[$2]
    }

    # write out all the variables
    print $1","$2","w_plan","w_jnum","w_jyr","w_jmon > wrk4
    print ","w_jnum","w_jyr","w_jmon","w_plan > wrk5
}' "$workfile3" "$workfile2x" "$workfile1ns"
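If you change the field separator within the code, the change only takes effect after the current record has already been read and split, so the first record of the file is still split with the old separator.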
The command $0=$0 re-splits the record currently in the buffer ($0) using the new field separator. Note that $1=$1 does not do this: it only rebuilds $0 from the already-split fields, joined by OFS.