A single awk script can do the job. The script below reads the same input
file twice, which lets it print the items in their original input order.
If the output order doesn't matter, the work can be done in an END block instead.
Input
akshay@nio:/tmp$ cat infile
K123 X CATA 3
K123 Y CATA 4
K123 Z CATA 2
K123 X CATB 5
K123 Y CATB 2
K123 Z CATB 2
B65 M CATB 3
B65 N CATB 4
B85 X CATA 1
B85 Y CATA 4
TR4 X CATA 3
TR4 Y CATA 2
TR4 Z CATA 2
TR4 X CATB 2
TR4 Y CATB 3
TR4 Z CATB 2
U23 X CATA 3
U23 Y CATA 1
U23 Z CATA 2
U23 P CATA 2
Script
akshay@nio:/tmp$ cat test.awk
# test.awk -- collapse "ITEM SUB CATEGORY VALUE" records into one line per ITEM.
#
# Usage:  awk -f test.awk infile infile     (the same file is named twice)
# Output: a header line, then "ITEM <CATA data>/<CATB data>" per item, in
#         order of first appearance; "---" stands in for a missing category.
BEGIN {
    key1    = "CATA"    # category shown left of the separator
    key2    = "CATB"    # category shown right of the separator
    sep     = "/"       # separator between the two category columns
    missing = "---"     # placeholder when an item has no data for a category
    print "ITEM", key1 sep key2
}

# Pass 1 (first file operand): append SUB and VALUE ($2, $4) to the string
# collected so far for this (ITEM, CATEGORY) pair.
FNR == NR {
    A[$1, $3] = A[$1, $3] $2 $4
    next
}

# Pass 2 (second file operand): report each item once, on the first record
# that mentions it, so the output follows the input order.
!($1 in printed) && (($1, key1) in A || ($1, key2) in A) {
    printed[$1]    # a bare reference creates the element, marking the item done

    # A is only read here; testing with "in" first avoids creating empty
    # elements as a side effect of looking up a missing category.
    left  = (($1, key1) in A) ? A[$1, key1] : missing
    right = (($1, key2) in A) ? A[$1, key2] : missing
    print $1, left sep right
}
Output
akshay@nio:/tmp$ awk -f test.awk infile infile
ITEM CATA/CATB
K123 X3Y4Z2/X5Y2Z2
B65 ---/M3N4
B85 X1Y4/---
TR4 X3Y2Z2/X2Y3Z2
U23 X3Y1Z2P2/---
---------- Post updated at 10:02 PM ---------- Previous update was at 09:44 PM ----------
---edit--
The same processing can be done in an END block, as follows. This reads the input
file only once, but the output order is no longer guaranteed to match the input order.
akshay@nio:/tmp$ cat test2.awk
# test2.awk -- collapse "ITEM SUB CATEGORY VALUE" records into one line per
# ITEM, reading the input only once.
#
# Usage:  awk -f test2.awk infile
# Output: a header line, then "ITEM <CATA data>/<CATB data>" per item;
#         "---" stands in for a missing category. Items come out in for-in
#         traversal order, which awk does not tie to the input order.
BEGIN {
    key1    = "CATA"    # category shown left of the separator
    key2    = "CATB"    # category shown right of the separator
    sep     = "/"       # separator between the two category columns
    missing = "---"     # placeholder when an item has no data for a category
    print "ITEM", key1 sep key2
}

# Append SUB and VALUE ($2, $4) to the string collected so far for this
# (ITEM, CATEGORY) pair.
{
    A[$1, $3] = A[$1, $3] $2 $4
}

END {
    for (idx in A) {
        # idx is "ITEM" SUBSEP "CATEGORY"; the item name is the first part.
        split(idx, part, SUBSEP)
        item = part[1]

        # An item with both categories shows up twice in A; report it once.
        if (item in printed)
            continue
        # Skip items that have data for neither of the two reported categories.
        if (!((item, key1) in A || (item, key2) in A))
            continue
        printed[item]    # a bare reference creates the element, marking the item done

        # A is never modified while it is being traversed; "in" tests the
        # key without creating it.
        left  = ((item, key1) in A) ? A[item, key1] : missing
        right = ((item, key2) in A) ? A[item, key2] : missing
        print item, left sep right
    }
}
Resulting
akshay@nio:/tmp$ awk -f test2.awk infile
ITEM CATA/CATB
TR4 X3Y2Z2/X2Y3Z2
B65 ---/M3N4
K123 X3Y4Z2/X5Y2Z2
B85 X1Y4/---
U23 X3Y1Z2P2/---