This is my script and its too slow
cat File|awk -F',' '{print $1}' |sort -u >UniqueIds
cat File|awk -F',' '{print $2}' |sort -u >UniqueColumns
Counter=1
cat UniqueIds|
{
while read Line
do
cat UniqueColumns|
{
while read Col
do
if [[ $(grep $Line File|grep $Col) == "" ]];then
echo $Line,$Col,0 >>File
fi
done
}
COL_LIST=$(cat File|grep $Line|sort -t',' -k2|awk -F',' '{print $2}' ORS='|'|awk -F'|' '{NF=NF-1;$1=$1}1' OFS="|")
VALUE_LIST=$(cat File|grep $Line|sort -t',' -k2|awk -F',' '{print $3}' ORS='|'|awk -F'|' '{NF=NF-1;$1=$1}1' OFS="|")
if [[ $Counter == 1 ]];then
echo Party_ID"|"$COL_LIST
fi
echo $Line"|"$VALUE_LIST
((Counter=Counter+1))
done
}
rm UniqueIds UniqueColumns
---------- Post updated at 10:18 AM ---------- Previous update was at 10:17 AM ----------
Done . I send my script. I need to sort the second column
---------- Post updated at 03:10 PM ---------- Previous update was at 10:18 AM ----------
GREAT THANKS Rudi
My Script takes 3 minute for processing 30Million records .I modified little bit and here is the one
awk '{LN[$1]; HD[$2]; MX[$1,$2]=$3}
END{
printf "%s", "PARTY_ID"; for (i in HD) printf "|%s", i; print "";
for (j in LN) {printf "%s",j;
for (i in HD) printf "|%s", MX[j,i]; print ""}
}
' FS=, file
---------- Post updated at 03:20 PM ---------- Previous update was at 03:10 PM ----------
I have made small modification to print zero too
awk '{LN[$1]; HD[$2]; MX[$1,$2]=$3}
END{
printf "%s", "PARTY_ID"; for (i in HD) printf "|%s", i; print "";
for (j in LN)
{
printf "%s",j;
for (i in HD)
if (MX[j,i] =="")
printf "|%s",0
else
printf "|%s", MX[j,i];
print ""
}
}' FS=, file