If I correctly understand what you're trying to do, maybe the following is closer to what you want:
awk '
# For every column after the first, list each distinct value seen in that
# column, the number of lines it occurred on, and a comma-separated list of
# the column 1 values of those lines.  Output rows are filled top-down per
# column in order of first appearance; columns that run out of distinct
# values are padded with "- - -".
NR == 1 {
	# We have the 1st line in the input file... Print output header:
	# one "<name> count <column 1 name>" triple per remaining column.
	for(i = 2; i <= NF; i++)
		printf("%s count %s%s", $i, $1, i == NF ? "\n" : " ")
	# Save # of fields for use in END clause.
	nf = NF
	next
}
{	# For each remaining line in the input file... Accumulate data...
	# d[line, field]  = raw field text
	# v[field, n]     = n-th distinct value seen in this field
	# nv[field]       = # of distinct values seen in this field
	# vc[field, text] = # of lines on which text occurred in this field
	d[NR, 1] = $1
	for(i = 2; i <= NF; i++) {
		d[NR, i] = $i
		if(!((i, $i) in vc)) {
			# Add new value for this field.  The counter must be
			# kept per field (nv[i]): the END clause reads
			# nv[field], and awk does not allow one name to be
			# both a scalar and an array.
			v[i, ++nv[i]] = $i
			# If the # of values in this field is greater than
			# the # of output rows to be produced, grow the # of
			# output rows to match.
			if(nv[i] > orows)
				orows = nv[i]
		}
		# Increment count of occurrences of this value in this field.
		vc[i, $i]++
	}
}
END {	# Process accumulated data and print results...
	# For each output row to be printed...
	for(row = 1; row <= orows; row++) {
		# For each set of 3 columns to be printed for this row...
		for(field = 2; field <= nf; field++) {
			if(row > nv[field]) {
				# No data for this field for this row.
				# Print dashes...
				printf("- - -%s", field == nf ? "\n" : " ")
				# Skip to next field.
				continue
			}
			val = v[field, row]
			cnt = vc[field, val]
			# Print field value and count columns for this set.
			printf("%s %d ", val, cnt)
			# Print Column 1 values list for this set.  Data lines
			# start at input line 2; stop as soon as all cnt
			# occurrences have been found.
			oc = 0
			for(line = 2; oc < cnt; line++) {
				# Compare as strings (hence the "" concatenation):
				# vc is keyed by the field text, so numeric-looking
				# fields such as 1 and 1.0 must not match each other.
				if((d[line, field] "") == (val "")) {
					# Comma between list items; after the last
					# item end the set (or the whole row).
					sep = (++oc < cnt) ? "," : (field == nf ? "\n" : " ")
					printf("%s%s", d[line, 1], sep)
				}
			}
		}
	}
}' file
With the following in a file named file:
col1 col2 col3 col4 col5 col6 col7
1 A C E A XX AA
3 D E G A XX AG
5 T T A A XX AC
6 D C A G XX AT
100 E C A A XX CA
5002 E G G G XX CC
99999 A C E A AB CG
it produces the output:
col2 count col1 col3 count col1 col4 count col1 col5 count col1 col6 count col1 col7 count col1
A 2 1,99999 C 4 1,6,100,99999 E 2 1,99999 A 5 1,3,5,100,99999 XX 6 1,3,5,6,100,5002 AA 1 1
D 2 3,6 E 1 3 G 2 3,5002 G 2 6,5002 AB 1 99999 AG 1 3
T 1 5 T 1 5 A 3 5,6,100 - - - - - - AC 1 5
E 2 100,5002 G 1 5002 - - - - - - - - - AT 1 6
- - - - - - - - - - - - - - - CA 1 100
- - - - - - - - - - - - - - - CC 1 5002
- - - - - - - - - - - - - - - CG 1 99999
Is something like this what you want?