Hello jacobs.smith,
Let's say we have the following Input_file (which will be created by your 1st requirement; I have edited it so that it can be tested more thoroughly).
cat Input_file
Name,Set1,Set2,Set3,Set4,Set5,Set6,Set7,Set8
g5,0,1,1,1,0,1,1,0
g6,1,0,0,0,0,0,0,0
g7,0,1,0,0,0,0,0,1
g8,0,0,1,1,1,1,0,1
g1,1,1,1,0,1,1,1,0
g2,1,1,0,1,1,1,1,1
g3,0,0,1,0,0,0,1,1
g4,1,0,0,1,0,0,1,1
Then the following code may help you with this.
awk -F, '
# Input: CSV with a header line, a name in column 1 and one 0/1 flag per
# Set column after it (Set1 is field 2, Set2 is field 3, and so on).
# Output: one "Set<a><b>_common <count>" line per pair of Set columns that
# are both non-zero on at least one row, then one "Set<n>_unique <count>"
# line per Set column that is the only column equal to 1 on some row.
# Line order follows the awk "for (key in array)" traversal order.

NR == 1 { next }                       # skip the header line

{
  ones = 0                             # columns equal to 1 on this row
  for (i = 2; i <= NF; i++) {
    if ($i == 1) {
      ones++
      last = i - 1                     # Set number of the latest 1 seen
    }
    for (j = i + 1; j <= NF; j++) {
      # Equal and non-zero means the row belongs to both sets; once the
      # two fields are equal, testing one of them against 0 is enough.
      if ($i == $j && $i != 0) {
        S["Set" (i - 1) (j - 1) "_common"]++
      }
    }
  }
  # Exactly one flagged column: the row is unique to that set.
  if (ones == 1) {
    Y["Set" last "_unique"]++
  }
}

END {
  # S and Y are arrays, so each one must be subscripted with the loop key;
  # printing the bare array name is a fatal awk error.
  for (key in S) {
    print key OFS S[key]
  }
  for (key in Y) {
    print key OFS Y[key]
  }
}
' Input_file
Then output will be as follows:
Set28_common 2
Set27_common 3
Set18_common 2
Set45_common 2
Set35_common 2
Set36_common 3
Set34_common 2
Set78_common 3
Set25_common 2
Set26_common 3
Set24_common 2
Set17_common 3
Set16_common 2
Set15_common 2
Set68_common 2
Set58_common 2
Set23_common 2
Set14_common 2
Set13_common 1
Set12_common 2
Set67_common 3
Set57_common 2
Set56_common 3
Set48_common 3
Set47_common 3
Set46_common 3
Set38_common 2
Set37_common 3
Set1_unique 1
Now you can combine everything into a single command, as follows, which you can run with the original Input_file (posted in POST#1):
awk -F, '
# Stage 1: convert a CSV whose columns list the members of each set into
# a "Name,<header>" matrix holding one row per distinct member and a 0/1
# flag per set column.

NR == 1 {
  print "Name," $0                     # header of the generated matrix
  R = NF                               # number of set columns
}

NR > 1 {
  for (i = 1; i <= NF; i++) {
    A[$i, i]++                         # value $i was seen in column i
    if ($i) {
      C[$i]                            # keep each value that tests true
    }                                  # (empty and zero fields are skipped)
  }
}

END {
  for (name in C) {
    row = name
    for (j = 1; j <= R; j++) {
      # 1 when the member occurred in column j at least once, else 0
      row = row FS ((A[name, j] >= 1) ? 1 : 0)
    }
    print row
  }
}
' Input_file | awk -F, '
# Stage 2: read the matrix and print "Set<a><b>_common <count>" for every
# pair of Set columns that are both non-zero on at least one row, followed
# by "Set<n>_unique <count>" for every Set column that is the only column
# equal to 1 on some row.

NR == 1 { next }                       # skip the matrix header

{
  ones = 0                             # columns equal to 1 on this row
  for (i = 2; i <= NF; i++) {
    if ($i == 1) {
      ones++
      last = i - 1                     # Set number of the latest 1 seen
    }
    for (j = i + 1; j <= NF; j++) {
      # Equal and non-zero means the row belongs to both sets.
      if ($i == $j && $i != 0) {
        S["Set" (i - 1) (j - 1) "_common"]++
      }
    }
  }
  if (ones == 1) {
    Y["Set" last "_unique"]++
  }
}

END {
  # S and Y are arrays, so each one must be subscripted with the loop key;
  # printing the bare array name is a fatal awk error.
  for (key in S) {
    print key OFS S[key]
  }
  for (key in Y) {
    print key OFS Y[key]
  }
}
'
The output will be as follows (as per the Input_file in your POST#1).
Set23_common 2
Set13_common 1
Set12_common 2
Set1_unique 2
Set3_unique 2
Set2_unique 1
Please let me know if this helps you; I will be glad to hear it.
Thanks,
R. Singh