Change the RECORD increment i++; according to your correction fixed the problem!
Thank you so much!
---------------------------------------------------------------------------
Wait! There is still something I must have missed.
1) The last member of each struct RECORD is not correctly parsed;
2) Sample 10 and after will not get correctly parsed when there are 10 or more samples.
This brings me back to the original learning on sscanf() with fgets() with the regex I used.
Here is the reformatted code to print a re-arranged table of the input file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// pratice with fgets() + sscanf() to read in multiple lines into struct
typedef struct {
char ID[32];
char SNPs[8];
char MNPs[8];
char Insertion[8];
char Deletion[8];
char Indels[8];
char SameRef[8];
char MissingGT[8];
char SNPTransTranv[8];
char TotalHetHomRatio[8];
char SNPHetHomRatio[8];
char MNPHetHomRatio[8];
char InsertionHetHomRatio[8];
char DeletionHetHomRatio[8];
char IndelHetHomRatio[8];
char InsertDeletionRatio[8];
char Indel_SNPMNPRatio[8];
} RECORD;
int main (int argc, char *argv[])
{
char line[256]; //for row read from file
char name[32]; //1st part (key part) parsed from each line[]
char str1[8]; //2nd part (value part) parsed from each line[]
// char tail[128]; //rest behind 2nd part
FILE* fPtr = fopen(argv[1], "r");
RECORD record[16]; //test file may have 288 ~ 306 rows including blank lines for 16 RECORD
static int i = -1; //initialize counter
while (fgets(line, sizeof(line), fPtr) != NULL) {
str1[0]='0';
if ( line[0] == '\n' ) continue; //skip "blank" lines with e.g. empty or invisible spaces(to be improved!).
//scan in two parts delimited by ":", in the 2nd part, only take the first part delimited by space
sscanf(line, "%[^:] : %s", name, str1);
if (strstr(name, "Sample Name") != NULL) {
i++;
strcpy(record.ID, str1);
printf("%s ", str1);
}
else if (strstr(name, "SNPs") != NULL) {
strcpy(record.SNPs, str1);
printf("%s ", str1);}
else if (strstr(name, "MNPs") != NULL) {
strcpy(record.MNPs, str1);
printf("%s ", str1);}
else if (strstr(name, "Insertions") != NULL) {
strcpy(record.Insertion, str1);
printf("%s ", str1);}
else if (strstr(name, "Deletions") != NULL) { //changed the variable line -> name in the original post, and all the rest after this line
strcpy(record.Deletion, str1);
printf("%s ", str1);}
else if (strstr(name, "Indels") != NULL) {
strcpy(record.Indels, str1);
printf("%s ", str1);}
else if (strstr(name, "Same as reference") != NULL) {
strcpy(record.SameRef, str1);
printf("%s ", str1);}
else if (strstr(name, "Missing Genotype") != NULL) {
strcpy(record.MissingGT, str1);
printf("%s ", str1);}
else if (strstr(name, "SNP Transitions") != NULL) {
strcpy(record.SNPTransTranv, str1);
printf("%s ", str1);}
else if (strstr(name, "Total Het/Hom") != NULL) {
strcpy(record.TotalHetHomRatio, str1);
printf("%s ", str1);}
else if (strstr(name, "SNP Het/Hom ratio") != NULL) {
strcpy(record.SNPHetHomRatio, str1);
printf("%s ", str1);}
else if (strstr(name, "MNP Het/Hom ratio") != NULL) {
strcpy(record.MNPHetHomRatio, str1);
printf("%s ", str1);}
else if (strstr(name, "Insertion Het/Hom ratio") != NULL) {
strcpy(record.InsertionHetHomRatio, str1);
printf("%s ", str1);}
else if (strstr(name, "Deletion Het/Hom ratio") != NULL) {
strcpy(record.DeletionHetHomRatio, str1);
printf("%s ", str1);}
else if (strstr(name, "Indel Het/Hom ratio") != NULL) {
strcpy(record.IndelHetHomRatio, str1);
printf("%s ", str1);}
else if (strstr(name, "Insertion/Deletion ratio") != NULL) {
strcpy(record.InsertDeletionRatio, str1);
printf("%s ", str1);}
else if (strstr(name, "Inde/SNP+MNP ratio") != NULL) {
strcpy(record.Indel_SNPMNPRatio, str1);
printf("%s ", str1); }
else printf("Sthwrong!\n");
// printf("%s: %s\n", name, str1);
}
puts("END"); //For debug. puts() always adds newline at the end of the string
fclose(fPtr);
return 0;
}
./prog infile
The attached infile is simply repeating the first 4 RECORD with unique sample IDs for trial.
And the wrong output is:
sample1 91 1 5 2 0 1 44 1.74 2.96 2.79 - 4.00 - - 2.50 Sthwrong!
Sthwrong!
sample2 73 2 2 3 0 1 63 1.87 2.59 2.50 1.00 - - - 0.67 Sthwrong!
Sthwrong!
sample3 87 1 4 2 0 1 42 1.74 2.96 2.79 - 2.00 - - 1.25 Sthwrong!
Sthwrong!
sample4 83 1 2 3 0 4 65 1.87 2.59 2.50 1.00 - - - 0.67 Sthwrong!
Sthwrong!
sample5 91 1 5 2 0 1 44 1.74 2.96 2.79 - 4.00 - - 2.50 Sthwrong!
Sthwrong!
sample6 73 2 2 3 0 1 63 1.87 2.59 2.50 1.00 - - - 0.67 Sthwrong!
Sthwrong!
sample7 87 1 4 2 0 1 42 1.74 2.96 2.79 - 2.00 - - 1.25 Sthwrong!
Sthwrong!
sample8 83 1 2 3 0 4 65 1.87 2.59 2.50 1.00 - - - 0.67 Sthwrong!
Sthwrong!
sample9 91 1 5 2 0 1 44 1.74 2.96 2.79 - 4.00 - - 2.50 Sthwrong!
Sthwrong!
Sthwrong!
73 2 2 3 0 1 63 1.87 2.59 2.50 1.00 - - - 0.67 Sthwrong!
Sthwrong!
Sthwrong!
87 1 4 2 0 1 42 1.74 2.96 2.79 - 2.00 - - 1.25 Sthwrong!
Sthwrong!
Sthwrong!
83 1 2 3 0 4 65 1.87 2.59 2.50 1.00 - - - 0.67 Sthwrong!
Sthwrong!
Sthwrong!
91 1 5 2 0 1 44 1.74 2.96 2.79 - 4.00 - - 2.50 Sthwrong!
Sthwrong!
Sthwrong!
73 2 2 3 0 1 63 1.87 2.59 2.50 1.00 - - - 0.67 Sthwrong!
Sthwrong!
Sthwrong!
87 1 4 2 0 1 42 1.74 2.96 2.79 - 2.00 - - 1.25 Sthwrong!
Sthwrong!
Sthwrong!
83 1 2 3 0 4 65 1.87 2.59 2.50 1.00 - - - 0.67 Sthwrong!
Sthwrong!
END
But, what is expected is:
sample1 91 1 5 2 0 1 44 1.74 2.96 2.79 - 4.00 - - 2.50 0.08
sample2 73 2 2 3 0 1 63 1.87 2.59 2.50 1.00 - - - 0.67 0.07
sample3 87 1 4 2 0 1 42 1.74 2.96 2.79 - 2.00 - - 1.25 0.08
sample4 83 1 2 3 0 4 65 1.87 2.59 2.50 1.00 - - - 0.67 0.07
sample5 91 1 5 2 0 1 44 1.74 2.96 2.79 - 4.00 - - 2.50 0.08
sample6 73 2 2 3 0 1 63 1.87 2.59 2.50 1.00 - - - 0.67 0.07
sample7 87 1 4 2 0 1 42 1.74 2.96 2.79 - 2.00 - - 1.25 0.08
sample8 83 1 2 3 0 4 65 1.87 2.59 2.50 1.00 - - - 0.67 0.07
sample9 91 1 5 2 0 1 44 1.74 2.96 2.79 - 4.00 - - 2.50 0.08
sample10 73 2 2 3 0 1 63 1.87 2.59 2.50 1.00 - - - 0.67 0.07
sample11 87 1 4 2 0 1 42 1.74 2.96 2.79 - 2.00 - - 1.25 0.08
sample12 83 1 2 3 0 4 65 1.87 2.59 2.50 1.00 - - - 0.67 0.07
sample13 91 1 5 2 0 1 44 1.74 2.96 2.79 - 4.00 - - 2.50 0.08
sample14 73 2 2 3 0 1 63 1.87 2.59 2.50 1.00 - - - 0.67 0.07
sample15 87 1 4 2 0 1 42 1.74 2.96 2.79 - 2.00 - - 1.25 0.08
sample16 83 1 2 3 0 4 65 1.87 2.59 2.50 1.00 - - - 0.67 0.07
END
I believe the string size is correct, as the last member record[i].Indel_SNPMNPRatio is never more than 3 digits. Any help is greatly appreciated.