Lex: analyzing a C file and printing out identifiers and line numbers they're found on

Florida State University, Tallahassee, FL USA, Dr. Whalley, COP4342

  1. The problem statement, all variables and given/known data:
    Create a lex specification file that reads a C source program that ignores keywords and collects all identifiers (regular variable names) and also displays the line numbers each was found on.

I am having trouble with concatenating the line numbers in the 'C' function I created. I either get a segmentation fault or some compiler error. I wrote a program like this in Perl, but I've never written any code in C like this.

  1. Relevant commands, code, scripts, algorithms:

  2. The attempts at a solution (include all code and scripts):

%{
#include <stdio.h>
#include <string.h>
    const char* id[1000];
    void insertId(char*, int);
    int i = 0;
    int newLineCnt = 1;
%}

%%
auto;
break;
case;
char;
continue;
default;
do;
double;
else;
extern;
float;
for;
goto;
if;
int;
long;
register;
return;
short;
sizeof;
static;
struct;
switch;
typedef;
union;
unsigned;
void;
while;
[a-zA-Z][a-zA-Z0-9_]+  {
                            //id = yytext;
                           // printf("The word found was: %s and 'i' is: %d\n", id, i);
                            
                            insertId(yytext, newLineCnt);
                       }
"\n"                   newLineCnt++; 
[a-z]              printf("Lowercase word\n");
[A-Z]              printf("Uppercase word\n");
[0-9]              printf("Integer\n");
";"                printf("Semicolon");
"("                printf("Open parentheses\n");
")"                printf("Close parentheses");
%%
void insertId(char* str, int nLine)
{
    char num[2];
    sprintf ( num, "%d", nLine);
    static char string[100];
    
    int iter;
    for(iter = 0; iter < i+1; iter++)
    {
        if ( strcmp(str, id[iter]) == 0 )
        {
            strcat( string, ", " );
            strcat( string, num );
            strcat ( id[iter], string );
            //printf("The word found was: %s\n", id[iter]);
            return;
        }
    }



    i++;
    
   // printf("That string was: %s\n", str);
    strcpy ( string, str);
    strcat ( string, ": ");
    sprintf ( num, "%d", nLine);         
    strcat (string, num);
   
  //  sprintf ( num, "%d", nLine);
  //  strcat (string, num);
    id = string;

    //printf("The word found was:   %s on line %d\n", id[i-1], nLine);
}

That's not going to work. You can't store two things in one pointer that way. I'd use a structure with three things, the name, an array of lines, and the number of lines stored, so you don't have to do string operations all the time to find the name and don't try to constantly resize your strings.

struct {
        char *name;
        int *lines;
        int len;
} id[1000];
int i=0;

void insertId(char* str, int nLine)
{
        int x;
        for(x=0; x<i; x++)
        {
                if(strcmp(str, id[x].name) == 0)
                {
                        id[x].lines=realloc(id[x].lines, sizeof(int)*(id[x].len+1));
                        id[x].lines[id[x].len]=nLine;
                        id[x].len++;
                        return;
                }
        }

        id.name=strdup(str);
        id.lines=realloc(id.lines, sizeof(int));
        id.lines[0]=nLine;
        id.len=1;
        i++;
}

id[i].lines is an array. Access the elements individually, first element at 0, last element at id[x].lines[id[x].len-1]

I've changed quite a bit of the code to something I understand. Also, I apologize about the forum homework policy. The question posted previously was just asking about some crazy C syntax that I did not understand.

I have this compiler error and I need a second eye to see if I missed something.
Error:

Test:desktop D2K$ make
lex cxref.l
gcc -g -c lex.yy.c
cxref.l:57: error: expected �;', �,' or �)' before numeric constant
make: *** [lex.yy.o] Error 1
Test:desktop D2K$ 

Line 57 is just inside the void insertID() function near the top.

My modified code:

%{
#include <stdio.h>
#include <string.h>
    char identifier[1000][82];
    char linesFound[100][100];
    void insertId(char*, int);
    int i = 0;
    int lineNum = 1;
%}

%x comment
%s str

%%
"/*"                        BEGIN(comment);

<comment>[^*\n]*        /* eat anything that's not a '*' */
<comment>"*"+[^*/\n]*   /* eat up '*'s not followed by '/'s */
<comment>\n             ++lineNum;
<comment>"*"+"/"        BEGIN(INITIAL);

"\n"                              ++lineNum;

auto                        ;
break                       ;
case                        ;
char                        ;
continue                    ;
default                     ;
do                          ;
double                      ;
else                        ;
extern                      ;
float                       ;
for                         ;
goto                        ;
if                          ;
int                         ;
long                        ;
register                    ;
return                      ;
short                       ;
sizeof                      ;
static                      ;
struct                      ;
switch                      ;
typedef                     ;
union                       ;
unsigned                    ;
void                        ;
while                       ;
[*]?[a-zA-Z][a-zA-Z0-9_]*   insertId(yytext, lineNum);
[^a-zA-Z0-9_]+              ;
[0-9]+                      ;
%%
void insertId(char* str, int nLine)
{
    char num[2];
    sprintf ( num, "%d", nLine);
    
    int iter;
    for(iter = 0; iter <= i; iter++)
    {
        if ( strcmp(identifier[iter], str) == 0 )
        {
            strcat( linesFound[iter], ", " );
            strcat( linesFound[iter], num );
            return;
        }
    }

    strcpy( identifier, str );
    strcat( identifier, ": " );
    strcpy( linesFound, num );

    i++;
   
}

It would be helpful to know which line caused the error, not just its general vicinity, but I think you have to declare int iter at the top of the function.

Your code is going to explode if you have more than 99 9 lines, more than 100 identifiers, or the list of lines for anything is longer than 81 characters. You really need to make num larger. Make it at least 10 in size, that should last a while.

Of course, if you make your big arrays much bigger you're going to be wasting tremendous amounts of RAM. You're wasting 100K already...