./match_pattern.out: malloc(): memory corruption: 0x0000000013a11600 ***

Hi All,

I have a simple code which does some computation by matching string patterns.

In brief:

  1. The code reads .dat and .txt files.
  2. .dat files are huge text files and .txt files contain some important words.
  3. I am just doing strstr to find the patterns.
  4. The function returns the address of number_of_words
void read_file ( char *path , int32_t *number_of_words )
  1. The problem is after executing the code, I am getting this error which I am finding it hard to decipher. I reckon its the problem with malloc(), free() pair.
./match_pattern.out
*** glibc detected *** ./match_pattern.out: malloc(): memory corruption: 0x0000000013a11600 ***
======= Backtrace: =========
/lib64/libc.so.6[0x3bfea730fe]
/lib64/libc.so.6(__libc_malloc+0x6e)[0x3bfea74e2e]
./match_pattern.out[0x400edc]
./match_pattern.out[0x400c5b]
/lib64/libc.so.6(__libc_start_main+0xf4)[0x3bfea1d994]
./match_pattern.out[0x400949]
======= Memory map: ========
00400000-00402000 r-xp 00000000 fd:00 99975171                           /data/info_fil/msjameel/ontology/stemmed_data/medical/match_pattern.out
00601000-00602000 rw-p 00001000 fd:00 99975171                           /data/info_fil/msjameel/ontology/stemmed_data/medical/match_pattern.out
12a39000-13e1d000 rw-p 12a39000 00:00 0                                  [heap]
3bfe600000-3bfe61c000 r-xp 00000000 fd:00 72515586                       /lib64/ld-2.5.so
3bfe81c000-3bfe81d000 r--p 0001c000 fd:00 72515586                       /lib64/ld-2.5.so
3bfe81d000-3bfe81e000 rw-p 0001d000 fd:00 72515586                       /lib64/ld-2.5.so
3bfea00000-3bfeb4e000 r-xp 00000000 fd:00 72515607                       /lib64/libc-2.5.so
3bfeb4e000-3bfed4e000 ---p 0014e000 fd:00 72515607                       /lib64/libc-2.5.so
3bfed4e000-3bfed52000 r--p 0014e000 fd:00 72515607                       /lib64/libc-2.5.so
3bfed52000-3bfed53000 rw-p 00152000 fd:00 72515607                       /lib64/libc-2.5.so
3bfed53000-3bfed58000 rw-p 3bfed53000 00:00 0
3bfee00000-3bfee82000 r-xp 00000000 fd:00 72515640                       /lib64/libm-2.5.so
3bfee82000-3bff081000 ---p 00082000 fd:00 72515640                       /lib64/libm-2.5.so
3bff081000-3bff082000 r--p 00081000 fd:00 72515640                       /lib64/libm-2.5.so
3bff082000-3bff083000 rw-p 00082000 fd:00 72515640                       /lib64/libm-2.5.so
3c03e00000-3c03e0d000 r-xp 00000000 fd:00 72515754                       /lib64/libgcc_s-4.1.2-20080825.so.1
3c03e0d000-3c0400d000 ---p 0000d000 fd:00 72515754                       /lib64/libgcc_s-4.1.2-20080825.so.1
3c0400d000-3c0400e000 rw-p 0000d000 fd:00 72515754                       /lib64/libgcc_s-4.1.2-20080825.so.1
2b4c0f17b000-2b4c0f1a7000 rw-p 2b4c0f17b000 00:00 0
2b4c0f1a7000-2b4c0f1fd000 rw-p 2b4c0f1a7000 00:00 0
2b4c10000000-2b4c10021000 rw-p 2b4c10000000 00:00 0
2b4c10021000-2b4c14000000 ---p 2b4c10021000 00:00 0
7fff80f35000-7fff80f4a000 rw-p 7ffffffe9000 00:00 0                      [stack]
7fff80ffc000-7fff81000000 r-xp 7fff80ffc000 00:00 0                      [vdso]
ffffffffff600000-ffffffffffe00000 ---p 00000000 00:00 0                  [vsyscall]
Abort
//Match patterns from several files.

#define _GNU_SOURCE

#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<inttypes.h>

char *chomp ( char * );
void read_file ( char * , int32_t * );

int32_t main ( int32_t argc , char ** argv )
{
	char *dat_line = NULL;
	char *txt_line = NULL;
	char *file_name_txt = NULL;
	char *file_name_dat = NULL;
	char *entire_dat_file = NULL;
	char *line_from_txt_file = NULL;
	char *chomped_line = NULL;


	unsigned long int summation = 0;
	unsigned long int value = 0;
	float average = 0.0;
	float scope = 0;

	unsigned long int dat_len = 0;
	unsigned long int read;
	unsigned long int txt_len = 0;
	unsigned long int number_of_characters = 0;
	unsigned long int number_of_words = 0;
	unsigned long int i = 0;

	FILE *open_dat_file = NULL;
	FILE *open_txt_file = NULL;
	FILE *output = NULL;

	//let get the TXT files...
	system ("ls -1 *.txt > text_files.tmp" );
	
	//lets get the DAT files...
	system ( "ls -1 *.dat > dat_files.tmp" );

	//Outer look is for the DAT files and inner loop is for the TXT
	//files...

	FILE *txt_pointer = NULL;
	txt_pointer = fopen ( "text_files.tmp" , "r" );
	if ( txt_pointer == NULL )
	{
		fprintf ( stderr , "The file list for text files does not exist\n" );
	}

	FILE *dat_pointer = NULL;
	dat_pointer = fopen ( "dat_files.tmp" , "r" );
	if ( dat_pointer == NULL )
	{
		fprintf ( stderr , "The file list for the dat files does not exist\n" );
	}


	output = fopen ( "average_values.res" , "a" );
	if ( output == NULL )
	{
		fprintf ( stderr , "File append error\n" );
	}


	while ( ( read = getline ( &dat_line , &dat_len , dat_pointer ) ) != -1 )
	{
		file_name_dat = chomp ( dat_line );
		open_dat_file = fopen ( file_name_dat , "r" );

		( void ) fseek ( open_dat_file , 0L , SEEK_END );
		number_of_characters = ftell ( open_dat_file );

		entire_dat_file = ( char * ) malloc ( ( number_of_characters + 1 ) * sizeof ( char ) );
		if ( entire_dat_file == NULL )
		{
			fprintf ( stderr , "malloc() memory allocation failure in entire_dat_file\n" );
		}

		rewind ( open_dat_file );

		read_file ( file_name_dat , &number_of_words );

		fgets ( entire_dat_file , number_of_characters , open_dat_file );

		while ( ( read = getline ( &txt_line , &txt_len , txt_pointer ) ) != -1 )
		{
			file_name_txt = chomp ( txt_line );
			open_txt_file = fopen ( file_name_txt , "r" );
			value++;

			//Now read to read the txt files one by one and search for the pattern...
		

			while ( ( read = getline ( &line_from_txt_file , &txt_len , open_txt_file ) ) != 1 && !feof ( open_txt_file ) )
			{
				chomped_line = chomp ( line_from_txt_file );
				if ( strstr ( entire_dat_file , chomped_line ) != NULL )
				{
					summation = summation + value;
				}
			}
		}

		average = ( float ) summation / ( number_of_words + 1 );
		scope = powf ( 2.71828183 , -(average) );
		fprintf ( output , "%f\n" , scope );
		number_of_words = 0;
		summation = 0;
		value = 0;
		average = 0;
		scope = 0;
		free ( entire_dat_file );
		number_of_characters = 0;
		rewind ( txt_pointer );

		fclose ( open_dat_file );
	}

	fclose ( open_txt_file );
	fclose ( output );

	if ( txt_line )
	{
		free ( txt_line );
	}

	if ( dat_line )
	{
		free ( dat_line );
	}


	return ( EXIT_SUCCESS );
}


char *chomp ( char *s )
{
    char *n = malloc( strlen( s ? s : "\n" ) );
    if( s )
        strcpy( n, s );
    n[strlen(n)-1]='\0';
    return n;
}


void read_file ( char *path , int32_t *number_of_words )
{
	FILE *pointer = NULL;

	char ch;

	pointer = fopen ( path , "r" );
	if ( pointer == NULL)
	{
		perror ( "File read error " );
	}

	(*number_of_words) = 0;

	while ( !feof ( pointer ) )
	{
		ch = fgetc ( pointer );
		if ( ch == ' ' && ch != EOF )
		{
			(*number_of_words) ++;
		}
	}

	fclose ( pointer );
}

I am working in Linux and using gcc compiler version gcc (GCC) 4.1.2 20080704 (Red Hat 4.1.2-50)

Memory corruption is normally due to you straying outside the bounds of an array - not with malloc()/free() statements matching up. You've probably overwritten some of the space next to a malloc()'d portion of memory that glibc expects to be able to use for keeping track of the memory region - go through with a debugger and have a look for bounds errors.

As an aside, here's the sort of thing you can expect if you did have a problem with malloc()/free() pairs:

[john@john-laptop tmp.u6Q7IbZntd]$ cat prog.c
#include <stdlib.h>

int main(void)
{
    void *p = malloc(1);
    free(p);
    free(p);
    return 0;
}
[john@john-laptop tmp.u6Q7IbZntd]$ gcc prog.c -o prog
[john@john-laptop tmp.u6Q7IbZntd]$ ./prog
*** glibc detected *** ./prog: double free or corruption (fasttop): 0x09ba3008 ***
======= Backtrace: =========
/lib/libc.so.6(+0x6c501)[0x58b501]
/lib/libc.so.6(+0x6dd70)[0x58cd70]
/lib/libc.so.6(cfree+0x6d)[0x58fe5d]
./prog[0x8048425]
/lib/libc.so.6(__libc_start_main+0xe7)[0x535ce7]
./prog[0x8048361]
======= Memory map: ========
0051f000-00676000 r-xp 00000000 08:04 2793795    /lib/libc-2.12.1.so
00676000-00678000 r--p 00157000 08:04 2793795    /lib/libc-2.12.1.so
00678000-00679000 rw-p 00159000 08:04 2793795    /lib/libc-2.12.1.so
00679000-0067c000 rw-p 00000000 00:00 0 
0091c000-0091d000 r-xp 00000000 00:00 0          [vdso]
00ab9000-00ad3000 r-xp 00000000 08:04 2793551    /lib/libgcc_s.so.1
00ad3000-00ad4000 r--p 00019000 08:04 2793551    /lib/libgcc_s.so.1
00ad4000-00ad5000 rw-p 0001a000 08:04 2793551    /lib/libgcc_s.so.1
00dd4000-00df0000 r-xp 00000000 08:04 2793792    /lib/ld-2.12.1.so
00df0000-00df1000 r--p 0001b000 08:04 2793792    /lib/ld-2.12.1.so
00df1000-00df2000 rw-p 0001c000 08:04 2793792    /lib/ld-2.12.1.so
08048000-08049000 r-xp 00000000 08:04 1220779    /tmp/tmp.u6Q7IbZntd/prog
08049000-0804a000 r--p 00000000 08:04 1220779    /tmp/tmp.u6Q7IbZntd/prog
0804a000-0804b000 rw-p 00001000 08:04 1220779    /tmp/tmp.u6Q7IbZntd/prog
09ba3000-09bc4000 rw-p 00000000 00:00 0          [heap]
b7500000-b7521000 rw-p 00000000 00:00 0 
b7521000-b7600000 ---p 00000000 00:00 0 
b76fc000-b76fd000 rw-p 00000000 00:00 0 
b7714000-b7716000 rw-p 00000000 00:00 0 
bffdf000-c0000000 rw-p 00000000 00:00 0          [stack]
Aborted

Thanks a lot. I spent hours debugging my code and found that the problem was with the chomp() function. SO, I changed the chomp() function and it worked. Here's my code. And yes, I was wrong about memory corruption.

//Match patterns from several files.

#define _GNU_SOURCE

#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<inttypes.h>

char *chomp ( char * );
void read_file ( char * , int32_t * );

int32_t main ( int32_t argc , char ** argv )
{
	char *dat_line = NULL;
	char *txt_line = NULL;
	char *file_name_txt = NULL;
	char *file_name_dat = NULL;
	char *entire_dat_file = NULL;
	char *line_from_txt_file = NULL;
	char *chomped_line = NULL;


	unsigned long int summation = 0;
	unsigned long int value = 1;
	float average = 0.0;
	float scope = 0;

	unsigned long int dat_len = 0;
	unsigned long int read;
	unsigned long int txt_len = 0;
	unsigned long int number_of_characters = 0;
	unsigned long int number_of_words = 0;
	unsigned long int i = 0;

	FILE *open_dat_file = NULL;
	FILE *open_txt_file = NULL;
	FILE *output = NULL;

	//let get the TXT files...
	system ("ls -1 *.txt > text_files.tmp" );
	
	//lets get the DAT files...
	system ( "ls -1 *.dat > dat_files.tmp" );

	//Outer look is for the DAT files and inner loop is for the TXT
	//files...

	FILE *txt_pointer = NULL;
	txt_pointer = fopen ( "text_files.tmp" , "r" );
	if ( txt_pointer == NULL )
	{
		fprintf ( stderr , "The file list for text files does not exist\n" );
	}

	FILE *dat_pointer = NULL;
	dat_pointer = fopen ( "dat_files.tmp" , "r" );
	if ( dat_pointer == NULL )
	{
		fprintf ( stderr , "The file list for the dat files does not exist\n" );
	}


	output = fopen ( "average_values.res" , "a" );
	if ( output == NULL )
	{
		fprintf ( stderr , "File append error\n" );
	}


	while ( ( read = getline ( &dat_line , &dat_len , dat_pointer ) ) != -1 )
	{
		file_name_dat = chomp ( dat_line );
		open_dat_file = fopen ( file_name_dat , "r" );

		( void ) fseek ( open_dat_file , 0L , SEEK_END );
		number_of_characters = ftell ( open_dat_file );

		entire_dat_file = ( char * ) malloc ( ( number_of_characters + 1 ) * sizeof ( char ) );
		if ( entire_dat_file == NULL )
		{
			fprintf ( stderr , "malloc() memory allocation failure in entire_dat_file\n" );
		}

		rewind ( open_dat_file );

		read_file ( file_name_dat , &number_of_words );

		fgets ( entire_dat_file , number_of_characters , open_dat_file );

		while ( ( read = getline ( &txt_line , &txt_len , txt_pointer ) ) != -1 )
		{
			file_name_txt = chomp ( txt_line );
			open_txt_file = fopen ( file_name_txt , "r" );

			//Now read to read the txt files one by one and search for the pattern...
		
			while ( ( read = getline ( &line_from_txt_file , &txt_len , open_txt_file ) ) != 1 && !feof ( open_txt_file ) )
			{
				chomped_line = chomp ( line_from_txt_file );
				if ( strstr ( entire_dat_file , chomped_line ) != NULL )
				{
					summation = summation + value;
				}
				value ++;
			}
			value = 1;
			fclose ( open_txt_file );
			memset ( file_name_txt , 0 , strlen ( file_name_txt ) );
		}

		average = ( float ) summation / ( number_of_words + 1 );
		average = - average;
		scope = powf ( 2.71828183 , average );
		fprintf ( output , "%f\n" , scope );
		number_of_words = 0;
		summation = 0;
		value = 0;
		average = 0;
		scope = 0;
		number_of_characters = 0;

		rewind ( txt_pointer );
		free ( entire_dat_file );
		fclose ( open_dat_file );
		memset ( file_name_dat , 0 , strlen ( file_name_dat ) );
	}

	fclose ( output );

	if ( txt_line )
	{
		free ( txt_line );
	}

	if ( dat_line )
	{
		free ( dat_line );
	}

	fclose ( dat_pointer );
	fclose (txt_pointer );

	return ( EXIT_SUCCESS );
}


char *chomp ( char *s )
{
	unsigned long int len = 0;
	len = strlen ( s );
	if ( s [ len - 1 ] == '\n' )
	{
		s [ len - 1 ] = 0;
	}
	return ( s );
}


void read_file ( char *path , int32_t *number_of_words )
{
	FILE *pointer = NULL;

	char ch;

	pointer = fopen ( path , "r" );
	if ( pointer == NULL)
	{
		perror ( "File read error " );
	}

	(*number_of_words) = 0;

	while ( !feof ( pointer ) )
	{
		ch = fgetc ( pointer );
		if ( ch == ' ' && ch != EOF )
		{
			(*number_of_words) ++;
		}
	}

	fclose ( pointer );
}

If you're copying a string, better to use strdup(). You won't make that off-by-one mistake that way.

1 Like