problem with threads in C

I have a problem: if I create, for example, 100 threads, the program works correctly, but if I define more threads, for example 1000
// (that is, if I change static int NUM_E from 100 to 1000)
then the program stops at about 350 threads and doesn't continue.
Where could the problem be, please?

#include <pthread.h>
#include <semaphore.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Shared counters and flags (all reset to 0 at the start of main).
int c_elves;       // incremented by an arriving elf, decremented after it got help
int c_reindeer;    // incremented by an arriving reindeer, decremented after hitching
int e_end;         // Santa sets this to 1 once the current elf group is finished
int r_end;         // Santa sets this to 1 once the reindeer are finished
int koniec_cyklu;  // "end of cycle": set to 1 by the last reindeer; late elves then quit

// Semaphores used by the threads (initial values are set in main).
sem_t s_elves;     // admits elves one at a time
sem_t s_reindeer;  // reindeer wait here until Santa posts it
sem_t s_waitingR;  // guards c_reindeer while reindeer arrive
sem_t s_cakaren;   // "waiting room": elves wait here until Santa posts it

// Mutexes: Santa blocks on m_santa until elves or reindeer unlock it;
// m_elves and m_reindeer are paired with the condition variables below.
pthread_mutex_t m_santa;
pthread_mutex_t m_elves;
pthread_mutex_t m_reindeer;

// Condition variables on which Santa waits for the elves / the reindeer.
pthread_cond_t con_elves;
pthread_cond_t con_reindeer;

// Thread entry points.
void *f_reindeer(void *);
void *f_elves(void *);
void *f_santa(void *);

// Helper functions. prepareSleigh is declared with (void) so that the
// declaration is a real prototype and matches its definition.
void prepareSleigh(void);
void getHitched(int ktory);
void helpElves(void);
void getHelp(int ktory);

// Number of reindeer and elf threads. They are const because nothing is
// meant to change them at run time.
static const int NUM_R = 9;
static const int NUM_E = 1000;

// hlavny program
int main(int argc, char **argv)
{
    //deklaracia lokalnych premmenych
    int i,j,index;
    //inicializacia pociatocnych semaforov a ich hodnot
    sem_init(&s_elves,0,1);
    sem_init(&s_reindeer,0,0);
    sem_init(&s_cakaren,0,0);
    sem_init(&s_waitingR,0,1);

    //inicializacia mutexov
    pthread_mutex_init(&m_santa,NULL);
    pthread_mutex_init(&m_elves,NULL);
    pthread_mutex_init(&m_reindeer,NULL);
    pthread_mutex_lock(&m_santa);

    //definicia pola sobov,skriatkov a santu
    pthread_t t_reindeer[NUM_R];
    pthread_t t_elves[NUM_E];
    pthread_t t_santa;
    //definicia poctu elfov a sobov ktory chcu pomoct resp sa vratili z "dovolenky"
    c_reindeer = 0;
    c_elves = 0;
    e_end = 0;
    r_end = 0;
    koniec_cyklu = 0;
    index = 0;

    pthread_create(&t_santa, NULL, f_santa, (void *)NULL);

    for (i = 0; i < NUM_E; i++)
    {
        pthread_create(&t_elves, NULL, f_elves, (void *)i);
    }
    for (i = 0; i < NUM_R; i++)
    {
	pthread_create(&t_reindeer, NULL, f_reindeer, (void *)i);
    }

    pthread_join(t_santa,NULL);
    for (i = 0; i < NUM_R; i++)
    {
	pthread_join(t_reindeer, NULL);
    }
    for (i = 0; i < NUM_E; i++)
    {
	pthread_join(t_elves, NULL);
    }
    return 0;
}

//
// Reindeer thread.
// param carries the zero-based reindeer index packed into the pointer value.
// The reindeer registers its arrival, waits until Santa releases it, gets
// hitched, and the last one to finish signals Santa. Always returns NULL.
void *f_reindeer(void *param)
{
    int id = (int)(intptr_t)param;
    int last;

    printf("\033[01;37msom sob cislo %d\n", id + 1);

    // s_waitingR serialises every update of c_reindeer.
    sem_wait(&s_waitingR);
    c_reindeer++;
    if (c_reindeer == 9)  // 9 must match NUM_R
    {
        // All reindeer are back: release Santa, who is blocked on m_santa.
        pthread_mutex_unlock(&m_santa);
    }
    sem_post(&s_waitingR);

    // Wait until Santa has prepared the sleigh.
    sem_wait(&s_reindeer);
    getHitched(id);

    // Decrement and test under the same guard as the increment, so that
    // exactly one reindeer observes the counter reaching zero.
    sem_wait(&s_waitingR);
    c_reindeer--;
    last = (c_reindeer == 0);
    sem_post(&s_waitingR);

    if (last)
    {
        koniec_cyklu = 1;
        // Keep signalling until Santa confirms (r_end != 0); a single
        // broadcast could be lost if Santa is not waiting yet.
        while (r_end == 0)
        {
            pthread_cond_broadcast(&con_reindeer);
        }
    }
    return NULL;
}

//
// Elf thread.
// param carries the zero-based elf index packed into the pointer value.
// The elf enters the waiting room; the third elf of a group wakes Santa and
// keeps further elves out until the group has been helped. Elves that arrive
// after the reindeer have finished (koniec_cyklu == 1) leave immediately.
// Always returns NULL.
void *f_elves(void *param)
{
    int id = (int)(intptr_t)param;

    printf("\033[01;37msom skriatok cislo %d\n", id + 1);

    // Santa has already left: nothing to do.
    if (koniec_cyklu == 1)
    {
        printf("\33[22;31melf konci prisiel neskoro %d\n", id + 1);
        return NULL;
    }

    // Only one elf at a time may register.
    sem_wait(&s_elves);
    // The reindeer may have finished while this elf was queued.
    if (koniec_cyklu == 1)
    {
        printf("\33[22;31melf konci prisiel neskoro %d\n", id + 1);
        sem_post(&s_elves);  // let the next queued elf notice it too
        return NULL;
    }

    c_elves++;
    if (c_elves == 3)
    {
        // Group complete: wake Santa and keep s_elves closed so that no
        // further elf registers until this group is done.
        pthread_mutex_unlock(&m_santa);
    }
    else
    {
        sem_post(&s_elves);
    }

    // Wait in front of Santa's door until he lets the group in.
    sem_wait(&s_cakaren);
    getHelp(id);

    // The elves of a group run this part concurrently. Decrementing and
    // testing in one atomic step (GCC/Clang builtin) guarantees that exactly
    // one of them sees the counter reach zero; a plain "c_elves--" followed by
    // a separate read lets zero or several elves act as "last".
    if (__sync_sub_and_fetch(&c_elves, 1) == 0)
    {
        // Keep signalling until Santa confirms (e_end != 0); a single
        // broadcast could be lost if Santa is not waiting yet.
        while (e_end == 0)
        {
            pthread_cond_broadcast(&con_elves);
        }
        sem_post(&s_elves);  // reopen the waiting room for the next group
    }
    printf("\33[22;31melf konci normalne %d\n", id + 1);
    return NULL;
}

//
// Santa thread.
// param is unused. Santa blocks on m_santa until either three elves or all
// nine reindeer have unlocked it. Reindeer take priority: once they are
// served Santa releases the remaining elves and the thread ends. Otherwise he
// helps a group of three elves and goes back to waiting. Always returns NULL.
void *f_santa(void *param)
{
    int i;

    (void)param;

    while (1)
    {
        // Santa already owns m_santa after the first pass, so this blocks
        // until an elf or a reindeer unlocks it.
        // NOTE(review): relocking an owned default mutex and unlocking it from
        // another thread are both outside what POSIX defines; a semaphore
        // would express this hand-off portably.
        pthread_mutex_lock(&m_santa);
        e_end = 0;
        r_end = 0;

        if (c_reindeer == 9)
        {
            prepareSleigh();
            for (i = 0; i < 9; i++)
            {
                sem_post(&s_reindeer);
            }

            pthread_mutex_lock(&m_reindeer);
            // The last reindeer broadcasts until r_end is set, so at least one
            // wakeup always arrives; re-checking the counter afterwards
            // filters out spurious wakeups.
            do
            {
                pthread_cond_wait(&con_reindeer, &m_reindeer);
            } while (c_reindeer != 0);
            r_end = 1;
            pthread_mutex_unlock(&m_reindeer);

            // Shut down: release elves queued on either semaphore so that
            // they can notice the end and terminate.
            e_end = 1;
            sem_post(&s_elves);
            sem_post(&s_cakaren);
            sem_post(&s_cakaren);
            sem_post(&s_cakaren);
            printf("\33[22;33msanta konci\n");
            return NULL;
        }

        if (c_elves == 3)
        {
            helpElves();
            for (i = 0; i < 3; i++)
            {
                sem_post(&s_cakaren);
            }

            pthread_mutex_lock(&m_elves);
            // Same pattern as for the reindeer: wait for the last elf's
            // broadcast and ignore spurious wakeups.
            do
            {
                pthread_cond_wait(&con_elves, &m_elves);
            } while (c_elves != 0);
            e_end = 1;
            pthread_mutex_unlock(&m_elves);
        }
    }
    return NULL;
}

// Santa's action: announce that the sleigh is being prepared.
void prepareSleigh(void)
{
    fputs("\033[22;33mpripravujem sane\n", stdout);
}

// Reindeer's action: announce that reindeer `ktory` (zero-based) is hitched.
// The printed number is one-based.
void getHitched(int ktory)
{
    const int number = ktory + 1;

    printf("\033[22;32msom zviazany %d\n", number);
}

// Santa's action: announce that he is helping the elves.
void helpElves(void)
{
    fputs("\033[22;33mdavam pomoc\n", stdout);
}

// Elf's action: announce that elf `ktory` (zero-based) received help.
// The printed number is one-based.
void getHelp(int ktory)
{
    const int number = ktory + 1;

    printf("\033[22;31mdostal som pomoc%d\n", number);
}

Assume Linux; it works pretty much the same elsewhere as well. Each thread is a process that shares some address space with the parent process, and each thread usually has about 1MB of stack space by default. There is also an imposed process limit, say 4096. We are not going near that total.

  1. 100 threads == 100MB of stack space, 1000 threads == 1GB of stack space. There is a process limit on stack size, which is charged to the parent process, see ulimit.

  2. Every thread requires a context switch to "run". Depending on your system cpu architecture -
    For a single cpu with one core, this means for each LWP (thread or lightweight process) to get cpu you have for 100 threads:
    100 == 100 context switches requested every time a quantum (time slice) elapses. Say you have a 10ms quantum. That is 100 x 100 or 10000 context switches requested every second. You are spending a lot of time doing context switches.

For 1000 LWPs, with the same 10ms quantum, you have 100 x 1000 or 100,000 possible context switches per second.
When this happens, you spend all of your time in context switching and scheduling processes.

In practice a few dozen threads should be your maximum. If you get up in the 100+ range you are going to seriously degrade performance of your app and your system overall.

Consider a vmstat or top display that shows 1000 active processes, each one wanting the cpu. Everybody is waiting, almost nobody is doing anything else but waiting.

The only time this kind of thread overload becomes feasible is the case where you have 100's of cpu cores available. Clearly you do not.

1 Like

Thanks a lot jim.
May I ask one more question: if some threads end before another one is created, can I join them in some way, please?

Here is where I create them:

// Start one elf thread per slot of t_elves; the loop index is passed as the
// thread argument.
for (i = 0; i < NUM_E; i++)
{
pthread_create(&t_elves[i], NULL, f_elves, (void *)i);
}

While some threads are still being created, others can already have ended but not yet been joined, because I join the threads only after I have created all of them.

  • The only solution I can think of is to join that thread from another thread. Or is there another solution, please?

You can detach the threads, so they don't need to join. See this reference. You can't get their return values this way, though.

Good evening,

Jim is right about the virtual memory limitation. On some 32-bit Linux systems, you have up to 3GB of virtual memory available to the application. Assuming an 8MB stack per thread, it follows that you can have at most 384 threads active at the same time.

Even if your threads terminate, you may still need to join them or, as mentioned by Corona688, detach them. The reason behind this is explained in this article.

Cheers, Loïc