From: John Tsiombikas
Date: Tue, 22 Dec 2020 04:27:51 +0000 (+0200)
Subject: added thread pool, started fleshing out the render job system
X-Git-Url: http://git.mutantstargoat.com/user/nuclear/?p=erebus2020;a=commitdiff_plain;h=c70b7e63be9882e589a17b457900b05aca96ebb9

added thread pool, started fleshing out the render job system
---

diff --git a/liberebus/src/erebus.c b/liberebus/src/erebus.c
index af2b2d9..32af87b 100644
--- a/liberebus/src/erebus.c
+++ b/liberebus/src/erebus.c
@@ -1,5 +1,13 @@
 #include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <pthread.h>
 #include "erebus.h"
+#include "tpool.h"
+
+struct render_job {
+	struct erb_rend *erb;
+	int x, y, width, height;
+	struct render_job *next;
+};
 
 struct erb_rend {
 	int fb_width, fb_height, fb_npix;
@@ -7,10 +15,24 @@ struct erb_rend {
 	int *fb_nsamples;
 };
 
+static void proc_render_job(void *cls);
+static struct render_job *alloc_job(void);
+static void free_job(struct render_job *job);
+
+static struct thread_pool *tpool;
+
+
 struct erb_rend *erb_create(void)
 {
 	struct erb_rend *erb;
 
+	if(!tpool) {
+		if(!(tpool = tpool_create(0))) {
+			fprintf(stderr, "erb_create: fatal error, failed to create thread pool!\n");
+			return 0;
+		}
+	}
+
 	if(!(erb = calloc(1, sizeof *erb))) {
 		return 0;
 	}
@@ -93,3 +115,106 @@ float *erb_end(struct erb_rend *erb)
 
 	return erb->fb_pixels;
 }
+
+void erb_queue_frame(struct erb_rend *erb)
+{
+	erb_queue_block(erb, 0, 0, erb->fb_width, erb->fb_height);
+}
+
+void erb_queue_block(struct erb_rend *erb, int x, int y, int width, int height)
+{
+	struct render_job *job;
+
+	if(!(job = alloc_job())) {
+		fprintf(stderr, "erb_queue_block: failed to allocate rendering job\n");
+		return;
+	}
+
+	job->erb = erb;
+	job->x = x;
+	job->y = y;
+	job->width = width;
+	job->height = height;
+
+	tpool_enqueue(tpool, job, proc_render_job, 0);
+}
+
+void erb_wait(struct erb_rend *erb)
+{
+	/* XXX should we have a per-renderer instance thread pool, to wait only for our own jobs?
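+	 * (right now a single global tpool is shared by every erb_rend instance,
+	 * so this also waits on blocks queued by other renderers)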
+	 */
+	tpool_wait(tpool);
+}
+
+void erb_primary_ray(struct erb_rend *erb, struct erb_ray *ray, int sample)
+{
+	/* TODO */
+}
+
+void erb_sample_ray(struct erb_rend *erb, struct erb_ray *ray, float *col)
+{
+	/* TODO */
+	col[0] = fmod(col[0] + 1.0f, 1.0f);
+	col[1] = col[2] = fmod(col[1] + 0.33f, 1.0f);
+}
+
+static void proc_render_job(void *cls)
+{
+	int i, j, fboffs;
+	struct erb_rend *erb;
+	struct render_job *job = cls;
+	float *fbptr;
+	int *nsptr;
+	struct erb_ray ray;
+
+	erb = job->erb;
+	fboffs = job->y * erb->fb_width + job->x;
+	fbptr = erb->fb_pixels + fboffs * 3;
+	nsptr = erb->fb_nsamples + fboffs;
+
+	for(i=0; i<job->height; i++) {
+		for(j=0; j<job->width; j++) {
+			erb_primary_ray(erb, &ray, *nsptr++);
+			erb_sample_ray(erb, &ray, fbptr);
+			fbptr += 3;
+		}
+		/* skip to the start of the block in the next framebuffer scanline */
+		fbptr += (erb->fb_width - job->width) * 3;
+		nsptr += erb->fb_width - job->width;
+	}
+	free_job(job);
+}
+
+
+#define MAX_JOB_POOL 128
+static struct render_job *job_pool;
+static int num_free_jobs;
+static pthread_mutex_t job_pool_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static struct render_job *alloc_job(void)
+{
+	struct render_job *job;
+
+	pthread_mutex_lock(&job_pool_lock);
+	if(!job_pool) {
+		pthread_mutex_unlock(&job_pool_lock);
+		return malloc(sizeof *job);
+	}
+
+	job = job_pool;
+	job_pool = job->next;
+	num_free_jobs--;
+	pthread_mutex_unlock(&job_pool_lock);
+	return job;
+}
+
+static void free_job(struct render_job *job)
+{
+	pthread_mutex_lock(&job_pool_lock);
+	if(num_free_jobs >= MAX_JOB_POOL) {
+		pthread_mutex_unlock(&job_pool_lock);
+		free(job);
+		return;
+	}
+
+	job->next = job_pool;
+	job_pool = job;
+	num_free_jobs++;
+	pthread_mutex_unlock(&job_pool_lock);
+}
diff --git a/liberebus/src/erebus.h b/liberebus/src/erebus.h
index fcb097b..a2566d8 100644
--- a/liberebus/src/erebus.h
+++ b/liberebus/src/erebus.h
@@ -70,6 +70,13 @@ void erb_begin(struct erb_rend *erb);
 /* finalizes the frame, averaging samples (optional) */
 float *erb_end(struct erb_rend *erb);
 
+void erb_queue_frame(struct erb_rend *erb);
+void erb_queue_block(struct erb_rend *erb, int x, int y, int width, int height);
+void erb_wait(struct erb_rend *erb);
+
+void erb_primary_ray(struct erb_rend *erb, struct erb_ray *ray, int sample);
+void erb_sample_ray(struct erb_rend *erb, struct erb_ray *ray, float *col);
+
 /* transformation nodes */
 struct erb_node *erb_node(void);
 void erb_free_node(struct erb_node *n);
diff --git a/liberebus/src/tpool.c b/liberebus/src/tpool.c
new file mode 100644
index 0000000..7364bc6
--- /dev/null
+++ b/liberebus/src/tpool.c
@@ -0,0 +1,488 @@
+/* worker thread pool based on POSIX threads
+ * author: John Tsiombikas
+ * This code is public domain.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include <pthread.h>
+#include "tpool.h"
+
+#if defined(__APPLE__) && defined(__MACH__)
+# ifndef __unix__
+# define __unix__ 1
+# endif /* unix */
+# ifndef __bsd__
+# define __bsd__ 1
+# endif /* bsd */
+#endif /* apple */
+
+#if defined(unix) || defined(__unix__)
+#include <unistd.h>
+#include <sys/time.h>
+
+# ifdef __bsd__
+# include <sys/sysctl.h>
+# endif
+#endif
+
+#if defined(WIN32) || defined(__WIN32__)
+#include <windows.h>
+#endif
+
+
+struct work_item {
+	void *data;
+	tpool_callback work, done;
+	struct work_item *next;
+};
+
+struct thread_data {
+	int id;
+	struct thread_pool *pool;
+};
+
+struct thread_pool {
+	pthread_t *threads;
+	struct thread_data *tdata;
+	int num_threads;
+	pthread_key_t idkey;
+
+	int qsize;
+	struct work_item *workq, *workq_tail;
+	pthread_mutex_t workq_mutex;
+	pthread_cond_t workq_condvar;
+
+	int nactive;	/* number of active workers (not sleeping) */
+
+	pthread_cond_t done_condvar;
+
+	int should_quit;
+	int in_batch;
+
+	int nref;	/* reference count */
+
+#if defined(WIN32) || defined(__WIN32__)
+	HANDLE wait_event;
+#else
+	int wait_pipe[2];
+#endif
+};
+
+static void *thread_func(void *args);
+static void send_done_event(struct thread_pool *tpool);
+
+static struct work_item *alloc_work_item(void);
+static void free_work_item(struct work_item *w);
+
+
+struct thread_pool *tpool_create(int num_threads)
+{
+	int i;
+	struct thread_pool *tpool;
+
+	if(!(tpool = calloc(1, sizeof *tpool))) {
+		return 0;
+	}
+	pthread_mutex_init(&tpool->workq_mutex, 0);
+	pthread_cond_init(&tpool->workq_condvar, 0);
+	pthread_cond_init(&tpool->done_condvar, 0);
+	pthread_key_create(&tpool->idkey, 0);
+
+	pthread_setspecific(tpool->idkey, (void*)0xffffffff);
+
+#if !defined(WIN32) && !defined(__WIN32__)
+	tpool->wait_pipe[0] = tpool->wait_pipe[1] = -1;
+#endif
+
+	if(num_threads <= 0) {
+		num_threads = tpool_num_processors();
+	}
+	tpool->num_threads = num_threads;
+
+	if(!(tpool->threads = calloc(num_threads, sizeof *tpool->threads))) {
+		free(tpool);
+		return 0;
+	}
+	if(!(tpool->tdata = malloc(num_threads * sizeof *tpool->tdata))) {
+		free(tpool->threads);
+		free(tpool);
+		return 0;
+	}
+
+	for(i=0; i<num_threads; i++) {
+		tpool->tdata[i].id = i;
+		tpool->tdata[i].pool = tpool;
+
+		/* pthread_create returns an errno value on failure, not -1 */
+		if(pthread_create(tpool->threads + i, 0, thread_func, tpool->tdata + i) != 0) {
+			/*tpool->threads[i] = 0;*/
+			tpool_destroy(tpool);
+			return 0;
+		}
+	}
+	return tpool;
+}
+
+void tpool_destroy(struct thread_pool *tpool)
+{
+	int i;
+	if(!tpool) return;
+
+	tpool_clear(tpool);
+	tpool->should_quit = 1;
+
+	pthread_cond_broadcast(&tpool->workq_condvar);
+
+	if(tpool->threads) {
+		for(i=0; i<tpool->num_threads; i++) {
+			pthread_join(tpool->threads[i], 0);
+		}
+		putchar('\n');
+		free(tpool->threads);
+	}
+	free(tpool->tdata);
+
+	/* also wake up anyone waiting on the wait* calls */
+	tpool->nactive = 0;
+	pthread_cond_broadcast(&tpool->done_condvar);
+	send_done_event(tpool);
+
+	pthread_mutex_destroy(&tpool->workq_mutex);
+	pthread_cond_destroy(&tpool->workq_condvar);
+	pthread_cond_destroy(&tpool->done_condvar);
+	pthread_key_delete(tpool->idkey);
+
+#if defined(WIN32) || defined(__WIN32__)
+	if(tpool->wait_event) {
+		CloseHandle(tpool->wait_event);
+	}
+#else
+	if(tpool->wait_pipe[0] >= 0) {
+		close(tpool->wait_pipe[0]);
+		close(tpool->wait_pipe[1]);
+	}
+#endif
+	/* finally free the pool structure itself */
+	free(tpool);
+}
+
+int tpool_addref(struct thread_pool *tpool)
+{
+	return ++tpool->nref;
+}
+
+int tpool_release(struct thread_pool *tpool)
+{
+	if(--tpool->nref <= 0) {
+		tpool_destroy(tpool);
+		return 0;
+	}
+	return tpool->nref;
+}
+
+void tpool_begin_batch(struct thread_pool *tpool)
+{
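+	/* while in_batch is set, tpool_enqueue adds work to the queue without
+	 * waking the workers; tpool_end_batch clears it and broadcasts workq_condvar */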
+	tpool->in_batch = 1;
+}
+
+void tpool_end_batch(struct thread_pool *tpool)
+{
+	tpool->in_batch = 0;
+	pthread_cond_broadcast(&tpool->workq_condvar);
+}
+
+int tpool_enqueue(struct thread_pool *tpool, void *data,
+		tpool_callback work_func, tpool_callback done_func)
+{
+	struct work_item *job;
+
+	if(!(job = alloc_work_item())) {
+		return -1;
+	}
+	job->work = work_func;
+	job->done = done_func;
+	job->data = data;
+	job->next = 0;
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	if(tpool->workq) {
+		tpool->workq_tail->next = job;
+		tpool->workq_tail = job;
+	} else {
+		tpool->workq = tpool->workq_tail = job;
+	}
+	++tpool->qsize;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	if(!tpool->in_batch) {
+		pthread_cond_broadcast(&tpool->workq_condvar);
+	}
+	return 0;
+}
+
+void tpool_clear(struct thread_pool *tpool)
+{
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(tpool->workq) {
+		void *tmp = tpool->workq;
+		tpool->workq = tpool->workq->next;
+		free(tmp);
+	}
+	tpool->workq = tpool->workq_tail = 0;
+	tpool->qsize = 0;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+}
+
+int tpool_queued_jobs(struct thread_pool *tpool)
+{
+	int res;
+	pthread_mutex_lock(&tpool->workq_mutex);
+	res = tpool->qsize;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+	return res;
+}
+
+int tpool_active_jobs(struct thread_pool *tpool)
+{
+	int res;
+	pthread_mutex_lock(&tpool->workq_mutex);
+	res = tpool->nactive;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+	return res;
+}
+
+int tpool_pending_jobs(struct thread_pool *tpool)
+{
+	int res;
+	pthread_mutex_lock(&tpool->workq_mutex);
+	res = tpool->qsize + tpool->nactive;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+	return res;
+}
+
+void tpool_wait(struct thread_pool *tpool)
+{
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(tpool->nactive || tpool->qsize) {
+		pthread_cond_wait(&tpool->done_condvar, &tpool->workq_mutex);
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+}
+
+void tpool_wait_pending(struct thread_pool *tpool, int pending_target)
+{
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(tpool->qsize + tpool->nactive > pending_target) {
+		pthread_cond_wait(&tpool->done_condvar, &tpool->workq_mutex);
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+}
+
+#if defined(WIN32) || defined(__WIN32__)
+long tpool_timedwait(struct thread_pool *tpool, long timeout)
+{
+	fprintf(stderr, "tpool_timedwait currently unimplemented on windows\n");
+	abort();
+	return 0;
+}
+
+/* TODO: actually does this work with MinGW ? */
+int tpool_get_wait_fd(struct thread_pool *tpool)
+{
+	static int once;
+	if(!once) {
+		once = 1;
+		fprintf(stderr, "warning: tpool_get_wait_fd call on Windows does nothing\n");
+	}
+	return 0;
+}
+
+void *tpool_get_wait_handle(struct thread_pool *tpool)
+{
+	if(!tpool->wait_event) {
+		if(!(tpool->wait_event = CreateEvent(0, 0, 0, 0))) {
+			return 0;
+		}
+	}
+	return tpool->wait_event;
+}
+
+static void send_done_event(struct thread_pool *tpool)
+{
+	if(tpool->wait_event) {
+		SetEvent(tpool->wait_event);
+	}
+}
+
+#else	/* UNIX */
+
+long tpool_timedwait(struct thread_pool *tpool, long timeout)
+{
+	struct timespec tout_ts;
+	struct timeval tv0, tv;
+	long sec = timeout / 1000;
+
+	gettimeofday(&tv0, 0);
+
+	tout_ts.tv_sec = tv0.tv_sec + sec;
+	tout_ts.tv_nsec = tv0.tv_usec * 1000 + (timeout % 1000) * 1000000;
+	/* normalize: tv_nsec must stay below one second, otherwise
+	 * pthread_cond_timedwait fails with EINVAL */
+	if(tout_ts.tv_nsec >= 1000000000) {
+		tout_ts.tv_sec += tout_ts.tv_nsec / 1000000000;
+		tout_ts.tv_nsec %= 1000000000;
+	}
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(tpool->nactive || tpool->qsize) {
+		if(pthread_cond_timedwait(&tpool->done_condvar,
+					&tpool->workq_mutex, &tout_ts) == ETIMEDOUT) {
+			break;
+		}
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	gettimeofday(&tv, 0);
+	return (tv.tv_sec - tv0.tv_sec) * 1000 + (tv.tv_usec - tv0.tv_usec) / 1000;
+}
+
+int tpool_get_wait_fd(struct thread_pool *tpool)
+{
+	if(tpool->wait_pipe[0] < 0) {
+		if(pipe(tpool->wait_pipe) == -1) {
+			return -1;
+		}
+	}
+	return tpool->wait_pipe[0];
+}
+
+void *tpool_get_wait_handle(struct thread_pool *tpool)
+{
+	static int once;
+	if(!once) {
+		once = 1;
+		fprintf(stderr, "warning: tpool_get_wait_handle call on UNIX does nothing\n");
+	}
+	return 0;
+}
+
+static void send_done_event(struct thread_pool *tpool)
+{
+	if(tpool->wait_pipe[1] >= 0) {
+		write(tpool->wait_pipe[1], tpool, 1);
+	}
+}
+#endif	/* WIN32/UNIX */
+
+static void *thread_func(void *args)
+{
+	struct thread_data *tdata = args;
+	struct thread_pool *tpool = tdata->pool;
+
+	pthread_setspecific(tpool->idkey, (void*)(intptr_t)tdata->id);
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(!tpool->should_quit) {
+		if(!tpool->workq) {
+			pthread_cond_wait(&tpool->workq_condvar, &tpool->workq_mutex);
+			if(tpool->should_quit) break;
+		}
+
+		while(!tpool->should_quit && tpool->workq) {
+			/* grab the first job */
+			struct work_item *job = tpool->workq;
+			tpool->workq = tpool->workq->next;
+			if(!tpool->workq)
+				tpool->workq_tail = 0;
+			++tpool->nactive;
+			--tpool->qsize;
+			pthread_mutex_unlock(&tpool->workq_mutex);
+
+			/* do the job */
+			job->work(job->data);
+			if(job->done) {
+				job->done(job->data);
+			}
+			free_work_item(job);
+
+			pthread_mutex_lock(&tpool->workq_mutex);
+			/* notify everyone interested that we're done with this job */
+			pthread_cond_broadcast(&tpool->done_condvar);
+			send_done_event(tpool);
+			--tpool->nactive;
+		}
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	return 0;
+}
+
+
+int tpool_thread_id(struct thread_pool *tpool)
+{
+	int id = (intptr_t)pthread_getspecific(tpool->idkey);
+	if(id >= tpool->num_threads) {
+		return -1;
+	}
+	return id;
+}
+
+
+/* The following highly platform-specific code detects the number
+ * of processors available in the system. It's used by the thread pool
+ * to autodetect how many threads to spawn.
+ * Currently works on: Linux, BSD, Darwin, and Windows.
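+ * There is no fallback #else branch: on any other platform the function
+ * reaches the end without returning a value, and will need porting.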
+ */ +int tpool_num_processors(void) +{ +#if defined(unix) || defined(__unix__) +# if defined(__bsd__) + /* BSD systems provide the num.processors through sysctl */ + int num, mib[] = {CTL_HW, HW_NCPU}; + size_t len = sizeof num; + + sysctl(mib, 2, &num, &len, 0, 0); + return num; + +# elif defined(__sgi) + /* SGI IRIX flavour of the _SC_NPROC_ONLN sysconf */ + return sysconf(_SC_NPROC_ONLN); +# else + /* Linux (and others?) have the _SC_NPROCESSORS_ONLN sysconf */ + return sysconf(_SC_NPROCESSORS_ONLN); +# endif /* bsd/sgi/other */ + +#elif defined(WIN32) || defined(__WIN32__) + /* under windows we need to call GetSystemInfo */ + SYSTEM_INFO info; + GetSystemInfo(&info); + return info.dwNumberOfProcessors; +#endif +} + +#define MAX_WPOOL_SIZE 64 +static pthread_mutex_t wpool_lock = PTHREAD_MUTEX_INITIALIZER; +static struct work_item *wpool; +static int wpool_size; + +/* work item allocator */ +static struct work_item *alloc_work_item(void) +{ + struct work_item *w; + + pthread_mutex_lock(&wpool_lock); + if(!wpool) { + pthread_mutex_unlock(&wpool_lock); + return malloc(sizeof(struct work_item)); + } + + w = wpool; + wpool = wpool->next; + --wpool_size; + pthread_mutex_unlock(&wpool_lock); + return w; +} + +static void free_work_item(struct work_item *w) +{ + pthread_mutex_lock(&wpool_lock); + if(wpool_size >= MAX_WPOOL_SIZE) { + free(w); + } else { + w->next = wpool; + wpool = w; + ++wpool_size; + } + pthread_mutex_unlock(&wpool_lock); +} diff --git a/liberebus/src/tpool.h b/liberebus/src/tpool.h new file mode 100644 index 0000000..7f763c2 --- /dev/null +++ b/liberebus/src/tpool.h @@ -0,0 +1,86 @@ +/* worker thread pool based on POSIX threads + * author: John Tsiombikas + * This code is public domain. + */ +#ifndef THREADPOOL_H_ +#define THREADPOOL_H_ + +struct thread_pool; + +/* type of the function accepted as work or completion callback */ +typedef void (*tpool_callback)(void*); + +#ifdef __cplusplus +extern "C" { +#endif + +/* if num_threads == 0, auto-detect how many threads to spawn */ +struct thread_pool *tpool_create(int num_threads); +void tpool_destroy(struct thread_pool *tpool); + +/* optional reference counting interface for thread pool sharing */ +int tpool_addref(struct thread_pool *tpool); +int tpool_release(struct thread_pool *tpool); /* will tpool_destroy on nref 0 */ + +/* if begin_batch is called before an enqueue, the worker threads will not be + * signalled to start working until end_batch is called. + */ +void tpool_begin_batch(struct thread_pool *tpool); +void tpool_end_batch(struct thread_pool *tpool); + +/* if enqueue is called without calling begin_batch first, it will immediately + * wake up the worker threads to start working on the enqueued item + */ +int tpool_enqueue(struct thread_pool *tpool, void *data, + tpool_callback work_func, tpool_callback done_func); +/* clear the work queue. 
+ * does not cancel any currently running jobs */
+void tpool_clear(struct thread_pool *tpool);
+
+/* returns the number of queued work items */
+int tpool_queued_jobs(struct thread_pool *tpool);
+/* returns the number of active (working) threads */
+int tpool_active_jobs(struct thread_pool *tpool);
+/* returns the number of pending jobs, both in queue and active */
+int tpool_pending_jobs(struct thread_pool *tpool);
+
+/* wait for all pending jobs to be completed */
+void tpool_wait(struct thread_pool *tpool);
+/* wait until the pending jobs are down to the target specified
+ * for example, to wait until a single job has been completed:
+ *   tpool_wait_pending(tpool, tpool_pending_jobs(tpool) - 1);
+ * this interface is slightly awkward to avoid race conditions. */
+void tpool_wait_pending(struct thread_pool *tpool, int pending_target);
+/* wait for all pending jobs to be completed for up to "timeout" milliseconds */
+long tpool_timedwait(struct thread_pool *tpool, long timeout);
+
+/* return a file descriptor which can be used to wait for pending job
+ * completion events. A single char is written every time a job completes.
+ * You should empty the pipe every time you receive such an event.
+ *
+ * This is a UNIX-specific call. On windows it does nothing.
+ */
+int tpool_get_wait_fd(struct thread_pool *tpool);
+
+/* return an auto-resetting Event HANDLE which can be used to wait for
+ * pending job completion events.
+ *
+ * This is a Win32-specific call. On UNIX it does nothing.
+ */
+void *tpool_get_wait_handle(struct thread_pool *tpool);
+
+/* When called by a work/done callback, it returns the thread number executing
+ * it. From the main thread it returns -1.
+ */
+int tpool_thread_id(struct thread_pool *tpool);
+
+
+/* returns the number of processors on the system.
+ * individual cores in multi-core processors are counted as processors.
+ */
+int tpool_num_processors(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* THREADPOOL_H_ */
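
---
A minimal usage sketch of the thread pool API above (illustrative only, not
part of the commit; the job payload, job count, and output are made up):

	#include <stdio.h>
	#include "tpool.h"

	static struct thread_pool *pool;

	static void work(void *data)
	{
		/* runs on one of the worker threads */
		printf("job %d on worker %d\n", *(int*)data, tpool_thread_id(pool));
	}

	int main(void)
	{
		int i, jobs[16];

		if(!(pool = tpool_create(0))) {	/* 0: spawn one thread per processor */
			return 1;
		}

		tpool_begin_batch(pool);	/* queue everything before waking the workers */
		for(i=0; i<16; i++) {
			jobs[i] = i;
			tpool_enqueue(pool, jobs + i, work, 0);
		}
		tpool_end_batch(pool);

		tpool_wait(pool);	/* block until all 16 jobs have completed */
		tpool_destroy(pool);
		return 0;
	}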