From: John Tsiombikas
Date: Tue, 22 Dec 2020 04:27:51 +0000 (+0200)
Subject: added thread pool, started fleshing out the render job system
X-Git-Url: http://git.mutantstargoat.com/user/nuclear/?p=erebus2020;a=commitdiff_plain;h=c70b7e63be9882e589a17b457900b05aca96ebb9

added thread pool, started fleshing out the render job system
---

diff --git a/liberebus/src/erebus.c b/liberebus/src/erebus.c
index af2b2d9..32af87b 100644
--- a/liberebus/src/erebus.c
+++ b/liberebus/src/erebus.c
@@ -1,5 +1,13 @@
 #include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <pthread.h>
 #include "erebus.h"
+#include "tpool.h"
+
+struct render_job {
+	struct erb_rend *erb;
+	int x, y, width, height;
+	struct render_job *next;
+};
 
 struct erb_rend {
 	int fb_width, fb_height, fb_npix;
@@ -7,10 +15,24 @@ struct erb_rend {
 	int *fb_nsamples;
 };
 
+static void proc_render_job(void *cls);
+static struct render_job *alloc_job(void);
+static void free_job(struct render_job *job);
+
+static struct thread_pool *tpool;
+
+
 struct erb_rend *erb_create(void)
 {
 	struct erb_rend *erb;
 
+	if(!tpool) {
+		if(!(tpool = tpool_create(0))) {
+			fprintf(stderr, "erb_create: fatal error, failed to create thread pool!\n");
+			return 0;
+		}
+	}
+
 	if(!(erb = calloc(1, sizeof *erb))) {
 		return 0;
 	}
@@ -93,3 +115,106 @@ float *erb_end(struct erb_rend *erb)
 
 	return erb->fb_pixels;
 }
+
+void erb_queue_frame(struct erb_rend *erb)
+{
+	erb_queue_block(erb, 0, 0, erb->fb_width, erb->fb_height);
+}
+
+void erb_queue_block(struct erb_rend *erb, int x, int y, int width, int height)
+{
+	struct render_job *job;
+
+	if(!(job = alloc_job())) {
+		fprintf(stderr, "erb_queue_block: failed to allocate rendering job\n");
+		return;
+	}
+
+	job->erb = erb;
+	job->x = x;
+	job->y = y;
+	job->width = width;
+	job->height = height;
+
+	tpool_enqueue(tpool, job, proc_render_job, 0);
+}
+
+void erb_wait(struct erb_rend *erb)
+{
+	/* XXX should we have a per-renderer instance thread pool, to wait only for our own jobs?
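+	 * (right now a single global tpool is shared by every erb_rend instance,
+	 * so this also waits on blocks queued by other renderers)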
+	 */
+	tpool_wait(tpool);
+}
+
+void erb_primary_ray(struct erb_rend *erb, struct erb_ray *ray, int sample)
+{
+	/* TODO */
+}
+
+void erb_sample_ray(struct erb_rend *erb, struct erb_ray *ray, float *col)
+{
+	/* TODO */
+	col[0] = fmod(col[0] + 1.0f, 1.0f);
+	col[1] = col[2] = fmod(col[1] + 0.33f, 1.0f);
+}
+
+static void proc_render_job(void *cls)
+{
+	int i, j, fboffs;
+	struct erb_rend *erb;
+	struct render_job *job = cls;
+	float *fbptr;
+	int *nsptr;
+	struct erb_ray ray;
+
+	erb = job->erb;
+	fboffs = job->y * erb->fb_width + job->x;
+	fbptr = erb->fb_pixels + fboffs * 3;
+	nsptr = erb->fb_nsamples + fboffs;
+
+	for(i=0; i<job->height; i++) {
+		for(j=0; j<job->width; j++) {
+			erb_primary_ray(erb, &ray, *nsptr++);
+			erb_sample_ray(erb, &ray, fbptr);
+			fbptr += 3;
+		}
+		/* skip to the start of the block in the next framebuffer scanline */
+		fbptr += (erb->fb_width - job->width) * 3;
+		nsptr += erb->fb_width - job->width;
+	}
+	free_job(job);
+}
+
+
+#define MAX_JOB_POOL 128
+static struct render_job *job_pool;
+static int num_free_jobs;
+static pthread_mutex_t job_pool_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static struct render_job *alloc_job(void)
+{
+	struct render_job *job;
+
+	pthread_mutex_lock(&job_pool_lock);
+	if(!job_pool) {
+		pthread_mutex_unlock(&job_pool_lock);
+		return malloc(sizeof *job);
+	}
+
+	job = job_pool;
+	job_pool = job->next;
+	num_free_jobs--;
+	pthread_mutex_unlock(&job_pool_lock);
+	return job;
+}
+
+static void free_job(struct render_job *job)
+{
+	pthread_mutex_lock(&job_pool_lock);
+	if(num_free_jobs >= MAX_JOB_POOL) {
+		pthread_mutex_unlock(&job_pool_lock);
+		free(job);
+		return;
+	}
+
+	job->next = job_pool;
+	job_pool = job;
+	num_free_jobs++;
+	pthread_mutex_unlock(&job_pool_lock);
+}
diff --git a/liberebus/src/erebus.h b/liberebus/src/erebus.h
index fcb097b..a2566d8 100644
--- a/liberebus/src/erebus.h
+++ b/liberebus/src/erebus.h
@@ -70,6 +70,13 @@ void erb_begin(struct erb_rend *erb);
 /* finalizes the frame, averaging samples (optional) */
 float *erb_end(struct erb_rend *erb);
 
+void erb_queue_frame(struct erb_rend *erb);
+void erb_queue_block(struct erb_rend *erb, int x, int y, int width, int height);
+void erb_wait(struct erb_rend *erb);
+
+void erb_primary_ray(struct erb_rend *erb, struct erb_ray *ray, int sample);
+void erb_sample_ray(struct erb_rend *erb, struct erb_ray *ray, float *col);
+
 /* transformation nodes */
 struct erb_node *erb_node(void);
 void erb_free_node(struct erb_node *n);
diff --git a/liberebus/src/tpool.c b/liberebus/src/tpool.c
new file mode 100644
index 0000000..7364bc6
--- /dev/null
+++ b/liberebus/src/tpool.c
@@ -0,0 +1,488 @@
+/* worker thread pool based on POSIX threads
+ * author: John Tsiombikas
+ * This code is public domain.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include <pthread.h>
+#include "tpool.h"
+
+#if defined(__APPLE__) && defined(__MACH__)
+# ifndef __unix__
+# define __unix__ 1
+# endif /* unix */
+# ifndef __bsd__
+# define __bsd__ 1
+# endif /* bsd */
+#endif /* apple */
+
+#if defined(unix) || defined(__unix__)
+#include <unistd.h>
+#include <sys/time.h>
+
+# ifdef __bsd__
+# include <sys/sysctl.h>
+# endif
+#endif
+
+#if defined(WIN32) || defined(__WIN32__)
+#include <windows.h>
+#endif
+
+
+struct work_item {
+	void *data;
+	tpool_callback work, done;
+	struct work_item *next;
+};
+
+struct thread_data {
+	int id;
+	struct thread_pool *pool;
+};
+
+struct thread_pool {
+	pthread_t *threads;
+	struct thread_data *tdata;
+	int num_threads;
+	pthread_key_t idkey;
+
+	int qsize;
+	struct work_item *workq, *workq_tail;
+	pthread_mutex_t workq_mutex;
+	pthread_cond_t workq_condvar;
+
+	int nactive;	/* number of active workers (not sleeping) */
+
+	pthread_cond_t done_condvar;
+
+	int should_quit;
+	int in_batch;
+
+	int nref;	/* reference count */
+
+#if defined(WIN32) || defined(__WIN32__)
+	HANDLE wait_event;
+#else
+	int wait_pipe[2];
+#endif
+};
+
+static void *thread_func(void *args);
+static void send_done_event(struct thread_pool *tpool);
+
+static struct work_item *alloc_work_item(void);
+static void free_work_item(struct work_item *w);
+
+
+struct thread_pool *tpool_create(int num_threads)
+{
+	int i;
+	struct thread_pool *tpool;
+
+	if(!(tpool = calloc(1, sizeof *tpool))) {
+		return 0;
+	}
+	pthread_mutex_init(&tpool->workq_mutex, 0);
+	pthread_cond_init(&tpool->workq_condvar, 0);
+	pthread_cond_init(&tpool->done_condvar, 0);
+	pthread_key_create(&tpool->idkey, 0);
+
+	pthread_setspecific(tpool->idkey, (void*)0xffffffff);
+
+#if !defined(WIN32) && !defined(__WIN32__)
+	tpool->wait_pipe[0] = tpool->wait_pipe[1] = -1;
+#endif
+
+	if(num_threads <= 0) {
+		num_threads = tpool_num_processors();
+	}
+	tpool->num_threads = num_threads;
+
+	if(!(tpool->threads = calloc(num_threads, sizeof *tpool->threads))) {
+		free(tpool);
+		return 0;
+	}
+	if(!(tpool->tdata = malloc(num_threads * sizeof *tpool->tdata))) {
+		free(tpool->threads);
+		free(tpool);
+		return 0;
+	}
+
+	for(i=0; i<num_threads; i++) {
+		tpool->tdata[i].id = i;
+		tpool->tdata[i].pool = tpool;
+
+		/* pthread_create returns an errno value on failure, not -1 */
+		if(pthread_create(tpool->threads + i, 0, thread_func, tpool->tdata + i) != 0) {
+			/*tpool->threads[i] = 0;*/
+			tpool_destroy(tpool);
+			return 0;
+		}
+	}
+	return tpool;
+}
+
+void tpool_destroy(struct thread_pool *tpool)
+{
+	int i;
+	if(!tpool) return;
+
+	tpool_clear(tpool);
+	tpool->should_quit = 1;
+
+	pthread_cond_broadcast(&tpool->workq_condvar);
+
+	if(tpool->threads) {
+		for(i=0; i<tpool->num_threads; i++) {
+			pthread_join(tpool->threads[i], 0);
+		}
+		putchar('\n');
+		free(tpool->threads);
+	}
+	free(tpool->tdata);
+
+	/* also wake up anyone waiting on the wait* calls */
+	tpool->nactive = 0;
+	pthread_cond_broadcast(&tpool->done_condvar);
+	send_done_event(tpool);
+
+	pthread_mutex_destroy(&tpool->workq_mutex);
+	pthread_cond_destroy(&tpool->workq_condvar);
+	pthread_cond_destroy(&tpool->done_condvar);
+	pthread_key_delete(tpool->idkey);
+
+#if defined(WIN32) || defined(__WIN32__)
+	if(tpool->wait_event) {
+		CloseHandle(tpool->wait_event);
+	}
+#else
+	if(tpool->wait_pipe[0] >= 0) {
+		close(tpool->wait_pipe[0]);
+		close(tpool->wait_pipe[1]);
+	}
+#endif
+	/* finally free the pool structure itself */
+	free(tpool);
+}
+
+int tpool_addref(struct thread_pool *tpool)
+{
+	return ++tpool->nref;
+}
+
+int tpool_release(struct thread_pool *tpool)
+{
+	if(--tpool->nref <= 0) {
+		tpool_destroy(tpool);
+		return 0;
+	}
+	return tpool->nref;
+}
+
+void tpool_begin_batch(struct thread_pool *tpool)
+{
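+	/* while in_batch is set, tpool_enqueue adds work to the queue without
+	 * waking the workers; tpool_end_batch clears it and broadcasts workq_condvar */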
+	tpool->in_batch = 1;
+}
+
+void tpool_end_batch(struct thread_pool *tpool)
+{
+	tpool->in_batch = 0;
+	pthread_cond_broadcast(&tpool->workq_condvar);
+}
+
+int tpool_enqueue(struct thread_pool *tpool, void *data,
+		tpool_callback work_func, tpool_callback done_func)
+{
+	struct work_item *job;
+
+	if(!(job = alloc_work_item())) {
+		return -1;
+	}
+	job->work = work_func;
+	job->done = done_func;
+	job->data = data;
+	job->next = 0;
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	if(tpool->workq) {
+		tpool->workq_tail->next = job;
+		tpool->workq_tail = job;
+	} else {
+		tpool->workq = tpool->workq_tail = job;
+	}
+	++tpool->qsize;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	if(!tpool->in_batch) {
+		pthread_cond_broadcast(&tpool->workq_condvar);
+	}
+	return 0;
+}
+
+void tpool_clear(struct thread_pool *tpool)
+{
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(tpool->workq) {
+		void *tmp = tpool->workq;
+		tpool->workq = tpool->workq->next;
+		free(tmp);
+	}
+	tpool->workq = tpool->workq_tail = 0;
+	tpool->qsize = 0;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+}
+
+int tpool_queued_jobs(struct thread_pool *tpool)
+{
+	int res;
+	pthread_mutex_lock(&tpool->workq_mutex);
+	res = tpool->qsize;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+	return res;
+}
+
+int tpool_active_jobs(struct thread_pool *tpool)
+{
+	int res;
+	pthread_mutex_lock(&tpool->workq_mutex);
+	res = tpool->nactive;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+	return res;
+}
+
+int tpool_pending_jobs(struct thread_pool *tpool)
+{
+	int res;
+	pthread_mutex_lock(&tpool->workq_mutex);
+	res = tpool->qsize + tpool->nactive;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+	return res;
+}
+
+void tpool_wait(struct thread_pool *tpool)
+{
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(tpool->nactive || tpool->qsize) {
+		pthread_cond_wait(&tpool->done_condvar, &tpool->workq_mutex);
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+}
+
+void tpool_wait_pending(struct thread_pool *tpool, int pending_target)
+{
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(tpool->qsize + tpool->nactive > pending_target) {
+		pthread_cond_wait(&tpool->done_condvar, &tpool->workq_mutex);
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+}
+
+#if defined(WIN32) || defined(__WIN32__)
+long tpool_timedwait(struct thread_pool *tpool, long timeout)
+{
+	fprintf(stderr, "tpool_timedwait currently unimplemented on windows\n");
+	abort();
+	return 0;
+}
+
+/* TODO: actually does this work with MinGW ? */
+int tpool_get_wait_fd(struct thread_pool *tpool)
+{
+	static int once;
+	if(!once) {
+		once = 1;
+		fprintf(stderr, "warning: tpool_get_wait_fd call on Windows does nothing\n");
+	}
+	return 0;
+}
+
+void *tpool_get_wait_handle(struct thread_pool *tpool)
+{
+	if(!tpool->wait_event) {
+		if(!(tpool->wait_event = CreateEvent(0, 0, 0, 0))) {
+			return 0;
+		}
+	}
+	return tpool->wait_event;
+}
+
+static void send_done_event(struct thread_pool *tpool)
+{
+	if(tpool->wait_event) {
+		SetEvent(tpool->wait_event);
+	}
+}
+
+#else	/* UNIX */
+
+long tpool_timedwait(struct thread_pool *tpool, long timeout)
+{
+	struct timespec tout_ts;
+	struct timeval tv0, tv;
+	long sec = timeout / 1000;
+
+	gettimeofday(&tv0, 0);
+
+	tout_ts.tv_sec = tv0.tv_sec + sec;
+	tout_ts.tv_nsec = tv0.tv_usec * 1000 + (timeout % 1000) * 1000000;
+	/* normalize: tv_nsec must stay below one second, otherwise
+	 * pthread_cond_timedwait fails with EINVAL */
+	if(tout_ts.tv_nsec >= 1000000000) {
+		tout_ts.tv_sec += tout_ts.tv_nsec / 1000000000;
+		tout_ts.tv_nsec %= 1000000000;
+	}
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(tpool->nactive || tpool->qsize) {
+		if(pthread_cond_timedwait(&tpool->done_condvar,
+					&tpool->workq_mutex, &tout_ts) == ETIMEDOUT) {
+			break;
+		}
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	gettimeofday(&tv, 0);
+	return (tv.tv_sec - tv0.tv_sec) * 1000 + (tv.tv_usec - tv0.tv_usec) / 1000;
+}
+
+int tpool_get_wait_fd(struct thread_pool *tpool)
+{
+	if(tpool->wait_pipe[0] < 0) {
+		if(pipe(tpool->wait_pipe) == -1) {
+			return -1;
+		}
+	}
+	return tpool->wait_pipe[0];
+}
+
+void *tpool_get_wait_handle(struct thread_pool *tpool)
+{
+	static int once;
+	if(!once) {
+		once = 1;
+		fprintf(stderr, "warning: tpool_get_wait_handle call on UNIX does nothing\n");
+	}
+	return 0;
+}
+
+static void send_done_event(struct thread_pool *tpool)
+{
+	if(tpool->wait_pipe[1] >= 0) {
+		write(tpool->wait_pipe[1], tpool, 1);
+	}
+}
+#endif	/* WIN32/UNIX */
+
+static void *thread_func(void *args)
+{
+	struct thread_data *tdata = args;
+	struct thread_pool *tpool = tdata->pool;
+
+	pthread_setspecific(tpool->idkey, (void*)(intptr_t)tdata->id);
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(!tpool->should_quit) {
+		if(!tpool->workq) {
+			pthread_cond_wait(&tpool->workq_condvar, &tpool->workq_mutex);
+			if(tpool->should_quit) break;
+		}
+
+		while(!tpool->should_quit && tpool->workq) {
+			/* grab the first job */
+			struct work_item *job = tpool->workq;
+			tpool->workq = tpool->workq->next;
+			if(!tpool->workq)
+				tpool->workq_tail = 0;
+			++tpool->nactive;
+			--tpool->qsize;
+			pthread_mutex_unlock(&tpool->workq_mutex);
+
+			/* do the job */
+			job->work(job->data);
+			if(job->done) {
+				job->done(job->data);
+			}
+			free_work_item(job);
+
+			pthread_mutex_lock(&tpool->workq_mutex);
+			/* notify everyone interested that we're done with this job */
+			pthread_cond_broadcast(&tpool->done_condvar);
+			send_done_event(tpool);
+			--tpool->nactive;
+		}
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	return 0;
+}
+
+
+int tpool_thread_id(struct thread_pool *tpool)
+{
+	int id = (intptr_t)pthread_getspecific(tpool->idkey);
+	if(id >= tpool->num_threads) {
+		return -1;
+	}
+	return id;
+}
+
+
+/* The following highly platform-specific code detects the number
+ * of processors available in the system. It's used by the thread pool
+ * to autodetect how many threads to spawn.
+ * Currently works on: Linux, BSD, Darwin, and Windows.
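+ * There is no fallback #else branch: on any other platform the function
+ * reaches the end without returning a value, and will need porting.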
+ */ +int tpool_num_processors(void) +{ +#if defined(unix) || defined(__unix__) +# if defined(__bsd__) + /* BSD systems provide the num.processors through sysctl */ + int num, mib[] = {CTL_HW, HW_NCPU}; + size_t len = sizeof num; + + sysctl(mib, 2, &num, &len, 0, 0); + return num; + +# elif defined(__sgi) + /* SGI IRIX flavour of the _SC_NPROC_ONLN sysconf */ + return sysconf(_SC_NPROC_ONLN); +# else + /* Linux (and others?) have the _SC_NPROCESSORS_ONLN sysconf */ + return sysconf(_SC_NPROCESSORS_ONLN); +# endif /* bsd/sgi/other */ + +#elif defined(WIN32) || defined(__WIN32__) + /* under windows we need to call GetSystemInfo */ + SYSTEM_INFO info; + GetSystemInfo(&info); + return info.dwNumberOfProcessors; +#endif +} + +#define MAX_WPOOL_SIZE 64 +static pthread_mutex_t wpool_lock = PTHREAD_MUTEX_INITIALIZER; +static struct work_item *wpool; +static int wpool_size; + +/* work item allocator */ +static struct work_item *alloc_work_item(void) +{ + struct work_item *w; + + pthread_mutex_lock(&wpool_lock); + if(!wpool) { + pthread_mutex_unlock(&wpool_lock); + return malloc(sizeof(struct work_item)); + } + + w = wpool; + wpool = wpool->next; + --wpool_size; + pthread_mutex_unlock(&wpool_lock); + return w; +} + +static void free_work_item(struct work_item *w) +{ + pthread_mutex_lock(&wpool_lock); + if(wpool_size >= MAX_WPOOL_SIZE) { + free(w); + } else { + w->next = wpool; + wpool = w; + ++wpool_size; + } + pthread_mutex_unlock(&wpool_lock); +} diff --git a/liberebus/src/tpool.h b/liberebus/src/tpool.h new file mode 100644 index 0000000..7f763c2 --- /dev/null +++ b/liberebus/src/tpool.h @@ -0,0 +1,86 @@ +/* worker thread pool based on POSIX threads + * author: John Tsiombikas + * This code is public domain. + */ +#ifndef THREADPOOL_H_ +#define THREADPOOL_H_ + +struct thread_pool; + +/* type of the function accepted as work or completion callback */ +typedef void (*tpool_callback)(void*); + +#ifdef __cplusplus +extern "C" { +#endif + +/* if num_threads == 0, auto-detect how many threads to spawn */ +struct thread_pool *tpool_create(int num_threads); +void tpool_destroy(struct thread_pool *tpool); + +/* optional reference counting interface for thread pool sharing */ +int tpool_addref(struct thread_pool *tpool); +int tpool_release(struct thread_pool *tpool); /* will tpool_destroy on nref 0 */ + +/* if begin_batch is called before an enqueue, the worker threads will not be + * signalled to start working until end_batch is called. + */ +void tpool_begin_batch(struct thread_pool *tpool); +void tpool_end_batch(struct thread_pool *tpool); + +/* if enqueue is called without calling begin_batch first, it will immediately + * wake up the worker threads to start working on the enqueued item + */ +int tpool_enqueue(struct thread_pool *tpool, void *data, + tpool_callback work_func, tpool_callback done_func); +/* clear the work queue. 
+ * does not cancel any currently running jobs */
+void tpool_clear(struct thread_pool *tpool);
+
+/* returns the number of queued work items */
+int tpool_queued_jobs(struct thread_pool *tpool);
+/* returns the number of active (working) threads */
+int tpool_active_jobs(struct thread_pool *tpool);
+/* returns the number of pending jobs, both in queue and active */
+int tpool_pending_jobs(struct thread_pool *tpool);
+
+/* wait for all pending jobs to be completed */
+void tpool_wait(struct thread_pool *tpool);
+/* wait until the pending jobs are down to the target specified
+ * for example, to wait until a single job has been completed:
+ *   tpool_wait_pending(tpool, tpool_pending_jobs(tpool) - 1);
+ * this interface is slightly awkward to avoid race conditions. */
+void tpool_wait_pending(struct thread_pool *tpool, int pending_target);
+/* wait for all pending jobs to be completed for up to "timeout" milliseconds */
+long tpool_timedwait(struct thread_pool *tpool, long timeout);
+
+/* return a file descriptor which can be used to wait for pending job
+ * completion events. A single char is written every time a job completes.
+ * You should empty the pipe every time you receive such an event.
+ *
+ * This is a UNIX-specific call. On windows it does nothing.
+ */
+int tpool_get_wait_fd(struct thread_pool *tpool);
+
+/* return an auto-resetting Event HANDLE which can be used to wait for
+ * pending job completion events.
+ *
+ * This is a Win32-specific call. On UNIX it does nothing.
+ */
+void *tpool_get_wait_handle(struct thread_pool *tpool);
+
+/* When called by a work/done callback, it returns the thread number executing
+ * it. From the main thread it returns -1.
+ */
+int tpool_thread_id(struct thread_pool *tpool);
+
+
+/* returns the number of processors on the system.
+ * individual cores in multi-core processors are counted as processors.
+ */
+int tpool_num_processors(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif	/* THREADPOOL_H_ */
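
---
A minimal usage sketch of the thread pool API above (illustrative only, not
part of the commit; the job payload, job count, and output are made up):

	#include <stdio.h>
	#include "tpool.h"

	static struct thread_pool *pool;

	static void work(void *data)
	{
		/* runs on one of the worker threads */
		printf("job %d on worker %d\n", *(int*)data, tpool_thread_id(pool));
	}

	int main(void)
	{
		int i, jobs[16];

		if(!(pool = tpool_create(0))) {	/* 0: spawn one thread per processor */
			return 1;
		}

		tpool_begin_batch(pool);	/* queue everything before waking the workers */
		for(i=0; i<16; i++) {
			jobs[i] = i;
			tpool_enqueue(pool, jobs + i, work, 0);
		}
		tpool_end_batch(pool);

		tpool_wait(pool);	/* block until all 16 jobs have completed */
		tpool_destroy(pool);
		return 0;
	}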