obj = $(src:.c=.o)
bin = fbgfx
-CFLAGS = -pedantic -Wall -g
-LDFLAGS = -limago -lm
+CFLAGS = -pedantic -Wall -g -O3
+LDFLAGS = -limago -lm -lpthread
$(bin): $(obj)
$(CC) -o $@ $(obj) $(LDFLAGS)
#include "fbgfx.h"
#include "fbevents.h"
#include "tunnel.h"
+#include "timer.h"
+
+unsigned long start_msec, time_msec, num_frames;
static void keyboard(int key, int pressed, void *cls);
static void mouse(int bn, int pressed, int x, int y, void *cls);
int main(void)
{
fbgfx_save_video_mode();
- if(!(vmem = fbgfx_set_video_mode(800, 600, 16))) {
+ fbgfx_get_video_mode(&xsz, &ysz, &depth);
+
+ if(!(vmem = fbgfx_set_video_mode(xsz, ysz, 16))) {
return 1;
}
fbgfx_get_video_mode(&xsz, &ysz, &depth);
if(depth != 16) {
+ fprintf(stderr, "failed to set color depth: 16bpp\n");
goto end;
}
if(fbev_init() == -1) {
goto end;
}
+ start_msec = get_time_msec();
for(;;) {
fbev_update();
if(quit) break;
+ time_msec = get_time_msec() - start_msec;
+
draw_tunnel(vmem);
+ ++num_frames;
}
+ time_msec = get_time_msec() - start_msec;
end:
destroy_tunnel();
fbev_shutdown();
fbgfx_restore_video_mode();
+ if(num_frames && time_msec) {
+ printf("\ravg framerate: %.1f\n", (float)num_frames / ((float)time_msec / 1000.0));
+ }
return 0;
}
case 27:
case 'q':
case 'Q':
- exit(0);
+ quit = 1;
+ break;
}
}
--- /dev/null
+#include "timer.h"
+
+#if defined(__APPLE__) && !defined(__unix__)
+#define __unix__
+#endif
+
+#ifdef __unix__
+#include <time.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#ifdef CLOCK_MONOTONIC
+/* Milliseconds elapsed since the first call, measured with CLOCK_MONOTONIC.
+ * The first call only records the reference timestamp and returns 0.
+ */
+unsigned long get_time_msec(void)
+{
+	static struct timespec epoch;
+	struct timespec now;
+	time_t dsec;
+	long dnsec;
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+
+	/* an all-zero epoch means no reference point has been captured yet */
+	if(!epoch.tv_sec && !epoch.tv_nsec) {
+		epoch = now;
+		return 0;
+	}
+
+	dsec = now.tv_sec - epoch.tv_sec;
+	dnsec = now.tv_nsec - epoch.tv_nsec;
+	return dsec * 1000 + dnsec / 1000000;
+}
+#else /* no fancy POSIX clocks, fallback to good'ol gettimeofday */
+/* Milliseconds elapsed since the first call, measured with gettimeofday.
+ * The first call only records the reference timestamp and returns 0.
+ */
+unsigned long get_time_msec(void)
+{
+	static struct timeval epoch;
+	struct timeval now;
+
+	gettimeofday(&now, 0);
+
+	/* an all-zero epoch means no reference point has been captured yet */
+	if(!epoch.tv_sec && !epoch.tv_usec) {
+		epoch = now;
+		return 0;
+	}
+
+	return (now.tv_sec - epoch.tv_sec) * 1000 +
+		(now.tv_usec - epoch.tv_usec) / 1000;
+}
+#endif /* !posix clock */
+
+/* Suspend the calling thread for msec milliseconds.
+ *
+ * nanosleep is used instead of usleep: POSIX allows usleep to fail with
+ * EINVAL for intervals of one second or more, and msec * 1000 could also
+ * overflow its argument for long sleeps. Splitting the interval into whole
+ * seconds and the sub-second remainder avoids both problems.
+ * The call is not restarted if a signal interrupts it.
+ */
+void sleep_msec(unsigned long msec)
+{
+	struct timespec ts;
+
+	ts.tv_sec = msec / 1000;
+	ts.tv_nsec = (long)(msec % 1000) * 1000000L;
+	nanosleep(&ts, 0);
+}
+#endif
+
+#ifdef WIN32
+#include <windows.h>
+#pragma comment(lib, "winmm.lib")
+
+/* Windows implementation: returns whatever timeGetTime() reports,
+ * without rebasing it to the first call.
+ */
+unsigned long get_time_msec(void)
+{
+ return timeGetTime();
+}
+
+/* Windows implementation: hands msec straight to Sleep(). */
+void sleep_msec(unsigned long msec)
+{
+ Sleep(msec);
+}
+#endif
+
+/* Same clock as get_time_msec, expressed in seconds.
+ * The division is done in double precision; dividing by a float constant
+ * would first round the millisecond count to float (24-bit mantissa) and
+ * lose millisecond resolution once the counter grows.
+ */
+double get_time_sec(void)
+{
+	return get_time_msec() / 1000.0;
+}
+
+/* Sleep for sec seconds by delegating to sleep_msec.
+ * Does nothing unless sec is strictly greater than zero.
+ */
+void sleep_sec(double sec)
+{
+	if(!(sec > 0.0f)) {
+		return;
+	}
+	sleep_msec(sec * 1000.0f);
+}
--- /dev/null
+#ifndef TIMER_H_
+#define TIMER_H_
+
+unsigned long get_time_msec(void);
+void sleep_msec(unsigned long msec);
+
+double get_time_sec(void);
+void sleep_sec(double sec);
+
+#endif /* TIMER_H_ */
--- /dev/null
+/* worker thread pool based on POSIX threads
+ * author: John Tsiombikas <nuclear@member.fsf.org>
+ * This code is public domain.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include "tpool.h"
+
+/* One queued job: a node of the pool's singly linked work queue. */
+struct work_item {
+ /* opaque argument handed to both callbacks */
+ void *data;
+ /* work is run first; done, if not null, is run right after it */
+ tpool_callback work, done;
+ /* next job in the queue, or null for the last one */
+ struct work_item *next;
+};
+
+/* State of a worker thread pool. The queue, qsize and nactive are
+ * modified while holding workq_mutex.
+ */
+struct thread_pool {
+ /* array of num_threads worker thread handles */
+ pthread_t *threads;
+ int num_threads;
+
+ /* number of jobs currently sitting in the queue */
+ int qsize;
+ /* head and tail of the linked list of queued jobs */
+ struct work_item *workq, *workq_tail;
+ pthread_mutex_t workq_mutex;
+ /* workers sleep on this; broadcast on enqueue, end of batch and shutdown */
+ pthread_cond_t workq_condvar;
+
+ int nactive; /* number of active workers (not sleeping) */
+
+ /* broadcast by a worker each time it finishes a job */
+ pthread_cond_t done_condvar;
+
+ /* set by tpool_destroy; workers leave their loop once they see it */
+ int should_quit;
+ /* set by tpool_begin_batch, cleared by tpool_end_batch; while set,
+  * tpool_enqueue does not wake the workers
+  */
+ int in_batch;
+};
+
+static void *thread_func(void *args);
+
+/* Create a thread pool with num_threads worker threads.
+ *
+ * If num_threads <= 0, one worker per detected processor is spawned (at
+ * least one). Returns the new pool, or 0 on failure. On failure the workers
+ * that were already started are stopped and joined, and everything that was
+ * allocated or initialized here is released.
+ */
+struct thread_pool *tpool_create(int num_threads)
+{
+	int i, started = 0;
+	struct thread_pool *tpool;
+
+	if(!(tpool = calloc(1, sizeof *tpool))) {
+		return 0;
+	}
+	pthread_mutex_init(&tpool->workq_mutex, 0);
+	pthread_cond_init(&tpool->workq_condvar, 0);
+	pthread_cond_init(&tpool->done_condvar, 0);
+
+	if(num_threads <= 0) {
+		num_threads = tpool_num_processors();
+		if(num_threads <= 0) {
+			num_threads = 1;	/* detection failed: fall back to one worker */
+		}
+	}
+	tpool->num_threads = num_threads;
+
+	if(!(tpool->threads = calloc(num_threads, sizeof *tpool->threads))) {
+		goto fail;
+	}
+	for(started=0; started<num_threads; started++) {
+		/* pthread_create returns 0 or a positive error number, never -1 */
+		if(pthread_create(tpool->threads + started, 0, thread_func, tpool) != 0) {
+			break;
+		}
+	}
+	if(started == num_threads) {
+		return tpool;
+	}
+
+	/* Stop only the workers that did start. The flag is raised and the
+	 * condvar broadcast with the mutex held, so a worker is either already
+	 * waiting (and gets woken) or will see the flag before it waits.
+	 */
+	pthread_mutex_lock(&tpool->workq_mutex);
+	tpool->should_quit = 1;
+	pthread_cond_broadcast(&tpool->workq_condvar);
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	for(i=0; i<started; i++) {
+		pthread_join(tpool->threads[i], 0);
+	}
+
+fail:
+	free(tpool->threads);
+	pthread_mutex_destroy(&tpool->workq_mutex);
+	pthread_cond_destroy(&tpool->workq_condvar);
+	pthread_cond_destroy(&tpool->done_condvar);
+	free(tpool);
+	return 0;
+}
+
+/* Shut down and free a thread pool. Passing a null pointer is a no-op.
+ *
+ * Jobs still in the queue are discarded, the workers are asked to quit and
+ * joined (progress is printed to stdout), and finally the synchronization
+ * objects and the pool structure itself are released. The pointer must not
+ * be used after this call.
+ */
+void tpool_destroy(struct thread_pool *tpool)
+{
+	int i;
+	if(!tpool) return;
+
+	tpool_clear(tpool);
+
+	/* Raise the quit flag and wake the workers with the queue mutex held.
+	 * Without the lock a worker could test the flag, then miss the broadcast
+	 * just before it starts waiting, and the join below would never return.
+	 */
+	pthread_mutex_lock(&tpool->workq_mutex);
+	tpool->should_quit = 1;
+	pthread_cond_broadcast(&tpool->workq_condvar);
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	if(tpool->threads) {
+		printf("thread_pool: waiting for %d worker threads to stop ", tpool->num_threads);
+		fflush(stdout);
+
+		for(i=0; i<tpool->num_threads; i++) {
+			pthread_join(tpool->threads[i], 0);
+			putchar('.');
+			fflush(stdout);
+		}
+		putchar('\n');
+		free(tpool->threads);
+	}
+
+	pthread_mutex_destroy(&tpool->workq_mutex);
+	pthread_cond_destroy(&tpool->workq_condvar);
+	pthread_cond_destroy(&tpool->done_condvar);
+
+	/* the pool was allocated by tpool_create and is opaque to callers,
+	 * so this is the only place that can release it
+	 */
+	free(tpool);
+}
+
+/* Mark the start of a batch: sets in_batch, which makes tpool_enqueue skip
+ * the wake-up broadcast until tpool_end_batch is called.
+ * The flag is written without taking workq_mutex.
+ */
+void tpool_begin_batch(struct thread_pool *tpool)
+{
+ tpool->in_batch = 1;
+}
+
+/* Mark the end of a batch: clears in_batch and broadcasts workq_condvar
+ * so that waiting workers wake up.
+ */
+void tpool_end_batch(struct thread_pool *tpool)
+{
+ tpool->in_batch = 0;
+ pthread_cond_broadcast(&tpool->workq_condvar);
+}
+
+/* Append a job to the end of the work queue.
+ *
+ * work_func is called with data by whichever worker picks the job up;
+ * done_func, if not null, is called with data right afterwards.
+ * Unless a batch is in progress, the workers are woken immediately.
+ * Returns 0 on success, -1 if the queue node could not be allocated.
+ */
+int tpool_enqueue(struct thread_pool *tpool, void *data,
+		tpool_callback work_func, tpool_callback done_func)
+{
+	struct work_item *item = malloc(sizeof *item);
+
+	if(!item) {
+		return -1;
+	}
+	item->data = data;
+	item->work = work_func;
+	item->done = done_func;
+	item->next = 0;
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	if(!tpool->workq) {
+		tpool->workq = item;		/* queue was empty: item is also the head */
+	} else {
+		tpool->workq_tail->next = item;
+	}
+	tpool->workq_tail = item;
+	tpool->qsize++;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	if(tpool->in_batch) {
+		return 0;	/* wake-up is deferred to tpool_end_batch */
+	}
+	pthread_cond_broadcast(&tpool->workq_condvar);
+	return 0;
+}
+
+/* Drop every job that is still waiting in the queue and free its node.
+ * Jobs that a worker has already picked up are not affected.
+ */
+void tpool_clear(struct thread_pool *tpool)
+{
+	struct work_item *item, *next;
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	for(item = tpool->workq; item; item = next) {
+		next = item->next;
+		free(item);
+	}
+	tpool->workq = 0;
+	tpool->workq_tail = 0;
+	tpool->qsize = 0;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+}
+
+/* Number of jobs waiting in the queue, read under the queue mutex. */
+int tpool_queued_jobs(struct thread_pool *tpool)
+{
+	int queued;
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	queued = tpool->qsize;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	return queued;
+}
+
+/* Number of workers currently running a job, read under the queue mutex. */
+int tpool_active_jobs(struct thread_pool *tpool)
+{
+	int active;
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	active = tpool->nactive;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	return active;
+}
+
+/* Number of jobs not yet finished: queued plus currently running.
+ * Both counters are sampled in the same critical section.
+ */
+int tpool_pending_jobs(struct thread_pool *tpool)
+{
+	int pending;
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	pending = tpool->nactive + tpool->qsize;
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	return pending;
+}
+
+/* Block until the queue is empty and no worker is running a job. */
+void tpool_wait(struct thread_pool *tpool)
+{
+	pthread_mutex_lock(&tpool->workq_mutex);
+	for(;;) {
+		if(!tpool->nactive && !tpool->qsize) {
+			break;
+		}
+		/* woken by a worker each time it finishes a job */
+		pthread_cond_wait(&tpool->done_condvar, &tpool->workq_mutex);
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+}
+
+/* Wait until at most target jobs are pending (queued + running).
+ * Must be called with workq_mutex held; the mutex is released while
+ * sleeping and held again on return.
+ */
+static void wait_pending_locked(struct thread_pool *tpool, int target)
+{
+	while(tpool->qsize + tpool->nactive > target) {
+		pthread_cond_wait(&tpool->done_condvar, &tpool->workq_mutex);
+	}
+}
+
+/* Wait until the number of pending jobs has dropped to pending_target or
+ * below. This is the function declared in tpool.h, which previously had no
+ * definition. A negative target is treated as 0 (wait for everything).
+ */
+void tpool_wait_pending(struct thread_pool *tpool, int pending_target)
+{
+	if(pending_target < 0) {
+		pending_target = 0;
+	}
+	pthread_mutex_lock(&tpool->workq_mutex);
+	wait_pending_locked(tpool, pending_target);
+	pthread_mutex_unlock(&tpool->workq_mutex);
+}
+
+/* Wait until one fewer job is pending than at the time of the call.
+ * Returns immediately if nothing is pending. The current count is sampled
+ * and waited on inside a single critical section, so no completion can
+ * slip in between the two steps.
+ */
+void tpool_wait_one(struct thread_pool *tpool)
+{
+	int cur_pending;
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	cur_pending = tpool->qsize + tpool->nactive;
+	if(cur_pending) {
+		wait_pending_locked(tpool, cur_pending - 1);
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+}
+
+/* Wait for all pending jobs to finish, for at most timeout milliseconds.
+ * A negative timeout is treated as 0. Returns the number of milliseconds
+ * actually spent in the call.
+ */
+long tpool_timedwait(struct thread_pool *tpool, long timeout)
+{
+	struct timespec tout_ts;
+	struct timeval tv0, tv;
+	long nsec;
+
+	if(timeout < 0) {
+		timeout = 0;
+	}
+	gettimeofday(&tv0, 0);
+
+	/* Absolute deadline. The nanosecond sum can reach almost 2e9, so the
+	 * excess is carried into the seconds field: pthread_cond_timedwait
+	 * rejects tv_nsec >= 1e9 with EINVAL.
+	 */
+	nsec = tv0.tv_usec * 1000 + (timeout % 1000) * 1000000;
+	tout_ts.tv_sec = tv0.tv_sec + timeout / 1000 + nsec / 1000000000;
+	tout_ts.tv_nsec = nsec % 1000000000;
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(tpool->nactive || tpool->qsize) {
+		/* stop on timeout and on any other error, rather than spinning
+		 * on a call that keeps failing
+		 */
+		if(pthread_cond_timedwait(&tpool->done_condvar,
+					&tpool->workq_mutex, &tout_ts) != 0) {
+			break;
+		}
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	gettimeofday(&tv, 0);
+	return (tv.tv_sec - tv0.tv_sec) * 1000 + (tv.tv_usec - tv0.tv_usec) / 1000;
+}
+
+/* Worker thread main loop; args is the owning struct thread_pool.
+ *
+ * Repeatedly takes the job at the head of the queue, runs its work callback
+ * and then its done callback (if any) with the queue mutex released, frees
+ * the queue node and signals done_condvar. Sleeps on workq_condvar only
+ * while the queue is empty, and exits once should_quit is set.
+ */
+static void *thread_func(void *args)
+{
+	struct thread_pool *tpool = args;
+
+	pthread_mutex_lock(&tpool->workq_mutex);
+	while(!tpool->should_quit) {
+		struct work_item *job;
+
+		/* Check the queue before sleeping: jobs enqueued (and broadcast)
+		 * before this thread got here would otherwise be missed until
+		 * some later wake-up.
+		 */
+		if(!tpool->workq) {
+			pthread_cond_wait(&tpool->workq_condvar, &tpool->workq_mutex);
+			continue;
+		}
+
+		/* grab the first job */
+		job = tpool->workq;
+		tpool->workq = job->next;
+		if(!tpool->workq) {
+			tpool->workq_tail = 0;
+		}
+		++tpool->nactive;
+		--tpool->qsize;
+		pthread_mutex_unlock(&tpool->workq_mutex);
+
+		/* do the job */
+		job->work(job->data);
+		if(job->done) {
+			job->done(job->data);
+		}
+		/* the node was allocated by tpool_enqueue; nobody else refers to
+		 * it once it has been unlinked from the queue
+		 */
+		free(job);
+
+		pthread_mutex_lock(&tpool->workq_mutex);
+		--tpool->nactive;
+		/* notify everyone interested that we're done with this job */
+		pthread_cond_broadcast(&tpool->done_condvar);
+	}
+	pthread_mutex_unlock(&tpool->workq_mutex);
+
+	return 0;
+}
+
+
+/* The following highly platform-specific code detects the number
+ * of processors available in the system. It's used by the thread pool
+ * to autodetect how many threads to spawn.
+ * Currently works on: Linux, BSD, Darwin, and Windows.
+ */
+
+#if defined(__APPLE__) && defined(__MACH__)
+# ifndef __unix__
+# define __unix__ 1
+# endif /* unix */
+# ifndef __bsd__
+# define __bsd__ 1
+# endif /* bsd */
+#endif /* apple */
+
+#if defined(unix) || defined(__unix__)
+#include <unistd.h>
+
+# ifdef __bsd__
+# include <sys/sysctl.h>
+# endif
+#endif
+
+#if defined(WIN32) || defined(__WIN32__)
+#include <windows.h>
+#endif
+
+
+/* Return the number of processors reported by the platform, and never less
+ * than 1: if the query fails, or the platform is not one of those handled
+ * below, 1 is returned so that callers can use the result as a thread count
+ * without further checks.
+ */
+int tpool_num_processors(void)
+{
+	long num = 0;
+
+#if defined(unix) || defined(__unix__)
+# if defined(__bsd__)
+	/* BSD systems provide the num.processors through sysctl */
+	int ncpu = 0, mib[] = {CTL_HW, HW_NCPU};
+	size_t len = sizeof ncpu;
+
+	if(sysctl(mib, 2, &ncpu, &len, 0, 0) == 0) {
+		num = ncpu;
+	}
+
+# elif defined(__sgi)
+	/* SGI IRIX flavour of the _SC_NPROC_ONLN sysconf */
+	num = sysconf(_SC_NPROC_ONLN);
+# else
+	/* Linux (and others?) have the _SC_NPROCESSORS_ONLN sysconf */
+	num = sysconf(_SC_NPROCESSORS_ONLN);
+# endif /* bsd/sgi/other */
+
+#elif defined(WIN32) || defined(__WIN32__)
+	/* under windows we need to call GetSystemInfo */
+	SYSTEM_INFO info;
+	GetSystemInfo(&info);
+	num = info.dwNumberOfProcessors;
+#endif
+
+	/* sysconf returns -1 on error; clamp anything non-positive */
+	return num > 0 ? (int)num : 1;
+}
--- /dev/null
+/* worker thread pool based on POSIX threads
+ * author: John Tsiombikas <nuclear@member.fsf.org>
+ * This code is public domain.
+ */
+#ifndef THREADPOOL_H_
+#define THREADPOOL_H_
+
+struct thread_pool;
+
+/* type of the function accepted as work or completion callback */
+typedef void (*tpool_callback)(void*);
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* if num_threads == 0, auto-detect how many threads to spawn */
+struct thread_pool *tpool_create(int num_threads);
+void tpool_destroy(struct thread_pool *tpool);
+
+/* if begin_batch is called before an enqueue, the worker threads will not be
+ * signalled to start working until end_batch is called.
+ */
+void tpool_begin_batch(struct thread_pool *tpool);
+void tpool_end_batch(struct thread_pool *tpool);
+
+/* if enqueue is called without calling begin_batch first, it will immediately
+ * wake up the worker threads to start working on the enqueued item
+ */
+int tpool_enqueue(struct thread_pool *tpool, void *data,
+ tpool_callback work_func, tpool_callback done_func);
+/* clear the work queue. does not cancel any currently running jobs */
+void tpool_clear(struct thread_pool *tpool);
+
+/* returns the number of queued work items */
+int tpool_queued_jobs(struct thread_pool *tpool);
+/* returns the number of active (working) threads */
+int tpool_active_jobs(struct thread_pool *tpool);
+/* returns the number of pending jobs, both in queue and active */
+int tpool_pending_jobs(struct thread_pool *tpool);
+
+/* wait for all pending jobs to be completed */
+void tpool_wait(struct thread_pool *tpool);
+/* wait until the pending jobs are down to the target specified
+ * for example, to wait until a single job has been completed:
+ * tpool_wait_pending(tpool, tpool_pending_jobs(tpool) - 1);
+ * this interface is slightly awkward to avoid race conditions. */
+void tpool_wait_pending(struct thread_pool *tpool, int pending_target);
+/* wait for all pending jobs to be completed for up to "timeout" milliseconds */
+long tpool_timedwait(struct thread_pool *tpool, long timeout);
+
+/* returns the number of processors on the system.
+ * individual cores in multi-core processors are counted as processors.
+ */
+int tpool_num_processors(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* THREADPOOL_H_ */
#include <stdlib.h>
#include <math.h>
#include <imago2.h>
+#include "tpool.h"
#include "tunnel.h"
-static int xsz, ysz;
+#define TEX_FNAME "data/grid.png"
+#define TEX_USCALE 4
+#define TEX_VSCALE 2
+
+#define USCALE 2
+#define VSCALE 1
+
+extern unsigned long time_msec;
+
+static void draw_tunnel_range(unsigned short *pixels, int starty, int num_lines);
+static int count_bits(unsigned int x);
+static int count_zeros(unsigned int x);
+
+static int xsz, ysz, vxsz, vysz;
static unsigned int *tunnel_map;
+static unsigned char *tunnel_fog;
+
+static int tex_xsz, tex_ysz;
+static unsigned int *tex_pixels;
+static int tex_xshift, tex_yshift;
+static unsigned int tex_xmask, tex_ymask;
+
+static struct thread_pool *tpool;
int init_tunnel(int x, int y)
{
- int i, j;
+ int i, j, n;
unsigned int *tmap;
+ unsigned char *fog;
+ float aspect = (float)x / (float)y;
xsz = x;
ysz = y;
+ vxsz = xsz / USCALE;
+ vysz = ysz / VSCALE;
- printf("precalculating tunnel map...\n");
-
- if(!(tunnel_map = malloc(xsz * ysz * sizeof *tunnel_map))) {
+ if(!(tunnel_map = malloc(vxsz * vysz * sizeof *tunnel_map))) {
fprintf(stderr, "failed to allocate tunnel map\n");
return -1;
}
+ if(!(tunnel_fog = malloc(vxsz * vysz))) {
+ fprintf(stderr, "failed to allocate tunnel fog map\n");
+ return -1;
+ }
+
tmap = tunnel_map;
+ fog = tunnel_fog;
- for(i=0; i<ysz; i++) {
- float y = 2.0 * (float)i / (float)ysz - 0.5;
- for(j=0; j<xsz; j++) {
- float x = 2.0 * (float)j / (float)xsz - 0.5;
+ for(i=0; i<vysz; i++) {
+ float y = 2.0 * (float)i / (float)vysz - 1.0;
+ for(j=0; j<vxsz; j++) {
+ float x = aspect * (2.0 * (float)j / (float)vxsz - 1.0);
float tu = atan2(y, x) / M_PI * 0.5 + 0.5;
- float tv = sqrt(x*x + y*y);
+ float d = sqrt(x * x + y * y);
+ float tv = d == 0.0 ? 0.0 : 1.0 / d;
+
+ int tx = (int)(tu * 65535.0 * TEX_USCALE) & 0xffff;
+ int ty = (int)(tv * 65535.0 * TEX_VSCALE) & 0xffff;
- int tx = (int)(tu * 65535.0) & 0xffff;
- int ty = (int)(tv * 65535.0) & 0xffff;
+ int f = (int)(d * 95.0);
*tmap++ = (tx << 16) | ty;
+ *fog++ = f > 255 ? 255 : f;
}
}
+ if(!(tex_pixels = img_load_pixels(TEX_FNAME, &tex_xsz, &tex_ysz, IMG_FMT_RGBA32))) {
+ fprintf(stderr, "failed to load image " TEX_FNAME "\n");
+ return -1;
+ }
+ if((count_bits(tex_xsz) | count_bits(tex_ysz)) != 1) {
+ fprintf(stderr, "non-pow2 image (%dx%d)\n", tex_xsz, tex_ysz);
+ return -1;
+ }
+
+ n = count_zeros(tex_xsz);
+ for(i=0; i<n; i++) {
+ tex_xmask |= 1 << i;
+ }
+ tex_xshift = n;
+
+ n = count_zeros(tex_ysz);
+ for(i=0; i<n; i++) {
+ tex_ymask |= 1 << i;
+ }
+ tex_yshift = n;
+
+ if(!(tpool = tpool_create(0))) {
+ fprintf(stderr, "failed to create thread pool\n");
+ return -1;
+ }
+
return 0;
}
void destroy_tunnel(void)
{
+ tpool_destroy(tpool);
free(tunnel_map);
+ free(tunnel_fog);
+}
+
+/* number of jobs draw_tunnel submits per frame, and size of the work array */
+#define NUM_WORK_ITEMS 32
+
+/* Arguments of one rendering job, passed to work_func through the thread
+ * pool and forwarded unchanged to draw_tunnel_range.
+ */
+static struct work {
+ unsigned short *pixels;
+ int starty, num_lines;
+} work[NUM_WORK_ITEMS];
+
+/* Thread pool callback: cls points to the struct work describing the job,
+ * whose fields are forwarded to draw_tunnel_range.
+ */
+static void work_func(void *cls)
+{
+	struct work *job = cls;
+
+	draw_tunnel_range(job->pixels, job->starty, job->num_lines);
}
+/* Render one frame of the tunnel into pixels.
+ * The vysz rows of the tunnel map are split into NUM_WORK_ITEMS horizontal
+ * bands which are drawn in parallel by the thread pool; the function returns
+ * once every band has been drawn.
+ */
void draw_tunnel(unsigned short *pixels)
{
- int i, j, r, g, b;
- unsigned int *tmap = tunnel_map;
+	int i;
+
+	for(i=0; i<NUM_WORK_ITEMS; i++) {
+		/* Proportional band boundaries: the bands always add up to exactly
+		 * vysz rows, so no rows are left undrawn at the bottom when vysz is
+		 * not a multiple of NUM_WORK_ITEMS.
+		 */
+		int starty = i * vysz / NUM_WORK_ITEMS;
+		int endy = (i + 1) * vysz / NUM_WORK_ITEMS;
+
+		if(endy <= starty) {
+			continue;	/* fewer rows than bands: nothing to do for this one */
+		}
+
+		work[i].pixels = pixels;
+		work[i].starty = starty;
+		work[i].num_lines = endy - starty;
+
+		/* if the job can't be queued, draw the band on this thread instead
+		 * of silently skipping it
+		 */
+		if(tpool_enqueue(tpool, work + i, work_func, 0) == -1) {
+			work_func(work + i);
+		}
+	}
+	tpool_wait(tpool);
+}
+
+/* Pack 8-bit r, g, b components into a 16-bit pixel with 5 bits of red in
+ * the top bits, 6 bits of green in the middle and 5 bits of blue at the
+ * bottom. Each component is reduced by dropping its low bits; blue needs
+ * the same >> 3 as red, otherwise its low bits are kept instead of its
+ * high bits.
+ */
+#define PACK_RGB16(r, g, b) \
+	(((((r) >> 3) & 0x1f) << 11) | ((((g) >> 2) & 0x3f) << 5) | (((b) >> 3) & 0x1f))
+
+static void draw_tunnel_range(unsigned short *pixels, int starty, int num_lines)
+{
+ int i, j, k, r, g, b;
+ unsigned int *tmap = tunnel_map + starty * vxsz;
+ unsigned char *fog = tunnel_fog + starty * vxsz;
+
+ long toffs = time_msec / 4;
+ pixels += starty * xsz * VSCALE;
- for(i=0; i<ysz; i++) {
- for(j=0; j<xsz; j++) {
- unsigned int tx = (*tmap >> 16) & 0xffff;
- unsigned int ty = *tmap & 0xffff;
+ for(i=0; i<num_lines; i++) {
+ for(j=0; j<vxsz; j++) {
+ unsigned short *ptr;
+ unsigned int col;
+ unsigned int tx = (((*tmap >> 16) & 0xffff) << tex_xshift) >> 16;
+ unsigned int ty = ((*tmap & 0xffff) << tex_yshift) >> 16;
++tmap;
- r = tx >> 8;
- g = ty >> 8;
+ tx += toffs;
+ ty += toffs << 1;
+
+ tx &= tex_xmask;
+ ty &= tex_ymask;
- *pixels++ = ((((r >> 3) & 0x1f) << 11) |
- (((g >> 2) & 0x3f) << 5));/* |
- ((b >> 3) & 0x1f));*/
+ col = tex_pixels[(ty << tex_xshift) + tx];
+ r = col & 0xff;
+ g = (col >> 8) & 0xff;
+ b = (col >> 16) & 0xff;
+
+ r = (r * *fog) >> 8;
+ g = (g * *fog) >> 8;
+ b = (b * *fog) >> 8;
+ ++fog;
+
+ col = ((((r >> 3) & 0x1f) << 11) | (((g >> 2) & 0x3f) << 5) | ((b >> 3) & 0x1f));
+
+ ptr = pixels;
+ for(k=0; k<VSCALE; k++) {
+ switch(USCALE) {
+ case 4:
+ ptr[3] = col;
+ case 3:
+ ptr[2] = col;
+ case 2:
+ ptr[1] = col;
+ case 1:
+ *ptr = col;
+ }
+ ptr += xsz;
+ }
+ pixels += USCALE;
}
+ pixels += xsz * (VSCALE - 1);
+ }
+}
+
+/* Return how many bits of x are set. */
+static int count_bits(unsigned int x)
+{
+	int nbits = 0;
+
+	/* consume x from the least significant end until nothing is left */
+	for(; x; x >>= 1) {
+		nbits += x & 1;
+	}
+	return nbits;
+}
+
+/* Return the number of consecutive zero bits at the least significant end
+ * of x, examining at most 32 bits (so the result for 0 is 32).
+ */
+static int count_zeros(unsigned int x)
+{
+	int num = 0;
+
+	while(num < 32 && !(x & 1)) {
+		x >>= 1;
+		++num;
}
+	return num;
}