From 03eaea2400647375400c3bc4d927c399e142a901 Mon Sep 17 00:00:00 2001 From: John Tsiombikas Date: Tue, 10 Mar 2020 00:42:32 +0200 Subject: [PATCH] MMX --- Makefile | 23 +++++++++------- libs/imago/src/conv.c | 37 ++++++++++++++++++++++++++ libs/imago/src/imago2.c | 1 + libs/imago/src/imago2.h | 1 + src/dos/gfx.c | 6 +++-- src/dos/main.c | 1 - src/game.c | 2 +- src/game.h | 2 +- src/intro_s.asm | 68 +++++++++++++++++++++++++++++++++++++++++++++++ src/introscr.c | 29 +++++++++++++------- src/sdl/main.c | 2 +- src/util.h | 23 ++++++++++++++++ 12 files changed, 170 insertions(+), 25 deletions(-) create mode 100644 src/intro_s.asm diff --git a/Makefile b/Makefile index 9ff0e58..8f198e5 100644 --- a/Makefile +++ b/Makefile @@ -1,22 +1,27 @@ !ifdef __UNIX__ dosobj = src/dos/main.obj src/dos/gfx.obj src/dos/timer.obj src/dos/watdpmi.obj & - src/dos/vbe.obj src/dos/vga.obj src/dos/keyb.obj src/dos/mouse.obj & - src/dos/logger.obj -scrobj = src/introscr.obj src/menuscr.obj -gameobj = src/game.obj src/util.obj src/gfxutil.obj src/dynarr.obj src/rbtree.obj + src/dos/vbe.obj src/dos/vga.obj src/dos/keyb.obj src/dos/mouse.obj & + src/dos/logger.obj +scrobj = src/introscr.obj src/intro_s.obj src/menuscr.obj +gameobj = src/game.obj src/util.obj src/gfxutil.obj src/dynarr.obj & + src/rbtree.obj gfxobj = src/3dgfx/3dgfx.obj src/3dgfx/mesh.obj src/3dgfx/meshload.obj & - src/3dgfx/polyfill.obj src/3dgfx/polyclip.obj src/sprite.obj + src/3dgfx/polyfill.obj src/3dgfx/polyclip.obj src/sprite.obj + incpath = -Isrc -Isrc/dos -Ilibs/imago/src libpath = libpath libs/imago + !else -dosobj = src\dos\main.obj src\dos\gfx.obj src\dos\timer.obj & - src\dos\watdpmi.obj src\dos\vbe.obj src\dos\vga.obj src\dos\keyb.obj & - src\dos\mouse.obj src\dos\logger.obj -scrobj = src\introscr.obj src\menuscr.obj + +dosobj = src\dos\main.obj src\dos\gfx.obj src\dos\timer.obj src\dos\watdpmi.obj & + src\dos\vbe.obj src\dos\vga.obj src\dos\keyb.obj src\dos\mouse.obj & + src\dos\logger.obj +scrobj = src\introscr.obj src\intro_s.obj src\menuscr.obj gameobj = src\game.obj src\util.obj src\gfxutil.obj src\dynarr.obj & src\rbtree.obj gfxobj = src\3dgfx\3dgfx.obj src\3dgfx\mesh.obj src\3dgfx\meshload.obj & src\3dgfx\polyfill.obj src\3dgfx\polyclip.obj src\sprite.obj + incpath = -Isrc -Isrc\dos -Ilibs\imago\src libpath = libpath libs\imago !endif diff --git a/libs/imago/src/conv.c b/libs/imago/src/conv.c index 51ecd1f..501b3c4 100644 --- a/libs/imago/src/conv.c +++ b/libs/imago/src/conv.c @@ -32,6 +32,7 @@ struct pixel { static void unpack_grey8(struct pixel *unp, void *pptr, int count); static void unpack_rgb24(struct pixel *unp, void *pptr, int count); static void unpack_rgba32(struct pixel *unp, void *pptr, int count); +static void unpack_bgra32(struct pixel *unp, void *pptr, int count); static void unpack_greyf(struct pixel *unp, void *pptr, int count); static void unpack_rgbf(struct pixel *unp, void *pptr, int count); static void unpack_rgbaf(struct pixel *unp, void *pptr, int count); @@ -40,6 +41,7 @@ static void unpack_rgb565(struct pixel *unp, void *pptr, int count); static void pack_grey8(void *pptr, struct pixel *unp, int count); static void pack_rgb24(void *pptr, struct pixel *unp, int count); static void pack_rgba32(void *pptr, struct pixel *unp, int count); +static void pack_bgra32(void *pptr, struct pixel *unp, int count); static void pack_greyf(void *pptr, struct pixel *unp, int count); static void pack_rgbf(void *pptr, struct pixel *unp, int count); static void pack_rgbaf(void *pptr, struct pixel *unp, int count); @@ -50,6 +52,7 @@ static void (*unpack[])(struct pixel*, void*, int) = { unpack_grey8, unpack_rgb24, unpack_rgba32, + unpack_bgra32, unpack_greyf, unpack_rgbf, unpack_rgbaf, @@ -61,6 +64,7 @@ static void (*pack[])(void*, struct pixel*, int) = { pack_grey8, pack_rgb24, pack_rgba32, + pack_bgra32, pack_greyf, pack_rgbf, pack_rgbaf, @@ -145,6 +149,20 @@ static void unpack_rgba32(struct pixel *unp, void *pptr, int count) } } +static void unpack_bgra32(struct pixel *unp, void *pptr, int count) +{ + int i; + unsigned char *pix = pptr; + + for(i=0; ib = (float)*pix++ / 255.0; + unp->g = (float)*pix++ / 255.0; + unp->r = (float)*pix++ / 255.0; + unp->a = (float)*pix++ / 255.0; + unp++; + } +} + static void unpack_greyf(struct pixel *unp, void *pptr, int count) { int i; @@ -256,6 +274,25 @@ static void pack_rgba32(void *pptr, struct pixel *unp, int count) } } +static void pack_bgra32(void *pptr, struct pixel *unp, int count) +{ + int i; + unsigned char *pix = pptr; + + for(i=0; ir * 255.0); + int g = (int)(unp->g * 255.0); + int b = (int)(unp->b * 255.0); + int a = (int)(unp->a * 255.0); + + *pix++ = CLAMP(b, 0, 255); + *pix++ = CLAMP(g, 0, 255); + *pix++ = CLAMP(r, 0, 255); + *pix++ = CLAMP(a, 0, 255); + unp++; + } +} + static void pack_greyf(void *pptr, struct pixel *unp, int count) { int i; diff --git a/libs/imago/src/imago2.c b/libs/imago/src/imago2.c index 816f8c0..65c554d 100644 --- a/libs/imago/src/imago2.c +++ b/libs/imago/src/imago2.c @@ -419,6 +419,7 @@ static int pixel_size(enum img_fmt fmt) case IMG_FMT_RGB24: return 3; case IMG_FMT_RGBA32: + case IMG_FMT_BGRA32: return 4; case IMG_FMT_GREYF: return sizeof(float); diff --git a/libs/imago/src/imago2.h b/libs/imago/src/imago2.h index 2cd1481..8086d28 100644 --- a/libs/imago/src/imago2.h +++ b/libs/imago/src/imago2.h @@ -32,6 +32,7 @@ enum img_fmt { IMG_FMT_GREY8, IMG_FMT_RGB24, IMG_FMT_RGBA32, + IMG_FMT_BGRA32, IMG_FMT_GREYF, IMG_FMT_RGBF, IMG_FMT_RGBAF, diff --git a/src/dos/gfx.c b/src/dos/gfx.c index 671cd8a..c588742 100644 --- a/src/dos/gfx.c +++ b/src/dos/gfx.c @@ -1,10 +1,12 @@ #include #include #include +#include "game.h" #include "cdpmi.h" #include "gfx.h" #include "vbe.h" #include "vga.h" +#include "util.h" #define SAME_BPP(a, b) \ ((a) == (b) || ((a) == 16 && (b) == 15) || ((a) == 15 && (b) == 16) || \ @@ -238,7 +240,7 @@ static void blit_frame_lfb(void *pixels, int vsync) { dbg_fps(pixels); if(vsync) wait_vsync(); - memcpy(vpgaddr[frontidx], pixels, pgsize); + memcpy64(vpgaddr[frontidx], pixels, pgsize >> 3); } static void blit_frame_banked(void *pixels, int vsync) @@ -256,7 +258,7 @@ static void blit_frame_banked(void *pixels, int vsync) pending = pgsize; while(pending > 0) { sz = pending > curmode->bank_size ? curmode->bank_size : pending; - memcpy((void*)0xa0000, pptr, sz); + memcpy64((void*)0xa0000, pptr, sz >> 3); pptr += sz; pending -= sz; vbe_setwin(0, ++offs); diff --git a/src/dos/main.c b/src/dos/main.c index 7d7806b..4a3dc1a 100644 --- a/src/dos/main.c +++ b/src/dos/main.c @@ -10,7 +10,6 @@ static struct video_mode *vmode; static int quit; -static void *vmem; int main(int argc, char **argv) { diff --git a/src/game.c b/src/game.c index 86170cb..b1f87f4 100644 --- a/src/game.c +++ b/src/game.c @@ -6,7 +6,7 @@ int fb_width, fb_height; long fb_size; -uint16_t *fb_pixels; +uint16_t *fb_pixels, *vmem; long time_msec; diff --git a/src/game.h b/src/game.h index a986389..a08ea51 100644 --- a/src/game.h +++ b/src/game.h @@ -8,7 +8,7 @@ extern int fb_width; extern int fb_height; extern long fb_size; -extern uint16_t *fb_pixels; +extern uint16_t *fb_pixels, *vmem; extern long time_msec; diff --git a/src/intro_s.asm b/src/intro_s.asm new file mode 100644 index 0000000..9cb4598 --- /dev/null +++ b/src/intro_s.asm @@ -0,0 +1,68 @@ +; vi:ft=nasm: + section .text + bits 32 + + global fade_image +fade_image: + push ebp + mov ebp, esp + push ebx + + mov eax, [ebp + 8] + mov edx, [ebp + 12] + mov ebx, [ebp + 16] + call fade_image_ + + pop ebx + pop ebp + ret + + ; void fade_image(uint16_t *dest, uint32_t *src, uint16_t fade) + ; - dest points to a 16bit 565 framebuffer + ; - src points to a 32bit RGBX image + ; - fade is 24.8 fixed point [0, 1] + ; watcom register calling convention arguments: eax, edx, ebx + global fade_image_ +fade_image_: + push ecx + push edi + + mov ecx, 640 * 480 + mov edi, eax + + ; take fade and duplicate it across all words of mm1 + movd mm1, ebx ; mm1 [00|00|00|VV] + punpckldq mm1, mm1 ; mm1 [00|VV|00|VV] + packssdw mm1, mm1 ; mm1 [VV|VV|VV|VV] +.loop: + ; grab RGB32 pixel and unpack it to zero-extended words in mm0 + movd mm0, [edx] ; mm0 [??|??|?R|GB] + add edx, 4 + pxor mm7, mm7 + punpcklbw mm0, mm7 ; mm0 [0?|0R|0G|0B] + ; multiply by fade and divide by 256 to drop the decimal part + pmullw mm0, mm1 + psrlw mm0, 10 ; 8 for the div + 2 to make them 666, easier 565 packing + ; pack result into 565 in ax + packuswb mm0, mm0 + movd eax, mm0 + mov ebx, eax + shr al, 1 ; blue in position [........|00RRRRRR|00GGGGGG|000BBBBB] + xor bl, bl + shr bx, 3 + xor ah, ah + or ax, bx ; green in position ...|00RRRRRR|00000GGG|GGGBBBBB] + shr ebx, 6 + and ebx, 0f800h + or eax, ebx ; done [RRRRRGGG|GGGBBBBB] + mov [edi], ax + add edi, 2 + + dec ecx + jnz .loop + + emms ; clear fpu state + + pop edi + pop ecx + ret diff --git a/src/introscr.c b/src/introscr.c index 4d0db19..a836650 100644 --- a/src/introscr.c +++ b/src/introscr.c @@ -5,6 +5,8 @@ #include "gfxutil.h" #include "game.h" +#define USE_MMX + #define FADE_DUR 800 static void *logo; @@ -13,7 +15,7 @@ static long start_time; int intro_init(void) { - if(!(logo = img_load_pixels("data/msglogo.jpg", &logo_width, &logo_height, IMG_FMT_RGB24))) { + if(!(logo = img_load_pixels("data/msglogo.jpg", &logo_width, &logo_height, IMG_FMT_BGRA32))) { fprintf(stderr, "failed to load logo image\n"); return -1; } @@ -37,13 +39,12 @@ void intro_stop(void) { } +void fade_image(void *dest, void *src, uint16_t fade); + void intro_draw(void) { - int i, j; long tm; uint16_t fade; - unsigned char *src = logo; - uint16_t *dest = fb_pixels; tm = time_msec - start_time; if(tm < FADE_DUR) { @@ -57,16 +58,24 @@ void intro_draw(void) //menu_start(); } - for(i=0; i