!ifdef __UNIX__
dosobj = src/dos/main.obj src/dos/gfx.obj src/dos/timer.obj src/dos/watdpmi.obj &
- src/dos/vbe.obj src/dos/vga.obj src/dos/keyb.obj src/dos/mouse.obj &
- src/dos/logger.obj
-scrobj = src/introscr.obj src/menuscr.obj
-gameobj = src/game.obj src/util.obj src/gfxutil.obj src/dynarr.obj src/rbtree.obj
+ src/dos/vbe.obj src/dos/vga.obj src/dos/keyb.obj src/dos/mouse.obj &
+ src/dos/logger.obj
+scrobj = src/introscr.obj src/intro_s.obj src/menuscr.obj
+gameobj = src/game.obj src/util.obj src/gfxutil.obj src/dynarr.obj &
+ src/rbtree.obj
gfxobj = src/3dgfx/3dgfx.obj src/3dgfx/mesh.obj src/3dgfx/meshload.obj &
- src/3dgfx/polyfill.obj src/3dgfx/polyclip.obj src/sprite.obj
+ src/3dgfx/polyfill.obj src/3dgfx/polyclip.obj src/sprite.obj
+
incpath = -Isrc -Isrc/dos -Ilibs/imago/src
libpath = libpath libs/imago
+
!else
-dosobj = src\dos\main.obj src\dos\gfx.obj src\dos\timer.obj &
- src\dos\watdpmi.obj src\dos\vbe.obj src\dos\vga.obj src\dos\keyb.obj &
- src\dos\mouse.obj src\dos\logger.obj
-scrobj = src\introscr.obj src\menuscr.obj
+
+dosobj = src\dos\main.obj src\dos\gfx.obj src\dos\timer.obj src\dos\watdpmi.obj &
+ src\dos\vbe.obj src\dos\vga.obj src\dos\keyb.obj src\dos\mouse.obj &
+ src\dos\logger.obj
+scrobj = src\introscr.obj src\intro_s.obj src\menuscr.obj
gameobj = src\game.obj src\util.obj src\gfxutil.obj src\dynarr.obj &
src\rbtree.obj
gfxobj = src\3dgfx\3dgfx.obj src\3dgfx\mesh.obj src\3dgfx\meshload.obj &
src\3dgfx\polyfill.obj src\3dgfx\polyclip.obj src\sprite.obj
+
incpath = -Isrc -Isrc\dos -Ilibs\imago\src
libpath = libpath libs\imago
!endif
static void unpack_grey8(struct pixel *unp, void *pptr, int count);
static void unpack_rgb24(struct pixel *unp, void *pptr, int count);
static void unpack_rgba32(struct pixel *unp, void *pptr, int count);
+static void unpack_bgra32(struct pixel *unp, void *pptr, int count);
static void unpack_greyf(struct pixel *unp, void *pptr, int count);
static void unpack_rgbf(struct pixel *unp, void *pptr, int count);
static void unpack_rgbaf(struct pixel *unp, void *pptr, int count);
static void pack_grey8(void *pptr, struct pixel *unp, int count);
static void pack_rgb24(void *pptr, struct pixel *unp, int count);
static void pack_rgba32(void *pptr, struct pixel *unp, int count);
+static void pack_bgra32(void *pptr, struct pixel *unp, int count);
static void pack_greyf(void *pptr, struct pixel *unp, int count);
static void pack_rgbf(void *pptr, struct pixel *unp, int count);
static void pack_rgbaf(void *pptr, struct pixel *unp, int count);
unpack_grey8,
unpack_rgb24,
unpack_rgba32,
+ unpack_bgra32,
unpack_greyf,
unpack_rgbf,
unpack_rgbaf,
pack_grey8,
pack_rgb24,
pack_rgba32,
+ pack_bgra32,
pack_greyf,
pack_rgbf,
pack_rgbaf,
}
}
+/* Unpack `count` 4-byte pixels stored in B, G, R, A byte order.
+ * Every byte is converted to float and divided by 255.0 before being
+ * stored in the matching component of the destination pixel.
+ */
+static void unpack_bgra32(struct pixel *unp, void *pptr, int count)
+{
+	unsigned char *src = pptr;
+
+	while(count-- > 0) {
+		/* source byte order is blue, green, red, alpha */
+		unp->b = (float)src[0] / 255.0;
+		unp->g = (float)src[1] / 255.0;
+		unp->r = (float)src[2] / 255.0;
+		unp->a = (float)src[3] / 255.0;
+		src += 4;
+		unp++;
+	}
+}
+
static void unpack_greyf(struct pixel *unp, void *pptr, int count)
{
int i;
}
}
+/* Pack `count` pixels into 4 bytes each, written in B, G, R, A order.
+ * Every component is multiplied by 255.0, truncated to int and then
+ * limited to the 0-255 range with CLAMP before it is stored.
+ */
+static void pack_bgra32(void *pptr, struct pixel *unp, int count)
+{
+	unsigned char *dst = pptr;
+
+	for(; count > 0; count--, unp++, dst += 4) {
+		int red = (int)(unp->r * 255.0);
+		int green = (int)(unp->g * 255.0);
+		int blue = (int)(unp->b * 255.0);
+		int alpha = (int)(unp->a * 255.0);
+
+		/* destination byte order is blue, green, red, alpha */
+		dst[0] = CLAMP(blue, 0, 255);
+		dst[1] = CLAMP(green, 0, 255);
+		dst[2] = CLAMP(red, 0, 255);
+		dst[3] = CLAMP(alpha, 0, 255);
+	}
+}
+
static void pack_greyf(void *pptr, struct pixel *unp, int count)
{
int i;
case IMG_FMT_RGB24:
return 3;
case IMG_FMT_RGBA32:
+ case IMG_FMT_BGRA32:
return 4;
case IMG_FMT_GREYF:
return sizeof(float);
IMG_FMT_GREY8,
IMG_FMT_RGB24,
IMG_FMT_RGBA32,
+ IMG_FMT_BGRA32,
IMG_FMT_GREYF,
IMG_FMT_RGBF,
IMG_FMT_RGBAF,
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include "game.h"
#include "cdpmi.h"
#include "gfx.h"
#include "vbe.h"
#include "vga.h"
+#include "util.h"
#define SAME_BPP(a, b) \
((a) == (b) || ((a) == 16 && (b) == 15) || ((a) == 15 && (b) == 16) || \
{
dbg_fps(pixels);
if(vsync) wait_vsync();
- memcpy(vpgaddr[frontidx], pixels, pgsize);
+ memcpy64(vpgaddr[frontidx], pixels, pgsize >> 3);
}
static void blit_frame_banked(void *pixels, int vsync)
pending = pgsize;
while(pending > 0) {
sz = pending > curmode->bank_size ? curmode->bank_size : pending;
- memcpy((void*)0xa0000, pptr, sz);
+ memcpy64((void*)0xa0000, pptr, sz >> 3);
pptr += sz;
pending -= sz;
vbe_setwin(0, ++offs);
static struct video_mode *vmode;
static int quit;
-static void *vmem;
int main(int argc, char **argv)
{
int fb_width, fb_height;
long fb_size;
-uint16_t *fb_pixels;
+uint16_t *fb_pixels, *vmem;
long time_msec;
extern int fb_width;
extern int fb_height;
extern long fb_size;
-extern uint16_t *fb_pixels;
+extern uint16_t *fb_pixels, *vmem;
extern long time_msec;
--- /dev/null
+; vi:ft=nasm:
+ section .text
+ bits 32
+
+ ; fade_image: entry point taking its three arguments on the stack.
+ ; It loads them from the stack frame into eax, edx and ebx and calls
+ ; fade_image_, which expects them in those registers.
+ global fade_image
+fade_image:
+ push ebp
+ mov ebp, esp
+ ; ebx is overwritten with the third argument below, so save it first
+ push ebx
+
+ mov eax, [ebp + 8] ; first argument
+ mov edx, [ebp + 12] ; second argument
+ mov ebx, [ebp + 16] ; third argument
+ call fade_image_
+
+ pop ebx
+ pop ebp
+ ret
+
+	; void fade_image(uint16_t *dest, uint32_t *src, uint16_t fade)
+	; Multiplies every pixel of a 640x480 32bit image by fade and stores
+	; the result as 16bit 565 pixels.
+	; - dest points to a 16bit 565 framebuffer
+	; - src points to a 32bit RGBX image (blue in the lowest byte of each
+	;   dword, the top byte does not reach the output)
+	; - fade is fixed point with 8 fractional bits, [0, 1] (256 is 1.0)
+	; watcom register calling convention arguments: eax, edx, ebx
+	; eax, edx, ebx, mm0, mm1 and mm7 are overwritten; ecx and edi are
+	; saved and restored
+	global fade_image_
+fade_image_:
+	push ecx
+	push edi
+
+	mov ecx, 640 * 480	; number of pixels to process
+	mov edi, eax
+
+	; fade is a 16bit argument: clear the upper half of ebx so that stale
+	; bits cannot reach the saturating pack below
+	movzx ebx, bx
+	; take fade and duplicate it across all words of mm1
+	movd mm1, ebx		; mm1 [00|00|00|VV]
+	punpckldq mm1, mm1	; mm1 [00|VV|00|VV]
+	packssdw mm1, mm1	; mm1 [VV|VV|VV|VV]
+	; mm7 is only read inside the loop, so zero it once up front
+	pxor mm7, mm7
+.loop:
+	; grab RGB32 pixel and unpack it to zero-extended words in mm0
+	movd mm0, [edx]		; mm0 [??|??|?R|GB]
+	add edx, 4
+	punpcklbw mm0, mm7	; mm0 [0?|0R|0G|0B]
+	; multiply by fade and divide by 256 to drop the decimal part
+	pmullw mm0, mm1
+	psrlw mm0, 10	; 8 for the div + 2 to make them 666, easier 565 packing
+	; pack result into 565 in ax
+	packuswb mm0, mm0
+	movd eax, mm0
+	mov ebx, eax
+	shr al, 1	; blue in position [........|00RRRRRR|00GGGGGG|000BBBBB]
+	xor bl, bl
+	shr bx, 3
+	xor ah, ah
+	or ax, bx	; green in position [...|00RRRRRR|00000GGG|GGGBBBBB]
+	shr ebx, 6
+	and ebx, 0f800h	; keep only the 5 red bits, now in bits 11-15
+	or eax, ebx	; done [RRRRRGGG|GGGBBBBB]
+	mov [edi], ax
+	add edi, 2
+
+	dec ecx
+	jnz .loop
+
+	emms	; clear fpu state
+
+	pop edi
+	pop ecx
+	ret
#include "gfxutil.h"
#include "game.h"
+#define USE_MMX
+
#define FADE_DUR 800
static void *logo;
int intro_init(void)
{
- if(!(logo = img_load_pixels("data/msglogo.jpg", &logo_width, &logo_height, IMG_FMT_RGB24))) {
+ if(!(logo = img_load_pixels("data/msglogo.jpg", &logo_width, &logo_height, IMG_FMT_BGRA32))) {
fprintf(stderr, "failed to load logo image\n");
return -1;
}
{
}
+void fade_image(void *dest, void *src, uint16_t fade);
+
void intro_draw(void)
{
- int i, j;
long tm;
uint16_t fade;
- unsigned char *src = logo;
- uint16_t *dest = fb_pixels;
tm = time_msec - start_time;
if(tm < FADE_DUR) {
//menu_start();
}
- for(i=0; i<fb_height; i++) {
- for(j=0; j<fb_width; j++) {
- uint16_t r = (uint16_t)*src++ * fade / 256;
- uint16_t g = (uint16_t)*src++ * fade / 256;
- uint16_t b = (uint16_t)*src++ * fade / 256;
+#ifdef USE_MMX
+ fade_image(fb_pixels, logo, fade);
+#else
+ {
+ int i, j;
+ uint32_t *src = logo;
+ uint16_t *dest = fb_pixels;
+ for(i=0; i<640*480; i++) {
+ uint32_t pix = *src++;
+ uint16_t r = (uint16_t)UNPACK_R32(pix) * fade / 256;
+ uint16_t g = (uint16_t)UNPACK_G32(pix) * fade / 256;
+ uint16_t b = (uint16_t)UNPACK_B32(pix) * fade / 256;
*dest++ = PACK_RGB16(r, g, b);
}
}
+#endif
- blit_frame(fb_pixels, 1);
+ blit_frame(fb_pixels, 0);
}
void intro_keyb(int key, int pressed)
fprintf(stderr, "failed to allocate virtual framebuffer\n");
return 1;
}
- fb_pixels = (uint16_t*)((char*)fb_buf + FB_WIDTH * 2);
+ vmem = fb_pixels = (uint16_t*)((char*)fb_buf + FB_WIDTH * 2);
SDL_Init(SDL_INIT_VIDEO | SDL_INIT_TIMER | SDL_INIT_NOPARACHUTE);
if(!(fbsurf = SDL_SetVideoMode(xsz, ysz, FB_BPP, sdl_flags))) {
extern uint32_t perf_start_count, perf_interval_count;
#ifdef __WATCOMC__
+/* memcpy64: copy `count` 8-byte blocks from src to dest, moving one
+ * quadword through mm0 per iteration and issuing emms when done.
+ * Arguments are taken in registers: dest in ebx, src in edx, count in ecx.
+ * count is a number of 8-byte blocks, not a number of bytes.
+ * NOTE(review): the loop decrements and tests ecx only after the first
+ * copy, so count must be at least 1; a count of 0 would wrap around.
+ */
+void memcpy64(void *dest, void *src, int count);
+#pragma aux memcpy64 = \
+ "cploop:" \
+ "movq mm0, [edx]" \
+ "movq [ebx], mm0" \
+ "add edx, 8" \
+ "add ebx, 8" \
+ "dec ecx" \
+ "jnz cploop" \
+ "emms" \
+ parm[ebx][edx][ecx];
+
void perf_start(void);
#pragma aux perf_start = \
"xor eax, eax" \
#endif
#ifdef __GNUC__
+#define memcpy64(dest, src, count) asm volatile ( \
+ "0:\n\t" \
+ "movq (%1), %%mm0\n\t" \
+ "movq %%mm0, (%0)\n\t" \
+ "add $8, %1\n\t" \
+ "add $8, %0\n\t" \
+ "dec %2\n\t" \
+ "jnz 0b\n\t" \
+ "emms\n\t" \
+ :: "r"(dest), "r"(src), "r"(count))
+
#define perf_start() asm volatile ( \
"xor %%eax, %%eax\n" \
"cpuid\n" \