#include <stdlib.h>
#include <string.h>
#include "3dgfx.h"
+#include "video.h"
#include "util.h"
+struct rect {
+ int x0, y0, x1, y1;
+};
+
+static void reset_dirty(struct rect *r);
+
static int32_t mvmat[16];
static int32_t pmat[16];
static int vp[4];
-static int dirty_x0, dirty_x1, dirty_y0, dirty_y1;
+static struct rect dirty_rect[2];
+static unsigned int cur_dirty;
unsigned char *g3d_fbpixels;
int g3d_width, g3d_height;
mvmat[0] = mvmat[5] = mvmat[10] = mvmat[15] = 0x10000;
pmat[0] = pmat[5] = pmat[10] = pmat[15] = 0x10000;
g3d_curcidx = 0xff;
+
+ cur_dirty = 0;
return 0;
}
void g3d_framebuffer(int width, int height, void *fb)
{
- g3d_fbpixels = fb;
+ g3d_fbpixels = fb ? fb : vid_backbuf;
g3d_width = width;
g3d_height = height;
vp[2] = width;
vp[3] = height;
- g3d_reset_dirty();
+ reset_dirty(dirty_rect);
+ reset_dirty(dirty_rect + 1);
}
-void g3d_reset_dirty(void)
+/* Point the renderer at a different framebuffer without touching the
+ * viewport or the dirty rectangles.
+ * fb: address to draw into; NULL selects the current video back buffer
+ *     (vid_backbuf), the same fallback g3d_framebuffer() uses.
+ */
+void g3d_framebuf_addr(void *fb)
{
- dirty_x0 = XRES;
- dirty_y0 = YRES;
- dirty_x1 = 0;
- dirty_y1 = 0;
+ g3d_fbpixels = fb ? fb : vid_backbuf;
}
-void blkclear(void *p, int len, int col);
-#pragma aux blkclear = \
- "mov ah, al" \
- "shl eax, 8" \
- "mov al, ah" \
- "shl eax, 8" \
- "mov al, ah" \
- "shr ecx, 2" \
- "rep stosd" \
- parm [edi] [ecx] [eax] \
- modify [eax ecx edi];
-
-extern volatile long nticks;
-#define COL 0
-
-void g3d_clear_dirty(void)
+static void reset_dirty(struct rect *r)
{
- unsigned char *ptr;
- int i, count, nlines;
-
- if(dirty_x0 < 0) dirty_x0 = 0;
- if(dirty_y0 < 0) dirty_y0 = 0;
- if(dirty_x1 >= XRES) dirty_x1 = XRES - 1;
- if(dirty_y1 >= YRES) dirty_y1 = YRES - 1;
-
- nlines = dirty_y1 - dirty_y0;
- if(dirty_y1 <= 0 || nlines >= YRES) {
- blkclear(g3d_fbpixels, XRES * YRES, COL);
- goto end;
- }
-
- ptr = g3d_fbpixels + dirty_y0 * XRES;
- if(dirty_x1 > XRES - 4) {
- blkclear(ptr, nlines * XRES, COL);
- goto end;
- }
-
- ptr = (unsigned char*)((uintptr_t)(ptr + dirty_x0) & 0xfffffffc);
- count = dirty_x1 + 3 - dirty_x0;
- for(i=0; i<nlines; i++) {
- blkclear(ptr, count, COL);
- ptr += XRES;
- }
-end:
- g3d_reset_dirty();
+ r->x0 = XRES;
+ r->y0 = YRES;
+ r->x1 = 0;
+ r->y1 = 0;
}
-
-void vmemcopy(long fboffs, void *p, int len);
-#pragma aux vmemcopy = \
- "mov edi, 0xa0000" \
- "add edi, eax" \
- "shr ecx, 2" \
- "rep movsd" \
- parm [eax] [esi] [ecx] \
- modify [ecx edi esi];
-
-void g3d_copy_dirty(void)
+void g3d_clear_dirty(void)
{
- int i, count, nlines;
- unsigned long fboffs;
-
- if(dirty_x0 < 0) dirty_x0 = 0;
- if(dirty_y0 < 0) dirty_y0 = 0;
- if(dirty_x1 >= XRES) dirty_x1 = XRES - 1;
- if(dirty_y1 >= YRES) dirty_y1 = YRES - 1;
-
- nlines = dirty_y1 - dirty_y0;
- if(dirty_y1 <= 0 || nlines >= YRES) {
- vmemcopy(0, g3d_fbpixels, XRES * YRES);
- return;
- }
-
- fboffs = dirty_y0 * XRES;
- if(dirty_x1 > XRES - 4) {
- vmemcopy(fboffs, g3d_fbpixels + fboffs, nlines * XRES);
- return;
- }
-
- fboffs += dirty_x0 & 0xfffffffc;
- count = dirty_x1 + 3 - dirty_x0;
- for(i=0; i<nlines; i++) {
- vmemcopy(fboffs, g3d_fbpixels + fboffs, count);
- fboffs += XRES;
- }
+ int width, height;
+ struct rect *dirty;
+
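+ /* With two pages, the buffer we are about to draw into still holds the
+ * frame rendered before the previous one, so we must clear the dirty region
+ * recorded for that frame, not the most recent one. Toggling cur_dirty
+ * selects it; after clearing, the same rect is reset and then collects the
+ * extents of this frame's drawing.
+ */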
+ cur_dirty = (cur_dirty + 1) & 1;
+ dirty = dirty_rect + cur_dirty;
+
+ if(dirty->x0 < 0) dirty->x0 = 0;
+ if(dirty->y0 < 0) dirty->y0 = 0;
+ if(dirty->x1 >= XRES) dirty->x1 = XRES - 1;
+ if(dirty->y1 >= YRES) dirty->y1 = YRES - 1;
+
+ width = dirty->x1 + 1 - dirty->x0;
+ height = dirty->y1 + 1 - dirty->y0;
+
+ vid_clearfb_rect(dirty->x0, dirty->y0, width, height);
+ reset_dirty(dirty);
}
void g3d_modelview(const int32_t *m)
#if defined(USE_DIRTY_CLEAR) || defined(USE_DIRTY_COPY)
x = v[i].x >> 8;
y = v[i].y >> 8;
- if(x - 4 < dirty_x0) dirty_x0 = x - 4;
- if(y - 4 < dirty_y0) dirty_y0 = y - 4;
- if(x + 8 > dirty_x1) dirty_x1 = x + 8;
- if(y + 8 > dirty_y1) dirty_y1 = y + 8;
+ {
+ struct rect *dirty = dirty_rect + cur_dirty;
+ if(x < dirty->x0) dirty->x0 = x;
+ if(y < dirty->y0) dirty->y0 = y;
+ if(x + 4 > dirty->x1) dirty->x1 = x + 4;
+ if(y > dirty->y1) dirty->y1 = y;
+ }
#endif
}
void g3d_shutdown(void);
void g3d_framebuffer(int width, int height, void *fb);
+void g3d_framebuf_addr(void *fb);
-void g3d_reset_dirty(void);
void g3d_clear_dirty(void);
void g3d_copy_dirty(void);
#ifndef CONFIG_H_
#define CONFIG_H_
-#undef USE_VSYNC
#define USE_DIRTY_CLEAR
-#define USE_DIRTY_COPY
+#undef USE_DIRTY_COPY
#endif /* CONFIG_H_ */
void update(void);
void handle_key(int key);
void interrupt timer_intr();
+int parse_args(int argc, char **argv);
+void dump_vmem(void);
+static int opt_vsync = 1;
static int quit;
-static unsigned char *fb;
-static long nframes;
+static unsigned long nframes;
volatile unsigned long nticks;
static void interrupt (*prev_timer_intr)();
-int main(void)
+/* Entry point: parses the command line, sets up video and the 3D pipeline,
+ * and on exit restores the timer interrupt vector and the video mode before
+ * printing frame statistics. Returns 0 on success, 1 on invalid arguments.
+ */
+int main(int argc, char **argv)
{
int32_t proj[16];
- long rate;
+ unsigned long rate, total_ticks, sec;
- if(!(fb = calloc(1, 64000))) {
- fprintf(stderr, "failed to allocate framebuffer\n");
+ if(parse_args(argc, argv) == -1) {
return 1;
}
init_video();
g3d_init();
- g3d_framebuffer(320, 200, fb);
+ g3d_framebuffer(320, 200, 0);
mat_perspective(proj, 50, (4 << 16) / 3, 0x8000, 0x100000);
g3d_projection(proj);
}
end:
+ /* snapshot the tick counter with interrupts off, so that the timer
+ * handler can't modify it while we read it
+ */
+ _disable();
+ total_ticks = nticks;
_dos_setvect(0x1c, prev_timer_intr);
+ _enable();
close_video();
- free(fb);
- rate = nframes * 100 * 18 / nticks;
- printf("%ld frames in %ld sec, rate: %ld.%ld\n", nframes, nticks / 18,
- rate / 100, rate % 100);
+ /* 1.193182MHz / 65536 = 18.20651245117 ticks/sec
+ * rate is fps * 100, sec is elapsed time in tenths of a second.
+ * Quitting before the first timer tick leaves total_ticks at zero, so the
+ * division has to be guarded.
+ */
+ rate = total_ticks ? nframes * 1820 / total_ticks : 0;
+ sec = total_ticks * 100 / 182;
+ /* the fps fraction is in hundredths and must be zero-padded to 2 digits */
+ printf("%lu frames in %lu.%lu sec - %lu.%02lu fps\n", nframes,
+ sec / 10, sec % 10, rate / 100, rate % 100);
+
return 0;
}
{
int32_t xform[16];
-#ifdef USE_DIRTY_CLEAR
- g3d_clear_dirty();
-#else
- vid_clearfb(fb);
-#endif
-
mat_trans(xform, 0, 0, -0x40000);
mat_mul_rotx(xform, nframes);
mat_mul_roty(xform, nframes);
g3d_modelview(xform);
+ g3d_fbpixels = vid_backbuf;
+ if(opt_vsync) {
+ vid_wait_vblank();
+ }
+
+#ifdef USE_DIRTY_CLEAR
+ g3d_clear_dirty();
+#else
+ vid_clearfb();
+#endif
+
g3d_color(9);
g3d_draw(G3D_QUADS, varr, 4);
g3d_color(10);
g3d_color(14);
g3d_draw(G3D_QUADS, varr + 20, 4);
-#ifdef USE_VSYNC
- wait_vsync();
-#endif
-#ifdef USE_DIRTY_COPY
- g3d_copy_dirty();
-#else
- vid_copyfb(fb);
-#endif
+ vid_pgflip();
}
void handle_key(int key)
case 27:
quit = 1;
break;
+
+ case ' ':
+ dump_vmem();
+ break;
}
}
nticks++;
_chain_intr(prev_timer_intr);
}
+
+static const char *helpfmt = "Usage: %s [options]\n"
+ "Options:\n"
+ " -vsync: enable vsync (default)\n"
+ " -novsync: disable vsync\n"
+ " -help,-h: print usage and exit\n";
+
+/* Process command-line options, updating opt_vsync accordingly.
+ * Prints the usage text and exits the program for -help / -h.
+ * Returns 0 on success, or -1 after reporting an unrecognized option or a
+ * stray non-option argument on stderr.
+ */
+int parse_args(int argc, char **argv)
+{
+	int idx = 1;
+
+	while(idx < argc) {
+		const char *arg = argv[idx++];
+
+		/* anything not starting with a dash is rejected outright */
+		if(arg[0] != '-') {
+			fprintf(stderr, "unexpected argument: %s\n", arg);
+			return -1;
+		}
+
+		if(!strcmp(arg, "-vsync")) {
+			opt_vsync = 1;
+			continue;
+		}
+		if(!strcmp(arg, "-novsync")) {
+			opt_vsync = 0;
+			continue;
+		}
+		if(!strcmp(arg, "-help") || !strcmp(arg, "-h")) {
+			printf(helpfmt, argv[0]);
+			exit(0);
+		}
+
+		fprintf(stderr, "invalid option: %s\n", arg);
+		return -1;
+	}
+
+	return 0;
+}
+
+#define GC_ADDR 0x3ce
+#define GC_DATA 0x3cf
+#define RDSEL 4
+
+/* Convert one 320x200 page of planar video memory to 24bit RGB.
+ * src: start of the page in video memory (16000 bytes per plane)
+ * dest: where the first pixel of the page goes in the output image
+ * cmap: 256-entry RGB palette used to expand the color indices
+ *
+ * Each of the 4 planes is selected for reading in turn. Byte j of plane p
+ * is written out as pixel p + 4 * j, so the output pointer advances by
+ * 4 pixels (12 bytes) per source byte.
+ */
+static void dump_page(const unsigned char *src, unsigned char *dest, unsigned char (*cmap)[3])
+{
+	int plane, j;
+
+	for(plane=0; plane<4; plane++) {
+		unsigned char *out = dest + plane * 3;
+
+		outp(GC_ADDR, RDSEL);	/* graphics controller: read map select register */
+		outp(GC_DATA, plane);
+
+		for(j=0; j<16000; j++) {
+			int idx = src[j];
+			out[0] = cmap[idx][0];
+			out[1] = cmap[idx][1];
+			out[2] = cmap[idx][2];
+			out += 12;
+		}
+	}
+}
+
+/* Debugging aid: write both video pages (a0000 and a4000) to "vmem.ppm" as a
+ * single 320x400 binary PPM, first page on top, second page below it.
+ * Colors are looked up through the palette read back from the DAC.
+ * Silently does nothing if the image can't be allocated or the file can't be
+ * opened.
+ */
+void dump_vmem(void)
+{
+	unsigned char *img;
+	unsigned char cmap[256][3];
+	int i;
+	FILE *fp;
+
+	/* grab colormap; the DAC returns 6 bits per component, scale to 8 */
+	outp(0x3c7, 0);
+	for(i=0; i<256; i++) {
+		cmap[i][0] = inp(0x3c9) << 2;
+		cmap[i][1] = inp(0x3c9) << 2;
+		cmap[i][2] = inp(0x3c9) << 2;
+	}
+
+	/* allocate image */
+	if(!(img = malloc(320 * 400 * 3))) {
+		return;
+	}
+	if(!(fp = fopen("vmem.ppm", "wb"))) {
+		free(img);
+		return;
+	}
+
+	/* first page fills the top 200 lines of the image ... */
+	dump_page((unsigned char*)0xa0000, img, cmap);
+	/* ... and the second page must land in the bottom 200 lines, not on top
+	 * of the first page
+	 */
+	dump_page((unsigned char*)0xa4000, img + 320 * 200 * 3, cmap);
+
+	fprintf(fp, "P6\n%d %d\n255\n", 320, 400);
+	fwrite(img, 1, 320 * 400 * 3, fp);
+	fclose(fp);
+	free(img);
+}
#include <stdio.h>
#include <string.h>
#include "3dgfx.h"
+#include "video.h"
#include "util.h"
static void filltop(struct g3d_vertex *v0, struct g3d_vertex *v1, struct g3d_vertex *v2);
static void fillbot(struct g3d_vertex *v0, struct g3d_vertex *v1, struct g3d_vertex *v2);
+static void fillspan(unsigned char *dest, int x, int len);
void g3d_polyfill(struct g3d_vertex *verts)
{
static void filltop(struct g3d_vertex *v0, struct g3d_vertex *v1, struct g3d_vertex *v2)
{
struct g3d_vertex *vtmp;
- int x, xn, line, lasty, len;
+ int x, line, lasty, len;
int32_t xl, xr, dxl, dxr, slopel, sloper, dy;
int32_t y0, y1, yoffs;
unsigned char *fbptr;
if(lasty >= YRES) lasty = YRES - 1;
x = xl >> 8;
- fbptr = g3d_fbpixels + line * XRES + x;
+ fbptr = g3d_fbpixels + line * (XRES >> 2);
while(line <= lasty) {
if(line >= 0) {
len = ((xr + 0x100) >> 8) - (xl >> 8);
- if(len > 0) memset(fbptr, g3d_curcidx, len);
+ if(len > 0) fillspan(fbptr, x, len);
}
xl += slopel;
xr += sloper;
- xn = xl >> 8;
- fbptr += XRES + (xn - x);
- x = xn;
+ x = xl >> 8;
+ fbptr += XRES >> 2;
line++;
}
}
static void fillbot(struct g3d_vertex *v0, struct g3d_vertex *v1, struct g3d_vertex *v2)
{
struct g3d_vertex *vtmp;
- int x, xn, line, lasty, len;
+ int x, line, lasty, len;
int32_t xl, xr, dxl, dxr, slopel, sloper, dy;
int32_t y0, y1, yoffs;
unsigned char *fbptr;
if(lasty >= YRES) lasty = YRES - 1;
x = xl >> 8;
- fbptr = g3d_fbpixels + line * XRES + x;
+ fbptr = g3d_fbpixels + line * (XRES >> 2);
while(line <= lasty) {
if(line >= 0) {
len = ((xr + 0x100) >> 8) - (xl >> 8);
- if(len > 0) memset(fbptr, g3d_curcidx, len);
+ if(len > 0) fillspan(fbptr, x, len);
}
xl += slopel;
xr += sloper;
- xn = xl >> 8;
- fbptr += XRES + (xn - x);
- x = xn;
+ x = xl >> 8;
+ fbptr += XRES >> 2;
line++;
}
}
+
+/* Fill a horizontal run of pixels with the current color (g3d_curcidx) in
+ * planar video memory, where each byte address covers 4 consecutive pixels,
+ * one per bit plane, and the plane mask selects which of them get written.
+ * dest: start of the scanline
+ * x: first pixel of the span
+ * len: number of pixels to fill; nothing is drawn if it's not positive
+ */
+static void fillspan(unsigned char *dest, int x, int len)
+{
+	unsigned int mask = 0xf;
+	int align;
+
+	if(len <= 0) return;
+
+	dest += x >> 2;
+
+	if(len < 4) mask >>= 4 - len;
+
+	/* handle the start of the span. The x offset alignment affects:
+	 * 1. which bitplane to start from, adjust the plane mask accordingly.
+	 * 2. how many pixels we write, adjust remaining length accordingly.
+	 * Bits shifted past the 4th plane belong to the next byte; they are
+	 * dropped here and picked up by the leftover handling below.
+	 */
+	align = x & 3;
+	vid_setmask((mask << align) & 0xf);
+	*dest++ = g3d_curcidx;
+	len -= 4 - align;
+
+	/* the middle part of the span is all written 4 pixels at a time by
+	 * enabling all 4 bit planes.
+	 */
+	if(len >= 4) {
+		vid_setmask(0xf);
+		while(len >= 4) {
+			*dest++ = g3d_curcidx;
+			len -= 4;
+		}
+	}
+
+	/* handle any leftovers at the end. len is negative at this point when a
+	 * short span ended inside the first byte; there's nothing left to draw
+	 * in that case, so it must not be treated as a leftover.
+	 */
+	if(len > 0) {
+		vid_setmask(0xf >> (4 - len));
+		*dest = g3d_curcidx;
+	}
+}
--- /dev/null
+ bits 32
+ section .text USE32
+
+SC_ADDR equ 3c4h ; sequence controller address register
+CRTC_ADDR equ 3d4h ; CRTC address register
+
+	; set mode 13h through the BIOS and reprogram it to unchained (planar)
+	; addressing, clear video memory, make the second page (a4000) the
+	; initial back buffer and scan out from the first page.
+	; all registers are preserved.
+	global init_video_
+init_video_:
+	pusha
+	mov ax, 13h
+	int 10h
+
+	; disable chain-4 (bit 3 of sequencer memory mode register [4])
+	mov dx, SC_ADDR
+	mov ax, 0604h
+	out dx, ax
+	; disable double-word addressing (bit 6 of CRTC underline location
+	; register [14h])
+	mov dx, CRTC_ADDR
+	mov ax, 0014h
+	out dx, ax
+	; enable byte mode address generation (bit 6 of CRTC mode control
+	; register [17h])
+	mov ax, 0e317h
+	out dx, ax
+
+	; clear all 256kb of vram
+	mov dx, SC_ADDR
+	mov ax, 0f02h		; map mask reg (2) enable all planes (f)
+	out dx, ax
+	mov edi, 0a0000h
+	mov ecx, 4000h		; 64kb per plane / 4 bytes per stosd
+	xor eax, eax
+	rep stosd
+
+	; initial back buffer is the second page
+	mov dword [_vid_backbuf], 0a4000h
+
+	; set initial scanout address to page 0. if we never pageflip, we
+	; can just draw to a0000 as usual and it will be visible.
+	; This also makes sure the low byte is 0, because we're not touching it
+	; while page flipping; we flip by toggling a bit in the high byte.
+	mov dx, 3dah
+.invb:	in al, dx
+	and al, 8
+	jnz .invb
+	mov dx, CRTC_ADDR
+	; each register needs its own write: index in al, value in ah
+	mov ax, 000ch		; 0ch: start address high register
+	out dx, ax
+	mov ax, 000dh		; 0dh: start address low register
+	out dx, ax
+
+	popa
+	ret
+
+ ; leave graphics mode: BIOS int 10h with ah = 0 (set video mode) and
+ ; al = 3, the standard 80x25 text mode. ax is saved and restored around
+ ; the call.
+ global close_video_
+close_video_:
+ push ax
+ mov ax, 3
+ int 10h
+ pop ax
+ ret
+
+ ; clear the framebuffer 4 pixels at a time
+ ; fills 16000 bytes starting at the current back buffer (vid_backbuf)
+ ; with all four planes enabled. eax, ecx, edx and edi are saved and
+ ; restored.
+ ; NOTE(review): the fill value is 08h per pixel rather than 0; the
+ ; zeroing instruction is commented out below. This looks like a debugging
+ ; leftover (vid_clearfb_rect_ fills with 0) -- confirm which is intended.
+ global vid_clearfb_
+vid_clearfb_:
+ push eax
+ push ecx
+ push edx
+ push edi
+ mov dx, SC_ADDR
+ mov ax, 0f02h ; map mask reg (2) enable all planes (f)
+ out dx, ax
+ mov edi, [_vid_backbuf]
+ mov ecx, 4000 ; 4000 dwords * 4 planes * 4 bytes = 64000 pixels
+ ;xor eax, eax
+ mov eax, 08080808h
+ rep stosd
+ pop edi
+ pop edx
+ pop ecx
+ pop eax
+ ret
+
+	; clear area of the framebuffer
+	; eax: x  edx: y  ebx: width  ecx: height
+	; fills with 0 on all four planes, starting at the byte containing x
+	; and covering the width rounded up to a multiple of 16 pixels.
+	; does nothing if width or height is zero or negative.
+	; edi and esi are preserved, the argument registers are clobbered.
+	global vid_clearfb_rect_
+vid_clearfb_rect_:
+	push edi
+	push esi
+
+	; abort if width or height is negative or zero. They have to be
+	; tested separately: OR-ing them together lets a zero height through
+	; when the width is positive, and the dec/jnz row loop below would
+	; then wrap around and run for 2^32 rows.
+	test ebx, ebx
+	jle .done
+	test ecx, ecx
+	jle .done
+
+	; edi = backbuf + x / 4 + y * 80
+	mov edi, [_vid_backbuf]
+	shr eax, 2
+	add edi, eax
+	lea eax, [edx * 4 + edx]
+	shl eax, 4
+	add edi, eax
+
+	; enable all planes
+	mov dx, SC_ADDR
+	mov ax, 0f02h		; map mask reg (2) enable all planes (f)
+	out dx, ax
+
+	xor eax, eax		; fill value
+	add ebx, 15		; round up to next 16-pixel block
+	shr ebx, 4		; /4 (planes) /4 (stosd) = /16
+	mov edx, ecx		; use edx for height, to free ecx for rep stosd
+.yloop:	mov esi, edi		; save pointer
+	mov ecx, ebx
+	rep stosd
+	mov edi, esi		; restore pointer
+	add edi, 80		; advance scanline (320 / 4 planes)
+	dec edx
+	jnz .yloop
+.done:	pop esi
+	pop edi
+	ret
+
+
+ ; vid_backbuf is the linear address of the back buffer in video RAM
+ ; either a0000 or a4000. Flipping bit 14 switches between them, and
+ ; masking with ffff gives the CRTC start address.
+ ;
+ ; vid_pgflip: make the current back buffer the scanout buffer, and
+ ; point vid_backbuf to the other page. eax, ebx and edx are preserved.
+
+ global vid_pgflip_
+vid_pgflip_:
+ push eax
+ push ebx
+ push edx
+ ; set the current backbuffer as the new CRTC scanout start address
+ ; bx ends up as the word to send to the CRTC port: register index in
+ ; the low byte, bits 8-15 of the back buffer address in the high byte
+ mov ebx, [_vid_backbuf]
+ mov bl, 0ch ; CRTC start address high register
+
+ ; only proceed if we're out of vblank, otherwise we might think we've
+ ; set a new scanout address, but it might not be latched until the next
+ ; vblank, and we'll be drawing over the scanout buffer in the meantime.
+ mov dx, 3dah
+.wait: in al, dx
+ and al, 8
+ jnz .wait
+
+ mov dx, CRTC_ADDR
+ mov ax, bx ; get previously prepared reg addr and value
+ out dx, ax
+ ; clear low bits and flip the backbuffer pointer
+ ; xor with 400ch removes the register index (0ch) from the low byte and
+ ; toggles bit 14. The store below is 16 bits wide, so only the low word
+ ; of vid_backbuf is replaced and its upper word is left as it was.
+ xor ax, 400ch
+ mov [_vid_backbuf], ax
+ pop edx
+ pop ebx
+ pop eax
+ ret
+
+ section .data
+
+ align 4
+ global _vid_backbuf
+_vid_backbuf dd 0
+xyzzy dd 01010101h
#ifndef VIDEO_H_
#define VIDEO_H_
+extern unsigned char *vid_backbuf;
+
void init_video(void);
void close_video(void);
-void wait_vsync(void);
-#pragma aux wait_vsync = \
+void vid_clearfb(void);
+void vid_clearfb_rect(int x, int y, int w, int h);
+void vid_pgflip(void);
+
+/* Set the plane write mask: sends the low byte of mask as the value of
+ * register index 2 through port 0x3c4 (index in al, value in ah, written
+ * with a single 16bit out). The video assembly code refers to this port as
+ * the sequence controller address register, and to index 2 as the map mask
+ * register, where 0xf enables all planes.
+ * mask is passed in eax; ax and dx are clobbered.
+ */
+void vid_setmask(unsigned int mask);
+#pragma aux vid_setmask = \
+ "mov dx, 0x3c4" \
+ "mov ah, al" \
+ "mov al, 2" \
+ "out dx, ax" \
+ parm [eax] \
+ modify [ax dx];
+
+void vid_wait_vblank(void);
+#pragma aux vid_wait_vblank = \
+ "mov dx, 0x3da" \
+ "waitvb:" \
+ "in al, dx" \
+ "and al, 8" \
+ "jz waitvb" \
+ modify [al dx];
+
+void vid_vsync(void);
+#pragma aux vid_vsync = \
"mov dx, 0x3da" \
"invb:" \
"in al, dx" \
"in al, dx" \
"and al, 8" \
"jz waitvb" \
- modify [eax edx];
-
-void vid_clearfb(void *fb);
-#pragma aux vid_clearfb = \
- "mov ecx, 16000" \
- "xor eax, eax" \
- "rep stosd" \
- parm [edi] \
- modify [eax ecx];
-
-void vid_copyfb(void *fb);
-#pragma aux vid_copyfb = \
- "mov ecx, 16000" \
- "mov edi, 0xa0000" \
- "rep movsd" \
- parm [esi] \
- modify [ecx edi];
+ modify [al dx];
+
#endif /* VIDEO_H_ */
--- /dev/null
+#!/bin/sh
+# Unpack low3d.zip into the current directory, rename the lowercase makefiles
+# to Makefile if present, and run dos2unix over the sources, makefiles and
+# tool scripts. Aborts with exit status 1 if unzip fails.
+
+unzip low3d.zip || exit 1
+
+# the makefiles may come out of the archive with lowercase names
+if [ -f makefile ]; then
+ mv makefile Makefile
+fi
+if [ -f tools/makefile ]; then
+ mv tools/makefile tools/Makefile
+fi
+
+# convert line endings of all the text files
+dos2unix src/*.c
+dos2unix src/*.h
+dos2unix src/*.asm
+dos2unix tools/*.c
+dos2unix Makefile tools/Makefile
+dos2unix tools/packsrc tools/unpacksrc