From 574b83282bd01d6b9a70843238bbcbab1590fe76 Mon Sep 17 00:00:00 2001 From: John Tsiombikas Date: Tue, 3 May 2022 15:49:41 +0300 Subject: [PATCH] - VBE banked mode fixes backported from rbench - write combining setup when available - added CPUID code - fixed djgpp build - noise3 bugfix from andemo --- Makefile | 6 +- Makefile.dj | 7 +- src/cpuid.c | 2 +- src/dos/gfx.c | 234 ++++++++++++++++++++++++++++++++++++++++++++---- src/dos/gfx.h | 2 +- src/dos/main.c | 11 ++- src/dos/vbe.c | 2 +- src/dos/vga.c | 1 + src/noise.c | 16 ++-- src/util.h | 21 +---- src/util_s.asm | 62 +++++++++++++ tools/scripts/pceminst | 6 +- 12 files changed, 314 insertions(+), 56 deletions(-) create mode 100644 src/util_s.asm diff --git a/Makefile b/Makefile index 341b0de..ea8473f 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,8 @@ dosobj = src/dos/audos.obj src/dos/djdpmi.obj src/dos/gfx.obj src/dos/keyb.obj & srcobj = src/bsptree.obj src/cfgopt.obj src/console.obj src/demo.obj & src/dynarr.obj src/gfxutil.obj src/metasurf.obj src/noise.obj & src/rbtree.obj src/screen.obj src/tinyfps.obj src/treestor.obj & - src/image.obj src/ts_text.obj src/util.obj src/data.obj + src/image.obj src/ts_text.obj src/util.obj src/util_s.obj src/cpuid.obj & + src/cpuid_s.obj src/data.obj scrobj = src/scr/bump.obj src/scr/fract.obj src/scr/greets.obj & src/scr/grise.obj src/scr/hairball.obj src/scr/infcubes.obj & src/scr/metaball.obj src/scr/plasma.obj src/scr/polytest.obj & @@ -28,7 +29,8 @@ dosobj = src\dos\audos.obj src\dos\djdpmi.obj src\dos\gfx.obj src\dos\keyb.obj & srcobj = src\bsptree.obj src\cfgopt.obj src\console.obj src\demo.obj & src\dynarr.obj src\gfxutil.obj src\metasurf.obj src\noise.obj & src\rbtree.obj src\screen.obj src\tinyfps.obj src\treestor.obj & - src\image.obj src\ts_text.obj src\util.obj src\data.obj + src\image.obj src\ts_text.obj src\util.obj src\util_s.obj src\cpuid.obj & + src\cpuid_s.obj src\data.obj scrobj = src\scr\bump.obj src\scr\fract.obj src\scr\greets.obj & src\scr\grise.obj 
src\scr\hairball.obj src\scr\infcubes.obj & src\scr\metaball.obj src\scr\plasma.obj src\scr\polytest.obj & diff --git a/Makefile.dj b/Makefile.dj index 32ab4ae..59d4042 100644 --- a/Makefile.dj +++ b/Makefile.dj @@ -22,16 +22,17 @@ warn = -pedantic -Wall -Wno-unused-function -Wno-unused-variable ifdef RELEASE dbg = -g - def = -DNDEBUG -DNO_MUSIC + def = -DNDEBUG -DNO_SOUND else - def = -DNO_MUSIC + def = -DNO_SOUND endif #prof = -pg CC = $(TOOLPREFIX)gcc AR = $(TOOLPREFIX)ar CFLAGS = -march=pentium $(warn) -MMD $(dbg) $(opt) $(prof) $(inc) $(def) -LDFLAGS = libs/imago/imago.dja libs/anim/anim.dja libs/midas/libmidas.a +LDFLAGS = libs/imago/imago.dja libs/anim/anim.dja +#libs/midas/libmidas.a ifneq ($(hostsys), dos) .PHONY: all diff --git a/src/cpuid.c b/src/cpuid.c index b67143a..5a09fc4 100644 --- a/src/cpuid.c +++ b/src/cpuid.c @@ -67,7 +67,7 @@ static const char *cpuname(struct cpuid_info *cpu) int model, family; char *rd, *wr; - if(cpu->brandstr) { + if(*cpu->brandstr) { /* unwank the string */ rd = wr = cpu->brandstr; while(*rd) { diff --git a/src/dos/gfx.c b/src/dos/gfx.c index 29f6349..2f67bad 100644 --- a/src/dos/gfx.c +++ b/src/dos/gfx.c @@ -1,12 +1,20 @@ #include #include #include +#include #include "demo.h" #include "cdpmi.h" #include "gfx.h" #include "vbe.h" #include "vga.h" #include "util.h" +#include "cpuid.h" + +#ifdef __DJGPP__ +#define VMEM_PTR ((void*)(0xa0000 + __djgpp_conventional_base)) +#else +#define VMEM_PTR ((void*)0xa0000) +#endif #define SAME_BPP(a, b) \ ((a) == (b) || ((a) == 16 && (b) == 15) || ((a) == 15 && (b) == 16) || \ @@ -18,6 +26,10 @@ static void blit_frame_lfb(void *pixels, int vsync); static void blit_frame_banked(void *pixels, int vsync); static uint32_t calc_mask(int sz, int pos); +static void enable_wrcomb(uint32_t addr, int len); +static const char *mtrr_type_name(int type); +static void print_mtrr(void); + static struct video_mode *vmodes; static int num_vmodes; @@ -85,17 +97,13 @@ int init_video(void) vmptr->rmask = 
calc_mask(minf.rsize, minf.rpos); vmptr->gmask = calc_mask(minf.gsize, minf.gpos); vmptr->bmask = calc_mask(minf.bsize, minf.bpos); - vmptr->bpp = vmptr->rbits + vmptr->gbits + vmptr->bbits; + /*vmptr->bpp = vmptr->rbits + vmptr->gbits + vmptr->bbits;*/ } if(minf.attr & VBE_ATTR_LFB) { vmptr->fb_addr = minf.fb_addr; - } else { - vmptr->bank_size = (uint32_t)minf.bank_size * 1024; - if(!vmptr->bank_size) { - vmptr->bank_size = 65536; - } } vmptr->max_pages = minf.num_img_pages; + vmptr->win_gran = minf.win_gran; printf("%04x: ", vbe.modes[i]); vbe_print_mode_info(stdout, &minf); @@ -184,7 +192,7 @@ void *set_video_mode(int idx, int nbuf) } /* unmap previous video memory mapping, if there was one (switching modes) */ - if(vpgaddr[0] && vpgaddr[0] != (void*)0xa0000) { + if(vpgaddr[0] && vpgaddr[0] != VMEM_PTR) { dpmi_munmap(vpgaddr[0]); vpgaddr[0] = vpgaddr[1] = 0; } @@ -192,12 +200,19 @@ void *set_video_mode(int idx, int nbuf) curmode = vm; if(nbuf < 1) nbuf = 1; if(nbuf > 2) nbuf = 2; - pgcount = nbuf > vm->max_pages ? vm->max_pages : nbuf; + pgcount = nbuf > vm->max_pages + 1 ? 
vm->max_pages + 1 : nbuf; pgsize = vm->ysz * vm->pitch; fbsize = pgcount * pgsize; + if(vm->bpp > 8) { + printf("rgb mask: %x %x %x\n", (unsigned int)vm->rmask, + (unsigned int)vm->gmask, (unsigned int)vm->bmask); + printf("rgb shift: %d %d %d\n", vm->rshift, vm->gshift, vm->bshift); + } printf("pgcount: %d, pgsize: %d, fbsize: %d\n", pgcount, pgsize, fbsize); - printf("phys addr: %p\n", (void*)vm->fb_addr); + if(vm->fb_addr) { + printf("phys addr: %p\n", (void*)vm->fb_addr); + } fflush(stdout); if(vm->fb_addr) { @@ -221,11 +236,48 @@ void *set_video_mode(int idx, int nbuf) blit_frame = blit_frame_lfb; + /* only attempt to set up write combining if the CPU we're running on + * supports memory type range registers, and we're running on ring 0 + */ + if(CPU_HAVE_MTRR) { + int cpl = get_cpl(); + if(cpl > 0) { + fprintf(stderr, "Can't set framebuffer range to write-combining, running in ring %d\n", cpl); + } else { + uint32_t len = (uint32_t)vbe.vmem_blk << 16; + + /* if vmem_blk is 0 or if the reported size is absurd (more than + * 256mb), just use the framebuffer size for this mode to setup the + * mtrr + */ + if(!len || len > 0x10000000) { + printf("reported vmem too large or overflowed, using fbsize for wrcomb setup\n"); + len = fbsize; + } + print_mtrr(); + enable_wrcomb(vm->fb_addr, len); + } + } + } else { - vpgaddr[0] = (void*)0xa0000; + vpgaddr[0] = VMEM_PTR; vpgaddr[1] = 0; blit_frame = blit_frame_banked; + + /* calculate window granularity shift */ + vm->win_gran_shift = 0; + vm->win_64k_step = 1; + if(vm->win_gran > 0 && vm->win_gran < 64) { + int gran = vm->win_gran; + while(gran < 64) { + vm->win_gran_shift++; + gran <<= 1; + } + vm->win_64k_step = 1 << vm->win_gran_shift; + } + + printf("granularity: %dk (step: %d)\n", vm->win_gran, vm->win_64k_step); } /* allocate main memory framebuffer */ @@ -236,13 +288,14 @@ void *set_video_mode(int idx, int nbuf) return 0; } + fflush(stdout); return vpgaddr[0]; } int set_text_mode(void) { /* unmap previous video 
memory mapping, if there was one (switching modes) */ - if(vpgaddr[0] && vpgaddr[0] != (void*)0xa0000) { + if(vpgaddr[0] && vpgaddr[0] != VMEM_PTR) { dpmi_munmap(vpgaddr[0]); vpgaddr[0] = vpgaddr[1] = 0; } @@ -277,8 +330,7 @@ static void blit_frame_lfb(void *pixels, int vsync) static void blit_frame_banked(void *pixels, int vsync) { - int i, sz, offs; - unsigned int pending; + int sz, offs, pending; unsigned char *pptr = pixels; demo_post_draw(pixels); @@ -289,22 +341,168 @@ static void blit_frame_banked(void *pixels, int vsync) offs = 0; pending = pgsize; while(pending > 0) { - sz = pending > curmode->bank_size ? curmode->bank_size : pending; - memcpy((void*)phys_to_virt(0xa0000), pptr, sz); + sz = pending > 65536 ? 65536 : pending; + /*memcpy64(VMEM_PTR, pptr, sz >> 3);*/ + memcpy(VMEM_PTR, pptr, sz); pptr += sz; pending -= sz; - vbe_setwin(0, ++offs); + offs += curmode->win_64k_step; + vbe_setwin(0, offs); } - vbe_setwin(0, 0); } static uint32_t calc_mask(int sz, int pos) { - int i; uint32_t mask = 0; while(sz-- > 0) { mask = (mask << 1) | 1; } return mask << pos; } + +#define MSR_MTRRCAP 0xfe +#define MSR_MTRRDEFTYPE 0x2ff +#define MSR_MTRRBASE(x) (0x200 | ((x) << 1)) +#define MSR_MTRRMASK(x) (0x201 | ((x) << 1)) +#define MTRRDEF_EN 0x800 +#define MTRRCAP_HAVE_WC 0x400 +#define MTRRMASK_VALID 0x800 + +#define MTRR_WC 1 + +static int get_page_memtype(uint32_t addr, int num_ranges) +{ + int i; + uint32_t rlow, rhigh; + uint32_t base, mask; + + for(i=0; i 0) { + if(get_page_memtype(addr, num_ranges) != MTRR_WC) { + return 0; + } + addr += 4096; + len -= 4096; + } + return 1; +} + +static int alloc_mtrr(int num_ranges) +{ + int i; + uint32_t rlow, rhigh; + + for(i=0; i> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; + mask = ~mask & 0xfffff000; + + printf(" ... 
mask: %08x\n", (unsigned int)mask); + + _disable(); + get_msr(MSR_MTRRDEFTYPE, &def, &rhigh); + set_msr(MSR_MTRRDEFTYPE, def & ~MTRRDEF_EN, rhigh); + + set_msr(MSR_MTRRBASE(mtrr), addr | MTRR_WC, 0); + set_msr(MSR_MTRRMASK(mtrr), mask | MTRRMASK_VALID, 0); + + set_msr(MSR_MTRRDEFTYPE, def | MTRRDEF_EN, 0); + _enable(); +} + +static const char *mtrr_names[] = { "N/A", "W C", "N/A", "N/A", "W T", "W P", "W B" }; + +static const char *mtrr_type_name(int type) +{ + if(type < 0 || type >= sizeof mtrr_names / sizeof *mtrr_names) { + return mtrr_names[0]; + } + return mtrr_names[type]; +} + +static void print_mtrr(void) +{ + int i, num_ranges; + uint32_t rlow, rhigh, base, mask; + + get_msr(MSR_MTRRCAP, &rlow, &rhigh); + num_ranges = rlow & 0xff; + + for(i=0; iattr & VBE_ATTR_LFB) { fprintf(fp, " lfb@%lx", (unsigned long)minf->fb_addr); } else { - fprintf(fp, " %xkb/bank", (unsigned int)minf->bank_size); + fprintf(fp, " (%dk gran)", (int)minf->win_gran); } fprintf(fp, " ["); diff --git a/src/dos/vga.c b/src/dos/vga.c index 376e72b..f2f8142 100644 --- a/src/dos/vga.c +++ b/src/dos/vga.c @@ -1,3 +1,4 @@ +#include #include "vga.h" #include "vgaregs.h" #include "cdpmi.h" diff --git a/src/noise.c b/src/noise.c index 4401e5a..63bfa82 100644 --- a/src/noise.c +++ b/src/noise.c @@ -186,12 +186,12 @@ float noise3(float x, float y, float z) c = lerp(a, b, sy); /* interpolate along the bottom slice of the cell */ - u = dotgrad3(grad3[b00 + bz0], rx0, ry0, rz1); - v = dotgrad3(grad3[b10 + bz0], rx1, ry0, rz1); + u = dotgrad3(grad3[b00 + bz1], rx0, ry0, rz1); + v = dotgrad3(grad3[b10 + bz1], rx1, ry0, rz1); a = lerp(u, v, sx); - u = dotgrad3(grad3[b01 + bz0], rx0, ry1, rz1); - v = dotgrad3(grad3[b11 + bz0], rx1, ry1, rz1); + u = dotgrad3(grad3[b01 + bz1], rx0, ry1, rz1); + v = dotgrad3(grad3[b11 + bz1], rx1, ry1, rz1); b = lerp(u, v, sx); d = lerp(a, b, sy); @@ -298,12 +298,12 @@ float pnoise3(float x, float y, float z, int per_x, int per_y, int per_z) c = lerp(a, b, sy); /* 
interpolate along the bottom slice of the cell */ - u = dotgrad3(grad3[b00 + bz0], rx0, ry0, rz1); - v = dotgrad3(grad3[b10 + bz0], rx1, ry0, rz1); + u = dotgrad3(grad3[b00 + bz1], rx0, ry0, rz1); + v = dotgrad3(grad3[b10 + bz1], rx1, ry0, rz1); a = lerp(u, v, sx); - u = dotgrad3(grad3[b01 + bz0], rx0, ry1, rz1); - v = dotgrad3(grad3[b11 + bz0], rx1, ry1, rz1); + u = dotgrad3(grad3[b01 + bz1], rx0, ry1, rz1); + v = dotgrad3(grad3[b11 + bz1], rx1, ry1, rz1); b = lerp(u, v, sx); d = lerp(a, b, sy); diff --git a/src/util.h b/src/util.h index 5f3489f..35d1a97 100644 --- a/src/util.h +++ b/src/util.h @@ -106,12 +106,6 @@ void debug_break(void); void halt(void); #pragma aux halt = "hlt"; - -unsigned int get_cs(void); -#pragma aux get_cs = \ - "xor eax, eax" \ - "mov ax, cs" \ - value[eax]; #endif #ifdef __GNUC__ @@ -178,17 +172,6 @@ static void INLINE memset16(void *dest, uint16_t val, int count) #define halt() \ asm volatile("hlt") - -static unsigned int INLINE get_cs(void) -{ - unsigned int res; - asm volatile ( - "xor %%eax, %%eax\n\t" - "mov %%cs, %0\n\t" - : "=a"(res) - ); - return res; -} #endif #ifdef _MSC_VER @@ -251,6 +234,10 @@ static unsigned int __inline get_cs(void) } #endif +unsigned int get_cs(void); #define get_cpl() ((int)(get_cs() & 7)) +void get_msr(uint32_t msr, uint32_t *low, uint32_t *high); +void set_msr(uint32_t msr, uint32_t low, uint32_t high); + #endif /* UTIL_H_ */ diff --git a/src/util_s.asm b/src/util_s.asm new file mode 100644 index 0000000..69c034a --- /dev/null +++ b/src/util_s.asm @@ -0,0 +1,62 @@ + section .text +; foo_ are watcom functions, _foo are djgpp functions + + global get_cs + global _get_cs + global get_cs_ +get_cs: +_get_cs: +get_cs_: + xor eax, eax + mov ax, cs + ret + + global get_msr + global _get_msr +get_msr: +_get_msr: + push ebp + mov ebp, esp + push ebx + mov ecx, [ebp + 8] + rdmsr + mov ebx, [ebp + 12] + mov [ebx], eax + mov ebx, [ebp + 16] + mov [ebx], edx + pop ebx + pop ebp + ret + + global get_msr_ +get_msr_: + 
push ebx + push edx + mov ecx, eax + rdmsr + pop ebx + mov [ebx], eax + pop ebx + mov [ebx], edx + ret + + global set_msr + global _set_msr +set_msr: +_set_msr: + mov ecx, [esp + 4] + mov eax, [esp + 8] + mov edx, [esp + 12] + wrmsr ; store edx:eax into the MSR selected by ecx + ret + + global set_msr_ +set_msr_: + mov ecx, eax + mov eax, edx + mov edx, ebx + wrmsr + ret + + +; vi:ft=nasm: diff --git a/tools/scripts/pceminst b/tools/scripts/pceminst index c0e8660..6687f82 100755 --- a/tools/scripts/pceminst +++ b/tools/scripts/pceminst @@ -1,10 +1,10 @@ #!/bin/sh # NOTES: -# assumes a PCem setup with a fat16 image mounted at /pcem_dos. fstab entry: -# /home/nuclear/.pcem/pentium_dos.img /pcem_dos msdos user,noauto,loop,fmask=0113,dmask=0002,gid=6,offset=32256 0 0 +# assumes a PCem setup with a fat16 image mounted at /pcem/pentium. fstab entry: +# /home/nuclear/.pcem/pentium_dos.img /pcem/pentium msdos user,noauto,loop,fmask=0113,dmask=0002,gid=6,offset=32256 0 0 -mntpt=/pcem_dos +mntpt=/pcem/pentium do_umount=false if ! ( mount | grep pcem >/dev/null ); then -- 1.7.10.4