#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <dos.h>
#include "demo.h"
#include "cdpmi.h"
#include "gfx.h"
#include "vbe.h"
#include "vga.h"
#include "util.h"
+#include "cpuid.h"
+
+#ifdef __DJGPP__
+#define VMEM_PTR ((void*)(0xa0000 + __djgpp_conventional_base))
+#else
+#define VMEM_PTR ((void*)0xa0000)
+#endif
#define SAME_BPP(a, b) \
((a) == (b) || ((a) == 16 && (b) == 15) || ((a) == 15 && (b) == 16) || \
static void blit_frame_banked(void *pixels, int vsync);
static uint32_t calc_mask(int sz, int pos);
+static void enable_wrcomb(uint32_t addr, int len);
+static const char *mtrr_type_name(int type);
+static void print_mtrr(void);
+
static struct video_mode *vmodes;
static int num_vmodes;
vmptr->rmask = calc_mask(minf.rsize, minf.rpos);
vmptr->gmask = calc_mask(minf.gsize, minf.gpos);
vmptr->bmask = calc_mask(minf.bsize, minf.bpos);
- vmptr->bpp = vmptr->rbits + vmptr->gbits + vmptr->bbits;
+ /*vmptr->bpp = vmptr->rbits + vmptr->gbits + vmptr->bbits;*/
}
if(minf.attr & VBE_ATTR_LFB) {
vmptr->fb_addr = minf.fb_addr;
- } else {
- vmptr->bank_size = (uint32_t)minf.bank_size * 1024;
- if(!vmptr->bank_size) {
- vmptr->bank_size = 65536;
- }
}
vmptr->max_pages = minf.num_img_pages;
+ vmptr->win_gran = minf.win_gran;
printf("%04x: ", vbe.modes[i]);
vbe_print_mode_info(stdout, &minf);
}
/* unmap previous video memory mapping, if there was one (switching modes) */
- if(vpgaddr[0] && vpgaddr[0] != (void*)0xa0000) {
+ if(vpgaddr[0] && vpgaddr[0] != VMEM_PTR) {
dpmi_munmap(vpgaddr[0]);
vpgaddr[0] = vpgaddr[1] = 0;
}
curmode = vm;
if(nbuf < 1) nbuf = 1;
if(nbuf > 2) nbuf = 2;
- pgcount = nbuf > vm->max_pages ? vm->max_pages : nbuf;
+ pgcount = nbuf > vm->max_pages + 1 ? vm->max_pages + 1 : nbuf;
pgsize = vm->ysz * vm->pitch;
fbsize = pgcount * pgsize;
+ if(vm->bpp > 8) {
+ printf("rgb mask: %x %x %x\n", (unsigned int)vm->rmask,
+ (unsigned int)vm->gmask, (unsigned int)vm->bmask);
+ printf("rgb shift: %d %d %d\n", vm->rshift, vm->gshift, vm->bshift);
+ }
printf("pgcount: %d, pgsize: %d, fbsize: %d\n", pgcount, pgsize, fbsize);
- printf("phys addr: %p\n", (void*)vm->fb_addr);
+ if(vm->fb_addr) {
+ printf("phys addr: %p\n", (void*)vm->fb_addr);
+ }
fflush(stdout);
if(vm->fb_addr) {
blit_frame = blit_frame_lfb;
+ /* only attempt to set up write combining if the CPU we're running on
+ * supports memory type range registers, and we're running on ring 0
+ */
+ if(CPU_HAVE_MTRR) {
+ int cpl = get_cpl();
+ if(cpl > 0) {
+ fprintf(stderr, "Can't set framebuffer range to write-combining, running in ring %d\n", cpl);
+ } else {
+ uint32_t len = (uint32_t)vbe.vmem_blk << 16;
+
+ /* if vmem_blk is 0 or if the reported size is absurd (more than
+ * 256mb), just use the framebuffer size for this mode to setup the
+ * mtrr
+ */
+ if(!len || len > 0x10000000) {
+ printf("reported vmem too large or overflowed, using fbsize for wrcomb setup\n");
+ len = fbsize;
+ }
+ print_mtrr();
+ enable_wrcomb(vm->fb_addr, len);
+ }
+ }
+
} else {
- vpgaddr[0] = (void*)0xa0000;
+ vpgaddr[0] = VMEM_PTR;
vpgaddr[1] = 0;
blit_frame = blit_frame_banked;
+
+ /* calculate window granularity shift */
+ vm->win_gran_shift = 0;
+ vm->win_64k_step = 1;
+ if(vm->win_gran > 0 && vm->win_gran < 64) {
+ int gran = vm->win_gran;
+ while(gran < 64) {
+ vm->win_gran_shift++;
+ gran <<= 1;
+ }
+ vm->win_64k_step = 1 << vm->win_gran_shift;
+ }
+
+ printf("granularity: %dk (step: %d)\n", vm->win_gran, vm->win_64k_step);
}
/* allocate main memory framebuffer */
return 0;
}
+ fflush(stdout);
return vpgaddr[0];
}
int set_text_mode(void)
{
/* unmap previous video memory mapping, if there was one (switching modes) */
- if(vpgaddr[0] && vpgaddr[0] != (void*)0xa0000) {
+ if(vpgaddr[0] && vpgaddr[0] != VMEM_PTR) {
dpmi_munmap(vpgaddr[0]);
vpgaddr[0] = vpgaddr[1] = 0;
}
/* Banked (windowed) frame presentation.
 *
 * Calls demo_post_draw() on the frame, then copies pgsize bytes from `pixels`
 * to VMEM_PTR in chunks of at most 65536 bytes, moving the VBE window with
 * vbe_setwin() after every chunk. The window is put back to position 0 before
 * returning.
 *
 * pixels: source frame, read as raw bytes.
 * vsync:  not referenced by this function.
 */
static void blit_frame_banked(void *pixels, int vsync)
{
- int i, sz, offs;
- unsigned int pending;
+ int sz, offs, pending;
unsigned char *pptr = pixels;
demo_post_draw(pixels);
offs = 0;
pending = pgsize;	/* bytes still to be copied */
while(pending > 0) {
- sz = pending > curmode->bank_size ? curmode->bank_size : pending;
- memcpy((void*)phys_to_virt(0xa0000), pptr, sz);
+ sz = pending > 65536 ? 65536 : pending;	/* never copy more than 64k per window position */
+ /*memcpy64(VMEM_PTR, pptr, sz >> 3);*/
+ memcpy(VMEM_PTR, pptr, sz);
pptr += sz;
pending -= sz;
- vbe_setwin(0, ++offs);
+ offs += curmode->win_64k_step;	/* window position advances by win_64k_step units per 64k chunk */
+ vbe_setwin(0, offs);
}
-
vbe_setwin(0, 0);	/* leave the window at the start of video memory */
}
/* Build a bit mask of `sz` consecutive 1 bits whose lowest bit is bit `pos`.
 *
 * Returns 0 when sz <= 0.
 * NOTE(review): presumably pos is always in 0..31; a shift count of 32 or
 * more on a uint32_t is undefined in C -- confirm against the callers.
 */
static uint32_t calc_mask(int sz, int pos)
{
- int i;
uint32_t mask = 0;
while(sz-- > 0) {
mask = (mask << 1) | 1;	/* append one more low 1 bit */
}
return mask << pos;
}
+/* MSR numbers and bit flags used by the MTRR (memory type range register)
+ * helpers that follow.
+ */
+#define MSR_MTRRCAP 0xfe	/* low byte is read as the number of variable ranges */
+#define MSR_MTRRDEFTYPE 0x2ff	/* low byte is read as the default memory type */
+#define MSR_MTRRBASE(x) (0x200 | ((x) << 1))	/* base MSR of variable range x: 0x200, 0x202, ... */
+#define MSR_MTRRMASK(x) (0x201 | ((x) << 1))	/* mask MSR of variable range x: 0x201, 0x203, ... */
+#define MTRRDEF_EN 0x800	/* bit in MSR_MTRRDEFTYPE, cleared/set around MTRR updates */
+#define MTRRCAP_HAVE_WC 0x400	/* bit in MSR_MTRRCAP tested before using write-combining */
+#define MTRRMASK_VALID 0x800	/* bit in a mask MSR marking the range as in use */
+
+#define MTRR_WC 1	/* memory type value used for write-combining */
+/* Return the memory type that the variable-range MTRRs assign to `addr`.
+ *
+ * Scans ranges 0..num_ranges-1, skipping those whose mask MSR does not have
+ * MTRRMASK_VALID set. The first range for which addr and the range base agree
+ * under the range mask wins, and the low byte of its base MSR is returned.
+ * If no range matches, the low byte of MSR_MTRRDEFTYPE is returned.
+ *
+ * NOTE(review): only the low dword of each MSR is examined (rhigh is ignored),
+ * overlapping ranges are not reconciled, and MTRRDEF_EN is not consulted.
+ */
+static int get_page_memtype(uint32_t addr, int num_ranges)
+{
+ int i;
+ uint32_t rlow, rhigh;
+ uint32_t base, mask;
+
+ for(i=0; i<num_ranges; i++) {
+ get_msr(MSR_MTRRMASK(i), &rlow, &rhigh);
+ if(!(rlow & MTRRMASK_VALID)) {
+ continue;	/* unused range */
+ }
+ mask = rlow & 0xfffff000;	/* keep bits 12-31, drop the flag bits */
+
+ get_msr(MSR_MTRRBASE(i), &rlow, &rhigh);
+ base = rlow & 0xfffff000;
+
+ if((addr & mask) == (base & mask)) {
+ return rlow & 0xff;	/* type is the low byte of the base MSR */
+ }
+ }
+
+ get_msr(MSR_MTRRDEFTYPE, &rlow, &rhigh);
+ return rlow & 0xff;
+}
+/* Check whether the whole range [addr, addr + len) is already write-combining.
+ *
+ * Steps through the range 4096 bytes at a time and asks get_page_memtype()
+ * for each step. Returns 0 as soon as one step is not MTRR_WC, otherwise 1.
+ * Also returns 1 when len <= 0, because the loop body never runs.
+ */
+static int check_wrcomb_enabled(uint32_t addr, int len, int num_ranges)
+{
+ while(len > 0) {
+ if(get_page_memtype(addr, num_ranges) != MTRR_WC) {
+ return 0;
+ }
+ addr += 4096;
+ len -= 4096;
+ }
+ return 1;
+}
+/* Find an unused variable-range MTRR.
+ *
+ * Returns the index of the first range in 0..num_ranges-1 whose mask MSR does
+ * not have MTRRMASK_VALID set, or -1 if every range is in use. Nothing is
+ * written; the caller is expected to program the returned range.
+ */
+static int alloc_mtrr(int num_ranges)
+{
+ int i;
+ uint32_t rlow, rhigh;
+
+ for(i=0; i<num_ranges; i++) {
+ get_msr(MSR_MTRRMASK(i), &rlow, &rhigh);
+ if(!(rlow & MTRRMASK_VALID)) {
+ return i;
+ }
+ }
+ return -1;
+}
+/* Try to mark the physical range [addr, addr + len) as write-combining by
+ * programming a free variable-range MTRR.
+ *
+ * Failures are reported on stderr and the function simply returns; there is
+ * no return value. Nothing is changed when the range is rejected, when
+ * MSR_MTRRCAP lacks MTRRCAP_HAVE_WC, when check_wrcomb_enabled() says the
+ * range is already write-combining, or when alloc_mtrr() finds no free range.
+ *
+ * addr: start of the range; must have its low 12 bits clear.
+ * len:  size in bytes; must be positive with its low 12 bits clear.
+ *
+ * NOTE(review): the base address is not checked for alignment to the
+ * rounded-up size, the final MSR_MTRRDEFTYPE write uses a zero high dword and
+ * always sets MTRRDEF_EN, and no cache flush/disable is visible around the
+ * update -- confirm against the CPU vendor's documented MTRR update sequence.
+ */
+static void enable_wrcomb(uint32_t addr, int len)
+{
+ int num_ranges, mtrr;
+ uint32_t rlow, rhigh;
+ uint32_t def, mask;
+ /* reject empty/negative lengths and anything with low 12 bits set in addr or len */
+ if(len <= 0 || (addr | (uint32_t)len) & 0xfff) {
+ fprintf(stderr, "failed to enable write combining, unaligned range: %p/%x\n",
+ (void*)addr, (unsigned int)len);
+ return;
+ }
+
+ get_msr(MSR_MTRRCAP, &rlow, &rhigh);
+ num_ranges = rlow & 0xff;	/* number of variable ranges */
+
+ printf("enable_wrcomb: addr=%p len=%x\n", (void*)addr, (unsigned int)len);
+
+ if(!(rlow & MTRRCAP_HAVE_WC)) {
+ fprintf(stderr, "failed to enable write combining, processor doesn't support it\n");
+ return;
+ }
+ /* nothing to do if every page of the range is already write-combining */
+ if(check_wrcomb_enabled(addr, len, num_ranges)) {
+ return;
+ }
+
+ if((mtrr = alloc_mtrr(num_ranges)) == -1) {
+ fprintf(stderr, "failed to enable write combining, no free MTRRs\n");
+ return;
+ }
+ /* smear the highest set bit of len - 1 downwards, giving (len rounded up
+ * to a power of two) - 1; inverting that and keeping bits 12-31 yields the
+ * range mask
+ */
+ mask = len - 1;
+ mask |= mask >> 1;
+ mask |= mask >> 2;
+ mask |= mask >> 4;
+ mask |= mask >> 8;
+ mask |= mask >> 16;
+ mask = ~mask & 0xfffff000;
+
+ printf(" ... mask: %08x\n", (unsigned int)mask);
+ /* with interrupts off: clear MTRRDEF_EN, program the range, set MTRRDEF_EN */
+ _disable();
+ get_msr(MSR_MTRRDEFTYPE, &def, &rhigh);
+ set_msr(MSR_MTRRDEFTYPE, def & ~MTRRDEF_EN, rhigh);
+
+ set_msr(MSR_MTRRBASE(mtrr), addr | MTRR_WC, 0);	/* base address plus type in the low byte */
+ set_msr(MSR_MTRRMASK(mtrr), mask | MTRRMASK_VALID, 0);	/* mask plus valid flag */
+
+ set_msr(MSR_MTRRDEFTYPE, def | MTRRDEF_EN, 0);	/* NOTE(review): high dword written as 0, not rhigh */
+ _enable();
+}
+/* Display names indexed by MTRR memory type value: 1 "W C", 4 "W T", 5 "W P",
+ * 6 "W B"; every other slot holds "N/A".
+ * NOTE(review): type 0 is labelled "N/A" here -- confirm that is intended
+ * rather than an uncacheable label.
+ */
+static const char *mtrr_names[] = { "N/A", "W C", "N/A", "N/A", "W T", "W P", "W B" };
+/* Map a memory type value to its display name; values outside the table
+ * (negative or past the last entry) yield the "N/A" string in slot 0.
+ */
+static const char *mtrr_type_name(int type)
+{
+ if(type < 0 || type >= sizeof mtrr_names / sizeof *mtrr_names) {
+ return mtrr_names[0];
+ }
+ return mtrr_names[type];
+}
+/* Dump the variable-range MTRRs to stdout.
+ *
+ * The range count is the low byte of MSR_MTRRCAP. For each range the low
+ * dwords of its base and mask MSRs are read; ranges with MTRRMASK_VALID set
+ * are printed as base / mask / type name, the rest are printed as unused
+ * together with their raw values. stdout is flushed at the end.
+ */
+static void print_mtrr(void)
+{
+ int i, num_ranges;
+ uint32_t rlow, rhigh, base, mask;
+
+ get_msr(MSR_MTRRCAP, &rlow, &rhigh);
+ num_ranges = rlow & 0xff;
+
+ for(i=0; i<num_ranges; i++) {
+ get_msr(MSR_MTRRBASE(i), &base, &rhigh);
+ get_msr(MSR_MTRRMASK(i), &mask, &rhigh);	/* rhigh is overwritten and never used */
+
+ if(mask & MTRRMASK_VALID) {
+ printf("mtrr%d: base %p, mask %08x type %s\n", i, (void*)(base & 0xfffff000),
+ (unsigned int)(mask & 0xfffff000), mtrr_type_name(base & 0xff));
+ } else {
+ printf("mtrr%d unused (%08x/%08x)\n", i, (unsigned int)base,
+ (unsigned int)mask);
+ }
+ }
+ fflush(stdout);
+}
c = lerp(a, b, sy);
/* interpolate along the bottom slice of the cell */
- u = dotgrad3(grad3[b00 + bz0], rx0, ry0, rz1);
- v = dotgrad3(grad3[b10 + bz0], rx1, ry0, rz1);
+ u = dotgrad3(grad3[b00 + bz1], rx0, ry0, rz1);
+ v = dotgrad3(grad3[b10 + bz1], rx1, ry0, rz1);
a = lerp(u, v, sx);
- u = dotgrad3(grad3[b01 + bz0], rx0, ry1, rz1);
- v = dotgrad3(grad3[b11 + bz0], rx1, ry1, rz1);
+ u = dotgrad3(grad3[b01 + bz1], rx0, ry1, rz1);
+ v = dotgrad3(grad3[b11 + bz1], rx1, ry1, rz1);
b = lerp(u, v, sx);
d = lerp(a, b, sy);
c = lerp(a, b, sy);
/* interpolate along the bottom slice of the cell */
- u = dotgrad3(grad3[b00 + bz0], rx0, ry0, rz1);
- v = dotgrad3(grad3[b10 + bz0], rx1, ry0, rz1);
+ u = dotgrad3(grad3[b00 + bz1], rx0, ry0, rz1);
+ v = dotgrad3(grad3[b10 + bz1], rx1, ry0, rz1);
a = lerp(u, v, sx);
- u = dotgrad3(grad3[b01 + bz0], rx0, ry1, rz1);
- v = dotgrad3(grad3[b11 + bz0], rx1, ry1, rz1);
+ u = dotgrad3(grad3[b01 + bz1], rx0, ry1, rz1);
+ v = dotgrad3(grad3[b11 + bz1], rx1, ry1, rz1);
b = lerp(u, v, sx);
d = lerp(a, b, sy);