- VBE banked mode fixes backported from rbench
[dosdemo] / src / dos / gfx.c
index 29f6349..2f67bad 100644 (file)
@@ -1,12 +1,20 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <dos.h>
 #include "demo.h"
 #include "cdpmi.h"
 #include "gfx.h"
 #include "vbe.h"
 #include "vga.h"
 #include "util.h"
+#include "cpuid.h"
+
+#ifdef __DJGPP__
+#define VMEM_PTR       ((void*)(0xa0000 + __djgpp_conventional_base))
+#else
+#define VMEM_PTR       ((void*)0xa0000)
+#endif
 
 #define SAME_BPP(a, b) \
        ((a) == (b) || ((a) == 16 && (b) == 15) || ((a) == 15 && (b) == 16) || \
@@ -18,6 +26,10 @@ static void blit_frame_lfb(void *pixels, int vsync);
 static void blit_frame_banked(void *pixels, int vsync);
 static uint32_t calc_mask(int sz, int pos);
 
+static void enable_wrcomb(uint32_t addr, int len);
+static const char *mtrr_type_name(int type);
+static void print_mtrr(void);
+
 static struct video_mode *vmodes;
 static int num_vmodes;
 
@@ -85,17 +97,13 @@ int init_video(void)
                        vmptr->rmask = calc_mask(minf.rsize, minf.rpos);
                        vmptr->gmask = calc_mask(minf.gsize, minf.gpos);
                        vmptr->bmask = calc_mask(minf.bsize, minf.bpos);
-                       vmptr->bpp = vmptr->rbits + vmptr->gbits + vmptr->bbits;
+                       /*vmptr->bpp = vmptr->rbits + vmptr->gbits + vmptr->bbits;*/
                }
                if(minf.attr & VBE_ATTR_LFB) {
                        vmptr->fb_addr = minf.fb_addr;
-               } else {
-                       vmptr->bank_size = (uint32_t)minf.bank_size * 1024;
-                       if(!vmptr->bank_size) {
-                               vmptr->bank_size = 65536;
-                       }
                }
                vmptr->max_pages = minf.num_img_pages;
+               vmptr->win_gran = minf.win_gran;
 
                printf("%04x: ", vbe.modes[i]);
                vbe_print_mode_info(stdout, &minf);
@@ -184,7 +192,7 @@ void *set_video_mode(int idx, int nbuf)
        }
 
        /* unmap previous video memory mapping, if there was one (switching modes) */
-       if(vpgaddr[0] && vpgaddr[0] != (void*)0xa0000) {
+       if(vpgaddr[0] && vpgaddr[0] != VMEM_PTR) {
                dpmi_munmap(vpgaddr[0]);
                vpgaddr[0] = vpgaddr[1] = 0;
        }
@@ -192,12 +200,19 @@ void *set_video_mode(int idx, int nbuf)
        curmode = vm;
        if(nbuf < 1) nbuf = 1;
        if(nbuf > 2) nbuf = 2;
-       pgcount = nbuf > vm->max_pages ? vm->max_pages : nbuf;
+       pgcount = nbuf > vm->max_pages + 1 ? vm->max_pages + 1 : nbuf;
        pgsize = vm->ysz * vm->pitch;
        fbsize = pgcount * pgsize;
 
+       if(vm->bpp > 8) {
+               printf("rgb mask: %x %x %x\n", (unsigned int)vm->rmask,
+                               (unsigned int)vm->gmask, (unsigned int)vm->bmask);
+               printf("rgb shift: %d %d %d\n", vm->rshift, vm->gshift, vm->bshift);
+       }
        printf("pgcount: %d, pgsize: %d, fbsize: %d\n", pgcount, pgsize, fbsize);
-       printf("phys addr: %p\n", (void*)vm->fb_addr);
+       if(vm->fb_addr) {
+               printf("phys addr: %p\n", (void*)vm->fb_addr);
+       }
        fflush(stdout);
 
        if(vm->fb_addr) {
@@ -221,11 +236,48 @@ void *set_video_mode(int idx, int nbuf)
 
                blit_frame = blit_frame_lfb;
 
+               /* only attempt to set up write combining if the CPU we're running on
+                * supports memory type range registers, and we're running on ring 0
+                */
+               if(CPU_HAVE_MTRR) {
+                       int cpl = get_cpl();
+                       if(cpl > 0) {
+                               fprintf(stderr, "Can't set framebuffer range to write-combining, running in ring %d\n", cpl);
+                       } else {
+                               uint32_t len = (uint32_t)vbe.vmem_blk << 16;
+
+                               /* if vmem_blk is 0 or if the reported size is absurd (more than
+                                * 256mb), just use the framebuffer size for this mode to setup the
+                                * mtrr
+                                */
+                               if(!len || len > 0x10000000) {
+                                       printf("reported vmem too large or overflowed, using fbsize for wrcomb setup\n");
+                                       len = fbsize;
+                               }
+                               print_mtrr();
+                               enable_wrcomb(vm->fb_addr, len);
+                       }
+               }
+
        } else {
-               vpgaddr[0] = (void*)0xa0000;
+               vpgaddr[0] = VMEM_PTR;
                vpgaddr[1] = 0;
 
                blit_frame = blit_frame_banked;
+
+               /* calculate window granularity shift */
+               vm->win_gran_shift = 0;
+               vm->win_64k_step = 1;
+               if(vm->win_gran > 0 && vm->win_gran < 64) {
+                       int gran = vm->win_gran;
+                       while(gran < 64) {
+                               vm->win_gran_shift++;
+                               gran <<= 1;
+                       }
+                       vm->win_64k_step = 1 << vm->win_gran_shift;
+               }
+
+               printf("granularity: %dk (step: %d)\n", vm->win_gran, vm->win_64k_step);
        }
 
        /* allocate main memory framebuffer */
@@ -236,13 +288,14 @@ void *set_video_mode(int idx, int nbuf)
                return 0;
        }
 
+       fflush(stdout);
        return vpgaddr[0];
 }
 
 int set_text_mode(void)
 {
        /* unmap previous video memory mapping, if there was one (switching modes) */
-       if(vpgaddr[0] && vpgaddr[0] != (void*)0xa0000) {
+       if(vpgaddr[0] && vpgaddr[0] != VMEM_PTR) {
                dpmi_munmap(vpgaddr[0]);
                vpgaddr[0] = vpgaddr[1] = 0;
        }
@@ -277,8 +330,7 @@ static void blit_frame_lfb(void *pixels, int vsync)
 
 static void blit_frame_banked(void *pixels, int vsync)
 {
-       int i, sz, offs;
-       unsigned int pending;
+       int sz, offs, pending;
        unsigned char *pptr = pixels;
 
        demo_post_draw(pixels);
@@ -289,22 +341,168 @@ static void blit_frame_banked(void *pixels, int vsync)
        offs = 0;
        pending = pgsize;
        while(pending > 0) {
-               sz = pending > curmode->bank_size ? curmode->bank_size : pending;
-               memcpy((void*)phys_to_virt(0xa0000), pptr, sz);
+               sz = pending > 65536 ? 65536 : pending;
+               /*memcpy64(VMEM_PTR, pptr, sz >> 3);*/
+               memcpy(VMEM_PTR, pptr, sz);
                pptr += sz;
                pending -= sz;
-               vbe_setwin(0, ++offs);
+               offs += curmode->win_64k_step;
+               vbe_setwin(0, offs);
        }
-
        vbe_setwin(0, 0);
 }
 
 static uint32_t calc_mask(int sz, int pos)
 {
-       int i;
        uint32_t mask = 0;
        while(sz-- > 0) {
                mask = (mask << 1) | 1;
        }
        return mask << pos;
 }
+
+#define MSR_MTRRCAP                    0xfe   /* capability MSR: low byte is used as the range count */
+#define MSR_MTRRDEFTYPE                0x2ff  /* default-type MSR: low byte is the fallback memory type */
+#define MSR_MTRRBASE(x)                (0x200 | ((x) << 1))  /* base register of range x */
+#define MSR_MTRRMASK(x)                (0x201 | ((x) << 1))  /* mask register of range x */
+#define MTRRDEF_EN                     0x800  /* bit in MSR_MTRRDEFTYPE, cleared/set around an update */
+#define MTRRCAP_HAVE_WC                0x400  /* bit in MSR_MTRRCAP tested before using write combining */
+#define MTRRMASK_VALID         0x800  /* bit in a mask register's low dword: range is in use */
+
+#define MTRR_WC                                1  /* memory type value kept in the low byte of a base register */
+
+/* look up the memory type that the first matching valid MTRR range assigns to
+ * addr; falls back to the low byte of MSR_MTRRDEFTYPE when no range matches */
+static int get_page_memtype(uint32_t addr, int num_ranges)
+{
+       int idx = 0;
+       uint32_t lo, hi, range_base, range_mask;
+
+       while(idx < num_ranges) {
+               get_msr(MSR_MTRRMASK(idx), &lo, &hi);
+               if(lo & MTRRMASK_VALID) {
+                       range_mask = lo & 0xfffff000;
+
+                       get_msr(MSR_MTRRBASE(idx), &lo, &hi);
+                       range_base = lo & 0xfffff000;
+                       /* addr is inside the range when it agrees with the base on all mask bits */
+                       if(((addr ^ range_base) & range_mask) == 0) {
+                               return lo & 0xff;
+                       }
+               }
+               idx++;
+       }
+       get_msr(MSR_MTRRDEFTYPE, &lo, &hi);
+       return lo & 0xff;
+}
+
+/* returns 1 only if get_page_memtype reports MTRR_WC for every 4096-byte step
+ * of [addr, addr + len), otherwise 0 */
+static int check_wrcomb_enabled(uint32_t addr, int len, int num_ranges)
+{
+       for(; len > 0; len -= 4096, addr += 4096) {
+               if(get_page_memtype(addr, num_ranges) != MTRR_WC) {
+                       return 0;
+               }
+       }
+       return 1;
+}
+
+/* find the first of the num_ranges mask registers whose valid bit is clear;
+ * returns its index, or -1 if every one of them is marked valid */
+static int alloc_mtrr(int num_ranges)
+{
+       int idx = 0;
+       uint32_t lo, hi;
+       while(idx < num_ranges) {
+               get_msr(MSR_MTRRMASK(idx), &lo, &hi);
+               if(!(lo & MTRRMASK_VALID)) return idx;
+               idx++;
+       }
+       return -1;
+}
+
+/* Mark [addr, addr + len) write-combining through a free variable-range MTRR.
+ * len is rounded up to a power of two; fails (with a message) if the range is
+ * not 4k-granular or addr is not aligned to the rounded-up size. */
+static void enable_wrcomb(uint32_t addr, int len)
+{
+       int num_ranges, mtrr;
+       uint32_t rlow, rhigh, def, mask;
+
+       if(len <= 0 || (addr | (uint32_t)len) & 0xfff) {
+               fprintf(stderr, "failed to enable write combining, unaligned range: %p/%x\n",
+                               (void*)addr, (unsigned int)len);
+               return;
+       }
+       get_msr(MSR_MTRRCAP, &rlow, &rhigh);
+       num_ranges = rlow & 0xff;
+       printf("enable_wrcomb: addr=%p len=%x\n", (void*)addr, (unsigned int)len);
+       if(!(rlow & MTRRCAP_HAVE_WC)) {
+               fprintf(stderr, "failed to enable write combining, processor doesn't support it\n");
+               return;
+       }
+       if(check_wrcomb_enabled(addr, len, num_ranges)) {
+               return; /* the whole range is already write-combining */
+       }
+       if((mtrr = alloc_mtrr(num_ranges)) == -1) {
+               fprintf(stderr, "failed to enable write combining, no free MTRRs\n");
+               return;
+       }
+
+       /* smear the top set bit of len-1 downwards and invert: mask of len rounded up to 2^n */
+       mask = len - 1;
+       mask |= mask >> 1;
+       mask |= mask >> 2;
+       mask |= mask >> 4;
+       mask |= mask >> 8;
+       mask |= mask >> 16;
+       mask = ~mask & 0xfffff000;
+       printf("  ... mask: %08x\n", (unsigned int)mask);
+       /* ranges match on (a & mask) == (base & mask): a misaligned base would cover the wrong pages */
+       if(addr & ~mask) {
+               fprintf(stderr, "failed to enable write combining, base not aligned to range size\n");
+               return;
+       }
+
+       _disable();
+       get_msr(MSR_MTRRDEFTYPE, &def, &rhigh);
+       set_msr(MSR_MTRRDEFTYPE, def & ~MTRRDEF_EN, rhigh);     /* MTRRs off while the pair is rewritten */
+       set_msr(MSR_MTRRBASE(mtrr), addr | MTRR_WC, 0);
+       set_msr(MSR_MTRRMASK(mtrr), mask | MTRRMASK_VALID, 0);
+       set_msr(MSR_MTRRDEFTYPE, def | MTRRDEF_EN, rhigh);      /* keep the saved high dword, as above */
+       _enable();
+}
+
+static const char *mtrr_names[] = { "U C", "W C", "N/A", "N/A", "W T", "W P", "W B" };
+/* name of an MTRR type encoding (0=UC 1=WC 4=WT 5=WP 6=WB); "N/A" if reserved or out of range */
+static const char *mtrr_type_name(int type)
+{
+       if(type < 0 || type >= (int)(sizeof mtrr_names / sizeof *mtrr_names)) {
+               return "N/A";
+       }
+       return mtrr_names[type];
+}
+
+/* dump every MTRR base/mask register pair reported by MSR_MTRRCAP to stdout */
+static void print_mtrr(void)
+{
+       int idx, count;
+       uint32_t cap, hi, base_lo, mask_lo;
+
+       get_msr(MSR_MTRRCAP, &cap, &hi);
+       count = cap & 0xff;
+
+       for(idx=0; idx<count; idx++) {
+               get_msr(MSR_MTRRBASE(idx), &base_lo, &hi);
+               get_msr(MSR_MTRRMASK(idx), &mask_lo, &hi);
+               if(!(mask_lo & MTRRMASK_VALID)) {
+                       printf("mtrr%d unused (%08x/%08x)\n", idx, (unsigned int)base_lo,
+                                       (unsigned int)mask_lo);
+                       continue;
+               }
+               printf("mtrr%d: base %p, mask %08x type %s\n", idx, (void*)(base_lo & 0xfffff000),
+                               (unsigned int)(mask_lo & 0xfffff000), mtrr_type_name(base_lo & 0xff));
+       }
+       fflush(stdout);
+}