mirroring tunnel vertically improves performance
author John Tsiombikas <nuclear@member.fsf.org>
Mon, 12 Apr 2021 03:35:12 +0000 (06:35 +0300)
committer John Tsiombikas <nuclear@member.fsf.org>
Mon, 12 Apr 2021 03:35:12 +0000 (06:35 +0300)
src/debug.c
src/gamescr.c

index e0696ca..60dc387 100644 (file)
@@ -7,9 +7,9 @@
 #include "util.h"
 
 uint16_t vblperf_color[] = {
-       /* grn  cyan  yellow  orng    red    purple ... */
-       /* 60    30     20     15     12      10 ... */
-       0x3e0, 0xffc0, 0x3ff, 0x1ff, 0x001f, 0xf81f, 0xf81f, 0xf81f, 0xf81f, 0xf81f
+       /* grn  blue   cyan  yellow  orng    red     purple  d.green purple ... */
+       /* 60    30     20     15     12      10      8.5     7.5    ... */
+       0x3e0, 0xf863, 0xffc0, 0x3ff, 0x1ff, 0x001f, 0xf81f, 0x1e0, 0xf81f, 0xf81f, 0xf81f
 };
 
 static void vblperf_intr(void)
index bd73b1b..e5dd8e4 100644 (file)
@@ -13,9 +13,9 @@ void gamescr(void)
 {
        int i, j, tx, ty, angle, depth, nframes, backbuf, zoffs;
        static uint16_t *vram[] = { (uint16_t*)VRAM_LFB_FB0_ADDR, (uint16_t*)VRAM_LFB_FB1_ADDR };
-       uint16_t *cdst;
+       uint16_t *cdst, *top, *bot;
        unsigned char *csrc;
-       uint32_t tun, *tunptr, *tuncache;
+       uint32_t tun, *tunptr;
 
        REG_DISPCNT = 4 | DISPCNT_BG2 | DISPCNT_FB1;
 
@@ -31,10 +31,6 @@ void gamescr(void)
        fillblock_16byte(vram[0], 0xffffffff, 240 * 160 / 16);
        fillblock_16byte(vram[1], 0xffffffff, 240 * 160 / 16);
 
-       if(!(tuncache = malloc(240 * 160 * 2))) {
-               panic(get_pc(), "failed to allocate tuntab");
-       }
-       memcpy(tuncache, tunmap, 240 * 160 * 2);
        memcpy(tex, tuncross_pixels, 32 * 32);
 
        nframes = 0;
@@ -43,28 +39,35 @@ void gamescr(void)
 
                zoffs = nframes << 1;
 
-               cdst = vram[backbuf];
-               tunptr = tuncache;
-               for(i=0; i<160 * 240 / 2; i++) {
-                       //for(j=1; j<240/2; j++) {
-                               uint16_t pp;
+               top = vram[backbuf];
+               bot = vram[backbuf] + 159 * 240 / 2;
+               tunptr = tunmap;
+               for(i=0; i<80; i++) {
+                       for(j=0; j<240/2; j++) {
+                               uint16_t pptop, ppbot;
 
                                tun = *tunptr++;
 
                                angle = tun & 0xff;
                                depth = (tun >> 8) & 0xff;
-                               tx = (angle >> 1) & 0x1f;
+                               tx = ((angle >> 1) + zoffs) & 0x1f;
                                ty = ((depth >> 1) + zoffs) & 0x1f;
-                               pp = tex[(ty << 5) + tx];
+                               pptop = tex[(ty << 5) + tx];
+                               tx = ((angle >> 1) - zoffs) & 0x1f;
+                               ppbot = tex[(ty << 5) + tx];
 
                                angle = (tun >> 16) & 0xff;
                                depth = (tun >> 24) & 0xff;
-                               tx = (angle >> 1) & 0x1f;
+                               tx = ((angle >> 1) + zoffs) & 0x1f;
                                ty = ((depth >> 1) + zoffs) & 0x1f;
-                               pp |= (uint16_t)tex[(ty << 5) + tx] << 8;
+                               pptop |= (uint16_t)tex[(ty << 5) + tx] << 8;
+                               tx = ((angle >> 1) - zoffs) & 0x1f;
+                               ppbot |= (uint16_t)tex[(ty << 5) + tx] << 8;
 
-                               *cdst++ = pp;
-                       //}
+                               *top++ = pptop;
+                               *bot++ = ppbot;
+                       }
+                       bot -= 240;
                }
 
                vblperf_end();