scrobj = tunnel.obj fract.obj grise.obj polytest.obj plasma.obj bump.obj &
thunder.obj metaball.obj greets.obj
sysobj = gfx.obj vbe.obj watdpmi.obj timer.obj keyb.obj mouse.obj sball.obj &
-logger.obj tinyfps.obj
+logger.obj tinyfps.obj util.obj
obj = $(baseobj) $(demoobj) $(sysobj) $(scrobj)
bin = demo.exe
run the following in the demo root directory:
svn co svn://mutantstargoat.com/datadirs/dosdemo data
+
+Random optimization details about the Pentium1 (p54c)
+-----------------------------------------------------
+Use cround64 (util.h) for float -> integer conversions, instead of casts.
+
+Performance measurement with RDTSC:
+ perf_start();
+ /* code under test */
+ perf_end(); /* result in perf_interval_count */
+
+Cache organization (L1): 8 KB data / 8 KB instruction, 2-way set associative:
+128 sets of 2 cache lines, 32 bytes per cache line.
+
+Addresses which differ by a multiple of 4096 map to the same set, so only two
+such cache lines can be in the cache at any time.
+
+U/V pipe pairing rules:
+ - both instructions must be simple
+ - no read-after-write or write-after-write reg dependencies
+ - neither instruction may have both a displacement and an immediate
+ - instr. with prefixes (except 0x0f) can only run on U pipe.
+ - prefixes are treated as separate 1-byte instructions (except 0x0f).
+ - branches can be paired only as the second instr. of the pair.
+
+Simple instructions are:
+ - mov reg, reg/mem/imm
+ - mov mem, reg/imm
+ - alu reg, reg/mem/imm (alu: add/sub/cmp/and/or/xor)
+ - alu mem, reg/imm
+ - inc reg/mem
+ - dec reg/mem
+ - push reg/mem
+ - pop reg
+ - lea reg,mem
+ - jmp/call/jcc near
+ - nop
+ - test reg,reg/mem
+ - test acc,imm
+
+U-only pairable instructions:
+ - adc, sbb
+ - shr, sar, shl, sal with immediate
+ - ror, rol, rcr, rcl with immediate=1
--- /dev/null
+#include "util.h"
+
+uint32_t perf_start_count, perf_interval_count;
return *(int32_t*)&val;
}
-uint32_t perf_start_count, perf_interval_count;
+extern uint32_t perf_start_count, perf_interval_count;
#ifdef __WATCOMC__
void perf_start(void);
#pragma aux perf_start = \
+ "xor eax, eax" \
+ "cpuid" \
"rdtsc" \
"mov [perf_start_count], eax" \
- modify[eax edx];
+ modify[eax ebx ecx edx];
void perf_end(void);
#pragma aux perf_end = \
+ "xor eax, eax" \
+ "cpuid" \
"rdtsc" \
"sub eax, [perf_start_count]" \
"mov [perf_interval_count], eax" \
- modify [eax edx];
+ modify [eax ebx ecx edx];
#endif
#ifdef __GNUC__
#define perf_start() asm volatile ( \
+ "xor %%eax, %%eax\n" \
+ "cpuid\n" \
"rdtsc\n" \
"mov %%eax, %0\n" \
- : "=m"(perf_start_count) :: "%eax", "%edx")
+ : "=m"(perf_start_count) \
+ :: "%eax", "%ebx", "%ecx", "%edx")
#define perf_end() asm volatile ( \
+ "xor %%eax, %%eax\n" \
+ "cpuid\n" \
"rdtsc\n" \
"sub %1, %%eax\n" \
"mov %%eax, %0\n" \
: "=m"(perf_interval_count) \
: "m"(perf_start_count) \
- : "%eax", "%edx")
+ : "%eax", "%ebx", "%ecx", "%edx")
#endif
#ifdef _MSC_VER
#define perf_start() \
do { \
__asm { \
+ xor eax, eax \
+ cpuid \
rdtsc \
mov [perf_start_count], eax \
} \
#define perf_end() \
do { \
__asm { \
+ xor eax, eax \
+ cpuid \
rdtsc \
sub eax, [perf_start_count] \
mov [perf_interval_count], eax \