/*
 * util.h - fixed-width integer fallbacks, compiler abstraction macros, byte
 * swapping, fast float helpers and compiler-specific inline assembly helpers
 * (16bit fill, 64bit copy, TSC based interval measurement, debugger break).
 *
 * Recovered from a whitespace-collapsed gitweb diff of src/util.h:
 * X-Git-Url: http://git.mutantstargoat.com/user/nuclear/?a=blobdiff_plain;f=src%2Futil.h;h=69d9d1e73a507e94846599f55719f587c5f6db4c;hb=387c4948b144c51c5b6fcfb8f558c3becf324f70;hp=d1f81eb45e0665054c30ff04b807029c4ba5ff95;hpb=b651991a8cb4cf8a1e64c66175f27091c805fdf6;p=retrobench
 * (index d1f81eb..69d9d1e)
 */
#ifndef UTIL_H_
#define UTIL_H_

/* memcpy (used by the memcpy64 fallback) and size_t */
#include <string.h>

#ifdef NO_STDINT_H
/* Fallback definitions for compilers without <stdint.h>.
 * int8_t is spelled "signed char" because the signedness of plain char is
 * implementation-defined, and intptr_t is signed to agree with <stdint.h>.
 */
typedef signed char int8_t;
typedef unsigned char uint8_t;
typedef short int16_t;
typedef unsigned short uint16_t;
typedef int int32_t;
typedef unsigned int uint32_t;
typedef long intptr_t;
#else
#include <stdint.h>
#endif

/* INLINE: inline function specifier understood by the current compiler.
 * PACKED: struct packing attribute; expands to nothing where unsupported.
 */
#ifdef __GNUC__
#define INLINE __inline
#define PACKED __attribute__((packed))

#elif defined(__WATCOMC__)
#define INLINE __inline
#define PACKED

#else
#define INLINE
#define PACKED
#endif

/* Byte order reversal of 16 and 32 bit values. Both macros evaluate their
 * argument more than once, so do not pass expressions with side effects.
 * BSWAP32 converts to uint32_t first: this keeps the left shifts well defined
 * for signed arguments and discards any bits above bit 31.
 */
#define BSWAP16(x)	((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))
#define BSWAP32(x)	\
	((((uint32_t)(x) >> 24) & 0xff) | \
	 (((uint32_t)(x) >> 8) & 0xff00) | \
	 (((uint32_t)(x) << 8) & 0xff0000) | \
	 ((uint32_t)(x) << 24))


/* Lookup table defined elsewhere; SIN() wraps the index to 10 bits (0-1023). */
extern int sinlut[];

#define SIN(x)	sinlut[(x) & 0x3ff]
/* NOTE(review): the diff this header was recovered from does not show one line
 * of the original file at this point (it falls between two hunks). Restore it
 * from the repository before relying on this header.
 */

/* Defined elsewhere.
 * NOTE(review): presumably returns the bit position of the lowest set bit of
 * mask - confirm against the implementation.
 */
int mask_to_shift(unsigned int mask);

#if defined(__i386__) || defined(__x86_64__) || defined(__386__) || defined(MSDOS)
/* fast conversion of double -> 32bit int
 * for details see:
 *  - http://chrishecker.com/images/f/fb/Gdmfp.pdf
 *  - http://stereopsis.com/FPU.html#convert
 *
 * Adding 1.5 * 2^52 leaves the rounded integer in the low 32 bits of the
 * double, which are read back through a union (the first four bytes of the
 * double, i.e. the low word on these little-endian targets). The union
 * replaces a pointer cast which violated the strict aliasing rules.
 */
static INLINE int32_t cround64(double val)
{
	union { double d; int32_t i; } pun;

	pun.d = val + 6755399441055744.0;
	return pun.i;
}
#else
/* portable fallback: plain conversion (truncates instead of rounding) */
#define cround64(x)	((int32_t)(x))
#endif

/* Fast approximate 1/sqrt(x): bit-level initial guess (magic constant
 * 0x5f3759df) refined by a single Newton-Raphson step. Only meaningful for
 * positive, finite x. The bits are reinterpreted through a union instead of
 * pointer casts to stay within the aliasing rules.
 */
static INLINE float rsqrt(float x)
{
	union { float f; int32_t i; } pun;
	float xhalf = x * 0.5f;

	pun.f = x;
	pun.i = 0x5f3759df - (pun.i >> 1);
	x = pun.f;
	return x * (1.5f - xhalf * x * x);
}

/* Written by perf_start() / perf_end(): low 32 bits of the time stamp counter
 * at the start of the measurement, and the measured interval respectively.
 */
extern uint32_t perf_start_count, perf_interval_count;

#ifdef __WATCOMC__
/* Fill count 16bit words at dest with val. Odd counts are stored word by
 * word; even counts are stored as count/2 dwords with val replicated into
 * both halves of eax.
 */
void memset16(void *dest, uint16_t val, int count);
#pragma aux memset16 = \
	"cld" \
	"test ecx, 1" \
	"jz memset16_dwords" \
	"rep stosw" \
	"jmp memset16_done" \
	"memset16_dwords:" \
	"shr ecx, 1" \
	"push ax" \
	"shl eax, 16" \
	"pop ax" \
	"rep stosd" \
	"memset16_done:" \
	parm[edi][ax][ecx];

#ifdef USE_MMX
/* Copy count 64bit quantities from src to dest through mm0.
 * count must be > 0: the counter is decremented before it is tested.
 */
void memcpy64(void *dest, void *src, int count);
#pragma aux memcpy64 = \
	"cploop:" \
	"movq mm0, [edx]" \
	"movq [ebx], mm0" \
	"add edx, 8" \
	"add ebx, 8" \
	"dec ecx" \
	"jnz cploop" \
	"emms" \
	parm[ebx][edx][ecx] \
	modify[8087];
#else
#define memcpy64(dest, src, count)	memcpy(dest, src, (count) << 3)
#endif

/* cpuid is executed before rdtsc; only the low 32 bits (eax) are stored */
void perf_start(void);
#pragma aux perf_start = \
	"xor eax, eax" \
	"cpuid" \
	"rdtsc" \
	"mov [perf_start_count], eax" \
	modify[eax ebx ecx edx];

/* stores (low 32 bits of TSC) - perf_start_count into perf_interval_count */
void perf_end(void);
#pragma aux perf_end = \
	"xor eax, eax" \
	"cpuid" \
	"rdtsc" \
	"sub eax, [perf_start_count]" \
	"mov [perf_interval_count], eax" \
	modify [eax ebx ecx edx];

void debug_break(void);
#pragma aux debug_break = "int 3";
#endif

#ifdef __GNUC__
/* __asm__/__volatile__ are used instead of asm/volatile so that this header
 * also compiles in strict ISO modes (-std=c99, -std=c11, -ansi).
 */
#if defined(__i386__) || defined(__x86_64__)
/* Fill count 16bit words at dest with val; counts <= 0 store nothing.
 * Odd counts are stored word by word, even counts as count/2 dwords.
 *
 * rep stos advances edi and counts ecx down, so both are read-write ("+")
 * operands: GCC does not allow an asm to modify input-only operands. val is
 * replicated into both halves of eax in C rather than with push/pop, because
 * pushing inside inline asm overwrites the red zone on x86-64.
 */
static INLINE void memset16(void *dest, uint16_t val, int count)
{
	uint32_t fill = ((uint32_t)val << 16) | val;
	size_t n;

	if(count <= 0) {
		return;
	}
	n = (size_t)count;

	if(count & 1) {
		__asm__ __volatile__ (
			"cld\n\t"
			"rep stosw\n\t"
			: "+D"(dest), "+c"(n)
			: "a"(fill)
			: "memory", "cc");
	} else {
		n >>= 1;
		__asm__ __volatile__ (
			"cld\n\t"
			"rep stosl\n\t"
			: "+D"(dest), "+c"(n)
			: "a"(fill)
			: "memory", "cc");
	}
}
#else
/* portable version: fill count 16bit words at dest with val */
static INLINE void memset16(void *dest, uint16_t val, int count)
{
	uint16_t *ptr = dest;
	while(count-- > 0) {
		*ptr++ = val;
	}
}
#endif

#ifdef USE_MMX
/* Copy count 64bit quantities from src to dest through mm0; counts <= 0 copy
 * nothing. All three operands are modified by the loop, so they are declared
 * read-write, and the written memory is declared through the "memory" clobber.
 */
static INLINE void memcpy64(void *dest, const void *src, int count)
{
	if(count <= 0) {
		return;
	}
	__asm__ __volatile__ (
		"0:\n\t"
		"movq (%1), %%mm0\n\t"
		"movq %%mm0, (%0)\n\t"
		"add $8, %1\n\t"
		"add $8, %0\n\t"
		"dec %2\n\t"
		"jnz 0b\n\t"
		"emms\n\t"
		: "+r"(dest), "+r"(src), "+r"(count)
		:
		: "mm0", "memory", "cc");
}
#else
#define memcpy64(dest, src, count)	memcpy(dest, src, (count) << 3)
#endif

/* cpuid is executed before rdtsc; only the low 32 bits (eax) are stored */
#define perf_start()  __asm__ __volatile__ ( \
	"xor %%eax, %%eax\n" \
	"cpuid\n" \
	"rdtsc\n" \
	"mov %%eax, %0\n" \
	: "=m"(perf_start_count) \
	:: "%eax", "%ebx", "%ecx", "%edx")

/* stores (low 32 bits of TSC) - perf_start_count into perf_interval_count */
#define perf_end() __asm__ __volatile__ ( \
	"xor %%eax, %%eax\n" \
	"cpuid\n" \
	"rdtsc\n" \
	"sub %1, %%eax\n" \
	"mov %%eax, %0\n" \
	: "=m"(perf_interval_count) \
	: "m"(perf_start_count) \
	: "%eax", "%ebx", "%ecx", "%edx")

#define debug_break() \
	__asm__ __volatile__ ("int $3")
#endif

#ifdef _MSC_VER
/* Fill count 16bit words at dest with val. Odd counts are stored word by
 * word; even counts are stored as count/2 dwords with val replicated into
 * both halves of eax.
 */
void __inline memset16(void *dest, uint16_t val, int count)
{
	__asm {
		cld
		mov ax, val
		mov edi, dest
		mov ecx, count
		test ecx, 1
		jz memset16_dwords
		rep stosw
		jmp memset16_done
		memset16_dwords:
		shr ecx, 1
		push ax
		shl eax, 16
		pop ax
		rep stosd
		memset16_done:
	}
}

/* cpuid is executed before rdtsc; only the low 32 bits (eax) are stored */
#define perf_start() \
	do { \
		__asm { \
			xor eax, eax \
			cpuid \
			rdtsc \
			mov [perf_start_count], eax \
		} \
	} while(0)

/* stores (low 32 bits of TSC) - perf_start_count into perf_interval_count */
#define perf_end() \
	do { \
		__asm { \
			xor eax, eax \
			cpuid \
			rdtsc \
			sub eax, [perf_start_count] \
			mov [perf_interval_count], eax \
		} \
	} while(0)

#define debug_break() \
	do { \
		__asm { int 3 } \
	} while(0)
#endif

#endif	/* UTIL_H_ */