added read_cpuid and MTRR support checking before trying to set them
[retrobench] / src / util.h
1 #ifndef UTIL_H_
2 #define UTIL_H_
3
4
#ifdef NO_STDINT_H
/* Fallback fixed-width types for toolchains without <stdint.h>.
 * The widths assume char/short/int are 8/16/32 bits and that long can hold
 * a pointer, which is the case for the 32-bit x86 compilers this header
 * has special cases for.
 */
typedef signed char int8_t;	/* plain char may be unsigned (Watcom's default) */
typedef unsigned char uint8_t;
typedef short int16_t;
typedef unsigned short uint16_t;
typedef int int32_t;
typedef unsigned int uint32_t;
typedef long intptr_t;		/* signed, so it agrees with the <stdint.h> type */
#else
#include <stdint.h>
#endif
16
/* Compiler-specific spellings used throughout this header:
 *   INLINE - the compiler's inline keyword, empty when unknown
 *   PACKED - struct attribute that removes padding, empty when this header
 *            has no spelling for the compiler
 */
#if defined(__GNUC__)
#	define INLINE	__inline
#	define PACKED	__attribute__((packed))
#elif defined(__WATCOMC__)
#	define INLINE	__inline
#	define PACKED
#else
#	define INLINE
#	define PACKED
#endif
29
/* Byte-order swaps. Both macros evaluate their argument more than once, so
 * don't pass expressions with side effects.
 *
 * BSWAP16 swaps the two low bytes of x; higher bits are discarded.
 */
#define BSWAP16(x)	((((x) >> 8) & 0xff) | (((x) & 0xff) << 8))

/* BSWAP32 reverses the four low bytes of x. The operand is converted to
 * uint32_t first: shifting a signed int left by 24 can overflow, and with a
 * wider operand (e.g. a 64-bit unsigned long) the bits above 31 would
 * otherwise leak into the result.
 */
#define BSWAP32(x)	\
	((((uint32_t)(x) >> 24) & 0xff) | \
	 (((uint32_t)(x) >> 8) & 0xff00) | \
	 (((uint32_t)(x) << 8) & 0xff0000) | \
	 ((uint32_t)(x) << 24))
36
37
/* Sine lookup table, defined elsewhere. SIN/COS mask the angle to 11 bits,
 * so the index ranges over 0..2047 and the table needs at least 2048
 * entries. Both macros widen the table entry to int.
 */
extern short sinlut[];

/* The whole expansion is parenthesized so the cast cannot bind differently
 * depending on what surrounds the macro at the call site.
 */
#define SIN(x)	((int)sinlut[(x) & 0x7ff])
/* same table read 512 steps ahead, i.e. a quarter of the 2048-step range */
#define COS(x)	((int)sinlut[((x) + 512) & 0x7ff])
42
/* Defined elsewhere. NOTE(review): judging by the name alone, this presumably
 * maps a bit mask to the shift count of its lowest set bit - confirm against
 * the implementation before relying on that.
 */
43 int mask_to_shift(unsigned int mask);
44
45 #if defined(__i386__) || defined(__x86_64__) || defined(__386__) || defined(MSDOS)
46 /* fast conversion of double -> 32bit int
47  * for details see:
48  *  - http://chrishecker.com/images/f/fb/Gdmfp.pdf
49  *  - http://stereopsis.com/FPU.html#convert
50  */
51 static INLINE int32_t cround64(double val)
52 {
53         val += 6755399441055744.0;
54         return *(int32_t*)&val;
55 }
56 #else
57 #define cround64(x)     ((int32_t)(x))
58 #endif
59
60 static INLINE float rsqrt(float x)
61 {
62         float xhalf = x * 0.5f;
63         int32_t i = *(int32_t*)&x;
64         i = 0x5f3759df - (i >> 1);
65         x = *(float*)&i;
66         x = x * (1.5f - xhalf * x * x);
67         return x;
68 }
69
/* Counters used by the perf_start()/perf_end() variants in this header:
 * perf_start() stores the EAX result of RDTSC in perf_start_count, and
 * perf_end() stores a fresh RDTSC EAX minus perf_start_count in
 * perf_interval_count. Both are defined elsewhere.
 */
70 extern uint32_t perf_start_count, perf_interval_count;
71
72 #ifdef __WATCOMC__
/* memset16 (Watcom inline asm): store `count` copies of the 16-bit `val`
 * starting at `dest`. Arguments arrive in edi / ax / ecx.
 * When bit 0 of count is set, everything is written one word at a time
 * (rep stosw). Otherwise count is halved, ax is replicated into both halves
 * of eax, and the fill is done with dword stores (rep stosd).
 */
73 void memset16(void *dest, uint16_t val, int count);
74 #pragma aux memset16 = \
75         "cld" \
76         "test ecx, 1" \
77         "jz memset16_dwords" \
78         "rep stosw" \
79         "jmp memset16_done" \
80         "memset16_dwords:" \
81         "shr ecx, 1" \
82         "push ax" \
83         "shl eax, 16" \
84         "pop ax" \
85         "rep stosd" \
86         "memset16_done:" \
87         parm[edi][ax][ecx];
88
/* memcpy64 (Watcom): copy `count` 8-byte blocks from src to dest.
 * With USE_MMX every block is moved through mm0 and emms is issued once the
 * loop has finished. The counter is only tested after a block has been
 * copied (dec/jnz), so count must be at least 1.
 * Without USE_MMX this maps to memcpy() with a byte count of count * 8.
 * NOTE(review): no <string.h> include is visible in this header, so the
 * including file presumably has to provide the memcpy declaration.
 */
89 #ifdef USE_MMX
90 void memcpy64(void *dest, void *src, int count);
91 #pragma aux memcpy64 = \
92         "cploop:" \
93         "movq mm0, [edx]" \
94         "movq [ebx], mm0" \
95         "add edx, 8" \
96         "add ebx, 8" \
97         "dec ecx" \
98         "jnz cploop" \
99         "emms" \
100         parm[ebx][edx][ecx] \
101         modify[8087];
102 #else
103 #define memcpy64(dest, src, count)      memcpy(dest, src, (count) << 3)
104 #endif
105
/* perf_start (Watcom): executes CPUID with eax = 0, then RDTSC, and stores
 * the EAX result (the low half of the counter) in perf_start_count.
 * NOTE(review): the CPUID is presumably there as a serializing barrier
 * before the counter is read.
 */
106 void perf_start(void);
107 #pragma aux perf_start = \
108         "xor eax, eax" \
109         "cpuid" \
110         "rdtsc" \
111         "mov [perf_start_count], eax" \
112         modify[eax ebx ecx edx];
113
/* perf_end (Watcom): executes CPUID with eax = 0, then RDTSC, subtracts
 * perf_start_count from the EAX result and stores the difference in
 * perf_interval_count. Only 32 bits are involved, so the interval is the
 * elapsed count modulo 2^32.
 */
114 void perf_end(void);
115 #pragma aux perf_end = \
116         "xor eax, eax" \
117         "cpuid" \
118         "rdtsc" \
119         "sub eax, [perf_start_count]" \
120         "mov [perf_interval_count], eax" \
121         modify [eax ebx ecx edx];
122
/* debug_break (Watcom): raises software interrupt 3 (int 3). */
123 void debug_break(void);
124 #pragma aux debug_break = "int 3";
125 #endif
126
127 #ifdef __GNUC__
#if defined(__i386__) || defined(__x86_64__)
/* memset16: store `count` copies of the 16-bit `val` starting at `dest`.
 * A count <= 0 stores nothing. When bit 0 of count is set the fill is done
 * one word at a time (rep stosw); otherwise it is done with half as many
 * dword stores (rep stosl).
 *
 * This is a function rather than a macro around a single asm statement so
 * that:
 *  - edi/ecx, which rep stos modifies, can be read-write operands on local
 *    copies instead of input-only operands the compiler assumes unchanged,
 *  - the 32-bit fill pattern is built in C; doing it with push/pop inside
 *    the asm writes below the stack pointer, which corrupts the red zone
 *    on x86-64,
 *  - the counter is pointer-sized, so rep sees a fully defined (r|e)cx.
 * __asm__/__inline__ are spelled with underscores so the header also
 * compiles in strict -std= modes, where plain `asm` is not a keyword.
 */
static __inline__ void memset16(void *dest, uint16_t val, int count)
{
	uint32_t fill = ((uint32_t)val << 16) | val;
	__SIZE_TYPE__ n;

	if(count <= 0) {
		return;
	}

	if(count & 1) {
		n = (__SIZE_TYPE__)count;
		__asm__ __volatile__ (
			"cld\n\t"
			"rep stosw\n\t"
			: "+D"(dest), "+c"(n)
			: "a"(fill)
			: "memory", "cc");
	} else {
		n = (__SIZE_TYPE__)count >> 1;
		__asm__ __volatile__ (
			"cld\n\t"
			"rep stosl\n\t"
			: "+D"(dest), "+c"(n)
			: "a"(fill)
			: "memory", "cc");
	}
}
#else
/* portable fallback: plain word-by-word loop, count <= 0 stores nothing */
static __inline__ void memset16(void *dest, uint16_t val, int count)
{
	uint16_t *ptr = dest;
	while(count-- > 0) {
		*ptr++ = val;
	}
}
#endif
151
#ifdef USE_MMX
/* memcpy64: copy `count` 8-byte blocks from src to dest through mm0, then
 * issue emms. A count <= 0 copies nothing (the loop decrements before it
 * tests, so entering it with 0 would wrap the counter).
 *
 * The pointers and the counter are modified by the loop, so they are
 * read-write operands on the function's own parameter copies. The "memory"
 * clobber tells the compiler that the asm reads and writes memory it
 * cannot see through the operands.
 */
static __inline__ void memcpy64(void *dest, void *src, int count)
{
	if(count <= 0) {
		return;
	}

	__asm__ __volatile__ (
		"0:\n\t"
		"movq (%1), %%mm0\n\t"
		"movq %%mm0, (%0)\n\t"
		"add $8, %1\n\t"
		"add $8, %0\n\t"
		"dec %2\n\t"
		"jnz 0b\n\t"
		"emms\n\t"
		: "+r"(dest), "+r"(src), "+r"(count)
		:
		: "mm0", "memory", "cc");
}
#else
/* without MMX: plain memcpy of count * 8 bytes (the includer provides memcpy) */
#define memcpy64(dest, src, count)      memcpy(dest, src, (count) << 3)
#endif
167
/* perf_start (GCC): executes CPUID with eax = 0, then RDTSC, and stores the
 * EAX result in perf_start_count.
 * Written with __asm__/__volatile__ because plain `asm` is not a keyword
 * when compiling with -ansi or a strict -std= option.
 */
#define perf_start()  __asm__ __volatile__ ( \
	"xor %%eax, %%eax\n" \
	"cpuid\n" \
	"rdtsc\n" \
	"mov %%eax, %0\n" \
	: "=m"(perf_start_count) \
	:: "%eax", "%ebx", "%ecx", "%edx")
175
/* perf_end (GCC): executes CPUID with eax = 0, then RDTSC, subtracts
 * perf_start_count from the EAX result and stores the difference in
 * perf_interval_count (32-bit arithmetic, so the interval is modulo 2^32).
 * Written with __asm__/__volatile__ because plain `asm` is not a keyword
 * when compiling with -ansi or a strict -std= option.
 */
#define perf_end() __asm__ __volatile__ ( \
	"xor %%eax, %%eax\n" \
	"cpuid\n" \
	"rdtsc\n" \
	"sub %1, %%eax\n" \
	"mov %%eax, %0\n" \
	: "=m"(perf_interval_count) \
	: "m"(perf_start_count) \
	: "%eax", "%ebx", "%ecx", "%edx")
185
/* debug_break (GCC): raises software interrupt 3 (int $3).
 * Uses __asm__ so it also compiles under -ansi / strict -std= options.
 */
#define debug_break() \
	__asm__ __volatile__ ("int $3")
188 #endif
189
190 #ifdef _MSC_VER
/* memset16 (MSVC inline asm): store `count` copies of the 16-bit `val`
 * starting at `dest`.
 * When bit 0 of count is set, everything is written one word at a time
 * (rep stosw). Otherwise count is halved, ax is replicated into both halves
 * of eax, and the fill is done with dword stores (rep stosd).
 * NOTE(review): this is a non-static function definition in a header -
 * confirm that is intended for every translation unit that includes it.
 */
191 void __inline memset16(void *dest, uint16_t val, int count)
192 {
193         __asm {
194                 cld
195                 mov ax, val
196                 mov edi, dest
197                 mov ecx, count
198                 test ecx, 1
199                 jz memset16_dwords
200                 rep stosw
201                 jmp memset16_done
202                 memset16_dwords:
203                 shr ecx, 1
204                 push ax
205                 shl eax, 16
206                 pop ax
207                 rep stosd
208                 memset16_done:
209         }
210 }
211
/* perf_start (MSVC): executes CPUID with eax = 0, then RDTSC, and stores the
 * EAX result in perf_start_count.
 * A macro expands to one logical line, so the newlines that normally
 * separate instructions inside an __asm block are lost; every instruction
 * therefore carries its own __asm keyword.
 */
#define perf_start() \
	do { \
		__asm { \
			__asm xor eax, eax \
			__asm cpuid \
			__asm rdtsc \
			__asm mov [perf_start_count], eax \
		} \
	} while(0)
221
/* perf_end (MSVC): executes CPUID with eax = 0, then RDTSC, subtracts
 * perf_start_count from the EAX result and stores the difference in
 * perf_interval_count (32-bit arithmetic, so the interval is modulo 2^32).
 * A macro expands to one logical line, so the newlines that normally
 * separate instructions inside an __asm block are lost; every instruction
 * therefore carries its own __asm keyword.
 */
#define perf_end() \
	do { \
		__asm { \
			__asm xor eax, eax \
			__asm cpuid \
			__asm rdtsc \
			__asm sub eax, [perf_start_count] \
			__asm mov [perf_interval_count], eax \
		} \
	} while(0)
232
/* debug_break (MSVC): raises software interrupt 3 (int 3). The do/while(0)
 * wrapper makes the macro behave like a single statement.
 */
233 #define debug_break() \
234         do { \
235                 __asm { int 3 } \
236         } while(0)
237 #endif
238
/* Raw CPUID results as filled in by read_cpuid(). The field order mirrors
 * the registers of CPUID leaves 0 and 1, as noted on each field.
 */
struct cpuid_info {
	/* leaf 0 */
	uint32_t maxidx;	/* eax */
	char vendor[12];	/* ebx, edx, ecx - 12 bytes, no room for a terminating NUL */

	/* leaf 1 */
	uint32_t id;		/* eax - decode with CPUID_STEPPING/MODEL/FAMILY */
	uint32_t rsvd0;		/* ebx */
	uint32_t feat;		/* edx - CPUID_FEAT_* bits */
	uint32_t feat2;		/* ecx - CPUID_FEAT2_* bits */
};
247
/* Decode cpuid_info.id: three adjacent 4-bit fields starting at bit 0.
 * Only bits 0-11 are looked at; nothing above bit 11 takes part.
 */
#define CPUID_STEPPING(id)	(((id) >> 0) & 0x0f)
#define CPUID_MODEL(id)		(((id) >> 4) & 0x0f)
#define CPUID_FAMILY(id)	(((id) >> 8) & 0x0f)
251
/* Bit masks for cpuid_info.feat (CPUID leaf 1, edx). One bit per flag;
 * bits 10 and 20 have no flag defined here.
 */
#define CPUID_FEAT_FPU		0x00000001	/* bit 0 */
#define CPUID_FEAT_VME		0x00000002	/* bit 1 */
#define CPUID_FEAT_DBGEXT	0x00000004	/* bit 2 */
#define CPUID_FEAT_PSE		0x00000008	/* bit 3 */
#define CPUID_FEAT_TSC		0x00000010	/* bit 4 */
#define CPUID_FEAT_MSR		0x00000020	/* bit 5 */
#define CPUID_FEAT_PAE		0x00000040	/* bit 6 */
#define CPUID_FEAT_MCE		0x00000080	/* bit 7 */
#define CPUID_FEAT_CX8		0x00000100	/* bit 8 */
#define CPUID_FEAT_APIC		0x00000200	/* bit 9 */
#define CPUID_FEAT_SEP		0x00000800	/* bit 11 */
#define CPUID_FEAT_MTRR		0x00001000	/* bit 12 */
#define CPUID_FEAT_PGE		0x00002000	/* bit 13 */
#define CPUID_FEAT_MCA		0x00004000	/* bit 14 */
#define CPUID_FEAT_CMOV		0x00008000	/* bit 15 */
#define CPUID_FEAT_PAT		0x00010000	/* bit 16 */
#define CPUID_FEAT_PSE36	0x00020000	/* bit 17 */
#define CPUID_FEAT_PSN		0x00040000	/* bit 18 */
#define CPUID_FEAT_CLF		0x00080000	/* bit 19 */
#define CPUID_FEAT_DTES		0x00200000	/* bit 21 */
#define CPUID_FEAT_ACPI		0x00400000	/* bit 22 */
#define CPUID_FEAT_MMX		0x00800000	/* bit 23 */
#define CPUID_FEAT_FXSR		0x01000000	/* bit 24 */
#define CPUID_FEAT_SSE		0x02000000	/* bit 25 */
#define CPUID_FEAT_SSE2		0x04000000	/* bit 26 */
#define CPUID_FEAT_SS		0x08000000	/* bit 27 */
#define CPUID_FEAT_HTT		0x10000000	/* bit 28 */
#define CPUID_FEAT_TM1		0x20000000	/* bit 29 */
#define CPUID_FEAT_IA64		0x40000000	/* bit 30 */
#define CPUID_FEAT_PBE		0x80000000	/* bit 31 */
282
/* Bit masks for cpuid_info.feat2 (CPUID leaf 1, ecx). One bit per flag;
 * bits 11, 16, 24 and 29-31 have no flag defined here.
 */
#define CPUID_FEAT2_SSE3	0x00000001	/* bit 0 */
#define CPUID_FEAT2_PCLMUL	0x00000002	/* bit 1 */
#define CPUID_FEAT2_DTES64	0x00000004	/* bit 2 */
#define CPUID_FEAT2_MONITOR	0x00000008	/* bit 3 */
#define CPUID_FEAT2_DS_CPL	0x00000010	/* bit 4 */
#define CPUID_FEAT2_VMX		0x00000020	/* bit 5 */
#define CPUID_FEAT2_SMX		0x00000040	/* bit 6 */
#define CPUID_FEAT2_EST		0x00000080	/* bit 7 */
#define CPUID_FEAT2_TM2		0x00000100	/* bit 8 */
#define CPUID_FEAT2_SSSE3	0x00000200	/* bit 9 */
#define CPUID_FEAT2_CID		0x00000400	/* bit 10 */
#define CPUID_FEAT2_FMA		0x00001000	/* bit 12 */
#define CPUID_FEAT2_CX16	0x00002000	/* bit 13 */
#define CPUID_FEAT2_ETPRD	0x00004000	/* bit 14 */
#define CPUID_FEAT2_PDCM	0x00008000	/* bit 15 */
#define CPUID_FEAT2_PCIDE	0x00020000	/* bit 17 */
#define CPUID_FEAT2_DCA		0x00040000	/* bit 18 */
#define CPUID_FEAT2_SSE41	0x00080000	/* bit 19 */
#define CPUID_FEAT2_SSE42	0x00100000	/* bit 20 */
#define CPUID_FEAT2_X2APIC	0x00200000	/* bit 21 */
#define CPUID_FEAT2_MOVBE	0x00400000	/* bit 22 */
#define CPUID_FEAT2_POPCNT	0x00800000	/* bit 23 */
#define CPUID_FEAT2_AES		0x02000000	/* bit 25 */
#define CPUID_FEAT2_XSAVE	0x04000000	/* bit 26 */
#define CPUID_FEAT2_OSXSAVE	0x08000000	/* bit 27 */
#define CPUID_FEAT2_AVX		0x10000000	/* bit 28 */
309
/* Defined elsewhere; takes a pointer to the cpuid_info structure declared in
 * this header. NOTE(review): presumably fills *info from CPUID leaves 0 and 1
 * as laid out in struct cpuid_info. The meaning of the int return value is
 * not visible from this header - confirm against the implementation.
 */
310 int read_cpuid(struct cpuid_info *info);
311
312 #endif  /* UTIL_H_ */