+#if defined(__i386__) || defined(__x86_64__)
+#define memset16(dest, val, count) asm volatile ( \
+ "cld\n\t" \
+ "test $1, %2\n\t" \
+ "jz 0f\n\t" \
+ "rep stosw\n\t" \
+ "jmp 1f\n\t" \
+ "0:\n\t" \
+ "shr $1, %2\n\t" \
+ "push %%ax\n\t" \
+ "shl $16, %%eax\n\t" \
+ "pop %%ax\n\t" \
+ "rep stosl\n\t" \
+ "1:\n\t"\
+ :: "D"(dest), "a"((uint16_t)(val)), "c"(count) \
+ : "memory")
+#else
+static void INLINE memset16(void *dest, uint16_t val, int count)
+{
+ uint16_t *ptr = dest;
+ while(count--) *ptr++ = val;
+}
+#endif
+
+#ifdef USE_MMX
+#define memcpy64(dest, src, count) asm volatile ( \
+ "0:\n\t" \
+ "movq (%1), %%mm0\n\t" \
+ "movq %%mm0, (%0)\n\t" \
+ "add $8, %1\n\t" \
+ "add $8, %0\n\t" \
+ "dec %2\n\t" \
+ "jnz 0b\n\t" \
+ "emms\n\t" \
+ :: "r"(dest), "r"(src), "r"(count) \
+ : "%mm0")
+#else
+#define memcpy64(dest, src, count) memcpy(dest, src, (count) << 3)
+#endif
+