--- /dev/null
+#include <string.h>
+#include <stdint.h>
+
+/*
+ * Copy n 32-bit words from src to dest, one word per iteration, starting at
+ * the lowest address.  Does nothing when n is 0.
+ */
+static inline void memcpy_words(uint32_t *dest, const uint32_t *src, size_t n)
+{
+    size_t i;
+
+    for(i = 0; i < n; i++) {
+        dest[i] = src[i];
+    }
+}
+
+/*
+ * Copy n bytes from src to dest, one byte per iteration, starting at the
+ * lowest address.  Does nothing when n is 0.
+ */
+static inline void memcpy_bytes(unsigned char *dest, const unsigned char *src, size_t n)
+{
+    for(; n != 0; n--) {
+        *dest = *src;
+        dest++;
+        src++;
+    }
+}
+
+/*
+ * Copy n bytes from src to dest and return dest.
+ *
+ * When both pointers share the same offset within a 4-byte word, the leading
+ * bytes up to the next 4-byte boundary are copied one at a time, the bulk is
+ * copied as 32-bit words, and the remaining 0-3 bytes are copied one at a
+ * time.  Short copies (n < 4) and pointers with different offsets are copied
+ * entirely byte by byte.  Copying always proceeds from the lowest address
+ * upwards.
+ *
+ * NOTE(review): the word loop accesses the buffers through uint32_t pointers;
+ * this presumably relies on the build tolerating that type punning
+ * (e.g. -fno-strict-aliasing) -- confirm against the build flags.
+ */
+void *memcpy(void *dest, const void *src, size_t n)
+{
+    size_t pre;
+    size_t nwords;
+    unsigned int dalign = (uintptr_t)dest & 3;
+    unsigned int salign = (uintptr_t)src & 3;
+    unsigned char *bdest = dest;
+    const unsigned char *bsrc = src;
+    uint32_t *wdest;
+    const uint32_t *wsrc;
+
+    /*
+     * Fewer than 4 bytes cannot hold a full word (and n >= 4 keeps the
+     * "n -= pre" below from wrapping, since pre is at most 3).  Pointers
+     * with different offsets can never both be word-aligned at once.
+     */
+    if(n < 4 || dalign != salign) {
+        memcpy_bytes(bdest, bsrc, n);
+        return dest;
+    }
+
+    /*
+     * Bytes needed to reach the next 4-byte boundary:
+     * 0, 3, 2, 1 for offsets 0, 1, 2, 3.
+     */
+    pre = (4 - dalign) & 3;
+    memcpy_bytes(bdest, bsrc, pre);
+    bdest += pre;
+    bsrc += pre;
+    n -= pre;
+
+    /* both pointers are now 4-byte aligned: copy the whole words */
+    wdest = (uint32_t *)(void *)bdest;
+    wsrc = (const uint32_t *)(const void *)bsrc;
+    nwords = n >> 2;
+    memcpy_words(wdest, wsrc, nwords);
+
+    /* trailing 0-3 bytes after the last full word */
+    memcpy_bytes(bdest + (nwords << 2), bsrc + (nwords << 2), n & 3);
+    return dest;
+}