From 5a961b914da78d534e8e2dc6bfb5103114dbff54 Mon Sep 17 00:00:00 2001 From: John Tsiombikas Date: Sat, 28 Apr 2018 22:58:27 +0300 Subject: [PATCH] moving some standard library functions to assembly --- src/libc/string.c | 7 +- src/libc/string_asm.s | 179 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 src/libc/string_asm.s diff --git a/src/libc/string.c b/src/libc/string.c index f251f67..ce3c2e7 100644 --- a/src/libc/string.c +++ b/src/libc/string.c @@ -17,6 +17,7 @@ along with this program. If not, see . */ #include +/* void memset(void *s, int c, size_t n) { char *ptr = s; @@ -24,10 +25,12 @@ void memset(void *s, int c, size_t n) *ptr++ = c; } } +*/ /* Does the same thing as memset only with 16bit values. * n in this case is the number of values, not the number of bytes. */ +/* void memset16(void *s, int c, size_t n) { int16_t *ptr = s; @@ -35,7 +38,8 @@ void memset16(void *s, int c, size_t n) *ptr++ = c; } } - +*/ +/* void *memcpy(void *dest, const void *src, size_t n) { char *dptr = dest; @@ -46,6 +50,7 @@ void *memcpy(void *dest, const void *src, size_t n) } return dest; } +*/ void *memmove(void *dest, const void *src, size_t n) { diff --git a/src/libc/string_asm.s b/src/libc/string_asm.s new file mode 100644 index 0000000..7bb6f40 --- /dev/null +++ b/src/libc/string_asm.s @@ -0,0 +1,179 @@ +# pcboot - bootable PC demo/game kernel +# Copyright (C) 2018 John Tsiombikas +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+	# All routines below use the 32-bit cdecl convention: arguments are read
+	# from the stack at 8(%ebp), 12(%ebp) and 16(%ebp); eax, ecx and edx are
+	# used as scratch; edi/esi are saved and restored.  None of them issues
+	# cld, so they rely on the direction flag being clear on entry (string
+	# instructions must advance upwards).
+
+	# void *memset(void *s, int c, size_t n)
+	#
+	# Standard C memset: stores the low byte of c into the n bytes at s.
+	# Returns s in eax; callers that declare it as returning void simply
+	# ignore the value.
+	#
+	# Strategy: byte stores until the destination is 4-byte aligned, then
+	# rep stosl for whole dwords, then 0-3 trailing byte stores.
+	.global memset
+memset:
+	push %ebp
+	mov %esp, %ebp
+	push %edi
+
+	mov 8(%ebp), %edi
+	# replicate the fill byte into all four bytes of eax
+	mov 12(%ebp), %al
+	mov %al, %ah
+	mov %ax, %cx
+	shl $16, %eax
+	mov %cx, %ax
+	mov 16(%ebp), %ecx
+
+	# Fewer than 4 bytes (including 0): go straight to the byte tail.
+	# The alignment prologue below may store up to 3 bytes, so entering it
+	# with a smaller count would drive ecx below zero and turn the
+	# rep stosl that follows into a runaway fill.
+	cmp $4, %ecx
+	jb mstail
+
+	# dispatch on the misalignment of the destination (edi & 3);
+	# the entries fall through, so mspre1 stores 3 bytes, mspre3 stores 1
+	mov %edi, %edx
+	and $3, %edx
+	jmp *mspre_tab(,%edx,4)
+
+mspre_tab: .long msmain, mspre1, mspre2, mspre3
+mspre1:	stosb
+	dec %ecx
+mspre2:	stosb
+	dec %ecx
+mspre3:	stosb
+	dec %ecx
+
+msmain:
+	# edi is now dword aligned and ecx >= 1: fill ecx/4 dwords
+	push %ecx
+	shr $2, %ecx
+	rep stosl
+	pop %ecx
+
+mstail:
+	# remaining 0-3 bytes, again through a fall-through table
+	and $3, %ecx
+	jmp *mspost_tab(,%ecx,4)
+
+mspost_tab: .long msdone, mspost1, mspost2, mspost3
+mspost3:stosb
+mspost2:stosb
+mspost1:stosb
+
+msdone:
+	mov 8(%ebp), %eax	# return s
+	pop %edi
+	pop %ebp
+	ret
+
+
+	# void memset16(void *s, int c, size_t n)
+	#
+	# Same idea as memset, but stores the low 16 bits of c, and n is the
+	# number of 16bit values to store, not the number of bytes.
+	# The destination may have any alignment: for an odd address the low
+	# byte is written first, the high byte is written at the very end of
+	# the region, and the pattern is rotated by 8 bits for everything in
+	# between.
+	.global memset16
+memset16:
+	push %ebp
+	mov %esp, %ebp
+	push %edi
+
+	mov 8(%ebp), %edi
+	# replicate the 16bit value into both halves of eax
+	mov 12(%ebp), %ax
+	shl $16, %eax
+	mov 12(%ebp), %ax
+	mov 16(%ebp), %ecx
+
+	cmp $0, %ecx
+	jz ms16done
+
+	# dispatch on the misalignment of the destination (edi & 3)
+	mov %edi, %edx
+	and $3, %edx
+	jmp *ms16pre_tab(,%edx,4)
+
+ms16pre_tab: .long ms16main, ms16pre1, ms16pre2, ms16pre3
+ms16pre3:
+	# unaligned by +3:
+	# same as next one, but jump to ms16main instead of falling through,
+	# because a single byte store already makes edi dword aligned
+	mov %al, (%edi)
+	mov %ah, -1(%edi,%ecx,2)
+	rol $8, %eax
+	inc %edi
+	dec %ecx
+	jz ms16done
+	jmp ms16main
+ms16pre1:
+	# unaligned by +1:
+	# - write low byte at edi
+	# - write high byte at the end (last byte of the region)
+	# - rotate by 8 for the rest
+	# - decrement ecx
+	# edi is then unaligned by +2, so fall through to ms16pre2
+	mov %al, (%edi)
+	mov %ah, -1(%edi,%ecx,2)
+	rol $8, %eax
+	inc %edi
+	dec %ecx
+	jz ms16done
+ms16pre2:
+	# unaligned by +2: one 16bit store reaches dword alignment
+	stosw
+	dec %ecx
+	jz ms16done
+
+ms16main:
+	# two values per dword store, then one odd value if the count was odd
+	push %ecx
+	shr $1, %ecx
+	rep stosl
+	pop %ecx
+
+	and $1, %ecx
+	jz ms16done
+	stosw
+ms16done:
+
+	pop %edi
+	pop %ebp
+	ret
+
+	# void *memcpy(void *dest, const void *src, size_t n)
+	#
+	# Standard C memcpy: copies n bytes from src to dest (regions must not
+	# overlap) and returns dest in eax.
+	#
+	# Strategy: byte copies until the destination is 4-byte aligned, then
+	# rep movsl for whole dwords, then 0-3 trailing byte copies.  Only the
+	# destination is aligned; the source may stay unaligned.
+	.global memcpy
+memcpy:
+	push %ebp
+	mov %esp, %ebp
+	push %edi
+	push %esi
+
+	mov 8(%ebp), %edi
+	mov 12(%ebp), %esi
+	mov 16(%ebp), %ecx
+
+	# Fewer than 4 bytes (including 0): go straight to the byte tail.
+	# The alignment prologue below may copy up to 3 bytes, so entering it
+	# with a smaller count would drive ecx below zero and turn the
+	# rep movsl that follows into a runaway copy.
+	cmp $4, %ecx
+	jb mctail
+
+	# dispatch on the misalignment of the destination (edi & 3);
+	# the entries fall through, so mcpre1 copies 3 bytes, mcpre3 copies 1.
+	# These must be byte moves: each step accounts for exactly one byte
+	# of the count.
+	mov %edi, %edx
+	and $3, %edx
+	jmp *mcpre_tab(,%edx,4)
+
+mcpre_tab: .long mcmain, mcpre1, mcpre2, mcpre3
+mcpre1:	movsb
+	dec %ecx
+mcpre2:	movsb
+	dec %ecx
+mcpre3:	movsb
+	dec %ecx
+
+mcmain:
+	# edi is now dword aligned and ecx >= 1: copy ecx/4 dwords
+	push %ecx
+	shr $2, %ecx
+	rep movsl
+	pop %ecx
+
+mctail:
+	# remaining 0-3 bytes, again through a fall-through table
+	and $3, %ecx
+	jmp *mcpost_tab(,%ecx,4)
+
+mcpost_tab: .long mcdone, mcpost1, mcpost2, mcpost3
+mcpost3:movsb
+mcpost2:movsb
+mcpost1:movsb
+
+mcdone:
+	mov 8(%ebp), %eax	# return dest
+	pop %esi
+	pop %edi
+	pop %ebp
+	ret
-- 
1.7.10.4