push %edi
mov 8(%ebp), %edi
+ push %edi
mov 12(%ebp), %al
mov %al, %ah
mov %ax, %cx
cmp $0, %ecx
jz msdone
+ # write 1, 2, or 3 times until we reach a 32bit-aligned dest address
mov %edi, %edx
and $3, %edx
+ jz msmain
jmp *mspre_tab(,%edx,4)
mspre_tab: .long msmain, mspre1, mspre2, mspre3
-mspre1: stosb
+mspre3: stosb
dec %ecx
mspre2: stosb
dec %ecx
-mspre3: stosb
+mspre1: stosb
dec %ecx
jz msdone
+ # edi is 32bit-aligned here, write ecx>>2 32bit values
msmain:
push %ecx
shr $2, %ecx
rep stosl
pop %ecx
+ # write any trailing bytes
and $3, %ecx
jmp *mspost_tab(,%ecx,4)
mspost1:stosb
msdone:
+ pop %eax
pop %edi
pop %ebp
ret
push %edi
mov 8(%ebp), %edi
+ push %edi
mov 12(%ebp), %ax
shl $16, %eax
mov 12(%ebp), %ax
mov %edi, %edx
and $3, %edx
+ jz ms16main
jmp *ms16pre_tab(,%edx,4)
ms16pre_tab: .long ms16main, ms16pre1, ms16pre2, ms16pre3
jz ms16done
stosw
ms16done:
-
+ pop %eax
pop %edi
pop %ebp
ret
cmp $0, %ecx
jz mcdone
- mov %edi, %edx
- and $3, %edx
- jmp *mcpre_tab(,%edx,4)
-
-mcpre_tab: .long mcmain, mcpre1, mcpre2, mcpre3
-mcpre1: movsw
- dec %ecx
-mcpre2: movsw
- dec %ecx
-mcpre3: movsw
- dec %ecx
- jz mcdone
-
-mcmain:
- push %ecx
+ mov %ecx, %edx
shr $2, %ecx
rep movsl
- pop %ecx
- and $3, %ecx
- jmp *mcpost_tab(,%ecx,4)
+ and $3, %edx
+ jmp *mcpost_tab(,%edx,4)
mcpost_tab: .long mcdone, mcpost1, mcpost2, mcpost3
mcpost3:movsb