1 @ Copyright (c) 2003-2021 James Daniels
2 @ Distributed under the MIT License
3 @ license terms: see LICENSE file in root or http://opensource.org/licenses/MIT
6 .SECTION .iwram,"ax",%progbits
11 .GLOBAL AAS_MixAudio_NoChange
14 .GLOBAL _AAS_MixAudio_mod1
15 .GLOBAL _AAS_MixAudio_mod2
16 .GLOBAL _AAS_MixAudio_mod3
17 .GLOBAL _AAS_MixAudio_mod4
18 .GLOBAL _AAS_MixAudio_mod5
19 .GLOBAL _AAS_MixAudio_mod6
20 .GLOBAL _AAS_MixAudio_mod7
21 .GLOBAL _AAS_MixAudio_mod8
@ Instruction templates and persistent mixer state.
@ The setup code loads the template words with ldr and uses them as the basis for
@ instructions it stores into the code buffer at ma_buffer_start (for example,
@ "mov r3,#vol" is built by adding the volume to the _ma_mov_r3_0 word).
25 _ma_mov_r3_0: mov r3,#0
26 _ma_add_r0_r0_0: add r0,r0,#0
@ The setup code subtracts a cache address from this word and toggles bit 20 ("switch to str").
28 _ma_ldr_pc_0: .word 0xe51f0000+8 @ sort of equivalent to opcode(ldr r0,[pc,#-0])
29 _ma_mov_r14_r0_lsr_6: mov r14,r0,lsr #6
30 _ma_ldrsb_r0_r14_shifted: .word 0x3e1fe00d @ (opcode("ldrsb r0,[r14,#+0]!")>>4) + (3<<28)
@ NOTE(review): the label says "add" but the template is the flag-setting "adds".
@ This may be intentional (the "mlane" variant named in the comments is conditional on NE) - confirm.
31 _ma_add_r0_r0_r0_lsl_16: adds r0,r0,r0,lsl #16 @ change regs as appropriate
@ Base address for the volume lookup, stored with a -1 bias; the volume setup adds 129 to it.
32 _ma_vol_lookup_addr: .word _AAS_vol_lookup-1
@ Written by the setup path (str) and reloaded by AAS_MixAudio_NoChange (ldr).
33 _ma_total_iterations: .word 0x0
34 _ma_total_delta: .word 0x0
35 _ma_bytes_available: .word 0x0
@ NOTE(review): _ma_no_skip is not referenced in the part of the file visible here.
36 _ma_no_skip: .word 0x0
39 @ AAS_CODE_IN_IWRAM void AAS_MixAudio_NoChange( AAS_s8* mix_buffer, struct AAS_Channel chans[], int iterations );
@ Entry point that reloads the values stored by an earlier setup pass instead of
@ recomputing them. Saves r4-r11 and r14, then restores:
@   r10 = _ma_bytes_available, r9 = _ma_total_iterations, r2 = _ma_total_delta
@ NOTE(review): where execution continues after these loads is not visible in this
@ excerpt; presumably it joins the common path near ma_begin - confirm.
41 AAS_MixAudio_NoChange:
42 stmfd sp!,{r4-r11,r14}
48 ldr r10,_ma_bytes_available
49 ldr r9,_ma_total_iterations
53 ldr r2,_ma_total_delta
58 @ AAS_CODE_IN_IWRAM void AAS_MixAudio( AAS_s8* mix_buffer, struct AAS_Channel chans[], int iterations );
@ Full entry point: scans the channel array and writes a specialised mixing loop into
@ the code buffer before mixing.
@ NOTE(review): the entry label for this routine is not visible in this excerpt.
61 stmfd sp!,{r4-r11,r14}
@ Stack slots used as locals (as listed by the original author):
65 @ [sp] = _ma_mix_buffer
68 @ [sp,#12] = _ma_iterations_loop
69 @ [sp,#16] = _ma_iterations_buffer
70 @ [sp,#20] = _ma_iterations_scale_buffer
71 @ [sp,#24] = _ma_loop_counter
75 @ r2 = iterations in main loop
76 @ r3 = _ma_add_r0_r0_r0_lsl_16
79 @ r6 = outer loop counter/active channels found/total delta>>2
@ r12 = write pointer into the generated-code buffer
90 adr r12,ma_buffer_start
@ r3 = merge-instruction template word
91 ldr r3,_ma_add_r0_r0_r0_lsl_16
@ r6 bits 28-31 = outer loop counter (decremented by 0x10000000 per channel),
@ r6 bits 24-27 = active channels found (incremented by 0x01000000).
93 mov r6,#0x70000000 @ was #0x30000000
@ Post-indexed load: r1 moves on by 20 bytes, so the fields of the channel just read
@ are addressed below as [r1,#offset-20].
98 ldrb r14,[r1],#20 @ effective_volume
@ NOTE(review): the instruction that sets the flags for this beq is not visible in this excerpt.
101 beq ma_skip @ skip if effective_volume == 0
104 @ Setup volume registers:
105 @ r11 = increment for r4
106 @ r14 = "mul r5,r0,r3"/"mov r5,r0,lsl #0"/"mlane r5,r0,r3,r5"/"add r5,r5,r0,lsl #0"
@ Selects the instruction pair that applies this channel's volume and, when needed,
@ writes "mov r3,#vol" into the code buffer.
107 @adr r10,_ma_vol_lookup-1
@ r10 = _AAS_vol_lookup-1
108 ldr r10,_ma_vol_lookup_addr
109 add r4,r10,#129 @ 129 = 1+_ma_mul_r5_r0_r3-_ma_vol_lookup
@ r5 = active-channel count field of r6 (bits 24-27); Z is set when it is zero.
111 ands r5,r6,#0x0f000000 @ test if this is first active channel
112 addne r4,r4,#16 @ 16 = _ma_mlane_r5_r0_r3_r5-_ma_mul_r5_r0_r3
113 @adreq r4,_ma_mul_r5_r0_r3 @ use mul/mov if this is first non-zero chan
114 @adrne r4,_ma_mlane_r5_r0_r3_r5 @ use mlane/add if this is not the first non-zero chan
@ NOTE(review): the lt/ge conditions below depend on a flag-setting instruction that is
@ not visible in this excerpt. Per the comments, lt = volume is not a power of 2 and
@ ge = volume is a power of 2.
116 ldrlt r11,_ma_mov_r3_0 @ read "mov r3,#vol" if vol not power of 2
117 addlt r11,r11,r14 @ set #vol in "mov r3,#vol" if vol not power of 2
118 strlt r11,[r12],#4 @ write "mov r3,#vol" if vol not power of 2
119 addge r4,r4,#8 @ increment if vol is power of 2
@ Loads two consecutive words from the selected table entry into r11 and r14.
120 ldmia r4,{r11,r14} @ read mul/mlane/mov/add and increment
@ NOTE(review): r10 presumably holds a shift amount by this point (set on a line not
@ shown); lsl #7 would place it in the shift-immediate field of the mov/add - confirm.
121 addge r14,r14,r10,lsl #7 @ set lsl #val for mov/add if vol power of 2
124 @ r0,r4,r5,r7,r8,r9,r10 available
@ NOTE(review): the next comment names _ma_add_r0_r0_0 for r3, but the visible code loads
@ _ma_add_r0_r0_r0_lsl_16 into r3 and _ma_add_r0_r0_0 into r4 - possibly stale, confirm.
125 @ r3 = _ma_add_r0_r0_0
126 @ r5 = delta/increment for r14
127 @ r8 = _ma_divide_table
128 @ r11 = temp (was increment for r14)
130 @ r14 = "mul r5,r0,r3"/"mov r5,r0,lsl #0"/"mlane r5,r0,r3,r5"/"add r5,r5,r0,lsl #0"
132 @ Setup delta registers, write delta increment instructions:
133 @ r5 = delta/increment for r14
@ r7 = &_ma_chan_cache + 4*active_channels (r5 still holds the count in bits 24-27).
134 adr r7,_ma_chan_cache @ could remove
135 add r7,r7,r5,lsr #22 @ could remove
@ Builds a pc-relative load/store from the template by subtracting the slot address.
@ NOTE(review): presumably a line not shown also accounts for the address the generated
@ instruction will run from - confirm.
136 ldr r10,_ma_ldr_pc_0 @ could pre-subtract _ma_chan_cache from _ma_ldr_pc_0 (would need to set at runtime)
137 sub r10,r10,r7 @ could change to sub r10,r10,r5,lsr #22
@ Toggling bit 20 flips the load/store bit of the instruction word.
139 eor r10,r10,#0x00100000 @ switch to str
140 ldr r7,_ma_mov_r14_r0_lsr_6
@ Halfword at offset 6 of the channel record just read.
141 ldrh r5,[r1,#6-20] @ delta
143 ldr r4,_ma_add_r0_r0_0
@ Appends three words to the code buffer and advances r12 by 12 bytes.
145 stmia r12!,{r0,r7,r9}
153 add r5,r11,r5,lsl #20
160 @ r10 = local outer loop counter/_ma_ldrsb_r0_r14_shifted
161 mov r0,#0x200 @ delta_pos = 0.5 (was 0)
162 ldr r10,_ma_ldrsb_r0_r14_shifted
169 @ r2 = iterations in main loop
170 @ r3 = _ma_add_r0_r0_r0_lsl_16
172 @ r5 = delta/increment for r14
@ NOTE(review): r6 is described as "total delta>>2" elsewhere in this file and "<<1" here - confirm which is current.
173 @ r6 = outer loop counter/total delta<<1
177 @ r10 = local outer loop counter/_ma_ldrsb_r0_r14_shifted
178 @ r11 = temp (was increment for r14)
180 @ r14 = "mul r5,r0,r3"/"mov r5,r0,lsl #0"/"mlane r5,r0,r3,r5"/"add r5,r5,r0,lsl #0"
182 @ Write instructions:
@ The first pass enters the loop body part-way through, at ma_setup_inner_loop_first.
185 b ma_setup_inner_loop_first
186 .word 0,0,0 @ padding
@ NOTE(review): the flag-setting instructions for the beq skips below, and the
@ ma_setup_outer_loop label, are not visible in this excerpt.
192 beq ma_setup_inner_loop_skip1
198 ma_setup_inner_loop_skip1:
201 ma_setup_inner_loop_first:
206 beq ma_setup_inner_loop_skip2
213 ma_setup_inner_loop_skip2:
219 beq ma_setup_inner_loop_skip3
226 ma_setup_inner_loop_skip3:
229 @ Write merge and mul/mla/mov/add:
232 add r7,r14,r11,lsr #8
@ Clears bits 16-23 of r9 into r11.
234 bic r11,r9,#0x00ff0000
@ Advances the volume instruction template by r5<<12 for the next output.
237 add r14,r14,r5,lsl #12
242 beq ma_setup_inner_loop_skip4
249 ma_setup_inner_loop_skip4:
252 @ Write merge: (skips if unnecessary)
255 bic r4,r9,#0x00ff0000
@ Builds the merge instruction from the r3 template by adding fields from r4 and r7.
257 addne r11,r3,r4,lsr #8
258 addne r11,r11,r7,lsl #4
261 @ Write mul/mla/mov/add:
264 add r14,r14,r5,lsl #12
@ The local loop counter is kept in the top nibble of r10; loop while it stays non-negative.
266 subs r10,r10,#0x10000000
267 bge ma_setup_outer_loop
270 @ Calculate iterations until end of sample:
271 ldr r10,[r1,#8-20] @ pos
272 ldr r0,[r1,#12-20] @ end
@ r8 = pointer stored in _ma_divide_table (address of AAS_DivTable).
275 ldr r8,_ma_divide_table
281 add r6,r6,#0x01000000 @ increment active channels found
@ Decrements the outer loop counter held in bits 28-31 of r6.
284 subs r6,r6,#0x10000000
293 @bic r0,r0,#0xff000000
294 @add r0,r0,#0xea000000
@ With a top byte of 0xff, clearing bits 0x15 leaves 0xea, matching the commented-out pair above.
296 bic r0,r0,#0x15000000 @ offset always negative, so this is equivalent to above
@ r4 = active channel count in bits 24-27; lsr #21 turns that into count*8.
300 and r4,r6,#0x0f000000
301 sub r10,r10,r4,lsr #21 @ r10 -= 8*used_channels
302 @str r10,_ma_iterations_scale_buffer
@ Store the results so that AAS_MixAudio_NoChange can reload them.
304 str r10,_ma_bytes_available
308 str r9,_ma_total_iterations
@ Drop the counter fields in the top byte of r6 before storing the total delta.
317 bic r2,r6,#0xff000000
318 str r2,_ma_total_delta
320 @ r2 = total_delta>>2
321 @ r9 = r11 = total_iterations - i.e. iterations until loop - argh! need to recalculate each call!
322 @ r10 = ((bytes_available-(8*channels_used))<<4)
324 @ Could remove need to recalc total_iterations each call by not doing "total_iterations = min( iterations, total_iterations )" - do min below instead and sub "iterations" from "total_iterations" afterwards. Problem: Need to accurately calculate "total_iterations" even when it is large. (Although only need to cope with total_iterations being twice as large as it is now because only ever re-use config once.)
327 @ldr r4,_ma_mix_buffer
331 @ r1 = &_ma_mix_buffer
332 @ r2 = total_delta>>2
333 @ r3 = refill iterations<<3
334 @ r4 = _ma_mix_buffer
339 @ r9 = total iterations
340 @ r10 = ((bytes_available-(8*channels_used))<<4)
341 @ r11 = total iterations
346 @ iterations = ((bytes_available-(8*channels_used))<<4)/(total_delta>>2)
@ r3 = pointer to the division table.
347 ldr r3,_ma_divide_table
348 ma_begin: @ called from process loop
352 @str r3,_ma_iterations_buffer
@ r9 = min(r9, r3 >> 12), using a signed comparison.
@ NOTE(review): r3 presumably holds the scaled quotient by this point (computed on lines not shown) - confirm.
355 cmp r9,r3,lsr #12 @ was asr #3
356 movgt r9,r3,lsr #12 @ was asr #3
359 @str r3,_ma_iterations_loop
370 @ r4 = _ma_mix_buffer
374 @ r8 = &_ma_chan_cache
378 @ r12 = buffer address
@ Per-channel pass: advances each active channel's position and records where its
@ data sits below ma_buffer_end.
382 adr r8,_ma_chan_cache
387 mov r10,#8 @ was #4 - could perhaps set according to max # of channels / 2?
@ r12 starts at the end of the buffer and is moved down for each channel.
389 adr r12,ma_buffer_end
@ Post-indexed: r3 steps to the next 20-byte channel record.
394 ldrb r14,[r3],#20 @ effective_volume
@ NOTE(review): the flag-setting instruction for this beq is not visible in this excerpt.
396 beq ma_fill_buffer_skip
397 ldr r7,[r3,#8-20] @ pos
399 ldrb r11,[r3,#3-20] @ pos_fraction
402 ldrh r14,[r3,#6-20] @ delta
404 add r11,r11,r14,lsl #2
@ Only the low byte is written back.
405 strb r11,[r3,#3-20] @ pos_fraction
407 str r7,[r3,#8-20] @ pos
408 add r7,r6,r14,lsr #8 @ words to copy
@ Reserve r7 words (r7*4 bytes) below the current r12.
409 sub r12,r12,r7,lsl #2
411 str r1,[r8],#4 @ IWRAM pos
@ NOTE(review): r0/r12/r14 look like source, destination and count+control for a
@ 32-bit DMA transfer, with r5 pointing at the DMA registers (set on a line not shown) - confirm.
412 add r14,r7,#0x84000000
413 stmia r5,{r0,r12,r14}
416 bgt ma_fill_buffer_loop
418 @ Setup registers for main loop
425 @ Setup registers for main loop
427 @add r14,r14,r9,lsl #24
@ Clear bit 24 of r14, then add the iteration count (r9) starting at bit 25.
@ The mix loop counts this field down with "subs r14,r14,#0x2000000".
429 bic r14,r14,#0x1000000
430 add r14,r14,r9,lsl #25
435 @ldr r9,_ma_iterations_loop
438 @ldrne r3,_ma_iterations_buffer
444 @ should be after ble below?
453 @ change so only branch if done all iterations and no samples have ended
456 @ r1 = chans[] (was loop)
457 @ r2 = total_delta>>2
459 @ r4 = _ma_mix_buffer
465 @ r10 = _ma_specific_first
466 @ r11 = &ma_chan0_start
469 @ r14 = loop (was chans[])
@ After mixing: inspect each channel's delta, pos and end to see whether a sample has finished.
471 ldr r3,_ma_divide_table
482 ldrh r7,[r1,#6-20] @ delta
483 ldr r12,[r1,#8-20] @ pos
484 ldr r6,[r1,#12-20] @ end
@ NOTE(review): the compares that feed the conditional branches below, and the
@ ma_check_chan_loop label itself, are not visible in this excerpt.
501 bgt ma_check_chan_loop
504 bne ma_chan_has_finished
507 @ldrgt r10,_ma_iterations_scale_buffer
509 ldrgt r10,_ma_bytes_available
@ Restore the registers saved on entry and return via bx.
514 ldmfd sp!, {r4-r11, r14}
515 bx lr @ Thumb interwork friendly.
@ Reached when the channel check finds that a sample has ended.
517 ma_chan_has_finished:
519 beq ma_chan_all_zeroes
@ Step r1 back by eight 20-byte channel records.
523 sub r1,r1,#(20*8) @ was #(20*4)
524 @str r4,_ma_mix_buffer
528 ma_chan_all_zeroes: @ very rare - only happens if last active channel finished during this period
530 ldr r5,[sp,#8] @ can be 0 sometimes
@ NOTE(review): 0x85000000 differs from the 0x84000000 used in the fill loop only in
@ bit 24; presumably a DMA control word with a fixed source, used to zero-fill - confirm.
532 add r5,r5,#0x85000000
536 stmneia r6,{r2,r4,r5} @ r5(_ma_to_go) can be 0 sometimes
542 ldr r5,[r1,#16-20] @ loop_length
@ Sets r0 to 1; per the comment this asks for the setup to be redone.
545 mov r0,#1 @ redo setup !!moved!!
549 strne r12,[r1,#8-20] @ pos
@ Halfword store covering the two fields at offset 0 of the channel record.
553 strh r5,[r1,#0-20] @ effective_volume + active
554 @mov r0,#1 @ redo setup !!was here!!
@ Eight words, one per channel, holding each channel's IWRAM position.
@ NOTE(review): presumably this is the storage labelled _ma_chan_cache; the label line is not visible in this excerpt.
559 .word 0,0,0,0,0,0,0,0 @ IWRAM pos, chan: 0,1,2,3,4,5,6,7
@ Literal holding the address of AAS_DivTable, loaded with pc-relative ldr.
561 _ma_divide_table: .word AAS_DivTable
@ Fragment of the mixing loop; most of its instructions are not visible in this excerpt.
570 @ r4 : output address
571 @ r5-r12 : output buffer
572 @ r14 : sample address/loop/mask
578 @ r2 = mask_0x80808080
581 @ Make sure algo is as efficient as possible
584 @ldr r14,_ma_loop_counter
@ Shift each accumulator right by 8 and AND it with r14, which is used here as a mask.
591 and r10,r14,r10,lsr #8
592 and r11,r14,r11,lsr #8
593 and r12,r14,r12,lsr #8
@ Combine two masked results, the second moved up by 8 bits.
597 add r8,r11,r12,lsl #8
@ The iteration count lives in r14 from bit 25 upwards; decrement it by one and set flags.
603 subs r14,r14,#0x2000000
607 @str r14,_ma_loop_counter
@ Zero-initialised work buffer: 16 rows of 32 words (2048 bytes).
@ The setup code writes generated instructions upwards from ma_buffer_start, and the
@ fill loop allocates space downwards from ma_buffer_end.
610 ma_buffer_start: @ 2048 bytes (1152 bytes would be equivalent to previous cache size)
612 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
613 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
614 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
615 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
617 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
618 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
619 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
620 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
622 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
623 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
624 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
625 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
627 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
628 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
629 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes
630 .word 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 @ 128 bytes