1 /* MikMod sound library
2 (c) 1998, 1999, 2000 Miodrag Vallat and others - see file AUTHORS for
5 This library is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Library General Public License as
7 published by the Free Software Foundation; either version 2 of
8 the License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public
16 License along with this library; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 /*==============================================================================
25 High-quality sample mixing routines, using a 32 bits mixing buffer,
26 interpolation, and sample smoothing to improve sound quality and remove
29 ==============================================================================*/
34 Low-Pass filter to remove annoying staticky buzz.
49 #include "mikmod_internals.h"
55 MAXVOL_FACTOR (was BITSHIFT in virtch.c)
56 Controls the maximum volume of the output data. All mixed data is
57 divided by this number after mixing, so larger numbers result in
58 quieter mixing. Smaller numbers will increase the likeliness of
59 distortion on loud modules.
62 Larger numbers result in shorter reverb duration. Longer reverb
63 durations can cause unwanted static and make the reverb sound more
67 Specified the shift multiplier which controls by how much the mixing
68 rate is multiplied while mixing. Higher values can improve quality by
69 smoothing the sound and reducing pops and clicks. Note, this is a shift
70 value, so a value of 2 becomes a mixing-rate multiplier of 4, and a
74 The number of bits per integer devoted to the fractional part of the
75 number. Generally, this number should not be changed for any reason.
77 !!! IMPORTANT !!! All values below MUST ALWAYS be greater than 0
/* Output attenuation: all mixed data is divided by this after mixing. */
82 #define MAXVOL_FACTOR (1<<BITSHIFT)
/* Reverb time base; larger values shorten the reverb duration. */
83 #define REVERBERATION 11000L
/* Oversampling: mixing rate is multiplied by 1<<SAMPLING_SHIFT, then
   averaged back down, to smooth the output. */
85 #define SAMPLING_SHIFT 2
86 #define SAMPLING_FACTOR (1UL<<SAMPLING_SHIFT)
/* Mask extracting the fractional part of a fixed-point sample index. */
89 #define FRACMASK ((1UL<<FRACBITS)-1UL)
/* Tick buffer sizes in longs / words / bytes respectively. */
91 #define TICKLSIZE 8192
92 #define TICKWSIZE (TICKLSIZE * 2)
93 #define TICKBSIZE (TICKWSIZE * 2)
/* Length (as a shift) of the anti-click volume ramp. */
95 #define CLICK_SHIFT_BASE 6
96 #define CLICK_SHIFT (CLICK_SHIFT_BASE + SAMPLING_SHIFT)
97 #define CLICK_BUFFER (1L << CLICK_SHIFT)
100 #define MIN(a,b) (((a)<(b)) ? (a) : (b))
/* Per-voice mixing state for the software mixer. */
103 typedef struct VINFO {
104 UBYTE kick; /* =1 -> sample has to be restarted */
105 UBYTE active; /* =1 -> sample is playing */
106 UWORD flags; /* 16/8 bits looping/one-shot */
107 SWORD handle; /* identifies the sample */
108 ULONG start; /* start index */
109 ULONG size; /* samplesize */
110 ULONG reppos; /* loop start */
111 ULONG repend; /* loop end */
112 ULONG frq; /* current frequency */
113 int vol; /* current volume */
114 int pan; /* current panning position */
/* Last mixed output value per channel; blended back in by the
   anti-click ramp in the mixing loops. */
118 SLONG lastvalL,lastvalR;
119 int lvolsel,rvolsel; /* Volume factor in range 0-255 */
/* Fixed-point (FRACBITS fractional bits) sample position and step. */
122 SLONGLONG current; /* current index in the sample */
123 SLONGLONG increment; /* increment value */
/* Table of loaded sample data, indexed by voice handle. */
126 static SWORD **Samples;
/* Voice array and the voice currently being mixed (vnf). */
127 static VINFO *vinf=NULL,*vnf;
128 static long tickleft,samplesthatfit,vc_memory=0;
129 static int vc_softchn;
/* Fixed-point sample-end / loop-start / loop-end indices for vnf. */
130 static SLONGLONG idxsize,idxlpos,idxlend;
/* Intermediate 32-bit mixing buffer, converted to the output format
   at the end of each tick. */
131 static SLONG *vc_tickbuf=NULL;
132 static UWORD vc_mode;
135 /* Weird bug in compiler */ /* FIXME is this still needed? */
136 typedef void (*MikMod_callback_t)(unsigned char *data, size_t len);
139 /* Reverb control variables */
/* Lengths of the eight comb-filter delay lines, and the write index. */
141 static int RVc1, RVc2, RVc3, RVc4, RVc5, RVc6, RVc7, RVc8;
142 static ULONG RVRindex;
144 /* For Mono or Left Channel */
145 static SLONG *RVbufL1=NULL,*RVbufL2=NULL,*RVbufL3=NULL,*RVbufL4=NULL,
146 *RVbufL5=NULL,*RVbufL6=NULL,*RVbufL7=NULL,*RVbufL8=NULL;
148 /* For Stereo only (Right Channel) */
149 static SLONG *RVbufR1=NULL,*RVbufR2=NULL,*RVbufR3=NULL,*RVbufR4=NULL,
150 *RVbufR5=NULL,*RVbufR6=NULL,*RVbufR7=NULL,*RVbufR8=NULL;
/* NATIVE: widest efficient integer type for this platform. */
152 #ifdef NATIVE_64BIT_INT
153 #define NATIVE SLONGLONG
158 /*========== 32 bit sample mixers - only for 32 bit platforms */
159 #ifndef NATIVE_64BIT_INT
/* 32-bit mono mixer: accumulates interpolated, volume-scaled samples
   from srce into dest; returns the updated fixed-point index. */
161 static SLONG Mix32MonoNormal(const SWORD* const srce,SLONG* dest,SLONG idx,SLONG increment,SLONG todo)
/* Split the fixed-point index into integer and fractional parts,
   then linearly interpolate between adjacent source samples. */
167 i=idx>>FRACBITS,f=idx&FRACMASK;
168 sample=(SWORD)( (((SLONG)(srce[i]*(FRACMASK+1L-f)) +
169 ((SLONG)srce[i+1]*f)) >> FRACBITS));
/* Volume ramp: crossfade from oldlvol to lvolsel over the ramp. */
174 ( ( (SLONG)(vnf->oldlvol*vnf->rampvol) +
175 (vnf->lvolsel*(CLICK_BUFFER-vnf->rampvol)) ) *
176 (SLONG)sample ) >> CLICK_SHIFT );
/* Anti-click: blend the new value with the voice's last output. */
181 ( ( ((SLONG)vnf->lvolsel*(CLICK_BUFFER-vnf->click)) *
183 (vnf->lastvalL*vnf->click) ) >> CLICK_SHIFT );
/* Steady state: plain volume-scaled accumulate. */
186 *dest++ +=vnf->lvolsel*sample;
/* Remember the last output for the next tick's click removal. */
188 vnf->lastvalL=vnf->lvolsel * sample;
/* 32-bit stereo mixer: like Mix32MonoNormal but writes interleaved
   left/right values using lvolsel/rvolsel. */
193 static SLONG Mix32StereoNormal(const SWORD* const srce,SLONG* dest,SLONG idx,SLONG increment,ULONG todo)
/* Linear interpolation between srce[i] and srce[i+1]. */
199 i=idx>>FRACBITS,f=idx&FRACMASK;
200 sample=(SWORD)(((((SLONG)srce[i]*(FRACMASK+1L-f)) +
201 ((SLONG)srce[i+1] * f)) >> FRACBITS));
/* Volume ramp, left then right channel. */
206 ( ( ((SLONG)vnf->oldlvol*vnf->rampvol) +
207 (vnf->lvolsel*(CLICK_BUFFER-vnf->rampvol))
208 ) * (SLONG)sample ) >> CLICK_SHIFT );
210 ( ( ((SLONG)vnf->oldrvol*vnf->rampvol) +
211 (vnf->rvolsel*(CLICK_BUFFER-vnf->rampvol))
212 ) * (SLONG)sample ) >> CLICK_SHIFT );
/* Anti-click blend with lastvalL/lastvalR. */
217 ( ( (SLONG)(vnf->lvolsel*(CLICK_BUFFER-vnf->click)) *
218 (SLONG)sample ) + (vnf->lastvalL * vnf->click) )
221 ( ( ((SLONG)vnf->rvolsel*(CLICK_BUFFER-vnf->click)) *
222 (SLONG)sample ) + (vnf->lastvalR * vnf->click) )
/* Steady state: interleaved L/R accumulate. */
226 *dest++ +=vnf->lvolsel*sample;
227 *dest++ +=vnf->rvolsel*sample;
230 vnf->lastvalL=vnf->lvolsel*sample;
231 vnf->lastvalR=vnf->rvolsel*sample;
/* 32-bit surround mixer: same signal on both channels but the right
   channel is phase-inverted (added left, subtracted right). */
236 static SLONG Mix32StereoSurround(const SWORD* const srce,SLONG* dest,SLONG idx,SLONG increment,ULONG todo)
/* Linear interpolation between adjacent source samples. */
243 i=idx>>FRACBITS,f=idx&FRACMASK;
244 sample=(SWORD)(((((SLONG)srce[i]*(FRACMASK+1L-f)) +
245 ((SLONG)srce[i+1]*f)) >> FRACBITS));
/* Volume ramp (left volume only; surround uses a single gain). */
250 ( ( (SLONG)(vnf->oldlvol*vnf->rampvol) +
251 (vnf->lvolsel*(CLICK_BUFFER-vnf->rampvol)) ) *
252 (SLONG)sample) >> CLICK_SHIFT );
/* Anti-click blend with the previous output value. */
259 ( ( ((SLONG)vnf->lvolsel*(CLICK_BUFFER-vnf->click)) *
261 (vnf->lastvalL * vnf->click) ) >> CLICK_SHIFT );
/* Steady state: add to left, subtract from right (phase invert). */
266 *dest++ +=vnf->lvolsel*sample;
267 *dest++ -=vnf->lvolsel*sample;
270 vnf->lastvalL=vnf->lvolsel*sample;
271 vnf->lastvalR=vnf->lvolsel*sample;
277 /*========== 64 bit mixers */
/* 64-bit mono mixer: identical structure to Mix32MonoNormal but with
   SLONGLONG fixed-point indices for samples past the 32-bit range. */
279 static SLONGLONG MixMonoNormal(const SWORD* const srce,SLONG* dest,SLONGLONG idx,SLONGLONG increment,SLONG todo)
/* Linear interpolation between srce[i] and srce[i+1]. */
285 i=idx>>FRACBITS,f=idx&FRACMASK;
286 sample=(SWORD)((((SLONGLONG)(srce[i]*(FRACMASK+1L-f)) +
287 ((SLONGLONG)srce[i+1]*f)) >> FRACBITS));
/* Volume ramp from oldlvol to lvolsel. */
292 ( ( (SLONGLONG)(vnf->oldlvol*vnf->rampvol) +
293 (vnf->lvolsel*(CLICK_BUFFER-vnf->rampvol)) ) *
294 (SLONGLONG)sample ) >> CLICK_SHIFT );
/* Anti-click blend with the last output value. */
299 ( ( ((SLONGLONG)vnf->lvolsel*(CLICK_BUFFER-vnf->click)) *
300 (SLONGLONG)sample ) +
301 (vnf->lastvalL*vnf->click) ) >> CLICK_SHIFT );
/* Steady state accumulate, then save last output. */
304 *dest++ +=vnf->lvolsel*sample;
306 vnf->lastvalL=vnf->lvolsel * sample;
311 /* Slowest part... */
313 #if defined HAVE_SSE2 || defined HAVE_ALTIVEC
/* Fetch one linearly-interpolated sample at fixed-point index idx. */
315 static __inline SWORD GetSample(const SWORD* const srce, SLONGLONG idx)
317 SLONGLONG i=idx>>FRACBITS;
318 SLONGLONG f=idx&FRACMASK;
/* Weighted average of srce[i] and srce[i+1] by the fractional part. */
319 return (SWORD)(((((SLONGLONG)srce[i]*(FRACMASK+1L-f)) +
320 ((SLONGLONG)srce[i+1] * f)) >> FRACBITS));
/* SIMD stereo mixer (SSE2 or AltiVec): processes 4 interpolated
   samples per iteration, scaling by the L/R volumes in vol[]. */
323 static SLONGLONG MixSIMDStereoNormal(const SWORD* const srce,SLONG* dest,SLONGLONG idx,SLONGLONG increment,ULONG todo)
325 SWORD vol[8] = {vnf->lvolsel, vnf->rvolsel};
329 /* Dest can be misaligned: mix scalar until 16-byte aligned. */
330 while(!IS_ALIGNED_16(dest)) {
331 sample=srce[idx >> FRACBITS];
333 *dest++ += vol[0] * sample;
334 *dest++ += vol[1] * sample;
339 /* Srce is always aligned */
341 #if defined HAVE_SSE2
/* v0 holds the L/R volumes replicated for _mm_madd_epi16. */
344 __m128i v0 = _mm_set_epi16(0, vol[1],
/* 4 samples per iteration. */
348 for(todo>>=2;todo; todo--)
350 SWORD s0 = GetSample(srce, idx);
351 SWORD s1 = GetSample(srce, idx += increment);
352 SWORD s2 = GetSample(srce, idx += increment);
353 SWORD s3 = GetSample(srce, idx += increment);
354 __m128i v1 = _mm_set_epi16(0, s1, 0, s1, 0, s0, 0, s0);
355 __m128i v2 = _mm_set_epi16(0, s3, 0, s3, 0, s2, 0, s2);
356 __m128i v3 = _mm_load_si128((__m128i*)(dest+0));
357 __m128i v4 = _mm_load_si128((__m128i*)(dest+4));
/* dest += vol * sample for two stereo frames at a time. */
358 _mm_store_si128((__m128i*)(dest+0), _mm_add_epi32(v3, _mm_madd_epi16(v0, v1)));
359 _mm_store_si128((__m128i*)(dest+4), _mm_add_epi32(v4, _mm_madd_epi16(v0, v2)));
365 #elif defined HAVE_ALTIVEC
/* Splat the L/R volume pair across a vector via vec_perm. */
369 vector signed short r0 = vec_ld(0, vol);
370 vector signed short v0 = vec_perm(r0, r0, (vector unsigned char)(0, 1, /* l */
380 for(todo>>=2;todo; todo--)
383 vector signed short v1, v2;
384 vector signed int v3, v4, v5, v6;
387 s[0] = GetSample(srce, idx);
388 s[1] = GetSample(srce, idx += increment);
389 s[2] = GetSample(srce, idx += increment);
390 s[3] = GetSample(srce, idx += increment);
/* Duplicate each sample into L/R lanes to match v0's layout. */
394 v1 = vec_perm(r1, r1, (vector unsigned char)
395 (0*2, 0*2+1, /* s0 */
404 v2 = vec_perm(r1, r1, (vector unsigned char)
405 (2*2, 2*2+1, /* s2 */
415 v3 = vec_ld(0, dest);
416 v4 = vec_ld(0x10, dest);
417 v5 = vec_mule(v0, v1);
418 v6 = vec_mule(v0, v2);
420 vec_st(vec_add(v3, v5), 0, dest);
421 vec_st(vec_add(v4, v6), 0x10, dest);
427 #endif /* HAVE_ALTIVEC */
/* Scalar tail for the remaining (todo % 4) samples. */
431 sample=GetSample(srce, idx);
433 *dest++ += vol[0] * sample;
434 *dest++ += vol[1] * sample;
/* Save last outputs for click removal on the next tick. */
437 vnf->lastvalL=vnf->lvolsel*sample;
438 vnf->lastvalR=vnf->rvolsel*sample;
/* 64-bit stereo mixer (SIMD-capable build): handles volume ramp and
   click removal in scalar code, then dispatches the steady-state bulk
   to MixSIMDStereoNormal when DMODE_SIMDMIXER is enabled. */
442 static SLONGLONG MixStereoNormal(const SWORD* const srce,SLONG* dest,SLONGLONG idx,SLONGLONG increment,ULONG todo)
/* Ramp phase: linear interpolation... */
450 i=idx>>FRACBITS,f=idx&FRACMASK;
451 sample=(SWORD)(((((SLONGLONG)srce[i]*(FRACMASK+1L-f)) +
452 ((SLONGLONG)srce[i+1] * f)) >> FRACBITS));
/* ...then crossfade old->new volume for left and right. */
456 ( ( ((SLONGLONG)vnf->oldlvol*vnf->rampvol) +
457 (vnf->lvolsel*(CLICK_BUFFER-vnf->rampvol))
458 ) * (SLONGLONG)sample ) >> CLICK_SHIFT );
460 ( ( ((SLONGLONG)vnf->oldrvol*vnf->rampvol) +
461 (vnf->rvolsel*(CLICK_BUFFER-vnf->rampvol))
462 ) * (SLONGLONG)sample ) >> CLICK_SHIFT );
/* Click-removal phase: blend with lastvalL/lastvalR. */
472 i=idx>>FRACBITS,f=idx&FRACMASK;
473 sample=(SWORD)(((((SLONGLONG)srce[i]*(FRACMASK+1L-f)) +
474 ((SLONGLONG)srce[i+1] * f)) >> FRACBITS));
478 ( ( (SLONGLONG)(vnf->lvolsel*(CLICK_BUFFER-vnf->click)) *
479 (SLONGLONG)sample ) + (vnf->lastvalL * vnf->click) )
483 ( ( ((SLONGLONG)vnf->rvolsel*(CLICK_BUFFER-vnf->click)) *
484 (SLONGLONG)sample ) + (vnf->lastvalR * vnf->click) )
/* Bulk phase: hand off to the SIMD loop when enabled. */
494 if (md_mode & DMODE_SIMDMIXER) {
495 return MixSIMDStereoNormal(srce, dest, idx, increment, todo);
/* Scalar fallback for the bulk phase. */
501 sample=(SWORD)(((((SLONGLONG)srce[i]*(FRACMASK+1L-f)) +
502 ((SLONGLONG)srce[i+1] * f)) >> FRACBITS));
505 *dest++ +=vnf->lvolsel*sample;
506 *dest++ +=vnf->rvolsel*sample;
510 vnf->lastvalL=vnf->lvolsel*sample;
511 vnf->lastvalR=vnf->rvolsel*sample;
516 #else /* HAVE_SSE2 || HAVE_ALTIVEC */
/* 64-bit stereo mixer (non-SIMD build): pure scalar version with the
   same ramp / click-removal / steady-state structure. */
517 static SLONGLONG MixStereoNormal(const SWORD* const srce,SLONG* dest,SLONGLONG idx,SLONGLONG increment,ULONG todo)
/* Linear interpolation between adjacent samples. */
523 i=idx>>FRACBITS,f=idx&FRACMASK;
524 sample=(SWORD)(((((SLONGLONG)srce[i]*(FRACMASK+1L-f)) +
525 ((SLONGLONG)srce[i+1] * f)) >> FRACBITS));
/* Volume ramp, left then right. */
530 ( ( ((SLONGLONG)vnf->oldlvol*vnf->rampvol) +
531 (vnf->lvolsel*(CLICK_BUFFER-vnf->rampvol))
532 ) * (SLONGLONG)sample ) >> CLICK_SHIFT );
534 ( ( ((SLONGLONG)vnf->oldrvol*vnf->rampvol) +
535 (vnf->rvolsel*(CLICK_BUFFER-vnf->rampvol))
536 ) * (SLONGLONG)sample ) >> CLICK_SHIFT );
/* Anti-click blend with the previous outputs. */
541 ( ( (SLONGLONG)(vnf->lvolsel*(CLICK_BUFFER-vnf->click)) *
542 (SLONGLONG)sample ) + (vnf->lastvalL * vnf->click) )
545 ( ( ((SLONGLONG)vnf->rvolsel*(CLICK_BUFFER-vnf->click)) *
546 (SLONGLONG)sample ) + (vnf->lastvalR * vnf->click) )
/* Steady-state interleaved accumulate. */
550 *dest++ +=vnf->lvolsel*sample;
551 *dest++ +=vnf->rvolsel*sample;
554 vnf->lastvalL=vnf->lvolsel*sample;
555 vnf->lastvalR=vnf->rvolsel*sample;
/* 64-bit surround mixer: single gain, right channel phase-inverted. */
562 static SLONGLONG MixStereoSurround(const SWORD* srce,SLONG* dest,SLONGLONG idx,SLONGLONG increment,ULONG todo)
/* Linear interpolation between adjacent samples. */
569 i=idx>>FRACBITS,f=idx&FRACMASK;
570 sample=(SWORD)(((((SLONGLONG)srce[i]*(FRACMASK+1L-f)) +
571 ((SLONGLONG)srce[i+1]*f)) >> FRACBITS));
/* Volume ramp using the left-channel gain. */
576 ( ( (SLONGLONG)(vnf->oldlvol*vnf->rampvol) +
577 (vnf->lvolsel*(CLICK_BUFFER-vnf->rampvol)) ) *
578 (SLONGLONG)sample) >> CLICK_SHIFT );
/* Anti-click blend with the previous output. */
585 ( ( ((SLONGLONG)vnf->lvolsel*(CLICK_BUFFER-vnf->click)) *
587 (vnf->lastvalL * vnf->click) ) >> CLICK_SHIFT );
/* Steady state: add left, subtract right. */
592 *dest++ +=vnf->lvolsel*sample;
593 *dest++ -=vnf->lvolsel*sample;
596 vnf->lastvalL=vnf->lvolsel*sample;
597 vnf->lastvalR=vnf->lvolsel*sample;
/* Output-format converters and reverb hook, selected at init time
   according to mono/stereo and sample-format mode. */
602 static void(*Mix32toFP)(float* dste,const SLONG *srce,NATIVE count);
603 static void(*Mix32to16)(SWORD* dste,const SLONG *srce,NATIVE count);
604 static void(*Mix32to8)(SBYTE* dste,const SLONG *srce,NATIVE count);
605 static void(*MixReverb)(SLONG *srce,NATIVE count);
/* Reverb helpers: compute a delay-line index, and feed one echo tap
   (attenuated by ReverbPct/128) for the left/right buffers. */
608 #define COMPUTE_LOC(n) loc##n = RVRindex % RVc##n
609 #define COMPUTE_LECHO(n) RVbufL##n [loc##n ]=speedup+((ReverbPct*RVbufL##n [loc##n ])>>7)
610 #define COMPUTE_RECHO(n) RVbufR##n [loc##n ]=speedup+((ReverbPct*RVbufR##n [loc##n ])>>7)
/* Mono reverb: feeds each sample into 8 comb-filter delay lines and
   mixes their alternating-sign outputs back into the buffer. */
612 static void MixReverb_Normal(SLONG *srce,NATIVE count)
616 unsigned int loc1,loc2,loc3,loc4,loc5,loc6,loc7,loc8;
/* Feedback amount scales with md_reverb (0..15). */
618 ReverbPct=58+(md_reverb*4);
620 COMPUTE_LOC(1); COMPUTE_LOC(2); COMPUTE_LOC(3); COMPUTE_LOC(4);
621 COMPUTE_LOC(5); COMPUTE_LOC(6); COMPUTE_LOC(7); COMPUTE_LOC(8);
624 /* Compute the left channel echo buffers */
625 speedup = *srce >> 3;
627 COMPUTE_LECHO(1); COMPUTE_LECHO(2); COMPUTE_LECHO(3); COMPUTE_LECHO(4);
628 COMPUTE_LECHO(5); COMPUTE_LECHO(6); COMPUTE_LECHO(7); COMPUTE_LECHO(8);
630 /* Prepare to compute actual finalized data */
633 COMPUTE_LOC(1); COMPUTE_LOC(2); COMPUTE_LOC(3); COMPUTE_LOC(4);
634 COMPUTE_LOC(5); COMPUTE_LOC(6); COMPUTE_LOC(7); COMPUTE_LOC(8);
/* Alternate +/- across taps to decorrelate the echoes. */
637 *srce++ +=RVbufL1[loc1]-RVbufL2[loc2]+RVbufL3[loc3]-RVbufL4[loc4]+
638 RVbufL5[loc5]-RVbufL6[loc6]+RVbufL7[loc7]-RVbufL8[loc8];
/* Stereo reverb: same 8-tap comb structure as MixReverb_Normal,
   applied independently to interleaved left and right channels. */
642 static void MixReverb_Stereo(SLONG *srce,NATIVE count)
646 unsigned int loc1,loc2,loc3,loc4,loc5,loc6,loc7,loc8;
648 ReverbPct=58+(md_reverb*4);
650 COMPUTE_LOC(1); COMPUTE_LOC(2); COMPUTE_LOC(3); COMPUTE_LOC(4);
651 COMPUTE_LOC(5); COMPUTE_LOC(6); COMPUTE_LOC(7); COMPUTE_LOC(8);
654 /* Compute the left channel echo buffers */
655 speedup = *srce >> 3;
657 COMPUTE_LECHO(1); COMPUTE_LECHO(2); COMPUTE_LECHO(3); COMPUTE_LECHO(4);
658 COMPUTE_LECHO(5); COMPUTE_LECHO(6); COMPUTE_LECHO(7); COMPUTE_LECHO(8);
660 /* Compute the right channel echo buffers */
661 speedup = srce[1] >> 3;
663 COMPUTE_RECHO(1); COMPUTE_RECHO(2); COMPUTE_RECHO(3); COMPUTE_RECHO(4);
664 COMPUTE_RECHO(5); COMPUTE_RECHO(6); COMPUTE_RECHO(7); COMPUTE_RECHO(8);
666 /* Prepare to compute actual finalized data */
669 COMPUTE_LOC(1); COMPUTE_LOC(2); COMPUTE_LOC(3); COMPUTE_LOC(4);
670 COMPUTE_LOC(5); COMPUTE_LOC(6); COMPUTE_LOC(7); COMPUTE_LOC(8);
/* Mix alternating-sign taps into left, then right. */
673 *srce++ +=RVbufL1[loc1]-RVbufL2[loc2]+RVbufL3[loc3]-RVbufL4[loc4]+
674 RVbufL5[loc5]-RVbufL6[loc6]+RVbufL7[loc7]-RVbufL8[loc8];
677 *srce++ +=RVbufR1[loc1]-RVbufR2[loc2]+RVbufR3[loc3]-RVbufR4[loc4]+
678 RVbufR5[loc5]-RVbufR6[loc6]+RVbufR7[loc7]-RVbufR8[loc8];
/* Noise-reduction low-pass filter hook and its per-channel state. */
682 static void (*MixLowPass)(SLONG* srce,NATIVE count);
/* Filter memory: previous output for left/right channels. */
684 static int nLeftNR, nRightNR;
/* Stereo low-pass: simple first-order smoothing over each channel. */
686 static void MixLowPass_Stereo(SLONG* srce,NATIVE count)
688 int n1 = nLeftNR, n2 = nRightNR;
693 int vnr = pnr[0] >> 1;
/* Mono low-pass: same smoothing as MixLowPass_Stereo, one channel. */
705 static void MixLowPass_Normal(SLONG* srce,NATIVE count)
712 int vnr = pnr[0] >> 1;
/* Scale a 32-bit mixed sample into [-1,1] float, then clamp it. */
721 #define EXTRACT_SAMPLE_FP(var,attenuation) var=*srce++*((1.0f / 32768.0f) / (MAXVOL_FACTOR*attenuation))
722 #define CHECK_SAMPLE_FP(var,bound) var=(var>bound)?bound:(var<-bound)?-bound:var
/* Mono 32-bit -> float converter: averages SAMPLING_FACTOR
   oversampled values into each output sample. */
724 static void Mix32ToFP_Normal(float* dste,const SLONG *srce,NATIVE count)
729 for(count/=SAMPLING_FACTOR;count;count--) {
732 for(i=SAMPLING_FACTOR/2;i;i--) {
733 EXTRACT_SAMPLE_FP(x1,1.0f); EXTRACT_SAMPLE_FP(x2,1.0f);
735 CHECK_SAMPLE_FP(x1,1.0f); CHECK_SAMPLE_FP(x2,1.0f);
/* Divide the accumulated sum by the oversampling factor. */
739 *dste++ =tmpx*(1.0f/SAMPLING_FACTOR);
/* Stereo 32-bit -> float converter: averages SAMPLING_FACTOR
   oversampled frames per interleaved L/R output pair. */
743 static void Mix32ToFP_Stereo(float* dste,const SLONG *srce,NATIVE count)
745 float x1,x2,x3,x4,tmpx,tmpy;
748 for(count/=SAMPLING_FACTOR;count;count--) {
751 for(i=SAMPLING_FACTOR/2;i;i--) {
752 EXTRACT_SAMPLE_FP(x1,1.0f); EXTRACT_SAMPLE_FP(x2,1.0f);
753 EXTRACT_SAMPLE_FP(x3,1.0f); EXTRACT_SAMPLE_FP(x4,1.0f);
755 CHECK_SAMPLE_FP(x1,1.0f); CHECK_SAMPLE_FP(x2,1.0f);
756 CHECK_SAMPLE_FP(x3,1.0f); CHECK_SAMPLE_FP(x4,1.0f);
/* Averaged left then right output samples. */
761 *dste++ =tmpx*(1.0f/SAMPLING_FACTOR);
762 *dste++ =tmpy*(1.0f/SAMPLING_FACTOR);
/* Integer variants: attenuate a mixed sample, then clamp to range. */
767 #define EXTRACT_SAMPLE(var,attenuation) var=*srce++/(MAXVOL_FACTOR*attenuation)
768 #define CHECK_SAMPLE(var,bound) var=(var>=bound)?bound-1:(var<-bound)?-bound:var
/* Mono 32-bit -> signed 16-bit converter with oversample averaging. */
770 static void Mix32To16_Normal(SWORD* dste,const SLONG *srce,NATIVE count)
775 for(count/=SAMPLING_FACTOR;count;count--) {
778 for(i=SAMPLING_FACTOR/2;i;i--) {
779 EXTRACT_SAMPLE(x1,1); EXTRACT_SAMPLE(x2,1);
781 CHECK_SAMPLE(x1,32768); CHECK_SAMPLE(x2,32768);
785 *dste++ =(SWORD)(tmpx/SAMPLING_FACTOR);
/* Stereo 32-bit -> signed 16-bit converter with oversample averaging. */
790 static void Mix32To16_Stereo(SWORD* dste,const SLONG *srce,NATIVE count)
792 NATIVE x1,x2,x3,x4,tmpx,tmpy;
795 for(count/=SAMPLING_FACTOR;count;count--) {
798 for(i=SAMPLING_FACTOR/2;i;i--) {
799 EXTRACT_SAMPLE(x1,1); EXTRACT_SAMPLE(x2,1);
800 EXTRACT_SAMPLE(x3,1); EXTRACT_SAMPLE(x4,1);
802 CHECK_SAMPLE(x1,32768); CHECK_SAMPLE(x2,32768);
803 CHECK_SAMPLE(x3,32768); CHECK_SAMPLE(x4,32768);
/* Averaged left then right 16-bit samples. */
808 *dste++ =(SWORD)(tmpx/SAMPLING_FACTOR);
809 *dste++ =(SWORD)(tmpy/SAMPLING_FACTOR);
/* Mono 32-bit -> unsigned 8-bit converter (output biased by +128). */
813 static void Mix32To8_Normal(SBYTE* dste,const SLONG *srce,NATIVE count)
818 for(count/=SAMPLING_FACTOR;count;count--) {
821 for(i=SAMPLING_FACTOR/2;i;i--) {
822 EXTRACT_SAMPLE(x1,256); EXTRACT_SAMPLE(x2,256);
824 CHECK_SAMPLE(x1,128); CHECK_SAMPLE(x2,128);
828 *dste++ = (SBYTE)((tmpx/SAMPLING_FACTOR)+128);
/* Stereo 32-bit -> unsigned 8-bit converter (output biased by +128). */
832 static void Mix32To8_Stereo(SBYTE* dste,const SLONG *srce,NATIVE count)
834 NATIVE x1,x2,x3,x4,tmpx,tmpy;
837 for(count/=SAMPLING_FACTOR;count;count--) {
840 for(i=SAMPLING_FACTOR/2;i;i--) {
841 EXTRACT_SAMPLE(x1,256); EXTRACT_SAMPLE(x2,256);
842 EXTRACT_SAMPLE(x3,256); EXTRACT_SAMPLE(x4,256);
844 CHECK_SAMPLE(x1,128); CHECK_SAMPLE(x2,128);
845 CHECK_SAMPLE(x3,128); CHECK_SAMPLE(x4,128);
/* Averaged left then right 8-bit samples. */
850 *dste++ =(SBYTE)((tmpx/SAMPLING_FACTOR)+128);
851 *dste++ =(SBYTE)((tmpy/SAMPLING_FACTOR)+128);
855 #if defined HAVE_SSE2
/* Right shift converting 32-bit mixed data to 16-bit output range. */
856 #define SHIFT_MIX_TO_16 (BITSHIFT + 16 - 16)
/* SSE2 stereo 32->16 converter: averages 4 oversampled stereo frames
   (the "4 taps") per output frame, 4 output frames per iteration. */
858 static void Mix32To16_Stereo_SIMD_4Tap(SWORD* dste, const SLONG* srce, NATIVE count)
862 /* Check unaligned dste buffer. srce is always aligned. */
863 while(!IS_ALIGNED_16(dste))
865 Mix32To16_Stereo(dste, srce, SAMPLING_FACTOR);
872 /* dste and srce aligned. srce is always aligned. */
874 /* count / 2 for 1 sample */
876 for(count>>=4;count;count--)
878 /* Load 32bit sample. 1st average */
879 __m128i v0 = _mm_add_epi32(
880 _mm_srai_epi32(_mm_loadu_si128((__m128i const *)(srce+0)), SHIFT_MIX_TO_16),
881 _mm_srai_epi32(_mm_loadu_si128((__m128i const *)(srce+4)), SHIFT_MIX_TO_16)
882 ); /* v0: s0.l+s2.l | s0.r+s2.r | s1.l+s3.l | s1.r+s3.r */
884 /* 2nd average (s0.l+s2.l+s1.l+s3.l / 4, s0.r+s2.r+s1.r+s3.r / 4). Upper 64bit is unused (1 stereo sample) */
885 __m128i v1 = _mm_srai_epi32(_mm_add_epi32(v0, mm_hiqq(v0)), 2);
886 /* v1: s0.l+s2.l / 4 | s0.r+s2.r / 4 | s1.l+s3.l+s0.l+s2.l / 4 | s1.r+s3.r+s0.r+s2.r / 4 */
888 __m128i v2 = _mm_add_epi32(
889 _mm_srai_epi32(_mm_loadu_si128((__m128i const *)(srce+8)), SHIFT_MIX_TO_16),
890 _mm_srai_epi32(_mm_loadu_si128((__m128i const *)(srce+12)), SHIFT_MIX_TO_16)
891 ); /* v2: s4.l+s6.l | s4.r+s6.r | s5.l+s7.l | s5.r+s7.r */
893 __m128i v3 = _mm_srai_epi32(_mm_add_epi32(v2, mm_hiqq(v2)), 2); /* Upper 64bit is unused */
894 /* v3: s4.l+s6.l /4 | s4.r+s6.r / 4| s5.l+s7.l+s4.l+s6.l / 4 | s5.r+s7.r+s4.r+s6.r / 4 */
896 /* pack two stereo samples in one */
897 __m128i v4 = _mm_unpacklo_epi64(v1, v3); /* v4 = avg(s0,s1,s2,s3) | avg(s4,s5,s6,s7) */
901 /* Load 32bit sample. 1st average (s0.l+s2.l, s0.r+s2.r, s1.l+s3.l, s1.r+s3.r) */
903 _mm_srai_epi32(_mm_loadu_si128((__m128i const *)(srce+16)), SHIFT_MIX_TO_16),
904 _mm_srai_epi32(_mm_loadu_si128((__m128i const *)(srce+20)), SHIFT_MIX_TO_16)
905 ); /* 128bit = 2 stereo samples */
907 /* 2nd average (s0.l+s2.l+s1.l+s3.l / 4, s0.r+s2.r+s1.r+s3.r / 4). Upper 64bit is unused (1 stereo sample) */
908 v1 = _mm_srai_epi32(_mm_add_epi32(v0, mm_hiqq(v0)), 2);
911 _mm_srai_epi32(_mm_loadu_si128((__m128i const *)(srce+24)), SHIFT_MIX_TO_16),
912 _mm_srai_epi32(_mm_loadu_si128((__m128i const *)(srce+28)), SHIFT_MIX_TO_16)
915 v3 = _mm_srai_epi32(_mm_add_epi32(v2, mm_hiqq(v2)), 2); /* Upper 64bit is unused */
917 /* pack two stereo samples in one */
918 v6 = _mm_unpacklo_epi64(v1, v3); /* v6 = avg(s8,s9,s10,s11) | avg(s12,s13,s14,s15) */
/* Saturating pack of 8 averaged 32-bit values to 8 SWORDs. */
920 _mm_store_si128((__m128i*)dste, _mm_packs_epi32(v4, v6)); /* 4 interpolated stereo sample 32bit to 4 */
923 srce+=32; /* 32 = 4 * 8 */
926 /* FIXME: THIS PART WRITES PAST DST !! */
/* Scalar tail; see FIXME above — potential out-of-bounds write. */
929 Mix32To16_Stereo(dste, srce, remain);
933 #elif defined HAVE_ALTIVEC
/* AltiVec right-shift vector for 32->16 range conversion. */
934 #define SHIFT_MIX_TO_16 vec_splat_u32(BITSHIFT + 16 - 16)
/* AltiVec stereo 32->16 converter: same 4-tap averaging scheme as
   the SSE2 version above. */
936 static void Mix32To16_Stereo_SIMD_4Tap(SWORD* dste, const SLONG* srce, NATIVE count)
940 /* Check unaligned dste buffer. srce is always aligned. */
941 while(!IS_ALIGNED_16(dste))
943 Mix32To16_Stereo(dste, srce, SAMPLING_FACTOR);
950 /* dste and srce aligned. srce is always aligned. */
952 for(count>>=4;count;count--)
954 /* Load 32bit sample. 1st average (s0.l+s2.l, s0.r+s2.r, s1.l+s3.l, s1.r+s3.r) */
955 vector signed int v0 = vec_add(
956 vec_sra(vec_ld(0, srce), SHIFT_MIX_TO_16), /* 128bit = 2 stereo samples */
957 vec_sra(vec_ld(0x10, srce), SHIFT_MIX_TO_16)
958 ); /* 128bit = 2 stereo samples */
960 /* 2nd average (s0.l+s2.l+s1.l+s3.l / 4, s0.r+s2.r+s1.r+s3.r / 4). Upper 64bit is unused (1 stereo sample) */
961 vector signed int v1 = vec_sra(vec_add(v0, vec_hiqq(v0)), vec_splat_u32(2));
963 vector signed int v2 = vec_add(
964 vec_sra(vec_ld(0x20, srce), SHIFT_MIX_TO_16),
965 vec_sra(vec_ld(0x30, srce), SHIFT_MIX_TO_16)
968 vector signed int v3 = vec_sra(vec_add(v2, vec_hiqq(v2)), vec_splat_u32(2)); /* Upper 64bit is unused */
970 /* pack two stereo samples in one */
971 vector signed int v6, v4 = vec_unpacklo(v1, v3); /* v4 = lo64(v1) | lo64(v3) */
973 /* Load 32bit sample. 1st average (s0.l+s2.l, s0.r+s2.r, s1.l+s3.l, s1.r+s3.r) */
975 vec_sra(vec_ld(0x40, srce), SHIFT_MIX_TO_16), /* 128bit = 2 stereo samples */
976 vec_sra(vec_ld(0x50, srce), SHIFT_MIX_TO_16)
977 ); /* 128bit = 2 stereo samples */
979 /* 2nd average (s0.l+s2.l+s1.l+s3.l / 4, s0.r+s2.r+s1.r+s3.r / 4). Upper 64bit is unused (1 stereo sample) */
980 v1 = vec_sra(vec_add(v0, vec_hiqq(v0)), vec_splat_u32(2));
983 vec_sra(vec_ld(0x60, srce), SHIFT_MIX_TO_16),
984 vec_sra(vec_ld(0x70, srce), SHIFT_MIX_TO_16)
987 v3 = vec_sra(vec_add(v2, vec_hiqq(v2)), vec_splat_u32(2)); /* Upper 64bit is unused */
989 /* pack two stereo samples in one */
990 v6 = vec_unpacklo(v1, v3);
/* Saturating pack of 8 averaged 32-bit values to 8 SWORDs. */
992 vec_st(vec_packs(v4, v6), 0, dste); /* 4 interpolated stereo sample 32bit to 4 interpolated stereo sample 16bit + saturation */
995 srce+=32; /* 32 = 4 * 8 */
/* Scalar tail for the remaining frames. */
1000 Mix32To16_Stereo(dste, srce, remain);
/* Mix one voice (vnf) into the tick buffer ptr for todo samples,
   handling loop wrapping / bidirectional bounce / end-of-sample,
   and dispatching to the 32-bit or 64-bit mixer as appropriate. */
1007 static void AddChannel(SLONG* ptr,NATIVE todo)
/* No sample data for this handle: silence and deactivate the voice. */
1012 if(!(s=Samples[vnf->handle])) {
1013 vnf->current = vnf->active = 0;
1014 vnf->lastvalL = vnf->lastvalR = 0;
1018 /* update the 'current' index so the sample loops, or stops playing if it
1019 reached the end of the sample */
1023 if(vnf->flags & SF_REVERSE) {
1024 /* The sample is playing in reverse */
1025 if((vnf->flags&SF_LOOP)&&(vnf->current<idxlpos)) {
1026 /* the sample is looping and has reached the loopstart index */
1027 if(vnf->flags & SF_BIDI) {
1028 /* sample is doing bidirectional loops, so 'bounce' the
1029 current index against the idxlpos */
1030 vnf->current = idxlpos+(idxlpos-vnf->current);
1031 vnf->flags &= ~SF_REVERSE;
1032 vnf->increment = -vnf->increment;
1034 /* normal backwards looping, so set the current position to
1036 vnf->current=idxlend-(idxlpos-vnf->current);
1038 /* the sample is not looping, so check if it reached index 0 */
1039 if(vnf->current < 0) {
1040 /* playing index reached 0, so stop playing this sample */
1041 vnf->current = vnf->active = 0;
1046 /* The sample is playing forward */
1047 if((vnf->flags & SF_LOOP) &&
1048 (vnf->current >= idxlend)) {
1049 /* the sample is looping, check the loopend index */
1050 if(vnf->flags & SF_BIDI) {
1051 /* sample is doing bidirectional loops, so 'bounce' the
1052 current index against the idxlend */
1053 vnf->flags |= SF_REVERSE;
1054 vnf->increment = -vnf->increment;
1055 vnf->current = idxlend-(vnf->current-idxlend);
1057 /* normal backwards looping, so set the current position
1059 vnf->current=idxlpos+(vnf->current-idxlend);
1061 /* sample is not looping, so check if it reached the last
1063 if(vnf->current >= idxsize) {
1064 /* yes, so stop playing this sample */
1065 vnf->current = vnf->active = 0;
/* Pick the boundary the voice is heading towards: loop start/0 when
   reversed, loop end/sample end when playing forward. */
1071 end=(vnf->flags&SF_REVERSE)?(vnf->flags&SF_LOOP)?idxlpos:0:
1072 (vnf->flags&SF_LOOP)?idxlend:idxsize;
1074 /* if the sample is not blocked... */
1075 if((end==vnf->current)||(!vnf->increment))
/* Number of samples mixable before hitting the boundary. */
1078 done=MIN((end-vnf->current)/vnf->increment+1,todo);
1087 endpos=vnf->current+done*vnf->increment;
/* Only mix audible voices (nonzero volume or mid-ramp). */
1089 if(vnf->vol || vnf->rampvol) {
1090 #ifndef NATIVE_64BIT_INT
1091 /* use the 32 bit mixers as often as we can (they're much faster) */
1092 if((vnf->current<0x7fffffff)&&(endpos<0x7fffffff)) {
1093 if(vc_mode & DMODE_STEREO) {
1094 if((vnf->pan==PAN_SURROUND)&&(vc_mode&DMODE_SURROUND))
1095 vnf->current=(SLONGLONG)Mix32StereoSurround
1096 (s,ptr,vnf->current,vnf->increment,done);
1098 vnf->current=Mix32StereoNormal
1099 (s,ptr,vnf->current,vnf->increment,done);
1101 vnf->current=Mix32MonoNormal
1102 (s,ptr,vnf->current,vnf->increment,done);
/* 64-bit mixer path. */
1107 if(vc_mode & DMODE_STEREO) {
1108 if((vnf->pan==PAN_SURROUND)&&(vc_mode&DMODE_SURROUND))
1109 vnf->current=MixStereoSurround
1110 (s,ptr,vnf->current,vnf->increment,done);
1112 vnf->current=MixStereoNormal
1113 (s,ptr,vnf->current,vnf->increment,done);
1115 vnf->current=MixMonoNormal
1116 (s,ptr,vnf->current,vnf->increment,done);
/* Inaudible voice: just advance the position. */
1119 vnf->lastvalL = vnf->lastvalR = 0;
1120 /* update sample position */
1121 vnf->current=endpos;
/* Stereo output consumes two buffer slots per sample. */
1125 ptr += (vc_mode & DMODE_STEREO)?(done<<1):done;
/* Map the VC1_* entry points expected by virtch_common.c onto this
   file's VC2_* implementations before including the shared code. */
1131 #define VC1_SilenceBytes VC2_SilenceBytes
1132 #define VC1_WriteSamples VC2_WriteSamples
1133 #define VC1_WriteBytes VC2_WriteBytes
1134 #define VC1_Exit VC2_Exit
1135 #define VC1_VoiceSetVolume VC2_VoiceSetVolume
1136 #define VC1_VoiceGetVolume VC2_VoiceGetVolume
1137 #define VC1_VoiceSetPanning VC2_VoiceSetPanning
1138 #define VC1_VoiceGetPanning VC2_VoiceGetPanning
1139 #define VC1_VoiceSetFrequency VC2_VoiceSetFrequency
1140 #define VC1_VoiceGetFrequency VC2_VoiceGetFrequency
1141 #define VC1_VoicePlay VC2_VoicePlay
1142 #define VC1_VoiceStop VC2_VoiceStop
1143 #define VC1_VoiceStopped VC2_VoiceStopped
1144 #define VC1_VoiceGetPosition VC2_VoiceGetPosition
1145 #define VC1_SampleUnload VC2_SampleUnload
1146 #define VC1_SampleLoad VC2_SampleLoad
1147 #define VC1_SampleSpace VC2_SampleSpace
1148 #define VC1_SampleLength VC2_SampleLength
1149 #define VC1_VoiceRealVolume VC2_VoiceRealVolume
1151 #include "virtch_common.c"
/* Main render entry: runs the player per tick, mixes all voices into
   vc_tickbuf, applies low-pass / reverb / callback, and converts to
   the output format in buf. */
1154 void VC2_WriteSamples(SBYTE* buf,ULONG todo)
/* Work in oversampled units throughout. */
1160 todo*=SAMPLING_FACTOR;
/* Start of a new tick: advance the player and compute its length. */
1164 if(vc_mode & DMODE_SOFT_MUSIC) md_player();
1165 tickleft=(md_mixfreq*125L*SAMPLING_FACTOR)/(md_bpm*50L);
1166 tickleft&=~(SAMPLING_FACTOR-1);
1168 left = MIN(tickleft, (long)todo);
1172 buf += samples2bytes(left)/SAMPLING_FACTOR;
/* Mix in portions no larger than the tick buffer. */
1175 portion = MIN(left, samplesthatfit);
1176 memset(vc_tickbuf,0,portion<<((vc_mode&DMODE_STEREO)?3:2));
1177 for(t=0;t<vc_softchn;t++) {
/* Voice kick: reset position and start the anti-click ramp. */
1181 vnf->current=((SLONGLONG)(vnf->start))<<FRACBITS;
1184 vnf->click = CLICK_BUFFER;
1188 if(!vnf->frq) vnf->active = 0;
/* Fixed-point step derived from frequency and mixing rate. */
1191 vnf->increment=((SLONGLONG)(vnf->frq)<<(FRACBITS-SAMPLING_SHIFT))
1193 if(vnf->flags&SF_REVERSE) vnf->increment=-vnf->increment;
1194 vol = vnf->vol; pan = vnf->pan;
/* Remember previous volumes for the ramp, then derive L/R gains. */
1196 vnf->oldlvol=vnf->lvolsel;vnf->oldrvol=vnf->rvolsel;
1197 if(vc_mode & DMODE_STEREO) {
1198 if(pan!=PAN_SURROUND) {
1199 vnf->lvolsel=(vol*(PAN_RIGHT-pan))>>8;
1200 vnf->rvolsel=(vol*pan)>>8;
/* Surround: equal reduced gain on both channels. */
1202 vnf->lvolsel=vnf->rvolsel=(vol * 256L) / 480;
/* Fixed-point end/loop indices for this voice. */
1207 idxsize=(vnf->size)?((SLONGLONG)(vnf->size)<<FRACBITS)-1:0;
1208 idxlend=(vnf->repend)?((SLONGLONG)(vnf->repend)<<FRACBITS)-1:0;
1209 idxlpos=(SLONGLONG)(vnf->reppos)<<FRACBITS;
1210 AddChannel(vc_tickbuf,portion);
/* Post-processing: noise reduction, reverb, user callback. */
1214 if(md_mode & DMODE_NOISEREDUCTION) {
1215 MixLowPass(vc_tickbuf, portion);
1219 if(md_reverb>15) md_reverb=15;
1220 MixReverb(vc_tickbuf,portion);
1224 vc_callback((unsigned char*)vc_tickbuf, portion);
/* Convert the 32-bit tick buffer to the configured output format. */
1227 if(vc_mode & DMODE_FLOAT)
1228 Mix32toFP((float*)buffer,vc_tickbuf,portion);
1229 else if(vc_mode & DMODE_16BITS)
1230 Mix32to16((SWORD*)buffer,vc_tickbuf,portion);
1232 Mix32to8((SBYTE*)buffer,vc_tickbuf,portion);
1234 buffer += samples2bytes(portion) / SAMPLING_FACTOR;
/* NOTE(review): function header not visible in this chunk — this is
   presumably the body of VC2_Init; confirm against the full file.
   Allocates the sample table and tick buffer, then selects the
   format converters according to the configured mode. */
1244 if (!(md_mode&DMODE_HQMIXER))
1247 if(!(Samples=(SWORD**)MikMod_amalloc(MAXSAMPLEHANDLES*sizeof(SWORD*)))) {
1248 _mm_errno = MMERR_INITIALIZING_MIXER;
/* Tick buffer gets 32 extra slots of headroom. */
1252 if(!(vc_tickbuf=(SLONG*)MikMod_amalloc((TICKLSIZE+32)*sizeof(SLONG)))) {
1253 _mm_errno = MMERR_INITIALIZING_MIXER;
/* Select stereo or mono converter/reverb/low-pass implementations. */
1258 if(md_mode & DMODE_STEREO) {
1259 Mix32toFP = Mix32ToFP_Stereo;
1260 #if ((defined HAVE_ALTIVEC || defined HAVE_SSE2) && (SAMPLING_FACTOR == 4))
1261 if (md_mode & DMODE_SIMDMIXER)
1262 Mix32to16 = Mix32To16_Stereo_SIMD_4Tap;
1265 Mix32to16 = Mix32To16_Stereo;
1266 Mix32to8 = Mix32To8_Stereo;
1267 MixReverb = MixReverb_Stereo;
1268 MixLowPass = MixLowPass_Stereo;
1270 Mix32toFP = Mix32ToFP_Normal;
1271 Mix32to16 = Mix32To16_Normal;
1272 Mix32to8 = Mix32To8_Normal;
1273 MixReverb = MixReverb_Normal;
1274 MixLowPass = MixLowPass_Normal;
/* The HQ mixer always interpolates. */
1277 md_mode |= DMODE_INTERP;
/* Start playback: size the tick window and allocate the eight reverb
   delay lines (right-channel set only in stereo). Returns nonzero on
   allocation failure. */
1282 int VC2_PlayStart(void)
1284 md_mode|=DMODE_INTERP;
/* Stereo halves the per-channel sample budget of the tick buffer. */
1286 samplesthatfit = TICKLSIZE;
1287 if(vc_mode & DMODE_STEREO) samplesthatfit >>= 1;
/* Delay-line lengths: co-prime-ish ratios scaled by the mixing rate. */
1290 RVc1 = (5000L * md_mixfreq) / (REVERBERATION * 10);
1291 RVc2 = (5078L * md_mixfreq) / (REVERBERATION * 10);
1292 RVc3 = (5313L * md_mixfreq) / (REVERBERATION * 10);
1293 RVc4 = (5703L * md_mixfreq) / (REVERBERATION * 10);
1294 RVc5 = (6250L * md_mixfreq) / (REVERBERATION * 10);
1295 RVc6 = (6953L * md_mixfreq) / (REVERBERATION * 10);
1296 RVc7 = (7813L * md_mixfreq) / (REVERBERATION * 10);
1297 RVc8 = (8828L * md_mixfreq) / (REVERBERATION * 10);
1299 if(!(RVbufL1=(SLONG*)MikMod_calloc((RVc1+1),sizeof(SLONG)))) return 1;
1300 if(!(RVbufL2=(SLONG*)MikMod_calloc((RVc2+1),sizeof(SLONG)))) return 1;
1301 if(!(RVbufL3=(SLONG*)MikMod_calloc((RVc3+1),sizeof(SLONG)))) return 1;
1302 if(!(RVbufL4=(SLONG*)MikMod_calloc((RVc4+1),sizeof(SLONG)))) return 1;
1303 if(!(RVbufL5=(SLONG*)MikMod_calloc((RVc5+1),sizeof(SLONG)))) return 1;
1304 if(!(RVbufL6=(SLONG*)MikMod_calloc((RVc6+1),sizeof(SLONG)))) return 1;
1305 if(!(RVbufL7=(SLONG*)MikMod_calloc((RVc7+1),sizeof(SLONG)))) return 1;
1306 if(!(RVbufL8=(SLONG*)MikMod_calloc((RVc8+1),sizeof(SLONG)))) return 1;
1308 /* allocate reverb buffers for the right channel if in stereo mode only. */
1309 if (vc_mode & DMODE_STEREO) {
1310 if(!(RVbufR1=(SLONG*)MikMod_calloc((RVc1+1),sizeof(SLONG)))) return 1;
1311 if(!(RVbufR2=(SLONG*)MikMod_calloc((RVc2+1),sizeof(SLONG)))) return 1;
1312 if(!(RVbufR3=(SLONG*)MikMod_calloc((RVc3+1),sizeof(SLONG)))) return 1;
1313 if(!(RVbufR4=(SLONG*)MikMod_calloc((RVc4+1),sizeof(SLONG)))) return 1;
1314 if(!(RVbufR5=(SLONG*)MikMod_calloc((RVc5+1),sizeof(SLONG)))) return 1;
1315 if(!(RVbufR6=(SLONG*)MikMod_calloc((RVc6+1),sizeof(SLONG)))) return 1;
1316 if(!(RVbufR7=(SLONG*)MikMod_calloc((RVc7+1),sizeof(SLONG)))) return 1;
1317 if(!(RVbufR8=(SLONG*)MikMod_calloc((RVc8+1),sizeof(SLONG)))) return 1;
/* Stop playback: release all reverb delay lines and clear the
   pointers so a later VC2_PlayStart reallocates them cleanly. */
1324 void VC2_PlayStop(void)
1326 MikMod_free(RVbufL1);
1327 MikMod_free(RVbufL2);
1328 MikMod_free(RVbufL3);
1329 MikMod_free(RVbufL4);
1330 MikMod_free(RVbufL5);
1331 MikMod_free(RVbufL6);
1332 MikMod_free(RVbufL7);
1333 MikMod_free(RVbufL8);
1334 MikMod_free(RVbufR1);
1335 MikMod_free(RVbufR2);
1336 MikMod_free(RVbufR3);
1337 MikMod_free(RVbufR4);
1338 MikMod_free(RVbufR5);
1339 MikMod_free(RVbufR6);
1340 MikMod_free(RVbufR7);
1341 MikMod_free(RVbufR8);
1343 RVbufL1=RVbufL2=RVbufL3=RVbufL4=RVbufL5=RVbufL6=RVbufL7=RVbufL8=NULL;
1344 RVbufR1=RVbufR2=RVbufR3=RVbufR4=RVbufR5=RVbufR6=RVbufR7=RVbufR8=NULL;
/* Allocate the voice array for md_softchn channels, defaulting pan
   to alternating left/right. Returns nonzero on failure. */
1347 int VC2_SetNumVoices(void)
1351 md_mode|=DMODE_INTERP;
1353 if(!(vc_softchn=md_softchn)) return 0;
1356 if(!(vinf=(VINFO*)MikMod_calloc(vc_softchn,sizeof(VINFO)))) return 1;
1358 for(t=0;t<vc_softchn;t++) {
1360 vinf[t].pan=(t&1)?PAN_LEFT:PAN_RIGHT;
1366 #endif /* ! NO_HQMIXER */