1 /********************************************************************
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2009 *
9 * by the Xiph.Org Foundation http://www.xiph.org/ *
11 ********************************************************************
13 function: *unnormalized* fft transform
14 last mod: $Id: smallft.c 16227 2009-07-08 06:58:46Z xiphmont $
16 ********************************************************************/
18 /* FFT implementation from OggSquish, minus cosine transforms,
19 * minus all but radix 2/4 case. In Vorbis we only need this
22 * To do more than just power-of-two sized vectors, see the full
23 * version I wrote for NetLib.
25 * Note that the packing is a little strange; rather than the FFT r/i
26 * packing following R_0, I_n, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1,
27 * it follows R_0, R_1, I_1, R_2, I_2 ... R_n-1, I_n-1, I_n like the
38 static void drfti1(int n, float *wa, int *ifac){
39 static int ntryh[4] = { 4,2,3,5 };
40 static float tpi = 6.28318530717958648f;
41 float arg,argh,argld,fi;
44 int ld, ii, ip, is, nq, nr;
84 for (k1=0;k1<nfm1;k1++){
96 for (ii=2;ii<ido;ii+=2){
108 static void fdrffti(int n, float *wsave, int *ifac){
111 drfti1(n, wsave+n, ifac);
114 static void dradf2(int ido,int l1,float *cc,float *ch,float *wa1){
117 int t0,t1,t2,t3,t4,t5,t6;
123 ch[t1<<1]=cc[t1]+cc[t2];
124 ch[(t1<<1)+t3-1]=cc[t1]-cc[t2];
144 tr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
145 ti2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
148 ch[t6-1]=cc[t5-1]+tr2;
149 ch[t4-1]=cc[t5-1]-tr2;
169 static void dradf4(int ido,int l1,float *cc,float *ch,float *wa1,
170 float *wa2,float *wa3){
171 static float hsqt2 = .70710678118654752f;
172 int i,k,t0,t1,t2,t3,t4,t5,t6;
173 float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
185 ch[t5=t3<<2]=tr1+tr2;
186 ch[(ido<<2)+t5-1]=tr2-tr1;
187 ch[(t5+=(ido<<1))-1]=cc[t3]-cc[t4];
188 ch[t5]=cc[t2]-cc[t1];
211 cr2=wa1[i-2]*cc[t3-1]+wa1[i-1]*cc[t3];
212 ci2=wa1[i-2]*cc[t3]-wa1[i-1]*cc[t3-1];
214 cr3=wa2[i-2]*cc[t3-1]+wa2[i-1]*cc[t3];
215 ci3=wa2[i-2]*cc[t3]-wa2[i-1]*cc[t3-1];
217 cr4=wa3[i-2]*cc[t3-1]+wa3[i-1]*cc[t3];
218 ci4=wa3[i-2]*cc[t3]-wa3[i-1]*cc[t3-1];
248 t2=(t1=t0+ido-1)+(t0<<1);
255 ti1=-hsqt2*(cc[t1]+cc[t2]);
256 tr1=hsqt2*(cc[t1]-cc[t2]);
258 ch[t4-1]=tr1+cc[t6-1];
259 ch[t4+t5-1]=cc[t6-1]-tr1;
261 ch[t4]=ti1-cc[t1+t0];
262 ch[t4+t5]=ti1+cc[t1+t0];
271 static void dradfg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
272 float *c2,float *ch,float *ch2,float *wa){
274 static float tpi=6.283185307179586f;
275 int idij,ipph,i,j,k,l,ic,ik,is;
276 int t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
277 float dc2,ai1,ai2,ar1,ar2,ds2;
279 float dcp,arg,dsp,ar1h,ar2h;
293 for(ik=0;ik<idl1;ik++)ch2[ik]=c2[ik];
319 ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
320 ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
336 ch[t3-1]=wa[idij-1]*c1[t3-1]+wa[idij]*c1[t3];
337 ch[t3]=wa[idij-1]*c1[t3]-wa[idij]*c1[t3-1];
360 c1[t5-1]=ch[t5-1]+ch[t6-1];
361 c1[t6-1]=ch[t5]-ch[t6];
362 c1[t5]=ch[t5]+ch[t6];
363 c1[t6]=ch[t6-1]-ch[t5-1];
379 c1[t5-1]=ch[t5-1]+ch[t6-1];
380 c1[t6-1]=ch[t5]-ch[t6];
381 c1[t5]=ch[t5]+ch[t6];
382 c1[t6]=ch[t6-1]-ch[t5-1];
391 for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
403 c1[t3]=ch[t3]+ch[t4];
404 c1[t4]=ch[t4]-ch[t3];
416 ar1h=dcp*ar1-dsp*ai1;
424 for(ik=0;ik<idl1;ik++){
425 ch2[t4++]=c2[ik]+ar1*c2[t7++];
426 ch2[t5++]=ai1*c2[t6++];
440 ar2h=dc2*ar2-ds2*ai2;
448 for(ik=0;ik<idl1;ik++){
449 ch2[t6++]+=ar2*c2[t8++];
450 ch2[t7++]+=ai2*c2[t9++];
459 for(ik=0;ik<idl1;ik++)ch2[ik]+=c2[t2++];
469 for(i=0;i<ido;i++)cc[t4++]=ch[t3++];
530 cc[i+t7-1]=ch[i+t8-1]+ch[i+t9-1];
531 cc[ic+t6-1]=ch[i+t8-1]-ch[i+t9-1];
532 cc[i+t7]=ch[i+t8]+ch[i+t9];
533 cc[ic+t6]=ch[i+t9]-ch[i+t8];
560 cc[t7-1]=ch[t8-1]+ch[t9-1];
561 cc[t6-1]=ch[t8-1]-ch[t9-1];
562 cc[t7]=ch[t8]+ch[t9];
563 cc[t6]=ch[t9]-ch[t8];
573 static void drftf1(int n,float *c,float *ch,float *wa,int *ifac){
576 int ip,iw,ido,idl1,ix2,ix3;
583 for(k1=0;k1<nf;k1++){
597 dradf4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
599 dradf4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
606 dradf2(ido,l1,c,ch,wa+iw-1);
610 dradf2(ido,l1,ch,c,wa+iw-1);
617 dradfg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
622 dradfg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
631 for(i=0;i<n;i++)c[i]=ch[i];
634 static void dradb2(int ido,int l1,float *cc,float *ch,float *wa1){
635 int i,k,t0,t1,t2,t3,t4,t5,t6;
644 ch[t1]=cc[t2]+cc[t3+t2];
645 ch[t1+t0]=cc[t2]-cc[t3+t2];
663 ch[t3-1]=cc[t4-1]+cc[t5-1];
664 tr2=cc[t4-1]-cc[t5-1];
665 ch[t3]=cc[t4]-cc[t5];
667 ch[t6-1]=wa1[i-2]*tr2-wa1[i-1]*ti2;
668 ch[t6]=wa1[i-2]*ti2+wa1[i-1]*tr2;
679 ch[t1]=cc[t2]+cc[t2];
680 ch[t1+t0]=-(cc[t2+1]+cc[t2+1]);
686 static void dradb3(int ido,int l1,float *cc,float *ch,float *wa1,
688 static float taur = -.5f;
689 static float taui = .8660254037844386f;
690 int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10;
691 float ci2,ci3,di2,di3,cr2,cr3,dr2,dr3,ti2,tr2;
700 tr2=cc[t3-1]+cc[t3-1];
701 cr2=cc[t5]+(taur*tr2);
703 ci3=taui*(cc[t3]+cc[t3]);
728 tr2=cc[t5-1]+cc[t6-1];
729 cr2=cc[t7-1]+(taur*tr2);
730 ch[t8-1]=cc[t7-1]+tr2;
732 ci2=cc[t7]+(taur*ti2);
734 cr3=taui*(cc[t5-1]-cc[t6-1]);
735 ci3=taui*(cc[t5]+cc[t6]);
740 ch[t9-1]=wa1[i-2]*dr2-wa1[i-1]*di2;
741 ch[t9]=wa1[i-2]*di2+wa1[i-1]*dr2;
742 ch[t10-1]=wa2[i-2]*dr3-wa2[i-1]*di3;
743 ch[t10]=wa2[i-2]*di3+wa2[i-1]*dr3;
749 static void dradb4(int ido,int l1,float *cc,float *ch,float *wa1,
750 float *wa2,float *wa3){
751 static float sqrt2=1.414213562373095f;
752 int i,k,t0,t1,t2,t3,t4,t5,t6,t7,t8;
753 float ci2,ci3,ci4,cr2,cr3,cr4,ti1,ti2,ti3,ti4,tr1,tr2,tr3,tr4;
763 tr3=cc[t4-1]+cc[t4-1];
765 tr1=cc[t3]-cc[(t4+=t6)-1];
780 t5=(t4=(t3=(t2=t1<<2)+t6))+t6;
792 tr1=cc[t2-1]-cc[t5-1];
793 tr2=cc[t2-1]+cc[t5-1];
794 ti4=cc[t3-1]-cc[t4-1];
795 tr3=cc[t3-1]+cc[t4-1];
805 ch[(t8=t7+t0)-1]=wa1[i-2]*cr2-wa1[i-1]*ci2;
806 ch[t8]=wa1[i-2]*ci2+wa1[i-1]*cr2;
807 ch[(t8+=t0)-1]=wa2[i-2]*cr3-wa2[i-1]*ci3;
808 ch[t8]=wa2[i-2]*ci3+wa2[i-1]*cr3;
809 ch[(t8+=t0)-1]=wa3[i-2]*cr4-wa3[i-1]*ci4;
810 ch[t8]=wa3[i-2]*ci4+wa3[i-1]*cr4;
815 if(ido%2 == 1)return;
827 tr1=cc[t1-1]-cc[t4-1];
828 tr2=cc[t1-1]+cc[t4-1];
830 ch[t5+=t0]=sqrt2*(tr1-ti1);
832 ch[t5+=t0]=-sqrt2*(tr1+ti1);
840 static void dradbg(int ido,int ip,int l1,int idl1,float *cc,float *c1,
841 float *c2,float *ch,float *ch2,float *wa){
842 static float tpi=6.283185307179586f;
843 int idij,ipph,i,j,k,l,ik,is,t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,
845 float dc2,ai1,ai2,ar1,ar2,ds2;
847 float dcp,arg,dsp,ar1h,ar2h;
899 ch[t3]=cc[t6-1]+cc[t6-1];
900 ch[t4]=cc[t6]+cc[t6];
908 if (ido == 1)goto L116;
932 ch[t5-1]=cc[t9-1]+cc[t11-1];
933 ch[t6-1]=cc[t9-1]-cc[t11-1];
934 ch[t5]=cc[t9]-cc[t11];
935 ch[t6]=cc[t9]+cc[t11];
966 ch[t5-1]=cc[t11-1]+cc[t12-1];
967 ch[t6-1]=cc[t11-1]-cc[t12-1];
968 ch[t5]=cc[t11]-cc[t12];
969 ch[t6]=cc[t11]+cc[t12];
988 ar1h=dcp*ar1-dsp*ai1;
996 for(ik=0;ik<idl1;ik++){
997 c2[t4++]=ch2[t6++]+ar1*ch2[t7++];
998 c2[t5++]=ai1*ch2[t8++];
1007 for(j=2;j<ipph;j++){
1010 ar2h=dc2*ar2-ds2*ai2;
1011 ai2=dc2*ai2+ds2*ar2;
1017 for(ik=0;ik<idl1;ik++){
1018 c2[t4++]+=ar2*ch2[t11++];
1019 c2[t5++]+=ai2*ch2[t12++];
1025 for(j=1;j<ipph;j++){
1028 for(ik=0;ik<idl1;ik++)ch2[ik]+=ch2[t2++];
1033 for(j=1;j<ipph;j++){
1039 ch[t3]=c1[t3]-c1[t4];
1040 ch[t4]=c1[t3]+c1[t4];
1046 if(ido==1)goto L132;
1047 if(nbd<l1)goto L128;
1051 for(j=1;j<ipph;j++){
1059 for(i=2;i<ido;i+=2){
1062 ch[t5-1]=c1[t5-1]-c1[t6];
1063 ch[t6-1]=c1[t5-1]+c1[t6];
1064 ch[t5]=c1[t5]+c1[t6-1];
1065 ch[t6]=c1[t5]-c1[t6-1];
1076 for(j=1;j<ipph;j++){
1081 for(i=2;i<ido;i+=2){
1087 ch[t5-1]=c1[t5-1]-c1[t6];
1088 ch[t6-1]=c1[t5-1]+c1[t6];
1089 ch[t5]=c1[t5]+c1[t6-1];
1090 ch[t6]=c1[t5]-c1[t6-1];
1100 for(ik=0;ik<idl1;ik++)c2[ik]=ch2[ik];
1111 if(nbd>l1)goto L139;
1120 for(i=2;i<ido;i+=2){
1125 c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1126 c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1143 for(i=2;i<ido;i+=2){
1146 c1[t3-1]=wa[idij-1]*ch[t3-1]-wa[idij]*ch[t3];
1147 c1[t3]=wa[idij-1]*ch[t3]+wa[idij]*ch[t3-1];
1154 static void drftb1(int n, float *c, float *ch, float *wa, int *ifac){
1157 int nf,ip,iw,ix2,ix3,ido,idl1;
1164 for(k1=0;k1<nf;k1++){
1174 dradb4(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1);
1176 dradb4(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1);
1184 dradb2(ido,l1,ch,c,wa+iw-1);
1186 dradb2(ido,l1,c,ch,wa+iw-1);
1195 dradb3(ido,l1,ch,c,wa+iw-1,wa+ix2-1);
1197 dradb3(ido,l1,c,ch,wa+iw-1,wa+ix2-1);
1202 /* The radix five case can be translated later..... */
1203 /* if(ip!=5)goto L112;
1209 dradb5(ido,l1,ch,c,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1211 dradb5(ido,l1,c,ch,wa+iw-1,wa+ix2-1,wa+ix3-1,wa+ix4-1);
1217 dradbg(ido,ip,l1,idl1,ch,ch,ch,c,c,wa+iw-1);
1219 dradbg(ido,ip,l1,idl1,c,c,c,ch,ch,wa+iw-1);
1229 for(i=0;i<n;i++)c[i]=ch[i];
1232 void drft_forward(drft_lookup *l,float *data){
1234 drftf1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1237 void drft_backward(drft_lookup *l,float *data){
1239 drftb1(l->n,data,l->trigcache,l->trigcache+l->n,l->splitcache);
1242 void drft_init(drft_lookup *l,int n){
1244 l->trigcache=_ogg_calloc(3*n,sizeof(*l->trigcache));
1245 l->splitcache=_ogg_calloc(32,sizeof(*l->splitcache));
1246 fdrffti(n, l->trigcache, l->splitcache);
1249 void drft_clear(drft_lookup *l){
1251 if(l->trigcache)_ogg_free(l->trigcache);
1252 if(l->splitcache)_ogg_free(l->splitcache);
1253 memset(l,0,sizeof(*l));