1 /* Copyright Jean-Marc Valin 2001 */
2 /* This software is released under the version 2.1 of the GNU Lesser General*/
3
4
5 /*Example: for an (out-of-place) 256 transform:
6 COMPLEX in[256];
7 COMPLEX out[256];
8 COMPLEX w[256];
9 int bits[256];
10 fft_initCosSinTables(w, bits, 8);
11 void fft(in, out, 8, w, bits);
12
13 Note: fft_initCosSinTables only need to be called once for each size
14 8 is the order of the FFT (2^8=256)
15 */
16
17 #include <math.h>
18
19
20 typedef struct COMPLEX {
21 float re;
22 float im;
23 } COMPLEX;
24
25 void fft_initCosSinTables(COMPLEX *w, int *bits, int M)
26 {
27 int i,j;
28 int tmp;
29 int size = 1 << M;
30 for (i=0;i<size;i++)
31 {
32 bits[i]=0;
33 tmp=i;
34 for (j=0;j<M;j++)
35 {
36 bits[i] <<= 1;
37 bits[i] += tmp&1;
38 tmp>>=1;
39 }
40 }
41
42 while (size)
43 {
44 int k;
45 float tmp, p = (2.0 * M_PI) / size;
46 COMPLEX tmp2;
47 for (k = 0; k < (size>>1); k++) {
48 tmp = k * p;
49 tmp2.re=cos(tmp);
50 tmp2.im=-sin(tmp);
51 *w++ = tmp2;
52 }
53 size >>= 1;
54 }
55 }
56 inline void bitrev(COMPLEX *in, COMPLEX *out, int *bits, int M)
57 {
58 int dummy;
59 int size = 1 << M;
60
61 __asm__ __volatile__ (
62 "
63 push %0
64 push %2
65 push %3
66 .loop%=:
67 push %0
68
69
70 movl (%3), %4
71 add $4, %3
72 movl (%3), %0
73 add $4, %3
74 movq (%1,%4,8), %%mm0
75 movq (%1,%0,8), %%mm1
76
77 movl (%3), %4
78 add $4, %3
79 movl (%3), %0
80 add $4, %3
81 movq (%1,%4,8), %%mm2
82 movq (%1,%0,8), %%mm3
83
84
85 movl (%3), %4
86 add $4, %3
87 movl (%3), %0
88 add $4, %3
89 movq (%1,%4,8), %%mm4
90 movq (%1,%0,8), %%mm5
91
92 movl (%3), %4
93 add $4, %3
94 movl (%3), %0
95 add $4, %3
96 movq (%1,%4,8), %%mm6
97 movq (%1,%0,8), %%mm7
98
99 movq %%mm0, (%2)
100 add $8, %2
101 movq %%mm1, (%2)
102 add $8, %2
103 movq %%mm2, (%2)
104 add $8, %2
105 movq %%mm3, (%2)
106 add $8, %2
107
108
109 movq %%mm4, (%2)
110 add $8, %2
111 movq %%mm5, (%2)
112 add $8, %2
113 movq %%mm6, (%2)
114 add $8, %2
115 movq %%mm7, (%2)
116 add $8, %2
117
118
119 pop %0
120
121 dec %0
122 jne .loop%=
123 femms
124 pop %3
125 pop %2
126 pop %0
127 " : : "q" (size>>3), "r" (in), "r" (out), "r" (bits), "q" (dummy)
128 : "memory", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
129
130 }
131
132 inline void recurs_fft(COMPLEX *x, int M, COMPLEX *w, int repeat)
133 {
134 int dummy1, dummy2, dummy3, dummy4;
135 int N = 1 << M;
136 int N2 = N >> 1;
137 int mask = N-1;
138 if (M==1)
139 return;
140 if (M>11)
141 {
142 recurs_fft(x, M-1, w+N2, repeat);
143 recurs_fft(x+N2, M-1, w+N2, repeat);
144 } else {
145 recurs_fft(x, M-1, w+N2, repeat<<1);
146 }
147 if (M>2)
148 {
149 while (repeat--)
150 {
151 __asm__ __volatile__ (
152 "
153 .loop%=:
154 movq (%0), %%mm0
155 movq 8(%0), %%mm4
156 pswapd %%mm0, %%mm2
157 pswapd %%mm4, %%mm6
158 movq (%1), %%mm1
159 movq 8(%1), %%mm5
160 pfmul %%mm1, %%mm0
161 pfmul %%mm1, %%mm2
162 pfmul %%mm5, %%mm4
163 pfmul %%mm5, %%mm6
164 pfpnacc %%mm2, %%mm0
165 pfpnacc %%mm6, %%mm4
166 movq (%2), %%mm3
167 movq 8(%2), %%mm7
168 movq %%mm3, %%mm1
169 movq %%mm7, %%mm5
170 pfsub %%mm0, %%mm3
171 pfadd %%mm0, %%mm1
172 pfsub %%mm4, %%mm7
173 pfadd %%mm4, %%mm5
174 movq %%mm1, (%2)
175 movq %%mm5, 8(%2)
176 add $16, %1
177 add $16, %2
178 movq %%mm3, (%0)
179 movq %%mm7, 8(%0)
180 add $16, %0
181 dec %3
182 jne .loop%=
183 "
184 : "=r" (dummy1), "=r" (dummy2), "=r" (dummy3), "=q" (dummy4) : "0" (x+N2), "1" (w), "2" (x), "3" (N2>>1)
185 : "memory", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
186
187 x+=N;
188 }
189 } else {
190 int mask[2]={0x80000000, 0x00000000};
191 __asm__ __volatile__ (
192 "
193 push %0
194 push %1
195 movq (%2), %%mm7
196 .loop%=:
197 movq (%0), %%mm0
198 movq 8(%0), %%mm1
199
200 movq %%mm0, %%mm4
201 pfadd %%mm1, %%mm0
202
203 movq 16(%0), %%mm2
204 movq 24(%0), %%mm3
205
206 movq %%mm2, %%mm5
207 pfadd %%mm3, %%mm2
208
209 pfsub %%mm1, %%mm4
210 pfsub %%mm3, %%mm5
211
212 movq %%mm0, %%mm1
213 pfsub %%mm2, %%mm0
214 pfadd %%mm2, %%mm1
215 pswapd %%mm5, %%mm5
216 movq %%mm0, 16(%0)
217 movq %%mm1, (%0)
218
219 pxor %%mm7, %%mm5
220 movq %%mm4, %%mm0
221 pfadd %%mm5, %%mm0
222 pfsub %%mm5, %%mm4
223
224 movq %%mm0, 24(%0)
225 movq %%mm4, 8(%0)
226
227 add $32, %0
228
229 dec %1
230 jne .loop%=
231 pop %1
232 pop %0
233 "
234 : : "r" (x), "q" (repeat), "r" (mask)
235 : "memory", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
236
237 }
238 __asm__ __volatile__ ("femms"
239 : :
240 : "memory", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
241
242 }
243
244
245 void fft(COMPLEX *in, COMPLEX *out, int M, COMPLEX *w, int *bits)
246 {
247 bitrev(in, out, bits, M);
248 recurs_fft(out, M, w, 1);
249 }
250