Project Ne10
An Open Optimized Software Library Project for the ARM Architecture
Loading...
Searching...
No Matches
NE10_fft_int32.c
1/*
2 * Copyright 2013-15 ARM Limited and Contributors.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of ARM Limited nor the
13 * names of its contributors may be used to endorse or promote products
14 * derived from this software without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28/* license of Kiss FFT */
29/*
30Copyright (c) 2003-2010, Mark Borgerding
31
32All rights reserved.
33
34Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35
36 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39
40THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41*/
42
43/*
44 * NE10 Library : dsp/NE10_fft_int32.c
45 */
46
47#include "NE10_types.h"
48#include "NE10_macros.h"
49#include "NE10_fft.h"
50
51
52static void ne10_mixed_radix_butterfly_int32_c (ne10_fft_cpx_int32_t * Fout,
54 ne10_int32_t * factors,
55 ne10_fft_cpx_int32_t * twiddles,
56 ne10_fft_cpx_int32_t * buffer,
57 ne10_int32_t scaled_flag)
58{
59 ne10_int32_t fstride, mstride, N;
60 ne10_int32_t fstride1;
61 ne10_int32_t f_count, m_count;
62 ne10_int32_t stage_count;
63
64 ne10_fft_cpx_int32_t scratch_in[8];
65 ne10_fft_cpx_int32_t scratch_out[8];
66 ne10_fft_cpx_int32_t scratch[16];
67 ne10_fft_cpx_int32_t scratch_tw[6];
68
69 ne10_fft_cpx_int32_t *Fin1, *Fin2, *Fout1, *Fout2;
70 ne10_fft_cpx_int32_t *Fout_ls = Fout;
72 ne10_fft_cpx_int32_t *tw, *tw1, *tw2;
73 const ne10_int32_t TW_81 = 1518500249;
74 const ne10_int32_t TW_81N = -1518500249;
75
76
77
78 // init fstride, mstride, N, tw
79 stage_count = factors[0];
80 fstride = factors[1];
81 mstride = factors[ (stage_count << 1) - 1 ];
82 N = factors[ stage_count << 1 ]; // radix
83 tw = twiddles;
84
85 // the first stage
86 Fin1 = Fin;
87 Fout1 = Fout;
88 if (N == 2) // length of FFT is 2^n (n is odd)
89 {
90 // radix 8
91 N = fstride >> 1; // 1/4 of length of FFT
92 fstride1 = fstride >> 2;
93
94 Fin1 = Fin;
95 for (f_count = 0; f_count < fstride1; f_count ++)
96 {
97 Fout1 = & Fout[ f_count * 8 ];
98 // load
99 if (scaled_flag == 1)
100 {
101 NE10_F2I32_FIXDIV (Fin1[0], 8);
102 NE10_F2I32_FIXDIV (Fin1[0 + fstride], 8);
103 NE10_F2I32_FIXDIV (Fin1[fstride1], 8);
104 NE10_F2I32_FIXDIV (Fin1[fstride1 + fstride], 8);
105 NE10_F2I32_FIXDIV (Fin1[fstride1 * 2], 8);
106 NE10_F2I32_FIXDIV (Fin1[fstride1 * 2 + fstride], 8);
107 NE10_F2I32_FIXDIV (Fin1[fstride1 * 3], 8);
108 NE10_F2I32_FIXDIV (Fin1[fstride1 * 3 + fstride], 8);
109 }
110 scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
111 scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
112 scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
113 scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
114 scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
115 scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
116 scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
117 scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
118 scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
119 scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
120 scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
121 scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
122 scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
123 scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
124 scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
125 scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
126
127 // radix 4 butterfly without twiddles
128 scratch[0] = scratch_in[0];
129 scratch[1] = scratch_in[1];
130
131 scratch[2] = scratch_in[2];
132 scratch[3].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) (scratch_in[3].r + scratch_in[3].i) * TW_81) >> 31);
133 scratch[3].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) (scratch_in[3].i - scratch_in[3].r) * TW_81) >> 31);
134
135 scratch[4] = scratch_in[4];
136 scratch[5].r = scratch_in[5].i;
137 scratch[5].i = -scratch_in[5].r;
138
139 scratch[6].r = scratch_in[6].r;
140 scratch[6].i = scratch_in[6].i;
141 scratch[7].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) (scratch_in[7].r - scratch_in[7].i) * TW_81N) >> 31);
142 scratch[7].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) (scratch_in[7].i + scratch_in[7].r) * TW_81N) >> 31);
143
144 // radix 2 butterfly
145 scratch[8].r = scratch[0].r + scratch[4].r;
146 scratch[8].i = scratch[0].i + scratch[4].i;
147 scratch[9].r = scratch[1].r + scratch[5].r;
148 scratch[9].i = scratch[1].i + scratch[5].i;
149
150 scratch[10].r = scratch[0].r - scratch[4].r;
151 scratch[10].i = scratch[0].i - scratch[4].i;
152 scratch[11].r = scratch[1].r - scratch[5].r;
153 scratch[11].i = scratch[1].i - scratch[5].i;
154
155 // radix 2 butterfly
156 scratch[12].r = scratch[2].r + scratch[6].r;
157 scratch[12].i = scratch[2].i + scratch[6].i;
158 scratch[13].r = scratch[3].r + scratch[7].r;
159 scratch[13].i = scratch[3].i + scratch[7].i;
160
161 scratch[14].r = scratch[2].r - scratch[6].r;
162 scratch[14].i = scratch[2].i - scratch[6].i;
163 scratch[15].r = scratch[3].r - scratch[7].r;
164 scratch[15].i = scratch[3].i - scratch[7].i;
165
166 // third result
167 scratch_out[4].r = scratch[8].r - scratch[12].r;
168 scratch_out[4].i = scratch[8].i - scratch[12].i;
169 scratch_out[5].r = scratch[9].r - scratch[13].r;
170 scratch_out[5].i = scratch[9].i - scratch[13].i;
171
172 // first result
173 scratch_out[0].r = scratch[8].r + scratch[12].r;
174 scratch_out[0].i = scratch[8].i + scratch[12].i;
175 scratch_out[1].r = scratch[9].r + scratch[13].r;
176 scratch_out[1].i = scratch[9].i + scratch[13].i;
177
178 // second result
179 scratch_out[2].r = scratch[10].r + scratch[14].i;
180 scratch_out[2].i = scratch[10].i - scratch[14].r;
181 scratch_out[3].r = scratch[11].r + scratch[15].i;
182 scratch_out[3].i = scratch[11].i - scratch[15].r;
183
184 // forth result
185 scratch_out[6].r = scratch[10].r - scratch[14].i;
186 scratch_out[6].i = scratch[10].i + scratch[14].r;
187 scratch_out[7].r = scratch[11].r - scratch[15].i;
188 scratch_out[7].i = scratch[11].i + scratch[15].r;
189
190 // store
191 Fout1[0] = scratch_out[0];
192 Fout1[1] = scratch_out[1];
193 Fout1[2] = scratch_out[2];
194 Fout1[3] = scratch_out[3];
195 Fout1[4] = scratch_out[4];
196 Fout1[5] = scratch_out[5];
197 Fout1[6] = scratch_out[6];
198 Fout1[7] = scratch_out[7];
199
200 Fin1 += 1;
201 } // f_count
202 tw += 6;
203 mstride <<= 2;
204 fstride >>= 4;
205 stage_count -= 2;
206
207 // swap
208 Ftmp = buffer;
209 buffer = Fout;
210 Fout = Ftmp;
211 }
212 else if (N == 4) // length of FFT is 2^n (n is even)
213 {
214 //fstride is nfft>>2
215 for (f_count = fstride; f_count ; f_count --)
216 {
217 // load
218 scratch_in[0] = *Fin1;
219 Fin2 = Fin1 + fstride;
220 scratch_in[1] = *Fin2;
221 Fin2 = Fin2 + fstride;
222 scratch_in[2] = *Fin2;
223 Fin2 = Fin2 + fstride;
224 scratch_in[3] = *Fin2;
225
226 // radix 4 butterfly without twiddles
227 if (scaled_flag == 1)
228 {
229 NE10_F2I32_FIXDIV (scratch_in[0], 4);
230 NE10_F2I32_FIXDIV (scratch_in[1], 4);
231 NE10_F2I32_FIXDIV (scratch_in[2], 4);
232 NE10_F2I32_FIXDIV (scratch_in[3], 4);
233 }
234
235 // radix 2 butterfly
236 scratch[0].r = scratch_in[0].r + scratch_in[2].r;
237 scratch[0].i = scratch_in[0].i + scratch_in[2].i;
238
239 scratch[1].r = scratch_in[0].r - scratch_in[2].r;
240 scratch[1].i = scratch_in[0].i - scratch_in[2].i;
241
242 // radix 2 butterfly
243 scratch[2].r = scratch_in[1].r + scratch_in[3].r;
244 scratch[2].i = scratch_in[1].i + scratch_in[3].i;
245
246 scratch[3].r = scratch_in[1].r - scratch_in[3].r;
247 scratch[3].i = scratch_in[1].i - scratch_in[3].i;
248
249 // third result
250 scratch_out[2].r = scratch[0].r - scratch[2].r;
251 scratch_out[2].i = scratch[0].i - scratch[2].i;
252
253 // first result
254 scratch_out[0].r = scratch[0].r + scratch[2].r;
255 scratch_out[0].i = scratch[0].i + scratch[2].i;
256
257 // second result
258 scratch_out[1].r = scratch[1].r + scratch[3].i;
259 scratch_out[1].i = scratch[1].i - scratch[3].r;
260
261 // forth result
262 scratch_out[3].r = scratch[1].r - scratch[3].i;
263 scratch_out[3].i = scratch[1].i + scratch[3].r;
264
265 // store
266 * Fout1 ++ = scratch_out[0];
267 * Fout1 ++ = scratch_out[1];
268 * Fout1 ++ = scratch_out[2];
269 * Fout1 ++ = scratch_out[3];
270
271 Fin1++;
272 } // f_count
273
274 N = fstride; // 1/4 of length of FFT
275
276 // swap
277 Ftmp = buffer;
278 buffer = Fout;
279 Fout = Ftmp;
280
281 // update address for other stages
282 stage_count--;
283 fstride >>= 2;
284 // end of first stage
285 }
286
287
288 // others but the last one
289 for (; stage_count > 1 ; stage_count--)
290 {
291 Fin1 = buffer;
292 for (f_count = 0; f_count < fstride; f_count ++)
293 {
294 Fout1 = & Fout[ f_count * mstride << 2 ];
295 tw1 = tw;
296 for (m_count = mstride; m_count ; m_count --)
297 {
298 // load
299 scratch_tw[0] = *tw1;
300 tw2 = tw1 + mstride;
301 scratch_tw[1] = *tw2;
302 tw2 += mstride;
303 scratch_tw[2] = *tw2;
304 scratch_in[0] = * Fin1;
305 Fin2 = Fin1 + N;
306 scratch_in[1] = * Fin2;
307 Fin2 += N;
308 scratch_in[2] = * Fin2;
309 Fin2 += N;
310 scratch_in[3] = * Fin2;
311 if (scaled_flag == 1)
312 {
313 NE10_F2I32_FIXDIV (scratch_in[0], 4);
314 NE10_F2I32_FIXDIV (scratch_in[1], 4);
315 NE10_F2I32_FIXDIV (scratch_in[2], 4);
316 NE10_F2I32_FIXDIV (scratch_in[3], 4);
317 }
318
319 // radix 4 butterfly with twiddles
320
321 scratch[0] = scratch_in[0];
322 scratch[1].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
323 - (NE10_F2I32_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> 31);
324 scratch[1].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
325 + (NE10_F2I32_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> 31);
326
327 scratch[2].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
328 - (NE10_F2I32_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> 31);
329 scratch[2].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
330 + (NE10_F2I32_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> 31);
331
332 scratch[3].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
333 - (NE10_F2I32_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> 31);
334 scratch[3].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
335 + (NE10_F2I32_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> 31);
336
337 // radix 2 butterfly
338 scratch[4].r = scratch[0].r + scratch[2].r;
339 scratch[4].i = scratch[0].i + scratch[2].i;
340
341 scratch[5].r = scratch[0].r - scratch[2].r;
342 scratch[5].i = scratch[0].i - scratch[2].i;
343
344 // radix 2 butterfly
345 scratch[6].r = scratch[1].r + scratch[3].r;
346 scratch[6].i = scratch[1].i + scratch[3].i;
347
348 scratch[7].r = scratch[1].r - scratch[3].r;
349 scratch[7].i = scratch[1].i - scratch[3].i;
350
351 // third result
352 scratch_out[2].r = scratch[4].r - scratch[6].r;
353 scratch_out[2].i = scratch[4].i - scratch[6].i;
354
355 // first result
356 scratch_out[0].r = scratch[4].r + scratch[6].r;
357 scratch_out[0].i = scratch[4].i + scratch[6].i;
358
359 // second result
360 scratch_out[1].r = scratch[5].r + scratch[7].i;
361 scratch_out[1].i = scratch[5].i - scratch[7].r;
362
363 // forth result
364 scratch_out[3].r = scratch[5].r - scratch[7].i;
365 scratch_out[3].i = scratch[5].i + scratch[7].r;
366
367 // store
368 *Fout1 = scratch_out[0];
369 Fout2 = Fout1 + mstride;
370 *Fout2 = scratch_out[1];
371 Fout2 += mstride;
372 *Fout2 = scratch_out[2];
373 Fout2 += mstride;
374 *Fout2 = scratch_out[3];
375
376 tw1++;
377 Fin1 ++;
378 Fout1 ++;
379 } // m_count
380 } // f_count
381 tw += mstride * 3;
382 mstride <<= 2;
383 // swap
384 Ftmp = buffer;
385 buffer = Fout;
386 Fout = Ftmp;
387 fstride >>= 2;
388 } // stage_count
389
390 // the last one
391 if (stage_count)
392 {
393 Fin1 = buffer;
394 // if stage count is even, output to the input array
395 Fout1 = Fout_ls;
396
397 for (f_count = 0; f_count < fstride; f_count ++)
398 {
399 tw1 = tw;
400 for (m_count = mstride; m_count ; m_count --)
401 {
402 // load
403 scratch_tw[0] = *tw1;
404 tw2 = tw1 + mstride;
405 scratch_tw[1] = *tw2;
406 tw2 += mstride;
407 scratch_tw[2] = *tw2;
408 scratch_in[0] = * Fin1;
409 Fin2 = Fin1 + N;
410 scratch_in[1] = * Fin2;
411 Fin2 += N;
412 scratch_in[2] = * Fin2;
413 Fin2 += N;
414 scratch_in[3] = * Fin2;
415 if (scaled_flag == 1)
416 {
417 NE10_F2I32_FIXDIV (scratch_in[0], 4);
418 NE10_F2I32_FIXDIV (scratch_in[1], 4);
419 NE10_F2I32_FIXDIV (scratch_in[2], 4);
420 NE10_F2I32_FIXDIV (scratch_in[3], 4);
421 }
422
423 // radix 4 butterfly with twiddles
424
425 scratch[0] = scratch_in[0];
426 scratch[1].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
427 - (NE10_F2I32_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> 31);
428 scratch[1].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
429 + (NE10_F2I32_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> 31);
430
431 scratch[2].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
432 - (NE10_F2I32_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> 31);
433 scratch[2].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
434 + (NE10_F2I32_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> 31);
435
436 scratch[3].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
437 - (NE10_F2I32_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> 31);
438 scratch[3].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
439 + (NE10_F2I32_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> 31);
440
441 // radix 2 butterfly
442 scratch[4].r = scratch[0].r + scratch[2].r;
443 scratch[4].i = scratch[0].i + scratch[2].i;
444
445 scratch[5].r = scratch[0].r - scratch[2].r;
446 scratch[5].i = scratch[0].i - scratch[2].i;
447
448 // radix 2 butterfly
449 scratch[6].r = scratch[1].r + scratch[3].r;
450 scratch[6].i = scratch[1].i + scratch[3].i;
451
452 scratch[7].r = scratch[1].r - scratch[3].r;
453 scratch[7].i = scratch[1].i - scratch[3].i;
454
455 // third result
456 scratch_out[2].r = scratch[4].r - scratch[6].r;
457 scratch_out[2].i = scratch[4].i - scratch[6].i;
458
459 // first result
460 scratch_out[0].r = scratch[4].r + scratch[6].r;
461 scratch_out[0].i = scratch[4].i + scratch[6].i;
462
463 // second result
464 scratch_out[1].r = scratch[5].r + scratch[7].i;
465 scratch_out[1].i = scratch[5].i - scratch[7].r;
466
467 // forth result
468 scratch_out[3].r = scratch[5].r - scratch[7].i;
469 scratch_out[3].i = scratch[5].i + scratch[7].r;
470
471 // store
472 *Fout1 = scratch_out[0];
473 Fout2 = Fout1 + N;
474 *Fout2 = scratch_out[1];
475 Fout2 += N;
476 *Fout2 = scratch_out[2];
477 Fout2 += N;
478 *Fout2 = scratch_out[3];
479
480 tw1 ++;
481 Fin1 ++;
482 Fout1 ++;
483 } // m_count
484 } // f_count
485 } // last stage
486}
487
488static void ne10_mixed_radix_butterfly_inverse_int32_c (ne10_fft_cpx_int32_t * Fout,
490 ne10_int32_t * factors,
491 ne10_fft_cpx_int32_t * twiddles,
492 ne10_fft_cpx_int32_t * buffer,
493 ne10_int32_t scaled_flag)
494{
495 ne10_int32_t fstride, mstride, N;
496 ne10_int32_t fstride1;
497 ne10_int32_t f_count, m_count;
498 ne10_int32_t stage_count;
499
500 ne10_fft_cpx_int32_t scratch_in[8];
501 ne10_fft_cpx_int32_t scratch_out[8];
502 ne10_fft_cpx_int32_t scratch[16];
503 ne10_fft_cpx_int32_t scratch_tw[6];
504
505 ne10_fft_cpx_int32_t *Fin1, *Fin2, *Fout1, *Fout2;
506 ne10_fft_cpx_int32_t *Fout_ls = Fout;
508 ne10_fft_cpx_int32_t *tw, *tw1, *tw2;
509 const ne10_int32_t TW_81 = 1518500249;
510 const ne10_int32_t TW_81N = -1518500249;
511
512 // init fstride, mstride, N
513 stage_count = factors[0];
514 fstride = factors[1];
515 mstride = factors[ (stage_count << 1) - 1 ];
516 N = factors[ stage_count << 1 ]; // radix
517 tw = twiddles;
518
519 // the first stage
520 Fin1 = Fin;
521 Fout1 = Fout;
522 if (N == 2) // length of FFT is 2^n (n is odd)
523 {
524 // radix 8
525 N = fstride >> 1; // 1/4 of length of FFT
526 fstride1 = fstride >> 2;
527
528 Fin1 = Fin;
529 for (f_count = 0; f_count < fstride1; f_count ++)
530 {
531 Fout1 = & Fout[ f_count * 8 ];
532 // load
533 if (scaled_flag == 1)
534 {
535 NE10_F2I32_FIXDIV (Fin1[0], 8);
536 NE10_F2I32_FIXDIV (Fin1[0 + fstride], 8);
537 NE10_F2I32_FIXDIV (Fin1[fstride1], 8);
538 NE10_F2I32_FIXDIV (Fin1[fstride1 + fstride], 8);
539 NE10_F2I32_FIXDIV (Fin1[fstride1 * 2], 8);
540 NE10_F2I32_FIXDIV (Fin1[fstride1 * 2 + fstride], 8);
541 NE10_F2I32_FIXDIV (Fin1[fstride1 * 3], 8);
542 NE10_F2I32_FIXDIV (Fin1[fstride1 * 3 + fstride], 8);
543 }
544
545 scratch_in[0].r = Fin1[0].r + Fin1[0 + fstride].r;
546 scratch_in[0].i = Fin1[0].i + Fin1[0 + fstride].i;
547 scratch_in[1].r = Fin1[0].r - Fin1[0 + fstride].r;
548 scratch_in[1].i = Fin1[0].i - Fin1[0 + fstride].i;
549 scratch_in[2].r = Fin1[fstride1].r + Fin1[fstride1 + fstride].r;
550 scratch_in[2].i = Fin1[fstride1].i + Fin1[fstride1 + fstride].i;
551 scratch_in[3].r = Fin1[fstride1].r - Fin1[fstride1 + fstride].r;
552 scratch_in[3].i = Fin1[fstride1].i - Fin1[fstride1 + fstride].i;
553 scratch_in[4].r = Fin1[fstride1 * 2].r + Fin1[fstride1 * 2 + fstride].r;
554 scratch_in[4].i = Fin1[fstride1 * 2].i + Fin1[fstride1 * 2 + fstride].i;
555 scratch_in[5].r = Fin1[fstride1 * 2].r - Fin1[fstride1 * 2 + fstride].r;
556 scratch_in[5].i = Fin1[fstride1 * 2].i - Fin1[fstride1 * 2 + fstride].i;
557 scratch_in[6].r = Fin1[fstride1 * 3].r + Fin1[fstride1 * 3 + fstride].r;
558 scratch_in[6].i = Fin1[fstride1 * 3].i + Fin1[fstride1 * 3 + fstride].i;
559 scratch_in[7].r = Fin1[fstride1 * 3].r - Fin1[fstride1 * 3 + fstride].r;
560 scratch_in[7].i = Fin1[fstride1 * 3].i - Fin1[fstride1 * 3 + fstride].i;
561
562 // radix 4 butterfly with twiddles
563
564 scratch[0] = scratch_in[0];
565 scratch[1] = scratch_in[1];
566
567 scratch[2] = scratch_in[2];
568 scratch[3].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) (scratch_in[3].r - scratch_in[3].i) * TW_81) >> 31);
569 scratch[3].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) (scratch_in[3].i + scratch_in[3].r) * TW_81) >> 31);
570
571 scratch[4] = scratch_in[4];
572 scratch[5].r = -scratch_in[5].i;
573 scratch[5].i = scratch_in[5].r;
574
575 scratch[6].r = scratch_in[6].r;
576 scratch[6].i = scratch_in[6].i;
577 scratch[7].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) (scratch_in[7].r + scratch_in[7].i) * TW_81N) >> 31);
578 scratch[7].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) (scratch_in[7].i - scratch_in[7].r) * TW_81N) >> 31);
579
580 // radix 2 butterfly
581 scratch[8].r = scratch[0].r + scratch[4].r;
582 scratch[8].i = scratch[0].i + scratch[4].i;
583 scratch[9].r = scratch[1].r + scratch[5].r;
584 scratch[9].i = scratch[1].i + scratch[5].i;
585
586 scratch[10].r = scratch[0].r - scratch[4].r;
587 scratch[10].i = scratch[0].i - scratch[4].i;
588 scratch[11].r = scratch[1].r - scratch[5].r;
589 scratch[11].i = scratch[1].i - scratch[5].i;
590
591 // radix 2 butterfly
592 scratch[12].r = scratch[2].r + scratch[6].r;
593 scratch[12].i = scratch[2].i + scratch[6].i;
594 scratch[13].r = scratch[3].r + scratch[7].r;
595 scratch[13].i = scratch[3].i + scratch[7].i;
596
597 scratch[14].r = scratch[2].r - scratch[6].r;
598 scratch[14].i = scratch[2].i - scratch[6].i;
599 scratch[15].r = scratch[3].r - scratch[7].r;
600 scratch[15].i = scratch[3].i - scratch[7].i;
601
602 // third result
603 scratch_out[4].r = scratch[8].r - scratch[12].r;
604 scratch_out[4].i = scratch[8].i - scratch[12].i;
605 scratch_out[5].r = scratch[9].r - scratch[13].r;
606 scratch_out[5].i = scratch[9].i - scratch[13].i;
607
608 // first result
609 scratch_out[0].r = scratch[8].r + scratch[12].r;
610 scratch_out[0].i = scratch[8].i + scratch[12].i;
611 scratch_out[1].r = scratch[9].r + scratch[13].r;
612 scratch_out[1].i = scratch[9].i + scratch[13].i;
613
614 // second result
615 scratch_out[2].r = scratch[10].r - scratch[14].i;
616 scratch_out[2].i = scratch[10].i + scratch[14].r;
617 scratch_out[3].r = scratch[11].r - scratch[15].i;
618 scratch_out[3].i = scratch[11].i + scratch[15].r;
619
620 // forth result
621 scratch_out[6].r = scratch[10].r + scratch[14].i;
622 scratch_out[6].i = scratch[10].i - scratch[14].r;
623 scratch_out[7].r = scratch[11].r + scratch[15].i;
624 scratch_out[7].i = scratch[11].i - scratch[15].r;
625
626 // store
627 Fout1[0] = scratch_out[0];
628 Fout1[1] = scratch_out[1];
629 Fout1[2] = scratch_out[2];
630 Fout1[3] = scratch_out[3];
631 Fout1[4] = scratch_out[4];
632 Fout1[5] = scratch_out[5];
633 Fout1[6] = scratch_out[6];
634 Fout1[7] = scratch_out[7];
635
636 Fin1 += 1;
637 } // f_count
638 tw += 6;
639 mstride <<= 2;
640 fstride >>= 4;
641 stage_count -= 2;
642
643 // swap
644 Ftmp = buffer;
645 buffer = Fout;
646 Fout = Ftmp;
647 }
648 else if (N == 4) // length of FFT is 2^n (n is even)
649 {
650 //fstride is nfft>>2
651 for (f_count = fstride; f_count ; f_count --)
652 {
653 // load
654 scratch_in[0] = *Fin1;
655 Fin2 = Fin1 + fstride;
656 scratch_in[1] = *Fin2;
657 Fin2 = Fin2 + fstride;
658 scratch_in[2] = *Fin2;
659 Fin2 = Fin2 + fstride;
660 scratch_in[3] = *Fin2;
661
662 // radix 4 butterfly without twiddles
663 if (scaled_flag == 1)
664 {
665 NE10_F2I32_FIXDIV (scratch_in[0], 4);
666 NE10_F2I32_FIXDIV (scratch_in[1], 4);
667 NE10_F2I32_FIXDIV (scratch_in[2], 4);
668 NE10_F2I32_FIXDIV (scratch_in[3], 4);
669 }
670
671 // radix 2 butterfly
672 scratch[0].r = scratch_in[0].r + scratch_in[2].r;
673 scratch[0].i = scratch_in[0].i + scratch_in[2].i;
674
675 scratch[1].r = scratch_in[0].r - scratch_in[2].r;
676 scratch[1].i = scratch_in[0].i - scratch_in[2].i;
677
678 // radix 2 butterfly
679 scratch[2].r = scratch_in[1].r + scratch_in[3].r;
680 scratch[2].i = scratch_in[1].i + scratch_in[3].i;
681
682 scratch[3].r = scratch_in[1].r - scratch_in[3].r;
683 scratch[3].i = scratch_in[1].i - scratch_in[3].i;
684
685 // third result
686 scratch_out[2].r = scratch[0].r - scratch[2].r;
687 scratch_out[2].i = scratch[0].i - scratch[2].i;
688
689 // first result
690 scratch_out[0].r = scratch[0].r + scratch[2].r;
691 scratch_out[0].i = scratch[0].i + scratch[2].i;
692
693 // second result
694 scratch_out[1].r = scratch[1].r - scratch[3].i;
695 scratch_out[1].i = scratch[1].i + scratch[3].r;
696
697 // forth result
698 scratch_out[3].r = scratch[1].r + scratch[3].i;
699 scratch_out[3].i = scratch[1].i - scratch[3].r;
700
701 // store
702 * Fout1 ++ = scratch_out[0];
703 * Fout1 ++ = scratch_out[1];
704 * Fout1 ++ = scratch_out[2];
705 * Fout1 ++ = scratch_out[3];
706
707 Fin1++;
708 } // f_count
709
710 N = fstride; // 1/4 of length of FFT
711 // update address for other stages
712 stage_count--;
713 fstride >>= 2;
714
715 // swap
716 Ftmp = buffer;
717 buffer = Fout;
718 Fout = Ftmp;
719
720 // end of first stage
721 }
722
723
724 // others but the last one
725 for (; stage_count > 1 ; stage_count--)
726 {
727 Fin1 = buffer;
728 for (f_count = 0; f_count < fstride; f_count ++)
729 {
730 Fout1 = & Fout[ f_count * mstride << 2 ];
731 tw1 = tw;
732 for (m_count = mstride; m_count ; m_count --)
733 {
734 // load
735 scratch_tw[0] = *tw1;
736 tw2 = tw1 + mstride;
737 scratch_tw[1] = *tw2;
738 tw2 += mstride;
739 scratch_tw[2] = *tw2;
740 scratch_in[0] = * Fin1;
741 Fin2 = Fin1 + N;
742 scratch_in[1] = * Fin2;
743 Fin2 += N;
744 scratch_in[2] = * Fin2;
745 Fin2 += N;
746 scratch_in[3] = * Fin2;
747 if (scaled_flag == 1)
748 {
749 NE10_F2I32_FIXDIV (scratch_in[0], 4);
750 NE10_F2I32_FIXDIV (scratch_in[1], 4);
751 NE10_F2I32_FIXDIV (scratch_in[2], 4);
752 NE10_F2I32_FIXDIV (scratch_in[3], 4);
753 }
754
755 // radix 4 butterfly with twiddles
756
757 scratch[0] = scratch_in[0];
758 scratch[1].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
759 + (NE10_F2I32_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> 31);
760 scratch[1].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
761 - (NE10_F2I32_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> 31);
762
763 scratch[2].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
764 + (NE10_F2I32_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> 31);
765 scratch[2].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
766 - (NE10_F2I32_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> 31);
767
768 scratch[3].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
769 + (NE10_F2I32_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> 31);
770 scratch[3].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
771 - (NE10_F2I32_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> 31);
772
773 // radix 2 butterfly
774 scratch[4].r = scratch[0].r + scratch[2].r;
775 scratch[4].i = scratch[0].i + scratch[2].i;
776
777 scratch[5].r = scratch[0].r - scratch[2].r;
778 scratch[5].i = scratch[0].i - scratch[2].i;
779
780 // radix 2 butterfly
781 scratch[6].r = scratch[1].r + scratch[3].r;
782 scratch[6].i = scratch[1].i + scratch[3].i;
783
784 scratch[7].r = scratch[1].r - scratch[3].r;
785 scratch[7].i = scratch[1].i - scratch[3].i;
786
787 // third result
788 scratch_out[2].r = scratch[4].r - scratch[6].r;
789 scratch_out[2].i = scratch[4].i - scratch[6].i;
790
791 // first result
792 scratch_out[0].r = scratch[4].r + scratch[6].r;
793 scratch_out[0].i = scratch[4].i + scratch[6].i;
794
795 // second result
796 scratch_out[1].r = scratch[5].r - scratch[7].i;
797 scratch_out[1].i = scratch[5].i + scratch[7].r;
798
799 // forth result
800 scratch_out[3].r = scratch[5].r + scratch[7].i;
801 scratch_out[3].i = scratch[5].i - scratch[7].r;
802
803 // store
804 *Fout1 = scratch_out[0];
805 Fout2 = Fout1 + mstride;
806 *Fout2 = scratch_out[1];
807 Fout2 += mstride;
808 *Fout2 = scratch_out[2];
809 Fout2 += mstride;
810 *Fout2 = scratch_out[3];
811
812 tw1++;
813 Fin1 ++;
814 Fout1 ++;
815 } // m_count
816 } // f_count
817 tw += mstride * 3;
818 mstride <<= 2;
819 // swap
820 Ftmp = buffer;
821 buffer = Fout;
822 Fout = Ftmp;
823 fstride >>= 2;
824 } // stage_count
825
826 // the last one
827 if (stage_count)
828 {
829 Fin1 = buffer;
830 // if stage count is even, output to the input array
831 Fout1 = Fout_ls;
832
833 for (f_count = 0; f_count < fstride; f_count ++)
834 {
835 tw1 = tw;
836 for (m_count = mstride; m_count ; m_count --)
837 {
838 // load
839 scratch_tw[0] = *tw1;
840 tw2 = tw1 + mstride;
841 scratch_tw[1] = *tw2;
842 tw2 += mstride;
843 scratch_tw[2] = *tw2;
844 scratch_in[0] = * Fin1;
845 Fin2 = Fin1 + N;
846 scratch_in[1] = * Fin2;
847 Fin2 += N;
848 scratch_in[2] = * Fin2;
849 Fin2 += N;
850 scratch_in[3] = * Fin2;
851 if (scaled_flag == 1)
852 {
853 NE10_F2I32_FIXDIV (scratch_in[0], 4);
854 NE10_F2I32_FIXDIV (scratch_in[1], 4);
855 NE10_F2I32_FIXDIV (scratch_in[2], 4);
856 NE10_F2I32_FIXDIV (scratch_in[3], 4);
857 }
858
859 // radix 4 butterfly with twiddles
860
861 scratch[0] = scratch_in[0];
862 scratch[1].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[1].r * scratch_tw[0].r
863 + (NE10_F2I32_SAMPPROD) scratch_in[1].i * scratch_tw[0].i) >> 31);
864 scratch[1].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[1].i * scratch_tw[0].r
865 - (NE10_F2I32_SAMPPROD) scratch_in[1].r * scratch_tw[0].i) >> 31);
866
867 scratch[2].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[2].r * scratch_tw[1].r
868 + (NE10_F2I32_SAMPPROD) scratch_in[2].i * scratch_tw[1].i) >> 31);
869 scratch[2].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[2].i * scratch_tw[1].r
870 - (NE10_F2I32_SAMPPROD) scratch_in[2].r * scratch_tw[1].i) >> 31);
871
872 scratch[3].r = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[3].r * scratch_tw[2].r
873 + (NE10_F2I32_SAMPPROD) scratch_in[3].i * scratch_tw[2].i) >> 31);
874 scratch[3].i = (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) scratch_in[3].i * scratch_tw[2].r
875 - (NE10_F2I32_SAMPPROD) scratch_in[3].r * scratch_tw[2].i) >> 31);
876
877 // radix 2 butterfly
878 scratch[4].r = scratch[0].r + scratch[2].r;
879 scratch[4].i = scratch[0].i + scratch[2].i;
880
881 scratch[5].r = scratch[0].r - scratch[2].r;
882 scratch[5].i = scratch[0].i - scratch[2].i;
883
884 // radix 2 butterfly
885 scratch[6].r = scratch[1].r + scratch[3].r;
886 scratch[6].i = scratch[1].i + scratch[3].i;
887
888 scratch[7].r = scratch[1].r - scratch[3].r;
889 scratch[7].i = scratch[1].i - scratch[3].i;
890
891 // third result
892 scratch_out[2].r = scratch[4].r - scratch[6].r;
893 scratch_out[2].i = scratch[4].i - scratch[6].i;
894
895 // first result
896 scratch_out[0].r = scratch[4].r + scratch[6].r;
897 scratch_out[0].i = scratch[4].i + scratch[6].i;
898
899 // second result
900 scratch_out[1].r = scratch[5].r - scratch[7].i;
901 scratch_out[1].i = scratch[5].i + scratch[7].r;
902
903 // forth result
904 scratch_out[3].r = scratch[5].r + scratch[7].i;
905 scratch_out[3].i = scratch[5].i - scratch[7].r;
906
907 // store
908 *Fout1 = scratch_out[0];
909 Fout2 = Fout1 + N;
910 *Fout2 = scratch_out[1];
911 Fout2 += N;
912 *Fout2 = scratch_out[2];
913 Fout2 += N;
914 *Fout2 = scratch_out[3];
915
916 tw1 ++;
917 Fin1 ++;
918 Fout1 ++;
919 } // m_count
920 } // f_count
921 } // last stage
922}
923
924static void ne10_fft_split_r2c_1d_int32 (ne10_fft_cpx_int32_t *dst,
925 const ne10_fft_cpx_int32_t *src,
926 ne10_fft_cpx_int32_t *twiddles,
927 ne10_int32_t ncfft,
928 ne10_int32_t scaled_flag)
929{
930 ne10_int32_t k;
931 ne10_fft_cpx_int32_t fpnk, fpk, f1k, f2k, tw, tdc;
932
933 tdc.r = src[0].r;
934 tdc.i = src[0].i;
935
936 if (scaled_flag)
937 NE10_F2I32_FIXDIV (tdc, 2);
938
939 dst[0].r = tdc.r + tdc.i;
940 dst[ncfft].r = tdc.r - tdc.i;
941 dst[ncfft].i = dst[0].i = 0;
942
943 for (k = 1; k <= ncfft / 2 ; ++k)
944 {
945 fpk = src[k];
946 fpnk.r = src[ncfft - k].r;
947 fpnk.i = - src[ncfft - k].i;
948 if (scaled_flag)
949 {
950 NE10_F2I32_FIXDIV (fpk, 2);
951 NE10_F2I32_FIXDIV (fpnk, 2);
952 }
953
954 f1k.r = fpk.r + fpnk.r;
955 f1k.i = fpk.i + fpnk.i;
956
957 f2k.r = fpk.r - fpnk.r;
958 f2k.i = fpk.i - fpnk.i;
959
960 tw.r = ( ( (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) f2k.r * (twiddles[k - 1]).r) >> 32)) - ( (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) f2k.i * (twiddles[k - 1]).i) >> 32))) << 1;
961 tw.i = ( ( (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) f2k.r * (twiddles[k - 1]).i) >> 32)) + ( (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) f2k.i * (twiddles[k - 1]).r) >> 32))) << 1;
962
963 dst[k].r = (f1k.r + tw.r) >> 1;
964 dst[k].i = (f1k.i + tw.i) >> 1;
965 dst[ncfft - k].r = (f1k.r - tw.r) >> 1;
966 dst[ncfft - k].i = (tw.i - f1k.i) >> 1;
967 }
968}
969
970static void ne10_fft_split_c2r_1d_int32 (ne10_fft_cpx_int32_t *dst,
971 const ne10_fft_cpx_int32_t *src,
972 ne10_fft_cpx_int32_t *twiddles,
973 ne10_int32_t ncfft,
974 ne10_int32_t scaled_flag)
975{
976
977 ne10_int32_t k;
978 ne10_fft_cpx_int32_t fk, fnkc, fek, fok, tmp;
979
980
981 dst[0].r = src[0].r + src[ncfft].r;
982 dst[0].i = src[0].r - src[ncfft].r;
983
984 if (scaled_flag)
985 NE10_F2I32_FIXDIV (dst[0], 2);
986
987 for (k = 1; k <= ncfft / 2; k++)
988 {
989 fk = src[k];
990 fnkc.r = src[ncfft - k].r;
991 fnkc.i = -src[ncfft - k].i;
992 if (scaled_flag)
993 {
994 NE10_F2I32_FIXDIV (fk, 2);
995 NE10_F2I32_FIXDIV (fnkc, 2);
996 }
997
998 fek.r = fk.r + fnkc.r;
999 fek.i = fk.i + fnkc.i;
1000
1001 tmp.r = fk.r - fnkc.r;
1002 tmp.i = fk.i - fnkc.i;
1003
1004 fok.r = ( ( (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) tmp.r * (twiddles[k - 1]).r) >> 32)) + ( (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) tmp.i * (twiddles[k - 1]).i) >> 32))) << 1;
1005 fok.i = ( ( (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) tmp.i * (twiddles[k - 1]).r) >> 32)) - ( (ne10_int32_t) ( ( (NE10_F2I32_SAMPPROD) tmp.r * (twiddles[k - 1]).i) >> 32))) << 1;
1006
1007 dst[k].r = fek.r + fok.r;
1008 dst[k].i = fek.i + fok.i;
1009
1010 dst[ncfft - k].r = fek.r - fok.r;
1011 dst[ncfft - k].i = fok.i - fek.i;
1012 }
1013}
1014
1015
// Allocates one contiguous, aligned arena holding the c2c int32 FFT state,
// its factor table, twiddle factors, and scratch buffer; returns NULL when
// allocation or factorization of nfft fails.
// NOTE(review): the signature is outside this view (doc lines 1016-1027);
// per the doxygen tooltip it is ne10_fft_alloc_c2c_int32_c (ne10_int32_t nfft).
1028{
 1029 ne10_fft_cfg_int32_t st = NULL;
 // Total bytes: state struct + factor table + twiddles + scratch buffer,
 // plus slack so the factor table can be aligned up.
 1030 ne10_uint32_t memneeded = sizeof (ne10_fft_state_int32_t)
 1031 + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
 1032 + sizeof (ne10_fft_cpx_int32_t) * nfft /* twiddle */
 1033 + sizeof (ne10_fft_cpx_int32_t) * nfft /* buffer */
 1034 + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment*/
 1035
 1036 st = (ne10_fft_cfg_int32_t) NE10_MALLOC (memneeded);
 1037 if (st)
 1038 {
 // Carve the arena: aligned factor table, then twiddles, then buffer.
 1039 uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_state_int32_t);
 1040 NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
 1041 st->factors = (ne10_int32_t*) address;
 1042 st->twiddles = (ne10_fft_cpx_int32_t*) (st->factors + (NE10_MAXFACTORS * 2));
 1043 st->buffer = st->twiddles + nfft;
 1044 st->nfft = nfft;
 1045
 // Factor nfft into radix stages; NE10_ERR means unsupported length.
 1046 ne10_int32_t result = ne10_factor (nfft, st->factors, NE10_FACTOR_DEFAULT);
 1047 if (result == NE10_ERR)
 1048 {
 // NOTE(review): presumably NE10_FREE nulls its argument so this
 // returns NULL on failure — confirm against the macro definition.
 1049 NE10_FREE (st);
 1050 return st;
 1051 }
 1052
 1053 ne10_int32_t *factors = st->factors;
 1054 ne10_fft_cpx_int32_t *twiddles = st->twiddles;
 1055
 // Precompute the per-stage twiddle factors for the chosen factorization.
 1056 ne10_fft_generate_twiddles_int32 (twiddles, factors, nfft);
 1057 }
 1058 return st;
 1059}
1060
// Dispatches an nfft-point complex-to-complex int32 FFT/IFFT to either the
// specialised radix-2/4 kernel or the generic mixed-radix kernel, based on
// the algorithm flag recorded in the factor table at allocation time.
// NOTE(review): the leading parameters (fout, fin, cfg) are outside this
// view (doc lines 1060-1074); per the doxygen tooltip the signature is
// ne10_fft_c2c_1d_int32_c (fout, fin, cfg, inverse_fft, scaled_flag).
1075 ne10_int32_t inverse_fft,
1076 ne10_int32_t scaled_flag)
1077{
 // factors[0] is the stage count; the algorithm flag sits just past the
 // per-stage (radix, remainder) pairs in the factor table.
 1078 ne10_int32_t stage_count = cfg->factors[0];
 1079 ne10_int32_t algorithm_flag = cfg->factors[2 * (stage_count + 1)];
 1080
 // Only the two known algorithm selections are valid here.
 1081 assert ((algorithm_flag == NE10_FFT_ALG_24)
 1082 || (algorithm_flag == NE10_FFT_ALG_ANY));
 1083
 1084 switch (algorithm_flag)
 1085 {
 1086 case NE10_FFT_ALG_24:
 1087 if (inverse_fft)
 1088 {
 1089 ne10_mixed_radix_butterfly_inverse_int32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer, scaled_flag);
 1090 }
 1091 else
 1092 {
 1093 ne10_mixed_radix_butterfly_int32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer, scaled_flag);
 1094 }
 1095 break;
 1096 case NE10_FFT_ALG_ANY:
 1097 if (inverse_fft)
 1098 {
 1099 ne10_mixed_radix_generic_butterfly_inverse_int32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer, scaled_flag);
 1100 }
 1101 else
 1102 {
 1103 ne10_mixed_radix_generic_butterfly_int32_c (fout, fin, cfg->factors, cfg->twiddles, cfg->buffer, scaled_flag);
 1104 }
 1105 break;
 1106 }
 1107}
1108
// end of C2C_FFT_IFFT group
1112
1113
// Allocates one contiguous, aligned arena for an nfft-point real FFT
// configuration (backed by an ncfft = nfft/2 complex FFT): state, factor
// table, stage twiddles, split-step "super" twiddles, and scratch buffer.
// Twiddles are generated inline here (radix-4 stage layout) rather than via
// ne10_fft_generate_twiddles_int32.
// NOTE(review): the signature is outside this view (doc lines 1114-1125);
// per the doxygen tooltip it is ne10_fft_alloc_r2c_int32 (ne10_int32_t nfft).
1126{
 1127 ne10_fft_r2c_cfg_int32_t st = NULL;
 // The real transform of length nfft is computed via an nfft/2 complex FFT.
 1128 ne10_int32_t ncfft = nfft >> 1;
 1129
 1130 ne10_uint32_t memneeded = sizeof (ne10_fft_r2c_state_int32_t)
 1131 + sizeof (ne10_int32_t) * (NE10_MAXFACTORS * 2) /* factors */
 1132 + sizeof (ne10_fft_cpx_int32_t) * ncfft /* twiddle*/
 1133 + sizeof (ne10_fft_cpx_int32_t) * ncfft / 2 /* super twiddles*/
 1134 + sizeof (ne10_fft_cpx_int32_t) * nfft /* buffer*/
 1135 + NE10_FFT_BYTE_ALIGNMENT; /* 64-bit alignment*/
 1136
 1137 st = (ne10_fft_r2c_cfg_int32_t) NE10_MALLOC (memneeded);
 1138
 1139 if (st)
 1140 {
 // Carve the arena: aligned factor table, stage twiddles, super
 // twiddles, then the scratch buffer.
 1141 uintptr_t address = (uintptr_t) st + sizeof (ne10_fft_r2c_state_int32_t);
 1142 NE10_BYTE_ALIGNMENT (address, NE10_FFT_BYTE_ALIGNMENT);
 1143 st->factors = (ne10_int32_t*) address;
 1144 st->twiddles = (ne10_fft_cpx_int32_t*) (st->factors + (NE10_MAXFACTORS * 2));
 1145 st->super_twiddles = st->twiddles + ncfft;
 1146 st->buffer = st->super_twiddles + (ncfft / 2);
 1147 st->ncfft = ncfft;
 1148
 // Factor the complex length; NE10_ERR means unsupported length.
 1149 ne10_int32_t result = ne10_factor (ncfft, st->factors, NE10_FACTOR_DEFAULT);
 1150 if (result == NE10_ERR)
 1151 {
 // NOTE(review): presumably NE10_FREE nulls its argument so this
 // returns NULL on failure — confirm against the macro definition.
 1152 NE10_FREE (st);
 1153 return st;
 1154 }
 1155
 1156 ne10_int32_t i, j;
 1157 ne10_int32_t *factors = st->factors;
 1158 ne10_fft_cpx_int32_t *twiddles = st->twiddles;
 // NOTE(review): doc line 1159 is elided in this rendering; it is
 // presumably the declaration of the cursor used below, e.g.
 // "ne10_fft_cpx_int32_t *tw;" — confirm against the full source.
 1160 ne10_int32_t stage_count = factors[0];
 1161 ne10_int32_t fstride1 = factors[1];
 1162 ne10_int32_t fstride2 = fstride1 * 2;
 1163 ne10_int32_t fstride3 = fstride1 * 3;
 1164 ne10_int32_t m;
 1165
 1166 const ne10_float32_t pi = NE10_PI;
 1167 ne10_float32_t phase1;
 1168 ne10_float32_t phase2;
 1169 ne10_float32_t phase3;
 1170
 // Generate Q31 twiddles stage by stage (three per butterfly column for
 // the radix-4 stages), walking from the last stage toward the first.
 1171 for (i = stage_count - 1; i > 0; i--)
 1172 {
 1173 fstride1 >>= 2;
 1174 fstride2 >>= 2;
 1175 fstride3 >>= 2;
 1176 m = factors[2 * i + 1];
 1177 tw = twiddles;
 1178 for (j = 0; j < m; j++)
 1179 {
 1180 phase1 = -2 * pi * fstride1 * j / ncfft;
 1181 phase2 = -2 * pi * fstride2 * j / ncfft;
 1182 phase3 = -2 * pi * fstride3 * j / ncfft;
 // Round-to-nearest conversion of cos/sin to Q31 fixed point.
 1183 tw->r = (ne10_int32_t) floor (0.5f + NE10_F2I32_MAX * cos (phase1));
 1184 tw->i = (ne10_int32_t) floor (0.5f + NE10_F2I32_MAX * sin (phase1));
 1185 (tw + m)->r = (ne10_int32_t) floor (0.5f + NE10_F2I32_MAX * cos (phase2));
 1186 (tw + m)->i = (ne10_int32_t) floor (0.5f + NE10_F2I32_MAX * sin (phase2));
 1187 (tw + m * 2)->r = (ne10_int32_t) floor (0.5f + NE10_F2I32_MAX * cos (phase3));
 1188 (tw + m * 2)->i = (ne10_int32_t) floor (0.5f + NE10_F2I32_MAX * sin (phase3));
 1189 tw++;
 1190 }
 1191 twiddles += m * 3;
 1192 }
 1193
 // Generate the "super" twiddles used by the r2c/c2r split steps.
 1194 tw = st->super_twiddles;
 1195 for (i = 0; i < ncfft / 2; i++)
 1196 {
 1197 phase1 = -pi * ( (ne10_float32_t) (i + 1) / ncfft + 0.5f);
 1198 tw->r = (ne10_int32_t) floor (0.5f + NE10_F2I32_MAX * cos (phase1));
 1199 tw->i = (ne10_int32_t) floor (0.5f + NE10_F2I32_MAX * sin (phase1));
 1200 tw++;
 1201 }
 1202
 1203 }
 1204 return st;
 1205}
1206
// Forward real-to-complex FFT of int32 data: runs the nfft/2-point complex
// FFT of the packed real input into a temporary, then splits it into the
// real half-spectrum.
// NOTE(review): the leading parameters (fout, and later cfg) are outside
// this view (doc lines 1219 and 1221); per the doxygen tooltip the signature
// is ne10_fft_r2c_1d_int32_c (fout, fin, cfg, scaled_flag).
1220 ne10_int32_t *fin,
1222 ne10_int32_t scaled_flag)
1223{
 1224 ne10_fft_cpx_int32_t * tmpbuf = cfg->buffer;
 1225
 // fin is reinterpreted as nfft/2 complex (even, odd) sample pairs; fout is
 // reused as scratch by the butterfly before receiving the final spectrum.
 1226 ne10_mixed_radix_butterfly_int32_c (tmpbuf, (ne10_fft_cpx_int32_t*) fin, cfg->factors, cfg->twiddles, fout, scaled_flag);
 1227 ne10_fft_split_r2c_1d_int32 (fout, tmpbuf, cfg->super_twiddles, cfg->ncfft, scaled_flag);
1228}
1229
// Inverse complex-to-real FFT of int32 data: merges the half-spectrum back
// into a packed complex spectrum, then runs the nfft/2-point complex IFFT.
// NOTE(review): the middle parameters (fin, cfg) are outside this view
// (doc lines 1242-1243); per the doxygen tooltip the signature is
// ne10_fft_c2r_1d_int32_c (fout, fin, cfg, scaled_flag).
1241void ne10_fft_c2r_1d_int32_c (ne10_int32_t *fout,
1244 ne10_int32_t scaled_flag)
1245{
 // Two halves of cfg->buffer: split result, then IFFT scratch.
 1246 ne10_fft_cpx_int32_t * tmpbuf1 = cfg->buffer;
 1247 ne10_fft_cpx_int32_t * tmpbuf2 = cfg->buffer + cfg->ncfft;
 1248
 // fout is reinterpreted as nfft/2 complex pairs holding the real output.
 1249 ne10_fft_split_c2r_1d_int32 (tmpbuf1, fin, cfg->super_twiddles, cfg->ncfft, scaled_flag);
 1250 ne10_mixed_radix_butterfly_inverse_int32_c ( (ne10_fft_cpx_int32_t*) fout, tmpbuf1, cfg->factors, cfg->twiddles, tmpbuf2, scaled_flag);
1251}
1252
void ne10_fft_c2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Mixed radix-2/4 complex FFT/IFFT of 32-bit fixed point data.
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_c(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft.
void ne10_fft_r2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 FFT (real to complex) of int32 data.
void ne10_fft_c2r_1d_int32_c(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Mixed radix-2/4 IFFT (complex to real) of int32 data.
ne10_fft_r2c_cfg_int32_t ne10_fft_alloc_r2c_int32(ne10_int32_t nfft)
User-callable function to allocate all necessary storage space for the fft (r2c/c2r).
structure for the 32 bits fixed point FFT function.
Definition NE10_types.h:329