···11-/*
22- * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
33- * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
44- *
55- * SPDX-License-Identifier: BSD-3-Clause
66- * See COPYING file for more information.
77- */
88-99- /* kiss_fft.h
1010- defines kiss_fft_scalar as either short or a float type
1111- and defines
1212- typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
1313-#include "kiss_fft.h"
1414-#include <limits.h>
1515-1616-#define MAXFACTORS 32
1717- /* e.g. an fft of length 128 has 4 factors
1818- as far as kissfft is concerned
1919- 4*4*4*2
2020- */
2121-2222-struct kiss_fft_state {
2323- int nfft;
2424- int inverse;
2525- int factors[2 * MAXFACTORS];
2626- kiss_fft_cpx twiddles[1];
2727-};
2828-2929-/*
3030- Explanation of macros dealing with complex math:
3131-3232- C_MUL(m,a,b) : m = a*b
3333- C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise
3434- C_SUB( res, a,b) : res = a - b
3535- C_SUBFROM( res , a) : res -= a
3636- C_ADDTO( res , a) : res += a
3737- * */
#ifdef FIXED_POINT
/* Fixed-width integer types used by SAMPPROD. */
#include <stdint.h>
#if (FIXED_POINT==32)
# define FRACBITS 31
# define SAMPPROD int64_t
# define SAMP_MAX 2147483647
#else
# define FRACBITS 15
# define SAMPPROD int32_t
# define SAMP_MAX 32767
#endif

/* Parenthesized so the negation survives expansion inside a larger expression. */
#define SAMP_MIN (-SAMP_MAX)

#if defined(CHECK_OVERFLOW)
/*
 * Warn on stderr when (a op b), evaluated in the wide SAMPPROD type, leaves
 * [SAMP_MIN, SAMP_MAX]. The operands are printed as int and the result as
 * long long so the format matches for both the 16-bit and 32-bit builds.
 * Expands to a bare `if`; only use it inside a braced block.
 */
# define CHECK_OVERFLOW_OP(a,op,b) \
    if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
        fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %lld\n",__LINE__,(int)(a),(int)(b),(long long)((SAMPPROD)(a) op (SAMPPROD)(b)) ); }
#endif


/* Widening multiply, and rounding shift back down to a sample. */
# define smul(a,b) ( (SAMPPROD)(a)*(b) )
# define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )

# define S_MUL(a,b) sround( smul(a,b) )

/* m = a*b. m must not be the same object as a or b: (m).r is written before (m).i is computed. */
# define C_MUL(m,a,b) \
    do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
        (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)

/* x = x * (SAMP_MAX / k), rounded; k is parenthesized so an expression argument divides as a unit. */
# define DIVSCALAR(x,k) \
    (x) = sround( smul( x, SAMP_MAX/(k) ) )

# define C_FIXDIV(c,div) \
    do { DIVSCALAR( (c).r , div); \
         DIVSCALAR( (c).i , div); }while (0)

# define C_MULBYSCALAR( c, s ) \
    do{ (c).r = sround( smul( (c).r , s ) ) ;\
        (c).i = sround( smul( (c).i , s ) ) ; }while(0)

#else /* not FIXED_POINT*/

# define S_MUL(a,b) ( (a)*(b) )
/* m = a*b. m must not be the same object as a or b: (m).r is written before (m).i is computed. */
#define C_MUL(m,a,b) \
    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
# define C_FIXDIV(c,div) /* NOOP */
# define C_MULBYSCALAR( c, s ) \
    do{ (c).r *= (s);\
        (c).i *= (s); }while(0)
#endif
/* Overflow checks expand to nothing unless an earlier section defined CHECK_OVERFLOW_OP. */
#ifndef CHECK_OVERFLOW_OP
#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
#endif

/* res = a + b */
#define C_ADD( res, a,b) \
    do { \
        CHECK_OVERFLOW_OP((a).r,+,(b).r) \
        CHECK_OVERFLOW_OP((a).i,+,(b).i) \
        (res).r = (a).r + (b).r; \
        (res).i = (a).i + (b).i; \
    } while (0)

/* res = a - b */
#define C_SUB( res, a,b) \
    do { \
        CHECK_OVERFLOW_OP((a).r,-,(b).r) \
        CHECK_OVERFLOW_OP((a).i,-,(b).i) \
        (res).r = (a).r - (b).r; \
        (res).i = (a).i - (b).i; \
    } while (0)

/* res += a */
#define C_ADDTO( res , a) \
    do { \
        CHECK_OVERFLOW_OP((res).r,+,(a).r) \
        CHECK_OVERFLOW_OP((res).i,+,(a).i) \
        (res).r += (a).r; \
        (res).i += (a).i; \
    } while (0)

/* res -= a */
#define C_SUBFROM( res , a) \
    do { \
        CHECK_OVERFLOW_OP((res).r,-,(a).r) \
        CHECK_OVERFLOW_OP((res).i,-,(a).i) \
        (res).r -= (a).r; \
        (res).i -= (a).i; \
    } while (0)
/*
 * KISS_FFT_COS / KISS_FFT_SIN: cosine and sine of a phase, converted to the
 * representation of the active build (scaled and rounded for fixed point,
 * broadcast for SIMD, cast to kiss_fft_scalar otherwise).
 * HALF_OF(x): x/2. The scalar build multiplies by a kiss_fft_scalar-typed
 * constant so a float operand is not promoted to double.
 */
#ifdef FIXED_POINT
#  define KISS_FFT_COS(phase)  floor(.5+SAMP_MAX * cos (phase))
#  define KISS_FFT_SIN(phase)  floor(.5+SAMP_MAX * sin (phase))
#  define HALF_OF(x) ((x)>>1)
#elif defined(USE_SIMD)
#  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
#  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
#  define HALF_OF(x) ((x)*_mm_set1_ps(.5))
#else
#  define KISS_FFT_COS(phase) ((kiss_fft_scalar) cos(phase))
#  define KISS_FFT_SIN(phase) ((kiss_fft_scalar) sin(phase))
#  define HALF_OF(x) ((x)*((kiss_fft_scalar).5))
#endif

/* Store (cos(phase), sin(phase)) into the complex value x points to. */
#define  kf_cexp(x,phase) \
    do{ \
        (x)->r = KISS_FFT_COS(phase);\
        (x)->i = KISS_FFT_SIN(phase);\
    }while(0)
/* Debugging aid: print the complex value c points to on stderr as "<r> + <i>i". */
#define pcpx(c) \
    fprintf(stderr, "%g + %gi\n", (double)(c)->r, (double)(c)->i)
/*
 * Temporary-buffer allocation.
 * Define KISS_FFT_USE_ALLOCA to take temporary buffers from alloca() instead
 * of KISS_FFT_MALLOC; KISS_FFT_TMP_FREE then expands to nothing.
 * Temporary buffers are used in two cases:
 *   1. FFT sizes that have "bad" factors, i.e. not 2, 3 and 5
 *   2. "in-place" FFTs. Notice the quotes, since kissfft does not really do
 *      an in-place transform.
 */
#ifndef KISS_FFT_USE_ALLOCA
#define  KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
#define  KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
#else
#include <alloca.h>
#define  KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
#define  KISS_FFT_TMP_FREE(ptr)
#endif
11+/*
22+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
33+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
44+ *
55+ * SPDX-License-Identifier: BSD-3-Clause
66+ * See COPYING file for more information.
77+ */
88+99+/* kiss_fft.h
1010+ defines kiss_fft_scalar as either short or a float type
1111+ and defines
1212+ typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */
1313+1414+#ifndef _kiss_fft_guts_h
1515+#define _kiss_fft_guts_h
1616+1717+#include "kiss_fft.h"
1818+#include "kiss_fft_log.h"
1919+#include <limits.h>
2020+2121+#define MAXFACTORS 32
2222+/* e.g. an fft of length 128 has 4 factors
2323+ as far as kissfft is concerned
2424+ 4*4*4*2
2525+ */
2626+2727+struct kiss_fft_state{
2828+ int nfft;
2929+ int inverse;
3030+ int factors[2*MAXFACTORS];
3131+ kiss_fft_cpx twiddles[1];
3232+};
3333+3434+/*
3535+ Explanation of macros dealing with complex math:
3636+3737+ C_MUL(m,a,b) : m = a*b
3838+ C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise
3939+ C_SUB( res, a,b) : res = a - b
4040+ C_SUBFROM( res , a) : res -= a
4141+ C_ADDTO( res , a) : res += a
4242+ * */
#ifdef FIXED_POINT
#include <stdint.h>
#if (FIXED_POINT==32)
# define FRACBITS 31
# define SAMPPROD int64_t
# define SAMP_MAX INT32_MAX
# define SAMP_MIN INT32_MIN
#else
# define FRACBITS 15
# define SAMPPROD int32_t
# define SAMP_MAX INT16_MAX
# define SAMP_MIN INT16_MIN
#endif

#if defined(CHECK_OVERFLOW)
/*
 * Emit a warning when (a op b), evaluated in the wide SAMPPROD type, leaves
 * [SAMP_MIN, SAMP_MAX]. The operands are passed as int and the result as
 * long long so the format matches for both the 16-bit and 32-bit builds.
 * Expands to a bare `if`; only use it inside a braced block.
 */
# define CHECK_OVERFLOW_OP(a,op,b) \
    if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \
        KISS_FFT_WARNING("overflow (%d " #op" %d) = %lld", (int)(a),(int)(b),(long long)((SAMPPROD)(a) op (SAMPPROD)(b))); }
#endif


/* Widening multiply, and rounding shift back down to a sample. */
# define smul(a,b) ( (SAMPPROD)(a)*(b) )
# define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS )

# define S_MUL(a,b) sround( smul(a,b) )

/* m = a*b. m must not be the same object as a or b: (m).r is written before (m).i is computed. */
# define C_MUL(m,a,b) \
    do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \
        (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0)

/* x = x * (SAMP_MAX / k), rounded; k is parenthesized so an expression argument divides as a unit. */
# define DIVSCALAR(x,k) \
    (x) = sround( smul( x, SAMP_MAX/(k) ) )

# define C_FIXDIV(c,div) \
    do { DIVSCALAR( (c).r , div); \
         DIVSCALAR( (c).i , div); }while (0)

# define C_MULBYSCALAR( c, s ) \
    do{ (c).r = sround( smul( (c).r , s ) ) ;\
        (c).i = sround( smul( (c).i , s ) ) ; }while(0)

#else /* not FIXED_POINT*/

# define S_MUL(a,b) ( (a)*(b) )
/* m = a*b. m must not be the same object as a or b: (m).r is written before (m).i is computed. */
#define C_MUL(m,a,b) \
    do{ (m).r = (a).r*(b).r - (a).i*(b).i;\
        (m).i = (a).r*(b).i + (a).i*(b).r; }while(0)
# define C_FIXDIV(c,div) /* NOOP */
# define C_MULBYSCALAR( c, s ) \
    do{ (c).r *= (s);\
        (c).i *= (s); }while(0)
#endif
/* Overflow checks expand to nothing unless an earlier section defined CHECK_OVERFLOW_OP. */
#ifndef CHECK_OVERFLOW_OP
#  define CHECK_OVERFLOW_OP(a,op,b) /* noop */
#endif

/* res = a + b */
#define C_ADD( res, a,b) \
    do { \
        CHECK_OVERFLOW_OP((a).r,+,(b).r) \
        CHECK_OVERFLOW_OP((a).i,+,(b).i) \
        (res).r = (a).r + (b).r; \
        (res).i = (a).i + (b).i; \
    } while (0)

/* res = a - b */
#define C_SUB( res, a,b) \
    do { \
        CHECK_OVERFLOW_OP((a).r,-,(b).r) \
        CHECK_OVERFLOW_OP((a).i,-,(b).i) \
        (res).r = (a).r - (b).r; \
        (res).i = (a).i - (b).i; \
    } while (0)

/* res += a */
#define C_ADDTO( res , a) \
    do { \
        CHECK_OVERFLOW_OP((res).r,+,(a).r) \
        CHECK_OVERFLOW_OP((res).i,+,(a).i) \
        (res).r += (a).r; \
        (res).i += (a).i; \
    } while (0)

/* res -= a */
#define C_SUBFROM( res , a) \
    do { \
        CHECK_OVERFLOW_OP((res).r,-,(a).r) \
        CHECK_OVERFLOW_OP((res).i,-,(a).i) \
        (res).r -= (a).r; \
        (res).i -= (a).i; \
    } while (0)
/*
 * KISS_FFT_COS / KISS_FFT_SIN: cosine and sine of a phase, converted to the
 * representation of the active build (scaled and rounded for fixed point,
 * broadcast for SIMD, cast to kiss_fft_scalar otherwise).
 * HALF_OF(x): x/2 in the same representation.
 */
#if defined(FIXED_POINT)
#  define KISS_FFT_COS(phase)  floor(.5+SAMP_MAX * cos (phase))
#  define KISS_FFT_SIN(phase)  floor(.5+SAMP_MAX * sin (phase))
#  define HALF_OF(x) ((x)>>1)
#elif defined(USE_SIMD)
#  define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) )
#  define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) )
#  define HALF_OF(x) ((x)*_mm_set1_ps(.5))
#else
#  define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase)
#  define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase)
#  define HALF_OF(x) ((x)*((kiss_fft_scalar).5))
#endif

/* Store (cos(phase), sin(phase)) into the complex value x points to. */
#define  kf_cexp(x,phase) \
    do { \
        (x)->r = KISS_FFT_COS(phase); \
        (x)->i = KISS_FFT_SIN(phase); \
    } while (0)
/* Debugging aid: pass the complex value c points to, formatted as "<r> + <i>i", to KISS_FFT_DEBUG. */
#define pcpx(c) \
    KISS_FFT_DEBUG("%g + %gi\n", (double)(c)->r, (double)(c)->i)
/*
 * Temporary-buffer allocation.
 * Define KISS_FFT_USE_ALLOCA to take temporary buffers from alloca() instead
 * of KISS_FFT_MALLOC; KISS_FFT_TMP_FREE then expands to nothing.
 * Temporary buffers are used in two cases:
 *   1. FFT sizes that have "bad" factors, i.e. not 2, 3 and 5
 *   2. "in-place" FFTs. Notice the quotes, since kissfft does not really do
 *      an in-place transform.
 */
#ifndef KISS_FFT_USE_ALLOCA
#define  KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes)
#define  KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr)
#else
#include <alloca.h>
#define  KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes)
#define  KISS_FFT_TMP_FREE(ptr)
#endif
165165+166166+#endif /* _kiss_fft_guts_h */
167167+
+420-406
src/ext/kiss_fft.c
···11-/*
22- * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
33- * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
44- *
55- * SPDX-License-Identifier: BSD-3-Clause
66- * See COPYING file for more information.
77- */
88-99-1010-#include "_kiss_fft_guts.h"
/* The guts header contains all the multiplication and addition macros that are defined for
   fixed or floating point complex numbers. It also declares the kf_ internal functions.
 */
1414-1515-static void kf_bfly2(
1616- kiss_fft_cpx* Fout,
1717- const size_t fstride,
1818- const kiss_fft_cfg st,
1919- int m
2020-)
2121-{
2222- kiss_fft_cpx* Fout2;
2323- kiss_fft_cpx* tw1 = st->twiddles;
2424- kiss_fft_cpx t;
2525- Fout2 = Fout + m;
2626- do {
2727- C_FIXDIV(*Fout, 2); C_FIXDIV(*Fout2, 2);
2828-2929- C_MUL(t, *Fout2, *tw1);
3030- tw1 += fstride;
3131- C_SUB(*Fout2, *Fout, t);
3232- C_ADDTO(*Fout, t);
3333- ++Fout2;
3434- ++Fout;
3535- } while (--m);
3636-}
3737-3838-static void kf_bfly4(
3939- kiss_fft_cpx* Fout,
4040- const size_t fstride,
4141- const kiss_fft_cfg st,
4242- const size_t m
4343-)
4444-{
4545- kiss_fft_cpx* tw1, * tw2, * tw3;
4646- kiss_fft_cpx scratch[6];
4747- size_t k = m;
4848- const size_t m2 = 2 * m;
4949- const size_t m3 = 3 * m;
5050-5151-5252- tw3 = tw2 = tw1 = st->twiddles;
5353-5454- do {
5555- C_FIXDIV(*Fout, 4); C_FIXDIV(Fout[m], 4); C_FIXDIV(Fout[m2], 4); C_FIXDIV(Fout[m3], 4);
5656-5757- C_MUL(scratch[0], Fout[m], *tw1);
5858- C_MUL(scratch[1], Fout[m2], *tw2);
5959- C_MUL(scratch[2], Fout[m3], *tw3);
6060-6161- C_SUB(scratch[5], *Fout, scratch[1]);
6262- C_ADDTO(*Fout, scratch[1]);
6363- C_ADD(scratch[3], scratch[0], scratch[2]);
6464- C_SUB(scratch[4], scratch[0], scratch[2]);
6565- C_SUB(Fout[m2], *Fout, scratch[3]);
6666- tw1 += fstride;
6767- tw2 += fstride * 2;
6868- tw3 += fstride * 3;
6969- C_ADDTO(*Fout, scratch[3]);
7070-7171- if (st->inverse) {
7272- Fout[m].r = scratch[5].r - scratch[4].i;
7373- Fout[m].i = scratch[5].i + scratch[4].r;
7474- Fout[m3].r = scratch[5].r + scratch[4].i;
7575- Fout[m3].i = scratch[5].i - scratch[4].r;
7676- }
7777- else {
7878- Fout[m].r = scratch[5].r + scratch[4].i;
7979- Fout[m].i = scratch[5].i - scratch[4].r;
8080- Fout[m3].r = scratch[5].r - scratch[4].i;
8181- Fout[m3].i = scratch[5].i + scratch[4].r;
8282- }
8383- ++Fout;
8484- } while (--k);
8585-}
8686-8787-static void kf_bfly3(
8888- kiss_fft_cpx* Fout,
8989- const size_t fstride,
9090- const kiss_fft_cfg st,
9191- size_t m
9292-)
9393-{
9494- size_t k = m;
9595- const size_t m2 = 2 * m;
9696- kiss_fft_cpx* tw1, * tw2;
9797- kiss_fft_cpx scratch[5];
9898- kiss_fft_cpx epi3;
9999- epi3 = st->twiddles[fstride * m];
100100-101101- tw1 = tw2 = st->twiddles;
102102-103103- do {
104104- C_FIXDIV(*Fout, 3); C_FIXDIV(Fout[m], 3); C_FIXDIV(Fout[m2], 3);
105105-106106- C_MUL(scratch[1], Fout[m], *tw1);
107107- C_MUL(scratch[2], Fout[m2], *tw2);
108108-109109- C_ADD(scratch[3], scratch[1], scratch[2]);
110110- C_SUB(scratch[0], scratch[1], scratch[2]);
111111- tw1 += fstride;
112112- tw2 += fstride * 2;
113113-114114- Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
115115- Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
116116-117117- C_MULBYSCALAR(scratch[0], epi3.i);
118118-119119- C_ADDTO(*Fout, scratch[3]);
120120-121121- Fout[m2].r = Fout[m].r + scratch[0].i;
122122- Fout[m2].i = Fout[m].i - scratch[0].r;
123123-124124- Fout[m].r -= scratch[0].i;
125125- Fout[m].i += scratch[0].r;
126126-127127- ++Fout;
128128- } while (--k);
129129-}
130130-131131-static void kf_bfly5(
132132- kiss_fft_cpx* Fout,
133133- const size_t fstride,
134134- const kiss_fft_cfg st,
135135- int m
136136-)
137137-{
138138- kiss_fft_cpx* Fout0, * Fout1, * Fout2, * Fout3, * Fout4;
139139- int u;
140140- kiss_fft_cpx scratch[13];
141141- kiss_fft_cpx* twiddles = st->twiddles;
142142- kiss_fft_cpx* tw;
143143- kiss_fft_cpx ya, yb;
144144- ya = twiddles[fstride * m];
145145- yb = twiddles[fstride * 2 * m];
146146-147147- Fout0 = Fout;
148148- Fout1 = Fout0 + m;
149149- Fout2 = Fout0 + 2 * m;
150150- Fout3 = Fout0 + 3 * m;
151151- Fout4 = Fout0 + 4 * m;
152152-153153- tw = st->twiddles;
154154- for (u = 0; u < m; ++u) {
155155- C_FIXDIV(*Fout0, 5); C_FIXDIV(*Fout1, 5); C_FIXDIV(*Fout2, 5); C_FIXDIV(*Fout3, 5); C_FIXDIV(*Fout4, 5);
156156- scratch[0] = *Fout0;
157157-158158- C_MUL(scratch[1], *Fout1, tw[u * fstride]);
159159- C_MUL(scratch[2], *Fout2, tw[2 * u * fstride]);
160160- C_MUL(scratch[3], *Fout3, tw[3 * u * fstride]);
161161- C_MUL(scratch[4], *Fout4, tw[4 * u * fstride]);
162162-163163- C_ADD(scratch[7], scratch[1], scratch[4]);
164164- C_SUB(scratch[10], scratch[1], scratch[4]);
165165- C_ADD(scratch[8], scratch[2], scratch[3]);
166166- C_SUB(scratch[9], scratch[2], scratch[3]);
167167-168168- Fout0->r += scratch[7].r + scratch[8].r;
169169- Fout0->i += scratch[7].i + scratch[8].i;
170170-171171- scratch[5].r = scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r);
172172- scratch[5].i = scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r);
173173-174174- scratch[6].r = S_MUL(scratch[10].i, ya.i) + S_MUL(scratch[9].i, yb.i);
175175- scratch[6].i = -S_MUL(scratch[10].r, ya.i) - S_MUL(scratch[9].r, yb.i);
176176-177177- C_SUB(*Fout1, scratch[5], scratch[6]);
178178- C_ADD(*Fout4, scratch[5], scratch[6]);
179179-180180- scratch[11].r = scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r);
181181- scratch[11].i = scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r);
182182- scratch[12].r = -S_MUL(scratch[10].i, yb.i) + S_MUL(scratch[9].i, ya.i);
183183- scratch[12].i = S_MUL(scratch[10].r, yb.i) - S_MUL(scratch[9].r, ya.i);
184184-185185- C_ADD(*Fout2, scratch[11], scratch[12]);
186186- C_SUB(*Fout3, scratch[11], scratch[12]);
187187-188188- ++Fout0; ++Fout1; ++Fout2; ++Fout3; ++Fout4;
189189- }
190190-}
191191-192192-/* perform the butterfly for one stage of a mixed radix FFT */
193193-static void kf_bfly_generic(
194194- kiss_fft_cpx* Fout,
195195- const size_t fstride,
196196- const kiss_fft_cfg st,
197197- int m,
198198- int p
199199-)
200200-{
201201- int u, k, q1, q;
202202- kiss_fft_cpx* twiddles = st->twiddles;
203203- kiss_fft_cpx t;
204204- int Norig = st->nfft;
205205-206206- kiss_fft_cpx* scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * p);
207207-208208- for (u = 0; u < m; ++u) {
209209- k = u;
210210- for (q1 = 0; q1 < p; ++q1) {
211211- scratch[q1] = Fout[k];
212212- C_FIXDIV(scratch[q1], p);
213213- k += m;
214214- }
215215-216216- k = u;
217217- for (q1 = 0; q1 < p; ++q1) {
218218- int twidx = 0;
219219- Fout[k] = scratch[0];
220220- for (q = 1; q < p; ++q) {
221221- twidx += fstride * k;
222222- if (twidx >= Norig) twidx -= Norig;
223223- C_MUL(t, scratch[q], twiddles[twidx]);
224224- C_ADDTO(Fout[k], t);
225225- }
226226- k += m;
227227- }
228228- }
229229- KISS_FFT_TMP_FREE(scratch);
230230-}
231231-232232-static
233233-void kf_work(
234234- kiss_fft_cpx* Fout,
235235- const kiss_fft_cpx* f,
236236- const size_t fstride,
237237- int in_stride,
238238- int* factors,
239239- const kiss_fft_cfg st
240240-)
241241-{
242242- kiss_fft_cpx* Fout_beg = Fout;
243243- const int p = *factors++; /* the radix */
244244- const int m = *factors++; /* stage's fft length/p */
245245- const kiss_fft_cpx* Fout_end = Fout + p * m;
246246-247247-#ifdef _OPENMP
248248- // use openmp extensions at the
249249- // top-level (not recursive)
250250- if (fstride == 1 && p <= 5)
251251- {
252252- int k;
253253-254254- // execute the p different work units in different threads
255255-# pragma omp parallel for
256256- for (k = 0; k < p; ++k)
257257- kf_work(Fout + k * m, f + fstride * in_stride * k, fstride * p, in_stride, factors, st);
258258- // all threads have joined by this point
259259-260260- switch (p) {
261261- case 2: kf_bfly2(Fout, fstride, st, m); break;
262262- case 3: kf_bfly3(Fout, fstride, st, m); break;
263263- case 4: kf_bfly4(Fout, fstride, st, m); break;
264264- case 5: kf_bfly5(Fout, fstride, st, m); break;
265265- default: kf_bfly_generic(Fout, fstride, st, m, p); break;
266266- }
267267- return;
268268- }
269269-#endif
270270-271271- if (m == 1) {
272272- do {
273273- *Fout = *f;
274274- f += fstride * in_stride;
275275- } while (++Fout != Fout_end);
276276- }
277277- else {
278278- do {
279279- // recursive call:
280280- // DFT of size m*p performed by doing
281281- // p instances of smaller DFTs of size m,
282282- // each one takes a decimated version of the input
283283- kf_work(Fout, f, fstride * p, in_stride, factors, st);
284284- f += fstride * in_stride;
285285- } while ((Fout += m) != Fout_end);
286286- }
287287-288288- Fout = Fout_beg;
289289-290290- // recombine the p smaller DFTs
291291- switch (p) {
292292- case 2: kf_bfly2(Fout, fstride, st, m); break;
293293- case 3: kf_bfly3(Fout, fstride, st, m); break;
294294- case 4: kf_bfly4(Fout, fstride, st, m); break;
295295- case 5: kf_bfly5(Fout, fstride, st, m); break;
296296- default: kf_bfly_generic(Fout, fstride, st, m, p); break;
297297- }
298298-}
/*  facbuf is populated by p1,m1,p2,m2, ...
    where
    p[i] * m[i] = m[i-1]
    m0 = n

    Powers of 4 are factored out first, then powers of 2, then odd factors
    3, 5, 7, ... Once the trial factor p satisfies p*p > (original n) the
    remaining n is taken as the last factor. The bound is tested in integer
    arithmetic, so no floating-point square root is needed and the loop ends
    after a single pair for n < 1 (which is not a valid FFT length). */
static
void kf_factor(int n, int *facbuf)
{
    const long long bound = n;  /* original n; the trial-factor limit is sqrt(bound) */
    int p = 4;

    /* factor out powers of 4, powers of 2, then any remaining primes */
    do {
        while (n % p) {
            switch (p) {
                case 4: p = 2; break;
                case 2: p = 3; break;
                default: p += 2; break;
            }
            /* 64-bit product: equivalent to p > floor(sqrt(bound)) without overflow */
            if ((long long)p * p > bound)
                p = n;          /* no more factors, skip to end */
        }
        n /= p;
        *facbuf++ = p;
        *facbuf++ = n;
    } while (n > 1);
}
327327-328328-/*
329329- *
330330- * User-callable function to allocate all necessary storage space for the fft.
331331- *
332332- * The return value is a contiguous block of memory, allocated with malloc. As such,
333333- * It can be freed with free(), rather than a kiss_fft-specific function.
334334- * */
335335-kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void* mem, size_t* lenmem)
336336-{
337337- kiss_fft_cfg st = NULL;
338338- size_t memneeded = sizeof(struct kiss_fft_state)
339339- + sizeof(kiss_fft_cpx) * (nfft - 1); /* twiddle factors*/
340340-341341- if (lenmem == NULL) {
342342- st = (kiss_fft_cfg)KISS_FFT_MALLOC(memneeded);
343343- }
344344- else {
345345- if (mem != NULL && *lenmem >= memneeded)
346346- st = (kiss_fft_cfg)mem;
347347- *lenmem = memneeded;
348348- }
349349- if (st) {
350350- int i;
351351- st->nfft = nfft;
352352- st->inverse = inverse_fft;
353353-354354- for (i = 0; i < nfft; ++i) {
355355- const double pi = 3.141592653589793238462643383279502884197169399375105820974944;
356356- double phase = -2 * pi * i / nfft;
357357- if (st->inverse)
358358- phase *= -1;
359359- kf_cexp(st->twiddles + i, phase);
360360- }
361361-362362- kf_factor(nfft, st->factors);
363363- }
364364- return st;
365365-}
366366-367367-368368-void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx* fin, kiss_fft_cpx* fout, int in_stride)
369369-{
370370- if (fin == fout) {
371371- //NOTE: this is not really an in-place FFT algorithm.
372372- //It just performs an out-of-place FFT into a temp buffer
373373- kiss_fft_cpx* tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * st->nfft);
374374- kf_work(tmpbuf, fin, 1, in_stride, st->factors, st);
375375- memcpy(fout, tmpbuf, sizeof(kiss_fft_cpx) * st->nfft);
376376- KISS_FFT_TMP_FREE(tmpbuf);
377377- }
378378- else {
379379- kf_work(fout, fin, 1, in_stride, st->factors, st);
380380- }
381381-}
382382-383383-void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx* fin, kiss_fft_cpx* fout)
384384-{
385385- kiss_fft_stride(cfg, fin, fout, 1);
386386-}
/* No-op: nothing needs to be cleaned up any more. */
void kiss_fft_cleanup(void)
{
    return;
}
/*
 * Return the smallest value >= n whose only prime factors are 2, 3 and 5.
 *
 * n == 0 returns 1; without that special case the factor-stripping loops
 * never terminate, because 0 stays divisible by 2 forever.
 * A negative n is returned unchanged, since the stripped remainder is
 * already <= 1 on the first pass.
 * There is no guard against n++ overflowing when no such value fits in int.
 */
int kiss_fft_next_fast_size(int n)
{
    if (n == 0)
        return 1;

    for (;; ++n) {
        int m = n;

        while ((m % 2) == 0) m /= 2;
        while ((m % 3) == 0) m /= 3;
        while ((m % 5) == 0) m /= 5;
        if (m <= 1)
            return n; /* n is completely factorable by twos, threes, and fives */
    }
}
11+/*
22+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
33+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
44+ *
55+ * SPDX-License-Identifier: BSD-3-Clause
66+ * See COPYING file for more information.
77+ */
88+99+1010+#include "_kiss_fft_guts.h"
/* The guts header contains all the multiplication and addition macros that are defined for
   fixed or floating point complex numbers. It also declares the kf_ internal functions.
 */
1414+1515+static void kf_bfly2(
1616+ kiss_fft_cpx * Fout,
1717+ const size_t fstride,
1818+ const kiss_fft_cfg st,
1919+ int m
2020+ )
2121+{
2222+ kiss_fft_cpx * Fout2;
2323+ kiss_fft_cpx * tw1 = st->twiddles;
2424+ kiss_fft_cpx t;
2525+ Fout2 = Fout + m;
2626+ do{
2727+ C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2);
2828+2929+ C_MUL (t, *Fout2 , *tw1);
3030+ tw1 += fstride;
3131+ C_SUB( *Fout2 , *Fout , t );
3232+ C_ADDTO( *Fout , t );
3333+ ++Fout2;
3434+ ++Fout;
3535+ }while (--m);
3636+}
3737+3838+static void kf_bfly4(
3939+ kiss_fft_cpx * Fout,
4040+ const size_t fstride,
4141+ const kiss_fft_cfg st,
4242+ const size_t m
4343+ )
4444+{
4545+ kiss_fft_cpx *tw1,*tw2,*tw3;
4646+ kiss_fft_cpx scratch[6];
4747+ size_t k=m;
4848+ const size_t m2=2*m;
4949+ const size_t m3=3*m;
5050+5151+5252+ tw3 = tw2 = tw1 = st->twiddles;
5353+5454+ do {
5555+ C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4);
5656+5757+ C_MUL(scratch[0],Fout[m] , *tw1 );
5858+ C_MUL(scratch[1],Fout[m2] , *tw2 );
5959+ C_MUL(scratch[2],Fout[m3] , *tw3 );
6060+6161+ C_SUB( scratch[5] , *Fout, scratch[1] );
6262+ C_ADDTO(*Fout, scratch[1]);
6363+ C_ADD( scratch[3] , scratch[0] , scratch[2] );
6464+ C_SUB( scratch[4] , scratch[0] , scratch[2] );
6565+ C_SUB( Fout[m2], *Fout, scratch[3] );
6666+ tw1 += fstride;
6767+ tw2 += fstride*2;
6868+ tw3 += fstride*3;
6969+ C_ADDTO( *Fout , scratch[3] );
7070+7171+ if(st->inverse) {
7272+ Fout[m].r = scratch[5].r - scratch[4].i;
7373+ Fout[m].i = scratch[5].i + scratch[4].r;
7474+ Fout[m3].r = scratch[5].r + scratch[4].i;
7575+ Fout[m3].i = scratch[5].i - scratch[4].r;
7676+ }else{
7777+ Fout[m].r = scratch[5].r + scratch[4].i;
7878+ Fout[m].i = scratch[5].i - scratch[4].r;
7979+ Fout[m3].r = scratch[5].r - scratch[4].i;
8080+ Fout[m3].i = scratch[5].i + scratch[4].r;
8181+ }
8282+ ++Fout;
8383+ }while(--k);
8484+}
8585+8686+static void kf_bfly3(
8787+ kiss_fft_cpx * Fout,
8888+ const size_t fstride,
8989+ const kiss_fft_cfg st,
9090+ size_t m
9191+ )
9292+{
9393+ size_t k=m;
9494+ const size_t m2 = 2*m;
9595+ kiss_fft_cpx *tw1,*tw2;
9696+ kiss_fft_cpx scratch[5];
9797+ kiss_fft_cpx epi3;
9898+ epi3 = st->twiddles[fstride*m];
9999+100100+ tw1=tw2=st->twiddles;
101101+102102+ do{
103103+ C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3);
104104+105105+ C_MUL(scratch[1],Fout[m] , *tw1);
106106+ C_MUL(scratch[2],Fout[m2] , *tw2);
107107+108108+ C_ADD(scratch[3],scratch[1],scratch[2]);
109109+ C_SUB(scratch[0],scratch[1],scratch[2]);
110110+ tw1 += fstride;
111111+ tw2 += fstride*2;
112112+113113+ Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
114114+ Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
115115+116116+ C_MULBYSCALAR( scratch[0] , epi3.i );
117117+118118+ C_ADDTO(*Fout,scratch[3]);
119119+120120+ Fout[m2].r = Fout[m].r + scratch[0].i;
121121+ Fout[m2].i = Fout[m].i - scratch[0].r;
122122+123123+ Fout[m].r -= scratch[0].i;
124124+ Fout[m].i += scratch[0].r;
125125+126126+ ++Fout;
127127+ }while(--k);
128128+}
129129+130130+static void kf_bfly5(
131131+ kiss_fft_cpx * Fout,
132132+ const size_t fstride,
133133+ const kiss_fft_cfg st,
134134+ int m
135135+ )
136136+{
137137+ kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
138138+ int u;
139139+ kiss_fft_cpx scratch[13];
140140+ kiss_fft_cpx * twiddles = st->twiddles;
141141+ kiss_fft_cpx *tw;
142142+ kiss_fft_cpx ya,yb;
143143+ ya = twiddles[fstride*m];
144144+ yb = twiddles[fstride*2*m];
145145+146146+ Fout0=Fout;
147147+ Fout1=Fout0+m;
148148+ Fout2=Fout0+2*m;
149149+ Fout3=Fout0+3*m;
150150+ Fout4=Fout0+4*m;
151151+152152+ tw=st->twiddles;
153153+ for ( u=0; u<m; ++u ) {
154154+ C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5);
155155+ scratch[0] = *Fout0;
156156+157157+ C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
158158+ C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
159159+ C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
160160+ C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
161161+162162+ C_ADD( scratch[7],scratch[1],scratch[4]);
163163+ C_SUB( scratch[10],scratch[1],scratch[4]);
164164+ C_ADD( scratch[8],scratch[2],scratch[3]);
165165+ C_SUB( scratch[9],scratch[2],scratch[3]);
166166+167167+ Fout0->r += scratch[7].r + scratch[8].r;
168168+ Fout0->i += scratch[7].i + scratch[8].i;
169169+170170+ scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
171171+ scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
172172+173173+ scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
174174+ scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
175175+176176+ C_SUB(*Fout1,scratch[5],scratch[6]);
177177+ C_ADD(*Fout4,scratch[5],scratch[6]);
178178+179179+ scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
180180+ scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
181181+ scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
182182+ scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
183183+184184+ C_ADD(*Fout2,scratch[11],scratch[12]);
185185+ C_SUB(*Fout3,scratch[11],scratch[12]);
186186+187187+ ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
188188+ }
189189+}
190190+191191+/* perform the butterfly for one stage of a mixed radix FFT */
192192+static void kf_bfly_generic(
193193+ kiss_fft_cpx * Fout,
194194+ const size_t fstride,
195195+ const kiss_fft_cfg st,
196196+ int m,
197197+ int p
198198+ )
199199+{
200200+ int u,k,q1,q;
201201+ kiss_fft_cpx * twiddles = st->twiddles;
202202+ kiss_fft_cpx t;
203203+ int Norig = st->nfft;
204204+205205+ kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p);
206206+ if (scratch == NULL){
207207+ KISS_FFT_ERROR("Memory allocation failed.");
208208+ return;
209209+ }
210210+211211+ for ( u=0; u<m; ++u ) {
212212+ k=u;
213213+ for ( q1=0 ; q1<p ; ++q1 ) {
214214+ scratch[q1] = Fout[ k ];
215215+ C_FIXDIV(scratch[q1],p);
216216+ k += m;
217217+ }
218218+219219+ k=u;
220220+ for ( q1=0 ; q1<p ; ++q1 ) {
221221+ int twidx=0;
222222+ Fout[ k ] = scratch[0];
223223+ for (q=1;q<p;++q ) {
224224+ twidx += fstride * k;
225225+ if (twidx>=Norig) twidx-=Norig;
226226+ C_MUL(t,scratch[q] , twiddles[twidx] );
227227+ C_ADDTO( Fout[ k ] ,t);
228228+ }
229229+ k += m;
230230+ }
231231+ }
232232+ KISS_FFT_TMP_FREE(scratch);
233233+}
234234+235235+static
236236+void kf_work(
237237+ kiss_fft_cpx * Fout,
238238+ const kiss_fft_cpx * f,
239239+ const size_t fstride,
240240+ int in_stride,
241241+ int * factors,
242242+ const kiss_fft_cfg st
243243+ )
244244+{
245245+ kiss_fft_cpx * Fout_beg=Fout;
246246+ const int p=*factors++; /* the radix */
247247+ const int m=*factors++; /* stage's fft length/p */
248248+ const kiss_fft_cpx * Fout_end = Fout + p*m;
249249+250250+#ifdef _OPENMP
251251+ // use openmp extensions at the
252252+ // top-level (not recursive)
253253+ if (fstride==1 && p<=5 && m!=1)
254254+ {
255255+ int k;
256256+257257+ // execute the p different work units in different threads
258258+# pragma omp parallel for
259259+ for (k=0;k<p;++k)
260260+ kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st);
261261+ // all threads have joined by this point
262262+263263+ switch (p) {
264264+ case 2: kf_bfly2(Fout,fstride,st,m); break;
265265+ case 3: kf_bfly3(Fout,fstride,st,m); break;
266266+ case 4: kf_bfly4(Fout,fstride,st,m); break;
267267+ case 5: kf_bfly5(Fout,fstride,st,m); break;
268268+ default: kf_bfly_generic(Fout,fstride,st,m,p); break;
269269+ }
270270+ return;
271271+ }
272272+#endif
273273+274274+ if (m==1) {
275275+ do{
276276+ *Fout = *f;
277277+ f += fstride*in_stride;
278278+ }while(++Fout != Fout_end );
279279+ }else{
280280+ do{
281281+ // recursive call:
282282+ // DFT of size m*p performed by doing
283283+ // p instances of smaller DFTs of size m,
284284+ // each one takes a decimated version of the input
285285+ kf_work( Fout , f, fstride*p, in_stride, factors,st);
286286+ f += fstride*in_stride;
287287+ }while( (Fout += m) != Fout_end );
288288+ }
289289+290290+ Fout=Fout_beg;
291291+292292+ // recombine the p smaller DFTs
293293+ switch (p) {
294294+ case 2: kf_bfly2(Fout,fstride,st,m); break;
295295+ case 3: kf_bfly3(Fout,fstride,st,m); break;
296296+ case 4: kf_bfly4(Fout,fstride,st,m); break;
297297+ case 5: kf_bfly5(Fout,fstride,st,m); break;
298298+ default: kf_bfly_generic(Fout,fstride,st,m,p); break;
299299+ }
300300+}
/*  facbuf is populated by p1,m1,p2,m2, ...
    where
    p[i] * m[i] = m[i-1]
    m0 = n

    Powers of 4 are factored out first, then powers of 2, then odd factors
    3, 5, 7, ... Once the trial factor p satisfies p*p > (original n) the
    remaining n is taken as the last factor. The bound is tested in integer
    arithmetic, so no floating-point square root is needed and the loop ends
    after a single pair for n < 1 (which is not a valid FFT length). */
static
void kf_factor(int n, int *facbuf)
{
    const long long bound = n;  /* original n; the trial-factor limit is sqrt(bound) */
    int p = 4;

    /* factor out powers of 4, powers of 2, then any remaining primes */
    do {
        while (n % p) {
            switch (p) {
                case 4: p = 2; break;
                case 2: p = 3; break;
                default: p += 2; break;
            }
            /* 64-bit product: equivalent to p > floor(sqrt(bound)) without overflow */
            if ((long long)p * p > bound)
                p = n;          /* no more factors, skip to end */
        }
        n /= p;
        *facbuf++ = p;
        *facbuf++ = n;
    } while (n > 1);
}
329329+330330+/*
331331+ *
332332+ * User-callable function to allocate all necessary storage space for the fft.
333333+ *
334334+ * The return value is a contiguous block of memory, allocated with malloc. As such,
335335+ * It can be freed with free(), rather than a kiss_fft-specific function.
336336+ * */
337337+kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem )
338338+{
339339+ KISS_FFT_ALIGN_CHECK(mem)
340340+341341+ kiss_fft_cfg st=NULL;
342342+ size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fft_state)
343343+ + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/
344344+345345+ if ( lenmem==NULL ) {
346346+ st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded );
347347+ }else{
348348+ if (mem != NULL && *lenmem >= memneeded)
349349+ st = (kiss_fft_cfg)mem;
350350+ *lenmem = memneeded;
351351+ }
352352+ if (st) {
353353+ int i;
354354+ st->nfft=nfft;
355355+ st->inverse = inverse_fft;
356356+357357+ for (i=0;i<nfft;++i) {
358358+ const double pi=3.141592653589793238462643383279502884197169399375105820974944;
359359+ double phase = -2*pi*i / nfft;
360360+ if (st->inverse)
361361+ phase *= -1;
362362+ kf_cexp(st->twiddles+i, phase );
363363+ }
364364+365365+ kf_factor(nfft,st->factors);
366366+ }
367367+ return st;
368368+}
369369+370370+371371+void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride)
372372+{
373373+ if (fin == fout) {
374374+ //NOTE: this is not really an in-place FFT algorithm.
375375+ //It just performs an out-of-place FFT into a temp buffer
376376+ if (fout == NULL){
377377+ KISS_FFT_ERROR("fout buffer NULL.");
378378+ return;
379379+ }
380380+381381+ kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft);
382382+ if (tmpbuf == NULL){
383383+ KISS_FFT_ERROR("Memory allocation error.");
384384+ return;
385385+ }
386386+387387+388388+389389+ kf_work(tmpbuf,fin,1,in_stride, st->factors,st);
390390+ memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft);
391391+ KISS_FFT_TMP_FREE(tmpbuf);
392392+ }else{
393393+ kf_work( fout, fin, 1,in_stride, st->factors,st );
394394+ }
395395+}
396396+397397+void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
398398+{
399399+ kiss_fft_stride(cfg,fin,fout,1);
400400+}
/* Part of the public API; the body is intentionally empty. */
void kiss_fft_cleanup(void)
{
    /* nothing needed any more */
}
/*
 * Returns the smallest integer k such that k >= n and k has only the "fast"
 * prime factors 2, 3 and 5.
 *
 * Any n below 1 yields 1 (the empty product).  Previously n == 0 never
 * returned, because 0 stays 0 under repeated division by 2.
 *
 * Note: n is incremented without an overflow check, so the caller must keep
 * n small enough that a qualifying value exists within the range of int.
 */
int kiss_fft_next_fast_size(int n)
{
    if (n < 1)
        return 1;

    for (;; ++n) {
        int m = n;

        /* strip every factor of 2, 3 and 5; only 1 may remain */
        while ( (m%2) == 0 ) m/=2;
        while ( (m%3) == 0 ) m/=3;
        while ( (m%5) == 0 ) m/=5;
        if (m == 1)
            return n;   /* n is completely factorable by twos, threes, and fives */
    }
}
+160-132
src/ext/kiss_fft.h
···11-/*
22- * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
33- * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
44- *
55- * SPDX-License-Identifier: BSD-3-Clause
66- * See COPYING file for more information.
77- */
88-99-#ifndef KISS_FFT_H
1010-#define KISS_FFT_H
1111-1212-#include <stdlib.h>
1313-#include <stdio.h>
1414-#include <math.h>
1515-#include <string.h>
1616-1717-#ifdef __cplusplus
1818-extern "C" {
1919-#endif
2020-2121- /*
2222- ATTENTION!
2323- If you would like a :
2424- -- a utility that will handle the caching of fft objects
2525- -- real-only (no imaginary time component ) FFT
2626- -- a multi-dimensional FFT
2727- -- a command-line utility to perform ffts
2828- -- a command-line utility to perform fast-convolution filtering
2929-3030- Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c
3131- in the tools/ directory.
3232- */
3333-3434-#ifdef USE_SIMD
3535-# include <xmmintrin.h>
3636-# define kiss_fft_scalar __m128
3737-#define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
3838-#define KISS_FFT_FREE _mm_free
3939-#else
4040-#define KISS_FFT_MALLOC malloc
4141-#define KISS_FFT_FREE free
4242-#endif
4343-4444-4545-#ifdef FIXED_POINT
4646-#include <sys/types.h>
4747-# if (FIXED_POINT == 32)
4848-# define kiss_fft_scalar int32_t
4949-# else
5050-# define kiss_fft_scalar int16_t
5151-# endif
5252-#else
5353-# ifndef kiss_fft_scalar
5454- /* default is float */
5555-# define kiss_fft_scalar float
5656-# endif
5757-#endif
5858-5959- typedef struct {
6060- kiss_fft_scalar r;
6161- kiss_fft_scalar i;
6262- }kiss_fft_cpx;
6363-6464- typedef struct kiss_fft_state* kiss_fft_cfg;
6565-6666- /*
6767- * kiss_fft_alloc
6868- *
6969- * Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
7070- *
7171- * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
7272- *
7373- * The return value from fft_alloc is a cfg buffer used internally
7474- * by the fft routine or NULL.
7575- *
7676- * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc.
7777- * The returned value should be free()d when done to avoid memory leaks.
7878- *
7979- * The state can be placed in a user supplied buffer 'mem':
8080- * If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
8181- * then the function places the cfg in mem and the size used in *lenmem
8282- * and returns mem.
8383- *
8484- * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
8585- * then the function returns NULL and places the minimum cfg
8686- * buffer size in *lenmem.
8787- * */
8888-8989- kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void* mem, size_t* lenmem);
9090-9191- /*
9292- * kiss_fft(cfg,in_out_buf)
9393- *
9494- * Perform an FFT on a complex input buffer.
9595- * for a forward FFT,
9696- * fin should be f[0] , f[1] , ... ,f[nfft-1]
9797- * fout will be F[0] , F[1] , ... ,F[nfft-1]
9898- * Note that each element is complex and can be accessed like
9999- f[k].r and f[k].i
100100- * */
101101- void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx* fin, kiss_fft_cpx* fout);
102102-103103- /*
104104- A more generic version of the above function. It reads its input from every Nth sample.
105105- * */
106106- void kiss_fft_stride(kiss_fft_cfg cfg, const kiss_fft_cpx* fin, kiss_fft_cpx* fout, int fin_stride);
107107-108108- /* If kiss_fft_alloc allocated a buffer, it is one contiguous
109109- buffer and can be simply free()d when no longer needed*/
110110-#define kiss_fft_free KISS_FFT_FREE
111111-112112- /*
113113- Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up
114114- your compiler output to call this before you exit.
115115- */
116116- void kiss_fft_cleanup(void);
117117-118118-119119- /*
120120- * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5)
121121- */
122122- int kiss_fft_next_fast_size(int n);
123123-124124- /* for real ffts, we need an even size */
125125-#define kiss_fftr_next_fast_size_real(n) \
126126- (kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
127127-128128-#ifdef __cplusplus
129129-}
130130-#endif
131131-132132-#endif
11+/*
22+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
33+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
44+ *
55+ * SPDX-License-Identifier: BSD-3-Clause
66+ * See COPYING file for more information.
77+ */
88+99+#ifndef KISS_FFT_H
1010+#define KISS_FFT_H
1111+1212+#include <stdlib.h>
1313+#include <stdio.h>
1414+#include <math.h>
1515+#include <string.h>
1616+1717+// Define KISS_FFT_SHARED macro to properly export symbols
1818+#ifdef KISS_FFT_SHARED
1919+# ifdef _WIN32
2020+# ifdef KISS_FFT_BUILD
2121+# define KISS_FFT_API __declspec(dllexport)
2222+# else
2323+# define KISS_FFT_API __declspec(dllimport)
2424+# endif
2525+# else
2626+# define KISS_FFT_API __attribute__ ((visibility ("default")))
2727+# endif
2828+#else
2929+# define KISS_FFT_API
3030+#endif
3131+3232+#ifdef __cplusplus
3333+extern "C" {
3434+#endif
3535+3636+/*
3737+ ATTENTION!
3838+ If you would like a :
3939+ -- a utility that will handle the caching of fft objects
4040+ -- real-only (no imaginary time component ) FFT
4141+ -- a multi-dimensional FFT
4242+ -- a command-line utility to perform ffts
4343+ -- a command-line utility to perform fast-convolution filtering
4444+4545+ Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c
4646+ in the tools/ directory.
4747+*/
/*
 * Allocation hooks.
 *
 * User may override KISS_FFT_MALLOC and/or KISS_FFT_FREE by defining them
 * before including this header.
 *
 * KISS_FFT_ALIGN_CHECK and KISS_FFT_ALIGN_SIZE_UP are defined in every
 * configuration, including USE_SIMD builds that supply their own
 * KISS_FFT_MALLOC; the allocation routines use them unconditionally.
 * With USE_SIMD, sizes are rounded up to a multiple of 16 bytes.
 */
#ifdef USE_SIMD
# include <xmmintrin.h>
# define kiss_fft_scalar __m128
# define KISS_FFT_ALIGN_CHECK(ptr)
# define KISS_FFT_ALIGN_SIZE_UP(size) (((size_t)(size) + (size_t)15) & ~(size_t)15)
# ifndef KISS_FFT_MALLOC
#  define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16)
# endif
# ifndef KISS_FFT_FREE
#  define KISS_FFT_FREE _mm_free
# endif
#else
# define KISS_FFT_ALIGN_CHECK(ptr)
# define KISS_FFT_ALIGN_SIZE_UP(size) (size)
# ifndef KISS_FFT_MALLOC
#  define KISS_FFT_MALLOC malloc
# endif
# ifndef KISS_FFT_FREE
#  define KISS_FFT_FREE free
# endif
#endif
7171+7272+7373+#ifdef FIXED_POINT
7474+#include <stdint.h>
7575+# if (FIXED_POINT == 32)
7676+# define kiss_fft_scalar int32_t
7777+# else
7878+# define kiss_fft_scalar int16_t
7979+# endif
8080+#else
8181+# ifndef kiss_fft_scalar
8282+/* default is float */
8383+# define kiss_fft_scalar float
8484+# endif
8585+#endif
8686+8787+typedef struct {
8888+ kiss_fft_scalar r;
8989+ kiss_fft_scalar i;
9090+}kiss_fft_cpx;
9191+9292+typedef struct kiss_fft_state* kiss_fft_cfg;
9393+9494+/*
9595+ * kiss_fft_alloc
9696+ *
9797+ * Initialize a FFT (or IFFT) algorithm's cfg/state buffer.
9898+ *
9999+ * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL);
100100+ *
101101+ * The return value from fft_alloc is a cfg buffer used internally
102102+ * by the fft routine or NULL.
103103+ *
104104+ * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using KISS_FFT_MALLOC (malloc by default).
105105+ * The returned value should be released with kiss_fft_free() when done to avoid memory leaks.
106106+ *
107107+ * The state can be placed in a user supplied buffer 'mem':
108108+ * If lenmem is not NULL and mem is not NULL and *lenmem is large enough,
109109+ * then the function places the cfg in mem and the size used in *lenmem
110110+ * and returns mem.
111111+ *
112112+ * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough),
113113+ * then the function returns NULL and places the minimum cfg
114114+ * buffer size in *lenmem.
115115+ * */
116116+117117+kiss_fft_cfg KISS_FFT_API kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem);
118118+119119+/*
120120+ * kiss_fft(cfg,fin,fout)
121121+ *
122122+ * Perform an FFT on a complex input buffer.
123123+ * for a forward FFT,
124124+ * fin should be f[0] , f[1] , ... ,f[nfft-1]
125125+ * fout will be F[0] , F[1] , ... ,F[nfft-1]
126126+ * Note that each element is complex and can be accessed like
127127+ f[k].r and f[k].i
128128+ * */
129129+void KISS_FFT_API kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout);
130130+131131+/*
132132+ A more generic version of the above function. It reads its input from every Nth sample.
133133+ * */
134134+void KISS_FFT_API kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride);
135135+136136+/* If kiss_fft_alloc allocated a buffer, it is one contiguous
137137+ buffer and can be released with a single kiss_fft_free() call when no longer needed */
138138+#define kiss_fft_free KISS_FFT_FREE
139139+140140+/*
141141+ Retained for API compatibility: the current implementation has nothing to release,
142142+ so calling this before you exit is optional and has no effect.
143143+*/
144144+void KISS_FFT_API kiss_fft_cleanup(void);
145145+146146+147147+/*
148148+ * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5)
149149+ */
150150+int KISS_FFT_API kiss_fft_next_fast_size(int n);
151151+152152+/* for real ffts, we need an even size */
153153+#define kiss_fftr_next_fast_size_real(n) \
154154+ (kiss_fft_next_fast_size( ((n)+1)>>1)<<1)
155155+156156+#ifdef __cplusplus
157157+}
158158+#endif
159159+160160+#endif
+36
src/ext/kiss_fft_log.h
···11+/*
22+ * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved.
33+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
44+ *
55+ * SPDX-License-Identifier: BSD-3-Clause
66+ * See COPYING file for more information.
77+ */
#ifndef kiss_fft_log_h
#define kiss_fft_log_h

#include <stdio.h>  /* fprintf, stderr: used by KISS_FFT_LOG_MSG below */

#define ERROR 1
#define WARNING 2
#define INFO 3
#define DEBUG 4

#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)

/*
 * KISS_FFT_LOG_MSG(severity, fmt, ...)
 *
 * Writes "[severity] file:line " followed by the printf-style message and a
 * newline to stderr.  The severity token is stringified as written, so the
 * numeric values above do not appear in the output.  With NDEBUG defined the
 * macro expands to a no-op expression.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a single
 * statement, e.g. as the body of an unbraced if/else.
 */
#if defined(NDEBUG)
# define KISS_FFT_LOG_MSG(severity, ...) ((void)0)
#else
# define KISS_FFT_LOG_MSG(severity, ...) \
    do { \
        fprintf(stderr, "[" #severity "] " __FILE__ ":" TOSTRING(__LINE__) " "); \
        fprintf(stderr, __VA_ARGS__); \
        fprintf(stderr, "\n"); \
    } while (0)
#endif

#define KISS_FFT_ERROR(...) KISS_FFT_LOG_MSG(ERROR, __VA_ARGS__)
#define KISS_FFT_WARNING(...) KISS_FFT_LOG_MSG(WARNING, __VA_ARGS__)
#define KISS_FFT_INFO(...) KISS_FFT_LOG_MSG(INFO, __VA_ARGS__)
#define KISS_FFT_DEBUG(...) KISS_FFT_LOG_MSG(DEBUG, __VA_ARGS__)

#endif /* kiss_fft_log_h */
+155-154
src/ext/kiss_fftr.c
···11-/*
22- * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
33- * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
44- *
55- * SPDX-License-Identifier: BSD-3-Clause
66- * See COPYING file for more information.
77- */
88-99-#include "kiss_fftr.h"
1010-#include "_kiss_fft_guts.h"
1111-1212-struct kiss_fftr_state {
1313- kiss_fft_cfg substate;
1414- kiss_fft_cpx* tmpbuf;
1515- kiss_fft_cpx* super_twiddles;
1616-#ifdef USE_SIMD
1717- void* pad;
1818-#endif
1919-};
2020-2121-kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void* mem, size_t* lenmem)
2222-{
2323- int i;
2424- kiss_fftr_cfg st = NULL;
2525- size_t subsize, memneeded;
2626-2727- if (nfft & 1) {
2828- fprintf(stderr, "Real FFT optimization must be even.\n");
2929- return NULL;
3030- }
3131- nfft >>= 1;
3232-3333- kiss_fft_alloc(nfft, inverse_fft, NULL, &subsize);
3434- memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * (nfft * 3 / 2);
3535-3636- if (lenmem == NULL) {
3737- st = (kiss_fftr_cfg)KISS_FFT_MALLOC(memneeded);
3838- }
3939- else {
4040- if (*lenmem >= memneeded)
4141- st = (kiss_fftr_cfg)mem;
4242- *lenmem = memneeded;
4343- }
4444- if (!st)
4545- return NULL;
4646-4747- st->substate = (kiss_fft_cfg)(st + 1); /*just beyond kiss_fftr_state struct */
4848- st->tmpbuf = (kiss_fft_cpx*)(((char*)st->substate) + subsize);
4949- st->super_twiddles = st->tmpbuf + nfft;
5050- kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
5151-5252- for (i = 0; i < nfft / 2; ++i) {
5353- double phase =
5454- -3.14159265358979323846264338327 * ((double)(i + 1) / nfft + .5);
5555- if (inverse_fft)
5656- phase *= -1;
5757- kf_cexp(st->super_twiddles + i, phase);
5858- }
5959- return st;
6060-}
6161-6262-void kiss_fftr(kiss_fftr_cfg st, const kiss_fft_scalar* timedata, kiss_fft_cpx* freqdata)
6363-{
6464- /* input buffer timedata is stored row-wise */
6565- int k, ncfft;
6666- kiss_fft_cpx fpnk, fpk, f1k, f2k, tw, tdc;
6767-6868- if (st->substate->inverse) {
6969- fprintf(stderr, "kiss fft usage error: improper alloc\n");
7070- exit(1);
7171- }
7272-7373- ncfft = st->substate->nfft;
7474-7575- /*perform the parallel fft of two real signals packed in real,imag*/
7676- kiss_fft(st->substate, (const kiss_fft_cpx*)timedata, st->tmpbuf);
7777- /* The real part of the DC element of the frequency spectrum in st->tmpbuf
7878- * contains the sum of the even-numbered elements of the input time sequence
7979- * The imag part is the sum of the odd-numbered elements
8080- *
8181- * The sum of tdc.r and tdc.i is the sum of the input time sequence.
8282- * yielding DC of input time sequence
8383- * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
8484- * yielding Nyquist bin of input time sequence
8585- */
8686-8787- tdc.r = st->tmpbuf[0].r;
8888- tdc.i = st->tmpbuf[0].i;
8989- C_FIXDIV(tdc, 2);
9090- CHECK_OVERFLOW_OP(tdc.r, +, tdc.i);
9191- CHECK_OVERFLOW_OP(tdc.r, -, tdc.i);
9292- freqdata[0].r = tdc.r + tdc.i;
9393- freqdata[ncfft].r = tdc.r - tdc.i;
9494-#ifdef USE_SIMD
9595- freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
9696-#else
9797- freqdata[ncfft].i = freqdata[0].i = 0;
9898-#endif
9999-100100- for (k = 1; k <= ncfft / 2; ++k) {
101101- fpk = st->tmpbuf[k];
102102- fpnk.r = st->tmpbuf[ncfft - k].r;
103103- fpnk.i = -st->tmpbuf[ncfft - k].i;
104104- C_FIXDIV(fpk, 2);
105105- C_FIXDIV(fpnk, 2);
106106-107107- C_ADD(f1k, fpk, fpnk);
108108- C_SUB(f2k, fpk, fpnk);
109109- C_MUL(tw, f2k, st->super_twiddles[k - 1]);
110110-111111- freqdata[k].r = HALF_OF(f1k.r + tw.r);
112112- freqdata[k].i = HALF_OF(f1k.i + tw.i);
113113- freqdata[ncfft - k].r = HALF_OF(f1k.r - tw.r);
114114- freqdata[ncfft - k].i = HALF_OF(tw.i - f1k.i);
115115- }
116116-}
117117-118118-void kiss_fftri(kiss_fftr_cfg st, const kiss_fft_cpx* freqdata, kiss_fft_scalar* timedata)
119119-{
120120- /* input buffer timedata is stored row-wise */
121121- int k, ncfft;
122122-123123- if (st->substate->inverse == 0) {
124124- fprintf(stderr, "kiss fft usage error: improper alloc\n");
125125- exit(1);
126126- }
127127-128128- ncfft = st->substate->nfft;
129129-130130- st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
131131- st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
132132- C_FIXDIV(st->tmpbuf[0], 2);
133133-134134- for (k = 1; k <= ncfft / 2; ++k) {
135135- kiss_fft_cpx fk, fnkc, fek, fok, tmp;
136136- fk = freqdata[k];
137137- fnkc.r = freqdata[ncfft - k].r;
138138- fnkc.i = -freqdata[ncfft - k].i;
139139- C_FIXDIV(fk, 2);
140140- C_FIXDIV(fnkc, 2);
141141-142142- C_ADD(fek, fk, fnkc);
143143- C_SUB(tmp, fk, fnkc);
144144- C_MUL(fok, tmp, st->super_twiddles[k - 1]);
145145- C_ADD(st->tmpbuf[k], fek, fok);
146146- C_SUB(st->tmpbuf[ncfft - k], fek, fok);
147147-#ifdef USE_SIMD
148148- st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
149149-#else
150150- st->tmpbuf[ncfft - k].i *= -1;
151151-#endif
152152- }
153153- kiss_fft(st->substate, st->tmpbuf, (kiss_fft_cpx*)timedata);
154154-}
11+/*
22+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
33+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
44+ *
55+ * SPDX-License-Identifier: BSD-3-Clause
66+ * See COPYING file for more information.
77+ */
88+99+#include "kiss_fftr.h"
1010+#include "_kiss_fft_guts.h"
1111+1212+struct kiss_fftr_state{
1313+ kiss_fft_cfg substate;
1414+ kiss_fft_cpx * tmpbuf;
1515+ kiss_fft_cpx * super_twiddles;
1616+#ifdef USE_SIMD
1717+ void * pad;
1818+#endif
1919+};
2020+2121+kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
2222+{
2323+ KISS_FFT_ALIGN_CHECK(mem)
2424+2525+ int i;
2626+ kiss_fftr_cfg st = NULL;
2727+ size_t subsize = 0, memneeded;
2828+2929+ if (nfft & 1) {
3030+ KISS_FFT_ERROR("Real FFT optimization must be even.");
3131+ return NULL;
3232+ }
3333+ nfft >>= 1;
3434+3535+ kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
3636+ memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2);
3737+3838+ if (lenmem == NULL) {
3939+ st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
4040+ } else {
4141+ if (*lenmem >= memneeded)
4242+ st = (kiss_fftr_cfg) mem;
4343+ *lenmem = memneeded;
4444+ }
4545+ if (!st)
4646+ return NULL;
4747+4848+ st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
4949+ st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
5050+ st->super_twiddles = st->tmpbuf + nfft;
5151+ kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
5252+5353+ for (i = 0; i < nfft/2; ++i) {
5454+ double phase =
5555+ -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5);
5656+ if (inverse_fft)
5757+ phase *= -1;
5858+ kf_cexp (st->super_twiddles+i,phase);
5959+ }
6060+ return st;
6161+}
6262+6363+void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
6464+{
6565+ /* input buffer timedata is stored row-wise */
6666+ int k,ncfft;
6767+ kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
6868+6969+ if ( st->substate->inverse) {
7070+ KISS_FFT_ERROR("kiss fft usage error: improper alloc");
7171+ return;/* The caller did not call the correct function */
7272+ }
7373+7474+ ncfft = st->substate->nfft;
7575+7676+ /*perform the parallel fft of two real signals packed in real,imag*/
7777+ kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
7878+ /* The real part of the DC element of the frequency spectrum in st->tmpbuf
7979+ * contains the sum of the even-numbered elements of the input time sequence
8080+ * The imag part is the sum of the odd-numbered elements
8181+ *
8282+ * The sum of tdc.r and tdc.i is the sum of the input time sequence.
8383+ * yielding DC of input time sequence
8484+ * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
8585+ * yielding Nyquist bin of input time sequence
8686+ */
8787+8888+ tdc.r = st->tmpbuf[0].r;
8989+ tdc.i = st->tmpbuf[0].i;
9090+ C_FIXDIV(tdc,2);
9191+ CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
9292+ CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
9393+ freqdata[0].r = tdc.r + tdc.i;
9494+ freqdata[ncfft].r = tdc.r - tdc.i;
9595+#ifdef USE_SIMD
9696+ freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
9797+#else
9898+ freqdata[ncfft].i = freqdata[0].i = 0;
9999+#endif
100100+101101+ for ( k=1;k <= ncfft/2 ; ++k ) {
102102+ fpk = st->tmpbuf[k];
103103+ fpnk.r = st->tmpbuf[ncfft-k].r;
104104+ fpnk.i = - st->tmpbuf[ncfft-k].i;
105105+ C_FIXDIV(fpk,2);
106106+ C_FIXDIV(fpnk,2);
107107+108108+ C_ADD( f1k, fpk , fpnk );
109109+ C_SUB( f2k, fpk , fpnk );
110110+ C_MUL( tw , f2k , st->super_twiddles[k-1]);
111111+112112+ freqdata[k].r = HALF_OF(f1k.r + tw.r);
113113+ freqdata[k].i = HALF_OF(f1k.i + tw.i);
114114+ freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
115115+ freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
116116+ }
117117+}
118118+119119+void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata)
120120+{
121121+ /* input buffer timedata is stored row-wise */
122122+ int k, ncfft;
123123+124124+ if (st->substate->inverse == 0) {
125125+ KISS_FFT_ERROR("kiss fft usage error: improper alloc");
126126+ return;/* The caller did not call the correct function */
127127+ }
128128+129129+ ncfft = st->substate->nfft;
130130+131131+ st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
132132+ st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
133133+ C_FIXDIV(st->tmpbuf[0],2);
134134+135135+ for (k = 1; k <= ncfft / 2; ++k) {
136136+ kiss_fft_cpx fk, fnkc, fek, fok, tmp;
137137+ fk = freqdata[k];
138138+ fnkc.r = freqdata[ncfft - k].r;
139139+ fnkc.i = -freqdata[ncfft - k].i;
140140+ C_FIXDIV( fk , 2 );
141141+ C_FIXDIV( fnkc , 2 );
142142+143143+ C_ADD (fek, fk, fnkc);
144144+ C_SUB (tmp, fk, fnkc);
145145+ C_MUL (fok, tmp, st->super_twiddles[k-1]);
146146+ C_ADD (st->tmpbuf[k], fek, fok);
147147+ C_SUB (st->tmpbuf[ncfft - k], fek, fok);
148148+#ifdef USE_SIMD
149149+ st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
150150+#else
151151+ st->tmpbuf[ncfft - k].i *= -1;
152152+#endif
153153+ }
154154+ kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
155155+}
+54-54
src/ext/kiss_fftr.h
···11-/*
22- * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
33- * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
44- *
55- * SPDX-License-Identifier: BSD-3-Clause
66- * See COPYING file for more information.
77- */
88-99-#ifndef KISS_FTR_H
1010-#define KISS_FTR_H
1111-1212-#include "kiss_fft.h"
1313-#ifdef __cplusplus
1414-extern "C" {
1515-#endif
1616-1717-1818- /*
1919-2020- Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
2121-2222-2323-2424- */
2525-2626- typedef struct kiss_fftr_state* kiss_fftr_cfg;
2727-2828-2929- kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void* mem, size_t* lenmem);
3030- /*
3131- nfft must be even
3232-3333- If you don't care to allocate space, use mem = lenmem = NULL
3434- */
3535-3636-3737- void kiss_fftr(kiss_fftr_cfg cfg, const kiss_fft_scalar* timedata, kiss_fft_cpx* freqdata);
3838- /*
3939- input timedata has nfft scalar points
4040- output freqdata has nfft/2+1 complex points
4141- */
4242-4343- void kiss_fftri(kiss_fftr_cfg cfg, const kiss_fft_cpx* freqdata, kiss_fft_scalar* timedata);
4444- /*
4545- input freqdata has nfft/2+1 complex points
4646- output timedata has nfft scalar points
4747- */
4848-4949-#define kiss_fftr_free KISS_FFT_FREE
5050-5151-#ifdef __cplusplus
5252-}
5353-#endif
5454-#endif
11+/*
22+ * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved.
33+ * This file is part of KISS FFT - https://github.com/mborgerding/kissfft
44+ *
55+ * SPDX-License-Identifier: BSD-3-Clause
66+ * See COPYING file for more information.
77+ */
88+99+#ifndef KISS_FTR_H
1010+#define KISS_FTR_H
1111+1212+#include "kiss_fft.h"
1313+#ifdef __cplusplus
1414+extern "C" {
1515+#endif
1616+1717+1818+/*
1919+2020+ Real optimized version can save about 45% cpu time vs. complex fft of a real seq.
2121+2222+2323+2424+ */
2525+2626+typedef struct kiss_fftr_state *kiss_fftr_cfg;
2727+2828+2929+kiss_fftr_cfg KISS_FFT_API kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem);
3030+/*
3131+ nfft must be even
3232+3333+ If you don't care to allocate space, use mem = lenmem = NULL
3434+*/
3535+3636+3737+void KISS_FFT_API kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata);
3838+/*
3939+ input timedata has nfft scalar points
4040+ output freqdata has nfft/2+1 complex points
4141+*/
4242+4343+void KISS_FFT_API kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata);
4444+/*
4545+ input freqdata has nfft/2+1 complex points
4646+ output timedata has nfft scalar points
4747+*/
4848+4949+#define kiss_fftr_free KISS_FFT_FREE
5050+5151+#ifdef __cplusplus
5252+}
5353+#endif
5454+#endif