this repo has no description
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

update kiss_fft

alice c476bb6f 5a75cc2d

+992 -904
+167 -158
src/ext/_kiss_fft_guts.h
··· 1 - /* 2 - * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. 3 - * This file is part of KISS FFT - https://github.com/mborgerding/kissfft 4 - * 5 - * SPDX-License-Identifier: BSD-3-Clause 6 - * See COPYING file for more information. 7 - */ 8 - 9 - /* kiss_fft.h 10 - defines kiss_fft_scalar as either short or a float type 11 - and defines 12 - typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ 13 - #include "kiss_fft.h" 14 - #include <limits.h> 15 - 16 - #define MAXFACTORS 32 17 - /* e.g. an fft of length 128 has 4 factors 18 - as far as kissfft is concerned 19 - 4*4*4*2 20 - */ 21 - 22 - struct kiss_fft_state { 23 - int nfft; 24 - int inverse; 25 - int factors[2 * MAXFACTORS]; 26 - kiss_fft_cpx twiddles[1]; 27 - }; 28 - 29 - /* 30 - Explanation of macros dealing with complex math: 31 - 32 - C_MUL(m,a,b) : m = a*b 33 - C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise 34 - C_SUB( res, a,b) : res = a - b 35 - C_SUBFROM( res , a) : res -= a 36 - C_ADDTO( res , a) : res += a 37 - * */ 38 - #ifdef FIXED_POINT 39 - #if (FIXED_POINT==32) 40 - # define FRACBITS 31 41 - # define SAMPPROD int64_t 42 - #define SAMP_MAX 2147483647 43 - #else 44 - # define FRACBITS 15 45 - # define SAMPPROD int32_t 46 - #define SAMP_MAX 32767 47 - #endif 48 - 49 - #define SAMP_MIN -SAMP_MAX 50 - 51 - #if defined(CHECK_OVERFLOW) 52 - # define CHECK_OVERFLOW_OP(a,op,b) \ 53 - if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \ 54 - fprintf(stderr,"WARNING:overflow @ " __FILE__ "(%d): (%d " #op" %d) = %ld\n",__LINE__,(a),(b),(SAMPPROD)(a) op (SAMPPROD)(b) ); } 55 - #endif 56 - 57 - 58 - # define smul(a,b) ( (SAMPPROD)(a)*(b) ) 59 - # define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS ) 60 - 61 - # define S_MUL(a,b) sround( smul(a,b) ) 62 - 63 - # define C_MUL(m,a,b) \ 64 - do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \ 65 - (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0) 66 - 67 - # define DIVSCALAR(x,k) \ 68 - (x) = sround( smul( x, SAMP_MAX/k ) ) 69 - 70 - # define C_FIXDIV(c,div) \ 71 - do { DIVSCALAR( (c).r , div); \ 72 - DIVSCALAR( (c).i , div); }while (0) 73 - 74 - # define C_MULBYSCALAR( c, s ) \ 75 - do{ (c).r = sround( smul( (c).r , s ) ) ;\ 76 - (c).i = sround( smul( (c).i , s ) ) ; }while(0) 77 - 78 - #else /* not FIXED_POINT*/ 79 - 80 - # define S_MUL(a,b) ( (a)*(b) ) 81 - #define C_MUL(m,a,b) \ 82 - do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ 83 - (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) 84 - # define C_FIXDIV(c,div) /* NOOP */ 85 - # define C_MULBYSCALAR( c, s ) \ 86 - do{ (c).r *= (s);\ 87 - (c).i *= (s); }while(0) 88 - #endif 89 - 90 - #ifndef CHECK_OVERFLOW_OP 91 - # define CHECK_OVERFLOW_OP(a,op,b) /* noop */ 92 - #endif 93 - 94 - #define C_ADD( res, a,b)\ 95 - do { \ 96 - CHECK_OVERFLOW_OP((a).r,+,(b).r)\ 97 - CHECK_OVERFLOW_OP((a).i,+,(b).i)\ 98 - (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ 99 - }while(0) 100 - #define C_SUB( res, a,b)\ 101 - do { \ 102 - CHECK_OVERFLOW_OP((a).r,-,(b).r)\ 103 - CHECK_OVERFLOW_OP((a).i,-,(b).i)\ 104 - (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ 105 - }while(0) 106 - #define C_ADDTO( res , a)\ 107 - do { \ 108 - CHECK_OVERFLOW_OP((res).r,+,(a).r)\ 109 - CHECK_OVERFLOW_OP((res).i,+,(a).i)\ 110 - (res).r += (a).r; (res).i += (a).i;\ 111 - }while(0) 112 - 113 - #define C_SUBFROM( res , a)\ 114 - do {\ 115 - CHECK_OVERFLOW_OP((res).r,-,(a).r)\ 116 - CHECK_OVERFLOW_OP((res).i,-,(a).i)\ 117 - (res).r -= (a).r; (res).i -= (a).i; \ 118 - }while(0) 119 - 120 - 121 - #ifdef FIXED_POINT 122 - # define KISS_FFT_COS(phase) floor(.5+SAMP_MAX * cos (phase)) 123 - # define KISS_FFT_SIN(phase) floor(.5+SAMP_MAX * sin (phase)) 124 - # define HALF_OF(x) ((x)>>1) 125 - #elif defined(USE_SIMD) 126 - # define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) 127 - # define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) 128 - # define HALF_OF(x) ((x)*_mm_set1_ps(.5)) 129 - #else 130 - # define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) 131 - # define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) 132 - # define HALF_OF(x) ((x)*.5) 133 - #endif 134 - 135 - #define kf_cexp(x,phase) \ 136 - do{ \ 137 - (x)->r = KISS_FFT_COS(phase);\ 138 - (x)->i = KISS_FFT_SIN(phase);\ 139 - }while(0) 140 - 141 - 142 - /* a debugging function */ 143 - #define pcpx(c)\ 144 - fprintf(stderr,"%g + %gi\n",(double)((c)->r),(double)((c)->i) ) 145 - 146 - 147 - #ifdef KISS_FFT_USE_ALLOCA 148 - // define this to allow use of alloca instead of malloc for temporary buffers 149 - // Temporary buffers are used in two case: 150 - // 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5 151 - // 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform. 152 - #include <alloca.h> 153 - #define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes) 154 - #define KISS_FFT_TMP_FREE(ptr) 155 - #else 156 - #define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes) 157 - #define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr) 158 - #endif 1 + /* 2 + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. 3 + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft 4 + * 5 + * SPDX-License-Identifier: BSD-3-Clause 6 + * See COPYING file for more information. 7 + */ 8 + 9 + /* kiss_fft.h 10 + defines kiss_fft_scalar as either short or a float type 11 + and defines 12 + typedef struct { kiss_fft_scalar r; kiss_fft_scalar i; }kiss_fft_cpx; */ 13 + 14 + #ifndef _kiss_fft_guts_h 15 + #define _kiss_fft_guts_h 16 + 17 + #include "kiss_fft.h" 18 + #include "kiss_fft_log.h" 19 + #include <limits.h> 20 + 21 + #define MAXFACTORS 32 22 + /* e.g. an fft of length 128 has 4 factors 23 + as far as kissfft is concerned 24 + 4*4*4*2 25 + */ 26 + 27 + struct kiss_fft_state{ 28 + int nfft; 29 + int inverse; 30 + int factors[2*MAXFACTORS]; 31 + kiss_fft_cpx twiddles[1]; 32 + }; 33 + 34 + /* 35 + Explanation of macros dealing with complex math: 36 + 37 + C_MUL(m,a,b) : m = a*b 38 + C_FIXDIV( c , div ) : if a fixed point impl., c /= div. noop otherwise 39 + C_SUB( res, a,b) : res = a - b 40 + C_SUBFROM( res , a) : res -= a 41 + C_ADDTO( res , a) : res += a 42 + * */ 43 + #ifdef FIXED_POINT 44 + #include <stdint.h> 45 + #if (FIXED_POINT==32) 46 + # define FRACBITS 31 47 + # define SAMPPROD int64_t 48 + #define SAMP_MAX INT32_MAX 49 + #define SAMP_MIN INT32_MIN 50 + #else 51 + # define FRACBITS 15 52 + # define SAMPPROD int32_t 53 + #define SAMP_MAX INT16_MAX 54 + #define SAMP_MIN INT16_MIN 55 + #endif 56 + 57 + #if defined(CHECK_OVERFLOW) 58 + # define CHECK_OVERFLOW_OP(a,op,b) \ 59 + if ( (SAMPPROD)(a) op (SAMPPROD)(b) > SAMP_MAX || (SAMPPROD)(a) op (SAMPPROD)(b) < SAMP_MIN ) { \ 60 + KISS_FFT_WARNING("overflow (%d " #op" %d) = %ld", (a),(b),(SAMPPROD)(a) op (SAMPPROD)(b)); } 61 + #endif 62 + 63 + 64 + # define smul(a,b) ( (SAMPPROD)(a)*(b) ) 65 + # define sround( x ) (kiss_fft_scalar)( ( (x) + (1<<(FRACBITS-1)) ) >> FRACBITS ) 66 + 67 + # define S_MUL(a,b) sround( smul(a,b) ) 68 + 69 + # define C_MUL(m,a,b) \ 70 + do{ (m).r = sround( smul((a).r,(b).r) - smul((a).i,(b).i) ); \ 71 + (m).i = sround( smul((a).r,(b).i) + smul((a).i,(b).r) ); }while(0) 72 + 73 + # define DIVSCALAR(x,k) \ 74 + (x) = sround( smul( x, SAMP_MAX/k ) ) 75 + 76 + # define C_FIXDIV(c,div) \ 77 + do { DIVSCALAR( (c).r , div); \ 78 + DIVSCALAR( (c).i , div); }while (0) 79 + 80 + # define C_MULBYSCALAR( c, s ) \ 81 + do{ (c).r = sround( smul( (c).r , s ) ) ;\ 82 + (c).i = sround( smul( (c).i , s ) ) ; }while(0) 83 + 84 + #else /* not FIXED_POINT*/ 85 + 86 + # define S_MUL(a,b) ( (a)*(b) ) 87 + #define C_MUL(m,a,b) \ 88 + do{ (m).r = (a).r*(b).r - (a).i*(b).i;\ 89 + (m).i = (a).r*(b).i + (a).i*(b).r; }while(0) 90 + # define C_FIXDIV(c,div) /* NOOP */ 91 + # define C_MULBYSCALAR( c, s ) \ 92 + do{ (c).r *= (s);\ 93 + (c).i *= (s); }while(0) 94 + #endif 95 + 96 + #ifndef CHECK_OVERFLOW_OP 97 + # define CHECK_OVERFLOW_OP(a,op,b) /* noop */ 98 + #endif 99 + 100 + #define C_ADD( res, a,b)\ 101 + do { \ 102 + CHECK_OVERFLOW_OP((a).r,+,(b).r)\ 103 + CHECK_OVERFLOW_OP((a).i,+,(b).i)\ 104 + (res).r=(a).r+(b).r; (res).i=(a).i+(b).i; \ 105 + }while(0) 106 + #define C_SUB( res, a,b)\ 107 + do { \ 108 + CHECK_OVERFLOW_OP((a).r,-,(b).r)\ 109 + CHECK_OVERFLOW_OP((a).i,-,(b).i)\ 110 + (res).r=(a).r-(b).r; (res).i=(a).i-(b).i; \ 111 + }while(0) 112 + #define C_ADDTO( res , a)\ 113 + do { \ 114 + CHECK_OVERFLOW_OP((res).r,+,(a).r)\ 115 + CHECK_OVERFLOW_OP((res).i,+,(a).i)\ 116 + (res).r += (a).r; (res).i += (a).i;\ 117 + }while(0) 118 + 119 + #define C_SUBFROM( res , a)\ 120 + do {\ 121 + CHECK_OVERFLOW_OP((res).r,-,(a).r)\ 122 + CHECK_OVERFLOW_OP((res).i,-,(a).i)\ 123 + (res).r -= (a).r; (res).i -= (a).i; \ 124 + }while(0) 125 + 126 + 127 + #ifdef FIXED_POINT 128 + # define KISS_FFT_COS(phase) floor(.5+SAMP_MAX * cos (phase)) 129 + # define KISS_FFT_SIN(phase) floor(.5+SAMP_MAX * sin (phase)) 130 + # define HALF_OF(x) ((x)>>1) 131 + #elif defined(USE_SIMD) 132 + # define KISS_FFT_COS(phase) _mm_set1_ps( cos(phase) ) 133 + # define KISS_FFT_SIN(phase) _mm_set1_ps( sin(phase) ) 134 + # define HALF_OF(x) ((x)*_mm_set1_ps(.5)) 135 + #else 136 + # define KISS_FFT_COS(phase) (kiss_fft_scalar) cos(phase) 137 + # define KISS_FFT_SIN(phase) (kiss_fft_scalar) sin(phase) 138 + # define HALF_OF(x) ((x)*((kiss_fft_scalar).5)) 139 + #endif 140 + 141 + #define kf_cexp(x,phase) \ 142 + do{ \ 143 + (x)->r = KISS_FFT_COS(phase);\ 144 + (x)->i = KISS_FFT_SIN(phase);\ 145 + }while(0) 146 + 147 + 148 + /* a debugging function */ 149 + #define pcpx(c)\ 150 + KISS_FFT_DEBUG("%g + %gi\n",(double)((c)->r),(double)((c)->i)) 151 + 152 + 153 + #ifdef KISS_FFT_USE_ALLOCA 154 + // define this to allow use of alloca instead of malloc for temporary buffers 155 + // Temporary buffers are used in two case: 156 + // 1. FFT sizes that have "bad" factors. i.e. not 2,3 and 5 157 + // 2. "in-place" FFTs. Notice the quotes, since kissfft does not really do an in-place transform. 158 + #include <alloca.h> 159 + #define KISS_FFT_TMP_ALLOC(nbytes) alloca(nbytes) 160 + #define KISS_FFT_TMP_FREE(ptr) 161 + #else 162 + #define KISS_FFT_TMP_ALLOC(nbytes) KISS_FFT_MALLOC(nbytes) 163 + #define KISS_FFT_TMP_FREE(ptr) KISS_FFT_FREE(ptr) 164 + #endif 165 + 166 + #endif /* _kiss_fft_guts_h */ 167 +
+420 -406
src/ext/kiss_fft.c
··· 1 - /* 2 - * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. 3 - * This file is part of KISS FFT - https://github.com/mborgerding/kissfft 4 - * 5 - * SPDX-License-Identifier: BSD-3-Clause 6 - * See COPYING file for more information. 7 - */ 8 - 9 - 10 - #include "_kiss_fft_guts.h" 11 - /* The guts header contains all the multiplication and addition macros that are defined for 12 - fixed or floating point complex numbers. It also delares the kf_ internal functions. 13 - */ 14 - 15 - static void kf_bfly2( 16 - kiss_fft_cpx* Fout, 17 - const size_t fstride, 18 - const kiss_fft_cfg st, 19 - int m 20 - ) 21 - { 22 - kiss_fft_cpx* Fout2; 23 - kiss_fft_cpx* tw1 = st->twiddles; 24 - kiss_fft_cpx t; 25 - Fout2 = Fout + m; 26 - do { 27 - C_FIXDIV(*Fout, 2); C_FIXDIV(*Fout2, 2); 28 - 29 - C_MUL(t, *Fout2, *tw1); 30 - tw1 += fstride; 31 - C_SUB(*Fout2, *Fout, t); 32 - C_ADDTO(*Fout, t); 33 - ++Fout2; 34 - ++Fout; 35 - } while (--m); 36 - } 37 - 38 - static void kf_bfly4( 39 - kiss_fft_cpx* Fout, 40 - const size_t fstride, 41 - const kiss_fft_cfg st, 42 - const size_t m 43 - ) 44 - { 45 - kiss_fft_cpx* tw1, * tw2, * tw3; 46 - kiss_fft_cpx scratch[6]; 47 - size_t k = m; 48 - const size_t m2 = 2 * m; 49 - const size_t m3 = 3 * m; 50 - 51 - 52 - tw3 = tw2 = tw1 = st->twiddles; 53 - 54 - do { 55 - C_FIXDIV(*Fout, 4); C_FIXDIV(Fout[m], 4); C_FIXDIV(Fout[m2], 4); C_FIXDIV(Fout[m3], 4); 56 - 57 - C_MUL(scratch[0], Fout[m], *tw1); 58 - C_MUL(scratch[1], Fout[m2], *tw2); 59 - C_MUL(scratch[2], Fout[m3], *tw3); 60 - 61 - C_SUB(scratch[5], *Fout, scratch[1]); 62 - C_ADDTO(*Fout, scratch[1]); 63 - C_ADD(scratch[3], scratch[0], scratch[2]); 64 - C_SUB(scratch[4], scratch[0], scratch[2]); 65 - C_SUB(Fout[m2], *Fout, scratch[3]); 66 - tw1 += fstride; 67 - tw2 += fstride * 2; 68 - tw3 += fstride * 3; 69 - C_ADDTO(*Fout, scratch[3]); 70 - 71 - if (st->inverse) { 72 - Fout[m].r = scratch[5].r - scratch[4].i; 73 - Fout[m].i = scratch[5].i + scratch[4].r; 74 - Fout[m3].r = scratch[5].r + scratch[4].i; 75 - Fout[m3].i = scratch[5].i - scratch[4].r; 76 - } 77 - else { 78 - Fout[m].r = scratch[5].r + scratch[4].i; 79 - Fout[m].i = scratch[5].i - scratch[4].r; 80 - Fout[m3].r = scratch[5].r - scratch[4].i; 81 - Fout[m3].i = scratch[5].i + scratch[4].r; 82 - } 83 - ++Fout; 84 - } while (--k); 85 - } 86 - 87 - static void kf_bfly3( 88 - kiss_fft_cpx* Fout, 89 - const size_t fstride, 90 - const kiss_fft_cfg st, 91 - size_t m 92 - ) 93 - { 94 - size_t k = m; 95 - const size_t m2 = 2 * m; 96 - kiss_fft_cpx* tw1, * tw2; 97 - kiss_fft_cpx scratch[5]; 98 - kiss_fft_cpx epi3; 99 - epi3 = st->twiddles[fstride * m]; 100 - 101 - tw1 = tw2 = st->twiddles; 102 - 103 - do { 104 - C_FIXDIV(*Fout, 3); C_FIXDIV(Fout[m], 3); C_FIXDIV(Fout[m2], 3); 105 - 106 - C_MUL(scratch[1], Fout[m], *tw1); 107 - C_MUL(scratch[2], Fout[m2], *tw2); 108 - 109 - C_ADD(scratch[3], scratch[1], scratch[2]); 110 - C_SUB(scratch[0], scratch[1], scratch[2]); 111 - tw1 += fstride; 112 - tw2 += fstride * 2; 113 - 114 - Fout[m].r = Fout->r - HALF_OF(scratch[3].r); 115 - Fout[m].i = Fout->i - HALF_OF(scratch[3].i); 116 - 117 - C_MULBYSCALAR(scratch[0], epi3.i); 118 - 119 - C_ADDTO(*Fout, scratch[3]); 120 - 121 - Fout[m2].r = Fout[m].r + scratch[0].i; 122 - Fout[m2].i = Fout[m].i - scratch[0].r; 123 - 124 - Fout[m].r -= scratch[0].i; 125 - Fout[m].i += scratch[0].r; 126 - 127 - ++Fout; 128 - } while (--k); 129 - } 130 - 131 - static void kf_bfly5( 132 - kiss_fft_cpx* Fout, 133 - const size_t fstride, 134 - const kiss_fft_cfg st, 135 - int m 136 - ) 137 - { 138 - kiss_fft_cpx* Fout0, * Fout1, * Fout2, * Fout3, * Fout4; 139 - int u; 140 - kiss_fft_cpx scratch[13]; 141 - kiss_fft_cpx* twiddles = st->twiddles; 142 - kiss_fft_cpx* tw; 143 - kiss_fft_cpx ya, yb; 144 - ya = twiddles[fstride * m]; 145 - yb = twiddles[fstride * 2 * m]; 146 - 147 - Fout0 = Fout; 148 - Fout1 = Fout0 + m; 149 - Fout2 = Fout0 + 2 * m; 150 - Fout3 = Fout0 + 3 * m; 151 - Fout4 = Fout0 + 4 * m; 152 - 153 - tw = st->twiddles; 154 - for (u = 0; u < m; ++u) { 155 - C_FIXDIV(*Fout0, 5); C_FIXDIV(*Fout1, 5); C_FIXDIV(*Fout2, 5); C_FIXDIV(*Fout3, 5); C_FIXDIV(*Fout4, 5); 156 - scratch[0] = *Fout0; 157 - 158 - C_MUL(scratch[1], *Fout1, tw[u * fstride]); 159 - C_MUL(scratch[2], *Fout2, tw[2 * u * fstride]); 160 - C_MUL(scratch[3], *Fout3, tw[3 * u * fstride]); 161 - C_MUL(scratch[4], *Fout4, tw[4 * u * fstride]); 162 - 163 - C_ADD(scratch[7], scratch[1], scratch[4]); 164 - C_SUB(scratch[10], scratch[1], scratch[4]); 165 - C_ADD(scratch[8], scratch[2], scratch[3]); 166 - C_SUB(scratch[9], scratch[2], scratch[3]); 167 - 168 - Fout0->r += scratch[7].r + scratch[8].r; 169 - Fout0->i += scratch[7].i + scratch[8].i; 170 - 171 - scratch[5].r = scratch[0].r + S_MUL(scratch[7].r, ya.r) + S_MUL(scratch[8].r, yb.r); 172 - scratch[5].i = scratch[0].i + S_MUL(scratch[7].i, ya.r) + S_MUL(scratch[8].i, yb.r); 173 - 174 - scratch[6].r = S_MUL(scratch[10].i, ya.i) + S_MUL(scratch[9].i, yb.i); 175 - scratch[6].i = -S_MUL(scratch[10].r, ya.i) - S_MUL(scratch[9].r, yb.i); 176 - 177 - C_SUB(*Fout1, scratch[5], scratch[6]); 178 - C_ADD(*Fout4, scratch[5], scratch[6]); 179 - 180 - scratch[11].r = scratch[0].r + S_MUL(scratch[7].r, yb.r) + S_MUL(scratch[8].r, ya.r); 181 - scratch[11].i = scratch[0].i + S_MUL(scratch[7].i, yb.r) + S_MUL(scratch[8].i, ya.r); 182 - scratch[12].r = -S_MUL(scratch[10].i, yb.i) + S_MUL(scratch[9].i, ya.i); 183 - scratch[12].i = S_MUL(scratch[10].r, yb.i) - S_MUL(scratch[9].r, ya.i); 184 - 185 - C_ADD(*Fout2, scratch[11], scratch[12]); 186 - C_SUB(*Fout3, scratch[11], scratch[12]); 187 - 188 - ++Fout0; ++Fout1; ++Fout2; ++Fout3; ++Fout4; 189 - } 190 - } 191 - 192 - /* perform the butterfly for one stage of a mixed radix FFT */ 193 - static void kf_bfly_generic( 194 - kiss_fft_cpx* Fout, 195 - const size_t fstride, 196 - const kiss_fft_cfg st, 197 - int m, 198 - int p 199 - ) 200 - { 201 - int u, k, q1, q; 202 - kiss_fft_cpx* twiddles = st->twiddles; 203 - kiss_fft_cpx t; 204 - int Norig = st->nfft; 205 - 206 - kiss_fft_cpx* scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * p); 207 - 208 - for (u = 0; u < m; ++u) { 209 - k = u; 210 - for (q1 = 0; q1 < p; ++q1) { 211 - scratch[q1] = Fout[k]; 212 - C_FIXDIV(scratch[q1], p); 213 - k += m; 214 - } 215 - 216 - k = u; 217 - for (q1 = 0; q1 < p; ++q1) { 218 - int twidx = 0; 219 - Fout[k] = scratch[0]; 220 - for (q = 1; q < p; ++q) { 221 - twidx += fstride * k; 222 - if (twidx >= Norig) twidx -= Norig; 223 - C_MUL(t, scratch[q], twiddles[twidx]); 224 - C_ADDTO(Fout[k], t); 225 - } 226 - k += m; 227 - } 228 - } 229 - KISS_FFT_TMP_FREE(scratch); 230 - } 231 - 232 - static 233 - void kf_work( 234 - kiss_fft_cpx* Fout, 235 - const kiss_fft_cpx* f, 236 - const size_t fstride, 237 - int in_stride, 238 - int* factors, 239 - const kiss_fft_cfg st 240 - ) 241 - { 242 - kiss_fft_cpx* Fout_beg = Fout; 243 - const int p = *factors++; /* the radix */ 244 - const int m = *factors++; /* stage's fft length/p */ 245 - const kiss_fft_cpx* Fout_end = Fout + p * m; 246 - 247 - #ifdef _OPENMP 248 - // use openmp extensions at the 249 - // top-level (not recursive) 250 - if (fstride == 1 && p <= 5) 251 - { 252 - int k; 253 - 254 - // execute the p different work units in different threads 255 - # pragma omp parallel for 256 - for (k = 0; k < p; ++k) 257 - kf_work(Fout + k * m, f + fstride * in_stride * k, fstride * p, in_stride, factors, st); 258 - // all threads have joined by this point 259 - 260 - switch (p) { 261 - case 2: kf_bfly2(Fout, fstride, st, m); break; 262 - case 3: kf_bfly3(Fout, fstride, st, m); break; 263 - case 4: kf_bfly4(Fout, fstride, st, m); break; 264 - case 5: kf_bfly5(Fout, fstride, st, m); break; 265 - default: kf_bfly_generic(Fout, fstride, st, m, p); break; 266 - } 267 - return; 268 - } 269 - #endif 270 - 271 - if (m == 1) { 272 - do { 273 - *Fout = *f; 274 - f += fstride * in_stride; 275 - } while (++Fout != Fout_end); 276 - } 277 - else { 278 - do { 279 - // recursive call: 280 - // DFT of size m*p performed by doing 281 - // p instances of smaller DFTs of size m, 282 - // each one takes a decimated version of the input 283 - kf_work(Fout, f, fstride * p, in_stride, factors, st); 284 - f += fstride * in_stride; 285 - } while ((Fout += m) != Fout_end); 286 - } 287 - 288 - Fout = Fout_beg; 289 - 290 - // recombine the p smaller DFTs 291 - switch (p) { 292 - case 2: kf_bfly2(Fout, fstride, st, m); break; 293 - case 3: kf_bfly3(Fout, fstride, st, m); break; 294 - case 4: kf_bfly4(Fout, fstride, st, m); break; 295 - case 5: kf_bfly5(Fout, fstride, st, m); break; 296 - default: kf_bfly_generic(Fout, fstride, st, m, p); break; 297 - } 298 - } 299 - 300 - /* facbuf is populated by p1,m1,p2,m2, ... 301 - where 302 - p[i] * m[i] = m[i-1] 303 - m0 = n */ 304 - static 305 - void kf_factor(int n, int* facbuf) 306 - { 307 - int p = 4; 308 - double floor_sqrt; 309 - floor_sqrt = floor(sqrt((double)n)); 310 - 311 - /*factor out powers of 4, powers of 2, then any remaining primes */ 312 - do { 313 - while (n % p) { 314 - switch (p) { 315 - case 4: p = 2; break; 316 - case 2: p = 3; break; 317 - default: p += 2; break; 318 - } 319 - if (p > floor_sqrt) 320 - p = n; /* no more factors, skip to end */ 321 - } 322 - n /= p; 323 - *facbuf++ = p; 324 - *facbuf++ = n; 325 - } while (n > 1); 326 - } 327 - 328 - /* 329 - * 330 - * User-callable function to allocate all necessary storage space for the fft. 331 - * 332 - * The return value is a contiguous block of memory, allocated with malloc. As such, 333 - * It can be freed with free(), rather than a kiss_fft-specific function. 334 - * */ 335 - kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void* mem, size_t* lenmem) 336 - { 337 - kiss_fft_cfg st = NULL; 338 - size_t memneeded = sizeof(struct kiss_fft_state) 339 - + sizeof(kiss_fft_cpx) * (nfft - 1); /* twiddle factors*/ 340 - 341 - if (lenmem == NULL) { 342 - st = (kiss_fft_cfg)KISS_FFT_MALLOC(memneeded); 343 - } 344 - else { 345 - if (mem != NULL && *lenmem >= memneeded) 346 - st = (kiss_fft_cfg)mem; 347 - *lenmem = memneeded; 348 - } 349 - if (st) { 350 - int i; 351 - st->nfft = nfft; 352 - st->inverse = inverse_fft; 353 - 354 - for (i = 0; i < nfft; ++i) { 355 - const double pi = 3.141592653589793238462643383279502884197169399375105820974944; 356 - double phase = -2 * pi * i / nfft; 357 - if (st->inverse) 358 - phase *= -1; 359 - kf_cexp(st->twiddles + i, phase); 360 - } 361 - 362 - kf_factor(nfft, st->factors); 363 - } 364 - return st; 365 - } 366 - 367 - 368 - void kiss_fft_stride(kiss_fft_cfg st, const kiss_fft_cpx* fin, kiss_fft_cpx* fout, int in_stride) 369 - { 370 - if (fin == fout) { 371 - //NOTE: this is not really an in-place FFT algorithm. 372 - //It just performs an out-of-place FFT into a temp buffer 373 - kiss_fft_cpx* tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx) * st->nfft); 374 - kf_work(tmpbuf, fin, 1, in_stride, st->factors, st); 375 - memcpy(fout, tmpbuf, sizeof(kiss_fft_cpx) * st->nfft); 376 - KISS_FFT_TMP_FREE(tmpbuf); 377 - } 378 - else { 379 - kf_work(fout, fin, 1, in_stride, st->factors, st); 380 - } 381 - } 382 - 383 - void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx* fin, kiss_fft_cpx* fout) 384 - { 385 - kiss_fft_stride(cfg, fin, fout, 1); 386 - } 387 - 388 - 389 - void kiss_fft_cleanup(void) 390 - { 391 - // nothing needed any more 392 - } 393 - 394 - int kiss_fft_next_fast_size(int n) 395 - { 396 - while (1) { 397 - int m = n; 398 - while ((m % 2) == 0) m /= 2; 399 - while ((m % 3) == 0) m /= 3; 400 - while ((m % 5) == 0) m /= 5; 401 - if (m <= 1) 402 - break; /* n is completely factorable by twos, threes, and fives */ 403 - n++; 404 - } 405 - return n; 406 - } 1 + /* 2 + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. 3 + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft 4 + * 5 + * SPDX-License-Identifier: BSD-3-Clause 6 + * See COPYING file for more information. 7 + */ 8 + 9 + 10 + #include "_kiss_fft_guts.h" 11 + /* The guts header contains all the multiplication and addition macros that are defined for 12 + fixed or floating point complex numbers. It also delares the kf_ internal functions. 13 + */ 14 + 15 + static void kf_bfly2( 16 + kiss_fft_cpx * Fout, 17 + const size_t fstride, 18 + const kiss_fft_cfg st, 19 + int m 20 + ) 21 + { 22 + kiss_fft_cpx * Fout2; 23 + kiss_fft_cpx * tw1 = st->twiddles; 24 + kiss_fft_cpx t; 25 + Fout2 = Fout + m; 26 + do{ 27 + C_FIXDIV(*Fout,2); C_FIXDIV(*Fout2,2); 28 + 29 + C_MUL (t, *Fout2 , *tw1); 30 + tw1 += fstride; 31 + C_SUB( *Fout2 , *Fout , t ); 32 + C_ADDTO( *Fout , t ); 33 + ++Fout2; 34 + ++Fout; 35 + }while (--m); 36 + } 37 + 38 + static void kf_bfly4( 39 + kiss_fft_cpx * Fout, 40 + const size_t fstride, 41 + const kiss_fft_cfg st, 42 + const size_t m 43 + ) 44 + { 45 + kiss_fft_cpx *tw1,*tw2,*tw3; 46 + kiss_fft_cpx scratch[6]; 47 + size_t k=m; 48 + const size_t m2=2*m; 49 + const size_t m3=3*m; 50 + 51 + 52 + tw3 = tw2 = tw1 = st->twiddles; 53 + 54 + do { 55 + C_FIXDIV(*Fout,4); C_FIXDIV(Fout[m],4); C_FIXDIV(Fout[m2],4); C_FIXDIV(Fout[m3],4); 56 + 57 + C_MUL(scratch[0],Fout[m] , *tw1 ); 58 + C_MUL(scratch[1],Fout[m2] , *tw2 ); 59 + C_MUL(scratch[2],Fout[m3] , *tw3 ); 60 + 61 + C_SUB( scratch[5] , *Fout, scratch[1] ); 62 + C_ADDTO(*Fout, scratch[1]); 63 + C_ADD( scratch[3] , scratch[0] , scratch[2] ); 64 + C_SUB( scratch[4] , scratch[0] , scratch[2] ); 65 + C_SUB( Fout[m2], *Fout, scratch[3] ); 66 + tw1 += fstride; 67 + tw2 += fstride*2; 68 + tw3 += fstride*3; 69 + C_ADDTO( *Fout , scratch[3] ); 70 + 71 + if(st->inverse) { 72 + Fout[m].r = scratch[5].r - scratch[4].i; 73 + Fout[m].i = scratch[5].i + scratch[4].r; 74 + Fout[m3].r = scratch[5].r + scratch[4].i; 75 + Fout[m3].i = scratch[5].i - scratch[4].r; 76 + }else{ 77 + Fout[m].r = scratch[5].r + scratch[4].i; 78 + Fout[m].i = scratch[5].i - scratch[4].r; 79 + Fout[m3].r = scratch[5].r - scratch[4].i; 80 + Fout[m3].i = scratch[5].i + scratch[4].r; 81 + } 82 + ++Fout; 83 + }while(--k); 84 + } 85 + 86 + static void kf_bfly3( 87 + kiss_fft_cpx * Fout, 88 + const size_t fstride, 89 + const kiss_fft_cfg st, 90 + size_t m 91 + ) 92 + { 93 + size_t k=m; 94 + const size_t m2 = 2*m; 95 + kiss_fft_cpx *tw1,*tw2; 96 + kiss_fft_cpx scratch[5]; 97 + kiss_fft_cpx epi3; 98 + epi3 = st->twiddles[fstride*m]; 99 + 100 + tw1=tw2=st->twiddles; 101 + 102 + do{ 103 + C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); 104 + 105 + C_MUL(scratch[1],Fout[m] , *tw1); 106 + C_MUL(scratch[2],Fout[m2] , *tw2); 107 + 108 + C_ADD(scratch[3],scratch[1],scratch[2]); 109 + C_SUB(scratch[0],scratch[1],scratch[2]); 110 + tw1 += fstride; 111 + tw2 += fstride*2; 112 + 113 + Fout[m].r = Fout->r - HALF_OF(scratch[3].r); 114 + Fout[m].i = Fout->i - HALF_OF(scratch[3].i); 115 + 116 + C_MULBYSCALAR( scratch[0] , epi3.i ); 117 + 118 + C_ADDTO(*Fout,scratch[3]); 119 + 120 + Fout[m2].r = Fout[m].r + scratch[0].i; 121 + Fout[m2].i = Fout[m].i - scratch[0].r; 122 + 123 + Fout[m].r -= scratch[0].i; 124 + Fout[m].i += scratch[0].r; 125 + 126 + ++Fout; 127 + }while(--k); 128 + } 129 + 130 + static void kf_bfly5( 131 + kiss_fft_cpx * Fout, 132 + const size_t fstride, 133 + const kiss_fft_cfg st, 134 + int m 135 + ) 136 + { 137 + kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 138 + int u; 139 + kiss_fft_cpx scratch[13]; 140 + kiss_fft_cpx * twiddles = st->twiddles; 141 + kiss_fft_cpx *tw; 142 + kiss_fft_cpx ya,yb; 143 + ya = twiddles[fstride*m]; 144 + yb = twiddles[fstride*2*m]; 145 + 146 + Fout0=Fout; 147 + Fout1=Fout0+m; 148 + Fout2=Fout0+2*m; 149 + Fout3=Fout0+3*m; 150 + Fout4=Fout0+4*m; 151 + 152 + tw=st->twiddles; 153 + for ( u=0; u<m; ++u ) { 154 + C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); 155 + scratch[0] = *Fout0; 156 + 157 + C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); 158 + C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); 159 + C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); 160 + C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); 161 + 162 + C_ADD( scratch[7],scratch[1],scratch[4]); 163 + C_SUB( scratch[10],scratch[1],scratch[4]); 164 + C_ADD( scratch[8],scratch[2],scratch[3]); 165 + C_SUB( scratch[9],scratch[2],scratch[3]); 166 + 167 + Fout0->r += scratch[7].r + scratch[8].r; 168 + Fout0->i += scratch[7].i + scratch[8].i; 169 + 170 + scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); 171 + scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); 172 + 173 + scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); 174 + scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); 175 + 176 + C_SUB(*Fout1,scratch[5],scratch[6]); 177 + C_ADD(*Fout4,scratch[5],scratch[6]); 178 + 179 + scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); 180 + scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); 181 + scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); 182 + scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); 183 + 184 + C_ADD(*Fout2,scratch[11],scratch[12]); 185 + C_SUB(*Fout3,scratch[11],scratch[12]); 186 + 187 + ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; 188 + } 189 + } 190 + 191 + /* perform the butterfly for one stage of a mixed radix FFT */ 192 + static void kf_bfly_generic( 193 + kiss_fft_cpx * Fout, 194 + const size_t fstride, 195 + const kiss_fft_cfg st, 196 + int m, 197 + int p 198 + ) 199 + { 200 + int u,k,q1,q; 201 + kiss_fft_cpx * twiddles = st->twiddles; 202 + kiss_fft_cpx t; 203 + int Norig = st->nfft; 204 + 205 + kiss_fft_cpx * scratch = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC(sizeof(kiss_fft_cpx)*p); 206 + if (scratch == NULL){ 207 + KISS_FFT_ERROR("Memory allocation failed."); 208 + return; 209 + } 210 + 211 + for ( u=0; u<m; ++u ) { 212 + k=u; 213 + for ( q1=0 ; q1<p ; ++q1 ) { 214 + scratch[q1] = Fout[ k ]; 215 + C_FIXDIV(scratch[q1],p); 216 + k += m; 217 + } 218 + 219 + k=u; 220 + for ( q1=0 ; q1<p ; ++q1 ) { 221 + int twidx=0; 222 + Fout[ k ] = scratch[0]; 223 + for (q=1;q<p;++q ) { 224 + twidx += fstride * k; 225 + if (twidx>=Norig) twidx-=Norig; 226 + C_MUL(t,scratch[q] , twiddles[twidx] ); 227 + C_ADDTO( Fout[ k ] ,t); 228 + } 229 + k += m; 230 + } 231 + } 232 + KISS_FFT_TMP_FREE(scratch); 233 + } 234 + 235 + static 236 + void kf_work( 237 + kiss_fft_cpx * Fout, 238 + const kiss_fft_cpx * f, 239 + const size_t fstride, 240 + int in_stride, 241 + int * factors, 242 + const kiss_fft_cfg st 243 + ) 244 + { 245 + kiss_fft_cpx * Fout_beg=Fout; 246 + const int p=*factors++; /* the radix */ 247 + const int m=*factors++; /* stage's fft length/p */ 248 + const kiss_fft_cpx * Fout_end = Fout + p*m; 249 + 250 + #ifdef _OPENMP 251 + // use openmp extensions at the 252 + // top-level (not recursive) 253 + if (fstride==1 && p<=5 && m!=1) 254 + { 255 + int k; 256 + 257 + // execute the p different work units in different threads 258 + # pragma omp parallel for 259 + for (k=0;k<p;++k) 260 + kf_work( Fout +k*m, f+ fstride*in_stride*k,fstride*p,in_stride,factors,st); 261 + // all threads have joined by this point 262 + 263 + switch (p) { 264 + case 2: kf_bfly2(Fout,fstride,st,m); break; 265 + case 3: kf_bfly3(Fout,fstride,st,m); break; 266 + case 4: kf_bfly4(Fout,fstride,st,m); break; 267 + case 5: kf_bfly5(Fout,fstride,st,m); break; 268 + default: kf_bfly_generic(Fout,fstride,st,m,p); break; 269 + } 270 + return; 271 + } 272 + #endif 273 + 274 + if (m==1) { 275 + do{ 276 + *Fout = *f; 277 + f += fstride*in_stride; 278 + }while(++Fout != Fout_end ); 279 + }else{ 280 + do{ 281 + // recursive call: 282 + // DFT of size m*p performed by doing 283 + // p instances of smaller DFTs of size m, 284 + // each one takes a decimated version of the input 285 + kf_work( Fout , f, fstride*p, in_stride, factors,st); 286 + f += fstride*in_stride; 287 + }while( (Fout += m) != Fout_end ); 288 + } 289 + 290 + Fout=Fout_beg; 291 + 292 + // recombine the p smaller DFTs 293 + switch (p) { 294 + case 2: kf_bfly2(Fout,fstride,st,m); break; 295 + case 3: kf_bfly3(Fout,fstride,st,m); break; 296 + case 4: kf_bfly4(Fout,fstride,st,m); break; 297 + case 5: kf_bfly5(Fout,fstride,st,m); break; 298 + default: kf_bfly_generic(Fout,fstride,st,m,p); break; 299 + } 300 + } 301 + 302 + /* facbuf is populated by p1,m1,p2,m2, ... 303 + where 304 + p[i] * m[i] = m[i-1] 305 + m0 = n */ 306 + static 307 + void kf_factor(int n,int * facbuf) 308 + { 309 + int p=4; 310 + double floor_sqrt; 311 + floor_sqrt = floor( sqrt((double)n) ); 312 + 313 + /*factor out powers of 4, powers of 2, then any remaining primes */ 314 + do { 315 + while (n % p) { 316 + switch (p) { 317 + case 4: p = 2; break; 318 + case 2: p = 3; break; 319 + default: p += 2; break; 320 + } 321 + if (p > floor_sqrt) 322 + p = n; /* no more factors, skip to end */ 323 + } 324 + n /= p; 325 + *facbuf++ = p; 326 + *facbuf++ = n; 327 + } while (n > 1); 328 + } 329 + 330 + /* 331 + * 332 + * User-callable function to allocate all necessary storage space for the fft. 333 + * 334 + * The return value is a contiguous block of memory, allocated with malloc. As such, 335 + * It can be freed with free(), rather than a kiss_fft-specific function. 336 + * */ 337 + kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) 338 + { 339 + KISS_FFT_ALIGN_CHECK(mem) 340 + 341 + kiss_fft_cfg st=NULL; 342 + size_t memneeded = KISS_FFT_ALIGN_SIZE_UP(sizeof(struct kiss_fft_state) 343 + + sizeof(kiss_fft_cpx)*(nfft-1)); /* twiddle factors*/ 344 + 345 + if ( lenmem==NULL ) { 346 + st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded ); 347 + }else{ 348 + if (mem != NULL && *lenmem >= memneeded) 349 + st = (kiss_fft_cfg)mem; 350 + *lenmem = memneeded; 351 + } 352 + if (st) { 353 + int i; 354 + st->nfft=nfft; 355 + st->inverse = inverse_fft; 356 + 357 + for (i=0;i<nfft;++i) { 358 + const double pi=3.141592653589793238462643383279502884197169399375105820974944; 359 + double phase = -2*pi*i / nfft; 360 + if (st->inverse) 361 + phase *= -1; 362 + kf_cexp(st->twiddles+i, phase ); 363 + } 364 + 365 + kf_factor(nfft,st->factors); 366 + } 367 + return st; 368 + } 369 + 370 + 371 + void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) 372 + { 373 + if (fin == fout) { 374 + //NOTE: this is not really an in-place FFT algorithm. 375 + //It just performs an out-of-place FFT into a temp buffer 376 + if (fout == NULL){ 377 + KISS_FFT_ERROR("fout buffer NULL."); 378 + return; 379 + } 380 + 381 + kiss_fft_cpx * tmpbuf = (kiss_fft_cpx*)KISS_FFT_TMP_ALLOC( sizeof(kiss_fft_cpx)*st->nfft); 382 + if (tmpbuf == NULL){ 383 + KISS_FFT_ERROR("Memory allocation error."); 384 + return; 385 + } 386 + 387 + 388 + 389 + kf_work(tmpbuf,fin,1,in_stride, st->factors,st); 390 + memcpy(fout,tmpbuf,sizeof(kiss_fft_cpx)*st->nfft); 391 + KISS_FFT_TMP_FREE(tmpbuf); 392 + }else{ 393 + kf_work( fout, fin, 1,in_stride, st->factors,st ); 394 + } 395 + } 396 + 397 + void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 398 + { 399 + kiss_fft_stride(cfg,fin,fout,1); 400 + } 401 + 402 + 403 + void kiss_fft_cleanup(void) 404 + { 405 + // nothing needed any more 406 + } 407 + 408 + int kiss_fft_next_fast_size(int n) 409 + { 410 + while(1) { 411 + int m=n; 412 + while ( (m%2) == 0 ) m/=2; 413 + while ( (m%3) == 0 ) m/=3; 414 + while ( (m%5) == 0 ) m/=5; 415 + if (m<=1) 416 + break; /* n is completely factorable by twos, threes, and fives */ 417 + n++; 418 + } 419 + return n; 420 + }
+160 -132
src/ext/kiss_fft.h
··· 1 - /* 2 - * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. 3 - * This file is part of KISS FFT - https://github.com/mborgerding/kissfft 4 - * 5 - * SPDX-License-Identifier: BSD-3-Clause 6 - * See COPYING file for more information. 7 - */ 8 - 9 - #ifndef KISS_FFT_H 10 - #define KISS_FFT_H 11 - 12 - #include <stdlib.h> 13 - #include <stdio.h> 14 - #include <math.h> 15 - #include <string.h> 16 - 17 - #ifdef __cplusplus 18 - extern "C" { 19 - #endif 20 - 21 - /* 22 - ATTENTION! 23 - If you would like a : 24 - -- a utility that will handle the caching of fft objects 25 - -- real-only (no imaginary time component ) FFT 26 - -- a multi-dimensional FFT 27 - -- a command-line utility to perform ffts 28 - -- a command-line utility to perform fast-convolution filtering 29 - 30 - Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c 31 - in the tools/ directory. 32 - */ 33 - 34 - #ifdef USE_SIMD 35 - # include <xmmintrin.h> 36 - # define kiss_fft_scalar __m128 37 - #define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16) 38 - #define KISS_FFT_FREE _mm_free 39 - #else 40 - #define KISS_FFT_MALLOC malloc 41 - #define KISS_FFT_FREE free 42 - #endif 43 - 44 - 45 - #ifdef FIXED_POINT 46 - #include <sys/types.h> 47 - # if (FIXED_POINT == 32) 48 - # define kiss_fft_scalar int32_t 49 - # else 50 - # define kiss_fft_scalar int16_t 51 - # endif 52 - #else 53 - # ifndef kiss_fft_scalar 54 - /* default is float */ 55 - # define kiss_fft_scalar float 56 - # endif 57 - #endif 58 - 59 - typedef struct { 60 - kiss_fft_scalar r; 61 - kiss_fft_scalar i; 62 - }kiss_fft_cpx; 63 - 64 - typedef struct kiss_fft_state* kiss_fft_cfg; 65 - 66 - /* 67 - * kiss_fft_alloc 68 - * 69 - * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. 70 - * 71 - * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL); 72 - * 73 - * The return value from fft_alloc is a cfg buffer used internally 74 - * by the fft routine or NULL. 75 - * 76 - * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc. 77 - * The returned value should be free()d when done to avoid memory leaks. 78 - * 79 - * The state can be placed in a user supplied buffer 'mem': 80 - * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, 81 - * then the function places the cfg in mem and the size used in *lenmem 82 - * and returns mem. 83 - * 84 - * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), 85 - * then the function returns NULL and places the minimum cfg 86 - * buffer size in *lenmem. 87 - * */ 88 - 89 - kiss_fft_cfg kiss_fft_alloc(int nfft, int inverse_fft, void* mem, size_t* lenmem); 90 - 91 - /* 92 - * kiss_fft(cfg,in_out_buf) 93 - * 94 - * Perform an FFT on a complex input buffer. 95 - * for a forward FFT, 96 - * fin should be f[0] , f[1] , ... ,f[nfft-1] 97 - * fout will be F[0] , F[1] , ... ,F[nfft-1] 98 - * Note that each element is complex and can be accessed like 99 - f[k].r and f[k].i 100 - * */ 101 - void kiss_fft(kiss_fft_cfg cfg, const kiss_fft_cpx* fin, kiss_fft_cpx* fout); 102 - 103 - /* 104 - A more generic version of the above function. It reads its input from every Nth sample. 105 - * */ 106 - void kiss_fft_stride(kiss_fft_cfg cfg, const kiss_fft_cpx* fin, kiss_fft_cpx* fout, int fin_stride); 107 - 108 - /* If kiss_fft_alloc allocated a buffer, it is one contiguous 109 - buffer and can be simply free()d when no longer needed*/ 110 - #define kiss_fft_free KISS_FFT_FREE 111 - 112 - /* 113 - Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up 114 - your compiler output to call this before you exit. 115 - */ 116 - void kiss_fft_cleanup(void); 117 - 118 - 119 - /* 120 - * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) 121 - */ 122 - int kiss_fft_next_fast_size(int n); 123 - 124 - /* for real ffts, we need an even size */ 125 - #define kiss_fftr_next_fast_size_real(n) \ 126 - (kiss_fft_next_fast_size( ((n)+1)>>1)<<1) 127 - 128 - #ifdef __cplusplus 129 - } 130 - #endif 131 - 132 - #endif 1 + /* 2 + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. 3 + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft 4 + * 5 + * SPDX-License-Identifier: BSD-3-Clause 6 + * See COPYING file for more information. 7 + */ 8 + 9 + #ifndef KISS_FFT_H 10 + #define KISS_FFT_H 11 + 12 + #include <stdlib.h> 13 + #include <stdio.h> 14 + #include <math.h> 15 + #include <string.h> 16 + 17 + // Define KISS_FFT_SHARED macro to properly export symbols 18 + #ifdef KISS_FFT_SHARED 19 + # ifdef _WIN32 20 + # ifdef KISS_FFT_BUILD 21 + # define KISS_FFT_API __declspec(dllexport) 22 + # else 23 + # define KISS_FFT_API __declspec(dllimport) 24 + # endif 25 + # else 26 + # define KISS_FFT_API __attribute__ ((visibility ("default"))) 27 + # endif 28 + #else 29 + # define KISS_FFT_API 30 + #endif 31 + 32 + #ifdef __cplusplus 33 + extern "C" { 34 + #endif 35 + 36 + /* 37 + ATTENTION! 38 + If you would like a : 39 + -- a utility that will handle the caching of fft objects 40 + -- real-only (no imaginary time component ) FFT 41 + -- a multi-dimensional FFT 42 + -- a command-line utility to perform ffts 43 + -- a command-line utility to perform fast-convolution filtering 44 + 45 + Then see kfc.h kiss_fftr.h kiss_fftnd.h fftutil.c kiss_fastfir.c 46 + in the tools/ directory. 47 + */ 48 + 49 + /* User may override KISS_FFT_MALLOC and/or KISS_FFT_FREE. */ 50 + #ifdef USE_SIMD 51 + # include <xmmintrin.h> 52 + # define kiss_fft_scalar __m128 53 + # ifndef KISS_FFT_MALLOC 54 + # define KISS_FFT_MALLOC(nbytes) _mm_malloc(nbytes,16) 55 + # define KISS_FFT_ALIGN_CHECK(ptr) 56 + # define KISS_FFT_ALIGN_SIZE_UP(size) ((size + 15UL) & ~0xFUL) 57 + # endif 58 + # ifndef KISS_FFT_FREE 59 + # define KISS_FFT_FREE _mm_free 60 + # endif 61 + #else 62 + # define KISS_FFT_ALIGN_CHECK(ptr) 63 + # define KISS_FFT_ALIGN_SIZE_UP(size) (size) 64 + # ifndef KISS_FFT_MALLOC 65 + # define KISS_FFT_MALLOC malloc 66 + # endif 67 + # ifndef KISS_FFT_FREE 68 + # define KISS_FFT_FREE free 69 + # endif 70 + #endif 71 + 72 + 73 + #ifdef FIXED_POINT 74 + #include <stdint.h> 75 + # if (FIXED_POINT == 32) 76 + # define kiss_fft_scalar int32_t 77 + # else 78 + # define kiss_fft_scalar int16_t 79 + # endif 80 + #else 81 + # ifndef kiss_fft_scalar 82 + /* default is float */ 83 + # define kiss_fft_scalar float 84 + # endif 85 + #endif 86 + 87 + typedef struct { 88 + kiss_fft_scalar r; 89 + kiss_fft_scalar i; 90 + }kiss_fft_cpx; 91 + 92 + typedef struct kiss_fft_state* kiss_fft_cfg; 93 + 94 + /* 95 + * kiss_fft_alloc 96 + * 97 + * Initialize a FFT (or IFFT) algorithm's cfg/state buffer. 98 + * 99 + * typical usage: kiss_fft_cfg mycfg=kiss_fft_alloc(1024,0,NULL,NULL); 100 + * 101 + * The return value from fft_alloc is a cfg buffer used internally 102 + * by the fft routine or NULL. 103 + * 104 + * If lenmem is NULL, then kiss_fft_alloc will allocate a cfg buffer using malloc. 105 + * The returned value should be free()d when done to avoid memory leaks. 106 + * 107 + * The state can be placed in a user supplied buffer 'mem': 108 + * If lenmem is not NULL and mem is not NULL and *lenmem is large enough, 109 + * then the function places the cfg in mem and the size used in *lenmem 110 + * and returns mem. 111 + * 112 + * If lenmem is not NULL and ( mem is NULL or *lenmem is not large enough), 113 + * then the function returns NULL and places the minimum cfg 114 + * buffer size in *lenmem. 115 + * */ 116 + 117 + kiss_fft_cfg KISS_FFT_API kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem); 118 + 119 + /* 120 + * kiss_fft(cfg,in_out_buf) 121 + * 122 + * Perform an FFT on a complex input buffer. 123 + * for a forward FFT, 124 + * fin should be f[0] , f[1] , ... ,f[nfft-1] 125 + * fout will be F[0] , F[1] , ... ,F[nfft-1] 126 + * Note that each element is complex and can be accessed like 127 + f[k].r and f[k].i 128 + * */ 129 + void KISS_FFT_API kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); 130 + 131 + /* 132 + A more generic version of the above function. It reads its input from every Nth sample. 133 + * */ 134 + void KISS_FFT_API kiss_fft_stride(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int fin_stride); 135 + 136 + /* If kiss_fft_alloc allocated a buffer, it is one contiguous 137 + buffer and can be simply free()d when no longer needed*/ 138 + #define kiss_fft_free KISS_FFT_FREE 139 + 140 + /* 141 + Cleans up some memory that gets managed internally. Not necessary to call, but it might clean up 142 + your compiler output to call this before you exit. 143 + */ 144 + void KISS_FFT_API kiss_fft_cleanup(void); 145 + 146 + 147 + /* 148 + * Returns the smallest integer k, such that k>=n and k has only "fast" factors (2,3,5) 149 + */ 150 + int KISS_FFT_API kiss_fft_next_fast_size(int n); 151 + 152 + /* for real ffts, we need an even size */ 153 + #define kiss_fftr_next_fast_size_real(n) \ 154 + (kiss_fft_next_fast_size( ((n)+1)>>1)<<1) 155 + 156 + #ifdef __cplusplus 157 + } 158 + #endif 159 + 160 + #endif
+36
src/ext/kiss_fft_log.h
··· 1 + /* 2 + * Copyright (c) 2003-2010, Mark Borgerding. All rights reserved. 3 + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft 4 + * 5 + * SPDX-License-Identifier: BSD-3-Clause 6 + * See COPYING file for more information. 7 + */ 8 + 9 + #ifndef kiss_fft_log_h 10 + #define kiss_fft_log_h 11 + 12 + #define ERROR 1 13 + #define WARNING 2 14 + #define INFO 3 15 + #define DEBUG 4 16 + 17 + #define STRINGIFY(x) #x 18 + #define TOSTRING(x) STRINGIFY(x) 19 + 20 + #if defined(NDEBUG) 21 + # define KISS_FFT_LOG_MSG(severity, ...) ((void)0) 22 + #else 23 + # define KISS_FFT_LOG_MSG(severity, ...) \ 24 + fprintf(stderr, "[" #severity "] " __FILE__ ":" TOSTRING(__LINE__) " "); \ 25 + fprintf(stderr, __VA_ARGS__); \ 26 + fprintf(stderr, "\n") 27 + #endif 28 + 29 + #define KISS_FFT_ERROR(...) KISS_FFT_LOG_MSG(ERROR, __VA_ARGS__) 30 + #define KISS_FFT_WARNING(...) KISS_FFT_LOG_MSG(WARNING, __VA_ARGS__) 31 + #define KISS_FFT_INFO(...) KISS_FFT_LOG_MSG(INFO, __VA_ARGS__) 32 + #define KISS_FFT_DEBUG(...) KISS_FFT_LOG_MSG(DEBUG, __VA_ARGS__) 33 + 34 + 35 + 36 + #endif /* kiss_fft_log_h */
+155 -154
src/ext/kiss_fftr.c
··· 1 - /* 2 - * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. 3 - * This file is part of KISS FFT - https://github.com/mborgerding/kissfft 4 - * 5 - * SPDX-License-Identifier: BSD-3-Clause 6 - * See COPYING file for more information. 7 - */ 8 - 9 - #include "kiss_fftr.h" 10 - #include "_kiss_fft_guts.h" 11 - 12 - struct kiss_fftr_state { 13 - kiss_fft_cfg substate; 14 - kiss_fft_cpx* tmpbuf; 15 - kiss_fft_cpx* super_twiddles; 16 - #ifdef USE_SIMD 17 - void* pad; 18 - #endif 19 - }; 20 - 21 - kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void* mem, size_t* lenmem) 22 - { 23 - int i; 24 - kiss_fftr_cfg st = NULL; 25 - size_t subsize, memneeded; 26 - 27 - if (nfft & 1) { 28 - fprintf(stderr, "Real FFT optimization must be even.\n"); 29 - return NULL; 30 - } 31 - nfft >>= 1; 32 - 33 - kiss_fft_alloc(nfft, inverse_fft, NULL, &subsize); 34 - memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * (nfft * 3 / 2); 35 - 36 - if (lenmem == NULL) { 37 - st = (kiss_fftr_cfg)KISS_FFT_MALLOC(memneeded); 38 - } 39 - else { 40 - if (*lenmem >= memneeded) 41 - st = (kiss_fftr_cfg)mem; 42 - *lenmem = memneeded; 43 - } 44 - if (!st) 45 - return NULL; 46 - 47 - st->substate = (kiss_fft_cfg)(st + 1); /*just beyond kiss_fftr_state struct */ 48 - st->tmpbuf = (kiss_fft_cpx*)(((char*)st->substate) + subsize); 49 - st->super_twiddles = st->tmpbuf + nfft; 50 - kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); 51 - 52 - for (i = 0; i < nfft / 2; ++i) { 53 - double phase = 54 - -3.14159265358979323846264338327 * ((double)(i + 1) / nfft + .5); 55 - if (inverse_fft) 56 - phase *= -1; 57 - kf_cexp(st->super_twiddles + i, phase); 58 - } 59 - return st; 60 - } 61 - 62 - void kiss_fftr(kiss_fftr_cfg st, const kiss_fft_scalar* timedata, kiss_fft_cpx* freqdata) 63 - { 64 - /* input buffer timedata is stored row-wise */ 65 - int k, ncfft; 66 - kiss_fft_cpx fpnk, fpk, f1k, f2k, tw, tdc; 67 - 68 - if (st->substate->inverse) { 69 - fprintf(stderr, "kiss fft usage error: improper alloc\n"); 70 - exit(1); 71 - } 72 - 73 - ncfft = st->substate->nfft; 74 - 75 - /*perform the parallel fft of two real signals packed in real,imag*/ 76 - kiss_fft(st->substate, (const kiss_fft_cpx*)timedata, st->tmpbuf); 77 - /* The real part of the DC element of the frequency spectrum in st->tmpbuf 78 - * contains the sum of the even-numbered elements of the input time sequence 79 - * The imag part is the sum of the odd-numbered elements 80 - * 81 - * The sum of tdc.r and tdc.i is the sum of the input time sequence. 82 - * yielding DC of input time sequence 83 - * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... 84 - * yielding Nyquist bin of input time sequence 85 - */ 86 - 87 - tdc.r = st->tmpbuf[0].r; 88 - tdc.i = st->tmpbuf[0].i; 89 - C_FIXDIV(tdc, 2); 90 - CHECK_OVERFLOW_OP(tdc.r, +, tdc.i); 91 - CHECK_OVERFLOW_OP(tdc.r, -, tdc.i); 92 - freqdata[0].r = tdc.r + tdc.i; 93 - freqdata[ncfft].r = tdc.r - tdc.i; 94 - #ifdef USE_SIMD 95 - freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); 96 - #else 97 - freqdata[ncfft].i = freqdata[0].i = 0; 98 - #endif 99 - 100 - for (k = 1; k <= ncfft / 2; ++k) { 101 - fpk = st->tmpbuf[k]; 102 - fpnk.r = st->tmpbuf[ncfft - k].r; 103 - fpnk.i = -st->tmpbuf[ncfft - k].i; 104 - C_FIXDIV(fpk, 2); 105 - C_FIXDIV(fpnk, 2); 106 - 107 - C_ADD(f1k, fpk, fpnk); 108 - C_SUB(f2k, fpk, fpnk); 109 - C_MUL(tw, f2k, st->super_twiddles[k - 1]); 110 - 111 - freqdata[k].r = HALF_OF(f1k.r + tw.r); 112 - freqdata[k].i = HALF_OF(f1k.i + tw.i); 113 - freqdata[ncfft - k].r = HALF_OF(f1k.r - tw.r); 114 - freqdata[ncfft - k].i = HALF_OF(tw.i - f1k.i); 115 - } 116 - } 117 - 118 - void kiss_fftri(kiss_fftr_cfg st, const kiss_fft_cpx* freqdata, kiss_fft_scalar* timedata) 119 - { 120 - /* input buffer timedata is stored row-wise */ 121 - int k, ncfft; 122 - 123 - if (st->substate->inverse == 0) { 124 - fprintf(stderr, "kiss fft usage error: improper alloc\n"); 125 - exit(1); 126 - } 127 - 128 - ncfft = st->substate->nfft; 129 - 130 - st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r; 131 - st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r; 132 - C_FIXDIV(st->tmpbuf[0], 2); 133 - 134 - for (k = 1; k <= ncfft / 2; ++k) { 135 - kiss_fft_cpx fk, fnkc, fek, fok, tmp; 136 - fk = freqdata[k]; 137 - fnkc.r = freqdata[ncfft - k].r; 138 - fnkc.i = -freqdata[ncfft - k].i; 139 - C_FIXDIV(fk, 2); 140 - C_FIXDIV(fnkc, 2); 141 - 142 - C_ADD(fek, fk, fnkc); 143 - C_SUB(tmp, fk, fnkc); 144 - C_MUL(fok, tmp, st->super_twiddles[k - 1]); 145 - C_ADD(st->tmpbuf[k], fek, fok); 146 - C_SUB(st->tmpbuf[ncfft - k], fek, fok); 147 - #ifdef USE_SIMD 148 - st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); 149 - #else 150 - st->tmpbuf[ncfft - k].i *= -1; 151 - #endif 152 - } 153 - kiss_fft(st->substate, st->tmpbuf, (kiss_fft_cpx*)timedata); 154 - } 1 + /* 2 + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. 3 + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft 4 + * 5 + * SPDX-License-Identifier: BSD-3-Clause 6 + * See COPYING file for more information. 7 + */ 8 + 9 + #include "kiss_fftr.h" 10 + #include "_kiss_fft_guts.h" 11 + 12 + struct kiss_fftr_state{ 13 + kiss_fft_cfg substate; 14 + kiss_fft_cpx * tmpbuf; 15 + kiss_fft_cpx * super_twiddles; 16 + #ifdef USE_SIMD 17 + void * pad; 18 + #endif 19 + }; 20 + 21 + kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem) 22 + { 23 + KISS_FFT_ALIGN_CHECK(mem) 24 + 25 + int i; 26 + kiss_fftr_cfg st = NULL; 27 + size_t subsize = 0, memneeded; 28 + 29 + if (nfft & 1) { 30 + KISS_FFT_ERROR("Real FFT optimization must be even."); 31 + return NULL; 32 + } 33 + nfft >>= 1; 34 + 35 + kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize); 36 + memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 3 / 2); 37 + 38 + if (lenmem == NULL) { 39 + st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded); 40 + } else { 41 + if (*lenmem >= memneeded) 42 + st = (kiss_fftr_cfg) mem; 43 + *lenmem = memneeded; 44 + } 45 + if (!st) 46 + return NULL; 47 + 48 + st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */ 49 + st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize); 50 + st->super_twiddles = st->tmpbuf + nfft; 51 + kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize); 52 + 53 + for (i = 0; i < nfft/2; ++i) { 54 + double phase = 55 + -3.14159265358979323846264338327 * ((double) (i+1) / nfft + .5); 56 + if (inverse_fft) 57 + phase *= -1; 58 + kf_cexp (st->super_twiddles+i,phase); 59 + } 60 + return st; 61 + } 62 + 63 + void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata) 64 + { 65 + /* input buffer timedata is stored row-wise */ 66 + int k,ncfft; 67 + kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc; 68 + 69 + if ( st->substate->inverse) { 70 + KISS_FFT_ERROR("kiss fft usage error: improper alloc"); 71 + return;/* The caller did not call the correct function */ 72 + } 73 + 74 + ncfft = st->substate->nfft; 75 + 76 + /*perform the parallel fft of two real signals packed in real,imag*/ 77 + kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf ); 78 + /* The real part of the DC element of the frequency spectrum in st->tmpbuf 79 + * contains the sum of the even-numbered elements of the input time sequence 80 + * The imag part is the sum of the odd-numbered elements 81 + * 82 + * The sum of tdc.r and tdc.i is the sum of the input time sequence. 83 + * yielding DC of input time sequence 84 + * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1... 85 + * yielding Nyquist bin of input time sequence 86 + */ 87 + 88 + tdc.r = st->tmpbuf[0].r; 89 + tdc.i = st->tmpbuf[0].i; 90 + C_FIXDIV(tdc,2); 91 + CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i); 92 + CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i); 93 + freqdata[0].r = tdc.r + tdc.i; 94 + freqdata[ncfft].r = tdc.r - tdc.i; 95 + #ifdef USE_SIMD 96 + freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0); 97 + #else 98 + freqdata[ncfft].i = freqdata[0].i = 0; 99 + #endif 100 + 101 + for ( k=1;k <= ncfft/2 ; ++k ) { 102 + fpk = st->tmpbuf[k]; 103 + fpnk.r = st->tmpbuf[ncfft-k].r; 104 + fpnk.i = - st->tmpbuf[ncfft-k].i; 105 + C_FIXDIV(fpk,2); 106 + C_FIXDIV(fpnk,2); 107 + 108 + C_ADD( f1k, fpk , fpnk ); 109 + C_SUB( f2k, fpk , fpnk ); 110 + C_MUL( tw , f2k , st->super_twiddles[k-1]); 111 + 112 + freqdata[k].r = HALF_OF(f1k.r + tw.r); 113 + freqdata[k].i = HALF_OF(f1k.i + tw.i); 114 + freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r); 115 + freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i); 116 + } 117 + } 118 + 119 + void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata) 120 + { 121 + /* input buffer timedata is stored row-wise */ 122 + int k, ncfft; 123 + 124 + if (st->substate->inverse == 0) { 125 + KISS_FFT_ERROR("kiss fft usage error: improper alloc"); 126 + return;/* The caller did not call the correct function */ 127 + } 128 + 129 + ncfft = st->substate->nfft; 130 + 131 + st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r; 132 + st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r; 133 + C_FIXDIV(st->tmpbuf[0],2); 134 + 135 + for (k = 1; k <= ncfft / 2; ++k) { 136 + kiss_fft_cpx fk, fnkc, fek, fok, tmp; 137 + fk = freqdata[k]; 138 + fnkc.r = freqdata[ncfft - k].r; 139 + fnkc.i = -freqdata[ncfft - k].i; 140 + C_FIXDIV( fk , 2 ); 141 + C_FIXDIV( fnkc , 2 ); 142 + 143 + C_ADD (fek, fk, fnkc); 144 + C_SUB (tmp, fk, fnkc); 145 + C_MUL (fok, tmp, st->super_twiddles[k-1]); 146 + C_ADD (st->tmpbuf[k], fek, fok); 147 + C_SUB (st->tmpbuf[ncfft - k], fek, fok); 148 + #ifdef USE_SIMD 149 + st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0); 150 + #else 151 + st->tmpbuf[ncfft - k].i *= -1; 152 + #endif 153 + } 154 + kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata); 155 + }
+54 -54
src/ext/kiss_fftr.h
··· 1 - /* 2 - * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. 3 - * This file is part of KISS FFT - https://github.com/mborgerding/kissfft 4 - * 5 - * SPDX-License-Identifier: BSD-3-Clause 6 - * See COPYING file for more information. 7 - */ 8 - 9 - #ifndef KISS_FTR_H 10 - #define KISS_FTR_H 11 - 12 - #include "kiss_fft.h" 13 - #ifdef __cplusplus 14 - extern "C" { 15 - #endif 16 - 17 - 18 - /* 19 - 20 - Real optimized version can save about 45% cpu time vs. complex fft of a real seq. 21 - 22 - 23 - 24 - */ 25 - 26 - typedef struct kiss_fftr_state* kiss_fftr_cfg; 27 - 28 - 29 - kiss_fftr_cfg kiss_fftr_alloc(int nfft, int inverse_fft, void* mem, size_t* lenmem); 30 - /* 31 - nfft must be even 32 - 33 - If you don't care to allocate space, use mem = lenmem = NULL 34 - */ 35 - 36 - 37 - void kiss_fftr(kiss_fftr_cfg cfg, const kiss_fft_scalar* timedata, kiss_fft_cpx* freqdata); 38 - /* 39 - input timedata has nfft scalar points 40 - output freqdata has nfft/2+1 complex points 41 - */ 42 - 43 - void kiss_fftri(kiss_fftr_cfg cfg, const kiss_fft_cpx* freqdata, kiss_fft_scalar* timedata); 44 - /* 45 - input freqdata has nfft/2+1 complex points 46 - output timedata has nfft scalar points 47 - */ 48 - 49 - #define kiss_fftr_free KISS_FFT_FREE 50 - 51 - #ifdef __cplusplus 52 - } 53 - #endif 54 - #endif 1 + /* 2 + * Copyright (c) 2003-2004, Mark Borgerding. All rights reserved. 3 + * This file is part of KISS FFT - https://github.com/mborgerding/kissfft 4 + * 5 + * SPDX-License-Identifier: BSD-3-Clause 6 + * See COPYING file for more information. 7 + */ 8 + 9 + #ifndef KISS_FTR_H 10 + #define KISS_FTR_H 11 + 12 + #include "kiss_fft.h" 13 + #ifdef __cplusplus 14 + extern "C" { 15 + #endif 16 + 17 + 18 + /* 19 + 20 + Real optimized version can save about 45% cpu time vs. complex fft of a real seq. 21 + 22 + 23 + 24 + */ 25 + 26 + typedef struct kiss_fftr_state *kiss_fftr_cfg; 27 + 28 + 29 + kiss_fftr_cfg KISS_FFT_API kiss_fftr_alloc(int nfft,int inverse_fft,void * mem, size_t * lenmem); 30 + /* 31 + nfft must be even 32 + 33 + If you don't care to allocate space, use mem = lenmem = NULL 34 + */ 35 + 36 + 37 + void KISS_FFT_API kiss_fftr(kiss_fftr_cfg cfg,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata); 38 + /* 39 + input timedata has nfft scalar points 40 + output freqdata has nfft/2+1 complex points 41 + */ 42 + 43 + void KISS_FFT_API kiss_fftri(kiss_fftr_cfg cfg,const kiss_fft_cpx *freqdata,kiss_fft_scalar *timedata); 44 + /* 45 + input freqdata has nfft/2+1 complex points 46 + output timedata has nfft scalar points 47 + */ 48 + 49 + #define kiss_fftr_free KISS_FFT_FREE 50 + 51 + #ifdef __cplusplus 52 + } 53 + #endif 54 + #endif