Merge pull request #3 from ralphtandetzky/master · alice.mosphere.at/kissfft@e797a0e

+218 -156

1 changed file

expand all

kissfft.hh

+218 -156

kissfft.hh

··· 1 1 #ifndef KISSFFT_CLASS_HH 2 2 #define KISSFFT_CLASS_HH 3 3 #include <complex> 4 + #include <utility> 4 5 #include <vector> 5 6 6 - namespace kissfft_utils { 7 7 8 - template <typename T_scalar> 9 - struct traits 8 + template <typename T_Scalar, 9 + typename T_Complex=std::complex<T_Scalar> 10 + > 11 + class kissfft 10 12 { 11 - typedef T_scalar scalar_type; 12 - typedef std::complex<scalar_type> cpx_type; 13 - void fill_twiddles( std::complex<T_scalar> * dst ,int nfft,bool inverse) 14 - { 15 - T_scalar phinc = (inverse?2:-2)* acos( (T_scalar) -1) / nfft; 16 - for (int i=0;i<nfft;++i) 17 - dst[i] = exp( std::complex<T_scalar>(0,i*phinc) ); 18 - } 13 + public: 14 + typedef T_Scalar scalar_type; 15 + typedef T_Complex cpx_type; 19 16 20 - void prepare( 21 - std::vector< std::complex<T_scalar> > & dst, 22 - int nfft,bool inverse, 23 - std::vector<int> & stageRadix, 24 - std::vector<int> & stageRemainder ) 25 - { 26 - _twiddles.resize(nfft); 27 - fill_twiddles( &_twiddles[0],nfft,inverse); 28 - dst = _twiddles; 17 + kissfft( std::size_t nfft, 18 + bool inverse ) 19 + :_nfft(nfft) 20 + ,_inverse(inverse) 21 + { 22 + // fill twiddle factors 23 + _twiddles.resize(_nfft); 24 + const scalar_type phinc = (_inverse?2:-2)* acos( (scalar_type) -1) / _nfft; 25 + for (std::size_t i=0;i<_nfft;++i) 26 + _twiddles[i] = exp( cpx_type(0,i*phinc) ); 29 27 30 - //factorize 31 - //start factoring out 4's, then 2's, then 3,5,7,9,... 32 - int n= nfft; 33 - int p=4; 34 - do { 35 - while (n % p) { 36 - switch (p) { 37 - case 4: p = 2; break; 38 - case 2: p = 3; break; 39 - default: p += 2; break; 28 + //factorize 29 + //start factoring out 4's, then 2's, then 3,5,7,9,... 30 + std::size_t n= _nfft; 31 + std::size_t p=4; 32 + do { 33 + while (n % p) { 34 + switch (p) { 35 + case 4: p = 2; break; 36 + case 2: p = 3; break; 37 + default: p += 2; break; 38 + } 39 + if (p*p>n) 40 + p = n;// no more factors 40 41 } 41 - if (p*p>n) 42 - p=n;// no more factors 43 - } 44 - n /= p; 45 - stageRadix.push_back(p); 46 - stageRemainder.push_back(n); 47 - }while(n>1); 48 - } 49 - std::vector<cpx_type> _twiddles; 42 + n /= p; 43 + _stageRadix.push_back(p); 44 + _stageRemainder.push_back(n); 45 + }while(n>1); 46 + } 50 47 51 48 52 - const cpx_type twiddle(int i) { return _twiddles[i]; } 53 - }; 54 - 55 - } 56 - 57 - template <typename T_Scalar, 58 - typename T_traits=kissfft_utils::traits<T_Scalar> 59 - > 60 - class kissfft 61 - { 62 - public: 63 - typedef T_traits traits_type; 64 - typedef typename traits_type::scalar_type scalar_type; 65 - typedef typename traits_type::cpx_type cpx_type; 66 - 67 - kissfft(int nfft,bool inverse,const traits_type & traits=traits_type() ) 68 - :_nfft(nfft),_inverse(inverse),_traits(traits) 49 + /// Changes the FFT-length and/or the transform direction. 50 + /// 51 + /// @post The @c kissfft object will be in the same state as if it 52 + /// had been newly constructed with the passed arguments. 53 + /// However, the implementation may be faster than constructing a 54 + /// new fft object. 55 + void assign( std::size_t nfft, 56 + bool inverse ) 69 57 { 70 - _traits.prepare(_twiddles, _nfft,_inverse ,_stageRadix, _stageRemainder); 58 + if ( nfft != _nfft ) 59 + { 60 + kissfft tmp( nfft, inverse ); // O(n) time. 61 + std::swap( tmp, *this ); // this is O(1) in C++11, O(n) otherwise. 62 + } 63 + else if ( inverse != _inverse ) 64 + { 65 + // conjugate the twiddle factors. 66 + for ( typename std::vector<cpx_type>::iterator it = _twiddles.begin(); 67 + it != _twiddles.end(); ++it ) 68 + it->imag( -it->imag() ); 69 + } 71 70 } 72 71 73 - void transform(const cpx_type * src , cpx_type * dst) 72 + /// Calculates the complex Discrete Fourier Transform. 73 + /// 74 + /// The size of the passed arrays must be passed in the constructor. 75 + /// The sum of the squares of the absolute values in the @c dst 76 + /// array will be @c N times the sum of the squares of the absolute 77 + /// values in the @c src array, where @c N is the size of the array. 78 + /// In other words, the l_2 norm of the resulting array will be 79 + /// @c sqrt(N) times as big as the l_2 norm of the input array. 80 + /// This is also the case when the inverse flag is set in the 81 + /// constructor. Hence when applying the same transform twice, but with 82 + /// the inverse flag changed the second time, then the result will 83 + /// be equal to the original input times @c N. 84 + void transform( const cpx_type * src, 85 + cpx_type * dst ) const 74 86 { 75 87 kf_work(0, dst, src, 1,1); 76 88 } 77 89 90 + /// Calculates the Discrete Fourier Transform (DFT) of a real input 91 + /// of size @c 2*N. 92 + /// 93 + /// The 0-th and N-th value of the DFT are real numbers. These are 94 + /// stored in @c dst[0].real() and @c dst[1].imag() respectively. 95 + /// The remaining DFT values up to the index N-1 are stored in 96 + /// @c dst[1] to @c dst[N-1]. 97 + /// The other half of the DFT values can be calculated from the 98 + /// symmetry relation 99 + /// @code 100 + /// DFT(src)[2*N-k] == conj( DFT(src)[k] ); 101 + /// @endcode 102 + /// The same scaling factors as in @c transform() apply. 103 + /// 104 + /// @note For this to work, the types @c scalar_type and @c cpx_type 105 + /// must fulfill the following requirements: 106 + /// 107 + /// For any object @c z of type @c cpx_type, 108 + /// @c reinterpret_cast<scalar_type(&)[2]>(z)[0] is the real part of @c z and 109 + /// @c reinterpret_cast<scalar_type(&)[2]>(z)[1] is the imaginary part of @c z. 110 + /// For any pointer to an element of an array of @c cpx_type named @c p 111 + /// and any valid array index @c i, @c reinterpret_cast<T*>(p)[2*i] 112 + /// is the real part of the complex number @c p[i], and 113 + /// @c reinterpret_cast<T*>(p)[2*i+1] is the imaginary part of the 114 + /// complex number @c p[i]. 115 + /// 116 + /// Since C++11, these requirements are guaranteed to be satisfied for 117 + /// @c scalar_types being @c float, @c double or @c long @c double 118 + /// together with @c cpx_type being @c std::complex<scalar_type>. 119 + void transform_real( const scalar_type * src, 120 + cpx_type * dst ) const 121 + { 122 + const std::size_t N = _nfft; 123 + if ( N == 0 ) 124 + return; 125 + 126 + // perform complex FFT 127 + transform( reinterpret_cast<const cpx_type*>(src), dst ); 128 + 129 + // post processing for k = 0 and k = N 130 + dst[0] = cpx_type( dst[0].real() + dst[0].imag(), 131 + dst[0].real() - dst[0].imag() ); 132 + 133 + // post processing for all the other k = 1, 2, ..., N-1 134 + const scalar_type pi = acos( (scalar_type) -1); 135 + const scalar_type half_phi_inc = ( _inverse ? pi : -pi ) / N; 136 + const cpx_type twiddle_mul = exp( cpx_type(0, half_phi_inc) ); 137 + for ( std::size_t k = 1; 2*k < N; ++k ) 138 + { 139 + const cpx_type w = 0.5 * cpx_type( 140 + dst[k].real() + dst[N-k].real(), 141 + dst[k].imag() - dst[N-k].imag() ); 142 + const cpx_type z = 0.5 * cpx_type( 143 + dst[k].imag() + dst[N-k].imag(), 144 + -dst[k].real() + dst[N-k].real() ); 145 + const cpx_type twiddle = 146 + k % 2 == 0 ? 147 + _twiddles[k/2] : 148 + _twiddles[k/2] * twiddle_mul; 149 + dst[ k] = w + twiddle * z; 150 + dst[N-k] = conj( w - twiddle * z ); 151 + } 152 + if ( N % 2 == 0 ) 153 + dst[N/2] = conj( dst[N/2] ); 154 + } 155 + 78 156 private: 79 - void kf_work( int stage,cpx_type * Fout, const cpx_type * f, size_t fstride,size_t in_stride) 157 + void kf_work( std::size_t stage, 158 + cpx_type * Fout, 159 + const cpx_type * f, 160 + std::size_t fstride, 161 + std::size_t in_stride) const 80 162 { 81 - int p = _stageRadix[stage]; 82 - int m = _stageRemainder[stage]; 83 - cpx_type * Fout_beg = Fout; 84 - cpx_type * Fout_end = Fout + p*m; 163 + const std::size_t p = _stageRadix[stage]; 164 + const std::size_t m = _stageRemainder[stage]; 165 + cpx_type * const Fout_beg = Fout; 166 + cpx_type * const Fout_end = Fout + p*m; 85 167 86 168 if (m==1) { 87 169 do{ ··· 111 193 } 112 194 } 113 195 114 - // these were #define macros in the original kiss_fft 115 - void C_ADD( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a+b;} 116 - void C_MUL( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a*b;} 117 - void C_SUB( cpx_type & c,const cpx_type & a,const cpx_type & b) { c=a-b;} 118 - void C_ADDTO( cpx_type & c,const cpx_type & a) { c+=a;} 119 - void C_FIXDIV( cpx_type & ,int ) {} // NO-OP for float types 120 - scalar_type S_MUL( const scalar_type & a,const scalar_type & b) { return a*b;} 121 - scalar_type HALF_OF( const scalar_type & a) { return a*.5;} 122 - void C_MULBYSCALAR(cpx_type & c,const scalar_type & a) {c*=a;} 123 - 124 - void kf_bfly2( cpx_type * Fout, const size_t fstride, int m) 196 + void kf_bfly2( cpx_type * Fout, const size_t fstride, std::size_t m) const 125 197 { 126 - for (int k=0;k<m;++k) { 127 - cpx_type t = Fout[m+k] * _traits.twiddle(k*fstride); 198 + for (std::size_t k=0;k<m;++k) { 199 + const cpx_type t = Fout[m+k] * _twiddles[k*fstride]; 128 200 Fout[m+k] = Fout[k] - t; 129 201 Fout[k] += t; 130 202 } 131 203 } 132 204 133 - void kf_bfly4( cpx_type * Fout, const size_t fstride, const size_t m) 205 + void kf_bfly4( cpx_type * Fout, const std::size_t fstride, const std::size_t m) const 134 206 { 135 207 cpx_type scratch[7]; 136 - int negative_if_inverse = _inverse * -2 +1; 137 - for (size_t k=0;k<m;++k) { 138 - scratch[0] = Fout[k+m] * _traits.twiddle(k*fstride); 139 - scratch[1] = Fout[k+2*m] * _traits.twiddle(k*fstride*2); 140 - scratch[2] = Fout[k+3*m] * _traits.twiddle(k*fstride*3); 208 + const scalar_type negative_if_inverse = _inverse ? -1 : +1; 209 + for (std::size_t k=0;k<m;++k) { 210 + scratch[0] = Fout[k+ m] * _twiddles[k*fstride ]; 211 + scratch[1] = Fout[k+2*m] * _twiddles[k*fstride*2]; 212 + scratch[2] = Fout[k+3*m] * _twiddles[k*fstride*3]; 141 213 scratch[5] = Fout[k] - scratch[1]; 142 214 143 215 Fout[k] += scratch[1]; 144 216 scratch[3] = scratch[0] + scratch[2]; 145 217 scratch[4] = scratch[0] - scratch[2]; 146 - scratch[4] = cpx_type( scratch[4].imag()*negative_if_inverse , -scratch[4].real()* negative_if_inverse ); 218 + scratch[4] = cpx_type( scratch[4].imag()*negative_if_inverse , 219 + -scratch[4].real()*negative_if_inverse ); 147 220 148 221 Fout[k+2*m] = Fout[k] - scratch[3]; 149 - Fout[k] += scratch[3]; 150 - Fout[k+m] = scratch[5] + scratch[4]; 222 + Fout[k ]+= scratch[3]; 223 + Fout[k+ m] = scratch[5] + scratch[4]; 151 224 Fout[k+3*m] = scratch[5] - scratch[4]; 152 225 } 153 226 } 154 227 155 - void kf_bfly3( cpx_type * Fout, const size_t fstride, const size_t m) 228 + void kf_bfly3( cpx_type * Fout, const std::size_t fstride, const std::size_t m) const 156 229 { 157 - size_t k=m; 158 - const size_t m2 = 2*m; 159 - cpx_type *tw1,*tw2; 230 + std::size_t k=m; 231 + const std::size_t m2 = 2*m; 232 + const cpx_type *tw1,*tw2; 160 233 cpx_type scratch[5]; 161 - cpx_type epi3; 162 - epi3 = _twiddles[fstride*m]; 234 + const cpx_type epi3 = _twiddles[fstride*m]; 163 235 164 236 tw1=tw2=&_twiddles[0]; 165 237 166 238 do{ 167 - C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); 239 + scratch[1] = Fout[m] * *tw1; 240 + scratch[2] = Fout[m2] * *tw2; 168 241 169 - C_MUL(scratch[1],Fout[m] , *tw1); 170 - C_MUL(scratch[2],Fout[m2] , *tw2); 171 - 172 - C_ADD(scratch[3],scratch[1],scratch[2]); 173 - C_SUB(scratch[0],scratch[1],scratch[2]); 242 + scratch[3] = scratch[1] + scratch[2]; 243 + scratch[0] = scratch[1] - scratch[2]; 174 244 tw1 += fstride; 175 245 tw2 += fstride*2; 176 246 177 - Fout[m] = cpx_type( Fout->real() - HALF_OF(scratch[3].real() ) , Fout->imag() - HALF_OF(scratch[3].imag() ) ); 247 + Fout[m] = Fout[0] - scratch[3]*scalar_type(0.5); 248 + scratch[0] *= epi3.imag(); 178 249 179 - C_MULBYSCALAR( scratch[0] , epi3.imag() ); 180 - 181 - C_ADDTO(*Fout,scratch[3]); 250 + Fout[0] += scratch[3]; 182 251 183 252 Fout[m2] = cpx_type( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); 184 253 185 - C_ADDTO( Fout[m] , cpx_type( -scratch[0].imag(),scratch[0].real() ) ); 254 + Fout[m] += cpx_type( -scratch[0].imag(),scratch[0].real() ); 186 255 ++Fout; 187 256 }while(--k); 188 257 } 189 258 190 - void kf_bfly5( cpx_type * Fout, const size_t fstride, const size_t m) 259 + void kf_bfly5( cpx_type * Fout, const std::size_t fstride, const std::size_t m) const 191 260 { 192 261 cpx_type *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 193 - size_t u; 194 262 cpx_type scratch[13]; 195 - cpx_type * twiddles = &_twiddles[0]; 196 - cpx_type *tw; 197 - cpx_type ya,yb; 198 - ya = twiddles[fstride*m]; 199 - yb = twiddles[fstride*2*m]; 263 + const cpx_type ya = _twiddles[fstride*m]; 264 + const cpx_type yb = _twiddles[fstride*2*m]; 200 265 201 266 Fout0=Fout; 202 267 Fout1=Fout0+m; ··· 204 269 Fout3=Fout0+3*m; 205 270 Fout4=Fout0+4*m; 206 271 207 - tw=twiddles; 208 - for ( u=0; u<m; ++u ) { 209 - C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); 272 + for ( std::size_t u=0; u<m; ++u ) { 210 273 scratch[0] = *Fout0; 211 274 212 - C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); 213 - C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); 214 - C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); 215 - C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); 275 + scratch[1] = *Fout1 * _twiddles[ u*fstride]; 276 + scratch[2] = *Fout2 * _twiddles[2*u*fstride]; 277 + scratch[3] = *Fout3 * _twiddles[3*u*fstride]; 278 + scratch[4] = *Fout4 * _twiddles[4*u*fstride]; 216 279 217 - C_ADD( scratch[7],scratch[1],scratch[4]); 218 - C_SUB( scratch[10],scratch[1],scratch[4]); 219 - C_ADD( scratch[8],scratch[2],scratch[3]); 220 - C_SUB( scratch[9],scratch[2],scratch[3]); 280 + scratch[7] = scratch[1] + scratch[4]; 281 + scratch[10]= scratch[1] - scratch[4]; 282 + scratch[8] = scratch[2] + scratch[3]; 283 + scratch[9] = scratch[2] - scratch[3]; 221 284 222 - C_ADDTO( *Fout0, scratch[7]); 223 - C_ADDTO( *Fout0, scratch[8]); 285 + *Fout0 += scratch[7]; 286 + *Fout0 += scratch[8]; 224 287 225 288 scratch[5] = scratch[0] + cpx_type( 226 - S_MUL(scratch[7].real(),ya.real() ) + S_MUL(scratch[8].real() ,yb.real() ), 227 - S_MUL(scratch[7].imag(),ya.real()) + S_MUL(scratch[8].imag(),yb.real()) 289 + scratch[7].real()*ya.real() + scratch[8].real()*yb.real(), 290 + scratch[7].imag()*ya.real() + scratch[8].imag()*yb.real() 228 291 ); 229 292 230 293 scratch[6] = cpx_type( 231 - S_MUL(scratch[10].imag(),ya.imag()) + S_MUL(scratch[9].imag(),yb.imag()), 232 - -S_MUL(scratch[10].real(),ya.imag()) - S_MUL(scratch[9].real(),yb.imag()) 294 + scratch[10].imag()*ya.imag() + scratch[9].imag()*yb.imag(), 295 + -scratch[10].real()*ya.imag() - scratch[9].real()*yb.imag() 233 296 ); 234 297 235 - C_SUB(*Fout1,scratch[5],scratch[6]); 236 - C_ADD(*Fout4,scratch[5],scratch[6]); 298 + *Fout1 = scratch[5] - scratch[6]; 299 + *Fout4 = scratch[5] + scratch[6]; 237 300 238 301 scratch[11] = scratch[0] + 239 302 cpx_type( 240 - S_MUL(scratch[7].real(),yb.real()) + S_MUL(scratch[8].real(),ya.real()), 241 - S_MUL(scratch[7].imag(),yb.real()) + S_MUL(scratch[8].imag(),ya.real()) 303 + scratch[7].real()*yb.real() + scratch[8].real()*ya.real(), 304 + scratch[7].imag()*yb.real() + scratch[8].imag()*ya.real() 242 305 ); 243 306 244 307 scratch[12] = cpx_type( 245 - -S_MUL(scratch[10].imag(),yb.imag()) + S_MUL(scratch[9].imag(),ya.imag()), 246 - S_MUL(scratch[10].real(),yb.imag()) - S_MUL(scratch[9].real(),ya.imag()) 308 + -scratch[10].imag()*yb.imag() + scratch[9].imag()*ya.imag(), 309 + scratch[10].real()*yb.imag() - scratch[9].real()*ya.imag() 247 310 ); 248 311 249 - C_ADD(*Fout2,scratch[11],scratch[12]); 250 - C_SUB(*Fout3,scratch[11],scratch[12]); 312 + *Fout2 = scratch[11] + scratch[12]; 313 + *Fout3 = scratch[11] - scratch[12]; 251 314 252 - ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; 315 + ++Fout0; 316 + ++Fout1; 317 + ++Fout2; 318 + ++Fout3; 319 + ++Fout4; 253 320 } 254 321 } 255 322 ··· 257 324 void kf_bfly_generic( 258 325 cpx_type * Fout, 259 326 const size_t fstride, 260 - int m, 261 - int p 262 - ) 327 + std::size_t m, 328 + std::size_t p 329 + ) const 263 330 { 264 - int u,k,q1,q; 265 - cpx_type * twiddles = &_twiddles[0]; 266 - cpx_type t; 267 - int Norig = _nfft; 331 + const cpx_type * twiddles = &_twiddles[0]; 268 332 cpx_type scratchbuf[p]; 269 333 270 - for ( u=0; u<m; ++u ) { 271 - k=u; 272 - for ( q1=0 ; q1<p ; ++q1 ) { 334 + for ( std::size_t u=0; u<m; ++u ) { 335 + std::size_t k = u; 336 + for ( std::size_t q1=0 ; q1<p ; ++q1 ) { 273 337 scratchbuf[q1] = Fout[ k ]; 274 - C_FIXDIV(scratchbuf[q1],p); 275 338 k += m; 276 339 } 277 340 278 341 k=u; 279 - for ( q1=0 ; q1<p ; ++q1 ) { 280 - int twidx=0; 342 + for ( std::size_t q1=0 ; q1<p ; ++q1 ) { 343 + std::size_t twidx=0; 281 344 Fout[ k ] = scratchbuf[0]; 282 - for (q=1;q<p;++q ) { 345 + for ( std::size_t q=1;q<p;++q ) { 283 346 twidx += fstride * k; 284 - if (twidx>=Norig) twidx-=Norig; 285 - C_MUL(t,scratchbuf[q] , twiddles[twidx] ); 286 - C_ADDTO( Fout[ k ] ,t); 347 + if (twidx>=_nfft) 348 + twidx-=_nfft; 349 + Fout[ k ] += scratchbuf[q] * twiddles[twidx]; 287 350 } 288 351 k += m; 289 352 } 290 353 } 291 354 } 292 355 293 - int _nfft; 356 + std::size_t _nfft; 294 357 bool _inverse; 295 358 std::vector<cpx_type> _twiddles; 296 - std::vector<int> _stageRadix; 297 - std::vector<int> _stageRemainder; 298 - traits_type _traits; 359 + std::vector<std::size_t> _stageRadix; 360 + std::vector<std::size_t> _stageRemainder; 299 361 }; 300 362 #endif

Configure Feed

Configure Feed