fix type-system, use overload for tranform() and reorder butterfly-fn

+112 -123

1 changed file

expand all

kissfft.hh

+112 -123

kissfft.hh

··· 5 5 #include <vector> 6 6 7 7 8 - template <typename T_Scalar, 9 - typename T_Complex=std::complex<T_Scalar> 10 - > 8 + template <typename scalar_t> 11 9 class kissfft 12 10 { 13 11 public: 14 - typedef T_Scalar scalar_type; 15 - typedef T_Complex cpx_type; 12 + 13 + using cpx_t = std::complex<scalar_t>; 16 14 17 15 kissfft( std::size_t nfft, 18 16 bool inverse ) ··· 21 19 { 22 20 // fill twiddle factors 23 21 _twiddles.resize(_nfft); 24 - const scalar_type phinc = (_inverse?2:-2)* acos( (scalar_type) -1) / _nfft; 22 + const scalar_t phinc = (_inverse?2:-2)* acos( (scalar_t) -1) / _nfft; 25 23 for (std::size_t i=0;i<_nfft;++i) 26 - _twiddles[i] = exp( cpx_type(0,i*phinc) ); 24 + _twiddles[i] = exp( cpx_t(0,i*phinc) ); 27 25 28 26 //factorize 29 27 //start factoring out 4's, then 2's, then 3,5,7,9,... ··· 43 41 _stageRadix.push_back(p); 44 42 _stageRemainder.push_back(n); 45 43 }while(n>1); 46 - } 44 + }; 47 45 48 46 49 47 /// Changes the FFT-length and/or the transform direction. ··· 63 61 else if ( inverse != _inverse ) 64 62 { 65 63 // conjugate the twiddle factors. 66 - for ( typename std::vector<cpx_type>::iterator it = _twiddles.begin(); 64 + for ( typename std::vector<cpx_t>::iterator it = _twiddles.begin(); 67 65 it != _twiddles.end(); ++it ) 68 66 it->imag( -it->imag() ); 69 67 } 70 - } 68 + }; 71 69 72 70 /// Calculates the complex Discrete Fourier Transform. 73 71 /// ··· 81 79 /// constructor. Hence when applying the same transform twice, but with 82 80 /// the inverse flag changed the second time, then the result will 83 81 /// be equal to the original input times @c N. 84 - void transform( const cpx_type * src, 85 - cpx_type * dst ) const 82 + void transform(const cpx_t * fft_in, cpx_t * fft_out, std::size_t stage = 0, std::size_t fstride = 1, std::size_t in_stride = 1) const 86 83 { 87 - kf_work(0, dst, src, 1,1); 88 - } 84 + const std::size_t p = _stageRadix[stage]; 85 + const std::size_t m = _stageRemainder[stage]; 86 + cpx_t * const Fout_beg = fft_out; 87 + cpx_t * const Fout_end = fft_out + p*m; 88 + 89 + if (m==1) { 90 + do{ 91 + *fft_out = *fft_in; 92 + fft_in += fstride*in_stride; 93 + }while(++fft_out != Fout_end ); 94 + }else{ 95 + do{ 96 + // recursive call: 97 + // DFT of size m*p performed by doing 98 + // p instances of smaller DFTs of size m, 99 + // each one takes a decimated version of the input 100 + transform(fft_in, fft_out, stage+1, fstride*p,in_stride); 101 + fft_in += fstride*in_stride; 102 + }while( (fft_out += m) != Fout_end ); 103 + } 104 + 105 + fft_out=Fout_beg; 106 + 107 + // recombine the p smaller DFTs 108 + switch (p) { 109 + case 2: kf_bfly2(fft_out,fstride,m); break; 110 + case 3: kf_bfly3(fft_out,fstride,m); break; 111 + case 4: kf_bfly4(fft_out,fstride,m); break; 112 + case 5: kf_bfly5(fft_out,fstride,m); break; 113 + default: kf_bfly_generic(fft_out,fstride,m,p); break; 114 + } 115 + }; 89 116 90 117 /// Calculates the Discrete Fourier Transform (DFT) of a real input 91 118 /// of size @c 2*N. ··· 101 128 /// @endcode 102 129 /// The same scaling factors as in @c transform() apply. 103 130 /// 104 - /// @note For this to work, the types @c scalar_type and @c cpx_type 131 + /// @note For this to work, the types @c scalar_t and @c cpx_t 105 132 /// must fulfill the following requirements: 106 133 /// 107 - /// For any object @c z of type @c cpx_type, 108 - /// @c reinterpret_cast<scalar_type(&)[2]>(z)[0] is the real part of @c z and 109 - /// @c reinterpret_cast<scalar_type(&)[2]>(z)[1] is the imaginary part of @c z. 110 - /// For any pointer to an element of an array of @c cpx_type named @c p 134 + /// For any object @c z of type @c cpx_t, 135 + /// @c reinterpret_cast<scalar_t(&)[2]>(z)[0] is the real part of @c z and 136 + /// @c reinterpret_cast<scalar_t(&)[2]>(z)[1] is the imaginary part of @c z. 137 + /// For any pointer to an element of an array of @c cpx_t named @c p 111 138 /// and any valid array index @c i, @c reinterpret_cast<T*>(p)[2*i] 112 139 /// is the real part of the complex number @c p[i], and 113 140 /// @c reinterpret_cast<T*>(p)[2*i+1] is the imaginary part of the 114 141 /// complex number @c p[i]. 115 142 /// 116 143 /// Since C++11, these requirements are guaranteed to be satisfied for 117 - /// @c scalar_types being @c float, @c double or @c long @c double 118 - /// together with @c cpx_type being @c std::complex<scalar_type>. 119 - void transform_real( const scalar_type * src, 120 - cpx_type * dst ) const 144 + /// @c scalar_ts being @c float, @c double or @c long @c double 145 + /// together with @c cpx_t being @c std::complex<scalar_t>. 146 + void transform_real( const scalar_t * src, 147 + cpx_t * dst ) const 121 148 { 122 149 const std::size_t N = _nfft; 123 150 if ( N == 0 ) 124 151 return; 125 152 126 153 // perform complex FFT 127 - transform( reinterpret_cast<const cpx_type*>(src), dst ); 154 + transform( reinterpret_cast<const cpx_t*>(src), dst ); 128 155 129 156 // post processing for k = 0 and k = N 130 - dst[0] = cpx_type( dst[0].real() + dst[0].imag(), 157 + dst[0] = cpx_t( dst[0].real() + dst[0].imag(), 131 158 dst[0].real() - dst[0].imag() ); 132 159 133 160 // post processing for all the other k = 1, 2, ..., N-1 134 - const scalar_type pi = acos( (scalar_type) -1); 135 - const scalar_type half_phi_inc = ( _inverse ? pi : -pi ) / N; 136 - const cpx_type twiddle_mul = exp( cpx_type(0, half_phi_inc) ); 161 + const scalar_t pi = acos( (scalar_t) -1); 162 + const scalar_t half_phi_inc = ( _inverse ? pi : -pi ) / N; 163 + const cpx_t twiddle_mul = exp( cpx_t(0, half_phi_inc) ); 137 164 for ( std::size_t k = 1; 2*k < N; ++k ) 138 165 { 139 - const cpx_type w = (scalar_type)0.5 * cpx_type( 166 + const cpx_t w = (scalar_t)0.5 * cpx_t( 140 167 dst[k].real() + dst[N-k].real(), 141 168 dst[k].imag() - dst[N-k].imag() ); 142 - const cpx_type z = (scalar_type)0.5 * cpx_type( 169 + const cpx_t z = (scalar_t)0.5 * cpx_t( 143 170 dst[k].imag() + dst[N-k].imag(), 144 171 -dst[k].real() + dst[N-k].real() ); 145 - const cpx_type twiddle = 172 + const cpx_t twiddle = 146 173 k % 2 == 0 ? 147 174 _twiddles[k/2] : 148 175 _twiddles[k/2] * twiddle_mul; ··· 151 178 } 152 179 if ( N % 2 == 0 ) 153 180 dst[N/2] = conj( dst[N/2] ); 154 - } 181 + }; 155 182 156 183 private: 157 - void kf_work( std::size_t stage, 158 - cpx_type * Fout, 159 - const cpx_type * f, 160 - std::size_t fstride, 161 - std::size_t in_stride) const 162 - { 163 - const std::size_t p = _stageRadix[stage]; 164 - const std::size_t m = _stageRemainder[stage]; 165 - cpx_type * const Fout_beg = Fout; 166 - cpx_type * const Fout_end = Fout + p*m; 167 184 168 - if (m==1) { 169 - do{ 170 - *Fout = *f; 171 - f += fstride*in_stride; 172 - }while(++Fout != Fout_end ); 173 - }else{ 174 - do{ 175 - // recursive call: 176 - // DFT of size m*p performed by doing 177 - // p instances of smaller DFTs of size m, 178 - // each one takes a decimated version of the input 179 - kf_work(stage+1, Fout , f, fstride*p,in_stride); 180 - f += fstride*in_stride; 181 - }while( (Fout += m) != Fout_end ); 182 - } 183 - 184 - Fout=Fout_beg; 185 - 186 - // recombine the p smaller DFTs 187 - switch (p) { 188 - case 2: kf_bfly2(Fout,fstride,m); break; 189 - case 3: kf_bfly3(Fout,fstride,m); break; 190 - case 4: kf_bfly4(Fout,fstride,m); break; 191 - case 5: kf_bfly5(Fout,fstride,m); break; 192 - default: kf_bfly_generic(Fout,fstride,m,p); break; 193 - } 194 - } 195 - 196 - void kf_bfly2( cpx_type * Fout, const size_t fstride, std::size_t m) const 185 + void kf_bfly2( cpx_t * Fout, const size_t fstride, std::size_t m) const 197 186 { 198 187 for (std::size_t k=0;k<m;++k) { 199 - const cpx_type t = Fout[m+k] * _twiddles[k*fstride]; 188 + const cpx_t t = Fout[m+k] * _twiddles[k*fstride]; 200 189 Fout[m+k] = Fout[k] - t; 201 190 Fout[k] += t; 202 191 } 203 - } 192 + }; 204 193 205 - void kf_bfly4( cpx_type * Fout, const std::size_t fstride, const std::size_t m) const 206 - { 207 - cpx_type scratch[7]; 208 - const scalar_type negative_if_inverse = _inverse ? -1 : +1; 209 - for (std::size_t k=0;k<m;++k) { 210 - scratch[0] = Fout[k+ m] * _twiddles[k*fstride ]; 211 - scratch[1] = Fout[k+2*m] * _twiddles[k*fstride*2]; 212 - scratch[2] = Fout[k+3*m] * _twiddles[k*fstride*3]; 213 - scratch[5] = Fout[k] - scratch[1]; 214 - 215 - Fout[k] += scratch[1]; 216 - scratch[3] = scratch[0] + scratch[2]; 217 - scratch[4] = scratch[0] - scratch[2]; 218 - scratch[4] = cpx_type( scratch[4].imag()*negative_if_inverse , 219 - -scratch[4].real()*negative_if_inverse ); 220 - 221 - Fout[k+2*m] = Fout[k] - scratch[3]; 222 - Fout[k ]+= scratch[3]; 223 - Fout[k+ m] = scratch[5] + scratch[4]; 224 - Fout[k+3*m] = scratch[5] - scratch[4]; 225 - } 226 - } 227 - 228 - void kf_bfly3( cpx_type * Fout, const std::size_t fstride, const std::size_t m) const 194 + void kf_bfly3( cpx_t * Fout, const std::size_t fstride, const std::size_t m) const 229 195 { 230 196 std::size_t k=m; 231 197 const std::size_t m2 = 2*m; 232 - const cpx_type *tw1,*tw2; 233 - cpx_type scratch[5]; 234 - const cpx_type epi3 = _twiddles[fstride*m]; 198 + const cpx_t *tw1,*tw2; 199 + cpx_t scratch[5]; 200 + const cpx_t epi3 = _twiddles[fstride*m]; 235 201 236 202 tw1=tw2=&_twiddles[0]; 237 203 ··· 244 210 tw1 += fstride; 245 211 tw2 += fstride*2; 246 212 247 - Fout[m] = Fout[0] - scratch[3]*scalar_type(0.5); 213 + Fout[m] = Fout[0] - scratch[3]*scalar_t(0.5); 248 214 scratch[0] *= epi3.imag(); 249 215 250 216 Fout[0] += scratch[3]; 251 217 252 - Fout[m2] = cpx_type( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); 218 + Fout[m2] = cpx_t( Fout[m].real() + scratch[0].imag() , Fout[m].imag() - scratch[0].real() ); 253 219 254 - Fout[m] += cpx_type( -scratch[0].imag(),scratch[0].real() ); 220 + Fout[m] += cpx_t( -scratch[0].imag(),scratch[0].real() ); 255 221 ++Fout; 256 222 }while(--k); 257 - } 223 + }; 224 + 225 + void kf_bfly4( cpx_t * Fout, const std::size_t fstride, const std::size_t m) const 226 + { 227 + cpx_t scratch[7]; 228 + const scalar_t negative_if_inverse = _inverse ? -1 : +1; 229 + for (std::size_t k=0;k<m;++k) { 230 + scratch[0] = Fout[k+ m] * _twiddles[k*fstride ]; 231 + scratch[1] = Fout[k+2*m] * _twiddles[k*fstride*2]; 232 + scratch[2] = Fout[k+3*m] * _twiddles[k*fstride*3]; 233 + scratch[5] = Fout[k] - scratch[1]; 234 + 235 + Fout[k] += scratch[1]; 236 + scratch[3] = scratch[0] + scratch[2]; 237 + scratch[4] = scratch[0] - scratch[2]; 238 + scratch[4] = cpx_t( scratch[4].imag()*negative_if_inverse , 239 + -scratch[4].real()*negative_if_inverse ); 240 + 241 + Fout[k+2*m] = Fout[k] - scratch[3]; 242 + Fout[k ]+= scratch[3]; 243 + Fout[k+ m] = scratch[5] + scratch[4]; 244 + Fout[k+3*m] = scratch[5] - scratch[4]; 245 + } 246 + }; 258 247 259 - void kf_bfly5( cpx_type * Fout, const std::size_t fstride, const std::size_t m) const 248 + void kf_bfly5( cpx_t * Fout, const std::size_t fstride, const std::size_t m) const 260 249 { 261 - cpx_type *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 262 - cpx_type scratch[13]; 263 - const cpx_type ya = _twiddles[fstride*m]; 264 - const cpx_type yb = _twiddles[fstride*2*m]; 250 + cpx_t *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 251 + cpx_t scratch[13]; 252 + const cpx_t ya = _twiddles[fstride*m]; 253 + const cpx_t yb = _twiddles[fstride*2*m]; 265 254 266 255 Fout0=Fout; 267 256 Fout1=Fout0+m; ··· 285 274 *Fout0 += scratch[7]; 286 275 *Fout0 += scratch[8]; 287 276 288 - scratch[5] = scratch[0] + cpx_type( 277 + scratch[5] = scratch[0] + cpx_t( 289 278 scratch[7].real()*ya.real() + scratch[8].real()*yb.real(), 290 279 scratch[7].imag()*ya.real() + scratch[8].imag()*yb.real() 291 280 ); 292 281 293 - scratch[6] = cpx_type( 282 + scratch[6] = cpx_t( 294 283 scratch[10].imag()*ya.imag() + scratch[9].imag()*yb.imag(), 295 284 -scratch[10].real()*ya.imag() - scratch[9].real()*yb.imag() 296 285 ); ··· 299 288 *Fout4 = scratch[5] + scratch[6]; 300 289 301 290 scratch[11] = scratch[0] + 302 - cpx_type( 291 + cpx_t( 303 292 scratch[7].real()*yb.real() + scratch[8].real()*ya.real(), 304 293 scratch[7].imag()*yb.real() + scratch[8].imag()*ya.real() 305 294 ); 306 295 307 - scratch[12] = cpx_type( 296 + scratch[12] = cpx_t( 308 297 -scratch[10].imag()*yb.imag() + scratch[9].imag()*ya.imag(), 309 298 scratch[10].real()*yb.imag() - scratch[9].real()*ya.imag() 310 299 ); ··· 318 307 ++Fout3; 319 308 ++Fout4; 320 309 } 321 - } 310 + }; 322 311 323 312 /* perform the butterfly for one stage of a mixed radix FFT */ 324 313 void kf_bfly_generic( 325 - cpx_type * Fout, 314 + cpx_t * Fout, 326 315 const size_t fstride, 327 316 std::size_t m, 328 317 std::size_t p 329 318 ) const 330 319 { 331 - const cpx_type * twiddles = &_twiddles[0]; 332 - cpx_type scratchbuf[p]; 320 + const cpx_t * twiddles = &_twiddles[0]; 321 + cpx_t scratchbuf[p]; 333 322 334 323 for ( std::size_t u=0; u<m; ++u ) { 335 324 std::size_t k = u; ··· 351 340 k += m; 352 341 } 353 342 } 354 - } 343 + }; 355 344 356 - std::size_t _nfft; 357 - bool _inverse; 358 - std::vector<cpx_type> _twiddles; 359 - std::vector<std::size_t> _stageRadix; 360 - std::vector<std::size_t> _stageRemainder; 345 + std::size_t _nfft; 346 + bool _inverse; 347 + std::vector<cpx_t> _twiddles; 348 + std::vector<std::size_t> _stageRadix; 349 + std::vector<std::size_t> _stageRemainder; 361 350 }; 362 351 #endif

Configure Feed

Configure Feed