integer cpp-version · alice.mosphere.at/kissfft@5b13552

+304

1 changed file

expand all

+304

kissfft_i32.hh

··· 1 + #ifndef KISSFFT_I32_CLASS_HH 2 + #define KISSFFT_I32_CLASS_HH 3 + 4 + #include <complex> 5 + #include <utility> 6 + #include <vector> 7 + 8 + // TODO1: substitute complex<type> (behaviour not defined for nonfloats), should be faster 9 + // TODO2: use std:: namespace 10 + // TODO3: make unittests for all ffts (c, cpp, i32) 11 + 12 + template <typename DType> 13 + struct complex_s 14 + { 15 + DType real; 16 + DType imag; 17 + }; 18 + 19 + class kissfft_i32 20 + { 21 + private: 22 + 23 + using scalar_type = int32_t; 24 + using cpx_type = complex<int32_t>; 25 + 26 + scalar_type _scale_factor; 27 + std::size_t _nfft; 28 + bool _inverse; 29 + std::vector<cpx_type> _twiddles; 30 + std::vector<std::size_t> _stageRadix; 31 + std::vector<std::size_t> _stageRemainder; 32 + 33 + public: 34 + 35 + // scale_factor: upscale twiddle-factors otherwise they lie between 0..1 (out of range for integer) --> fixed point math 36 + kissfft_i32(const std::size_t nfft, const bool inverse, const double scale_factor = 1024.0) 37 + : _scale_factor(scalar_type(scale_factor)), _nfft(nfft), _inverse(inverse) 38 + { 39 + // fill twiddle factors 40 + _twiddles.resize(_nfft); 41 + const double phinc = (_inverse ? 2 : -2) * acos(-1.0) / _nfft; 42 + for (std::size_t i = 0; i < _nfft; ++i) 43 + { 44 + _twiddles[i] = scale_factor * exp(complex<double>(0, i * phinc)); 45 + } 46 + //factorize 47 + //start factoring out 4's, then 2's, then 3,5,7,9,... 48 + std::size_t n = _nfft; 49 + std::size_t p = 4; 50 + do 51 + { 52 + while (n % p) 53 + { 54 + switch (p) 55 + { 56 + case 4: 57 + p = 2; 58 + break; 59 + case 2: 60 + p = 3; 61 + break; 62 + default: 63 + p += 2; 64 + break; 65 + } 66 + if (p * p > n) p = n;// no more factors 67 + } 68 + n /= p; 69 + _stageRadix.push_back(p); 70 + _stageRemainder.push_back(n); 71 + } while (n > 1); 72 + } 73 + 74 + /// Calculates the complex Discrete Fourier Transform. 75 + /// 76 + /// The size of the passed arrays must be passed in the constructor. 77 + /// The sum of the squares of the absolute values in the @c dst 78 + /// array will be @c N times the sum of the squares of the absolute 79 + /// values in the @c src array, where @c N is the size of the array. 80 + /// In other words, the l_2 norm of the resulting array will be 81 + /// @c sqrt(N) times as big as the l_2 norm of the input array. 82 + /// This is also the case when the inverse flag is set in the 83 + /// constructor. Hence when applying the same transform twice, but with 84 + /// the inverse flag changed the second time, then the result will 85 + /// be equal to the original input times @c N. 86 + void transform(const cpx_type * FSrc, 87 + cpx_type * FDst, 88 + const std::size_t stage = 0, 89 + const std::size_t fstride = 1, 90 + const std::size_t in_stride = 1) const 91 + { 92 + const std::size_t p = _stageRadix[stage]; 93 + const std::size_t m = _stageRemainder[stage]; 94 + cpx_type *const Fout_beg = FDst; 95 + cpx_type *const Fout_end = FDst + p * m; 96 + 97 + if (m == 1) 98 + { 99 + do 100 + { 101 + *FDst = *FSrc; 102 + FSrc += fstride * in_stride; 103 + } while (++FDst != Fout_end); 104 + } 105 + else 106 + { 107 + do 108 + { 109 + // recursive call: 110 + // DFT of size m*p performed by doing 111 + // p instances of smaller DFTs of size m, 112 + // each one takes a decimated version of the input 113 + transform(FSrc, FDst, stage + 1, fstride * p, in_stride); 114 + FSrc += fstride * in_stride; 115 + } while ((FDst += m) != Fout_end); 116 + } 117 + 118 + FDst = Fout_beg; 119 + 120 + // recombine the p smaller DFTs 121 + switch (p) 122 + { 123 + case 2: 124 + kf_bfly2(FDst, fstride, m); 125 + break; 126 + case 3: 127 + kf_bfly3(FDst, fstride, m); 128 + break; 129 + case 4: 130 + kf_bfly4(FDst, fstride, m); 131 + break; 132 + case 5: 133 + kf_bfly5(FDst, fstride, m); 134 + break; 135 + default: 136 + kf_bfly_generic(FDst, fstride, m, p); 137 + break; 138 + } 139 + } 140 + 141 + private: 142 + 143 + void kf_bfly2(cpx_type *const Fout, const size_t fstride, const std::size_t m) const 144 + { 145 + for (std::size_t k = 0; k < m; ++k) 146 + { 147 + const cpx_type t = (Fout[m + k] * _twiddles[k * fstride]) / _scale_factor; 148 + Fout[m + k] = Fout[k] - t; 149 + Fout[k] += t; 150 + } 151 + } 152 + 153 + void kf_bfly3(cpx_type *Fout, const std::size_t fstride, const std::size_t m) const 154 + { 155 + std::size_t k = m; 156 + const std::size_t m2 = 2 * m; 157 + const cpx_type *tw1, *tw2; 158 + cpx_type scratch[5]; 159 + const cpx_type epi3 = _twiddles[fstride * m]; 160 + 161 + tw1 = tw2 = &_twiddles[0]; 162 + 163 + do 164 + { 165 + scratch[1] = (Fout[m] * *tw1) / _scale_factor; 166 + scratch[2] = (Fout[m2] * *tw2) / _scale_factor; 167 + 168 + scratch[3] = scratch[1] + scratch[2]; 169 + scratch[0] = scratch[1] - scratch[2]; 170 + tw1 += fstride; 171 + tw2 += fstride * 2; 172 + 173 + Fout[m] = Fout[0] - (scratch[3] / 2); 174 + scratch[0] *= epi3.imag(); 175 + scratch[0] /= _scale_factor; 176 + 177 + Fout[0] += scratch[3]; 178 + 179 + Fout[m2] = cpx_type(Fout[m].real() + scratch[0].imag(), Fout[m].imag() - scratch[0].real()); 180 + 181 + Fout[m] += cpx_type(-scratch[0].imag(), scratch[0].real()); 182 + ++Fout; 183 + } while (--k); 184 + } 185 + 186 + void kf_bfly4(cpx_type *const Fout, const std::size_t fstride, const std::size_t m) const 187 + { 188 + cpx_type scratch[7]; 189 + const scalar_type negative_if_inverse = _inverse ? -1 : +1; 190 + 191 + for (std::size_t k = 0; k < m; ++k) 192 + { 193 + scratch[0] = (Fout[k + m] * _twiddles[k * fstride]) / _scale_factor; 194 + scratch[1] = (Fout[k + 2 * m] * _twiddles[k * fstride * 2]) / _scale_factor; 195 + scratch[2] = (Fout[k + 3 * m] * _twiddles[k * fstride * 3]) / _scale_factor; 196 + scratch[5] = Fout[k] - scratch[1]; 197 + 198 + Fout[k] += scratch[1]; 199 + scratch[3] = scratch[0] + scratch[2]; 200 + scratch[4] = scratch[0] - scratch[2]; 201 + scratch[4] = cpx_type(scratch[4].imag() * negative_if_inverse, 202 + -scratch[4].real() * negative_if_inverse); 203 + 204 + Fout[k + 2 * m] = Fout[k] - scratch[3]; 205 + Fout[k] += scratch[3]; 206 + Fout[k + m] = scratch[5] + scratch[4]; 207 + Fout[k + 3 * m] = scratch[5] - scratch[4]; 208 + } 209 + } 210 + 211 + void kf_bfly5(cpx_type *const Fout, const std::size_t fstride, const std::size_t m) const 212 + { 213 + cpx_type *Fout0, *Fout1, *Fout2, *Fout3, *Fout4; 214 + cpx_type scratch[13]; 215 + const cpx_type ya = _twiddles[fstride * m]; 216 + const cpx_type yb = _twiddles[fstride * 2 * m]; 217 + 218 + Fout0 = Fout; 219 + Fout1 = Fout0 + m; 220 + Fout2 = Fout0 + 2 * m; 221 + Fout3 = Fout0 + 3 * m; 222 + Fout4 = Fout0 + 4 * m; 223 + 224 + for (std::size_t u = 0; u < m; ++u) 225 + { 226 + scratch[0] = *Fout0; 227 + 228 + scratch[1] = (*Fout1 * _twiddles[u * fstride]) / _scale_factor; 229 + scratch[2] = (*Fout2 * _twiddles[2 * u * fstride]) / _scale_factor; 230 + scratch[3] = (*Fout3 * _twiddles[3 * u * fstride]) / _scale_factor; 231 + scratch[4] = (*Fout4 * _twiddles[4 * u * fstride]) / _scale_factor; 232 + 233 + scratch[7] = scratch[1] + scratch[4]; 234 + scratch[10] = scratch[1] - scratch[4]; 235 + scratch[8] = scratch[2] + scratch[3]; 236 + scratch[9] = scratch[2] - scratch[3]; 237 + 238 + *Fout0 += scratch[7]; 239 + *Fout0 += scratch[8]; 240 + 241 + scratch[5] = scratch[0] + (cpx_type( 242 + scratch[7].real() * ya.real() + scratch[8].real() * yb.real(), 243 + scratch[7].imag() * ya.real() + scratch[8].imag() * yb.real() ) / _scale_factor); 244 + 245 + scratch[6] = cpx_type( 246 + scratch[10].imag() * ya.imag() + scratch[9].imag() * yb.imag(), 247 + -scratch[10].real() * ya.imag() - scratch[9].real() * yb.imag() ) / _scale_factor; 248 + 249 + *Fout1 = scratch[5] - scratch[6]; 250 + *Fout4 = scratch[5] + scratch[6]; 251 + 252 + scratch[11] = scratch[0] + (cpx_type( 253 + scratch[7].real() * yb.real() + scratch[8].real() * ya.real(), 254 + scratch[7].imag() * yb.real() + scratch[8].imag() * ya.real() ) / _scale_factor); 255 + 256 + scratch[12] = cpx_type( 257 + -scratch[10].imag() * yb.imag() + scratch[9].imag() * ya.imag(), 258 + scratch[10].real() * yb.imag() - scratch[9].real() * ya.imag() ) / _scale_factor; 259 + 260 + *Fout2 = scratch[11] + scratch[12]; 261 + *Fout3 = scratch[11] - scratch[12]; 262 + 263 + ++Fout0; 264 + ++Fout1; 265 + ++Fout2; 266 + ++Fout3; 267 + ++Fout4; 268 + } 269 + } 270 + 271 + /* perform the butterfly for one stage of a mixed radix FFT */ 272 + void kf_bfly_generic(cpx_type * const Fout, const size_t fstride, const std::size_t m, const std::size_t p) const 273 + { 274 + const cpx_type *twiddles = &_twiddles[0]; 275 + cpx_type scratchbuf[p]; 276 + 277 + for (std::size_t u = 0; u < m; ++u) 278 + { 279 + std::size_t k = u; 280 + for (std::size_t q1 = 0; q1 < p; ++q1) 281 + { 282 + scratchbuf[q1] = Fout[k]; 283 + k += m; 284 + } 285 + 286 + k = u; 287 + for (std::size_t q1 = 0; q1 < p; ++q1) 288 + { 289 + std::size_t twidx = 0; 290 + Fout[k] = scratchbuf[0]; 291 + for (std::size_t q = 1; q < p; ++q) 292 + { 293 + twidx += fstride * k; 294 + if (twidx >= _nfft) 295 + twidx -= _nfft; 296 + Fout[k] += (scratchbuf[q] * twiddles[twidx]) / _scale_factor; 297 + } 298 + k += m; 299 + } 300 + } 301 + } 302 + }; 303 + 304 + #endif

Configure Feed

Configure Feed