···4874872. Configurable transition frequency
4884883. Optional multi-resolution mode for maximum accuracy
489489490490+## FFT Performance Measurements (CRITICAL UPDATE)
491491+492492+### M1 Pro Benchmark Results:
493493+Actual measurements completely contradict initial estimates:
494494+```
495495+CQT FFT Benchmark on this CPU:
496496+================================
497497+ 4096-point FFT: 0.014 ms
498498+ 6144-point FFT: 0.021 ms (current implementation)
499499+ 8192-point FFT: 0.025 ms
500500+12288-point FFT: 0.047 ms
501501+16384-point FFT: 0.066 ms (!!)
502502+================================
503503+```
504504+505505+### Key Findings:
506506+- **75-150x faster than conservative estimates**
507507+- 16K FFT uses only 0.066ms (0.4% of 16.67ms frame budget at 60fps)
508508+- Even 32K FFT would likely be ~0.15ms (still under 1% of the frame budget)
509509+- Apple Silicon (or auto-vectorization) provides exceptional FFT performance
510510+511511+### Performance Comparison Results:
512512+- **M1 Pro**: 16K FFT = 0.066ms
513513+- **Intel i5-1130G7 (ThinkPad X1 Nano)**:
514514+ - Performance mode: 16K FFT = 0.112ms (0.67% of frame budget)
515515+ - Power save mode: 16K FFT = 0.335ms (2% of frame budget)
516516+- Even in power save mode, 16K FFT is completely viable!
517517+518518+### Implications:
519519+**16K FFT is now implemented** and provides:
520520+- 20Hz: Q≈7.4 (truncated from ideal 17, but much better than the 1.86 of a 4K FFT or the ≈2.8 of the previous 6K FFT)
521521+- 30Hz: Q≈11.2 (still truncated, about two-thirds of the ideal 17)
522522+- 45Hz+: Full Q≈17 (ideal resolution - no truncation!)
523523+- Dramatically improved low-frequency resolution for electronic music
524524+525525+The profiling code has been added to measure actual performance on each platform.
526526+490527## Phase 3: Next Steps
491491-1. Implement hybrid windowing approach (immediate priority)
528528+1. ~~Implement 16K FFT based on benchmark results~~ (COMPLETE)
4925292. Add remaining API functions: `cqts()`, `cqto()`, `cqtos()`
4935303. Create separate audio buffer for CQT (restore FFT_SIZE to 1024)
4945314. Create FFT vs CQT comparison demo
495495-5. Performance optimization if needed
532532+5. Test and verify improved frequency resolution at 20Hz, 50Hz, 100Hz
496533497534### Test Script Example
498535```lua
+1-1
src/cqtdata.h
···44#define CQT_BINS 120
55#define CQT_OCTAVES 10
66#define CQT_BINS_PER_OCTAVE 12
77-#define CQT_FFT_SIZE 6144 // 6K FFT - balance between quality and performance
77+#define CQT_FFT_SIZE 16384 // 16K FFT - excellent low-frequency resolution with minimal performance impact
8899// CQT frequency range
1010#define CQT_MIN_FREQ 20.0f // Sub-bass for electronic music
+4-4
src/ext/cqt.c
···2323 printf("\nCQT FFT Benchmark on this CPU:\n");
2424 printf("================================\n");
25252626- int sizes[] = {4096, 6144, 8192, 12288, 16384};
2727- int numSizes = 5;
2626+ int sizes[] = {4096, 6144, 8192, 12288, 16384, 24576, 32768};
2727+ int numSizes = 7;
28282929 for (int s = 0; s < numSizes; s++)
3030 {
···242242 static double totalKernelTime = 0.0;
243243 static int profileCount = 0;
244244245245- // Perform 6144-point FFT with timing
245245+ // Perform 16384-point FFT with timing
246246 clock_t fftStart = clock();
247247 kiss_fftr(cqtFftCfg, cqtAudioBuffer, cqtFftOutput);
248248 clock_t fftEnd = clock();
···273273 // Print profiling info every 60 frames (~1 second)
274274 if (profileCount % 60 == 0)
275275 {
276276- printf("CQT Performance (6K FFT):\n");
276276+ printf("CQT Performance (16K FFT):\n");
277277 printf(" FFT avg: %.3fms\n", totalFftTime / profileCount);
278278 printf(" Kernels avg: %.3fms\n", totalKernelTime / profileCount);
279279 printf(" Total avg: %.3fms\n", (totalFftTime + totalKernelTime) / profileCount);
+12-21
src/ext/cqt_kernel.c
···8383 float Q = CQT_CalculateQ(CQT_BINS_PER_OCTAVE);
8484 int windowLength;
85858686- if (centerFreq < 100.0f) {
8787- // With 6K FFT, we can use higher Q for better resolution
8888- // 20Hz: Q=2.8 gives ~6144 samples (full FFT)
8989- // 50Hz: Q=7 gives ~6174 samples (slightly truncated)
9090- // 100Hz: Q=14 gives ~6174 samples (slightly truncated)
9191- float targetQ = Q; // Start with ideal Q
9292- windowLength = (int)(targetQ * sampleRate / centerFreq);
9393-9494- // If window doesn't fit, reduce Q to fit exactly
9595- if (windowLength > fftSize) {
9696- targetQ = (float)fftSize * centerFreq / sampleRate;
9797- windowLength = fftSize;
9898- }
9999- } else {
100100- // Constant-Q for higher frequencies
101101- windowLength = (int)(Q * sampleRate / centerFreq);
102102-103103- // Ensure it fits in FFT size with some margin
104104- if (windowLength > fftSize * 0.9) {
105105- windowLength = (int)(fftSize * 0.9);
106106- }
8686+ // With 16K FFT, the full constant-Q window fits for every bin above ~45 Hz; lower bins are clamped to the FFT size
8787+ windowLength = (int)(Q * sampleRate / centerFreq);
8888+8989+ // At 20Hz: windowLength = 17 * 44100 / 20 = 37,485 samples
9090+ // 16K FFT fits the full window for frequencies down to ~45 Hz (no truncation)
9191+ // For lower frequencies, we'll still get better Q than before
9292+9393+ // Ensure it fits in FFT size
9494+ if (windowLength > fftSize) {
9595+ windowLength = fftSize;
9696+ // Even at 20Hz with truncation to 16384 samples:
9797+ // Effective Q = 16384 * 20 / 44100 = 7.4 (much better than 1.86!)
10798 }
10899109100 // Ensure window length is reasonable
+2-2
src/fftdata.h
···11#pragma once
22#include <stdbool.h>
33-// TEMPORARY: Changed from 1024 to 3072 to support CQT's 6144-point FFT
33+// TEMPORARY: Changed from 1024 to 8192 to support CQT's 16384-point FFT
44// This breaks FFT bin resolution but enables CQT to work properly
55// TODO: Restore to 1024 and implement separate buffer for CQT
66-#define FFT_SIZE 3072
66+#define FFT_SIZE 8192
77extern float fPeakMinValue;
88extern float fPeakSmoothing;
99extern float fPeakSmoothValue;