diff --git a/CMakeLists.txt b/CMakeLists.txt index c4b65d2..0fecd2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ project(spectralyze) add_executable(spectralyze "src/main.cpp" - "src/FFT.hpp" + "src/FFT.hpp" "src/FFT.cpp" ) target_include_directories(spectralyze PRIVATE diff --git a/README.md b/README.md index 83ce6d7..66c7bed 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,9 @@ This will tell the program to pad to the 3rd-next power of 2! This means, if the **RESOLUTION (and thus file size) SCALES WITH 2^p** +## Window functions +Window functions are used to "cut out" parts of the signal. When you use the `-i` flag, you are only looking at a certain interval in the audio file. This is equivalent to multiplying the whole audio file with a rectangular window function (it is 0 everywhere except in the interval, where it is 1). With the `-w` flag you can choose between different window functions. Currently supported are the Von-Hann function, and the Gauss function. Both of these yield "smoother" spectra and get rid of a lot of noise. + ## Example command ``` spectralyze -i 20 -f 0,1000 -p 3 coolSong.wav diff --git a/src/FFT.cpp b/src/FFT.cpp new file mode 100644 index 0000000..021a5a7 --- /dev/null +++ b/src/FFT.cpp @@ -0,0 +1,116 @@ +#include "FFT.hpp" + +#define _USE_MATH_DEFINES +#include +#include +#include +#include + +#define POW_OF_TWO(x) (x && !(x & (x - 1))) + +using namespace std::complex_literals; + +typedef std::function WindowFunction; + +inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width); +inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width); +inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width); + +std::vector> +radix2dit( + const std::vector& list, + size_t offset, + size_t N, + size_t s, + WindowFunction winFunc) +{ + std::vector> output(N); + if (N == 1) + { + output[0] = winFunc(offset) * (list[offset]); + } + else + { + size_t halfN = N >> 1; + std::vector> first = radix2dit(list, offset, halfN, s << 1, winFunc); + std::vector> second = radix2dit(list, offset + s, halfN, s << 1, winFunc); + + std::complex coeff = -M_PI * 1.0i / (double)halfN; + + for (int k = 0; k < halfN; k++) + { + std::complex p = first[k]; + std::complex q = std::exp(coeff * (double)k) * second[k]; + + output[k] = p + q; + output[halfN + k] = p - q; + } + } + + return output; +} + +std::vector> +FFT(const std::vector::const_iterator& begin, + const std::vector::const_iterator& end, + size_t sampleRate, + double minFreq, double maxFreq, + unsigned int zeropadding, + WindowFunctions func, unsigned int width, unsigned int offset) +{ + std::vector signal(begin, end); + size_t N = signal.size(); + while (!POW_OF_TWO(N)) + { + // Pad with zeros + signal.push_back(0.0f); + N++; + } + + if (zeropadding > 1) { + N = (signal.size() << (zeropadding - 1)); + signal.insert(signal.end(), N - signal.size(), 0); + } + + WindowFunction f; + switch (func) + { + case WindowFunctions::RECTANGLE: f = std::bind(WindowRectangle, std::placeholders::_1, offset, width); break; + case WindowFunctions::VON_HANN: f = std::bind(WindowVonHann, std::placeholders::_1, offset, width); break; + case WindowFunctions::GAUSS: f = std::bind(WindowGauss, std::placeholders::_1, offset, width); break; + } + + + std::vector> spectrum = radix2dit(signal, 0, N, 1, f); + double freqRes = (double)sampleRate / (double)N; + double nyquistLimit = (double)sampleRate / 2.0f; + + std::vector> output; + double freq = minFreq; + if (maxFreq == 0) + maxFreq = nyquistLimit; + + for (int k = freq / freqRes; freq < nyquistLimit && freq < maxFreq; k++) + { + output.push_back(std::make_pair(freq, 2.0f * std::abs(spectrum[k]) / (double)N)); + freq += freqRes; + } + + return output; +} + +inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width) +{ + return ((offset < k) && (k < width)); +} + +inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width) +{ + return ((offset < k) && (k < width)) ? (0.5f * (1.0f - cos(2.0f * M_PI * k / (width - 1)))) : 0; +} + +inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width) +{ + double coeff = (k - (width - 1) * 0.5f) / (0.4f * (width - 1) * 0.5f); + return ((offset < k) && (k < width)) ? (std::exp(-0.5f * coeff * coeff)) : 0; +} diff --git a/src/FFT.hpp b/src/FFT.hpp index 750100f..549d721 100644 --- a/src/FFT.hpp +++ b/src/FFT.hpp @@ -1,89 +1,17 @@ #pragma once -#define _USE_MATH_DEFINES -#include #include #include -#define TWO_PI (double)6.28318530718f -#define POW_OF_TWO(x) (x && !(x & (x - 1))) +enum class WindowFunctions { + RECTANGLE, + GAUSS, + VON_HANN +}; -using namespace std::complex_literals; - -std::vector> radix2dit(const std::vector::const_iterator& begin, size_t N, size_t s) -{ - std::vector> output(N); - if (N == 1) - { - output[0] = *begin; - } - else - { - size_t halfN = N >> 1; - std::vector> first = radix2dit(begin, halfN, s << 1); - std::vector> second = radix2dit(begin + s, halfN, s << 1); - - /*if (s == 1) { - std::future>> firstFuture = std::async(&radix2dit, begin, halfN, s << 1); - std::future>> secondFuture = std::async(&radix2dit, begin + s, halfN, s << 1); - - first = firstFuture.get(); - second = secondFuture.get(); - } - else { - first = radix2dit(begin, halfN, s << 1); - second = radix2dit(begin + 1, halfN, s << 1); - }*/ - - std::complex coeff = -M_PI * 1.0i / (double)halfN; - - for (int k = 0; k < N >> 1; k++) - { - std::complex p = first[k]; - std::complex q = std::exp(coeff * (double)k) * second[k]; - - output[k] = p + q; - output[halfN + k] = p - q; - } - } - - return output; -} - -std::vector> -FFT(const std::vector::const_iterator& begin, - const std::vector::const_iterator& end, +extern std::vector> FFT(const std::vector::const_iterator& begin, + const std::vector::const_iterator& end, size_t sampleRate, double minFreq, double maxFreq, - unsigned int zeropadding) -{ - std::vector signal(begin, end); - size_t N = signal.size(); - while (!POW_OF_TWO(N)) - { - // Pad with zeros - signal.push_back(0.0f); - N++; - } + unsigned int zeropadding, + WindowFunctions func, unsigned int width, unsigned int offset); - if (zeropadding > 1) { - N = (signal.size() << (zeropadding - 1)); - signal.insert(signal.end(), N - signal.size(), 0); - } - - std::vector> spectrum = radix2dit(signal.cbegin(), N, 1); - double freqRes = (double)sampleRate / (double)N; - double nyquistLimit = (double)sampleRate / 2.0f; - - std::vector> output; - double freq = minFreq; - if (maxFreq == 0) - maxFreq = nyquistLimit; - - for (int k = freq / freqRes; freq < nyquistLimit && freq < maxFreq; k++) - { - output.push_back(std::make_pair(freq, 2.0f * std::abs(spectrum[k]) / (double)N)); - freq += freqRes; - } - - return output; -} diff --git a/src/main.cpp b/src/main.cpp index 392824f..99f4b30 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include "AudioFile.h" @@ -10,6 +11,12 @@ #define PRINTER(s, x) if(!s.quiet) { std::cout << x; } +const std::map FUNCTIONS { + {"rectangle", WindowFunctions::RECTANGLE}, + {"von-hann", WindowFunctions::VON_HANN}, + {"gauss", WindowFunctions::GAUSS} +}; + struct Settings { std::vector files; bool quiet; @@ -17,6 +24,7 @@ struct Settings { double minFreq, maxFreq; unsigned int analyzeChannel; unsigned int zeropadding; + WindowFunctions window; }; Settings Parse(int argc, char** argv); @@ -61,7 +69,8 @@ int main(int argc, char** argv) audioFile.samples[c-1].cend(), sampleRate, setts.minFreq, setts.maxFreq, - setts.zeropadding + setts.zeropadding, + setts.window, audioFile.samples[c-1].size(), 0 ); output[chName] = nlohmann::json::array(); @@ -77,14 +86,15 @@ int main(int argc, char** argv) { std::vector> spectrum = FFT( - audioFile.samples[c-1].cbegin() + currentSample, + audioFile.samples[c - 1].cbegin() + currentSample, std::min( - audioFile.samples[c-1].cbegin() + currentSample + sampleInterval, - audioFile.samples[c-1].cend() - ), + audioFile.samples[c - 1].cbegin() + currentSample + sampleInterval, + audioFile.samples[c - 1].cend() + ), sampleRate, setts.minFreq, setts.maxFreq, - setts.zeropadding + setts.zeropadding, + setts.window, sampleInterval, 0 ); output[chName].push_back({ @@ -127,6 +137,7 @@ Settings Parse(int argc, char** argv) ("i,interval", "Splits audio file into intervals of length i milliseconds and transforms them individually (0 to not split file)", cxxopts::value()) ("f,frequency", "Defines the frequency range of the output spectrum (Default: all the frequencies)", cxxopts::value>()) ("p,pad", "Add extra zero-padding. By default, the program will pad the signals with 0s until the number of samples is a power of 2 (this would be equivalent to -p 1). With this option you can tell the program to instead pad until the power of 2 after the next one (-p 2) etc. This increases frequency resolution", cxxopts::value()) + ("w,window", "Specify the window function used (rectangle (default), von-hann, gauss)", cxxopts::value()->default_value("rectangle")) ("m,mono", "Analyze only the given channel", cxxopts::value()->default_value("0")) ("files", "Files to fourier transform", cxxopts::value>()) ("h,help", "Print usage") @@ -163,6 +174,26 @@ Settings Parse(int argc, char** argv) setts.splitInterval = (result.count("interval") ? result["interval"].as() : 0.0f); setts.analyzeChannel = (result.count("mono") ? result["mono"].as() : 0); setts.zeropadding = (result.count("pad") ? result["pad"].as() : 1); + + if (!result.count("window")) + { + setts.window = WindowFunctions::RECTANGLE; + } + else + { + std::string data = result["window"].as(); + std::transform(data.begin(), data.end(), data.begin(), [](unsigned char c) { return std::tolower(c); }); + auto it = FUNCTIONS.find(data); + if (it == FUNCTIONS.end()) + { + setts.window = WindowFunctions::RECTANGLE; + } + else + { + setts.window = it->second; + } + + } if (setts.maxFreq <= setts.minFreq && (setts.maxFreq != 0))