diff --git a/CMakeLists.txt b/CMakeLists.txt index c4b65d2..0fecd2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ project(spectralyze) add_executable(spectralyze "src/main.cpp" - "src/FFT.hpp" + "src/FFT.hpp" "src/FFT.cpp" ) target_include_directories(spectralyze PRIVATE diff --git a/README.md b/README.md index 83ce6d7..3926af0 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ By default, spectralyze will output the entire frequency spectrum, all the way u ``` spectralyze -f 0,2500 coolSong.wav ``` -This command would only output frequencies ranging from 0kHz-2kHz, greatly decreasing file size. +This command would only output frequencies ranging from 0kHz-2.5kHz, greatly decreasing file size. ## Disabling channels By default this program will analyze all channels in the given audio file, if you are only interested in noe specific channel you can tell the program that via the `-m` flag: @@ -33,6 +33,9 @@ This will tell the program to pad to the 3rd-next power of 2! This means, if the **RESOLUTION (and thus file size) SCALES WITH 2^p** +## Window functions +Window functions are used to "cut out" parts of the signal. When you use the `-i` flag, you are only looking at a certain interval in the audio file. This is equivalent to multiplying the whole audio file with a rectangular window function (it is 0 everywhere except in the interval, where it is 1). With the `-w` flag you can choose between different window functions. Currently supported are the Von-Hann function, and the Gauss function. Both of these yield "smoother" spectra and get rid of a lot of noise. + ## Example command ``` spectralyze -i 20 -f 0,1000 -p 3 coolSong.wav @@ -69,7 +72,18 @@ Each audio channel in the file is transformed separately. The resulting JSON has ``` Every supplied audio file will result in one JSON file. The magnitude is the absolute value of the real and imaginary part of the Fourier transformation. +## Example use case +This tool can theoretically be used to visualize music. The visualization part has to be written by you, though. For my little experiment I used python with matplotlib to create a line diagram from the spectra: + + +https://user-images.githubusercontent.com/24511538/116720172-2d217a00-a9dc-11eb-945f-5db40300da78.mp4 + + +https://user-images.githubusercontent.com/24511538/116688886-a22e8880-a9b7-11eb-9a3d-b9b5069de697.mp4 + +Visualization written by [mpsparrow](https://github.com/mpsparrow) + ## Used libraries * [AudioFile](https://github.com/adamstark/AudioFile) for loading audio files * [JSON for Modern C++](https://github.com/nlohmann/json) for writing JSON data -* [cxxopts](https://github.com/jarro2783/cxxopts) for parsing commandline arguments \ No newline at end of file +* [cxxopts](https://github.com/jarro2783/cxxopts) for parsing commandline arguments diff --git a/src/FFT.cpp b/src/FFT.cpp new file mode 100644 index 0000000..f3895ef --- /dev/null +++ b/src/FFT.cpp @@ -0,0 +1,184 @@ +#include "FFT.hpp" + +#define _USE_MATH_DEFINES +#include +#include +#include +#include + +#define POW_OF_TWO(x) (x && !(x & (x - 1))) + +constexpr double REC_2_FAC = (double)1.0f / (double)2.0f; +constexpr double REC_3_FAC = (double)1.0f / (double)6.0f; +constexpr double REC_4_FAC = (double)1.0f / (double)24.0f; +constexpr double REC_5_FAC = (double)1.0f / (double)120.0f; +constexpr double REC_6_FAC = (double)1.0f / (double)720.0f; +constexpr double REC_7_FAC = (double)1.0f / (double)5040.0f; +constexpr double REC_8_FAC = (double)1.0f / (double)40320.0f; +constexpr double REC_9_FAC = (double)1.0f / (double)362880.0f; + +using namespace std::complex_literals; + +typedef std::function WindowFunction; +typedef std::function TrigFunction; +typedef std::function(double)> ExpFunction; + +WindowFunction window; +TrigFunction Sin = std::bind((double(*)(double))& std::sin, std::placeholders::_1); +TrigFunction Cos = std::bind((double(*)(double))& std::cos, std::placeholders::_1); + +inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width); +inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width); +inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width); +inline double WindowTriangle(unsigned int k, unsigned int offset, unsigned int width); +inline double WindowBlackman(unsigned int k, unsigned int offset, unsigned int width); + +double FastCos(double x); +double FastSin(double x); +std::complex ComplexExp(double x); + +std::vector> +radix2dit( + const std::vector& list, + size_t offset, + size_t N, + size_t s) +{ + std::vector> output(N); + if (N == 1) + { + output[0] = window(offset) * (list[offset]); + } + else + { + size_t halfN = N >> 1; + std::vector> first = radix2dit(list, offset, halfN, s << 1); + std::vector> second = radix2dit(list, offset + s, halfN, s << 1); + + double coeff = -M_PI / (double)halfN; + + for (int k = 0; k < halfN; k++) + { + std::complex p = first[k]; + std::complex q = ComplexExp(coeff * (double)k) * second[k]; + + output[k] = p + q; + output[halfN + k] = p - q; + } + } + + return output; +} + +std::vector> +FFT(const std::vector::const_iterator& begin, + const std::vector::const_iterator& end, + size_t sampleRate, + double minFreq, double maxFreq, + unsigned int zeropadding) +{ + std::vector signal(begin, end); + size_t N = signal.size(); + while (!POW_OF_TWO(N)) + { + // Pad with zeros + signal.push_back(0.0f); + N++; + } + + if (zeropadding > 1) { + N = (signal.size() << (zeropadding - 1)); + signal.insert(signal.end(), N - signal.size(), 0); + } + + WindowFunction f; + + + + std::vector> spectrum = radix2dit(signal, 0, N, 1); + double freqRes = (double)sampleRate / (double)N; + double nyquistLimit = (double)sampleRate / 2.0f; + + std::vector> output; + double freq = minFreq; + if (maxFreq == 0) + maxFreq = nyquistLimit; + + for (int k = freq / freqRes; freq < nyquistLimit && freq < maxFreq; k++) + { + output.push_back(std::make_pair(freq, 2.0f * std::abs(spectrum[k]) / (double)N)); + + freq += freqRes; + } + + return output; +} + +void SetWindowFunction(WindowFunctions func, unsigned int width) +{ + switch (func) + { + case WindowFunctions::RECTANGLE: window = std::bind(WindowRectangle, std::placeholders::_1, 0, width); break; + case WindowFunctions::VON_HANN: window = std::bind(WindowVonHann, std::placeholders::_1, 0, width); break; + case WindowFunctions::GAUSS: window = std::bind(WindowGauss, std::placeholders::_1, 0, width); break; + case WindowFunctions::TRIANGLE: window = std::bind(WindowTriangle, std::placeholders::_1, 0, width); break; + case WindowFunctions::BLACKMAN: window = std::bind(WindowBlackman, std::placeholders::_1, 0, width); break; + } +} + +void UseFastFunctions() +{ + Sin = std::bind(FastSin, std::placeholders::_1); + Cos = std::bind(FastCos, std::placeholders::_1); +} + +inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width) +{ + return ((offset < k) && (k < width)); +} + +inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width) +{ + return ((offset < k) && (k < width)) ? (0.5f * (1.0f - Cos(2.0f * M_PI * k / (width - 1)))) : 0; +} + +inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width) +{ + double coeff = (k - (width - 1) * 0.5f) / (0.4f * (width - 1) * 0.5f); + return ((offset < k) && (k < width)) ? (std::exp(-0.5f * coeff * coeff)) : 0; +} + +inline double WindowTriangle(unsigned int k, unsigned int offset, unsigned int width) +{ + return 1.0f - std::abs(((double)k - ((double)width / 2.0f)) / ((double)width / 2.0f)); +} + +inline double WindowBlackman(unsigned int k, unsigned int offset, unsigned int width) +{ + return (double)0.5f * ((double)1.0f - (double)0.16f) - 0.5f * Cos(2.0f * M_PI * k / (width - 1)) + (double)0.5f * (double)0.16f * Cos(4.0f * M_PI * k / (width - 1)); +} + +double FastCos(double x) +{ + x -= (x > M_PI) * (double)2.0f * M_PI; + x += (x < -M_PI) * (double)2.0f * M_PI; + double xpow2 = x * x; + double xpow4 = xpow2 * x * x; + double xpow6 = xpow4 * x * x; + return (double)1.0f - xpow2 * REC_2_FAC + xpow4 * REC_4_FAC - xpow6 * REC_6_FAC + xpow6 * x * x * REC_8_FAC; +} + +double FastSin(double x) +{ + x -= (x > M_PI) * (double)2.0f * M_PI; + x += (x < -M_PI) * (double)2.0f * M_PI; + double xpow3 = x * x * x; + double xpow5 = xpow3 * x * x; + double xpow7 = xpow5 * x * x; + return (double)x - xpow3 * REC_3_FAC + xpow5 * REC_5_FAC - xpow7 * REC_7_FAC + xpow7 * x * x * REC_9_FAC; +} + +std::complex ComplexExp(double x) +{ + return std::complex(Cos(x), Sin(x)); +} diff --git a/src/FFT.hpp b/src/FFT.hpp index 750100f..6417fd6 100644 --- a/src/FFT.hpp +++ b/src/FFT.hpp @@ -1,89 +1,20 @@ #pragma once -#define _USE_MATH_DEFINES -#include #include #include -#define TWO_PI (double)6.28318530718f -#define POW_OF_TWO(x) (x && !(x & (x - 1))) +enum class WindowFunctions { + RECTANGLE, + GAUSS, + VON_HANN, + TRIANGLE, + BLACKMAN +}; -using namespace std::complex_literals; - -std::vector> radix2dit(const std::vector::const_iterator& begin, size_t N, size_t s) -{ - std::vector> output(N); - if (N == 1) - { - output[0] = *begin; - } - else - { - size_t halfN = N >> 1; - std::vector> first = radix2dit(begin, halfN, s << 1); - std::vector> second = radix2dit(begin + s, halfN, s << 1); - - /*if (s == 1) { - std::future>> firstFuture = std::async(&radix2dit, begin, halfN, s << 1); - std::future>> secondFuture = std::async(&radix2dit, begin + s, halfN, s << 1); - - first = firstFuture.get(); - second = secondFuture.get(); - } - else { - first = radix2dit(begin, halfN, s << 1); - second = radix2dit(begin + 1, halfN, s << 1); - }*/ - - std::complex coeff = -M_PI * 1.0i / (double)halfN; - - for (int k = 0; k < N >> 1; k++) - { - std::complex p = first[k]; - std::complex q = std::exp(coeff * (double)k) * second[k]; - - output[k] = p + q; - output[halfN + k] = p - q; - } - } - - return output; -} - -std::vector> -FFT(const std::vector::const_iterator& begin, - const std::vector::const_iterator& end, +extern std::vector> FFT(const std::vector::const_iterator& begin, + const std::vector::const_iterator& end, size_t sampleRate, double minFreq, double maxFreq, - unsigned int zeropadding) -{ - std::vector signal(begin, end); - size_t N = signal.size(); - while (!POW_OF_TWO(N)) - { - // Pad with zeros - signal.push_back(0.0f); - N++; - } + unsigned int zeropadding); - if (zeropadding > 1) { - N = (signal.size() << (zeropadding - 1)); - signal.insert(signal.end(), N - signal.size(), 0); - } - - std::vector> spectrum = radix2dit(signal.cbegin(), N, 1); - double freqRes = (double)sampleRate / (double)N; - double nyquistLimit = (double)sampleRate / 2.0f; - - std::vector> output; - double freq = minFreq; - if (maxFreq == 0) - maxFreq = nyquistLimit; - - for (int k = freq / freqRes; freq < nyquistLimit && freq < maxFreq; k++) - { - output.push_back(std::make_pair(freq, 2.0f * std::abs(spectrum[k]) / (double)N)); - freq += freqRes; - } - - return output; -} +extern void SetWindowFunction(WindowFunctions func, unsigned int width); +extern void UseFastFunctions(); \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 392824f..860b00e 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include "AudioFile.h" @@ -10,6 +11,14 @@ #define PRINTER(s, x) if(!s.quiet) { std::cout << x; } +const std::map FUNCTIONS { + {"rectangle", WindowFunctions::RECTANGLE}, + {"von-hann", WindowFunctions::VON_HANN}, + {"gauss", WindowFunctions::GAUSS}, + {"triangle", WindowFunctions::TRIANGLE}, + {"blackman", WindowFunctions::BLACKMAN} +}; + struct Settings { std::vector files; bool quiet; @@ -17,6 +26,8 @@ struct Settings { double minFreq, maxFreq; unsigned int analyzeChannel; unsigned int zeropadding; + bool approx, legacy; + WindowFunctions window; }; Settings Parse(int argc, char** argv); @@ -26,6 +37,33 @@ int main(int argc, char** argv) Settings setts; setts = Parse(argc, argv); + if (setts.approx) + UseFastFunctions(); + + std::function>&)> toJson; + if (setts.legacy) + { + toJson = [](nlohmann::json& target, const std::vector>& spectrum) + { + target.push_back({ "spectrum", nlohmann::json::array()}); + + for (const std::pair& pair : spectrum) { + target["spectrum"].push_back({{"freq", pair.first}, {"mag", pair.second}}); + } + }; + } + else + { + toJson = [](nlohmann::json& target, const std::vector>& spectrum) + { + target.push_back({ "spectrum", nlohmann::json::array() }); + + for (const std::pair& pair : spectrum) { + target["spectrum"].push_back(pair.second); + } + }; + } + int numFiles = setts.files.size(); for (auto& file : setts.files) { AudioFile audioFile; @@ -41,6 +79,9 @@ int main(int argc, char** argv) int numChannels = audioFile.getNumChannels(); nlohmann::json output; + if(!setts.legacy) + output["freqs"] = nlohmann::json::array(); + int c = setts.analyzeChannel; if (c == 0) c = 1; @@ -53,57 +94,43 @@ int main(int argc, char** argv) std::string chName = "channel_" + std::to_string(c); output[chName] = nlohmann::json::array(); - if (setts.splitInterval == 0.0f) + int sampleInterval = (setts.splitInterval > 0.0f ? sampleRate * setts.splitInterval / 1000 : audioFile.samples[c - 1].size()); + SetWindowFunction(setts.window, sampleInterval); + int currentSample; + for (currentSample = 0; currentSample < audioFile.samples[c - 1].size(); currentSample += sampleInterval) { std::vector> spectrum = FFT( - audioFile.samples[c-1].cbegin(), - audioFile.samples[c-1].cend(), + audioFile.samples[c - 1].cbegin() + currentSample, + std::min( + audioFile.samples[c - 1].cbegin() + currentSample + sampleInterval, + audioFile.samples[c - 1].cend() + ), sampleRate, setts.minFreq, setts.maxFreq, setts.zeropadding ); - output[chName] = nlohmann::json::array(); - for (const std::pair& pair : spectrum) { - output[chName].push_back({ {"freq", pair.first}, {"mag", pair.second } }); - } - } - else - { - int sampleInterval = sampleRate * setts.splitInterval / 1000; - int currentSample; - for (currentSample = 0; currentSample < audioFile.samples[c - 1].size(); currentSample += sampleInterval) + if (!setts.legacy && output["freqs"].empty()) { - std::vector> spectrum = - FFT( - audioFile.samples[c-1].cbegin() + currentSample, - std::min( - audioFile.samples[c-1].cbegin() + currentSample + sampleInterval, - audioFile.samples[c-1].cend() - ), - sampleRate, - setts.minFreq, setts.maxFreq, - setts.zeropadding - ); - - output[chName].push_back({ - {"begin", currentSample}, - {"end", currentSample + sampleInterval}, - {"spectrum", nlohmann::json::array()} - }); - for (const std::pair& pair : spectrum) { - output[chName].back()["spectrum"].push_back({ {"freq", pair.first}, {"mag", pair.second } }); + output["freqs"].push_back(pair.first); } - - PRINTER(setts, "\rAnalyzing " << filename << "... Channel " << c << "/" << numChannels << " " << (int)std::floor((float)currentSample / (float)audioFile.samples[c-1].size() * 100.0f) << "% "); } + + output[chName].push_back({ + {"begin", currentSample}, + {"end", currentSample + sampleInterval} + }); + + toJson(output[chName].back(), spectrum); + + PRINTER(setts, "\rAnalyzing " << filename << "... Channel " << c << "/" << numChannels << " " << (int)std::floor((float)currentSample / (float)audioFile.samples[c-1].size() * 100.0f) << "% "); } } std::ofstream ofs(file.replace_extension("json")); - ofs << std::setw(4) << output << std::endl; + ofs << std::setw(4) << output.dump() << std::endl; ofs.close(); PRINTER(setts, "\rAnalyzing " << filename << "... 100% " << std::endl); @@ -127,8 +154,11 @@ Settings Parse(int argc, char** argv) ("i,interval", "Splits audio file into intervals of length i milliseconds and transforms them individually (0 to not split file)", cxxopts::value()) ("f,frequency", "Defines the frequency range of the output spectrum (Default: all the frequencies)", cxxopts::value>()) ("p,pad", "Add extra zero-padding. By default, the program will pad the signals with 0s until the number of samples is a power of 2 (this would be equivalent to -p 1). With this option you can tell the program to instead pad until the power of 2 after the next one (-p 2) etc. This increases frequency resolution", cxxopts::value()) + ("w,window", "Specify the window function used (rectangle (default), von-hann, gauss, triangle, blackman (3-term))", cxxopts::value()->default_value("rectangle")) ("m,mono", "Analyze only the given channel", cxxopts::value()->default_value("0")) + ("approx", "Use faster, but more inaccurate trigonometric functions instead of the std-functions (EXPERIMENTAL)") ("files", "Files to fourier transform", cxxopts::value>()) + ("legacy", "Uses the legacy data structure (WHICH IS VERY BAD!)", cxxopts::value()->default_value("false")) ("h,help", "Print usage") ; @@ -163,6 +193,28 @@ Settings Parse(int argc, char** argv) setts.splitInterval = (result.count("interval") ? result["interval"].as() : 0.0f); setts.analyzeChannel = (result.count("mono") ? result["mono"].as() : 0); setts.zeropadding = (result.count("pad") ? result["pad"].as() : 1); + setts.approx = (result.count("approx") ? true : false); + setts.legacy = (result.count("legacy") ? result["legacy"].as() : false); + + if (!result.count("window")) + { + setts.window = WindowFunctions::RECTANGLE; + } + else + { + std::string data = result["window"].as(); + std::transform(data.begin(), data.end(), data.begin(), [](unsigned char c) { return std::tolower(c); }); + auto it = FUNCTIONS.find(data); + if (it == FUNCTIONS.end()) + { + setts.window = WindowFunctions::RECTANGLE; + } + else + { + setts.window = it->second; + } + + } if (setts.maxFreq <= setts.minFreq && (setts.maxFreq != 0))