Compare commits

...

12 commits
v1.1.1 ... main

Author SHA1 Message Date
Lauchmelder d3f71641f1 further reduced output file size 2021-12-19 22:39:13 +01:00
Lauchmelder 9558264b5a changed json file structure to be more space efficient 2021-12-19 21:56:53 +01:00
Lauchmelder ec7f291194
Update README.md 2021-06-23 13:34:53 +02:00
Lauchmelder 7cd64b6222
Update README.md 2021-04-30 17:48:01 +02:00
Lauchmelder 56bc0e752d
Update README.md 2021-04-30 13:26:43 +02:00
Robert 7ba3225f61 Merge branch 'main' of https://github.com/Lauchmelder23/spectralyze into main 2021-04-30 13:25:16 +02:00
Robert bb947b4a53 added trigonometric approximations 2021-04-30 13:25:09 +02:00
Robert 0609fe6c33 removed mertz 2021-04-30 11:36:18 +02:00
Lauchmelder 0f22f865e4
Update README.md 2021-04-29 23:26:09 +02:00
Robert f2ad3bd04a added mertz method 2021-04-29 23:21:52 +02:00
Robert 291b985bc3 added blackman harris 2021-04-29 17:25:35 +02:00
Robert c903658f65 added triangle window 2021-04-29 17:14:29 +02:00
4 changed files with 160 additions and 63 deletions

View file

@ -15,7 +15,7 @@ By default, spectralyze will output the entire frequency spectrum, all the way u
``` ```
spectralyze -f 0,2500 coolSong.wav spectralyze -f 0,2500 coolSong.wav
``` ```
This command would only output frequencies ranging from 0kHz-2kHz, greatly decreasing file size. This command would only output frequencies ranging from 0kHz-2.5kHz, greatly decreasing file size.
## Disabling channels ## Disabling channels
By default this program will analyze all channels in the given audio file, if you are only interested in noe specific channel you can tell the program that via the `-m` flag: By default this program will analyze all channels in the given audio file, if you are only interested in noe specific channel you can tell the program that via the `-m` flag:
@ -75,8 +75,13 @@ Every supplied audio file will result in one JSON file. The magnitude is the abs
## Example use case ## Example use case
This tool can theoretically be used to visualize music. The visualization part has to be written by you, though. For my little experiment I used python with matplotlib to create a line diagram from the spectra: This tool can theoretically be used to visualize music. The visualization part has to be written by you, though. For my little experiment I used python with matplotlib to create a line diagram from the spectra:
https://user-images.githubusercontent.com/24511538/116532180-4218e300-a8e0-11eb-8914-6b3b50228e58.mp4
https://user-images.githubusercontent.com/24511538/116720172-2d217a00-a9dc-11eb-945f-5db40300da78.mp4
https://user-images.githubusercontent.com/24511538/116688886-a22e8880-a9b7-11eb-9a3d-b9b5069de697.mp4
Visualization written by [mpsparrow](https://github.com/mpsparrow)
## Used libraries ## Used libraries
* [AudioFile](https://github.com/adamstark/AudioFile) for loading audio files * [AudioFile](https://github.com/adamstark/AudioFile) for loading audio files

View file

@ -8,39 +8,59 @@
#define POW_OF_TWO(x) (x && !(x & (x - 1))) #define POW_OF_TWO(x) (x && !(x & (x - 1)))
constexpr double REC_2_FAC = (double)1.0f / (double)2.0f;
constexpr double REC_3_FAC = (double)1.0f / (double)6.0f;
constexpr double REC_4_FAC = (double)1.0f / (double)24.0f;
constexpr double REC_5_FAC = (double)1.0f / (double)120.0f;
constexpr double REC_6_FAC = (double)1.0f / (double)720.0f;
constexpr double REC_7_FAC = (double)1.0f / (double)5040.0f;
constexpr double REC_8_FAC = (double)1.0f / (double)40320.0f;
constexpr double REC_9_FAC = (double)1.0f / (double)362880.0f;
using namespace std::complex_literals; using namespace std::complex_literals;
typedef std::function<double(unsigned int)> WindowFunction; typedef std::function<double(unsigned int)> WindowFunction;
typedef std::function<double(double)> TrigFunction;
typedef std::function<std::complex<double>(double)> ExpFunction;
WindowFunction window;
TrigFunction Sin = std::bind((double(*)(double))& std::sin, std::placeholders::_1);
TrigFunction Cos = std::bind((double(*)(double))& std::cos, std::placeholders::_1);
inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width); inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width);
inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width); inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width);
inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width); inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width);
inline double WindowTriangle(unsigned int k, unsigned int offset, unsigned int width);
inline double WindowBlackman(unsigned int k, unsigned int offset, unsigned int width);
double FastCos(double x);
double FastSin(double x);
std::complex<double> ComplexExp(double x);
std::vector<std::complex<double>> std::vector<std::complex<double>>
radix2dit( radix2dit(
const std::vector<double>& list, const std::vector<double>& list,
size_t offset, size_t offset,
size_t N, size_t N,
size_t s, size_t s)
WindowFunction winFunc)
{ {
std::vector<std::complex<double>> output(N); std::vector<std::complex<double>> output(N);
if (N == 1) if (N == 1)
{ {
output[0] = winFunc(offset) * (list[offset]); output[0] = window(offset) * (list[offset]);
} }
else else
{ {
size_t halfN = N >> 1; size_t halfN = N >> 1;
std::vector<std::complex<double>> first = radix2dit(list, offset, halfN, s << 1, winFunc); std::vector<std::complex<double>> first = radix2dit(list, offset, halfN, s << 1);
std::vector<std::complex<double>> second = radix2dit(list, offset + s, halfN, s << 1, winFunc); std::vector<std::complex<double>> second = radix2dit(list, offset + s, halfN, s << 1);
std::complex<double> coeff = -M_PI * 1.0i / (double)halfN; double coeff = -M_PI / (double)halfN;
for (int k = 0; k < halfN; k++) for (int k = 0; k < halfN; k++)
{ {
std::complex<double> p = first[k]; std::complex<double> p = first[k];
std::complex<double> q = std::exp(coeff * (double)k) * second[k]; std::complex<double> q = ComplexExp(coeff * (double)k) * second[k];
output[k] = p + q; output[k] = p + q;
output[halfN + k] = p - q; output[halfN + k] = p - q;
@ -55,8 +75,7 @@ FFT(const std::vector<double>::const_iterator& begin,
const std::vector<double>::const_iterator& end, const std::vector<double>::const_iterator& end,
size_t sampleRate, size_t sampleRate,
double minFreq, double maxFreq, double minFreq, double maxFreq,
unsigned int zeropadding, unsigned int zeropadding)
WindowFunctions func, unsigned int width, unsigned int offset)
{ {
std::vector<double> signal(begin, end); std::vector<double> signal(begin, end);
size_t N = signal.size(); size_t N = signal.size();
@ -73,15 +92,10 @@ FFT(const std::vector<double>::const_iterator& begin,
} }
WindowFunction f; WindowFunction f;
switch (func)
{
case WindowFunctions::RECTANGLE: f = std::bind(WindowRectangle, std::placeholders::_1, offset, width); break;
case WindowFunctions::VON_HANN: f = std::bind(WindowVonHann, std::placeholders::_1, offset, width); break;
case WindowFunctions::GAUSS: f = std::bind(WindowGauss, std::placeholders::_1, offset, width); break;
}
std::vector<std::complex<double>> spectrum = radix2dit(signal, 0, N, 1);
std::vector<std::complex<double>> spectrum = radix2dit(signal, 0, N, 1, f);
double freqRes = (double)sampleRate / (double)N; double freqRes = (double)sampleRate / (double)N;
double nyquistLimit = (double)sampleRate / 2.0f; double nyquistLimit = (double)sampleRate / 2.0f;
@ -93,12 +107,31 @@ FFT(const std::vector<double>::const_iterator& begin,
for (int k = freq / freqRes; freq < nyquistLimit && freq < maxFreq; k++) for (int k = freq / freqRes; freq < nyquistLimit && freq < maxFreq; k++)
{ {
output.push_back(std::make_pair(freq, 2.0f * std::abs(spectrum[k]) / (double)N)); output.push_back(std::make_pair(freq, 2.0f * std::abs(spectrum[k]) / (double)N));
freq += freqRes; freq += freqRes;
} }
return output; return output;
} }
void SetWindowFunction(WindowFunctions func, unsigned int width)
{
switch (func)
{
case WindowFunctions::RECTANGLE: window = std::bind(WindowRectangle, std::placeholders::_1, 0, width); break;
case WindowFunctions::VON_HANN: window = std::bind(WindowVonHann, std::placeholders::_1, 0, width); break;
case WindowFunctions::GAUSS: window = std::bind(WindowGauss, std::placeholders::_1, 0, width); break;
case WindowFunctions::TRIANGLE: window = std::bind(WindowTriangle, std::placeholders::_1, 0, width); break;
case WindowFunctions::BLACKMAN: window = std::bind(WindowBlackman, std::placeholders::_1, 0, width); break;
}
}
void UseFastFunctions()
{
Sin = std::bind(FastSin, std::placeholders::_1);
Cos = std::bind(FastCos, std::placeholders::_1);
}
inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width) inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width)
{ {
return ((offset < k) && (k < width)); return ((offset < k) && (k < width));
@ -106,7 +139,7 @@ inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int
inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width) inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width)
{ {
return ((offset < k) && (k < width)) ? (0.5f * (1.0f - cos(2.0f * M_PI * k / (width - 1)))) : 0; return ((offset < k) && (k < width)) ? (0.5f * (1.0f - Cos(2.0f * M_PI * k / (width - 1)))) : 0;
} }
inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width) inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width)
@ -114,3 +147,38 @@ inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int widt
double coeff = (k - (width - 1) * 0.5f) / (0.4f * (width - 1) * 0.5f); double coeff = (k - (width - 1) * 0.5f) / (0.4f * (width - 1) * 0.5f);
return ((offset < k) && (k < width)) ? (std::exp(-0.5f * coeff * coeff)) : 0; return ((offset < k) && (k < width)) ? (std::exp(-0.5f * coeff * coeff)) : 0;
} }
inline double WindowTriangle(unsigned int k, unsigned int offset, unsigned int width)
{
return 1.0f - std::abs(((double)k - ((double)width / 2.0f)) / ((double)width / 2.0f));
}
inline double WindowBlackman(unsigned int k, unsigned int offset, unsigned int width)
{
return (double)0.5f * ((double)1.0f - (double)0.16f) - 0.5f * Cos(2.0f * M_PI * k / (width - 1)) + (double)0.5f * (double)0.16f * Cos(4.0f * M_PI * k / (width - 1));
}
double FastCos(double x)
{
x -= (x > M_PI) * (double)2.0f * M_PI;
x += (x < -M_PI) * (double)2.0f * M_PI;
double xpow2 = x * x;
double xpow4 = xpow2 * x * x;
double xpow6 = xpow4 * x * x;
return (double)1.0f - xpow2 * REC_2_FAC + xpow4 * REC_4_FAC - xpow6 * REC_6_FAC + xpow6 * x * x * REC_8_FAC;
}
double FastSin(double x)
{
x -= (x > M_PI) * (double)2.0f * M_PI;
x += (x < -M_PI) * (double)2.0f * M_PI;
double xpow3 = x * x * x;
double xpow5 = xpow3 * x * x;
double xpow7 = xpow5 * x * x;
return (double)x - xpow3 * REC_3_FAC + xpow5 * REC_5_FAC - xpow7 * REC_7_FAC + xpow7 * x * x * REC_9_FAC;
}
std::complex<double> ComplexExp(double x)
{
return std::complex<double>(Cos(x), Sin(x));
}

View file

@ -5,13 +5,16 @@
enum class WindowFunctions { enum class WindowFunctions {
RECTANGLE, RECTANGLE,
GAUSS, GAUSS,
VON_HANN VON_HANN,
TRIANGLE,
BLACKMAN
}; };
extern std::vector<std::pair<double, double>> FFT(const std::vector<double>::const_iterator& begin, extern std::vector<std::pair<double, double>> FFT(const std::vector<double>::const_iterator& begin,
const std::vector<double>::const_iterator& end, const std::vector<double>::const_iterator& end,
size_t sampleRate, size_t sampleRate,
double minFreq, double maxFreq, double minFreq, double maxFreq,
unsigned int zeropadding, unsigned int zeropadding);
WindowFunctions func, unsigned int width, unsigned int offset);
extern void SetWindowFunction(WindowFunctions func, unsigned int width);
extern void UseFastFunctions();

View file

@ -14,7 +14,9 @@
const std::map<std::string, WindowFunctions> FUNCTIONS { const std::map<std::string, WindowFunctions> FUNCTIONS {
{"rectangle", WindowFunctions::RECTANGLE}, {"rectangle", WindowFunctions::RECTANGLE},
{"von-hann", WindowFunctions::VON_HANN}, {"von-hann", WindowFunctions::VON_HANN},
{"gauss", WindowFunctions::GAUSS} {"gauss", WindowFunctions::GAUSS},
{"triangle", WindowFunctions::TRIANGLE},
{"blackman", WindowFunctions::BLACKMAN}
}; };
struct Settings { struct Settings {
@ -24,6 +26,7 @@ struct Settings {
double minFreq, maxFreq; double minFreq, maxFreq;
unsigned int analyzeChannel; unsigned int analyzeChannel;
unsigned int zeropadding; unsigned int zeropadding;
bool approx, legacy;
WindowFunctions window; WindowFunctions window;
}; };
@ -34,6 +37,33 @@ int main(int argc, char** argv)
Settings setts; Settings setts;
setts = Parse(argc, argv); setts = Parse(argc, argv);
if (setts.approx)
UseFastFunctions();
std::function<void(nlohmann::json&, const std::vector<std::pair<double, double>>&)> toJson;
if (setts.legacy)
{
toJson = [](nlohmann::json& target, const std::vector<std::pair<double, double>>& spectrum)
{
target.push_back({ "spectrum", nlohmann::json::array()});
for (const std::pair<double, double>& pair : spectrum) {
target["spectrum"].push_back({{"freq", pair.first}, {"mag", pair.second}});
}
};
}
else
{
toJson = [](nlohmann::json& target, const std::vector<std::pair<double, double>>& spectrum)
{
target.push_back({ "spectrum", nlohmann::json::array() });
for (const std::pair<double, double>& pair : spectrum) {
target["spectrum"].push_back(pair.second);
}
};
}
int numFiles = setts.files.size(); int numFiles = setts.files.size();
for (auto& file : setts.files) { for (auto& file : setts.files) {
AudioFile<double> audioFile; AudioFile<double> audioFile;
@ -49,6 +79,9 @@ int main(int argc, char** argv)
int numChannels = audioFile.getNumChannels(); int numChannels = audioFile.getNumChannels();
nlohmann::json output; nlohmann::json output;
if(!setts.legacy)
output["freqs"] = nlohmann::json::array();
int c = setts.analyzeChannel; int c = setts.analyzeChannel;
if (c == 0) if (c == 0)
c = 1; c = 1;
@ -61,59 +94,43 @@ int main(int argc, char** argv)
std::string chName = "channel_" + std::to_string(c); std::string chName = "channel_" + std::to_string(c);
output[chName] = nlohmann::json::array(); output[chName] = nlohmann::json::array();
if (setts.splitInterval == 0.0f) int sampleInterval = (setts.splitInterval > 0.0f ? sampleRate * setts.splitInterval / 1000 : audioFile.samples[c - 1].size());
SetWindowFunction(setts.window, sampleInterval);
int currentSample;
for (currentSample = 0; currentSample < audioFile.samples[c - 1].size(); currentSample += sampleInterval)
{ {
std::vector<std::pair<double, double>> spectrum = std::vector<std::pair<double, double>> spectrum =
FFT( FFT(
audioFile.samples[c-1].cbegin(), audioFile.samples[c - 1].cbegin() + currentSample,
audioFile.samples[c-1].cend(), std::min(
audioFile.samples[c - 1].cbegin() + currentSample + sampleInterval,
audioFile.samples[c - 1].cend()
),
sampleRate, sampleRate,
setts.minFreq, setts.maxFreq, setts.minFreq, setts.maxFreq,
setts.zeropadding, setts.zeropadding
setts.window, audioFile.samples[c-1].size(), 0
); );
output[chName] = nlohmann::json::array(); if (!setts.legacy && output["freqs"].empty())
for (const std::pair<double, double>& pair : spectrum) {
output[chName].push_back({ {"freq", pair.first}, {"mag", pair.second } });
}
}
else
{
int sampleInterval = sampleRate * setts.splitInterval / 1000;
int currentSample;
for (currentSample = 0; currentSample < audioFile.samples[c - 1].size(); currentSample += sampleInterval)
{ {
std::vector<std::pair<double, double>> spectrum =
FFT(
audioFile.samples[c - 1].cbegin() + currentSample,
std::min(
audioFile.samples[c - 1].cbegin() + currentSample + sampleInterval,
audioFile.samples[c - 1].cend()
),
sampleRate,
setts.minFreq, setts.maxFreq,
setts.zeropadding,
setts.window, sampleInterval, 0
);
output[chName].push_back({
{"begin", currentSample},
{"end", currentSample + sampleInterval},
{"spectrum", nlohmann::json::array()}
});
for (const std::pair<double, double>& pair : spectrum) { for (const std::pair<double, double>& pair : spectrum) {
output[chName].back()["spectrum"].push_back({ {"freq", pair.first}, {"mag", pair.second } }); output["freqs"].push_back(pair.first);
} }
PRINTER(setts, "\rAnalyzing " << filename << "... Channel " << c << "/" << numChannels << " " << (int)std::floor((float)currentSample / (float)audioFile.samples[c-1].size() * 100.0f) << "% ");
} }
output[chName].push_back({
{"begin", currentSample},
{"end", currentSample + sampleInterval}
});
toJson(output[chName].back(), spectrum);
PRINTER(setts, "\rAnalyzing " << filename << "... Channel " << c << "/" << numChannels << " " << (int)std::floor((float)currentSample / (float)audioFile.samples[c-1].size() * 100.0f) << "% ");
} }
} }
std::ofstream ofs(file.replace_extension("json")); std::ofstream ofs(file.replace_extension("json"));
ofs << std::setw(4) << output << std::endl; ofs << std::setw(4) << output.dump() << std::endl;
ofs.close(); ofs.close();
PRINTER(setts, "\rAnalyzing " << filename << "... 100% " << std::endl); PRINTER(setts, "\rAnalyzing " << filename << "... 100% " << std::endl);
@ -137,9 +154,11 @@ Settings Parse(int argc, char** argv)
("i,interval", "Splits audio file into intervals of length i milliseconds and transforms them individually (0 to not split file)", cxxopts::value<float>()) ("i,interval", "Splits audio file into intervals of length i milliseconds and transforms them individually (0 to not split file)", cxxopts::value<float>())
("f,frequency", "Defines the frequency range of the output spectrum (Default: all the frequencies)", cxxopts::value<std::vector<double>>()) ("f,frequency", "Defines the frequency range of the output spectrum (Default: all the frequencies)", cxxopts::value<std::vector<double>>())
("p,pad", "Add extra zero-padding. By default, the program will pad the signals with 0s until the number of samples is a power of 2 (this would be equivalent to -p 1). With this option you can tell the program to instead pad until the power of 2 after the next one (-p 2) etc. This increases frequency resolution", cxxopts::value<unsigned int>()) ("p,pad", "Add extra zero-padding. By default, the program will pad the signals with 0s until the number of samples is a power of 2 (this would be equivalent to -p 1). With this option you can tell the program to instead pad until the power of 2 after the next one (-p 2) etc. This increases frequency resolution", cxxopts::value<unsigned int>())
("w,window", "Specify the window function used (rectangle (default), von-hann, gauss)", cxxopts::value<std::string>()->default_value("rectangle")) ("w,window", "Specify the window function used (rectangle (default), von-hann, gauss, triangle, blackman (3-term))", cxxopts::value<std::string>()->default_value("rectangle"))
("m,mono", "Analyze only the given channel", cxxopts::value<unsigned int>()->default_value("0")) ("m,mono", "Analyze only the given channel", cxxopts::value<unsigned int>()->default_value("0"))
("approx", "Use faster, but more inaccurate trigonometric functions instead of the std-functions (EXPERIMENTAL)")
("files", "Files to fourier transform", cxxopts::value<std::vector<std::filesystem::path>>()) ("files", "Files to fourier transform", cxxopts::value<std::vector<std::filesystem::path>>())
("legacy", "Uses the legacy data structure (WHICH IS VERY BAD!)", cxxopts::value<bool>()->default_value("false"))
("h,help", "Print usage") ("h,help", "Print usage")
; ;
@ -174,6 +193,8 @@ Settings Parse(int argc, char** argv)
setts.splitInterval = (result.count("interval") ? result["interval"].as<float>() : 0.0f); setts.splitInterval = (result.count("interval") ? result["interval"].as<float>() : 0.0f);
setts.analyzeChannel = (result.count("mono") ? result["mono"].as<unsigned int>() : 0); setts.analyzeChannel = (result.count("mono") ? result["mono"].as<unsigned int>() : 0);
setts.zeropadding = (result.count("pad") ? result["pad"].as<unsigned int>() : 1); setts.zeropadding = (result.count("pad") ? result["pad"].as<unsigned int>() : 1);
setts.approx = (result.count("approx") ? true : false);
setts.legacy = (result.count("legacy") ? result["legacy"].as<bool>() : false);
if (!result.count("window")) if (!result.count("window"))
{ {