Compare commits
12 commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
d3f71641f1 | ||
![]() |
9558264b5a | ||
![]() |
ec7f291194 | ||
![]() |
7cd64b6222 | ||
![]() |
56bc0e752d | ||
![]() |
7ba3225f61 | ||
![]() |
bb947b4a53 | ||
![]() |
0609fe6c33 | ||
![]() |
0f22f865e4 | ||
![]() |
f2ad3bd04a | ||
![]() |
291b985bc3 | ||
![]() |
c903658f65 |
|
@ -15,7 +15,7 @@ By default, spectralyze will output the entire frequency spectrum, all the way u
|
|||
```
|
||||
spectralyze -f 0,2500 coolSong.wav
|
||||
```
|
||||
This command would only output frequencies ranging from 0kHz-2kHz, greatly decreasing file size.
|
||||
This command would only output frequencies ranging from 0kHz-2.5kHz, greatly decreasing file size.
|
||||
|
||||
## Disabling channels
|
||||
By default this program will analyze all channels in the given audio file, if you are only interested in noe specific channel you can tell the program that via the `-m` flag:
|
||||
|
@ -75,8 +75,13 @@ Every supplied audio file will result in one JSON file. The magnitude is the abs
|
|||
## Example use case
|
||||
This tool can theoretically be used to visualize music. The visualization part has to be written by you, though. For my little experiment I used python with matplotlib to create a line diagram from the spectra:
|
||||
|
||||
https://user-images.githubusercontent.com/24511538/116532180-4218e300-a8e0-11eb-8914-6b3b50228e58.mp4
|
||||
|
||||
https://user-images.githubusercontent.com/24511538/116720172-2d217a00-a9dc-11eb-945f-5db40300da78.mp4
|
||||
|
||||
|
||||
https://user-images.githubusercontent.com/24511538/116688886-a22e8880-a9b7-11eb-9a3d-b9b5069de697.mp4
|
||||
|
||||
Visualization written by [mpsparrow](https://github.com/mpsparrow)
|
||||
|
||||
## Used libraries
|
||||
* [AudioFile](https://github.com/adamstark/AudioFile) for loading audio files
|
||||
|
|
104
src/FFT.cpp
104
src/FFT.cpp
|
@ -8,39 +8,59 @@
|
|||
|
||||
#define POW_OF_TWO(x) (x && !(x & (x - 1)))
|
||||
|
||||
constexpr double REC_2_FAC = (double)1.0f / (double)2.0f;
|
||||
constexpr double REC_3_FAC = (double)1.0f / (double)6.0f;
|
||||
constexpr double REC_4_FAC = (double)1.0f / (double)24.0f;
|
||||
constexpr double REC_5_FAC = (double)1.0f / (double)120.0f;
|
||||
constexpr double REC_6_FAC = (double)1.0f / (double)720.0f;
|
||||
constexpr double REC_7_FAC = (double)1.0f / (double)5040.0f;
|
||||
constexpr double REC_8_FAC = (double)1.0f / (double)40320.0f;
|
||||
constexpr double REC_9_FAC = (double)1.0f / (double)362880.0f;
|
||||
|
||||
using namespace std::complex_literals;
|
||||
|
||||
typedef std::function<double(unsigned int)> WindowFunction;
|
||||
typedef std::function<double(double)> TrigFunction;
|
||||
typedef std::function<std::complex<double>(double)> ExpFunction;
|
||||
|
||||
WindowFunction window;
|
||||
TrigFunction Sin = std::bind((double(*)(double))& std::sin, std::placeholders::_1);
|
||||
TrigFunction Cos = std::bind((double(*)(double))& std::cos, std::placeholders::_1);
|
||||
|
||||
inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width);
|
||||
inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width);
|
||||
inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width);
|
||||
inline double WindowTriangle(unsigned int k, unsigned int offset, unsigned int width);
|
||||
inline double WindowBlackman(unsigned int k, unsigned int offset, unsigned int width);
|
||||
|
||||
double FastCos(double x);
|
||||
double FastSin(double x);
|
||||
std::complex<double> ComplexExp(double x);
|
||||
|
||||
std::vector<std::complex<double>>
|
||||
radix2dit(
|
||||
const std::vector<double>& list,
|
||||
size_t offset,
|
||||
size_t N,
|
||||
size_t s,
|
||||
WindowFunction winFunc)
|
||||
size_t s)
|
||||
{
|
||||
std::vector<std::complex<double>> output(N);
|
||||
if (N == 1)
|
||||
{
|
||||
output[0] = winFunc(offset) * (list[offset]);
|
||||
output[0] = window(offset) * (list[offset]);
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t halfN = N >> 1;
|
||||
std::vector<std::complex<double>> first = radix2dit(list, offset, halfN, s << 1, winFunc);
|
||||
std::vector<std::complex<double>> second = radix2dit(list, offset + s, halfN, s << 1, winFunc);
|
||||
std::vector<std::complex<double>> first = radix2dit(list, offset, halfN, s << 1);
|
||||
std::vector<std::complex<double>> second = radix2dit(list, offset + s, halfN, s << 1);
|
||||
|
||||
std::complex<double> coeff = -M_PI * 1.0i / (double)halfN;
|
||||
double coeff = -M_PI / (double)halfN;
|
||||
|
||||
for (int k = 0; k < halfN; k++)
|
||||
{
|
||||
std::complex<double> p = first[k];
|
||||
std::complex<double> q = std::exp(coeff * (double)k) * second[k];
|
||||
std::complex<double> q = ComplexExp(coeff * (double)k) * second[k];
|
||||
|
||||
output[k] = p + q;
|
||||
output[halfN + k] = p - q;
|
||||
|
@ -55,8 +75,7 @@ FFT(const std::vector<double>::const_iterator& begin,
|
|||
const std::vector<double>::const_iterator& end,
|
||||
size_t sampleRate,
|
||||
double minFreq, double maxFreq,
|
||||
unsigned int zeropadding,
|
||||
WindowFunctions func, unsigned int width, unsigned int offset)
|
||||
unsigned int zeropadding)
|
||||
{
|
||||
std::vector<double> signal(begin, end);
|
||||
size_t N = signal.size();
|
||||
|
@ -73,15 +92,10 @@ FFT(const std::vector<double>::const_iterator& begin,
|
|||
}
|
||||
|
||||
WindowFunction f;
|
||||
switch (func)
|
||||
{
|
||||
case WindowFunctions::RECTANGLE: f = std::bind(WindowRectangle, std::placeholders::_1, offset, width); break;
|
||||
case WindowFunctions::VON_HANN: f = std::bind(WindowVonHann, std::placeholders::_1, offset, width); break;
|
||||
case WindowFunctions::GAUSS: f = std::bind(WindowGauss, std::placeholders::_1, offset, width); break;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
std::vector<std::complex<double>> spectrum = radix2dit(signal, 0, N, 1, f);
|
||||
std::vector<std::complex<double>> spectrum = radix2dit(signal, 0, N, 1);
|
||||
double freqRes = (double)sampleRate / (double)N;
|
||||
double nyquistLimit = (double)sampleRate / 2.0f;
|
||||
|
||||
|
@ -93,12 +107,31 @@ FFT(const std::vector<double>::const_iterator& begin,
|
|||
for (int k = freq / freqRes; freq < nyquistLimit && freq < maxFreq; k++)
|
||||
{
|
||||
output.push_back(std::make_pair(freq, 2.0f * std::abs(spectrum[k]) / (double)N));
|
||||
|
||||
freq += freqRes;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
void SetWindowFunction(WindowFunctions func, unsigned int width)
|
||||
{
|
||||
switch (func)
|
||||
{
|
||||
case WindowFunctions::RECTANGLE: window = std::bind(WindowRectangle, std::placeholders::_1, 0, width); break;
|
||||
case WindowFunctions::VON_HANN: window = std::bind(WindowVonHann, std::placeholders::_1, 0, width); break;
|
||||
case WindowFunctions::GAUSS: window = std::bind(WindowGauss, std::placeholders::_1, 0, width); break;
|
||||
case WindowFunctions::TRIANGLE: window = std::bind(WindowTriangle, std::placeholders::_1, 0, width); break;
|
||||
case WindowFunctions::BLACKMAN: window = std::bind(WindowBlackman, std::placeholders::_1, 0, width); break;
|
||||
}
|
||||
}
|
||||
|
||||
void UseFastFunctions()
|
||||
{
|
||||
Sin = std::bind(FastSin, std::placeholders::_1);
|
||||
Cos = std::bind(FastCos, std::placeholders::_1);
|
||||
}
|
||||
|
||||
inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width)
|
||||
{
|
||||
return ((offset < k) && (k < width));
|
||||
|
@ -106,7 +139,7 @@ inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int
|
|||
|
||||
inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width)
|
||||
{
|
||||
return ((offset < k) && (k < width)) ? (0.5f * (1.0f - cos(2.0f * M_PI * k / (width - 1)))) : 0;
|
||||
return ((offset < k) && (k < width)) ? (0.5f * (1.0f - Cos(2.0f * M_PI * k / (width - 1)))) : 0;
|
||||
}
|
||||
|
||||
inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width)
|
||||
|
@ -114,3 +147,38 @@ inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int widt
|
|||
double coeff = (k - (width - 1) * 0.5f) / (0.4f * (width - 1) * 0.5f);
|
||||
return ((offset < k) && (k < width)) ? (std::exp(-0.5f * coeff * coeff)) : 0;
|
||||
}
|
||||
|
||||
inline double WindowTriangle(unsigned int k, unsigned int offset, unsigned int width)
|
||||
{
|
||||
return 1.0f - std::abs(((double)k - ((double)width / 2.0f)) / ((double)width / 2.0f));
|
||||
}
|
||||
|
||||
inline double WindowBlackman(unsigned int k, unsigned int offset, unsigned int width)
|
||||
{
|
||||
return (double)0.5f * ((double)1.0f - (double)0.16f) - 0.5f * Cos(2.0f * M_PI * k / (width - 1)) + (double)0.5f * (double)0.16f * Cos(4.0f * M_PI * k / (width - 1));
|
||||
}
|
||||
|
||||
double FastCos(double x)
|
||||
{
|
||||
x -= (x > M_PI) * (double)2.0f * M_PI;
|
||||
x += (x < -M_PI) * (double)2.0f * M_PI;
|
||||
double xpow2 = x * x;
|
||||
double xpow4 = xpow2 * x * x;
|
||||
double xpow6 = xpow4 * x * x;
|
||||
return (double)1.0f - xpow2 * REC_2_FAC + xpow4 * REC_4_FAC - xpow6 * REC_6_FAC + xpow6 * x * x * REC_8_FAC;
|
||||
}
|
||||
|
||||
double FastSin(double x)
|
||||
{
|
||||
x -= (x > M_PI) * (double)2.0f * M_PI;
|
||||
x += (x < -M_PI) * (double)2.0f * M_PI;
|
||||
double xpow3 = x * x * x;
|
||||
double xpow5 = xpow3 * x * x;
|
||||
double xpow7 = xpow5 * x * x;
|
||||
return (double)x - xpow3 * REC_3_FAC + xpow5 * REC_5_FAC - xpow7 * REC_7_FAC + xpow7 * x * x * REC_9_FAC;
|
||||
}
|
||||
|
||||
std::complex<double> ComplexExp(double x)
|
||||
{
|
||||
return std::complex<double>(Cos(x), Sin(x));
|
||||
}
|
||||
|
|
|
@ -5,13 +5,16 @@
|
|||
enum class WindowFunctions {
|
||||
RECTANGLE,
|
||||
GAUSS,
|
||||
VON_HANN
|
||||
VON_HANN,
|
||||
TRIANGLE,
|
||||
BLACKMAN
|
||||
};
|
||||
|
||||
extern std::vector<std::pair<double, double>> FFT(const std::vector<double>::const_iterator& begin,
|
||||
const std::vector<double>::const_iterator& end,
|
||||
size_t sampleRate,
|
||||
double minFreq, double maxFreq,
|
||||
unsigned int zeropadding,
|
||||
WindowFunctions func, unsigned int width, unsigned int offset);
|
||||
unsigned int zeropadding);
|
||||
|
||||
extern void SetWindowFunction(WindowFunctions func, unsigned int width);
|
||||
extern void UseFastFunctions();
|
101
src/main.cpp
101
src/main.cpp
|
@ -14,7 +14,9 @@
|
|||
const std::map<std::string, WindowFunctions> FUNCTIONS {
|
||||
{"rectangle", WindowFunctions::RECTANGLE},
|
||||
{"von-hann", WindowFunctions::VON_HANN},
|
||||
{"gauss", WindowFunctions::GAUSS}
|
||||
{"gauss", WindowFunctions::GAUSS},
|
||||
{"triangle", WindowFunctions::TRIANGLE},
|
||||
{"blackman", WindowFunctions::BLACKMAN}
|
||||
};
|
||||
|
||||
struct Settings {
|
||||
|
@ -24,6 +26,7 @@ struct Settings {
|
|||
double minFreq, maxFreq;
|
||||
unsigned int analyzeChannel;
|
||||
unsigned int zeropadding;
|
||||
bool approx, legacy;
|
||||
WindowFunctions window;
|
||||
};
|
||||
|
||||
|
@ -34,6 +37,33 @@ int main(int argc, char** argv)
|
|||
Settings setts;
|
||||
setts = Parse(argc, argv);
|
||||
|
||||
if (setts.approx)
|
||||
UseFastFunctions();
|
||||
|
||||
std::function<void(nlohmann::json&, const std::vector<std::pair<double, double>>&)> toJson;
|
||||
if (setts.legacy)
|
||||
{
|
||||
toJson = [](nlohmann::json& target, const std::vector<std::pair<double, double>>& spectrum)
|
||||
{
|
||||
target.push_back({ "spectrum", nlohmann::json::array()});
|
||||
|
||||
for (const std::pair<double, double>& pair : spectrum) {
|
||||
target["spectrum"].push_back({{"freq", pair.first}, {"mag", pair.second}});
|
||||
}
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
toJson = [](nlohmann::json& target, const std::vector<std::pair<double, double>>& spectrum)
|
||||
{
|
||||
target.push_back({ "spectrum", nlohmann::json::array() });
|
||||
|
||||
for (const std::pair<double, double>& pair : spectrum) {
|
||||
target["spectrum"].push_back(pair.second);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
int numFiles = setts.files.size();
|
||||
for (auto& file : setts.files) {
|
||||
AudioFile<double> audioFile;
|
||||
|
@ -49,6 +79,9 @@ int main(int argc, char** argv)
|
|||
int numChannels = audioFile.getNumChannels();
|
||||
|
||||
nlohmann::json output;
|
||||
if(!setts.legacy)
|
||||
output["freqs"] = nlohmann::json::array();
|
||||
|
||||
int c = setts.analyzeChannel;
|
||||
if (c == 0)
|
||||
c = 1;
|
||||
|
@ -61,59 +94,43 @@ int main(int argc, char** argv)
|
|||
std::string chName = "channel_" + std::to_string(c);
|
||||
output[chName] = nlohmann::json::array();
|
||||
|
||||
if (setts.splitInterval == 0.0f)
|
||||
int sampleInterval = (setts.splitInterval > 0.0f ? sampleRate * setts.splitInterval / 1000 : audioFile.samples[c - 1].size());
|
||||
SetWindowFunction(setts.window, sampleInterval);
|
||||
int currentSample;
|
||||
for (currentSample = 0; currentSample < audioFile.samples[c - 1].size(); currentSample += sampleInterval)
|
||||
{
|
||||
std::vector<std::pair<double, double>> spectrum =
|
||||
FFT(
|
||||
audioFile.samples[c-1].cbegin(),
|
||||
audioFile.samples[c-1].cend(),
|
||||
audioFile.samples[c - 1].cbegin() + currentSample,
|
||||
std::min(
|
||||
audioFile.samples[c - 1].cbegin() + currentSample + sampleInterval,
|
||||
audioFile.samples[c - 1].cend()
|
||||
),
|
||||
sampleRate,
|
||||
setts.minFreq, setts.maxFreq,
|
||||
setts.zeropadding,
|
||||
setts.window, audioFile.samples[c-1].size(), 0
|
||||
setts.zeropadding
|
||||
);
|
||||
|
||||
output[chName] = nlohmann::json::array();
|
||||
for (const std::pair<double, double>& pair : spectrum) {
|
||||
output[chName].push_back({ {"freq", pair.first}, {"mag", pair.second } });
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int sampleInterval = sampleRate * setts.splitInterval / 1000;
|
||||
int currentSample;
|
||||
for (currentSample = 0; currentSample < audioFile.samples[c - 1].size(); currentSample += sampleInterval)
|
||||
if (!setts.legacy && output["freqs"].empty())
|
||||
{
|
||||
std::vector<std::pair<double, double>> spectrum =
|
||||
FFT(
|
||||
audioFile.samples[c - 1].cbegin() + currentSample,
|
||||
std::min(
|
||||
audioFile.samples[c - 1].cbegin() + currentSample + sampleInterval,
|
||||
audioFile.samples[c - 1].cend()
|
||||
),
|
||||
sampleRate,
|
||||
setts.minFreq, setts.maxFreq,
|
||||
setts.zeropadding,
|
||||
setts.window, sampleInterval, 0
|
||||
);
|
||||
|
||||
output[chName].push_back({
|
||||
{"begin", currentSample},
|
||||
{"end", currentSample + sampleInterval},
|
||||
{"spectrum", nlohmann::json::array()}
|
||||
});
|
||||
|
||||
for (const std::pair<double, double>& pair : spectrum) {
|
||||
output[chName].back()["spectrum"].push_back({ {"freq", pair.first}, {"mag", pair.second } });
|
||||
output["freqs"].push_back(pair.first);
|
||||
}
|
||||
|
||||
PRINTER(setts, "\rAnalyzing " << filename << "... Channel " << c << "/" << numChannels << " " << (int)std::floor((float)currentSample / (float)audioFile.samples[c-1].size() * 100.0f) << "% ");
|
||||
}
|
||||
|
||||
output[chName].push_back({
|
||||
{"begin", currentSample},
|
||||
{"end", currentSample + sampleInterval}
|
||||
});
|
||||
|
||||
toJson(output[chName].back(), spectrum);
|
||||
|
||||
PRINTER(setts, "\rAnalyzing " << filename << "... Channel " << c << "/" << numChannels << " " << (int)std::floor((float)currentSample / (float)audioFile.samples[c-1].size() * 100.0f) << "% ");
|
||||
}
|
||||
}
|
||||
|
||||
std::ofstream ofs(file.replace_extension("json"));
|
||||
ofs << std::setw(4) << output << std::endl;
|
||||
ofs << std::setw(4) << output.dump() << std::endl;
|
||||
ofs.close();
|
||||
|
||||
PRINTER(setts, "\rAnalyzing " << filename << "... 100% " << std::endl);
|
||||
|
@ -137,9 +154,11 @@ Settings Parse(int argc, char** argv)
|
|||
("i,interval", "Splits audio file into intervals of length i milliseconds and transforms them individually (0 to not split file)", cxxopts::value<float>())
|
||||
("f,frequency", "Defines the frequency range of the output spectrum (Default: all the frequencies)", cxxopts::value<std::vector<double>>())
|
||||
("p,pad", "Add extra zero-padding. By default, the program will pad the signals with 0s until the number of samples is a power of 2 (this would be equivalent to -p 1). With this option you can tell the program to instead pad until the power of 2 after the next one (-p 2) etc. This increases frequency resolution", cxxopts::value<unsigned int>())
|
||||
("w,window", "Specify the window function used (rectangle (default), von-hann, gauss)", cxxopts::value<std::string>()->default_value("rectangle"))
|
||||
("w,window", "Specify the window function used (rectangle (default), von-hann, gauss, triangle, blackman (3-term))", cxxopts::value<std::string>()->default_value("rectangle"))
|
||||
("m,mono", "Analyze only the given channel", cxxopts::value<unsigned int>()->default_value("0"))
|
||||
("approx", "Use faster, but more inaccurate trigonometric functions instead of the std-functions (EXPERIMENTAL)")
|
||||
("files", "Files to fourier transform", cxxopts::value<std::vector<std::filesystem::path>>())
|
||||
("legacy", "Uses the legacy data structure (WHICH IS VERY BAD!)", cxxopts::value<bool>()->default_value("false"))
|
||||
("h,help", "Print usage")
|
||||
;
|
||||
|
||||
|
@ -174,6 +193,8 @@ Settings Parse(int argc, char** argv)
|
|||
setts.splitInterval = (result.count("interval") ? result["interval"].as<float>() : 0.0f);
|
||||
setts.analyzeChannel = (result.count("mono") ? result["mono"].as<unsigned int>() : 0);
|
||||
setts.zeropadding = (result.count("pad") ? result["pad"].as<unsigned int>() : 1);
|
||||
setts.approx = (result.count("approx") ? true : false);
|
||||
setts.legacy = (result.count("legacy") ? result["legacy"].as<bool>() : false);
|
||||
|
||||
if (!result.count("window"))
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue