Compare commits
10 commits
Author | SHA1 | Date | |
---|---|---|---|
![]() |
d3f71641f1 | ||
![]() |
9558264b5a | ||
![]() |
ec7f291194 | ||
![]() |
7cd64b6222 | ||
![]() |
56bc0e752d | ||
![]() |
7ba3225f61 | ||
![]() |
bb947b4a53 | ||
![]() |
0609fe6c33 | ||
![]() |
0f22f865e4 | ||
![]() |
f2ad3bd04a |
|
@ -15,7 +15,7 @@ By default, spectralyze will output the entire frequency spectrum, all the way u
|
|||
```
|
||||
spectralyze -f 0,2500 coolSong.wav
|
||||
```
|
||||
This command would only output frequencies ranging from 0kHz-2kHz, greatly decreasing file size.
|
||||
This command would only output frequencies ranging from 0kHz-2.5kHz, greatly decreasing file size.
|
||||
|
||||
## Disabling channels
|
||||
By default, this program will analyze all channels in the given audio file. If you are only interested in one specific channel, you can tell the program that via the `-m` flag:
|
||||
|
@ -75,8 +75,13 @@ Every supplied audio file will result in one JSON file. The magnitude is the abs
|
|||
## Example use case
|
||||
This tool can theoretically be used to visualize music. The visualization part has to be written by you, though. For my little experiment I used python with matplotlib to create a line diagram from the spectra:
|
||||
|
||||
https://user-images.githubusercontent.com/24511538/116532180-4218e300-a8e0-11eb-8914-6b3b50228e58.mp4
|
||||
|
||||
https://user-images.githubusercontent.com/24511538/116720172-2d217a00-a9dc-11eb-945f-5db40300da78.mp4
|
||||
|
||||
|
||||
https://user-images.githubusercontent.com/24511538/116688886-a22e8880-a9b7-11eb-9a3d-b9b5069de697.mp4
|
||||
|
||||
Visualization written by [mpsparrow](https://github.com/mpsparrow)
|
||||
|
||||
## Used libraries
|
||||
* [AudioFile](https://github.com/adamstark/AudioFile) for loading audio files
|
||||
|
|
61
src/FFT.cpp
61
src/FFT.cpp
|
@ -8,11 +8,24 @@
|
|||
|
||||
#define POW_OF_TWO(x) (x && !(x & (x - 1)))
|
||||
|
||||
constexpr double REC_2_FAC = (double)1.0f / (double)2.0f;
|
||||
constexpr double REC_3_FAC = (double)1.0f / (double)6.0f;
|
||||
constexpr double REC_4_FAC = (double)1.0f / (double)24.0f;
|
||||
constexpr double REC_5_FAC = (double)1.0f / (double)120.0f;
|
||||
constexpr double REC_6_FAC = (double)1.0f / (double)720.0f;
|
||||
constexpr double REC_7_FAC = (double)1.0f / (double)5040.0f;
|
||||
constexpr double REC_8_FAC = (double)1.0f / (double)40320.0f;
|
||||
constexpr double REC_9_FAC = (double)1.0f / (double)362880.0f;
|
||||
|
||||
using namespace std::complex_literals;
|
||||
|
||||
typedef std::function<double(unsigned int)> WindowFunction;
|
||||
typedef std::function<double(double)> TrigFunction;
|
||||
typedef std::function<std::complex<double>(double)> ExpFunction;
|
||||
|
||||
static WindowFunction window;
|
||||
WindowFunction window;
|
||||
TrigFunction Sin = std::bind((double(*)(double))& std::sin, std::placeholders::_1);
|
||||
TrigFunction Cos = std::bind((double(*)(double))& std::cos, std::placeholders::_1);
|
||||
|
||||
inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width);
|
||||
inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width);
|
||||
|
@ -20,6 +33,10 @@ inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int widt
|
|||
inline double WindowTriangle(unsigned int k, unsigned int offset, unsigned int width);
|
||||
inline double WindowBlackman(unsigned int k, unsigned int offset, unsigned int width);
|
||||
|
||||
double FastCos(double x);
|
||||
double FastSin(double x);
|
||||
std::complex<double> ComplexExp(double x);
|
||||
|
||||
std::vector<std::complex<double>>
|
||||
radix2dit(
|
||||
const std::vector<double>& list,
|
||||
|
@ -38,12 +55,12 @@ radix2dit(
|
|||
std::vector<std::complex<double>> first = radix2dit(list, offset, halfN, s << 1);
|
||||
std::vector<std::complex<double>> second = radix2dit(list, offset + s, halfN, s << 1);
|
||||
|
||||
std::complex<double> coeff = -M_PI * 1.0i / (double)halfN;
|
||||
double coeff = -M_PI / (double)halfN;
|
||||
|
||||
for (int k = 0; k < halfN; k++)
|
||||
{
|
||||
std::complex<double> p = first[k];
|
||||
std::complex<double> q = std::exp(coeff * (double)k) * second[k];
|
||||
std::complex<double> q = ComplexExp(coeff * (double)k) * second[k];
|
||||
|
||||
output[k] = p + q;
|
||||
output[halfN + k] = p - q;
|
||||
|
@ -76,7 +93,7 @@ FFT(const std::vector<double>::const_iterator& begin,
|
|||
|
||||
WindowFunction f;
|
||||
|
||||
|
||||
|
||||
|
||||
std::vector<std::complex<double>> spectrum = radix2dit(signal, 0, N, 1);
|
||||
double freqRes = (double)sampleRate / (double)N;
|
||||
|
@ -90,6 +107,7 @@ FFT(const std::vector<double>::const_iterator& begin,
|
|||
for (int k = freq / freqRes; freq < nyquistLimit && freq < maxFreq; k++)
|
||||
{
|
||||
output.push_back(std::make_pair(freq, 2.0f * std::abs(spectrum[k]) / (double)N));
|
||||
|
||||
freq += freqRes;
|
||||
}
|
||||
|
||||
|
@ -108,6 +126,12 @@ void SetWindowFunction(WindowFunctions func, unsigned int width)
|
|||
}
|
||||
}
|
||||
|
||||
void UseFastFunctions()
|
||||
{
|
||||
Sin = std::bind(FastSin, std::placeholders::_1);
|
||||
Cos = std::bind(FastCos, std::placeholders::_1);
|
||||
}
|
||||
|
||||
inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width)
|
||||
{
|
||||
return ((offset < k) && (k < width));
|
||||
|
@ -115,7 +139,7 @@ inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int
|
|||
|
||||
inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width)
|
||||
{
|
||||
return ((offset < k) && (k < width)) ? (0.5f * (1.0f - cos(2.0f * M_PI * k / (width - 1)))) : 0;
|
||||
return ((offset < k) && (k < width)) ? (0.5f * (1.0f - Cos(2.0f * M_PI * k / (width - 1)))) : 0;
|
||||
}
|
||||
|
||||
inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width)
|
||||
|
@ -131,5 +155,30 @@ inline double WindowTriangle(unsigned int k, unsigned int offset, unsigned int w
|
|||
|
||||
inline double WindowBlackman(unsigned int k, unsigned int offset, unsigned int width)
|
||||
{
|
||||
return 0.5f * (1.0f - 0.16f) - 0.5f * cos(2.0f * M_PI * k / (width - 1)) + 0.5f * 0.16f * cos(4.0f * M_PI * k / (width - 1));
|
||||
return (double)0.5f * ((double)1.0f - (double)0.16f) - 0.5f * Cos(2.0f * M_PI * k / (width - 1)) + (double)0.5f * (double)0.16f * Cos(4.0f * M_PI * k / (width - 1));
|
||||
}
|
||||
|
||||
double FastCos(double x)
|
||||
{
|
||||
x -= (x > M_PI) * (double)2.0f * M_PI;
|
||||
x += (x < -M_PI) * (double)2.0f * M_PI;
|
||||
double xpow2 = x * x;
|
||||
double xpow4 = xpow2 * x * x;
|
||||
double xpow6 = xpow4 * x * x;
|
||||
return (double)1.0f - xpow2 * REC_2_FAC + xpow4 * REC_4_FAC - xpow6 * REC_6_FAC + xpow6 * x * x * REC_8_FAC;
|
||||
}
|
||||
|
||||
double FastSin(double x)
|
||||
{
|
||||
x -= (x > M_PI) * (double)2.0f * M_PI;
|
||||
x += (x < -M_PI) * (double)2.0f * M_PI;
|
||||
double xpow3 = x * x * x;
|
||||
double xpow5 = xpow3 * x * x;
|
||||
double xpow7 = xpow5 * x * x;
|
||||
return (double)x - xpow3 * REC_3_FAC + xpow5 * REC_5_FAC - xpow7 * REC_7_FAC + xpow7 * x * x * REC_9_FAC;
|
||||
}
|
||||
|
||||
std::complex<double> ComplexExp(double x)
|
||||
{
|
||||
return std::complex<double>(Cos(x), Sin(x));
|
||||
}
|
||||
|
|
|
@ -16,4 +16,5 @@ extern std::vector<std::pair<double, double>> FFT(const std::vector<double>::con
|
|||
double minFreq, double maxFreq,
|
||||
unsigned int zeropadding);
|
||||
|
||||
extern void SetWindowFunction(WindowFunctions func, unsigned int width);
|
||||
extern void SetWindowFunction(WindowFunctions func, unsigned int width);
|
||||
extern void UseFastFunctions();
|
93
src/main.cpp
93
src/main.cpp
|
@ -26,6 +26,7 @@ struct Settings {
|
|||
double minFreq, maxFreq;
|
||||
unsigned int analyzeChannel;
|
||||
unsigned int zeropadding;
|
||||
bool approx, legacy;
|
||||
WindowFunctions window;
|
||||
};
|
||||
|
||||
|
@ -36,6 +37,33 @@ int main(int argc, char** argv)
|
|||
Settings setts;
|
||||
setts = Parse(argc, argv);
|
||||
|
||||
if (setts.approx)
|
||||
UseFastFunctions();
|
||||
|
||||
std::function<void(nlohmann::json&, const std::vector<std::pair<double, double>>&)> toJson;
|
||||
if (setts.legacy)
|
||||
{
|
||||
toJson = [](nlohmann::json& target, const std::vector<std::pair<double, double>>& spectrum)
|
||||
{
|
||||
target.push_back({ "spectrum", nlohmann::json::array()});
|
||||
|
||||
for (const std::pair<double, double>& pair : spectrum) {
|
||||
target["spectrum"].push_back({{"freq", pair.first}, {"mag", pair.second}});
|
||||
}
|
||||
};
|
||||
}
|
||||
else
|
||||
{
|
||||
toJson = [](nlohmann::json& target, const std::vector<std::pair<double, double>>& spectrum)
|
||||
{
|
||||
target.push_back({ "spectrum", nlohmann::json::array() });
|
||||
|
||||
for (const std::pair<double, double>& pair : spectrum) {
|
||||
target["spectrum"].push_back(pair.second);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
int numFiles = setts.files.size();
|
||||
for (auto& file : setts.files) {
|
||||
AudioFile<double> audioFile;
|
||||
|
@ -51,6 +79,9 @@ int main(int argc, char** argv)
|
|||
int numChannels = audioFile.getNumChannels();
|
||||
|
||||
nlohmann::json output;
|
||||
if(!setts.legacy)
|
||||
output["freqs"] = nlohmann::json::array();
|
||||
|
||||
int c = setts.analyzeChannel;
|
||||
if (c == 0)
|
||||
c = 1;
|
||||
|
@ -63,59 +94,43 @@ int main(int argc, char** argv)
|
|||
std::string chName = "channel_" + std::to_string(c);
|
||||
output[chName] = nlohmann::json::array();
|
||||
|
||||
if (setts.splitInterval == 0.0f)
|
||||
int sampleInterval = (setts.splitInterval > 0.0f ? sampleRate * setts.splitInterval / 1000 : audioFile.samples[c - 1].size());
|
||||
SetWindowFunction(setts.window, sampleInterval);
|
||||
int currentSample;
|
||||
for (currentSample = 0; currentSample < audioFile.samples[c - 1].size(); currentSample += sampleInterval)
|
||||
{
|
||||
SetWindowFunction(setts.window, audioFile.samples[c-1].size());
|
||||
std::vector<std::pair<double, double>> spectrum =
|
||||
FFT(
|
||||
audioFile.samples[c-1].cbegin(),
|
||||
audioFile.samples[c-1].cend(),
|
||||
audioFile.samples[c - 1].cbegin() + currentSample,
|
||||
std::min(
|
||||
audioFile.samples[c - 1].cbegin() + currentSample + sampleInterval,
|
||||
audioFile.samples[c - 1].cend()
|
||||
),
|
||||
sampleRate,
|
||||
setts.minFreq, setts.maxFreq,
|
||||
setts.zeropadding
|
||||
);
|
||||
|
||||
output[chName] = nlohmann::json::array();
|
||||
for (const std::pair<double, double>& pair : spectrum) {
|
||||
output[chName].push_back({ {"freq", pair.first}, {"mag", pair.second } });
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int sampleInterval = sampleRate * setts.splitInterval / 1000;
|
||||
SetWindowFunction(setts.window, sampleInterval);
|
||||
int currentSample;
|
||||
for (currentSample = 0; currentSample < audioFile.samples[c - 1].size(); currentSample += sampleInterval)
|
||||
if (!setts.legacy && output["freqs"].empty())
|
||||
{
|
||||
std::vector<std::pair<double, double>> spectrum =
|
||||
FFT(
|
||||
audioFile.samples[c - 1].cbegin() + currentSample,
|
||||
std::min(
|
||||
audioFile.samples[c - 1].cbegin() + currentSample + sampleInterval,
|
||||
audioFile.samples[c - 1].cend()
|
||||
),
|
||||
sampleRate,
|
||||
setts.minFreq, setts.maxFreq,
|
||||
setts.zeropadding
|
||||
);
|
||||
|
||||
output[chName].push_back({
|
||||
{"begin", currentSample},
|
||||
{"end", currentSample + sampleInterval},
|
||||
{"spectrum", nlohmann::json::array()}
|
||||
});
|
||||
|
||||
for (const std::pair<double, double>& pair : spectrum) {
|
||||
output[chName].back()["spectrum"].push_back({ {"freq", pair.first}, {"mag", pair.second } });
|
||||
output["freqs"].push_back(pair.first);
|
||||
}
|
||||
|
||||
PRINTER(setts, "\rAnalyzing " << filename << "... Channel " << c << "/" << numChannels << " " << (int)std::floor((float)currentSample / (float)audioFile.samples[c-1].size() * 100.0f) << "% ");
|
||||
}
|
||||
|
||||
output[chName].push_back({
|
||||
{"begin", currentSample},
|
||||
{"end", currentSample + sampleInterval}
|
||||
});
|
||||
|
||||
toJson(output[chName].back(), spectrum);
|
||||
|
||||
PRINTER(setts, "\rAnalyzing " << filename << "... Channel " << c << "/" << numChannels << " " << (int)std::floor((float)currentSample / (float)audioFile.samples[c-1].size() * 100.0f) << "% ");
|
||||
}
|
||||
}
|
||||
|
||||
std::ofstream ofs(file.replace_extension("json"));
|
||||
ofs << std::setw(4) << output << std::endl;
|
||||
ofs << std::setw(4) << output.dump() << std::endl;
|
||||
ofs.close();
|
||||
|
||||
PRINTER(setts, "\rAnalyzing " << filename << "... 100% " << std::endl);
|
||||
|
@ -141,7 +156,9 @@ Settings Parse(int argc, char** argv)
|
|||
("p,pad", "Add extra zero-padding. By default, the program will pad the signals with 0s until the number of samples is a power of 2 (this would be equivalent to -p 1). With this option you can tell the program to instead pad until the power of 2 after the next one (-p 2) etc. This increases frequency resolution", cxxopts::value<unsigned int>())
|
||||
("w,window", "Specify the window function used (rectangle (default), von-hann, gauss, triangle, blackman (3-term))", cxxopts::value<std::string>()->default_value("rectangle"))
|
||||
("m,mono", "Analyze only the given channel", cxxopts::value<unsigned int>()->default_value("0"))
|
||||
("approx", "Use faster, but more inaccurate trigonometric functions instead of the std-functions (EXPERIMENTAL)")
|
||||
("files", "Files to fourier transform", cxxopts::value<std::vector<std::filesystem::path>>())
|
||||
("legacy", "Uses the legacy data structure (WHICH IS VERY BAD!)", cxxopts::value<bool>()->default_value("false"))
|
||||
("h,help", "Print usage")
|
||||
;
|
||||
|
||||
|
@ -176,6 +193,8 @@ Settings Parse(int argc, char** argv)
|
|||
setts.splitInterval = (result.count("interval") ? result["interval"].as<float>() : 0.0f);
|
||||
setts.analyzeChannel = (result.count("mono") ? result["mono"].as<unsigned int>() : 0);
|
||||
setts.zeropadding = (result.count("pad") ? result["pad"].as<unsigned int>() : 1);
|
||||
setts.approx = (result.count("approx") ? true : false);
|
||||
setts.legacy = (result.count("legacy") ? result["legacy"].as<bool>() : false);
|
||||
|
||||
if (!result.count("window"))
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue