added window functions
This commit is contained in:
parent
0dc3cadc52
commit
ef770e3bb4
|
@ -6,7 +6,7 @@ project(spectralyze)
|
|||
|
||||
add_executable(spectralyze
|
||||
"src/main.cpp"
|
||||
"src/FFT.hpp"
|
||||
"src/FFT.hpp" "src/FFT.cpp"
|
||||
)
|
||||
|
||||
target_include_directories(spectralyze PRIVATE
|
||||
|
|
|
@ -33,6 +33,9 @@ This will tell the program to pad to the 3rd-next power of 2! This means, if the
|
|||
|
||||
**RESOLUTION (and thus file size) SCALES WITH 2^p**
|
||||
|
||||
## Window functions
|
||||
Window functions are used to "cut out" parts of the signal. When you use the `-i` flag, you are only looking at a certain interval in the audio file. This is equivalent to multiplying the whole audio file with a rectangular window function (it is 0 everywhere except in the interval, where it is 1). With the `-w` flag you can choose between different window functions. Currently supported are the Von-Hann function, and the Gauss function. Both of these yield "smoother" spectra and get rid of a lot of noise.
|
||||
|
||||
## Example command
|
||||
```
|
||||
spectralyze -i 20 -f 0,1000 -p 3 coolSong.wav
|
||||
|
|
116
src/FFT.cpp
Normal file
116
src/FFT.cpp
Normal file
|
@ -0,0 +1,116 @@
|
|||
#include "FFT.hpp"
|
||||
|
||||
#define _USE_MATH_DEFINES
|
||||
#include <math.h>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
|
||||
#define POW_OF_TWO(x) (x && !(x & (x - 1)))
|
||||
|
||||
using namespace std::complex_literals;
|
||||
|
||||
typedef std::function<double(unsigned int)> WindowFunction;
|
||||
|
||||
inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width);
|
||||
inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width);
|
||||
inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width);
|
||||
|
||||
std::vector<std::complex<double>>
|
||||
radix2dit(
|
||||
const std::vector<double>& list,
|
||||
size_t offset,
|
||||
size_t N,
|
||||
size_t s,
|
||||
WindowFunction winFunc)
|
||||
{
|
||||
std::vector<std::complex<double>> output(N);
|
||||
if (N == 1)
|
||||
{
|
||||
output[0] = winFunc(offset) * (list[offset]);
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t halfN = N >> 1;
|
||||
std::vector<std::complex<double>> first = radix2dit(list, offset, halfN, s << 1, winFunc);
|
||||
std::vector<std::complex<double>> second = radix2dit(list, offset + s, halfN, s << 1, winFunc);
|
||||
|
||||
std::complex<double> coeff = -M_PI * 1.0i / (double)halfN;
|
||||
|
||||
for (int k = 0; k < halfN; k++)
|
||||
{
|
||||
std::complex<double> p = first[k];
|
||||
std::complex<double> q = std::exp(coeff * (double)k) * second[k];
|
||||
|
||||
output[k] = p + q;
|
||||
output[halfN + k] = p - q;
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
std::vector<std::pair<double, double>>
|
||||
FFT(const std::vector<double>::const_iterator& begin,
|
||||
const std::vector<double>::const_iterator& end,
|
||||
size_t sampleRate,
|
||||
double minFreq, double maxFreq,
|
||||
unsigned int zeropadding,
|
||||
WindowFunctions func, unsigned int width, unsigned int offset)
|
||||
{
|
||||
std::vector<double> signal(begin, end);
|
||||
size_t N = signal.size();
|
||||
while (!POW_OF_TWO(N))
|
||||
{
|
||||
// Pad with zeros
|
||||
signal.push_back(0.0f);
|
||||
N++;
|
||||
}
|
||||
|
||||
if (zeropadding > 1) {
|
||||
N = (signal.size() << (zeropadding - 1));
|
||||
signal.insert(signal.end(), N - signal.size(), 0);
|
||||
}
|
||||
|
||||
WindowFunction f;
|
||||
switch (func)
|
||||
{
|
||||
case WindowFunctions::RECTANGLE: f = std::bind(WindowRectangle, std::placeholders::_1, offset, width); break;
|
||||
case WindowFunctions::VON_HANN: f = std::bind(WindowVonHann, std::placeholders::_1, offset, width); break;
|
||||
case WindowFunctions::GAUSS: f = std::bind(WindowGauss, std::placeholders::_1, offset, width); break;
|
||||
}
|
||||
|
||||
|
||||
std::vector<std::complex<double>> spectrum = radix2dit(signal, 0, N, 1, f);
|
||||
double freqRes = (double)sampleRate / (double)N;
|
||||
double nyquistLimit = (double)sampleRate / 2.0f;
|
||||
|
||||
std::vector<std::pair<double, double>> output;
|
||||
double freq = minFreq;
|
||||
if (maxFreq == 0)
|
||||
maxFreq = nyquistLimit;
|
||||
|
||||
for (int k = freq / freqRes; freq < nyquistLimit && freq < maxFreq; k++)
|
||||
{
|
||||
output.push_back(std::make_pair(freq, 2.0f * std::abs(spectrum[k]) / (double)N));
|
||||
freq += freqRes;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width)
|
||||
{
|
||||
return ((offset < k) && (k < width));
|
||||
}
|
||||
|
||||
inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width)
|
||||
{
|
||||
return ((offset < k) && (k < width)) ? (0.5f * (1.0f - cos(2.0f * M_PI * k / (width - 1)))) : 0;
|
||||
}
|
||||
|
||||
inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width)
|
||||
{
|
||||
double coeff = (k - (width - 1) * 0.5f) / (0.4f * (width - 1) * 0.5f);
|
||||
return ((offset < k) && (k < width)) ? (std::exp(-0.5f * coeff * coeff)) : 0;
|
||||
}
|
90
src/FFT.hpp
90
src/FFT.hpp
|
@ -1,89 +1,17 @@
|
|||
#pragma once
|
||||
#define _USE_MATH_DEFINES
|
||||
#include <math.h>
|
||||
#include <vector>
|
||||
#include <complex>
|
||||
|
||||
#define TWO_PI (double)6.28318530718f
|
||||
#define POW_OF_TWO(x) (x && !(x & (x - 1)))
|
||||
enum class WindowFunctions {
|
||||
RECTANGLE,
|
||||
GAUSS,
|
||||
VON_HANN
|
||||
};
|
||||
|
||||
using namespace std::complex_literals;
|
||||
|
||||
std::vector<std::complex<double>> radix2dit(const std::vector<double>::const_iterator& begin, size_t N, size_t s)
|
||||
{
|
||||
std::vector<std::complex<double>> output(N);
|
||||
if (N == 1)
|
||||
{
|
||||
output[0] = *begin;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t halfN = N >> 1;
|
||||
std::vector<std::complex<double>> first = radix2dit(begin, halfN, s << 1);
|
||||
std::vector<std::complex<double>> second = radix2dit(begin + s, halfN, s << 1);
|
||||
|
||||
/*if (s == 1) {
|
||||
std::future<std::vector<std::complex<double>>> firstFuture = std::async(&radix2dit, begin, halfN, s << 1);
|
||||
std::future<std::vector<std::complex<double>>> secondFuture = std::async(&radix2dit, begin + s, halfN, s << 1);
|
||||
|
||||
first = firstFuture.get();
|
||||
second = secondFuture.get();
|
||||
}
|
||||
else {
|
||||
first = radix2dit(begin, halfN, s << 1);
|
||||
second = radix2dit(begin + 1, halfN, s << 1);
|
||||
}*/
|
||||
|
||||
std::complex<double> coeff = -M_PI * 1.0i / (double)halfN;
|
||||
|
||||
for (int k = 0; k < N >> 1; k++)
|
||||
{
|
||||
std::complex<double> p = first[k];
|
||||
std::complex<double> q = std::exp(coeff * (double)k) * second[k];
|
||||
|
||||
output[k] = p + q;
|
||||
output[halfN + k] = p - q;
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
std::vector<std::pair<double, double>>
|
||||
FFT(const std::vector<double>::const_iterator& begin,
|
||||
const std::vector<double>::const_iterator& end,
|
||||
extern std::vector<std::pair<double, double>> FFT(const std::vector<double>::const_iterator& begin,
|
||||
const std::vector<double>::const_iterator& end,
|
||||
size_t sampleRate,
|
||||
double minFreq, double maxFreq,
|
||||
unsigned int zeropadding)
|
||||
{
|
||||
std::vector<double> signal(begin, end);
|
||||
size_t N = signal.size();
|
||||
while (!POW_OF_TWO(N))
|
||||
{
|
||||
// Pad with zeros
|
||||
signal.push_back(0.0f);
|
||||
N++;
|
||||
}
|
||||
unsigned int zeropadding,
|
||||
WindowFunctions func, unsigned int width, unsigned int offset);
|
||||
|
||||
if (zeropadding > 1) {
|
||||
N = (signal.size() << (zeropadding - 1));
|
||||
signal.insert(signal.end(), N - signal.size(), 0);
|
||||
}
|
||||
|
||||
std::vector<std::complex<double>> spectrum = radix2dit(signal.cbegin(), N, 1);
|
||||
double freqRes = (double)sampleRate / (double)N;
|
||||
double nyquistLimit = (double)sampleRate / 2.0f;
|
||||
|
||||
std::vector<std::pair<double, double>> output;
|
||||
double freq = minFreq;
|
||||
if (maxFreq == 0)
|
||||
maxFreq = nyquistLimit;
|
||||
|
||||
for (int k = freq / freqRes; freq < nyquistLimit && freq < maxFreq; k++)
|
||||
{
|
||||
output.push_back(std::make_pair(freq, 2.0f * std::abs(spectrum[k]) / (double)N));
|
||||
freq += freqRes;
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
|
43
src/main.cpp
43
src/main.cpp
|
@ -1,6 +1,7 @@
|
|||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <map>
|
||||
#include <filesystem>
|
||||
|
||||
#include "AudioFile.h"
|
||||
|
@ -10,6 +11,12 @@
|
|||
|
||||
#define PRINTER(s, x) if(!s.quiet) { std::cout << x; }
|
||||
|
||||
const std::map<std::string, WindowFunctions> FUNCTIONS {
|
||||
{"rectangle", WindowFunctions::RECTANGLE},
|
||||
{"von-hann", WindowFunctions::VON_HANN},
|
||||
{"gauss", WindowFunctions::GAUSS}
|
||||
};
|
||||
|
||||
struct Settings {
|
||||
std::vector<std::filesystem::path> files;
|
||||
bool quiet;
|
||||
|
@ -17,6 +24,7 @@ struct Settings {
|
|||
double minFreq, maxFreq;
|
||||
unsigned int analyzeChannel;
|
||||
unsigned int zeropadding;
|
||||
WindowFunctions window;
|
||||
};
|
||||
|
||||
Settings Parse(int argc, char** argv);
|
||||
|
@ -61,7 +69,8 @@ int main(int argc, char** argv)
|
|||
audioFile.samples[c-1].cend(),
|
||||
sampleRate,
|
||||
setts.minFreq, setts.maxFreq,
|
||||
setts.zeropadding
|
||||
setts.zeropadding,
|
||||
setts.window, audioFile.samples[c-1].size(), 0
|
||||
);
|
||||
|
||||
output[chName] = nlohmann::json::array();
|
||||
|
@ -77,14 +86,15 @@ int main(int argc, char** argv)
|
|||
{
|
||||
std::vector<std::pair<double, double>> spectrum =
|
||||
FFT(
|
||||
audioFile.samples[c-1].cbegin() + currentSample,
|
||||
audioFile.samples[c - 1].cbegin() + currentSample,
|
||||
std::min(
|
||||
audioFile.samples[c-1].cbegin() + currentSample + sampleInterval,
|
||||
audioFile.samples[c-1].cend()
|
||||
),
|
||||
audioFile.samples[c - 1].cbegin() + currentSample + sampleInterval,
|
||||
audioFile.samples[c - 1].cend()
|
||||
),
|
||||
sampleRate,
|
||||
setts.minFreq, setts.maxFreq,
|
||||
setts.zeropadding
|
||||
setts.zeropadding,
|
||||
setts.window, sampleInterval, 0
|
||||
);
|
||||
|
||||
output[chName].push_back({
|
||||
|
@ -127,6 +137,7 @@ Settings Parse(int argc, char** argv)
|
|||
("i,interval", "Splits audio file into intervals of length i milliseconds and transforms them individually (0 to not split file)", cxxopts::value<float>())
|
||||
("f,frequency", "Defines the frequency range of the output spectrum (Default: all the frequencies)", cxxopts::value<std::vector<double>>())
|
||||
("p,pad", "Add extra zero-padding. By default, the program will pad the signals with 0s until the number of samples is a power of 2 (this would be equivalent to -p 1). With this option you can tell the program to instead pad until the power of 2 after the next one (-p 2) etc. This increases frequency resolution", cxxopts::value<unsigned int>())
|
||||
("w,window", "Specify the window function used (rectangle (default), von-hann, gauss)", cxxopts::value<std::string>()->default_value("rectangle"))
|
||||
("m,mono", "Analyze only the given channel", cxxopts::value<unsigned int>()->default_value("0"))
|
||||
("files", "Files to fourier transform", cxxopts::value<std::vector<std::filesystem::path>>())
|
||||
("h,help", "Print usage")
|
||||
|
@ -163,6 +174,26 @@ Settings Parse(int argc, char** argv)
|
|||
setts.splitInterval = (result.count("interval") ? result["interval"].as<float>() : 0.0f);
|
||||
setts.analyzeChannel = (result.count("mono") ? result["mono"].as<unsigned int>() : 0);
|
||||
setts.zeropadding = (result.count("pad") ? result["pad"].as<unsigned int>() : 1);
|
||||
|
||||
if (!result.count("window"))
|
||||
{
|
||||
setts.window = WindowFunctions::RECTANGLE;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string data = result["window"].as<std::string>();
|
||||
std::transform(data.begin(), data.end(), data.begin(), [](unsigned char c) { return std::tolower(c); });
|
||||
auto it = FUNCTIONS.find(data);
|
||||
if (it == FUNCTIONS.end())
|
||||
{
|
||||
setts.window = WindowFunctions::RECTANGLE;
|
||||
}
|
||||
else
|
||||
{
|
||||
setts.window = it->second;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
if (setts.maxFreq <= setts.minFreq && (setts.maxFreq != 0))
|
||||
|
|
Loading…
Reference in a new issue