added window functions

This commit is contained in:
Robert 2021-04-29 14:00:53 +02:00
parent 0dc3cadc52
commit ef770e3bb4
5 changed files with 166 additions and 88 deletions

View file

@ -6,7 +6,7 @@ project(spectralyze)
add_executable(spectralyze
"src/main.cpp"
"src/FFT.hpp"
"src/FFT.hpp" "src/FFT.cpp"
)
target_include_directories(spectralyze PRIVATE

View file

@ -33,6 +33,9 @@ This will tell the program to pad to the 3rd-next power of 2! This means, if the
**RESOLUTION (and thus file size) SCALES WITH 2^p**
## Window functions
Window functions are used to "cut out" parts of the signal. When you use the `-i` flag, you are only looking at a certain interval of the audio file. This is equivalent to multiplying the whole audio file by a rectangular window function (it is 1 inside the interval and 0 everywhere else). With the `-w` flag you can choose between different window functions. Currently supported are `rectangle` (the default), `von-hann` (the von Hann function) and `gauss` (the Gauss function). The latter two yield "smoother" spectra and get rid of a lot of noise.
## Example command
```
spectralyze -i 20 -f 0,1000 -p 3 coolSong.wav

116
src/FFT.cpp Normal file
View file

@ -0,0 +1,116 @@
#include "FFT.hpp"
#define _USE_MATH_DEFINES
#include <math.h>
#include <functional>
#include <iostream>
#include <map>
// Evaluates to true when x is non-zero and has exactly one bit set (i.e. is a power of two).
// The argument is expanded several times without parentheses, so pass a plain variable.
#define POW_OF_TWO(x) (x && !(x & (x - 1)))
// Enables the std::complex literal suffixes (such as 1.0i) in this file.
using namespace std::complex_literals;
// A callable that maps a sample index to the window weight for that sample.
typedef std::function<double(unsigned int)> WindowFunction;
// Forward declarations of the window functions; each takes the sample index k
// plus the window's offset and width and returns the weight for that sample.
inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width);
inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width);
inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width);
std::vector<std::complex<double>>
radix2dit(
const std::vector<double>& list,
size_t offset,
size_t N,
size_t s,
WindowFunction winFunc)
{
std::vector<std::complex<double>> output(N);
if (N == 1)
{
output[0] = winFunc(offset) * (list[offset]);
}
else
{
size_t halfN = N >> 1;
std::vector<std::complex<double>> first = radix2dit(list, offset, halfN, s << 1, winFunc);
std::vector<std::complex<double>> second = radix2dit(list, offset + s, halfN, s << 1, winFunc);
std::complex<double> coeff = -M_PI * 1.0i / (double)halfN;
for (int k = 0; k < halfN; k++)
{
std::complex<double> p = first[k];
std::complex<double> q = std::exp(coeff * (double)k) * second[k];
output[k] = p + q;
output[halfN + k] = p - q;
}
}
return output;
}
/// Computes an amplitude spectrum of the samples in [begin, end).
///
/// The samples are copied, zero-padded to a power-of-two length, weighted by
/// the selected window function and transformed with radix2dit().
///
/// @param begin       Iterator to the first sample.
/// @param end         Iterator one past the last sample.
/// @param sampleRate  Sample rate of the signal; returned frequencies use the
///                    same unit. A sample rate of 0 yields an empty result.
/// @param minFreq     First frequency to report; negative or NaN values are
///                    treated as 0.
/// @param maxFreq     Upper frequency limit (exclusive); 0 means "up to the
///                    Nyquist limit" (sampleRate / 2).
/// @param zeropadding Values greater than 1 multiply the padded length by
///                    2^(zeropadding - 1); 0 and 1 add no extra padding.
/// @param func        Window function to apply; values not handled below fall
///                    back to the rectangular window.
/// @param width       Width forwarded to the window function.
/// @param offset      Offset forwarded to the window function; indices are
///                    relative to the first sample of [begin, end).
/// @return (frequency, amplitude) pairs, amplitude being 2 * |X[k]| / N with N
///         the padded length.
std::vector<std::pair<double, double>>
FFT(const std::vector<double>::const_iterator& begin,
    const std::vector<double>::const_iterator& end,
    size_t sampleRate,
    double minFreq, double maxFreq,
    unsigned int zeropadding,
    WindowFunctions func, unsigned int width, unsigned int offset)
{
    std::vector<double> signal(begin, end);

    // Pad with zeros up to the next power of two (an empty input becomes a
    // single zero sample), then apply the optional extra padding, in one resize.
    size_t N = 1;
    while (N < signal.size())
        N <<= 1;
    if (zeropadding > 1)
        N <<= (zeropadding - 1);
    signal.resize(N, 0.0);

    // Bind offset/width into a one-argument window. The default branch keeps
    // `f` callable even for an out-of-range enum value, which would otherwise
    // raise std::bad_function_call inside radix2dit().
    WindowFunction f;
    switch (func)
    {
    case WindowFunctions::VON_HANN:
        f = [offset, width](unsigned int k) { return WindowVonHann(k, offset, width); };
        break;
    case WindowFunctions::GAUSS:
        f = [offset, width](unsigned int k) { return WindowGauss(k, offset, width); };
        break;
    case WindowFunctions::RECTANGLE:
    default:
        f = [offset, width](unsigned int k) { return WindowRectangle(k, offset, width); };
        break;
    }

    const std::vector<std::complex<double>> spectrum = radix2dit(signal, 0, N, 1, f);

    const double freqRes = static_cast<double>(sampleRate) / static_cast<double>(N);
    const double nyquistLimit = static_cast<double>(sampleRate) / 2.0;
    if (maxFreq == 0)
        maxFreq = nyquistLimit;

    std::vector<std::pair<double, double>> output;
    if (!(freqRes > 0.0))
        return output; // sampleRate == 0: no meaningful frequency axis

    // A negative (or NaN) start frequency would give a negative bin index.
    double freq = (minFreq > 0.0) ? minFreq : 0.0;
    if (!(freq < nyquistLimit && freq < maxFreq))
        return output; // nothing to report; also keeps the index conversion below in range

    // NOTE(review): the reported frequency starts at minFreq and advances by
    // freqRes, while the magnitude comes from bin floor(minFreq / freqRes).
    // When minFreq is not a multiple of freqRes the label is offset from the
    // bin's true frequency (k * freqRes) by less than one bin. Left unchanged
    // to keep the output format stable - confirm whether that is intended.
    for (size_t k = static_cast<size_t>(freq / freqRes);
         freq < nyquistLimit && freq < maxFreq && k < spectrum.size();
         ++k)
    {
        output.emplace_back(freq, 2.0 * std::abs(spectrum[k]) / static_cast<double>(N));
        freq += freqRes;
    }
    return output;
}
/// Rectangular window: weight 1 for the `width` samples starting at index
/// `offset`, and 0 everywhere else.
///
/// @param k      Sample index.
/// @param offset Index of the first sample inside the window (inclusive).
/// @param width  Number of samples covered by the window.
/// @return 1.0 when offset <= k < offset + width, otherwise 0.0.
inline double WindowRectangle(unsigned int k, unsigned int offset, unsigned int width)
{
    // The lower bound is inclusive so that sample `offset` itself (sample 0 for
    // a window starting at 0) is kept. k - offset cannot wrap because
    // k >= offset is checked first.
    return (k >= offset && (k - offset) < width) ? 1.0 : 0.0;
}
/// Von Hann (raised cosine) window covering the `width` samples starting at
/// index `offset`.
///
/// Inside the window the weight is 0.5 * (1 - cos(2 * pi * n / (width - 1)))
/// with n = k - offset, so the window is centred on its own interval.
///
/// @param k      Sample index.
/// @param offset Index of the first sample inside the window (inclusive).
/// @param width  Number of samples covered by the window.
/// @return The window weight in [0, 1]; 0.0 outside the window.
inline double WindowVonHann(unsigned int k, unsigned int offset, unsigned int width)
{
    if (k < offset || (k - offset) >= width)
        return 0.0;
    if (width == 1)
        return 1.0; // a one-sample window would otherwise evaluate 0 / 0
    const double n = static_cast<double>(k - offset);
    return 0.5 * (1.0 - cos(2.0 * M_PI * n / static_cast<double>(width - 1)));
}
/// Gaussian window covering the `width` samples starting at index `offset`.
///
/// Inside the window the weight is exp(-0.5 * c * c) with
/// c = (n - (width - 1) / 2) / (0.4 * (width - 1) / 2) and n = k - offset,
/// i.e. a Gaussian centred on the window with a relative sigma of 0.4.
///
/// @param k      Sample index.
/// @param offset Index of the first sample inside the window (inclusive).
/// @param width  Number of samples covered by the window.
/// @return The window weight in (0, 1]; 0.0 outside the window.
inline double WindowGauss(unsigned int k, unsigned int offset, unsigned int width)
{
    if (k < offset || (k - offset) >= width)
        return 0.0;
    if (width == 1)
        return 1.0; // a one-sample window would otherwise evaluate 0 / 0

    // All arithmetic is done in double; float cannot represent every sample
    // index of a long recording exactly.
    const double sigma = 0.4;
    const double halfSpan = static_cast<double>(width - 1) * 0.5;
    const double coeff = (static_cast<double>(k - offset) - halfSpan) / (sigma * halfSpan);
    return std::exp(-0.5 * coeff * coeff);
}

View file

@ -1,89 +1,17 @@
#pragma once
#define _USE_MATH_DEFINES
#include <math.h>
#include <vector>
#include <complex>
#define TWO_PI (double)6.28318530718f
#define POW_OF_TWO(x) (x && !(x & (x - 1)))
// Identifies the window function that FFT() applies to the samples before
// transforming them.
enum class WindowFunctions {
// Rectangular window.
RECTANGLE,
// Gauss window.
GAUSS,
// Von Hann window.
VON_HANN
};
using namespace std::complex_literals;
/// Recursive radix-2 decimation-in-time FFT over a strided range of samples.
///
/// The transform covers the N samples begin[0], begin[s], ...,
/// begin[(N - 1) * s].
///
/// Defined `inline` because it lives in a header: without it, every
/// translation unit including this header emits its own definition and the
/// program fails to link.
///
/// @param begin Iterator to the first sample of this sub-transform.
/// @param N     Number of points; expected to be a power of two. N == 0
///              yields an empty result.
/// @param s     Stride between consecutive samples of this sub-transform.
/// @return The N complex DFT coefficients.
inline std::vector<std::complex<double>> radix2dit(const std::vector<double>::const_iterator& begin, size_t N, size_t s)
{
    std::vector<std::complex<double>> output(N);
    if (N == 0)
    {
        // Halving 0 never reaches the base case, so stop here instead of recursing forever.
        return output;
    }
    if (N == 1)
    {
        // Base case: the DFT of a single sample is the sample itself.
        output[0] = *begin;
        return output;
    }

    const size_t halfN = N >> 1;
    // Even-indexed samples start at `begin`, odd-indexed ones one stride later.
    const std::vector<std::complex<double>> even = radix2dit(begin, halfN, s << 1);
    const std::vector<std::complex<double>> odd = radix2dit(begin + s, halfN, s << 1);

    // Twiddle factor for bin k is exp(-i * pi * k / halfN) = exp(-2 * pi * i * k / N).
    const double angleStep = -M_PI / static_cast<double>(halfN);
    for (size_t k = 0; k < halfN; ++k)
    {
        const std::complex<double> p = even[k];
        const std::complex<double> q = std::polar(1.0, angleStep * static_cast<double>(k)) * odd[k];
        output[k] = p + q;
        output[halfN + k] = p - q;
    }
    return output;
}
std::vector<std::pair<double, double>>
FFT(const std::vector<double>::const_iterator& begin,
const std::vector<double>::const_iterator& end,
extern std::vector<std::pair<double, double>> FFT(const std::vector<double>::const_iterator& begin,
const std::vector<double>::const_iterator& end,
size_t sampleRate,
double minFreq, double maxFreq,
unsigned int zeropadding)
{
std::vector<double> signal(begin, end);
size_t N = signal.size();
while (!POW_OF_TWO(N))
{
// Pad with zeros
signal.push_back(0.0f);
N++;
}
unsigned int zeropadding,
WindowFunctions func, unsigned int width, unsigned int offset);
if (zeropadding > 1) {
N = (signal.size() << (zeropadding - 1));
signal.insert(signal.end(), N - signal.size(), 0);
}
std::vector<std::complex<double>> spectrum = radix2dit(signal.cbegin(), N, 1);
double freqRes = (double)sampleRate / (double)N;
double nyquistLimit = (double)sampleRate / 2.0f;
std::vector<std::pair<double, double>> output;
double freq = minFreq;
if (maxFreq == 0)
maxFreq = nyquistLimit;
for (int k = freq / freqRes; freq < nyquistLimit && freq < maxFreq; k++)
{
output.push_back(std::make_pair(freq, 2.0f * std::abs(spectrum[k]) / (double)N));
freq += freqRes;
}
return output;
}

View file

@ -1,6 +1,7 @@
#include <iostream>
#include <fstream>
#include <iomanip>
#include <map>
#include <filesystem>
#include "AudioFile.h"
@ -10,6 +11,12 @@
#define PRINTER(s, x) if(!s.quiet) { std::cout << x; }
// Lookup table from a window name in lower case to the matching
// WindowFunctions value.
const std::map<std::string, WindowFunctions> FUNCTIONS {
{"rectangle", WindowFunctions::RECTANGLE},
{"von-hann", WindowFunctions::VON_HANN},
{"gauss", WindowFunctions::GAUSS}
};
struct Settings {
std::vector<std::filesystem::path> files;
bool quiet;
@ -17,6 +24,7 @@ struct Settings {
double minFreq, maxFreq;
unsigned int analyzeChannel;
unsigned int zeropadding;
WindowFunctions window;
};
Settings Parse(int argc, char** argv);
@ -61,7 +69,8 @@ int main(int argc, char** argv)
audioFile.samples[c-1].cend(),
sampleRate,
setts.minFreq, setts.maxFreq,
setts.zeropadding
setts.zeropadding,
setts.window, audioFile.samples[c-1].size(), 0
);
output[chName] = nlohmann::json::array();
@ -77,14 +86,15 @@ int main(int argc, char** argv)
{
std::vector<std::pair<double, double>> spectrum =
FFT(
audioFile.samples[c-1].cbegin() + currentSample,
audioFile.samples[c - 1].cbegin() + currentSample,
std::min(
audioFile.samples[c-1].cbegin() + currentSample + sampleInterval,
audioFile.samples[c-1].cend()
),
audioFile.samples[c - 1].cbegin() + currentSample + sampleInterval,
audioFile.samples[c - 1].cend()
),
sampleRate,
setts.minFreq, setts.maxFreq,
setts.zeropadding
setts.zeropadding,
setts.window, sampleInterval, 0
);
output[chName].push_back({
@ -127,6 +137,7 @@ Settings Parse(int argc, char** argv)
("i,interval", "Splits audio file into intervals of length i milliseconds and transforms them individually (0 to not split file)", cxxopts::value<float>())
("f,frequency", "Defines the frequency range of the output spectrum (Default: all the frequencies)", cxxopts::value<std::vector<double>>())
("p,pad", "Add extra zero-padding. By default, the program will pad the signals with 0s until the number of samples is a power of 2 (this would be equivalent to -p 1). With this option you can tell the program to instead pad until the power of 2 after the next one (-p 2) etc. This increases frequency resolution", cxxopts::value<unsigned int>())
("w,window", "Specify the window function used (rectangle (default), von-hann, gauss)", cxxopts::value<std::string>()->default_value("rectangle"))
("m,mono", "Analyze only the given channel", cxxopts::value<unsigned int>()->default_value("0"))
("files", "Files to fourier transform", cxxopts::value<std::vector<std::filesystem::path>>())
("h,help", "Print usage")
@ -163,6 +174,26 @@ Settings Parse(int argc, char** argv)
setts.splitInterval = (result.count("interval") ? result["interval"].as<float>() : 0.0f);
setts.analyzeChannel = (result.count("mono") ? result["mono"].as<unsigned int>() : 0);
setts.zeropadding = (result.count("pad") ? result["pad"].as<unsigned int>() : 1);
if (!result.count("window"))
{
setts.window = WindowFunctions::RECTANGLE;
}
else
{
std::string data = result["window"].as<std::string>();
std::transform(data.begin(), data.end(), data.begin(), [](unsigned char c) { return std::tolower(c); });
auto it = FUNCTIONS.find(data);
if (it == FUNCTIONS.end())
{
setts.window = WindowFunctions::RECTANGLE;
}
else
{
setts.window = it->second;
}
}
if (setts.maxFreq <= setts.minFreq && (setts.maxFreq != 0))