Finished code, beginning example generation

This commit is contained in:
Sam Perry
2017-02-20 13:58:45 +00:00
parent b5bf4abaea
commit dd5c8b2cd8
3 changed files with 137 additions and 61 deletions
+3 -3
View File
@@ -12,12 +12,12 @@ def main():
transience_s = s1['transience_s'][0]
transience_e = s1['transience_e'][0]
WLen = s1['WLen'][0]
win_count = s1['win_count'][0]
win_count = int(s1['win_count'][0])
n1 = s1['n1'][0]
filterN1 = s1['filterN1'][0][0]
pdb.set_trace()
plt.plot(((np.arange(win_count)*n1)[:-84])+WLen/2,analysis)
plt.plot(((np.arange(win_count)*n1))+WLen/2,analysis)
for i in transience_s:
plt.axvline(i, color='r', linestyle='--')
for i in transience_e:
+2 -2
View File
@@ -1,5 +1,5 @@
function main()
fileName = './media/Limbo1.wav';
ratio = 15;
fileName = './media/PianoDebussy.wav';
ratio = 3;
timeStretch(fileName, ratio);
+132 -56
View File
@@ -1,22 +1,21 @@
function timeStretch(fileName, ratio)
% function timeStretch(fileName, ratio)
% (based on DAFx Book, ch08/VX_tstretch_real_pv.m)
%===== this program performs time stretching
%===== using the FFT-IFFT approach,
%===== for real ratio, and using
%===== w1 and w2 windows (analysis and synthesis)
%===== WLen is the length of the windows
%===== hopSize and n2: steps (in samples) for the analysis and synthesis
function timeStretch(fileName, stretchRatio)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% This program performs time stretching using the FFT-IFFT approach.
% (based on DAFx Book, ch08/VX_tstretch_real_pv.m)
% Inputs:
% fileName: Path to the input audio file
% stretchRatio: Time stretch ratio. Must be greater than 0.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if (nargin < 2) || (ratio <= 0)
error('usage: timeStretch(fileName, ratio)');
if (nargin < 2) || (stretchRatio <= 0)
error('usage: timeStretch(fileName, stretchRatio)');
end
% Analysis step [samples]
n2 = 512;
n2 = 256;
% Synthesis step [samples]
hopSize = round(n2 / ratio);
hopSize = round(n2 / stretchRatio);
% Window length
WLen = 2048;
@@ -37,53 +36,95 @@ function timeStretch(fileName, ratio)
in = [zeros(WLen, 1); in; ...
zeros(WLen-mod(L,hopSize),1)] / max(abs(in));
delta = 0.2;
delta = 0.1;
% Segment audio based on its transient and stable components, returning
% markers for stable sections and a ratio for their proportion of all the
% audio
[stable, stable_ratio] = segmentTransience(in, FS, WLen, hopSize, delta);
[stable, stableRatio] = segmentTransience(in, FS, WLen, hopSize, delta);
% Use the transient/stable segmentation to stretch the stable sections of
% audio by a given ratio.
timeStretchStable(in, FS, stable, ratio / stable_ratio);
timeStretchStable(in, FS, stable, stableRatio, stretchRatio);
function [stable, stableRatio] = segmentTransience(in, FS, WLen, hopSize, delta)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function to extract stable/transient segment information from input
% audio.
% Inputs:
% in: Input audio vector
% FS: Audio samplerate
% WLen: Analysis window length
% hopSize: Analysis window hop size
% delta: Selection threshold used for stable/transient segment
% separation. Values between 0.0 and 1.0 are recommended.
% Returns:
% stable: an N-by-2 array of stable part start+end markers (one row per
% stable part)
% stableRatio: The proportion of the input audio made up of stable parts
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function [stable, stable_ratio] = segmentTransience(in, FS, WLen, hopSize, delta)
% Calculate the spectral flux of the audio. This provides a measurement
% of transience across the audio
[analysis, winCount] = calculateSpectralFlux(in, WLen, hopSize);
filterN1 = 30;
% Normalise and filter the analysis to provide data that can be used for
% effective transience segmentation.
analysis = normaliseAnalysis(analysis, delta);
analysis = normaliseAnalysis(analysis, delta, filterN1, 1000);
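% Generate segmentation markers from analysis to be used in the time
% stretching algorithm
% NOTE(review): getStable declares its parameters as (in, analysis, delta,
% WLen, hopSize, ...) but is called below with WLen before delta - confirm
% the intended argument order.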
[stable, stable_ratio] = getStable(in, analysis, WLen, delta, hopSize);
[stable, stableRatio] = getStable(in, analysis, WLen, delta, hopSize, ...
filterN1, winCount);
function timeStretchStable(in, FS, stable, ratio)
%----- time stretching initializations -----
n2 = 256; % analysis step [samples]
n1 = round(n2 / ratio); % synthesis step [samples]
WLen = 2048; % Window length
w1 = hanning(WLen); % Hanning window of length WLen
w2 = w1;
% TODO; add semi-colon
tstretch_ratio = n2/n1
out = zeros(WLen+ceil(length(in)*tstretch_ratio),1);
function timeStretchStable(in, FS, stable, stableRatio, stretchRatio)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function to apply phase vocoder based time stretching to stable section
% of input audio
% Inputs:
% in: Input audio vector
% FS: Audio samplerate, used when writing the output file
% stable: N-by-2 array of stable part start+end markers
% stableRatio: The proportion of the input audio made up of stable parts
% stretchRatio: The ratio to stretch stable audio by. Values > 1 will
% result in a stretching of output. Values < 1 will result in a
% compression.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
stretchRatio = stretchRatio / stableRatio;
% analysis step [samples]
n2 = 256;
% synthesis step [samples]
n1 = round(n2 / stretchRatio);
% Window length
WLen = 2048;
% Hanning window of length WLen
w1 = hanning(WLen);
% Calculate ratio between analysis and synthesis hop size as the stretch
% ratio
tstretch_ratio = n2/n1;
% Allocate memory for output samples
out = zeros(WLen+ceil(length(in)*(1-stableRatio)) + WLen*2+ceil(length(in)*stableRatio*stretchRatio),1);
length(out)
% Initialize memory for phase vocoder variables
omega = 2*pi*n1*[0:WLen-1]'/WLen;
phi0 = zeros(WLen,1);
psi = zeros(WLen,1);
devcent = 2*pi*n1/WLen;
% Initialize read and write pointers for audio input and output
pin = 0;
pout = 0;
% Calculate the length of input audio
pend = length(in)-WLen;
while pin<pend
% Read grain from input and apply hanning window
grain = in(pin+1:pin+WLen).* w1;
% If the center of the grain is within any stable boundaries, apply
% time stretching
if(any(pin+WLen/2 > stable(:, 1) & pin+WLen/2 < stable(:, 2)))
% Time stretch using the phase vocoder implementation from DAFX by
% U. Zolzer
%===========================================
f = fft(fftshift(grain));
r = abs(f);
@@ -92,14 +133,19 @@ function timeStretchStable(in, FS, stable, ratio)
phi0 = phi;
psi = princarg(psi+delta_phi*tstretch_ratio);
ft = (r.* exp(i*psi));
grain = fftshift(real(ifft(ft))).*w2;
% plot(grain);drawnow;
grain = fftshift(real(ifft(ft))).*w1;
% ===========================================
% Overlap grain with previous outputs
out(pout+1:pout+WLen) = ...
out(pout+1:pout+WLen) + grain;
out(pout+1:pout+WLen) + grain;
% Increment read and write pointers by hop sizes
pin = pin + n1;
pout = pout + n2;
% Else, synthesize grain at its original speed
else
% Resynthesize the grain with the phase vocoder implementation from DAFX by
% U. Zolzer, without applying the stretch ratio to the phase increment
%===========================================
f = fft(fftshift(grain));
r = abs(f);
phi = angle(f);
@@ -107,39 +153,50 @@ function timeStretchStable(in, FS, stable, ratio)
phi0 = phi;
psi = princarg(psi+delta_phi);
ft = (r.* exp(i*psi));
grain = fftshift(real(ifft(ft))).*w2;
grain = fftshift(real(ifft(ft))).*w1;
%===========================================
% Overlap grain with previous outputs, scaling the grain by the
% stretch ratio to counter the increase in amplitude resulting from
% denser overlapping of grains.
out(pout+1:pout+WLen) = ...
out(pout+1:pout+WLen) + grain/tstretch_ratio;
out(pout+1:pout+WLen) + grain/tstretch_ratio;
% Increment read and write pointers by hop sizes
pin = pin + n1;
pout = pout + n1;
end
end
%----- listening and saving the output -----
%in = in(WLen+1:WLen+L);
% Normalise output
out = out(WLen+1:length(out))/max(abs(out));
% soundsc(out, FS);
outName = ['./out' sprintf('%3.1f', ratio) '.wav'];
% Write audio out and open in the default system application
outName = ['./out' sprintf('%3.1f', stretchRatio) '.wav'];
wavwrite(out, FS, outName);
system(['open ' outName]);
function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize)
function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize,...
filterN1, winCount)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function to convert transience start and end times to stable part
% segments.
% Inputs:
% in: Input audio vector
% analysis: Normalised Spectral Flux analysis frames
% delta: Threshold for separating transient/stable segments
% WLen: Analysis window size
% hopSize: Analysis hop size
% Returns:
% stable: a 2xN array of segment start and end times, where N is the number
% of stable parts in the audio.
% ratio: The ratio between the total size of stable and unstable parts in
% the audio.
% stable: a 2xN array of segment start and end times, where N is the number
% of stable parts in the audio.
% ratio: The ratio between the total size of stable and unstable parts in
% the audio.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Enables the saving of variables to mat files for plotting in Python
pythonPlot = true;
% TODO: do something with this...
delta = 0;
% Chosen as it was decided that picking too many transient sections is
% better than picking too few
delta = -0.05;
winCount = floor((length(in)-WLen)/hopSize);
% create boolean array of analysis values above a set threshold delta
a = double(analysis > delta);
@@ -175,6 +232,7 @@ function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize)
s1.win_count = winCount;
s1.n1 = hopSize;
s1.WLen = WLen;
s1.filterN1 = filterN1;
save('./vars.mat','-struct', 's1')
end
@@ -199,7 +257,17 @@ function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize)
ratio = sum(stable(:, 2) - stable(:, 1)) / L;
function [analysis, winCount] = calculateSpectralFlux(in, WLen, hopSize)
%----- transience analysis initialization -----
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function to calculate Spectral Flux analysis for an input.
% Inputs:
% in: Input audio vector
% WLen: Analysis window size
% hopSize: Analysis hop size
% Returns:
% analysis: Normalised Spectral Flux analysis frames
% winCount: The total number of windows used in analysis of the input
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Allocate memory to store the current grain to be analysed
grain = zeros(WLen,1);
% Allocate memory to store the previous window's magnitude during analysis
@@ -239,20 +307,28 @@ function [analysis, winCount] = calculateSpectralFlux(in, WLen, hopSize)
pout = pout + 1;
end
function analysis = normaliseAnalysis(analysis, delta)
function analysis = normaliseAnalysis(analysis, delta, filt1, filt2)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
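% Function to normalise Spectral Flux analysis. This is achieved via the
% method proposed in A Tutorial on Onset Detection in Music Signals - J.
% Bello et al. (p.9)
% Inputs:
% analysis: Spectral Flux analysis frames
% delta: Threshold offset subtracted from the analysis along with the
% median-filtered threshold curve
% filt1: Filter length argument (presumably for smoothing the analysis -
% TODO confirm against the function body)
% filt2: Filter length argument (presumably for the median-filtered
% threshold curve - TODO confirm against the function body)
% Returns:
% analysis: Normalised Spectral Flux analysis frames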
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Normalize analysis
analysis = analysis - mean(analysis);
analysis = analysis / max(abs(std(analysis)));
%TODO: Check that this aligns with the original analysis and with the
%audio.
analysis = filter(ones(1, 40)/40, 1, analysis);
analysis = analysis(25:end);
analysis = medfilt1(analysis, 40);
%TODO: Check that this aligns with the original analysis and with the
%audio.
thresh = medfilt1(analysis, 1000);
% Subtract low frequency content to flatten analysis, leaving relevant
% peaks for onset/transience detection
analysis = analysis - (delta+thresh);