Finished code, beginning example generation
This commit is contained in:
+3
-3
@@ -12,12 +12,12 @@ def main():
|
||||
transience_s = s1['transience_s'][0]
|
||||
transience_e = s1['transience_e'][0]
|
||||
WLen = s1['WLen'][0]
|
||||
win_count = s1['win_count'][0]
|
||||
win_count = int(s1['win_count'][0])
|
||||
n1 = s1['n1'][0]
|
||||
filterN1 = s1['filterN1'][0][0]
|
||||
|
||||
|
||||
pdb.set_trace()
|
||||
plt.plot(((np.arange(win_count)*n1)[:-84])+WLen/2,analysis)
|
||||
plt.plot(((np.arange(win_count)*n1))+WLen/2,analysis)
|
||||
for i in transience_s:
|
||||
plt.axvline(i, color='r', linestyle='--')
|
||||
for i in transience_e:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
function main()
|
||||
fileName = './media/Limbo1.wav';
|
||||
ratio = 15;
|
||||
fileName = './media/PianoDebussy.wav';
|
||||
ratio = 3;
|
||||
timeStretch(fileName, ratio);
|
||||
|
||||
|
||||
+132
-56
@@ -1,22 +1,21 @@
|
||||
|
||||
function timeStretch(fileName, ratio)
|
||||
% function timeStretch(fileName, ratio)
|
||||
% (based on DAFx Book, ch08/VX_tstretch_real_pv.m)
|
||||
%===== this program performs time stretching
|
||||
%===== using the FFT-IFFT approach,
|
||||
%===== for real ratio, and using
|
||||
%===== w1 and w2 windows (analysis and synthesis)
|
||||
%===== WLen is the length of the windows
|
||||
%===== hopSize and n2: steps (in samples) for the analysis and synthesis
|
||||
function timeStretch(fileName, stretchRatio)
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% This program performs time stretching using the FFT-IFFT approach.
|
||||
% (based on DAFx Book, ch08/VX_tstretch_real_pv.m)
|
||||
% Inputs:
|
||||
% fileName: Path to the input audio (.wav) file
|
||||
% stretchRatio: Time-stretch ratio to apply; must be greater than 0
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
if (nargin < 2) || (ratio <= 0)
|
||||
error('usage: timeStretch(fileName, ratio)');
|
||||
if (nargin < 2) || (stretchRatio <= 0)
|
||||
error('usage: timeStretch(fileName, stretchRatio)');
|
||||
end
|
||||
|
||||
% Analysis step [samples]
|
||||
n2 = 512;
|
||||
n2 = 256;
|
||||
% Synthesis step [samples]
|
||||
hopSize = round(n2 / ratio);
|
||||
hopSize = round(n2 / stretchRatio);
|
||||
% Window length
|
||||
WLen = 2048;
|
||||
|
||||
@@ -37,53 +36,95 @@ function timeStretch(fileName, ratio)
|
||||
in = [zeros(WLen, 1); in; ...
|
||||
zeros(WLen-mod(L,hopSize),1)] / max(abs(in));
|
||||
|
||||
delta = 0.2;
|
||||
delta = 0.1;
|
||||
% Segment audio based on its transient and stable components, returning
|
||||
% markers for stable sections and a ratio for their proportion of all the
|
||||
% audio
|
||||
[stable, stable_ratio] = segmentTransience(in, FS, WLen, hopSize, delta);
|
||||
[stable, stableRatio] = segmentTransience(in, FS, WLen, hopSize, delta);
|
||||
|
||||
% Use the transient/stable segmentation to stretch the stable sections of
|
||||
% audio by a given ratio.
|
||||
timeStretchStable(in, FS, stable, ratio / stable_ratio);
|
||||
timeStretchStable(in, FS, stable, stableRatio, stretchRatio);
|
||||
|
||||
function [stable, stableRatio] = segmentTransience(in, FS, WLen, hopSize, delta)
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Function to extract stable/transient segment information from input
|
||||
% audio.
|
||||
% Inputs:
|
||||
% in: Input audio vector
|
||||
% FS: Audio samplerate
|
||||
% WLen: Analysis window length
|
||||
% hopSize: Analysis window hop size
|
||||
% delta: Selection threshold used for stable/transient segment
|
||||
% separation. Values between 0.0 and 1.0 are recommended.
|
||||
% Returns:
|
||||
% stable: a 2XN vector of stable part start+end markers
|
||||
% stableRatio: The ratio of stable/transient content in input audio
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
function [stable, stable_ratio] = segmentTransience(in, FS, WLen, hopSize, delta)
|
||||
% Calculate the spectral flux of the audio. This provides a measurement
|
||||
% for transience across the audio
|
||||
[analysis, winCount] = calculateSpectralFlux(in, WLen, hopSize);
|
||||
|
||||
filterN1 = 30;
|
||||
% Normalise and filter the analysis to provide data that can be used for
|
||||
% effective transience segmentation.
|
||||
analysis = normaliseAnalysis(analysis, delta);
|
||||
analysis = normaliseAnalysis(analysis, delta, filterN1, 1000);
|
||||
|
||||
% Generate segmentation markers from analysis to be used in the time
|
||||
% stretching algorithm
|
||||
[stable, stable_ratio] = getStable(in, analysis, WLen, delta, hopSize);
|
||||
[stable, stableRatio] = getStable(in, analysis, WLen, delta, hopSize, ...
|
||||
filterN1, winCount);
|
||||
|
||||
function timeStretchStable(in, FS, stable, ratio)
|
||||
%----- time stretching initializations -----
|
||||
n2 = 256; % analysis step [samples]
|
||||
n1 = round(n2 / ratio); % synthesis step [samples]
|
||||
WLen = 2048; % Window length
|
||||
w1 = hanning(WLen); % Hanning window of length WLen
|
||||
w2 = w1;
|
||||
% TODO; add semi-colon
|
||||
tstretch_ratio = n2/n1
|
||||
out = zeros(WLen+ceil(length(in)*tstretch_ratio),1);
|
||||
function timeStretchStable(in, FS, stable, stableRatio, stretchRatio)
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Function to apply phase vocoder based time stretching to stable section
|
||||
% of input audio
|
||||
% Inputs:
|
||||
% in: Input audio vector
|
||||
% stable: 2XN vector of stable part start+end markers
|
||||
% stableRatio: The ratio of stable/transient content in input audio
|
||||
% stretchRatio: The ratio to stretch stable audio by. Value > 1 will
|
||||
% result in a stretching of output. Values < 1 will result in a
|
||||
% compression.
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
stretchRatio = stretchRatio / stableRatio;
|
||||
% analysis step [samples]
|
||||
n2 = 256;
|
||||
% synthesis step [samples]
|
||||
n1 = round(n2 / stretchRatio);
|
||||
% Window length
|
||||
WLen = 2048;
|
||||
% Hanning window of length WLen
|
||||
w1 = hanning(WLen);
|
||||
% Calculate ratio between analysis and synthesis hop size as the stretch
|
||||
% ratio
|
||||
tstretch_ratio = n2/n1;
|
||||
|
||||
% Allocate memory for output samples
|
||||
out = zeros(WLen+ceil(length(in)*(1-stableRatio)) + WLen*2+ceil(length(in)*stableRatio*stretchRatio),1);
|
||||
length(out)
|
||||
% Initialize memory for phase vocoder variables
|
||||
omega = 2*pi*n1*[0:WLen-1]'/WLen;
|
||||
phi0 = zeros(WLen,1);
|
||||
psi = zeros(WLen,1);
|
||||
|
||||
devcent = 2*pi*n1/WLen;
|
||||
|
||||
% Initialize read and write pointers for audio input and output
|
||||
pin = 0;
|
||||
pout = 0;
|
||||
% Calculate the length of input audio
|
||||
pend = length(in)-WLen;
|
||||
|
||||
while pin<pend
|
||||
% Read grain from input and apply hanning window
|
||||
grain = in(pin+1:pin+WLen).* w1;
|
||||
|
||||
% If the center of the grain is within any stable boundaries, apply
|
||||
% time stretching
|
||||
if(any(pin+WLen/2 > stable(:, 1) & pin+WLen/2 < stable(:, 2)))
|
||||
% Time stretch using the phase vocoder implementation from DAFX by
|
||||
% U. Zolzer
|
||||
%===========================================
|
||||
f = fft(fftshift(grain));
|
||||
r = abs(f);
|
||||
@@ -92,14 +133,19 @@ function timeStretchStable(in, FS, stable, ratio)
|
||||
phi0 = phi;
|
||||
psi = princarg(psi+delta_phi*tstretch_ratio);
|
||||
ft = (r.* exp(i*psi));
|
||||
grain = fftshift(real(ifft(ft))).*w2;
|
||||
% plot(grain);drawnow;
|
||||
grain = fftshift(real(ifft(ft))).*w1;
|
||||
% ===========================================
|
||||
% Overlap grain with previous outputs
|
||||
out(pout+1:pout+WLen) = ...
|
||||
out(pout+1:pout+WLen) + grain;
|
||||
out(pout+1:pout+WLen) + grain;
|
||||
% Increment read and write pointers by hop sizes
|
||||
pin = pin + n1;
|
||||
pout = pout + n2;
|
||||
% Else, synthesize grain at its original speed
|
||||
else
|
||||
% Time stretch using the phase vocoder implementation from DAFX by
|
||||
% U. Zolzer
|
||||
%===========================================
|
||||
f = fft(fftshift(grain));
|
||||
r = abs(f);
|
||||
phi = angle(f);
|
||||
@@ -107,39 +153,50 @@ function timeStretchStable(in, FS, stable, ratio)
|
||||
phi0 = phi;
|
||||
psi = princarg(psi+delta_phi);
|
||||
ft = (r.* exp(i*psi));
|
||||
grain = fftshift(real(ifft(ft))).*w2;
|
||||
grain = fftshift(real(ifft(ft))).*w1;
|
||||
%===========================================
|
||||
% Overlap grain with previous outputs, scaling the grain by the
|
||||
% stretch ratio to counter the increase in amplitude resulting from
|
||||
% denser overlapping of grains.
|
||||
out(pout+1:pout+WLen) = ...
|
||||
out(pout+1:pout+WLen) + grain/tstretch_ratio;
|
||||
out(pout+1:pout+WLen) + grain/tstretch_ratio;
|
||||
% Increment read and write pointers by hop sizes
|
||||
pin = pin + n1;
|
||||
pout = pout + n1;
|
||||
end
|
||||
end
|
||||
|
||||
%----- listening and saving the output -----
|
||||
%in = in(WLen+1:WLen+L);
|
||||
% Normalise output
|
||||
out = out(WLen+1:length(out))/max(abs(out));
|
||||
% soundsc(out, FS);
|
||||
outName = ['./out' sprintf('%3.1f', ratio) '.wav'];
|
||||
% Write audio out and open in the default system application
|
||||
outName = ['./out' sprintf('%3.1f', stretchRatio) '.wav'];
|
||||
wavwrite(out, FS, outName);
|
||||
system(['open ' outName]);
|
||||
|
||||
function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize)
|
||||
function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize,...
|
||||
filterN1, winCount)
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Function to convert transience start and end times to stable part
|
||||
% segments.
|
||||
% Inputs:
|
||||
% in: Input audio vector
|
||||
% analysis: Normalised Spectral Flux analysis frames
|
||||
% delta: Threshold for separating transient/stable segments
|
||||
% WLen: Analysis window size
|
||||
% hopSize: Analysis hop size
|
||||
% Returns:
|
||||
% stable: a 2xN array of segment start and end times, where N is the number
|
||||
% of stable parts in the audio.
|
||||
% ratio: The ratio between the total size of stable and unstable parts in
|
||||
% the audio.
|
||||
% stable: a 2xN array of segment start and end times, where N is the number
|
||||
% of stable parts in the audio.
|
||||
% ratio: The ratio between the total size of stable and unstable parts in
|
||||
% the audio.
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
% Enables the saving of variables to mat files for plotting in Python
|
||||
pythonPlot = true;
|
||||
% TODO: do something with this...
|
||||
delta = 0;
|
||||
% Chosen as it was decided that picking too many transient sections is
|
||||
% better than picking too few
|
||||
delta = -0.05;
|
||||
|
||||
winCount = floor((length(in)-WLen)/hopSize);
|
||||
% create boolean array of analysis values above a set threshold delta
|
||||
a = double(analysis > delta);
|
||||
|
||||
@@ -175,6 +232,7 @@ function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize)
|
||||
s1.win_count = winCount;
|
||||
s1.n1 = hopSize;
|
||||
s1.WLen = WLen;
|
||||
s1.filterN1 = filterN1;
|
||||
save('./vars.mat','-struct', 's1')
|
||||
end
|
||||
|
||||
@@ -199,7 +257,17 @@ function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize)
|
||||
ratio = sum(stable(:, 2) - stable(:, 1)) / L;
|
||||
|
||||
function [analysis, winCount] = calculateSpectralFlux(in, WLen, hopSize)
|
||||
%----- transience analysis initialization -----
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Function to calculate Spectral Flux analysis for an input.
|
||||
% Inputs:
|
||||
% in: Input audio vector
|
||||
% WLen: Analysis window size
|
||||
% hopSize: Analysis hop size
|
||||
% Returns:
|
||||
% analysis: Normalised Spectral Flux analysis frames
|
||||
% winCount: The total number of windows used in analysis of the input
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
|
||||
% Allocate memory to store the current grain to be analysed
|
||||
grain = zeros(WLen,1);
|
||||
% Allocate memory to store the previous window's magnitude during analysis
|
||||
@@ -239,20 +307,28 @@ function [analysis, winCount] = calculateSpectralFlux(in, WLen, hopSize)
|
||||
pout = pout + 1;
|
||||
end
|
||||
|
||||
function analysis = normaliseAnalysis(analysis, delta)
|
||||
function analysis = normaliseAnalysis(analysis, delta, filt1, filt2)
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Function to normalise Spectral Flux analysis. This is achieved via the
|
||||
% method proposed in A Tutorial on Onset Detection in Music Signals - J.
|
||||
% Bello et al. (p.9)
|
||||
% Inputs:
|
||||
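% analysis: Spectral Flux analysis frames
|
||||
% delta: Offset added to the median-filtered threshold before subtraction
|
||||
% filt1, filt2: Filter lengths (presumably for the median filters below; TODO confirm)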
|
||||
% Returns:
|
||||
% analysis: Normalised Spectral Flux analysis frames
|
||||
% winCount: The total number of windows used in analysis of the input
|
||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||
% Normalize analysis
|
||||
analysis = analysis - mean(analysis);
|
||||
analysis = analysis / max(abs(std(analysis)));
|
||||
|
||||
%TODO: Check that this aligns with the original analysis and with the
|
||||
%audio.
|
||||
analysis = filter(ones(1, 40)/40, 1, analysis);
|
||||
analysis = analysis(25:end);
|
||||
analysis = medfilt1(analysis, 40);
|
||||
|
||||
%TODO: Check that this aligns with the original analysis and with the
|
||||
%audio.
|
||||
thresh = medfilt1(analysis, 1000);
|
||||
|
||||
|
||||
% Subtract low frequency content to flatten analysis, leaving relevant
|
||||
% peaks for onset/transience detection
|
||||
analysis = analysis - (delta+thresh);
|
||||
|
||||
Reference in New Issue
Block a user