Finished stable/transient segmentation

This commit is contained in:
Sam Perry
2017-02-18 15:31:58 +00:00
parent c24f3a3152
commit 0f97dab817
3 changed files with 193 additions and 160 deletions
+2 -1
View File
@@ -1,4 +1,5 @@
function main()
% main  Entry point: time-stretch the bundled test recording.
%
%   Calls timeStretch on './media/test3.wav' with a stretch ratio of 10.5.
%   An earlier assignment of ratio = 0.5 was immediately overwritten and
%   therefore never used; it has been removed so the effective value is
%   obvious to the reader.
fileName = './media/test3.wav';
ratio = 10.5;
timeStretch(fileName, ratio);
+3 -3
View File
@@ -2,9 +2,9 @@ function phase = princarg(phase_in)
% This function puts an arbitrary phase value into ]-pi,pi] [rad]
%
%--------------------------------------------------------------------------
% This source code is provided without any warranties as published in
% DAFX book 2nd edition, copyright Wiley & Sons 2011, available at
% http://www.dafx.de. It may be used for educational purposes and not
% This source code is provided without any warranties as published in
% DAFX book 2nd edition, copyright Wiley & Sons 2011, available at
% http://www.dafx.de. It may be used for educational purposes and not
% for commercial applications without further permission.
%--------------------------------------------------------------------------
+188 -156
View File
@@ -1,170 +1,202 @@
function timeStretch(fileName, ratio)
% NOTE(review): the text of this function appears to interleave two versions
% of the same code (the header comment, the nargin check, the user-data
% setup, the analysis loop and the thresholding all occur twice, and the
% `for` loop opened just before the second nargin check has no matching
% `end`). The comments added below describe each fragment on its own; they do
% not describe one coherent control flow. Confirm against the real file.
% function timeStretch(fileName, ratio)
% (based on DAFx Book, ch08/VX_tstretch_real_pv.m)
%===== this program performs time stretching
%===== using the FFT-IFFT approach,
%===== for real ratio, and using
%===== w1 and w2 windows (analysis and synthesis)
%===== WLen is the length of the windows
%===== n1 and n2: steps (in samples) for the analysis and synthesis
% function timeStretch(fileName, ratio)
% (based on DAFx Book, ch08/VX_tstretch_real_pv.m)
%===== this program performs time stretching
%===== using the FFT-IFFT approach,
%===== for real ratio, and using
%===== w1 and w2 windows (analysis and synthesis)
%===== WLen is the length of the windows
%===== n1 and n2: steps (in samples) for the analysis and synthesis
% Reject calls with fewer than two arguments or a non-positive ratio.
if (nargin < 2) || (ratio <= 0)
error('usage: timeStretch(fileName, ratio)');
end
%----- user data -----
% NOTE(review): the trailing comments on n2/n1 below say "analysis" and
% "synthesis" respectively, but the loops in this function advance the read
% pointer `pin` by n1 and the write pointer `pout` by n2, which matches the
% header ("n1 and n2: steps for the analysis and synthesis"). The two
% trailing labels look swapped - confirm.
n2 = 512; % analysis step [samples]
n1 = round(n2 / ratio); % synthesis step [samples]
WLen = 2048; % Window length
w1 = hanning(WLen); % Hanning window of length WLen
w2 = w1;
% NOTE(review): the third output is named `channels` and used as a channel
% count, but wavread is usually documented as returning bits-per-sample in
% that position - confirm. Either way, the sum below collapses the columns
% of DAFx_in into a single column.
[DAFx_in,FS,channels] = wavread(fileName);
if channels > 1
DAFx_in = sum(DAFx_in,2);
end
L = length(DAFx_in);
% Pad with WLen zeros in front and WLen-mod(L,n1) zeros behind, and scale
% the whole signal by the peak absolute value of the unpadded input.
DAFx_in = [zeros(WLen, 1); DAFx_in; ...
zeros(WLen-mod(L,n1),1)] / max(abs(DAFx_in));
%----- transience analysis initialization -----
% test, devcent, vtest, theta1, theta2 and mag2 are assigned here but are
% not read anywhere else in the visible code.
test = 0.4;
devcent = 2*pi*n1/WLen;
vtest = test * devcent;
grain = zeros(WLen,1);
theta1 = zeros(WLen,1);
theta2 = zeros(WLen,1);
mag1 = zeros(WLen/2,1);
mag2 = zeros(WLen/2,1);
% Number of analysis frames expected for hop n1; one entry of `analysis`
% is preallocated per frame.
win_count = floor((length(DAFx_in)-WLen)/n1);
analysis = zeros(win_count, 1);
% pin is a zero-based read offset into DAFx_in; pout is the one-based
% index of the frame being written into `analysis`.
pin = 0;
pout = 1;
pend = length(DAFx_in)-WLen;
%----- transience analysis -----
% For each windowed frame, store the Euclidean distance between this
% frame's magnitude spectrum (first WLen/2 bins) and the previous frame's,
% divided by WLen/2.
while pin<pend
grain = DAFx_in(pin+1:pin+WLen).* w1;
f = fft(grain);
mag = abs(f(1:WLen/2));
analysis(pout) = sqrt(sum((mag-mag1).^2))/(WLen/2);
%mag_diff = mag-mag1
%analysis(pout) = sum(mag_diff-abs(mag_diff)/2);
mag1 = mag;
pin = pin + n1;
pout = pout + 1;
end
% Normalize analysis
% Remove the mean, then divide by the maximum. The second statement has no
% trailing semicolon, so the resulting vector is echoed to the console.
analysis = analysis - mean(analysis);
analysis = analysis / max(analysis)
% TODO: Absolute values seems odd and possibly wrong... check this...
% thresh(k) is the mean of |analysis| over the three frames k, k+1, k+2.
thresh = zeros(length(analysis)-2, 1);
for i = 3:length(analysis)
thresh(i-2) = mean( ...
[abs(analysis(i)), ...
abs(analysis(i-1)),...
abs(analysis(i-2))]...
);
end
% No semicolon: echoes thresh to the console.
thresh
delta = 0.05;
lambda = 1.00;
a = zeros(length(thresh),1);
% NOTE(review): `i` still holds the last value of the loop above
% (length(analysis)), so this writes past the preallocated end of `a`
% rather than to a(1) - presumably a(1) was intended; confirm.
a(i) = analysis(1) > thresh(1);
% Per-frame transient flag with hysteresis: the comparison level is
% delta - lambda*thresh when the previous flag is set and
% delta + lambda*thresh when it is not.
for i = 3:length(thresh)
if a(i-2) == true
a(i-1) = analysis(i) > delta - lambda * thresh(i-2);
else
a(i-1) = analysis(i) > delta + lambda * thresh(i-2);
if (nargin < 2) || (ratio <= 0)
error('usage: timeStretch(fileName, ratio)');
end
end
% Disabled debug plot (condition is the constant false).
if(false)
figure
%plot(DAFx_in)
%hold on;
plot(((1:win_count)*n1)+WLen/2,a)
hold on;
plot(((1:win_count)*n1)+WLen/2,analysis)
end
%----- user data -----
n2 = 512; % analysis step [samples]
n1 = round(n2 / ratio); % synthesis step [samples]
WLen = 2048; % Window length
w1 = hanning(WLen); % Hanning window of length WLen
w2 = w1;
[DAFx_in,FS,channels] = wavread(fileName);
if channels > 1
DAFx_in = sum(DAFx_in,2);
end
L = length(DAFx_in);
DAFx_in = [zeros(WLen, 1); DAFx_in; ...
zeros(WLen-mod(L,n1),1)] / max(abs(DAFx_in));
% Code adapted from https://uk.mathworks.com/matlabcentral/newsreader/view_thread/151318
% Convolving the flag vector with [1 -1] gives its first difference:
% +1 where the flag switches on, -1 where it switches off.
krn=[1 -1];
changes=conv(krn, a);
% Calculate start and end window indexes of transient segments
t_s = find(changes==1);
t_e = find(changes==-1);
%----- transience analysis initialization -----
test = 0.4;
devcent = 2*pi*n1/WLen;
vtest = test * devcent;
grain = zeros(WLen,1);
theta1 = zeros(WLen,1);
theta2 = zeros(WLen,1);
mag1 = zeros(WLen/2,1);
mag2 = zeros(WLen/2,1);
win_count = floor((length(DAFx_in)-WLen)/n1);
analysis = zeros(win_count, 1);
% Convert window index to samples
% TODO: Check sample accuracy of this...
transience_s = t_s * n1;
transience_e = t_e * n1;
pin = 0;
pout = 1;
pend = length(DAFx_in)-WLen;
%----- transience analysis -----
while pin<pend
grain = DAFx_in(pin+1:pin+WLen).* w1;
f = fft(grain);
mag = abs(f(1:WLen/2));
% Collect the analysis results as row vectors / scalars in struct s1,
% whose fields are written to ./vars.mat by the save call further down.
s1.analysis = analysis';
s1.transience_e = transience_e' ;
s1.transience_s = transience_s';
s1.a = a';
s1.win_count = win_count;
s1.n1 = n1;
s1.WLen = WLen;
analysis(pout) = sqrt(sum((mag-mag1).^2))/(WLen/2);
%mag_diff = mag-mag1
%analysis(pout) = sum(mag_diff-abs(mag_diff)/2);
mag1 = mag;
save('./vars.mat','-struct', 's1')
pin = pin + n1;
pout = pout + 1;
if(false)
figure
%plot(DAFx_in)
plot(((1:win_count)*n1)+WLen/2,analysis)
plot()
end
return
end
%-------------------------------
% Normalize analysis
analysis = analysis - mean(analysis);
analysis = analysis / max(analysis)
%----- time stretching initializations -----
% tstretch_ratio is the output-hop / input-hop ratio actually achieved after
% rounding n1 (echoed to the console: no semicolon). omega is the phase
% advance of each FFT bin over one input hop of n1 samples.
tstretch_ratio = n2/n1
DAFx_out = zeros(WLen+ceil(length(DAFx_in)*tstretch_ratio),1);
omega = 2*pi*n1*[0:WLen-1]'/WLen;
phi0 = zeros(WLen,1);
psi = zeros(WLen,1);
% TODO: Absolute values seems odd and possibly wrong... check this...
thresh = zeros(length(analysis)-2, 1);
for i = 3:length(analysis)
thresh(i-2) = mean( ...
[abs(analysis(i)), ...
abs(analysis(i-1)),...
abs(analysis(i-2))]...
);
end
thresh
delta = 0.05;
lambda = 1.00;
devcent = 2*pi*n1/WLen;
a = zeros(length(thresh),1);
a(i) = analysis(1) > thresh(1);
for i = 3:length(thresh)
if a(i-2) == true
a(i-1) = analysis(i) > delta - lambda * thresh(i-2);
else
a(i-1) = analysis(i) > delta + lambda * thresh(i-2);
end
end
tic
%UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU
pin = 0;
pout = 0;
pend = length(DAFx_in)-WLen;
% Phase-vocoder loop: read a windowed grain every n1 samples, rebuild its
% spectrum with accumulated phase psi, and overlap-add it every n2 samples.
% NOTE(review): `i` in exp(i*psi) is meant as the imaginary unit, but `i`
% is also used as a loop variable earlier in this text; if those loops run
% in the same workspace, `i` is a real integer here - confirm.
while pin<pend
grain = DAFx_in(pin+1:pin+WLen).* w1;
%===========================================
f = fft(fftshift(grain));
r = abs(f);
phi = angle(f);
delta_phi= omega + princarg(phi-phi0-omega);
phi0 = phi;
psi = princarg(psi+delta_phi*tstretch_ratio);
ft = (r.* exp(i*psi));
grain = fftshift(real(ifft(ft))).*w2;
% plot(grain);drawnow;
% ===========================================
DAFx_out(pout+1:pout+WLen) = ...
DAFx_out(pout+1:pout+WLen) + grain;
pin = pin + n1;
pout = pout + n2;
end
%UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU
toc
if(false)
figure
%plot(DAFx_in)
%hold on;
plot(((1:win_count)*n1)+WLen/2,a)
hold on;
plot(((1:win_count)*n1)+WLen/2,analysis)
end
%----- listening and saving the output -----
% Drop the first WLen samples, scale by the peak of the full buffer, write
% the result next to the input as <name><ratio>.wav and play it with the
% external `play` command.
%DAFx_in = DAFx_in(WLen+1:WLen+L);
DAFx_out = DAFx_out(WLen+1:length(DAFx_out))/max(abs(DAFx_out));
% soundsc(DAFx_out, FS);
outName = [fileName(1:end-4) sprintf('%3.1f', ratio) '.wav'];
wavwrite(DAFx_out, FS, outName);
system(['play --silent ' outName]);
% Code adapted from https://uk.mathworks.com/matlabcentral/newsreader/view_thread/151318
krn=[1 -1];
changes=conv(krn, a);
% Calculate start and end window indexes of transient segments
t_s = find(changes==1);
t_e = find(changes==-1);
% Convert window index to samples
% TODO: Check sample accuracy of this...
transience_s = t_s * n1;
transience_e = t_e * n1;
% Export variables to mat file for plotting in Python
s1.analysis = analysis';
s1.transience_e = transience_e' ;
s1.transience_s = transience_s';
s1.a = a';
s1.win_count = win_count;
s1.n1 = n1;
s1.WLen = WLen;
save('./vars.mat','-struct', 's1')
% Turn the transient boundaries into stable segments, then stretch only
% those. The requested ratio is divided by stable_ratio (the second output
% of getStable) before being passed on.
[stable, stable_ratio] = getStable(DAFx_in, transience_s, transience_e);
timeStretchStable(DAFx_in, FS, stable, ratio / stable_ratio);
function timeStretchStable(in, FS, stable, ratio)
% timeStretchStable  Phase-vocoder time stretch applied to stable segments only.
%
%   timeStretchStable(in, FS, stable, ratio) steps through IN in windowed
%   grains of WLen samples taken every n1 samples. A grain whose zero-based
%   start offset lies inside one of the [start, end) ranges in STABLE is
%   resynthesised by the phase vocoder and overlap-added with output hop n2;
%   every other grain is overlap-added unmodified (analysis window only)
%   with hop n1. The phase-vocoder state (phi0, psi) is only updated for
%   stable grains. The result, with its first WLen samples dropped and
%   scaled by the peak of the full buffer, is written to
%   ./out<ratio>.wav and played with the external `play` command.
%
%   Inputs:
%     in      audio samples (treated as a single column)
%     FS      sample rate, passed through to wavwrite
%     stable  N-by-2 matrix, one [start, end) sample range per row
%     ratio   stretch factor for the stable segments; must be positive and
%             small enough that round(512/ratio) >= 1
%
%   Errors:
%     timeStretchStable:badRatio  when RATIO is not positive or rounds the
%                                 input hop down to zero (which would stop
%                                 the read pointer from ever advancing).

%----- time stretching initializations -----
n2 = 512;                    % output hop for stable grains [samples]
n1 = round(n2 / ratio);      % input hop [samples]
if ~(ratio > 0) || n1 < 1
    error('timeStretchStable:badRatio', ...
        'ratio must be positive and give an input hop of at least one sample (got %g)', ratio);
end
WLen = 2048;                 % window length
w1 = hanning(WLen);          % analysis window
w2 = w1;                     % synthesis window

in = in(:);                  % the grains are multiplied by a column window

tstretch_ratio = n2/n1;      % ratio actually achieved after rounding n1
out = zeros(WLen+ceil(length(in)*tstretch_ratio),1);
omega = 2*pi*n1*[0:WLen-1]'/WLen;   % per-bin phase advance over one input hop
phi0 = zeros(WLen,1);        % phase of the previous stable grain
psi = zeros(WLen,1);         % accumulated synthesis phase

tic
%UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU
pin = 0;
pout = 0;
pend = length(in)-WLen;
while pin<pend
    grain = in(pin+1:pin+WLen).* w1;

    % Transient grains advance pout by n1, which can outrun the buffer
    % sized for hop n2 when n1 > n2. Grow with zero fill so the read on the
    % right-hand side of the overlap-add below never indexes past the end.
    if pout + WLen > length(out)
        out(pout + WLen) = 0;
    end

    if any(pin >= stable(:, 1) & pin < stable(:, 2))
        %===========================================
        f = fft(fftshift(grain));
        r = abs(f);
        phi = angle(f);
        % princarg wraps the phase deviation into ]-pi, pi].
        delta_phi = omega + princarg(phi-phi0-omega);
        phi0 = phi;
        psi = princarg(psi+delta_phi*tstretch_ratio);
        % 1i rather than i: immune to a variable named i shadowing the
        % imaginary unit.
        ft = r.* exp(1i*psi);
        grain = fftshift(real(ifft(ft))).*w2;
        % plot(grain);drawnow;
        % ===========================================
        out(pout+1:pout+WLen) = out(pout+1:pout+WLen) + grain;
        pout = pout + n2;
    else
        out(pout+1:pout+WLen) = out(pout+1:pout+WLen) + grain;
        pout = pout + n1;
    end
    pin = pin + n1;
end
%UUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUU
toc

%----- listening and saving the output -----
%in = in(WLen+1:WLen+L);
peak = max(abs(out));        % peak of the whole buffer, as before
out = out(WLen+1:length(out));
if peak > 0                  % leave an all-zero result as zeros, not NaN
    out = out / peak;
end
% soundsc(out, FS);
outName = ['./out' sprintf('%3.1f', ratio) '.wav'];
wavwrite(out, FS, outName);
system(['play --silent ' outName]);
function [stable, ratio] = getStable(in, transience_s, transience_e)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Function to convert transience start and end times to stable part
% segments.
%
% Inputs:
%   in:           the audio vector; only its length is used.
%   transience_s: start positions of the transient segments (row or column).
%   transience_e: end positions of the transient segments (row or column).
%
% Returns:
%   stable: an N-by-2 array with one [start, end] pair per row, where N is
%           the number of stable parts. Unless the first transient starts
%           at exactly 0, a leading segment beginning at 0 and a trailing
%           segment ending at length(in) are included, so
%           stable(k,:) = [transience_e(k-1), transience_s(k)].
%           If the first transient start is exactly 0 the inputs are paired
%           as given, without that padding.
%           With no transients at all the result is the single segment
%           [0, length(in)].
%   ratio:  the summed length of the stable segments divided by the total
%           length of the input.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Force both boundary lists into column vectors
transience_s = transience_s(:);
transience_e = transience_e(:);
% Get the length of the input audio
L = length(in);
% isempty guard: with no transients there is no transience_s(1) to inspect,
% and the whole input is one stable segment.
if isempty(transience_s) || (transience_s(1) ~= 0)
    transience_e = [0; transience_e];
    transience_s = [transience_s; L];
end
% A stable part runs from the end of one transient to the start of the next.
stable = horzcat(transience_e, transience_s);
ratio = sum(stable(:, 2) - stable(:, 1)) / L;