Finished code, beginning example generation

2017-02-20 13:58:45 +00:00
parent b5bf4abaea
commit dd5c8b2cd8
3 changed files with 137 additions and 61 deletions
@@ -12,12 +12,12 @@ def main():
    transience_s = s1['transience_s'][0]
    transience_e = s1['transience_e'][0]
    WLen = s1['WLen'][0]
-    win_count = s1['win_count'][0]
+    win_count = int(s1['win_count'][0])
    n1 = s1['n1'][0]
+    filterN1 = s1['filterN1'][0][0]


-    pdb.set_trace()
-    plt.plot(((np.arange(win_count)*n1)[:-84])+WLen/2,analysis)
+    plt.plot(((np.arange(win_count)*n1))+WLen/2,analysis)
    for i in transience_s:
        plt.axvline(i, color='r', linestyle='--')
    for i in transience_e:
@@ -1,5 +1,5 @@
 function main()
-    fileName = './media/Limbo1.wav';
-    ratio = 15;
+    fileName = './media/PianoDebussy.wav';
+    ratio = 3;
    timeStretch(fileName, ratio);

@@ -1,22 +1,21 @@

-function timeStretch(fileName, ratio)
-    % function timeStretch(fileName, ratio)
-    %     (based on DAFx Book, ch08/VX_tstretch_real_pv.m)
-    %===== this program performs time stretching
-    %===== using the FFT-IFFT approach,
-    %===== for real ratio, and using
-    %===== w1 and w2 windows (analysis and synthesis)
-    %===== WLen is the length of the windows
-    %===== hopSize and n2: steps (in samples) for the analysis and synthesis
+function timeStretch(fileName, stretchRatio)
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % This program performs time stretching using the FFT-IFFT approach.
+    % (based on DAFx Book, ch08/VX_tstretch_real_pv.m)
+    % Inputs:
+    %   fileName: Path to the input audio file
+    %   stretchRatio: Time-stretch ratio (must be greater than 0)
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-    if (nargin < 2) || (ratio <= 0)
-        error('usage: timeStretch(fileName, ratio)');
+    if (nargin < 2) || (stretchRatio <= 0)
+        error('usage: timeStretch(fileName, stretchRatio)');
    end

    % Analysis step [samples]
-    n2 = 512;
+    n2 = 256;
    % Synthesis step [samples]
-    hopSize = round(n2 / ratio);
+    hopSize = round(n2 / stretchRatio);
    % Window length
    WLen = 2048;

@@ -37,53 +36,95 @@ function timeStretch(fileName, ratio)
    in = [zeros(WLen, 1); in; ...
       zeros(WLen-mod(L,hopSize),1)] / max(abs(in));

-    delta = 0.2;
+    delta = 0.1;
    % Segment audio based on its transient and stable components, returning
    % markers for stable sections and a ratio for their proportion of all the
    % audio
-    [stable, stable_ratio] = segmentTransience(in, FS, WLen, hopSize, delta);
+    [stable, stableRatio] = segmentTransience(in, FS, WLen, hopSize, delta);

    % Use stable transient/stable segmentation to stretch stable section of
    % audio by a given ratio.
-    timeStretchStable(in, FS, stable, ratio / stable_ratio);
+    timeStretchStable(in, FS, stable, stableRatio, stretchRatio);
+
+function [stable, stableRatio] = segmentTransience(in, FS, WLen, hopSize, delta)
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % Function to extract stable/transient segment information from input
+    % audio.
+    % Inputs:
+    %   in: Input audio vector
+    %   FS: Audio samplerate
+    %   WLen: Analysis window length
+    %   hopSize: Analysis window hop size
+    %   delta: Selection threshold used for stable/transient segment
+    %   separation. Values between 0.0 and 1.0 are recommended.
+    % Returns:
+    %   stable: a 2XN vector of stable part start+end markers
+    %   stableRatio: The ratio of stable/transient content in input audio
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

-function [stable, stable_ratio] = segmentTransience(in, FS, WLen, hopSize, delta)
    % Calculate the spectral flux of the audio. This provides a measure of
    % transience across the audio
    [analysis, winCount] = calculateSpectralFlux(in, WLen, hopSize);

+    filterN1 = 30;
    % Normalise and filter the analysis to provide data that can be used for
    % effective transience segmentation.
-    analysis = normaliseAnalysis(analysis, delta);
+    analysis = normaliseAnalysis(analysis, delta, filterN1, 1000);

    % Generate segmentation markers from analysis to be used in the time
    % stretching algorithm
-    [stable, stable_ratio] = getStable(in, analysis, WLen, delta, hopSize);
+    [stable, stableRatio] = getStable(in, analysis, WLen, delta, hopSize, ...
+    filterN1, winCount);

-function timeStretchStable(in, FS,  stable, ratio)
-    %----- time stretching initializations -----
-    n2           = 256; % analysis step [samples]
-    n1           = round(n2 / ratio); % synthesis step [samples]
-    WLen         = 2048; % Window length
-    w1           = hanning(WLen); % Hanning window of length WLen
-    w2           = w1;
-    % TODO; add semi-colon
-    tstretch_ratio = n2/n1
-    out = zeros(WLen+ceil(length(in)*tstretch_ratio),1);
+function timeStretchStable(in, FS, stable, stableRatio, stretchRatio)
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % Function to apply phase vocoder based time stretching to stable section
+    % of input audio
+    % Inputs:
+    %   in: Input audio vector
+    %   stable: 2XN vector of stable part start+end markers
+    %   stableRatio: The ratio of stable/transient content in input audio
+    %   stretchRatio: The ratio to stretch stable audio by. Value > 1 will
+    %   result in a stretching of output. Values < 1 will result in a
+    %   compression.
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+    stretchRatio = stretchRatio / stableRatio;
+    % synthesis step [samples] (the output write pointer advances by n2)
+    n2 = 256;
+    % analysis step [samples] (the input read pointer advances by n1)
+    n1 = round(n2 / stretchRatio);
+    % Window length
+    WLen = 2048;
+    % Hanning window of length WLen
+    w1 = hanning(WLen);
+    % Calculate ratio between analysis and synthesis hop size as the stretch
+    % ratio
+    tstretch_ratio = n2/n1;
+
+    % Allocate memory for output samples
+    out = zeros(WLen+ceil(length(in)*(1-stableRatio)) + WLen*2+ceil(length(in)*stableRatio*stretchRatio),1);
+    length(out)
+    % Initialize memory for phase vocoder variables
    omega    = 2*pi*n1*[0:WLen-1]'/WLen;
    phi0     = zeros(WLen,1);
    psi      = zeros(WLen,1);

-    devcent = 2*pi*n1/WLen;
-
+    % Initialize read and write pointers for audio input and output
    pin  = 0;
    pout = 0;
+    % Calculate the length of input audio
    pend = length(in)-WLen;

    while pin<pend
+        % Read grain from input and apply hanning window
        grain = in(pin+1:pin+WLen).* w1;

+        % If the center of the grain is within any stable boundaries, apply
+        % time stretching
        if(any(pin+WLen/2 > stable(:, 1) & pin+WLen/2 < stable(:, 2)))
+            % Time stretch using the phase vocoder implementation from DAFX by
+            % U. Zolzer
            %===========================================
            f     = fft(fftshift(grain));
            r     = abs(f);
@@ -92,14 +133,19 @@ function timeStretchStable(in, FS,  stable, ratio)
            phi0  = phi;
            psi   = princarg(psi+delta_phi*tstretch_ratio);
            ft    = (r.* exp(i*psi));
-            grain = fftshift(real(ifft(ft))).*w2;
-            % plot(grain);drawnow;
+            grain = fftshift(real(ifft(ft))).*w1;
            % ===========================================
+            % Overlap grain with previous outputs
            out(pout+1:pout+WLen) = ...
-               out(pout+1:pout+WLen) + grain;
+                out(pout+1:pout+WLen) + grain;
+            % Increment read and write pointers by hop sizes
            pin  = pin + n1;
            pout = pout + n2;
+        % Else, synthesize grain at its original speed
        else
+            % Resynthesize the grain with the same phase vocoder (DAFX by
+            % U. Zolzer) but without scaling the phase increment (no stretch)
+            %===========================================
            f     = fft(fftshift(grain));
            r     = abs(f);
            phi   = angle(f);
@@ -107,39 +153,50 @@ function timeStretchStable(in, FS,  stable, ratio)
            phi0  = phi;
            psi   = princarg(psi+delta_phi);
            ft    = (r.* exp(i*psi));
-            grain = fftshift(real(ifft(ft))).*w2;
+            grain = fftshift(real(ifft(ft))).*w1;
+            %===========================================
+            % Overlap grain with previous outputs, scaling the grain by the
+            % stretch ratio to counter the increase in amplitude resulting from
+            % denser overlapping of grains.
            out(pout+1:pout+WLen) = ...
-               out(pout+1:pout+WLen) + grain/tstretch_ratio;
+                out(pout+1:pout+WLen) + grain/tstretch_ratio;
+            % Increment read and write pointers by hop sizes
            pin  = pin + n1;
            pout = pout + n1;
        end
    end

-    %----- listening and saving the output -----
-    %in  = in(WLen+1:WLen+L);
+    % Normalise output
    out = out(WLen+1:length(out))/max(abs(out));
-    % soundsc(out, FS);
-    outName = ['./out' sprintf('%3.1f', ratio) '.wav'];
+    % Write audio out and open in the default system application
+    outName = ['./out' sprintf('%3.1f', stretchRatio) '.wav'];
    wavwrite(out, FS, outName);
    system(['open ' outName]);

-function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize)
+function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize,...
+    filterN1, winCount)
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Function to convert transience start and end times to stable part
    % segments.
+    % Inputs:
+    %   in: Input audio vector
+    %   analysis: Normalised Spectral Flux analysis frames
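+    %   delta: Threshold for separating transient/stable segments
+    %   WLen: Analysis window size
+    %   hopSize: Analysis hop size
+    %   filterN1: Filter length passed in by the caller; stored in vars.mat
+    %   for the Python plotting script
+    %   winCount: Number of analysis windows; stored in vars.mat for the
+    %   Python plotting script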
    % Returns:
-    % stable: a 2xN array of segment start and end times, where N is the number
-    % of stable parts in the audio.
-    % ratio: The ratio between the total size of stable and unstable parts in
-    % the audio.
+    %   stable: a 2xN array of segment start and end times, where N is the number
+    %   of stable parts in the audio.
+    %   ratio: The ratio between the total size of stable and unstable parts in
+    %   the audio.
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

    % Enables the saving of variables to mat files for plotting in Python
    pythonPlot = true;
-    % TODO: do something with this...
-    delta = 0;
+    % Chosen as it was decided that picking too many transient sections is
+    % better than picking too few
+    delta = -0.05;

-    winCount = floor((length(in)-WLen)/hopSize);
    % create boolean array of analysis values above a set threshold delta
    a = double(analysis > delta);

@@ -175,6 +232,7 @@ function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize)
        s1.win_count = winCount;
        s1.n1 = hopSize;
        s1.WLen = WLen;
+        s1.filterN1 = filterN1;
        save('./vars.mat','-struct', 's1')
    end

@@ -199,7 +257,17 @@ function [stable, ratio] = getStable(in, analysis, delta, WLen, hopSize)
    ratio = sum(stable(:, 2) - stable(:, 1)) / L;

 function [analysis, winCount] = calculateSpectralFlux(in, WLen, hopSize)
-    %----- transience analysis initialization -----
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % Function to calculate Spectral Flux analysis for an input.
+    % Inputs:
+    %   in: Input audio vector
+    %   WLen: Analysis window size
+    %   hopSize: Analysis hop size
+    % Returns:
+    %   analysis: Normalised Spectral Flux analysis frames
+    %   winCount: The total number of windows used in analysis of the input
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
    % Allocate memory to store the current grain to be analysed
    grain = zeros(WLen,1);
    % Allocate memory to store the previous window's magnitude during analysis
@@ -239,20 +307,28 @@ function [analysis, winCount] = calculateSpectralFlux(in, WLen, hopSize)
        pout = pout + 1;
    end

-function analysis = normaliseAnalysis(analysis, delta)
+function analysis = normaliseAnalysis(analysis, delta, filt1, filt2)
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+    % Function to normalise Spectral Flux analysis. this is achieved via the
+    % method proposed in A Tutorial on Onset Detection in Music Signals - J.
+    % Bello et al. (p.9)
+    % Inputs:
+    %   analysis: Spectral Flux analysis frames
+    %   delta: Offset added to the median-filtered threshold before subtraction
+    %   filt1: Presumably the smoothing filter length (visible code uses 40)
+    %   filt2: Presumably the threshold filter length (visible code uses 1000)
+    % Returns:
+    %   analysis: Normalised Spectral Flux analysis frames
+    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    % Normalize analysis
    analysis = analysis - mean(analysis);
    analysis = analysis / max(abs(std(analysis)));

-    %TODO: Check that this aligns with the original analysis and with the
-    %audio.
-    analysis = filter(ones(1, 40)/40, 1, analysis);
-    analysis = analysis(25:end);
+    analysis = medfilt1(analysis, 40);

-    %TODO: Check that this aligns with the original analysis and with the
-    %audio.
    thresh = medfilt1(analysis, 1000);

+
    % Subtract low frequency content to flatten analysis, leaving relevant
    % peaks for onset/transience detection
    analysis = analysis - (delta+thresh);