Files
BPLabs/ITU_P56/asl_P56_Fred_v2.m
T

159 lines
4.5 KiB
Matlab

function [asl_msq, actfact, c0]= asl_P56_Fred_v2 ( x, fs, nbits)
% ASL_P56_FRED_V2  Active speech level according to ITU P.56 method B.
%
% Inputs:
%   x      - speech samples (any vector shape; reshaped to a column).
%   fs     - sampling frequency in Hz.
%   nbits  - sample resolution in bits; nbits-1 thresholds are used
%            (15 thresholds for 16 bit). Optional, defaults to 16.
%
% Outputs:
%   asl_msq - active speech level as a mean square value (NOT the rms).
%             This is the mean square value in uPa^2 if x is in uPa.
%             For active speech with x in uPa, the Leq in dB re 20 uPa
%             is 10*log10(asl_msq/20^2).
%   actfact - activity factor (between 0 and 1): the proportion of the
%             time that the speech is deemed "active".
%   c0      - active speech level threshold: the level (in uPa if x is
%             in uPa) above which the speech is deemed active.
%
% All three outputs are 0 when no active level can be determined
% (empty input, envelope never reaches the lowest threshold, or no
% threshold satisfies the margin criterion).
%
% Coded by Fred; Commented by BL; 16/6/12.

% Default outputs. c0 must be set here as well: otherwise every early
% return below leaves it unassigned and a three-output call errors out.
asl_msq = 0;
actfact = 0;
c0 = 0;

if (nargin < 3 || isempty( nbits))
    nbits = 16; % the threshold table below is laid out for 16-bit data
end

x = x(:); % make sure x is a column vector
if isempty( x)
    return; % nothing to measure
end

T = 0.03; % time constant of smoothing, in seconds
H = 0.2;  % hangover time in seconds
M = 15.9; % margin in dB of the difference between threshold and ASL
thres_no = nbits- 1;    % number of thresholds, for 16 bit, it's 15
I = ceil( fs* H);       % hangover in samples
g = exp( -1/( fs* T));  % smoothing factor in envelope detection

% Vector of thresholds in steps of a factor of 2. For 16-bit samples this
% runs from one quantizing level (2^-15) up to half the maximum code (0.5).
% NOTE(review): the lowest threshold is fixed at 2^-15 regardless of nbits,
% so for nbits ~= 16 the top threshold is 2^(nbits-17) - confirm intent.
c( 1: thres_no)= 2.^ (-15: thres_no- 16);
a( 1: thres_no) = 0;    % activity counter for each level threshold
hang( 1: thres_no) = I; % hangover counter for each level threshold

sq = x'* x;             % long-term total square energy of x
x_len = length( x);     % length of x

% Envelope q: abs(x) passed through two cascaded first-order low-pass
% filters, each with smoothing factor g.
x_abs = abs( x);
p = filter( 1-g, [1 -g], x_abs);
q = filter( 1-g, [1 -g], p);

% Count, per threshold, the samples deemed active: the envelope is at or
% above the threshold, or it dropped below fewer than I samples ago.
for k = 1: x_len
    for j = 1: thres_no
        if (q(k)>= c(j))
            a(j) = a(j)+ 1;
            hang(j)= 0;
        elseif (hang(j)< I)
            a(j)= a(j)+ 1;
            hang(j)= hang(j)+ 1;
        else
            % Thresholds are increasing, so once one is inactive with
            % its hangover expired the scan stops for this sample.
            break;
        end
    end
end

if (a(1)== 0)
    return; % envelope never reached even the lowest threshold
end

AdB1= 10* log10( sq/ a(1)+ eps);
CdB1= 20* log10( c(1)+ eps);
if (AdB1- CdB1< M)
    return; % margin already violated at the lowest threshold
end

AdB(1)= AdB1;
CdB(1)= CdB1;
Delta(1)= AdB1- CdB1;
for j= 2: thres_no
    AdB(j)= 10* log10( sq/ (a(j)+ eps)+ eps); % active level estimate in dB
    CdB(j)= 20* log10( c(j)+ eps);            % threshold in dB
end

% Find the first threshold whose (level - threshold) difference falls to
% the margin M or below and interpolate between it and the previous one.
for j= 2: thres_no
    if (a(j) ~= 0)
        Delta(j)= AdB(j)- CdB(j);
        if (Delta(j)<= M)
            [asl_ms_log, cl0]= bin_interp( AdB(j), ...
                AdB(j-1), CdB(j), CdB(j-1), M, 0.5);
            asl_msq= 10^ (asl_ms_log/ 10);  % mean square value, NOT the rms
            actfact= (sq/ x_len)/ asl_msq;  % long-term mean square / active mean square
            c0= 10^( cl0/ 20);              % threshold back in linear units
            break;
        end
    end
end
end
%--------------------------------------------------------------------------
function [asl_ms_log, cc]= bin_interp(upcount, lwcount, ...
    upthr, lwthr, Margin, tol)
% BIN_INTERP  Search between two (level, threshold) pairs, all in dB, for
% the point where level - threshold equals Margin to within tol dB.
%
%   upcount, upthr - level and threshold at the upper bound
%   lwcount, lwthr - level and threshold at the lower bound
%   Margin         - target difference between level and threshold
%   tol            - tolerance; only its magnitude is used
%
% Returns the selected level (asl_ms_log) and threshold (cc).

tol = abs(tol);

% If either bound already meets the margin, use it directly.
if abs(upcount - upthr - Margin) < tol
    asl_ms_log = upcount;
    cc = upthr;
    return;
end
if abs(lwcount - lwthr - Margin) < tol
    asl_ms_log = lwcount;
    cc = lwthr;
    return;
end

% Start from the midpoint of the two bounds.
level = (upcount + lwcount) / 2.0;
thresh = (upthr + lwthr) / 2.0;
passes = 1;

% Iterate until the error lies inside [-tol, tol].
err = level - thresh - Margin;
while ~(abs(err) <= tol)
    % Once more than 20 passes have been made, relax the tolerance by
    % 10% on every further pass.
    passes = passes + 1;
    if passes > 20
        tol = tol * 1.1;
    end

    if err > tol
        % move halfway towards the upper bound
        level = (upcount + level) / 2.0;
        thresh = (upthr + thresh) / 2.0;
    elseif err < -tol
        % move halfway towards the lower bound
        level = (level + lwcount) / 2.0;
        thresh = (thresh + lwthr) / 2.0;
    end
    err = level - thresh - Margin;
end

% Tolerance satisfied: the current point is the interpolated result.
asl_ms_log = level;
cc = thresh;
end