Reverted to RMS from ITU due to issues with SNR calculation
This commit is contained in:
@@ -31,6 +31,7 @@ def asl_P56(x, fs, nbits):
|
||||
H = 0.2 # hangover time in seconds
|
||||
M = 15.9 # margin in dB of the difference between threshold and ASL
|
||||
thres_no = nbits-1 # number of thresholds, for 16 bit, it's 15
|
||||
c0 = None
|
||||
|
||||
I = np.ceil(fs*H) # hangover in samples
|
||||
g = np.exp(-1/(fs*T)) # smoothing factor in envelop detection
|
||||
|
||||
+6
-3
@@ -12,6 +12,7 @@ from shutil import copy2
|
||||
import re
|
||||
import sounddevice as sd
|
||||
from ITU_P56 import asl_P56
|
||||
from snrops import rms_no_silences
|
||||
|
||||
from hearing_loss_sim import apply_hearing_loss_sim
|
||||
from test_base import BaseThread, run_test_thread
|
||||
@@ -250,15 +251,17 @@ class EEGTestThread(BaseThread):
|
||||
|
||||
speech = audio[:, :2]
|
||||
triggers = audio[:, 2]
|
||||
speech_rms, _, _ = asl_P56(speech, fs, 16.)
|
||||
#speech_rms, _, _ = asl_P56(speech, fs, 16.)
|
||||
rms_no_silences(speech, fs, -30.)
|
||||
|
||||
wf = []
|
||||
wm = []
|
||||
for ind2, s in enumerate(snr):
|
||||
start = randint(0, noise_file.frames()-speech.shape[0])
|
||||
noise_file.seek(start)
|
||||
noise = noise_file.read_frames(speech.shape[0])
|
||||
#noise_rms = np.sqrt(np.mean(noise**2))
|
||||
noise_rms = asl_P56(noise, fs, 16)
|
||||
noise_rms = np.sqrt(np.mean(noise**2))
|
||||
# noise_rms = asl_P56(noise, fs, 16)
|
||||
snr_fs = 10**(-s/20)
|
||||
if snr_fs == np.inf:
|
||||
snr_fs = 0.
|
||||
|
||||
@@ -20,6 +20,7 @@ from pysndfile import PySndfile, sndio
|
||||
import matplotlib.pyplot as plt
|
||||
from ITU_P56 import asl_P56
|
||||
from pathlib import Path
|
||||
from snrops import rms_no_silences
|
||||
|
||||
from multiprocessing.dummy import Pool as ThreadPool
|
||||
import multiprocessing
|
||||
@@ -215,7 +216,8 @@ def gen_noise(OutDir, b, fs):
|
||||
y = noise_norm_wav.read_frames(fs*60)
|
||||
y = y/(np.abs(y).max() * 0.95)
|
||||
# rms = np.sqrt(np.mean(y**2))
|
||||
rms, _, _ = asl_P56(y, fs, 16)
|
||||
# rms, _, _ = asl_P56(y, fs, 16)
|
||||
rms = rms_no_silences(y, fs, -30.)
|
||||
print(f"Noise level: {rms}")
|
||||
|
||||
peak = np.abs(y).max()
|
||||
@@ -235,7 +237,9 @@ def calc_speech_rms(files, silences, rmsDir, fs=44100, plot=False):
|
||||
def level_calc(args):
|
||||
ind, wavfile = args
|
||||
x, fs, _ = sndio.read(wavfile)
|
||||
level = asl_P56(x, fs, 16.)[0]
|
||||
# level = asl_P56(x, fs, 16.)[0]
|
||||
level = rms_no_silences(x, fs, -30.)
|
||||
|
||||
print(f"Calculated level of {Path(wavfile).name} ({ind+1}/{n_files}): {level}")
|
||||
return level
|
||||
|
||||
|
||||
@@ -147,10 +147,9 @@ def gen_trigger(x, freq, length, fs):
|
||||
|
||||
def calc_rms(y, window, plot=False):
|
||||
y_2 = y**2
|
||||
rms = np.zeros(y_2.size + round(window/2.))
|
||||
y_2 = np.pad(y_2, (round(window/2.)-1, round(window/2.)), 'constant', constant_values=(0, 0))
|
||||
y_i = rolling_window_lastaxis(y_2, window)
|
||||
for ind, frame in enumerate(y_i):
|
||||
rms[ind+round(window/2.)] = np.sqrt(np.mean(frame))
|
||||
rms = np.sqrt(np.mean(y_i, axis=1))
|
||||
rms[np.isnan(rms)] = 0
|
||||
if plot:
|
||||
plt.plot(y)
|
||||
|
||||
@@ -19,6 +19,8 @@ from matrix_test.helper_modules.filesystem import globDir
|
||||
from test_base import BaseThread
|
||||
import sounddevice as sd
|
||||
import pdb
|
||||
from ITU_P56 import asl_P56
|
||||
from snrops import rms_no_silences
|
||||
|
||||
from config import socketio
|
||||
from hearing_loss_sim import apply_hearing_loss_sim
|
||||
@@ -400,7 +402,8 @@ class MatTestThread(BaseThread):
|
||||
# Read in audio file and calculate it's RMS
|
||||
x, self.fs, _ = sndio.read(fp)
|
||||
logger.info(f"Calculating level for {Path(fp).name}")
|
||||
x_rms, _, _ = asl_P56(x, self.fs, 16.)
|
||||
# x_rms, _, _ = asl_P56(x, self.fs, 16.)
|
||||
x_rms = rms_no_silences(x, self.fs, -30.)
|
||||
self.lists[-1].append(x)
|
||||
self.listsRMS[-1].append(x_rms)
|
||||
self.listsString[-1].append(words)
|
||||
@@ -537,8 +540,10 @@ class AdaptiveTrack():
|
||||
end = start + noiseLen
|
||||
self.noise.seek(start)
|
||||
x_noise = self.noise.read_frames(end-start)
|
||||
# x_rms = np.sqrt(np.mean(x**2))
|
||||
# Scale noise to match the RMS of the speech
|
||||
x_noise *= x_rms/self.noise_rms
|
||||
noise_rms = np.sqrt(np.mean(x_noise**2))
|
||||
x_noise *= x_rms/noise_rms
|
||||
y = x_noise
|
||||
# Set speech to start 500ms after the noise, scaled to the desired SNR
|
||||
sigStart = random.randint(round(self.fs/2.), round(2*self.fs))
|
||||
|
||||
+11
@@ -0,0 +1,11 @@
|
||||
from pysndfile import sndio
|
||||
from snrops import rms_no_silences
|
||||
|
||||
def main():
|
||||
x, fs, enc = sndio.read('./matrix_test/behavioural_stim/stimulus/wav/sentence-lists/ukmatrix10.1/Trial_00001.wav')
|
||||
rms = rms_no_silences(x, fs, -30)
|
||||
breakpoint()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
@@ -0,0 +1,39 @@
|
||||
import sys
|
||||
sys.path.insert(0, "matrix_test/helper_modules")
|
||||
import numpy as np
|
||||
from signalops import rolling_window_lastaxis, calc_rms
|
||||
|
||||
|
||||
def detect_silences(x, fs, threshold=-30.):
|
||||
print("Detecting silence in wav files...")
|
||||
if len(x.shape) < 2:
|
||||
x = x[:, np.newaxis]
|
||||
x = x.sum(axis=1)/2.
|
||||
env = calc_rms(x, window=int(fs*0.1))
|
||||
threshold = (10**(threshold/20.))*np.max(env)
|
||||
silence = env < threshold
|
||||
# Get segment start end indexes for all silences in envelope
|
||||
sil_start = np.where(np.sign(np.diff(silence.astype(float))) == 1)[0]
|
||||
sil_end = np.where(np.sign(np.diff(silence.astype(float))) == -1)[0]
|
||||
if silence[0]:
|
||||
sil_start = np.concatenate([[0], sil_start])
|
||||
if silence[-1]:
|
||||
sil_end = np.concatenate([sil_end, [env.size]])
|
||||
segs = np.vstack([sil_start, sil_end]).T
|
||||
validSegs = np.diff(segs) > 0.02*fs
|
||||
segs = segs[np.repeat(validSegs, 2, axis=1)].reshape(-1, 2)
|
||||
return segs
|
||||
|
||||
|
||||
def slices_to_mask(slices, mask_length):
|
||||
out = np.zeros(mask_length, dtype=bool)
|
||||
for s in slices:
|
||||
out[s[0]:s[1]] = True
|
||||
return out
|
||||
|
||||
|
||||
def rms_no_silences(x, fs, threshold):
|
||||
silences = detect_silences(x, fs, threshold)
|
||||
sil_mask = slices_to_mask(silences, x.size)
|
||||
rms = np.sqrt(np.mean(x[~sil_mask]**2))
|
||||
return rms
|
||||
Reference in New Issue
Block a user