Reverted level measurement from ITU-T P.56 active speech level (asl_P56) back to RMS due to issues with the SNR calculation

This commit is contained in:
2020-01-13 14:02:13 +00:00
parent e955b746d1
commit fa21fe2cdb
7 changed files with 72 additions and 10 deletions
+1
View File
@@ -31,6 +31,7 @@ def asl_P56(x, fs, nbits):
H = 0.2 # hangover time in seconds H = 0.2 # hangover time in seconds
M = 15.9 # margin in dB of the difference between threshold and ASL M = 15.9 # margin in dB of the difference between threshold and ASL
thres_no = nbits-1 # number of thresholds, for 16 bit, it's 15 thres_no = nbits-1 # number of thresholds, for 16 bit, it's 15
c0 = None
I = np.ceil(fs*H) # hangover in samples I = np.ceil(fs*H) # hangover in samples
g = np.exp(-1/(fs*T)) # smoothing factor in envelop detection g = np.exp(-1/(fs*T)) # smoothing factor in envelop detection
+6 -3
View File
@@ -12,6 +12,7 @@ from shutil import copy2
import re import re
import sounddevice as sd import sounddevice as sd
from ITU_P56 import asl_P56 from ITU_P56 import asl_P56
from snrops import rms_no_silences
from hearing_loss_sim import apply_hearing_loss_sim from hearing_loss_sim import apply_hearing_loss_sim
from test_base import BaseThread, run_test_thread from test_base import BaseThread, run_test_thread
@@ -250,15 +251,17 @@ class EEGTestThread(BaseThread):
speech = audio[:, :2] speech = audio[:, :2]
triggers = audio[:, 2] triggers = audio[:, 2]
speech_rms, _, _ = asl_P56(speech, fs, 16.) #speech_rms, _, _ = asl_P56(speech, fs, 16.)
rms_no_silences(speech, fs, -30.)
wf = [] wf = []
wm = [] wm = []
for ind2, s in enumerate(snr): for ind2, s in enumerate(snr):
start = randint(0, noise_file.frames()-speech.shape[0]) start = randint(0, noise_file.frames()-speech.shape[0])
noise_file.seek(start) noise_file.seek(start)
noise = noise_file.read_frames(speech.shape[0]) noise = noise_file.read_frames(speech.shape[0])
#noise_rms = np.sqrt(np.mean(noise**2)) noise_rms = np.sqrt(np.mean(noise**2))
noise_rms = asl_P56(noise, fs, 16) # noise_rms = asl_P56(noise, fs, 16)
snr_fs = 10**(-s/20) snr_fs = 10**(-s/20)
if snr_fs == np.inf: if snr_fs == np.inf:
snr_fs = 0. snr_fs = 0.
+6 -2
View File
@@ -20,6 +20,7 @@ from pysndfile import PySndfile, sndio
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from ITU_P56 import asl_P56 from ITU_P56 import asl_P56
from pathlib import Path from pathlib import Path
from snrops import rms_no_silences
from multiprocessing.dummy import Pool as ThreadPool from multiprocessing.dummy import Pool as ThreadPool
import multiprocessing import multiprocessing
@@ -215,7 +216,8 @@ def gen_noise(OutDir, b, fs):
y = noise_norm_wav.read_frames(fs*60) y = noise_norm_wav.read_frames(fs*60)
y = y/(np.abs(y).max() * 0.95) y = y/(np.abs(y).max() * 0.95)
# rms = np.sqrt(np.mean(y**2)) # rms = np.sqrt(np.mean(y**2))
rms, _, _ = asl_P56(y, fs, 16) # rms, _, _ = asl_P56(y, fs, 16)
rms = rms_no_silences(y, fs, -30.)
print(f"Noise level: {rms}") print(f"Noise level: {rms}")
peak = np.abs(y).max() peak = np.abs(y).max()
@@ -235,7 +237,9 @@ def calc_speech_rms(files, silences, rmsDir, fs=44100, plot=False):
def level_calc(args): def level_calc(args):
ind, wavfile = args ind, wavfile = args
x, fs, _ = sndio.read(wavfile) x, fs, _ = sndio.read(wavfile)
level = asl_P56(x, fs, 16.)[0] # level = asl_P56(x, fs, 16.)[0]
level = rms_no_silences(x, fs, -30.)
print(f"Calculated level of {Path(wavfile).name} ({ind+1}/{n_files}): {level}") print(f"Calculated level of {Path(wavfile).name} ({ind+1}/{n_files}): {level}")
return level return level
+2 -3
View File
@@ -147,10 +147,9 @@ def gen_trigger(x, freq, length, fs):
def calc_rms(y, window, plot=False): def calc_rms(y, window, plot=False):
y_2 = y**2 y_2 = y**2
rms = np.zeros(y_2.size + round(window/2.)) y_2 = np.pad(y_2, (round(window/2.)-1, round(window/2.)), 'constant', constant_values=(0, 0))
y_i = rolling_window_lastaxis(y_2, window) y_i = rolling_window_lastaxis(y_2, window)
for ind, frame in enumerate(y_i): rms = np.sqrt(np.mean(y_i, axis=1))
rms[ind+round(window/2.)] = np.sqrt(np.mean(frame))
rms[np.isnan(rms)] = 0 rms[np.isnan(rms)] = 0
if plot: if plot:
plt.plot(y) plt.plot(y)
+7 -2
View File
@@ -19,6 +19,8 @@ from matrix_test.helper_modules.filesystem import globDir
from test_base import BaseThread from test_base import BaseThread
import sounddevice as sd import sounddevice as sd
import pdb import pdb
from ITU_P56 import asl_P56
from snrops import rms_no_silences
from config import socketio from config import socketio
from hearing_loss_sim import apply_hearing_loss_sim from hearing_loss_sim import apply_hearing_loss_sim
@@ -400,7 +402,8 @@ class MatTestThread(BaseThread):
# Read in audio file and calculate it's RMS # Read in audio file and calculate it's RMS
x, self.fs, _ = sndio.read(fp) x, self.fs, _ = sndio.read(fp)
logger.info(f"Calculating level for {Path(fp).name}") logger.info(f"Calculating level for {Path(fp).name}")
x_rms, _, _ = asl_P56(x, self.fs, 16.) # x_rms, _, _ = asl_P56(x, self.fs, 16.)
x_rms = rms_no_silences(x, self.fs, -30.)
self.lists[-1].append(x) self.lists[-1].append(x)
self.listsRMS[-1].append(x_rms) self.listsRMS[-1].append(x_rms)
self.listsString[-1].append(words) self.listsString[-1].append(words)
@@ -537,8 +540,10 @@ class AdaptiveTrack():
end = start + noiseLen end = start + noiseLen
self.noise.seek(start) self.noise.seek(start)
x_noise = self.noise.read_frames(end-start) x_noise = self.noise.read_frames(end-start)
# x_rms = np.sqrt(np.mean(x**2))
# Scale noise to match the RMS of the speech # Scale noise to match the RMS of the speech
x_noise *= x_rms/self.noise_rms noise_rms = np.sqrt(np.mean(x_noise**2))
x_noise *= x_rms/noise_rms
y = x_noise y = x_noise
# Set speech to start 500ms after the noise, scaled to the desired SNR # Set speech to start 500ms after the noise, scaled to the desired SNR
sigStart = random.randint(round(self.fs/2.), round(2*self.fs)) sigStart = random.randint(round(self.fs/2.), round(2*self.fs))
+11
View File
@@ -0,0 +1,11 @@
from pysndfile import sndio
from snrops import rms_no_silences
def main():
    """Scratch check of ``rms_no_silences`` on a single matrix-test sentence.

    Reads one stimulus wav file, computes its silence-excluded RMS with a
    -30 dB threshold, then drops into the debugger so the locals
    (``x``, ``fs``, ``enc``, ``rms``) can be inspected interactively.
    """
    wav_path = './matrix_test/behavioural_stim/stimulus/wav/sentence-lists/ukmatrix10.1/Trial_00001.wav'
    x, fs, enc = sndio.read(wav_path)
    rms = rms_no_silences(x, fs, -30)
    # Intentional stop: this script exists only for manual inspection.
    breakpoint()


if __name__ == '__main__':
    main()
+39
View File
@@ -0,0 +1,39 @@
import sys
sys.path.insert(0, "matrix_test/helper_modules")
import numpy as np
from signalops import rolling_window_lastaxis, calc_rms
def detect_silences(x, fs, threshold=-30.):
    """Locate silent stretches of a signal from its short-time RMS envelope.

    The signal is collapsed to one channel, its envelope is computed with
    ``calc_rms`` over a window of ``int(fs*0.1)`` samples, and every envelope
    sample below ``threshold`` dB relative to the envelope maximum is
    classed as silent. Contiguous silent runs are returned as index ranges.

    Args:
        x: Audio samples as a NumPy array, 1-D ``(samples,)`` or 2-D
            ``(samples, channels)``.
        fs: Sample rate; used to size the envelope window and the minimum
            silence length.
        threshold: Silence threshold in dB relative to the envelope peak.

    Returns:
        Integer array of shape ``(n_segments, 2)``. Each row is a half-open
        ``[start, end)`` range of envelope indices. Only runs longer than
        ``0.02*fs`` samples are kept.
    """
    print("Detecting silence in wav files...")
    if len(x.shape) < 2:
        x = x[:, np.newaxis]
    # The fixed divisor only rescales the envelope; because the threshold
    # below is relative to the envelope peak, it does not affect which
    # samples are classed as silent.
    x = x.sum(axis=1)/2.
    env = calc_rms(x, window=int(fs*0.1))
    threshold = (10**(threshold/20.))*np.max(env)
    silence = env < threshold

    # edges[i] describes the transition from sample i to sample i+1, so the
    # first sample *after* the transition is i+1. Adding 1 makes interior
    # starts inclusive and interior ends exclusive, matching the 0 and
    # env.size sentinels added below.
    edges = np.diff(silence.astype(int))
    sil_start = np.where(edges == 1)[0] + 1
    sil_end = np.where(edges == -1)[0] + 1
    if silence[0]:
        sil_start = np.concatenate([[0], sil_start])
    if silence[-1]:
        sil_end = np.concatenate([sil_end, [env.size]])
    segs = np.vstack([sil_start, sil_end]).T

    # Discard very short dips (20 ms or less) so they are not treated as
    # silence.
    long_enough = (segs[:, 1] - segs[:, 0]) > 0.02*fs
    return segs[long_enough]
def slices_to_mask(slices, mask_length):
    """Convert a sequence of ``(start, end)`` index pairs to a boolean mask.

    Args:
        slices: Iterable of indexable pairs; element 0 is the start index
            and element 1 the (exclusive) end index of each range.
        mask_length: Length of the mask to create.

    Returns:
        Boolean NumPy array of length ``mask_length`` that is ``True``
        inside every given range and ``False`` elsewhere.
    """
    mask = np.zeros(mask_length, dtype=bool)
    for bounds in slices:
        begin, end = bounds[0], bounds[1]
        mask[begin:end] = True
    return mask
def rms_no_silences(x, fs, threshold):
    """Compute the RMS of a signal with its silent stretches excluded.

    Silences are located with ``detect_silences`` and the corresponding
    frames are dropped before the root-mean-square is taken over all
    remaining samples (across every channel).

    Args:
        x: Audio samples as a NumPy array, 1-D ``(samples,)`` or 2-D
            ``(samples, channels)``.
        fs: Sample rate, forwarded to ``detect_silences``.
        threshold: Silence threshold in dB, forwarded to
            ``detect_silences``.

    Returns:
        The RMS of the non-silent samples as a NumPy float.
    """
    silences = detect_silences(x, fs, threshold)
    # The mask selects frames along axis 0. Sizing it with x.size would make
    # it channels-times too long for multichannel input, and the boolean
    # index below would then raise IndexError.
    sil_mask = slices_to_mask(silences, x.shape[0])
    return np.sqrt(np.mean(x[~sil_mask]**2))