306 lines
10 KiB
Python
Executable File
306 lines
10 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
import sys
|
|
#sys.path.insert(0, "../helper_modules")
|
|
#sys.path.insert(0, "../matrix_test/helper_modules/")
|
|
sys.path.insert(0, "../../")
|
|
sys.path.insert(0, "../helper_modules")
|
|
|
|
import argparse
|
|
import os
|
|
import errno
|
|
import shutil
|
|
import re
|
|
import fnmatch
|
|
import pdb
|
|
import numpy as np
|
|
from natsort import natsorted
|
|
from collections import namedtuple
|
|
from pysndfile import PySndfile, sndio
|
|
import matplotlib.pyplot as plt
|
|
from ITU_P56 import asl_P56
|
|
from pathlib import Path
|
|
|
|
from multiprocessing.dummy import Pool as ThreadPool
|
|
import multiprocessing
|
|
|
|
from pathops import dir_must_exist
|
|
from signalops import rolling_window_lastaxis, block_lfilter, calc_rms, block_process_wav
|
|
|
|
import scipy.signal as sgnl
|
|
from scipy.stats import pearsonr
|
|
|
|
|
|
from lpc import lpc
|
|
|
|
from filesystem import globDir, organiseWavs, prepareOutDir
|
|
|
|
|
|
def block_lfilter_wav(b, a, x, outfile, fmt, fs, blocksize=8192):
|
|
'''
|
|
Filter 1D signal in blocks. For use with large signals
|
|
'''
|
|
new_state = np.zeros(b.size-1)
|
|
sndfile = PySndfile(outfile, 'w', fmt, 1, fs)
|
|
i = 0
|
|
y_out = np.zeros(x.size)
|
|
y_max = 0.0
|
|
while i < x.size:
|
|
print("Filtering {0} to {1} of {2}".format(i, i+blocksize, x.size))
|
|
if i+blocksize > x.size:
|
|
y, new_state = sgnl.lfilter(b,a,x[i:-1], zi=new_state)
|
|
sndfile.write_frames(y)
|
|
y_out[i:i+y.size] = y
|
|
else:
|
|
y, new_state = sgnl.lfilter(b,a,x[i:i+blocksize], zi=new_state)
|
|
sndfile.write_frames(y)
|
|
y_out[i:i+y.size] = y
|
|
y_max = np.max([y_max, np.abs(y).max()])
|
|
i += blocksize
|
|
return y_out, y_max
|
|
|
|
|
|
def synthesize_trial(wavFileMatrix, indexes):
|
|
'''
|
|
Using the matrix of alternative words and the selected words for each
|
|
column, generate samples from audio files
|
|
Returns an array of samples generated by concatenating the selected audio
|
|
files
|
|
'''
|
|
columnNames = ['a', 'b', 'c', 'd', 'e']
|
|
indexes = np.pad(indexes, ((0, 1)), 'constant', constant_values=0)
|
|
indexes = rolling_window_lastaxis(indexes, 2)
|
|
offset = 10
|
|
y = np.array([])
|
|
filenames = []
|
|
for name, ind in zip(columnNames, indexes):
|
|
if name == 'e':
|
|
offset = 1
|
|
wavFilename, wavFilepath = wavFileMatrix[name][(ind[0]*offset)+ind[1]]
|
|
wav = PySndfile(wavFilepath)
|
|
fs = wav.samplerate()
|
|
x = wav.read_frames()
|
|
y = np.append(y, x)
|
|
filenames.append(wavFilename)
|
|
return (y, {'rate': fs, 'format': wav.major_format_str(), 'enc': wav.encoding_str()}, filenames)
|
|
|
|
|
|
def gen_audio_stim(MatrixDir, OutDir, indexes):
|
|
if os.path.exists(OutDir):
|
|
shutil.rmtree(OutDir)
|
|
os.makedirs(OutDir)
|
|
wavFiles = globDir(MatrixDir, '*.wav')
|
|
wavFileMatrix = organiseWavs(wavFiles)
|
|
wavDir = os.path.join(OutDir, "wav")
|
|
dir_must_exist(wavDir)
|
|
wavDir = os.path.join(wavDir, "noise-sentences")
|
|
dir_must_exist(wavDir)
|
|
files = []
|
|
n = 0
|
|
o = 0
|
|
for sentenceList in indexes:
|
|
n += 1
|
|
o = 0
|
|
files.append([])
|
|
for ind in sentenceList:
|
|
o += 1
|
|
y, wavInfo, partnames = synthesize_trial(wavFileMatrix, ind)
|
|
fileName = os.path.join(wavDir, 'Trial_{0:02d}_{1:02d}.wav'.format(n, o))
|
|
print("Generating: " + fileName)
|
|
sndio.write(fileName, y, **wavInfo)
|
|
files[-1].append(fileName)
|
|
|
|
return files
|
|
|
|
def gen_indexes():
|
|
x = np.repeat(np.arange(10), 5)
|
|
x = x.reshape(10, 5)
|
|
|
|
y = np.zeros((50, 10, 5), dtype=int)
|
|
|
|
# 50 lists
|
|
for i in range(50):
|
|
x[:, 1] = np.roll(x[:, 1], 1)
|
|
x[:, 2] = np.roll(x[:, 2], 2)
|
|
x[:, 3] = np.roll(x[:, 3], 3)
|
|
x[:, 4] = np.roll(x[:, 4], 4)
|
|
y[i] = x.copy()
|
|
return y
|
|
|
|
def gen_rms(files, OutDir):
|
|
rmsFiles = []
|
|
OutPeakDir = './stimulus/peak'
|
|
for sentenceList in files:
|
|
for file in sentenceList:
|
|
head, tail = os.path.split(file)
|
|
tail = os.path.splitext(tail)[0]
|
|
tail_rms = tail + "_rms.npy"
|
|
dir_must_exist(OutDir)
|
|
rmsFilepath = os.path.join(OutDir, tail_rms)
|
|
print("Generating: "+rmsFilepath)
|
|
y, fs, _ = sndio.read(file)
|
|
y_rms = calc_rms(y, round(0.02*fs))
|
|
np.save(rmsFilepath, y_rms)
|
|
rmsFiles.append(rmsFilepath)
|
|
|
|
y, fs, _ = sndio.read(file)
|
|
tail_peak = tail + "_peak.npy"
|
|
dir_must_exist(OutPeakDir)
|
|
peakFilepath = os.path.join(OutPeakDir, tail_peak)
|
|
print("Generating: "+peakFilepath)
|
|
peak = np.abs(y).max()
|
|
np.save(peakFilepath, peak)
|
|
return rmsFiles
|
|
|
|
def detect_silences(rmsFiles, fs):
|
|
print("Detecting silence in wav files...")
|
|
silences = []
|
|
for envelopeFile in rmsFiles:
|
|
env = np.load(envelopeFile)
|
|
silence = env < 0.001
|
|
# Get segment start end indexes for all silences in envelope
|
|
silentSegs = np.where(np.concatenate(([silence[0]],silence[:-1]!=silence[1:],[True])))[0].reshape(-1, 2)
|
|
validSegs = np.diff(silentSegs) > 0.02*fs
|
|
silences.append(silentSegs[np.repeat(validSegs, 2, axis=1)].reshape(-1, 2))
|
|
return silences
|
|
|
|
|
|
def calc_spectrum(files, silences, fs=44100, plot=False):
|
|
window = 4096
|
|
sentenceLen = []
|
|
sentenceFFT = []
|
|
|
|
print("Calculating LTASS...")
|
|
for ind, sentenceList in enumerate(files):
|
|
for ind2, file in enumerate(sentenceList):
|
|
x, fs, _ = sndio.read(file)
|
|
f, t, Zxx = sgnl.stft(x, window=np.ones(window), nperseg=window, noverlap=0)
|
|
sil = silences[ind*10+ind2]
|
|
sTemp = np.zeros((sil.shape[0], t.size), dtype=bool)
|
|
for ind3, s in enumerate(sil):
|
|
sTemp[ind3, :] = np.logical_and(t > s[0], t < s[1])
|
|
invalidFFT = np.any(sTemp, axis=0)
|
|
sentenceFFT.append(np.abs(Zxx[:, ~np.any(sTemp, axis=0)]))
|
|
sentenceLen.append(x.size)
|
|
sentenceLen = np.array([sentenceLen]).T
|
|
sentenceLen = sentenceLen / sentenceLen.max()
|
|
sentenceFFT = [x * sentenceLen[i] for i, x in enumerate(sentenceFFT)]
|
|
sentenceFFT = np.concatenate([x.T for x in sentenceFFT])
|
|
|
|
grandAvgFFT = np.mean(sentenceFFT, axis=0)
|
|
grandAvgFFT = grandAvgFFT / grandAvgFFT.max()
|
|
print("Fitting filter to LTASS...")
|
|
b = sgnl.firls(2049, np.linspace(0, 1, 2049)[1:], grandAvgFFT[1:])
|
|
if plot:
|
|
plt.semilogy(np.abs(sgnl.freqz(b)[1]))
|
|
plt.plot(np.linspace(0, 512, 2049), grandAvgFFT)
|
|
plt.show()
|
|
return b
|
|
|
|
|
|
def gen_noise(OutDir, b, fs):
|
|
print("Generating noise...")
|
|
# Generate 10 minutes of white noise
|
|
x = np.random.randn(int(fs*60.*5.))
|
|
x /= x.max()
|
|
noiseDir = os.path.join(OutDir, 'wav')
|
|
noiseRMSDir = os.path.join(OutDir, 'rms')
|
|
dir_must_exist(noiseDir)
|
|
noiseDir = os.path.join(noiseDir, 'noise')
|
|
dir_must_exist(noiseDir)
|
|
y, y_max = block_lfilter_wav(b, [1.0], x, os.path.join(noiseDir, 'noise.wav'), 65538, 44100)
|
|
block_process_wav(os.path.join(noiseDir, 'noise.wav'), os.path.join(noiseDir, 'noise_norm.wav'), lambda x: x / (y_max * 1.05))
|
|
noise_norm_wav = PySndfile(os.path.join(noiseDir, 'noise_norm.wav'), 'r')
|
|
noise_rms_path = os.path.join(noiseRMSDir, 'noise_rms.npy')
|
|
y = noise_norm_wav.read_frames(fs*60)
|
|
y = y/(np.abs(y).max() * 0.95)
|
|
# rms = np.sqrt(np.mean(y**2))
|
|
rms, _, _ = asl_P56(y, fs, 16)
|
|
print(f"Noise level: {rms}")
|
|
|
|
peak = np.abs(y).max()
|
|
np.save(noise_rms_path, rms)
|
|
np.save('./stimulus/peak/noise_peak.npy', peak)
|
|
return y
|
|
|
|
|
|
def calc_speech_rms(files, silences, rmsDir, fs=44100, plot=False):
|
|
'''
|
|
'''
|
|
files = files[:3]
|
|
#silences = silences[:3]
|
|
f = sum(files, [])
|
|
n_files = len(f)
|
|
#for ind, (wavfile, sil) in enumerate(zip(f, silences)):
|
|
def level_calc(args):
|
|
ind, wavfile = args
|
|
x, fs, _ = sndio.read(wavfile)
|
|
level = asl_P56(x, fs, 16.)[0]
|
|
print(f"Calculated level of {Path(wavfile).name} ({ind+1}/{n_files}): {level}")
|
|
return level
|
|
|
|
# Make the Pool of workers
|
|
pool = ThreadPool(multiprocessing.cpu_count()-1)
|
|
# Open the urls in their own threads
|
|
# and return the results
|
|
levels = pool.map(level_calc, enumerate(f))
|
|
#close the pool and wait for the work to finish
|
|
pool.close()
|
|
pool.join()
|
|
rms = np.mean(levels)
|
|
|
|
# f = sum(files, [])
|
|
# sumsqrd = 0.0
|
|
# n = 0
|
|
# for wavfile, sil in zip(f, silences):
|
|
# y, fs, _ = sndio.read(wavfile)
|
|
# t = np.arange(y.size)
|
|
# sTemp = np.zeros((sil.shape[0], t.size), dtype=bool)
|
|
# for ind3, s in enumerate(sil):
|
|
# sTemp[ind3, :] = np.logical_and(t > s[0], t < s[1])
|
|
# silentSamples = np.any(sTemp, axis=0)
|
|
# y_temp = y[~silentSamples]
|
|
# sumsqrd += np.sum(y_temp**2)
|
|
# n += y_temp.size
|
|
# rms = np.sqrt(sumsqrd/n)
|
|
#np.save(os.path.join(rmsDir, 'overall_speech_rms.npy'), rms)
|
|
return rms
|
|
#sentenceFFT.append(np.abs(Zxx[:, ~np.any(sTemp, axis=0)]))
|
|
|
|
|
|
if __name__ == "__main__":
|
|
from pathtype import PathType
|
|
# Create commandline interface
|
|
parser = argparse.ArgumentParser(description='Generate stimulus for '
|
|
'training TRF decoder by concatenating '
|
|
'matrix test materials')
|
|
parser.add_argument('--MatrixDir', type=PathType(exists=True, type='dir'),
|
|
default='../speech_components',
|
|
help='Matrix test speech data location')
|
|
parser.add_argument('--OutDir', type=PathType(exists=None, type='dir'),
|
|
default='./stimulus', help='Output directory')
|
|
parser.add_argument('--CalcRMS', action='store_true')
|
|
args = {k:v for k,v in vars(parser.parse_args()).items() if v is not None}
|
|
|
|
rmsDir = os.path.join(args['OutDir'], "rms")
|
|
if args['CalcRMS']:
|
|
indexes = gen_indexes()
|
|
wavFiles = gen_audio_stim(args['MatrixDir'], args['OutDir'], indexes)
|
|
rmsFiles = gen_rms(wavFiles, rmsDir)
|
|
else:
|
|
wavDir = os.path.join(args['OutDir'], "wav")
|
|
dir_must_exist(wavDir)
|
|
wavFiles = globDir(wavDir, '*.wav')
|
|
wf = []
|
|
for listInd in range(50):
|
|
wf.append([])
|
|
for sentenceInd in range(10):
|
|
wf[listInd].append(wavFiles[listInd*10+sentenceInd])
|
|
wavFiles = wf
|
|
|
|
rmsFiles = globDir(rmsDir, 'Trial*.npy')
|
|
silences = detect_silences(rmsFiles, 44100)
|
|
s_rms = calc_speech_rms(wavFiles, silences, rmsDir)
|
|
b = calc_spectrum(wavFiles, silences)
|
|
y = gen_noise(args['OutDir'], b, 44100)
|