Last minute fixes

This commit is contained in:
2016-04-12 19:07:57 +01:00
parent 5ee8165525
commit 9c4de3e9c2
5 changed files with 110 additions and 74 deletions
+20 -3
View File
@@ -37,11 +37,24 @@ class F0Analysis(Analysis):
self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2.
if config:
self.window_size = config.f0["window_size"]
self.overlap = 1. / config.f0["overlap"]
else:
self.window_size=512
self.overlap = 0.5
self.analysis_group = analysis_group
frames = self.AnalysedAudioFile.read_grain()
self.logger.info("Creating F0 analysis for {0}".format(self.AnalysedAudioFile.name))
self.create_analysis(frames, self.AnalysedAudioFile.samplerate)
self.create_analysis(
frames,
self.AnalysedAudioFile.samplerate,
window_size=self.window_size,
overlapFac=self.overlap,
threshold=config.f0["ratio_threshold"]
)
def get_analysis_grains(self, start, end):
"""
@@ -66,6 +79,7 @@ class F0Analysis(Analysis):
samplerate,
window_size=512,
overlapFac=0.5,
threshold=0.0,
m0=None,
M=None,
):
@@ -76,7 +90,7 @@ class F0Analysis(Analysis):
of the audio file and save to disk.
"""
if not M:
M=round(0.016*samplerate)
M=int(round(0.016*samplerate))
hopSize = int(window_size - np.floor(overlapFac * window_size))
@@ -154,7 +168,8 @@ class F0Analysis(Analysis):
f0 = np.nan
return f0, HR
R=autocorr([frames])[0]
#R=autocorr([frames])[0]
R = np.correlate(frames, frames, mode='full')
g=R[frames.size]
R=R[frames.size-1:]
@@ -201,6 +216,8 @@ class F0Analysis(Analysis):
samplerate/2))
if HR >= 1:
HR = 1
if HR < threshold:
HR = np.nan
return (f0, HR)
output = np.apply_along_axis(per_frame_f0, 1, frames, m0, M)
-1
View File
@@ -40,7 +40,6 @@ class RMSAnalysis(Analysis):
self.AnalysedAudioFile = AnalysedAudioFile
if config:
# TODO: create case for when config isn't present.
self.window_size = config.rms["window_size"] * self.AnalysedAudioFile.samplerate / 1000
self.overlap = 1. / config.rms["overlap"]
else:
+1 -1
View File
@@ -87,7 +87,7 @@ class AudioFile(object):
def __enter__(self):
"""Allow AudioFile object to be opened by 'with' statements"""
self.logger.info("Opening soundfile {0}".format(self.filepath))
self.logger.debug("Opening soundfile {0}".format(self.filepath))
if self.mode == 'r':
if not os.path.exists(self.filepath):
raise IOError(
+32 -26
View File
@@ -1,30 +1,36 @@
# Specify analysis parameters for root mean square analysis.
rms = {
"window_size": 120,
"overlap": 2,
"window_size": 100,
"overlap": 8,
}
f0 = {
"window_size": 2048,
"overlap": 8,
"ratio_threshold": 0.0
}
# Specify analysis parameters for variance analysis.
variance = {
"window_size": 120,
"overlap": 2
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for temporal kurtosis analysis.
kurtosis = {
"window_size": 120,
"overlap": 2
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for temporal skewness analysis.
skewness = {
"window_size": 120,
"overlap": 2
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for FFT analysis.
fft = {
"window_size": 65536
"window_size": 2048
}
database = {
@@ -36,20 +42,20 @@ database = {
# Sets the weighting for each analysis. A higher weighting gives an analysis
# higher precedence when finding the best matches.
matcher_weightings = {
"f0" : 1.,
"f0" : 2.,
"spccntr" : 1.,
"spcsprd" : 1.,
"spcflux" : 1.,
"spccf" : 1.,
"spcflatness": 1.,
"zerox" : 1.,
"rms" : 1.,
"peak": 1.,
"spcsprd" : 2.,
"spcflux" : 2.,
"spccf" : 2.,
"spcflatness": 3.,
"zerox" : 0.,
"rms" : 0,
"peak": 0.,
"centroid": 1.,
"kurtosis": 1.,
"skewness": 1.,
"variance": 1.,
"harm_ratio": 1.
"variance": 2.,
"harm_ratio": 5.
}
# Specifies the method for averaging analysis frames to create a single value
@@ -81,9 +87,9 @@ analysis = {
matcher = {
# Force the re-matching of analyses
"rematch": True,
"grain_size": 120,
"overlap": 2,
"rematch": False,
"grain_size": 100,
"overlap": 8,
# Defines the number of matches to keep for synthesis. Note that this must
# also be specified in the synthesis config
"match_quantity": 20,
@@ -95,16 +101,16 @@ matcher = {
synthesizer = {
# Artificially scale the output grain by the difference in RMS values
# between source and target.
"enforce_rms": True,
"enforce_intensity": True,
# Specify the ratio limit that the grain can be scaled by.
"enf_rms_ratio_limit": 100.,
"enf_intensity_ratio_limit": 5.,
# Artificially modify the pitch by the difference in f0 values between
# source and target.
"enforce_f0": True,
# Specify the ratio limit that the grain can be modified by.
"enf_f0_ratio_limit": 10.,
"grain_size": 120,
"overlap": 2,
"grain_size": 100,
"overlap": 8,
# Normalize output, avoid clipping of final output by scaling the final
# frames.
"normalize" : True,
+57 -43
View File
@@ -400,6 +400,9 @@ class Matcher:
weightings = {x: 1. for x in self.matcher_analyses}
# Create an imputer object for handling NaN values.
imp = Imputer(axis=0, strategy='median')
for tind, target_entry in enumerate(self.target_db.analysed_audio):
# Check if match data already exists and use it rather than
# regenerating if it does.
@@ -426,12 +429,11 @@ class Matcher:
all_target_analyses[i] = target_data
pdb.set_trace()
imp = Imputer(axis=0)
nan_columns = np.all(np.isnan(all_target_analyses), axis=0)
all_target_analyses[:, nan_columns] = 0.
# Impute values for Nans
all_target_analyses = imp.fit_transform(all_target_analyses)
# all_target_analyses[np.isnan(all_target_analyses)] = np.inf
for sind, source_entry in enumerate(self.source_db.analysed_audio):
self.logger.info("K-d Tree Matching: {0} to {1}".format(source_entry.name, target_entry.name))
@@ -455,6 +457,8 @@ class Matcher:
all_source_analyses[:, nan_columns] = 0.
all_source_analyses = imp.fit_transform(all_source_analyses)
# all_source_analyses[np.isnan(all_source_analyses)] = np.inf
source_tree = spatial.cKDTree(all_source_analyses.T, leafsize=100)
results_vals, results_inds = source_tree.query(all_target_analyses.T, k=self.match_quantity, p=2)
@@ -730,7 +734,9 @@ class Matcher:
)
if not np.all(np.any(x, axis=1)):
pdb.set_trace()
raise ValueError("Not all match indexes have a corresponding sample index. This shouldn't happen...")
x = x.reshape(mi_shape[0], mi_shape[1], x.shape[1])
x = np.argmax(x, axis=2)
@@ -760,10 +766,10 @@ class Synthesizer:
self.config = kwargs.pop("config", None)
self.enforce_rms_bool = self.config.synthesizer["enforce_rms"]
self.enforce_intensity_bool = self.config.synthesizer["enforce_intensity"]
# Key word arguments overwrite config file.
self.enforce_rms_bool = kwargs.pop("enforce_rms", self.enforce_rms_bool)
if self.enforce_rms_bool and ("rms" not in self.target_db.analysis_list or "rms" not in self.match_db.analysis_list):
self.enforce_intensity_bool = kwargs.pop("enforce_intensity", self.enforce_intensity_bool)
if self.enforce_intensity_bool and ("rms" not in self.target_db.analysis_list or "rms" not in self.match_db.analysis_list):
raise RuntimeError("BLARGHHH")
self.enforce_f0_bool = self.config.synthesizer["enforce_f0"]
@@ -772,7 +778,7 @@ class Synthesizer:
if self.enforce_f0_bool and ("f0" not in self.target_db.analysis_list or "f0" not in self.match_db.analysis_list):
raise RuntimeError("F0 enforcement cannot be enabled if both databases do not have F0 analyses.")
if self.enforce_rms:
if self.enforce_intensity:
if not self.target_db:
raise ValueError("Target database must be provided if rms or F0 enforcement is enabled.")
@@ -820,6 +826,17 @@ class Synthesizer:
match_index = np.random.randint(matches.shape[0])
match_db_ind, match_grain_ind = matches[match_index]
with self.match_db.analysed_audio[match_db_ind] as match_sample:
self.logger.info("Synthesizing grain:\n"
"Source sample: {0}\n"
"Source grain index: {1}\n"
"Target output: {2}\n"
"Target grain index: {3} out of {4}".format(
match_sample,
match_grain_ind,
output_name,
target_grain_ind,
len(grain_matches)
))
match_sample.generate_grain_times(match_grain_size, match_overlap, save_times=True)
# TODO: Make proper fix for grain index offset of 1
@@ -828,7 +845,8 @@ class Synthesizer:
except:
pdb.set_trace()
if self.enforce_rms_bool:
if self.enforce_intensity_bool:
# Get the target sample from the database
target_sample = self.target_db[job_ind]
@@ -836,7 +854,7 @@ class Synthesizer:
# indexing.
target_sample.generate_grain_times(match_grain_size, match_overlap, save_times=True)
match_grain = self.enforce_rms(match_grain, match_sample, match_grain_ind, target_sample, target_grain_ind)
match_grain = self.enforce_intensity(match_grain, match_sample, match_grain_ind, target_sample, target_grain_ind)
if self.enforce_f0_bool:
# Get the target sample from the database
@@ -848,6 +866,7 @@ class Synthesizer:
match_grain = self.enforce_pitch(match_grain, match_sample, match_grain_ind, target_sample, target_grain_ind)
# Apply hanning window to grain
match_grain *= np.hanning(match_grain.size)
output_frames[offset:offset+match_grain.size] += match_grain
offset += hop_size
@@ -867,6 +886,10 @@ class Synthesizer:
# TODO: Make proper fix for grain index offset of 1
target_times = target_sample.times[target_grain_ind-1]
# Get mean harmonic ratio of f0 frames in time range specified.
target_harmonic_ratio = target_sample.analysis_data_grains(target_times, "harm_ratio", format="mean")[0][0]
# Get median of f0 frames in time range specified.
target_f0 = target_sample.analysis_data_grains(target_times, "f0", format="median")[0][0]
@@ -874,6 +897,13 @@ class Synthesizer:
# TODO: Make proper fix for grain index offset of 1
source_times = source_sample.times[source_grain_ind-1]
# Get mean harmonic ratio of f0 frames in time range specified.
source_harmonic_ratio = source_sample.analysis_data_grains(source_times, "harm_ratio", format="mean")[0][0]
hr_array = np.array([source_harmonic_ratio, target_harmonic_ratio])
if np.any(np.isnan(hr_array)):
return grain
# Get median of f0 frames in time range specified.
source_f0 = source_sample.analysis_data_grains(source_times, "f0", format="median")[0][0]
@@ -886,31 +916,15 @@ class Synthesizer:
ratio_limit = self.config.synthesizer["enf_f0_ratio_limit"]
if ratio_difference > ratio_limit:
self.logger.warning("Grain f0 ratio too large({0}), enforcing f0 at limit ({1})\n"
"Source sample: {2}\n"
"Source grain index: {3}\n"
"Target sample: {4}\n"
"Target grain index: {5}".format(
self.logger.warning("Grain f0 ratio too large({0}), enforcing f0 at limit ({1})".format(
ratio_difference,
ratio_limit,
source_sample,
source_grain_ind,
target_sample,
target_grain_ind
))
ratio_difference = ratio_limit
elif ratio_difference < 1./ratio_limit:
self.logger.warning("Grain f0 ratio too large ({0}), enforcing f0 at limit ({1})\n"
"Source sample: {2}\n"
"Source grain index: {3}\n"
"Target sample: {4}\n"
"Target grain index: {5}".format(
self.logger.warning("Grain f0 ratio too large ({0}), enforcing f0 at limit ({1})".format(
ratio_difference,
1./ratio_limit,
source_sample,
source_grain_ind,
target_sample,
target_grain_ind
))
ratio_difference = 1./ratio_limit
@@ -918,11 +932,11 @@ class Synthesizer:
return grain
def enforce_rms(self, grain, source_sample, source_grain_ind, target_sample, target_grain_ind):
def enforce_intensity(self, grain, source_sample, source_grain_ind, target_sample, target_grain_ind):
"""
Scales the amplitude of the grain by the difference between its rms and the rms of the grain specified.
Scales the amplitude of the grain by the difference between its intensity and the intensity of the grain specified.
This method will fail if either AnalysedAudioFile object does not have an rms analysis.
This method will fail if either AnalysedAudioFile object does not have any intensity analyses.
"""
# Get grain start and finish range to retrieve analysis frames from.
@@ -931,6 +945,9 @@ class Synthesizer:
# Get mean of RMS frames in time range specified.
target_rms = target_sample.analysis_data_grains(target_times, "rms", format="mean")[0][0]
target_peak = target_sample.analysis_data_grains(target_times, "peak", format="mean")[0][0]
target_intensity_value = np.mean([target_rms, target_peak])
# Get grain start and finish range to retrieve analysis frames from.
# TODO: Make proper fix for grain index offset of 1
@@ -938,26 +955,23 @@ class Synthesizer:
# Get mean of RMS frames in time range specified.
source_rms = source_sample.analysis_data_grains(source_times, "rms", format="mean")[0][0]
source_peak = source_sample.analysis_data_grains(source_times, "peak", format="mean")[0][0]
source_intensity_value = np.mean([source_rms, source_peak])
ratio_difference = target_intensity_value / source_intensity_value
ratio_difference = target_rms / source_rms
if not np.isfinite(ratio_difference):
return grain
# If the ratio difference is within the limits
ratio_limit = self.config.synthesizer["enf_rms_ratio_limit"]
ratio_limit = self.config.synthesizer["enf_intensity_ratio_limit"]
if ratio_difference > ratio_limit:
self.logger.warning("Grain RMS ratio too large({0}), enforcing RMS at limit ({1})\n"
"Source sample: {2}\n"
"Source grain index: {3}\n"
"Target sample: {4}\n"
"Target grain index: {5}".format(
ratio_difference,
ratio_limit,
source_sample,
source_grain_ind,
target_sample,
target_grain_ind
))
self.logger.warning(
"Grain RMS ratio too large({0}), enforcing RMS at limit ({1})\n".format(
ratio_difference,
ratio_limit,
))
ratio_difference = ratio_limit
grain *= ratio_difference