Last minute fixes
This commit is contained in:
@@ -37,11 +37,24 @@ class F0Analysis(Analysis):
|
||||
|
||||
self.nyquist_rate = self.AnalysedAudioFile.samplerate / 2.
|
||||
|
||||
if config:
|
||||
self.window_size = config.f0["window_size"]
|
||||
self.overlap = 1. / config.f0["overlap"]
|
||||
else:
|
||||
self.window_size=512
|
||||
self.overlap = 0.5
|
||||
|
||||
self.analysis_group = analysis_group
|
||||
frames = self.AnalysedAudioFile.read_grain()
|
||||
self.logger.info("Creating F0 analysis for {0}".format(self.AnalysedAudioFile.name))
|
||||
|
||||
self.create_analysis(frames, self.AnalysedAudioFile.samplerate)
|
||||
self.create_analysis(
|
||||
frames,
|
||||
self.AnalysedAudioFile.samplerate,
|
||||
window_size=self.window_size,
|
||||
overlapFac=self.overlap,
|
||||
threshold=config.f0["ratio_threshold"]
|
||||
)
|
||||
|
||||
def get_analysis_grains(self, start, end):
|
||||
"""
|
||||
@@ -66,6 +79,7 @@ class F0Analysis(Analysis):
|
||||
samplerate,
|
||||
window_size=512,
|
||||
overlapFac=0.5,
|
||||
threshold=0.0,
|
||||
m0=None,
|
||||
M=None,
|
||||
):
|
||||
@@ -76,7 +90,7 @@ class F0Analysis(Analysis):
|
||||
of the audio file and save to disk.
|
||||
"""
|
||||
if not M:
|
||||
M=round(0.016*samplerate)
|
||||
M=int(round(0.016*samplerate))
|
||||
|
||||
hopSize = int(window_size - np.floor(overlapFac * window_size))
|
||||
|
||||
@@ -154,7 +168,8 @@ class F0Analysis(Analysis):
|
||||
f0 = np.nan
|
||||
return f0, HR
|
||||
|
||||
R=autocorr([frames])[0]
|
||||
#R=autocorr([frames])[0]
|
||||
R = np.correlate(frames, frames, mode='full')
|
||||
g=R[frames.size]
|
||||
|
||||
R=R[frames.size-1:]
|
||||
@@ -201,6 +216,8 @@ class F0Analysis(Analysis):
|
||||
samplerate/2))
|
||||
if HR >= 1:
|
||||
HR = 1
|
||||
if HR < threshold:
|
||||
HR = np.nan
|
||||
return (f0, HR)
|
||||
|
||||
output = np.apply_along_axis(per_frame_f0, 1, frames, m0, M)
|
||||
|
||||
@@ -40,7 +40,6 @@ class RMSAnalysis(Analysis):
|
||||
self.AnalysedAudioFile = AnalysedAudioFile
|
||||
|
||||
if config:
|
||||
# TODO: create case for when config isn't present.
|
||||
self.window_size = config.rms["window_size"] * self.AnalysedAudioFile.samplerate / 1000
|
||||
self.overlap = 1. / config.rms["overlap"]
|
||||
else:
|
||||
|
||||
@@ -87,7 +87,7 @@ class AudioFile(object):
|
||||
|
||||
def __enter__(self):
|
||||
"""Allow AudioFile object to be opened by 'with' statements"""
|
||||
self.logger.info("Opening soundfile {0}".format(self.filepath))
|
||||
self.logger.debug("Opening soundfile {0}".format(self.filepath))
|
||||
if self.mode == 'r':
|
||||
if not os.path.exists(self.filepath):
|
||||
raise IOError(
|
||||
|
||||
+32
-26
@@ -1,30 +1,36 @@
|
||||
# Specify analysis parameters for root mean square analysis.
|
||||
rms = {
|
||||
"window_size": 120,
|
||||
"overlap": 2,
|
||||
"window_size": 100,
|
||||
"overlap": 8,
|
||||
}
|
||||
|
||||
f0 = {
|
||||
"window_size": 2048,
|
||||
"overlap": 8,
|
||||
"ratio_threshold": 0.0
|
||||
}
|
||||
|
||||
# Specify analysis parameters for variance analysis.
|
||||
variance = {
|
||||
"window_size": 120,
|
||||
"overlap": 2
|
||||
"window_size": 100,
|
||||
"overlap": 8
|
||||
}
|
||||
|
||||
# Specify analysis parameters for temporal kurtosis analysis.
|
||||
kurtosis = {
|
||||
"window_size": 120,
|
||||
"overlap": 2
|
||||
"window_size": 100,
|
||||
"overlap": 8
|
||||
}
|
||||
|
||||
# Specify analysis parameters for temporal skewness analysis.
|
||||
skewness = {
|
||||
"window_size": 120,
|
||||
"overlap": 2
|
||||
"window_size": 100,
|
||||
"overlap": 8
|
||||
}
|
||||
|
||||
# Specify analysis parameters for FFT analysis.
|
||||
fft = {
|
||||
"window_size": 65536
|
||||
"window_size": 2048
|
||||
}
|
||||
|
||||
database = {
|
||||
@@ -36,20 +42,20 @@ database = {
|
||||
# Sets the weighting for each analysis. A higher weighting gives an analysis
|
||||
# higher precedence when finding the best matches.
|
||||
matcher_weightings = {
|
||||
"f0" : 1.,
|
||||
"f0" : 2.,
|
||||
"spccntr" : 1.,
|
||||
"spcsprd" : 1.,
|
||||
"spcflux" : 1.,
|
||||
"spccf" : 1.,
|
||||
"spcflatness": 1.,
|
||||
"zerox" : 1.,
|
||||
"rms" : 1.,
|
||||
"peak": 1.,
|
||||
"spcsprd" : 2.,
|
||||
"spcflux" : 2.,
|
||||
"spccf" : 2.,
|
||||
"spcflatness": 3.,
|
||||
"zerox" : 0.,
|
||||
"rms" : 0,
|
||||
"peak": 0.,
|
||||
"centroid": 1.,
|
||||
"kurtosis": 1.,
|
||||
"skewness": 1.,
|
||||
"variance": 1.,
|
||||
"harm_ratio": 1.
|
||||
"variance": 2.,
|
||||
"harm_ratio": 5.
|
||||
}
|
||||
|
||||
# Specifies the method for averaging analysis frames to create a single value
|
||||
@@ -81,9 +87,9 @@ analysis = {
|
||||
|
||||
matcher = {
|
||||
# Force the re-matching of analyses
|
||||
"rematch": True,
|
||||
"grain_size": 120,
|
||||
"overlap": 2,
|
||||
"rematch": False,
|
||||
"grain_size": 100,
|
||||
"overlap": 8,
|
||||
# Defines the number of matches to keep for synthesis. Note that this must
|
||||
# also be specified in the synthesis config
|
||||
"match_quantity": 20,
|
||||
@@ -95,16 +101,16 @@ matcher = {
|
||||
synthesizer = {
|
||||
# Artificially scale the output grain by the difference in RMS values
|
||||
# between source and target.
|
||||
"enforce_rms": True,
|
||||
"enforce_intensity": True,
|
||||
# Specify the ratio limit that the grain can be scaled by.
|
||||
"enf_rms_ratio_limit": 100.,
|
||||
"enf_intensity_ratio_limit": 5.,
|
||||
# Artificially modify the pitch by the difference in f0 values between
|
||||
# source and target.
|
||||
"enforce_f0": True,
|
||||
# Specify the ratio limit that the grain can be modified by.
|
||||
"enf_f0_ratio_limit": 10.,
|
||||
"grain_size": 120,
|
||||
"overlap": 2,
|
||||
"grain_size": 100,
|
||||
"overlap": 8,
|
||||
# Normalize output, avoid clipping of final output by scaling the final
|
||||
# frames.
|
||||
"normalize" : True,
|
||||
|
||||
+57
-43
@@ -400,6 +400,9 @@ class Matcher:
|
||||
weightings = {x: 1. for x in self.matcher_analyses}
|
||||
|
||||
|
||||
# Create an imputer object for handling NaN values.
|
||||
imp = Imputer(axis=0, strategy='median')
|
||||
|
||||
for tind, target_entry in enumerate(self.target_db.analysed_audio):
|
||||
# Check if match data already exists and use it rather than
|
||||
# regenerating if it does.
|
||||
@@ -426,12 +429,11 @@ class Matcher:
|
||||
all_target_analyses[i] = target_data
|
||||
|
||||
|
||||
pdb.set_trace()
|
||||
imp = Imputer(axis=0)
|
||||
nan_columns = np.all(np.isnan(all_target_analyses), axis=0)
|
||||
all_target_analyses[:, nan_columns] = 0.
|
||||
# Impute values for NaNs
|
||||
all_target_analyses = imp.fit_transform(all_target_analyses)
|
||||
# all_target_analyses[np.isnan(all_target_analyses)] = np.inf
|
||||
|
||||
for sind, source_entry in enumerate(self.source_db.analysed_audio):
|
||||
self.logger.info("K-d Tree Matching: {0} to {1}".format(source_entry.name, target_entry.name))
|
||||
@@ -455,6 +457,8 @@ class Matcher:
|
||||
all_source_analyses[:, nan_columns] = 0.
|
||||
all_source_analyses = imp.fit_transform(all_source_analyses)
|
||||
|
||||
# all_source_analyses[np.isnan(all_source_analyses)] = np.inf
|
||||
|
||||
source_tree = spatial.cKDTree(all_source_analyses.T, leafsize=100)
|
||||
results_vals, results_inds = source_tree.query(all_target_analyses.T, k=self.match_quantity, p=2)
|
||||
|
||||
@@ -730,7 +734,9 @@ class Matcher:
|
||||
)
|
||||
|
||||
if not np.all(np.any(x, axis=1)):
|
||||
pdb.set_trace()
|
||||
raise ValueError("Not all match indexes have a corresponding sample index. This shouldn't happen...")
|
||||
|
||||
x = x.reshape(mi_shape[0], mi_shape[1], x.shape[1])
|
||||
x = np.argmax(x, axis=2)
|
||||
|
||||
@@ -760,10 +766,10 @@ class Synthesizer:
|
||||
|
||||
self.config = kwargs.pop("config", None)
|
||||
|
||||
self.enforce_rms_bool = self.config.synthesizer["enforce_rms"]
|
||||
self.enforce_intensity_bool = self.config.synthesizer["enforce_intensity"]
|
||||
# Key word arguments overwrite config file.
|
||||
self.enforce_rms_bool = kwargs.pop("enforce_rms", self.enforce_rms_bool)
|
||||
if self.enforce_rms_bool and ("rms" not in self.target_db.analysis_list or "rms" not in self.match_db.analysis_list):
|
||||
self.enforce_intensity_bool = kwargs.pop("enforce_intensity", self.enforce_intensity_bool)
|
||||
if self.enforce_intensity_bool and ("rms" not in self.target_db.analysis_list or "rms" not in self.match_db.analysis_list):
|
||||
raise RuntimeError("BLARGHHH")
|
||||
|
||||
self.enforce_f0_bool = self.config.synthesizer["enforce_f0"]
|
||||
@@ -772,7 +778,7 @@ class Synthesizer:
|
||||
if self.enforce_f0_bool and ("f0" not in self.target_db.analysis_list or "f0" not in self.match_db.analysis_list):
|
||||
raise RuntimeError("F0 enforcement cannot be enabled if both databases do not have F0 analyses.")
|
||||
|
||||
if self.enforce_rms:
|
||||
if self.enforce_intensity:
|
||||
if not self.target_db:
|
||||
raise ValueError("Target database must be provided if rms or F0 enforcement is enabled.")
|
||||
|
||||
@@ -820,6 +826,17 @@ class Synthesizer:
|
||||
match_index = np.random.randint(matches.shape[0])
|
||||
match_db_ind, match_grain_ind = matches[match_index]
|
||||
with self.match_db.analysed_audio[match_db_ind] as match_sample:
|
||||
self.logger.info("Synthesizing grain:\n"
|
||||
"Source sample: {0}\n"
|
||||
"Source grain index: {1}\n"
|
||||
"Target output: {2}\n"
|
||||
"Target grain index: {3} out of {4}".format(
|
||||
match_sample,
|
||||
match_grain_ind,
|
||||
output_name,
|
||||
target_grain_ind,
|
||||
len(grain_matches)
|
||||
))
|
||||
match_sample.generate_grain_times(match_grain_size, match_overlap, save_times=True)
|
||||
|
||||
# TODO: Make proper fix for grain index offset of 1
|
||||
@@ -828,7 +845,8 @@ class Synthesizer:
|
||||
except:
|
||||
pdb.set_trace()
|
||||
|
||||
if self.enforce_rms_bool:
|
||||
|
||||
if self.enforce_intensity_bool:
|
||||
# Get the target sample from the database
|
||||
target_sample = self.target_db[job_ind]
|
||||
|
||||
@@ -836,7 +854,7 @@ class Synthesizer:
|
||||
# indexing.
|
||||
target_sample.generate_grain_times(match_grain_size, match_overlap, save_times=True)
|
||||
|
||||
match_grain = self.enforce_rms(match_grain, match_sample, match_grain_ind, target_sample, target_grain_ind)
|
||||
match_grain = self.enforce_intensity(match_grain, match_sample, match_grain_ind, target_sample, target_grain_ind)
|
||||
|
||||
if self.enforce_f0_bool:
|
||||
# Get the target sample from the database
|
||||
@@ -848,6 +866,7 @@ class Synthesizer:
|
||||
|
||||
match_grain = self.enforce_pitch(match_grain, match_sample, match_grain_ind, target_sample, target_grain_ind)
|
||||
|
||||
# Apply hanning window to grain
|
||||
match_grain *= np.hanning(match_grain.size)
|
||||
output_frames[offset:offset+match_grain.size] += match_grain
|
||||
offset += hop_size
|
||||
@@ -867,6 +886,10 @@ class Synthesizer:
|
||||
# TODO: Make proper fix for grain index offset of 1
|
||||
target_times = target_sample.times[target_grain_ind-1]
|
||||
|
||||
# Get mean harmonic ratio of f0 frames in time range specified.
|
||||
target_harmonic_ratio = target_sample.analysis_data_grains(target_times, "harm_ratio", format="mean")[0][0]
|
||||
|
||||
|
||||
# Get median of f0 frames in time range specified.
|
||||
target_f0 = target_sample.analysis_data_grains(target_times, "f0", format="median")[0][0]
|
||||
|
||||
@@ -874,6 +897,13 @@ class Synthesizer:
|
||||
# TODO: Make proper fix for grain index offset of 1
|
||||
source_times = source_sample.times[source_grain_ind-1]
|
||||
|
||||
# Get mean harmonic ratio of f0 frames in time range specified.
|
||||
source_harmonic_ratio = source_sample.analysis_data_grains(source_times, "harm_ratio", format="mean")[0][0]
|
||||
hr_array = np.array([source_harmonic_ratio, target_harmonic_ratio])
|
||||
|
||||
if np.any(np.isnan(hr_array)):
|
||||
return grain
|
||||
|
||||
# Get median of f0 frames in time range specified.
|
||||
source_f0 = source_sample.analysis_data_grains(source_times, "f0", format="median")[0][0]
|
||||
|
||||
@@ -886,31 +916,15 @@ class Synthesizer:
|
||||
ratio_limit = self.config.synthesizer["enf_f0_ratio_limit"]
|
||||
|
||||
if ratio_difference > ratio_limit:
|
||||
self.logger.warning("Grain f0 ratio too large({0}), enforcing f0 at limit ({1})\n"
|
||||
"Source sample: {2}\n"
|
||||
"Source grain index: {3}\n"
|
||||
"Target sample: {4}\n"
|
||||
"Target grain index: {5}".format(
|
||||
self.logger.warning("Grain f0 ratio too large({0}), enforcing f0 at limit ({1})".format(
|
||||
ratio_difference,
|
||||
ratio_limit,
|
||||
source_sample,
|
||||
source_grain_ind,
|
||||
target_sample,
|
||||
target_grain_ind
|
||||
))
|
||||
ratio_difference = ratio_limit
|
||||
elif ratio_difference < 1./ratio_limit:
|
||||
self.logger.warning("Grain f0 ratio too large ({0}), enforcing f0 at limit ({1})\n"
|
||||
"Source sample: {2}\n"
|
||||
"Source grain index: {3}\n"
|
||||
"Target sample: {4}\n"
|
||||
"Target grain index: {5}".format(
|
||||
self.logger.warning("Grain f0 ratio too large ({0}), enforcing f0 at limit ({1})".format(
|
||||
ratio_difference,
|
||||
1./ratio_limit,
|
||||
source_sample,
|
||||
source_grain_ind,
|
||||
target_sample,
|
||||
target_grain_ind
|
||||
))
|
||||
ratio_difference = 1./ratio_limit
|
||||
|
||||
@@ -918,11 +932,11 @@ class Synthesizer:
|
||||
|
||||
return grain
|
||||
|
||||
def enforce_rms(self, grain, source_sample, source_grain_ind, target_sample, target_grain_ind):
|
||||
def enforce_intensity(self, grain, source_sample, source_grain_ind, target_sample, target_grain_ind):
|
||||
"""
|
||||
Scales the amplitude of the grain by the difference between its rms and the rms of the grain specified.
|
||||
Scales the amplitude of the grain by the difference between its intensity and the intensity of the grain specified.
|
||||
|
||||
This method will fail if either AnalysedAudioFile object does not have an rms analysis.
|
||||
This method will fail if either AnalysedAudioFile object does not have any intensity analyses.
|
||||
"""
|
||||
|
||||
# Get grain start and finish range to retrieve analysis frames from.
|
||||
@@ -931,6 +945,9 @@ class Synthesizer:
|
||||
|
||||
# Get mean of RMS frames in time range specified.
|
||||
target_rms = target_sample.analysis_data_grains(target_times, "rms", format="mean")[0][0]
|
||||
target_peak = target_sample.analysis_data_grains(target_times, "peak", format="mean")[0][0]
|
||||
|
||||
target_intensity_value = np.mean([target_rms, target_peak])
|
||||
|
||||
# Get grain start and finish range to retrieve analysis frames from.
|
||||
# TODO: Make proper fix for grain index offset of 1
|
||||
@@ -938,26 +955,23 @@ class Synthesizer:
|
||||
|
||||
# Get mean of RMS frames in time range specified.
|
||||
source_rms = source_sample.analysis_data_grains(source_times, "rms", format="mean")[0][0]
|
||||
source_peak = source_sample.analysis_data_grains(source_times, "peak", format="mean")[0][0]
|
||||
|
||||
source_intensity_value = np.mean([source_rms, source_peak])
|
||||
|
||||
ratio_difference = target_intensity_value / source_intensity_value
|
||||
|
||||
ratio_difference = target_rms / source_rms
|
||||
if not np.isfinite(ratio_difference):
|
||||
return grain
|
||||
# If the ratio difference is within the limits
|
||||
ratio_limit = self.config.synthesizer["enf_rms_ratio_limit"]
|
||||
ratio_limit = self.config.synthesizer["enf_intensity_ratio_limit"]
|
||||
|
||||
if ratio_difference > ratio_limit:
|
||||
self.logger.warning("Grain RMS ratio too large({0}), enforcing RMS at limit ({1})\n"
|
||||
"Source sample: {2}\n"
|
||||
"Source grain index: {3}\n"
|
||||
"Target sample: {4}\n"
|
||||
"Target grain index: {5}".format(
|
||||
ratio_difference,
|
||||
ratio_limit,
|
||||
source_sample,
|
||||
source_grain_ind,
|
||||
target_sample,
|
||||
target_grain_ind
|
||||
))
|
||||
self.logger.warning(
|
||||
"Grain RMS ratio too large({0}), enforcing RMS at limit ({1})\n".format(
|
||||
ratio_difference,
|
||||
ratio_limit,
|
||||
))
|
||||
ratio_difference = ratio_limit
|
||||
|
||||
grain *= ratio_difference
|
||||
|
||||
Reference in New Issue
Block a user