Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 4465fae7d3 | |||
| 02e426a8e8 | |||
| 2a16ace7a5 | |||
| 38ea3ff76e | |||
| b581e578b9 | |||
| bc578eee73 | |||
| 0b6d56392e | |||
| 4ec7b13bcf | |||
| 0cac135882 | |||
| 2182774048 | |||
| e10a7f2f18 | |||
| 86fda63151 | |||
| d87b9cc3ed | |||
| bc6bb4ea83 | |||
| 5e0e4dd5ed | |||
| a123097ab3 | |||
| e9c9ca3bff | |||
| dd235ef977 |
Executable
+47
@@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import subprocess
|
||||
import os
|
||||
import pdb
|
||||
import fnmatch
|
||||
|
||||
import sys
|
||||
|
||||
def main():
|
||||
p = subprocess.Popen(["git", "rev-parse", "--show-toplevel"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
out, err = p.communicate()
|
||||
|
||||
out = out.strip('\n')
|
||||
track_filepath = os.path.join(out, ".gittrack")
|
||||
|
||||
p = subprocess.Popen(["git", "ls-files", out, "--exclude-standard", "--others"], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
out, err = p.communicate()
|
||||
|
||||
out = out.splitlines()
|
||||
|
||||
try:
|
||||
with open(track_filepath) as f:
|
||||
content = f.read().splitlines()
|
||||
except IOError:
|
||||
return 0
|
||||
|
||||
|
||||
untracked = []
|
||||
for filepath in out:
|
||||
for name in content:
|
||||
if fnmatch.fnmatch(filepath, name):
|
||||
untracked.append(filepath)
|
||||
|
||||
if untracked:
|
||||
print "The following files are not tracked: "
|
||||
for i in untracked:
|
||||
print i
|
||||
print "Please either stage these files for the commit or add them to the project's .gitignore to disregard them."
|
||||
return 1
|
||||
else:
|
||||
return 0
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
exit(main())
|
||||
+39
@@ -5,3 +5,42 @@
|
||||
Session.vim
|
||||
pip-selfcheck.json
|
||||
.DS_Store
|
||||
src/spfileops/
|
||||
playground
|
||||
|
||||
bin/
|
||||
# Compiled Object files
|
||||
*.slo
|
||||
*.lo
|
||||
*.o
|
||||
*.obj
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
*.pch
|
||||
|
||||
# Compiled Dynamic libraries
|
||||
*.so
|
||||
*.dylib
|
||||
*.dll
|
||||
|
||||
# Fortran module files
|
||||
*.mod
|
||||
*.smod
|
||||
|
||||
# Compiled Static libraries
|
||||
*.lai
|
||||
*.la
|
||||
*.a
|
||||
*.lib
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
|
||||
# Log files
|
||||
*.log
|
||||
|
||||
external/
|
||||
CMakeFiles/
|
||||
|
||||
@@ -0,0 +1,41 @@
|
||||
# Use this file to configure the Overcommit hooks you wish to use. This will
|
||||
# extend the default configuration defined in:
|
||||
# https://github.com/brigade/overcommit/blob/master/config/default.yml
|
||||
#
|
||||
# At the topmost level of this YAML file is a key representing type of hook
|
||||
# being run (e.g. pre-commit, commit-msg, etc.). Within each type you can
|
||||
# customize each hook, such as whether to only run it on certain files (via
|
||||
# `include`), whether to only display output if it fails (via `quiet`), etc.
|
||||
#
|
||||
# For a complete list of hooks, see:
|
||||
# https://github.com/brigade/overcommit/tree/master/lib/overcommit/hook
|
||||
#
|
||||
# For a complete list of options that you can use to customize hooks, see:
|
||||
# https://github.com/brigade/overcommit#configuration
|
||||
#
|
||||
# Uncomment the following lines to make the configuration take effect.
|
||||
|
||||
#PreCommit:
|
||||
# RuboCop:
|
||||
# enabled: true
|
||||
# on_warn: fail # Treat all warnings as failures
|
||||
#
|
||||
# TrailingWhitespace:
|
||||
# enabled: true
|
||||
# exclude:
|
||||
# - '**/db/structure.sql' # Ignore trailing whitespace in generated files
|
||||
#
|
||||
#PostCheckout:
|
||||
# ALL: # Special hook name that customizes all hooks of this type
|
||||
# quiet: true # Change all post-checkout hooks to only display output on failure
|
||||
#
|
||||
# IndexTags:
|
||||
# enabled: true # Generate a tags file with `ctags` each time HEAD changes
|
||||
|
||||
|
||||
PreCommit:
|
||||
CheckUntracked:
|
||||
enabled: true
|
||||
quiet: false
|
||||
description: 'Check for files that should be tracked or ignored.'
|
||||
required_executable: './.git-hooks/pre-commit/check_untracked.py'
|
||||
@@ -0,0 +1 @@
|
||||
2.7.11
|
||||
@@ -1 +0,0 @@
|
||||
./src/sppysound/concatenator.py
|
||||
@@ -1,8 +0,0 @@
|
||||
set -euo pipefail
|
||||
pip install numpy
|
||||
pip install scipy
|
||||
pip install pysndfile
|
||||
pip install h5py
|
||||
pip install https://github.com/Pezz89/fileops/zipball/master
|
||||
pip install -e ./
|
||||
pip install sklearn
|
||||
@@ -0,0 +1,8 @@
|
||||
-e git://github.com/Pezz89/SPFileOps@9bab7f59c339443903dea6cf42873a335f966d28#egg=spfileops
|
||||
h5py==2.6.0
|
||||
matplotlib==1.5.3
|
||||
numpy==1.11.2
|
||||
pysndfile==0.2.11
|
||||
scipy==0.18.1
|
||||
sphinx_bootstrap_theme==0.4.12
|
||||
scikit_learn==0.18.1
|
||||
|
||||
@@ -86,8 +86,7 @@ class Analysis(object):
|
||||
size containing frames for these times.
|
||||
"""
|
||||
times = self.analysis_group[self.name]["times"][:]
|
||||
start = start / 1000
|
||||
end = end / 1000
|
||||
# Seconds to milliseconds
|
||||
vtimes = times.reshape(-1, 1)
|
||||
|
||||
selection = np.transpose((vtimes >= start) & (vtimes <= end))
|
||||
|
||||
@@ -68,7 +68,7 @@ class CentroidAnalysis(Analysis):
|
||||
hopSize = int(window_size - np.floor(overlapFac * window_size))
|
||||
|
||||
# zeros at beginning (thus center of 1st window should be for sample nr. 0)
|
||||
samples = np.append(np.zeros(np.floor(window_size/2.0)), frames)
|
||||
samples = np.append(np.zeros(int(window_size/2.0)), frames)
|
||||
|
||||
# cols for windowing
|
||||
cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1
|
||||
@@ -77,7 +77,7 @@ class CentroidAnalysis(Analysis):
|
||||
|
||||
frames = stride_tricks.as_strided(
|
||||
samples,
|
||||
shape=(cols, window_size),
|
||||
shape=(int(cols), window_size),
|
||||
strides=(samples.strides[0]*hopSize, samples.strides[0])
|
||||
).copy()
|
||||
|
||||
@@ -112,5 +112,4 @@ class CentroidAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
centroid_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
|
||||
# Divide by the samplerate to give times in seconds
|
||||
centroid_times = centroid_times / samplerate
|
||||
return centroid_times
|
||||
|
||||
@@ -7,9 +7,11 @@ from numpy.lib import stride_tricks
|
||||
from Analysis import Analysis
|
||||
from scipy import signal
|
||||
from numpy.fft import fft, ifft, fftshift
|
||||
from sppysound import multirate
|
||||
import multirate
|
||||
import warnings
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
import pYIN.pYINmain as pYINmain
|
||||
from numpy import polyfit, arange
|
||||
|
||||
class F0Analysis(Analysis):
|
||||
@@ -50,6 +52,11 @@ class F0Analysis(Analysis):
|
||||
self.analysis_group = analysis_group
|
||||
self.logger.info("Creating F0 analysis for {0}".format(self.AnalysedAudioFile.name))
|
||||
|
||||
self.hopSize = int(np.floor(self.overlap * self.window_size))
|
||||
self.pYinInst = pYINmain.PyinMain()
|
||||
self.pYinInst.initialise(channels = 1, inputSampleRate = self.AnalysedAudioFile.samplerate, stepSize = self.hopSize, blockSize = self.window_size,
|
||||
lowAmp = 0.00, onsetSensitivity = 0.5, pruneThresh = 0.0)
|
||||
|
||||
self.create_analysis(
|
||||
frames,
|
||||
self.AnalysedAudioFile.samplerate,
|
||||
@@ -66,13 +73,7 @@ class F0Analysis(Analysis):
|
||||
"""
|
||||
times = self.analysis_group["F0"]["times"][:]
|
||||
frames = self.analysis_group["F0"]["frames"][:]
|
||||
hr = self.analysis_group["F0"]["harmonic_ratio"][:]
|
||||
start = start / 1000
|
||||
end = end / 1000
|
||||
vtimes = times.reshape(-1, 1)
|
||||
nan_inds = hr < self.threshold
|
||||
hr[nan_inds] = np.nan
|
||||
frames[nan_inds] = np.nan
|
||||
|
||||
selection = np.transpose((vtimes >= start) & (vtimes <= end))
|
||||
if not selection.any():
|
||||
@@ -80,10 +81,10 @@ class F0Analysis(Analysis):
|
||||
closest_frames = np.abs(vtimes-frame_center).argsort()[:2]
|
||||
selection[closest_frames] = True
|
||||
|
||||
return ((frames, times, hr), selection)
|
||||
return ((frames, times), selection)
|
||||
|
||||
@staticmethod
|
||||
def create_f0_analysis(
|
||||
self,
|
||||
frames,
|
||||
samplerate,
|
||||
window_size=512,
|
||||
@@ -98,122 +99,38 @@ class F0Analysis(Analysis):
|
||||
Calculate the frequency and harmonic ratio values of windowed segments
|
||||
of the audio file and save to disk.
|
||||
"""
|
||||
|
||||
if hasattr(frames, '__call__'):
|
||||
frames = frames()
|
||||
if not M:
|
||||
M=int(round(0.016*samplerate))
|
||||
|
||||
|
||||
hopSize = int(window_size - np.floor(overlapFac * window_size))
|
||||
|
||||
# zeros at beginning (thus center of 1st window should be for sample nr. 0)
|
||||
samples = frames
|
||||
#samples = np.concatenate((np.zeros(np.floor(window_size/2.0)), frames))
|
||||
|
||||
# cols for windowing
|
||||
cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1
|
||||
cols = np.ceil((len(samples) - window_size) / float(self.hopSize)) + 1
|
||||
# zeros at end (thus samples can be fully covered by frames)
|
||||
samples = np.concatenate((samples, np.zeros(window_size)))
|
||||
|
||||
frames = stride_tricks.as_strided(
|
||||
samples,
|
||||
shape=(cols, window_size),
|
||||
strides=(samples.strides[0]*hopSize, samples.strides[0])
|
||||
).copy()
|
||||
shape=(int(cols), window_size),
|
||||
strides=(samples.strides[0]*self.hopSize, samples.strides[0])
|
||||
)
|
||||
|
||||
# TODO: Replace this with zero crossing object.
|
||||
def feature_zcr(window):
|
||||
window2 = np.zeros(window.size)
|
||||
window2[1:-1] = window[0:-2]
|
||||
Z = (1/(2*window.size)) * np.sum(np.abs(np.sign(window)-np.sign(window2)))
|
||||
return Z
|
||||
for ind, frame in enumerate(frames):
|
||||
fs = self.pYinInst.process(frame)
|
||||
output = self.pYinInst.getSmoothedPitchTrack()
|
||||
# TODO: Create proper fix for this
|
||||
try:
|
||||
output[output < 0] = np.nan
|
||||
except:
|
||||
output = np.array([np.nan])
|
||||
|
||||
def parabolic(f, x):
|
||||
"""
|
||||
Quadratic interpolation for estimating the true position of an
|
||||
inter-sample maximum when nearby samples are known.
|
||||
|
||||
f is a vector and x is an index for that vector.
|
||||
|
||||
Returns (vx, vy), the coordinates of the vertex of a parabola that
|
||||
goes through point x and its two neighbors.
|
||||
|
||||
Example:
|
||||
|
||||
Defining a vector f with a local maximum at index 3 (= 6), find
|
||||
local maximum if points 2, 3, and 4 actually defined a parabola.
|
||||
|
||||
In [3]: f = [2, 3, 1, 6, 4, 2, 3, 1]
|
||||
|
||||
In [4]: parabolic(f, argmax(f))
|
||||
|
||||
Out[4]: (3.2142857142857144, 6.1607142857142856)
|
||||
|
||||
Ref: https://gist.github.com/endolith/255291
|
||||
|
||||
"""
|
||||
if x >= f.size-1 or x <= 2:
|
||||
return x, f[x]
|
||||
|
||||
xv = 1/2. * (f[x-1] - f[x+1]) / (f[x-1] - 2 * f[x] + f[x+1]) + x
|
||||
yv = f[x] - 1/4. * (f[x-1] - f[x+1]) * (xv - x)
|
||||
return (xv, yv)
|
||||
|
||||
def per_frame_f0(frames, m0, M):
|
||||
if not frames.any():
|
||||
HR = np.nan
|
||||
f0 = np.nan
|
||||
return f0, HR
|
||||
|
||||
#R=autocorr([frames])[0]
|
||||
R = np.correlate(frames, frames, mode='full')
|
||||
g=R[frames.size]
|
||||
|
||||
R=R[frames.size-1:]
|
||||
|
||||
if not m0:
|
||||
# estimate m0 (as the first zero crossing of R)
|
||||
m0 = np.argmin(np.diff(np.sign(R[1:])))+1
|
||||
if m0 == 1:
|
||||
m0 = R.size
|
||||
if M > R.size:
|
||||
M = R.size
|
||||
Gamma = np.zeros(M)
|
||||
|
||||
CSum = np.cumsum(frames*frames)
|
||||
with warnings.catch_warnings():
|
||||
warnings.filterwarnings('error')
|
||||
try:
|
||||
Gamma[m0:M] = R[m0:M] / (np.sqrt([g*CSum[-m0:-M:-1]])+np.finfo(float).eps)
|
||||
except Warning:
|
||||
pass
|
||||
|
||||
# compute T0 and harmonic ratio:
|
||||
if np.isnan(Gamma).any():
|
||||
HR = np.nan
|
||||
f0 = np.nan
|
||||
else:
|
||||
blag = np.argmax(Gamma)
|
||||
HR = Gamma[blag]
|
||||
interp, HR = parabolic(Gamma, blag)
|
||||
if not interp:
|
||||
f0 = np.nan
|
||||
HR = np.nan
|
||||
else:
|
||||
# get fundamental frequency:
|
||||
f0 = samplerate / interp
|
||||
if f0 > samplerate/2:
|
||||
raise ValueError("F0 value ({0}) is above the nyquist rate "
|
||||
"({1}). This shouldn't happen...".format(f0,
|
||||
samplerate/2))
|
||||
if HR >= 1:
|
||||
HR = 1
|
||||
return (f0, HR)
|
||||
|
||||
output = np.apply_along_axis(per_frame_f0, 1, frames, m0, M)
|
||||
# output = np.empty((frames.shape[0], 2))
|
||||
# for ind, i in enumerate(frames):
|
||||
# output[ind] = per_frame_f0(i, m0, M)
|
||||
|
||||
return output
|
||||
|
||||
@@ -224,12 +141,9 @@ class F0Analysis(Analysis):
|
||||
samplerate = self.AnalysedAudioFile.samplerate
|
||||
frames = args[0]
|
||||
# frames = multirate.interp(frames, 4)
|
||||
samplerate *= 1
|
||||
data = self.create_f0_analysis(frames, samplerate, **kwargs)
|
||||
f0 = data[:, 0]
|
||||
harmonic_ratio = data[:, 1]
|
||||
f0 = self.create_f0_analysis(frames, samplerate, **kwargs)
|
||||
f0_times = self.calc_f0_frame_times(f0, frames, samplerate)
|
||||
return ({'frames': f0, 'harmonic_ratio': harmonic_ratio, 'times': f0_times}, {})
|
||||
return ({'frames': f0, 'times': f0_times}, {})
|
||||
|
||||
@staticmethod
|
||||
def calc_f0_frame_times(f0frames, sample_frames, samplerate):
|
||||
@@ -246,14 +160,13 @@ class F0Analysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
f0_times = (sample_frames.shape[0]/timebins) * scale[:-1]
|
||||
# Divide by the samplerate to give times in seconds
|
||||
f0_times = f0_times / samplerate
|
||||
return f0_times
|
||||
|
||||
def analysis_formatter(self, data, selection, format):
|
||||
"""Calculate the average analysis value of the grain using the match format specified."""
|
||||
frames, times, harm_ratio = data
|
||||
frames, times = data
|
||||
# Get indexes of all valid frames (that aren't nan)
|
||||
valid_inds = np.isfinite(frames) & np.isfinite(harm_ratio)
|
||||
valid_inds = np.isfinite(frames)
|
||||
|
||||
format_style_dict = {
|
||||
'mean': np.mean,
|
||||
|
||||
@@ -46,8 +46,6 @@ class F0HarmRatioAnalysis(Analysis):
|
||||
"""
|
||||
times = self.analysis_group["F0"]["times"][:]
|
||||
hr = self.analysis_group["F0"]["harmonic_ratio"][:]
|
||||
start = start / 1000
|
||||
end = end / 1000
|
||||
vtimes = times.reshape(-1, 1)
|
||||
|
||||
nan_inds = hr < self.threshold
|
||||
@@ -65,7 +63,6 @@ class F0HarmRatioAnalysis(Analysis):
|
||||
def calc_F0HarmRatio_frame_times(F0HarmRatioframes, sample_frames, samplerate):
|
||||
|
||||
"""Calculate times for frames using sample size and samplerate."""
|
||||
samplerate *= 1
|
||||
|
||||
if hasattr(sample_frames, '__call__'):
|
||||
sample_frames = sample_frames()
|
||||
@@ -77,7 +74,6 @@ class F0HarmRatioAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
F0HarmRatio_times = (sample_frames.shape[0]/timebins) * scale[:-1]
|
||||
# Divide by the samplerate to give times in seconds
|
||||
F0HarmRatio_times = F0HarmRatio_times / samplerate
|
||||
return F0HarmRatio_times
|
||||
|
||||
def analysis_formatter(self, data, selection, format):
|
||||
|
||||
@@ -77,8 +77,6 @@ class FFTAnalysis(Analysis):
|
||||
size containing frames for these times.
|
||||
"""
|
||||
times = self.analysis_group["FFT"]["times"][:]
|
||||
start = start / 1000
|
||||
end = end / 1000
|
||||
vtimes = times.reshape(-1, 1)
|
||||
|
||||
selection = np.transpose((vtimes >= start) & (vtimes <= end))
|
||||
@@ -127,8 +125,8 @@ class FFTAnalysis(Analysis):
|
||||
|
||||
frames = stride_tricks.as_strided(
|
||||
samples,
|
||||
shape=(cols, frameSize),
|
||||
strides=(samples.strides[0]*hopSize, samples.strides[0])
|
||||
shape=(int(cols), frameSize),
|
||||
strides=(int(samples.strides[0]*hopSize), samples.strides[0])
|
||||
).copy()
|
||||
|
||||
frames *= win
|
||||
@@ -230,7 +228,6 @@ class FFTAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
fft_times = (sample_frames.shape[0]/timebins) * scale[:-1]
|
||||
# Divide by the samplerate to give times in seconds
|
||||
fft_times = fft_times / samplerate
|
||||
return fft_times
|
||||
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ class KurtosisAnalysis(Analysis):
|
||||
self.AnalysedAudioFile = AnalysedAudioFile
|
||||
|
||||
if config:
|
||||
self.window_size = config.kurtosis["window_size"] * self.AnalysedAudioFile.samplerate / 1000
|
||||
self.window_size = config.kurtosis["window_size"]
|
||||
self.overlap = 1. / config.kurtosis["overlap"]
|
||||
|
||||
try:
|
||||
@@ -75,7 +75,7 @@ class KurtosisAnalysis(Analysis):
|
||||
hopSize = int(window_size - np.floor(overlapFac * window_size))
|
||||
|
||||
# zeros at beginning (thus center of 1st window should be for sample nr. 0)
|
||||
samples = np.append(np.zeros(np.floor(window_size/2.0)), frames)
|
||||
samples = np.append(np.zeros(int(window_size/2.0)), frames)
|
||||
|
||||
# cols for windowing
|
||||
cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1
|
||||
@@ -84,13 +84,16 @@ class KurtosisAnalysis(Analysis):
|
||||
|
||||
frames = stride_tricks.as_strided(
|
||||
samples,
|
||||
shape=(cols, window_size),
|
||||
shape=(int(cols), window_size),
|
||||
strides=(samples.strides[0]*hopSize, samples.strides[0])
|
||||
).copy()
|
||||
|
||||
if window:
|
||||
win = window(window_size)
|
||||
frames *= win
|
||||
try:
|
||||
frames *= win
|
||||
except:
|
||||
pdb.set_trace()
|
||||
|
||||
frame_mean = np.mean(frames, axis=1)
|
||||
|
||||
@@ -126,5 +129,4 @@ class KurtosisAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
kurtosis_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
|
||||
# Divide by the samplerate to give times in seconds
|
||||
kurtosis_times = kurtosis_times / samplerate
|
||||
return kurtosis_times
|
||||
|
||||
@@ -63,7 +63,7 @@ class PeakAnalysis(Analysis):
|
||||
hopSize = int(window_size - np.floor(overlapFac * window_size))
|
||||
|
||||
# zeros at beginning (thus center of 1st window should be for sample nr. 0)
|
||||
samples = np.append(np.zeros(np.floor(window_size/2.0)), frames)
|
||||
samples = np.append(np.zeros(int(window_size/2.0)), frames)
|
||||
|
||||
# cols for windowing
|
||||
cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1
|
||||
@@ -72,7 +72,7 @@ class PeakAnalysis(Analysis):
|
||||
|
||||
frames = stride_tricks.as_strided(
|
||||
samples,
|
||||
shape=(cols, window_size),
|
||||
shape=(int(cols), window_size),
|
||||
strides=(samples.strides[0]*hopSize, samples.strides[0])
|
||||
).copy()
|
||||
|
||||
@@ -104,5 +104,4 @@ class PeakAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
peak_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
|
||||
# Divide by the samplerate to give times in seconds
|
||||
peak_times = peak_times / samplerate
|
||||
return peak_times
|
||||
|
||||
@@ -40,7 +40,7 @@ class RMSAnalysis(Analysis):
|
||||
self.AnalysedAudioFile = AnalysedAudioFile
|
||||
|
||||
if config:
|
||||
self.window_size = config.rms["window_size"] * self.AnalysedAudioFile.samplerate / 1000
|
||||
self.window_size = config.rms["window_size"]
|
||||
self.overlap = 1. / config.rms["overlap"]
|
||||
else:
|
||||
self.window_size=512
|
||||
@@ -88,7 +88,7 @@ class RMSAnalysis(Analysis):
|
||||
hopSize = int(window_size - np.floor(overlapFac * window_size))
|
||||
|
||||
# zeros at beginning (thus center of 1st window should be for sample nr. 0)
|
||||
samples = np.append(np.zeros(np.floor(window_size/2.0)), frames)
|
||||
samples = np.append(np.zeros(int(window_size/2.0)), frames)
|
||||
|
||||
# cols for windowing
|
||||
cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1
|
||||
@@ -97,7 +97,7 @@ class RMSAnalysis(Analysis):
|
||||
|
||||
frames = stride_tricks.as_strided(
|
||||
samples,
|
||||
shape=(cols, window_size),
|
||||
shape=(int(cols), window_size),
|
||||
strides=(samples.strides[0]*hopSize, samples.strides[0])
|
||||
).copy()
|
||||
|
||||
@@ -133,5 +133,4 @@ class RMSAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
rms_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
|
||||
# Divide by the samplerate to give times in seconds
|
||||
rms_times = rms_times / samplerate
|
||||
return rms_times
|
||||
|
||||
@@ -37,7 +37,7 @@ class SkewnessAnalysis(Analysis):
|
||||
self.AnalysedAudioFile = AnalysedAudioFile
|
||||
|
||||
if config:
|
||||
self.window_size = config.skewness["window_size"] * self.AnalysedAudioFile.samplerate / 1000
|
||||
self.window_size = config.skewness["window_size"]
|
||||
self.overlap = 1. / config.skewness["overlap"]
|
||||
|
||||
try:
|
||||
@@ -75,7 +75,7 @@ class SkewnessAnalysis(Analysis):
|
||||
hopSize = int(window_size - np.floor(overlapFac * window_size))
|
||||
|
||||
# zeros at beginning (thus center of 1st window should be for sample nr. 0)
|
||||
samples = np.append(np.zeros(np.floor(window_size/2.0)), frames)
|
||||
samples = np.append(np.zeros(int(window_size/2.0)), frames)
|
||||
|
||||
# cols for windowing
|
||||
cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1
|
||||
@@ -84,7 +84,7 @@ class SkewnessAnalysis(Analysis):
|
||||
|
||||
frames = stride_tricks.as_strided(
|
||||
samples,
|
||||
shape=(cols, window_size),
|
||||
shape=(int(cols), window_size),
|
||||
strides=(samples.strides[0]*hopSize, samples.strides[0])
|
||||
).copy()
|
||||
|
||||
@@ -125,5 +125,4 @@ class SkewnessAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
skewness_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
|
||||
# Divide by the samplerate to give times in seconds
|
||||
skewness_times = skewness_times / samplerate
|
||||
return skewness_times
|
||||
|
||||
@@ -97,6 +97,5 @@ class SpectralCentroidAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
spccntr_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float)
|
||||
# Divide by the samplerate to give times in seconds
|
||||
spccntr_times = spccntr_times / samplerate
|
||||
return spccntr_times
|
||||
|
||||
|
||||
@@ -88,7 +88,6 @@ class SpectralCrestFactorAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
spccf_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float)
|
||||
# Divide by the samplerate to give times in seconds
|
||||
spccf_times = spccf_times / samplerate
|
||||
return spccf_times
|
||||
|
||||
def mean_formatter(self, data):
|
||||
|
||||
@@ -91,7 +91,6 @@ class SpectralFlatnessAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
spcflatness_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float)
|
||||
# Divide by the samplerate to give times in seconds
|
||||
spcflatness_times = spcflatness_times / samplerate
|
||||
return spcflatness_times
|
||||
|
||||
def mean_formatter(self, data):
|
||||
|
||||
@@ -92,7 +92,6 @@ class SpectralFluxAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
spcflux_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float)
|
||||
# Divide by the samplerate to give times in seconds
|
||||
spcflux_times = spcflux_times / samplerate
|
||||
return spcflux_times
|
||||
|
||||
def mean_formatter(self, data):
|
||||
|
||||
@@ -107,7 +107,6 @@ class SpectralSpreadAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
spcsprd_times = (float(sample_frame_count)/float(timebins)) * scale[:-1].astype(float)
|
||||
# Divide by the samplerate to give times in seconds
|
||||
spcsprd_times = spcsprd_times / samplerate
|
||||
return spcsprd_times
|
||||
|
||||
def mean_formatter(self, data):
|
||||
|
||||
@@ -39,7 +39,7 @@ class VarianceAnalysis(Analysis):
|
||||
self.AnalysedAudioFile = AnalysedAudioFile
|
||||
|
||||
if config:
|
||||
self.window_size = config.variance["window_size"] * self.AnalysedAudioFile.samplerate / 1000
|
||||
self.window_size = config.variance["window_size"]
|
||||
self.overlap = 1. / config.variance["overlap"]
|
||||
|
||||
self.analysis_group = analysis_group
|
||||
@@ -68,7 +68,7 @@ class VarianceAnalysis(Analysis):
|
||||
hopSize = int(window_size - np.floor(overlapFac * window_size))
|
||||
|
||||
# zeros at beginning (thus center of 1st window should be for sample nr. 0)
|
||||
samples = np.append(np.zeros(np.floor(window_size/2.0)), frames)
|
||||
samples = np.append(np.zeros(int(window_size/2.0)), frames)
|
||||
|
||||
# cols for windowing
|
||||
cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1
|
||||
@@ -77,7 +77,7 @@ class VarianceAnalysis(Analysis):
|
||||
|
||||
frames = stride_tricks.as_strided(
|
||||
samples,
|
||||
shape=(cols, window_size),
|
||||
shape=(int(cols), window_size),
|
||||
strides=(samples.strides[0]*hopSize, samples.strides[0])
|
||||
).copy()
|
||||
|
||||
@@ -110,5 +110,4 @@ class VarianceAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
variance_times = (float(sample_frames.shape[0])/float(timebins)) * scale[:-1].astype(float)
|
||||
# Divide by the samplerate to give times in seconds
|
||||
variance_times = variance_times / samplerate
|
||||
return variance_times
|
||||
|
||||
@@ -47,7 +47,7 @@ class ZeroXAnalysis(Analysis):
|
||||
hopSize = int(window_size - np.floor(overlapFac * window_size))
|
||||
|
||||
# zeros at beginning (thus center of 1st window should be for sample nr. 0)
|
||||
samples = np.append(np.zeros(np.floor(window_size/2.0)), frames)
|
||||
samples = np.append(np.zeros(int(window_size/2.0)), frames)
|
||||
|
||||
# cols for windowing
|
||||
cols = np.ceil((len(samples) - window_size) / float(hopSize)) + 1
|
||||
@@ -61,7 +61,7 @@ class ZeroXAnalysis(Analysis):
|
||||
|
||||
frames = stride_tricks.as_strided(
|
||||
samples,
|
||||
shape=(cols, window_size),
|
||||
shape=(int(cols), window_size),
|
||||
strides=(samples.strides[0]*hopSize, samples.strides[0])
|
||||
).copy()
|
||||
zero_crossing = np.sum(np.abs(np.diff(np.sign(frames))), axis=1)
|
||||
@@ -82,7 +82,6 @@ class ZeroXAnalysis(Analysis):
|
||||
# multiply by the frame numbers.
|
||||
zerox_times = (sample_frames.shape[0]/timebins) * scale[:-1]
|
||||
# Divide by the samplerate to give times in seconds
|
||||
zerox_times = zerox_times / samplerate
|
||||
return zerox_times
|
||||
|
||||
def hdf5_dataset_formatter(self, *args, **kwargs):
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
* Copyright (C) 2015 Music Technology Group - Universitat Pompeu Fabra
|
||||
*
|
||||
* This file is part of pypYIN
|
||||
*
|
||||
* pypYIN is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Affero General Public License as published by the Free
|
||||
* Software Foundation (FSF), either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the Affero GNU General Public License
|
||||
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
|
||||
*
|
||||
* If you have any problem about this python version code, please contact: Rong Gong
|
||||
* rong.gong@upf.edu
|
||||
*
|
||||
* If you have any problem about this algorithm, I suggest you to contact: Matthias Mauch
|
||||
* m.mauch@qmul.ac.uk who is the original C++ version author of this algorithm
|
||||
*
|
||||
* If you want to refer this code, please consider this article:
|
||||
*
|
||||
* M. Mauch and S. Dixon,
|
||||
* “pYIN: A Fundamental Frequency Estimator Using Probabilistic Threshold Distributions”,
|
||||
* in Proceedings of the IEEE International Conference on Acoustics,
|
||||
* Speech, and Signal Processing (ICASSP 2014), 2014.
|
||||
*
|
||||
* M. Mauch, C. Cannam, R. Bittner, G. Fazekas, J. Salamon, J. Dai, J. Bello and S. Dixon,
|
||||
* “Computer-aided Melody Note Transcription Using the Tony Software: Accuracy and Efficiency”,
|
||||
* in Proceedings of the First International Conference on Technologies for
|
||||
* Music Notation and Representation, 2015.
|
||||
'''
|
||||
|
||||
from MonoNoteHMM import MonoNoteHMM
|
||||
from MonoNoteParameters import MonoNoteParameters
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
|
||||
class FrameOutput(object):
|
||||
def __init__(self, frameNumber, pitch, noteState):
|
||||
self.frameNumber = frameNumber
|
||||
self.pitch = pitch
|
||||
self.noteState = noteState
|
||||
|
||||
class MonoNote(object):
|
||||
|
||||
def __init__(self):
|
||||
self.hmm = MonoNoteHMM()
|
||||
|
||||
def process(self, pitchProb):
|
||||
obsProb = [self.hmm.calculatedObsProb(pitchProb[0]), ]
|
||||
for iFrame in range(1, len(pitchProb)):
|
||||
obsProb += [self.hmm.calculatedObsProb(pitchProb[iFrame])]
|
||||
out = []
|
||||
|
||||
path, scale = self.hmm.decodeViterbi(obsProb)
|
||||
|
||||
for iFrame in range(len(path)):
|
||||
currPitch = -1.0
|
||||
stateKind = 0
|
||||
|
||||
currPitch = self.hmm.par.minPitch + (path[iFrame]/self.hmm.par.nSPP) * 1.0/self.hmm.par.nPPS
|
||||
stateKind = (path[iFrame]) % self.hmm.par.nSPP + 1
|
||||
|
||||
out.append(FrameOutput(iFrame, currPitch, stateKind))
|
||||
|
||||
return out
|
||||
@@ -0,0 +1,187 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
* Copyright (C) 2015 Music Technology Group - Universitat Pompeu Fabra
|
||||
*
|
||||
* This file is part of pypYIN
|
||||
*
|
||||
* pypYIN is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Affero General Public License as published by the Free
|
||||
* Software Foundation (FSF), either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the Affero GNU General Public License
|
||||
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
|
||||
*
|
||||
* If you have any problem about this python version code, please contact: Rong Gong
|
||||
* rong.gong@upf.edu
|
||||
*
|
||||
* If you have any problem about this algorithm, I suggest you to contact: Matthias Mauch
|
||||
* m.mauch@qmul.ac.uk who is the original C++ version author of this algorithm
|
||||
*
|
||||
* If you want to refer this code, please consider this article:
|
||||
*
|
||||
* M. Mauch and S. Dixon,
|
||||
* “pYIN: A Fundamental Frequency Estimator Using Probabilistic Threshold Distributions”,
|
||||
* in Proceedings of the IEEE International Conference on Acoustics,
|
||||
* Speech, and Signal Processing (ICASSP 2014), 2014.
|
||||
*
|
||||
* M. Mauch, C. Cannam, R. Bittner, G. Fazekas, J. Salamon, J. Dai, J. Bello and S. Dixon,
|
||||
* “Computer-aided Melody Note Transcription Using the Tony Software: Accuracy and Efficiency”,
|
||||
* in Proceedings of the First International Conference on Technologies for
|
||||
* Music Notation and Representation, 2015.
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
from SparseHMM import SparseHMM
|
||||
from MonoNoteParameters import MonoNoteParameters
|
||||
from math import *
|
||||
from scipy.stats import norm
|
||||
|
||||
class MonoNoteHMM(SparseHMM):
|
||||
def __init__(self):
|
||||
SparseHMM.__init__(self)
|
||||
self.par = MonoNoteParameters()
|
||||
self.pitchDistr = []
|
||||
self.build()
|
||||
|
||||
def calculatedObsProb(self, pitchProb):
|
||||
# pitchProb is a list of pairs (pitches and their probabilities)
|
||||
|
||||
nCandidate = len(pitchProb)
|
||||
|
||||
# what is the probability of pitched
|
||||
pIsPitched = 0.0
|
||||
for iCandidate in range(nCandidate):
|
||||
# pIsPitched = pitchProb[iCandidate].second > pIsPitched ? pitchProb[iCandidate].second : pIsPitched;
|
||||
pIsPitched += pitchProb[iCandidate][1]
|
||||
|
||||
# the pitched probability, check Ryynanen's paper
|
||||
pIsPitched = pIsPitched * (1-self.par.priorWeight) + self.par.priorPitchedProb * self.par.priorWeight
|
||||
|
||||
out = np.zeros((self.par.n,), dtype=np.float64)
|
||||
tempProbSum = 0
|
||||
for i in range(self.par.n):
|
||||
if i % self.par.nSPP != 2:
|
||||
# std::cerr << getMidiPitch(i) << std::endl;
|
||||
tempProb = 0.0
|
||||
if nCandidate > 0:
|
||||
minDist = 10000.0
|
||||
minDistProb = 0.0
|
||||
minDistCandidate = 0
|
||||
for iCandidate in range(nCandidate):
|
||||
currDist = fabs(self.getMidiPitch(i)-pitchProb[iCandidate][0])
|
||||
if (currDist < minDist):
|
||||
minDist = currDist
|
||||
minDistProb = pitchProb[iCandidate][1]
|
||||
minDistCandidate = iCandidate
|
||||
tempProb = pow(minDistProb, self.par.yinTrust) * self.pitchDistr[i].pdf(pitchProb[minDistCandidate][0])
|
||||
else:
|
||||
tempProb = 1
|
||||
tempProbSum += tempProb
|
||||
out[i] = tempProb
|
||||
|
||||
for i in range(self.par.n):
|
||||
if i % self.par.nSPP != 2:
|
||||
if tempProbSum > 0:
|
||||
out[i] = out[i] / tempProbSum * pIsPitched
|
||||
else:
|
||||
# the prob of non pitched
|
||||
out[i] = (1-pIsPitched) / (self.par.nPPS * self.par.nS)
|
||||
|
||||
return out
|
||||
|
||||
def getMidiPitch(self, index):
|
||||
return self.pitchDistr[index].mean()
|
||||
|
||||
def getFrequency(self, index):
|
||||
return 440 * pow(2.0, (self.pitchDistr[index].mean()-69)/12)
|
||||
|
||||
def build(self):
|
||||
# the states are organised as follows:
|
||||
# 0-2. lowest pitch
|
||||
# 0. attack state
|
||||
# 1. stable state
|
||||
# 2. silent state
|
||||
# 3-5. second-lowest pitch
|
||||
# 3. attack state
|
||||
# ...
|
||||
|
||||
# observation distributions
|
||||
for iState in range(self.par.n):
|
||||
self.pitchDistr.append(norm(loc=0, scale=1))
|
||||
if iState % self.par.nSPP == 2:
|
||||
# silent state starts tracking
|
||||
self.init = np.append(self.init, np.float64(1.0/(self.par.nS * self.par.nPPS)))
|
||||
else:
|
||||
self.init = np.append(self.init, np.float64(0.0))
|
||||
|
||||
for iPitch in range(self.par.nS * self.par.nPPS):
|
||||
index = iPitch * self.par.nSPP # each pitch has 3 state
|
||||
mu = self.par.minPitch + iPitch * 1.0/self.par.nPPS
|
||||
self.pitchDistr[index] = norm(loc=mu, scale=self.par.sigmaYinPitchAttack)
|
||||
self.pitchDistr[index+1] = norm(loc=mu, scale=self.par.sigmaYinPitchStable)
|
||||
self.pitchDistr[index+2] = norm(loc=mu, scale=1.0) # dummy
|
||||
|
||||
# this might be the note transition probability function
|
||||
noteDistanceDistr = norm(loc=0, scale=self.par.sigma2Note)
|
||||
|
||||
for iPitch in range(self.par.nS * self.par.nPPS):
|
||||
# loop through all notes and set sparse transition probabilities
|
||||
index = iPitch * self.par.nSPP
|
||||
|
||||
# transitions from attack state
|
||||
self.fromIndex = np.append(self.fromIndex, np.uint64(index))
|
||||
self.toIndex = np.append(self.toIndex, np.uint64(index))
|
||||
self.transProb = np.append(self.transProb, np.float64(self.par.pAttackSelftrans))
|
||||
|
||||
self.fromIndex = np.append(self.fromIndex, np.uint64(index))
|
||||
self.toIndex = np.append(self.toIndex, np.uint64(index+1))
|
||||
self.transProb = np.append(self.transProb, np.float64(1-self.par.pAttackSelftrans))
|
||||
|
||||
# transitions from stable state
|
||||
self.fromIndex = np.append(self.fromIndex, np.uint64(index+1))
|
||||
self.toIndex = np.append(self.toIndex, np.uint64(index+1)) # to itself
|
||||
self.transProb = np.append(self.transProb, np.float64(self.par.pStableSelftrans))
|
||||
|
||||
self.fromIndex = np.append(self.fromIndex, np.uint64(index+1))
|
||||
self.toIndex = np.append(self.toIndex, np.uint64(index+2)) # to silent
|
||||
self.transProb = np.append(self.transProb, np.float64(self.par.pStable2Silent))
|
||||
|
||||
# the "easy" transitions from silent state
|
||||
self.fromIndex = np.append(self.fromIndex, np.uint64(index+2))
|
||||
self.toIndex = np.append(self.toIndex, np.uint64(index+2))
|
||||
self.transProb = np.append(self.transProb, np.float64(self.par.pSilentSelftrans))
|
||||
|
||||
# the more complicated transitions from the silent
|
||||
# this prob only applies to transitions from silent to non silent
|
||||
# which is the note transition
|
||||
probSumSilent = 0.0
|
||||
|
||||
tempTransProbSilent = []
|
||||
for jPitch in range(self.par.nS * self.par.nPPS):
|
||||
fromPitch = iPitch
|
||||
toPitch = jPitch
|
||||
semitoneDistance = fabs(fromPitch - toPitch) * 1.0 / self.par.nPPS
|
||||
|
||||
if semitoneDistance == 0 or \
|
||||
(semitoneDistance > self.par.minSemitoneDistance and semitoneDistance < self.par.maxJump):
|
||||
|
||||
toIndex = jPitch * self.par.nSPP # note attack index
|
||||
|
||||
tempWeightSilent = noteDistanceDistr.pdf(semitoneDistance)
|
||||
probSumSilent += tempWeightSilent
|
||||
|
||||
tempTransProbSilent.append(tempWeightSilent)
|
||||
|
||||
self.fromIndex = np.append(self.fromIndex, np.uint64(index+2)) # from a silence
|
||||
self.toIndex = np.append(self.toIndex, np.uint64(toIndex)) # to an attack
|
||||
|
||||
for i in range(len(tempTransProbSilent)):
|
||||
self.transProb = np.append(self.transProb,
|
||||
np.float64(((1-self.par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent)))
|
||||
@@ -0,0 +1,65 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
* Copyright (C) 2015 Music Technology Group - Universitat Pompeu Fabra
|
||||
*
|
||||
* This file is part of pypYIN
|
||||
*
|
||||
* pypYIN is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Affero General Public License as published by the Free
|
||||
* Software Foundation (FSF), either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the Affero GNU General Public License
|
||||
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
|
||||
*
|
||||
* If you have any problem about this python version code, please contact: Rong Gong
|
||||
* rong.gong@upf.edu
|
||||
*
|
||||
* If you have any problem about this algorithm, I suggest you to contact: Matthias Mauch
|
||||
* m.mauch@qmul.ac.uk who is the original C++ version author of this algorithm
|
||||
*
|
||||
* If you want to refer this code, please consider this article:
|
||||
*
|
||||
* M. Mauch and S. Dixon,
|
||||
* “pYIN: A Fundamental Frequency Estimator Using Probabilistic Threshold Distributions”,
|
||||
* in Proceedings of the IEEE International Conference on Acoustics,
|
||||
* Speech, and Signal Processing (ICASSP 2014), 2014.
|
||||
*
|
||||
* M. Mauch, C. Cannam, R. Bittner, G. Fazekas, J. Salamon, J. Dai, J. Bello and S. Dixon,
|
||||
* “Computer-aided Melody Note Transcription Using the Tony Software: Accuracy and Efficiency”,
|
||||
* in Proceedings of the First International Conference on Technologies for
|
||||
* Music Notation and Representation, 2015.
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
|
||||
class MonoNoteParameters(object):
|
||||
def __init__(self):
|
||||
self.minPitch = 35
|
||||
self.nPPS = 3 # 3 steps per semitone
|
||||
self.nS = 69
|
||||
self.nSPP = 3 # states per pitch
|
||||
self.n = 0
|
||||
self.initPi = np.array([], dtype=np.float64)
|
||||
self.pAttackSelftrans = 0.9
|
||||
self.pStableSelftrans = 0.99
|
||||
self.pStable2Silent = 0.01
|
||||
self.pSilentSelftrans = 0.9999
|
||||
self.sigma2Note = 0.7
|
||||
self.maxJump = 13.0
|
||||
self.pInterSelftrans = 0.0
|
||||
self.priorPitchedProb = 0.7
|
||||
self.priorWeight = 0.5
|
||||
self.minSemitoneDistance = 0.5
|
||||
self.sigmaYinPitchAttack = 5.0
|
||||
self.sigmaYinPitchStable = 0.8
|
||||
self.sigmaYinPitchInter = 0.1
|
||||
self.yinTrust = 0.1
|
||||
|
||||
self.n = self.nPPS * self.nS * self.nSPP
|
||||
@@ -0,0 +1,75 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
* Copyright (C) 2015 Music Technology Group - Universitat Pompeu Fabra
|
||||
*
|
||||
* This file is part of pypYIN
|
||||
*
|
||||
* pypYIN is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Affero General Public License as published by the Free
|
||||
* Software Foundation (FSF), either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the Affero GNU General Public License
|
||||
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
|
||||
*
|
||||
* If you have any problem about this python version code, please contact: Rong Gong
|
||||
* rong.gong@upf.edu
|
||||
*
|
||||
* If you have any problem about this algorithm, I suggest you to contact: Matthias Mauch
|
||||
* m.mauch@qmul.ac.uk who is the original C++ version author of this algorithm
|
||||
*
|
||||
* If you want to refer this code, please consider this article:
|
||||
*
|
||||
* M. Mauch and S. Dixon,
|
||||
* “pYIN: A Fundamental Frequency Estimator Using Probabilistic Threshold Distributions”,
|
||||
* in Proceedings of the IEEE International Conference on Acoustics,
|
||||
* Speech, and Signal Processing (ICASSP 2014), 2014.
|
||||
*
|
||||
* M. Mauch, C. Cannam, R. Bittner, G. Fazekas, J. Salamon, J. Dai, J. Bello and S. Dixon,
|
||||
* “Computer-aided Melody Note Transcription Using the Tony Software: Accuracy and Efficiency”,
|
||||
* in Proceedings of the First International Conference on Technologies for
|
||||
* Music Notation and Representation, 2015.
|
||||
'''
|
||||
|
||||
from MonoPitchHMM import MonoPitchHMM
|
||||
import numpy as np
|
||||
from math import *
|
||||
|
||||
class MonoPitch(object):
|
||||
def __init__(self):
|
||||
self.hmm = MonoPitchHMM()
|
||||
|
||||
def process(self, pitchProb):
|
||||
obsProb = [self.hmm.calculatedObsProb(pitchProb[0]),]
|
||||
for iFrame in range(1,len(pitchProb)):
|
||||
obsProb += [self.hmm.calculatedObsProb(pitchProb[iFrame])]
|
||||
|
||||
out = np.array([], dtype=np.float32)
|
||||
|
||||
path, scale = self.hmm.decodeViterbi(obsProb)
|
||||
|
||||
for iFrame in range(len(path)):
|
||||
hmmFreq = self.hmm.m_freqs[path[iFrame]]
|
||||
bestFreq = 0.0
|
||||
leastDist = 10000.0
|
||||
if hmmFreq > 0:
|
||||
# This was a Yin estimate, so try to get original pitch estimate back
|
||||
# ... a bit hacky, since we could have direclty saved the frequency
|
||||
# that was assigned to the HMM bin in hmm.calculateObsProb -- but would
|
||||
# have had to rethink the interface of that method.
|
||||
for iPitch in range(len(pitchProb[iFrame])):
|
||||
freq = 440. * pow(2.0, (pitchProb[iFrame][iPitch][0] - 69)/12.0)
|
||||
dist = fabs(hmmFreq-freq)
|
||||
if dist < leastDist:
|
||||
leastDist = dist
|
||||
bestFreq = freq
|
||||
else:
|
||||
bestFreq = hmmFreq
|
||||
out = np.append(out, bestFreq)
|
||||
return out
|
||||
@@ -0,0 +1,136 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
* Copyright (C) 2015 Music Technology Group - Universitat Pompeu Fabra
|
||||
*
|
||||
* This file is part of pypYIN
|
||||
*
|
||||
* pypYIN is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Affero General Public License as published by the Free
|
||||
* Software Foundation (FSF), either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the Affero GNU General Public License
|
||||
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
|
||||
*
|
||||
* If you have any problem about this python version code, please contact: Rong Gong
|
||||
* rong.gong@upf.edu
|
||||
*
|
||||
* If you have any problem about this algorithm, I suggest you to contact: Matthias Mauch
|
||||
* m.mauch@qmul.ac.uk who is the original C++ version author of this algorithm
|
||||
*
|
||||
* If you want to refer this code, please consider this article:
|
||||
*
|
||||
* M. Mauch and S. Dixon,
|
||||
* “pYIN: A Fundamental Frequency Estimator Using Probabilistic Threshold Distributions”,
|
||||
* in Proceedings of the IEEE International Conference on Acoustics,
|
||||
* Speech, and Signal Processing (ICASSP 2014), 2014.
|
||||
*
|
||||
* M. Mauch, C. Cannam, R. Bittner, G. Fazekas, J. Salamon, J. Dai, J. Bello and S. Dixon,
|
||||
* “Computer-aided Melody Note Transcription Using the Tony Software: Accuracy and Efficiency”,
|
||||
* in Proceedings of the First International Conference on Technologies for
|
||||
* Music Notation and Representation, 2015.
|
||||
'''
|
||||
|
||||
from SparseHMM import SparseHMM
|
||||
from math import *
|
||||
import numpy as np
|
||||
|
||||
|
||||
class MonoPitchHMM(SparseHMM):
|
||||
|
||||
def __init__(self):
|
||||
SparseHMM.__init__(self)
|
||||
self.m_minFreq = 61.735
|
||||
self.m_nBPS = 5
|
||||
self.m_nPitch = 0
|
||||
self.m_selfTrans = 0.99
|
||||
self.m_yinTrust = 0.5
|
||||
self.m_transitionWidth = 5*(np.uint64(self.m_nBPS/2)) + 1 # 2 semi-tones of frame jump
|
||||
self.m_nPitch = 69 * self.m_nBPS # 69 semi-tone, each semi-tone divided to 5, step is 20 cents
|
||||
self.m_freqs = np.zeros(2*self.m_nPitch, dtype=np.float64)
|
||||
for iPitch in range(self.m_nPitch):
|
||||
self.m_freqs[iPitch] = self.m_minFreq * pow(2, iPitch * 1.0 / (12 * self.m_nBPS)) # 0 to m_nPitch-1 positive pitch
|
||||
self.m_freqs[iPitch+self.m_nPitch] = -self.m_freqs[iPitch] # m_nPitch to 2*m_nPitch-1 negative pitch
|
||||
self.build()
|
||||
|
||||
def calculatedObsProb(self, pitchProb):
|
||||
# pitchProb is the pitch candidates of one frame
|
||||
out = np.zeros((2*self.m_nPitch+1,), dtype=np.float64)
|
||||
probYinPitched = 0.0
|
||||
# BIN THE PITCHES
|
||||
for iPair in range(len(pitchProb)):
|
||||
freq = 440. * pow(2.0, (pitchProb[iPair][0] - 69)/12.0)
|
||||
if freq <= self.m_minFreq: continue
|
||||
d = 0
|
||||
oldd = 1000
|
||||
for iPitch in range(self.m_nPitch):
|
||||
d = fabs(freq-self.m_freqs[iPitch])
|
||||
if oldd < d and iPitch > 0:
|
||||
# previous bin must have been the closest
|
||||
# when iPitch move far away from freq candidate
|
||||
# add pitch prob to probYinPitched, break
|
||||
out[iPitch-1] = pitchProb[iPair][1]
|
||||
probYinPitched += out[iPitch-1]
|
||||
break
|
||||
oldd = d
|
||||
|
||||
probReallyPitched = self.m_yinTrust * probYinPitched
|
||||
# damn, I forget what this is all about...
|
||||
# don't understand this part, inspired by note tracking method
|
||||
for iPitch in range(self.m_nPitch):
|
||||
if probYinPitched > 0: out[iPitch] *= (probReallyPitched/probYinPitched) # times self.m_yinTrust
|
||||
# non voiced pitch obs
|
||||
# 1 - sum(pitchProb)*0.5
|
||||
# this observation prob is very small, but equal for every unvoiced state
|
||||
# so that the sum of them are 1 - sum(pitchProb)*0.5
|
||||
out[iPitch+self.m_nPitch] = (1 - probReallyPitched) / self.m_nPitch
|
||||
|
||||
return out
|
||||
|
||||
def build(self):
|
||||
|
||||
# initial vector, uniform distribution
|
||||
self.init = np.ones((2*self.m_nPitch), dtype=np.float64) * 1.0/2*self.m_nPitch
|
||||
|
||||
# transitions
|
||||
for iPitch in range(self.m_nPitch):
|
||||
theoreticalMinNextPitch = int(iPitch)-int(self.m_transitionWidth/2)
|
||||
minNextPitch = iPitch-int(self.m_transitionWidth/2) if iPitch>self.m_transitionWidth/2 else 0
|
||||
maxNextPitch = iPitch+int(self.m_transitionWidth/2) if iPitch<self.m_nPitch-self.m_transitionWidth/2 else self.m_nPitch-1
|
||||
|
||||
# weight vector, triangle, maximum is at iPitch
|
||||
weightSum = 0
|
||||
weights = np.array([], dtype=np.float64)
|
||||
for i in range(minNextPitch, maxNextPitch+1):
|
||||
if i <= iPitch:
|
||||
weights = np.append(weights, np.float64(i-theoreticalMinNextPitch+1))
|
||||
else:
|
||||
weights = np.append(weights, np.float64(iPitch-theoreticalMinNextPitch+1-(i-iPitch)))
|
||||
weightSum += weights[len(weights)-1]
|
||||
|
||||
for i in range(minNextPitch, maxNextPitch+1):
|
||||
# from voiced to voiced
|
||||
self.fromIndex = np.append(self.fromIndex, np.uint64(iPitch))
|
||||
self.toIndex = np.append(self.toIndex, np.uint64(i))
|
||||
self.transProb = np.append(self.transProb, np.float64(weights[i-minNextPitch] / weightSum * self.m_selfTrans))
|
||||
|
||||
# from voiced to non voiced
|
||||
self.fromIndex = np.append(self.fromIndex, np.uint64(iPitch))
|
||||
self.toIndex = np.append(self.toIndex, np.uint64(i+self.m_nPitch))
|
||||
self.transProb = np.append(self.transProb, np.float64(weights[i-minNextPitch] / weightSum * (1-self.m_selfTrans)))
|
||||
|
||||
# from non voiced to non voiced
|
||||
self.fromIndex = np.append(self.fromIndex, np.uint64(iPitch+self.m_nPitch))
|
||||
self.toIndex = np.append(self.toIndex, np.uint64(i+self.m_nPitch))
|
||||
self.transProb = np.append(self.transProb, np.float64(weights[i-minNextPitch] / weightSum * self.m_selfTrans))
|
||||
|
||||
# from non voiced to voiced
|
||||
self.fromIndex = np.append(self.fromIndex, np.uint64(iPitch+self.m_nPitch))
|
||||
self.toIndex = np.append(self.toIndex, np.uint64(i))
|
||||
self.transProb = np.append(self.transProb, np.float64(weights[i-minNextPitch] / weightSum * (1-self.m_selfTrans)))
|
||||
@@ -0,0 +1,129 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
* Copyright (C) 2015 Music Technology Group - Universitat Pompeu Fabra
|
||||
*
|
||||
* This file is part of pypYIN
|
||||
*
|
||||
* pypYIN is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Affero General Public License as published by the Free
|
||||
* Software Foundation (FSF), either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the Affero GNU General Public License
|
||||
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
|
||||
*
|
||||
* If you have any problem about this python version code, please contact: Rong Gong
|
||||
* rong.gong@upf.edu
|
||||
*
|
||||
* If you have any problem about this algorithm, I suggest you to contact: Matthias Mauch
|
||||
* m.mauch@qmul.ac.uk who is the original C++ version author of this algorithm
|
||||
*
|
||||
* If you want to refer this code, please consider this article:
|
||||
*
|
||||
* M. Mauch and S. Dixon,
|
||||
* “pYIN: A Fundamental Frequency Estimator Using Probabilistic Threshold Distributions”,
|
||||
* in Proceedings of the IEEE International Conference on Acoustics,
|
||||
* Speech, and Signal Processing (ICASSP 2014), 2014.
|
||||
*
|
||||
* M. Mauch, C. Cannam, R. Bittner, G. Fazekas, J. Salamon, J. Dai, J. Bello and S. Dixon,
|
||||
* “Computer-aided Melody Note Transcription Using the Tony Software: Accuracy and Efficiency”,
|
||||
* in Proceedings of the First International Conference on Technologies for
|
||||
* Music Notation and Representation, 2015.
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
from math import *
|
||||
|
||||
class SparseHMM(object):
|
||||
|
||||
def __init__(self):
|
||||
self.init = np.array([], dtype=np.float64)
|
||||
self.transProb = np.array([], dtype=np.float64)
|
||||
self.fromIndex = np.array([], dtype=np.uint64)
|
||||
self.toIndex = np.array([],dtype=np.uint64)
|
||||
|
||||
def calculatedObsProb(self, data):
|
||||
# to be overloaded
|
||||
return data
|
||||
|
||||
def decodeViterbi(self, obsProb):
|
||||
|
||||
if len(obsProb) < 1: return np.array([], dtype=np.int)
|
||||
|
||||
nState = len(self.init)
|
||||
nFrame = len(obsProb)
|
||||
|
||||
# check for consistency
|
||||
nTrans = len(self.transProb)
|
||||
|
||||
# declaring variables
|
||||
scale = np.array([], dtype=np.float64)
|
||||
delta = np.zeros((nState,), dtype=np.float64)
|
||||
oldDelta = np.zeros((nState,), dtype=np.float64)
|
||||
path = np.ones(nFrame, dtype=np.int) * (nState-1) # the final output path
|
||||
|
||||
deltasum = 0
|
||||
|
||||
# initialise first frame in time 1, rabiner 32a
|
||||
for iState in range(nState):
|
||||
oldDelta[iState] = self.init[iState] * obsProb[0][iState]
|
||||
deltasum += oldDelta[iState]
|
||||
|
||||
for iState in range(nState):
|
||||
oldDelta[iState] /= deltasum # normalise (scale)
|
||||
|
||||
scale = np.append(scale, np.double(1.0/deltasum))
|
||||
psi = [np.zeros(nState, dtype=np.int),] # matrix of remembered indices of the best transitions
|
||||
|
||||
# rest of forward step
|
||||
for iFrame in range(1, nFrame):
|
||||
deltasum = 0
|
||||
psi = psi + [np.zeros(nState, dtype=np.int)]
|
||||
|
||||
# calculate best previous state for every current state
|
||||
|
||||
# this is the "sparse" loop
|
||||
for iTrans in range(nTrans):
|
||||
fromState = self.fromIndex[iTrans]
|
||||
toState = self.toIndex[iTrans]
|
||||
currentTransProb = self.transProb[iTrans]
|
||||
|
||||
currentValue = oldDelta[fromState] * currentTransProb
|
||||
if (currentValue > delta[toState]): # to find the maximum
|
||||
delta[toState] = currentValue # just change the toState delta, will be multiplied by the right obs later!
|
||||
psi[iFrame][toState] = fromState # rabiner 33b
|
||||
|
||||
for jState in range(nState):
|
||||
delta[jState] *= obsProb[iFrame][jState]
|
||||
deltasum += delta[jState]
|
||||
|
||||
if deltasum > 0:
|
||||
for iState in range(nState):
|
||||
oldDelta[iState] = delta[iState] / deltasum # normalise (scale)
|
||||
delta[iState] = 0
|
||||
scale = np.append(scale, np.double(1.0/deltasum))
|
||||
else:
|
||||
print "WARNING: Viterbi has been fed some zero probabilities, at least they become zero at frame " + str(iFrame) + " in combination with the model."
|
||||
for iState in range(nState):
|
||||
oldDelta[iState] = 1.0/nState
|
||||
delta[iState] = 0
|
||||
scale = np.append(scale, np.double(1.0/deltasum))
|
||||
|
||||
# initialise backward step
|
||||
bestValue = 0
|
||||
for iState in range(nState):
|
||||
currentValue = oldDelta[iState] # use directly the normalised delta
|
||||
if currentValue > bestValue:
|
||||
bestValue = currentValue # rabiner 34b
|
||||
path[nFrame-1] = iState # path of last frame
|
||||
|
||||
for iFrame in reversed(range(nFrame-1)):
|
||||
path[iFrame] = psi[iFrame+1][path[iFrame+1]]
|
||||
|
||||
return path, scale
|
||||
@@ -0,0 +1,120 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
* Copyright (C) 2015 Music Technology Group - Universitat Pompeu Fabra
|
||||
*
|
||||
* This file is part of pypYIN
|
||||
*
|
||||
* pypYIN is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Affero General Public License as published by the Free
|
||||
* Software Foundation (FSF), either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the Affero GNU General Public License
|
||||
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
|
||||
*
|
||||
* If you have any problem about this python version code, please contact: Rong Gong
|
||||
* rong.gong@upf.edu
|
||||
*
|
||||
* If you have any problem about this algorithm, I suggest you to contact: Matthias Mauch
|
||||
* m.mauch@qmul.ac.uk who is the original C++ version author of this algorithm
|
||||
*
|
||||
* If you want to refer this code, please consider this article:
|
||||
*
|
||||
* M. Mauch and S. Dixon,
|
||||
* “pYIN: A Fundamental Frequency Estimator Using Probabilistic Threshold Distributions”,
|
||||
* in Proceedings of the IEEE International Conference on Acoustics,
|
||||
* Speech, and Signal Processing (ICASSP 2014), 2014.
|
||||
*
|
||||
* M. Mauch, C. Cannam, R. Bittner, G. Fazekas, J. Salamon, J. Dai, J. Bello and S. Dixon,
|
||||
* “Computer-aided Melody Note Transcription Using the Tony Software: Accuracy and Efficiency”,
|
||||
* in Proceedings of the First International Conference on Technologies for
|
||||
* Music Notation and Representation, 2015.
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
from math import *
|
||||
import YinUtil
|
||||
|
||||
class Yin(object):
|
||||
|
||||
def __init__(self):
|
||||
self.m_frameSize = 2048
|
||||
self.m_inputSampleRate = 44100
|
||||
self.m_thresh = 0.2
|
||||
self.m_threshDistr = 2
|
||||
self.m_yinBufferSize = self.m_frameSize/2
|
||||
self.m_fast = True
|
||||
|
||||
def Yin(self, frameSize, inputSampleRate, thresh = 0.2, fast = True):
|
||||
self.m_frameSize = frameSize
|
||||
self.m_inputSampleRate = inputSampleRate
|
||||
self.m_thresh = thresh
|
||||
self.m_threshDistr = 2
|
||||
self.m_yinBufferSize = frameSize/2
|
||||
self.m_fast = fast
|
||||
|
||||
class YinOutput(object):
|
||||
|
||||
def __init__(self, f0 = 0.0, periodicity = 0.0, rms = 0.0):
|
||||
self.f0 = f0
|
||||
self.periodicity = periodicity
|
||||
self.rms = rms
|
||||
self.salience = np.array([], dtype=np.float64)
|
||||
self.freqProb = np.array([], dtype=np.float64)
|
||||
|
||||
def processProbabilisticYin(self, input):
|
||||
|
||||
# calculate aperiodicity function for all periods, output stores in yinBuffer
|
||||
if self.m_fast:
|
||||
yinBuffer = YinUtil.fastDifference(input, self.m_yinBufferSize)
|
||||
else:
|
||||
yinBuffer = YinUtil.slowDifference(input, self.m_yinBufferSize)
|
||||
|
||||
yinBuffer = YinUtil.cumulativeDifference(yinBuffer ,self.m_yinBufferSize)
|
||||
|
||||
peakProbability = YinUtil.yinProb(yinBuffer, self.m_threshDistr, self.m_yinBufferSize, 0, 0)
|
||||
|
||||
# calculate overall "probability" from peak probability, overall "probability" probSum seems never be used
|
||||
rms = sqrt(YinUtil.sumSquare(input, 0, self.m_yinBufferSize)/self.m_yinBufferSize)
|
||||
yo = Yin.YinOutput(0.0, 0.0, rms)
|
||||
|
||||
firstStack = False
|
||||
for iBuf in range(self.m_yinBufferSize):
|
||||
yo.salience = np.append(yo.salience, peakProbability[iBuf])
|
||||
|
||||
# if peakProb > 0, a fundamental frequency candidate is generated
|
||||
if peakProbability[iBuf] > 0:
|
||||
currentF0 = self.m_inputSampleRate * (1.0 / YinUtil.parabolicInterpolation(yinBuffer, iBuf, self.m_yinBufferSize))
|
||||
if firstStack == False:
|
||||
yo.freqProb = np.array([np.array([currentF0, peakProbability[iBuf]], dtype=np.float64),])
|
||||
firstStack = True
|
||||
else:
|
||||
yo.freqProb = np.vstack((yo.freqProb, np.array([currentF0, peakProbability[iBuf]], dtype=np.float64)))
|
||||
return yo
|
||||
|
||||
def setThreshold(self, parameter):
|
||||
|
||||
self.m_thresh = parameter
|
||||
return 0
|
||||
|
||||
def setThresholdDistr(self, parameter):
|
||||
|
||||
self.m_threshDistr = parameter
|
||||
return 0
|
||||
|
||||
def setFrameSize(self, parameter):
|
||||
|
||||
m_frameSize = parameter
|
||||
m_yinBufferSize = m_frameSize/2
|
||||
return 0
|
||||
|
||||
def setFast(self, parameter):
|
||||
|
||||
m_fast = parameter
|
||||
return 0
|
||||
@@ -0,0 +1,213 @@
|
||||
from math import *
|
||||
import numpy as np
|
||||
|
||||
def slowDifference(input, yinBufferSize):
|
||||
|
||||
yinBuffer = np.zeros((yinBufferSize,), dtype=np.float64)
|
||||
|
||||
startPoint = 0
|
||||
endPoint = 0
|
||||
for i in range(yinBufferSize):
|
||||
startPoint = yinBufferSize/2 - i/2
|
||||
endPoint = startPoint + yinBufferSize
|
||||
for j in range(startPoint,endPoint):
|
||||
delta = input[i+j] - input[j]
|
||||
yinBuffer[i] += delta * delta
|
||||
|
||||
return yinBuffer
|
||||
|
||||
def fastDifference(input, yinBufferSize):
|
||||
|
||||
frameSize = 2 * yinBufferSize
|
||||
|
||||
# DECLARE AND INITIALISE
|
||||
yinBuffer = np.zeros((yinBufferSize,), dtype=np.float64)
|
||||
powerTerms = np.zeros((yinBufferSize,), dtype=np.float64)
|
||||
|
||||
kernel = np.zeros((frameSize,), dtype=np.float64)
|
||||
yinStyleACFReal = np.zeros((frameSize,), dtype=np.float64)
|
||||
yinStyleACFImag = np.zeros((frameSize,), dtype=np.float64)
|
||||
|
||||
# POWER TERM CALCULATION
|
||||
# ... for the power terms in equation (7) in the Yin paper
|
||||
for j in range(yinBufferSize):
|
||||
powerTerms[0] += input[j] * input[j]
|
||||
|
||||
# now iteratively calculate all others, second term in equation (7)
|
||||
for tau in range(1, yinBufferSize):
|
||||
powerTerms[tau] = powerTerms[tau-1] - input[tau-1] * input[tau-1] + input[tau+yinBufferSize] * input[tau+yinBufferSize]
|
||||
|
||||
# YIN-STYLE AUTOCORRELATION via FFT
|
||||
# 1. data
|
||||
at = np.fft.fft(input, frameSize)
|
||||
audioTransformedReal = at.real
|
||||
audioTransformedImag = at.imag
|
||||
|
||||
# 2. half of the data, disguised as a convolution kernel
|
||||
for j in range(yinBufferSize):
|
||||
kernel[j] = input[yinBufferSize-1-j]
|
||||
kt = np.fft.fft(kernel, frameSize)
|
||||
kernelTransformedReal = kt.real
|
||||
kernelTransformedImag = kt.imag
|
||||
|
||||
# 3. convolution via complex multiplication
|
||||
for j in range(frameSize):
|
||||
yinStyleACFReal[j] = audioTransformedReal[j]*kernelTransformedReal[j] - audioTransformedImag[j]*kernelTransformedImag[j]
|
||||
yinStyleACFImag[j] = audioTransformedReal[j]*kernelTransformedImag[j] + audioTransformedImag[j]*kernelTransformedReal[j]
|
||||
yinStyleACF = np.array(yinStyleACFReal, dtype=np.float64) + np.array(yinStyleACFImag, dtype=np.float64)*1j
|
||||
iat = np.fft.ifft(yinStyleACF, frameSize)
|
||||
|
||||
# CALCULATION OF difference function
|
||||
for j in range(yinBufferSize):
|
||||
yinBuffer[j] = powerTerms[0] + powerTerms[j] - 2 * iat.real[j+yinBufferSize-1]
|
||||
|
||||
return yinBuffer
|
||||
|
||||
def cumulativeDifference(yinBuffer ,yinBufferSize):
|
||||
|
||||
yinBuffer[0] = 1.0
|
||||
runningSum = 0.0
|
||||
|
||||
for tau in range(1, yinBufferSize):
|
||||
runningSum += yinBuffer[tau]
|
||||
if runningSum == 0:
|
||||
yinBuffer[tau] = 1
|
||||
else:
|
||||
yinBuffer[tau] *= tau / runningSum
|
||||
|
||||
return yinBuffer
|
||||
|
||||
uniformDist = [0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000]
|
||||
betaDist1 = [0.028911,0.048656,0.061306,0.068539,0.071703,0.071877,0.069915,0.066489,0.062117,0.057199,0.052034,0.046844,0.041786,0.036971,0.032470,0.028323,0.024549,0.021153,0.018124,0.015446,0.013096,0.011048,0.009275,0.007750,0.006445,0.005336,0.004397,0.003606,0.002945,0.002394,0.001937,0.001560,0.001250,0.000998,0.000792,0.000626,0.000492,0.000385,0.000300,0.000232,0.000179,0.000137,0.000104,0.000079,0.000060,0.000045,0.000033,0.000024,0.000018,0.000013,0.000009,0.000007,0.000005,0.000003,0.000002,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000]
|
||||
betaDist2 = [0.012614,0.022715,0.030646,0.036712,0.041184,0.044301,0.046277,0.047298,0.047528,0.047110,0.046171,0.044817,0.043144,0.041231,0.039147,0.036950,0.034690,0.032406,0.030133,0.027898,0.025722,0.023624,0.021614,0.019704,0.017900,0.016205,0.014621,0.013148,0.011785,0.010530,0.009377,0.008324,0.007366,0.006497,0.005712,0.005005,0.004372,0.003806,0.003302,0.002855,0.002460,0.002112,0.001806,0.001539,0.001307,0.001105,0.000931,0.000781,0.000652,0.000542,0.000449,0.000370,0.000303,0.000247,0.000201,0.000162,0.000130,0.000104,0.000082,0.000065,0.000051,0.000039,0.000030,0.000023,0.000018,0.000013,0.000010,0.000007,0.000005,0.000004,0.000003,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000]
|
||||
betaDist3 = [0.006715,0.012509,0.017463,0.021655,0.025155,0.028031,0.030344,0.032151,0.033506,0.034458,0.035052,0.035331,0.035332,0.035092,0.034643,0.034015,0.033234,0.032327,0.031314,0.030217,0.029054,0.027841,0.026592,0.025322,0.024042,0.022761,0.021489,0.020234,0.019002,0.017799,0.016630,0.015499,0.014409,0.013362,0.012361,0.011407,0.010500,0.009641,0.008830,0.008067,0.007351,0.006681,0.006056,0.005475,0.004936,0.004437,0.003978,0.003555,0.003168,0.002814,0.002492,0.002199,0.001934,0.001695,0.001481,0.001288,0.001116,0.000963,0.000828,0.000708,0.000603,0.000511,0.000431,0.000361,0.000301,0.000250,0.000206,0.000168,0.000137,0.000110,0.000088,0.000070,0.000055,0.000043,0.000033,0.000025,0.000019,0.000014,0.000010,0.000007,0.000005,0.000004,0.000002,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000]
|
||||
betaDist4 = [0.003996,0.007596,0.010824,0.013703,0.016255,0.018501,0.020460,0.022153,0.023597,0.024809,0.025807,0.026607,0.027223,0.027671,0.027963,0.028114,0.028135,0.028038,0.027834,0.027535,0.027149,0.026687,0.026157,0.025567,0.024926,0.024240,0.023517,0.022763,0.021983,0.021184,0.020371,0.019548,0.018719,0.017890,0.017062,0.016241,0.015428,0.014627,0.013839,0.013068,0.012315,0.011582,0.010870,0.010181,0.009515,0.008874,0.008258,0.007668,0.007103,0.006565,0.006053,0.005567,0.005107,0.004673,0.004264,0.003880,0.003521,0.003185,0.002872,0.002581,0.002312,0.002064,0.001835,0.001626,0.001434,0.001260,0.001102,0.000959,0.000830,0.000715,0.000612,0.000521,0.000440,0.000369,0.000308,0.000254,0.000208,0.000169,0.000136,0.000108,0.000084,0.000065,0.000050,0.000037,0.000027,0.000019,0.000014,0.000009,0.000006,0.000004,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000]
|
||||
single10 = [0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000]
|
||||
single15 = [0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000]
|
||||
single20 = [0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000]
|
||||
|
||||
def yinProb(yinBuffer, prior, yinBufferSize, minTau0, maxTau0):
|
||||
|
||||
minTau = 2
|
||||
maxTau = yinBufferSize
|
||||
|
||||
# adapt period range, if necessary
|
||||
if minTau0 > 0 and minTau0 < maxTau0: minTau = minTau0
|
||||
if maxTau0 > 0 and maxTau0 < yinBufferSize and maxTau0 > minTau: maxTau = maxTau0
|
||||
|
||||
minWeight = 0.01
|
||||
thresholds = np.array([], dtype=np.float32)
|
||||
distribution = np.array([], dtype=np.float32)
|
||||
peakProb = np.zeros((yinBufferSize,), dtype=np.float64)
|
||||
|
||||
nThreshold = 100
|
||||
nThresholdInt = nThreshold
|
||||
|
||||
for i in range(nThresholdInt):
|
||||
|
||||
thresholds = np.append(thresholds, np.double(0.01 + i*0.01))
|
||||
|
||||
if prior == 0:
|
||||
distribution = np.append(distribution, uniformDist[i])
|
||||
elif prior == 1:
|
||||
distribution = np.append(distribution, betaDist1[i])
|
||||
elif prior == 2:
|
||||
distribution = np.append(distribution, betaDist2[i])
|
||||
elif prior == 3:
|
||||
distribution = np.append(distribution, betaDist3[i])
|
||||
elif prior == 4:
|
||||
distribution = np.append(distribution, betaDist4[i])
|
||||
elif prior == 5:
|
||||
distribution = np.append(distribution, single10[i])
|
||||
elif prior == 6:
|
||||
distribution = np.append(distribution, single15[i])
|
||||
elif prior == 7:
|
||||
distribution = np.append(distribution, single20[i])
|
||||
else:
|
||||
distribution = np.append(distribution, uniformDist[i])
|
||||
|
||||
tau = minTau
|
||||
minInd = 0
|
||||
minVal = 42.0
|
||||
sumProb = 0.0
|
||||
|
||||
while tau+1 < maxTau:
|
||||
# yinBuffer < 1 && ...
|
||||
if yinBuffer[tau] < thresholds[len(thresholds)-1] and yinBuffer[tau+1] < yinBuffer[tau]:
|
||||
# search for all dip points
|
||||
while tau + 1 < maxTau and yinBuffer[tau+1] < yinBuffer[tau]:
|
||||
tau += 1
|
||||
# tau is now local minimum,
|
||||
# because it's the turning point from yinBuffer[tau+1] < yinBuffer[tau] to yinBuffer[tau+1] >= yinBuffer[tau]
|
||||
if yinBuffer[tau] < minVal and tau > 2:
|
||||
minVal = yinBuffer[tau] # mininum d'
|
||||
minInd = tau
|
||||
currThreshInd = nThresholdInt-1
|
||||
# formula (4), the threshold is on y-axis, the probability of P is the cumulation of distribution
|
||||
# when d'(tau) < threshold
|
||||
while thresholds[currThreshInd] > yinBuffer[tau] and currThreshInd > -1:
|
||||
peakProb[tau] += distribution[currThreshInd]
|
||||
currThreshInd -= 1
|
||||
|
||||
sumProb += peakProb[tau]
|
||||
tau += 1
|
||||
else:
|
||||
tau += 1
|
||||
|
||||
if peakProb[minInd] > 1:
|
||||
print "WARNING: yin has prob > 1 ??? I'm returning all zeros instead."
|
||||
return np.zeros((yinBufferSize,), dtype=np.float64)
|
||||
|
||||
nonPeakProb = 1.0
|
||||
if sumProb > 0:
|
||||
for i in range(minTau, maxTau):
|
||||
# nomalization, the max prob will be peakProb[minInd]
|
||||
peakProb[i] = peakProb[i] / sumProb * peakProb[minInd]
|
||||
nonPeakProb -= peakProb[i]
|
||||
if minInd > 0:
|
||||
# adds nonPeakProb only for the prob with minimum d(tau)
|
||||
# because here we have a small threshold s, for all tau d'(tau) > s
|
||||
# we choose tau as the index of global minimum of d'
|
||||
peakProb[minInd] += nonPeakProb * minWeight
|
||||
|
||||
return peakProb
|
||||
|
||||
def parabolicInterpolation(yinBuffer, tau, yinBufferSize):
|
||||
|
||||
# this is taken almost literally from Joren Six's Java implementation
|
||||
if tau == yinBufferSize: # not valid anyway.
|
||||
return tau
|
||||
|
||||
betterTau = 0.0
|
||||
if tau > 0 and tau < yinBufferSize-1:
|
||||
s0 = yinBuffer[tau-1]
|
||||
s1 = yinBuffer[tau]
|
||||
s2 = yinBuffer[tau+1]
|
||||
|
||||
adjustment = (s2 - s0) / (2 * (2 * s1 - s2 - s0))
|
||||
|
||||
if np.fabs(adjustment)>1: adjustment = 0
|
||||
|
||||
betterTau = tau + adjustment
|
||||
else:
|
||||
#print "WARNING: can't do interpolation at the edge (tau = " + str(tau) + "), will return un-interpolated value.\n"
|
||||
betterTau = tau
|
||||
|
||||
return betterTau
|
||||
|
||||
def sumSquare(input, start, end):
|
||||
out = 0.0
|
||||
for i in range(start,end):
|
||||
out += input[i] * input[i]
|
||||
|
||||
return out
|
||||
|
||||
def RMS(inputBuffers, blockSize):
|
||||
|
||||
rms = 0.0
|
||||
for i in range(blockSize):
|
||||
rms += inputBuffers[i] * inputBuffers[i]
|
||||
rms /= blockSize
|
||||
rms = sqrt(rms)
|
||||
|
||||
return rms
|
||||
@@ -0,0 +1,103 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
* Copyright (C) 2015 Music Technology Group - Universitat Pompeu Fabra
|
||||
*
|
||||
* This file is part of pypYIN
|
||||
*
|
||||
* pypYIN is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Affero General Public License as published by the Free
|
||||
* Software Foundation (FSF), either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the Affero GNU General Public License
|
||||
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
|
||||
*
|
||||
* If you have any problem about this python version code, please contact: Rong Gong
|
||||
* rong.gong@upf.edu
|
||||
*
|
||||
* If you have any problem about this algorithm, I suggest you to contact: Matthias Mauch
|
||||
* m.mauch@qmul.ac.uk who is the original C++ version author of this algorithm
|
||||
*
|
||||
* If you want to refer this code, please consider this article:
|
||||
*
|
||||
* M. Mauch and S. Dixon,
|
||||
* “pYIN: A Fundamental Frequency Estimator Using Probabilistic Threshold Distributions”,
|
||||
* in Proceedings of the IEEE International Conference on Acoustics,
|
||||
* Speech, and Signal Processing (ICASSP 2014), 2014.
|
||||
*
|
||||
* M. Mauch, C. Cannam, R. Bittner, G. Fazekas, J. Salamon, J. Dai, J. Bello and S. Dixon,
|
||||
* “Computer-aided Melody Note Transcription Using the Tony Software: Accuracy and Efficiency”,
|
||||
* in Proceedings of the First International Conference on Technologies for
|
||||
* Music Notation and Representation, 2015.
|
||||
'''
|
||||
|
||||
import os, sys
|
||||
import pYINmain
|
||||
import essentia.standard as ess
|
||||
import numpy as np
|
||||
from YinUtil import RMS
|
||||
|
||||
def pYINPtNote(filename1,fs=44100,frameSize=2048,hopSize=256):
|
||||
|
||||
'''
|
||||
Given filename, return pitchtrack and note transcription track
|
||||
:param filename1:
|
||||
:param fs:
|
||||
:param frameSize:
|
||||
:param hopSize:
|
||||
:return:
|
||||
'''
|
||||
# initialise
|
||||
pYinInst = pYINmain.PyinMain()
|
||||
pYinInst.initialise(channels = 1, inputSampleRate = fs, stepSize = hopSize, blockSize = frameSize,
|
||||
lowAmp = 0.25, onsetSensitivity = 0.7, pruneThresh = 0.1)
|
||||
|
||||
# frame-wise calculation
|
||||
audio = ess.MonoLoader(filename = filename1, sampleRate = fs)()
|
||||
|
||||
# rms mean
|
||||
# rms = []
|
||||
# for frame in ess.FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
|
||||
# rms.append(RMS(frame, frameSize))
|
||||
# rmsMean = np.mean(rms)
|
||||
# print 'rmsMean', rmsMean
|
||||
|
||||
for frame in ess.FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize):
|
||||
fs = pYinInst.process(frame)
|
||||
|
||||
# calculate smoothed pitch and mono note
|
||||
monoPitch = pYinInst.getSmoothedPitchTrack()
|
||||
|
||||
# output smoothed pitch track
|
||||
print 'pitch track'
|
||||
for ii in fs.m_oSmoothedPitchTrack:
|
||||
print ii.values
|
||||
print '\n'
|
||||
|
||||
fs = pYinInst.getRemainingFeatures(monoPitch)
|
||||
|
||||
# output of mono notes,
|
||||
# column 0: frame number,
|
||||
# column 1: pitch in midi numuber, this is the decoded pitch
|
||||
# column 2: attack 1, stable 2, silence 3
|
||||
print 'mono note decoded pitch'
|
||||
for ii in fs.m_oMonoNoteOut:
|
||||
print ii.frameNumber, ii.pitch, ii.noteState
|
||||
print '\n'
|
||||
|
||||
print 'note pitch tracks'
|
||||
for ii in fs.m_oNotePitchTracks:
|
||||
print ii
|
||||
print '\n'
|
||||
|
||||
# median pitch in Hz of the notes
|
||||
print 'median note pitch'
|
||||
for ii in fs.m_oNotes:
|
||||
print ii.values
|
||||
print '\n'
|
||||
@@ -0,0 +1,278 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
'''
|
||||
* Copyright (C) 2015 Music Technology Group - Universitat Pompeu Fabra
|
||||
*
|
||||
* This file is part of pypYIN
|
||||
*
|
||||
* pypYIN is free software: you can redistribute it and/or modify it under
|
||||
* the terms of the GNU Affero General Public License as published by the Free
|
||||
* Software Foundation (FSF), either version 3 of the License, or (at your
|
||||
* option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
|
||||
* details.
|
||||
*
|
||||
* You should have received a copy of the Affero GNU General Public License
|
||||
* version 3 along with this program. If not, see http://www.gnu.org/licenses/
|
||||
*
|
||||
* If you have any problem about this python version code, please contact: Rong Gong
|
||||
* rong.gong@upf.edu
|
||||
*
|
||||
* If you have any problem about this algorithm, I suggest you to contact: Matthias Mauch
|
||||
* m.mauch@qmul.ac.uk who is the original C++ version author of this algorithm
|
||||
*
|
||||
* If you want to refer this code, please consider this article:
|
||||
*
|
||||
* M. Mauch and S. Dixon,
|
||||
* “pYIN: A Fundamental Frequency Estimator Using Probabilistic Threshold Distributions”,
|
||||
* in Proceedings of the IEEE International Conference on Acoustics,
|
||||
* Speech, and Signal Processing (ICASSP 2014), 2014.
|
||||
*
|
||||
* M. Mauch, C. Cannam, R. Bittner, G. Fazekas, J. Salamon, J. Dai, J. Bello and S. Dixon,
|
||||
* “Computer-aided Melody Note Transcription Using the Tony Software: Accuracy and Efficiency”,
|
||||
* in Proceedings of the First International Conference on Technologies for
|
||||
* Music Notation and Representation, 2015.
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
import copy
|
||||
from math import *
|
||||
from Yin import *
|
||||
from YinUtil import RMS
|
||||
from MonoPitch import MonoPitch
|
||||
from MonoNote import MonoNote
|
||||
|
||||
class Feature(object):
|
||||
def __init__(self):
|
||||
self.values = np.array([], dtype=np.float64)
|
||||
|
||||
def resetValues(self):
|
||||
self.values = np.array([], dtype=np.float64)
|
||||
|
||||
class FeatureSet(object):
|
||||
def __init__(self):
|
||||
self.m_oF0Candidates = []
|
||||
self.m_oF0Probs = []
|
||||
self.m_oVoicedProb = []
|
||||
self.m_oCandidateSalience = []
|
||||
self.m_oSmoothedPitchTrack = []
|
||||
self.m_oMonoNoteOut = []
|
||||
self.m_oNotes = []
|
||||
self.m_oNotePitchTracks = []
|
||||
|
||||
class PyinMain(object):
|
||||
|
||||
def __init__(self):
|
||||
self.m_channels = 0
|
||||
self.m_stepSize = 256
|
||||
self.m_blockSize = 2048
|
||||
self.m_inputSampleRate = 44100
|
||||
self.m_fmin = 40
|
||||
self.m_fmax = 1600
|
||||
|
||||
self.m_yin = Yin()
|
||||
|
||||
self.m_threshDistr = 2.0
|
||||
self.m_outputUnvoiced = 2
|
||||
self.m_preciseTime = 0.0
|
||||
self.m_lowAmp = 0.1
|
||||
self.m_onsetSensitivity = 0.7
|
||||
self.m_pruneThresh = 0.1
|
||||
|
||||
self.m_pitchProb = []
|
||||
self.m_level = np.array([], dtype=np.float32)
|
||||
|
||||
self.fs = FeatureSet()
|
||||
|
||||
def initialise(self, channels = 1, inputSampleRate = 44100, stepSize = 256, blockSize = 2048,
|
||||
lowAmp = 0.1, onsetSensitivity = 0.7, pruneThresh = 0.1 ):
|
||||
|
||||
if channels != 1:
|
||||
return False
|
||||
|
||||
self.m_channels = channels
|
||||
self.m_inputSampleRate = inputSampleRate
|
||||
self.m_stepSize = stepSize
|
||||
self.m_blockSize = blockSize
|
||||
|
||||
self.m_lowAmp = lowAmp
|
||||
self.m_onsetSensitivity = onsetSensitivity
|
||||
self.m_pruneThresh = pruneThresh
|
||||
|
||||
self.reset()
|
||||
|
||||
return True
|
||||
|
||||
def reset(self):
|
||||
|
||||
self.m_yin.setThresholdDistr(self.m_threshDistr)
|
||||
self.m_yin.setFrameSize(self.m_blockSize)
|
||||
self.m_yin.setFast(not self.m_preciseTime)
|
||||
|
||||
self.m_pitchProb = np.array([], dtype=np.float64)
|
||||
self.m_level = np.array([], dtype=np.float32)
|
||||
|
||||
def process(self, inputBuffers):
|
||||
|
||||
dInputBuffers = np.zeros((self.m_blockSize,), dtype=np.float64)
|
||||
for i in range(self.m_blockSize):
|
||||
dInputBuffers[i] = inputBuffers[i]
|
||||
|
||||
rms = RMS(inputBuffers, self.m_blockSize)
|
||||
|
||||
isLowAmplitude = rms < self.m_lowAmp
|
||||
|
||||
yo = self.m_yin.processProbabilisticYin(dInputBuffers)
|
||||
|
||||
self.m_level = np.append(self.m_level, yo.rms)
|
||||
|
||||
'''
|
||||
First, get the things out of the way that we don't want to output
|
||||
immediately, but instead save for later
|
||||
'''
|
||||
tempPitchProb = np.array([], dtype=np.float32)
|
||||
firstStack = False
|
||||
for iCandidate in range(yo.freqProb.shape[0]):
|
||||
tempPitch = 12.0 * log(yo.freqProb[iCandidate][0]/440.0)/log(2.0) + 69.0
|
||||
if not isLowAmplitude:
|
||||
if firstStack == False:
|
||||
tempPitchProb = np.array([np.array([tempPitch, yo.freqProb[iCandidate][1]], dtype=np.float64),])
|
||||
firstStack = True
|
||||
else:
|
||||
tempPitchProb = np.vstack((tempPitchProb, np.array([tempPitch, yo.freqProb[iCandidate][1]], dtype=np.float64)))
|
||||
else:
|
||||
factor = ((rms+0.01*self.m_lowAmp)/(1.01*self.m_lowAmp))
|
||||
if firstStack == False:
|
||||
tempPitchProb = np.array([np.array([tempPitch, yo.freqProb[iCandidate][1]*factor], dtype=np.float64),])
|
||||
firstStack = True
|
||||
else:
|
||||
tempPitchProb = np.vstack((tempPitchProb, np.array([tempPitch, yo.freqProb[iCandidate][1]*factor], dtype=np.float64)))
|
||||
if len(self.m_pitchProb) < 1 and len(tempPitchProb) > 0:
|
||||
self.m_pitchProb = [tempPitchProb,]
|
||||
elif len(self.m_pitchProb) >= 1:
|
||||
self.m_pitchProb = self.m_pitchProb + [tempPitchProb]
|
||||
|
||||
# f0 CANDIDATES
|
||||
f = Feature()
|
||||
for i in range(yo.freqProb.shape[0]):
|
||||
f.values = np.append(f.values, yo.freqProb[i][0])
|
||||
self.fs.m_oF0Candidates.append(copy.copy(f))
|
||||
|
||||
f.resetValues()
|
||||
voicedProb = 0.0
|
||||
for i in range(yo.freqProb.shape[0]):
|
||||
f.values = np.append(f.values, yo.freqProb[i][1])
|
||||
voicedProb += yo.freqProb[i][1]
|
||||
self.fs.m_oF0Probs.append(copy.copy(f))
|
||||
|
||||
f.values = np.append(f.values, voicedProb)
|
||||
self.fs.m_oVoicedProb.append(copy.copy(f))
|
||||
|
||||
# SALIENCE -- maybe this should eventually disappear
|
||||
f.resetValues()
|
||||
salienceSum = 0.0
|
||||
for iBin in range(yo.salience.shape[0]):
|
||||
f.values = np.append(f.values, yo.salience[iBin])
|
||||
salienceSum += yo.salience[iBin]
|
||||
self.fs.m_oCandidateSalience.append(copy.copy(f))
|
||||
|
||||
return self.fs
|
||||
|
||||
def getSmoothedPitchTrack(self):
|
||||
f = Feature()
|
||||
|
||||
if len(self.m_pitchProb) == 0:
|
||||
return self.fs
|
||||
|
||||
# MONO-PITCH STUFF
|
||||
mp = MonoPitch()
|
||||
mpOut = mp.process(self.m_pitchProb)
|
||||
for iFrame in range(len(mpOut)):
|
||||
if mpOut[iFrame] < 0 and self.m_outputUnvoiced == 0:
|
||||
continue
|
||||
f.resetValues()
|
||||
if self.m_outputUnvoiced == 1:
|
||||
f.values = np.append(f.values, np.fabs(mpOut[iFrame]))
|
||||
else:
|
||||
f.values = np.append(f.values, mpOut[iFrame])
|
||||
|
||||
self.fs.m_oSmoothedPitchTrack.append(copy.copy(f))
|
||||
|
||||
return mpOut
|
||||
|
||||
def getRemainingFeatures(self,mpOut):
|
||||
f = Feature()
|
||||
|
||||
if len(mpOut) == 0:
|
||||
return self.fs
|
||||
|
||||
# if len(self.m_pitchProb) == 0:
|
||||
# return self.fs
|
||||
#
|
||||
# # MONO-PITCH STUFF
|
||||
# mp = MonoPitch()
|
||||
# mpOut = mp.process(self.m_pitchProb)
|
||||
# for iFrame in range(len(mpOut)):
|
||||
# if mpOut[iFrame] < 0 and self.m_outputUnvoiced == 0:
|
||||
# continue
|
||||
# f.resetValues()
|
||||
# if self.m_outputUnvoiced == 1:
|
||||
# f.values = np.append(f.values, np.fabs(mpOut[iFrame]))
|
||||
# else:
|
||||
# f.values = np.append(f.values, mpOut[iFrame])
|
||||
#
|
||||
# self.fs.m_oSmoothedPitchTrack.append(copy.copy(f))
|
||||
|
||||
# MONO-NOTE STUFF
|
||||
mn = MonoNote()
|
||||
smoothedPitch = []
|
||||
for iFrame in range(len(mpOut)):
|
||||
temp = []
|
||||
if mpOut[iFrame] > 0: # negative value: silence
|
||||
tempPitch = 12 * log(mpOut[iFrame]/440.0)/log(2.0) + 69
|
||||
temp += [[tempPitch, 0.9]]
|
||||
smoothedPitch += [temp]
|
||||
|
||||
mnOut = mn.process(smoothedPitch)
|
||||
|
||||
self.fs.m_oMonoNoteOut = mnOut
|
||||
|
||||
# turning feature into a note feature
|
||||
|
||||
f.resetValues()
|
||||
|
||||
onsetFrame = 0
|
||||
isVoiced = 0
|
||||
oldIsVoiced = 0
|
||||
nFrame = len(self.m_pitchProb)
|
||||
|
||||
minNoteFrames = (self.m_inputSampleRate*self.m_pruneThresh)/self.m_stepSize
|
||||
|
||||
notePitchTrack = np.array([], dtype=np.float32) # collects pitches for one note at a time
|
||||
for iFrame in range(nFrame):
|
||||
isVoiced = mnOut[iFrame].noteState < 3 \
|
||||
and len(smoothedPitch[iFrame]) > 0 \
|
||||
and (iFrame >= nFrame-2 or (self.m_level[iFrame]/self.m_level[iFrame+2]>self.m_onsetSensitivity))
|
||||
|
||||
if isVoiced and iFrame != nFrame-1:
|
||||
if oldIsVoiced == 0: # beginning of the note
|
||||
onsetFrame = iFrame
|
||||
pitch = smoothedPitch[iFrame][0][0]
|
||||
notePitchTrack = np.append(notePitchTrack, pitch) # add to the note's pitch
|
||||
else: # not currently voiced
|
||||
if oldIsVoiced == 1: # end of the note
|
||||
if len(notePitchTrack) >= minNoteFrames:
|
||||
notePitchTrack = np.sort(notePitchTrack)
|
||||
medianPitch = notePitchTrack[int(len(notePitchTrack)/2)]
|
||||
medianFreq = pow(2, (medianPitch-69)/12)*440
|
||||
f.resetValues()
|
||||
f.values = np.append(f.values, np.double(medianFreq))
|
||||
self.fs.m_oNotes.append(copy.copy(f))
|
||||
self.fs.m_oNotePitchTracks.append(copy.copy(notePitchTrack))
|
||||
notePitchTrack = np.array([], dtype=np.float32)
|
||||
oldIsVoiced = isVoiced
|
||||
|
||||
return self.fs
|
||||
@@ -396,7 +396,10 @@ class AudioFile(object):
|
||||
grain_size = int(grain_size)
|
||||
position = self.get_seek_position()
|
||||
# Read grain
|
||||
index = self.pysndfile_object.seek(start_index, 0)
|
||||
try:
|
||||
index = self.pysndfile_object.seek(start_index, 0)
|
||||
except:
|
||||
pdb.set_trace()
|
||||
if index + grain_size > self.get_frames():
|
||||
grain = self.read_frames(self.get_frames() - index)
|
||||
if padding:
|
||||
@@ -693,8 +696,8 @@ class AudioFile(object):
|
||||
|
||||
- overlap: the factor by which grains overlap (integer)
|
||||
"""
|
||||
length = self.samps_to_ms(self.frames)
|
||||
hop_size = grain_length / overlap
|
||||
length = self.frames
|
||||
hop_size = int(np.floor(grain_length / overlap))
|
||||
grain_count = int(length / hop_size) - 1
|
||||
times = np.arange(grain_count).reshape(-1, 1)
|
||||
times = np.hstack((times, times)).astype(np.dtype('float64'))
|
||||
@@ -715,9 +718,33 @@ class AudioFile(object):
|
||||
"AnalysedAudioFile.generate_grain_times(grain_size, "
|
||||
"overlap, save_times=True)")
|
||||
grain_times = self.times[key].copy()
|
||||
grain_times *= (self.samplerate / 1000)
|
||||
return self.read_grain(start_index=grain_times[0], grain_size=grain_times[1]-grain_times[0])
|
||||
|
||||
def read_extended_grain(self, key, extra_length=0):
|
||||
"""
|
||||
Allow for grains to be retreived by indexing with zero padding after grain times have been generated.
|
||||
"""
|
||||
if self.times == None:
|
||||
raise IndexError("AudioFile object grain times must be generated "
|
||||
"before grains can be accesed by index. Try running "
|
||||
"AnalysedAudioFile.generate_grain_times(grain_size, "
|
||||
"overlap, save_times=True)")
|
||||
grain_times = self.times[key].copy()
|
||||
start = int(grain_times[0]-extra_length)
|
||||
zpad = 0
|
||||
if start < 0:
|
||||
zpad = abs(start)
|
||||
start = 0
|
||||
grain_size = (grain_times[1]-start)+extra_length
|
||||
grain = self.read_grain(start_index=start, grain_size=grain_size, padding=True)
|
||||
grain = np.pad(
|
||||
grain,
|
||||
(zpad, 0),
|
||||
'constant',
|
||||
constant_values=(0, 0)
|
||||
)
|
||||
return grain
|
||||
|
||||
|
||||
@staticmethod
|
||||
def gen_window(window_type, window_size, sym=True):
|
||||
|
||||
@@ -127,7 +127,6 @@ def parse_arguments():
|
||||
"variance",
|
||||
"kurtosis",
|
||||
"skewness",
|
||||
"harm_ratio"
|
||||
]
|
||||
|
||||
parser.add_argument(
|
||||
|
||||
+25
-23
@@ -2,13 +2,13 @@
|
||||
rms = {
|
||||
# Analysis window sizes can be changed for each analysis individually.
|
||||
# These do not need to match the grain size of the matcher or synthesis.
|
||||
"window_size": 100,
|
||||
"window_size": 1024,
|
||||
"overlap": 4,
|
||||
}
|
||||
|
||||
f0 = {
|
||||
"window_size": 4096,
|
||||
"overlap": 4,
|
||||
"window_size": 2048,
|
||||
"overlap": 2,
|
||||
# Currently all frames below this ratio are digaurded and left as silence.
|
||||
# Different databases will require different values for the best results.
|
||||
# Noisier databases will need lower values than more tonal databases.
|
||||
@@ -17,26 +17,26 @@ f0 = {
|
||||
|
||||
# Specify analysis parameters for variance analysis.
|
||||
variance = {
|
||||
"window_size": 100,
|
||||
"window_size": 1024,
|
||||
"overlap": 4
|
||||
}
|
||||
|
||||
# Specify analysis parameters for temporal kurtosis analysis.
|
||||
kurtosis = {
|
||||
"window_size": 100,
|
||||
"window_size": 1024,
|
||||
"overlap": 4
|
||||
}
|
||||
|
||||
# Specify analysis parameters for temporal skewness analysis.
|
||||
skewness = {
|
||||
"window_size": 100,
|
||||
"window_size": 1024,
|
||||
"overlap": 4
|
||||
}
|
||||
|
||||
# Specify analysis parameters for FFT analysis.
|
||||
fft = {
|
||||
# The FFT window size determines the window size for all spectral analyses.
|
||||
"window_size": 4096
|
||||
"window_size": 2048
|
||||
}
|
||||
|
||||
database = {
|
||||
@@ -48,20 +48,20 @@ database = {
|
||||
# Sets the weighting for each analysis. a higher weighting gives an analysis
|
||||
# higher presendence when finding the best matches.
|
||||
matcher_weightings = {
|
||||
"f0" : 8,
|
||||
"f0" : 1,
|
||||
"spccntr" : 1.,
|
||||
"spcsprd" : 1.,
|
||||
"spcflux" : 3.,
|
||||
"spccf" : 3.,
|
||||
"spcflatness": 3.,
|
||||
"spcflatness": 1.,
|
||||
"zerox" : 1.,
|
||||
"rms" : 0.1,
|
||||
"peak": 0.1,
|
||||
"centroid": 0.5,
|
||||
"kurtosis": 2.,
|
||||
"skewness": 2.,
|
||||
"rms" : 1,
|
||||
"peak": 1,
|
||||
"centroid": 0.0,
|
||||
"kurtosis": 0.,
|
||||
"skewness": 0.,
|
||||
"variance": 0.,
|
||||
"harm_ratio": 2
|
||||
"harm_ratio": 0
|
||||
}
|
||||
|
||||
# Specifies the method for averaging analysis frames to create a single value
|
||||
@@ -99,11 +99,11 @@ matcher = {
|
||||
# This value must be the same as the synthesis grain size to avoid the
|
||||
# speeding up or slowing down of the resulting file in relation to the
|
||||
# original.
|
||||
"grain_size": 100,
|
||||
"overlap": 4,
|
||||
"grain_size": 2048,
|
||||
"overlap": 2,
|
||||
# Defines the number of matches to keep for synthesis. Note that this must
|
||||
# also be specified in the synthesis config
|
||||
"match_quantity": 2,
|
||||
"match_quantity": 10,
|
||||
# Choose the algorithm used to perform matching. kdtree is recommended for
|
||||
# larger datasets.
|
||||
"method": 'kdtree'
|
||||
@@ -114,20 +114,22 @@ synthesizer = {
|
||||
# between source and target.
|
||||
"enforce_intensity": True,
|
||||
# Specify the ratio limit that is the grain can be scaled by.
|
||||
"enf_intensity_ratio_limit": 1000.,
|
||||
"enf_intensity_ratio_limit": 20.,
|
||||
# Artificially modify the pitch by the difference in f0 values between
|
||||
# source and target.
|
||||
"enforce_f0": True,
|
||||
# Specify the ratio limit that is the grain can be modified by.
|
||||
"enf_f0_ratio_limit": 1.,
|
||||
"grain_size": 100,
|
||||
"overlap": 4,
|
||||
"enf_f0_ratio_limit": 10.,
|
||||
"grain_size": 2048,
|
||||
"overlap": 2,
|
||||
# Normalize output, avoid clipping of final output by scaling the final
|
||||
# frames.
|
||||
"normalize" : False,
|
||||
# Defines the number of potential grains to choose from matches when
|
||||
# synthesizing output.
|
||||
"match_quantity": 2
|
||||
"match_quantity": 1,
|
||||
|
||||
"silence_inharmonics": True
|
||||
}
|
||||
|
||||
# Specifies the format for the output file. Changing this has not been tested
|
||||
|
||||
Executable → Regular
+154
-63
@@ -82,7 +82,6 @@ class AudioDatabase:
|
||||
'variance',
|
||||
'kurtosis',
|
||||
'skewness',
|
||||
'harm_ratio'
|
||||
}
|
||||
for analysis in analysis_list:
|
||||
if analysis not in valid_analyses:
|
||||
@@ -362,7 +361,7 @@ class Matcher:
|
||||
grain_indexes = np.empty((entry_count, 2))
|
||||
|
||||
for ind, entry in enumerate(database.analysed_audio):
|
||||
length = entry.samps_to_ms(entry.frames)
|
||||
length = entry.frames
|
||||
hop_size = grain_length / overlap
|
||||
grain_indexes[ind][0] = int(length / hop_size) - 1
|
||||
grain_indexes[:, 1] = np.cumsum(grain_indexes[:, 0]).astype(int)
|
||||
@@ -415,7 +414,7 @@ class Matcher:
|
||||
# Create an array of grain times for target sample
|
||||
target_times = target_entry.times
|
||||
x_size = target_times.shape[0]
|
||||
match_indexes = np.empty((x_size, self.match_quantity))
|
||||
match_indexes = np.empty((x_size, self.match_quantity), dtype=int)
|
||||
match_vals = np.empty((x_size, self.match_quantity))
|
||||
match_vals.fill(np.inf)
|
||||
|
||||
@@ -453,7 +452,6 @@ class Matcher:
|
||||
source_data *= weightings[analysis]
|
||||
all_source_analyses[i] = source_data
|
||||
|
||||
|
||||
# Impute values for Nans
|
||||
nan_columns = np.all(np.isnan(all_source_analyses), axis=0)
|
||||
all_source_analyses[:, nan_columns] = 0.
|
||||
@@ -470,7 +468,8 @@ class Matcher:
|
||||
|
||||
vals_append = np.append(match_vals, results_vals, axis=1)
|
||||
vals_sort = np.argsort(vals_append)
|
||||
inds_append = np.append(match_indexes, results_inds+source_sample_indexes[sind][0], axis=1)
|
||||
#TODO: Check that this minus 1 should be there...
|
||||
inds_append = np.append(match_indexes, results_inds+int(source_sample_indexes[sind][0]), axis=1)
|
||||
|
||||
m = np.arange(len(vals_append))[:, np.newaxis]
|
||||
best_match_inds = inds_append[m, vals_sort]
|
||||
@@ -480,6 +479,90 @@ class Matcher:
|
||||
|
||||
match_grain_inds = self.calculate_db_inds(match_indexes, source_sample_indexes)
|
||||
|
||||
###################################################################
|
||||
# Find optimum continuity between selection of best matches per
|
||||
# grain
|
||||
###################################################################
|
||||
|
||||
# Initialize empty array to store final match indexes
|
||||
final_indexes = np.array([])
|
||||
# For every match found
|
||||
for h, match_indexes in enumerate(match_grain_inds[:-1]):
|
||||
self.logger.info("Calculating grain distances for grain: {0} of {1}".format(h, match_grain_inds.shape[0]))
|
||||
# Initialise empty array to store analysis values for all of
|
||||
# the current grain's matches
|
||||
all_source_analyses = np.zeros((len(self.matcher_analyses), self.match_quantity))
|
||||
# For every analysis generated...
|
||||
for i, analysis in enumerate(self.matcher_analyses):
|
||||
# Find the style of formatting used for reducing analysis
|
||||
# frames to a single normalized value
|
||||
analysis_formatting = self.analysis_dict[analysis]
|
||||
|
||||
# For each of the current grain's matches...
|
||||
for j, (db_ind, grain_ind) in enumerate(match_indexes):
|
||||
# Get the start and end time (in samples) for the
|
||||
# current grain
|
||||
grain_time = self.source_db.analysed_audio[db_ind].times[grain_ind-1]
|
||||
# Get the normalised analysis value for the current
|
||||
# grain
|
||||
analysis_val, s = self.source_db.analysed_audio[db_ind].analysis_data_grains(grain_time, analysis, format=analysis_formatting)
|
||||
# Weight the analysis by weighting set by user
|
||||
analysis_val *= weightings[analysis]
|
||||
# Add grain's analysis to
|
||||
all_source_analyses[i][j] = analysis_val
|
||||
|
||||
# get the next grain's match indexes...
|
||||
next_grain_indexes = match_grain_inds[h+1]
|
||||
# Initialise empty array to store analysis values for all of
|
||||
# the next grain's matches
|
||||
next_grain_analyses = np.zeros((len(self.matcher_analyses), self.match_quantity))
|
||||
# For every analysis generated...
|
||||
for i, analysis in enumerate(self.matcher_analyses):
|
||||
# Find the style of formatting used for reducing analysis
|
||||
# frames to a single normalized value
|
||||
analysis_formatting = self.analysis_dict[analysis]
|
||||
|
||||
# For each of the next grain's matches...
|
||||
for j, (db_ind, grain_ind) in enumerate(next_grain_indexes):
|
||||
# Get the start and end time (in samples) for the
|
||||
# next grain
|
||||
grain_time = self.source_db.analysed_audio[db_ind].times[grain_ind-1]
|
||||
analysis_val, s = self.source_db.analysed_audio[db_ind].analysis_data_grains(grain_time, analysis, format=analysis_formatting)
|
||||
analysis_val *= weightings[analysis]
|
||||
next_grain_analyses[i][j] = analysis_val
|
||||
|
||||
# Impute values for Nans
|
||||
nan_columns = np.all(np.isnan(all_source_analyses), axis=0)
|
||||
all_source_analyses[:, nan_columns] = 0.
|
||||
all_source_analyses = imp.fit_transform(all_source_analyses)
|
||||
# Impute values for Nans
|
||||
nan_columns = np.all(np.isnan(next_grain_analyses), axis=0)
|
||||
next_grain_analyses[:, nan_columns] = 0.
|
||||
next_grain_analyses = imp.fit_transform(next_grain_analyses)
|
||||
|
||||
source_tree = spatial.cKDTree(all_source_analyses.T, leafsize=100)
|
||||
# Return array of distances and indexes for matches in the next
|
||||
# grain that are closest to matches in the current grains.
|
||||
results_vals, results_inds = source_tree.query(next_grain_analyses.T, k=1, p=2)
|
||||
|
||||
# Make sure arrays are in the correct orientation
|
||||
if len(results_vals.shape) < 2:
|
||||
results_vals = np.array([results_vals]).T
|
||||
results_inds = np.array([results_inds]).T
|
||||
|
||||
# If the distance was calculated between the first and second
|
||||
# grains, add the first index as well as the second
|
||||
if not final_indexes.size:
|
||||
a = np.argmax(results_vals)
|
||||
final_indexes = np.vstack((match_indexes[a], match_indexes[results_inds[a]]))
|
||||
else:
|
||||
a = np.argmax(results_vals)
|
||||
final_indexes = np.append(final_indexes, match_indexes[results_inds[a]], axis=0)
|
||||
|
||||
match_grain_inds = final_indexes
|
||||
|
||||
###################################################################
|
||||
|
||||
datafile_path = ''.join(("match/", target_entry.name))
|
||||
try:
|
||||
self.output_db.data[datafile_path] = match_grain_inds
|
||||
@@ -738,7 +821,7 @@ class Matcher:
|
||||
if not np.all(np.any(x, axis=1)):
|
||||
raise ValueError("Not all match indexes have a corresponding sample index. This shouldn't happen...\n"
|
||||
"Check that all database path arguments are correct then try re-running with the --rematch and --reanalyse flags.\n"
|
||||
"If this does'nt work, delete the audio and data directories in all databases and try again...")
|
||||
"If this doesn't work, delete the audio and data directories in all databases and try again...")
|
||||
|
||||
x = x.reshape(mi_shape[0], mi_shape[1], x.shape[1])
|
||||
x = np.argmax(x, axis=2)
|
||||
@@ -819,63 +902,70 @@ class Synthesizer:
|
||||
format=output_config["format"],
|
||||
channels=output_config["channels"]
|
||||
) as output:
|
||||
hop_size = (grain_size / overlap) * output.samplerate/1000
|
||||
_grain_size *= int(output.samplerate / 1000)
|
||||
hop_size = int(np.floor(grain_size / overlap))
|
||||
output_frames = np.zeros(_grain_size*2 + (int(hop_size*len(grain_matches))))
|
||||
offset = 0
|
||||
for target_grain_ind, matches in enumerate(grain_matches):
|
||||
# If there are multiple matches, choose a match at random
|
||||
# from available matches.
|
||||
match_index = np.random.randint(matches.shape[0])
|
||||
match_db_ind, match_grain_ind = matches[match_index]
|
||||
with self.match_db.analysed_audio[match_db_ind] as match_sample:
|
||||
self.logger.info("Synthesizing grain:\n"
|
||||
"Source sample: {0}\n"
|
||||
"Source grain index: {1}\n"
|
||||
"Target output: {2}\n"
|
||||
"Target grain index: {3} out of {4}".format(
|
||||
match_sample,
|
||||
match_grain_ind,
|
||||
output_name,
|
||||
target_grain_ind,
|
||||
len(grain_matches)
|
||||
))
|
||||
match_sample.generate_grain_times(match_grain_size, match_overlap, save_times=True)
|
||||
#match_index = np.random.randint(matches.shape[0])
|
||||
match_db_ind, match_grain_ind = matches
|
||||
try:
|
||||
with self.match_db.analysed_audio[match_db_ind] as match_sample:
|
||||
self.logger.info("Synthesizing grain:\n"
|
||||
"Source sample: {0}\n"
|
||||
"Source grain index: {1}\n"
|
||||
"Target output: {2}\n"
|
||||
"Target grain index: {3} out of {4}".format(
|
||||
match_sample,
|
||||
match_grain_ind,
|
||||
output_name,
|
||||
target_grain_ind,
|
||||
len(grain_matches)
|
||||
))
|
||||
match_sample.generate_grain_times(match_grain_size, match_overlap, save_times=True)
|
||||
|
||||
# TODO: Make proper fix for grain index offset of 1
|
||||
try:
|
||||
match_grain = match_sample[match_grain_ind-1]
|
||||
except:
|
||||
pdb.set_trace()
|
||||
# TODO: Make proper fix for grain index offset of 1
|
||||
# match_grain = match_sample[match_grain_ind-1]
|
||||
extra_length = match_sample.get_samplerate()
|
||||
match_grain = match_sample.read_extended_grain(match_grain_ind-1, extra_length=extra_length)
|
||||
|
||||
###################################################################
|
||||
# Adjust current grain overlap to correlate with previous grain
|
||||
###################################################################
|
||||
|
||||
if self.enforce_intensity_bool:
|
||||
# Get the target sample from the database
|
||||
target_sample = self.target_db[job_ind]
|
||||
if self.enforce_intensity_bool:
|
||||
# Get the target sample from the database
|
||||
target_sample = self.target_db[job_ind]
|
||||
|
||||
# Calculate garin times for sample to allow for
|
||||
# indexing.
|
||||
target_sample.generate_grain_times(match_grain_size, match_overlap, save_times=True)
|
||||
# Calculate garin times for sample to allow for
|
||||
# indexing.
|
||||
target_sample.generate_grain_times(match_grain_size, match_overlap, save_times=True)
|
||||
|
||||
match_grain = self.enforce_intensity(match_grain, match_sample, match_grain_ind, target_sample, target_grain_ind)
|
||||
match_grain = self.enforce_intensity(match_grain, match_sample, match_grain_ind, target_sample, target_grain_ind)
|
||||
|
||||
if self.enforce_f0_bool:
|
||||
# Get the target sample from the database
|
||||
target_sample = self.target_db[job_ind]
|
||||
if self.enforce_f0_bool:
|
||||
# Get the target sample from the database
|
||||
target_sample = self.target_db[job_ind]
|
||||
|
||||
# Calculate grain times for sample to allow for
|
||||
# indexing.
|
||||
target_sample.generate_grain_times(match_grain_size, match_overlap, save_times=True)
|
||||
# Calculate grain times for sample to allow for
|
||||
# indexing.
|
||||
target_sample.generate_grain_times(match_grain_size, match_overlap, save_times=True)
|
||||
|
||||
match_grain = self.enforce_pitch(match_grain, match_sample, match_grain_ind, target_sample, target_grain_ind)
|
||||
# TODO: Fix occasional output of grain size one
|
||||
# sample larger than it should be
|
||||
match_grain = self.enforce_pitch(match_grain, match_sample, match_grain_ind, target_sample, target_grain_ind)
|
||||
match_grain = match_grain[extra_length:-extra_length]
|
||||
|
||||
# Apply hanning window to grain
|
||||
match_grain *= np.hanning(match_grain.size)
|
||||
try:
|
||||
output_frames[offset:offset+match_grain.size] += match_grain
|
||||
except:
|
||||
pass
|
||||
offset += hop_size
|
||||
# Apply hanning window to grain
|
||||
match_grain *= np.hanning(match_grain.size)
|
||||
try:
|
||||
output_frames[offset:offset+match_grain.size] += match_grain
|
||||
except:
|
||||
pass
|
||||
offset += hop_size
|
||||
except:
|
||||
pdb.set_trace()
|
||||
# If output normalization is active, normalize output.
|
||||
if self.config.synthesizer["normalize"]:
|
||||
output_frames = (output_frames / np.max(np.abs(output_frames))) * 0.9
|
||||
@@ -892,10 +982,6 @@ class Synthesizer:
|
||||
# TODO: Make proper fix for grain index offset of 1
|
||||
target_times = target_sample.times[target_grain_ind-1]
|
||||
|
||||
# Get mean harmonic ratio of f0 frames in time range specified.
|
||||
target_harmonic_ratio = target_sample.analysis_data_grains(target_times, "harm_ratio", format="mean")[0][0]
|
||||
|
||||
|
||||
# Get mean of f0 frames in time range specified.
|
||||
target_f0 = target_sample.analysis_data_grains(target_times, "f0", format="median")[0][0]
|
||||
|
||||
@@ -903,20 +989,25 @@ class Synthesizer:
|
||||
# TODO: Make proper fix for grain index offset of 1
|
||||
source_times = source_sample.times[source_grain_ind-1]
|
||||
|
||||
# Get mean harmonic ratio of f0 frames in time range specified.
|
||||
source_harmonic_ratio = source_sample.analysis_data_grains(source_times, "harm_ratio", format="mean")[0][0]
|
||||
hr_array = np.array([source_harmonic_ratio, target_harmonic_ratio])
|
||||
|
||||
if np.any(np.isnan(hr_array)):
|
||||
return grain*0
|
||||
|
||||
# Get mean of f0 frames in time range specified.
|
||||
source_f0 = source_sample.analysis_data_grains(source_times, "f0", format="median")[0][0]
|
||||
|
||||
f0_array = np.array([source_f0, target_f0])
|
||||
if np.any(np.isnan(f0_array)):
|
||||
if not self.config.synthesizer["silence_inharmonics"]:
|
||||
return grain
|
||||
else:
|
||||
return grain*0
|
||||
|
||||
ratio_difference = target_f0 / source_f0
|
||||
|
||||
if not np.isfinite(ratio_difference):
|
||||
return grain*0
|
||||
if not self.config.synthesizer["silence_inharmonics"]:
|
||||
return grain
|
||||
else:
|
||||
return grain*0
|
||||
|
||||
# If the ratio difference is within the limits
|
||||
ratio_limit = self.config.synthesizer["enf_f0_ratio_limit"]
|
||||
@@ -935,8 +1026,10 @@ class Synthesizer:
|
||||
ratio_difference = 1./ratio_limit
|
||||
|
||||
grain = pitch_shift.shift(grain, ratio_difference)
|
||||
'''
|
||||
if ratio_difference > ratio_limit or ratio_difference < 1./ratio_limit:
|
||||
grain *= 0
|
||||
'''
|
||||
|
||||
return grain
|
||||
|
||||
@@ -953,9 +1046,8 @@ class Synthesizer:
|
||||
|
||||
# Get mean of RMS frames in time range specified.
|
||||
target_rms = target_sample.analysis_data_grains(target_times, "rms", format="mean")[0][0]
|
||||
target_peak = target_sample.analysis_data_grains(target_times, "peak", format="mean")[0][0]
|
||||
|
||||
target_intensity_value = np.mean([target_rms, target_peak])
|
||||
target_intensity_value = target_rms
|
||||
|
||||
# Get grain start and finish range to retreive analysis frames from.
|
||||
# TODO: Make proper fix for grain index offset of 1
|
||||
@@ -963,9 +1055,8 @@ class Synthesizer:
|
||||
|
||||
# Get mean of RMS frames in time range specified.
|
||||
source_rms = source_sample.analysis_data_grains(source_times, "rms", format="mean")[0][0]
|
||||
source_peak = source_sample.analysis_data_grains(source_times, "peak", format="mean")[0][0]
|
||||
|
||||
source_intensity_value = np.mean([source_rms, source_peak])
|
||||
source_intensity_value = np.mean(source_rms)
|
||||
|
||||
ratio_difference = target_intensity_value / source_intensity_value
|
||||
|
||||
|
||||
Executable → Regular
Executable → Regular
+38
-38
@@ -142,7 +142,7 @@ def main():
|
||||
example below attempts to mimic the examples provided by mathworks MATLAB
|
||||
documentation, http://www.mathworks.com/help/toolbox/signal/
|
||||
"""
|
||||
import pylab
|
||||
import matplotlib.pyplot as plt
|
||||
argv = sys.argv
|
||||
if len(argv) != 1:
|
||||
print >>sys.stderr, 'usage: python -m pim.sp.multirate'
|
||||
@@ -166,25 +166,25 @@ def main():
|
||||
t = numpy.arange(0, 1, 0.00025)
|
||||
x = numpy.sin(2*numpy.pi*30*t) + numpy.sin(2*numpy.pi*60*t)
|
||||
y = decimate(x,4)
|
||||
pylab.figure()
|
||||
pylab.subplot(2, 1, 1)
|
||||
pylab.title('Original Signal')
|
||||
pylab.stem(numpy.arange(len(x[0:120])), x[0:120])
|
||||
pylab.subplot(2, 1, 2)
|
||||
pylab.title('Decimated Signal')
|
||||
pylab.stem(numpy.arange(len(y[0:30])), y[0:30])
|
||||
plt.figure()
|
||||
plt.subplot(2, 1, 1)
|
||||
plt.title('Original Signal')
|
||||
plt.stem(numpy.arange(len(x[0:120])), x[0:120])
|
||||
plt.subplot(2, 1, 2)
|
||||
plt.title('Decimated Signal')
|
||||
plt.stem(numpy.arange(len(y[0:30])), y[0:30])
|
||||
|
||||
#Interp
|
||||
t = numpy.arange(0, 1, 0.001)
|
||||
x = numpy.sin(2*numpy.pi*30*t) + numpy.sin(2*numpy.pi*60*t)
|
||||
y = interp(x,4)
|
||||
pylab.figure()
|
||||
pylab.subplot(2, 1, 1)
|
||||
pylab.title('Original Signal')
|
||||
pylab.stem(numpy.arange(len(x[0:30])), x[0:30])
|
||||
pylab.subplot(2, 1, 2)
|
||||
pylab.title('Interpolated Signal')
|
||||
pylab.stem(numpy.arange(len(y[0:120])), y[0:120])
|
||||
plt.figure()
|
||||
plt.subplot(2, 1, 1)
|
||||
plt.title('Original Signal')
|
||||
plt.stem(numpy.arange(len(x[0:30])), x[0:30])
|
||||
plt.subplot(2, 1, 2)
|
||||
plt.title('Interpolated Signal')
|
||||
plt.stem(numpy.arange(len(y[0:120])), y[0:120])
|
||||
|
||||
#upfirdn
|
||||
L = 147.0
|
||||
@@ -196,11 +196,11 @@ def main():
|
||||
n = numpy.arange(0, 10239)
|
||||
x = numpy.sin(2*numpy.pi*1000/Fs*n)
|
||||
y = upfirdn(x, h, L, M)
|
||||
pylab.figure()
|
||||
pylab.stem(n[1:49]/Fs, x[1:49])
|
||||
pylab.stem(n[1:45]/(Fs*L/M), y[13:57], 'r', markerfmt='ro',)
|
||||
pylab.xlabel('Time (sec)')
|
||||
pylab.ylabel('Signal value')
|
||||
plt.figure()
|
||||
plt.stem(n[1:49]/Fs, x[1:49])
|
||||
plt.stem(n[1:45]/(Fs*L/M), y[13:57], 'r', markerfmt='ro',)
|
||||
plt.xlabel('Time (sec)')
|
||||
plt.ylabel('Signal value')
|
||||
|
||||
#resample
|
||||
fs1 = 10.0
|
||||
@@ -208,30 +208,30 @@ def main():
|
||||
x = t1
|
||||
y = resample(x, 3, 2)
|
||||
t2 = numpy.arange(0,(len(y)))*2.0/(3.0*fs1)
|
||||
pylab.figure()
|
||||
pylab.plot(t1, x, '*')
|
||||
pylab.plot(t2, y, 'o')
|
||||
pylab.plot(numpy.arange(-0.5,1.5, 0.01), numpy.arange(-0.5,1.5, 0.01), ':')
|
||||
pylab.legend(('original','resampled'))
|
||||
pylab.xlabel('Time')
|
||||
plt.figure()
|
||||
plt.plot(t1, x, '*')
|
||||
plt.plot(t2, y, 'o')
|
||||
plt.plot(numpy.arange(-0.5,1.5, 0.01), numpy.arange(-0.5,1.5, 0.01), ':')
|
||||
plt.legend(('original','resampled'))
|
||||
plt.xlabel('Time')
|
||||
|
||||
x = numpy.hstack([numpy.arange(1,11), numpy.arange(9,0,-1)])
|
||||
y = resample(x,3,2)
|
||||
pylab.figure()
|
||||
pylab.subplot(2, 1, 1)
|
||||
pylab.title('Edge Effects Not Noticeable')
|
||||
pylab.plot(numpy.arange(19)+1, x, '*')
|
||||
pylab.plot(numpy.arange(29)*2/3.0 + 1, y, 'o')
|
||||
pylab.legend(('original', 'resampled'))
|
||||
plt.figure()
|
||||
plt.subplot(2, 1, 1)
|
||||
plt.title('Edge Effects Not Noticeable')
|
||||
plt.plot(numpy.arange(19)+1, x, '*')
|
||||
plt.plot(numpy.arange(29)*2/3.0 + 1, y, 'o')
|
||||
plt.legend(('original', 'resampled'))
|
||||
x = numpy.hstack([numpy.arange(10, 0, -1), numpy.arange(2,11)])
|
||||
y = resample(x,3,2)
|
||||
pylab.subplot(2, 1, 2)
|
||||
pylab.plot(numpy.arange(19)+1, x, '*')
|
||||
pylab.plot(numpy.arange(29)*2/3.0 + 1, y, 'o')
|
||||
pylab.title('Edge Effects Very Noticeable')
|
||||
pylab.legend(('original', 'resampled'))
|
||||
plt.subplot(2, 1, 2)
|
||||
plt.plot(numpy.arange(19)+1, x, '*')
|
||||
plt.plot(numpy.arange(29)*2/3.0 + 1, y, 'o')
|
||||
plt.title('Edge Effects Very Noticeable')
|
||||
plt.legend(('original', 'resampled'))
|
||||
|
||||
pylab.show()
|
||||
plt.show()
|
||||
return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -31,3 +31,6 @@ def shift(sigin, pitch):
|
||||
# Read result
|
||||
result = shift_output.read_grain()
|
||||
return result
|
||||
|
||||
def shift2():
|
||||
print "To implement..."
|
||||
|
||||
Executable → Regular
@@ -1,11 +1,11 @@
|
||||
import numpy as np
|
||||
|
||||
def gen_wave(
|
||||
size,
|
||||
freq,
|
||||
wave_type,
|
||||
phase = 0.0,
|
||||
amplitude = 1.0,
|
||||
size,
|
||||
freq,
|
||||
wave_type,
|
||||
phase = 0.0,
|
||||
amplitude = 1.0,
|
||||
samplerate = 44100
|
||||
):
|
||||
"""
|
||||
@@ -18,10 +18,10 @@ def gen_wave(
|
||||
def sine():
|
||||
samples = np.arange(0, size, 1. / samplerate)
|
||||
return amplitude * np.sin(2.0*np.pi*freq*samples)
|
||||
|
||||
|
||||
def square():
|
||||
return amplitude * np.sign(sine())
|
||||
|
||||
|
||||
def triangle():
|
||||
samples = np.arange(0, size, 1. / samplerate)
|
||||
return amplitude - (2 * np.abs(samples * (2 * freq) % (2*amplitude) - amplitude))
|
||||
@@ -29,11 +29,11 @@ def gen_wave(
|
||||
def sawtooth():
|
||||
samples = np.arange(0, size, 1. / samplerate)
|
||||
return amplitude - (2 * np.abs((samples * freq) % amplitude - amplitude))
|
||||
|
||||
|
||||
def reverse_saw():
|
||||
samples = np.arange(0, size, 1. / samplerate)
|
||||
return amplitude - (2 * np.abs(((samples * freq) % amplitude)))
|
||||
|
||||
|
||||
options = {
|
||||
"sine" : sine,
|
||||
"square" : square,
|
||||
|
||||
Executable → Regular
@@ -1,8 +1,18 @@
|
||||
"""A set of unit tests to check the correct operation of the pysound module."""
|
||||
import unittest
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
######
|
||||
# Import sppysound module from directory above current one
|
||||
from inspect import getsourcefile
|
||||
import os.path as path, sys
|
||||
current_dir = path.dirname(path.abspath(getsourcefile(lambda:0)))
|
||||
sys.path.insert(0, current_dir[:current_dir.rfind(path.sep)])
|
||||
from sppysound import AudioFile, analysis
|
||||
from sppysound.database import AudioDatabase, Matcher
|
||||
######
|
||||
|
||||
import subprocess
|
||||
from scipy import signal
|
||||
|
||||
|
||||
Executable → Regular
Executable → Regular
Reference in New Issue
Block a user