Added sklearn to install script

This commit is contained in:
2016-04-14 14:25:10 +01:00
parent d9d64e5b02
commit e35ddae1f5
6 changed files with 384 additions and 58 deletions
+1
View File
@@ -5,3 +5,4 @@ pip install pysndfile
pip install h5py
pip install https://github.com/Pezz89/fileops/zipball/master
pip install -e ./
pip install sklearn
+122 -6
View File
@@ -1,23 +1,139 @@
# Specify analysis parameters for root mean square analysis.
rms = {
# Analysis window sizes can be changed for each analysis individually.
# These do not need to match the grain size of the matcher or synthesis.
"window_size": 100,
"overlap": 2,
"overlap": 8,
}
f0 = {
"window_size": 4096,
"overlap": 8,
# Currently all frames below this ratio are discarded and left as silence.
# Different databases will require different values for the best results.
# Noisier databases will need lower values than more tonal databases.
"ratio_threshold": 0.45
}
# Specify analysis parameters for variance analysis.
variance = {
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for temporal kurtosis analysis.
kurtosis = {
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for temporal skewness analysis.
skewness = {
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for FFT analysis.
fft = {
# The FFT window size determines the window size for all spectral analyses.
"window_size": 4096
}
database = {
# Enables creation of symbolic links to files not in the database rather
# than making physical copies.
"symlink": True
}
# Sets the weighting for each analysis. A higher weighting gives an analysis
# higher precedence when finding the best matches.
matcher_weightings = {
"f0" : 0.5,
"spccntr" : 1.,
"spcsprd" : 1.,
"spcflux" : 3.,
"spccf" : 3.,
"spcflatness": 3.,
"zerox" : 1.,
"rms" : 0.1,
"peak": 0.1,
"centroid": 0.5,
"kurtosis": 2.,
"skewness": 2.,
"variance": 0.,
"harm_ratio": 2
}
# Specifies the method for averaging analysis frames to create a single value
# for comparing to other grains. Possible formatters are: 'mean', 'median',
# 'log2_mean', 'log2_median'
analysis_dict = {
# log2_median formats using mel scale. This is useful for analyses such as
# F0.
"f0": "log2_median",
"rms": "mean"
"rms": "mean",
"zerox": "mean",
"spccntr": "median",
"spcsprd": "median",
"spcflux": "median",
"spccf": "median",
"spcflatness": "median",
"peak": "mean",
"centroid": "mean",
"kurtosis": "mean",
"skewness": "mean",
"variance": "mean",
"harm_ratio": "mean"
}
analysis = {
# Force the deletion of any pre-existing analyses to create new ones. This
# is needed for overwriting old analyses generated with different
# parameters to the current ones.
"reanalyse": False
}
matcher = {
# Force the re-matching of analyses
"rematch": False,
# This value must be the same as the synthesis grain size to avoid the
# speeding up or slowing down of the resulting file in relation to the
# original.
"grain_size": 100,
"overlap": 8,
# Defines the number of matches to keep for synthesis. Note that this must
# also be specified in the synthesis config
"match_quantity": 5,
# Choose the algorithm used to perform matching. kdtree is recommended for
# larger datasets.
"method": 'kdtree'
}
synthesizer = {
# Artificially scale the output grain by the difference in RMS values
# between source and target.
"enforce_intensity": True,
# Specify the ratio limit that the grain can be scaled by.
"enf_intensity_ratio_limit": 1000.,
# Artificially modify the pitch by the difference in f0 values between
# source and target.
"enforce_f0": True,
# Specify the ratio limit that the grain can be modified by.
"enf_f0_ratio_limit": 10.,
"grain_size": 100,
"overlap": 8,
# Normalize output, avoid clipping of final output by scaling the final
# frames.
"normalize" : True,
# Defines the number of potential grains to choose from matches when
# synthesizing output.
"match_quantity": 5
}
# Specifies the format for the output file. Changing this has not been tested
# so may produce errors/undesirable results.
output_file = {
"samplerate": 44100,
"format": 131075,
"channels": 1
}
database = {
"symlink": True
}
+18 -17
View File
@@ -15,7 +15,7 @@ used to determine the central point of a signal's amplitude and is calculated
as:
.. math::
C(n) = \frac{\sum_{i=i_s(n)}^{i_e(n)}(i-i_s(n)) \cdot x(i)}{\sum_{i=i_s(n)}^{i_e(n)} \cdot x(n)}
C(n) = \frac{\sum_{i=i_s(n)}^{i_e(n)}(i-i_s(n)) \cdot x(i)}{\sum_{i=i_s(n)}^{i_e(n)} x(i)}.
Ref: :cite:`lerch2012itaca`
@@ -34,10 +34,10 @@ defined as:
.. math::
R_n(m) = \sum_{i=i_s(n)}^{i_e(n)} x(i) x(i-m)
Then normalizing:
then normalizing:
.. math::
\Gamma_n(m) = \frac{R_n(m)}{\sqrt{\sum_{i=i_s(n)}^{i_e(n)}x(i)^2 \sum_{i=i_s(n)}^{i_e(n)}x(i-m)^2}}
\Gamma_n(m) = \frac{R_n(m)}{\sqrt{\sum_{i=i_s(n)}^{i_e(n)}x(i)^2 \sum_{i=i_s(n)}^{i_e(n)}x(i-m)^2}}.
The fundamental period of the signal is then calculated as the point between
:math:`T_{min}` and :math:`T_{max}` at which the correlated signal most closely matches the
@@ -45,7 +45,7 @@ original. :math:`T_{min}` and :math:`T_{max}` are defined as the minimum and max
the fundamental period.
.. math::
y = arg\,max_{T_{min} \leq m \leq T_{max}} \{\Gamma_i(m)\}
y = arg\,max_{T_{min} \leq m \leq T_{max}} \{\Gamma_i(m)\}.
In order to improve the accuracy of peak detection, parabolic interpolation is
used to estimate the peak's location with greater accuracy by using the peak
@@ -66,7 +66,7 @@ Ref: :cite:`smith2011sasp`
From the fundamental period, the frequency is then calculated as:
.. math::
f_0^n = \frac{1}{T_0^n}
f_0^n = \frac{1}{T_0^n}.
Ref: :cite:`itaa2014`
@@ -83,7 +83,7 @@ the signal. The calculation of the STFT is defined as:
.. math::
X(k,n) = \sum_{i=i_s(n)}^{i_e(n)} x(i) \exp{\Big(-jk \cdot (i -
i_s(n))\frac{2\pi}{K}\Big)}
i_s(n))\frac{2\pi}{K}\Big)}.
Ref: :cite:`lerch2012itaca`
@@ -96,7 +96,7 @@ of confidence measure in determining the validity of F0 values. It is
calculated as part of the F0 estimation algorithm as:
.. math::
HR(n) = max_{T_{min} \leq m \leq T_{max}}{\{T_n(m)\}}
HR(n) = max_{T_{min} \leq m \leq T_{max}}{\{T_n(m)\}}.
Ref: :cite:`lerch2012itaca`
@@ -107,7 +107,7 @@ values indicate a flatter distribution and positive values indicate a more
"peaky" distribution. Kurtosis is calculated as:
.. math::
TK(n)=\frac{1}{\sigma_x^4(n) \cdot K}\sum_{i=i_s(n)}^{i_e(n)}\Big(x(i)-\mu_x(n)\Big)^4-3
TK(n)=\frac{1}{\sigma_x^4(n) \cdot K}\sum_{i=i_s(n)}^{i_e(n)}\Big(x(i)-\mu_x(n)\Big)^4-3.
Ref: :cite:`lerch2012itaca`
@@ -117,16 +117,17 @@ Peak amplitude measures the highest peak in the absolute signal. It is
calculated as:
.. math::
P(n) = \max_{i_s(n) \leq i \leq i_e(n)}\{\left|x(i)\right|\}
P(n) = \max_{i_s(n) \leq i \leq i_e(n)}\{\left|x(i)\right|\}.
RMS
~~~
The perceived loudness of a signal is an important feature as it can be related
to the dynamics of the signal. RMS is used as a measure of sound intensity and
is used for distinguishing between loud and quiet audio. It is calculated as:
is used for distinguishing between loud and quiet audio. It is calculated as
follows, where :math:`K` is the total number of samples:
.. math::
RMS(n) = \sqrt{\frac{1}{K} \sum_{i=i_s(n)}^{i_e(n)} x(i)^2}
RMS(n) = \sqrt{\frac{1}{K} \sum_{i=i_s(n)}^{i_e(n)} x(i)^2}.
Other methods that take the human perception of loudness into account may
provide more perceptually relevant results. However the RMS measurement
@@ -142,7 +143,7 @@ values indicate that the spectral content is centred in higher frequencies and
lower values indicate a lower centre. The spectral centroid is calculated as:
.. math::
SC(n) = \frac{\sum_{k=0}^{K/2-1} k \cdot | X(k,n) | ^2}{\sum_{k=0}^{K/2-1} | X(k,n) | ^2}
SC(n) = \frac{\sum_{k=0}^{K/2-1} k \cdot | X(k,n) | ^2}{\sum_{k=0}^{K/2-1} | X(k,n) | ^2}.
The result is the sum of magnitudes, weighted by their index, normalized by the
unweighted sum.
@@ -158,7 +159,7 @@ This differentiates between flat spectrums and sinusoidal spectrums. (low values
representing the former and high values representing the latter.)
.. math::
SCF = \frac{ \max_{0 \leq k \leq K/2-1} \{| X(k,n) | \}}{\sum_{k=0}^{K/2-1} | X(k,n) | }
SCF = \frac{ \max_{0 \leq k \leq K/2-1} \{| X(k,n) | \}}{\sum_{k=0}^{K/2-1} | X(k,n) | }.
Ref: :cite:`lerch2012itaca`
@@ -171,7 +172,7 @@ values that represent a more tonal signal. Spectral flatness is calculated as:
.. math::
TFl(n) = \frac{\sqrt[K/2]{\prod_{k=0}^{K/2-1} | X(k,n) | }}{2/K \cdot
\sum_{k=0}^{K/2-1} | X(k,n) | }
\sum_{k=0}^{K/2-1} | X(k,n) | }.
Ref: :cite:`lerch2012itaca`
@@ -184,7 +185,7 @@ similar frames (that suggests a steady state signal). It is calculated as:
.. math::
SF(n) = \frac{\sqrt{\sum_{k=0}^{K/2-1} \Big( | X(k,n) | - | X(k,n-1) | \Big)^2
}}{K/2}
}}{K/2}.
Ref: :cite:`lerch2012itaca`
@@ -196,7 +197,7 @@ and is associated with perceptions of timbre. It is calculated as:
.. math::
SS(n) = \sqrt{\frac{\sum_{k=0}^{K/2-1} \Big(k-SC(n)\Big)^2 \cdot | X(k,n)
| ^2}{\sum_{k=0}^{K/2-1} | X(k,n) | ^2}}
| ^2}{\sum_{k=0}^{K/2-1} | X(k,n) | ^2}}.
Ref: :cite:`lerch2012itaca`
@@ -206,7 +207,7 @@ The variance of a signal measures its spread around the signal's arithmetic
mean. It is used in the calculation of Kurtosis and is calculated as:
.. math::
\sigma_x^2 = \frac{1}{K} \sum_{i=i_s(n)}^{i_e(n)}(x(i) - \mu_x(n))^2
\sigma_x^2 = \frac{1}{K} \sum_{i=i_s(n)}^{i_e(n)}(x(i) - \mu_x(n))^2.
Ref: :cite:`lerch2012itaca`
+116 -13
View File
@@ -1,36 +1,139 @@
# Specify analysis parameters for root mean square analysis.
rms = {
# Analysis window sizes can be changed for each analysis individually.
# These do not need to match the grain size of the matcher or synthesis.
"window_size": 100,
"overlap": 2,
"overlap": 8,
}
analysis_dict = {
"f0": "log2_median",
"rms": "mean"
f0 = {
"window_size": 4096,
"overlap": 8,
# Currently all frames below this ratio are discarded and left as silence.
# Different databases will require different values for the best results.
# Noisier databases will need lower values than more tonal databases.
"ratio_threshold": 0.45
}
# Specify analysis parameters for variance analysis.
variance = {
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for temporal kurtosis analysis.
kurtosis = {
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for temporal skewness analysis.
skewness = {
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for FFT analysis.
fft = {
# The FFT window size determines the window size for all spectral analyses.
"window_size": 4096
}
database = {
# Enables creation of symbolic links to files not in the database rather
# than making physical copies.
"symlink": True
}
# Sets the weighting for each analysis. A higher weighting gives an analysis
# higher precedence when finding the best matches.
matcher_weightings = {
"f0" : 1.,
"rms": 1.
"f0" : 0.5,
"spccntr" : 1.,
"spcsprd" : 1.,
"spcflux" : 3.,
"spccf" : 3.,
"spcflatness": 3.,
"zerox" : 1.,
"rms" : 0.1,
"peak": 0.1,
"centroid": 0.5,
"kurtosis": 2.,
"skewness": 2.,
"variance": 0.,
"harm_ratio": 2
}
# Specifies the method for averaging analysis frames to create a single value
# for comparing to other grains. Possible formatters are: 'mean', 'median',
# 'log2_mean', 'log2_median'
analysis_dict = {
# log2_median formats using mel scale. This is useful for analyses such as
# F0.
"f0": "log2_median",
"rms": "mean",
"zerox": "mean",
"spccntr": "median",
"spcsprd": "median",
"spcflux": "median",
"spccf": "median",
"spcflatness": "median",
"peak": "mean",
"centroid": "mean",
"kurtosis": "mean",
"skewness": "mean",
"variance": "mean",
"harm_ratio": "mean"
}
analysis = {
# Force the deletion of any pre-existing analyses to create new ones. This
# is needed for overwriting old analyses generated with different
# parameters to the current ones.
"reanalyse": False
}
matcher = {
# Force the re-matching of analyses
"rematch": False,
# This value must be the same as the synthesis grain size to avoid the
# speeding up or slowing down of the resulting file in relation to the
# original.
"grain_size": 100,
"overlap": 2,
# Defines the number of matches to keep for synthesis.
"match_quantity": 20
"overlap": 8,
# Defines the number of matches to keep for synthesis. Note that this must
# also be specified in the synthesis config
"match_quantity": 5,
# Choose the algorithm used to perform matching. kdtree is recommended for
# larger datasets.
"method": 'kdtree'
}
synthesizer = {
# Artificially scale the output grain by the difference in RMS values
# between source and target.
"enforce_intensity": True,
# Specify the ratio limit that the grain can be scaled by.
"enf_intensity_ratio_limit": 1000.,
# Artificially modify the pitch by the difference in f0 values between
# source and target.
"enforce_f0": True,
# Specify the ratio limit that the grain can be modified by.
"enf_f0_ratio_limit": 10.,
"grain_size": 100,
"overlap": 8,
# Normalize output, avoid clipping of final output by scaling the final
# frames.
"normalize" : True,
# Defines the number of potential grains to choose from matches when
# synthesizing output.
"match_quantity": 5
}
# Specifies the format for the output file. Changing this has not been tested
# so may produce errors/undesirable results.
output_file = {
"samplerate": 44100,
"format": 131075,
"channels": 1
}
database = {
"symlink": True
}
+122 -19
View File
@@ -1,36 +1,139 @@
# Specify analysis parameters for root mean square analysis.
rms = {
# Analysis window sizes can be changed for each analysis individually.
# These do not need to match the grain size of the matcher or synthesis.
"window_size": 100,
"overlap": 2,
"overlap": 8,
}
f0 = {
"window_size": 4096,
"overlap": 8,
# Currently all frames below this ratio are discarded and left as silence.
# Different databases will require different values for the best results.
# Noisier databases will need lower values than more tonal databases.
"ratio_threshold": 0.45
}
# Specify analysis parameters for variance analysis.
variance = {
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for temporal kurtosis analysis.
kurtosis = {
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for temporal skewness analysis.
skewness = {
"window_size": 100,
"overlap": 8
}
# Specify analysis parameters for FFT analysis.
fft = {
# The FFT window size determines the window size for all spectral analyses.
"window_size": 4096
}
database = {
# Enables creation of symbolic links to files not in the database rather
# than making physical copies.
"symlink": True
}
# Sets the weighting for each analysis. A higher weighting gives an analysis
# higher precedence when finding the best matches.
matcher_weightings = {
"f0" : 0.5,
"spccntr" : 1.,
"spcsprd" : 1.,
"spcflux" : 3.,
"spccf" : 3.,
"spcflatness": 3.,
"zerox" : 1.,
"rms" : 0.1,
"peak": 0.1,
"centroid": 0.5,
"kurtosis": 2.,
"skewness": 2.,
"variance": 0.,
"harm_ratio": 2
}
# Specifies the method for averaging analysis frames to create a single value
# for comparing to other grains. Possible formatters are: 'mean', 'median',
# 'log2_mean', 'log2_median'
analysis_dict = {
# log2_median formats using mel scale. This is useful for analyses such as
# F0.
"f0": "log2_median",
"rms": "mean"
"rms": "mean",
"zerox": "mean",
"spccntr": "median",
"spcsprd": "median",
"spcflux": "median",
"spccf": "median",
"spcflatness": "median",
"peak": "mean",
"centroid": "mean",
"kurtosis": "mean",
"skewness": "mean",
"variance": "mean",
"harm_ratio": "mean"
}
analysis = {
# Force the deletion of any pre-existing analyses to create new ones. This
# is needed for overwriting old analyses generated with different
# parameters to the current ones.
"reanalyse": False
}
matcher = {
# Force the re-matching of analyses
"rematch": False,
# This value must be the same as the synthesis grain size to avoid the
# speeding up or slowing down of the resulting file in relation to the
# original.
"grain_size": 100,
"overlap": 8,
# Defines the number of matches to keep for synthesis. Note that this must
# also be specified in the synthesis config
"match_quantity": 5,
# Choose the algorithm used to perform matching. kdtree is recommended for
# larger datasets.
"method": 'kdtree'
}
synthesizer = {
# Artificially scale the output grain by the difference in RMS values
# between source and target.
"enforce_intensity": True,
# Specify the ratio limit that the grain can be scaled by.
"enf_intensity_ratio_limit": 1000.,
# Artificially modify the pitch by the difference in f0 values between
# source and target.
"enforce_f0": True,
# Specify the ratio limit that the grain can be modified by.
"enf_f0_ratio_limit": 10.,
"grain_size": 100,
"overlap": 8,
# Normalize output, avoid clipping of final output by scaling the final
# frames.
"normalize" : True,
# Defines the number of potential grains to choose from matches when
# synthesizing output.
"match_quantity": 5
}
# Specifies the format for the output file. Changing this has not been tested
# so may produce errors/undesirable results.
output_file = {
"samplerate": 44100,
"format": 131075,
"channels": 1
}
synthesizer = {
"enforce_rms": True,
"enf_rms_ratio_limit": 5.,
"enforce_f0": True,
"enf_f0_ratio_limit": 10.,
"grain_size": 100,
"overlap": 2,
"normalize" : True,
# Defines the number of potential grains to choose from matches when
# synthesizing output.
"match_quantity": 20
}
database = {
"symlink": True
}
+4 -2
View File
@@ -84,7 +84,7 @@ For this demonstration, the following file structure will be used:
|-- target.03.wav
`-- target.04.wav
A source database containing a small selection of trumpet samples (aquired from
A source database containing a small selection of trumpet samples (acquired from
http://theremin.music.uiowa.edu/MIS.html) will be used to match grains with 4
target sounds. This will produce 4 output files, one for each target sound.
@@ -417,7 +417,9 @@ concatenate.py Script Flags
--match_method Choose the algorithm to use when matching analyses. Available algorithms are:
Brute force: 'bruteforce'
Brute force: 'bruteforce' (BROKEN. The brute force
matcher no longer works with the current release of
this script. Use the K-d Tree Search.)
K-d Tree Search: 'kdtree'