Wrote tutorial, tried to fix broken brute force matcher. still broken...
This commit is contained in:
@@ -96,10 +96,11 @@ def parse_arguments():
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
'--src-db',
|
||||
'--src_db',
|
||||
help="Specifies the directory to create the source database and store analyses "
|
||||
"in. If not specified then the source directory will be used directly.",
|
||||
type=str
|
||||
type=str,
|
||||
metavar=''
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
@@ -107,7 +108,8 @@ def parse_arguments():
|
||||
help="Specifies the directory to create the target database and store analyses "
|
||||
"in. If not specified then the target directory will be used directly.",
|
||||
type=str,
|
||||
default=''
|
||||
default='',
|
||||
metavar=''
|
||||
)
|
||||
|
||||
analyses = [
|
||||
@@ -202,7 +204,13 @@ def parse_arguments():
|
||||
"'kdtree'",
|
||||
)
|
||||
|
||||
parser.add_argument('--verbose', '-v', action='count')
|
||||
parser.add_argument(
|
||||
'--verbose',
|
||||
'-v',
|
||||
action='count',
|
||||
help='Specifies level of verbosity in output. For example: \'-vvvvv\' '
|
||||
'will output all information. \'-v\' will output minimal information. '
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
for item in config_items:
|
||||
@@ -256,6 +264,7 @@ def main():
|
||||
log_filename=modpath,
|
||||
logger_filelevel=args.verbose
|
||||
)
|
||||
pdb.set_trace()
|
||||
|
||||
# Create/load a pre-existing source database
|
||||
source_db = AudioDatabase(
|
||||
|
||||
+30
-8
@@ -1,23 +1,28 @@
|
||||
# Specify analysis parameters for root mean square analysis.
|
||||
rms = {
|
||||
"window_size": 70,
|
||||
"overlap": 8,
|
||||
"overlap": 2,
|
||||
}
|
||||
|
||||
# Specify analysis parameters for variance analysis.
|
||||
variance = {
|
||||
"window_size": 70,
|
||||
"overlap": 8
|
||||
"overlap": 2
|
||||
}
|
||||
|
||||
# Specify analysis parameters for temporal kurtosis analysis.
|
||||
kurtosis = {
|
||||
"window_size": 70,
|
||||
"overlap": 8
|
||||
"overlap": 2
|
||||
}
|
||||
|
||||
# Specify analysis parameters for temporal skewness analysis.
|
||||
skewness = {
|
||||
"window_size": 70,
|
||||
"overlap": 8
|
||||
"overlap": 2
|
||||
}
|
||||
|
||||
# Specify analysis parameters for FFT analysis.
|
||||
fft = {
|
||||
"window_size": 65536
|
||||
}
|
||||
@@ -28,6 +33,8 @@ database = {
|
||||
"symlink": True
|
||||
}
|
||||
|
||||
# Sets the weighting for each analysis. a higher weighting gives an analysis
|
||||
# higher precedence when finding the best matches.
|
||||
matcher_weightings = {
|
||||
"f0" : 1.,
|
||||
"spccntr" : 1.,
|
||||
@@ -45,6 +52,9 @@ matcher_weightings = {
|
||||
"harm_ratio": 1.
|
||||
}
|
||||
|
||||
# Specifies the method for averaging analysis frames to create a single value
|
||||
# for comparing to other grains. Possible formatters are: 'mean', 'median',
|
||||
# 'log2_mean', 'log2_median'
|
||||
analysis_dict = {
|
||||
"f0": "log2_median",
|
||||
"rms": "mean",
|
||||
@@ -63,32 +73,44 @@ analysis_dict = {
|
||||
}
|
||||
|
||||
analysis = {
|
||||
# Force the deletion of any pre-existing analyses to create new ones. This
|
||||
# is needed for overwriting old analyses generated with different
|
||||
# parameters to the current ones.
|
||||
"reanalyse": False
|
||||
}
|
||||
|
||||
matcher = {
|
||||
# Force the re-matching of analyses
|
||||
"rematch": True,
|
||||
"grain_size": 70,
|
||||
"overlap": 8,
|
||||
"overlap": 2,
|
||||
# Defines the number of matches to keep for synthesis. Note that this must
|
||||
# also be specified in the synthesis config
|
||||
"match_quantity": 1,
|
||||
"match_quantity": 20,
|
||||
# Choose the algorithm used to perform matching. kdtree is recommended for
|
||||
# larger datasets.
|
||||
"method": 'kdtree'
|
||||
}
|
||||
|
||||
synthesizer = {
|
||||
# Artificially scale the output grain by the difference in RMS values
|
||||
# between source and target.
|
||||
"enforce_rms": True,
|
||||
# Specify the limit on the ratio that the grain can be scaled by.
|
||||
"enf_rms_ratio_limit": 100.,
|
||||
# Artificially modify the pitch by the difference in f0 values between
|
||||
# source and target.
|
||||
"enforce_f0": True,
|
||||
# Specify the limit on the ratio that the grain can be modified by.
|
||||
"enf_f0_ratio_limit": 10.,
|
||||
"grain_size": 70,
|
||||
"overlap": 8,
|
||||
"overlap": 2,
|
||||
# Normalize output, avoid clipping of final output by scaling the final
|
||||
# frames.
|
||||
"normalize" : True,
|
||||
# Defines the number of potential grains to choose from matches when
|
||||
# synthesizing output.
|
||||
"match_quantity": 1
|
||||
"match_quantity": 20
|
||||
}
|
||||
|
||||
output_file = {
|
||||
|
||||
@@ -493,12 +493,8 @@ class Matcher:
|
||||
# Create an array of grain times for target sample
|
||||
target_times = target_entry.generate_grain_times(grain_size, overlap, save_times=True)
|
||||
|
||||
# Stores an accumulated distance between source and target grains,
|
||||
# added to by each analysis.
|
||||
distance_accum = np.zeros((target_times.shape[0], source_sample_indexes[-1][-1]))
|
||||
# Allocate memory for storing accumulated distances between
|
||||
# source and target grains
|
||||
|
||||
x_size = target_times.shape[0]
|
||||
y_size = int(source_sample_indexes[-1][-1])
|
||||
chunk_size = 8192
|
||||
@@ -507,14 +503,16 @@ class Matcher:
|
||||
|
||||
try:
|
||||
del self.output_db.data["data_distance"]
|
||||
self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True)
|
||||
except RuntimeError:
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True)
|
||||
|
||||
try:
|
||||
del self.output_db.data["distance_accum"]
|
||||
self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0)
|
||||
except RuntimeError:
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0)
|
||||
|
||||
for analysis in self.matcher_analyses:
|
||||
|
||||
@@ -141,7 +141,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"matcher.match(\n",
|
||||
" matcher.brute_force_matcher,\n",
|
||||
" matcher.kdtree_matcher,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 4,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
@@ -17,12 +17,12 @@
|
||||
"source": [
|
||||
"from sppysound.database import AudioDatabase, Synthesizer, Matcher\n",
|
||||
"import synthesis_config\n",
|
||||
"import matching_config"
|
||||
"import config"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 5,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
},
|
||||
@@ -43,16 +43,16 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 6,
|
||||
"metadata": {
|
||||
"collapsed": true
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"source_database = AudioDatabase(\n",
|
||||
" source_dir,\n",
|
||||
" config=synthesis_config,\n",
|
||||
" analysis_list={\"f0\", \"rms\", \"peak\"}\n",
|
||||
" analysis_list={\"f0\", \"rms\"}\n",
|
||||
")\n",
|
||||
"source_database.load_database(reanalyse=True)"
|
||||
]
|
||||
@@ -67,22 +67,36 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 7,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Traceback (most recent call last):\n",
|
||||
" File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/database.py\", line 157, in analyse_database\n",
|
||||
" config=self.config\n",
|
||||
" File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/audiofile.py\", line 943, in __enter__\n",
|
||||
" \"empty\".format(self.name))\n",
|
||||
"IOError: File isn't valid: ElectricGuitarSample-out_output.wav\n",
|
||||
"Check that file is mono and isn't empty\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"target_database = AudioDatabase(\n",
|
||||
" target_dir,\n",
|
||||
" config=synthesis_config,\n",
|
||||
" analysis_list={\"f0\", \"rms\", \"peak\"}\n",
|
||||
" analysis_list={\"f0\", \"rms\"}\n",
|
||||
")\n",
|
||||
"target_database.load_database(reanalyse=True)\n",
|
||||
"\n",
|
||||
"output_database = AudioDatabase(\n",
|
||||
" output_dir,\n",
|
||||
" config=synthesis_config\n",
|
||||
" config=config\n",
|
||||
")\n",
|
||||
"output_database.load_database(reanalyse=False)\n",
|
||||
"\n",
|
||||
@@ -90,11 +104,13 @@
|
||||
" source_database,\n",
|
||||
" target_database,\n",
|
||||
" output_db=output_database,\n",
|
||||
" config=matching_config,\n",
|
||||
" config=config,\n",
|
||||
" rematch=True\n",
|
||||
")\n",
|
||||
"matcher.match(\n",
|
||||
" matcher.brute_force_matcher,\n",
|
||||
" matcher.kdtree_matcher,\n",
|
||||
" grain_size=config.matcher[\"grain_size\"],\n",
|
||||
" overlap=config.matcher[\"overlap\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -107,7 +123,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
@@ -117,7 +133,7 @@
|
||||
" source_database, \n",
|
||||
" output_database, \n",
|
||||
" target_db=target_database, \n",
|
||||
" config=synthesis_config\n",
|
||||
" config=config\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
@@ -130,14 +146,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 9,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"synthesizer.synthesize()"
|
||||
"synthesizer.synthesize(\n",
|
||||
" grain_size=config.synthesizer[\"grain_size\"],\n",
|
||||
" overlap=config.synthesizer[\"overlap\"]\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||
@@ -17,3 +17,7 @@ output_file = {
|
||||
"format": 131075,
|
||||
"channels": 1
|
||||
}
|
||||
|
||||
database = {
|
||||
"symlink": True
|
||||
}
|
||||
|
||||
@@ -87,15 +87,20 @@ concatenate.py Script Usage
|
||||
--enforcerms This flag enables scaling of matched grains to better
|
||||
match the target's volume.
|
||||
|
||||
--copy This flag enables the copying of audio files from
|
||||
their location to the database, rather than creating
|
||||
symbolic links. This is useful for creating portable
|
||||
databases.
|
||||
|
||||
--match_method Choose the algorithm to use when matching analyses. Available algorithms are:
|
||||
|
||||
Brute force: 'bruteforce'
|
||||
|
||||
K-d Tree Search: 'kdtree'
|
||||
|
||||
--verbose, -v Specify the verbosity of the script's output. Additional
|
||||
v will produce greater levels of detail ie. -vvvvv will
|
||||
produce all messages.
|
||||
--verbose, -v Specifies level of verbosity in output. For example:
|
||||
'-vvvvv' will output all information. '-v' will output
|
||||
minimal information.
|
||||
|
||||
-------------------
|
||||
AudioFile Class
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
Overview
|
||||
========
|
||||
Concatenator is a tool for synthesizing interpretations of a sound, through the
|
||||
analysis and synthesis of audio grains from a database of sounds.
|
||||
The program works by analysing overlapping segments of audio (known as grains)
|
||||
from both the target sound and the source database, then searching for the
|
||||
closest matching grain in the source database to the target sound. Finally, the
|
||||
output is generated by overlap-adding the best matches.
|
||||
@@ -30,3 +30,7 @@ output_file = {
|
||||
"format": 131075,
|
||||
"channels": 1
|
||||
}
|
||||
|
||||
database = {
|
||||
"symlink": True
|
||||
}
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
.. _overview:
|
||||
|
||||
Overview
|
||||
========
|
||||
Concatenator is a tool for synthesizing interpretations of a sound, through the
|
||||
@@ -73,6 +75,15 @@ precedence over others based on user preference.
|
||||
The best match indexes are then saved to the output database ready for
|
||||
synthesis.
|
||||
|
||||
There are currently two implementations for the matching algorithm:
|
||||
|
||||
- Brute Force
|
||||
|
||||
- K-d Tree Search
|
||||
|
||||
Both will return similar results, however the K-d tree search algorithm is
|
||||
far more efficient when analysing large datasets so is the preferred method.
|
||||
|
||||
.. graphviz::
|
||||
|
||||
digraph b {
|
||||
@@ -162,7 +173,7 @@ database, performing any post-processing (such as pitch shifting and amplitude
|
||||
scaling) to improve the similarity of the match, then windowed overlap adding
|
||||
the grains to create the final output. The post-processing phase involves using
|
||||
the ratio difference between the source and target grain to artificially alter
|
||||
the source grain so that it better ressembles the target. This is particularly
|
||||
the source grain so that it better resembles the target. This is particularly
|
||||
useful when using small source databases as it improves the similarity of any
|
||||
match (important when best matches aren't very close to the target.) The final
|
||||
output is saved to the output database's audio directory.
|
||||
|
||||
@@ -30,3 +30,7 @@ synthesizer = {
|
||||
# synthesizing output.
|
||||
"match_quantity": 20
|
||||
}
|
||||
|
||||
database = {
|
||||
"symlink": True
|
||||
}
|
||||
|
||||
@@ -1,6 +1,304 @@
|
||||
Tutorial
|
||||
========
|
||||
|
||||
This section gives a brief introduction to using the 'concatenator.py' script. The
|
||||
script can be found in the src/sppysound directory of the project folder, or
|
||||
can be accessed by running the 'concatenator' symbolic link from the project
|
||||
folder root.
|
||||
|
||||
Getting Started
|
||||
---------------
|
||||
|
||||
To view all available options simply run:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
./concatenator -h
|
||||
|
||||
A list of all commands available is then presented:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
usage: concatenator [-h] [--src_db] [--tar_db]
|
||||
[--analyse [ANALYSE [ANALYSE ...]]] [--analysis_dict]
|
||||
[--fft] [--kurtosis] [--matcher] [--matcher_weightings]
|
||||
[--rms] [--skewness] [--synthesizer] [--variance]
|
||||
[--reanalyse] [--rematch] [--enforcef0] [--enforcerms]
|
||||
[--copy] [--match_method] [--verbose]
|
||||
source target output
|
||||
|
||||
Concatenator is a tool for synthesizing interpretations of a sound, through
|
||||
the analysis and synthesis of audio grains from a corpus database. The program
|
||||
works by analysing overlapping segments of audio (known as grains) from both
|
||||
the target sound and the source database, then searching for the closest
|
||||
matching grain in the source database to the target sound. Finally, the output
|
||||
is generated by overlap-adding the best matches.
|
||||
|
||||
positional arguments:
|
||||
source Directory of source files/database to take grains from
|
||||
when synthesizing output
|
||||
target Directory of target files/database to match source
|
||||
grains to.
|
||||
output Directory to use as database for outputing results and
|
||||
match information. Output audio will be stored in the
|
||||
/audio sub-directory and match data will be stored in
|
||||
the /data directory.
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
--src_db Specifies the directory to create the source database
|
||||
and store analyses in. If not specified then the
|
||||
|
||||
...
|
||||
|
||||
For this demonstration, the following file structure will be used:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
/Users/samuelperry/concatenator_test/
|
||||
|-- source_db
|
||||
| |-- Trumpet.novib.ff.A3.stereo.aif
|
||||
| |-- Trumpet.novib.ff.A4.stereo.aif
|
||||
| |-- Trumpet.novib.ff.A5.stereo.aif
|
||||
|
||||
...
|
||||
|
||||
| |-- Trumpet.novib.ff.F5.stereo.aif
|
||||
| |-- Trumpet.novib.ff.G3.stereo.aif
|
||||
| `-- Trumpet.novib.ff.G4.stereo.aif
|
||||
`-- target_db
|
||||
|-- target.01.wav
|
||||
|-- target.02.wav
|
||||
|-- target.03.wav
|
||||
`-- target.04.wav
|
||||
|
||||
A source database containing a small selection of trumpet samples (acquired from
|
||||
|
||||
http://theremin.music.uiowa.edu/MIS.html) will be used to match grains with 4
|
||||
target sounds. This will produce 4 output files, one for each target sound.
|
||||
|
||||
The following command is used to generate the output:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
concatenator ./source_db ./target_db ./output_db --src_db \
|
||||
./analysed_source_db --tar_db ./analysed_tar_db
|
||||
|
||||
The specified directories are searched recursively for audio files that are
|
||||
used as items in the database. These items are then matched and synthesized as
|
||||
explained in the :ref:`overview` section. Output is stored in the audio
|
||||
directory of the output database that has been created.
|
||||
This produces this directory structure:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
/Users/samuelperry/concatenator_test/
|
||||
|-- analysed_source_db
|
||||
| |-- audio
|
||||
| | |-- Trumpet.novib.ff.A3.stereo.aif -> (Symlink)
|
||||
| | |-- Trumpet.novib.ff.A4.stereo.aif -> (Symlink)
|
||||
| | |-- Trumpet.novib.ff.A5.stereo.aif -> (Symlink)
|
||||
|
||||
...
|
||||
|
||||
| | |-- Trumpet.novib.ff.F5.stereo.aif -> (Symlink)
|
||||
| | |-- Trumpet.novib.ff.G3.stereo.aif -> (Symlink)
|
||||
| | `-- Trumpet.novib.ff.G4.stereo.aif -> (Symlink)
|
||||
| `-- data
|
||||
| `-- analysis_data.hdf5
|
||||
|-- analysed_tar_db
|
||||
| |-- audio
|
||||
| | |-- target.01.wav -> (Symlink)
|
||||
| | |-- target.02.wav -> (Symlink)
|
||||
| | |-- target.03.wav -> (Symlink)
|
||||
| | `-- target.04.wav -> (Symlink)
|
||||
| `-- data
|
||||
| `-- analysis_data.hdf5
|
||||
|-- output_db
|
||||
| |-- audio
|
||||
| | |-- target.01_output.wav
|
||||
| | |-- target.02_output.wav
|
||||
| | |-- target.03_output.wav
|
||||
| | `-- target.04_output.wav
|
||||
| `-- data
|
||||
| `-- analysis_data.hdf5
|
||||
|-- source_db
|
||||
| |-- Trumpet.novib.ff.A3.stereo.aif
|
||||
| |-- Trumpet.novib.ff.A4.stereo.aif
|
||||
| |-- Trumpet.novib.ff.A5.stereo.aif
|
||||
| |-- Trumpet.novib.ff.F5.stereo.aif
|
||||
|
||||
...
|
||||
|
||||
| |-- Trumpet.novib.ff.G3.stereo.aif
|
||||
| `-- Trumpet.novib.ff.G4.stereo.aif
|
||||
`-- target_db
|
||||
|-- target.01.wav
|
||||
|-- target.02.wav
|
||||
|-- target.03.wav
|
||||
`-- target.04.wav
|
||||
|
||||
By using the --src_db and --tar_db flags, alternative locations are specified
|
||||
for generating the databases and storing analysis data. Symbolic links are
|
||||
created, referencing the original audio files without moving them. This allows
|
||||
large databases to be used in place without copying or moving their content.
|
||||
|
||||
Alternatively, databases can be generated in place by omitting the --src_db and
|
||||
--tar_db flags. This will create the database directory structure directly in
|
||||
the directories provided as source and target.
|
||||
|
||||
The --copy flag can be used in conjunction with these flags in order to create
|
||||
actual copies of the audio files at the destinations. This allows for the
|
||||
creation of portable databases that can be moved to other machines without
|
||||
breaking links to the original files. (Any pre-existing symbolic links will be
|
||||
overwritten with hard copies when using this option.)
|
||||
|
||||
config.py
|
||||
---------
|
||||
The config.py file is used for specifying all user defined options and can be
|
||||
edited in the concatenator project directory. Comments explain the function of
|
||||
each parameter. The default config.py file looks like this:
|
||||
|
||||
.. code:: python
|
||||
|
||||
# Specify analysis parameters for root mean square analysis.
|
||||
rms = {
|
||||
"window_size": 70,
|
||||
"overlap": 2,
|
||||
}
|
||||
|
||||
# Specify analysis parameters for variance analysis.
|
||||
variance = {
|
||||
"window_size": 70,
|
||||
"overlap": 2
|
||||
}
|
||||
|
||||
# Specify analysis parameters for temporal kurtosis analysis.
|
||||
kurtosis = {
|
||||
"window_size": 70,
|
||||
"overlap": 2
|
||||
}
|
||||
|
||||
# Specify analysis parameters for temporal skewness analysis.
|
||||
skewness = {
|
||||
"window_size": 70,
|
||||
"overlap": 2
|
||||
}
|
||||
|
||||
# Specify analysis parameters for FFT analysis.
|
||||
fft = {
|
||||
"window_size": 65536
|
||||
}
|
||||
|
||||
database = {
|
||||
# Enables creation of symbolic links to files not in the database rather
|
||||
# than making physical copies.
|
||||
"symlink": True
|
||||
}
|
||||
|
||||
# Sets the weighting for each analysis. a higher weighting gives an analysis
|
||||
# higher precedence when finding the best matches.
|
||||
matcher_weightings = {
|
||||
"f0" : 1.,
|
||||
"spccntr" : 1.,
|
||||
"spcsprd" : 1.,
|
||||
"spcflux" : 1.,
|
||||
"spccf" : 1.,
|
||||
"spcflatness": 1.,
|
||||
"zerox" : 1.,
|
||||
"rms" : 1.,
|
||||
"peak": 1.,
|
||||
"centroid": 1.,
|
||||
"kurtosis": 1.,
|
||||
"skewness": 1.,
|
||||
"variance": 3.,
|
||||
"harm_ratio": 1.
|
||||
}
|
||||
|
||||
# Specifies the method for averaging analysis frames to create a single value
|
||||
# for comparing to other grains. Possible formatters are: 'mean', 'median',
|
||||
# 'log2_mean', 'log2_median'
|
||||
analysis_dict = {
|
||||
"f0": "log2_median",
|
||||
"rms": "mean",
|
||||
"zerox": "mean",
|
||||
"spccntr": "mean",
|
||||
"spcsprd": "mean",
|
||||
"spcflux": "mean",
|
||||
"spccf": "mean",
|
||||
"spcflatness": "mean",
|
||||
"peak": "mean",
|
||||
"centroid": "mean",
|
||||
"kurtosis": "mean",
|
||||
"skewness": "mean",
|
||||
"variance": "mean",
|
||||
"harm_ratio": "mean"
|
||||
}
|
||||
|
||||
analysis = {
|
||||
# Force the deletion of any pre-existing analyses to create new ones. This
|
||||
# is needed for overwriting old analyses generated with different
|
||||
# parameters to the current ones.
|
||||
"reanalyse": False
|
||||
}
|
||||
|
||||
matcher = {
|
||||
# Force the re-matching of analyses
|
||||
"rematch": True,
|
||||
"grain_size": 70,
|
||||
"overlap": 2,
|
||||
# Defines the number of matches to keep for synthesis. Note that this must
|
||||
# also be specified in the synthesis config
|
||||
"match_quantity": 1,
|
||||
# Choose the algorithm used to perform matching. kdtree is recommended for
|
||||
# larger datasets.
|
||||
"method": 'kdtree'
|
||||
}
|
||||
|
||||
synthesizer = {
|
||||
# Artificially scale the output grain by the difference in RMS values
|
||||
# between source and target.
|
||||
"enforce_rms": True,
|
||||
# Specify the limit on the ratio that the grain can be scaled by.
|
||||
"enf_rms_ratio_limit": 100.,
|
||||
# Artificially modify the pitch by the difference in f0 values between
|
||||
# source and target.
|
||||
"enforce_f0": True,
|
||||
# Specify the limit on the ratio that the grain can be modified by.
|
||||
"enf_f0_ratio_limit": 10.,
|
||||
"grain_size": 70,
|
||||
"overlap": 2,
|
||||
# Normalize output, avoid clipping of final output by scaling the final
|
||||
# frames.
|
||||
"normalize" : True,
|
||||
# Defines the number of potential grains to choose from matches when
|
||||
# synthesizing output.
|
||||
"match_quantity": 1
|
||||
}
|
||||
|
||||
output_file = {
|
||||
"samplerate": 44100,
|
||||
"format": 131075,
|
||||
"channels": 1
|
||||
}
|
||||
|
||||
Configuration Flags
|
||||
-------------------
|
||||
For quick modification of analysis parameters, parameter flags can be specified
|
||||
directly when calling the script. For example:
|
||||
|
||||
.. code:: bash
|
||||
|
||||
concatenator ./source_db ./target_db ./output_db --src_db \
|
||||
./analysed_source_db --tar_db ./analysed_tar_db --reanalyse --fft \
|
||||
'--window_size 2048'
|
||||
|
||||
This overwrites the value specified for window_size in the config file with the
|
||||
value provided.
|
||||
|
||||
When databases have already been created, previous data is used when re-running
|
||||
the script over them. This allows for different databases to be used without
|
||||
continuous reanalysis. However, if analysis or matching parameters are changed,
|
||||
the "--reanalyse" and "--rematch" flags can be used to force the overwriting of
|
||||
old data, using the new parameters.
|
||||
|
||||
|
||||
@@ -695,7 +695,8 @@ class DatabaseTests(globalTests):
|
||||
# Create database object
|
||||
database = AudioDatabase(
|
||||
"./.test_db",
|
||||
analysis_list=["rms", "zerox", "fft", "spccntr", "spcsprd", "f0"]
|
||||
analysis_list=["rms", "zerox", "fft", "spccntr", "spcsprd", "f0"],
|
||||
config=config
|
||||
)
|
||||
# Create/load a pre-existing database
|
||||
database.load_database(reanalyse=True)
|
||||
@@ -736,6 +737,7 @@ class MatcherTests(globalTests):
|
||||
# Create database object
|
||||
self.database1 = AudioDatabase(
|
||||
"./.test_db1",
|
||||
config=config
|
||||
)
|
||||
# Create/load a pre-existing database
|
||||
self.database1.load_database(reanalyse=True)
|
||||
@@ -762,6 +764,7 @@ class MatcherTests(globalTests):
|
||||
# Create database object
|
||||
self.database2 = AudioDatabase(
|
||||
"./.test_db2",
|
||||
config=config
|
||||
)
|
||||
# Create/load a pre-existing database
|
||||
self.database2.load_database(reanalyse=True)
|
||||
|
||||
@@ -88,3 +88,7 @@ output_file = {
|
||||
"format": 131075,
|
||||
"channels": 1
|
||||
}
|
||||
|
||||
database = {
|
||||
"symlink": True
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user