Wrote tutorial, tried to fix broken brute force matcher. still broken...
This commit is contained in:
@@ -96,10 +96,11 @@ def parse_arguments():
|
|||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'--src-db',
|
'--src_db',
|
||||||
help="Specifies the directory to create the source database and store analyses "
|
help="Specifies the directory to create the source database and store analyses "
|
||||||
"in. If not specified then the source directory will be used directly.",
|
"in. If not specified then the source directory will be used directly.",
|
||||||
type=str
|
type=str,
|
||||||
|
metavar=''
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
@@ -107,7 +108,8 @@ def parse_arguments():
|
|||||||
help="Specifies the directory to create the target database and store analyses "
|
help="Specifies the directory to create the target database and store analyses "
|
||||||
"in. If not specified then the target directory will be used directly.",
|
"in. If not specified then the target directory will be used directly.",
|
||||||
type=str,
|
type=str,
|
||||||
default=''
|
default='',
|
||||||
|
metavar=''
|
||||||
)
|
)
|
||||||
|
|
||||||
analyses = [
|
analyses = [
|
||||||
@@ -202,7 +204,13 @@ def parse_arguments():
|
|||||||
"'kdtree'",
|
"'kdtree'",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument('--verbose', '-v', action='count')
|
parser.add_argument(
|
||||||
|
'--verbose',
|
||||||
|
'-v',
|
||||||
|
action='count',
|
||||||
|
help='Specifies level of verbosity in output. For example: \'-vvvvv\' '
|
||||||
|
'will output all information. \'-v\' will output minimal information. '
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
for item in config_items:
|
for item in config_items:
|
||||||
@@ -256,6 +264,7 @@ def main():
|
|||||||
log_filename=modpath,
|
log_filename=modpath,
|
||||||
logger_filelevel=args.verbose
|
logger_filelevel=args.verbose
|
||||||
)
|
)
|
||||||
|
pdb.set_trace()
|
||||||
|
|
||||||
# Create/load a pre-existing source database
|
# Create/load a pre-existing source database
|
||||||
source_db = AudioDatabase(
|
source_db = AudioDatabase(
|
||||||
|
|||||||
+30
-8
@@ -1,23 +1,28 @@
|
|||||||
|
# Specify analysis parameters for root mean square analysis.
|
||||||
rms = {
|
rms = {
|
||||||
"window_size": 70,
|
"window_size": 70,
|
||||||
"overlap": 8,
|
"overlap": 2,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Specify analysis parameters for variance analysis.
|
||||||
variance = {
|
variance = {
|
||||||
"window_size": 70,
|
"window_size": 70,
|
||||||
"overlap": 8
|
"overlap": 2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Specify analysis parameters for temporal kurtosis analysis.
|
||||||
kurtosis = {
|
kurtosis = {
|
||||||
"window_size": 70,
|
"window_size": 70,
|
||||||
"overlap": 8
|
"overlap": 2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Specify analysis parameters for temporal skewness analysis.
|
||||||
skewness = {
|
skewness = {
|
||||||
"window_size": 70,
|
"window_size": 70,
|
||||||
"overlap": 8
|
"overlap": 2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Specify analysis parameters for FFT analysis.
|
||||||
fft = {
|
fft = {
|
||||||
"window_size": 65536
|
"window_size": 65536
|
||||||
}
|
}
|
||||||
@@ -28,6 +33,8 @@ database = {
|
|||||||
"symlink": True
|
"symlink": True
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Sets the weighting for each analysis. A higher weighting gives an analysis
|
||||||
|
# higher precedence when finding the best matches.
|
||||||
matcher_weightings = {
|
matcher_weightings = {
|
||||||
"f0" : 1.,
|
"f0" : 1.,
|
||||||
"spccntr" : 1.,
|
"spccntr" : 1.,
|
||||||
@@ -45,6 +52,9 @@ matcher_weightings = {
|
|||||||
"harm_ratio": 1.
|
"harm_ratio": 1.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Specifies the method for averaging analysis frames to create a single value
|
||||||
|
# for comparing to other grains. Possible formatters are: 'mean', 'median',
|
||||||
|
# 'log2_mean', 'log2_median'
|
||||||
analysis_dict = {
|
analysis_dict = {
|
||||||
"f0": "log2_median",
|
"f0": "log2_median",
|
||||||
"rms": "mean",
|
"rms": "mean",
|
||||||
@@ -63,32 +73,44 @@ analysis_dict = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
analysis = {
|
analysis = {
|
||||||
|
# Force the deletion of any pre-existing analyses to create new ones. This
|
||||||
|
# is needed for overwriting old analyses generated with different
|
||||||
|
# parameters to the current ones.
|
||||||
"reanalyse": False
|
"reanalyse": False
|
||||||
}
|
}
|
||||||
|
|
||||||
matcher = {
|
matcher = {
|
||||||
|
# Force the re-matching of analyses
|
||||||
"rematch": True,
|
"rematch": True,
|
||||||
"grain_size": 70,
|
"grain_size": 70,
|
||||||
"overlap": 8,
|
"overlap": 2,
|
||||||
# Defines the number of matches to keep for synthesis. Note that this must
|
# Defines the number of matches to keep for synthesis. Note that this must
|
||||||
# also be specified in the synthesis config
|
# also be specified in the synthesis config
|
||||||
"match_quantity": 1,
|
"match_quantity": 20,
|
||||||
# Choose the algorithm used to perform matching. kdtree is recommended for
|
# Choose the algorithm used to perform matching. kdtree is recommended for
|
||||||
# larger datasets.
|
# larger datasets.
|
||||||
"method": 'kdtree'
|
"method": 'kdtree'
|
||||||
}
|
}
|
||||||
|
|
||||||
synthesizer = {
|
synthesizer = {
|
||||||
|
# Artificially scale the output grain by the difference in RMS values
|
||||||
|
# between source and target.
|
||||||
"enforce_rms": True,
|
"enforce_rms": True,
|
||||||
|
# Specify the ratio limit that the grain can be scaled by.
|
||||||
"enf_rms_ratio_limit": 100.,
|
"enf_rms_ratio_limit": 100.,
|
||||||
|
# Artificially modify the pitch by the difference in f0 values between
|
||||||
|
# source and target.
|
||||||
"enforce_f0": True,
|
"enforce_f0": True,
|
||||||
|
# Specify the ratio limit that the grain can be modified by.
|
||||||
"enf_f0_ratio_limit": 10.,
|
"enf_f0_ratio_limit": 10.,
|
||||||
"grain_size": 70,
|
"grain_size": 70,
|
||||||
"overlap": 8,
|
"overlap": 2,
|
||||||
|
# Normalize output, avoid clipping of final output by scaling the final
|
||||||
|
# frames.
|
||||||
"normalize" : True,
|
"normalize" : True,
|
||||||
# Defines the number of potential grains to choose from matches when
|
# Defines the number of potential grains to choose from matches when
|
||||||
# synthesizing output.
|
# synthesizing output.
|
||||||
"match_quantity": 1
|
"match_quantity": 20
|
||||||
}
|
}
|
||||||
|
|
||||||
output_file = {
|
output_file = {
|
||||||
|
|||||||
@@ -493,12 +493,8 @@ class Matcher:
|
|||||||
# Create an array of grain times for target sample
|
# Create an array of grain times for target sample
|
||||||
target_times = target_entry.generate_grain_times(grain_size, overlap, save_times=True)
|
target_times = target_entry.generate_grain_times(grain_size, overlap, save_times=True)
|
||||||
|
|
||||||
# Stores an accumulated distance between source and target grains,
|
|
||||||
# added to by each analysis.
|
|
||||||
distance_accum = np.zeros((target_times.shape[0], source_sample_indexes[-1][-1]))
|
|
||||||
# Allocate memory for storing accumulated distances between
|
# Allocate memory for storing accumulated distances between
|
||||||
# source and target grains
|
# source and target grains
|
||||||
|
|
||||||
x_size = target_times.shape[0]
|
x_size = target_times.shape[0]
|
||||||
y_size = int(source_sample_indexes[-1][-1])
|
y_size = int(source_sample_indexes[-1][-1])
|
||||||
chunk_size = 8192
|
chunk_size = 8192
|
||||||
@@ -507,15 +503,17 @@ class Matcher:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
del self.output_db.data["data_distance"]
|
del self.output_db.data["data_distance"]
|
||||||
self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True)
|
except KeyError:
|
||||||
except RuntimeError:
|
pass
|
||||||
self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True)
|
|
||||||
|
self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
del self.output_db.data["distance_accum"]
|
del self.output_db.data["distance_accum"]
|
||||||
self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0)
|
except KeyError:
|
||||||
except RuntimeError:
|
pass
|
||||||
self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0)
|
|
||||||
|
self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0)
|
||||||
|
|
||||||
for analysis in self.matcher_analyses:
|
for analysis in self.matcher_analyses:
|
||||||
self.logger.info("Current analysis: {0}".format(analysis))
|
self.logger.info("Current analysis: {0}".format(analysis))
|
||||||
|
|||||||
@@ -141,7 +141,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"matcher.match(\n",
|
"matcher.match(\n",
|
||||||
" matcher.brute_force_matcher,\n",
|
" matcher.kdtree_matcher,\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -9,7 +9,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 4,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true
|
"collapsed": true
|
||||||
},
|
},
|
||||||
@@ -17,12 +17,12 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"from sppysound.database import AudioDatabase, Synthesizer, Matcher\n",
|
"from sppysound.database import AudioDatabase, Synthesizer, Matcher\n",
|
||||||
"import synthesis_config\n",
|
"import synthesis_config\n",
|
||||||
"import matching_config"
|
"import config"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 5,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true
|
"collapsed": true
|
||||||
},
|
},
|
||||||
@@ -43,16 +43,16 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 6,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": true
|
"collapsed": false
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"source_database = AudioDatabase(\n",
|
"source_database = AudioDatabase(\n",
|
||||||
" source_dir,\n",
|
" source_dir,\n",
|
||||||
" config=synthesis_config,\n",
|
" config=synthesis_config,\n",
|
||||||
" analysis_list={\"f0\", \"rms\", \"peak\"}\n",
|
" analysis_list={\"f0\", \"rms\"}\n",
|
||||||
")\n",
|
")\n",
|
||||||
"source_database.load_database(reanalyse=True)"
|
"source_database.load_database(reanalyse=True)"
|
||||||
]
|
]
|
||||||
@@ -67,22 +67,36 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 7,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false
|
"collapsed": false
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Traceback (most recent call last):\n",
|
||||||
|
" File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/database.py\", line 157, in analyse_database\n",
|
||||||
|
" config=self.config\n",
|
||||||
|
" File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/audiofile.py\", line 943, in __enter__\n",
|
||||||
|
" \"empty\".format(self.name))\n",
|
||||||
|
"IOError: File isn't valid: ElectricGuitarSample-out_output.wav\n",
|
||||||
|
"Check that file is mono and isn't empty\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"target_database = AudioDatabase(\n",
|
"target_database = AudioDatabase(\n",
|
||||||
" target_dir,\n",
|
" target_dir,\n",
|
||||||
" config=synthesis_config,\n",
|
" config=synthesis_config,\n",
|
||||||
" analysis_list={\"f0\", \"rms\", \"peak\"}\n",
|
" analysis_list={\"f0\", \"rms\"}\n",
|
||||||
")\n",
|
")\n",
|
||||||
"target_database.load_database(reanalyse=True)\n",
|
"target_database.load_database(reanalyse=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"output_database = AudioDatabase(\n",
|
"output_database = AudioDatabase(\n",
|
||||||
" output_dir,\n",
|
" output_dir,\n",
|
||||||
" config=synthesis_config\n",
|
" config=config\n",
|
||||||
")\n",
|
")\n",
|
||||||
"output_database.load_database(reanalyse=False)\n",
|
"output_database.load_database(reanalyse=False)\n",
|
||||||
"\n",
|
"\n",
|
||||||
@@ -90,11 +104,13 @@
|
|||||||
" source_database,\n",
|
" source_database,\n",
|
||||||
" target_database,\n",
|
" target_database,\n",
|
||||||
" output_db=output_database,\n",
|
" output_db=output_database,\n",
|
||||||
" config=matching_config,\n",
|
" config=config,\n",
|
||||||
" rematch=True\n",
|
" rematch=True\n",
|
||||||
")\n",
|
")\n",
|
||||||
"matcher.match(\n",
|
"matcher.match(\n",
|
||||||
" matcher.brute_force_matcher,\n",
|
" matcher.kdtree_matcher,\n",
|
||||||
|
" grain_size=config.matcher[\"grain_size\"],\n",
|
||||||
|
" overlap=config.matcher[\"overlap\"]\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -107,7 +123,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 8,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false
|
"collapsed": false
|
||||||
},
|
},
|
||||||
@@ -117,7 +133,7 @@
|
|||||||
" source_database, \n",
|
" source_database, \n",
|
||||||
" output_database, \n",
|
" output_database, \n",
|
||||||
" target_db=target_database, \n",
|
" target_db=target_database, \n",
|
||||||
" config=synthesis_config\n",
|
" config=config\n",
|
||||||
")"
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -130,14 +146,17 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 9,
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"collapsed": false,
|
"collapsed": false,
|
||||||
"scrolled": true
|
"scrolled": true
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"synthesizer.synthesize()"
|
"synthesizer.synthesize(\n",
|
||||||
|
" grain_size=config.synthesizer[\"grain_size\"],\n",
|
||||||
|
" overlap=config.synthesizer[\"overlap\"]\n",
|
||||||
|
")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -17,3 +17,7 @@ output_file = {
|
|||||||
"format": 131075,
|
"format": 131075,
|
||||||
"channels": 1
|
"channels": 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
database = {
|
||||||
|
"symlink": True
|
||||||
|
}
|
||||||
|
|||||||
@@ -87,15 +87,20 @@ concatenate.py Script Usage
|
|||||||
--enforcerms This flag enables scaling of matched grains to better
|
--enforcerms This flag enables scaling of matched grains to better
|
||||||
match the target's volume.
|
match the target's volume.
|
||||||
|
|
||||||
|
--copy This flag enables the copying of audio files from
|
||||||
|
their location to the database, rather than creating
|
||||||
|
symbolic links. This is useful for creating portable
|
||||||
|
databases.
|
||||||
|
|
||||||
--match_method Choose the algorithm to use when matching analyses. Available algorithms are:
|
--match_method Choose the algorithm to use when matching analyses. Available algorithms are:
|
||||||
|
|
||||||
Brute force: 'bruteforce'
|
Brute force: 'bruteforce'
|
||||||
|
|
||||||
K-d Tree Search: 'kdtree'
|
K-d Tree Search: 'kdtree'
|
||||||
|
|
||||||
--verbose, -v Specify the verbosity of the script's output. Additional
|
--verbose, -v Specifies level of verbosity in output. For example:
|
||||||
v will produce greater levels of detail ie. -vvvvv will
|
'-vvvvv' will output all information. '-v' will output
|
||||||
produce all messages.
|
minimal information.
|
||||||
|
|
||||||
-------------------
|
-------------------
|
||||||
AudioFile Class
|
AudioFile Class
|
||||||
|
|||||||
@@ -1,8 +0,0 @@
|
|||||||
Overview
|
|
||||||
========
|
|
||||||
Concatenator is a tool for synthesizing interpretations of a sound, through the
|
|
||||||
analysis and synthesis of audio grains from a database of sounds.
|
|
||||||
The program works by analysing overlapping segments of audio (known as grains)
|
|
||||||
from both the target sound and the source database, then searching for the
|
|
||||||
closest matching grain in the source database to the target sound. Finally, the
|
|
||||||
output is generated by overlap-adding the best matches.
|
|
||||||
@@ -30,3 +30,7 @@ output_file = {
|
|||||||
"format": 131075,
|
"format": 131075,
|
||||||
"channels": 1
|
"channels": 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
database = {
|
||||||
|
"symlink": True
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
.. _overview:
|
||||||
|
|
||||||
Overview
|
Overview
|
||||||
========
|
========
|
||||||
Concatenator is a tool for synthesizing interpretations of a sound, through the
|
Concatenator is a tool for synthesizing interpretations of a sound, through the
|
||||||
@@ -73,6 +75,15 @@ precedence over others based on user preference.
|
|||||||
The best match indexes are then saved to the output database ready for
|
The best match indexes are then saved to the output database ready for
|
||||||
synthesis.
|
synthesis.
|
||||||
|
|
||||||
|
There are currently two implementations for the matching algorithm:
|
||||||
|
|
||||||
|
- Brute Force
|
||||||
|
|
||||||
|
- K-d Tree Search
|
||||||
|
|
||||||
|
Both will return similar results; however, the K-d tree search algorithm is
|
||||||
|
far more efficient when analysing large datasets so is the preferred method.
|
||||||
|
|
||||||
.. graphviz::
|
.. graphviz::
|
||||||
|
|
||||||
digraph b {
|
digraph b {
|
||||||
@@ -162,7 +173,7 @@ database, performing any post-processing (such as pitch shifting and amplitude
|
|||||||
scaling) to improve the similarity of the match, then windowed overlap adding
|
scaling) to improve the similarity of the match, then windowed overlap adding
|
||||||
the grains to create the final output. The post-processing phase involves using
|
the grains to create the final output. The post-processing phase involves using
|
||||||
the ratio difference between the source and target grain to artificially alter
|
the ratio difference between the source and target grain to artificially alter
|
||||||
the source grain so that it better ressembles the target. This is particularly
|
the source grain so that it better resembles the target. This is particularly
|
||||||
useful when using small source databases as it improves the similarity of any
|
useful when using small source databases as it improves the similarity of any
|
||||||
match (important when best matches aren't very close to the target.) The final
|
match (important when best matches aren't very close to the target.) The final
|
||||||
output is saved to the output database's audio directory.
|
output is saved to the output database's audio directory.
|
||||||
|
|||||||
@@ -30,3 +30,7 @@ synthesizer = {
|
|||||||
# synthesizing output.
|
# synthesizing output.
|
||||||
"match_quantity": 20
|
"match_quantity": 20
|
||||||
}
|
}
|
||||||
|
|
||||||
|
database = {
|
||||||
|
"symlink": True
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,6 +1,304 @@
|
|||||||
Tutorial
|
Tutorial
|
||||||
========
|
========
|
||||||
|
|
||||||
|
This section gives a brief introduction to using the 'concatenator.py' script. The
|
||||||
|
script can be found in the src/sppysound directory of the project folder, or
|
||||||
|
can be accessed by running the 'concatenator' symbolic link from the project
|
||||||
|
folder root.
|
||||||
|
|
||||||
Getting Started
|
Getting Started
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
|
To view all available options simply run:
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
./concatenator -h
|
||||||
|
|
||||||
|
A list of all commands available is then presented:
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
usage: concatenator [-h] [--src_db] [--tar_db]
|
||||||
|
[--analyse [ANALYSE [ANALYSE ...]]] [--analysis_dict]
|
||||||
|
[--fft] [--kurtosis] [--matcher] [--matcher_weightings]
|
||||||
|
[--rms] [--skewness] [--synthesizer] [--variance]
|
||||||
|
[--reanalyse] [--rematch] [--enforcef0] [--enforcerms]
|
||||||
|
[--copy] [--match_method] [--verbose]
|
||||||
|
source target output
|
||||||
|
|
||||||
|
Concatenator is a tool for synthesizing interpretations of a sound, through
|
||||||
|
the analysis and synthesis of audio grains from a corpus database. The program
|
||||||
|
works by analysing overlapping segments of audio (known as grains) from both
|
||||||
|
the target sound and the source database, then searching for the closest
|
||||||
|
matching grain in the source database to the target sound. Finally, the output
|
||||||
|
is generated by overlap-adding the best matches.
|
||||||
|
|
||||||
|
positional arguments:
|
||||||
|
source Directory of source files/database to take grains from
|
||||||
|
when synthesizing output
|
||||||
|
target Directory of target files/database to match source
|
||||||
|
grains to.
|
||||||
|
output Directory to use as database for outputing results and
|
||||||
|
match information. Output audio will be stored in the
|
||||||
|
/audio sub-directory and match data will be stored in
|
||||||
|
the /data directory.
|
||||||
|
|
||||||
|
optional arguments:
|
||||||
|
-h, --help show this help message and exit
|
||||||
|
--src_db Specifies the directory to create the source database
|
||||||
|
and store analyses in. If not specified then the
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
For this demonstration, the following file structure will be used:
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
/Users/samuelperry/concatenator_test/
|
||||||
|
|-- source_db
|
||||||
|
| |-- Trumpet.novib.ff.A3.stereo.aif
|
||||||
|
| |-- Trumpet.novib.ff.A4.stereo.aif
|
||||||
|
| |-- Trumpet.novib.ff.A5.stereo.aif
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
| |-- Trumpet.novib.ff.F5.stereo.aif
|
||||||
|
| |-- Trumpet.novib.ff.G3.stereo.aif
|
||||||
|
| `-- Trumpet.novib.ff.G4.stereo.aif
|
||||||
|
`-- target_db
|
||||||
|
|-- target.01.wav
|
||||||
|
|-- target.02.wav
|
||||||
|
|-- target.03.wav
|
||||||
|
`-- target.04.wav
|
||||||
|
|
||||||
|
A source database containing a small selection of trumpet samples (acquired from
|
||||||
|
|
||||||
|
http://theremin.music.uiowa.edu/MIS.html) will be used to match grains with 4
|
||||||
|
target sounds. This will produce 4 output files, one for each target sound.
|
||||||
|
|
||||||
|
The following command is used to generate the output:
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
concatenator ./source_db ./target_db ./output_db --src_db \
|
||||||
|
./analysed_source_db --tar_db ./analysed_tar_db
|
||||||
|
|
||||||
|
The specified directories are searched recursively for audio files that are
|
||||||
|
used as items in the database. These items are then matched and synthesized as
|
||||||
|
explained in the :ref:`overview` section. Output is stored in the audio
|
||||||
|
directory of the output database that has been created.
|
||||||
|
This produces this directory structure:
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
/Users/samuelperry/concatenator_test/
|
||||||
|
|-- analysed_source_db
|
||||||
|
| |-- audio
|
||||||
|
| | |-- Trumpet.novib.ff.A3.stereo.aif -> (Symlink)
|
||||||
|
| | |-- Trumpet.novib.ff.A4.stereo.aif -> (Symlink)
|
||||||
|
| | |-- Trumpet.novib.ff.A5.stereo.aif -> (Symlink)
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
| | |-- Trumpet.novib.ff.F5.stereo.aif -> (Symlink)
|
||||||
|
| | |-- Trumpet.novib.ff.G3.stereo.aif -> (Symlink)
|
||||||
|
| | `-- Trumpet.novib.ff.G4.stereo.aif -> (Symlink)
|
||||||
|
| `-- data
|
||||||
|
| `-- analysis_data.hdf5
|
||||||
|
|-- analysed_tar_db
|
||||||
|
| |-- audio
|
||||||
|
| | |-- target.01.wav -> (Symlink)
|
||||||
|
| | |-- target.02.wav -> (Symlink)
|
||||||
|
| | |-- target.03.wav -> (Symlink)
|
||||||
|
| | `-- target.04.wav -> (Symlink)
|
||||||
|
| `-- data
|
||||||
|
| `-- analysis_data.hdf5
|
||||||
|
|-- output_db
|
||||||
|
| |-- audio
|
||||||
|
| | |-- target.01_output.wav
|
||||||
|
| | |-- target.02_output.wav
|
||||||
|
| | |-- target.03_output.wav
|
||||||
|
| | `-- target.04_output.wav
|
||||||
|
| `-- data
|
||||||
|
| `-- analysis_data.hdf5
|
||||||
|
|-- source_db
|
||||||
|
| |-- Trumpet.novib.ff.A3.stereo.aif
|
||||||
|
| |-- Trumpet.novib.ff.A4.stereo.aif
|
||||||
|
| |-- Trumpet.novib.ff.A5.stereo.aif
|
||||||
|
| |-- Trumpet.novib.ff.F5.stereo.aif
|
||||||
|
|
||||||
|
...
|
||||||
|
|
||||||
|
| |-- Trumpet.novib.ff.G3.stereo.aif
|
||||||
|
| `-- Trumpet.novib.ff.G4.stereo.aif
|
||||||
|
`-- target_db
|
||||||
|
|-- target.01.wav
|
||||||
|
|-- target.02.wav
|
||||||
|
|-- target.03.wav
|
||||||
|
`-- target.04.wav
|
||||||
|
|
||||||
|
By using the --src_db and --tar_db flags, alternative locations are specified
|
||||||
|
for generating the databases and storing analysis data. Symbolic links are
|
||||||
|
created, referencing the original audio files without moving them. This allows
|
||||||
|
large databases to be used in place without copying or moving their content.
|
||||||
|
|
||||||
|
Alternatively, databases can be generated in place by omitting the --src_db and
|
||||||
|
--tar_db flags. This will create the database directory structure directly in
|
||||||
|
the directories provided as source and target.
|
||||||
|
|
||||||
|
The --copy flag can be used in conjunction with these flags in order to create
|
||||||
|
actual copies of the audio files at the destinations. This allows for the
|
||||||
|
creation of portable databases that can be moved to other machines without
|
||||||
|
breaking links to the original files. (Any pre-existing symbolic links will be
|
||||||
|
overwritten with hard copies when using this option.)
|
||||||
|
|
||||||
|
config.py
|
||||||
|
---------
|
||||||
|
The config.py file is used for specifying all user defined options and can be
|
||||||
|
edited in the concatenator project directory. Comments explain the function of
|
||||||
|
each parameter. The default config.py file looks like this:
|
||||||
|
|
||||||
|
.. code:: python
|
||||||
|
|
||||||
|
# Specify analysis parameters for root mean square analysis.
|
||||||
|
rms = {
|
||||||
|
"window_size": 70,
|
||||||
|
"overlap": 2,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Specify analysis parameters for variance analysis.
|
||||||
|
variance = {
|
||||||
|
"window_size": 70,
|
||||||
|
"overlap": 2
|
||||||
|
}
|
||||||
|
|
||||||
|
# Specify analysis parameters for temporal kurtosis analysis.
|
||||||
|
kurtosis = {
|
||||||
|
"window_size": 70,
|
||||||
|
"overlap": 2
|
||||||
|
}
|
||||||
|
|
||||||
|
# Specify analysis parameters for temporal skewness analysis.
|
||||||
|
skewness = {
|
||||||
|
"window_size": 70,
|
||||||
|
"overlap": 2
|
||||||
|
}
|
||||||
|
|
||||||
|
# Specify analysis parameters for FFT analysis.
|
||||||
|
fft = {
|
||||||
|
"window_size": 65536
|
||||||
|
}
|
||||||
|
|
||||||
|
database = {
|
||||||
|
# Enables creation of symbolic links to files not in the database rather
|
||||||
|
# than making physical copies.
|
||||||
|
"symlink": True
|
||||||
|
}
|
||||||
|
|
||||||
|
# Sets the weighting for each analysis. A higher weighting gives an analysis
|
||||||
|
# higher precedence when finding the best matches.
|
||||||
|
matcher_weightings = {
|
||||||
|
"f0" : 1.,
|
||||||
|
"spccntr" : 1.,
|
||||||
|
"spcsprd" : 1.,
|
||||||
|
"spcflux" : 1.,
|
||||||
|
"spccf" : 1.,
|
||||||
|
"spcflatness": 1.,
|
||||||
|
"zerox" : 1.,
|
||||||
|
"rms" : 1.,
|
||||||
|
"peak": 1.,
|
||||||
|
"centroid": 1.,
|
||||||
|
"kurtosis": 1.,
|
||||||
|
"skewness": 1.,
|
||||||
|
"variance": 3.,
|
||||||
|
"harm_ratio": 1.
|
||||||
|
}
|
||||||
|
|
||||||
|
# Specifies the method for averaging analysis frames to create a single value
|
||||||
|
# for comparing to other grains. Possible formatters are: 'mean', 'median',
|
||||||
|
# 'log2_mean', 'log2_median'
|
||||||
|
analysis_dict = {
|
||||||
|
"f0": "log2_median",
|
||||||
|
"rms": "mean",
|
||||||
|
"zerox": "mean",
|
||||||
|
"spccntr": "mean",
|
||||||
|
"spcsprd": "mean",
|
||||||
|
"spcflux": "mean",
|
||||||
|
"spccf": "mean",
|
||||||
|
"spcflatness": "mean",
|
||||||
|
"peak": "mean",
|
||||||
|
"centroid": "mean",
|
||||||
|
"kurtosis": "mean",
|
||||||
|
"skewness": "mean",
|
||||||
|
"variance": "mean",
|
||||||
|
"harm_ratio": "mean"
|
||||||
|
}
|
||||||
|
|
||||||
|
analysis = {
|
||||||
|
# Force the deletion of any pre-existing analyses to create new ones. This
|
||||||
|
# is needed for overwriting old analyses generated with different
|
||||||
|
# parameters to the current ones.
|
||||||
|
"reanalyse": False
|
||||||
|
}
|
||||||
|
|
||||||
|
matcher = {
|
||||||
|
# Force the re-matching of analyses
|
||||||
|
"rematch": True,
|
||||||
|
"grain_size": 70,
|
||||||
|
"overlap": 2,
|
||||||
|
# Defines the number of matches to keep for synthesis. Note that this must
|
||||||
|
# also be specified in the synthesis config
|
||||||
|
"match_quantity": 1,
|
||||||
|
# Choose the algorithm used to perform matching. kdtree is recommended for
|
||||||
|
# larger datasets.
|
||||||
|
"method": 'kdtree'
|
||||||
|
}
|
||||||
|
|
||||||
|
synthesizer = {
|
||||||
|
# Artificially scale the output grain by the difference in RMS values
|
||||||
|
# between source and target.
|
||||||
|
"enforce_rms": True,
|
||||||
|
# Specify the ratio limit that the grain can be scaled by.
|
||||||
|
"enf_rms_ratio_limit": 100.,
|
||||||
|
# Artificially modify the pitch by the difference in f0 values between
|
||||||
|
# source and target.
|
||||||
|
"enforce_f0": True,
|
||||||
|
# Specify the ratio limit that the grain can be modified by.
|
||||||
|
"enf_f0_ratio_limit": 10.,
|
||||||
|
"grain_size": 70,
|
||||||
|
"overlap": 2,
|
||||||
|
# Normalize output, avoid clipping of final output by scaling the final
|
||||||
|
# frames.
|
||||||
|
"normalize" : True,
|
||||||
|
# Defines the number of potential grains to choose from matches when
|
||||||
|
# synthesizing output.
|
||||||
|
"match_quantity": 1
|
||||||
|
}
|
||||||
|
|
||||||
|
output_file = {
|
||||||
|
"samplerate": 44100,
|
||||||
|
"format": 131075,
|
||||||
|
"channels": 1
|
||||||
|
}
|
||||||
|
|
||||||
|
Configuration Flags
|
||||||
|
-------------------
|
||||||
|
For quick modification of analysis parameters, parameter flags can be specified
|
||||||
|
directly when calling the script. For example:
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
concatenator ./source_db ./target_db ./output_db --src_db \
|
||||||
|
./analysed_source_db --tar_db ./analysed_tar_db --reanalyse --fft \
|
||||||
|
'--window_size 2048'
|
||||||
|
|
||||||
|
This overwrites the value specified for window_size in the config file with the
|
||||||
|
value provided.
|
||||||
|
|
||||||
|
When databases have already been created, previous data is used when re-running
|
||||||
|
the script over them. This allows for different databases to be used without
|
||||||
|
continuous reanalysis. However, if analysis or matching parameters are changed,
|
||||||
|
the "--reanalyse" and "--rematch" flags can be used to force the overwriting of
|
||||||
|
old data, using the new parameters.
|
||||||
|
|
||||||
|
|||||||
@@ -695,7 +695,8 @@ class DatabaseTests(globalTests):
|
|||||||
# Create database object
|
# Create database object
|
||||||
database = AudioDatabase(
|
database = AudioDatabase(
|
||||||
"./.test_db",
|
"./.test_db",
|
||||||
analysis_list=["rms", "zerox", "fft", "spccntr", "spcsprd", "f0"]
|
analysis_list=["rms", "zerox", "fft", "spccntr", "spcsprd", "f0"],
|
||||||
|
config=config
|
||||||
)
|
)
|
||||||
# Create/load a pre-existing database
|
# Create/load a pre-existing database
|
||||||
database.load_database(reanalyse=True)
|
database.load_database(reanalyse=True)
|
||||||
@@ -736,6 +737,7 @@ class MatcherTests(globalTests):
|
|||||||
# Create database object
|
# Create database object
|
||||||
self.database1 = AudioDatabase(
|
self.database1 = AudioDatabase(
|
||||||
"./.test_db1",
|
"./.test_db1",
|
||||||
|
config=config
|
||||||
)
|
)
|
||||||
# Create/load a pre-existing database
|
# Create/load a pre-existing database
|
||||||
self.database1.load_database(reanalyse=True)
|
self.database1.load_database(reanalyse=True)
|
||||||
@@ -762,6 +764,7 @@ class MatcherTests(globalTests):
|
|||||||
# Create database object
|
# Create database object
|
||||||
self.database2 = AudioDatabase(
|
self.database2 = AudioDatabase(
|
||||||
"./.test_db2",
|
"./.test_db2",
|
||||||
|
config=config
|
||||||
)
|
)
|
||||||
# Create/load a pre-existing database
|
# Create/load a pre-existing database
|
||||||
self.database2.load_database(reanalyse=True)
|
self.database2.load_database(reanalyse=True)
|
||||||
|
|||||||
@@ -88,3 +88,7 @@ output_file = {
|
|||||||
"format": 131075,
|
"format": 131075,
|
||||||
"channels": 1
|
"channels": 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
database = {
|
||||||
|
"symlink": True
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user