diff --git a/config.py b/config.py new file mode 120000 index 0000000..10fd4c7 --- /dev/null +++ b/config.py @@ -0,0 +1 @@ +./src/sppysound/config.py \ No newline at end of file diff --git a/src/sppysound/concatenator.py b/src/sppysound/concatenator.py index d8f8ecb..bd9aa15 100755 --- a/src/sppysound/concatenator.py +++ b/src/sppysound/concatenator.py @@ -96,10 +96,11 @@ def parse_arguments(): ) parser.add_argument( - '--src-db', + '--src_db', help="Specifies the directory to create the source database and store analyses " "in. If not specified then the source directory will be used directly.", - type=str + type=str, + metavar='' ) parser.add_argument( @@ -107,7 +108,8 @@ def parse_arguments(): help="Specifies the directory to create the target database and store analyses " "in. If not specified then the target directory will be used directly.", type=str, - default='' + default='', + metavar='' ) analyses = [ @@ -202,7 +204,13 @@ def parse_arguments(): "'kdtree'", ) - parser.add_argument('--verbose', '-v', action='count') + parser.add_argument( + '--verbose', + '-v', + action='count', + help='Specifies level of verbosity in output. For example: \'-vvvvv\' ' + 'will output all information. \'-v\' will output minimal information. ' + ) args = parser.parse_args() for item in config_items: @@ -256,6 +264,7 @@ def main(): log_filename=modpath, logger_filelevel=args.verbose ) + pdb.set_trace() # Create/load a pre-existing source database source_db = AudioDatabase( diff --git a/src/sppysound/config.py b/src/sppysound/config.py index 6859afa..6ab8c90 100644 --- a/src/sppysound/config.py +++ b/src/sppysound/config.py @@ -1,23 +1,28 @@ +# Specify analysis parameters for root mean square analysis. rms = { "window_size": 70, - "overlap": 8, + "overlap": 2, } +# Specify analysis parameters for variance analysis. variance = { "window_size": 70, - "overlap": 8 + "overlap": 2 } +# Specify analysis parameters for temporal kurtosis analysis. 
kurtosis = { "window_size": 70, - "overlap": 8 + "overlap": 2 } +# Specify analysis parameters for temporal skewness analysis. skewness = { "window_size": 70, - "overlap": 8 + "overlap": 2 } +# Specify analysis parameters for FFT analysis. fft = { "window_size": 65536 } @@ -28,6 +33,8 @@ database = { "symlink": True } +# Sets the weighting for each analysis. a higher weighting gives an analysis +# higher presendence when finding the best matches. matcher_weightings = { "f0" : 1., "spccntr" : 1., @@ -45,6 +52,9 @@ matcher_weightings = { "harm_ratio": 1. } +# Specifies the method for averaging analysis frames to create a single value +# for comparing to other grains. Possible formatters are: 'mean', 'median', +# 'log2_mean', 'log2_median' analysis_dict = { "f0": "log2_median", "rms": "mean", @@ -63,32 +73,44 @@ analysis_dict = { } analysis = { + # Force the deletion of any pre-existing analyses to create new ones. This + # is needed for overwriting old analyses generated with different + # parameters to the current ones. "reanalyse": False } matcher = { + # Force the re-matching of analyses "rematch": True, "grain_size": 70, - "overlap": 8, + "overlap": 2, # Defines the number of matches to keep for synthesis. Note that this must # also be specified in the synthesis config - "match_quantity": 1, + "match_quantity": 20, # Choose the algorithm used to perform matching. kdtree is recommended for # larger datasets. "method": 'kdtree' } synthesizer = { + # Artificially scale the output grain by the difference in RMS values + # between source and target. "enforce_rms": True, + # Specify the ratio limit that is the grain can be scaled by. "enf_rms_ratio_limit": 100., + # Artificially modify the pitch by the difference in f0 values between + # source and target. "enforce_f0": True, + # Specify the ratio limit that is the grain can be modified by. 
"enf_f0_ratio_limit": 10., "grain_size": 70, - "overlap": 8, + "overlap": 2, + # Normalize output, avoid clipping of final output by scaling the final + # frames. "normalize" : True, # Defines the number of potential grains to choose from matches when # synthesizing output. - "match_quantity": 1 + "match_quantity": 20 } output_file = { diff --git a/src/sppysound/database.py b/src/sppysound/database.py index d663d7d..986bb16 100644 --- a/src/sppysound/database.py +++ b/src/sppysound/database.py @@ -493,12 +493,8 @@ class Matcher: # Create an array of grain times for target sample target_times = target_entry.generate_grain_times(grain_size, overlap, save_times=True) - # Stores an accumulated distance between source and target grains, - # added to by each analysis. - distance_accum = np.zeros((target_times.shape[0], source_sample_indexes[-1][-1])) # Allocate memory for storing accumulated distances between # source and target grains - x_size = target_times.shape[0] y_size = int(source_sample_indexes[-1][-1]) chunk_size = 8192 @@ -507,15 +503,17 @@ class Matcher: try: del self.output_db.data["data_distance"] - self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True) - except RuntimeError: - self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True) + except KeyError: + pass + + self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True) try: del self.output_db.data["distance_accum"] - self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0) - except RuntimeError: - self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0) + except KeyError: + pass + + self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0) for analysis in self.matcher_analyses: self.logger.info("Current analysis: 
{0}".format(analysis)) diff --git a/src/sppysound/docs/DatabaseMatchingExample.ipynb b/src/sppysound/docs/DatabaseMatchingExample.ipynb index 743114b..8070c06 100644 --- a/src/sppysound/docs/DatabaseMatchingExample.ipynb +++ b/src/sppysound/docs/DatabaseMatchingExample.ipynb @@ -141,7 +141,7 @@ "outputs": [], "source": [ "matcher.match(\n", - " matcher.brute_force_matcher,\n", + " matcher.kdtree_matcher,\n", ")" ] }, diff --git a/src/sppysound/docs/MatchSynthesisExample.ipynb b/src/sppysound/docs/MatchSynthesisExample.ipynb index 9ac8ea8..a4aab1f 100644 --- a/src/sppysound/docs/MatchSynthesisExample.ipynb +++ b/src/sppysound/docs/MatchSynthesisExample.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "metadata": { "collapsed": true }, @@ -17,12 +17,12 @@ "source": [ "from sppysound.database import AudioDatabase, Synthesizer, Matcher\n", "import synthesis_config\n", - "import matching_config" + "import config" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": { "collapsed": true }, @@ -43,16 +43,16 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": { - "collapsed": true + "collapsed": false }, "outputs": [], "source": [ "source_database = AudioDatabase(\n", " source_dir,\n", " config=synthesis_config,\n", - " analysis_list={\"f0\", \"rms\", \"peak\"}\n", + " analysis_list={\"f0\", \"rms\"}\n", ")\n", "source_database.load_database(reanalyse=True)" ] @@ -67,22 +67,36 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": { "collapsed": false }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Traceback (most recent call last):\n", + " File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/database.py\", line 157, in analyse_database\n", + " config=self.config\n", + " File 
\"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/audiofile.py\", line 943, in __enter__\n", + " \"empty\".format(self.name))\n", + "IOError: File isn't valid: ElectricGuitarSample-out_output.wav\n", + "Check that file is mono and isn't empty\n" + ] + } + ], "source": [ "target_database = AudioDatabase(\n", " target_dir,\n", " config=synthesis_config,\n", - " analysis_list={\"f0\", \"rms\", \"peak\"}\n", + " analysis_list={\"f0\", \"rms\"}\n", ")\n", "target_database.load_database(reanalyse=True)\n", "\n", "output_database = AudioDatabase(\n", " output_dir,\n", - " config=synthesis_config\n", + " config=config\n", ")\n", "output_database.load_database(reanalyse=False)\n", "\n", @@ -90,11 +104,13 @@ " source_database,\n", " target_database,\n", " output_db=output_database,\n", - " config=matching_config,\n", + " config=config,\n", " rematch=True\n", ")\n", "matcher.match(\n", - " matcher.brute_force_matcher,\n", + " matcher.kdtree_matcher,\n", + " grain_size=config.matcher[\"grain_size\"],\n", + " overlap=config.matcher[\"overlap\"]\n", ")" ] }, @@ -107,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": { "collapsed": false }, @@ -117,7 +133,7 @@ " source_database, \n", " output_database, \n", " target_db=target_database, \n", - " config=synthesis_config\n", + " config=config\n", ")" ] }, @@ -130,14 +146,17 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [], "source": [ - "synthesizer.synthesize()" + "synthesizer.synthesize(\n", + " grain_size=config.synthesizer[\"grain_size\"],\n", + " overlap=config.synthesizer[\"overlap\"]\n", + ")" ] }, { diff --git a/src/sppysound/docs/analysis_config.py b/src/sppysound/docs/analysis_config.py index a208228..99fc036 100644 --- a/src/sppysound/docs/analysis_config.py +++ b/src/sppysound/docs/analysis_config.py @@ -17,3 +17,7 @@ output_file = { "format": 131075, 
"channels": 1 } + +database = { + "symlink": True +} diff --git a/src/sppysound/docs/api.rst b/src/sppysound/docs/api.rst index 59b6bb2..3ce12da 100644 --- a/src/sppysound/docs/api.rst +++ b/src/sppysound/docs/api.rst @@ -87,15 +87,20 @@ concatenate.py Script Usage --enforcerms This flag enables scaling of matched grains to better match the target's volume. +--copy This flag enables the copying of audio files from + their location to the database, rather than creating + symbolic links. This is useful for creating portable + databases. + --match_method Choose the algorithm to use when matching analyses. Available algorithms are: Brute force: 'bruteforce' K-d Tree Search: 'kdtree' ---verbose, -v Specify the verbosity of the script's output. Additional - v will produce greater levels of detail ie. -vvvvv will - produce all messages. +--verbose, -v Specifies level of verbosity in output. For example: + '-vvvvv' will output all information. '-v' will output + minimal information. ------------------- AudioFile Class diff --git a/src/sppysound/docs/intro.rst b/src/sppysound/docs/intro.rst deleted file mode 100644 index 08f1f78..0000000 --- a/src/sppysound/docs/intro.rst +++ /dev/null @@ -1,8 +0,0 @@ -Overview -======== -Concatenator is a tool for synthesizing interpretations of a sound, through the -analysis and synthesis of audio grains from a database of sounds. -The program works by analysing overlapping segments of audio (known as grains) -from both the target sound and the source database, then searching for the -closest matching grain in the source database to the target sound. Finally, the -output is generated by overlap-adding the best matches. 
diff --git a/src/sppysound/docs/matching_config.py b/src/sppysound/docs/matching_config.py index ff41614..4d63733 100644 --- a/src/sppysound/docs/matching_config.py +++ b/src/sppysound/docs/matching_config.py @@ -30,3 +30,7 @@ output_file = { "format": 131075, "channels": 1 } + +database = { + "symlink": True +} diff --git a/src/sppysound/docs/overview.rst b/src/sppysound/docs/overview.rst index 35d4e8b..8d91cf6 100644 --- a/src/sppysound/docs/overview.rst +++ b/src/sppysound/docs/overview.rst @@ -1,3 +1,5 @@ +.. _overview: + Overview ======== Concatenator is a tool for synthesizing interpretations of a sound, through the @@ -73,6 +75,15 @@ precedence over others based on user preference. The best match indexes are then saved to the output database ready for synthesis. +There are currently two implementations for the matching algorithm: + +- Brute Force + +- K-d Tree Search + +Both will return similar results, however the K-d tree search algorithm is +far more efficient when analysing large datasets so is the preferred method. + .. graphviz:: digraph b { @@ -162,7 +173,7 @@ database, performing any post-processing (such as pitch shifting and amplitude scaling) to improve the similarity of the match, then windowed overlap adding the grains to create the final output. The post-processing phase involves using the ratio difference between the source and target grain to artificially alter -the source grain so that it better ressembles the target. This is particularly +the source grain so that it better resembles the target. This is particularly useful when using small source databases as it improves the similarity of any match (important when best matches aren't very close to the target.) The final output is saved to the output database's audio directory. 
diff --git a/src/sppysound/docs/synthesis_config.py b/src/sppysound/docs/synthesis_config.py index 305c34f..a3afa73 100644 --- a/src/sppysound/docs/synthesis_config.py +++ b/src/sppysound/docs/synthesis_config.py @@ -30,3 +30,7 @@ synthesizer = { # synthesizing output. "match_quantity": 20 } + +database = { + "symlink": True +} diff --git a/src/sppysound/docs/tutorial.rst b/src/sppysound/docs/tutorial.rst index 01aad7d..5abd04c 100644 --- a/src/sppysound/docs/tutorial.rst +++ b/src/sppysound/docs/tutorial.rst @@ -1,6 +1,304 @@ Tutorial ======== +This section gives a brief introduction to using the 'concatenator.py' script. The +script can be found in the src/sppysound directory of the project folder, or +can be accessed by running the 'concatenator' symbolic link from the project +folder root. + Getting Started --------------- +To view all available options simply run: + +.. code:: bash + + ./concatenator -h + +A list of all commands available is then presented: + +.. code:: bash + + usage: concatenator [-h] [--src-db] [--tar_db] + [--analyse [ANALYSE [ANALYSE ...]]] [--analysis_dict] + [--fft] [--kurtosis] [--matcher] [--matcher_weightings] + [--rms] [--skewness] [--synthesizer] [--variance] + [--reanalyse] [--rematch] [--enforcef0] [--enforcerms] + [--copy] [--match_method] [--verbose] + source target output + + Concatenator is a tool for synthesizing interpretations of a sound, through + the analysis and synthesis of audio grains from a corpus database. The program + works by analysing overlapping segments of audio (known as grains) from both + the target sound and the source database, then searching for the closest + matching grain in the source database to the target sound. Finally, the output + is generated by overlap-adding the best matches. + + positional arguments: + source Directory of source files/database to take grains from + when synthesizing output + target Directory of target files/database to match source + grains to. 
+ output Directory to use as database for outputing results and + match information. Output audio will be stored in the + /audio sub-directory and match data will be stored in + the /data directory. + + optional arguments: + -h, --help show this help message and exit + --src-db Specifies the directory to create the source database + and store analyses in. If not specified then the + + ... + +For this demonstration, the following file structure will be used: + +.. code:: bash + + /Users/samuelperry/concatenator_test/ + |-- source_db + | |-- Trumpet.novib.ff.A3.stereo.aif + | |-- Trumpet.novib.ff.A4.stereo.aif + | |-- Trumpet.novib.ff.A5.stereo.aif + + ... + + | |-- Trumpet.novib.ff.F5.stereo.aif + | |-- Trumpet.novib.ff.G3.stereo.aif + | `-- Trumpet.novib.ff.G4.stereo.aif + `-- target_db + |-- target.01.wav + |-- target.02.wav + |-- target.03.wav + `-- target.04.wav + +A source database containing a small selection of trumpet samples (acquired from + +http://theremin.music.uiowa.edu/MIS.html) will be used to match grains with 4 +target sounds. This will produce 4 output files, one for each target sound. + +The following command is used to generate the output: + +.. code:: bash + + concatenator ./source_db ./target_db ./output_db --src_db \ + ./analysed_source_db --tar_db ./analysed_tar_db + +The specified directories are searched recursively for audio files that are +used as items in the database. These items are then matched and synthesized as +explained in the :ref:`overview` section. Output is stored in the audio +directory of the output database that has been created. +This produces this directory structure: + +.. code:: bash + + /Users/samuelperry/concatenator_test/ + |-- analysed_source_db + | |-- audio + | | |-- Trumpet.novib.ff.A3.stereo.aif -> (Symlink) + | | |-- Trumpet.novib.ff.A4.stereo.aif -> (Symlink) + | | |-- Trumpet.novib.ff.A5.stereo.aif -> (Symlink) + + ...
+ + | | |-- Trumpet.novib.ff.F5.stereo.aif -> (Symlink) + | | |-- Trumpet.novib.ff.G3.stereo.aif -> (Symlink) + | | `-- Trumpet.novib.ff.G4.stereo.aif -> (Symlink) + | `-- data + | `-- analysis_data.hdf5 + |-- analysed_tar_db + | |-- audio + | | |-- target.01.wav -> (Symlink) + | | |-- target.02.wav -> (Symlink) + | | |-- target.03.wav -> (Symlink) + | | `-- target.04.wav -> (Symlink) + | `-- data + | `-- analysis_data.hdf5 + |-- output_db + | |-- audio + | | |-- target.01_output.wav + | | |-- target.02_output.wav + | | |-- target.03_output.wav + | | `-- target.04_output.wav + | `-- data + | `-- analysis_data.hdf5 + |-- source_db + | |-- Trumpet.novib.ff.A3.stereo.aif + | |-- Trumpet.novib.ff.A4.stereo.aif + | |-- Trumpet.novib.ff.A5.stereo.aif + | |-- Trumpet.novib.ff.F5.stereo.aif + + ... + + | |-- Trumpet.novib.ff.G3.stereo.aif + | `-- Trumpet.novib.ff.G4.stereo.aif + `-- target_db + |-- target.01.wav + |-- target.02.wav + |-- target.03.wav + `-- target.04.wav + +By using the --src_db and --tar_db flags, alternative locations are specified +for generating the databases and storing analysis data. Symbolic links are +created, referencing the original audio files without moving them. This allows +large databases to be used in place without copying or moving their content. + +Alternatively, databases can be generated in place by omitting the --src_db and +--tar_db flags. This will create the database directory structure directly in +the directories provided as source and target. + +The --copy flag can be used in conjunction with these flags in order to create +actual copies of the audio files at the destinations. This allows for the +creation of portable databases that can be moved to other machines without +breaking links to the original files. (Any pre-existing symbolic links will be +overwritten with hard copies when using this option.)
+ +config.py +--------- +The config.py file is used for specifying all user defined options and can be +edited in the concatenator project directory. Comments explain the function of +each parameter. The default config.py file looks like this: + +.. code:: python + + # Specify analysis parameters for root mean square analysis. + rms = { + "window_size": 70, + "overlap": 2, + } + + # Specify analysis parameters for variance analysis. + variance = { + "window_size": 70, + "overlap": 2 + } + + # Specify analysis parameters for temporal kurtosis analysis. + kurtosis = { + "window_size": 70, + "overlap": 2 + } + + # Specify analysis parameters for temporal skewness analysis. + skewness = { + "window_size": 70, + "overlap": 2 + } + + # Specify analysis parameters for FFT analysis. + fft = { + "window_size": 65536 + } + + database = { + # Enables creation of symbolic links to files not in the database rather + # than making physical copies. + "symlink": True + } + + # Sets the weighting for each analysis. A higher weighting gives an analysis + # higher precedence when finding the best matches. + matcher_weightings = { + "f0" : 1., + "spccntr" : 1., + "spcsprd" : 1., + "spcflux" : 1., + "spccf" : 1., + "spcflatness": 1., + "zerox" : 1., + "rms" : 1., + "peak": 1., + "centroid": 1., + "kurtosis": 1., + "skewness": 1., + "variance": 3., + "harm_ratio": 1. + } + + # Specifies the method for averaging analysis frames to create a single value + # for comparing to other grains. Possible formatters are: 'mean', 'median', + # 'log2_mean', 'log2_median' + analysis_dict = { + "f0": "log2_median", + "rms": "mean", + "zerox": "mean", + "spccntr": "mean", + "spcsprd": "mean", + "spcflux": "mean", + "spccf": "mean", + "spcflatness": "mean", + "peak": "mean", + "centroid": "mean", + "kurtosis": "mean", + "skewness": "mean", + "variance": "mean", + "harm_ratio": "mean" + } + + analysis = { + # Force the deletion of any pre-existing analyses to create new ones.
This + # is needed for overwriting old analyses generated with different + # parameters to the current ones. + "reanalyse": False + } + + matcher = { + # Force the re-matching of analyses + "rematch": True, + "grain_size": 70, + "overlap": 2, + # Defines the number of matches to keep for synthesis. Note that this must + # also be specified in the synthesis config + "match_quantity": 20, + # Choose the algorithm used to perform matching. kdtree is recommended for + # larger datasets. + "method": 'kdtree' + } + + synthesizer = { + # Artificially scale the output grain by the difference in RMS values + # between source and target. + "enforce_rms": True, + # Specify the ratio limit that the grain can be scaled by. + "enf_rms_ratio_limit": 100., + # Artificially modify the pitch by the difference in f0 values between + # source and target. + "enforce_f0": True, + # Specify the ratio limit that the grain can be modified by. + "enf_f0_ratio_limit": 10., + "grain_size": 70, + "overlap": 2, + # Normalize output, avoid clipping of final output by scaling the final + # frames. + "normalize" : True, + # Defines the number of potential grains to choose from matches when + # synthesizing output. + "match_quantity": 20 + } + + output_file = { + "samplerate": 44100, + "format": 131075, + "channels": 1 + } + +Configuration Flags +------------------- +For quick modification of analysis parameters, parameter flags can be specified +directly when calling the script. For example: + +.. code:: bash + + concatenator ./source_db ./target_db ./output_db --src_db \ + ./analysed_source_db --tar_db ./analysed_tar_db --reanalyse --fft \ + '--window_size 2048' + +This overwrites the value specified for window_size in the config file with the +value provided. + +When databases have already been created, previous data is used when re-running +the script over them. This allows for different databases to be used without +continuous reanalysis.
However, if analysis or matching parameters are changed, +the "--reanalyse" and "--rematch" flags can be used to force the overwriting of +old data, using the new parameters. + diff --git a/src/tests/audiofile_tests.py b/src/tests/audiofile_tests.py index a12bb39..4ad6929 100644 --- a/src/tests/audiofile_tests.py +++ b/src/tests/audiofile_tests.py @@ -695,7 +695,8 @@ class DatabaseTests(globalTests): # Create database object database = AudioDatabase( "./.test_db", - analysis_list=["rms", "zerox", "fft", "spccntr", "spcsprd", "f0"] + analysis_list=["rms", "zerox", "fft", "spccntr", "spcsprd", "f0"], + config=config ) # Create/load a pre-existing database database.load_database(reanalyse=True) @@ -736,6 +737,7 @@ class MatcherTests(globalTests): # Create database object self.database1 = AudioDatabase( "./.test_db1", + config=config ) # Create/load a pre-existing database self.database1.load_database(reanalyse=True) @@ -762,6 +764,7 @@ class MatcherTests(globalTests): # Create database object self.database2 = AudioDatabase( "./.test_db2", + config=config ) # Create/load a pre-existing database self.database2.load_database(reanalyse=True) diff --git a/src/tests/config.py b/src/tests/config.py index 6f03eb3..5112db9 100644 --- a/src/tests/config.py +++ b/src/tests/config.py @@ -88,3 +88,7 @@ output_file = { "format": 131075, "channels": 1 } + +database = { + "symlink": True +}