Wrote tutorial, tried to fix broken brute force matcher. still broken...

2016-03-26 01:13:04 +00:00
parent 17e7bc09c9
commit b5fa78bee8
15 changed files with 426 additions and 52 deletions
@@ -0,0 +1 @@
+./src/sppysound/config.py
@@ -96,10 +96,11 @@ def parse_arguments():
    )

    parser.add_argument(
-        '--src-db',
+        '--src_db',
        help="Specifies the directory to create the source database and store analyses "
        "in. If not specified then the source directory will be used directly.",
-        type=str
+        type=str,
+        metavar=''
    )

    parser.add_argument(
@@ -107,7 +108,8 @@ def parse_arguments():
        help="Specifies the directory to create the target database and store analyses "
        "in. If not specified then the target directory will be used directly.",
        type=str,
-        default=''
+        default='',
+        metavar=''
    )

    analyses = [
@@ -202,7 +204,13 @@ def parse_arguments():
        "'kdtree'",
    )

-    parser.add_argument('--verbose', '-v', action='count')
+    parser.add_argument(
+        '--verbose',
+        '-v',
+        action='count',
+        help='Specifies level of verbosity in output. For example: \'-vvvvv\' '
+        'will output all information. \'-v\' will output minimal information. '
+    )

    args = parser.parse_args()
    for item in config_items:
@@ -256,6 +264,7 @@ def main():
        log_filename=modpath,
        logger_filelevel=args.verbose
    )
+    pdb.set_trace()

    # Create/load a pre-existing source database
    source_db = AudioDatabase(
@@ -1,23 +1,28 @@
+# Specify analysis parameters for root mean square analysis.
 rms = {
    "window_size": 70,
-    "overlap": 8,
+    "overlap": 2,
 }

+# Specify analysis parameters for variance analysis.
 variance = {
    "window_size": 70,
-    "overlap": 8
+    "overlap": 2
 }

+# Specify analysis parameters for temporal kurtosis analysis.
 kurtosis = {
    "window_size": 70,
-    "overlap": 8
+    "overlap": 2
 }

+# Specify analysis parameters for temporal skewness analysis.
 skewness = {
    "window_size": 70,
-    "overlap": 8
+    "overlap": 2
 }

+# Specify analysis parameters for FFT analysis.
 fft = {
    "window_size": 65536
 }
@@ -28,6 +33,8 @@ database = {
    "symlink": True
 }

+# Sets the weighting for each analysis. A higher weighting gives an analysis
+# higher precedence when finding the best matches.
 matcher_weightings = {
    "f0" : 1.,
    "spccntr" : 1.,
@@ -45,6 +52,9 @@ matcher_weightings = {
    "harm_ratio": 1.
 }

+# Specifies the method for averaging analysis frames to create a single value
+# for comparing to other grains. Possible formatters are: 'mean', 'median',
+# 'log2_mean', 'log2_median'
 analysis_dict = {
    "f0": "log2_median",
    "rms": "mean",
@@ -63,32 +73,44 @@ analysis_dict = {
 }

 analysis = {
+    # Force the deletion of any pre-existing analyses to create new ones. This
+    # is needed for overwriting old analyses generated with different
+    # parameters to the current ones.
    "reanalyse": False
 }

 matcher = {
+    # Force the re-matching of analyses
    "rematch": True,
    "grain_size": 70,
-    "overlap": 8,
+    "overlap": 2,
    # Defines the number of matches to keep for synthesis. Note that this must
    # also be specified in the synthesis config
-    "match_quantity": 1,
+    "match_quantity": 20,
    # Choose the algorithm used to perform matching. kdtree is recommended for
    # larger datasets.
    "method": 'kdtree'
 }

 synthesizer = {
+    # Artificially scale the output grain by the difference in RMS values
+    # between source and target.
    "enforce_rms": True,
+    # Specify the ratio limit that the grain can be scaled by.
    "enf_rms_ratio_limit": 100.,
+    # Artificially modify the pitch by the difference in f0 values between
+    # source and target.
    "enforce_f0": True,
+    # Specify the ratio limit that the grain can be modified by.
    "enf_f0_ratio_limit": 10.,
    "grain_size": 70,
-    "overlap": 8,
+    "overlap": 2,
+    # Normalize output, avoid clipping of final output by scaling the final
+    # frames.
    "normalize" : True,
    # Defines the number of potential grains to choose from matches when
    # synthesizing output.
-    "match_quantity": 1
+    "match_quantity": 20
 }

 output_file = {
@@ -493,12 +493,8 @@ class Matcher:
            # Create an array of grain times for target sample
            target_times = target_entry.generate_grain_times(grain_size, overlap, save_times=True)

-            # Stores an accumulated distance between source and target grains,
-            # added to by each analysis.
-            distance_accum = np.zeros((target_times.shape[0], source_sample_indexes[-1][-1]))
            # Allocate memory for storing accumulated distances between
            # source and target grains
-
            x_size = target_times.shape[0]
            y_size = int(source_sample_indexes[-1][-1])
            chunk_size = 8192
@@ -507,14 +503,16 @@ class Matcher:

            try:
                del self.output_db.data["data_distance"]
-                self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True)
-            except RuntimeError:
+            except KeyError:
+                pass
+
            self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True)

            try:
                del self.output_db.data["distance_accum"]
-                self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0)
-            except RuntimeError:
+            except KeyError:
+                pass
+
            self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0)

            for analysis in self.matcher_analyses:
@@ -141,7 +141,7 @@
   "outputs": [],
   "source": [
    "matcher.match(\n",
-    "    matcher.brute_force_matcher,\n",
+    "    matcher.kdtree_matcher,\n",
    ")"
   ]
  },
@@ -9,7 +9,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
@@ -17,12 +17,12 @@
   "source": [
    "from sppysound.database import AudioDatabase, Synthesizer, Matcher\n",
    "import synthesis_config\n",
-    "import matching_config"
+    "import config"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
@@ -43,16 +43,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
   "metadata": {
-    "collapsed": true
+    "collapsed": false
   },
   "outputs": [],
   "source": [
    "source_database = AudioDatabase(\n",
    "    source_dir,\n",
    "    config=synthesis_config,\n",
-    "    analysis_list={\"f0\", \"rms\", \"peak\"}\n",
+    "    analysis_list={\"f0\", \"rms\"}\n",
    ")\n",
    "source_database.load_database(reanalyse=True)"
   ]
@@ -67,22 +67,36 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Traceback (most recent call last):\n",
+      "  File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/database.py\", line 157, in analyse_database\n",
+      "    config=self.config\n",
+      "  File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/audiofile.py\", line 943, in __enter__\n",
+      "    \"empty\".format(self.name))\n",
+      "IOError: File isn't valid: ElectricGuitarSample-out_output.wav\n",
+      "Check that file is mono and isn't empty\n"
+     ]
+    }
+   ],
   "source": [
    "target_database = AudioDatabase(\n",
    "    target_dir,\n",
    "    config=synthesis_config,\n",
-    "    analysis_list={\"f0\", \"rms\", \"peak\"}\n",
+    "    analysis_list={\"f0\", \"rms\"}\n",
    ")\n",
    "target_database.load_database(reanalyse=True)\n",
    "\n",
    "output_database = AudioDatabase(\n",
    "    output_dir,\n",
-    "    config=synthesis_config\n",
+    "    config=config\n",
    ")\n",
    "output_database.load_database(reanalyse=False)\n",
    "\n",
@@ -90,11 +104,13 @@
    "    source_database,\n",
    "    target_database,\n",
    "    output_db=output_database,\n",
-    "    config=matching_config,\n",
+    "    config=config,\n",
    "    rematch=True\n",
    ")\n",
    "matcher.match(\n",
-    "    matcher.brute_force_matcher,\n",
+    "    matcher.kdtree_matcher,\n",
+    "    grain_size=config.matcher[\"grain_size\"],\n",
+    "    overlap=config.matcher[\"overlap\"]\n",
    ")"
   ]
  },
@@ -107,7 +123,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
@@ -117,7 +133,7 @@
    "    source_database, \n",
    "    output_database, \n",
    "    target_db=target_database, \n",
-    "    config=synthesis_config\n",
+    "    config=config\n",
    ")"
   ]
  },
@@ -130,14 +146,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 9,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
-    "synthesizer.synthesize()"
+    "synthesizer.synthesize(\n",
+    "    grain_size=config.synthesizer[\"grain_size\"],\n",
+    "    overlap=config.synthesizer[\"overlap\"]\n",
+    ")"
   ]
  },
  {
@@ -17,3 +17,7 @@ output_file = {
    "format": 131075,
    "channels": 1
 }
+
+database = {
+    "symlink": True
+}
@@ -87,15 +87,20 @@ concatenate.py Script Usage
 --enforcerms          This flag enables scaling of matched grains to better
                      match the target's volume.

+--copy                This flag enables the copying of audio files from
+                      their location to the database, rather than creating
+                      symbolic links. This is useful for creating portable
+                      databases.
+
 --match_method        Choose the algorithm to use when matching analyses. Available algorithms are:

                         Brute force: 'bruteforce'

                         K-d Tree Search: 'kdtree'

--verbose, -v         Specify the verbosity of the script's output. Additional
-                      v will produce greater levels of detail ie. -vvvvv will 
-                      produce all messages.
+--verbose, -v         Specifies level of verbosity in output. For example:
+                      '-vvvvv' will output all information. '-v' will output
+                      minimal information.

 -------------------
 AudioFile Class
@@ -1,8 +0,0 @@
-Overview
-========
-Concatenator is a tool for synthesizing interpretations of a sound, through the
-analysis and synthesis of audio grains from a database of sounds.
-The program works by analysing overlapping segments of audio (known as grains)
-from both the target sound and the source database, then searching for the
-closest matching grain in the source database to the target sound. Finally, the
-output is generated by overlap-adding the best matches.
@@ -30,3 +30,7 @@ output_file = {
    "format": 131075,
    "channels": 1
 }
+
+database = {
+    "symlink": True
+}
@@ -1,3 +1,5 @@
+.. _overview:
+
 Overview
 ========
 Concatenator is a tool for synthesizing interpretations of a sound, through the
@@ -73,6 +75,15 @@ precedence over others based on user preference.
 The best match indexes are then saved to the output database ready for
 synthesis.

+There are currently two implementations for the matching algorithm:
+
+- Brute Force
+
+- K-d Tree Search
+
+Both will return similar results; however, the K-d tree search algorithm is
+far more efficient when analysing large datasets, so it is the preferred method.
+
 .. graphviz::

   digraph b {
@@ -162,7 +173,7 @@ database, performing any post-processing (such as pitch shifting and amplitude
 scaling) to improve the similarity of the match, then windowed overlap adding
 the grains to create the final output. The post-processing phase involves using
 the ratio difference between the source and target grain to artificially alter
-the source grain so that it better ressembles the target. This is particularly
+the source grain so that it better resembles the target. This is particularly
 useful when using small source databases as it improves the similarity of any
 match (important when best matches aren't very close to the target.) The final
 output is saved to the output database's audio directory.
@@ -30,3 +30,7 @@ synthesizer = {
    # synthesizing output.
    "match_quantity": 20
 }
+
+database = {
+    "symlink": True
+}
@@ -1,6 +1,304 @@
 Tutorial
 ========

+This section gives a brief introduction to using the 'concatenator.py' script. The
+script can be found in the src/sppysound directory of the project folder, or
+can be accessed by running the 'concatenator' symbolic link from the project
+folder root.
+
 Getting Started
 ---------------

+To view all available options simply run:
+
+.. code:: bash
+
+    ./concatenator -h
+
+A list of all commands available is then presented:
+
+.. code:: bash
+
+    usage: concatenator [-h] [--src_db] [--tar_db]
+                    [--analyse [ANALYSE [ANALYSE ...]]] [--analysis_dict]
+                    [--fft] [--kurtosis] [--matcher] [--matcher_weightings]
+                    [--rms] [--skewness] [--synthesizer] [--variance]
+                    [--reanalyse] [--rematch] [--enforcef0] [--enforcerms]
+                    [--copy] [--match_method] [--verbose]
+                    source target output
+
+    Concatenator is a tool for synthesizing interpretations of a sound, through
+    the analysis and synthesis of audio grains from a corpus database. The program
+    works by analysing overlapping segments of audio (known as grains) from both
+    the target sound and the source database, then searching for the closest
+    matching grain in the source database to the target sound. Finally, the output
+    is generated by overlap-adding the best matches.
+
+    positional arguments:
+      source                Directory of source files/database to take grains from
+                            when synthesizing output
+      target                Directory of target files/database to match source
+                            grains to.
+      output                Directory to use as database for outputing results and
+                            match information. Output audio will be stored in the
+                            /audio sub-directory and match data will be stored in
+                            the /data directory.
+
+    optional arguments:
+      -h, --help            show this help message and exit
+      --src_db              Specifies the directory to create the source database
+                            and store analyses in. If not specified then the
+
+    ...
+
+For this demonstration, the following file structure will be used:
+
+.. code:: bash
+
+    /Users/samuelperry/concatenator_test/
+    |-- source_db
+    |   |-- Trumpet.novib.ff.A3.stereo.aif
+    |   |-- Trumpet.novib.ff.A4.stereo.aif
+    |   |-- Trumpet.novib.ff.A5.stereo.aif
+
+    ...
+
+    |   |-- Trumpet.novib.ff.F5.stereo.aif
+    |   |-- Trumpet.novib.ff.G3.stereo.aif
+    |   `-- Trumpet.novib.ff.G4.stereo.aif
+    `-- target_db
+        |-- target.01.wav
+        |-- target.02.wav
+        |-- target.03.wav
+        `-- target.04.wav
+
+A source database containing a small selection of trumpet samples (acquired from
+
+http://theremin.music.uiowa.edu/MIS.html) will be used to match grains with 4
+target sounds. This will produce 4 output files, one for each target sound.
+
+The following command is used to generate the output:
+
+.. code:: bash
+
+    concatenator ./source_db ./target_db ./output_db --src_db \
+    ./analysed_source_db --tar_db ./analysed_tar_db
+
+The specified directories are searched recursively for audio files that are
+used as items in the database. These items are then matched and synthesized as
+explained in the :ref:`overview` section. Output is stored in the audio
+directory of the output database that has been created.
+This produces this directory structure:
+
+.. code:: bash
+    
+    /Users/samuelperry/concatenator_test/
+    |-- analysed_source_db
+    |   |-- audio
+    |   |   |-- Trumpet.novib.ff.A3.stereo.aif -> (Symlink)
+    |   |   |-- Trumpet.novib.ff.A4.stereo.aif -> (Symlink)
+    |   |   |-- Trumpet.novib.ff.A5.stereo.aif -> (Symlink)
+
+    ...
+
+    |   |   |-- Trumpet.novib.ff.F5.stereo.aif -> (Symlink)
+    |   |   |-- Trumpet.novib.ff.G3.stereo.aif -> (Symlink)
+    |   |   `-- Trumpet.novib.ff.G4.stereo.aif -> (Symlink)
+    |   `-- data
+    |       `-- analysis_data.hdf5
+    |-- analysed_tar_db
+    |   |-- audio
+    |   |   |-- target.01.wav -> (Symlink)
+    |   |   |-- target.02.wav -> (Symlink)
+    |   |   |-- target.03.wav -> (Symlink)
+    |   |   `-- target.04.wav -> (Symlink)
+    |   `-- data
+    |       `-- analysis_data.hdf5
+    |-- output_db
+    |   |-- audio
+    |   |   |-- target.01_output.wav
+    |   |   |-- target.02_output.wav
+    |   |   |-- target.03_output.wav
+    |   |   `-- target.04_output.wav
+    |   `-- data
+    |       `-- analysis_data.hdf5
+    |-- source_db
+    |   |-- Trumpet.novib.ff.A3.stereo.aif
+    |   |-- Trumpet.novib.ff.A4.stereo.aif
+    |   |-- Trumpet.novib.ff.A5.stereo.aif
+    |   |-- Trumpet.novib.ff.F5.stereo.aif
+
+    ...
+
+    |   |-- Trumpet.novib.ff.G3.stereo.aif
+    |   `-- Trumpet.novib.ff.G4.stereo.aif
+    `-- target_db
+        |-- target.01.wav
+        |-- target.02.wav
+        |-- target.03.wav
+        `-- target.04.wav
+
+By using the --src_db and --tar_db flags, alternative locations are specified
+for generating the databases and storing analysis data. Symbolic links are
+created, referencing the original audio files without moving them.  This allows
+large databases to be used in place without copying or moving their content.
+
+Alternatively, databases can be generated in place by omitting the --src_db and
+--tar_db flags. This will create the database directory structure directly in
+the directories provided as source and target.
+
+The --copy flag can be used in conjunction with these flags in order to create
+actual copies of the audio files at the destinations. This allows for the
+creation of portable databases that can be moved to other machines without
+breaking links to the original files. (Any pre-existing symbolic links will be
+overwritten with hard copies when using this option.)
+
+config.py
+---------
+The config.py file is used for specifying all user defined options and can be
+edited in the concatenator project directory. Comments explain the function of
+each parameter. The default config.py file looks like this:
+
+.. code:: python
+
+    # Specify analysis parameters for root mean square analysis.
+    rms = {
+        "window_size": 70,
+        "overlap": 2,
+    }
+
+    # Specify analysis parameters for variance analysis.
+    variance = {
+        "window_size": 70,
+        "overlap": 2
+    }
+
+    # Specify analysis parameters for temporal kurtosis analysis.
+    kurtosis = {
+        "window_size": 70,
+        "overlap": 2
+    }
+
+    # Specify analysis parameters for temporal skewness analysis.
+    skewness = {
+        "window_size": 70,
+        "overlap": 2
+    }
+
+    # Specify analysis parameters for FFT analysis.
+    fft = {
+        "window_size": 65536
+    }
+
+    database = {
+        # Enables creation of symbolic links to files not in the database rather
+        # than making physical copies.
+        "symlink": True
+    }
+
+    # Sets the weighting for each analysis. A higher weighting gives an analysis
+    # higher precedence when finding the best matches.
+    matcher_weightings = {
+        "f0" : 1.,
+        "spccntr" : 1.,
+        "spcsprd" : 1.,
+        "spcflux" : 1.,
+        "spccf" : 1.,
+        "spcflatness": 1.,
+        "zerox" : 1.,
+        "rms" : 1.,
+        "peak": 1.,
+        "centroid": 1.,
+        "kurtosis": 1.,
+        "skewness": 1.,
+        "variance": 3.,
+        "harm_ratio": 1.
+    }
+
+    # Specifies the method for averaging analysis frames to create a single value
+    # for comparing to other grains. Possible formatters are: 'mean', 'median',
+    # 'log2_mean', 'log2_median'
+    analysis_dict = {
+        "f0": "log2_median",
+        "rms": "mean",
+        "zerox": "mean",
+        "spccntr": "mean",
+        "spcsprd": "mean",
+        "spcflux": "mean",
+        "spccf": "mean",
+        "spcflatness": "mean",
+        "peak": "mean",
+        "centroid": "mean",
+        "kurtosis": "mean",
+        "skewness": "mean",
+        "variance": "mean",
+        "harm_ratio": "mean"
+    }
+
+    analysis = {
+        # Force the deletion of any pre-existing analyses to create new ones. This
+        # is needed for overwriting old analyses generated with different
+        # parameters to the current ones.
+        "reanalyse": False
+    }
+
+    matcher = {
+        # Force the re-matching of analyses
+        "rematch": True,
+        "grain_size": 70,
+        "overlap": 2,
+        # Defines the number of matches to keep for synthesis. Note that this must
+        # also be specified in the synthesis config
+        "match_quantity": 1,
+        # Choose the algorithm used to perform matching. kdtree is recommended for
+        # larger datasets.
+        "method": 'kdtree'
+    }
+
+    synthesizer = {
+        # Artificially scale the output grain by the difference in RMS values
+        # between source and target.
+        "enforce_rms": True,
+        # Specify the ratio limit that the grain can be scaled by.
+        "enf_rms_ratio_limit": 100.,
+        # Artificially modify the pitch by the difference in f0 values between
+        # source and target.
+        "enforce_f0": True,
+        # Specify the ratio limit that the grain can be modified by.
+        "enf_f0_ratio_limit": 10.,
+        "grain_size": 70,
+        "overlap": 2,
+        # Normalize output, avoid clipping of final output by scaling the final
+        # frames.
+        "normalize" : True,
+        # Defines the number of potential grains to choose from matches when
+        # synthesizing output.
+        "match_quantity": 1
+    }
+
+    output_file = {
+        "samplerate": 44100,
+        "format": 131075,
+        "channels": 1
+    }
+
+Configuration Flags
+-------------------
+For quick modification of analysis parameters, parameter flags can be specified
+directly when calling the script. For example:
+
+.. code:: bash
+
+    concatenator ./source_db ./target_db ./output_db --src_db \
+    ./analysed_source_db --tar_db ./analysed_tar_db --reanalyse --fft \
+    '--window_size 2048'
+
+This overwrites the value specified for window_size in the config file with the
+value provided.
+
+When databases have already been created, previous data is used when re-running
+the script over them. This allows for different databases to be used without
+continuous reanalysis. However, if analysis or matching parameters are changed,
+the "--reanalyse" and "--rematch" flags can be used to force the overwriting of
+old data, using the new parameters.
+
@@ -695,7 +695,8 @@ class DatabaseTests(globalTests):
        # Create database object
        database = AudioDatabase(
            "./.test_db",
-            analysis_list=["rms", "zerox", "fft", "spccntr", "spcsprd", "f0"]
+            analysis_list=["rms", "zerox", "fft", "spccntr", "spcsprd", "f0"],
+            config=config
        )
        # Create/load a pre-existing database
        database.load_database(reanalyse=True)
@@ -736,6 +737,7 @@ class MatcherTests(globalTests):
        # Create database object
        self.database1 = AudioDatabase(
            "./.test_db1",
+            config=config
        )
        # Create/load a pre-existing database
        self.database1.load_database(reanalyse=True)
@@ -762,6 +764,7 @@ class MatcherTests(globalTests):
        # Create database object
        self.database2 = AudioDatabase(
            "./.test_db2",
+            config=config
        )
        # Create/load a pre-existing database
        self.database2.load_database(reanalyse=True)
@@ -88,3 +88,7 @@ output_file = {
    "format": 131075,
    "channels": 1
 }
+
+database = {
+    "symlink": True
+}