Wrote tutorial, tried to fix broken brute force matcher. still broken...

2016-03-26 01:13:04 +00:00
parent 17e7bc09c9
commit b5fa78bee8
15 changed files with 426 additions and 52 deletions
@@ -0,0 +1 @@
 ./src/sppysound/config.py
@@ -96,10 +96,11 @@ def parse_arguments():
    )
    parser.add_argument(
-        '--src-db',
+        '--src_db',
        help="Specifies the directory to create the source database and store analyses "
        "in. If not specified then the source directory will be used directly.",
-        type=str
+        type=str,
        metavar=''
    )
    parser.add_argument(
@@ -107,7 +108,8 @@ def parse_arguments():
        help="Specifies the directory to create the target database and store analyses "
        "in. If not specified then the target directory will be used directly.",
        type=str,
-        default=''
+        default='',
        metavar=''
    )
    analyses = [
@@ -202,7 +204,13 @@ def parse_arguments():
        "'kdtree'",
    )
-    parser.add_argument('--verbose', '-v', action='count')
+    parser.add_argument(
        '--verbose',
        '-v',
        action='count',
        help='Specifies level of verbosity in output. For example: \'-vvvvv\' '
        'will output all information. \'-v\' will output minimal information. '
    )
    args = parser.parse_args()
    for item in config_items:
@@ -256,6 +264,7 @@ def main():
        log_filename=modpath,
        logger_filelevel=args.verbose
    )
    pdb.set_trace()
    # Create/load a pre-existing source database
    source_db = AudioDatabase(
@@ -1,23 +1,28 @@
 # Specify analysis parameters for root mean square analysis.
 rms = {
    "window_size": 70,
-    "overlap": 8,
+    "overlap": 2,
 }
 # Specify analysis parameters for variance analysis.
 variance = {
    "window_size": 70,
-    "overlap": 8
+    "overlap": 2
 }
 # Specify analysis parameters for temporal kurtosis analysis.
 kurtosis = {
    "window_size": 70,
-    "overlap": 8
+    "overlap": 2
 }
 # Specify analysis parameters for temporal skewness analysis.
 skewness = {
    "window_size": 70,
-    "overlap": 8
+    "overlap": 2
 }
 # Specify analysis parameters for FFT analysis.
 fft = {
    "window_size": 65536
 }
@@ -28,6 +33,8 @@ database = {
    "symlink": True
 }
 # Sets the weighting for each analysis. A higher weighting gives an analysis
 # higher precedence when finding the best matches.
 matcher_weightings = {
    "f0" : 1.,
    "spccntr" : 1.,
@@ -45,6 +52,9 @@ matcher_weightings = {
    "harm_ratio": 1.
 }
 # Specifies the method for averaging analysis frames to create a single value
 # for comparing to other grains. Possible formatters are: 'mean', 'median',
 # 'log2_mean', 'log2_median'
 analysis_dict = {
    "f0": "log2_median",
    "rms": "mean",
@@ -63,32 +73,44 @@ analysis_dict = {
 }
 analysis = {
    # Force the deletion of any pre-existing analyses to create new ones. This
    # is needed for overwriting old analyses generated with different
    # parameters to the current ones.
    "reanalyse": False
 }
 matcher = {
    # Force the re-matching of analyses
    "rematch": True,
    "grain_size": 70,
-    "overlap": 8,
+    "overlap": 2,
    # Defines the number of matches to keep for synthesis. Note that this must
    # also be specified in the synthesis config
-    "match_quantity": 1,
+    "match_quantity": 20,
    # Choose the algorithm used to perform matching. kdtree is recommended for
    # larger datasets.
    "method": 'kdtree'
 }
 synthesizer = {
    # Artificially scale the output grain by the difference in RMS values
    # between source and target.
    "enforce_rms": True,
    # Specify the ratio limit that the grain can be scaled by.
    "enf_rms_ratio_limit": 100.,
    # Artificially modify the pitch by the difference in f0 values between
    # source and target.
    "enforce_f0": True,
    # Specify the ratio limit that the grain can be modified by.
    "enf_f0_ratio_limit": 10.,
    "grain_size": 70,
-    "overlap": 8,
+    "overlap": 2,
    # Normalize output, avoid clipping of final output by scaling the final
    # frames.
    "normalize" : True,
    # Defines the number of potential grains to choose from matches when
    # synthesizing output.
-    "match_quantity": 1
+    "match_quantity": 20
 }
 output_file = {
@@ -493,12 +493,8 @@ class Matcher:
            # Create an array of grain times for target sample
            target_times = target_entry.generate_grain_times(grain_size, overlap, save_times=True)
            # Stores an accumulated distance between source and target grains,
            # added to by each analysis.
            distance_accum = np.zeros((target_times.shape[0], source_sample_indexes[-1][-1]))
            # Allocate memory for storing accumulated distances between
            # source and target grains
            x_size = target_times.shape[0]
            y_size = int(source_sample_indexes[-1][-1])
            chunk_size = 8192
@@ -507,15 +503,17 @@ class Matcher:
            try:
                del self.output_db.data["data_distance"]
-                self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True)
+            except KeyError:
-            except RuntimeError:
+                pass
-                self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True)
+
            self.output_db.data.create_dataset("data_distance", (x_size, y_size), dtype=np.float, chunks=True)
            try:
                del self.output_db.data["distance_accum"]
-                self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0)
+            except KeyError:
-            except RuntimeError:
+                pass
-                self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0)
+
            self.output_db.data.create_dataset("distance_accum", (x_size, y_size), dtype=np.float, chunks=True, fillvalue=0)
            for analysis in self.matcher_analyses:
                self.logger.info("Current analysis: {0}".format(analysis))
@@ -141,7 +141,7 @@
   "outputs": [],
   "source": [
    "matcher.match(\n",
-    "    matcher.brute_force_matcher,\n",
+    "    matcher.kdtree_matcher,\n",
    ")"
   ]
  },
@@ -9,7 +9,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
@@ -17,12 +17,12 @@
   "source": [
    "from sppysound.database import AudioDatabase, Synthesizer, Matcher\n",
    "import synthesis_config\n",
-    "import matching_config"
+    "import config"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
@@ -43,16 +43,16 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 6,
   "metadata": {
-    "collapsed": true
+    "collapsed": false
   },
   "outputs": [],
   "source": [
    "source_database = AudioDatabase(\n",
    "    source_dir,\n",
    "    config=synthesis_config,\n",
-    "    analysis_list={\"f0\", \"rms\", \"peak\"}\n",
+    "    analysis_list={\"f0\", \"rms\"}\n",
    ")\n",
    "source_database.load_database(reanalyse=True)"
   ]
@@ -67,22 +67,36 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
-   "outputs": [],
+   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Traceback (most recent call last):\n",
      "  File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/database.py\", line 157, in analyse_database\n",
      "    config=self.config\n",
      "  File \"/Users/samuelperry/PerryPerrySource/pysource/sppysound/src/sppysound/audiofile.py\", line 943, in __enter__\n",
      "    \"empty\".format(self.name))\n",
      "IOError: File isn't valid: ElectricGuitarSample-out_output.wav\n",
      "Check that file is mono and isn't empty\n"
     ]
    }
   ],
   "source": [
    "target_database = AudioDatabase(\n",
    "    target_dir,\n",
    "    config=synthesis_config,\n",
-    "    analysis_list={\"f0\", \"rms\", \"peak\"}\n",
+    "    analysis_list={\"f0\", \"rms\"}\n",
    ")\n",
    "target_database.load_database(reanalyse=True)\n",
    "\n",
    "output_database = AudioDatabase(\n",
    "    output_dir,\n",
-    "    config=synthesis_config\n",
+    "    config=config\n",
    ")\n",
    "output_database.load_database(reanalyse=False)\n",
    "\n",
@@ -90,11 +104,13 @@
    "    source_database,\n",
    "    target_database,\n",
    "    output_db=output_database,\n",
-    "    config=matching_config,\n",
+    "    config=config,\n",
    "    rematch=True\n",
    ")\n",
    "matcher.match(\n",
-    "    matcher.brute_force_matcher,\n",
+    "    matcher.kdtree_matcher,\n",
    "    grain_size=config.matcher[\"grain_size\"],\n",
    "    overlap=config.matcher[\"overlap\"]\n",
    ")"
   ]
  },
@@ -107,7 +123,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
@@ -117,7 +133,7 @@
    "    source_database, \n",
    "    output_database, \n",
    "    target_db=target_database, \n",
-    "    config=synthesis_config\n",
+    "    config=config\n",
    ")"
   ]
  },
@@ -130,14 +146,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 9,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [],
   "source": [
-    "synthesizer.synthesize()"
+    "synthesizer.synthesize(\n",
    "    grain_size=config.synthesizer[\"grain_size\"],\n",
    "    overlap=config.synthesizer[\"overlap\"]\n",
    ")"
   ]
  },
  {
@@ -17,3 +17,7 @@ output_file = {
    "format": 131075,
    "channels": 1
 }
 database = {
    "symlink": True
 }
@@ -87,15 +87,20 @@ concatenate.py Script Usage
 --enforcerms          This flag enables scaling of matched grains to better
                      match the target's volume.
 --copy                This flag enables the copying of audio files from
                      their location to the database, rather than creating
                      symbolic links. This is useful for creating portable
                      databases.
 --match_method        Choose the algorithm to use when matching analyses. Available algorithms are:
                         Brute force: 'bruteforce'
                         K-d Tree Search: 'kdtree'
--verbose, -v         Specify the verbosity of the script's output. Additional
+--verbose, -v         Specifies level of verbosity in output. For example:
-                      v will produce greater levels of detail ie. -vvvvv will 
+                      '-vvvvv' will output all information. '-v' will output
-                      produce all messages.
+                      minimal information.
 -------------------
 AudioFile Class
@@ -1,8 +0,0 @@
 Overview
 ========
 Concatenator is a tool for synthesizing interpretations of a sound, through the
 analysis and synthesis of audio grains from a database of sounds.
 The program works by analysing overlapping segments of audio (known as grains)
 from both the target sound and the source database, then searching for the
 closest matching grain in the source database to the target sound. Finally, the
 output is generated by overlap-adding the best matches.
@@ -30,3 +30,7 @@ output_file = {
    "format": 131075,
    "channels": 1
 }
 database = {
    "symlink": True
 }
@@ -1,3 +1,5 @@
 .. _overview:
 Overview
 ========
 Concatenator is a tool for synthesizing interpretations of a sound, through the
@@ -73,6 +75,15 @@ precedence over others based on user preference.
 The best match indexes are then saved to the output database ready for
 synthesis.
 There are currently two implementations for the matching algorithm:
 - Brute Force
 - K-d Tree Search
 Both will return similar results, however the K-d tree search algorithm is
 far more efficient when analysing large datasets so is the preferred method.
 .. graphviz::
   digraph b {
@@ -162,7 +173,7 @@ database, performing any post-processing (such as pitch shifting and amplitude
 scaling) to improve the similarity of the match, then windowed overlap adding
 the grains to create the final output. The post-processing phase involves using
 the ratio difference between the source and target grain to artificially alter
-the source grain so that it better ressembles the target. This is particularly
+the source grain so that it better resembles the target. This is particularly
 useful when using small source databases as it improves the similarity of any
 match (important when best matches aren't very close to the target.) The final
 output is saved to the output database's audio directory.
@@ -30,3 +30,7 @@ synthesizer = {
    # synthesizing output.
    "match_quantity": 20
 }
 database = {
    "symlink": True
 }
@@ -1,6 +1,304 @@
 Tutorial
 ========
 This section gives a brief introduction to using the 'concatenator.py' script. The
 script can be found in the src/sppysound directory of the project folder, or
 can be accessed by running the 'concatenator' symbolic link from the project
 folder root.
 Getting Started
 ---------------
 To view all available options simply run:
 .. code:: bash
    ./concatenator -h
 A list of all commands available is then presented:
 .. code:: bash
    usage: concatenator [-h] [--src_db] [--tar_db]
                    [--analyse [ANALYSE [ANALYSE ...]]] [--analysis_dict]
                    [--fft] [--kurtosis] [--matcher] [--matcher_weightings]
                    [--rms] [--skewness] [--synthesizer] [--variance]
                    [--reanalyse] [--rematch] [--enforcef0] [--enforcerms]
                    [--copy] [--match_method] [--verbose]
                    source target output
    Concatenator is a tool for synthesizing interpretations of a sound, through
    the analysis and synthesis of audio grains from a corpus database. The program
    works by analysing overlapping segments of audio (known as grains) from both
    the target sound and the source database, then searching for the closest
    matching grain in the source database to the target sound. Finally, the output
    is generated by overlap-adding the best matches.
    positional arguments:
      source                Directory of source files/database to take grains from
                            when synthesizing output
      target                Directory of target files/database to match source
                            grains to.
      output                Directory to use as database for outputing results and
                            match information. Output audio will be stored in the
                            /audio sub-directory and match data will be stored in
                            the /data directory.
    optional arguments:
      -h, --help            show this help message and exit
      --src_db              Specifies the directory to create the source database
                            and store analyses in. If not specified then the
    ...
 For this demonstration, the following file structure will be used:
 .. code:: bash
    /Users/samuelperry/concatenator_test/
    |-- source_db
    |   |-- Trumpet.novib.ff.A3.stereo.aif
    |   |-- Trumpet.novib.ff.A4.stereo.aif
    |   |-- Trumpet.novib.ff.A5.stereo.aif
    ...
    |   |-- Trumpet.novib.ff.F5.stereo.aif
    |   |-- Trumpet.novib.ff.G3.stereo.aif
    |   `-- Trumpet.novib.ff.G4.stereo.aif
    `-- target_db
        |-- target.01.wav
        |-- target.02.wav
        |-- target.03.wav
        `-- target.04.wav
 A source database containing a small selection of trumpet samples (acquired from
 http://theremin.music.uiowa.edu/MIS.html) will be used to match grains with 4
 target sounds. This will produce 4 output files, one for each target sound.
 The following command is used to generate the output:
 .. code:: bash
    concatenator ./source_db ./target_db ./output_db --src_db \
    ./analysed_source_db --tar_db ./analysed_tar_db
 The specified directories are searched recursively for audio files that are
 used as items in the database. These items are then matched and synthesized as
 explained in the :ref:`overview` section. Output is stored in the audio
 directory of the output database that has been created.
 This produces this directory structure:
 .. code:: bash
    /Users/samuelperry/concatenator_test/
    |-- analysed_source_db
    |   |-- audio
    |   |   |-- Trumpet.novib.ff.A3.stereo.aif -> (Symlink)
    |   |   |-- Trumpet.novib.ff.A4.stereo.aif -> (Symlink)
    |   |   |-- Trumpet.novib.ff.A5.stereo.aif -> (Symlink)
    ...
    |   |   |-- Trumpet.novib.ff.F5.stereo.aif -> (Symlink)
    |   |   |-- Trumpet.novib.ff.G3.stereo.aif -> (Symlink)
    |   |   `-- Trumpet.novib.ff.G4.stereo.aif -> (Symlink)
    |   `-- data
    |       `-- analysis_data.hdf5
    |-- analysed_tar_db
    |   |-- audio
    |   |   |-- target.01.wav -> (Symlink)
    |   |   |-- target.02.wav -> (Symlink)
    |   |   |-- target.03.wav -> (Symlink)
    |   |   `-- target.04.wav -> (Symlink)
    |   `-- data
    |       `-- analysis_data.hdf5
    |-- output_db
    |   |-- audio
    |   |   |-- target.01_output.wav
    |   |   |-- target.02_output.wav
    |   |   |-- target.03_output.wav
    |   |   `-- target.04_output.wav
    |   `-- data
    |       `-- analysis_data.hdf5
    |-- source_db
    |   |-- Trumpet.novib.ff.A3.stereo.aif
    |   |-- Trumpet.novib.ff.A4.stereo.aif
    |   |-- Trumpet.novib.ff.A5.stereo.aif
    |   |-- Trumpet.novib.ff.F5.stereo.aif
    ...
    |   |-- Trumpet.novib.ff.G3.stereo.aif
    |   `-- Trumpet.novib.ff.G4.stereo.aif
    `-- target_db
        |-- target.01.wav
        |-- target.02.wav
        |-- target.03.wav
        `-- target.04.wav
 By using the --src_db and --tar_db flags, alternative locations are specified
 for generating the databases and storing analysis data. Symbolic links are
 created, referencing the original audio files without moving them.  This allows
 large databases to be used in place without copying or moving its content.
 Alternatively, databases can be generated in place by omitting the --src_db and
 --tar_db flags. This will create the database directory structure directly in
 the directories provided as source and target.
 The --copy flag can be used in conjunction with these flags in order to create
 actual copies of the audio files at the destinations. This allows for the
 creation of portable databases that can be moved to other machines without
 breaking links to the original files. (Any pre-existing symbolic links will be
 overwritten with hard copies when using this option.)
 config.py
 ---------
 The config.py file is used for specifying all user defined options and can be
 edited in the concatenator project directory. Comments explain the function of
 each parameter. The default config.py file looks like this:
 .. code:: python
    # Specify analysis parameters for root mean square analysis.
    rms = {
        "window_size": 70,
        "overlap": 2,
    }
    # Specify analysis parameters for variance analysis.
    variance = {
        "window_size": 70,
        "overlap": 2
    }
    # Specify analysis parameters for temporal kurtosis analysis.
    kurtosis = {
        "window_size": 70,
        "overlap": 2
    }
    # Specify analysis parameters for temporal skewness analysis.
    skewness = {
        "window_size": 70,
        "overlap": 2
    }
    # Specify analysis parameters for FFT analysis.
    fft = {
        "window_size": 65536
    }
    database = {
        # Enables creation of symbolic links to files not in the database rather
        # than making physical copies.
        "symlink": True
    }
    # Sets the weighting for each analysis. A higher weighting gives an analysis
    # higher precedence when finding the best matches.
    matcher_weightings = {
        "f0" : 1.,
        "spccntr" : 1.,
        "spcsprd" : 1.,
        "spcflux" : 1.,
        "spccf" : 1.,
        "spcflatness": 1.,
        "zerox" : 1.,
        "rms" : 1.,
        "peak": 1.,
        "centroid": 1.,
        "kurtosis": 1.,
        "skewness": 1.,
        "variance": 3.,
        "harm_ratio": 1.
    }
    # Specifies the method for averaging analysis frames to create a single value
    # for comparing to other grains. Possible formatters are: 'mean', 'median',
    # 'log2_mean', 'log2_median'
    analysis_dict = {
        "f0": "log2_median",
        "rms": "mean",
        "zerox": "mean",
        "spccntr": "mean",
        "spcsprd": "mean",
        "spcflux": "mean",
        "spccf": "mean",
        "spcflatness": "mean",
        "peak": "mean",
        "centroid": "mean",
        "kurtosis": "mean",
        "skewness": "mean",
        "variance": "mean",
        "harm_ratio": "mean"
    }
    analysis = {
        # Force the deletion of any pre-existing analyses to create new ones. This
        # is needed for overwriting old analyses generated with different
        # parameters to the current ones.
        "reanalyse": False
    }
    matcher = {
        # Force the re-matching of analyses
        "rematch": True,
        "grain_size": 70,
        "overlap": 2,
        # Defines the number of matches to keep for synthesis. Note that this must
        # also be specified in the synthesis config
        "match_quantity": 1,
        # Choose the algorithm used to perform matching. kdtree is recommended for
        # larger datasets.
        "method": 'kdtree'
    }
    synthesizer = {
        # Artificially scale the output grain by the difference in RMS values
        # between source and target.
        "enforce_rms": True,
        # Specify the ratio limit that the grain can be scaled by.
        "enf_rms_ratio_limit": 100.,
        # Artificially modify the pitch by the difference in f0 values between
        # source and target.
        "enforce_f0": True,
        # Specify the ratio limit that the grain can be modified by.
        "enf_f0_ratio_limit": 10.,
        "grain_size": 70,
        "overlap": 2,
        # Normalize output, avoid clipping of final output by scaling the final
        # frames.
        "normalize" : True,
        # Defines the number of potential grains to choose from matches when
        # synthesizing output.
        "match_quantity": 1
    }
    output_file = {
        "samplerate": 44100,
        "format": 131075,
        "channels": 1
    }
 Configuration Flags
 -------------------
 For quick modification of analysis parameters, parameter flags can be specified
 directly when calling the script. For example:
 .. code:: bash
    concatenator ./source_db ./target_db ./output_db --src_db \
    ./analysed_source_db --tar_db ./analysed_tar_db --reanalyse --fft \
    '--window_size 2048'
 This overwrites the value specified for window_size in the config file with the
 value provided.
 When databases have already been created, previous data is used when re-running
 the script over them. This allows for different databases to be used without
 continuous reanalysis. However, if analysis or matching parameters are changed,
 the "--reanalyse" and "--rematch" flags can be used to force the overwriting of
 old data, using the new parameters.
@@ -695,7 +695,8 @@ class DatabaseTests(globalTests):
        # Create database object
        database = AudioDatabase(
            "./.test_db",
-            analysis_list=["rms", "zerox", "fft", "spccntr", "spcsprd", "f0"]
+            analysis_list=["rms", "zerox", "fft", "spccntr", "spcsprd", "f0"],
            config=config
        )
        # Create/load a pre-existing database
        database.load_database(reanalyse=True)
@@ -736,6 +737,7 @@ class MatcherTests(globalTests):
        # Create database object
        self.database1 = AudioDatabase(
            "./.test_db1",
            config=config
        )
        # Create/load a pre-existing database
        self.database1.load_database(reanalyse=True)
@@ -762,6 +764,7 @@ class MatcherTests(globalTests):
        # Create database object
        self.database2 = AudioDatabase(
            "./.test_db2",
            config=config
        )
        # Create/load a pre-existing database
        self.database2.load_database(reanalyse=True)
@@ -88,3 +88,7 @@ output_file = {
    "format": 131075,
    "channels": 1
 }
 database = {
    "symlink": True
 }