Fixed rematching flag in kdtree and added match algorithm selection flag

2016-03-25 12:38:40 +00:00
parent 997a91ce1b
commit fec05bbcb7
4 changed files with 59 additions and 22 deletions
@@ -14,6 +14,14 @@ import json
 modpath = sys.argv[0]
 modpath = os.path.splitext(modpath)[0]+'.log'

+class SmartFormatter(argparse.HelpFormatter):
+
+    def _split_lines(self, text, width):
+        # 'R|'-prefixed help keeps its own line breaks, as in RawTextHelpFormatter._split_lines
+        if text.startswith('R|'):
+            return text[2:].splitlines()
+        return argparse.HelpFormatter._split_lines(self, text, width)
+
 def parse_sub_args(args, analysis):
    try:
        args = getattr(args, analysis)
@@ -53,6 +61,7 @@ def parse_arguments():
    # TODO: Write program description.
    parser = argparse.ArgumentParser(
        description='',
+        formatter_class=SmartFormatter
    )

    parser.add_argument(
@@ -157,7 +166,7 @@ def parse_arguments():
    parser.add_argument(
        "--enforcef0",
        action="store_true",
-        help="This flag enables pitch shifting of matched grainsto better match the target."
+        help="This flag enables pitch shifting of matched grains to better match the target."
    )

    parser.add_argument(
@@ -166,12 +175,24 @@ def parse_arguments():
        help="This flag enables scaling of matched grains to better match the target's volume."
    )

+    parser.add_argument(
+        "--match_method",
+        type=str,
+        metavar='',
+        help="R|Choose the algorithm to use when matching analyses. Available "
+        "algorithms are:\nBrute force: \'bruteforce\'\nK-d Tree Search: "
+        "'kdtree'",
+    )
+
    parser.add_argument('--verbose', '-v', action='count')

    args = parser.parse_args()
    for item in config_items:
        parse_sub_args(args, item)

+    if args.match_method:
+        config.matcher["method"] = args.match_method
+
    if args.rematch:
        config.matcher["rematch"] = True

@@ -251,9 +272,14 @@ def main():
        rematch=args.rematch
    )

+    match_method_dict = {
+        'bruteforce': matcher.brute_force_matcher,
+        'kdtree': matcher.kdtree_matcher
+    }
+
    # Perform matching on databases using the method specified.
    matcher.match(
-        matcher.knn_matcher,
+        match_method_dict[config.matcher["method"]],
        grain_size=config.matcher["grain_size"],
        overlap=config.matcher["overlap"]
    )
@@ -1,21 +1,21 @@
 rms = {
    "window_size": 70,
-    "overlap": 2,
+    "overlap": 8,
 }

 variance = {
    "window_size": 70,
-    "overlap": 2
+    "overlap": 8
 }

 kurtosis = {
    "window_size": 70,
-    "overlap": 2
+    "overlap": 8
 }

 skewness = {
    "window_size": 70,
-    "overlap": 2
+    "overlap": 8
 }

 fft = {
@@ -64,10 +64,13 @@ analysis = {
 matcher = {
    "rematch": True,
    "grain_size": 70,
-    "overlap": 2,
+    "overlap": 8,
    # Defines the number of matches to keep for synthesis. Note that this must
    # also be specified in the synthesis config
-    "match_quantity": 1
+    "match_quantity": 1,
+    # Choose the algorithm used to perform matching. kdtree is recommended for
+    # larger datasets.
+    "method": 'kdtree'
 }

 synthesizer = {
@@ -76,7 +79,7 @@ synthesizer = {
    "enforce_f0": True,
    "enf_f0_ratio_limit": 10.,
    "grain_size": 70,
-    "overlap": 2,
+    "overlap": 8,
    "normalize" : True,
    # Defines the number of potential grains to choose from matches when
    # synthesizing output.
@@ -355,18 +355,16 @@ class Matcher:
        grain_indexes[:, 0] = grain_indexes[:, 1] - grain_indexes[:, 0]
        return grain_indexes

-    def knn_matcher(self, grain_size, overlap):
+    def kdtree_matcher(self, grain_size, overlap):
        # Count grains of the source database
        source_sample_indexes = self.count_grains(self.source_db, grain_size, overlap)
        try:
            self.output_db.data.create_group("match")
        except ValueError:
-            self.logger.debug("Match group already exists in the {0} HDF5 file.".format(self.output_db))
+            self.logger.info("Match group already exists in the {0} HDF5 file.".format(self.output_db))

        if self.rematch:
            self.output_db.data["match"].clear()
-        #
-        final_match_indexes = []

        if self.config:
            weightings = self.config.matcher_weightings
@@ -374,6 +372,14 @@ class Matcher:
            weightings = {x: 1. for x in self.matcher_analyses}

        for tind, target_entry in enumerate(self.target_db.analysed_audio):
+            # Check if match data already exists and use it rather than
+            # regenerating if it does.
+            if target_entry.name in self.output_db.data["match"].keys():
+                self.logger.info("Match data already exists for {0}. Using this "
+                                 "data. Run with the \'--rematch\' flag to "
+                                 "overwrite.".format(self.output_db))
+                continue
+
            # Create an array of grain times for target sample
            target_times = target_entry.generate_grain_times(grain_size, overlap, save_times=True)
            x_size = target_times.shape[0]
@@ -381,6 +387,7 @@ class Matcher:
            match_vals = np.empty((x_size, self.match_quantity))
            match_vals.fill(np.inf)

+            # Allocate memory for target analyses.
            all_target_analyses = np.empty((len(self.matcher_analyses), target_times.shape[0]))

            for i, analysis in enumerate(self.matcher_analyses):
@@ -390,10 +397,8 @@ class Matcher:
                all_target_analyses[i] = target_data

            imp = Imputer(axis=1)
+            # Impute values for NaNs
            all_target_analyses = imp.fit_transform(all_target_analyses)
-            # all_target_analyses[np.isnan(all_target_analyses)] = np.inf
-            # all_target_analyses = np.nan_to_num(all_target_analyses)
-

            for sind, source_entry in enumerate(self.source_db.analysed_audio):
                # Create an array of grain times for source sample
@@ -406,10 +411,9 @@ class Matcher:
                    source_data, s = source_entry.analysis_data_grains(source_times, analysis, format=analysis_formatting)
                    all_source_analyses[i] = source_data

-                self.logger.info("Matching \"{0}\" for: {1} to {2}".format(analysis, source_entry.name, target_entry.name))
-                # all_source_analyses[np.isnan(all_source_analyses)] = np.inf
-                # all_source_analyses = np.nan_to_num(all_source_analyses)
+                self.logger.info("K-d Tree Matching: {0} to {1}".format(source_entry.name, target_entry.name))

+                # Impute values for NaNs
                all_source_analyses = imp.fit_transform(all_source_analyses)

                source_tree = spatial.cKDTree(all_source_analyses.T, leafsize=100)
@@ -458,8 +462,6 @@ class Matcher:

        if self.rematch:
            self.output_db.data["match"].clear()
-        #
-        final_match_indexes = []

        if self.config:
            weightings = self.config.matcher_weightings
@@ -86,7 +86,13 @@ concatenate.py Script Usage
                      
 --enforcerms          This flag enables scaling of matched grains to better
                      match the target's volume.
-                      
+
+--match_method        Choose the algorithm to use when matching analyses. Available algorithms are:
+
+                         Brute force: 'bruteforce'
+
+                         K-d Tree Search: 'kdtree'
+
 --verbose, -v         Specify the verbosity of the script's output. Additional
                      v will produce greater levels of detail ie. -vvvvv will 
                      produce all messages.