flake8 checks

2016-03-11 20:52:21 -05:00
parent 60cf92c9f9
commit d85cce0645
35 changed files with 213 additions and 147 deletions
@@ -4,9 +4,24 @@ doc-warnings: yes

 ignore-paths:
  - mlxtend/data
+  - mlxtend/externals

 ignore-patterns:
    - ^example/doc_.*\.py$
    - (^|/)docs(/|$)
    - __init__.py

+pylint:
+  options:
+    dummy-variables-rgx: _$|.+_$|dummy_.+
+  disable:
+    - missing-docstring
+    - protected-access
+    - too-few-public-methods
+    - too-many-arguments
+    - too-many-instance-attributes
+    - too-many-locals
+    - too-many-public-methods
+    - too-many-return-statements
+    - too-many-statements
+    - unpacking-non-sequence
@@ -12,6 +12,7 @@ from .base import _BaseClassifier


 class Adaline(_BaseClassifier):
+
    """ADAptive LInear NEuron classifier.

    Parameters
@@ -94,9 +95,10 @@ class Adaline(_BaseClassifier):
            self.thres_ = 0.5

        if init_weights:
-            self.w_ = self._init_weights(shape=1 + X.shape[1],
-                                         zero_init_weight=self.zero_init_weight,
-                                         seed=self.random_seed)
+            self.w_ = self._init_weights(
+                shape=1 + X.shape[1],
+                zero_init_weight=self.zero_init_weight,
+                seed=self.random_seed)

        self.cost_ = []

@@ -125,7 +127,7 @@ class Adaline(_BaseClassifier):
                cost = self._sum_squared_error_cost(y, self._activation(X))
                self.cost_.append(cost)
                if self.print_progress:
-                    self._print_progress(epoch=i+1, cost=cost)
+                    self._print_progress(epoch=i + 1, cost=cost)

        return self

@@ -12,11 +12,17 @@ from time import time


 class _BaseClassifier(object):
-    """Parent Class Base Classifier"""
+
+    """Parent Class Base Classifier
+
+    A base class that is inherited by
+    classifier child classes.
+
+    """
    def __init__(self, print_progress=0):
        self.print_progress = print_progress

-    def fit(self, X, y):
+    def fit(self, X, y, init_weights=True):
        """Learn weight coefficients from training data.

        Parameters
@@ -26,12 +32,18 @@ class _BaseClassifier(object):
            n_features is the number of features.
        y : array-like, shape = [n_samples]
            Target values.
+        init_weights : bool (default: True)
+            Reinitialize weights

        Returns
        -------
        self : object

        """
+        if not (init_weights is None or isinstance(init_weights, bool)):
+            raise AttributeError("init_weights must be True, False, or None")
+        init_weights
+        self._check_arrays(X=X, y=y)
        return self

    def predict(self, X):
@@ -19,6 +19,7 @@ import numpy as np


 class EnsembleVoteClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
+    
    """Soft Voting/Majority Rule classifier for scikit-learn estimators.

    Parameters
@@ -168,8 +169,7 @@ class EnsembleVoteClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
        else:  # 'hard' voting
            predictions = self._predict(X)

-            maj = np.apply_along_axis(
-                                      lambda x:
+            maj = np.apply_along_axis(lambda x:
                                      np.argmax(np.bincount(x,
                                                weights=self.weights)),
                                      axis=1,
@@ -12,6 +12,7 @@ from .base import _BaseClassifier


 class LogisticRegression(_BaseClassifier):
+
    """Logistic regression classifier.

    Parameters
@@ -51,7 +52,7 @@ class LogisticRegression(_BaseClassifier):
        epoch.

    """
-    def __init__(self, eta=0.01, epochs=50, regularization=None,
+    def __init__(self, eta=0.01, epochs=50,
                 l2_lambda=0.0, minibatches=1,
                 random_seed=None, zero_init_weight=False,
                 print_progress=0):
@@ -88,9 +89,10 @@ class LogisticRegression(_BaseClassifier):
            raise ValueError('Supports only binary class labels 0 and 1')

        if init_weights:
-            self.w_ = self._init_weights(shape=1 + X.shape[1],
-                                         zero_init_weight=self.zero_init_weight,
-                                         seed=self.random_seed)
+            self.w_ = self._init_weights(
+                shape=1 + X.shape[1],
+                zero_init_weight=self.zero_init_weight,
+                seed=self.random_seed)

        self.m_ = len(self.w_)
        self.cost_ = []
@@ -117,7 +119,7 @@ class LogisticRegression(_BaseClassifier):
            cost = self._logit_cost(y, self._activation(X))
            self.cost_.append(cost)
            if self.print_progress:
-                self._print_progress(epoch=i+1, cost=cost)
+                self._print_progress(epoch=i + 1, cost=cost)
        return self

    def _predict(self, X):
@@ -9,11 +9,11 @@
 import numpy as np
 from .base import _BaseClassifier
 from scipy.special import expit
-import sys
 from time import time


 class NeuralNetMLP(_BaseClassifier):
+
    """ Feedforward neural network / Multi-layer perceptron classifier.

    Parameters
@@ -122,11 +122,11 @@ class NeuralNetMLP(_BaseClassifier):

    def _initialize_weights(self):
        """Initialize weights with small random numbers."""
-        w1 = self._init_weights(shape=self.n_hidden*(self.n_features + 1),
+        w1 = self._init_weights(shape=self.n_hidden * (self.n_features + 1),
                                zero_init_weight=self.zero_init_weight,
                                seed=self.random_seed)
        w1 = w1.reshape(self.n_hidden, self.n_features + 1)
-        w2 = self._init_weights(shape=self.n_output*(self.n_hidden + 1),
+        w2 = self._init_weights(shape=self.n_output * (self.n_hidden + 1),
                                zero_init_weight=self.zero_init_weight,
                                seed=self.random_seed)
        w2 = w2.reshape(self.n_output, self.n_hidden + 1)
@@ -148,10 +148,10 @@ class NeuralNetMLP(_BaseClassifier):
    def _add_bias_unit(self, X, how='column'):
        """Add bias unit (column or row of 1s) to array at index 0."""
        if how == 'column':
-            X_new = np.ones((X.shape[0], X.shape[1]+1))
+            X_new = np.ones((X.shape[0], X.shape[1] + 1))
            X_new[:, 1:] = X
        elif how == 'row':
-            X_new = np.ones((X.shape[0]+1, X.shape[1]))
+            X_new = np.ones((X.shape[0] + 1, X.shape[1]))
            X_new[1:, :] = X
        else:
            raise AttributeError('how must be columns or row')
@@ -193,12 +193,12 @@ class NeuralNetMLP(_BaseClassifier):

    def _L2_reg(self, lambda_, w1, w2):
        """Compute L2-regularization cost."""
-        return ((lambda_/2.0) * (np.sum(w1[:, 1:] ** 2) +
+        return ((lambda_ / 2.0) * (np.sum(w1[:, 1:] ** 2) +
                np.sum(w2[:, 1:] ** 2)))

    def _L1_reg(self, lambda_, w1, w2):
        """Compute L1-regularization cost."""
-        return ((lambda_/2.0) * (np.abs(w1[:, 1:]).sum() +
+        return ((lambda_ / 2.0) * (np.abs(w1[:, 1:]).sum() +
                np.abs(w2[:, 1:]).sum()))

    def _get_cost(self, y_enc, output, w1, w2):
@@ -306,7 +306,7 @@ class NeuralNetMLP(_BaseClassifier):
        for i in range(self.epochs):

            # adaptive learning rate
-            self.eta /= (1 + self.decrease_const*i)
+            self.eta /= (1 + self.decrease_const * i)

            if self.shuffle_epoch:
                idx = np.random.permutation(y_enc.shape[1])
@@ -342,7 +342,7 @@ class NeuralNetMLP(_BaseClassifier):
                delta_w1_prev, delta_w2_prev = delta_w1, delta_w2

                if self.print_progress:
-                    self._print_progress(epoch=i+1)
+                    self._print_progress(epoch=i + 1)

        return self

@@ -364,7 +364,7 @@ class NeuralNetMLP(_BaseClassifier):
                a1, z2, a2, z3, a3 = self._feedforward(X,
                                                       w1 - epsilon_ary1,
                                                       w2)
-                cost1 = self._get_cost(y_enc, a3, w1-epsilon_ary1, w2)
+                cost1 = self._get_cost(y_enc, a3, w1 - epsilon_ary1, w2)
                a1, z2, a2, z3, a3 = self._feedforward(X,
                                                       w1 + epsilon_ary1,
                                                       w2)
@@ -12,6 +12,7 @@ from .base import _BaseClassifier


 class Perceptron(_BaseClassifier):
+    
    """Perceptron classifier.

    Parameters
@@ -82,9 +83,10 @@ class Perceptron(_BaseClassifier):
                             ' class labels {0, 1} or {-1, 1}.')

        if init_weights:
-            self.w_ = self._init_weights(shape=1 + X.shape[1],
-                                         zero_init_weight=self.zero_init_weight,
-                                         seed=self.random_seed)
+            self.w_ = self._init_weights(
+                shape=1 + X.shape[1],
+                zero_init_weight=self.zero_init_weight,
+                seed=self.random_seed)

        self.cost_ = []

@@ -105,7 +107,7 @@ class Perceptron(_BaseClassifier):
                errors += int(update != 0.0)

            if self.print_progress:
-                self._print_progress(epoch=i+1, cost=errors)
+                self._print_progress(epoch=i + 1, cost=errors)
            self.cost_.append(errors)
        return self

@@ -14,6 +14,7 @@ from .base import _BaseClassifier


 class SoftmaxRegression(_BaseClassifier):
+
    """Logistic regression classifier.

    Parameters
@@ -72,11 +73,6 @@ class SoftmaxRegression(_BaseClassifier):
            mat[i, val] = 1
        return mat.astype(float)

-    def _init_bias(self, n_features, n_classes):
-        w = np.zeros((n_features, n_classes))
-        b = np.zeros(n_classes)
-        return w, b
-
    def _net_input(self, X, W, b):
        return (X.dot(W) + b)

@@ -113,13 +109,14 @@ class SoftmaxRegression(_BaseClassifier):
        if init_weights:
            self._n_classes = np.max(y) + 1
            self._n_features = X.shape[1]
-            self.w_ = self._init_weights(shape=(self._n_features,
-                                                self._n_classes),
-                                         zero_init_weight=self.zero_init_weight,
-                                         seed=self.random_seed)
-            self.b_ = self._init_weights(shape=self._n_classes,
-                                         zero_init_weight=self.zero_init_weight,
-                                         seed=self.random_seed)
+            self.w_ = self._init_weights(
+                shape=(self._n_features, self._n_classes),
+                zero_init_weight=self.zero_init_weight,
+                seed=self.random_seed)
+            self.b_ = self._init_weights(
+                shape=self._n_classes,
+                zero_init_weight=self.zero_init_weight,
+                seed=self.random_seed)
            self.cost_ = []

        n_idx = list(range(y.shape[0]))
@@ -162,7 +159,7 @@ class SoftmaxRegression(_BaseClassifier):
            self.cost_.append(cost)

            if self.print_progress:
-                self._print_progress(epoch=i+1, cost=cost)
+                self._print_progress(epoch=i + 1, cost=cost)

        return self

@@ -18,6 +18,7 @@ import numpy as np


 class StackingClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
+    
    """A Stacking classifier for scikit-learn estimators for classification.

    Parameters
@@ -31,7 +31,7 @@ def test_array_dimensions():


 def test_normal_equation():
-    t1 = np.array([-5.21e-16,  -7.86e-02,   1.02e+00])
+    t1 = np.array([-5.21e-16, -7.86e-02, 1.02e+00])
    ada = Adaline(epochs=30,
                  eta=0.01,
                  minibatches=None,
@@ -42,7 +42,7 @@ def test_normal_equation():


 def test_gradient_descent():
-    t1 = np.array([-5.21e-16,  -7.86e-02,   1.02e+00])
+    t1 = np.array([-5.21e-16, -7.86e-02, 1.02e+00])
    ada = Adaline(epochs=30,
                  eta=0.01,
                  minibatches=1,
@@ -53,7 +53,7 @@ def test_gradient_descent():


 def test_refit_weights():
-    t1 = np.array([-5.21e-16,  -7.86e-02,   1.02e+00])
+    t1 = np.array([-5.21e-16, -7.86e-02, 1.02e+00])
    ada = Adaline(epochs=15,
                  eta=0.01,
                  minibatches=1,
@@ -65,7 +65,7 @@ def test_refit_weights():


 def test_standardized_iris_data_with_zero_weights():
-    t1 = np.array([-5.21e-16,  -7.86e-02,   1.02e+00])
+    t1 = np.array([-5.21e-16, -7.86e-02, 1.02e+00])
    ada = Adaline(epochs=30,
                  eta=0.01,
                  minibatches=1,
@@ -77,7 +77,7 @@ def test_standardized_iris_data_with_zero_weights():


 def test_stochastic_gradient_descent():
-    t1 = np.array([-5.21e-16,  -7.86e-02,   1.02e+00])
+    t1 = np.array([-5.21e-16, -7.86e-02, 1.02e+00])
    ada = Adaline(epochs=30,
                  eta=0.01,
                  minibatches=len(y),
@@ -98,7 +98,7 @@ def test_ary_persistency_in_shuffling():


 def test_0_1_class():
-    t1 = np.array([0.51, -0.04,  0.51])
+    t1 = np.array([0.51, -0.04, 0.51])
    ada = Adaline(epochs=30,
                  eta=0.01,
                  minibatches=1,
@@ -9,8 +9,6 @@ from mlxtend.data import iris_data
 import numpy as np


-#### Binary
-
 X, y = iris_data()
 X = X[:, [0, 3]]  # sepal length and petal width
 X_bin = X[0:100]  # class 0 and class 1
@@ -77,8 +75,6 @@ def test_multi_logistic_regression_gd_weights():


 def test_multi_logistic_regression_gd_acc():
-    t = np.array([[-0.17, -2.86, 3.51],
-                  [-4.85, 2.0, 0.35]])
    lr = SoftmaxRegression(epochs=200,
                           eta=0.005,
                           minibatches=1,
@@ -62,7 +62,7 @@ def plot_decision_regions(X, y, clf,

    if not y.dtype == int:
        y = y.astype(int)
-        
+
    # check if test data is provided
    plot_testdata = True
    if not isinstance(X_highlight, np.ndarray):
@@ -146,7 +146,7 @@ def plot_decision_regions(X, y, clf,
                       X_highlight[:, 1],
                       c='',
                       alpha=1.0,
-                       linewidth=1,
+                       linewidths=1,
                       marker='o',
                       s=80)
        else:
@@ -154,7 +154,7 @@ def plot_decision_regions(X, y, clf,
                       [0 for i in X_highlight],
                       c='',
                       alpha=1.0,
-                       linewidth=1,
+                       linewidths=1,
                       marker='o',
                       s=80)

@@ -120,7 +120,7 @@ def plot_learning_curves(X_train, y_train,
            plt.ylabel('Performance ({})'.format(scoring))
            if print_model:
                plt.title('Learning Curves\n\n{}\n'.format(model))
-            plt.legend(loc='best', numpoints=1)
+            plt.legend(loc=legend_loc, numpoints=1)
            plt.xlim([0, 110])
            max_y = max(max(test_errors), max(training_errors))
            min_y = min(min(test_errors), min(training_errors))
@@ -145,9 +145,9 @@ def scoring(y_target, y_predicted, metric='error',
        elif metric == 'f1':
            pre = float(tp) / (tp + fp)
            rec = float(tp) / (fn + tp)
-            res = 2.0 * (pre * rec)/(pre + rec)
+            res = 2.0 * (pre * rec) / (pre + rec)
        elif metric == 'matthews_corr_coef':
-            res = float(tp*tn - fp*fn)
-            res = res / np.sqrt((tp + fp)*(tp + fn)*(tn + fp)*(tn + fn))
+            res = float(tp * tn - fp * fn)
+            res = res / np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))

    return res
@@ -7,6 +7,7 @@
 from mlxtend.evaluate import scoring
 import numpy as np

+
 def test_metric_argument():
    "Test exception is raised when user provides invalid metric argument"
    try:
@@ -8,8 +8,10 @@


 class ColumnSelector(object):
-    """ A feature selector for scikit-learn's Pipeline class that returns
-        specified columns from a numpy array.
+    """Select specific columns from a data set.
+
+    A feature selector for scikit-learn's Pipeline class that returns
+    specified columns from a numpy array.

    """
    def __init__(self, cols):
@@ -20,10 +20,10 @@ from sklearn.base import BaseEstimator
 from sklearn.base import MetaEstimatorMixin
 from sklearn.cross_validation import cross_val_score
 from ..externals.name_estimators import _name_estimators
-from ..externals import six


 class SequentialFeatureSelector(BaseEstimator, MetaEstimatorMixin):
+
    """Sequential Feature Selection for Classification and Regression.

    Parameters
@@ -133,29 +133,29 @@ class SequentialFeatureSelector(BaseEstimator, MetaEstimatorMixin):
            prev_subset = set(k_idx)
            if self.forward:
                k_idx, k_score, cv_scores = \
-                         self._inclusion(orig_set=orig_set,
-                                         subset=prev_subset,
-                                         X=X, y=y)
+                    self._inclusion(orig_set=orig_set,
+                                    subset=prev_subset,
+                                    X=X, y=y)
            else:
                k_idx, k_score, cv_scores = \
-                         self._exclusion(feature_set=prev_subset, X=X, y=y)
+                    self._exclusion(feature_set=prev_subset, X=X, y=y)

            if self.floating and not self._is_stuck(sdq):
                (new_feature,) = set(k_idx) ^ prev_subset
                if self.forward:
                    k_idx_c, k_score_c, cv_scores_c = \
-                            self._exclusion(feature_set=k_idx,
-                                            fixed_feature=new_feature,
-                                            X=X, y=y)
+                        self._exclusion(feature_set=k_idx,
+                                        fixed_feature=new_feature,
+                                        X=X, y=y)
                else:
                    k_idx_c, k_score_c, cv_scores_c = \
-                            self._inclusion(orig_set=orig_set - {new_feature},
-                                            subset=set(k_idx),
-                                            X=X, y=y)
+                        self._inclusion(orig_set=orig_set - {new_feature},
+                                        subset=set(k_idx),
+                                        X=X, y=y)

                if k_score_c and k_score_c > k_score:
                    k_idx, k_score, cv_scores = \
-                                k_idx_c, k_score_c, cv_scores_c
+                        k_idx_c, k_score_c, cv_scores_c

            k = len(k_idx)
            # floating can lead to multiple same-sized subsets
@@ -168,7 +168,7 @@ class SequentialFeatureSelector(BaseEstimator, MetaEstimatorMixin):

            if self.print_progress:
                sys.stderr.write('\rFeatures: %d/%d' % (
-                        len(k_idx), self.k_features))
+                    len(k_idx), self.k_features))
                sys.stderr.flush()

        self.k_feature_idx_ = k_idx
@@ -221,7 +221,7 @@ class SequentialFeatureSelector(BaseEstimator, MetaEstimatorMixin):
            all_avg_scores = []
            all_cv_scores = []
            all_subsets = []
-            for p in combinations(feature_set, r=n-1):
+            for p in combinations(feature_set, r=n - 1):
                if fixed_feature and fixed_feature not in set(p):
                    continue
                cv_scores = self._calc_score(X, y, p)
@@ -14,7 +14,7 @@ from itertools import cycle


 def enrichment_plot(df, colors='bgrkcy', markers=' ', linestyles='-',
-                    alpha=0.5, lw=2, legend=True, where='post', grid=True,
+                    alpha=0.5, lw=2, where='post', grid=True,
                    count_label='Count',
                    xlim='auto', ylim='auto', invert_axes=False,
                    legend_loc='best', ax=None):
@@ -37,8 +37,6 @@ def enrichment_plot(df, colors='bgrkcy', markers=' ', linestyles='-',
        Transparency level from 0.0 to 1.0.
    lw : int or float (default: 2)
        Linewidth parameter.
-    legend : bool (default: True)
-        Plots legend if True.
    where : {'post', 'pre', 'mid'} (default: 'post')
        Starting location of the steps.
    grid : bool (default: `True`)
@@ -74,7 +72,7 @@ def enrichment_plot(df, colors='bgrkcy', markers=' ', linestyles='-',
    color_gen = cycle(colors)
    marker_gen = cycle(markers)
    linestyle_gen = cycle(linestyles.split(','))
-    r = range(1, len(df_temp.index)+1)
+    r = range(1, len(df_temp.index) + 1)
    labels = df_temp.columns

    x_data = df_temp
@@ -99,13 +97,13 @@ def enrichment_plot(df, colors='bgrkcy', markers=' ', linestyles='-',
        ax.set_ylim, ax.set_xlim = ax.set_xlim, ax.set_ylim

    if ylim == 'auto':
-        ax.set_ylim([np.min(y_data)-1, np.max(y_data)+1])
+        ax.set_ylim([np.min(y_data) - 1, np.max(y_data) + 1])
    else:
        ax.set_ylim(ylim)

    if xlim == 'auto':
        df_min, df_max = np.min(x_data.min()), np.max(x_data.max())
-        ax.set_xlim([df_min-1, df_max+1])
+        ax.set_xlim([df_min - 1, df_max + 1])

    else:
        ax.set_xlim(xlim)
@@ -8,6 +8,7 @@


 class DenseTransformer(object):
+
    """Convert a sparse matrix into a dense matrix."""

    def __init__(self, some_param=True):
@@ -11,6 +11,7 @@ from .transformer import TransformerObj


 class MeanCenterer(TransformerObj):
+
    """Column centering of vectors and matrices.

    Attributes
@@ -22,7 +22,7 @@ def one_hot(y, num_labels='auto', dtype='float'):

    Returns
    ----------
-    onehot : numpy.ndarray, shape = [n_classlabels]
+    ary : numpy.ndarray, shape = [n_samples, n_classlabels]
        One-hot encoded array, where each sample is represented as
        a row vector in the returned array.

@@ -41,5 +41,11 @@ def one_hot(y, num_labels='auto', dtype='float'):
    else:
        uniq = num_labels
    if uniq == 1:
-        return np.array([[0.]], dtype=dtype)
-    return (np.arange(uniq) == yt[:, None]).astype(dtype)
+        ary = np.array([[0.]], dtype=dtype)
+
+    else:
+        ary = np.zeros((len(y), uniq))
+        for i, val in enumerate(y):
+            ary[i, val] = 1
+
+    return ary.astype(dtype)
@@ -48,8 +48,8 @@ def minmax_scaling(array, columns, min_val=0, max_val=1):
    ary_newt[:, columns] = numerator / denominator

    if not min_val == 0 and not max_val == 1:
-            ary_newt[:, columns] = (ary_newt[:, columns] *
-                                    (max_val - min_val) + min_val)
+        ary_newt[:, columns] = (ary_newt[:, columns] *
+                                (max_val - min_val) + min_val)

    return ary_newt[:, columns]

@@ -46,13 +46,13 @@ def test_list():
@raises(AttributeError)
 def test_multidim_list():
    y = [[0, 1, 2, 3, 4, 2]]
-    out = one_hot(y)
+    one_hot(y)


@raises(AttributeError)
 def test_multidim_array():
    y = np.array([[0], [1], [2], [3], [4], [2]])
-    out = one_hot(y)
+    one_hot(y)


 def test_oneclass():
@@ -60,7 +60,7 @@ def test_oneclass():
                                  np.array([[0.]], dtype='float'))


-def test_list():
+def test_list_morelabels():
    y = [0, 1]
    expect = np.array([[1., 0., 0.],
                       [0., 1., 0.]], dtype='float')
@@ -13,7 +13,8 @@ import matplotlib.pyplot as plt
 import numpy as np


-def plot_linear_regression(X, y, model=LinearRegression(), corr_func='pearsonr',
+def plot_linear_regression(X, y, model=LinearRegression(),
+                           corr_func='pearsonr',
                           scattercolor='blue', fit_style='k--', legend=True,
                           xlim='auto'):
    """Plot a linear regression line fit.
@@ -12,11 +12,17 @@ from time import time


 class _BaseRegressor(object):
-    """Parent Class Base Regressor"""
+
+    """Parent Class Base Regressor
+
+    A base class that is inherited by
+    regressor child classes.
+
+    """
    def __init__(self, print_progress=0):
        self.print_progress = print_progress

-    def fit(self, X, y):
+    def fit(self, X, y, init_weights=True):
        """Learn weight coefficients from training data.

        Parameters
@@ -26,12 +32,18 @@ class _BaseRegressor(object):
            n_features is the number of features.
        y : array-like, shape = [n_samples]
            Target values.
+        init_weights : bool (default: True)
+            Reinitialize weights

        Returns
        -------
        self : object

        """
+        if not (init_weights is None or isinstance(init_weights, bool)):
+            raise AttributeError("init_weights must be True, False, or None")
+        init_weights
+        self._check_arrays(X=X, y=y)
        return self

    def predict(self, X):
@@ -58,8 +70,8 @@ class _BaseRegressor(object):

    def _shuffle(self, arrays):
        """Shuffle arrays in unison."""
-        r = np.random.permutation(len(y))
-        return [ary[r] for r in arrays]
+        r = np.random.permutation(len(arrays[0]))
+        return [ary[r] for ary in arrays]

    def _print_progress(self, epoch, cost=None, time_interval=10):
        if self.print_progress > 0:
@@ -7,7 +7,6 @@
 # License: BSD 3 clause

 import numpy as np
-from sys import stderr
 from time import time
 from .base import _BaseRegressor

@@ -19,10 +18,9 @@ from .base import _BaseRegressor
 #
 # License: BSD 3 clause

-import numpy as np
-

 class LinearRegression(_BaseRegressor):
+
    """ Ordinary least squares linear regression.

    Parameters
@@ -93,9 +91,10 @@ class LinearRegression(_BaseRegressor):

        # initialize weights
        if init_weights:
-            self.w_ = self._init_weights(shape=1 + X.shape[1],
-                                         zero_init_weight=self.zero_init_weight,
-                                         seed=self.random_seed)
+            self.w_ = self._init_weights(
+                shape=1 + X.shape[1],
+                zero_init_weight=self.zero_init_weight,
+                seed=self.random_seed)

        self.cost_ = []

@@ -124,7 +123,7 @@ class LinearRegression(_BaseRegressor):
                cost = self._sum_squared_error_cost(y, self.activation(X))
                self.cost_.append(cost)
                if self.print_progress:
-                    self._print_progress(epoch=i+1, cost=cost)
+                    self._print_progress(epoch=i + 1, cost=cost)

        return self

@@ -135,11 +134,6 @@ class LinearRegression(_BaseRegressor):
        w = np.dot(z, np.dot(Xb.T, y))
        return w

-    def _shuffle(self, X, y):
-        """Unison shuffling."""
-        r = np.random.permutation(len(y))
-        return X[r], y[r]
-
    def net_input(self, X):
        """Compute the linear net input."""
        return np.dot(X, self.w_[1:]) + self.w_[0]
@@ -18,6 +18,7 @@ import numpy as np


 class StackingRegressor(BaseEstimator, RegressorMixin, TransformerMixin):
+
    """A Stacking regressor for scikit-learn estimators for regression.

    Parameters
@@ -4,12 +4,10 @@
 #
 # License: BSD 3 clause

-from mlxtend.data import boston_housing_data
 from mlxtend.regressor import StackingRegressor
 from sklearn.linear_model import LinearRegression
 from sklearn.linear_model import Ridge
 from sklearn.svm import SVR
-from mlxtend.data import boston_housing_data
 import numpy as np
 from sklearn.grid_search import GridSearchCV
 from numpy.testing import assert_almost_equal
@@ -31,7 +29,7 @@ def test_different_models():
    svr_rbf = SVR(kernel='rbf')
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=svr_rbf)
-    y_pred = stregr.fit(X1, y).predict(X1)
+    stregr.fit(X1, y).predict(X1)
    mse = 0.214
    got = np.mean((stregr.predict(X1) - y) ** 2)
    assert round(got, 3) == mse
@@ -44,7 +42,7 @@ def test_multivariate():
    svr_rbf = SVR(kernel='rbf')
    stregr = StackingRegressor(regressors=[svr_lin, lr, ridge],
                               meta_regressor=svr_rbf)
-    y_pred = stregr.fit(X2, y).predict(X2)
+    stregr.fit(X2, y).predict(X2)
    mse = 0.218
    got = np.mean((stregr.predict(X2) - y) ** 2)
    print(got)
@@ -129,4 +127,5 @@ def test_get_coeff_fail():
    stregr = StackingRegressor(regressors=[ridge, lr],
                               meta_regressor=svr_rbf)
    stregr = stregr.fit(X1, y)
-    got = stregr.coef_
+    r = stregr.coef_
+    assert r
@@ -14,4 +14,4 @@ from .tokenizer import tokenizer_words_and_emoticons
 from .tokenizer import tokenizer_emoticons

 __all__ = ["generalize_names", "generalize_names_duplcheck",
-            "tokenizer_words_and_emoticons", "tokenizer_emoticons"]
+           "tokenizer_words_and_emoticons", "tokenizer_emoticons"]
@@ -17,6 +17,7 @@ if sys.version_info <= (3, 0):
                      " with Python 2.x,"
                      " due to its unicode intricacies")

+
 def generalize_names(name, output_sep=' ', firstname_output_letters=1):
    """Generalize a person's first and last name.

@@ -48,15 +49,14 @@ def generalize_names(name, output_sep=' ', firstname_output_letters=1):
    exc = ['van der ', 'de ', 'van ', 'von ', 'di ']
    for e in exc:
        if name.startswith(e):
-            repl = e.replace(' ','')
-            name = (repl + name[len(e)-1:].strip())
+            repl = e.replace(' ', '')
+            name = (repl + name[len(e) - 1:].strip())

    exc = [' van der ', ' de ', ' van ', ' von ', ' di ',
-    ', van der ', ', de', ', van ', ', von ', ', di ']
+           ', van der ', ', de', ', van ', ', von ', ', di ']

    for e in exc:
-        name = name.replace(e, ' '+e.replace(' ', ''))
-
+        name = name.replace(e, ' ' + e.replace(' ', ''))

    if ',' in name:
        last, first = first, last
@@ -71,12 +71,15 @@ def generalize_names(name, output_sep=' ', firstname_output_letters=1):
    if sys.version_info.major == 2:
        name = name.decode('utf-8')

-    name = ''.join(x for x in unicodedata.normalize('NFKD', name) if x in string.ascii_letters+' ')
+    name = ''.join(x for x in unicodedata.normalize('NFKD', name)
+                   if x in string.ascii_letters + ' ')

    # get first and last name if applicable
    m = re.match('(?P<first>\w+)\W+(?P<last>\w+)', name)
    if m:
-        output = '%s%s%s' % (m.group(last), output_sep, m.group(first)[:firstname_output_letters])
+        output = '%s%s%s' % (m.group(last),
+                             output_sep,
+                             m.group(first)[:firstname_output_letters])
    else:
        output = name

@@ -87,21 +90,24 @@ def generalize_names(name, output_sep=' ', firstname_output_letters=1):
 def generalize_names_duplcheck(df, col_name):
    """ Generalizes names and removes duplicates.

-    Description : Applies mlxtend.text.generalize_names to a DataFrame with 1 first name letter
-    by default and uses more first name letters if duplicates are detected.
+    Description : Applies mlxtend.text.generalize_names to a DataFrame
+    with 1 first name letter by default
+    and uses more first name letters if duplicates are detected.

    Parameters
    ----------
    df : `pandas.DataFrame`
-      DataFrame that contains a column where generalize_names should be applied.
-
+        DataFrame that contains a column where
+        generalize_names should be applied.
    col_name : `str`
-      Name of the DataFrame column where `generalize_names` function should be applied to.
+        Name of the DataFrame column to which the `generalize_names`
+        function should be applied.

    Returns
    ----------
    df_new : `str`
-      New DataFrame object where generalize_names function has been applied without duplicates.
+        New DataFrame object where generalize_names function has
+        been applied without duplicates.

    """
    df_new = df.copy()
@@ -110,15 +116,20 @@ def generalize_names_duplcheck(df, col_name):

    df_new[col_name] = df_new[col_name].apply(generalize_names)

-    dupl = list(df_new[df_new.duplicated(subset=col_name, take_last=True)].index) + \
-       list(df_new[df_new.duplicated(subset=col_name, take_last=False)].index)
+    dupl = (list(df_new[df_new.duplicated(subset=col_name,
+                                          take_last=True)].index) +
+            list(df_new[df_new.duplicated(subset=col_name,
+                                          take_last=False)].index))

    firstname_letters = 2
    while len(dupl) > 0:
        for idx in dupl:
-            df_new.loc[idx, col_name] = generalize_names(df.loc[idx, col_name],
-                                                  firstname_output_letters=firstname_letters)
-        dupl = list(df_new[df_new.duplicated(subset=col_name, take_last=True)].index) + \
-               list(df_new[df_new.duplicated(subset=col_name, take_last=False)].index)
+            df_new.loc[idx, col_name] = generalize_names(
+                df.loc[idx, col_name],
+                firstname_output_letters=firstname_letters)
+        dupl = (list(df_new[df_new.duplicated(subset=col_name,
+                                              take_last=True)].index) +
+                list(df_new[df_new.duplicated(subset=col_name,
+                                              take_last=False)].index))
        firstname_letters += 1
    return df_new
@@ -6,6 +6,7 @@ if sys.version_info < (3, 0):

 from mlxtend.text import generalize_names

+
 def test_generalize_names():

    assert(generalize_names("Samuel Eto'o") == 'etoo s')
@@ -13,18 +14,25 @@ def test_generalize_names():
    assert(generalize_names("Eto'o, Samuel") == 'etoo s')
    assert(generalize_names('Xavi') == 'xavi')
    assert(generalize_names('Yaya Toure') == 'toure y')
-    assert(generalize_names('Pozo, Jose Angel') ==  'pozo j')
+    assert(generalize_names('Pozo, Jose Angel') == 'pozo j')
    assert(generalize_names('Pozo, Jose Angel') == 'pozo j')
    assert(generalize_names('Jose Angel Pozo') == 'pozo j')
    assert(generalize_names('Jose Pozo') == 'pozo j')
-    assert(generalize_names('Pozo, Jose Angel', firstname_output_letters=2) == 'pozo jo')
-    assert(generalize_names("Eto'o, Samuel", firstname_output_letters=2) == 'etoo sa')
-    assert(generalize_names("Eto'o, Samuel", firstname_output_letters=0) == 'etoo')
+    assert(generalize_names('Pozo, Jose Angel', firstname_output_letters=2) ==
+           'pozo jo')
+    assert(generalize_names("Eto'o, Samuel", firstname_output_letters=2) ==
+           'etoo sa')
+    assert(generalize_names("Eto'o, Samuel", firstname_output_letters=0) ==
+           'etoo')
    assert(generalize_names("Eto'o, Samuel", output_sep=', ') == 'etoo, s')
    assert(generalize_names("Eto'o, Samuel", output_sep=', ') == 'etoo, s')

-    assert(generalize_names("van Persie, Robin", output_sep=', ') == 'vanpersie, r')
-    assert(generalize_names("Robin van Persie", output_sep=', ') == 'vanpersie, r')
-    assert(generalize_names("Rafael van der Vaart", output_sep=', ') == 'vandervaart, r')
-    assert(generalize_names("van der Vaart, Rafael", output_sep=', ') == 'vandervaart, r')
+    assert(generalize_names("van Persie, Robin", output_sep=', ') ==
+           'vanpersie, r')
+    assert(generalize_names("Robin van Persie", output_sep=', ') ==
+           'vanpersie, r')
+    assert(generalize_names("Rafael van der Vaart", output_sep=', ') ==
+           'vandervaart, r')
+    assert(generalize_names("van der Vaart, Rafael", output_sep=', ') ==
+           'vandervaart, r')
    assert(generalize_names("Ben Hamer") == 'hamer b')
@@ -9,18 +9,17 @@ from mlxtend.text import generalize_names_duplcheck
 from mlxtend.text import generalize_names
 from io import StringIO
 import pandas as pd
-import os
+

 def test_generalize_names_duplcheck():

-
    df = pd.read_csv(StringIO(csv))

    # duplicates before
    dupl = any(df['Name'].apply(generalize_names).duplicated())
-    assert(dupl==True)
+    assert dupl is True

    # no duplicates
    df_new = generalize_names_duplcheck(df=df, col_name='Name')
    no_dupl = any(df_new['Name'].duplicated())
-    assert(no_dupl==False)
+    assert no_dupl is False
@@ -1,8 +1,12 @@
 from mlxtend.text import tokenizer_words_and_emoticons
 from mlxtend.text import tokenizer_emoticons

-def test_tokenizer_words_and_emoticons():
-    assert(tokenizer_words_and_emoticons('</a>This :) is :( a test :-)!') == ['this', 'is', 'a', 'test', ':)', ':(', ':-)'])

-def test_tokenizer_words_and_emoticons():
-    assert(tokenizer_emoticons('</a>This :) is :( a test :-)!') == [':)', ':(', ':-)'])
+def test_tokenizer_words_and_emoticons_1():
+    assert(tokenizer_words_and_emoticons('</a>This :) is :( a test :-)!') ==
+           ['this', 'is', 'a', 'test', ':)', ':(', ':-)'])
+
+
+def test_tokenizer_words_and_emoticons_2():
+    assert(tokenizer_emoticons('</a>This :) is :( a test :-)!') ==
+           [':)', ':(', ':-)'])
@@ -6,9 +6,9 @@
 #
 # License: BSD 3 clause

-
 import re

+
 def tokenizer_words_and_emoticons(text):
    """Convert text to lowercase words and emoticons.

@@ -10,7 +10,8 @@ import time
 import sys


-class Counter():
+class Counter(object):
+
    """Class to display the progress of for-loop iterators.

    Parameters