simplify str checking in error msg for diff py versions

re-enable all tests
meaningful error if not numpy array fed to fit of StackingCVClassifier
2017-06-01 23:35:55 -04:00 · 2017-06-01 21:28:46 -04:00 · 2017-06-01 21:27:34 -04:00
5 changed files with 95 additions and 59 deletions
@@ -23,9 +23,8 @@ The CHANGELOG for the current development version is available at

 - The TensorFlow estimators have been removed from mlxtend, since TensorFlow now has very convenient ways to build on estimators, which render those implementations obsolete.
 - `plot_decision_regions` now supports plotting decision regions for more than 2 training features. (via [James Bourbeau](https://github.com/jrbourbeau)).
-
-
 - Parallel execution in `mlxtend.feature_selection.SequentialFeatureSelector` and `mlxtend.feature_selection.ExhaustiveFeatureSelector` is now performed over different feature subsets instead of the different cross-validation folds to better utilize machines with multiple processors if the number of features is large ([#193](https://github.com/rasbt/mlxtend/pull/193), via [@whalebot-helmsman](https://github.com/whalebot-helmsman)).
+- Raise meaningful error messages if pandas `DataFrame`s or Python lists of lists are fed into the StackingCVClassifier as `fit` arguments.

 ##### Bug Fixes

@@ -105,10 +105,11 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):

        Parameters
        ----------
-        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+        X : numpy array, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.
-        y : array-like, shape = [n_samples]
+
+        y : numpy array, shape = [n_samples]
            Target values.

        Returns
@@ -158,7 +159,20 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
                    print("Training and fitting fold %d of %d..." %
                          ((num + 1), self.n_folds))

-                model.fit(X[train_index], y[train_index])
+                try:
+                    model.fit(X[train_index], y[train_index])
+                except TypeError as e:
+                    raise TypeError(str(e) + '\nPlease check that X and y'
+                                    ' are NumPy arrays. If X and y are lists'
+                                    ' of lists,\ntry passing them as'
+                                    ' numpy.array(X)'
+                                    ' and numpy.array(y).')
+                except KeyError as e:
+                    raise KeyError(str(e) + '\nPlease check that X and y'
+                                   ' are NumPy arrays. If X and y are pandas'
+                                   ' DataFrames,\ntry passing them as'
+                                   ' X.values'
+                                   ' and y.values.')

                if not self.use_probas:
                    prediction = model.predict(X[test_index])
@@ -223,7 +237,7 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):

        Parameters
        ----------
-        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+        X : numpy array, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

@@ -257,7 +271,7 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):

        Parameters
        ----------
-        X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+        X : numpy array, shape = [n_samples, n_features]
            Training vectors, where n_samples is the number of samples and
            n_features is the number of features.

@@ -7,6 +7,7 @@

 from mlxtend.classifier import StackingCVClassifier

+import pandas as pd
 from sklearn.linear_model import LogisticRegression
 from sklearn.naive_bayes import GaussianNB
 from sklearn.ensemble import RandomForestClassifier
@@ -188,3 +189,36 @@ def test_verbose():
                                shuffle=False,
                                verbose=3)
    sclf.fit(iris.data, iris.target)
+
+
+def test_list_of_lists():
+    X_list = [i for i in X]
+    meta = LogisticRegression()
+    clf1 = RandomForestClassifier()
+    clf2 = GaussianNB()
+    sclf = StackingCVClassifier(classifiers=[clf1, clf2],
+                                use_probas=True,
+                                meta_classifier=meta,
+                                shuffle=False,
+                                verbose=0)
+
+    try:
+        sclf.fit(X_list, iris.target)
+    except TypeError as e:
+        assert 'are NumPy arrays. If X and y are lists' in str(e)
+
+
+def test_pandas():
+    X_df = pd.DataFrame(X)
+    meta = LogisticRegression()
+    clf1 = RandomForestClassifier()
+    clf2 = GaussianNB()
+    sclf = StackingCVClassifier(classifiers=[clf1, clf2],
+                                use_probas=True,
+                                meta_classifier=meta,
+                                shuffle=False,
+                                verbose=0)
+    try:
+        sclf.fit(X_df, iris.target)
+    except KeyError as e:
+        assert 'are NumPy arrays. If X and y are pandas DataFrames' in str(e)
Author	SHA1	Message	Date
rasbt	7c397216a7	simplify str checking in error msg for diff py versions	2017-06-01 23:35:55 -04:00
rasbt	1ee254eaa7	re-enable all tests	2017-06-01 21:28:46 -04:00
rasbt	7219ef97ff	meaningful error if not numpy array fed to fit of StackingCVClassifier	2017-06-01 21:27:34 -04:00