3 Commits

Author SHA1 Message Date
rasbt 7c397216a7 simplify str checking in error msg for diff py versions 2017-06-01 23:35:55 -04:00
rasbt 1ee254eaa7 re-enable all tests 2017-06-01 21:28:46 -04:00
rasbt 7219ef97ff meaningful error if not numpy array fed to fit of StackingCVClassifier 2017-06-01 21:27:34 -04:00
5 changed files with 95 additions and 59 deletions
+1 -2
View File
@@ -23,9 +23,8 @@ The CHANGELOG for the current development version is available at
- The TensorFlow estimators have been removed from mlxtend, since TensorFlow now has very convenient ways to build on estimators, which render those implementations obsolete.
- `plot_decision_regions` now supports plotting decision regions for more than 2 training features. (via [James Bourbeau](https://github.com/jrbourbeau)).
- Parallel execution in `mlxtend.feature_selection.SequentialFeatureSelector` and `mlxtend.feature_selection.ExhaustiveFeatureSelector` is now performed over different feature subsets instead of the different cross-validation folds to better utilize machines with multiple processors if the number of features is large ([#193](https://github.com/rasbt/mlxtend/pull/193), via [@whalebot-helmsman](https://github.com/whalebot-helmsman)).
- Raise meaningful error messages if pandas `DataFrame`s or Python lists of lists are fed into the StackingCVClassifier as `fit` arguments.
##### Bug Fixes
File diff suppressed because one or more lines are too long
Binary file not shown.

Before

Width:  |  Height:  |  Size: 47 KiB

After

Width:  |  Height:  |  Size: 31 KiB

@@ -105,10 +105,11 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
Parameters
----------
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
X : numpy array, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
y : array-like, shape = [n_samples]
y : numpy array, shape = [n_samples]
Target values.
Returns
@@ -158,7 +159,20 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
print("Training and fitting fold %d of %d..." %
((num + 1), self.n_folds))
model.fit(X[train_index], y[train_index])
try:
model.fit(X[train_index], y[train_index])
except TypeError as e:
raise TypeError(str(e) + '\nPlease check that X and y'
'are NumPy arrays. If X and y are lists'
' of lists,\ntry passing them as'
' numpy.array(X)'
' and numpy.array(y).')
except KeyError as e:
raise KeyError(str(e) + '\nPlease check that X and y'
' are NumPy arrays. If X and y are pandas'
' DataFrames,\ntry passing them as'
' X.values'
' and y.values.')
if not self.use_probas:
prediction = model.predict(X[test_index])
@@ -223,7 +237,7 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
Parameters
----------
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
X : numpy array, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
@@ -257,7 +271,7 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
Parameters
----------
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
X : numpy array, shape = [n_samples, n_features]
Training vectors, where n_samples is the number of samples and
n_features is the number of features.
@@ -7,6 +7,7 @@
from mlxtend.classifier import StackingCVClassifier
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
@@ -188,3 +189,36 @@ def test_verbose():
shuffle=False,
verbose=3)
sclf.fit(iris.data, iris.target)
def test_list_of_lists():
    """Check that fitting on a Python list raises the guided TypeError.

    The feature matrix is converted to a plain list before being handed to
    ``StackingCVClassifier.fit``. The test requires that a ``TypeError`` is
    raised and that its message carries the hint about passing NumPy arrays.

    Raises
    ------
    AssertionError
        If the message lacks the expected hint, or if ``fit`` completes
        without raising ``TypeError`` at all.
    """
    # `X` comes from the enclosing test module (presumably the iris feature
    # matrix -- confirm against the module header).
    X_list = list(X)

    meta = LogisticRegression()
    clf1 = RandomForestClassifier()
    clf2 = GaussianNB()
    sclf = StackingCVClassifier(classifiers=[clf1, clf2],
                                use_probas=True,
                                meta_classifier=meta,
                                shuffle=False,
                                verbose=0)

    try:
        sclf.fit(X_list, iris.target)
    except TypeError as e:
        assert 'are NumPy arrays. If X and y are lists' in str(e)
    else:
        # Without this branch the test would pass silently whenever no
        # exception is raised, which defeats its purpose.
        raise AssertionError('Expected a TypeError when fitting on a '
                             'list of lists, but fit() succeeded.')
def test_pandas():
    """Check that fitting on a pandas DataFrame raises the guided KeyError.

    The feature matrix is wrapped in a ``pd.DataFrame`` before being handed
    to ``StackingCVClassifier.fit``. The test requires that a ``KeyError`` is
    raised and that its message carries the hint about passing NumPy arrays
    (e.g. via ``X.values``).

    Raises
    ------
    AssertionError
        If the message lacks the expected hint, or if ``fit`` completes
        without raising ``KeyError`` at all.
    """
    # `X` comes from the enclosing test module (presumably the iris feature
    # matrix -- confirm against the module header).
    X_df = pd.DataFrame(X)

    meta = LogisticRegression()
    clf1 = RandomForestClassifier()
    clf2 = GaussianNB()
    sclf = StackingCVClassifier(classifiers=[clf1, clf2],
                                use_probas=True,
                                meta_classifier=meta,
                                shuffle=False,
                                verbose=0)

    try:
        sclf.fit(X_df, iris.target)
    except KeyError as e:
        assert 'are NumPy arrays. If X and y are pandas DataFrames' in str(e)
    else:
        # Without this branch the test would pass silently whenever no
        # exception is raised, which defeats its purpose.
        raise AssertionError('Expected a KeyError when fitting on a '
                             'pandas DataFrame, but fit() succeeded.')