Compare commits
3 Commits
master
...
stackingdata
| Author | SHA1 | Date | |
|---|---|---|---|
| 7c397216a7 | |||
| 1ee254eaa7 | |||
| 7219ef97ff |
@@ -23,9 +23,8 @@ The CHANGELOG for the current development version is available at
|
||||
|
||||
- The TensorFlow estimators have been removed from mlxtend, since TensorFlow now has very convenient ways to build on estimators, which render those implementations obsolete.
|
||||
- `plot_decision_regions` now supports plotting decision regions for more than 2 training features. (via [James Bourbeau](https://github.com/jrbourbeau)).
|
||||
|
||||
|
||||
- Parallel execution in `mlxtend.feature_selection.SequentialFeatureSelector` and `mlxtend.feature_selection.ExhaustiveFeatureSelector` is now performed over different feature subsets instead of the different cross-validation folds to better utilize machines with multiple processors if the number of features is large ([#193](https://github.com/rasbt/mlxtend/pull/193), via [@whalebot-helmsman](https://github.com/whalebot-helmsman)).
|
||||
- Raise meaningful error messages if pandas `DataFrame`s or Python lists of lists are fed into the `StackingCVClassifier` as `fit` arguments.
|
||||
|
||||
##### Bug Fixes
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
BIN
Binary file not shown.
|
Before Width: | Height: | Size: 47 KiB After Width: | Height: | Size: 31 KiB |
@@ -105,10 +105,11 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
|
||||
X : numpy array, shape = [n_samples, n_features]
|
||||
Training vectors, where n_samples is the number of samples and
|
||||
n_features is the number of features.
|
||||
y : array-like, shape = [n_samples]
|
||||
|
||||
y : numpy array, shape = [n_samples]
|
||||
Target values.
|
||||
|
||||
Returns
|
||||
@@ -158,7 +159,20 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
|
||||
print("Training and fitting fold %d of %d..." %
|
||||
((num + 1), self.n_folds))
|
||||
|
||||
model.fit(X[train_index], y[train_index])
|
||||
try:
|
||||
model.fit(X[train_index], y[train_index])
|
||||
except TypeError as e:
|
||||
raise TypeError(str(e) + '\nPlease check that X and y'
|
||||
'are NumPy arrays. If X and y are lists'
|
||||
' of lists,\ntry passing them as'
|
||||
' numpy.array(X)'
|
||||
' and numpy.array(y).')
|
||||
except KeyError as e:
|
||||
raise KeyError(str(e) + '\nPlease check that X and y'
|
||||
' are NumPy arrays. If X and y are pandas'
|
||||
' DataFrames,\ntry passing them as'
|
||||
' X.values'
|
||||
' and y.values.')
|
||||
|
||||
if not self.use_probas:
|
||||
prediction = model.predict(X[test_index])
|
||||
@@ -223,7 +237,7 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
|
||||
X : numpy array, shape = [n_samples, n_features]
|
||||
Training vectors, where n_samples is the number of samples and
|
||||
n_features is the number of features.
|
||||
|
||||
@@ -257,7 +271,7 @@ class StackingCVClassifier(BaseEstimator, ClassifierMixin, TransformerMixin):
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
|
||||
X : numpy array, shape = [n_samples, n_features]
|
||||
Training vectors, where n_samples is the number of samples and
|
||||
n_features is the number of features.
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
from mlxtend.classifier import StackingCVClassifier
|
||||
|
||||
import pandas as pd
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.naive_bayes import GaussianNB
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
@@ -188,3 +189,36 @@ def test_verbose():
|
||||
shuffle=False,
|
||||
verbose=3)
|
||||
sclf.fit(iris.data, iris.target)
|
||||
|
||||
|
||||
def test_list_of_lists():
    """Fitting on a Python list must raise a helpful TypeError.

    The classifier's `fit` wraps TypeErrors raised while training a fold
    and appends a hint to pass NumPy arrays instead of lists; this test
    checks that the hint is present in the raised message.
    """
    # Plain Python list holding the rows of the module-level `X`.
    X_list = list(X)
    meta = LogisticRegression()
    clf1 = RandomForestClassifier()
    clf2 = GaussianNB()
    sclf = StackingCVClassifier(classifiers=[clf1, clf2],
                                use_probas=True,
                                meta_classifier=meta,
                                shuffle=False,
                                verbose=0)

    try:
        sclf.fit(X_list, iris.target)
    except TypeError as e:
        assert 'are NumPy arrays. If X and y are lists' in str(e)
    else:
        # Without this branch the test would pass silently if `fit`
        # stopped raising, hiding a regression in the error handling.
        raise AssertionError('Expected a TypeError when fitting on a'
                             ' list instead of a NumPy array.')
|
||||
|
||||
|
||||
def test_pandas():
    """Fitting on a pandas DataFrame must raise a helpful KeyError.

    The classifier's `fit` wraps KeyErrors raised while training a fold
    and appends a hint to pass `X.values` / `y.values`; this test checks
    that the hint is present in the raised message.
    """
    X_df = pd.DataFrame(X)
    meta = LogisticRegression()
    clf1 = RandomForestClassifier()
    clf2 = GaussianNB()
    sclf = StackingCVClassifier(classifiers=[clf1, clf2],
                                use_probas=True,
                                meta_classifier=meta,
                                shuffle=False,
                                verbose=0)

    try:
        sclf.fit(X_df, iris.target)
    except KeyError as e:
        assert ('are NumPy arrays. If X and y are pandas DataFrames'
                in str(e))
    else:
        # Without this branch the test would pass silently if `fit`
        # stopped raising, hiding a regression in the error handling.
        raise AssertionError('Expected a KeyError when fitting on a'
                             ' pandas DataFrame instead of a NumPy array.')
|
||||
|
||||
Reference in New Issue
Block a user