我想做的是:
我想在多类别问题上使用 roc_auc 作为评分指标(scoring)来计算 cross_val_score。
我尝试做的是:
这是一个用iris数据集制作的可重现示例。
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import cross_val_score

# Load the iris dataset and wrap the features in a DataFrame with named columns.
iris = load_iris()
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
我对目标变量进行独热编码
# One-hot encode the integer class labels and convert the result to an array
# via .toarray(), giving one indicator column per class.
encoder = OneHotEncoder()
y = encoder.fit_transform(pd.DataFrame(iris.target)).toarray()
我使用决策树分类器
# DecisionTreeClassifier was used without being imported in the snippets above.
from sklearn.tree import DecisionTreeClassifier

# Decision tree limited to depth 1, used as the estimator for cross-validation.
model = DecisionTreeClassifier(max_depth=1)
最后我执行交叉验证
# 3-fold cross-validation scored with "roc_auc". With the one-hot encoded y
# this call raises the ValueError shown in the traceback below.
cross_val_score(model, X, y, cv=3, scoring="roc_auc")
失败的原因:
最后一行抛出了以下错误
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-87-91dc6fa67512> in <module>()
----> 1 cross_val_score(model, X, y, cv=3, scoring="roc_auc")

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in cross_val_score(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch)
    340                                 n_jobs=n_jobs, verbose=verbose,
    341                                 fit_params=fit_params,
--> 342                                 pre_dispatch=pre_dispatch)
    343     return cv_results['test_score']
    344

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in cross_validate(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score)
    204             fit_params, return_train_score=return_train_score,
    205             return_times=True)
--> 206         for train, test in cv.split(X, y, groups))
    207
    208     if return_train_score:

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self, iterable)
    777             # was dispatched. In particular this covers the edge
    778             # case of Parallel used with an exhausted iterator.
--> 779             while self.dispatch_one_batch(iterator):
    780                 self._iterating = True
    781             else:

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py in dispatch_one_batch(self, iterator)
    623                 return False
    624             else:
--> 625                 self._dispatch(tasks)
    626                 return True
    627

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py in _dispatch(self, batch)
    586         dispatch_timestamp = time.time()
    587         cb = BatchCompletionCallBack(dispatch_timestamp, len(batch), self)
--> 588         job = self._backend.apply_async(batch, callback=cb)
    589         self._jobs.append(job)
    590

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py in apply_async(self, func, callback)
    109     def apply_async(self, func, callback=None):
    110         """Schedule a func to be run"""
--> 111         result = ImmediateResult(func)
    112         if callback:
    113             callback(result)

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/_parallel_backends.py in __init__(self, batch)
    330         # Don't delay the application, to avoid keeping the input
    331         # arguments in memory
--> 332         self.results = batch()
    333
    334     def get(self):

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py in __call__(self)
    129
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132
    133     def __len__(self):

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/externals/joblib/parallel.py in <listcomp>(.0)
    129
    130     def __call__(self):
--> 131         return [func(*args, **kwargs) for func, args, kwargs in self.items]
    132
    133     def __len__(self):

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _fit_and_score(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, error_score)
    486         fit_time = time.time() - start_time
    487         # _score will return dict if is_multimetric is True
--> 488         test_scores = _score(estimator, X_test, y_test, scorer, is_multimetric)
    489         score_time = time.time() - start_time - fit_time
    490         if return_train_score:

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _score(estimator, X_test, y_test, scorer, is_multimetric)
    521     """
    522     if is_multimetric:
--> 523         return _multimetric_score(estimator, X_test, y_test, scorer)
    524     else:
    525         if y_test is None:

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/model_selection/_validation.py in _multimetric_score(estimator, X_test, y_test, scorers)
    551             score = scorer(estimator, X_test)
    552         else:
--> 553             score = scorer(estimator, X_test, y_test)
    554
    555         if hasattr(score, 'item'):

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/metrics/scorer.py in __call__(self, clf, X, y, sample_weight)
    204                                                  **self._kwargs)
    205         else:
--> 206             return self._sign * self._score_func(y, y_pred, **self._kwargs)
    207
    208     def _factory_args(self):

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/metrics/ranking.py in roc_auc_score(y_true, y_score, average, sample_weight)
    275     return _average_binary_score(
    276         _binary_roc_auc_score, y_true, y_score, average,
--> 277         sample_weight=sample_weight)
    278
    279

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/metrics/base.py in _average_binary_score(binary_metric, y_true, y_score, average, sample_weight)
    116         y_score_c = y_score.take([c], axis=not_average_axis).ravel()
    117         score[c] = binary_metric(y_true_c, y_score_c,
--> 118                                  sample_weight=score_weight)
    119
    120     # Average the results

~/programs/anaconda3/lib/python3.7/site-packages/sklearn/metrics/ranking.py in _binary_roc_auc_score(y_true, y_score, sample_weight)
    266     def _binary_roc_auc_score(y_true, y_score, sample_weight=None):
    267         if len(np.unique(y_true)) != 2:
--> 268             raise ValueError("Only one class present in y_true. ROC AUC score "
    269                              "is not defined in that case.")
    270

ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.
我的环境:
python==3.7.2
sklearn==0.19.2
我的问题:
这是个bug,还是我用错了?
回答: