我正在尝试调整 DecisionTreeClassifier 的 max_depth 参数,并使用 AUC(曲线下面积)作为评估指标。以下是我的代码:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)


def max_depth_prediction(X_train, y_train, X_test, y_test, y):
    """Fit one decision tree per candidate depth and record train/test AUC.

    Candidate depths are the 32 evenly spaced values from 1 to 32 produced
    by ``np.linspace``. For each depth a ``DecisionTreeClassifier`` is fitted
    on the training data, and the area under the ROC curve of its hard
    predictions is appended to a train list and a test list.

    ``y`` is accepted but never used. Nothing is returned in the visible
    snippet; the two result lists stay local.

    NOTE(review): only the training labels/predictions are cast to int
    before ``roc_curve``; the test labels/predictions are passed as-is.
    With string labels this is presumably where the reported ValueError
    comes from -- confirm.
    """
    train_results, test_results = [], []

    for depth in np.linspace(1, 32, 32, endpoint=True):
        dt = DecisionTreeClassifier(max_depth=depth)
        dt.fit(X_train, y_train)

        # Training-set AUC (labels and predictions cast to int).
        train_pred = dt.predict(X_train)
        print(y_train)
        print(train_pred)
        fpr, tpr, _ = roc_curve(y_train.astype(int), train_pred.astype(int))
        # Append the AUC score to the training results collected so far.
        train_results.append(auc(fpr, tpr))

        # Test-set AUC (labels and predictions passed through uncast).
        y_pred = dt.predict(X_test)
        fpr, tpr, _ = roc_curve(y_test, y_pred)
        # Append the AUC score to the test results collected so far.
        test_results.append(auc(fpr, tpr))
然而在使用时我遇到了错误:
ValueError: y_true takes value in {'0', '1'} and pos_label is not specified: either make y_true take value in {0, 1} or {-1, 1} or pass pos_label explicitly.
我检查了我的两个向量,它们看起来是正确的:
y_train = ['0' '0' '0' ... '1' '1' '0']
train_pred = ['0' '0' '1' ... '1' '1' '0']
回答:
y_train 和 train_pred 中的元素都是字符串('0'、'1'),而 roc_curve 要求标签是整数(取值为 {0, 1} 或 {-1, 1}),否则需要显式传入 pos_label。请先把标签转换为整数,例如这样做:
def max_depth_prediction(X_train, y_train, X_test, y_test, y):
    """Compute train/test ROC AUC for decision trees of depth 1 through 32.

    The labels arrive as strings ('0'/'1'), which ``roc_curve`` rejects
    unless ``pos_label`` is given. Both label vectors are therefore cast to
    integer arrays once, up front, and the trees are fitted on the integer
    labels so that their predictions are integers as well.

    Args:
        X_train: Training feature matrix.
        y_train: Training labels; each element must be convertible to int.
        X_test: Test feature matrix.
        y_test: Test labels; each element must be convertible to int.
        y: Unused; kept so existing callers keep working.

    Returns:
        A ``(train_results, test_results)`` tuple of lists holding one AUC
        score per depth, in increasing depth order.

    Raises:
        ValueError: If a label cannot be converted to an integer.
    """
    # Cast once, outside the loop. np.asarray also accepts plain lists and
    # pandas Series, and the inputs themselves are never rebound, so every
    # iteration fits on exactly the same labels.
    y_train_int = np.asarray(y_train).astype(int)
    y_test_int = np.asarray(y_test).astype(int)

    train_results = []
    test_results = []

    # Integer depths: np.linspace would yield floats, and max_depth is
    # documented as an int.
    for max_depth in range(1, 33):
        dt = DecisionTreeClassifier(max_depth=max_depth)
        dt.fit(X_train, y_train_int)

        # AUC on the training set.
        train_pred = dt.predict(X_train)
        false_positive_rate, true_positive_rate, _ = roc_curve(
            y_train_int, train_pred
        )
        train_results.append(auc(false_positive_rate, true_positive_rate))

        # AUC on the held-out set; the labels here need the same integer
        # cast, otherwise roc_curve raises the same ValueError.
        test_pred = dt.predict(X_test)
        false_positive_rate, true_positive_rate, _ = roc_curve(
            y_test_int, test_pred
        )
        test_results.append(auc(false_positive_rate, true_positive_rate))

    return train_results, test_results