# Question script: fit a k-nearest-neighbours classifier for each k in k_range
# on data from vitalsign_test.csv and plot test accuracy against k.
from sklearn.neighbors import KNeighborsClassifier
import pandas as pd
from sklearn import metrics
# NOTE(review): sklearn.cross_validation was removed in later scikit-learn
# releases; train_test_split is provided by sklearn.model_selection there --
# confirm against the installed version.
from sklearn.cross_validation import train_test_split
import matplotlib.pyplot as plt

r = pd.read_csv("vitalsign_test.csv")

# Collect the column names so features and target can be picked by position.
clm_list = []
for column in r.columns:
    clm_list.append(column)

# Features: every column except the first and the last.
X = r[clm_list[1:len(clm_list)-1]].values
# Target: the last column.
y = r[clm_list[len(clm_list)-1]].values

# Hold out 30% of the rows for testing; random_state=4 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split (X,y, test_size = 0.3, random_state=4)

k_range = range(1,25)
scores = []
for k in k_range:
    clf = KNeighborsClassifier(n_neighbors = k)
    clf.fit(X_train,y_train)
# NOTE(review): the next two statements are outside the loop body, so they run
# only once (using the last fitted clf) and scores ends up with a single entry.
y_pred = clf.predict(X_test)
scores.append(metrics.accuracy_score(y_test,y_pred))

# k_range and scores have different lengths here, which is what plot() rejects.
plt.plot(k_range,scores)
plt.xlabel('value of k for clf')
plt.ylabel('testing accuracy')
我得到的响应是
ValueError: x and y must have same first dimension(x 和 y 必须具有相同的第一维度)
我的特征和响应的形状是:
y.shape
Out[60]: (500,)
X.shape
Out[61]: (500, 6)
回答:
这与你的 `X` 和 `y` 无关,而是关于传递给 plot 函数的 `x` 和 `y` 参数,因为你的 `scores` 只有一个元素,而 `k_range`
有 24 个(range(1,25) 生成 1 到 24)。错误在于缩进不正确:
# As posted: only the fit is inside the loop; predict and append run once,
# after the loop has finished, so scores receives a single value.
for k in k_range:
    clf = KNeighborsClassifier(n_neighbors = k)
    clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)
scores.append(metrics.accuracy_score(y_test,y_pred))
应该改为
# Corrected: predict and append are indented into the loop body, so scores
# gets one accuracy per k and its length matches k_range.
for k in k_range:
    clf = KNeighborsClassifier(n_neighbors = k)
    clf.fit(X_train,y_train)
    y_pred = clf.predict(X_test)
    scores.append(metrics.accuracy_score(y_test,y_pred))