When I run the following code in a Jupyter Notebook, I get a ValueError:

ValueError: Number of features of the model must match the input. Model n_features is 11 and input n_features is 2

How can I fix this?
# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
This is the full traceback:
ValueError                                Traceback (most recent call last)
<ipython-input-42-bc13e66e79fe> in <module>
      4 X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
      5                      np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
----> 6 plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
      7              alpha = 0.75, cmap = ListedColormap(('red', 'green')))
      8 plt.xlim(X1.min(), X1.max())

~\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py in predict(self, X)
    627             The predicted classes.
    628         """
--> 629         proba = self.predict_proba(X)
    630
    631         if self.n_outputs_ == 1:

~\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py in predict_proba(self, X)
    671         check_is_fitted(self)
    672         # Check data
--> 673         X = self._validate_X_predict(X)
    674
    675         # Assign chunk of trees to jobs

~\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py in _validate_X_predict(self, X)
    419         check_is_fitted(self)
    420
--> 421         return self.estimators_[0]._validate_X_predict(X, check_input=True)
    422
    423     @property

~\anaconda3\lib\site-packages\sklearn\tree\_classes.py in _validate_X_predict(self, X, check_input)
    394         n_features = X.shape[1]
    395         if self.n_features_ != n_features:
--> 396             raise ValueError("Number of features of the model must "
    397                              "match the input. Model n_features is %s and "
    398                              "input n_features is %s "

ValueError: Number of features of the model must match the input. Model n_features is 11 and input n_features is 2
Answer:
I fixed your code as I understood the problem and added a few extra lines. The main issue is that you pass only columns 1 and 2 to predict, while the classifier expects 11 columns, columns 1 through 11. Columns 3 through 11 therefore have to be filled in somehow; at the very least you could fill them with zeros.
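For example, here is a minimal sketch of that zero-filling workaround (it assumes the classifier and the X1, X2 mesh from your own code; the zeros exist only to satisfy the expected feature count, so the resulting boundary is a rough approximation):

import numpy as np

# The grid supplies only the first two features; pad the remaining 9 with
# zeros so the 11-feature classifier accepts the input.
grid = np.c_[X1.ravel(), X2.ravel()]              # shape (n_points, 2)
pad = np.zeros((grid.shape[0], 9))                # dummy values for columns 3..11
Z = classifier.predict(np.c_[grid, pad]).reshape(X1.shape)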
In my solution I sorted the training set by its first column, and then, while building the mesh grid, I approximated the columns 3 through 11 needed for prediction by looking up the training row whose first-column value is closest to the grid's X1 value. In other words, given only the first column, I look for the best available approximation of columns 3 through 11; this is done purely so those columns are not simply filled with zeros, which would also have been acceptable.
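The lookup itself is just np.searchsorted over a copy of the training set sorted by its first column; a simplified sketch of the idea (the full, corrected listing below uses the same sX and riX names):

# Assumes X_train (the scaled training features) and the mesh X1 already exist.
order = np.argsort(X_train[:, 0])
sX = X_train[order]                       # training rows sorted by their first feature

# searchsorted gives, for each grid value of the first feature, the position of
# the first sorted row that is not smaller; clip it so it stays a valid index.
riX = np.minimum(sX.shape[0] - 1, np.searchsorted(sX[:, 0], X1.ravel()))
extra_cols = sX[riX, 2:]                  # approximated features 3..11 per grid point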
I also commented out the line #from sklearn.cross_validation import train_test_split and replaced it with from sklearn.model_selection import train_test_split, because the first one uses the old scikit-learn module layout; in newer versions only the second works, since the submodule was renamed. Use whichever line matches your installed version.
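Depending on the scikit-learn version installed, only one of these two imports will resolve:

# from sklearn.cross_validation import train_test_split   # old scikit-learn (module removed in 0.20)
from sklearn.model_selection import train_test_split       # scikit-learn 0.18 and newer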
# Random Forest Classification

# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('finalplacementdata3.csv')
X = dataset.iloc[:, range(1, 12)].values
y = dataset.iloc[:, 12].values

# Splitting the dataset into the Training set and Test set
#from sklearn.cross_validation import train_test_split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Sort the scaled training set by its first feature so that, for any grid value
# of that feature, the nearest training row can be found with np.searchsorted.
siX = np.lexsort((X_train[:, 1], X_train[:, 0]))
sX = X_train[siX]

# Fitting Random Forest Classification to the Training set
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = classifier.predict(X_test)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
# For every grid point, take columns 3..11 from the sorted training row whose
# first feature is closest to the grid's X1 value, so predict() sees 11 features.
riX = np.minimum(sX.shape[0] - 1, np.searchsorted(sX[:, 0], X1.ravel()))
rX = sX[riX]
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()] + list(rX[:, 2:].T)).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Random Forest Classification (Training set)')
plt.xlabel('Quants')
plt.ylabel('CGPA')
plt.legend()
plt.show()

# Visualising the Test set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
riX = np.minimum(sX.shape[0] - 1, np.searchsorted(sX[:, 0], X1.ravel()))
rX = sX[riX]
plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()] + list(rX[:, 2:].T)).T).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c = ListedColormap(('red', 'green'))(i), label = j)
plt.title('Random Forest Classification (Test set)')
plt.xlabel('Quants')
plt.ylabel('CGPA')
plt.legend()
plt.show()
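Keep in mind that these contour plots are only an approximation of the decision boundary: the classifier was trained on 11 features, so the boundary drawn over Quants and CGPA depends on how the other nine columns are filled in (the nearest-row lookup here; zeros would give a different but equally valid picture). If you want a boundary that depends on those two features alone, you would need to train a classifier on just those two columns for the visualisation.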