from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline

# Train a TF-IDF + linear SVM text classifier and report hold-out metrics.
#
# NOTE(review): `data` is not defined in this snippet; presumably a pandas
# DataFrame with a text column 'Review' and a label column 'Category' --
# confirm against the code that loads it.
X = data['Review']
y = data['Category']

# Unigram TF-IDF features feeding a linear support-vector classifier.
tfidf = TfidfVectorizer(ngram_range=(1, 1))
classifier = LinearSVC()

# Hold out 30% of the samples for evaluation. No random_state is given, so
# the split (and therefore the reported scores) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Keeping the vectorizer inside the pipeline means it is fitted only on the
# data passed to fit(), never on the held-out test samples.
clf = Pipeline([
    ('tfidf', tfidf),
    ('clf', classifier),
])
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
# Print explicitly: a bare expression is silently discarded outside a REPL.
print(accuracy_score(y_test, y_pred))
这是训练模型和预测的代码。我需要了解我的模型性能。那么,我应该在哪里进行修改以使用交叉验证评分(cross_val_score)呢?
回答:
可以这样做(以下是我之前项目中的一个示例):
import numpy as np
from sklearn.model_selection import KFold, cross_val_score

# Shuffled 5-fold splitter with a fixed seed so the folds are reproducible.
kfolds = KFold(n_splits=5, shuffle=True, random_state=42)


def cv_f1(model, X, y, scoring="f1"):
    """Return the mean cross-validated score of *model* on (X, y).

    Args:
        model: Estimator passed straight to ``cross_val_score``.
        X: Training samples.
        y: Training labels.
        scoring: Name of the scorer handed to ``cross_val_score``. Defaults
            to ``"f1"``, which scikit-learn documents as a binary metric;
            for multiclass labels pass e.g. ``"f1_macro"`` or ``"accuracy"``.

    Returns:
        The mean of the per-fold scores over the module-level ``kfolds``
        splitter.
    """
    score = np.mean(cross_val_score(model, X, y, scoring=scoring, cv=kfolds))
    return score


# The original answer left the estimator as a placeholder (`model = ....`).
# NOTE(review): assumes the estimator to evaluate is the question's pipeline
# `clf`; substitute your own estimator if that is not the case.
model = clf
score_f1 = cv_f1(model, X_train, y_train)
你可以使用多种评分方法,只需更改 scoring="f1" 这一参数即可(注意 "f1" 只适用于二分类;多分类问题可改用 "f1_macro" 或 "accuracy" 等)。如果你想查看每个折叠的得分,只需移除 np.mean 即可。