在我开发完下面的程序后,一位前辈建议我不要将机器学习程序包含在后端。我该如何使用pickle文件来实现这一点?
"""Flask app that classifies a submitted YouTube comment as spam or not.

The classifier is trained from ``YoutubeSpamMergedData.csv`` the first time
a prediction is requested and then reused for later requests.
"""
import functools
import pickle

import pandas as pd
from flask import Flask, render_template, request, url_for
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

try:
    # scikit-learn removed its vendored copy; joblib is now a standalone package.
    import joblib
except ImportError:  # very old scikit-learn that only ships the vendored copy
    from sklearn.externals import joblib

app = Flask(__name__)


@functools.lru_cache(maxsize=1)
def _train_model():
    """Fit the vectorizer and Naive Bayes classifier and return them.

    The result is cached, so the CSV is read and the model is trained only
    on the first call instead of on every request.

    Returns:
        A ``(vectorizer, classifier, accuracy)`` tuple, where ``accuracy`` is
        the classifier's score on the held-out 33% test split.
    """
    df = pd.read_csv("YoutubeSpamMergedData.csv")
    df_data = df[["CONTENT", "CLASS"]]

    # Features and labels
    df_x = df_data["CONTENT"]
    df_y = df_data.CLASS

    # Extract features with TfidfVectorizer (unigrams and bigrams).
    # ngram_range must be a tuple, not a list.
    cv = TfidfVectorizer(ngram_range=(1, 2))
    X = cv.fit_transform(df_x)

    # Fixed random_state keeps the split, and therefore the model, reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, df_y, test_size=0.33, random_state=42
    )

    # Naive Bayes classifier
    clf = MultinomialNB()
    clf.fit(X_train, y_train)
    acc = clf.score(X_test, y_test)

    # NOTE: to drop training from the backend entirely, dump ``cv`` and ``clf``
    # offline with joblib and replace this function body with joblib.load(...)
    # calls. Only load pickle/joblib files that you produced yourself.
    return cv, clf, acc


@app.route('/')
def home():
    """Render the landing page with the comment form."""
    return render_template('home.html')


@app.route('/predict', methods=['POST'])
def predict():
    """Classify the posted ``comment`` form field and render the result page."""
    cv, clf, acc = _train_model()

    # The route only accepts POST, so no request.method check is needed.
    comment = request.form['comment']
    data = [comment]
    vect = cv.transform(data).toarray()
    my_prediction = clf.predict(vect)
    return render_template('result.html', prediction=my_prediction, accuracy=acc)


if __name__ == '__main__':
    app.run(debug=True)
回答:
你应该离线训练MultinomialNB分类器并将其保存到文件中。然后在你的Flask后端加载模型文件来预测请求数据。注意:用于提取特征的向量化器(例如TfidfVectorizer)也必须在训练时拟合并一并保存;后端加载后用它对请求文本执行transform,否则输入特征与模型不匹配。
# ---- Offline training script (run once, outside the web backend) ----
import joblib  # replaces the removed sklearn.externals.joblib
from sklearn.naive_bayes import MultinomialNB

# Assumes X, y are the vectorized features and labels prepared beforehand, and
# cv is the fitted vectorizer that produced X -- TODO confirm the names used
# in your training code.
clf = MultinomialNB()
clf.fit(X, y)
joblib.dump(clf, 'filename.pkl')
# The vectorizer must be saved too: the backend needs the same vocabulary to
# turn request text into features the classifier understands.
joblib.dump(cv, 'vectorizer.pkl')


# ---- Flask backend ----
import joblib
from flask import jsonify, request

# Load once at startup. Only load pickle/joblib files you created yourself:
# unpickling untrusted data can execute arbitrary code.
classifier = joblib.load("filename.pkl")
vectorizer = joblib.load("vectorizer.pkl")


# `app` is the existing Flask application object of the backend.
@app.route("/predict", methods=["POST"])
def predict():
    """Predict the class of the posted ``comment`` form field.

    Returns:
        A JSON response of the form ``{"prediction": [<label>]}``.
    """
    # Build the feature vector with the vectorizer fitted during training.
    vect = vectorizer.transform([request.form["comment"]])
    result = classifier.predict(vect)
    # A NumPy array is not a valid Flask response; convert it to a JSON list.
    return jsonify(prediction=result.tolist())