我在 Django 中创建了一个垃圾邮件检测系统:在应用的主页上输入任意字符串,该字符串会被传递给机器学习函数,由该函数判断它是垃圾邮件(spam)还是正常邮件(ham),结果将在下一页显示。
但我无法在 pd.read_csv 函数中正确指定 CSV 文件的路径,它报错:‘../data/spam.csv’ 不存在: b’../data/spam.csv’
views.py 文件
def hompage(request):
    """Render the landing page containing the message form.

    The form is bound only when POST data is present; on a plain GET it is
    left unbound so the page does not open with validation errors.
    """
    # An empty QueryDict is falsy, so ``or None`` yields an unbound form on GET.
    form = DetectForm(request.POST or None)
    return render(request, 'index.html', {'form': form})


def result(request):
    """Classify the submitted message and render the verdict.

    On a valid submission the cleaned ``msg`` field is passed to ``machine``
    and its return value is shown by ``result.html`` as ``msg``. On an invalid
    submission the form is re-rendered on ``index.html`` with its errors,
    because a Django view must always return an ``HttpResponse``.
    """
    form = DetectForm(request.POST)
    if not form.is_valid():
        # Previously this path fell through and returned None.
        return render(request, 'index.html', {'form': form})

    verdict = machine(form.cleaned_data['msg'])
    return render(request, 'result.html', {'msg': verdict})
ml.py 文件
def _clean_text(text, stemmer, stop_words):
    """Keep letters only, lower-case, drop stop words and stem what remains."""
    import re

    words = re.sub('[^a-zA-Z]', ' ', text).lower().split()
    return ' '.join(stemmer.stem(word) for word in words if word not in stop_words)


def machine(stringx, csv_path=None):
    """Classify *stringx* as ``'spam'`` or ``'ham'``.

    A bag-of-words Gaussian naive Bayes model is trained from the labelled CSV
    on every call and then applied to the given message.

    Args:
        stringx: The message text to classify.
        csv_path: Optional path to the training CSV. Its first column (``v1``)
            must hold the labels ``'ham'``/``'spam'`` and its second column
            (``v2``) the message text. Defaults to ``data/spam.csv`` one
            directory above the directory containing this module.

    Returns:
        ``'spam'`` if the classifier predicts label 1, otherwise ``'ham'``.

    Raises:
        FileNotFoundError: If the CSV file cannot be found.
    """
    from pathlib import Path

    import pandas as pd
    from nltk.corpus import stopwords
    from nltk.stem.porter import PorterStemmer
    from sklearn.feature_extraction.text import CountVectorizer
    from sklearn.naive_bayes import GaussianNB

    if csv_path is None:
        # Anchor the path to this file instead of the process working
        # directory; a bare '../data/spam.csv' only works when the server is
        # started from one specific folder.
        # NOTE(review): assumes data/ sits next to this module's package
        # directory -- adjust if the project layout differs.
        csv_path = Path(__file__).resolve().parent.parent / 'data' / 'spam.csv'

    data = pd.read_csv(csv_path, encoding='latin-1')
    data = data.iloc[:, [0, 1]]
    data['v1'] = data.v1.map({'ham': 0, 'spam': 1})

    # Build these once: loading the stop-word list for every token and a new
    # stemmer for every row dominated the runtime.
    stemmer = PorterStemmer()
    stop_words = set(stopwords.words('english'))

    # The query message goes last so it shares the vocabulary with the
    # training rows and can be sliced off by position, whatever the CSV size.
    corpus = [_clean_text(text, stemmer, stop_words) for text in data['v2']]
    corpus.append(_clean_text(stringx, stemmer, stop_words))

    # Bag-of-words model.
    vectorizer = CountVectorizer(max_features=5000)
    features = vectorizer.fit_transform(corpus).toarray()

    x_train, x_query = features[:-1], features[-1:]
    y_train = data['v1'].values

    classifier = GaussianNB()
    classifier.fit(x_train, y_train)

    return 'spam' if classifier.predict(x_query)[0] == 1 else 'ham'
result.html 文件
{# Result page: the content block prints the "msg" context variable supplied by the view. #}{% block title %} <h2>Result</h2> {% endblock %} {% block content %} <p>{{msg}}</p> {% endblock %}
url.py 文件
from django.urls import path

from . import views

# Namespace used when reversing URLs, e.g. "spam:hompage".
app_name = "spam"

# The former pattern url(r'^', ...) matched every request path, so the result
# view could never be reached; django.conf.urls.url() was also removed in
# Django 4.0. Each view now has its own explicit route.
urlpatterns = [
    path('', views.hompage, name='hompage'),
    path('result/', views.result, name='result'),
]
回答:
如果数据文件夹位于项目的根目录,则
# Relative path: it is resolved against the working directory of the running process.
data = pd.read_csv('data/spam.csv', encoding='latin-1')
就可以工作
或者,你可以使用 BASE_DIR 变量来构造路径,然后在你的视图中使用这个变量:
# --- settings.py ---
# Django only exposes setting names written in UPPERCASE through
# django.conf.settings, so the name must be DATA_DIR, not data_dir.
DATA_DIR = os.path.join(BASE_DIR, 'data')

# --- in the view (or the module that loads the CSV) ---
import os

from django.conf import settings

# Join with os.path.join: plain string concatenation would drop the path
# separator between the directory and the file name.
data = pd.read_csv(os.path.join(settings.DATA_DIR, 'spam.csv'), encoding='latin-1')