我正在使用以下 Python 脚本,通过线性回归来预测一个数字:
# Scrape a number from a web page at regular intervals, fit a linear
# regression of value against time, and predict the value at a given instant.
#
# Command line: <train count> <timeout(s)> <predict date(Y/M/D)> <predict clock(H:M:S)>
from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from sklearn.linear_model import LinearRegression
import time
import numpy as np
from sklearn.svm import SVR
import pytz
from datetime import datetime
from sys import argv, exit
import os, psutil

################################################
# Exactly four arguments are required; otherwise print the usage and stop.
if len(argv) != 5:
    print(argv[0] + '<train count> <timeout(s)> <predict date(Y/M/D)> <predict clock(H:M:S)>')
    exit(2)

# The prediction instant uses the same encoding as the training samples below:
# epoch seconds scaled by 10**7. The parsed datetime is naive, so timestamp()
# interprets it in the machine's local time zone.
X_predict = [(int(datetime.strptime(argv[3] + " " + argv[4], '%Y/%m/%d %H:%M:%S').timestamp()*(10000000)))]

################################################
X = []  # sample times (epoch seconds * 10**7)
y = []  # scraped values

chromeOptions = webdriver.ChromeOptions()
chromeOptions.add_argument("--headless")
chromeOptions.add_argument("--remote-debugging-port=2212")
chromeOptions.add_argument('--no-sandbox')
chromeOptions.add_argument('--disable-dev-shm-usage')
driver = webdriver.Chrome('/usr/bin/chromedriver', chrome_options=chromeOptions)

# The browser must be shut down exactly once, after the last sample (or after
# a failure). The cleanup therefore wraps the whole scraping phase, and
# quit() is actually called rather than merely referenced.
try:
    driver.get('https://sample.com/')
    elem_xpath = '//div[contains(text(), "number")]/following-sibling::div'

    # NOTE(review): range(1, n) collects n - 1 samples for a train count of n;
    # kept as in the original -- confirm whether n samples were intended.
    for i in range(1, int(argv[1])):
        # Wait up to 10 seconds for the element holding the number to be visible.
        elem = WebDriverWait(driver, 10).until(EC.visibility_of_element_located((By.XPATH, elem_xpath)))
        print("train => ", i)
        X.append(int(time.time()*(10000000)))
        # Strip thousands separators before converting the text to an integer.
        y.append(int(elem.text.replace(',', '')))
        time.sleep(int(argv[2]))
finally:
    driver.quit()

##############################################
# scikit-learn expects 2-D inputs: one row per sample, one column per feature.
X = np.array(X).reshape(-1, 1)
y = np.array(y).reshape(-1, 1)
X_predict = np.array(X_predict).reshape(-1, 1)

##############################################
svr_rbf = LinearRegression()
y_rbf = svr_rbf.fit(X, y).predict(X_predict)

# predict() returns a (1, 1) array here; take the single element explicitly
# instead of relying on the implicit array-to-scalar conversion.
print('y_rbf: {}'.format(int(y_rbf.item())))
print('memory usage: {} MB'.format(int(psutil.Process(os.getpid()).memory_info().rss/1024/1024)))
这段代码运行得很好。据我所知,线性回归可以给出一些统计量,比如斜率、截距、r_value、p_value 和 std_err,其中 r_value 可以反映线性回归的拟合准确性。在上面的脚本中,如何获取 r_value 的值?
回答:
这有点难以确定,但我认为你可能在寻找类似这样的东西。
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.sandbox.regression.predstd import wls_prediction_std

# Fix the seed so the generated noise is reproducible.
np.random.seed(9876789)

# --- OLS estimation on artificial data ---
nsample = 100
grid = np.linspace(0, 10, nsample)
coefficients = np.array([1, 0.1, 10])
noise = np.random.normal(size=nsample)

# Design matrix: the regressors x and x**2, with a leading column of ones
# because the model needs an intercept term.
X = sm.add_constant(np.column_stack((grid, grid**2)))

# Response: linear combination of the design-matrix columns plus the noise.
y = X @ coefficients + noise

# Fit the model and print the summary table.
results = sm.OLS(y, X).fit()
print(results.summary())
结果: