我们可以使用 RandomizedSearchCV 来获取模型的最佳参数。
def test_model():
    """Run a randomized hyper-parameter search for Ridge and print the best result.

    Relies on module-level globals defined in the full script: ``space``
    (parameter distributions) and ``cv`` (cross-validation splitter).
    """
    # BUG FIX: the helper defined in this file is `generate_friedman1`, not
    # `make_friedman1` — sklearn's make_friedman1 returns only (X, y), so the
    # original 4-way unpacking could never work.
    X_train, X_test, y_train, y_test = generate_friedman1()
    result_dfs = []
    model = Ridge()
    search = RandomizedSearchCV(
        model,
        space,
        n_iter=500,
        scoring='neg_mean_absolute_error',
        n_jobs=-1,
        cv=cv,
    )
    result = search.fit(X_train, y_train)
    print('最佳得分: %s' % result.best_score_)
    print('最佳超参数: %s' % result.best_params_)
现在,我正在尝试使用 X_test 数据获取每种不同参数组合的测试得分(即 MSE、R2)。
def test_model():
    """Run the randomized search, print the best result, and return a one-row
    DataFrame with the held-out MSE and R2 of the best estimator.

    Relies on module-level globals: ``space`` (parameter distributions) and
    ``cv`` (cross-validation splitter).
    """
    # BUG FIX: the helper defined in this file is `generate_friedman1`, not
    # `make_friedman1` (sklearn's make_friedman1 returns only (X, y)).
    X_train, X_test, y_train, y_test = generate_friedman1()
    result_dfs = []
    model = Ridge()
    search = RandomizedSearchCV(
        model,
        space,
        n_iter=500,
        scoring='neg_mean_absolute_error',
        n_jobs=-1,
        cv=cv,
    )
    result = search.fit(X_train, y_train)
    print('最佳得分: %s' % result.best_score_)
    print('最佳超参数: %s' % result.best_params_)
    # PERF FIX: the original called search.fit(...) a second time here, which
    # reran the entire 500-iteration search just to predict. The search is
    # already fitted; predict directly with it.
    test_result = result.predict(X_test)
    fold_df = pd.DataFrame()
    fold_df["MSE"] = [mean_squared_error(y_test, test_result)]
    fold_df["R2"] = [r2_score(y_test, test_result)]
    result_dfs.append(fold_df)
    rep_df = pd.concat(result_dfs, axis=0, ignore_index=True)
    return rep_df
我得到的输出是
最佳得分: -0.495580216817403最佳超参数: {'alpha': 28.590361345568553, 'fit_intercept': False, 'normalize': True, 'solver': 'cholesky'} MSE R2 0 0.460333 0.504366
但我想获取 param space 中所有不同参数配置的测试得分,并将它们保存到 df 中。
更具体地说,假设我的程序中有 n_iter=500,因此有 500 种参数设置的组合。我希望使用这些参数在下面的代码行中进行 fit 和 predict 操作。最终,我将为每个不同的参数组合得到 500 个 MSE 和 R2 值。
# NOTE(review): this refits the entire 500-iteration search before predicting;
# if `search` was already fitted above, `search.predict(X_test)` is sufficient.
test_result = search.fit(X_train, y_train).predict(X_test)
你能告诉我如何使用 RandomizedSearchCV 获取每种不同参数组合的所有测试得分吗?
完整代码
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import pandas as pd
from scipy.stats import loguniform
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import RandomizedSearchCV

# Hyper-parameter search space for Ridge.
space = dict()
space['solver'] = ['svd', 'cholesky', 'lsqr', 'sag']
space['alpha'] = loguniform(1e-5, 100)
space['fit_intercept'] = [True, False]
# NOTE(review): `normalize` was deprecated in scikit-learn 1.0 and removed in
# 1.2 — on modern versions drop this key (or use a StandardScaler pipeline).
space['normalize'] = [True, False]

# Cross-validation strategy shared by the search.
cv = RepeatedKFold(n_splits=5, n_repeats=3)


def generate_friedman1():
    """Generate a Friedman #1 regression dataset and return a train/test split."""
    data = datasets.make_friedman1(n_samples=300)
    X = data[0]
    y = data[1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    return X_train, X_test, y_train, y_test


def test_model():
    """Run the randomized search, print the best result, and return a one-row
    DataFrame with the held-out MSE and R2 of the best estimator."""
    # BUG FIX: the original called `make_friedman1()`, which is undefined at
    # module level (the helper above is `generate_friedman1`) — a NameError.
    X_train, X_test, y_train, y_test = generate_friedman1()
    result_dfs = []
    model = Ridge()
    search = RandomizedSearchCV(
        model,
        space,
        n_iter=500,
        scoring='neg_mean_absolute_error',
        n_jobs=-1,
        cv=cv,
    )
    result = search.fit(X_train, y_train)
    print('最佳得分: %s' % result.best_score_)
    print('最佳超参数: %s' % result.best_params_)
    # PERF FIX: reuse the fitted search instead of refitting all 500
    # candidates a second time just to predict.
    test_result = result.predict(X_test)
    fold_df = pd.DataFrame()
    # BUG FIX: the column was "MSqE" here but "MSE" everywhere else in this
    # file (and in the reported output); use the consistent name.
    fold_df["MSE"] = [mean_squared_error(y_test, test_result)]
    fold_df["R2"] = [r2_score(y_test, test_result)]
    result_dfs.append(fold_df)
    rep_df = pd.concat(result_dfs, axis=0, ignore_index=True)
    return rep_df


if __name__ == "__main__":
    print(test_model())
回答:
你可以将所有参数保存到一个变量中
all_param_combination = search.cv_results_['params']
然后你可以使用一个循环,用每组参数来对模型进行 fit 和 predict:
# Fit/predict once per sampled parameter combination and collect test metrics.
# BUG FIX: `fold_dfs` was appended to but never initialized in the original.
fold_dfs = []
for params in all_param_combination:
    reg_preds = Ridge(**params).fit(X_train, y_train).predict(X_test)
    fold_df = pd.DataFrame()
    fold_df["MSE"] = [mean_squared_error(y_test, reg_preds)]
    fold_df["R2"] = [r2_score(y_test, reg_preds)]
    fold_dfs.append(fold_df)
# Concatenate once, after the loop, so one row per parameter set is kept.
rep_df = pd.concat(fold_dfs, axis=0, ignore_index=True)