如何在GridSearchCV中对数据进行标准化?
这是我的代码。我不知道如何操作。
# Question code: grid-search the LogisticRegression solver on the raw,
# unscaled feature matrix. A StandardScaler is instantiated (`stdizer`) but is
# never applied to the data or passed to the search.
# NOTE(review): `LogisticRegression`, `kfold` and `scoring3` are not defined in
# this snippet -- presumably they come from code not shown here; confirm.
# NOTE(review): `import dataset` may originally have been a "# import dataset"
# heading comment (the name is rebound to a DataFrame just below) -- confirm.
import dataset
import warnings

warnings.filterwarnings("ignore")

import pandas as pd

dataset = pd.read_excel('../dataset/dataset_experiment1.xlsx')
# Features: every column except the first and the last.
X = dataset.iloc[:, 1:-1].values
# Target: the column at positional index 66.
y = dataset.iloc[:, 66].values

from sklearn.model_selection import GridSearchCV
#from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

stdizer = StandardScaler()

print('===Grid Search===')
print('logistic regression')
model = LogisticRegression()
parameter_grid = {'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}
grid_search = GridSearchCV(model, param_grid=parameter_grid, cv=kfold, scoring=scoring3)
grid_search.fit(X, y)
print('Best score: {}'.format(grid_search.best_score_))
print('Best parameters: {}'.format(grid_search.best_params_))
print('\n')
更新:这是我尝试运行的代码,但出现了错误:
# Updated attempt: wrap scaling and the classifier in a Pipeline and hand the
# pipeline to GridSearchCV. This is the version the asker reports as failing,
# so the parameter grid is intentionally left as posted.
# NOTE(review): `Pipeline`, `StandardScaler`, `LogisticRegression`,
# `GridSearchCV`, `kfold`, `scoring3`, `X` and `y` are not defined in this
# snippet -- presumably they come from earlier code; confirm.
print('logistic regression')
model = LogisticRegression()
pipeline = Pipeline([('scale', StandardScaler()), ('clf', model)])
# NOTE(review): the key 'solver' carries no step prefix. When the estimator is
# a Pipeline, step parameters are addressed as '<step>__<param>' (here that
# would be 'clf__solver'), so this grid is presumably the cause of the error.
parameter_grid = {'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}
grid_search = GridSearchCV(pipeline, param_grid=parameter_grid, cv=kfold, scoring=scoring3)
grid_search.fit(X, y)
print('Best score: {}'.format(grid_search.best_score_))
print('Best parameters: {}'.format(grid_search.best_params_))
print('\n')
回答:
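示例:将 StandardScaler 与分类器一起放入 Pipeline,再把该 Pipeline 传给 GridSearchCV;参数网格中的键需要写成“步骤名__参数名”的形式(例如 clf__solver):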
# Answer example: standardize inside the cross-validation loop by putting the
# scaler and the classifier into one Pipeline and grid-searching the pipeline.
# `X` and `y` are the feature matrix and target taken from the question's code.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Hold out a third of the samples; the grid search only sees the training part.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

pipe = Pipeline([
    ('scale', StandardScaler()),
    ('clf', LogisticRegression()),
])

# Keys use the '<step name>__<parameter>' form so that GridSearchCV routes each
# value to the 'clf' step of the pipeline.
param_grid = [
    {
        'clf__solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
        'clf__C': np.logspace(-3, 1, 5),
    },
]

grid = GridSearchCV(pipe, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)
grid.fit(X_train, y_train)