我已经将Neuraxle更新到最新版本(3.4)。
我注意到整个 `auto_ml.py` 文件被重写了。我查看了文档,但没有找到相关信息。从 git 历史来看,`RandomSearch()` 方法似乎很久以前就被 `AutoML()` 方法替代了,不过两者的参数有所不同。
有人知道在最新的Neuraxle版本(3.4)中,如何对Boston Housing示例管道进行自动超参数搜索吗?我目前的代码如下:
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_boston
from sklearn.decomposition import PCA, FastICA
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from neuraxle.hyperparams.distributions import RandInt, LogUniform, Boolean
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.metaopt.auto_ml import RandomSearch
from neuraxle.metaopt.random import KFoldCrossValidationWrapper
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import NumpyTranspose
from neuraxle.steps.sklearn import SKLearnWrapper
from neuraxle.union import AddFeatures, ModelStacking


def main():
    """Meta-fit the Boston Housing pipeline with a random search and print R2 scores.

    Loads and shuffles the Boston Housing data, splits it 75/25 into train and
    test sets, runs a 10-iteration ``RandomSearch`` (10-fold cross validation,
    scored with ``r2_score``) over the pipeline's hyperparameter spaces, and
    prints the R2 score of the resulting pipeline on both sets.
    """
    # NOTE(review): load_boston was removed from recent scikit-learn releases;
    # this script needs a scikit-learn version that still ships it.
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter space is defined here, at pipeline definition
    # time, but it could also be set in the class definition when using custom
    # classes, or defined after declaring the pipeline with a flat or nested dict.
    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking(
            [
                SKLearnWrapper(
                    GradientBoostingRegressor(),
                    HyperparameterSpace({
                        "n_estimators": RandInt(50, 600),
                        "max_depth": RandInt(1, 10),
                        "learning_rate": LogUniform(0.07, 0.7)
                    })
                ),
                SKLearnWrapper(
                    KMeans(),
                    HyperparameterSpace({"n_clusters": RandInt(5, 10)})
                ),
            ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})
            ),
        )
    ])

    print("在训练集上进行元拟合:")
    p = p.meta_fit(X_train, y_train, metastep=RandomSearch(
        n_iter=10,
        higher_score_is_better=True,
        validation_technique=KFoldCrossValidationWrapper(scoring_function=r2_score, k_fold=10)
    ))
    # Here is a more "pipeline-like" alternative:
    # p = RandomSearch(
    #     p,
    #     n_iter=15,
    #     higher_score_is_better=True,
    #     validation_technique=KFoldCrossValidation(scoring_function=r2_score, k_fold=3)
    # ).fit(X_train, y_train)
    print("")

    print("转换训练集和测试集:")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)
    print("")

    # r2_score takes (y_true, y_pred) and is not symmetric, so the ground truth
    # must come first for the reported score to be the actual R2.
    print("评估转换后的训练集:")
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2回归得分:', score_transform)
    print("")

    print("评估转换后的测试集:")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2回归得分:', score_test)


if __name__ == "__main__":
    main()
回答:
这是解决您问题的方案,这是一个尚未在文档网站上发布的新示例:
- https://drive.google.com/drive/u/0/folders/12uzcNKU7n0EUyFzgitSt1wSaSvV4qJbs (请查看其中第二个编程Kata的解决方案)
来自上述链接的示例管道代码:
from neuraxle.base import Identity
from neuraxle.steps.flow import TrainOnlyWrapper, ChooseOneStepOf
from neuraxle.steps.numpy import NumpyConcatenateInnerFeatures, NumpyShapePrinter, NumpyFlattenDatum
from neuraxle.union import FeatureUnion

# Feature-extraction + classification pipeline: FFT-based and summary-statistic
# features are concatenated, then one of several candidate classifiers is chosen.
# NOTE(review): Pipeline, NumpyFFT, NumpyAbs, FFTPeakBinWithValue, NumpyMean,
# NumpyMedian, NumpyMin, NumpyMax, decision_tree_classifier and
# logistic_regression are not imported or defined in this snippet — presumably
# they come from the rest of the linked Kata solution; confirm before running.
pipeline = Pipeline([
    TrainOnlyWrapper(NumpyShapePrinter(custom_message="输入形状在特征联合前")),
    FeatureUnion([
        Pipeline([
            NumpyFFT(),
            NumpyAbs(),
            FeatureUnion([
                NumpyFlattenDatum(),  # Reshape from 3D to flat 2D: flatten the data except along the batch size
                FFTPeakBinWithValue()  # Extract 2D features from the 3D FFT bins
            ], joiner=NumpyConcatenateInnerFeatures())
        ]),
        NumpyMean(),
        NumpyMedian(),
        NumpyMin(),
        NumpyMax()
    ], joiner=NumpyConcatenateInnerFeatures()),
    # TODO, optional: add some feature selection here, for the motivated:
    # https://scikit-learn.org/stable/modules/feature_selection.html
    # TODO, optional: add normalization here (if using other classifiers)
    # https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.normalize.html
    TrainOnlyWrapper(NumpyShapePrinter(custom_message="特征联合后,分类前的形状")),
    # Shape: [batch_size, remade_features]
    ChooseOneStepOf([
        decision_tree_classifier,
        # extra_tree_classifier,  # TODO
        # ridge_classifier,  # TODO
        logistic_regression,
        # random_forest_classifier  # TODO
    ]),
    TrainOnlyWrapper(NumpyShapePrinter(custom_message="分类后的输出形状")),
    # Shape: [batch_size]
    Identity()
])
然后进行AutoML:
from neuraxle.metaopt.auto_ml import (
    AutoML,
    InMemoryHyperparamsRepository,
    RandomSearchHyperparameterSelectionStrategy,
    validation_splitter,
)
from neuraxle.metaopt.callbacks import ScoringCallback
from sklearn.metrics import accuracy_score

# Random hyperparameter search over `pipeline`: 7 trials of 1 epoch each,
# validated on a 20% hold-out split and scored with accuracy.
# NOTE(review): `pipeline` and `cache_folder` are not defined in this snippet;
# they must be provided by the surrounding code before this runs.
auto_ml = AutoML(
    pipeline=pipeline,
    hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
    validation_split_function=validation_splitter(test_size=0.20),
    # Accuracy is a "higher is better" metric; flagging it as lower-is-better
    # would make the search keep the worst-scoring trial.
    scoring_callback=ScoringCallback(accuracy_score, higher_score_is_better=True),
    n_trials=7,
    epochs=1,
    hyperparams_repository=InMemoryHyperparamsRepository(cache_folder=cache_folder),
    refit_trial=True,
)
这个示例也在Neuraxio的Clean Machine Learning培训中进行了研究: