Neuraxle的RandomSearch()后继者

我已经将Neuraxle更新到最新版本（3.4）。

我注意到整个auto_ml.py文件被重新编写了。我查看了文档，但没有找到相关信息。在git上似乎RandomSearch()方法很久以前就被AutoML()方法替代了。然而，参数有所不同。

有人知道在最新的Neuraxle版本（3.4）中，如何对Boston Housing示例管道进行自动超参数搜索吗？

import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_boston
from sklearn.decomposition import PCA, FastICA
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from neuraxle.hyperparams.distributions import RandInt, LogUniform, Boolean
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.metaopt.auto_ml import RandomSearch
from neuraxle.metaopt.random import KFoldCrossValidationWrapper
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import NumpyTranspose
from neuraxle.steps.sklearn import SKLearnWrapper
from neuraxle.union import AddFeatures, ModelStacking


def main():
    """Run a random hyperparameter search on the Boston Housing example pipeline.

    Loads and shuffles the dataset, holds out the last 25% as a test set,
    meta-fits the pipeline with ``RandomSearch`` (10 iterations, 10-fold
    cross-validation scored with R2), then prints the R2 score of the
    resulting pipeline on both the training and the test set.
    """
    # NOTE(review): load_boston is no longer shipped by recent scikit-learn
    # releases -- confirm the installed version still provides it.
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here, while defining the
    # pipeline. With custom classes they could also be set in the class
    # definition, or they could be defined after declaring the pipeline using
    # a flat dict or a nested dict.
    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 600), "max_depth": RandInt(1, 10),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})
            ),
        )
    ])

    print("在训练集上进行元拟合：")
    p = p.meta_fit(X_train, y_train, metastep=RandomSearch(
        n_iter=10,
        higher_score_is_better=True,
        validation_technique=KFoldCrossValidationWrapper(scoring_function=r2_score, k_fold=10)
    ))
    # Here is an alternative, more "pipeliney" way to do the same thing:
    # p = RandomSearch(
    #     p,
    #     n_iter=15,
    #     higher_score_is_better=True,
    #     validation_technique=KFoldCrossValidationWrapper(scoring_function=r2_score, k_fold=3)
    # ).fit(X_train, y_train)

    print("")

    print("转换训练集和测试集：")
    y_train_predicted = p.predict(X_train)
    y_test_predicted = p.predict(X_test)

    print("")

    # r2_score expects (y_true, y_pred); R2 is not symmetric, so the ground
    # truth must come first for the reported score to be meaningful.
    print("评估转换后的训练集：")
    score_transform = r2_score(y_train, y_train_predicted)
    print('R2回归得分:', score_transform)

    print("")

    print("评估转换后的测试集：")
    score_test = r2_score(y_test, y_test_predicted)
    print('R2回归得分:', score_test)


if __name__ == "__main__":
    main()

回答：

以下是解决您问题的方案，它是一个尚未在文档网站上发布的新示例：

https://drive.google.com/drive/u/0/folders/12uzcNKU7n0EUyFzgitSt1wSaSvV4qJbs（从那里查看第二个编程Kata的解决方案）

来自上述链接的示例管道代码：

from neuraxle.base import Identity
from neuraxle.steps.flow import TrainOnlyWrapper, ChooseOneStepOf
from neuraxle.steps.numpy import (
    NumpyConcatenateInnerFeatures,
    NumpyShapePrinter,
    NumpyFlattenDatum,
)
from neuraxle.union import FeatureUnion

# Feature-extraction + classification pipeline from the linked kata solution.
# NOTE(review): Pipeline, NumpyFFT, NumpyAbs, FFTPeakBinWithValue, NumpyMean,
# NumpyMedian, NumpyMin, NumpyMax, decision_tree_classifier and
# logistic_regression are not defined in this snippet -- presumably they come
# from earlier cells of the kata; confirm before running.
pipeline = Pipeline([
    TrainOnlyWrapper(NumpyShapePrinter(custom_message="输入形状在特征联合前")),
    FeatureUnion(
        [
            Pipeline([
                NumpyFFT(),
                NumpyAbs(),
                FeatureUnion(
                    [
                        # Reshape from 3D to flat 2D: flatten everything except the batch size.
                        NumpyFlattenDatum(),
                        # Extract 2D features from the 3D FFT bins.
                        FFTPeakBinWithValue(),
                    ],
                    joiner=NumpyConcatenateInnerFeatures(),
                ),
            ]),
            NumpyMean(),
            NumpyMedian(),
            NumpyMin(),
            NumpyMax(),
        ],
        joiner=NumpyConcatenateInnerFeatures(),
    ),
    # TODO, optional: add some feature selection here, for the motivated ones:
    #      https://scikit-learn.org/stable/modules/feature_selection.html
    # TODO, optional: add normalization here (if using other classifiers):
    #      https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.normalize.html
    TrainOnlyWrapper(NumpyShapePrinter(custom_message="特征联合后，分类前的形状")),
    # Shape: [batch_size, remade_features]
    ChooseOneStepOf([
        decision_tree_classifier,
        # extra_tree_classifier,  # TODO
        # ridge_classifier,  # TODO
        logistic_regression,
        # random_forest_classifier  # TODO
    ]),
    TrainOnlyWrapper(NumpyShapePrinter(custom_message="分类后的输出形状")),
    # Shape: [batch_size]
    Identity(),
])

然后进行AutoML：

from neuraxle.metaopt.auto_ml import (
    AutoML,
    InMemoryHyperparamsRepository,
    validation_splitter,
    RandomSearchHyperparameterSelectionStrategy,
)
from neuraxle.metaopt.callbacks import ScoringCallback
from sklearn.metrics import accuracy_score

# Random-search AutoML loop: 7 trials of 1 epoch each, validated on a 20%
# hold-out split, with trial results kept in an in-memory repository.
# NOTE(review): `pipeline` and `cache_folder` are not defined in this snippet;
# they are expected to exist already -- confirm before running.
auto_ml = AutoML(
    pipeline=pipeline,
    hyperparams_optimizer=RandomSearchHyperparameterSelectionStrategy(),
    validation_split_function=validation_splitter(test_size=0.20),
    # Accuracy is a "higher is better" metric, so the flag must be True;
    # with False the trial with the lowest accuracy would be ranked as best.
    scoring_callback=ScoringCallback(accuracy_score, higher_score_is_better=True),
    n_trials=7,
    epochs=1,
    hyperparams_repository=InMemoryHyperparamsRepository(cache_folder=cache_folder),
    refit_trial=True,
)

这个示例也在Neuraxio的Clean Machine Learning培训中进行了研究：

https://www.neuraxio.com/products/clean-machine-learning-training

学技术

Neuraxle的RandomSearch()后继者

发表回复取消回复

相关文章：

Related Posts

使用LSTM在Python中预测未来值

如何在gensim的word2vec模型中查找双词组的相似性

dask_xgboost.predict 可以工作但无法显示 – 数据必须是一维的

ML Tuning – Cross Validation in Spark

如何在React JS中使用fetch从REST API获取预测

如何分析ML.NET中多类分类预测得分数组？

发表回复 取消回复

发表回复取消回复