# Test showing that a Neuraxle step converted with .tosklearn() can be nested inside
# scikit-learn Pipelines (wrapped back into Neuraxle with SKLearnWrapper) and still
# receive hyperparameters through nested keys. SomeStep is a minimal step class
# defined elsewhere in the test suite.
from neuraxle.pipeline import Pipeline
from neuraxle.steps.sklearn import SKLearnWrapper


def test_pipeline_tosklearn():
    import sklearn.pipeline

    the_step = SomeStep()
    step_to_check = the_step.tosklearn()

    p = Pipeline([
        ("a", SomeStep()),
        ("b", SKLearnWrapper(sklearn.pipeline.Pipeline([
            ("a", sklearn.pipeline.Pipeline([
                ('z', step_to_check)
            ])),
            ("b", SomeStep().tosklearn()),
            ("c", SomeStep().tosklearn())
        ]), return_all_sklearn_default_params_on_get=True)),
        ("c", SomeStep())
    ])

    # Hyperparameters are addressed with a nested dict: the outer key "b" targets the
    # SKLearnWrapper step, and the inner keys use scikit-learn's double-underscore
    # convention to reach the steps nested inside it.
    p.set_hyperparams({
        "b": {
            "a__z__learning_rate": 7,
            "b__learning_rate": 9
        }
    })
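    # Hedged side note, not part of the original test: the same two values can also be
    # set with flat double-underscore keys, the form used by the last example on this
    # page, assuming set_hyperparams accepts flat keys the same way it accepts nested ones.
    p.set_hyperparams({
        "b__a__z__learning_rate": 7,
        "b__b__learning_rate": 9
    })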
# Example: Boston Housing regression with automatic hyperparameter tuning. Scikit-learn
# models are wrapped as Neuraxle steps, each with its own hyperparameter space, and the
# whole pipeline is then tuned by random search under K-fold cross-validation.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_boston
from sklearn.decomposition import PCA, FastICA
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

from neuraxle.hyperparams.distributions import Boolean, LogUniform, RandInt
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import NumpyTranspose
from neuraxle.steps.sklearn import SKLearnWrapper
from neuraxle.union import AddFeatures, ModelStacking
# Assumption: RandomSearch and KFoldCrossValidationWrapper come from Neuraxle's
# meta-optimization module (neuraxle.metaopt.random in the versions these examples target).
from neuraxle.metaopt.random import KFoldCrossValidationWrapper, RandomSearch


def main():
    boston = load_boston()
    X, y = shuffle(boston.data, boston.target, random_state=13)
    X = X.astype(np.float32)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

    # Note that the hyperparameter spaces are defined here during the pipeline definition,
    # but they could already be set within the classes at their definition if using custom
    # classes, or they could also be defined after declaring the pipeline, using a flat
    # dict or a nested dict.
    p = Pipeline([
        AddFeatures([
            SKLearnWrapper(
                PCA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
            SKLearnWrapper(
                FastICA(n_components=2),
                HyperparameterSpace({"n_components": RandInt(1, 3)})
            ),
        ]),
        ModelStacking([
            SKLearnWrapper(
                GradientBoostingRegressor(),
                HyperparameterSpace({
                    "n_estimators": RandInt(50, 600), "max_depth": RandInt(1, 10),
                    "learning_rate": LogUniform(0.07, 0.7)
                })
            ),
            SKLearnWrapper(
                KMeans(),
                HyperparameterSpace({"n_clusters": RandInt(5, 10)})
            ),
        ],
            joiner=NumpyTranspose(),
            judge=SKLearnWrapper(
                Ridge(),
                HyperparameterSpace({"alpha": LogUniform(0.7, 1.4), "fit_intercept": Boolean()})
            ),
        )
    ])

    print("Meta-fitting on train:")
    p = p.meta_fit(X_train, y_train, metastep=RandomSearch(
        n_iter=10,
        higher_score_is_better=True,
        validation_technique=KFoldCrossValidationWrapper(scoring_function=r2_score, k_fold=10)
    ))
    # Here is an alternative way to do it, more "pipeliney":
    # p = RandomSearch(
    #     p,
    #     n_iter=15,
    #     higher_score_is_better=True,
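
    # Hedged usage sketch, not part of the original snippet: meta_fit() returns the
    # pipeline refitted with the best hyperparameters found, so it can be scored on the
    # held-out split. This assumes transform() on the fitted pipeline yields the stacked
    # model's predictions.
    y_test_predicted = p.transform(X_test)
    print("R2 score on test:", r2_score(y_test, y_test_predicted))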
# Example: declaring a hyperparameter space on an existing pipeline with a flat dict,
# then sampling from it and applying the sample. Steps without an explicit name are
# addressed by auto-generated names (e.g. 'Pipeline', 'SKLearnWrapper_PCA') in the
# flat keys below.
from sklearn.decomposition import PCA

from neuraxle.base import Identity
from neuraxle.hyperparams.distributions import RandInt
from neuraxle.hyperparams.space import HyperparameterSpace
from neuraxle.pipeline import Pipeline
from neuraxle.steps.numpy import MultiplyByN
from neuraxle.steps.sklearn import SKLearnWrapper


def main():
    p = Pipeline([
        ('step1', MultiplyByN()),
        ('step2', MultiplyByN()),
        Pipeline([
            Identity(),
            Identity(),
            SKLearnWrapper(PCA(n_components=4))
        ])
    ])

    # The space is defined after the pipeline is declared, with flat double-underscore keys.
    p.set_hyperparams_space(HyperparameterSpace({
        'step1__multiply_by': RandInt(42, 50),
        'step2__multiply_by': RandInt(-10, 0),
        'Pipeline__SKLearnWrapper_PCA__n_components': RandInt(2, 3)
    }))

    # Draw one random sample from the space and set it as the current hyperparameters.
    samples = p.get_hyperparams_space().rvs()
    p.set_hyperparams(samples)

    samples = p.get_hyperparams()
    assert 42 <= samples['step1__multiply_by'] <= 50
    assert -10 <= samples['step2__multiply_by'] <= 0
    assert samples['Pipeline__SKLearnWrapper_PCA__n_components'] in [2, 3]
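
    # Hedged usage sketch, not part of the original snippet: with the sampled
    # hyperparameters applied, the pipeline can be fit on toy data (the wrapped PCA
    # needs fitting) and then used to transform it; the shapes here are arbitrary.
    import numpy as np

    data_inputs = np.random.rand(10, 5)
    p = p.fit(data_inputs)
    outputs = p.transform(data_inputs)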