def test_local_run_swapped_parameter_order_model(self):
    # construct scikit-learn classifier
    clf = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                          ('estimator', RandomForestClassifier())])
    # download task
    task = openml.tasks.get_task(7)
    # invoke OpenML run, with task and model passed in swapped order
    run = openml.runs.run_model_on_task(
        task, clf,
        avoid_duplicate_runs=False,
        upload_flow=False,
    )
    self._test_local_evaluations(run)
def test_to_from_filesystem_search(self):
    model = Pipeline([
        ('imputer', SimpleImputer(strategy='mean')),
        ('classifier', DecisionTreeClassifier(max_depth=1)),
    ])
    model = GridSearchCV(
        estimator=model,
        param_grid={
            "classifier__max_depth": [1, 2, 3, 4, 5],
            "imputer__strategy": ['mean', 'median'],
        }
    )
    task = openml.tasks.get_task(119)
    run = openml.runs.run_model_on_task(
        model=model,
        task=task,
        add_local_measures=False,
        avoid_duplicate_runs=False,
    )
    cache_path = os.path.join(
        self.workdir,
        'runs',
        str(random.getrandbits(128)),
    )
    run.to_filesystem(cache_path)
    run_prime = openml.runs.OpenMLRun.from_filesystem(cache_path)
    self._test_run_obj_equals(run, run_prime)
    run_prime.publish()
# Build a pipeline that imputes and one-hot encodes the nominal features
# (nominal_feature_indices holds the indices of the nominal columns) before
# fitting a random forest.
pipe = pipeline.Pipeline(steps=[
    (
        'Preprocessing',
        compose.ColumnTransformer([
            (
                'Nominal',
                pipeline.Pipeline([
                    ('Imputer', impute.SimpleImputer(strategy='most_frequent')),
                    (
                        'Encoder',
                        preprocessing.OneHotEncoder(
                            sparse=False, handle_unknown='ignore',
                        )
                    ),
                ]),
                nominal_feature_indices,
            ),
        ]),
    ),
    ('Classifier', ensemble.RandomForestClassifier(n_estimators=10))
])
run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False)
myrun = run.publish()
print("Uploaded to http://test.openml.org/r/" + str(myrun.run_id))
###############################################################################
# Running flows on tasks offline for later upload
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# In scenarios without internet access, it is possible to run a model on a task
# without immediately uploading the results or the flow to the server.
# For the next line to work offline, it must have been executed once while
# online, so that the task is already cached in the local OpenML cache directory:
task = openml.tasks.get_task(6)
# The following lines can then be executed offline:
run = openml.runs.run_model_on_task(
    pipe,
    task,
    avoid_duplicate_runs=False,
    upload_flow=False,
)
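# The offline run can then be stored on disk and published later, once internet
# access and an API key are available again. A minimal sketch, reusing the
# to_filesystem / from_filesystem round-trip shown above (the directory name is
# only an illustrative placeholder):
run.to_filesystem('my_offline_run')
# ... later, when back online:
# run = openml.runs.OpenMLRun.from_filesystem('my_offline_run')
# run.publish()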
# Uncomment and set your OpenML key. Don't share your key with others.
# openml.config.apikey = 'YOURKEY'
# Define a scikit-learn pipeline
clf = pipeline.Pipeline(
    steps=[
        ('imputer', impute.SimpleImputer()),
        ('estimator', tree.DecisionTreeClassifier())
    ]
)
############################################################################
# Download the OpenML task for the German credit dataset (credit-g).
task = openml.tasks.get_task(97)
############################################################################
# Run the scikit-learn model on the task (requires an API key).
run = openml.runs.run_model_on_task(clf, task)
# Publish the experiment on OpenML (optional, requires an API key).
run.publish()
print('URL for run: %s/run/%d' % (openml.config.server, run.run_id))
############################################################################
openml.config.stop_using_configuration_for_example()
# * Run the code below, replacing 'YOURDIR' with the path to the cache
#   directory. By default, OpenML will use **~/.openml/cache** as the cache
#   directory.
# Uncomment and set your OpenML cache directory
# import os
# openml.config.cache_directory = os.path.expanduser('YOURDIR')
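# To confirm which cache directory is active (the default, unless the line
# above was uncommented and adjusted), it can simply be printed:
print(openml.config.cache_directory)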
############################################################################
# Simple Example
# ^^^^^^^^^^^^^^
# Download the OpenML task for the eeg-eye-state dataset.
task = openml.tasks.get_task(403)
data = openml.datasets.get_dataset(task.dataset_id)
clf = neighbors.KNeighborsClassifier(n_neighbors=5)
run = openml.runs.run_model_on_task(clf, task, avoid_duplicate_runs=False)
# Publish the experiment on OpenML (optional, requires an API key).
# For this tutorial, our configuration publishes to the test server
# so as not to crowd the main server with runs created by examples.
myrun = run.publish()
print("kNN on %s: http://test.openml.org/r/%d" % (data.name, myrun.run_id))
############################################################################
openml.config.stop_using_configuration_for_example()
openml.config.start_using_configuration_for_example()
# A very simple classifier which ignores the feature types
clf = sklearn.pipeline.Pipeline(steps=[
    ('imputer', sklearn.impute.SimpleImputer()),
    ('estimator', sklearn.tree.DecisionTreeClassifier(max_depth=5)),
])
suite = openml.study.get_suite(1)
# We'll create a study with one run on each of three randomly selected tasks
# from the suite
tasks = np.random.choice(suite.tasks, size=3, replace=False)
run_ids = []
for task_id in tasks:
    task = openml.tasks.get_task(task_id)
    run = openml.runs.run_model_on_task(clf, task)
    run.publish()
    run_ids.append(run.run_id)
# The study needs a machine-readable and unique alias. To obtain this,
# we simply generate a random uuid.
alias = uuid.uuid4().hex
new_study = openml.study.create_study(
    name='Test-Study',
    description='Test study for the Python tutorial on studies',
    run_ids=run_ids,
    alias=alias,
    benchmark_suite=suite.study_id,
)
new_study.publish()
print(new_study)
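# Once published, the study can be retrieved from the server again to check
# its contents. A brief sketch; it assumes the publish call above assigned a
# study id and that the downloaded study lists its run ids in its runs
# attribute:
study_downloaded = openml.study.get_study(new_study.study_id)
print(study_downloaded.runs)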
# Let's change some hyperparameters. Of course, in any good application we
# would tune them using, e.g., Random Search or Bayesian Optimization, but for
# the purpose of this tutorial we set them to some specific values that might
# or might not be optimal
hyperparameters_original = {
    'simpleimputer__strategy': 'median',
    'randomforestclassifier__criterion': 'entropy',
    'randomforestclassifier__max_features': 0.2,
    'randomforestclassifier__min_samples_leaf': 1,
    'randomforestclassifier__n_estimators': 16,
    'randomforestclassifier__random_state': 42,
}
model_original.set_params(**hyperparameters_original)
# solve the task and upload the result (this implicitly creates the flow)
run = openml.runs.run_model_on_task(
    model_original,
    task,
    avoid_duplicate_runs=False)
run_original = run.publish() # this implicitly uploads the flow
###############################################################################
# 2) Download the flow and solve the same task again.
###############################################################################
# obtain setup id (note that the setup id is assigned by the OpenML server -
# therefore it was not yet available in our local copy of the run)
run_downloaded = openml.runs.get_run(run_original.run_id)
setup_id = run_downloaded.setup_id
# after this, we can easily reinstantiate the model
model_duplicate = openml.setups.initialize_model(setup_id)
# it will automatically have all the hyperparameters set,
# and we can run the task again
run_duplicate = openml.runs.run_model_on_task(
    model_duplicate, task, avoid_duplicate_runs=False)
###############################################################################
# 3) We will verify that the obtained results are exactly the same.
###############################################################################
# the run has stored all predictions in its data_content field
np.testing.assert_array_equal(run_original.data_content,
                              run_duplicate.data_content)
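# The reinstantiated model should also carry the exact hyperparameter values we
# set on the original model; a quick local sanity check against the dictionary
# defined above:
for parameter, value in hyperparameters_original.items():
    assert model_duplicate.get_params()[parameter] == value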
###############################################################################
openml.config.stop_using_configuration_for_example()