Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# if 'submission_id' not in tags: # no longer added to tags
# continue
if tags.get('event_name') != event_name:
continue
if 'train_loop' not in tags:
continue
# Process each instance
submission_name = tags['Name']
assert submission_name.startswith('submission_')
submission_id = int(submission_name[11:])
submission = get_submission_by_id(config, submission_id)
label = '{}_{}'.format(submission_id, submission.name)
state = get_submission_state(config, submission_id)
submissions_dir = os.path.split(submission.path)[0]
if state == 'sent_to_training':
exit_status = upload_submission(
conf_aws, instance_id, submission_name, submissions_dir)
if exit_status != 0:
logger.error(
'Cannot upload submission "{}"'
', an error occured'.format(label))
continue
# start training HERE
exit_status = launch_train(
conf_aws, instance_id, submission_name)
if exit_status != 0:
logger.error(
'Cannot start training of submission "{}"'
', an error occured.'.format(label))
continue
set_submission_state(config, submission_id, 'training')
_run_hook(config, HOOK_START_TRAINING, submission_id)
This will launch an instance on Amazon, and copy the submission
to the instance.
"""
# sanity check for the configuration variable
for required_param in ('instance_type', 'access_key_id'):
self._check_config_name(self.config, required_param)
logger.info("Setting up AWSWorker for submission '{}'".format(
self.submission))
self.instance, = aws.launch_ec2_instances(self.config)
logger.info("Instance launched for submission '{}'".format(
self.submission))
for _ in range(5):
# try uploading the submission a few times, as this regularly fails
exit_status = aws.upload_submission(
self.config, self.instance.id, self.submission,
self.submissions_path)
if exit_status == 0:
break
else:
logger.info("Uploading submission failed, retrying ...")
if exit_status != 0:
logger.error(
'Cannot upload submission "{}"'
', an error occured'.format(self.submission))
# TODO do something with this status (no launching needs to be
# done)
else:
logger.info("Uploaded submission '{}'".format(self.submission))
self.status = 'setup'
def train_on_existing_ec2_instance(config, instance_id, submission_id):
"""
Train a submission on a ready EC2 instance.
The steps followed by this function are the following:
1) upload the submission code to the instance
2) launch training in a screen
3) wait until training is finished
4) download the predictions
5) download the log
6) set the predictions in the database
7) score the submission
"""
conf_aws = config[AWS_CONFIG_SECTION]
upload_submission(conf_aws, instance_id, submission_id)
launch_train(conf_aws, instance_id, submission_id)
set_submission_state(config, submission_id, 'training')
_run_hook(config, HOOK_START_TRAINING, submission_id)
_wait_until_train_finished(conf_aws, instance_id, submission_id)
download_log(conf_aws, instance_id, submission_id)
label = _get_submission_label_by_id(config, submission_id)
submission = get_submission_by_id(config, submission_id)
actual_nb_folds = get_event_nb_folds(config, submission.event.name)
if _training_successful(conf_aws, instance_id, submission_id,
actual_nb_folds):
logger.info('Training of "{}" was successful'.format(
label, instance_id))
if conf_aws[MEMORY_PROFILING_FIELD]:
logger.info('Download max ram usage info of "{}"'.format(label))
download_mprof_data(conf_aws, instance_id, submission_id)