How to use the ramp-engine.ramp_engine.aws.api.launch_train function in ramp-engine

To help you get started, we’ve selected a few ramp-engine examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github paris-saclay-cds / ramp-board / ramp-engine / ramp_engine / aws / aws_train.py View on Github external
assert submission_name.startswith('submission_')
            submission_id = int(submission_name[11:])
            submission = get_submission_by_id(config, submission_id)
            label = '{}_{}'.format(submission_id, submission.name)
            state = get_submission_state(config, submission_id)
            submissions_dir = os.path.split(submission.path)[0]
            if state == 'sent_to_training':
                exit_status = upload_submission(
                    conf_aws, instance_id, submission_name, submissions_dir)
                if exit_status != 0:
                    logger.error(
                        'Cannot upload submission "{}"'
                        ', an error occured'.format(label))
                    continue
                # start training HERE
                exit_status = launch_train(
                    conf_aws, instance_id, submission_name)
                if exit_status != 0:
                    logger.error(
                        'Cannot start training of submission "{}"'
                        ', an error occured.'.format(label))
                    continue
                set_submission_state(config, submission_id, 'training')
                _run_hook(config, HOOK_START_TRAINING, submission_id)

            elif state == 'training':
                # in any case (successful training or not)
                # download the log
                download_log(conf_aws, instance_id, submission_name)
                if _training_finished(conf_aws, instance_id, submission_name):
                    logger.info(
                        'Training of "{}" finished, checking '
github paris-saclay-cds / ramp-board / ramp-engine / ramp_engine / aws / worker.py View on Github external
def launch_submission(self):
        """Start training of the submission on the Amazon instance.

        Delegates to ``aws.launch_train`` which, per the original
        description of this method, runs ``ramp_test_submission`` inside
        the Amazon instance.

        Returns
        -------
        exit_status
            The value returned by ``aws.launch_train``. ``0`` is treated
            as success, in which case ``self.status`` is set to
            ``'running'``; any other value is logged as an error and the
            status is left untouched.

        Raises
        ------
        RuntimeError
            If ``self.status`` is already ``'running'``.
        """
        already_running = self.status == 'running'
        if already_running:
            raise RuntimeError(
                "Cannot launch submission: one is already started")

        exit_status = aws.launch_train(
            self.config, self.instance.id, self.submission)

        # Success path first: mark the worker as running and hand back the
        # (zero) exit status.
        if exit_status == 0:
            self.status = 'running'
            return exit_status

        # Failure is reported through the log and the returned exit status
        # rather than by raising.
        failure_template = (
            'Cannot start training of submission "{}", an error occured.')
        logger.error(failure_template.format(self.submission))
        return exit_status
github paris-saclay-cds / ramp-board / ramp-engine / ramp_engine / aws / aws_train.py View on Github external
def train_on_existing_ec2_instance(config, instance_id, submission_id):
    """
    Train a submission on a ready ec2 instance
    the steps followed by this function are the following:
        1) upload the submission code to the instance
        2) launch training in a screen
        3) wait until training is finished
        4) download the predictions
        5) download th log
        6) set the predictions in the database
        7) score the submission
    """
    conf_aws = config[AWS_CONFIG_SECTION]
    upload_submission(conf_aws, instance_id, submission_id)
    launch_train(conf_aws, instance_id, submission_id)
    set_submission_state(config, submission_id, 'training')
    _run_hook(config, HOOK_START_TRAINING, submission_id)
    _wait_until_train_finished(conf_aws, instance_id, submission_id)
    download_log(conf_aws, instance_id, submission_id)

    label = _get_submission_label_by_id(config, submission_id)
    submission = get_submission_by_id(config, submission_id)
    actual_nb_folds = get_event_nb_folds(config, submission.event.name)
    if _training_successful(conf_aws, instance_id, submission_id,
                            actual_nb_folds):
        logger.info('Training of "{}" was successful'.format(
            label, instance_id))
        if conf_aws[MEMORY_PROFILING_FIELD]:
            logger.info('Download max ram usage info of "{}"'.format(label))
            download_mprof_data(conf_aws, instance_id, submission_id)
            max_ram = _get_submission_max_ram(conf_aws, submission_id)