How to use the dsub.lib.job_model.TaskDescriptor class in dsub

To help you get started, we’ve selected a few dsub examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github DataBiosphere / dsub / test / integration / e2e_python_api.py View on Github external
# NOTE(review): excerpt is truncated above and below this view; the visible
# lines build the output params and a single implicit task, then submit the
# job via dsub.run().
output_file_param_util = param_util.OutputFileParamUtil('output')
  output_data = set()
  # Fold non-recursive and recursive outputs into one set of output params,
  # tagging each with its recursive flag.
  for (recursive, items) in ((False, outputs.items()),
                             (True, outputs_recursive.items())):
    for (name, value) in items:
      # Normalize the user-supplied name before constructing the param.
      name = output_file_param_util.get_variable_name(name)
      output_data.add(output_file_param_util.make_param(name, value, recursive))

  # Job-wide parameters shared by every task in the job.
  job_params = {
      'envs': env_data,
      'inputs': input_data,
      'outputs': output_data,
      'labels': label_data,
  }
  # A single implicit task: 'task-id' of None and empty per-task params;
  # the job-level params above carry all the data.
  task_descriptors = [
      job_model.TaskDescriptor({
          'task-id': None
      }, {
          'envs': set(),
          'labels': set(),
          'inputs': set(),
          'outputs': set(),
      }, job_model.Resources())
  ]

  # Submit the job (call continues past this excerpt).
  return dsub.run(
      get_dsub_provider(),
      job_resources,
      job_params,
      task_descriptors,
      name=job_name,
      command=command,
github DataBiosphere / dsub / dsub / lib / job_model.py View on Github external
# NOTE(review): excerpt starts mid-classmethod; each parsed task dict is
# converted into typed param sets, wrapped in a TaskDescriptor, and the
# loop's results are assembled into a JobDescriptor.
task_params['labels'] = cls._label_params_from_dict(
          task.get('labels', {}))
      task_params['envs'] = cls._env_params_from_dict(task.get('envs', {}))
      # Inputs/outputs are parsed twice: flat entries (recursive=False) and
      # '-recursives' entries (recursive=True).
      task_params['inputs'] = cls._input_file_params_from_dict(
          task.get('inputs', {}), False)
      task_params['input-recursives'] = cls._input_file_params_from_dict(
          task.get('input-recursives', {}), True)
      task_params['outputs'] = cls._output_file_params_from_dict(
          task.get('outputs', {}), False)
      task_params['output-recursives'] = cls._output_file_params_from_dict(
          task.get('output-recursives', {}), True)

      # Per-task resources carry only the task's logging path here.
      task_resources = Resources(logging_path=task.get('logging-path'))

      task_descriptors.append(
          TaskDescriptor(task_metadata, task_params, task_resources))

    return JobDescriptor(job_metadata, job_params, job_resources,
                         task_descriptors)
github DataBiosphere / dsub / dsub / commands / dsub.py View on Github external
def _retry_task(provider, job_descriptor, task_id, task_attempt):
  """Resubmit a single task of a job as a new attempt.

  Looks up the original task's params and resources by task_id, builds a
  fresh TaskDescriptor tagged with the given task_attempt, refreshes its
  resolved resources (logging path, preemptible), and submits a job
  containing only that retried task.
  """
  original = job_descriptor.find_task_descriptor(task_id)

  retry_metadata = {
      'task-id': task_id,
      'task-attempt': task_attempt,
  }
  retry_descriptors = [
      job_model.TaskDescriptor(retry_metadata, original.task_params,
                               original.task_resources)
  ]

  # Re-resolve per-task resources (logging path, preemptible) for the
  # new attempt before submitting.
  _resolve_task_resources(job_descriptor.job_metadata,
                          job_descriptor.job_resources, retry_descriptors)

  retry_job = job_model.JobDescriptor(job_descriptor.job_metadata,
                                      job_descriptor.job_params,
                                      job_descriptor.job_resources,
                                      retry_descriptors)
  provider.submit_job(retry_job, False)
github DataBiosphere / dsub / dsub / commands / dsub.py View on Github external
# NOTE(review): excerpt is truncated above and below; it chooses between
# task descriptors parsed from a --tasks file and a single implicit task,
# then forwards everything to run().
args.output_recursive, args.mount, input_file_param_util,
      output_file_param_util, mount_param_util)
  # If --tasks is on the command-line, then get task-specific data
  if args.tasks:
    task_descriptors = param_util.tasks_file_to_task_descriptors(
        args.tasks, args.retries, input_file_param_util, output_file_param_util)

    # Validate job data + task data
    _validate_job_and_task_arguments(job_params, task_descriptors)
  else:
    # Create the implicit task
    task_metadata = {'task-id': None}
    # With retries enabled, the first attempt is explicitly numbered 1.
    if args.retries:
      task_metadata['task-attempt'] = 1
    # Empty per-task params: the job-level params hold all the data.
    task_descriptors = [
        job_model.TaskDescriptor(task_metadata, {
            'labels': set(),
            'envs': set(),
            'inputs': set(),
            'outputs': set()
        }, job_model.Resources())
    ]

  # Submit (call continues past this excerpt).
  return run(
      provider_base.get_provider(args, resources),
      _get_job_resources(args),
      job_params,
      task_descriptors,
      name=args.name,
      dry_run=args.dry_run,
      command=args.command,
      script=args.script,
github DataBiosphere / dsub / dsub / lib / param_util.py View on Github external
# NOTE(review): excerpt starts mid-loop; each row of the tasks file is
# dispatched by the column's param type into the matching set, then wrapped
# in a TaskDescriptor per row.
if isinstance(param, job_model.EnvParam):
        envs.add(job_model.EnvParam(name, row[i]))

      elif isinstance(param, job_model.LabelParam):
        labels.add(job_model.LabelParam(name, row[i]))

      # File params go through the param utils so the recursive flag from
      # the header column is preserved.
      elif isinstance(param, job_model.InputFileParam):
        inputs.add(
            input_file_param_util.make_param(name, row[i], param.recursive))

      elif isinstance(param, job_model.OutputFileParam):
        outputs.add(
            output_file_param_util.make_param(name, row[i], param.recursive))

    # One descriptor per data row; attempt numbering only applies when
    # retries are enabled.
    task_descriptors.append(
        job_model.TaskDescriptor({
            'task-id': task_id,
            'task-attempt': 1 if retries else None
        }, {
            'labels': labels,
            'envs': envs,
            'inputs': inputs,
            'outputs': outputs
        }, job_model.Resources()))

  # Ensure that there are jobs to execute (and not just a header)
  if not task_descriptors:
    raise ValueError('No tasks added from %s' % path)

  return task_descriptors
github DataBiosphere / dsub / dsub / lib / job_model.py View on Github external
# NOTE(review): excerpt starts mid-classmethod; a flat job dict is split into
# job-level vs. task-level params depending on whether it carries a task-id,
# then rebuilt as a complete JobDescriptor with one task.
params['outputs'] = cls._output_file_params_from_dict(
        job.get('outputs', {}), False)

    # No task-id means the params are job-wide; otherwise they belong to the
    # single identified task.
    if job.get('task-id') is None:
      job_params = params
      task_metadata = {'task-id': None}
      task_params = {}
    else:
      job_params = {}
      task_metadata = {'task-id': str(job.get('task-id'))}
      task_params = params

    task_resources = Resources(logging_path=job.get('logging'))

    # get_complete_descriptor fills in any param keys missing from the dicts.
    task_descriptors = [
        TaskDescriptor.get_complete_descriptor(task_metadata, task_params,
                                               task_resources)
    ]

    return JobDescriptor.get_complete_descriptor(
        job_metadata, job_params, job_resources, task_descriptors)