How to use the dsub.providers.google_base module in dsub

To help you get started, we’ve selected a few dsub examples based on popular ways dsub.providers.google_base is used in public projects.


github DataBiosphere / dsub / dsub / providers / google_v2.py
            self._job_descriptor.job_params,
            self._job_descriptor.task_descriptors[0].task_params, field)
        value.update({item.name: item.value for item in items})
    elif field == 'mounts':
      if self._job_descriptor:
        items = providers_util.get_job_and_task_param(
            self._job_descriptor.job_params,
            self._job_descriptor.task_descriptors[0].task_params, field)
        value = {item.name: item.value for item in items}
    elif field == 'create-time':
      ds = google_v2_operations.get_create_time(self._op)
      value = google_base.parse_rfc3339_utc_string(ds)
    elif field == 'start-time':
      ds = google_v2_operations.get_start_time(self._op)
      if ds:
        value = google_base.parse_rfc3339_utc_string(ds)
    elif field == 'end-time':
      ds = google_v2_operations.get_end_time(self._op)
      if ds:
        value = google_base.parse_rfc3339_utc_string(ds)
    elif field == 'status':
      value = self._operation_status()
    elif field == 'status-message':
      msg, action = self._operation_status_message()
      if msg.startswith('Execution failed:'):
        # msg may look something like
        # "Execution failed: action 2: pulling image..."
        # Emit the actual message ("pulling image...")
        msg = msg.split(': ', 2)[-1]
      value = msg
    elif field == 'status-detail':
      msg, action = self._operation_status_message()
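
The 'create-time', 'start-time', and 'end-time' branches above all run the operation's RFC 3339 timestamp string through google_base.parse_rfc3339_utc_string to get a datetime back. A minimal sketch of that call on its own (the timestamp literal is made up for illustration, and whether the returned datetime is timezone-aware is an assumption that may vary by dsub version):

from dsub.providers import google_base

# RFC 3339 UTC timestamp, in the form the Pipelines API reports for operations.
ds = '2019-05-10T16:25:19.123456Z'

# Expected to return a datetime parsed from the string above.
create_time = google_base.parse_rfc3339_utc_string(ds)
print(create_time.isoformat())
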
github DataBiosphere / dsub / dsub / providers / google_v2.py
    # We are not using the documented default page size of 256,
    # nor allowing for the maximum page size of 2048 as larger page sizes
    # currently cause the operations.list() API to return an error:
    # HttpError 429 ... Resource has been exhausted (e.g. check quota).
    max_page_size = 128

    # Set the page size to the smallest (non-zero) size we can
    page_size = min(sz for sz in [page_size, max_page_size, max_tasks] if sz)

    # Execute operations.list() and return all of the dsub operations
    api = self._service.projects().operations().list(
        name='projects/{}/operations'.format(self._project),
        filter=ops_filter,
        pageToken=page_token,
        pageSize=page_size)
    google_base_api = google_base.Api(verbose)
    response = google_base_api.execute(api)

    return [
        GoogleOperation(op)
        for op in response.get('operations', [])
        if google_v2_operations.is_dsub_operation(op)
    ], response.get('nextPageToken')
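
Note that the request built by operations().list() is not executed directly; it is handed to google_base.Api, whose execute() wraps the call with dsub's error handling for transient API failures. A stripped-down sketch of the same pattern (the service object, project, and ops_filter are assumed to be set up as in the snippet; the retry behavior described is how dsub uses this wrapper, not a documented API contract):

from dsub.providers import google_base

request = service.projects().operations().list(
    name='projects/{}/operations'.format(project),
    filter=ops_filter,
    pageSize=128)

# The single constructor argument is the verbose flag, as in the snippets above;
# execute() behaves like request.execute() with dsub's retry handling on top.
response = google_base.Api(False).execute(request)
operations = response.get('operations', [])
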
github DataBiosphere / dsub / dsub / providers / google.py
def prepare_job_metadata(self, script, job_name, user_id, create_time):
    """Returns a dictionary of metadata fields for the job."""
    return google_base.prepare_job_metadata(script, job_name, user_id,
                                            create_time)
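
The v1 google provider delegates job metadata construction entirely to google_base.prepare_job_metadata, which returns a dictionary of reserved metadata fields for the job. A minimal sketch of a direct call (the argument values are illustrative; dsub itself passes the submitted script name, the --name value, the detected user id, and the job's creation time):

import datetime

from dsub.providers import google_base

metadata = google_base.prepare_job_metadata(
    'my-script.sh',                                 # script
    'my-job',                                       # job_name
    'alice',                                        # user_id
    datetime.datetime.now(datetime.timezone.utc))   # create_time (tz-aware datetime assumed)
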
github DataBiosphere / dsub / dsub / providers / google_v2.py
    job_resources = task_view.job_resources
    task_metadata = task_view.task_descriptors[0].task_metadata
    task_params = task_view.task_descriptors[0].task_params
    task_resources = task_view.task_descriptors[0].task_resources

    # Set up VM-specific variables
    mnt_datadisk = google_v2_pipelines.build_mount(
        disk=_DATA_DISK_NAME,
        path=providers_util.DATA_MOUNT_POINT,
        read_only=False)
    scopes = job_resources.scopes or google_base.DEFAULT_SCOPES

    # Set up the task labels
    labels = {
        label.name: label.value if label.value else '' for label in
        google_base.build_pipeline_labels(job_metadata, task_metadata)
        | job_params['labels'] | task_params['labels']
    }

    # Set local variables for the core pipeline values
    script = task_view.job_metadata['script']
    user_project = task_view.job_metadata['user-project'] or ''

    envs = job_params['envs'] | task_params['envs']
    inputs = job_params['inputs'] | task_params['inputs']
    outputs = job_params['outputs'] | task_params['outputs']
    mounts = job_params['mounts']
    gcs_mounts = param_util.get_gcs_mounts(mounts)

    persistent_disk_mount_params = param_util.get_persistent_disk_mounts(mounts)

    persistent_disks = [
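
In this snippet google_base contributes two things: DEFAULT_SCOPES, the fallback OAuth scope list used when the job did not specify --scopes, and build_pipeline_labels, which returns dsub's reserved labels (such as job-id and user-id) as a set of label parameter objects. A minimal sketch of the label merge on its own (job_metadata and task_metadata are assumed to come from a task_view, as in the snippet):

from dsub.providers import google_base

reserved_labels = google_base.build_pipeline_labels(job_metadata, task_metadata)

# Each label object carries .name and .value; labels with no value become ''.
labels = {label.name: label.value or '' for label in reserved_labels}
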
github DataBiosphere / dsub / dsub / providers / google.py
    page_token = None
    more_operations = True
    documented_default_page_size = 256
    documented_max_page_size = 2048

    if not page_size:
      page_size = documented_default_page_size
    page_size = min(page_size, documented_max_page_size)

    while more_operations:
      api = service.operations().list(
          name='operations',
          filter=ops_filter,
          pageToken=page_token,
          pageSize=page_size)
      google_base_api = google_base.Api(verbose)
      response = google_base_api.execute(api)

      ops = response.get('operations', [])
      for op in ops:
        if cls.is_dsub_operation(op):
          yield GoogleOperation(op)

      page_token = response.get('nextPageToken')
      more_operations = bool(page_token)
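
The older google provider pages through the v1alpha2 operations.list() results in a loop: the documented default (256) and maximum (2048) page sizes are applied up front, and each page is fetched through google_base.Api until no nextPageToken is returned. A compressed, generator-style sketch of the same pattern (list_operations is a made-up helper name; service and ops_filter are assumed to be the Genomics service object and filter string from the snippet):

from dsub.providers import google_base

def list_operations(service, ops_filter, page_size=256, verbose=False):
    """Yield every operation matching ops_filter, one page at a time."""
    page_token = None
    while True:
        request = service.operations().list(
            name='operations', filter=ops_filter,
            pageToken=page_token, pageSize=page_size)
        response = google_base.Api(verbose).execute(request)
        for op in response.get('operations', []):
            yield op
        page_token = response.get('nextPageToken')
        if not page_token:
            break
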
github DataBiosphere / dsub / dsub / providers / google_v2.py
def _build_pipeline_request(self, task_view):
    """Returns a Pipeline objects for the task."""
    job_metadata = task_view.job_metadata
    job_params = task_view.job_params
    job_resources = task_view.job_resources
    task_metadata = task_view.task_descriptors[0].task_metadata
    task_params = task_view.task_descriptors[0].task_params
    task_resources = task_view.task_descriptors[0].task_resources

    # Set up VM-specific variables
    mnt_datadisk = google_v2_pipelines.build_mount(
        disk=_DATA_DISK_NAME,
        path=providers_util.DATA_MOUNT_POINT,
        read_only=False)
    scopes = job_resources.scopes or google_base.DEFAULT_SCOPES

    # Set up the task labels
    labels = {
        label.name: label.value if label.value else '' for label in
        google_base.build_pipeline_labels(job_metadata, task_metadata)
        | job_params['labels'] | task_params['labels']
    }

    # Set local variables for the core pipeline values
    script = task_view.job_metadata['script']
    user_project = task_view.job_metadata['user-project'] or ''

    envs = job_params['envs'] | task_params['envs']
    inputs = job_params['inputs'] | task_params['inputs']
    outputs = job_params['outputs'] | task_params['outputs']
    mounts = job_params['mounts']
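
Job-level and task-level parameters (envs, inputs, outputs, labels) are stored as sets of parameter objects, so the | operators above are plain set unions of the two levels. A rough stand-in illustration of the merge (EnvParam here is a hypothetical namedtuple, not dsub's own class; it only shows the union behavior):

from collections import namedtuple

# Hypothetical stand-in for dsub's env parameter objects.
EnvParam = namedtuple('EnvParam', ['name', 'value'])

job_envs = {EnvParam('SAMPLE_ID', 'NA12878')}   # set once for the whole job
task_envs = {EnvParam('SHARD', '3')}            # set per task, e.g. from --tasks

envs = job_envs | task_envs   # union: a single set holding both parameters
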
github DataBiosphere / dsub / dsub / providers / google.py
      A list of tasks canceled and a list of error messages.
    """
    # Look up the job(s)
    tasks = list(
        self.lookup_job_tasks(
            {'RUNNING'},
            user_ids=user_ids,
            job_ids=job_ids,
            task_ids=task_ids,
            labels=labels,
            create_time_min=create_time_min,
            create_time_max=create_time_max))

    print('Found %d tasks to delete.' % len(tasks))

    return google_base.cancel(self._service.new_batch_http_request,
                              self._service.operations().cancel, tasks)
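
google_base.cancel takes a factory for batch HTTP requests, the per-operation cancel method, and the operations to cancel; it returns the canceled tasks and any error messages, which is what the docstring above describes. A minimal sketch of a direct call (the service object and tasks list are assumed to exist as in the snippet):

from dsub.providers import google_base

canceled, errors = google_base.cancel(
    service.new_batch_http_request,    # creates a batch request object
    service.operations().cancel,       # the cancel method applied to each task
    tasks)                             # the operations/tasks to cancel

for message in errors:
    print(message)
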
github DataBiosphere / dsub / dsub / providers / google_v2.py
          job_resources.min_cores, job_resources.min_ram)
    else:
      machine_type = job_model.DEFAULT_MACHINE_TYPE
    accelerators = None
    if job_resources.accelerator_type:
      accelerators = [
          google_v2_pipelines.build_accelerator(job_resources.accelerator_type,
                                                job_resources.accelerator_count)
      ]
    service_account = google_v2_pipelines.build_service_account(
        job_resources.service_account or 'default', scopes)

    resources = google_v2_pipelines.build_resources(
        self._project,
        job_resources.regions,
        google_base.get_zones(job_resources.zones),
        google_v2_pipelines.build_machine(
            network=network,
            machine_type=machine_type,
            # Preemptible comes from task_resources because it may change
            # on retry attempts
            preemptible=task_resources.preemptible,
            service_account=service_account,
            boot_disk_size_gb=job_resources.boot_disk_size,
            disks=disks,
            accelerators=accelerators,
            nvidia_driver_version=job_resources.nvidia_driver_version,
            labels=labels,
            cpu_platform=job_resources.cpu_platform,
            enable_stackdriver_monitoring=job_resources
            .enable_stackdriver_monitoring),
    )
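
google_base.get_zones normalizes the --zones argument before it reaches build_resources; in particular, dsub documents that wildcard entries such as 'us-central1-*' are expanded into the matching zone names. A minimal sketch (the expansion shown in the comment is an expectation based on that documented behavior, and the concrete zone list depends on the dsub version):

from dsub.providers import google_base

# A wildcard spec expands to the concrete zones in that region,
# e.g. ['us-central1-a', 'us-central1-b', ...]; plain zone names pass through.
zones = google_base.get_zones(['us-central1-*'])
print(zones)
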
github DataBiosphere / dsub / dsub / providers / google.py
def _build_pipeline_request(self, task_view):
    """Returns a Pipeline objects for the job."""
    job_metadata = task_view.job_metadata
    job_params = task_view.job_params
    job_resources = task_view.job_resources
    task_metadata = task_view.task_descriptors[0].task_metadata
    task_params = task_view.task_descriptors[0].task_params
    task_resources = task_view.task_descriptors[0].task_resources

    script = task_view.job_metadata['script']

    reserved_labels = google_base.build_pipeline_labels(
        job_metadata, task_metadata, task_id_pattern='task-%d')

    # Build the ephemeralPipeline for this job.
    # The ephemeralPipeline definition changes for each job because file
    # parameters localCopy.path changes based on the remote_uri.
    pipeline = _Pipelines.build_pipeline(
        project=self._project,
        zones=job_resources.zones,
        min_cores=job_resources.min_cores,
        min_ram=job_resources.min_ram,
        disk_size=job_resources.disk_size,
        boot_disk_size=job_resources.boot_disk_size,
        preemptible=task_resources.preemptible,
        accelerator_type=job_resources.accelerator_type,
        accelerator_count=job_resources.accelerator_count,
        image=job_resources.image,
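
This is the same google_base.build_pipeline_labels call used by the v2 provider above, except that the older provider passes task_id_pattern='task-%d', so the task-id label value is rendered through that pattern (a task id of 1 becoming 'task-1' is inferred from the pattern rather than stated in the snippet). A minimal sketch (job_metadata and task_metadata as in the snippet):

from dsub.providers import google_base

reserved_labels = google_base.build_pipeline_labels(
    job_metadata, task_metadata, task_id_pattern='task-%d')

# Each returned label object exposes .name and .value.
for label in reserved_labels:
    print(label.name, label.value)
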
github DataBiosphere / dsub / dsub / providers / google_v2.py
    # where more than one value cannot be true at the same time. For example,
    # an operation cannot have a status of both RUNNING and CANCELED.
    #
    # The second set of filters, labeled here as AND filters are elements
    # where more than one value can be true. For example,
    # an operation can have a label with key1=value2 AND key2=value2.

    # Translate the semantic requests into a v2alpha1-specific filter.

    # 'OR' filtering arguments.
    status_filters = self._get_status_filters(statuses)
    user_id_filters = self._get_label_filters(
        'user-id', google_base.prepare_query_label_value(user_ids))
    job_id_filters = self._get_label_filters('job-id', job_ids)
    job_name_filters = self._get_label_filters(
        'job-name', google_base.prepare_query_label_value(job_names))
    task_id_filters = self._get_label_filters('task-id', task_ids)
    task_attempt_filters = self._get_label_filters('task-attempt',
                                                   task_attempts)
    # 'AND' filtering arguments.
    label_filters = self._get_labels_filters(labels)
    create_time_filters = self._get_create_time_filters(create_time_min,
                                                        create_time_max)

    if job_id_filters and job_name_filters:
      raise ValueError(
          'Filtering by both job IDs and job names is not supported')

    # Now build up the full text filter.
    # OR all of the OR filter arguments together.
    # AND all of the AND arguments together.
    or_arguments = []