How to use the joblib.delayed function in joblib

To help you get started, we’ve selected a few joblib examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github picrust / picrust2 / picrust2 / pathway_pipeline.py View on Github external
for line in in_map:
            line = line.rstrip()
            line_split = line.split()
            func_map[line_split[0]] += line_split[1].split(",")

    # Get set of all unique functions.
    functions = list(set(in_df['function']))

    chunk_size = int(len(functions) / proc) + 1

    function_chunks = [functions[i:i + chunk_size]
                       for i in range(0, len(functions), chunk_size)]

    # Loop over all functions in parallel and return pandas dataframe for each
    # function with regrouped ids (which are combined together afterwards).
    raw_new_ids_dfs = Parallel(n_jobs=proc)(delayed(
                                    convert_func_ids)(func_subset,
                                                      in_df,
                                                      func_map)
                                    for func_subset in function_chunks)

    # Combine all returned DFs into a single DF.
    raw_new_ids_combined = pd.concat(raw_new_ids_dfs, sort=False)

    if in_format == "contrib":
        regrouped_table = pd.pivot_table(raw_new_ids_combined,
                                         index=['sample', 'function', 'taxon',
                                                'taxon_abun',
                                                'taxon_rel_abun'],
                                         aggfunc=np.sum)

    elif in_format == "strat":
github powervr-graphics / Native_SDK / framework / scripts / equi_to_cube.py View on Github external
else:
            raise "Unknown format"
    else:
        format = imgIn.dtype.char
        if format == 'f2':
            args.format = 'f16'
        else:
            format = 'f4'
            args.format = 'f32'

    if imgIn.dtype.char != format:
        print "Converting to ", args.format, "(",format,")"
    imgIn = imgIn.astype(format)
            
    print (args.output_file.rsplit('.', 1) if args.output_file else args.input_file.rsplit('.',1))
    result = Parallel(n_jobs=8)(delayed(convertFace)(args,imgIn,i) for i in xrange(6))
    
    if args.flatten_cubemap:
        components = args.output_file.rsplit('.', 1) if args.output_file else args.input_file.rsplit('.',1)
        filename = components[0] + "_" + str(imgIn.shape[0]/2) + "_full-cubemap_" +args.format+".raw"
        result_arrays = result.sort(key=lambda x: FACE_REMAP_ORDER[x[0]])
        numpy.concatenate(tuple([x[1] for x in result])).tofile(open(filename,"wb"))
    
    print "Time elapsed: ",(time.time() - start)
    print "Saved processed image as: " + filename
    print '== PVRTexTool Wrap Raw Data settings =='
    print '= Width x Height : %d x %d' % (imgIn.shape[0]/2 , imgIn.shape[0]/2)
    print '= Variable type  : "Signed Floating Point"'
    print '= Colour space   : Linear RGB'
    print '= Faces          : 6'
    print '= MIP-Map levels : 1'
    print '= Array Members  : 1'
github urinieto / msaf / algorithms / fmc2d / run_segmenter.py View on Github external
def process(in_path, annot_beats=False, feature="mfcc", ds_name="*", n_jobs=4,
            xmeans=False, k=5):
    """Run ``process_track`` in parallel over a dataset's audio/annotation pairs.

    Parameters
    ----------
    in_path : str
        Dataset root; files are searched in its ``annotations`` and ``audio``
        sub-folders.
    annot_beats : bool
        Forwarded unchanged to ``process_track``.
    feature : str
        Accepted for interface compatibility; not used inside this function.
    ds_name : str
        File-name prefix used in the glob patterns (``"*"`` matches any prefix).
    n_jobs : int
        Number of joblib workers.
    xmeans, k
        Forwarded unchanged to ``process_track``.
    """
    # glob gives no ordering guarantee, and the two listings are paired
    # positionally below, so sort both to keep each audio file aligned with
    # the annotation that shares its base name.
    jam_files = sorted(glob.glob(os.path.join(in_path, "annotations",
                                              "%s_*.jams" % ds_name)))
    audio_files = sorted(glob.glob(os.path.join(in_path, "audio",
                                                "%s_*.[wm][ap][v3]" % ds_name)))

    # Call in parallel; zip stops at the shorter listing.
    Parallel(n_jobs=n_jobs)(delayed(process_track)(
        in_path, audio_file, jam_file, annot_beats, ds_name, xmeans, k)
        for audio_file, jam_file in zip(audio_files, jam_files))
github gao-lab / Cell_BLAST / Cell_BLAST / blast.py View on Github external
Number of parallel jobs to run.

        Returns
        -------
        filtered_hits
            Hit object containing remaining hits after filtering
        """
        if by == "pval":
            assert self.pval is not None
            by = Hits.FILTER_BY_PVAL
        else:  # by == "dist"
            by = Hits.FILTER_BY_DIST
        if self.dist[0].ndim == 1:
            hits, dist, pval = [_ for _ in zip(*joblib.Parallel(
                n_jobs=n_jobs, backend="threading"
            )(joblib.delayed(self._filter_reconciled_hits)(
                _hits, _dist, _pval, by, cutoff
            ) for _hits, _dist, _pval in zip(
                self.hits, self.dist, self.pval
            )))]
        else:
            hits, dist, pval = [_ for _ in zip(*joblib.Parallel(
                n_jobs=n_jobs, backend="threading"
            )(joblib.delayed(self._filter_hits)(
                _hits, _dist, _pval, by, cutoff, model_tolerance
            ) for _hits, _dist, _pval in zip(
                self.hits, self.dist, self.pval
            )))]
        return Hits(self.blast, hits, dist, pval, self.query)
github DiamondLightSource / DosNa / dosna / dataset.py View on Github external
def map(self, new_name, func, *args, **kwargs):
        """Apply ``func`` to every chunk and store the results in a clone.

        The clone is created with ``self.clone(new_name)``. ``func`` receives
        each chunk's data followed by ``*args`` and ``**kwargs``. When
        ``self.njobs`` is ``None`` or ``1`` the chunks are handled one after
        another in this thread; otherwise the work is handed to a joblib
        threading pool through ``_map_parallel``.

        Returns the cloned dataset.
        """
        dsout = self.clone(new_name)
        chunk_grid = self.chunks

        run_serially = self.njobs is None or self.njobs == 1
        if not run_serially:
            Parallel(n_jobs=self.njobs, backend="threading")(
                delayed(_map_parallel)(self, dsout, func, chunk_idx,
                                       *args, **kwargs)
                for chunk_idx in np.ndindex(*chunk_grid)
            )
            return dsout

        for chunk_idx in np.ndindex(*chunk_grid):
            bounds = self._lchunk_bounds_slices(chunk_idx)
            chunk_data = self._get_chunk_data(chunk_idx, slices=bounds)
            dsout._set_chunk_data(chunk_idx,
                                  func(chunk_data, *args, **kwargs),
                                  slices=bounds)
        return dsout
github ClementPinard / unsupervised-depthnet / data / prepare_train_data.py View on Github external
get_depth=args.with_depth,
                                     get_pose=args.with_pose,
                                     depth_size_ratio=args.depth_size_ratio)

    if args.dataset_format == 'cityscapes':
        from cityscapes_loader import cityscapes_loader
        data_loader = cityscapes_loader(args.dataset_dir,
                                        img_height=args.height,
                                        img_width=args.width)

    print('Retrieving frames')
    if args.num_threads == 1:
        for scene in tqdm(data_loader.scenes):
            dump_example(args, scene)
    else:
        Parallel(n_jobs=args.num_threads)(delayed(dump_example)(args, scene) for scene in tqdm(data_loader.scenes))

    print('Generating train val lists')
    np.random.seed(8964)
    subfolders = args.dump_root.dirs()
    with open(args.dump_root / 'train.txt', 'w') as tf:
        with open(args.dump_root / 'val.txt', 'w') as vf:
            for s in tqdm(subfolders):
                if np.random.random() < 0.1:
                    vf.write('{}\n'.format(s.name))
                else:
                    tf.write('{}\n'.format(s.name))
                    if args.with_depth and args.no_train_gt:
                        for gt_file in s.files('*.npy'):
                            gt_file.remove_p()
github thanhdtran / RME / rec_eval.py View on Github external
def parallel_map_at_k(train_data, heldout_data, U, V, batch_users=2000, k=100, mu=None,
             vad_data=None, agg=np.nanmean, clear_invalid=False, n_jobs=16, cache=False):
    """Evaluate ``MAP_at_k_batch`` over user batches with joblib.

    User batches come from ``user_idx_generator(n_users, batch_users)``, where
    ``n_users`` is ``train_data.shape[0]``. Each batch is scored by
    ``MAP_at_k_batch`` (which is given ``V.T``) and the per-batch outputs are
    joined with ``np.hstack``.

    Returns ``agg(scores)`` when ``agg`` is callable, otherwise the stacked
    scores themselves.
    """
    n_users = train_data.shape[0]
    batch_jobs = (
        delayed(MAP_at_k_batch)(train_data, heldout_data, U, V.T, user_idx,
                                k=k, mu=mu, vad_data=vad_data,
                                clear_invalid=clear_invalid, cache=cache)
        for user_idx in user_idx_generator(n_users, batch_users)
    )
    batch_scores = Parallel(n_jobs=n_jobs)(batch_jobs)
    stacked_scores = np.hstack(batch_scores)
    return agg(stacked_scores) if callable(agg) else stacked_scores
github GeoscienceAustralia / PyRate / pyrate / pyaps.py View on Github external
if params[cf.APS_METHOD] == 1:
        incidence_map = np.ones_like(dem)  # dummy
    elif params[cf.APS_METHOD] == 2:
        incidence_map = get_incidence_map()
    else:
        raise PyAPSException('PyAPS method must be 1 or 2')

    list_of_dates_for_grb_download = []

    parallel = params[cf.PARALLEL]
    data_paths = [i.data_path for i in ifgs]

    if parallel:
        aps_delay = Parallel(n_jobs=params[cf.PROCESSES], verbose=50)(
            delayed(parallel_aps)(d, dem, dem_header,
                               incidence_angle,
                               incidence_map, list_of_dates_for_grb_download,
                               mlooked_dem, params)
            for d in data_paths)
    else:
        aps_delay = []
        for d in data_paths:  # demo for only one ifg
            aps_delay.append(parallel_aps(d, dem, dem_header, incidence_angle,
                                          incidence_map,
                                          list_of_dates_for_grb_download,
                                          mlooked_dem, params))

    for i, ifg in enumerate(ifgs):
        ifg.phase_data -= aps_delay[i]  # remove delay
        # add to ifg.meta_data
        ifg.meta_data[ifc.PYRATE_WEATHER_ERROR] = APS_STATUS
github scot-dev / scot / scot / parallel.py View on Github external
try:
        from joblib import Parallel, delayed
    except ImportError:
        try:
            from sklearn.externals.joblib import Parallel, delayed
        except ImportError:
            n_jobs = None

    if n_jobs is None:
        if verbose >= 10:
            print('running ', func, ' serially')
        par = lambda x: list(x)
    else:
        if verbose >= 10:
            print('running ', func, ' in parallel')
        func = delayed(func)
        par = Parallel(n_jobs=n_jobs, verbose=verbose)

    return par, func
github jmcarpenter2 / parfit / parallelizeFit.py View on Github external
:param X: The X data you wish to use for prediction
    :param y: The ground truth y data you wish to compare the predictions to
    :param metric: The metric you wish to use to score the predictions
        Defaults to roc_auc_score
    :param predictType: Choice between 'predict_proba' and 'predict' for scoring routine
        Defaults to 'predict_proba' when possible
    :param n_jobs: Number of cores to use in parallelization (defaults to -1: all cores)
    :param verbose: The level of verbosity of reporting updates on parallel process
        Default is 10 (send an update at the completion of each job)
    :return: Returns a list of scores in the same order as the list of models

    Example usage:
        from sklearn.metrics import recall_score

    """
    return Parallel(n_jobs=n_jobs, verbose=verbose)(delayed(scoreOne)(m,
                                                                      X,
                                                                      y,
                                                                      metric,
                                                                      predictType) for m in models)