How to use the ncls.NCLS function in ncls

To help you get started, we’ve selected a few ncls examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github lh3 / cgranges / test / bedcov-ncls.py View on Github external
bed, i = {}, 0
	start = timer()
	with open(argv[1]) as fp:
		for line in fp:
			t = line[:-1].split("\t")
			if not t[0] in bed:
				bed[t[0]] = [[], [], [], None]
			bed[t[0]][0].append(t[1])
			bed[t[0]][1].append(t[2])
			bed[t[0]][2].append(i)
			i += 1
	sys.stderr.write("Read in {} sec\n".format(timer() - start))
	start = timer()
	for ctg in bed:
		bed[ctg][3] = NCLS(np.array(bed[ctg][0], dtype=np.long), np.array(bed[ctg][1], dtype=np.long), np.array(bed[ctg][2], dtype=np.long))
	sys.stderr.write("Index in {} sec\n".format(timer() - start))
	start = timer()
	with open(argv[2]) as fp:
		for line in fp:
			t = line[:-1].split("\t")
			if not t[0] in bed:
				print("{}\t{}\t{}\t0".format(t[0], t[1], t[2]))
			else:
				cnt = 0
				it = bed[t[0]][3].find_overlap(long(t[1]), long(t[2]))
				for r in it: cnt += 1
				print("{}\t{}\t{}\t{}".format(t[0], t[1], t[2], cnt))
	sys.stderr.write("Query in {} sec\n".format(timer() - start))
github biocore-ntnu / ncls / examples / test_all_overlaps_both.py View on Github external
from ncls import NCLS

import pickle
import pandas as pd
import numpy as np


starts = np.array(list(reversed([3, 5, 8])), dtype=np.long)
ends = np.array(list(reversed([6, 7, 9])), dtype=np.long)
indexes = np.array(list(reversed([0, 1, 2])), dtype=np.long)

# starts = np.array([3, 5, 8], dtype=np.long)
# ends = np.array([6, 7, 9], dtype=np.long)
# indexes = np.array([0, 1, 2], dtype=np.long)

ncls = NCLS(starts, ends, indexes)

starts2 = np.array([1, 6])
ends2 = np.array([10, 7])
indexes2 = np.array([0, 1])

print(ncls.all_overlaps_both(starts2, ends2, indexes2))
github biocore-ntnu / ncls / examples / test_pickle.py View on Github external
from ncls import NCLS

import pickle
import pandas as pd
import numpy as np

starts = np.random.randint(0, int(1e8), int(1e7))
ends = starts + 100
ids = starts

ncls = NCLS(starts, ends, ids)

for i in ncls.find_overlap(0, 2):
    print(i)

pickle.dump(ncls, open("test.pckl", "wb"))

import pickle

ncls2 = pickle.load(open("test.pckl", "rb"))

for i in ncls2.find_overlap(0, 2):
    print(i)
github biocore-ntnu / ncls / tests / test_ncls.py View on Github external
def test_ncls():
    # ids = starts

    print(starts, ends, ids)

    ncls = NCLS(starts, ends, ids)
    print(ncls)
    print(ncls.intervals())

    assert list(ncls.find_overlap(0, 2)) == []
    print("aaa", list(ncls.find_overlap(9_223_372_036_854_775_805, 9_223_372_036_854_775_806)))
    assert list(ncls.find_overlap(0, 9_223_372_036_854_775_806)) == [(5, 6, 2147483647), (9223372036854775805, 9223372036854775807, 3)]

    r, l = ncls.all_overlaps_both(starts, ends, ids)
    assert list(r) == [2147483647, 3]
    assert list(l) == [2147483647, 3]
github biocore-ntnu / pyranges / pyranges / methods / intersection.py View on Github external
def _overlap(scdf, ocdf, **kwargs):

    invert = kwargs["invert"]
    return_indexes = kwargs.get("return_indexes", False)

    if scdf.empty or ocdf.empty:
        return None

    how = kwargs["how"]

    assert how in "containment first".split() + [False, None]
    starts = scdf.Start.values
    ends = scdf.End.values
    indexes = scdf.index.values

    it = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    if not how:
        _indexes = it.all_overlaps_self(starts, ends, indexes)
    elif how == "containment":
        _indexes, _ = it.all_containments_both(starts, ends, indexes)
    else:
        _indexes = it.has_overlaps(starts, ends, indexes)

    if invert:
        _indexes = scdf.index.difference(_indexes)

    if return_indexes:
        return _indexes

    return scdf.reindex(_indexes)
github biocore-ntnu / pyranges / pyranges / methods / coverage.py View on Github external
def _number_overlapping(scdf, ocdf, **kwargs):

    keep_nonoverlapping = kwargs.get("keep_nonoverlapping", True)

    if scdf.empty:
        return None
    if ocdf.empty:
        if keep_nonoverlapping:
            df = scdf.copy()
            df.insert(df.shape[1], "NumberOverlaps", 0)
            return df
        else:
            return None

    oncls = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    starts = scdf.Start.values
    ends = scdf.End.values
    indexes = scdf.index.values

    _self_indexes, _other_indexes = oncls.all_overlaps_both(
        starts, ends, indexes)

    s = pd.Series(_self_indexes)
    counts_per_read = s.value_counts()[s.unique()].reset_index()
    counts_per_read.columns = ["Index", "Count"]

    df = scdf.copy()

    if keep_nonoverlapping:
        _missing_indexes = np.setdiff1d(scdf.index, _self_indexes)
github biocore-ntnu / pyranges / pyranges / subset.py View on Github external
def create_ncls(df):

    return NCLS(df.Start.values, df.End.values, df.index.values)
github biocore-ntnu / pyranges / pyranges / methods / subtraction.py View on Github external
def _subtraction(scdf, ocdf, **kwargs):

    if ocdf.empty or scdf.empty:
        return scdf

    strandedness = kwargs["strandedness"]
    strand = True if strandedness else False

    chromosome = scdf.Chromosome.head(1).iloc[0]
    kwargs["chromosome"] = chromosome

    if "Strand" in ocdf and strand:
        strand = scdf.Strand.head(1).iloc[0]
        kwargs["strand"] = strand

    o = NCLS(ocdf.Start.values, ocdf.End.values, ocdf.index.values)

    idx_self, new_starts, new_ends = o.set_difference_helper(
        scdf.Start.values, scdf.End.values, scdf.index.values)

    missing_idx = pd.Index(scdf.index).difference(idx_self)

    idx_to_drop = new_starts != -1

    new_starts = new_starts[idx_to_drop]
    new_ends = new_ends[idx_to_drop]

    idx_self = idx_self[idx_to_drop]
    new_starts = pd.Series(new_starts, index=idx_self)
    new_ends = pd.Series(new_ends, index=idx_self)

    scdf = scdf.reindex(missing_idx.union(idx_self)).sort_index()

ncls

A fast interval tree-like implementation in C, wrapped for the Python ecosystem.

BSD-3-Clause
Latest version published 2 years ago

Package Health Score

42 / 100
Full package analysis