How to use the pynndescent.NNDescent class in pynndescent

To help you get started, we’ve selected a few pynndescent examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pavlin-policar / openTSNE / tests / test_nearest_neighbors.py View on Github external
    @patch("pynndescent.NNDescent", wraps=pynndescent.NNDescent)
    def test_random_state_being_passed_through(self, nndescent):
        """Check that the wrapper's ``random_state`` reaches ``pynndescent.NNDescent``.

        ``nndescent`` is the mock injected by ``patch``; because it is created
        with ``wraps=``, calls are recorded and also forwarded to the real
        constructor.
        """
        seed = 1
        expected_kwargs = {"random_state": seed}

        index = nearest_neighbors.NNDescent("euclidean", random_state=seed)
        index.build(self.x1, k=30)

        # Exactly one underlying index should be constructed, and it must
        # have been given the seed supplied to the wrapper.
        nndescent.assert_called_once()
        check_mock_called_with_kwargs(nndescent, expected_kwargs)
github sdomanskyi / DigitalCellSorter / DigitalCellSorter / DigitalCellSorter.py View on Github external
k_neighbors = 40

            try:
                metric = clusteringFunction[metric]
            except:
                metric = 'euclidean'
            
            try:
                clusterExpression = clusteringFunction[clusterExpression]
            except:
                clusterExpression = False

            data = df_expr.values.T if clusterExpression else X_pca.T

            print('Searching for %s nearest neighbors'%(k_neighbors))
            knn = pynndescent.NNDescent(data, metric=metric, n_neighbors=k_neighbors).query(data, k=k_neighbors)

            print('k(=%s) nearest neighbors found. Constructing a NetworkX graph'%(k_neighbors))
            A = np.zeros((len(knn[0]),len(knn[0])))
            for i in range(len(knn[0])):
                A[i, knn[0][i]] = knn[1][i]

            G = nx.from_numpy_array(A)

            print('Clustering the graph')
            cellClusterIndex = pd.Series(community.best_partition(G)).sort_index().values
        else:
            cellClusterIndex = clusteringFunction(n_clusters=self.nClusters).fit(X_pca.T).labels_

        return cellClusterIndex
github sdomanskyi / DigitalCellSorter / DigitalCellSorter / core.py View on Github external
try:
                metric = self.clusteringFunction[metric]
            except Exception as exception:
                print(exception)
                metric = 'euclidean'
            
            try:
                clusterExpression = self.clusteringFunction[clusterExpression]
            except Exception as exception:
                print(exception)
                clusterExpression = False

            data = self._df_expr.values.T if clusterExpression else df_xpca.values.T

            print('Searching for %s nearest neighbors' % (k_neighbors), flush=True)
            knn = pynndescent.NNDescent(data, metric=metric, n_neighbors=k_neighbors).query(data, k=k_neighbors)

            print('k(=%s) nearest neighbors found. Constructing a NetworkX graph' % (k_neighbors), flush=True)
            A = np.zeros((len(knn[0]),len(knn[0])))
            for i in range(len(knn[0])):
                A[i, knn[0][i]] = knn[1][i]

            G = nx.from_numpy_array(A)

            print('Clustering the graph', flush=True)
            cellClusterIndex = pd.Series(community.best_partition(G)).sort_index().values
        else:
            data = df_xpca.values

            cellClusterIndex = self.clusteringFunction(n_clusters=self.nClusters).fit(data.T).labels_.astype(float).astype(str)
            print(np.unique(cellClusterIndex, return_counts=True))
github pavlin-policar / openTSNE / openTSNE / nearest_neighbors.py View on Github external
def build(self, data, k):
        """Build a pynndescent index over ``data`` and query it for neighbours.

        The constructed index is stored on ``self.index``.

        Parameters
        ----------
        data
            Object exposing ``shape``; ``data.shape[0]`` is used as the number
            of samples when sizing the index. It is both indexed and queried.
        k
            Number of neighbours to return per sample. The index is queried
            for ``k + 1`` neighbours and the first column is discarded
            (presumably each sample's match with itself -- confirm).

        Returns
        -------
        tuple
            ``(indices, distances)``, each with the first column of the query
            result removed.
        """
        n_samples = data.shape[0]

        # Sizing heuristics borrowed from UMAP, assumed to be sensible defaults.
        tree_count = 5 + int(round(n_samples ** 0.5 / 20))
        iteration_count = max(5, int(round(np.log2(n_samples))))

        # Deferred import: numba is slow to load, so only pay that cost when
        # an index is actually being built.
        import pynndescent

        # UMAP uses the "alternative" algorithm, but that sometimes causes
        # memory corruption, so the "standard" one is requested here.
        index_options = dict(
            n_neighbors=15,
            metric=self.metric,
            metric_kwds=self.metric_params,
            random_state=self.random_state,
            n_trees=tree_count,
            n_iters=iteration_count,
            algorithm="standard",
            max_candidates=60,
            n_jobs=self.n_jobs,
        )
        self.index = pynndescent.NNDescent(data, **index_options)

        neighbor_ids, neighbor_dists = self.index.query(
            data, k=k + 1, queue_size=1
        )
        return neighbor_ids[:, 1:], neighbor_dists[:, 1:]

pynndescent

Nearest Neighbor Descent

BSD-2-Clause
Latest version published 5 months ago

Package Health Score

85 / 100
Full package analysis

Similar packages