How to use the scipy.sparse.issparse function in scipy

To help you get started, we’ve selected a few scipy.sparse.issparse examples, based on popular ways it is used in public projects.

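As a baseline, here is a minimal sketch of how issparse behaves: it returns True for any scipy.sparse matrix and False for dense arrays and everything else, which makes it the standard guard before calling sparse-only methods such as toarray().

import numpy as np
import scipy.sparse as sp

dense = np.eye(3)
sparse = sp.csr_matrix(dense)

print(sp.issparse(dense))   # False
print(sp.issparse(sparse))  # True

# typical guard: densify only when the input is actually sparse
if sp.issparse(sparse):
    dense_copy = sparse.toarray()
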

github bsc-wdc / dislib / tests / test_array.py View on GitHub
def _equal_arrays(x1, x2):
    if sp.issparse(x1):
        return np.allclose(x1.toarray(), x2.toarray())
    else:
        return np.allclose(x1, x2)
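This helper only tests x1 and then calls x2.toarray(), so it assumes both arguments are sparse together or dense together. Below is a minimal sketch of a variant that tolerates mixed inputs; the name _equal_arrays_mixed is ours, not dislib's.

import numpy as np
import scipy.sparse as sp

def _equal_arrays_mixed(x1, x2):
    # densify each argument independently so sparse/dense mixes compare cleanly
    a1 = x1.toarray() if sp.issparse(x1) else np.asarray(x1)
    a2 = x2.toarray() if sp.issparse(x2) else np.asarray(x2)
    return np.allclose(a1, a2)
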
github fabsig / KTBoost / KTBoost / tree.py View on GitHub
def fit(self, X, y, sample_weight=None, check_input=True,
            X_idx_sorted=None):

        random_state = check_random_state(self.random_state)
        if check_input:
            X = check_array(X, dtype=DTYPE, accept_sparse="csc")
            y = check_array(y, ensure_2d=False, dtype=None)
            if issparse(X):
                X.sort_indices()

                if X.indices.dtype != np.intc or X.indptr.dtype != np.intc:
                    raise ValueError("No support for np.int64 index based "
                                     "sparse matrices")

        # Determine output settings
        n_samples, self.n_features_ = X.shape
        is_classification = is_classifier(self)

        y = np.atleast_1d(y)
        expanded_class_weight = None

        if y.ndim == 1:
            # reshape is necessary to preserve data contiguity;
            # [:, np.newaxis] does not.
            y = np.reshape(y, (-1, 1))
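The sparse branch above sorts the CSC indices and rejects 64-bit index arrays, since the tree code only handles np.intc indices. Here is a minimal sketch of downcasting the index arrays before fitting, assuming the index values fit in 32 bits:

import numpy as np
import scipy.sparse as sp

X = sp.random(100, 10, density=0.2, format="csc")
if sp.issparse(X) and X.indices.dtype != np.intc:
    # downcast 64-bit indices so the dtype check above passes
    X.indices = X.indices.astype(np.intc)
    X.indptr = X.indptr.astype(np.intc)
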
github scikit-learn / scikit-learn / sklearn / decomposition / _incremental_pca.py View on GitHub
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data, where n_samples is the number of samples and
            n_features is the number of features.
        check_input : bool
            Run check_array on X.

        y : Ignored

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        if check_input:
            if sparse.issparse(X):
                raise TypeError(
                    "IncrementalPCA.partial_fit does not support "
                    "sparse input. Either convert data to dense "
                    "or use IncrementalPCA.fit to do so in batches.")
            X = check_array(X, copy=self.copy, dtype=[np.float64, np.float32])
        n_samples, n_features = X.shape
        if not hasattr(self, 'components_'):
            self.components_ = None

        if self.n_components is None:
            if self.components_ is None:
                self.n_components_ = min(n_samples, n_features)
            else:
                self.n_components_ = self.components_.shape[0]
        elif not 1 <= self.n_components <= n_features:
            raise ValueError("n_components=%r invalid for n_features=%d, need "
                             "more rows than columns for IncrementalPCA "
                             "processing" % (self.n_components, n_features))
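Because partial_fit rejects sparse input outright, a caller holding a sparse matrix has to densify it one batch at a time. A minimal sketch of that pattern (the shapes and batch size here are ours):

import scipy.sparse as sp
from sklearn.decomposition import IncrementalPCA

X = sp.random(1000, 50, density=0.1, format="csr")
ipca = IncrementalPCA(n_components=5)
for start in range(0, X.shape[0], 100):
    # densify only the current batch to keep memory bounded
    ipca.partial_fit(X[start:start + 100].toarray())
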
github RaRe-Technologies / gensim / gensim / sklearn_api / ldamodel.py View on GitHub
This method can be used in two ways:
        * On an unfitted model in which case the model is initialized and trained on `X`.
        * On an already fitted model in which case the model is **updated** by `X`.

        Parameters
        ----------
        X : {iterable of iterable of (int, int), scipy.sparse matrix}
            A collection of documents in BOW format used for training the model.

        Returns
        -------
        :class:`~gensim.sklearn_api.ldamodel.LdaTransformer`
            The trained model.

        """
        if sparse.issparse(X):
            X = matutils.Sparse2Corpus(sparse=X, documents_columns=False)

        if self.gensim_model is None:
            self.gensim_model = models.LdaModel(
                num_topics=self.num_topics, id2word=self.id2word,
                chunksize=self.chunksize, passes=self.passes, update_every=self.update_every,
                alpha=self.alpha, eta=self.eta, decay=self.decay, offset=self.offset,
                eval_every=self.eval_every, iterations=self.iterations, gamma_threshold=self.gamma_threshold,
                minimum_probability=self.minimum_probability, random_state=self.random_state,
                dtype=self.dtype
            )

        self.gensim_model.update(corpus=X)
        return self
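The only sparse-specific step here is Sparse2Corpus, which wraps a scipy matrix as a gensim corpus. A minimal sketch of that conversion on its own (the toy matrix is ours):

import scipy.sparse as sp
from gensim import matutils

X = sp.csr_matrix([[2, 0, 1], [0, 3, 0]])  # documents as rows
if sp.issparse(X):
    corpus = matutils.Sparse2Corpus(sparse=X, documents_columns=False)
for doc in corpus:
    print(doc)  # each document as a list of (term_id, value) pairs
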
github RaRe-Technologies / gensim / old / ipyutils.py View on GitHub
def toNLTK(_mat, prefix = 2):
    import mscs
    if scipy.sparse.issparse(_mat):
        mat = _mat.tocsc()
    else:
        mat = _mat
    result = []
    for i in xrange(mat.shape[1]):
        if scipy.sparse.issparse(mat):
            a0 = mat[:, i].toarray()
        else:
            a0 = mat[:, i]
        nnind = a0.nonzero()[0]
        nnvals = a0.take(nnind)
        features = dict(zip(nnind, nnvals))
        id = docids[i]
        okarts = [a for a in arts if a.id_int == id]
        if len(okarts) != 1:
            raise Exception('%i articles with id=%s' % (len(okarts), repr(id)))
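The core pattern in toNLTK is pulling one column out of a matrix that may or may not be sparse and turning its nonzero entries into a feature dict. A minimal Python 3 sketch of just that step (the toy matrix is ours):

import scipy.sparse as sp

mat = sp.csc_matrix([[0.0, 1.5], [2.0, 0.0], [0.0, 3.0]])
i = 1
col = mat[:, i].toarray().ravel() if sp.issparse(mat) else mat[:, i]
nnind = col.nonzero()[0]
features = dict(zip(nnind.tolist(), col[nnind].tolist()))
print(features)  # {0: 1.5, 2: 3.0}
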
github ksodipo / DataWiz / datawiz / label.py View on GitHub
def _inverse_binarize_multiclass(y, classes):
    """Inverse label binarization transformation for multiclass.

    Multiclass uses the maximal score instead of a threshold.
    """
    classes = np.asarray(classes)

    if sp.issparse(y):
        # Find the argmax for each row in y where y is a CSR matrix

        y = y.tocsr()
        n_samples, n_outputs = y.shape
        outputs = np.arange(n_outputs)
        row_max = sparse_min_max(y, 1)[1]
        row_nnz = np.diff(y.indptr)

        y_data_repeated_max = np.repeat(row_max, row_nnz)
        # picks out all indices obtaining the maximum per row
        y_i_all_argmax = np.flatnonzero(y_data_repeated_max == y.data)

        # For corner case where last row has a max of 0
        if row_max[-1] == 0:
            y_i_all_argmax = np.append(y_i_all_argmax, [len(y.data)])
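The sparse branch above is the intricate part; for dense scores, "inverse binarize by maximal score" is a one-liner. A minimal dense sketch (the toy scores and labels are ours):

import numpy as np

y = np.array([[0.1, 0.9, 0.0],
              [0.7, 0.2, 0.1]])
classes = np.array(['b', 'a', 'c'])
# pick the class whose column holds the maximal score in each row
print(classes[y.argmax(axis=1)])  # ['a' 'b']
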
github sawcordwell / pymdptoolbox / src / mdptoolbox / mdp.py View on GitHub
def _computeMatrixReward(self, reward, transition):
        if _sp.issparse(reward):
            # An approach like this might be more memory efficient
            # reward.data = reward.data * transition[reward.nonzero()]
            # return reward.sum(1).A.reshape(self.S)
            # but doesn't work as it is.
            return reward.multiply(transition).sum(1).A.reshape(self.S)
        elif _sp.issparse(transition):
            return transition.multiply(reward).sum(1).A.reshape(self.S)
        else:
            return _np.multiply(transition, reward).sum(1).reshape(self.S)
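All three branches compute the same expected reward, the row-wise sum of transition probability times reward; they differ only in which operand is sparse. A minimal sketch checking that the sparse and dense paths agree (the toy matrices are ours):

import numpy as np
import scipy.sparse as sp

S = 3
transition = np.full((S, S), 1.0 / S)
reward = np.arange(S * S, dtype=float).reshape(S, S)

dense = np.multiply(transition, reward).sum(1).reshape(S)
# .multiply is elementwise for sparse matrices; .A turns the summed
# np.matrix result back into a plain ndarray
sparse = sp.csr_matrix(reward).multiply(transition).sum(1).A.reshape(S)
assert np.allclose(dense, sparse)
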
github RaRe-Technologies / gensim / gensim / matutils.py View on GitHub
def _convert_vec(vec1, vec2, num_features=None):
    if scipy.sparse.issparse(vec1):
        vec1 = vec1.toarray()
    if scipy.sparse.issparse(vec2):
    vec2 = vec2.toarray()  # convert both vectors to dense if they were passed as sparse matrices
    if isbow(vec1) and isbow(vec2):  # if they are in bag of words format we make it dense
        if num_features is not None:  # if not None, make as large as the documents drawing from
            dense1 = sparse2full(vec1, num_features)
            dense2 = sparse2full(vec2, num_features)
            return dense1, dense2
        else:
            max_len = max(len(vec1), len(vec2))
            dense1 = sparse2full(vec1, max_len)
            dense2 = sparse2full(vec2, max_len)
            return dense1, dense2
    else:
        # if the vectors are not in BOW format, toarray() above may have left a
        # nested list; scipy's Kullback-Leibler implementation fails on that,
        # so pick out only the nested list.
        if len(vec1) == 1:
            vec1 = vec1[0]
        if len(vec2) == 1:
            vec2 = vec2[0]
        return vec1, vec2
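sparse2full, used above, expands a gensim BOW list of (id, weight) pairs into a fixed-length dense vector. A minimal sketch (the toy document is ours):

from gensim.matutils import sparse2full

bow = [(0, 2.0), (3, 1.0)]  # gensim BOW: (term_id, weight) pairs
print(sparse2full(bow, 5))  # [2. 0. 0. 1. 0.]
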
github Savvysherpa / slda / slda / topic_models.py View on GitHub
def _create_lookups(self, X):
        """
        Create document and term lookups for all tokens.
        """
        docs, terms = np.nonzero(X)
        if issparse(X):
            x = np.array(X[docs, terms])[0]
        else:
            x = X[docs, terms]
        doc_lookup = np.ascontiguousarray(np.repeat(docs, x), dtype=np.intc)
        term_lookup = np.ascontiguousarray(np.repeat(terms, x), dtype=np.intc)
        return doc_lookup, term_lookup
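The helper expands a document-term count matrix into one entry per token occurrence. A standalone sketch of the same trick outside the class (the toy matrix is ours):

import numpy as np
import scipy.sparse as sp

X = sp.csr_matrix([[2, 0, 1],
                   [0, 3, 0]])
docs, terms = np.nonzero(X)
counts = np.array(X[docs, terms])[0] if sp.issparse(X) else X[docs, terms]
# repeat each (doc, term) pair once per token occurrence
doc_lookup = np.repeat(docs, counts)
term_lookup = np.repeat(terms, counts)
print(doc_lookup)   # [0 0 0 1 1 1]
print(term_lookup)  # [0 0 2 1 1 1]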