# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_einsum():
    """Verify the einsum-based helpers against their explicit numpy equivalents."""
    from scvelo.tools.utils import norm, prod_sum_obs, prod_sum_var

    first, second = np.random.rand(5, 4), np.random.rand(5, 4)
    # prod_sum_obs sums the elementwise product over observations (rows),
    # prod_sum_var over variables (columns).
    assert np.allclose(prod_sum_obs(first, second), (first * second).sum(axis=0))
    assert np.allclose(prod_sum_var(first, second), (first * second).sum(axis=1))
    # norm should be the per-row Euclidean norm.
    assert np.allclose(norm(first), np.linalg.norm(first, axis=1))
# Fragment: tail of a velocity-computation routine (enclosing `def` not in view;
# presumably scvelo's `velocity` — TODO confirm). Indentation was stripped in
# this extract; code is kept byte-identical.
if diff_kinetics in adata.var.keys():
# Prefer the grouping recorded by recover_dynamics; otherwise fall back to 'clusters'.
if diff_kinetics in adata.uns['recover_dynamics']:
groupby = adata.uns['recover_dynamics']['fit_diff_kinetics']
else:
groupby = 'clusters'
clusters = adata.obs[groupby]
# For each gene annotated with differential kinetics, zero the velocity of the
# cells belonging to the listed clusters (idx is 0 for those cells, 1 elsewhere).
for i, v in enumerate(np.array(adata.var[diff_kinetics].values, dtype=str)):
if len(v) > 0 and v != 'nan':
idx = 1 - clusters.isin([a.strip() for a in v.split(',')])
adata.layers[vkey][:, i] *= idx
# Dynamical mode also carries an unspliced-velocity layer to mask.
if mode == 'dynamical':
adata.layers[f'{vkey}_u'][:, i] *= idx
# Record the parameters used for this run and log completion.
adata.uns[f'{vkey}_params'] = {'mode': mode, 'fit_offset': fit_offset, 'perc': perc}
logg.info('    finished', time=True, end=' ' if settings.verbosity > 2 else '\n')
logg.hint('added \n'
f'    \'{vkey}\', velocity vectors for each individual cell (adata.layers)')
return adata if copy else None
# Fragment: middle of a nearest-neighbors setup routine (enclosing `def` and the
# preceding `if method == ...` branch are not in view). Indentation was stripped
# in this extract; code is kept byte-identical.
neighbors.distances, neighbors.connectivities = compute_connectivities_umap(
neighbors.knn_indices, knn_distances, X.shape[0], n_neighbors=n_neighbors
)
# HNSW branch: approximate kNN via the FastNeighbors wrapper.
elif method == "hnsw":
X = adata.X if use_rep == "X" else adata.obsm[use_rep]
neighbors = FastNeighbors(n_neighbors=n_neighbors, num_threads=num_threads)
neighbors.fit(
X if n_pcs is None else X[:, :n_pcs],  # optionally restrict to leading PCs
metric=metric,
random_state=random_state,
**metric_kwds,
)
# Fallback: delegate to scanpy's Neighbors, with scanpy logging silenced.
else:
logg.switch_verbosity("off", module="scanpy")
with warnings.catch_warnings():  # ignore numba warning (umap/issues/252)
warnings.simplefilter("ignore")
neighbors = Neighbors(adata)
neighbors.compute_neighbors(
n_neighbors=n_neighbors,
knn=knn,
n_pcs=n_pcs,
method=method,
# "X_pca" is scanpy's default representation, so pass None in that case.
use_rep=None if use_rep == "X_pca" else use_rep,
random_state=random_state,
metric=metric,
metric_kwds=metric_kwds,
write_knn_indices=True,
)
logg.switch_verbosity("on", module="scanpy")
# Fragment: duplicated extract of the same velocity tail that appears earlier in
# this file (enclosing `def` not in view). Indentation was stripped; code kept
# byte-identical.
if diff_kinetics in adata.uns['recover_dynamics']:
groupby = adata.uns['recover_dynamics']['fit_diff_kinetics']
else:
groupby = 'clusters'
clusters = adata.obs[groupby]
# Zero out velocities of cells in clusters flagged for differential kinetics.
for i, v in enumerate(np.array(adata.var[diff_kinetics].values, dtype=str)):
if len(v) > 0 and v != 'nan':
idx = 1 - clusters.isin([a.strip() for a in v.split(',')])
adata.layers[vkey][:, i] *= idx
if mode == 'dynamical':
adata.layers[f'{vkey}_u'][:, i] *= idx
# Persist run parameters and log completion.
adata.uns[f'{vkey}_params'] = {'mode': mode, 'fit_offset': fit_offset, 'perc': perc}
logg.info('    finished', time=True, end=' ' if settings.verbosity > 2 else '\n')
logg.hint('added \n'
f'    \'{vkey}\', velocity vectors for each individual cell (adata.layers)')
return adata if copy else None
-------
Returns or updates `adata`
"""
# Fragment: body of a dynamics-recovery routine (the `def` and the bulk of its
# docstring are not in view; presumably scvelo's `recover_dynamics` — TODO
# confirm). Indentation was stripped in this extract; code kept byte-identical.
adata = data.copy() if copy else data
logg.info('recovering dynamics', r=True)
# Resolve `var_names`: a var key containing 'genes' selects the flagged genes,
# 'all'/'genes' selects every gene, otherwise it is kept as a literal name.
if isinstance(var_names, str) and var_names not in adata.var_names:
var_names = adata.var_names[adata.var[var_names] == True] if 'genes' in var_names and var_names in adata.var.keys() \
else adata.var_names if 'all' in var_names or 'genes' in var_names else var_names
var_names = make_unique_list(var_names, allow_array=True)
var_names = [name for name in var_names if name in adata.var_names]
if len(var_names) == 0:
raise ValueError('Variable name not found in var keys.')
# When requested, replace the boolean flag with the actual connectivity matrix.
if fit_connected_states:
fit_connected_states = get_connectivities(adata)
alpha, beta, gamma, t_, scaling = read_pars(adata)
idx = []
# T holds per-cell latent times; initialized to NaN if no prior fit exists.
L, P, T = [], [], adata.layers['fit_t'] if 'fit_t' in adata.layers.keys() else np.zeros(adata.shape) * np.nan
progress = logg.ProgressReporter(len(var_names))
# Fit the dynamical model gene by gene.
for i, gene in enumerate(var_names):
dm = DynamicsRecovery(adata, gene, use_raw=use_raw, load_pars=load_pars, fit_time=fit_time, fit_alpha=fit_alpha,
fit_switching=fit_switching, fit_scaling=fit_scaling, fit_steady_states=fit_steady_states,
fit_connected_states=fit_connected_states)
if max_iter > 1:
dm.fit(max_iter, learning_rate, assignment_mode=assignment_mode, min_loss=min_loss, method=method, **kwargs)
ix = np.where(adata.var_names == gene)[0][0]
idx.append(ix)
# Fragment: duplicated extract of the dynamics-recovery body above, with a few
# extra lines at the end that store the per-gene fit results. Indentation was
# stripped; code kept byte-identical.
if isinstance(var_names, str) and var_names not in adata.var_names:
var_names = adata.var_names[adata.var[var_names] == True] if 'genes' in var_names and var_names in adata.var.keys() \
else adata.var_names if 'all' in var_names or 'genes' in var_names else var_names
var_names = make_unique_list(var_names, allow_array=True)
var_names = [name for name in var_names if name in adata.var_names]
if len(var_names) == 0:
raise ValueError('Variable name not found in var keys.')
if fit_connected_states:
fit_connected_states = get_connectivities(adata)
alpha, beta, gamma, t_, scaling = read_pars(adata)
idx = []
L, P, T = [], [], adata.layers['fit_t'] if 'fit_t' in adata.layers.keys() else np.zeros(adata.shape) * np.nan
progress = logg.ProgressReporter(len(var_names))
for i, gene in enumerate(var_names):
dm = DynamicsRecovery(adata, gene, use_raw=use_raw, load_pars=load_pars, fit_time=fit_time, fit_alpha=fit_alpha,
fit_switching=fit_switching, fit_scaling=fit_scaling, fit_steady_states=fit_steady_states,
fit_connected_states=fit_connected_states)
if max_iter > 1:
dm.fit(max_iter, learning_rate, assignment_mode=assignment_mode, min_loss=min_loss, method=method, **kwargs)
ix = np.where(adata.var_names == gene)[0][0]
idx.append(ix)
# Store the fitted rate parameters (last optimization step) and latent times.
alpha[ix], beta[ix], gamma[ix], t_[ix], scaling[ix] = dm.pars[:, -1]
T[:, ix] = dm.t
L.append(dm.loss)
# Keep full parameter traces only for the first genes shown in result plots.
if plot_results and i < 4:
P.append(dm.pars)
# Fragment: body of a per-gene differential kinetic test (enclosing `def` not in
# view). Indentation was stripped in this extract; code kept byte-identical.
groups = clusters.cat.categories
pars_names = ["diff_kinetics", "pval_kinetics"]
diff_kinetics, pval_kinetics = read_pars(adata, pars_names=pars_names)
# Reuse previously stored per-group p-values if present and correctly shaped.
pvals = None
if "fit_pvals_kinetics" in adata.varm.keys():
pvals = pd.DataFrame(adata.varm["fit_pvals_kinetics"]).to_numpy()
if pvals is None or pvals.shape[1] != len(groups):
pvals = np.zeros((adata.n_vars, len(groups))) * np.nan
if "fit_diff_kinetics" in adata.var.keys():
diff_kinetics = np.array(adata.var["fit_diff_kinetics"])
else:
# Fixed-width string array for the cluster labels flagged per gene.
diff_kinetics = np.empty(adata.n_vars, dtype="|U16")
idx = []
progress = logg.ProgressReporter(len(var_names))
for i, gene in enumerate(var_names):
# load_pars=True / max_iter=0: reuse the stored fit, no re-optimization.
dm = DynamicsRecovery(adata, gene, use_raw=use_raw, load_pars=True, max_iter=0)
if dm.recoverable:
dm.differential_kinetic_test(clusters, **kwargs)
ix = np.where(adata.var_names == gene)[0][0]
idx.append(ix)
diff_kinetics[ix], pval_kinetics[ix], = dm.diff_kinetics, dm.pval_kinetics
pvals[ix] = np.array(dm.pvals_kinetics)
progress.update()
else:
logg.warn(dm.gene, "not recoverable due to insufficient samples.")
dm = None
progress.finish()
# Fragment: result-plotting tail of a fitting routine (enclosing `def`, the
# figure/axes creation, and `n_rows` are not in view). Indentation was stripped
# in this extract; code kept byte-identical.
pl.subplots_adjust(wspace=0.7, hspace=0.5)
# Plot the parameter traces (and loss) for up to the first four fitted genes.
for i, gene in enumerate(var_names[:4]):
if t_max is not False:
# Rescale rate parameters by the gene's time multiplier mi (t_ scales by mi).
mi = dm.m[i]
P[i] *= np.array([1 / mi, 1 / mi, 1 / mi, mi, 1])[:, None]
ax = axes[i] if n_rows > 1 else axes
for j, pij in enumerate(P[i]):
ax[j].plot(pij)
# Last panel shows the loss curve for this gene.
ax[len(P[i])].plot(L[i])
if i == 0:
pars_names = ["alpha", "beta", "gamma", "t_", "scaling", "loss"]
for j, name in enumerate(pars_names):
ax[j].set_title(name, fontsize=fontsize)
if return_model:
logg.info("\noutputs model fit of gene:", dm.gene)
return dm if return_model else adata if copy else None
def remove_duplicate_cells(adata):
    """Remove duplicate cells from `adata` in place and recompute neighbors.

    Duplicates are detected via `get_duplicate_cells` on the PCA embedding,
    which is computed first if not yet present.
    """
    if "X_pca" not in adata.obsm.keys():
        pca(adata)
    idx_duplicates = get_duplicate_cells(adata)
    if len(idx_duplicates) == 0:
        return
    # Boolean mask keeping everything except the flagged duplicates.
    keep = np.ones(adata.n_obs, dtype=bool)
    keep[idx_duplicates] = False
    logg.info("Removed", len(idx_duplicates), "duplicate cells.")
    adata._inplace_subset_obs(keep)
    # The kNN graph refers to removed cells, so rebuild it.
    neighbors(adata)
# Fragment: body of a moments-computation routine (enclosing `def` not in view;
# presumably scvelo's `moments` — TODO confirm). Indentation was stripped in
# this extract; code kept byte-identical.
layers = [layer for layer in {"spliced", "unspliced"} if layer in adata.layers]
# Normalize counts first if any present layer looks un-normalized.
if any([not_yet_normalized(adata.layers[layer]) for layer in layers]):
normalize_per_cell(adata)
# (Re)compute the kNN graph if more neighbors are requested than available.
if n_neighbors is not None and n_neighbors > get_n_neighs(adata):
if use_rep is None:
use_rep = "X_pca"
neighbors(
adata, n_neighbors=n_neighbors, use_rep=use_rep, n_pcs=n_pcs, method=method
)
verify_neighbors(adata)
if "spliced" not in adata.layers.keys() or "unspliced" not in adata.layers.keys():
logg.warn("Skipping moments, because un/spliced counts were not found.")
else:
logg.info(f"computing moments based on {mode}", r=True)
connectivities = get_connectivities(
adata, mode, n_neighbors=n_neighbors, recurse_neighbors=False
)
# First-order moments: neighborhood-smoothed spliced/unspliced counts,
# obtained as connectivities @ counts, densified to float32 arrays.
adata.layers["Ms"] = (
csr_matrix.dot(connectivities, csr_matrix(adata.layers["spliced"]))
.astype(np.float32)
.A
)
adata.layers["Mu"] = (
csr_matrix.dot(connectivities, csr_matrix(adata.layers["unspliced"]))
.astype(np.float32)
.A
)
# if renormalize: normalize_per_cell(adata, layers={'Ms', 'Mu'}, enforce=True)