import numpy as np
import pandas as pd
import seaborn as sns

sns.set(color_codes=True)
sns.set(font_scale=1.5)

freq_matrix = np.array([freq_table[(r1, r2)] for (r1, r2) in freq_table])
row_labels = [r1 + " - " + r2 for (r1, r2) in freq_table]
pdframe = pd.DataFrame(freq_matrix, index=row_labels, columns=col_labels)
# Scale down figsize if too large
figsize = [pdframe.shape[1], pdframe.shape[0]]
if figsize[1] > 320:
    figsize[0] *= 320 / figsize[1]
    figsize[1] *= 320 / figsize[1]
# Create clustermap
fingerprints = sns.clustermap(pdframe,
                              figsize=figsize,
                              annot=False,
                              col_cluster=cluster_columns,
                              linewidths=0.5,
                              linecolor='black',
                              cmap='Greens')
# Remove color bar
# fingerprints.cax.set_visible(False)
import matplotlib.pyplot as plt
plt.setp(fingerprints.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
plt.setp(fingerprints.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
fingerprints.savefig(out_file)
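# For reference, a minimal self-contained version of the pattern above, using
# made-up data (freq_table, col_labels, and cluster_columns come from the
# caller's context in the original):
import numpy as np
import pandas as pd
import seaborn as sns

demo = pd.DataFrame(np.random.rand(6, 4),
                    index=['r%d' % i for i in range(6)],
                    columns=['c%d' % j for j in range(4)])
cm = sns.clustermap(demo, cmap='Greens', linewidths=0.5, linecolor='black')
cm.cax.set_visible(False)  # hides the color bar, per the commented-out line above
cm.savefig('demo_fingerprints.png')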
filter_seqs = np.transpose(filter_seqs)
if drop_dead:
    filter_stds = filter_seqs.std(axis=1)
    filter_seqs = filter_seqs[filter_stds > 0]
# downsample to 500 randomly chosen sequence columns (sampled with replacement)
seqs_i = np.random.randint(0, filter_seqs.shape[1], 500)
hmin = np.percentile(filter_seqs[:, seqs_i], 0.1)
hmax = np.percentile(filter_seqs[:, seqs_i], 99.9)
sns.set(font_scale=0.3)
plt.figure()
sns.clustermap(filter_seqs[:, seqs_i], row_cluster=True, col_cluster=True,
               linewidths=0, xticklabels=False, vmin=hmin, vmax=hmax)
plt.savefig(out_pdf)
#out_png = out_pdf[:-2] + 'ng'
#plt.savefig(out_png, dpi=300)
plt.close()
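# np.random.randint above samples column indices with replacement, so duplicate
# columns are possible; a without-replacement variant of the downsampling step
# (a sketch, reusing the same filter_seqs array) could be:
import numpy as np

n_cols = filter_seqs.shape[1]
seqs_i = np.random.choice(n_cols, size=min(500, n_cols), replace=False)
subset = filter_seqs[:, seqs_i]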
def plot_ANIn_cov_heatmap(Ndb):
    gs = []
    for Mcluster in Ndb['MASH_cluster'].unique():
        db = Ndb[Ndb['MASH_cluster'] == Mcluster].copy()
        if len(db['reference'].unique()) == 1:
            continue
        # 'querry' (sic) is the actual column name in Ndb
        d = db.pivot(index="reference", columns="querry", values="alignment_coverage")
        g = sns.clustermap(d, method=METHOD)
        g.fig.suptitle("MASH cluster {0} - Alignment Coverage".format(Mcluster))
        plt.setp(g.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
        gs.append(g)
    return gs
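# A hypothetical call illustrating the expected shape of Ndb (column names taken
# from the function body; METHOD is assumed to be a scipy linkage method name):
import pandas as pd

METHOD = 'average'
Ndb = pd.DataFrame({
    'MASH_cluster':       [0, 0, 0, 0],
    'reference':          ['gA', 'gA', 'gB', 'gB'],
    'querry':             ['gA', 'gB', 'gA', 'gB'],
    'alignment_coverage': [1.0, 0.83, 0.79, 1.0],
})
figs = plot_ANIn_cov_heatmap(Ndb)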
# LOLA
# take top n per PC
lola_enrichments["set_id"] = lola_enrichments[
    ["collection", "description", "cellType", "tissue", "antibody", "treatment"]
].astype(str).apply(" ".join, axis=1)
top = lola_enrichments.set_index('set_id').groupby("PC")['pValueLog'].nlargest(50)
top_ids = top.index.get_level_values('set_id').unique()
pivot = pd.pivot_table(
    lola_enrichments,
    index="set_id", columns="PC", values="pValueLog").fillna(0)
pivot.index = (pivot.index.str.replace(" nan", "")
               .str.replace("blueprint blueprint", "blueprint")
               .str.replace("None", ""))
top_ids = (top_ids.str.replace(" nan", "")
           .str.replace("blueprint blueprint", "blueprint")
           .str.replace("None", ""))
g = sns.clustermap(
    pivot.loc[top_ids],
    cbar_kws={"label": "Enrichment: -log10(p-value)"}, cmap="Spectral_r",
    col_cluster=True)
for tick in g.ax_heatmap.get_xticklabels():
    tick.set_rotation(90)
for tick in g.ax_heatmap.get_yticklabels():
    tick.set_rotation(0)
g.fig.savefig(os.path.join(self.results_dir, "PCA.PC_pvalues.lola_enrichments.svg"), bbox_inches="tight", dpi=300)
g = sns.clustermap(
    pivot.loc[top_ids],
    cbar_kws={"label": "Enrichment: p-value z-score"},
    col_cluster=True, z_score=0)
for tick in g.ax_heatmap.get_xticklabels():
    tick.set_rotation(90)
for tick in g.ax_heatmap.get_yticklabels():
    tick.set_rotation(0)
# exclude saved bias files
if m == 1 or n == 1:
    raise Exception('Input is a vector, not a matrix; skipping')
print('matrix sample', arr[0:2, 0:2])
print('matrix shape:', arr.shape)
# exclude large matrix
m_max = 1000
n_max = 1000
if m > m_max or n > n_max:
    print('matrix too large; down-sampling to at most 1000 per dimension')
    arr = random_subset_arr(arr, m_max, n_max)
# seaborn clustering (rows/columns of arr map directly to clustermap rows/columns)
heatmap = sns.clustermap(arr, method='average', cmap="summer", robust=True)
heatmap.savefig(in_name + '.' + tag + '.png', bbox_inches='tight')
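# random_subset_arr is not defined in this excerpt; a minimal sketch of what it
# presumably does (keep random rows/columns without replacement) might be:
import numpy as np

def random_subset_arr(arr, m_max, n_max):
    """Randomly keep at most m_max rows and n_max columns of a 2-D array."""
    rows = np.random.choice(arr.shape[0], size=min(m_max, arr.shape[0]), replace=False)
    cols = np.random.choice(arr.shape[1], size=min(n_max, arr.shape[1]), replace=False)
    return arr[np.ix_(rows, cols)]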
ratio = m.loc[sex_chroms[1]] - m.loc[sex_chroms[0]]
ratio.name = "{}_to_{}_ratio".format(sex_chroms[1], sex_chroms[0])
ratio.to_csv(
    os.path.join(output_dir, self.name + "." + output_prefix + ".csv"), header=True
)
if plot:
    ratio.sort_values(inplace=True)
    m = m.reindex(ratio.index, axis=1)
    # Clustermap
    if isinstance(ratio.index, pd.MultiIndex):
        cols = m.columns.get_level_values("sample_name")
    else:
        cols = m.columns
    grid = sns.clustermap(
        m.T,
        z_score=1,
        center=0,
        cmap="RdBu_r",
        figsize=(m.shape[0] * 0.3, m.shape[1] * 0.3),
        row_cluster=False,
        col_cluster=False,
        cbar_kws={"label": "Deviation from mean\nchromosome accessibility"},
        yticklabels=cols,
    )
    grid.ax_heatmap.set_xlabel("Chromosomes")
    grid.ax_heatmap.set_ylabel("Samples")
    savefig(
        grid, os.path.join(output_dir, self.name + "." + output_prefix + ".clustermap.svg")
    )
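# savefig above is a project-specific helper (it takes the grid, not a figure);
# a minimal stand-in with the same call shape, assuming only matplotlib:
def savefig(grid, path):
    grid.fig.savefig(path, bbox_inches='tight', dpi=300)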
def cluster_map(data, names):
    import seaborn as sns
    import pylab as plt
    data = data.loc[names]
    # note: np.log(0) yields -inf, which fillna(0) does not replace
    X = np.log(data).fillna(0)
    cg = sns.clustermap(X, cmap='RdYlBu', figsize=(8, 9), lw=1, linecolor='gray')
    plt.setp(cg.ax_heatmap.yaxis.get_majorticklabels(), rotation=0)
    plt.setp(cg.ax_heatmap.xaxis.get_majorticklabels(), rotation=90)
    cg.fig.subplots_adjust(right=.75)
    return cg
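# Example call with made-up data (names selects the rows to plot; values are
# assumed positive, since the function log-transforms them):
import numpy as np
import pandas as pd

counts = pd.DataFrame(np.random.randint(1, 100, size=(5, 4)),
                      index=list('abcde'), columns=['s1', 's2', 's3', 's4'])
cg = cluster_map(counts, names=['a', 'c', 'e'])
cg.savefig('cluster_map_demo.png')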
def save_bottle_neck_representation():
    print("> save bottle-neck representation")
    # todo: change variable name for each model
    code_bottle_neck_input = sess.run(
        e_a1, feed_dict={X: df.values, pIn_holder: 1, pHidden_holder: 1})
    np.save('pre_train/code_neck_valid.npy', code_bottle_neck_input)
    # todo: hclust, but seaborn not on server yet
    clustermap = sns.clustermap(code_bottle_neck_input)
    clustermap.savefig('./plots/bottle_neck.hclust.png')
if out_dir is not None:
    dist_df.to_csv('%s/%s_mcs_dist_table.csv' % (out_dir, file_prefix), index=False)
    for k in range(10):
        mol_i = base_mols[dist_df.i.values[k]]
        mol_j = base_mols[dist_df.j.values[k]]
        img_file_i = '%s/%d_%s.png' % (out_dir, k, compound_ids[dist_df.i.values[k]])
        img_file_j = '%s/%d_%s.png' % (out_dir, k, compound_ids[dist_df.j.values[k]])
        Draw.MolToFile(mol_i, img_file_i, size=(500, 500), fitImage=False)
        Draw.MolToFile(mol_j, img_file_j, size=(500, 500), fitImage=False)
mcs_linkage = linkage(mcs_dist, method='complete')
mcs_df = pd.DataFrame(mcs_dist, columns=compound_ids, index=compound_ids)
if out_dir is not None:
    pdf_path = '%s/%s_mcs_clustermap.pdf' % (out_dir, file_prefix)
    pdf = PdfPages(pdf_path)
g = sns.clustermap(mcs_df, row_linkage=mcs_linkage, col_linkage=mcs_linkage, figsize=(12, 12), cmap='plasma')
if out_dir is not None:
    pdf.savefig(g.fig)
    pdf.close()
# Draw a UMAP projection based on MCS distance
mapper = umap.UMAP(n_neighbors=20, min_dist=0.1, n_components=2, metric='precomputed', random_state=17)
reps = mapper.fit_transform(mcs_dist)
rep_df = pd.DataFrame.from_records(reps, columns=['x', 'y'])
rep_df['compound_id'] = compound_ids
if out_dir is not None:
    pdf_path = '%s/%s_mcs_umap_proj.pdf' % (out_dir, file_prefix)
    pdf = PdfPages(pdf_path)
fig, ax = plt.subplots(figsize=(12, 12))
if responses is None:
    sns.scatterplot(x='x', y='y', data=rep_df, ax=ax)
else:
    # plausible reconstruction of the truncated branch: color points by response
    rep_df['response'] = responses
    sns.scatterplot(x='x', y='y', hue='response', data=rep_df, ax=ax)
sanitize_anndata(adata)
if use_raw is None and adata.raw is not None:
    use_raw = True
X = adata.raw.X if use_raw else adata.X
if issparse(X):
    X = X.toarray()
df = pd.DataFrame(X, index=adata.obs_names, columns=adata.var_names)
if obs_keys is not None:
    row_colors = adata.obs[obs_keys]
    _utils.add_colors_for_categorical_sample_annotation(adata, obs_keys)
    # do this more efficiently... just a quick solution
    lut = dict(
        zip(row_colors.cat.categories, adata.uns[obs_keys + '_colors'])
    )
    row_colors = adata.obs[obs_keys].map(lut)
    g = sns.clustermap(df, row_colors=row_colors.values, **kwds)
else:
    g = sns.clustermap(df, **kwds)
show = settings.autoshow if show is None else show
_utils.savefig_or_show('clustermap', show=show, save=save)
if show:
    pl.show()
else:
    return g
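# The block above is essentially the body of scanpy's sc.pl.clustermap; typical
# usage through the public API looks like this (pbmc68k_reduced is a bundled demo
# dataset, and 'bulk_labels' is assumed to be one of its categorical obs columns):
import scanpy as sc

adata = sc.datasets.pbmc68k_reduced()
sc.pl.clustermap(adata, obs_keys='bulk_labels')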