Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def make_picard_summary_plots(inputfiles, ncol=4):
    """Build picard summary figures from metrics/histogram file pairs.

    Args:
      inputfiles (list): file names consumed pairwise; even positions are
        metrics files, odd positions the corresponding histogram files.
        A trailing unpaired file is ignored (``zip`` stops at the shorter
        slice).
      ncol (int): number of plots per row in each gridplot

    Returns:
      dict: ``{label: {key: {'uri': [...], 'file': [...], 'fig': gridplot}}}``
        where ``label`` is the ``label`` attribute of the parsed metrics
        object and ``key`` is the metrics file name without its extension.
    """
    tools = "pan,box_zoom,wheel_zoom,box_select,lasso_select,reset,save,hover"
    summary = {}
    for metrics_file, hist_file in zip(inputfiles[0::2], inputfiles[1::2]):
        df_met = _read_metrics(metrics_file)
        df_hist = _read_metrics(hist_file)
        metric_plots = df_met.plot_metrics(tools=tools)
        key = os.path.splitext(metrics_file)[0]
        entry = {'uri': [data_uri(metrics_file)], 'file': [metrics_file]}
        # The per-key entry is registered for every file pair, not only the
        # first time a label is seen; otherwise a second file with the same
        # label would have no entry to fill in.
        summary.setdefault(df_met.label, {})[key] = entry
        hist_plots = []
        # The histogram reader result may be None; then only metrics plots
        # are laid out.
        if df_hist is not None:
            hist_plots = df_hist.plot_hist(tools=tools)
            entry['uri'].append(data_uri(hist_file))
            entry['file'].append(hist_file)
        plots = metric_plots + hist_plots
        # Chunk the flat plot list into rows of at most ``ncol`` plots.
        rows = [plots[start:start + ncol]
                for start in range(0, len(plots), ncol)]
        entry['fig'] = gridplot(rows)
    return summary
def make_cutadapt_summary_plot(inputfile):
    """Make a scatter chart of cutadapt read1/read2 percentages per sample.

    Args:
      inputfile (str): csv file with at least the columns ``sample``,
        ``read1_pct`` and ``read2_pct``

    Returns:
      dict: ``{'fig': chart, 'uri': data_uri(inputfile), 'file': inputfile}``
    """
    df_summary = pd.read_csv(inputfile)
    # Treat sample names as strings so they are used as labels even when
    # they look numeric.
    df_summary["sample"] = df_summary["sample"].astype("str")
    # Reshape to long format: one row per (sample, read1_pct/read2_pct).
    df_pivot = (
        df_summary[["sample", "read1_pct", "read2_pct"]]
        .pivot_table(index="sample")
        .stack()
        .reset_index([0, 1])
    )
    df_pivot.columns = ["Sample", "Level", "Percent"]
    fig = Scatter(df_pivot, x="Sample", y="Percent",
                  color="Level", legend="top_right",
                  title="Cutadapt metrics")
    return {'fig': fig, 'uri': data_uri(inputfile), 'file': inputfile}
def make_qualimap_plots(qmglobals=None, coverage_per_contig=None,
**kwargs):
"""Make qualimap summary plots"""
retval = {'fig': {'coverage_per_contig': None, 'globals': None},
'file': {'coverage_per_contig': coverage_per_contig,
'globals': qmglobals},
'uri': {'coverage_per_contig': data_uri(coverage_per_contig),
'globals': data_uri(qmglobals)}}
# Globals
if qmglobals is not None:
df_all = pd.read_csv(qmglobals)
df_all['number of unique reads'] = df_all['number of mapped reads']\
- df_all['number of duplicated reads']
plot_config = {'x': 'Sample',
'y': ['number of reads',
'number of mapped reads',
'number of duplicated reads',
'number of unique reads'],
'df': df_all, 'groups': ['Sample'],
'y_range': [0, max(df_all['number of reads'])],
'xaxis': {'axis_label': "sample",
'major_label_orientation': np.pi / 3,
'axis_label_text_font_size': '10pt'},
tooltips=[{'type': HoverTool, 'tips': [
('Sample', '@samples'),
('ExonMap', '@ExonMap'), ]}],
title="Tags mapping to exons", **plot_config)
# Fraction reads mapping to the 10% right-most end
p2 = make_scatterplot(y='three_prime_map', x='samples',
source=source,
tooltips=[{
'type': HoverTool, 'tips': [
('Sample', '@samples'),
('ThreePrimeMap', '@three_prime_map'), ]}],
title="Reads mapping to 3' end", **plot_config)
p2.x_range = p1.x_range
return {'fig': gridplot([[p1, p2]]),
'uri': [data_uri(rd_file), data_uri(gc_file)],
'file': [rd_file, gc_file]}
def make_qualimap_plots(qmglobals=None, coverage_per_contig=None,
**kwargs):
"""Make qualimap summary plots"""
retval = {'fig': {'coverage_per_contig': None, 'globals': None},
'file': {'coverage_per_contig': coverage_per_contig,
'globals': qmglobals},
'uri': {'coverage_per_contig': data_uri(coverage_per_contig),
'globals': data_uri(qmglobals)}}
# Globals
if qmglobals is not None:
df_all = pd.read_csv(qmglobals)
df_all['number of unique reads'] = df_all['number of mapped reads']\
- df_all['number of duplicated reads']
plot_config = {'x': 'Sample',
'y': ['number of reads',
'number of mapped reads',
'number of duplicated reads',
'number of unique reads'],
'df': df_all, 'groups': ['Sample'],
'y_range': [0, max(df_all['number of reads'])],
'xaxis': {'axis_label': "sample",
'major_label_orientation': np.pi / 3,
'axis_label_text_font_size': '10pt'},
'yaxis': {'axis_label': "count",
# p6.yaxis.axis_label = "Count"
df_qc = None
if do_qc:
# QC summary table
d = {'samples': samples,
'read_filter': df['Number_of_input_reads'] < min_reads,
'map_filter': df['Uniquely_mapped_reads_PCT'] < min_map,
'mismatch_filter': df['mismatch_sum'] > 1.0, }
d['filter'] = d['read_filter'] | d['map_filter'] | d['mismatch_filter']
df_qc = pd.DataFrame(data=d, index=df.samples)
return {'fig': VBox(children=[gridplot([[p1, p2, p3]]),
HBox(children=[gridplot([[p4, p5]])])]),
'table': table, 'qctable': df_qc,
'uri': data_uri(inputfile),
'file': inputfile}
Args:
inputfile (str): input file name
qc (dict): qc parameter values
ncol (int): number of columns in returned gridplot
share_x_range (bool): share x range between plots
kwargs: keyword argument passed to plots
Returns:
gp (py:class:`~bokeh.models.plots.GridPlot`): gridplot object
"""
retval = {'fig': None,
'file': inputfile,
'table': None,
'uri': data_uri(inputfile)}
if inputfile is None:
return retval
df = pd.read_csv(inputfile, index_col=0)
columns = [
TableColumn(field="samples", title="Sample"),
TableColumn(field="Number_of_input_reads",
title="Number of input reads"),
TableColumn(field="Uniquely_mapped_reads_PCT",
title="Uniquely mapped reads (%)"),
TableColumn(field="Mismatch_rate_per_base__PCT",
title="Mismatch rate per base (%)"),
TableColumn(field="Insertion_rate_per_base",
title="Insertion rate per base (%)"),
TableColumn(field="Deletion_rate_per_base",
title="Deletion rate per base (%)"),
def make_picard_summary_plots(inputfiles, ncol=4):
    """Create gridplots summarising picard metrics and histogram files.

    Args:
      inputfiles (list): alternating metrics file / histogram file names;
        files are paired by position and an unpaired trailing file is
        dropped by ``zip``.
      ncol (int): maximum number of plots in one gridplot row

    Returns:
      dict: nested mapping ``label -> key -> {'uri', 'file', 'fig'}``;
        ``key`` is the metrics file name stripped of its extension, ``uri``
        and ``file`` are lists covering the metrics file and, when the
        histogram could be read, the histogram file.
    """
    tools = "pan,box_zoom,wheel_zoom,box_select,lasso_select,reset,save,hover"
    result = {}
    file_pairs = zip(inputfiles[0::2], inputfiles[1::2])
    for metrics_file, hist_file in file_pairs:
        df_met = _read_metrics(metrics_file)
        df_hist = _read_metrics(hist_file)
        plist = df_met.plot_metrics(tools=tools)
        key = os.path.splitext(metrics_file)[0]
        record = {'uri': [data_uri(metrics_file)], 'file': [metrics_file]}
        # Register the record for every pair; only the label-level dict is
        # created on first sight of a label.
        result.setdefault(df_met.label, {})[key] = record
        if df_hist is not None:
            # Histogram data is optional; include its plots and provenance
            # only when the reader returned something.
            plist = plist + df_hist.plot_hist(tools=tools)
            record['uri'].append(data_uri(hist_file))
            record['file'].append(hist_file)
        else:
            plist = plist + []
        # Lay the plots out in rows of at most ``ncol``.
        grid = [plist[i:i + ncol] for i in range(0, len(plist), ncol)]
        record['fig'] = gridplot(grid)
    return result