Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
""" Make the HTSeq Count assignment rates plot """
cats = OrderedDict()
cats['assigned'] = { 'name': 'Assigned' }
cats['ambiguous'] = { 'name': 'Ambiguous' }
cats['alignment_not_unique'] = { 'name': 'Alignment Not Unique' }
cats['no_feature'] = { 'name': 'No Feature' }
cats['too_low_aQual'] = { 'name': 'Too Low aQual' }
cats['not_aligned'] = { 'name': 'Not Aligned' }
config = {
'id': 'htseq_assignment_plot',
'title': 'HTSeq: Count Assignments',
'ylab': '# Reads',
'hide_zero_cats': False,
'cpswitch_counts_label': 'Number of Reads'
}
return bargraph.plot(self.htseq_data, cats, config)
# Tally each sample's reads into three bins keyed by mapping quality:
# 'unmapped' -> UAligned, mapQ >= 40 -> OAligned, everything else -> SAligned.
for sid, dd in self.mdata['align_mapq'].items():
    # Every counter starts at zero so the plotted totals equal the summed
    # input counts (UAligned previously started at 1, adding a phantom read).
    pd[sid] = {'OAligned': 0, 'SAligned': 0, 'UAligned': 0}
    for mapq, cnt in dd.items():
        if mapq == 'unmapped':
            pd[sid]['UAligned'] += int(cnt)
        elif int(mapq) >= 40:
            pd[sid]['OAligned'] += int(cnt)
        else:
            pd[sid]['SAligned'] += int(cnt)

# Publish the three bins as a stacked bar graph section.
self.add_section(
    name = 'Mapping Summary',
    anchor = 'biscuit-mapping',
    description = 'This shows the fraction of optimally aligned reads, which is defined by mapQ >= 40.',
    helptext = 'A good library should have high fraction of reads optimally aligned. Suboptimally aligned reads include both nonunique alignments and imperfect alignments.',
    plot = bargraph.plot(
        pd,
        OrderedDict([
            ('OAligned', {'name':'Optimally Aligned Reads'}),
            ('SAligned', {'name':'Suboptimally Aligned Reads'}),
            ('UAligned', {'name':'Unaligned Reads'})
        ]),
        {
            'id':'biscuit_mapping_summary',
            'title':'BISCUIT: Mapping Summary',
            'ylab':'Number of Reads',
            'cpswitch_counts_label': '# Reads'
        }
    )
)
# Mapping quality together in one plot
# Per-sample sum of the counts in every bucket except 'unmapped'.
total = {}
for sid, dd in self.mdata['align_mapq'].items():
    mapped_counts = [int(cnt) for mapq, cnt in dd.items() if mapq != "unmapped"]
    total[sid] = sum(mapped_counts)
pd_mapping = {}
# Config for the plot: one switchable dataset label per base context.
cpg_label = {'name': 'CpG', 'ylab': '# CpG Bases'}
noncpg_label = {'name': 'Non-CpG', 'ylab': '# Non-CpG Bases'}
pconfig = dict(
    id='picard_rrbs_converted_bases_plot',
    title='Picard: Converted Bases',
    ylab='# CpG Bases',
    cpswitch_counts_label='Number of Bases',
    data_labels=[cpg_label, noncpg_label],
)

# Both datasets are plotted with the same category keys.
converted_bases_plot = bargraph.plot(
    [pdata_cpg, pdata_noncpg], [keys, keys], pconfig
)
self.add_section(
    name='RRBS Converted Bases',
    anchor='picard-rrbssummary-convertedbases',
    plot=converted_bases_plot,
)
# Make the bar plot of processed reads
pdata = dict()
for s_name, metrics in self.picard_rrbs_metrics.items():
    reads = pdata[s_name] = dict()
    reads['with_no_cpg'] = metrics['READS_WITH_NO_CPG']
    reads['ignored_short'] = metrics['READS_IGNORED_SHORT']
    reads['ignored_mismatches'] = metrics['READS_IGNORED_MISMATCHES']
    # Aligned reads left over once the three categories above are removed.
    reads['not_ignored'] = (
        metrics['READS_ALIGNED']
        - reads['with_no_cpg']
        - reads['ignored_short']
        - reads['ignored_mismatches']
    )
config = {
'id': 'seqyclean-discarded-reads-plot',
'title': 'SeqyClean: Discarded Reads',
'ylab': 'Number of Reads'
}
keys = [
'SEDiscByContam', # single end
'SEDiscByLength',
'PE1DiscByContam', # paired end
'PE1DiscByLength',
'PE2DiscByContam',
'PE2DiscByLength',
'DiscByContam', # 454 data
'DiscByLength'
]
return bargraph.plot(self.seqyclean_data, self._clean_keys(keys), config)
# One single-entry record per sample holding its 'polyA' value.
data = {}
for name, sample in self.sequana_data.items():
    data[name] = {'polyA': sample["polyA"]}

pconfig = dict(title="polyA", logswitch=True)

# Passing None as the categories argument leaves category selection to bargraph.
polya_plot = bargraph.plot(data, None, pconfig)
self.add_section(
    name='Number of polyA',
    # NOTE(review): anchor is spelled 'poylA'; kept unchanged because existing
    # report links may depend on it -- confirm before renaming.
    anchor='poylA',
    description='polyA',
    helptext="",
    plot=polya_plot,
)
# NOTE(review): `data` and `name` are presumably bound by a per-sample loop
# that starts above this fragment -- confirm against the full method.
data[name] = {'failed_qc_pct': self.sequana_data[name]["failed_qc_pct"]}
pconfig = {
    "title": "Failed QC (%)",
    "percentages": True,
    "min": 0,
    "max": 100,
    "logswitch": False,
}

# Skip the plot entirely when no sample reports any failed-QC percentage.
total = sum(data[sample]['failed_qc_pct'] for sample in self.sequana_data)
if total == 0:
    plot = None
    description = "Failed QC (none found)."
else:
    plot = bargraph.plot(data, None, pconfig)
    description = "Failed QC (%)."

self.add_section(
    name = 'Failed QC (%)',
    anchor = 'failed_qc',
    # Use the text chosen above; it was previously computed but then replaced
    # by a hard-coded 'Failed QC', so the "none found" message never showed.
    description = description,
    helptext = "",
    plot = plot)
# Plot settings for CCS read counts; no "max" is set, so the axis has no
# fixed upper bound.
pconfig = dict(
    title="CCS reads",
    percentages=False,
    min=0,
    format='{0:.2f}',
    logswitch=False,
)
ccs_plot = bargraph.plot(data, None, pconfig)
self.add_section(
    name='CCS reads',
    anchor='ccs_reads',
    description='The following barplots summarizes the number of CCS reads.',
    helptext="",
    plot=ccs_plot,
)
"percentages": True,
"min": 0,
"max":100,
"logswitch": False,
}
# Legend entries for the two plotted categories. The labels now follow the
# 'fwd'/'rev' data keys and the section title; they previously read
# 'Mapped'/'Unmapped', which matched neither.
keys = OrderedDict()
keys['fwd'] = {'color': '#437bb1', 'name': 'Forward'}
keys['rev'] = {'color': '#b1084c', 'name': 'Reverse'}
self.add_section(
    name = 'Forward/Reverse',
    anchor = 'fwd_rev',
    description = 'Forward reverse',
    helptext = "",
    plot = bargraph.plot(data, keys, pconfig))
# NOTE(review): `data` and `name` are presumably bound by a per-sample loop
# that starts above this fragment -- confirm against the full method.
data[name] = dict(mean=self.sequana_data[name]["mean_length"])

# Axis starts at 100 and the log-scale switch is enabled.
pconfig = dict(
    title="Mean CCS read length",
    percentages=False,
    min=100,
    logswitch=True,
)
mean_length_plot = bargraph.plot(data, None, pconfig)
self.add_section(
    name='Mean CCS read length',
    anchor='mean_ccs_read_length',
    description='Mean CCS length of the reads',
    helptext="",
    plot=mean_length_plot,
)