Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def featurecounts_stats_table(self):
    """Register the featureCounts columns in the General Statistics table.

    Two column definitions are built and passed, together with
    ``self.featurecounts_data``, to ``self.general_stats_addcols``:

    * ``percent_assigned`` -- a 0-100 value shown with a ``%`` suffix.
    * ``Assigned`` -- a count that is cast to ``float`` and multiplied by
      ``config.read_count_multiplier``.
    """
    pct_assigned_col = {
        'title': '% Assigned',
        'description': '% Assigned reads',
        'max': 100,
        'min': 0,
        'suffix': '%',
        'scale': 'RdYlGn'
    }
    assigned_col = {
        'title': '{} Assigned'.format(config.read_count_prefix),
        'description': 'Assigned reads ({})'.format(config.read_count_desc),
        'min': 0,
        'scale': 'PuBu',
        # The multiplier is read from config on every call, not captured here.
        'modify': lambda count: float(count) * config.read_count_multiplier,
        'shared_key': 'read_count'
    }
    # A list of pairs keeps the columns in declaration order.
    columns = OrderedDict([
        ('percent_assigned', pct_assigned_col),
        ('Assigned', assigned_col),
    ])
    self.general_stats_addcols(self.featurecounts_data, columns)
def rsem_stats_table(self):
    """Add the RSEM alignable-read percentage to the General Statistics table.

    A single ``alignable_percent`` column (0-100, shown with a ``%`` suffix)
    is defined and passed, together with ``self.rsem_mapped_data``, to
    ``self.general_stats_addcols``.
    """
    headers = OrderedDict()
    headers['alignable_percent'] = {
        # Plain literals: neither string contains a replacement field, so
        # running them through str.format() never changed them.
        'title': '% Alignable',
        'description': '% Alignable reads',
        'max': 100,
        'min': 0,
        'suffix': '%',
        'scale': 'YlGn'
    }
    self.general_stats_addcols(self.rsem_mapped_data, headers)
'min': 0,
'max': 100,
'suffix': '%',
'scale': 'OrRd'
}
# Column definitions added to the `headers` mapping; each value is a dict of
# display settings for one column.
headers['Prephased'] = {
    'title': 'Prephased (%)',
    'description': 'The value used by RTA for the percentage of molecules in a cluster for which sequencing falls behind (phasing) or jumps ahead (prephasing) the current cycle within a read.',
    # In a format spec the grouping comma must come before the precision;
    # the previous '{:.,2f}' raised ValueError when applied to a number.
    'format': '{:,.2f}',
    'min': 0,
    'max': 100,
    'suffix': '%',
    'scale': 'OrRd'
}
headers['Reads'] = {
    'title': '{} Reads'.format(config.read_count_prefix),
    'description': 'The number of clusters ({})'.format(config.read_count_desc),
    'shared_key': 'read_count',
    # Expand to an absolute count first, then apply the configured multiplier.
    'modify': lambda x: (x*1000000.0) * config.read_count_multiplier, # number is already in millions
}
headers['Reads PF'] = {
    'title': '{} PF Reads'.format(config.read_count_prefix),
    'description': 'The number of passing filter clusters ({})'.format(config.read_count_desc),
    'shared_key': 'read_count',
    'modify': lambda x: (x*1000000.0) * config.read_count_multiplier, # number is already in millions
}
headers['Cycles Error'] = {
    'title': 'Cycles Error',
    'description': 'The number of cycles that have been error-rated using PhiX, starting at cycle 1.',
    # Same correction as above: '{:.,0f}' is not a valid format spec.
    'format': '{:,.0f}',
}
# NOTE(review): unlike the neighbouring column definitions, this entry sets no
# 'title' or 'description', and it combines a '%' suffix and a 'max' of 100
# with a x100 rescale -- confirm these settings really belong to 'Yield'.
headers['Yield'] = {
'max': 100,
'suffix': '%',
'scale': 'OrRd',
'format': '{:,.2f}',
'modify': lambda x: x * 100.0  # raw value multiplied by 100
}
# Non-primary alignment count; values are multiplied by
# config.read_count_multiplier and the title/description carry the configured
# prefix and unit text.
stats_headers['non-primary_alignments'] = {
'title': '{} Non-Primary'.format(config.read_count_prefix),
'description': 'Non-primary alignments ({})'.format(config.read_count_desc),
'min': 0,
'scale': 'PuBu',
'modify': lambda x: x * config.read_count_multiplier,
'shared_key': 'read_count'
}
# Mapped-read count, multiplied by the same configured factor. No colour
# 'scale' is set for this column.
stats_headers['reads_mapped'] = {
'title': '{} Reads Mapped'.format(config.read_count_prefix),
'description': 'Reads Mapped in the bam file ({})'.format(config.read_count_desc),
'min': 0,
'modify': lambda x: x * config.read_count_multiplier,
'shared_key': 'read_count'
}
# Mapped-read percentage: bounded to 0-100 and shown with a '%' suffix.
stats_headers['reads_mapped_percent'] = {
'title': '% Mapped',
'description': '% Mapped Reads',
'max': 100,
'min': 0,
'suffix': '%',
'scale': 'RdYlGn'
}
stats_headers['raw_total_sequences'] = {
'title': '{} Total seqs'.format(config.read_count_prefix),
'description': 'Total sequences in the bam file ({})'.format(config.read_count_desc),
log.debug("Duplicate sample name found! Overwriting: {}".format(f['s_name']))
self.add_data_source(f, section='flagstat')
self.samtools_flagstat[f['s_name']] = parsed_data
# Filter to strip out ignored sample names
self.samtools_flagstat = self.ignore_samples(self.samtools_flagstat)
if len(self.samtools_flagstat) > 0:
# Write parsed report data to a file (restructure first)
self.write_data_file(self.samtools_flagstat, 'multiqc_samtools_flagstat')
# General Stats Table
flagstats_headers = dict()
flagstats_headers['mapped_passed'] = {
'title': '{} Reads Mapped'.format(config.read_count_prefix),
'description': 'Reads Mapped in the bam file ({})'.format(config.read_count_desc),
'min': 0,
'modify': lambda x: x * config.read_count_multiplier,
'shared_key': 'read_count',
'placement' : 100.0
}
self.general_stats_addcols(self.samtools_flagstat, flagstats_headers, 'Samtools Flagstat')
# Make dot plot of counts
keys = OrderedDict()
reads = {
'min': 0,
'modify': lambda x: float(x) / 1000000.0,
'suffix': 'M reads',
'decimalPlaces': 2,
'shared_key': 'read_count'
def tophat_general_stats_table(self):
    """Publish the Tophat summary columns to the General Statistics table.

    Builds an ordered mapping with two columns and passes it, together with
    ``self.tophat_data``, to ``self.general_stats_addcols``:

    * ``overall_aligned_percent`` -- a 0-100 value with a ``%`` suffix.
    * ``aligned_not_multimapped_discordant`` -- a count multiplied by
      ``config.read_count_multiplier``.
    """
    table_columns = OrderedDict()
    table_columns['overall_aligned_percent'] = dict(
        title='% Aligned',
        description='overall read mapping rate',
        max=100,
        min=0,
        suffix='%',
        scale='YlGn',
    )
    table_columns['aligned_not_multimapped_discordant'] = dict(
        title='{} Aligned'.format(config.read_count_prefix),
        description='Aligned reads, not multimapped or discordant ({})'.format(config.read_count_desc),
        min=0,
        scale='PuRd',
        # The multiplier is read from config each time the callable runs.
        modify=lambda n_reads: n_reads * config.read_count_multiplier,
        shared_key='read_count',
    )
    self.general_stats_addcols(self.tophat_data, table_columns)
def sargasso_stats_table(self):
    """Publish the Sargasso summary columns to the General Statistics table.

    Two columns are declared, in this order, and passed with
    ``self.sargasso_data`` to ``self.general_stats_addcols``:

    * ``sargasso_percent_assigned`` -- a 0-100 value with a ``%`` suffix.
    * ``Assigned-Reads`` -- a count cast to ``float`` and multiplied by
      ``config.read_count_multiplier``.
    """
    def scale_read_count(raw):
        # Cast first so string inputs work; the multiplier is read per call.
        return float(raw) * config.read_count_multiplier

    column_specs = (
        ('sargasso_percent_assigned', {
            'title': '% Assigned',
            'description': 'Sargasso % Assigned reads',
            'max': 100,
            'min': 0,
            'suffix': '%',
            'scale': 'RdYlGn'
        }),
        ('Assigned-Reads', {
            'title': '{} Assigned'.format(config.read_count_prefix),
            'description': 'Sargasso Assigned reads ({})'.format(config.read_count_desc),
            'min': 0,
            'scale': 'PuBu',
            'modify': scale_read_count,
            'shared_key': 'read_count'
        }),
    )
    self.general_stats_addcols(self.sargasso_data, OrderedDict(column_specs))
'max': 100,
'min': 0,
'suffix': '%',
'scale': 'RdYlGn-rev',
}
# Column definitions stored in a two-level mapping: the first key ('dedup',
# 'alignment') selects a group, the second names the column within it.

# Deduplicated alignment count, multiplied by config.read_count_multiplier;
# flagged 'hidden': True.
headers['dedup']['dedup_reads'] = {
'title': '{} Unique'.format(config.read_count_prefix),
'description': 'Deduplicated Alignments ({})'.format(config.read_count_desc),
'min': 0,
'scale': 'Greens',
'modify': lambda x: x * config.read_count_multiplier,
'shared_key': 'read_count',
'hidden': True
}
# Aligned sequence count, multiplied by the same configured factor; also
# flagged 'hidden': True.
headers['alignment']['aligned_reads'] = {
'title': '{} Aligned'.format(config.read_count_prefix),
'description': 'Total Aligned Sequences ({})'.format(config.read_count_desc),
'min': 0,
'scale': 'PuRd',
'modify': lambda x: x * config.read_count_multiplier,
'shared_key': 'read_count',
'hidden': True
}
# Aligned percentage: bounded to 0-100 with a '%' suffix; no 'hidden' flag.
headers['alignment']['percent_aligned'] = {
'title': '% Aligned',
'description': 'Percent Aligned Sequences',
'max': 100,
'min': 0,
'suffix': '%',
'scale': 'YlGn'
}
'title': h.replace("_", " ").lower().capitalize(),
'description' : DESC[h] if h in DESC else None,
'scale': 'RdYlGn',
'min': 0,
'namespace': 'HsMetrics'
}
# Fields whose name contains "READS": set the 'read_count' shared key, multiply
# values by config.read_count_multiplier and prepend config.read_count_prefix
# to the title.
if h.find("READS") > -1:
this.update({
'shared_key': "read_count",
'modify': lambda x: x * config.read_count_multiplier
})
this['title'] = "{} {}".format(config.read_count_prefix, this['title'])
# Fields whose name contains "BASES" (only checked when "READS" did not match):
# set the 'base_count' shared key and multiply by config.base_count_multiplier.
# NOTE(review): the title below is prefixed with config.read_count_prefix even
# though the multiplier is the base-count one -- confirm whether a base-count
# prefix was intended.
elif h.find("BASES") > -1:
this.update({'shared_key': 'base_count',
'modify': lambda x: x * config.base_count_multiplier})
this['title'] = "{} {}".format(config.read_count_prefix, this['title'])
# These five fields are additionally flagged 'hidden': True.
if h in ["BAIT_TERRITORY", "TOTAL_READS", "TARGET_TERRITORY", "AT_DROPOUT", "GC_DROPOUT"]:
this.update({'hidden': True})
# Store the finished definition under its field name.
header.update({h : this})
# Return the definitions as an OrderedDict sorted by title (the title includes
# any prefix added above).
return OrderedDict(sorted(header.items(), key=lambda t: t[1]['title']))
'description': 'Valid Pairs ({})'.format(config.read_count_desc),
'min': 0,
'scale': 'PuRd',
'modify': lambda x: x * config.read_count_multiplier,
'shared_key': 'read_count'
}
# Valid-pair percentage: bounded to 0-100 and shown with a '%' suffix.
headers['Percentage_Valid'] = {
'title': '% Valid',
'description': 'Percent Valid Pairs',
'max': 100,
'min': 0,
'suffix': '%',
'scale': 'YlGn'
}
# Paired alignment count read from the 'Paired_Read_1' field; values are
# multiplied by config.read_count_multiplier.
headers['Paired_Read_1'] = {
'title': '{} Pairs Aligned'.format(config.read_count_prefix),
'description': 'Paired Alignments ({})'.format(config.read_count_desc),
'min': 0,
'scale': 'PuRd',
'modify': lambda x: x * config.read_count_multiplier,
'shared_key': 'read_count'
}
# Paired alignment percentage: bounded to 0-100 and shown with a '%' suffix.
headers['Percentage_Mapped'] = {
'title': '% Aligned',
'description': 'Percentage of Paired Alignments',
'max': 100,
'min': 0,
'suffix': '%',
'scale': 'YlGn'
}
# Hand the collected column definitions and self.hicup_data to
# general_stats_addcols, with 'HiCUP' as the third argument.
self.general_stats_addcols(self.hicup_data, headers, 'HiCUP')