Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if not options.coverage_file:
if not options.bam_files:
self.logger.warning('One or more BAM files must be specified in order to calculate coverage profiles.')
coverage_file = None
else:
coverage = Coverage(options.cpus)
coverage_file = os.path.join(options.output_dir, 'coverage.tsv')
coverage.run(options.bam_files, coverage_file, options.cov_all_reads, options.cov_min_align, options.cov_max_edit_dist)
self.logger.info('Coverage profiles written to: %s' % coverage_file)
else:
check_file_exists(options.coverage_file)
coverage_file = options.coverage_file
# get tetranucleotide signatures
if not options.tetra_file:
tetra = Tetranucleotide(options.cpus)
tetra_file = os.path.join(options.output_dir, 'tetra.tsv')
signatures = tetra.run(options.scaffold_file)
tetra.write(signatures, tetra_file)
self.logger.info('Tetranucleotide signatures written to: %s' % tetra_file)
else:
tetra_file = options.tetra_file
# write out scaffold statistics
stats_output = os.path.join(options.output_dir, 'scaffold_stats.tsv')
stats = ScaffoldStats(options.cpus)
stats.run(options.scaffold_file, genome_files, tetra_file, coverage_file, stats_output)
self.logger.info('Scaffold statistic written to: %s' % stats_output)
Parameters
----------
scaffold_file : str
Fasta file containing scaffolds.
genome_files : list of str
Fasta files with binned scaffolds.
tetra_file : str
Tetranucleotide signatures for scaffolds.
coverage_file : str
Coverage profiles for scaffolds
output_file : str
Output file for scaffolds statistics.
"""
tetra = Tetranucleotide(self.cpus)
signatures = tetra.read(tetra_file)
cov_profiles = None
if coverage_file:
coverage = Coverage(self.cpus)
cov_profiles, _ = coverage.read(coverage_file)
# determine bin assignment for each scaffold
self.logger.info('Determining scaffold statistics.')
scaffold_id_genome_id = {}
for gf in genome_files:
genome_id = remove_extension(gf)
for scaffold_id, _seq in seq_io.read_seq(gf):
scaffold_id_genome_id[scaffold_id] = genome_id