Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def filter_bins(self, options):
    """Filter bins command.

    Ensures the output directory exists, collects the genome files for
    ``options.genome_nt_dir`` / ``options.genome_ext`` and, for each one,
    calls ``Outliers.remove_outliers`` with an output path inside
    ``options.output_dir`` named
    ``remove_extension(genome_file) + '.filtered.' + options.genome_ext``.

    Parameters
    ----------
    options
        Object exposing the attributes read here: ``output_dir``,
        ``genome_nt_dir``, ``genome_ext``, ``filter_file`` and
        ``modified_only``.

    Raises
    ------
    SystemExit
        With exit status 1 when ``self._check_nuclotide_seqs`` rejects the
        genome files.
    """
    make_sure_path_exists(options.output_dir)

    genome_files = self._genome_files(options.genome_nt_dir, options.genome_ext)
    if not self._check_nuclotide_seqs(genome_files):
        self.logger.warning('All files must contain nucleotide sequences.')
        # A bare sys.exit() terminates with status 0, which makes this
        # failure indistinguishable from success for calling shells and
        # pipelines; report a non-zero status instead.
        sys.exit(1)

    outliers = Outliers()
    for genome_file in genome_files:
        # Output name is derived from the input name, tagged '.filtered.'.
        gf = remove_extension(genome_file) + '.filtered.' + options.genome_ext
        out_genome = os.path.join(options.output_dir, gf)
        outliers.remove_outliers(genome_file,
                                 options.filter_file,
                                 out_genome,
                                 options.modified_only)

    self.logger.info('Modified genome written to: ' + options.output_dir)
sys.exit()
if options.outlier_file and options.compatible_file:
self.logger.warning("The 'outlier_file' and 'compatible_file' options cannot be specified at the same time.\n")
sys.exit()
failed_to_add = []
failed_to_remove = []
if options.add or options.remove:
failed_to_add, failed_to_remove = genome_tk.modify(options.genome_file,
options.scaffold_file,
options.add,
options.remove,
options.output_genome)
elif options.outlier_file:
outliers = Outliers()
outliers.remove_outliers(options.genome_file,
options.outlier_file,
options.output_genome,
False)
elif options.compatible_file:
outliers = Outliers()
if options.unique_only:
outliers.add_compatible_unique(options.scaffold_file,
options.genome_file,
options.compatible_file,
options.min_len,
options.output_genome)
elif options.closest_only:
outliers.add_compatible_closest(options.scaffold_file,
options.genome_file,
options.compatible_file,
failed_to_add = []
failed_to_remove = []
if options.add or options.remove:
failed_to_add, failed_to_remove = genome_tk.modify(options.genome_file,
options.scaffold_file,
options.add,
options.remove,
options.output_genome)
elif options.outlier_file:
outliers = Outliers()
outliers.remove_outliers(options.genome_file,
options.outlier_file,
options.output_genome,
False)
elif options.compatible_file:
outliers = Outliers()
if options.unique_only:
outliers.add_compatible_unique(options.scaffold_file,
options.genome_file,
options.compatible_file,
options.min_len,
options.output_genome)
elif options.closest_only:
outliers.add_compatible_closest(options.scaffold_file,
options.genome_file,
options.compatible_file,
options.min_len,
options.output_genome)
else:
outliers.add_compatible(options.scaffold_file,
options.genome_file,
options.compatible_file,
# read scaffold statistics and calculate genome stats
self.logger.info('Reading scaffold statistics.')
scaffold_stats = ScaffoldStats()
scaffold_stats.read(options.scaffold_stats_file)
genome_stats = GenomeStats()
genome_stats = genome_stats.run(scaffold_stats)
# identify putative homologs to reference genomes
reference = Reference(1, None)
putative_homologs = reference.homology_check(options.reference_file,
options.min_genes,
float(options.perc_genes))
# identify scaffolds compatible with bins
outliers = Outliers()
output_file = os.path.join(options.output_dir, 'compatible.tsv')
outliers.compatible(putative_homologs, scaffold_stats, genome_stats,
options.gc_perc, options.td_perc,
options.cov_corr, options.cov_perc,
options.report_type, output_file)
self.logger.info('Results written to: ' + output_file)
def outliers(self, options):
"""Outlier command"""
check_file_exists(options.scaffold_stats_file)
make_sure_path_exists(options.output_dir)
self.logger.info('Reading scaffold statistics.')
scaffold_stats = ScaffoldStats()
scaffold_stats.read(options.scaffold_stats_file)
genome_stats = GenomeStats()
genome_stats = genome_stats.run(scaffold_stats)
# identify outliers
outliers = Outliers()
outlier_file = os.path.join(options.output_dir, 'outliers.tsv')
outliers.identify(scaffold_stats, genome_stats,
options.gc_perc, options.td_perc,
options.cov_corr, options.cov_perc,
options.report_type, outlier_file)
self.logger.info('Outlier information written to: ' + outlier_file)
# create outlier plots
if options.create_plots:
plot_dir = os.path.join(options.output_dir, 'plots')
make_sure_path_exists(plot_dir)
outliers.plot(scaffold_stats,
genome_stats,
outliers.gc_dist,
outliers.td_dist,