Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def taxon_profile(self, options):
    """Taxon profile command.

    Checks the input paths, collects the protein files for the genomes,
    verifies that they hold amino acid sequences, and then runs
    TaxonProfile over them.

    Parameters
    ----------
    options : object
        Parsed command-line options (presumably an argparse namespace;
        confirm against the caller). Attributes read here: output_dir,
        scaffold_stats_file, taxonomy_file, db_file, genome_prot_dir,
        protein_ext, cpus, per_to_classify, evalue, per_identity,
        per_aln_len, and tmpdir.

    Raises
    ------
    SystemExit
        With a non-zero status when the protein sequence check fails.
    """
    make_sure_path_exists(options.output_dir)

    for required_file in (options.scaffold_stats_file,
                          options.taxonomy_file,
                          options.db_file):
        check_file_exists(required_file)

    gene_files = self._genome_files(options.genome_prot_dir,
                                    options.protein_ext)
    if not self._check_protein_seqs(gene_files):
        self.logger.warning('All files must contain amino acid sequences.')
        # Exit with a failure status so calling scripts and pipelines
        # can tell that the command did not complete.
        sys.exit(1)

    # build taxonomic profile
    taxon_profile = TaxonProfile(options.cpus, options.output_dir)
    taxon_profile.run(gene_files,
                      options.scaffold_stats_file,
                      options.db_file,
                      options.taxonomy_file,
                      options.per_to_classify,
                      options.evalue,
                      options.per_identity,
                      options.per_aln_len,
                      options.tmpdir)

    self.logger.info('Results written to: %s', options.output_dir)
def taxon_filter(self, options):
    """Taxon filter command.

    Creates a TaxonProfile over an existing taxon profile directory and
    calls its filter method with the thresholds given on the command line.

    Parameters
    ----------
    options : object
        Parsed command-line options (presumably an argparse namespace;
        confirm against the caller). Attributes read here:
        taxon_profile_dir, cpus, consensus_taxon, trusted_scaffold,
        common_taxa, congruent_scaffold, min_classified_per,
        min_classified, consensus_scaffold, and output_file.
    """
    make_sure_path_exists(options.taxon_profile_dir)

    # The unreachable `if False:` branch, which called filter() with a
    # different argument list, has been dropped; only the call that
    # actually executed is kept.
    taxon_profile = TaxonProfile(options.cpus, options.taxon_profile_dir)
    taxon_profile.filter(options.consensus_taxon,
                         options.trusted_scaffold,
                         options.common_taxa,
                         options.congruent_scaffold,
                         options.min_classified_per,
                         options.min_classified,
                         options.consensus_scaffold,
                         options.output_file)
ssu_order : float
ssu_family : float
ssu_genus : float
output_dir : str
Directory for output files.
"""
header = 'Scaffold id\tGenome id\tGenome classification\tIncongruent common taxa set'
header += '\tNo. 16S in Genome'
header += '\t16S Classification\t16S length\t16S e-value\t16S alignment length\t16S percent identity'
header += '\tScaffold length (bp)\n'
fout = open(os.path.join(output_dir, 'ssu_erroneous.tsv'), 'w')
fout.write(header)
taxon_profile = TaxonProfile(1, taxon_profile_dir)
common_taxa = taxon_profile.common_taxa(common_taxon_threshold, 25.0)
genome_taxonomy = taxon_profile.read_genome_taxonomy()
for genome_id, scaffold_ids in ssu_hits.items():
# **** HACK for SRA processing
gid = genome_id.replace('.filtered', '')
for scaffold_id in scaffold_ids:
hmm_model, evalue, _start, _stop, ssu_length, _rev_comp, scaffold_len = ssu_hits[genome_id][scaffold_id]
evalue = float(evalue)
ssu_length = int(ssu_length)
scaffold_len = int(scaffold_len)
if ssu_length < ssu_min_length:
continue