Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def search(term, provider=None):
    """
    Search for genomes that contain TERM in their name or description.

    Function is case-insensitive. Spaces in TERM can be replaced with underscores
    (_) or TERM can be "quoted", e.g., "homo sapiens".

    The matches returned by ``genomepy.search(term, provider)`` are printed to
    stdout as a table with a header row. When stdout is a terminal the columns
    are padded so they line up; otherwise (file, pipe) they are tab-separated.
    If nothing matches, "No genomes found!" is written to stderr instead.

    Args:
        term: Search term, passed through to ``genomepy.search``.
        provider: Optional provider, passed through to ``genomepy.search``.

    Returns:
        None. All output goes to stdout/stderr.
    """
    header = ["name", "provider", "accession", "species", "tax_id", "other_info"]
    data = [header]
    # Each field is decoded as UTF-8; bytes that cannot be decoded are dropped.
    # NOTE(review): this assumes genomepy.search yields rows of bytes - confirm.
    data.extend(
        [x.decode("utf-8", "ignore") for x in row]
        for row in genomepy.search(term, provider)
    )

    # Only the header is present: the search produced no rows.
    if len(data) == 1:
        print("No genomes found!", file=sys.stderr)
        return

    n_cols = len(header)
    # In case we print to a terminal, the output is aligned.
    # Otherwise (file, pipe) we use tab-separated columns.
    if sys.stdout.isatty():
        # Column width = widest cell in that column (header included) + 4.
        sizes = [max(len(row[i]) for row in data) + 4 for i in range(n_cols)]
        fstring = "".join(f"{{: <{size}}}" for size in sizes)
    else:
        fstring = "\t".join(["{}"] * n_cols)

    for i, row in enumerate(data):
        line = fstring.format(*row)
        if i == 0:
            # Header row is emphasised.
            # NOTE(review): no explicit style reset is emitted here; presumably
            # colorama is initialised with autoreset elsewhere - confirm.
            print(Style.BRIGHT + line)
        else:
            # Data rows: previously these were collected but never printed.
            print(line)
logger.info("Loading chromosome mapping.")
if to.startswith("GCA"):
if provider is None:
raise ValueError("Need a provider: NCBI, UCSC or Ensembl")
asm_acc = to
else:
try:
genome = Genome(to)
logger.info("Using local genome information")
asm_acc = genome.assembly_accession
if provider is None:
provider = genome.provider
except Exception:
logger.info("Searching remote genome information")
result = [row for row in search(to, provider=provider)]
if len(result) > 1:
p = [row[1].decode() for row in result]
raise ValueError(
f"More than one result, need one of these providers: {', '.join(p)}"
)
if provider is None:
provider = result[0][1].decode()
asm_acc = result[0][2].decode()
logger.info(f"Assembly {asm_acc}, provider {provider}")
if provider not in ["UCSC", "NCBI", "Ensembl"]:
logger.error(f"Can't map to provider {provider}")
return None
asm_report = ncbi_assembly_report(asm_acc)