Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_reference_sequence_generic_load_sequence(self):
    """Gzip the generic FASTA fixture and verify ReferenceSequence reads it.

    Checks the ID, chrom, and path attributes, the loaded sequence bytes,
    and the md5 digest reported for the sequence.
    """
    src = "tests/input/generic.fa"
    dst = "tests/input/generic.fa.gz"

    # Compress the plain FASTA fixture into the gzip file the loader reads.
    with open(src, "rb") as raw, atomic_write(dst, mode="wb", overwrite=True) as tmp:
        with gzip.open(tmp, "wb") as gz:
            shutil.copyfileobj(raw, gz)

    seq = ReferenceSequence(ID="1", path="tests/input/generic.fa.gz")
    assert seq.ID == "1"
    assert seq.chrom == "1"
    assert seq.path == "tests/input/generic.fa.gz"

    # Expected sequence, compared element-wise as uint8 byte values.
    expected_text = "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNAGGCCGGACNNNNNNNN"
    expected = np.array(
        bytearray(expected_text, encoding="utf-8", errors="strict"),
        dtype=np.uint8,
    )
    np.testing.assert_array_equal(seq.sequence, expected)

    # Spot-check the slice that should decode to the non-N bases.
    decoded = [chr(code) for code in seq.sequence[100:109]]
    assert list("AGGCCGGAC") == decoded
    assert seq.md5 == "6ac6176535ad0e38aba2d05d786c39b6"
def test_save_snps_vcf(self):
    """Save SNPs read from a VCF back out as VCF and re-parse the result.

    A gzipped copy of the generic FASTA fixture is registered as the GRCh37
    reference sequence for chromosome "1" before saving.
    """
    expected_vcf = "output/vcf_GRCh37.vcf"
    snps = SNPs("tests/input/testvcf.vcf")

    # Point the GRCh37 reference sequences at the test FASTA only.
    resources = Resources()
    resources._reference_sequences["GRCh37"] = {}

    with tempfile.TemporaryDirectory() as workdir:
        gz_fasta = os.path.join(workdir, "generic.fa.gz")
        gzip_file("tests/input/generic.fa", gz_fasta)
        resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
            ID="1", path=gz_fasta
        )

        # Save while the temporary FASTA still exists on disk.
        saved = s_path = snps.save_snps(vcf=True)
        self.assertEqual(os.path.relpath(s_path), expected_vcf)

    self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf")
def test_save_snps_vcf_phased(self):
    """Save phased SNPs as VCF and confirm the saved file parses as phased.

    A gzipped copy of the generic FASTA fixture is registered as the GRCh37
    reference sequence for chromosome "1" before saving.
    """
    expected_vcf = "output/vcf_GRCh37.vcf"

    # Load the phased input data.
    snps = SNPs("tests/input/testvcf_phased.vcf")

    # Configure resources so the test FASTA is the reference sequence.
    resources = Resources()
    resources._reference_sequences["GRCh37"] = {}

    with tempfile.TemporaryDirectory() as workdir:
        gz_fasta = os.path.join(workdir, "generic.fa.gz")
        gzip_file("tests/input/generic.fa", gz_fasta)
        resources._reference_sequences["GRCh37"]["1"] = ReferenceSequence(
            ID="1", path=gz_fasta
        )

        # Write the phased data out as VCF while the FASTA still exists.
        saved_path = snps.save_snps(vcf=True)
        self.assertEqual(os.path.relpath(saved_path), expected_vcf)

    # Read the saved VCF back and run the phased parsing checks.
    self.run_parsing_tests_vcf("output/vcf_GRCh37.vcf", phased=True)