Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# align False requires forked minimap2, and isn't much faster for
# a small number of sequences due to index construction time.
warnings.warn("`align` is ignored", DeprecationWarning)
alignments = []
aligner = mappy.Aligner(seq=template, preset='map-ont')
for sr in self.subreads:
try:
hit = next(aligner.map(sr.seq))
except StopIteration:
continue
else:
flag = 0 if hit.strand == 1 else 16
if hit.strand == 1:
seq = sr.seq
else:
seq = medaka.common.reverse_complement(sr.seq)
clip = [
'' if x == 0 else '{}S'.format(x)
for x in (hit.q_st, len(sr.seq) - hit.q_en)]
if hit.strand == -1:
clip = clip[::-1]
cigstr = ''.join((clip[0], hit.cigar_str, clip[1]))
aln = Alignment(
template_name, sr.name, flag, hit.r_st, seq, cigstr)
alignments.append(aln)
hit = None
return alignments
fast5_path = fast5_path[0]
try:
fast5_call, wl, wk = get_rl_params(
alignment.query_name, fast5_path)
except KeyError:
msg = 'RLE table not found for file {}, read {}'
logger.info(msg.format(fast5_path, alignment.query_name))
return None
# params needs flipping for reverse alignments
if alignment.is_reverse:
wl = wl[::-1]
wk = wk[::-1]
fast5_call = medaka.common.reverse_complement(fast5_call)
# ocasional errors where bases are repeated
# may lead to discrepancies, discard these
if fast5_call != query_rle.compact_basecall:
logger.warning(
'RLE table within fast5 file is inconsistent with '
'compressed basecall for read {}. {} != {}'.format(
alignment.query_name,
fast5_call,
query_rle.compact_basecall))
return None
tags = {'WL': wl, 'WK': wk}
else:
tags = dict()
# Create alignment object
def align_to_template(self, template, template_name):
"""Align subreads to a template sequence using Smith-Waterman.
:param template: sequence to which to align subreads.
:param template_name: name of template sequence.
:returns: `Alignment` tuples.
"""
self.initialize()
alignments = []
for orient, sr in zip(self._orient, self.subreads):
if orient:
seq = sr.seq
else:
seq = medaka.common.reverse_complement(sr.seq)
result = parasail.sw_trace_striped_16(
seq, template, 8, 4, parasail.dnafull)
if result.cigar.beg_ref >= result.end_ref or \
result.cigar.beg_query >= result.end_query:
# unsure why this can happen
continue
rstart, cigar = parasail_to_sam(result, seq)
flag = 0 if orient else 16
aln = Alignment(template_name, sr.name, flag, rstart, seq, cigar)
alignments.append(aln)
return alignments
def orient_subreads(self):
"""Find orientation of subreads with respect to consensus sequence.
:returns: `Alignment` s of subreads to consensus.
"""
# TODO: use a profile here
# TODO: refactor with align_to_template
self._orient = []
alignments = []
for sr in self.subreads:
rc_seq = medaka.common.reverse_complement(sr.seq)
result_fwd = parasail.sw_trace_striped_16(
sr.seq, self.consensus, 8, 4, parasail.dnafull)
result_rev = parasail.sw_trace_striped_16(
rc_seq, self.consensus, 8, 4, parasail.dnafull)
is_fwd = result_fwd.score > result_rev.score
self._orient.append(is_fwd)
result = result_fwd if is_fwd else result_rev
seq = sr.seq if is_fwd else rc_seq
if result.cigar.beg_ref >= result.end_ref or \
result.cigar.beg_query >= result.end_query:
# unsure why this can happen
continue
rstart, cigar = parasail_to_sam(result, seq)
flag = 0 if is_fwd else 16
aln = Alignment(
'consensus_{}'.format(self.name), sr.name,
:param read: a `Read` namedtuple.
:param tags: list containing additional tags to add to sam record.
:returns: a (string) sam record.
"""
try:
align = list(aligner.map(read.sequence, MD=True, cs=True))[0]
except IndexError:
return None
if align.strand == +1:
flag = '0'
seq = read.sequence
qstring = read.quality
else:
flag = '16'
seq = medaka.common.reverse_complement(read.sequence)
qstring = read.quality[::-1]
rname = align.ctg
pos = str(align.r_st + 1)
mapq = str(align.mapq)
clip = [
'' if x == 0 else '{}S'.format(x)
for x in (align.q_st, len(seq) - align.q_en)]
if align.strand == -1:
clip = clip[::-1]
cigar = clip[0] + align.cigar_str + clip[1]
NM = 'NM:i:' + str(align.NM)
# NOTE: tags written without reversal, see below
sam = '\t'.join((
read.read_id, flag, rname, pos, mapq, cigar,
'*', '0', '0', seq, qstring, NM, *tags))
def poa_consensus(self, additional_seq=None, method='racon'):
"""Create a consensus sequence for the read."""
self.initialize()
with tempfile.NamedTemporaryFile(
'w', suffix='.fasta', delete=False) as fh:
if additional_seq is not None:
fh.write(">{}\n{}\n".format('additional', additional_seq))
for orient, subread in zip(self._orient, self.subreads):
if method == 'spoa':
if orient:
seq = subread.seq
else:
seq = medaka.common.reverse_complement(subread.seq)
else:
seq = subread.seq
fh.write(">{}\n{}\n".format(subread.name, seq))
fh.flush()
if method == 'spoa':
consensus_seq = self._run_spoa(fh.name)
elif method == 'racon':
consensus_seq = self._run_racon(fh.name)
else:
raise ValueError('Unrecognised method: {}.'.format(method))
self.consensus = consensus_seq
self._alignments_valid = False
self.consensus_run = True
return consensus_seq