Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def parse_number_space_unit(self, source: str) -> DateTimeResolutionResult:
    """Parse a duration phrased as '<number> <unit>' (e.g. '3 hours').

    Requires exactly one cardinal number in *source*; the text after the
    number must match the configured followed-unit regex.  Builds an
    ISO-8601-style duration timex ('P…' / 'PT…') and scales the numeric
    value by the unit's entry in ``unit_value_map``.

    NOTE(review): this fragment appears truncated — the customary trailing
    past_value/success assignments and final ``return result`` are not
    visible here.
    """
    result = DateTimeResolutionResult()
    # if there are spaces between number and unit
    ers = self.config.cardinal_extractor.extract(source)
    # Bail out unless exactly one number was extracted.
    if len(ers) != 1:
        return result
    suffix = source
    source_unit = ''
    er = ers[0]
    pr = self.config.number_parser.parse(er)
    # Text following the number, lowercased (e.g. 'hours' in '3 hours').
    no_num = source[pr.start + pr.length:].strip().lower()
    match = regex.search(self.config.followed_unit, no_num)
    if match is not None:
        suffix = RegExpUtility.get_group(match, Constants.SUFFIX_GROUP_NAME)
        source_unit = RegExpUtility.get_group(match, Constants.UNIT)
    # Unknown unit -> return the empty (unresolved) result.
    if source_unit not in self.config.unit_map:
        return result
    # Combine the parsed number with any fractional suffix value.
    num = float(pr.value) + self.parse_number_with_unit_and_suffix(suffix)
    unit = self.config.unit_map[source_unit]
    num = QueryProcessor.float_or_int(num)
    # Sub-day units take the ISO-8601 time designator 'T' (e.g. 'PT3H').
    is_time = 'T' if self.is_less_than_day(unit) else ''
    result.timex = f'P{is_time}{num}{unit[0]}'
    result.future_value = QueryProcessor.float_or_int(
        num * self.config.unit_value_map[source_unit])
    directory (str, optional): the directory in which to search
        for submodules
"""
# NOTE(review): the lines above are the tail of a docstring whose opening
# lies outside this fragment; the function body below is also cut off
# before its return statement.
path = Path(path).expanduser()
if not path.is_file():
    raise REPPError(f'REPP config file not found: {path!s}')
confdir = path.parent
# TODO: can TDL parsing be repurposed for this variant?
conf = path.read_text(encoding='utf-8')
# Strip ';' comments and fold the file onto one line so the option
# regexes below can match values that spanned line breaks.
conf = re.sub(r';.*', '', conf).replace('\n', ' ')
m = re.search(
    r'repp-modules\s*:=\s*((?:[-\w]+\s+)*[-\w]+)\s*\.', conf)
t = re.search(
    r'repp-tokenizer\s*:=\s*([-\w]+)\s*\.', conf)
a = re.search(
    r'repp-calls\s*:=\s*((?:[-\w]+\s+)*[-\w]+)\s*\.', conf)
# f = re.search(
#     r'format\s*:=\s*(\w+)\s*\.', conf)
d = re.search(
    r'repp-directory\s*:=\s*(.*)\.\s*$', conf)
# repp-modules and repp-tokenizer are mandatory options.
if m is None:
    raise REPPError('repp-modules option must be set')
if t is None:
    raise REPPError('repp-tokenizer option must be set')
# mods = m.group(1).split()
tok = t.group(1).strip()
# repp-calls is optional; active stays None when the option is absent.
active = a.group(1).split() if a is not None else None
# fmt = f.group(1).strip() if f is not None else None
# Per-language word predicates keyed by what appear to be ISO-639-3 codes.
# The 'deu' (German) entry flags words carrying suffixes common in place
# and polity names (…land, …burg, …berg, Republik, Provinz, …).
# NOTE(review): fragment is cut off inside the 'amh' (Amharic) entry, and
# the same table appears duplicated later in this file — verify which
# copy is canonical.
'deu': lambda ws: [any([
    re.search('[rR]epublik$', w),
    re.search('land$', w),
    re.search('stan$', w),
    re.search('[sS]tadt$', w),
    re.search('heim$', w),
    re.search('dorf$', w),
    re.search('hausen$', w),
    re.search('burg$', w),
    re.search('berg$', w),
    re.search('gau$', w),
    re.search('[pP]rovinz$', w)
]) for w in ws],
'amh': lambda ws: [w in {
# NOTE(review): mid-function fragment of a textile-style block parser;
# several names (text, eat_whitespace, tre, align_re_s, cls_re_s, ext,
# escaped, multiline_para, block) are bound outside this view.
out = []
for line in text:
    # the line is just whitespace, add it to the output, and move on
    if not line.strip():
        if not eat_whitespace:
            out.append(line)
        continue
    eat_whitespace = False
    # NOTE(review): the first three (?P...) groups below have lost their
    # group names — '(?P{0})' is not valid regex syntax; compare with the
    # intact '(?P<cite>...)' and '(?P<content>...)' groups.  Looks like
    # extraction damage; restore the names before relying on this pattern.
    pattern = (r'^(?P{0})(?P{1}{2})\.(?P\.?)'
               r'(?::(?P<cite>\S+))? (?P<content>.*)$'.format(tre,
                                                             align_re_s, cls_re_s))
    match = re.search(pattern, line, flags=re.S | re.U)
    # tag specified on this line.
    if match:
        # if we had a previous extended tag but not this time, close up
        # the tag
        if ext and out:
            # it's out[-2] because the last element in out is the
            # whitespace that preceded this line
            if not escaped:
                content = encode_html(out[-2], quotes=True)
                escaped = True
            else:
                content = out[-2]
            if not multiline_para:
                content = generate_tag(block.inner_tag, content,
                                       block.inner_atts)
def adjust_by_prefix(self, prefix: str, adjust: AdjustParams):
    """Apply a French minutes-offset prefix to *adjust*.

    Recognized literal suffixes: 'demie' (30), 'un quart'/'quart' (15),
    'trois quarts' (45).  Otherwise the less-than-one-hour regex is
    consulted for either a digit minute count ('deltamin') or a
    spelled-out one ('deltaminnum', resolved via ``self.numbers``).
    A prefix ending in 'à' ("to the hour") negates the offset, and a
    negative minute total wraps back into the 0-59 range.
    """
    cleaned = prefix.strip().lower()
    offset = 0
    if cleaned.endswith('demie'):
        offset = 30
    elif cleaned.endswith('un quart') or cleaned.endswith('quart'):
        offset = 15
    elif cleaned.endswith('trois quarts'):
        offset = 45
    else:
        found = regex.search(self.less_than_one_hour, cleaned)
        if found:
            digits = RegExpUtility.get_group(found, 'deltamin')
            if digits:
                offset = int(digits)
            else:
                spelled = RegExpUtility.get_group(
                    found, 'deltaminnum').lower()
                offset = self.numbers.get(spelled)
    if cleaned.endswith('à'):
        offset = -offset
    adjust.minute += offset
    if adjust.minute < 0:
        adjust.minute += 60
# NOTE(review): fragment starts mid-function; 'tones' and the enclosing
# signature are defined outside this view.  Indentation below has been
# reconstructed — verify the grouping under the '[一不,.?]' branch.
return ""
# Re-compose NFD-decomposed ü and ê so the character classes below match
# the precomposed forms.
text = re.sub(
    unicodedata.normalize("NFD", "ü"),
    "ü",
    re.sub(
        unicodedata.normalize("NFD", "ê"),
        "ê",
        unicodedata.normalize("NFD", text)
    )
)
# Reject input with two toned vowel clusters in close succession.
if re.search(
        "[aeiouêü]" + tones + "[aeiou]?[aeiouêü]" + tones + "",
        text.lower()):
    return ""
text = text.lower()
# NOTE(review): this branch looks broken as written: '\p{Ll}' is not
# supported by the stdlib 're' module, and the non-raw replacement
# "\1 \2" contains control characters \x01/\x02 rather than backrefs.
# Probably intended for the third-party 'regex' module with raw strings —
# confirm before touching.
if not re.search(tones, text) and re.match("[1-5]", text):
    return re.sub("(\d)(\p{Ll})", "\1 \2", text)
# Space out 一/不 and CJK punctuation; a trailing 一 or 不 is replaced by
# its citation reading (yī / bù).
if re.search("[一不,.?]", text):
    text = re.sub(
        "([一不])$",
        lambda x: " yī" if x.group() == "一" else " bù",
        text
    )
    text = re.sub("([一不])", r" \1 ", text)
    text = re.sub("([,.?])", r" \1 ", text)
# Collapse space runs, trim the ends, and re-fuse spaced ellipses.
text = re.sub(" +", " ", text)
text = re.sub("^ ", "", text)
text = re.sub(" $", "", text)
text = re.sub("\. \. \.", "...", text)
text = re.sub("['\-]", " ", text)
# Syllable split: vowel(+tone)(+optional coda) followed by an onset.
# NOTE(review): truncated mid-call — the replacement argument is missing.
text = re.sub(
    "([aeiouêü]" + tones + "?n?g?r?)([bpmfdtnlgkhjqxzcsywr]h?)",
# NOTE(review): this table duplicates an identical fragment earlier in
# this file and is likewise cut off inside the 'amh' (Amharic) entry —
# verify which copy is canonical.  The 'deu' (German) entry flags words
# with suffixes common in place/polity names.
'deu': lambda ws: [any([
    re.search('[rR]epublik$', w),
    re.search('land$', w),
    re.search('stan$', w),
    re.search('[sS]tadt$', w),
    re.search('heim$', w),
    re.search('dorf$', w),
    re.search('hausen$', w),
    re.search('burg$', w),
    re.search('berg$', w),
    re.search('gau$', w),
    re.search('[pP]rovinz$', w)
]) for w in ws],
'amh': lambda ws: [w in {
# NOTE(review): fragment begins mid if/elif chain inside a try block whose
# opening is outside this view; indentation reconstructed — verify nesting.
elif int(pileupcolumn.pos)==int(pos)-1 and str(querybase)==str(minor_allele): #and pileupread.alignment.mapping_quality>=10:
    # Read carries the minor allele at the position of interest.
    minor_ids.append(pileupread.alignment.query_name)
    minor_num+=1
except:
    # NOTE(review): bare 'except: continue' swallows every error per read,
    # including KeyboardInterrupt/SystemExit — consider narrowing.
    continue
# elif len(major_allele)>1 and len(minor_allele)==1:
elif len(major_allele)> len(minor_allele):
    # Major allele longer than minor: candidate deletion.
    state="DEL"
    #context1[name]=reference[chrom][int(pos)-2:int(pos)+1]
    # Reference windows around the site (roughly 10 bp either side,
    # clamped to chromosome bounds) used for homopolymer screening.
    context1=reference[chrom][max(1,int(pos)-11):min(int(pos)+1,int(chr_sizes[chrom]))]
    context2=reference[chrom][max(1,int(pos)-1):min(int(pos)+10,int(chr_sizes[chrom]))]
    context=reference[chrom][max(1,int(pos)-11):min(int(pos)+10,int(chr_sizes[chrom]))]
    if_homopolymer="No"
    for item in homopolymers:
        if re.search(str(item), str(context1)) or re.search(str(item),str(context2)):
            if_homopolymer="Yes"
            break
    # Only inspect clipped reads when the site is not in a homopolymer run.
    if if_homopolymer=="No":
        for read in a.fetch(chrom,start-length, end+length):
            try:
                #if read.cigar[0][0]==4 and read.cigar[0][1]<=length and read.reference_start>= pos-1 and read.reference_start-read.query_alignment_start< pos-1:
                # Leading clip (CIGAR op 4 = soft, 5 = hard) overlapping the site.
                if (read.cigar[0][0]==4 or read.cigar[0][0]==5) and read.reference_start>= pos-2 and read.reference_start-read.query_alignment_start< pos-1:
                    query_clipped = read.query_sequence[:read.query_alignment_start][:length]
                    # NOTE(review): query_clipped is used as a regex pattern;
                    # ambiguity codes such as 'N' are matched literally —
                    # confirm that is intended.
                    if re.search(query_clipped, major_allele):
                        minor_ids.append(read.query_name)
                        minor_num+=1
                #elif read.cigar[-1][0]==4 and read.cigar[-1][1]<=length and read.reference_end <= pos-1 and (read.reference_end + read.query_length-read.query_alignment_end>pos-1):
                # Trailing clip overlapping the site.
                elif (read.cigar[-1][0]==4 or read.cigar[-1][0]==5) and read.reference_end <= pos and (read.reference_end + read.query_length-read.query_alignment_end>pos-1):
                    query_clipped = read.query_sequence[read.query_alignment_end:][-length:]
                    if re.search(query_clipped, major_allele):
                        # NOTE(review): fragment truncated here — the
                        # matching 'minor_num' increment is not visible.
                        minor_ids.append(read.query_name)
def regexFuzzySearch(
        # Text to search for. It is NOT treated as a regex.
        searchText,
        # Text in which to find the searchText
        targetText):
    """Fuzzily locate *searchText* inside *targetText*.

    The needle is matched literally — it is escaped first, so regex
    metacharacters it contains have no special meaning.  The third-party
    ``regex`` library's fuzzy syntax ``(item){e}`` allows insertions,
    deletions, and substitutions, and the BESTMATCH flag returns the best
    possible match rather than the first one found.  Returns the match
    object, or None when nothing close enough exists.
    """
    literal = regex.escape(searchText)
    return regex.search('(' + literal + '){e}', targetText, regex.BESTMATCH)
# NOTE(review): fragment starts mid-method (combining a parsed date with a
# parsed time); the enclosing signature, 'result' initialisation, and the
# trailing success/return lines are outside this view.
if not parse_result1.value or not parse_result2.value:
    return result
future_date: datetime = parse_result1.value.future_value
past_date: datetime = parse_result1.value.past_value
time: datetime = parse_result2.value.future_value
hour = time.hour
minute = time.minute
second = time.second
# handle morning, afternoon
# Shift a pre-noon hour to PM / a post-noon hour to AM when the source
# text explicitly says so.
if regex.search(self.config.pm_time_regex, source) and hour < 12:
    hour += 12
elif regex.search(self.config.am_time_regex, source) and hour >= 12:
    hour -= 12
# Rebuild the time portion of the timex with the adjusted hour; drop a
# trailing am/pm marker (4 chars) from the time timex first.
time_str = parse_result2.timex_str
if time_str.endswith(Constants.AM_PM_GROUP_NAME):
    time_str = time_str[:-4]
time_str = f'T{hour:02d}{time_str[3:]}'
result.timex = parse_result1.timex_str + time_str
val = parse_result2.value
# True only when BOTH the pm and am regexes match the source text.
has_am_pm = regex.search(self.config.pm_time_regex, source) and regex.search(
    self.config.am_time_regex, source)
# Ambiguous hour: propagate the am/pm comment so later resolution can
# offer both interpretations — presumably; confirm against caller.
if hour <= 12 and not has_am_pm and val.comment:
    result.comment = Constants.AM_PM_GROUP_NAME