Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def separate_string(string):
    """
    Split ``string`` into the pieces outside and inside ``<...>`` delimiters.

    Every ``>`` is first turned into ``<`` so that one split on ``<``
    yields pieces that alternate between "outside" and "inside" text.
    A delimiter immediately followed by ``!`` or ``=`` is not split on.

    Returns a tuple ``(outside, inside)`` of two lists holding the
    even-indexed and the odd-indexed pieces of the split, respectively.

    >>> separate_string("test <2>")
    (['test ', ''], ['2'])
    """
    # A one-character literal substitution does not need a regular expression.
    unified = string.replace('>', '<')
    pieces = regex.split(r'<(?![!=])', unified)
    return pieces[::2], pieces[1::2]  # Returns even and odd elements
Notes
-----
- Use ``_j`` for palatalized instead of ``'``
- Use ``=`` for syllabic instead of ``_=``
- Use ``~`` for nasalization instead of ``_~``
- Please refer to :doc:`sym` for more details.
Examples
--------
>>> IPA_text = "/t͡ʃeɪnd͡ʒ/" # en: [[change]]
>>> XSAMPA_text = IPA_to_XSAMPA(IPA_text)
>>> XSAMPA_text
"/t__SeInd__Z/"
"""
# Replace each doubled length mark ("ːː") with a single ASCII colon.
text = re.sub("ːː", ":", text)
# Append one padding space: the loop below stops at len(text) - 1, so the
# padding itself is never emitted and text[i:i+2] always has two characters.
text += " "
XSAMPA_lst = []
i = 0
# NOTE(review): leading indentation is missing in this excerpt, so the nesting
# of the branches and of the two `i += 1` statements is not visible here.
while i < len(text) - 1:
# Prefer a two-character entry of the lookup table ...
if text[i:i+2] in i2x_lookup.keys():
XSAMPA_lst.append(i2x_lookup[text[i:i+2]])
i += 1
# ... then a single-character entry ...
elif text[i] in i2x_lookup.keys():
XSAMPA_lst.append(i2x_lookup[text[i]])
# ... and otherwise copy the character through unchanged.
else:
XSAMPA_lst.append(text[i])
i += 1
# The converted pieces are concatenated without any separator.
return "".join(XSAMPA_lst)
# Rewrite simple MathML constructs found in processed_line, one substitution at a time.
# NOTE(review): this excerpt has lost its indentation and several patterns/replacements
# look truncated (the empty pattern in regex.sub(r"", ...), the findall("") test that has
# no trailing colon, the stray "</i>" after the last file.truncate()) - presumably markup
# dropped during extraction; restore from the upstream source before relying on it.
#
# Sub/superscript pairs: operands captured from a plain <mi> are wrapped in <i>...</i>
# in the replacement, operands from <mn> or from <mi mathvariant="normal"> are emitted bare.
processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi)>(.+?)<((?:m:)?mi)>(.+?)", "<i>\\4</i><\\2><i>\\6</i>", processed_line)
processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi)>(.+?)<((?:m:)?mn)>(.+?)", "<i>\\4</i><\\2>\\6", processed_line)
processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mn)>(.+?)<((?:m:)?mn)>(.+?)", "\\4<\\2>\\6", processed_line)
processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mn)>(.+?)<((?:m:)?mi)>(.+?)", "\\4<\\2><i>\\6</i>", processed_line)
processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi) mathvariant=\"normal\">(.+?)<((?:m:)?mi)>(.+?)", "\\4<\\2><i>\\6</i>", processed_line)
processed_line = regex.sub(r"<((?:m:)?m(sub|sup))><((?:m:)?mi) mathvariant=\"normal\">(.+?)<((?:m:)?mn)>(.+?)", "\\4<\\2>\\6", processed_line)
processed_line = regex.sub(r"<(?:m:)?mo>{}".format(se.FUNCTION_APPLICATION), "", processed_line, flags=regex.IGNORECASE) # The ignore case flag is required to match here with the special FUNCTION_APPLICATION character, it's unclear why
# Fenced content is wrapped in literal parentheses.
processed_line = regex.sub(r"<(?:m:)?mfenced><((?:m:)(?:mo|mi|mn|mrow))>(.+?)", "(<\\1>\\2)", processed_line)
processed_line = regex.sub(r"<(?:m:)?mrow>([^>].+?)", "\\1", processed_line)
# Remaining identifiers: plain <mi> becomes <i>...</i>, mathvariant="normal" keeps only its text.
processed_line = regex.sub(r"<(?:m:)?mi>([^<]+?)", "<i>\\1</i>", processed_line)
processed_line = regex.sub(r"<(?:m:)?mi mathvariant=\"normal\">([^<]+?)", "\\1", processed_line)
# The operators + - − = × are surrounded by single spaces.
processed_line = regex.sub(r"<(?:m:)?mo>([+\-−=×])", " \\1 ", processed_line)
processed_line = regex.sub(r"<((?:m:)?m[no])>(.+?)", "\\2", processed_line)
processed_line = regex.sub(r"", "", processed_line)
processed_line = processed_line.strip()
processed_line = regex.sub(r"<i>", "", processed_line, flags=regex.DOTALL)
# Did we succeed? Is there any more MathML in our string?
if regex.findall("".format(mathml_count))
mathml_count = mathml_count + 1
else:
# Success! Replace the MathML with our new string.
processed_xhtml = processed_xhtml.replace(line, processed_line)
# Only rewrite the file when at least one replacement changed the document.
if processed_xhtml != xhtml:
file.seek(0)
file.write(processed_xhtml)
file.truncate()</i>
def abbreviation_replacement(text):
    """
    Expand common English contractions and pad punctuation with spaces.

    Matching is case-sensitive and purely textual (plain substring
    replacement, applied in the order listed below), so the input is
    expected to be lower-cased already.

    Suffix contractions such as ``'re`` or ``n't`` are expanded with a
    leading space so that the expansion does not fuse with the preceding
    word (``"you're"`` -> ``"you are"``, ``"don't"`` -> ``"do not"``).
    The punctuation marks ``, ! . ( ) ?`` are each surrounded by single
    spaces; no backslash is ever inserted into the result.

    Returns the rewritten string.
    """
    # Order matters: whole-word forms first, generic suffixes afterwards.
    expansions = (
        ("i'm", "i am"),
        ("'re", " are"),
        ("he's", "he is"),
        ("it's", "it is"),
        ("that's", "that is"),
        ("who's", "who is"),
        ("what's", "what is"),
        ("n't", " not"),
        ("'ve", " have"),
        ("'d", " would"),
        ("'ll", " will"),
    )
    for contraction, expansion in expansions:
        text = text.replace(contraction, expansion)

    # Every pattern is a literal, so str.replace is enough and avoids the
    # regex-replacement escaping that previously leaked "\" into the output.
    for mark in (",", "!", ".", "(", ")", "?"):
        text = text.replace(mark, " " + mark + " ")
    return text
# NOTE(review): fragment of a larger factory function; chk_path, extmethods, clsslots,
# base_type, yang_type, is_container, yang_name, parent_instance, choice_member,
# path_helper and supplied_register_path are defined outside this excerpt, and the
# indentation of the excerpt has been lost.
#
# For every public (non-underscore) attribute name of the registered extension
# object, reserve a slot named "_" + <name>.
if chk_path in extmethods:
for method in [i for i in dir(extmethods[chk_path]) if not i.startswith("_")]:
clsslots.append("_" + method)
class YANGBaseClass(base_type):
# we only create slots for things that are restricted
# in adding attributes to them - this means containing
# data nodes. This means that we can allow
# leaf._someattr to be used by consuming code - it
# also fixes an issue whereby we could set __slots__
# and try and inherit a variable-length inbuilt such
# as long, which is not allowed.
if yang_type in ["container", "list"] or is_container == "container":
__slots__ = tuple(clsslots)
# NOTE(review): "(?P.*)" is not a valid named group and "\g" is not a valid group
# reference; the group name looks like it was stripped during extraction - confirm
# against the upstream source.
_pybind_base_class = regex.sub("<(type|class) '(?P.*)'>", "\g", str(base_type))
def __new__(self, *args, **kwargs):
# Try to build the base type with the supplied arguments; if it rejects them
# with TypeError, fall back to constructing it without arguments.
try:
obj = base_type.__new__(self, *args, **kwargs)
except TypeError:
obj = base_type.__new__(self)
return obj
def __init__(self, *args, **kwargs):
# Per-instance bookkeeping; most values are captured from the enclosing scope.
self._default = False
self._mchanged = False
self._yang_name = yang_name
self._parent = parent_instance
self._choice = choice_member
self._path_helper = path_helper
self._supplied_register_path = supplied_register_path
def set_release_timestamp(self) -> None:
"""
If this ebook has not yet been released, set the first release timestamp in the metadata file.

The placeholder "1900-01-01T00:00:00Z" in self.metadata_xhtml marks an unreleased
ebook. When it is present, the metadata file and the colophon are rewritten with
the current UTC time.
"""
# NOTE(review): indentation is missing in this excerpt, and the now_friendly lines and the
# colophon xhtml.replace(...) line have broken quoting / fused statements (str.replace called
# with three string arguments) - presumably markup lost during extraction; restore from the
# upstream source before use.
if "1900-01-01T00:00:00Z" in self.metadata_xhtml:
# NOTE(review): utcnow() returns a naive datetime and is deprecated since Python 3.12.
now = datetime.datetime.utcnow()
# Drop fractional seconds and append "Z".
now_iso = regex.sub(r"\.[0-9]+$", "", now.isoformat()) + "Z"
# If the ISO string carried a "+hh:mm" offset, collapse it to a plain "Z".
now_iso = regex.sub(r"\+.+?Z$", "Z", now_iso)
now_friendly = "{0:%B %e, %Y, %l:%M <abbr class="\"time">%p</abbr>}".format(now)
now_friendly = regex.sub(r"\s+", " ", now_friendly).replace("AM", "a.m.").replace("PM", "p.m.").replace(" [^<]+?", "{}".format(now_iso), self.metadata_xhtml)
self.metadata_xhtml = regex.sub(r"[^<]+?", "{}".format(now_iso), self.metadata_xhtml)
with open(self.metadata_file_path, "w", encoding="utf-8") as file:
file.seek(0)
file.write(self.metadata_xhtml)
file.truncate()
# Reset the cached tree after the metadata text has been rewritten.
self._metadata_tree = None
# The colophon is opened read/write and rewritten in place.
with open(self.path / "src" / "epub" / "text" / "colophon.xhtml", "r+", encoding="utf-8") as file:
xhtml = file.read()
xhtml = xhtml.replace("<b>January 1, 1900, 12:00 <abbr class="\"time">a.m.</abbr></b>", "<b>{}</b>".format(now_friendly))
file.seek(0)
file.write(xhtml)
file.truncate()
def decode(self, tokens): # I hate regexps
    """
    Decode ``tokens`` with ``self.bpe`` and tidy up the spacing of the text.

    ``tokens`` may be a list or any object offering ``tolist()``; the
    latter is converted to a list first.  Only the first element of
    ``self.bpe.decode(tokens)`` is used.

    Clean-up steps, applied in order:

    1. the ``<|n|>`` marker, with at most one space on either side,
       becomes a newline;
    2. the space between a newline or ``(`` and a following word
       character is removed;
    3. the space after ``«``, ``"``, a newline or ``(`` is removed when
       that character follows a non-word character and precedes a word
       character;
    4. ``"- "`` between two word characters becomes ``"-"``.

    Returns the cleaned-up string.
    """
    if not isinstance(tokens, list):
        tokens = tokens.tolist()
    result = self.bpe.decode(tokens)[0]
    result = re.sub(r'( )?(<\|n\|>)( )?', r'\n', result)
    result = re.sub(r'([\n(]) (\w)', r'\g<1>\g<2>', result)
    # The original spelled this pattern as two adjacent literals (a raw one
    # and a non-raw one containing an invalid "\w" escape).  It is written
    # here as one raw string that matches exactly the same text.
    # NOTE(review): the apostrophe was probably meant to be part of the
    # character class; it never was, so it is deliberately left out.
    result = re.sub(r'(\W)([«"\n(]|^) (\w)', r'\g<1>\g<2>\g<3>', result)
    result = re.sub(r'(\w)- (\w)', r'\g<1>-\g<2>', result)
    return result
# NOTE(review): fragment of a larger function; sub_repeatedly, vowel, vowel_c,
# vowel_no_i_c and cons_c are defined outside this excerpt. The substitutions
# rewrite `text` in place and each one sees the output of the previous ones, so
# their order is significant.
# (2) -ill- after a vowel; repeat if necessary in case of VillVill
# sequence (ailloille respelling of ayoye)
text = sub_repeatedly("(" + vowel_c + ")ill", r"\1j", text)
# (3) any other ill, except word-initially (illustrer etc.)
text = re.sub("([^⁀])ill", r"\1ij", text)
# (4) final -il after a vowel; we consider final -Cil to contain a
# pronounced /l/ (e.g. 'il', 'fil', 'avril', 'exil', 'volatil', 'profil')
text = re.sub("(" + vowel_c + ")il([⁀‿])", r"\1j\2", text)
# (5) -il- after a vowel, before a consonant (not totally necessary;
# unlikely to occur normally, respelling can use -ill-)
text = re.sub("(" + vowel_c + ")il(" + cons_c + ")", r"\1j\2", text)
# y; include before removing final -e so we can distinguish -ay from
# -aye
text = re.sub("ay([⁀‿])", r"ai\1", text) # Gamay
text = re.sub("éy", "éj", text) # used in respellings, eqv. to 'éill'
text = re.sub("(" + vowel_no_i_c + ")y", r"\1iy", text)
text = re.sub("yi([" + vowel + ".])", r"y.y\1", text)
text = re.sub("'y‿", "'j‿", text) # il n'y‿a
# y between consonants, after a consonant before a boundary marker (with an
# optional following e), or right after the ⁀ marker before a consonant,
# is rewritten as i.
text = re.sub("(" + cons_c + ")y(" + cons_c + ")", r"\1i\2", text)
text = re.sub("(" + cons_c + ")ye?([⁀‿])", r"\1i\2", text)
text = re.sub("⁀y(" + cons_c + ")", r"⁀i\1", text)
text = re.sub("⁀y⁀", "⁀i⁀", text)
# Every y still left at this point becomes j.
text = re.sub("y", "j", text)
# nasal hacks
# make 'n' before liaison in certain cases both nasal and pronounced
text = re.sub("(⁀[mts]?on)‿", r"\1N‿", text) # mon, son, ton, on
text = re.sub("('on)‿", r"\1N‿", text) # qu'on, l'on
text = re.sub("([eu]n)‿", r"\1N‿", text) # en, bien, un, chacun etc.
# in bon, certain etc. the preceding vowel isn't nasal
text = re.sub("n‿", "N‿", text)
def parse_insert(insert, ref):
# Work out the type and target item of an insert, taken either from an explicit
# `insert` string or from a prefixed `ref` string; `insert` wins when both are given.
# NOTE(review): indentation is missing and the function is cut off after the
# final `else:` in this excerpt, so its return value is not visible here.
insert_type = None
ref_type = None
item = None
if insert:
# "<type> <item>": the first space-separated word is the type, the rest the item.
insert_parts = insert.partition(' ')
insert_type = insert_parts[0]
item = insert_parts[2].strip()
# strip unnecessary quotes from insert item
item = re.sub(r'^(["\'])|(["\'])$', '', item)
if item == '':
raise SAMParserStructureError("Insert item not specified in: {0}".format(insert))
elif ref:
# The first character of `ref` selects the reference kind; the rest is the item:
# '$' -> stringref, '*' -> idref, '#' -> nameref, '%' -> keyref.
if ref[0] == '$':
item = ref[1:]
ref_type = 'stringref'
elif ref[0] == '*':
item = ref[1:]
ref_type = 'idref'
elif ref[0] == '#':
item = ref[1:]
ref_type = 'nameref'
elif ref[0] == '%':
item = ref[1:]
ref_type = 'keyref'
else:
# NOTE(review): fragment of a larger function; sub_repeatedly, vowels, vow, accents
# and stress_accents are defined outside this excerpt. Each substitution rewrites
# `pron` and sees the output of the previous ones, so their order is significant.
# 2. remaining geminate n after the stress between vowels
pron = sub_repeatedly("(" + stress_accents + ".*?" + vowels + accents + "?n)ː(" + vowels + ")", r"\1(ː)\2", pron)
# 3. remaining ž and n between vowels
# (ˑ is used as a temporary marker here; step 6 turns it back into ː)
pron = sub_repeatedly("(" + vowels + accents + "?[žn])ː(" + vowels + ")", r"\1ˑ\2", pron)
# 4. ssk (and zsk, already normalized) immediately after the stress
pron = re.sub("(" + vowels + stress_accents + "[^" + vow + "]*s)ː(k)", r"\1ˑ\2", pron)
# 5. eliminate remaining gemination, except for ɕː and ӂː
# (a ː directly after a parenthesis is also kept, which preserves the "(ː)" from step 2)
pron = re.sub("([^ɕӂ\(\)])ː", r"\1", pron)
# 6. convert special gemination symbol ˑ to regular gemination
pron = re.sub("ˑ", "ː", pron)
# handle soft and hard signs, assimilative palatalization
# 1. insert j before i when required
pron = re.sub("ʹi", "ʹji", pron)
# 2. insert glottal stop after hard sign if required
pron = re.sub("ʺ([aɛiouy])", r"ʔ\1", pron)
# 3. (ь) indicating optional palatalization
pron = re.sub("\(ʹ\)", "⁽ʲ⁾", pron)
# 4. assimilative palatalization of consonants when followed by
# front vowels or soft sign
pron = re.sub("([mnpbtdkgfvszxɣrl])([ː()]*[eiäạëöüʹ])", r"\1ʲ\2", pron)
pron = re.sub("([cĵ])([ː()]*[äạöüʹ])", r"\1ʲ\2", pron)
# 5. remove hard and soft signs
pron = re.sub("[ʹʺ]", "", pron)
# reduction of unstressed word-final -я, -е; but special-case
# unstressed не, же. Final -я always becomes [ə]; final -е may
# become [ə], [e], [ɪ] or [ɨ] depending on the part of speech and
# the preceding consonants/vowels.
pron = re.sub("[äạ]⁀", "ə⁀", pron)
pron = re.sub("⁀nʲe⁀", "⁀nʲi⁀", pron)
pron = re.sub("⁀že⁀", "⁀žy⁀", pron)