Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this is the tail of a function whose `def` line is not visible
# in this excerpt, and the original indentation has been lost.
# Group 2 of the match is the replacement text; an escaped slash ("\/") in it
# is turned back into a plain "/".
to = match.group(2)
to = to.replace('\\/', '/')
# Group 3 carries the optional flag characters. match.group(3) raises
# IndexError when the pattern has no third group; slicing itself never does.
try:
fl = match.group(3)
if fl is None:
fl = ''
# Drop the first character of the group -- presumably the delimiter that was
# captured together with the flags; confirm against the pattern.
fl = fl[1:]
except IndexError:
fl = ''
# Build Python regex flags
# count is later passed to regex.subn: 1 replaces only the first occurrence,
# 0 replaces every occurrence.
count = 1
flags = 0
for f in fl:
if f == 'i':
flags |= regex.IGNORECASE
elif f == 'g':
count = 0
else:
# Any other flag character aborts with a (None, message) pair.
return None, f"Unknown flag: {f}"
def actually_doit(original):
    """Run the prepared substitution against ``original.message``.

    Returns ``(original, new_text)`` when at least one replacement was made,
    ``(None, None)`` when nothing was replaced, and ``(None, error_text)``
    when reading the message or substituting raised.
    """
    try:
        text = original.message
        # Strip the HEADER prefix, when present, before substituting.
        body = text[len(HEADER):] if text.startswith(HEADER) else text
        replaced, hits = regex.subn(fr, to, body, count=count, flags=flags)
    except Exception as e:
        return None, f"u dun goofed m8: {str(e)}"
    else:
        if hits > 0:
            return original, replaced
    return None, None
""" set up the unchanging portion of this """
to_send_data_dict = dict()
to_send_data_dict['userName'] = if_config_vars['user_name']
to_send_data_dict['licenseKey'] = if_config_vars['license_key']
to_send_data_dict['projectName'] = if_config_vars['project_name']
to_send_data_dict['instanceName'] = HOSTNAME
to_send_data_dict['agentType'] = get_agent_type_from_project_type()
if 'METRIC' in if_config_vars['project_type'] and 'sampling_interval' in if_config_vars:
to_send_data_dict['samplingInterval'] = str(if_config_vars['sampling_interval'])
logger.debug(to_send_data_dict)
return to_send_data_dict
# Script entry point followed by helper patterns and constants.
# NOTE(review): indentation was lost in this excerpt, so it is not visible
# which of the assignments below sit inside this `if` -- confirm against the file.
if __name__ == "__main__":
# declare a few vars
# Boolean-like tokens: "T"/"TRUE" and "F"/"FALSE", case-insensitive. Neither
# pattern is anchored, so search() also hits inside longer words.
TRUE = regex.compile(r"T(RUE)?", regex.IGNORECASE)
FALSE = regex.compile(r"F(ALSE)?", regex.IGNORECASE)
# Runs of whitespace, slashes, underscores and colons (the backslashes before
# "/", "_" and ":" are redundant escapes; the patterns match the literal char).
SPACES = regex.compile(r"\s+")
SLASHES = regex.compile(r"\/+")
UNDERSCORE = regex.compile(r"\_+")
COLONS = regex.compile(r"\:+")
# Single literal characters: "[", "]", "." and ",".
LEFT_BRACE = regex.compile(r"\[")
RIGHT_BRACE = regex.compile(r"\]")
PERIOD = regex.compile(r"\.")
COMMA = regex.compile(r"\,")
# Any single character outside ASCII letters and digits.
NON_ALNUM = regex.compile(r"[^a-zA-Z0-9]")
# Either a signed four-digit offset with an optional colon (e.g. "+0530",
# "-05:30") or three whitespace-separated words.
# Presumably the text forms accepted for strptime's %z -- confirm.
PCT_z_FMT = regex.compile(r"[\+\-][0-9]{2}[\:]?[0-9]{2}|\w+\s+\w+\s+\w+")
# Three or four uppercase ASCII letters; presumably %Z zone abbreviations -- confirm.
PCT_Z_FMT = regex.compile(r"[A-Z]{3,4}")
# Non-greedy capture of the text between "{" and "}".
FORMAT_STR = regex.compile(r"{(.*?)}")
# Local host name truncated at the first ".".
HOSTNAME = socket.gethostname().partition('.')[0]
# strptime-style timestamp layouts plus the literal marker 'epoch'.
ISO8601 = ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S', '%Y%m%dT%H%M%SZ', 'epoch']
# Separator string; presumably joins/splits nested JSON key paths -- confirm.
JSON_LEVEL_DELIM = '.'
def calc_unwanted_chars_re(self):
    """Compile the patterns used to strip unwanted characters and stop words.

    Sets two attributes:

    * ``self.unwanted_chars_re`` -- matches runs of characters that are
      neither alphanumeric nor listed in ``self._safe_chars``.
    * ``self.unwanted_chars_and_words_re`` -- the same pattern extended with
      an alternative that matches any entry of ``self._stop_words`` not
      adjacent to an alphanumeric character, or ``None`` when there are no
      stop words.

    NOTE(review): ``re`` must be the third-party ``regex`` module here
    (presumably ``import regex as re``): the stdlib ``re`` supports neither
    ``\\p{...}`` nor named lists passed as keyword arguments -- confirm.
    """
    # Raw strings keep ``\p`` / ``\L`` literal without relying on Python
    # passing unknown escapes through; the resulting text is unchanged.
    unwanted_chars_re = r'[^\p{{AlNum}}{safe_chars}]+'.format(
        safe_chars=re.escape(self._safe_chars or ''))
    self.unwanted_chars_re = re.compile(unwanted_chars_re, re.IGNORECASE)

    if self._stop_words:
        # ``\L<stop_words>`` refers to the named list supplied to compile()
        # below. The previous text of this pattern, ``(?)``, was an invalid
        # group and never referenced the list, so compilation failed.
        unwanted_chars_and_words_re = (
            unwanted_chars_re
            + r'|(?<!\p{AlNum})(?:\L<stop_words>)(?!\p{AlNum})'
        )
        self.unwanted_chars_and_words_re = re.compile(
            unwanted_chars_and_words_re,
            re.IGNORECASE,
            stop_words=self._stop_words,
        )
    else:
        self.unwanted_chars_and_words_re = None
""" set up the unchanging portion of this """
to_send_data_dict = dict()
to_send_data_dict['userName'] = if_config_vars['user_name']
to_send_data_dict['licenseKey'] = if_config_vars['license_key']
to_send_data_dict['projectName'] = if_config_vars['project_name']
to_send_data_dict['instanceName'] = HOSTNAME
to_send_data_dict['agentType'] = get_agent_type_from_project_type()
if 'METRIC' in if_config_vars['project_type'] and 'sampling_interval' in if_config_vars:
to_send_data_dict['samplingInterval'] = str(if_config_vars['sampling_interval'])
logger.debug(to_send_data_dict)
return to_send_data_dict
# Script entry point followed by helper patterns and constants.
# NOTE(review): with the indentation lost, it is not visible which of the
# assignments below belong to this `if` -- confirm against the file.
if __name__ == "__main__":
# declare a few vars
# Case-insensitive, unanchored patterns for "T"/"TRUE" and "F"/"FALSE".
TRUE = regex.compile(r"T(RUE)?", regex.IGNORECASE)
FALSE = regex.compile(r"F(ALSE)?", regex.IGNORECASE)
# One or more whitespace / "/" / "_" / ":" characters respectively (the
# backslashes before "/", "_" and ":" are redundant escapes).
SPACES = regex.compile(r"\s+")
SLASHES = regex.compile(r"\/+")
UNDERSCORE = regex.compile(r"\_+")
COLONS = regex.compile(r"\:+")
# Exactly one literal "[", "]", "." or ",".
LEFT_BRACE = regex.compile(r"\[")
RIGHT_BRACE = regex.compile(r"\]")
PERIOD = regex.compile(r"\.")
COMMA = regex.compile(r"\,")
# One character that is not an ASCII letter or digit.
NON_ALNUM = regex.compile(r"[^a-zA-Z0-9]")
# Lazily captures whatever sits between "{" and "}".
FORMAT_STR = regex.compile(r"{(.*?)}")
# Host name up to (not including) the first ".".
HOSTNAME = socket.gethostname().partition('.')[0]
# strptime-style timestamp layouts plus the literal marker 'epoch'.
ISO8601 = ['%Y-%m-%dT%H:%M:%SZ', '%Y-%m-%dT%H:%M:%S', '%Y%m%dT%H%M%SZ', 'epoch']
# Separator string; presumably used for nested JSON key paths -- confirm.
JSON_LEVEL_DELIM = '.'
# Regex source (not compiled here) that matches a comma or a tab.
CSV_DELIM = r",|\t"
# Integer constant; presumably a retry limit -- its use is not visible here.
ATTEMPTS = 3
def __init__(self, **kwargs):
    """Compile the token pattern and set up optional morphological analysis.

    Args:
        annotators: None or empty set (only tokenizes). A container holding
            'lemma' and/or 'pos' additionally creates a
            ``pymorphy2.MorphAnalyzer``; a warning is logged for any
            non-empty value other than exactly ``{'lemma'}``.
    """
    self._regexp = regex.compile(
        '(%s)|(%s)' % (self.ALPHA_NUM, self.NON_WS),
        flags=regex.IGNORECASE | regex.UNICODE | regex.MULTILINE,
    )

    annotators = kwargs.get('annotators')
    if annotators is None:
        # None is a documented value; without this, len(None) and the
        # membership tests below raised TypeError.
        annotators = {}

    lemma_only = len(annotators) == 1 and 'lemma' in annotators
    if len(annotators) > 0 and not lemma_only:
        logger.warning('%s only tokenizes! Skipping annotators: %s',
                       type(self).__name__, annotators)
    self.annotators = annotators

    # The analyzer is only built when an annotator that needs it is requested.
    if 'lemma' in self.annotators or 'pos' in self.annotators:
        self.ma = pymorphy2.MorphAnalyzer()
    else:
        self.ma = None
if REGEX_SUPPORT:
    # Re-export the common regex flags (short alias, then long name) so that
    # callers do not have to import both re and the backrefs libs.
    D, DEBUG = regex.D, regex.DEBUG
    A, ASCII = regex.A, regex.ASCII
    B, BESTMATCH = regex.B, regex.BESTMATCH
    E, ENHANCEMATCH = regex.E, regex.ENHANCEMATCH
    F, FULLCASE = regex.F, regex.FULLCASE
    I, IGNORECASE = regex.I, regex.IGNORECASE
    L, LOCALE = regex.L, regex.LOCALE
    M, MULTILINE = regex.M, regex.MULTILINE
    R, REVERSE = regex.R, regex.REVERSE
    S, DOTALL = regex.S, regex.DOTALL
    U, UNICODE = regex.U, regex.UNICODE
    X, VERBOSE = regex.X, regex.VERBOSE
    V0, VERSION0 = regex.V0, regex.VERSION0
    V1, VERSION1 = regex.V1, regex.VERSION1
def regex_match_score(prediction, pattern):
    """Check if the prediction matches the given regular expression.

    The pattern is compiled with IGNORECASE, UNICODE and MULTILINE and
    applied with ``match()``, so it has to match starting at the first
    character of ``prediction``.

    Args:
        prediction: Text to test.
        pattern: Regular-expression source.

    Returns:
        True when the pattern matches at the start of ``prediction``; False
        when it does not match or when the pattern cannot be compiled (a
        warning is logged in that case).
    """
    try:
        compiled = re.compile(
            pattern,
            flags=re.IGNORECASE | re.UNICODE | re.MULTILINE,
        )
    except Exception:
        # Best effort: any compile failure counts as "no match". Exception
        # (not BaseException) lets KeyboardInterrupt/SystemExit propagate.
        logger.warning('Regular expression failed to compile: %s', pattern)
        return False
    return compiled.match(prediction) is not None
# NOTE(review): this is the interior of a function whose `def` line and end
# are not visible in this excerpt; each step rewrites the `xhtml` string.
xhtml = smartypants.smartypants(xhtml) # Attr.u *should* output unicode characters instead of HTML entities, but it doesn't work
# Convert entities again
xhtml = html.unescape(xhtml) # This converts html entities to unicode
# NOTE(review): as written, the replacement r"&\1" reproduces exactly what the
# pattern matched, so this substitution changes nothing. The trailing comment
# suggests it was meant to re-escape bare ampersands -- confirm against upstream.
xhtml = regex.sub(r"&([^#a-z])", r"&\1", xhtml) # Oops! html.unescape also unescapes plain ampersands...
# Replace no-break hyphen with regular hyphen
xhtml = xhtml.replace(se.NO_BREAK_HYPHEN, "-")
# Replace sequential em dashes with the two or three em dash character
# (three first, so a run of three is not consumed as two plus one)
xhtml = xhtml.replace("———", "⸻")
xhtml = xhtml.replace("——", "⸺")
# Smartypants doesn't do well on em dashes followed by open quotes. Fix that here
xhtml = regex.sub(r"—”([a-z])", r"—“\1", xhtml, flags=regex.IGNORECASE)
xhtml = regex.sub(r"—’([a-z])", r"—‘\1", xhtml, flags=regex.IGNORECASE)
# NOTE(review): the pattern below starts with a plain hyphen while the
# replacement starts with an em dash -- confirm that this is intended.
xhtml = regex.sub(r"-“<p></p>", r"—”<p></p>", xhtml, flags=regex.IGNORECASE)
# The "{}" placeholder is filled with se.HAIR_SPACE, placed between the two closing quotes
xhtml = regex.sub(r"‘”<p></p>", r"’{}”<p></p>".format(se.HAIR_SPACE), xhtml, flags=regex.IGNORECASE)
# Remove spaces between en and em dashes
# Note that we match at least one character before the dashes, so that we don't catch start-of-line em dashes like in poetry.
xhtml = regex.sub(r"([^\.\s])\s*([–—])\s*", r"\1\2", xhtml)
# First, remove stray word joiners
xhtml = xhtml.replace(se.WORD_JOINER, "")
# Some older texts use the ,— construct; remove that archaism
xhtml = xhtml.replace(",—", "—")
# Fix some common em-dash transcription errors
# (a hyphen between ":" or ";" and a letter becomes an em dash)
xhtml = regex.sub(r"([:;])-([a-z])", r"\1—\2", xhtml, flags=regex.IGNORECASE)