position = p.start(1) + diffrence
diffrence += -len(replace_from) + len(replace_to)
# Remove spaces
answer = answer[:position] + answer[position:].replace(replace_from, replace_to, 1)
# Change placeholders back to spaces
answer = answer.replace("##DONOTTOUCHTHISSPACE##", ' ')
detokenized_answers.append(answer)
return detokenized_answers
# Prepare vocab tokens from line
re_split = re.compile('(?: |^)(?:▁(▁))?([' + re.escape(r'`~!@#$%^&*()-_=+{[}]:;\'",<>?/|\\') + '0-9]|newlinechar|\.+)')
def sentence_split(sentence):
# If the embedded detokenizer is not enabled, just split by spaces
if not preprocessing['embedded_detokenizer']:
return sentence.split()
global re_split
# Prepare to split the sentence into words by ' ▁'
line = ' ▁▁' + sentence[1:].replace('▁', '▁▁')
line = re_split.sub(r' ▁\1\2 ▁', line)
# Split, filter and return
return [token for token in (t.strip() for t in line.split(' ▁')) if token and token != '▁']
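# A small, self-contained sketch of the escaping idea behind re_split above:
# re.escape() lets a literal punctuation set be embedded safely inside a
# character class (the set and the sample sentence are illustrative, not taken
# from the original data).
import re

_PUNCT = '.,!?()[]'
_splitter = re.compile('([' + re.escape(_PUNCT) + '])')

# Splitting keeps the punctuation as separate tokens:
# ['hello', ',', ' world', '!', '']
print(_splitter.split('hello, world!'))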
# Load json file with BPE join pairs
def check_urls(index, question, answer):
global full_sentence_valid_url
full_sentence_valid_url = False
valid_url = False
# Disabled
if score_settings['incorrect_url_modifier_value'] is None:
return 0
# Find all URLs in the sentence
for url in re.finditer('http(?:s?):(//([^/]*?)/(?:[^ ])*?(?=$|[' + re.escape(score_settings['url_delimiters']) + ']))?', answer):
# Check if result is in cache already and return it
if url.group(0) in url_cache and url_cache[url.group(0)][1] > time.time():
if url_cache[url.group(0)][0] == 0:
return score_settings['incorrect_url_modifier_value']
# URL not cached (or cache entry expired) - check it
else:
# Send HEAD request and check HTTP response code
try:
request = requests.head(url.group(0))
code = request.status_code
except Exception:
code = 0
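# A rough standalone sketch of the caching scheme implied above, where
# url_cache maps a URL to a (status, expiry_time) pair; the function and
# variable names here are illustrative, not the original ones.
import time
import requests

_url_cache = {}

def url_is_reachable(url, ttl=3600):
    cached = _url_cache.get(url)
    if cached and cached[1] > time.time():
        return cached[0]
    try:
        ok = requests.head(url, timeout=5).status_code < 400
    except requests.RequestException:
        ok = False
    _url_cache[url] = (ok, time.time() + ttl)
    return ok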
def __findtagpairspans(
s: str,
tag: str, closetag: Optional[str] = None,
useregex: bool = False
) -> Iterable[Tuple[Tuple[int, int], ...]]:
if closetag is None or tag == closetag:
yield from __findeqtagpairspans(s, tag, useregex=useregex)
return
if not useregex:
tag = re.escape(tag)
closetag = re.escape(closetag)
retags = re.compile(r"(?P<__open>{})|(?P<__close>{})".format(tag, closetag))
startspans = []
for match in retags.finditer(s):
opengroup = match.group("__open")
if opengroup:
startspans.append(match.span())
continue
closegroup = match.group("__close")
if closegroup and startspans:
startspan = startspans.pop()
endspan = match.span()
yield startspan, endspan
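# Rough usage sketch of the pairing loop above (it assumes distinct open and
# close tags, so the __findeqtagpairspans branch is not taken, and that each
# popped (startspan, endspan) pair is yielded at the end of the loop):
# >>> list(__findtagpairspans("a <b>bold</b> word", "<b>", "</b>"))
# [((2, 5), (9, 13))]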
def extract(s: str, entities: Iterable[str], useregex=False, ignorecase=True) -> Iterable[str]:
for m in re.compile(
r"\b(?:{})\b".format(r"|".join(
e if useregex else re.escape(e).replace(' ', r"\s+") for e in entities
)),
re.I if ignorecase else 0
).finditer(s):
yield m.group(0)
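# Example use of extract() above (assumes Python 3.7+, where re.escape() leaves
# spaces untouched so replace() turns them into \s+; the input text and entity
# list are illustrative):
# >>> list(extract("Ada Lovelace met Alan Turing.", ["alan turing", "ada"]))
# ['Ada', 'Alan Turing']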
if boss is None:
return
name = loc.name
editor = boss.edit_file_requested(name)
if editor is None:
return
editor = editor.editor
if loc.line_number is not None:
block = editor.document().findBlockByNumber(loc.line_number - 1) # blockNumber() is zero based
if not block.isValid():
return
c = editor.textCursor()
c.setPosition(block.position(), c.MoveAnchor)
editor.setTextCursor(c)
if loc.text_on_line is not None:
editor.find(regex.compile(regex.escape(loc.text_on_line)))
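# The find() call above hands the editor a compiled pattern, so escaping the
# literal line text first keeps characters such as '.' or '(' from acting as
# regex syntax. A standalone equivalent using the same 'regex' module:
import regex

pattern = regex.compile(regex.escape("price (USD): 3.50"))
assert pattern.search("total price (USD): 3.50 per unit")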
contents_file_name = "Item_" + contents_file_name + ".txt"
try:
contents_file = codecs.open(os.path.join(json_loc, contents_file_name),
mode='r', encoding='utf-8')
except FileNotFoundError:
print("\t{0} not found.".format(contents_file_name))
continue
contents = json.load(contents_file)
print("{0} loaded.".format(contents_file_name))
repcount = 0 # Number of items in ItemBags translated from this file
for item in contents:
name_en = item["tr_text"]
if name_en != "":
name_jp = regex.escape(item["jp_text"]) # Escape [] in In/Ba/Ou
repcount += len(regex.findall(r"\[" + name_jp + r"\]", itembags))
itembags = regex.sub(r"\[" + name_jp + r"\]",
"[" + name_en + "]",
itembags)
print(" Translated {0} item name{1}."
.format(repcount,
"" if repcount == 1 else "s"))
contents_file.close()
print("{0} closed.".format(contents_file_name))
# Clean up eyelash colours and Cast parts
itembags = regex.sub(r'Black\] \(4 colors\)',
'(4 colors)]',
itembags)
itembags = regex.sub(r'Black (.+)\] \(4 colors\)',
r'\1 (4 colors)]',
itembags)
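# A compact illustration of the escaped-name substitutions above: escaping the
# looked-up name keeps any brackets or other regex metacharacters inside it
# literal (the item names and text here are made up for the example).
import regex

jp_name, en_name = "Abc [Ou]", "Abc [Ou] EN"
bag_text = "Drops: [Abc [Ou]] x1"
bag_text = regex.sub(r"\[" + regex.escape(jp_name) + r"\]",
                     "[" + en_name + "]", bag_text)
# -> "Drops: [Abc [Ou] EN] x1"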
def dep_counts(name):
ref_patterns = {
'alone': r'^{} \d'.format(re.escape(name)),
'commentor': r'{} on'.format(re.escape(name)),
'commentee': r'on {} \d'.format(re.escape(name))
}
commentee_title_pattern = r'on {}'.format(re.escape(name))
ret = {
'version title exact match': text.VersionSet({"title": name}).count(),
'version title match commentor': text.VersionSet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
'version title match commentee': text.VersionSet({"title": {"$regex": commentee_title_pattern}}).count(),
'history title exact match': history.HistorySet({"title": name}).count(),
'history title match commentor': history.HistorySet({"title": {"$regex": ref_patterns["commentor"]}}).count(),
'history title match commentee': history.HistorySet({"title": {"$regex": commentee_title_pattern}}).count(),
}
for pname, pattern in ref_patterns.items():
def __radd__(self, other):
if isinstance(other, String):
return Pattern(re_escape(str(other)) + str(self))
if isinstance(other, str):
return Pattern(re_escape(other) + str(self))
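# A minimal self-contained sketch of the same reflected-add idea: a plain str
# on the left of '+' is escaped before being prepended, so literal text can be
# glued onto a pattern safely. The real String/Pattern classes are richer;
# re_escape here is just re.escape, and subclassing str is only for brevity.
import re

class Pattern(str):
    def __radd__(self, other):
        return Pattern(re.escape(str(other)) + str(self))

combined = "v1.2 " + Pattern(r"\d+")
assert re.fullmatch(combined, "v1.2 42")   # the '.' in "v1.2" stays literal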
if u' '.join(TOKENIZER.tokenize(variant)) in _text[index]:
exact_matches.append(variant)
else:
fuzzy_variants.append(variant)
exact_matches.sort(key=lambda s: len(TOKENIZER.tokenize(s)), reverse=True)
fuzzy_variants.sort(key=lambda s: len(TOKENIZER.tokenize(s)), reverse=True)
variants_list = exact_matches + fuzzy_variants
for variant in variants_list:
original_text = self._get_entity_substring_from_text(self.__processed_texts[index], variant)
if original_text:
value_final_list.append(variants_to_values[variant])
original_final_list.append(original_text)
boundary_punct_pattern = re.compile(r'(^[{0}]+)|([{0}]+$)'.format(re.escape(string.punctuation)))
original_text_ = boundary_punct_pattern.sub("", original_text)
_pattern = re.compile(r'\b%s\b' % re.escape(original_text_), flags=_re_flags)
self.__tagged_texts[index] = _pattern.sub(self.tag, self.__tagged_texts[index])
# Instead of dropping it completely as for other entities,
# we replace it with the tag to avoid matching non-contiguous segments
self.__processed_texts[index] = _pattern.sub(self.tag, self.__processed_texts[index])
value_final_list_.append(value_final_list)
original_final_list_.append(original_final_list)
return value_final_list_, original_final_list_
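# Standalone sketch of the two escaping steps used above: strip punctuation at
# the boundaries of the matched text, then tag whole-word occurrences only
# (the sample text and the '__city__' tag are illustrative):
import re
import string

boundary_punct = re.compile(r'(^[{0}]+)|([{0}]+$)'.format(re.escape(string.punctuation)))
matched = boundary_punct.sub("", '"new york!"')           # -> 'new york'
tagged = re.sub(r'\b%s\b' % re.escape(matched), '__city__',
                "i live in new york now")                 # -> 'i live in __city__ now'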
method_path = method_meta["plugin_info"]["parent_path"]
if method_path not in self.cached_regex:
regex_string = method_meta["regex_pattern"]
if "case_sensitive" in method_meta and not method_meta["case_sensitive"]:
regex_string = "(?i)%s" % regex_string
if method_meta["multiline"]:
try:
self.cached_regex[method_path] = regex.compile("%s{e<=%s}" % (
regex_string,
settings.FUZZY_REGEX_ALLOWABLE_ERRORS
), regex.MULTILINE | regex.DOTALL | regex.ENHANCEMATCH)
except regex.error:
self.cached_regex[method_path] = regex.compile("%s{e<=%s}" % (
regex.escape(regex_string),
settings.FUZZY_REGEX_ALLOWABLE_ERRORS
), regex.MULTILINE | regex.DOTALL | regex.ENHANCEMATCH)
else:
try:
self.cached_regex[method_path] = regex.compile("%s{e<=%s}" % (
regex_string,
settings.FUZZY_REGEX_ALLOWABLE_ERRORS
), regex.ENHANCEMATCH)
except regex.error:
self.cached_regex[method_path] = regex.compile("%s{e<=%s}" % (
regex.escape(regex_string),
settings.FUZZY_REGEX_ALLOWABLE_ERRORS
), regex.ENHANCEMATCH)
return self.cached_regex[method_path]
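# A small standalone demo of the fuzzy-matching fallback above: the regex
# module's {e<=N} constraint allows up to N errors, and escaping the stored
# pattern turns it into a literal when it is not valid regex syntax. The error
# budget of 2 and the sample log line are illustrative, and the (?:...) wrapper
# is added here so the error budget applies to the whole phrase.
import regex

fuzzy = regex.compile("(?:%s){e<=%s}" % (regex.escape("connection refused"), 2),
                      regex.ENHANCEMATCH)
assert fuzzy.search("conection refsed while dialing host")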