Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def build_common_words():
    """
    Group the entries of ``common-words.txt`` by their length.

    Lines starting with ``#`` are skipped and trailing whitespace is stripped
    from every other line. The resulting word, and the form returned for it by
    ``load.pluralize``, are each recorded unless they appear in the
    ``surfaces_all`` or ``symbols`` of ``load.units(lang)``.

    :return: A ``defaultdict(list)`` mapping a length to the recorded words
             of that length, in file order
    """
    path = os.path.join(TOPDIR, "common-words.txt")

    # The unit tables are the same for every line of the file, so look them
    # up once here rather than four times per line inside the loop.
    units = load.units(lang)
    surfaces = units.surfaces_all
    symbols = units.symbols

    words = defaultdict(list)  # Collect words based on length
    with open(path, "r", encoding="utf-8") as file:
        for line in file:
            if line.startswith("#"):
                continue
            word = line.rstrip()
            # The word and its plural go through the same filter, word first.
            for candidate in (word, load.pluralize(word)):
                if candidate not in surfaces and candidate not in symbols:
                    words[len(candidate)].append(candidate)
    return words
def clean_surface(surface, span):
"""
Remove spurious characters from a quantity's surface.
"""
# NOTE(review): no return statement is visible in this span, and `span` is
# only reassigned, never otherwise used; the definition presumably
# continues beyond the lines shown here. Confirm.
# Hyphens are turned into spaces before any stripping happens.
surface = surface.replace("-", " ")
# Prefixes to strip: the word "and" and a single space.
no_start = ["and", " "]
# Suffixes to strip: a single space, plus every entry of reg.miscnum(lang)
# with a space in front of it.
no_end = [" "] + [" {}".format(misc) for misc in reg.miscnum(lang)]
# Repeat until nothing is stripped any more: removing one prefix or suffix can
# expose another one behind it.
found = True
while found:
found = False
for word in no_start:
# The comparison is done on the lowercased surface; the slice is taken from
# the surface as it is.
if surface.lower().startswith(word):
surface = surface[len(word) :]
# Dropping leading characters moves the start of the span forward.
span = (span[0] + len(word), span[1])
found = True
for word in no_end:
if surface.lower().endswith(word):
surface = surface[: -len(word)]
# Dropping trailing characters moves the end of the span backward.
span = (span[0], span[1] - len(word))
found = True
# NOTE(review): no `def` line is visible for the statements below, yet they end
# in a `return`. `unit` and `slash` are read but never assigned here, so they
# presumably come from an enclosing function's parameters. Confirm where this
# fragment belongs.
if not surface:
surface = unit.replace(".", "")
# Numeric exponents: an optional minus sign followed by a run of ASCII digits
# and/or whatever reg.unicode_superscript_regex() adds to the character class.
power = re.findall(r"-?[0-9%s]+" % reg.unicode_superscript_regex(), surface)
# Written exponents: whole-word matches of the entries obtained by iterating
# reg.powers(lang).
power_written = re.findall(r"\b(%s)\b" % "|".join(reg.powers(lang)), surface)
if power:
# A match that is a key of reg.unicode_superscript() is replaced by the value
# stored for it; any other match is kept unchanged.
power = [
reg.unicode_superscript()[i] if i in reg.unicode_superscript() else i
for i in power
]
# All matches are concatenated and parsed as one integer.
power = "".join(power)
# A truthy `slash` flips the sign of the exponent.
new_power = -1 * int(power) if slash else int(power)
# NOTE(review): the findall pattern above builds its character class from
# reg.unicode_superscript_regex(), but this pattern interpolates
# reg.unicode_superscript(), which is used as a mapping just above. Confirm
# that the text this produces inside the character class is what is intended.
surface = re.sub(r"\^?-?[0-9%s]+" % reg.unicode_superscript(), "", surface)
elif power_written:
# Only the first written match is used: it is looked up in reg.powers(lang)
# and then removed from the surface.
exponent = reg.powers(lang)[power_written[0]]
new_power = -exponent if slash else exponent
surface = re.sub(r"\b%s\b" % power_written[0], "", surface).strip()
else:
# No exponent found in either form: the power is 1, or -1 when `slash` is
# truthy.
new_power = -1 if slash else 1
return surface, new_power
def unit_to_spoken(unit, count=1):
    """
    Convert a given unit to the unit in words, correctly inflected.

    A unit that has surfaces is spoken as its first surface. A unit without
    surfaces is handled as a derived unit: the name built from its
    positive-power dimensions is inflected and substituted into ``unit.name``.

    :param unit: The unit to be converted
    :param count: The value of the quantity (i.e. 1 for one watt, 2 for two
                  seconds)
    :return: A string with the correctly inflected spoken version of the unit
    """
    if unit.surfaces:
        return load.pluralize(unit.surfaces[0], count)

    # derived unit: only the part named by the positive-power dimensions is
    # inflected, the rest of the name is left as it is
    positive_dimensions = [dim for dim in unit.dimensions if dim["power"] > 0]
    positive_name = parser.name_from_dimensions(positive_dimensions, lang)
    if not positive_name:
        # str.replace with an empty search string inserts the replacement
        # between every character, so there is nothing safe to substitute.
        return unit.name

    # Pass count along, as the branch for simple units does, so that a derived
    # unit is inflected for the quantity's value as well.
    inflected_name = load.pluralize(positive_name, count)
    return unit.name.replace(positive_name, inflected_name)
# NOTE(review): no `def` line is visible for the statements below, yet they end
# in a `return`. They perform the same steps as `build_common_words`, so this
# is presumably a stray duplicate. Confirm before removing.
# Read raw 4 letter file
path = os.path.join(TOPDIR, "common-words.txt")
words = defaultdict(list) # Collect words based on length
with open(path, "r", encoding="utf-8") as file:
for line in file:
# Lines starting with "#" are skipped.
if line.startswith("#"):
continue
line = line.rstrip()
# Record the word unless it is found in the unit surfaces or symbols.
if (
line not in load.units(lang).surfaces_all
and line not in load.units(lang).symbols
):
words[len(line)].append(line)
# The form returned by load.pluralize goes through the same check and is
# recorded under its own length.
plural = load.pluralize(line)
if (
plural not in load.units(lang).surfaces_all
and plural not in load.units(lang).symbols
):
words[len(plural)].append(plural)
return words