Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def perform_computation(num_sentences):
    """Run ``wer`` once over ``num_sentences`` identical sentence pairs.

    A reference list and a hypothesis list of equal length are built, each
    holding one fixed sentence repeated, and handed to ``wer``. The score
    is discarded and nothing is returned.
    """
    reference = ["this is a speed test"] * num_sentences
    hypothesis = ["this is not a speed test"] * num_sentences
    wer(reference, hypothesis)
def test_fail_on_empty_ground_truth(self):
    """Each listed jiwer measure must raise ValueError for an empty ground truth."""
    measures = (
        jiwer.wer,
        jiwer.wil,
        jiwer.wip,
        jiwer.mer,
        jiwer.compute_measures,
    )
    for measure in measures:
        # Empty ground-truth string paired with a non-empty hypothesis.
        with self.assertRaises(ValueError):
            measure("", "test")
# Excerpt of an evaluation loop. The enclosing definition, and the initial
# values of wer_score, corrected_sent_cnt, acc_score and f1_score, are
# outside this view; the original indentation was lost as well.
score_failure_cnt = 0
for step, text in tqdm(enumerate(self._dataset), desc='evaluation steps', total=len(self._dataset)):
# Optionally cap each sample at its first limit_len characters.
if self.limit_len is not None:
text = text[:self.limit_len]
try:
# Run the model on the sample and rebuild a segmented sentence from the
# predicted tags. unspacing / text_to_list / segment_word_by_tags are
# helpers defined elsewhere -- presumably they strip spaces, tokenize and
# re-insert spaces respectively; verify against their definitions.
unspaced_text = unspacing(text.strip())
tokenized_text = text_to_list(unspaced_text)
input_batch = torch.Tensor([self._input_vocab.to_indices(tokenized_text)]).long()
_, tag_seq = self._model(input_batch)
labeled_tag_seq = self._tag_vocab.to_tokens(tag_seq[0].tolist())
pred_text = segment_word_by_tags(unspaced_text, labeled_tag_seq)
# Accumulate the jiwer word error rate of the prediction against the
# stripped input text.
wer_score += jiwer.wer(text.strip(), pred_text.strip())
# Count predictions whose whitespace-separated tokens equal the input's.
if text.split() == pred_text.split():
corrected_sent_cnt += 1
# labelize is defined elsewhere; presumably it yields the reference tags
# for the original text -- verify.
_, labels = labelize(text, bi_tags_only=True)
labels = [ch for ch in labels]
# On the space-joined tag string, every 'E' character becomes 'I' and every
# 'S' becomes 'B' before the string is split back into a list.
# NOTE(review): the final replace('', 'I') has an empty search string, so
# str.replace inserts 'I' before every character and at the end rather than
# mapping one specific tag. The first argument looks like it lost its
# content -- confirm the intended tag against the upstream source.
labeled_tag_seq = ' '.join(labeled_tag_seq).replace('E', 'I').replace('S', 'B').replace('',
'I').split()
acc_score += acc(labeled_tag_seq, labels)
f1_score += f1(labeled_tag_seq, labels, labels=['B', 'I'])
except Exception as e:
# Any failure inside the try body is counted and logged; the message
# mentions only the wer score although the try covers every step above.
score_failure_cnt += 1
logger.warning("Error message while calculating wer score: {}".format(e))
# NOTE(review): with the indentation lost it is unclear whether the two
# lines below run per sample or once after the loop -- confirm.
logger.info('wer score failure {} times'.format(score_failure_cnt))
raise ValueError()
else:
'RowID': rowID,
'filename': filename
}
)
# Excerpt: iterates over the rows fetched from cursor ``c``. The query that
# produced them, and the names ``verified_transcription`` and ``result``,
# are defined outside this view.
for row in c.fetchall():
# Columns are read positionally: row id, transcription, recording type.
rowID = row[0]
transcription = row[1]
recording_type = row[2]
# An empty verified transcription reclassifies the recording as "noise".
if(len(verified_transcription) == 0):
recording_type = "noise"
print("Setting recording_type to noise")
# A result of "unclear" reclassifies the recording as "unclear"; when both
# conditions hold, this assignment is the one that takes effect.
if(result == "unclear"):
recording_type = "unclear"
print("Setting recording_type to unclear")
# calculate the word error rate
# The stored transcription is passed first and the verified one second.
# NOTE(review): confirm this matches the argument order ``wer`` expects
# (reference vs. hypothesis).
WER = wer(
transcription,
verified_transcription
)
c.execute(
" ".join([
"update audiolog set ",
" type=:type,",
" verified_transcription=:vt,",
" reviewed=:reviewed,",
" wer=:wer",
"where RowID=:RowID"
]),
{
"type": recording_type,
"vt": verified_transcription,
"reviewed": now,
def compare_asr(s_wav, t_wav):
    """Compare the speech-recognition transcripts of two audio inputs.

    Both inputs are passed through ``asr``. The transcript of ``s_wav`` is
    given to ``wer`` as the first argument and the transcript of ``t_wav``
    as the second.

    Returns:
        On success, a tuple ``(word_score, char_score)``: ``wer`` of the two
        transcripts, and ``wer`` of the same transcripts after dropping
        spaces and separating every remaining character with a space, so
        that each character is scored as its own token.
        ``[1., 1.]`` if ``sr.UnknownValueError`` is raised (presumably the
        recognizer could not understand the audio -- verify against ``sr``).
        ``[-1., -1.]`` if any other ``Exception`` is raised.
    """
    try:
        gt = asr(s_wav)
        recog = asr(t_wav)
        word_score = wer(gt, recog)
        char_score = wer(
            ' '.join(c for c in gt if c != ' '),
            ' '.join(c for c in recog if c != ' '),
        )
        err_result = word_score, char_score
    except sr.UnknownValueError:
        err_result = [1., 1.]
    except Exception:
        # Best-effort fallback for any ordinary error. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt and SystemExit propagate
        # so the process can still be interrupted or shut down.
        err_result = [-1., -1.]
    return err_result
# Excerpt: the enclosing definition and the origin of ``currenttemplate``,
# ``matchlist``, ``word``, ``templates`` and ``currentvariant`` are outside
# this view; the original indentation was lost as well.
# Count how many times the placeholder "{<matchlist>}" occurs in the
# current template.
possiblesubstitutions = currenttemplate.count(
'{}{}{}'.format('{', matchlist, '}')
)
# print("Matchlist: {} Word: {} Subs: {}".format(matchlist, word, possiblesubstitutions))
# The template may not contain this placeholder at all, so only attempt
# substitutions when at least one occurrence was found.
if(possiblesubstitutions > 0):
for i in range(possiblesubstitutions):
# print("i={}".format(i))
# replacenth is defined elsewhere; from its arguments it presumably
# replaces the (i + 1)-th occurrence of the placeholder with ``word`` --
# verify against its definition.
currenttemplate = replacenth(
'{}{}{}'.format('{', matchlist, '}'),
word,
currenttemplate,
i + 1
)
# Score the candidate template against the current variant.
templates[currenttemplate] = wer(
currentvariant,
currenttemplate
)
# Keep the candidate with the lowest score.
currenttemplate = min(
templates,
key=lambda key: templates[key]
)
# print(currenttemplate)
# print("{}: {}".format(word,currenttemplate))
# print("{}: {}".format(matchlist,currenttemplate))
# Now that we have a matching template, run through a list of all
# substitutions in the template and see if there are any we have not
# identified yet.
# NOTE(review): the pattern below is a non-raw string containing "\{" and
# "\}", which Python treats as invalid escape sequences (a warning on
# recent versions); a raw string r'\{(.*?)\}' would express the same regex.
substitutions = re.findall('\{(.*?)\}', currenttemplate)
if(substitutions):
for substitution in substitutions:
# pprint(intentscores)
# Excerpt: the enclosing definition and the origin of ``intentscores``,
# ``variantscores``, ``allvariants``, ``variant`` and ``phrase`` are outside
# this view; the original indentation was lost as well.
# Pick the intent with the highest score for this variant and record it.
bestintent = max(intentscores, key=lambda key: intentscores[key])
variantscores[variant] = {
'intent': bestintent,
'input': phrase,
'score': intentscores[bestintent],
'matches': allvariants[variant],
'action': self.intent_map['intents'][bestintent]['action']
}
# Pick the variant whose recorded score is highest.
bestvariant = max(variantscores, key=lambda key: variantscores[key]['score'])
# print("BEST: {}".format(bestvariant))
# pprint(variantscores[bestvariant])
# find the template with the smallest levenshtein distance
# NOTE(review): the comment above says Levenshtein distance, but the score
# is whatever ``wer`` returns -- confirm which metric is intended.
# NOTE(review): the loop uses ``bestintent`` and ``variant`` rather than
# ``bestvariant`` / its recorded intent; with the indentation lost it is
# unclear whether that is intended -- confirm.
templates = {}
for template in self.intent_map['intents'][bestintent]['templates']:
templates[template] = wer(template, variant)
# print("distance from '{}' to '{}' is {}".format(variant,template,templates[template]))
besttemplate = min(templates, key=lambda key: templates[key])
# The next thing we have to do is match up all the substitutions
# that have been made between the template and the current variant
# This is so that if there are multiple match indicators we can eliminate
# the ones that have matched.
# Consider the following:
# Team: ['bengals','patriots']
# Template: will the {Team} play the {Team} {Day}
# Input: will done browns play the bengals today
# Input with matches: will done browns play the {Team} {Day}
# Matches: {Team: bengals, Day: today}
# Obviously there is a very low Levenshtein distance between the template
# and the input with matches, but it's not that easy to figure out which
# Team in the template has been matched. So loop through the matches and
# words and match the word with each possible location in the template