How to use the jiwer.wer function in jiwer

To help you get started, we’ve selected a few jiwer examples based on popular ways the library is used in public projects.

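Before the project examples, here is a minimal, self-contained sketch of calling jiwer.wer directly (the sentences are made up for illustration, and it assumes jiwer is installed):

import jiwer

# Ground truth and hypothesis can each be a single string or a list of strings.
truth = "the quick brown fox jumps over the lazy dog"
hypothesis = "the quick brown fox jumped over a lazy dog"

# wer() returns the word error rate as a float; 0.0 means a perfect match.
error = jiwer.wer(truth, hypothesis)
print(error)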

github jitsi / asr-wer / tests / test_speed.py View on Github
from jiwer import wer

def perform_computation(num_sentences):
    # Build identical-length reference/hypothesis lists and run a batched WER computation.
    truth = ["this is a speed test" for _ in range(0, num_sentences)]
    hypo = ["this is not a speed test" for _ in range(0, num_sentences)]

    wer(truth, hypo)
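
A hedged variant of the speed test above, timed with the standard-library timeit module (the sentence counts are arbitrary):

import timeit
from jiwer import wer

truth = ["this is a speed test"] * 1000
hypo = ["this is not a speed test"] * 1000

# Time ten batched wer() calls over the paired sentence lists.
print(timeit.timeit(lambda: wer(truth, hypo), number=10))
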
github jitsi / asr-wer / tests / test_measures.py View on Github
def test_fail_on_empty_ground_truth(self):
    # Part of a unittest.TestCase: every jiwer measure should raise
    # ValueError when the ground truth is an empty string.
    for method in [
        jiwer.wer,
        jiwer.wil,
        jiwer.wip,
        jiwer.mer,
        jiwer.compute_measures,
    ]:

        def callback():
            method("", "test")

        self.assertRaises(ValueError, callback)
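
The test above depends on jiwer raising ValueError for an empty ground truth; a small sketch of guarding against that in application code (safe_wer is a hypothetical helper, not part of jiwer):

import jiwer

def safe_wer(truth, hypothesis):
    # jiwer raises ValueError when the ground truth is empty,
    # so catch it instead of letting the metric crash a pipeline.
    try:
        return jiwer.wer(truth, hypothesis)
    except ValueError:
        return None

print(safe_wer("", "test"))  # prints None rather than raising
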
github Taekyoon / takos-alpha / takos / evaluator / eval_word_segment_model.py View on Github
        score_failure_cnt = 0

        for step, text in tqdm(enumerate(self._dataset), desc='evaluation steps', total=len(self._dataset)):
            if self.limit_len is not None:
                text = text[:self.limit_len]
            try:
                unspaced_text = unspacing(text.strip())
                tokenized_text = text_to_list(unspaced_text)

                input_batch = torch.Tensor([self._input_vocab.to_indices(tokenized_text)]).long()

                _, tag_seq = self._model(input_batch)
                labeled_tag_seq = self._tag_vocab.to_tokens(tag_seq[0].tolist())
                pred_text = segment_word_by_tags(unspaced_text, labeled_tag_seq)
                wer_score += jiwer.wer(text.strip(), pred_text.strip())
                if text.split() == pred_text.split():
                    corrected_sent_cnt += 1

                _, labels = labelize(text, bi_tags_only=True)
                labels = [ch for ch in labels]
                labeled_tag_seq = ' '.join(labeled_tag_seq).replace('E', 'I').replace('S', 'B').replace('', 'I').split()
                acc_score += acc(labeled_tag_seq, labels)
                f1_score += f1(labeled_tag_seq, labels, labels=['B', 'I'])
            except Exception as e:
                score_failure_cnt += 1
                logger.warning("Error message while calculating wer score: {}".format(e))
                logger.info('wer score failure {} times'.format(score_failure_cnt))
                raise ValueError()

        else:
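
The evaluator above sums per-sentence WER across a dataset; a stripped-down sketch of that averaging pattern (the sentence pairs are invented):

import jiwer

pairs = [
    ("this is a test", "this is test"),
    ("another example sentence", "another example sentence"),
]

# Average the per-sentence word error rate over all (reference, prediction) pairs.
scores = [jiwer.wer(ref, hyp) for ref, hyp in pairs]
print(sum(scores) / len(scores))
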
github NaomiProject / Naomi / NaomiSTTTrainer.py View on Github
                                'RowID': rowID,
                                'filename': filename
                            }
                        )
                        for row in c.fetchall():
                            rowID = row[0]
                            transcription = row[1]
                            recording_type = row[2]
                            if(len(verified_transcription) == 0):
                                recording_type = "noise"
                                print("Setting recording_type to noise")
                                if(result == "unclear"):
                                    recording_type = "unclear"
                                    print("Setting recording_type to unclear")
                            # calculate the word error rate
                            WER = wer(
                                transcription,
                                verified_transcription
                            )
                            c.execute(
                                " ".join([
                                    "update audiolog set ",
                                    " type=:type,",
                                    " verified_transcription=:vt,",
                                    " reviewed=:reviewed,",
                                    " wer=:wer",
                                    "where RowID=:RowID"
                                ]),
                                {
                                    "type": recording_type,
                                    "vt": verified_transcription,
                                    "reviewed": now,
github andi611 / ZeroSpeech-TTS-without-T / convert.py View on Github
def compare_asr(s_wav, t_wav):
	try:
		gt = asr(s_wav)
		recog = asr(t_wav)
		# Word-level WER, plus a character-level error rate computed by
		# treating every non-space character as its own token.
		err_result = (
			wer(gt, recog),
			wer(' '.join([c for c in gt if c != ' ']), ' '.join([c for c in recog if c != ' '])),
		)
	except sr.UnknownValueError:
		err_result = [1., 1.]
	except:
		err_result = [-1., -1.]
	return err_result
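
The second wer() call above approximates a character error rate by spacing out every character; recent jiwer releases also expose jiwer.cer for this (a minimal sketch, assuming a jiwer version that includes cer):

import jiwer

truth = "hello world"
hypothesis = "hallo world"

word_error = jiwer.wer(truth, hypothesis)  # one of two words differs -> 0.5
char_error = jiwer.cer(truth, hypothesis)  # character error rate, if your jiwer version provides cer
print(word_error, char_error)
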
github NaomiProject / Naomi / plugins / tti / naomi_tti / naomi_tti.py View on Github
                possiblesubstitutions = currenttemplate.count(
                    '{}{}{}'.format('{', matchlist, '}')
                )
                # print("Matchlist: {} Word: {} Subs: {}".format(matchlist, word, possiblesubstitutions))
                # We don't actually know if there are actually any
                # substitutions in the template
                if(possiblesubstitutions > 0):
                    for i in range(possiblesubstitutions):
                        # print("i={}".format(i))
                        currenttemplate = replacenth(
                            '{}{}{}'.format('{', matchlist, '}'),
                            word,
                            currenttemplate,
                            i + 1
                        )
                        templates[currenttemplate] = wer(
                            currentvariant,
                            currenttemplate
                        )
                    currenttemplate = min(
                        templates,
                        key=lambda key: templates[key]
                    )
                    # print(currenttemplate)
                # print("{}: {}".format(word,currenttemplate))
            # print("{}: {}".format(matchlist,currenttemplate))
        # Now that we have a matching template, run through a list of all
        # substitutions in the template and see if there are any we have not
        # identified yet.
        substitutions = re.findall('\{(.*?)\}', currenttemplate)
        if(substitutions):
            for substitution in substitutions:
github NaomiProject / Naomi / plugins / tti / naomi_tti / naomi_tti.py View on Github
            # pprint(intentscores)
            bestintent = max(intentscores, key=lambda key: intentscores[key])
            variantscores[variant] = {
                'intent': bestintent,
                'input': phrase,
                'score': intentscores[bestintent],
                'matches': allvariants[variant],
                'action': self.intent_map['intents'][bestintent]['action']
            }
        bestvariant = max(variantscores, key=lambda key: variantscores[key]['score'])
        # print("BEST: {}".format(bestvariant))
        # pprint(variantscores[bestvariant])
        # find the template with the smallest levenshtein distance
        templates = {}
        for template in self.intent_map['intents'][bestintent]['templates']:
            templates[template] = wer(template, variant)
            # print("distance from '{}' to '{}' is {}".format(variant,template,templates[template]))
        besttemplate = min(templates, key=lambda key: templates[key])
        # The next thing we have to do is match up all the substitutions
        # that have been made between the template and the current variant
        # This is so that if there are multiple match indicators we can eliminate
        # the ones that have matched.
        # Consider the following:
        #   Team: ['bengals','patriots']
        #   Template: will the {Team} play the {Team} {Day}
        #   Input: will done browns play the bengals today
        #   Input with matches: will done browns play the {Team} {Day}
        #   Matches: {Team: bengals, Day: today}
        # Obviously there is a very low Levenshtein distance between the template
        # and the input with matches, but it's not that easy to figure out which
        # Team in the template has been matched. So loop through the matches and
        # words and match the word with each possible location in the template
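
The plugin above uses wer() as a cheap normalized distance for ranking intent templates against an utterance; a stripped-down sketch of that pattern (the templates and phrase are made up):

import jiwer

templates = [
    "will the {Team} play the {Team} {Day}",
    "what time is it",
    "turn on the {Device}",
]
phrase = "will the browns play the bengals today"

# Score each template against the input phrase and keep the closest one.
scores = {t: jiwer.wer(t, phrase) for t in templates}
best = min(scores, key=scores.get)
print(best, scores[best])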