import annif.corpus
import annif.eval
import annif.suggestion


def test_subjectset_from_tuple():
    uris = ['http://www.yso.fi/onto/yso/p10849',
            'http://www.yso.fi/onto/yso/p19740']
    labels = ['arkeologit', 'obeliskit']
    sset = annif.corpus.SubjectSet((uris, labels))
    assert sset.has_uris()
    assert len(sset.subject_uris) == 2
    assert 'http://www.yso.fi/onto/yso/p10849' in sset.subject_uris
    assert 'http://www.yso.fi/onto/yso/p19740' in sset.subject_uris

def test_evaluation_batch(subject_index):
    batch = annif.eval.EvaluationBatch(subject_index)
    gold_set = annif.corpus.SubjectSet.from_string(
        '<http://www.yso.fi/onto/yso/p10849>\tarkeologit')
    hits1 = annif.suggestion.ListSuggestionResult([
        annif.suggestion.SubjectSuggestion(
            uri='http://www.yso.fi/onto/yso/p10849',
            label='arkeologit',
            score=1.0)], subject_index)
    batch.evaluate(hits1, gold_set)
    hits2 = annif.suggestion.ListSuggestionResult([
        annif.suggestion.SubjectSuggestion(
            uri='http://www.yso.fi/onto/yso/p1747',
            label='egyptologit',
            score=1.0)], subject_index)
    batch.evaluate(hits2, gold_set)
    results = batch.results()
    assert results['Precision (doc avg)'] == 0.5
    assert results['Recall (doc avg)'] == 0.5
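
The expected 0.5 values follow from document-averaged scoring: the first result set matches the single gold-standard subject (precision and recall 1.0 for that document) while the second suggests only a wrong subject (0.0 for both), and the per-document values are averaged. A minimal self-contained sketch of that arithmetic, deliberately not using Annif's own evaluation code:

def precision(suggested, gold):
    return len(suggested & gold) / len(suggested) if suggested else 0.0

def recall(suggested, gold):
    return len(suggested & gold) / len(gold) if gold else 0.0

gold = {'arkeologit'}
per_doc = [({'arkeologit'}, gold),    # hits1: the correct subject
           ({'egyptologit'}, gold)]   # hits2: a wrong subject

prec = sum(precision(s, g) for s, g in per_doc) / len(per_doc)
rec = sum(recall(s, g) for s, g in per_doc) / len(per_doc)
assert prec == 0.5 and rec == 0.5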

import numpy as np


def _suggest_train_corpus(source_project, corpus):
    # Collect suggestion score vectors and gold-standard subject vectors,
    # one row per document in the corpus.
    scores = []
    true = []
    for doc in corpus.documents:
        hits = source_project.suggest(doc.text)
        scores.append(hits.vector)
        subjects = annif.corpus.SubjectSet((doc.uris, doc.labels))
        true.append(subjects.as_vector(source_project.subjects))
    return np.array(scores), np.array(true)
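
The helper above returns two parallel NumPy arrays, one row per document and one column per subject in the vocabulary: suggestion scores from the source project and the binary gold-standard vector. A toy illustration of the resulting shapes (made-up numbers, not produced by Annif):

import numpy as np

# 2 documents, 3 subjects in the vocabulary
scores = np.array([[0.9, 0.1, 0.0],   # suggestion scores per document
                   [0.2, 0.7, 0.1]])
true = np.array([[1.0, 0.0, 0.0],     # gold-standard subject vectors
                 [0.0, 1.0, 0.0]])
assert scores.shape == true.shape == (2, 3)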

# CLI command body for evaluating a project against a gold standard; the
# def line below is reconstructed from the parameters used in the body.
def run_eval(project_id, paths, limit, threshold, backend_param):
    """
    Compare the results of automated indexing against a gold standard. The
    path may be either a TSV file with short documents or a directory with
    documents in separate files.
    """
    project = get_project(project_id)
    backend_params = parse_backend_params(backend_param)
    hit_filter = SuggestionFilter(limit=limit, threshold=threshold)  # see sketch below
    eval_batch = annif.eval.EvaluationBatch(project.subjects)
    docs = open_documents(paths)
    for doc in docs.documents:
        results = project.suggest(doc.text, backend_params)
        hits = hit_filter(results)
        eval_batch.evaluate(hits,
                            annif.corpus.SubjectSet((doc.uris, doc.labels)))
    template = "{0:<20}\t{1}"
    for metric, score in eval_batch.results().items():
        click.echo(template.format(metric + ":", score))
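
Inside the loop, hit_filter trims each suggestion set before it is evaluated. The sketch below only illustrates the general limit-and-threshold idea with plain (label, score) tuples; the function name and the exact cutoff semantics are assumptions, not Annif's SuggestionFilter implementation:

def filter_hits(hits, limit=10, threshold=0.0):
    # Keep suggestions scoring at or above the threshold, best first,
    # and return at most `limit` of them.
    kept = sorted((h for h in hits if h[1] >= threshold),
                  key=lambda h: h[1], reverse=True)
    return kept[:limit]

hits = [('arkeologit', 0.9), ('egyptologit', 0.4), ('obeliskit', 0.05)]
print(filter_hits(hits, limit=2, threshold=0.1))
# [('arkeologit', 0.9), ('egyptologit', 0.4)]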

def _doc_to_example(self, doc, project, source_projects):
    examples = []
    subjects = annif.corpus.SubjectSet((doc.uris, doc.labels))
    true = subjects.as_vector(project.subjects)
    score_vector = self._doc_score_vector(doc, source_projects)
    for subj_id in range(len(true)):
        if true[subj_id] or score_vector[:, subj_id].sum() > 0.0:
            ex = (subj_id, self._format_example(
                subj_id,
                score_vector[:, subj_id],
                true[subj_id]))
            examples.append(ex)
    return examples
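
The condition in the loop above keeps a subject only if it is a gold-standard subject for the document or at least one source project gave it a nonzero score. A toy illustration of that selection rule with made-up arrays (not Annif code):

import numpy as np

true = np.array([1.0, 0.0, 0.0, 1.0])            # gold standard for one document
score_vector = np.array([[0.8, 0.0, 0.3, 0.0],   # scores from source project A
                         [0.6, 0.0, 0.0, 0.0]])  # scores from source project B

selected = [subj_id for subj_id in range(len(true))
            if true[subj_id] or score_vector[:, subj_id].sum() > 0.0]
print(selected)  # [0, 2, 3]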