Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_vw_ensemble_train_and_learn(app, datadir, tmpdir):
    """Train the vw_ensemble backend on a tiny TSV corpus.

    After training, asserts that 'vw-train.txt', 'subject-freq.json' and
    'vw-model' exist under ``datadir`` and are non-empty.
    """
    backend_cls = annif.backend.get_backend("vw_ensemble")
    backend = backend_cls(
        backend_id='vw_ensemble',
        config_params={'sources': 'dummy-en'},
        datadir=str(datadir),
    )

    # Three tab-separated rows; the last one has no trailing newline.
    rows = (
        "dummy\thttp://example.org/dummy\n",
        "another\thttp://example.org/dummy\n",
        "none\thttp://example.org/none",
    )
    corpus_path = tmpdir.join('document.tsv')
    corpus_path.write("".join(rows))
    corpus = annif.corpus.DocumentFile(str(corpus_path))
    dummy_project = annif.project.get_project('dummy-en')

    with app.app_context():
        backend.train(corpus, dummy_project)

    # Each expected output file must exist and contain data.
    for filename in ('vw-train.txt', 'subject-freq.json', 'vw-model'):
        produced = datadir.join(filename)
        assert produced.exists()
        assert produced.size() > 0

    # test online learning
    # NOTE(review): only baseline values are captured below; nothing in the
    # visible code uses them afterwards -- confirm whether the remainder of
    # this test is missing.
    modelfile = datadir.join('vw-model')
    freqfile = datadir.join('subject-freq.json')
    old_size = modelfile.size()
def test_combinedcorpus(tmpdir):
    """Combine two corpora built from one 3-row file; expect 6 documents."""
    # NOTE(review): a second test with this exact name is defined later in
    # this file; in a single module the later definition shadows this one.
    rows = ["Läntinen\t", "Oulunlinnan\t", "Harald Hirmuinen\t"]
    tsv_path = tmpdir.join('documents.tsv')
    tsv_path.write("\n".join(rows))

    # Both corpora read the same file on disk.
    sources = [annif.corpus.DocumentFile(str(tsv_path)) for _ in range(2)]
    merged = annif.corpus.CombinedCorpus(sources)

    all_documents = list(merged.documents)
    assert len(all_documents) == 6
def test_combinedcorpus(tmpdir):
    """Check that CombinedCorpus yields the documents of both inputs.

    Two DocumentFile corpora are created from the same three-row TSV file,
    so the combined corpus is expected to produce six documents.
    """
    # NOTE(review): an identically named test is defined earlier in this
    # file; within one module this definition replaces the earlier one.
    content = (
        "Läntinen\t\n"
        "Oulunlinnan\t\n"
        "Harald Hirmuinen\t"
    )
    source_file = tmpdir.join('documents.tsv')
    source_file.write(content)

    first = annif.corpus.DocumentFile(str(source_file))
    second = annif.corpus.DocumentFile(str(source_file))
    both = annif.corpus.CombinedCorpus([first, second])

    assert len(list(both.documents)) == 6
def test_docfile_is_empty(tmpdir):
    """A DocumentFile over a freshly ensured file reports itself as empty."""
    # ensure() is called on a file name that has not been written to.
    path = str(tmpdir.ensure('empty.tsv'))
    corpus = annif.corpus.DocumentFile(path)
    assert corpus.is_empty()
def test_docfile_plain(tmpdir):
    """A three-row TSV file is read back as three documents."""
    # Each row is a text followed by a tab and nothing after it.
    rows = ["Läntinen\t", "Oulunlinnan\t", "Harald Hirmuinen\t"]
    tsv_path = tmpdir.join('documents.tsv')
    tsv_path.write("\n".join(rows))

    corpus = annif.corpus.DocumentFile(str(tsv_path))
    parsed = list(corpus.documents)
    assert len(parsed) == 3
def open_doc_path(path):
    """Open a single path and return it as a document corpus.

    A directory is wrapped in ``annif.corpus.DocumentDirectory`` with
    ``require_subjects=True``; any other path is wrapped in
    ``annif.corpus.DocumentFile``.
    """
    if not os.path.isdir(path):
        return annif.corpus.DocumentFile(path)
    return annif.corpus.DocumentDirectory(path, require_subjects=True)