Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_omikuji_create_train_file(tmpdir, project, datadir):
    """Check the train file written by the Omikuji backend.

    A three-line TSV document file is written under ``tmpdir``, its texts are
    passed through ``create_vectorizer`` and the result is handed to
    ``_create_train_file``.  The test then expects ``omikuji-train.txt`` in
    ``datadir`` to contain exactly two lines: a header and one example.
    """
    tmpfile = tmpdir.join('document.tsv')
    tmpfile.write("nonexistent\thttp://example.com/nonexistent\n"
                  "arkeologia\thttp://www.yso.fi/onto/yso/p1265\n"
                  "...\thttp://example.com/none")
    corpus = annif.corpus.DocumentFile(str(tmpfile))
    omikuji_type = annif.backend.get_backend('omikuji')
    omikuji = omikuji_type(
        backend_id='omikuji',
        config_params={},
        project=project)
    # Named "texts" rather than "input" so the builtin is not shadowed.
    texts = (doc.text for doc in corpus.documents)
    veccorpus = omikuji.create_vectorizer(texts, {})
    omikuji._create_train_file(veccorpus, corpus)

    train_file = datadir.join('omikuji-train.txt')
    assert train_file.exists()
    traindata = train_file.read().splitlines()
    assert len(traindata) == 2  # header + 1 example
    # The header line is parsed as three whitespace-separated integers.
    examples, features, labels = map(int, traindata[0].split())
    assert examples == 1
    assert features == 2
    assert labels == 125
def test_http_suggest(app, project):
    """Check that the http backend returns the suggestion from a mocked reply.

    ``requests.post`` is patched so that the response's ``.json()`` returns a
    one-item list; the single result from ``suggest`` is expected to carry
    the same uri, label and score.
    """
    canned_reply = [
        {'uri': 'http://example.org/http', 'label': 'http', 'score': 1.0}]
    backend_config = {
        'endpoint': 'http://api.example.org/analyze',
        'project': 'dummy'}

    with unittest.mock.patch('requests.post') as mock_request:
        # the patched function hands back a mock response whose .json()
        # method returns the canned list defined above
        fake_response = unittest.mock.Mock()
        fake_response.json.return_value = canned_reply
        mock_request.return_value = fake_response

        backend_cls = annif.backend.get_backend("http")
        backend = backend_cls(
            backend_id='http',
            config_params=backend_config,
            datadir=app.config['DATADIR'])
        result = backend.suggest('this is some text', project=project)

        assert len(result) == 1
        hit = result[0]
        assert hit.uri == 'http://example.org/http'
        assert hit.label == 'http'
        assert hit.score == 1.0
def test_omikuji_suggest(project):
    """Check that Omikuji suggestions respect the limit and include p1265.

    The backend is configured with ``limit`` 8; between one and eight results
    are expected, one of them with the URI
    ``http://www.yso.fi/onto/yso/p1265``.
    """
    # Continuation lines stay at column 0 so the text passed to the backend
    # is exactly the literal as written.
    text = """Arkeologiaa sanotaan joskus myös
muinaistutkimukseksi tai muinaistieteeksi. Se on humanistinen tiede
tai oikeammin joukko tieteitä, jotka tutkivat ihmisen menneisyyttä.
Tutkimusta tehdään analysoimalla muinaisjäännöksiä eli niitä jälkiä,
joita ihmisten toiminta on jättänyt maaperään tai vesistöjen
pohjaan."""

    backend_cls = annif.backend.get_backend('omikuji')
    backend = backend_cls(
        backend_id='omikuji',
        config_params={'limit': 8},
        project=project)
    results = backend.suggest(text)

    assert 0 < len(results) <= 8
    suggested_uris = [hit.uri for hit in results]
    assert 'http://www.yso.fi/onto/yso/p1265' in suggested_uris
def test_fasttext_train_nodocuments(tmpdir, datadir, project):
    """Check that training fastText on an empty document file is rejected.

    ``train`` is expected to raise ``NotSupportedException`` with a message
    mentioning that the backend was trained with no documents.
    """
    backend_config = {
        'limit': 50,
        'dim': 100,
        'lr': 0.25,
        'epoch': 20,
        'loss': 'hs'}
    backend_cls = annif.backend.get_backend("fasttext")
    backend = backend_cls(
        backend_id='fasttext',
        config_params=backend_config,
        datadir=str(datadir))

    # ensure() creates the file without writing any content to it
    empty_path = tmpdir.ensure('empty.tsv')
    empty_corpus = annif.corpus.DocumentFile(str(empty_path))

    with pytest.raises(NotSupportedException) as excinfo:
        backend.train(empty_corpus, project)

    # checked after the block: excinfo.value is only set once it has exited
    message = str(excinfo.value)
    assert 'training backend fasttext with no documents' in message
def test_pav_train(app, datadir, tmpdir, project):
    """Check that training the PAV backend writes a non-empty model file.

    After training on a three-line TSV document file inside the application
    context, ``pav-model-dummy-fi`` is expected to exist in ``datadir`` with
    a size greater than zero.
    """
    backend_cls = annif.backend.get_backend("pav")
    backend = backend_cls(
        backend_id='pav',
        config_params={'limit': 50, 'min-docs': 2, 'sources': 'dummy-fi'},
        datadir=str(datadir))

    tsv_path = tmpdir.join('document.tsv')
    tsv_path.write("dummy\thttp://example.org/dummy\n"
                   "another\thttp://example.org/dummy\n"
                   "none\thttp://example.org/none")
    training_corpus = annif.corpus.DocumentFile(str(tsv_path))

    with app.app_context():
        backend.train(training_corpus, project)

    model_file = datadir.join('pav-model-dummy-fi')
    assert model_file.exists()
    assert model_file.size() > 0
def test_tfidf_analyze(datadir, project_with_vectorizer):
    """Check that tfidf analysis returns ten results including 'arkeologia'.

    The backend is configured with ``limit`` 10; exactly ten results are
    expected, with the URI ``http://www.yso.fi/onto/yso/p1265`` and the label
    ``arkeologia`` among them.
    """
    # Continuation lines stay at column 0 so the text passed to the backend
    # is exactly the literal as written.
    text = """Arkeologiaa sanotaan joskus myös
muinaistutkimukseksi tai muinaistieteeksi. Se on humanistinen tiede
tai oikeammin joukko tieteitä, jotka tutkivat ihmisen menneisyyttä.
Tutkimusta tehdään analysoimalla muinaisjäännöksiä eli niitä jälkiä,
joita ihmisten toiminta on jättänyt maaperään tai vesistöjen
pohjaan."""

    backend_cls = annif.backend.get_backend("tfidf")
    backend = backend_cls(
        backend_id='tfidf',
        params={'limit': 10},
        datadir=str(datadir))
    results = backend.analyze(text, project_with_vectorizer)

    assert len(results) == 10
    found_uris = [hit.uri for hit in results]
    assert 'http://www.yso.fi/onto/yso/p1265' in found_uris
    found_labels = [hit.label for hit in results]
    assert 'arkeologia' in found_labels
def test_omikuji_train(datadir, document_corpus, project):
    """Check that Omikuji training fills an already existing model directory.

    An empty ``omikuji-model`` directory is created before training; after
    training the backend's ``_model`` must be set and the directory must
    contain at least one entry.
    """
    backend_cls = annif.backend.get_backend('omikuji')
    backend = backend_cls(
        backend_id='omikuji',
        config_params={},
        project=project)

    # training has to cope with a model directory that is already present;
    # an empty directory stands in for a leftover model here
    model_dir = datadir.join('omikuji-model')
    model_dir.ensure(dir=True)
    assert not model_dir.listdir()  # empty dir

    backend.train(document_corpus)

    assert backend._model is not None
    assert model_dir.exists()
    assert model_dir.listdir()  # non-empty dir
def test_vw_ensemble_default_params(datadir, project):
    """Check the default parameters of a vw_ensemble backend.

    The backend is created with an empty ``config_params``; its ``params``
    are expected to contain ``limit``, ``discount_rate`` and
    ``loss_function`` with the values listed below.
    """
    backend_cls = annif.backend.get_backend("vw_ensemble")
    backend = backend_cls(
        backend_id='vw_ensemble',
        config_params={},
        datadir=str(datadir))

    defaults = {
        'limit': 100,
        'discount_rate': 0.01,
        'loss_function': 'squared',
    }
    effective = backend.params
    for name, expected in defaults.items():
        assert name in effective and effective[name] == expected
def test_nn_ensemble_suggest_no_model(project):
    """Check that nn_ensemble raises when asked to suggest without training.

    No training step is run in this test, so ``suggest`` is expected to raise
    ``NotInitializedException``.
    """
    nn_ensemble_type = annif.backend.get_backend('nn_ensemble')
    nn_ensemble = nn_ensemble_type(
        backend_id='nn_ensemble',
        config_params={'sources': 'dummy-en'},
        project=project)
    with pytest.raises(NotInitializedException):
        # The return value is not bound: the call is expected to raise, so
        # there is nothing to inspect afterwards.
        nn_ensemble.suggest("example text")
def test_get_backend_dummy(app, project):
    """Check that the dummy backend returns one fixed suggestion.

    The single result is expected to have the URI
    ``http://example.org/dummy``, the label ``dummy`` and a score of 1.0.
    """
    backend_cls = annif.backend.get_backend("dummy")
    backend = backend_cls(
        backend_id='dummy',
        config_params={},
        datadir=app.config['DATADIR'])
    result = backend.suggest(text='this is some text', project=project)

    assert len(result) == 1
    hit = result[0]
    assert hit.uri == 'http://example.org/dummy'
    assert hit.label == 'dummy'
    assert hit.score == 1.0