Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_vw_ensemble_train_and_learn(app, datadir, tmpdir):
    """Train the vw_ensemble backend on a small corpus and check its files.

    After training, the train file, the subject frequency file and the model
    file must all exist in ``datadir`` and be non-empty.

    NOTE(review): the snippet stops right after the pre-learning size and
    mtime of the model file are captured; the learning step itself is not
    part of it. The extent of the ``with`` body is inferred, since the
    original indentation was lost.
    """
    backend_cls = annif.backend.get_backend("vw_ensemble")
    vw_ensemble = backend_cls(
        backend_id='vw_ensemble',
        config_params={'sources': 'dummy-en'},
        datadir=str(datadir))

    # Build a three-row, tab-separated corpus file in the temp directory.
    corpus_path = tmpdir.join('document.tsv')
    corpus_path.write(
        "dummy\thttp://example.org/dummy\n"
        "another\thttp://example.org/dummy\n"
        "none\thttp://example.org/none")
    document_corpus = annif.corpus.DocumentFile(str(corpus_path))
    project = annif.project.get_project('dummy-en')

    with app.app_context():
        vw_ensemble.train(document_corpus, project)

    # Every artefact written by training has to be present and non-empty.
    for filename in ('vw-train.txt', 'subject-freq.json', 'vw-model'):
        produced = datadir.join(filename)
        assert produced.exists()
        assert produced.size() > 0

    # test online learning
    modelfile = datadir.join('vw-model')
    freqfile = datadir.join('subject-freq.json')
    old_size = modelfile.size()
    old_mtime = modelfile.mtime()
def test_nn_ensemble_train_and_learn(app, tmpdir):
    """Train the nn_ensemble backend on a small tab-separated corpus.

    NOTE(review): the snippet ends once the project's data directory has been
    wrapped in a ``py.path.local``; no assertions on it are visible here. The
    extent of the ``with`` body is inferred, since the original indentation
    was lost.
    """
    project = annif.project.get_project('dummy-en')
    backend_cls = annif.backend.get_backend("nn_ensemble")
    nn_ensemble = backend_cls(
        backend_id='nn_ensemble',
        config_params={'sources': 'dummy-en'},
        project=project)

    # Build a three-row, tab-separated corpus file in the temp directory.
    corpus_path = tmpdir.join('document.tsv')
    corpus_path.write(
        "dummy\thttp://example.org/dummy\n"
        "another\thttp://example.org/dummy\n"
        "none\thttp://example.org/none")
    document_corpus = annif.corpus.DocumentFile(str(corpus_path))

    with app.app_context():
        nn_ensemble.train(document_corpus)

    datadir = py.path.local(project.datadir)
def get_project(project_id):
    """Return the project with the given ID, or exit if there is none.

    The lookup is delegated to ``annif.project.get_project`` with
    ``min_access=Access.hidden``. When that raises ``ValueError``, an error
    message is echoed with ``err=True`` and the process exits with status 1.
    """
    try:
        project = annif.project.get_project(
            project_id, min_access=Access.hidden)
    except ValueError:
        message = "No projects found with id \'{0}\'.".format(project_id)
        click.echo(message, err=True)
        sys.exit(1)
    return project
def _suggest_with_sources(self, text, sources):
    """Collect weighted suggestions for ``text`` from each source project.

    ``sources`` is iterated as ``(project_id, weight)`` pairs. For every
    pair the project is looked up and asked for suggestions, the hits are
    normalized via ``self._normalize_hits``, and the result is wrapped in a
    ``WeightedSuggestion``. Returns the list of those objects, in source
    order.
    """
    weighted_suggestions = []
    for project_id, weight in sources:
        source_project = annif.project.get_project(project_id)
        hits = source_project.suggest(text)

        debug_message = 'Got {} hits from project {}'.format(
            len(hits), source_project.project_id)
        self.debug(debug_message)

        normalized = self._normalize_hits(hits, source_project)
        suggestion = annif.suggestion.WeightedSuggestion(
            hits=normalized, weight=weight)
        weighted_suggestions.append(suggestion)
    return weighted_suggestions
def _corpus_to_vectors(self, corpus):
    """Run every corpus document through the source projects.

    For each document, one weighted score vector per source project is
    stacked into a transposed float32 array, and the document's own subjects
    are converted into a vector against ``self.project.subjects``.

    NOTE(review): the snippet ends after ``scores`` is assembled; no
    ``return`` statement is visible, and ``true_vectors`` is not yet used.
    """
    # pass corpus through all source projects
    source_specs = annif.util.parse_sources(self.params['sources'])
    sources = [(annif.project.get_project(project_id), weight)
               for project_id, weight in source_specs]

    score_vectors = []
    true_vectors = []
    for doc in corpus.documents:
        # One weighted vector per source, in the order the sources are listed.
        weighted_scores = [
            source_project.suggest(doc.text).vector * weight
            for source_project, weight in sources]
        stacked = np.array(weighted_scores, dtype=np.float32)
        score_vectors.append(stacked.transpose())

        gold_subjects = annif.corpus.SubjectSet((doc.uris, doc.labels))
        true_vectors.append(gold_subjects.as_vector(self.project.subjects))

    # collect the results into a single vector, considering weights
    scores = np.array(score_vectors, dtype=np.float32)
def show_project(project_id):
    """Return the dump of a single project, formatted per the Swagger spec.

    The project is looked up with ``min_access=Access.hidden``. If the lookup
    raises ``ValueError``, the result of ``project_not_found_error`` is
    returned instead.
    """
    try:
        found = annif.project.get_project(
            project_id, min_access=Access.hidden)
    except ValueError:
        return project_not_found_error(project_id)
    else:
        return found.dump()
def list_projects():
    """
    List available projects.
    Usage: annif list-projects
    REST equivalent: GET /projects/
    """
    # The docstring above is kept verbatim: it may be shown as command help.
    # Dump every project returned by the registry, preserving its order.
    dumped_projects = []
    for proj in annif.project.get_projects().values():
        dumped_projects.append(proj.dump())
    return dumped_projects
def list_projects():
    """Return a dict of project dumps, formatted per the Swagger spec.

    Projects are fetched with ``min_access=Access.public`` and their dumps
    are placed in a list under the ``'projects'`` key.
    """
    visible_projects = annif.project.get_projects(min_access=Access.public)
    dumped_projects = [proj.dump() for proj in visible_projects.values()]
    return {'projects': dumped_projects}
# NOTE(review): this is a body fragment of an application factory. The
# enclosing `def` line and the creation of `cxapp` and `logger` are not
# visible here, and the original indentation has been lost.
# Choose a default configuration class when the caller did not name one.
if config_name is None:
# 'annif.default_config.Config' is used when FLASK_RUN_FROM_CLI is 'true';
# otherwise 'annif.default_config.ProductionConfig' is used.
if os.environ.get('FLASK_RUN_FROM_CLI') == 'true':
config_name = 'annif.default_config.Config'
else:
config_name = 'annif.default_config.ProductionConfig'
logger.debug('creating app with configuration %s', config_name)
# Load the chosen configuration object first, then the file named by the
# ANNIF_SETTINGS environment variable.
# NOTE(review): silent=True presumably makes a missing ANNIF_SETTINGS a
# no-op rather than an error - confirm against the Flask documentation.
cxapp.app.config.from_object(config_name)
cxapp.app.config.from_envvar('ANNIF_SETTINGS', silent=True)
# Register the API described by the 'annif.yaml' specification.
cxapp.add_api('annif.yaml')
# add CORS support
CORS(cxapp.app)
# Project initialization is skipped unless INITIALIZE_PROJECTS is truthy.
if cxapp.app.config['INITIALIZE_PROJECTS']:
annif.project.initialize_projects(cxapp.app)
# register the views via blueprints
from annif.views import bp
cxapp.app.register_blueprint(bp)
# return the Flask app
return cxapp.app
def analyze(project_id, text, limit, threshold):
    """Analyze a document and return the dumped hits as a list.

    Each hit produced by ``project.analyze(text, limit, threshold)`` is
    converted with its ``dump()`` method. If looking up ``project_id`` raises
    ``ValueError``, an error message string is returned instead of a list.
    """
    try:
        project = annif.project.get_project(project_id)
    except ValueError:
        return "No projects found with id \'{0}\'.".format(project_id)

    dumped_hits = []
    for hit in project.analyze(text, limit, threshold):
        dumped_hits.append(hit.dump())
    return dumped_hits