Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
"""Annif backend using the Omikuji classifier"""
import omikuji
import os.path
import shutil
import annif.util
from annif.suggestion import SubjectSuggestion, ListSuggestionResult
from annif.exception import NotInitializedException, NotSupportedException
from . import backend
from . import mixins
class OmikujiBackend(mixins.TfidfVectorizerMixin, backend.AnnifBackend):
    """Omikuji based backend for Annif"""

    # identifier of this backend
    name = "omikuji"
    needs_subject_index = True

    # defaults for uninitialized instances
    _model = None

    # file names for the training data and the model
    # NOTE(review): presumably resolved against the project's data
    # directory -- confirm against the code that uses them
    TRAIN_FILE = 'omikuji-train.txt'
    MODEL_FILE = 'omikuji-model'

    # backend-specific parameter defaults
    # NOTE(review): min_df presumably feeds the TF-IDF vectorizer mixin and
    # the cluster_*/max_depth entries look like Omikuji training options --
    # confirm against the code that consumes them
    DEFAULT_PARAMS = {
        'min_df': 1,
        'cluster_balanced': True,
        'cluster_k': 2,
        'max_depth': 20,
    }
"""Ensemble backend that combines results from multiple projects"""
import annif.suggestion
import annif.project
import annif.util
from . import backend
class EnsembleBackend(backend.AnnifBackend):
    """Ensemble backend that combines results from multiple projects"""

    # identifier of this backend
    name = "ensemble"

    def _normalize_hits(self, hits, source_project):
        """Hook for processing hits from backends. Intended to be overridden
        by subclasses.

        This default implementation returns ``hits`` unchanged and does not
        use ``source_project``."""
        return hits

    def _suggest_with_sources(self, text, sources):
        """Ask every source project for suggestions on ``text``.

        ``sources`` is iterated as ``(project_id, weight)`` pairs.

        NOTE(review): as written here the method only logs the number of
        hits per project; ``hits_from_sources`` is never filled, ``weight``
        is unused and nothing is returned. The remainder of the body appears
        to be missing -- confirm against the full file.
        """
        hits_from_sources = []
        for project_id, weight in sources:
            # look up the source project by its id and query it
            source_project = annif.project.get_project(project_id)
            hits = source_project.suggest(text)
            self.debug(
                'Got {} hits from project {}'.format(
                    len(hits), source_project.project_id))
def default_params(self):
    """Return the default parameters that apply to this backend.

    Three layers are merged, later ones overriding earlier ones: the
    generic AnnifBackend defaults, this class's DEFAULT_PARAMS, and every
    VW_PARAMS entry whose default value is not None.
    """
    merged = backend.AnnifBackend.DEFAULT_PARAMS.copy()
    merged.update(self.DEFAULT_PARAMS)

    # VW_PARAMS values are pairs; only the second element (the default)
    # matters here, and entries without a default are left out
    vw_defaults = {}
    for key, (_, fallback) in self.VW_PARAMS.items():
        if fallback is not None:
            vw_defaults[key] = fallback
    merged.update(vw_defaults)

    return merged
"""Maui backend that makes calls to a Maui Server instance using its API"""
import time
import os.path
import json
import requests
import requests.exceptions
from annif.exception import ConfigurationException
from annif.exception import NotSupportedException
from annif.exception import OperationFailedException
from annif.suggestion import SubjectSuggestion, ListSuggestionResult
from . import backend
class MauiBackend(backend.AnnifBackend):
name = "maui"
TRAIN_FILE = 'maui-train.jsonl'
@property
def endpoint(self):
    """The value of the 'endpoint' parameter of this backend.

    Raises ConfigurationException when the parameter has not been set.
    """
    try:
        configured = self.params['endpoint']
    except KeyError:
        raise ConfigurationException(
            "endpoint must be set in project configuration",
            backend_id=self.backend_id)
    return configured
@property
def tagger(self):
try:
def write(self, text):
    """Append ``text`` to the in-memory buffer and call ``flush()`` once
    the buffer holds at least BUFFER_SIZE entries."""
    pending = self._buffer
    pending.append(text)
    if len(pending) < self.BUFFER_SIZE:
        # still room in the buffer - nothing more to do
        return
    self.flush()
def read(self):
    """Return everything written so far as one newline-separated string.

    When the backing file has been created, its content comes first,
    followed by a newline and the entries still held in the buffer.
    """
    if self._created:
        with open(self._path, 'r', encoding='utf-8') as handle:
            flushed = handle.read()
        return flushed + "\n" + "\n".join(self._buffer)
    # file was never created - the buffer content is all there is
    return "\n".join(self._buffer)
class TFIDFBackend(backend.AnnifBackend):
"""TF-IDF vector space similarity based backend for Annif"""
name = "tfidf"
needs_subject_index = True
# defaults for uninitialized instances
_vectorizer = None
_index = None
VECTORIZER_FILE = 'vectorizer'
INDEX_FILE = 'tfidf-index'
def _generate_subjects_from_documents(self, corpus, project):
with tempfile.TemporaryDirectory() as tempdir:
subject_buffer = {}
for subject_id in range(len(project.subjects)):
subject_buffer[subject_id] = SubjectBuffer(tempdir,
def default_params(self):
    """Return the generic AnnifBackend defaults overlaid with this class's
    DEFAULT_PARAMS; the class-specific values win on conflicting keys."""
    merged = backend.AnnifBackend.DEFAULT_PARAMS.copy()
    merged.update(self.DEFAULT_PARAMS)
    return merged
"""Annif backend using the fastText classifier"""
import collections
import os.path
import annif.util
from annif.suggestion import SubjectSuggestion, ListSuggestionResult
from annif.exception import NotInitializedException, NotSupportedException
import fastText
from . import backend
from . import mixins
class FastTextBackend(mixins.ChunkingBackend, backend.AnnifBackend):
"""fastText backend for Annif"""
name = "fasttext"
needs_subject_index = True
FASTTEXT_PARAMS = {
'lr': float,
'lrUpdateRate': int,
'dim': int,
'ws': int,
'epoch': int,
'minCount': int,
'neg': int,
'wordNgrams': int,
'loss': str,
'bucket': int,
def default_params(self):
    """Return the default parameters that apply to this backend.

    Starting from a copy of the generic AnnifBackend defaults, the
    following layers are applied in order, each overriding the previous
    ones: the ChunkingBackend defaults, this class's DEFAULT_PARAMS, and
    every VW_PARAMS entry whose default value is not None.
    """
    combined = backend.AnnifBackend.DEFAULT_PARAMS.copy()
    layers = (
        mixins.ChunkingBackend.DEFAULT_PARAMS,
        self.DEFAULT_PARAMS,
        # VW_PARAMS values are pairs; keep the second element (the default)
        {name: default
         for name, (_, default) in self.VW_PARAMS.items()
         if default is not None},
    )
    for layer in layers:
        combined.update(layer)
    return combined
return self._suggest(text, project, params=beparams)
def debug(self, message):
    """Send ``message`` to the logger at debug level, prefixed with the
    id of this backend"""
    line = "Backend {}: {}".format(self.backend_id, message)
    logger.debug(line)
def info(self, message):
    """Send ``message`` to the logger at info level, prefixed with the
    id of this backend"""
    line = "Backend {}: {}".format(self.backend_id, message)
    logger.info(line)
def warning(self, message):
    """Send ``message`` to the logger at warning level, prefixed with the
    id of this backend"""
    line = "Backend {}: {}".format(self.backend_id, message)
    logger.warning(line)
class AnnifLearningBackend(AnnifBackend):
    """Base class for Annif backends that support online learning, i.e.
    further training of an existing model"""

    @abc.abstractmethod
    def learn(self, corpus, project):
        """Further train the model on the given document or subject corpus.

        Declared abstract; subclasses are expected to override it.
        """
        pass  # pragma: no cover
"""HTTP/REST client backend that makes calls to a web service
and returns the results"""
import requests
import requests.exceptions
from annif.suggestion import SubjectSuggestion, ListSuggestionResult
from . import backend
class HTTPBackend(backend.AnnifBackend):
name = "http"
def _suggest(self, text, params):
data = {'text': text}
if 'project' in params:
data['project'] = params['project']
try:
req = requests.post(params['endpoint'], data=data)
req.raise_for_status()
except requests.exceptions.RequestException as err:
self.warning("HTTP request failed: {}".format(err))
return ListSuggestionResult([], self.project.subjects)
try:
response = req.json()