How to use the flair.embeddings.FlairEmbeddings class in flair

To help you get started, we’ve selected a few flair examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github flairNLP / flair / tests / test_model_integration.py View on Github external
def test_train_charlm_load_use_classifier(results_base_path, tasks_base_path):
    """Train a small IMDB text classifier on top of Flair character-LM
    embeddings, then verify every predicted label is well-formed."""
    corpus = flair.datasets.ClassificationCorpus(tasks_base_path / "imdb")
    label_dict = corpus.make_label_dictionary()

    embedding: TokenEmbeddings = FlairEmbeddings("news-forward-fast")
    document_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [embedding], 128, 1, False, 64, False, False
    )

    model: TextClassifier = TextClassifier(document_embeddings, label_dict, False)

    trainer = ModelTrainer(model, corpus)
    trainer.train(results_base_path, max_epochs=2, shuffle=False)

    sentence = Sentence("Berlin is a really nice city.")

    for s in model.predict(sentence):
        for l in s.labels:
            # Every label must carry a value and a probability-like score.
            assert l.value is not None
            assert 0.0 <= l.score <= 1.0
            # isinstance is the idiomatic type check (was: type(l.score) is float).
            assert isinstance(l.score, float)
github flairNLP / flair / tests / test_embeddings.py View on Github external
def test_loading_not_existing_char_lm_embedding():
    """Requesting an unknown FlairEmbeddings model id must raise ValueError."""
    with pytest.raises(ValueError):
        FlairEmbeddings("other")
github flairNLP / flair / tests / test_model_integration.py View on Github external
resources_path / "corpora/lorem_ipsum",
        dictionary,
        language_model.is_forward_lm,
        character_level=True,
    )

    # train the language model
    trainer: LanguageModelTrainer = LanguageModelTrainer(
        language_model, corpus, test_mode=True
    )
    trainer.train(
        results_base_path, sequence_length=10, mini_batch_size=10, max_epochs=2
    )

    # use the character LM as embeddings to embed the example sentence 'I love Berlin'
    char_lm_embeddings: TokenEmbeddings = FlairEmbeddings(
        str(results_base_path / "best-lm.pt")
    )
    sentence = Sentence("I love Berlin")
    char_lm_embeddings.embed(sentence)

    text, likelihood = language_model.generate_text(number_of_characters=100)
    assert text is not None
    assert len(text) >= 100

    # clean up results directory
    shutil.rmtree(results_base_path, ignore_errors=True)
github flairNLP / flair / tests / test_embeddings.py View on Github external
def init_document_embeddings():
    """Build a two-sentence test Sentence together with a word-level and a
    character-LM token embedder, and return all three."""
    sentence: Sentence = Sentence('I love Berlin. Berlin is a great place to live.')

    glove: TokenEmbeddings = WordEmbeddings('turian')
    charlm: TokenEmbeddings = FlairEmbeddings('news-forward-fast')

    return sentence, glove, charlm
github flairNLP / flair / tests / test_language_model.py View on Github external
def test_compute_perplexity():
    """A grammatical English sentence must get lower perplexity than
    non-English gibberish under the pretrained forward news LM.

    Fixes misspelled local identifiers (`gramamtical` -> `grammatical`)."""
    from flair.embeddings import FlairEmbeddings

    language_model = FlairEmbeddings("news-forward-fast").lm

    grammatical = "The company made a profit"
    perplexity_grammatical_sentence = language_model.calculate_perplexity(grammatical)

    ungrammatical = "Nook negh qapla!"
    perplexity_ungrammatical_sentence = language_model.calculate_perplexity(
        ungrammatical
    )

    print(f'"{grammatical}" - perplexity is {perplexity_grammatical_sentence}')
    print(f'"{ungrammatical}" - perplexity is {perplexity_ungrammatical_sentence}')

    # Lower perplexity means the LM finds the text more likely.
    assert perplexity_grammatical_sentence < perplexity_ungrammatical_sentence

    language_model = FlairEmbeddings("news-backward-fast").lm
github hanxiao / demo-poems-ir / flair-encode / my_flair.py View on Github external
def post_init(self):
    """Assemble the pooled document embedder from the configured word
    embedding plus the two configured Flair character-LM embeddings."""
    from flair.embeddings import DocumentPoolEmbeddings, WordEmbeddings, FlairEmbeddings

    word_layer = WordEmbeddings(self.word_embedding)
    flair_forward = FlairEmbeddings(self.flair_embeddings[0])
    flair_backward = FlairEmbeddings(self.flair_embeddings[1])
    self._flair = DocumentPoolEmbeddings(
        [word_layer, flair_forward, flair_backward],
        pooling=self.pooling_strategy,
    )
github JULIELab / jcore-base / jcore-flair-token-embedding-ae / src / main / resources / de / julielab / jcore / ae / fte / python / getEmbeddingScript.py View on Github external
return content.decode("utf-8")

# Map each CLI kind prefix ("word:...", "flair:...") to its embedding class;
# a dispatch dict replaces the repeated if-chain and avoids shadowing the
# builtin `type`, which the original loop variable did.
_EMBEDDING_CLASSES = {
    "word": WordEmbeddings,
    "char": CharacterEmbeddings,
    "bytepair": BytePairEmbeddings,
    "flair": FlairEmbeddings,
    "bert": BertEmbeddings,
    "elmo": ELMoEmbeddings,
}

embeddingList = []
# Each argument after the script name has the form "<kind>:<path>".
for arg in sys.argv[1:]:
    typeAndPath = arg.split(":")
    kind = typeAndPath[0]
    path = typeAndPath[1]
    if kind in _EMBEDDING_CLASSES:
        embeddingList.append(_EMBEDDING_CLASSES[kind](path))
# Stack multiple embeddings; a single embedding is used directly.
if len(embeddingList) > 1:
    embeddings = StackedEmbeddings(embeddings=embeddingList)
else:
    embeddings = embeddingList[0]

# NOTE(review): presumably decodeString() reads framed payloads from this
# buffered binary stdin — confirm against the calling process.
stdbuffer = sys.stdin.buffer
# Handshake line; the host process presumably waits for it before sending
# requests — verify against the caller.
print("Script is ready")
while True:
    line = decodeString(stdbuffer)
    if line.strip() == "exit":
        sys.exit(0)
    sentenceTaggingRequests = json.loads(line)
github abhinavkashyap / sciwing / sciwing / modules / embedders / flair_embedder.py View on Github external
word_tokens_namespace: str = "tokens",
    ):
        """ Flair Embeddings. This is used to produce Named Entity Recognition. Note: This only
        works if your tokens are produced by splitting based on white space

        Parameters
        ----------
        embedding_type
        datasets_manager
        device
        word_tokens_namespace
        """
        super(FlairEmbedder, self).__init__()
        self.allowed_type = ["en", "news"]
        assert embedding_type in self.allowed_type
        self.embedder_forward = FlairEmbeddings(f"{embedding_type}-forward")
        self.embedder_backward = FlairEmbeddings(f"{embedding_type}-backward")
        self.embedder_name = f"FlairEmbedder-{embedding_type}"
        self.datasets_manager = datasets_manager
        self.device = torch.device(device) if isinstance(device, str) else device
        self.word_tokens_namespace = word_tokens_namespace
github dcavar / Flair-JSON-NLP / flairjsonnlp / __init__.py View on Github external
def get_embeddings(embeddings: List[str], character: bool, lang: str, bpe_size: int) -> StackedEmbeddings:
    """Construct and return a stacked embedding model.

    Names containing 'forward' or 'backward' are loaded as contextual
    FlairEmbeddings, all other non-empty names as classic WordEmbeddings;
    character and byte-pair embeddings are appended when requested."""
    stack = []
    for name in embeddings:
        if name == '':
            continue
        contextual = 'forward' in name or 'backward' in name
        stack.append(FlairEmbeddings(name) if contextual else WordEmbeddings(name))
    if character:
        stack.append(CharacterEmbeddings())
    if bpe_size > 0:
        stack.append(BytePairEmbeddings(language=lang, dim=bpe_size))

    return StackedEmbeddings(embeddings=stack)
github alexandrainst / danlp / danlp / models / embeddings.py View on Github external
"""
    from flair.embeddings import FlairEmbeddings
    from flair.embeddings import WordEmbeddings
    from flair.embeddings import StackedEmbeddings

    embeddings = []

    if word_embeddings:
        fasttext_embedding = WordEmbeddings('da')
        embeddings.append(fasttext_embedding)

    if direction == 'bi' or direction == 'fwd':
        fwd_weight_path = download_model('flair.fwd', cache_dir,
                                         verbose=verbose,
                                         process_func=_unzip_process_func)
        embeddings.append(FlairEmbeddings(fwd_weight_path))

    if direction == 'bi' or direction == 'bwd':
        bwd_weight_path = download_model('flair.bwd', cache_dir,
                                         verbose=verbose,
                                         process_func=_unzip_process_func)
        embeddings.append(FlairEmbeddings(bwd_weight_path))

    if len(embeddings) == 1:
        return embeddings[0]

    return StackedEmbeddings(embeddings=embeddings)