How to use the flair.data.Dictionary class in flair

To help you get started, we’ve selected a few flair examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github flairNLP / flair / tests / test_language_model.py View on Github external
def test_train_resume_language_model_training(
    resources_path, results_base_path, tasks_base_path
):
    """Check that the lorem-ipsum example corpus loads with all of its splits.

    A forward, character-level ``TextCorpus`` is built over the dictionary
    returned by ``Dictionary.load("chars")``; the test then verifies that the
    test/train/valid splits are present and that the train split has length 2.
    """
    char_dictionary: Dictionary = Dictionary.load("chars")
    corpus_dir = resources_path / "corpora/lorem_ipsum"

    # forward direction, processed one character at a time
    corpus: TextCorpus = TextCorpus(
        corpus_dir, char_dictionary, forward=True, character_level=True
    )

    # every split must have been populated
    assert corpus.test is not None
    assert corpus.train is not None
    assert corpus.valid is not None

    train_size = len(corpus.train)
    assert train_size == 2
github flairNLP / flair / tests / test_model_integration.py View on Github external
def test_train_resume_language_model_training(
    resources_path, results_base_path, tasks_base_path
):
    # get default dictionary
    dictionary: Dictionary = Dictionary.load("chars")

    # init forward LM with 128 hidden states and 1 layer
    language_model: LanguageModel = LanguageModel(
        dictionary, is_forward_lm=True, hidden_size=128, nlayers=1
    )

    # get the example corpus and process at character level in forward direction
    corpus: TextCorpus = TextCorpus(
        resources_path / "corpora/lorem_ipsum",
        dictionary,
        language_model.is_forward_lm,
        character_level=True,
    )

    # train the language model
    trainer: LanguageModelTrainer = LanguageModelTrainer(
github flairNLP / flair / tests / test_data.py View on Github external
def test_dictionary_get_idx_for_item():
    """The second item added to a dictionary built without UNK has index 1."""
    label_dictionary: Dictionary = Dictionary(add_unk=False)

    for label in ("class_1", "class_2", "class_3"):
        label_dictionary.add_item(label)

    assert 1 == label_dictionary.get_idx_for_item("class_2")
github flairNLP / flair / tests / test_data.py View on Github external
def test_dictionary_get_items_without_unk():
    """``get_items`` returns exactly the added items, in the order they were added."""
    expected_labels = ["class_1", "class_2", "class_3"]

    label_dictionary: Dictionary = Dictionary(add_unk=False)
    for label in expected_labels:
        label_dictionary.add_item(label)

    items = label_dictionary.get_items()

    # no extra entry (such as an UNK symbol) may be present
    assert 3 == len(items)
    for position, label in enumerate(expected_labels):
        assert label == items[position]
github flairNLP / flair / tests / test_data.py View on Github external
def test_dictionary_save_and_load():
    """Round-trip a dictionary through ``save`` and ``load_from_file``.

    The reloaded dictionary must have the same length and the same number of
    items as the one that was saved. The temporary file is removed even when
    saving, loading or one of the assertions fails, so a failing run does not
    leave ``dictionary.txt`` behind in the working directory.
    """
    dictionary: Dictionary = Dictionary(add_unk=False)

    dictionary.add_item("class_1")
    dictionary.add_item("class_2")
    dictionary.add_item("class_3")

    file_path = "dictionary.txt"

    try:
        dictionary.save(file_path)
        # load through the class rather than the instance under test, so the
        # result cannot depend on the state of ``dictionary``
        loaded_dictionary = Dictionary.load_from_file(file_path)

        assert len(dictionary) == len(loaded_dictionary)
        assert len(dictionary.get_items()) == len(loaded_dictionary.get_items())
    finally:
        # clean up file; guard against the case where save() never created it
        # so the original error is not masked by FileNotFoundError
        if os.path.exists(file_path):
            os.remove(file_path)
github flairNLP / flair / tests / test_model_integration.py View on Github external
def test_train_language_model(results_base_path, resources_path):
    # get default dictionary
    dictionary: Dictionary = Dictionary.load("chars")

    # init forward LM with 128 hidden states and 1 layer
    language_model: LanguageModel = LanguageModel(
        dictionary, is_forward_lm=True, hidden_size=128, nlayers=1
    )

    # get the example corpus and process at character level in forward direction
    corpus: TextCorpus = TextCorpus(
        resources_path / "corpora/lorem_ipsum",
        dictionary,
        language_model.is_forward_lm,
        character_level=True,
    )

    # train the language model
    trainer: LanguageModelTrainer = LanguageModelTrainer(
github flairNLP / flair / flair / models / text_regression_model.py View on Github external
def __init__(self, document_embeddings: flair.embeddings.DocumentEmbeddings):
        """Set up a text regressor on top of the given document embeddings.

        The parent initializer receives a newly constructed
        ``flair.data.Dictionary`` as label dictionary with ``multi_label``
        switched off; afterwards the loss function is set to ``nn.MSELoss``.

        :param document_embeddings: embeddings forwarded to the parent class
        """
        label_dictionary = flair.data.Dictionary()

        super(TextRegressor, self).__init__(
            document_embeddings=document_embeddings,
            label_dictionary=label_dictionary,
            multi_label=False,
        )

        log.info("Using REGRESSION - experimental")

        # mean squared error as the training loss
        self.loss_function = nn.MSELoss()
github flairNLP / flair / flair / embeddings.py View on Github external
self,
        path_to_char_dict: str = None,
        char_embedding_dim: int = 25,
        hidden_size_char: int = 25,
    ):
        """Uses the default character dictionary if none provided."""

        super().__init__()
        self.name = "Char"
        self.static_embeddings = False

        # use list of common characters if none provided
        if path_to_char_dict is None:
            self.char_dictionary: Dictionary = Dictionary.load("common-chars")
        else:
            self.char_dictionary: Dictionary = Dictionary.load_from_file(
                path_to_char_dict
            )

        self.char_embedding_dim: int = char_embedding_dim
        self.hidden_size_char: int = hidden_size_char
        self.char_embedding = torch.nn.Embedding(
            len(self.char_dictionary.item2idx), self.char_embedding_dim
        )
        self.char_rnn = torch.nn.LSTM(
            self.char_embedding_dim,
            self.hidden_size_char,
            num_layers=1,
            bidirectional=True,
        )

        self.__embedding_length = self.char_embedding_dim * 2