How to use simpletransformers - 10 common examples

To help you get started, we've selected a few simpletransformers examples, based on popular ways it is used in public projects.


github ThilinaRajapakse / simpletransformers / simpletransformers / ner / ner_model.py
to_predict: A python list of text (str) to be sent to the model for prediction.

        Returns:
            preds: A Python list of lists with dicts containing each word mapped to its NER tag.
            model_outputs: A python list of the raw model outputs for each text.
        """

        tokenizer = self.tokenizer
        device = self.device
        model = self.model
        args = self.args
        pad_token_label_id = self.pad_token_label_id

        self._move_model_to_device()

        predict_examples = [InputExample(i, sentence.split(), ["O" for word in sentence.split()]) for i, sentence in enumerate(to_predict)]

        eval_dataset = self.load_and_cache_examples(None, to_predict=predict_examples)

        eval_sampler = SequentialSampler(eval_dataset)
        eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args["eval_batch_size"])

        eval_loss = 0.0
        nb_eval_steps = 0
        preds = None
        out_label_ids = None
        model.eval()

        for batch in tqdm(eval_dataloader, disable=args['silent']):
            batch = tuple(t.to(device) for t in batch)

            with torch.no_grad():
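
A minimal sketch of how the predict() method shown above is typically called from user code; the model type and name here are illustrative assumptions, not taken from the snippet:

# Usage sketch for NERModel.predict() (model type/name are illustrative).
from simpletransformers.ner import NERModel

model = NERModel("bert", "bert-base-cased", use_cuda=False)

to_predict = ["Simple Transformers was written by Thilina Rajapakse"]
preds, model_outputs = model.predict(to_predict)

# preds contains one list per input text, each a list of {word: NER tag}
# dicts as described in the docstring above; model_outputs holds the raw
# model outputs for each text.
print(preds[0])
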
github ThilinaRajapakse / simpletransformers / simpletransformers / ner / ner_utils.py
def read_examples_from_file(data_file, mode):
    file_path = data_file
    guid_index = 1
    examples = []
    with open(file_path, encoding="utf-8") as f:
        words = []
        labels = []
        for line in f:
            if line.startswith("-DOCSTART-") or line == "" or line == "\n":
                if words:
                    examples.append(InputExample(guid="{}-{}".format(mode, guid_index),
                                                 words=words,
                                                 labels=labels))
                    guid_index += 1
                    words = []
                    labels = []
            else:
                splits = line.split(" ")
                words.append(splits[0])
                if len(splits) > 1:
                    labels.append(splits[-1].replace("\n", ""))
                else:
                    # Examples could have no label for mode = "test"
                    labels.append("O")
        if words:
            examples.append(InputExample(guid="{}-{}".format(mode, guid_index),
                                         words=words,
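
read_examples_from_file() expects CoNLL-style data: one token per line with the word first and its label last (space-separated), and a blank line or "-DOCSTART-" marker between sentences. A small sketch of that format, plus a direct call to the function (normally NERModel handles this internally; the file name is illustrative):

# Write a tiny CoNLL-style file matching the parsing logic shown above.
sample = (
    "Harry B-PER\n"
    "Potter I-PER\n"
    "lives O\n"
    "in O\n"
    "London B-LOC\n"
    "\n"
    "He O\n"
    "reads O\n"
    "books O\n"
)

with open("train.txt", "w", encoding="utf-8") as f:
    f.write(sample)

from simpletransformers.ner.ner_utils import read_examples_from_file

examples = read_examples_from_file("train.txt", mode="train")
print(len(examples), examples[0].words, examples[0].labels)
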
github ThilinaRajapakse / simpletransformers / simpletransformers / question_answering / question_answering_model.py
if args['model_type'] in ['xlnet', 'xlm']:
                    inputs.update({'cls_index': batch[4],
                                   'p_mask':       batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args['model_type'] in ['xlnet', 'xlm']:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(unique_id=unique_id,
                                                   start_top_log_probs=to_list(outputs[0][i]),
                                                   start_top_index=to_list(outputs[1][i]),
                                                   end_top_log_probs=to_list(outputs[2][i]),
                                                   end_top_index=to_list(outputs[3][i]),
                                                   cls_logits=to_list(outputs[4][i]))
                    else:
                        result = RawResult(unique_id=unique_id,
                                           start_logits=to_list(outputs[0][i]),
                                           end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

        prefix = 'test'
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

        output_prediction_file = os.path.join(output_dir, "predictions_{}.json".format(prefix))
        output_nbest_file = os.path.join(output_dir, "nbest_predictions_{}.json".format(prefix))
        output_null_log_odds_file = os.path.join(output_dir, "null_odds_{}.json".format(prefix))
github ThilinaRajapakse / simpletransformers / simpletransformers / question_answering / question_answering_model.py
if args['model_type'] in ['xlnet', 'xlm']:
                    inputs.update({'cls_index': batch[4],
                                   'p_mask':       batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args['model_type'] in ['xlnet', 'xlm']:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(unique_id=unique_id,
                                                   start_top_log_probs=to_list(outputs[0][i]),
                                                   start_top_index=to_list(outputs[1][i]),
                                                   end_top_log_probs=to_list(outputs[2][i]),
                                                   end_top_index=to_list(outputs[3][i]),
                                                   cls_logits=to_list(outputs[4][i]))
                    else:
                        result = RawResult(unique_id=unique_id,
                                           start_logits=to_list(outputs[0][i]),
                                           end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

        if args['model_type'] in ['xlnet', 'xlm']:
            answers = get_best_predictions_extended(examples, features, all_results, n_best_size,
                                                    args['max_answer_length'], model.config.start_n_top, model.config.end_n_top, True, tokenizer, args['null_score_diff_threshold'])
        else:
            answers = get_best_predictions(examples, features, all_results, n_best_size, args['max_answer_length'], False, False, True, False)

        return answers
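
These question-answering snippets are the internals of QuestionAnsweringModel's prediction path. A minimal sketch of the corresponding user-facing call, assuming an illustrative model and the SQuAD-style input format the library works with:

# Usage sketch (model type/name and example data are illustrative).
from simpletransformers.question_answering import QuestionAnsweringModel

model = QuestionAnsweringModel("bert", "bert-base-cased", use_cuda=False)

to_predict = [
    {
        "context": "Simple Transformers is a wrapper around the Transformers library.",
        "qas": [{"id": "0", "question": "What does Simple Transformers wrap?"}],
    }
]

# predict() runs the loop shown above and returns the best answers
# produced by get_best_predictions / get_best_predictions_extended.
answers = model.predict(to_predict)
print(answers)
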
github ThilinaRajapakse / simpletransformers / simpletransformers / question_answering / question_answering_model.py
inputs.update({'cls_index': batch[4],
                                   'p_mask':       batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args['model_type'] in ['xlnet', 'xlm']:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(unique_id=unique_id,
                                                   start_top_log_probs=to_list(outputs[0][i]),
                                                   start_top_index=to_list(outputs[1][i]),
                                                   end_top_log_probs=to_list(outputs[2][i]),
                                                   end_top_index=to_list(outputs[3][i]),
                                                   cls_logits=to_list(outputs[4][i]))
                    else:
                        result = RawResult(unique_id=unique_id,
                                           start_logits=to_list(outputs[0][i]),
                                           end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

        prefix = 'test'
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

        output_prediction_file = os.path.join(output_dir, "predictions_{}.json".format(prefix))
        output_nbest_file = os.path.join(output_dir, "nbest_predictions_{}.json".format(prefix))
        output_null_log_odds_file = os.path.join(output_dir, "null_odds_{}.json".format(prefix))

        if args['model_type'] in ['xlnet', 'xlm']:
            # XLNet uses a more complex post-processing procedure
github ThilinaRajapakse / simpletransformers / simpletransformers / question_answering / question_answering_model.py
example_indices = batch[3]

                if args['model_type'] in ['xlnet', 'xlm']:
                    inputs.update({'cls_index': batch[4],
                                   'p_mask':       batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args['model_type'] in ['xlnet', 'xlm']:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(unique_id=unique_id,
                                                   start_top_log_probs=to_list(outputs[0][i]),
                                                   start_top_index=to_list(outputs[1][i]),
                                                   end_top_log_probs=to_list(outputs[2][i]),
                                                   end_top_index=to_list(outputs[3][i]),
                                                   cls_logits=to_list(outputs[4][i]))
                    else:
                        result = RawResult(unique_id=unique_id,
                                           start_logits=to_list(outputs[0][i]),
                                           end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

        prefix = 'test'
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

        output_prediction_file = os.path.join(output_dir, "predictions_{}.json".format(prefix))
        output_nbest_file = os.path.join(output_dir, "nbest_predictions_{}.json".format(prefix))
github ThilinaRajapakse / simpletransformers / simpletransformers / question_answering / question_answering_model.py
if args['model_type'] in ['xlnet', 'xlm']:
                    inputs.update({'cls_index': batch[4],
                                   'p_mask':       batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args['model_type'] in ['xlnet', 'xlm']:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(unique_id=unique_id,
                                                   start_top_log_probs=to_list(outputs[0][i]),
                                                   start_top_index=to_list(outputs[1][i]),
                                                   end_top_log_probs=to_list(outputs[2][i]),
                                                   end_top_index=to_list(outputs[3][i]),
                                                   cls_logits=to_list(outputs[4][i]))
                    else:
                        result = RawResult(unique_id=unique_id,
                                           start_logits=to_list(outputs[0][i]),
                                           end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

        if args['model_type'] in ['xlnet', 'xlm']:
            answers = get_best_predictions_extended(examples, features, all_results, n_best_size,
                                                    args['max_answer_length'], model.config.start_n_top, model.config.end_n_top, True, tokenizer, args['null_score_diff_threshold'])
        else:
            answers = get_best_predictions(examples, features, all_results, n_best_size, args['max_answer_length'], False, False, True, False)

        return answers
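
The to_list() helper used throughout these snippets is not shown in the excerpts; its assumed definition, following the convention in the Hugging Face SQuAD example code, is simply:

# Assumed helper: move a tensor off the GPU and convert it to a plain
# Python list so it can be stored in RawResult / RawResultExtended.
def to_list(tensor):
    return tensor.detach().cpu().tolist()
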
github ThilinaRajapakse / simpletransformers / simpletransformers / question_answering / question_answering_model.py
outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args['model_type'] in ['xlnet', 'xlm']:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(unique_id=unique_id,
                                                   start_top_log_probs=to_list(outputs[0][i]),
                                                   start_top_index=to_list(outputs[1][i]),
                                                   end_top_log_probs=to_list(outputs[2][i]),
                                                   end_top_index=to_list(outputs[3][i]),
                                                   cls_logits=to_list(outputs[4][i]))
                    else:
                        result = RawResult(unique_id=unique_id,
                                           start_logits=to_list(outputs[0][i]),
                                           end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

        prefix = 'test'
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

        output_prediction_file = os.path.join(output_dir, "predictions_{}.json".format(prefix))
        output_nbest_file = os.path.join(output_dir, "nbest_predictions_{}.json".format(prefix))
        output_null_log_odds_file = os.path.join(output_dir, "null_odds_{}.json".format(prefix))

        if args['model_type'] in ['xlnet', 'xlm']:
            # XLNet uses a more complex post-processing procedure
            all_predictions, all_nbest_json, scores_diff_json = write_predictions_extended(examples, features, all_results, args['n_best_size'],
                                                                                           args['max_answer_length'], output_prediction_file,
                                                                                           output_nbest_file, output_null_log_odds_file, eval_data,
github ThilinaRajapakse / simpletransformers / simpletransformers / question_answering / question_answering_model.py
if args['model_type'] in ['xlnet', 'xlm']:
                    inputs.update({'cls_index': batch[4],
                                   'p_mask':       batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args['model_type'] in ['xlnet', 'xlm']:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(unique_id=unique_id,
                                                   start_top_log_probs=to_list(outputs[0][i]),
                                                   start_top_index=to_list(outputs[1][i]),
                                                   end_top_log_probs=to_list(outputs[2][i]),
                                                   end_top_index=to_list(outputs[3][i]),
                                                   cls_logits=to_list(outputs[4][i]))
                    else:
                        result = RawResult(unique_id=unique_id,
                                           start_logits=to_list(outputs[0][i]),
                                           end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

        prefix = 'test'
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

        output_prediction_file = os.path.join(output_dir, "predictions_{}.json".format(prefix))
        output_nbest_file = os.path.join(output_dir, "nbest_predictions_{}.json".format(prefix))
        output_null_log_odds_file = os.path.join(output_dir, "null_odds_{}.json".format(prefix))

        if args['model_type'] in ['xlnet', 'xlm']:
github ThilinaRajapakse / simpletransformers / simpletransformers / question_answering / question_answering_model.py
example_indices = batch[3]

                if args['model_type'] in ['xlnet', 'xlm']:
                    inputs.update({'cls_index': batch[4],
                                   'p_mask':       batch[5]})

                outputs = model(**inputs)

                for i, example_index in enumerate(example_indices):
                    eval_feature = features[example_index.item()]
                    unique_id = int(eval_feature.unique_id)
                    if args['model_type'] in ['xlnet', 'xlm']:
                        # XLNet uses a more complex post-processing procedure
                        result = RawResultExtended(unique_id=unique_id,
                                                   start_top_log_probs=to_list(outputs[0][i]),
                                                   start_top_index=to_list(outputs[1][i]),
                                                   end_top_log_probs=to_list(outputs[2][i]),
                                                   end_top_index=to_list(outputs[3][i]),
                                                   cls_logits=to_list(outputs[4][i]))
                    else:
                        result = RawResult(unique_id=unique_id,
                                           start_logits=to_list(outputs[0][i]),
                                           end_logits=to_list(outputs[1][i]))
                    all_results.append(result)

        prefix = 'test'
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)

        output_prediction_file = os.path.join(output_dir, "predictions_{}.json".format(prefix))
        output_nbest_file = os.path.join(output_dir, "nbest_predictions_{}.json".format(prefix))
        output_null_log_odds_file = os.path.join(output_dir, "null_odds_{}.json".format(prefix))
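
Once evaluation finishes, the prediction files written above are plain JSON and can be inspected directly. A small sketch, assuming the prefix = 'test' used in the snippet and an illustrative output directory:

import json
import os

output_dir = "outputs"  # illustrative; use the directory passed in above

# predictions_test.json maps each question id to its best answer text.
with open(os.path.join(output_dir, "predictions_test.json")) as f:
    predictions = json.load(f)

# nbest_predictions_test.json keeps the top-n candidate answers per id.
with open(os.path.join(output_dir, "nbest_predictions_test.json")) as f:
    nbest = json.load(f)

for qid, answer in list(predictions.items())[:5]:
    print(qid, "->", answer)
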

simpletransformers

An easy-to-use wrapper library for the Transformers library.

License: Apache-2.0
Latest version published 6 months ago

Package Health Score: 70 / 100