# Secure your code as it's written. Use Snyk Code to scan source code in minutes — no build needed — and fix issues immediately.
# NOTE(review): this fragment is truncated -- the exe.run(...) call that
# starts at the bottom never closes its feed dict and has no fetch_list;
# recover the remainder from the original file before running.
def infer(model_path, batch_size, test_data_file, vocab_file, target_file,
use_gpu):
"""Predict the test data with the saved model under ``model_path``.

The predictions are printed to the screen; nothing is returned.

Args:
    model_path: directory holding a fluid inference model.
    batch_size: number of samples fed per inference step.
    test_data_file: path of the test data set.
    vocab_file: path of the word dictionary.
    target_file: path of the label dictionary.
    use_gpu: run on CUDAPlace(0) when true, else CPUPlace.
"""
# forward and reverse vocab/label maps (reverse maps decode ids -> text)
word_dict = load_dict(vocab_file)
word_reverse_dict = load_reverse_dict(vocab_file)
label_dict = load_dict(target_file)
label_reverse_dict = load_reverse_dict(target_file)
# batched reader over the test set
test_data = paddle.batch(
reader.data_reader(test_data_file, word_dict, label_dict),
batch_size=batch_size)
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
# use a private scope so the loaded inference vars do not leak globally
inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope):
[inference_program, feed_target_names,
fetch_targets] = fluid.io.load_inference_model(model_path, exe)
for data in test_data():
# each sample is (word ids, mark ids, label ids); convert each
# column of the batch into a LoDTensor on the chosen place
# (Python 2: map() returns a list here)
word = to_lodtensor(map(lambda x: x[0], data), place)
mark = to_lodtensor(map(lambda x: x[1], data), place)
target = to_lodtensor(map(lambda x: x[2], data), place)
crf_decode = exe.run(
inference_program,
feed={"word": word,
"mark": mark,
# --- training-setup fragment (truncated at both ends) ---
# Free names (sgd_optimizer, avg_cost, feature_out, target, label_dict_len,
# train_data_file, test_data_file, word_dict, label_dict, BATCH_SIZE, use_gpu,
# word, mark, word_vector_values, num_passes) are defined elsewhere in the
# original script; the final `for` loop's body is missing here.
sgd_optimizer.minimize(avg_cost)
# Viterbi decoding layer sharing the CRF transition weights 'crfw'
crf_decode = fluid.layers.crf_decoding(
input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
chunk_evaluator = fluid.evaluator.ChunkEvaluator(
input=crf_decode,
label=target,
chunk_scheme="IOB",
# with IOB tagging, every chunk type uses 2 tags (B-/I-) plus one "O" tag
num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
# batched readers; drop_last=False keeps the short tail batch
train_reader = paddle.batch(
reader.data_reader(train_data_file, word_dict, label_dict),
batch_size=BATCH_SIZE, drop_last=False)
test_reader = paddle.batch(
reader.data_reader(test_data_file, word_dict, label_dict),
batch_size=BATCH_SIZE, drop_last=False)
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()
feeder = fluid.DataFeeder(feed_list=[word, mark, target], place=place)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
# overwrite the 'emb' embedding table with pre-trained word vectors
# (presumably loaded earlier into word_vector_values -- confirm upstream)
embedding_name = 'emb'
embedding_param = fluid.global_scope().find_var(embedding_name).get_tensor(
)
embedding_param.set(word_vector_values, place)
batch_id = 0
total_time = 0.0
# Python 2 xrange; the pass loop body is cut off in this fragment
for pass_id in xrange(num_passes):
def train(model_type, data_path=None, pr_figure_path=None,
          model_save_path=None, vectorizer_path=None, col_sep=',',
          thresholds=0.5, num_classes=2):
    """Fit a tf-idf based text classifier and evaluate it on a held-out split.

    Reads labelled samples from ``data_path``, builds tf-idf features,
    persists the feature matrix to ``vectorizer_path``, trains the model
    named by ``model_type``, saves it to ``model_save_path`` and finally
    reports evaluation metrics (PR figure written to ``pr_figure_path``).
    """
    content, raw_labels = data_reader(data_path, col_sep)
    # tf-idf feature matrix for the whole corpus
    features = tfidf(content)
    # persist the features so inference can reuse the same vectorization
    dump_pkl(features, vectorizer_path)
    # map string labels onto integer ids
    encoded_labels = label_encoder(raw_labels)
    feat_train, feat_val, lbl_train, lbl_val = train_test_split(
        features, encoded_labels, test_size=0.1, random_state=42)
    clf = get_model(model_type)
    clf.fit(feat_train, lbl_train)
    # persist the fitted classifier
    dump_pkl(clf, model_save_path)
    # report metrics on the 10% validation split
    eval(clf, feat_val, lbl_val, thresholds=thresholds,
         num_classes=num_classes, model_type=model_type,
         pr_figure_path=pr_figure_path)
# --- GNR inference fragment ---
# Free names (use_gpu, trainer_count, config, model_path, data_dir,
# batch_size, GNR, choose_samples, infer_a_batch, reader, logger) come from
# the surrounding script; this block appears to be a function body with its
# `def` header stripped.
paddle.init(use_gpu=use_gpu, trainer_count=trainer_count)
ids_2_word = load_reverse_dict(config.dict_path)
# build the network topology in inference mode
outputs = GNR(config, is_infer=True)
# load the trained models
parameters = paddle.parameters.Parameters.from_tar(
gzip.open(model_path, "r"))
logger.info("loading parameter is done.")
inferer = paddle.inference.Inference(
output_layer=outputs, parameters=parameters)
# only the validation split is used for inference here
_, valid_samples = choose_samples(data_dir)
test_reader = reader.data_reader(valid_samples, is_train=False)
# accumulate samples and flush a full batch at a time
test_batch = []
for i, item in enumerate(test_reader()):
test_batch.append(item)
if len(test_batch) == batch_size:
infer_a_batch(inferer, test_batch, ids_2_word, len(outputs))
test_batch = []
# flush the tail batch only when it is non-empty
if len(test_batch):
infer_a_batch(inferer, test_batch, ids_2_word, len(outputs))
test_batch = []
# --- NER inference fragment ---
# Free names (model_path, word_dict_len, label_dict_len, ner_net, reader,
# test_data_file, word_dict, label_dict, batch_size, _infer_a_batch,
# word_reverse_dict, label_reverse_dict) come from the surrounding script.
# initialize PaddlePaddle (CPU only, single trainer)
paddle.init(use_gpu=False, trainer_count=1)
parameters = paddle.parameters.Parameters.from_tar(
    gzip.open(model_path, "r"))
# build the network topology in inference mode
predict = ner_net(
    word_dict_len=word_dict_len,
    label_dict_len=label_dict_len,
    is_train=False)
inferer = paddle.inference.Inference(
    output_layer=predict, parameters=parameters)
test_data = []
for i, item in enumerate(
        reader.data_reader(test_data_file, word_dict, label_dict)()):
    # keep only (word ids, mark ids); the gold labels are not fed
    test_data.append([item[0], item[1]])
    if len(test_data) == batch_size:
        _infer_a_batch(inferer, test_data, word_reverse_dict,
                       label_reverse_dict)
        test_data = []
# FIX: flush the tail batch only when it is non-empty. The original called
# _infer_a_batch unconditionally, passing an empty batch whenever the data
# size was an exact multiple of batch_size (the sibling GNR loop above
# already guards this case).
if test_data:
    _infer_a_batch(inferer, test_data, word_reverse_dict,
                   label_reverse_dict)
    test_data = []
# --- conv seq2seq inference setup fragment ---
# Free names (src_dict_size, trg_dict_size, pos_size, emb_dim,
# enc_conv_blocks, dec_conv_blocks, drop_rate, model_path, infer_data_path,
# src_dict, trg_dict, max_len, beam_size, conv_seq2seq, BeamSearch) come
# from the surrounding script.
# build the network in inference mode; `prob` is the output layer
prob = conv_seq2seq(
src_dict_size=src_dict_size,
trg_dict_size=trg_dict_size,
pos_size=pos_size,
emb_dim=emb_dim,
enc_conv_blocks=enc_conv_blocks,
dec_conv_blocks=dec_conv_blocks,
drop_rate=drop_rate,
is_infer=True)
# load parameters
parameters = paddle.parameters.Parameters.from_tar(gzip.open(model_path))
# each decoder conv block with context length k needs k-1 padding tokens;
# sum them up for the reader (Python 2: `reduce` is a builtin)
padding_list = [context_len - 1 for (size, context_len) in dec_conv_blocks]
padding_num = reduce(lambda x, y: x + y, padding_list)
infer_reader = reader.data_reader(
data_file=infer_data_path,
src_dict=src_dict,
trg_dict=trg_dict,
pos_size=pos_size,
padding_num=padding_num)
inferer = paddle.inference.Inference(
output_layer=prob, parameters=parameters)
# beam-search decoder wrapping the inferer
searcher = BeamSearch(
inferer=inferer,
trg_dict=trg_dict,
pos_size=pos_size,
padding_num=padding_num,
max_len=max_len,
beam_size=beam_size)
# --- reader-creation fragment (body of an unseen function: it ends with a
# `return`) ---
# Free names (train_data_path, test_data_path, src_dict, trg_dict, pos_size,
# padding_num, batch_size) are presumably that function's parameters or
# enclosing-scope variables -- confirm against the original file.
# training data is shuffled within a 10240-sample buffer
train_reader = paddle.batch(
reader=paddle.reader.shuffle(
reader=reader.data_reader(
data_file=train_data_path,
src_dict=src_dict,
trg_dict=trg_dict,
pos_size=pos_size,
padding_num=padding_num),
buf_size=10240),
batch_size=batch_size)
# the test reader is optional; None when no test data path is given
test_reader = None
if test_data_path:
test_reader = paddle.batch(
reader=paddle.reader.shuffle(
reader=reader.data_reader(
data_file=test_data_path,
src_dict=src_dict,
trg_dict=trg_dict,
pos_size=pos_size,
padding_num=padding_num),
buf_size=10240),
batch_size=batch_size)
return train_reader, test_reader
"""Build the data reader for this model.
Arguments:
- data_dir: The path of training data.
- batch_size: batch size for the training task.
"""
train_samples, valid_samples = choose_samples(data_dir)
train_reader = paddle.batch(
paddle.reader.shuffle(
reader.data_reader(train_samples), buf_size=102400),
batch_size=batch_size)
# testing data is not shuffled
test_reader = paddle.batch(
reader.data_reader(
valid_samples, is_train=False),
batch_size=batch_size)
return train_reader, test_reader, len(train_samples)
# --- training-setup fragment (truncated at the start: the tuple unpack
# below is missing its opening names, presumably (num_infer_chunks,
# num_label_chunks, ... -- confirm against the original file) ---
# Free names (crf_decode, target, label_dict_len, avg_cost, train_data_file,
# test_data_file, word_dict, label_dict, batch_size, use_gpu) come from the
# surrounding script.
num_correct_chunks) = fluid.layers.chunk_eval(
input=crf_decode,
label=target,
chunk_scheme="IOB",
# with IOB tagging, every chunk type uses 2 tags (B-/I-) plus one "O" tag
num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))
chunk_evaluator = fluid.metrics.ChunkEvaluator()
# clone the main program for evaluation before adding the optimizer ops
inference_program = fluid.default_main_program().clone(for_test=True)
test_fetch_list = [num_infer_chunks, num_label_chunks, num_correct_chunks]
sgd_optimizer = fluid.optimizer.SGD(learning_rate=1e-3)
sgd_optimizer.minimize(avg_cost)
# CE_MODE_X appears to be a continuous-evaluation switch: when set, skip
# shuffling so runs are reproducible -- confirm against the CI setup
if "CE_MODE_X" not in os.environ:
train_reader = paddle.batch(
paddle.reader.shuffle(
reader.data_reader(train_data_file, word_dict, label_dict),
buf_size=20000),
batch_size=batch_size)
test_reader = paddle.batch(
paddle.reader.shuffle(
reader.data_reader(test_data_file, word_dict, label_dict),
buf_size=20000),
batch_size=batch_size)
else:
train_reader = paddle.batch(
reader.data_reader(train_data_file, word_dict, label_dict),
batch_size=batch_size)
test_reader = paddle.batch(
reader.data_reader(test_data_file, word_dict, label_dict),
batch_size=batch_size)
place = fluid.CUDAPlace(0) if use_gpu else fluid.CPUPlace()