# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): fragment — the enclosing per-sample loop (which binds `i`,
# `input_text`, `target_text`) is not visible here, and the original
# indentation has been stripped; see L36-L43 for the intact copy.
# One-hot encode each character of the encoder input sequence.
for t, char in enumerate(input_text):
encoder_input_data[i, t, input_token_index[char]] = 1.0
# One-hot encode the decoder input; the decoder target is the same
# sequence shifted left by one timestep (teacher forcing).
for t, char in enumerate(target_text):
# decoder_target_data is a head of decoder_input_data by one timestep
decoder_input_data[i, t, target_token_index[char]] = 1.0
if t > 0:
# Position t of the decoder input supervises position t-1 of the target.
decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
# NOTE(review): fragment with stripped indentation; this training section
# is duplicated almost verbatim at L44-L78 and L79-L97 — the duplicates
# should be removed once the original file structure is recovered.
logger.info("Data loaded.")
# split to train and val
# Hold out 10% of the one-hot arrays for validation; all three arrays are
# split with the same shuffle so samples stay aligned.
encoder_input_data_train, encoder_input_data_val, decoder_input_data_train, decoder_input_data_val, \
decoder_target_data_train, decoder_target_data_val = train_test_split(
encoder_input_data, decoder_input_data, decoder_target_data, test_size=0.1)
# model
logger.info("Training seq2seq model...")
# create_model returns the joint training model plus separate encoder and
# decoder models for inference-time decoding.
model, encoder_model, decoder_model = create_model(num_encoder_tokens, num_decoder_tokens, rnn_hidden_dim)
# Run training
callbacks_list = callback(save_model_path, logger)
# NOTE(review): Model.fit_generator is deprecated in modern Keras/TF2 —
# Model.fit accepts generators directly; consider migrating.
model.fit_generator(
generator=data_generator(encoder_input_data_train, decoder_input_data_train, decoder_target_data_train,
batch_size),
# Ceiling division so the final partial batch is still consumed.
steps_per_epoch=(len(encoder_input_data_train) + batch_size - 1) // batch_size,
epochs=epochs,
verbose=1,
validation_data=([encoder_input_data_val, decoder_input_data_val], decoder_target_data_val),
callbacks=callbacks_list)
# Persist the inference-time encoder/decoder separately from the
# checkpoints written by the training callbacks.
encoder_model.save(encoder_model_path)
decoder_model.save(decoder_model_path)
logger.info("Model save to " + save_model_path)
logger.info("Training has finished.")
# Save the target-side vocabulary so decoding can map ids back to tokens.
save_word_dict(target_token_index, save_target_token_path)
# NOTE(review): fragment with stripped indentation. This is the intact copy
# of the one-hot preparation (the partial copy at L2-L8 lacks this outer loop).
# Pre-allocate dense one-hot tensors: (num_samples, max_seq_len, vocab_size).
encoder_input_data = np.zeros((len(input_texts), max_input_texts_len, num_encoder_tokens), dtype='float32')
decoder_input_data = np.zeros((len(input_texts), max_target_texts_len, num_decoder_tokens), dtype='float32')
decoder_target_data = np.zeros((len(input_texts), max_target_texts_len, num_decoder_tokens), dtype='float32')
# one hot representation
for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)):
# Encoder side: set the character's index to 1 at each timestep.
for t, char in enumerate(input_text):
encoder_input_data[i, t, input_token_index[char]] = 1.0
for t, char in enumerate(target_text):
# decoder_target_data is a head of decoder_input_data by one timestep
decoder_input_data[i, t, target_token_index[char]] = 1.0
if t > 0:
# Teacher forcing: target at t-1 equals decoder input at t.
decoder_target_data[i, t - 1, target_token_index[char]] = 1.0
# NOTE(review): duplicate of the training section at L9-L31; this copy is
# truncated mid-way through the fit_generator() call (ends at `epochs=epochs,`)
# — almost certainly a paste/extraction artifact, not intentional code.
logger.info("Data loaded.")
# split to train and val
encoder_input_data_train, encoder_input_data_val, decoder_input_data_train, decoder_input_data_val, \
decoder_target_data_train, decoder_target_data_val = train_test_split(
encoder_input_data, decoder_input_data, decoder_target_data, test_size=0.1)
# model
logger.info("Training seq2seq model...")
model, encoder_model, decoder_model = create_model(num_encoder_tokens, num_decoder_tokens, rnn_hidden_dim)
# Run training
callbacks_list = callback(save_model_path, logger)
model.fit_generator(
generator=data_generator(encoder_input_data_train, decoder_input_data_train, decoder_target_data_train,
batch_size),
# Ceiling division so the final partial batch is still consumed.
steps_per_epoch=(len(encoder_input_data_train) + batch_size - 1) // batch_size,
epochs=epochs,
# NOTE(review): another near-verbatim duplicate of the training section
# (see L9-L31); differs only in ending with an evaluate() call instead of
# save_word_dict(). Duplicates should be collapsed to one copy.
# model
logger.info("Training seq2seq model...")
model, encoder_model, decoder_model = create_model(num_encoder_tokens, num_decoder_tokens, rnn_hidden_dim)
# Run training
callbacks_list = callback(save_model_path, logger)
# NOTE(review): fit_generator is deprecated in TF2 Keras; Model.fit
# accepts generators directly.
model.fit_generator(
generator=data_generator(encoder_input_data_train, decoder_input_data_train, decoder_target_data_train,
batch_size),
steps_per_epoch=(len(encoder_input_data_train) + batch_size - 1) // batch_size,
epochs=epochs,
verbose=1,
validation_data=([encoder_input_data_val, decoder_input_data_val], decoder_target_data_val),
callbacks=callbacks_list)
encoder_model.save(encoder_model_path)
decoder_model.save(decoder_model_path)
logger.info("Model save to " + save_model_path)
logger.info("Training has finished.")
# Decode the validation set with the inference models and report results.
# NOTE(review): evaluate()'s signature is not visible here — presumably it
# runs greedy/beam decoding over encoder_input_data_val; confirm in source.
evaluate(encoder_model, decoder_model, num_encoder_tokens,
num_decoder_tokens, rnn_hidden_dim, target_token_index,
max_target_texts_len, encoder_input_data_val, input_texts)
# NOTE(review): third copy of the same train-then-evaluate section
# (identical to L60-L78); retained byte-for-byte, flagged for removal.
logger.info("Training seq2seq model...")
model, encoder_model, decoder_model = create_model(num_encoder_tokens, num_decoder_tokens, rnn_hidden_dim)
# Run training
callbacks_list = callback(save_model_path, logger)
model.fit_generator(
generator=data_generator(encoder_input_data_train, decoder_input_data_train, decoder_target_data_train,
batch_size),
# Ceiling division so the final partial batch is still consumed.
steps_per_epoch=(len(encoder_input_data_train) + batch_size - 1) // batch_size,
epochs=epochs,
verbose=1,
validation_data=([encoder_input_data_val, decoder_input_data_val], decoder_target_data_val),
callbacks=callbacks_list)
encoder_model.save(encoder_model_path)
decoder_model.save(decoder_model_path)
logger.info("Model save to " + save_model_path)
logger.info("Training has finished.")
# Decode the validation set with the inference models and report results.
evaluate(encoder_model, decoder_model, num_encoder_tokens,
num_decoder_tokens, rnn_hidden_dim, target_token_index,
max_target_texts_len, encoder_input_data_val, input_texts)
# NOTE(review): this span comes from a different script (BERT masked-LM
# prediction, PyTorch) than the Keras training code above — the chunk is a
# concatenation of unrelated fragments. It also begins mid-print-call.
' prob:', top_score_val[j].item())
print()
if args.predict_file:
# Read raw examples and convert them to model features, inserting the
# [MASK] token/id at the positions to be predicted.
eval_examples = read_lm_examples(input_file=args.predict_file)
eval_features = convert_examples_to_features(
examples=eval_examples,
tokenizer=tokenizer,
max_seq_length=args.max_seq_length,
mask_token=MASK_TOKEN,
mask_id=MASK_ID)
logger.info("***** Running predictions *****")
logger.info(" Num orig examples = %d", len(eval_examples))
logger.info(" Num split examples = %d", len(eval_features))
logger.info("Start predict ...")
# NOTE(review): batch size 1 per feature; no torch.no_grad() or
# model.eval() visible in this fragment — confirm they are set upstream.
for f in eval_features:
input_ids = torch.tensor([f.input_ids])
segment_ids = torch.tensor([f.segment_ids])
predictions = model(input_ids, segment_ids)
# confirm we were able to predict 'henson'
mask_positions = f.mask_positions
if mask_positions:
for idx, i in enumerate(mask_positions):
# Position 0 is skipped; `if not i` also skips any falsy entry.
if not i:
continue
# Logits over the vocabulary at masked position i (batch 0).
scores = predictions[0, i]
# predicted_index = torch.argmax(scores).item()
# torch.sort(descending) returns (values, indices); keep top 5.
top_scores = torch.sort(scores, 0, True)
top_score_val = top_scores[0][:5]
top_score_idx = top_scores[1][:5]
# NOTE(review): yet another unrelated fragment (error-span labeling and
# TSV output); the enclosing loop that binds `k`, `label`, `sid`,
# `sentence`, `gold_error`, `start_pos`, `f` is not visible here.
has_error = True
# A running error span ends when the label changes and the new label is
# not an error (error_label_id falsy) — emit the span and reset.
if continue_error and label[k] != current_error and not error_label_id:
end_pos = k
f.write('%s\t%d\t%d\t%s\t%s\t%s\n' % (sid, start_pos, end_pos,
ids_label_dict[current_error], sentence, gold_error))
continue_error = False
current_error = 0
# The label changed to a *different* error type — close the previous span
# and immediately start a new one at k+1.
if continue_error and label[k] != current_error and error_label_id:
end_pos = k
f.write('%s\t%d\t%d\t%s\t%s\t%s\n' % (sid, start_pos, end_pos,
ids_label_dict[current_error], sentence, gold_error))
start_pos = k + 1
current_error = label[k]
# Sentences with no detected error are written with a 'correct' marker.
if not has_error:
f.write('%s\tcorrect\t%s\t%s\n' % (sid, sentence, gold_error))
logger.info('save to %s done, data size: %d' % (out_path, len(X_test)))
# NOTE(review): duplicate of the masked-LM prediction fragment at L98-L126,
# this time starting from the top-5 print loop; indentation stripped.
# Print each of the top-5 candidate tokens with its (unnormalized) score.
for j in range(len(top_score_idx)):
print('Mask predict is:', tokenizer.convert_ids_to_tokens([top_score_idx[j].item()])[0],
' prob:', top_score_val[j].item())
print()
if args.predict_file:
# Read raw examples and convert to features with [MASK] inserted.
eval_examples = read_lm_examples(input_file=args.predict_file)
eval_features = convert_examples_to_features(
examples=eval_examples,
tokenizer=tokenizer,
max_seq_length=args.max_seq_length,
mask_token=MASK_TOKEN,
mask_id=MASK_ID)
logger.info("***** Running predictions *****")
logger.info(" Num orig examples = %d", len(eval_examples))
logger.info(" Num split examples = %d", len(eval_features))
logger.info("Start predict ...")
for f in eval_features:
# One feature per forward pass (batch dimension of 1).
input_ids = torch.tensor([f.input_ids])
segment_ids = torch.tensor([f.segment_ids])
predictions = model(input_ids, segment_ids)
# confirm we were able to predict 'henson'
mask_positions = f.mask_positions
if mask_positions:
for idx, i in enumerate(mask_positions):
if not i:
continue
scores = predictions[0, i]
# predicted_index = torch.argmax(scores).item()
# Descending sort over the vocabulary dimension.
top_scores = torch.sort(scores, 0, True)