# Restore persistable variables from a checkpoint, then optionally overlay
# pretrained weights.
fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)
if pretrained_model:
    # Only load variables that actually exist in the pretrained directory.
    def if_exist(var):
        return os.path.exists(os.path.join(pretrained_model, var.name))

    fluid.io.load_vars(
        exe, pretrained_model, main_program=train_prog, predicate=if_exist)

devicenum = get_gpu_num()
assert (args.train_batch_size % devicenum) == 0
# The reader feeds each device separately, so use the per-device batch size.
train_batch_size = args.train_batch_size // devicenum
test_batch_size = args.test_batch_size
train_reader = paddle.batch(
    reader.train(args), batch_size=train_batch_size, drop_last=True)
test_reader = paddle.batch(
    reader.test(args), batch_size=test_batch_size, drop_last=False)
test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
train_py_reader.decorate_paddle_reader(train_reader)

train_exe = fluid.ParallelExecutor(
    main_program=train_prog,
    use_cuda=args.use_gpu,
    loss_name=train_cost.name)

totalruntime = 0
train_py_reader.start()
iter_no = 0
train_info = [0, 0, 0, 0]
while iter_no <= args.total_iter_num:
    t1 = time.time()
    lr, loss, acc1, acc5 = train_exe.run(fetch_list=train_fetch_list)
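
With multiple devices, each fetched value comes back as a per-device array, so the fetched scalars are usually reduced before logging. A minimal sketch of that reduction, assuming the fetch list returns per-device numpy arrays (the helper name is illustrative, not from the source):

import numpy as np

def reduce_fetches(lr, loss, acc1, acc5):
    # Each fetched array holds one value per device; average to scalars.
    return (np.mean(np.asarray(lr)),
            np.mean(np.asarray(loss)),
            np.mean(np.asarray(acc1)),
            np.mean(np.asarray(acc5)))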
if args.use_gpu:
    devicenum = get_gpu_num()
    assert (args.train_batch_size % devicenum) == 0
else:
    devicenum = get_cpu_num()
    assert (args.train_batch_size % devicenum) == 0
# Note: with py_reader, the input batch size is the per-card batch size,
# so divide the global batch size by the number of devices.
train_batch_size = args.train_batch_size // devicenum
test_batch_size = args.test_batch_size
logging.debug('device number is %d, batch on each card:%d', devicenum,
              train_batch_size)
# Build a new train_reader that groups the records from the input reader into
# batches, then attach it to the py_reader; the thread created by the
# py_reader pulls the data itself, so reading is not done on the main thread.
train_reader = paddle.batch(
    reader.train(args), batch_size=train_batch_size, drop_last=True)
test_reader = paddle.batch(
    reader.test(args), batch_size=test_batch_size, drop_last=False)
test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
train_py_reader.decorate_paddle_reader(train_reader)
# Use ParallelExecutor for multi-card training.
train_exe = fluid.ParallelExecutor(
    main_program=train_prog,
    use_cuda=args.use_gpu,
    loss_name=train_cost.name)
totalruntime = 0
# Start the py_reader's reading thread.
train_py_reader.start()
iter_no = 0
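
The global batch is split evenly across devices, which is why both branches assert divisibility. A quick sketch of the arithmetic with hypothetical numbers (not from the source): a global batch of 256 on 8 cards gives 32 samples per card.

def per_device_batch_size(global_batch_size, device_count):
    # The global batch must divide evenly across devices.
    assert global_batch_size % device_count == 0
    return global_batch_size // device_count

assert per_device_batch_size(256, 8) == 32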
def main(_):
    # Raw data.
    train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)
    # Number of training steps needed for one epoch.
    train_data_len = len(train_data)  # size of the dataset
    train_batch_len = train_data_len // TRAIN_BATCH_SIZE  # number of batches
    train_epoch_size = (train_batch_len - 1) // TRAIN_NUM_STEP  # steps per epoch
    valid_data_len = len(valid_data)
    valid_batch_len = valid_data_len // EVAL_BATCH_SIZE
    valid_epoch_size = (valid_batch_len - 1) // EVAL_NUM_STEP
    test_data_len = len(test_data)
    test_batch_len = test_data_len // EVAL_BATCH_SIZE
    test_epoch_size = (test_batch_len - 1) // EVAL_NUM_STEP
    # Build the data queues; this must happen before the threads are started.
    train_queue = reader.ptb_producer(train_data, train_model.batch_size,
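
The epoch size counts how many [batch_size x num_steps] windows fit into the token stream, with one token held back so targets can be shifted by one position. A worked sketch with hypothetical numbers (not from the source): 929,589 tokens, batch size 20, 35 unrolled steps.

def epoch_size(data_len, batch_size, num_steps):
    # Tokens per batch row, then how many num_steps windows fit,
    # keeping one position free for the shifted targets.
    batch_len = data_len // batch_size
    return (batch_len - 1) // num_steps

assert epoch_size(929589, 20, 35) == 1327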
loss = criterion(outputs.view(-1, model.vocab_size), tt)
# loss.item() replaces the pre-0.4 PyTorch idiom loss.data[0].
costs += loss.item() * model.num_steps
iters += model.num_steps
if is_train:
    loss.backward()
    # In-place clip_grad_norm_ replaces the deprecated clip_grad_norm.
    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.25)
    # Manual SGD step (gradients are assumed to be zeroed elsewhere).
    for p in model.parameters():
        p.data.add_(p.grad.data, alpha=-lr)
    if step % (epoch_size // 10) == 10:
        print("{} perplexity: {:8.2f} speed: {} wps".format(
            step * 1.0 / epoch_size, np.exp(costs / iters),
            iters * model.batch_size / (time.time() - start_time)))
return np.exp(costs / iters)
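
Perplexity is the exponential of the average per-token cross-entropy, which is exactly what np.exp(costs / iters) computes. A tiny standalone sketch (the numbers are hypothetical):

import numpy as np

def perplexity(total_cost, total_steps):
    # Average negative log-likelihood per token, exponentiated.
    return np.exp(total_cost / total_steps)

# e.g. an average cross-entropy of ~4.605 nats/token is ~100 perplexity.
assert round(perplexity(4.60517 * 1000, 1000)) == 100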
if __name__ == "__main__":
    raw_data = reader.ptb_raw_data(data_path=args.data_path)
    train_data, valid_data, test_data, word_to_id, id_to_word = raw_data
    vocab_size = len(word_to_id)
    print('Vocabulary size: {}'.format(vocab_size))
    model = PTB_Model(embedding_dim=args.hidden_size, num_steps=args.num_steps,
                      batch_size=args.batch_size, vocab_size=vocab_size,
                      num_layers=args.num_layers, dp_keep_prob=args.keep_prob)
    model.cuda()
    lr = args.lr_start
    # decay factor for learning rate
    lr_decay_base = args.lr_decay_rate
    # we will not touch lr for the first m_flat_lr epochs
    m_flat_lr = 14.0
    print("########## Training ##########################")
    for epoch in range(args.max_max_epoch):
        lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
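
This schedule keeps the learning rate flat for the first m_flat_lr epochs and then decays it geometrically. A minimal sketch of the same rule with hypothetical values (base rate 20.0, decay 0.5, flat for 14 epochs):

def decayed_lr(lr_start, decay_rate, epoch, flat_epochs=14.0):
    # Flat until flat_epochs, then multiply by decay_rate once per epoch.
    return lr_start * decay_rate ** max(epoch - flat_epochs, 0)

assert decayed_lr(20.0, 0.5, 10) == 20.0   # still flat
assert decayed_lr(20.0, 0.5, 16) == 5.0    # two decay steps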
        total_loss += cost_train
        iters += num_steps
    ppl = np.exp(total_loss / iters)
    return ppl

# get train epoch size
batch_len = len(train_data) // batch_size
epoch_size = (batch_len - 1) // num_steps
log_interval = epoch_size // 10
total_time = 0.0
for epoch_id in range(max_epoch):
    start_time = time.time()
    print("epoch id", epoch_id)
    train_data_iter = reader.get_data_iter(train_data, batch_size,
                                           num_steps)
    total_loss = 0
    init_hidden = None
    init_cell = None
    #debug_para(fluid.framework.default_main_program(), parallel_executor)
    total_loss = 0
    iters = 0
    # Zero-initialize the LSTM state at the start of each epoch.
    init_hidden = np.zeros(
        (num_layers, batch_size, hidden_size), dtype='float32')
    init_cell = np.zeros(
        (num_layers, batch_size, hidden_size), dtype='float32')
    for batch_id, batch in enumerate(train_data_iter):
        input_data_feed = prepare_input(
            batch, init_hidden, init_cell, epoch_id=epoch_id)
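
Each epoch starts from a zero LSTM state shaped (num_layers, batch_size, hidden_size). A standalone sketch with hypothetical dimensions (2 layers, batch 20, hidden 200; not from the source):

import numpy as np

num_layers, batch_size, hidden_size = 2, 20, 200
init_hidden = np.zeros((num_layers, batch_size, hidden_size), dtype='float32')
init_cell = np.zeros((num_layers, batch_size, hidden_size), dtype='float32')
assert init_hidden.shape == (2, 20, 200)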
def __init__(self, config, data, name=None):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
    self.input_data, self.targets = reader.ptb_producer(
        data, batch_size, num_steps, name=name)

def __init__(self, config, data, name=None):
    flattened_data = [word for sentence in data for word in sentence]  # flatten list of lists
    self.batch_size = batch_size = config['batch_size']
    self.num_steps = num_steps = config['num_steps']
    self.epoch_size = ((len(flattened_data) // batch_size) - 1) // num_steps
    # input_data is a Tensor of shape [batch_size, num_steps]; targets has the
    # same shape but is shifted one step to the right.
    self.input_data, self.targets = reader.ptb_producer(data, config, name=name)
def __init__(self, config, data, name=None):
    '''
    num_steps: the number of timesteps (or unrolled steps)
    '''
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
    self.input_data, self.targets = reader.ptb_producer(
        data, batch_size, num_steps, name=name)
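
All of these constructors wrap reader.ptb_producer, which lays the token stream out as a [batch_size, batch_len] grid and serves [batch_size, num_steps] input windows with targets shifted one token to the right. A minimal numpy-only sketch of that layout (an illustration of the idea, not the TensorFlow queue-based implementation):

import numpy as np

def iterate_windows(data, batch_size, num_steps):
    # Reshape the stream into batch_size parallel rows.
    batch_len = len(data) // batch_size
    grid = np.asarray(data[:batch_size * batch_len]).reshape(batch_size, batch_len)
    epoch_size = (batch_len - 1) // num_steps
    for i in range(epoch_size):
        x = grid[:, i * num_steps:(i + 1) * num_steps]
        y = grid[:, i * num_steps + 1:(i + 1) * num_steps + 1]  # shifted targets
        yield x, y

x, y = next(iterate_windows(list(range(100)), batch_size=4, num_steps=5))
assert x.shape == y.shape == (4, 5)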