How to use reader - 10 common examples

To help you get started, we’ve selected a few reader examples based on popular ways it is used in public projects.


github PaddlePaddle / models / PaddleCV / metric_learning / train_elem.py View on Github
    fluid.io.load_persistables(exe, checkpoint, main_program=train_prog)

    if pretrained_model:

        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))

        fluid.io.load_vars(
            exe, pretrained_model, main_program=train_prog, predicate=if_exist)

    devicenum = get_gpu_num()
    assert (args.train_batch_size % devicenum) == 0
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size
    
    train_reader = paddle.batch(reader.train(args), batch_size=train_batch_size, drop_last=True)
    test_reader = paddle.batch(reader.test(args), batch_size=test_batch_size, drop_last=False)
    test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
    train_py_reader.decorate_paddle_reader(train_reader)

    train_exe = fluid.ParallelExecutor(
        main_program=train_prog,
        use_cuda=args.use_gpu,
        loss_name=train_cost.name)

    totalruntime = 0
    train_py_reader.start()
    iter_no = 0
    train_info = [0, 0, 0, 0]
    while iter_no <= args.total_iter_num:
        t1 = time.time()
        lr, loss, acc1, acc5 = train_exe.run(fetch_list=train_fetch_list)
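
In the snippet above, reader.train(args) and reader.test(args) are expected to return Paddle-style sample readers: callables that yield one (image, label) pair at a time, which paddle.batch then groups into batches of train_batch_size. A minimal sketch of such a reader, with synthetic data standing in for the repo's real image pipeline (the shapes and label range are illustrative assumptions):

import numpy as np

# Sketch only: the real reader.train(args) walks the image list given in args;
# random data keeps this example self-contained.
def train(args):
    def reader_fn():
        for _ in range(1000):
            img = np.random.rand(3, 224, 224).astype('float32')
            label = np.random.randint(0, 10)
            yield img, label
    return reader_fn

# The batched reader is then built as in the snippet:
# train_reader = paddle.batch(train(args), batch_size=train_batch_size, drop_last=True)
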
github lgone2000 / paddle-tutorial / image_feature / metric_learning / train_elem.py View on Github
    if args.use_gpu:
        devicenum = get_gpu_num()
        assert (args.train_batch_size % devicenum) == 0
    else:
        devicenum = get_cpu_num()
        assert (args.train_batch_size % devicenum) == 0
    # Note: the batch size fed through py_reader is the per-card batch size, so the global batch size must be divided by the number of devices
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size

    logging.debug('device number is %d, batch on each card:%d', devicenum,
                  train_batch_size)

    # Build a new train_reader that groups the samples from the input reader into batches. Also attach train_reader to the py_reader, whose own thread pulls the data instead of the main thread.
    train_reader = paddle.batch(
        reader.train(args), batch_size=train_batch_size, drop_last=True)
    test_reader = paddle.batch(
        reader.test(args), batch_size=test_batch_size, drop_last=False)
    test_feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    train_py_reader.decorate_paddle_reader(train_reader)

    # Use ParallelExecutor for multi-card training
    train_exe = fluid.ParallelExecutor(
        main_program=train_prog,
        use_cuda=args.use_gpu,
        loss_name=train_cost.name)

    totalruntime = 0
    # Start the py_reader's reading thread
    train_py_reader.start()
    iter_no = 0
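
train_py_reader itself is created outside this excerpt. In Fluid 1.x it is typically declared with fluid.layers.py_reader and unpacked with fluid.layers.read_file; the shapes and dtypes below are assumptions for an image-classification input, not taken from the repo:

import paddle.fluid as fluid

# Assumed declaration of the train_py_reader used above (Fluid 1.x style).
train_py_reader = fluid.layers.py_reader(
    capacity=64,
    shapes=[[-1, 3, 224, 224], [-1, 1]],
    dtypes=['float32', 'int64'],
    use_double_buffer=True)
image, label = fluid.layers.read_file(train_py_reader)

# The excerpt then attaches the batched reader and starts the reading thread:
# train_py_reader.decorate_paddle_reader(train_reader)
# train_py_reader.start()
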
github widiot / tensorflow-practices / lstm / nl-modeling / train.py View on Github
def main(_):
    # Raw data
    train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)

    # Compute how many training steps make up one epoch
    train_data_len = len(train_data)  # size of the dataset
    train_batch_len = train_data_len // TRAIN_BATCH_SIZE  # number of batches
    train_epoch_size = (train_batch_len - 1) // TRAIN_NUM_STEP  # training steps in this epoch

    valid_data_len = len(valid_data)
    valid_batch_len = valid_data_len // EVAL_BATCH_SIZE
    valid_epoch_size = (valid_batch_len - 1) // EVAL_NUM_STEP

    test_data_len = len(test_data)
    test_batch_len = test_data_len // EVAL_BATCH_SIZE
    test_epoch_size = (test_batch_len - 1) // EVAL_NUM_STEP

    # Build the data queues; this must happen before the queue-runner threads are started
    train_queue = reader.ptb_producer(train_data, train_model.batch_size,
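
reader.ptb_producer here is the queue-based input pipeline from the TensorFlow PTB tutorial; its output tensors only produce data once the queue-runner threads are running, which is why the queues must be created before the threads are started. A hedged sketch of how the producer output is usually consumed (TF 1.x session API, reusing the constants from the snippet):

import tensorflow as tf
import reader

train_data, valid_data, test_data, _ = reader.ptb_raw_data(DATA_PATH)
input_data, targets = reader.ptb_producer(train_data, TRAIN_BATCH_SIZE,
                                          TRAIN_NUM_STEP)

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # Each run() call dequeues one [batch_size, num_steps] batch of word ids.
    x, y = sess.run([input_data, targets])
    coord.request_stop()
    coord.join(threads)
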
github ujjax / fast-slow-lstm / main.py View on Github
        loss = criterion(outputs.view(-1, model.vocab_size), tt)
        costs += loss.data[0] * model.num_steps
        iters += model.num_steps

        if is_train:
            loss.backward()
            torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
            for p in model.parameters():
                p.data.add_(-lr, p.grad.data)
            if step % (epoch_size // 10) == 10:
                print("{} perplexity: {:8.2f} speed: {} wps".format(step * 1.0 / epoch_size, np.exp(costs / iters),
                                  iters * model.batch_size / (time.time() - start_time)))
    return np.exp(costs / iters)

if __name__ == "__main__":
    raw_data = reader.ptb_raw_data(data_path=args.data_path)
    train_data, valid_data, test_data, word_to_id, id_to_word = raw_data
    vocab_size = len(word_to_id)
    print('Vocabulary size: {}'.format(vocab_size))
    model = PTB_Model(embedding_dim=args.hidden_size, num_steps=args.num_steps, batch_size=args.batch_size,
                      vocab_size=vocab_size, num_layers=args.num_layers, dp_keep_prob=args.keep_prob)
    model.cuda()
    lr = args.lr_start
    # decay factor for learning rate
    lr_decay_base = args.lr_decay_rate
    # we will not touch lr for the first m_flat_lr epochs
    m_flat_lr = 14.0

    print("########## Training ##########################")

    for epoch in range(args.max_max_epoch):
        lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
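
The schedule here keeps the learning rate flat for the first m_flat_lr epochs and then decays it geometrically. A small numeric illustration (the lr_start and lr_decay_rate values are assumptions, not the repo's defaults):

lr_start = 1.0
lr_decay_base = 1.0 / 1.15   # assumed lr_decay_rate
m_flat_lr = 14.0

for epoch in (0, 13, 14, 20):
    lr_decay = lr_decay_base ** max(epoch - m_flat_lr, 0)
    print(epoch, lr_start * lr_decay)
# epochs 0 through 14 keep lr at 1.0; by epoch 20 it has decayed to about 0.43
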
github PaddlePaddle / models / fluid / PaddleNLP / language_model / lstm / train.py View on Github
            total_loss += cost_train
            iters += num_steps

        ppl = np.exp(total_loss / iters)
        return ppl

    # get train epoch size
    batch_len = len(train_data) // batch_size
    epoch_size = (batch_len - 1) // num_steps
    log_interval = epoch_size // 10
    total_time = 0.0
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)
        train_data_iter = reader.get_data_iter(train_data, batch_size,
                                               num_steps)

        total_loss = 0

        init_hidden = None
        init_cell = None
        #debug_para(fluid.framework.default_main_program(), parallel_executor)
        total_loss = 0
        iters = 0
        init_hidden = np.zeros(
            (num_layers, batch_size, hidden_size), dtype='float32')
        init_cell = np.zeros(
            (num_layers, batch_size, hidden_size), dtype='float32')
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed = prepare_input(
                batch, init_hidden, init_cell, epoch_id=epoch_id)
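
reader.get_data_iter is not shown in the excerpt; from the way the loop consumes it, it yields (x, y) batches of shape [batch_size, num_steps] with the targets shifted one token ahead of the inputs. A sketch of a generator with that contract, following the standard PTB batching scheme (an assumption about the repo's reader, not its actual code):

import numpy as np

def get_data_iter(raw_data, batch_size, num_steps):
    raw_data = np.asarray(raw_data, dtype='int64')
    batch_len = len(raw_data) // batch_size
    data = raw_data[:batch_size * batch_len].reshape((batch_size, batch_len))
    epoch_size = (batch_len - 1) // num_steps
    for i in range(epoch_size):
        x = data[:, i * num_steps:(i + 1) * num_steps]
        y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1]
        yield x, y
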
github jingli9111 / RUM-Tensorflow / ptb_task.py View on Github
def __init__(self, config, data, name=None):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
        self.input_data, self.targets = reader.ptb_producer(
            data, batch_size, num_steps, name=name)
github lverwimp / tf-lm / scripts / word_lm_rescore_nbest.py View on Github
def __init__(self, config, data, name=None):
		flattened_data = [word for sentence in data for word in sentence] # flatten list of lists
		self.batch_size = batch_size = config['batch_size']
		self.num_steps = num_steps = config['num_steps']
		self.epoch_size = ((len(flattened_data) // batch_size) - 1) // num_steps

		# input_data = Tensor of size batch_size x num_steps, same for targets (but shifted 1 step to the right)
		self.input_data, self.targets = reader.ptb_producer(data, config, name=name)
github jingli9111 / RUM-Tensorflow / main.py View on Github
def __init__(self, config, data, name=None):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
        self.input_data, self.targets = reader.ptb_producer(
            data, batch_size, num_steps, name=name)
github cmdowney / psrnn / ptb_word_lm.py View on Github
def __init__(self, config, data, name=None):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
    self.input_data, self.targets = reader.ptb_producer(
        data, batch_size, num_steps, name=name)
github woodfrog / ActionRecognition / rnn_practice / tf_rnn_tut / ptb_word_lm.py View on Github
def __init__(self, config, data, name=None):
        '''
          num_steps: the number of timesteps (or unrolled steps)

        '''
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        self.epoch_size = ((len(data) // batch_size) - 1) // num_steps
        self.input_data, self.targets = reader.ptb_producer(
            data, batch_size, num_steps, name=name)
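
The last five snippets all wrap reader.ptb_producer in the same PTBInput-style class from the TensorFlow PTB tutorial. A hedged usage sketch of how such a wrapper is typically constructed for each data split (PTBInput, config and data_path stand in for the names each repo actually uses):

import tensorflow as tf
import reader

train_data, valid_data, test_data, _ = reader.ptb_raw_data(data_path)

with tf.name_scope("Train"):
    train_input = PTBInput(config=config, data=train_data, name="TrainInput")
    # the model graph then reads train_input.input_data and train_input.targets,
    # both [batch_size, num_steps] tensors produced by reader.ptb_producer

with tf.name_scope("Valid"):
    valid_input = PTBInput(config=config, data=valid_data, name="ValidInput")

The epoch_size formula shared by these classes subtracts one batch column because the targets are the inputs shifted one step to the right.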