How to use the get_data_iter function from the reader module

To help you get started, we’ve selected a few reader examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github PaddlePaddle / models / fluid / PaddleNLP / language_model / lstm / train.py View on Github external
total_loss += cost_train
            iters += num_steps

        ppl = np.exp(total_loss / iters)
        return ppl

    # get train epoch size
    batch_len = len(train_data) // batch_size
    epoch_size = (batch_len - 1) // num_steps
    log_interval = epoch_size // 10
    total_time = 0.0
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)
        train_data_iter = reader.get_data_iter(train_data, batch_size,
                                               num_steps)

        total_loss = 0

        init_hidden = None
        init_cell = None
        #debug_para(fluid.framework.default_main_program(), parallel_executor)
        total_loss = 0
        iters = 0
        init_hidden = np.zeros(
            (num_layers, batch_size, hidden_size), dtype='float32')
        init_cell = np.zeros(
            (num_layers, batch_size, hidden_size), dtype='float32')
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed = prepare_input(
                batch, init_hidden, init_cell, epoch_id=epoch_id)
github PaddlePaddle / models / PaddleNLP / unarchived / neural_machine_translation / rnn_search / train.py View on Github external
def train():
        """Run the training loop for ``args.max_epoch`` epochs.

        Every epoch builds a fresh batch iterator with
        ``reader.get_data_iter`` and runs ``train_program`` through ``exe`` on
        each batch, fetching only the loss.

        NOTE(review): this excerpt stops inside the batch loop; whatever the
        function does after accumulating ``total_loss`` is not shown here.
        """
        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            start_time = time.time()
            # The iterator is requested with enable_ce=True only when
            # args.enable_ce is set; otherwise the reader's default is used.
            # (Presumably a reproducible mode for continuous evaluation --
            # confirm against reader.get_data_iter.)
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data, batch_size, enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)
                

            # Per-epoch accumulators, reset at the start of every epoch.
            total_loss = 0
            word_count = 0.0
            batch_times = []
            for batch_id, batch in enumerate(train_data_iter):
                batch_start_time = time.time()
                # NOTE(review): word_num is not used in the visible part of
                # this excerpt.
                input_data_feed, word_num = prepare_input(batch, epoch_id=epoch_id)
                fetch_outs = exe.run(program=train_program,
                                     feed=input_data_feed,
                                     fetch_list=[loss.name],
                                     use_program_cache=True)

                # fetch_list has a single entry, so fetch_outs[0] is the loss.
                cost_train = np.array(fetch_outs[0])

                # The fetched loss is scaled by batch_size before being summed
                # (presumably it is a per-batch mean -- TODO confirm).
                total_loss += cost_train * batch_size
github PaddlePaddle / models / PaddleNLP / PaddleTextGEN / seq2seq / train.py View on Github external
def train():
        """Run the training loop for ``args.max_epoch`` epochs.

        Every epoch builds a fresh batch iterator with
        ``reader.get_data_iter``, prepares a feed for each batch, adds the
        batch's word count to ``word_count`` and runs the program through
        ``exe``, fetching only the loss.

        NOTE(review): this excerpt stops right after the ``exe.run`` call; the
        rest of the loop body and the function is not shown here.
        """
        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            start_time = time.time()
            # The iterator is requested with enable_ce=True only when
            # args.enable_ce is set; otherwise the reader's default is used.
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(
                    train_data, batch_size, enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            # Per-epoch accumulators, reset at the start of every epoch.
            total_loss = 0
            word_count = 0.0
            batch_times = []
            for batch_id, batch in enumerate(train_data_iter):
                batch_start_time = time.time()
                input_data_feed, word_num = prepare_input(
                    batch, epoch_id=epoch_id)
                word_count += word_num
                # NOTE(review): `CompiledProgram` is whatever that name is
                # bound to in the enclosing scope -- confirm it is a compiled
                # program object and not a class of the same name.
                fetch_outs = exe.run(program=CompiledProgram,
                                     feed=input_data_feed,
                                     fetch_list=[loss.name],
                                     use_program_cache=True)
github PaddlePaddle / models / PaddleNLP / unarchived / neural_machine_translation / rnn_search / train.py View on Github external
def train():
        """Run the training loop for ``args.max_epoch`` epochs.

        Every epoch builds a fresh batch iterator with
        ``reader.get_data_iter`` and runs ``train_program`` through ``exe`` on
        each batch, fetching only the loss.

        NOTE(review): this excerpt stops inside the batch loop, right after
        the fetched loss is converted to an array; the rest is not shown here.
        """
        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            start_time = time.time()
            # The iterator is requested with enable_ce=True only when
            # args.enable_ce is set; otherwise the reader's default is used.
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data, batch_size, enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)
                

            # Per-epoch accumulators, reset at the start of every epoch.
            total_loss = 0
            word_count = 0.0
            batch_times = []
            for batch_id, batch in enumerate(train_data_iter):
                batch_start_time = time.time()
                # NOTE(review): word_num is not used in the visible part of
                # this excerpt.
                input_data_feed, word_num = prepare_input(batch, epoch_id=epoch_id)
                fetch_outs = exe.run(program=train_program,
                                     feed=input_data_feed,
                                     fetch_list=[loss.name],
                                     use_program_cache=True)

                # fetch_list has a single entry, so fetch_outs[0] is the loss.
                cost_train = np.array(fetch_outs[0])
github PaddlePaddle / models / PaddleNLP / unarchived / language_model / lstm / train.py View on Github external
def eval(data):
        """Run ``inference_program`` over every batch of ``data``.

        Batches come from ``reader.get_data_iter(data, batch_size, num_steps)``.
        The hidden and cell states start as float32 zero arrays of shape
        ``(num_layers, batch_size, hidden_size)``. Each run fetches the loss
        together with the last hidden and last cell states.

        NOTE: the name shadows the built-in ``eval`` in the scope where this
        function is defined.
        NOTE(review): this excerpt stops inside the batch loop; how the loss is
        accumulated and what is returned is not shown here.
        """
        # NOTE(review): the original comment here said batch_size is set to 1
        # for evaluation, but the call below passes `batch_size` unchanged --
        # confirm which one is intended.
        eval_data_iter = reader.get_data_iter(data, batch_size, num_steps)
        total_loss = 0.0
        iters = 0
        init_hidden = np.zeros(
            (num_layers, batch_size, hidden_size), dtype='float32')
        init_cell = np.zeros(
            (num_layers, batch_size, hidden_size), dtype='float32')
        for batch_id, batch in enumerate(eval_data_iter):
            # `epoch_id` is neither a parameter nor a local of this function,
            # so it must be resolvable from an enclosing or global scope at
            # call time. with_lr=False is passed for evaluation (presumably to
            # leave the learning rate out of the feed -- confirm in
            # prepare_input).
            input_data_feed = prepare_input(
                batch, init_hidden, init_cell, epoch_id, with_lr=False)
            fetch_outs = exe.run(
                inference_program,
                feed=input_data_feed,
                fetch_list=[loss.name, last_hidden.name, last_cell.name],
                use_program_cache=True)

            # fetch_outs follows fetch_list order, so index 0 is the loss.
            cost_train = np.array(fetch_outs[0])
github PaddlePaddle / models / PaddleNLP / language_model / train.py View on Github external
def train_an_epoch(epoch_id, batch_times):
        # get train epoch size
        log_interval = get_log_interval(len(train_data))
        train_data_iter = reader.get_data_iter(train_data, config.batch_size *
                                               device_count, config.num_steps)

        total_loss = 0
        iters = 0

        init_hidden, init_cell = generate_init_data()
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed = prepare_input(
                batch,
                init_hidden=init_hidden,
                init_cell=init_cell,
                epoch_id=epoch_id,
                with_lr=True,
                device_count=device_count)
            batch_start_time = time.time()
            fetch_outs = exe.run(train_program,
github PaddlePaddle / models / PaddleNLP / language_model / train.py View on Github external
def data_gen():
    """Yield one reshaped ``(x, y)`` pair per training batch.

    Batches are produced by ``reader.get_data_iter`` over ``train_data`` with
    ``config.batch_size`` and ``config.num_steps``. Before being yielded, the
    first element of each batch is reshaped to ``(-1, config.num_steps, 1)``
    and the second to ``(-1, 1)``.
    """
    per_batch = config.batch_size
    batch_stream = reader.get_data_iter(
        train_data, per_batch, config.num_steps)
    for inputs, targets in batch_stream:
        # -1 lets reshape infer the leading dimension from the batch itself.
        inputs = inputs.reshape((-1, config.num_steps, 1))
        targets = targets.reshape((-1, 1))
        yield inputs, targets
github PaddlePaddle / models / PaddleNLP / PaddleTextGEN / variational_seq2seq / train.py View on Github external
def eval(data):
        """Run ``inference_program`` over every batch of ``data``.

        Batches come from ``reader.get_data_iter(data, batch_size,
        mode='eval')``. For each batch the loss is fetched and three running
        totals are updated: the loss scaled by ``batch_size``, the decoder word
        count returned by ``prepare_input``, and the number of samples.

        NOTE: the name shadows the built-in ``eval`` in the scope where this
        function is defined.
        NOTE(review): no return statement is visible in this excerpt; what is
        computed from the totals is not shown here.
        """
        eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
        total_loss = 0.0
        word_count = 0.0
        batch_count = 0.0
        for batch_id, batch in enumerate(eval_data_iter):
            # src_word_num is not used in the visible part of this excerpt.
            input_data_feed, src_word_num, dec_word_sum = prepare_input(batch)
            fetch_outs = exe.run(inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=False)

            # fetch_list has a single entry, so fetch_outs[0] is the loss.
            cost_train = np.array(fetch_outs[0])

            # The fetched loss is scaled by batch_size before being summed
            # (presumably it is a per-batch mean -- TODO confirm).
            total_loss += cost_train * batch_size
            word_count += dec_word_sum
            batch_count += batch_size
github PaddlePaddle / models / dygraph / ptb_lm / ptb_dy.py View on Github external
def eval(model, data):
            print("begion to eval")
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')

            model.eval()
            train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
            for batch_id, batch in enumerate(train_data_iter):
                x_data, y_data = batch
                x_data = x_data.reshape((-1, num_steps))
                y_data = y_data.reshape((-1, 1))
                x = to_variable(x_data)
                y = to_variable(y_data)
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                            init_cell)

                out_loss = dy_loss.numpy()

                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()