How to use the pycorrector.seq2seq_attention.data_reader.str2id function in pycorrector

To help you get started, we’ve selected a few pycorrector examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github shibing624 / pycorrector / pycorrector / seq2seq_attention / train.py View on Github external
def get_validation_data(input_texts, target_texts, vocab2id, maxlen=400):
    """Encode and pad the whole validation set as a single batch.

    Each input text is converted with ``str2id``. Each target text is
    converted the same way and wrapped between the ids of ``GO_TOKEN`` and
    ``EOS_TOKEN``.

    Args:
        input_texts: Sequence of source texts.
        target_texts: Sequence of target texts, index-aligned with
            ``input_texts`` (``target_texts[i]`` pairs with
            ``input_texts[i]``).
        vocab2id: Mapping from token to integer id; must contain
            ``GO_TOKEN`` and ``EOS_TOKEN``.
        maxlen: Forwarded unchanged to ``str2id`` for every text.

    Returns:
        The tuple ``([X, Y], None)``, where ``X`` and ``Y`` are
        ``np.array`` wrappers around the ``padding`` output for the encoded
        inputs and targets respectively.
    """
    X, Y = [], []
    for i, source in enumerate(input_texts):
        X.append(str2id(source, vocab2id, maxlen))
        Y.append([vocab2id[GO_TOKEN]] + str2id(target_texts[i], vocab2id, maxlen) + [vocab2id[EOS_TOKEN]])
    # Pad and return once, after every sample has been collected; doing this
    # inside the loop would return after the very first sample.
    X = np.array(padding(X, vocab2id))
    Y = np.array(padding(Y, vocab2id))
    return [X, Y], None
github shibing624 / pycorrector / pycorrector / seq2seq_attention / train.py View on Github external
def data_generator(input_texts, target_texts, vocab2id, batch_size, maxlen=400):
    """Yield padded training batches forever, cycling over the texts.

    Each pass walks ``input_texts`` in order and emits a batch every time
    ``batch_size`` samples have been collected. Samples left over at the end
    of a pass (fewer than ``batch_size``) are discarded when the next pass
    starts.

    Args:
        input_texts: Sequence of source texts; must support ``len()``.
        target_texts: Sequence of target texts, index-aligned with
            ``input_texts``.
        vocab2id: Mapping from token to integer id; must contain
            ``GO_TOKEN`` and ``EOS_TOKEN``.
        batch_size: Number of samples per yielded batch.
        maxlen: Forwarded unchanged to ``str2id`` for every text.

    Yields:
        The tuple ``([X, Y], None)``, where ``X`` and ``Y`` are ``np.array``
        wrappers around the ``padding`` output for one batch.

    Raises:
        ValueError: On the first iteration, if ``batch_size`` is not
            positive or exceeds the number of input texts. Without this
            check the loop below would spin forever without yielding.
    """
    if batch_size < 1 or len(input_texts) < batch_size:
        raise ValueError(
            "batch_size must be between 1 and len(input_texts); "
            "got batch_size=%r for %d input texts" % (batch_size, len(input_texts))
        )
    while True:
        # A fresh pass starts with empty buffers, dropping any partial batch.
        X, Y = [], []
        for i, source in enumerate(input_texts):
            X.append(str2id(source, vocab2id, maxlen))
            Y.append([vocab2id[GO_TOKEN]] + str2id(target_texts[i], vocab2id, maxlen) + [vocab2id[EOS_TOKEN]])
            if len(X) == batch_size:
                X = np.array(padding(X, vocab2id))
                Y = np.array(padding(Y, vocab2id))
                yield [X, Y], None
                X, Y = [], []
github shibing624 / pycorrector / pycorrector / seq2seq_attention / evaluate.py View on Github external
def gen_target(input_text, model, vocab2id, id2vocab, maxlen=400, topk=3, max_target_len=50):
    """Decode with beam search.

    Only the topk best candidates are kept at each step; with topk=1 this
    reduces to greedy search.
    """
    xid = np.array([str2id(input_text, vocab2id, maxlen)] * topk)  # convert the input to ids, repeated topk times
    yid = np.array([[vocab2id[GO_TOKEN]]] * topk)  # every candidate starts decoding from GO
    scores = [0] * topk  # score of each candidate answer
    for i in range(max_target_len):  # cap the decoded target at max_target_len steps
        proba = model.predict([xid, yid])[:, i, :]  # model prediction sliced at position i
        log_proba = np.log(proba + 1e-6)  # take logs for easier computation; 1e-6 avoids log(0)
        arg_topk = log_proba.argsort(axis=1)[:, -topk:]  # indices of the topk largest entries in each row
        _yid = []  # temporary candidate target sequences
        _scores = []  # temporary scores of those candidate sequences
        if i == 0:
            # First step: every row is an identical copy, so only row 0 is expanded.
            for j in range(topk):
                _yid.append(list(yid[j]) + [arg_topk[0][j]])
                _scores.append(scores[j] + log_proba[0][arg_topk[0][j]])
        else:
            for j in range(len(xid)):
                for k in range(topk):  # iterate over the topk*topk combinations
                    _yid.append(list(yid[j]) + [arg_topk[j][k]])
github shibing624 / pycorrector / pycorrector / seq2seq_attention / train.py View on Github external
def get_validation_data(input_texts, target_texts, vocab2id, maxlen=400):
    """Encode and pad the whole validation set as a single batch.

    Each input text is converted with ``str2id``. Each target text is
    converted the same way and wrapped between the ids of ``GO_TOKEN`` and
    ``EOS_TOKEN``.

    Args:
        input_texts: Sequence of source texts.
        target_texts: Sequence of target texts, index-aligned with
            ``input_texts`` (``target_texts[i]`` pairs with
            ``input_texts[i]``).
        vocab2id: Mapping from token to integer id; must contain
            ``GO_TOKEN`` and ``EOS_TOKEN``.
        maxlen: Forwarded unchanged to ``str2id`` for every text.

    Returns:
        The tuple ``([X, Y], None)``, where ``X`` and ``Y`` are
        ``np.array`` wrappers around the ``padding`` output for the encoded
        inputs and targets respectively.
    """
    X, Y = [], []
    for i, source in enumerate(input_texts):
        X.append(str2id(source, vocab2id, maxlen))
        Y.append([vocab2id[GO_TOKEN]] + str2id(target_texts[i], vocab2id, maxlen) + [vocab2id[EOS_TOKEN]])
    # Pad and return once, after every sample has been collected; doing this
    # inside the loop would return after the very first sample.
    X = np.array(padding(X, vocab2id))
    Y = np.array(padding(Y, vocab2id))
    return [X, Y], None
github shibing624 / pycorrector / pycorrector / seq2seq_attention / train.py View on Github external
def data_generator(input_texts, target_texts, vocab2id, batch_size, maxlen=400):
    """Yield padded training batches forever, cycling over the texts.

    Each pass walks ``input_texts`` in order and emits a batch every time
    ``batch_size`` samples have been collected. Samples left over at the end
    of a pass (fewer than ``batch_size``) are discarded when the next pass
    starts.

    Args:
        input_texts: Sequence of source texts; must support ``len()``.
        target_texts: Sequence of target texts, index-aligned with
            ``input_texts``.
        vocab2id: Mapping from token to integer id; must contain
            ``GO_TOKEN`` and ``EOS_TOKEN``.
        batch_size: Number of samples per yielded batch.
        maxlen: Forwarded unchanged to ``str2id`` for every text.

    Yields:
        The tuple ``([X, Y], None)``, where ``X`` and ``Y`` are ``np.array``
        wrappers around the ``padding`` output for one batch.

    Raises:
        ValueError: On the first iteration, if ``batch_size`` is not
            positive or exceeds the number of input texts. Without this
            check the loop below would spin forever without yielding.
    """
    if batch_size < 1 or len(input_texts) < batch_size:
        raise ValueError(
            "batch_size must be between 1 and len(input_texts); "
            "got batch_size=%r for %d input texts" % (batch_size, len(input_texts))
        )
    while True:
        # A fresh pass starts with empty buffers, dropping any partial batch.
        X, Y = [], []
        for i, source in enumerate(input_texts):
            X.append(str2id(source, vocab2id, maxlen))
            Y.append([vocab2id[GO_TOKEN]] + str2id(target_texts[i], vocab2id, maxlen) + [vocab2id[EOS_TOKEN]])
            if len(X) == batch_size:
                X = np.array(padding(X, vocab2id))
                Y = np.array(padding(Y, vocab2id))
                yield [X, Y], None
                X, Y = [], []