import numpy as np

def get_validation_data(input_texts, target_texts, vocab2id, maxlen=400):
    # Build the whole validation set as one padded batch of (input, target) id sequences
    X, Y = [], []
    for i in range(len(input_texts)):
        X.append(str2id(input_texts[i], vocab2id, maxlen))
        Y.append([vocab2id[GO_TOKEN]] + str2id(target_texts[i], vocab2id, maxlen) + [vocab2id[EOS_TOKEN]])
    X = np.array(padding(X, vocab2id))
    Y = np.array(padding(Y, vocab2id))
    return [X, Y], None
def data_generator(input_texts, target_texts, vocab2id, batch_size, maxlen=400):
    # Data generator: endlessly yields padded batches of (input, target) id sequences
    while True:
        X, Y = [], []
        for i in range(len(input_texts)):
            X.append(str2id(input_texts[i], vocab2id, maxlen))
            Y.append([vocab2id[GO_TOKEN]] + str2id(target_texts[i], vocab2id, maxlen) + [vocab2id[EOS_TOKEN]])
            if len(X) == batch_size:
                X = np.array(padding(X, vocab2id))
                Y = np.array(padding(Y, vocab2id))
                yield [X, Y], None
                X, Y = [], []
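# The functions above and below rely on str2id and padding helpers that are not
# shown in this snippet. The sketch below is an assumption, not the original
# implementation: it assumes vocab2id also contains PAD_TOKEN and UNK_TOKEN
# entries (analogous to GO_TOKEN and EOS_TOKEN) for padding and out-of-vocabulary
# characters.
def str2id(text, vocab2id, maxlen=400):
    # Truncate the text to maxlen characters and map each character to its id,
    # falling back to the UNK_TOKEN id for characters missing from the vocabulary.
    return [vocab2id.get(ch, vocab2id[UNK_TOKEN]) for ch in text[:maxlen]]

def padding(seqs, vocab2id):
    # Right-pad every sequence with the PAD_TOKEN id so the batch is rectangular.
    longest = max(len(seq) for seq in seqs)
    return [seq + [vocab2id[PAD_TOKEN]] * (longest - len(seq)) for seq in seqs]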
def gen_target(input_text, model, vocab2id, id2vocab, maxlen=400, topk=3, max_target_len=50):
    """Beam search decoding.

    Keeps only the topk best candidates at each step; with topk=1 this reduces to greedy search.
    """
    xid = np.array([str2id(input_text, vocab2id, maxlen)] * topk)  # encode the input, replicated topk times
    yid = np.array([[vocab2id[GO_TOKEN]]] * topk)  # every candidate starts with the GO token
    scores = [0] * topk  # cumulative log-probability of each candidate
    for i in range(max_target_len):  # cap the target at max_target_len tokens
        proba = model.predict([xid, yid])[:, i, :]  # next-token distribution for each candidate
        log_proba = np.log(proba + 1e-6)  # log-probabilities, so scores can be summed
        arg_topk = log_proba.argsort(axis=1)[:, -topk:]  # topk token ids per candidate
        _yid = []  # temporary candidate target sequences
        _scores = []  # temporary candidate scores
        if i == 0:
            # first step: all rows are identical, so expand only one of them
            for j in range(topk):
                _yid.append(list(yid[j]) + [arg_topk[0][j]])
                _scores.append(scores[j] + log_proba[0][arg_topk[0][j]])
        else:
            for j in range(len(xid)):
                for k in range(topk):  # enumerate the topk * topk combinations
                    _yid.append(list(yid[j]) + [arg_topk[j][k]])
                    _scores.append(scores[j] + log_proba[j][arg_topk[j][k]])
            # keep only the topk best combinations
            _arg_topk = np.argsort(_scores)[-topk:]
            _yid = [_yid[k] for k in _arg_topk]
            _scores = [_scores[k] for k in _arg_topk]
        yid = np.array(_yid)
        scores = np.array(_scores)
        # stop as soon as the best candidate ends with EOS
        best = np.argmax(scores)
        if yid[best][-1] == vocab2id[EOS_TOKEN]:
            break
    # drop the leading GO (and any EOS) and map ids back to characters
    best = np.argmax(scores)
    target_ids = [int(t) for t in yid[best][1:] if t != vocab2id[EOS_TOKEN]]
    return ''.join(id2vocab[t] for t in target_ids)
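# A hypothetical end-to-end usage sketch (not from the original snippet). It
# assumes a compiled Keras seq2seq `model` that takes [encoder_ids, decoder_ids]
# and computes its loss internally (hence the `None` targets yielded above), plus
# hypothetical train_texts/train_targets/val_texts/val_targets lists:
#
#     batch_size = 64
#     model.fit(
#         data_generator(train_texts, train_targets, vocab2id, batch_size),
#         steps_per_epoch=len(train_texts) // batch_size,
#         epochs=10,
#         validation_data=get_validation_data(val_texts, val_targets, vocab2id),
#     )
#     print(gen_target(val_texts[0], model, vocab2id, id2vocab, topk=3))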