import numpy as np
from common.optimizer import SGD
from dataset import ptb
from simple_rnnlm import SimpleRnnlm
# Hyperparameter settings
batch_size = 10
wordvec_size = 100
hidden_size = 100
time_size = 5  # number of time steps unrolled for truncated BPTT
lr = 0.1
max_epoch = 100
# Load the training data (and shrink the dataset)
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_size = 1000
corpus = corpus[:corpus_size]
vocab_size = int(max(corpus) + 1)
xs = corpus[:-1]  # inputs
ts = corpus[1:]   # outputs (teacher labels)
data_size = len(xs)
print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))
# Variables used during training
max_iters = data_size // (batch_size * time_size)
time_idx = 0
total_loss = 0
loss_count = 0
ppl_list = []
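# --- Hedged sketch (not part of the original listing): the training loop that the
# variables above (max_iters, time_idx, total_loss, loss_count, ppl_list) are set up
# for. It follows the standard truncated-BPTT pattern and assumes the SimpleRnnlm /
# SGD interfaces imported above (forward/backward, params/grads, update).
model = SimpleRnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)

jump = (corpus_size - 1) // batch_size           # distance between mini-batch rows
offsets = [i * jump for i in range(batch_size)]  # starting offset of each row

for epoch in range(max_epoch):
    for iteration in range(max_iters):
        # Build a (batch_size, time_size) block of inputs and teacher labels.
        batch_x = np.empty((batch_size, time_size), dtype='i')
        batch_t = np.empty((batch_size, time_size), dtype='i')
        for t in range(time_size):
            for i, offset in enumerate(offsets):
                batch_x[i, t] = xs[(offset + time_idx) % data_size]
                batch_t[i, t] = ts[(offset + time_idx) % data_size]
            time_idx += 1

        # Forward/backward over the unrolled window, then update the parameters.
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)
        total_loss += loss
        loss_count += 1

    # Report perplexity once per epoch.
    ppl = np.exp(total_loss / loss_count)
    print('| epoch %d | perplexity %.2f' % (epoch + 1, ppl))
    ppl_list.append(float(ppl))
    total_loss, loss_count = 0, 0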
import pickle
import tqdm

def run(args):
    # SpectrogramReader and nfft are helpers from the surrounding project; the
    # reader yields (key, spectrogram) pairs with shape (num_frames, num_bins).
    reader_kwargs = {
        "frame_length": args.frame_length,
        "frame_shift": args.frame_shift,
        "window": args.window,
        "center": False,
        "apply_abs": True,
        "apply_log": args.apply_log,
        "apply_pow": args.apply_pow
    }
    num_bins = nfft(args.frame_length) // 2 + 1
    reader = SpectrogramReader(args.wave_scp, **reader_kwargs)
    mean = np.zeros(num_bins)
    std = np.zeros(num_bins)
    num_frames = 0
    # Accumulate sums and squared sums, then use D(X) = E(X^2) - E(X)^2
    for _, spectrogram in tqdm.tqdm(reader):
        num_frames += spectrogram.shape[0]
        mean += np.sum(spectrogram, 0)
        std += np.sum(spectrogram**2, 0)
    mean = mean / num_frames
    std = np.sqrt(std / num_frames - mean**2)
    with open(args.cmvn_dst, "wb") as f:
        cmvn_dict = {"mean": mean, "std": std}
        pickle.dump(cmvn_dict, f)
    print("Totally processed {} frames".format(num_frames))
    print("Global mean: {}".format(mean))
    print("Global std: {}".format(std))
def init(bot, testing=False):
    """ Initialize updater """
    global DATABASE
    global config
    global botref
    global updater
    global logger
    if testing:
        DATABASE = dataset.connect("sqlite:///:memory:")
    else:
        DATABASE = dataset.connect("sqlite:///databases/rss.db")
    logger.info("RSS module initialized")
    botref = bot
    config = bot.config.get("rss", {})
    finalize()
    # As there's no signal if this is a rehash or restart
    # update feeds in 30 seconds
    updater = callLater(30, update_feeds)
def inference(flags):
    print_out("inference data file {0}".format(flags.data_files))
    dataset = DataSet(flags.data_files, flags.vocab_file, flags.label_file, flags.batch_size,
                      reverse=flags.reverse, split_word=flags.split_word, max_len=flags.max_len)
    hparams = load_hparams(flags.checkpoint_dir,
                           {"mode": 'inference',
                            'checkpoint_dir': flags.checkpoint_dir + "/best_eval",
                            'embed_file': None})
    with tf.Session(config=get_config_proto(log_device_placement=False)) as sess:
        model = Model(hparams)
        model.build()
        try:
            model.restore_model(sess)  # restore best solution
        except Exception as e:
            print("unable to restore model with exception", e)
            exit(1)
        scalars = model.scalars.eval(session=sess)
        print("Scalars:", scalars)
        weight = model.weight.eval(session=sess)
        print("Weight:", weight)
        cnt = 0
trg_vocab['stoi'] = load_vocab(opt.trg_vocab)
src_vocab['itos'] = invert_vocab(src_vocab['stoi'])
trg_vocab['itos'] = invert_vocab(trg_vocab['stoi'])
# Special tokens -- assumed conventional values; they must match the entries in the vocab files.
UNK = '<unk>'
SOS = '<sos>'
EOS = '<eos>'
PAD = '<pad>'
opt.enc_pad = src_vocab['stoi'][PAD]
opt.dec_sos = trg_vocab['stoi'][SOS]
opt.dec_eos = trg_vocab['stoi'][EOS]
opt.dec_pad = trg_vocab['stoi'][PAD]
opt.enc_ntok = len(src_vocab['stoi'])
opt.dec_ntok = len(trg_vocab['stoi'])
# load dataset for testing
test_dataset = dataset(opt.test_src, opt.test_trg)
test_iter = torch.utils.data.DataLoader(test_dataset, 1, shuffle=False, collate_fn=lambda x: zip(*x))
# create the model
model = getattr(model, opt.model)(opt).to(device)
state_dict = torch.load(os.path.join(opt.checkpoint, opt.name))
model.load_state_dict(state_dict)
model.eval()
def bleu_script(f):
    ref_stem = opt.test_trg[0][:-1] + '*'
    cmd = '{eval_script} {refs} {hyp}'.format(eval_script=opt.eval_script, refs=ref_stem, hyp=f)
    p = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    if p.returncode > 0:
        # assumed handling: treat a non-zero exit status as failure,
        # otherwise parse the score from multi-bleu-style output ("BLEU = xx.xx, ...")
        return -1.0
    return float(out.decode().split()[2][:-1])
if use_cuda:
    os.environ['CUDA_VISIBLE_DEVICES'] = gpus
    torch.cuda.manual_seed(seed)
global model
model = Darknet(cfgfile)
#model.print_network()
init_width = model.width
init_height = model.height
kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}
global test_loader
test_loader = torch.utils.data.DataLoader(
    dataset.listDataset(testlist, shape=(init_width, init_height),
                        shuffle=False,
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                        ]), train=False),
    batch_size=batch_size, shuffle=False, **kwargs)
if use_cuda:
    if ngpus > 1:
        model = torch.nn.DataParallel(model)
        model = model.module
model = model.to(torch.device("cuda" if use_cuda else "cpu"))
for w in FLAGS.weights:
    model.load_weights(w)
    logging('evaluating ... %s' % (w))
    test()
if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
train_dataset = dataset.listDataset(list_file=opt.trainlist)
assert train_dataset
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=opt.batchSize,
    shuffle=False, sampler=sampler,
    num_workers=int(opt.workers),
    collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
test_dataset = dataset.listDataset(list_file=opt.vallist, transform=dataset.resizeNormalize((100, 32)))
nclass = len(opt.alphabet.split(opt.sep))
nc = 1
converter = utils.strLabelConverterForAttention(opt.alphabet, opt.sep)
criterion = torch.nn.CrossEntropyLoss()
# custom weights initialization called on crnn
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
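# --- Hedged usage sketch (not part of the original listing): weights_init is meant to
# be passed to nn.Module.apply, which visits every submodule; `demo_net` is a
# placeholder stand-in for the CRNN model built elsewhere in these scripts.
import torch.nn as nn

demo_net = nn.Sequential(nn.Conv2d(1, 64, 3, 1, 1), nn.BatchNorm2d(64))
demo_net.apply(weights_init)  # Conv weights get N(0, 0.02), BatchNorm gets N(1, 0.02) weights and zero bias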
if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
train_dataset = dataset.lmdbDataset(root=opt.trainroot)
assert train_dataset
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=opt.batchSize,
    shuffle=True, sampler=sampler,
    num_workers=int(opt.workers),
    collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
test_dataset = dataset.lmdbDataset(
    root=opt.valroot, transform=dataset.resizeNormalize((256, 32)))
test_dataset = dataset.lmdbDataset(
    root=opt.valroot)
ngpu = int(opt.ngpu)
nh = int(opt.nh)
alphabet = opt.alphabet
nclass = len(alphabet) + 1
nc = 1
converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()
# custom weights initialization called on crnn
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
train_dataset = dataset.lmdbDataset(root=opt.trainroot)
assert train_dataset
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=opt.batchSize,
    shuffle=True, sampler=sampler,
    num_workers=int(opt.workers),
    collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
test_dataset = dataset.lmdbDataset(
    root=opt.valroot, transform=dataset.resizeNormalize((100, 32)))
alphabet = opt.alphabet.decode('utf-8')
nclass = len(alphabet) + 1
nc = 1
converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()
# custom weights initialization called on crnn
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
if torch.cuda.is_available() and not opt.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")
train_dataset = dataset.lmdbDataset(root=opt.trainroot)
assert train_dataset
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=opt.batchSize,
    shuffle=True, sampler=sampler,
    num_workers=int(opt.workers),
    collate_fn=dataset.alignCollate(imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
test_dataset = dataset.lmdbDataset(
    root=opt.valroot, transform=dataset.resizeNormalize((100, 32)))
nclass = len(opt.alphabet) + 1
nc = 1
converter = utils.strLabelConverter(opt.alphabet)
criterion = CTCLoss()
# custom weights initialization called on crnn
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)
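# --- Hedged sketch (not part of the original listing): a single CTC training step with
# dummy tensors, to show how nclass and the blank index 0 used by the label converter
# fit together with a CTC criterion. PyTorch's built-in nn.CTCLoss is used here in
# place of the warp-ctc binding; shapes follow the usual CRNN output (seq_len, batch, nclass).
import torch
import torch.nn as nn

seq_len, batch, n_class = 26, 4, 37                  # n_class = len(alphabet) + 1 (blank)
preds = torch.randn(seq_len, batch, n_class, requires_grad=True).log_softmax(2)
targets = torch.randint(1, n_class, (batch, 10))     # encoded labels; 0 is reserved for blank
pred_lengths = torch.full((batch,), seq_len, dtype=torch.long)
target_lengths = torch.full((batch,), 10, dtype=torch.long)

ctc = nn.CTCLoss(blank=0)
loss = ctc(preds, targets, pred_lengths, target_lengths)
loss.backward()                                      # gradients would flow into the CRNN in real training
print(loss.item())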