def test_cache_models():
    cache_language_models = ['awd_lstm_lm_1150', 'awd_lstm_lm_600', 'standard_lstm_lm_200',
                             'standard_lstm_lm_650', 'standard_lstm_lm_1500']
    datasets = ['wikitext-2']
    for name in cache_language_models:
        for dataset_name in datasets:
            cache_cell = nlp.model.train.get_cache_model(name, dataset_name, window=1, theta=0.6,
                                                         lambdas=0.2)
            outs, word_history, cache_history, hidden = cache_cell(
                mx.nd.arange(10).reshape(10, 1), mx.nd.arange(10).reshape(10, 1), None, None)
            print(cache_cell)
            print("outs:")
            print(outs)
            print("word_history:")
            print(word_history)
            print("cache_history:")
            print(cache_history)
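
# Note on the hyperparameters above (a hedged reading of the cache language model
# interface, not something stated in the snippet itself): `window` is the number of
# recent hidden states/words kept in the cache, `theta` controls how peaked the
# cache distribution is, and `lambdas` is the interpolation weight that mixes the
# cache distribution with the base language model's output distribution.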

def test_conv_encoder_nonhighway_forward(hybridize, mask):
    encoder = model.ConvolutionalEncoder(embed_size=2, num_filters=(1, 1),
                                         ngram_filter_sizes=(1, 2))
    print(encoder)
    encoder.initialize(init='One')
    if hybridize:
        encoder.hybridize()
    inputs = mx.nd.array([[[.7, .8], [.1, 1.5], [.2, .3]], [[.5, .6], [.2, 2.5], [.4, 4]]])
    if mask:
        output = encoder(inputs, mx.nd.ones(inputs.shape[:-1]))
    else:
        output = encoder(inputs)
    assert output.shape == (3, 2), output.shape
    assert_almost_equal(output.asnumpy(),
                        mx.nd.array([[1.37, 1.42],
                                     [1.49, 1.49],
                                     [1.5, 1.5]]).asnumpy(),
                        decimal=2)
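
# For reference: the input above is shaped (sequence_length, batch_size, embed_size)
# = (2, 3, 2), and with num_filters=(1, 1) the encoder produces one pooled feature
# per filter, which is why the test asserts an output shape of (batch_size, 1 + 1) = (3, 2).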

def test_csr_embedding(sparse_grad, hybridize):
    token_to_idx = dict(hello=0, world=1)
    embedding = nlp.model.train.CSREmbeddingModel(token_to_idx, 30,
                                                  sparse_grad=sparse_grad)
    embedding.initialize()
    if hybridize:
        embedding.hybridize()
    one_word_per_row = mx.nd.sparse.csr_matrix(
        ([1.0, 1.0], ([0, 1], [0, 1])), shape=(2, len(token_to_idx)), dtype=np.float32)
    two_words_per_row = mx.nd.sparse.csr_matrix(
        ([1.0, 1.0], ([0, 0], [0, 1])), shape=(1, len(token_to_idx)), dtype=np.float32)
    emb = embedding(one_word_per_row)
    emb2 = embedding(two_words_per_row)
    assert_allclose(emb.sum(axis=0, keepdims=True).asnumpy(), emb2.asnumpy())
    assert_allclose(emb.asnumpy(), embedding[["hello", "world"]].asnumpy())
    assert_allclose(emb[0].asnumpy(), embedding["hello"].asnumpy())
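
# In the (data, (row, col)) CSR constructor used above, `one_word_per_row` is a
# 2 x vocabulary matrix with a single 1.0 per row (one-hot rows for "hello" and
# "world"), while `two_words_per_row` puts both 1.0 entries into a single row, so
# its embedding is the sum of the two word vectors; the first assert_allclose
# checks exactly this.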

def test_highway_forward():
    highway = model.Highway(input_size=2, num_layers=2)
    print(highway)
    highway.initialize()
    inputs = mx.nd.ones((2, 3, 2))
    output = highway(inputs)
    print(output)
    assert output.shape == (2, 3, 2), output.shape

def test_beam_search_score(length, alpha, K):
    batch_size = 2
    scorer = model.BeamSearchScorer(alpha=alpha, K=K)
    scorer.hybridize()
    sum_log_probs = mx.nd.zeros((batch_size,))
    scores = mx.nd.zeros((batch_size,))
    for step in range(1, length + 1):
        log_probs = mx.nd.random.normal(0, 1, (batch_size, 1))
        sum_log_probs += log_probs[:, 0]
        scores = scorer(log_probs, scores, mx.nd.array([step]))[:, 0]
    lp = (K + length) ** alpha / (K + 1) ** alpha
    assert_allclose(scores.asnumpy(), sum_log_probs.asnumpy() / lp, 1E-5, 1E-5)
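
# The closed-form check above corresponds to a GNMT-style length penalty: the final
# score of a hypothesis of length `length` is its summed log-probability divided by
# lp = ((K + length) ** alpha) / ((K + 1) ** alpha). As a quick numeric example
# (values chosen purely for illustration), with alpha = 1.0 and K = 5 a length-10
# hypothesis is divided by (5 + 10) / (5 + 1) = 2.5.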

###############################################################################
# Build the model
###############################################################################
ntokens = len(vocab)
if args.weight_dropout > 0:
    print('Use AWDRNN')
    model = nlp.model.language_model.AWDRNN(args.model, len(vocab), args.emsize,
                                            args.nhid, args.nlayers, args.tied,
                                            args.dropout, args.weight_dropout, args.dropout_h,
                                            args.dropout_i, args.dropout_e)
else:
    model = nlp.model.language_model.StandardRNN(args.model, len(vocab), args.emsize,
                                                 args.nhid, args.nlayers, args.dropout, args.tied)
model.initialize(mx.init.Xavier(), ctx=context)

if args.optimizer == 'sgd':
    trainer_params = {'learning_rate': args.lr,
                      'momentum': 0,
                      'wd': args.wd}
elif args.optimizer == 'adam':
    trainer_params = {'learning_rate': args.lr,
                      'wd': args.wd,
                      'beta1': 0,
                      'beta2': 0.999,
                      'epsilon': 1e-9}

trainer = gluon.Trainer(model.collect_params(), args.optimizer, trainer_params)
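
# Rough sketch of how the trainer above is typically driven in a language-model
# training loop (not part of this snippet; `train_data`, `batch_size` and the loss
# reshaping below are illustrative assumptions):
loss_fn = gluon.loss.SoftmaxCrossEntropyLoss()
for data, target in train_data:
    hiddens = model.begin_state(batch_size=batch_size, ctx=context)
    with mx.autograd.record():
        output, hiddens = model(data, hiddens)
        # Merge the (seq_len, batch) dimensions before computing the token-level loss.
        loss = loss_fn(output.reshape(-3, -1), target.reshape(-1)).mean()
    loss.backward()
    trainer.step(1)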

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--model', type=str, default='bert_12_768_12',
                    choices=['bert_12_768_12', 'bert_24_1024_16'], help='BERT model name')
parser.add_argument('--dataset_name', type=str, default='scibert_scivocab_uncased',
                    help='Dataset name')
parser.add_argument('--pytorch_checkpoint_dir', type=str,
                    help='Path to the PyTorch checkpoint folder.')
parser.add_argument('--debug', action='store_true', help='debugging mode')
parser.add_argument('--out', default='gluon_to_pytorch_naming.json',
                    help='Output file to store the gluon-to-pytorch name mapping.')
args = parser.parse_args()

logging.getLogger().setLevel(logging.DEBUG if args.debug else logging.INFO)
logging.info(args)
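
# Example invocation (the script name here is hypothetical; the flags are the ones
# defined by the parser above):
#   python convert_pytorch_bert_checkpoint.py --model bert_12_768_12 \
#       --dataset_name scibert_scivocab_uncased \
#       --pytorch_checkpoint_dir /path/to/checkpoint \
#       --out gluon_to_pytorch_naming.json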

# Load Gluon Model
bert, vocab = nlp.model.get_model(args.model, dataset_name=args.dataset_name, pretrained=True)
parameters = bert._collect_params_with_prefix()
parameters = {k: v.data().asnumpy() for k, v in parameters.items()}

# Load PyTorch Model
pytorch_parameters = torch.load(os.path.join(args.pytorch_checkpoint_dir, 'pytorch_model.bin'),
                                map_location=lambda storage, loc: storage)
pytorch_vocab = tf_vocab_to_gluon_vocab(
    load_text_vocab(os.path.join(args.pytorch_checkpoint_dir, 'vocab.txt')))
pytorch_parameters = {k: v.numpy() for k, v in pytorch_parameters.items()}

# Assert that vocabularies are equal
assert pytorch_vocab.idx_to_token == vocab.idx_to_token

mapping = dict()
for name, param in parameters.items():
    # The full script derives the matching PyTorch parameter name for each Gluon
    # parameter here and records the pair in `mapping`; that logic is not part of
    # this snippet.
    ...

def generate():
    assert not args.lm_model.startswith('gpt2') or args.command != 'beam-search'
    decoder, vocab = get_decoder_vocab(args.lm_model)
    tokenizer, detokenizer = get_tokenizer(args.lm_model)
    bos_str = args.bos
    if not bos_str.startswith(' '):
        bos_str = ' ' + bos_str
    bos_tokens = tokenizer(bos_str)
    bos_ids = vocab[bos_tokens]
    eos_id = vocab[vocab.eos_token]
    if args.command == 'random-sample':
        print('Sampling Parameters: beam_size={}, temperature={}, use_top_k={}'
              .format(args.beam_size, args.temperature, args.use_top_k))
        sampler = nlp.model.SequenceSampler(beam_size=args.beam_size,
                                            decoder=decoder,
                                            eos_id=eos_id,
                                            max_length=args.max_length - len(bos_tokens),
                                            temperature=args.temperature,
                                            top_k=args.use_top_k)
    else:
        print('Beam Search Parameters: beam_size={}, alpha={}, K={}'
              .format(args.beam_size, args.alpha, args.k))
        scorer = nlp.model.BeamSearchScorer(alpha=args.alpha, K=args.k, from_logits=False)
        sampler = nlp.model.BeamSearchSampler(beam_size=args.beam_size,
                                              decoder=decoder,
                                              eos_id=eos_id,
                                              scorer=scorer,
                                              max_length=args.max_length - len(bos_tokens))
    inputs, begin_states = get_initial_input_state(decoder, bos_ids)
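
    # Sketch of the step that typically follows (not shown in this snippet): both
    # sampler flavors are called with the initial inputs and decoder states and
    # return candidate token sequences together with their scores and valid lengths.
    samples, scores, valid_lengths = sampler(inputs, begin_states)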
test_data = test_data[0:100]
print(args)

###############################################################################
# Build the model
###############################################################################
ntokens = len(vocab)
if args.weight_dropout > 0:
    print('Use AWDRNN')
    model_eval = nlp.model.AWDRNN(args.model, len(vocab), args.emsize, args.nhid, args.nlayers,
                                  args.tied, args.dropout, args.weight_dropout,
                                  args.dropout_h, args.dropout_i, args.dropout_e)
    model = nlp.model.train.AWDRNN(args.model, len(vocab), args.emsize, args.nhid, args.nlayers,
                                   args.tied, args.dropout, args.weight_dropout,
                                   args.dropout_h, args.dropout_i, args.dropout_e)
else:
    model_eval = nlp.model.StandardRNN(args.model, len(vocab), args.emsize,
                                       args.nhid, args.nlayers, args.dropout, args.tied)
    model = nlp.model.train.StandardRNN(args.model, len(vocab), args.emsize,
                                        args.nhid, args.nlayers, args.dropout, args.tied)
model.initialize(mx.init.Xavier(), ctx=context)
model.hybridize(static_alloc=True)
print(model)
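
# Note (interpretation, not stated in this snippet): `nlp.model.train.*` builds the
# training-time variant of the language model, while the plain `nlp.model.*`
# counterpart (`model_eval`) is the evaluation version that is expected to reuse
# the weights learned by `model`.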
if args.optimizer == 'sgd':

    max_seq_length : int, default 25
        max length of each sequence
    batch_size : int, default 256
        batch size
    """
    self.ctx = ctx
    self.dtype = dtype
    self.max_seq_length = max_seq_length
    self.batch_size = batch_size
    self.dataset_name = dataset_name

    if params_path is not None:
        # Don't download the pretrained models if we have a parameter path
        pretrained = False
    else:
        pretrained = True
    self.bert, self.vocab = gluonnlp.model.get_model(model,
                                                     dataset_name=self.dataset_name,
                                                     pretrained=pretrained,
                                                     ctx=self.ctx,
                                                     use_pooler=False,
                                                     use_decoder=False,
                                                     use_classifier=False)
    self.bert.cast(self.dtype)

    if params_path:
        logger.info('Loading params from %s', params_path)
        self.bert.load_parameters(params_path, ctx=ctx, ignore_extra=True)

    lower = 'uncased' in self.dataset_name
    self.tokenizer = BERTTokenizer(self.vocab, lower=lower)
    self.transform = BERTSentenceTransform(tokenizer=self.tokenizer,
                                           max_seq_length=self.max_seq_length,
                                           pair=False)