Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
max_positions=1024, convolutions=((512, 3),) * 20, dropout=0.1):
super().__init__(dictionary)
self.dropout = dropout
self.num_attention_layers = None
num_embeddings = len(dictionary)
padding_idx = dictionary.pad()
self.embed_tokens = Embedding(num_embeddings, embed_dim, padding_idx)
if embed_dict:
self.embed_tokens = utils.load_embedding(embed_dict, self.dictionary, self.embed_tokens)
self.embed_positions = PositionalEmbedding(
max_positions,
embed_dim,
padding_idx,
left_pad=LanguagePairDataset.LEFT_PAD_SOURCE,
)
in_channels = convolutions[0][0]
self.fc1 = Linear(embed_dim, in_channels, dropout=dropout)
self.projections = nn.ModuleList()
self.convolutions = nn.ModuleList()
for (out_channels, kernel_size) in convolutions:
self.projections.append(Linear(in_channels, out_channels)
if in_channels != out_channels else None)
if kernel_size % 2 == 1:
padding = kernel_size // 2
else:
padding = 0
self.convolutions.append(
ConvTBC(in_channels, out_channels * 2, kernel_size,
dropout=dropout, padding=padding)
# B batch size
# K target dimension size
feature = torch.randn(B, T, D)
# this (B, T, D) layout is just a convention, you can override it by
# write your own _prepare_forward_input function
src_lengths = torch.from_numpy(
np.random.randint(low=1, high=T, size=B).astype(np.int64)
)
src_lengths[0] = T # make sure the maximum length matches
prev_output_tokens = []
for b in range(B):
token_length = np.random.randint(low=1, high=src_lengths[b].item() + 1)
tokens = np.random.randint(low=0, high=K, size=token_length)
prev_output_tokens.append(torch.from_numpy(tokens))
prev_output_tokens = fairseq_data_utils.collate_tokens(
prev_output_tokens,
pad_idx=1,
eos_idx=2,
left_pad=False,
move_eos_to_beginning=False,
)
src_lengths, sorted_order = src_lengths.sort(descending=True)
forward_input["src_tokens"] = feature.index_select(0, sorted_order)
forward_input["src_lengths"] = src_lengths
forward_input["prev_output_tokens"] = prev_output_tokens
return forward_input
def get_trainer_and_epoch_itr(epoch, epoch_size, num_updates, iterations_in_epoch):
tokens = torch.LongTensor(list(range(epoch_size))).view(1, -1)
tokens_ds = data.TokenBlockDataset(
tokens, sizes=[tokens.size(-1)], block_size=1, pad=0, eos=1, include_targets=False,
)
trainer = mock_trainer(epoch, num_updates, iterations_in_epoch)
dataset = data.LanguagePairDataset(tokens_ds, tokens_ds.sizes, mock_dict(), shuffle=False)
epoch_itr = data.EpochBatchIterator(
dataset=dataset,
collate_fn=dataset.collater,
batch_sampler=[[i] for i in range(epoch_size)],
)
return trainer, epoch_itr
def train(args, trainer, task, epoch_itr):
"""Train the model for one epoch."""
# Update parameters every N batches
update_freq = args.update_freq[epoch_itr.epoch - 1] \
if epoch_itr.epoch <= len(args.update_freq) else args.update_freq[-1]
# Initialize data iterator
itr = epoch_itr.next_epoch_itr(
fix_batches_to_gpus=args.fix_batches_to_gpus,
shuffle=(epoch_itr.epoch >= args.curriculum),
)
itr = iterators.GroupedIterator(itr, update_freq)
progress = progress_bar.build_progress_bar(
args, itr, epoch_itr.epoch, no_progress_bar='simple',
)
extra_meters = collections.defaultdict(lambda: AverageMeter())
valid_subsets = args.valid_subset.split(',')
max_update = args.max_update or math.inf
for i, samples in enumerate(progress, start=epoch_itr.iterations_in_epoch):
log_output = trainer.train_step(samples)
if log_output is None:
continue
# log mid-epoch stats
stats = get_training_stats(trainer)
for k, v in log_output.items():
if k in ['loss', 'nll_loss', 'ntokens', 'nsentences', 'sample_size']:
continue # these are already logged above
def train(args, trainer, task, epoch_itr):
"""Train the model for one epoch."""
# Update parameters every N batches
update_freq = args.update_freq[epoch_itr.epoch - 1] \
if epoch_itr.epoch <= len(args.update_freq) else args.update_freq[-1]
# Initialize data iterator
itr = epoch_itr.next_epoch_itr(
fix_batches_to_gpus=args.fix_batches_to_gpus,
shuffle=(epoch_itr.epoch >= args.curriculum),
)
itr = iterators.GroupedIterator(itr, update_freq)
progress = progress_bar.build_progress_bar(
args, itr, epoch_itr.epoch, no_progress_bar='simple',
)
extra_meters = collections.defaultdict(lambda: AverageMeter())
valid_subsets = args.valid_subset.split(',')
max_update = args.max_update or math.inf
for i, samples in enumerate(progress, start=epoch_itr.iterations_in_epoch):
log_output = trainer.train_step(samples)
if log_output is None:
continue
# log mid-epoch stats
stats = get_training_stats(trainer)
for k, v in log_output.items():
if k in ['loss', 'nll_loss', 'ntokens', 'nsentences', 'sample_size']:
continue # these are already logged above
def train(args, trainer, task, epoch_itr):
"""Train the model for one epoch."""
# Update parameters every N batches
update_freq = args.update_freq[epoch_itr.epoch - 1] \
if epoch_itr.epoch <= len(args.update_freq) else args.update_freq[-1]
# Initialize data iterator
itr = epoch_itr.next_epoch_itr(
fix_batches_to_gpus=args.fix_batches_to_gpus,
shuffle=(epoch_itr.epoch >= args.curriculum),
)
itr = iterators.GroupedIterator(itr, update_freq)
progress = progress_bar.build_progress_bar(
args, itr, epoch_itr.epoch, no_progress_bar='simple',
)
extra_meters = collections.defaultdict(lambda: AverageMeter())
valid_subsets = args.valid_subset.split(',')
max_update = args.max_update or math.inf
for i, samples in enumerate(progress, start=epoch_itr.iterations_in_epoch):
log_output = trainer.train_step(samples)
if log_output is None:
continue
# log mid-epoch stats
stats = get_training_stats(trainer)
for k, v in log_output.items():
if k in ['loss', 'nll_loss', 'ntokens', 'nsentences', 'sample_size']:
continue # these are already logged above
def prepare(cls, args, **kwargs):
args.left_pad_source = options.eval_bool(args.left_pad_source)
args.left_pad_target = options.eval_bool(args.left_pad_target)
args.lang_tok = options.eval_bool(args.lang_tok)
s = args.word_mask_keep_rand.split(',')
s = [float(x) for x in s]
setattr(args, 'word_mask', s[0])
setattr(args, 'word_rand', s[1])
setattr(args, 'word_keep', s[2])
setattr(args, 'pred_probs', torch.FloatTensor([s[0], s[1], s[2]]))
args.langs = sorted(args.langs.split(','))
args.source_langs = sorted(args.source_langs.split(','))
args.target_langs = sorted(args.target_langs.split(','))
for lang in args.source_langs:
assert lang in args.langs
for lang in args.target_langs:
assert lang in args.langs
# Initialize CUDA and distributed training
if torch.cuda.is_available() and not args.cpu:
torch.cuda.set_device(args.device_id)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if init_distributed:
args.distributed_rank = distributed_utils.distributed_init(args)
if distributed_utils.is_master(args):
checkpoint_utils.verify_checkpoint_directory(args.save_dir)
# Print args
print(args)
# Setup task, e.g., translation, language modeling, etc.
task = tasks.setup_task(args)
# Load valid dataset (we load training data below, based on the latest checkpoint)
for valid_sub_split in args.valid_subset.split(','):
task.load_dataset(valid_sub_split, combine=False, epoch=0)
# Build model and criterion
model = task.build_model(args)
criterion = task.build_criterion(args)
print(model)
print('| model {}, criterion {}'.format(args.arch, criterion.__class__.__name__))
print('| num. model params: {} (num. trained: {})'.format(
sum(p.numel() for p in model.parameters()),
sum(p.numel() for p in model.parameters() if p.requires_grad),
))
# Build trainer
# Initialize CUDA and distributed training
if torch.cuda.is_available() and not args.cpu:
torch.cuda.set_device(args.device_id)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
if init_distributed:
args.distributed_rank = distributed_utils.distributed_init(args)
if distributed_utils.is_master(args):
checkpoint_utils.verify_checkpoint_directory(args.save_dir)
# Print args
print(args)
# Setup task, e.g., translation, language modeling, etc.
task = tasks.setup_task(args)
# Load valid dataset (we load training data below, based on the latest checkpoint)
for valid_sub_split in args.valid_subset.split(','):
task.load_dataset(valid_sub_split, combine=False, epoch=0)
# Build model and criterion
model = task.build_model(args)
criterion = task.build_criterion(args)
print(model)
print('| model {}, criterion {}'.format(args.arch, criterion.__class__.__name__))
print('| num. model params: {} (num. trained: {})'.format(
sum(p.numel() for p in model.parameters()),
sum(p.numel() for p in model.parameters() if p.requires_grad),
))
# Build trainer
models, args = checkpoint_utils.load_model_ensemble(
parsed_args.path.split(':'),
arg_overrides=eval(parsed_args.model_overrides),
task=task,
)
for arg in vars(parsed_args).keys():
if arg not in {
'self_target', 'future_target', 'past_target', 'tokens_per_sample',
'output_size_dictionary', 'add_bos_token',
}:
setattr(args, arg, getattr(parsed_args, arg))
# reduce tokens per sample by the required context window size
args.tokens_per_sample -= args.context_window
task = tasks.setup_task(args)
# Load dataset splits
task.load_dataset(args.gen_subset)
dataset = task.dataset(args.gen_subset)
if args.context_window > 0:
dataset = LMContextWindowDataset(
dataset=dataset,
tokens_per_sample=args.tokens_per_sample,
context_window=args.context_window,
pad_idx=task.source_dictionary.pad(),
)
print('| {} {} {} examples'.format(args.data, args.gen_subset, len(dataset)))
# Optimize ensemble for generation and set the source and dest dicts on the model (required by scorer)
for model in models:
model.make_generation_fast_()