import os

import msgpack


def get_or_build(path, build_fn, *args, **kwargs):
    """
    Load from serialized form or build an object, saving the built
    object.

    Remaining arguments are provided to `build_fn`.
    """
    save = False
    obj = None
    if path is not None and os.path.isfile(path):
        # Cache hit: deserialize the previously saved object.
        # Note: the `encoding` keyword only exists in msgpack-python < 1.0.
        with open(path, 'rb') as obj_f:
            obj = msgpack.load(obj_f, use_list=False, encoding='utf-8')
    else:
        save = True
    if obj is None:
        obj = build_fn(*args, **kwargs)
    if save and path is not None:
        # Cache miss: persist the freshly built object for next time.
        with open(path, 'wb') as obj_f:
            msgpack.dump(obj, obj_f)
    return obj
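A minimal usage sketch (the `build_vocab` helper and the cache path are hypothetical): the first call builds the object and serializes it, the second call is served from the msgpack file.

def build_vocab(text):
    # Hypothetical expensive build step: token -> id mapping.
    return {tok: i for i, tok in enumerate(sorted(set(text.split())))}

vocab = get_or_build('/tmp/vocab.msgpack', build_vocab, 'the quick brown fox')
vocab_again = get_or_build('/tmp/vocab.msgpack', build_vocab, 'the quick brown fox')  # loaded from disk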
def load_data(opt):
    with open('SQuAD/meta.msgpack', 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    opt['pretrained_words'] = True
    opt['vocab_size'] = embedding.size(0)
    opt['embedding_dim'] = embedding.size(1)
    if not opt['fix_embeddings']:
        # Re-randomize the <UNK> row; `means=` is the old PyTorch 0.x
        # keyword (newer releases spell it `mean=`).
        embedding[1] = torch.normal(means=torch.zeros(opt['embedding_dim']), std=1.)
    with open(args.data_file, 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    train_orig = pd.read_csv('SQuAD/train.csv')
    dev_orig = pd.read_csv('SQuAD/dev.csv')
    train = list(zip(
        data['trn_context_ids'],
        data['trn_context_features'],
        data['trn_context_tags'],
        data['trn_context_ents'],
        data['trn_question_ids'],
        # ...
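Note that the `encoding='utf8'` keyword used throughout these snippets exists only in msgpack-python < 1.0; from the 0.5 series onward the forward-compatible spelling is `raw=False`, which became the default behavior in 1.0. A version-tolerant loader sketch:

import msgpack

def load_msgpack(path):
    # raw=False decodes msgpack strings to str; it replaces the removed
    # encoding='utf8' keyword and works on msgpack 0.5.x and later.
    with open(path, 'rb') as f:
        return msgpack.load(f, raw=False)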
        # Image frame: flush the accumulated sensor data alongside the image
        # (the `if` test selecting this branch is elided in the source).
        print(sensor_sample_hist)
        for name in sensor_samples.keys():
            # Keep only samples within max_sensor_radius of the frame time.
            sensor_samples[name] = [s for s in sensor_samples[name]
                                    if abs(s[1] - fn_time) < max_sensor_radius]
        update_sensor_count(sensor_samples, sensor_sample_hist)
        # Drop type entries for sensors that no longer have samples.
        sensor_types = {k: v for k, v in sensor_types.items() if k in sensor_samples}
        yield str(fn_time * 1000), {'data:image': open(fn, 'rb').read(),
                                    'meta:filename': os.path.basename(fn),
                                    'meta:sensor_samples': msgpack.dumps(sensor_samples),
                                    'meta:sensor_types': msgpack.dumps(sensor_types),
                                    'meta:time': msgpack.dumps(fn_time)}
        sensor_samples = {}
        sensor_types = {}
    else:
        # Sensor log file: merge its samples and types into the accumulators.
        try:
            # Open in binary mode; msgpack expects bytes, not text.
            with open(fn, 'rb') as f:
                data = msgpack.load(f)
        except ValueError:
            print('Could not parse [%s]' % fn)
            continue
        print(data)
        for name, samples in data[3].items():
            sensor_samples.setdefault(name, []).extend(samples)
        for name, type_num in data[2].items():
            sensor_types[name] = type_num
print(sensor_sample_hist)
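The `meta:*` values above are nested structures packed with `msgpack.dumps`; a consumer reverses this per field. A minimal round-trip sketch (the sample layout is made up; note that msgpack returns tuples as lists):

import msgpack

sensor_samples = {'accel': [(0.12, 1520000000.0), (0.15, 1520000000.1)]}
packed = msgpack.dumps(sensor_samples)       # bytes, suitable for a single stored cell
restored = msgpack.loads(packed, raw=False)  # {'accel': [[0.12, ...], ...]}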
def load_train_data(opt):
    with open(os.path.join(args.train_dir, 'train_meta.msgpack'), 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    opt['vocab_size'] = embedding.size(0)
    opt['embedding_dim'] = embedding.size(1)
    with open(os.path.join(args.train_dir, 'train_data.msgpack'), 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    # data_orig = pd.read_csv(os.path.join(args.train_dir, 'train.csv'))
    # The width of the first token's feature vector gives the per-token
    # feature count.
    opt['num_features'] = len(data['context_features'][0][0])
    train = {'context': list(zip(
                 data['context_ids'],
                 data['context_tags'],
                 data['context_ents'],
                 data['context'],
                 data['context_span'],
                 data['1st_question'],
                 data['context_tokenized'])),
             'qa': list(zip(
                 data['question_CID'],
                 data['question_ids'],
                 data['context_features'],
                 # ...
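For the `num_features` line above to make sense, `context_features` must be nested as examples, then tokens, then a per-token feature vector. A toy illustration with made-up values:

context_features = [
    [                  # example 0
        [1, 0, 0, 1],  # feature vector for token 0
        [0, 1, 0, 0],  # feature vector for token 1
    ],
]
num_features = len(context_features[0][0])  # -> 4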
@classmethod
def load(cls, f):
    # Assumes everything is encoded in UTF-8.
    # This means that if some records (e.g., config files, feature vector
    # keys) are not encoded in UTF-8, the model cannot be loaded. However,
    # such models cannot be written out to text or JSON, so we don't really
    # care. Callers are responsible for handling UnicodeDecodeError.
    values = msgpack.load(f, encoding='utf-8', unicode_errors='strict')
    field_names = map(lambda x: x[0], cls.fields())
    c = cls()
    c.set(dict(zip(field_names, values)))
    return c
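A hedged sketch of the matching writer, assuming `cls.fields()` yields tuples whose first element is the field name and that instances expose a `get` accessor mirroring `set` (the accessor name is an assumption): values are packed positionally, in the same field order that `load` zips them back up in.

def dump(self, f):
    # Pack values positionally in fields() order; load() above relies on
    # this ordering when it zips names and values back together.
    values = [self.get(x[0]) for x in self.fields()]  # `get` is assumed here
    msgpack.dump(values, f, use_bin_type=True)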
@classmethod
def load(cls, filepath=None):
    """Load the contents of the given filepath.

    If None, default to '<current_snapshot>/repos.msgpack'."""
    if filepath is None:
        filepath = os.path.join(config['current_snapshot'], 'repos.msgpack')
    with open(filepath, 'rb') as f:
        records = msgpack.load(f, object_hook=cls._loader, use_list=False)
    return records
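`object_hook` lets msgpack rebuild rich objects from plain maps as it unpacks, which is presumably what `cls._loader` does for repo records above. A self-contained sketch of the pattern (the `Repo` type and field names are hypothetical):

import msgpack
from collections import namedtuple

Repo = namedtuple('Repo', ['name', 'stars'])

def _loader(obj):
    # Called for every decoded map; convert the ones that look like repos.
    if 'name' in obj and 'stars' in obj:
        return Repo(**obj)
    return obj

packed = msgpack.dumps({'name': 'msgpack-python', 'stars': 1800})
record = msgpack.loads(packed, object_hook=_loader, raw=False)  # Repo(name=..., stars=...)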
def load_data(opt):
    with open('SQuAD/meta.msgpack', 'rb') as f:
        meta = msgpack.load(f, encoding='utf8')
    embedding = torch.Tensor(meta['embedding'])
    opt['pretrained_words'] = True
    opt['vocab_size'] = embedding.size(0)
    opt['embedding_dim'] = embedding.size(1)
    opt['pos_size'] = len(meta['vocab_tag'])
    opt['ner_size'] = len(meta['vocab_ent'])
    BatchGen.pos_size = opt['pos_size']
    BatchGen.ner_size = opt['ner_size']
    with open(opt['data_file'], 'rb') as f:
        data = msgpack.load(f, encoding='utf8')
    train = data['train']
    # Sort dev examples by the length of their second field so batches have
    # similar lengths, then split off the answer (last element) as the label.
    data['dev'].sort(key=lambda x: len(x[1]))
    dev = [x[:-1] for x in data['dev']]
    dev_y = [x[-1] for x in data['dev']]
    return train, dev, dev_y, embedding, opt
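For completeness, a hedged sketch of producing a `data_file` in the shape `load_data` expects (keys and row layout are inferred from the loader; the path is illustrative):

import msgpack

data = {
    'train': [],  # training rows, consumed as-is by load_data
    'dev': [],    # each row ends with its answer, which load_data splits off
}
with open('SQuAD/data.msgpack', 'wb') as f:
    msgpack.dump(data, f, use_bin_type=True)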
def parse(self, stream, media_type=None, parser_context=None):
    try:
        return msgpack.load(stream,
                            use_list=True,
                            encoding="utf-8",
                            object_hook=MessagePackDecoder().decode)
    except Exception as exc:
        raise ParseError('MessagePack parse error - %s' % text_type(exc))
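The `parse` signature above matches Django REST Framework's parser interface. A minimal self-contained sketch of the surrounding class and its registration on a view (the `MessagePackParser` name and media type are assumptions; the snippet's `MessagePackDecoder` hook is omitted):

import msgpack
from rest_framework.exceptions import ParseError
from rest_framework.parsers import BaseParser
from rest_framework.response import Response
from rest_framework.views import APIView

class MessagePackParser(BaseParser):
    media_type = 'application/msgpack'

    def parse(self, stream, media_type=None, parser_context=None):
        try:
            # raw=False decodes msgpack strings to str on msgpack >= 0.5.x.
            return msgpack.load(stream, raw=False)
        except Exception as exc:
            raise ParseError('MessagePack parse error - %s' % exc)

class EchoView(APIView):
    parser_classes = [MessagePackParser]

    def post(self, request, *args, **kwargs):
        # request.data is the unpacked Python object.
        return Response(request.data)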