def test_to_memory_to_disk(self):
    field_loader, _ = db_generator.get_test_data_FieldLoader('train')
    field_loader.to_memory = True
    field_loader.to_memory = False
    assert isinstance(field_loader.data, h5py._hl.dataset.Dataset)
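The assertion above relies on FieldLoader switching its data attribute back to the on-disk h5py handle when to_memory is turned off. A minimal hypothetical sketch of that toggle (FieldLoader here is illustrative, not the library's actual class):

import h5py
import numpy as np

class FieldLoader:  # hypothetical class, for illustration only
    def __init__(self, h5_dataset):
        self._dset = h5_dataset      # the underlying h5py._hl.dataset.Dataset
        self.data = h5_dataset       # starts out backed by disk
        self._to_memory = False

    @property
    def to_memory(self):
        return self._to_memory

    @to_memory.setter
    def to_memory(self, flag):
        self._to_memory = flag
        # True: read everything into a numpy array; False: go back to the lazy handle
        self.data = self._dset[()] if flag else self._dset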
            _h5set(store, subgrp, k, v, path)  # recursive call for nested dict values
    # Regular built-in types:
    elif value is None:
        grp.create_dataset(key, data=None, shape=None, dtype='f')
    elif isinstance(value, (int, float, str, bool, array.array)):
        grp[key] = value
    elif isinstance(value, bytes):
        grp[key] = numpy.bytes_(value)
    # NumPy types
    elif type(value).__module__ == numpy.__name__:
        grp[key] = value
    # h5py native types
    elif isinstance(value, h5py._hl.dataset.Dataset):
        grp[key] = value  # Creates hard-link!
    # Other types
    else:
        _load_pandas()  # might be a pandas type
        if _is_pandas_type(value):
            _requires_tables()
            store.close()
            with _pandas.HDFStore(store._filename, mode='a') as store_:
                store_[path] = value
            store.open()
        else:
            grp[key] = value
            warnings.warn(
                "Storage for object of type '{}' appears to have succeeded, but this "
                "type is not officially supported!".format(type(value)))
def load(h5_grp):
    """Load a HDF5 group recursively into a Python dictionary,
    and return the dictionary.
    """
    data = {}
    for key in list(h5_grp.keys()):
        h5py_class = h5_grp.get(key, getclass=True)
        if h5py_class is h5py._hl.group.Group:
            # print h5py_class, "Group"
            subgrp = h5_grp[key]
            val = load(subgrp)
        elif h5py_class is h5py._hl.dataset.Dataset:
            # print h5py_class, "Data"
            val = (h5_grp[key])[()]
        else:
            # shouldn't be reached at all
            raise ValueError
        data[key] = val
    for key in h5_grp.attrs:
        data[key] = h5_grp.attrs[key]
    return data
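A short usage sketch for load(), assuming the file layout is written first with plain h5py calls (file and group names are placeholders):

import h5py
import numpy as np

with h5py.File('demo_load.h5', 'w') as f:
    grp = f.create_group('experiment')
    grp.create_dataset('signal', data=np.arange(10))
    grp.attrs['sample_rate'] = 100

with h5py.File('demo_load.h5', 'r') as f:
    contents = load(f)
    # {'experiment': {'signal': array([0, 1, ..., 9]), 'sample_rate': 100}}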
fillvalue
    The value to use where there is no data.
"""
from .vds import VDSmap
# Encode filenames and dataset names appropriately.
sources = [VDSmap(vspace, filename_encode(file_name),
                  self._e(dset_name), src_space)
           for (vspace, file_name, dset_name, src_space)
           in layout.sources]

with phil:
    dsid = dataset.make_new_virtual_dset(self, layout.shape,
                                         sources=sources, dtype=layout.dtype,
                                         maxshape=layout.maxshape, fillvalue=fillvalue)
    dset = dataset.Dataset(dsid)
    if name is not None:
        self[name] = dset

return dset
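For context, the public API that funnels into the method above is h5py's VirtualLayout / VirtualSource pair. A minimal sketch (source file names are placeholders, and each source file is assumed to hold a 1-D dataset named 'data'):

import h5py

layout = h5py.VirtualLayout(shape=(4, 100), dtype='i4')
for i in range(4):
    layout[i] = h5py.VirtualSource('source_{}.h5'.format(i), 'data', shape=(100,))

with h5py.File('vds.h5', 'w') as f:
    # fillvalue is returned wherever a source provides no data
    f.create_virtual_dataset('stacked', layout, fillvalue=-1)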
def items(self):
    """ Get a list of (name, Dataset) pairs with all scales on this
    dimension.
    """
    with phil:
        scales = []
        # H5DSiterate raises an error if there are no dimension scales,
        # rather than iterating 0 times. See #483.
        if len(self) > 0:
            h5ds.iterate(self._id, self._dimension, scales.append, 0)
        return [
            (self._d(h5ds.get_scale_name(x)), Dataset(x))
            for x in scales
        ]
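A small usage sketch of the dimension-scale API this items() method serves (file and dataset names are arbitrary):

import h5py
import numpy as np

with h5py.File('scales_demo.h5', 'w') as f:
    temp = f.create_dataset('temperature', data=np.random.rand(10, 5))
    x = f.create_dataset('x', data=np.linspace(0.0, 1.0, 10))
    x.make_scale('x coordinate')
    temp.dims[0].attach_scale(x)
    # items() returns (scale name, Dataset) pairs attached to this dimension
    print(temp.dims[0].items())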
def recursive_copy(self, src, dest, mode='max', n_frames=None):
    print(src, type(src))
    self.copy_attributes(src, dest)
    assert n_frames is not None, 'Need to provide n_frames'
    assert type(src) in [h5py._hl.group.Group, h5py._hl.files.File]
    for key in src:
        if type(src[key]) in [h5py._hl.group.Group, h5py._hl.files.File]:
            dest_child = dest.create_group(key)
            self.recursive_copy(src[key], dest_child, mode=mode, n_frames=n_frames)
        elif type(src[key]) is h5py._hl.dataset.Dataset:
            dataset = src[key]
            print(key, dataset.shape)
            if dataset.shape == (n_frames, 8192, 128):
                if dataset.name != "/entry_1/data_1/data":
                    print("Skipping data block", dataset.name)
                    continue
                print('=====================================')
                if mode == 'max':
                    # Running maximum over frames, one frame at a time
                    dmax = np.zeros((8192, 128))
                    dmax[:] = -np.inf
                    for i in range(n_frames):
                        frame = dataset[i]
                        dmax = np.maximum(dmax, frame)
                    result = dmax.reshape(1, 8192, 128)
                elif mode == 'mean':
                    dsum = np.zeros((8192, 128))
track_order
    (T/F) Track attribute creation order; if unspecified, uses the
    global default h5.get_config().track_order.
external
    (Iterable of tuples) Sets the external storage property, thus
    designating that the dataset will be stored in one or more
    non-HDF5 files external to the HDF5 file. Adds each tuple
    of (name, offset, size) to the dataset's list of external files.
    Each name must be a str, bytes, or os.PathLike; each offset and
    size, an integer. If only a name is given instead of an iterable
    of tuples, it is equivalent to [(name, 0, h5py.h5f.UNLIMITED)].
"""
if 'track_order' not in kwds:
    kwds['track_order'] = h5.get_config().track_order

with phil:
    dsid = dataset.make_new_dset(self, shape, dtype, data, **kwds)
    dset = dataset.Dataset(dsid)
    if name is not None:
        self[name] = dset
    return dset
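A hedged sketch of the external= option described in the docstring above (file names and sizes here are illustrative):

import h5py

with h5py.File('meta_only.h5', 'w') as f:
    # 1000 float32 values = 4000 bytes, stored in the external raw file
    dset = f.create_dataset('raw', shape=(1000,), dtype='f4',
                            external=[('raw_block.bin', 0, 4000)])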
def check_dataset_type(val, name='The hdf5 dataset', allow_none=False, print_value=True, location=''):
    """
    Check that the given value is an hdf5 dataset, and optionally that it is not None.

    :param val: the given value to check
    :param name: name of val
    :param print_value: whether or not to print the value name in case of error
    :param location: the location of the potential hdf5 value to check
    :param allow_none: whether val is allowed to be None
    :raise TypeError: if val is not of the expected type
    :raise ValueError: if val is None while allow_none is False
    """
    none_msg = name + ' was not found in the hdf5 file at its location ' + location
    return check_type_value(val, name, h5py._hl.dataset.Dataset,
                            allow_none=allow_none, print_value=print_value, none_msg=none_msg)
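A short usage sketch; the file name, the dataset path, and the check_type_value helper it delegates to are assumed to come from the snippet's own codebase:

import h5py

with h5py.File('result.h5', 'r') as f:
    maybe_dset = f.get('entry/data')   # None if the path is missing
    # raises ValueError if missing, TypeError if it is not a Dataset
    check_dataset_type(maybe_dset, name='data', allow_none=False, location='entry/data')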
def upload(uid, filter_metadata=False, collapse_duplicates=True):
    # Load HDF5 File
    h5 = '/download/{uid}.h5'.format(**locals())
    with open(h5, 'wb') as openfile:
        openfile.write(urllib.request.urlopen(
            'https://storage.googleapis.com/jupyter-notebook-generator-user-data/{uid}/{uid}.h5'.format(**locals())).read())
    f = h5py.File(h5, 'r')

    # Get data ([()] reads the full dataset; the old .value accessor was removed in h5py 3)
    rawcount_dataframe = pd.DataFrame(data=f['data']['expression'][()],
                                      index=[x for x in f['meta']['gene']['symbol'][()]],
                                      columns=[x for x in f['meta']['sample']['Sample'][()]])
    sample_metadata_dataframe = pd.DataFrame({
        key: [x for x in value[()]] if type(value) == h5py._hl.dataset.Dataset
        else [x for x in [y for y in value.items()][0][1][()]]
        for key, value in f['meta']['sample'].items()
    }).set_index('Sample')  # , drop=False).rename(columns={'Sample': 'Sample Title'})

    # Filter out metadata columns that are constant or contain very long values
    if filter_metadata:
        for column in sample_metadata_dataframe.columns:
            unique_vals = list(set(sample_metadata_dataframe[column]))
            if len(unique_vals) == 1 or any([len(x) > 20 for x in unique_vals]):
                sample_metadata_dataframe.drop(column, axis=1, inplace=True)

    # Collapse duplicate gene symbols by summing their counts
    if collapse_duplicates and any(rawcount_dataframe.index.duplicated()):
        try:
            rawcount_dataframe = rawcount_dataframe.fillna(0).reset_index().groupby('index').sum()
        except Exception:
            pass

    data = {'rawdata': rawcount_dataframe, 'sample_metadata': sample_metadata_dataframe,
            'dataset_metadata': {'source': 'upload', 'datatype': 'rnaseq'}}
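A compact sketch of the same read pattern, decoding the byte strings h5py returns for string datasets before using them as pandas labels (the file name and layout are assumed to mirror the snippet above; _decode is a hypothetical helper):

import h5py
import pandas as pd

def _decode(values):
    # h5py returns bytes for fixed-length string datasets; decode for readable labels
    return [v.decode() if isinstance(v, bytes) else v for v in values]

with h5py.File('example.h5', 'r') as f:
    rawcounts = pd.DataFrame(f['data']['expression'][()],
                             index=_decode(f['meta']['gene']['symbol'][()]),
                             columns=_decode(f['meta']['sample']['Sample'][()]))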
def _recursively_load_dict_contents_from_group(cls, hdf_file=None, path=None):
    """
    Loads a structured dataset that has the form of a Python dictionary.

    :param hdf_file: hdf file object from which the dataset is loaded
    :param path: path to the dataset in the hdf file
    :returns: dictionary which was loaded from the hdf file
    """
    ans = {}
    for key, item in hdf_file[path].items():
        # noinspection PyUnresolvedReferences,PyProtectedMember
        if isinstance(item, h5py._hl.dataset.Dataset):
            ans[key] = item[()]  # .value was removed in h5py 3
        elif isinstance(item, h5py._hl.group.Group):
            ans[key] = cls._recursively_load_dict_contents_from_group(hdf_file, path + key + '/')
    return ans
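A usage sketch; DictLoader is a hypothetical wrapper that re-binds the function above as a classmethod so its recursive cls call resolves:

import h5py
import numpy as np

class DictLoader:  # assumed wrapper, for illustration only
    _recursively_load_dict_contents_from_group = classmethod(
        _recursively_load_dict_contents_from_group)

with h5py.File('structured.h5', 'w') as f:
    f.create_dataset('config/threshold', data=0.5)
    f.create_dataset('config/labels', data=np.arange(3))

with h5py.File('structured.h5', 'r') as f:
    # the trailing '/' matters: child paths are built as path + key + '/'
    print(DictLoader._recursively_load_dict_contents_from_group(f, '/'))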