# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def check_for_dataset(nodes, var_names):
    """
    Recursively scan h5py nodes for datasets of interest.

    Any dataset whose leaf name (the part after the last '/') appears in
    var_names is recorded in the module-level dict_of_h5py_datasets mapping;
    sub-groups are descended into recursively.
    """
    import h5py
    for node in nodes:
        if isinstance(node, h5py._hl.dataset.Dataset):
            # Compare only the leaf name, without the group/subgroup prefix.
            leaf_name = node.name.rsplit('/', 1)[1]
            if leaf_name in var_names:
                dict_of_h5py_datasets[leaf_name] = node
        elif isinstance(node, h5py._hl.group.Group):
            # Repeat the scan on everything inside this sub-group.
            check_for_dataset(node.values(), var_names)
# NOTE(review): the enclosing "def read_group_as_class(Grp):" line is not
# visible in this chunk; judging by the recursive call below, this span is the
# body of that function -- confirm against the full file.
''' Read an hdf5 group into a ReadInto container, recursing on sub-groups. '''
Hinst=ReadInto()
NamesList=[]
# Group.visit walks every object below Grp, appending each path to NamesList.
Grp.visit(NamesList.append)
### identify higher level
# Top-level entries are the ones whose path contains no '/' separator.
MainLev=[]
for name in NamesList:
if '/' not in name: MainLev.append(name)
### Loop through higher level
for name in MainLev:
# sub-group
if type(Grp[name]) is h5py._hl.group.Group:
#print('adding subclass %s' %name)
# Sub-groups become nested container attributes, built recursively.
Ginst=read_group_as_class(Grp[name])
setattr(Hinst,name,Ginst)
else:
#print('adding attribute %s' %name)
# NOTE(review): Dataset.value was removed in h5py 3.0; this line would
# need Grp[name][()] on a modern h5py -- left unchanged here.
setattr(Hinst,name,Grp[name].value)
return Hinst
Loads one structured dataset.
:param hdf_file: hdf file object from which structured dataset should be loaded.
:param name: name of dataset
:param group: name of the main group
:returns: loaded dataset
:raises ValueError: if name is not a string or is not present in group
"""
# NOTE(review): the "def" line and the opening quotes of this docstring are
# not visible in this chunk; the body reads self, so this is a method --
# confirm the signature against the full file.
if not isinstance(name, str):
raise ValueError("Invalid name of the dataset.")
if name in group:
hdf_group = group[name]
else:
raise ValueError("Invalid name of the dataset.")
# noinspection PyUnresolvedReferences,PyProtectedMember
if isinstance(hdf_group, h5py._hl.dataset.Dataset):
# NOTE(review): Dataset.value was removed in h5py 3.0; hdf_group[()] is
# the modern equivalent -- left unchanged here.
return hdf_group.value
elif all([self._get_subgrp_name(hdf_group[el].name).isdigit() for el in hdf_group.keys()]):
# All-numeric child names are treated as list indices 0..N-1.
structured_dataset_list = []
# here we make an assumption about keys which have a numeric values; we assume that always : 1, 2, 3... Max
num_keys = len(hdf_group.keys())
for item in range(num_keys):
structured_dataset_list.append(
self._recursively_load_dict_contents_from_group(hdf_file=hdf_file,
path=hdf_group.name + "/%s" % item))
return self._convert_unicode_to_str(structured_dataset_list)
else:
# Group with non-numeric child names: load it as a dictionary.
return self._convert_unicode_to_str(
self._recursively_load_dict_contents_from_group(hdf_file=hdf_file,
path=hdf_group.name + "/"))
def recursively_load_dict_contents_from_group(h5file, path):
    """
    Rebuild a (possibly nested) dictionary from the hdf5 group at *path*.

    Keys are converted back from their string form: int where possible,
    then float, otherwise kept as the original string. Datasets become
    their in-memory values; sub-groups are loaded recursively.
    """
    result = {}
    for key, item in h5file[path].items():
        # Restore the key's numeric type if it round-trips cleanly.
        for caster in (int, float):
            try:
                restored_key = caster(key)
                break
            except ValueError:
                continue
        else:
            restored_key = key
        if isinstance(item, h5py._hl.dataset.Dataset):
            result[restored_key] = item[()]
        elif isinstance(item, h5py._hl.group.Group):
            result[restored_key] = recursively_load_dict_contents_from_group(h5file, path + key + '/')
    return result
def _load(py_container, h_group):
""" Load a hickle file

Recursive function to load hdf5 data into a PyContainer()

Args:
py_container (PyContainer): Python container to load data into
h_group (h5 group or dataset): h5py object, group or dataset, to spider
and load all datasets.
"""
# NOTE(review): this definition appears truncated in this chunk -- the
# dataset branch and the recursion over children are not visible here.
group_dtype = h5._hl.group.Group
dataset_dtype = h5._hl.dataset.Dataset
#either a file, group, or dataset
if isinstance(h_group, (H5FileWrapper, group_dtype)):
py_subcontainer = PyContainer()
try:
# The 'type' attribute records the original Python container type.
py_subcontainer.container_type = bytes(h_group.attrs['type'][0])
except KeyError:
# Re-raised unchanged: a group without a 'type' attribute is malformed.
raise
#py_subcontainer.container_type = ''
py_subcontainer.name = h_group.name
if py_subcontainer.container_type == b'dict_item':
# dict items additionally record the key's original type.
py_subcontainer.key_type = h_group.attrs['key_type']
def _recursively_load_dict_contents_from_group(cls, hdf_file=None, path=None):
    """
    Loads structure dataset which has form of Python dictionary.

    :param hdf_file: hdf file object from which dataset is loaded
    :param path: path to dataset in hdf file
    :returns: dictionary which was loaded from hdf file
    """
    ans = {}
    for key, item in hdf_file[path].items():
        # noinspection PyUnresolvedReferences,PyProtectedMember,PyProtectedMember
        if isinstance(item, h5py._hl.dataset.Dataset):
            # Dataset.value was deprecated in h5py 2.1 and removed in h5py 3.0;
            # the empty-tuple index reads the full dataset equivalently (and
            # matches the other loader in this file).
            ans[key] = item[()]
        elif isinstance(item, h5py._hl.group.Group):
            # Nested group -> recurse one level deeper.
            ans[key] = cls._recursively_load_dict_contents_from_group(hdf_file, path + key + '/')
    return ans
Args:
h5file: hdf5 object
hdf5 file where to store the dictionary
path: str
path within the hdf5 file structure
dic: dictionary
dictionary to save
'''
# NOTE(review): the enclosing "def" line and the start of this docstring are
# not visible in this chunk, and the save loop below is cut off mid-body.
# argument type checking
if not isinstance(dic, dict):
raise ValueError("must provide a dictionary")
if not isinstance(path, str):
raise ValueError("path must be a string")
if not isinstance(h5file, h5py._hl.files.File):
raise ValueError("must be an open h5py file")
# save items to the hdf5 file
for key, item in dic.items():
# hdf5 attribute/group names must be strings.
key = str(key)
if key == 'g':
logging.info(key + ' is an object type')
item = np.array(list(item))
if key == 'g_tot':
# NOTE(review): np.float was removed in NumPy 1.24; this needs float or
# np.float64 on a modern NumPy -- left unchanged here.
item = np.asarray(item, dtype=np.float)
if key in ['groups', 'idx_tot', 'ind_A', 'Ab_epoch', 'coordinates',
'loaded_model', 'optional_outputs', 'merged_ROIs', 'tf_in',
'tf_out']:
# These entries are intentionally skipped (logged as "not saved").
logging.info(['groups', 'idx_tot', 'ind_A', 'Ab_epoch', 'coordinates', 'loaded_model', 'optional_outputs', 'merged_ROIs',
'** not saved'])
def file_opener(f, mode='r', track_times=True):
    """ A file opener helper function with some error handling. This can open
    files through a file object, a h5py file, or just the filename.

    :param f: an open file object, a filename string, an H5FileWrapper, or an
        open h5py File
    :param mode: mode passed to h5.File when opening by name (ignored when f
        is a file object, whose own mode is reused)
    :param track_times: stored on the returned wrapper to control hdf5
        timestamp tracking
    :returns: an open file re-classed as H5FileWrapper
    :raises ClosedFileError: if an already-closed h5py file is passed in
    :raises FileError: if f is of an unsupported type
    """
    import io
    # Were we handed a file object or just a file name string?
    # NOTE: the original used the Python 2 builtins `file` and `unicode` and a
    # print *statement* -- all invalid or undefined on Python 3. They are
    # replaced with io.IOBase, str, and print() respectively.
    if isinstance(f, io.IOBase):
        # Plain open file handle: reopen it through h5py by name/mode.
        filename, mode = f.name, f.mode
        f.close()
        h5f = h5.File(filename, mode)
    elif isinstance(f, str):
        filename = f
        h5f = h5.File(filename, mode)
    elif isinstance(f, (H5FileWrapper, h5._hl.files.File)):
        try:
            filename = f.filename
        except ValueError:
            # h5py raises ValueError when the file handle is already closed.
            raise ClosedFileError()
        h5f = f
    else:
        print(type(f))
        raise FileError
    h5f.__class__ = H5FileWrapper
    h5f.track_times = track_times
    return h5f
def _load(py_container, h_group):
""" Load a hickle file

Recursive function that walks h_group (file, group, or dataset) and loads
its children into py_container.
"""
# NOTE(review): this definition is cut off at the end of this chunk -- the
# dataset branch and the final return are not visible here.
group_dtype = h5._hl.group.Group
dataset_dtype = h5._hl.dataset.Dataset
#either a file, group, or dataset
if isinstance(h_group, H5FileWrapper) or isinstance(h_group, group_dtype):
py_subcontainer = PyContainer()
# The 'type' attribute records the original Python container type.
py_subcontainer.container_type = h_group.attrs['type'][0]
py_subcontainer.name = h_group.name
if py_subcontainer.container_type != 'dict':
# NOTE(review): on h5py 3.x / Python 3 this attr value is typically
# bytes, so the comparison with the str 'dict' may always differ --
# verify against the version of h5py this targets.
h_keys = sort_keys(h_group.keys())
else:
h_keys = h_group.keys()
for h_name in h_keys:
h_node = h_group[h_name]
py_subcontainer = _load(py_subcontainer, h_node)