internal_dim=2)
test_policy = EpsilonGreedyPolicy(learning_algo, env.nActions(), rng, 1.)
# --- Instantiate agent ---
agent = NeuralAgent(
    env,
    learning_algo,
    parameters.replay_memory_size,
    max(env.inputDimensions()[i][0] for i in range(len(env.inputDimensions()))),
    parameters.batch_size,
    rng,
    test_policy=test_policy)
# --- Create unique filename for FindBestController ---
h = hash(vars(parameters), hash_name="sha1")
fname = "test_" + h
print("The parameters hash is: {}".format(h))
print("The parameters are: {}".format(parameters))
# As with the discount factor and the learning rate, one can periodically update the parameter of the
# epsilon-greedy policy implemented by the agent. This controller has a bit more capability, as it
# allows one to choose more precisely when to update epsilon: after every X actions, episodes or
# epochs. The parameter can also be reset every episode or epoch (or never, hence reset_every='none').
agent.attach(bc.EpsilonController(
    initial_e=parameters.epsilon_start,
    e_decays=parameters.epsilon_decay,
    e_min=parameters.epsilon_min,
    evaluate_on='action',
    periodicity=1,
    reset_every='none'))
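# --- Illustrative sketch (assumption, names made up): one plausible linear
# schedule matching the controller above -- epsilon is lowered after every
# action and floored at e_min. The exact update rule is whatever deer's
# bc.EpsilonController implements, not this helper.
def example_epsilon(step, initial_e=1.0, e_min=0.1, e_decays=10000):
    """Hypothetical helper: epsilon after `step` actions under a linear decay."""
    return max(e_min, initial_e - step * (initial_e - e_min) / e_decays)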
# Fit the model
estimator.fit(X, y)
# Compare the state of the model parameters with the original parameters
new_params = estimator.get_params()
for param_name, original_value in original_params.items():
    new_value = new_params[param_name]

    # We should never change or mutate the internal state of input
    # parameters by default. To check this we use the joblib.hash function
    # that recursively introspects any subobjects to compute a checksum.
    # The only exception to this rule of immutable constructor parameters
    # is a possible RandomState instance, but in this check we explicitly
    # fixed the random_state params recursively to be integer seeds.
    assert joblib.hash(new_value) == joblib.hash(original_value), (
        "Estimator %s should not change or mutate "
        "the parameter %s from %s to %s during fit."
        % (name, param_name, original_value, new_value))
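# --- Illustrative sketch (not from scikit-learn): joblib.hash recursively
# checksums nested objects, so an in-place mutation of a constructor parameter
# changes its hash and would trip the assertion above.
import joblib
example_param = {"alpha": 0.5, "weights": [1, 2, 3]}
example_snapshot = joblib.hash(example_param)
example_param["weights"].append(4)  # in-place mutation
assert joblib.hash(example_param) != example_snapshot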
def _evaluate_one(**kwargs):
    params = DEFAULT_PARAMS.copy()
    params.update(kwargs)
    params_digest = joblib.hash(params)
    results = params.copy()
    results['digest'] = params_digest
    results_folder = Path('results')
    results_folder.mkdir(exist_ok=True)
    folder = results_folder.joinpath(params_digest)
    folder.mkdir(exist_ok=True)
    if len(list(folder.glob("*/results.json"))) == 4:
        print('Skipping')
        return
    split_idx = params.get('split_idx', 0)
    print("Evaluating model on split #%d:" % split_idx)
    pprint(params)
    ratings_train, ratings_test = train_test_split(
        all_ratings, test_size=0.2, random_state=split_idx)
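# --- Illustrative sketch (hypothetical names and paths): the digest of the
# merged parameter dict names the results folder, so re-running with the same
# settings maps to the same folder and finished runs can be skipped.
from pathlib import Path
import joblib
example_params = {"split_idx": 0, "n_factors": 32}
example_folder = Path("results") / joblib.hash(example_params)
example_folder.mkdir(parents=True, exist_ok=True)
example_done = len(list(example_folder.glob("*/results.json"))) == 4  # as in _evaluate_one above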
exclude_list: list or None
    List of attributes to skip.
    If None, skips ['metadata'].
hash_type: {'sha1', 'md5'}
    Algorithm to use for hashing. Must be a valid joblib hash type.
"""
if exclude_list is None:
    exclude_list = ['metadata']
ret = {}
hashes = {}
for key, value in self.items():
    if key in exclude_list:
        continue
    data_hash = joblib.hash(value, hash_name=hash_type)
    hashes[key] = f"{hash_type}:{data_hash}"
ret["hashes"] = hashes
return ret
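# --- Illustrative sketch (assumption: `self` above is a dict-like container):
# hashing each value separately yields a per-key fingerprint of the form
# {"hashes": {"data": "sha1:<40-char hex>", ...}}, with 'metadata' left out.
import joblib
example_store = {"data": [1, 2, 3], "target": [0, 1, 1], "metadata": {"note": "skipped"}}
example_hashes = {key: f"sha1:{joblib.hash(value, hash_name='sha1')}"
                  for key, value in example_store.items() if key != "metadata"}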
                dephash_list.append(keyhashmap[taskelem])
            except Exception:
                # Else hash the object.
                arghash_list.extend(recursive_hash(taskelem))
else:
    try:
        # Assume a dask graph key.
        dephash_list.append(keyhashmap[task])
    except Exception:
        # Else hash the object.
        arghash_list.extend(recursive_hash(task))

# Calculate subhashes
src_hash = joblib_hash("".join(fnhash_list))
arg_hash = joblib_hash("".join(arghash_list))
dep_hash = joblib_hash("".join(dephash_list))
subhashes = {"src": src_hash, "arg": arg_hash, "dep": dep_hash}
objhash = joblib_hash(src_hash + arg_hash + dep_hash)
return objhash, subhashes
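# --- Illustrative sketch (not part of the snippet above): concatenating the
# source, argument and dependency subhashes and hashing the result gives a
# single object hash that changes whenever any component changes.
from joblib import hash as joblib_hash
example_src = joblib_hash("def f(x): return x + 1")
example_arg = joblib_hash("arg-hash-1")
example_dep = joblib_hash("")
example_objhash = joblib_hash(example_src + example_arg + example_dep)
assert example_objhash != joblib_hash(example_src + joblib_hash("arg-hash-2") + example_dep)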
exclude_list: list or None
    List of attributes to skip.
    If None, skips ['metadata'].
hash_type: {'sha1', 'md5'}
    Algorithm to use for hashing. Must be a valid joblib hash type.
"""
if exclude_list is None:
    exclude_list = ['metadata']
ret = {'hash_type': hash_type}
for key, value in self.items():
    if key in exclude_list:
        continue
    ret[f"{key}_hash"] = joblib.hash(value, hash_name=hash_type)
return ret
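# --- Illustrative sketch (hypothetical names): comparing two such fingerprint
# dicts shows exactly which entries changed between two versions of the data.
import joblib
example_old = {"hash_type": "sha1", "data_hash": joblib.hash([1, 2, 3], hash_name="sha1")}
example_new = {"hash_type": "sha1", "data_hash": joblib.hash([1, 2, 3, 4], hash_name="sha1")}
example_changed = [key for key in example_old if example_old[key] != example_new.get(key)]  # ["data_hash"]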
    else:
        dirname = os.path.expanduser('~/.config/neo_rawio_cache')
    dirname = os.path.join(dirname, self.__class__.__name__)
    if not os.path.exists(dirname):
        os.makedirs(dirname)
elif cache_path == 'same_as_resource':
    dirname = os.path.dirname(ressource_name)
else:
    assert os.path.exists(cache_path), \
        'cache_path does not exist; use "home" or "same_as_resource" to make this automatic'

# the hash of the resource (dir or file) is computed from its filename + modification time
# TODO: make something more sophisticated when rawmode='one-dir' that uses every filename and datetime
d = dict(ressource_name=ressource_name, mtime=os.path.getmtime(ressource_name))
hash = joblib.hash(d, hash_name='md5')
# the cache file name combines the real name and the hash
name = '{}_{}'.format(os.path.basename(ressource_name), hash)
self.cache_filename = os.path.join(dirname, name)
if os.path.exists(self.cache_filename):
    self.logger.warning('Use existing cache file {}'.format(self.cache_filename))
    self._cache = joblib.load(self.cache_filename)
else:
    self.logger.warning('Create cache file {}'.format(self.cache_filename))
    self._cache = {}
    self.dump_cache()
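# --- Illustrative sketch (hypothetical file name): keying the cache on the
# file path plus its modification time invalidates the cache automatically
# whenever the underlying file is rewritten.
import os
import joblib
example_file = "recording.dat"  # hypothetical resource
if os.path.exists(example_file):
    example_d = dict(ressource_name=example_file, mtime=os.path.getmtime(example_file))
    example_key = joblib.hash(example_d, hash_name="md5")
    example_cache_name = "{}_{}".format(os.path.basename(example_file), example_key)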
output_dataset = f'{model_name}_exp_{dataset_name}_{run_number}'
os.makedirs(output_path, exist_ok=True)

dataset = Dataset.load(dataset_name)
model, model_meta = load_model(model_name)

# add experiment metadata
experiment = {
    'model_name': model_name,
    'dataset_name': dataset_name,
    'run_number': run_number,
    'hash_type': hash_type,
    'input_data_hash': joblib.hash(dataset.data, hash_name=hash_type),
    'input_target_hash': joblib.hash(dataset.target, hash_name=hash_type),
    'model_hash': joblib.hash(model, hash_name=hash_type),
}
logger.debug(f"Predict: Applying {model_name} to {dataset_name}")

metadata_fq = output_path / f'{output_dataset}.metadata'
if metadata_fq.exists() and force is False:
    cached_metadata = Dataset.load(output_dataset, data_path=output_path,
                                   metadata_only=True)
    if experiment.items() <= cached_metadata['experiment'].items():
        logger.info("Experiment has already been run. Returning cached result.")
        return Dataset.load(output_dataset, data_path=output_path)
    else:
        raise Exception(f'An experiment with name {output_dataset} already exists, '
                        'but its metadata has changed. '
                        'Use `force=True` to overwrite, or change one of '
                        '`run_number` or `output_dataset`.')
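# --- Illustrative sketch (hypothetical dicts): `a.items() <= b.items()` is a
# set-style subset test, so the cached dataset is only reused when every
# recorded setting and hash in `experiment` matches the cached metadata.
example_experiment = {"model_hash": "abc", "run_number": 1}
example_cached = {"model_hash": "abc", "run_number": 1, "extra": "ignored"}
assert example_experiment.items() <= example_cached.items()
assert not ({"model_hash": "different"}.items() <= example_cached.items())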
# pandify
start = time()
columns = pd.Index('%d' % col for col in range(jagged.shape[1]))
roundtripped = [pd.DataFrame(data, copy=False, columns=columns) for data in roundtripped]
measurements['pandify_time'] = (time() - start)
# sum (will give a small idea of the overhead of laziness / mmap)
measurements['before_sum_mem'] = available_ram()
start = time()
measurements['suma'] = float(np.sum([np.nansum(df['6']) for df in roundtripped]))
measurements['sum_time'] = time() - start
measurements['after_sum_mem'] = available_ram()
# get a checksum from the whole collection
start = time()
measurements['checksum'] = joblib.hash(tuple(joblib.hash(df) for df in roundtripped))
measurements['checksum_time'] = time() - start
return measurements
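# --- Illustrative sketch (not part of the benchmark above): hashing the tuple
# of per-DataFrame hashes yields one checksum for the whole collection that is
# sensitive to a change in any single frame.
import joblib
import pandas as pd
example_frames = [pd.DataFrame({"6": [1.0, 2.0]}), pd.DataFrame({"6": [3.0, None]})]
example_checksum = joblib.hash(tuple(joblib.hash(df) for df in example_frames))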
adding or removing keys as specified.

hash_type: {'md5', 'sha1'}
    Hash algorithm to use
ignore: list
    list of keys to ignore
kwargs:
    key/value pairs to add before hashing
"""
if ignore is None:
    ignore = ['download_dir']
my_dict = {**self.to_dict(), **kwargs}
for key in ignore:
    my_dict.pop(key, None)
return joblib.hash(my_dict, hash_name=hash_type)
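# --- Illustrative sketch (hypothetical dict, not the class above): dropping
# volatile keys such as 'download_dir' and merging in extra key/value pairs
# before hashing keeps the fingerprint stable across machines.
import joblib
example_conf = {"name": "movielens", "download_dir": "/tmp/cache", "version": 2}
example_merged = {**example_conf, "split": "train"}
example_merged.pop("download_dir", None)
example_fingerprint = joblib.hash(example_merged, hash_name="sha1")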