data = clean(input_path, NOW, CAT_COLS, NUM_COLS, DATE_COLS, IDS_COL, LABEL_COL)
ids = data[IDS_COL].values
label = data[LABEL_COL].values
data = data.drop([IDS_COL, LABEL_COL], axis = 1)
# train/test split twice to achieve a train/validation/test three-way split
df_train, df_test, y_train, y_test, ids_train, ids_test = train_test_split(
    data, label, ids, test_size = TEST_SIZE,
    random_state = SPLIT_RANDOM_STATE, stratify = label)
df_train, df_val, y_train, y_val, ids_train, ids_val = train_test_split(
    df_train, y_train, ids_train, test_size = VAL_SIZE,
    random_state = SPLIT_RANDOM_STATE, stratify = y_train)
# obtain finalized columns
num_cols_cleaned = list(SortedSet(df_train.columns) - SortedSet(CAT_COLS))
preprocess = Preprocesser(num_cols = num_cols_cleaned, cat_cols = CAT_COLS)
X_train = preprocess.fit_transform(df_train)
X_val = preprocess.transform(df_val)
X_test = preprocess.transform(df_test)
logger.info('modeling')
eval_set = [(X_train, y_train), (X_val, y_val)]
xgb_tuned = build_xgb(N_ITER, CV, MODEL_RANDOM_STATE, eval_set)
xgb_tuned.fit(X_train, y_train)
if not os.path.isdir(MODEL_DIR):
    os.mkdir(MODEL_DIR)
dump(preprocess, checkpoint_preprocess)
dump(xgb_tuned, checkpoint_xgb)
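The numeric columns above are derived with SortedSet set difference, which keeps the result in a deterministic sorted order across runs. A minimal standalone sketch of that step, with invented column names:

from sortedcontainers import SortedSet

# Hypothetical column layout, for illustration only
all_cols = ['age', 'income', 'city', 'signup_date', 'plan']
cat_cols = ['city', 'plan']

# Difference of two SortedSets is itself a SortedSet, so the resulting
# column order is stable no matter how the DataFrame columns were ordered.
num_cols_cleaned = list(SortedSet(all_cols) - SortedSet(cat_cols))
print(num_cols_cleaned)  # ['age', 'income', 'signup_date']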
def init(self):
    # Define set of CMasher colormaps that should be at the top
    cmr_cmaps = sset(['dusk', 'freeze', 'gothic', 'heat', 'rainforest',
                      'sunburst'])

    # Check that all of those colormaps are available in CMasher
    cmr_cmaps.intersection_update(cmr.cm.cmap_d)

    # Obtain a set with default MPL colormaps that should be at the top
    std_cmaps = sset(['cividis', 'inferno', 'magma', 'plasma', 'viridis'])

    # Add CMasher colormaps to it
    std_cmaps.update(['cmr.'+cmap for cmap in cmr_cmaps])

    # Obtain reversed set of recommended colormaps
    std_cmaps_r = sset([cmap+'_r' for cmap in std_cmaps])

    # Obtain a list with all colormaps and their reverses
    all_cmaps = sset([cmap for cmap in cm.cmap_d
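`sset` in the snippet above is presumably sortedcontainers' SortedSet imported under a short alias; the code relies on intersection_update() and update() keeping the collection sorted while filtering out colormaps that are not actually registered. A small sketch under that assumption, using a stand-in for cmr.cm.cmap_d:

from sortedcontainers import SortedSet as sset

preferred = sset(['dusk', 'freeze', 'sunburst', 'not_a_cmap'])
available = {'dusk': None, 'freeze': None, 'sunburst': None}  # stand-in for cmr.cm.cmap_d

# Drop any preferred name that is not actually registered
preferred.intersection_update(available)

# Prefixed copies come back out in sorted order
top_cmaps = sset('cmr.' + name for name in preferred)
print(list(top_cmaps))  # ['cmr.dusk', 'cmr.freeze', 'cmr.sunburst']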
async def compute_fair_share(self):
    free_cores_mcpu = sum([worker.free_cores_mcpu for worker in self.inst_pool.healthy_instances_by_free_cores])

    user_running_cores_mcpu = {}
    user_total_cores_mcpu = {}
    result = {}

    pending_users_by_running_cores = sortedcontainers.SortedSet(
        key=lambda user: user_running_cores_mcpu[user])
    allocating_users_by_total_cores = sortedcontainers.SortedSet(
        key=lambda user: user_total_cores_mcpu[user])

    records = self.db.execute_and_fetchall(
        '''
SELECT user, n_ready_jobs, ready_cores_mcpu, n_running_jobs, running_cores_mcpu
FROM user_resources;
''')

    async for record in records:
        user = record['user']
        user_running_cores_mcpu[user] = record['running_cores_mcpu']
        user_total_cores_mcpu[user] = record['running_cores_mcpu'] + record['ready_cores_mcpu']

        pending_users_by_running_cores.add(user)
        record['allocated_cores_mcpu'] = 0
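compute_fair_share() keeps users in SortedSets ordered by a key function, so the user with the fewest running (or total) cores is always at the front. One caveat the pattern relies on: the key is evaluated when an element is added, so an entry has to be removed and re-added whenever its core count changes. A minimal sketch with invented users and counts:

import sortedcontainers

user_running_cores_mcpu = {'alice': 4000, 'bob': 1000, 'carol': 2500}

# Users ordered by how many cores they currently have running
pending = sortedcontainers.SortedSet(key=lambda user: user_running_cores_mcpu[user])
for user in user_running_cores_mcpu:
    pending.add(user)
print(list(pending))  # ['bob', 'carol', 'alice']

# Remove before mutating the key, then re-add, so the ordering stays valid
pending.remove('bob')
user_running_cores_mcpu['bob'] = 5000
pending.add('bob')
print(list(pending))  # ['carol', 'alice', 'bob']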
def pathfind(start, goal, collide_check_func):
    if collide_check_func(*goal) or collide_check_func(*start):
        return None

    q = SortedSet()

    def _heuristic(x1, y1, x2, y2):
        return (abs(x1 - x2) + abs(y1 - y2)) ** 2

    max_h = _heuristic(*start, *goal)
    q.add((max_h, start))
    visited = set()
    curr_cost = dict()
    curr_cost[start] = (0, max_h)
    backtrace = dict()
    found = False
    while q:
        cost, point = q.pop(0)
        visited.add(point)
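The search uses a SortedSet of (cost, point) tuples as a simple priority queue: q.pop(0) always returns the entry with the smallest cost, and duplicate entries are ignored rather than queued twice as they would be with heapq. A trimmed illustration of that queue behaviour with made-up costs and points:

from sortedcontainers import SortedSet

q = SortedSet()
q.add((9, (2, 3)))
q.add((4, (0, 1)))
q.add((4, (0, 1)))   # duplicate, silently ignored
q.add((7, (1, 1)))

while q:
    cost, point = q.pop(0)   # smallest cost first
    print(cost, point)
# 4 (0, 1)
# 7 (1, 1)
# 9 (2, 3)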
def _add_feff_to_file(self, file, crystal_structure):
    file.write('DEBYE {0} {1} \n\n'.format(self.optical_spectrum_options['temperature'], self.optical_spectrum_options['debye temperature']))

    scattering_atom = int(self.optical_spectrum_options['atom']) - 1
    sphere_radius = float(self.optical_spectrum_options['sphere radius'])
    single_cell_coord = crystal_structure.calc_absolute_coordinates()
    Z_scattering = int(single_cell_coord[scattering_atom, 3])

    atoms = self._find_atoms_within_sphere(crystal_structure, sphere_radius, scattering_atom)
    species = SortedSet(atoms[:, 3].astype('int'))

    file.write('POTENTIALS\n')
    file.write(' 0 {}\n'.format(Z_scattering))
    for i, specie in enumerate(species):
        file.write(' {0} {1}\n'.format(i + 1, specie))

    file.write('\nATOMS\n')
    for atom in atoms:
        coords = atom[:3]
        Z = int(atom[3])
        if np.linalg.norm(coords - single_cell_coord[scattering_atom, :3]) < 1e-6:
            potential_number = 0
        else:
            potential_number = species.index(Z) + 1
        in_list = [coords[0]*sst.bohr, coords[1]*sst.bohr, coords[2]*sst.bohr, potential_number]
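The FEFF writer keeps the atomic numbers in a SortedSet so that every chemical species gets a stable potential index, and species.index(Z) maps an atomic number back to that index because a SortedSet is also an indexable sequence. A small sketch with invented atomic numbers:

from sortedcontainers import SortedSet

atomic_numbers = [26, 8, 8, 22]   # Fe, O, O, Ti (illustrative)
species = SortedSet(atomic_numbers)
print(list(species))              # [8, 22, 26]

# index() gives a deterministic potential number regardless of the
# order in which the atoms were listed.
for Z in atomic_numbers:
    print(Z, species.index(Z) + 1)
# 26 3
# 8 1
# 8 1
# 22 2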
            if isinstance(par_idx, str):
                par_seq[i] = self._par_name.index(par_idx)
            # If not, try to use it as a parameter index
            else:
                self._par_name[par_idx]
                par_seq[i] = par_idx % self._n_par
        # If any operation above fails, raise error
        except Exception as error:
            err_msg = ("Input argument %r[%i] is invalid! (%s)"
                       % (name, i, error))
            raise_error(err_msg, InputError, logger)

    # If everything went without exceptions, check if list is not empty and
    # remove duplicates
    if par_seq:
        par_seq = list(sset(par_seq))
    else:
        err_msg = "Input argument %r is empty!" % (name)
        raise_error(err_msg, ValueError, logger)

    # Log end
    logger.info("Finished converting sequence of model parameter "
                "names/indices.")

    # Return it
    return(par_seq)
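The final list(sset(par_seq)) both removes duplicate parameter indices and returns them in ascending order, which a plain set() would not guarantee. A one-liner sketch, assuming sset is sortedcontainers.SortedSet:

from sortedcontainers import SortedSet as sset

par_seq = [3, 0, 3, 1, 0]
par_seq = list(sset(par_seq))
print(par_seq)  # [0, 1, 3]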
        signature = fs_desc['declarations'][key]['signature']
        func_intf = Function(func, signature)
        # Do not set definition file since it is out of scope of the target program fragment
    else:
        signature = fs_desc.get('signature')
        func_intf = Function(func, signature)
        func_intf.definition_file = scope

    # Set static
    if fs_desc.get('type') == "static":
        func_intf.static = True
    else:
        func_intf.static = False

    # Add declarations
    files = sortedcontainers.SortedSet()
    if func_intf.definition_file:
        files.add(func_intf.definition_file)
    if fs_desc['declarations']:
        files.update({f for f in fs_desc['declarations'] if f != 'unknown' and f in deps})
    for file in files:
        if file not in cfiles and file not in func_intf.header_files:
            func_intf.header_files.append(file)
        for cfile in deps[file]:
            self.set_source_function(func_intf, cfile)
            func_intf.declaration_files.add(cfile)
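Collecting the files in a SortedSet means the header and declaration bookkeeping above runs in a fixed alphabetical order, independent of the order in which the files were discovered. A short sketch with invented file names:

import sortedcontainers

files = sortedcontainers.SortedSet()
files.add('drivers/usb/core.c')
files.update({'include/linux/usb.h', 'drivers/usb/hub.c', 'unknown'})
files.discard('unknown')
print(list(files))
# ['drivers/usb/core.c', 'drivers/usb/hub.c', 'include/linux/usb.h']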
def get_dest_tables(self) -> AbstractSet[str]:
    """
    Return a SortedSet of all destination table names.
    """
    return SortedSet([
        ddr.dest_table
        for ddr in self.rows
        if not ddr.omit
    ])
for container_id in [container_id for container_id in list(options_interfaces)
                     if [value for value in interface_to_value[container_id] if value in basevalue_to_value]]:
    # Collect all child values
    summary_values = sortedcontainers.SortedSet()
    summary_interfaces = sortedcontainers.SortedSet()
    original_options = sortedcontainers.SortedSet()
    for value in [value for value in interface_to_value[container_id] if value in basevalue_to_value]:
        summary_values.update(basevalue_to_value[value])
        summary_interfaces.update(basevalue_to_interface[value])
        original_options.add(value)

    # Greedily add implementations to fill all child values
    fulfilled_values = sortedcontainers.SortedSet()
    fulfilled_interfaces = sortedcontainers.SortedSet()
    final_set = sortedcontainers.SortedSet()
    original_options = sorted(sorted(original_options), key=lambda v: len(basevalue_to_value[v]), reverse=True)
    while set(summary_values - fulfilled_values) and \
            (len(fulfilled_values) != len(summary_values) or len(fulfilled_interfaces) != len(summary_interfaces)):
        value = sorted(set(summary_values - fulfilled_values)).pop()
        chosen_value = None
        for option in original_options:
            if value in basevalue_to_value[option]:
                chosen_value = option
                final_set.add(option)
                fulfilled_values.update(basevalue_to_value[option])
                fulfilled_interfaces.update(basevalue_to_interface[option])
                break

        if not chosen_value:
            raise RuntimeError('Infinite loop due to inability to cover an implementation by a container')
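The loop above repeatedly picks a still-uncovered value and selects the first option that covers it, preferring options that cover the most values. A compact sketch of the same greedy-cover idea with toy data (only values are tracked here, not interfaces):

import sortedcontainers

basevalue_to_value = {
    'optA': {'v1', 'v2', 'v3'},
    'optB': {'v3', 'v4'},
    'optC': {'v4'},
}

summary_values = sortedcontainers.SortedSet(['v1', 'v2', 'v3', 'v4'])
fulfilled_values = sortedcontainers.SortedSet()
final_set = sortedcontainers.SortedSet()

# Try the options that cover the most values first
options = sorted(basevalue_to_value, key=lambda o: len(basevalue_to_value[o]), reverse=True)

while summary_values - fulfilled_values:
    value = sorted(summary_values - fulfilled_values).pop()
    for option in options:
        if value in basevalue_to_value[option]:
            final_set.add(option)
            fulfilled_values.update(basevalue_to_value[option])
            break
    else:
        raise RuntimeError('No option covers %r' % value)

print(list(final_set))  # ['optA', 'optB']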
def get_source_databases(self) -> AbstractSet[str]:
    """
    Return a SortedSet of source database names.
    """
    return SortedSet([
        ddr.src_db
        for ddr in self.rows
        if ddr.required()
    ])
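Both table/database helpers return a SortedSet built from a comprehension, so callers get something that satisfies AbstractSet[str] but also iterates in alphabetical order and deduplicates repeated names. A standalone sketch with invented rows:

from collections import namedtuple
from sortedcontainers import SortedSet

Row = namedtuple('Row', ['dest_table', 'omit'])
rows = [Row('patients', False), Row('notes', False),
        Row('audit', True), Row('notes', False)]

dest_tables = SortedSet(r.dest_table for r in rows if not r.omit)
print(list(dest_tables))       # ['notes', 'patients']
print('audit' in dest_tables)  # False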