Returns
--------
out : pandas.Series
    Prediction scores for the selected items, indicating how likely each is to be the next item of this session. Indexed by item ID.
'''
if self.prev_session_id != session_id:
    # A new session has started: reset the per-session item counts.
    self.prev_session_id = session_id
    self.pers = dict()
# Count how often the current input item has been seen in this session.
self.pers[input_item_id] = self.pers.get(input_item_id, 0) + 1

# Start from the global popularity scores, then add the session counts.
preds = np.zeros(len(predict_for_item_ids))
mask = np.isin(predict_for_item_ids, self.pop_list.index)
preds[mask] = self.pop_list[predict_for_item_ids[mask]]

ser = pd.Series(self.pers)
mask = np.isin(predict_for_item_ids, ser.index)
preds[mask] += ser[predict_for_item_ids[mask]]

return pd.Series(data=preds, index=predict_for_item_ids)
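A self-contained sketch of the scoring pattern above, with hypothetical popularity scores and per-session counts (pop_list, session_counts, and the item IDs are all invented for illustration):

import numpy as np
import pandas as pd

# Hypothetical popularity scores and per-session counts, indexed by item ID.
pop_list = pd.Series({10: 0.5, 20: 0.3, 30: 0.2})
session_counts = pd.Series({20: 1, 30: 2})

predict_for_item_ids = np.array([10, 20, 30, 40])
preds = np.zeros(len(predict_for_item_ids))

mask = np.isin(predict_for_item_ids, pop_list.index)
preds[mask] = pop_list[predict_for_item_ids[mask]]

mask = np.isin(predict_for_item_ids, session_counts.index)
preds[mask] += session_counts[predict_for_item_ids[mask]]

print(pd.Series(preds, index=predict_for_item_ids))
# Item 40 is unknown to both series, so its score stays 0.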
def std_err(self):
    """
    Series of parameter standard errors, indexed by parameter name
    """
    return pd.Series(np.sqrt(np.diag(self.param_cov)),
                     index=self._names, name='std_err')
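As a usage sketch, the same construction with a hypothetical 2x2 parameter covariance matrix (param_cov and the parameter names are invented here):

import numpy as np
import pandas as pd

# Invented covariance matrix for two parameters, for illustration only.
param_cov = np.array([[0.04, 0.01],
                      [0.01, 0.09]])
names = ['alpha', 'beta']

# Standard errors are the square roots of the covariance diagonal.
std_err = pd.Series(np.sqrt(np.diag(param_cov)), index=names, name='std_err')
print(std_err)  # alpha -> 0.2, beta -> 0.3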
preps = [lambda x: C(pandas.Series(x))]
for prep in preps:
    ...
def _update_ansys_variables(self, variations=None):
    """
    Update the list of Ansys HFSS variables for the set of sweeps.
    """
    variations = variations or self.variations
    for variation in variations:
        self._hfss_variables[variation] = pd.Series(
            self.get_variables(variation=variation))
    return self._hfss_variables
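A toy sketch of the same pattern, assuming get_variables returns a plain dict of variable names to values (the variation keys and variable values below are invented):

import pandas as pd

# Invented stand-in for get_variables(variation=...): dict of name -> value.
def get_variables(variation):
    return {'Lj': '10nH', 'Cj': '0fF'}

hfss_variables = {}
for variation in ['0', '1']:
    # One pd.Series of variable name -> value per variation.
    hfss_variables[variation] = pd.Series(get_variables(variation))
print(hfss_variables['0'])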
def batch_generator():
    """Perform stratified sampling.

    Binary classification is assumed; this scheme does not make
    sense in the multiclass setting.
    """
    ys_train = self.ys_train.flatten()  # assume no multi-task
    class_counts = pd.Series(ys_train).value_counts()
    # idxmin()/idxmax() return the class labels themselves, not their
    # positions in the counts array, so the comparisons below are valid.
    rare_class, common_class = class_counts.idxmin(), class_counts.idxmax()
    rare_idxs = np.argwhere(ys_train == rare_class).flatten()
    common_idxs = np.argwhere(ys_train == common_class).flatten()
    while True:
        # Stratified sampling: draw a fixed fraction of each batch from
        # the rare class and fill the rest from the common class.
        rare_batch_idxs = np.random.choice(rare_idxs, size=int(batch_size * mb_ratio) + 1)
        common_batch_idxs = np.random.choice(common_idxs, size=int(batch_size * (1 - mb_ratio)))
        batch_idxs = np.concatenate([rare_batch_idxs, common_batch_idxs])
        train_data = {}
        for label in self.label_names:
            train_data[label] = self.train_data[label][batch_idxs]
        train_data['input'] = self.train_data['input'][batch_idxs]
        yield train_data
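A self-contained sketch of the same stratified draw with plain NumPy and pandas (the labels, batch_size, and mb_ratio below are invented for illustration):

import numpy as np
import pandas as pd

ys = np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1])  # imbalanced toy labels
batch_size, mb_ratio = 8, 0.5                   # invented hyperparameters

counts = pd.Series(ys).value_counts()
rare, common = counts.idxmin(), counts.idxmax()
rare_idxs = np.argwhere(ys == rare).flatten()
common_idxs = np.argwhere(ys == common).flatten()

batch_idxs = np.concatenate([
    np.random.choice(rare_idxs, size=int(batch_size * mb_ratio) + 1),
    np.random.choice(common_idxs, size=int(batch_size * (1 - mb_ratio))),
])
print(ys[batch_idxs])  # roughly half the batch comes from the rare class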
# Clamp the end of the scan window to today for in-progress years.
ets = datetime.date(year, 12, 31)
today = datetime.date.today()
if ets > today:
    ets = today
days = []
coverage = []
while now <= ets:  # 'now' is initialized to the start date earlier
    idx = iemre.daily_offset(now)
    # Total precipitation over the trailing 'period' days per grid cell.
    trailing = np.sum(precip[(idx - period):idx, :, :], axis=0)
    pday = np.where(hasdata > 0, trailing[:, :], -1)
    # Count cells at or above the threshold (inches converted to mm).
    tots = np.sum(np.where(pday >= (threshold * 25.4), 1, 0))
    days.append(now)
    coverage.append(tots / float(datapts) * 100.0)
    now += datetime.timedelta(days=1)

df = pd.DataFrame(dict(day=pd.Series(days), coverage=pd.Series(coverage)))

(fig, ax) = plt.subplots(1, 1)
ax.bar(days, coverage, fc="g", ec="g")
ax.set_title(
    (
        "%s IEM Estimated Areal Coverage Percent of %s\n"
        " receiving %.2f inches of rain over trailing %s day period"
    )
    % (year, reference.state_names[state], threshold, period)
)
ax.set_ylabel("Areal Coverage [%]")
ax.xaxis.set_major_formatter(mdates.DateFormatter("%b\n%-d"))
ax.set_yticks(range(0, 101, 25))
ax.grid(True)
return fig, df
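A minimal sketch of the day/coverage frame built above, with fabricated values for illustration:

import datetime
import pandas as pd

days = [datetime.date(2020, 6, 1) + datetime.timedelta(days=i) for i in range(3)]
coverage = [12.5, 40.0, 87.5]  # fabricated coverage percentages
df = pd.DataFrame(dict(day=pd.Series(days), coverage=pd.Series(coverage)))
print(df)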
def getPValues(control_avg, curr_avg):
    p_values = pd.Series(index=curr_avg.columns, dtype=float)
    for feat in curr_avg.columns:
        # np.float was removed from NumPy; the builtin float dtype is equivalent.
        x = control_avg[feat].values.astype(float)
        y = curr_avg[feat].values.astype(float)
        x = x[~np.isnan(x)]
        y = y[~np.isnan(y)]
        #if np.all(np.isnan(x)) or np.all(np.isnan(y)):
        #    continue
        # Welch's t-test (unequal variances).
        _, p_value = ttest_ind(x, y, equal_var=False)
        #_, p_value = ranksums(x, y)
        #p_value positive if N2 is larger than the strain
        p_values[feat] = p_value
    p_values = p_values.dropna()
    # Correct for false discovery rate using the 2-stage
    # Benjamini-Krieger-Yekutieli procedure
    # (multipletests from statsmodels.stats.multitest).
    reject, pvals_corrected, alphacSidak, alphacBonf = \
        multipletests(p_values.values, method='fdr_tsbky')
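A runnable sketch of the same test-then-correct pipeline on invented data (the feature values below are fabricated):

import numpy as np
import pandas as pd
from scipy.stats import ttest_ind
from statsmodels.stats.multitest import multipletests

# Fabricated per-feature measurements for a control and a current group.
control = pd.DataFrame({'f1': [1.0, 1.2, 0.9, 1.1], 'f2': [3.0, 3.1, 2.9, 3.2]})
curr = pd.DataFrame({'f1': [2.0, 2.2, 1.9, 2.1], 'f2': [3.0, 3.2, 2.8, 3.1]})

# Welch's t-test per feature, then two-stage BKY FDR correction.
p_values = pd.Series({feat: ttest_ind(control[feat], curr[feat], equal_var=False)[1]
                      for feat in curr.columns})
reject, corrected, _, _ = multipletests(p_values.values, method='fdr_tsbky')
print(pd.Series(corrected, index=p_values.index))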
def interpNaN(data):
    '''
    Interpolate data using a linear interpolation
    @param data: 1d numpy array or pandas Series with possible NaN's
    @return data after interpolation
    '''
    if isinstance(data, np.ndarray):
        data = pd.Series(data)
        # as_matrix() was removed from pandas; to_numpy() is the replacement.
        return data.interpolate().to_numpy()
    elif isinstance(data, pd.Series):
        return data.interpolate()
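A usage sketch for interpNaN with toy inputs:

import numpy as np
import pandas as pd

# NaNs are filled by linear interpolation between their neighbors.
print(interpNaN(np.array([1.0, np.nan, 3.0])))            # [1. 2. 3.]
print(interpNaN(pd.Series([1.0, np.nan, np.nan, 4.0])))   # 1.0, 2.0, 3.0, 4.0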
def plot_episode_stats(stats, smoothing_window=10, noshow=False):
    # Plot the episode length over time.
    fig1 = plt.figure(figsize=(10, 5))
    plt.plot(stats.episode_lengths)
    plt.xlabel("Episode")
    plt.ylabel("Episode Length")
    plt.title("Episode Length over Time")
    if noshow:
        plt.close(fig1)
    else:
        plt.show()

    # Plot the episode reward over time, smoothed over a rolling window.
    fig2 = plt.figure(figsize=(10, 5))
    rewards_smoothed = pd.Series(stats.episode_rewards).rolling(
        smoothing_window, min_periods=smoothing_window).mean()
    plt.plot(rewards_smoothed)
    plt.xlabel("Episode")
    plt.ylabel("Episode Reward (Smoothed)")
    plt.title("Episode Reward over Time (Smoothed over window size {})".format(smoothing_window))
    if noshow:
        plt.close(fig2)
    else:
        plt.show()

    # Plot episode number against cumulative time steps.
    fig3 = plt.figure(figsize=(10, 5))
    plt.plot(np.cumsum(stats.episode_lengths), np.arange(len(stats.episode_lengths)))
    plt.xlabel("Time Steps")
    plt.ylabel("Episode")
    plt.title("Episode per time step")
    if noshow:
        plt.close(fig3)
    else:
        plt.show()

    return fig1, fig2, fig3
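A hypothetical usage sketch; EpisodeStats here is an invented stand-in for whatever record type supplies episode_lengths and episode_rewards:

from collections import namedtuple
import numpy as np

# Stand-in stats record with fabricated episode data.
EpisodeStats = namedtuple("EpisodeStats", ["episode_lengths", "episode_rewards"])
stats = EpisodeStats(
    episode_lengths=np.random.randint(1, 200, size=100),
    episode_rewards=np.random.randn(100).cumsum(),
)
fig1, fig2, fig3 = plot_episode_stats(stats, smoothing_window=10, noshow=True)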
def _prepare_data(self):
    if self.data.shape[1] != 4:
        print("shape of the data is wrong: 4 variable columns are required")
    else:
        # Each row becomes a one-element sequence [[x1, x2, x3, x4]].
        self.data['single_input_vector'] = self.data.apply(tuple, axis=1).apply(list)
        self.data['single_input_vector'] = self.data.single_input_vector.apply(lambda x: [list(x)])
        # Cumulatively concatenate the per-row sequences, so row i holds rows 0..i.
        self.data['cumulative_input_vectors'] = self.data.single_input_vector.cumsum()
        self.data['output_vector'] = self.data[[1]].apply(tuple, axis=1).apply(list)
        # Pad every cumulative sequence to the length of the longest one.
        self.max_sequence_length = self.data['cumulative_input_vectors'].apply(len).max()
        self.padded_sequences = pad_sequences(self.data['cumulative_input_vectors'].tolist(),
                                              self.max_sequence_length).tolist()
        self.data['padded_input_vectors'] = pd.Series(self.padded_sequences).apply(np.asarray)
    return 'NULL'
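A toy sketch of the cumulative-sequence idea above, assuming pad_sequences comes from Keras (the import path varies by Keras version) and using an invented 3x4 frame:

import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences  # path varies by Keras version

df = pd.DataFrame(np.arange(12.0).reshape(3, 4))  # invented 3x4 data
df['single_input_vector'] = df[[0, 1, 2, 3]].apply(tuple, axis=1).apply(lambda t: [list(t)])
df['cumulative_input_vectors'] = df.single_input_vector.cumsum()

max_len = df['cumulative_input_vectors'].apply(len).max()
padded = pad_sequences(df['cumulative_input_vectors'].tolist(), max_len, dtype='float32')
print(padded.shape)  # (3, 3, 4): one growing sequence per row, front-padded with zeros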