if not pd.isnull(lp_r02c01):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c01
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='lp_r02c02']''')[0]
if not pd.isnull(lp_r02c02):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c02
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='lp_r02c03']''')[0]
if not pd.isnull(lp_r02c03):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c03
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='lp_r02c04']''')[0]
if not pd.isnull(lp_r02c04):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c04
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='lp_r02c05']''')[0]
if not pd.isnull(lp_r02c05):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c05
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='lp_r02c06']''')[0]
if not pd.isnull(lp_r02c06):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c06
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='lp_r02c07']''')[0]
if not pd.isnull(lp_r02c07):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r02c07
self._matrix[feature] = self._matrix[feature].apply(lambda x: distribution() if pd.isnull(x) else x)
df1_only.rename(columns={'union pathset probability':'max prob missing from file2',
                         'path_cost_1':'num paths missing from file2'}, inplace=True)
df2_only = df_diff.loc[pd.isnull(df_diff.path_cost_1)].groupby(['iteration','passenger_id_num','trip_list_id_num']).agg({'union pathset probability':'max','path_cost_2':'count'})
df2_only.rename(columns={'union pathset probability':'max prob missing from file1',
                         'path_cost_2':'num paths missing from file1'}, inplace=True)
df_diff_summary = df_diff_counts.merge(df1_only, how='left', left_index=True, right_index=True)
df_diff_summary = df_diff_summary.merge(df2_only, how='left', left_index=True, right_index=True)
# note paths for which we didn't find ANY in one or the other run
df_diff_summary['only in file1'] = 0
df_diff_summary.loc[df_diff_summary['num paths missing from file2']==df_diff_summary['num total paths'],'only in file1'] = 1
df_diff_summary['only in file2'] = 0
df_diff_summary.loc[df_diff_summary['num paths missing from file1']==df_diff_summary['num total paths'],'only in file2'] = 1
# NaN means zero
df_diff_summary.loc[pd.isnull(df_diff_summary['num paths missing from file1']), 'num paths missing from file1'] = 0
df_diff_summary.loc[pd.isnull(df_diff_summary['num paths missing from file2']), 'num paths missing from file2'] = 0
# write detailed output
detail_file = os.path.join(dir1, "ft_compare_pathset.csv")
df_diff_summary.reset_index().to_csv(detail_file, index=False)
FastTripsLogger.info("Wrote detailed pathset diff info to %s" % detail_file)
# Report
FastTripsLogger.info(" Average pathset size: %.1f" % df_diff_summary['num total paths'].mean())
FastTripsLogger.info(" Trips with paths ONLY in pathset 1: %d" % df_diff_summary['only in file1'].sum())
FastTripsLogger.debug(" -- diffs --\n" + \
                      str(df_diff_summary.loc[df_diff_summary['only in file1']==1]) + "\n")
FastTripsLogger.info(" Trips with paths ONLY in pathset 2: %d" % df_diff_summary['only in file2'].sum())
FastTripsLogger.debug(" -- diffs --\n" + \
                      str(df_diff_summary.loc[df_diff_summary['only in file2']==1]) + "\n")
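# Note (illustrative, not fast-trips code): the two "NaN means zero" assignments above are
# equivalent to fillna on the merged columns; the left merges leave NaN where a trip had no
# missing paths, and those entries should count as zero. A minimal standalone sketch:
import numpy as np
import pandas as pd

summary = pd.DataFrame({'num paths missing from file1': [2.0, np.nan],
                        'num paths missing from file2': [np.nan, 1.0]})
for col in ['num paths missing from file1', 'num paths missing from file2']:
    summary[col] = summary[col].fillna(0)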
def _fit_na_fill(self, X):
    # record a fill value only for columns that actually contain missing entries
    for column in self._columns:
        if not pd.isnull(X[column]).any():
            continue
        self._na_fill_params[column] = self._get_fill_value(X[column])
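# Illustrative sketch (plain pandas, not part of the class above) of the fit/transform split
# that _fit_na_fill implements: fill values are computed once on training data, stored, and
# reused to fill any later frame. The column mean is assumed here as the fill statistic.
import numpy as np
import pandas as pd

def fit_na_fill(train, columns):
    # keep a fill value only for columns that actually contain NaN
    return {c: train[c].mean() for c in columns if pd.isnull(train[c]).any()}

train = pd.DataFrame({'a': [1.0, np.nan, 3.0], 'b': [4.0, 5.0, 6.0]})
na_fill_params = fit_na_fill(train, ['a', 'b'])
new_data = pd.DataFrame({'a': [np.nan], 'b': [7.0]})
new_data = new_data.fillna(value=na_fill_params)      # apply the stored fill values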
if not pd.isnull(lp_r04c07):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c07
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='lp_r04c08']''')[0]
if not pd.isnull(lp_r04c08):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c08
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='lp_r04c09']''')[0]
if not pd.isnull(lp_r04c09):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c09
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='lp_r04c10']''')[0]
if not pd.isnull(lp_r04c10):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c10
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='lp_r04c11']''')[0]
if not pd.isnull(lp_r04c11):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c11
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='lp_r04c12']''')[0]
if not pd.isnull(lp_r04c12):
    xml_txt_box.getchildren()[0].text = '%.0f' % lp_r04c12
else:
    xml_txt_box.getchildren()[0].text = '-'
xml_txt_box = tree2.findall('''.//*[@id='wp_r01c01']''')[0]
if not pd.isnull(wp_r01c01):
    xml_txt_box.getchildren()[0].text = '%.2f' % wp_r01c01
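# Illustrative sketch (not from the original script): the repeated lookup/format/fallback
# blocks above can be collapsed into a helper. `set_text_box` and `cell_values` are names
# introduced here; `tree2` and the lp_* values are the objects used in the code above.
import pandas as pd

def set_text_box(tree, element_id, value, fmt='%.0f'):
    # find the element by id and write the formatted value, or '-' when the value is missing
    box = tree.findall(".//*[@id='%s']" % element_id)[0]
    box[0].text = fmt % value if not pd.isnull(value) else '-'

cell_values = {'lp_r02c01': lp_r02c01, 'lp_r02c02': lp_r02c02}  # extend with the remaining ids
for element_id, value in cell_values.items():
    set_text_box(tree2, element_id, value)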
start_flag = True
end_flag = False

for i, row in enumerate(self._ts.itertuples()):
    date = row.Index.to_pydatetime()
    high = row.high
    low = row.low
    close = row.close
    sma = row.sma
    upper_band = sma + sma * self._percent_band
    lower_band = sma - sma * self._percent_band
    end_flag = (i == len(self._ts) - 1)
    shares = 0

    if pd.isnull(sma) or date < self._start:
        continue
    elif start_flag:
        start_flag = False
        # set start and end
        self._start = date
        self._end = self._ts.index[-1]

    # buy
    if (self._tlog.num_open_trades() == 0
            and close > upper_band
            and not end_flag):
        # enter buy in trade log
        shares = self._tlog.enter_trade(date, close)
    # sell
back_sid = back[0]
dt = tc.previous_session_label(roll_dt)
if self._frequency == 'minute':
    dt = tc.open_and_close_for_session(dt)[1]
    roll_dt = tc.open_and_close_for_session(roll_dt)[0]
partitions.append((front_sid,
                   back_sid,
                   dt,
                   roll_dt))

for partition in partitions:
    front_sid, back_sid, dt, roll_dt = partition
    last_front_dt = self._bar_reader.get_last_traded_dt(
        self._asset_finder.retrieve_asset(front_sid), dt)
    last_back_dt = self._bar_reader.get_last_traded_dt(
        self._asset_finder.retrieve_asset(back_sid), dt)
    if isnull(last_front_dt) or isnull(last_back_dt):
        continue
    front_close = self._bar_reader.get_value(
        front_sid, last_front_dt, 'close')
    back_close = self._bar_reader.get_value(
        back_sid, last_back_dt, 'close')
    adj_loc = dts.searchsorted(roll_dt)
    end_loc = adj_loc - 1
    adj = self._make_adjustment(cf.adjustment,
                                front_close,
                                back_close,
                                end_loc)
    try:
        adjs[adj_loc].append(adj)
    except KeyError:
        adjs[adj_loc] = [adj]
return adjs
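# Design note (illustrative, not from the library above): the try/except KeyError
# accumulation is the usual dict-of-lists pattern; collections.defaultdict expresses
# the same thing without the fallback branch. Stand-in data only:
from collections import defaultdict

adjustments_by_loc = defaultdict(list)
for loc, adj in [(3, 'adj_a'), (3, 'adj_b'), (7, 'adj_c')]:
    adjustments_by_loc[loc].append(adj)   # no KeyError handling needed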
if imputer.strategy in self.predictive_strategies:
    preds = self._preds[column]
    if preds == "all":
        x_ = X.drop(column, axis=1)
    else:
        x_ = X[preds]

    # isolate missingness
    if isinstance(x_, pd.Series):
        x_ = x_.to_frame()
        x_ = x_.loc[imp_ix]
    else:
        x_ = x_.loc[imp_ix, :]

    # default univariate impute for missing covariates
    mis_cov = pd.isnull(x_).sum()
    mis_cov = mis_cov[mis_cov > 0]
    if any(mis_cov):
        x_m = mis_cov.index
        for col in x_m:
            d = DefaultUnivarImputer()
            d_imps = d.fit_impute(x_[col], None)
            if mis_cov[col] == x_.shape[0]:
                d_imps = 0
            x_null = x_[col][x_[col].isnull()].index
            x_.loc[x_null, col] = d_imps

    # handling encoding again for prediction of imputations
    x_ = _one_hot_encode(x_)

# perform imputation given the specified imputer and value for x_
X.loc[imp_ix, column] = imputer.impute(x_)
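# Illustrative sketch (plain pandas, names introduced here) of the step above that patches
# missing covariates before the predictive imputer runs: any column with NaN gets a simple
# univariate fill, and an all-NaN column falls back to 0, mirroring the mis_cov handling.
import numpy as np
import pandas as pd

def fill_missing_covariates(covariates):
    filled = covariates.copy()
    na_counts = pd.isnull(filled).sum()
    for col in na_counts[na_counts > 0].index:
        fill_value = 0 if na_counts[col] == len(filled) else filled[col].mean()
        filled[col] = filled[col].fillna(fill_value)
    return filled

covs = pd.DataFrame({'x1': [1.0, np.nan, 3.0], 'x2': [np.nan, np.nan, np.nan]})
covs = fill_missing_covariates(covs)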
def filter_on_feature(self, feature, value):
    # remove rows where feature == value
    if pd.isnull(value):  # nan is not comparable, so need different syntax
        rows_to_remove = self._matrix[pd.isnull(self._matrix[feature])].index
    else:
        try:
            rows_to_remove = self._matrix[self._matrix[feature] == value].index
        except TypeError:
            log.info('Cannot filter %s on %s; types are not comparable.' % (feature, str(value)))
            return
    self._matrix.drop(rows_to_remove, inplace=True)
    self._matrix.reset_index(drop=True, inplace=True)
    # return number of rows remaining
    return self._matrix.shape[0]
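# Standalone illustration of the distinction handled above: equality comparison never matches
# NaN, so rows with missing values must be selected with pd.isnull rather than ==.
import numpy as np
import pandas as pd

frame = pd.DataFrame({'feature': [1.0, np.nan, 1.0, 2.0]})
print(len(frame[frame['feature'] == np.nan]))    # 0 rows: NaN never compares equal
print(len(frame[pd.isnull(frame['feature'])]))   # 1 row: the missing entry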
:param autoincrement_pkey_col: an ORM column of some model mapped to an sql table. The
column, as the name says, must be a primary key with auto-increment.
**The column MUST be of sql type INTEGER, otherwise this method should not be used**
dataframe[A] will have dtype int64, which is fine because we replace (or add) **all**
of the row values. Note that if we replaced only some of the values, dataframe[A]
might still hold the old dtype (e.g., float), which **would be bad** because some
databases (e.g., postgres) are strict and will raise an `sqlalchemy.exc.DataError`
when inserting/updating a non-nan/non-int value (e.g., 6.0 instead of 6).
:param dataframe: the dataframe with values to be inserted/updated/deleted from the table
mapped by `autoincrement_pkey_col`
'''
max_pkey = (_get_max(session, autoincrement_pkey_col) if max is None else max) + 1
pkeyname = autoincrement_pkey_col.key
if not overwrite:
    if pkeyname in dataframe:
        mask = pd.isnull(dataframe[pkeyname])
        nacount = mask.sum()
        if nacount != len(dataframe):
            dataframe.loc[mask, pkeyname] = np.arange(max_pkey, max_pkey+nacount, dtype=int)
            # cast values if we modified only SOME row values of dataframe[pkeyname]
            # This is why we might have had floats (because we had na) and now we still have
            # floats (postgres complains if we add 6.0 instead of 6!)
            return _cast_column(dataframe, autoincrement_pkey_col)
# if we are here, either we want to set all values of dataframe[pkeyname],
# or pkeyname is not a column of dataframe,
# or all dataframe[pkeyname] are na
# In ALL these cases pandas changes the dtype, so the cast is not needed
new_pkeys = np.arange(max_pkey, max_pkey+len(dataframe), dtype=int)
dataframe[pkeyname] = new_pkeys
return dataframe
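# Illustrative sketch of the dtype concern described in the docstring above: partially
# assigning integers into a float column (float because it held NaN) leaves the column as
# float64, so an explicit cast is needed before writing to a strict database.
import numpy as np
import pandas as pd

df = pd.DataFrame({'id': [1.0, np.nan, np.nan]})      # NaN forces float64
mask = pd.isnull(df['id'])
df.loc[mask, 'id'] = np.arange(10, 10 + mask.sum(), dtype=int)
print(df['id'].dtype)                                  # still float64 after partial assignment
df['id'] = df['id'].astype(int)                        # explicit cast, analogous to _cast_column above
print(df['id'].dtype)                                  # int64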