Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
10.0, 5.0, 5.0, 5.0, 5.0, 0.0, 5.0, 5.0, 5.0]],
index=[0, 1, 2])
test_front_end_option = SimilarityFrontEndOption(
comp_file_id=2, sort_ascending=True, sort_column=0)
test_id_table = {0: "F1.txt", 1: "F2.txt", 2: "F3.txt"}
similarity_model = SimilarityModel(
test_options=SimilarityTestOption(
doc_term_matrix=test_dtm,
front_end_option=test_front_end_option,
document_label_map=test_id_table
)
)
pd.testing.assert_frame_equal(
similarity_model._get_similarity_query(),
pd.DataFrame(index=["Documents", "Cosine Similarity"],
data=[["F1.txt", "F2.txt"], [1., 1.]]).transpose()
)
"IOC", "HINDPETRO", "HEROMOTOCO",
"M&M", "ULTRACEMCO", "BAJAJFINSV",
"TATASTEEL", "HDFC", "BHARTIARTL",
"EICHERMOT", "JSWSTEEL", "ASIANPAINT",
"BAJAJ-AUTO", "AXISBANK", "YESBANK",
"IBULHSGFIN", "ITC", "LT",
"UPL", "KOTAKBANK", "HDFCBANK",
"HINDUNILVR", "ONGC", "TITAN",
"RELIANCE", "GAIL", "POWERGRID",
"NTPC", "COALINDIA", "ICICIBANK",
"SUNPHARMA", "INFRATEL", "GRASIM",
"SBIN", "HCLTECH", "INFY", "TCS",
"BAJFINANCE", "ZEEL", "CIPLA", "DRREDDY",
"WIPRO", "TECHM"]
self.all_stock_data = pd.DataFrame(columns=['Stock', 'Volume', 'High', 'Low'])
i = 0
for stock in self.nifty_50_stocks:
stock_data = fetch_stock_data(stock, 1, '1d')
self.all_stock_data.loc[i] = [stock, stock_data['Volume'].mean(), stock_data['High'].mean(),
stock_data['Low'].mean()]
i = i + 1
print('Fetched data for all nifty 50 stocks')
def test_nonvectorized_math_apply_on_large_dataframe_broadcast(self):
LOG.info("test_nonvectorized_math_apply_on_large_dataframe_broadcast")
df = pd.DataFrame({"x": np.random.normal(size=1_000_000), "y": np.random.uniform(size=1_000_000)})
tqdm.pandas(desc="Pandas Nonvec math apply + broadcast ~ DF")
start_pd = time.time()
pd_val = df.progress_apply(math_agg_foo, axis=1, result_type="broadcast")
end_pd = time.time()
pd_time = end_pd - start_pd
start_swifter = time.time()
swifter_val = df.swifter.progress_bar(desc="Nonvec math apply + broadcast ~ DF").apply(
math_agg_foo, axis=1, result_type="broadcast"
)
end_swifter = time.time()
swifter_time = end_swifter - start_swifter
self.assertEqual(pd_val, swifter_val) # equality test
if self.ncores > 1: # speed test
else:
assert type(idx0) == np.ndarray \
and idx0.shape == (n,) \
and idx0.dtype == 'int64', \
"Wrong type or size for indexing data on pointcloud."
points = pd.DataFrame({
'height': hgt0,
'mass': mas0,
'pos': pos0,
'rep': idx0,
},
columns=['height', 'mass', 'pos', 'rep'],
index=idx0)
self.coords = pd.DataFrame(data_array, index=idx0)
self.covertree = None
edges = stratum_maker(1)
super(self.__class__, self).__init__(stratum={0: points, 1: edges})
self.labels = np.zeros(shape=(self.coords.shape[0],), dtype=np.int64)
self.source = np.zeros(shape=(self.coords.shape[0],), dtype=np.int64)
self.label_info = pd.DataFrame(index=['black'])
self.label_info['clouds'] = np.array([1], dtype=np.int64)
self.label_info['points'] = np.array([n], dtype=np.int64)
self.label_info['tot_mass'] = np.array([self.stratum[0]['mass'].sum()])
self.label_info['int_index'] = np.array([0], dtype=np.int64)
self.max_length = max_length
if self.max_length > 0.0 or self.max_length == -1.0:
# use covertree to make all appropriate edges.
def _getAllHistory(self):
total_row = len(self.all_date)
df = pd.DataFrame(index=range(0, total_row))
df['date'] = pd.DataFrame(self.all_date)
df['price'] = pd.DataFrame(self.all_price)
df['signal'] = pd.DataFrame(self.all_signal)
df['cumulative_return'] = pd.DataFrame(self.all_return)
price = df['price']
signal = df['signal']
df_sell = pd.DataFrame(index=range(0, total_row), columns=['sell'])
df_buy = pd.DataFrame(index=range(0, total_row) , columns=['buy'])
if signal.loc[0] == SELL:
pass # if signal is SELL, first row don't take action
elif signal.loc[0] == BUY:
df_buy.loc[0] = price.loc[0]
# first row
old_signal = signal.loc[0]
for index in range(1, total_row):
new_signal = signal.loc[index]
if new_signal != old_signal:
if new_signal == BUY:
df_buy.loc[index] = price.loc[index]
elif new_signal == SELL:
df_sell.loc[index] = price.loc[index]
# new circuit hosting the island grid
circuit = MultiCircuitPowerFlow(baseMVA, bus_island, gen_island, branch_island, graph, self.solver_type, is_an_island=True)
# add the circuit to the islands
island_circuits.append(circuit)
original_indices.append(original_indices_entry)
fixed_power_indices.append(fixed_power)
island_idx += 1
recalculate_islands = False
# turn the rosetta values into pandas DataFrames; this allows easy querying later
cols = ['Original_idx', 'at_island_idx', 'island_idx', 'Fixed']
bus_rosetta = pd.DataFrame(data=bus_rosetta_vals, columns=cols, dtype=int)
gen_rosetta = pd.DataFrame(data=gen_rosetta_vals, columns=cols, dtype=int)
return island_circuits, original_indices, recalculate_islands, fixed_power_indices, bus_rosetta, gen_rosetta
"histogram" : hist,
"normalized" : hist.normalized,
"reverse_cumulative" : hist.reverse_cumulative,
"cumulative" : hist.cumulative,
"rebinned" : hist.rebinned
}[fcn](**fcn_args).values()
bin_edge = {
"histogram" : "m",
"normalized" : "m",
"reverse_cumulative" : "l",
"cumulative" : "r"
}[fcn]
xvals = hist.xvals(bin_edge)
frames.append(pd.DataFrame({hist.name : hist_data}, index=xvals))
all_frames = pd.concat(frames, axis=1)
return all_frames.fillna(0.0)
def save_test_out(tokens, labels):
    """Flatten per-sequence tokens and labels and save them as a two-column table.

    Sequences from ``tokens`` and ``labels`` are paired positionally (pairing
    stops at the shorter of the two).  After each pair, an empty string is
    appended to both columns so consecutive sequences are separated by a
    blank row.  The result is handed to ``save_csv_`` together with the
    output path and delimiter read from ``self``.

    NOTE(review): ``self`` is not a parameter; it is resolved from the
    surrounding scope.
    """
    flat_tokens, flat_labels = [], []
    for seq_tokens, seq_labels in zip(tokens, labels):
        # The trailing "" marks the end of the current sequence.
        flat_tokens += [*seq_tokens, ""]
        flat_labels += [*seq_labels, ""]

    frame = pd.DataFrame({"token": flat_tokens, "label": flat_labels})
    save_csv_(
        frame,
        self.output_test_file,
        ["token", "label"],
        delimiter=self.configs.delimiter,
    )
def get_term_counts_list(terms, text):
    """Build a ``count`` table indexed by the stems of ``terms``.

    ``terms`` is passed through ``do_stem`` and the result becomes the index
    of a single-column frame initialised to zero.  For every item of
    ``text``, its ``TextBlob`` words are stemmed the same way, filtered down
    to those present among the term stems, and the matching rows are
    incremented by one.

    Returns the resulting DataFrame.
    """
    term_stems = do_stem(terms)
    tally = pandas.DataFrame(0, columns=["count"], index=term_stems)

    for sentence in text:
        sentence_stems = do_stem(TextBlob(sentence).words)
        hits = [stem for stem in sentence_stems if stem in term_stems]
        # Read the current values first, then write back the incremented rows.
        current = tally.loc[hits]
        tally.loc[hits] = current + 1

    return tally
def empty_table(df):
    """Return a row-less ``DataTable`` whose column headings mirror ``df``.

    The table is backed by a ``ColumnDataSource`` built from an empty frame
    with the same columns as ``df``, and is created with a width of 800.
    """
    headings = df.columns
    blank_source = ColumnDataSource(pd.DataFrame(columns=headings))

    column_specs = []
    for heading in headings:
        column_specs.append(TableColumn(field=heading, title=heading))

    return DataTable(source=blank_source, columns=column_specs, width=800)