Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if(size == 1):
return 'Single'
elif(size <=3):
return 'Small'
elif(size <= 6):
return 'Medium'
else:
return 'Large'
# Derive a FamilyCategory column by passing every FamilySize value through
# convert_family_size (defined outside this statement).
titanic_all['FamilyCategory'] = titanic_all['FamilySize'].map(convert_family_size)
def extract_title(name):
    """Return the title embedded in a ``"Surname, Title. Given names"`` string.

    Only the segment between the first and second comma (or the end of the
    string when there is a single comma) is inspected. Within that segment
    the text before the first period is returned with surrounding whitespace
    removed; if the segment has no period, the whole stripped segment is
    returned.

    Parameters
    ----------
    name : str
        Full name containing at least one comma.

    Returns
    -------
    str
        The stripped title, e.g. ``"Mr"`` for ``"Braund, Mr. Owen Harris"``.

    Raises
    ------
    IndexError
        If ``name`` contains no comma.
    """
    after_surname = name.split(',')[1]
    return after_surname.split('.')[0].strip()
# Derive a Title column from the Name column via extract_title.
titanic_all['Title'] = titanic_all['Name'].map(extract_title)
# Take the first titanic_train.shape[0] rows of the combined frame for plotting.
# presumably these are the training rows (assumes titanic_all was built with
# the training set first) - TODO confirm.
tmp_df = titanic_all[0:titanic_train.shape[0]]
# Exploratory plots of the engineered features, split by Survived.
# NOTE(review): `factorplot` and the `size=` keyword were renamed to
# `catplot` / `height=` in later seaborn releases - confirm against the
# seaborn version this script is run with.
sns.FacetGrid(tmp_df, row="Survived",size=8).map(sns.kdeplot, "FamilySize").add_legend()
sns.factorplot(x="Title", hue="Survived", data=tmp_df, kind="count", size=6)
sns.factorplot(x="FamilyCategory", hue="Survived", data=tmp_df, kind="count", size=6)
sns.FacetGrid(tmp_df, row="Survived",size=8).map(sns.kdeplot, "Age").add_legend()
# Drop, in place, the columns that are not fed to the model, including the
# Survived target (it is read back from titanic_train below).
titanic_all.drop(['PassengerId', 'Name', 'Cabin','Ticket','Survived'], axis=1, inplace=True)
# One-hot encode the listed categorical columns.
features = ['Sex', 'Embarked', 'Pclass', 'Title', 'FamilyCategory']
titanic_all = pd.get_dummies(titanic_all, columns=features)
# Same row slice as tmp_df above, now taken from the encoded frame.
X_train = titanic_all[0:titanic_train.shape[0]]
y_train = titanic_train['Survived']
# Fit a 100-tree random forest on the training slice. Per the original note
# this is the first step of feature selection; the selection itself is not
# part of the statements shown here.
rf = ensemble.RandomForestClassifier(n_estimators=100)
rf.fit(X_train, y_train)
>>> plt.show()
"""
# to set default as an empty dictionary that is later filled with defaults
if fitline_kwds is None:
fitline_kwds = dict()
figsize = kwargs.pop('figsize', (7, 7))
# get fig and ax
fig, ax = _create_moran_fig_ax(ax, figsize)
# plot distribution
shade = kwargs.pop('shade', True)
color = kwargs.pop('color', splot_colors['moran_base'])
sbn.kdeplot(moran.sim, shade=shade, color=color, ax=ax, **kwargs)
# customize plot
fitline_kwds.setdefault('color', splot_colors['moran_fit'])
ax.vlines(moran.I, 0, 1, **fitline_kwds)
ax.vlines(moran.EI, 0, 1)
ax.set_title('Reference Distribution')
ax.set_xlabel('Moran I: ' + str(round(moran.I, 2)))
return fig, ax
# Create the output directory (makedirs is defined/imported outside this excerpt).
makedirs(opt.save_path)
# Draw opt.nb_points random codes of width opt.code_size from a standard
# normal distribution and move them to the GPU.
codes = torch.randn(opt.nb_points, opt.code_size).cuda()
# One pass per r_idx in 0..opt.r_iterations (inclusive).
for r_idx in range(1+opt.r_iterations):
# presumably returns an (nb_points, 2) array - it is indexed as [:, 0] and
# [:, 1] below; the file names suggest a t-SNE embedding - TODO confirm.
codes_r_2d = embed_or_load_cache(codes, gen, r_idx, opt.batch_size, opt.save_path)
# Scatter plot of the 2-D points on fixed [-10, 10] axes.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
ax.scatter(codes_r_2d[:, 0], codes_r_2d[:, 1], s=2, alpha=0.2)
ax.set_xlim((-10, 10))
ax.set_ylim((-10, 10))
# NOTE(review): only opt.save_path is created above; the 'tsne_plots'
# sub-directory must already exist or be created elsewhere - confirm.
fig.savefig(os.path.join(opt.save_path, 'tsne_plots', 'tsne_%04d_r%02d.jpg' % (0, r_idx,)))
plt.close()
# Same points as a filled 2-D KDE, on the same axis limits.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 10))
sns_plot = sns.kdeplot(codes_r_2d[:, 0], codes_r_2d[:, 1], shade=True, ax=ax)
sns_plot.axes.set_xlim((-10, 10))
sns_plot.axes.set_ylim((-10, 10))
fig = sns_plot.get_figure()
# NOTE(review): no plt.close() is visible after this save within the
# excerpt - check that the KDE figure is closed so figures do not
# accumulate across iterations.
fig.savefig(os.path.join(opt.save_path, 'tsne_plots', 'tsne_%04d_r%02d_kde.jpg' % (0, r_idx,)))
def pandas_ridge_plot(df, model, pop, k, folder='figures', name='personalization', save=True):
    """Draw a ridge plot: one overlapping KDE row per distinct value of ``df[model]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; must contain the columns named by ``model`` and ``pop``.
    model : str
        Name of the column whose distinct values define the rows and hues.
    pop : str
        Name of the column whose density is drawn on every row.
    k : int
        Only used in the x-axis label ("Top-k").
    folder, name, save :
        Not referenced by the statements shown here; presumably consumed by
        save logic outside this excerpt - TODO confirm.
    """
    # Transparent axes background so that overlapping rows do not hide each other.
    sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

    # Size the palette from the column named by `model`. Reading the
    # attribute `df.model` only worked when that column was literally
    # called "model".
    num_models = len(df[model].unique())

    # Initialize the FacetGrid object: one row and one hue per model.
    pal = sns.cubehelix_palette(num_models, rot=-.25, light=.7)
    g = sns.FacetGrid(df, row=model, hue=model, aspect=10, height=1, palette=pal)

    # Draw the densities in a few steps: filled curve, white outline on
    # top of it, then a baseline at y=0.
    g.map(sns.kdeplot, pop, clip_on=False, shade=True, alpha=1, lw=1.5, bw=50)
    g.map(sns.kdeplot, pop, clip_on=False, color="w", lw=1.5, bw=50)
    g.map(plt.axhline, y=0, lw=2, clip_on=False)

    # Label each row in axes coordinates. The parameter names are the ones
    # FacetGrid.map passes (`color` and `label` as keywords).
    def label(x, color, label):
        ax = plt.gca()
        ax.text(-0.1, .1, label, fontweight="bold", color=color,
                ha="left", va="center", transform=ax.transAxes)

    g.map(label, pop)

    # Set the subplots to overlap.
    g.fig.subplots_adjust(hspace=-0.8)

    # Label the x-axis.
    g.set_xlabels("Popularity Distribution of The Top-{0} Recommended Items".format(k))
# For columns listed in cols_for_log_plus_one, overwrite the first column of
# tmp_df with log(df[i] + 1).
# presumably tmp_df's first column holds feature i - TODO confirm.
if i in cols_for_log_plus_one:
tmp_df.iloc[:, 0] = np.log(df[i] + 1)
# Values of column i for rows whose target column (self.cfg.Y), cast to
# float, equals 1; the Series is renamed to pos_label
# (presumably so the plot legend shows that label - confirm).
pos_tmp_df = tmp_df.loc[tmp_df[self.cfg.Y].astype(float) == 1, i]
pos_tmp_df.rename(pos_label, inplace=True)
# Same selection for rows whose target equals 0, renamed to neg_label.
neg_tmp_df = tmp_df.loc[tmp_df[self.cfg.Y].astype(float) == 0, i]
neg_tmp_df.rename(neg_label, inplace=True)
# NOTE(review): the bare `except: pass` blocks below swallow every error
# (including KeyboardInterrupt). They presumably guard against kdeplot
# failing on degenerate data - confirm and narrow the exception type.
try:
sns.kdeplot(pos_tmp_df, shade=True, color="#fb6a4a")
except:
pass
try:
sns.kdeplot(neg_tmp_df, shade=True, color="#3182bd")
except:
pass
# Title the plot with the feature name and display it.
plt.title(i)
plt.show()
# Write the figure as a PDF under self.cfg.eda_out; `fig` is created outside
# this excerpt.
plot_filepath = str(self.cfg.eda_out / 'numerical_features_histograms.pdf')
fig.savefig(plot_filepath, format='pdf', bbox_inches='tight')
def chat_scatter_matrix_density(data):
    """Show a pair grid of ``data`` with densities and scatter plots.

    Bivariate KDE contours are drawn below the diagonal, scatter plots above
    it, and univariate KDE curves on the diagonal. The figure is displayed
    with ``plt.show()``; nothing is returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Passed unchanged to ``sns.PairGrid``.
    """
    # diag_sharey=False lets each diagonal density use its own y scale.
    g = sns.PairGrid(data, diag_sharey=False)
    g.map_lower(sns.kdeplot, cmap="Blues_d")
    g.map_upper(plt.scatter)
    g.map_diag(sns.kdeplot, lw=3)

    # Title the whole figure. plt.title() would only title whichever
    # subplot happens to be the current axes of the grid.
    g.fig.suptitle(r'Scatter plot together with estimated pair densities')
    # Leave room above the top row so the title does not overlap it.
    g.fig.subplots_adjust(top=0.93)
    plt.show()
# Build Plot (via Matplotlib)
# The figure size in inches is derived from the target pixel size and DPI:
# 1024 x 440 px at 96 DPI.
MY_DPI = 96
IMG_WIDTH = 1024
IMG_HEIGHT = 440
FIG_WIDTH = IMG_WIDTH / MY_DPI
FIG_HEIGHT = IMG_HEIGHT / MY_DPI
fig = plt.figure(figsize=(FIG_WIDTH, FIG_HEIGHT), dpi=MY_DPI)
ax = fig.add_subplot(111)
# Data-coordinate extent the background image is stretched over:
# x in [-100, 100], y in [-42.5, 42.5].
# presumably rink coordinates in feet - TODO confirm.
ax_extent = [-100, 100, -42.5, 42.5]
# NOTE(review): the opened image is not explicitly closed in this excerpt.
img = Image.open(os.path.join(PROJECT_ROOT, 'resources/images/Rink-Shotmap-Blank.png'))
ax.imshow(img, extent=ax_extent)
# Draw the heatmap portion of the graph: one filled 2-D KDE per data frame,
# red for pref_df and blue for other_df, both on the same axes.
sns.set_style("white")
sns.kdeplot(pref_df.coords_x, pref_df.coords_y, cmap='Reds', shade=True, bw=0.2, cut=100, shade_lowest=False, alpha=0.9, ax=ax)
sns.kdeplot(other_df.coords_x, other_df.coords_y, cmap="Blues", shade=True, bw=0.2, cut=100, shade_lowest=False, alpha=0.9, ax=ax)
# Draw the goal markers: '*' for the *_goals_df frames and '^' for the
# *_ppg_df frames (presumably power-play goals - confirm). Each scatter is
# skipped when its frame is empty.
if not pref_goals_df.empty:
ax.scatter(pref_goals_df.coords_x, pref_goals_df.coords_y, marker='*', s=30, c='#333333')
if not other_goals_df.empty:
ax.scatter(other_goals_df.coords_x, other_goals_df.coords_y, marker='*', s=30, c='#333333')
if not pref_ppg_df.empty:
ax.scatter(pref_ppg_df.coords_x, pref_ppg_df.coords_y, marker='^', s=30, c='#333333')
if not other_ppg_df.empty:
ax.scatter(other_ppg_df.coords_x, other_ppg_df.coords_y, marker='^', s=30, c='#333333')
# Draw the shot markers (40% opacity); unlike the goal markers these are
# drawn without an emptiness check.
ax.scatter(pref_shots_df.coords_x, pref_shots_df.coords_y, marker='o', s=10, c='#333333', alpha=0.4)
ax.scatter(other_shots_df.coords_x, other_shots_df.coords_y, marker='o', s=10, c='#333333', alpha=0.4)
# New figure for the first two latent dimensions (Z_1 on x, Z_2 on y).
plt.figure()
plt.ylabel('$Z_2$', fontsize=15)
plt.xlabel('$Z_1$', fontsize=15)
# Default the class names first: the colour defaults below are sized from
# len(target_names), so resolving colours first raised TypeError whenever
# both arguments were None.
if target_names is None:
    target_names = ["%d" % i for i in nb_labels]
if colors is None:
    # Five classes keep the fixed colour list; any other count samples the
    # "Accent" colormap with one entry per class.
    if len(target_names) == 5:
        colors = ["r", "g", "b", "y", "k"]
    else:
        cmap = plt.cm.get_cmap("Accent", len(target_names))
        colors = [cmap(i) for i in range(len(target_names))]
# For every label i, overlay a KDE contour plot and a scatter of the rows of
# `res` where ds_l == i, using columns 0 and 1 as coordinates.
# i indexes both `colors` and `target_names`, so labels are assumed to be
# 0-based integers - TODO confirm.
for i in nb_labels:
cur_pal = sns.light_palette(colors[i], as_cmap=True)
d0, d1 = res[ds_l == i][:, 0], res[ds_l == i][:, 1]
ax = sns.kdeplot(d0, d1, shade=False, cmap=cur_pal,
alpha=0.6, shade_lowest=False, gridsize=100)
ax.patch.set_facecolor('white')
# NOTE(review): collections[0] is the first collection on the axes; if all
# labels are drawn on the same axes it is not necessarily the one just
# added for label i - confirm the intent.
ax.collections[0].set_alpha(0)
plt.scatter(res[ds_l == i][:, 0], res[ds_l == i][:, 1],
s=1.2, lw=0, alpha=0.5, color=colors[i], label=target_names[i])
# Build the legend from one colour patch per target name.
handles = []
for ii in range(len(target_names)):
handles.append(mpatches.Patch(color=colors[ii], label=target_names[ii]))
plt.legend(handles=handles, loc="best")
# Save at 300 dpi to dest_path and close the current figure.
plt.savefig(dest_path, dpi=300)
plt.close()
# Optional second figure when do_3d is set; only its setup is visible in
# this excerpt.
if do_3d:
# density plot 1st and 3rd PC
plt.figure()
plt.ylabel('$Z_3$', fontsize=15)
plt.xlabel('$Z_1$', fontsize=15)
# Explore a univariate continuous feature: summary statistics of Fare.
# print(...) with a single argument produces the same output on Python 2
# and Python 3, whereas the bare `print x` statement is a SyntaxError on
# Python 3.
print(titanic_train['Fare'].mean())
print(titanic_train['Fare'].median())
print(titanic_train['Fare'].quantile(0.25))
print(titanic_train['Fare'].quantile(0.75))
print(titanic_train['Fare'].std())
# Bare expressions: their results are only displayed in an interactive
# session or notebook.
titanic_train['Fare'].describe()
titanic_train['SibSp'].describe()

# Explore univariate continuous features visually: box plot, histograms with
# different bin counts, and kernel density estimates of Fare.
# NOTE(review): `distplot` and the `shade=` keyword are deprecated in later
# seaborn releases - confirm against the installed version.
sns.boxplot(x='Fare',data=titanic_train)
sns.distplot(titanic_train['Fare'])
sns.distplot(titanic_train['Fare'], bins=20, rug=True, kde=False)
sns.distplot(titanic_train['Fare'], bins=100, kde=False)
sns.kdeplot(data=titanic_train['Fare'])
sns.kdeplot(data=titanic_train['Fare'], shade=True)

# Explore univariate categorical features: value counts and one-way
# frequency tables for Survived, Pclass and Sex.
titanic_train['Survived'].describe()
titanic_train['Survived'].value_counts()
pd.crosstab(index=titanic_train["Survived"], columns="count")
pd.crosstab(index=titanic_train["Pclass"], columns="count")
pd.crosstab(index=titanic_train["Sex"], columns="count")

# Explore univariate categorical features visually with count plots.
sns.countplot(x='Survived',data=titanic_train)
sns.countplot(x='Pclass',data=titanic_train)
):
prediction_dict = defaultdict(list)
for i in range(num_steps):
features, labels = next(dataset_iterator)
p_out_preds = iic_model.predict(features, steps=None)
if type(p_out_preds) == np.ndarray:
p_out_preds = [p_out_preds]
for k, p_out in enumerate(p_out_preds):
prediction_dict[f"y_pred_{k}"] += p_out.argmax(-1).tolist()
prediction_dict[f"y_true"] += labels["label"].numpy().tolist()
df = pd.DataFrame(prediction_dict)
g = sns.PairGrid(df)
g.map_diag(sns.kdeplot)
return g.map_offdiag(sns.kdeplot, n_levels=6)