Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_regression():
    """Smoke-test NGBRegressor on the Boston housing data.

    Fits a default ``NGBRegressor`` on an 80/20 split (fixed
    ``random_state=42`` for the split) and asserts that the mean squared
    error of the point predictions on the held-out 20% is at most 8.0.

    Relies on ``train_test_split`` and ``NGBRegressor`` being imported at
    module level.
    """
    from sklearn.datasets import load_boston
    from sklearn.metrics import mean_squared_error

    # ``return_X_y`` must be passed by keyword: newer scikit-learn releases
    # make it keyword-only, so the positional ``load_boston(True)`` fails.
    # NOTE(review): ``load_boston`` itself was removed in scikit-learn 1.2;
    # this test needs an older scikit-learn or a replacement dataset.
    data, target = load_boston(return_X_y=True)
    x_train, x_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42
    )

    ngb = NGBRegressor(verbose=False)
    ngb.fit(x_train, y_train)
    preds = ngb.predict(x_test)

    score = mean_squared_error(y_test, preds)
    assert score <= 8.0
)
# Fragment of a per-fold evaluation script: fit on the training split, choose
# the number of boosting iterations on the validation split, then refit on
# the combined data and produce predictive distributions.
# NOTE(review): the enclosing loop header and the start of the preceding
# NGBRegressor(...) call are outside this excerpt.
ngb.fit(X_train, y_train)
# pick the best iteration on the validation set
# (the staged_* results are iterated below, presumably one entry per
# boosting iteration)
y_preds = ngb.staged_predict(X_val)
y_forecasts = ngb.staged_pred_dist(X_val)
# NOTE(review): despite the name, mean_squared_error is called with default
# arguments, so these are presumably MSE values, not RMSE. The argmin used
# below is the same either way.
val_rmse = [mean_squared_error(y_pred, y_val) for y_pred in y_preds]
# Mean negative log-density of the validation targets under each staged
# forecast; not used in the lines visible here.
val_nll = [
-y_forecast.logpdf(y_val.flatten()).mean() for y_forecast in y_forecasts
]
# +1 turns the 0-based argmin index into an iteration count (presumably what
# max_iter expects below -- TODO confirm).
best_itr = np.argmin(val_rmse) + 1
# re-train using all the data after tuning number of iterations
# NOTE(review): eval() executes whatever string args.distn / args.score hold.
# If these come from the command line, an explicit name -> class lookup
# (as already done for args.base) would be safer.
ngb = NGBRegressor(
Base=base_name_to_learner[args.base],
Dist=eval(args.distn),
Score=eval(args.score),
n_estimators=args.n_est,
learning_rate=args.lr,
natural_gradient=args.natural,
minibatch_frac=args.minibatch_frac,
verbose=args.verbose,
)
ngb.fit(X_trainall, y_trainall)
# the final prediction for this fold
# (both forecasts are capped at the iteration count chosen above)
forecast = ngb.pred_dist(X_test, max_iter=best_itr)
forecast_val = ngb.pred_dist(X_val, max_iter=best_itr)
# set the appropriate scale if using a homoskedastic Normal
from ngboost import NGBRegressor
from ngboost.distns import Normal
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
if __name__ == "__main__":
    # Demo: fit NGBoost with a Normal predictive distribution on the Boston
    # housing data and report test-set MSE and negative log-likelihood.

    # ``return_X_y`` must be passed by keyword: newer scikit-learn releases
    # make it keyword-only, so the positional ``load_boston(True)`` fails.
    # NOTE(review): ``load_boston`` itself was removed in scikit-learn 1.2.
    X, Y = load_boston(return_X_y=True)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

    ngb = NGBRegressor(Dist=Normal).fit(X_train, Y_train)
    Y_preds = ngb.predict(X_test)
    Y_dists = ngb.pred_dist(X_test)

    # test Mean Squared Error
    # Arguments follow scikit-learn's (y_true, y_pred) order; MSE is
    # symmetric, so the reported value is unchanged.
    test_MSE = mean_squared_error(Y_test, Y_preds)
    print("Test MSE", test_MSE)

    # test Negative Log Likelihood
    test_NLL = -Y_dists.logpdf(Y_test.flatten()).mean()
    print("Test NLL", test_NLL)
# Fragment of a cross-validation script. The first three statements finish
# building one (train, test) index pair; the loop that encloses them starts
# outside this excerpt.
# The first end_train entries of the permutation form the training indices,
# the entries from end_train up to n form the test indices.
train_index = permutation[0:end_train]
test_index = permutation[end_train:n]
folds.append((train_index, test_index))
# Evaluate each fold in turn.
# NOTE(review): the loop body continues past the end of this excerpt.
for itr, (train_index, test_index) in enumerate(folds):
X_trainall, X_test = X[train_index], X[test_index]
y_trainall, y_test = y[train_index], y[test_index]
# Hold out 20% of this fold's training data as a validation split.
# No random_state is passed, so the split presumably differs between runs.
X_train, X_val, y_train, y_val = train_test_split(
X_trainall, y_trainall, test_size=0.2
)
# Accumulate the flattened test targets across folds.
y_true += list(y_test.flatten())
# NOTE(review): eval() executes whatever string args.distn / args.score hold.
# If these come from the command line, an explicit name -> class lookup
# (as already done for args.base) would be safer.
ngb = NGBRegressor(
Base=base_name_to_learner[args.base],
Dist=eval(args.distn),
Score=eval(args.score),
n_estimators=args.n_est,
learning_rate=args.lr,
natural_gradient=args.natural,
minibatch_frac=args.minibatch_frac,
verbose=args.verbose,
)
ngb.fit(X_train, y_train)
# pick the best iteration on the validation set
y_preds = ngb.staged_predict(X_val)
y_forecasts = ngb.staged_pred_dist(X_val)