Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
estimator=lambda y,x: OLS(y,x).fit().params,
nboot=399)
# create lagmat of both time series
dta = lagmat2ds(x, mxlg, trim='both')
dta = np.delete(dta, -1, axis = 1) # removal of the not lagged xs
#add constant
if addconst:
dtaown = add_constant(dta[:, 1:(mxlg + 1)], prepend=False)
dtajoint = add_constant(dta[:, 1:], prepend=False)
else:
raise NotImplementedError('Not Implemented')
#dtaown = dta[:, 1:mxlg]
#dtajoint = dta[:, 1:]
# Run ols on both models without and with lags of second variable
res2down = OLS(dta[:, 0], dtaown).fit()
res2djoint = OLS(dta[:, 0], dtajoint).fit()
#print results
#for ssr based tests see:
#http://support.sas.com/rnd/app/examples/ets/granger/index.htm
#the other tests are made-up
# Granger Causality test using ssr (F statistic)
fgc1 = ((res2down.ssr - res2djoint.ssr) /
res2djoint.ssr / mxlg * res2djoint.df_resid)
if verbose:
print('ssr based F test: F=%-8.4f, p=%-8.4f, df_denom=%d,'
' df_num=%d' % (fgc1,
stats.f.sf(fgc1, mxlg,
res2djoint.df_resid),
res2djoint.df_resid, mxlg))
if isinstance(RHS, np.ndarray) and RHS.size==0:
RHS_isemtpy = True
elif isinstance(RHS, pd.DataFrame) and RHS.empty:
RHS_isemtpy = True
if isinstance(exog_i, str):
exog_i = dmatrix(exog_i + "-1", data)
# all arrays or pandas-like
if RHS_isemtpy:
ax.plot(endog, exog_i, 'o', **kwargs)
fitted_line = OLS(endog, exog_i).fit()
x_axis_endog_name = 'x' if isinstance(exog_i, np.ndarray) else exog_i.name
y_axis_endog_name = 'y' if isinstance(endog, np.ndarray) else endog.design_info.column_names[0]
else:
res_yaxis = OLS(endog, RHS).fit()
res_xaxis = OLS(exog_i, RHS).fit()
xaxis_resid = res_xaxis.resid
yaxis_resid = res_yaxis.resid
x_axis_endog_name = res_xaxis.model.endog_names
y_axis_endog_name = res_yaxis.model.endog_names
ax.plot(xaxis_resid, yaxis_resid, 'o', **kwargs)
fitted_line = OLS(yaxis_resid, xaxis_resid).fit()
fig = abline_plot(0, fitted_line.params[0], color='k', ax=ax)
if x_axis_endog_name == 'y': # for no names regression will just get a y
x_axis_endog_name = 'x' # this is misleading, so use x
ax.set_xlabel("e(%s | X)" % x_axis_endog_name)
ax.set_ylabel("e(%s | X)" % y_axis_endog_name)
ax.set_title('Partial Regression Plot', **title_kwargs)
import collections
self.history = collections.defaultdict(list) #not really necessary
res_resid = None #if maxiter < 2 no updating
for i in range(maxiter):
#pinv_wexog is cached
if hasattr(self, 'pinv_wexog'):
del self.pinv_wexog
#self.initialize()
#print 'wls self',
results = self.fit()
self.history['self_params'].append(results.params)
if not i == maxiter-1: #skip for last iteration, could break instead
#print 'ols',
self.results_old = results #for debugging
#estimate heteroscedasticity
res_resid = OLS(self.link(results.resid**2), self.exog_var).fit()
self.history['ols_params'].append(res_resid.params)
#update weights
self.weights = 1./self.linkinv(res_resid.fittedvalues)
self.weights /= self.weights.max() #not required
self.weights[self.weights < 1e-14] = 1e-14 #clip
#print 'in iter', i, self.weights.var() #debug, do weights change
self.initialize()
#note results is the wrapper, results._results is the results instance
results._results.results_residual_regression = res_resid
return results
xdiff = np.diff(x)
#
xdall = lagmat(x[:,None], maxlag, trim='both')
nobs = xdall.shape[0]
xdall = np.c_[np.ones((nobs,1)), xdall]
xshort = x[-nobs:]
if store:
resstore = ResultsStore()
if autolag:
#search for lag length with highest information criteria
#Note: I use the same number of observations to have comparable IC
results = {}
for mlag in range(1, maxlag+1):
results[mlag] = OLS(xshort, xdall[:,:mlag+1]).fit()
if autolag.lower() == 'aic':
bestic, icbestlag = min((v.aic,k) for k,v in iteritems(results))
elif autolag.lower() == 'bic':
icbest, icbestlag = min((v.bic,k) for k,v in iteritems(results))
else:
raise ValueError("autolag can only be None, 'AIC' or 'BIC'")
#rerun ols with best ic
xdall = lagmat(x[:,None], icbestlag, trim='both')
nobs = xdall.shape[0]
xdall = np.c_[np.ones((nobs,1)), xdall]
xshort = x[-nobs:]
usedlag = icbestlag
if regresults:
resstore.results = results
>>> sm.graphics.plot_ccpr(results, 'single')
>>> plt.show()
.. plot:: plots/graphics_regression_ccpr.py
"""
fig, ax = utils.create_mpl_ax(ax)
exog_name, exog_idx = utils.maybe_name_or_idx(exog_idx, results.model)
results = maybe_unwrap_results(results)
x1 = results.model.exog[:, exog_idx]
#namestr = ' for %s' % self.name if self.name else ''
x1beta = x1*results.params[exog_idx]
ax.plot(x1, x1beta + results.resid, 'o')
from statsmodels.tools.tools import add_constant
mod = OLS(x1beta, add_constant(x1)).fit()
params = mod.params
fig = abline_plot(*params, **dict(ax=ax))
#ax.plot(x1, x1beta, '-')
ax.set_title('Component and component plus residual plot')
ax.set_ylabel("Residual + %s*beta_%d" % (exog_name, exog_idx))
ax.set_xlabel("%s" % exog_name)
return fig
regresults=regresults)
resstore.autolag_results = alres
bestlag -= startlag # convert to lag not column index
# rerun ols with best autolag
xdall = lagmat(xdiff[:, None], bestlag, trim='both', original='in')
nobs = xdall.shape[0]
xdall[:, 0] = x[-nobs - 1:-1] # replace 0 xdiff with level of x
xdshort = xdiff[-nobs:]
usedlag = bestlag
else:
usedlag = maxlag
icbest = None
if regression != 'nc':
resols = OLS(xdshort, add_trend(xdall[:, :usedlag + 1],
regression)).fit()
else:
resols = OLS(xdshort, xdall[:, :usedlag + 1]).fit()
adfstat = resols.tvalues[0]
# adfstat = (resols.params[0]-1.0)/resols.bse[0]
# the "asymptotically correct" z statistic is obtained as
# nobs/(1-np.sum(resols.params[1:-(trendorder+1)])) (resols.params[0] - 1)
# I think this is the statistic that is used for series that are integrated
# for orders higher than I(1), ie., not ADF but cointegration tests.
# Get approx p-value and critical values
pvalue = mackinnonp(adfstat, regression=regression, N=1)
critvalues = mackinnoncrit(N=1, regression=regression, nobs=nobs)
critvalues = {"1%" : critvalues[0], "5%" : critvalues[1],
"10%" : critvalues[2]}
# all arrays or pandas-like
if RHS_isemtpy:
ax.plot(endog, exog_i, 'o', **kwargs)
fitted_line = OLS(endog, exog_i).fit()
x_axis_endog_name = 'x' if isinstance(exog_i, np.ndarray) else exog_i.name
y_axis_endog_name = 'y' if isinstance(endog, np.ndarray) else endog.design_info.column_names[0]
else:
res_yaxis = OLS(endog, RHS).fit()
res_xaxis = OLS(exog_i, RHS).fit()
xaxis_resid = res_xaxis.resid
yaxis_resid = res_yaxis.resid
x_axis_endog_name = res_xaxis.model.endog_names
y_axis_endog_name = res_yaxis.model.endog_names
ax.plot(xaxis_resid, yaxis_resid, 'o', **kwargs)
fitted_line = OLS(yaxis_resid, xaxis_resid).fit()
fig = abline_plot(0, fitted_line.params[0], color='k', ax=ax)
if x_axis_endog_name == 'y': # for no names regression will just get a y
x_axis_endog_name = 'x' # this is misleading, so use x
ax.set_xlabel("e(%s | X)" % x_axis_endog_name)
ax.set_ylabel("e(%s | X)" % y_axis_endog_name)
ax.set_title('Partial Regression Plot', **title_kwargs)
#NOTE: if we want to get super fancy, we could annotate if a point is
#clicked using this widget
#http://stackoverflow.com/questions/4652439/
#is-there-a-matplotlib-equivalent-of-matlabs-datacursormode/
#4674445#4674445
if obs_labels is True:
if data is not None:
nsample = 1000
sig = 0.5
x1 = np.linspace(0, 20, nsample)
X = np.c_[x1, (x1-5)**2, np.ones(nsample)]
np.random.seed(0)#9876789) #9876543)
beta = [0.5, -0.015, 1.]
y_true2 = np.dot(X, beta)
w = np.ones(nsample)
w[nsample*6//10:] = 4 #Note this is the squared value
#y2[:nsample*6/10] = y_true2[:nsample*6/10] + sig*1. * np.random.normal(size=nsample*6/10)
#y2[nsample*6/10:] = y_true2[nsample*6/10:] + sig*4. * np.random.normal(size=nsample*4/10)
y2 = y_true2 + sig*np.sqrt(w)* np.random.normal(size=nsample)
X2 = X[:,[0,2]]
X2 = X
res_ols = OLS(y2, X2).fit()
print('OLS beta estimates')
print(res_ols.params)
print('OLS stddev of beta')
print(res_ols.bse)
print('\nWLS')
mod0 = GLSHet2(y2, X2, exog_var=w)
res0 = mod0.fit()
print('new version')
mod1 = GLSHet(y2, X2, exog_var=w)
res1 = mod1.iterative_fit(2)
print('WLS beta estimates')
print(res1.params)
print(res0.params)
print('WLS stddev of beta')
print(res1.bse)
#compare with previous version GLSHet2, refactoring check
if x2 is not None:
dm_excl = dm(x2, lin_pred)
if mean_deriv is not None:
# allow both and stack
dm_excl = np.column_stack((dm_excl, mean_deriv))
elif mean_deriv is not None:
dm_excl = mean_deriv
else:
raise ValueError('either exog_extra or mean_deriv have to be provided')
# TODO check for rank or redundant, note OLS calculates the rank
k_constraint = dm_excl.shape[1]
fittedvalues = res.predict() # discrete has linpred instead of mean
v = var_func(fittedvalues)
std = np.sqrt(v)
res_ols1 = OLS(res.resid_response / std, np.column_stack((dm_incl, dm_excl)) / std[:, None]).fit()
# case: nonrobust assumes variance implied by distribution is correct
c1 = res_ols1.ess
pval1 = stats.chi2.sf(c1, k_constraint)
#print c1, stats.chi2.sf(c1, 2)
# case: robust to dispersion
c2 = nobs * res_ols1.rsquared
pval2 = stats.chi2.sf(c2, k_constraint)
#print c2, stats.chi2.sf(c2, 2)
# case: robust to heteroscedasticity
from statsmodels.stats.multivariate_tools import partial_project
pp = partial_project(dm_excl / std[:,None], dm_incl / std[:,None])
resid_p = res.resid_response / std
res_ols3 = OLS(np.ones(nobs), pp.resid * resid_p[:,None]).fit()