Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
self._nw_lags = nw_lags
self._nw_overlap = nw_overlap
(self._y, self._x, self._weights, self._x_filtered,
self._index, self._time_has_obs) = self._prepare_data()
if self._weights is not None:
self._x_trans = self._x.mul(np.sqrt(self._weights), axis=0)
self._y_trans = self._y * np.sqrt(self._weights)
self.sm_ols = sm.WLS(self._y.get_values(),
self._x.get_values(),
weights=self._weights.values).fit()
else:
self._x_trans = self._x
self._y_trans = self._y
self.sm_ols = sm.OLS(self._y.get_values(),
self._x.get_values()).fit()
# -*- coding: utf-8 -*-
"""
Created on Sun May 06 05:32:15 2012
Author: Josef Perktold
edited by: Paul Hobson (2012-08-19)
"""
from scipy import stats
from matplotlib import pyplot as plt
import statsmodels.api as sm
# Example from the qqplot docstring: fit OLS on the Longley data and draw
# Q-Q plots of the residuals, one panel per qqplot keyword variant.
data = sm.datasets.longley.load(as_pandas=False)
# Add a constant column to the regressors (prepend=True is passed).
data.exog = sm.add_constant(data.exog, prepend=True)
mod_fit = sm.OLS(data.endog, data.exog).fit()
# Residuals of the OLS fit; every panel below plots these.
res = mod_fit.resid
# x position shared by the text labels placed in the panels.
left = -1.8 #x coordinate for text insert
fig = plt.figure()
# Panel 1 of the 2x2 grid: qqplot called with no extra keywords.
ax = fig.add_subplot(2, 2, 1)
sm.graphics.qqplot(res, ax=ax)
# Place the label at 75% of the upper y-limit so it sits inside the axes.
top = ax.get_ylim()[1] * 0.75
txt = ax.text(left, top, 'no keywords', verticalalignment='top')
# Give the label a faint black (alpha=0.1) background box.
txt.set_bbox(dict(facecolor='k', alpha=0.1))
# Panel 2 of the 2x2 grid: same residuals, now passing line='s'.
ax = fig.add_subplot(2, 2, 2)
sm.graphics.qqplot(res, line='s', ax=ax)
top = ax.get_ylim()[1] * 0.75
txt = ax.text(left, top, "line='s'", verticalalignment='top')
def hedge_ratio(Y, X):
    """Estimate the hedge ratio of ``Y`` against ``X`` with an OLS fit.

    ``X`` is passed through ``sm.add_constant`` and ``Y`` is regressed on the
    result; the fitted parameter at position 1 is returned.

    Parameters
    ----------
    Y : array-like
        Dependent series.
    X : array-like
        Explanatory series.

    Returns
    -------
    The fitted coefficient at position 1 of ``model.params``.
    """
    # Look into using Kalman Filter to calculate the hedge ratio
    X = sm.add_constant(X)
    model = sm.OLS(Y, X).fit()
    params = model.params
    # With pandas inputs ``params`` is a label-indexed Series, where ``[1]``
    # is a label lookup (its positional fallback is deprecated in pandas).
    # Use ``.iloc`` when it exists so the lookup is always positional; plain
    # ndarrays have no ``.iloc`` and are indexed directly.
    return getattr(params, "iloc", params)[1]
import matplotlib as mpl
from pandas import DataFrame, Series
import statsmodels.api as sm
import statsmodels.tsa.api as tsa
from statsmodels.tsa.arima_process import arma_generate_sample
# Load the stackloss dataset (as_pandas=False is passed) and wrap its
# regressors and response in a DataFrame / Series for the examples below.
data = sm.datasets.stackloss.load(as_pandas=False)
X = DataFrame(data.exog, columns=data.exog_name)
# Add the intercept explicitly as a column of ones.
X['intercept'] = 1.
Y = Series(data.endog)
#Example: OLS
# Fit ordinary least squares and print the summary, the coefficients and
# their covariance matrix.
model = sm.OLS(Y, X)
results = model.fit()
print(results.summary())
print(results.params)
print(results.cov_params())
# Influence diagnostics for the OLS fit, printed as a table.
infl = results.get_influence()
print(infl.summary_table())
#raise
#Example RLM
# Robust linear model on the same data using the HuberT norm.
huber_t = sm.RLM(Y, X, M=sm.robust.norms.HuberT())
hub_results = huber_t.fit()
print(hub_results.params)
print(hub_results.bcov_scaled)
civs = [civ for civ in civs if civ[0] not in ivs]
reg_covars = []
for var in self.model.graph.predecessors(X):
if var in self.model.observed:
reg_covars.append(var)
# Get CIV conditionals
civ_conditionals = []
for civ in civs:
civ_conditionals.extend(civ[1])
# First stage regression.
params = (
sm.OLS(data.loc[:, X], data.loc[:, reg_covars + civ_conditionals])
.fit()
.params
)
data["X_pred"] = np.zeros(data.shape[0])
for var in reg_covars:
data.X_pred += params[var] * data.loc[:, var]
summary = sm.OLS(
data.loc[:, Y], data.loc[:, ["X_pred"] + civ_conditionals]
).fit()
return summary.params["X_pred"], summary
'gold',
'jpy',
'cad'])
# In[6]:
#create r squared bar charts
var=locals()
for i in df.columns:
if i!='cad':
x=sm.add_constant(df[i])
y=df['cad']
m=sm.OLS(y,x).fit()
var[str(i)]=m.rsquared
ax=plt.figure(figsize=(10,5)).add_subplot(111)
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
width=0.7
colorlist=['#9499a6','#9499a6','#9499a6','#9499a6',
'#9499a6','#9499a6','#9499a6','#582a20',
'#be7052','#f2c083','#9499a6','#9499a6']
temp=list(df.columns)
for i in temp:
if i!='cad':
plt.bar(temp.index(i)+width,
var[str(i)],width=width,label=i,
def _get_start(self):
    """Build the starting parameter vector.

    The leading entries are the OLS coefficients of ``self.endog`` on
    ``self.exog`` (starting values for the mean structure parameters).
    They are followed by one zero per column of ``self.exog_scale``,
    ``self.exog_smooth`` and ``self.exog_noise``.
    """
    mean_start = sm.OLS(self.endog, self.exog).fit().params
    # Total number of columns across the three remaining design matrices.
    n_rest = sum(
        design.shape[1]
        for design in (self.exog_scale, self.exog_smooth, self.exog_noise)
    )
    return np.concatenate((mean_start, np.zeros(n_rest)))
"""Generalized Least Squares
"""
import statsmodels.api as sm
# Load the Longley data and add a constant column to the regressors.
data = sm.datasets.longley.load()
data.exog = sm.add_constant(data.exog)
# The Longley dataset is a time series dataset
# Let's assume that the data is heteroskedastic and that we know
# the nature of the heteroskedasticity. We can then define
# `sigma` and use it to give us a GLS model
# First we will obtain the residuals from an OLS fit
ols_resid = sm.OLS(data.endog, data.exog).fit().resid
# Assume that the error terms follow an AR(1) process with a trend
# resid[i] = beta_0 + rho*resid[i-1] + e[i]
# where e ~ N(0,some_sigma**2)
# and that rho is simply the correlation of the residuals
# a consistent estimator for rho is to regress the residuals
# on the lagged residuals
resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
# Report the t-value and p-value at position 1 of the lagged-residual fit.
# print() is called as a function: the Python 2 statement form is a
# SyntaxError on Python 3, while this single-argument call prints the same
# thing on both.
print(resid_fit.tvalues[1])
print(resid_fit.pvalues[1])
# While we don't have strong evidence that the errors follow an AR(1)
# process we continue
rho = resid_fit.params[1]
"""
Weighted Least Squares
example is extended to look at the meaning of rsquared in WLS,
at outliers, compares with RLM and a short bootstrap
"""
from __future__ import print_function
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
# Load the ccard dataset, add a constant column (prepend=False is passed)
# and fit a plain OLS model as the baseline for the WLS discussion below.
data = sm.datasets.ccard.load()
data.exog = sm.add_constant(data.exog, prepend=False)
ols_fit = sm.OLS(data.endog, data.exog).fit()
# perhaps the residuals from this fit depend on the square of income
# NOTE(review): assumes column 2 of exog holds income squared and that exog
# supports [:, 2] indexing (i.e. is a 2-D array) - confirm against ccard.
incomesq = data.exog[:,2]
# Scatter the OLS residuals against that column to check for the pattern.
plt.scatter(incomesq, ols_fit.resid)
#@savefig wls_resid_check.png
plt.grid()
# If we think that the variance is proportional to income**2
# we would want to weight the regression by income
# the weights argument in WLS weights the regression by its square root
# and since income enters the equation, if we have income/income
# it becomes the constant, so we would want to perform
# this type of regression without an explicit constant in the design
#..data.exog = data.exog[:,:-1]
else:
genes_list = self.refgen.bootstrap_candidate_genes(
locus_list, flank_limit=flank_limit, chain=True,
include_parent_locus=True
)
self.log("Found {} candidate genes", len(genes_list))
# Get global and local degree for candidates
gdegree = self.global_degree(genes_list, trans_locus_only=True)
ldegree = self.local_degree(genes_list, trans_locus_only=True)
# Merge the columns
degree = ldegree.merge(gdegree,left_index=True,right_index=True)
degree.columns = ['local', 'global']
degree = degree.sort_values(by='global')
if include_regression:
# Add the regression lines
ols = sm.OLS(degree['local'], degree['global']).fit()
degree['resid'] = ols.resid
degree['fitted'] = ols.fittedvalues
degree = degree.sort_values(by='resid',ascending=False)
if iter_name is not None:
degree['iter_name'] = iter_name
return degree