Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
res_o.pearson_chi2 / res_o.df_resid
# ### condensed data (unique observations with frequencies)
#
# Combining identical observations and using frequency weights to take
# into account the multiplicity of observations produces exactly the same
# results. Some result attributes will differ when we want to have
# information about the individual observation and not about the aggregate
# of all identical observations. For example, residuals do not take
# ``freq_weights`` into account.
# Poisson GLM on the condensed data: every unique row is weighted by the
# number of times it occurs, as recorded in the ``freq`` column.
glm = smf.glm(
    formula='affairs ~ rate_marriage + age + yrs_married',
    data=dc,
    family=sm.families.Poisson(),
    freq_weights=np.asarray(dc['freq']),
)
res_f = glm.fit()
print(res_f.summary())

# Pearson chi-square divided by the residual degrees of freedom.
res_f.pearson_chi2 / res_f.df_resid
# ### condensed using ``var_weights`` instead of ``freq_weights``
#
# Next, we compare ``var_weights`` to ``freq_weights``. It is a common
# practice to incorporate ``var_weights`` when the endogenous variable
# reflects averages and not identical observations.
# I do not see a theoretical reason why it produces the same results (in
# general).
#
# This produces the same results, but ``df_resid`` differs from the
# ``freq_weights`` example because ``var_weights`` do not change the number
# of effective observations.
covs = data_dict['covs']
# create PHOTON hyperpipe
my_pipe, metrics = setup_model()
# shuffle targets if running a permutation test
if perm_test == True:
print('PERMUTATION TEST: SHUFFLING TARGETS NOW!')
np.random.shuffle(targets)
# remove confounders from target data (age, gender, site, ICV)
if covs_out:
import statsmodels.api as sm
ols_X = covs
ols_X = sm.add_constant(ols_X)
ols_model = sm.OLS(targets, ols_X)
ols_results = ols_model.fit()
targets = np.asarray(ols_results.resid)
print('Removing covariates from targets.')
# fit PHOTON model
results = my_pipe.fit(data, targets)
results_tree = results.result_tree
# get feature importance
if getImp:
importance_scores = get_feature_importance(results=results, feature_names=snp_names, data=data, targets=targets, roiName=roiName)
else:
importance_scores = []
# TEST SET -> Test
#best_config_performance_test = results_tree.get_best_config_performance_validation_set(outer_cv_fold=1) # when outer fold is active
s = self.table.multiplerowlist
if len(s) == 0:
sub = data.index
else:
sub = data.index[s]
self.sub = sub
y,X = dmatrices(formula, data=data, return_type='dataframe')
self.X = X
self.y = y
Xf = X.ix[sub]
yf = y.ix[sub]
if est == 'ols':
#model = smf.ols(formula=formula, data=s)
model = sm.OLS(yf, Xf)
elif est == 'gls':
model = sm.GLS(y, X)
elif est == 'logit':
model = sm.Logit(y, X)
return model
# ## WLS Estimation
#
# ### Artificial data: Heteroscedasticity 2 groups
#
# Model assumptions:
#
# * Misspecification: true model is quadratic, estimate only linear
# * Independent noise/error term
# * Two groups for error variance, low and high variance groups
# Simulate a response that is quadratic in x and observed with two
# different error scales.
nsample = 50
x = np.linspace(0, 20, nsample)
X = np.column_stack((x, (x - 5) ** 2))
X = sm.add_constant(X)
beta = [5.0, 0.5, -0.01]
sig = 0.5

# Per-observation error scale: 1 for the first 60% of the sample and 3 for
# the remainder. The slice bound uses floor division because a float index
# (which ``nsample * 6 / 10`` yields on Python 3) raises TypeError.
w = np.ones(nsample)
w[nsample * 6 // 10:] = 3

y_true = np.dot(X, beta)
e = np.random.normal(size=nsample)
y = y_true + sig * w * e

# Keep only the first two design columns, so the model fitted afterwards
# omits one regressor of the data-generating process (deliberate
# misspecification).
X = X[:, [0, 1]]
# ### WLS knowing the true variance ratio of heteroscedasticity
# WLS expects weights proportional to the inverse of each observation's
# error *variance*. The noise is generated with standard deviation
# ``sig * w``, so the variance ratio between the groups is ``w ** 2``.
mod_wls = sm.WLS(y, X, weights=1.0 / (w ** 2))
res_wls = mod_wls.fit()
print(res_wls.summary())
@batch_transform
def ols_transform(data, spreads):
    """Return the z-score of the latest PEP-vs-KO regression spread.

    Regresses the 'PEP' price series on the 'KO' price series (with an
    intercept) by OLS, takes the last residual as the current spread, and
    standardises it against the ten most recently stored spreads.

    Parameters
    ----------
    data : object exposing ``price['PEP']`` and ``price['KO']``
        Price history used for the regression.
    spreads : list
        Running history of spreads. It is mutated: the current spread is
        appended on every call, after the z-score has been computed.

    Returns
    -------
    float
        The z-score of the current spread, or ``np.nan`` while ``spreads``
        holds ten or fewer earlier values.
    """
    pep = data.price['PEP']
    ko = data.price['KO']

    # Put the constant in the LAST column explicitly so that the fitted
    # parameters come back as (slope, intercept). Relying on the default
    # would swap the two, because add_constant prepends the constant unless
    # told otherwise.
    exog = sm.add_constant(ko, prepend=False)
    beta, intercept = sm.OLS(pep, exog).fit().params

    spread = (pep - (beta * ko + intercept))[-1]

    if len(spreads) > 10:
        # The window excludes the current spread; it is appended below.
        recent = spreads[-10:]
        z_score = (spread - np.mean(recent)) / np.std(recent)
    else:
        z_score = np.nan

    spreads.append(spread)
    return z_score
def statsmodels_test():
    """Fit an OLS model to synthetic quadratic data and print its summary.

    Builds 100 points on [0, 10], forms a design matrix from x and x**2
    plus a constant, adds standard-normal noise to the linear predictor and
    prints the fitted model's summary as a smoke test.
    """
    nsample = 100
    grid = numpy.linspace(0, 10, nsample)
    design = sm.add_constant(numpy.column_stack((grid, grid ** 2)))
    coefs = numpy.array([1, 0.1, 10])
    noise = numpy.random.normal(size=nsample)
    response = numpy.dot(design, coefs) + noise
    results = sm.OLS(response, design).fit()
    print("statsmodels tests >>>", results.summary())
model_gen=lambda y, x: sm.OLS(y, x),
**kwargs)
# Case study: the mtcars dataset in Python.
# Step 1 - download the data.
# Dataset catalogue: https://vincentarelbundock.github.io/Rdatasets/datasets.html
import statsmodels.api as sm

dataset_mtcars = sm.datasets.get_rdataset('mtcars', 'datasets')
mtcars = dataset_mtcars.data
# Preview the first rows (only displayed when run interactively).
mtcars.head()
#structure
def examples_errors(self):
    """Run the doctests found in ``self.raw_doc`` and return their output.

    Each example runs with ``np``, ``pd`` and ``sm`` available as globals,
    with whitespace normalisation and exception-detail matching relaxed.

    Returns
    -------
    str
        Everything the doctest runner wrote, concatenated over all tests
        found; an empty string if the runner reported nothing.
    """
    option_flags = doctest.NORMALIZE_WHITESPACE | doctest.IGNORE_EXCEPTION_DETAIL
    namespace = {'np': numpy, 'pd': pandas, 'sm': statsmodels.api}
    doctest_runner = doctest.DocTestRunner(optionflags=option_flags)

    reports = []
    for example in doctest.DocTestFinder().find(
            self.raw_doc, self.name, globs=namespace):
        # Capture the runner's report in memory instead of printing it.
        buffer = StringIO()
        doctest_runner.run(example, out=buffer.write)
        reports.append(buffer.getvalue())
    return ''.join(reports)
def _initialize(self, data1, data2):
try:
import statsmodels.api as sm
lowess = sm.nonparametric.lowess
except ImportError:
print("===================================")
print("Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'")
print("===================================")
# NOTE: delta parameter is only available from statsmodels > 0.5.0
delta = (max(data1) - min(data1)) * 0.01
frac = 0.1
if len(data1) < 100:
frac = 1.0
k = 0
while k <= 10:
k += 1
# Input data is y/x -> needs switch