Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# ## GLM: Gamma for proportional count response
#
# ### Load data
#
# In the example above, we printed the ``NOTE`` attribute to learn about
# the Star98 dataset. statsmodels datasets ship with other useful
# information. For example:
# Print the long-form description text attached to the Scotland dataset module.
print(sm.datasets.scotland.DESCRLONG)
# Load the data and add a constant to the exogenous variables:
data2 = sm.datasets.scotland.load()
# prepend=False asks add_constant to place the intercept column after the
# existing regressors instead of in front of them.
data2.exog = sm.add_constant(data2.exog, prepend=False)
# Peek at the first five rows of the design matrix and of the response.
# NOTE(review): the 2-D positional slice assumes ``exog`` is a NumPy array;
# confirm what ``load()`` returns for the installed statsmodels version.
print(data2.exog[:5, :])
print(data2.endog[:5])
# ### Fit and summary
# Build a GLM with a Gamma family (using that family's default link), fit it,
# and print the results table.
glm_gamma = sm.GLM(data2.endog, data2.exog, family=sm.families.Gamma())
glm_results = glm_gamma.fit()
print(glm_results.summary())
# ## GLM: Gaussian distribution with a noncanonical link
#
# ### Artificial data
# Number of artificial observations and the matching 0..nobs2-1 index vector.
nobs2 = 100
x = np.arange(nobs2)
# 3.0
#
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel
#
# Show the NOTE text shipped with the Spector dataset.
# print() with a single parenthesised argument behaves the same on Python 2
# and Python 3, unlike the bare ``print x`` statement it replaces.
print(sm.datasets.spector.NOTE)
#
# Load the dataset as pandas objects and put an intercept column in front of
# the regressors (prepend=True).
data = sm.datasets.spector.load_pandas()
exog = sm.add_constant(data.exog, prepend=True)
endog = data.endog
#
# Reference fit using the built-in Probit model.
sm_probit = sm.Probit(endog, exog).fit()
#
# * To create your own likelihood model, you just need to override the loglike method.
#
class MyProbit(GenericLikelihoodModel):
def loglike(self, params):
exog = self.exog
#
# This is a brief introduction notebook to VARMAX models in statsmodels.
# The VARMAX model is generically specified as:
# $$
# y_t = \nu + A_1 y_{t-1} + \dots + A_p y_{t-p} + B x_t + \epsilon_t +
# M_1 \epsilon_{t-1} + \dots + M_q \epsilon_{t-q}
# $$
#
# where $y_t$ is a $\text{k_endog} \times 1$ vector.
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
# Fetch the `lutkepohl2` dataset through statsmodels' webuse helper, using the
# Stata Press r12 directory as the base URL.
dta = sm.datasets.webuse('lutkepohl2', 'https://www.stata-press.com/data/r12/')
# Use the `qtr` column as the row index so rows can be selected by date label.
dta.index = dta.qtr
# Label-based selection: rows from 1960-04-01 through 1978-10-01 and the three
# columns that serve as the endogenous variables below.
endog = dta.loc['1960-04-01':'1978-10-01',
['dln_inv', 'dln_inc', 'dln_consump']]
# ## Model specification
#
# The `VARMAX` class in statsmodels allows estimation of VAR, VMA, and
# VARMA models (through the `order` argument), optionally with a constant
# term (via the `trend` argument). Exogenous regressors may also be included
# (as usual in statsmodels, by the `exog` argument), and in this way a time
# trend may be added. Finally, the class allows measurement error (via the
# `measurement_error` argument) and allows specifying either a diagonal or
# unstructured innovation covariance matrix (via the `error_cov_type`
# argument).
# ## Example 1: VAR
# coding: utf-8
#Load the El Nino dataset. Consists of 60 years worth of Pacific Ocean sea
#surface temperature data.
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
data = sm.datasets.elnino.load(as_pandas=False)

# Draw an HDR functional boxplot of the sea surface temperature curves.
# The years 1982-83 and 1997-98 show up as outliers; these are the years
# where El Nino (a climate pattern characterized by warming up of the sea
# surface and higher air pressures) occurred with unusual intensity.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# Column 0 of raw_data supplies the curve labels (cast to int); the remaining
# columns are the values that get plotted.
fig, res = sm.graphics.hdrboxplot(
    data.raw_data[:, 1:],
    labels=data.raw_data[:, 0].astype(int),
    ax=ax,
)

# Horizontal reference segment at 25 spanning x = 0..10.
ax.plot([0, 10], [25, 25])

# Ticks every third position, starting at -1: [-1, 2, 5, 8, 11].
ax.set_xticks(np.arange(-1, 12, 3))
ax.set_ylabel("Sea surface temperature (C)")
ax.set_xlabel("Month of the year")
import statsmodels.api as sm
import pandas as pd
# Load the macrodata dataset and keep its full DataFrame.
dta = sm.datasets.macrodata.load_pandas().data
# Build the list of quarterly dates from 1959Q1 through 2009Q3.
dates = sm.tsa.datetools.dates_from_range('1959Q1', '2009Q3')
index = pd.DatetimeIndex(dates)
# Replace the frame's index with the date index, modifying `dta` in place.
# NOTE(review): assumes the generated date range has exactly one entry per
# row of `dta` -- confirm against the dataset length.
dta.set_index(index, inplace=True)
#
# A survey of women only was conducted in 1974 by *Redbook* asking about extramarital affairs.
#
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import logit, probit, poisson, ols
#
# Print the SOURCE and NOTE texts shipped with the Fair (affairs) dataset.
# print() with a single parenthesised argument behaves the same on Python 2
# and Python 3, unlike the bare ``print x`` statement it replaces.
print(sm.datasets.fair.SOURCE)
#
print(sm.datasets.fair.NOTE)
#
# Load the dataset as a pandas DataFrame.
dta = sm.datasets.fair.load_pandas().data
#
# Binary indicator stored as float: 1.0 where `affairs` is positive, else 0.0.
dta['affair'] = (dta['affairs'] > 0).astype(float)
# Show the first ten rows, including the new indicator column.
print(dta.head(10))
#
# the parameter vectors, and can be constructed using the formula interface.
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from pandas_datareader.data import DataReader
# Print small floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)
# ## Example 1: Copper
#
# We first consider parameter stability in the copper dataset (description
# below).
print(sm.datasets.copper.DESCRLONG)
dta = sm.datasets.copper.load_pandas().data
# Year-start dates from 1951 through 1975. 'YS' is the year-start alias;
# the older 'AS' spelling is deprecated in recent pandas releases.
dta.index = pd.date_range('1951-01-01', '1975-01-01', freq='YS')
endog = dta['WORLDCONSUMPTION']
# To the regressors in the dataset, we add a column of ones for an
# intercept
exog = sm.add_constant(
    dta[['COPPERPRICE', 'INCOMEINDEX', 'ALUMPRICE', 'INVENTORYINDEX']])
# First, construct and fit the model, and print a summary. Although the
# `RLS` model computes the regression parameters recursively, so there are
# as many estimates as there are datapoints, the summary table only presents
# the regression parameters estimated on the entire sample; except for small
# effects from initialization of the recursions, these estimates are
# equivalent to OLS estimates.
#Python write to file
#csv
#excel
#googlesheets
import statsmodels.api as sm
# Fetch the `iris` dataset from the R `datasets` package via Rdatasets.
iris = sm.datasets.get_rdataset(dataname='iris', package='datasets')
# The bare expressions below only display a value in an interactive session;
# they have no effect when the file is run as a script.
iris.data
iris.data.head()
df1 = iris.data
type(df1)
# Catalogue of available datasets:
# https://vincentarelbundock.github.io/Rdatasets/datasets.html
mtcars = sm.datasets.get_rdataset(dataname='mtcars', package='datasets')
mtcars.data
mtcars.data.head()
df2 = mtcars.data
type(df2)
#
import pandas as pd
# Relative paths are resolved against the current working directory, so the
# file is saved there.
df2.to_excel('exceloutput.xlsx')
# Saving fails if the workbook is currently open in another program.
# Each call below writes the whole file again, replacing the previous
# contents. `sheet_name` is passed by keyword because positional use of
# to_excel arguments other than the path is deprecated in pandas.
df1.to_excel('exceloutput.xlsx', sheet_name='iris')
# Same export, explicitly selecting the xlsxwriter engine.
df2.to_excel('exceloutput.xlsx', engine='xlsxwriter')
# Absolute path: use forward slashes and make sure the folders already exist.
df1.to_excel("E:/pywork/pydata/exceloutput2.xlsx", sheet_name='iris')
#see direction of / and check if folders exist
def statsmodel_stream(stream):
    """Return a CSV reader and a datashape type for a statsmodels dataset.

    Parameters
    ----------
    stream : str
        Name of a dataset module under ``statsmodels.api.datasets``.
        Only ``'randhie'`` is supported.

    Returns
    -------
    tuple
        ``(sreader, dtype)`` where ``sreader`` is a ``csv.reader`` positioned
        just after the header line of the dataset file and ``dtype`` is the
        result of ``ndt.type`` applied to the record description.

    Raises
    ------
    AttributeError
        If ``stream`` is not an attribute of ``sm.datasets``.
    NotImplementedError
        If ``stream`` names a dataset other than ``'randhie'``.

    Notes
    -----
    The underlying file object is handed to the reader and is not closed
    here; it stays open for as long as the returned reader is in use.
    """
    import statsmodels.api as sm

    data = getattr(sm.datasets, stream)

    # Reject unsupported datasets before opening anything, so that no file
    # handle is left dangling when the exception propagates.
    if stream != 'randhie':
        raise NotImplementedError(
            "Importing this dataset has not been implemented yet")

    # For a description of this dataset, see:
    # http://statsmodels.sourceforge.net/devel/datasets/generated/randhie.html
    dtypes = ('{mdvis: string, lncoins: float32, idp: int32,'
              ' lpi:float32, fmde: float32, physlm: float32,'
              ' disea: float32, hlthg: int32, hlthf: int32,'
              ' hlthp: int32}')

    # NOTE(review): binary mode matches the original code; on Python 3
    # csv.reader expects a text-mode file -- confirm the target interpreter.
    f = open(data.PATH, 'rb')
    f.readline()  # read out the headers line

    sreader = csv.reader(f)
    dtype = ndt.type(dtypes)
    return sreader, dtype
"""
Look at some macro plots, then do some VARs and IRFs.
"""
import numpy as np
import statsmodels.api as sm
import scikits.timeseries as ts
import scikits.timeseries.lib.plotlib as tplt
# Load the macrodata dataset in its non-pandas form and keep only the
# underlying record array.
data = sm.datasets.macrodata.load(as_pandas=False)
data = data.data
### Create Timeseries Representations of a few vars
# Quarterly date array running from 1959Q1 through 2009Q3.
dates = ts.date_array(start_date=ts.Date('Q', year=1959, quarter=1),
end_date=ts.Date('Q', year=2009, quarter=3))
# Select three fields and reinterpret them as a plain (n, 3) float array.
# NOTE(review): the .view(float) trick assumes all three fields are float64
# and laid out contiguously -- confirm for the NumPy version in use.
ts_data = data[['realgdp','realcons','cpi']].view(float).reshape(-1,3)
# Append a fourth column: (1 - unemp/100) * pop.
# NOTE(review): presumably an estimate of the employed population; confirm
# the units of `unemp` and `pop`.
ts_data = np.column_stack((ts_data, (1 - data['unemp']/100) * data['pop']))
# Combine the values with the date array into a time series object.
ts_series = ts.time_series(ts_data, dates)
# Create a time-series figure and plot the first column as a solid line in
# subplot position 221.
fig = tplt.tsfigure()
fsp = fig.add_tsplot(221)
fsp.tsplot(ts_series[:,0],'-')