How to use the statsmodels.api.datasets function in statsmodels

To help you get started, we’ve selected a few statsmodels examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

statsmodels / statsmodels / examples / python / glm.py View on Github

# ## GLM: Gamma for proportional count response
#
# ### Load data
#
#  In the example above, we printed the ``NOTE`` attribute to learn about
#  the Star98 dataset. statsmodels datasets ship with other useful
#  information. For example:

print(sm.datasets.scotland.DESCRLONG)

#  Load the data and add a constant to the exogenous variables:

data2 = sm.datasets.scotland.load()
data2.exog = sm.add_constant(data2.exog, prepend=False)
print(data2.exog[:5, :])
print(data2.endog[:5])

# ### Fit and summary

glm_gamma = sm.GLM(data2.endog, data2.exog, family=sm.families.Gamma())
glm_results = glm_gamma.fit()
print(glm_results.summary())

# ## GLM: Gaussian distribution with a noncanonical link
#
# ### Artificial data

nobs2 = 100
x = np.arange(nobs2)

jseabold / statsmodels-tutorial / generic_mle.py View on Github

# 3.0

# 

import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.base.model import GenericLikelihoodModel

# 

# Show the descriptive note that ships with the Spector dataset. print() is
# called as a function so the line is valid on Python 3; with a single
# argument it still behaves the same on Python 2.
print(sm.datasets.spector.NOTE)

# 

# Load the dataset through load_pandas() and add a constant column to the
# regressors (prepend=True is forwarded to add_constant).
data = sm.datasets.spector.load_pandas()
exog = sm.add_constant(data.exog, prepend=True)
endog = data.endog

# 

# Fit statsmodels' built-in Probit model on the same endog/exog.
sm_probit = sm.Probit(endog, exog).fit()

# 

# * To create your own Likelihood Model, you just need to override the loglike method.

# 

class MyProbit(GenericLikelihoodModel):
    def loglike(self, params):
        exog = self.exog

statsmodels / statsmodels / examples / python / statespace_varmax.py View on Github

#
# This is a brief introduction notebook to VARMAX models in statsmodels.
# The VARMAX model is generically specified as:
# $$
# y_t = \nu + A_1 y_{t-1} + \dots + A_p y_{t-p} + B x_t + \epsilon_t +
# M_1 \epsilon_{t-1} + \dots + M_q \epsilon_{t-q}
# $$
#
# where $y_t$ is a $\text{k_endog} \times 1$ vector.

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt

dta = sm.datasets.webuse('lutkepohl2', 'https://www.stata-press.com/data/r12/')
dta.index = dta.qtr
endog = dta.loc['1960-04-01':'1978-10-01',
                ['dln_inv', 'dln_inc', 'dln_consump']]

# ## Model specification
#
# The `VARMAX` class in statsmodels allows estimation of VAR, VMA, and
# VARMA models (through the `order` argument), optionally with a constant
# term (via the `trend` argument). Exogenous regressors may also be included
# (as usual in statsmodels, by the `exog` argument), and in this way a time
# trend may be added. Finally, the class allows measurement error (via the
# `measurement_error` argument) and allows specifying either a diagonal or
# unstructured innovation covariance matrix (via the `error_cov_type`
# argument).

# ## Example 1: VAR

statsmodels / statsmodels / docs / source / plots / graphics_functional_hdrboxplot.py View on Github

# coding: utf-8

#Load the El Nino dataset.  Consists of 60 years' worth of Pacific Ocean sea
#surface temperature data.

import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
data = sm.datasets.elnino.load(as_pandas=False)

#Create an HDR functional boxplot. We see that the years 1982-83 and 1997-98
#are outliers; these are the years where El Nino (a climate pattern
#characterized by warming up of the sea surface and higher air pressures)
#occurred with unusual intensity.

fig = plt.figure()
ax = fig.add_subplot(111)
fig, res = sm.graphics.hdrboxplot(data.raw_data[:, 1:],
                                  labels=data.raw_data[:, 0].astype(int),
                                  ax=ax)

ax.plot([0, 10], [25, 25])
ax.set_xlabel("Month of the year")
ax.set_ylabel("Sea surface temperature (C)")
ax.set_xticks(np.arange(13, step=3) - 1)

nyoka-pmml / nyoka / examples / statsmodels / exponential_smoothing / stats_models / docs / source / plots / load_macrodata.py View on Github

import statsmodels.api as sm
import pandas as pd
# Load the macrodata dataset through load_pandas() and keep its ``data``
# attribute.
dta = sm.datasets.macrodata.load_pandas().data
# Build the dates covering the range '1959Q1' through '2009Q3' and wrap them
# in a DatetimeIndex.
dates = sm.tsa.datetools.dates_from_range('1959Q1', '2009Q3')
index = pd.DatetimeIndex(dates)
# Install the new index on ``dta`` in place (inplace=True, so nothing is
# reassigned).
dta.set_index(index, inplace=True)

jseabold / statsmodels-tutorial / discrete_choice.py View on Github

# 

# A survey of women only was conducted in 1974 by *Redbook* asking about extramarital affairs.

# 

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.formula.api import logit, probit, poisson, ols

# 

# print() is called as a function throughout so the script is valid on
# Python 3; with a single argument it behaves the same on Python 2.
print(sm.datasets.fair.SOURCE)

# 

print(sm.datasets.fair.NOTE)

# 

dta = sm.datasets.fair.load_pandas().data

# 

# Binary indicator column: 1.0 where ``affairs`` is positive, 0.0 otherwise.
dta['affair'] = (dta['affairs'] > 0).astype(float)
print(dta.head(10))

#

statsmodels / statsmodels / examples / python / recursive_ls.py View on Github

# the parameter vectors, and can be constructed using the formula interface.

import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
from pandas_datareader.data import DataReader

np.set_printoptions(suppress=True)

# ## Example 1: Copper
#
# We first consider parameter stability in the copper dataset (description
# below).

print(sm.datasets.copper.DESCRLONG)

dta = sm.datasets.copper.load_pandas().data
# Index the observations by year start. 'YS' is the year-start frequency
# alias; the previous spelling 'AS' is deprecated in recent pandas releases.
dta.index = pd.date_range('1951-01-01', '1975-01-01', freq='YS')
endog = dta['WORLDCONSUMPTION']

# To the regressors in the dataset, we add a column of ones for an
# intercept
exog = sm.add_constant(
    dta[['COPPERPRICE', 'INCOMEINDEX', 'ALUMPRICE', 'INVENTORYINDEX']])

# First, construct and fit the model, and print a summary. Although the
# `RLS` model computes the regression parameters recursively, so there are
# as many estimates as there are datapoints, the summary table only presents
# the regression parameters estimated on the entire sample; except for small
# effects from initialization of the recursions, these estimates are
# equivalent to OLS estimates.

DUanalytics / pyAnalytics / 42-dataIE / data-pd_csv_excel.py View on Github

#Python write to file
#csv
#excel
#googlesheets


import statsmodels.api as sm
# Fetch the R 'iris' dataset; the frame itself is exposed as ``.data``.
iris = sm.datasets.get_rdataset(dataname='iris', package='datasets')
iris.data
iris.data.head()
df1 = iris.data
type(df1)
#https://vincentarelbundock.github.io/Rdatasets/datasets.html
mtcars = sm.datasets.get_rdataset(dataname='mtcars', package='datasets')
mtcars.data
mtcars.data.head()
df2 = mtcars.data
type(df2)

#
import pandas as pd
#check the cwd. file will get saved there
# NOTE: each to_excel() call given a plain path writes a fresh workbook, so
# the calls below that target the same file replace one another's output.
df2.to_excel('exceloutput.xlsx')
#save only when file is not opened
# sheet_name is passed by keyword: recent pandas deprecates positional
# arguments to to_excel() other than the target path.
df1.to_excel('exceloutput.xlsx', sheet_name='iris')
df2.to_excel('exceloutput.xlsx', engine='xlsxwriter')
df1.to_excel("E:/pywork/pydata/exceloutput2.xlsx", sheet_name='iris')
#see direction of / and check if folders exist

blaze / blaze / samples / ooc-groupby.py View on Github

def statsmodel_stream(stream):
    """Open a statsmodels dataset file as a CSV row reader plus its dtype.

    Parameters
    ----------
    stream : str
        Name of a dataset module under ``statsmodels.api.datasets``. Only
        ``'randhie'`` is currently supported.

    Returns
    -------
    tuple
        ``(sreader, dtype)``: a ``csv.reader`` over the dataset file, with
        the header line already consumed, and the result of ``ndt.type``
        applied to the record description string.

    Raises
    ------
    AttributeError
        If ``statsmodels.api.datasets`` has no attribute named ``stream``.
    NotImplementedError
        If ``stream`` names a dataset other than ``'randhie'``.
    """
    import statsmodels.api as sm
    data = getattr(sm.datasets, stream)
    if stream != 'randhie':
        # Reject unsupported datasets before touching the file system so no
        # file handle is opened (and leaked) on this path.
        raise NotImplementedError(
            "Importing this dataset has not been implemented yet")

    # For a description of this dataset, see:
    # http://statsmodels.sourceforge.net/devel/datasets/generated/randhie.html
    dtypes = ('{mdvis: string, lncoins: float32, idp: int32,'
              ' lpi:float32, fmde: float32, physlm: float32,'
              ' disea: float32, hlthg: int32, hlthf: int32,'
              ' hlthp: int32}')
    # Build the dtype first: if this fails, no file has been opened yet.
    dtype = ndt.type(dtypes)

    # The handle is intentionally left open on success: the returned reader
    # pulls rows from it on demand, so the caller owns its lifetime.
    # NOTE(review): on Python 3 csv.reader expects a text-mode file; the
    # original 'rb' mode is kept here -- confirm the target Python version.
    f = open(data.PATH, 'rb')
    try:
        f.readline()   # read out the headers line
        sreader = csv.reader(f)
    except Exception:
        # Do not leak the handle if setup fails after the open.
        f.close()
        raise
    return sreader, dtype

statsmodels / statsmodels / statsmodels / sandbox / tsa / examples / example_var.py View on Github

"""
Look at some macro plots, then do some VARs and IRFs.
"""

import numpy as np
import statsmodels.api as sm
import scikits.timeseries as ts
import scikits.timeseries.lib.plotlib as tplt

data = sm.datasets.macrodata.load(as_pandas=False)
data = data.data


### Create Timeseries Representations of a few vars

dates = ts.date_array(start_date=ts.Date('Q', year=1959, quarter=1),
    end_date=ts.Date('Q', year=2009, quarter=3))

ts_data = data[['realgdp','realcons','cpi']].view(float).reshape(-1,3)
ts_data = np.column_stack((ts_data, (1 - data['unemp']/100) * data['pop']))
ts_series = ts.time_series(ts_data, dates)


fig = tplt.tsfigure()
fsp = fig.add_tsplot(221)
fsp.tsplot(ts_series[:,0],'-')

How to use the statsmodels.api.datasets function in statsmodels

To help you get started, we’ve selected a few statsmodels examples, based on popular ways it is used in public projects.

statsmodels

Package Health Score

Popular statsmodels functions

Similar packages