Parameters
----------
x : array-like, shape=(n_samples,)
The time series vector.
Returns
-------
pval : float
The computed P-value of the test.
sig : bool
Whether the P-value is significant at the ``alpha`` level.
More directly, whether to difference the time series.
"""
# Degenerate input (per _base_case): NaN p-value and False, i.e.
# "no decision, do not difference".
if not self._base_case(x):
    return np.nan, False
# ensure vector
x = check_endog(x, dtype=DTYPE)
# embed the vector. This is some funkiness that goes on in the R
# code... basically, make a matrix where the column (rows if not T)
# are lagged windows of x
z = self._embed(x, 2)  # Same as R t(embed(x, k))
yt = z[0, :]   # the current series values
yt1 = z[1, :]  # type: np.ndarray  # one-step lagged copy of yt
# Fit a linear model to a predictor matrix: intercept, a centered
# time trend tt = (1..n) - n/2, and the lagged series.
n = yt.shape[0]
tt = (np.arange(n) + 1) - (n / 2.0)
X = np.array([np.ones(n), tt, yt1]).T
res = LinearRegression().fit(X, yt)  # lm(yt ~ 1 + tt + yt1)
coef = res.coef_
# check for singularities - do we want to do this??? in the R code,
critical values::
(0.4617146, 0.7479655, 1.0007818,
1.2375350, 1.4625240, 1.6920200,
1.9043096, 2.1169602, 2.3268562,
2.5406922, 2.7391007)
For different values of ``m``, the CH statistic is compared
to a different critical value, and returns 1 if the computed
statistic is greater than the critical value, or 0 if not.
"""
# Degenerate input (per _base_case): no seasonal differencing.
if not self._base_case(x):
    return 0
# ensure vector
x = check_endog(x, dtype=DTYPE)
n = x.shape[0]
m = int(self.m)
# Too few observations for a meaningful test at this period.
if n < 2 * m + 5:
    return 0
# CH statistic, compared against a critical value that depends on
# the seasonal period m.
chstat = self._sd_test(x, m)
if m <= 12:
    # Tabulated critical values; Python's 0-based indexing uses
    # m - 2 where the 1-based R table uses m - 1.
    return int(chstat > self.crit_vals[m - 2])  # R does m - 1...
# Larger periods use hard-coded critical values — presumably common
# seasonal periods (24, 52, 365); TODO confirm their derivation.
if m == 24:
    return int(chstat > 5.098624)
if m == 52:
    return int(chstat > 10.341416)
if m == 365:
-------
d : int
The estimated differencing term. This is the maximum value of ``d``
such that ``d <= max_d`` and the time series is judged stationary.
If the time series is constant, will return 0.
References
----------
.. [1] R's auto_arima ndiffs function: https://bit.ly/2Bu8CHN
"""
# Validate the differencing cap up front.
if max_d <= 0:
    raise ValueError('max_d must be a positive integer')
# Resolve the named test (validated against VALID_TESTS) to its
# bound should_diff method.
testfunc = get_callable(test, VALID_TESTS)(alpha, **kwargs).should_diff
x = check_endog(x, dtype=DTYPE, copy=False)
# base case, if constant return 0
d = 0
if is_constant(x):
    return d
# get initial diff
pval, dodiff = testfunc(x)
# if initially NaN, return 0
if np.isnan(pval):
    return 0  # (d is zero, but this is more explicit to the reader)
# Begin loop: difference while the test says to and the cap has not
# been reached.
while dodiff and d < max_d:
    d += 1
If the ``ties`` parameter is explicitly set to 'ordered' then order
is already assumed. Otherwise, the removal process will happen.
Parameters
----------
x : array-like, shape=(n_samples,)
The x vector.
y : array-like, shape=(n_samples,)
The y vector.
ties : str
One of {'ordered', 'mean'}, handles the ties.
"""
# Coerce both inputs to validated 1-d vectors of the module dtype.
x, y = [
    check_endog(arr, dtype=DTYPE)
    for arr in (x, y)
]
nx = x.shape[0]
if nx != y.shape[0]:
    raise ValueError('array dim mismatch: %i != %i' % (nx, y.shape[0]))
# manipulate x if needed. if ties is 'ordered' we assume that x is
# already ordered and everything has been handled already...
if ties != 'ordered':
    # Sort x ascending and apply the same permutation to y so the
    # (x, y) pairs stay aligned with one another.
    o = np.argsort(x)
    x = x[o]
    y = y[o]
x : array-like, shape=(n_samples,)
The time series vector.
Returns
-------
D : int
The seasonal differencing term. For different values of ``m``,
the OCSB statistic is compared to an estimated critical value, and
returns 1 if the computed statistic is greater than the critical
value, or 0 if not.
"""
# Degenerate input (per _base_case): no seasonal differencing.
if not self._base_case(x):
    return 0
# ensure vector
x = check_endog(x, dtype=DTYPE)
# Compare the OCSB statistic against the critical value estimated
# for this seasonal period m; 1 means "seasonally difference".
stat = self._compute_test_statistic(x)
crit_val = self._calc_ocsb_crit_val(self.m)
return int(stat > crit_val)
The time series vector.
Returns
-------
pval : float
The computed P-value of the test.
sig : bool
Whether the P-value is significant at the ``alpha`` level.
More directly, whether to difference the time series.
"""
# Degenerate input (per _base_case): NaN p-value and False, i.e.
# "no decision, do not difference".
if not self._base_case(x):
    return np.nan, False
# ensure vector
x = check_endog(x, dtype=DTYPE)
# Lag order: if not user-supplied, default to trunc((n - 1)^(1/3)).
k = self.k
if k is None:
    k = np.trunc(np.power(x.shape[0] - 1, 1 / 3.0))
# See [2] for the R source. This is L153 - L160
k = int(k) + 1
y = diff(x)  # diff(as.vector(x, mode='double'))
n = y.shape[0]
z = self._embed(y, k).T  # Same as R embed(x, k)
# Compute ordinary least squares on the embedded differences, then
# derive the test statistic from the coefficient standard error.
res = self._ols(x, y, z, k)
STAT = self._ols_std_error(res)
Note that the CHTest is very slow for large data.
Returns
-------
D : int
The estimated seasonal differencing term. This is the maximum value
of ``D`` such that ``D <= max_D`` and the time series is judged
seasonally stationary. If the time series is constant, will return 0.
"""
# Validate the seasonal differencing cap up front.
if max_D <= 0:
    raise ValueError('max_D must be a positive integer')
# get the test - this validates m internally
testfunc = get_callable(test, VALID_STESTS)(m, **kwargs)\
    .estimate_seasonal_differencing_term
x = check_endog(x, dtype=DTYPE, copy=False)
# A constant series needs no differencing.
if is_constant(x):
    return 0
D = 0
dodiff = testfunc(x)
# Seasonally difference (lag m) while the test returns 1 and the cap
# has not been reached; stop early if differencing flattens the
# series to a constant.
while dodiff == 1 and D < max_D:
    D += 1
    x = diff(x, lag=m)
    if is_constant(x):
        return D
    dodiff = testfunc(x)
return D
The time series vector.
Returns
-------
pval : float
The computed P-value of the test.
sig : bool
Whether the P-value is significant at the ``alpha`` level.
More directly, whether to difference the time series.
"""
# Degenerate input (per _base_case): NaN p-value and False, i.e.
# "no decision, do not difference".
if not self._base_case(x):
    return np.nan, False
# ensure vector
x = check_endog(x, dtype=DTYPE)
n = x.shape[0]
# check on status of null
null = self.null
# Build the deterministic regressor and pick the critical-value
# table matching the null hypothesis.
if null == 'trend':
    # Linear time-trend regressor (column vector).
    t = np.arange(n).reshape(n, 1)
    # these numbers came out of the R code.. I've found 0 doc for these
    table = c(0.216, 0.176, 0.146, 0.119)
elif null == 'level':
    # Constant (intercept-only) regressor (column vector).
    t = np.ones(n).reshape(n, 1)
    # these numbers came out of the R code.. I've found 0 doc for these
    table = c(0.739, 0.574, 0.463, 0.347)