from scipy import stats

def mytest(*args):
    """
    Hypothesis test helper for test_self_defined_statistical_tests: runs a
    two-sample Kolmogorov-Smirnov test and returns only the p-value.
    """
    mytest.__name__ = "Test name"  # display name picked up by the caller
    _, pval = stats.ks_2samp(*args)
    return pval
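A minimal usage sketch (the samples and seed below are illustrative): since the helper returns only the p-value, it plugs in wherever a p-value-returning test function is expected.

import numpy as np

rng = np.random.default_rng(0)
sample_a = rng.normal(loc=0.0, size=100)
sample_b = rng.normal(loc=0.5, size=100)
print(mytest(sample_a, sample_b))  # expected to be small: the samples differ in location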
def test_scipy_2p(self):
    # carry out fit
    S = bl.Study()
    S.loadData(np.array([1, 2, 3, 4, 5]))
    L = bl.om.SciPy(scipy.stats.norm, 'loc', bl.cint(0, 7, 200),
                    'scale', bl.oint(0, 1, 200))
    S.setOM(L)
    S.setTM(bl.tm.Static())
    S.fit()

    # test model evidence value
    np.testing.assert_almost_equal(S.logEvidence, -13.663836264357225, decimal=5,
                                   err_msg='Erroneous log-evidence value.')

usedJdayHours = []
for c in range(numConDat):
    condat, usedJdayHours = self._loadRandomContinousData(
        jdays, filt, usedJdayHours, LTATime, STATime, staltalimit)
    MPcon = self._multiplex(condat, Nc)
    CCmat[c] = self._MPXCorr(MPcon, MPtemFD, reqlen, sum_nt, MPtem, Nc)
# flatten the per-chunk correlation arrays into one vector
CCs = np.fromiter(itertools.chain.from_iterable(CCmat), dtype=np.float64)
outdict = {}
outdict['bins'] = histBins
outdict['hist'] = np.histogram(CCs, bins=histBins)[0]
# fit a beta distribution to the correlations mapped onto [0, 1];
# fixing loc=0 and scale=1 enforces hard limits on the detection statistic
betaparams = scipy.stats.beta.fit((CCs + 1) / 2, floc=0, fscale=1)
outdict['betadist'] = betaparams
normparams = scipy.stats.norm.fit(CCs)
outdict['normparams'] = normparams
df[b[1].NETWORK + '.' + b[1].STATION][a[1].NAME] = outdict
result = result.append(df)
self.result = result
if saveit:
    result.to_pickle(filename)

# Assume prior p(w) = N(w | 0, (1/alpha) * I), alpha := precision
# Precisions, hyperparameters
alpha = 2  # for p(w), the prior
beta = 2   # for p(t|y), the noisy observation
y_mean = np.zeros(M)
y_cov = 1 / alpha * Phi @ Phi.T  # Gram matrix
K = y_cov
p_y = st.multivariate_normal(mean=y_mean, cov=y_cov, allow_singular=True)

# with noise
t_mean = np.zeros(M)
t_cov = y_cov + 1 / beta * np.eye(M)
C = t_cov
p_t = st.multivariate_normal(mean=t_mean, cov=t_cov, allow_singular=True)
# Predictive distribution
# -----------------------
N = 100  # number of points to predict
X_prime = np.linspace(0, 20, num=N)
# Kernel matrix of X_prime and observation X
K_ = np.zeros([N, M])
for i, x in enumerate(X_prime):
    K_[i] = gaussian_kernel(x)
K_ = K_.T  # K_: M x N
# Gram matrix of X_prime
K__ = np.zeros([N, N])
for i, x in enumerate(X_prime):
    # body truncated in the source; presumably it fills K__[i] with kernel
    # values between x and every point of X_prime
    ...
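The snippet above assumes a training design matrix Phi and a helper gaussian_kernel(x); a minimal sketch of those assumed definitions (the names, kernel width, and training grid are illustrative):

import numpy as np

M = 25
X = np.linspace(0, 20, num=M)  # assumed training inputs
s = 1.0                        # assumed RBF kernel width

def gaussian_kernel(x):
    # RBF similarities between a single point x and every training input
    return np.exp(-0.5 * ((x - X) / s) ** 2)

Phi = np.stack([gaussian_kernel(x) for x in X])  # M x M design matrix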

output = []
for i, list1 in enumerate(violin_data):
    for j, list2 in enumerate(violin_data):
        if j <= i or len(list1) < 2 or len(list2) < 2:
            continue
        list1set = set(list1)
        if len(list1set) == 1 and list1set == set(list2):
            # both samples are the same constant: no test needed
            statistic = 'NA'
            pvalue = 1
        else:
            if compare_test == 'mannwhitneyu':
                statistic, pvalue = scipy.stats.mannwhitneyu(list1, list2, alternative='two-sided')
            elif compare_test == 'ks_2samp':
                statistic, pvalue = scipy.stats.ks_2samp(list1, list2)
            else:
                raise ValueError('Test "' + compare_test + '" not recognised. Cannot continue')
        effect_size = abs(scipy.stats.norm.ppf(pvalue) / math.sqrt(len(list1) + len(list2)))
        significant = 'yes' if pvalue < p_cutoff else 'no'
        output.append((columns[i], columns[j], len(list1), len(list2), pvalue, significant, effect_size))
output.sort(key=lambda x: x[4])  # sort by p-value
with open(outfile, 'w') as f:
    print('Combination1', 'Combination2', 'Size1', 'Size2', 'p-value', 'significant',
          'effect_size', 'corrected_p-value', 'corrected_significant',
          'corrected_effect_size', sep='\t', file=f)
    for x in output:
        corrected_p = min(1, len(output) * x[4])  # Bonferroni correction
        corrected_significant = 'yes' if corrected_p < p_cutoff else 'no'
        # remaining lines truncated in the source; presumably the corrected
        # effect size mirrors effect_size and the row is written out:
        corrected_effect_size = abs(scipy.stats.norm.ppf(corrected_p) / math.sqrt(x[2] + x[3]))
        print(*x, corrected_p, corrected_significant, corrected_effect_size, sep='\t', file=f)
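The loop above applies a Bonferroni correction by hand (min(1, m * p)); an equivalent batch computation using statsmodels, shown as an assumed alternative rather than the original code:

from statsmodels.stats.multitest import multipletests

pvals = [row[4] for row in output]
reject, corrected, _, _ = multipletests(pvals, alpha=p_cutoff, method='bonferroni')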

try:
    f = h5py.File(params_file, 'r')
except OSError:
    raise ValueError('File {} not found or unreadable.'.format(params_file))
if params is not None:
    if not isinstance(params, list):
        params = [params]
    for p in params:
        if p not in f.keys():
            raise ValueError('Parameter {} is not in {}'.format(p, params_file))
else:
    params = [str(k) for k in f.keys()]
params_values = {p: f[p][:] for p in params}
f.close()
# stack one row per parameter, then fit a kernel density estimate
values = numpy.vstack([params_values[p] for p in params])
return params, scipy.stats.gaussian_kde(values)
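A usage sketch for the returned estimator (the caller name and file path are hypothetical, since the loader's def line is not part of the snippet):

names, kde = load_params_kde('params.h5')  # hypothetical name for the loader above
samples = kde.resample(1000)               # shape (len(names), 1000)
density = kde(samples[:, :10])             # joint density at the first 10 draws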

    # (truncated above: start of the per-parameter loop and the 'triang' branch)
        else:
            conv_params[:, i] = stats.triang.ppf(
                limited_params[:, i], c=b2, scale=b1, loc=0)
    elif dists[i] == 'unif':
        if b1 >= b2:
            raise ValueError('Uniform distribution: lower bound '
                             'must be less than upper bound')
        else:
            conv_params[:, i] = limited_params[:, i] * (b2 - b1) + b1
    elif dists[i] == 'norm':
        if b2 <= 0:
            raise ValueError('Normal distribution: stdev must be > 0')
        else:
            conv_params[:, i] = stats.norm.ppf(
                limited_params[:, i], loc=b1, scale=b2)
    # lognormal distribution (ln-space, not base-10)
    # parameters are ln-space mean and standard deviation
    elif dists[i] == 'lognorm':
        # checking for valid parameters
        if b2 <= 0:
            raise ValueError('Lognormal distribution: stdev must be > 0')
        else:
            conv_params[:, i] = np.exp(
                stats.norm.ppf(limited_params[:, i], loc=b1, scale=b2))
    else:
        valid_dists = ['unif', 'triang', 'norm', 'lognorm']
        raise ValueError('Distributions: choose one of %s' % ', '.join(valid_dists))
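Every branch above follows the same inverse-CDF pattern: uniform [0, 1) samples in limited_params are pushed through a distribution's ppf. A standalone sketch of that pattern (values are illustrative):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
u = rng.random(1000)                       # uniform draws on [0, 1)
x = stats.norm.ppf(u, loc=5.0, scale=2.0)  # mapped to N(5, 2**2) via the inverse CDF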

# NOTE: the def line below is reconstructed from the docstring; the function
# name is illustrative, as the original signature is missing from the snippet.
def populate_zscore_field(table, data_field, zscore_field, outlier_percent_threshold):
    """
    :param table: Table supported by arcpy.
    :param data_field: Field containing the data values.
    :param zscore_field: Field to be populated with the Z-Score.
    :param outlier_percent_threshold: Absolute value beyond which records are
        treated as outliers and excluded.
    :return: None; the table is updated in place.
    """
    # sql query to exclude records with null values or complete outliers
    sql_query = '{0} IS NOT NULL AND {1} <= {2} AND {3} >= -{4}'.format(
        data_field, data_field, outlier_percent_threshold, data_field, outlier_percent_threshold
    )
    # create the array of values from the data field
    data_list = [row[0] for row in arcpy.da.SearchCursor(table, data_field, sql_query)]
    # get a corresponding list of Z-Scores
    zscore_list = list(scipy.stats.zscore(numpy.array(data_list)))
    # create an update cursor to revise field values
    with arcpy.da.UpdateCursor(table, zscore_field, sql_query) as update_cursor:
        # iterate the rows and populate the zscore
        for index, row in enumerate(update_cursor):
            # populate the zscore field with the corresponding zscore value
            row[0] = zscore_list[index]
            # update the row
            update_cursor.updateRow(row)
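Outside arcpy, the Z-Score step itself is plain scipy; a self-contained sketch:

import numpy
import scipy.stats

data = numpy.array([2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0])
# mean 5, population stdev 2 -> [-1.5, -0.5, -0.5, -0.5, 0., 0., 1., 2.]
print(scipy.stats.zscore(data))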

def __call__(
        self,
        x: dict,
        x_0: dict,
        t: int = None,
        par: dict = None) -> float:
    x = np.array(_arr(x, self.keys), dtype=int)
    x_0 = np.array(_arr(x_0, self.keys), dtype=int)
    # p may be given directly or as a callable of the parameters
    p = self.p if not callable(self.p) else self.p(par)
    if self.ret_scale == SCALE_LIN:
        ret = np.prod(sp.stats.nbinom.pmf(k=x_0, n=x, p=p))
    else:  # self.ret_scale == SCALE_LOG
        ret = np.sum(sp.stats.nbinom.logpmf(k=x_0, n=x, p=p))
    return ret
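A quick check of the negative-binomial kernel used above, confirming that the linear and log scales agree (the inputs are illustrative):

import numpy as np
import scipy.stats

k = np.array([4, 2])  # observed counts (x_0)
n = np.array([3, 5])  # model states (x)
lin = np.prod(scipy.stats.nbinom.pmf(k=k, n=n, p=0.5))
log = np.sum(scipy.stats.nbinom.logpmf(k=k, n=n, p=0.5))
assert np.isclose(np.log(lin), log)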

def doLogLikelihoodTest(complex_ll, complex_np,
                        simple_ll, simple_np,
                        significance_threshold=0.05):
    """Perform a log-likelihood ratio test between a complex and a simple
    (nested) model."""
    assert complex_ll >= simple_ll, \
        "log likelihood of complex model smaller than for simple model: %f < %f" % (
            complex_ll, simple_ll)
    chi = 2 * (complex_ll - simple_ll)
    df = complex_np - simple_np
    if df <= 0:
        raise ValueError("the complex model must have more parameters "
                         "(degrees of freedom difference must be > 0)")
    # scipy.stats.chisqprob was removed from scipy; chi2.sf computes the same tail probability
    p = scipy.stats.chi2.sf(chi, df)
    l = LogLikelihoodTest()
    l.mComplexLogLikelihood = complex_ll
    l.mSimpleLogLikelihood = simple_ll
    l.mComplexNumParameters = complex_np
    l.mSimpleNumParameters = simple_np
    l.mSignificanceThreshold = significance_threshold
    l.mProbability = p
    l.mChiSquaredValue = chi
    l.mDegreesFreedom = df
    l.mPassed = p < significance_threshold
    return l
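A usage sketch (the log-likelihoods and parameter counts are illustrative; LogLikelihoodTest is assumed to be defined elsewhere in the module):

test = doLogLikelihoodTest(complex_ll=-120.0, complex_np=5,
                           simple_ll=-125.3, simple_np=3)
if test.mPassed:
    print("complex model fits significantly better (p = %g)" % test.mProbability)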