How to use the scipy.stats module in scipy

To help you get started, we’ve selected a few scipy.stats examples based on popular ways it is used in public projects.


github tompollard / tableone / test_tableone.py View on Github external
def mytest(*args):
    """
    Hypothesis test for test_self_defined_statistical_tests
    """
    mytest.__name__ = "Test name"
    _, pval = stats.ks_2samp(*args)
    return pval
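
The helper above just unpacks the p-value from scipy.stats.ks_2samp. As a minimal standalone sketch of the same call, with made-up samples used purely for illustration:

import numpy as np
from scipy import stats

# Two illustrative samples (values are arbitrary, for demonstration only)
rng = np.random.default_rng(0)
sample_a = rng.normal(loc=0.0, scale=1.0, size=100)
sample_b = rng.normal(loc=0.5, scale=1.0, size=100)

# Two-sample Kolmogorov-Smirnov test: returns the KS statistic and a p-value
statistic, pval = stats.ks_2samp(sample_a, sample_b)
print(statistic, pval)
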
github christophmark / bayesloop / tests / test_observationmodels.py View on Github external
def test_scipy_2p(self):
        # carry out fit
        S = bl.Study()
        S.loadData(np.array([1, 2, 3, 4, 5]))

        L = bl.om.SciPy(scipy.stats.norm, 'loc', bl.cint(0, 7, 200), 'scale', bl.oint(0, 1, 200))

        S.setOM(L)
        S.setTM(bl.tm.Static())
        S.fit()

        # test model evidence value
        np.testing.assert_almost_equal(S.logEvidence, -13.663836264357225, decimal=5,
                                       err_msg='Erroneous log-evidence value.')
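
The test above hands scipy.stats.norm to bayesloop as an observation model with 'loc' and 'scale' parameters. If you only want the scipy.stats side of that, here is a minimal sketch of fitting those two parameters directly (the data array is reused from the snippet):

import numpy as np
import scipy.stats

data = np.array([1, 2, 3, 4, 5])

# Maximum-likelihood estimates of the normal distribution's loc and scale
loc, scale = scipy.stats.norm.fit(data)

# Log-likelihood of the data under the fitted parameters
log_likelihood = scipy.stats.norm.logpdf(data, loc=loc, scale=scale).sum()
print(loc, scale, log_likelihood)
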
github d-chambers / Detex / detex / xcorr.py View on Github external
                usedJdayHours=[]
                for c in range(numConDat):
                    condat,usedJdayHours=self._loadRandomContinousData(jdays,filt,usedJdayHours,LTATime,STATime,staltalimit)
                    #condat=self._loadContinousData(a[1],b[1],jdays)
                    MPcon=self._multiplex(condat,Nc)
                    CCmat[c]=self._MPXCorr(MPcon,MPtemFD,reqlen,sum_nt,MPtem,Nc)
                CCs=np.fromiter(itertools.chain.from_iterable(CCmat), dtype=np.float64)
                #deb(CCs)
                #deb([CCs,template]) #start here, figure out distribution
                outdict={}
                outdict['bins']=histBins
                outdict['hist']=np.histogram(CCs,bins=histBins)[0]
                #results[a[0]]['normdist']=scipy.stats.norm.fit(CCs)
                betaparams=scipy.stats.beta.fit((CCs+1)/2,floc=0,fscale=1) # transformed beta
                outdict['betadist']=betaparams # enforce hard limits on detection statistic
                normparams=scipy.stats.norm.fit(CCs)
                outdict['normparams']=normparams
                df[b[1].NETWORK+'.'+b[1].STATION][a[1].NAME]=outdict
            result=result.append(df)
        self.result=result
        if saveit:
            result.to_pickle(filename)
            #pickle.dump(self,open(filename,'wb'))
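
The core scipy.stats usage above fits a beta distribution to cross-correlation values rescaled from [-1, 1] onto [0, 1], with the support pinned via floc=0 and fscale=1, alongside a plain normal fit. A minimal sketch with synthetic correlation-like values standing in for CCs:

import numpy as np
import scipy.stats

# Synthetic correlation-like values in [-1, 1] (illustrative only)
rng = np.random.default_rng(1)
ccs = np.clip(rng.normal(0.0, 0.2, size=1000), -1, 1)

# Transform to [0, 1] and fit a beta distribution with fixed location/scale
a, b, loc, scale = scipy.stats.beta.fit((ccs + 1) / 2, floc=0, fscale=1)

# Normal fit to the untransformed values, for comparison
mu, sigma = scipy.stats.norm.fit(ccs)
print((a, b), (mu, sigma))
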
github wiseodd / probabilistic-models / models / bayesian_nonparametric / gaussian_process.py View on Github external
# Assume prior p(w) = N(w | 0, alpha*I), alpha := variance

# Precisions, hyperparameters
alpha = 2  # for p(w), the prior
beta = 2  # for p(t|y), the noisy observation

y_mean = np.zeros(M)
y_cov = 1/alpha * Phi @ Phi.T  # Gram matrix
K = y_cov
p_y = st.multivariate_normal(mean=y_mean, cov=y_cov, allow_singular=True)

# with noise
t_mean = np.zeros(M)
t_cov = y_cov + 1/beta*np.eye(M)
C = t_cov
p_t = st.multivariate_normal(mean=y_mean, cov=t_cov, allow_singular=True)


# Predictive distribution
# -----------------------
N = 100  # num of data points to predict
X_prime = np.linspace(0, 20, num=N)

# Kernel matrix of X_prime and observation X
K_ = np.zeros([N, M])
for i, x in enumerate(X_prime):
    K_[i] = gaussian_kernel(x)
K_ = K_.T  # K_: MxN

# Gram matrix of X_prime
K__ = np.zeros([N, N])
for i, x in enumerate(X_prime):
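
The snippet builds Gram matrices and wraps them in scipy.stats.multivariate_normal with allow_singular=True, since kernel matrices are only guaranteed to be positive semi-definite. A self-contained sketch of that frozen-distribution pattern, with a small made-up covariance standing in for the Gram matrix:

import numpy as np
import scipy.stats as st

M = 5
mean = np.zeros(M)
cov = np.eye(M) + 0.5 * np.ones((M, M))  # stand-in for the Gram matrix

# allow_singular=True tolerates positive semi-definite covariance matrices
p_y = st.multivariate_normal(mean=mean, cov=cov, allow_singular=True)

samples = p_y.rvs(size=3)          # three draws, shape (3, M)
log_density = p_y.logpdf(samples)  # log-density of each draw
print(samples.shape, log_density)
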
github sanger-pathogens / ariba / ariba / mic_plotter.py View on Github external
        output = []

        for i, list1 in enumerate(violin_data):
            for j, list2 in enumerate(violin_data):
                if j <= i or len(list1) < 2 or len(list2) < 2:
                    continue

                list1set = set(list1)

                if len(list1set) == 1 and list1set == set(list2):
                    statistic = 'NA'
                    pvalue = 1
                else:
                    if compare_test == 'mannwhitneyu':
                        statistic, pvalue = scipy.stats.mannwhitneyu(list1, list2, alternative='two-sided')
                    elif compare_test == 'ks_2samp':
                        statistic, pvalue = scipy.stats.ks_2samp(list1, list2)
                    else:
                        raise Error('Test "' + compare_test + '" not recognised. Cannot continue')

                effect_size = abs(scipy.stats.norm.ppf(pvalue) / math.sqrt(len(list1) + len(list2)))
                significant = 'yes' if pvalue < p_cutoff else 'no'
                output.append((columns[i], columns[j], len(list1), len(list2), pvalue, significant, effect_size))

        output.sort(key=lambda x: x[4])

        with open(outfile, 'w') as f:
            print('Combination1', 'Combination2', 'Size1', 'Size2', 'p-value', 'significant', 'effect_size', 'corrected_p-value', 'corrected_significant', 'corrected_effect_size', sep='\t', file=f)
            for x in output:
                corrected_p = min(1, len(output) * x[4])
                corrected_significant = 'yes' if corrected_p < p_cutoff else 'no'
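
The comparison step above switches between scipy.stats.mannwhitneyu and scipy.stats.ks_2samp and then converts the p-value into an effect size with scipy.stats.norm.ppf. A minimal sketch of those three calls on two made-up groups:

import math
import numpy as np
import scipy.stats

# Two illustrative groups (values are arbitrary)
rng = np.random.default_rng(2)
list1 = rng.normal(1.0, 0.3, size=30)
list2 = rng.normal(1.4, 0.3, size=30)

# Two-sided Mann-Whitney U test and two-sample KS test
u_stat, u_pvalue = scipy.stats.mannwhitneyu(list1, list2, alternative='two-sided')
ks_stat, ks_pvalue = scipy.stats.ks_2samp(list1, list2)

# Effect size as in the snippet: z-score of the p-value over sqrt(n1 + n2)
effect_size = abs(scipy.stats.norm.ppf(u_pvalue) / math.sqrt(len(list1) + len(list2)))
print(u_pvalue, ks_pvalue, effect_size)
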
github gwastro / pycbc / pycbc / inference / distributions.py View on Github external
        try:
            f = h5py.File(params_file, 'r')
        except:
            raise ValueError('File not found.')
        if params is not None:
            if not isinstance(params, list):
                params = [params]
            for p in params:
                if p not in f.keys():
                    raise ValueError('Parameter {} is not in {}'.format(p, params_file))
        else:
            params = [str(k) for k in f.keys()]
        params_values = {p:f[p][:] for p in params}
        f.close()
        values = numpy.vstack((params_values[p] for p in params))
        return params, scipy.stats.gaussian_kde(values)
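
The return value above is a scipy.stats.gaussian_kde built from stacked parameter samples; note that gaussian_kde expects its input shaped (n_dims, n_samples). A standalone sketch with synthetic samples:

import numpy
import scipy.stats

# Illustrative 2-D parameter samples; each column is one draw
rng = numpy.random.default_rng(3)
values = numpy.vstack([rng.normal(0, 1, 500),
                       rng.normal(5, 2, 500)])  # shape (2, 500)

kde = scipy.stats.gaussian_kde(values)

# Evaluate the density at two points and resample from the estimate
density = kde(numpy.array([[0.0, 1.0], [5.0, 6.0]]))  # points as columns
new_samples = kde.resample(10)                        # shape (2, 10)
print(density, new_samples.shape)
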
github SALib / SALib / SALib / util / __init__.py View on Github external
            else:
                conv_params[:, i] = stats.triang.ppf(
                    limited_params[:, i], c=b2, scale=b1, loc=0)

        elif dists[i] == 'unif':
            if b1 >= b2:
                raise ValueError('''Uniform distribution: lower bound
                       must be less than upper bound''')
            else:
                conv_params[:, i] = limited_params[:, i] * (b2 - b1) + b1

        elif dists[i] == 'norm':
            if b2 <= 0:
                raise ValueError('''Normal distribution: stdev must be > 0''')
            else:
                conv_params[:, i] = stats.norm.ppf(
                    limited_params[:, i], loc=b1, scale=b2)

        # lognormal distribution (ln-space, not base-10)
        # parameters are ln-space mean and standard deviation
        elif dists[i] == 'lognorm':
            # checking for valid parameters
            if b2 <= 0:
                raise ValueError(
                    '''Lognormal distribution: stdev must be > 0''')
            else:
                conv_params[:, i] = np.exp(
                    stats.norm.ppf(limited_params[:, i], loc=b1, scale=b2))

        else:
            valid_dists = ['unif', 'triang', 'norm', 'lognorm']
            raise ValueError('Distributions: choose one of %s' %
                             ", ".join(valid_dists))
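
What the SALib helper is doing with scipy.stats here is the inverse-CDF (ppf) transform: uniform samples on (0, 1) are mapped onto triangular, normal, or lognormal distributions. A minimal sketch of those ppf calls with arbitrary parameter values:

import numpy as np
from scipy import stats

# Uniform samples on (0, 1), e.g. from a Latin hypercube (illustrative only)
rng = np.random.default_rng(4)
u = rng.uniform(0.01, 0.99, size=1000)

# Inverse-CDF transforms onto the target distributions
triang_samples = stats.triang.ppf(u, c=0.5, loc=0, scale=2)       # triangular on [0, 2]
norm_samples = stats.norm.ppf(u, loc=1.0, scale=0.5)              # normal(mean=1, sd=0.5)
lognorm_samples = np.exp(stats.norm.ppf(u, loc=0.0, scale=0.25))  # ln-space parameters
print(triang_samples.mean(), norm_samples.mean(), lognorm_samples.mean())
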
github knu2xs / arcgis-tablular-summary-tools / summary_utilities.py View on Github external
    :param table: Table supported by arcpy.
    :param data_field: Field containing the data values.
    :param zscore_field: Field to be populated with the Z-Score.
    :param outlier_percent_threshold:
    :return:
    """
    # sql query to exclude records with null values or complete outliers
    sql_query = '{0} IS NOT NULL AND {1} <= {2} AND {3} >= -{4}'.format(
        data_field, data_field, outlier_percent_threshold, data_field, outlier_percent_threshold
    )

    # create the array of values from the data field
    data_list = [row[0] for row in arcpy.da.SearchCursor(table, data_field, sql_query)]

    # get a corresponding list of Z-Scores
    zscore_list = list(scipy.stats.zscore(numpy.array(data_list)))

    # create an update cursor to revise field values
    with arcpy.da.UpdateCursor(table, zscore_field, sql_query) as update_cursor:

        # iterate the rows and populate the zscore
        for index, row in enumerate(update_cursor):

            # populate the zscore field with the corresponding zscore value
            row[0] = zscore_list[index]

            # update the row
            update_cursor.updateRow(row)
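
The only scipy.stats call above is scipy.stats.zscore, which standardizes an array to (x - mean) / std using the population standard deviation (ddof=0) by default. A sketch of that step without the arcpy cursors, using made-up field values:

import numpy
import scipy.stats

# Illustrative field values pulled from a table (numbers are arbitrary)
data_list = [12.0, 15.5, 9.8, 22.1, 18.4, 11.0]

# Z-scores of the values; pass ddof=1 if you want the sample standard deviation
zscore_list = list(scipy.stats.zscore(numpy.array(data_list)))
print(zscore_list)
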
github ICB-DCM / pyABC / pyabc / distance / kernel.py View on Github external
def __call__(
            self,
            x: dict,
            x_0: dict,
            t: int = None,
            par: dict = None) -> float:
        x = np.array(_arr(x, self.keys), dtype=int)
        x_0 = np.array(_arr(x_0, self.keys), dtype=int)

        # compute p
        p = self.p if not callable(self.p) else self.p(par)

        if self.ret_scale == SCALE_LIN:
            ret = np.prod(sp.stats.nbinom.pmf(k=x_0, n=x, p=p))
        else:  # self.ret_scale == SCALE_LOG
            ret = np.sum(sp.stats.nbinom.logpmf(k=x_0, n=x, p=p))

        return ret
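
The kernel above scores observed counts against simulated ones with scipy.stats.nbinom, either as a product of probability masses or, more stably, as a sum of log-masses. A minimal sketch with made-up counts and success probability:

import numpy as np
import scipy.stats

x_0 = np.array([3, 7, 2])  # observed counts (illustrative)
x = np.array([4, 6, 3])    # simulated counts (illustrative)
p = 0.6                    # negative binomial success probability

# Linear scale: product of per-coordinate probability masses
ret_lin = np.prod(scipy.stats.nbinom.pmf(k=x_0, n=x, p=p))
# Log scale: sum of per-coordinate log probability masses
ret_log = np.sum(scipy.stats.nbinom.logpmf(k=x_0, n=x, p=p))
print(ret_lin, ret_log)
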
github CGATOxford / CGATPipelines / CGAT / Stats.py View on Github external
def doLogLikelihoodTest(complex_ll, complex_np,
                        simple_ll, simple_np,
                        significance_threshold=0.05):
    """perform log-likelihood test between model1 and model2.
    """

    assert complex_ll >= simple_ll, "log likelihood of complex model smaller than for simple model: %f > %f" % (
        complex_ll, simple_ll)

    chi = 2 * (complex_ll - simple_ll)
    df = complex_np - simple_np

    if df <= 0:
        raise ValueError("difference of degrees of freedom not larger than 0")

    p = scipy.stats.chisqprob(chi, df)

    l = LogLikelihoodTest()

    l.mComplexLogLikelihood = complex_ll
    l.mSimpleLogLikelihood = simple_ll
    l.mComplexNumParameters = complex_np
    l.mSimpleNumParameters = simple_np
    l.mSignificanceThreshold = significance_threshold
    l.mProbability = p
    l.mChiSquaredValue = chi
    l.mDegreesFreedom = df

    if p < significance_threshold:
        l.mPassed = True
    else:
        l.mPassed = False
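
Note that scipy.stats.chisqprob, used above, was removed in SciPy 1.0; in current SciPy the same tail probability comes from scipy.stats.chi2.sf. A minimal sketch of the likelihood-ratio computation with made-up log-likelihoods and parameter counts:

import scipy.stats

# Illustrative log-likelihoods and parameter counts
complex_ll, complex_np = -120.3, 5
simple_ll, simple_np = -128.9, 3

chi = 2 * (complex_ll - simple_ll)
df = complex_np - simple_np

# chi2.sf is the chi-squared survival function (1 - CDF), equivalent to the
# removed scipy.stats.chisqprob
p = scipy.stats.chi2.sf(chi, df)
print(chi, df, p)
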