Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
--------
Compute the Cochran Q test for repeated measurements.
>>> from pingouin import cochran, read_dataset
>>> df = read_dataset('cochran')
>>> cochran(data=df, dv='Energetic', within='Time', subject='Subject')
Source dof Q p-unc
cochran Time 2 6.706 0.034981
"""
# NOTE(review): fragment of the Cochran Q test — the enclosing `def` and most
# of its docstring lie outside this chunk.
# Check data: validate that `data` is a long-format repeated-measures frame.
_check_dataframe(dv=dv, within=within, data=data, subject=subject,
effects='within')
# Remove NaN — delegate listwise removal to remove_rm_na so the design
# stays balanced across subjects (remove_rm_na is defined elsewhere in
# the package — behavior assumed, not visible here).
if data[dv].isnull().any():
data = remove_rm_na(dv=dv, within=within, subject=subject,
data=data[[subject, within, dv]])
# Groupby and extract size: DV grouped by condition and by subject.
grp = data.groupby(within)[dv]
grp_s = data.groupby(subject)[dv]
# k = number of repeated measurements; the test has k - 1 degrees of freedom.
k = data[within].nunique()
dof = k - 1
# n = grp.count().unique()[0]
# Q statistic and p-value: Q contrasts condition totals (grp.sum()) with
# subject totals (grp_s.sum()); under H0, Q follows a chi-squared
# distribution with `dof` degrees of freedom (survival function -> p-value).
q = (dof * (k * np.sum(grp.sum()**2) - grp.sum().sum()**2)) / \
(k * grp.sum().sum() - np.sum(grp_s.sum()**2))
p_unc = scipy.stats.chi2.sf(q, dof)
# Create output dataframe
# NOTE(review): the following statement is truncated at the chunk boundary —
# the closing of the pd.DataFrame(...) call is not visible here.
stats = pd.DataFrame({'Source': within,
'ddof2' : Degrees of freedom (denominator)
'F' : F-value
'p-unc' : Uncorrected p-value
'np2' : Partial eta-square effect size
'eps' : Greenhouse-Geisser epsilon factor (= index of sphericity)
'p-GG-corr' : Greenhouse-Geisser corrected p-value
"""
# NOTE(review): fragment of a two-way repeated-measures ANOVA — the enclosing
# `def` is outside this chunk. `within` is expected to be a 2-item sequence
# of factor names (unpacked below).
a, b = within
# Validate the dataframe
_check_dataframe(dv=dv, within=within, data=data, subject=subject,
effects='within')
# Remove NaN — check all four relevant columns, then drop incomplete
# subjects via remove_rm_na (defined elsewhere; behavior assumed).
if data[[subject, a, b, dv]].isnull().any().any():
data = remove_rm_na(dv=dv, subject=subject, within=[a, b],
data=data[[subject, a, b, dv]])
# Collapse to the mean (note that this is also done in remove_rm_na):
# average replicate observations so each subject has one value per cell.
data = data.groupby([subject, a, b]).mean().reset_index()
assert not data[a].isnull().any(), 'Cannot have NaN in %s' % a
assert not data[b].isnull().any(), 'Cannot have NaN in %s' % b
assert not data[subject].isnull().any(), 'Cannot have NaN in %s' % subject
# Group sizes and grandmean: number of levels per factor, number of
# subjects, and the grand mean of the DV.
n_a = data[a].nunique()
n_b = data[b].nunique()
n_s = data[subject].nunique()
mu = data[dv].mean()
# Groupby means
# NOTE(review): fragment of a one-way repeated-measures ANOVA. This branch
# appears to handle wide-format input (one column per condition): it
# fabricates a subject index and melts to long format before computing the
# sums of squares — TODO confirm against the full function upstream.
assert data.shape[0] > 2, 'Data must have at least 3 rows.'
assert data.shape[1] > 1, 'Data must contain at least two columns.'
# Synthetic subject identifier: one per row of the wide frame.
data['Subj'] = np.arange(data.shape[0])
data = data.melt(id_vars='Subj', var_name='Within', value_name='DV')
# From here on, work with the standardized long-format column names.
subject, within, dv = 'Subj', 'Within', 'DV'
# Check dataframe
_check_dataframe(dv=dv, within=within, data=data, subject=subject,
effects='within')
# Collapse to the mean: one value per subject x condition cell.
data = data.groupby([subject, within]).mean().reset_index()
# Remove NaN (remove_rm_na defined elsewhere; behavior assumed).
if data[dv].isnull().any():
data = remove_rm_na(dv=dv, within=within, subject=subject,
data=data[[subject, within, dv]])
assert not data[within].isnull().any(), 'Cannot have NaN in `within`.'
assert not data[subject].isnull().any(), 'Cannot have NaN in `subject`.'
# Groupby: DV grouped by condition; collect level names and sizes.
grp_with = data.groupby(within)[dv]
rm = list(data[within].unique())
n_rm = len(rm)
# n_obs = largest per-condition sample size (count() ignores NaN).
n_obs = int(data.groupby(within)[dv].count().max())
grandmean = data[dv].mean()
# Calculate sums of squares:
#   sstime   = between-conditions SS (effect of the within factor)
#   sswithin = within-conditions SS (deviations from each condition mean)
#   sssubj   = between-subjects SS (subject means vs grand mean, scaled
#              by the number of repeated measurements)
sstime = ((grp_with.mean() - grandmean)**2 * grp_with.count()).sum()
sswithin = grp_with.apply(lambda x: (x - x.mean())**2).sum()
grp_subj = data.groupby(subject)[dv]
sssubj = n_rm * np.sum((grp_subj.mean() - grandmean)**2)
>>> df = read_dataset('rm_anova')
>>> friedman(data=df, dv='DesireToKill', within='Disgustingness',
... subject='Subject')
Source ddof1 Q p-unc
Friedman Disgustingness 1 9.228 0.002384
"""
# NOTE(review): fragment of the Friedman test — the enclosing `def` is
# outside this chunk, and the fragment ends abruptly after `ssbn`.
# Check data
_check_dataframe(dv=dv, within=within, data=data, subject=subject,
effects='within')
# Collapse to the mean: one observation per subject x condition cell.
data = data.groupby([subject, within]).mean().reset_index()
# Remove NaN (remove_rm_na defined elsewhere; behavior assumed).
if data[dv].isnull().any():
data = remove_rm_na(dv=dv, within=within, subject=subject,
data=data[[subject, within, dv]])
# Extract number of groups and total sample size
grp = data.groupby(within)[dv]
rm = list(data[within].unique())
k = len(rm)
# Pivot to an (n subjects) x (k conditions) matrix. NOTE(review): this
# relies on every group returning values in the same subject order —
# presumably guaranteed by the groupby above; TODO confirm.
X = np.array([grp.get_group(r).values for r in rm]).T
n = X.shape[0]
# Rank per subject: each subject's k measurements are ranked 1..k
# (ties get average ranks via scipy.stats.rankdata).
ranked = np.zeros(X.shape)
for i in range(n):
ranked[i] = scipy.stats.rankdata(X[i, :])
# Sum of squared column (condition) rank sums — numerator ingredient of
# the Friedman Q statistic (statistic itself computed past this chunk).
ssbn = (ranked.sum(axis=0)**2).sum()
>>> aov
Source SS DF1 DF2 MS F p-unc np2 eps
0 Group 5.460 1 58 5.460 5.052 0.028420 0.080 -
1 Time 7.628 2 116 3.814 4.027 0.020373 0.065 0.999
2 Interaction 5.168 2 116 2.584 2.728 0.069530 0.045 -
"""
# NOTE(review): fragment of a mixed-design (split-plot) ANOVA — one within
# factor, one between factor. The enclosing `def` is outside this chunk and
# the computation is cut mid-way at the chunk boundary.
# Check data
_check_dataframe(dv=dv, within=within, between=between, data=data,
subject=subject, effects='interaction')
# Collapse to the mean: one value per subject x within x between cell.
data = data.groupby([subject, within, between]).mean().reset_index()
# Remove NaN (remove_rm_na defined elsewhere; behavior assumed).
if data[dv].isnull().any():
data = remove_rm_na(dv=dv, within=within, subject=subject,
data=data[[subject, within, between, dv]])
# SUMS OF SQUARES
grandmean = data[dv].mean()
# Extract main effects of time and between by delegating to the one-way
# repeated-measures ANOVA and the between-subjects ANOVA (both defined
# elsewhere in this module).
mtime = rm_anova(dv=dv, within=within, subject=subject, data=data,
correction=correction, detailed=True)
mbetw = anova(dv=dv, between=between, data=data, detailed=True)
# Extract SS total, residuals and interactions
grp = data.groupby([between, within])[dv]
sstotal = grp.apply(lambda x: (x - grandmean)**2).sum()
# sst = residuals within + residuals between
sst = grp.apply(lambda x: (x - x.mean())**2).sum()
# Interaction SS: what remains of the total after the two main effects
# and the cell residuals are removed.
ssinter = sstotal - (sst + mtime.at[0, 'SS'] + mbetw.at[0, 'SS'])
# Within-groups error SS: the rm_anova error term minus the interaction.
sswg = mtime.at[1, 'SS'] - ssinter