Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
method = 'bonferroni'
elif opts.multiTest == 'Holm':
method = 'holm'
elif opts.multiTest == 'Hochberg':
method = 'simes-hochberg'
elif opts.multiTest == 'Hommel':
method = 'hommel'
elif opts.multiTest == 'BY':
method = 'fdr_by'
elif opts.multiTest == 'TSBH':
method = 'tsbh'
else:
sys.stderr.write('ERROR: The methods for multiple test correction can only accept \'Bonferroni\', \'Holm\', \'Hochberg\', \'Hommel\', \'BH\', \'BY\' or \'TSBH\' as its input.\n')
sys.exit()
mtc = sms.stats.multicomp.multipletests(pval[idx], alpha=0.1, method=method, returnsorted=False)
padj = pval.copy()
padj[idx] = mtc[1]
data.padj = padj
return data
def correct_multi_test(self, pvalues):
mtc_dict = {'fdr_bh':'Benjamini/Hochberg (non-negative)'}
logging.info('Applying multi-test correction using the %s method' % (mtc_dict[self.multi_test_correction]) )
corrected_pvals \
= sm.multipletests(pvalues,
alpha = self.threshold,
method = self.multi_test_correction,
returnsorted = False,
is_sorted = False)[1]
return corrected_pvals
het_deficit = []
het_excess = []
with open(f) as fh:
#Skip header
next(fh)
for line in fh:
fields = line.strip().split()
snp_pos.append((fields[0], fields[1]))
pvals.append(float(fields[5]))
het_deficit.append(float(fields[6]))
het_excess.append(float(fields[7]))
fdr_bool_list, fdr_pvalue_list, alpha_S, alpha_B = \
multi_correction.multipletests(pvals, alpha=float(alpha),
method="fdr_bh")
snp_pvals = OrderedDict()
for pos, pval in zip(snp_pos, fdr_pvalue_list):
snp_pvals["-".join(pos)] = pval
with open(vcf_file) as vcf_fh, open(vcf_outfile, "w") as ofh:
for line in vcf_file:
if line.startswith("#"):
ofh.write(line)
elif line.strip() != "":
fields = line.split()
# Check pval for locus
pos = "-".join(fields[0], fields[1])
if snp_pvals[pos] <= 0.05:
ofh.write(line)
continue
else:
lefter=0
for j in range(int(h), max(rand_heights_dist)+1):
lefter+=rand_heights_dist[j]
height_to_pval[h]=lefter/float(rand_sum)
pval_list=[]
for i in obs_heights_count:
if i<1:
continue
pval_list.append(height_to_pval[i])
if len(pval_list)<=1:
return []
if correction_method==2:
qval_list=multipletests(pval_list, method='fdr_bh')[1]
else:
qval_list=[min(x*(len(set([int(y) for y in height_to_pval if y!=0]))), 1.0) for x in pval_list]
ind=0
last_height=0
for j in range(len(obs_heights_count)):
this_height=obs_heights_count[j]
if this_height<1:
last_height=0
continue
if qval_list[ind] <= pval_cutoff:
if this_height==last_height:
chr, last_start, last_end, last_strand, last_height, last_qval=tid_to_qval[-1]
tid_to_qval[-1]=[chr, last_start, tstart+j+1, strand, last_height, last_qval]
else:
tid_to_qval.append([chr, tstart+j, tstart+j+1, strand, obs_heights_count[j], qval_list[ind]]) # chr, start, end, strand, height, this_qval
chi2_fi = one_way(n_selected_term, n_term)
p_fi = special.chdtrc(1, chi2_fi)
sign_fi = np.sign(n_selected_term - np.mean(n_selected_term)).ravel() # pylint: disable=no-member
# Two-way chi-square test for specificity of activation
cells = np.array([[n_selected_term, n_selected_noterm], # pylint: disable=no-member
[n_unselected_term, n_unselected_noterm]]).T
chi2_ri = two_way(cells)
p_ri = special.chdtrc(1, chi2_ri)
sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel() # pylint: disable=no-member
# Multiple comparisons correction across terms. Separately done for FI and RI.
if correction is not None:
_, p_corr_fi, _, _ = multipletests(p_fi, alpha=u, method=correction,
returnsorted=False)
_, p_corr_ri, _, _ = multipletests(p_ri, alpha=u, method=correction,
returnsorted=False)
else:
p_corr_fi = p_fi
p_corr_ri = p_ri
# Compute z-values
z_corr_fi = p_to_z(p_corr_fi, 'two') * sign_fi
z_corr_ri = p_to_z(p_corr_ri, 'two') * sign_ri
# Effect size
# est. prob. of brain state described by term finding activation in ROI
p_selected_g_term_g_prior = prior * p_selected_g_term + (1 - prior) * p_selected_g_noterm
# est. prob. of activation in ROI reflecting brain state described by term
p_term_g_selected_g_prior = p_selected_g_term * prior / p_selected_g_term_g_prior
brain_mask = path.abspath(path.expanduser(brain_mask))
try:
img = nib.load(p_file)
brain_mask = nib.load(brain_mask)
except (FileNotFoundError, nib.py3k.FileNotFoundError):
return None,None,None,None,None
data = img.get_data()
brain_mask = brain_mask.get_data()
header = img.header
affine = img.affine
shape = data.shape
data = data.flatten()
brain_mask = brain_mask.flatten()
brain_mask = brain_mask.astype(bool)
brain_data = data[brain_mask]
reject, nonzero_data, _, _ = multipletests(brain_data, p_level, method="fdr_bh")
brain_mask[brain_mask]=reject
brain_mask = brain_mask.astype(int)
mask = brain_mask.reshape(shape)
mask_map = nib.Nifti1Image(mask, affine, header)
masker = NiftiMasker(mask_img=mask_map)
try:
timecourse = masker.fit_transform(ts_file).T
betas = masker.fit_transform(beta_file).T
except ValueError:
return None,None,None,None,None
subplot_title = "\n ".join([str(substitution["subject"]),str(substitution["session"])])
timecourse = np.mean(timecourse, axis=0)
design = pd.read_csv(design_file, skiprows=5, sep="\t", header=None, index_col=False)
design = design*np.mean(betas)
event_df = pd.read_csv(event_file, sep="\t")
# calculate p-value from Fisher's exact test
intersections['a'] = total - intersections[['size1', 'size2', 'intersection']].sum(axis=1)
intersections['b'] = intersections['size1'] - intersections['intersection']
intersections['c'] = intersections['size2'] - intersections['intersection']
intersections['d'] = intersections['intersection']
for i, row in intersections[['d', 'b', 'c', 'a']].astype(int).iterrows():
odds, p = fisher_exact(
row
.values
.reshape((2, 2)),
alternative="greater")
intersections.loc[i, 'odds_ratio'] = odds
intersections.loc[i, 'p_value'] = p
# intersections['q_value'] = intersections['p_value'] * intersections.shape[0]
intersections['q_value'] = multipletests(intersections['p_value'])[1]
intersections['log_p_value'] = (-np.log10(intersections['p_value'])).fillna(0).replace(np.inf, 300)
intersections['log_q_value'] = (-np.log10(intersections['q_value'])).fillna(0).replace(np.inf, 300)
# save
intersections.to_csv(os.path.join(output_dir, output_prefix + ".differential_overlap.csv"), index=False)
intersections = pd.read_csv(os.path.join(output_dir, output_prefix + ".differential_overlap.csv"))
for metric, label, description, fill_value in [
("intersection_max_perc", "percentage_overlap", "max of intersection %", 0),
("log_p_value", "significance", "p-value", 0)]:
print(metric)
# make pivot tables
piv_up = pd.pivot_table(
intersections[(intersections['dir1'] == "up") & (intersections['dir2'] == "up")],
index="group1", columns="group2", values=metric).fillna(fill_value)
piv_down = pd.pivot_table(
print('sampling %d cells (with replacement) per iteration' % n_cell)
print('sampling %d molecules per cell' % v)
with closing(Pool()) as pool:
results = pool.map(sampling_function, repeat(norm_data, n_iter))
results = np.stack(results) # u, z, p
ci = confidence_interval(results[:, :, 1])
results = pd.DataFrame(
data=np.concatenate([np.median(results, axis=0), ci], axis=1),
index=labels,
columns=['U', 'z_approx', 'p', 'z_lo', 'z_hi'])
# add multiple-testing correction
results['q'] = multipletests(results['p'], alpha=alpha, method='fdr_tsbh')[1]
# remove low-value genes whose median sampling value is -inf
neginf = np.isneginf(results['z_approx'])
results.ix[neginf, 'z_lo'] = np.nan
results.ix[neginf, 'z_approx'] = 0
results.ix[neginf, ['p', 'q']] = 1.
results = results[['U', 'z_approx', 'z_lo', 'z_hi', 'p', 'q']].sort_values('q')
results.iloc[:, 1:4] = np.round(results.iloc[:, 1:4], 2)
return results
def BH(pvalues, active_var, s, q=0.2):
decisions = multipletests(pvalues, alpha=q, method="fdr_bh")[0]
TP = decisions[active_var].sum()
FDP = np.true_divide(decisions.sum() - TP, max(decisions.sum(), 1))
power = np.true_divide(TP, s)
total_rejections = decisions.sum()
false_rejections = total_rejections - TP
return FDP, power, total_rejections, false_rejections
def fdr_bh(pv):
import numpy as np
from statsmodels.sandbox.stats.multicomp import multipletests
return multipletests(np.asarray(pv), method="fdr_bh")[1]