Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
E.info("Running {} regression for trait 2: {}".format(model2,
trait2))
R('''source("%(scriptsdir)s/PET_functions.R")''' % locals())
E.info("Pushing data objects into the R environment")
# push everything into the R environment
r_df = py2ri.py2ri_pandasdataframe(dataframe)
R.assign("data.df", r_df)
r_snps = ro.StrVector([sp for sp in snps])
R.assign("snp.list", r_snps)
E.info("Parsing covariates")
covars = covars.split(",")
r_covar = ro.StrVector([cv for cv in covars])
R.assign("covar.list", r_covar)
E.info("{} covariates found to adjust "
"in regression models".format(len(covars)))
# clean up, replacing "missing values" with NAs for R
R('''data.df[data.df == -9] <- NA''')
R('''pet_results <- list()''')
# loop over all SNP, calculate PCC and p-value
# this takes a long time <- need to think of speed ups
# possible Python-pure implementation, i.e. with LIMIX?
E.info("Iteratively calculating PCC for all SNPs")
R('''results <- loopPET(data.df=data.df, trait1="%(trait1)s", trait2="%(trait2)s", '''
'''trait1.link=trait1.link, trait2.link=trait2.link, '''
'''trait1.mod=trait1.mod, trait2.mod=trait2.mod, covars=covar.list,'''
'''resamples=%(resamples)i, snp.list=snp.list)''' % locals())
def probit_fit(T, Y):
rpy2.robjects.numpy2ri.activate()
rpy.r.assign('T', T)
rpy.r.assign('Y', Y.astype(np.int))
rpy.r('''
Y = as.numeric(Y)
T = as.numeric(T)
M = glm(Y ~ ns(T, 10), family=binomial(link='probit'))
fitfn = function(t) { predict(M, newdata=data.frame(T=t), type='link') }
''')
rpy2.robjects.numpy2ri.deactivate()
def fitfn(t):
rpy2.robjects.numpy2ri.activate()
fitfn_r = rpy.r('fitfn')
val = fitfn_r(t)
rpy2.robjects.numpy2ri.deactivate()
return ndist.cdf(val)
return fitfn
values1, values2, paired=False, correct=True, *xargs, **kwargs)
elif options.method == "paired-mwu":
result = R.wilcox_test(
values1, values2, paired=True, correct=True, *xargs, **kwargs)
elif options.method == "paired-t":
result = R.t_test(values1, values2, paired=True, *xargs, **kwargs)
elif options.method == "shapiro":
if len(values1) > 5000:
E.warn(
"shapiro-wilk test only accepts < 5000 values, a random sample has been created.")
values1 = random.sample(values1, 5000)
result = R.shapiro_test(values1, *xargs, **kwargs)
if options.plot:
R.assign("v1", values1)
R.assign("v2", values2)
if options.title:
# set the size of the outer margins - the title needs to be added at the end
# after plots have been created
R.par(oma=R.c(0, 0, 4, 0))
R.layout(R.matrix((1, 2, 3, 4), 2, 2, byrow=True))
R.boxplot(values1, values2, col=('white', 'red'), main="Boxplot")
R("""qqplot( v1, v2, main ='Quantile-quantile plot' ); lines( c(0,1), c(0,1) );""")
# compute breaks:
min_value = min(min(values1), min(values2))
if options.min_value is not None:
min_value = min(min_value, options.min_value)
result = R.wilcox_test(
values1, values2, paired=False, correct=True, *xargs, **kwargs)
elif options.method == "paired-mwu":
result = R.wilcox_test(
values1, values2, paired=True, correct=True, *xargs, **kwargs)
elif options.method == "paired-t":
result = R.t_test(values1, values2, paired=True, *xargs, **kwargs)
elif options.method == "shapiro":
if len(values1) > 5000:
E.warn(
"shapiro-wilk test only accepts < 5000 values, a random sample has been created.")
values1 = random.sample(values1, 5000)
result = R.shapiro_test(values1, *xargs, **kwargs)
if options.plot:
R.assign("v1", values1)
R.assign("v2", values2)
if options.title:
# set the size of the outer margins - the title needs to be added at the end
# after plots have been created
R.par(oma=R.c(0, 0, 4, 0))
R.layout(R.matrix((1, 2, 3, 4), 2, 2, byrow=True))
R.boxplot(values1, values2, col=('white', 'red'), main="Boxplot")
R("""qqplot( v1, v2, main ='Quantile-quantile plot' ); lines( c(0,1), c(0,1) );""")
# compute breaks:
min_value = min(min(values1), min(values2))
if options.min_value is not None:
def do_multivariate_cox(time, censor, var, additional_vars, float_vars=False):
df = pd.DataFrame({
'time': time,
'censor': censor,
'var': var})
df = df.join(additional_vars, how='inner')
surv = importr('survival')
r.assign('time',robjects.IntVector(np.array(df['time'])))
r.assign('censor', robjects.IntVector(np.array(df['censor'])))
r.assign('var', robjects.FloatVector(np.array(df['var'])))
for i in additional_vars:
if float_vars:
r.assign(i, robjects.FloatVector(np.array(df[i])))
else:
r.assign(i, robjects.IntVector(np.array(df[i])))
additional_vars_str = ' + '.join(additional_vars.columns)
formula = 'Surv(time, censor) ~ var + ' + additional_vars_str
coxuh_output = r('summary( coxph(formula = ' + formula + ', model=FALSE, x=FALSE, y=FALSE))')
coef_ind = list(coxuh_output.names).index('coefficients')
coeffs = coxuh_output[coef_ind]
patient_count_ind = list(coxuh_output.names).index('n')
patient_count = coxuh_output[patient_count_ind][0]
var_zscore = get_zscore('var', coeffs)
var_pvalue = get_pvalue('var', coeffs)
def plot(self, hardcopy=None):
if hardcopy:
R.png(hardcopy, width=1024, height=768, type="cairo")
R.require('qvalue')
# build a qobj
R.assign("pval", self.mPValues)
R.assign("pi0", self.mPi0)
R.assign("qval", self.mQValues)
R.assign("lambda", self.mLambda)
R("""qobj <-list( pi0=pi0, qvalues=qval, pvalues=pval, lambda=lambda)""")
R(""" class(qobj) <- "qvalue" """)
R("""qplot(qobj)""")
if hardcopy:
R.dev_off()
def BHfilter(pval, q=0.2):
robjects.r.assign('pval', pval)
robjects.r.assign('q', q)
robjects.r('Pval = p.adjust(pval, method="BH")')
robjects.r('S = which((Pval < q)) - 1')
S = robjects.r('S')
ind = np.zeros(pval.shape[0], np.bool)
ind[np.asarray(S, np.int)] = 1
return ind
def main(sample_dir):
max_freq = 1.5
out_file = os.path.join(sample_dir, "patient_sample_heatmap.pdf")
freqs = {}
for fname in glob.glob(os.path.join(sample_dir, "*.vcf")):
sample_name = os.path.splitext(os.path.basename(fname))[0]
with open(fname) as in_handle:
freqs[sample_name] = read_aa_vals(in_handle, max_freq)
df = pandas.DataFrame(freqs)
rpy.r.assign("out_file", out_file)
rpy.r.assign("df", com.convert_to_r_dataframe(df))
rpy.r.assign("col_names", df.columns.tolist())
rpy.r('''
library(pheatmap)
# push all elements into the R environment
R('''sink(file="sink.text")''')
R('''suppressPackageStartupMessages(library(coloc))''')
R('''source("%(scriptsdir)s/coloQtl.R")''' % locals())
E.info("Pushing results tables into R environment")
py2ri.activate()
np2ri.activate()
r_trait1 = py2ri.py2ri(trait1)
R.assign("r.trait1", r_trait1)
r_trait2 = py2ri.py2ri(trait2)
R.assign("r.trait2", r_trait2)
r_maf = py2ri.py2ri(maf_table)
R.assign("r.mafs", r_maf)
if trait1_prev:
R.assign("trait1.prev", trait1_prev)
else:
R('''trait1.prev <- NULL''')
if trait2_prev:
R.assign("trait2.prev", trait2_prev)
else:
R('''trait2.prev <- NULL''')
E.info("Checking for gene list")
if gene_list:
E.info("Gene list contains {} genes".format(len(set(gene_list))))
r_genes = ro.StrVector([rx for rx in set(gene_list)])
R.assign("gene.list", r_genes)