# Build a random design matrix, attach a Gaussian noise response, and round-trip
# the data through R's lm() to check that R recovers the same model terms as the
# Python formula object.
X = random_from_categorical_formula(d, size)
X = ML.rec_append_fields(X, 'response', np.random.standard_normal(size))
fname = tempfile.mktemp()  # note: mktemp() is insecure; NamedTemporaryFile is preferred
ML.rec2csv(X, fname)
Rstr = '''
data = read.table("%s", sep=',', header=T)
cur.lm = lm(response ~ %s, data)
COEF = coef(cur.lm)
''' % (fname, d.Rstr)
rpy2.robjects.r(Rstr)
os.remove(fname)
# Compare R's coefficient names with the formula's terms, ignoring the order of
# factors inside interaction terms.
nR = list(np.array(rpy2.robjects.r("names(COEF)")))
nt.assert_true('(Intercept)' in nR)
nR.remove("(Intercept)")
nF = [str(t).replace("_", "").replace("*", ":") for t in d.formula.terms]
nR = sorted([sorted(n.split(":")) for n in nR])
nt.assert_true('1' in nF)
nF.remove('1')
nF = sorted([sorted(n.split(":")) for n in nF])
nt.assert_equal(nR, nF)
return d, X, nR, nF
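# The same round trip can be written without a temporary CSV by pushing arrays
# into R's global environment. A minimal sketch, not from the original test;
# the variable names and the single-predictor model are illustrative.
import numpy as np
import rpy2.robjects as robjects
from rpy2.robjects import numpy2ri

numpy2ri.activate()  # let rpy2 convert numpy arrays automatically
robjects.globalenv['x'] = np.random.standard_normal(100)
robjects.globalenv['y'] = np.random.standard_normal(100)
robjects.r('cur.lm = lm(y ~ x)')
coef_names = list(robjects.r('names(coef(cur.lm))'))
assert '(Intercept)' in coef_names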
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from twx.db.reanalysis import NNRNghData
from scipy import stats
from twx.interp.clibs import clib_wxTopo
import twx.db.ushcn as ushcn
from datetime import datetime
import twx.utils as utils
import mpl_toolkits.basemap as bm
# rpy2 setup: install the numpy-to-R converter so numpy arrays can be passed
# to R directly (legacy style; newer rpy2 uses numpy2ri.activate()).
import rpy2
import rpy2.robjects as robjects
from rpy2.robjects.numpy2ri import numpy2ri
robjects.conversion.py2ri = numpy2ri
r = robjects.r
NCDF_CHK_COLS = 50
USE_ALL_IMP_THRESHOLD = np.round(365.25 * 5.0)
RM_STN_FLAG = "RM_STN_FLAG"
RM_STN_DUP = 1
RM_STN_BAD_DATA = 2
RM_STN_NO_TDI = 3
# The original line is truncated; the fields after STN_ID are an assumed
# reconstruction (a station-id string plus an integer removal flag).
DTYPE_RM_STN = [(STN_ID, "<S16"), (RM_STN_FLAG, np.int32)]
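# With the converter installed, numpy arrays can be handed straight to R
# functions. A minimal sketch (separate from the module above) using the
# modern activate() call:
import numpy as np
import rpy2.robjects as robjects
from rpy2.robjects import numpy2ri

numpy2ri.activate()
arr = np.arange(10, dtype=float)
print(robjects.r['mean'](arr)[0])  # -> 4.5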
def __loadLeafRanks(self):
    """Source the helper R scripts so their functions are callable via robjects.r."""
    utilFileName = PathDefaults.getSourceDir() + "/apgl/metabolomics/R/Util.R"
    leafRanksFileName = PathDefaults.getSourceDir() + "/apgl/metabolomics/R/MSLeafRanks.R"
    robjects.r["source"](utilFileName)
    robjects.r["source"](leafRanksFileName)
def _generate_table(self, declarations):
    """Generates an R data frame from the list of declarations."""
    decl_table = defaultdict(list)
    for decl in declarations:
        decl_dict = self._map_fields(decl)
        # R data frames are column-major, so accumulate values per column.
        for k, v in decl_dict.items():
            decl_table[k].append(v)
    # Translate each column into a properly typed R vector; otherwise R
    # would coerce the data incorrectly.
    return robjects.DataFrame({
        k: (robjects.StrVector(v) if k in STR_COLUMNS else robjects.FloatVector(v))
        for k, v in decl_table.items()
    })
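# robjects.DataFrame accepts a mapping from column names to R vectors, which is
# exactly what the helper above assembles. A standalone sketch with
# illustrative column names:
import rpy2.robjects as robjects

df = robjects.DataFrame({
    "name": robjects.StrVector(["a", "b", "c"]),
    "score": robjects.FloatVector([1.5, 2.0, 0.25]),
})
print(robjects.r["summary"](df))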
def __init__(self, metrics, repo_id, testingCommits):
    """
    @metrics - the list of metrics from the TRAINING data set
    @repo_id - the repository id
    @testingCommits - the commits from the TESTING data set
    """
    self.metrics = metrics
    self.repo_id = repo_id
    # Rename stats' format_perc on import to avoid a symbol-translation clash.
    self.stats = importr('stats', robject_translations={'format_perc': '_format_perc'})
    self.base = importr('base')
    self.readcsv = robjects.r['read.csv']
    self.sig_threshold = 0.05
    self.data = None
    self.commits = testingCommits
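# Binding read.csv as above makes it callable like a Python function, with R's
# named arguments passed as keywords. A minimal sketch; the file name is
# hypothetical.
import rpy2.robjects as robjects

read_csv = robjects.r['read.csv']
df = read_csv('commits.csv', header=True)  # returns an R data.frame
print(robjects.r['nrow'](df)[0])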
def get_confident_sources(self, combo_result_dict=None, n_sources_per_class=10, prob_thresh=0.5):
    """Generate an N-list of confident sources that should be a good
    representation of each science class.
    """
    if combo_result_dict is None:
        combo_result_dict = {}
    robjects.globalenv['pred'] = robjects.IntVector(combo_result_dict['all.pred'])
    robjects.globalenv['maxprob'] = robjects.FloatVector(combo_result_dict['all_top_prob'])
    # KLUDGEY: coerce the string source ids to ints before handing them to R.
    srcid_list = [int(str_srcid) for str_srcid in combo_result_dict['srcid_list']]
    robjects.globalenv['ID'] = robjects.IntVector(srcid_list)
    r_str = '''
    m = %d
    probThresh = %f
    whichConf = which(maxprob > probThresh)  # only look at sources with maxprob > probThresh
    tabConf = table(pred[whichConf])         # class distribution of confident sources
    confAdd = NULL                           # sources to add
    for(ii in 1:length(tabConf)){
        if(tabConf[ii] > 0){  # cycle through confident classes
            if(tabConf[ii]
from joblib import Parallel, delayed
# Connect to an R session
import rpy2.robjects
r = rpy2.robjects.r
# For a Pythonic interface to R
from rpy2.robjects.packages import importr
from rpy2.robjects import Formula, FactorVector
from rpy2.robjects.environments import Environment
from rpy2.robjects.vectors import DataFrame, Vector, FloatVector
from rpy2.rinterface import MissingArg, SexpVector
# Make it so we can send numpy arrays to R
import rpy2.robjects.numpy2ri
rpy2.robjects.numpy2ri.activate()
# load some required packages
# PBS: Eventually we should try/except these to get people
# to install missing packages
lme4 = importr('lme4')
rstats = importr('stats')
fdrtool = importr('fdrtool')
ssvd = importr('ssvd')
if hasattr(lme4, 'coef'):
    r_coef = lme4.coef
else:
    r_coef = rstats.coef
if hasattr(lme4, 'model_matrix'):
    r_model_matrix = lme4.model_matrix
else:
    r_model_matrix = rstats.model_matrix
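# The try/except that the 'PBS' comment above asks for could look like this.
# A minimal sketch using the RRuntimeError that older rpy2 raises when an R
# package is missing (newer rpy2 raises PackageNotInstalledError instead):
from rpy2.robjects.packages import importr
from rpy2.rinterface import RRuntimeError

for pkg in ('lme4', 'stats', 'fdrtool', 'ssvd'):
    try:
        importr(pkg)
    except RRuntimeError:
        raise ImportError("R package %r is missing; run install.packages(%r) in R" % (pkg, pkg))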
LabelsPath : Cell population annotations file path (.csv).
CV_RDataPath : Cross-validation RData file path (.RData), obtained from the Cross_Validation.R function.
OutputDir : Output directory defining the path of the exported file.
GeneOrderPath : Gene order file path (.csv) obtained from feature selection,
    defining the gene order for each cross-validation fold; default is NULL.
NumGenes : Number of genes used in case of feature selection (integer); default is 0.
'''
# Load the cross-validation folds saved by Cross_Validation.R.
robjects.r['load'](CV_RDataPath)
nfolds = np.array(robjects.r['n_folds'], dtype='int')
tokeep = np.array(robjects.r['Cells_to_Keep'], dtype='bool')
col = np.array(robjects.r['col_Index'], dtype='int')
col = col - 1  # R indices are 1-based; shift to 0-based for pandas
test_ind = np.array(robjects.r['Test_Idx'])
train_ind = np.array(robjects.r['Train_Idx'])
# Read the data and labels, keeping only the filtered cells.
data = pd.read_csv(DataPath, index_col=0, sep=',')
labels = pd.read_csv(LabelsPath, header=0, index_col=None, sep=',', usecols=col)
labels = labels.iloc[tokeep]
data = data.iloc[tokeep]
# Read the ranked-feature file when feature selection is requested.
if NumGenes > 0:
    features = pd.read_csv(GeneOrderPath, header=0, index_col=None, sep=',')
os.chdir(OutputDir)
if NumGenes == 0:
def ConfidIntervals(self, data, p):
    """Compute the two-sided confidence interval of the sample mean at level p."""
    data = robjects.FloatVector(data)  # convert the list so RPy2 can hand it to R
    alpha = 1 - p
    rsqrt = robjects.r['sqrt']  # R square-root function
    rsd = robjects.r['sd']      # R standard-deviation function
    rmean = robjects.r['mean']  # R mean function
    t = len(data)
    n = rsqrt(t)
    b = rsd(data)
    rqt = robjects.r['qt']  # quantile function of the t distribution
    q = rqt(1 - (alpha / 2), t - 1)
    m = rmean(data)            # sample mean
    me = q[0] * (b[0] / n[0])  # margin of error
    # lower and upper bounds
    lo = m[0] - me
    up = m[0] + me
    return [lo, up]
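# As a sanity check, R's t.test reports the same interval directly. A minimal
# sketch, separate from the class above:
import rpy2.robjects as robjects

robjects.globalenv['x'] = robjects.FloatVector([4.1, 3.9, 4.4, 4.0, 4.2, 3.8])
ci = robjects.r('t.test(x, conf.level=0.95)$conf.int')
print(list(ci))  # [lower, upper], matching ConfidIntervals(data, 0.95)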
import logging
import warnings
import rpy2.robjects as robjects
import rpy2.rinterface as rinterface
from rpy2.robjects.packages import importr
# Suppress noisy warnings from R.
if hasattr(rinterface, "RRuntimeWarning"):
    warnings.simplefilter("ignore", rinterface.RRuntimeWarning)
else:
    # Older versions of rpy2 don't have RRuntimeWarning; they use UserWarning instead.
    warnings.simplefilter("ignore", UserWarning)
try:
    ape = importr("ape")
    ARGmetrics = importr("ARGmetrics")
    if not robjects.r('packageVersion("ARGmetrics") >= "0.0.2.0"')[0]:
        raise ImportError
except (ImportError, rinterface.RRuntimeError):
    logging.warning("ARGmetrics in R not installed or too old (requires >= 0.0.2.0). "
                    "Install the latest version from source by syncing with git and doing e.g.\n"
                    "> R CMD INSTALL ARGmetrics")
    raise
def get_metric_names():
    """Return the list of names of the computed metrics."""
    # We could do this with
    #     list(pandas.DataFrame(columns=ARGmetrics.genome_trees_dist().names))
    # but that is extremely slow, so build the list of strings directly.
    return [n for n in ARGmetrics.genome_trees_dist().names if n != 'rgt']