Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def genResampleData(infile, outfile):
'''
Resample the data n-times with replacement - generates
n flat files which are then propagated at later stages.
Files are generally small though.
'''
time_agg = TIME.__dict__['track2groups'].keys()
time_points = [int(str(x).split("-")[1]) for x in time_agg]
time_points.sort()
time_points = list(set(time_points))
rep_agg = REPLICATE.__dict__['track2groups'].keys()
replicates = [str(x).split("-")[2] for x in rep_agg]
time_rep_comb = [x for x in itertools.product(time_points, replicates)]
time_cond = ro.StrVector([x[0] for x in time_rep_comb])
rep_cond = ro.StrVector([x[1] for x in time_rep_comb])
ref_gtf = str(infile).split("-")[1]
condition = (str(infile).split("-")[0]).strip("deseq.dir/")
time_points = ",".join([str(i) for i in time_points])
replicates = ",".join(replicates)
statement = '''
python %(scriptsdir)s/data2resamples.py
--log=%(outfile)s.log
--time=%(time_points)s
--replicates=%(replicates)s
--condition=%(condition)s
--resamples=%(resampling_resample)s
--input-gtf=%(ref_gtf)s
--output-file-directory=clustering.dir
--seed=%(resampling_seed)s
str(r_cov[2]) + r_ovp[2] + " + " +
str(r_cov[3]) + r_ovp[3] + " -")
else:
cov_str = (
'Group1 (Blue): ' + str(r_cov[0]) + r_ovp[0] +
'; Group2 (Red): ' + str(r_cov[2]) + r_ovp[2]
) if strand == '+' else (
'Group1 (Blue): ' + str(r_cov[1]) + r_ovp[1] +
'; Group2 (Red): ' + str(r_cov[3]) + r_ovp[3])
titles.append(
chrm + ":" + strand + ' ' + stat +
" ::: Coverage: " + cov_str)
Titles = r.DataFrame({
'Title':r.StrVector(titles),
'Region':r.StrVector(zip(*plot_intervals)[0])})
return Titles, plot_types
BaseStart.append(unicode(i + reg_data.start))
else:
if zero_start:
BaseStart.append(unicode(
reg_data.end - reg_data.start - i - 1))
else:
BaseStart.append(unicode(
reg_data.end - i - 1))
Bases.append(base)
BaseRegion.append(reg_data.reg_id)
BaseStrand.append(REV_STRAND)
return r.DataFrame({
'Position':r.FloatVector(BaseStart),
'Base':r.StrVector(Bases),
'Region':r.StrVector(BaseRegion),
'Strand':r.FactorVector(
r.StrVector(BaseStrand),
ordered=True, levels=r.StrVector((FWD_STRAND, REV_STRAND)))})
def empty_graph(node_names):
    '''
    Call the R function ``empty.graph`` on a set of node names.

    node_names: any iterable of node labels. It is materialised into a
    list and then converted to an R character vector before the call.

    Returns the object produced by the R ``empty.graph`` call, unchanged.
    '''
    # Materialise first so one-shot iterables (generators) are accepted.
    r_labels = rpy2.robjects.StrVector(list(node_names))
    # The R function is looked up by name only after the labels are built.
    return rpy2.robjects.r['empty.graph'](r_labels)
Position.extend(
interval_start + base_i + start_offset +
np.linspace(0, 1, stop - start, endpoint=False))
Signal.extend(r_sig[start-overlap_seg_data[0]:
stop-overlap_seg_data[0]])
Read.extend(list(repeat(
str(r_num) + '_' + group_num, stop - start)))
Strand.extend(list(repeat(
FWD_STRAND if r_strand == '+' else
REV_STRAND, stop - start)))
Region.extend(list(repeat(region_i, stop - start)))
return r.DataFrame({
'Position':r.FloatVector(Position),
'Signal':r.FloatVector(Signal),
'Read':r.StrVector(Read),
'Strand':r.StrVector(Strand),
'Region':r.StrVector(Region),
'Group':r.StrVector(list(repeat(group_num, len(Position))))})
def get_base_r_data(all_reg_data, all_reg_base_data):
    '''
    Flatten per-region base sequences into a three-column R data frame.

    all_reg_data: iterable whose items unpack as
        (region id, interval start, chrom, reads); only the region id
        and the interval start are used here.
    all_reg_base_data: iterable of base sequences, paired positionally
        with all_reg_data via zip.

    Returns an r.DataFrame with one row per base and the columns
    'Position', 'Base' and 'Region'.
    '''
    # One (position, base, region) record per base, regions in input order.
    # NOTE(review): the position is stringified and later handed to
    # r.FloatVector - kept as in the original; confirm this is intended.
    records = [
        (str(offset + reg_start), nucleotide, reg_id)
        for (reg_id, reg_start, _chrm, _reads), reg_bases in zip(
            all_reg_data, all_reg_base_data)
        for offset, nucleotide in enumerate(reg_bases)]

    positions = [record[0] for record in records]
    nucleotides = [record[1] for record in records]
    region_ids = [record[2] for record in records]

    return r.DataFrame({
        'Position': r.FloatVector(positions),
        'Base': r.StrVector(nucleotides),
        'Region': r.StrVector(region_ids)})
str(r_cov[1]) + r_ovp[1] + " -; Group2 (Red): " +
str(r_cov[2]) + r_ovp[2] + " + " +
str(r_cov[3]) + r_ovp[3] + " -")
else:
cov_str = (
'Group1 (Blue): ' + str(r_cov[0]) + r_ovp[0] +
'; Group2 (Red): ' + str(r_cov[2]) + r_ovp[2]
) if strand == '+' else (
'Group1 (Blue): ' + str(r_cov[1]) + r_ovp[1] +
'; Group2 (Red): ' + str(r_cov[3]) + r_ovp[3])
titles.append(
chrm + ":" + strand + ' ' + stat +
" ::: Coverage: " + cov_str)
Titles = r.DataFrame({
'Title':r.StrVector(titles),
'Region':r.StrVector(zip(*plot_intervals)[0])})
return Titles, plot_types
except Queue.Empty:
sleep(1)
continue
# empty any entries left in queue after processes have finished
while not dists_q.empty():
row_dists = dists_q.get(block=False)
reg_sig_diff_dists.append(row_dists)
reg_sig_diff_dists = zip(*sorted(reg_sig_diff_dists))[1]
reg_sig_diff_dists = r.r.matrix(
r.FloatVector(np.concatenate(reg_sig_diff_dists)),
ncol=len(reg_sig_diffs), byrow=True)
if r_struct_fn is not None:
reg_sig_diff_dists.colnames = r.StrVector(
['::'.join((seq, chrm, strand, str(start))) for seq, (
region_i, (chrm, interval_start, strand, stat)) in
zip(reg_seqs, uniq_p_intervals)])
r_struct_fn = r.StrVector([r_struct_fn,])
else:
r_struct_fn = r.NA_Character
if VERBOSE: sys.stderr.write('Plotting (and saving data).\n')
r.r(resource_string(__name__, 'R_scripts/plotSigMDS.R'))
r.r('pdf("' + pdf_fn + '", height=7, width=7)')
r.globalenv['plotSigMDS'](reg_sig_diff_dists, r_struct_fn)
r.r('dev.off()')
return
def module_eigengenes(self, membership):
    '''
    Wrapper around the moduleEigengenes function of wgcna().

    The soft power is read from self.params['power'] when that key is
    present and defaults to 6 otherwise. The expression argument is
    self.transpose_data() converted with base().as_data_frame.

    membership: module assignment, converted with ro.StrVector and
        passed as 'colors'.
        NOTE(review): previously documented as a gene -> membership
        dict; it is handed to ro.StrVector as-is - confirm the shape
        callers actually pass.

    Returns the result of moduleEigengenes.
    '''
    if 'power' in self.params:
        soft_power = self.params['power']
    else:
        soft_power = 6

    # Arguments are built before wgcna() is called, keeping the
    # original evaluation order.
    expression_frame = base().as_data_frame(self.transpose_data())
    module_colors = ro.StrVector(membership)

    return wgcna().moduleEigengenes(
        softPower=soft_power,
        expr=expression_frame,
        colors=module_colors)
raise
train_featname_longfeatval_dict = traindata_dict['featname_longfeatval_dict']
for feat_name, feat_longlist in train_featname_longfeatval_dict.iteritems():
train_featname_longfeatval_dict[feat_name] = robjects.FloatVector(feat_longlist)
traindata_dict['features'] = robjects.r['data.frame'](**train_featname_longfeatval_dict)
traindata_dict['classes'] = robjects.StrVector(traindata_dict['class_list'])
robjects.globalenv['xtr'] = traindata_dict['features']
robjects.globalenv['ytr'] = traindata_dict['classes']
test_featname_longfeatval_dict = testdata_dict['featname_longfeatval_dict']
for feat_name, feat_longlist in test_featname_longfeatval_dict.iteritems():
test_featname_longfeatval_dict[feat_name] = robjects.FloatVector(feat_longlist)
testdata_dict['features'] = robjects.r['data.frame'](**test_featname_longfeatval_dict)
testdata_dict['classes'] = robjects.StrVector(testdata_dict['class_list'])
robjects.globalenv['xte'] = testdata_dict['features']
robjects.globalenv['yte'] = testdata_dict['classes']
robjects.globalenv['actlearn_sources_freqsignifs'] = robjects.FloatVector(actlearn_sources_freqsignifs)
robjects.globalenv['both_user_match_srcid_bool'] = robjects.BoolVector(both_user_match_srcid_bool)
r_str = '''
cat("In R code\n")
random_seed = %d
set.seed(random_seed)
m=%d
ntrees=%d
mtry=%d