def test1_maelstrom(self):
    """Test Motif Activity by Ensemble Learning (maelstrom)."""
    run_maelstrom(
        self.clusters,
        "mm10",
        self.outdir,
        score_table=self.score_table,
        count_table=self.count_table,
        plot=False,
    )
    df = pd.read_table(self.outfile, index_col=0, comment="#")
    self.assertEqual((623, 4), df.shape)
    # Clean up the output files created by the run.
    for fname in glob(os.path.join(self.outdir, "activity*")):
        os.unlink(fname)
    for fname in glob(os.path.join(self.outdir, "gimme.verte*")):
        os.unlink(fname)
    os.unlink(self.outfile)
        plot_dist_dguides(dstep, dpam, plotpf)
    else:
        logging.warning(f'not found: {dstepp}')

# make plot_dna_features_view
stepi = 3
plotd = f"{datad}/plot_d{cfg[stepi].replace('/', '').split('_')[-1]}_dna_features_view"
plotps = glob(plotd + '/*')
if len(plotps) == 0 or cfg['force']:
    dguidesp = f"{cfg[stepi]}/d{cfg[stepi].replace('/', '').split('_')[-1]}.tsv"
    dsequencesp = f"{cfg[stepi - 2]}/d{cfg[stepi - 2].replace('/', '').split('_')[-1]}.tsv"
    if exists(dguidesp):
        logging.info('plot_dna_features_view')
        plot_dna_features_view(
            cfg,
            dsequences=del_Unnamed(pd.read_table(dsequencesp, keep_default_na=False)).drop_duplicates(),
            dguides=del_Unnamed(pd.read_table(dguidesp, keep_default_na=False)).drop_duplicates(),
            plotd=plotd,
            more=False,
        )
    else:
        # warn about the guides table that was actually checked above
        logging.warning(f'not found: {dguidesp}')
# # step2: make submap  # FIXME: get all the columns used for plotting in the dguides.
# stepi = 3
# plotp = f"{datad}/plot_d{cfg[stepi].replace('/', '').split('_')[-1]}_submap_used_for_mutagenesis"
# plotps = glob(plotp + '*')
# if len(plotps) == 0 or cfg['force']:
#     plotpf = plotp + "_{mutation_type}.png"
#     dstepp = f"{cfg[stepi]}/d{cfg[stepi].replace('/', '').split('_')[-1]}.tsv"
#     dstep = del_Unnamed(pd.read_table(dstepp)).drop_duplicates()
#     logging.info('plot_submap_possibilities')
#     plot_submap_possibilities(dmutagenesis=dstep, plotpf=plotpf, test=False)
def _load_data(self, fname):
    """Load a data file by extension: TSV via pandas, neuroimaging formats via nibabel."""
    _, _, ext = split_filename(fname)
    if ext == '.tsv':
        return pd.read_table(fname, index_col=0)
    elif ext in ('.nii', '.nii.gz', '.gii'):
        return nb.load(fname)
    raise ValueError("Unknown file type: %s" % fname)
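
# A self-contained variant of the same extension dispatch, for illustration only:
# it swaps the host package's split_filename helper for pathlib suffix handling
# (an assumption, not the original API).
from pathlib import Path

import nibabel as nb
import pandas as pd

def load_data(fname):
    suffix = ''.join(Path(fname).suffixes)  # keeps compound suffixes like '.nii.gz'
    if suffix == '.tsv':
        return pd.read_table(fname, index_col=0)
    if suffix in ('.nii', '.nii.gz', '.gii'):
        return nb.load(fname)
    raise ValueError("Unknown file type: %s" % fname)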
def check_for_job_done(mission_id):
    # Poll until the server reports that the result file has been created.
    while True:
        r = requests.get('http://www.enrichnet.org/filecreated.php?temp={}'.format(mission_id))
        if len(r.text) < len("Success"):
            # Result file not created yet; wait and retry.
            time.sleep(20)
            continue
        if r.status_code == 404:
            time.sleep(20)
            continue
        elif r.status_code == 200:
            break
        else:
            raise Exception("Unclear status code while querying the server")
    r = requests.get("http://www.enrichnet.org/file2.php?filen=C:/xampp/htdocs/enrichnet/pages/"
                     "tmp/{}/enrichnet_ranking_table.txt".format(mission_id))
    return pd.read_table(StringIO(r.text))
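
# A minimal sketch of the same polling pattern with a retry cap, since the loop
# above can spin forever if the job never finishes. The cap and delay values are
# assumptions, not part of the original script.
import time

import requests

def poll_with_limit(url, max_tries=30, delay=20):
    for _ in range(max_tries):
        r = requests.get(url)
        if r.status_code == 200 and len(r.text) >= len("Success"):
            return r
        time.sleep(delay)
    raise TimeoutError("job did not finish after %d tries: %s" % (max_tries, url))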
# collapse the walk-access, drive-egress transit modes into a single label
drv_transit_modes = ['wlk_lrf_drv', 'wlk_exp_drv', 'wlk_hvy_drv', 'wlk_com_drv']
active_adult_trips_df.loc[active_adult_trips_df['trip_mode_str'].isin(drv_transit_modes), 'active_mode'] = 'wTrnD'
active_adult_trips_df.loc[:, 'active_minutes'] = 0.0
# print(active_adult_trips_df['active_mode'].value_counts())
# print(active_adult_trips_df['time_period'].value_counts())
active_adult_trips_df_len = len(active_adult_trips_df)

# figure out how many minutes of activity per trip: join with activeTimeSkims
for time_period in ['EA', 'AM', 'MD', 'PM', 'EV']:
    filename = os.path.join("database", "ActiveTimeSkimsDatabase%s.csv" % time_period)
    print("%s Reading %s" % (datetime.datetime.now().strftime("%x %X"), filename))
    skim_df = pandas.read_table(filename, sep=",")
    skim_df.loc[:, 'time_period'] = time_period
    for active_mode in ['walk', 'bike', 'wTrnW', 'dTrnW', 'wTrnD']:
        # get the skim for this mode
        skim_tp_df = skim_df.loc[:, ['orig', 'dest', 'time_period', active_mode]]
        skim_tp_df.loc[:, 'active_mode'] = active_mode
        skim_tp_df.rename(columns={'orig': 'orig_taz', 'dest': 'dest_taz'}, inplace=True)
        # join it, adding an active_mode-named column
        active_adult_trips_df = pandas.merge(left=active_adult_trips_df,
                                             right=skim_tp_df,
                                             on=['orig_taz', 'dest_taz', 'time_period', 'active_mode'],
                                             how='left')
        # set those minutes
        active_adult_trips_df.loc[active_adult_trips_df[active_mode].notnull(), 'active_minutes'] = active_adult_trips_df[active_mode]
        # drop the mode-named column now that the minutes are recorded
        active_adult_trips_df.drop([active_mode], axis=1, inplace=True)
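
# Toy illustration of the per-mode skim join above, runnable standalone
# (the column names mirror the snippet; the values are made up).
import pandas as pd

trips = pd.DataFrame({'orig_taz': [1], 'dest_taz': [2],
                      'time_period': ['AM'], 'active_mode': ['walk'],
                      'active_minutes': [0.0]})
skim = pd.DataFrame({'orig_taz': [1], 'dest_taz': [2],
                     'time_period': ['AM'], 'active_mode': ['walk'],
                     'walk': [12.5]})
merged = pd.merge(left=trips, right=skim,
                  on=['orig_taz', 'dest_taz', 'time_period', 'active_mode'],
                  how='left')
merged.loc[merged['walk'].notnull(), 'active_minutes'] = merged['walk']
merged.drop(['walk'], axis=1, inplace=True)
print(merged)  # active_minutes is now 12.5 for the matched row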
logging.basicConfig(filename=opts.log_file,
                    format='%(levelname)s (%(asctime)s): %(message)s')
log = logging.getLogger(name)
if opts.verbose:
    log.setLevel(logging.DEBUG)
else:
    log.setLevel(logging.INFO)
log.debug(opts)

# Read motif names and URLs from the MEME databases.
motifs = dict()
for meme_file in opts.meme_db:
    log.info('Parse %s' % meme_file)
    motifs.update(parse_meme(meme_file))

# Annotate the Tomtom results with each target motif's name and URL.
log.info('Enrich %s' % opts.tomtom_file)
tom = pd.read_table(opts.tomtom_file)
tom.rename(columns={'#Query ID': 'Query ID'}, inplace=True)
tom['Target name'] = ''
tom['URL'] = ''
for i in tom.index:
    target = tom.loc[i, 'Target ID']
    if target in motifs:
        tom.loc[i, 'Target name'] = motifs[target][0]
        tom.loc[i, 'URL'] = motifs[target][1]

# to_csv returns the table as a string when out_file is None; print it in that case.
t = tom.to_csv(opts.out_file, sep='\t', index=False)
if t is not None:
    print(t, end='')
log.info('Done!')
return 0
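
# Minimal illustration of the lookup above: parse_meme is assumed to return a
# mapping of motif ID to a (name, URL) tuple, as implied by the indexing
# motifs[target][0] / motifs[target][1]. The entry below is a made-up example.
motifs_example = {'MA0001.1': ('AGL3', 'http://example.org/MA0001.1')}
name, url = motifs_example.get('MA0001.1', ('', ''))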
def readAndMerge(df, cpg_annotations_inf):
    """Join CpG annotations onto df by (contig, position), then melt to long format."""
    tmp_df = pd.read_table(cpg_annotations_inf)
    tmp_df.set_index(["contig", "position"], inplace=True)
    tmp_df = tmp_df.join(df, how="inner")
    tmp_df = pd.melt(tmp_df, id_vars=["feature", "CpG_density"])
    return tmp_df
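
# Runnable toy version of the join + melt above (the annotation columns mirror
# the snippet; the values are invented).
import pandas as pd

meth = pd.DataFrame({'contig': ['chr1', 'chr1'], 'position': [100, 200],
                     'meth': [0.8, 0.1]}).set_index(['contig', 'position'])
ann = pd.DataFrame({'contig': ['chr1', 'chr1'], 'position': [100, 200],
                    'feature': ['CGI', 'shore'],
                    'CpG_density': [0.9, 0.4]}).set_index(['contig', 'position'])
long_df = pd.melt(ann.join(meth, how='inner'), id_vars=['feature', 'CpG_density'])
print(long_df)  # one row per (feature, CpG_density, variable='meth', value)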
def execute_classification(good_variants, bad_variants, grey_variants, model, output_handle, user_features, threshold):
    # Write predictions next to the input file; avoid shadowing the builtin `dir`.
    out_dir = os.path.dirname(os.path.realpath(good_variants))
    print('Loading data...')
    good = pd.read_table(good_variants)
    bad = pd.read_table(bad_variants)
    grey = pd.read_table(grey_variants)
    pred, prob = classification(good, bad, grey, model, user_features, threshold)
    grey['Probability'] = prob
    grey['Good'] = pred
    predicted_good = grey[grey['Good'] == 1]
    predicted_bad = grey[grey['Good'] == 0]
    print('Number of predicted good variants: ' + str(predicted_good.shape[0]))
    print('Number of predicted bad variants: ' + str(predicted_bad.shape[0]))
    print('\nWriting data...')
    predicted_good.to_csv(os.path.join(out_dir, 'predicted_good.' + output_handle),
                          header=False, index=False, sep='\t', na_rep='NA')
    predicted_bad.to_csv(os.path.join(out_dir, 'predicted_bad.' + output_handle),
                         header=False, index=False, sep='\t', na_rep='NA')
    print('Done.')
def convertSampleToVector2DList(sampleSeq3DArr, nb_windows, refMatrFileName):
    """
    Convert the raw data to a probability matrix.
    PARAMETERS
    ----------
    sampleSeq3DArr: 3D List
        List - numpy matrix (3D)
        Sample List: List (nb_windows, nb_samples, SEQLen/nb_windows, 100)
    """
    # Map each reference sequence to its row index in the reference matrix.
    rawDataFrame = pd.read_table(refMatrFileName, sep='\t', header=None)
    raw_data_seq_index_df = pd.DataFrame({'seq': rawDataFrame[0], 'indexing': rawDataFrame.index})
    raw_data_seq_df_index_dict = raw_data_seq_index_df.set_index('seq')['indexing'].to_dict()
    nb_raw_data_frame_column = len(rawDataFrame.columns)
    nb_sample = sampleSeq3DArr.shape[0]
    len_seq = len(sampleSeq3DArr[1])
    re_statement = ".{%d}" % (nb_windows)
    # Preallocate one zero tensor per window offset.
    probMatr_list = []
    for tmp_idx in range(nb_windows):
        probMatr_list.append(np.zeros((nb_sample, int((len_seq - tmp_idx) / nb_windows), 100)))
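
# Shape check for the preallocation above: with a 9-residue sequence and
# 3 windows (made-up sizes), the tensors shrink as the window offset grows.
import numpy as np

nb_sample, len_seq, nb_windows = 2, 9, 3
mats = [np.zeros((nb_sample, int((len_seq - i) / nb_windows), 100))
        for i in range(nb_windows)]
print([m.shape for m in mats])  # [(2, 3, 100), (2, 2, 100), (2, 2, 100)]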
def get_datasets(fpath, condition=None):
    unit = 0
    datasets = []
    for root, dirs, files in os.walk(fpath):
        if 'log.txt' in files:
            # Each experiment directory holds a params.json and a log.txt.
            with open(os.path.join(root, 'params.json')) as param_file:
                params = json.load(param_file)
            exp_name = params['exp_name']
            log_path = os.path.join(root, 'log.txt')
            experiment_data = pd.read_table(log_path)
            experiment_data.insert(
                len(experiment_data.columns),
                'Unit',
                unit
            )
            experiment_data.insert(
                len(experiment_data.columns),
                'Condition',
                condition or exp_name
            )
            datasets.append(experiment_data)
            unit += 1
    return datasets
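
# Typical downstream use (a sketch; the directory path is an assumption):
import pandas as pd

datasets = get_datasets('data/experiments')
if datasets:
    all_data = pd.concat(datasets, ignore_index=True)
    print(all_data.groupby('Condition').size())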