Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
rep2_control = ctl2_ta # default. May be changed later.
rep2_ctl_msg = "control rep2"
rep_info = [(ntags_rep1, 'replicate 1', rep1_ta_filename)]
if not simplicate_experiment:
rep_info.append((ntags_rep2, 'replicate 2', rep2_ta_filename))
rep_info.extend(
[(ntags_ctl1, 'control 1', ctl1_ta_filename),
(ntags_ctl2, 'control 2', ctl2_ta_filename)])
for n, name, filename in rep_info:
logger.info("Found %d tags in %s file %s" % (n, name, filename))
subprocess.check_output('ls -l', shell=True, stderr=subprocess.STDOUT)
if not simplicate_experiment:
pool_applet = dxpy.find_one_data_object(
classname='applet',
name='pool',
project=dxpy.PROJECT_CONTEXT_ID,
zero_ok=False,
more_ok=False,
return_handler=True)
pool_replicates_subjob = \
pool_applet.run(
{"inputs": [rep1_ta, rep2_ta],
"prefix": 'pooled_reps'},
name='Pool replicates')
pooled_replicates = pool_replicates_subjob.get_output_ref("pooled")
pooled_replicates_xcor_subjob = \
xcor_only(
pooled_replicates,
paired_end,
def find_reference_file_by_name(reference_name, project_name):
    '''Looks up a reference file by name in the project that holds common tools.

    From Joe Dale's code.

    The project is resolved by exact name on every call.  The file handler
    found inside it is memoized in the module-level REFERENCE_FILES dict,
    keyed by (reference_name, project id), so the file search itself only
    runs the first time a given name/project pair is requested.

    Args:
        reference_name: name of the file to look for.
        project_name: exact name of the project to search (recursively).

    Returns:
        The result of dxpy.dxlink() applied to the resolved file handler.

    Raises:
        Any error raised by the dxpy lookups is propagated.  The file search
        is issued with zero_ok=False and more_ok=False.
    '''
    project = dxpy.find_one_project(
        name=project_name, name_mode='exact', return_handler=False)
    cache_key = (reference_name, project['id'])

    # '*' prefixes the message when the handler came from the cache; it is
    # cleared below when a fresh search had to be performed.
    cached = '*'
    if cache_key not in REFERENCE_FILES:
        REFERENCE_FILES[cache_key] = dxpy.find_one_data_object(
            classname="file",
            name=reference_name,
            project=project['id'],
            recurse=True,
            zero_ok=False,
            more_ok=False,
            return_handler=True)
        cached = ''

    handler = REFERENCE_FILES[cache_key]
    # A parenthesized single-argument print emits the same text whether it is
    # parsed as the Python 2 statement or the Python 3 function.
    print(cached + "Resolved %s to %s" % (reference_name, handler.get_id()))
    return dxpy.dxlink(handler)
def find_applet_by_name(applet_name, applets_project_id):
    '''Return the handler of the named applet in the project that holds tools.

    From Joe Dale's code.

    Handlers are memoized in the module-level APPLETS dict, keyed by
    (applet_name, applets_project_id); the dxpy search only runs when the
    pair has not been seen before.  The logged message is prefixed with '*'
    when the handler was served from that cache.
    '''
    key = (applet_name, applets_project_id)
    already_known = key in APPLETS
    if not already_known:
        APPLETS[key] = dxpy.find_one_data_object(
            classname="applet",
            name=applet_name,
            project=applets_project_id,
            zero_ok=False,
            more_ok=False,
            return_handler=True)

    applet = APPLETS[key]
    prefix = '*' if already_known else ''
    logging.info(
        prefix + "Resolved applet %s to %s" % (applet_name, applet.get_id()))
    return applet
try:
project_handler = resolve_project(DATA_CACHE_PROJECT)
snapshot_project = project_handler
except:
logger.error("Cannot find cache project %s" % (DATA_CACHE_PROJECT))
snapshot_project = None
logger.debug('Cache project: %s' % (snapshot_project))
if snapshot_project:
try:
accession_search = accession + '*'
logger.debug(
'Looking recursively for %s in %s'
% (accession_search, snapshot_project.name))
file_handler = dxpy.find_one_data_object(
name=accession_search, name_mode='glob', more_ok=False,
classname='file', recurse=True, return_handler=True,
folder='/', project=snapshot_project.get_id())
logger.debug('Got file handler for %s' % (file_handler.name))
return file_handler
except:
logger.debug(
"Cannot find accession %s in project %s"
% (accession, snapshot_project))
# we're here because we couldn't find the cache or couldn't find the file
# in the cache, so look in AWS
# this returns a link to the file in the applet's project context
dx_file = s3_dxcp(accession, server, keypair)
logger.debug("Now looking for file %s" % (file_identifier))
m = re.match(r'''(^[\w\-\ /\.]+)/([\w\-\ \.]+)''', file_identifier)
if m:
folder_name = m.group(1)
if not folder_name.startswith('/'):
folder_name = '/' + folder_name
file_name = m.group(2)
else:
folder_name = '/fastqs/'
file_name = file_identifier + '.fastq.gz'
logger.debug("Looking for file %s in folder %s" % (file_name, folder_name))
try:
file_handler = dxpy.find_one_data_object(
name=file_name, folder=folder_name, project=project.get_id(),
more_ok=False, zero_ok=False, return_handler=True)
except:
logger.debug(
'%s not found in project %s folder %s'
% (file_name, project.get_id(), folder_name))
try: # maybe it's just filename in the default workspace
file_handler = dxpy.DXFile(dxid=identifier, mode='r')
except:
logger.debug('%s not found as a dxid' % (identifier))
file_handler = resolve_accession(identifier, server, keypair)
assert file_handler, "Failed to resolve file identifier %s" % (identifier)
logger.debug(
"Resolved file identifier %s to %s" % (identifier, file_handler.name))
def resolve_dx_file(identifier):
    '''Resolve an identifier to a dx handler, trying it as an ID, then a name.

    The identifier is first passed to dxpy.get_handler().  If that raises
    dxpy.DXError, a search is made for exactly one file whose name equals the
    identifier.  If the search raises dxpy.DXSearchError, an error is logged
    and None is returned.
    '''
    try:
        return dxpy.get_handler(identifier)
    except dxpy.DXError:
        # Not usable as an ID: fall back to a lookup by file name.
        try:
            return dxpy.find_one_data_object(
                classname='file',
                name=identifier,
                return_handler=True,
                zero_ok=False,
                more_ok=False)
        except dxpy.DXSearchError:
            logging.error('Failed to resolve control %s to unique dx object. ID or name does not exist or multiple files of that name were found.' % (str(identifier)))
            return None
if m:
folder_name = m.group(1)
if not folder_name.startswith('/'):
folder_name = '/' + folder_name
recurse = False
file_name = m.group(2)
else:
folder_name = '/'
recurse = True
file_name = file_identifier
logging.debug(
"Looking for file %s in folder %s" % (file_name, folder_name))
try:
file_handler = dxpy.find_one_data_object(
name=file_name,
folder=folder_name,
project=project.get_id(),
recurse=recurse,
more_ok=False,
zero_ok=False,
return_handler=True)
except dxpy.DXSearchError:
logging.debug(
'%s not found in project %s folder %s. Trying as file ID'
% (file_name, project.get_id(), folder_name))
file_handler = None
except:
raise
if not file_handler: