# Parse the bucket name and key prefix from the s3_dir string (e.g. 'bucket/dir/subdir')
bucket_name = s3_dir.split('/')[0]
bucket_dir = '/'.join(s3_dir.split('/')[1:])
# I/O: retrieve the sample tarball from the job's file store
uuid_tar = return_input_paths(job, work_dir, ids, 'uuid.tar.gz')
# Upload to S3
conn = boto.connect_s3()
bucket = conn.get_bucket(bucket_name)
k = Key(bucket)
k.key = os.path.join(bucket_dir, uuid + '.tar.gz')
k.set_contents_from_filename(uuid_tar)
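# The block above uses the legacy boto API. For comparison, a minimal sketch of the
# same upload with boto3 (assumed to be installed and configured with credentials);
# it reuses the variable names from the snippet above.
import os
import boto3

s3 = boto3.client('s3')
s3.upload_file(uuid_tar, bucket_name, os.path.join(bucket_dir, uuid + '.tar.gz'))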
if __name__ == "__main__":
# Define Parser object and add to toil
parser = build_parser()
Job.Runner.addToilOptions(parser)
args = parser.parse_args()
# Store input_URLs for downloading
inputs = {'config': args.config,
'unc.bed': args.unc,
'hg19.transcripts.fa': args.fasta,
'composite_exons.bed': args.composite_exons,
'normalize.pl': args.normalize,
'output_dir': args.output_dir,
'rsem_ref.zip': args.rsem_ref,
'chromosomes.zip': args.chromosomes,
'ebwt.zip': args.ebwt,
'ssec': args.ssec,
's3_dir': args.s3_dir,
'uuid': None,
'samples.zip': None}
help='Path to the (filled in) manifest file, generated with "generate-manifest". '
'\nDefault value: "%(default)s"')
parser_run.add_argument('--fq', default=None, type=str,
help='URL for the sample BAM. URLs can take the form: http://, file://, s3://, '
'and gnos://. The UUID for the sample must be given with the "--uuid" flag.')
parser_run.add_argument('--uuid', default=None, type=str, help='Provide the UUID of a sample when using the '
'"--bam" option')
# If no arguments provided, print full help menu
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
writeToDebug('Debug log')
Job.Runner.addToilOptions(parser)
args = parser.parse_args()
cwd = os.getcwd()
if args.command == 'generate-config' or args.command == 'generate':
generate_file(os.path.join(cwd, 'config-toil-defuse.yaml'), generate_config)
if args.command == 'generate-manifest' or args.command == 'generate':
generate_file(os.path.join(cwd, 'manifest-toil-defuse.tsv'), generate_manifest)
if 'generate' in args.command:
sys.exit()
if args.command == 'run':
# Read in the config YAML file, converting hyphenated keys to underscores
config = {x.replace('-', '_'): y for x, y in yaml.load(open(args.config).read()).iteritems()}
check_for_required_parameters(config)
# Program checks
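# check_for_required_parameters is not shown in this excerpt. A hedged sketch of what
# such a validation helper might look like; the key list below is purely illustrative,
# not the pipeline's actual required set.
def check_for_required_parameters(config):
    required = ['output_dir', 's3_dir']
    missing = [key for key in required if config.get(key) is None]
    if missing:
        raise ValueError('Required config parameters missing: {}'.format(', '.join(missing)))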
'\nDefault value: "%(default)s"')
group.add_argument('--manifest', default='manifest-toil-rnaseq.tsv', type=str,
help='Path to the (filled in) manifest file, generated with "generate-manifest". '
'\nDefault value: "%(default)s"')
group.add_argument('--samples', default=None, nargs='+', type=str,
help='Space delimited sample URLs (any number). Samples must be tarfiles/tarballs that contain '
'fastq files. URLs follow the format: http://foo.com/sample.tar, '
'file:///full/path/to/file.tar. The UUID for the sample will be derived from the file. '
'Samples passed in this way will be assumed to be paired-end; if using single-end data, '
'please use the manifest option.')
# If no arguments provided, print full help menu
if len(sys.argv) == 1:
parser.print_help()
sys.exit(1)
# Add Toil options
Job.Runner.addToilOptions(parser_run)
args = parser.parse_args()
# Parse subparsers related to generation of config and manifest
cwd = os.getcwd()
if args.command == 'generate-config' or args.command == 'generate':
generate_file(os.path.join(cwd, 'config-toil-rnaseq.yaml'), generate_config)
if args.command == 'generate-manifest' or args.command == 'generate':
generate_file(os.path.join(cwd, 'manifest-toil-rnaseq.tsv'), generate_manifest)
# Pipeline execution
elif args.command == 'run':
require(os.path.exists(args.config), '{} not found. Please run '
'"toil-rnaseq generate-config"'.format(args.config))
if not args.samples:
require(os.path.exists(args.manifest), '{} not found and no samples provided. Please '
'run "toil-rnaseq generate-manifest"'.format(args.manifest))
samples = parse_samples(path_to_manifest=args.manifest)
else:
"""
if len(args) == 2 and args[1] == "--test":
# Run the tests
return doctest.testmod(optionflags=doctest.NORMALIZE_WHITESPACE)
options = parse_args(args) # This holds the nicely-parsed options object
RealTimeLogger.start_master()
# Make a root job
root_job = Job.wrapJobFn(copy_everything, options,
cores=1, memory="1G", disk="4G")
# Run it and see how many jobs fail
failed_jobs = Job.Runner.startToil(root_job, options)
if failed_jobs > 0:
raise Exception("{} jobs failed!".format(failed_jobs))
print("All jobs completed successfully")
RealTimeLogger.stop_master()
def main():
"""
This is a Toil pipeline to transfer TCGA data into an S3 bucket.
Data is pulled down with GeneTorrent and transferred to S3 via S3AM.
"""
# Define Parser object and add to toil
parser = build_parser()
Job.Runner.addToilOptions(parser)
args = parser.parse_args()
# Store inputs from argparse
inputs = {'genetorrent': args.genetorrent,
'genetorrent_key': args.genetorrent_key,
'ssec': args.ssec,
's3_dir': args.s3_dir}
# Sanity checks
if args.ssec:
assert os.path.isfile(args.ssec)
if args.genetorrent:
assert os.path.isfile(args.genetorrent)
if args.genetorrent_key:
assert os.path.isfile(args.genetorrent_key)
samples = parse_genetorrent(args.genetorrent)
# Start pipeline
# map_job accepts a function, an iterable, and *args. The function is launched as a child job for each element of the iterable.
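# map_job itself is not shown above. A minimal sketch of a map_job-style fan-out built
# on Toil's public API (job.addChildJobFn); the real helper in toil_lib may batch
# children differently to avoid overloading the leader.
def map_job(job, func, inputs, *args):
    # Launch func as a child job once per element of the iterable, passing *args through.
    for item in inputs:
        job.addChildJobFn(func, item, *args)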
options.log_host = socket.getfqdn()
options.log_port = logging_server.server_address[1]
RealTimeLogger.set_master(options)
logger = RealTimeLogger.get()
# Make the root job
root_job = Job.wrapJobFn(downloadAllReads, options,
cores=1, memory="1G", disk="4G")
print("Sending log from master")
logger.info("This is the master")
# Run Toil
Job.Runner.startToil(root_job, options)
logging_server.shutdown()
server_thread.join()
f_out.write(generate_unique_key(key_path, url))
# Commands to upload to S3 via S3AM
s3am_command = ['s3am',
'upload',
'--sse-key-file', os.path.join(work_dir, uuid + '.key'),
'file://{}'.format(os.path.join(work_dir, uuid + '.bam')),
bucket_name,
os.path.join(bucket_dir, uuid + '.bam')]
subprocess.check_call(s3am_command)
if __name__ == "__main__":
# Define Parser object and add to toil
parser = build_parser()
Job.Runner.addToilOptions(parser)
args = parser.parse_args()
# Store input_URLs for downloading
inputs = {'config': args.config,
'ref.fa': args.ref,
'ref.fa.amb': args.amb,
'ref.fa.ann': args.ann,
'ref.fa.bwt': args.bwt,
'ref.fa.pac': args.pac,
'ref.fa.sa': args.sa,
'ref.fa.fai': args.fai,
'ssec': args.ssec,
'output_dir': args.out,
's3_dir': args.s3_dir,
'cpu_count': None}
#sfams = sdoms["sfam_id"].drop_duplicates().dropna().tolist()
#map_job(job, setup_clustering, sfams, pdbFileStoreID, resoluFileStoreID)
# Add jobs for each sdi
setup_clustering(job, "299845", pdbFileStoreID, resoluFileStoreID, pdbs)
#del sdoms
os.remove(sdoms_file)
os.remove(resolu_file)
if __name__ == "__main__":
from toil.common import Toil
from toil.job import Job
parser = Job.Runner.getDefaultArgumentParser()
options = parser.parse_args()
options.clean = "always"
options.targetTime = 1
job = Job.wrapJobFn(start_toil)
with Toil(options) as workflow:
pdbs = [(os.path.basename(f), workflow.importFile('file://' + f)) for f in glob.glob("/root/ig/*/*.pdb")]
workflow.start(Job.wrapJobFn(start_toil, pdbs))
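# The root job start_toil is not shown in this excerpt. A hedged sketch of how a root
# job might consume the imported (name, FileID) pairs via the fileStore; the function
# body below is illustrative, not the repository's actual implementation.
def start_toil(job, pdbs):
    for name, file_id in pdbs:
        # Materialize each imported file in the job's local temp directory
        local_path = job.fileStore.readGlobalFile(file_id)
        job.fileStore.logToMaster('Imported {} to {}'.format(name, local_path))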