def upload_split(self):
    """Uploads the individual results recursively to GCS."""
    self.report.populate_upload_directory(output_dir=self._upload_dir)

    # 1. Copy [ID]-summary.json.gz to gs://wptd/[SHA]/[ID]-summary.json.gz.
    gsutil.copy(
        os.path.join(self._upload_dir, self.report.sha_summary_path),
        self.results_gs_url,
        gzipped=True)

    # 2. Copy the individual results recursively if there are any (i.e. if
    # the report is not empty).
    results_dir = os.path.join(
        self._upload_dir, self.report.sha_product_path)
    if os.path.exists(results_dir):
        # gs://wptd/[SHA] is guaranteed to exist after 1, so copying foo to
        # gs://wptd/[SHA] will create gs://wptd/[SHA]/foo according to
        # `gsutil cp --help`.
        gsutil.copy(
            results_dir,
            self.results_gs_url[:self.results_gs_url.rfind('/')],
            gzipped=True)
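The `gsutil.copy` helper used above is project-specific and not shown in this snippet. As a rough illustration of the directory-copy semantics the comment relies on, here is a minimal sketch of such a wrapper that shells out to the real `gsutil` CLI; mapping the `gzipped` flag to a `Content-Encoding: gzip` header is an assumption about what the project's helper does, not its confirmed behavior:

import subprocess

def copy(src, dst, gzipped=False):
    """Minimal sketch of a gsutil wrapper (assumed interface, not the
    project's actual implementation)."""
    args = ['gsutil']
    if gzipped:
        # Serve already-gzipped files with the right Content-Encoding.
        args += ['-h', 'Content-Encoding:gzip']
    # -r makes `gsutil cp` recurse into directories; copying a directory
    # `foo` into gs://bucket/prefix creates gs://bucket/prefix/foo.
    args += ['cp', '-r', src, dst]
    subprocess.check_call(args)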
def _download_gcs(self, gcs):
    assert gcs.startswith('gs://')
    ext = self.known_extension(gcs)
    fd, path = tempfile.mkstemp(suffix=ext, dir=self._temp_dir)
    os.close(fd)
    # gsutil will log itself.
    gsutil.copy(gcs, path)
    return path
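`known_extension` is not shown in this snippet. A plausible sketch, assuming it simply matches a fixed list of suffixes (multi-part suffixes first, so that e.g. `.json.gz` survives the round-trip through `tempfile.mkstemp`):

# Hypothetical helper; the real list of extensions may differ.
KNOWN_EXTENSIONS = ('.json.gz', '.txt.gz', '.json', '.txt')

def known_extension(self, path):
    """Return the first (longest) known suffix of `path`, or '' if none
    match."""
    for ext in KNOWN_EXTENSIONS:
        if path.endswith(ext):
            return ext
    return ''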
def upload_raw(self):
    """Uploads the merged raw JSON report to GCS."""
    with tempfile.NamedTemporaryFile(
            suffix='.json.gz', dir=self._temp_dir) as temp:
        self.report.serialize_gzip(temp.name)
        gsutil.copy(temp.name, self.raw_results_gs_url, gzipped=True)
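`serialize_gzip` belongs to the report object and is not shown here. A minimal sketch, assuming the report's payload is a JSON-serializable dict held in a `_report` attribute (both the attribute name and the payload shape are assumptions):

import gzip
import json

def serialize_gzip(self, path):
    """Write the report as gzip-compressed JSON (sketch only)."""
    with gzip.open(path, 'wt', encoding='utf-8') as f:
        json.dump(self._report, f)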
def results_url(self):
    return gsutil.gs_to_public_url(self.results_gs_url)

def raw_results_url(self):
    return gsutil.gs_to_public_url(self.raw_results_gs_url)
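`gs_to_public_url` maps a `gs://` URI to its public HTTPS form. A sketch of the standard mapping (this assumes the objects are publicly readable; the project's helper may differ in details):

def gs_to_public_url(gcs_path):
    """Convert gs://bucket/object to its storage.googleapis.com URL."""
    assert gcs_path.startswith('gs://')
    return gcs_path.replace('gs://', 'https://storage.googleapis.com/', 1)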
def initialize():
    """Initializes gsutil."""
    sys.path.insert(0, os.path.abspath(os.path.join(sys.path[0], '..')))
    import gsutil  # pylint: disable=g-import-not-at-top
    atexit.register(print_sorted_initialization_times)
    gsutil.MEASURING_TIME_ACTIVE = True
    gsutil.RunMain()
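The `atexit.register` call above arranges for initialization timings to be printed once the process exits. The general pattern, as a self-contained sketch (the names here are illustrative, not gsutil's internals):

import atexit
import time

_timings = {}

def record_timing(name, started):
    """Record how long a named initialization phase took."""
    _timings[name] = time.time() - started

def print_sorted_timings():
    # Longest phases first, printed once at interpreter shutdown.
    for name, secs in sorted(_timings.items(), key=lambda kv: -kv[1]):
        print('%-30s %.3fs' % (name, secs))

atexit.register(print_sorted_timings)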
if not course_id_set:
    print "ERROR! Must specify list of course_id's for report. Aborting."
    return

org = course_id_set[0].split('/', 1)[0]    # extract org from first course_id
self.org = org
self.output_project_id = output_project_id

crname = ('course_report_%s' % org)
if use_dataset_latest:
    crname = 'course_report_latest'
self.dataset = output_dataset_id or crname

self.gsbucket = gsutil.gs_path_from_course_id(crname, gsbucket=output_bucket)
self.course_id_set = course_id_set
course_id = course_id_set

# course_datasets = [bqutil.course_id2dataset(x, use_dataset_latest=use_dataset_latest) for x in course_id_set]
# course_datasets_dict = {x: bqutil.course_id2dataset(x, use_dataset_latest=use_dataset_latest) for x in course_id_set}
course_dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)

self.rdp_matrix = collections.OrderedDict()

# for course_id in course_datasets_dict.keys():
print "[researchData] Processing data for course %s" % course_id
sys.stdout.flush()

for rdp in RESEARCH_DATA_PRODUCTS.keys():
    try:
        table = bqutil.get_bq_table_info(course_dataset, rdp)
        # table = bqutil.get_bq_table_info(course_id, rdp)
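The snippet ends inside the `try` block. One plausible continuation, recording which research data products exist for the course; the bookkeeping shape is an assumption, and `get_bq_table_info` is assumed to either raise or return None when the table is missing:

for rdp in RESEARCH_DATA_PRODUCTS.keys():
    try:
        table = bqutil.get_bq_table_info(course_dataset, rdp)
        exists = table is not None
    except Exception:
        exists = False
    # Hypothetical bookkeeping: one entry per research data product.
    self.rdp_matrix[rdp] = (course_id, exists)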
if not course_id_set:
    print "ERROR! Must specify list of course_id's for report. Aborting."
    return

org = course_id_set[0].split('/', 1)[0]    # extract org from first course_id
self.org = org
self.output_project_id = output_project_id

crname = ('course_report_%s' % org)
if use_dataset_latest:
    crname = 'course_report_latest'
self.dataset = output_dataset_id or crname

self.gsbucket = gsutil.gs_path_from_course_id(crname, gsbucket=output_bucket)
self.course_id_set = course_id_set

course_datasets = [bqutil.course_id2dataset(x, use_dataset_latest=use_dataset_latest) for x in course_id_set]

# check to see which datasets have person_course tables
datasets_with_pc = []
self.all_pc_tables = OrderedDict()
self.all_pcday_ip_counts_tables = OrderedDict()
self.all_pcday_trlang_counts_tables = OrderedDict()
self.all_uic_tables = OrderedDict()
self.all_ca_tables = OrderedDict()
self.all_va_tables = OrderedDict()
self.all_tott_tables = OrderedDict()

for cd in course_datasets:
    try:
        table = bqutil.get_bq_table_info(cd, 'person_course')
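This snippet also stops inside the `try`. A hedged sketch of how the per-dataset probe might conclude, collecting the datasets that actually have a `person_course` table; everything beyond the names already shown above is an assumption:

for cd in course_datasets:
    try:
        table = bqutil.get_bq_table_info(cd, 'person_course')
    except Exception:
        table = None
    if table is not None:
        datasets_with_pc.append(cd)
        # Hypothetical: record the fully-qualified table reference,
        # using BigQuery legacy-SQL [dataset.table] syntax.
        self.all_pc_tables[cd] = '[%s.person_course]' % cd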