import os

import parlai.core.build_data as build_data


def build(opt):
    dpath = os.path.join(opt['datapath'], 'CCPE')
    version = '1.0'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data. RESOURCES is the module-level list of
        # DownloadableFile entries defined alongside this function.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)

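All of these builders share the same marker-file idiom: build_data.built checks for a '.built' marker recording the dataset version, and build_data.mark_done writes it once the download succeeds, so repeated calls are cheap no-ops and a version bump forces a clean rebuild. A minimal sketch of that mechanism, as an approximation for illustration rather than the verbatim parlai.core.build_data source:

    import os

    def built(path, version_string=None):
        # The directory counts as built if a '.built' marker exists and,
        # when a version is requested, its recorded version matches.
        fname = os.path.join(path, '.built')
        if not os.path.isfile(fname):
            return False
        if version_string is None:
            return True
        with open(fname) as f:
            lines = f.read().splitlines()
        return len(lines) > 1 and lines[-1] == version_string

    def mark_done(path, version_string=None):
        # Record the marker (plus the version) so future calls skip work.
        with open(os.path.join(path, '.built'), 'w') as f:
            f.write('built\n')
            if version_string:
                f.write(version_string)

With this in place, calling build twice with the same (hypothetical) opt, e.g. build({'datapath': '/tmp/parlai'}), downloads only on the first call.
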
import os

import parlai.core.build_data as build_data


def build(opt):
    data_path = os.path.join(opt['datapath'], 'ConvAIChitChat')
    version = '1501534800'

    if not build_data.built(data_path, version_string=version):
        print('[building data: ' + data_path + ']')
        if build_data.built(data_path):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(data_path)
        build_data.make_dir(data_path)

        fname = 'data_' + version + '.tar.gz'
        url = 'https://raw.githubusercontent.com/deepmipt/turing-data/master/' + fname
        build_data.download(url, data_path, fname)
        build_data.untar(data_path, fname)

        os.rename(
            os.path.join(data_path, 'data_train_' + version + '.json'),
            os.path.join(data_path, 'train.json'),
        )
        # The excerpt was cut off mid-call; the second rename presumably
        # mirrors the first for the test split.
        os.rename(
            os.path.join(data_path, 'data_test_' + version + '.json'),
            os.path.join(data_path, 'test.json'),
        )

        # Mark the data as built.
        build_data.mark_done(data_path, version_string=version)

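The renames give the splits the stable names a teacher can rely on. A typical caller would look something like the following _path helper (a hypothetical sketch in the style of other ParlAI tasks, not code taken from this file):

    import os

    def _path(opt):
        # Make sure the data exists, then resolve the requested split's file.
        build(opt)
        dt = opt['datatype'].split(':')[0]  # e.g. 'train:ordered' -> 'train'
        return os.path.join(opt['datapath'], 'ConvAIChitChat', dt + '.json')
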
import os

import parlai.core.build_data as build_data


def build(opt):
    # FOLDER_NAME and URL_ROOT are module-level constants in the original file.
    dpath = os.path.join(opt['datapath'], FOLDER_NAME)
    # version 1.0: initial release
    # version 1.1: add evaluation logs
    version = '1.1'

    if not build_data.built(dpath, version_string=version):
        if build_data.built(dpath):
            # An older version exists, so remove the outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # First download the data files.
        fname_data = 'data_v1.tar.gz'
        build_data.download(URL_ROOT + fname_data, dpath, fname_data)
        build_data.untar(dpath, fname_data)

        # Next download the wordstats files.
        fname_wordstats = 'wordstats_v1.tar.gz'
        build_data.download(URL_ROOT + fname_wordstats, dpath, fname_wordstats)
        build_data.untar(dpath, fname_wordstats)

        # Finally download the evaluation logs (new in version 1.1).
        fname_evallogs = 'evaluationlogs_v1.tar.gz'
        build_data.download(URL_ROOT + fname_evallogs, dpath, fname_evallogs)
        build_data.untar(dpath, fname_evallogs)

        print("Data has been placed in " + dpath)

        build_data.mark_done(dpath, version)

import os

import parlai.core.build_data as build_data


def build(opt):
    version = 'v1.0'
    dpath = os.path.join(opt['datapath'], 'SCAN')

    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # SCAN ships only train and test files, so the valid split is
        # derived from the same train file.
        create_fb_format(dpath, 'train', os.path.join(dpath, 'tasks_train_simple.txt'))
        create_fb_format(dpath, 'valid', os.path.join(dpath, 'tasks_train_simple.txt'))
        create_fb_format(dpath, 'test', os.path.join(dpath, 'tasks_test_simple.txt'))

        # Mark the data as built.
        build_data.mark_done(dpath, version)

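create_fb_format is defined elsewhere in the same file. As a rough reconstruction (an assumption about both SCAN's "IN: ... OUT: ..." line format and ParlAI's FB Dialog format, where "1 <text>\t<labels>" encodes a single-turn episode), it would look roughly like:

    import os

    def create_fb_format(dpath, dtype, inpath):
        # Hypothetical sketch: convert SCAN's "IN: <command> OUT: <actions>"
        # lines into single-turn FB Dialog episodes.
        outpath = os.path.join(dpath, dtype + '.txt')
        with open(inpath) as fin, open(outpath, 'w') as fout:
            for line in fin:
                command, actions = line.strip().split(' OUT: ', 1)
                command = command.replace('IN: ', '', 1)
                fout.write('1 ' + command + '\t' + actions + '\n')
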
import glob
import os

import parlai.core.build_data as build_data


def build(datapath, use_history):
    dpath = os.path.join(datapath, 'OpenSubtitles2018')
    if not use_history:
        dpath += '_no_history'
    version = '1'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        untar_path = os.path.join(dpath, 'OpenSubtitles', 'xml', 'en')
        # Skip the large download if the extracted XML files are already
        # all present.
        if len(glob.glob(untar_path + '/*/*/*.xml')) != NUM_SUBTITLES_FILES:
            # Download the data.
            url = 'https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/xml/en.zip'
            build_data.download(url, dpath, 'OpenSubtitles2018.zip')
            build_data.untar(dpath, 'OpenSubtitles2018.zip')

        create_fb_format(untar_path, dpath, use_history)

        # The excerpt ends here; marking the build done presumably follows,
        # matching the other builders.
        build_data.mark_done(dpath, version_string=version)

import hashlib
import os

import parlai.core.build_data as build_data

# Referenced below; the original file defines this alongside RESOURCES.
data_type = ['train', 'valid', 'test']


def build(opt):
    dpath = os.path.join(opt['datapath'], 'CNN_DM')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # RESOURCES[2:5] are the train/valid/test URL lists; turn each
        # into a split file.
        for i, f in enumerate(RESOURCES[2:5]):
            dt = data_type[i]
            urls_fname = os.path.join(dpath, f.file_name)
            split_fname = os.path.join(dpath, dt + '.txt')
            with open(urls_fname) as urls_file, open(split_fname, 'a') as split_file:
                # The excerpt is truncated here. By the usual CNN/DailyMail
                # convention, each article URL maps to a story file named by
                # the SHA1 hex digest of the URL (a reconstruction, not the
                # verbatim original body).
                for url in urls_file:
                    story = hashlib.sha1(url.strip().encode('utf-8')).hexdigest()
                    split_file.write(story + '.story\n')

import os

import parlai.core.build_data as build_data


def build(opt):
    dpath = os.path.join(opt['datapath'], 'VQA-v2')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname1 = 'v2_Questions_Train_mscoco.zip'
        fname2 = 'v2_Questions_Val_mscoco.zip'
        fname3 = 'v2_Questions_Test_mscoco.zip'
        fname4 = 'v2_Annotations_Val_mscoco.zip'
        fname5 = 'v2_Annotations_Train_mscoco.zip'
        url = 'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/'

        build_data.download(url + fname1, dpath, fname1)
        build_data.download(url + fname2, dpath, fname2)
        build_data.download(url + fname3, dpath, fname3)
        build_data.download(url + fname4, dpath, fname4)
        build_data.download(url + fname5, dpath, fname5)

        # The excerpt ends after the downloads; unzipping each archive and
        # marking the build done presumably follow, as in the other builders.
        for fname in (fname1, fname2, fname3, fname4, fname5):
            build_data.untar(dpath, fname)
        build_data.mark_done(dpath, version_string=version)

import os

import parlai.core.build_data as build_data
import parlai.tasks.wikimovies.build as wikimovies_build


def build(opt):
    # This task depends on the WikiMovies dataset, so build that first.
    wikimovies_build.build(opt)

    dpath = os.path.join(opt['datapath'], 'MTurkWikiMovies')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname = 'mturkwikimovies.tar.gz'
        url = 'http://parl.ai/downloads/mturkwikimovies/' + fname
        build_data.download(url, dpath, fname)
        build_data.untar(dpath, fname)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)

import os

import parlai.core.build_data as build_data


def build(opt):
    dpath = os.path.join(opt['datapath'], 'CornellMovie')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname = 'cornell_movie_dialogs_corpus.tgz'
        url = 'http://parl.ai/downloads/cornell_movie/' + fname
        build_data.download(url, dpath, fname)
        build_data.untar(dpath, fname)

        dpext = os.path.join(dpath, 'cornell movie-dialogs corpus')
        # The excerpt is cut off mid-call; the remaining argument presumably
        # names the output directory, followed by mark_done, as in the
        # other builders.
        create_fb_format(
            os.path.join(dpext, 'movie_lines.txt'),
            os.path.join(dpext, 'movie_conversations.txt'),
            dpath,
        )
        build_data.mark_done(dpath, version_string=version)