How to use the parlai.core.build_data module in ParlAI

To help you get started, we have selected a few ParlAI examples based on how parlai.core.build_data is commonly used in public projects.
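
Every example follows the same lifecycle: check build_data.built() to see whether a current copy of the dataset already exists, remove any stale copy, recreate the directory, download and unpack the files, and finally call build_data.mark_done() so later runs become no-ops. As a quick orientation, here is a minimal sketch of that pattern; the folder name, archive name, and URL are placeholders, not a real ParlAI task.

import os

import parlai.core.build_data as build_data


def build(opt):
    # 'MyDataset', the archive name, and the URL are hypothetical placeholders.
    dpath = os.path.join(opt['datapath'], 'MyDataset')
    version = '1.0'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove the outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download and unpack the archive.
        fname = 'mydataset.tar.gz'
        build_data.download('https://example.com/' + fname, dpath, fname)
        build_data.untar(dpath, fname)

        # Mark the data as built so the next call is a no-op.
        build_data.mark_done(dpath, version_string=version)

The per-dataset differences in the examples below are mostly in how the files are fetched (direct build_data.download calls versus a RESOURCES list) and in any post-processing done before mark_done.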

From facebookresearch/ParlAI: parlai/tasks/ccpe/build.py
def build(opt):
    dpath = os.path.join(opt['datapath'], 'CCPE')
    version = '1.0'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)
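
Several of the examples iterate over a module-level RESOURCES list instead of calling build_data.download directly. Below is a rough sketch of what such a list looks like, assuming build_data's DownloadableFile helper; the URL and checksum are illustrative placeholders, not the real CCPE values.

from parlai.core.build_data import DownloadableFile

RESOURCES = [
    DownloadableFile(
        'https://example.com/ccpe.tar.gz',  # placeholder download URL
        'ccpe.tar.gz',  # local file name to save as
        '0' * 64,  # placeholder checksum (ParlAI stores SHA-256 hashes here)
        zipped=True,  # unpack the archive after downloading
    )
]

Calling downloadable_file.download_file(dpath) then downloads the file into dpath, verifies the checksum, and unpacks it when zipped=True.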

From facebookresearch/ParlAI: parlai/tasks/convai_chitchat/build.py
def build(opt):
    data_path = os.path.join(opt['datapath'], 'ConvAIChitChat')
    version = '1501534800'

    if not build_data.built(data_path, version_string=version):
        print('[building data: ' + data_path + ']')

        if build_data.built(data_path):
            build_data.remove_dir(data_path)
        build_data.make_dir(data_path)

        fname = 'data_' + version + '.tar.gz'
        url = 'https://raw.githubusercontent.com/deepmipt/turing-data/master/' + fname
        build_data.download(url, data_path, fname)
        build_data.untar(data_path, fname)

        os.rename(
            os.path.join(data_path, 'data_train_' + version + '.json'),
            os.path.join(data_path, 'train.json'),
        )
        os.rename(

From facebookresearch/ParlAI: projects/controllable_dialogue/tasks/build.py
def build(opt):
    dpath = os.path.join(opt['datapath'], FOLDER_NAME)
    # version 1.0: initial release
    # version 1.1: add evaluation logs
    version = '1.1'

    if not build_data.built(dpath, version_string=version):
        if build_data.built(dpath):
            # older version exists, so remove the outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # first download the data files
        fname_data = 'data_v1.tar.gz'
        build_data.download(URL_ROOT + fname_data, dpath, fname_data)
        build_data.untar(dpath, fname_data)

        # next download the wordstats files
        fname_wordstats = 'wordstats_v1.tar.gz'
        build_data.download(URL_ROOT + fname_wordstats, dpath, fname_wordstats)
        build_data.untar(dpath, fname_wordstats)

        # next download the evaluation logs
        fname_evallogs = 'evaluationlogs_v1.tar.gz'
        build_data.download(URL_ROOT + fname_evallogs, dpath, fname_evallogs)
        build_data.untar(dpath, fname_evallogs)

        print("Data has been placed in " + dpath)

        build_data.mark_done(dpath, version)

From facebookresearch/ParlAI: parlai/tasks/scan/build.py
def build(opt):
    version = 'v1.0'
    dpath = os.path.join(opt['datapath'], 'SCAN')

    if not build_data.built(dpath, version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        create_fb_format(dpath, 'train', os.path.join(dpath, 'tasks_train_simple.txt'))
        create_fb_format(dpath, 'valid', os.path.join(dpath, 'tasks_train_simple.txt'))
        create_fb_format(dpath, 'test', os.path.join(dpath, 'tasks_test_simple.txt'))

        # Mark the data as built.
        build_data.mark_done(dpath, version)

From facebookresearch/ParlAI: parlai/tasks/opensubtitles/build_2018.py
def build(datapath, use_history):
    dpath = os.path.join(datapath, 'OpenSubtitles2018')
    if not use_history:
        dpath += '_no_history'
    version = '1'

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        untar_path = os.path.join(dpath, 'OpenSubtitles', 'xml', 'en')

        if len(glob.glob(untar_path + '/*/*/*.xml')) != NUM_SUBTITLES_FILES:
            # Download the data.
            url = 'https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2018/xml/en.zip'
            build_data.download(url, dpath, 'OpenSubtitles2018.zip')
            build_data.untar(dpath, 'OpenSubtitles2018.zip')

        create_fb_format(untar_path, dpath, use_history)

From facebookresearch/ParlAI: parlai/tasks/cnn_dm/build.py
def build(opt):
    dpath = os.path.join(opt['datapath'], 'CNN_DM')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        for downloadable_file in RESOURCES:
            downloadable_file.download_file(dpath)

        for i, f in enumerate(RESOURCES[2:5]):
            dt = data_type[i]
            urls_fname = os.path.join(dpath, f.file_name)
            split_fname = os.path.join(dpath, dt + '.txt')
            with open(urls_fname) as urls_file, open(split_fname, 'a') as split_file:

From facebookresearch/ParlAI: parlai/tasks/vqa_v2/build.py
def build(opt):
    dpath = os.path.join(opt['datapath'], 'VQA-v2')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        # An older version exists, so remove these outdated files.
        if build_data.built(dpath):
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname1 = 'v2_Questions_Train_mscoco.zip'
        fname2 = 'v2_Questions_Val_mscoco.zip'
        fname3 = 'v2_Questions_Test_mscoco.zip'

        fname4 = 'v2_Annotations_Val_mscoco.zip'
        fname5 = 'v2_Annotations_Train_mscoco.zip'

        url = 'https://s3.amazonaws.com/cvmlp/vqa/mscoco/vqa/'
        build_data.download(url + fname1, dpath, fname1)
        build_data.download(url + fname2, dpath, fname2)
        build_data.download(url + fname3, dpath, fname3)

        build_data.download(url + fname4, dpath, fname4)
        build_data.download(url + fname5, dpath, fname5)

From facebookresearch/ParlAI: parlai/tasks/mturkwikimovies/build.py
def build(opt):
    # Depends upon another dataset, wikimovies, build that first.
    wikimovies_build.build(opt)

    dpath = os.path.join(opt['datapath'], 'MTurkWikiMovies')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname = 'mturkwikimovies.tar.gz'
        url = 'http://parl.ai/downloads/mturkwikimovies/' + fname
        build_data.download(url, dpath, fname)
        build_data.untar(dpath, fname)

        # Mark the data as built.
        build_data.mark_done(dpath, version_string=version)

From facebookresearch/ParlAI: parlai/tasks/cornell_movie/build.py
def build(opt):
    dpath = os.path.join(opt['datapath'], 'CornellMovie')
    version = None

    if not build_data.built(dpath, version_string=version):
        print('[building data: ' + dpath + ']')
        if build_data.built(dpath):
            # An older version exists, so remove these outdated files.
            build_data.remove_dir(dpath)
        build_data.make_dir(dpath)

        # Download the data.
        fname = 'cornell_movie_dialogs_corpus.tgz'
        url = 'http://parl.ai/downloads/cornell_movie/' + fname
        build_data.download(url, dpath, fname)
        build_data.untar(dpath, fname)

        dpext = os.path.join(dpath, 'cornell movie-dialogs corpus')
        create_fb_format(
            os.path.join(dpext, 'movie_lines.txt'),
            os.path.join(dpext, 'movie_conversations.txt'),
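
Finally, a task's build(opt) is normally called from the task's teacher code before any data is read, so the download happens lazily on first use. Here is a minimal sketch of that call site, assuming a hypothetical dataset folder and file layout.

import os

from .build import build


def _path(opt):
    # Make sure the data has been downloaded and unpacked; build() returns
    # quickly once build_data.mark_done has been recorded for this version.
    build(opt)
    # 'MyDataset' and the '<datatype>.txt' layout are placeholders.
    datatype = opt['datatype'].split(':')[0]
    return os.path.join(opt['datapath'], 'MyDataset', datatype + '.txt')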