Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from icrawler.storage import BaseStorage
from io import BytesIO
class GoogleStorage(BaseStorage):
    """Google Storage backend.

    The id is filename and data is stored as text files or binary files.

    The root_dir is the bucket address. The bucket name is taken from the
    text after the first five characters of root_dir up to the next ``/``
    (presumably a ``gs://<bucket>/<directory>`` address -- confirm against
    callers).
    """

    def __init__(self, root_dir):
        """Create the Google Cloud Storage client for ``root_dir``.

        Args:
            root_dir (str): bucket address, see the class docstring.

        Raises:
            ImportError: if the "google-cloud-storage" package is missing.
        """
        try:
            from google.cloud import storage
        except ImportError:
            print('GoogleStorage backend requires the package '
                  '"google-cloud-storage", execute '
                  '"pip install google-cloud-storage" to install it.')
            # Re-raise after the hint: falling through would leave
            # ``storage`` unbound and fail below with a confusing
            # UnboundLocalError instead of the real cause.
            raise
        self.client = storage.Client()
        # Skip the five-character prefix and keep the first path segment,
        # i.e. the bucket name.
        bucket_str = root_dir[5:].split('/')[0]
def set_storage(self, storage):
    """Set storage backend for downloader

    For full list of storage backend supported, please see :mod:`storage`.

    Args:
        storage (dict or BaseStorage): storage backend configuration or
            instance. A dict must name the backend under ``'backend'``
            (defaulting to ``'FileSystem'`` when only ``'root_dir'`` is
            given); the remaining items are passed to the backend
            constructor as keyword arguments. The dict is not modified.

    Raises:
        TypeError: if ``storage`` is neither a BaseStorage nor a dict.
        KeyError: if the dict has neither ``'backend'`` nor ``'root_dir'``.
        SystemExit: if the named backend cannot be found or imported.
    """
    if isinstance(storage, BaseStorage):
        self.storage = storage
        return
    if not isinstance(storage, dict):
        # Fail loudly instead of silently leaving self.storage unset.
        raise TypeError('"storage" must be a storage object or dict')

    # Work on a copy so the caller's configuration dict is never mutated.
    kwargs = storage.copy()
    if 'backend' not in kwargs and 'root_dir' in kwargs:
        kwargs['backend'] = 'FileSystem'
    backend = kwargs.pop('backend')

    try:
        backend_cls = getattr(storage_package, backend)
    except AttributeError:
        try:
            # NOTE(review): import_module returns a module object, which is
            # then called like a class below -- confirm the intended form
            # of a custom backend name.
            backend_cls = import_module(backend)
        except ImportError:
            self.logger.error('cannot find backend module %s', backend)
            sys.exit()
    self.storage = backend_cls(**kwargs)
# -*- coding: utf-8 -*-
import os
import os.path as osp
import six
from icrawler.storage import BaseStorage
class FileSystem(BaseStorage):
"""Use filesystem as storage backend.
The id is filename and data is stored as text files or binary files.
"""
def __init__(self, root_dir):
self.root_dir = root_dir
def write(self, id, data):
filepath = osp.join(self.root_dir, id)
folder = osp.dirname(filepath)
if not osp.isdir(folder):
try:
os.makedirs(folder)
except OSError:
pass