Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_conflicting_init(self):
    """Check that init aborts when OUTPUT_DIR already holds an unrelated file.

    A stray file is written into OUTPUT_DIR first. ``archivebox_init.main``
    must then exit via SystemExit without creating any of the index files,
    and ``load_main_index`` must fail because no index is present.
    """
    with open(os.path.join(OUTPUT_DIR, 'test_conflict.txt'), 'w+') as f:
        f.write('test')

    try:
        with output_hidden(show_failing=False):
            archivebox_init.main([])
    except SystemExit:
        pass
    else:
        raise AssertionError('Init should have exited with an exception')

    assert not os.path.exists(os.path.join(OUTPUT_DIR, SQL_INDEX_FILENAME))
    assert not os.path.exists(os.path.join(OUTPUT_DIR, JSON_INDEX_FILENAME))
    assert not os.path.exists(os.path.join(OUTPUT_DIR, HTML_INDEX_FILENAME))

    # The failure is raised from the ``else`` clause, outside the guarded
    # call: with the previous bare ``except`` the AssertionError produced by
    # ``assert False`` was swallowed too, so this check could never fail.
    try:
        load_main_index(out_dir=OUTPUT_DIR)
    except (Exception, SystemExit):
        # Any error (or a SystemExit-style abort) counts as the expected outcome.
        pass
    else:
        raise AssertionError('load_main_index should raise an exception when no index is present')
def test_remove_regex(self):
    """Remove every link matching a regex filter and check that 4 links remain."""
    # Raw string: the pattern contains \/ \. and \d, which are invalid escape
    # sequences in a normal string literal (a warning today, an error in
    # future Python versions). The runtime value is unchanged.
    url_pattern = r'http(s)?:\/\/(.+\.)?(example\d\.com)'

    with output_hidden():
        archivebox_remove.main(['--yes', '--delete', '--filter-type=regex', url_pattern])

    all_links = load_main_index(out_dir=OUTPUT_DIR)
    assert len(all_links) == 4
def test_add_stdin_url(self):
    """Feed ``test_urls`` to the add command via stdin; the index must then hold 12 links."""
    with output_hidden():
        archivebox_add.main([], stdin=test_urls)

    # Re-read the index from disk rather than trusting the command's own state.
    indexed_links = load_main_index(out_dir=OUTPUT_DIR)
    link_count = len(indexed_links)
    assert link_count == 12
def test_remove_exact(self):
    """Delete one link by its exact URL; the index must then hold 11 links."""
    remove_args = ['--yes', '--delete', 'https://example5.com/']
    with output_hidden():
        archivebox_remove.main(remove_args)

    # Re-read the index from disk to count what is left.
    remaining_links = load_main_index(out_dir=OUTPUT_DIR)
    remaining_count = len(remaining_links)
    assert remaining_count == 11
@enforce_types
def fetch_git(link: Link, link_dir: Optional[str]=None, timeout: int=TIMEOUT) -> ArchiveResult:
"""Download the full site at ``link.url`` using a git mirror clone.

NOTE(review): this excerpt ends right after the command list is built; the
code that runs the command and builds the ArchiveResult is not visible here.
"""
# Fall back to the link's own directory when no explicit one is passed.
link_dir = link_dir or link.link_dir
# The output name doubles as the sub-directory name under link_dir.
output: ArchiveOutput = 'git'
output_path = os.path.join(link_dir, str(output))
# exist_ok=True: an already existing output directory is not an error.
os.makedirs(output_path, exist_ok=True)
cmd = [
GIT_BINARY,
'clone',
'--mirror',
'--recursive',
# Certificate verification is switched off only when CHECK_SSL_VALIDITY is falsy.
*([] if CHECK_SSL_VALIDITY else ['-c', 'http.sslVerify=false']),
# Presumably strips the #fragment and ?query from the URL before cloning;
# TODO confirm against the helpers' definitions.
without_query(without_fragment(link.url)),
]
# Initial status value; presumably overwritten on failure in the part of the
# function that is not shown here.
status = 'succeeded'
@enforce_types
def save_file_to_sources(path: str, timeout: int=TIMEOUT, out_dir: str=OUTPUT_DIR) -> str:
"""Download a given url's content into <out_dir>/<SOURCES_DIR_NAME>/<name>-<timestamp>.txt

<name> is basename(path) by default and domain(path) when path starts with
http://, https:// or ftp://.

NOTE(review): this excerpt is cut off inside the progress print() call; the
download itself and the return value are not visible here.
"""
check_data_folder(out_dir=out_dir)
sources_dir = os.path.join(out_dir, SOURCES_DIR_NAME)
if not os.path.exists(sources_dir):
os.makedirs(sources_dir)
# Whole-second part of the current timestamp, used as a filename suffix.
ts = str(datetime.now().timestamp()).split('.', 1)[0]
# Default target name, based on the basename of the given path.
source_path = os.path.join(sources_dir, '{}-{}.txt'.format(basename(path), ts))
# Remote URLs are named after their domain instead.
if any(path.startswith(s) for s in ('http://', 'https://', 'ftp://')):
source_path = os.path.join(sources_dir, '{}-{}.txt'.format(domain(path), ts))
print('{}[*] [{}] Downloading {}{}'.format(
ANSI['green'],
# --set branch: parse KEY=VALUE lines from config_options, pass the recognised
# ones to write_config_file, then look for other options whose value changed.
# NOTE(review): the enclosing function header and the preceding branch are not
# visible in this excerpt.
elif set:
new_config = {}
failed_options = []
for line in config_options:
# Skip comment lines and blank lines.
if line.startswith('#') or not line.strip():
continue
# A line without '=' aborts the whole command with exit status 2.
if '=' not in line:
stderr('[X] Config KEY=VALUE must have an = sign in it', color='red')
stderr(f' {line}')
raise SystemExit(2)
# NOTE(review): split('=') has no maxsplit, so a value that itself contains
# '=' yields more than two parts and this unpacking raises ValueError;
# split('=', 1) would keep the remainder in val.
raw_key, val = line.split('=')
raw_key = raw_key.upper().strip()
# Map the given key to its current name; a differing result means the option
# was renamed, which is reported to the user below.
key = get_real_name(raw_key)
if key != raw_key:
stderr(f'[i] Note: The config option {raw_key} has been renamed to {key}, please use the new name going forwards.', color='lightyellow')
# Only keys present in CONFIG are accepted; other lines are collected in
# failed_options.
if key in CONFIG:
new_config[key] = val.strip()
else:
failed_options.append(line)
if new_config:
# NOTE(review): this binds the same CONFIG object rather than a copy; the
# before/after comparison below can only detect changes if load_all_config()
# returns a new mapping and leaves CONFIG untouched. Confirm.
before = CONFIG
matching_config = write_config_file(new_config, out_dir=OUTPUT_DIR)
after = load_all_config()
print(printable_config(matching_config))
# Collect USER_CONFIG keys whose value differs between before and after even
# though they are not part of matching_config.
side_effect_changes: ConfigDict = {}
for key, val in after.items():
if key in USER_CONFIG and (before[key] != after[key]) and (key not in matching_config):
side_effect_changes[key] = after[key]
# NOTE(review): the lines from here to the second `raise SystemExit(0)` are the
# tail of a cron-scheduling routine whose beginning is not visible in this
# excerpt; the same snippet appears twice in a row.
# End of a usage message, followed by exit status 1.
stderr(' archivebox init --every="0/5 * * * *" https://example.com/some/rss/feed.xml')
raise SystemExit(1)
# Presumably removes duplicate jobs before the crontab is written; TODO confirm
# against dedupe_cron_jobs.
cron = dedupe_cron_jobs(cron)
cron.write()
# Sum of frequency_per_year() over every job in the crontab.
total_runs = sum(j.frequency_per_year() for j in cron)
# Jobs found by looking up the CRON_COMMENT marker.
existing_jobs = list(cron.find_comment(CRON_COMMENT))
print()
print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **ANSI))
# The job whose string form equals new_job is marked with '>'.
print('\n'.join(f' > {cmd}' if str(cmd) == str(new_job) else f' {cmd}' for cmd in existing_jobs))
# Warn about storage use when more than 60 runs per year are estimated, unless
# quiet is set.
if total_runs > 60 and not quiet:
stderr()
stderr('{lightyellow}[!] With the current cron config, ArchiveBox is estimated to run >{} times per year.{reset}'.format(total_runs, **ANSI))
stderr(f' Congrats on being an enthusiastic internet archiver! 👌')
stderr()
stderr(' Make sure you have enough storage space available to hold all the data.')
stderr(' Using a compressed/deduped filesystem like ZFS is recommended if you plan on archiving a lot.')
# Exit status 0.
raise SystemExit(0)
# Second copy of the same snippet, starting one usage line earlier.
stderr(' archivebox init --every=day https://example.com/some/rss/feed.xml')
stderr(' archivebox init --every="0/5 * * * *" https://example.com/some/rss/feed.xml')
raise SystemExit(1)
cron = dedupe_cron_jobs(cron)
cron.write()
total_runs = sum(j.frequency_per_year() for j in cron)
existing_jobs = list(cron.find_comment(CRON_COMMENT))
print()
print('{green}[√] Scheduled new ArchiveBox cron job for user: {} ({} jobs are active).{reset}'.format(USER, len(existing_jobs), **ANSI))
print('\n'.join(f' > {cmd}' if str(cmd) == str(new_job) else f' {cmd}' for cmd in existing_jobs))
if total_runs > 60 and not quiet:
stderr()
stderr('{lightyellow}[!] With the current cron config, ArchiveBox is estimated to run >{} times per year.{reset}'.format(total_runs, **ANSI))
stderr(f' Congrats on being an enthusiastic internet archiver! 👌')
stderr()
stderr(' Make sure you have enough storage space available to hold all the data.')
stderr(' Using a compressed/deduped filesystem like ZFS is recommended if you plan on archiving a lot.')
raise SystemExit(0)
# NOTE(review): tail of a config command whose header is not visible in this
# excerpt.
# Print the collected side-effect changes.
print(' {}'.format(printable_config(side_effect_changes, prefix=' ')))
# Report every option line collected in failed_options.
if failed_options:
stderr()
stderr('[X] These options failed to set:', color='red')
stderr(' {}'.format('\n '.join(failed_options)))
# bool is an int subclass, so the exit status is 1 when any option failed and
# 0 otherwise.
raise SystemExit(bool(failed_options))
# --reset is not implemented: point the user at the config file, exit status 2.
elif reset:
stderr('[X] This command is not implemented yet.', color='red')
stderr(' Please manually remove the relevant lines from your config file:')
stderr(f' {CONFIG_FILE}')
raise SystemExit(2)
# No earlier branch matched: print usage help and exit with status 2.
else:
stderr('[X] You must pass either --get or --set, or no arguments to get the whole config.', color='red')
stderr(' archivebox config')
stderr(' archivebox config --get SOME_KEY')
stderr(' archivebox config --set SOME_KEY=SOME_VALUE')
raise SystemExit(2)