Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def parse_html_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[str]:
    """Yield the urls found in the archive's html index file.

    The file is read line by line. For every line that contains the
    ``class="link-url"`` marker, the text between the first pair of
    double quotes on that line is yielded. If the index file does not
    exist, nothing is yielded.
    """
    html_index = os.path.join(out_dir, HTML_INDEX_FILENAME)
    if not os.path.exists(html_index):
        return ()

    marker = 'class="link-url"'
    with open(html_index, 'r', encoding='utf-8') as index_file:
        for row in index_file:
            if marker not in row:
                continue
            # second chunk of a '"'-split is the first quoted value on the line
            yield row.split('"')[1]
    return ()
def write_json_main_index(links: List[Link], out_dir: str=OUTPUT_DIR) -> None:
"""write the json link index to a given path"""
# Sanity checks on the input. Only the first link is inspected, as a
# sample of the whole list; an empty list passes every check.
assert isinstance(links, List), 'Links must be a list, not a generator.'
assert not links or isinstance(links[0].history, dict)
assert not links or isinstance(links[0].sources, list)
# If the first link has a non-empty 'title' history, its first entry must
# be an ArchiveResult.
if links and links[0].history.get('title'):
assert isinstance(links[0].history['title'][0], ArchiveResult)
# If the first link has any sources, the first one must be a plain string.
if links and links[0].sources:
assert isinstance(links[0].sources[0], str)
# Begin assembling the index payload: the shared header fields, then the
# link count and the time of this write.
# NOTE(review): the remainder of this dict literal (and of the function)
# is cut off in this excerpt.
main_index_json = {
**MAIN_INDEX_HEADER,
'num_links': len(links),
'updated': datetime.now(),
def write_html_main_index(links: List[Link], out_dir: str=OUTPUT_DIR, finished: bool=False) -> None:
    """Write the html link index into ``out_dir``.

    The favicon, robots.txt and static-directory entries are each passed
    to ``copy_and_overwrite`` (from TEMPLATES_DIR to ``out_dir``), then the
    index rendered by ``main_index_template`` is written with
    ``atomic_write`` to HTML_INDEX_FILENAME inside ``out_dir``.
    """
    template_assets = (FAVICON_FILENAME, ROBOTS_TXT_FILENAME, STATIC_DIR_NAME)
    for asset_name in template_assets:
        copy_and_overwrite(
            join(TEMPLATES_DIR, asset_name),
            join(out_dir, asset_name),
        )

    atomic_write(
        main_index_template(links, finished=finished),
        join(out_dir, HTML_INDEX_FILENAME),
    )
def list_links(filter_patterns: Optional[List[str]]=None,
               filter_type: str='exact',
               after: Optional[float]=None,
               before: Optional[float]=None,
               out_dir: str=OUTPUT_DIR) -> Iterable[Link]:
    """Yield the links from the main index that pass the given filters.

    A link is dropped when its timestamp (converted to float) is below
    ``after`` or above ``before``; a bound set to None is not applied.
    When ``filter_patterns`` is non-empty, a link is yielded only if
    ``link_matches_filter`` accepts it for the given ``filter_type``;
    otherwise every link inside the time window is yielded.
    """
    check_data_folder(out_dir=out_dir)

    for candidate in load_main_index(out_dir=out_dir):
        outside_window = (
            (after is not None and float(candidate.timestamp) < after)
            or (before is not None and float(candidate.timestamp) > before)
        )
        if outside_window:
            continue

        # No patterns means no pattern filtering: everything in the window passes.
        if not filter_patterns or link_matches_filter(candidate, filter_patterns, filter_type):
            yield candidate
def parse_json_main_index(out_dir: str=OUTPUT_DIR) -> Iterator[Link]:
    """Yield a Link for every entry in the archive's json index file.

    The file's top-level ``'links'`` array is loaded and each element is
    converted with ``Link.from_json``. If the index file does not exist,
    nothing is yielded.
    """
    json_index = os.path.join(out_dir, JSON_INDEX_FILENAME)
    if not os.path.exists(json_index):
        return ()

    with open(json_index, 'r', encoding='utf-8') as index_file:
        entries = pyjson.load(index_file)['links']
        for entry in entries:
            yield Link.from_json(entry)
    return ()
def get(self, request):
    """Render the main index page for a GET request.

    Requests from users who are not authenticated are redirected to the
    admin login (with the current path as ``next``) unless PUBLIC_INDEX
    is truthy. Otherwise the links and index metadata are loaded from
    OUTPUT_DIR and handed to the view's template.
    """
    if not (request.user.is_authenticated or PUBLIC_INDEX):
        return redirect(f'/admin/login/?next={request.path}')

    links = load_main_index(out_dir=OUTPUT_DIR)
    index_meta = load_main_index_meta(out_dir=OUTPUT_DIR)

    page_context = dict(
        updated=index_meta['updated'],
        num_links=index_meta['num_links'],
        links=links,
        VERSION=VERSION,
        FOOTER_INFO=FOOTER_INFO,
    )
    return render(template_name=self.template, request=request, context=page_context)
def help(out_dir: str=OUTPUT_DIR) -> None:
"""Print the ArchiveBox help message and usage"""
# Mapping of subcommand name -> one-line summary (iterated via .items() below).
all_subcommands = list_subcommands()
# Build the command listing as several groups separated by blank lines.
# Each row is the command name left-justified to 20 columns, then its summary.
# Group 1: commands listed in meta_cmds.
COMMANDS_HELP_TEXT = '\n '.join(
f'{cmd.ljust(20)} {summary}'
for cmd, summary in all_subcommands.items()
if cmd in meta_cmds
# Group 2: commands listed in main_cmds.
) + '\n\n ' + '\n '.join(
f'{cmd.ljust(20)} {summary}'
for cmd, summary in all_subcommands.items()
if cmd in main_cmds
# Group 3: commands listed in archive_cmds.
) + '\n\n ' + '\n '.join(
f'{cmd.ljust(20)} {summary}'
for cmd, summary in all_subcommands.items()
if cmd in archive_cmds
# NOTE(review): a further group starts here, but the expression (and the
# rest of the function) is cut off in this excerpt.
) + '\n\n ' + '\n '.join(
def get_admins(out_dir: str=OUTPUT_DIR) -> List[str]:
    """Return the users flagged as superusers.

    Django is set up for ``out_dir`` (with ``check_db=False``) before the
    User model is imported, so the import must stay inside the function.

    NOTE(review): the annotation says ``List[str]``, but the value returned
    is whatever ``User.objects.filter`` produces -- confirm what callers
    expect before changing either side.
    """
    setup_django(out_dir, check_db=False)

    # Deferred import: the model is only imported after setup_django() has run.
    from django.contrib.auth.models import User as AuthUser

    superusers = AuthUser.objects.filter(is_superuser=True)
    return superusers