import os

from itertools import chain
from typing import Dict, Iterable, Optional

# NOTE: OUTPUT_DIR, ARCHIVE_DIR_NAME, Link, and parse_json_link_details are
# assumed to come from the surrounding project's config and index modules;
# they are not defined in this snippet.
def get_unrecognized_folders(links: Iterable[Link], out_dir: str = OUTPUT_DIR) -> Dict[str, Optional[Link]]:
"""dirs that don't contain recognizable archive data and aren't listed in the main index"""
by_timestamp = {link.timestamp: 0 for link in links}
unrecognized_folders: Dict[str, Optional[Link]] = {}
for entry in os.scandir(os.path.join(out_dir, ARCHIVE_DIR_NAME)):
if entry.is_dir(follow_symlinks=True):
index_exists = os.path.exists(os.path.join(entry.path, 'index.json'))
link = None
try:
link = parse_json_link_details(entry.path)
except Exception:
pass
if index_exists and link is None:
# index exists but it's corrupted or unparseable
unrecognized_folders[entry.path] = link
elif not index_exists:
# link details index doesn't exist and the folder isn't in the main index
                timestamp = entry.name  # the folder name is the link's timestamp
if timestamp not in by_timestamp:
unrecognized_folders[entry.path] = link
return unrecognized_folders
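
# Hedged usage sketch (illustrative only; `links` stands for whatever iterable
# of Link objects the project's main index yields):
#
#     for folder, link in get_unrecognized_folders(links).items():
#         # link is None when index.json is missing or failed to parse
#         print('unrecognized folder:', folder)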


def get_orphaned_folders(links: Iterable[Link], out_dir: str = OUTPUT_DIR) -> Dict[str, Optional[Link]]:
    """dirs that contain a valid index but aren't listed in the main index"""
    links = list(links)
    indexed_folders = {link.link_dir: link for link in links}
    orphaned_folders: Dict[str, Optional[Link]] = {}
for entry in os.scandir(os.path.join(out_dir, ARCHIVE_DIR_NAME)):
if entry.is_dir(follow_symlinks=True):
index_exists = os.path.exists(os.path.join(entry.path, 'index.json'))
link = None
try:
link = parse_json_link_details(entry.path)
except Exception:
pass
if index_exists and entry.path not in indexed_folders:
# folder is a valid link data dir with index details, but it's not in the main index
orphaned_folders[entry.path] = link
return orphaned_folders
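
# Unlike get_unrecognized_folders() above, an orphaned folder does have an
# index.json on disk; it just isn't referenced by the main index. Hedged
# sketch of recovering the parseable ones (illustrative, not project API):
#
#     orphans = get_orphaned_folders(links)
#     recoverable = [link for link in orphans.values() if link is not None]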


def get_duplicate_folders(links: Iterable[Link], out_dir: str = OUTPUT_DIR) -> Dict[str, Optional[Link]]:
    """dirs that conflict with other dirs that have the same link URL or timestamp"""
    links = list(links)  # may be a one-shot iterator; it's consumed several times below
    by_url = {link.url: 0 for link in links}
    by_timestamp = {link.timestamp: 0 for link in links}

    duplicate_folders: Dict[str, Optional[Link]] = {}

    indexed_folders = {link.link_dir for link in links}
    data_folders = (
entry.path
for entry in os.scandir(os.path.join(out_dir, ARCHIVE_DIR_NAME))
if entry.is_dir(follow_symlinks=True) and entry.path not in indexed_folders
)
for path in chain(sorted(indexed_folders), sorted(data_folders)):
link = None
try:
link = parse_json_link_details(path)
except Exception:
pass
if link:
# link folder has same timestamp as different link folder
by_timestamp[link.timestamp] = by_timestamp.get(link.timestamp, 0) + 1
if by_timestamp[link.timestamp] > 1:
duplicate_folders[path] = link
# link folder has same url as different link folder
by_url[link.url] = by_url.get(link.url, 0) + 1
if by_url[link.url] > 1:
duplicate_folders[path] = link
return duplicate_folders
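
# Because indexed folders are walked before unindexed data folders (via
# chain(sorted(...), sorted(...))), the indexed copy of a URL or timestamp is
# treated as canonical and later conflicting folders are the ones flagged.
# A minimal self-contained sketch of the streaming duplicate-count idiom used
# above (illustrative names, not part of this module):
#
#     def flag_duplicates(pairs):
#         seen: Dict[str, int] = {}
#         flagged = []
#         for path, key in pairs:
#             seen[key] = seen.get(key, 0) + 1
#             if seen[key] > 1:
#                 flagged.append(path)
#         return flagged
#
#     flag_duplicates([('a', 't1'), ('b', 't2'), ('c', 't1')])  # -> ['c']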


def is_valid(link: Link) -> bool:
    """check that a link's data dir exists and its index.json matches the link's URL"""
    dir_exists = os.path.exists(link.link_dir)
index_exists = os.path.exists(os.path.join(link.link_dir, 'index.json'))
if not dir_exists:
# unarchived links are not included in the valid list
return False
    if not index_exists:
        # the data dir exists but there's no index.json to validate against
        return False
    try:
        # the saved index must point at the same URL as the link in the main index
        parsed_link = parse_json_link_details(link.link_dir)
        return link.url == parsed_link.url
    except Exception:
        pass
    return False
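
# Hedged usage sketch: prune the main index down to links whose on-disk data
# still checks out (assumes `links` is an iterable of Link objects):
#
#     valid_links = [link for link in links if is_valid(link)]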


def get_present_folders(links: Iterable[Link], out_dir: str = OUTPUT_DIR) -> Dict[str, Optional[Link]]:
    """dirs that actually exist in the archive/ data dir, whether or not they're in the main index"""
    # `links` is unused here, but kept so all the get_*_folders helpers share one signature
    all_folders: Dict[str, Optional[Link]] = {}
for entry in os.scandir(os.path.join(out_dir, ARCHIVE_DIR_NAME)):
if entry.is_dir(follow_symlinks=True):
link = None
try:
link = parse_json_link_details(entry.path)
except Exception:
pass
all_folders[entry.path] = link
return all_folders
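
# Taken together, these helpers partition the archive/ dir for integrity
# checks: get_present_folders() lists everything on disk, while the orphaned,
# unrecognized, and duplicate variants isolate the subsets that disagree with
# the main index. Hedged sketch of a combined report (the dict shape is
# illustrative, not part of this module):
#
#     report = {
#         'present': get_present_folders(links),
#         'orphaned': get_orphaned_folders(links),
#         'unrecognized': get_unrecognized_folders(links),
#         'duplicate': get_duplicate_folders(links),
#     }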