Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
if not indexFileContents:
return 0
lines = indexFileContents.strip().split('\n')
uris = {}
for i, l in enumerate(lines):
if not ipwbUtils.isValidCDXJLine(l):
continue
if ipwbUtils.isCDXJMetadataRecord(l):
continue
cdxjFields = l.split(' ', 2)
uri = unsurt(cdxjFields[0])
datetime = cdxjFields[1]
try:
jsonFields = json.loads(cdxjFields[2])
except Exception as e: # Skip lines w/o JSON block
continue
if uri not in uris:
uris[uri] = []
mementoAsJSON = {
'datetime': datetime,
'mime': jsonFields['mime_type'] or '',
'status': jsonFields['status_code']
}
if 'title' in jsonFields:
def generateLinkTimeMapFromCDXJLines(cdxjLines, original, tmself, tgURI):
    """Build a Link-format TimeMap from the CDXJ lines of one resource.

    Args:
        cdxjLines: Sequence of CDXJ records, each holding at least three
            space-separated fields: SURT URI, 14-digit datetime and a
            trailing JSON block (the JSON block is not inspected here).
        original: SURT form of the original resource URI; it is unsurted
            and prefixed with 'http://' for the rel="original" link.
        tmself: URI of this TimeMap, emitted as the rel="self timemap" link.
        tgURI: URI of the TimeGate, emitted as the rel="timegate" link.

    Returns:
        The TimeMap as a single string: four header links followed by one
        memento link per CDXJ line, entries separated by a comma and a
        newline, with one trailing newline.

    Raises:
        ValueError: If a CDXJ line has fewer than three space-separated
            fields.
    """
    tmurl = getProxiedURIT(tmself)
    if app.proxy is not None:
        # A proxy is configured: use the proxied forms of both URIs
        tmself = urlunsplit(tmurl)
        tgURI = urlunsplit(getProxiedURIT(tgURI))

    # Extract and trim for host:port prepending
    tmurl[2] = ''  # Clear TM path
    hostAndPort = urlunsplit(tmurl) + '/'

    # unsurted URI will never have a scheme, add one
    originalURI = 'http://{0}'.format(unsurt(original))
    cdxjTMURI = tmself.replace('/timemap/link/', '/timemap/cdxj/')

    # Collect every entry and join once instead of growing a string
    entries = [
        '<{0}>; rel="original"'.format(originalURI),
        '<{0}>; rel="self timemap"; '
        'type="application/link-format"'.format(tmself),
        '<{0}>; rel="timemap"; '
        'type="application/cdxj+ors"'.format(cdxjTMURI),
        '<{0}>; rel="timegate"'.format(tgURI),
    ]

    mementoCount = len(cdxjLines)
    lastIndex = mementoCount - 1
    for i, line in enumerate(cdxjLines):
        # Third field (the JSON block) is unused; names avoid shadowing
        # the datetime and json modules
        surtURI, digits14, _ = line.split(' ', 2)
        dtRFC1123 = ipwbUtils.digits14ToRFC1123(digits14)

        if mementoCount == 1:
            firstLastStr = 'first last '
        elif i == 0:
            firstLastStr = 'first '
        elif i == lastIndex:
            firstLastStr = 'last '
        else:
            firstLastStr = ''

        entries.append(
            '<{0}memento/{1}/{2}>; rel="{3}memento"; datetime="{4}"'
            .format(hostAndPort, digits14, unsurt(surtURI), firstLastStr,
                    dtRFC1123))

    return ',\n'.join(entries) + '\n'
def generateCDXJTimeMapFromCDXJLines(cdxjLines, original, tmself, tgURI):
tmurl = getProxiedURIT(tmself)
if app.proxy is not None:
tmself = urlunsplit(tmurl)
tgURI = urlunsplit(getProxiedURIT(tgURI))
# unsurted URI will never have a scheme, add one
originalURI = 'http://{0}'.format(unsurt(original))
tmData = '!context ["http://tools.ietf.org/html/rfc7089"]\n'
tmData += '!id {{"uri": "{0}"}}\n'.format(tmself)
tmData += '!keys ["memento_datetime_YYYYMMDDhhmmss"]\n'
tmData += '!meta {{"original_uri": "{0}"}}\n'.format(originalURI)
tmData += '!meta {{"timegate_uri": "{0}"}}\n'.format(tgURI)
linkTMURI = tmself.replace('/timemap/cdxj/', '/timemap/link/')
tmData += ('!meta {{"timemap_uri": {{'
'"link_format": "{0}", '
'"cdxj_format": "{1}"'
'}}}}\n').format(linkTMURI, tmself)
hostAndPort = tmself[0:tmself.index('timemap/')]
for i, line in enumerate(cdxjLines):
(surtURI, datetime, json) = line.split(' ', 2)
dtRFC1123 = ipwbUtils.digits14ToRFC1123(datetime)
s = surt.surt(urir, path_strip_trailing_slash_unless_empty=False)
indexPath = ipwbUtils.getIPWBReplayIndexPath()
print('Getting CDXJ Lines with the URI-R {0} from {1}'
.format(urir, indexPath))
cdxjLinesWithURIR = getCDXJLinesWithURIR(urir, indexPath)
closestLine = getCDXJLineClosestTo(datetime, cdxjLinesWithURIR)
if closestLine is None:
msg = '<h1>ERROR 404</h1>'
msg += 'No capture found for {0} at {1}.'.format(urir, datetime)
return Response(msg, status=404)
uri = unsurt(closestLine.split(' ')[0])
newDatetime = closestLine.split(' ')[1]
linkHeader = getLinkHeaderAbbreviatedTimeMap(urir, newDatetime)
return (newDatetime, linkHeader, uri)
def generateNoMementosInterface(path, datetime):
msg = '<h1>ERROR 404</h1>'
msg += 'No capture found for {0} at {1}.'.format(path, datetime)
linesWithSameURIR = getCDXJLinesWithURIR(path, None)
print('CDXJ lines with URI-R at {0}'.format(path))
print(linesWithSameURIR)
# TODO: Use closest instead of conditioning on single entry
# temporary fix for core functionality in #225
if len(linesWithSameURIR) == 1:
fields = linesWithSameURIR[0].split(' ', 2)
redirectURI = '/{1}/{0}'.format(unsurt(fields[0]), fields[1])
return redirect(redirectURI, code=302)
urir = ''
if linesWithSameURIR:
msg += '<p>{0} capture(s) available:</p><ul>'.format(
len(linesWithSameURIR))
for line in linesWithSameURIR:
fields = line.split(' ', 2)
urir = unsurt(fields[0])
msg += ('<li><a href="/{1}/{0}">{0} at {1}</a></li>'
.format(urir, fields[1]))
msg += '</ul>'
msg += '<p>TimeMaps: '
msg += '<a href="/timemap/link/{0}">Link</a> '.format(urir)</p>