Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Return the FanFiktionDeAdapter class object (not an instance)."""
    adapter_class = FanFiktionDeAdapter
    return adapter_class
# Class name has to be unique. Our convention is camel case the
# sitename with Adapter at the end. www is skipped.
class FanFiktionDeAdapter(BaseSiteAdapter):
    """Adapter that normalizes story URLs to https://<domain>/s/<storyId>/1."""

    def __init__(self, config, url):
        """Set login defaults, record the story id and normalize the URL.

        ``config`` and ``url`` are passed unchanged to
        ``BaseSiteAdapter.__init__``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Login state. The placeholder user name is deliberate: if left
        # empty, site doesn't return any message at all.
        self.username = "NoneGiven"
        self.password = ""
        self.is_adult = False

        # The story id is element 2 of the '/'-split URL path.
        path_pieces = self.parsedUrl.path.split('/')
        self.story.setMetadata('storyId', path_pieces[2])

        # Normalized story URL, rebuilt from the stored story id.
        site_root = 'https://' + self.getSiteDomain()
        self._setURL(site_root + '/s/' + self.story.getMetadata('storyId') + '/1')
# Each adapter needs to have a unique site abbreviation.
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

# Template for "pseudo" tags. After %s is filled with a '|'-joined list of tag
# names it matches a run of '<', lazily anything, then a run of '>', unless
# the text right after the '<' is exactly an (optionally closing) listed tag
# name followed by '>'.
pseudo_html_regex_format = r'(<+(?!/?(%s)>).*?>+)'

# Matches an opening or closing tag whose name is one of HTML_TAGS,
# optionally carrying attributes, case-insensitively.
# NOTE(review): the pattern literal here was empty (r''), so the %-formatting
# raised "TypeError: not all arguments converted during string formatting"
# at import time. This pattern is a reconstruction -- confirm against the
# upstream definition.
real_html_regex = re.compile(r'</?(?:%s)(?:\s.*?)?\s*>' % '|'.join(HTML_TAGS), re.IGNORECASE)
def getClass():
    """Return the WWWWebNovelComAdapter class object (not an instance)."""
    adapter_class = WWWWebNovelComAdapter
    return adapter_class
def fix_pseudo_html(pseudo_html, whitelist_tags=()):
    """Escape angle-bracket runs in *pseudo_html* that are not known tags.

    Every substring matched by ``pseudo_html_regex_format`` (filled with the
    union of ``HTML_TAGS`` and *whitelist_tags*) has its ``&``, ``<`` and
    ``>`` characters replaced by HTML entities; everything else is returned
    unchanged.

    :param pseudo_html: text that may mix real tags with stray ``<...>`` runs.
    :param whitelist_tags: extra tag names to leave unescaped.
    :return: the text with non-whitelisted ``<...>`` runs escaped.
    """
    # cgi.escape() was removed in Python 3.8; html.escape(quote=False) is its
    # documented replacement. Python 2 has no html.escape, so fall back there.
    try:
        from html import escape as _escape_markup
    except ImportError:  # Python 2
        from cgi import escape as _escape_markup

    tags = set(HTML_TAGS).union(whitelist_tags)
    # Sorting keeps the generated pattern text deterministic (set iteration
    # order is not); the alternation matches the same names either way.
    pseudo_html_regex = re.compile(
        pseudo_html_regex_format % '|'.join(sorted(tags)), re.IGNORECASE)
    # quote=False reproduces cgi.escape's default of leaving quotes alone.
    return pseudo_html_regex.sub(
        lambda match: _escape_markup(match.group(1), quote=False), pseudo_html)
class WWWWebNovelComAdapter(BaseSiteAdapter):
    """Adapter that normalizes story URLs to https://<domain>/book/<storyId>."""

    # NOTE(review): presumably a delay (seconds?) applied when fetching VIP
    # content; its use is not visible in this section -- confirm.
    _GET_VIP_CONTENT_DELAY = 8

    def __init__(self, config, url):
        """Record the story id, normalize the URL and set the site abbreviation.

        ``config`` and ``url`` are passed unchanged to
        ``BaseSiteAdapter.__init__``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # The story id is element 2 of the '/'-split URL path, e.g.
        # https://www.webnovel.com/book/6831837102000205
        path_pieces = self.parsedUrl.path.split('/')
        self.story.setMetadata('storyId', path_pieces[2])

        # Normalized story URL, rebuilt from the stored story id.
        site_root = 'https://' + self.getSiteDomain()
        self._setURL(site_root + '/book/' + self.story.getMetadata('storyId'))

        # Each adapter needs to have a unique site abbreviation.
        self.story.setMetadata('siteabbrev', 'wncom')

        # No CSRF token is known at construction time.
        self._csrf_token = None
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Return the AsianFanFicsComAdapter class object (not an instance)."""
    adapter_class = AsianFanFicsComAdapter
    return adapter_class
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class AsianFanFicsComAdapter(BaseSiteAdapter):
    """Adapter that derives the story id from the URL path and site pattern."""

    def __init__(self, config, url):
        """Set empty login defaults and record the story id.

        ``config`` and ``url`` are passed unchanged to
        ``BaseSiteAdapter.__init__``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        self.username = ""
        self.password = ""
        self.is_adult = False

        # First guess: element 3 of the '/'-split URL path.
        path_pieces = self.parsedUrl.path.split('/')
        self.story.setMetadata('storyId', path_pieces[3])

        # If the site URL pattern matches, its 'id' group overrides the guess.
        url_match = re.match(self.getSiteURLPattern(), url)
        if url_match is not None:
            self.story.setMetadata('storyId', url_match.group('id'))
def __init__(self, config, url):
    """Set login defaults, record the story id and normalize the URL.

    ``config`` and ``url`` are passed unchanged to
    ``BaseSiteAdapter.__init__``.
    """
    BaseSiteAdapter.__init__(self, config, url)

    # Login state. The placeholder user name is deliberate: if left
    # empty, site doesn't return any message at all.
    self.username = "NoneGiven"
    self.password = ""
    self.is_adult = False

    # The story id is whatever follows the first '=' in the query string
    # (the original comment says URL validation restricts the query to
    # sid=1234).
    query_pieces = self.parsedUrl.query.split('=')
    self.story.setMetadata('storyId', query_pieces[1])

    # Normalized story URL, rebuilt from the stored story id.
    site_root = 'http://' + self.getSiteDomain()
    self._setURL(site_root + '/viewstory.php?sid=' + self.story.getMetadata('storyId'))

    # Each adapter needs to have a unique site abbreviation.
    self.story.setMetadata('siteabbrev', 'chosen2')
# The date format will vary from site to site.
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError
from .base_adapter import BaseSiteAdapter, makeDate
def getClass():
    """Return the ArchiveOfOurOwnOrgAdapter class object (not an instance)."""
    adapter_class = ArchiveOfOurOwnOrgAdapter
    return adapter_class
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class ArchiveOfOurOwnOrgAdapter(BaseSiteAdapter):
    """Adapter that takes the story id from element 2 of the URL path."""

    def __init__(self, config, url):
        """Set login and full-work defaults and record the story id.

        ``config`` and ``url`` are passed unchanged to
        ``BaseSiteAdapter.__init__``.
        """
        BaseSiteAdapter.__init__(self, config, url)

        # Login state. The placeholder user name is deliberate: if left
        # empty, site doesn't return any message at all.
        self.username = "NoneGiven"
        self.password = ""
        self.is_adult = False

        # NOTE(review): presumably a cache for the parsed whole-work page and
        # a switch for using it; neither is read in this section -- confirm.
        self.full_work_soup = None
        self.use_full_work_soup = True

        # The story id is element 2 of the '/'-split URL path.
        path_pieces = self.parsedUrl.path.split('/')
        self.story.setMetadata('storyId', path_pieces[2])

        # Match the URL against the site pattern; the result is not used
        # within the visible part of this method.
        url_match = re.match(self.getSiteURLPattern(), url)
# Software: eFiction
from __future__ import absolute_import
import logging
logger = logging.getLogger(__name__)
import re
from ..htmlcleanup import stripHTML
from .. import exceptions as exceptions
# py2 vs py3 transition
from ..six import text_type as unicode
from ..six.moves.urllib.error import HTTPError
from .base_adapter import BaseSiteAdapter, makeDate
class WhoficComSiteAdapter(BaseSiteAdapter):
    """Adapter for www.whofic.com."""

    def __init__(self, config, url):
        """Set the site abbreviation and the date format.

        ``config`` and ``url`` are passed unchanged to
        ``BaseSiteAdapter.__init__``.
        """
        BaseSiteAdapter.__init__(self, config, url)
        self.story.setMetadata('siteabbrev', 'whof')

        # The date format will vary from site to site; see
        # http://docs.python.org/library/datetime.html#strftime-strptime-behavior
        self.dateformat = '%Y.%m.%d'

    @staticmethod
    def getSiteDomain():
        """Return the site's host name."""
        domain = 'www.whofic.com'
        return domain

    @classmethod
    def getSiteExampleURLs(cls):
        """Return an example story URL built from the site domain."""
        pieces = ("https://", cls.getSiteDomain(), "/viewstory.php?sid=1234")
        return "".join(pieces)
def __init__(self, config, url):
    """Configure the adapter from hooks and normalize the URL.

    ``config`` and ``url`` are passed unchanged to
    ``BaseSiteAdapter.__init__``. The abbreviation, encoding, URL pattern
    and view-story URL all come from methods on ``self``.
    """
    BaseSiteAdapter.__init__(self, config, url)

    self.story.setMetadata('siteabbrev', self.getSiteAbbrev())
    self.set_decode(self.getEncoding())

    # The 'storyId' group of the site URL pattern, matched against self.url.
    url_pattern = re.compile(self.getSiteURLPattern())
    story_id = url_pattern.match(self.url).group('storyId')
    self.story.setMetadata('storyId', story_id)
    self._setURL(self.getViewStoryUrl(story_id))

    # Neither a login nor a warning acceptance has been attempted yet.
    self.triedLoggingIn = False
    self.triedAcceptWarnings = False

    # Placeholder user name: if left empty, site doesn't return any
    # message at all.
    self.username = "NoneGiven"
'bdo', 'big', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'center', 'cite', 'code', 'col',
'colgroup', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'embed',
'fieldset', 'figcaption', 'figure', 'font', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5',
'h6', 'head', 'header', 'hr', 'html', 'i', 'iframe', 'img', 'input', 'ins', 'kbd', 'label', 'legend', 'li', 'link',
'main', 'map', 'mark', 'menu', 'menuitem', 'meta', 'meter', 'nav', 'noframes', 'noscript', 'object', 'ol',
'optgroup', 'option', 'output', 'p', 'param', 'picture', 'pre', 'progress', 'q', 'rp', 'rt', 'ruby', 's', 'samp',
'script', 'section', 'select', 'small', 'source', 'span', 'strike', 'strong', 'style', 'sub', 'summary', 'sup',
'svg', 'table', 'tbody', 'td', 'template', 'textarea', 'tfoot', 'th', 'thead', 'time', 'title', 'tr', 'track', 'tt',
'u', 'ul', 'var', 'video', 'wbr')
def getClass():
    ''' Return the LightNovelGateSiteAdapter class object (not an instance) '''
    adapter_class = LightNovelGateSiteAdapter
    return adapter_class
class LightNovelGateSiteAdapter(BaseSiteAdapter):
    ''' Adapter for LightNovelGate.com '''

    def __init__(self, config, url):
        ''' Set site constants and login defaults, then record the story id.

        ``config`` and ``url`` are passed unchanged to
        ``BaseSiteAdapter.__init__``.
        '''
        BaseSiteAdapter.__init__(self, config, url)

        self.story.setMetadata('siteabbrev', 'lng')

        # Timestamp format with a literal "+00:00" suffix.
        self.dateformat = "%Y-%m-%dT%H:%M:%S+00:00"

        self.is_adult = False
        self.username = None
        self.password = None

        # The story id is the 'id' group of the site URL pattern; it is
        # only recorded when the pattern matches.
        url_match = re.match(self.getSiteURLPattern(), url)
        if url_match is not None:
            self.story.setMetadata('storyId', url_match.group('id'))
def _fetchUrlOpened(self, url,
                    parameters=None,
                    usecache=True,
                    extrasleep=2.0,
                    referer=None):
    """Delegate to ``BaseSiteAdapter._fetchUrlOpened`` with a 2.0 extrasleep default.

    All arguments are forwarded unchanged; only the default value of
    ``extrasleep`` is set by this override.
    """
    ## We've been requested by the site(s) admin to rein in hits.
    ## This is in addition to whatever the slow_down_sleep_time
    ## setting is.
    forwarded = dict(parameters=parameters,
                     usecache=usecache,
                     extrasleep=extrasleep,
                     referer=referer)
    return BaseSiteAdapter._fetchUrlOpened(self, url, **forwarded)
def _fetchUrl(self, url, parameters=None, extrasleep=1.0, usecache=True):
    """Delegate to ``BaseSiteAdapter._fetchUrl`` with a 1.0 extrasleep default.

    All arguments are forwarded unchanged; only the default value of
    ``extrasleep`` is set by this override.
    """
    ## ffnet(and, I assume, fpcom) tends to fail more if hit too
    ## fast. This is in addition to whatever the
    ## slow_down_sleep_time setting is.
    forwarded = dict(parameters=parameters,
                     extrasleep=extrasleep,
                     usecache=usecache)
    return BaseSiteAdapter._fetchUrl(self, url, **forwarded)