How to use the ruia.field._LxmlElementField class in ruia

To help you get started, we’ve selected a few ruia examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github howie6879 / ruia / ruia / field.py View on Github external
return elements if self.many else elements[0]

        if elements:
            results = [self._parse_element(element) for element in elements]
        elif self.default is None:
            raise NothingMatchedError(
                f"Extract `{self.css_select or self.xpath_select}` error, "
                f"please check selector or set parameter named `default`"
            )
        else:
            results = [self.default]

        return results if self.many else results[0]


class AttrField(_LxmlElementField):
    """
    Field that extracts the value of a named attribute from matched elements.
    """

    def __init__(
        self,
        attr,
        css_select: str = None,
        xpath_select: str = None,
        default="",
        many: bool = False,
    ):
        """
        :param attr: name of the attribute to read
        :param css_select: CSS selector, forwarded to the parent initializer
        :param xpath_select: XPath selector, forwarded to the parent initializer
        :param default: forwarded to the parent initializer (defaults to "")
        :param many: forwarded to the parent initializer
        """
        # Selector/default handling lives in the parent; only `attr` is
        # specific to this field.
        parent_kwargs = dict(
            css_select=css_select,
            xpath_select=xpath_select,
            default=default,
            many=many,
        )
        super().__init__(**parent_kwargs)
        self.attr = attr
github howie6879 / ruia / ruia / field.py View on Github external
attr,
        css_select: str = None,
        xpath_select: str = None,
        default="",
        many: bool = False,
    ):
        super(AttrField, self).__init__(
            css_select=css_select, xpath_select=xpath_select, default=default, many=many
        )
        self.attr = attr

    def _parse_element(self, element):
        return element.get(self.attr, self.default)


class HtmlField(_LxmlElementField):
    """
    Field that returns the serialized markup of each matched element.
    """

    def _parse_element(self, element):
        """
        Serialize ``element`` with ``etree.tostring`` and return it as ``str``.
        """
        # tostring() is asked for UTF-8 output, which is then decoded back
        # into text with the same codec.
        serialized = etree.tostring(element, encoding="utf-8")
        return serialized.decode(encoding="utf-8")


class TextField(_LxmlElementField):
    """
    This field is used to get text.
    """

    def _parse_element(self, element):
        strings = [node.strip() for node in element.itertext()]
        string = "".join(strings)
github howie6879 / ruia / ruia / field.py View on Github external
self.attr = attr

    def _parse_element(self, element):
        return element.get(self.attr, self.default)


class HtmlField(_LxmlElementField):
    """
    Field that returns the serialized markup of each matched element.
    """

    def _parse_element(self, element):
        """
        Serialize ``element`` with ``etree.tostring`` and return it as ``str``.
        """
        # tostring() is asked for UTF-8 output, which is then decoded back
        # into text with the same codec.
        serialized = etree.tostring(element, encoding="utf-8")
        return serialized.decode(encoding="utf-8")


class TextField(_LxmlElementField):
    """
    Field that returns the text content of each matched element.
    """

    def _parse_element(self, element):
        """
        Join the stripped text fragments of ``element`` into one string.

        Falls back to ``self.default`` when the joined text is empty.
        """
        # Each fragment from itertext() is stripped on its own before joining,
        # so surrounding whitespace of every text node is discarded.
        text = "".join(fragment.strip() for fragment in element.itertext())
        if text:
            return text
        return self.default


class RegexField(BaseField):
    """
    This field is used to get raw html code by regular expression.
    RegexField uses standard library `re` inner, that is to say it has a better performance than _LxmlElementField.
    """
github howie6879 / ruia / ruia / field.py View on Github external
def __init__(
        self,
        css_select: str = None,
        xpath_select: str = None,
        default=None,
        many: bool = False,
    ):
        """
        Store the selectors and delegate ``default``/``many`` to the parent.

        :param css_select: CSS selector, see http://lxml.de/cssselect.html
        :param xpath_select: XPath selector, see http://www.w3school.com.cn/xpath/index.asp
        :param default: passed through to the parent initializer
        :param many: passed through to the parent initializer
        """
        parent = super(_LxmlElementField, self)
        parent.__init__(default=default, many=many)
        # Both selectors are kept as given; neither is validated here.
        self.css_select, self.xpath_select = css_select, xpath_select