How to use webencodings - 10 common examples

To help you get started, we’ve selected a few webencodings examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github Kozea / tinycss2 / tinycss2 / test_tinycss2.py View on Github external
def to_json():
    def numeric(t):
        return [
            t.representation, t.value,
            'integer' if t.int_value is not None else 'number']
    return {
        type(None): lambda _: None,
        str: lambda s: s,
        int: lambda s: s,
        list: lambda l: [to_json(el) for el in l],
        tuple: lambda l: [to_json(el) for el in l],
        Encoding: lambda e: e.name,
        ParseError: lambda e: ['error', e.kind],

        Comment: lambda t: '/* … */',
        WhitespaceToken: lambda t: ' ',
        LiteralToken: lambda t: t.value,
        IdentToken: lambda t: ['ident', t.value],
        AtKeywordToken: lambda t: ['at-keyword', t.value],
        HashToken: lambda t: ['hash', t.value,
                              'id' if t.is_identifier else 'unrestricted'],
        StringToken: lambda t: ['string', t.value],
        URLToken: lambda t: ['url', t.value],
        NumberToken: lambda t: ['number'] + numeric(t),
        PercentageToken: lambda t: ['percentage'] + numeric(t),
        DimensionToken: lambda t: ['dimension'] + numeric(t) + [t.unit],
        UnicodeRangeToken: lambda t: ['unicode-range', t.start, t.end],
github Kozea / tinycss2 / tinycss2 / test_tinycss2.py View on Github external
def test_stylesheet_bytes(kwargs):
    kwargs['css_bytes'] = kwargs['css_bytes'].encode('latin1')
    kwargs.pop('comment', None)
    if kwargs.get('environment_encoding'):
        kwargs['environment_encoding'] = lookup(kwargs['environment_encoding'])
    kwargs.update(SKIP)
    return parse_stylesheet_bytes(**kwargs)
github webrecorder / pywb / pywb / rewrite / rewrite_content.py View on Github external
if text_type == 'html':
            head_insert_str = ''
            charset = rewritten_headers.charset

            # if no charset set, attempt to extract from first 1024
            if not rewritten_headers.charset:
                first_buff = stream.read(1024)
                charset = self._extract_html_charset(first_buff,
                                                     status_headers)

            if head_insert_func and not wb_url.is_url_rewrite_only:
                head_insert_orig = head_insert_func(rule, cdx)

                if charset:
                    try:
                        head_insert_str = webencodings.encode(head_insert_orig, charset)
                    except:
                        pass

                if not head_insert_str:
                    charset = 'utf-8'
                    head_insert_str = head_insert_orig.encode(charset)

                head_insert_buf = head_insert_str
                #head_insert_str = to_native_str(head_insert_str)
                head_insert_str = head_insert_str.decode('iso-8859-1')


            if wb_url.is_banner_only:
                gen = self._head_insert_only_gen(head_insert_buf,
                                                 stream,
                                                 first_buff)
github webrecorder / pywb / pywb / rewrite / content_rewriter.py View on Github external
def get_head_insert(self, rwinfo, rule, head_insert_func, cdx):
        head_insert_str = ''

        # if no charset set, attempt to extract from first 1024
        if not rwinfo.charset:
            first_buff = rwinfo.read_and_keep(1024)
            rwinfo.charset = self.extract_html_charset(first_buff)

        if head_insert_func:
            head_insert_orig = head_insert_func(rule, cdx)

            if rwinfo.charset:
                try:
                    head_insert_str = webencodings.encode(head_insert_orig, rwinfo.charset)
                except:
                    pass

            # no charset detected, encode banner as ascii html entities
            if not head_insert_str:
                head_insert_str = head_insert_orig.encode('ascii', 'xmlcharrefreplace')

            head_insert_str = head_insert_str.decode('iso-8859-1')

        return head_insert_str
github Kozea / tinycss2 / tinycss2 / bytes.py View on Github external
if protocol_encoding:
        fallback = lookup(protocol_encoding)
        if fallback:
            return decode(css_bytes, fallback)
    if css_bytes.startswith(b'@charset "'):
        # 10 is len(b'@charset "')
        # 100 is arbitrary so that no encoding label is more than 100-10 bytes.
        end_quote = css_bytes.find(b'"', 10, 100)
        if end_quote != -1 and css_bytes.startswith(b'";', end_quote):
            fallback = lookup(css_bytes[10:end_quote].decode('latin1'))
            if fallback:
                if fallback.name in ('utf-16be', 'utf-16le'):
                    return decode(css_bytes, UTF8)
                return decode(css_bytes, fallback)
    if environment_encoding:
        return decode(css_bytes, environment_encoding)
    return decode(css_bytes, UTF8)
github Kozea / tinycss2 / tinycss2 / bytes.py View on Github external
fallback = lookup(protocol_encoding)
        if fallback:
            return decode(css_bytes, fallback)
    if css_bytes.startswith(b'@charset "'):
        # 10 is len(b'@charset "')
        # 100 is arbitrary so that no encoding label is more than 100-10 bytes.
        end_quote = css_bytes.find(b'"', 10, 100)
        if end_quote != -1 and css_bytes.startswith(b'";', end_quote):
            fallback = lookup(css_bytes[10:end_quote].decode('latin1'))
            if fallback:
                if fallback.name in ('utf-16be', 'utf-16le'):
                    return decode(css_bytes, UTF8)
                return decode(css_bytes, fallback)
    if environment_encoding:
        return decode(css_bytes, environment_encoding)
    return decode(css_bytes, UTF8)
github Kozea / tinycss2 / tinycss2 / bytes.py View on Github external
"""
    # http://dev.w3.org/csswg/css-syntax/#the-input-byte-stream
    if protocol_encoding:
        fallback = lookup(protocol_encoding)
        if fallback:
            return decode(css_bytes, fallback)
    if css_bytes.startswith(b'@charset "'):
        # 10 is len(b'@charset "')
        # 100 is arbitrary so that no encoding label is more than 100-10 bytes.
        end_quote = css_bytes.find(b'"', 10, 100)
        if end_quote != -1 and css_bytes.startswith(b'";', end_quote):
            fallback = lookup(css_bytes[10:end_quote].decode('latin1'))
            if fallback:
                if fallback.name in ('utf-16be', 'utf-16le'):
                    return decode(css_bytes, UTF8)
                return decode(css_bytes, fallback)
    if environment_encoding:
        return decode(css_bytes, environment_encoding)
    return decode(css_bytes, UTF8)
github Kozea / tinycss2 / tinycss2 / bytes.py View on Github external
(e.g. via the ``charset`` parameter of the ``Content-Type`` header.)
    :param environment_encoding:
        A :class:`webencodings.Encoding` object
        for the `environment encoding
        `_,
        if any.
    :returns:
        A 2-tuple of a decoded Unicode string
        and the :class:`webencodings.Encoding` object that was used.

    """
    # http://dev.w3.org/csswg/css-syntax/#the-input-byte-stream
    if protocol_encoding:
        fallback = lookup(protocol_encoding)
        if fallback:
            return decode(css_bytes, fallback)
    if css_bytes.startswith(b'@charset "'):
        # 10 is len(b'@charset "')
        # 100 is arbitrary so that no encoding label is more than 100-10 bytes.
        end_quote = css_bytes.find(b'"', 10, 100)
        if end_quote != -1 and css_bytes.startswith(b'";', end_quote):
            fallback = lookup(css_bytes[10:end_quote].decode('latin1'))
            if fallback:
                if fallback.name in ('utf-16be', 'utf-16le'):
                    return decode(css_bytes, UTF8)
                return decode(css_bytes, fallback)
    if environment_encoding:
        return decode(css_bytes, environment_encoding)
    return decode(css_bytes, UTF8)
github Kozea / tinycss2 / tinycss2 / bytes.py View on Github external
:param protocol_encoding:
        The encoding label, if any, defined by HTTP or equivalent protocol.
        (e.g. via the ``charset`` parameter of the ``Content-Type`` header.)
    :param environment_encoding:
        A :class:`webencodings.Encoding` object
        for the `environment encoding
        `_,
        if any.
    :returns:
        A 2-tuple of a decoded Unicode string
        and the :class:`webencodings.Encoding` object that was used.

    """
    # http://dev.w3.org/csswg/css-syntax/#the-input-byte-stream
    if protocol_encoding:
        fallback = lookup(protocol_encoding)
        if fallback:
            return decode(css_bytes, fallback)
    if css_bytes.startswith(b'@charset "'):
        # 10 is len(b'@charset "')
        # 100 is arbitrary so that no encoding label is more than 100-10 bytes.
        end_quote = css_bytes.find(b'"', 10, 100)
        if end_quote != -1 and css_bytes.startswith(b'";', end_quote):
            fallback = lookup(css_bytes[10:end_quote].decode('latin1'))
            if fallback:
                if fallback.name in ('utf-16be', 'utf-16le'):
                    return decode(css_bytes, UTF8)
                return decode(css_bytes, fallback)
    if environment_encoding:
        return decode(css_bytes, environment_encoding)
    return decode(css_bytes, UTF8)
github Kozea / tinycss2 / tinycss2 / bytes.py View on Github external
:returns:
        A 2-tuple of a decoded Unicode string
        and the :class:`webencodings.Encoding` object that was used.

    """
    # http://dev.w3.org/csswg/css-syntax/#the-input-byte-stream
    if protocol_encoding:
        fallback = lookup(protocol_encoding)
        if fallback:
            return decode(css_bytes, fallback)
    if css_bytes.startswith(b'@charset "'):
        # 10 is len(b'@charset "')
        # 100 is arbitrary so that no encoding label is more than 100-10 bytes.
        end_quote = css_bytes.find(b'"', 10, 100)
        if end_quote != -1 and css_bytes.startswith(b'";', end_quote):
            fallback = lookup(css_bytes[10:end_quote].decode('latin1'))
            if fallback:
                if fallback.name in ('utf-16be', 'utf-16le'):
                    return decode(css_bytes, UTF8)
                return decode(css_bytes, fallback)
    if environment_encoding:
        return decode(css_bytes, environment_encoding)
    return decode(css_bytes, UTF8)

webencodings

Character encoding aliases for legacy web content

BSD-2-Clause
Latest version published 8 years ago

Package Health Score

73 / 100
Full package analysis

Similar packages