mirror of https://github.com/gryf/ebook-converter.git
synced 2026-01-03 17:34:11 +01:00
Initial import
ebook_converter/tinycss/__init__.py  (new file, 52 lines)
@@ -0,0 +1,52 @@
# coding: utf8
"""
    tinycss
    -------

    A CSS parser, and nothing else.

    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
"""

from .version import VERSION
__version__ = VERSION

from tinycss.css21 import CSS21Parser
from tinycss.page3 import CSSPage3Parser
from tinycss.fonts3 import CSSFonts3Parser
from tinycss.media3 import CSSMedia3Parser


PARSER_MODULES = {
    'page3': CSSPage3Parser,
    'fonts3': CSSFonts3Parser,
    'media3': CSSMedia3Parser,
}


def make_parser(*features, **kwargs):
    """Make a parser object with the chosen features.

    :param features:
        Positional arguments are base classes the new parser class
        will extend. The string ``'page3'`` is accepted as short for
        :class:`~page3.CSSPage3Parser`.
    :param kwargs:
        Keyword arguments are passed to the parser's constructor.
    :returns:
        An instance of a new subclass of :class:`CSS21Parser`.

    """
    if features:
        bases = tuple(PARSER_MODULES.get(f, f) for f in features)
        parser_class = type('CustomCSSParser', bases + (CSS21Parser,), {})
    else:
        parser_class = CSS21Parser
    return parser_class(**kwargs)


def make_full_parser(**kwargs):
    ''' A parser that parses all supported CSS 3 modules in addition to CSS 2.1 '''
    features = tuple(PARSER_MODULES)
    return make_parser(*features, **kwargs)
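For orientation, a minimal usage sketch of the two factories above (the import path is an assumption based on this repository's layout):

    from ebook_converter.tinycss import make_parser, make_full_parser

    parser = make_parser('page3')   # CSS 2.1 plus the CSS 3 Paged Media module
    full = make_full_parser()       # page3, fonts3 and media3 all mixed in
    sheet = full.parse_stylesheet('p { color: red }')
    print(sheet.rules, sheet.errors)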
ebook_converter/tinycss/color3.py  (new file, 382 lines)
@@ -0,0 +1,382 @@
# coding: utf8
"""
    tinycss.colors3
    ---------------

    Parser for CSS 3 color values
    http://www.w3.org/TR/css3-color/

    This module does not provide anything that integrates in a parser class,
    only functions that parse single tokens from (eg.) a property value.

    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
"""

from __future__ import unicode_literals, division

import collections
import itertools
import re

from .tokenizer import tokenize_grouped


class RGBA(collections.namedtuple('RGBA', ['red', 'green', 'blue', 'alpha'])):
    """An RGBA color.

    A tuple of four floats in the 0..1 range: ``(r, g, b, a)``.
    Also has ``red``, ``green``, ``blue`` and ``alpha`` attributes to access
    the same values.

    """


def parse_color_string(css_string):
    """Parse a CSS string as a color value.

    This is a convenience wrapper around :func:`parse_color` in case you
    have a string that is not from a CSS stylesheet.

    :param css_string:
        A unicode string in CSS syntax.
    :returns:
        Same as :func:`parse_color`.

    """
    tokens = list(tokenize_grouped(css_string.strip()))
    if len(tokens) == 1:
        return parse_color(tokens[0])


def parse_color(token):
    """Parse a single token as a color value.

    :param token:
        A single :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`, as found eg. in a
        property value.
    :returns:
        * ``None``, if the token is not a valid CSS 3 color value.
          (No exception is raised.)
        * For the *currentColor* keyword: the string ``'currentColor'``
        * Every other value (including keywords, HSL and HSLA) is converted
          to RGBA and returned as an :class:`RGBA` object (a 4-tuple with
          attribute access).
          The alpha channel is clipped to [0, 1], but R, G, or B can be
          out of range (eg. ``rgb(-51, 306, 0)`` is represented as
          ``(-.2, 1.2, 0, 1)``.)

    """
    if token.type == 'IDENT':
        return COLOR_KEYWORDS.get(token.value.lower())
    elif token.type == 'HASH':
        for multiplier, regexp in HASH_REGEXPS:
            match = regexp(token.value)
            if match:
                r, g, b = [int(group * multiplier, 16) / 255
                           for group in match.groups()]
                return RGBA(r, g, b, 1.)
    elif token.type == 'FUNCTION':
        args = parse_comma_separated(token.content)
        if args:
            name = token.function_name.lower()
            if name == 'rgb':
                return parse_rgb(args, alpha=1.)
            elif name == 'rgba':
                alpha = parse_alpha(args[3:])
                if alpha is not None:
                    return parse_rgb(args[:3], alpha)
            elif name == 'hsl':
                return parse_hsl(args, alpha=1.)
            elif name == 'hsla':
                alpha = parse_alpha(args[3:])
                if alpha is not None:
                    return parse_hsl(args[:3], alpha)
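A few illustrative calls showing what the two entry points above return (the values follow directly from the branches in parse_color):

    parse_color_string('#f00')                  # RGBA(red=1.0, green=0.0, blue=0.0, alpha=1.0)
    parse_color_string('rgba(255, 0, 0, 0.5)')  # RGBA(1.0, 0.0, 0.0, 0.5)
    parse_color_string('currentColor')          # the string 'currentColor'
    parse_color_string('not-a-color')           # None (no exception is raised)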
def parse_alpha(args):
    """
    If args is a list of a single INTEGER or NUMBER token,
    return its value clipped to the 0..1 range.
    Otherwise, return None.
    """
    if len(args) == 1 and args[0].type in ('NUMBER', 'INTEGER'):
        return min(1, max(0, args[0].value))


def parse_rgb(args, alpha):
    """
    If args is a list of 3 INTEGER tokens or 3 PERCENTAGE tokens,
    return RGB values as a tuple of 3 floats in 0..1.
    Otherwise, return None.
    """
    types = [arg.type for arg in args]
    if types == ['INTEGER', 'INTEGER', 'INTEGER']:
        r, g, b = [arg.value / 255 for arg in args[:3]]
        return RGBA(r, g, b, alpha)
    elif types == ['PERCENTAGE', 'PERCENTAGE', 'PERCENTAGE']:
        r, g, b = [arg.value / 100 for arg in args[:3]]
        return RGBA(r, g, b, alpha)


def parse_hsl(args, alpha):
    """
    If args is a list of 1 INTEGER token and 2 PERCENTAGE tokens,
    return RGB values as a tuple of 3 floats in 0..1.
    Otherwise, return None.
    """
    types = [arg.type for arg in args]
    if types == ['INTEGER', 'PERCENTAGE', 'PERCENTAGE']:
        hsl = [arg.value for arg in args[:3]]
        r, g, b = hsl_to_rgb(*hsl)
        return RGBA(r, g, b, alpha)


def hsl_to_rgb(hue, saturation, lightness):
    """
    :param hue: degrees
    :param saturation: percentage
    :param lightness: percentage
    :returns: (r, g, b) as floats in the 0..1 range
    """
    hue = (hue / 360) % 1
    saturation = min(1, max(0, saturation / 100))
    lightness = min(1, max(0, lightness / 100))

    # Translated from ABC: http://www.w3.org/TR/css3-color/#hsl-color
    def hue_to_rgb(m1, m2, h):
        if h < 0:
            h += 1
        if h > 1:
            h -= 1
        if h * 6 < 1:
            return m1 + (m2 - m1) * h * 6
        if h * 2 < 1:
            return m2
        if h * 3 < 2:
            return m1 + (m2 - m1) * (2 / 3 - h) * 6
        return m1

    if lightness <= 0.5:
        m2 = lightness * (saturation + 1)
    else:
        m2 = lightness + saturation - lightness * saturation
    m1 = lightness * 2 - m2
    return (
        hue_to_rgb(m1, m2, hue + 1 / 3),
        hue_to_rgb(m1, m2, hue),
        hue_to_rgb(m1, m2, hue - 1 / 3),
    )
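Tracing hsl_to_rgb for pure green as a sanity check: hsl(120, 100%, 50%) gives hue = 1/3, m2 = 0.5 * (1 + 1) = 1.0 and m1 = 2 * 0.5 - 1.0 = 0.0; the green call hits the h * 2 < 1 branch and returns m2, while the red (h = 2/3) and blue (h = 0) calls both come out 0.0:

    hsl_to_rgb(120, 100, 50)   # -> (0.0, 1.0, 0.0)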
def parse_comma_separated(tokens):
    """Parse a list of tokens (typically the content of a function token)
    as arguments made of a single token each, separated by mandatory commas,
    with optional white space around each argument.

    return the argument list without commas or white space;
    or None if the function token content does not match the description
    above.

    """
    tokens = [token for token in tokens if token.type != 'S']
    if not tokens:
        return []
    if len(tokens) % 2 == 1 and all(
            token.type == 'DELIM' and token.value == ','
            for token in tokens[1::2]):
        return tokens[::2]


HASH_REGEXPS = (
    (2, re.compile(r'^#([\da-f])([\da-f])([\da-f])$', re.I).match),
    (1, re.compile(r'^#([\da-f]{2})([\da-f]{2})([\da-f]{2})$', re.I).match),
)


# (r, g, b) in 0..255
BASIC_COLOR_KEYWORDS = [
    ('black', (0, 0, 0)),
    ('silver', (192, 192, 192)),
    ('gray', (128, 128, 128)),
    ('white', (255, 255, 255)),
    ('maroon', (128, 0, 0)),
    ('red', (255, 0, 0)),
    ('purple', (128, 0, 128)),
    ('fuchsia', (255, 0, 255)),
    ('green', (0, 128, 0)),
    ('lime', (0, 255, 0)),
    ('olive', (128, 128, 0)),
    ('yellow', (255, 255, 0)),
    ('navy', (0, 0, 128)),
    ('blue', (0, 0, 255)),
    ('teal', (0, 128, 128)),
    ('aqua', (0, 255, 255)),
]


# (r, g, b) in 0..255
EXTENDED_COLOR_KEYWORDS = [
    ('aliceblue', (240, 248, 255)),
    ('antiquewhite', (250, 235, 215)),
    ('aqua', (0, 255, 255)),
    ('aquamarine', (127, 255, 212)),
    ('azure', (240, 255, 255)),
    ('beige', (245, 245, 220)),
    ('bisque', (255, 228, 196)),
    ('black', (0, 0, 0)),
    ('blanchedalmond', (255, 235, 205)),
    ('blue', (0, 0, 255)),
    ('blueviolet', (138, 43, 226)),
    ('brown', (165, 42, 42)),
    ('burlywood', (222, 184, 135)),
    ('cadetblue', (95, 158, 160)),
    ('chartreuse', (127, 255, 0)),
    ('chocolate', (210, 105, 30)),
    ('coral', (255, 127, 80)),
    ('cornflowerblue', (100, 149, 237)),
    ('cornsilk', (255, 248, 220)),
    ('crimson', (220, 20, 60)),
    ('cyan', (0, 255, 255)),
    ('darkblue', (0, 0, 139)),
    ('darkcyan', (0, 139, 139)),
    ('darkgoldenrod', (184, 134, 11)),
    ('darkgray', (169, 169, 169)),
    ('darkgreen', (0, 100, 0)),
    ('darkgrey', (169, 169, 169)),
    ('darkkhaki', (189, 183, 107)),
    ('darkmagenta', (139, 0, 139)),
    ('darkolivegreen', (85, 107, 47)),
    ('darkorange', (255, 140, 0)),
    ('darkorchid', (153, 50, 204)),
    ('darkred', (139, 0, 0)),
    ('darksalmon', (233, 150, 122)),
    ('darkseagreen', (143, 188, 143)),
    ('darkslateblue', (72, 61, 139)),
    ('darkslategray', (47, 79, 79)),
    ('darkslategrey', (47, 79, 79)),
    ('darkturquoise', (0, 206, 209)),
    ('darkviolet', (148, 0, 211)),
    ('deeppink', (255, 20, 147)),
    ('deepskyblue', (0, 191, 255)),
    ('dimgray', (105, 105, 105)),
    ('dimgrey', (105, 105, 105)),
    ('dodgerblue', (30, 144, 255)),
    ('firebrick', (178, 34, 34)),
    ('floralwhite', (255, 250, 240)),
    ('forestgreen', (34, 139, 34)),
    ('fuchsia', (255, 0, 255)),
    ('gainsboro', (220, 220, 220)),
    ('ghostwhite', (248, 248, 255)),
    ('gold', (255, 215, 0)),
    ('goldenrod', (218, 165, 32)),
    ('gray', (128, 128, 128)),
    ('green', (0, 128, 0)),
    ('greenyellow', (173, 255, 47)),
    ('grey', (128, 128, 128)),
    ('honeydew', (240, 255, 240)),
    ('hotpink', (255, 105, 180)),
    ('indianred', (205, 92, 92)),
    ('indigo', (75, 0, 130)),
    ('ivory', (255, 255, 240)),
    ('khaki', (240, 230, 140)),
    ('lavender', (230, 230, 250)),
    ('lavenderblush', (255, 240, 245)),
    ('lawngreen', (124, 252, 0)),
    ('lemonchiffon', (255, 250, 205)),
    ('lightblue', (173, 216, 230)),
    ('lightcoral', (240, 128, 128)),
    ('lightcyan', (224, 255, 255)),
    ('lightgoldenrodyellow', (250, 250, 210)),
    ('lightgray', (211, 211, 211)),
    ('lightgreen', (144, 238, 144)),
    ('lightgrey', (211, 211, 211)),
    ('lightpink', (255, 182, 193)),
    ('lightsalmon', (255, 160, 122)),
    ('lightseagreen', (32, 178, 170)),
    ('lightskyblue', (135, 206, 250)),
    ('lightslategray', (119, 136, 153)),
    ('lightslategrey', (119, 136, 153)),
    ('lightsteelblue', (176, 196, 222)),
    ('lightyellow', (255, 255, 224)),
    ('lime', (0, 255, 0)),
    ('limegreen', (50, 205, 50)),
    ('linen', (250, 240, 230)),
    ('magenta', (255, 0, 255)),
    ('maroon', (128, 0, 0)),
    ('mediumaquamarine', (102, 205, 170)),
    ('mediumblue', (0, 0, 205)),
    ('mediumorchid', (186, 85, 211)),
    ('mediumpurple', (147, 112, 219)),
    ('mediumseagreen', (60, 179, 113)),
    ('mediumslateblue', (123, 104, 238)),
    ('mediumspringgreen', (0, 250, 154)),
    ('mediumturquoise', (72, 209, 204)),
    ('mediumvioletred', (199, 21, 133)),
    ('midnightblue', (25, 25, 112)),
    ('mintcream', (245, 255, 250)),
    ('mistyrose', (255, 228, 225)),
    ('moccasin', (255, 228, 181)),
    ('navajowhite', (255, 222, 173)),
    ('navy', (0, 0, 128)),
    ('oldlace', (253, 245, 230)),
    ('olive', (128, 128, 0)),
    ('olivedrab', (107, 142, 35)),
    ('orange', (255, 165, 0)),
    ('orangered', (255, 69, 0)),
    ('orchid', (218, 112, 214)),
    ('palegoldenrod', (238, 232, 170)),
    ('palegreen', (152, 251, 152)),
    ('paleturquoise', (175, 238, 238)),
    ('palevioletred', (219, 112, 147)),
    ('papayawhip', (255, 239, 213)),
    ('peachpuff', (255, 218, 185)),
    ('peru', (205, 133, 63)),
    ('pink', (255, 192, 203)),
    ('plum', (221, 160, 221)),
    ('powderblue', (176, 224, 230)),
    ('purple', (128, 0, 128)),
    ('red', (255, 0, 0)),
    ('rosybrown', (188, 143, 143)),
    ('royalblue', (65, 105, 225)),
    ('saddlebrown', (139, 69, 19)),
    ('salmon', (250, 128, 114)),
    ('sandybrown', (244, 164, 96)),
    ('seagreen', (46, 139, 87)),
    ('seashell', (255, 245, 238)),
    ('sienna', (160, 82, 45)),
    ('silver', (192, 192, 192)),
    ('skyblue', (135, 206, 235)),
    ('slateblue', (106, 90, 205)),
    ('slategray', (112, 128, 144)),
    ('slategrey', (112, 128, 144)),
    ('snow', (255, 250, 250)),
    ('springgreen', (0, 255, 127)),
    ('steelblue', (70, 130, 180)),
    ('tan', (210, 180, 140)),
    ('teal', (0, 128, 128)),
    ('thistle', (216, 191, 216)),
    ('tomato', (255, 99, 71)),
    ('turquoise', (64, 224, 208)),
    ('violet', (238, 130, 238)),
    ('wheat', (245, 222, 179)),
    ('white', (255, 255, 255)),
    ('whitesmoke', (245, 245, 245)),
    ('yellow', (255, 255, 0)),
    ('yellowgreen', (154, 205, 50)),
]


# (r, g, b, a) in 0..1 or a string marker
SPECIAL_COLOR_KEYWORDS = {
    'currentcolor': 'currentColor',
    'transparent': RGBA(0., 0., 0., 0.),
}


# RGBA namedtuples of (r, g, b, a) in 0..1 or a string marker
COLOR_KEYWORDS = SPECIAL_COLOR_KEYWORDS.copy()
COLOR_KEYWORDS.update(
    # 255 maps to 1, 0 to 0, the rest is linear.
    (keyword, RGBA(r / 255., g / 255., b / 255., 1.))
    for keyword, (r, g, b) in itertools.chain(
        BASIC_COLOR_KEYWORDS, EXTENDED_COLOR_KEYWORDS))
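Keyword lookups go through the same parse_color entry point and are case-insensitive; two quick examples using the tables above:

    parse_color_string('NAVY')         # RGBA(red=0.0, green=0.0, blue=0.50196..., alpha=1.0)
    parse_color_string('transparent')  # RGBA(red=0.0, green=0.0, blue=0.0, alpha=0.0)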
ebook_converter/tinycss/css21.py  (new file, 830 lines)
@@ -0,0 +1,830 @@
# coding: utf8
"""
    tinycss.css21
    -------------

    Parser for CSS 2.1
    http://www.w3.org/TR/CSS21/syndata.html

    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
"""

from __future__ import unicode_literals
from itertools import chain, islice

from tinycss.decoding import decode
from tinycss.token_data import TokenList
from tinycss.tokenizer import tokenize_grouped
from tinycss.parsing import (
    strip_whitespace, remove_whitespace, split_on_comma, validate_value,
    validate_any, ParseError)


# stylesheet  : [ CDO | CDC | S | statement ]*;
# statement   : ruleset | at-rule;
# at-rule     : ATKEYWORD S* any* [ block | ';' S* ];
# block       : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*;
# ruleset     : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*;
# selector    : any+;
# declaration : property S* ':' S* value;
# property    : IDENT;
# value       : [ any | block | ATKEYWORD S* ]+;
# any         : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
#               | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
#               | DASHMATCH | ':' | FUNCTION S* [any|unused]* ')'
#               | '(' S* [any|unused]* ')' | '[' S* [any|unused]* ']'
#               ] S*;
# unused      : block | ATKEYWORD S* | ';' S* | CDO S* | CDC S*;


class Stylesheet(object):
    """
    A parsed CSS stylesheet.

    .. attribute:: rules

        A mixed list, in source order, of :class:`RuleSet` and various
        at-rules such as :class:`ImportRule`, :class:`MediaRule`
        and :class:`PageRule`.
        Use their :obj:`at_keyword` attribute to distinguish them.

    .. attribute:: errors

        A list of :class:`~.parsing.ParseError`. Invalid rules and
        declarations are ignored, with the details logged in this list.

    .. attribute:: encoding

        The character encoding that was used to decode the stylesheet
        from bytes, or ``None`` for Unicode stylesheets.

    """
    def __init__(self, rules, errors, encoding):
        self.rules = rules
        self.errors = errors
        self.encoding = encoding

    def __repr__(self):
        return '<{0.__class__.__name__} {1} rules {2} errors>'.format(
            self, len(self.rules), len(self.errors))


class AtRule(object):
    """
    An unparsed at-rule.

    .. attribute:: at_keyword

        The normalized (lower-case) at-keyword as a string. Eg: ``'@page'``

    .. attribute:: head

        The part of the at-rule between the at-keyword and the ``{``
        marking the body, or the ``;`` marking the end of an at-rule
        without a body. A :class:`~.token_data.TokenList`.

    .. attribute:: body

        The content of the body between ``{`` and ``}`` as a
        :class:`~.token_data.TokenList`, or ``None`` if there is no body
        (ie. if the rule ends with ``;``).

    The head was validated against the core grammar but **not** the body,
    as the body might contain declarations. In case of an error in a
    declaration, parsing should continue from the next declaration.
    The whole rule should not be ignored as it would be for an error
    in the head.

    These at-rules are expected to be parsed further before reaching
    the user API.

    """

    __slots__ = 'at_keyword', 'head', 'body', 'line', 'column'

    def __init__(self, at_keyword, head, body, line, column):
        self.at_keyword = at_keyword
        self.head = TokenList(head)
        self.body = TokenList(body) if body is not None else body
        self.line = line
        self.column = column

    def __repr__(self):
        return ('<{0.__class__.__name__} {0.line}:{0.column} {0.at_keyword}>'
                .format(self))


class RuleSet(object):
    """A ruleset.

    .. attribute:: at_keyword

        Always ``None``. Helps to tell rulesets apart from at-rules.

    .. attribute:: selector

        The selector as a :class:`~.token_data.TokenList`.
        In CSS 3, this is actually called a selector group.

        ``rule.selector.as_css()`` gives the selector as a string.
        This string can be used with *cssselect*, see :ref:`selectors3`.

    .. attribute:: declarations

        The list of :class:`Declaration`, in source order.

    """

    at_keyword = None
    __slots__ = 'selector', 'declarations', 'line', 'column'

    def __init__(self, selector, declarations, line, column):
        self.selector = TokenList(selector)
        self.declarations = declarations
        self.line = line
        self.column = column

    def __repr__(self):
        return ('<{0.__class__.__name__} at {0.line}:{0.column} {1}>'
                .format(self, self.selector.as_css()))


class Declaration(object):
    """A property declaration.

    .. attribute:: name

        The property name as a normalized (lower-case) string.

    .. attribute:: value

        The property value as a :class:`~.token_data.TokenList`.

        The value is not parsed. UAs using tinycss may only support
        some properties or some values and tinycss does not know which.
        They need to parse values themselves and ignore declarations with
        unknown or unsupported properties or values, and fall back
        on any previous declaration.

        :mod:`tinycss.color3` parses color values, but other values
        will need specific parsing/validation code.

    .. attribute:: priority

        Either the string ``'important'`` or ``None``.

    """
    __slots__ = 'name', 'value', 'priority', 'line', 'column'

    def __init__(self, name, value, priority, line, column):
        self.name = name
        self.value = TokenList(value)
        self.priority = priority
        self.line = line
        self.column = column

    def __repr__(self):
        priority = ' !' + self.priority if self.priority else ''
        return ('<{0.__class__.__name__} {0.line}:{0.column}'
                ' {0.name}: {1}{2}>'.format(
                    self, self.value.as_css(), priority))


class PageRule(object):
    """A parsed CSS 2.1 @page rule.

    .. attribute:: at_keyword

        Always ``'@page'``

    .. attribute:: selector

        The page selector.
        In CSS 2.1 this is either ``None`` (no selector), or the string
        ``'first'``, ``'left'`` or ``'right'`` for the pseudo class
        of the same name.

    .. attribute:: specificity

        Specificity of the page selector. This is a tuple of four integers,
        but these tuples are mostly meant to be compared to each other.

    .. attribute:: declarations

        A list of :class:`Declaration`, in source order.

    .. attribute:: at_rules

        The list of parsed at-rules inside the @page block, in source
        order. Always empty for CSS 2.1.

    """
    at_keyword = '@page'
    __slots__ = ('selector', 'specificity', 'declarations', 'at_rules',
                 'line', 'column')

    def __init__(self, selector, specificity, declarations, at_rules,
                 line, column):
        self.selector = selector
        self.specificity = specificity
        self.declarations = declarations
        self.at_rules = at_rules
        self.line = line
        self.column = column

    def __repr__(self):
        return ('<{0.__class__.__name__} {0.line}:{0.column}'
                ' {0.selector}>'.format(self))


class MediaRule(object):
    """A parsed @media rule.

    .. attribute:: at_keyword

        Always ``'@media'``

    .. attribute:: media

        For CSS 2.1 without media queries: the media types
        as a list of strings.

    .. attribute:: rules

        The list of :class:`RuleSet` and various at-rules inside the
        @media block, in source order.

    """
    at_keyword = '@media'
    __slots__ = 'media', 'rules', 'line', 'column'

    def __init__(self, media, rules, line, column):
        self.media = media
        self.rules = rules
        self.line = line
        self.column = column

    def __repr__(self):
        return ('<{0.__class__.__name__} {0.line}:{0.column}'
                ' {0.media}>'.format(self))


class ImportRule(object):
    """A parsed @import rule.

    .. attribute:: at_keyword

        Always ``'@import'``

    .. attribute:: uri

        The URI to be imported, as read from the stylesheet.
        (URIs are not made absolute.)

    .. attribute:: media

        For CSS 2.1 without media queries: the media types
        as a list of strings.
        This attribute is explicitly ``['all']`` if the media was omitted
        in the source.

    """
    at_keyword = '@import'
    __slots__ = 'uri', 'media', 'line', 'column'

    def __init__(self, uri, media, line, column):
        self.uri = uri
        self.media = media
        self.line = line
        self.column = column

    def __repr__(self):
        return ('<{0.__class__.__name__} {0.line}:{0.column}'
                ' {0.uri}>'.format(self))


def _remove_at_charset(tokens):
    """Remove any valid @charset at the beginning of a token stream.

    :param tokens:
        An iterable of tokens
    :returns:
        A possibly truncated iterable of tokens

    """
    tokens = iter(tokens)
    header = list(islice(tokens, 4))
    if [t.type for t in header] == ['ATKEYWORD', 'S', 'STRING', ';']:
        atkw, space, string, semicolon = header
        if ((atkw.value, space.value) == ('@charset', ' ')
                and string.as_css()[0] == '"'):
            # Found a valid @charset rule, only keep what's after it.
            return tokens
    return chain(header, tokens)
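A small sketch of what the helper above does (import path assumed from this commit's layout):

    from ebook_converter.tinycss.tokenizer import tokenize_grouped

    tokens = _remove_at_charset(tokenize_grouped('@charset "utf-8"; p { color: red }'))
    # The four-token header ATKEYWORD('@charset'), S(' '), STRING, ';' is
    # dropped; iteration resumes before 'p'. parse_stylesheet() applies this
    # only when the sheet was decoded from bytes, i.e. when @charset has
    # already served its purpose during decoding.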
class CSS21Parser(object):
    """Parser for CSS 2.1

    This parser supports the core CSS syntax as well as @import, @media,
    @page and !important.

    Note that property values are still not parsed, as UAs using this
    parser may only support some properties or some values.

    Currently the parser holds no state. It being a class only allows
    subclassing and overriding its methods.

    """

    def __init__(self):
        self.at_parsers = {
            '@' + x: getattr(self, 'parse_%s_rule' % x)
            for x in ('media', 'page', 'import', 'charset')}

    # User API:

    def parse_stylesheet_file(self, css_file, protocol_encoding=None,
                              linking_encoding=None, document_encoding=None):
        """Parse a stylesheet from a file or filename.

        Character encoding-related parameters and behavior are the same
        as in :meth:`parse_stylesheet_bytes`.

        :param css_file:
            Either a file (any object with a :meth:`~file.read` method)
            or a filename.
        :return:
            A :class:`Stylesheet`.

        """
        if hasattr(css_file, 'read'):
            css_bytes = css_file.read()
        else:
            with open(css_file, 'rb') as fd:
                css_bytes = fd.read()
        return self.parse_stylesheet_bytes(css_bytes, protocol_encoding,
                                           linking_encoding,
                                           document_encoding)

    def parse_stylesheet_bytes(self, css_bytes, protocol_encoding=None,
                               linking_encoding=None, document_encoding=None):
        """Parse a stylesheet from a byte string.

        The character encoding is determined from the passed metadata and
        the ``@charset`` rule in the stylesheet (if any).
        If no encoding information is available or decoding fails,
        decoding defaults to UTF-8 and then falls back on ISO-8859-1.

        :param css_bytes:
            A CSS stylesheet as a byte string.
        :param protocol_encoding:
            The "charset" parameter of a "Content-Type" HTTP header (if
            any), or similar metadata for other protocols.
        :param linking_encoding:
            ``<link charset="">`` or other metadata from the linking
            mechanism (if any)
        :param document_encoding:
            Encoding of the referring style sheet or document (if any)
        :return:
            A :class:`Stylesheet`.

        """
        css_unicode, encoding = decode(css_bytes, protocol_encoding,
                                       linking_encoding, document_encoding)
        return self.parse_stylesheet(css_unicode, encoding=encoding)

    def parse_stylesheet(self, css_unicode, encoding=None):
        """Parse a stylesheet from a Unicode string.

        :param css_unicode:
            A CSS stylesheet as a unicode string.
        :param encoding:
            The character encoding used to decode the stylesheet from
            bytes, if any.
        :return:
            A :class:`Stylesheet`.

        """
        tokens = tokenize_grouped(css_unicode)
        if encoding:
            tokens = _remove_at_charset(tokens)
        rules, errors = self.parse_rules(tokens, context='stylesheet')
        return Stylesheet(rules, errors, encoding)

    def parse_style_attr(self, css_source):
        """Parse a "style" attribute (eg. of an HTML element).

        This method only accepts Unicode as the source (HTML) document
        is supposed to handle the character encoding.

        :param css_source:
            The attribute value, as a unicode string.
        :return:
            A tuple of the list of valid :class:`Declaration` and
            a list of :class:`~.parsing.ParseError`.
        """
        return self.parse_declaration_list(tokenize_grouped(css_source))

    # API for subclasses:

    def parse_rules(self, tokens, context):
        """Parse a sequence of rules (rulesets and at-rules).

        :param tokens:
            An iterable of tokens.
        :param context:
            Either ``'stylesheet'`` or an at-keyword such as ``'@media'``.
            (Most at-rules are only allowed in some contexts.)
        :return:
            A tuple of a list of parsed rules and a list of
            :class:`~.parsing.ParseError`.

        """
        rules = []
        errors = []
        tokens = iter(tokens)
        for token in tokens:
            if token.type not in ('S', 'CDO', 'CDC'):
                try:
                    if token.type == 'ATKEYWORD':
                        rule = self.read_at_rule(token, tokens)
                        result = self.parse_at_rule(
                            rule, rules, errors, context)
                        rules.append(result)
                    else:
                        rule, rule_errors = self.parse_ruleset(token, tokens)
                        rules.append(rule)
                        errors.extend(rule_errors)
                except ParseError as exc:
                    errors.append(exc)
                    # Skip the entire rule
        return rules, errors

    def read_at_rule(self, at_keyword_token, tokens):
        """Read an at-rule from a token stream.

        :param at_keyword_token:
            The ATKEYWORD token that starts this at-rule.
            You may have read it already to distinguish the rule
            from a ruleset.
        :param tokens:
            An iterator of subsequent tokens. Will be consumed just enough
            for one at-rule.
        :return:
            An unparsed :class:`AtRule`.
        :raises:
            :class:`~.parsing.ParseError` if the head is invalid for the
            core grammar. The body is **not** validated. See
            :class:`AtRule`.

        """
        # CSS syntax is case-insensitive
        at_keyword = at_keyword_token.value.lower()
        head = []
        # For the ParseError in case `tokens` is empty:
        token = at_keyword_token
        for token in tokens:
            if token.type in '{;':
                break
            # Ignore white space just after the at-keyword.
            else:
                head.append(token)
        # On unexpected end of stylesheet, pretend that a ';' was there
        head = strip_whitespace(head)
        for head_token in head:
            validate_any(head_token, 'at-rule head')
        body = token.content if token.type == '{' else None
        return AtRule(at_keyword, head, body,
                      at_keyword_token.line, at_keyword_token.column)

    def parse_at_rule(self, rule, previous_rules, errors, context):
        """Parse an at-rule.

        Subclasses that override this method must use ``super()`` and
        pass its return value for at-rules they do not know.

        In CSS 2.1, this method handles @charset, @import, @media and
        @page rules.

        :param rule:
            An unparsed :class:`AtRule`.
        :param previous_rules:
            The list of at-rules and rulesets that have been parsed so far
            in this context. This list can be used to decide if the current
            rule is valid. (For example, @import rules are only allowed
            before anything but a @charset rule.)
        :param context:
            Either ``'stylesheet'`` or an at-keyword such as ``'@media'``.
            (Most at-rules are only allowed in some contexts.)
        :raises:
            :class:`~.parsing.ParseError` if the rule is invalid.
        :return:
            A parsed at-rule

        """
        try:
            parser = self.at_parsers[rule.at_keyword]
        except KeyError:
            raise ParseError(rule, 'unknown at-rule in {0} context: {1}'
                             .format(context, rule.at_keyword))
        else:
            return parser(rule, previous_rules, errors, context)

    def parse_page_rule(self, rule, previous_rules, errors, context):
        if context != 'stylesheet':
            raise ParseError(rule, '@page rule not allowed in ' + context)
        selector, specificity = self.parse_page_selector(rule.head)
        if rule.body is None:
            raise ParseError(
                rule,
                'invalid {0} rule: missing block'.format(rule.at_keyword))
        declarations, at_rules, rule_errors = \
            self.parse_declarations_and_at_rules(rule.body, '@page')
        errors.extend(rule_errors)
        return PageRule(selector, specificity, declarations, at_rules,
                        rule.line, rule.column)

    def parse_media_rule(self, rule, previous_rules, errors, context):
        if context != 'stylesheet':
            raise ParseError(rule, '@media rule not allowed in ' + context)
        media = self.parse_media(rule.head, errors)
        if rule.body is None:
            raise ParseError(
                rule,
                'invalid {0} rule: missing block'.format(rule.at_keyword))
        rules, rule_errors = self.parse_rules(rule.body, '@media')
        errors.extend(rule_errors)
        return MediaRule(media, rules, rule.line, rule.column)

    def parse_import_rule(self, rule, previous_rules, errors, context):
        if context != 'stylesheet':
            raise ParseError(rule, '@import rule not allowed in ' + context)
        for previous_rule in previous_rules:
            if previous_rule.at_keyword not in ('@charset', '@import'):
                if previous_rule.at_keyword:
                    type_ = 'an {0} rule'.format(previous_rule.at_keyword)
                else:
                    type_ = 'a ruleset'
                raise ParseError(
                    previous_rule, '@import rule not allowed after ' + type_)
        head = rule.head
        if not head:
            raise ParseError(rule, 'expected URI or STRING for @import rule')
        if head[0].type not in ('URI', 'STRING'):
            raise ParseError(
                rule, 'expected URI or STRING for @import rule, got '
                + head[0].type)
        uri = head[0].value
        media = self.parse_media(strip_whitespace(head[1:]), errors)
        if rule.body is not None:
            # The position of the ';' token would be best, but we don't
            # have it anymore here.
            raise ParseError(head[-1], "expected ';', got a block")
        return ImportRule(uri, media, rule.line, rule.column)

    def parse_charset_rule(self, rule, previous_rules, errors, context):
        raise ParseError(rule, 'mis-placed or malformed @charset rule')

    def parse_media(self, tokens, errors):
        """For CSS 2.1, parse a list of media types.

        Media Queries are expected to override this.

        :param tokens:
            A list of tokens
        :raises:
            :class:`~.parsing.ParseError` on invalid media types/queries
        :returns:
            For CSS 2.1, a list of media types as strings
        """
        if not tokens:
            return ['all']
        media_types = []
        for part in split_on_comma(remove_whitespace(tokens)):
            types = [token.type for token in part]
            if types == ['IDENT']:
                media_types.append(part[0].value)
            else:
                raise ParseError(
                    tokens[0], 'expected a media type'
                    + ((', got ' + ', '.join(types)) if types else ''))
        return media_types

    def parse_page_selector(self, tokens):
        """Parse an @page selector.

        :param tokens:
            An iterable of tokens, typically from the ``head`` attribute
            of an unparsed :class:`AtRule`.
        :returns:
            A page selector. For CSS 2.1, this is ``'first'``, ``'left'``,
            ``'right'`` or ``None``.
        :raises:
            :class:`~.parsing.ParseError` on invalid selectors

        """
        if not tokens:
            return None, (0, 0)
        if (len(tokens) == 2 and tokens[0].type == ':'
                and tokens[1].type == 'IDENT'):
            pseudo_class = tokens[1].value
            specificity = {
                'first': (1, 0), 'left': (0, 1), 'right': (0, 1),
            }.get(pseudo_class)
            if specificity:
                return pseudo_class, specificity
        raise ParseError(tokens[0], 'invalid @page selector')

    def parse_declarations_and_at_rules(self, tokens, context):
        """Parse a mixed list of declarations and at rules, as found eg.
        in the body of an @page rule.

        Note that to add supported at-rules inside @page,
        :class:`~.page3.CSSPage3Parser` extends :meth:`parse_at_rule`,
        not this method.

        :param tokens:
            An iterable of tokens, typically from the ``body`` attribute
            of an unparsed :class:`AtRule`.
        :param context:
            An at-keyword such as ``'@page'``.
            (Most at-rules are only allowed in some contexts.)
        :returns:
            A tuple of:

            * A list of :class:`Declaration`
            * A list of parsed at-rules (empty for CSS 2.1)
            * A list of :class:`~.parsing.ParseError`

        """
        at_rules = []
        declarations = []
        errors = []
        tokens = iter(tokens)
        for token in tokens:
            if token.type == 'ATKEYWORD':
                try:
                    rule = self.read_at_rule(token, tokens)
                    result = self.parse_at_rule(
                        rule, at_rules, errors, context)
                    at_rules.append(result)
                except ParseError as err:
                    errors.append(err)
            elif token.type != 'S':
                declaration_tokens = []
                while token and token.type != ';':
                    declaration_tokens.append(token)
                    token = next(tokens, None)
                if declaration_tokens:
                    try:
                        declarations.append(
                            self.parse_declaration(declaration_tokens))
                    except ParseError as err:
                        errors.append(err)
        return declarations, at_rules, errors

    def parse_ruleset(self, first_token, tokens):
        """Parse a ruleset: a selector followed by a declaration block.

        :param first_token:
            The first token of the ruleset (probably of the selector).
            You may have read it already to distinguish the rule
            from an at-rule.
        :param tokens:
            An iterator of subsequent tokens. Will be consumed just enough
            for one ruleset.
        :return:
            A tuple of a :class:`RuleSet` and an error list.
            The errors are recovered :class:`~.parsing.ParseError` in
            declarations. (Parsing continues from the next declaration on
            such errors.)
        :raises:
            :class:`~.parsing.ParseError` if the selector is invalid for
            the core grammar.
            Note that a selector can be valid for the core grammar but
            not for CSS 2.1 or another level.

        """
        selector = []
        for token in chain([first_token], tokens):
            if token.type == '{':
                # Parse/validate once we've read the whole rule
                selector = strip_whitespace(selector)
                if not selector:
                    raise ParseError(first_token, 'empty selector')
                for selector_token in selector:
                    validate_any(selector_token, 'selector')
                declarations, errors = self.parse_declaration_list(
                    token.content)
                ruleset = RuleSet(selector, declarations,
                                  first_token.line, first_token.column)
                return ruleset, errors
            else:
                selector.append(token)
        raise ParseError(token, 'no declaration block found for ruleset')

    def parse_declaration_list(self, tokens):
        """Parse a ``;`` separated declaration list.

        You may want to use :meth:`parse_declarations_and_at_rules` (or
        some other method that uses :func:`parse_declaration` directly)
        instead if you have more than just declarations in the same
        context.

        :param tokens:
            An iterable of tokens. Should stop at (before) the end
            of the block, as marked by ``}``.
        :return:
            A tuple of the list of valid :class:`Declaration` and a list
            of :class:`~.parsing.ParseError`

        """
        # split at ';'
        parts = []
        this_part = []
        for token in tokens:
            if token.type == ';':
                parts.append(this_part)
                this_part = []
            else:
                this_part.append(token)
        parts.append(this_part)

        declarations = []
        errors = []
        for tokens in parts:
            tokens = strip_whitespace(tokens)
            if tokens:
                try:
                    declarations.append(self.parse_declaration(tokens))
                except ParseError as exc:
                    errors.append(exc)
                    # Skip the entire declaration
        return declarations, errors

    def parse_declaration(self, tokens):
        """Parse a single declaration.

        :param tokens:
            An iterable of at least one token. Should stop at (before)
            the end of the declaration, as marked by a ``;`` or ``}``.
            Empty declarations (ie. consecutive ``;`` with only white
            space in-between) should be skipped earlier and not passed to
            this method.
        :returns:
            A :class:`Declaration`
        :raises:
            :class:`~.parsing.ParseError` if the tokens do not match the
            'declaration' production of the core grammar.

        """
        tokens = iter(tokens)

        name_token = next(tokens)  # assume there is at least one
        if name_token.type == 'IDENT':
            # CSS syntax is case-insensitive
            property_name = name_token.value.lower()
        else:
            raise ParseError(
                name_token,
                'expected a property name, got {0}'.format(name_token.type))

        token = name_token  # In case ``tokens`` is now empty
        for token in tokens:
            if token.type == ':':
                break
            elif token.type != 'S':
                raise ParseError(
                    token, "expected ':', got {0}".format(token.type))
        else:
            raise ParseError(token, "expected ':'")

        value = strip_whitespace(list(tokens))
        if not value:
            raise ParseError(token, 'expected a property value')
        validate_value(value)
        value, priority = self.parse_value_priority(value)
        return Declaration(
            property_name, value, priority,
            name_token.line, name_token.column)

    def parse_value_priority(self, tokens):
        """Separate any ``!important`` marker at the end of a property
        value.

        :param tokens:
            A list of tokens for the property value.
        :returns:
            A tuple of the actual property value (a list of tokens)
            and the :attr:`~Declaration.priority`.
        """
        value = list(tokens)
        # Walk the token list from the end
        token = value.pop()
        if token.type == 'IDENT' and token.value.lower() == 'important':
            while value:
                token = value.pop()
                if token.type == 'DELIM' and token.value == '!':
                    # Skip any white space before the '!'
                    while value and value[-1].type == 'S':
                        value.pop()
                    if not value:
                        raise ParseError(
                            token, 'expected a value before !important')
                    return value, 'important'
                # Skip white space between '!' and 'important'
                elif token.type != 'S':
                    break
        return tokens, None
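Putting the user API above together, a minimal end-to-end sketch (import path assumed from this repository's layout):

    from ebook_converter.tinycss.css21 import CSS21Parser

    parser = CSS21Parser()
    sheet = parser.parse_stylesheet(
        '@import "base.css" screen; p { color: red !important }')
    for rule in sheet.rules:
        if rule.at_keyword == '@import':
            print(rule.uri, rule.media)       # base.css ['screen']
        else:
            print(rule.selector.as_css(),
                  [(d.name, d.priority) for d in rule.declarations])
                                              # p [('color', 'important')]
    print(sheet.errors)                       # [] for this input

Subclasses hook new at-rules in by adding entries to self.at_parsers, which is how CSSFonts3Parser registers @font-face later in this commit.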
ebook_converter/tinycss/decoding.py  (new file, 255 lines)
@@ -0,0 +1,255 @@
# coding: utf8
"""
    tinycss.decoding
    ----------------

    Decoding stylesheets from bytes to Unicode.
    http://www.w3.org/TR/CSS21/syndata.html#charset

    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
"""

from __future__ import unicode_literals

import operator
import re

from polyglot.binary import from_hex_bytes


__all__ = ['decode']  # Everything else is implementation detail


def decode(css_bytes, protocol_encoding=None,
           linking_encoding=None, document_encoding=None):
    """
    Determine the character encoding from the passed metadata and the
    ``@charset`` rule in the stylesheet (if any); and decode accordingly.
    If no encoding information is available or decoding fails,
    decoding defaults to UTF-8 and then falls back on ISO-8859-1.

    :param css_bytes:
        a CSS stylesheet as a byte string
    :param protocol_encoding:
        The "charset" parameter of a "Content-Type" HTTP header (if any),
        or similar metadata for other protocols.
    :param linking_encoding:
        ``<link charset="">`` or other metadata from the linking mechanism
        (if any)
    :param document_encoding:
        Encoding of the referring style sheet or document (if any)
    :return:
        A tuple of a Unicode string, with any BOM removed, and the
        encoding that was used.

    """
    if protocol_encoding:
        css_unicode = try_encoding(css_bytes, protocol_encoding)
        if css_unicode is not None:
            return css_unicode, protocol_encoding
    for encoding, pattern in ENCODING_MAGIC_NUMBERS:
        match = pattern(css_bytes)
        if match:
            has_at_charset = isinstance(encoding, tuple)
            if has_at_charset:
                extract, endianness = encoding
                encoding = extract(match.group(1))
                # Get an ASCII-only unicode value.
                # This is the only thing that works on both Python 2 and 3
                # for bytes.decode()
                # Non-ASCII encoding names are invalid anyway,
                # but make sure they stay invalid.
                encoding = encoding.decode('ascii', 'replace')
                encoding = encoding.replace('\ufffd', '?')
                if encoding.replace('-', '').replace('_', '').lower() in [
                        'utf16', 'utf32']:
                    encoding += endianness
                encoding = encoding.encode('ascii', 'replace').decode('ascii')
            css_unicode = try_encoding(css_bytes, encoding)
            if css_unicode and not (has_at_charset and not
                                    css_unicode.startswith('@charset "')):
                return css_unicode, encoding
            break
    for encoding in [linking_encoding, document_encoding]:
        if encoding:
            css_unicode = try_encoding(css_bytes, encoding)
            if css_unicode is not None:
                return css_unicode, encoding
    css_unicode = try_encoding(css_bytes, 'UTF-8')
    if css_unicode is not None:
        return css_unicode, 'UTF-8'
    return try_encoding(css_bytes, 'ISO-8859-1', fallback=False), 'ISO-8859-1'
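A sketch of the precedence decode() implements (import path assumed from this commit's layout):

    from ebook_converter.tinycss.decoding import decode

    # 1. Protocol metadata wins when it decodes cleanly:
    decode(b'a{}', protocol_encoding='utf-8')    # ('a{}', 'utf-8')

    # 2. Otherwise a leading @charset magic number is honoured:
    decode(b'@charset "ISO-8859-5"; a{}')        # (..., 'ISO-8859-5')

    # 3. Then linking/document encodings, then UTF-8, then ISO-8859-1.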
def try_encoding(css_bytes, encoding, fallback=True):
    if fallback:
        try:
            css_unicode = css_bytes.decode(encoding)
        # LookupError means unknown encoding
        except (UnicodeDecodeError, LookupError):
            return None
    else:
        css_unicode = css_bytes.decode(encoding)
    if css_unicode and css_unicode[0] == '\ufeff':
        # Remove any Byte Order Mark
        css_unicode = css_unicode[1:]
    return css_unicode


def hex2re(hex_data):
    return re.escape(from_hex_bytes(hex_data.replace(' ', '').encode('ascii')))


class Slicer(object):
    """``Slice[start:stop:step]`` is an itemgetter for
    ``slice(start, stop, step)``."""
    def __getitem__(self, slice_):
        return operator.itemgetter(slice_)


Slice = Slicer()
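The Slicer indirection just packages a subscript as a callable; for example, Slice[1::2] is operator.itemgetter(slice(1, None, 2)). Applied to the inner bytes of a UTF-16-BE @charset match, it drops the zero byte of each big-endian code unit:

    Slice[1::2](b'\x00u\x00t\x00f\x00-\x001\x006')   # -> b'utf-16'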
# List of (encoding, pattern) pairs.
#   encoding is a string, or (slice_, endianness) for "as specified":
#   slice_ is a Slice itemgetter describing how to extract the specified
#   encoding name from the matched bytes, and endianness is the suffix
#   to append to UTF-16/UTF-32 names.

ENCODING_MAGIC_NUMBERS = [
    ((Slice[:], ''), re.compile(
        hex2re('EF BB BF 40 63 68 61 72 73 65 74 20 22')
        + b'([^\x22]*?)'
        + hex2re('22 3B')).match),

    ('UTF-8', re.compile(
        hex2re('EF BB BF')).match),

    ((Slice[:], ''), re.compile(
        hex2re('40 63 68 61 72 73 65 74 20 22')
        + b'([^\x22]*?)'
        + hex2re('22 3B')).match),

    ((Slice[1::2], '-BE'), re.compile(
        hex2re('FE FF 00 40 00 63 00 68 00 61 00 72 00 73 00 65 00'
               '74 00 20 00 22')
        + b'((\x00[^\x22])*?)'
        + hex2re('00 22 00 3B')).match),

    ((Slice[1::2], '-BE'), re.compile(
        hex2re('00 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00'
               '20 00 22')
        + b'((\x00[^\x22])*?)'
        + hex2re('00 22 00 3B')).match),

    ((Slice[::2], '-LE'), re.compile(
        hex2re('FF FE 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74'
               '00 20 00 22 00')
        + b'(([^\x22]\x00)*?)'
        + hex2re('22 00 3B 00')).match),

    ((Slice[::2], '-LE'), re.compile(
        hex2re('40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00 20'
               '00 22 00')
        + b'(([^\x22]\x00)*?)'
        + hex2re('22 00 3B 00')).match),

    ((Slice[3::4], '-BE'), re.compile(
        hex2re('00 00 FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00'
               '00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00'
               '00 74 00 00 00 20 00 00 00 22')
        + b'((\x00\x00\x00[^\x22])*?)'
        + hex2re('00 00 00 22 00 00 00 3B')).match),

    ((Slice[3::4], '-BE'), re.compile(
        hex2re('00 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00'
               '00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00'
               '00 20 00 00 00 22')
        + b'((\x00\x00\x00[^\x22])*?)'
        + hex2re('00 00 00 22 00 00 00 3B')).match),

    # Python does not support 2143 or 3412 endianness, AFAIK.
    # I guess we could fix it up ourselves but meh. Patches welcome.

    # ((Slice[2::4], '-2143'), re.compile(
    #     hex2re('00 00 FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00'
    #            '00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00'
    #            '74 00 00 00 20 00 00 00 22 00')
    #     + b'((\x00\x00[^\x22]\x00)*?)'
    #     + hex2re('00 00 22 00 00 00 3B 00')).match),

    # ((Slice[2::4], '-2143'), re.compile(
    #     hex2re('00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00'
    #            '00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00'
    #            '20 00 00 00 22 00')
    #     + b'((\x00\x00[^\x22]\x00)*?)'
    #     + hex2re('00 00 22 00 00 00 3B 00')).match),

    # ((Slice[1::4], '-3412'), re.compile(
    #     hex2re('FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00 00 00'
    #            '61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74'
    #            '00 00 00 20 00 00 00 22 00 00')
    #     + b'((\x00[^\x22]\x00\x00)*?)'
    #     + hex2re('00 22 00 00 00 3B 00 00')).match),

    # ((Slice[1::4], '-3412'), re.compile(
    #     hex2re('00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00'
    #            '72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20'
    #            '00 00 00 22 00 00')
    #     + b'((\x00[^\x22]\x00\x00)*?)'
    #     + hex2re('00 22 00 00 00 3B 00 00')).match),

    ((Slice[::4], '-LE'), re.compile(
        hex2re('FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61'
               '00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00'
               '00 00 20 00 00 00 22 00 00 00')
        + b'(([^\x22]\x00\x00\x00)*?)'
        + hex2re('22 00 00 00 3B 00 00 00')).match),

    ((Slice[::4], '-LE'), re.compile(
        hex2re('40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72'
               '00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00'
               '00 00 22 00 00 00')
        + b'(([^\x22]\x00\x00\x00)*?)'
        + hex2re('22 00 00 00 3B 00 00 00')).match),

    ('UTF-32-BE', re.compile(
        hex2re('00 00 FE FF')).match),

    ('UTF-32-LE', re.compile(
        hex2re('FF FE 00 00')).match),

    # ('UTF-32-2143', re.compile(
    #     hex2re('00 00 FF FE')).match),

    # ('UTF-32-3412', re.compile(
    #     hex2re('FE FF 00 00')).match),

    ('UTF-16-BE', re.compile(
        hex2re('FE FF')).match),

    ('UTF-16-LE', re.compile(
        hex2re('FF FE')).match),

    # Some of these are supported by Python, but I didn't bother.
    # You know the story with patches ...

    # # as specified, transcoded from EBCDIC to ASCII
    # ('as_specified-EBCDIC', re.compile(
    #     hex2re('7C 83 88 81 99 A2 85 A3 40 7F')
    #     + b'([^\x7F]*?)'
    #     + hex2re('7F 5E')).match),

    # # as specified, transcoded from IBM1026 to ASCII
    # ('as_specified-IBM1026', re.compile(
    #     hex2re('AE 83 88 81 99 A2 85 A3 40 FC')
    #     + b'([^\xFC]*?)'
    #     + hex2re('FC 5E')).match),

    # # as specified, transcoded from GSM 03.38 to ASCII
    # ('as_specified-GSM_03.38', re.compile(
    #     hex2re('00 63 68 61 72 73 65 74 20 22')
    #     + b'([^\x22]*?)'
    #     + hex2re('22 3B')).match),
]
ebook_converter/tinycss/fonts3.py  (new file, 225 lines)
@@ -0,0 +1,225 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'


import re

from polyglot.builtins import map
from tinycss.css21 import CSS21Parser, ParseError
from .tokenizer import tokenize_grouped


def parse_font_family_tokens(tokens):
    families = []
    current_family = ''

    def commit():
        val = current_family.strip()
        if val:
            families.append(val)

    for token in tokens:
        if token.type == 'STRING':
            if current_family:
                commit()
            current_family = token.value
        elif token.type == 'DELIM':
            if token.value == ',':
                if current_family:
                    commit()
                current_family = ''
        elif token.type == 'IDENT':
            current_family += ' ' + token.value
    if current_family:
        commit()
    return families


def parse_font_family(css_string):
    return parse_font_family_tokens(tokenize_grouped(type('')(css_string).strip()))
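A quick sketch of the family-list parsing above: quoted names come through verbatim, while runs of bare identifiers are joined and whitespace-trimmed:

    parse_font_family('"Times New Roman", Georgia , serif')
    # -> ['Times New Roman', 'Georgia', 'serif']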
def serialize_single_font_family(x):
|
||||
xl = x.lower()
|
||||
if xl in GENERIC_FAMILIES:
|
||||
if xl == 'sansserif':
|
||||
xl = 'sans-serif'
|
||||
return xl
|
||||
if SIMPLE_NAME_PAT.match(x) is not None and not x.lower().startswith('and'):
|
||||
# css_parser dies if a font name starts with and
|
||||
return x
|
||||
return '"%s"' % x.replace('"', r'\"')
|
||||
|
||||
|
||||
def serialize_font_family(families):
|
||||
return ', '.join(map(serialize_single_font_family, families))
|
||||
|
||||
|
||||
GLOBAL_IDENTS = frozenset('inherit initial unset normal'.split())
|
||||
STYLE_IDENTS = frozenset('italic oblique'.split())
|
||||
VARIANT_IDENTS = frozenset(('small-caps',))
|
||||
WEIGHT_IDENTS = frozenset('bold bolder lighter'.split())
|
||||
STRETCH_IDENTS = frozenset('ultra-condensed extra-condensed condensed semi-condensed semi-expanded expanded extra-expanded ultra-expanded'.split())
|
||||
BEFORE_SIZE_IDENTS = STYLE_IDENTS | VARIANT_IDENTS | WEIGHT_IDENTS | STRETCH_IDENTS
|
||||
SIZE_IDENTS = frozenset('xx-small x-small small medium large x-large xx-large larger smaller'.split())
|
||||
WEIGHT_SIZES = frozenset(map(int, '100 200 300 400 500 600 700 800 900'.split()))
|
||||
LEGACY_FONT_SPEC = frozenset('caption icon menu message-box small-caption status-bar'.split())
|
||||
GENERIC_FAMILIES = frozenset('serif sans-serif sansserif cursive fantasy monospace'.split())
|
||||
SIMPLE_NAME_PAT = re.compile(r'[a-zA-Z][a-zA-Z0-9_-]*$')
|
||||
|
||||
|
||||
def serialize_font(font_dict):
|
||||
ans = []
|
||||
for x in 'style variant weight stretch'.split():
|
||||
val = font_dict.get('font-' + x)
|
||||
if val is not None:
|
||||
ans.append(val)
|
||||
val = font_dict.get('font-size')
|
||||
if val is not None:
|
||||
fs = val
|
||||
val = font_dict.get('line-height')
|
||||
if val is not None:
|
||||
fs += '/' + val
|
||||
ans.append(fs)
|
||||
val = font_dict.get('font-family')
|
||||
if val:
|
||||
ans.append(serialize_font_family(val))
|
||||
return ' '.join(ans)
|
||||
|
||||
|
||||


def parse_font(css_string):
    # See https://www.w3.org/TR/css-fonts-3/#font-prop
    style = variant = weight = stretch = size = height = None
    tokens = list(reversed(tuple(tokenize_grouped(type('')(css_string).strip()))))
    if tokens and tokens[-1].value in LEGACY_FONT_SPEC:
        return {'font-family': ['sans-serif']}
    while tokens:
        tok = tokens.pop()
        if tok.type == 'STRING':
            tokens.append(tok)
            break
        if tok.type == 'INTEGER':
            if size is None:
                if weight is None and tok.value in WEIGHT_SIZES:
                    weight = tok.as_css()
                    continue
                break
            if height is None:
                height = tok.as_css()
                break
            break
        if tok.type == 'NUMBER':
            if size is not None and height is None:
                height = tok.as_css()
            break
        if tok.type == 'DELIM':
            if tok.value == '/' and size is not None and height is None:
                continue
            break
        if tok.type in ('DIMENSION', 'PERCENTAGE'):
            if size is None:
                size = tok.as_css()
                continue
            if height is None:
                height = tok.as_css()
            break
        if tok.type == 'IDENT':
            if tok.value in GLOBAL_IDENTS:
                if size is not None:
                    if height is None:
                        height = tok.value
                    else:
                        tokens.append(tok)
                    break
                if style is None:
                    style = tok.value
                elif variant is None:
                    variant = tok.value
                elif weight is None:
                    weight = tok.value
                elif stretch is None:
                    stretch = tok.value
                elif size is None:
                    size = tok.value
                elif height is None:
                    height = tok.value
                    break
                else:
                    tokens.append(tok)
                    break
                continue
            if tok.value in BEFORE_SIZE_IDENTS:
                if size is not None:
                    break
                if tok.value in STYLE_IDENTS:
                    style = tok.value
                elif tok.value in VARIANT_IDENTS:
                    variant = tok.value
                elif tok.value in WEIGHT_IDENTS:
                    weight = tok.value
                elif tok.value in STRETCH_IDENTS:
                    stretch = tok.value
            elif tok.value in SIZE_IDENTS:
                size = tok.value
            else:
                tokens.append(tok)
                break
    families = parse_font_family_tokens(reversed(tokens))
    ans = {}
    if style is not None:
        ans['font-style'] = style
    if variant is not None:
        ans['font-variant'] = variant
    if weight is not None:
        ans['font-weight'] = weight
    if stretch is not None:
        ans['font-stretch'] = stretch
    if size is not None:
        ans['font-size'] = size
    if height is not None:
        ans['line-height'] = height
    if families:
        ans['font-family'] = families
    return ans
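
# Illustrative only, mirroring the round-trip checked in tests/fonts3.py below:
#     parse_font('bold italic large Palatino, serif')
# returns
#     {'font-weight': 'bold', 'font-style': 'italic', 'font-size': 'large',
#      'font-family': ['Palatino', 'serif']}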


class FontFaceRule(object):

    at_keyword = '@font-face'
    __slots__ = 'declarations', 'line', 'column'

    def __init__(self, declarations, line, column):
        self.declarations = declarations
        self.line = line
        self.column = column

    def __repr__(self):
        return ('<{0.__class__.__name__} at {0.line}:{0.column}>'
                .format(self))


class CSSFonts3Parser(CSS21Parser):

    ''' Parse @font-face rules from the CSS 3 fonts module '''

    ALLOWED_CONTEXTS_FOR_FONT_FACE = {'stylesheet', '@media', '@page'}

    def __init__(self):
        super(CSSFonts3Parser, self).__init__()
        self.at_parsers['@font-face'] = self.parse_font_face_rule

    def parse_font_face_rule(self, rule, previous_rules, errors, context):
        if context not in self.ALLOWED_CONTEXTS_FOR_FONT_FACE:
            raise ParseError(rule,
                             '@font-face rule not allowed in ' + context)
        if rule.body is None:
            raise ParseError(rule,
                             'invalid {0} rule: missing block'.format(rule.at_keyword))
        if rule.head:
            raise ParseError(rule, '{0} rule is not allowed to have content before the descriptor declaration'.format(rule.at_keyword))
        declarations, decerrors = self.parse_declaration_list(rule.body)
        errors.extend(decerrors)
        return FontFaceRule(declarations, rule.line, rule.column)

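A minimal usage sketch for CSSFonts3Parser (illustrative, mirroring tests/fonts3.py below):

    from tinycss.fonts3 import CSSFonts3Parser
    stylesheet = CSSFonts3Parser().parse_stylesheet(
        '@font-face { font-family: Moose; src: url(font1.ttf) }')
    rule = stylesheet.rules[0]  # a FontFaceRule
    # rule.at_keyword == '@font-face'; rule.declarations holds font-family and src
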
106
ebook_converter/tinycss/media3.py
Normal file
@@ -0,0 +1,106 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from tinycss.css21 import CSS21Parser
from tinycss.parsing import remove_whitespace, split_on_comma, ParseError
from polyglot.builtins import error_message


class MediaQuery(object):

    __slots__ = 'media_type', 'expressions', 'negated'

    def __init__(self, media_type='all', expressions=(), negated=False):
        self.media_type = media_type
        self.expressions = expressions
        self.negated = negated

    def __repr__(self):
        return '<MediaQuery type=%s negated=%s expressions=%s>' % (
            self.media_type, self.negated, self.expressions)

    def __eq__(self, other):
        return self.media_type == getattr(other, 'media_type', None) and \
            self.negated == getattr(other, 'negated', None) and \
            self.expressions == getattr(other, 'expressions', None)


class MalformedExpression(Exception):

    def __init__(self, tok, msg):
        Exception.__init__(self, msg)
        self.tok = tok


class CSSMedia3Parser(CSS21Parser):

    ''' Parse media queries as defined by the CSS 3 media module '''

    def parse_media(self, tokens, errors):
        if not tokens:
            return [MediaQuery('all')]
        queries = []

        for part in split_on_comma(remove_whitespace(tokens)):
            negated = False
            media_type = None
            expressions = []
            try:
                for i, tok in enumerate(part):
                    if i == 0 and tok.type == 'IDENT':
                        val = tok.value.lower()
                        if val == 'only':
                            continue  # ignore leading ONLY
                        if val == 'not':
                            negated = True
                            continue
                    if media_type is None and tok.type == 'IDENT':
                        media_type = tok.value
                        continue
                    elif media_type is None:
                        media_type = 'all'

                    if tok.type == 'IDENT' and tok.value.lower() == 'and':
                        continue
                    if not tok.is_container:
                        raise MalformedExpression(tok, 'expected a media expression not a %s' % tok.type)
                    if tok.type != '(':
                        raise MalformedExpression(tok, 'media expressions must be in parentheses not %s' % tok.type)
                    content = remove_whitespace(tok.content)
                    if len(content) == 0:
                        raise MalformedExpression(tok, 'media expressions cannot be empty')
                    if content[0].type != 'IDENT':
                        raise MalformedExpression(content[0], 'expected a media feature not a %s' % tok.type)
                    media_feature, expr = content[0].value, None
                    if len(content) > 1:
                        if len(content) < 3:
                            raise MalformedExpression(content[1], 'malformed media feature definition')
                        if content[1].type != ':':
                            raise MalformedExpression(content[1], 'expected a :')
                        expr = content[2:]
                        if len(expr) == 1:
                            expr = expr[0]
                        elif len(expr) == 3 and (expr[0].type, expr[1].type, expr[1].value, expr[2].type) == (
                                'INTEGER', 'DELIM', '/', 'INTEGER'):
                            # This should really be moved into token_data, but
                            # since RATIO is not part of CSS 2.1 and does not
                            # occur anywhere else, we special case it here.
                            r = expr[0]
                            r.value = (expr[0].value, expr[2].value)
                            r.type = 'RATIO'
                            r._as_css = expr[0]._as_css + expr[1]._as_css + expr[2]._as_css
                            expr = r
                        else:
                            raise MalformedExpression(expr[0], 'malformed media feature definition')

                    expressions.append((media_feature, expr))
            except MalformedExpression as err:
                errors.append(ParseError(err.tok, error_message(err)))
                media_type, negated, expressions = 'all', True, ()
            queries.append(MediaQuery(media_type or 'all', expressions=tuple(expressions), negated=negated))

        return queries

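A minimal usage sketch for CSSMedia3Parser (illustrative, mirroring tests/media3.py below):

    from tinycss.media3 import CSSMedia3Parser
    stylesheet = CSSMedia3Parser().parse_stylesheet('@media screen and (color) {}')
    mq = stylesheet.rules[0].media[0]
    # mq.media_type == 'screen'; mq.expressions == (('color', None),); mq.negated is False
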
163
ebook_converter/tinycss/page3.py
Normal file
@@ -0,0 +1,163 @@
# coding: utf8
"""
tinycss.page3
-------------

Support for CSS 3 Paged Media syntax:
http://dev.w3.org/csswg/css3-page/

Adds support for named page selectors and margin rules.

:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""

from __future__ import unicode_literals, division
from .css21 import CSS21Parser, ParseError


class MarginRule(object):
    """A parsed at-rule for margin box.

    .. attribute:: at_keyword

        One of the 16 following strings:

        * ``@top-left-corner``
        * ``@top-left``
        * ``@top-center``
        * ``@top-right``
        * ``@top-right-corner``
        * ``@bottom-left-corner``
        * ``@bottom-left``
        * ``@bottom-center``
        * ``@bottom-right``
        * ``@bottom-right-corner``
        * ``@left-top``
        * ``@left-middle``
        * ``@left-bottom``
        * ``@right-top``
        * ``@right-middle``
        * ``@right-bottom``

    .. attribute:: declarations

        A list of :class:`~.css21.Declaration` objects.

    .. attribute:: line

        Source line where this was read.

    .. attribute:: column

        Source column where this was read.

    """

    __slots__ = 'at_keyword', 'declarations', 'line', 'column'

    def __init__(self, at_keyword, declarations, line, column):
        self.at_keyword = at_keyword
        self.declarations = declarations
        self.line = line
        self.column = column


class CSSPage3Parser(CSS21Parser):
    """Extend :class:`~.css21.CSS21Parser` for `CSS 3 Paged Media`_ syntax.

    .. _CSS 3 Paged Media: http://dev.w3.org/csswg/css3-page/

    Compared to CSS 2.1, the ``at_rules`` and ``selector`` attributes of
    :class:`~.css21.PageRule` objects are modified:

    * ``at_rules`` is not always empty, it is a list of :class:`MarginRule`
      objects.

    * ``selector``, instead of a single string, is a tuple of the page name
      and the pseudo class. Each of these may be a ``None`` or a string.

    +--------------------------+------------------------+
    | CSS                      | Parsed selectors       |
    +==========================+========================+
    | .. code-block:: css      | .. code-block:: python |
    |                          |                        |
    |     @page {}             |     (None, None)       |
    |     @page :first {}      |     (None, 'first')    |
    |     @page chapter {}     |     ('chapter', None)  |
    |     @page table:right {} |     ('table', 'right') |
    +--------------------------+------------------------+

    """

    PAGE_MARGIN_AT_KEYWORDS = (
        '@top-left-corner',
        '@top-left',
        '@top-center',
        '@top-right',
        '@top-right-corner',
        '@bottom-left-corner',
        '@bottom-left',
        '@bottom-center',
        '@bottom-right',
        '@bottom-right-corner',
        '@left-top',
        '@left-middle',
        '@left-bottom',
        '@right-top',
        '@right-middle',
        '@right-bottom',
    )

    def __init__(self):
        super(CSSPage3Parser, self).__init__()
        for x in self.PAGE_MARGIN_AT_KEYWORDS:
            self.at_parsers[x] = self.parse_page_margin_rule

    def parse_page_margin_rule(self, rule, previous_rules, errors, context):
        if context != '@page':
            raise ParseError(rule,
                             '%s rule not allowed in %s' % (rule.at_keyword, context))
        if rule.head:
            raise ParseError(rule.head[0],
                             'unexpected %s token in %s rule header'
                             % (rule.head[0].type, rule.at_keyword))
        declarations, body_errors = self.parse_declaration_list(rule.body)
        errors.extend(body_errors)
        return MarginRule(rule.at_keyword, declarations,
                          rule.line, rule.column)

    def parse_page_selector(self, head):
        """Parse an @page selector.

        :param head:
            The ``head`` attribute of an unparsed :class:`AtRule`.
        :returns:
            A ``((name, pseudo_class), specificity)`` pair. The pseudo class
            is 'first', 'left' or 'right' (from CSS 2.1), 'blank' (added by
            GCPM), or None; the page name is None when not given.
        :raises:
            :class:`~parsing.ParseError` on invalid selectors

        """
        if not head:
            return (None, None), (0, 0, 0)
        if head[0].type == 'IDENT':
            name = head.pop(0).value
            while head and head[0].type == 'S':
                head.pop(0)
            if not head:
                return (name, None), (1, 0, 0)
            name_specificity = (1,)
        else:
            name = None
            name_specificity = (0,)
        if (len(head) == 2 and head[0].type == ':'
                and head[1].type == 'IDENT'):
            pseudo_class = head[1].value
            specificity = {
                'first': (1, 0), 'blank': (1, 0),
                'left': (0, 1), 'right': (0, 1),
            }.get(pseudo_class)
            if specificity:
                return (name, pseudo_class), (name_specificity + specificity)
        raise ParseError(head[0], 'invalid @page selector')

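A minimal usage sketch for CSSPage3Parser (illustrative, mirroring tests/page3.py below):

    from tinycss.page3 import CSSPage3Parser
    stylesheet = CSSPage3Parser().parse_stylesheet('@page foo:first {}')
    rule = stylesheet.rules[0]
    # rule.selector == ('foo', 'first'); rule.specificity == (1, 1, 0)
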
165
ebook_converter/tinycss/parsing.py
Normal file
@@ -0,0 +1,165 @@
# coding: utf8
"""
tinycss.parsing
---------------

Utilities for parsing lists of tokens.

:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""

from __future__ import unicode_literals


# TODO: unit tests

def split_on_comma(tokens):
    """Split a list of tokens on commas, i.e. ``,`` DELIM tokens.

    Only "top-level" comma tokens are splitting points, not commas inside a
    function or other :class:`ContainerToken`.

    :param tokens:
        An iterable of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :returns:
        A list of lists of tokens

    """
    parts = []
    this_part = []
    for token in tokens:
        if token.type == 'DELIM' and token.value == ',':
            parts.append(this_part)
            this_part = []
        else:
            this_part.append(token)
    parts.append(this_part)
    return parts
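
# Illustrative only: given the tokens for "a, b" this returns two parts,
# roughly [[<IDENT a>], [<IDENT b>]]; commas nested inside parentheses or
# function tokens do not split.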


def strip_whitespace(tokens):
    """Remove whitespace at the beginning and end of a token list.

    Whitespace tokens in-between other tokens in the list are preserved.

    :param tokens:
        A list of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :return:
        A new sub-sequence of the list.

    """
    for i, token in enumerate(tokens):
        if token.type != 'S':
            break
    else:
        return []  # only whitespace
    tokens = tokens[i:]
    while tokens and tokens[-1].type == 'S':
        tokens.pop()
    return tokens


def remove_whitespace(tokens):
    """Remove any top-level whitespace in a token list.

    Whitespace tokens inside recursive :class:`~.token_data.ContainerToken`
    are preserved.

    :param tokens:
        A list of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :return:
        A new sub-sequence of the list.

    """
    return [token for token in tokens if token.type != 'S']


def validate_value(tokens):
    """Validate a property value.

    :param tokens:
        an iterable of tokens
    :raises:
        :class:`ParseError` if there is any invalid token for the 'value'
        production of the core grammar.

    """
    for token in tokens:
        type_ = token.type
        if type_ == '{':
            validate_block(token.content, 'property value')
        else:
            validate_any(token, 'property value')


def validate_block(tokens, context):
    """
    :raises:
        :class:`ParseError` if there is any invalid token for the 'block'
        production of the core grammar.
    :param tokens: an iterable of tokens
    :param context: a string for the 'unexpected in ...' message

    """
    for token in tokens:
        type_ = token.type
        if type_ == '{':
            validate_block(token.content, context)
        elif type_ not in (';', 'ATKEYWORD'):
            validate_any(token, context)


def validate_any(token, context):
    """
    :raises:
        :class:`ParseError` if this is an invalid token for the
        'any' production of the core grammar.
    :param token: a single token
    :param context: a string for the 'unexpected in ...' message

    """
    type_ = token.type
    if type_ in ('FUNCTION', '(', '['):
        for token in token.content:
            validate_any(token, type_)
    elif type_ not in ('S', 'IDENT', 'DIMENSION', 'PERCENTAGE', 'NUMBER',
                       'INTEGER', 'URI', 'DELIM', 'STRING', 'HASH', ':',
                       'UNICODE-RANGE'):
        if type_ in ('}', ')', ']'):
            adjective = 'unmatched'
        else:
            adjective = 'unexpected'
        raise ParseError(token,
                         '{0} {1} token in {2}'.format(adjective, type_, context))


class ParseError(ValueError):
    """Details about a CSS syntax error. Usually indicates that something
    (a rule or a declaration) was ignored and will not appear as a parsed
    object.

    This exception is typically logged in a list rather than being propagated
    to the user API.

    .. attribute:: line

        Source line where the error occurred.

    .. attribute:: column

        Column in the source line where the error occurred.

    .. attribute:: reason

        What happened (a string).

    """
    def __init__(self, subject, reason):
        self.line = subject.line
        self.column = subject.column
        self.reason = reason
        super(ParseError, self).__init__(
            'Parse error at {0.line}:{0.column}, {0.reason}'.format(self))

37
ebook_converter/tinycss/tests/__init__.py
Normal file
@@ -0,0 +1,37 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

import unittest


def jsonify(tokens):
    """Turn tokens into "JSON-compatible" data structures."""
    for token in tokens:
        if token.type == 'FUNCTION':
            yield (token.type, token.function_name,
                   list(jsonify(token.content)))
        elif token.is_container:
            yield token.type, list(jsonify(token.content))
        else:
            yield token.type, token.value


class BaseTest(unittest.TestCase):

    longMessage = True
    maxDiff = None
    ae = unittest.TestCase.assertEqual

    def assert_errors(self, errors, expected_errors):
        """Check only substrings of the error messages, not the complete messages."""
        self.ae(len(errors), len(expected_errors))
        for error, expected in zip(errors, expected_errors):
            self.assertIn(expected, type(u'')(error))

    def jsonify_declarations(self, rule):
        return [(decl.name, list(jsonify(decl.value)))
                for decl in rule.declarations]

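A quick illustration (not in the original file) of what jsonify yields for grouped
tokens, matching its use in the test modules below (tokenize_grouped is assumed to
be imported from tinycss.tokenizer):

    list(jsonify(tokenize_grouped('a (4)')))
    # -> [('IDENT', 'a'), ('S', ' '), ('(', [('INTEGER', 4)])]
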
198
ebook_converter/tinycss/tests/color3.py
Normal file
@@ -0,0 +1,198 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from tinycss.color3 import parse_color_string, hsl_to_rgb
from tinycss.tests import BaseTest


class TestColor3(BaseTest):

    def test_color_parsing(self):
        for css_source, expected_result in [
            ('', None),
            (' /* hey */\n', None),
            ('4', None),
            ('top', None),
            ('/**/transparent', (0, 0, 0, 0)),
            ('transparent', (0, 0, 0, 0)),
            (' transparent\n', (0, 0, 0, 0)),
            ('TransParent', (0, 0, 0, 0)),
            ('currentColor', 'currentColor'),
            ('CURRENTcolor', 'currentColor'),
            ('current_Color', None),

            ('black', (0, 0, 0, 1)),
            ('white', (1, 1, 1, 1)),
            ('fuchsia', (1, 0, 1, 1)),
            ('cyan', (0, 1, 1, 1)),
            ('CyAn', (0, 1, 1, 1)),
            ('darkkhaki', (189 / 255., 183 / 255., 107 / 255., 1)),

            ('#', None),
            ('#f', None),
            ('#ff', None),
            ('#fff', (1, 1, 1, 1)),
            ('#ffg', None),
            ('#ffff', None),
            ('#fffff', None),
            ('#ffffff', (1, 1, 1, 1)),
            ('#fffffg', None),
            ('#fffffff', None),
            ('#ffffffff', None),
            ('#fffffffff', None),

            ('#cba987', (203 / 255., 169 / 255., 135 / 255., 1)),
            ('#CbA987', (203 / 255., 169 / 255., 135 / 255., 1)),
            ('#1122aA', (17 / 255., 34 / 255., 170 / 255., 1)),
            ('#12a', (17 / 255., 34 / 255., 170 / 255., 1)),

            ('rgb(203, 169, 135)', (203 / 255., 169 / 255., 135 / 255., 1)),
            ('RGB(255, 255, 255)', (1, 1, 1, 1)),
            ('rgB(0, 0, 0)', (0, 0, 0, 1)),
            ('rgB(0, 51, 255)', (0, .2, 1, 1)),
            ('rgb(0,51,255)', (0, .2, 1, 1)),
            ('rgb(0\t, 51 ,255)', (0, .2, 1, 1)),
            ('rgb(/* R */0, /* G */51, /* B */255)', (0, .2, 1, 1)),
            ('rgb(-51, 306, 0)', (-.2, 1.2, 0, 1)),  # out of 0..1 is allowed

            ('rgb(42%, 3%, 50%)', (.42, .03, .5, 1)),
            ('RGB(100%, 100%, 100%)', (1, 1, 1, 1)),
            ('rgB(0%, 0%, 0%)', (0, 0, 0, 1)),
            ('rgB(10%, 20%, 30%)', (.1, .2, .3, 1)),
            ('rgb(10%,20%,30%)', (.1, .2, .3, 1)),
            ('rgb(10%\t, 20% ,30%)', (.1, .2, .3, 1)),
            ('rgb(/* R */10%, /* G */20%, /* B */30%)', (.1, .2, .3, 1)),
            ('rgb(-12%, 110%, 1400%)', (-.12, 1.1, 14, 1)),  # out of 0..1 is allowed

            ('rgb(10%, 50%, 0)', None),
            ('rgb(255, 50%, 0%)', None),
            ('rgb(0, 0 0)', None),
            ('rgb(0, 0, 0deg)', None),
            ('rgb(0, 0, light)', None),
            ('rgb()', None),
            ('rgb(0)', None),
            ('rgb(0, 0)', None),
            ('rgb(0, 0, 0, 0)', None),
            ('rgb(0%)', None),
            ('rgb(0%, 0%)', None),
            ('rgb(0%, 0%, 0%, 0%)', None),
            ('rgb(0%, 0%, 0%, 0)', None),

            ('rgba(0, 0, 0, 0)', (0, 0, 0, 0)),
            ('rgba(203, 169, 135, 0.3)', (203 / 255., 169 / 255., 135 / 255., 0.3)),
            ('RGBA(255, 255, 255, 0)', (1, 1, 1, 0)),
            ('rgBA(0, 51, 255, 1)', (0, 0.2, 1, 1)),
            ('rgba(0, 51, 255, 1.1)', (0, 0.2, 1, 1)),
            ('rgba(0, 51, 255, 37)', (0, 0.2, 1, 1)),
            ('rgba(0, 51, 255, 0.42)', (0, 0.2, 1, 0.42)),
            ('rgba(0, 51, 255, 0)', (0, 0.2, 1, 0)),
            ('rgba(0, 51, 255, -0.1)', (0, 0.2, 1, 0)),
            ('rgba(0, 51, 255, -139)', (0, 0.2, 1, 0)),

            ('rgba(42%, 3%, 50%, 0.3)', (.42, .03, .5, 0.3)),
            ('RGBA(100%, 100%, 100%, 0)', (1, 1, 1, 0)),
            ('rgBA(0%, 20%, 100%, 1)', (0, 0.2, 1, 1)),
            ('rgba(0%, 20%, 100%, 1.1)', (0, 0.2, 1, 1)),
            ('rgba(0%, 20%, 100%, 37)', (0, 0.2, 1, 1)),
            ('rgba(0%, 20%, 100%, 0.42)', (0, 0.2, 1, 0.42)),
            ('rgba(0%, 20%, 100%, 0)', (0, 0.2, 1, 0)),
            ('rgba(0%, 20%, 100%, -0.1)', (0, 0.2, 1, 0)),
            ('rgba(0%, 20%, 100%, -139)', (0, 0.2, 1, 0)),

            ('rgba(255, 255, 255, 0%)', None),
            ('rgba(10%, 50%, 0, 1)', None),
            ('rgba(255, 50%, 0%, 1)', None),
            ('rgba(0, 0, 0 0)', None),
            ('rgba(0, 0, 0, 0deg)', None),
            ('rgba(0, 0, 0, light)', None),
            ('rgba()', None),
            ('rgba(0)', None),
            ('rgba(0, 0, 0)', None),
            ('rgba(0, 0, 0, 0, 0)', None),
            ('rgba(0%)', None),
            ('rgba(0%, 0%)', None),
            ('rgba(0%, 0%, 0%)', None),
            ('rgba(0%, 0%, 0%, 0%)', None),
            ('rgba(0%, 0%, 0%, 0%, 0%)', None),

            ('HSL(0, 0%, 0%)', (0, 0, 0, 1)),
            ('hsL(0, 100%, 50%)', (1, 0, 0, 1)),
            ('hsl(60, 100%, 37.5%)', (0.75, 0.75, 0, 1)),
            ('hsl(780, 100%, 37.5%)', (0.75, 0.75, 0, 1)),
            ('hsl(-300, 100%, 37.5%)', (0.75, 0.75, 0, 1)),
            ('hsl(300, 50%, 50%)', (0.75, 0.25, 0.75, 1)),

            ('hsl(10, 50%, 0)', None),
            ('hsl(50%, 50%, 0%)', None),
            ('hsl(0, 0% 0%)', None),
            ('hsl(30deg, 100%, 100%)', None),
            ('hsl(0, 0%, light)', None),
            ('hsl()', None),
            ('hsl(0)', None),
            ('hsl(0, 0%)', None),
            ('hsl(0, 0%, 0%, 0%)', None),

            ('HSLA(-300, 100%, 37.5%, 1)', (0.75, 0.75, 0, 1)),
            ('hsLA(-300, 100%, 37.5%, 12)', (0.75, 0.75, 0, 1)),
            ('hsla(-300, 100%, 37.5%, 0.2)', (0.75, 0.75, 0, .2)),
            ('hsla(-300, 100%, 37.5%, 0)', (0.75, 0.75, 0, 0)),
            ('hsla(-300, 100%, 37.5%, -3)', (0.75, 0.75, 0, 0)),

            ('hsla(10, 50%, 0, 1)', None),
            ('hsla(50%, 50%, 0%, 1)', None),
            ('hsla(0, 0% 0%, 1)', None),
            ('hsla(30deg, 100%, 100%, 1)', None),
            ('hsla(0, 0%, light, 1)', None),
            ('hsla()', None),
            ('hsla(0)', None),
            ('hsla(0, 0%)', None),
            ('hsla(0, 0%, 0%, 50%)', None),
            ('hsla(0, 0%, 0%, 1, 0%)', None),

            ('cmyk(0, 0, 0, 0)', None),
        ]:
            result = parse_color_string(css_source)
            if isinstance(result, tuple):
                for got, expected in zip(result, expected_result):
                    # Compensate for floating point errors:
                    self.assertLess(abs(got - expected), 1e-10)
                for i, attr in enumerate(['red', 'green', 'blue', 'alpha']):
                    self.ae(getattr(result, attr), result[i])
            else:
                self.ae(result, expected_result)

    def test_hsl(self):
        for hsl, expected_rgb in [
            # http://en.wikipedia.org/wiki/HSL_and_HSV#Examples
            ((0, 0, 100), (1, 1, 1)),
            ((127, 0, 100), (1, 1, 1)),
            ((0, 0, 50), (0.5, 0.5, 0.5)),
            ((127, 0, 50), (0.5, 0.5, 0.5)),
            ((0, 0, 0), (0, 0, 0)),
            ((127, 0, 0), (0, 0, 0)),
            ((0, 100, 50), (1, 0, 0)),
            ((60, 100, 37.5), (0.75, 0.75, 0)),
            ((780, 100, 37.5), (0.75, 0.75, 0)),
            ((-300, 100, 37.5), (0.75, 0.75, 0)),
            ((120, 100, 25), (0, 0.5, 0)),
            ((180, 100, 75), (0.5, 1, 1)),
            ((240, 100, 75), (0.5, 0.5, 1)),
            ((300, 50, 50), (0.75, 0.25, 0.75)),
            ((61.8, 63.8, 39.3), (0.628, 0.643, 0.142)),
            ((251.1, 83.2, 51.1), (0.255, 0.104, 0.918)),
            ((134.9, 70.7, 39.6), (0.116, 0.675, 0.255)),
            ((49.5, 89.3, 49.7), (0.941, 0.785, 0.053)),
            ((283.7, 77.5, 54.2), (0.704, 0.187, 0.897)),
            ((14.3, 81.7, 62.4), (0.931, 0.463, 0.316)),
            ((56.9, 99.1, 76.5), (0.998, 0.974, 0.532)),
            ((162.4, 77.9, 44.7), (0.099, 0.795, 0.591)),
            ((248.3, 60.1, 37.3), (0.211, 0.149, 0.597)),
            ((240.5, 29, 60.7), (0.495, 0.493, 0.721)),
        ]:
            for got, expected in zip(hsl_to_rgb(*hsl), expected_rgb):
                # Compensate for floating point errors and Wikipedia’s rounding:
                self.assertLess(abs(got - expected), 0.001)

336
ebook_converter/tinycss/tests/css21.py
Normal file
@@ -0,0 +1,336 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

import io
import os
import tempfile

from tinycss.css21 import CSS21Parser
from tinycss.tests.tokenizing import jsonify
from tinycss.tests import BaseTest


class CoreParser(CSS21Parser):
    """A parser that always accepts unparsed at-rules."""
    def parse_at_rule(self, rule, stylesheet_rules, errors, context):
        return rule


def parse_bytes(css_bytes, kwargs):
    return CSS21Parser().parse_stylesheet_bytes(css_bytes, **kwargs)


def parse_bytesio_file(css_bytes, kwargs):
    css_file = io.BytesIO(css_bytes)
    return CSS21Parser().parse_stylesheet_file(css_file, **kwargs)


def parse_filename(css_bytes, kwargs):
    css_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        css_file.write(css_bytes)
        # Windows can not open the filename a second time while
        # it is still open for writing.
        css_file.close()
        return CSS21Parser().parse_stylesheet_file(css_file.name, **kwargs)
    finally:
        os.remove(css_file.name)


class TestCSS21(BaseTest):

    def test_bytes(self):
        for (css_bytes, kwargs, expected_result, parse) in [
            params + (parse,)
            for parse in [parse_bytes, parse_bytesio_file, parse_filename]
            for params in [
                ('@import "é";'.encode('utf8'), {}, 'é'),
                ('@import "é";'.encode('utf16'), {}, 'é'),  # with a BOM
                ('@import "é";'.encode('latin1'), {}, 'é'),
                ('@import "£";'.encode('Shift-JIS'), {}, '\x81\x92'),  # latin1 mojibake
                ('@charset "Shift-JIS";@import "£";'.encode('Shift-JIS'), {}, '£'),
                (' @charset "Shift-JIS";@import "£";'.encode('Shift-JIS'), {},
                 '\x81\x92'),
                ('@import "£";'.encode('Shift-JIS'),
                 {'document_encoding': 'Shift-JIS'}, '£'),
                ('@import "£";'.encode('Shift-JIS'),
                 {'document_encoding': 'utf8'}, '\x81\x92'),
                ('@charset "utf8"; @import "£";'.encode('utf8'),
                 {'document_encoding': 'latin1'}, '£'),
                # Mojibake yay!
                (' @charset "utf8"; @import "é";'.encode('utf8'),
                 {'document_encoding': 'latin1'}, 'Ã©'),
                ('@import "é";'.encode('utf8'), {'document_encoding': 'latin1'}, 'Ã©'),
            ]
        ]:
            stylesheet = parse(css_bytes, kwargs)
            self.ae(stylesheet.rules[0].at_keyword, '@import')
            self.ae(stylesheet.rules[0].uri, expected_result)

    def test_at_rules(self):
        for (css_source, expected_rules, expected_errors) in [
            (' /* hey */\n', 0, []),
            ('foo {}', 1, []),
            ('foo{} @lipsum{} bar{}', 2,
             ['unknown at-rule in stylesheet context: @lipsum']),
            ('@charset "ascii"; foo {}', 1, []),
            (' @charset "ascii"; foo {}', 1, ['mis-placed or malformed @charset rule']),
            ('@charset ascii; foo {}', 1, ['mis-placed or malformed @charset rule']),
            ('foo {} @charset "ascii";', 1, ['mis-placed or malformed @charset rule']),
        ]:
            # Pass 'encoding' to allow @charset
            stylesheet = CSS21Parser().parse_stylesheet(css_source, encoding='utf8')
            self.assert_errors(stylesheet.errors, expected_errors)
            self.ae(len(stylesheet.rules), expected_rules)

    def test_core_parser(self):
        for (css_source, expected_rules, expected_errors) in [
            (' /* hey */\n', [], []),

            ('foo{} /* hey */\n@bar;@baz{}',
             [('foo', []), ('@bar', [], None), ('@baz', [], [])], []),

            ('@import "foo.css"/**/;', [
                ('@import', [('STRING', 'foo.css')], None)], []),

            ('@import "foo.css"/**/', [
                ('@import', [('STRING', 'foo.css')], None)], []),

            ('@import "foo.css', [
                ('@import', [('STRING', 'foo.css')], None)], []),

            ('{}', [], ['empty selector']),

            ('a{b:4}', [('a', [('b', [('INTEGER', 4)])])], []),

            ('@page {\t b: 4; @margin}', [('@page', [], [
                ('S', '\t '), ('IDENT', 'b'), (':', ':'), ('S', ' '), ('INTEGER', 4),
                (';', ';'), ('S', ' '), ('ATKEYWORD', '@margin'),
            ])], []),

            ('foo', [], ['no declaration block found']),

            ('foo @page {} bar {}', [('bar', [])],
             ['unexpected ATKEYWORD token in selector']),

            ('foo { content: "unclosed string;\n color:red; ; margin/**/\n: 2cm; }',
             [('foo', [('margin', [('DIMENSION', 2)])])],
             ['unexpected BAD_STRING token in property value']),

            ('foo { 4px; bar: 12% }',
             [('foo', [('bar', [('PERCENTAGE', 12)])])],
             ['expected a property name, got DIMENSION']),

            ('foo { bar! 3cm auto ; baz: 7px }',
             [('foo', [('baz', [('DIMENSION', 7)])])],
             ["expected ':', got DELIM"]),

            ('foo { bar ; baz: {("}"/* comment */) {0@fizz}} }',
             [('foo', [('baz', [('{', [
                 ('(', [('STRING', '}')]), ('S', ' '),
                 ('{', [('INTEGER', 0), ('ATKEYWORD', '@fizz')])
             ])])])],
             ["expected ':'"]),

            ('foo { bar: ; baz: not(z) }',
             [('foo', [('baz', [('FUNCTION', 'not', [('IDENT', 'z')])])])],
             ['expected a property value']),

            ('foo { bar: (]) ; baz: U+20 }',
             [('foo', [('baz', [('UNICODE-RANGE', 'U+20')])])],
             ['unmatched ] token in (']),
        ]:
            stylesheet = CoreParser().parse_stylesheet(css_source)
            self.assert_errors(stylesheet.errors, expected_errors)
            result = [
                (rule.at_keyword, list(jsonify(rule.head)),
                 list(jsonify(rule.body))
                 if rule.body is not None else None)
                if rule.at_keyword else
                (rule.selector.as_css(), [
                    (decl.name, list(jsonify(decl.value)))
                    for decl in rule.declarations])
                for rule in stylesheet.rules
            ]
            self.ae(result, expected_rules)

    def test_parse_style_attr(self):
        for (css_source, expected_declarations, expected_errors) in [
            (' /* hey */\n', [], []),

            ('b:4', [('b', [('INTEGER', 4)])], []),

            ('{b:4}', [], ['expected a property name, got {']),

            ('b:4} c:3', [], ['unmatched } token in property value']),

            (' 4px; bar: 12% ',
             [('bar', [('PERCENTAGE', 12)])],
             ['expected a property name, got DIMENSION']),

            ('bar! 3cm auto ; baz: 7px',
             [('baz', [('DIMENSION', 7)])],
             ["expected ':', got DELIM"]),

            ('foo; bar ; baz: {("}"/* comment */) {0@fizz}}',
             [('baz', [('{', [
                 ('(', [('STRING', '}')]), ('S', ' '),
                 ('{', [('INTEGER', 0), ('ATKEYWORD', '@fizz')])
             ])])],
             ["expected ':'", "expected ':'"]),

            ('bar: ; baz: not(z)',
             [('baz', [('FUNCTION', 'not', [('IDENT', 'z')])])],
             ['expected a property value']),

            ('bar: (]) ; baz: U+20',
             [('baz', [('UNICODE-RANGE', 'U+20')])],
             ['unmatched ] token in (']),
        ]:
            declarations, errors = CSS21Parser().parse_style_attr(css_source)
            self.assert_errors(errors, expected_errors)
            result = [(decl.name, list(jsonify(decl.value)))
                      for decl in declarations]
            self.ae(result, expected_declarations)

    def test_important(self):
        for (css_source, expected_declarations, expected_errors) in [
            (' /* hey */\n', [], []),

            ('a:1; b:2',
             [('a', [('INTEGER', 1)], None), ('b', [('INTEGER', 2)], None)], []),

            ('a:1 important; b: important',
             [('a', [('INTEGER', 1), ('S', ' '), ('IDENT', 'important')], None),
              ('b', [('IDENT', 'important')], None)],
             []),

            ('a:1 !important; b:2',
             [('a', [('INTEGER', 1)], 'important'), ('b', [('INTEGER', 2)], None)],
             []),

            ('a:1!\t Im\\50 O\\RTant; b:2',
             [('a', [('INTEGER', 1)], 'important'), ('b', [('INTEGER', 2)], None)],
             []),

            ('a: !important; b:2',
             [('b', [('INTEGER', 2)], None)],
             ['expected a value before !important']),

        ]:
            declarations, errors = CSS21Parser().parse_style_attr(css_source)
            self.assert_errors(errors, expected_errors)
            result = [(decl.name, list(jsonify(decl.value)), decl.priority)
                      for decl in declarations]
            self.ae(result, expected_declarations)

    def test_at_import(self):
        for (css_source, expected_rules, expected_errors) in [
            (' /* hey */\n', [], []),
            ('@import "foo.css";', [('foo.css', ['all'])], []),
            ('@import url(foo.css);', [('foo.css', ['all'])], []),
            ('@import "foo.css" screen, print;',
             [('foo.css', ['screen', 'print'])], []),
            ('@charset "ascii"; @import "foo.css"; @import "bar.css";',
             [('foo.css', ['all']), ('bar.css', ['all'])], []),
            ('foo {} @import "foo.css";',
             [], ['@import rule not allowed after a ruleset']),
            ('@page {} @import "foo.css";',
             [], ['@import rule not allowed after an @page rule']),
            ('@import ;',
             [], ['expected URI or STRING for @import rule']),
            ('@import foo.css;',
             [], ['expected URI or STRING for @import rule, got IDENT']),
            ('@import "foo.css" {}',
             [], ["expected ';', got a block"]),
        ]:
            # Pass 'encoding' to allow @charset
            stylesheet = CSS21Parser().parse_stylesheet(css_source, encoding='utf8')
            self.assert_errors(stylesheet.errors, expected_errors)

            result = [
                (rule.uri, rule.media)
                for rule in stylesheet.rules
                if rule.at_keyword == '@import'
            ]
            self.ae(result, expected_rules)

    def test_at_page(self):
        for (css, expected_result, expected_errors) in [
            ('@page {}', (None, (0, 0), []), []),
            ('@page:first {}', ('first', (1, 0), []), []),
            ('@page :left{}', ('left', (0, 1), []), []),
            ('@page\t\n:right {}', ('right', (0, 1), []), []),
            ('@page :last {}', None, ['invalid @page selector']),
            ('@page : right {}', None, ['invalid @page selector']),
            ('@page table:left {}', None, ['invalid @page selector']),

            ('@page;', None, ['invalid @page rule: missing block']),
            ('@page { a:1; ; b: 2 }',
             (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
             []),
            ('@page { a:1; c: ; b: 2 }',
             (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
             ['expected a property value']),
            ('@page { a:1; @top-left {} b: 2 }',
             (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
             ['unknown at-rule in @page context: @top-left']),
            ('@page { a:1; @top-left {}; b: 2 }',
             (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
             ['unknown at-rule in @page context: @top-left']),
        ]:
            stylesheet = CSS21Parser().parse_stylesheet(css)
            self.assert_errors(stylesheet.errors, expected_errors)

            if expected_result is None:
                self.assertFalse(stylesheet.rules)
            else:
                self.ae(len(stylesheet.rules), 1)
                rule = stylesheet.rules[0]
                self.ae(rule.at_keyword, '@page')
                self.ae(rule.at_rules, [])  # in CSS 2.1
                result = (
                    rule.selector,
                    rule.specificity,
                    [(decl.name, list(jsonify(decl.value)))
                     for decl in rule.declarations],
                )
                self.ae(result, expected_result)

    def test_at_media(self):
        for (css_source, expected_rules, expected_errors) in [
            (' /* hey */\n', [], []),
            ('@media {}', [(['all'], [])], []),
            ('@media all {}', [(['all'], [])], []),
            ('@media screen, print {}', [(['screen', 'print'], [])], []),
            ('@media all;', [], ['invalid @media rule: missing block']),
            ('@media 4 {}', [], ['expected a media type, got INTEGER']),
            ('@media , screen {}', [], ['expected a media type']),
            ('@media screen, {}', [], ['expected a media type']),
            ('@media screen print {}', [],
             ['expected a media type, got IDENT, IDENT']),

            ('@media all { @page { a: 1 } @media; @import; foo { a: 1 } }',
             [(['all'], [('foo', [('a', [('INTEGER', 1)])])])],
             ['@page rule not allowed in @media',
              '@media rule not allowed in @media',
              '@import rule not allowed in @media']),

        ]:
            stylesheet = CSS21Parser().parse_stylesheet(css_source)
            self.assert_errors(stylesheet.errors, expected_errors)

            for rule in stylesheet.rules:
                self.ae(rule.at_keyword, '@media')
            result = [
                (rule.media, [
                    (sub_rule.selector.as_css(), [
                        (decl.name, list(jsonify(decl.value)))
                        for decl in sub_rule.declarations])
                    for sub_rule in rule.rules
                ])
                for rule in stylesheet.rules
            ]
            self.ae(result, expected_rules)

71
ebook_converter/tinycss/tests/decoding.py
Normal file
@@ -0,0 +1,71 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from tinycss.decoding import decode
from tinycss.tests import BaseTest


def params(css, encoding, use_bom=False, expect_error=False, **kwargs):
    """Nicer syntax to make a tuple."""
    return css, encoding, use_bom, expect_error, kwargs


class TestDecoding(BaseTest):

    def test_decoding(self):
        for (css, encoding, use_bom, expect_error, kwargs) in [
            params('', 'utf8'),  # default to utf8
            params('𐂃', 'utf8'),
            params('é', 'latin1'),  # utf8 fails, fall back on ShiftJIS
            params('£', 'ShiftJIS', expect_error=True),
            params('£', 'ShiftJIS', protocol_encoding='Shift-JIS'),
            params('£', 'ShiftJIS', linking_encoding='Shift-JIS'),
            params('£', 'ShiftJIS', document_encoding='Shift-JIS'),
            params('£', 'ShiftJIS', protocol_encoding='utf8',
                   document_encoding='ShiftJIS'),
            params('@charset "utf8"; £', 'ShiftJIS', expect_error=True),
            params('@charset "utf£8"; £', 'ShiftJIS', expect_error=True),
            params('@charset "unknown-encoding"; £', 'ShiftJIS', expect_error=True),
            params('@charset "utf8"; £', 'ShiftJIS', document_encoding='ShiftJIS'),
            params('£', 'ShiftJIS', linking_encoding='utf8',
                   document_encoding='ShiftJIS'),
            params('@charset "utf-32"; 𐂃', 'utf-32-be'),
            params('@charset "Shift-JIS"; £', 'ShiftJIS'),
            params('@charset "ISO-8859-8"; £', 'ShiftJIS', expect_error=True),
            params('𐂃', 'utf-16-le', expect_error=True),  # no BOM
            params('𐂃', 'utf-16-le', use_bom=True),
            params('𐂃', 'utf-32-be', expect_error=True),
            params('𐂃', 'utf-32-be', use_bom=True),
            params('𐂃', 'utf-32-be', document_encoding='utf-32-be'),
            params('𐂃', 'utf-32-be', linking_encoding='utf-32-be'),
            params('@charset "utf-32-le"; 𐂃', 'utf-32-be',
                   use_bom=True, expect_error=True),
            # protocol_encoding takes precedence over @charset
            params('@charset "ISO-8859-8"; £', 'ShiftJIS',
                   protocol_encoding='Shift-JIS'),
            params('@charset "unknown-encoding"; £', 'ShiftJIS',
                   protocol_encoding='Shift-JIS'),
            params('@charset "Shift-JIS"; £', 'ShiftJIS',
                   protocol_encoding='utf8'),
            # @charset takes precedence over document_encoding
            params('@charset "Shift-JIS"; £', 'ShiftJIS',
                   document_encoding='ISO-8859-8'),
            # @charset takes precedence over linking_encoding
            params('@charset "Shift-JIS"; £', 'ShiftJIS',
                   linking_encoding='ISO-8859-8'),
            # linking_encoding takes precedence over document_encoding
            params('£', 'ShiftJIS',
                   linking_encoding='Shift-JIS', document_encoding='ISO-8859-8'),
        ]:
            if use_bom:
                source = '\ufeff' + css
            else:
                source = css
            css_bytes = source.encode(encoding)
            result, result_encoding = decode(css_bytes, **kwargs)
            if expect_error:
                self.assertNotEqual(result, css)
            else:
                self.ae(result, css)

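A minimal sketch of the decode() API exercised above (illustrative): decode() returns
the decoded text and the encoding actually used; per the precedence checked in these
tests, a protocol-level encoding wins over an @charset rule, which in turn wins over
the linking and document encodings:

    from tinycss.decoding import decode
    text, used = decode('@charset "Shift-JIS"; £'.encode('Shift-JIS'))
    # text == '@charset "Shift-JIS"; £'
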
66
ebook_converter/tinycss/tests/fonts3.py
Normal file
@@ -0,0 +1,66 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from tinycss.fonts3 import CSSFonts3Parser, parse_font_family, parse_font, serialize_font
from tinycss.tests import BaseTest

from polyglot.builtins import iteritems


class TestFonts3(BaseTest):

    def test_font_face(self):
        'Test parsing of font face rules'
        for css, expected_declarations, expected_errors in [
            ('@font-face {}', [], []),

            ('@font-face { font-family: Moose; src: url(font1.ttf) }',
             [('font-family', [('IDENT', 'Moose')]), ('src', [('URI', 'font1.ttf')])], []),
        ]:
            stylesheet = CSSFonts3Parser().parse_stylesheet(css)
            self.assert_errors(stylesheet.errors, expected_errors)
            self.ae(len(stylesheet.rules), 1)
            rule = stylesheet.rules[0]
            self.ae(self.jsonify_declarations(rule), expected_declarations)

        stylesheet = CSSFonts3Parser().parse_stylesheet('@font-face;')
        self.assert_errors(stylesheet.errors, ['missing block'])

    def test_parse_font_family(self):
        ' Test parsing of font-family values '
        for raw, q in iteritems({
            '"1as"': ['1as'],
            'A B C, serif': ['A B C', 'serif'],
            r'Red\/Black': ['Red/Black'],
            'A B': ['A B'],
            r'Ahem\!': ['Ahem!'],
            r'"Ahem!"': ['Ahem!'],
            '€42': ['€42'],
            r'Hawaii\ 5-0': ['Hawaii 5-0'],
            r'"X \"Y"': ['X "Y'],
            'A B, C D, "E", serif': ['A B', 'C D', 'E', 'serif'],
            '': [],
            '"", a': ['a'],
        }):
            self.ae(q, parse_font_family(raw))
        for single in ('serif', 'sans-serif', 'A B C'):
            self.ae([single], parse_font_family(single))

    def test_parse_font(self):
        def t(raw, **kw):
            q = {('line' if k == 'height' else 'font') + '-' + k: v for k, v in iteritems(kw)}
            self.ae(q, parse_font(raw))
            self.ae(q, parse_font(serialize_font(q)))
        t('caption', family=['sans-serif'])
        t('serif', family=['serif'])
        t('12pt/14pt sans-serif', size='12pt', height='14pt', family=['sans-serif'])
        t('80% sans-serif', size='80%', family=['sans-serif'])
        t('x-large/110% "new century schoolbook", serif', size='x-large', height='110%', family=['new century schoolbook', 'serif'])
        t('bold italic large Palatino, serif', weight='bold', style='italic', size='large', family=['Palatino', 'serif'])
        t('normal small-caps 120%/120% fantasy', style='normal', variant='small-caps', size='120%', height='120%', family=['fantasy'])
        t('condensed oblique 12pt Helvetica Neue, serif', stretch='condensed', style='oblique', size='12pt', family=['Helvetica Neue', 'serif'])
        t('300 italic 1.3em/1.7em FB Armada, sans-serif', weight='300', style='italic', size='1.3em', height='1.7em', family=['FB Armada', 'sans-serif'])

56
ebook_converter/tinycss/tests/main.py
Normal file
@@ -0,0 +1,56 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

import unittest, os, argparse


def find_tests():
    from calibre.utils.run_tests import find_tests_in_dir
    base = os.path.dirname(os.path.abspath(__file__))
    return find_tests_in_dir(base)


def run_tests(find_tests=find_tests, for_build=False):
    if not for_build:
        parser = argparse.ArgumentParser()
        parser.add_argument('name', nargs='?', default=None,
                            help='The name of the test to run')
        args = parser.parse_args()
    if not for_build and args.name and args.name.startswith('.'):
        tests = find_tests()
        q = args.name[1:]
        if not q.startswith('test_'):
            q = 'test_' + q
        ans = None
        try:
            for suite in tests:
                for test in suite._tests:
                    if test.__class__.__name__ == 'ModuleImportFailure':
                        raise Exception('Failed to import a test module: %s' % test)
                    for s in test:
                        if s._testMethodName == q:
                            ans = s
                            raise StopIteration()
        except StopIteration:
            pass
        if ans is None:
            print('No test named %s found' % args.name)
            raise SystemExit(1)
        tests = ans
    else:
        tests = unittest.defaultTestLoader.loadTestsFromName(args.name) if not for_build and args.name else find_tests()
    r = unittest.TextTestRunner
    if for_build:
        r = r(verbosity=0, buffer=True, failfast=True)
    else:
        r = r(verbosity=4)
    result = r.run(tests)
    if for_build and result.errors or result.failures:
        raise SystemExit(1)


if __name__ == '__main__':
    run_tests()

65
ebook_converter/tinycss/tests/media3.py
Normal file
@@ -0,0 +1,65 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from tinycss.media3 import CSSMedia3Parser, MediaQuery as MQ
from tinycss.tests import BaseTest, jsonify


def jsonify_expr(e):
    if e is None:
        return None
    return next(jsonify([e]))


def jsonify_expressions(mqlist):
    for mq in mqlist:
        mq.expressions = tuple(
            (k, jsonify_expr(e)) for k, e in mq.expressions)
    return mqlist


class TestMedia3(BaseTest):

    def test_media_queries(self):
        'Test parsing of media queries from the CSS 3 media module'
        for css, media_query_list, expected_errors in [
            # CSS 2.1 (simple media queries)
            ('@media {}', [MQ()], []),
            ('@media all {}', [MQ()], []),
            ('@media screen {}', [MQ('screen')], []),
            ('@media , screen {}', [MQ(), MQ('screen')], []),
            ('@media screen, {}', [MQ('screen'), MQ()], []),

            # Examples from the CSS 3 specs
            ('@media screen and (color) {}', [MQ('screen', (('color', None),))], []),
            ('@media all and (min-width:500px) {}', [
                MQ('all', (('min-width', ('DIMENSION', 500)),))], []),
            ('@media (min-width:500px) {}', [
                MQ('all', (('min-width', ('DIMENSION', 500)),))], []),
            ('@media (orientation: portrait) {}', [
                MQ('all', (('orientation', ('IDENT', 'portrait')),))], []),
            ('@media screen and (color), projection and (color) {}', [
                MQ('screen', (('color', None),)), MQ('projection', (('color', None),))], []),
            ('@media not screen and (color) {}', [
                MQ('screen', (('color', None),), True)], []),
            ('@media only screen and (color) {}', [
                MQ('screen', (('color', None),))], []),
            ('@media aural and (device-aspect-ratio: 16/9) {}', [
                MQ('aural', (('device-aspect-ratio', ('RATIO', (16, 9))),))], []),
            ('@media (resolution: 166dpi) {}', [
                MQ('all', (('resolution', ('DIMENSION', 166)),))], []),
            ('@media (min-resolution: 166DPCM) {}', [
                MQ('all', (('min-resolution', ('DIMENSION', 166)),))], []),

            # Malformed media queries
            ('@media (example, all,), speech {}', [MQ(negated=True), MQ('speech')], ['expected a :']),
            ('@media &test, screen {}', [MQ(negated=True), MQ('screen')], ['expected a media expression not a DELIM']),

        ]:
            stylesheet = CSSMedia3Parser().parse_stylesheet(css)
            self.assert_errors(stylesheet.errors, expected_errors)
            self.ae(len(stylesheet.rules), 1)
            rule = stylesheet.rules[0]
            self.ae(jsonify_expressions(rule.media), media_query_list)

86
ebook_converter/tinycss/tests/page3.py
Normal file
@@ -0,0 +1,86 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from tinycss.page3 import CSSPage3Parser
from tinycss.tests import BaseTest


class TestPage3(BaseTest):

    def test_selectors(self):
        for css, expected_selector, expected_specificity, expected_errors in [
            ('@page {}', (None, None), (0, 0, 0), []),

            ('@page :first {}', (None, 'first'), (0, 1, 0), []),
            ('@page:left{}', (None, 'left'), (0, 0, 1), []),
            ('@page :right {}', (None, 'right'), (0, 0, 1), []),
            ('@page :blank{}', (None, 'blank'), (0, 1, 0), []),
            ('@page :last {}', None, None, ['invalid @page selector']),
            ('@page : first {}', None, None, ['invalid @page selector']),

            ('@page foo:first {}', ('foo', 'first'), (1, 1, 0), []),
            ('@page bar :left {}', ('bar', 'left'), (1, 0, 1), []),
            (r'@page \26:right {}', ('&', 'right'), (1, 0, 1), []),

            ('@page foo {}', ('foo', None), (1, 0, 0), []),
            (r'@page \26 {}', ('&', None), (1, 0, 0), []),

            ('@page foo fist {}', None, None, ['invalid @page selector']),
            ('@page foo, bar {}', None, None, ['invalid @page selector']),
            ('@page foo&first {}', None, None, ['invalid @page selector']),
        ]:
            stylesheet = CSSPage3Parser().parse_stylesheet(css)
            self.assert_errors(stylesheet.errors, expected_errors)

            if stylesheet.rules:
                self.ae(len(stylesheet.rules), 1)
                rule = stylesheet.rules[0]
                self.ae(rule.at_keyword, '@page')
                selector = rule.selector
                self.ae(rule.specificity, expected_specificity)
            else:
                selector = None
            self.ae(selector, expected_selector)

    def test_content(self):
        for css, expected_declarations, expected_rules, expected_errors in [
            ('@page {}', [], [], []),
            ('@page { foo: 4; bar: z }',
             [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])], [], []),
            ('''@page { foo: 4;
                @top-center { content: "Awesome Title" }
                @bottom-left { content: counter(page) }
                bar: z
            }''',
             [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])],
             [('@top-center', [('content', [('STRING', 'Awesome Title')])]),
              ('@bottom-left', [('content', [
                  ('FUNCTION', 'counter', [('IDENT', 'page')])])])],
             []),
            ('''@page { foo: 4;
                @bottom-top { content: counter(page) }
                bar: z
            }''',
             [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])],
             [],
             ['unknown at-rule in @page context: @bottom-top']),

            ('@page{} @top-right{}', [], [], [
                '@top-right rule not allowed in stylesheet']),
            ('@page{ @top-right 4 {} }', [], [], [
                'unexpected INTEGER token in @top-right rule header']),
            # Not many error-recovery tests here. This should be covered in test_css21
        ]:
            stylesheet = CSSPage3Parser().parse_stylesheet(css)
            self.assert_errors(stylesheet.errors, expected_errors)

            self.ae(len(stylesheet.rules), 1)
            rule = stylesheet.rules[0]
            self.ae(rule.at_keyword, '@page')
            self.ae(self.jsonify_declarations(rule), expected_declarations)
            rules = [(margin_rule.at_keyword, self.jsonify_declarations(margin_rule))
                     for margin_rule in rule.at_rules]
            self.ae(rules, expected_rules)

269
ebook_converter/tinycss/tests/tokenizing.py
Normal file
@@ -0,0 +1,269 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from tinycss.tests import BaseTest, jsonify
from tinycss.tokenizer import python_tokenize_flat, c_tokenize_flat, regroup


if c_tokenize_flat is None:
    tokenizers = (python_tokenize_flat,)
else:
    tokenizers = (python_tokenize_flat, c_tokenize_flat)
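
# Illustrative only: both tokenizers share the same flat-token API, e.g.
#     [(t.type, t.value) for t in python_tokenize_flat('a{b:4}')]
#     # -> [('IDENT', 'a'), ('{', '{'), ('IDENT', 'b'), (':', ':'),
#     #     ('INTEGER', 4), ('}', '}')]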
|
||||
def token_api(self, tokenize):
|
||||
for css_source in [
|
||||
'(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
|
||||
]:
|
||||
tokens = list(regroup(tokenize(css_source)))
|
||||
self.ae(len(tokens), 1)
|
||||
self.ae(len(tokens[0].content), 7)
|
||||
|
||||
def token_serialize_css(self, tokenize):
|
||||
for tokenize in tokenizers:
|
||||
for css_source in [
|
||||
r'''p[example="\
|
||||
foo(int x) {\
|
||||
this.x = x;\
|
||||
}\
|
||||
"]''',
|
||||
'"Lorem\\26Ipsum\ndolor" sit',
|
||||
'/* Lorem\nipsum */\fa {\n color: red;\tcontent: "dolor\\\fsit" }',
|
||||
'not([[lorem]]{ipsum (42)})',
|
||||
'a[b{d]e}',
|
||||
'a[b{"d',
|
||||
]:
|
||||
for _regroup in (regroup, lambda x: x):
|
||||
tokens = _regroup(tokenize(css_source, ignore_comments=False))
|
||||
result = ''.join(token.as_css() for token in tokens)
|
||||
self.ae(result, css_source)
|
||||
|
||||


def comments(self, tokenize):
    for ignore_comments, expected_tokens in [
        (False, [
            ('COMMENT', '/* lorem */'),
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
                ('COMMENT', '/* sit */'),
            ]),
            ('BAD_COMMENT', '/* amet')
        ]),
        (True, [
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
            ]),
        ]),
    ]:
        css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
        tokens = regroup(tokenize(css_source, ignore_comments))
        result = list(jsonify(tokens))
        self.ae(result, expected_tokens)


def token_grouping(self, tokenize):
    for css_source, expected_tokens in [
        ('', []),
        (r'Lorem\26 "i\psum"4px', [
            ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),

        ('not([[lorem]]{ipsum (42)})', [
            ('FUNCTION', 'not', [
                ('[', [
                    ('[', [
                        ('IDENT', 'lorem'),
                    ]),
                ]),
                ('{', [
                    ('IDENT', 'ipsum'),
                    ('S', ' '),
                    ('(', [
                        ('INTEGER', 42),
                    ])
                ])
            ])]),

        # Close everything at EOF, no error
        ('a[b{"d', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('STRING', 'd'),
                ]),
            ]),
        ]),

        # Any remaining ), ] or } token is a nesting error
        ('a[b{d]e}', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                    (']', ']'),  # The error is visible here
                    ('IDENT', 'e'),
                ]),
            ]),
        ]),
        # For reference, the same structure with correct nesting:
        ('a[b{d}e]', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                ]),
                ('IDENT', 'e'),
            ]),
        ]),
    ]:
        tokens = regroup(tokenize(css_source, ignore_comments=False))
        result = list(jsonify(tokens))
        self.ae(result, expected_tokens)


def positions(self, tokenize):
    css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
    tokens = tokenize(css, ignore_comments=False)
    result = [(token.type, token.line, token.column) for token in tokens]
    self.ae(result, [
        ('COMMENT', 1, 1), ('S', 2, 9),
        ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
        ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
        ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
        ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
        ('S', 5, 5), ('}', 5, 6)])


def tokens(self, tokenize):
    for css_source, expected_tokens in [
        ('', []),
        ('red -->',
            [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
        # Longest match rule: no CDC
        ('red-->',
            [('IDENT', 'red--'), ('DELIM', '>')]),

        (r'''p[example="\
foo(int x) {\
 this.x = x;\
}\
"]''', [
            ('IDENT', 'p'),
            ('[', '['),
            ('IDENT', 'example'),
            ('DELIM', '='),
            ('STRING', 'foo(int x) { this.x = x;}'),
            (']', ']')]),

        # Numbers are parsed
        ('42 .5 -4pX 1.25em 30%',
            [('INTEGER', 42), ('S', ' '),
             ('NUMBER', .5), ('S', ' '),
             # units are normalized to lower-case:
             ('DIMENSION', -4, 'px'), ('S', ' '),
             ('DIMENSION', 1.25, 'em'), ('S', ' '),
             ('PERCENTAGE', 30, '%')]),

        # URLs are extracted
        ('url(foo.png)', [('URI', 'foo.png')]),
        ('url("foo.png")', [('URI', 'foo.png')]),

        # Escaping

        (r'/* Comment with a \ backslash */',
            [('COMMENT', '/* Comment with a \ backslash */')]),  # Unchanged

        # backslash followed by a newline in a string: ignored
        ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),

        # backslash followed by a newline outside a string: stands for itself
        ('Lorem\\\nIpsum', [
            ('IDENT', 'Lorem'), ('DELIM', '\\'),
            ('S', '\n'), ('IDENT', 'Ipsum')]),

        # Cancel the meaning of special characters
        (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]),  # or not special
        (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
        (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
        (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
        (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
        (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
        (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
        (r'Lorem+Ipsum', [('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
        (r'url(foo\).png)', [('URI', 'foo).png')]),

        # Unicode and backslash escaping
        ('\\26 B', [('IDENT', '&B')]),
        ('\\&B', [('IDENT', '&B')]),
        ('@\\26\tB', [('ATKEYWORD', '@&B')]),
        ('@\\&B', [('ATKEYWORD', '@&B')]),
        ('#\\26\nB', [('HASH', '#&B')]),
        ('#\\&B', [('HASH', '#&B')]),
        ('\\26\r\nB(', [('FUNCTION', '&B(')]),
        ('\\&B(', [('FUNCTION', '&B(')]),
        (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
        (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]),  # max 6 digits
        (r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
        (r'"\26 B"', [('STRING', '&B')]),
        (r"'\000026B'", [('STRING', '&B')]),
        (r'"\&B"', [('STRING', '&B')]),
        (r'url("\26 B")', [('URI', '&B')]),
        (r'url(\26 B)', [('URI', '&B')]),
        (r'url("\&B")', [('URI', '&B')]),
        (r'url(\&B)', [('URI', '&B')]),
        (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),

        # Bad strings

        # String ends at EOF without closing: no error, parsed
        ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
        # Unescaped newline: ends the string, error, unparsed
        ('"Lorem\\26Ipsum\n', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
        # Tokenization restarts after the newline, so the second " starts
        # a new string (which ends at EOF without errors, as above.)
        ('"Lorem\\26Ipsum\ndolor" sit', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
            ('IDENT', 'dolor'), ('STRING', ' sit')]),

    ]:
        sources = [css_source]
        for css_source in sources:
            tokens = tokenize(css_source, ignore_comments=False)
            result = [
                (token.type, token.value) + (
                    () if token.unit is None else (token.unit,))
                for token in tokens
            ]
            self.ae(result, expected_tokens)


class TestTokenizer(BaseTest):

    def run_test(self, func):
        for tokenize in tokenizers:
            func(self, tokenize)

    def test_token_api(self):
        self.run_test(token_api)

    def test_token_serialize_css(self):
        self.run_test(token_serialize_css)

    def test_comments(self):
        self.run_test(comments)

    def test_token_grouping(self):
        self.run_test(token_grouping)

    def test_positions(self):
        """Test the reported line/column position of each token."""
        self.run_test(positions)

    def test_tokens(self):
        self.run_test(tokens)
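
Taken together, these helpers pin down the public tokenizer API. A minimal standalone sketch of the same round trip, under the imports used above (the sample input is made up):

    from tinycss.tokenizer import python_tokenize_flat, regroup

    tokens = list(regroup(python_tokenize_flat('func(1, 2)', ignore_comments=True)))
    assert tokens[0].type == 'FUNCTION'
    assert tokens[0].function_name == 'func'
    # Serialization is lossless:
    assert ''.join(t.as_css() for t in tokens) == 'func(1, 2)'
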
450
ebook_converter/tinycss/token_data.py
Normal file
@@ -0,0 +1,450 @@
# coding: utf8
"""
tinycss.token_data
------------------

Shared data for both implementations (Cython and Python) of the tokenizer.

:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""

from __future__ import unicode_literals

import re
import sys
import operator
import functools
import string


# * Raw strings with the r'' notation are used so that \ do not need
#   to be escaped.
# * Names and regexps are separated by a tabulation.
# * Macros are re-ordered so that only previous definitions are needed.
# * {} are used for macro substitution with ``string.Formatter``,
#   so other uses of { or } have been doubled.
# * The syntax is otherwise compatible with re.compile.
# * Some parentheses were added to add capturing groups.
#   (in unicode, DIMENSION and URI)

# *** Willful violation: ***
# Numbers can take a + or - sign, but the sign is a separate DELIM token.
# Since comments are allowed anywhere between tokens, this makes the
# following valid. It means 10 negative pixels:
#     margin-top: -/**/10px

# This makes parsing numbers a pain, so instead we’ll do the same as Firefox
# and make the sign part of the 'num' macro. The above CSS will be invalid.
# See discussion:
# http://lists.w3.org/Archives/Public/www-style/2011Oct/0028.html
MACROS = r'''
nl	\n|\r\n|\r|\f
w	[ \t\r\n\f]*
nonascii	[^\0-\237]
unicode	\\([0-9a-f]{{1,6}})(\r\n|[ \n\r\t\f])?
simple_escape	[^\n\r\f0-9a-f]
escape	{unicode}|\\{simple_escape}
nmstart	[_a-z]|{nonascii}|{escape}
nmchar	[_a-z0-9-]|{nonascii}|{escape}
name	{nmchar}+
ident	[-]?{nmstart}{nmchar}*
num	[-+]?(?:[0-9]*\.[0-9]+|[0-9]+)
string1	\"([^\n\r\f\\"]|\\{nl}|{escape})*\"
string2	\'([^\n\r\f\\']|\\{nl}|{escape})*\'
string	{string1}|{string2}
badstring1	\"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
badstring2	\'([^\n\r\f\\']|\\{nl}|{escape})*\\?
badstring	{badstring1}|{badstring2}
badcomment1	\/\*[^*]*\*+([^/*][^*]*\*+)*
badcomment2	\/\*[^*]*(\*+[^/*][^*]*)*
badcomment	{badcomment1}|{badcomment2}
baduri1	url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}
baduri2	url\({w}{string}{w}
baduri3	url\({w}{badstring}
baduri	{baduri1}|{baduri2}|{baduri3}
'''.replace(r'\0', '\0').replace(r'\237', '\237')

# Removed these tokens. Instead, they’re tokenized as two DELIM each.
#     INCLUDES	~=
#     DASHMATCH	|=
# They are only used in selectors but selectors3 also have ^=, *= and $=.
# We don’t actually parse selectors anyway

# Re-ordered so that the longest match is always the first.
# For example, "url('foo')" matches URI, BAD_URI, FUNCTION and IDENT,
# but URI would always be a longer match than the others.
TOKENS = r'''
S	[ \t\r\n\f]+

URI	url\({w}({string}|([!#$%&*-\[\]-~]|{nonascii}|{escape})*){w}\)
BAD_URI	{baduri}
FUNCTION	{ident}\(
UNICODE-RANGE	u\+[0-9a-f?]{{1,6}}(-[0-9a-f]{{1,6}})?
IDENT	{ident}

ATKEYWORD	@{ident}
HASH	#{name}

DIMENSION	({num})({ident})
PERCENTAGE	{num}%
NUMBER	{num}

STRING	{string}
BAD_STRING	{badstring}

COMMENT	\/\*[^*]*\*+([^/*][^*]*\*+)*\/
BAD_COMMENT	{badcomment}

:	:
;	;
{	\{{
}	\}}
(	\(
)	\)
[	\[
]	\]
CDO	<!--
CDC	-->
'''

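# For illustration only (a sketch, not part of the original module): after
# _init() below runs, every macro has been expanded recursively and wrapped
# in a non-capturing group, so for example:
#     COMPILED_MACROS['nl'] == '(?:\\n|\\r\\n|\\r|\\f)'
#     COMPILED_MACROS['w'] == '(?:[ \\t\\r\\n\\f]*)'
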
# Strings with {macro} expanded
COMPILED_MACROS = {}


COMPILED_TOKEN_REGEXPS = []  # [(name, regexp.match)] ordered
COMPILED_TOKEN_INDEXES = {}  # {name: i} helper for the C speedups


# Indexed by codepoint value of the first character of a token.
# Codepoints >= 160 (aka nonascii) all use the index 160.
# values are (i, name, regexp.match)
TOKEN_DISPATCH = []


try:
    unichr
except NameError:
    # Python 3
    unichr = chr
    unicode = str


def _init():
    """Import-time initialization."""
    COMPILED_MACROS.clear()
    for line in MACROS.splitlines():
        if line.strip():
            name, value = line.split('\t')
            COMPILED_MACROS[name.strip()] = '(?:%s)' \
                % value.format(**COMPILED_MACROS)

    COMPILED_TOKEN_REGEXPS[:] = (
        (
            name.strip(),
            re.compile(
                value.format(**COMPILED_MACROS),
                # Case-insensitive when matching eg. uRL(foo)
                # but preserve the case in extracted groups
                re.I
            ).match
        )
        for line in TOKENS.splitlines()
        if line.strip()
        for name, value in [line.split('\t')]
    )

    COMPILED_TOKEN_INDEXES.clear()
    for i, (name, regexp) in enumerate(COMPILED_TOKEN_REGEXPS):
        COMPILED_TOKEN_INDEXES[name] = i

    dispatch = [[] for i in range(161)]
    for chars, names in [
        (' \t\r\n\f', ['S']),
        ('uU', ['URI', 'BAD_URI', 'UNICODE-RANGE']),
        # \ is an escape outside of another token
        (string.ascii_letters + '\\_-' + unichr(160), ['FUNCTION', 'IDENT']),
        (string.digits + '.+-', ['DIMENSION', 'PERCENTAGE', 'NUMBER']),
        ('@', ['ATKEYWORD']),
        ('#', ['HASH']),
        ('\'"', ['STRING', 'BAD_STRING']),
        ('/', ['COMMENT', 'BAD_COMMENT']),
        ('<', ['CDO']),
        ('-', ['CDC']),
    ]:
        for char in chars:
            dispatch[ord(char)].extend(names)
    for char in ':;{}()[]':
        dispatch[ord(char)] = [char]

    TOKEN_DISPATCH[:] = (
        [
            (index,) + COMPILED_TOKEN_REGEXPS[index]
            for name in names
            for index in [COMPILED_TOKEN_INDEXES[name]]
        ]
        for names in dispatch
    )


_init()


def _unicode_replace(match, int=int, unichr=unichr, maxunicode=sys.maxunicode):
    codepoint = int(match.group(1), 16)
    if codepoint <= maxunicode:
        return unichr(codepoint)
    else:
        return '\N{REPLACEMENT CHARACTER}'  # U+FFFD


UNICODE_UNESCAPE = functools.partial(
    re.compile(COMPILED_MACROS['unicode'], re.I).sub,
    _unicode_replace)

NEWLINE_UNESCAPE = functools.partial(
    re.compile(r'()\\' + COMPILED_MACROS['nl']).sub,
    '')

SIMPLE_UNESCAPE = functools.partial(
    re.compile(r'\\(%s)' % COMPILED_MACROS['simple_escape'], re.I).sub,
    # Same as r'\1', but faster on CPython
    operator.methodcaller('group', 1))

FIND_NEWLINES = lambda x: list(re.compile(COMPILED_MACROS['nl']).finditer(x))


class Token(object):
    r"""A single atomic token.

    .. attribute:: is_container

        Always ``False``.
        Helps to tell :class:`Token` apart from :class:`ContainerToken`.

    .. attribute:: type

        The type of token as a string:

        ``S``
            A sequence of white space

        ``IDENT``
            An identifier: a name that does not start with a digit.
            A name is a sequence of letters, digits, ``_``, ``-``, escaped
            characters and non-ASCII characters. Eg: ``margin-left``

        ``HASH``
            ``#`` followed immediately by a name. Eg: ``#ff8800``

        ``ATKEYWORD``
            ``@`` followed immediately by an identifier. Eg: ``@page``

        ``URI``
            Eg: ``url(foo)`` The content may or may not be quoted.

        ``UNICODE-RANGE``
            ``U+`` followed by one or two hexadecimal
            Unicode codepoints. Eg: ``U+20-00FF``

        ``INTEGER``
            An integer with an optional ``+`` or ``-`` sign

        ``NUMBER``
            A non-integer number with an optional ``+`` or ``-`` sign

        ``DIMENSION``
            An integer or number followed immediately by an
            identifier (the unit). Eg: ``12px``

        ``PERCENTAGE``
            An integer or number followed immediately by ``%``

        ``STRING``
            A string, quoted with ``"`` or ``'``

        ``:`` or ``;``
            That character.

        ``DELIM``
            A single character not matched in another token. Eg: ``,``

        See the source of the :mod:`.token_data` module for the precise
        regular expressions that match various tokens.

        Note that other token types exist in the early tokenization steps,
        but these are ignored, are syntax errors, or are later transformed
        into :class:`ContainerToken` or :class:`FunctionToken`.

    .. attribute:: value

        The parsed value:

        * INTEGER, NUMBER, PERCENTAGE or DIMENSION tokens: the numeric value
          as an int or float.
        * STRING tokens: the unescaped string without quotes
        * URI tokens: the unescaped URI without quotes or
          ``url(`` and ``)`` markers.
        * IDENT, ATKEYWORD or HASH tokens: the unescaped token,
          with ``@`` or ``#`` markers left as-is
        * Other tokens: same as :attr:`as_css`

        *Unescaped* refers to the various escaping methods based on the
        backslash ``\`` character in CSS syntax.

    .. attribute:: unit

        * DIMENSION tokens: the normalized (unescaped, lower-case)
          unit name as a string. eg. ``'px'``
        * PERCENTAGE tokens: the string ``'%'``
        * Other tokens: ``None``

    .. attribute:: line

        The line number in the CSS source of the start of this token.

    .. attribute:: column

        The column number (inside a source line) of the start of this token.

    """
    is_container = False
    __slots__ = 'type', '_as_css', 'value', 'unit', 'line', 'column'

    def __init__(self, type_, css_value, value, unit, line, column):
        self.type = type_
        self._as_css = css_value
        self.value = value
        self.unit = unit
        self.line = line
        self.column = column

    def as_css(self):
        """
        Return as a Unicode string the CSS representation of the token,
        as parsed in the source.
        """
        return self._as_css

    def __repr__(self):
        return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
                .format(self, self.unit or ''))


class ContainerToken(object):
    """A token that contains other (nested) tokens.

    .. attribute:: is_container

        Always ``True``.
        Helps to tell :class:`ContainerToken` apart from :class:`Token`.

    .. attribute:: type

        The type of token as a string. One of ``{``, ``(``, ``[`` or
        ``FUNCTION``. For ``FUNCTION``, the object is actually a
        :class:`FunctionToken`.

    .. attribute:: unit

        Always ``None``. Included to make :class:`ContainerToken` behave
        more like :class:`Token`.

    .. attribute:: content

        A list of :class:`Token` or nested :class:`ContainerToken`,
        not including the opening or closing token.

    .. attribute:: line

        The line number in the CSS source of the start of this token.

    .. attribute:: column

        The column number (inside a source line) of the start of this token.

    """
    is_container = True
    unit = None
    __slots__ = 'type', '_css_start', '_css_end', 'content', 'line', 'column'

    def __init__(self, type_, css_start, css_end, content, line, column):
        self.type = type_
        self._css_start = css_start
        self._css_end = css_end
        self.content = content
        self.line = line
        self.column = column

    def as_css(self):
        """
        Return as a Unicode string the CSS representation of the token,
        as parsed in the source.
        """
        parts = [self._css_start]
        parts.extend(token.as_css() for token in self.content)
        parts.append(self._css_end)
        return ''.join(parts)

    format_string = '<ContainerToken {0.type} at {0.line}:{0.column}>'

    def __repr__(self):
        return (self.format_string + ' {0.content}').format(self)


class FunctionToken(ContainerToken):
    """A specialized :class:`ContainerToken` for a ``FUNCTION`` group.
    Has an additional attribute:

    .. attribute:: function_name

        The unescaped name of the function, with the ``(`` marker removed.

    """
    __slots__ = 'function_name',

    def __init__(self, type_, css_start, css_end, function_name, content,
                 line, column):
        super(FunctionToken, self).__init__(
            type_, css_start, css_end, content, line, column)
        # Remove the ( marker:
        self.function_name = function_name[:-1]

    format_string = ('<FunctionToken {0.function_name}() at '
                     '{0.line}:{0.column}>')


class TokenList(list):
    """
    A mixed list of :class:`~.token_data.Token` and
    :class:`~.token_data.ContainerToken` objects.

    This is a subclass of the builtin :class:`~builtins.list` type.
    It can be iterated, indexed and sliced as usual, but also has some
    additional API:

    """
    @property
    def line(self):
        """The line number in the CSS source of the first token."""
        return self[0].line

    @property
    def column(self):
        """The column number (inside a source line) of the first token."""
        return self[0].column

    def as_css(self):
        """
        Return as a Unicode string the CSS representation of the tokens,
        as parsed in the source.
        """
        return ''.join(token.as_css() for token in self)


def load_c_tokenizer():
    from calibre.constants import plugins
    tokenizer, err = plugins['tokenizer']
    if err:
        raise RuntimeError('Failed to load module tokenizer: %s' % err)
    tokens = list(':;(){}[]') + ['DELIM', 'INTEGER', 'STRING']
    tokenizer.init(COMPILED_TOKEN_REGEXPS, UNICODE_UNESCAPE, NEWLINE_UNESCAPE,
                   SIMPLE_UNESCAPE, FIND_NEWLINES, TOKEN_DISPATCH,
                   COMPILED_TOKEN_INDEXES, *tokens)
    return tokenizer
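
A quick sketch of how the unescaping helpers defined above behave (the inputs are made up; this is illustrative, not part of the module):

    from tinycss import token_data

    # \26 is the codepoint of '&'; the escape may consume one following space
    assert token_data.UNICODE_UNESCAPE(r'Lorem \26 Ipsum') == 'Lorem &Ipsum'
    # A backslash before a non-hex character simply cancels its meaning
    assert token_data.SIMPLE_UNESCAPE(r'Lorem\+Ipsum') == 'Lorem+Ipsum'
    # Escaped newlines inside strings are dropped entirely
    assert token_data.NEWLINE_UNESCAPE('Lorem\\\nIpsum') == 'LoremIpsum'
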
504
ebook_converter/tinycss/tokenizer.c
Normal file
@@ -0,0 +1,504 @@
/*
 * tokenizer.c
 * Copyright (C) 2014 Kovid Goyal <kovid at kovidgoyal.net>
 *
 * Distributed under terms of the GPL3 license.
 */

#define UNICODE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <structmember.h>

// Token type definition {{{
typedef struct {
    PyObject_HEAD
    // Type-specific fields go here.
    PyObject *is_container;
    PyObject *type;
    PyObject *_as_css;
    PyObject *value;
    PyObject *unit;
    PyObject *line;
    PyObject *column;

} tokenizer_Token;

static void
tokenizer_Token_dealloc(tokenizer_Token* self)
{
    Py_XDECREF(self->is_container); self->is_container = NULL;
    Py_XDECREF(self->type); self->type = NULL;
    Py_XDECREF(self->_as_css); self->_as_css = NULL;
    Py_XDECREF(self->value); self->value = NULL;
    Py_XDECREF(self->unit); self->unit = NULL;
    Py_XDECREF(self->line); self->line = NULL;
    Py_XDECREF(self->column); self->column = NULL;
    Py_TYPE(self)->tp_free((PyObject*)self);
}


static PyObject *
tokenizer_Token_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    tokenizer_Token *self = NULL;
    self = (tokenizer_Token *)type->tp_alloc(type, 0);
    if (self == NULL) return PyErr_NoMemory();

    if (!PyArg_ParseTuple(args, "OOOOOO", &(self->type), &(self->_as_css), &(self->value), &(self->unit), &(self->line), &(self->column))) {
        Py_TYPE(self)->tp_free((PyObject *) self);
        return NULL;
    }
    Py_INCREF(self->type); Py_INCREF(self->_as_css); Py_INCREF(self->value); Py_INCREF(self->unit); Py_INCREF(self->line); Py_INCREF(self->column);
    self->is_container = Py_False; Py_INCREF(self->is_container);

    return (PyObject *)self;
}

#if PY_MAJOR_VERSION >= 3
#define PyObject_Unicode_Compat(arg) PyObject_Str(arg)
#else
#define PyObject_Unicode_Compat(arg) PyObject_Unicode(arg)
#endif

static PyObject *
tokenizer_Token_repr(tokenizer_Token *self) {
    PyObject *type = NULL, *line = NULL, *column = NULL, *value = NULL, *ans = NULL, *unit = NULL;
    if (!self->type || !self->line || !self->column || !self->value)
        // Must return a unicode object: tp_repr may not return bytes on Python 3.
        return PyUnicode_FromString("<Token NULL fields>");
    type = PyObject_Unicode_Compat(self->type);
    line = PyObject_Unicode_Compat(self->line);
    column = PyObject_Unicode_Compat(self->column);
    value = PyObject_Unicode_Compat(self->value);
    if (type && line && column && value) {
        if (self->unit != NULL && PyObject_IsTrue(self->unit)) {
            unit = PyObject_Unicode_Compat(self->unit);
            if (unit != NULL)
                ans = PyUnicode_FromFormat("<Token %U at %U:%U %U%U>", type, line, column, value, unit);
            else
                PyErr_NoMemory();
        } else
            ans = PyUnicode_FromFormat("<Token %U at %U:%U %U>", type, line, column, value);
    } else PyErr_NoMemory();
    Py_XDECREF(type); Py_XDECREF(line); Py_XDECREF(column); Py_XDECREF(value); Py_XDECREF(unit);
    return ans;
}

static PyObject *
tokenizer_Token_as_css(tokenizer_Token *self, PyObject *args, PyObject *kwargs) {
    if (!self->_as_css) {
        Py_RETURN_NONE;
    }
    Py_INCREF(self->_as_css);
    return self->_as_css;
}

static PyMemberDef tokenizer_Token_members[] = {
    {"is_container", T_OBJECT_EX, offsetof(tokenizer_Token, is_container), 0, "False unless this token is a container for other tokens"},
    {"type", T_OBJECT_EX, offsetof(tokenizer_Token, type), 0, "The token type"},
    {"_as_css", T_OBJECT_EX, offsetof(tokenizer_Token, _as_css), 0, "Internal variable, use as_css() method instead."},
    {"value", T_OBJECT_EX, offsetof(tokenizer_Token, value), 0, "The token value"},
    {"unit", T_OBJECT_EX, offsetof(tokenizer_Token, unit), 0, "The token unit"},
    {"line", T_OBJECT_EX, offsetof(tokenizer_Token, line), 0, "The token line number"},
    {"column", T_OBJECT_EX, offsetof(tokenizer_Token, column), 0, "The token column number"},
    {NULL}  /* Sentinel */
};

static PyMethodDef tokenizer_Token_methods[] = {
    {"as_css", (PyCFunction)tokenizer_Token_as_css, METH_VARARGS,
     "as_css() -> Return the CSS representation of this token"
    },

    {NULL}  /* Sentinel */
};

static PyTypeObject tokenizer_TokenType = { // {{{
    PyVarObject_HEAD_INIT(NULL, 0)
    /* tp_name           */ "tokenizer.Token",
    /* tp_basicsize      */ sizeof(tokenizer_Token),
    /* tp_itemsize       */ 0,
    /* tp_dealloc        */ (destructor) tokenizer_Token_dealloc,
    /* tp_print          */ 0,
    /* tp_getattr        */ 0,
    /* tp_setattr        */ 0,
    /* tp_compare        */ 0,
    /* tp_repr           */ (reprfunc) tokenizer_Token_repr,
    /* tp_as_number      */ 0,
    /* tp_as_sequence    */ 0,
    /* tp_as_mapping     */ 0,
    /* tp_hash           */ 0,
    /* tp_call           */ 0,
    /* tp_str            */ 0,
    /* tp_getattro       */ 0,
    /* tp_setattro       */ 0,
    /* tp_as_buffer      */ 0,
    /* tp_flags          */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
    /* tp_doc            */ "Token",
    /* tp_traverse       */ 0,
    /* tp_clear          */ 0,
    /* tp_richcompare    */ 0,
    /* tp_weaklistoffset */ 0,
    /* tp_iter           */ 0,
    /* tp_iternext       */ 0,
    /* tp_methods        */ tokenizer_Token_methods,
    /* tp_members        */ tokenizer_Token_members,
    /* tp_getset         */ 0,
    /* tp_base           */ 0,
    /* tp_dict           */ 0,
    /* tp_descr_get      */ 0,
    /* tp_descr_set      */ 0,
    /* tp_dictoffset     */ 0,
    /* tp_init           */ 0,
    /* tp_alloc          */ 0,
    /* tp_new            */ tokenizer_Token_new,
}; // }}}
// }}}

static PyObject *COMPILED_TOKEN_REGEXPS = NULL, *UNICODE_UNESCAPE = NULL, *NEWLINE_UNESCAPE = NULL, *SIMPLE_UNESCAPE = NULL, *FIND_NEWLINES = NULL, *TOKEN_DISPATCH = NULL;
static PyObject *COLON = NULL, *SCOLON = NULL, *LPAR = NULL, *RPAR = NULL, *LBRACE = NULL, *RBRACE = NULL, *LBOX = NULL, *RBOX = NULL, *DELIM_TOK = NULL, *INTEGER = NULL, *STRING_TOK = NULL;

static Py_ssize_t BAD_COMMENT, BAD_STRING, PERCENTAGE, DIMENSION, ATKEYWORD, FUNCTION, COMMENT, NUMBER, STRING, IDENT, HASH, URI, DELIM = -1;

#define CLEANUP(x) Py_XDECREF((x)); x = NULL;

static PyObject*
tokenize_cleanup(PyObject *self, PyObject *args) {
    CLEANUP(COMPILED_TOKEN_REGEXPS); CLEANUP(UNICODE_UNESCAPE); CLEANUP(NEWLINE_UNESCAPE); CLEANUP(SIMPLE_UNESCAPE); CLEANUP(FIND_NEWLINES); CLEANUP(TOKEN_DISPATCH);
    CLEANUP(COLON); CLEANUP(SCOLON); CLEANUP(LPAR); CLEANUP(RPAR); CLEANUP(LBRACE); CLEANUP(RBRACE); CLEANUP(LBOX); CLEANUP(RBOX); CLEANUP(DELIM_TOK); CLEANUP(INTEGER); CLEANUP(STRING_TOK);
    Py_RETURN_NONE;
}

static PyObject*
tokenize_init(PyObject *self, PyObject *args) {
    PyObject *cti = NULL;

    if (COMPILED_TOKEN_REGEXPS != NULL) {
        tokenize_cleanup(NULL, NULL);
    }
    if (!PyArg_ParseTuple(args, "OOOOOOOOOOOOOOOOOO", &COMPILED_TOKEN_REGEXPS, &UNICODE_UNESCAPE, &NEWLINE_UNESCAPE, &SIMPLE_UNESCAPE, &FIND_NEWLINES, &TOKEN_DISPATCH, &cti, &COLON, &SCOLON, &LPAR, &RPAR, &LBRACE, &RBRACE, &LBOX, &RBOX, &DELIM_TOK, &INTEGER, &STRING_TOK)) return NULL;
    Py_INCREF(COMPILED_TOKEN_REGEXPS); Py_INCREF(UNICODE_UNESCAPE); Py_INCREF(NEWLINE_UNESCAPE); Py_INCREF(SIMPLE_UNESCAPE); Py_INCREF(FIND_NEWLINES); Py_INCREF(TOKEN_DISPATCH);
    Py_INCREF(COLON); Py_INCREF(SCOLON); Py_INCREF(LPAR); Py_INCREF(RPAR); Py_INCREF(LBRACE); Py_INCREF(RBRACE); Py_INCREF(LBOX); Py_INCREF(RBOX); Py_INCREF(DELIM_TOK); Py_INCREF(INTEGER); Py_INCREF(STRING_TOK);

#define SETCONST(x) do { (x) = PyNumber_AsSsize_t(PyDict_GetItemString(cti, #x), PyExc_OverflowError); \
    if ((x) == -1 && PyErr_Occurred() != NULL) { return NULL; } \
} while(0)
    SETCONST(BAD_COMMENT); SETCONST(BAD_STRING); SETCONST(PERCENTAGE); SETCONST(DIMENSION); SETCONST(ATKEYWORD); SETCONST(FUNCTION); SETCONST(COMMENT); SETCONST(NUMBER); SETCONST(STRING); SETCONST(IDENT); SETCONST(HASH); SETCONST(URI);

    Py_RETURN_NONE;
}

#if PY_VERSION_HEX >= 0x03030000
#define ITER_CODE_PTS(unicode_object) { \
    int _kind = PyUnicode_KIND(unicode_object); \
    void *_data = PyUnicode_DATA(unicode_object); \
    for (Py_ssize_t iteridx = 0; iteridx < PyUnicode_GET_LENGTH(unicode_object); iteridx++) { \
        Py_UCS4 ch = PyUnicode_READ(_kind, _data, iteridx);
#else
#define PyUnicode_GET_LENGTH PyUnicode_GET_SIZE
#define ITER_CODE_PTS(unicode_object) { \
    Py_UNICODE *_data = PyUnicode_AS_UNICODE(unicode_object); \
    Py_ssize_t iteridx; \
    for (iteridx = 0; iteridx < PyUnicode_GET_LENGTH(unicode_object); iteridx++) { \
        Py_UNICODE ch = _data[iteridx];
#endif

#define END_ITER_CODE_PTS }}

static PyObject *unicode_to_number(PyObject *src) {
#if PY_MAJOR_VERSION >= 3
    PyObject* ans = PyFloat_FromString(src);
#else
    PyObject* ans = PyFloat_FromString(src, NULL);
#endif
    double val = PyFloat_AsDouble(ans);
    long lval = (long)val;
    if (val - lval != 0) return ans;
    Py_DECREF(ans);
#if PY_MAJOR_VERSION >= 3
    return PyLong_FromLong(lval);
#else
    return PyInt_FromLong(lval);
#endif
}

static void lowercase(PyObject *x) {
    ITER_CODE_PTS(x)
        if ('A' <= ch && ch <= 'Z') {
#if PY_VERSION_HEX >= 0x03030000
            PyUnicode_WRITE(_kind, _data, iteridx, ch + 32);
#else
            _data[iteridx] += 32;
#endif
        }
    END_ITER_CODE_PTS
}

static PyObject*
clone_unicode(const PyObject* src, Py_ssize_t start_offset, Py_ssize_t end_offset) {
#if PY_VERSION_HEX >= 0x03030000
    int kind = PyUnicode_KIND(src);
    void *data;
    switch (kind) {
        case PyUnicode_1BYTE_KIND:
            data = PyUnicode_1BYTE_DATA(src) + start_offset; break;
        case PyUnicode_2BYTE_KIND:
            data = PyUnicode_2BYTE_DATA(src) + start_offset; break;
        case PyUnicode_4BYTE_KIND:
            data = PyUnicode_4BYTE_DATA(src) + start_offset; break;
        default:
            PyErr_SetString(PyExc_RuntimeError, "Invalid byte kind for unicode object");
            return NULL;
    }
    return PyUnicode_FromKindAndData(kind, data, PyUnicode_GET_LENGTH(src) - start_offset - end_offset);
#else
    return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(src) + start_offset, PyUnicode_GET_LENGTH(src) - start_offset - end_offset);
#endif
}

static PyObject*
tokenize_flat(PyObject *self, PyObject *args) {
#if PY_VERSION_HEX >= 0x03030000
    void *css_source = NULL; int css_kind; Py_UCS4 c = 0, codepoint = 0;
#define first_char(string) PyUnicode_READ_CHAR(string, 0)
#define unicode_from_data(data, sz) PyUnicode_FromKindAndData(css_kind, data, sz)
#else
    Py_UNICODE *css_source = NULL, c = 0, codepoint = 0;
#define first_char(string) PyUnicode_AS_UNICODE(string)[0]
#define unicode_from_data(data, sz) PyUnicode_FromUnicode(data, sz)
#endif
    PyObject *ic = NULL, *token = NULL, *tokens = NULL, *type_name = NULL, *css_value = NULL, *value = NULL, *unit = NULL, *tries = NULL, *match = NULL, *match_func = NULL, *py_source = NULL, *item = NULL, *newlines = NULL;
    int ignore_comments = 0;
    Py_ssize_t pos = 0, line = 1, column = 1, i = 0;
    Py_ssize_t length = 0, next_pos = 0, type_ = -1, source_len = 0;

    if (COMPILED_TOKEN_REGEXPS == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "tokenizer module not initialized. You must call init() first."); return NULL;
    }

    if (!PyArg_ParseTuple(args, "UO", &py_source, &ic)) return NULL;
    if (PyObject_IsTrue(ic)) ignore_comments = 1;
#if PY_VERSION_HEX >= 0x03030000
    if (PyUnicode_READY(py_source) != 0) return NULL;
    css_source = PyUnicode_DATA(py_source); css_kind = PyUnicode_KIND(py_source);
#else
    css_source = PyUnicode_AS_UNICODE(py_source);
#endif
    source_len = PyUnicode_GET_LENGTH(py_source);

    tokens = PyList_New(0);
    if (tokens == NULL) return PyErr_NoMemory();

#define UNESCAPE(x, func) item = PyObject_CallFunctionObjArgs(func, x, NULL); if (item == NULL) { goto error; } Py_DECREF(x); x = item; item = NULL;

#define TONUMBER(x) item = unicode_to_number(x); if (item == NULL) goto error; Py_DECREF(x); x = item; item = NULL;

#define SINGLE(x) { type_ = -1; type_name = x; Py_INCREF(type_name); css_value = x; Py_INCREF(css_value); }

    while (pos < source_len) {
#if PY_VERSION_HEX >= 0x03030000
        c = PyUnicode_READ(css_kind, css_source, pos);
#else
        c = css_source[pos];
#endif

        css_value = NULL; type_name = NULL; value = NULL; unit = NULL; match = NULL;

        if (c == ':') SINGLE(COLON) else if (c == ';') SINGLE(SCOLON) else if (c == '(') SINGLE(LPAR) else if (c == ')') SINGLE(RPAR) else if (c == '{') SINGLE(LBRACE) else if (c == '}') SINGLE(RBRACE) else if (c == '[') SINGLE(LBOX) else if (c == ']') SINGLE(RBOX) else
        {
            codepoint = (c > 160) ? 160 : c;
            tries = PyList_GET_ITEM(TOKEN_DISPATCH, codepoint);
            for (i = 0; i < PyList_Size(tries); i++) {
                item = PyList_GET_ITEM(tries, i);
                match_func = PyTuple_GET_ITEM(item, 2);
                match = PyObject_CallFunction(match_func, "On", py_source, pos);
                if (match == NULL) { goto error; }
                if (match != Py_None) {
                    css_value = PyObject_CallMethod(match, "group", NULL);
                    if (css_value == NULL) { goto error; }
                    type_ = PyNumber_AsSsize_t(PyTuple_GET_ITEM(item, 0), PyExc_OverflowError);
                    if (type_ == -1 && PyErr_Occurred() != NULL) { goto error; }
                    type_name = PyTuple_GET_ITEM(item, 1);
                    Py_INCREF(type_name);
                    break;
                }
            }
            if (css_value == NULL) { // No match
                type_ = DELIM; type_name = DELIM_TOK; Py_INCREF(type_name); css_value = unicode_from_data(&c, 1);
                if (css_value == NULL) { goto error; }
            }
        }

        length = PyUnicode_GET_LENGTH(css_value);
        next_pos = pos + length;

        // Now calculate the value and unit for this token (if any)
        if (! (ignore_comments && (type_ == COMMENT || type_ == BAD_COMMENT))) {
            if (type_ == DIMENSION) {
                value = PyObject_CallMethod(match, "group", "I", 1);
                if (value == NULL) { goto error; }
                TONUMBER(value);
                unit = PyObject_CallMethod(match, "group", "I", 2);
                if (unit == NULL) { goto error; }
                UNESCAPE(unit, SIMPLE_UNESCAPE);
                UNESCAPE(unit, UNICODE_UNESCAPE);
                lowercase(unit);
            } else

            if (type_ == PERCENTAGE) {
                if (PyUnicode_GET_LENGTH(css_value) > 0) {
                    value = clone_unicode(css_value, 0, 1);
                    if (value == NULL) goto error;
                } else { value = css_value; Py_INCREF(value); }
                if (value == NULL) goto error;
                TONUMBER(value);
                unit = PyUnicode_FromString("%");
                if (unit == NULL) goto error;
            } else

            if (type_ == NUMBER) {
                value = css_value; Py_INCREF(value);
                TONUMBER(value);
                if (!PyFloat_Check(value)) {
                    Py_XDECREF(type_name);
                    type_name = INTEGER;
                    Py_INCREF(type_name);
                }
            } else

            if (type_ == IDENT || type_ == ATKEYWORD || type_ == HASH || type_ == FUNCTION) {
                value = PyObject_CallFunctionObjArgs(SIMPLE_UNESCAPE, css_value, NULL);
                if (value == NULL) goto error;
                UNESCAPE(value, UNICODE_UNESCAPE);
            } else

            if (type_ == URI) {
                value = PyObject_CallMethod(match, "group", "I", 1);
                if (value == NULL) { goto error; }
                if (PyObject_IsTrue(value) && PyUnicode_GET_LENGTH(value) > 1 && (first_char(value) == '"' || first_char(value) == '\'')) {
                    item = clone_unicode(value, 1, 1);
                    if (item == NULL) goto error;
                    Py_DECREF(value); value = item; item = NULL;
                    UNESCAPE(value, NEWLINE_UNESCAPE);
                }
                UNESCAPE(value, SIMPLE_UNESCAPE);
                UNESCAPE(value, UNICODE_UNESCAPE);
            } else

            if (type_ == STRING) {
                if (PyObject_IsTrue(css_value) && PyUnicode_GET_LENGTH(css_value) > 1) { // remove quotes
                    value = clone_unicode(css_value, 1, 1);
                } else {
                    value = css_value; Py_INCREF(value);
                }
                UNESCAPE(value, NEWLINE_UNESCAPE);
                UNESCAPE(value, SIMPLE_UNESCAPE);
                UNESCAPE(value, UNICODE_UNESCAPE);
            } else

            if (type_ == BAD_STRING && next_pos == source_len) {
                Py_XDECREF(type_name); type_name = STRING_TOK; Py_INCREF(type_name);
                if (PyObject_IsTrue(css_value) && PyUnicode_GET_LENGTH(css_value) > 0) { // remove quote
                    value = clone_unicode(css_value, 1, 0);
                } else {
                    value = css_value; Py_INCREF(value);
                }
                UNESCAPE(value, NEWLINE_UNESCAPE);
                UNESCAPE(value, SIMPLE_UNESCAPE);
                UNESCAPE(value, UNICODE_UNESCAPE);
            } else {
                value = css_value; Py_INCREF(value);
            } // if(type_ == ...)

            if (unit == NULL) { unit = Py_None; Py_INCREF(unit); }
            item = Py_BuildValue("OOOOnn", type_name, css_value, value, unit, line, column);
            if (item == NULL) goto error;
            token = PyObject_CallObject((PyObject *) &tokenizer_TokenType, item);
            Py_DECREF(item); item = NULL;
            if (token == NULL) goto error;
            if (PyList_Append(tokens, token) != 0) { Py_DECREF(token); token = NULL; goto error; }
            Py_DECREF(token);

        } // if(!(ignore_comments...

        Py_XDECREF(match); match = NULL;

        pos = next_pos;
        newlines = PyObject_CallFunctionObjArgs(FIND_NEWLINES, css_value, NULL);
        if (newlines == NULL) goto error;
        Py_XDECREF(css_value); css_value = NULL; Py_XDECREF(type_name); type_name = NULL; Py_XDECREF(value); value = NULL; Py_XDECREF(unit); unit = NULL;
        if (PyObject_IsTrue(newlines)) {
            line += PyList_Size(newlines);
            item = PyObject_CallMethod(PyList_GET_ITEM(newlines, PyList_Size(newlines) - 1), "end", NULL);
            if (item == NULL) { Py_DECREF(newlines); newlines = NULL; goto error; }
            column = PyNumber_AsSsize_t(item, PyExc_OverflowError);
            if (column == -1 && PyErr_Occurred()) { Py_DECREF(newlines); newlines = NULL; goto error; }
            column = length - column + 1;
            Py_DECREF(item); item = NULL;
        } else column += length;
        Py_DECREF(newlines); newlines = NULL;

    } // while (pos < ...)

    return tokens;
error:
    Py_XDECREF(tokens); Py_XDECREF(css_value); Py_XDECREF(type_name); Py_XDECREF(value); Py_XDECREF(unit); Py_XDECREF(match);
    return NULL;
#undef unicode_from_data
#undef first_char
}

static PyMethodDef tokenizer_methods[] = {
    {"tokenize_flat", tokenize_flat, METH_VARARGS,
     "tokenize_flat(css_source, ignore_comments)\n\nConvert CSS source into a flat list of tokens"
    },

    {"init", tokenize_init, METH_VARARGS,
     "init()\n\nInitialize the module."
    },

    {"cleanup", tokenize_cleanup, METH_VARARGS,
     "cleanup()\n\nRelease resources allocated by init(). Safe to call multiple times."
    },

    {NULL, NULL, 0, NULL}
};

#if PY_MAJOR_VERSION >= 3
#define INITERROR return NULL
static struct PyModuleDef tokenizer_module = {
    /* m_base     */ PyModuleDef_HEAD_INIT,
    /* m_name     */ "tokenizer",
    /* m_doc      */ "Implementation of tokenizer in C for speed.",
    /* m_size     */ -1,
    /* m_methods  */ tokenizer_methods,
    /* m_slots    */ 0,
    /* m_traverse */ 0,
    /* m_clear    */ 0,
    /* m_free     */ 0,
};

CALIBRE_MODINIT_FUNC PyInit_tokenizer(void) {
    if (PyType_Ready(&tokenizer_TokenType) < 0)
        INITERROR;

    PyObject *mod = PyModule_Create(&tokenizer_module);
#else
#define INITERROR return
CALIBRE_MODINIT_FUNC inittokenizer(void) {
    if (PyType_Ready(&tokenizer_TokenType) < 0)
        INITERROR;

    PyObject *mod = Py_InitModule3("tokenizer", tokenizer_methods,
                                   "Implementation of tokenizer in C for speed.");
#endif

    if (mod == NULL) INITERROR;
    Py_INCREF(&tokenizer_TokenType);
    PyModule_AddObject(mod, "Token", (PyObject *) &tokenizer_TokenType);

#if PY_MAJOR_VERSION >= 3
    return mod;
#endif
}
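
Both implementations are meant to be drop-in equivalents; a small cross-check sketch (illustrative only, and a no-op when the C extension is unavailable):

    from tinycss.tokenizer import python_tokenize_flat, c_tokenize_flat

    css = 'a { color: red }'
    if c_tokenize_flat is not None:
        py = [(t.type, t.value) for t in python_tokenize_flat(css, ignore_comments=False)]
        c = [(t.type, t.value) for t in c_tokenize_flat(css, ignore_comments=False)]
        assert py == c
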
216
ebook_converter/tinycss/tokenizer.py
Normal file
@@ -0,0 +1,216 @@
# coding: utf8
"""
tinycss.tokenizer
-----------------

Tokenizer for the CSS core syntax:
http://www.w3.org/TR/CSS21/syndata.html#tokenization

This is the pure-python implementation. See also speedups.pyx

:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""

from __future__ import unicode_literals

from tinycss import token_data


def tokenize_flat(css_source, ignore_comments=True,
                  # Make these local variables to avoid global lookups in the loop
                  tokens_dispatch=token_data.TOKEN_DISPATCH,
                  unicode_unescape=token_data.UNICODE_UNESCAPE,
                  newline_unescape=token_data.NEWLINE_UNESCAPE,
                  simple_unescape=token_data.SIMPLE_UNESCAPE,
                  find_newlines=token_data.FIND_NEWLINES,
                  Token=token_data.Token,
                  len=len,
                  int=int,
                  float=float,
                  list=list,
                  _None=None,
                  ):
    """
    :param css_source:
        CSS as a Unicode string
    :param ignore_comments:
        if true (the default) comments will not be included in the
        return value
    :return:
        An iterator of :class:`Token`

    """

    pos = 0
    line = 1
    column = 1
    source_len = len(css_source)
    tokens = []
    while pos < source_len:
        char = css_source[pos]
        if char in ':;{}()[]':
            type_ = char
            css_value = char
        else:
            codepoint = min(ord(char), 160)
            for _index, type_, regexp in tokens_dispatch[codepoint]:
                match = regexp(css_source, pos)
                if match is not None:
                    # First match is the longest. See comments on TOKENS above.
                    css_value = match.group()
                    break
            else:
                # No match.
                # "Any other character not matched by the above rules,
                #  and neither a single nor a double quote."
                # ... but quotes at the start of a token are always matched
                # by STRING or BAD_STRING. So DELIM is any single character.
                type_ = 'DELIM'
                css_value = char
        length = len(css_value)
        next_pos = pos + length

        # A BAD_COMMENT is a comment at EOF. Ignore it too.
        if not (ignore_comments and type_ in ('COMMENT', 'BAD_COMMENT')):
            # Parse numbers, extract strings and URIs, unescape
            unit = _None
            if type_ == 'DIMENSION':
                value = match.group(1)
                value = float(value) if '.' in value else int(value)
                unit = match.group(2)
                unit = simple_unescape(unit)
                unit = unicode_unescape(unit)
                unit = unit.lower()  # normalize
            elif type_ == 'PERCENTAGE':
                value = css_value[:-1]
                value = float(value) if '.' in value else int(value)
                unit = '%'
            elif type_ == 'NUMBER':
                value = css_value
                if '.' in value:
                    value = float(value)
                else:
                    value = int(value)
                    type_ = 'INTEGER'
            elif type_ in ('IDENT', 'ATKEYWORD', 'HASH', 'FUNCTION'):
                value = simple_unescape(css_value)
                value = unicode_unescape(value)
            elif type_ == 'URI':
                value = match.group(1)
                if value and value[0] in '"\'':
                    value = value[1:-1]  # Remove quotes
                    value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            elif type_ == 'STRING':
                value = css_value[1:-1]  # Remove quotes
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            # BAD_STRING can only be one of:
            # * Unclosed string at the end of the stylesheet:
            #   Close the string, but this is not an error.
            #   Make it a "good" STRING token.
            # * Unclosed string at the (unescaped) end of the line:
            #   Close the string, but this is an error.
            #   Leave it as a BAD_STRING, don’t bother parsing it.
            # See http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
            elif type_ == 'BAD_STRING' and next_pos == source_len:
                type_ = 'STRING'
                value = css_value[1:]  # Remove quote
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            else:
                value = css_value
            tokens.append(Token(type_, css_value, value, unit, line, column))

        pos = next_pos
        newlines = find_newlines(css_value)
        if newlines:
            line += len(newlines)
            # Add 1 to have lines start at column 1, not 0
            column = length - newlines[-1].end() + 1
        else:
            column += length
    return tokens


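# A quick illustration of the flat token stream (a doctest-style sketch,
# not part of the original source):
#
#     >>> for t in tokenize_flat('a{color:red}'):
#     ...     print(t.type, t.value)
#     IDENT a
#     { {
#     IDENT color
#     : :
#     IDENT red
#     } }

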
def regroup(tokens):
    """
    Match pairs of tokens: () [] {} function()
    (Strings in "" or '' are taken care of by the tokenizer.)

    Opening tokens are replaced by a :class:`ContainerToken`.
    Closing tokens are removed. Unmatched closing tokens are invalid
    but left as-is. All nested structures that are still open at
    the end of the stylesheet are implicitly closed.

    :param tokens:
        a *flat* iterable of tokens, as returned by :func:`tokenize_flat`.
    :return:
        A tree of tokens.

    """
    # "global" objects for the inner recursion
    pairs = {'FUNCTION': ')', '(': ')', '[': ']', '{': '}'}
    tokens = iter(tokens)
    eof = [False]

    def _regroup_inner(stop_at=None,
                       tokens=tokens, pairs=pairs, eof=eof,
                       ContainerToken=token_data.ContainerToken,
                       FunctionToken=token_data.FunctionToken):
        for token in tokens:
            type_ = token.type
            if type_ == stop_at:
                return

            end = pairs.get(type_)
            if end is None:
                yield token  # Not a grouping token
            else:
                assert not isinstance(token, ContainerToken), (
                    'Token looks already grouped: {0}'.format(token))
                content = list(_regroup_inner(end))
                if eof[0]:
                    end = ''  # Implicit end of structure at EOF.
                if type_ == 'FUNCTION':
                    yield FunctionToken(token.type, token.as_css(), end,
                                        token.value, content,
                                        token.line, token.column)
                else:
                    yield ContainerToken(token.type, token.as_css(), end,
                                         content,
                                         token.line, token.column)
        else:
            eof[0] = True  # end of file/stylesheet
    return _regroup_inner()


def tokenize_grouped(css_source, ignore_comments=True):
    """
    :param css_source:
        CSS as a Unicode string
    :param ignore_comments:
        if true (the default) comments will not be included in the
        return value
    :return:
        An iterator of :class:`Token`

    """
    return regroup(tokenize_flat(css_source, ignore_comments))


# Optional Cython version of tokenize_flat
# Make both versions available with explicit names for tests.
python_tokenize_flat = tokenize_flat

try:
    tok = token_data.load_c_tokenizer()
except (ImportError, RuntimeError):
    c_tokenize_flat = None
else:
    # Use the C tokenizer by default
    c_tokenize_flat = tokenize_flat = lambda s, ignore_comments=False: tok.tokenize_flat(s, ignore_comments)
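
A minimal usage sketch for the grouped API exported here (the input is made up; whether c_tokenize_flat is used depends on the compiled extension being available):

    from tinycss.tokenizer import tokenize_grouped

    for token in tokenize_grouped('p { margin: 0 auto }'):
        if token.is_container:
            print(token.type, [t.type for t in token.content])
        else:
            print(token.type, token.value)
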
1
ebook_converter/tinycss/version.py
Normal file
@@ -0,0 +1 @@
VERSION = '0.3'