mirror of https://github.com/gryf/ebook-converter.git synced 2026-01-03 17:34:11 +01:00

Initial import

This commit is contained in:
2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions

View File

@@ -0,0 +1,52 @@
# coding: utf8
"""
tinycss
-------
A CSS parser, and nothing else.
:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""
from .version import VERSION
__version__ = VERSION
from tinycss.css21 import CSS21Parser
from tinycss.page3 import CSSPage3Parser
from tinycss.fonts3 import CSSFonts3Parser
from tinycss.media3 import CSSMedia3Parser
PARSER_MODULES = {
'page3': CSSPage3Parser,
'fonts3': CSSFonts3Parser,
'media3': CSSMedia3Parser,
}
def make_parser(*features, **kwargs):
"""Make a parser object with the chosen features.
:param features:
Positional arguments are base classes the new parser class will extend.
The strings ``'page3'``, ``'fonts3'`` and ``'media3'`` are accepted as
shorthand for :class:`~page3.CSSPage3Parser`,
:class:`~fonts3.CSSFonts3Parser` and :class:`~media3.CSSMedia3Parser`
respectively.
:param kwargs:
Keyword arguments are passed to the parser's constructor.
:returns:
An instance of a new subclass of :class:`CSS21Parser`
"""
if features:
bases = tuple(PARSER_MODULES.get(f, f) for f in features)
parser_class = type('CustomCSSParser', bases + (CSS21Parser,), {})
else:
parser_class = CSS21Parser
return parser_class(**kwargs)
def make_full_parser(**kwargs):
''' A parser that parses all supported CSS 3 modules in addition to CSS 2.1 '''
features = tuple(PARSER_MODULES)
return make_parser(*features, **kwargs)
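# A minimal usage sketch (illustrative only): pick the CSS 3 feature modules
# you need, or ask for all of them at once.
if __name__ == '__main__':
    parser = make_parser('page3', 'fonts3')   # CSS 2.1 plus the chosen modules
    full_parser = make_full_parser()          # CSS 2.1 plus every module above
    sheet = full_parser.parse_stylesheet('@page :first { margin: 1in }')
    print(sheet)   # e.g. <Stylesheet 1 rules 0 errors>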

View File

@@ -0,0 +1,382 @@
# coding: utf8
"""
tinycss.colors3
---------------
Parser for CSS 3 color values
http://www.w3.org/TR/css3-color/
This module does not provide anything that integrates in a parser class,
only functions that parse single tokens from (eg.) a property value.
:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""
from __future__ import unicode_literals, division
import collections
import itertools
import re
from .tokenizer import tokenize_grouped
class RGBA(collections.namedtuple('RGBA', ['red', 'green', 'blue', 'alpha'])):
"""An RGBA color.
A tuple of four floats in the 0..1 range: ``(r, g, b, a)``.
Also has ``red``, ``green``, ``blue`` and ``alpha`` attributes to access
the same values.
"""
def parse_color_string(css_string):
"""Parse a CSS string as a color value.
This is a convenience wrapper around :func:`parse_color` in case you
have a string that is not from a CSS stylesheet.
:param css_string:
A Unicode string in CSS syntax.
:returns:
Same as :func:`parse_color`.
"""
tokens = list(tokenize_grouped(css_string.strip()))
if len(tokens) == 1:
return parse_color(tokens[0])
def parse_color(token):
"""Parse single token as a color value.
:param token:
A single :class:`~.token_data.Token` or
:class:`~.token_data.ContainerToken`, as found eg. in a
property value.
:returns:
* ``None``, if the token is not a valid CSS 3 color value.
(No exception is raised.)
* For the *currentColor* keyword: the string ``'currentColor'``
* Every other value (including keywords, HSL and HSLA) is converted
to RGBA and returned as an :class:`RGBA` object (a 4-tuple with
attribute access).
The alpha channel is clipped to [0, 1], but R, G, or B can be
out of range (eg. ``rgb(-51, 306, 0)`` is represented as
``(-.2, 1.2, 0, 1)``.)
"""
if token.type == 'IDENT':
return COLOR_KEYWORDS.get(token.value.lower())
elif token.type == 'HASH':
for multiplier, regexp in HASH_REGEXPS:
match = regexp(token.value)
if match:
r, g, b = [int(group * multiplier, 16) / 255
for group in match.groups()]
return RGBA(r, g, b, 1.)
elif token.type == 'FUNCTION':
args = parse_comma_separated(token.content)
if args:
name = token.function_name.lower()
if name == 'rgb':
return parse_rgb(args, alpha=1.)
elif name == 'rgba':
alpha = parse_alpha(args[3:])
if alpha is not None:
return parse_rgb(args[:3], alpha)
elif name == 'hsl':
return parse_hsl(args, alpha=1.)
elif name == 'hsla':
alpha = parse_alpha(args[3:])
if alpha is not None:
return parse_hsl(args[:3], alpha)
def parse_alpha(args):
"""
If args is a list of a single INTEGER or NUMBER token,
return its value clipped to the 0..1 range.
Otherwise, return None.
"""
if len(args) == 1 and args[0].type in ('NUMBER', 'INTEGER'):
return min(1, max(0, args[0].value))
def parse_rgb(args, alpha):
"""
If args is a list of 3 INTEGER tokens or 3 PERCENTAGE tokens,
return an :class:`RGBA` value with channels in 0..1 and the given alpha.
Otherwise, return None.
"""
types = [arg.type for arg in args]
if types == ['INTEGER', 'INTEGER', 'INTEGER']:
r, g, b = [arg.value / 255 for arg in args[:3]]
return RGBA(r, g, b, alpha)
elif types == ['PERCENTAGE', 'PERCENTAGE', 'PERCENTAGE']:
r, g, b = [arg.value / 100 for arg in args[:3]]
return RGBA(r, g, b, alpha)
def parse_hsl(args, alpha):
"""
If args is a list of 1 INTEGER token and 2 PERCENTAGE tokens,
return an :class:`RGBA` value with channels in 0..1 and the given alpha.
Otherwise, return None.
"""
types = [arg.type for arg in args]
if types == ['INTEGER', 'PERCENTAGE', 'PERCENTAGE']:
hsl = [arg.value for arg in args[:3]]
r, g, b = hsl_to_rgb(*hsl)
return RGBA(r, g, b, alpha)
def hsl_to_rgb(hue, saturation, lightness):
"""
:param hue: degrees
:param saturation: percentage
:param lightness: percentage
:returns: (r, g, b) as floats in the 0..1 range
"""
hue = (hue / 360) % 1
saturation = min(1, max(0, saturation / 100))
lightness = min(1, max(0, lightness / 100))
# Translated from ABC: http://www.w3.org/TR/css3-color/#hsl-color
def hue_to_rgb(m1, m2, h):
if h < 0:
h += 1
if h > 1:
h -= 1
if h * 6 < 1:
return m1 + (m2 - m1) * h * 6
if h * 2 < 1:
return m2
if h * 3 < 2:
return m1 + (m2 - m1) * (2 / 3 - h) * 6
return m1
if lightness <= 0.5:
m2 = lightness * (saturation + 1)
else:
m2 = lightness + saturation - lightness * saturation
m1 = lightness * 2 - m2
return (
hue_to_rgb(m1, m2, hue + 1 / 3),
hue_to_rgb(m1, m2, hue),
hue_to_rgb(m1, m2, hue - 1 / 3),
)
def parse_comma_separated(tokens):
"""Parse a list of tokens (typically the content of a function token)
as arguments made of a single token each, separated by mandatory commas,
with optional white space around each argument.
Return the argument list without commas or white space,
or None if the function token content does not match the description above.
"""
tokens = [token for token in tokens if token.type != 'S']
if not tokens:
return []
if len(tokens) % 2 == 1 and all(
token.type == 'DELIM' and token.value == ','
for token in tokens[1::2]):
return tokens[::2]
HASH_REGEXPS = (
(2, re.compile(r'^#([\da-f])([\da-f])([\da-f])$', re.I).match),
(1, re.compile(r'^#([\da-f]{2})([\da-f]{2})([\da-f]{2})$', re.I).match),
)
# (r, g, b) in 0..255
BASIC_COLOR_KEYWORDS = [
('black', (0, 0, 0)),
('silver', (192, 192, 192)),
('gray', (128, 128, 128)),
('white', (255, 255, 255)),
('maroon', (128, 0, 0)),
('red', (255, 0, 0)),
('purple', (128, 0, 128)),
('fuchsia', (255, 0, 255)),
('green', (0, 128, 0)),
('lime', (0, 255, 0)),
('olive', (128, 128, 0)),
('yellow', (255, 255, 0)),
('navy', (0, 0, 128)),
('blue', (0, 0, 255)),
('teal', (0, 128, 128)),
('aqua', (0, 255, 255)),
]
# (r, g, b) in 0..255
EXTENDED_COLOR_KEYWORDS = [
('aliceblue', (240, 248, 255)),
('antiquewhite', (250, 235, 215)),
('aqua', (0, 255, 255)),
('aquamarine', (127, 255, 212)),
('azure', (240, 255, 255)),
('beige', (245, 245, 220)),
('bisque', (255, 228, 196)),
('black', (0, 0, 0)),
('blanchedalmond', (255, 235, 205)),
('blue', (0, 0, 255)),
('blueviolet', (138, 43, 226)),
('brown', (165, 42, 42)),
('burlywood', (222, 184, 135)),
('cadetblue', (95, 158, 160)),
('chartreuse', (127, 255, 0)),
('chocolate', (210, 105, 30)),
('coral', (255, 127, 80)),
('cornflowerblue', (100, 149, 237)),
('cornsilk', (255, 248, 220)),
('crimson', (220, 20, 60)),
('cyan', (0, 255, 255)),
('darkblue', (0, 0, 139)),
('darkcyan', (0, 139, 139)),
('darkgoldenrod', (184, 134, 11)),
('darkgray', (169, 169, 169)),
('darkgreen', (0, 100, 0)),
('darkgrey', (169, 169, 169)),
('darkkhaki', (189, 183, 107)),
('darkmagenta', (139, 0, 139)),
('darkolivegreen', (85, 107, 47)),
('darkorange', (255, 140, 0)),
('darkorchid', (153, 50, 204)),
('darkred', (139, 0, 0)),
('darksalmon', (233, 150, 122)),
('darkseagreen', (143, 188, 143)),
('darkslateblue', (72, 61, 139)),
('darkslategray', (47, 79, 79)),
('darkslategrey', (47, 79, 79)),
('darkturquoise', (0, 206, 209)),
('darkviolet', (148, 0, 211)),
('deeppink', (255, 20, 147)),
('deepskyblue', (0, 191, 255)),
('dimgray', (105, 105, 105)),
('dimgrey', (105, 105, 105)),
('dodgerblue', (30, 144, 255)),
('firebrick', (178, 34, 34)),
('floralwhite', (255, 250, 240)),
('forestgreen', (34, 139, 34)),
('fuchsia', (255, 0, 255)),
('gainsboro', (220, 220, 220)),
('ghostwhite', (248, 248, 255)),
('gold', (255, 215, 0)),
('goldenrod', (218, 165, 32)),
('gray', (128, 128, 128)),
('green', (0, 128, 0)),
('greenyellow', (173, 255, 47)),
('grey', (128, 128, 128)),
('honeydew', (240, 255, 240)),
('hotpink', (255, 105, 180)),
('indianred', (205, 92, 92)),
('indigo', (75, 0, 130)),
('ivory', (255, 255, 240)),
('khaki', (240, 230, 140)),
('lavender', (230, 230, 250)),
('lavenderblush', (255, 240, 245)),
('lawngreen', (124, 252, 0)),
('lemonchiffon', (255, 250, 205)),
('lightblue', (173, 216, 230)),
('lightcoral', (240, 128, 128)),
('lightcyan', (224, 255, 255)),
('lightgoldenrodyellow', (250, 250, 210)),
('lightgray', (211, 211, 211)),
('lightgreen', (144, 238, 144)),
('lightgrey', (211, 211, 211)),
('lightpink', (255, 182, 193)),
('lightsalmon', (255, 160, 122)),
('lightseagreen', (32, 178, 170)),
('lightskyblue', (135, 206, 250)),
('lightslategray', (119, 136, 153)),
('lightslategrey', (119, 136, 153)),
('lightsteelblue', (176, 196, 222)),
('lightyellow', (255, 255, 224)),
('lime', (0, 255, 0)),
('limegreen', (50, 205, 50)),
('linen', (250, 240, 230)),
('magenta', (255, 0, 255)),
('maroon', (128, 0, 0)),
('mediumaquamarine', (102, 205, 170)),
('mediumblue', (0, 0, 205)),
('mediumorchid', (186, 85, 211)),
('mediumpurple', (147, 112, 219)),
('mediumseagreen', (60, 179, 113)),
('mediumslateblue', (123, 104, 238)),
('mediumspringgreen', (0, 250, 154)),
('mediumturquoise', (72, 209, 204)),
('mediumvioletred', (199, 21, 133)),
('midnightblue', (25, 25, 112)),
('mintcream', (245, 255, 250)),
('mistyrose', (255, 228, 225)),
('moccasin', (255, 228, 181)),
('navajowhite', (255, 222, 173)),
('navy', (0, 0, 128)),
('oldlace', (253, 245, 230)),
('olive', (128, 128, 0)),
('olivedrab', (107, 142, 35)),
('orange', (255, 165, 0)),
('orangered', (255, 69, 0)),
('orchid', (218, 112, 214)),
('palegoldenrod', (238, 232, 170)),
('palegreen', (152, 251, 152)),
('paleturquoise', (175, 238, 238)),
('palevioletred', (219, 112, 147)),
('papayawhip', (255, 239, 213)),
('peachpuff', (255, 218, 185)),
('peru', (205, 133, 63)),
('pink', (255, 192, 203)),
('plum', (221, 160, 221)),
('powderblue', (176, 224, 230)),
('purple', (128, 0, 128)),
('red', (255, 0, 0)),
('rosybrown', (188, 143, 143)),
('royalblue', (65, 105, 225)),
('saddlebrown', (139, 69, 19)),
('salmon', (250, 128, 114)),
('sandybrown', (244, 164, 96)),
('seagreen', (46, 139, 87)),
('seashell', (255, 245, 238)),
('sienna', (160, 82, 45)),
('silver', (192, 192, 192)),
('skyblue', (135, 206, 235)),
('slateblue', (106, 90, 205)),
('slategray', (112, 128, 144)),
('slategrey', (112, 128, 144)),
('snow', (255, 250, 250)),
('springgreen', (0, 255, 127)),
('steelblue', (70, 130, 180)),
('tan', (210, 180, 140)),
('teal', (0, 128, 128)),
('thistle', (216, 191, 216)),
('tomato', (255, 99, 71)),
('turquoise', (64, 224, 208)),
('violet', (238, 130, 238)),
('wheat', (245, 222, 179)),
('white', (255, 255, 255)),
('whitesmoke', (245, 245, 245)),
('yellow', (255, 255, 0)),
('yellowgreen', (154, 205, 50)),
]
# (r, g, b, a) in 0..1 or a string marker
SPECIAL_COLOR_KEYWORDS = {
'currentcolor': 'currentColor',
'transparent': RGBA(0., 0., 0., 0.),
}
# RGBA namedtuples of (r, g, b, a) in 0..1 or a string marker
COLOR_KEYWORDS = SPECIAL_COLOR_KEYWORDS.copy()
COLOR_KEYWORDS.update(
# 255 maps to 1, 0 to 0, the rest is linear.
(keyword, RGBA(r / 255., g / 255., b / 255., 1.))
for keyword, (r, g, b) in itertools.chain(
BASIC_COLOR_KEYWORDS, EXTENDED_COLOR_KEYWORDS))
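# A minimal usage sketch (illustrative only): parse_color_string accepts any
# CSS 3 color notation and returns an RGBA namedtuple with channels in the
# 0..1 range, the string 'currentColor', or None for invalid values.
if __name__ == '__main__':
    assert parse_color_string('red') == (1.0, 0.0, 0.0, 1.0)
    assert parse_color_string('#0f0') == (0.0, 1.0, 0.0, 1.0)
    assert parse_color_string('rgba(0, 0, 0, 0.5)') == (0.0, 0.0, 0.0, 0.5)
    assert parse_color_string('not-a-color') is None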

View File

@@ -0,0 +1,830 @@
# coding: utf8
"""
tinycss.css21
-------------
Parser for CSS 2.1
http://www.w3.org/TR/CSS21/syndata.html
:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""
from __future__ import unicode_literals
from itertools import chain, islice
from tinycss.decoding import decode
from tinycss.token_data import TokenList
from tinycss.tokenizer import tokenize_grouped
from tinycss.parsing import (
strip_whitespace, remove_whitespace, split_on_comma, validate_value,
validate_any, ParseError)
# stylesheet : [ CDO | CDC | S | statement ]*;
# statement : ruleset | at-rule;
# at-rule : ATKEYWORD S* any* [ block | ';' S* ];
# block : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*;
# ruleset : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*;
# selector : any+;
# declaration : property S* ':' S* value;
# property : IDENT;
# value : [ any | block | ATKEYWORD S* ]+;
# any : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING
# | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES
# | DASHMATCH | ':' | FUNCTION S* [any|unused]* ')'
# | '(' S* [any|unused]* ')' | '[' S* [any|unused]* ']'
# ] S*;
# unused : block | ATKEYWORD S* | ';' S* | CDO S* | CDC S*;
class Stylesheet(object):
"""
A parsed CSS stylesheet.
.. attribute:: rules
A mixed list, in source order, of :class:`RuleSet` and various
at-rules such as :class:`ImportRule`, :class:`MediaRule`
and :class:`PageRule`.
Use their :obj:`at_keyword` attribute to distinguish them.
.. attribute:: errors
A list of :class:`~.parsing.ParseError`. Invalid rules and declarations
are ignored, with the details logged in this list.
.. attribute:: encoding
The character encoding that was used to decode the stylesheet
from bytes, or ``None`` for Unicode stylesheets.
"""
def __init__(self, rules, errors, encoding):
self.rules = rules
self.errors = errors
self.encoding = encoding
def __repr__(self):
return '<{0.__class__.__name__} {1} rules {2} errors>'.format(
self, len(self.rules), len(self.errors))
class AtRule(object):
"""
An unparsed at-rule.
.. attribute:: at_keyword
The normalized (lower-case) at-keyword as a string. Eg: ``'@page'``
.. attribute:: head
The part of the at-rule between the at-keyword and the ``{``
marking the body, or the ``;`` marking the end of an at-rule without
a body. A :class:`~.token_data.TokenList`.
.. attribute:: body
The content of the body between ``{`` and ``}`` as a
:class:`~.token_data.TokenList`, or ``None`` if there is no body
(ie. if the rule ends with ``;``).
The head was validated against the core grammar but **not** the body,
as the body might contain declarations. In case of an error in a
declaration, parsing should continue from the next declaration.
The whole rule should not be ignored as it would be for an error
in the head.
These at-rules are expected to be parsed further before reaching
the user API.
"""
__slots__ = 'at_keyword', 'head', 'body', 'line', 'column'
def __init__(self, at_keyword, head, body, line, column):
self.at_keyword = at_keyword
self.head = TokenList(head)
self.body = TokenList(body) if body is not None else body
self.line = line
self.column = column
def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column} {0.at_keyword}>'
.format(self))
class RuleSet(object):
"""A ruleset.
.. attribute:: at_keyword
Always ``None``. Helps to tell rulesets apart from at-rules.
.. attribute:: selector
The selector as a :class:`~.token_data.TokenList`.
In CSS 3, this is actually called a selector group.
``rule.selector.as_css()`` gives the selector as a string.
This string can be used with *cssselect*, see :ref:`selectors3`.
.. attribute:: declarations
The list of :class:`Declaration`, in source order.
"""
at_keyword = None
__slots__ = 'selector', 'declarations', 'line', 'column'
def __init__(self, selector, declarations, line, column):
self.selector = TokenList(selector)
self.declarations = declarations
self.line = line
self.column = column
def __repr__(self):
return ('<{0.__class__.__name__} at {0.line}:{0.column} {1}>'
.format(self, self.selector.as_css()))
class Declaration(object):
"""A property declaration.
.. attribute:: name
The property name as a normalized (lower-case) string.
.. attribute:: value
The property value as a :class:`~.token_data.TokenList`.
The value is not parsed. UAs using tinycss may only support
some properties or some values and tinycss does not know which.
They need to parse values themselves and ignore declarations with
unknown or unsupported properties or values, and fall back
on any previous declaration.
:mod:`tinycss.color3` parses color values, but other values
will need specific parsing/validation code.
.. attribute:: priority
Either the string ``'important'`` or ``None``.
"""
__slots__ = 'name', 'value', 'priority', 'line', 'column'
def __init__(self, name, value, priority, line, column):
self.name = name
self.value = TokenList(value)
self.priority = priority
self.line = line
self.column = column
def __repr__(self):
priority = ' !' + self.priority if self.priority else ''
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.name}: {1}{2}>'.format(
self, self.value.as_css(), priority))
class PageRule(object):
"""A parsed CSS 2.1 @page rule.
.. attribute:: at_keyword
Always ``'@page'``
.. attribute:: selector
The page selector.
In CSS 2.1 this is either ``None`` (no selector), or the string
``'first'``, ``'left'`` or ``'right'`` for the pseudo class
of the same name.
.. attribute:: specificity
Specificity of the page selector. This is a tuple of four integers,
but these tuples are mostly meant to be compared to each other.
.. attribute:: declarations
A list of :class:`Declaration`, in source order.
.. attribute:: at_rules
The list of parsed at-rules inside the @page block, in source order.
Always empty for CSS 2.1.
"""
at_keyword = '@page'
__slots__ = 'selector', 'specificity', 'declarations', 'at_rules', 'line', 'column'
def __init__(self, selector, specificity, declarations, at_rules,
line, column):
self.selector = selector
self.specificity = specificity
self.declarations = declarations
self.at_rules = at_rules
self.line = line
self.column = column
def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.selector}>'.format(self))
class MediaRule(object):
"""A parsed @media rule.
.. attribute:: at_keyword
Always ``'@media'``
.. attribute:: media
For CSS 2.1 without media queries: the media types
as a list of strings.
.. attribute:: rules
The list of :class:`RuleSet` objects and various at-rules inside the @media
block, in source order.
"""
at_keyword = '@media'
__slots__ = 'media', 'rules', 'line', 'column'
def __init__(self, media, rules, line, column):
self.media = media
self.rules = rules
self.line = line
self.column = column
def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.media}>'.format(self))
class ImportRule(object):
"""A parsed @import rule.
.. attribute:: at_keyword
Always ``'@import'``
.. attribute:: uri
The URI to be imported, as read from the stylesheet.
(URIs are not made absolute.)
.. attribute:: media
For CSS 2.1 without media queries: the media types
as a list of strings.
This attribute is explicitly ``['all']`` if the media was omitted
in the source.
"""
at_keyword = '@import'
__slots__ = 'uri', 'media', 'line', 'column'
def __init__(self, uri, media, line, column):
self.uri = uri
self.media = media
self.line = line
self.column = column
def __repr__(self):
return ('<{0.__class__.__name__} {0.line}:{0.column}'
' {0.uri}>'.format(self))
def _remove_at_charset(tokens):
"""Remove any valid @charset at the beggining of a token stream.
:param tokens:
An iterable of tokens
:returns:
A possibly truncated iterable of tokens
"""
tokens = iter(tokens)
header = list(islice(tokens, 4))
if [t.type for t in header] == ['ATKEYWORD', 'S', 'STRING', ';']:
atkw, space, string, semicolon = header
if ((atkw.value, space.value) == ('@charset', ' ')
and string.as_css()[0] == '"'):
# Found a valid @charset rule, only keep what's after it.
return tokens
return chain(header, tokens)
class CSS21Parser(object):
"""Parser for CSS 2.1
This parser supports the core CSS syntax as well as @import, @media,
@page and !important.
Note that property values are still not parsed, as UAs using this
parser may only support some properties or some values.
Currently the parser holds no state; it is a class only so that it can
be subclassed and its methods overridden.
"""
def __init__(self):
self.at_parsers = {
'@' + x: getattr(self, 'parse_%s_rule' % x) for x in ('media', 'page', 'import', 'charset')}
# User API:
def parse_stylesheet_file(self, css_file, protocol_encoding=None,
linking_encoding=None, document_encoding=None):
"""Parse a stylesheet from a file or filename.
Character encoding-related parameters and behavior are the same
as in :meth:`parse_stylesheet_bytes`.
:param css_file:
Either a file (any object with a :meth:`~file.read` method)
or a filename.
:return:
A :class:`Stylesheet`.
"""
if hasattr(css_file, 'read'):
css_bytes = css_file.read()
else:
with open(css_file, 'rb') as fd:
css_bytes = fd.read()
return self.parse_stylesheet_bytes(css_bytes, protocol_encoding,
linking_encoding, document_encoding)
def parse_stylesheet_bytes(self, css_bytes, protocol_encoding=None,
linking_encoding=None, document_encoding=None):
"""Parse a stylesheet from a byte string.
The character encoding is determined from the passed metadata and the
``@charset`` rule in the stylesheet (if any).
If no encoding information is available or decoding fails,
decoding defaults to UTF-8 and then falls back on ISO-8859-1.
:param css_bytes:
A CSS stylesheet as a byte string.
:param protocol_encoding:
The "charset" parameter of a "Content-Type" HTTP header (if any),
or similar metadata for other protocols.
:param linking_encoding:
``<link charset="">`` or other metadata from the linking mechanism
(if any)
:param document_encoding:
Encoding of the referring style sheet or document (if any)
:return:
A :class:`Stylesheet`.
"""
css_unicode, encoding = decode(css_bytes, protocol_encoding,
linking_encoding, document_encoding)
return self.parse_stylesheet(css_unicode, encoding=encoding)
def parse_stylesheet(self, css_unicode, encoding=None):
"""Parse a stylesheet from an Unicode string.
:param css_unicode:
A CSS stylesheet as a Unicode string.
:param encoding:
The character encoding used to decode the stylesheet from bytes,
if any.
:return:
A :class:`Stylesheet`.
"""
tokens = tokenize_grouped(css_unicode)
if encoding:
tokens = _remove_at_charset(tokens)
rules, errors = self.parse_rules(tokens, context='stylesheet')
return Stylesheet(rules, errors, encoding)
def parse_style_attr(self, css_source):
"""Parse a "style" attribute (eg. of an HTML element).
This method only accepts Unicode as the source (HTML) document
is supposed to handle the character encoding.
:param css_source:
The attribute value, as a Unicode string.
:return:
A tuple of the list of valid :class:`Declaration` and
a list of :class:`~.parsing.ParseError`.
"""
return self.parse_declaration_list(tokenize_grouped(css_source))
# API for subclasses:
def parse_rules(self, tokens, context):
"""Parse a sequence of rules (rulesets and at-rules).
:param tokens:
An iterable of tokens.
:param context:
Either ``'stylesheet'`` or an at-keyword such as ``'@media'``.
(Most at-rules are only allowed in some contexts.)
:return:
A tuple of a list of parsed rules and a list of
:class:`~.parsing.ParseError`.
"""
rules = []
errors = []
tokens = iter(tokens)
for token in tokens:
if token.type not in ('S', 'CDO', 'CDC'):
try:
if token.type == 'ATKEYWORD':
rule = self.read_at_rule(token, tokens)
result = self.parse_at_rule(
rule, rules, errors, context)
rules.append(result)
else:
rule, rule_errors = self.parse_ruleset(token, tokens)
rules.append(rule)
errors.extend(rule_errors)
except ParseError as exc:
errors.append(exc)
# Skip the entire rule
return rules, errors
def read_at_rule(self, at_keyword_token, tokens):
"""Read an at-rule from a token stream.
:param at_keyword_token:
The ATKEYWORD token that starts this at-rule.
You may have read it already to distinguish the rule
from a ruleset.
:param tokens:
An iterator of subsequent tokens. Will be consumed just enough
for one at-rule.
:return:
An unparsed :class:`AtRule`.
:raises:
:class:`~.parsing.ParseError` if the head is invalid for the core
grammar. The body is **not** validated. See :class:`AtRule`.
"""
# CSS syntax is case-insensitive
at_keyword = at_keyword_token.value.lower()
head = []
# For the ParseError in case `tokens` is empty:
token = at_keyword_token
for token in tokens:
if token.type in '{;':
break
# Ignore white space just after the at-keyword.
else:
head.append(token)
# On unexpected end of stylesheet, pretend that a ';' was there
head = strip_whitespace(head)
for head_token in head:
validate_any(head_token, 'at-rule head')
body = token.content if token.type == '{' else None
return AtRule(at_keyword, head, body,
at_keyword_token.line, at_keyword_token.column)
def parse_at_rule(self, rule, previous_rules, errors, context):
"""Parse an at-rule.
Subclasses that override this method must use ``super()`` and
pass its return value for at-rules they do not know.
In CSS 2.1, this method handles @charset, @import, @media and @page
rules.
:param rule:
An unparsed :class:`AtRule`.
:param previous_rules:
The list of at-rules and rulesets that have been parsed so far
in this context. This list can be used to decide if the current
rule is valid. (For example, @import rules are only allowed
before any other rule except a @charset rule.)
:param context:
Either ``'stylesheet'`` or an at-keyword such as ``'@media'``.
(Most at-rules are only allowed in some contexts.)
:raises:
:class:`~.parsing.ParseError` if the rule is invalid.
:return:
A parsed at-rule
"""
try:
parser = self.at_parsers[rule.at_keyword]
except KeyError:
raise ParseError(rule, 'unknown at-rule in {0} context: {1}'
.format(context, rule.at_keyword))
else:
return parser(rule, previous_rules, errors, context)
def parse_page_rule(self, rule, previous_rules, errors, context):
if context != 'stylesheet':
raise ParseError(rule, '@page rule not allowed in ' + context)
selector, specificity = self.parse_page_selector(rule.head)
if rule.body is None:
raise ParseError(rule,
'invalid {0} rule: missing block'.format(rule.at_keyword))
declarations, at_rules, rule_errors = \
self.parse_declarations_and_at_rules(rule.body, '@page')
errors.extend(rule_errors)
return PageRule(selector, specificity, declarations, at_rules,
rule.line, rule.column)
def parse_media_rule(self, rule, previous_rules, errors, context):
if context != 'stylesheet':
raise ParseError(rule, '@media rule not allowed in ' + context)
media = self.parse_media(rule.head, errors)
if rule.body is None:
raise ParseError(rule,
'invalid {0} rule: missing block'.format(rule.at_keyword))
rules, rule_errors = self.parse_rules(rule.body, '@media')
errors.extend(rule_errors)
return MediaRule(media, rules, rule.line, rule.column)
def parse_import_rule(self, rule, previous_rules, errors, context):
if context != 'stylesheet':
raise ParseError(rule,
'@import rule not allowed in ' + context)
for previous_rule in previous_rules:
if previous_rule.at_keyword not in ('@charset', '@import'):
if previous_rule.at_keyword:
type_ = 'an {0} rule'.format(previous_rule.at_keyword)
else:
type_ = 'a ruleset'
raise ParseError(previous_rule,
'@import rule not allowed after ' + type_)
head = rule.head
if not head:
raise ParseError(rule,
'expected URI or STRING for @import rule')
if head[0].type not in ('URI', 'STRING'):
raise ParseError(rule,
'expected URI or STRING for @import rule, got '
+ head[0].type)
uri = head[0].value
media = self.parse_media(strip_whitespace(head[1:]), errors)
if rule.body is not None:
# The position of the ';' token would be best, but we don't
# have it anymore here.
raise ParseError(head[-1], "expected ';', got a block")
return ImportRule(uri, media, rule.line, rule.column)
def parse_charset_rule(self, rule, previous_rules, errors, context):
raise ParseError(rule, 'mis-placed or malformed @charset rule')
def parse_media(self, tokens, errors):
"""For CSS 2.1, parse a list of media types.
Media Queries are expected to override this.
:param tokens:
A list of tokens
:raises:
:class:`~.parsing.ParseError` on invalid media types/queries
:returns:
For CSS 2.1, a list of media types as strings
"""
if not tokens:
return ['all']
media_types = []
for part in split_on_comma(remove_whitespace(tokens)):
types = [token.type for token in part]
if types == ['IDENT']:
media_types.append(part[0].value)
else:
raise ParseError(tokens[0], 'expected a media type'
+ ((', got ' + ', '.join(types)) if types else ''))
return media_types
def parse_page_selector(self, tokens):
"""Parse an @page selector.
:param tokens:
An iterable of tokens, typically from the ``head`` attribute of
an unparsed :class:`AtRule`.
:returns:
A page selector and its specificity. For CSS 2.1 the selector is
``'first'``, ``'left'``, ``'right'`` or ``None``.
:raises:
:class:`~.parsing.ParseError` on invalid selectors
"""
if not tokens:
return None, (0, 0)
if (len(tokens) == 2 and tokens[0].type == ':'
and tokens[1].type == 'IDENT'):
pseudo_class = tokens[1].value
specificity = {
'first': (1, 0), 'left': (0, 1), 'right': (0, 1),
}.get(pseudo_class)
if specificity:
return pseudo_class, specificity
raise ParseError(tokens[0], 'invalid @page selector')
def parse_declarations_and_at_rules(self, tokens, context):
"""Parse a mixed list of declarations and at rules, as found eg.
in the body of an @page rule.
Note that to add supported at-rules inside @page,
:class:`~.page3.CSSPage3Parser` extends :meth:`parse_at_rule`,
not this method.
:param tokens:
An iterable of tokens, typically from the ``body`` attribute of
an unparsed :class:`AtRule`.
:param context:
An at-keyword such as ``'@page'``.
(Most at-rules are only allowed in some contexts.)
:returns:
A tuple of:
* A list of :class:`Declaration`
* A list of parsed at-rules (empty for CSS 2.1)
* A list of :class:`~.parsing.ParseError`
"""
at_rules = []
declarations = []
errors = []
tokens = iter(tokens)
for token in tokens:
if token.type == 'ATKEYWORD':
try:
rule = self.read_at_rule(token, tokens)
result = self.parse_at_rule(
rule, at_rules, errors, context)
at_rules.append(result)
except ParseError as err:
errors.append(err)
elif token.type != 'S':
declaration_tokens = []
while token and token.type != ';':
declaration_tokens.append(token)
token = next(tokens, None)
if declaration_tokens:
try:
declarations.append(
self.parse_declaration(declaration_tokens))
except ParseError as err:
errors.append(err)
return declarations, at_rules, errors
def parse_ruleset(self, first_token, tokens):
"""Parse a ruleset: a selector followed by declaration block.
:param first_token:
The first token of the ruleset (probably of the selector).
You may have read it already to distinguish the rule
from an at-rule.
:param tokens:
an iterator of subsequent tokens. Will be consumed just enough
for one ruleset.
:return:
a tuple of a :class:`RuleSet` and an error list.
The errors are recovered :class:`~.parsing.ParseError` in declarations.
(Parsing continues from the next declaration on such errors.)
:raises:
:class:`~.parsing.ParseError` if the selector is invalid for the
core grammar.
Note that a selector can be valid for the core grammar but
not for CSS 2.1 or another level.
"""
selector = []
for token in chain([first_token], tokens):
if token.type == '{':
# Parse/validate once we've read the whole rule
selector = strip_whitespace(selector)
if not selector:
raise ParseError(first_token, 'empty selector')
for selector_token in selector:
validate_any(selector_token, 'selector')
declarations, errors = self.parse_declaration_list(
token.content)
ruleset = RuleSet(selector, declarations,
first_token.line, first_token.column)
return ruleset, errors
else:
selector.append(token)
raise ParseError(token, 'no declaration block found for ruleset')
def parse_declaration_list(self, tokens):
"""Parse a ``;`` separated declaration list.
You may want to use :meth:`parse_declarations_and_at_rules` (or
some other method that uses :func:`parse_declaration` directly)
instead if the context contains more than just declarations.
:param tokens:
an iterable of tokens. Should stop at (before) the end
of the block, as marked by ``}``.
:return:
a tuple of the list of valid :class:`Declaration` and a list
of :class:`~.parsing.ParseError`
"""
# split at ';'
parts = []
this_part = []
for token in tokens:
if token.type == ';':
parts.append(this_part)
this_part = []
else:
this_part.append(token)
parts.append(this_part)
declarations = []
errors = []
for tokens in parts:
tokens = strip_whitespace(tokens)
if tokens:
try:
declarations.append(self.parse_declaration(tokens))
except ParseError as exc:
errors.append(exc)
# Skip the entire declaration
return declarations, errors
def parse_declaration(self, tokens):
"""Parse a single declaration.
:param tokens:
an iterable of at least one token. Should stop at (before)
the end of the declaration, as marked by a ``;`` or ``}``.
Empty declarations (ie. consecutive ``;`` with only white space
in-between) should be skipped earlier and not passed to
this method.
:returns:
a :class:`Declaration`
:raises:
:class:`~.parsing.ParseError` if the tokens do not match the
'declaration' production of the core grammar.
"""
tokens = iter(tokens)
name_token = next(tokens) # assume there is at least one
if name_token.type == 'IDENT':
# CSS syntax is case-insensitive
property_name = name_token.value.lower()
else:
raise ParseError(name_token,
'expected a property name, got {0}'.format(name_token.type))
token = name_token # In case ``tokens`` is now empty
for token in tokens:
if token.type == ':':
break
elif token.type != 'S':
raise ParseError(
token, "expected ':', got {0}".format(token.type))
else:
raise ParseError(token, "expected ':'")
value = strip_whitespace(list(tokens))
if not value:
raise ParseError(token, 'expected a property value')
validate_value(value)
value, priority = self.parse_value_priority(value)
return Declaration(
property_name, value, priority, name_token.line, name_token.column)
def parse_value_priority(self, tokens):
"""Separate any ``!important`` marker at the end of a property value.
:param tokens:
A list of tokens for the property value.
:returns:
A tuple of the actual property value (a list of tokens)
and the :attr:`~Declaration.priority`.
"""
value = list(tokens)
# Walk the token list from the end
token = value.pop()
if token.type == 'IDENT' and token.value.lower() == 'important':
while value:
token = value.pop()
if token.type == 'DELIM' and token.value == '!':
# Skip any white space before the '!'
while value and value[-1].type == 'S':
value.pop()
if not value:
raise ParseError(
token, 'expected a value before !important')
return value, 'important'
# Skip white space between '!' and 'important'
elif token.type != 'S':
break
return tokens, None
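# A minimal usage sketch (illustrative only): parse a stylesheet and inspect
# the resulting rules; invalid constructs end up in ``sheet.errors`` instead
# of raising.
if __name__ == '__main__':
    sheet = CSS21Parser().parse_stylesheet(
        'p { color: red !important }\n@lorem-ipsum;')
    rule = sheet.rules[0]
    decl = rule.declarations[0]
    print('%s { %s: %s !%s }' % (rule.selector.as_css(), decl.name,
                                 decl.value.as_css(), decl.priority))
    # -> p { color: red !important }
    print(sheet.errors[0])
    # e.g. Parse error at 2:1, unknown at-rule in stylesheet context: @lorem-ipsum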

View File

@@ -0,0 +1,255 @@
# coding: utf8
"""
tinycss.decoding
----------------
Decoding stylesheets from bytes to Unicode.
http://www.w3.org/TR/CSS21/syndata.html#charset
:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""
from __future__ import unicode_literals
import operator
import re
from polyglot.binary import from_hex_bytes
__all__ = ['decode'] # Everything else is implementation detail
def decode(css_bytes, protocol_encoding=None,
linking_encoding=None, document_encoding=None):
"""
Determine the character encoding from the passed metadata and the
``@charset`` rule in the stylesheet (if any); and decode accordingly.
If no encoding information is available or decoding fails,
decoding defaults to UTF-8 and then falls back on ISO-8859-1.
:param css_bytes:
a CSS stylesheet as a byte string
:param protocol_encoding:
The "charset" parameter of a "Content-Type" HTTP header (if any),
or similar metadata for other protocols.
:param linking_encoding:
``<link charset="">`` or other metadata from the linking mechanism
(if any)
:param document_encoding:
Encoding of the referring style sheet or document (if any)
:return:
A tuple of a Unicode string, with any BOM removed, and the
encoding that was used.
"""
if protocol_encoding:
css_unicode = try_encoding(css_bytes, protocol_encoding)
if css_unicode is not None:
return css_unicode, protocol_encoding
for encoding, pattern in ENCODING_MAGIC_NUMBERS:
match = pattern(css_bytes)
if match:
has_at_charset = isinstance(encoding, tuple)
if has_at_charset:
extract, endianness = encoding
encoding = extract(match.group(1))
# Get an ASCII-only unicode value.
# This is the only thing that works on both Python 2 and 3
# for bytes.decode()
# Non-ASCII encoding names are invalid anyway,
# but make sure they stay invalid.
encoding = encoding.decode('ascii', 'replace')
encoding = encoding.replace('\ufffd', '?')
if encoding.replace('-', '').replace('_', '').lower() in [
'utf16', 'utf32']:
encoding += endianness
encoding = encoding.encode('ascii', 'replace').decode('ascii')
css_unicode = try_encoding(css_bytes, encoding)
if css_unicode and not (has_at_charset and not
css_unicode.startswith('@charset "')):
return css_unicode, encoding
break
for encoding in [linking_encoding, document_encoding]:
if encoding:
css_unicode = try_encoding(css_bytes, encoding)
if css_unicode is not None:
return css_unicode, encoding
css_unicode = try_encoding(css_bytes, 'UTF-8')
if css_unicode is not None:
return css_unicode, 'UTF-8'
return try_encoding(css_bytes, 'ISO-8859-1', fallback=False), 'ISO-8859-1'
def try_encoding(css_bytes, encoding, fallback=True):
if fallback:
try:
css_unicode = css_bytes.decode(encoding)
# LookupError means unknown encoding
except (UnicodeDecodeError, LookupError):
return None
else:
css_unicode = css_bytes.decode(encoding)
if css_unicode and css_unicode[0] == '\ufeff':
# Remove any Byte Order Mark
css_unicode = css_unicode[1:]
return css_unicode
def hex2re(hex_data):
return re.escape(from_hex_bytes(hex_data.replace(' ', '').encode('ascii')))
class Slicer(object):
"""Slice()[start:stop:end] == slice(start, stop, end)"""
def __getitem__(self, slice_):
return operator.itemgetter(slice_)
Slice = Slicer()
# List of (encoding, pattern) pairs.
# encoding is either a string, or a (slice_, endianness) pair for
# @charset rules taken "as specified"; slice_ is a slice object describing
# how to extract the specified encoding name from the regexp match.
ENCODING_MAGIC_NUMBERS = [
((Slice[:], ''), re.compile(
hex2re('EF BB BF 40 63 68 61 72 73 65 74 20 22')
+ b'([^\x22]*?)'
+ hex2re('22 3B')).match),
('UTF-8', re.compile(
hex2re('EF BB BF')).match),
((Slice[:], ''), re.compile(
hex2re('40 63 68 61 72 73 65 74 20 22')
+ b'([^\x22]*?)'
+ hex2re('22 3B')).match),
((Slice[1::2], '-BE'), re.compile(
hex2re('FE FF 00 40 00 63 00 68 00 61 00 72 00 73 00 65 00'
'74 00 20 00 22')
+ b'((\x00[^\x22])*?)'
+ hex2re('00 22 00 3B')).match),
((Slice[1::2], '-BE'), re.compile(
hex2re('00 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00'
'20 00 22')
+ b'((\x00[^\x22])*?)'
+ hex2re('00 22 00 3B')).match),
((Slice[::2], '-LE'), re.compile(
hex2re('FF FE 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74'
'00 20 00 22 00')
+ b'(([^\x22]\x00)*?)'
+ hex2re('22 00 3B 00')).match),
((Slice[::2], '-LE'), re.compile(
hex2re('40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00 20'
'00 22 00')
+ b'(([^\x22]\x00)*?)'
+ hex2re('22 00 3B 00')).match),
((Slice[3::4], '-BE'), re.compile(
hex2re('00 00 FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00'
'00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00'
'00 74 00 00 00 20 00 00 00 22')
+ b'((\x00\x00\x00[^\x22])*?)'
+ hex2re('00 00 00 22 00 00 00 3B')).match),
((Slice[3::4], '-BE'), re.compile(
hex2re('00 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00'
'00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00'
'00 20 00 00 00 22')
+ b'((\x00\x00\x00[^\x22])*?)'
+ hex2re('00 00 00 22 00 00 00 3B')).match),
# Python does not support 2143 or 3412 endianness, AFAIK.
# I guess we could fix it up ourselves but meh. Patches welcome.
# ((Slice[2::4], '-2143'), re.compile(
# hex2re('00 00 FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00'
# '00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00'
# '74 00 00 00 20 00 00 00 22 00')
# + b'((\x00\x00[^\x22]\x00)*?)'
# + hex2re('00 00 22 00 00 00 3B 00')).match),
# ((Slice[2::4], '-2143'), re.compile(
# hex2re('00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00'
# '00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00'
# '20 00 00 00 22 00')
# + b'((\x00\x00[^\x22]\x00)*?)'
# + hex2re('00 00 22 00 00 00 3B 00')).match),
# ((Slice[1::4], '-3412'), re.compile(
# hex2re('FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00 00 00'
# '61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74'
# '00 00 00 20 00 00 00 22 00 00')
# + b'((\x00[^\x22]\x00\x00)*?)'
# + hex2re('00 22 00 00 00 3B 00 00')).match),
# ((Slice[1::4], '-3412'), re.compile(
# hex2re('00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00'
# '72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20'
# '00 00 00 22 00 00')
# + b'((\x00[^\x22]\x00\x00)*?)'
# + hex2re('00 22 00 00 00 3B 00 00')).match),
((Slice[::4], '-LE'), re.compile(
hex2re('FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61'
'00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00'
'00 00 20 00 00 00 22 00 00 00')
+ b'(([^\x22]\x00\x00\x00)*?)'
+ hex2re('22 00 00 00 3B 00 00 00')).match),
((Slice[::4], '-LE'), re.compile(
hex2re('40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72'
'00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00'
'00 00 22 00 00 00')
+ b'(([^\x22]\x00\x00\x00)*?)'
+ hex2re('22 00 00 00 3B 00 00 00')).match),
('UTF-32-BE', re.compile(
hex2re('00 00 FE FF')).match),
('UTF-32-LE', re.compile(
hex2re('FF FE 00 00')).match),
# ('UTF-32-2143', re.compile(
# hex2re('00 00 FF FE')).match),
# ('UTF-32-3412', re.compile(
# hex2re('FE FF 00 00')).match),
('UTF-16-BE', re.compile(
hex2re('FE FF')).match),
('UTF-16-LE', re.compile(
hex2re('FF FE')).match),
# Some of these are supported by Python, but I didn't bother.
# You know the story with patches ...
# # as specified, transcoded from EBCDIC to ASCII
# ('as_specified-EBCDIC', re.compile(
# hex2re('7C 83 88 81 99 A2 85 A3 40 7F')
# + b'([^\x7F]*?)'
# + hex2re('7F 5E')).match),
# # as specified, transcoded from IBM1026 to ASCII
# ('as_specified-IBM1026', re.compile(
# hex2re('AE 83 88 81 99 A2 85 A3 40 FC')
# + b'([^\xFC]*?)'
# + hex2re('FC 5E')).match),
# # as specified, transcoded from GSM 03.38 to ASCII
# ('as_specified-GSM_03.38', re.compile(
# hex2re('00 63 68 61 72 73 65 74 20 22')
# + b'([^\x22]*?)'
# + hex2re('22 3B')).match),
]
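# A minimal usage sketch (illustrative only): an explicit protocol encoding
# wins; otherwise a leading @charset rule (or BOM) is honoured before the
# UTF-8 / ISO-8859-1 fallbacks.
if __name__ == '__main__':
    css, encoding = decode(
        '@charset "ISO-8859-1"; a:before { content: "é" }'.encode('ISO-8859-1'))
    assert encoding == 'ISO-8859-1' and 'é' in css
    css, encoding = decode(b'a { color: red }', protocol_encoding='utf-8')
    assert encoding == 'utf-8'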

View File

@@ -0,0 +1,225 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from polyglot.builtins import map
from tinycss.css21 import CSS21Parser, ParseError
from .tokenizer import tokenize_grouped
def parse_font_family_tokens(tokens):
families = []
current_family = ''
def commit():
val = current_family.strip()
if val:
families.append(val)
for token in tokens:
if token.type == 'STRING':
if current_family:
commit()
current_family = token.value
elif token.type == 'DELIM':
if token.value == ',':
if current_family:
commit()
current_family = ''
elif token.type == 'IDENT':
current_family += ' ' + token.value
if current_family:
commit()
return families
def parse_font_family(css_string):
return parse_font_family_tokens(tokenize_grouped(type('')(css_string).strip()))
def serialize_single_font_family(x):
xl = x.lower()
if xl in GENERIC_FAMILIES:
if xl == 'sansserif':
xl = 'sans-serif'
return xl
if SIMPLE_NAME_PAT.match(x) is not None and not x.lower().startswith('and'):
# css_parser dies if a font name starts with and
return x
return '"%s"' % x.replace('"', r'\"')
def serialize_font_family(families):
return ', '.join(map(serialize_single_font_family, families))
GLOBAL_IDENTS = frozenset('inherit initial unset normal'.split())
STYLE_IDENTS = frozenset('italic oblique'.split())
VARIANT_IDENTS = frozenset(('small-caps',))
WEIGHT_IDENTS = frozenset('bold bolder lighter'.split())
STRETCH_IDENTS = frozenset('ultra-condensed extra-condensed condensed semi-condensed semi-expanded expanded extra-expanded ultra-expanded'.split())
BEFORE_SIZE_IDENTS = STYLE_IDENTS | VARIANT_IDENTS | WEIGHT_IDENTS | STRETCH_IDENTS
SIZE_IDENTS = frozenset('xx-small x-small small medium large x-large xx-large larger smaller'.split())
WEIGHT_SIZES = frozenset(map(int, '100 200 300 400 500 600 700 800 900'.split()))
LEGACY_FONT_SPEC = frozenset('caption icon menu message-box small-caption status-bar'.split())
GENERIC_FAMILIES = frozenset('serif sans-serif sansserif cursive fantasy monospace'.split())
SIMPLE_NAME_PAT = re.compile(r'[a-zA-Z][a-zA-Z0-9_-]*$')
def serialize_font(font_dict):
ans = []
for x in 'style variant weight stretch'.split():
val = font_dict.get('font-' + x)
if val is not None:
ans.append(val)
val = font_dict.get('font-size')
if val is not None:
fs = val
val = font_dict.get('line-height')
if val is not None:
fs += '/' + val
ans.append(fs)
val = font_dict.get('font-family')
if val:
ans.append(serialize_font_family(val))
return ' '.join(ans)
def parse_font(css_string):
# See https://www.w3.org/TR/css-fonts-3/#font-prop
style = variant = weight = stretch = size = height = None
tokens = list(reversed(tuple(tokenize_grouped(type('')(css_string).strip()))))
if tokens and tokens[-1].value in LEGACY_FONT_SPEC:
return {'font-family':['sans-serif']}
while tokens:
tok = tokens.pop()
if tok.type == 'STRING':
tokens.append(tok)
break
if tok.type == 'INTEGER':
if size is None:
if weight is None and tok.value in WEIGHT_SIZES:
weight = tok.as_css()
continue
break
if height is None:
height = tok.as_css()
break
break
if tok.type == 'NUMBER':
if size is not None and height is None:
height = tok.as_css()
break
if tok.type == 'DELIM':
if tok.value == '/' and size is not None and height is None:
continue
break
if tok.type in ('DIMENSION', 'PERCENTAGE'):
if size is None:
size = tok.as_css()
continue
if height is None:
height = tok.as_css()
break
if tok.type == 'IDENT':
if tok.value in GLOBAL_IDENTS:
if size is not None:
if height is None:
height = tok.value
else:
tokens.append(tok)
break
if style is None:
style = tok.value
elif variant is None:
variant = tok.value
elif weight is None:
weight = tok.value
elif stretch is None:
stretch = tok.value
elif size is None:
size = tok.value
elif height is None:
height = tok.value
break
else:
tokens.append(tok)
break
continue
if tok.value in BEFORE_SIZE_IDENTS:
if size is not None:
break
if tok.value in STYLE_IDENTS:
style = tok.value
elif tok.value in VARIANT_IDENTS:
variant = tok.value
elif tok.value in WEIGHT_IDENTS:
weight = tok.value
elif tok.value in STRETCH_IDENTS:
stretch = tok.value
elif tok.value in SIZE_IDENTS:
size = tok.value
else:
tokens.append(tok)
break
families = parse_font_family_tokens(reversed(tokens))
ans = {}
if style is not None:
ans['font-style'] = style
if variant is not None:
ans['font-variant'] = variant
if weight is not None:
ans['font-weight'] = weight
if stretch is not None:
ans['font-stretch'] = stretch
if size is not None:
ans['font-size'] = size
if height is not None:
ans['line-height'] = height
if families:
ans['font-family'] = families
return ans
class FontFaceRule(object):
at_keyword = '@font-face'
__slots__ = 'declarations', 'line', 'column'
def __init__(self, declarations, line, column):
self.declarations = declarations
self.line = line
self.column = column
def __repr__(self):
return ('<{0.__class__.__name__} at {0.line}:{0.column}>'
.format(self))
class CSSFonts3Parser(CSS21Parser):
''' Parse @font-face rules from the CSS 3 fonts module '''
ALLOWED_CONTEXTS_FOR_FONT_FACE = {'stylesheet', '@media', '@page'}
def __init__(self):
super(CSSFonts3Parser, self).__init__()
self.at_parsers['@font-face'] = self.parse_font_face_rule
def parse_font_face_rule(self, rule, previous_rules, errors, context):
if context not in self.ALLOWED_CONTEXTS_FOR_FONT_FACE:
raise ParseError(rule,
'@font-face rule not allowed in ' + context)
if rule.body is None:
raise ParseError(rule,
'invalid {0} rule: missing block'.format(rule.at_keyword))
if rule.head:
raise ParseError(rule, '{0} rule is not allowed to have content before the descriptor declaration'.format(rule.at_keyword))
declarations, decerrors = self.parse_declaration_list(rule.body)
errors.extend(decerrors)
return FontFaceRule(declarations, rule.line, rule.column)
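# A minimal usage sketch (illustrative only): the font shorthand helpers
# round-trip between a CSS string and a dict of longhand properties.
if __name__ == '__main__':
    font = parse_font('italic bold 10pt/1.2 "Liberation Serif", serif')
    assert font['font-style'] == 'italic' and font['font-weight'] == 'bold'
    assert font['font-size'] == '10pt' and font['line-height'] == '1.2'
    assert font['font-family'] == ['Liberation Serif', 'serif']
    print(serialize_font(font))
    # -> italic bold 10pt/1.2 "Liberation Serif", serif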

View File

@@ -0,0 +1,106 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from tinycss.css21 import CSS21Parser
from tinycss.parsing import remove_whitespace, split_on_comma, ParseError
from polyglot.builtins import error_message
class MediaQuery(object):
__slots__ = 'media_type', 'expressions', 'negated'
def __init__(self, media_type='all', expressions=(), negated=False):
self.media_type = media_type
self.expressions = expressions
self.negated = negated
def __repr__(self):
return '<MediaQuery type=%s negated=%s expressions=%s>' % (
self.media_type, self.negated, self.expressions)
def __eq__(self, other):
return self.media_type == getattr(other, 'media_type', None) and \
self.negated == getattr(other, 'negated', None) and \
self.expressions == getattr(other, 'expressions', None)
class MalformedExpression(Exception):
def __init__(self, tok, msg):
Exception.__init__(self, msg)
self.tok = tok
class CSSMedia3Parser(CSS21Parser):
''' Parse media queries as defined by the CSS 3 media module '''
def parse_media(self, tokens, errors):
if not tokens:
return [MediaQuery('all')]
queries = []
for part in split_on_comma(remove_whitespace(tokens)):
negated = False
media_type = None
expressions = []
try:
for i, tok in enumerate(part):
if i == 0 and tok.type == 'IDENT':
val = tok.value.lower()
if val == 'only':
continue # ignore leading ONLY
if val == 'not':
negated = True
continue
if media_type is None and tok.type == 'IDENT':
media_type = tok.value
continue
elif media_type is None:
media_type = 'all'
if tok.type == 'IDENT' and tok.value.lower() == 'and':
continue
if not tok.is_container:
raise MalformedExpression(tok, 'expected a media expression not a %s' % tok.type)
if tok.type != '(':
raise MalformedExpression(tok, 'media expressions must be in parentheses not %s' % tok.type)
content = remove_whitespace(tok.content)
if len(content) == 0:
raise MalformedExpression(tok, 'media expressions cannot be empty')
if content[0].type != 'IDENT':
raise MalformedExpression(content[0], 'expected a media feature not a %s' % content[0].type)
media_feature, expr = content[0].value, None
if len(content) > 1:
if len(content) < 3:
raise MalformedExpression(content[1], 'malformed media feature definition')
if content[1].type != ':':
raise MalformedExpression(content[1], 'expected a :')
expr = content[2:]
if len(expr) == 1:
expr = expr[0]
elif len(expr) == 3 and (expr[0].type, expr[1].type, expr[1].value, expr[2].type) == (
'INTEGER', 'DELIM', '/', 'INTEGER'):
# This should really be moved into token_data, but
# since RATIO is not part of CSS 2.1 and does not
# occur anywhere else, we special case it here.
r = expr[0]
r.value = (expr[0].value, expr[2].value)
r.type = 'RATIO'
r._as_css = expr[0]._as_css + expr[1]._as_css + expr[2]._as_css
expr = r
else:
raise MalformedExpression(expr[0], 'malformed media feature definition')
expressions.append((media_feature, expr))
except MalformedExpression as err:
errors.append(ParseError(err.tok, error_message(err)))
media_type, negated, expressions = 'all', True, ()
queries.append(MediaQuery(media_type or 'all', expressions=tuple(expressions), negated=negated))
return queries
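# A minimal usage sketch (illustrative only): with this parser the ``media``
# attribute of @media rules holds MediaQuery objects rather than plain
# media-type strings.
if __name__ == '__main__':
    sheet = CSSMedia3Parser().parse_stylesheet(
        '@media not screen and (min-width: 600px) { p { margin: 0 } }')
    query = sheet.rules[0].media[0]
    feature, value = query.expressions[0]
    print(query.media_type, query.negated, feature, value.as_css())
    # -> screen True min-width 600px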

View File

@@ -0,0 +1,163 @@
# coding: utf8
"""
tinycss.page3
------------------
Support for CSS 3 Paged Media syntax:
http://dev.w3.org/csswg/css3-page/
Adds support for named page selectors and margin rules.
:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""
from __future__ import unicode_literals, division
from .css21 import CSS21Parser, ParseError
class MarginRule(object):
"""A parsed at-rule for margin box.
.. attribute:: at_keyword
One of the 16 following strings:
* ``@top-left-corner``
* ``@top-left``
* ``@top-center``
* ``@top-right``
* ``@top-right-corner``
* ``@bottom-left-corner``
* ``@bottom-left``
* ``@bottom-center``
* ``@bottom-right``
* ``@bottom-right-corner``
* ``@left-top``
* ``@left-middle``
* ``@left-bottom``
* ``@right-top``
* ``@right-middle``
* ``@right-bottom``
.. attribute:: declarations
A list of :class:`~.css21.Declaration` objects.
.. attribute:: line
Source line where this was read.
.. attribute:: column
Source column where this was read.
"""
__slots__ = 'at_keyword', 'declarations', 'line', 'column'
def __init__(self, at_keyword, declarations, line, column):
self.at_keyword = at_keyword
self.declarations = declarations
self.line = line
self.column = column
class CSSPage3Parser(CSS21Parser):
"""Extend :class:`~.css21.CSS21Parser` for `CSS 3 Paged Media`_ syntax.
.. _CSS 3 Paged Media: http://dev.w3.org/csswg/css3-page/
Compared to CSS 2.1, the ``at_rules`` and ``selector`` attributes of
:class:`~.css21.PageRule` objects are modified:
* ``at_rules`` is not always empty, it is a list of :class:`MarginRule`
objects.
* ``selector``, instead of a single string, is a tuple of the page name
and the pseudo class. Each of these may be ``None`` or a string.
+--------------------------+------------------------+
| CSS | Parsed selectors |
+==========================+========================+
| .. code-block:: css | .. code-block:: python |
| | |
| @page {} | (None, None) |
| @page :first {} | (None, 'first') |
| @page chapter {} | ('chapter', None) |
| @page table:right {} | ('table', 'right') |
+--------------------------+------------------------+
"""
PAGE_MARGIN_AT_KEYWORDS = (
'@top-left-corner',
'@top-left',
'@top-center',
'@top-right',
'@top-right-corner',
'@bottom-left-corner',
'@bottom-left',
'@bottom-center',
'@bottom-right',
'@bottom-right-corner',
'@left-top',
'@left-middle',
'@left-bottom',
'@right-top',
'@right-middle',
'@right-bottom',
)
def __init__(self):
super(CSSPage3Parser, self).__init__()
for x in self.PAGE_MARGIN_AT_KEYWORDS:
self.at_parsers[x] = self.parse_page_margin_rule
def parse_page_margin_rule(self, rule, previous_rules, errors, context):
if context != '@page':
raise ParseError(rule,
'%s rule not allowed in %s' % (rule.at_keyword, context))
if rule.head:
raise ParseError(rule.head[0],
'unexpected %s token in %s rule header'
% (rule.head[0].type, rule.at_keyword))
declarations, body_errors = self.parse_declaration_list(rule.body)
errors.extend(body_errors)
return MarginRule(rule.at_keyword, declarations,
rule.line, rule.column)
def parse_page_selector(self, head):
"""Parse an @page selector.
:param head:
The ``head`` attribute of an unparsed :class:`AtRule`.
:returns:
A ``(name, pseudo_class)`` selector tuple and its specificity.
The pseudo class is 'first', 'left', 'right' or None;
'blank' is added by GCPM.
:raises:
:class:`~.parsing.ParseError` on invalid selectors
"""
if not head:
return (None, None), (0, 0, 0)
if head[0].type == 'IDENT':
name = head.pop(0).value
while head and head[0].type == 'S':
head.pop(0)
if not head:
return (name, None), (1, 0, 0)
name_specificity = (1,)
else:
name = None
name_specificity = (0,)
if (len(head) == 2 and head[0].type == ':'
and head[1].type == 'IDENT'):
pseudo_class = head[1].value
specificity = {
'first': (1, 0), 'blank': (1, 0),
'left': (0, 1), 'right': (0, 1),
}.get(pseudo_class)
if specificity:
return (name, pseudo_class), (name_specificity + specificity)
raise ParseError(head[0], 'invalid @page selector')
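# A minimal usage sketch (illustrative only): named page selectors become
# (name, pseudo_class) tuples and margin boxes become MarginRule objects.
if __name__ == '__main__':
    sheet = CSSPage3Parser().parse_stylesheet(
        '@page chapter:first { margin: 1em; @top-center { content: "x" } }')
    rule = sheet.rules[0]
    assert rule.selector == ('chapter', 'first')
    assert rule.specificity == (1, 1, 0)
    assert rule.at_rules[0].at_keyword == '@top-center'
    assert rule.declarations[0].name == 'margin'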

View File

@@ -0,0 +1,165 @@
# coding: utf8
"""
tinycss.parsing
---------------
Utilities for parsing lists of tokens.
:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""
from __future__ import unicode_literals
# TODO: unit tests
def split_on_comma(tokens):
"""Split a list of tokens on commas, ie ``,`` DELIM tokens.
Only "top-level" comma tokens are splitting points, not commas inside a
function or other :class:`ContainerToken`.
:param tokens:
An iterable of :class:`~.token_data.Token` or
:class:`~.token_data.ContainerToken`.
:returns:
A list of lists of tokens
"""
parts = []
this_part = []
for token in tokens:
if token.type == 'DELIM' and token.value == ',':
parts.append(this_part)
this_part = []
else:
this_part.append(token)
parts.append(this_part)
return parts
def strip_whitespace(tokens):
"""Remove whitespace at the beggining and end of a token list.
Whitespace tokens in-between other tokens in the list are preserved.
:param tokens:
A list of :class:`~.token_data.Token` or
:class:`~.token_data.ContainerToken`.
:return:
A new sub-sequence of the list.
"""
for i, token in enumerate(tokens):
if token.type != 'S':
break
else:
return [] # only whitespace
tokens = tokens[i:]
while tokens and tokens[-1].type == 'S':
tokens.pop()
return tokens
def remove_whitespace(tokens):
"""Remove any top-level whitespace in a token list.
Whitespace tokens inside recursive :class:`~.token_data.ContainerToken`
are preserved.
:param tokens:
A list of :class:`~.token_data.Token` or
:class:`~.token_data.ContainerToken`.
:return:
A new sub-sequence of the list.
"""
return [token for token in tokens if token.type != 'S']
def validate_value(tokens):
"""Validate a property value.
:param tokens:
an iterable of tokens
:raises:
:class:`ParseError` if there is any invalid token for the 'value'
production of the core grammar.
"""
for token in tokens:
type_ = token.type
if type_ == '{':
validate_block(token.content, 'property value')
else:
validate_any(token, 'property value')
def validate_block(tokens, context):
"""
:raises:
:class:`ParseError` if there is any invalid token for the 'block'
production of the core grammar.
:param tokens: an iterable of tokens
:param context: a string for the 'unexpected in ...' message
"""
for token in tokens:
type_ = token.type
if type_ == '{':
validate_block(token.content, context)
elif type_ not in (';', 'ATKEYWORD'):
validate_any(token, context)
def validate_any(token, context):
"""
:raises:
:class:`ParseError` if this is an invalid token for the
'any' production of the core grammar.
:param token: a single token
:param context: a string for the 'unexpected in ...' message
"""
type_ = token.type
if type_ in ('FUNCTION', '(', '['):
for token in token.content:
validate_any(token, type_)
elif type_ not in ('S', 'IDENT', 'DIMENSION', 'PERCENTAGE', 'NUMBER',
'INTEGER', 'URI', 'DELIM', 'STRING', 'HASH', ':',
'UNICODE-RANGE'):
if type_ in ('}', ')', ']'):
adjective = 'unmatched'
else:
adjective = 'unexpected'
raise ParseError(token,
'{0} {1} token in {2}'.format(adjective, type_, context))
class ParseError(ValueError):
"""Details about a CSS syntax error. Usually indicates that something
(a rule or a declaration) was ignored and will not appear as a parsed
object.
This exception is typically logged in a list rather than being propagated
to the user API.
.. attribute:: line
Source line where the error occurred.
.. attribute:: column
Column in the source line where the error occurred.
.. attribute:: reason
What happened (a string).
"""
def __init__(self, subject, reason):
self.line = subject.line
self.column = subject.column
self.reason = reason
super(ParseError, self).__init__(
'Parse error at {0.line}:{0.column}, {0.reason}'.format(self))
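# Illustrative sketch of how these helpers are typically combined: tokenize a
# property value, strip the surrounding whitespace and split it on top-level
# commas (the comma inside the parenthesized group is not a splitting point).
if __name__ == '__main__':
    from tinycss.tokenizer import python_tokenize_flat, regroup
    tokens = list(regroup(python_tokenize_flat('a, b (c, d), e')))
    parts = [strip_whitespace(part) for part in split_on_comma(tokens)]
    print([[token.type for token in part] for part in parts])
    # -> [['IDENT'], ['IDENT', 'S', '('], ['IDENT']]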

View File

@@ -0,0 +1,37 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import unittest
def jsonify(tokens):
"""Turn tokens into "JSON-compatible" data structures."""
for token in tokens:
if token.type == 'FUNCTION':
yield (token.type, token.function_name,
list(jsonify(token.content)))
elif token.is_container:
yield token.type, list(jsonify(token.content))
else:
yield token.type, token.value
class BaseTest(unittest.TestCase):
longMessage = True
maxDiff = None
ae = unittest.TestCase.assertEqual
def assert_errors(self, errors, expected_errors):
"""Test not complete error messages but only substrings."""
self.ae(len(errors), len(expected_errors))
for error, expected in zip(errors, expected_errors):
self.assertIn(expected, type(u'')(error))
def jsonify_declarations(self, rule):
return [(decl.name, list(jsonify(decl.value)))
for decl in rule.declarations]
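# Illustrative note: jsonify flattens tokens into nested (type, value) tuples;
# for example, tokenizing 'rgb(0, 51, 255)' with the tinycss tokenizer and
# jsonifying the regrouped result gives
#   [('FUNCTION', 'rgb', [('INTEGER', 0), ('DELIM', ','), ('S', ' '),
#                         ('INTEGER', 51), ('DELIM', ','), ('S', ' '),
#                         ('INTEGER', 255)])]
# which is the shape the expected values in the test modules below are written in.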

View File

@@ -0,0 +1,198 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from tinycss.color3 import parse_color_string, hsl_to_rgb
from tinycss.tests import BaseTest
class TestColor3(BaseTest):
def test_color_parsing(self):
for css_source, expected_result in [
('', None),
(' /* hey */\n', None),
('4', None),
('top', None),
('/**/transparent', (0, 0, 0, 0)),
('transparent', (0, 0, 0, 0)),
(' transparent\n', (0, 0, 0, 0)),
('TransParent', (0, 0, 0, 0)),
('currentColor', 'currentColor'),
('CURRENTcolor', 'currentColor'),
('current_Color', None),
('black', (0, 0, 0, 1)),
('white', (1, 1, 1, 1)),
('fuchsia', (1, 0, 1, 1)),
('cyan', (0, 1, 1, 1)),
('CyAn', (0, 1, 1, 1)),
('darkkhaki', (189 / 255., 183 / 255., 107 / 255., 1)),
('#', None),
('#f', None),
('#ff', None),
('#fff', (1, 1, 1, 1)),
('#ffg', None),
('#ffff', None),
('#fffff', None),
('#ffffff', (1, 1, 1, 1)),
('#fffffg', None),
('#fffffff', None),
('#ffffffff', None),
('#fffffffff', None),
('#cba987', (203 / 255., 169 / 255., 135 / 255., 1)),
('#CbA987', (203 / 255., 169 / 255., 135 / 255., 1)),
('#1122aA', (17 / 255., 34 / 255., 170 / 255., 1)),
('#12a', (17 / 255., 34 / 255., 170 / 255., 1)),
('rgb(203, 169, 135)', (203 / 255., 169 / 255., 135 / 255., 1)),
('RGB(255, 255, 255)', (1, 1, 1, 1)),
('rgB(0, 0, 0)', (0, 0, 0, 1)),
('rgB(0, 51, 255)', (0, .2, 1, 1)),
('rgb(0,51,255)', (0, .2, 1, 1)),
('rgb(0\t, 51 ,255)', (0, .2, 1, 1)),
('rgb(/* R */0, /* G */51, /* B */255)', (0, .2, 1, 1)),
('rgb(-51, 306, 0)', (-.2, 1.2, 0, 1)), # out of 0..1 is allowed
('rgb(42%, 3%, 50%)', (.42, .03, .5, 1)),
('RGB(100%, 100%, 100%)', (1, 1, 1, 1)),
('rgB(0%, 0%, 0%)', (0, 0, 0, 1)),
('rgB(10%, 20%, 30%)', (.1, .2, .3, 1)),
('rgb(10%,20%,30%)', (.1, .2, .3, 1)),
('rgb(10%\t, 20% ,30%)', (.1, .2, .3, 1)),
('rgb(/* R */10%, /* G */20%, /* B */30%)', (.1, .2, .3, 1)),
('rgb(-12%, 110%, 1400%)', (-.12, 1.1, 14, 1)), # out of 0..1 is allowed
('rgb(10%, 50%, 0)', None),
('rgb(255, 50%, 0%)', None),
('rgb(0, 0 0)', None),
('rgb(0, 0, 0deg)', None),
('rgb(0, 0, light)', None),
('rgb()', None),
('rgb(0)', None),
('rgb(0, 0)', None),
('rgb(0, 0, 0, 0)', None),
('rgb(0%)', None),
('rgb(0%, 0%)', None),
('rgb(0%, 0%, 0%, 0%)', None),
('rgb(0%, 0%, 0%, 0)', None),
('rgba(0, 0, 0, 0)', (0, 0, 0, 0)),
('rgba(203, 169, 135, 0.3)', (203 / 255., 169 / 255., 135 / 255., 0.3)),
('RGBA(255, 255, 255, 0)', (1, 1, 1, 0)),
('rgBA(0, 51, 255, 1)', (0, 0.2, 1, 1)),
('rgba(0, 51, 255, 1.1)', (0, 0.2, 1, 1)),
('rgba(0, 51, 255, 37)', (0, 0.2, 1, 1)),
('rgba(0, 51, 255, 0.42)', (0, 0.2, 1, 0.42)),
('rgba(0, 51, 255, 0)', (0, 0.2, 1, 0)),
('rgba(0, 51, 255, -0.1)', (0, 0.2, 1, 0)),
('rgba(0, 51, 255, -139)', (0, 0.2, 1, 0)),
('rgba(42%, 3%, 50%, 0.3)', (.42, .03, .5, 0.3)),
('RGBA(100%, 100%, 100%, 0)', (1, 1, 1, 0)),
('rgBA(0%, 20%, 100%, 1)', (0, 0.2, 1, 1)),
('rgba(0%, 20%, 100%, 1.1)', (0, 0.2, 1, 1)),
('rgba(0%, 20%, 100%, 37)', (0, 0.2, 1, 1)),
('rgba(0%, 20%, 100%, 0.42)', (0, 0.2, 1, 0.42)),
('rgba(0%, 20%, 100%, 0)', (0, 0.2, 1, 0)),
('rgba(0%, 20%, 100%, -0.1)', (0, 0.2, 1, 0)),
('rgba(0%, 20%, 100%, -139)', (0, 0.2, 1, 0)),
('rgba(255, 255, 255, 0%)', None),
('rgba(10%, 50%, 0, 1)', None),
('rgba(255, 50%, 0%, 1)', None),
('rgba(0, 0, 0 0)', None),
('rgba(0, 0, 0, 0deg)', None),
('rgba(0, 0, 0, light)', None),
('rgba()', None),
('rgba(0)', None),
('rgba(0, 0, 0)', None),
('rgba(0, 0, 0, 0, 0)', None),
('rgba(0%)', None),
('rgba(0%, 0%)', None),
('rgba(0%, 0%, 0%)', None),
('rgba(0%, 0%, 0%, 0%)', None),
('rgba(0%, 0%, 0%, 0%, 0%)', None),
('HSL(0, 0%, 0%)', (0, 0, 0, 1)),
('hsL(0, 100%, 50%)', (1, 0, 0, 1)),
('hsl(60, 100%, 37.5%)', (0.75, 0.75, 0, 1)),
('hsl(780, 100%, 37.5%)', (0.75, 0.75, 0, 1)),
('hsl(-300, 100%, 37.5%)', (0.75, 0.75, 0, 1)),
('hsl(300, 50%, 50%)', (0.75, 0.25, 0.75, 1)),
('hsl(10, 50%, 0)', None),
('hsl(50%, 50%, 0%)', None),
('hsl(0, 0% 0%)', None),
('hsl(30deg, 100%, 100%)', None),
('hsl(0, 0%, light)', None),
('hsl()', None),
('hsl(0)', None),
('hsl(0, 0%)', None),
('hsl(0, 0%, 0%, 0%)', None),
('HSLA(-300, 100%, 37.5%, 1)', (0.75, 0.75, 0, 1)),
('hsLA(-300, 100%, 37.5%, 12)', (0.75, 0.75, 0, 1)),
('hsla(-300, 100%, 37.5%, 0.2)', (0.75, 0.75, 0, .2)),
('hsla(-300, 100%, 37.5%, 0)', (0.75, 0.75, 0, 0)),
('hsla(-300, 100%, 37.5%, -3)', (0.75, 0.75, 0, 0)),
('hsla(10, 50%, 0, 1)', None),
('hsla(50%, 50%, 0%, 1)', None),
('hsla(0, 0% 0%, 1)', None),
('hsla(30deg, 100%, 100%, 1)', None),
('hsla(0, 0%, light, 1)', None),
('hsla()', None),
('hsla(0)', None),
('hsla(0, 0%)', None),
('hsla(0, 0%, 0%, 50%)', None),
('hsla(0, 0%, 0%, 1, 0%)', None),
('cmyk(0, 0, 0, 0)', None),
]:
result = parse_color_string(css_source)
if isinstance(result, tuple):
for got, expected in zip(result, expected_result):
# Compensate for floating point errors:
self.assertLess(abs(got - expected), 1e-10)
for i, attr in enumerate(['red', 'green', 'blue', 'alpha']):
self.ae(getattr(result, attr), result[i])
else:
self.ae(result, expected_result)
def test_hsl(self):
for hsl, expected_rgb in [
# http://en.wikipedia.org/wiki/HSL_and_HSV#Examples
((0, 0, 100), (1, 1, 1)),
((127, 0, 100), (1, 1, 1)),
((0, 0, 50), (0.5, 0.5, 0.5)),
((127, 0, 50), (0.5, 0.5, 0.5)),
((0, 0, 0), (0, 0, 0)),
((127, 0, 0), (0, 0, 0)),
((0, 100, 50), (1, 0, 0)),
((60, 100, 37.5), (0.75, 0.75, 0)),
((780, 100, 37.5), (0.75, 0.75, 0)),
((-300, 100, 37.5), (0.75, 0.75, 0)),
((120, 100, 25), (0, 0.5, 0)),
((180, 100, 75), (0.5, 1, 1)),
((240, 100, 75), (0.5, 0.5, 1)),
((300, 50, 50), (0.75, 0.25, 0.75)),
((61.8, 63.8, 39.3), (0.628, 0.643, 0.142)),
((251.1, 83.2, 51.1), (0.255, 0.104, 0.918)),
((134.9, 70.7, 39.6), (0.116, 0.675, 0.255)),
((49.5, 89.3, 49.7), (0.941, 0.785, 0.053)),
((283.7, 77.5, 54.2), (0.704, 0.187, 0.897)),
((14.3, 81.7, 62.4), (0.931, 0.463, 0.316)),
((56.9, 99.1, 76.5), (0.998, 0.974, 0.532)),
((162.4, 77.9, 44.7), (0.099, 0.795, 0.591)),
((248.3, 60.1, 37.3), (0.211, 0.149, 0.597)),
((240.5, 29, 60.7), (0.495, 0.493, 0.721)),
]:
for got, expected in zip(hsl_to_rgb(*hsl), expected_rgb):
# Compensate for floating point errors and Wikipedia's rounding:
self.assertLess(abs(got - expected), 0.001)
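# Worked example of the conversion the expectations above encode, following the
# CSS3 color module algorithm (hue.to.rgb is the helper named in the spec;
# shown here for illustration only):
# hsl(120, 100%, 25%), i.e. h = 120/360, s = 1, l = 0.25:
#   m2 = l * (s + 1) = 0.5              (branch for l <= 0.5)
#   m1 = 2 * l - m2  = 0
#   r  = hue.to.rgb(m1, m2, h + 1/3) = m1 = 0
#   g  = hue.to.rgb(m1, m2, h)       = m2 = 0.5
#   b  = hue.to.rgb(m1, m2, h - 1/3) = m1 = 0
# which is the (0, 0.5, 0) triple asserted for ((120, 100, 25)) above.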

View File

@@ -0,0 +1,336 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import io
import os
import tempfile
from tinycss.css21 import CSS21Parser
from tinycss.tests.tokenizing import jsonify
from tinycss.tests import BaseTest
class CoreParser(CSS21Parser):
"""A parser that always accepts unparsed at-rules."""
def parse_at_rule(self, rule, stylesheet_rules, errors, context):
return rule
def parse_bytes(css_bytes, kwargs):
return CSS21Parser().parse_stylesheet_bytes(css_bytes, **kwargs)
def parse_bytesio_file(css_bytes, kwargs):
css_file = io.BytesIO(css_bytes)
return CSS21Parser().parse_stylesheet_file(css_file, **kwargs)
def parse_filename(css_bytes, kwargs):
css_file = tempfile.NamedTemporaryFile(delete=False)
try:
css_file.write(css_bytes)
# Windows can not open the filename a second time while
# it is still open for writing.
css_file.close()
return CSS21Parser().parse_stylesheet_file(css_file.name, **kwargs)
finally:
os.remove(css_file.name)
class TestCSS21(BaseTest):
def test_bytes(self):
for (css_bytes, kwargs, expected_result, parse) in [
params + (parse,)
for parse in [parse_bytes, parse_bytesio_file, parse_filename]
for params in [
('@import "é";'.encode('utf8'), {}, 'é'),
('@import "é";'.encode('utf16'), {}, 'é'), # with a BOM
('@import "é";'.encode('latin1'), {}, 'é'),
('@import "£";'.encode('Shift-JIS'), {}, '\x81\x92'), # latin1 mojibake
('@charset "Shift-JIS";@import "£";'.encode('Shift-JIS'), {}, '£'),
(' @charset "Shift-JIS";@import "£";'.encode('Shift-JIS'), {},
'\x81\x92'),
('@import "£";'.encode('Shift-JIS'),
{'document_encoding': 'Shift-JIS'}, '£'),
('@import "£";'.encode('Shift-JIS'),
{'document_encoding': 'utf8'}, '\x81\x92'),
('@charset "utf8"; @import "£";'.encode('utf8'),
{'document_encoding': 'latin1'}, '£'),
# Mojibake yay!
(' @charset "utf8"; @import "é";'.encode('utf8'),
{'document_encoding': 'latin1'}, 'é'),
('@import "é";'.encode('utf8'), {'document_encoding': 'latin1'}, 'é'),
]
]:
stylesheet = parse(css_bytes, kwargs)
self.ae(stylesheet.rules[0].at_keyword, '@import')
self.ae(stylesheet.rules[0].uri, expected_result)
def test_at_rules(self):
for (css_source, expected_rules, expected_errors) in [
(' /* hey */\n', 0, []),
('foo {}', 1, []),
('foo{} @lipsum{} bar{}', 2,
['unknown at-rule in stylesheet context: @lipsum']),
('@charset "ascii"; foo {}', 1, []),
(' @charset "ascii"; foo {}', 1, ['mis-placed or malformed @charset rule']),
('@charset ascii; foo {}', 1, ['mis-placed or malformed @charset rule']),
('foo {} @charset "ascii";', 1, ['mis-placed or malformed @charset rule']),
]:
# Pass 'encoding' to allow @charset
stylesheet = CSS21Parser().parse_stylesheet(css_source, encoding='utf8')
self.assert_errors(stylesheet.errors, expected_errors)
self.ae(len(stylesheet.rules), expected_rules)
def test_core_parser(self):
for (css_source, expected_rules, expected_errors) in [
(' /* hey */\n', [], []),
('foo{} /* hey */\n@bar;@baz{}',
[('foo', []), ('@bar', [], None), ('@baz', [], [])], []),
('@import "foo.css"/**/;', [
('@import', [('STRING', 'foo.css')], None)], []),
('@import "foo.css"/**/', [
('@import', [('STRING', 'foo.css')], None)], []),
('@import "foo.css', [
('@import', [('STRING', 'foo.css')], None)], []),
('{}', [], ['empty selector']),
('a{b:4}', [('a', [('b', [('INTEGER', 4)])])], []),
('@page {\t b: 4; @margin}', [('@page', [], [
('S', '\t '), ('IDENT', 'b'), (':', ':'), ('S', ' '), ('INTEGER', 4),
(';', ';'), ('S', ' '), ('ATKEYWORD', '@margin'),
])], []),
('foo', [], ['no declaration block found']),
('foo @page {} bar {}', [('bar', [])],
['unexpected ATKEYWORD token in selector']),
('foo { content: "unclosed string;\n color:red; ; margin/**/\n: 2cm; }',
[('foo', [('margin', [('DIMENSION', 2)])])],
['unexpected BAD_STRING token in property value']),
('foo { 4px; bar: 12% }',
[('foo', [('bar', [('PERCENTAGE', 12)])])],
['expected a property name, got DIMENSION']),
('foo { bar! 3cm auto ; baz: 7px }',
[('foo', [('baz', [('DIMENSION', 7)])])],
["expected ':', got DELIM"]),
('foo { bar ; baz: {("}"/* comment */) {0@fizz}} }',
[('foo', [('baz', [('{', [
('(', [('STRING', '}')]), ('S', ' '),
('{', [('INTEGER', 0), ('ATKEYWORD', '@fizz')])
])])])],
["expected ':'"]),
('foo { bar: ; baz: not(z) }',
[('foo', [('baz', [('FUNCTION', 'not', [('IDENT', 'z')])])])],
['expected a property value']),
('foo { bar: (]) ; baz: U+20 }',
[('foo', [('baz', [('UNICODE-RANGE', 'U+20')])])],
['unmatched ] token in (']),
]:
stylesheet = CoreParser().parse_stylesheet(css_source)
self.assert_errors(stylesheet.errors, expected_errors)
result = [
(rule.at_keyword, list(jsonify(rule.head)),
list(jsonify(rule.body))
if rule.body is not None else None)
if rule.at_keyword else
(rule.selector.as_css(), [
(decl.name, list(jsonify(decl.value)))
for decl in rule.declarations])
for rule in stylesheet.rules
]
self.ae(result, expected_rules)
def test_parse_style_attr(self):
for (css_source, expected_declarations, expected_errors) in [
(' /* hey */\n', [], []),
('b:4', [('b', [('INTEGER', 4)])], []),
('{b:4}', [], ['expected a property name, got {']),
('b:4} c:3', [], ['unmatched } token in property value']),
(' 4px; bar: 12% ',
[('bar', [('PERCENTAGE', 12)])],
['expected a property name, got DIMENSION']),
('bar! 3cm auto ; baz: 7px',
[('baz', [('DIMENSION', 7)])],
["expected ':', got DELIM"]),
('foo; bar ; baz: {("}"/* comment */) {0@fizz}}',
[('baz', [('{', [
('(', [('STRING', '}')]), ('S', ' '),
('{', [('INTEGER', 0), ('ATKEYWORD', '@fizz')])
])])],
["expected ':'", "expected ':'"]),
('bar: ; baz: not(z)',
[('baz', [('FUNCTION', 'not', [('IDENT', 'z')])])],
['expected a property value']),
('bar: (]) ; baz: U+20',
[('baz', [('UNICODE-RANGE', 'U+20')])],
['unmatched ] token in (']),
]:
declarations, errors = CSS21Parser().parse_style_attr(css_source)
self.assert_errors(errors, expected_errors)
result = [(decl.name, list(jsonify(decl.value)))
for decl in declarations]
self.ae(result, expected_declarations)
def test_important(self):
for (css_source, expected_declarations, expected_errors) in [
(' /* hey */\n', [], []),
('a:1; b:2',
[('a', [('INTEGER', 1)], None), ('b', [('INTEGER', 2)], None)], []),
('a:1 important; b: important',
[('a', [('INTEGER', 1), ('S', ' '), ('IDENT', 'important')], None),
('b', [('IDENT', 'important')], None)],
[]),
('a:1 !important; b:2',
[('a', [('INTEGER', 1)], 'important'), ('b', [('INTEGER', 2)], None)],
[]),
('a:1!\t Im\\50 O\\RTant; b:2',
[('a', [('INTEGER', 1)], 'important'), ('b', [('INTEGER', 2)], None)],
[]),
('a: !important; b:2',
[('b', [('INTEGER', 2)], None)],
['expected a value before !important']),
]:
declarations, errors = CSS21Parser().parse_style_attr(css_source)
self.assert_errors(errors, expected_errors)
result = [(decl.name, list(jsonify(decl.value)), decl.priority)
for decl in declarations]
self.ae(result, expected_declarations)
def test_at_import(self):
for (css_source, expected_rules, expected_errors) in [
(' /* hey */\n', [], []),
('@import "foo.css";', [('foo.css', ['all'])], []),
('@import url(foo.css);', [('foo.css', ['all'])], []),
('@import "foo.css" screen, print;',
[('foo.css', ['screen', 'print'])], []),
('@charset "ascii"; @import "foo.css"; @import "bar.css";',
[('foo.css', ['all']), ('bar.css', ['all'])], []),
('foo {} @import "foo.css";',
[], ['@import rule not allowed after a ruleset']),
('@page {} @import "foo.css";',
[], ['@import rule not allowed after an @page rule']),
('@import ;',
[], ['expected URI or STRING for @import rule']),
('@import foo.css;',
[], ['expected URI or STRING for @import rule, got IDENT']),
('@import "foo.css" {}',
[], ["expected ';', got a block"]),
]:
# Pass 'encoding' to allow @charset
stylesheet = CSS21Parser().parse_stylesheet(css_source, encoding='utf8')
self.assert_errors(stylesheet.errors, expected_errors)
result = [
(rule.uri, rule.media)
for rule in stylesheet.rules
if rule.at_keyword == '@import'
]
self.ae(result, expected_rules)
def test_at_page(self):
for (css, expected_result, expected_errors) in [
('@page {}', (None, (0, 0), []), []),
('@page:first {}', ('first', (1, 0), []), []),
('@page :left{}', ('left', (0, 1), []), []),
('@page\t\n:right {}', ('right', (0, 1), []), []),
('@page :last {}', None, ['invalid @page selector']),
('@page : right {}', None, ['invalid @page selector']),
('@page table:left {}', None, ['invalid @page selector']),
('@page;', None, ['invalid @page rule: missing block']),
('@page { a:1; ; b: 2 }',
(None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
[]),
('@page { a:1; c: ; b: 2 }',
(None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
['expected a property value']),
('@page { a:1; @top-left {} b: 2 }',
(None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
['unknown at-rule in @page context: @top-left']),
('@page { a:1; @top-left {}; b: 2 }',
(None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
['unknown at-rule in @page context: @top-left']),
]:
stylesheet = CSS21Parser().parse_stylesheet(css)
self.assert_errors(stylesheet.errors, expected_errors)
if expected_result is None:
self.assertFalse(stylesheet.rules)
else:
self.ae(len(stylesheet.rules), 1)
rule = stylesheet.rules[0]
self.ae(rule.at_keyword, '@page')
self.ae(rule.at_rules, []) # in CSS 2.1
result = (
rule.selector,
rule.specificity,
[(decl.name, list(jsonify(decl.value)))
for decl in rule.declarations],
)
self.ae(result, expected_result)
def test_at_media(self):
for (css_source, expected_rules, expected_errors) in [
(' /* hey */\n', [], []),
('@media {}', [(['all'], [])], []),
('@media all {}', [(['all'], [])], []),
('@media screen, print {}', [(['screen', 'print'], [])], []),
('@media all;', [], ['invalid @media rule: missing block']),
('@media 4 {}', [], ['expected a media type, got INTEGER']),
('@media , screen {}', [], ['expected a media type']),
('@media screen, {}', [], ['expected a media type']),
('@media screen print {}', [],
['expected a media type, got IDENT, IDENT']),
('@media all { @page { a: 1 } @media; @import; foo { a: 1 } }',
[(['all'], [('foo', [('a', [('INTEGER', 1)])])])],
['@page rule not allowed in @media',
'@media rule not allowed in @media',
'@import rule not allowed in @media']),
]:
stylesheet = CSS21Parser().parse_stylesheet(css_source)
self.assert_errors(stylesheet.errors, expected_errors)
for rule in stylesheet.rules:
self.ae(rule.at_keyword, '@media')
result = [
(rule.media, [
(sub_rule.selector.as_css(), [
(decl.name, list(jsonify(decl.value)))
for decl in sub_rule.declarations])
for sub_rule in rule.rules
])
for rule in stylesheet.rules
]
self.ae(result, expected_rules)

View File

@@ -0,0 +1,71 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from tinycss.decoding import decode
from tinycss.tests import BaseTest
def params(css, encoding, use_bom=False, expect_error=False, **kwargs):
"""Nicer syntax to make a tuple."""
return css, encoding, use_bom, expect_error, kwargs
class TestDecoding(BaseTest):
def test_decoding(self):
for (css, encoding, use_bom, expect_error, kwargs) in [
params('', 'utf8'), # default to utf8
params('𐂃', 'utf8'),
params('é', 'latin1'), # utf8 fails, fall back on latin1 (ISO-8859-1)
params('£', 'ShiftJIS', expect_error=True),
params('£', 'ShiftJIS', protocol_encoding='Shift-JIS'),
params('£', 'ShiftJIS', linking_encoding='Shift-JIS'),
params('£', 'ShiftJIS', document_encoding='Shift-JIS'),
params('£', 'ShiftJIS', protocol_encoding='utf8',
document_encoding='ShiftJIS'),
params('@charset "utf8"; £', 'ShiftJIS', expect_error=True),
params('@charset "utf£8"; £', 'ShiftJIS', expect_error=True),
params('@charset "unknown-encoding"; £', 'ShiftJIS', expect_error=True),
params('@charset "utf8"; £', 'ShiftJIS', document_encoding='ShiftJIS'),
params('£', 'ShiftJIS', linking_encoding='utf8',
document_encoding='ShiftJIS'),
params('@charset "utf-32"; 𐂃', 'utf-32-be'),
params('@charset "Shift-JIS"; £', 'ShiftJIS'),
params('@charset "ISO-8859-8"; £', 'ShiftJIS', expect_error=True),
params('𐂃', 'utf-16-le', expect_error=True), # no BOM
params('𐂃', 'utf-16-le', use_bom=True),
params('𐂃', 'utf-32-be', expect_error=True),
params('𐂃', 'utf-32-be', use_bom=True),
params('𐂃', 'utf-32-be', document_encoding='utf-32-be'),
params('𐂃', 'utf-32-be', linking_encoding='utf-32-be'),
params('@charset "utf-32-le"; 𐂃', 'utf-32-be',
use_bom=True, expect_error=True),
# protocol_encoding takes precedence over @charset
params('@charset "ISO-8859-8"; £', 'ShiftJIS',
protocol_encoding='Shift-JIS'),
params('@charset "unknown-encoding"; £', 'ShiftJIS',
protocol_encoding='Shift-JIS'),
params('@charset "Shift-JIS"; £', 'ShiftJIS',
protocol_encoding='utf8'),
# @charset takes precedence over document_encoding
params('@charset "Shift-JIS"; £', 'ShiftJIS',
document_encoding='ISO-8859-8'),
# @charset takes precedence over linking_encoding
params('@charset "Shift-JIS"; £', 'ShiftJIS',
linking_encoding='ISO-8859-8'),
# linking_encoding takes precedence over document_encoding
params('£', 'ShiftJIS',
linking_encoding='Shift-JIS', document_encoding='ISO-8859-8'),
]:
if use_bom:
source = '\ufeff' + css
else:
source = css
css_bytes = source.encode(encoding)
result, result_encoding = decode(css_bytes, **kwargs)
if expect_error:
self.assertNotEqual(result, css)
else:
self.ae(result, css)
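# Illustrative sketch of the precedence exercised above: protocol_encoding wins
# over an in-stylesheet @charset, @charset wins over linking_encoding and
# document_encoding, and linking_encoding wins over document_encoding.
# decode() returns the decoded text together with the encoding it ended up
# using (the second element of the tuple unpacked in the test above).
if __name__ == '__main__':
    css_bytes = '@charset "ISO-8859-8"; a { content: "£" }'.encode('Shift-JIS')
    text, used_encoding = decode(css_bytes, protocol_encoding='Shift-JIS')
    print(text)  # the protocol encoding was honoured, the @charset was ignored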

View File

@@ -0,0 +1,66 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from tinycss.fonts3 import CSSFonts3Parser, parse_font_family, parse_font, serialize_font
from tinycss.tests import BaseTest
from polyglot.builtins import iteritems
class TestFonts3(BaseTest):
def test_font_face(self):
'Test parsing of font face rules'
for css, expected_declarations, expected_errors in [
('@font-face {}', [], []),
('@font-face { font-family: Moose; src: url(font1.ttf) }',
[('font-family', [('IDENT', 'Moose')]), ('src', [('URI', 'font1.ttf')])], []),
]:
stylesheet = CSSFonts3Parser().parse_stylesheet(css)
self.assert_errors(stylesheet.errors, expected_errors)
self.ae(len(stylesheet.rules), 1)
rule = stylesheet.rules[0]
self.ae(self.jsonify_declarations(rule), expected_declarations)
stylesheet = CSSFonts3Parser().parse_stylesheet('@font-face;')
self.assert_errors(stylesheet.errors, ['missing block'])
def test_parse_font_family(self):
' Test parsing of font-family values '
for raw, q in iteritems({
'"1as"': ['1as'],
'A B C, serif': ['A B C', 'serif'],
r'Red\/Black': ['Red/Black'],
'A B': ['A B'],
r'Ahem\!': ['Ahem!'],
r'"Ahem!"': ['Ahem!'],
'€42': ['€42'],
r'Hawaii\ 5-0': ['Hawaii 5-0'],
r'"X \"Y"': ['X "Y'],
'A B, C D, "E", serif': ['A B', 'C D', 'E', 'serif'],
'': [],
'"", a': ['a'],
}):
self.ae(q, parse_font_family(raw))
for single in ('serif', 'sans-serif', 'A B C'):
self.ae([single], parse_font_family(single))
def test_parse_font(self):
def t(raw, **kw):
q = {('line' if k == 'height' else 'font') + '-' + k:v for k, v in iteritems(kw)}
self.ae(q, parse_font(raw))
self.ae(q, parse_font(serialize_font(q)))
t('caption', family=['sans-serif'])
t('serif', family=['serif'])
t('12pt/14pt sans-serif', size='12pt', height='14pt', family=['sans-serif'])
t('80% sans-serif', size='80%', family=['sans-serif'])
t('x-large/110% "new century schoolbook", serif', size='x-large', height='110%', family=['new century schoolbook', 'serif'])
t('bold italic large Palatino, serif', weight='bold', style='italic', size='large', family=['Palatino', 'serif'])
t('normal small-caps 120%/120% fantasy', style='normal', variant='small-caps', size='120%', height='120%', family=['fantasy'])
t('condensed oblique 12pt Helvetica Neue, serif', stretch='condensed', style='oblique', size='12pt', family=['Helvetica Neue', 'serif'])
t('300 italic 1.3em/1.7em FB Armada, sans-serif', weight='300', style='italic', size='1.3em', height='1.7em', family=['FB Armada', 'sans-serif'])

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
import unittest, os, argparse
def find_tests():
from calibre.utils.run_tests import find_tests_in_dir
base = os.path.dirname(os.path.abspath(__file__))
return find_tests_in_dir(base)
def run_tests(find_tests=find_tests, for_build=False):
if not for_build:
parser = argparse.ArgumentParser()
parser.add_argument('name', nargs='?', default=None,
help='The name of the test to run')
args = parser.parse_args()
if not for_build and args.name and args.name.startswith('.'):
tests = find_tests()
q = args.name[1:]
if not q.startswith('test_'):
q = 'test_' + q
ans = None
try:
for suite in tests:
for test in suite._tests:
if test.__class__.__name__ == 'ModuleImportFailure':
raise Exception('Failed to import a test module: %s' % test)
for s in test:
if s._testMethodName == q:
ans = s
raise StopIteration()
except StopIteration:
pass
if ans is None:
print('No test named %s found' % args.name)
raise SystemExit(1)
tests = ans
else:
tests = unittest.defaultTestLoader.loadTestsFromName(args.name) if not for_build and args.name else find_tests()
r = unittest.TextTestRunner
if for_build:
r = r(verbosity=0, buffer=True, failfast=True)
else:
r = r(verbosity=4)
result = r.run(tests)
if for_build and result.errors or result.failures:
raise SystemExit(1)
if __name__ == '__main__':
run_tests()
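# Usage sketch (assumes calibre.utils.run_tests is importable, as used in
# find_tests above): running this module with no argument discovers and runs
# every test next to this file; a name starting with '.' selects one test
# method, e.g. ".token_grouping" resolves to test_token_grouping.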

View File

@@ -0,0 +1,65 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from tinycss.media3 import CSSMedia3Parser, MediaQuery as MQ
from tinycss.tests import BaseTest, jsonify
def jsonify_expr(e):
if e is None:
return None
return next(jsonify([e]))
def jsonify_expressions(mqlist):
for mq in mqlist:
mq.expressions = tuple(
(k, jsonify_expr(e)) for k, e in mq.expressions)
return mqlist
class TestFonts3(BaseTest):
def test_media_queries(self):
'Test parsing of media queries from the CSS 3 media module'
for css, media_query_list, expected_errors in [
# CSS 2.1 (simple media queries)
('@media {}', [MQ()], []),
('@media all {}', [MQ()], []),
('@media screen {}', [MQ('screen')], []),
('@media , screen {}', [MQ(), MQ('screen')], []),
('@media screen, {}', [MQ('screen'), MQ()], []),
# Examples from the CSS 3 specs
('@media screen and (color) {}', [MQ('screen', (('color', None),))], []),
('@media all and (min-width:500px) {}', [
MQ('all', (('min-width', ('DIMENSION', 500)),))], []),
('@media (min-width:500px) {}', [
MQ('all', (('min-width', ('DIMENSION', 500)),))], []),
('@media (orientation: portrait) {}', [
MQ('all', (('orientation', ('IDENT', 'portrait')),))], []),
('@media screen and (color), projection and (color) {}', [
MQ('screen', (('color', None),)), MQ('projection', (('color', None),)),], []),
('@media not screen and (color) {}', [
MQ('screen', (('color', None),), True)], []),
('@media only screen and (color) {}', [
MQ('screen', (('color', None),))], []),
('@media aural and (device-aspect-ratio: 16/9) {}', [
MQ('aural', (('device-aspect-ratio', ('RATIO', (16, 9))),))], []),
('@media (resolution: 166dpi) {}', [
MQ('all', (('resolution', ('DIMENSION', 166)),))], []),
('@media (min-resolution: 166DPCM) {}', [
MQ('all', (('min-resolution', ('DIMENSION', 166)),))], []),
# Malformed media queries
('@media (example, all,), speech {}', [MQ(negated=True), MQ('speech')], ['expected a :']),
('@media &test, screen {}', [MQ(negated=True), MQ('screen')], ['expected a media expression not a DELIM']),
]:
stylesheet = CSSMedia3Parser().parse_stylesheet(css)
self.assert_errors(stylesheet.errors, expected_errors)
self.ae(len(stylesheet.rules), 1)
rule = stylesheet.rules[0]
self.ae(jsonify_expressions(rule.media), media_query_list)

View File

@@ -0,0 +1,86 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from tinycss.page3 import CSSPage3Parser
from tinycss.tests import BaseTest
class TestPage3(BaseTest):
def test_selectors(self):
for css, expected_selector, expected_specificity, expected_errors in [
('@page {}', (None, None), (0, 0, 0), []),
('@page :first {}', (None, 'first'), (0, 1, 0), []),
('@page:left{}', (None, 'left'), (0, 0, 1), []),
('@page :right {}', (None, 'right'), (0, 0, 1), []),
('@page :blank{}', (None, 'blank'), (0, 1, 0), []),
('@page :last {}', None, None, ['invalid @page selector']),
('@page : first {}', None, None, ['invalid @page selector']),
('@page foo:first {}', ('foo', 'first'), (1, 1, 0), []),
('@page bar :left {}', ('bar', 'left'), (1, 0, 1), []),
(r'@page \26:right {}', ('&', 'right'), (1, 0, 1), []),
('@page foo {}', ('foo', None), (1, 0, 0), []),
(r'@page \26 {}', ('&', None), (1, 0, 0), []),
('@page foo fist {}', None, None, ['invalid @page selector']),
('@page foo, bar {}', None, None, ['invalid @page selector']),
('@page foo&first {}', None, None, ['invalid @page selector']),
]:
stylesheet = CSSPage3Parser().parse_stylesheet(css)
self.assert_errors(stylesheet.errors, expected_errors)
if stylesheet.rules:
self.ae(len(stylesheet.rules), 1)
rule = stylesheet.rules[0]
self.ae(rule.at_keyword, '@page')
selector = rule.selector
self.ae(rule.specificity, expected_specificity)
else:
selector = None
self.ae(selector, expected_selector)
def test_content(self):
for css, expected_declarations, expected_rules, expected_errors in [
('@page {}', [], [], []),
('@page { foo: 4; bar: z }',
[('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])], [], []),
('''@page { foo: 4;
@top-center { content: "Awesome Title" }
@bottom-left { content: counter(page) }
bar: z
}''',
[('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])],
[('@top-center', [('content', [('STRING', 'Awesome Title')])]),
('@bottom-left', [('content', [
('FUNCTION', 'counter', [('IDENT', 'page')])])])],
[]),
('''@page { foo: 4;
@bottom-top { content: counter(page) }
bar: z
}''',
[('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])],
[],
['unknown at-rule in @page context: @bottom-top']),
('@page{} @top-right{}', [], [], [
'@top-right rule not allowed in stylesheet']),
('@page{ @top-right 4 {} }', [], [], [
'unexpected INTEGER token in @top-right rule header']),
# Not much error recovery tests here. This should be covered in test_css21
]:
stylesheet = CSSPage3Parser().parse_stylesheet(css)
self.assert_errors(stylesheet.errors, expected_errors)
self.ae(len(stylesheet.rules), 1)
rule = stylesheet.rules[0]
self.ae(rule.at_keyword, '@page')
self.ae(self.jsonify_declarations(rule), expected_declarations)
rules = [(margin_rule.at_keyword, self.jsonify_declarations(margin_rule))
for margin_rule in rule.at_rules]
self.ae(rules, expected_rules)

View File

@@ -0,0 +1,269 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from tinycss.tests import BaseTest, jsonify
from tinycss.tokenizer import python_tokenize_flat, c_tokenize_flat, regroup
if c_tokenize_flat is None:
tokenizers = (python_tokenize_flat,)
else:
tokenizers = (python_tokenize_flat, c_tokenize_flat)
def token_api(self, tokenize):
for css_source in [
'(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
]:
tokens = list(regroup(tokenize(css_source)))
self.ae(len(tokens), 1)
self.ae(len(tokens[0].content), 7)
def token_serialize_css(self, tokenize):
for tokenize in tokenizers:
for css_source in [
r'''p[example="\
foo(int x) {\
this.x = x;\
}\
"]''',
'"Lorem\\26Ipsum\ndolor" sit',
'/* Lorem\nipsum */\fa {\n color: red;\tcontent: "dolor\\\fsit" }',
'not([[lorem]]{ipsum (42)})',
'a[b{d]e}',
'a[b{"d',
]:
for _regroup in (regroup, lambda x: x):
tokens = _regroup(tokenize(css_source, ignore_comments=False))
result = ''.join(token.as_css() for token in tokens)
self.ae(result, css_source)
def comments(self, tokenize):
for ignore_comments, expected_tokens in [
(False, [
('COMMENT', '/* lorem */'),
('S', ' '),
('IDENT', 'ipsum'),
('[', [
('IDENT', 'dolor'),
('COMMENT', '/* sit */'),
]),
('BAD_COMMENT', '/* amet')
]),
(True, [
('S', ' '),
('IDENT', 'ipsum'),
('[', [
('IDENT', 'dolor'),
]),
]),
]:
css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
tokens = regroup(tokenize(css_source, ignore_comments))
result = list(jsonify(tokens))
self.ae(result, expected_tokens)
def token_grouping(self, tokenize):
for css_source, expected_tokens in [
('', []),
(r'Lorem\26 "i\psum"4px', [
('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),
('not([[lorem]]{ipsum (42)})', [
('FUNCTION', 'not', [
('[', [
('[', [
('IDENT', 'lorem'),
]),
]),
('{', [
('IDENT', 'ipsum'),
('S', ' '),
('(', [
('INTEGER', 42),
])
])
])]),
# Close everything at EOF, no error
('a[b{"d', [
('IDENT', 'a'),
('[', [
('IDENT', 'b'),
('{', [
('STRING', 'd'),
]),
]),
]),
# Any remaining ), ] or } token is a nesting error
('a[b{d]e}', [
('IDENT', 'a'),
('[', [
('IDENT', 'b'),
('{', [
('IDENT', 'd'),
(']', ']'), # The error is visible here
('IDENT', 'e'),
]),
]),
]),
# ref:
('a[b{d}e]', [
('IDENT', 'a'),
('[', [
('IDENT', 'b'),
('{', [
('IDENT', 'd'),
]),
('IDENT', 'e'),
]),
]),
]:
tokens = regroup(tokenize(css_source, ignore_comments=False))
result = list(jsonify(tokens))
self.ae(result, expected_tokens)
def positions(self, tokenize):
css = '/* Lorem\nipsum */\fa {\n color: red;\tcontent: "dolor\\\fsit" }'
tokens = tokenize(css, ignore_comments=False)
result = [(token.type, token.line, token.column) for token in tokens]
self.ae(result, [
('COMMENT', 1, 1), ('S', 2, 9),
('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
('S', 5, 5), ('}', 5, 6)])
def tokens(self, tokenize):
for css_source, expected_tokens in [
('', []),
('red -->',
[('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]),
# Longest match rule: no CDC
('red-->',
[('IDENT', 'red--'), ('DELIM', '>')]),
(r'''p[example="\
foo(int x) {\
this.x = x;\
}\
"]''', [
('IDENT', 'p'),
('[', '['),
('IDENT', 'example'),
('DELIM', '='),
('STRING', 'foo(int x) { this.x = x;}'),
(']', ']')]),
# Numbers are parsed
('42 .5 -4pX 1.25em 30%',
[('INTEGER', 42), ('S', ' '),
('NUMBER', .5), ('S', ' '),
# units are normalized to lower-case:
('DIMENSION', -4, 'px'), ('S', ' '),
('DIMENSION', 1.25, 'em'), ('S', ' '),
('PERCENTAGE', 30, '%')]),
# URLs are extracted
('url(foo.png)', [('URI', 'foo.png')]),
('url("foo.png")', [('URI', 'foo.png')]),
# Escaping
(r'/* Comment with a \ backslash */',
[('COMMENT', '/* Comment with a \ backslash */')]), # Unchanged
# backslash followed by a newline in a string: ignored
('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]),
# backslash followed by a newline outside a string: stands for itself
('Lorem\\\nIpsum', [
('IDENT', 'Lorem'), ('DELIM', '\\'),
('S', '\n'), ('IDENT', 'Ipsum')]),
# Cancel the meaning of special characters
(r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]), # or not specal
(r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]),
(r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]),
(r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]),
(r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]),
(r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]),
(r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]),
(r'Lorem+Ipsum', [('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]),
(r'url(foo\).png)', [('URI', 'foo).png')]),
# Unicode and backslash escaping
('\\26 B', [('IDENT', '&B')]),
('\\&B', [('IDENT', '&B')]),
('@\\26\tB', [('ATKEYWORD', '@&B')]),
('@\\&B', [('ATKEYWORD', '@&B')]),
('#\\26\nB', [('HASH', '#&B')]),
('#\\&B', [('HASH', '#&B')]),
('\\26\r\nB(', [('FUNCTION', '&B(')]),
('\\&B(', [('FUNCTION', '&B(')]),
(r'12.5\000026B', [('DIMENSION', 12.5, '&b')]),
(r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]), # max 6 digits
(r'12.5\&B', [('DIMENSION', 12.5, '&b')]),
(r'"\26 B"', [('STRING', '&B')]),
(r"'\000026B'", [('STRING', '&B')]),
(r'"\&B"', [('STRING', '&B')]),
(r'url("\26 B")', [('URI', '&B')]),
(r'url(\26 B)', [('URI', '&B')]),
(r'url("\&B")', [('URI', '&B')]),
(r'url(\&B)', [('URI', '&B')]),
(r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]),
# Bad strings
# String ends at EOF without closing: no error, parsed
('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]),
# Unescaped newline: ends the string, error, unparsed
('"Lorem\\26Ipsum\n', [
('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]),
# Tokenization restarts after the newline, so the second " starts
# a new string (which ends at EOF without errors, as above.)
('"Lorem\\26Ipsum\ndolor" sit', [
('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
('IDENT', 'dolor'), ('STRING', ' sit')]),
]:
sources = [css_source]
for css_source in sources:
tokens = tokenize(css_source, ignore_comments=False)
result = [
(token.type, token.value) + (
() if token.unit is None else (token.unit,))
for token in tokens
]
self.ae(result, expected_tokens)
class TestTokenizer(BaseTest):
def run_test(self, func):
for tokenize in tokenizers:
func(self, tokenize)
def test_token_api(self):
self.run_test(token_api)
def test_token_serialize_css(self):
self.run_test(token_serialize_css)
def test_comments(self):
self.run_test(comments)
def test_token_grouping(self):
self.run_test(token_grouping)
def test_positions(self):
"""Test the reported line/column position of each token."""
self.run_test(positions)
def test_tokens(self):
self.run_test(tokens)

View File

@@ -0,0 +1,450 @@
# coding: utf8
"""
tinycss.token_data
------------------
Shared data for both implementations (Cython and Python) of the tokenizer.
:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""
from __future__ import unicode_literals
import re
import sys
import operator
import functools
import string
# * Raw strings with the r'' notation are used so that \ do not need
# to be escaped.
# * Names and regexps are separated by a tabulation.
# * Macros are re-ordered so that only previous definitions are needed.
# * {} are used for macro substitution with ``string.Formatter``,
# so other uses of { or } have been doubled.
# * The syntax is otherwise compatible with re.compile.
# * Some parentheses were added to add capturing groups.
# (in unicode, DIMENSION and URI)
# *** Willful violation: ***
# Numbers can take a + or - sign, but the sign is a separate DELIM token.
# Since comments are allowed anywhere between tokens, this makes
# the following valid. It means 10 negative pixels:
# margin-top: -/**/10px
# This makes parsing numbers a pain, so instead we'll do the same as Firefox
# and make the sign part of the 'num' macro. The above CSS will be invalid.
# See discussion:
# http://lists.w3.org/Archives/Public/www-style/2011Oct/0028.html
MACROS = r'''
nl \n|\r\n|\r|\f
w [ \t\r\n\f]*
nonascii [^\0-\237]
unicode \\([0-9a-f]{{1,6}})(\r\n|[ \n\r\t\f])?
simple_escape [^\n\r\f0-9a-f]
escape {unicode}|\\{simple_escape}
nmstart [_a-z]|{nonascii}|{escape}
nmchar [_a-z0-9-]|{nonascii}|{escape}
name {nmchar}+
ident [-]?{nmstart}{nmchar}*
num [-+]?(?:[0-9]*\.[0-9]+|[0-9]+)
string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\'
string {string1}|{string2}
badstring1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\\?
badstring2 \'([^\n\r\f\\']|\\{nl}|{escape})*\\?
badstring {badstring1}|{badstring2}
badcomment1 \/\*[^*]*\*+([^/*][^*]*\*+)*
badcomment2 \/\*[^*]*(\*+[^/*][^*]*)*
badcomment {badcomment1}|{badcomment2}
baduri1 url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}
baduri2 url\({w}{string}{w}
baduri3 url\({w}{badstring}
baduri {baduri1}|{baduri2}|{baduri3}
'''.replace(r'\0', '\0').replace(r'\237', '\237')
# Removed these tokens. Instead, they're tokenized as two DELIM each.
# INCLUDES ~=
# DASHMATCH |=
# They are only used in selectors but selectors3 also have ^=, *= and $=.
# We don't actually parse selectors anyway
# Re-ordered so that the longest match is always the first.
# For example, "url('foo')" matches URI, BAD_URI, FUNCTION and IDENT,
# but URI would always be a longer match than the others.
TOKENS = r'''
S [ \t\r\n\f]+
URI url\({w}({string}|([!#$%&*-\[\]-~]|{nonascii}|{escape})*){w}\)
BAD_URI {baduri}
FUNCTION {ident}\(
UNICODE-RANGE u\+[0-9a-f?]{{1,6}}(-[0-9a-f]{{1,6}})?
IDENT {ident}
ATKEYWORD @{ident}
HASH #{name}
DIMENSION ({num})({ident})
PERCENTAGE {num}%
NUMBER {num}
STRING {string}
BAD_STRING {badstring}
COMMENT \/\*[^*]*\*+([^/*][^*]*\*+)*\/
BAD_COMMENT {badcomment}
: :
; ;
{ \{{
} \}}
( \(
) \)
[ \[
] \]
CDO <!--
CDC -->
'''
# Strings with {macro} expanded
COMPILED_MACROS = {}
COMPILED_TOKEN_REGEXPS = [] # [(name, regexp.match)] ordered
COMPILED_TOKEN_INDEXES = {} # {name: i} helper for the C speedups
# Indexed by codepoint value of the first character of a token.
# Codepoints >= 160 (aka nonascii) all use the index 160.
# values are (i, name, regexp.match)
TOKEN_DISPATCH = []
try:
unichr
except NameError:
# Python 3
unichr = chr
unicode = str
def _init():
"""Import-time initialization."""
COMPILED_MACROS.clear()
for line in MACROS.splitlines():
if line.strip():
name, value = line.split('\t')
COMPILED_MACROS[name.strip()] = '(?:%s)' \
% value.format(**COMPILED_MACROS)
COMPILED_TOKEN_REGEXPS[:] = (
(
name.strip(),
re.compile(
value.format(**COMPILED_MACROS),
# Case-insensitive when matching eg. uRL(foo)
# but preserve the case in extracted groups
re.I
).match
)
for line in TOKENS.splitlines()
if line.strip()
for name, value in [line.split('\t')]
)
COMPILED_TOKEN_INDEXES.clear()
for i, (name, regexp) in enumerate(COMPILED_TOKEN_REGEXPS):
COMPILED_TOKEN_INDEXES[name] = i
dispatch = [[] for i in range(161)]
for chars, names in [
(' \t\r\n\f', ['S']),
('uU', ['URI', 'BAD_URI', 'UNICODE-RANGE']),
# \ is an escape outside of another token
(string.ascii_letters + '\\_-' + unichr(160), ['FUNCTION', 'IDENT']),
(string.digits + '.+-', ['DIMENSION', 'PERCENTAGE', 'NUMBER']),
('@', ['ATKEYWORD']),
('#', ['HASH']),
('\'"', ['STRING', 'BAD_STRING']),
('/', ['COMMENT', 'BAD_COMMENT']),
('<', ['CDO']),
('-', ['CDC']),
]:
for char in chars:
dispatch[ord(char)].extend(names)
for char in ':;{}()[]':
dispatch[ord(char)] = [char]
TOKEN_DISPATCH[:] = (
[
(index,) + COMPILED_TOKEN_REGEXPS[index]
for name in names
for index in [COMPILED_TOKEN_INDEXES[name]]
]
for names in dispatch
)
_init()
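# Illustrative notes on the structures built by _init() (values shown were
# checked by hand and are meant as examples, not an exhaustive listing):
#   COMPILED_MACROS maps macro names to expanded, non-capturing regexp
#   fragments, e.g. COMPILED_MACROS['nl'] == r'(?:\n|\r\n|\r|\f)'.
#   COMPILED_TOKEN_REGEXPS is the ordered list of (name, regexp.match) pairs,
#   and TOKEN_DISPATCH[ord(c)] holds the (index, name, regexp.match) triples
#   worth trying for a token that starts with the character c; for example,
#   TOKEN_DISPATCH[ord('@')] only contains the ATKEYWORD entry.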
def _unicode_replace(match, int=int, unichr=unichr, maxunicode=sys.maxunicode):
codepoint = int(match.group(1), 16)
if codepoint <= maxunicode:
return unichr(codepoint)
else:
return '\N{REPLACEMENT CHARACTER}' # U+FFFD
UNICODE_UNESCAPE = functools.partial(
re.compile(COMPILED_MACROS['unicode'], re.I).sub,
_unicode_replace)
NEWLINE_UNESCAPE = functools.partial(
re.compile(r'()\\' + COMPILED_MACROS['nl']).sub,
'')
SIMPLE_UNESCAPE = functools.partial(
re.compile(r'\\(%s)' % COMPILED_MACROS['simple_escape'] , re.I).sub,
# Same as r'\1', but faster on CPython
operator.methodcaller('group', 1))
FIND_NEWLINES = lambda x: list(re.compile(COMPILED_MACROS['nl']).finditer(x))
class Token(object):
r"""A single atomic token.
.. attribute:: is_container
Always ``False``.
Helps to tell :class:`Token` apart from :class:`ContainerToken`.
.. attribute:: type
The type of token as a string:
``S``
A sequence of white space
``IDENT``
An identifier: a name that does not start with a digit.
A name is a sequence of letters, digits, ``_``, ``-``, escaped
characters and non-ASCII characters. Eg: ``margin-left``
``HASH``
``#`` followed immediately by a name. Eg: ``#ff8800``
``ATKEYWORD``
``@`` followed immediately by an identifier. Eg: ``@page``
``URI``
Eg: ``url(foo)`` The content may or may not be quoted.
``UNICODE-RANGE``
``U+`` followed by one or two hexadecimal
Unicode codepoints. Eg: ``U+20-00FF``
``INTEGER``
An integer with an optional ``+`` or ``-`` sign
``NUMBER``
A non-integer number with an optional ``+`` or ``-`` sign
``DIMENSION``
An integer or number followed immediately by an
identifier (the unit). Eg: ``12px``
``PERCENTAGE``
An integer or number followed immediately by ``%``
``STRING``
A string, quoted with ``"`` or ``'``
``:`` or ``;``
That character.
``DELIM``
A single character not matched in another token. Eg: ``,``
See the source of the :mod:`.token_data` module for the precise
regular expressions that match various tokens.
Note that other token types exist in the early tokenization steps,
but these are ignored, are syntax errors, or are later transformed
into :class:`ContainerToken` or :class:`FunctionToken`.
.. attribute:: value
The parsed value:
* INTEGER, NUMBER, PERCENTAGE or DIMENSION tokens: the numeric value
as an int or float.
* STRING tokens: the unescaped string without quotes
* URI tokens: the unescaped URI without quotes or
``url(`` and ``)`` markers.
* IDENT, ATKEYWORD or HASH tokens: the unescaped token,
with ``@`` or ``#`` markers left as-is
* Other tokens: same as :attr:`as_css`
*Unescaped* refers to the various escaping methods based on the
backslash ``\`` character in CSS syntax.
.. attribute:: unit
* DIMENSION tokens: the normalized (unescaped, lower-case)
unit name as a string. eg. ``'px'``
* PERCENTAGE tokens: the string ``'%'``
* Other tokens: ``None``
.. attribute:: line
The line number in the CSS source of the start of this token.
.. attribute:: column
The column number (inside a source line) of the start of this token.
"""
is_container = False
__slots__ = 'type', '_as_css', 'value', 'unit', 'line', 'column'
def __init__(self, type_, css_value, value, unit, line, column):
self.type = type_
self._as_css = css_value
self.value = value
self.unit = unit
self.line = line
self.column = column
def as_css(self):
"""
Return the CSS representation of the token, as parsed in the source,
as a Unicode string.
"""
return self._as_css
def __repr__(self):
return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
.format(self, self.unit or ''))
class ContainerToken(object):
"""A token that contains other (nested) tokens.
.. attribute:: is_container
Always ``True``.
Helps to tell :class:`ContainerToken` apart from :class:`Token`.
.. attribute:: type
The type of token as a string. One of ``{``, ``(``, ``[`` or
``FUNCTION``. For ``FUNCTION``, the object is actually a
:class:`FunctionToken`.
.. attribute:: unit
Always ``None``. Included to make :class:`ContainerToken` behave
more like :class:`Token`.
.. attribute:: content
A list of :class:`Token` or nested :class:`ContainerToken`,
not including the opening or closing token.
.. attribute:: line
The line number in the CSS source of the start of this token.
.. attribute:: column
The column number (inside a source line) of the start of this token.
"""
is_container = True
unit = None
__slots__ = 'type', '_css_start', '_css_end', 'content', 'line', 'column'
def __init__(self, type_, css_start, css_end, content, line, column):
self.type = type_
self._css_start = css_start
self._css_end = css_end
self.content = content
self.line = line
self.column = column
def as_css(self):
"""
Return the CSS representation of the token, as parsed in the source,
as a Unicode string.
"""
parts = [self._css_start]
parts.extend(token.as_css() for token in self.content)
parts.append(self._css_end)
return ''.join(parts)
format_string = '<ContainerToken {0.type} at {0.line}:{0.column}>'
def __repr__(self):
return (self.format_string + ' {0.content}').format(self)
class FunctionToken(ContainerToken):
"""A specialized :class:`ContainerToken` for a ``FUNCTION`` group.
Has an additional attribute:
.. attribute:: function_name
The unescaped name of the function, with the ``(`` marker removed.
"""
__slots__ = 'function_name',
def __init__(self, type_, css_start, css_end, function_name, content,
line, column):
super(FunctionToken, self).__init__(
type_, css_start, css_end, content, line, column)
# Remove the ( marker:
self.function_name = function_name[:-1]
format_string = ('<FunctionToken {0.function_name}() at '
'{0.line}:{0.column}>')
class TokenList(list):
"""
A mixed list of :class:`~.token_data.Token` and
:class:`~.token_data.ContainerToken` objects.
This is a subclass of the builtin :class:`~builtins.list` type.
It can be iterated, indexed and sliced as usual, but also has some
additional API:
"""
@property
def line(self):
"""The line number in the CSS source of the first token."""
return self[0].line
@property
def column(self):
"""The column number (inside a source line) of the first token."""
return self[0].column
def as_css(self):
"""
Return the CSS representation of the tokens, as parsed in the source,
as a Unicode string.
"""
return ''.join(token.as_css() for token in self)
def load_c_tokenizer():
from calibre.constants import plugins
tokenizer, err = plugins['tokenizer']
if err:
raise RuntimeError('Failed to load module tokenizer: %s' % err)
tokens = list(':;(){}[]') + ['DELIM', 'INTEGER', 'STRING']
tokenizer.init(COMPILED_TOKEN_REGEXPS, UNICODE_UNESCAPE, NEWLINE_UNESCAPE, SIMPLE_UNESCAPE, FIND_NEWLINES, TOKEN_DISPATCH, COMPILED_TOKEN_INDEXES, *tokens)
return tokenizer
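# A minimal sketch of the token objects documented above (assumes the sibling
# tinycss.tokenizer module, which provides the flat and regrouping passes):
if __name__ == '__main__':
    from tinycss.tokenizer import python_tokenize_flat, regroup
    flat = list(python_tokenize_flat('a { margin: -2.5em }', ignore_comments=False))
    print([(t.type, t.value, t.unit) for t in flat if t.type == 'DIMENSION'])
    # -> [('DIMENSION', -2.5, 'em')]
    block = [t for t in regroup(iter(flat)) if t.is_container][0]
    print(block.type)      # '{'
    print(block.as_css())  # '{ margin: -2.5em }'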

View File

@@ -0,0 +1,504 @@
/*
* tokenizer.c
* Copyright (C) 2014 Kovid Goyal <kovid at kovidgoyal.net>
*
* Distributed under terms of the GPL3 license.
*/
#define UNICODE
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <structmember.h>
// Token type definition {{{
typedef struct {
PyObject_HEAD
// Type-specific fields go here.
PyObject *is_container;
PyObject *type;
PyObject *_as_css;
PyObject *value;
PyObject *unit;
PyObject *line;
PyObject *column;
} tokenizer_Token;
static void
tokenizer_Token_dealloc(tokenizer_Token* self)
{
Py_XDECREF(self->is_container); self->is_container = NULL;
Py_XDECREF(self->type); self->type = NULL;
Py_XDECREF(self->_as_css); self->_as_css = NULL;
Py_XDECREF(self->value); self->value = NULL;
Py_XDECREF(self->unit); self->unit = NULL;
Py_XDECREF(self->line); self->line = NULL;
Py_XDECREF(self->column); self->column = NULL;
Py_TYPE(self)->tp_free((PyObject*)self);
}
static PyObject *
tokenizer_Token_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
tokenizer_Token *self = NULL;
self = (tokenizer_Token *)type->tp_alloc(type, 0);
if (self == NULL) return PyErr_NoMemory();
if (!PyArg_ParseTuple(args, "OOOOOO", &(self->type), &(self->_as_css), &(self->value), &(self->unit), &(self->line), &(self->column))) {
Py_TYPE(self)->tp_free((PyObject *) self);
return NULL;
}
Py_INCREF(self->type); Py_INCREF(self->_as_css); Py_INCREF(self->value); Py_INCREF(self->unit); Py_INCREF(self->line); Py_INCREF(self->column);
self->is_container = Py_False; Py_INCREF(self->is_container);
return (PyObject *)self;
}
#if PY_MAJOR_VERSION >= 3
#define PyObject_Unicode_Compat(arg) PyObject_Str(arg)
#else
#define PyObject_Unicode_Compat(arg) PyObject_Unicode(arg)
#endif
static PyObject *
tokenizer_Token_repr(tokenizer_Token *self) {
PyObject *type = NULL, *line = NULL, *column = NULL, *value = NULL, *ans = NULL, *unit = NULL;
if (!self->type || !self->line || !self->column || !self->value)
return PyBytes_FromString("<Token NULL fields>");
type = PyObject_Unicode_Compat(self->type);
line = PyObject_Unicode_Compat(self->line);
column = PyObject_Unicode_Compat(self->column);
value = PyObject_Unicode_Compat(self->value);
if (type && line && column && value) {
if (self->unit != NULL && PyObject_IsTrue(self->unit)) {
unit = PyObject_Unicode_Compat(self->unit);
if (unit != NULL)
ans = PyUnicode_FromFormat("<Token %U at %U:%U %U%U>", type, line, column, value, unit);
else
PyErr_NoMemory();
} else
ans = PyUnicode_FromFormat("<Token %U at %U:%U %U>", type, line, column, value);
} else PyErr_NoMemory();
Py_XDECREF(type); Py_XDECREF(line); Py_XDECREF(column); Py_XDECREF(value); Py_XDECREF(unit);
return ans;
}
static PyObject *
tokenizer_Token_as_css(tokenizer_Token *self, PyObject *args, PyObject *kwargs) {
if (!self->_as_css) {
Py_RETURN_NONE;
}
Py_INCREF(self->_as_css);
return self->_as_css;
}
static PyMemberDef tokenizer_Token_members[] = {
{"is_container", T_OBJECT_EX, offsetof(tokenizer_Token, is_container), 0, "False unless this token is a container for other tokens"},
{"type", T_OBJECT_EX, offsetof(tokenizer_Token, type), 0, "The token type"},
{"_as_css", T_OBJECT_EX, offsetof(tokenizer_Token, _as_css), 0, "Internal variable, use as_css() method instead."},
{"value", T_OBJECT_EX, offsetof(tokenizer_Token, value), 0, "The token value"},
{"unit", T_OBJECT_EX, offsetof(tokenizer_Token, unit), 0, "The token unit"},
{"line", T_OBJECT_EX, offsetof(tokenizer_Token, line), 0, "The token line number"},
{"column", T_OBJECT_EX, offsetof(tokenizer_Token, column), 0, "The token column number"},
{NULL} /* Sentinel */
};
static PyMethodDef tokenizer_Token_methods[] = {
{"as_css", (PyCFunction)tokenizer_Token_as_css, METH_VARARGS,
"as_css() -> Return the CSS representation of this token"
},
{NULL} /* Sentinel */
};
static PyTypeObject tokenizer_TokenType = { // {{{
PyVarObject_HEAD_INIT(NULL, 0)
/* tp_name */ "tokenizer.Token",
/* tp_basicsize */ sizeof(tokenizer_Token),
/* tp_itemsize */ 0,
/* tp_dealloc */ (destructor) tokenizer_Token_dealloc,
/* tp_print */ 0,
/* tp_getattr */ 0,
/* tp_setattr */ 0,
/* tp_compare */ 0,
/* tp_repr */ (reprfunc) tokenizer_Token_repr,
/* tp_as_number */ 0,
/* tp_as_sequence */ 0,
/* tp_as_mapping */ 0,
/* tp_hash */ 0,
/* tp_call */ 0,
/* tp_str */ 0,
/* tp_getattro */ 0,
/* tp_setattro */ 0,
/* tp_as_buffer */ 0,
/* tp_flags */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
/* tp_doc */ "Token",
/* tp_traverse */ 0,
/* tp_clear */ 0,
/* tp_richcompare */ 0,
/* tp_weaklistoffset */ 0,
/* tp_iter */ 0,
/* tp_iternext */ 0,
/* tp_methods */ tokenizer_Token_methods,
/* tp_members */ tokenizer_Token_members,
/* tp_getset */ 0,
/* tp_base */ 0,
/* tp_dict */ 0,
/* tp_descr_get */ 0,
/* tp_descr_set */ 0,
/* tp_dictoffset */ 0,
/* tp_init */ 0,
/* tp_alloc */ 0,
/* tp_new */ tokenizer_Token_new,
}; // }}}
// }}}
static PyObject *COMPILED_TOKEN_REGEXPS = NULL, *UNICODE_UNESCAPE = NULL, *NEWLINE_UNESCAPE = NULL, *SIMPLE_UNESCAPE = NULL, *FIND_NEWLINES = NULL, *TOKEN_DISPATCH = NULL;
static PyObject *COLON = NULL, *SCOLON = NULL, *LPAR = NULL, *RPAR = NULL, *LBRACE = NULL, *RBRACE = NULL, *LBOX = NULL, *RBOX = NULL, *DELIM_TOK = NULL, *INTEGER = NULL, *STRING_TOK = NULL;
static Py_ssize_t BAD_COMMENT, BAD_STRING, PERCENTAGE, DIMENSION, ATKEYWORD, FUNCTION, COMMENT, NUMBER, STRING, IDENT, HASH, URI, DELIM = -1;
#define CLEANUP(x) Py_XDECREF((x)); x = NULL;
static PyObject*
tokenize_cleanup(PyObject *self, PyObject *args) {
CLEANUP(COMPILED_TOKEN_REGEXPS); CLEANUP(UNICODE_UNESCAPE); CLEANUP(NEWLINE_UNESCAPE); CLEANUP(SIMPLE_UNESCAPE); CLEANUP(FIND_NEWLINES); CLEANUP(TOKEN_DISPATCH);
CLEANUP(COLON); CLEANUP(SCOLON); CLEANUP(LPAR); CLEANUP(RPAR); CLEANUP(LBRACE); CLEANUP(RBRACE); CLEANUP(LBOX); CLEANUP(RBOX); CLEANUP(DELIM_TOK); CLEANUP(INTEGER); CLEANUP(STRING_TOK);
Py_RETURN_NONE;
}
static PyObject*
tokenize_init(PyObject *self, PyObject *args) {
PyObject *cti = NULL;
if (COMPILED_TOKEN_REGEXPS != NULL) {
tokenize_cleanup(NULL, NULL);
}
if (!PyArg_ParseTuple(args, "OOOOOOOOOOOOOOOOOO", &COMPILED_TOKEN_REGEXPS, &UNICODE_UNESCAPE, &NEWLINE_UNESCAPE, &SIMPLE_UNESCAPE, &FIND_NEWLINES, &TOKEN_DISPATCH, &cti, &COLON, &SCOLON, &LPAR, &RPAR, &LBRACE, &RBRACE, &LBOX, &RBOX, &DELIM_TOK, &INTEGER, &STRING_TOK)) return NULL;
Py_INCREF(COMPILED_TOKEN_REGEXPS); Py_INCREF(UNICODE_UNESCAPE); Py_INCREF(NEWLINE_UNESCAPE); Py_INCREF(SIMPLE_UNESCAPE); Py_INCREF(FIND_NEWLINES); Py_INCREF(TOKEN_DISPATCH);
Py_INCREF(COLON); Py_INCREF(SCOLON); Py_INCREF(LPAR); Py_INCREF(RPAR); Py_INCREF(LBRACE); Py_INCREF(RBRACE); Py_INCREF(LBOX); Py_INCREF(RBOX); Py_INCREF(DELIM_TOK); Py_INCREF(INTEGER); Py_INCREF(STRING_TOK);
#define SETCONST(x) do { (x) = PyNumber_AsSsize_t(PyDict_GetItemString(cti, #x), PyExc_OverflowError); \
if((x) == -1 && PyErr_Occurred() != NULL) { return NULL; } \
} while(0)
SETCONST(BAD_COMMENT); SETCONST(BAD_STRING); SETCONST(PERCENTAGE); SETCONST(DIMENSION); SETCONST(ATKEYWORD); SETCONST(FUNCTION); SETCONST(COMMENT); SETCONST(NUMBER); SETCONST(STRING); SETCONST(IDENT); SETCONST(HASH); SETCONST(URI);
Py_RETURN_NONE;
}
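// ITER_CODE_PTS / END_ITER_CODE_PTS iterate over the code points of a unicode object,
// papering over the difference between the Python 3.3+ flexible string API and the
// older Py_UNICODE based API.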
#if PY_VERSION_HEX >= 0x03030000
#define ITER_CODE_PTS(unicode_object) { \
int _kind = PyUnicode_KIND(unicode_object); \
void *_data = PyUnicode_DATA(unicode_object); \
for (Py_ssize_t iteridx = 0; iteridx < PyUnicode_GET_LENGTH(unicode_object); iteridx++) { \
Py_UCS4 ch = PyUnicode_READ(_kind, _data, iteridx);
#else
#define PyUnicode_GET_LENGTH PyUnicode_GET_SIZE
#define ITER_CODE_PTS(unicode_object) { \
Py_UNICODE *_data = PyUnicode_AS_UNICODE(unicode_object); \
Py_ssize_t iteridx; \
for (iteridx = 0; iteridx < PyUnicode_GET_LENGTH(unicode_object); iteridx++) { \
Py_UNICODE ch = _data[iteridx];
#endif
#define END_ITER_CODE_PTS }}
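// Convert the text of a numeric token to a Python number: a float when it has a
// fractional part, otherwise an int (mirroring the int/float split in the pure-python tokenizer).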
static PyObject *unicode_to_number(PyObject *src) {
#if PY_MAJOR_VERSION >= 3
PyObject* ans = PyFloat_FromString(src);
#else
PyObject* ans = PyFloat_FromString(src, NULL);
#endif
if (ans == NULL) return NULL; /* PyFloat_FromString failed; propagate the error instead of dereferencing NULL */
double val = PyFloat_AsDouble(ans);
long lval = (long)val;
if (val - lval != 0) return ans;
Py_DECREF(ans);
#if PY_MAJOR_VERSION >= 3
return PyLong_FromLong(lval);
#else
return PyInt_FromLong(lval);
#endif
}
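// ASCII-lowercase a unicode object in place; used to normalize dimension units (e.g. 'PX' -> 'px').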
static void lowercase(PyObject *x) {
ITER_CODE_PTS(x)
if ('A' <= ch && ch <= 'Z') {
#if PY_VERSION_HEX >= 0x03030000
PyUnicode_WRITE(_kind, _data, iteridx, ch + 32);
#else
_data[iteridx] += 32;
#endif
}
END_ITER_CODE_PTS
}
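// Return a copy of src with start_offset characters dropped from the front and end_offset
// from the back; used to strip surrounding quotes and the trailing '%' from token values.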
static PyObject*
clone_unicode(const PyObject* src, Py_ssize_t start_offset, Py_ssize_t end_offset) {
#if PY_VERSION_HEX >= 0x03030000
int kind = PyUnicode_KIND(src);
void *data;
switch(kind) {
case PyUnicode_1BYTE_KIND:
data = PyUnicode_1BYTE_DATA(src) + start_offset; break;
case PyUnicode_2BYTE_KIND:
data = PyUnicode_2BYTE_DATA(src) + start_offset; break;
case PyUnicode_4BYTE_KIND:
data = PyUnicode_4BYTE_DATA(src) + start_offset; break;
default:
PyErr_SetString(PyExc_RuntimeError, "Invalid byte kind for unicode object");
return NULL;
}
return PyUnicode_FromKindAndData(kind, data, PyUnicode_GET_LENGTH(src) - start_offset - end_offset);
#else
return PyUnicode_FromUnicode(PyUnicode_AS_UNICODE(src) + start_offset, PyUnicode_GET_LENGTH(src) - start_offset - end_offset);
#endif
}
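// C counterpart of the pure-python tokenize_flat(): scans css_source position by position,
// dispatching on the current code point, and returns a flat list of Token objects.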
static PyObject*
tokenize_flat(PyObject *self, PyObject *args) {
#if PY_VERSION_HEX >= 0x03030000
void *css_source = NULL; int css_kind; Py_UCS4 c = 0, codepoint = 0;
#define first_char(string) PyUnicode_READ_CHAR(string, 0)
#define unicode_from_data(data, sz) PyUnicode_FromKindAndData(css_kind, data, sz)
#else
Py_UNICODE *css_source = NULL, c = 0, codepoint = 0;
#define first_char(string) PyUnicode_AS_UNICODE(string)[0]
#define unicode_from_data(data, sz) PyUnicode_FromUnicode(data, sz)
#endif
PyObject *ic = NULL, *token = NULL, *tokens = NULL, *type_name = NULL, *css_value = NULL, *value = NULL, *unit = NULL, *tries = NULL, *match = NULL, *match_func = NULL, *py_source = NULL, *item = NULL, *newlines = NULL;
int ignore_comments = 0;
Py_ssize_t pos = 0, line = 1, column = 1, i = 0;
Py_ssize_t length = 0, next_pos = 0, type_ = -1, source_len = 0;
if (COMPILED_TOKEN_REGEXPS == NULL) {
PyErr_SetString(PyExc_RuntimeError, "tokenizer module not initialized. You must call init() first."); return NULL;
}
if (!PyArg_ParseTuple(args, "UO", &py_source, &ic)) return NULL;
if (PyObject_IsTrue(ic)) ignore_comments = 1;
#if PY_VERSION_HEX >= 0x03030000
if (PyUnicode_READY(py_source) != 0) return NULL;
css_source = PyUnicode_DATA(py_source); css_kind = PyUnicode_KIND(py_source);
#else
css_source = PyUnicode_AS_UNICODE(py_source);
#endif
source_len = PyUnicode_GET_LENGTH(py_source);
tokens = PyList_New(0);
if (tokens == NULL) return PyErr_NoMemory();
#define UNESCAPE(x, func) item = PyObject_CallFunctionObjArgs(func, x, NULL); if (item == NULL) { goto error; } Py_DECREF(x); x = item; item = NULL;
#define TONUMBER(x) item = unicode_to_number(x); if (item == NULL) goto error; Py_DECREF(x); x = item; item = NULL;
#define SINGLE(x) { type_ = -1; type_name = x; Py_INCREF(type_name); css_value = x; Py_INCREF(css_value); }
while (pos < source_len) {
#if PY_VERSION_HEX >= 0x03030000
c = PyUnicode_READ(css_kind, css_source, pos);
#else
c = css_source[pos];
#endif
css_value = NULL; type_name = NULL; value = NULL; unit = NULL; match = NULL;
if (c == ':') SINGLE(COLON) else if (c == ';') SINGLE(SCOLON) else if (c == '(') SINGLE(LPAR) else if (c == ')') SINGLE(RPAR) else if (c == '{') SINGLE(LBRACE) else if (c == '}') SINGLE(RBRACE) else if (c == '[') SINGLE(LBOX) else if (c == ']') SINGLE(RBOX) else
{
codepoint = (c > 160) ? 160: c;
tries = PyList_GET_ITEM(TOKEN_DISPATCH, codepoint);
for (i = 0; i < PyList_Size(tries); i++) {
item = PyList_GET_ITEM(tries, i);
match_func = PyTuple_GET_ITEM(item, 2);
match = PyObject_CallFunction(match_func, "On", py_source, pos);
if (match == NULL) { goto error; }
if (match != Py_None) {
css_value = PyObject_CallMethod(match, "group", NULL);
if (css_value == NULL) { goto error; }
type_ = PyNumber_AsSsize_t(PyTuple_GET_ITEM(item, 0), PyExc_OverflowError);
if(type_ == -1 && PyErr_Occurred() != NULL) { goto error; }
type_name = PyTuple_GET_ITEM(item, 1);
Py_INCREF(type_name);
break;
}
}
if (css_value == NULL) { // No match
type_ = DELIM; type_name = DELIM_TOK; Py_INCREF(type_name); css_value = unicode_from_data(&c, 1);
if (css_value == NULL) { goto error; }
}
}
length = PyUnicode_GET_LENGTH(css_value);
next_pos = pos + length;
// Now calculate the value and unit for this token (if any)
if (! (ignore_comments && (type_ == COMMENT || type_ == BAD_COMMENT))) {
if (type_ == DIMENSION) {
value = PyObject_CallMethod(match, "group", "I", 1);
if (value == NULL) { goto error; }
TONUMBER(value);
unit = PyObject_CallMethod(match, "group", "I", 2);
if (unit == NULL) { goto error; }
UNESCAPE(unit, SIMPLE_UNESCAPE);
UNESCAPE(unit, UNICODE_UNESCAPE);
lowercase(unit);
} else
if (type_ == PERCENTAGE) {
if (PyUnicode_GET_LENGTH(css_value) > 0) {
value = clone_unicode(css_value, 0, 1);
if (value == NULL) goto error;
} else { value = css_value; Py_INCREF(value); }
if (value == NULL) goto error;
TONUMBER(value);
unit = PyUnicode_FromString("%");
if (unit == NULL) goto error;
} else
if (type_ == NUMBER) {
value = css_value; Py_INCREF(value);
TONUMBER(value);
if (!PyFloat_Check(value)) {
Py_XDECREF(type_name);
type_name = INTEGER;
Py_INCREF(type_name);
}
} else
if (type_ == IDENT || type_ == ATKEYWORD || type_ == HASH || type_ == FUNCTION) {
value = PyObject_CallFunctionObjArgs(SIMPLE_UNESCAPE, css_value, NULL);
if (value == NULL) goto error;
UNESCAPE(value, UNICODE_UNESCAPE);
} else
if (type_ == URI) {
value = PyObject_CallMethod(match, "group", "I", 1);
if (value == NULL) { goto error; }
if (PyObject_IsTrue(value) && PyUnicode_GET_LENGTH(value) > 1 && (first_char(value) == '"' || first_char(value) == '\'')) {
item = clone_unicode(value, 1, 1);
if (item == NULL) goto error;
Py_DECREF(value); value = item; item = NULL;
UNESCAPE(value, NEWLINE_UNESCAPE);
}
UNESCAPE(value, SIMPLE_UNESCAPE);
UNESCAPE(value, UNICODE_UNESCAPE);
} else
if (type_ == STRING) {
if (PyObject_IsTrue(css_value) && PyUnicode_GET_LENGTH(css_value) > 1) { // remove quotes
value = clone_unicode(css_value, 1, 1);
} else {
value = css_value; Py_INCREF(value);
}
UNESCAPE(value, NEWLINE_UNESCAPE);
UNESCAPE(value, SIMPLE_UNESCAPE);
UNESCAPE(value, UNICODE_UNESCAPE);
} else
if (type_ == BAD_STRING && next_pos == source_len) {
Py_XDECREF(type_name); type_name = STRING_TOK; Py_INCREF(type_name);
if (PyObject_IsTrue(css_value) && PyUnicode_GET_LENGTH(css_value) > 0) { // remove quote
value = clone_unicode(css_value, 1, 0);
} else {
value = css_value; Py_INCREF(value);
}
UNESCAPE(value, NEWLINE_UNESCAPE);
UNESCAPE(value, SIMPLE_UNESCAPE);
UNESCAPE(value, UNICODE_UNESCAPE);
} else {
value = css_value; Py_INCREF(value);
} // if(type_ == ...)
if (unit == NULL) { unit = Py_None; Py_INCREF(unit); }
item = Py_BuildValue("OOOOnn", type_name, css_value, value, unit, line, column);
if (item == NULL) goto error;
token = PyObject_CallObject((PyObject *) &tokenizer_TokenType, item);
Py_DECREF(item); item = NULL;
if (token == NULL) goto error;
if (PyList_Append(tokens, token) != 0) { Py_DECREF(token); token = NULL; goto error; }
Py_DECREF(token);
} // if(!(ignore_comments...
Py_XDECREF(match); match = NULL;
pos = next_pos;
newlines = PyObject_CallFunctionObjArgs(FIND_NEWLINES, css_value, NULL);
if (newlines == NULL) goto error;
Py_XDECREF(css_value); css_value = NULL; Py_XDECREF(type_name); type_name = NULL; Py_XDECREF(value); value = NULL; Py_XDECREF(unit); unit = NULL;
if (PyObject_IsTrue(newlines)) {
line += PyList_Size(newlines);
item = PyObject_CallMethod(PyList_GET_ITEM(newlines, PyList_Size(newlines) - 1), "end", NULL);
if (item == NULL) { Py_DECREF(newlines); newlines = NULL; goto error; }
column = PyNumber_AsSsize_t(item, PyExc_OverflowError);
if(column == -1 && PyErr_Occurred()) { Py_DECREF(newlines); newlines = NULL; goto error; }
column = length - column + 1;
Py_DECREF(item); item = NULL;
} else column += length;
Py_DECREF(newlines); newlines = NULL;
} // while (pos < ...)
return tokens;
error:
Py_XDECREF(tokens); Py_XDECREF(css_value); Py_XDECREF(type_name); Py_XDECREF(value); Py_XDECREF(unit); Py_XDECREF(match);
return NULL;
#undef unicode_from_data
#undef first_char
}
static PyMethodDef tokenizer_methods[] = {
{"tokenize_flat", tokenize_flat, METH_VARARGS,
"tokenize_flat(css_source, ignore_comments)\n\n Convert CSS source into a flat list of tokens"
},
{"init", tokenize_init, METH_VARARGS,
"init()\n\nInitialize the module."
},
{"cleanup", tokenize_cleanup, METH_VARARGS,
"cleanup()\n\nRelease resources allocated by init(). Safe to call multiple times."
},
{NULL, NULL, 0, NULL}
};
#if PY_MAJOR_VERSION >= 3
#define INITERROR return NULL
static struct PyModuleDef tokenizer_module = {
/* m_base */ PyModuleDef_HEAD_INIT,
/* m_name */ "tokenizer",
/* m_doc */ "Implementation of tokenizer in C for speed.",
/* m_size */ -1,
/* m_methods */ tokenizer_methods,
/* m_slots */ 0,
/* m_traverse */ 0,
/* m_clear */ 0,
/* m_free */ 0,
};
CALIBRE_MODINIT_FUNC PyInit_tokenizer(void) {
if (PyType_Ready(&tokenizer_TokenType) < 0)
INITERROR;
PyObject *mod = PyModule_Create(&tokenizer_module);
#else
#define INITERROR return
CALIBRE_MODINIT_FUNC inittokenizer(void) {
if (PyType_Ready(&tokenizer_TokenType) < 0)
INITERROR;
PyObject *mod = Py_InitModule3("tokenizer", tokenizer_methods,
"Implementation of tokenizer in C for speed.");
#endif
if (mod == NULL) INITERROR;
Py_INCREF(&tokenizer_TokenType);
PyModule_AddObject(mod, "Token", (PyObject *) &tokenizer_TokenType);
#if PY_MAJOR_VERSION >= 3
return mod;
#endif
}

View File

@@ -0,0 +1,216 @@
# coding: utf8
"""
tinycss.tokenizer
-----------------
Tokenizer for the CSS core syntax:
http://www.w3.org/TR/CSS21/syndata.html#tokenization
This is the pure-python implementation. See also speedups.pyx
:copyright: (c) 2012 by Simon Sapin.
:license: BSD, see LICENSE for more details.
"""
from __future__ import unicode_literals
from tinycss import token_data
def tokenize_flat(css_source, ignore_comments=True,
# Make these local variables to avoid global lookups in the loop
tokens_dispatch=token_data.TOKEN_DISPATCH,
unicode_unescape=token_data.UNICODE_UNESCAPE,
newline_unescape=token_data.NEWLINE_UNESCAPE,
simple_unescape=token_data.SIMPLE_UNESCAPE,
find_newlines=token_data.FIND_NEWLINES,
Token=token_data.Token,
len=len,
int=int,
float=float,
list=list,
_None=None,
):
"""
:param css_source:
CSS as a unicode string
:param ignore_comments:
if true (the default) comments will not be included in the
return value
:return:
A list of :class:`Token`
"""
pos = 0
line = 1
column = 1
source_len = len(css_source)
tokens = []
while pos < source_len:
char = css_source[pos]
if char in ':;{}()[]':
type_ = char
css_value = char
else:
codepoint = min(ord(char), 160)
for _index, type_, regexp in tokens_dispatch[codepoint]:
match = regexp(css_source, pos)
if match is not None:
# First match is the longest. See comments on TOKENS above.
css_value = match.group()
break
else:
# No match.
# "Any other character not matched by the above rules,
# and neither a single nor a double quote."
# ... but quotes at the start of a token are always matched
# by STRING or BAD_STRING. So DELIM is any single character.
type_ = 'DELIM'
css_value = char
length = len(css_value)
next_pos = pos + length
# A BAD_COMMENT is a comment at EOF. Ignore it too.
if not (ignore_comments and type_ in ('COMMENT', 'BAD_COMMENT')):
# Parse numbers, extract strings and URIs, unescape
unit = _None
if type_ == 'DIMENSION':
value = match.group(1)
value = float(value) if '.' in value else int(value)
unit = match.group(2)
unit = simple_unescape(unit)
unit = unicode_unescape(unit)
unit = unit.lower() # normalize
elif type_ == 'PERCENTAGE':
value = css_value[:-1]
value = float(value) if '.' in value else int(value)
unit = '%'
elif type_ == 'NUMBER':
value = css_value
if '.' in value:
value = float(value)
else:
value = int(value)
type_ = 'INTEGER'
elif type_ in ('IDENT', 'ATKEYWORD', 'HASH', 'FUNCTION'):
value = simple_unescape(css_value)
value = unicode_unescape(value)
elif type_ == 'URI':
value = match.group(1)
if value and value[0] in '"\'':
value = value[1:-1] # Remove quotes
value = newline_unescape(value)
value = simple_unescape(value)
value = unicode_unescape(value)
elif type_ == 'STRING':
value = css_value[1:-1] # Remove quotes
value = newline_unescape(value)
value = simple_unescape(value)
value = unicode_unescape(value)
# BAD_STRING can only be one of:
# * Unclosed string at the end of the stylesheet:
# Close the string, but this is not an error.
# Make it a "good" STRING token.
# * Unclosed string at the (unescaped) end of the line:
# Close the string, but this is an error.
# Leave it as a BAD_STRING, don't bother parsing it.
# See http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
elif type_ == 'BAD_STRING' and next_pos == source_len:
type_ = 'STRING'
value = css_value[1:] # Remove quote
value = newline_unescape(value)
value = simple_unescape(value)
value = unicode_unescape(value)
else:
value = css_value
tokens.append(Token(type_, css_value, value, unit, line, column))
pos = next_pos
newlines = find_newlines(css_value)
if newlines:
line += len(newlines)
# Add 1 to have lines start at column 1, not 0
column = length - newlines[-1].end() + 1
else:
column += length
return tokens
def regroup(tokens):
"""
Match pairs of tokens: () [] {} function()
(Strings in "" or '' are taken care of by the tokenizer.)
Opening tokens are replaced by a :class:`ContainerToken`.
Closing tokens are removed. Unmatched closing tokens are invalid
but left as-is. All nested structures that are still open at
the end of the stylesheet are implicitly closed.
:param tokens:
a *flat* iterable of tokens, as returned by :func:`tokenize_flat`.
:return:
A tree of tokens.
"""
# "global" objects for the inner recursion
pairs = {'FUNCTION': ')', '(': ')', '[': ']', '{': '}'}
tokens = iter(tokens)
eof = [False]
def _regroup_inner(stop_at=None,
tokens=tokens, pairs=pairs, eof=eof,
ContainerToken=token_data.ContainerToken,
FunctionToken=token_data.FunctionToken):
for token in tokens:
type_ = token.type
if type_ == stop_at:
return
end = pairs.get(type_)
if end is None:
yield token # Not a grouping token
else:
assert not isinstance(token, ContainerToken), (
'Token looks already grouped: {0}'.format(token))
content = list(_regroup_inner(end))
if eof[0]:
end = '' # Implicit end of structure at EOF.
if type_ == 'FUNCTION':
yield FunctionToken(token.type, token.as_css(), end,
token.value, content,
token.line, token.column)
else:
yield ContainerToken(token.type, token.as_css(), end,
content,
token.line, token.column)
else:
eof[0] = True # end of file/stylesheet
return _regroup_inner()
def tokenize_grouped(css_source, ignore_comments=True):
"""
:param css_source:
CSS as a unicode string
:param ignore_comments:
if true (the default) comments will not be included in the
return value
:return:
An iterator of :class:`Token`
"""
return regroup(tokenize_flat(css_source, ignore_comments))
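# A minimal usage sketch (the CSS snippet below is illustrative only; note that
# whitespace is reported as tokens of type 'S'):
#
#     for token in tokenize_grouped('a { color: red }'):
#         print(token.type, token.as_css())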
# Optional Cython version of tokenize_flat
# Make both versions available with explicit names for tests.
python_tokenize_flat = tokenize_flat
try:
tok = token_data.load_c_tokenizer()
except (ImportError, RuntimeError):
c_tokenize_flat = None
else:
# Use the c tokenizer by default
c_tokenize_flat = tokenize_flat = lambda s, ignore_comments=False: tok.tokenize_flat(s, ignore_comments)

View File

@@ -0,0 +1 @@
VERSION = '0.3'