Initial import

2026-02-24 11:15:50 +01:00 · 2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions
--- a/ebook_converter/css_selectors/__init__.py
+++ b/ebook_converter/css_selectors/__init__.py
@@ -0,0 +1,12 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__ = 'GPL v3'
+__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
+
+from css_selectors.parser import parse
+from css_selectors.select import Select, INAPPROPRIATE_PSEUDO_CLASSES
+from css_selectors.errors import SelectorError, SelectorSyntaxError, ExpressionError
+
+__all__ = ['parse', 'Select', 'INAPPROPRIATE_PSEUDO_CLASSES', 'SelectorError', 'SelectorSyntaxError', 'ExpressionError']
--- a/ebook_converter/css_selectors/errors.py
+++ b/ebook_converter/css_selectors/errors.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__ = 'GPL v3'
+__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
+
+class SelectorError(ValueError):
+
+    """Common parent for SelectorSyntaxError and ExpressionError.
+
+    It derives from ValueError, so code that handles ValueError also
+    handles every selector error.
+    """
+
+class SelectorSyntaxError(SelectorError):
+
+    """Raised when parsing a selector that does not match the grammar."""
+
+class ExpressionError(SelectorError):
+
+    """Raised for an unknown or unsupported selector (e.g. a pseudo-class)."""
--- a/ebook_converter/css_selectors/ordered_set.py
+++ b/ebook_converter/css_selectors/ordered_set.py
@@ -0,0 +1,133 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__ = 'GPL v3'
+__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import collections
+try:
+    from collections.abc import MutableSet
+except ImportError:  # Python 2
+    from collections import MutableSet
+
+from polyglot.builtins import string_or_bytes
+
+# The "take everything" slice, i.e. what ``obj[:]`` passes to __getitem__.
+SLICE_ALL = slice(None)
+
+
+def is_iterable(obj):
+    """
+    Decide whether `obj` stands for several lookups rather than one.
+
+    Anything exposing `__iter__` counts, which covers container types this
+    module knows nothing about (NumPy arrays, for instance).
+
+    Text and byte strings are the exception: they are looked up as single,
+    atomic values even though they can be iterated.
+    """
+    if not hasattr(obj, '__iter__'):
+        return False
+    return not isinstance(obj, string_or_bytes)
+
+
+class OrderedSet(MutableSet):
+    """
+    An OrderedSet is a custom MutableSet that remembers its order, so that
+    every entry has an index that can be looked up.
+
+    Entries live in insertion order in the list `items`; the dict `map`
+    records the position of every entry in that list. Each mutating method
+    has to keep the two in step.
+    """
+    def __init__(self, iterable=None):
+        """Build the set from `iterable`, keeping first occurrences only."""
+        self.items = []
+        self.map = {}
+        if iterable is not None:
+            for item in iterable:
+                if item not in self.map:
+                    self.map[item] = len(self.items)
+                    self.items.append(item)
+
+    def __len__(self):
+        return len(self.items)
+
+    def __getitem__(self, index):
+        """
+        Get the item at a given index.
+
+        If `index` is a slice, you will get back that slice of items. If it's
+        the slice [:], exactly the same object is returned. (If you want an
+        independent copy of an OrderedSet, use `OrderedSet.copy()`.)
+
+        If `index` is an iterable, you'll get the OrderedSet of items
+        corresponding to those indices. This is similar to NumPy's
+        "fancy indexing".
+
+        Raises TypeError for any other kind of index.
+        """
+        if index == SLICE_ALL:
+            return self
+        elif hasattr(index, '__index__') or isinstance(index, slice):
+            result = self.items[index]
+            if isinstance(result, list):
+                return OrderedSet(result)
+            else:
+                return result
+        elif is_iterable(index):
+            return OrderedSet([self.items[i] for i in index])
+        else:
+            raise TypeError("Don't know how to index an OrderedSet by %r" %
+                    index)
+
+    def copy(self):
+        """Return an independent OrderedSet with the same entries and order."""
+        return OrderedSet(self)
+
+    def __getstate__(self):
+        # Pickle only the ordered entries; `map` is rebuilt on load.
+        return tuple(self)
+
+    def __setstate__(self, state):
+        self.__init__(state)
+
+    def __contains__(self, key):
+        return key in self.map
+
+    def add(self, key):
+        """
+        Add `key` as an item to this OrderedSet, then return its index.
+
+        If `key` is already in the OrderedSet, return the index it already
+        had.
+        """
+        index = self.map.get(key)
+        if index is None:
+            self.map[key] = index = len(self.items)
+            self.items.append(key)
+        return index
+
+    def index(self, key):
+        """
+        Get the index of a given entry, raising a KeyError if it's not
+        present.
+
+        `key` can be an iterable of entries that is not a string, in which case
+        this returns a list of indices.
+        """
+        if is_iterable(key):
+            return [self.index(subkey) for subkey in key]
+        return self.map[key]
+
+    def discard(self, key):
+        """
+        Remove `key` if present and shift the indices of later entries down.
+
+        Returns True when an entry was removed, False when `key` was absent.
+        """
+        index = self.map.get(key)
+        if index is not None:
+            self.items.pop(index)
+            # Forget the removed entry itself; otherwise membership tests
+            # and add() keep seeing it through its stale index.
+            del self.map[key]
+            for item in self.items[index:]:
+                self.map[item] -= 1
+            return True
+        return False
+
+    def __iter__(self):
+        return iter(self.items)
+
+    def __reversed__(self):
+        return reversed(self.items)
+
+    def __repr__(self):
+        if not self:
+            return '%s()' % (self.__class__.__name__,)
+        return '%s(%r)' % (self.__class__.__name__, list(self))
+
+    def __eq__(self, other):
+        # Two OrderedSets are equal only when they hold the same entries in
+        # the same order; against any other type the comparison is done in
+        # that type's terms (e.g. order-insensitive for a plain set).
+        if isinstance(other, OrderedSet):
+            return len(self) == len(other) and self.items == other.items
+        try:
+            return type(other)(self.map) == other
+        except TypeError:
+            return False
--- a/ebook_converter/css_selectors/parser.py
+++ b/ebook_converter/css_selectors/parser.py
@@ -0,0 +1,791 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+"""
+    Tokenizer, parser and parsed objects for CSS selectors.
+
+    :copyright: (c) 2007-2012 Ian Bicking and contributors.
+                See AUTHORS for more details.
+    :license: BSD, see LICENSE for more details.
+
+"""
+
+import sys
+import re
+import operator
+import string
+
+from css_selectors.errors import SelectorSyntaxError, ExpressionError
+from polyglot.builtins import unicode_type, codepoint_to_chr, range
+
+
+# Translation table for unicode strings: maps the code points of ASCII
+# 'A'-'Z' to the code points of 'a'-'z'.
+utab = {c:c+32 for c in range(ord(u'A'), ord(u'Z')+1)}
+
+if sys.version_info.major < 3:
+    # Python 2: byte strings are translated with their own table.
+    tab = string.maketrans(string.ascii_uppercase, string.ascii_lowercase)
+
+    def ascii_lower(string):
+        """Lower-case, but only in the ASCII range."""
+        return string.translate(utab if isinstance(string, unicode_type) else tab)
+
+    def urepr(x):
+        """repr() of `x` with a leading ``u`` string prefix removed.
+
+        Lists are rendered element by element with the same rule.
+        """
+        if isinstance(x, list):
+            return '[%s]' % ', '.join((map(urepr, x)))
+        ans = repr(x)
+        if ans.startswith("u'") or ans.startswith('u"'):
+            ans = ans[1:]
+        return ans
+
+
+else:
+
+    def ascii_lower(x):
+        """Lower-case, but only in the ASCII range."""
+        return x.translate(utab)
+
+    # On Python 3 the plain repr() is used unchanged.
+    urepr = repr
+
+
+# Parsed objects
+
+class Selector(object):
+
+    """
+    Represents a parsed selector.
+
+    ``parsed_tree`` is the root of the parsed selector objects and
+    ``pseudo_element`` the optional pseudo-element attached to it.
+    """
+
+    def __init__(self, tree, pseudo_element=None):
+        self.parsed_tree = tree
+        # Plain identifiers are normalised to ASCII lower case; functional
+        # pseudo-elements are kept as objects.
+        if pseudo_element is not None and not isinstance(
+                pseudo_element, FunctionalPseudoElement):
+            pseudo_element = ascii_lower(pseudo_element)
+        #: A :class:`FunctionalPseudoElement`,
+        #: or the identifier for the pseudo-element as a string,
+        #: or ``None``.
+        #:
+        #: +-------------------------+----------------+--------------------------------+
+        #: |                         | Selector       | Pseudo-element                 |
+        #: +=========================+================+================================+
+        #: | CSS3 syntax             | ``a::before``  | ``'before'``                   |
+        #: +-------------------------+----------------+--------------------------------+
+        #: | Older syntax            | ``a:before``   | ``'before'``                   |
+        #: +-------------------------+----------------+--------------------------------+
+        #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'``                   |
+        #: | not in Selectors3       |                |                                |
+        #: +-------------------------+----------------+--------------------------------+
+        #: | Invalid pseudo-class    | ``li:marker``  | ``None``                       |
+        #: +-------------------------+----------------+--------------------------------+
+        #: | Functional              | ``a::foo(2)``  | ``FunctionalPseudoElement(…)`` |
+        #: +-------------------------+----------------+--------------------------------+
+        #:
+        #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
+        self.pseudo_element = pseudo_element
+
+    def __repr__(self):
+        # The branches are mutually exclusive: with two independent ``if``
+        # statements the functional repr would always be overwritten by the
+        # ``::name`` form below.
+        if isinstance(self.pseudo_element, FunctionalPseudoElement):
+            pseudo_element = repr(self.pseudo_element)
+        elif self.pseudo_element:
+            pseudo_element = '::%s' % self.pseudo_element
+        else:
+            pseudo_element = ''
+        return '%s[%r%s]' % (
+            self.__class__.__name__, self.parsed_tree, pseudo_element)
+
+    def specificity(self):
+        """Return the specificity_ of this selector as a tuple of 3 integers.
+
+        A pseudo-element adds one to the last component.
+
+        .. _specificity: http://www.w3.org/TR/selectors/#specificity
+
+        """
+        a, b, c = self.parsed_tree.specificity()
+        if self.pseudo_element:
+            c += 1
+        return a, b, c
+
+
+class Class(object):
+
+    """
+    A class selector: ``selector.class_name``.
+    """
+    def __init__(self, selector, class_name):
+        self.selector, self.class_name = selector, class_name
+
+    def __repr__(self):
+        kind = self.__class__.__name__
+        return '%s[%r.%s]' % (kind, self.selector, self.class_name)
+
+    def specificity(self):
+        """Specificity of the wrapped selector, plus one in the middle slot."""
+        ids, classes, types = self.selector.specificity()
+        return ids, classes + 1, types
+
+
+class FunctionalPseudoElement(object):
+
+    """
+    Represents selector::name(arguments)
+
+    .. attribute:: name
+
+        The name (identifier) of the pseudo-element, as a string.
+        It is stored in ASCII lower case.
+
+    .. attribute:: arguments
+
+        The arguments of the pseudo-element, as a list of tokens.
+
+        **Note:** tokens are not part of the public API,
+        and may change between versions.
+        Use at your own risks.
+
+    """
+    def __init__(self, name, arguments):
+        self.name = ascii_lower(name)
+        self.arguments = arguments
+
+    def __repr__(self):
+        return '%s[::%s(%s)]' % (
+            self.__class__.__name__, self.name,
+            urepr([token.value for token in self.arguments]))
+
+    def argument_types(self):
+        """Return the ``type`` of every argument token, in order."""
+        return [token.type for token in self.arguments]
+
+    def specificity(self):
+        """Return the specificity contributed by the pseudo-element alone.
+
+        This object wraps no selector (it has no ``selector`` attribute), so
+        it cannot delegate the way the other parsed objects do. A
+        pseudo-element counts like a type selector, hence ``(0, 0, 1)``.
+        """
+        return 0, 0, 1
+
+
+class Function(object):
+
+    """
+    Represents selector:name(expr)
+
+    ``name`` is stored in ASCII lower case and ``arguments`` is the list of
+    argument tokens.
+    """
+    # Default for the flag flipped by parse_arguments(). Declaring it on the
+    # class means the method never reads an attribute that was never set.
+    arguments_parsed = False
+
+    def __init__(self, selector, name, arguments):
+        self.selector = selector
+        self.name = ascii_lower(name)
+        self.arguments = arguments
+        self._parsed_arguments = None
+
+    def __repr__(self):
+        return '%s[%r:%s(%s)]' % (
+            self.__class__.__name__, self.selector, self.name,
+            urepr([token.value for token in self.arguments]))
+
+    def argument_types(self):
+        """Return the ``type`` of every argument token, in order."""
+        return [token.type for token in self.arguments]
+
+    @property
+    def parsed_arguments(self):
+        """The arguments run through parse_series(), computed once and cached.
+
+        :raises ExpressionError: if parse_series() rejects the arguments
+            with a ValueError.
+        """
+        if self._parsed_arguments is None:
+            try:
+                self._parsed_arguments = parse_series(self.arguments)
+            except ValueError:
+                raise ExpressionError("Invalid series: '%r'" % self.arguments)
+        return self._parsed_arguments
+
+    def parse_arguments(self):
+        """Mark the arguments as parsed (sets ``arguments_parsed`` to True)."""
+        if not self.arguments_parsed:
+            self.arguments_parsed = True
+
+    def specificity(self):
+        """Specificity of the wrapped selector, plus one in the middle slot."""
+        a, b, c = self.selector.specificity()
+        b += 1
+        return a, b, c
+
+
+class Pseudo(object):
+
+    """
+    A pseudo-class without arguments: ``selector:ident``.
+    """
+    def __init__(self, selector, ident):
+        # The identifier is normalised to ASCII lower case.
+        self.ident = ascii_lower(ident)
+        self.selector = selector
+
+    def __repr__(self):
+        kind = self.__class__.__name__
+        return '%s[%r:%s]' % (kind, self.selector, self.ident)
+
+    def specificity(self):
+        """Specificity of the wrapped selector, plus one in the middle slot."""
+        ids, classes, types = self.selector.specificity()
+        return ids, classes + 1, types
+
+
+class Negation(object):
+
+    """
+    A negation: ``selector:not(subselector)``.
+    """
+    def __init__(self, selector, subselector):
+        self.selector, self.subselector = selector, subselector
+
+    def __repr__(self):
+        kind = self.__class__.__name__
+        return '%s[%r:not(%r)]' % (kind, self.selector, self.subselector)
+
+    def specificity(self):
+        """Component-wise sum of the selector and sub-selector specificities."""
+        outer_a, outer_b, outer_c = self.selector.specificity()
+        inner_a, inner_b, inner_c = self.subselector.specificity()
+        return outer_a + inner_a, outer_b + inner_b, outer_c + inner_c
+
+
+class Attrib(object):
+
+    """
+    An attribute selector: ``selector[namespace|attrib operator value]``.
+    """
+    def __init__(self, selector, namespace, attrib, operator, value):
+        self.selector = selector
+        self.namespace, self.attrib = namespace, attrib
+        self.operator, self.value = operator, value
+
+    def __repr__(self):
+        name = self.attrib
+        if self.namespace:
+            name = '%s|%s' % (self.namespace, name)
+        kind = self.__class__.__name__
+        if self.operator != 'exists':
+            return '%s[%r[%s %s %s]]' % (
+                kind, self.selector, name, self.operator, urepr(self.value))
+        # A bare presence test has neither operator nor value to show.
+        return '%s[%r[%s]]' % (kind, self.selector, name)
+
+    def specificity(self):
+        """Specificity of the wrapped selector, plus one in the middle slot."""
+        ids, classes, types = self.selector.specificity()
+        return ids, classes + 1, types
+
+
+class Element(object):
+
+    """
+    A type selector: ``namespace|element``.
+
+    An `element` of `None` stands for the universal selector '*'.
+
+    """
+    def __init__(self, namespace=None, element=None):
+        self.namespace, self.element = namespace, element
+
+    def __repr__(self):
+        label = self.element if self.element else '*'
+        if self.namespace:
+            label = '%s|%s' % (self.namespace, label)
+        return '%s[%s]' % (self.__class__.__name__, label)
+
+    def specificity(self):
+        """A named element counts once in the last slot; '*' counts nothing."""
+        return (0, 0, 1) if self.element else (0, 0, 0)
+
+
+class Hash(object):
+
+    """
+    An ID selector: ``selector#id``.
+    """
+    def __init__(self, selector, id):
+        self.selector, self.id = selector, id
+
+    def __repr__(self):
+        kind = self.__class__.__name__
+        return '%s[%r#%s]' % (kind, self.selector, self.id)
+
+    def specificity(self):
+        """Specificity of the wrapped selector, plus one in the first slot."""
+        ids, classes, types = self.selector.specificity()
+        return ids + 1, classes, types
+
+
+class CombinedSelector(object):
+
+    """
+    Two selectors joined by a combinator (``' '``, ``'+'``, ``'>'`` or
+    ``'~'`` as produced by the parser).
+    """
+
+    def __init__(self, selector, combinator, subselector):
+        assert selector is not None
+        self.selector, self.subselector = selector, subselector
+        self.combinator = combinator
+
+    def __repr__(self):
+        # The descendant combinator is a bare space, which would be
+        # invisible in the output, so it gets a readable placeholder.
+        shown = '<followed>' if self.combinator == ' ' else self.combinator
+        return '%s[%r %s %r]' % (
+            self.__class__.__name__, self.selector, shown, self.subselector)
+
+    def specificity(self):
+        """Component-wise sum of the specificities of both sides."""
+        left_a, left_b, left_c = self.selector.specificity()
+        right_a, right_b, right_c = self.subselector.specificity()
+        return left_a + right_a, left_b + right_b, left_c + right_c
+
+
+# Parser
+
+# Patterns for the fast paths in parse(). Each one has to match the whole
+# input, optionally surrounded by CSS whitespace.
+
+# foo
+_el_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$')
+
+# foo#bar or #bar
+_id_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$')
+
+# foo.bar or .bar
+_class_re = re.compile(
+    r'^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$')
+
+
+def parse(css):
+    """Parse a CSS *group of selectors*.
+
+    Inputs consisting of a single ``tag``, ``tag#id``/``#id`` or
+    ``tag.class``/``.class`` are recognised with a regular expression and
+    never reach the tokenizer.
+
+    :param css:
+        A *group of selectors* as an Unicode string.
+    :raises:
+        :class:`SelectorSyntaxError` on invalid selectors.
+    :returns:
+        A list of parsed :class:`Selector` objects, one for each
+        selector in the comma-separated group.
+
+    """
+    # Fast path: a lone element name
+    simple = _el_re.match(css)
+    if simple:
+        return [Selector(Element(element=simple.group(1)))]
+    # Fast path: an id or a class, optionally qualified by an element name
+    for pattern, node_type in ((_id_re, Hash), (_class_re, Class)):
+        simple = pattern.match(css)
+        if simple is not None:
+            tag = simple.group(1) or None
+            return [Selector(node_type(Element(element=tag), simple.group(2)))]
+
+    # General case: tokenize and run the full parser
+    stream = TokenStream(tokenize(css), source=css)
+    return list(parse_selector_group(stream))
+#    except SelectorSyntaxError:
+#        e = sys.exc_info()[1]
+#        message = "%s at %s -> %r" % (
+#            e, stream.used, stream.peek())
+#        e.msg = message
+#        e.args = tuple([message])
+#        raise
+
+
+def parse_selector_group(stream):
+    """Yield one :class:`Selector` per comma-separated selector in `stream`."""
+    stream.skip_whitespace()
+    while True:
+        tree, pseudo_element = parse_selector(stream)
+        yield Selector(tree, pseudo_element)
+        if stream.peek() != ('DELIM', ','):
+            return
+        # Step over the comma and any whitespace that follows it.
+        stream.next()
+        stream.skip_whitespace()
+
+
+def parse_selector(stream):
+    """Parse one selector: simple selectors chained by combinators.
+
+    Returns ``(tree, pseudo_element)``. Raises SelectorSyntaxError when a
+    pseudo-element is followed by more selector content.
+    """
+    result, pseudo_element = parse_simple_selector(stream)
+    while True:
+        stream.skip_whitespace()
+        upcoming = stream.peek()
+        if upcoming == ('EOF', None) or upcoming == ('DELIM', ','):
+            return result, pseudo_element
+        if pseudo_element:
+            raise SelectorSyntaxError(
+                'Got pseudo-element ::%s not at the end of a selector'
+                % pseudo_element)
+        # Without an explicit combinator, the previous
+        # parse_simple_selector() stopped at whitespace, which is the
+        # descendant combinator.
+        combinator = ' '
+        if upcoming.is_delim('+', '>', '~'):
+            combinator = stream.next().value
+            stream.skip_whitespace()
+        right_side, pseudo_element = parse_simple_selector(stream)
+        result = CombinedSelector(result, combinator, right_side)
+
+
+# Pseudo-elements that parse_simple_selector() also accepts when written
+# with a single ':'.
+special_pseudo_elements = (
+    'first-line', 'first-letter', 'before', 'after')
+
+
+def parse_simple_selector(stream, inside_negation=False):
+    """Parse one sequence of simple selectors from `stream`.
+
+    An optional ``namespace|element`` (or ``*``) comes first, followed by any
+    number of ``#id``, ``.class``, ``[attrib]``, ``:pseudo`` and
+    ``::pseudo-element`` parts.
+
+    :param inside_negation:
+        True while parsing the argument of ``:not()``; a closing ``)`` then
+        ends the sequence and a nested ``:not()`` is rejected.
+    :raises:
+        :class:`SelectorSyntaxError` on invalid input, including an empty
+        sequence.
+    :returns:
+        ``(tree, pseudo_element)``; `pseudo_element` is ``None``, a string
+        or a :class:`FunctionalPseudoElement`.
+    """
+    stream.skip_whitespace()
+    # Remembered so that "nothing was consumed" can be detected at the end.
+    selector_start = len(stream.used)
+    peek = stream.peek()
+    if peek.type == 'IDENT' or peek == ('DELIM', '*'):
+        # The first token is provisionally the namespace; it turns out to be
+        # the element name when no '|' follows.
+        if peek.type == 'IDENT':
+            namespace = stream.next().value
+        else:
+            stream.next()
+            namespace = None
+        if stream.peek() == ('DELIM', '|'):
+            stream.next()
+            element = stream.next_ident_or_star()
+        else:
+            element = namespace
+            namespace = None
+    else:
+        element = namespace = None
+    result = Element(namespace, element)
+    pseudo_element = None
+    while 1:
+        peek = stream.peek()
+        # Whitespace, end of input, a combinator or a comma ends the sequence.
+        if peek.type in ('S', 'EOF') or peek.is_delim(',', '+', '>', '~') or (
+                inside_negation and peek == ('DELIM', ')')):
+            break
+        # Nothing may follow a pseudo-element within the same sequence.
+        if pseudo_element:
+            raise SelectorSyntaxError(
+                'Got pseudo-element ::%s not at the end of a selector'
+                % pseudo_element)
+        if peek.type == 'HASH':
+            result = Hash(result, stream.next().value)
+        elif peek == ('DELIM', '.'):
+            stream.next()
+            result = Class(result, stream.next_ident())
+        elif peek == ('DELIM', '['):
+            stream.next()
+            result = parse_attrib(result, stream)
+        elif peek == ('DELIM', ':'):
+            stream.next()
+            if stream.peek() == ('DELIM', ':'):
+                # '::name' or '::name(arguments)'
+                stream.next()
+                pseudo_element = stream.next_ident()
+                if stream.peek() == ('DELIM', '('):
+                    stream.next()
+                    pseudo_element = FunctionalPseudoElement(
+                        pseudo_element, parse_arguments(stream))
+                continue
+            ident = stream.next_ident()
+            if ident.lower() in special_pseudo_elements:
+                # Special case: CSS 2.1 pseudo-elements can have a single ':'
+                # Any new pseudo-element must have two.
+                pseudo_element = unicode_type(ident)
+                continue
+            if stream.peek() != ('DELIM', '('):
+                # Plain pseudo-class without arguments
+                result = Pseudo(result, ident)
+                continue
+            # Functional pseudo-class: consume '(' and leading whitespace.
+            stream.next()
+            stream.skip_whitespace()
+            if ident.lower() == 'not':
+                if inside_negation:
+                    raise SelectorSyntaxError('Got nested :not()')
+                argument, argument_pseudo_element = parse_simple_selector(
+                    stream, inside_negation=True)
+                next = stream.next()
+                if argument_pseudo_element:
+                    raise SelectorSyntaxError(
+                        'Got pseudo-element ::%s inside :not() at %s'
+                        % (argument_pseudo_element, next.pos))
+                if next != ('DELIM', ')'):
+                    raise SelectorSyntaxError("Expected ')', got %s" % (next,))
+                result = Negation(result, argument)
+            else:
+                result = Function(result, ident, parse_arguments(stream))
+        else:
+            raise SelectorSyntaxError(
+                "Expected selector, got %s" % (peek,))
+    # No token consumed at all means there was no selector here.
+    if len(stream.used) == selector_start:
+        raise SelectorSyntaxError(
+            "Expected selector, got %s" % (stream.peek(),))
+    return result, pseudo_element
+
+
+def parse_arguments(stream):
+    """Collect the argument tokens of a functional pseudo up to the ')'.
+
+    Whitespace between arguments is skipped. Only IDENT, STRING and NUMBER
+    tokens and the '+' / '-' delimiters are accepted; anything else raises
+    SelectorSyntaxError.
+    """
+    collected = []
+    signs = [('DELIM', '+'), ('DELIM', '-')]
+    while True:
+        stream.skip_whitespace()
+        token = stream.next()
+        if token == ('DELIM', ')'):
+            return collected
+        if token.type not in ('IDENT', 'STRING', 'NUMBER') and (
+                token not in signs):
+            raise SelectorSyntaxError(
+                "Expected an argument, got %s" % (token,))
+        collected.append(token)
+
+
+def parse_attrib(selector, stream):
+    """Parse the inside of an attribute selector; the '[' is already consumed.
+
+    :param selector: The parsed object the attribute test applies to.
+    :raises: :class:`SelectorSyntaxError` on invalid input.
+    :returns: An :class:`Attrib` wrapping `selector`.
+    """
+    stream.skip_whitespace()
+    attrib = stream.next_ident_or_star()
+    # A '*' (returned as None) is only meaningful as a namespace, so it must
+    # be followed by '|'.
+    if attrib is None and stream.peek() != ('DELIM', '|'):
+        raise SelectorSyntaxError(
+            "Expected '|', got %s" % (stream.peek(),))
+    if stream.peek() == ('DELIM', '|'):
+        stream.next()
+        if stream.peek() == ('DELIM', '='):
+            # The '|' was the first half of the '|=' operator.
+            namespace = None
+            stream.next()
+            op = '|='
+        else:
+            # The '|' separated a namespace from the attribute name.
+            namespace = attrib
+            attrib = stream.next_ident()
+            op = None
+    else:
+        namespace = op = None
+    if op is None:
+        stream.skip_whitespace()
+        next = stream.next()
+        if next == ('DELIM', ']'):
+            # No operator: a bare presence test.
+            return Attrib(selector, namespace, attrib, 'exists', None)
+        elif next == ('DELIM', '='):
+            op = '='
+        elif next.is_delim('^', '$', '*', '~', '|', '!') and (
+                stream.peek() == ('DELIM', '=')):
+            # Two-character operators such as '^=' or '~='
+            op = next.value + '='
+            stream.next()
+        else:
+            raise SelectorSyntaxError(
+                "Operator expected, got %s" % (next,))
+    stream.skip_whitespace()
+    value = stream.next()
+    if value.type not in ('IDENT', 'STRING'):
+        raise SelectorSyntaxError(
+            "Expected string or ident, got %s" % (value,))
+    stream.skip_whitespace()
+    next = stream.next()
+    if next != ('DELIM', ']'):
+        raise SelectorSyntaxError(
+            "Expected ']', got %s" % (next,))
+    return Attrib(selector, namespace, attrib, op, value.value)
+
+
+def parse_series(tokens):
+    """
+    Parses the arguments for :nth-child() and friends.
+
+    The token values are concatenated and read as ``odd``, ``even``, a plain
+    integer ``b``, or an ``an+b`` expression. Matching is case-insensitive,
+    so ``2N+1`` and ``ODD`` are accepted as well.
+
+    :param tokens: A list of tokens, each exposing ``type`` and ``value``.
+    :raises: ``ValueError`` if a STRING token is present or if a part that
+        should be an integer is not one.
+    :returns: ``(a, b)``
+
+    """
+    for token in tokens:
+        if token.type == 'STRING':
+            raise ValueError('String tokens not allowed in series.')
+    # Selectors Level 3 defines the an+b syntax case-insensitively.
+    s = ''.join(token.value for token in tokens).strip().lower()
+    if s == 'odd':
+        return (2, 1)
+    elif s == 'even':
+        return (2, 0)
+    elif s == 'n':
+        return (1, 0)
+    if 'n' not in s:
+        # Just b
+        return (0, int(s))
+    a, b = s.split('n', 1)
+    if not a:
+        a = 1
+    elif a == '-' or a == '+':
+        # A bare sign stands for -1 / +1
+        a = int(a+'1')
+    else:
+        a = int(a)
+    if not b:
+        b = 0
+    else:
+        b = int(b)
+    return (a, b)
+
+
+# Token objects
+
+class Token(tuple):
+    """
+    A ``(type, value)`` pair that also remembers where it was found.
+
+    Only the pair takes part in comparisons; the position is kept in the
+    separate ``pos`` attribute.
+    """
+
+    # Named access to the two tuple slots
+    type = property(operator.itemgetter(0))
+    value = property(operator.itemgetter(1))
+
+    def __new__(cls, type_, value, pos):
+        token = super(Token, cls).__new__(cls, (type_, value))
+        token.pos = pos
+        return token
+
+    def __repr__(self):
+        return "<%s '%s' at %i>" % (self.type, self.value, self.pos)
+
+    def is_delim(self, *values):
+        """True for a DELIM token whose value is one of `values`."""
+        if self.type != 'DELIM':
+            return False
+        return self.value in values
+
+
+class EOFToken(Token):
+    """The end-of-input marker: type ``'EOF'`` with a value of ``None``."""
+
+    def __new__(cls, pos):
+        return super(EOFToken, cls).__new__(cls, 'EOF', None, pos)
+
+    def __repr__(self):
+        # There is no value worth showing, only the type and the position.
+        return '<%s at %i>' % (self.type, self.pos)
+
+
+# Tokenizer
+
+
+class TokenMacros:
+    # Regular-expression fragments; _compile() substitutes them into
+    # patterns through ``%(name)s`` placeholders.
+
+    # Backslash, 1-6 hex digits, then one optional whitespace character
+    unicode_escape = r'\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?'
+    # A unicode escape, or a backslash before any character that is neither
+    # a newline nor a hex digit
+    escape = unicode_escape + r'|\\[^\n\r\f0-9a-f]'
+    # Inside strings a backslash-newline pair is allowed as well
+    string_escape = r'\\(?:\n|\r\n|\r|\f)|' + escape
+    # Any character outside the octal range 0-177 (i.e. outside ASCII)
+    nonascii = r'[^\0-\177]'
+    # Characters allowed inside a name, and at the start of one
+    nmchar = '[_a-z0-9-]|%s|%s' % (escape, nonascii)
+    nmstart = '[_a-z]|%s|%s' % (escape, nonascii)
+
+
+def _compile(pattern):
+    """Expand the TokenMacros placeholders in `pattern` and return the
+    ``match`` method of the resulting case-insensitive regular expression."""
+    expanded = pattern % vars(TokenMacros)
+    compiled = re.compile(expanded, re.IGNORECASE)
+    return compiled.match
+
+
+# Bound ``match`` methods used by tokenize(), called as
+# ``matcher(text, pos=offset)``.
+_match_whitespace = _compile(r'[ \t\r\n\f]+')
+_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
+_match_hash = _compile('#(?:%(nmchar)s)+')
+_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
+# String bodies, keyed by the opening quote; matching starts just after
+# that quote and stops before the closing one.
+_match_string_by_quote = {
+    "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
+    '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
+}
+
+# Bound ``sub`` methods that resolve the different kinds of backslash escape.
+_sub_simple_escape = re.compile(r'\\(.)').sub
+_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub
+_sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub
+
+# Replacement callback that returns group 1 of the match.
+# Same as r'\1', but faster on CPython
+if hasattr(operator, 'methodcaller'):
+    # Python 2.6+
+    _replace_simple = operator.methodcaller('group', 1)
+else:
+    def _replace_simple(match):
+        return match.group(1)
+
+
+def _replace_unicode(match):
+    """Turn the hex digits captured by a unicode escape into a character.
+
+    Code points above ``sys.maxunicode`` become U+FFFD.
+    """
+    value = int(match.group(1), 16)
+    return codepoint_to_chr(0xFFFD if value > sys.maxunicode else value)
+
+
+def unescape_ident(value):
+    """Resolve backslash escapes in `value`: unicode escapes first, then
+    the remaining single-character ones."""
+    without_unicode = _sub_unicode_escape(_replace_unicode, value)
+    return _sub_simple_escape(_replace_simple, without_unicode)
+
+
+def tokenize(s):
+    """Yield the tokens of the selector string `s`, ending with an EOFToken.
+
+    Token types are ``S`` (a whitespace run, reported as a single space),
+    ``IDENT``, ``HASH`` (value without the '#'), ``STRING`` (value without
+    the quotes), ``NUMBER`` and ``DELIM`` (any other single character).
+    Comments produce no token. Escapes are resolved in IDENT, HASH and
+    STRING values.
+
+    :raises: :class:`SelectorSyntaxError` for an unclosed or invalid string.
+    """
+    pos = 0
+    len_s = len(s)
+    while pos < len_s:
+        # The order of the attempts below matters: the first matcher that
+        # succeeds at `pos` decides the token type.
+        match = _match_whitespace(s, pos=pos)
+        if match:
+            yield Token('S', ' ', pos)
+            pos = match.end()
+            continue
+
+        match = _match_ident(s, pos=pos)
+        if match:
+            value = _sub_simple_escape(_replace_simple,
+                    _sub_unicode_escape(_replace_unicode, match.group()))
+            yield Token('IDENT', value, pos)
+            pos = match.end()
+            continue
+
+        match = _match_hash(s, pos=pos)
+        if match:
+            # [1:] drops the leading '#'
+            value = _sub_simple_escape(_replace_simple,
+                    _sub_unicode_escape(_replace_unicode, match.group()[1:]))
+            yield Token('HASH', value, pos)
+            pos = match.end()
+            continue
+
+        quote = s[pos]
+        if quote in _match_string_by_quote:
+            match = _match_string_by_quote[quote](s, pos=pos + 1)
+            assert match, 'Should have found at least an empty match'
+            end_pos = match.end()
+            if end_pos == len_s:
+                raise SelectorSyntaxError('Unclosed string at %s' % pos)
+            # The body stopped at something other than the closing quote
+            if s[end_pos] != quote:
+                raise SelectorSyntaxError('Invalid string at %s' % pos)
+            value = _sub_simple_escape(_replace_simple,
+                    _sub_unicode_escape(_replace_unicode,
+                    _sub_newline_escape('', match.group())))
+            yield Token('STRING', value, pos)
+            pos = end_pos + 1
+            continue
+
+        match = _match_number(s, pos=pos)
+        if match:
+            value = match.group()
+            yield Token('NUMBER', value, pos)
+            pos = match.end()
+            continue
+
+        pos2 = pos + 2
+        if s[pos:pos2] == '/*':
+            # Skip the comment; an unterminated one runs to the end of input.
+            pos = s.find('*/', pos2)
+            if pos == -1:
+                pos = len_s
+            else:
+                pos += 2
+            continue
+
+        yield Token('DELIM', s[pos], pos)
+        pos += 1
+
+    assert pos == len_s
+    yield EOFToken(pos)
+
+
+class TokenStream(object):
+    """
+    Reader over an iterable of tokens with one token of lookahead.
+
+    Every token handed out by next() is also appended to ``used``.
+    ``source`` optionally holds the text the tokens came from.
+    """
+
+    def __init__(self, tokens, source=None):
+        self.used = []
+        self.tokens = iter(tokens)
+        self.source = source
+        # Lookahead slot, valid only while `_peeking` is True
+        self.peeked = None
+        self._peeking = False
+        try:
+            self.next_token = self.tokens.next
+        except AttributeError:
+            # Python 3
+            self.next_token = self.tokens.__next__
+
+    def next(self):
+        """Consume and return the next token, recording it in ``used``."""
+        if self._peeking:
+            self._peeking = False
+            self.used.append(self.peeked)
+            return self.peeked
+        else:
+            next = self.next_token()
+            self.used.append(next)
+            return next
+
+    def peek(self):
+        """Return the next token without consuming it."""
+        if not self._peeking:
+            self.peeked = self.next_token()
+            self._peeking = True
+        return self.peeked
+
+    def next_ident(self):
+        """Consume an IDENT token and return its value.
+
+        Raises SelectorSyntaxError for any other token.
+        """
+        next = self.next()
+        if next.type != 'IDENT':
+            raise SelectorSyntaxError('Expected ident, got %s' % (next,))
+        return next.value
+
+    def next_ident_or_star(self):
+        """Consume an IDENT (returning its value) or a '*' (returning None).
+
+        Raises SelectorSyntaxError for any other token.
+        """
+        next = self.next()
+        if next.type == 'IDENT':
+            return next.value
+        elif next == ('DELIM', '*'):
+            return None
+        else:
+            raise SelectorSyntaxError(
+                "Expected ident or '*', got %s" % (next,))
+
+    def skip_whitespace(self):
+        """Consume every consecutive whitespace (``'S'``) token.
+
+        A comment between two whitespace runs produces two adjacent 'S'
+        tokens, so dropping just one would leave the stream positioned on
+        whitespace.
+        """
+        while self.peek().type == 'S':
+            self.next()
--- a/ebook_converter/css_selectors/select.py
+++ b/ebook_converter/css_selectors/select.py
@@ -0,0 +1,694 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__ = 'GPL v3'
+__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import re, itertools
+from collections import OrderedDict, defaultdict
+from functools import wraps
+from itertools import chain
+
+from lxml import etree
+
+from css_selectors.errors import ExpressionError
+from css_selectors.parser import parse, ascii_lower, Element, FunctionalPseudoElement
+from css_selectors.ordered_set import OrderedSet
+
+from polyglot.builtins import iteritems, itervalues
+
+# Size-bounded caches for parsed selectors and compiled XPath expressions,
+# keyed by the source string. OrderedDict keeps insertion order so that the
+# oldest entry can be evicted once a cache grows past its size limit.
+PARSE_CACHE_SIZE = 200
+parse_cache = OrderedDict()
+XPATH_CACHE_SIZE = 30
+xpath_cache = OrderedDict()
+
+# Test that the string is not empty and does not contain whitespace
+# NOTE(review): `$` also matches just before a trailing newline, so a value
+# that ends in a single '\n' is accepted -- confirm whether that matters.
+is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match
+
+
+def get_parsed_selector(raw):
+    """Return the parsed form of the selector string ``raw``.
+
+    Results are memoized in ``parse_cache``; once the cache holds more than
+    PARSE_CACHE_SIZE entries the oldest inserted entry is dropped.
+    """
+    if raw in parse_cache:
+        return parse_cache[raw]
+    parsed = parse(raw)
+    parse_cache[raw] = parsed
+    if len(parse_cache) > PARSE_CACHE_SIZE:
+        # Evict in insertion order (first in, first out)
+        parse_cache.popitem(last=False)
+    return parsed
+
+
+def get_compiled_xpath(expr):
+    """Return a compiled ``etree.XPath`` object for the expression ``expr``.
+
+    Results are memoized in ``xpath_cache``; once the cache holds more than
+    XPATH_CACHE_SIZE entries the oldest inserted entry is dropped.
+    """
+    if expr in xpath_cache:
+        return xpath_cache[expr]
+    compiled = etree.XPath(expr)
+    xpath_cache[expr] = compiled
+    if len(xpath_cache) > XPATH_CACHE_SIZE:
+        # Evict in insertion order (first in, first out)
+        xpath_cache.popitem(last=False)
+    return compiled
+
+
+class AlwaysIn(object):
+
+    """Stand-in container whose membership test succeeds for every object."""
+
+    def __contains__(self, x):
+        # x is deliberately ignored: everything counts as a member
+        return True
+
+
+# Shared instance, used by the combinators when the right-hand side of a
+# combined selector matches all elements.
+always_in = AlwaysIn()
+
+
+def trace_wrapper(func):
+    """Wrap ``func`` so that every call is printed before being executed.
+
+    A leading ``Select`` instance (the ``cache`` argument of the handlers) is
+    left out of the printed arguments.
+    """
+    @wraps(func)
+    def trace(*args, **kwargs):
+        shown = args
+        if args and isinstance(args[0], Select):
+            shown = args[1:]
+        print('Called:', func.__name__, 'with args:', shown, kwargs or '')
+        return func(*args, **kwargs)
+    return trace
+
+
+def normalize_language_tag(tag):
+    """Return the set of normalized combinations for a `BCP 47` language tag.
+
+    The tag is ASCII-lowercased and underscores are treated as hyphens. The
+    result contains the primary subtag on its own, plus the primary subtag
+    joined with every order-preserving combination of the remaining subtags.
+    A single-character subtag (a singleton) stays attached to the subtag
+    that follows it.
+
+    The return value is a set, so it has no defined order.
+
+    Example:
+
+    >>> sorted(normalize_language_tag('de_AT-1901'))
+    ['de', 'de-1901', 'de-at', 'de-at-1901']
+    """
+    # normalize:
+    tag = ascii_lower(tag).replace('_', '-')
+    # split (except singletons, which mark the following tag as non-standard);
+    # the singleton is temporarily glued to its successor with '_'
+    tag = re.sub(r'-([a-zA-Z0-9])-', r'-\1_', tag)
+    subtags = [subtag.replace('_', '-') for subtag in tag.split('-')]
+    base_tag = (subtags.pop(0),)
+    taglist = {base_tag[0]}
+    # find all combinations of subtags
+    for n in range(len(subtags), 0, -1):
+        for tags in itertools.combinations(subtags, n):
+            taglist.add('-'.join(base_tag + tags))
+    return taglist
+
+
+# Pseudo-classes that Select accepts as always matching, instead of raising
+# ExpressionError, when created with ignore_inappropriate_pseudo_classes=True
+INAPPROPRIATE_PSEUDO_CLASSES = frozenset((
+    'active', 'after', 'before', 'checked', 'disabled', 'enabled',
+    'first-letter', 'first-line', 'focus', 'hover', 'link', 'target',
+    'visited',
+))
+
+
+class Select(object):
+
+    '''
+
+    This class implements CSS Level 3 selectors
+    (http://www.w3.org/TR/css3-selectors) on an lxml tree, with caching for
+    performance. To use:
+
+    >>> from css_selectors import Select
+    >>> select = Select(root)  # Where root is an lxml document
+    >>> print(tuple(select('p.myclass')))
+
+    Tags are returned in document order. Note that attribute and tag names are
+    matched case-insensitively. Class and id values are also matched
+    case-insensitively. Also namespaces are ignored (this is for performance of
+    the common case).  The UI related selectors are not implemented, such as
+    :enabled, :disabled, :checked, :hover, etc.  Similarly, the non-element
+    related selectors such as ::first-line, ::first-letter, ::before, etc. are
+    not implemented.
+
+    WARNING: This class uses internal caches. You *must not* make any changes
+    to the lxml tree. If you do make some changes, either create a new Select
+    object or call :meth:`invalidate_caches`.
+
+    This class can be easily sub-classed to work with tree implementations
+    other than lxml. Simply override the methods in the ``Tree Integration``
+    block below.
+
+    The caching works by maintaining internal maps from classes/ids/tag
+    names/etc.  to node sets. These caches are populated as needed, and used
+    for all subsequent selections.  Thus, for best performance you should use
+    the same selector object for finding the matching nodes for multiple
+    queries. Of course, remember not to change the tree in between queries.
+
+    '''
+
+    # Combinator token -> name of the handler in the dispatch map
+    combinator_mapping = {
+        ' ': 'descendant',
+        '>': 'child',
+        '+': 'direct_adjacent',
+        '~': 'indirect_adjacent',
+    }
+
+    # Attribute selector operator -> name of the handler in the dispatch map
+    attribute_operator_mapping = {
+        'exists': 'exists',
+        '=': 'equals',
+        '~=': 'includes',
+        '|=': 'dashmatch',
+        '^=': 'prefixmatch',
+        '$=': 'suffixmatch',
+        '*=': 'substringmatch',
+    }
+
+    def __init__(self, root, default_lang=None, ignore_inappropriate_pseudo_classes=False, dispatch_map=None, trace=False):
+        ''' Create a selector engine bound to ``root``.
+
+        :param root: The root element to search. If it has a ``getroot()``
+            method (for example an element tree), the element returned by
+            that method is used instead.
+        :param default_lang: Language assigned by :lang() to every tag that
+            is not covered by a ``lang``/``xml:lang`` declaration.
+        :param ignore_inappropriate_pseudo_classes: If True, the pseudo-classes
+            named in INAPPROPRIATE_PSEUDO_CLASSES are made available to the
+            handlers as ``self.ignore_inappropriate_pseudo_classes``;
+            otherwise that attribute is an empty frozenset.
+        :param dispatch_map: Mapping of handler name to handler function,
+            defaults to ``default_dispatch_map``.
+        :param trace: If True, every handler is wrapped with
+            ``trace_wrapper`` so that each call is printed.
+        '''
+        if hasattr(root, 'getroot'):
+            root = root.getroot()
+        self.root = root
+        self.dispatch_map = dispatch_map or default_dispatch_map
+        self.invalidate_caches()
+        self.default_lang = default_lang
+        if trace:
+            self.dispatch_map = {k:trace_wrapper(v) for k, v in iteritems(self.dispatch_map)}
+        if ignore_inappropriate_pseudo_classes:
+            self.ignore_inappropriate_pseudo_classes = INAPPROPRIATE_PSEUDO_CLASSES
+        else:
+            self.ignore_inappropriate_pseudo_classes = frozenset()
+
+    # External API {{{
+    def invalidate_caches(self):
+        'Invalidate all caches. You must call this before using this object if you have made changes to the HTML tree'
+        self._element_map = None
+        self._id_map = None
+        self._class_map = None
+        self._attrib_map = None
+        self._attrib_space_map = None
+        self._lang_map = None
+        self.map_tag_name = ascii_lower
+        if '{' in self.root.tag:
+            # The root is namespaced: strip the {namespace} prefix from tag
+            # names before lower-casing them
+            def map_tag_name(x):
+                return ascii_lower(x.rpartition('}')[2])
+            self.map_tag_name = map_tag_name
+
+    def __call__(self, selector, root=None):
+        ''' Return an iterator over all matching tags, in document order.
+        Normally, all matching tags in the document are returned, if you
+        specify root, then only tags that are root or descendants of root are
+        returned. Note that this can be very expensive if root has a lot of
+        descendants. '''
+        seen = set()
+        if root is not None:
+            root = frozenset(self.itertag(root))
+        for parsed_selector in get_parsed_selector(selector):
+            for item in self.iterparsedselector(parsed_selector):
+                if item not in seen and (root is None or item in root):
+                    yield item
+                    seen.add(item)
+
+    def has_matches(self, selector, root=None):
+        'Return True iff selector matches at least one item in the tree'
+        for elem in self(selector, root=root):
+            return True
+        return False
+    # }}}
+
+    def iterparsedselector(self, parsed_selector):
+        ''' Yield the items matched by an already parsed selector node. The
+        handler is looked up in the dispatch map under the lower-cased class
+        name of the node; ExpressionError is raised if there is none. '''
+        type_name = type(parsed_selector).__name__
+        try:
+            func = self.dispatch_map[ascii_lower(type_name)]
+        except KeyError:
+            raise ExpressionError('%s is not supported' % type_name)
+        for item in func(self, parsed_selector):
+            yield item
+
+    @property
+    def element_map(self):
+        ' Lazily built map of normalized tag name to the set of tags with that name '
+        if self._element_map is None:
+            self._element_map = em = defaultdict(OrderedSet)
+            for tag in self.itertag():
+                em[self.map_tag_name(tag.tag)].add(tag)
+        return self._element_map
+
+    @property
+    def id_map(self):
+        ' Lazily built map of lower-cased id value to the set of tags with that id '
+        if self._id_map is None:
+            self._id_map = im = defaultdict(OrderedSet)
+            lower = ascii_lower
+            for elem in self.iteridtags():
+                im[lower(elem.get('id'))].add(elem)
+        return self._id_map
+
+    @property
+    def class_map(self):
+        ' Lazily built map of lower-cased class name to the set of tags having that class '
+        if self._class_map is None:
+            self._class_map = cm = defaultdict(OrderedSet)
+            lower = ascii_lower
+            for elem in self.iterclasstags():
+                for cls in elem.get('class').split():
+                    cm[lower(cls)].add(elem)
+        return self._class_map
+
+    @property
+    def attrib_map(self):
+        ' Lazily built map of normalized attribute name -> exact attribute value -> set of tags '
+        if self._attrib_map is None:
+            self._attrib_map = am = defaultdict(lambda : defaultdict(OrderedSet))
+            map_attrib_name = ascii_lower
+            if '{' in self.root.tag:
+                def map_attrib_name(x):
+                    return ascii_lower(x.rpartition('}')[2])
+            for tag in self.itertag():
+                for attr, val in iteritems(tag.attrib):
+                    am[map_attrib_name(attr)][val].add(tag)
+        return self._attrib_map
+
+    @property
+    def attrib_space_map(self):
+        ' Like attrib_map, but keyed by each whitespace separated word of the attribute value '
+        if self._attrib_space_map is None:
+            self._attrib_space_map = am = defaultdict(lambda : defaultdict(OrderedSet))
+            map_attrib_name = ascii_lower
+            if '{' in self.root.tag:
+                def map_attrib_name(x):
+                    return ascii_lower(x.rpartition('}')[2])
+            for tag in self.itertag():
+                for attr, val in iteritems(tag.attrib):
+                    for v in val.split():
+                        am[map_attrib_name(attr)][v].add(tag)
+        return self._attrib_space_map
+
+    @property
+    def lang_map(self):
+        ''' Lazily built map of normalized language tag to the set of tags in
+        that language. A language declared on a tag applies to the tag and
+        all its descendants, until overridden by a nested declaration. Tags
+        without any declaration get ``default_lang``, if it is set. '''
+        if self._lang_map is None:
+            self._lang_map = lm = defaultdict(OrderedSet)
+            dl = normalize_language_tag(self.default_lang) if self.default_lang else None
+            lmap = {tag:dl for tag in self.itertag()} if dl else {}
+            xml_lang = '{http://www.w3.org/XML/1998/namespace}lang'
+            for tag in self.itertag():
+                # Fall back to xml:lang when there is no (non-empty) lang
+                # attribute. When both are present the plain lang attribute
+                # is used, which is what this code has always done.
+                lang = tag.get('lang') or tag.get(xml_lang)
+                if lang:
+                    lang = normalize_language_tag(lang)
+                    # Tags are visited in document order, so a declaration
+                    # on a descendant overwrites this one later on
+                    for dtag in self.itertag(tag):
+                        lmap[dtag] = lang
+            for tag, langs in iteritems(lmap):
+                for lang in langs:
+                    lm[lang].add(tag)
+        return self._lang_map
+
+    # Tree Integration {{{
+    def itertag(self, tag=None):
+        ' Iterate over tag (the tree root by default) and all its descendant tags '
+        return (self.root if tag is None else tag).iter('*')
+
+    def iterdescendants(self, tag=None):
+        ' Iterate over all descendant tags of tag (the tree root by default) '
+        return (self.root if tag is None else tag).iterdescendants('*')
+
+    def iterchildren(self, tag=None):
+        ' Iterate over the child tags of tag (the tree root by default) '
+        return (self.root if tag is None else tag).iterchildren('*')
+
+    def itersiblings(self, tag=None, preceding=False):
+        ' Iterate over the sibling tags following (or, if preceding is True, preceding) tag '
+        return (self.root if tag is None else tag).itersiblings('*', preceding=preceding)
+
+    def iteridtags(self):
+        ' Return all tags that have an id attribute '
+        return get_compiled_xpath('//*[@id]')(self.root)
+
+    def iterclasstags(self):
+        ' Return all tags that have a class attribute '
+        return get_compiled_xpath('//*[@class]')(self.root)
+
+    def sibling_count(self, child, before=True, same_type=False):
+        ''' Return the number of siblings before or after child or raise ValueError if child has no parent.
+        If same_type is True only siblings with the same tag name as child are counted. '''
+        parent = child.getparent()
+        if parent is None:
+            raise ValueError('Child has no parent')
+        if same_type:
+            siblings = OrderedSet(child.itersiblings(preceding=before))
+            return len(self.element_map[self.map_tag_name(child.tag)] & siblings)
+        else:
+            if before:
+                return parent.index(child)
+            return len(parent) - parent.index(child) - 1
+
+    def all_sibling_count(self, child, same_type=False):
+        ''' Return the number of siblings of child or raise ValueError if child has no parent.
+        If same_type is True only siblings with the same tag name as child are counted. '''
+        parent = child.getparent()
+        if parent is None:
+            raise ValueError('Child has no parent')
+        if same_type:
+            siblings = OrderedSet(chain(child.itersiblings(preceding=False), child.itersiblings(preceding=True)))
+            return len(self.element_map[self.map_tag_name(child.tag)] & siblings)
+        else:
+            return len(parent) - 1
+
+    def is_empty(self, elem):
+        ' Return True iff elem has no child tags and no text content '
+        for child in elem:
+            # Check for comment/PI nodes with tail text
+            if child.tail:
+                return False
+        return len(tuple(elem.iterchildren('*'))) == 0 and not elem.text
+
+    # }}}
+
+# Combinators {{{
+
+
+def select_combinedselector(cache, combined):
+    """Evaluate two selectors joined by a combinator (' ', '>', '+' or '~')."""
+    handler_name = cache.combinator_mapping[combined.combinator]
+    sub = combined.subselector
+    if isinstance(sub, Element) and (sub.element or '*') == '*':
+        # Fast path: the right hand side matches every element, signalled
+        # to the combinator handler by passing None
+        right = None
+    else:
+        right = cache.iterparsedselector(sub)
+    handler = cache.dispatch_map[handler_name]
+    left = cache.iterparsedselector(combined.selector)
+    for item in handler(cache, left, right):
+        yield item
+
+
+def select_descendant(cache, left, right):
+    """Yield every descendant of an item in left that is also in right.
+
+    A right of None means that any descendant is acceptable.
+    """
+    if right is None:
+        candidates = always_in
+    else:
+        candidates = frozenset(right)
+    for ancestor in left:
+        for node in cache.iterdescendants(ancestor):
+            if node in candidates:
+                yield node
+
+
+def select_child(cache, left, right):
+    """Yield every immediate child of an item in left that is also in right.
+
+    A right of None means that any child is acceptable.
+    """
+    if right is None:
+        candidates = always_in
+    else:
+        candidates = frozenset(right)
+    for parent in left:
+        for node in cache.iterchildren(parent):
+            if node in candidates:
+                yield node
+
+
+def select_direct_adjacent(cache, left, right):
+    """Yield the sibling immediately following each item in left, provided
+    it is also in right.
+
+    A right of None means that any sibling is acceptable.
+    """
+    if right is None:
+        candidates = always_in
+    else:
+        candidates = frozenset(right)
+    for node in left:
+        for following in cache.itersiblings(node):
+            if following in candidates:
+                yield following
+            # Only the first following sibling is ever considered
+            break
+
+
+def select_indirect_adjacent(cache, left, right):
+    """Yield every sibling following an item in left that is also in right.
+
+    A right of None means that any sibling is acceptable.
+    """
+    if right is None:
+        candidates = always_in
+    else:
+        candidates = frozenset(right)
+    for node in left:
+        for following in cache.itersiblings(node):
+            if following in candidates:
+                yield following
+# }}}
+
+
+def select_element(cache, selector):
+    """Yield the tags matched by a type selector or the universal selector."""
+    name = selector.element
+    if name and name != '*':
+        matches = cache.element_map[ascii_lower(name)]
+    else:
+        # No name, or '*': every tag in the tree matches
+        matches = cache.itertag()
+    for elem in matches:
+        yield elem
+
+
+def select_hash(cache, selector):
+    'An id selector: yield the items of the inner selector that have the (lower-cased) id'
+    wanted = cache.id_map[ascii_lower(selector.id)]
+    if len(wanted) == 0:
+        # No tag has this id, no need to evaluate the inner selector
+        return
+    for elem in cache.iterparsedselector(selector.selector):
+        if elem in wanted:
+            yield elem
+
+
+def select_class(cache, selector):
+    'A class selector: yield the items of the inner selector that have the (lower-cased) class'
+    wanted = cache.class_map[ascii_lower(selector.class_name)]
+    if not wanted:
+        # No tag has this class, no need to evaluate the inner selector
+        return
+    for elem in cache.iterparsedselector(selector.selector):
+        if elem in wanted:
+            yield elem
+
+
+def select_negation(cache, selector):
+    'Implement :not(): yield the items of the inner selector not matched by the sub-selector'
+    rejected = frozenset(cache.iterparsedselector(selector.subselector))
+    candidates = cache.iterparsedselector(selector.selector)
+    for item in candidates:
+        if item in rejected:
+            continue
+        yield item
+
+# Attribute selectors {{{
+
+
+def select_attrib(cache, selector):
+    """Yield the items of the inner selector that satisfy an attribute test.
+
+    The test itself is delegated to the handler registered for the operator
+    (exists, equals, includes, ...), called with the lower-cased attribute
+    name and the value from the selector.
+    """
+    handler_name = cache.attribute_operator_mapping[selector.operator]
+    handler = cache.dispatch_map[handler_name]
+    matching = frozenset(handler(cache, ascii_lower(selector.attrib), selector.value))
+    for item in cache.iterparsedselector(selector.selector):
+        if item in matching:
+            yield item
+
+
+def select_exists(cache, attrib, value=None):
+    """Yield every tag that has the attribute attrib, whatever its value."""
+    tags_by_value = cache.attrib_map[attrib]
+    for tags in itervalues(tags_by_value):
+        for elem in tags:
+            yield elem
+
+
+def select_equals(cache, attrib, value):
+    """Yield every tag whose attribute attrib is exactly value."""
+    tags_by_value = cache.attrib_map[attrib]
+    for elem in tags_by_value[value]:
+        yield elem
+
+
+def select_includes(cache, attrib, value):
+    """Yield every tag whose attribute attrib contains value as one of its
+    whitespace separated words. Nothing is yielded if value fails the
+    is_non_whitespace test."""
+    if not is_non_whitespace(value):
+        return
+    for elem in cache.attrib_space_map[attrib][value]:
+        yield elem
+
+
+def select_dashmatch(cache, attrib, value):
+    """Yield every tag whose attribute attrib is value, or starts with value
+    followed by a hyphen. An empty value matches nothing."""
+    if not value:
+        return
+    for val, tags in iteritems(cache.attrib_map[attrib]):
+        if val != value and not val.startswith(value + '-'):
+            continue
+        for elem in tags:
+            yield elem
+
+
+def select_prefixmatch(cache, attrib, value):
+    """Yield every tag whose attribute attrib starts with value. An empty
+    value matches nothing."""
+    if not value:
+        return
+    for val, tags in iteritems(cache.attrib_map[attrib]):
+        if not val.startswith(value):
+            continue
+        for elem in tags:
+            yield elem
+
+
+def select_suffixmatch(cache, attrib, value):
+    """Yield every tag whose attribute attrib ends with value. An empty
+    value matches nothing."""
+    if not value:
+        return
+    for val, tags in iteritems(cache.attrib_map[attrib]):
+        if not val.endswith(value):
+            continue
+        for elem in tags:
+            yield elem
+
+
+def select_substringmatch(cache, attrib, value):
+    """Yield every tag whose attribute attrib contains value as a substring.
+    An empty value matches nothing."""
+    if not value:
+        return
+    for val, tags in iteritems(cache.attrib_map[attrib]):
+        if value not in val:
+            continue
+        for elem in tags:
+            yield elem
+
+# }}}
+
+# Function selectors {{{
+
+
+def select_function(cache, function):
+    """Yield the items of the inner selector accepted by a functional
+    pseudo-class such as :nth-child() or :lang().
+
+    Raises ExpressionError if no handler is registered for the function.
+    """
+    fname = function.name.replace('-', '_')
+    try:
+        func = cache.dispatch_map[fname]
+    except KeyError:
+        raise ExpressionError(
+            "The pseudo-class :%s() is unknown" % function.name)
+    if fname == 'lang':
+        # The lang handler produces the whole set of matching tags at once
+        matching = frozenset(func(cache, function))
+        for item in cache.iterparsedselector(function.selector):
+            if item in matching:
+                yield item
+        return
+    # All other handlers are per-item predicates
+    for item in cache.iterparsedselector(function.selector):
+        if func(cache, function, item):
+            yield item
+
+
+def select_lang(cache, function):
+    ''' Implement :lang(): yield every tag whose language is the requested
+    one, or a more specific variant of it (requested language followed by a
+    hyphen). Raises ExpressionError unless the argument is a single string
+    or ident. '''
+    if function.argument_types() not in (['STRING'], ['IDENT']):
+        raise ExpressionError("Expected a single string or ident for :lang(), got %r" % function.arguments)
+    lang = function.arguments[0].value
+    if not lang:
+        return
+    lang = ascii_lower(lang)
+    prefix = lang + '-'
+    for tlang, tags in iteritems(cache.lang_map):
+        if tlang != lang and (tlang is None or not tlang.startswith(prefix)):
+            continue
+        for elem in tags:
+            yield elem
+
+
+def select_nth_child(cache, function, elem):
+    ' Implement :nth-child(): True iff the 1-based position of elem is a*n + b for some n >= 0 '
+    a, b = function.parsed_arguments
+    try:
+        position = 1 + cache.sibling_count(elem)
+    except ValueError:
+        # elem has no parent
+        return False
+    if a != 0:
+        n = (position - b) / a
+        return n.is_integer() and n > -1
+    return position == b
+
+
+def select_nth_last_child(cache, function, elem):
+    ' Implement :nth-last-child(): like :nth-child() but counting from the last sibling '
+    a, b = function.parsed_arguments
+    try:
+        position = 1 + cache.sibling_count(elem, before=False)
+    except ValueError:
+        # elem has no parent
+        return False
+    if a != 0:
+        n = (position - b) / a
+        return n.is_integer() and n > -1
+    return position == b
+
+
+def select_nth_of_type(cache, function, elem):
+    ' Implement :nth-of-type(): like :nth-child() but counting only siblings of the same type '
+    a, b = function.parsed_arguments
+    try:
+        position = 1 + cache.sibling_count(elem, same_type=True)
+    except ValueError:
+        # elem has no parent
+        return False
+    if a != 0:
+        n = (position - b) / a
+        return n.is_integer() and n > -1
+    return position == b
+
+
+def select_nth_last_of_type(cache, function, elem):
+    ' Implement :nth-last-of-type(): like :nth-of-type() but counting from the last sibling '
+    a, b = function.parsed_arguments
+    try:
+        position = 1 + cache.sibling_count(elem, before=False, same_type=True)
+    except ValueError:
+        # elem has no parent
+        return False
+    if a != 0:
+        n = (position - b) / a
+        return n.is_integer() and n > -1
+    return position == b
+
+# }}}
+
+# Pseudo elements {{{
+
+
+def pseudo_func(f):
+    """Decorator that marks f as a simple pseudo-class handler by setting
+    ``f.is_pseudo``; f itself is returned."""
+    setattr(f, 'is_pseudo', True)
+    return f
+
+
+@pseudo_func
+def allow_all(cache, item):
+    """Pseudo-class handler that accepts every item."""
+    return True
+
+
+def get_func_for_pseudo(cache, ident):
+    """Return the handler for the pseudo-class named ident.
+
+    An unknown name listed in ``cache.ignore_inappropriate_pseudo_classes``
+    resolves to allow_all. Raises ExpressionError for any other unknown
+    name, and for handlers that are not marked with ``is_pseudo``.
+    """
+    key = ident.replace('-', '_')
+    try:
+        func = cache.dispatch_map[key]
+    except KeyError:
+        if ident not in cache.ignore_inappropriate_pseudo_classes:
+            raise ExpressionError(
+                "The pseudo-class :%s is not supported" % ident)
+        func = allow_all
+
+    if not hasattr(func, 'is_pseudo'):
+        raise ExpressionError(
+            "The pseudo-class :%s is invalid" % ident)
+    return func
+
+
+def select_selector(cache, selector):
+    """Yield the items matched by a complete selector.
+
+    Without a pseudo-element this is just the parsed tree. A plain
+    pseudo-element is resolved like a pseudo-class and used as a filter;
+    functional pseudo-elements raise ExpressionError.
+    """
+    pseudo_element = selector.pseudo_element
+    if pseudo_element is not None:
+        if isinstance(pseudo_element, FunctionalPseudoElement):
+            raise ExpressionError(
+                "The pseudo-element ::%s is not supported" % pseudo_element.name)
+        func = get_func_for_pseudo(cache, pseudo_element)
+        for item in cache.iterparsedselector(selector.parsed_tree):
+            if func(cache, item):
+                yield item
+    else:
+        for item in cache.iterparsedselector(selector.parsed_tree):
+            yield item
+
+
+def select_pseudo(cache, pseudo):
+    """Yield the items matched by ``pseudo.selector`` that also satisfy the
+    pseudo-class named by ``pseudo.ident``.
+
+    Raises ExpressionError (from get_func_for_pseudo) if the pseudo-class is
+    not supported or is not a simple pseudo-class.
+    """
+    func = get_func_for_pseudo(cache, pseudo.ident)
+    if func is select_root:
+        # Only the tree root can match :root, so stop at the first hit
+        # rather than testing every candidate. The root is yielded only if
+        # the inner selector matches it, so that e.g. ``p:root`` does not
+        # select a root element that is not a <p>.
+        root = cache.root
+        for item in cache.iterparsedselector(pseudo.selector):
+            if item is root:
+                yield root
+                break
+        return
+
+    for item in cache.iterparsedselector(pseudo.selector):
+        if func(cache, item):
+            yield item
+
+
+@pseudo_func
+def select_root(cache, elem):
+    """Implement :root, True iff elem is the root of the tree."""
+    return cache.root is elem
+
+
+@pseudo_func
+def select_first_child(cache, elem):
+    """Implement :first-child, True iff elem has a parent and no preceding
+    siblings."""
+    try:
+        preceding = cache.sibling_count(elem)
+    except ValueError:
+        # elem has no parent
+        return False
+    return preceding == 0
+
+
+@pseudo_func
+def select_last_child(cache, elem):
+    """Implement :last-child, True iff elem has a parent and no following
+    siblings."""
+    try:
+        following = cache.sibling_count(elem, before=False)
+    except ValueError:
+        # elem has no parent
+        return False
+    return following == 0
+
+
+@pseudo_func
+def select_only_child(cache, elem):
+    """Implement :only-child, True iff elem has a parent and no siblings."""
+    try:
+        others = cache.all_sibling_count(elem)
+    except ValueError:
+        # elem has no parent
+        return False
+    return others == 0
+
+
+@pseudo_func
+def select_first_of_type(cache, elem):
+    """Implement :first-of-type, True iff elem has a parent and no preceding
+    siblings of the same type."""
+    try:
+        preceding = cache.sibling_count(elem, same_type=True)
+    except ValueError:
+        # elem has no parent
+        return False
+    return preceding == 0
+
+
+@pseudo_func
+def select_last_of_type(cache, elem):
+    """Implement :last-of-type, True iff elem has a parent and no following
+    siblings of the same type."""
+    try:
+        following = cache.sibling_count(elem, before=False, same_type=True)
+    except ValueError:
+        # elem has no parent
+        return False
+    return following == 0
+
+
+@pseudo_func
+def select_only_of_type(cache, elem):
+    """Implement :only-of-type, True iff elem has a parent and no siblings
+    of the same type."""
+    try:
+        others = cache.all_sibling_count(elem, same_type=True)
+    except ValueError:
+        # elem has no parent
+        return False
+    return others == 0
+
+
+@pseudo_func
+def select_empty(cache, elem):
+    """Implement :empty by delegating to ``cache.is_empty``."""
+    empty = cache.is_empty(elem)
+    return empty
+
+
+# }}}
+
+# Build the default handler table from every callable in this module whose
+# name starts with ``select_``; the key is the name with that prefix removed
+# (e.g. select_nth_child -> 'nth_child'). This has to stay below all of the
+# select_* definitions, because it inspects globals() at import time.
+default_dispatch_map = {name.partition('_')[2]:obj for name, obj in globals().items() if name.startswith('select_') and callable(obj)}
+
+# Ad-hoc manual check: run this module directly to print a trace of how a
+# selector is evaluated against a small sample document.
+if __name__ == '__main__':
+    from pprint import pprint
+    root = etree.fromstring(
+            '<body xmlns="xxx" xml:lang="en"><p id="p" class="one two" lang="fr"><a id="a"/><b/><c/><d/></p></body>',
+            parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))
+    # trace=True prints every dispatched handler call; :disabled is only
+    # accepted because ignore_inappropriate_pseudo_classes is True
+    select = Select(root, ignore_inappropriate_pseudo_classes=True, trace=True)
+    pprint(list(select('p:disabled')))
--- a/ebook_converter/css_selectors/tests.py
+++ b/ebook_converter/css_selectors/tests.py
@@ -0,0 +1,843 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__ = 'GPL v3'
+__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import unittest, sys, argparse
+
+from lxml import etree, html
+
+from css_selectors.errors import SelectorSyntaxError, ExpressionError
+from css_selectors.parser import tokenize, parse
+from css_selectors.select import Select
+
+
+class TestCSSSelectors(unittest.TestCase):
+
+    # Test data {{{
+    HTML_IDS = '''
+<html id="html"><head>
+  <link id="link-href" href="foo" />
+  <link id="link-nohref" />
+</head><body>
+<div id="outer-div">
+ <a id="name-anchor" name="foo"></a>
+ <a id="tag-anchor" rel="tag" href="http://localhost/foo">link</a>
+ <a id="nofollow-anchor" rel="nofollow" href="https://example.org">
+    link</a>
+ <ol id="first-ol" class="a b c">
+   <li id="first-li">content</li>
+   <li id="second-li" lang="En-us">
+     <div id="li-div">
+     </div>
+   </li>
+   <li id="third-li" class="ab c"></li>
+   <li id="fourth-li" class="ab
+c"></li>
+   <li id="fifth-li"></li>
+   <li id="sixth-li"></li>
+   <li id="seventh-li">  </li>
+ </ol>
+ <p id="paragraph">
+   <b id="p-b">hi</b> <em id="p-em">there</em>
+   <b id="p-b2">guy</b>
+   <input type="checkbox" id="checkbox-unchecked" />
+   <input type="checkbox" id="checkbox-disabled" disabled="" />
+   <input type="text" id="text-checked" checked="checked" />
+   <input type="hidden" />
+   <input type="hidden" disabled="disabled" />
+   <input type="checkbox" id="checkbox-checked" checked="checked" />
+   <input type="checkbox" id="checkbox-disabled-checked"
+          disabled="disabled" checked="checked" />
+   <fieldset id="fieldset" disabled="disabled">
+     <input type="checkbox" id="checkbox-fieldset-disabled" />
+     <input type="hidden" />
+   </fieldset>
+ </p>
+ <ol id="second-ol">
+ </ol>
+ <map name="dummymap">
+   <area shape="circle" coords="200,250,25" href="foo.html" id="area-href" />
+   <area shape="default" id="area-nohref" />
+ </map>
+</div>
+<div id="foobar-div" foobar="ab bc
+cde"><span id="foobar-span"></span></div>
+</body></html>
+'''
+    HTML_SHAKESPEARE = '''
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
+"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" debug="true">
+<head>
+<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
+</head>
+<body>
+<div id="test">
+<div class="dialog">
+<h2>As You Like It</h2>
+<div id="playwright">
+by William Shakespeare
+</div>
+<div class="dialog scene thirdClass" id="scene1">
+<h3>ACT I, SCENE III. A room in the palace.</h3>
+<div class="dialog">
+<div class="direction">Enter CELIA and ROSALIND</div>
+</div>
+<div id="speech1" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.1">Why, cousin! why, Rosalind! Cupid have mercy! not a word?</div>
+</div>
+<div id="speech2" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.2">Not one to throw at a dog.</div>
+</div>
+<div id="speech3" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.3">No, thy words are too precious to be cast away upon</div>
+<div id="scene1.3.4">curs; throw some of them at me; come, lame me with reasons.</div>
+</div>
+<div id="speech4" class="character">ROSALIND</div>
+<div id="speech5" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.8">But is all this for your father?</div>
+</div>
+<div class="dialog">
+<div id="scene1.3.5">Then there were two cousins laid up; when the one</div>
+<div id="scene1.3.6">should be lamed with reasons and the other mad</div>
+<div id="scene1.3.7">without any.</div>
+</div>
+<div id="speech6" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.9">No, some of it is for my child's father. O, how</div>
+<div id="scene1.3.10">full of briers is this working-day world!</div>
+</div>
+<div id="speech7" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.11">They are but burs, cousin, thrown upon thee in</div>
+<div id="scene1.3.12">holiday foolery: if we walk not in the trodden</div>
+<div id="scene1.3.13">paths our very petticoats will catch them.</div>
+</div>
+<div id="speech8" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.14">I could shake them off my coat: these burs are in my heart.</div>
+</div>
+<div id="speech9" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.15">Hem them away.</div>
+</div>
+<div id="speech10" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.16">I would try, if I could cry 'hem' and have him.</div>
+</div>
+<div id="speech11" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.17">Come, come, wrestle with thy affections.</div>
+</div>
+<div id="speech12" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.18">O, they take the part of a better wrestler than myself!</div>
+</div>
+<div id="speech13" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.19">O, a good wish upon you! you will try in time, in</div>
+<div id="scene1.3.20">despite of a fall. But, turning these jests out of</div>
+<div id="scene1.3.21">service, let us talk in good earnest: is it</div>
+<div id="scene1.3.22">possible, on such a sudden, you should fall into so</div>
+<div id="scene1.3.23">strong a liking with old Sir Rowland's youngest son?</div>
+</div>
+<div id="speech14" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.24">The duke my father loved his father dearly.</div>
+</div>
+<div id="speech15" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.25">Doth it therefore ensue that you should love his son</div>
+<div id="scene1.3.26">dearly? By this kind of chase, I should hate him,</div>
+<div id="scene1.3.27">for my father hated his father dearly; yet I hate</div>
+<div id="scene1.3.28">not Orlando.</div>
+</div>
+<div id="speech16" class="character">ROSALIND</div>
+<div title="wtf" class="dialog">
+<div id="scene1.3.29">No, faith, hate him not, for my sake.</div>
+</div>
+<div id="speech17" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.30">Why should I not? doth he not deserve well?</div>
+</div>
+<div id="speech18" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.31">Let me love him for that, and do you love him</div>
+<div id="scene1.3.32">because I do. Look, here comes the duke.</div>
+</div>
+<div id="speech19" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.33">With his eyes full of anger.</div>
+<div class="direction">Enter DUKE FREDERICK, with Lords</div>
+</div>
+<div id="speech20" class="character">DUKE FREDERICK</div>
+<div class="dialog">
+<div id="scene1.3.34">Mistress, dispatch you with your safest haste</div>
+<div id="scene1.3.35">And get you from our court.</div>
+</div>
+<div id="speech21" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.36">Me, uncle?</div>
+</div>
+<div id="speech22" class="character">DUKE FREDERICK</div>
+<div class="dialog">
+<div id="scene1.3.37">You, cousin</div>
+<div id="scene1.3.38">Within these ten days if that thou be'st found</div>
+<div id="scene1.3.39">So near our public court as twenty miles,</div>
+<div id="scene1.3.40">Thou diest for it.</div>
+</div>
+<div id="speech23" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.41">                  I do beseech your grace,</div>
+<div id="scene1.3.42">Let me the knowledge of my fault bear with me:</div>
+<div id="scene1.3.43">If with myself I hold intelligence</div>
+<div id="scene1.3.44">Or have acquaintance with mine own desires,</div>
+<div id="scene1.3.45">If that I do not dream or be not frantic,--</div>
+<div id="scene1.3.46">As I do trust I am not--then, dear uncle,</div>
+<div id="scene1.3.47">Never so much as in a thought unborn</div>
+<div id="scene1.3.48">Did I offend your highness.</div>
+</div>
+<div id="speech24" class="character">DUKE FREDERICK</div>
+<div class="dialog">
+<div id="scene1.3.49">Thus do all traitors:</div>
+<div id="scene1.3.50">If their purgation did consist in words,</div>
+<div id="scene1.3.51">They are as innocent as grace itself:</div>
+<div id="scene1.3.52">Let it suffice thee that I trust thee not.</div>
+</div>
+<div id="speech25" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.53">Yet your mistrust cannot make me a traitor:</div>
+<div id="scene1.3.54">Tell me whereon the likelihood depends.</div>
+</div>
+<div id="speech26" class="character">DUKE FREDERICK</div>
+<div class="dialog">
+<div id="scene1.3.55">Thou art thy father's daughter; there's enough.</div>
+</div>
+<div id="speech27" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.56">So was I when your highness took his dukedom;</div>
+<div id="scene1.3.57">So was I when your highness banish'd him:</div>
+<div id="scene1.3.58">Treason is not inherited, my lord;</div>
+<div id="scene1.3.59">Or, if we did derive it from our friends,</div>
+<div id="scene1.3.60">What's that to me? my father was no traitor:</div>
+<div id="scene1.3.61">Then, good my liege, mistake me not so much</div>
+<div id="scene1.3.62">To think my poverty is treacherous.</div>
+</div>
+<div id="speech28" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.63">Dear sovereign, hear me speak.</div>
+</div>
+<div id="speech29" class="character">DUKE FREDERICK</div>
+<div class="dialog">
+<div id="scene1.3.64">Ay, Celia; we stay'd her for your sake,</div>
+<div id="scene1.3.65">Else had she with her father ranged along.</div>
+</div>
+<div id="speech30" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.66">I did not then entreat to have her stay;</div>
+<div id="scene1.3.67">It was your pleasure and your own remorse:</div>
+<div id="scene1.3.68">I was too young that time to value her;</div>
+<div id="scene1.3.69">But now I know her: if she be a traitor,</div>
+<div id="scene1.3.70">Why so am I; we still have slept together,</div>
+<div id="scene1.3.71">Rose at an instant, learn'd, play'd, eat together,</div>
+<div id="scene1.3.72">And wheresoever we went, like Juno's swans,</div>
+<div id="scene1.3.73">Still we went coupled and inseparable.</div>
+</div>
+<div id="speech31" class="character">DUKE FREDERICK</div>
+<div class="dialog">
+<div id="scene1.3.74">She is too subtle for thee; and her smoothness,</div>
+<div id="scene1.3.75">Her very silence and her patience</div>
+<div id="scene1.3.76">Speak to the people, and they pity her.</div>
+<div id="scene1.3.77">Thou art a fool: she robs thee of thy name;</div>
+<div id="scene1.3.78">And thou wilt show more bright and seem more virtuous</div>
+<div id="scene1.3.79">When she is gone. Then open not thy lips:</div>
+<div id="scene1.3.80">Firm and irrevocable is my doom</div>
+<div id="scene1.3.81">Which I have pass'd upon her; she is banish'd.</div>
+</div>
+<div id="speech32" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.82">Pronounce that sentence then on me, my liege:</div>
+<div id="scene1.3.83">I cannot live out of her company.</div>
+</div>
+<div id="speech33" class="character">DUKE FREDERICK</div>
+<div class="dialog">
+<div id="scene1.3.84">You are a fool. You, niece, provide yourself:</div>
+<div id="scene1.3.85">If you outstay the time, upon mine honour,</div>
+<div id="scene1.3.86">And in the greatness of my word, you die.</div>
+<div class="direction">Exeunt DUKE FREDERICK and Lords</div>
+</div>
+<div id="speech34" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.87">O my poor Rosalind, whither wilt thou go?</div>
+<div id="scene1.3.88">Wilt thou change fathers? I will give thee mine.</div>
+<div id="scene1.3.89">I charge thee, be not thou more grieved than I am.</div>
+</div>
+<div id="speech35" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.90">I have more cause.</div>
+</div>
+<div id="speech36" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.91">                  Thou hast not, cousin;</div>
+<div id="scene1.3.92">Prithee be cheerful: know'st thou not, the duke</div>
+<div id="scene1.3.93">Hath banish'd me, his daughter?</div>
+</div>
+<div id="speech37" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.94">That he hath not.</div>
+</div>
+<div id="speech38" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.95">No, hath not? Rosalind lacks then the love</div>
+<div id="scene1.3.96">Which teacheth thee that thou and I am one:</div>
+<div id="scene1.3.97">Shall we be sunder'd? shall we part, sweet girl?</div>
+<div id="scene1.3.98">No: let my father seek another heir.</div>
+<div id="scene1.3.99">Therefore devise with me how we may fly,</div>
+<div id="scene1.3.100">Whither to go and what to bear with us;</div>
+<div id="scene1.3.101">And do not seek to take your change upon you,</div>
+<div id="scene1.3.102">To bear your griefs yourself and leave me out;</div>
+<div id="scene1.3.103">For, by this heaven, now at our sorrows pale,</div>
+<div id="scene1.3.104">Say what thou canst, I'll go along with thee.</div>
+</div>
+<div id="speech39" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.105">Why, whither shall we go?</div>
+</div>
+<div id="speech40" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.106">To seek my uncle in the forest of Arden.</div>
+</div>
+<div id="speech41" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.107">Alas, what danger will it be to us,</div>
+<div id="scene1.3.108">Maids as we are, to travel forth so far!</div>
+<div id="scene1.3.109">Beauty provoketh thieves sooner than gold.</div>
+</div>
+<div id="speech42" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.110">I'll put myself in poor and mean attire</div>
+<div id="scene1.3.111">And with a kind of umber smirch my face;</div>
+<div id="scene1.3.112">The like do you: so shall we pass along</div>
+<div id="scene1.3.113">And never stir assailants.</div>
+</div>
+<div id="speech43" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.114">Were it not better,</div>
+<div id="scene1.3.115">Because that I am more than common tall,</div>
+<div id="scene1.3.116">That I did suit me all points like a man?</div>
+<div id="scene1.3.117">A gallant curtle-axe upon my thigh,</div>
+<div id="scene1.3.118">A boar-spear in my hand; and--in my heart</div>
+<div id="scene1.3.119">Lie there what hidden woman's fear there will--</div>
+<div id="scene1.3.120">We'll have a swashing and a martial outside,</div>
+<div id="scene1.3.121">As many other mannish cowards have</div>
+<div id="scene1.3.122">That do outface it with their semblances.</div>
+</div>
+<div id="speech44" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.123">What shall I call thee when thou art a man?</div>
+</div>
+<div id="speech45" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.124">I'll have no worse a name than Jove's own page;</div>
+<div id="scene1.3.125">And therefore look you call me Ganymede.</div>
+<div id="scene1.3.126">But what will you be call'd?</div>
+</div>
+<div id="speech46" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.127">Something that hath a reference to my state</div>
+<div id="scene1.3.128">No longer Celia, but Aliena.</div>
+</div>
+<div id="speech47" class="character">ROSALIND</div>
+<div class="dialog">
+<div id="scene1.3.129">But, cousin, what if we assay'd to steal</div>
+<div id="scene1.3.130">The clownish fool out of your father's court?</div>
+<div id="scene1.3.131">Would he not be a comfort to our travel?</div>
+</div>
+<div id="speech48" class="character">CELIA</div>
+<div class="dialog">
+<div id="scene1.3.132">He'll go along o'er the wide world with me;</div>
+<div id="scene1.3.133">Leave me alone to woo him. Let's away,</div>
+<div id="scene1.3.134">And get our jewels and our wealth together,</div>
+<div id="scene1.3.135">Devise the fittest time and safest way</div>
+<div id="scene1.3.136">To hide us from pursuit that will be made</div>
+<div id="scene1.3.137">After my flight. Now go we in content</div>
+<div id="scene1.3.138">To liberty and not to banishment.</div>
+<div class="direction">Exeunt</div>
+</div>
+</div>
+</div>
+</div>
+</body>
+</html>
+'''
+
+
+# }}}
+
+    # Short alias for assertEqual; the tests below call it as self.ae(...).
+    ae = unittest.TestCase.assertEqual
+
+    def test_tokenizer(self):  # {{{
+        tokens = [
+            type('')(item) for item in tokenize(
+                r'E\ é > f [a~="y\"x"]:nth(/* fu /]* */-3.7)')]
+        self.ae(tokens, [
+            "<IDENT 'E é' at 0>",
+            "<S ' ' at 4>",
+            "<DELIM '>' at 5>",
+            "<S ' ' at 6>",
+            # the no-break space is not whitespace in CSS
+            "<IDENT 'f ' at 7>",  # f\xa0
+            "<DELIM '[' at 9>",
+            "<IDENT 'a' at 10>",
+            "<DELIM '~' at 11>",
+            "<DELIM '=' at 12>",
+            "<STRING 'y\"x' at 13>",
+            "<DELIM ']' at 19>",
+            "<DELIM ':' at 20>",
+            "<IDENT 'nth' at 21>",
+            "<DELIM '(' at 24>",
+            "<NUMBER '-3.7' at 37>",
+            "<DELIM ')' at 41>",
+            "<EOF at 42>",
+        ])
+    # }}}
+
+    def test_parser(self):  # {{{
+        def repr_parse(css):
+            selectors = parse(css)
+            for selector in selectors:
+                assert selector.pseudo_element is None
+            return [repr(selector.parsed_tree).replace("(u'", "('")
+                    for selector in selectors]
+
+        def parse_many(first, *others):
+            result = repr_parse(first)
+            for other in others:
+                assert repr_parse(other) == result
+            return result
+
+        assert parse_many('*') == ['Element[*]']
+        assert parse_many('*|*') == ['Element[*]']
+        assert parse_many('*|foo') == ['Element[foo]']
+        assert parse_many('foo|*') == ['Element[foo|*]']
+        assert parse_many('foo|bar') == ['Element[foo|bar]']
+        # This will never match, but it is valid:
+        assert parse_many('#foo#bar') == ['Hash[Hash[Element[*]#foo]#bar]']
+        assert parse_many(
+            'div>.foo',
+            'div> .foo',
+            'div >.foo',
+            'div > .foo',
+            'div \n>  \t \t .foo', 'div\r>\n\n\n.foo', 'div\f>\f.foo'
+        ) == ['CombinedSelector[Element[div] > Class[Element[*].foo]]']
+        assert parse_many('td.foo,.bar',
+            'td.foo, .bar',
+            'td.foo\t\r\n\f ,\t\r\n\f .bar'
+        ) == [
+            'Class[Element[td].foo]',
+            'Class[Element[*].bar]'
+        ]
+        assert parse_many('div, td.foo, div.bar span') == [
+            'Element[div]',
+            'Class[Element[td].foo]',
+            'CombinedSelector[Class[Element[div].bar] '
+            '<followed> Element[span]]']
+        assert parse_many('div > p') == [
+            'CombinedSelector[Element[div] > Element[p]]']
+        assert parse_many('td:first') == [
+            'Pseudo[Element[td]:first]']
+        assert parse_many('td:first') == [
+            'Pseudo[Element[td]:first]']
+        assert parse_many('td :first') == [
+            'CombinedSelector[Element[td] '
+            '<followed> Pseudo[Element[*]:first]]']
+        assert parse_many('td :first') == [
+            'CombinedSelector[Element[td] '
+            '<followed> Pseudo[Element[*]:first]]']
+        assert parse_many('a[name]', 'a[ name\t]') == [
+            'Attrib[Element[a][name]]']
+        assert parse_many('a [name]') == [
+            'CombinedSelector[Element[a] <followed> Attrib[Element[*][name]]]']
+        self.ae(parse_many('a[rel="include"]', 'a[rel = include]'), [
+            "Attrib[Element[a][rel = 'include']]"])
+        assert parse_many("a[hreflang |= 'en']", "a[hreflang|=en]") == [
+            "Attrib[Element[a][hreflang |= 'en']]"]
+        self.ae(parse_many('div:nth-child(10)'), [
+            "Function[Element[div]:nth-child(['10'])]"])
+        assert parse_many(':nth-child(2n+2)') == [
+            "Function[Element[*]:nth-child(['2', 'n', '+2'])]"]
+        assert parse_many('div:nth-of-type(10)') == [
+            "Function[Element[div]:nth-of-type(['10'])]"]
+        assert parse_many('div div:nth-of-type(10) .aclass') == [
+            'CombinedSelector[CombinedSelector[Element[div] <followed> '
+            "Function[Element[div]:nth-of-type(['10'])]] "
+            '<followed> Class[Element[*].aclass]]']
+        assert parse_many('label:only') == [
+            'Pseudo[Element[label]:only]']
+        assert parse_many('a:lang(fr)') == [
+            "Function[Element[a]:lang(['fr'])]"]
+        assert parse_many('div:contains("foo")') == [
+            "Function[Element[div]:contains(['foo'])]"]
+        assert parse_many('div#foobar') == [
+            'Hash[Element[div]#foobar]']
+        assert parse_many('div:not(div.foo)') == [
+            'Negation[Element[div]:not(Class[Element[div].foo])]']
+        assert parse_many('td ~ th') == [
+            'CombinedSelector[Element[td] ~ Element[th]]']
+    # }}}
+
+    def test_pseudo_elements(self):  # {{{
+        def parse_pseudo(css):
+            result = []
+            for selector in parse(css):
+                pseudo = selector.pseudo_element
+                pseudo = type('')(pseudo) if pseudo else pseudo
+                # No Symbol here
+                assert pseudo is None or isinstance(pseudo, type(''))
+                selector = repr(selector.parsed_tree).replace("(u'", "('")
+                result.append((selector, pseudo))
+            return result
+
+        def parse_one(css):
+            result = parse_pseudo(css)
+            assert len(result) == 1
+            return result[0]
+
+        self.ae(parse_one('foo'), ('Element[foo]', None))
+        self.ae(parse_one('*'), ('Element[*]', None))
+        self.ae(parse_one(':empty'), ('Pseudo[Element[*]:empty]', None))
+
+        # Special cases for CSS 2.1 pseudo-elements
+        self.ae(parse_one(':BEfore'), ('Element[*]', 'before'))
+        self.ae(parse_one(':aftER'), ('Element[*]', 'after'))
+        self.ae(parse_one(':First-Line'), ('Element[*]', 'first-line'))
+        self.ae(parse_one(':First-Letter'), ('Element[*]', 'first-letter'))
+
+        self.ae(parse_one('::befoRE'), ('Element[*]', 'before'))
+        self.ae(parse_one('::AFter'), ('Element[*]', 'after'))
+        self.ae(parse_one('::firsT-linE'), ('Element[*]', 'first-line'))
+        self.ae(parse_one('::firsT-letteR'), ('Element[*]', 'first-letter'))
+
+        self.ae(parse_one('::text-content'), ('Element[*]', 'text-content'))
+        self.ae(parse_one('::attr(name)'), (
+            "Element[*]", "FunctionalPseudoElement[::attr(['name'])]"))
+
+        self.ae(parse_one('::Selection'), ('Element[*]', 'selection'))
+        self.ae(parse_one('foo:after'), ('Element[foo]', 'after'))
+        self.ae(parse_one('foo::selection'), ('Element[foo]', 'selection'))
+        self.ae(parse_one('lorem#ipsum ~ a#b.c[href]:empty::selection'), (
+            'CombinedSelector[Hash[Element[lorem]#ipsum] ~ '
+            'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]',
+            'selection'))
+
+        parse_pseudo('foo:before, bar, baz:after') == [
+            ('Element[foo]', 'before'),
+            ('Element[bar]', None),
+            ('Element[baz]', 'after')]
+    # }}}
+
+    def test_specificity(self):  # {{{
+        def specificity(css):
+            selectors = parse(css)
+            assert len(selectors) == 1
+            return selectors[0].specificity()
+
+        assert specificity('*') == (0, 0, 0)
+        assert specificity(' foo') == (0, 0, 1)
+        assert specificity(':empty ') == (0, 1, 0)
+        assert specificity(':before') == (0, 0, 1)
+        assert specificity('*:before') == (0, 0, 1)
+        assert specificity(':nth-child(2)') == (0, 1, 0)
+        assert specificity('.bar') == (0, 1, 0)
+        assert specificity('[baz]') == (0, 1, 0)
+        assert specificity('[baz="4"]') == (0, 1, 0)
+        assert specificity('[baz^="4"]') == (0, 1, 0)
+        assert specificity('#lipsum') == (1, 0, 0)
+
+        assert specificity(':not(*)') == (0, 0, 0)
+        assert specificity(':not(foo)') == (0, 0, 1)
+        assert specificity(':not(.foo)') == (0, 1, 0)
+        assert specificity(':not([foo])') == (0, 1, 0)
+        assert specificity(':not(:empty)') == (0, 1, 0)
+        assert specificity(':not(#foo)') == (1, 0, 0)
+
+        assert specificity('foo:empty') == (0, 1, 1)
+        assert specificity('foo:before') == (0, 0, 2)
+        assert specificity('foo::before') == (0, 0, 2)
+        assert specificity('foo:empty::before') == (0, 1, 2)
+
+        assert specificity('#lorem + foo#ipsum:first-child > bar:first-line'
+            ) == (2, 1, 3)
+    # }}}
+
+    def test_parse_errors(self):  # {{{
+        def get_error(css):
+            try:
+                parse(css)
+            except SelectorSyntaxError:
+                # Py2, Py3, ...
+                return str(sys.exc_info()[1]).replace("(u'", "('")
+
+        self.ae(get_error('attributes(href)/html/body/a'), (
+            "Expected selector, got <DELIM '(' at 10>"))
+        assert get_error('attributes(href)') == (
+            "Expected selector, got <DELIM '(' at 10>")
+        assert get_error('html/body/a') == (
+            "Expected selector, got <DELIM '/' at 4>")
+        assert get_error(' ') == (
+            "Expected selector, got <EOF at 1>")
+        assert get_error('div, ') == (
+            "Expected selector, got <EOF at 5>")
+        assert get_error(' , div') == (
+            "Expected selector, got <DELIM ',' at 1>")
+        assert get_error('p, , div') == (
+            "Expected selector, got <DELIM ',' at 3>")
+        assert get_error('div > ') == (
+            "Expected selector, got <EOF at 6>")
+        assert get_error('  > div') == (
+            "Expected selector, got <DELIM '>' at 2>")
+        assert get_error('foo|#bar') == (
+            "Expected ident or '*', got <HASH 'bar' at 4>")
+        assert get_error('#.foo') == (
+            "Expected selector, got <DELIM '#' at 0>")
+        assert get_error('.#foo') == (
+            "Expected ident, got <HASH 'foo' at 1>")
+        assert get_error(':#foo') == (
+            "Expected ident, got <HASH 'foo' at 1>")
+        assert get_error('[*]') == (
+            "Expected '|', got <DELIM ']' at 2>")
+        assert get_error('[foo|]') == (
+            "Expected ident, got <DELIM ']' at 5>")
+        assert get_error('[#]') == (
+            "Expected ident or '*', got <DELIM '#' at 1>")
+        assert get_error('[foo=#]') == (
+            "Expected string or ident, got <DELIM '#' at 5>")
+        assert get_error('[href]a') == (
+            "Expected selector, got <IDENT 'a' at 6>")
+        assert get_error('[rel=stylesheet]') is None
+        assert get_error('[rel:stylesheet]') == (
+            "Operator expected, got <DELIM ':' at 4>")
+        assert get_error('[rel=stylesheet') == (
+            "Expected ']', got <EOF at 15>")
+        assert get_error(':lang(fr)') is None
+        assert get_error(':lang(fr') == (
+            "Expected an argument, got <EOF at 8>")
+        assert get_error(':contains("foo') == (
+            "Unclosed string at 10")
+        assert get_error('foo!') == (
+            "Expected selector, got <DELIM '!' at 3>")
+
+        # Mis-placed pseudo-elements
+        assert get_error('a:before:empty') == (
+            "Got pseudo-element ::before not at the end of a selector")
+        assert get_error('li:before a') == (
+            "Got pseudo-element ::before not at the end of a selector")
+        assert get_error(':not(:before)') == (
+            "Got pseudo-element ::before inside :not() at 12")
+        assert get_error(':not(:not(a))') == (
+            "Got nested :not()")
+    # }}}
+
+    def test_select(self):  # {{{
+        """Check which element ids each selector matches in HTML_IDS.
+
+        The document is parsed with etree.XMLParser(recover=True, ...) and
+        queried through a Select object; every expectation is the list of
+        ``id`` attribute values of the matched elements, in match order
+        (None for an element without an id).
+        """
+        document = etree.fromstring(self.HTML_IDS, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))
+        select = Select(document)
+
+        def select_ids(selector):
+            # Yield the id attribute of every element matched by selector.
+            # ``select`` is looked up when this runs, not when it is defined.
+            for elem in select(selector):
+                yield elem.get('id')
+
+        def pcss(main, *selectors, **kwargs):
+            # Return the ids matched by ``main`` after asserting that every
+            # alternative in ``selectors`` matches exactly the same ids.
+            # Keyword arguments (e.g. skip_webkit) are accepted but unused.
+            result = list(select_ids(main))
+            for selector in selectors:
+                self.ae(list(select_ids(selector)), result)
+            return result
+        all_ids = pcss('*')
+        self.ae(all_ids[:6], [
+            'html', None, 'link-href', 'link-nohref', None, 'outer-div'])
+        self.ae(all_ids[-1:], ['foobar-span'])
+        self.ae(pcss('div'), ['outer-div', 'li-div', 'foobar-div'])
+        self.ae(pcss('DIV'), [
+            'outer-div', 'li-div', 'foobar-div'])  # case-insensitive in HTML
+        self.ae(pcss('div div'), ['li-div'])
+        self.ae(pcss('div, div div'), ['outer-div', 'li-div', 'foobar-div'])
+        self.ae(pcss('a[name]'), ['name-anchor'])
+        self.ae(pcss('a[NAme]'), ['name-anchor'])  # case-insensitive in HTML
+        self.ae(pcss('a[rel]'), ['tag-anchor', 'nofollow-anchor'])
+        self.ae(pcss('a[rel="tag"]'), ['tag-anchor'])
+        self.ae(pcss('a[href*="localhost"]'), ['tag-anchor'])
+        self.ae(pcss('a[href*=""]'), [])
+        self.ae(pcss('a[href^="http"]'), ['tag-anchor', 'nofollow-anchor'])
+        self.ae(pcss('a[href^="http:"]'), ['tag-anchor'])
+        self.ae(pcss('a[href^=""]'), [])
+        self.ae(pcss('a[href$="org"]'), ['nofollow-anchor'])
+        self.ae(pcss('a[href$=""]'), [])
+        self.ae(pcss('div[foobar~="bc"]', 'div[foobar~="cde"]', skip_webkit=True), ['foobar-div'])
+        self.ae(pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]'), [])
+        self.ae(pcss('div[foobar~="cd"]'), [])
+        self.ae(pcss('*[lang|="En"]', '[lang|="En-us"]'), ['second-li'])
+        # Attribute values are case sensitive
+        self.ae(pcss('*[lang|="en"]', '[lang|="en-US"]', skip_webkit=True), [])
+        self.ae(pcss('*[lang|="e"]'), [])
+        self.ae(pcss(':lang("EN")', '*:lang(en-US)', skip_webkit=True), ['second-li', 'li-div'])
+        self.ae(pcss(':lang("e")'), [])
+        self.ae(pcss('li:nth-child(1)', 'li:first-child'), ['first-li'])
+        self.ae(pcss('li:nth-child(3)', '#first-li ~ :nth-child(3)'), ['third-li'])
+        self.ae(pcss('li:nth-child(10)'), [])
+        self.ae(pcss('li:nth-child(2n)', 'li:nth-child(even)', 'li:nth-child(2n+0)'), ['second-li', 'fourth-li', 'sixth-li'])
+        self.ae(pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)'), ['first-li', 'third-li', 'fifth-li', 'seventh-li'])
+        self.ae(pcss('li:nth-child(2n+4)'), ['fourth-li', 'sixth-li'])
+        self.ae(pcss('li:nth-child(3n+1)'), ['first-li', 'fourth-li', 'seventh-li'])
+        self.ae(pcss('li:nth-last-child(0)'), [])
+        self.ae(pcss('li:nth-last-child(1)', 'li:last-child'), ['seventh-li'])
+        self.ae(pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)'), ['second-li', 'fourth-li', 'sixth-li'])
+        self.ae(pcss('li:nth-last-child(2n+2)'), ['second-li', 'fourth-li', 'sixth-li'])
+        self.ae(pcss('ol:first-of-type'), ['first-ol'])
+        self.ae(pcss('ol:nth-child(1)'), [])
+        self.ae(pcss('ol:nth-of-type(2)'), ['second-ol'])
+        self.ae(pcss('ol:nth-last-of-type(1)'), ['second-ol'])
+        self.ae(pcss('span:only-child'), ['foobar-span'])
+        self.ae(pcss('li div:only-child'), ['li-div'])
+        self.ae(pcss('div *:only-child'), ['li-div', 'foobar-span'])
+        self.ae(pcss('p *:only-of-type', skip_webkit=True), ['p-em', 'fieldset'])
+        self.ae(pcss('p:only-of-type', skip_webkit=True), ['paragraph'])
+        self.ae(pcss('a:empty', 'a:EMpty'), ['name-anchor'])
+        self.ae(pcss('li:empty'), ['third-li', 'fourth-li', 'fifth-li', 'sixth-li'])
+        self.ae(pcss(':root', 'html:root', 'li:root'), ['html'])
+        self.ae(pcss('* :root', 'p *:root'), [])
+        self.ae(pcss('.a', '.b', '*.a', 'ol.a'), ['first-ol'])
+        self.ae(pcss('.c', '*.c'), ['first-ol', 'third-li', 'fourth-li'])
+        self.ae(pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c'), [
+            'third-li', 'fourth-li'])
+        self.ae(pcss('#first-li', 'li#first-li', '*#first-li'), ['first-li'])
+        self.ae(pcss('li div', 'li > div', 'div div'), ['li-div'])
+        self.ae(pcss('div > div'), [])
+        self.ae(pcss('div>.c', 'div > .c'), ['first-ol'])
+        self.ae(pcss('div + div'), ['foobar-div'])
+        self.ae(pcss('a ~ a'), ['tag-anchor', 'nofollow-anchor'])
+        self.ae(pcss('a[rel="tag"] ~ a'), ['nofollow-anchor'])
+        self.ae(pcss('ol#first-ol li:last-child'), ['seventh-li'])
+        self.ae(pcss('ol#first-ol *:last-child'), ['li-div', 'seventh-li'])
+        self.ae(pcss('#outer-div:first-child'), ['outer-div'])
+        self.ae(pcss('#outer-div :first-child'), [
+            'name-anchor', 'first-li', 'li-div', 'p-b',
+            'checkbox-fieldset-disabled', 'area-href'])
+        self.ae(pcss('a[href]'), ['tag-anchor', 'nofollow-anchor'])
+        self.ae(pcss(':not(*)'), [])
+        self.ae(pcss('a:not([href])'), ['name-anchor'])
+        self.ae(pcss('ol :Not(li[class])', skip_webkit=True), [
+            'first-li', 'second-li', 'li-div',
+            'fifth-li', 'sixth-li', 'seventh-li'])
+        self.ae(pcss(r'di\a0 v', r'div\['), [])
+        self.ae(pcss(r'[h\a0 ref]', r'[h\]ref]'), [])
+
+        # select() is wrapped in tuple() so that the result is fully consumed
+        # inside the callable handed to assertRaises.
+        self.assertRaises(ExpressionError, lambda : tuple(select('body:nth-child')))
+
+        # From here on ``select`` is a second Select object created with
+        # ignore_inappropriate_pseudo_classes=True; with it, p:hover is
+        # expected to match at least one element.
+        select = Select(document, ignore_inappropriate_pseudo_classes=True)
+        self.assertGreater(len(tuple(select('p:hover'))), 0)
+
+    def test_select_shakespeare(self):
+        document = html.document_fromstring(self.HTML_SHAKESPEARE)
+        select = Select(document)
+        count = lambda s: sum(1 for r in select(s))
+
+        # Data borrowed from http://mootools.net/slickspeed/
+
+        # Changed from original; probably because I'm only
+        self.ae(count('*'), 249)
+        assert count('div:only-child') == 22  # ?
+        assert count('div:nth-child(even)') == 106
+        assert count('div:nth-child(2n)') == 106
+        assert count('div:nth-child(odd)') == 137
+        assert count('div:nth-child(2n+1)') == 137
+        assert count('div:nth-child(n)') == 243
+        assert count('div:last-child') == 53
+        assert count('div:first-child') == 51
+        assert count('div > div') == 242
+        assert count('div + div') == 190
+        assert count('div ~ div') == 190
+        assert count('body') == 1
+        assert count('body div') == 243
+        assert count('div') == 243
+        assert count('div div') == 242
+        assert count('div div div') == 241
+        assert count('div, div, div') == 243
+        assert count('div, a, span') == 243
+        assert count('.dialog') == 51
+        assert count('div.dialog') == 51
+        assert count('div .dialog') == 51
+        assert count('div.character, div.dialog') == 99
+        assert count('div.direction.dialog') == 0
+        assert count('div.dialog.direction') == 0
+        assert count('div.dialog.scene') == 1
+        assert count('div.scene.scene') == 1
+        assert count('div.scene .scene') == 0
+        assert count('div.direction .dialog ') == 0
+        assert count('div .dialog .direction') == 4
+        assert count('div.dialog .dialog .direction') == 4
+        assert count('#speech5') == 1
+        assert count('div#speech5') == 1
+        assert count('div #speech5') == 1
+        assert count('div.scene div.dialog') == 49
+        assert count('div#scene1 div.dialog div') == 142
+        assert count('#scene1 #speech1') == 1
+        assert count('div[class]') == 103
+        assert count('div[class=dialog]') == 50
+        assert count('div[class^=dia]') == 51
+        assert count('div[class$=log]') == 50
+        assert count('div[class*=sce]') == 1
+        assert count('div[class|=dialog]') == 50  # ? Seems right
+        assert count('div[class~=dialog]') == 51  # ? Seems right
+
+    # }}}
+
+
+# Run tests {{{
+def find_tests():
+    return unittest.defaultTestLoader.loadTestsFromTestCase(TestCSSSelectors)
+
+
+def run_tests(find_tests=find_tests, for_build=False):
+    if not for_build:
+        parser = argparse.ArgumentParser()
+        parser.add_argument('name', nargs='?', default=None,
+                            help='The name of the test to run')
+        args = parser.parse_args()
+    if not for_build and args.name and args.name.startswith('.'):
+        tests = find_tests()
+        q = args.name[1:]
+        if not q.startswith('test_'):
+            q = 'test_' + q
+        ans = None
+        try:
+            for test in tests:
+                if test._testMethodName == q:
+                    ans = test
+                    raise StopIteration()
+        except StopIteration:
+            pass
+        if ans is None:
+            print('No test named %s found' % args.name)
+            raise SystemExit(1)
+        tests = ans
+    else:
+        tests = unittest.defaultTestLoader.loadTestsFromName(args.name) if not for_build and args.name else find_tests()
+    r = unittest.TextTestRunner
+    if for_build:
+        r = r(verbosity=0, buffer=True, failfast=True)
+    else:
+        r = r(verbosity=4)
+    result = r.run(tests)
+    if for_build and result.errors or result.failures:
+        raise SystemExit(1)
+
+
+# Run the tests when this module is executed directly as a script.
+if __name__ == '__main__':
+    run_tests()
+# }}}