mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-06 11:14:12 +01:00
Initial import
This commit is contained in:
12
ebook_converter/css_selectors/__init__.py
Normal file
12
ebook_converter/css_selectors/__init__.py
Normal file
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from css_selectors.parser import parse
|
||||
from css_selectors.select import Select, INAPPROPRIATE_PSEUDO_CLASSES
|
||||
from css_selectors.errors import SelectorError, SelectorSyntaxError, ExpressionError
|
||||
|
||||
__all__ = ['parse', 'Select', 'INAPPROPRIATE_PSEUDO_CLASSES', 'SelectorError', 'SelectorSyntaxError', 'ExpressionError']
|
||||
18
ebook_converter/css_selectors/errors.py
Normal file
18
ebook_converter/css_selectors/errors.py
Normal file
@@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
class SelectorError(ValueError):
    """Base class of the selector errors defined in this module.

    It is the common parent of SelectorSyntaxError and ExpressionError and
    is itself a ``ValueError``, so ``except ValueError`` also catches it.
    """
|
||||
|
||||
class SelectorSyntaxError(SelectorError):
    """Raised when parsing a selector that does not match the grammar."""
|
||||
|
||||
class ExpressionError(SelectorError):
    """Raised for an unknown or unsupported selector (e.g. a pseudo-class)."""
|
||||
133
ebook_converter/css_selectors/ordered_set.py
Normal file
133
ebook_converter/css_selectors/ordered_set.py
Normal file
@@ -0,0 +1,133 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import collections
|
||||
from polyglot.builtins import string_or_bytes
|
||||
|
||||
# The "everything" slice, i.e. what ``obj[:]`` passes to ``__getitem__``.
SLICE_ALL = slice(None)


def is_iterable(obj):
    """
    Tell whether `obj` should be treated as a collection of things to look
    up rather than as one single thing.

    The test is duck-typed on the `__iter__` attribute, so types this module
    knows nothing about (NumPy arrays, for example) are covered as well.

    Strings are the exception: they are atomic lookup values, never
    iterables.
    """
    if not hasattr(obj, '__iter__'):
        return False
    return not isinstance(obj, string_or_bytes)
|
||||
|
||||
|
||||
# The ABC aliases in ``collections`` were removed in Python 3.10; the abstract
# base classes live in ``collections.abc``. Fall back for Python 2.
try:
    from collections.abc import MutableSet as _MutableSet
except ImportError:
    from collections import MutableSet as _MutableSet


class OrderedSet(_MutableSet):
    """
    An OrderedSet is a custom MutableSet that remembers its order, so that
    every entry has an index that can be looked up.

    Two attributes hold the state: ``items`` is the list of entries in
    insertion order and ``map`` maps every entry to its position in
    ``items``. The two must always describe the same entries.
    """

    def __init__(self, iterable=None):
        self.items = []
        self.map = {}
        if iterable is not None:
            for item in iterable:
                # Only the first occurrence of an entry is recorded.
                if item not in self.map:
                    self.map[item] = len(self.items)
                    self.items.append(item)

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        """
        Get the item at a given index.

        If `index` is a slice, you will get back that slice of items. If it's
        the slice [:], exactly the same object is returned. (If you want an
        independent copy of an OrderedSet, use `OrderedSet.copy()`.)

        If `index` is an iterable, you'll get the OrderedSet of items
        corresponding to those indices. This is similar to NumPy's
        "fancy indexing".

        Raises TypeError for any other kind of index.
        """
        if index == SLICE_ALL:
            return self
        elif hasattr(index, '__index__') or isinstance(index, slice):
            result = self.items[index]
            # A slice yields a list, a plain integer yields a single entry.
            if isinstance(result, list):
                return OrderedSet(result)
            else:
                return result
        elif is_iterable(index):
            return OrderedSet([self.items[i] for i in index])
        else:
            raise TypeError("Don't know how to index an OrderedSet by %r" %
                    index)

    def copy(self):
        """Return a new, independent OrderedSet with the same entries."""
        return OrderedSet(self)

    def __getstate__(self):
        # The pickled state is just the entries, in order.
        return tuple(self)

    def __setstate__(self, state):
        # Rebuild ``items`` and ``map`` from the pickled entries.
        self.__init__(state)

    def __contains__(self, key):
        return key in self.map

    def add(self, key):
        """
        Add `key` as an item to this OrderedSet, then return its index.

        If `key` is already in the OrderedSet, return the index it already
        had.
        """
        index = self.map.get(key)
        if index is None:
            self.map[key] = index = len(self.items)
            self.items.append(key)
        return index

    def index(self, key):
        """
        Get the index of a given entry, raising a KeyError if it's not
        present.

        `key` can be an iterable of entries that is not a string, in which case
        this returns a list of indices.
        """
        if is_iterable(key):
            return [self.index(subkey) for subkey in key]
        return self.map[key]

    def discard(self, key):
        """
        Remove `key` if it is present.

        Returns True when an entry was removed and False when `key` was not
        in the set. Entries that followed `key` move one position down.
        """
        # Drop the entry from the lookup table too; leaving it behind would
        # keep `key in self` true and make add() return a stale index.
        index = self.map.pop(key, None)
        if index is None:
            return False
        del self.items[index]
        for item in self.items[index:]:
            self.map[item] -= 1
        return True

    def __iter__(self):
        return iter(self.items)

    def __reversed__(self):
        return reversed(self.items)

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        # Against another OrderedSet the order matters; against anything
        # else the entries are converted to the other object's type first.
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and self.items == other.items
        try:
            return type(other)(self.map) == other
        except TypeError:
            return False
|
||||
791
ebook_converter/css_selectors/parser.py
Normal file
791
ebook_converter/css_selectors/parser.py
Normal file
@@ -0,0 +1,791 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
"""
|
||||
Tokenizer, parser and parsed objects for CSS selectors.
|
||||
|
||||
:copyright: (c) 2007-2012 Ian Bicking and contributors.
|
||||
See AUTHORS for more details.
|
||||
:license: BSD, see LICENSE for more details.
|
||||
|
||||
"""
|
||||
|
||||
import sys
|
||||
import re
|
||||
import operator
|
||||
import string
|
||||
|
||||
from css_selectors.errors import SelectorSyntaxError, ExpressionError
|
||||
from polyglot.builtins import unicode_type, codepoint_to_chr, range
|
||||
|
||||
|
||||
# Translation table for text strings: maps the code points of 'A'..'Z' to
# those of 'a'..'z' (which are 32 higher); everything else is left alone.
utab = {upper: upper + 32 for upper in range(ord(u'A'), ord(u'Z') + 1)}

if sys.version_info.major >= 3:

    def ascii_lower(x):
        """Lower-case, but only in the ASCII range."""
        return x.translate(utab)

    # No u'' prefixes to strip on Python 3.
    urepr = repr

else:
    # Byte strings are translated with a table built by string.maketrans.
    tab = string.maketrans(string.ascii_uppercase, string.ascii_lowercase)

    def ascii_lower(string):
        """Lower-case, but only in the ASCII range."""
        table = utab if isinstance(string, unicode_type) else tab
        return string.translate(table)

    def urepr(x):
        """repr() without the leading ``u`` of unicode literals.

        Lists are rendered element by element with the same rule.
        """
        if isinstance(x, list):
            return '[%s]' % ', '.join(urepr(item) for item in x)
        text = repr(x)
        if text.startswith(("u'", 'u"')):
            text = text[1:]
        return text
|
||||
|
||||
|
||||
# Parsed objects
|
||||
|
||||
class Selector(object):

    """
    Represents a parsed selector.

    :param tree:
        Root node of the parsed selector. ``repr()`` and ``specificity()``
        of this object are built on the tree's own.
    :param pseudo_element:
        ``None``, the pseudo-element identifier as a string (stored
        ASCII-lower-cased) or a :class:`FunctionalPseudoElement`.
    """

    def __init__(self, tree, pseudo_element=None):
        self.parsed_tree = tree
        if pseudo_element is not None and not isinstance(
                pseudo_element, FunctionalPseudoElement):
            pseudo_element = ascii_lower(pseudo_element)
        #: A :class:`FunctionalPseudoElement`,
        #: or the identifier for the pseudo-element as a string,
        #: or ``None``.
        #:
        #: +-------------------------+----------------+--------------------------------+
        #: |                         | Selector       | Pseudo-element                 |
        #: +=========================+================+================================+
        #: | CSS3 syntax             | ``a::before``  | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | Older syntax            | ``a:before``   | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'``                   |
        #: | not in Selectors3       |                |                                |
        #: +-------------------------+----------------+--------------------------------+
        #: | Invalid pseudo-class    | ``li:marker``  | ``None``                       |
        #: +-------------------------+----------------+--------------------------------+
        #: | Functional              | ``a::foo(2)``  | ``FunctionalPseudoElement(…)`` |
        #: +-------------------------+----------------+--------------------------------+
        #:
        #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
        self.pseudo_element = pseudo_element

    def __repr__(self):
        pseudo = self.pseudo_element
        # The three cases are mutually exclusive. Treating them as
        # independent ``if`` statements let the plain-string branch
        # overwrite the functional pseudo-element's own repr.
        if not pseudo:
            suffix = ''
        elif isinstance(pseudo, FunctionalPseudoElement):
            suffix = repr(pseudo)
        else:
            suffix = '::%s' % pseudo
        return '%s[%r%s]' % (
            self.__class__.__name__, self.parsed_tree, suffix)

    def specificity(self):
        """Return the specificity_ of this selector as a tuple of 3 integers.

        The tree's specificity is used as is, except that a pseudo-element
        adds one to the last component.

        .. _specificity: http://www.w3.org/TR/selectors/#specificity

        """
        a, b, c = self.parsed_tree.specificity()
        if self.pseudo_element:
            c += 1
        return a, b, c
|
||||
|
||||
|
||||
class Class(object):

    """
    Represents selector.class_name

    ``selector`` is the node the class test is attached to and
    ``class_name`` the name that follows the dot.
    """

    def __init__(self, selector, class_name):
        self.selector, self.class_name = selector, class_name

    def __repr__(self):
        kind = type(self).__name__
        return '%s[%r.%s]' % (kind, self.selector, self.class_name)

    def specificity(self):
        """Specificity of the wrapped selector, plus one in the middle slot."""
        ids, attributes, elements = self.selector.specificity()
        return ids, attributes + 1, elements
|
||||
|
||||
|
||||
class FunctionalPseudoElement(object):

    """
    Represents selector::name(arguments)

    .. attribute:: name

        The name (identifier) of the pseudo-element, as a string. It is
        stored ASCII-lower-cased.

    .. attribute:: arguments

        The arguments of the pseudo-element, as a list of tokens.

        **Note:** tokens are not part of the public API,
        and may change between versions.
        Use at your own risks.

    """

    def __init__(self, name, arguments):
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self):
        values = [token.value for token in self.arguments]
        return '%s[::%s(%s)]' % (
            type(self).__name__, self.name, urepr(values))

    def argument_types(self):
        """Return the ``type`` of every argument token, in order."""
        return [token.type for token in self.arguments]

    def specificity(self):
        # NOTE(review): nothing in this class assigns ``self.selector``, so
        # this looks like it raises AttributeError unless a caller sets the
        # attribute from outside - confirm before relying on it.
        ids, attributes, elements = self.selector.specificity()
        return ids, attributes + 1, elements
|
||||
|
||||
|
||||
class Function(object):

    """
    Represents selector:name(expr)

    ``selector`` is the node the functional pseudo-class is attached to,
    ``name`` the (ASCII-lower-cased) function name and ``arguments`` the
    list of argument tokens.
    """

    def __init__(self, selector, name, arguments):
        self.selector = selector
        self.name = ascii_lower(name)
        self.arguments = arguments
        # Cache for the ``parsed_arguments`` property.
        self._parsed_arguments = None
        # Flag flipped by parse_arguments(); it must exist before that
        # method reads it.
        self.arguments_parsed = False

    def __repr__(self):
        return '%s[%r:%s(%s)]' % (
            self.__class__.__name__, self.selector, self.name,
            urepr([token.value for token in self.arguments]))

    def argument_types(self):
        """Return the ``type`` of every argument token, in order."""
        return [token.type for token in self.arguments]

    @property
    def parsed_arguments(self):
        """The arguments run through ``parse_series``, computed once.

        :raises: :class:`ExpressionError` when ``parse_series`` rejects the
            arguments with a ``ValueError``.
        """
        if self._parsed_arguments is None:
            try:
                self._parsed_arguments = parse_series(self.arguments)
            except ValueError:
                raise ExpressionError("Invalid series: '%r'" % self.arguments)
        return self._parsed_arguments

    def parse_arguments(self):
        """Mark the arguments as parsed (sets ``arguments_parsed``)."""
        # getattr() keeps this working for instances that were created
        # without running __init__; reading the bare attribute raised
        # AttributeError because nothing had assigned it yet.
        if not getattr(self, 'arguments_parsed', False):
            self.arguments_parsed = True

    def specificity(self):
        """Specificity of the wrapped selector, plus one in the middle slot."""
        a, b, c = self.selector.specificity()
        b += 1
        return a, b, c
|
||||
|
||||
|
||||
class Pseudo(object):

    """
    Represents selector:ident

    ``ident`` is stored ASCII-lower-cased.
    """

    def __init__(self, selector, ident):
        self.selector = selector
        self.ident = ascii_lower(ident)

    def __repr__(self):
        kind = type(self).__name__
        return '%s[%r:%s]' % (kind, self.selector, self.ident)

    def specificity(self):
        """Specificity of the wrapped selector, plus one in the middle slot."""
        ids, attributes, elements = self.selector.specificity()
        return ids, attributes + 1, elements
|
||||
|
||||
|
||||
class Negation(object):

    """
    Represents selector:not(subselector)
    """

    def __init__(self, selector, subselector):
        self.selector, self.subselector = selector, subselector

    def __repr__(self):
        kind = type(self).__name__
        return '%s[%r:not(%r)]' % (kind, self.selector, self.subselector)

    def specificity(self):
        """Component-wise sum of both selectors' specificities."""
        outer_a, outer_b, outer_c = self.selector.specificity()
        inner_a, inner_b, inner_c = self.subselector.specificity()
        return outer_a + inner_a, outer_b + inner_b, outer_c + inner_c
|
||||
|
||||
|
||||
class Attrib(object):

    """
    Represents selector[namespace|attrib operator value]

    ``operator`` is the string ``'exists'`` for a bare ``[attrib]`` test;
    ``repr()`` then leaves operator and value out.
    """

    def __init__(self, selector, namespace, attrib, operator, value):
        self.selector = selector
        self.namespace, self.attrib = namespace, attrib
        self.operator, self.value = operator, value

    def __repr__(self):
        kind = type(self).__name__
        name = self.attrib
        if self.namespace:
            name = '%s|%s' % (self.namespace, self.attrib)
        if self.operator == 'exists':
            return '%s[%r[%s]]' % (kind, self.selector, name)
        return '%s[%r[%s %s %s]]' % (
            kind, self.selector, name, self.operator, urepr(self.value))

    def specificity(self):
        """Specificity of the wrapped selector, plus one in the middle slot."""
        ids, attributes, elements = self.selector.specificity()
        return ids, attributes + 1, elements
|
||||
|
||||
|
||||
class Element(object):

    """
    Represents namespace|element

    `None` is for the universal selector '*'

    """

    def __init__(self, namespace=None, element=None):
        self.namespace, self.element = namespace, element

    def __repr__(self):
        label = self.element if self.element else '*'
        if self.namespace:
            label = '%s|%s' % (self.namespace, label)
        return '%s[%s]' % (type(self).__name__, label)

    def specificity(self):
        """(0, 0, 1) for a named element, (0, 0, 0) for the universal one."""
        return (0, 0, 1) if self.element else (0, 0, 0)
|
||||
|
||||
|
||||
class Hash(object):

    """
    Represents selector#id
    """

    def __init__(self, selector, id):
        self.selector, self.id = selector, id

    def __repr__(self):
        kind = type(self).__name__
        return '%s[%r#%s]' % (kind, self.selector, self.id)

    def specificity(self):
        """Specificity of the wrapped selector, plus one in the first slot."""
        ids, attributes, elements = self.selector.specificity()
        return ids + 1, attributes, elements
|
||||
|
||||
|
||||
class CombinedSelector(object):

    """
    Two selectors joined by a combinator: ``selector`` on the left,
    ``subselector`` on the right. A single space is the combinator used when
    the two are only separated by whitespace.
    """

    def __init__(self, selector, combinator, subselector):
        assert selector is not None
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self):
        # A bare space would be invisible in the output.
        shown = '<followed>' if self.combinator == ' ' else self.combinator
        return '%s[%r %s %r]' % (
            type(self).__name__, self.selector, shown, self.subselector)

    def specificity(self):
        """Component-wise sum of both selectors' specificities."""
        left_a, left_b, left_c = self.selector.specificity()
        right_a, right_b, right_c = self.subselector.specificity()
        return left_a + right_a, left_b + right_b, left_c + right_c
|
||||
|
||||
|
||||
# Parser
|
||||
|
||||
# foo
|
||||
_el_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$')
|
||||
|
||||
# foo#bar or #bar
|
||||
_id_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$')
|
||||
|
||||
# foo.bar or .bar
|
||||
_class_re = re.compile(
|
||||
r'^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$')
|
||||
|
||||
|
||||
def parse(css):
    """Parse a CSS *group of selectors*.

    Inputs that consist of nothing but ``tag``, ``tag#id``/``#id`` or
    ``tag.class``/``.class`` are recognised with a regular expression and
    never reach the tokenizer.

    :param css:
        A *group of selectors* as an Unicode string.
    :raises:
        :class:`SelectorSyntaxError` on invalid selectors.
    :returns:
        A list of parsed :class:`Selector` objects, one for each
        selector in the comma-separated group.

    """
    # Fast path for simple cases
    bare_element = _el_re.match(css)
    if bare_element:
        return [Selector(Element(element=bare_element.group(1)))]
    for pattern, node_type in ((_id_re, Hash), (_class_re, Class)):
        found = pattern.match(css)
        if found is not None:
            # An empty tag name means "any element".
            tag, name = found.group(1) or None, found.group(2)
            return [Selector(node_type(Element(element=tag), name))]

    # General case: tokenize and run the recursive-descent parser.
    stream = TokenStream(tokenize(css))
    stream.source = css
    return list(parse_selector_group(stream))
|
||||
# except SelectorSyntaxError:
|
||||
# e = sys.exc_info()[1]
|
||||
# message = "%s at %s -> %r" % (
|
||||
# e, stream.used, stream.peek())
|
||||
# e.msg = message
|
||||
# e.args = tuple([message])
|
||||
# raise
|
||||
|
||||
|
||||
def parse_selector_group(stream):
    """Yield one :class:`Selector` per member of a comma-separated group.

    Parsing stops at the first token after a selector that is not a comma.
    """
    stream.skip_whitespace()
    while True:
        yield Selector(*parse_selector(stream))
        if stream.peek() != ('DELIM', ','):
            return
        # Step over the comma and whatever whitespace follows it.
        stream.next()
        stream.skip_whitespace()
|
||||
|
||||
|
||||
def parse_selector(stream):
    """Parse one selector: simple selectors joined by combinators.

    :returns: ``(tree, pseudo_element)``; parsing ends at end of input or at
        a comma, which is left in the stream.
    :raises: :class:`SelectorSyntaxError` when something follows a
        pseudo-element.
    """
    result, pseudo_element = parse_simple_selector(stream)
    while True:
        stream.skip_whitespace()
        upcoming = stream.peek()
        if upcoming in (('EOF', None), ('DELIM', ',')):
            return result, pseudo_element
        if pseudo_element:
            raise SelectorSyntaxError(
                'Got pseudo-element ::%s not at the end of a selector'
                % pseudo_element)
        if upcoming.is_delim('+', '>', '~'):
            # An explicit combinator
            combinator = stream.next().value
            stream.skip_whitespace()
        else:
            # Otherwise the previous simple selector was ended by
            # whitespace, which is the ' ' combinator.
            combinator = ' '
        next_selector, pseudo_element = parse_simple_selector(stream)
        result = CombinedSelector(result, combinator, next_selector)
|
||||
|
||||
|
||||
# Pseudo-elements that are also accepted with a single colon.
special_pseudo_elements = (
    'first-line', 'first-letter', 'before', 'after')


def parse_simple_selector(stream, inside_negation=False):
    """Parse an optional element name followed by any number of
    ``#id``, ``.class``, ``[attrib]``, ``:pseudo`` and ``::pseudo-element``
    parts.

    :param inside_negation: true while parsing the argument of ``:not()``;
        a closing parenthesis then ends the selector and a nested ``:not()``
        is rejected.
    :returns: ``(tree, pseudo_element)``.
    :raises: :class:`SelectorSyntaxError` when no token was consumed, on an
        unexpected token, or when something follows a pseudo-element.
    """
    stream.skip_whitespace()
    tokens_before = len(stream.used)
    head = stream.peek()
    if head.type == 'IDENT' or head == ('DELIM', '*'):
        consumed = stream.next()
        # '*' carries no name of its own.
        namespace = consumed.value if head.type == 'IDENT' else None
        if stream.peek() == ('DELIM', '|'):
            # What was read so far was the namespace prefix.
            stream.next()
            element = stream.next_ident_or_star()
        else:
            element, namespace = namespace, None
    else:
        element = namespace = None
    result = Element(namespace, element)
    pseudo_element = None
    while True:
        peek = stream.peek()
        at_end = (
            peek.type in ('S', 'EOF')
            or peek.is_delim(',', '+', '>', '~')
            or (inside_negation and peek == ('DELIM', ')')))
        if at_end:
            break
        if pseudo_element:
            raise SelectorSyntaxError(
                'Got pseudo-element ::%s not at the end of a selector'
                % pseudo_element)
        if peek.type == 'HASH':
            result = Hash(result, stream.next().value)
        elif peek == ('DELIM', '.'):
            stream.next()
            result = Class(result, stream.next_ident())
        elif peek == ('DELIM', '['):
            stream.next()
            result = parse_attrib(result, stream)
        elif peek == ('DELIM', ':'):
            stream.next()
            if stream.peek() == ('DELIM', ':'):
                # '::name' or '::name(arguments)'
                stream.next()
                pseudo_element = stream.next_ident()
                if stream.peek() == ('DELIM', '('):
                    stream.next()
                    pseudo_element = FunctionalPseudoElement(
                        pseudo_element, parse_arguments(stream))
                continue
            ident = stream.next_ident()
            if ident.lower() in special_pseudo_elements:
                # Special case: CSS 2.1 pseudo-elements can have a single ':'
                # Any new pseudo-element must have two.
                pseudo_element = unicode_type(ident)
                continue
            if stream.peek() != ('DELIM', '('):
                result = Pseudo(result, ident)
                continue
            # From here on: ':name(' has been read.
            stream.next()
            stream.skip_whitespace()
            if ident.lower() == 'not':
                if inside_negation:
                    raise SelectorSyntaxError('Got nested :not()')
                argument, argument_pseudo_element = parse_simple_selector(
                    stream, inside_negation=True)
                closer = stream.next()
                if argument_pseudo_element:
                    raise SelectorSyntaxError(
                        'Got pseudo-element ::%s inside :not() at %s'
                        % (argument_pseudo_element, closer.pos))
                if closer != ('DELIM', ')'):
                    raise SelectorSyntaxError(
                        "Expected ')', got %s" % (closer,))
                result = Negation(result, argument)
            else:
                result = Function(result, ident, parse_arguments(stream))
        else:
            raise SelectorSyntaxError(
                "Expected selector, got %s" % (peek,))
    if len(stream.used) == tokens_before:
        # Nothing was consumed, so there was no selector here at all.
        raise SelectorSyntaxError(
            "Expected selector, got %s" % (stream.peek(),))
    return result, pseudo_element
|
||||
|
||||
|
||||
def parse_arguments(stream):
    """Collect the argument tokens of a function up to its closing ')'.

    Accepted are IDENT, STRING and NUMBER tokens and the '+' and '-'
    delimiters; whitespace between them is skipped.

    :returns: the list of argument tokens; the ')' is consumed.
    :raises: :class:`SelectorSyntaxError` on any other token.
    """
    collected = []
    signs = (('DELIM', '+'), ('DELIM', '-'))
    while True:
        stream.skip_whitespace()
        token = stream.next()
        if token == ('DELIM', ')'):
            return collected
        if token.type in ('IDENT', 'STRING', 'NUMBER') or token in signs:
            collected.append(token)
        else:
            raise SelectorSyntaxError(
                "Expected an argument, got %s" % (token,))
|
||||
|
||||
|
||||
def parse_attrib(selector, stream):
    """Parse the inside of ``[...]``; the '[' has already been consumed.

    :returns: an :class:`Attrib` wrapping `selector`. Its operator is
        ``'exists'`` for ``[attrib]``, otherwise the operator text such as
        ``'='`` or ``'^='``.
    :raises: :class:`SelectorSyntaxError` on malformed input.
    """
    pipe = ('DELIM', '|')
    equals = ('DELIM', '=')
    closing = ('DELIM', ']')

    stream.skip_whitespace()
    attrib = stream.next_ident_or_star()
    if attrib is None and stream.peek() != pipe:
        # '*' is only valid as a namespace prefix.
        raise SelectorSyntaxError(
            "Expected '|', got %s" % (stream.peek(),))
    namespace = op = None
    if stream.peek() == pipe:
        stream.next()
        if stream.peek() == equals:
            # 'attrib|=' : the pipe belonged to the operator.
            stream.next()
            op = '|='
        else:
            # 'namespace|attrib'
            namespace = attrib
            attrib = stream.next_ident()
    if op is None:
        stream.skip_whitespace()
        token = stream.next()
        if token == closing:
            return Attrib(selector, namespace, attrib, 'exists', None)
        elif token == equals:
            op = '='
        elif token.is_delim('^', '$', '*', '~', '|', '!') and (
                stream.peek() == equals):
            op = token.value + '='
            stream.next()
        else:
            raise SelectorSyntaxError(
                "Operator expected, got %s" % (token,))
    stream.skip_whitespace()
    value = stream.next()
    if value.type not in ('IDENT', 'STRING'):
        raise SelectorSyntaxError(
            "Expected string or ident, got %s" % (value,))
    stream.skip_whitespace()
    token = stream.next()
    if token != closing:
        raise SelectorSyntaxError(
            "Expected ']', got %s" % (token,))
    return Attrib(selector, namespace, attrib, op, value.value)
|
||||
|
||||
|
||||
def parse_series(tokens):
    """
    Parses the arguments for :nth-child() and friends.

    The token values are concatenated and read as ``odd``, ``even``, a
    plain integer ``b`` or an ``an+b`` expression.

    :param tokens: A list of tokens
    :raises: ``ValueError`` if a STRING token is present or a number cannot
        be converted with ``int()``
    :returns: ``(a, b)``

    """
    for token in tokens:
        if token.type == 'STRING':
            raise ValueError('String tokens not allowed in series.')
    s = ''.join(token.value for token in tokens).strip()
    keywords = {'odd': (2, 1), 'even': (2, 0), 'n': (1, 0)}
    if s in keywords:
        return keywords[s]
    if 'n' not in s:
        # Just b
        return (0, int(s))
    step, offset = s.split('n', 1)
    if not step:
        a = 1
    elif step in ('-', '+'):
        # A bare sign stands for -1 / +1.
        a = int(step + '1')
    else:
        a = int(step)
    b = int(offset) if offset else 0
    return (a, b)
|
||||
|
||||
|
||||
# Token objects
|
||||
|
||||
class Token(tuple):

    """
    A ``(type, value)`` tuple that also remembers the position it was
    found at in the ``pos`` attribute. The position takes no part in
    comparisons, so a token compares equal to a plain 2-tuple.
    """

    def __new__(cls, type_, value, pos):
        self = super(Token, cls).__new__(cls, (type_, value))
        self.pos = pos
        return self

    # Named access to the two tuple slots.
    type = property(operator.itemgetter(0))
    value = property(operator.itemgetter(1))

    def __repr__(self):
        return "<%s '%s' at %i>" % (self.type, self.value, self.pos)

    def is_delim(self, *values):
        """True for a DELIM token whose value is one of `values`."""
        if self.type != 'DELIM':
            return False
        return self.value in values
|
||||
|
||||
|
||||
class EOFToken(Token):

    """The token that ends the stream: type ``'EOF'``, value ``None``."""

    def __new__(cls, pos):
        return Token.__new__(cls, 'EOF', None, pos)

    def __repr__(self):
        # There is no value worth showing.
        return '<%s at %i>' % (self.type, self.pos)
|
||||
|
||||
|
||||
# Tokenizer
|
||||
|
||||
|
||||
class TokenMacros:
    # Named regular-expression fragments. _compile() substitutes them into
    # patterns through ``%(name)s`` placeholders.

    # A backslash, one to six hex digits and one optional whitespace.
    unicode_escape = r'\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?'
    # ...or a backslash followed by any other non-newline character.
    escape = unicode_escape + r'|\\[^\n\r\f0-9a-f]'
    # Inside strings a backslash may also escape a newline.
    string_escape = r'\\(?:\n|\r\n|\r|\f)|' + escape
    nonascii = r'[^\0-\177]'
    # Characters allowed inside a name, and at the start of one.
    nmchar = '|'.join(('[_a-z0-9-]', escape, nonascii))
    nmstart = '|'.join(('[_a-z]', escape, nonascii))
|
||||
|
||||
|
||||
def _compile(pattern):
    """Expand the ``%(name)s`` macros of TokenMacros in `pattern` and return
    the ``match`` method of the case-insensitive compiled expression."""
    expanded = pattern % vars(TokenMacros)
    return re.compile(expanded, re.IGNORECASE).match


# One matcher per token kind; tokenize() calls them with ``pos=``.
_match_whitespace = _compile(r'[ \t\r\n\f]+')
_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
_match_hash = _compile('#(?:%(nmchar)s)+')
_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
# String bodies, keyed by the quote character that opened the string.
_match_string_by_quote = {
    "'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
    '"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
}

# Substitution helpers used to resolve backslash escapes.
_sub_simple_escape = re.compile(r'\\(.)').sub
_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub
_sub_newline_escape = re.compile(r'\\(?:\n|\r\n|\r|\f)').sub
|
||||
|
||||
# Same as r'\1', but faster on CPython
|
||||
if hasattr(operator, 'methodcaller'):
|
||||
# Python 2.6+
|
||||
_replace_simple = operator.methodcaller('group', 1)
|
||||
else:
|
||||
def _replace_simple(match):
|
||||
return match.group(1)
|
||||
|
||||
|
||||
def _replace_unicode(match):
    """Replacement callback: turn the hex digits captured in group 1 into
    the character they name.

    Code points above ``sys.maxunicode`` become U+FFFD.
    """
    codepoint = int(match.group(1), 16)
    in_range = codepoint <= sys.maxunicode
    return codepoint_to_chr(codepoint if in_range else 0xFFFD)
|
||||
|
||||
|
||||
def unescape_ident(value):
    """Resolve the backslash escapes in `value`: hexadecimal escapes first,
    then single-character escapes."""
    without_hex = _sub_unicode_escape(_replace_unicode, value)
    return _sub_simple_escape(_replace_simple, without_hex)
|
||||
|
||||
|
||||
def tokenize(s):
    """Split the selector text `s` into tokens.

    Yields :class:`Token` objects of type ``S`` (a run of whitespace, always
    with the value ``' '``), ``IDENT``, ``HASH`` (value without the '#'),
    ``STRING`` (value without the quotes), ``NUMBER`` and ``DELIM`` (any
    other single character), followed by one :class:`EOFToken`. Escapes in
    identifiers, hashes and strings are resolved. ``/* ... */`` comments
    produce no token; an unterminated comment runs to the end of the input.

    :raises: :class:`SelectorSyntaxError` for an unclosed or invalid string.
    """
    end = len(s)
    pos = 0
    while pos < end:
        found = _match_whitespace(s, pos=pos)
        if found:
            yield Token('S', ' ', pos)
            pos = found.end()
            continue

        found = _match_ident(s, pos=pos)
        if found:
            text = _sub_unicode_escape(_replace_unicode, found.group())
            yield Token('IDENT', _sub_simple_escape(_replace_simple, text), pos)
            pos = found.end()
            continue

        found = _match_hash(s, pos=pos)
        if found:
            # Drop the leading '#'.
            text = _sub_unicode_escape(_replace_unicode, found.group()[1:])
            yield Token('HASH', _sub_simple_escape(_replace_simple, text), pos)
            pos = found.end()
            continue

        quote = s[pos]
        if quote in _match_string_by_quote:
            found = _match_string_by_quote[quote](s, pos=pos + 1)
            assert found, 'Should have found at least an empty match'
            end_pos = found.end()
            if end_pos == end:
                raise SelectorSyntaxError('Unclosed string at %s' % pos)
            if s[end_pos] != quote:
                raise SelectorSyntaxError('Invalid string at %s' % pos)
            # Escaped newlines disappear, then the other escapes resolve.
            text = _sub_newline_escape('', found.group())
            text = _sub_unicode_escape(_replace_unicode, text)
            yield Token('STRING', _sub_simple_escape(_replace_simple, text), pos)
            pos = end_pos + 1
            continue

        found = _match_number(s, pos=pos)
        if found:
            yield Token('NUMBER', found.group(), pos)
            pos = found.end()
            continue

        if s[pos:pos + 2] == '/*':
            closing = s.find('*/', pos + 2)
            pos = end if closing == -1 else closing + 2
            continue

        yield Token('DELIM', s[pos], pos)
        pos += 1

    assert pos == end
    yield EOFToken(pos)
|
||||
|
||||
|
||||
class TokenStream(object):

    """
    Wraps an iterable of tokens with one token of look-ahead.

    ``used`` lists every token handed out by next(), in order. ``source``
    is stored as given and not used by the stream itself.
    """

    def __init__(self, tokens, source=None):
        self.used = []
        self.tokens = iter(tokens)
        self.source = source
        self.peeked = None
        self._peeking = False
        # Iterators spell their advance method ``next`` on Python 2 and
        # ``__next__`` on Python 3.
        advance = getattr(self.tokens, 'next', None)
        if advance is None:
            advance = self.tokens.__next__
        self.next_token = advance

    def next(self):
        """Consume and return the next token."""
        if self._peeking:
            # Hand out the token peek() fetched earlier.
            self._peeking = False
            token = self.peeked
        else:
            token = self.next_token()
        self.used.append(token)
        return token

    def peek(self):
        """Return the next token without consuming it."""
        if not self._peeking:
            self.peeked = self.next_token()
            self._peeking = True
        return self.peeked

    def next_ident(self):
        """Consume an IDENT token and return its value.

        :raises: :class:`SelectorSyntaxError` for any other token.
        """
        token = self.next()
        if token.type == 'IDENT':
            return token.value
        raise SelectorSyntaxError('Expected ident, got %s' % (token,))

    def next_ident_or_star(self):
        """Consume an IDENT token or '*'; return the value, or None for '*'.

        :raises: :class:`SelectorSyntaxError` for any other token.
        """
        token = self.next()
        if token.type == 'IDENT':
            return token.value
        if token == ('DELIM', '*'):
            return None
        raise SelectorSyntaxError(
            "Expected ident or '*', got %s" % (token,))

    def skip_whitespace(self):
        """Consume the next token if it is a whitespace (``S``) token."""
        if self.peek().type == 'S':
            self.next()
|
||||
694
ebook_converter/css_selectors/select.py
Normal file
694
ebook_converter/css_selectors/select.py
Normal file
@@ -0,0 +1,694 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import re, itertools
|
||||
from collections import OrderedDict, defaultdict
|
||||
from functools import wraps
|
||||
from itertools import chain
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from css_selectors.errors import ExpressionError
|
||||
from css_selectors.parser import parse, ascii_lower, Element, FunctionalPseudoElement
|
||||
from css_selectors.ordered_set import OrderedSet
|
||||
|
||||
from polyglot.builtins import iteritems, itervalues
|
||||
|
||||
# Module-wide caches, keyed by the source text. The OrderedDicts remember
# insertion order so that the oldest entry can be evicted once a cache holds
# more than its *_CACHE_SIZE entries.
PARSE_CACHE_SIZE = 200
parse_cache = OrderedDict()
XPATH_CACHE_SIZE = 30
xpath_cache = OrderedDict()

# Test that the string is not empty and does not contain whitespace
is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match
|
||||
|
||||
|
||||
def get_parsed_selector(raw):
    """Return ``parse(raw)``, remembering the result in ``parse_cache``.

    Once the cache holds more than PARSE_CACHE_SIZE entries, the entry
    that was inserted first is dropped.
    """
    if raw in parse_cache:
        return parse_cache[raw]
    parsed = parse(raw)
    parse_cache[raw] = parsed
    if len(parse_cache) > PARSE_CACHE_SIZE:
        parse_cache.popitem(last=False)
    return parsed
|
||||
|
||||
|
||||
def get_compiled_xpath(expr):
    """Return ``etree.XPath(expr)``, remembering the result in
    ``xpath_cache``.

    Once the cache holds more than XPATH_CACHE_SIZE entries, the entry
    that was inserted first is dropped.
    """
    if expr in xpath_cache:
        return xpath_cache[expr]
    compiled = etree.XPath(expr)
    xpath_cache[expr] = compiled
    if len(xpath_cache) > XPATH_CACHE_SIZE:
        xpath_cache.popitem(last=False)
    return compiled
|
||||
|
||||
|
||||
class AlwaysIn(object):

    """A container stand-in: ``x in obj`` is true for every ``x``."""

    def __contains__(self, x):
        return True


# Shared instance.
always_in = AlwaysIn()
|
||||
|
||||
|
||||
def trace_wrapper(func):
    """Wrap ``func`` so that every call is printed before being executed.

    When the first positional argument is a ``Select`` instance it is left
    out of the printed arguments. The wrapped call itself always receives
    the full, unmodified arguments.
    """
    @wraps(func)
    def trace(*args, **kwargs):
        if args and isinstance(args[0], Select):
            shown = args[1:]
        else:
            shown = args
        print('Called:', func.__name__, 'with args:', shown, kwargs or '')
        return func(*args, **kwargs)
    return trace
|
||||
|
||||
|
||||
def normalize_language_tag(tag):
    """Return the set of normalized combinations for a `BCP 47` language tag.

    The tag is passed through ``ascii_lower`` and underscores are treated as
    hyphens. The result holds the primary subtag on its own plus the primary
    subtag joined with every order-preserving combination of the remaining
    subtags.

    Example (the result is an unordered set, hence the ``sorted``):

    >>> sorted(normalize_language_tag('de_AT-1901'))
    ['de', 'de-1901', 'de-at', 'de-at-1901']
    """
    normalized = ascii_lower(tag).replace('_', '-')
    # A singleton subtag marks the following subtag as non-standard, so the
    # two are temporarily glued together with '_' to survive the split
    normalized = re.sub(r'-([a-zA-Z0-9])-', r'-\1_', normalized)
    pieces = [piece.replace('_', '-') for piece in normalized.split('-')]
    primary, extras = pieces[0], pieces[1:]
    combos = {primary}
    # Every non-empty combination of the remaining subtags
    for size in range(1, len(extras) + 1):
        for chosen in itertools.combinations(extras, size):
            combos.add('-'.join((primary,) + chosen))
    return combos
|
||||
|
||||
|
||||
# Pseudo-class / pseudo-element names that get_func_for_pseudo() accepts
# (matching every element) when they are listed in a Select object's
# ignore_inappropriate_pseudo_classes set
INAPPROPRIATE_PSEUDO_CLASSES = frozenset({
    'active',
    'after',
    'before',
    'checked',
    'disabled',
    'enabled',
    'first-letter',
    'first-line',
    'focus',
    'hover',
    'link',
    'target',
    'visited',
})
|
||||
|
||||
|
||||
class Select(object):

    '''

    This class implements CSS Level 3 selectors
    (http://www.w3.org/TR/css3-selectors) on an lxml tree, with caching for
    performance. To use:

    >>> from css_selectors import Select
    >>> select = Select(root)  # Where root is an lxml document
    >>> print(tuple(select('p.myclass')))

    Tags are returned in document order. Note that attribute and tag names are
    matched case-insensitively. Class and id values are also matched
    case-insensitively. Also namespaces are ignored (this is for performance of
    the common case). The UI related selectors are not implemented, such as
    :enabled, :disabled, :checked, :hover, etc. Similarly, the non-element
    related selectors such as ::first-line, ::first-letter, ::before, etc. are
    not implemented.

    WARNING: This class uses internal caches. You *must not* make any changes
    to the lxml tree. If you do make some changes, either create a new Select
    object or call :meth:`invalidate_caches`.

    This class can be easily sub-classed to work with tree implementations
    other than lxml. Simply override the methods in the ``Tree Integration``
    block below.

    The caching works by maintaining internal maps from classes/ids/tag
    names/etc. to node sets. These caches are populated as needed, and used
    for all subsequent selections. Thus, for best performance you should use
    the same selector object for finding the matching nodes for multiple
    queries. Of course, remember not to change the tree in between queries.

    '''

    # Combinator token -> key of the implementing function in dispatch_map
    combinator_mapping = {
        ' ': 'descendant',
        '>': 'child',
        '+': 'direct_adjacent',
        '~': 'indirect_adjacent',
    }

    # Attribute operator token -> key of the implementing function in
    # dispatch_map
    attribute_operator_mapping = {
        'exists': 'exists',
        '=': 'equals',
        '~=': 'includes',
        '|=': 'dashmatch',
        '^=': 'prefixmatch',
        '$=': 'suffixmatch',
        '*=': 'substringmatch',
    }

    def __init__(self, root, default_lang=None, ignore_inappropriate_pseudo_classes=False, dispatch_map=None, trace=False):
        '''
        :param root: The tree to select from. If it has a ``getroot()``
            method, the result of calling it is used as the root element.
        :param default_lang: Language tag applied to every tag before any
            ``xml:lang``/``lang`` attributes are taken into account (see
            :attr:`lang_map`).
        :param ignore_inappropriate_pseudo_classes: If True, the names in
            ``INAPPROPRIATE_PSEUDO_CLASSES`` are stored so that they can be
            tolerated instead of rejected.
        :param dispatch_map: Mapping of selector type name to implementing
            function; defaults to ``default_dispatch_map``.
        :param trace: If True, every function in the dispatch map is wrapped
            with ``trace_wrapper`` so that calls are printed.
        '''
        if hasattr(root, 'getroot'):
            root = root.getroot()
        self.root = root
        self.dispatch_map = dispatch_map or default_dispatch_map
        self.invalidate_caches()
        self.default_lang = default_lang
        if trace:
            self.dispatch_map = {k:trace_wrapper(v) for k, v in iteritems(self.dispatch_map)}
        if ignore_inappropriate_pseudo_classes:
            self.ignore_inappropriate_pseudo_classes = INAPPROPRIATE_PSEUDO_CLASSES
        else:
            self.ignore_inappropriate_pseudo_classes = frozenset()

    # External API {{{
    def invalidate_caches(self):
        'Invalidate all caches. You must call this before using this object if you have made changes to the HTML tree'
        self._element_map = None
        self._id_map = None
        self._class_map = None
        self._attrib_map = None
        self._attrib_space_map = None
        self._lang_map = None
        self.map_tag_name = ascii_lower
        if '{' in self.root.tag:
            # The root tag is namespace qualified ({uri}name): strip the
            # namespace part from tag names before lower-casing them
            def map_tag_name(x):
                return ascii_lower(x.rpartition('}')[2])
            self.map_tag_name = map_tag_name

    def __call__(self, selector, root=None):
        ''' Return an iterator over all matching tags, in document order.
        Normally, all matching tags in the document are returned, if you
        specify root, then only tags that are root or descendants of root are
        returned. Note that this can be very expensive if root has a lot of
        descendants. '''
        seen = set()
        if root is not None:
            root = frozenset(self.itertag(root))
        for parsed_selector in get_parsed_selector(selector):
            for item in self.iterparsedselector(parsed_selector):
                # seen de-duplicates tags matched by more than one of the
                # parsed selectors
                if item not in seen and (root is None or item in root):
                    yield item
                    seen.add(item)

    def has_matches(self, selector, root=None):
        'Return True iff selector matches at least one item in the tree'
        for elem in self(selector, root=root):
            return True
        return False
    # }}}

    def iterparsedselector(self, parsed_selector):
        ''' Yield the tags matched by ``parsed_selector``.

        The implementing function is looked up in the dispatch map by the
        lower-cased class name of ``parsed_selector``; an ExpressionError is
        raised if there is no entry for it. '''
        type_name = type(parsed_selector).__name__
        try:
            func = self.dispatch_map[ascii_lower(type_name)]
        except KeyError:
            raise ExpressionError('%s is not supported' % type_name)
        for item in func(self, parsed_selector):
            yield item

    @property
    def element_map(self):
        ' Map of normalized tag name to the OrderedSet of tags with that name (built on first access) '
        if self._element_map is None:
            self._element_map = em = defaultdict(OrderedSet)
            for tag in self.itertag():
                em[self.map_tag_name(tag.tag)].add(tag)
        return self._element_map

    @property
    def id_map(self):
        ' Map of lower-cased id to the OrderedSet of tags with that id (built on first access) '
        if self._id_map is None:
            self._id_map = im = defaultdict(OrderedSet)
            lower = ascii_lower
            for elem in self.iteridtags():
                im[lower(elem.get('id'))].add(elem)
        return self._id_map

    @property
    def class_map(self):
        ' Map of lower-cased class name to the OrderedSet of tags with that class (built on first access) '
        if self._class_map is None:
            self._class_map = cm = defaultdict(OrderedSet)
            lower = ascii_lower
            for elem in self.iterclasstags():
                for cls in elem.get('class').split():
                    cm[lower(cls)].add(elem)
        return self._class_map

    @property
    def attrib_map(self):
        ' Map of normalized attribute name to a map of full attribute value to the OrderedSet of tags (built on first access) '
        if self._attrib_map is None:
            self._attrib_map = am = defaultdict(lambda : defaultdict(OrderedSet))
            map_attrib_name = ascii_lower
            if '{' in self.root.tag:
                def map_attrib_name(x):
                    return ascii_lower(x.rpartition('}')[2])
            for tag in self.itertag():
                for attr, val in iteritems(tag.attrib):
                    am[map_attrib_name(attr)][val].add(tag)
        return self._attrib_map

    @property
    def attrib_space_map(self):
        ' Like attrib_map, but keyed by each whitespace separated word of the attribute value (built on first access) '
        if self._attrib_space_map is None:
            self._attrib_space_map = am = defaultdict(lambda : defaultdict(OrderedSet))
            map_attrib_name = ascii_lower
            if '{' in self.root.tag:
                def map_attrib_name(x):
                    return ascii_lower(x.rpartition('}')[2])
            for tag in self.itertag():
                for attr, val in iteritems(tag.attrib):
                    for v in val.split():
                        am[map_attrib_name(attr)][v].add(tag)
        return self._attrib_space_map

    @property
    def lang_map(self):
        ''' Map of normalized language tag to the OrderedSet of tags in that
        language (built on first access).

        Every tag starts out with the default language (if any). A tag with
        an ``xml:lang`` or ``lang`` attribute sets the language for itself
        and all of its descendants, overriding whatever was set from further
        up the tree. '''
        if self._lang_map is None:
            self._lang_map = lm = defaultdict(OrderedSet)
            dl = normalize_language_tag(self.default_lang) if self.default_lang else None
            lmap = {tag:dl for tag in self.itertag()} if dl else {}
            xml_lang = '{http://www.w3.org/XML/1998/namespace}lang'
            for tag in self.itertag():
                # Honour xml:lang first and fall back to a plain lang
                # attribute, rather than letting the lookup of the second
                # attribute overwrite the value of the first
                lang = tag.get(xml_lang) or tag.get('lang')
                if lang:
                    lang = normalize_language_tag(lang)
                    for dtag in self.itertag(tag):
                        lmap[dtag] = lang
            for tag, langs in iteritems(lmap):
                for lang in langs:
                    lm[lang].add(tag)
        return self._lang_map

    # Tree Integration {{{
    def itertag(self, tag=None):
        ' Iterate over tag (the root by default) and all the tags below it '
        return (self.root if tag is None else tag).iter('*')

    def iterdescendants(self, tag=None):
        ' Iterate over all the tags below tag (the root by default) '
        return (self.root if tag is None else tag).iterdescendants('*')

    def iterchildren(self, tag=None):
        ' Iterate over the child tags of tag (the root by default) '
        return (self.root if tag is None else tag).iterchildren('*')

    def itersiblings(self, tag=None, preceding=False):
        ' Iterate over the sibling tags after (or, with preceding=True, before) tag (the root by default) '
        return (self.root if tag is None else tag).itersiblings('*', preceding=preceding)

    def iteridtags(self):
        ' Return all tags that have an id attribute '
        return get_compiled_xpath('//*[@id]')(self.root)

    def iterclasstags(self):
        ' Return all tags that have a class attribute '
        return get_compiled_xpath('//*[@class]')(self.root)

    def sibling_count(self, child, before=True, same_type=False):
        ' Return the number of siblings before or after child or raise ValueError if child has no parent. '
        parent = child.getparent()
        if parent is None:
            raise ValueError('Child has no parent')
        if same_type:
            # Only count the siblings whose normalized tag name is the same
            # as that of child
            siblings = OrderedSet(child.itersiblings(preceding=before))
            return len(self.element_map[self.map_tag_name(child.tag)] & siblings)
        else:
            if before:
                return parent.index(child)
            return len(parent) - parent.index(child) - 1

    def all_sibling_count(self, child, same_type=False):
        ' Return the number of siblings of child or raise ValueError if child has no parent '
        parent = child.getparent()
        if parent is None:
            raise ValueError('Child has no parent')
        if same_type:
            siblings = OrderedSet(chain(child.itersiblings(preceding=False), child.itersiblings(preceding=True)))
            return len(self.element_map[self.map_tag_name(child.tag)] & siblings)
        else:
            return len(parent) - 1

    def is_empty(self, elem):
        ' Return True iff elem has no child tags and no text content '
        for child in elem:
            # Check for comment/PI nodes with tail text
            if child.tail:
                return False
        return len(tuple(elem.iterchildren('*'))) == 0 and not elem.text
|
||||
|
||||
# }}}
|
||||
|
||||
# Combinators {{{
|
||||
|
||||
|
||||
def select_combinedselector(cache, combined):
    """Yield the tags matched by a combined selector (``left <combinator> right``).

    The combinator token is mapped to the name of its implementing function,
    which is then looked up in ``cache.dispatch_map`` and driven with lazy
    iterators over both sides.
    """
    handler_name = cache.combinator_mapping[combined.combinator]
    sub = combined.subselector
    # Fast path for when the sub-selector is all elements: None tells the
    # combinator function that no filtering is needed on the right side
    if isinstance(sub, Element) and (sub.element or '*') == '*':
        right = None
    else:
        right = cache.iterparsedselector(sub)
    handler = cache.dispatch_map[handler_name]
    left = cache.iterparsedselector(combined.selector)
    for item in handler(cache, left, right):
        yield item
|
||||
|
||||
|
||||
def select_descendant(cache, left, right):
    """Yield each descendant of a ``left`` tag that is also in ``right``.

    ``right`` may be None, in which case every descendant qualifies.
    """
    if right is None:
        allowed = always_in
    else:
        allowed = frozenset(right)
    for ancestor in left:
        for node in cache.iterdescendants(ancestor):
            if node in allowed:
                yield node
|
||||
|
||||
|
||||
def select_child(cache, left, right):
    """Yield each immediate child of a ``left`` tag that is also in ``right``.

    ``right`` may be None, in which case every child qualifies.
    """
    if right is None:
        allowed = always_in
    else:
        allowed = frozenset(right)
    for parent in left:
        for kid in cache.iterchildren(parent):
            if kid in allowed:
                yield kid
|
||||
|
||||
|
||||
def select_direct_adjacent(cache, left, right):
    """Yield the sibling immediately after each ``left`` tag, if it is in ``right``.

    ``right`` may be None, in which case the immediate sibling always
    qualifies.
    """
    if right is None:
        allowed = always_in
    else:
        allowed = frozenset(right)
    for node in left:
        # Only the first following sibling is ever examined
        for neighbour in itertools.islice(cache.itersiblings(node), 1):
            if neighbour in allowed:
                yield neighbour
|
||||
|
||||
|
||||
def select_indirect_adjacent(cache, left, right):
    """Yield each sibling after a ``left`` tag (adjacent or not) that is in ``right``.

    ``right`` may be None, in which case every following sibling qualifies.
    """
    if right is None:
        allowed = always_in
    else:
        allowed = frozenset(right)
    for node in left:
        for later in cache.itersiblings(node):
            if later in allowed:
                yield later
|
||||
# }}}
|
||||
|
||||
|
||||
def select_element(cache, selector):
    """Yield the tags matched by a type selector or by the universal selector.

    A missing element name or ``*`` matches every tag; anything else is
    looked up in ``cache.element_map`` after being passed through
    ``ascii_lower``.
    """
    name = selector.element
    if name and name != '*':
        source = cache.element_map[ascii_lower(name)]
    else:
        source = cache.itertag()
    for elem in source:
        yield elem
|
||||
|
||||
|
||||
def select_hash(cache, selector):
    """An id selector: yield the tags matched by ``selector.selector`` that
    carry the requested id (looked up after ``ascii_lower``)."""
    with_id = cache.id_map[ascii_lower(selector.id)]
    if not len(with_id) > 0:
        # No tag has this id, so the inner selector need not be evaluated
        return
    for elem in cache.iterparsedselector(selector.selector):
        if elem in with_id:
            yield elem
|
||||
|
||||
|
||||
def select_class(cache, selector):
    """A class selector: yield the tags matched by ``selector.selector`` that
    carry the requested class name (looked up after ``ascii_lower``)."""
    with_class = cache.class_map[ascii_lower(selector.class_name)]
    if not with_class:
        # No tag has this class, so the inner selector need not be evaluated
        return
    for elem in cache.iterparsedselector(selector.selector):
        if elem in with_class:
            yield elem
|
||||
|
||||
|
||||
def select_negation(cache, selector):
    """Implement :not(): yield the tags matched by ``selector.selector`` that
    are not matched by ``selector.subselector``."""
    rejected = frozenset(cache.iterparsedselector(selector.subselector))
    for candidate in cache.iterparsedselector(selector.selector):
        if candidate in rejected:
            continue
        yield candidate
|
||||
|
||||
# Attribute selectors {{{
|
||||
|
||||
|
||||
def select_attrib(cache, selector):
    """An attribute selector: yield the tags matched by ``selector.selector``
    that also satisfy the attribute test.

    The operator token is mapped to the name of its implementing function,
    which is looked up in ``cache.dispatch_map`` and called with the
    attribute name (after ``ascii_lower``) and the value to test against.
    """
    handler_name = cache.attribute_operator_mapping[selector.operator]
    handler = cache.dispatch_map[handler_name]
    matching = frozenset(handler(cache, ascii_lower(selector.attrib), selector.value))
    for candidate in cache.iterparsedselector(selector.selector):
        if candidate in matching:
            yield candidate
|
||||
|
||||
|
||||
def select_exists(cache, attrib, value=None):
    """Implement [attrib]: yield every tag that has the attribute, whatever
    its value. ``value`` is accepted for signature compatibility and ignored."""
    by_value = cache.attrib_map[attrib]
    for elem in chain.from_iterable(itervalues(by_value)):
        yield elem
|
||||
|
||||
|
||||
def select_equals(cache, attrib, value):
    """Implement [attrib=value]: yield the tags whose attribute value is
    exactly ``value``."""
    exact_matches = cache.attrib_map[attrib][value]
    for elem in exact_matches:
        yield elem
|
||||
|
||||
|
||||
def select_includes(cache, attrib, value):
    """Implement [attrib~=value]: yield the tags whose attribute value, seen
    as a whitespace separated list of words, contains ``value``.

    Nothing is yielded when ``value`` fails the ``is_non_whitespace`` test.
    """
    if not is_non_whitespace(value):
        return
    for elem in cache.attrib_space_map[attrib][value]:
        yield elem
|
||||
|
||||
|
||||
def select_dashmatch(cache, attrib, value):
    """Implement [attrib|=value]: yield the tags whose attribute value is
    ``value`` or starts with ``value`` followed by a hyphen.

    Nothing is yielded for an empty ``value``.
    """
    if not value:
        return
    for candidate, elems in iteritems(cache.attrib_map[attrib]):
        if candidate != value and not candidate.startswith(value + '-'):
            continue
        for elem in elems:
            yield elem
|
||||
|
||||
|
||||
def select_prefixmatch(cache, attrib, value):
    """Implement [attrib^=value]: yield the tags whose attribute value starts
    with ``value``.

    Nothing is yielded for an empty ``value``.
    """
    if not value:
        return
    for candidate, elems in iteritems(cache.attrib_map[attrib]):
        if not candidate.startswith(value):
            continue
        for elem in elems:
            yield elem
|
||||
|
||||
|
||||
def select_suffixmatch(cache, attrib, value):
    """Implement [attrib$=value]: yield the tags whose attribute value ends
    with ``value``.

    Nothing is yielded for an empty ``value``.
    """
    if not value:
        return
    for candidate, elems in iteritems(cache.attrib_map[attrib]):
        if not candidate.endswith(value):
            continue
        for elem in elems:
            yield elem
|
||||
|
||||
|
||||
def select_substringmatch(cache, attrib, value):
    """Implement [attrib*=value]: yield the tags whose attribute value
    contains ``value`` anywhere.

    Nothing is yielded for an empty ``value``.
    """
    if not value:
        return
    for candidate, elems in iteritems(cache.attrib_map[attrib]):
        if value not in candidate:
            continue
        for elem in elems:
            yield elem
|
||||
|
||||
# }}}
|
||||
|
||||
# Function selectors {{{
|
||||
|
||||
|
||||
def select_function(cache, function):
    """Select with a functional pseudo-class.

    The implementation is looked up in ``cache.dispatch_map`` under the
    pseudo-class name with hyphens turned into underscores; ExpressionError
    is raised if there is no such entry.

    ``lang`` is special: its implementation produces the whole set of
    matching tags at once. Every other implementation is called as a
    predicate, once per candidate tag.
    """
    key = function.name.replace('-', '_')
    try:
        impl = cache.dispatch_map[key]
    except KeyError:
        raise ExpressionError(
            "The pseudo-class :%s() is unknown" % function.name)
    if key != 'lang':
        for candidate in cache.iterparsedselector(function.selector):
            if impl(cache, function, candidate):
                yield candidate
        return
    matching = frozenset(impl(cache, function))
    for candidate in cache.iterparsedselector(function.selector):
        if candidate in matching:
            yield candidate
|
||||
|
||||
|
||||
def select_lang(cache, function):
    """Implement :lang().

    Yields the tags filed in ``cache.lang_map`` under the requested language
    or under any language that starts with the requested one followed by a
    hyphen. The same tag can be yielded more than once. Nothing is yielded
    for an empty argument.

    Raises ExpressionError unless the argument is a single string or ident.
    """
    if function.argument_types() not in (['STRING'], ['IDENT']):
        raise ExpressionError("Expected a single string or ident for :lang(), got %r" % function.arguments)
    wanted = function.arguments[0].value
    if not wanted:
        return
    wanted = ascii_lower(wanted)
    sub_prefix = wanted + '-'
    for tlang, elems in iteritems(cache.lang_map):
        if tlang != wanted and (tlang is None or not tlang.startswith(sub_prefix)):
            continue
        for elem in elems:
            yield elem
|
||||
|
||||
|
||||
def select_nth_child(cache, function, elem):
    """Implement :nth-child().

    ``function.parsed_arguments`` is the ``(a, b)`` pair of the ``an+b``
    expression. Returns True iff the 1-based position of ``elem``, counted
    from the start of its parent, equals ``a*n + b`` for some integer
    ``n >= 0``. Returns False when ``elem`` has no parent.
    """
    a, b = function.parsed_arguments
    try:
        position = cache.sibling_count(elem) + 1
    except ValueError:
        # sibling_count() signals a parentless element this way
        return False
    if a == 0:
        return position == b
    # Exact integer arithmetic: a float quotient would be reported as
    # integral for every sufficiently large value, matching wrongly
    n, remainder = divmod(position - b, a)
    return remainder == 0 and n >= 0
|
||||
|
||||
|
||||
def select_nth_last_child(cache, function, elem):
    """Implement :nth-last-child().

    ``function.parsed_arguments`` is the ``(a, b)`` pair of the ``an+b``
    expression. Returns True iff the 1-based position of ``elem``, counted
    from the end of its parent, equals ``a*n + b`` for some integer
    ``n >= 0``. Returns False when ``elem`` has no parent.
    """
    a, b = function.parsed_arguments
    try:
        position = cache.sibling_count(elem, before=False) + 1
    except ValueError:
        # sibling_count() signals a parentless element this way
        return False
    if a == 0:
        return position == b
    # Exact integer arithmetic: a float quotient would be reported as
    # integral for every sufficiently large value, matching wrongly
    n, remainder = divmod(position - b, a)
    return remainder == 0 and n >= 0
|
||||
|
||||
|
||||
def select_nth_of_type(cache, function, elem):
    """Implement :nth-of-type().

    ``function.parsed_arguments`` is the ``(a, b)`` pair of the ``an+b``
    expression. Returns True iff the 1-based position of ``elem`` among the
    siblings of its own type, counted from the start, equals ``a*n + b`` for
    some integer ``n >= 0``. Returns False when ``elem`` has no parent.
    """
    a, b = function.parsed_arguments
    try:
        position = cache.sibling_count(elem, same_type=True) + 1
    except ValueError:
        # sibling_count() signals a parentless element this way
        return False
    if a == 0:
        return position == b
    # Exact integer arithmetic: a float quotient would be reported as
    # integral for every sufficiently large value, matching wrongly
    n, remainder = divmod(position - b, a)
    return remainder == 0 and n >= 0
|
||||
|
||||
|
||||
def select_nth_last_of_type(cache, function, elem):
    """Implement :nth-last-of-type().

    ``function.parsed_arguments`` is the ``(a, b)`` pair of the ``an+b``
    expression. Returns True iff the 1-based position of ``elem`` among the
    siblings of its own type, counted from the end, equals ``a*n + b`` for
    some integer ``n >= 0``. Returns False when ``elem`` has no parent.
    """
    a, b = function.parsed_arguments
    try:
        position = cache.sibling_count(elem, before=False, same_type=True) + 1
    except ValueError:
        # sibling_count() signals a parentless element this way
        return False
    if a == 0:
        return position == b
    # Exact integer arithmetic: a float quotient would be reported as
    # integral for every sufficiently large value, matching wrongly
    n, remainder = divmod(position - b, a)
    return remainder == 0 and n >= 0
|
||||
|
||||
# }}}
|
||||
|
||||
# Pseudo elements {{{
|
||||
|
||||
|
||||
def pseudo_func(f):
    """Decorator that marks ``f`` as usable as a (non-functional)
    pseudo-class by setting ``f.is_pseudo`` to True. Returns ``f`` itself."""
    setattr(f, 'is_pseudo', True)
    return f
|
||||
|
||||
|
||||
@pseudo_func
def allow_all(cache, item):
    """Pseudo-class predicate that accepts every item."""
    return True
|
||||
|
||||
|
||||
def get_func_for_pseudo(cache, ident):
    """Return the predicate function implementing the pseudo-class ``ident``.

    The function is looked up in ``cache.dispatch_map`` under ``ident`` with
    hyphens turned into underscores. If there is no such entry and ``ident``
    is in ``cache.ignore_inappropriate_pseudo_classes``, ``allow_all`` is
    used instead.

    Raises ExpressionError if the pseudo-class is neither known nor ignored,
    or if the function found does not carry the ``is_pseudo`` marker.
    """
    key = ident.replace('-', '_')
    try:
        func = cache.dispatch_map[key]
    except KeyError:
        if ident not in cache.ignore_inappropriate_pseudo_classes:
            raise ExpressionError(
                "The pseudo-class :%s is not supported" % ident)
        func = allow_all

    if not hasattr(func, 'is_pseudo'):
        raise ExpressionError(
            "The pseudo-class :%s is invalid" % ident)
    return func
|
||||
|
||||
|
||||
def select_selector(cache, selector):
    """Yield the tags matched by a complete selector.

    Without a pseudo-element this is just whatever ``selector.parsed_tree``
    matches. With one, the matches are additionally filtered through the
    function that ``get_func_for_pseudo`` returns for it.

    Raises ExpressionError for functional pseudo-elements.
    """
    pseudo = selector.pseudo_element
    if pseudo is None:
        predicate = None
    elif isinstance(pseudo, FunctionalPseudoElement):
        raise ExpressionError(
            "The pseudo-element ::%s is not supported" % selector.pseudo_element.name)
    else:
        predicate = get_func_for_pseudo(cache, pseudo)
    for item in cache.iterparsedselector(selector.parsed_tree):
        if predicate is None or predicate(cache, item):
            yield item
|
||||
|
||||
|
||||
def select_pseudo(cache, pseudo):
    """Yield the tags matched by ``pseudo.selector`` for which the
    pseudo-class ``pseudo.ident`` holds.

    Raises ExpressionError (from ``get_func_for_pseudo``) if the
    pseudo-class is not usable.
    """
    func = get_func_for_pseudo(cache, pseudo.ident)
    if func is select_root:
        # Fast path for :root. Only the root element can ever match, so stop
        # as soon as it turns up. It must still be matched by the inner
        # selector though: p:root must not select a root that is not a <p>
        root = cache.root
        for item in cache.iterparsedselector(pseudo.selector):
            if item is root:
                yield root
                break
        return

    for item in cache.iterparsedselector(pseudo.selector):
        if func(cache, item):
            yield item
|
||||
|
||||
|
||||
@pseudo_func
def select_root(cache, elem):
    """Implement :root: True iff ``elem`` is the very object stored as
    ``cache.root``."""
    root = cache.root
    return elem is root
|
||||
|
||||
|
||||
@pseudo_func
def select_first_child(cache, elem):
    """Implement :first-child: True iff ``elem`` has a parent and no siblings
    before it."""
    try:
        preceding = cache.sibling_count(elem)
    except ValueError:
        # No parent
        return False
    return preceding == 0
|
||||
|
||||
|
||||
@pseudo_func
def select_last_child(cache, elem):
    """Implement :last-child: True iff ``elem`` has a parent and no siblings
    after it."""
    try:
        following = cache.sibling_count(elem, before=False)
    except ValueError:
        # No parent
        return False
    return following == 0
|
||||
|
||||
|
||||
@pseudo_func
def select_only_child(cache, elem):
    """Implement :only-child: True iff ``elem`` has a parent and no siblings
    at all."""
    try:
        others = cache.all_sibling_count(elem)
    except ValueError:
        # No parent
        return False
    return others == 0
|
||||
|
||||
|
||||
@pseudo_func
def select_first_of_type(cache, elem):
    """Implement :first-of-type: True iff ``elem`` has a parent and no
    siblings of its own type before it."""
    try:
        preceding = cache.sibling_count(elem, same_type=True)
    except ValueError:
        # No parent
        return False
    return preceding == 0
|
||||
|
||||
|
||||
@pseudo_func
def select_last_of_type(cache, elem):
    """Implement :last-of-type: True iff ``elem`` has a parent and no
    siblings of its own type after it."""
    try:
        following = cache.sibling_count(elem, before=False, same_type=True)
    except ValueError:
        # No parent
        return False
    return following == 0
|
||||
|
||||
|
||||
@pseudo_func
def select_only_of_type(cache, elem):
    """Implement :only-of-type: True iff ``elem`` has a parent and no
    siblings of its own type at all."""
    try:
        others = cache.all_sibling_count(elem, same_type=True)
    except ValueError:
        # No parent
        return False
    return others == 0
|
||||
|
||||
|
||||
@pseudo_func
def select_empty(cache, elem):
    """Implement :empty by delegating to ``cache.is_empty``."""
    verdict = cache.is_empty(elem)
    return verdict
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
# Every module-level callable named select_<key> is registered under <key>.
# This has to stay a comprehension: an explicit loop would add its loop
# variables to globals() while globals() is being iterated.
default_dispatch_map = {
    name[len('select_'):]: obj
    for name, obj in globals().items()
    if callable(obj) and name.startswith('select_')
}
|
||||
|
||||
if __name__ == '__main__':
    # Small manual demo: run one selector, with call tracing switched on,
    # against a tiny namespaced document and print the matches
    from pprint import pprint
    markup = '<body xmlns="xxx" xml:lang="en"><p id="p" class="one two" lang="fr"><a id="a"/><b/><c/><d/></p></body>'
    demo_parser = etree.XMLParser(recover=True, no_network=True, resolve_entities=False)
    root = etree.fromstring(markup, parser=demo_parser)
    select = Select(root, ignore_inappropriate_pseudo_classes=True, trace=True)
    matches = list(select('p:disabled'))
    pprint(matches)
|
||||
843
ebook_converter/css_selectors/tests.py
Normal file
843
ebook_converter/css_selectors/tests.py
Normal file
@@ -0,0 +1,843 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import unittest, sys, argparse
|
||||
|
||||
from lxml import etree, html
|
||||
|
||||
from css_selectors.errors import SelectorSyntaxError, ExpressionError
|
||||
from css_selectors.parser import tokenize, parse
|
||||
from css_selectors.select import Select
|
||||
|
||||
|
||||
class TestCSSSelectors(unittest.TestCase):
|
||||
|
||||
# Test data {{{
|
||||
HTML_IDS = '''
|
||||
<html id="html"><head>
|
||||
<link id="link-href" href="foo" />
|
||||
<link id="link-nohref" />
|
||||
</head><body>
|
||||
<div id="outer-div">
|
||||
<a id="name-anchor" name="foo"></a>
|
||||
<a id="tag-anchor" rel="tag" href="http://localhost/foo">link</a>
|
||||
<a id="nofollow-anchor" rel="nofollow" href="https://example.org">
|
||||
link</a>
|
||||
<ol id="first-ol" class="a b c">
|
||||
<li id="first-li">content</li>
|
||||
<li id="second-li" lang="En-us">
|
||||
<div id="li-div">
|
||||
</div>
|
||||
</li>
|
||||
<li id="third-li" class="ab c"></li>
|
||||
<li id="fourth-li" class="ab
|
||||
c"></li>
|
||||
<li id="fifth-li"></li>
|
||||
<li id="sixth-li"></li>
|
||||
<li id="seventh-li"> </li>
|
||||
</ol>
|
||||
<p id="paragraph">
|
||||
<b id="p-b">hi</b> <em id="p-em">there</em>
|
||||
<b id="p-b2">guy</b>
|
||||
<input type="checkbox" id="checkbox-unchecked" />
|
||||
<input type="checkbox" id="checkbox-disabled" disabled="" />
|
||||
<input type="text" id="text-checked" checked="checked" />
|
||||
<input type="hidden" />
|
||||
<input type="hidden" disabled="disabled" />
|
||||
<input type="checkbox" id="checkbox-checked" checked="checked" />
|
||||
<input type="checkbox" id="checkbox-disabled-checked"
|
||||
disabled="disabled" checked="checked" />
|
||||
<fieldset id="fieldset" disabled="disabled">
|
||||
<input type="checkbox" id="checkbox-fieldset-disabled" />
|
||||
<input type="hidden" />
|
||||
</fieldset>
|
||||
</p>
|
||||
<ol id="second-ol">
|
||||
</ol>
|
||||
<map name="dummymap">
|
||||
<area shape="circle" coords="200,250,25" href="foo.html" id="area-href" />
|
||||
<area shape="default" id="area-nohref" />
|
||||
</map>
|
||||
</div>
|
||||
<div id="foobar-div" foobar="ab bc
|
||||
cde"><span id="foobar-span"></span></div>
|
||||
</body></html>
|
||||
'''
|
||||
HTML_SHAKESPEARE = '''
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
||||
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
||||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" debug="true">
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
|
||||
</head>
|
||||
<body>
|
||||
<div id="test">
|
||||
<div class="dialog">
|
||||
<h2>As You Like It</h2>
|
||||
<div id="playwright">
|
||||
by William Shakespeare
|
||||
</div>
|
||||
<div class="dialog scene thirdClass" id="scene1">
|
||||
<h3>ACT I, SCENE III. A room in the palace.</h3>
|
||||
<div class="dialog">
|
||||
<div class="direction">Enter CELIA and ROSALIND</div>
|
||||
</div>
|
||||
<div id="speech1" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.1">Why, cousin! why, Rosalind! Cupid have mercy! not a word?</div>
|
||||
</div>
|
||||
<div id="speech2" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.2">Not one to throw at a dog.</div>
|
||||
</div>
|
||||
<div id="speech3" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.3">No, thy words are too precious to be cast away upon</div>
|
||||
<div id="scene1.3.4">curs; throw some of them at me; come, lame me with reasons.</div>
|
||||
</div>
|
||||
<div id="speech4" class="character">ROSALIND</div>
|
||||
<div id="speech5" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.8">But is all this for your father?</div>
|
||||
</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.5">Then there were two cousins laid up; when the one</div>
|
||||
<div id="scene1.3.6">should be lamed with reasons and the other mad</div>
|
||||
<div id="scene1.3.7">without any.</div>
|
||||
</div>
|
||||
<div id="speech6" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.9">No, some of it is for my child's father. O, how</div>
|
||||
<div id="scene1.3.10">full of briers is this working-day world!</div>
|
||||
</div>
|
||||
<div id="speech7" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.11">They are but burs, cousin, thrown upon thee in</div>
|
||||
<div id="scene1.3.12">holiday foolery: if we walk not in the trodden</div>
|
||||
<div id="scene1.3.13">paths our very petticoats will catch them.</div>
|
||||
</div>
|
||||
<div id="speech8" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.14">I could shake them off my coat: these burs are in my heart.</div>
|
||||
</div>
|
||||
<div id="speech9" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.15">Hem them away.</div>
|
||||
</div>
|
||||
<div id="speech10" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.16">I would try, if I could cry 'hem' and have him.</div>
|
||||
</div>
|
||||
<div id="speech11" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.17">Come, come, wrestle with thy affections.</div>
|
||||
</div>
|
||||
<div id="speech12" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.18">O, they take the part of a better wrestler than myself!</div>
|
||||
</div>
|
||||
<div id="speech13" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.19">O, a good wish upon you! you will try in time, in</div>
|
||||
<div id="scene1.3.20">despite of a fall. But, turning these jests out of</div>
|
||||
<div id="scene1.3.21">service, let us talk in good earnest: is it</div>
|
||||
<div id="scene1.3.22">possible, on such a sudden, you should fall into so</div>
|
||||
<div id="scene1.3.23">strong a liking with old Sir Rowland's youngest son?</div>
|
||||
</div>
|
||||
<div id="speech14" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.24">The duke my father loved his father dearly.</div>
|
||||
</div>
|
||||
<div id="speech15" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.25">Doth it therefore ensue that you should love his son</div>
|
||||
<div id="scene1.3.26">dearly? By this kind of chase, I should hate him,</div>
|
||||
<div id="scene1.3.27">for my father hated his father dearly; yet I hate</div>
|
||||
<div id="scene1.3.28">not Orlando.</div>
|
||||
</div>
|
||||
<div id="speech16" class="character">ROSALIND</div>
|
||||
<div title="wtf" class="dialog">
|
||||
<div id="scene1.3.29">No, faith, hate him not, for my sake.</div>
|
||||
</div>
|
||||
<div id="speech17" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.30">Why should I not? doth he not deserve well?</div>
|
||||
</div>
|
||||
<div id="speech18" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.31">Let me love him for that, and do you love him</div>
|
||||
<div id="scene1.3.32">because I do. Look, here comes the duke.</div>
|
||||
</div>
|
||||
<div id="speech19" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.33">With his eyes full of anger.</div>
|
||||
<div class="direction">Enter DUKE FREDERICK, with Lords</div>
|
||||
</div>
|
||||
<div id="speech20" class="character">DUKE FREDERICK</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.34">Mistress, dispatch you with your safest haste</div>
|
||||
<div id="scene1.3.35">And get you from our court.</div>
|
||||
</div>
|
||||
<div id="speech21" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.36">Me, uncle?</div>
|
||||
</div>
|
||||
<div id="speech22" class="character">DUKE FREDERICK</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.37">You, cousin</div>
|
||||
<div id="scene1.3.38">Within these ten days if that thou be'st found</div>
|
||||
<div id="scene1.3.39">So near our public court as twenty miles,</div>
|
||||
<div id="scene1.3.40">Thou diest for it.</div>
|
||||
</div>
|
||||
<div id="speech23" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.41"> I do beseech your grace,</div>
|
||||
<div id="scene1.3.42">Let me the knowledge of my fault bear with me:</div>
|
||||
<div id="scene1.3.43">If with myself I hold intelligence</div>
|
||||
<div id="scene1.3.44">Or have acquaintance with mine own desires,</div>
|
||||
<div id="scene1.3.45">If that I do not dream or be not frantic,--</div>
|
||||
<div id="scene1.3.46">As I do trust I am not--then, dear uncle,</div>
|
||||
<div id="scene1.3.47">Never so much as in a thought unborn</div>
|
||||
<div id="scene1.3.48">Did I offend your highness.</div>
|
||||
</div>
|
||||
<div id="speech24" class="character">DUKE FREDERICK</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.49">Thus do all traitors:</div>
|
||||
<div id="scene1.3.50">If their purgation did consist in words,</div>
|
||||
<div id="scene1.3.51">They are as innocent as grace itself:</div>
|
||||
<div id="scene1.3.52">Let it suffice thee that I trust thee not.</div>
|
||||
</div>
|
||||
<div id="speech25" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.53">Yet your mistrust cannot make me a traitor:</div>
|
||||
<div id="scene1.3.54">Tell me whereon the likelihood depends.</div>
|
||||
</div>
|
||||
<div id="speech26" class="character">DUKE FREDERICK</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.55">Thou art thy father's daughter; there's enough.</div>
|
||||
</div>
|
||||
<div id="speech27" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.56">So was I when your highness took his dukedom;</div>
|
||||
<div id="scene1.3.57">So was I when your highness banish'd him:</div>
|
||||
<div id="scene1.3.58">Treason is not inherited, my lord;</div>
|
||||
<div id="scene1.3.59">Or, if we did derive it from our friends,</div>
|
||||
<div id="scene1.3.60">What's that to me? my father was no traitor:</div>
|
||||
<div id="scene1.3.61">Then, good my liege, mistake me not so much</div>
|
||||
<div id="scene1.3.62">To think my poverty is treacherous.</div>
|
||||
</div>
|
||||
<div id="speech28" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.63">Dear sovereign, hear me speak.</div>
|
||||
</div>
|
||||
<div id="speech29" class="character">DUKE FREDERICK</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.64">Ay, Celia; we stay'd her for your sake,</div>
|
||||
<div id="scene1.3.65">Else had she with her father ranged along.</div>
|
||||
</div>
|
||||
<div id="speech30" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.66">I did not then entreat to have her stay;</div>
|
||||
<div id="scene1.3.67">It was your pleasure and your own remorse:</div>
|
||||
<div id="scene1.3.68">I was too young that time to value her;</div>
|
||||
<div id="scene1.3.69">But now I know her: if she be a traitor,</div>
|
||||
<div id="scene1.3.70">Why so am I; we still have slept together,</div>
|
||||
<div id="scene1.3.71">Rose at an instant, learn'd, play'd, eat together,</div>
|
||||
<div id="scene1.3.72">And wheresoever we went, like Juno's swans,</div>
|
||||
<div id="scene1.3.73">Still we went coupled and inseparable.</div>
|
||||
</div>
|
||||
<div id="speech31" class="character">DUKE FREDERICK</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.74">She is too subtle for thee; and her smoothness,</div>
|
||||
<div id="scene1.3.75">Her very silence and her patience</div>
|
||||
<div id="scene1.3.76">Speak to the people, and they pity her.</div>
|
||||
<div id="scene1.3.77">Thou art a fool: she robs thee of thy name;</div>
|
||||
<div id="scene1.3.78">And thou wilt show more bright and seem more virtuous</div>
|
||||
<div id="scene1.3.79">When she is gone. Then open not thy lips:</div>
|
||||
<div id="scene1.3.80">Firm and irrevocable is my doom</div>
|
||||
<div id="scene1.3.81">Which I have pass'd upon her; she is banish'd.</div>
|
||||
</div>
|
||||
<div id="speech32" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.82">Pronounce that sentence then on me, my liege:</div>
|
||||
<div id="scene1.3.83">I cannot live out of her company.</div>
|
||||
</div>
|
||||
<div id="speech33" class="character">DUKE FREDERICK</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.84">You are a fool. You, niece, provide yourself:</div>
|
||||
<div id="scene1.3.85">If you outstay the time, upon mine honour,</div>
|
||||
<div id="scene1.3.86">And in the greatness of my word, you die.</div>
|
||||
<div class="direction">Exeunt DUKE FREDERICK and Lords</div>
|
||||
</div>
|
||||
<div id="speech34" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.87">O my poor Rosalind, whither wilt thou go?</div>
|
||||
<div id="scene1.3.88">Wilt thou change fathers? I will give thee mine.</div>
|
||||
<div id="scene1.3.89">I charge thee, be not thou more grieved than I am.</div>
|
||||
</div>
|
||||
<div id="speech35" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.90">I have more cause.</div>
|
||||
</div>
|
||||
<div id="speech36" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.91"> Thou hast not, cousin;</div>
|
||||
<div id="scene1.3.92">Prithee be cheerful: know'st thou not, the duke</div>
|
||||
<div id="scene1.3.93">Hath banish'd me, his daughter?</div>
|
||||
</div>
|
||||
<div id="speech37" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.94">That he hath not.</div>
|
||||
</div>
|
||||
<div id="speech38" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.95">No, hath not? Rosalind lacks then the love</div>
|
||||
<div id="scene1.3.96">Which teacheth thee that thou and I am one:</div>
|
||||
<div id="scene1.3.97">Shall we be sunder'd? shall we part, sweet girl?</div>
|
||||
<div id="scene1.3.98">No: let my father seek another heir.</div>
|
||||
<div id="scene1.3.99">Therefore devise with me how we may fly,</div>
|
||||
<div id="scene1.3.100">Whither to go and what to bear with us;</div>
|
||||
<div id="scene1.3.101">And do not seek to take your change upon you,</div>
|
||||
<div id="scene1.3.102">To bear your griefs yourself and leave me out;</div>
|
||||
<div id="scene1.3.103">For, by this heaven, now at our sorrows pale,</div>
|
||||
<div id="scene1.3.104">Say what thou canst, I'll go along with thee.</div>
|
||||
</div>
|
||||
<div id="speech39" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.105">Why, whither shall we go?</div>
|
||||
</div>
|
||||
<div id="speech40" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.106">To seek my uncle in the forest of Arden.</div>
|
||||
</div>
|
||||
<div id="speech41" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.107">Alas, what danger will it be to us,</div>
|
||||
<div id="scene1.3.108">Maids as we are, to travel forth so far!</div>
|
||||
<div id="scene1.3.109">Beauty provoketh thieves sooner than gold.</div>
|
||||
</div>
|
||||
<div id="speech42" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.110">I'll put myself in poor and mean attire</div>
|
||||
<div id="scene1.3.111">And with a kind of umber smirch my face;</div>
|
||||
<div id="scene1.3.112">The like do you: so shall we pass along</div>
|
||||
<div id="scene1.3.113">And never stir assailants.</div>
|
||||
</div>
|
||||
<div id="speech43" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.114">Were it not better,</div>
|
||||
<div id="scene1.3.115">Because that I am more than common tall,</div>
|
||||
<div id="scene1.3.116">That I did suit me all points like a man?</div>
|
||||
<div id="scene1.3.117">A gallant curtle-axe upon my thigh,</div>
|
||||
<div id="scene1.3.118">A boar-spear in my hand; and--in my heart</div>
|
||||
<div id="scene1.3.119">Lie there what hidden woman's fear there will--</div>
|
||||
<div id="scene1.3.120">We'll have a swashing and a martial outside,</div>
|
||||
<div id="scene1.3.121">As many other mannish cowards have</div>
|
||||
<div id="scene1.3.122">That do outface it with their semblances.</div>
|
||||
</div>
|
||||
<div id="speech44" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.123">What shall I call thee when thou art a man?</div>
|
||||
</div>
|
||||
<div id="speech45" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.124">I'll have no worse a name than Jove's own page;</div>
|
||||
<div id="scene1.3.125">And therefore look you call me Ganymede.</div>
|
||||
<div id="scene1.3.126">But what will you be call'd?</div>
|
||||
</div>
|
||||
<div id="speech46" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.127">Something that hath a reference to my state</div>
|
||||
<div id="scene1.3.128">No longer Celia, but Aliena.</div>
|
||||
</div>
|
||||
<div id="speech47" class="character">ROSALIND</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.129">But, cousin, what if we assay'd to steal</div>
|
||||
<div id="scene1.3.130">The clownish fool out of your father's court?</div>
|
||||
<div id="scene1.3.131">Would he not be a comfort to our travel?</div>
|
||||
</div>
|
||||
<div id="speech48" class="character">CELIA</div>
|
||||
<div class="dialog">
|
||||
<div id="scene1.3.132">He'll go along o'er the wide world with me;</div>
|
||||
<div id="scene1.3.133">Leave me alone to woo him. Let's away,</div>
|
||||
<div id="scene1.3.134">And get our jewels and our wealth together,</div>
|
||||
<div id="scene1.3.135">Devise the fittest time and safest way</div>
|
||||
<div id="scene1.3.136">To hide us from pursuit that will be made</div>
|
||||
<div id="scene1.3.137">After my flight. Now go we in content</div>
|
||||
<div id="scene1.3.138">To liberty and not to banishment.</div>
|
||||
<div class="direction">Exeunt</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
'''
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
# Short alias for assertEqual; the test methods call it as self.ae(actual, expected).
ae = unittest.TestCase.assertEqual
|
||||
|
||||
def test_tokenizer(self):  # {{{
    """Tokenize one selector that exercises escapes, strings, comments and numbers.

    The expected list is the text form of every token, including its
    start offset in the input.
    """
    # The character after "f" is a no-break space (U+00A0). It is spelled
    # as an explicit escape because a literal one is invisible in the
    # source and is silently turned into a plain space by many tools,
    # which would shift every expected offset below.
    css = 'E\\ é > f\xa0[a~="y\\"x"]:nth(/* fu /]* */-3.7)'
    tokens = [type('')(item) for item in tokenize(css)]
    self.ae(tokens, [
        "<IDENT 'E é' at 0>",
        "<S ' ' at 4>",
        "<DELIM '>' at 5>",
        "<S ' ' at 6>",
        # the no-break space is not whitespace in CSS
        "<IDENT 'f\xa0' at 7>",
        "<DELIM '[' at 9>",
        "<IDENT 'a' at 10>",
        "<DELIM '~' at 11>",
        "<DELIM '=' at 12>",
        "<STRING 'y\"x' at 13>",
        "<DELIM ']' at 19>",
        "<DELIM ':' at 20>",
        "<IDENT 'nth' at 21>",
        "<DELIM '(' at 24>",
        # the /* ... */ comment occupies offsets 25-36 and yields no token
        "<NUMBER '-3.7' at 37>",
        "<DELIM ')' at 41>",
        "<EOF at 42>",
    ])
# }}}
|
||||
|
||||
def test_parser(self):  # {{{
    """Check the repr of the parsed tree for a range of selectors.

    All checks go through unittest assertions rather than bare ``assert``
    statements, so they still run under ``python -O`` and report a diff
    when they fail.
    """

    def repr_parse(css):
        # Return the repr of each parsed tree; none of these selectors
        # may carry a pseudo-element.
        selectors = parse(css)
        for selector in selectors:
            self.assertIsNone(selector.pseudo_element)
        # Strip the u'' prefix so the expected strings are the same on
        # Python 2 and Python 3.
        return [repr(selector.parsed_tree).replace("(u'", "('")
                for selector in selectors]

    def parse_many(first, *others):
        # Every spelling in *others* must parse to the same result as *first*.
        result = repr_parse(first)
        for other in others:
            self.ae(repr_parse(other), result)
        return result

    self.ae(parse_many('*'), ['Element[*]'])
    self.ae(parse_many('*|*'), ['Element[*]'])
    self.ae(parse_many('*|foo'), ['Element[foo]'])
    self.ae(parse_many('foo|*'), ['Element[foo|*]'])
    self.ae(parse_many('foo|bar'), ['Element[foo|bar]'])
    # This will never match, but it is valid:
    self.ae(parse_many('#foo#bar'), ['Hash[Hash[Element[*]#foo]#bar]'])
    self.ae(
        parse_many(
            'div>.foo',
            'div> .foo',
            'div >.foo',
            'div > .foo',
            'div \n> \t \t .foo', 'div\r>\n\n\n.foo', 'div\f>\f.foo'
        ),
        ['CombinedSelector[Element[div] > Class[Element[*].foo]]'])
    self.ae(
        parse_many(
            'td.foo,.bar',
            'td.foo, .bar',
            'td.foo\t\r\n\f ,\t\r\n\f .bar'
        ),
        [
            'Class[Element[td].foo]',
            'Class[Element[*].bar]'
        ])
    self.ae(parse_many('div, td.foo, div.bar span'), [
        'Element[div]',
        'Class[Element[td].foo]',
        'CombinedSelector[Class[Element[div].bar] '
        '<followed> Element[span]]'])
    self.ae(parse_many('div > p'), [
        'CombinedSelector[Element[div] > Element[p]]'])
    self.ae(parse_many('td:first'), [
        'Pseudo[Element[td]:first]'])
    self.ae(parse_many('td:first'), [
        'Pseudo[Element[td]:first]'])
    self.ae(parse_many('td :first'), [
        'CombinedSelector[Element[td] '
        '<followed> Pseudo[Element[*]:first]]'])
    self.ae(parse_many('td :first'), [
        'CombinedSelector[Element[td] '
        '<followed> Pseudo[Element[*]:first]]'])
    self.ae(parse_many('a[name]', 'a[ name\t]'), [
        'Attrib[Element[a][name]]'])
    self.ae(parse_many('a [name]'), [
        'CombinedSelector[Element[a] <followed> Attrib[Element[*][name]]]'])
    self.ae(parse_many('a[rel="include"]', 'a[rel = include]'), [
        "Attrib[Element[a][rel = 'include']]"])
    self.ae(parse_many("a[hreflang |= 'en']", "a[hreflang|=en]"), [
        "Attrib[Element[a][hreflang |= 'en']]"])
    self.ae(parse_many('div:nth-child(10)'), [
        "Function[Element[div]:nth-child(['10'])]"])
    self.ae(parse_many(':nth-child(2n+2)'), [
        "Function[Element[*]:nth-child(['2', 'n', '+2'])]"])
    self.ae(parse_many('div:nth-of-type(10)'), [
        "Function[Element[div]:nth-of-type(['10'])]"])
    self.ae(parse_many('div div:nth-of-type(10) .aclass'), [
        'CombinedSelector[CombinedSelector[Element[div] <followed> '
        "Function[Element[div]:nth-of-type(['10'])]] "
        '<followed> Class[Element[*].aclass]]'])
    self.ae(parse_many('label:only'), [
        'Pseudo[Element[label]:only]'])
    self.ae(parse_many('a:lang(fr)'), [
        "Function[Element[a]:lang(['fr'])]"])
    self.ae(parse_many('div:contains("foo")'), [
        "Function[Element[div]:contains(['foo'])]"])
    self.ae(parse_many('div#foobar'), [
        'Hash[Element[div]#foobar]'])
    self.ae(parse_many('div:not(div.foo)'), [
        'Negation[Element[div]:not(Class[Element[div].foo])]'])
    self.ae(parse_many('td ~ th'), [
        'CombinedSelector[Element[td] ~ Element[th]]'])
# }}}
|
||||
|
||||
def test_pseudo_elements(self):  # {{{
    """Check that pseudo-elements are split off from the parsed tree."""

    def parse_pseudo(css):
        # Return a (tree repr, pseudo-element text or None) pair for each
        # selector in *css*.
        result = []
        for selector in parse(css):
            pseudo = selector.pseudo_element
            pseudo = type('')(pseudo) if pseudo else pseudo
            # No Symbol here
            self.assertTrue(pseudo is None or isinstance(pseudo, type('')))
            # Strip the u'' prefix so expectations match on Python 2 and 3.
            tree = repr(selector.parsed_tree).replace("(u'", "('")
            result.append((tree, pseudo))
        return result

    def parse_one(css):
        # Like parse_pseudo, for input that must hold exactly one selector.
        result = parse_pseudo(css)
        self.ae(len(result), 1)
        return result[0]

    self.ae(parse_one('foo'), ('Element[foo]', None))
    self.ae(parse_one('*'), ('Element[*]', None))
    self.ae(parse_one(':empty'), ('Pseudo[Element[*]:empty]', None))

    # Special cases for CSS 2.1 pseudo-elements
    self.ae(parse_one(':BEfore'), ('Element[*]', 'before'))
    self.ae(parse_one(':aftER'), ('Element[*]', 'after'))
    self.ae(parse_one(':First-Line'), ('Element[*]', 'first-line'))
    self.ae(parse_one(':First-Letter'), ('Element[*]', 'first-letter'))

    self.ae(parse_one('::befoRE'), ('Element[*]', 'before'))
    self.ae(parse_one('::AFter'), ('Element[*]', 'after'))
    self.ae(parse_one('::firsT-linE'), ('Element[*]', 'first-line'))
    self.ae(parse_one('::firsT-letteR'), ('Element[*]', 'first-letter'))

    self.ae(parse_one('::text-content'), ('Element[*]', 'text-content'))
    self.ae(parse_one('::attr(name)'), (
        "Element[*]", "FunctionalPseudoElement[::attr(['name'])]"))

    self.ae(parse_one('::Selection'), ('Element[*]', 'selection'))
    self.ae(parse_one('foo:after'), ('Element[foo]', 'after'))
    self.ae(parse_one('foo::selection'), ('Element[foo]', 'selection'))
    self.ae(parse_one('lorem#ipsum ~ a#b.c[href]:empty::selection'), (
        'CombinedSelector[Hash[Element[lorem]#ipsum] ~ '
        'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]',
        'selection'))

    # This comparison used to be a bare expression whose result was thrown
    # away, so the multi-selector case was never actually checked.
    self.ae(parse_pseudo('foo:before, bar, baz:after'), [
        ('Element[foo]', 'before'),
        ('Element[bar]', None),
        ('Element[baz]', 'after')])
# }}}
|
||||
|
||||
def test_specificity(self):  # {{{
    """Check the specificity triple computed for a range of selectors.

    Uses unittest assertions instead of bare ``assert`` so the checks
    still run under ``python -O`` and failures name the selector.
    """

    def specificity(css):
        # *css* must hold exactly one selector.
        selectors = parse(css)
        self.ae(len(selectors), 1)
        return selectors[0].specificity()

    cases = (
        ('*', (0, 0, 0)),
        (' foo', (0, 0, 1)),
        (':empty ', (0, 1, 0)),
        (':before', (0, 0, 1)),
        ('*:before', (0, 0, 1)),
        (':nth-child(2)', (0, 1, 0)),
        ('.bar', (0, 1, 0)),
        ('[baz]', (0, 1, 0)),
        ('[baz="4"]', (0, 1, 0)),
        ('[baz^="4"]', (0, 1, 0)),
        ('#lipsum', (1, 0, 0)),

        (':not(*)', (0, 0, 0)),
        (':not(foo)', (0, 0, 1)),
        (':not(.foo)', (0, 1, 0)),
        (':not([foo])', (0, 1, 0)),
        (':not(:empty)', (0, 1, 0)),
        (':not(#foo)', (1, 0, 0)),

        ('foo:empty', (0, 1, 1)),
        ('foo:before', (0, 0, 2)),
        ('foo::before', (0, 0, 2)),
        ('foo:empty::before', (0, 1, 2)),

        ('#lorem + foo#ipsum:first-child > bar:first-line', (2, 1, 3)),
    )
    for css, expected in cases:
        # Pair the selector with the value so a failure shows which
        # selector produced the wrong specificity.
        self.ae((css, specificity(css)), (css, expected))
# }}}
|
||||
|
||||
def test_parse_errors(self):  # {{{
    """Check the message raised for malformed selectors.

    Each case pairs a selector with the expected SelectorSyntaxError
    message, or with None when the selector must parse without error.
    """

    def get_error(css):
        # Return the syntax error message for *css*, or None if it parses.
        try:
            parse(css)
        except SelectorSyntaxError as err:
            # Strip the u'' prefix so messages match on Python 2 and 3.
            return str(err).replace("(u'", "('")
        return None

    cases = (
        ('attributes(href)/html/body/a',
         "Expected selector, got <DELIM '(' at 10>"),
        ('attributes(href)',
         "Expected selector, got <DELIM '(' at 10>"),
        ('html/body/a',
         "Expected selector, got <DELIM '/' at 4>"),
        (' ',
         "Expected selector, got <EOF at 1>"),
        ('div, ',
         "Expected selector, got <EOF at 5>"),
        (' , div',
         "Expected selector, got <DELIM ',' at 1>"),
        ('p, , div',
         "Expected selector, got <DELIM ',' at 3>"),
        ('div > ',
         "Expected selector, got <EOF at 6>"),
        # Two leading spaces: the reported position of '>' is 2.
        ('  > div',
         "Expected selector, got <DELIM '>' at 2>"),
        ('foo|#bar',
         "Expected ident or '*', got <HASH 'bar' at 4>"),
        ('#.foo',
         "Expected selector, got <DELIM '#' at 0>"),
        ('.#foo',
         "Expected ident, got <HASH 'foo' at 1>"),
        (':#foo',
         "Expected ident, got <HASH 'foo' at 1>"),
        ('[*]',
         "Expected '|', got <DELIM ']' at 2>"),
        ('[foo|]',
         "Expected ident, got <DELIM ']' at 5>"),
        ('[#]',
         "Expected ident or '*', got <DELIM '#' at 1>"),
        ('[foo=#]',
         "Expected string or ident, got <DELIM '#' at 5>"),
        ('[href]a',
         "Expected selector, got <IDENT 'a' at 6>"),
        ('[rel=stylesheet]', None),
        ('[rel:stylesheet]',
         "Operator expected, got <DELIM ':' at 4>"),
        ('[rel=stylesheet',
         "Expected ']', got <EOF at 15>"),
        (':lang(fr)', None),
        (':lang(fr',
         "Expected an argument, got <EOF at 8>"),
        (':contains("foo',
         "Unclosed string at 10"),
        ('foo!',
         "Expected selector, got <DELIM '!' at 3>"),

        # Mis-placed pseudo-elements
        ('a:before:empty',
         "Got pseudo-element ::before not at the end of a selector"),
        ('li:before a',
         "Got pseudo-element ::before not at the end of a selector"),
        (':not(:before)',
         "Got pseudo-element ::before inside :not() at 12"),
        (':not(:not(a))',
         "Got nested :not()"),
    )
    for css, expected in cases:
        # Pair the selector with the message so a failure names the input.
        self.ae((css, get_error(css)), (css, expected))
# }}}
|
||||
|
||||
def test_select(self):  # {{{
    """Match selectors against the HTML_IDS fixture and compare element ids."""
    xml_parser = etree.XMLParser(
        recover=True, no_network=True, resolve_entities=False)
    document = etree.fromstring(self.HTML_IDS, parser=xml_parser)
    select = Select(document)

    def matched_ids(selector):
        # ids of the matched elements, in the order the engine yields them
        return [elem.get('id') for elem in select(selector)]

    def pcss(main, *selectors, **kwargs):
        # Every alternative selector must match the same ids as *main*.
        # Keyword arguments (such as skip_webkit) are accepted and ignored.
        expected = matched_ids(main)
        for alternative in selectors:
            self.ae(matched_ids(alternative), expected)
        return expected

    all_ids = pcss('*')
    self.ae(
        all_ids[:6],
        ['html', None, 'link-href', 'link-nohref', None, 'outer-div'])
    self.ae(all_ids[-1:], ['foobar-span'])
    self.ae(pcss('div'), ['outer-div', 'li-div', 'foobar-div'])
    # case-insensitive in HTML
    self.ae(pcss('DIV'), ['outer-div', 'li-div', 'foobar-div'])
    self.ae(pcss('div div'), ['li-div'])
    self.ae(pcss('div, div div'), ['outer-div', 'li-div', 'foobar-div'])
    self.ae(pcss('a[name]'), ['name-anchor'])
    # case-insensitive in HTML:
    self.ae(pcss('a[NAme]'), ['name-anchor'])
    self.ae(pcss('a[rel]'), ['tag-anchor', 'nofollow-anchor'])
    self.ae(pcss('a[rel="tag"]'), ['tag-anchor'])
    self.ae(pcss('a[href*="localhost"]'), ['tag-anchor'])
    self.ae(pcss('a[href*=""]'), [])
    self.ae(pcss('a[href^="http"]'), ['tag-anchor', 'nofollow-anchor'])
    self.ae(pcss('a[href^="http:"]'), ['tag-anchor'])
    self.ae(pcss('a[href^=""]'), [])
    self.ae(pcss('a[href$="org"]'), ['nofollow-anchor'])
    self.ae(pcss('a[href$=""]'), [])
    self.ae(
        pcss('div[foobar~="bc"]', 'div[foobar~="cde"]', skip_webkit=True),
        ['foobar-div'])
    self.ae(
        pcss('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]'),
        [])
    self.ae(pcss('div[foobar~="cd"]'), [])
    self.ae(pcss('*[lang|="En"]', '[lang|="En-us"]'), ['second-li'])
    # Attribute values are case sensitive
    self.ae(
        pcss('*[lang|="en"]', '[lang|="en-US"]', skip_webkit=True),
        [])
    self.ae(pcss('*[lang|="e"]'), [])
    self.ae(
        pcss(':lang("EN")', '*:lang(en-US)', skip_webkit=True),
        ['second-li', 'li-div'])
    self.ae(pcss(':lang("e")'), [])
    self.ae(pcss('li:nth-child(1)', 'li:first-child'), ['first-li'])
    self.ae(
        pcss('li:nth-child(3)', '#first-li ~ :nth-child(3)'),
        ['third-li'])
    self.ae(pcss('li:nth-child(10)'), [])
    self.ae(
        pcss('li:nth-child(2n)', 'li:nth-child(even)', 'li:nth-child(2n+0)'),
        ['second-li', 'fourth-li', 'sixth-li'])
    self.ae(
        pcss('li:nth-child(+2n+1)', 'li:nth-child(odd)'),
        ['first-li', 'third-li', 'fifth-li', 'seventh-li'])
    self.ae(pcss('li:nth-child(2n+4)'), ['fourth-li', 'sixth-li'])
    self.ae(
        pcss('li:nth-child(3n+1)'),
        ['first-li', 'fourth-li', 'seventh-li'])
    self.ae(pcss('li:nth-last-child(0)'), [])
    self.ae(
        pcss('li:nth-last-child(1)', 'li:last-child'),
        ['seventh-li'])
    self.ae(
        pcss('li:nth-last-child(2n)', 'li:nth-last-child(even)'),
        ['second-li', 'fourth-li', 'sixth-li'])
    self.ae(
        pcss('li:nth-last-child(2n+2)'),
        ['second-li', 'fourth-li', 'sixth-li'])
    self.ae(pcss('ol:first-of-type'), ['first-ol'])
    self.ae(pcss('ol:nth-child(1)'), [])
    self.ae(pcss('ol:nth-of-type(2)'), ['second-ol'])
    self.ae(pcss('ol:nth-last-of-type(1)'), ['second-ol'])
    self.ae(pcss('span:only-child'), ['foobar-span'])
    self.ae(pcss('li div:only-child'), ['li-div'])
    self.ae(pcss('div *:only-child'), ['li-div', 'foobar-span'])
    self.ae(
        pcss('p *:only-of-type', skip_webkit=True),
        ['p-em', 'fieldset'])
    self.ae(pcss('p:only-of-type', skip_webkit=True), ['paragraph'])
    self.ae(pcss('a:empty', 'a:EMpty'), ['name-anchor'])
    self.ae(
        pcss('li:empty'),
        ['third-li', 'fourth-li', 'fifth-li', 'sixth-li'])
    self.ae(pcss(':root', 'html:root', 'li:root'), ['html'])
    self.ae(pcss('* :root', 'p *:root'), [])
    self.ae(pcss('.a', '.b', '*.a', 'ol.a'), ['first-ol'])
    self.ae(pcss('.c', '*.c'), ['first-ol', 'third-li', 'fourth-li'])
    self.ae(
        pcss('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c'),
        ['third-li', 'fourth-li'])
    self.ae(pcss('#first-li', 'li#first-li', '*#first-li'), ['first-li'])
    self.ae(pcss('li div', 'li > div', 'div div'), ['li-div'])
    self.ae(pcss('div > div'), [])
    self.ae(pcss('div>.c', 'div > .c'), ['first-ol'])
    self.ae(pcss('div + div'), ['foobar-div'])
    self.ae(pcss('a ~ a'), ['tag-anchor', 'nofollow-anchor'])
    self.ae(pcss('a[rel="tag"] ~ a'), ['nofollow-anchor'])
    self.ae(pcss('ol#first-ol li:last-child'), ['seventh-li'])
    self.ae(pcss('ol#first-ol *:last-child'), ['li-div', 'seventh-li'])
    self.ae(pcss('#outer-div:first-child'), ['outer-div'])
    self.ae(
        pcss('#outer-div :first-child'),
        ['name-anchor', 'first-li', 'li-div', 'p-b',
         'checkbox-fieldset-disabled', 'area-href'])
    self.ae(pcss('a[href]'), ['tag-anchor', 'nofollow-anchor'])
    self.ae(pcss(':not(*)'), [])
    self.ae(pcss('a:not([href])'), ['name-anchor'])
    self.ae(
        pcss('ol :Not(li[class])', skip_webkit=True),
        ['first-li', 'second-li', 'li-div',
         'fifth-li', 'sixth-li', 'seventh-li'])
    self.ae(pcss(r'di\a0 v', r'div\['), [])
    self.ae(pcss(r'[h\a0 ref]', r'[h\]ref]'), [])

    # :nth-child without an argument must be rejected when evaluated
    self.assertRaises(
        ExpressionError, lambda: tuple(select('body:nth-child')))

    # With the flag set, a pseudo-class such as :hover no longer prevents
    # the rest of the selector from matching.
    lenient_select = Select(document, ignore_inappropriate_pseudo_classes=True)
    self.assertGreater(len(tuple(lenient_select('p:hover'))), 0)
|
||||
|
||||
def test_select_shakespeare(self):
    """Count the matches of various selectors in the HTML_SHAKESPEARE fixture.

    Uses unittest assertions instead of bare ``assert`` so the checks
    still run under ``python -O`` and failures name the selector.
    """
    document = html.document_fromstring(self.HTML_SHAKESPEARE)
    select = Select(document)

    def count(selector):
        # Number of elements matched by *selector*.
        return sum(1 for _ in select(selector))

    # Data borrowed from http://mootools.net/slickspeed/

    # Changed from original (the original note is cut off mid-sentence;
    # presumably because only part of the document is searched -- TODO confirm)
    self.ae(count('*'), 249)

    expected_counts = (
        ('div:only-child', 22),  # ?
        ('div:nth-child(even)', 106),
        ('div:nth-child(2n)', 106),
        ('div:nth-child(odd)', 137),
        ('div:nth-child(2n+1)', 137),
        ('div:nth-child(n)', 243),
        ('div:last-child', 53),
        ('div:first-child', 51),
        ('div > div', 242),
        ('div + div', 190),
        ('div ~ div', 190),
        ('body', 1),
        ('body div', 243),
        ('div', 243),
        ('div div', 242),
        ('div div div', 241),
        ('div, div, div', 243),
        ('div, a, span', 243),
        ('.dialog', 51),
        ('div.dialog', 51),
        ('div .dialog', 51),
        ('div.character, div.dialog', 99),
        ('div.direction.dialog', 0),
        ('div.dialog.direction', 0),
        ('div.dialog.scene', 1),
        ('div.scene.scene', 1),
        ('div.scene .scene', 0),
        ('div.direction .dialog ', 0),
        ('div .dialog .direction', 4),
        ('div.dialog .dialog .direction', 4),
        ('#speech5', 1),
        ('div#speech5', 1),
        ('div #speech5', 1),
        ('div.scene div.dialog', 49),
        ('div#scene1 div.dialog div', 142),
        ('#scene1 #speech1', 1),
        ('div[class]', 103),
        ('div[class=dialog]', 50),
        ('div[class^=dia]', 51),
        ('div[class$=log]', 50),
        ('div[class*=sce]', 1),
        ('div[class|=dialog]', 50),  # ? Seems right
        ('div[class~=dialog]', 51),  # ? Seems right
    )
    for selector, expected in expected_counts:
        # Pair the selector with the count so a failure names the selector.
        self.ae((selector, count(selector)), (selector, expected))

# }}}
|
||||
|
||||
|
||||
# Run tests {{{
|
||||
def find_tests():
    """Return a suite holding every test method of TestCSSSelectors."""
    return unittest.defaultTestLoader.loadTestsFromTestCase(TestCSSSelectors)


def run_tests(find_tests=find_tests, for_build=False):
    """Run the test suite and exit with status 1 if anything went wrong.

    :param find_tests: callable returning the tests to run.
    :param for_build: when true, the command line is not parsed and the
        runner is quiet, buffers output and stops at the first problem.

    When not running for a build, one optional positional command line
    argument selects what to run:

    * ``.name`` picks the single test method ``test_name`` (the ``test_``
      prefix may also be given explicitly) from ``find_tests()``;
    * any other value is handed to ``unittest``'s ``loadTestsFromName``;
    * no argument runs everything returned by ``find_tests()``.

    :raises SystemExit: with code 1 if the requested test does not exist,
        or if the run recorded any error or failure.
    """
    name = None
    if not for_build:
        parser = argparse.ArgumentParser()
        parser.add_argument('name', nargs='?', default=None,
                            help='The name of the test to run')
        name = parser.parse_args().name

    if name and name.startswith('.'):
        method = name[1:]
        if not method.startswith('test_'):
            method = 'test_' + method
        # First test whose method name matches, or None.
        tests = next(
            (test for test in find_tests() if test._testMethodName == method),
            None)
        if tests is None:
            print('No test named %s found' % name)
            raise SystemExit(1)
    elif name:
        tests = unittest.defaultTestLoader.loadTestsFromName(name)
    else:
        tests = find_tests()

    if for_build:
        runner = unittest.TextTestRunner(verbosity=0, buffer=True, failfast=True)
    else:
        runner = unittest.TextTestRunner(verbosity=4)
    result = runner.run(tests)
    # The previous condition "for_build and result.errors or result.failures"
    # parsed as "(for_build and errors) or failures", so errors in an
    # interactive run still exited with status 0.
    if result.errors or result.failures:
        raise SystemExit(1)
|
||||
|
||||
|
||||
# Run the suite when this module is executed directly as a script.
if __name__ == '__main__':
    run_tests()
|
||||
# }}}
|
||||
Reference in New Issue
Block a user