1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-01-06 11:14:12 +01:00

Initial import

This commit is contained in:
2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
from css_selectors.parser import parse
from css_selectors.select import Select, INAPPROPRIATE_PSEUDO_CLASSES
from css_selectors.errors import SelectorError, SelectorSyntaxError, ExpressionError
__all__ = ['parse', 'Select', 'INAPPROPRIATE_PSEUDO_CLASSES', 'SelectorError', 'SelectorSyntaxError', 'ExpressionError']

View File

@@ -0,0 +1,18 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
class SelectorError(ValueError):
    """Base class of every selector-related error in this package.

    It is the shared ancestor of :class:`SelectorSyntaxError` and
    :class:`ExpressionError`, and is itself a :class:`ValueError`.
    """


class SelectorSyntaxError(SelectorError):
    """Raised when a selector being parsed does not match the grammar."""


class ExpressionError(SelectorError):
    """Raised for an unknown or unsupported selector (e.g. a pseudo-class)."""

View File

@@ -0,0 +1,133 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import collections
try:
    from collections.abc import MutableSet
except ImportError:  # Python 2 has no collections.abc
    from collections import MutableSet

from polyglot.builtins import string_or_bytes
SLICE_ALL = slice(None)
def is_iterable(obj):
    """
    Tell whether `obj` should be treated as a collection of things to look
    up rather than as one single thing.

    Anything exposing `__iter__` counts, which covers types this module does
    not know about (NumPy arrays, for instance). Strings and bytes are the
    exception: they are looked up as atomic values, never as iterables.
    """
    if isinstance(obj, string_or_bytes):
        return False
    return hasattr(obj, '__iter__')
class OrderedSet(MutableSet):
    """
    An OrderedSet is a custom MutableSet that remembers its order, so that
    every entry has an index that can be looked up.

    ``items`` holds the entries in insertion order and ``map`` maps every
    entry to its position in ``items``; the two must always stay in sync.
    """

    def __init__(self, iterable=None):
        self.items = []
        self.map = {}
        if iterable is not None:
            for item in iterable:
                idx = self.map.get(item)
                if idx is None:
                    self.map[item] = len(self.items)
                    self.items.append(item)

    def __len__(self):
        return len(self.items)

    def __getitem__(self, index):
        """
        Get the item at a given index.

        If `index` is a slice, you will get back that slice of items. If it's
        the slice [:], exactly the same object is returned. (If you want an
        independent copy of an OrderedSet, use `OrderedSet.copy()`.)

        If `index` is an iterable, you'll get the OrderedSet of items
        corresponding to those indices. This is similar to NumPy's
        "fancy indexing".

        Raises TypeError for any other kind of index.
        """
        if index == SLICE_ALL:
            return self
        elif hasattr(index, '__index__') or isinstance(index, slice):
            result = self.items[index]
            if isinstance(result, list):
                return OrderedSet(result)
            else:
                return result
        elif is_iterable(index):
            return OrderedSet([self.items[i] for i in index])
        else:
            raise TypeError("Don't know how to index an OrderedSet by %r" %
                            index)

    def copy(self):
        """Return a new, independent OrderedSet with the same entries."""
        return OrderedSet(self)

    def __getstate__(self):
        # Pickle only the ordered entries; the index map is rebuilt on load.
        return tuple(self)

    def __setstate__(self, state):
        self.__init__(state)

    def __contains__(self, key):
        return key in self.map

    def add(self, key):
        """
        Add `key` as an item to this OrderedSet, then return its index.

        If `key` is already in the OrderedSet, return the index it already
        had.
        """
        index = self.map.get(key)
        if index is None:
            self.map[key] = index = len(self.items)
            self.items.append(key)
        return index

    def index(self, key):
        """
        Get the index of a given entry, raising a KeyError if it's not
        present.

        `key` can be an iterable of entries that is not a string, in which case
        this returns a list of indices.
        """
        if is_iterable(key):
            return [self.index(subkey) for subkey in key]
        return self.map[key]

    def discard(self, key):
        """
        Remove `key` if present.

        Returns True when an entry was removed and False when `key` was not
        in the set.
        """
        # The key must leave the index map as well as the list; otherwise
        # `key in self` stays true and add() hands back a stale index.
        index = self.map.pop(key, None)
        if index is None:
            return False
        del self.items[index]
        # Every entry that followed the removed one moved down by one slot.
        for item in self.items[index:]:
            self.map[item] -= 1
        return True

    def __iter__(self):
        return iter(self.items)

    def __reversed__(self):
        return reversed(self.items)

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        # Two OrderedSets are equal only when they hold the same entries in
        # the same order; against anything else the comparison is done in the
        # other object's own type, ignoring order if that type does.
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and self.items == other.items
        try:
            return type(other)(self.map) == other
        except TypeError:
            return False

View File

@@ -0,0 +1,791 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
"""
Tokenizer, parser and parsed objects for CSS selectors.
:copyright: (c) 2007-2012 Ian Bicking and contributors.
See AUTHORS for more details.
:license: BSD, see LICENSE for more details.
"""
import sys
import re
import operator
import string
from css_selectors.errors import SelectorSyntaxError, ExpressionError
from polyglot.builtins import unicode_type, codepoint_to_chr, range
# Translation table (code point -> code point) mapping A-Z onto a-z, for use
# with the translate() method of unicode strings.
utab = dict((code, code + 32) for code in range(ord(u'A'), ord(u'Z') + 1))

if sys.version_info.major >= 3:
    def ascii_lower(x):
        """Lower-case, but only in the ASCII range."""
        return x.translate(utab)

    # No u'' prefix to strip on Python 3: plain repr() is already right.
    urepr = repr
else:
    # Byte-string counterpart of ``utab``.
    tab = string.maketrans(string.ascii_uppercase, string.ascii_lowercase)

    def ascii_lower(string):
        """Lower-case, but only in the ASCII range."""
        table = utab if isinstance(string, unicode_type) else tab
        return string.translate(table)

    def urepr(x):
        """repr() without the ``u`` prefix on unicode strings (also in lists)."""
        if isinstance(x, list):
            return '[%s]' % ', '.join(map(urepr, x))
        text = repr(x)
        if text.startswith(("u'", 'u"')):
            text = text[1:]
        return text
# Parsed objects
class Selector(object):
    """
    Represents a parsed selector.

    ``parsed_tree`` is the root node of the parsed selector and
    ``pseudo_element`` the pseudo-element attached to it, if any.
    """

    def __init__(self, tree, pseudo_element=None):
        self.parsed_tree = tree
        # Plain identifiers are normalised to ASCII lower case; functional
        # pseudo-elements are stored untouched.
        if pseudo_element is not None and not isinstance(
                pseudo_element, FunctionalPseudoElement):
            pseudo_element = ascii_lower(pseudo_element)
        #: A :class:`FunctionalPseudoElement`,
        #: or the identifier for the pseudo-element as a string,
        #: or ``None``.
        #:
        #: +-------------------------+----------------+--------------------------------+
        #: |                         | Selector       | Pseudo-element                 |
        #: +=========================+================+================================+
        #: | CSS3 syntax             | ``a::before``  | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | Older syntax            | ``a:before``   | ``'before'``                   |
        #: +-------------------------+----------------+--------------------------------+
        #: | From the Lists3_ draft, | ``li::marker`` | ``'marker'``                   |
        #: | not in Selectors3       |                |                                |
        #: +-------------------------+----------------+--------------------------------+
        #: | Invalid pseudo-class    | ``li:marker``  | ``None``                       |
        #: +-------------------------+----------------+--------------------------------+
        #: | Functional              | ``a::foo(2)``  | ``FunctionalPseudoElement(…)`` |
        #: +-------------------------+----------------+--------------------------------+
        #:
        #: .. _Lists3: http://www.w3.org/TR/2011/WD-css3-lists-20110524/#marker-pseudoelement
        self.pseudo_element = pseudo_element

    def __repr__(self):
        # The three cases are mutually exclusive. With a plain ``if`` on the
        # second test, the functional form was always overwritten by the
        # '::%s' form.
        if isinstance(self.pseudo_element, FunctionalPseudoElement):
            pseudo_element = repr(self.pseudo_element)
        elif self.pseudo_element:
            pseudo_element = '::%s' % self.pseudo_element
        else:
            pseudo_element = ''
        return '%s[%r%s]' % (
            self.__class__.__name__, self.parsed_tree, pseudo_element)

    def specificity(self):
        """Return the specificity_ of this selector as a tuple of 3 integers.

        A pseudo-element, when present, adds one to the last component.

        .. _specificity: http://www.w3.org/TR/selectors/#specificity
        """
        a, b, c = self.parsed_tree.specificity()
        if self.pseudo_element:
            c += 1
        return a, b, c
class Class(object):
    """
    Parsed form of ``selector.class_name``.
    """

    def __init__(self, selector, class_name):
        self.selector, self.class_name = selector, class_name

    def __repr__(self):
        kind = self.__class__.__name__
        return '%s[%r.%s]' % (kind, self.selector, self.class_name)

    def specificity(self):
        # A class selector adds one to the middle component.
        ids, attrs, elements = self.selector.specificity()
        return ids, attrs + 1, elements
class FunctionalPseudoElement(object):
    """
    Represents selector::name(arguments)

    .. attribute:: name

        The name (identifier) of the pseudo-element, as a string.

    .. attribute:: arguments

        The arguments of the pseudo-element, as a list of tokens.

        **Note:** tokens are not part of the public API,
        and may change between versions.
        Use at your own risks.
    """

    def __init__(self, name, arguments):
        self.name = ascii_lower(name)
        self.arguments = arguments

    def __repr__(self):
        return '%s[::%s(%s)]' % (
            self.__class__.__name__, self.name,
            urepr([token.value for token in self.arguments]))

    def argument_types(self):
        """Return the ``type`` of every argument token, in order."""
        return [token.type for token in self.arguments]

    def specificity(self):
        """Return the specificity contributed by this pseudo-element.

        This object never has a ``selector`` attribute, so the previous
        implementation (which read ``self.selector``) could only raise
        AttributeError. A pseudo-element counts like a type selector, i.e.
        one unit in the last component; that is also what
        ``Selector.specificity`` adds for it.
        """
        return 0, 0, 1
class Function(object):
    """
    Represents selector:name(expr)
    """

    def __init__(self, selector, name, arguments):
        self.selector = selector
        self.name = ascii_lower(name)
        self.arguments = arguments
        # Cache for the ``parsed_arguments`` property.
        self._parsed_arguments = None
        # Flag maintained by parse_arguments(); it used to be read without
        # ever being initialised.
        self.arguments_parsed = False

    def __repr__(self):
        return '%s[%r:%s(%s)]' % (
            self.__class__.__name__, self.selector, self.name,
            urepr([token.value for token in self.arguments]))

    def argument_types(self):
        """Return the ``type`` of every argument token, in order."""
        return [token.type for token in self.arguments]

    @property
    def parsed_arguments(self):
        """The arguments parsed as a series by ``parse_series``.

        The value is computed on first access and cached.

        :raises ExpressionError: if the arguments are not a valid series.
        """
        if self._parsed_arguments is None:
            try:
                self._parsed_arguments = parse_series(self.arguments)
            except ValueError:
                raise ExpressionError("Invalid series: '%r'" % self.arguments)
        return self._parsed_arguments

    def parse_arguments(self):
        """Mark the arguments as parsed (idempotent)."""
        # getattr() keeps this safe for instances created without __init__
        # (for example ones unpickled from an older version).
        if not getattr(self, 'arguments_parsed', False):
            self.arguments_parsed = True

    def specificity(self):
        # A functional pseudo-class adds one to the middle component.
        a, b, c = self.selector.specificity()
        b += 1
        return a, b, c
class Pseudo(object):
    """
    Parsed form of ``selector:ident``.
    """

    def __init__(self, selector, ident):
        self.selector = selector
        # Pseudo-class names are stored ASCII-lower-cased.
        self.ident = ascii_lower(ident)

    def __repr__(self):
        kind = self.__class__.__name__
        return '%s[%r:%s]' % (kind, self.selector, self.ident)

    def specificity(self):
        # A pseudo-class adds one to the middle component.
        ids, attrs, elements = self.selector.specificity()
        return ids, attrs + 1, elements
class Negation(object):
    """
    Parsed form of ``selector:not(subselector)``.
    """

    def __init__(self, selector, subselector):
        self.selector, self.subselector = selector, subselector

    def __repr__(self):
        kind = self.__class__.__name__
        return '%s[%r:not(%r)]' % (kind, self.selector, self.subselector)

    def specificity(self):
        # Component-wise sum of the outer selector and the negated argument.
        outer_a, outer_b, outer_c = self.selector.specificity()
        inner_a, inner_b, inner_c = self.subselector.specificity()
        return outer_a + inner_a, outer_b + inner_b, outer_c + inner_c
class Attrib(object):
    """
    Parsed form of ``selector[namespace|attrib operator value]``.
    """

    def __init__(self, selector, namespace, attrib, operator, value):
        self.selector = selector
        self.namespace = namespace
        self.attrib = attrib
        self.operator = operator
        self.value = value

    def __repr__(self):
        kind = self.__class__.__name__
        name = self.attrib
        if self.namespace:
            name = '%s|%s' % (self.namespace, self.attrib)
        # The bare existence test has neither operator nor value to show.
        if self.operator == 'exists':
            return '%s[%r[%s]]' % (kind, self.selector, name)
        return '%s[%r[%s %s %s]]' % (
            kind, self.selector, name, self.operator, urepr(self.value))

    def specificity(self):
        # An attribute selector adds one to the middle component.
        ids, attrs, elements = self.selector.specificity()
        return ids, attrs + 1, elements
class Element(object):
    """
    Parsed form of ``namespace|element``.

    An ``element`` of `None` stands for the universal selector '*'.
    """

    def __init__(self, namespace=None, element=None):
        self.namespace, self.element = namespace, element

    def __repr__(self):
        shown = self.element or '*'
        if self.namespace:
            shown = '%s|%s' % (self.namespace, shown)
        return '%s[%s]' % (self.__class__.__name__, shown)

    def specificity(self):
        # Only a named element counts; the universal selector adds nothing.
        return (0, 0, 1) if self.element else (0, 0, 0)
class Hash(object):
    """
    Parsed form of ``selector#id``.
    """

    def __init__(self, selector, id):
        self.selector, self.id = selector, id

    def __repr__(self):
        kind = self.__class__.__name__
        return '%s[%r#%s]' % (kind, self.selector, self.id)

    def specificity(self):
        # An id selector adds one to the first component.
        ids, attrs, elements = self.selector.specificity()
        return ids + 1, attrs, elements
class CombinedSelector(object):
    """
    Two selectors joined by a combinator (``' '``, ``'>'``, ``'+'`` or ``'~'``
    as produced by the parser).
    """

    def __init__(self, selector, combinator, subselector):
        assert selector is not None
        self.selector = selector
        self.combinator = combinator
        self.subselector = subselector

    def __repr__(self):
        # A lone space would be unreadable, so it is shown as a word.
        shown = '<followed>' if self.combinator == ' ' else self.combinator
        return '%s[%r %s %r]' % (
            self.__class__.__name__, self.selector, shown, self.subselector)

    def specificity(self):
        # Component-wise sum of both sides.
        left_a, left_b, left_c = self.selector.specificity()
        right_a, right_b, right_c = self.subselector.specificity()
        return left_a + right_a, left_b + right_b, left_c + right_c
# Parser
# The three patterns below recognise the simplest selector shapes so that
# parse() can answer them without running the tokenizer. Each one tolerates
# surrounding CSS whitespace and must match the whole input.
# foo -- a bare element name made of ASCII letters only
_el_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]+)[ \t\r\n\f]*$')
# foo#bar or #bar -- group 1 is the (possibly empty) element, group 2 the id
_id_re = re.compile(r'^[ \t\r\n\f]*([a-zA-Z]*)#([a-zA-Z0-9_-]+)[ \t\r\n\f]*$')
# foo.bar or .bar -- group 1 is the (possibly empty) element, group 2 the
# class name, which must start with an ASCII letter
_class_re = re.compile(
r'^[ \t\r\n\f]*([a-zA-Z]*)\.([a-zA-Z][a-zA-Z0-9_-]*)[ \t\r\n\f]*$')
def parse(css):
    """Parse a CSS *group of selectors*.

    :param css:
        A *group of selectors* as an Unicode string.
    :raises:
        :class:`SelectorSyntaxError` on invalid selectors.
    :returns:
        A list of parsed :class:`Selector` objects, one for each
        selector in the comma-separated group.
    """
    # Fast paths: a bare element, then element#id, then element.class.
    simple = _el_re.match(css)
    if simple:
        return [Selector(Element(element=simple.group(1)))]
    for pattern, node_type in ((_id_re, Hash), (_class_re, Class)):
        found = pattern.match(css)
        if found is not None:
            # An empty element part means "any element".
            tag = found.group(1) or None
            return [Selector(node_type(Element(element=tag), found.group(2)))]

    # General case: tokenize and run the full parser.
    stream = TokenStream(tokenize(css), source=css)
    return list(parse_selector_group(stream))
def parse_selector_group(stream):
    """Yield one :class:`Selector` per member of a comma-separated group."""
    stream.skip_whitespace()
    while True:
        yield Selector(*parse_selector(stream))
        # Anything other than a comma ends the group.
        if stream.peek() != ('DELIM', ','):
            return
        stream.next()
        stream.skip_whitespace()
def parse_selector(stream):
    """Parse one selector: simple selectors chained by combinators.

    Returns ``(tree, pseudo_element)``. Raises :class:`SelectorSyntaxError`
    if a pseudo-element is followed by more selector text.
    """
    terminators = (('EOF', None), ('DELIM', ','))
    result, pseudo_element = parse_simple_selector(stream)
    while True:
        stream.skip_whitespace()
        upcoming = stream.peek()
        if upcoming in terminators:
            return result, pseudo_element
        if pseudo_element:
            raise SelectorSyntaxError(
                'Got pseudo-element ::%s not at the end of a selector'
                % pseudo_element)
        if upcoming.is_delim('+', '>', '~'):
            # An explicit combinator
            combinator = stream.next().value
            stream.skip_whitespace()
        else:
            # Otherwise the previous simple selector stopped on whitespace,
            # which is the descendant combinator.
            combinator = ' '
        next_selector, pseudo_element = parse_simple_selector(stream)
        result = CombinedSelector(result, combinator, next_selector)
# CSS 2.1 pseudo-elements that may be written with a single ':'.
special_pseudo_elements = (
    'first-line', 'first-letter', 'before', 'after')


def parse_simple_selector(stream, inside_negation=False):
    """Parse a sequence of simple selectors (no combinators).

    With ``inside_negation`` set, a closing parenthesis also ends the
    sequence and a nested ``:not()`` is rejected.

    Returns ``(tree, pseudo_element)``; raises :class:`SelectorSyntaxError`
    on malformed input or when nothing at all was consumed.
    """
    stream.skip_whitespace()
    consumed_before = len(stream.used)

    # Optional leading type selector: ``elem``, ``*``, ``ns|elem``, ``*|elem``.
    namespace = element = None
    first = stream.peek()
    if first.type == 'IDENT' or first == ('DELIM', '*'):
        head = stream.next()
        name = head.value if first.type == 'IDENT' else None
        if stream.peek() == ('DELIM', '|'):
            stream.next()
            namespace = name
            element = stream.next_ident_or_star()
        else:
            element = name
    result = Element(namespace, element)
    pseudo_element = None

    while True:
        upcoming = stream.peek()
        if upcoming.type in ('S', 'EOF'):
            break
        if upcoming.is_delim(',', '+', '>', '~'):
            break
        if inside_negation and upcoming == ('DELIM', ')'):
            break
        if pseudo_element:
            raise SelectorSyntaxError(
                'Got pseudo-element ::%s not at the end of a selector'
                % pseudo_element)

        if upcoming.type == 'HASH':
            result = Hash(result, stream.next().value)
            continue
        if upcoming == ('DELIM', '.'):
            stream.next()
            result = Class(result, stream.next_ident())
            continue
        if upcoming == ('DELIM', '['):
            stream.next()
            result = parse_attrib(result, stream)
            continue
        if upcoming != ('DELIM', ':'):
            raise SelectorSyntaxError(
                "Expected selector, got %s" % (upcoming,))

        # From here on we are looking at ':' or '::'.
        stream.next()
        if stream.peek() == ('DELIM', ':'):
            stream.next()
            pseudo_element = stream.next_ident()
            if stream.peek() == ('DELIM', '('):
                stream.next()
                pseudo_element = FunctionalPseudoElement(
                    pseudo_element, parse_arguments(stream))
            continue

        ident = stream.next_ident()
        if ident.lower() in special_pseudo_elements:
            # Special case: CSS 2.1 pseudo-elements can have a single ':'
            # Any new pseudo-element must have two.
            pseudo_element = unicode_type(ident)
            continue
        if stream.peek() != ('DELIM', '('):
            result = Pseudo(result, ident)
            continue

        # Functional pseudo-class: consume '(' and look at what it is.
        stream.next()
        stream.skip_whitespace()
        if ident.lower() != 'not':
            result = Function(result, ident, parse_arguments(stream))
            continue
        if inside_negation:
            raise SelectorSyntaxError('Got nested :not()')
        argument, argument_pseudo_element = parse_simple_selector(
            stream, inside_negation=True)
        closing = stream.next()
        if argument_pseudo_element:
            raise SelectorSyntaxError(
                'Got pseudo-element ::%s inside :not() at %s'
                % (argument_pseudo_element, closing.pos))
        if closing != ('DELIM', ')'):
            raise SelectorSyntaxError("Expected ')', got %s" % (closing,))
        result = Negation(result, argument)

    if len(stream.used) == consumed_before:
        raise SelectorSyntaxError(
            "Expected selector, got %s" % (stream.peek(),))
    return result, pseudo_element
def parse_arguments(stream):
    """Collect argument tokens up to and including the closing ')'.

    Identifiers, strings, numbers and the '+' / '-' delimiters are accepted;
    anything else raises :class:`SelectorSyntaxError`. Whitespace between
    arguments is skipped. Returns the list of argument tokens.
    """
    signs = (('DELIM', '+'), ('DELIM', '-'))
    arguments = []
    while True:
        stream.skip_whitespace()
        token = stream.next()
        if token.type in ('IDENT', 'STRING', 'NUMBER') or token in signs:
            arguments.append(token)
            continue
        if token == ('DELIM', ')'):
            return arguments
        raise SelectorSyntaxError(
            "Expected an argument, got %s" % (token,))
def parse_attrib(selector, stream):
    """Parse the inside of an attribute selector, after the opening '['.

    Returns an :class:`Attrib` wrapping ``selector``; raises
    :class:`SelectorSyntaxError` on malformed input.
    """
    namespace = op = None
    stream.skip_whitespace()
    attrib = stream.next_ident_or_star()
    after_name = stream.peek()
    # A '*' is only meaningful as a namespace wildcard, so '|' must follow.
    if attrib is None and after_name != ('DELIM', '|'):
        raise SelectorSyntaxError(
            "Expected '|', got %s" % (after_name,))
    if after_name == ('DELIM', '|'):
        stream.next()
        if stream.peek() == ('DELIM', '='):
            # It was the '|=' operator, not a namespace separator.
            stream.next()
            op = '|='
        else:
            namespace = attrib
            attrib = stream.next_ident()

    if op is None:
        stream.skip_whitespace()
        token = stream.next()
        if token == ('DELIM', ']'):
            return Attrib(selector, namespace, attrib, 'exists', None)
        if token == ('DELIM', '='):
            op = '='
        elif token.is_delim('^', '$', '*', '~', '|', '!') and (
                stream.peek() == ('DELIM', '=')):
            op = token.value + '='
            stream.next()
        else:
            raise SelectorSyntaxError(
                "Operator expected, got %s" % (token,))

    stream.skip_whitespace()
    value = stream.next()
    if value.type not in ('IDENT', 'STRING'):
        raise SelectorSyntaxError(
            "Expected string or ident, got %s" % (value,))
    stream.skip_whitespace()
    closing = stream.next()
    if closing != ('DELIM', ']'):
        raise SelectorSyntaxError(
            "Expected ']', got %s" % (closing,))
    return Attrib(selector, namespace, attrib, op, value.value)
def parse_series(tokens):
    """
    Parses the arguments for :nth-child() and friends.

    :param tokens: A list of tokens
    :raises ValueError: if a string token is present or a number is malformed
    :returns: ``(a, b)``, the coefficients of ``an + b``
    """
    if any(token.type == 'STRING' for token in tokens):
        raise ValueError('String tokens not allowed in series.')
    s = ''.join(token.value for token in tokens).strip()
    keywords = {'odd': (2, 1), 'even': (2, 0), 'n': (1, 0)}
    if s in keywords:
        return keywords[s]
    if 'n' not in s:
        # Just b
        return (0, int(s))
    a, _, b = s.partition('n')
    if not a:
        a = 1
    elif a in ('-', '+'):
        # A bare sign means a coefficient of -1 / +1.
        a = int(a + '1')
    else:
        a = int(a)
    b = int(b) if b else 0
    return (a, b)
# Token objects
class Token(tuple):
    """A ``(type, value)`` pair that also remembers its source position.

    ``pos`` is kept as a plain attribute, so it takes no part in comparisons
    with ordinary tuples.
    """

    def __new__(cls, type_, value, pos):
        self = super(Token, cls).__new__(cls, (type_, value))
        self.pos = pos
        return self

    def __repr__(self):
        return "<%s '%s' at %i>" % (self.type, self.value, self.pos)

    def is_delim(self, *values):
        """True if this is a DELIM token whose value is one of ``values``."""
        if self.type != 'DELIM':
            return False
        return self.value in values

    @property
    def type(self):
        return self[0]

    @property
    def value(self):
        return self[1]
class EOFToken(Token):
    """End-of-input marker: a token of type ``'EOF'`` with value ``None``."""

    def __new__(cls, pos):
        return super(EOFToken, cls).__new__(cls, 'EOF', None, pos)

    def __repr__(self):
        # There is no value worth showing for this token.
        return '<%s at %i>' % (self.type, self.pos)
# Tokenizer
class TokenMacros:
    """Named regex fragments; ``_compile`` substitutes them by name."""
    nonascii = r'[^\0-\177]'
    # Backslash, 1-6 hex digits, then one optional whitespace terminator.
    unicode_escape = r'\\([0-9a-f]{1,6})(?:\r\n|[ \n\r\t\f])?'
    escape = '|'.join((unicode_escape, r'\\[^\n\r\f0-9a-f]'))
    # Inside strings a backslash may additionally precede a newline.
    string_escape = '|'.join((r'\\(?:\n|\r\n|\r|\f)', escape))
    nmchar = '|'.join(('[_a-z0-9-]', escape, nonascii))
    nmstart = '|'.join(('[_a-z]', escape, nonascii))
def _compile(pattern):
    """Expand ``%(name)s`` macros from TokenMacros and return the bound
    ``match`` method of the resulting case-insensitive regex."""
    expanded = pattern % vars(TokenMacros)
    return re.compile(expanded, re.IGNORECASE).match
# Bound ``match`` methods, one per token kind recognised by tokenize().
_match_whitespace = _compile(r'[ \t\r\n\f]+')
_match_number = _compile(r'[+-]?(?:[0-9]*\.[0-9]+|[0-9]+)')
_match_hash = _compile('#(?:%(nmchar)s)+')
_match_ident = _compile('-?(?:%(nmstart)s)(?:%(nmchar)s)*')
# String bodies, keyed by the opening quote character. The patterns can match
# the empty string and do not consume the closing quote.
_match_string_by_quote = {
"'": _compile(r"([^\n\r\f\\']|%(string_escape)s)*"),
'"': _compile(r'([^\n\r\f\\"]|%(string_escape)s)*'),
}
# Bound ``sub`` methods used to undo the escaping found in token text.
_sub_simple_escape = re.compile(r'\\(.)').sub
_sub_unicode_escape = re.compile(TokenMacros.unicode_escape, re.I).sub
_sub_newline_escape =re.compile(r'\\(?:\n|\r\n|\r|\f)').sub
# Same as r'\1', but faster on CPython
if hasattr(operator, 'methodcaller'):
# Python 2.6+
_replace_simple = operator.methodcaller('group', 1)
else:
def _replace_simple(match):
return match.group(1)
def _replace_unicode(match):
    """Replacement callback turning a hexadecimal escape into its character.

    Code points above ``sys.maxunicode`` become U+FFFD.
    """
    value = int(match.group(1), 16)
    return codepoint_to_chr(0xFFFD if value > sys.maxunicode else value)
def unescape_ident(value):
    """Undo CSS escaping in ``value``: hexadecimal escapes first, then the
    remaining single-character backslash escapes."""
    return _sub_simple_escape(
        _replace_simple, _sub_unicode_escape(_replace_unicode, value))
def tokenize(s):
    """Yield the tokens of the selector string ``s``, ending with an EOFToken.

    The kinds are tried in a fixed order at each position: whitespace,
    identifier, hash, quoted string, number, comment, and finally a
    one-character DELIM. Raises :class:`SelectorSyntaxError` for a string
    that is unclosed or contains an invalid character.
    """
    pos = 0
    end = len(s)
    while pos < end:
        found = _match_whitespace(s, pos=pos)
        if found:
            # A whole run of whitespace collapses into one token.
            yield Token('S', ' ', pos)
            pos = found.end()
            continue

        found = _match_ident(s, pos=pos)
        if found:
            yield Token('IDENT', unescape_ident(found.group()), pos)
            pos = found.end()
            continue

        found = _match_hash(s, pos=pos)
        if found:
            # The leading '#' is not part of the value.
            yield Token('HASH', unescape_ident(found.group()[1:]), pos)
            pos = found.end()
            continue

        quote = s[pos]
        if quote in _match_string_by_quote:
            found = _match_string_by_quote[quote](s, pos=pos + 1)
            assert found, 'Should have found at least an empty match'
            closing = found.end()
            if closing == end:
                raise SelectorSyntaxError('Unclosed string at %s' % pos)
            if s[closing] != quote:
                raise SelectorSyntaxError('Invalid string at %s' % pos)
            # Escaped newlines vanish before the other escapes are resolved.
            text = unescape_ident(_sub_newline_escape('', found.group()))
            yield Token('STRING', text, pos)
            pos = closing + 1
            continue

        found = _match_number(s, pos=pos)
        if found:
            yield Token('NUMBER', found.group(), pos)
            pos = found.end()
            continue

        if s[pos:pos + 2] == '/*':
            # Skip the comment; an unterminated one swallows the rest.
            close = s.find('*/', pos + 2)
            pos = end if close == -1 else close + 2
            continue

        yield Token('DELIM', s[pos], pos)
        pos += 1

    assert pos == end
    yield EOFToken(pos)
class TokenStream(object):
    """Token iterator with one token of look-ahead.

    ``used`` lists every token handed out by :meth:`next`, in order.
    """

    def __init__(self, tokens, source=None):
        self.used = []
        self.tokens = iter(tokens)
        self.source = source
        self.peeked = None
        self._peeking = False
        try:
            fetch = self.tokens.next
        except AttributeError:
            # Python 3
            fetch = self.tokens.__next__
        self.next_token = fetch

    def next(self):
        """Consume and return the next token."""
        if self._peeking:
            # Hand out the token that peek() already pulled.
            self._peeking = False
            token = self.peeked
        else:
            token = self.next_token()
        self.used.append(token)
        return token

    def peek(self):
        """Return the next token without consuming it."""
        if not self._peeking:
            self.peeked = self.next_token()
            self._peeking = True
        return self.peeked

    def next_ident(self):
        """Consume an IDENT token and return its value, else raise."""
        token = self.next()
        if token.type != 'IDENT':
            raise SelectorSyntaxError('Expected ident, got %s' % (token,))
        return token.value

    def next_ident_or_star(self):
        """Consume an IDENT (returning its value) or a '*' (returning None)."""
        token = self.next()
        if token.type == 'IDENT':
            return token.value
        if token == ('DELIM', '*'):
            return None
        raise SelectorSyntaxError(
            "Expected ident or '*', got %s" % (token,))

    def skip_whitespace(self):
        """Consume the next token if it is whitespace."""
        if self.peek().type == 'S':
            self.next()

View File

@@ -0,0 +1,694 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import re, itertools
from collections import OrderedDict, defaultdict
from functools import wraps
from itertools import chain
from lxml import etree
from css_selectors.errors import ExpressionError
from css_selectors.parser import parse, ascii_lower, Element, FunctionalPseudoElement
from css_selectors.ordered_set import OrderedSet
from polyglot.builtins import iteritems, itervalues
# Module-wide cache of parsed selectors, keyed by the raw selector string and
# trimmed by get_parsed_selector() once it grows past PARSE_CACHE_SIZE.
PARSE_CACHE_SIZE = 200
parse_cache = OrderedDict()
# Module-wide cache of compiled XPath expressions, keyed by the expression
# string and trimmed by get_compiled_xpath() past XPATH_CACHE_SIZE.
XPATH_CACHE_SIZE = 30
xpath_cache = OrderedDict()
# Test that the string is not empty and does not contain whitespace
is_non_whitespace = re.compile(r'^[^ \t\r\n\f]+$').match
def get_parsed_selector(raw):
    """Return the parsed form of the selector string ``raw``, using the cache.

    On a miss the selector is parsed and stored; if the cache then exceeds
    PARSE_CACHE_SIZE, the entry that was inserted first is dropped.
    """
    if raw in parse_cache:
        return parse_cache[raw]
    ans = parse(raw)
    parse_cache[raw] = ans
    if len(parse_cache) > PARSE_CACHE_SIZE:
        parse_cache.popitem(last=False)
    return ans
def get_compiled_xpath(expr):
    """Return the compiled ``etree.XPath`` for ``expr``, using the cache.

    On a miss the expression is compiled and stored; if the cache then
    exceeds XPATH_CACHE_SIZE, the entry that was inserted first is dropped.
    """
    if expr in xpath_cache:
        return xpath_cache[expr]
    ans = etree.XPath(expr)
    xpath_cache[expr] = ans
    if len(xpath_cache) > XPATH_CACHE_SIZE:
        xpath_cache.popitem(last=False)
    return ans
class AlwaysIn(object):
    """A container stand-in for which every membership test succeeds."""

    def __contains__(self, x):
        return True


# Shared instance, used where "no filter" must look like a container.
always_in = AlwaysIn()
def trace_wrapper(func):
    """Wrap ``func`` so that every call is printed before it is made.

    A leading :class:`Select` instance is left out of the printed arguments.
    """
    @wraps(func)
    def trace(*args, **kwargs):
        shown = args
        if args and isinstance(args[0], Select):
            shown = args[1:]
        print('Called:', func.__name__, 'with args:', shown, kwargs or '')
        return func(*args, **kwargs)
    return trace
def normalize_language_tag(tag):
    """Return the set of normalized combinations for a `BCP 47` language tag.

    The tag is ASCII-lower-cased and underscores become hyphens. The result
    holds the base language plus every order-preserving combination of the
    remaining subtags appended to it.

    Example:

    >>> sorted(normalize_language_tag('de_AT-1901'))
    ['de', 'de-1901', 'de-at', 'de-at-1901']
    """
    # normalize:
    normalized = ascii_lower(tag).replace('_', '-')
    # split (except singletons, which mark the following tag as non-standard):
    normalized = re.sub(r'-([a-zA-Z0-9])-', r'-\1_', normalized)
    parts = [part.replace('_', '-') for part in normalized.split('-')]
    base, rest = parts[0], parts[1:]
    variants = {base}
    # find all combinations of subtags
    for size in range(len(rest), 0, -1):
        variants.update(
            '-'.join((base,) + combo)
            for combo in itertools.combinations(rest, size))
    return variants
# Pseudo-class / pseudo-element names that a Select instance stores as its
# ``ignore_inappropriate_pseudo_classes`` set when created with
# ignore_inappropriate_pseudo_classes=True.
INAPPROPRIATE_PSEUDO_CLASSES = frozenset((
'active', 'after', 'disabled', 'visited', 'link', 'before', 'focus', 'first-letter', 'enabled', 'first-line', 'hover', 'checked', 'target'))
class Select(object):
'''
This class implements CSS Level 3 selectors
(http://www.w3.org/TR/css3-selectors) on an lxml tree, with caching for
performance. To use:
>>> from css_selectors import Select
>>> select = Select(root) # Where root is an lxml document
>>> print(tuple(select('p.myclass')))
Tags are returned in document order. Note that attribute and tag names are
matched case-insensitively. Class and id values are also matched
case-insensitively. Also namespaces are ignored (this is for performance of
the common case). The UI related selectors are not implemented, such as
:enabled, :disabled, :checked, :hover, etc. Similarly, the non-element
related selectors such as ::first-line, ::first-letter, ::before, etc. are
not implemented.
WARNING: This class uses internal caches. You *must not* make any changes
to the lxml tree. If you do make some changes, either create a new Select
object or call :meth:`invalidate_caches`.
This class can be easily sub-classed to work with tree implementations
other than lxml. Simply override the methods in the ``Tree Integration``
block below.
The caching works by maintaining internal maps from classes/ids/tag
names/etc. to node sets. These caches are populated as needed, and used
for all subsequent selections. Thus, for best performance you should use
the same selector object for finding the matching nodes for multiple
queries. Of course, remember not to change the tree in between queries.
'''
combinator_mapping = {
' ': 'descendant',
'>': 'child',
'+': 'direct_adjacent',
'~': 'indirect_adjacent',
}
attribute_operator_mapping = {
'exists': 'exists',
'=': 'equals',
'~=': 'includes',
'|=': 'dashmatch',
'^=': 'prefixmatch',
'$=': 'suffixmatch',
'*=': 'substringmatch',
}
def __init__(self, root, default_lang=None, ignore_inappropriate_pseudo_classes=False, dispatch_map=None, trace=False):
if hasattr(root, 'getroot'):
root = root.getroot()
self.root = root
self.dispatch_map = dispatch_map or default_dispatch_map
self.invalidate_caches()
self.default_lang = default_lang
if trace:
self.dispatch_map = {k:trace_wrapper(v) for k, v in iteritems(self.dispatch_map)}
if ignore_inappropriate_pseudo_classes:
self.ignore_inappropriate_pseudo_classes = INAPPROPRIATE_PSEUDO_CLASSES
else:
self.ignore_inappropriate_pseudo_classes = frozenset()
# External API {{{
def invalidate_caches(self):
'Invalidate all caches. You must call this before using this object if you have made changes to the HTML tree'
self._element_map = None
self._id_map = None
self._class_map = None
self._attrib_map = None
self._attrib_space_map = None
self._lang_map = None
self.map_tag_name = ascii_lower
if '{' in self.root.tag:
def map_tag_name(x):
return ascii_lower(x.rpartition('}')[2])
self.map_tag_name = map_tag_name
def __call__(self, selector, root=None):
''' Return an iterator over all matching tags, in document order.
Normally, all matching tags in the document are returned, is you
specify root, then only tags that are root or descendants of root are
returned. Note that this can be very expensive if root has a lot of
descendants. '''
seen = set()
if root is not None:
root = frozenset(self.itertag(root))
for parsed_selector in get_parsed_selector(selector):
for item in self.iterparsedselector(parsed_selector):
if item not in seen and (root is None or item in root):
yield item
seen.add(item)
def has_matches(self, selector, root=None):
'Return True iff selector matches at least one item in the tree'
for elem in self(selector, root=root):
return True
return False
# }}}
def iterparsedselector(self, parsed_selector):
type_name = type(parsed_selector).__name__
try:
func = self.dispatch_map[ascii_lower(type_name)]
except KeyError:
raise ExpressionError('%s is not supported' % type_name)
for item in func(self, parsed_selector):
yield item
@property
def element_map(self):
if self._element_map is None:
self._element_map = em = defaultdict(OrderedSet)
for tag in self.itertag():
em[self.map_tag_name(tag.tag)].add(tag)
return self._element_map
@property
def id_map(self):
if self._id_map is None:
self._id_map = im = defaultdict(OrderedSet)
lower = ascii_lower
for elem in self.iteridtags():
im[lower(elem.get('id'))].add(elem)
return self._id_map
@property
def class_map(self):
if self._class_map is None:
self._class_map = cm = defaultdict(OrderedSet)
lower = ascii_lower
for elem in self.iterclasstags():
for cls in elem.get('class').split():
cm[lower(cls)].add(elem)
return self._class_map
@property
def attrib_map(self):
if self._attrib_map is None:
self._attrib_map = am = defaultdict(lambda : defaultdict(OrderedSet))
map_attrib_name = ascii_lower
if '{' in self.root.tag:
def map_attrib_name(x):
return ascii_lower(x.rpartition('}')[2])
for tag in self.itertag():
for attr, val in iteritems(tag.attrib):
am[map_attrib_name(attr)][val].add(tag)
return self._attrib_map
@property
def attrib_space_map(self):
if self._attrib_space_map is None:
self._attrib_space_map = am = defaultdict(lambda : defaultdict(OrderedSet))
map_attrib_name = ascii_lower
if '{' in self.root.tag:
def map_attrib_name(x):
return ascii_lower(x.rpartition('}')[2])
for tag in self.itertag():
for attr, val in iteritems(tag.attrib):
for v in val.split():
am[map_attrib_name(attr)][v].add(tag)
return self._attrib_space_map
@property
def lang_map(self):
if self._lang_map is None:
self._lang_map = lm = defaultdict(OrderedSet)
dl = normalize_language_tag(self.default_lang) if self.default_lang else None
lmap = {tag:dl for tag in self.itertag()} if dl else {}
for tag in self.itertag():
lang = None
for attr in ('{http://www.w3.org/XML/1998/namespace}lang', 'lang'):
lang = tag.get(attr)
if lang:
lang = normalize_language_tag(lang)
for dtag in self.itertag(tag):
lmap[dtag] = lang
for tag, langs in iteritems(lmap):
for lang in langs:
lm[lang].add(tag)
return self._lang_map
# Tree Integration {{{
def itertag(self, tag=None):
return (self.root if tag is None else tag).iter('*')
def iterdescendants(self, tag=None):
return (self.root if tag is None else tag).iterdescendants('*')
def iterchildren(self, tag=None):
return (self.root if tag is None else tag).iterchildren('*')
def itersiblings(self, tag=None, preceding=False):
return (self.root if tag is None else tag).itersiblings('*', preceding=preceding)
def iteridtags(self):
return get_compiled_xpath('//*[@id]')(self.root)
def iterclasstags(self):
return get_compiled_xpath('//*[@class]')(self.root)
def sibling_count(self, child, before=True, same_type=False):
    """Return the number of element siblings before or after ``child``.

    :param child: the element whose siblings are counted
    :param before: count preceding siblings when True, following ones otherwise
    :param same_type: only count siblings that are in the same
        ``element_map`` bucket (same mapped tag name) as ``child``
    :raises ValueError: if ``child`` has no parent

    Only elements are counted. Comments and processing instructions that
    sit between elements are skipped, as CSS structural pseudo-classes
    require, which is why the position is not taken from
    ``parent.index(child)``.
    """
    parent = child.getparent()
    if parent is None:
        raise ValueError('Child has no parent')
    if same_type:
        siblings = OrderedSet(child.itersiblings(preceding=before))
        return len(self.element_map[self.map_tag_name(child.tag)] & siblings)
    # The '*' filter restricts the walk to element nodes
    return sum(1 for _ in child.itersiblings('*', preceding=before))
def all_sibling_count(self, child, same_type=False):
    """Return the number of element siblings of ``child``, on both sides.

    :param child: the element whose siblings are counted
    :param same_type: only count siblings that are in the same
        ``element_map`` bucket (same mapped tag name) as ``child``
    :raises ValueError: if ``child`` has no parent

    Only elements are counted. Comments and processing instructions are
    skipped, which is why the count is not derived from ``len(parent)``.
    """
    parent = child.getparent()
    if parent is None:
        raise ValueError('Child has no parent')
    if same_type:
        siblings = OrderedSet(chain(child.itersiblings(preceding=False), child.itersiblings(preceding=True)))
        return len(self.element_map[self.map_tag_name(child.tag)] & siblings)
    # The '*' filter restricts both walks to element nodes
    return sum(1 for _ in chain(
        child.itersiblings('*', preceding=False),
        child.itersiblings('*', preceding=True)))
def is_empty(self, elem):
    """Return True iff ``elem`` has no child tags and no text content.

    Text counts whether it is the element's own ``text`` or the ``tail``
    of any child node (comment/PI nodes can carry tail text too).
    """
    if any(node.tail for node in elem):
        return False
    # One child element is enough to decide; no need to collect them all
    first_child = next(iter(elem.iterchildren('*')), None)
    if first_child is not None:
        return False
    return not elem.text
# }}}
# Combinators {{{
def select_combinedselector(cache, combined):
    """Evaluate a combined selector (two selectors joined by a combinator).

    The combinator is mapped to a handler name through
    ``cache.combinator_mapping`` and the handler is looked up in
    ``cache.dispatch_map``. The handler receives the matches of the left
    selector and, for the right side, either its matches or ``None``.
    """
    handler_name = cache.combinator_mapping[combined.combinator]
    sub = combined.subselector
    # Fast path: a universal right-hand side is passed as None instead of
    # being evaluated.
    if isinstance(sub, Element) and (sub.element or '*') == '*':
        right = None
    else:
        right = cache.iterparsedselector(sub)
    handler = cache.dispatch_map[handler_name]
    left = cache.iterparsedselector(combined.selector)
    for item in handler(cache, left, right):
        yield item
def select_descendant(cache, left, right):
    """Yield the descendants of each ``left`` element that are in ``right``.

    When ``right`` is None the module-level ``always_in`` object is used
    for the membership test instead of a set built from ``right``.
    """
    if right is None:
        accepted = always_in
    else:
        accepted = frozenset(right)
    for ancestor in left:
        for candidate in cache.iterdescendants(ancestor):
            if candidate in accepted:
                yield candidate
def select_child(cache, left, right):
    """Yield the immediate children of each ``left`` element that are in ``right``.

    When ``right`` is None the module-level ``always_in`` object is used
    for the membership test instead of a set built from ``right``.
    """
    if right is None:
        accepted = always_in
    else:
        accepted = frozenset(right)
    for parent in left:
        for candidate in cache.iterchildren(parent):
            if candidate in accepted:
                yield candidate
def select_direct_adjacent(cache, left, right):
    """Yield the sibling immediately after each ``left`` element, if it is in ``right``.

    When ``right`` is None the module-level ``always_in`` object is used
    for the membership test instead of a set built from ``right``.
    """
    if right is None:
        accepted = always_in
    else:
        accepted = frozenset(right)
    for element in left:
        for neighbour in cache.itersiblings(element):
            if neighbour in accepted:
                yield neighbour
            # Only the very next sibling is a candidate, matching or not
            break
def select_indirect_adjacent(cache, left, right):
    """Yield every following sibling of each ``left`` element that is in ``right``.

    When ``right`` is None the module-level ``always_in`` object is used
    for the membership test instead of a set built from ``right``. An
    element that follows several ``left`` elements is yielded once per
    such element.
    """
    if right is None:
        accepted = always_in
    else:
        accepted = frozenset(right)
    for element in left:
        for later in cache.itersiblings(element):
            if later in accepted:
                yield later
# }}}
def select_element(cache, selector):
    """Evaluate a type selector or the universal selector.

    An empty name or ``*`` yields every tag of the tree; any other name is
    passed through ``ascii_lower`` and looked up in ``cache.element_map``.
    """
    name = selector.element
    if name and name != '*':
        matches = cache.element_map[ascii_lower(name)]
    else:
        matches = cache.itertag()
    for elem in matches:
        yield elem
def select_hash(cache, selector):
    """Evaluate an id selector.

    Yields the matches of the inner selector that are also registered in
    ``cache.id_map`` under the id (after ``ascii_lower``).
    """
    with_id = cache.id_map[ascii_lower(selector.id)]
    if len(with_id) == 0:
        # Nothing carries this id, so the inner selector need not run
        return
    for elem in cache.iterparsedselector(selector.selector):
        if elem in with_id:
            yield elem
def select_class(cache, selector):
    """Evaluate a class selector.

    Yields the matches of the inner selector that are also registered in
    ``cache.class_map`` under the class name (after ``ascii_lower``).
    """
    with_class = cache.class_map[ascii_lower(selector.class_name)]
    if not with_class:
        # Nothing carries this class, so the inner selector need not run
        return
    for elem in cache.iterparsedselector(selector.selector):
        if elem in with_class:
            yield elem
def select_negation(cache, selector):
    """Implement ``:not()``.

    Yields the matches of ``selector.selector`` that are not matched by
    ``selector.subselector``.
    """
    rejected = frozenset(cache.iterparsedselector(selector.subselector))
    for candidate in cache.iterparsedselector(selector.selector):
        if candidate in rejected:
            continue
        yield candidate
# Attribute selectors {{{
def select_attrib(cache, selector):
    """Evaluate an attribute selector.

    The operator is mapped to a handler name through
    ``cache.attribute_operator_mapping``; the handler found in
    ``cache.dispatch_map`` is called with the attribute name (after
    ``ascii_lower``) and the value, and its results filter the matches of
    the inner selector.
    """
    handler_name = cache.attribute_operator_mapping[selector.operator]
    handler = cache.dispatch_map[handler_name]
    matching = frozenset(handler(cache, ascii_lower(selector.attrib), selector.value))
    for candidate in cache.iterparsedselector(selector.selector):
        if candidate in matching:
            yield candidate
def select_exists(cache, attrib, value=None):
    """Yield every element that has the attribute ``attrib``; ``value`` is unused."""
    by_value = cache.attrib_map[attrib]
    for elements in itervalues(by_value):
        for elem in elements:
            yield elem
def select_equals(cache, attrib, value):
    """Yield every element whose attribute ``attrib`` is exactly ``value``."""
    elements = cache.attrib_map[attrib][value]
    for elem in elements:
        yield elem
def select_includes(cache, attrib, value):
    """Yield every element whose attribute ``attrib`` contains the word ``value``.

    Nothing is yielded when ``is_non_whitespace(value)`` is false.
    """
    if not is_non_whitespace(value):
        return
    elements = cache.attrib_space_map[attrib][value]
    for elem in elements:
        yield elem
def select_dashmatch(cache, attrib, value):
    """Yield elements whose ``attrib`` is ``value`` or starts with ``value`` plus ``-``.

    An empty ``value`` matches nothing.
    """
    if not value:
        return
    prefix = value + '-'
    for candidate, elements in iteritems(cache.attrib_map[attrib]):
        if candidate == value or candidate.startswith(prefix):
            for elem in elements:
                yield elem
def select_prefixmatch(cache, attrib, value):
    """Yield elements whose ``attrib`` value starts with ``value``.

    An empty ``value`` matches nothing.
    """
    if not value:
        return
    for candidate, elements in iteritems(cache.attrib_map[attrib]):
        if not candidate.startswith(value):
            continue
        for elem in elements:
            yield elem
def select_suffixmatch(cache, attrib, value):
    """Yield elements whose ``attrib`` value ends with ``value``.

    An empty ``value`` matches nothing.
    """
    if not value:
        return
    for candidate, elements in iteritems(cache.attrib_map[attrib]):
        if not candidate.endswith(value):
            continue
        for elem in elements:
            yield elem
def select_substringmatch(cache, attrib, value):
    """Yield elements whose ``attrib`` value contains ``value`` as a substring.

    An empty ``value`` matches nothing.
    """
    if not value:
        return
    for candidate, elements in iteritems(cache.attrib_map[attrib]):
        if value not in candidate:
            continue
        for elem in elements:
            yield elem
# }}}
# Function selectors {{{
def select_function(cache, function):
    """Evaluate a functional pseudo-class such as ``:nth-child()`` or ``:lang()``.

    The handler is looked up in ``cache.dispatch_map`` under the function
    name with ``-`` replaced by ``_``. ``lang`` handlers produce the
    matching elements in one call; every other handler is a predicate
    called once per candidate element.

    Raises ExpressionError when no handler is registered for the name.
    """
    key = function.name.replace('-', '_')
    try:
        func = cache.dispatch_map[key]
    except KeyError:
        raise ExpressionError(
            "The pseudo-class :%s() is unknown" % function.name)
    if key != 'lang':
        for candidate in cache.iterparsedselector(function.selector):
            if func(cache, function, candidate):
                yield candidate
        return
    in_language = frozenset(func(cache, function))
    for candidate in cache.iterparsedselector(function.selector):
        if candidate in in_language:
            yield candidate
def select_lang(cache, function):
    """Implement ``:lang()``.

    Yields the elements registered in ``cache.lang_map`` under the
    requested language (after ``ascii_lower``) or under any tag that
    starts with the language followed by ``-``. An empty argument yields
    nothing.

    Raises ExpressionError unless the argument is one STRING or one IDENT.
    """
    if function.argument_types() not in (['STRING'], ['IDENT']):
        raise ExpressionError("Expected a single string or ident for :lang(), got %r" % function.arguments)
    lang = function.arguments[0].value
    if not lang:
        return
    lang = ascii_lower(lang)
    prefix = lang + '-'
    for tag_lang, elements in iteritems(cache.lang_map):
        if tag_lang == lang or (tag_lang is not None and tag_lang.startswith(prefix)):
            for elem in elements:
                yield elem
def select_nth_child(cache, function, elem):
    """Implement ``:nth-child()``.

    ``function.parsed_arguments`` is the ``(a, b)`` pair of ``an+b``.
    Returns True when the 1-based position of ``elem`` among its siblings
    equals ``a*n + b`` for some integer ``n >= 0``. An element without a
    parent never matches.
    """
    a, b = function.parsed_arguments
    try:
        position = cache.sibling_count(elem) + 1
    except ValueError:
        return False
    if a == 0:
        return position == b
    # position == a*n + b has an integer solution n >= 0 exactly when the
    # division is exact and the quotient is non-negative
    steps, remainder = divmod(position - b, a)
    return remainder == 0 and steps >= 0
def select_nth_last_child(cache, function, elem):
    """Implement ``:nth-last-child()``.

    Same test as ``:nth-child()`` except that the 1-based position of
    ``elem`` is counted from its last sibling. An element without a parent
    never matches.
    """
    a, b = function.parsed_arguments
    try:
        position = cache.sibling_count(elem, before=False) + 1
    except ValueError:
        return False
    if a == 0:
        return position == b
    # position == a*n + b has an integer solution n >= 0 exactly when the
    # division is exact and the quotient is non-negative
    steps, remainder = divmod(position - b, a)
    return remainder == 0 and steps >= 0
def select_nth_of_type(cache, function, elem):
    """Implement ``:nth-of-type()``.

    Same test as ``:nth-child()`` except that only siblings of the same
    type as ``elem`` are counted. An element without a parent never
    matches.
    """
    a, b = function.parsed_arguments
    try:
        position = cache.sibling_count(elem, same_type=True) + 1
    except ValueError:
        return False
    if a == 0:
        return position == b
    # position == a*n + b has an integer solution n >= 0 exactly when the
    # division is exact and the quotient is non-negative
    steps, remainder = divmod(position - b, a)
    return remainder == 0 and steps >= 0
def select_nth_last_of_type(cache, function, elem):
    """Implement ``:nth-last-of-type()``.

    Same test as ``:nth-child()`` except that only siblings of the same
    type as ``elem`` are counted, starting from the last one. An element
    without a parent never matches.
    """
    a, b = function.parsed_arguments
    try:
        position = cache.sibling_count(elem, before=False, same_type=True) + 1
    except ValueError:
        return False
    if a == 0:
        return position == b
    # position == a*n + b has an integer solution n >= 0 exactly when the
    # division is exact and the quotient is non-negative
    steps, remainder = divmod(position - b, a)
    return remainder == 0 and steps >= 0
# }}}
# Pseudo elements {{{
def pseudo_func(f):
    """Decorator: mark ``f`` as a pseudo-class test by setting ``f.is_pseudo``."""
    setattr(f, 'is_pseudo', True)
    return f
@pseudo_func
def allow_all(cache, item):
    """Pseudo-class test that accepts every element."""
    return True
def get_func_for_pseudo(cache, ident):
    """Return the test function registered for the pseudo-class ``ident``.

    The lookup key is ``ident`` with ``-`` replaced by ``_``. An unknown
    pseudo-class listed in ``cache.ignore_inappropriate_pseudo_classes``
    resolves to ``allow_all``.

    Raises ExpressionError when the pseudo-class is unknown and not
    ignored, or when the registered function lacks the ``is_pseudo`` mark.
    """
    key = ident.replace('-', '_')
    try:
        func = cache.dispatch_map[key]
    except KeyError:
        if ident not in cache.ignore_inappropriate_pseudo_classes:
            raise ExpressionError(
                "The pseudo-class :%s is not supported" % ident)
        func = allow_all
    # Only functions decorated as pseudo-class tests are acceptable
    if not hasattr(func, 'is_pseudo'):
        raise ExpressionError(
            "The pseudo-class :%s is invalid" % ident)
    return func
def select_selector(cache, selector):
    """Evaluate a complete selector, including its optional pseudo-element.

    Without a pseudo-element the matches of the parsed tree are yielded
    as they are. Otherwise the pseudo-element is resolved to a test
    function that filters those matches.

    Raises ExpressionError for functional pseudo-elements.
    """
    pseudo_element = selector.pseudo_element
    if pseudo_element is None:
        for item in cache.iterparsedselector(selector.parsed_tree):
            yield item
        return
    if isinstance(pseudo_element, FunctionalPseudoElement):
        raise ExpressionError(
            "The pseudo-element ::%s is not supported" % pseudo_element.name)
    accepts = get_func_for_pseudo(cache, pseudo_element)
    for item in cache.iterparsedselector(selector.parsed_tree):
        if accepts(cache, item):
            yield item
def select_pseudo(cache, pseudo):
    """Evaluate a non-functional pseudo-class such as ``:root`` or ``:empty``.

    The pseudo-class is resolved to a test function that filters the
    matches of the inner selector.

    Raises ExpressionError (from the lookup) for unsupported pseudo-classes.
    """
    func = get_func_for_pseudo(cache, pseudo.ident)
    if func is select_root:
        # Only one element can be the root, so stop at the first hit. The
        # root must still be a match of the inner selector: ``p:root``
        # must not select a root element that is not a ``p``.
        root = cache.root
        for item in cache.iterparsedselector(pseudo.selector):
            if item is root:
                yield root
                break
        return
    for item in cache.iterparsedselector(pseudo.selector):
        if func(cache, item):
            yield item
@pseudo_func
def select_root(cache, elem):
    """Implement ``:root``: True only for the tree's root element."""
    return cache.root is elem
@pseudo_func
def select_first_child(cache, elem):
    """Implement ``:first-child``; an element without a parent never matches."""
    try:
        preceding = cache.sibling_count(elem)
    except ValueError:
        return False
    return preceding == 0
@pseudo_func
def select_last_child(cache, elem):
    """Implement ``:last-child``; an element without a parent never matches."""
    try:
        following = cache.sibling_count(elem, before=False)
    except ValueError:
        return False
    return following == 0
@pseudo_func
def select_only_child(cache, elem):
    """Implement ``:only-child``; an element without a parent never matches."""
    try:
        others = cache.all_sibling_count(elem)
    except ValueError:
        return False
    return others == 0
@pseudo_func
def select_first_of_type(cache, elem):
    """Implement ``:first-of-type``; an element without a parent never matches."""
    try:
        preceding = cache.sibling_count(elem, same_type=True)
    except ValueError:
        return False
    return preceding == 0
@pseudo_func
def select_last_of_type(cache, elem):
    """Implement ``:last-of-type``; an element without a parent never matches."""
    try:
        following = cache.sibling_count(elem, before=False, same_type=True)
    except ValueError:
        return False
    return following == 0
@pseudo_func
def select_only_of_type(cache, elem):
    """Implement ``:only-of-type``; an element without a parent never matches."""
    try:
        others = cache.all_sibling_count(elem, same_type=True)
    except ValueError:
        return False
    return others == 0
@pseudo_func
def select_empty(cache, elem):
    """Implement ``:empty`` by delegating to ``cache.is_empty``."""
    result = cache.is_empty(elem)
    return result
# }}}
# Every module-level callable named select_<key> is registered under <key>
default_dispatch_map = {
    name[len('select_'):]: obj
    for name, obj in globals().items()
    if name.startswith('select_') and callable(obj)
}
if __name__ == '__main__':
    # Manual smoke test: run one selector against a tiny namespaced document
    from pprint import pprint
    markup = '<body xmlns="xxx" xml:lang="en"><p id="p" class="one two" lang="fr"><a id="a"/><b/><c/><d/></p></body>'
    parser = etree.XMLParser(recover=True, no_network=True, resolve_entities=False)
    root = etree.fromstring(markup, parser=parser)
    select = Select(root, ignore_inappropriate_pseudo_classes=True, trace=True)
    matches = list(select('p:disabled'))
    pprint(matches)

View File

@@ -0,0 +1,843 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import unittest, sys, argparse
from lxml import etree, html
from css_selectors.errors import SelectorSyntaxError, ExpressionError
from css_selectors.parser import tokenize, parse
from css_selectors.select import Select
class TestCSSSelectors(unittest.TestCase):
# Test data {{{
HTML_IDS = '''
<html id="html"><head>
<link id="link-href" href="foo" />
<link id="link-nohref" />
</head><body>
<div id="outer-div">
<a id="name-anchor" name="foo"></a>
<a id="tag-anchor" rel="tag" href="http://localhost/foo">link</a>
<a id="nofollow-anchor" rel="nofollow" href="https://example.org">
link</a>
<ol id="first-ol" class="a b c">
<li id="first-li">content</li>
<li id="second-li" lang="En-us">
<div id="li-div">
</div>
</li>
<li id="third-li" class="ab c"></li>
<li id="fourth-li" class="ab
c"></li>
<li id="fifth-li"></li>
<li id="sixth-li"></li>
<li id="seventh-li"> </li>
</ol>
<p id="paragraph">
<b id="p-b">hi</b> <em id="p-em">there</em>
<b id="p-b2">guy</b>
<input type="checkbox" id="checkbox-unchecked" />
<input type="checkbox" id="checkbox-disabled" disabled="" />
<input type="text" id="text-checked" checked="checked" />
<input type="hidden" />
<input type="hidden" disabled="disabled" />
<input type="checkbox" id="checkbox-checked" checked="checked" />
<input type="checkbox" id="checkbox-disabled-checked"
disabled="disabled" checked="checked" />
<fieldset id="fieldset" disabled="disabled">
<input type="checkbox" id="checkbox-fieldset-disabled" />
<input type="hidden" />
</fieldset>
</p>
<ol id="second-ol">
</ol>
<map name="dummymap">
<area shape="circle" coords="200,250,25" href="foo.html" id="area-href" />
<area shape="default" id="area-nohref" />
</map>
</div>
<div id="foobar-div" foobar="ab bc
cde"><span id="foobar-span"></span></div>
</body></html>
'''
HTML_SHAKESPEARE = '''
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en" debug="true">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
</head>
<body>
<div id="test">
<div class="dialog">
<h2>As You Like It</h2>
<div id="playwright">
by William Shakespeare
</div>
<div class="dialog scene thirdClass" id="scene1">
<h3>ACT I, SCENE III. A room in the palace.</h3>
<div class="dialog">
<div class="direction">Enter CELIA and ROSALIND</div>
</div>
<div id="speech1" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.1">Why, cousin! why, Rosalind! Cupid have mercy! not a word?</div>
</div>
<div id="speech2" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.2">Not one to throw at a dog.</div>
</div>
<div id="speech3" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.3">No, thy words are too precious to be cast away upon</div>
<div id="scene1.3.4">curs; throw some of them at me; come, lame me with reasons.</div>
</div>
<div id="speech4" class="character">ROSALIND</div>
<div id="speech5" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.8">But is all this for your father?</div>
</div>
<div class="dialog">
<div id="scene1.3.5">Then there were two cousins laid up; when the one</div>
<div id="scene1.3.6">should be lamed with reasons and the other mad</div>
<div id="scene1.3.7">without any.</div>
</div>
<div id="speech6" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.9">No, some of it is for my child's father. O, how</div>
<div id="scene1.3.10">full of briers is this working-day world!</div>
</div>
<div id="speech7" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.11">They are but burs, cousin, thrown upon thee in</div>
<div id="scene1.3.12">holiday foolery: if we walk not in the trodden</div>
<div id="scene1.3.13">paths our very petticoats will catch them.</div>
</div>
<div id="speech8" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.14">I could shake them off my coat: these burs are in my heart.</div>
</div>
<div id="speech9" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.15">Hem them away.</div>
</div>
<div id="speech10" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.16">I would try, if I could cry 'hem' and have him.</div>
</div>
<div id="speech11" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.17">Come, come, wrestle with thy affections.</div>
</div>
<div id="speech12" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.18">O, they take the part of a better wrestler than myself!</div>
</div>
<div id="speech13" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.19">O, a good wish upon you! you will try in time, in</div>
<div id="scene1.3.20">despite of a fall. But, turning these jests out of</div>
<div id="scene1.3.21">service, let us talk in good earnest: is it</div>
<div id="scene1.3.22">possible, on such a sudden, you should fall into so</div>
<div id="scene1.3.23">strong a liking with old Sir Rowland's youngest son?</div>
</div>
<div id="speech14" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.24">The duke my father loved his father dearly.</div>
</div>
<div id="speech15" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.25">Doth it therefore ensue that you should love his son</div>
<div id="scene1.3.26">dearly? By this kind of chase, I should hate him,</div>
<div id="scene1.3.27">for my father hated his father dearly; yet I hate</div>
<div id="scene1.3.28">not Orlando.</div>
</div>
<div id="speech16" class="character">ROSALIND</div>
<div title="wtf" class="dialog">
<div id="scene1.3.29">No, faith, hate him not, for my sake.</div>
</div>
<div id="speech17" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.30">Why should I not? doth he not deserve well?</div>
</div>
<div id="speech18" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.31">Let me love him for that, and do you love him</div>
<div id="scene1.3.32">because I do. Look, here comes the duke.</div>
</div>
<div id="speech19" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.33">With his eyes full of anger.</div>
<div class="direction">Enter DUKE FREDERICK, with Lords</div>
</div>
<div id="speech20" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.34">Mistress, dispatch you with your safest haste</div>
<div id="scene1.3.35">And get you from our court.</div>
</div>
<div id="speech21" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.36">Me, uncle?</div>
</div>
<div id="speech22" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.37">You, cousin</div>
<div id="scene1.3.38">Within these ten days if that thou be'st found</div>
<div id="scene1.3.39">So near our public court as twenty miles,</div>
<div id="scene1.3.40">Thou diest for it.</div>
</div>
<div id="speech23" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.41"> I do beseech your grace,</div>
<div id="scene1.3.42">Let me the knowledge of my fault bear with me:</div>
<div id="scene1.3.43">If with myself I hold intelligence</div>
<div id="scene1.3.44">Or have acquaintance with mine own desires,</div>
<div id="scene1.3.45">If that I do not dream or be not frantic,--</div>
<div id="scene1.3.46">As I do trust I am not--then, dear uncle,</div>
<div id="scene1.3.47">Never so much as in a thought unborn</div>
<div id="scene1.3.48">Did I offend your highness.</div>
</div>
<div id="speech24" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.49">Thus do all traitors:</div>
<div id="scene1.3.50">If their purgation did consist in words,</div>
<div id="scene1.3.51">They are as innocent as grace itself:</div>
<div id="scene1.3.52">Let it suffice thee that I trust thee not.</div>
</div>
<div id="speech25" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.53">Yet your mistrust cannot make me a traitor:</div>
<div id="scene1.3.54">Tell me whereon the likelihood depends.</div>
</div>
<div id="speech26" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.55">Thou art thy father's daughter; there's enough.</div>
</div>
<div id="speech27" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.56">So was I when your highness took his dukedom;</div>
<div id="scene1.3.57">So was I when your highness banish'd him:</div>
<div id="scene1.3.58">Treason is not inherited, my lord;</div>
<div id="scene1.3.59">Or, if we did derive it from our friends,</div>
<div id="scene1.3.60">What's that to me? my father was no traitor:</div>
<div id="scene1.3.61">Then, good my liege, mistake me not so much</div>
<div id="scene1.3.62">To think my poverty is treacherous.</div>
</div>
<div id="speech28" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.63">Dear sovereign, hear me speak.</div>
</div>
<div id="speech29" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.64">Ay, Celia; we stay'd her for your sake,</div>
<div id="scene1.3.65">Else had she with her father ranged along.</div>
</div>
<div id="speech30" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.66">I did not then entreat to have her stay;</div>
<div id="scene1.3.67">It was your pleasure and your own remorse:</div>
<div id="scene1.3.68">I was too young that time to value her;</div>
<div id="scene1.3.69">But now I know her: if she be a traitor,</div>
<div id="scene1.3.70">Why so am I; we still have slept together,</div>
<div id="scene1.3.71">Rose at an instant, learn'd, play'd, eat together,</div>
<div id="scene1.3.72">And wheresoever we went, like Juno's swans,</div>
<div id="scene1.3.73">Still we went coupled and inseparable.</div>
</div>
<div id="speech31" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.74">She is too subtle for thee; and her smoothness,</div>
<div id="scene1.3.75">Her very silence and her patience</div>
<div id="scene1.3.76">Speak to the people, and they pity her.</div>
<div id="scene1.3.77">Thou art a fool: she robs thee of thy name;</div>
<div id="scene1.3.78">And thou wilt show more bright and seem more virtuous</div>
<div id="scene1.3.79">When she is gone. Then open not thy lips:</div>
<div id="scene1.3.80">Firm and irrevocable is my doom</div>
<div id="scene1.3.81">Which I have pass'd upon her; she is banish'd.</div>
</div>
<div id="speech32" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.82">Pronounce that sentence then on me, my liege:</div>
<div id="scene1.3.83">I cannot live out of her company.</div>
</div>
<div id="speech33" class="character">DUKE FREDERICK</div>
<div class="dialog">
<div id="scene1.3.84">You are a fool. You, niece, provide yourself:</div>
<div id="scene1.3.85">If you outstay the time, upon mine honour,</div>
<div id="scene1.3.86">And in the greatness of my word, you die.</div>
<div class="direction">Exeunt DUKE FREDERICK and Lords</div>
</div>
<div id="speech34" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.87">O my poor Rosalind, whither wilt thou go?</div>
<div id="scene1.3.88">Wilt thou change fathers? I will give thee mine.</div>
<div id="scene1.3.89">I charge thee, be not thou more grieved than I am.</div>
</div>
<div id="speech35" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.90">I have more cause.</div>
</div>
<div id="speech36" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.91"> Thou hast not, cousin;</div>
<div id="scene1.3.92">Prithee be cheerful: know'st thou not, the duke</div>
<div id="scene1.3.93">Hath banish'd me, his daughter?</div>
</div>
<div id="speech37" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.94">That he hath not.</div>
</div>
<div id="speech38" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.95">No, hath not? Rosalind lacks then the love</div>
<div id="scene1.3.96">Which teacheth thee that thou and I am one:</div>
<div id="scene1.3.97">Shall we be sunder'd? shall we part, sweet girl?</div>
<div id="scene1.3.98">No: let my father seek another heir.</div>
<div id="scene1.3.99">Therefore devise with me how we may fly,</div>
<div id="scene1.3.100">Whither to go and what to bear with us;</div>
<div id="scene1.3.101">And do not seek to take your change upon you,</div>
<div id="scene1.3.102">To bear your griefs yourself and leave me out;</div>
<div id="scene1.3.103">For, by this heaven, now at our sorrows pale,</div>
<div id="scene1.3.104">Say what thou canst, I'll go along with thee.</div>
</div>
<div id="speech39" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.105">Why, whither shall we go?</div>
</div>
<div id="speech40" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.106">To seek my uncle in the forest of Arden.</div>
</div>
<div id="speech41" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.107">Alas, what danger will it be to us,</div>
<div id="scene1.3.108">Maids as we are, to travel forth so far!</div>
<div id="scene1.3.109">Beauty provoketh thieves sooner than gold.</div>
</div>
<div id="speech42" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.110">I'll put myself in poor and mean attire</div>
<div id="scene1.3.111">And with a kind of umber smirch my face;</div>
<div id="scene1.3.112">The like do you: so shall we pass along</div>
<div id="scene1.3.113">And never stir assailants.</div>
</div>
<div id="speech43" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.114">Were it not better,</div>
<div id="scene1.3.115">Because that I am more than common tall,</div>
<div id="scene1.3.116">That I did suit me all points like a man?</div>
<div id="scene1.3.117">A gallant curtle-axe upon my thigh,</div>
<div id="scene1.3.118">A boar-spear in my hand; and--in my heart</div>
<div id="scene1.3.119">Lie there what hidden woman's fear there will--</div>
<div id="scene1.3.120">We'll have a swashing and a martial outside,</div>
<div id="scene1.3.121">As many other mannish cowards have</div>
<div id="scene1.3.122">That do outface it with their semblances.</div>
</div>
<div id="speech44" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.123">What shall I call thee when thou art a man?</div>
</div>
<div id="speech45" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.124">I'll have no worse a name than Jove's own page;</div>
<div id="scene1.3.125">And therefore look you call me Ganymede.</div>
<div id="scene1.3.126">But what will you be call'd?</div>
</div>
<div id="speech46" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.127">Something that hath a reference to my state</div>
<div id="scene1.3.128">No longer Celia, but Aliena.</div>
</div>
<div id="speech47" class="character">ROSALIND</div>
<div class="dialog">
<div id="scene1.3.129">But, cousin, what if we assay'd to steal</div>
<div id="scene1.3.130">The clownish fool out of your father's court?</div>
<div id="scene1.3.131">Would he not be a comfort to our travel?</div>
</div>
<div id="speech48" class="character">CELIA</div>
<div class="dialog">
<div id="scene1.3.132">He'll go along o'er the wide world with me;</div>
<div id="scene1.3.133">Leave me alone to woo him. Let's away,</div>
<div id="scene1.3.134">And get our jewels and our wealth together,</div>
<div id="scene1.3.135">Devise the fittest time and safest way</div>
<div id="scene1.3.136">To hide us from pursuit that will be made</div>
<div id="scene1.3.137">After my flight. Now go we in content</div>
<div id="scene1.3.138">To liberty and not to banishment.</div>
<div class="direction">Exeunt</div>
</div>
</div>
</div>
</div>
</body>
</html>
'''
# }}}
ae = unittest.TestCase.assertEqual
def test_tokenizer(self):  # {{{
    """Check the type, value and offset of every token ``tokenize`` produces."""
    # Non-ASCII and invisible characters are written as escapes so that an
    # editor or viewer cannot silently alter them: \xe9 is e-acute and
    # \xa0 is the no-break space that must follow the "f".
    css = 'E\\ \xe9 > f\xa0[a~="y\\"x"]:nth(/* fu /]* */-3.7)'
    tokens = [type('')(item) for item in tokenize(css)]
    self.ae(tokens, [
        "<IDENT 'E \xe9' at 0>",
        "<S ' ' at 4>",
        "<DELIM '>' at 5>",
        "<S ' ' at 6>",
        # the no-break space is not whitespace in CSS
        "<IDENT 'f\xa0' at 7>",
        "<DELIM '[' at 9>",
        "<IDENT 'a' at 10>",
        "<DELIM '~' at 11>",
        "<DELIM '=' at 12>",
        "<STRING 'y\"x' at 13>",
        "<DELIM ']' at 19>",
        "<DELIM ':' at 20>",
        "<IDENT 'nth' at 21>",
        "<DELIM '(' at 24>",
        "<NUMBER '-3.7' at 37>",
        "<DELIM ')' at 41>",
        "<EOF at 42>",
    ])
# }}}
def test_parser(self):  # {{{
    """Check the repr of the parsed tree for a range of selectors.

    unittest assertions are used throughout (rather than bare ``assert``)
    so the checks still run under ``python -O`` and report both values on
    failure.
    """
    def repr_parse(css):
        # Parse css and return the repr of each selector's tree
        selectors = parse(css)
        for selector in selectors:
            self.assertIsNone(selector.pseudo_element)
        return [repr(selector.parsed_tree).replace("(u'", "('")
                for selector in selectors]

    def parse_many(first, *others):
        # All spellings must parse to the same trees; return those trees
        result = repr_parse(first)
        for other in others:
            self.ae(repr_parse(other), result)
        return result

    self.ae(parse_many('*'), ['Element[*]'])
    self.ae(parse_many('*|*'), ['Element[*]'])
    self.ae(parse_many('*|foo'), ['Element[foo]'])
    self.ae(parse_many('foo|*'), ['Element[foo|*]'])
    self.ae(parse_many('foo|bar'), ['Element[foo|bar]'])
    # This will never match, but it is valid:
    self.ae(parse_many('#foo#bar'), ['Hash[Hash[Element[*]#foo]#bar]'])
    self.ae(parse_many(
        'div>.foo',
        'div> .foo',
        'div >.foo',
        'div > .foo',
        'div \n> \t \t .foo', 'div\r>\n\n\n.foo', 'div\f>\f.foo'
    ), ['CombinedSelector[Element[div] > Class[Element[*].foo]]'])
    self.ae(parse_many(
        'td.foo,.bar',
        'td.foo, .bar',
        'td.foo\t\r\n\f ,\t\r\n\f .bar'
    ), [
        'Class[Element[td].foo]',
        'Class[Element[*].bar]'
    ])
    self.ae(parse_many('div, td.foo, div.bar span'), [
        'Element[div]',
        'Class[Element[td].foo]',
        'CombinedSelector[Class[Element[div].bar] '
        '<followed> Element[span]]'])
    self.ae(parse_many('div > p'), [
        'CombinedSelector[Element[div] > Element[p]]'])
    self.ae(parse_many('td:first'), [
        'Pseudo[Element[td]:first]'])
    self.ae(parse_many('td:first'), [
        'Pseudo[Element[td]:first]'])
    self.ae(parse_many('td :first'), [
        'CombinedSelector[Element[td] '
        '<followed> Pseudo[Element[*]:first]]'])
    self.ae(parse_many('td :first'), [
        'CombinedSelector[Element[td] '
        '<followed> Pseudo[Element[*]:first]]'])
    self.ae(parse_many('a[name]', 'a[ name\t]'), [
        'Attrib[Element[a][name]]'])
    self.ae(parse_many('a [name]'), [
        'CombinedSelector[Element[a] <followed> Attrib[Element[*][name]]]'])
    self.ae(parse_many('a[rel="include"]', 'a[rel = include]'), [
        "Attrib[Element[a][rel = 'include']]"])
    self.ae(parse_many("a[hreflang |= 'en']", "a[hreflang|=en]"), [
        "Attrib[Element[a][hreflang |= 'en']]"])
    self.ae(parse_many('div:nth-child(10)'), [
        "Function[Element[div]:nth-child(['10'])]"])
    self.ae(parse_many(':nth-child(2n+2)'), [
        "Function[Element[*]:nth-child(['2', 'n', '+2'])]"])
    self.ae(parse_many('div:nth-of-type(10)'), [
        "Function[Element[div]:nth-of-type(['10'])]"])
    self.ae(parse_many('div div:nth-of-type(10) .aclass'), [
        'CombinedSelector[CombinedSelector[Element[div] <followed> '
        "Function[Element[div]:nth-of-type(['10'])]] "
        '<followed> Class[Element[*].aclass]]'])
    self.ae(parse_many('label:only'), [
        'Pseudo[Element[label]:only]'])
    self.ae(parse_many('a:lang(fr)'), [
        "Function[Element[a]:lang(['fr'])]"])
    self.ae(parse_many('div:contains("foo")'), [
        "Function[Element[div]:contains(['foo'])]"])
    self.ae(parse_many('div#foobar'), [
        'Hash[Element[div]#foobar]'])
    self.ae(parse_many('div:not(div.foo)'), [
        'Negation[Element[div]:not(Class[Element[div].foo])]'])
    self.ae(parse_many('td ~ th'), [
        'CombinedSelector[Element[td] ~ Element[th]]'])
# }}}
def test_pseudo_elements(self):  # {{{
    """Check that pseudo-elements are split off from the parsed tree."""
    def parse_pseudo(css):
        # Return a (tree repr, pseudo-element as text or None) pair per selector
        result = []
        for selector in parse(css):
            pseudo = selector.pseudo_element
            pseudo = type('')(pseudo) if pseudo else pseudo
            # No Symbol here
            self.assertTrue(pseudo is None or isinstance(pseudo, type('')))
            selector = repr(selector.parsed_tree).replace("(u'", "('")
            result.append((selector, pseudo))
        return result

    def parse_one(css):
        # Like parse_pseudo, for css that holds exactly one selector
        result = parse_pseudo(css)
        self.ae(len(result), 1)
        return result[0]

    self.ae(parse_one('foo'), ('Element[foo]', None))
    self.ae(parse_one('*'), ('Element[*]', None))
    self.ae(parse_one(':empty'), ('Pseudo[Element[*]:empty]', None))
    # Special cases for CSS 2.1 pseudo-elements
    self.ae(parse_one(':BEfore'), ('Element[*]', 'before'))
    self.ae(parse_one(':aftER'), ('Element[*]', 'after'))
    self.ae(parse_one(':First-Line'), ('Element[*]', 'first-line'))
    self.ae(parse_one(':First-Letter'), ('Element[*]', 'first-letter'))
    self.ae(parse_one('::befoRE'), ('Element[*]', 'before'))
    self.ae(parse_one('::AFter'), ('Element[*]', 'after'))
    self.ae(parse_one('::firsT-linE'), ('Element[*]', 'first-line'))
    self.ae(parse_one('::firsT-letteR'), ('Element[*]', 'first-letter'))
    self.ae(parse_one('::text-content'), ('Element[*]', 'text-content'))
    self.ae(parse_one('::attr(name)'), (
        "Element[*]", "FunctionalPseudoElement[::attr(['name'])]"))
    self.ae(parse_one('::Selection'), ('Element[*]', 'selection'))
    self.ae(parse_one('foo:after'), ('Element[foo]', 'after'))
    self.ae(parse_one('foo::selection'), ('Element[foo]', 'selection'))
    self.ae(parse_one('lorem#ipsum ~ a#b.c[href]:empty::selection'), (
        'CombinedSelector[Hash[Element[lorem]#ipsum] ~ '
        'Pseudo[Attrib[Class[Hash[Element[a]#b].c][href]]:empty]]',
        'selection'))
    # This comparison used to be evaluated and thrown away; assert it.
    self.ae(parse_pseudo('foo:before, bar, baz:after'), [
        ('Element[foo]', 'before'),
        ('Element[bar]', None),
        ('Element[baz]', 'after')])
# }}}
def test_specificity(self):  # {{{
    """Check the specificity triple computed for a range of selectors.

    unittest assertions are used (rather than bare ``assert``) so the
    checks still run under ``python -O``.
    """
    def specificity(css):
        # css must hold exactly one selector; return its specificity
        selectors = parse(css)
        self.ae(len(selectors), 1)
        return selectors[0].specificity()

    cases = (
        ('*', (0, 0, 0)),
        (' foo', (0, 0, 1)),
        (':empty ', (0, 1, 0)),
        (':before', (0, 0, 1)),
        ('*:before', (0, 0, 1)),
        (':nth-child(2)', (0, 1, 0)),
        ('.bar', (0, 1, 0)),
        ('[baz]', (0, 1, 0)),
        ('[baz="4"]', (0, 1, 0)),
        ('[baz^="4"]', (0, 1, 0)),
        ('#lipsum', (1, 0, 0)),

        (':not(*)', (0, 0, 0)),
        (':not(foo)', (0, 0, 1)),
        (':not(.foo)', (0, 1, 0)),
        (':not([foo])', (0, 1, 0)),
        (':not(:empty)', (0, 1, 0)),
        (':not(#foo)', (1, 0, 0)),

        ('foo:empty', (0, 1, 1)),
        ('foo:before', (0, 0, 2)),
        ('foo::before', (0, 0, 2)),
        ('foo:empty::before', (0, 1, 2)),

        ('#lorem + foo#ipsum:first-child > bar:first-line', (2, 1, 3)),
    )
    for css, expected in cases:
        # Pair the selector with the value so a failure names the selector
        self.ae((css, specificity(css)), (css, expected))
# }}}
def test_parse_errors(self):  # {{{
    # Check the exact SelectorSyntaxError message produced for malformed
    # selectors, and that a few well-formed ones parse without that error.
    def get_error(css):
        # Return the message of the SelectorSyntaxError raised while parsing
        # css, or None when parsing raises no such error.
        try:
            parse(css)
        except SelectorSyntaxError as err:
            # Py2, Py3, ...: presumably this strips the u prefix Python 2
            # puts on unicode reprs, so one expected string fits both.
            return str(err).replace("(u'", "('")
        return None

    # Every check goes through the unittest assertion helpers (previously
    # only the first one did): bare assert statements are compiled away under
    # ``python -O`` and say nothing about the message actually received.
    self.ae(get_error('attributes(href)/html/body/a'),
            "Expected selector, got <DELIM '(' at 10>")
    self.ae(get_error('attributes(href)'),
            "Expected selector, got <DELIM '(' at 10>")
    self.ae(get_error('html/body/a'),
            "Expected selector, got <DELIM '/' at 4>")
    self.ae(get_error(' '),
            "Expected selector, got <EOF at 1>")
    self.ae(get_error('div, '),
            "Expected selector, got <EOF at 5>")
    self.ae(get_error(' , div'),
            "Expected selector, got <DELIM ',' at 1>")
    self.ae(get_error('p, , div'),
            "Expected selector, got <DELIM ',' at 3>")
    self.ae(get_error('div > '),
            "Expected selector, got <EOF at 6>")
    self.ae(get_error(' > div'),
            "Expected selector, got <DELIM '>' at 2>")
    self.ae(get_error('foo|#bar'),
            "Expected ident or '*', got <HASH 'bar' at 4>")
    self.ae(get_error('#.foo'),
            "Expected selector, got <DELIM '#' at 0>")
    self.ae(get_error('.#foo'),
            "Expected ident, got <HASH 'foo' at 1>")
    self.ae(get_error(':#foo'),
            "Expected ident, got <HASH 'foo' at 1>")
    self.ae(get_error('[*]'),
            "Expected '|', got <DELIM ']' at 2>")
    self.ae(get_error('[foo|]'),
            "Expected ident, got <DELIM ']' at 5>")
    self.ae(get_error('[#]'),
            "Expected ident or '*', got <DELIM '#' at 1>")
    self.ae(get_error('[foo=#]'),
            "Expected string or ident, got <DELIM '#' at 5>")
    self.ae(get_error('[href]a'),
            "Expected selector, got <IDENT 'a' at 6>")
    self.assertIsNone(get_error('[rel=stylesheet]'))
    self.ae(get_error('[rel:stylesheet]'),
            "Operator expected, got <DELIM ':' at 4>")
    self.ae(get_error('[rel=stylesheet'),
            "Expected ']', got <EOF at 15>")
    self.assertIsNone(get_error(':lang(fr)'))
    self.ae(get_error(':lang(fr'),
            "Expected an argument, got <EOF at 8>")
    self.ae(get_error(':contains("foo'),
            "Unclosed string at 10")
    self.ae(get_error('foo!'),
            "Expected selector, got <DELIM '!' at 3>")

    # Mis-placed pseudo-elements
    self.ae(get_error('a:before:empty'),
            "Got pseudo-element ::before not at the end of a selector")
    self.ae(get_error('li:before a'),
            "Got pseudo-element ::before not at the end of a selector")
    self.ae(get_error(':not(:before)'),
            "Got pseudo-element ::before inside :not() at 12")
    self.ae(get_error(':not(:not(a))'),
            "Got nested :not()")
# }}}
def test_select(self):  # {{{
    # Run a battery of selectors against the HTML_IDS fixture and compare the
    # id attributes of the matched elements, in the order they are returned.
    xml_parser = etree.XMLParser(recover=True, no_network=True, resolve_entities=False)
    document = etree.fromstring(self.HTML_IDS, parser=xml_parser)
    select = Select(document)

    def ids_for(main, *equivalents, **kwargs):
        # Return the ids matched by main after checking that every selector
        # in equivalents matches exactly the same ids. Keyword arguments
        # (skip_webkit in the calls below) are accepted and ignored.
        expected = [elem.get('id') for elem in select(main)]
        for selector in equivalents:
            self.ae([elem.get('id') for elem in select(selector)], expected)
        return expected

    every_id = ids_for('*')
    self.ae(every_id[:6], [
        'html', None, 'link-href', 'link-nohref', None, 'outer-div'])
    self.ae(every_id[-1:], ['foobar-span'])
    self.ae(ids_for('div'), ['outer-div', 'li-div', 'foobar-div'])
    self.ae(ids_for('DIV'), [
        'outer-div', 'li-div', 'foobar-div'])  # case-insensitive in HTML
    self.ae(ids_for('div div'), ['li-div'])
    self.ae(ids_for('div, div div'), ['outer-div', 'li-div', 'foobar-div'])
    self.ae(ids_for('a[name]'), ['name-anchor'])
    self.ae(ids_for('a[NAme]'), ['name-anchor'])  # case-insensitive in HTML:
    self.ae(ids_for('a[rel]'), ['tag-anchor', 'nofollow-anchor'])
    self.ae(ids_for('a[rel="tag"]'), ['tag-anchor'])
    self.ae(ids_for('a[href*="localhost"]'), ['tag-anchor'])
    self.ae(ids_for('a[href*=""]'), [])
    self.ae(ids_for('a[href^="http"]'), ['tag-anchor', 'nofollow-anchor'])
    self.ae(ids_for('a[href^="http:"]'), ['tag-anchor'])
    self.ae(ids_for('a[href^=""]'), [])
    self.ae(ids_for('a[href$="org"]'), ['nofollow-anchor'])
    self.ae(ids_for('a[href$=""]'), [])
    self.ae(ids_for('div[foobar~="bc"]', 'div[foobar~="cde"]', skip_webkit=True), ['foobar-div'])
    self.ae(ids_for('[foobar~="ab bc"]', '[foobar~=""]', '[foobar~=" \t"]'), [])
    self.ae(ids_for('div[foobar~="cd"]'), [])
    self.ae(ids_for('*[lang|="En"]', '[lang|="En-us"]'), ['second-li'])
    # Attribute values are case sensitive
    self.ae(ids_for('*[lang|="en"]', '[lang|="en-US"]', skip_webkit=True), [])
    self.ae(ids_for('*[lang|="e"]'), [])
    self.ae(ids_for(':lang("EN")', '*:lang(en-US)', skip_webkit=True), ['second-li', 'li-div'])
    self.ae(ids_for(':lang("e")'), [])
    self.ae(ids_for('li:nth-child(1)', 'li:first-child'), ['first-li'])
    self.ae(ids_for('li:nth-child(3)', '#first-li ~ :nth-child(3)'), ['third-li'])
    self.ae(ids_for('li:nth-child(10)'), [])
    self.ae(ids_for('li:nth-child(2n)', 'li:nth-child(even)', 'li:nth-child(2n+0)'), ['second-li', 'fourth-li', 'sixth-li'])
    self.ae(ids_for('li:nth-child(+2n+1)', 'li:nth-child(odd)'), ['first-li', 'third-li', 'fifth-li', 'seventh-li'])
    self.ae(ids_for('li:nth-child(2n+4)'), ['fourth-li', 'sixth-li'])
    self.ae(ids_for('li:nth-child(3n+1)'), ['first-li', 'fourth-li', 'seventh-li'])
    self.ae(ids_for('li:nth-last-child(0)'), [])
    self.ae(ids_for('li:nth-last-child(1)', 'li:last-child'), ['seventh-li'])
    self.ae(ids_for('li:nth-last-child(2n)', 'li:nth-last-child(even)'), ['second-li', 'fourth-li', 'sixth-li'])
    self.ae(ids_for('li:nth-last-child(2n+2)'), ['second-li', 'fourth-li', 'sixth-li'])
    self.ae(ids_for('ol:first-of-type'), ['first-ol'])
    self.ae(ids_for('ol:nth-child(1)'), [])
    self.ae(ids_for('ol:nth-of-type(2)'), ['second-ol'])
    self.ae(ids_for('ol:nth-last-of-type(1)'), ['second-ol'])
    self.ae(ids_for('span:only-child'), ['foobar-span'])
    self.ae(ids_for('li div:only-child'), ['li-div'])
    self.ae(ids_for('div *:only-child'), ['li-div', 'foobar-span'])
    self.ae(ids_for('p *:only-of-type', skip_webkit=True), ['p-em', 'fieldset'])
    self.ae(ids_for('p:only-of-type', skip_webkit=True), ['paragraph'])
    self.ae(ids_for('a:empty', 'a:EMpty'), ['name-anchor'])
    self.ae(ids_for('li:empty'), ['third-li', 'fourth-li', 'fifth-li', 'sixth-li'])
    self.ae(ids_for(':root', 'html:root', 'li:root'), ['html'])
    self.ae(ids_for('* :root', 'p *:root'), [])
    self.ae(ids_for('.a', '.b', '*.a', 'ol.a'), ['first-ol'])
    self.ae(ids_for('.c', '*.c'), ['first-ol', 'third-li', 'fourth-li'])
    self.ae(ids_for('ol *.c', 'ol li.c', 'li ~ li.c', 'ol > li.c'), [
        'third-li', 'fourth-li'])
    self.ae(ids_for('#first-li', 'li#first-li', '*#first-li'), ['first-li'])
    self.ae(ids_for('li div', 'li > div', 'div div'), ['li-div'])
    self.ae(ids_for('div > div'), [])
    self.ae(ids_for('div>.c', 'div > .c'), ['first-ol'])
    self.ae(ids_for('div + div'), ['foobar-div'])
    self.ae(ids_for('a ~ a'), ['tag-anchor', 'nofollow-anchor'])
    self.ae(ids_for('a[rel="tag"] ~ a'), ['nofollow-anchor'])
    self.ae(ids_for('ol#first-ol li:last-child'), ['seventh-li'])
    self.ae(ids_for('ol#first-ol *:last-child'), ['li-div', 'seventh-li'])
    self.ae(ids_for('#outer-div:first-child'), ['outer-div'])
    self.ae(ids_for('#outer-div :first-child'), [
        'name-anchor', 'first-li', 'li-div', 'p-b',
        'checkbox-fieldset-disabled', 'area-href'])
    self.ae(ids_for('a[href]'), ['tag-anchor', 'nofollow-anchor'])
    self.ae(ids_for(':not(*)'), [])
    self.ae(ids_for('a:not([href])'), ['name-anchor'])
    self.ae(ids_for('ol :Not(li[class])', skip_webkit=True), [
        'first-li', 'second-li', 'li-div',
        'fifth-li', 'sixth-li', 'seventh-li'])
    self.ae(ids_for(r'di\a0 v', r'div\['), [])
    self.ae(ids_for(r'[h\a0 ref]', r'[h\]ref]'), [])

    # tuple() drains the result so that the error surfaces inside the
    # assertRaises block.
    with self.assertRaises(ExpressionError):
        tuple(select('body:nth-child'))

    # With ignore_inappropriate_pseudo_classes=True, p:hover must match at
    # least one element.
    lenient_select = Select(document, ignore_inappropriate_pseudo_classes=True)
    self.assertGreater(len(tuple(lenient_select('p:hover'))), 0)
def test_select_shakespeare(self):
    # Count the elements matched by a range of selectors in the
    # HTML_SHAKESPEARE fixture and compare with known totals.
    document = html.document_fromstring(self.HTML_SHAKESPEARE)
    select = Select(document)

    def count(selector):
        # Number of elements that selector matches in the document.
        return sum(1 for _ in select(selector))

    # Data borrowed from http://mootools.net/slickspeed/
    # The total for '*' was changed from the original data; the note giving
    # the reason is cut off in this file ("probably because I'm only ...").
    #
    # All checks use self.ae, as the first one already did: bare assert
    # statements are compiled away under ``python -O`` and do not show the
    # count that was actually obtained.
    self.ae(count('*'), 249)
    self.ae(count('div:only-child'), 22)  # ?
    self.ae(count('div:nth-child(even)'), 106)
    self.ae(count('div:nth-child(2n)'), 106)
    self.ae(count('div:nth-child(odd)'), 137)
    self.ae(count('div:nth-child(2n+1)'), 137)
    self.ae(count('div:nth-child(n)'), 243)
    self.ae(count('div:last-child'), 53)
    self.ae(count('div:first-child'), 51)
    self.ae(count('div > div'), 242)
    self.ae(count('div + div'), 190)
    self.ae(count('div ~ div'), 190)
    self.ae(count('body'), 1)
    self.ae(count('body div'), 243)
    self.ae(count('div'), 243)
    self.ae(count('div div'), 242)
    self.ae(count('div div div'), 241)
    self.ae(count('div, div, div'), 243)
    self.ae(count('div, a, span'), 243)
    self.ae(count('.dialog'), 51)
    self.ae(count('div.dialog'), 51)
    self.ae(count('div .dialog'), 51)
    self.ae(count('div.character, div.dialog'), 99)
    self.ae(count('div.direction.dialog'), 0)
    self.ae(count('div.dialog.direction'), 0)
    self.ae(count('div.dialog.scene'), 1)
    self.ae(count('div.scene.scene'), 1)
    self.ae(count('div.scene .scene'), 0)
    self.ae(count('div.direction .dialog '), 0)
    self.ae(count('div .dialog .direction'), 4)
    self.ae(count('div.dialog .dialog .direction'), 4)
    self.ae(count('#speech5'), 1)
    self.ae(count('div#speech5'), 1)
    self.ae(count('div #speech5'), 1)
    self.ae(count('div.scene div.dialog'), 49)
    self.ae(count('div#scene1 div.dialog div'), 142)
    self.ae(count('#scene1 #speech1'), 1)
    self.ae(count('div[class]'), 103)
    self.ae(count('div[class=dialog]'), 50)
    self.ae(count('div[class^=dia]'), 51)
    self.ae(count('div[class$=log]'), 50)
    self.ae(count('div[class*=sce]'), 1)
    self.ae(count('div[class|=dialog]'), 50)  # ? Seems right
    self.ae(count('div[class~=dialog]'), 51)  # ? Seems right
# }}}
# Run tests {{{
def find_tests():
    """Return a suite with every test defined on TestCSSSelectors."""
    return unittest.defaultTestLoader.loadTestsFromTestCase(TestCSSSelectors)


def run_tests(find_tests=find_tests, for_build=False):
    """Run the tests and exit with status 1 unless all of them passed.

    :param find_tests: callable returning the tests to run.
    :param for_build: when true, the command line is not read and the tests
        returned by ``find_tests()`` are run quietly, with output buffering,
        stopping at the first problem. When false, an optional positional
        command line argument selects what to run: ``.foo`` runs the test
        method ``test_foo`` found via ``find_tests()``, any other value is
        passed to ``unittest``'s ``loadTestsFromName``.
    :raises SystemExit: with code 1 if the requested ``.name`` test does not
        exist, or if any test ended in an error or a failure.
    """
    name = None
    if not for_build:
        parser = argparse.ArgumentParser()
        parser.add_argument('name', nargs='?', default=None,
                            help='The name of the test to run')
        name = parser.parse_args().name

    if name and name.startswith('.'):
        # ".foo" is shorthand for the test method "test_foo".
        wanted = name[1:]
        if not wanted.startswith('test_'):
            wanted = 'test_' + wanted
        tests = next(
            (test for test in find_tests() if test._testMethodName == wanted),
            None)
        if tests is None:
            print('No test named %s found' % name)
            raise SystemExit(1)
    elif name:
        tests = unittest.defaultTestLoader.loadTestsFromName(name)
    else:
        tests = find_tests()

    if for_build:
        runner = unittest.TextTestRunner(verbosity=0, buffer=True, failfast=True)
    else:
        runner = unittest.TextTestRunner(verbosity=4)
    result = runner.run(tests)
    # Errors count in every mode. The previous condition,
    # ``for_build and result.errors or result.failures``, parsed as
    # ``(for_build and result.errors) or result.failures`` and therefore
    # ignored errors when run from the command line.
    if result.errors or result.failures:
        raise SystemExit(1)


if __name__ == '__main__':
    run_tests()
# }}}