mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-26 04:05:52 +01:00
Initial import
This commit is contained in:
165
ebook_converter/tinycss/parsing.py
Normal file
165
ebook_converter/tinycss/parsing.py
Normal file
@@ -0,0 +1,165 @@
|
||||
# coding: utf8
|
||||
"""
|
||||
tinycss.parsing
|
||||
---------------
|
||||
|
||||
Utilities for parsing lists of tokens.
|
||||
|
||||
:copyright: (c) 2012 by Simon Sapin.
|
||||
:license: BSD, see LICENSE for more details.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
|
||||
# TODO: unit tests
|
||||
|
||||
def split_on_comma(tokens):
    """Split a list of tokens on commas, ie ``,`` DELIM tokens.

    Only "top-level" comma tokens are splitting points: the function never
    looks inside a function or other :class:`ContainerToken`, so commas
    nested in their content are left where they are.

    :param tokens:
        An iterable of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :returns:
        A list of lists of tokens. The comma tokens themselves are dropped.
        There is always at least one part (``[[]]`` for empty input), and
        leading, trailing or consecutive commas produce empty parts.

    """
    # ``parts[-1]`` is always the part currently being filled.
    parts = [[]]
    for token in tokens:
        if token.type == 'DELIM' and token.value == ',':
            # A top-level comma closes the current part and opens a new one.
            parts.append([])
        else:
            parts[-1].append(token)
    return parts
|
||||
|
||||
|
||||
def strip_whitespace(tokens):
    """Remove whitespace at the beginning and end of a token list.

    Whitespace tokens in-between other tokens in the list are preserved.
    The input sequence is never modified.

    :param tokens:
        A sequence (supporting ``len()``, indexing and slicing) of
        :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :return:
        A new list holding the sub-sequence without leading and trailing
        ``S`` tokens; an empty list if there is nothing but whitespace.

    """
    start = 0
    end = len(tokens)
    # Move the lower bound past leading whitespace ...
    while start < end and tokens[start].type == 'S':
        start += 1
    # ... and the upper bound back over trailing whitespace.
    while end > start and tokens[end - 1].type == 'S':
        end -= 1
    # Slicing by bounds (rather than popping from a copy) works for any
    # sequence type; list() keeps the return type uniform.
    return list(tokens[start:end])
|
||||
|
||||
|
||||
def remove_whitespace(tokens):
    """Remove any top-level whitespace in a token list.

    Only the tokens of the given sequence are filtered; the content of
    nested :class:`~.token_data.ContainerToken` objects is not inspected,
    so whitespace tokens inside them are preserved.

    :param tokens:
        An iterable of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :return:
        A new list with every token whose type is not ``S``, in the
        original order.

    """
    return [token for token in tokens if token.type != 'S']
|
||||
|
||||
|
||||
def validate_value(tokens):
    """Validate a property value.

    ``{`` tokens have their content checked by :func:`validate_block`;
    every other token is checked by :func:`validate_any`. Both are called
    with ``'property value'`` as the error context.

    :param tokens:
        an iterable of tokens
    :raises:
        :class:`ParseError` if there is any invalid token for the 'value'
        production of the core grammar.

    """
    for token in tokens:
        if token.type == '{':
            validate_block(token.content, 'property value')
        else:
            validate_any(token, 'property value')
|
||||
|
||||
def validate_block(tokens, context):
    """Validate the content of a ``{}`` block.

    Nested ``{`` tokens are validated recursively with the same context,
    ``;`` and ``ATKEYWORD`` tokens are accepted as-is, and every other
    token is checked by :func:`validate_any`.

    :raises:
        :class:`ParseError` if there is any invalid token for the 'block'
        production of the core grammar.
    :param tokens: an iterable of tokens
    :param context: a string for the 'unexpected in ...' message

    """
    for token in tokens:
        type_ = token.type
        if type_ == '{':
            validate_block(token.content, context)
        elif type_ not in (';', 'ATKEYWORD'):
            validate_any(token, context)
|
||||
|
||||
|
||||
def validate_any(token, context):
    """Validate a single token against the 'any' production.

    ``FUNCTION``, ``(`` and ``[`` tokens are containers: each token of
    their content is validated recursively, using the container's own type
    as the context for error messages.

    :raises:
        :class:`ParseError` if this is an invalid token for the
        'any' production of the core grammar.
    :param token: a single token
    :param context: a string for the 'unexpected in ...' message

    """
    type_ = token.type
    if type_ in ('FUNCTION', '(', '['):
        # Use a distinct loop name so the ``token`` parameter is not rebound.
        for child in token.content:
            validate_any(child, type_)
        return
    if type_ in ('S', 'IDENT', 'DIMENSION', 'PERCENTAGE', 'NUMBER',
                 'INTEGER', 'URI', 'DELIM', 'STRING', 'HASH', ':',
                 'UNICODE-RANGE'):
        return
    # Anything else is invalid here; a stray closing bracket gets a more
    # specific adjective in the message.
    adjective = 'unmatched' if type_ in ('}', ')', ']') else 'unexpected'
    raise ParseError(
        token,
        '{0} {1} token in {2}'.format(adjective, type_, context))
|
||||
|
||||
|
||||
class ParseError(ValueError):
    """Details about a CSS syntax error. Usually indicates that something
    (a rule or a declaration) was ignored and will not appear as a parsed
    object.

    This exception is typically logged in a list rather than being propagated
    to the user API.

    .. attribute:: line

        Source line where the error occurred.

    .. attribute:: column

        Column in the source line where the error occurred.

    .. attribute:: reason

        What happened (a string).

    """
    def __init__(self, subject, reason):
        """Build the error from the offending object.

        :param subject:
            Any object with ``line`` and ``column`` attributes; they are
            copied onto the exception.
        :param reason:
            What happened (a string), stored as :attr:`reason` and included
            in the exception message.

        """
        self.line = subject.line
        self.column = subject.column
        self.reason = reason
        # The attributes must be set before formatting: the message is
        # built from ``self``.
        super(ParseError, self).__init__(
            'Parse error at {0.line}:{0.column}, {0.reason}'.format(self))
|
||||
Reference in New Issue
Block a user