Fixing leftovers from first concept of constants

2026-04-24 07:01:30 +02:00 · 2020-06-07 11:59:00 +02:00
parent 7419954e0c
commit a69884d724
9 changed files with 652 additions and 464 deletions
@@ -1,18 +1,13 @@
 import textwrap

-# from lxml.etree import Element
-
 from ebook_converter import constants as const
 from ebook_converter import force_unicode
 from ebook_converter.ebooks.oeb import parse_utils
-from ebook_converter.ebooks.oeb.base import serialize, OEB_DOCS, OEB_STYLES
+from ebook_converter.ebooks.oeb import base
 from ebook_converter.ebooks.oeb.polish.utils import guess_type
 from ebook_converter.utils.icu import sort_key


-__license__ = 'GPL v3'
-__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
-
 def isspace(x):
    return not x.strip('\u0009\u000a\u000c\u000d\u0020')

@@ -28,37 +23,40 @@ def pretty_xml_tree(elem, level=0, indent='  '):
    for i, child in enumerate(elem):
        pretty_xml_tree(child, level=level+1, indent=indent)
        if not child.tail or isspace(child.tail):
-            l = level + 1
+            new_level = level + 1
            if i == len(elem) - 1:
-                l -= 1
-            child.tail = '\n' + (indent * l)
+                new_level -= 1
+            child.tail = '\n' + (indent * new_level)


 def pretty_opf(root):
    # Put all dc: tags first starting with title and author. Preserve order for
    # the rest.
    def dckey(x):
-        return {'title':0, 'creator':1}.get(parse_utils.barename(x.tag), 2)
-    for metadata in root.xpath('//opf:metadata', namespaces=const.OPF_NAMESPACES):
+        return {'title': 0, 'creator': 1}.get(parse_utils.barename(x.tag), 2)
+
+    for metadata in root.xpath('//opf:metadata',
+                               namespaces=const.OPF_NAMESPACES):
        dc_tags = metadata.xpath('./*[namespace-uri()="%s"]' % const.DC11_NS)
        dc_tags.sort(key=dckey)
        for x in reversed(dc_tags):
            metadata.insert(0, x)

    # Group items in the manifest
-    spine_ids = root.xpath('//opf:spine/opf:itemref/@idref', namespaces=const.OPF_NAMESPACES)
-    spine_ids = {x:i for i, x in enumerate(spine_ids)}
+    spine_ids = root.xpath('//opf:spine/opf:itemref/@idref',
+                           namespaces=const.OPF_NAMESPACES)
+    spine_ids = {x: i for i, x in enumerate(spine_ids)}

    def manifest_key(x):
        mt = x.get('media-type', '')
        href = x.get('href', '')
        ext = href.rpartition('.')[-1].lower()
        cat = 1000
-        if mt in OEB_DOCS:
+        if mt in base.OEB_DOCS:
            cat = 0
        elif mt == guess_type('a.ncx'):
            cat = 1
-        elif mt in OEB_STYLES:
+        elif mt in base.OEB_STYLES:
            cat = 2
        elif mt.startswith('image/'):
            cat = 3
@@ -75,20 +73,23 @@ def pretty_opf(root):
            i = sort_key(href)
        return (cat, i)

-    for manifest in root.xpath('//opf:manifest', namespaces=const.OPF_NAMESPACES):
+    for manifest in root.xpath('//opf:manifest',
+                               namespaces=const.OPF_NAMESPACES):
        try:
            children = sorted(manifest, key=manifest_key)
        except AttributeError:
-            continue  # There are comments so dont sort since that would mess up the comments
+            # There are comments so don't sort since that would mess up the
+            # comments.
+            continue
+
        for x in reversed(children):
            manifest.insert(0, x)


-
 def isblock(x):
    if callable(x.tag) or not x.tag:
        return True
-    if x.tag in const.XHTML_BLOCK_TAGS | {const.SVG_SVG}:
+    if x.tag in const.XHTML_BLOCK_TAGS | {base.tag('svg', 'svg')}:
        return True
    return False

@@ -133,28 +134,34 @@ def pretty_block(parent, level=1, indent='  '):
    that contain only other block tags '''
    if not parent.text or isspace(parent.text):
        parent.text = ''
-    nn = '\n' if hasattr(parent.tag, 'strip') and parse_utils.barename(parent.tag) in {'tr', 'td', 'th'} else '\n\n'
+    if (hasattr(parent.tag, 'strip') and
+            parse_utils.barename(parent.tag) in {'tr', 'td', 'th'}):
+        nn = '\n'
+    else:
+        nn = '\n\n'
    parent.text = parent.text + nn + (indent * level)
    for i, child in enumerate(parent):
        if isblock(child) and has_only_blocks(child):
            pretty_block(child, level=level+1, indent=indent)
-        elif child.tag == const.SVG_SVG:
+        elif child.tag == base.tag('svg', 'svg'):
            pretty_xml_tree(child, level=level, indent=indent)
-        l = level
+        new_level = level
        if i == len(parent) - 1:
-            l -= 1
+            new_level -= 1
        if not child.tail or isspace(child.tail):
            child.tail = ''
-        child.tail = child.tail + nn + (indent * l)
+        child.tail = child.tail + nn + (indent * new_level)


 def pretty_script_or_style(container, child):
    if child.text:
        indent = indent_for_tag(child)
        if child.tag.endswith('style'):
-            child.text = force_unicode(pretty_css(container, '', child.text), 'utf-8')
+            child.text = force_unicode(pretty_css(container, '', child.text),
+                                       'utf-8')
        child.text = textwrap.dedent(child.text)
-        child.text = '\n' + '\n'.join([(indent + x) if x else '' for x in child.text.splitlines()])
+        child.text = '\n' + '\n'.join([(indent + x) if x else ''
+                                       for x in child.text.splitlines()])
        set_indent(child, 'text', indent)


@@ -169,62 +176,82 @@ def pretty_html_tree(container, root):
        # Special case the handling of a body that contains a single block tag
        # with all content. In this case we prettify the containing block tag
        # even if it has non block children.
-        if (len(body) == 1 and not callable(body[0].tag) and isblock(body[0]) and not has_only_blocks(
-            body[0]) and parse_utils.barename(body[0].tag) not in (
-                    'pre', 'p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6') and len(body[0]) > 0):
+        if (len(body) == 1 and
+                not callable(body[0].tag) and
+                isblock(body[0]) and
+                not has_only_blocks(body[0]) and
+                parse_utils.barename(body[0].tag) not in ('pre', 'p', 'h1',
+                                                          'h2', 'h3', 'h4',
+                                                          'h5', 'h6') and
+                len(body[0]) > 0):
            pretty_block(body[0], level=2)

    if container is not None:
        # Handle <script> and <style> tags
-        for child in root.xpath('//*[local-name()="script" or local-name()="style"]'):
+        for child in root.xpath('//*[local-name()="script" or local-name()='
+                                '"style"]'):
            pretty_script_or_style(container, child)


 def fix_html(container, raw):
-    ' Fix any parsing errors in the HTML represented as a string in raw. Fixing is done using the HTML5 parsing algorithm. '
+    """
+    Fix any parsing errors in the HTML represented as a string in raw. Fixing
+    is done using the HTML5 parsing algorithm.
+    """
    root = container.parse_xhtml(raw)
-    return serialize(root, 'text/html')
+    return base.serialize(root, 'text/html')


 def pretty_html(container, name, raw):
-    ' Pretty print the HTML represented as a string in raw '
+    """
+    Pretty print the HTML represented as a string in raw
+    """
    root = container.parse_xhtml(raw)
    pretty_html_tree(container, root)
-    return serialize(root, 'text/html')
+    return base.serialize(root, 'text/html')


 def pretty_css(container, name, raw):
-    ' Pretty print the CSS represented as a string in raw '
+    """
+    Pretty print the CSS represented as a string in raw
+    """
    sheet = container.parse_css(raw)
-    return serialize(sheet, 'text/css')
+    return base.serialize(sheet, 'text/css')


 def pretty_xml(container, name, raw):
-    ' Pretty print the XML represented as a string in raw. If ``name`` is the name of the OPF, extra OPF-specific prettying is performed. '
+    """
+    Pretty print the XML represented as a string in raw. If ``name`` is the
+    name of the OPF, extra OPF-specific prettying is performed.
+    """
    root = container.parse_xml(raw)
    if name == container.opf_name:
        pretty_opf(root)
    pretty_xml_tree(root)
-    return serialize(root, 'text/xml')
+    return base.serialize(root, 'text/xml')


 def fix_all_html(container):
-    ' Fix any parsing errors in all HTML files in the container. Fixing is done using the HTML5 parsing algorithm. '
+    """
+    Fix any parsing errors in all HTML files in the container. Fixing is done
+    using the HTML5 parsing algorithm.  """
    for name, mt in container.mime_map.items():
-        if mt in OEB_DOCS:
+        if mt in base.OEB_DOCS:
            container.parsed(name)
            container.dirty(name)


 def pretty_all(container):
-    ' Pretty print all HTML/CSS/XML files in the container '
+    """
+    Pretty print all HTML/CSS/XML files in the container
+    """
    xml_types = {guess_type('a.ncx'), guess_type('a.xml'), guess_type('a.svg')}
    for name, mt in container.mime_map.items():
        prettied = False
-        if mt in OEB_DOCS:
+        if mt in base.OEB_DOCS:
            pretty_html_tree(container, container.parsed(name))
            prettied = True
-        elif mt in OEB_STYLES:
+        elif mt in base.OEB_STYLES:
            container.parsed(name)
            prettied = True
        elif name == container.opf_name: