Use the real constants module.

This is a continuation of the ongoing refactor of the calibre code, to make it more readable and to transform it into something more coherent. In this patch, the imports of some modules are changed so that they no longer pollute each module's namespace with symbols from other modules, which often were themselves imported from yet other modules. Yuck.
2026-04-24 15:11:30 +02:00 · 2020-05-29 17:04:53 +02:00
parent ee4801228f
commit ce89f5c9d1
54 changed files with 2383 additions and 2081 deletions
@@ -1,9 +1,12 @@
-import os, sys, shutil
+import os
+import shutil
+import sys

 from lxml import etree

 from ebook_converter import walk, guess_type
-from ebook_converter.ebooks.metadata import string_to_authors, authors_to_sort_string
+from ebook_converter.ebooks.metadata import authors_to_sort_string
+from ebook_converter.ebooks.metadata import string_to_authors
 from ebook_converter.ebooks.metadata.book.base import Metadata
 from ebook_converter.ebooks.docx import InvalidDOCX
 from ebook_converter.ebooks.docx.names import DOCXNamespace
@@ -11,21 +14,11 @@ from ebook_converter.ptempfile import PersistentTemporaryDirectory
 from ebook_converter.utils.localization import canonicalize_lang
 from ebook_converter.utils.logging import default_log
 from ebook_converter.utils.zipfile import ZipFile
-from ebook_converter.utils.xml_parse import safe_xml_fromstring


-__license__ = 'GPL v3'
-__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
-
-
-def fromstring(raw, parser=None):
-    return safe_xml_fromstring(raw)
-
 # Read metadata {{{
-
-
 def read_doc_props(raw, mi, XPath):
-    root = fromstring(raw)
+    root = etree.fromstring(raw)
    titles = XPath('//dc:title')(root)
    if titles:
        title = titles[0].text
@@ -53,29 +46,31 @@ def read_doc_props(raw, mi, XPath):
    desc = XPath('//dc:description')(root)
    if desc:
        raw = etree.tostring(desc[0], method='text', encoding='unicode')
-        raw = raw.replace('_x000d_', '')  # Word 2007 mangles newlines in the summary
+        # Word 2007 mangles newlines in the summary
+        raw = raw.replace('_x000d_', '')
        mi.comments = raw.strip()

    langs = []
    for lang in XPath('//dc:language')(root):
        if lang.text and lang.text.strip():
-            l = canonicalize_lang(lang.text)
-            if l:
-                langs.append(l)
+            canonic_lang = canonicalize_lang(lang.text)
+            if canonic_lang:
+                langs.append(canonic_lang)
    if langs:
        mi.languages = langs


 def read_app_props(raw, mi):
-    root = fromstring(raw)
+    root = etree.fromstring(raw)
    company = root.xpath('//*[local-name()="Company"]')
    if company and company[0].text and company[0].text.strip():
        mi.publisher = company[0].text.strip()


 def read_default_style_language(raw, mi, XPath):
-    root = fromstring(raw)
-    for lang in XPath('/w:styles/w:docDefaults/w:rPrDefault/w:rPr/w:lang/@w:val')(root):
+    root = etree.fromstring(raw)
+    for lang in XPath('/w:styles/w:docDefaults/w:rPrDefault/w:rPr/w:lang/'
+                      '@w:val')(root):
        lang = canonicalize_lang(lang)
        if lang:
            mi.languages = [lang]
@@ -87,7 +82,9 @@ class DOCX(object):

    def __init__(self, path_or_stream, log=None, extract=True):
        self.docx_is_transitional = True
-        stream = path_or_stream if hasattr(path_or_stream, 'read') else open(path_or_stream, 'rb')
+        stream = path_or_stream
+        if not hasattr(path_or_stream, 'read'):
+            stream = open(path_or_stream, 'rb')
        self.name = getattr(stream, 'name', None) or '<stream>'
        self.log = log or default_log
        if extract:
@@ -107,9 +104,9 @@ class DOCX(object):
        try:
            zf = ZipFile(stream)
            zf.extractall(self.tdir)
-        except:
+        except Exception:
            self.log.exception('DOCX appears to be invalid ZIP file, trying a'
-                    ' more forgiving ZIP parser')
+                               ' more forgiving ZIP parser')
            from ebook_converter.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream, self.tdir)
@@ -133,13 +130,17 @@ class DOCX(object):
        try:
            raw = self.read('[Content_Types].xml')
        except KeyError:
-            raise InvalidDOCX('The file %s docx file has no [Content_Types].xml' % self.name)
-        root = fromstring(raw)
+            raise InvalidDOCX('The file %s docx file has no '
+                              '[Content_Types].xml' % self.name)
+        root = etree.fromstring(raw)
        self.content_types = {}
        self.default_content_types = {}
-        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Default" and @Extension and @ContentType]'):
-            self.default_content_types[item.get('Extension').lower()] = item.get('ContentType')
-        for item in root.xpath('//*[local-name()="Types"]/*[local-name()="Override" and @PartName and @ContentType]'):
+        for item in root.xpath('//*[local-name()="Types"]/*[local-name()='
+                               '"Default" and @Extension and @ContentType]'):
+            self.default_content_types[item.get('Extension').lower()] = \
+                    item.get('ContentType')
+        for item in root.xpath('//*[local-name()="Types"]/*[local-name()='
+                               '"Override" and @PartName and @ContentType]'):
            name = item.get('PartName').lstrip('/')
            self.content_types[name] = item.get('ContentType')

@@ -155,15 +156,19 @@ class DOCX(object):
        try:
            raw = self.read('_rels/.rels')
        except KeyError:
-            raise InvalidDOCX('The file %s docx file has no _rels/.rels' % self.name)
-        root = fromstring(raw)
+            raise InvalidDOCX('The file %s docx file has no _rels/.rels' %
+                              self.name)
+        root = etree.fromstring(raw)
        self.relationships = {}
        self.relationships_rmap = {}
-        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
+        for item in root.xpath('//*[local-name()="Relationships"]/*[local-name'
+                               '()="Relationship" and @Type and @Target]'):
            target = item.get('Target').lstrip('/')
            typ = item.get('Type')
            if target == 'word/document.xml':
-                self.docx_is_transitional = typ != 'http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument'
+                self.docx_is_transitional = (typ != 'http://purl.oclc.org/'
+                                             'ooxml/officeDocument/'
+                                             'relationships/officeDocument')
            self.relationships[typ] = target
            self.relationships_rmap[target] = typ

@@ -171,15 +176,17 @@ class DOCX(object):
    def document_name(self):
        name = self.relationships.get(self.namespace.names['DOCUMENT'], None)
        if name is None:
-            names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml'))
+            names = tuple(n for n in self.names if n == 'document.xml' or
+                          n.endswith('/document.xml'))
            if not names:
-                raise InvalidDOCX('The file %s docx file has no main document' % self.name)
+                raise InvalidDOCX('The file %s docx file has no main '
+                                  'document' % self.name)
            name = names[0]
        return name

    @property
    def document(self):
-        return fromstring(self.read(self.document_name))
+        return etree.fromstring(self.read(self.document_name))

    @property
    def document_relationships(self):
@@ -195,10 +202,13 @@ class DOCX(object):
        except KeyError:
            pass
        else:
-            root = fromstring(raw)
-            for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'):
+            root = etree.fromstring(raw)
+            for item in root.xpath('//*[local-name()="Relationships"]/*'
+                                   '[local-name()="Relationship" and @Type '
+                                   'and @Target]'):
                target = item.get('Target')
-                if item.get('TargetMode', None) != 'External' and not target.startswith('#'):
+                if (item.get('TargetMode', None) != 'External' and not
+                        target.startswith('#')):
                    target = '/'.join((base, target.lstrip('/')))
                typ = item.get('Type')
                Id = item.get('Id')
@@ -209,13 +219,15 @@ class DOCX(object):
    def get_document_properties_names(self):
        name = self.relationships.get(self.namespace.names['DOCPROPS'], None)
        if name is None:
-            names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml')
+            names = tuple(n for n in self.names
+                          if n.lower() == 'docprops/core.xml')
            if names:
                name = names[0]
        yield name
        name = self.relationships.get(self.namespace.names['APPPROPS'], None)
        if name is None:
-            names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml')
+            names = tuple(n for n in self.names
+                          if n.lower() == 'docprops/app.xml')
            if names:
                name = names[0]
        yield name
@@ -239,7 +251,8 @@ class DOCX(object):
            else:
                read_default_style_language(raw, mi, self.namespace.XPath)

-        ap_name = self.relationships.get(self.namespace.names['APPPROPS'], None)
+        ap_name = self.relationships.get(self.namespace.names['APPPROPS'],
+                                         None)
        if ap_name:
            try:
                raw = self.read(ap_name)
@@ -1,12 +1,13 @@
 import sys, os, re, math, errno, uuid, numbers
 from collections import OrderedDict, defaultdict

+from lxml import etree
 from lxml import html
 from lxml.html.builder import (
    HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, A, DT, DL, DD, H1)

 from ebook_converter import guess_type
-from ebook_converter.ebooks.docx.container import DOCX, fromstring
+from ebook_converter.ebooks.docx.container import DOCX
 from ebook_converter.ebooks.docx.names import XML, generate_anchor
 from ebook_converter.ebooks.docx.styles import Styles, inherit, PageProperties
 from ebook_converter.ebooks.docx.numbering import Numbering
@@ -311,7 +312,7 @@ class Convert(object):
                    raise
                self.log.warn('Settings %s file missing' % sename)
            else:
-                self.settings(fromstring(seraw))
+                self.settings(etree.fromstring(seraw))

        if foname is not None:
            try:
@@ -327,7 +328,7 @@ class Convert(object):
                self.log.warn('Endnotes %s do not exist' % enname)
            else:
                enrel = self.docx.get_relationships(enname)
-        footnotes(fromstring(foraw) if foraw else None, forel, fromstring(enraw) if enraw else None, enrel)
+        footnotes(etree.fromstring(foraw) if foraw else None, forel, etree.fromstring(enraw) if enraw else None, enrel)

        if fname is not None:
            embed_relationships = self.docx.get_relationships(fname)[0]
@@ -336,7 +337,7 @@ class Convert(object):
            except KeyError:
                self.log.warn('Fonts table %s does not exist' % fname)
            else:
-                fonts(fromstring(raw), embed_relationships, self.docx, self.dest_dir)
+                fonts(etree.fromstring(raw), embed_relationships, self.docx, self.dest_dir)

        if tname is not None:
            try:
@@ -344,7 +345,7 @@ class Convert(object):
            except KeyError:
                self.log.warn('Styles %s do not exist' % sname)
            else:
-                self.theme(fromstring(raw))
+                self.theme(etree.fromstring(raw))

        styles_loaded = False
        if sname is not None:
@@ -353,7 +354,7 @@ class Convert(object):
            except KeyError:
                self.log.warn('Styles %s do not exist' % sname)
            else:
-                self.styles(fromstring(raw), fonts, self.theme)
+                self.styles(etree.fromstring(raw), fonts, self.theme)
                styles_loaded = True
        if not styles_loaded:
            self.styles(None, fonts, self.theme)
@@ -364,7 +365,7 @@ class Convert(object):
            except KeyError:
                self.log.warn('Numbering styles %s do not exist' % nname)
            else:
-                numbering(fromstring(raw), self.styles, self.docx.get_relationships(nname)[0])
+                numbering(etree.fromstring(raw), self.styles, self.docx.get_relationships(nname)[0])

        self.styles.resolve_numbering(numbering)

@@ -1,22 +1,19 @@
+import collections
 import re
-from collections import Counter

 from ebook_converter.ebooks.docx.writer.container import create_skeleton, page_size, page_effective_area
-from ebook_converter.ebooks.docx.writer.styles import StylesManager, FloatSpec
-from ebook_converter.ebooks.docx.writer.links import LinksManager
-from ebook_converter.ebooks.docx.writer.images import ImagesManager
 from ebook_converter.ebooks.docx.writer.fonts import FontsManager
-from ebook_converter.ebooks.docx.writer.tables import Table
+from ebook_converter.ebooks.docx.writer.images import ImagesManager
+from ebook_converter.ebooks.docx.writer.links import LinksManager
 from ebook_converter.ebooks.docx.writer.lists import ListsManager
+from ebook_converter.ebooks.docx.writer.styles import StylesManager, FloatSpec
+from ebook_converter.ebooks.docx.writer.tables import Table
+from ebook_converter.ebooks.oeb import base
+from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
-from ebook_converter.ebooks.oeb.base import XPath, barename
 from ebook_converter.utils.localization import lang_as_iso639_1


-__license__ = 'GPL v3'
-__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
-
-
 def lang_for_tag(tag):
    for attr in ('lang', '{http://www.w3.org/XML/1998/namespace}lang'):
        val = lang_as_iso639_1(tag.get(attr))
@@ -140,7 +137,7 @@ class Block(object):
        self.numbering_id = None
        self.parent_items = None
        self.html_block = html_block
-        self.html_tag = barename(html_block.tag)
+        self.html_tag = parse_utils.barename(html_block.tag)
        self.float_spec = float_spec
        if float_spec is not None:
            float_spec.blocks.append(self)
@@ -387,7 +384,7 @@ class Blocks(object):
    def resolve_language(self):
        default_lang = self.styles_manager.document_lang
        for block in self.all_blocks:
-            count = Counter()
+            count = collections.Counter()
            for run in block.runs:
                count[run.lang] += 1
            if count:
@@ -473,13 +470,13 @@ class Convert(object):
        self.abshref = self.images_manager.abshref = item.abshref

        self.current_lang = lang_for_tag(item.data) or self.styles_manager.document_lang
-        for i, body in enumerate(XPath('//h:body')(item.data)):
+        for i, body in enumerate(base.XPath('//h:body')(item.data)):
            with self.blocks:
                self.blocks.top_bookmark = self.links_manager.bookmark_for_anchor(self.links_manager.top_anchor, self.current_item, body)
                self.process_tag(body, stylizer, is_first_tag=i == 0)

    def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None):
-        tagname = barename(html_tag.tag)
+        tagname = parse_utils.barename(html_tag.tag)
        tag_style = stylizer.style(html_tag)
        ignore_tag_contents = tagname in {'script', 'style', 'title', 'meta'} or tag_style.is_hidden
        display = tag_style._get('display')
@@ -573,7 +570,7 @@ class Convert(object):
            text = html_tag.text
            if text:
                block.add_text(text, tag_style, ignore_leading_whitespace=True, is_parent_style=True, link=self.current_link, lang=self.current_lang)
-            elif tagname == 'li' and len(html_tag) and barename(html_tag[0].tag) in ('ul', 'ol') and len(html_tag[0]):
+            elif tagname == 'li' and len(html_tag) and parse_utils.barename(html_tag[0].tag) in ('ul', 'ol') and len(html_tag[0]):
                block.force_not_empty = True

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):