Use the real constants module.

This is an ongoing refactor of the calibre code to make it more readable and to transform it into something more coherent. In this patch, there are changes regarding imports for some modules: instead of polluting the namespace of each module with symbols from other modules, which often were themselves imported from yet other modules, the modules are imported and their symbols are referenced through them. Yuck.
2020-05-29 17:04:53 +02:00
parent ee4801228f
commit ce89f5c9d1
54 changed files with 2383 additions and 2081 deletions
@@ -3,6 +3,7 @@ Based on ideas from comiclrf created by FangornUK.
 """
 import shutil, textwrap, codecs, os

+from ebook_converter import constants as const
 from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
 from ebook_converter import CurrentDir
 from ebook_converter.ptempfile import PersistentTemporaryDirectory
@@ -245,7 +246,6 @@ class ComicInput(InputFormatPlugin):
        return os.path.abspath('metadata.opf')

    def create_wrappers(self, pages):
-        from ebook_converter.ebooks.oeb.base import XHTML_NS
        wrappers = []
        WRAPPER = textwrap.dedent('''\
        <html xmlns="%s">
@@ -267,7 +267,8 @@ class ComicInput(InputFormatPlugin):
        ''')
        dir = os.path.dirname(pages[0])
        for i, page in enumerate(pages):
-            wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
+            wrapper = WRAPPER%(const.XHTML_NS, i+1, os.path.basename(page),
+                               i+1)
            page = os.path.join(dir, 'page_%d.xhtml'%(i+1))
            with open(page, 'wb') as f:
                f.write(wrapper.encode('utf-8'))
@@ -275,8 +276,6 @@ class ComicInput(InputFormatPlugin):
        return wrappers

    def create_viewer_wrapper(self, pages):
-        from ebook_converter.ebooks.oeb.base import XHTML_NS
-
        def page(src):
            return '<img src="{}"></img>'.format(os.path.basename(src))

@@ -303,7 +302,7 @@ class ComicInput(InputFormatPlugin):
            %s
            </body>
        </html>
-        ''' % (XHTML_NS, pages)
+        ''' % (const.XHTML_NS, pages)
        path = os.path.join(base, 'wrapper.xhtml')
        with open(path, 'wb') as f:
            f.write(wrapper.encode('utf-8'))
@@ -1,14 +1,22 @@
-from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
+import io

+from lxml import etree
+
+from ebook_converter import constants as const
+from ebook_converter.customize import conversion
+from ebook_converter.ebooks.docx.dump import do_dump
+from ebook_converter.ebooks.docx.writer.container import DOCX
+from ebook_converter.ebooks.docx.writer.from_html import Convert
+from ebook_converter.ebooks.metadata import opf2 as opf_meta
+from ebook_converter.ebooks.oeb import base

-__license__ = 'GPL v3'
-__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

 PAGE_SIZES = ['a0', 'a1', 'a2', 'a3', 'a4', 'a5', 'a6', 'b0', 'b1',
              'b2', 'b3', 'b4', 'b5', 'b6', 'legal', 'letter']
+_OPT = conversion.OptionRecommendation


-class DOCXOutput(OutputFormatPlugin):
+class DOCXOutput(conversion.OutputFormatPlugin):

    name = 'DOCX Output'
    author = 'Kovid Goyal'
@@ -16,75 +24,63 @@ class DOCXOutput(OutputFormatPlugin):
    commit_name = 'docx_output'
    ui_data = {'page_sizes': PAGE_SIZES}

-    options = {
-        OptionRecommendation(name='docx_page_size', recommended_value='letter',
-            level=OptionRecommendation.LOW, choices=PAGE_SIZES,
-            help='The size of the page. Default is letter. Choices '
-            'are %s' % PAGE_SIZES),
-
-        OptionRecommendation(name='docx_custom_page_size', recommended_value=None,
-            help='Custom size of the document. Use the form widthxheight '
-            'EG. `123x321` to specify the width and height (in pts). '
-            'This overrides any specified page-size.'),
-
-        OptionRecommendation(name='docx_no_cover', recommended_value=False,
-            help='Do not insert the book cover as an image at the start of the document.'
-                   ' If you use this option, the book cover will be discarded.'),
-
-        OptionRecommendation(name='preserve_cover_aspect_ratio', recommended_value=False,
-            help='Preserve the aspect ratio of the cover image instead of stretching'
-                   ' it out to cover the entire page.'),
-
-        OptionRecommendation(name='docx_no_toc', recommended_value=False,
-            help='Do not insert the table of contents as a page at the start of the document.'),
-
-        OptionRecommendation(name='extract_to',
-            help='Extract the contents of the generated %s file to the '
-                'specified directory. The contents of the directory are first '
-                'deleted, so be careful.' % 'DOCX'),
-
-        OptionRecommendation(name='docx_page_margin_left', recommended_value=72.0,
-            level=OptionRecommendation.LOW,
-            help='The size of the left page margin, in pts. Default is 72pt.'
-                   ' Overrides the common left page margin setting.'
-        ),
-
-        OptionRecommendation(name='docx_page_margin_top', recommended_value=72.0,
-            level=OptionRecommendation.LOW,
-            help='The size of the top page margin, in pts. Default is 72pt.'
-                   ' Overrides the common top page margin setting, unless set to zero.'
-        ),
-
-        OptionRecommendation(name='docx_page_margin_right', recommended_value=72.0,
-            level=OptionRecommendation.LOW,
-            help='The size of the right page margin, in pts. Default is 72pt.'
-                   ' Overrides the common right page margin setting, unless set to zero.'
-        ),
-
-        OptionRecommendation(name='docx_page_margin_bottom', recommended_value=72.0,
-            level=OptionRecommendation.LOW,
-            help='The size of the bottom page margin, in pts. Default is 72pt.'
-                   ' Overrides the common bottom page margin setting, unless set to zero.'
-        ),
-
-    }
+    options = {_OPT(name='docx_page_size', recommended_value='letter',
+                    level=_OPT.LOW, choices=PAGE_SIZES,
+                    help='The size of the page. Default is letter. Choices '
+                    'are %s' % PAGE_SIZES),
+               _OPT(name='docx_custom_page_size', recommended_value=None,
+                    help='Custom size of the document. Use the form '
+                    'widthxheight EG. `123x321` to specify the width and '
+                    'height (in pts). This overrides any specified '
+                    'page-size.'),
+               _OPT(name='docx_no_cover', recommended_value=False,
+                    help='Do not insert the book cover as an image at the '
+                    'start of the document. If you use this option, the book '
+                    'cover will be discarded.'),
+               _OPT(name='preserve_cover_aspect_ratio',
+                    recommended_value=False, help='Preserve the aspect ratio '
+                    'of the cover image instead of stretching it out to cover '
+                    'the entire page.'),
+               _OPT(name='docx_no_toc', recommended_value=False,
+                    help='Do not insert the table of contents as a page at '
+                    'the start of the document.'),
+               _OPT(name='extract_to', help='Extract the contents of the '
+                    'generated DOCX file to the specified directory. The '
+                    'contents of the directory are first deleted, so be '
+                    'careful.'),
+               _OPT(name='docx_page_margin_left', recommended_value=72.0,
+                    level=_OPT.LOW, help='The size of the left page margin, '
+                    'in pts. Default is 72pt. Overrides the common left page '
+                    'margin setting.'),
+               _OPT(name='docx_page_margin_top', recommended_value=72.0,
+                    level=_OPT.LOW, help='The size of the top page margin, '
+                    'in pts. Default is 72pt. Overrides the common top page '
+                    'margin setting, unless set to zero.'),
+               _OPT(name='docx_page_margin_right', recommended_value=72.0,
+                    level=_OPT.LOW, help='The size of the right page margin, '
+                    'in pts. Default is 72pt. Overrides the common right page '
+                    'margin setting, unless set to zero.'),
+               _OPT(name='docx_page_margin_bottom', recommended_value=72.0,
+                    level=_OPT.LOW, help='The size of the bottom page margin, '
+                    'in pts. Default is 72pt. Overrides the common bottom '
+                    'page margin setting, unless set to zero.')}

    def convert_metadata(self, oeb):
-        from lxml import etree
-        from ebook_converter.ebooks.oeb.base import OPF, OPF2_NS
-        from ebook_converter.ebooks.metadata.opf2 import OPF as ReadOPF
-        from io import BytesIO
-        package = etree.Element(OPF('package'), attrib={'version': '2.0'}, nsmap={None: OPF2_NS})
+
+        package = etree.Element(base.tag('opf', 'package'),
+                                attrib={'version': '2.0'},
+                                nsmap={None: const.OPF2_NS})
        oeb.metadata.to_opf2(package)
-        self.mi = ReadOPF(BytesIO(etree.tostring(package, encoding='utf-8')), populate_spine=False, try_to_guess_cover=False).to_book_metadata()
+        self.mi = opf_meta.OPF(io.BytesIO(etree.tostring(package,
+                                                         encoding='utf-8')),
+                               populate_spine=False,
+                               try_to_guess_cover=False).to_book_metadata()

    def convert(self, oeb, output_path, input_plugin, opts, log):
-        from ebook_converter.ebooks.docx.writer.container import DOCX
-        from ebook_converter.ebooks.docx.writer.from_html import Convert
        docx = DOCX(opts, log)
        self.convert_metadata(oeb)
-        Convert(oeb, docx, self.mi, not opts.docx_no_cover, not opts.docx_no_toc)()
+        Convert(oeb, docx, self.mi, not opts.docx_no_cover,
+                not opts.docx_no_toc)()
        docx.write(output_path, self.mi)
        if opts.extract_to:
-            from ebook_converter.ebooks.docx.dump import do_dump
            do_dump(output_path, opts.extract_to)
@@ -1,14 +1,19 @@
-import os, re, posixpath
-from itertools import cycle
+import hashlib
+import itertools
+import os
+import re
+import traceback
+import uuid

-from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
+from lxml import etree
+
+from ebook_converter.ebooks.metadata import opf2 as opf_meta
+from ebook_converter.ebooks.oeb import base
+from ebook_converter.customize.conversion import InputFormatPlugin
+from ebook_converter.customize.conversion import OptionRecommendation


-__license__ = 'GPL 3'
-__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-ADOBE_OBFUSCATION =  'http://ns.adobe.com/pdf/enc#RC'
+ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
 IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding'


@@ -16,8 +21,8 @@ def decrypt_font_data(key, data, algorithm):
    is_adobe = algorithm == ADOBE_OBFUSCATION
    crypt_len = 1024 if is_adobe else 1040
    crypt = bytearray(data[:crypt_len])
-    key = cycle(iter(bytearray(key)))
-    decrypt = bytes(bytearray(x^next(key) for x in crypt))
+    key = itertools.cycle(iter(bytearray(key)))
+    decrypt = bytes(bytearray(x ^ next(key) for x in crypt))
    return decrypt + data[crypt_len:]


@@ -29,18 +34,16 @@ def decrypt_font(key, path, algorithm):

 class EPUBInput(InputFormatPlugin):

-    name        = 'EPUB Input'
-    author      = 'Kovid Goyal'
+    name = 'EPUB Input'
+    author = 'Kovid Goyal'
    description = 'Convert EPUB files (.epub) to HTML'
-    file_types  = {'epub'}
+    file_types = {'epub'}
    output_encoding = None
    commit_name = 'epub_input'

    recommendations = {('page_breaks_before', '/', OptionRecommendation.MED)}

    def process_encryption(self, encfile, opf, log):
-        from lxml import etree
-        import uuid, hashlib
        idpf_key = opf.raw_unique_identifier
        if idpf_key:
            idpf_key = re.sub('[\u0020\u0009\u000d\u000a]', '', idpf_key)
@@ -56,27 +59,28 @@ class EPUBInput(InputFormatPlugin):
                try:
                    key = item.text.rpartition(':')[-1]
                    key = uuid.UUID(key).bytes
-                except:
-                    import traceback
+                except Exception:
                    traceback.print_exc()
                    key = None

        try:
            root = etree.parse(encfile)
-            for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
+            for em in root.xpath('descendant::*[contains(name(), '
+                                 '"EncryptionMethod")]'):
                algorithm = em.get('Algorithm', '')
                if algorithm not in {ADOBE_OBFUSCATION, IDPF_OBFUSCATION}:
                    return False
-                cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
+                cr = em.getparent().xpath('descendant::*[contains(name(), '
+                                          '"CipherReference")]')[0]
                uri = cr.get('URI')
-                path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
+                path = os.path.abspath(os.path.join(os.path.dirname(encfile),
+                                                    '..', *uri.split('/')))
                tkey = (key if algorithm == ADOBE_OBFUSCATION else idpf_key)
                if (tkey and os.path.exists(path)):
                    self._encrypted_font_uris.append(uri)
                    decrypt_font(tkey, path, algorithm)
            return True
-        except:
-            import traceback
+        except Exception:
            traceback.print_exc()
        return False

@@ -97,8 +101,11 @@ class EPUBInput(InputFormatPlugin):
            return t

    def rationalize_cover3(self, opf, log):
-        ''' If there is a reference to the cover/titlepage via manifest properties, convert to
-        entries in the <guide> so that the rest of the pipeline picks it up. '''
+        """
+        If there is a reference to the cover/titlepage via manifest
+        properties, convert to entries in the <guide> so that the rest of the
+        pipeline picks it up.
+        """
        from ebook_converter.ebooks.metadata.opf3 import items_with_property
        removed = guide_titlepage_href = guide_titlepage_id = None

@@ -128,7 +135,8 @@ class EPUBInput(InputFormatPlugin):
                titlepage_id, titlepage_href = tid, href.partition('#')[0]
                break
        if titlepage_href is None:
-            titlepage_href, titlepage_id = guide_titlepage_href, guide_titlepage_id
+            titlepage_href = guide_titlepage_href
+            titlepage_id = guide_titlepage_id
        if titlepage_href is not None:
            self.set_guide_type(opf, 'titlepage', titlepage_href, 'Title Page')
            spine = list(opf.iterspine())
@@ -148,7 +156,6 @@ class EPUBInput(InputFormatPlugin):
        means, at most one entry with type="cover" that points to a raster
        cover and at most one entry with type="titlepage" that points to an
        HTML titlepage. '''
-        from ebook_converter.ebooks.oeb.base import OPF
        removed = None
        from lxml import etree
        guide_cover, guide_elem = None, None
@@ -160,12 +167,14 @@ class EPUBInput(InputFormatPlugin):
            raster_cover = opf.raster_cover
            if raster_cover:
                if guide_elem is None:
-                    g = opf.root.makeelement(OPF('guide'))
+                    g = opf.root.makeelement(base.tag('opf', 'guide'))
                    opf.root.append(g)
                else:
                    g = guide_elem.getparent()
                guide_cover = raster_cover
-                guide_elem = g.makeelement(OPF('reference'), attrib={'href':raster_cover, 'type':'cover'})
+                guide_elem = g.makeelement(base.tag('opf', 'reference'),
+                                           attrib={'href': raster_cover,
+                                                   'type': 'cover'})
                g.append(guide_elem)
            return
        spine = list(opf.iterspine())
@@ -186,7 +195,8 @@ class EPUBInput(InputFormatPlugin):
        # specially
        if not self.for_viewer:
            if len(spine) == 1:
-                log.warn('There is only a single spine item and it is marked as the cover. Removing cover marking.')
+                log.warn('There is only a single spine item and it is marked '
+                         'as the cover. Removing cover marking.')
                for guide_elem in tuple(opf.iterguide()):
                    if guide_elem.get('type', '').lower() == 'cover':
                        guide_elem.getparent().remove(guide_elem)
@@ -215,8 +225,9 @@ class EPUBInput(InputFormatPlugin):
            # Render the titlepage to create a raster cover
            from ebook_converter.ebooks import render_html_svg_workaround
            guide_elem.set('href', 'calibre_raster_cover.jpg')
-            t = etree.SubElement(
-                elem[0].getparent(), OPF('item'), href=guide_elem.get('href'), id='calibre_raster_cover')
+            t = etree.SubElement(elem[0].getparent(), base.tag('opf', 'item'),
+                                 href=guide_elem.get('href'),
+                                 id='calibre_raster_cover')
            t.set('media-type', 'image/jpeg')
            if os.path.exists(guide_cover):
                renderer = render_html_svg_workaround(guide_cover, log)
@@ -229,17 +240,16 @@ class EPUBInput(InputFormatPlugin):
        return removed

    def find_opf(self):
-        from ebook_converter.utils.xml_parse import safe_xml_fromstring
-
        def attr(n, attr):
            for k, v in n.attrib.items():
                if k.endswith(attr):
                    return v
        try:
            with open('META-INF/container.xml', 'rb') as f:
-                root = safe_xml_fromstring(f.read())
+                root = etree.fromstring(f.read())
                for r in root.xpath('//*[local-name()="rootfile"]'):
-                    if attr(r, 'media-type') != "application/oebps-package+xml":
+                    if (attr(r, 'media-type') !=
+                            "application/oebps-package+xml"):
                        continue
                    path = attr(r, 'full-path')
                    if not path:
@@ -248,20 +258,18 @@ class EPUBInput(InputFormatPlugin):
                    if os.path.exists(path):
                        return path
        except Exception:
-            import traceback
            traceback.print_exc()

    def convert(self, stream, options, file_ext, log, accelerators):
        from ebook_converter.utils.zipfile import ZipFile
        from ebook_converter import walk
        from ebook_converter.ebooks import DRMError
-        from ebook_converter.ebooks.metadata.opf2 import OPF
        try:
            zf = ZipFile(stream)
            zf.extractall(os.getcwd())
-        except:
+        except Exception:
            log.exception('EPUB appears to be invalid ZIP file, trying a'
-                    ' more forgiving ZIP parser')
+                          ' more forgiving ZIP parser')
            from ebook_converter.utils.localunzip import extractall
            stream.seek(0)
            extractall(stream)
@@ -276,11 +284,12 @@ class EPUBInput(InputFormatPlugin):
        path = getattr(stream, 'name', 'stream')

        if opf is None:
-            raise ValueError('%s is not a valid EPUB file (could not find opf)'%path)
+            raise ValueError('%s is not a valid EPUB file (could not find '
+                             'opf)' % path)

        opf = os.path.relpath(opf, os.getcwd())
-        parts = os.path.split(opf)
-        opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
+        # parts = os.path.split(opf)
+        opf = opf_meta.OPF(opf, os.path.dirname(os.path.abspath(opf)))

        self._encrypted_font_uris = []
        if os.path.exists(encfile):
@@ -288,18 +297,23 @@ class EPUBInput(InputFormatPlugin):
                raise DRMError(os.path.basename(path))
        self.encrypted_fonts = self._encrypted_font_uris

-        if len(parts) > 1 and parts[0]:
-            delta = '/'.join(parts[:-1])+'/'
+        # XXX(gryf): this code would fail pretty ugly, thus, this part was
+        # never used.
+        # if len(parts) > 1 and parts[0]:
+        #    delta = '/'.join(parts[:-1])+'/'

-            def normpath(x):
-                return posixpath.normpath(delta + elem.get('href'))
+        #    def normpath(x):
+        #        return posixpath.normpath(delta + elem.get('href'))

-            for elem in opf.itermanifest():
-                elem.set('href', normpath(elem.get('href')))
-            for elem in opf.iterguide():
-                elem.set('href', normpath(elem.get('href')))
+        #    for elem in opf.itermanifest():
+        #        elem.set('href', normpath(elem.get('href')))
+        #    for elem in opf.iterguide():
+        #        elem.set('href', normpath(elem.get('href')))

-        f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
+        if opf.package_version >= 3.0:
+            f = self.rationalize_cover3
+        else:
+            f = self.rationalize_cover2
        self.removed_cover = f(opf, log)
        if self.removed_cover:
            self.removed_items_to_ignore = (self.removed_cover,)
@@ -352,15 +366,18 @@ class EPUBInput(InputFormatPlugin):
        from lxml import etree
        from ebook_converter.ebooks.chardet import xml_to_unicode
        from ebook_converter.ebooks.oeb.polish.parsing import parse
-        from ebook_converter.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
+        from ebook_converter.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, \
+            NCX, urlnormalize, urlunquote, serialize
        from ebook_converter.ebooks.oeb.polish.toc import first_child
-        from ebook_converter.utils.xml_parse import safe_xml_fromstring
        from tempfile import NamedTemporaryFile
        with open(nav_path, 'rb') as f:
            raw = f.read()
-        raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0]
+        raw = xml_to_unicode(raw, strip_encoding_pats=True,
+                             assume_utf8=True)[0]
        root = parse(raw, log=log)
-        ncx = safe_xml_fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>')
+        ncx = etree.fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/'
+                               'ncx/" version="2005-1" xml:lang="eng">'
+                               '<navMap/></ncx>')
        navmap = ncx[0]
        et = '{%s}type' % EPUB_NS
        bn = os.path.basename(nav_path)
@@ -368,8 +385,8 @@ class EPUBInput(InputFormatPlugin):
        def add_from_li(li, parent):
            href = text = None
            for x in li.iterchildren(XHTML('a'), XHTML('span')):
-                text = etree.tostring(
-                    x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join(
+                text = etree.tostring(x, method='text', encoding='unicode',
+                                      with_tail=False).strip() or ' '.join(
                            x.xpath('descendant-or-self::*/@title')).strip()
                href = x.get('href')
                if href:
@@ -382,7 +399,7 @@ class EPUBInput(InputFormatPlugin):
            np[0].append(np.makeelement(NCX('text')))
            np[0][0].text = text
            if href:
-                np.append(np.makeelement(NCX('content'), attrib={'src':href}))
+                np.append(np.makeelement(NCX('content'), attrib={'src': href}))
            return np

        def process_nav_node(node, toc_parent):
@@ -401,20 +418,25 @@ class EPUBInput(InputFormatPlugin):
        else:
            return

-        with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f:
+        with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path),
+                                delete=False) as f:
            f.write(etree.tostring(ncx, encoding='utf-8'))
        ncx_href = os.path.relpath(f.name, os.getcwd()).replace(os.sep, '/')
-        ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME, append=True).get('id')
+        ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME,
+                                          append=True).get('id')
        for spine in opf.root.xpath('//*[local-name()="spine"]'):
            spine.set('toc', ncx_id)
-        opts.epub3_nav_href = urlnormalize(os.path.relpath(nav_path).replace(os.sep, '/'))
+        url = os.path.relpath(nav_path).replace(os.sep, '/')
+        opts.epub3_nav_href = urlnormalize(url)
        opts.epub3_nav_parsed = root
        if getattr(self, 'removed_cover', None):
            changed = False
            base_path = os.path.dirname(nav_path)
            for elem in root.xpath('//*[@href]'):
                href, frag = elem.get('href').partition('#')[::2]
-                link_path = os.path.relpath(os.path.join(base_path, urlunquote(href)), base_path)
+                link_path = os.path.relpath(os.path.join(base_path,
+                                                         urlunquote(href)),
+                                            base_path)
                abs_href = urlnormalize(link_path)
                if abs_href == self.removed_cover:
                    changed = True
@@ -2,7 +2,11 @@ import os
 import re
 import shutil
 import urllib.parse
+import uuid

+from ebook_converter import constants as const
+from ebook_converter.ebooks.oeb import base
+from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.customize.conversion import OutputFormatPlugin
 from ebook_converter.customize.conversion import OptionRecommendation

@@ -132,39 +136,37 @@ class EPUBOutput(OutputFormatPlugin):
    recommendations = {('pretty_print', True, OptionRecommendation.HIGH)}

    def workaround_webkit_quirks(self):  # {{{
-        from ebook_converter.ebooks.oeb.base import XPath
        for x in self.oeb.spine:
            root = x.data
-            body = XPath('//h:body')(root)
+            body = base.XPath('//h:body')(root)
            if body:
                body = body[0]

            if not hasattr(body, 'xpath'):
                continue

-            for pre in XPath('//h:pre')(body):
+            for pre in base.XPath('//h:pre')(body):
                if not pre.text and len(pre) == 0:
                    pre.tag = 'div'
    # }}}

    def upshift_markup(self):  # {{{
        'Upgrade markup to comply with XHTML 1.1 where possible'
-        from ebook_converter.ebooks.oeb.base import XPath, XML
        for x in self.oeb.spine:
            root = x.data
-            if (not root.get(XML('lang'))) and (root.get('lang')):
-                root.set(XML('lang'), root.get('lang'))
-            body = XPath('//h:body')(root)
+            if (not root.get(base.tag('xml', 'lang'))) and (root.get('lang')):
+                root.set(base.tag('xml', 'lang'), root.get('lang'))
+            body = base.XPath('//h:body')(root)
            if body:
                body = body[0]

            if not hasattr(body, 'xpath'):
                continue
-            for u in XPath('//h:u')(root):
+            for u in base.XPath('//h:u')(root):
                u.tag = 'span'

            seen_ids, seen_names = set(), set()
-            for x in XPath('//*[@id or @name]')(root):
+            for x in base.XPath('//*[@id or @name]')(root):
                eid, name = x.get('id', None), x.get('name', None)
                if eid:
                    if eid in seen_ids:
@@ -223,28 +225,27 @@ class EPUBOutput(OutputFormatPlugin):
            first = next(iter(self.oeb.spine))
            self.oeb.toc.add('Start', first.href)

-        from ebook_converter.ebooks.oeb.base import OPF
        identifiers = oeb.metadata['identifier']
-        uuid = None
+        _uuid = None
        for x in identifiers:
-            if x.get(OPF('scheme'), None).lower() == 'uuid' or str(x).startswith('urn:uuid:'):
-                uuid = str(x).split(':')[-1]
+            if (x.get(base.tag('opf', 'scheme'), None).lower() == 'uuid' or
+                    str(x).startswith('urn:uuid:')):
+                _uuid = str(x).split(':')[-1]
                break
        encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])

-        if uuid is None:
+        if _uuid is None:
            self.log.warn('No UUID identifier found')
-            from uuid import uuid4
-            uuid = str(uuid4())
-            oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
+            _uuid = str(uuid.uuid4())
+            oeb.metadata.add('identifier', _uuid, scheme='uuid', id=_uuid)

-        if encrypted_fonts and not uuid.startswith('urn:uuid:'):
+        if encrypted_fonts and not _uuid.startswith('urn:uuid:'):
            # Apparently ADE requires this value to start with urn:uuid:
            # for some absurd reason, or it will throw a hissy fit and refuse
            # to use the obfuscated fonts.
            for x in identifiers:
-                if str(x) == uuid:
-                    x.content = 'urn:uuid:'+uuid
+                if str(x) == _uuid:
+                    x.content = 'urn:uuid:' + _uuid

        with TemporaryDirectory('_epub_output') as tdir:
            from ebook_converter.customize.ui import plugin_for_output_format
@@ -264,7 +265,7 @@ class EPUBOutput(OutputFormatPlugin):
                self.upgrade_to_epub3(tdir, opf)
            encryption = None
            if encrypted_fonts:
-                encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)
+                encryption = self.encrypt_fonts(encrypted_fonts, tdir, _uuid)

            from ebook_converter.ebooks.epub import initialize_container
            with initialize_container(output_path, os.path.basename(opf),
@@ -312,12 +313,12 @@ class EPUBOutput(OutputFormatPlugin):
        except EnvironmentError:
            pass

-    def encrypt_fonts(self, uris, tdir, uuid):  # {{{
+    def encrypt_fonts(self, uris, tdir, _uuid):  # {{{
        from ebook_converter.polyglot.binary import from_hex_bytes

-        key = re.sub(r'[^a-fA-F0-9]', '', uuid)
+        key = re.sub(r'[^a-fA-F0-9]', '', _uuid)
        if len(key) < 16:
-            raise ValueError('UUID identifier %r is invalid'%uuid)
+            raise ValueError('UUID identifier %r is invalid'% _uuid)
        key = bytearray(from_hex_bytes((key + key)[:32]))
        paths = []
        with CurrentDir(tdir):
@@ -335,7 +336,8 @@ class EPUBOutput(OutputFormatPlugin):
                    if len(data) >= 1024:
                        data = bytearray(data)
                        f.seek(0)
-                        f.write(bytes(bytearray(data[i] ^ key[i%16] for i in range(1024))))
+                        f.write(bytes(bytearray(data[i] ^ key[i%16]
+                                                for i in range(1024))))
                    else:
                        self.log.warn('Font', path, 'is invalid, ignoring')
                if not isinstance(uri, str):
@@ -374,11 +376,10 @@ class EPUBOutput(OutputFormatPlugin):
    # }}}

    def workaround_ade_quirks(self):  # {{{
-        '''
+        """
        Perform various markup transforms to get the output to render correctly
        in the quirky ADE.
-        '''
-        from ebook_converter.ebooks.oeb.base import XPath, XHTML, barename, urlunquote
+        """

        stylesheet = self.oeb.manifest.main_stylesheet

@@ -388,23 +389,23 @@ class EPUBOutput(OutputFormatPlugin):
        for node in self.oeb.toc.iter():
            href = getattr(node, 'href', None)
            if hasattr(href, 'partition'):
-                base, _, frag = href.partition('#')
-                frag = urlunquote(frag)
+                _base, _, frag = href.partition('#')
+                frag = base.urlunquote(frag)
                if frag and frag_pat.match(frag) is None:
                    self.log.warn(
                            'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it'%frag)
-                    node.href = base
+                    node.href = _base

        for x in self.oeb.spine:
            root = x.data
-            body = XPath('//h:body')(root)
+            body = base.XPath('//h:body')(root)
            if body:
                body = body[0]

            if hasattr(body, 'xpath'):
                # remove <img> tags with empty src elements
                bad = []
-                for x in XPath('//h:img')(body):
+                for x in base.XPath('//h:img')(body):
                    src = x.get('src', '').strip()
                    if src in ('', '#') or src.startswith('http:'):
                        bad.append(x)
@@ -412,7 +413,7 @@ class EPUBOutput(OutputFormatPlugin):
                    img.getparent().remove(img)

                # Add id attribute to <a> tags that have name
-                for x in XPath('//h:a[@name]')(body):
+                for x in base.XPath('//h:a[@name]')(body):
                    if not x.get('id', False):
                        x.set('id', x.get('name'))
                    # The delightful epubcheck has started complaining about <a> tags that
@@ -420,19 +421,19 @@ class EPUBOutput(OutputFormatPlugin):
                    x.attrib.pop('name')

                # Replace <br> that are children of <body> as ADE doesn't handle them
-                for br in XPath('./h:br')(body):
+                for br in base.XPath('./h:br')(body):
                    if br.getparent() is None:
                        continue
                    try:
                        prior = next(br.itersiblings(preceding=True))
-                        priortag = barename(prior.tag)
+                        priortag = parse_utils.barename(prior.tag)
                        priortext = prior.tail
                    except:
                        priortag = 'body'
                        priortext = body.text
                    if priortext:
                        priortext = priortext.strip()
-                    br.tag = XHTML('p')
+                    br.tag = base.tag('xhtml', 'p')
                    br.text = '\u00a0'
                    style = br.get('style', '').split(';')
                    style = list(filter(None, map(lambda x: x.strip(), style)))
@@ -446,44 +447,44 @@ class EPUBOutput(OutputFormatPlugin):
                        style.append('height:0pt')
                    br.set('style', '; '.join(style))

-            for tag in XPath('//h:embed')(root):
+            for tag in base.XPath('//h:embed')(root):
                tag.getparent().remove(tag)
-            for tag in XPath('//h:object')(root):
+            for tag in base.XPath('//h:object')(root):
                if tag.get('type', '').lower().strip() in {'image/svg+xml', 'application/svg+xml'}:
                    continue
                tag.getparent().remove(tag)

-            for tag in XPath('//h:title|//h:style')(root):
+            for tag in base.XPath('//h:title|//h:style')(root):
                if not tag.text:
                    tag.getparent().remove(tag)
-            for tag in XPath('//h:script')(root):
+            for tag in base.XPath('//h:script')(root):
                if (not tag.text and not tag.get('src', False) and tag.get('type', None) != 'text/x-mathjax-config'):
                    tag.getparent().remove(tag)
-            for tag in XPath('//h:body/descendant::h:script')(root):
+            for tag in base.XPath('//h:body/descendant::h:script')(root):
                tag.getparent().remove(tag)

-            formchildren = XPath('./h:input|./h:button|./h:textarea|'
+            formchildren = base.XPath('./h:input|./h:button|./h:textarea|'
                    './h:label|./h:fieldset|./h:legend')
-            for tag in XPath('//h:form')(root):
+            for tag in base.XPath('//h:form')(root):
                if formchildren(tag):
                    tag.getparent().remove(tag)
                else:
                    # Not a real form
-                    tag.tag = XHTML('div')
+                    tag.tag = base.tag('xhtml', 'div')

-            for tag in XPath('//h:center')(root):
-                tag.tag = XHTML('div')
+            for tag in base.XPath('//h:center')(root):
+                tag.tag = base.tag('xhtml', 'div')
                tag.set('style', 'text-align:center')
            # ADE can't handle &amp; in an img url
-            for tag in XPath('//h:img[@src]')(root):
+            for tag in base.XPath('//h:img[@src]')(root):
                tag.set('src', tag.get('src', '').replace('&', ''))

            # ADE whimpers in fright when it encounters a <td> outside a
            # <table>
-            in_table = XPath('ancestor::h:table')
-            for tag in XPath('//h:td|//h:tr|//h:th')(root):
+            in_table = base.XPath('ancestor::h:table')
+            for tag in base.XPath('//h:td|//h:tr|//h:th')(root):
                if not in_table(tag):
-                    tag.tag = XHTML('div')
+                    tag.tag = base.tag('xhtml', 'div')

            # ADE fails to render non breaking hyphens/soft hyphens/zero width spaces
            special_chars = re.compile('[\u200b\u00ad]')
@@ -498,7 +499,7 @@ class EPUBOutput(OutputFormatPlugin):
            if stylesheet is not None:
                # ADE doesn't render lists correctly if they have left margins
                from css_parser.css import CSSRule
-                for lb in XPath('//h:ul[@class]|//h:ol[@class]')(root):
+                for lb in base.XPath('//h:ul[@class]|//h:ol[@class]')(root):
                    sel = '.'+lb.get('class')
                    for rule in stylesheet.data.cssRules.rulesOfType(CSSRule.STYLE_RULE):
                        if sel == rule.selectorList.selectorText:
@@ -519,11 +520,10 @@ class EPUBOutput(OutputFormatPlugin):
        '''
        Perform toc link transforms to alleviate slow loading.
        '''
-        from ebook_converter.ebooks.oeb.base import XPath
        from ebook_converter.ebooks.oeb.polish.toc import item_at_top

        def frag_is_at_top(root, frag):
-            elem = XPath('//*[@id="%s" or @name="%s"]'%(frag, frag))(root)
+            elem = base.XPath('//*[@id="%s" or @name="%s"]'%(frag, frag))(root)
            if elem:
                elem = elem[0]
            else:
@@ -1,59 +1,57 @@
 """
 Convert .fb2 files to .lrf
 """
-import os, re
+import os
 import pkg_resources
+import re

-from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
+from lxml import etree
+
+from ebook_converter import constants as const
+from ebook_converter.customize.conversion import InputFormatPlugin
+from ebook_converter.customize.conversion import OptionRecommendation
 from ebook_converter import guess_type


-__license__ = 'GPL v3'
-__copyright__ = '2008, Anatoly Shipitsin <norguhtar at gmail.com>'
-
-FB2NS  = 'http://www.gribuser.ru/xml/fictionbook/2.0'
+FB2NS = 'http://www.gribuser.ru/xml/fictionbook/2.0'
 FB21NS = 'http://www.gribuser.ru/xml/fictionbook/2.1'


 class FB2Input(InputFormatPlugin):

-    name        = 'FB2 Input'
-    author      = 'Anatoly Shipitsin'
+    name = 'FB2 Input'
+    author = 'Anatoly Shipitsin'
    description = 'Convert FB2 and FBZ files to HTML'
-    file_types  = {'fb2', 'fbz'}
+    file_types = {'fb2', 'fbz'}
    commit_name = 'fb2_input'

-    recommendations = {
-        ('level1_toc', '//h:h1', OptionRecommendation.MED),
-        ('level2_toc', '//h:h2', OptionRecommendation.MED),
-        ('level3_toc', '//h:h3', OptionRecommendation.MED),
-        }
+    recommendations = {('level1_toc', '//h:h1', OptionRecommendation.MED),
+                       ('level2_toc', '//h:h2', OptionRecommendation.MED),
+                       ('level3_toc', '//h:h3', OptionRecommendation.MED)}

-    options = {
-    OptionRecommendation(name='no_inline_fb2_toc',
-        recommended_value=False, level=OptionRecommendation.LOW,
-        help='Do not insert a Table of Contents at the beginning of the book.'
-        )}
+    options = {OptionRecommendation(name='no_inline_fb2_toc',
+                                    recommended_value=False,
+                                    level=OptionRecommendation.LOW,
+                                    help='Do not insert a Table of Contents '
+                                    'at the beginning of the book.')}

    def convert(self, stream, options, file_ext, log,
                accelerators):
-        from lxml import etree
-        from ebook_converter.utils.xml_parse import safe_xml_fromstring
-        from ebook_converter.ebooks.metadata.fb2 import ensure_namespace, get_fb2_data
+        from ebook_converter.ebooks.metadata.fb2 import ensure_namespace
+        from ebook_converter.ebooks.metadata.fb2 import get_fb2_data
        from ebook_converter.ebooks.metadata.opf2 import OPFCreator
        from ebook_converter.ebooks.metadata.meta import get_metadata
-        from ebook_converter.ebooks.oeb.base import XLINK_NS, XHTML_NS
        from ebook_converter.ebooks.chardet import xml_to_unicode
        self.log = log
        log.debug('Parsing XML...')
        raw = get_fb2_data(stream)[0]
        raw = raw.replace(b'\0', b'')
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
-            assume_utf8=True, resolve_entities=True)[0]
+                             assume_utf8=True, resolve_entities=True)[0]
        try:
-            doc = safe_xml_fromstring(raw)
+            doc = etree.fromstring(raw)
        except etree.XMLSyntaxError:
-            doc = safe_xml_fromstring(raw.replace('& ', '&amp;'))
+            doc = etree.fromstring(raw.replace('& ', '&amp;'))
        if doc is None:
            raise ValueError('The FB2 file is not valid XML')
        doc = ensure_namespace(doc)
@@ -62,22 +60,24 @@ class FB2Input(InputFormatPlugin):
        except Exception:
            fb_ns = FB2NS

-        NAMESPACES = {'f':fb_ns, 'l':XLINK_NS}
-        stylesheets = doc.xpath('//*[local-name() = "stylesheet" and @type="text/css"]')
+        NAMESPACES = {'f': fb_ns, 'l': const.XLINK_NS}
+        stylesheets = doc.xpath('//*[local-name() = "stylesheet" and '
+                                '@type="text/css"]')
        css = ''
        for s in stylesheets:
            css += etree.tostring(s, encoding='unicode', method='text',
-                    with_tail=False) + '\n\n'
+                                  with_tail=False) + '\n\n'
        if css:
-            import css_parser, logging
+            import css_parser
+            import logging
            parser = css_parser.CSSParser(fetcher=None,
-                    log=logging.getLogger('calibre.css'))
+                                          log=logging.getLogger('calibre.css'))

-            XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % XHTML_NS
+            XHTML_CSS_NAMESPACE = '@namespace "%s";\n' % const.XHTML_NS
            text = XHTML_CSS_NAMESPACE + css
            log.debug('Parsing stylesheet...')
            stylesheet = parser.parseString(text)
-            stylesheet.namespaces['h'] = XHTML_NS
+            stylesheet.namespaces['h'] = const.XHTML_NS
            css = stylesheet.cssText
            if isinstance(css, bytes):
                css = css.decode('utf-8', 'replace')
@@ -92,16 +92,20 @@ class FB2Input(InputFormatPlugin):
        if options.no_inline_fb2_toc:
            log('Disabling generation of inline FB2 TOC')
            ss = re.compile(r'<!-- BUILD TOC -->.*<!-- END BUILD TOC -->',
-                    re.DOTALL).sub('', ss)
+                            re.DOTALL).sub('', ss)

-        styledoc = safe_xml_fromstring(ss)
+        styledoc = etree.fromstring(ss)

        transform = etree.XSLT(styledoc)
        result = transform(doc)

        # Handle links of type note and cite
-        notes = {a.get('href')[1:]: a for a in result.xpath('//a[@link_note and @href]') if a.get('href').startswith('#')}
-        cites = {a.get('link_cite'): a for a in result.xpath('//a[@link_cite]') if not a.get('href', '')}
+        notes = {a.get('href')[1:]: a
+                 for a in result.xpath('//a[@link_note and @href]')
+                 if a.get('href').startswith('#')}
+        cites = {a.get('link_cite'): a
+                 for a in result.xpath('//a[@link_cite]')
+                 if not a.get('href', '')}
        all_ids = {x for x in result.xpath('//*/@id')}
        for cite, a in cites.items():
            note = notes.get(cite, None)
@@ -137,8 +141,10 @@ class FB2Input(InputFormatPlugin):
                f.write(mi.cover_data[1])
            cpath = os.path.abspath('fb2_cover_calibre_mi.jpg')
        else:
-            for img in doc.xpath('//f:coverpage/f:image', namespaces=NAMESPACES):
-                href = img.get('{%s}href'%XLINK_NS, img.get('href', None))
+            for img in doc.xpath('//f:coverpage/f:image',
+                                 namespaces=NAMESPACES):
+                href = img.get('{%s}href' % const.XLINK_NS,
+                               img.get('href', None))
                if href is not None:
                    if href.startswith('#'):
                        href = href[1:]
@@ -165,15 +171,15 @@ class FB2Input(InputFormatPlugin):
                ext = ct.rpartition('/')[-1].lower()
                if ext in ('png', 'jpeg', 'jpg'):
                    if fname.lower().rpartition('.')[-1] not in {'jpg', 'jpeg',
-                            'png'}:
+                                                                 'png'}:
                        fname += '.' + ext
                    self.binary_map[elem.get('id')] = fname
                raw = elem.text.strip()
                try:
                    data = base64_decode(raw)
                except TypeError:
-                    self.log.exception('Binary data with id=%s is corrupted, ignoring'%(
-                        elem.get('id')))
+                    self.log.exception('Binary data with id=%s is corrupted, '
+                                       'ignoring' % elem.get('id'))
                else:
                    with open(fname, 'wb') as f:
                        f.write(data)
@@ -1,17 +1,17 @@
+import copy
+
+from lxml import etree
+
+from ebook_converter import constants as const
 from ebook_converter.customize.conversion import InputFormatPlugin


-__license__ = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-
 class LITInput(InputFormatPlugin):

-    name        = 'LIT Input'
-    author      = 'Marshall T. Vandegrift'
+    name = 'LIT Input'
+    author = 'Marshall T. Vandegrift'
    description = 'Convert LIT files to HTML'
-    file_types  = {'lit'}
+    file_types = {'lit'}
    commit_name = 'lit_input'

    def convert(self, stream, options, file_ext, log,
@@ -22,7 +22,7 @@ class LITInput(InputFormatPlugin):
        return create_oebbook(log, stream, options, reader=LitReader)

    def postprocess_book(self, oeb, opts, log):
-        from ebook_converter.ebooks.oeb.base import XHTML_NS, XPath, XHTML
+        from ebook_converter.ebooks.oeb.base import XPath, XHTML
        for item in oeb.spine:
            root = item.data
            if not hasattr(root, 'xpath'):
@@ -37,22 +37,23 @@ class LITInput(InputFormatPlugin):
                body = body[0]
                if len(body) == 1 and body[0].tag == XHTML('pre'):
                    pre = body[0]
-                    from ebook_converter.ebooks.txt.processor import convert_basic, \
-                        separate_paragraphs_single_line
+                    from ebook_converter.ebooks.txt.processor import \
+                        convert_basic, separate_paragraphs_single_line
                    from ebook_converter.ebooks.chardet import xml_to_unicode
-                    from ebook_converter.utils.xml_parse import safe_xml_fromstring
-                    import copy
-                    self.log('LIT file with all text in singe <pre> tag detected')
+                    self.log('LIT file with all text in single <pre> tag '
+                             'detected')
                    html = separate_paragraphs_single_line(pre.text)
                    html = convert_basic(html).replace('<html>',
-                            '<html xmlns="%s">'%XHTML_NS)
+                                                       '<html xmlns="%s">' %
+                                                       const.XHTML_NS)
                    html = xml_to_unicode(html, strip_encoding_pats=True,
-                            resolve_entities=True)[0]
+                                          resolve_entities=True)[0]
                    if opts.smarten_punctuation:
                        # SmartyPants skips text inside <pre> tags
-                        from ebook_converter.ebooks.conversion.preprocess import smarten_punctuation
-                        html = smarten_punctuation(html, self.log)
-                    root = safe_xml_fromstring(html)
+                        from ebook_converter.ebooks.conversion import \
+                                preprocess
+                        html = preprocess.smarten_punctuation(html, self.log)
+                    root = etree.fromstring(html)
                    body = XPath('//h:body')(root)
                    pre.tag = XHTML('div')
                    pre.text = ''
@@ -1,54 +1,52 @@
-import os, sys
+import os
+import sys
 import pkg_resources

+from lxml import etree
+
 from ebook_converter.customize.conversion import InputFormatPlugin


-__license__ = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'
-
-
 class LRFInput(InputFormatPlugin):

-    name        = 'LRF Input'
-    author      = 'Kovid Goyal'
+    name = 'LRF Input'
+    author = 'Kovid Goyal'
    description = 'Convert LRF files to HTML'
-    file_types  = {'lrf'}
+    file_types = {'lrf'}
    commit_name = 'lrf_input'

    def convert(self, stream, options, file_ext, log,
                accelerators):
-        from ebook_converter.ebooks.lrf.input import (MediaType, Styles, TextBlock,
-                Canvas, ImageBlock, RuledLine)
+        from ebook_converter.ebooks.lrf.input import MediaType, Styles, \
+                TextBlock, Canvas, ImageBlock, RuledLine
        self.log = log
        self.log('Generating XML')
        from ebook_converter.ebooks.lrf.lrfparser import LRFDocument
-        from ebook_converter.utils.xml_parse import safe_xml_fromstring
-        from lxml import etree
        d = LRFDocument(stream)
        d.parse()
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            open(u'lrs.xml', 'wb').write(xml.encode('utf-8'))
-        doc = safe_xml_fromstring(xml)
+        doc = etree.fromstring(xml)

        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
            ro = x.get('refobj')
-            jump_button = doc.xpath('//*[@objid="%s"]'%ro)
+            jump_button = doc.xpath('//*[@objid="%s"]' % ro)
            if jump_button:
-                jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
+                jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage '
+                                               'and @refobj]')
                if jump_to:
-                    char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
-                            jump_to[0].get('refobj'))
+                    char_button_map[ro] = ('%s.xhtml#%s' %
+                                           (jump_to[0].get('refpage'),
+                                            jump_to[0].get('refobj')))
        plot_map = {}
        for x in doc.xpath('//Plot[@refobj]'):
            ro = x.get('refobj')
-            image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
+            image = doc.xpath('//Image[@objid="%s" and @refstream]' % ro)
            if image:
-                imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
-                    image[0].get('refstream'))
+                imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]' %
+                                   image[0].get('refstream'))
                if imgstr:
                    plot_map[ro] = imgstr[0].get('file')

@@ -58,21 +56,19 @@ class LRFInput(InputFormatPlugin):
                  resource_filename('ebook_converter',
                                    'data/lrf.xsl')) as fobj:
            # TODO(gryf): change this nonsense to etree.parse() instead.
-            styledoc = safe_xml_fromstring(fobj.read())
+            styledoc = etree.fromstring(fobj.read())
        media_type = MediaType()
        styles = Styles()
        text_block = TextBlock(styles, char_button_map, plot_map, log)
        canvas = Canvas(doc, styles, text_block, log)
        image_block = ImageBlock(canvas)
        ruled_line = RuledLine()
-        extensions = {
-                ('calibre', 'media-type') : media_type,
-                ('calibre', 'text-block') : text_block,
-                ('calibre', 'ruled-line') : ruled_line,
-                ('calibre', 'styles')     : styles,
-                ('calibre', 'canvas')     : canvas,
-                ('calibre', 'image-block'): image_block,
-                }
+        extensions = {('calibre', 'media-type'): media_type,
+                      ('calibre', 'text-block'): text_block,
+                      ('calibre', 'ruled-line'): ruled_line,
+                      ('calibre', 'styles'): styles,
+                      ('calibre', 'canvas'): canvas,
+                      ('calibre', 'image-block'): image_block}
        transform = etree.XSLT(styledoc, extensions=extensions)
        try:
            result = transform(doc)
@@ -1,57 +1,58 @@
-import os, glob, re, textwrap
+import glob
+import os
 import pkg_resources
+import re
+import textwrap

-from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
+from lxml import etree
+
+from ebook_converter.customize.conversion import InputFormatPlugin
+from ebook_converter.customize.conversion import OptionRecommendation
 from ebook_converter.polyglot.builtins import as_bytes

-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

-border_style_map = {
-        'single' : 'solid',
-        'double-thickness-border' : 'double',
-        'shadowed-border': 'outset',
-        'double-border': 'double',
-        'dotted-border': 'dotted',
-        'dashed': 'dashed',
-        'hairline': 'solid',
-        'inset': 'inset',
-        'dash-small': 'dashed',
-        'dot-dash': 'dotted',
-        'dot-dot-dash': 'dotted',
-        'outset': 'outset',
-        'tripple': 'double',
-        'triple': 'double',
-        'thick-thin-small': 'solid',
-        'thin-thick-small': 'solid',
-        'thin-thick-thin-small': 'solid',
-        'thick-thin-medium': 'solid',
-        'thin-thick-medium': 'solid',
-        'thin-thick-thin-medium': 'solid',
-        'thick-thin-large': 'solid',
-        'thin-thick-thin-large': 'solid',
-        'wavy': 'ridge',
-        'double-wavy': 'ridge',
-        'striped': 'ridge',
-        'emboss': 'inset',
-        'engrave': 'inset',
-        'frame': 'ridge',
-}
+border_style_map = {'single': 'solid',
+                    'double-thickness-border': 'double',
+                    'shadowed-border': 'outset',
+                    'double-border': 'double',
+                    'dotted-border': 'dotted',
+                    'dashed': 'dashed',
+                    'hairline': 'solid',
+                    'inset': 'inset',
+                    'dash-small': 'dashed',
+                    'dot-dash': 'dotted',
+                    'dot-dot-dash': 'dotted',
+                    'outset': 'outset',
+                    'tripple': 'double',
+                    'triple': 'double',
+                    'thick-thin-small': 'solid',
+                    'thin-thick-small': 'solid',
+                    'thin-thick-thin-small': 'solid',
+                    'thick-thin-medium': 'solid',
+                    'thin-thick-medium': 'solid',
+                    'thin-thick-thin-medium': 'solid',
+                    'thick-thin-large': 'solid',
+                    'thin-thick-thin-large': 'solid',
+                    'wavy': 'ridge',
+                    'double-wavy': 'ridge',
+                    'striped': 'ridge',
+                    'emboss': 'inset',
+                    'engrave': 'inset',
+                    'frame': 'ridge'}


 class RTFInput(InputFormatPlugin):

-    name        = 'RTF Input'
-    author      = 'Kovid Goyal'
+    name = 'RTF Input'
+    author = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
-    file_types  = {'rtf'}
+    file_types = {'rtf'}
    commit_name = 'rtf_input'

-    options = {
-        OptionRecommendation(name='ignore_wmf', recommended_value=False,
-            help='Ignore WMF images instead of replacing them with a '
-                 'placeholder image.'),
-    }
+    options = {OptionRecommendation(name='ignore_wmf', recommended_value=False,
+                                    help='Ignore WMF images instead of '
+                                    'replacing them with a placeholder '
+                                    'image.')}

    def generate_xml(self, stream):
        from ebook_converter.ebooks.rtf2xml.ParseRtf import ParseRtf
@@ -64,7 +65,7 @@ class RTFInput(InputFormatPlugin):
                run_lev = 4
                indent_out = 1
                self.log('Running RTFParser in debug mode')
-            except:
+            except Exception:
                self.log.warn('Impossible to run RTFParser in debug mode')
        parser = ParseRtf(
            in_file=stream,
@@ -108,7 +109,8 @@ class RTFInput(InputFormatPlugin):
            deb_dir=debug_dir,

            # Default encoding
-            default_encoding=getattr(self.opts, 'input_encoding', 'cp1252') or 'cp1252',
+            default_encoding=getattr(self.opts, 'input_encoding',
+                                     'cp1252') or 'cp1252',

            # Run level
            run_level=run_lev,
@@ -151,7 +153,7 @@ class RTFInput(InputFormatPlugin):
        for count, val in imap.items():
            try:
                imap[count] = self.convert_image(val)
-            except:
+            except Exception:
                self.log.exception('Failed to convert', val)
        return imap

@@ -161,7 +163,7 @@ class RTFInput(InputFormatPlugin):
        try:
            return self.rasterize_wmf(name)
        except Exception:
-            self.log.exception('Failed to convert WMF image %r'%name)
+            self.log.exception('Failed to convert WMF image %r' % name)
        return self.replace_wmf(name)

    def replace_wmf(self, name):
@@ -170,9 +172,11 @@ class RTFInput(InputFormatPlugin):
            return '__REMOVE_ME__'
        from ebook_converter.ebooks.covers import message_image
        if self.default_img is None:
-            self.default_img = message_image('Conversion of WMF images is not supported.'
-            ' Use Microsoft Word or OpenOffice to save this RTF file'
-            ' as HTML and convert that in calibre.')
+            self.default_img = message_image('Conversion of WMF images is not '
+                                             'supported. Use Microsoft Word '
+                                             'or OpenOffice to save this RTF '
+                                             'file as HTML and convert that '
+                                             'in calibre.')
        name = name.replace('.wmf', '.jpg')
        with open(name, 'wb') as f:
            f.write(self.default_img)
@@ -189,10 +193,10 @@ class RTFInput(InputFormatPlugin):
        return name

    def write_inline_css(self, ic, border_styles):
-        font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
-                enumerate(ic.font_sizes)]
-        color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
-                enumerate(ic.colors) if x != 'false']
+        font_size_classes = ['span.fs%d { font-size: %spt }' % (i, x)
+                             for i, x in enumerate(ic.font_sizes)]
+        color_classes = ['span.col%d { color: %s }' % (i, x)
+                         for i, x in enumerate(ic.colors) if x != 'false']
        css = textwrap.dedent('''
        span.none {
            text-decoration: none; font-weight: normal;
@@ -210,11 +214,11 @@ class RTFInput(InputFormatPlugin):
        span.strike-through { text-decoration: line-through }

        ''')
-        css += '\n'+'\n'.join(font_size_classes)
-        css += '\n' +'\n'.join(color_classes)
+        css += '\n' + '\n'.join(font_size_classes)
+        css += '\n' + '\n'.join(color_classes)

        for cls, val in border_styles.items():
-            css += '\n\n.%s {\n%s\n}'%(cls, val)
+            css += '\n\n.%s {\n%s\n}' % (cls, val)

        with open(u'styles.css', 'ab') as f:
            f.write(css.encode('utf-8'))
@@ -224,35 +228,34 @@ class RTFInput(InputFormatPlugin):
        style_map = {}
        for elem in doc.xpath(r'//*[local-name()="cell"]'):
            style = ['border-style: hidden', 'border-width: 1px',
-                    'border-color: black']
+                     'border-color: black']
            for x in ('bottom', 'top', 'left', 'right'):
-                bs = elem.get('border-cell-%s-style'%x, None)
+                bs = elem.get('border-cell-%s-style' % x, None)
                if bs:
                    cbs = border_style_map.get(bs, 'solid')
-                    style.append('border-%s-style: %s'%(x, cbs))
-                bw = elem.get('border-cell-%s-line-width'%x, None)
+                    style.append('border-%s-style: %s' % (x, cbs))
+                bw = elem.get('border-cell-%s-line-width' % x, None)
                if bw:
-                    style.append('border-%s-width: %spt'%(x, bw))
-                bc = elem.get('border-cell-%s-color'%x, None)
+                    style.append('border-%s-width: %spt' % (x, bw))
+                bc = elem.get('border-cell-%s-color' % x, None)
                if bc:
-                    style.append('border-%s-color: %s'%(x, bc))
+                    style.append('border-%s-color: %s' % (x, bc))
            style = ';\n'.join(style)
            if style not in border_styles:
                border_styles.append(style)
            idx = border_styles.index(style)
-            cls = 'border_style%d'%idx
+            cls = 'border_style%d' % idx
            style_map[cls] = style
            elem.set('class', cls)
        return style_map

    def convert(self, stream, options, file_ext, log,
                accelerators):
-        from lxml import etree
        from ebook_converter.ebooks.metadata.meta import get_metadata
        from ebook_converter.ebooks.metadata.opf2 import OPFCreator
-        from ebook_converter.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
+        from ebook_converter.ebooks.rtf2xml.ParseRtf import \
+            RtfInvalidCodeException
        from ebook_converter.ebooks.rtf.input import InlineClass
-        from ebook_converter.utils.xml_parse import safe_xml_fromstring
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
@@ -269,14 +272,15 @@ class RTFInput(InputFormatPlugin):
            imap = {}
            try:
                imap = self.extract_images(d[0])
-            except:
+            except Exception:
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
-        doc = safe_xml_fromstring(xml)
+        doc = etree.fromstring(xml)
        border_styles = self.convert_borders(doc)
        for pict in doc.xpath('//rtf:pict[@num]',
-                namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
+                              namespaces={'rtf':
+                                          'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
@@ -286,8 +290,8 @@ class RTFInput(InputFormatPlugin):
        inline_class = InlineClass(self.log)
        with open(pkg_resources.resource_filename('ebook_converter',
                                                  'data/rtf.xsl')) as fobj:
-            styledoc = safe_xml_fromstring(fobj.read())
-        extensions = {('calibre', 'inline-class') : inline_class}
+            styledoc = etree.fromstring(fobj.read())
+        extensions = {('calibre', 'inline-class'): inline_class}
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = u'index.xhtml'
@@ -296,7 +300,8 @@ class RTFInput(InputFormatPlugin):
            # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            # clean multiple \n
            res = re.sub(b'\n+', b'\n', res)
-            # Replace newlines inserted by the 'empty_paragraphs' option in rtf2xml with html blank lines
+            # Replace newlines inserted by the 'empty_paragraphs' option in
+            # rtf2xml with html blank lines
            # res = re.sub('\s*<body>', '<body>', res)
            # res = re.sub('(?<=\n)\n{2}',
            # u'<p>\u00a0</p>\n'.encode('utf-8'), res)
@@ -316,7 +321,8 @@ class RTFInput(InputFormatPlugin):

    def postprocess_book(self, oeb, opts, log):
        for item in oeb.spine:
-            for img in item.data.xpath('//*[local-name()="img" and @src="__REMOVE_ME__"]'):
+            for img in item.data.xpath('//*[local-name()="img" and '
+                                       '@src="__REMOVE_ME__"]'):
                p = img.getparent()
                idx = p.index(img)
                p.remove(img)
@@ -1,27 +1,33 @@
 import os

+from lxml import etree
+
 from ebook_converter.customize.conversion import InputFormatPlugin
 from ebook_converter.ptempfile import TemporaryDirectory
 from ebook_converter.utils.filenames import ascii_filename


-__license__ = 'GPL 3'
-__copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
-__docformat__ = 'restructuredtext en'
-
-HTML_TEMPLATE = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
+HTML_TEMPLATE = ('<html><head><meta http-equiv="Content-Type" '
+                 'content="text/html; charset=utf-8"/><title>%s</title>'
+                 '</head><body>\n%s\n</body></html>')


 def html_encode(s):
-    return s.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&apos;').replace('\n', '<br/>').replace(' ', '&nbsp;')  # noqa
+    return (s.replace('&', '&amp;')
+            .replace('<', '&lt;')
+            .replace('>', '&gt;')
+            .replace('"', '&quot;')
+            .replace("'", '&apos;')
+            .replace('\n', '<br/>')
+            .replace(' ', '&nbsp;'))


 class SNBInput(InputFormatPlugin):

-    name        = 'SNB Input'
-    author      = 'Li Fanxi'
+    name = 'SNB Input'
+    author = 'Li Fanxi'
    description = 'Convert SNB files to OEB'
-    file_types  = {'snb'}
+    file_types = {'snb'}
    commit_name = 'snb_input'

    options = set()
@@ -32,13 +38,12 @@ class SNBInput(InputFormatPlugin):

        from ebook_converter.ebooks.oeb.base import DirContainer
        from ebook_converter.ebooks.snb.snbfile import SNBFile
-        from ebook_converter.utils.xml_parse import safe_xml_fromstring

        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
        try:
            snbFile.Parse(stream)
-        except:
+        except Exception:
            raise ValueError("Invalid SNB file")
        if not snbFile.IsValid():
            log.debug("Invalid SNB file")
@@ -46,27 +51,28 @@ class SNBInput(InputFormatPlugin):
        log.debug("Handle meta data ...")
        from ebook_converter.ebooks.conversion.plumber import create_oebbook
        oeb = create_oebbook(log, None, options,
-                encoding=options.input_encoding, populate=False)
+                             encoding=options.input_encoding, populate=False)
        meta = snbFile.GetFileStream('snbf/book.snbf')
        if meta is not None:
-            meta = safe_xml_fromstring(meta)
-            l = {'title'    : './/head/name',
-                  'creator'  : './/head/author',
-                  'language' : './/head/language',
-                  'generator': './/head/generator',
-                  'publisher': './/head/publisher',
-                  'cover'    : './/head/cover', }
+            meta = etree.fromstring(meta)
+            item_map = {'title': './/head/name',
+                        'creator': './/head/author',
+                        'language': './/head/language',
+                        'generator': './/head/generator',
+                        'publisher': './/head/publisher',
+                        'cover': './/head/cover'}
            d = {}
-            for item in l:
-                node = meta.find(l[item])
+            for key, item in item_map.items():
+                node = meta.find(item)
                if node is not None:
-                    d[item] = node.text if node.text is not None else ''
+                    d[key] = node.text if node.text is not None else ''
                else:
-                    d[item] = ''
+                    d[key] = ''

            oeb.metadata.add('title', d['title'])
-            oeb.metadata.add('creator', d['creator'], attrib={'role':'aut'})
-            oeb.metadata.add('language', d['language'].lower().replace('_', '-'))
+            oeb.metadata.add('creator', d['creator'], attrib={'role': 'aut'})
+            oeb.metadata.add('language',
+                             d['language'].lower().replace('_', '-'))
            oeb.metadata.add('generator', d['generator'])
            oeb.metadata.add('publisher', d['publisher'])
            if d['cover'] != '':
@@ -84,7 +90,7 @@ class SNBInput(InputFormatPlugin):
            toc = snbFile.GetFileStream('snbf/toc.snbf')
            oeb.container = DirContainer(tdir, log)
            if toc is not None:
-                toc = safe_xml_fromstring(toc)
+                toc = etree.fromstring(toc)
                i = 1
                for ch in toc.find('.//body'):
                    chapterName = ch.text
@@ -93,18 +99,22 @@ class SNBInput(InputFormatPlugin):
                    data = snbFile.GetFileStream('snbc/' + chapterSrc)
                    if data is None:
                        continue
-                    snbc = safe_xml_fromstring(data)
+                    snbc = etree.fromstring(data)
                    lines = []
                    for line in snbc.find('.//body'):
                        if line.tag == 'text':
                            lines.append('<p>%s</p>' % html_encode(line.text))
                        elif line.tag == 'img':
-                            lines.append('<p><img src="%s" /></p>' % html_encode(line.text))
+                            lines.append('<p><img src="%s" /></p>' %
+                                         html_encode(line.text))
                    with open(os.path.join(tdir, fname), 'wb') as f:
-                        f.write((HTML_TEMPLATE % (chapterName, '\n'.join(lines))).encode('utf-8', 'replace'))
+                        f.write((HTML_TEMPLATE %
+                                 (chapterName,
+                                  '\n'.join(lines))).encode('utf-8',
+                                                            'replace'))
                    oeb.toc.add(ch.text, fname)
-                    id, href = oeb.manifest.generate(id='html',
-                        href=ascii_filename(fname))
+                    id, href = oeb.manifest.generate(
+                        id='html', href=ascii_filename(fname))
                    item = oeb.manifest.add(id, href, 'text/html')
                    item.html_input_href = fname
                    oeb.spine.add(item, True)
@@ -112,7 +122,7 @@ class SNBInput(InputFormatPlugin):
                imageFiles = snbFile.OutputImageFiles(tdir)
                for f, m in imageFiles:
                    id, href = oeb.manifest.generate(id='image',
-                        href=ascii_filename(f))
+                                                     href=ascii_filename(f))
                    item = oeb.manifest.add(id, href, m)
                    item.html_input_href = f