Fixed flake8 issues in several modules

2026-04-04 20:03:34 +02:00 · 2020-06-14 15:49:11 +02:00
parent 1d4f75ceba
commit 9891d02694
10 changed files with 505 additions and 335 deletions
--- a/ebook_converter/ebooks/conversion/plugins/html_input.py
+++ b/ebook_converter/ebooks/conversion/plugins/html_input.py
@@ -15,17 +15,18 @@ from ebook_converter.polyglot.builtins import as_unicode
 def sanitize_file_name(x):
-    ans = re.sub(r'\s+', ' ', re.sub(r'[?&=;#]', '_', ascii_filename(x))).strip().rstrip('.')
+    ans = re.sub(r'\s+', ' ', re.sub(r'[?&=;#]', '_',
                                     ascii_filename(x))).strip().rstrip('.')
    ans, ext = ans.rpartition('.')[::2]
    return (ans.strip() + '.' + ext.strip()).rstrip('.')
 class HTMLInput(InputFormatPlugin):
-    name        = 'HTML Input'
+    name = 'HTML Input'
-    author      = 'Kovid Goyal'
+    author = 'Kovid Goyal'
    description = 'Convert HTML and OPF files to an OEB'
-    file_types  = {'opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'}
+    file_types = {'opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'}
    commit_name = 'html_input'
    options = {
--- a/ebook_converter/ebooks/conversion/plugins/htmlz_input.py
+++ b/ebook_converter/ebooks/conversion/plugins/htmlz_input.py
@@ -6,10 +6,10 @@ from ebook_converter.customize.conversion import InputFormatPlugin
 class HTMLZInput(InputFormatPlugin):
-    name        = 'HTLZ Input'
+    name = 'HTLZ Input'
-    author      = 'John Schember'
+    author = 'John Schember'
    description = 'Convert HTML files to HTML'
-    file_types  = {'htmlz'}
+    file_types = {'htmlz'}
    commit_name = 'htmlz_input'
    def convert(self, stream, options, file_ext, log,
@@ -36,13 +36,14 @@ class HTMLZInput(InputFormatPlugin):
                top_levels.append(x)
        # Try to find an index file.
        for x in top_levels:
-            if x.lower() in (u'index.html', u'index.xhtml', u'index.htm'):
+            if x.lower() in ('index.html', 'index.xhtml', 'index.htm'):
                index = x
                break
        # Look for multiple HTML files in the archive. We look at the
        # top level files only as only they matter in HTMLZ.
        for x in top_levels:
-            if os.path.splitext(x)[1].lower() in (u'.html', u'.xhtml', u'.htm'):
+            if os.path.splitext(x)[1].lower() in ('.html', '.xhtml',
                                                  '.htm'):
                # Set index to the first HTML file found if it's not
                # called index.
                if not index:
@@ -84,15 +85,14 @@ class HTMLZInput(InputFormatPlugin):
        c = 0
        while os.path.exists(htmlfile):
            c += 1
-            htmlfile = u'index%d.html'%c
+            htmlfile = u'index%d.html' % c
        with open(htmlfile, 'wb') as f:
            f.write(html.encode('utf-8'))
        odi = options.debug_pipeline
        options.debug_pipeline = None
        # Generate oeb from html conversion.
        with open(htmlfile, 'rb') as f:
-            oeb = html_input.convert(f, options, 'html', log,
+            oeb = html_input.convert(f, options, 'html', log, {})
                {})
        options.debug_pipeline = odi
        os.remove(htmlfile)
--- a/ebook_converter/ebooks/docx/to_html.py
+++ b/ebook_converter/ebooks/docx/to_html.py
@@ -1,5 +1,11 @@
-import sys, os, re, math, errno, uuid, numbers
+import sys
-from collections import OrderedDict, defaultdict
+import os
 import re
 import math
 import errno
 import uuid
 import numbers
 import collections
 import mimetypes
 from lxml import etree
@@ -7,23 +13,24 @@ from lxml import html
 from lxml.html.builder import (
    HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, A, DT, DL, DD, H1)
 from ebook_converter import guess_type
 from ebook_converter.ebooks.docx.container import DOCX
 from ebook_converter.ebooks.docx.names import XML, generate_anchor
 from ebook_converter.ebooks.docx.styles import Styles, inherit, PageProperties
 from ebook_converter.ebooks.docx.numbering import Numbering
 from ebook_converter.ebooks.docx.fonts import Fonts, is_symbol_font, map_symbol_text
 from ebook_converter.ebooks.docx.images import Images
 from ebook_converter.ebooks.docx.tables import Tables
 from ebook_converter.ebooks.docx.footnotes import Footnotes
 from ebook_converter.ebooks.docx.cleanup import cleanup_markup
 from ebook_converter.ebooks.docx.container import DOCX
 from ebook_converter.ebooks.docx.fields import Fields
 from ebook_converter.ebooks.docx.fonts import Fonts
 from ebook_converter.ebooks.docx.fonts import is_symbol_font
 from ebook_converter.ebooks.docx.fonts import map_symbol_text
 from ebook_converter.ebooks.docx.footnotes import Footnotes
 from ebook_converter.ebooks.docx.images import Images
 from ebook_converter.ebooks.docx.names import XML, generate_anchor
 from ebook_converter.ebooks.docx.numbering import Numbering
 from ebook_converter.ebooks.docx.settings import Settings
 from ebook_converter.ebooks.docx.styles import Styles, inherit, PageProperties
 from ebook_converter.ebooks.docx.tables import Tables
 from ebook_converter.ebooks.docx.theme import Theme
 from ebook_converter.ebooks.docx.toc import create_toc
 from ebook_converter.ebooks.docx.fields import Fields
 from ebook_converter.ebooks.docx.settings import Settings
 from ebook_converter.ebooks.metadata.opf2 import OPFCreator
-from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1
+from ebook_converter.utils.localization import canonicalize_lang
-
+from ebook_converter.utils.localization import lang_as_iso639_1
 NBSP = '\xa0'
@@ -54,7 +61,9 @@ def html_lang(docx_lang):
 class Convert(object):
-    def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, notes_text=None, notes_nopb=False, nosupsub=False):
+    def __init__(self, path_or_stream, dest_dir=None, log=None,
                 detect_cover=True, notes_text=None, notes_nopb=False,
                 nosupsub=False):
        self.docx = DOCX(path_or_stream, log=log)
        self.namespace = self.docx.namespace
        self.ms_pat = re.compile(r'\s{2,}')
@@ -73,7 +82,7 @@ class Convert(object):
        self.fields = Fields(self.namespace)
        self.styles = Styles(self.namespace, self.tables)
        self.images = Images(self.namespace, self.log)
-        self.object_map = OrderedDict()
+        self.object_map = collections.OrderedDict()
        self.html = HTML(
            HEAD(
                META(charset='utf-8'),
@@ -82,9 +91,9 @@ class Convert(object):
            ),
            self.body
        )
-        self.html.text='\n\t'
+        self.html.text = '\n\t'
-        self.html[0].text='\n\t\t'
+        self.html[0].text = '\n\t\t'
-        self.html[0].tail='\n'
+        self.html[0].tail = '\n'
        for child in self.html[0]:
            child.tail = '\n\t\t'
        self.html[0][-1].tail = '\n\t'
@@ -98,17 +107,18 @@ class Convert(object):
    def __call__(self):
        doc = self.docx.document
-        relationships_by_id, relationships_by_type = self.docx.document_relationships
+        (relationships_by_id,
         relationships_by_type) = self.docx.document_relationships
        self.resolve_alternate_content(doc)
        self.fields(doc, self.log)
        self.read_styles(relationships_by_type)
        self.images(relationships_by_id)
-        self.layers = OrderedDict()
+        self.layers = collections.OrderedDict()
        self.framed = [[]]
        self.frame_map = {}
        self.framed_map = {}
        self.anchor_map = {}
-        self.link_map = defaultdict(list)
+        self.link_map = collections.defaultdict(list)
        self.link_source_map = {}
        self.toc_anchor = None
        self.block_runs = []
@@ -142,7 +152,8 @@ class Convert(object):
                dl = DL(id=anchor)
                dl.set('class', 'footnote')
                self.body.append(dl)
-                dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text)))
+                dl.append(DT('[', A('←' + text, href='#back_%s' % anchor,
                                    title=text)))
                dl[-1][0].tail = ']'
                dl.append(DD())
                paras = []
@@ -159,7 +170,8 @@ class Convert(object):
                self.mark_block_runs(paras)
        for p, wp in self.object_map.items():
-            if len(p) > 0 and not p.text and len(p[0]) > 0 and not p[0].text and p[0][0].get('class', None) == 'tab':
+            if (len(p) > 0 and not p.text and len(p[0]) > 0 and
                    not p[0].text and p[0][0].get('class', None) == 'tab'):
                # Paragraph uses tabs for indentation, convert to text-indent
                parent = p[0]
                tabs = []
@@ -172,7 +184,9 @@ class Convert(object):
                        break
                indent = len(tabs) * self.settings.default_tab_stop
                style = self.styles.resolve(wp)
-                if style.text_indent is inherit or (hasattr(style.text_indent, 'endswith') and style.text_indent.endswith('pt')):
+                if (style.text_indent is inherit or
                        (hasattr(style.text_indent, 'endswith') and
                         style.text_indent.endswith('pt'))):
                    if style.text_indent is not inherit:
                        indent = float(style.text_indent[:-2]) + indent
                    style.text_indent = '%.3gpt' % indent
@@ -197,7 +211,8 @@ class Convert(object):
                except (TypeError, ValueError):
                    lvl = 0
                numbered.append((html_obj, num_id, lvl))
-        self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map, self.images)
+        self.numbering.apply_markup(numbered, self.body, self.styles,
                                    self.object_map, self.images)
        self.apply_frames()
        if len(self.body) > 0:
@@ -232,13 +247,15 @@ class Convert(object):
        self.fields.polish_markup(self.object_map)
        self.log.debug('Cleaning up redundant markup generated by Word')
-        self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath)
+        self.cover_image = cleanup_markup(self.log, self.html, self.styles,
                                          self.dest_dir, self.detect_cover,
                                          self.namespace.XPath)
        return self.write(doc)
    def read_page_properties(self, doc):
        current = []
-        self.page_map = OrderedDict()
+        self.page_map = collections.OrderedDict()
        self.section_starts = []
        for p in self.namespace.descendants(doc, 'w:p', 'w:tbl'):
@@ -267,7 +284,8 @@ class Convert(object):
    def resolve_alternate_content(self, doc):
        # For proprietary extensions in Word documents use the fallback, spec
        # compliant form
-        # See https://wiki.openoffice.org/wiki/OOXML/Markup_Compatibility_and_Extensibility
+        # See https://wiki.openoffice.org/wiki/
        # OOXML/Markup_Compatibility_and_Extensibility
        for ac in self.namespace.descendants(doc, 'mc:AlternateContent'):
            choices = self.namespace.XPath('./mc:Choice')(ac)
            fallbacks = self.namespace.XPath('./mc:Fallback')(ac)
@@ -284,7 +302,8 @@ class Convert(object):
                cname[-1] = defname
                if self.docx.exists('/'.join(cname)):
                    name = name
-            if name and name.startswith('word/word') and not self.docx.exists(name):
+            if (name and name.startswith('word/word') and
                    not self.docx.exists(name)):
                name = name.partition('/')[2]
            return name
@@ -327,7 +346,8 @@ class Convert(object):
                self.log.warn('Endnotes %s do not exist' % enname)
            else:
                enrel = self.docx.get_relationships(enname)
-        footnotes(etree.fromstring(foraw) if foraw else None, forel, etree.fromstring(enraw) if enraw else None, enrel)
+        footnotes(etree.fromstring(foraw) if foraw else None, forel,
                  etree.fromstring(enraw) if enraw else None, enrel)
        if fname is not None:
            embed_relationships = self.docx.get_relationships(fname)[0]
@@ -336,7 +356,8 @@ class Convert(object):
            except KeyError:
                self.log.warn('Fonts table %s does not exist' % fname)
            else:
-                fonts(etree.fromstring(raw), embed_relationships, self.docx, self.dest_dir)
+                fonts(etree.fromstring(raw), embed_relationships, self.docx,
                      self.dest_dir)
        if tname is not None:
            try:
@@ -364,16 +385,20 @@ class Convert(object):
            except KeyError:
                self.log.warn('Numbering styles %s do not exist' % nname)
            else:
-                numbering(etree.fromstring(raw), self.styles, self.docx.get_relationships(nname)[0])
+                numbering(etree.fromstring(raw), self.styles,
                          self.docx.get_relationships(nname)[0])
        self.styles.resolve_numbering(numbering)
    def write(self, doc):
-        toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log, self.namespace)
+        toc = create_toc(doc, self.body, self.resolved_link_map, self.styles,
-        raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
+                         self.object_map, self.log, self.namespace)
        raw = html.tostring(self.html, encoding='utf-8',
                            doctype='<!DOCTYPE html>')
        with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
            f.write(raw)
-        css = self.styles.generate_css(self.dest_dir, self.docx, self.notes_nopb, self.nosupsub)
+        css = self.styles.generate_css(self.dest_dir, self.docx,
                                       self.notes_nopb, self.nosupsub)
        if css:
            with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
                f.write(css.encode('utf-8'))
@@ -394,23 +419,29 @@ class Convert(object):
                                         title='Table of Contents',
                                         type='toc'))
        toc_file = os.path.join(self.dest_dir, 'toc.ncx')
-        with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(toc_file, 'wb') as ncx:
+        with open(os.path.join(self.dest_dir,
                               'metadata.opf'), 'wb') as of, open(toc_file,
                                                                  'wb') as ncx:
            opf.render(of, ncx, 'toc.ncx', process_guide=process_guide)
        if os.path.getsize(toc_file) == 0:
            os.remove(toc_file)
        return os.path.join(self.dest_dir, 'metadata.opf')
    def read_block_anchors(self, doc):
-        doc_anchors = frozenset(self.namespace.XPath('./w:body/w:bookmarkStart[@w:name]')(doc))
+        doc_anchors = frozenset(self.namespace.XPath('./w:body/w:bookmarkStart'
                                                     '[@w:name]')(doc))
        if doc_anchors:
            current_bm = set()
-            rmap = {v:k for k, v in self.object_map.items()}
+            rmap = {v: k for k, v in self.object_map.items()}
-            for p in self.namespace.descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'):
+            for p in self.namespace.descendants(doc, 'w:p',
                                                'w:bookmarkStart[@w:name]'):
                if p.tag.endswith('}p'):
                    if current_bm and p in rmap:
                        para = rmap[p]
                        if 'id' not in para.attrib:
-                            para.set('id', generate_anchor(next(iter(current_bm)), frozenset(self.anchor_map.values())))
+                            _bm = next(iter(current_bm))
                            _am = frozenset(self.anchor_map.values())
                            para.set('id', generate_anchor(_bm, _am))
                        for name in current_bm:
                            self.anchor_map[name] = para.get('id')
                        current_bm = set()
@@ -442,13 +473,15 @@ class Convert(object):
                except AttributeError:
                    break
-        for x in self.namespace.descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink', 'w:instrText'):
+        for x in self.namespace.descendants(p, 'w:r', 'w:bookmarkStart',
                                            'w:hyperlink', 'w:instrText'):
            if p_parent(x) is not p:
                continue
            if x.tag.endswith('}r'):
                span = self.convert_run(x)
                if current_anchor is not None:
-                    (dest if len(dest) == 0 else span).set('id', current_anchor)
+                    (dest if len(dest) == 0 else span).set('id',
                                                           current_anchor)
                    current_anchor = None
                if current_hyperlink is not None:
                    try:
@@ -462,11 +495,14 @@ class Convert(object):
                self.layers[p].append(x)
            elif x.tag.endswith('}bookmarkStart'):
                anchor = self.namespace.get(x, 'w:name')
-                if anchor and anchor not in self.anchor_map and anchor != '_GoBack':
+                if (anchor and anchor not in self.anchor_map and
                        anchor != '_GoBack'):
                    # _GoBack is a special bookmark inserted by Word 2010 for
                    # the return to previous edit feature, we ignore it
                    old_anchor = current_anchor
-                    self.anchor_map[anchor] = current_anchor = generate_anchor(anchor, frozenset(self.anchor_map.values()))
+                    current_anchor = generate_anchor(
                        anchor, frozenset(self .anchor_map.values()))
                    self.anchor_map[anchor] = current_anchor
                    if old_anchor is not None:
                        # The previous anchor was not applied to any element
                        for a, t in tuple(self.anchor_map.items()):
@@ -474,10 +510,13 @@ class Convert(object):
                                self.anchor_map[a] = current_anchor
            elif x.tag.endswith('}hyperlink'):
                current_hyperlink = x
-            elif x.tag.endswith('}instrText') and x.text and x.text.strip().startswith('TOC '):
+            elif (x.tag.endswith('}instrText') and x.text and
                    x.text.strip().startswith('TOC ')):
                old_anchor = current_anchor
                anchor = str(uuid.uuid4())
-                self.anchor_map[anchor] = current_anchor = generate_anchor('toc', frozenset(self.anchor_map.values()))
+                current_anchor = generate_anchor(
                    'toc', frozenset(self.anchor_map.values()))
                self.anchor_map[anchor] = current_anchor
                self.toc_anchor = current_anchor
                if old_anchor is not None:
                    # The previous anchor was not applied to any element
@@ -489,7 +528,8 @@ class Convert(object):
            dest.set('id', current_anchor)
            current_anchor = None
-        m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
+        m = re.match(r'heading\s+(\d+)$', style.style_name or '',
                     re.IGNORECASE)
        if m is not None:
            n = min(6, max(1, int(m.group(1))))
            dest.tag = 'h%d' % n
@@ -533,7 +573,8 @@ class Convert(object):
        if len(dest) > 0 and not dest[-1].tail:
            if dest[-1].tag == 'br':
                dest[-1].tail = NBSP
-            elif len(dest[-1]) > 0 and dest[-1][-1].tag == 'br' and not dest[-1][-1].tail:
+            elif (len(dest[-1]) > 0 and dest[-1][-1].tag == 'br' and
                    not dest[-1][-1].tail):
                dest[-1][-1].tail = NBSP
        return dest
@@ -578,12 +619,12 @@ class Convert(object):
            if anchor and anchor in self.anchor_map:
                span.set('href', '#' + self.anchor_map[anchor])
                continue
-            self.log.warn('Hyperlink with unknown target (rid=%s, anchor=%s), ignoring' %
+            self.log.warn('Hyperlink with unknown target (rid=%s, anchor=%s), '
-                          (rid, anchor))
+                          'ignoring' % (rid, anchor))
            # hrefs that point nowhere give epubcheck a hernia. The element
            # should be styled explicitly by Word anyway.
            # span.set('href', '#')
-        rmap = {v:k for k, v in self.object_map.items()}
+        rmap = {v: k for k, v in self.object_map.items()}
        for hyperlink, runs in self.fields.hyperlink_fields:
            spans = [rmap[r] for r in runs if r in rmap]
            if not spans:
@@ -604,7 +645,8 @@ class Convert(object):
                if anchor in self.anchor_map:
                    span.set('href', '#' + self.anchor_map[anchor])
                    continue
-                self.log.warn('Hyperlink field with unknown anchor: %s' % anchor)
+                self.log.warn('Hyperlink field with unknown anchor: %s' %
                              anchor)
            else:
                if url in self.anchor_map:
                    span.set('href', '#' + self.anchor_map[url])
@@ -652,7 +694,8 @@ class Convert(object):
                # actually needs it, i.e. if it has more than one
                # consecutive space or it has newlines or tabs.
                multi_spaces = self.ms_pat.search(ctext) is not None
-                preserve = multi_spaces or self.ws_pat.search(ctext) is not None
+                preserve = (multi_spaces or
                            self.ws_pat.search(ctext) is not None)
                if preserve:
                    text.add_elem(SPAN(ctext, style="white-space:pre-wrap"))
                    ans.append(text.elem)
@@ -668,24 +711,30 @@ class Convert(object):
                else:
                    clear = child.get('clear', None)
                    if clear in {'all', 'left', 'right'}:
-                        br = BR(style='clear:%s'%('both' if clear == 'all' else clear))
+                        br = BR(style='clear:%s' % ('both' if clear == 'all'
                                                    else clear))
                    else:
                        br = BR()
                text.add_elem(br)
                ans.append(text.elem)
-            elif self.namespace.is_tag(child, 'w:drawing') or self.namespace.is_tag(child, 'w:pict'):
+            elif (self.namespace.is_tag(child, 'w:drawing') or
-                for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
+                  self.namespace.is_tag(child, 'w:pict')):
                for img in self.images.to_html(child, self.current_page,
                                               self.docx, self.dest_dir):
                    text.add_elem(img)
                    ans.append(text.elem)
-            elif self.namespace.is_tag(child, 'w:footnoteReference') or self.namespace.is_tag(child, 'w:endnoteReference'):
+            elif (self.namespace.is_tag(child, 'w:footnoteReference') or
                  self.namespace.is_tag(child, 'w:endnoteReference')):
                anchor, name = self.footnotes.get_ref(child)
                if anchor and name:
-                    l = A(name, id='back_%s' % anchor, href='#' + anchor, title=name)
+                    _l = A(name, id='back_%s' % anchor, href='#' + anchor,
-                    l.set('class', 'noteref')
+                           title=name)
-                    text.add_elem(l)
+                    _l.set('class', 'noteref')
                    text.add_elem(_l)
                    ans.append(text.elem)
            elif self.namespace.is_tag(child, 'w:tab'):
-                spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
+                spaces = int(math.ceil((self.settings.default_tab_stop / 36) *
                                       6))
                text.add_elem(SPAN(NBSP * spaces))
                ans.append(text.elem)
                ans[-1].set('class', 'tab')
@@ -699,7 +748,8 @@ class Convert(object):
        style = self.styles.resolve_run(run)
        if style.vert_align in {'superscript', 'subscript'}:
            if ans.text or len(ans):
-                ans.set('data-docx-vert', 'sup' if style.vert_align == 'superscript' else 'sub')
+                ans.set('data-docx-vert',
                        'sup' if style.vert_align == 'superscript' else 'sub')
        if style.lang is not inherit:
            lang = html_lang(style.lang)
            if lang is not None and lang != self.doc_lang:
@@ -738,12 +788,14 @@ class Convert(object):
            idx = parent.index(paras[0])
            frame = DIV(*paras)
            parent.insert(idx, frame)
-            self.framed_map[frame] = css = style.css(self.page_map[self.object_map[paras[0]]])
+            self.framed_map[frame] = css = style.css(
                self.page_map[self.object_map[paras[0]]])
            self.styles.register(css, 'frame')
        if not self.block_runs:
            return
-        rmap = {v:k for k, v in self.object_map.items()}
+
        rmap = {v: k for k, v in self.object_map.items()}
        for border_style, blocks in self.block_runs:
            paras = tuple(rmap[p] for p in blocks)
            for p in paras:
@@ -796,17 +848,20 @@ class Convert(object):
                else:
                    border_style = style.clone_border_styles()
                    if has_visible_border:
-                        border_style.margin_top, style.margin_top = style.margin_top, inherit
+                        style.margin_top = inherit
                        border_style.margin_top = style.margin_top
                if p is not run[-1]:
                    style.padding_bottom = 0
                else:
                    if has_visible_border:
-                        border_style.margin_bottom, style.margin_bottom = style.margin_bottom, inherit
+                        style.margin_bottom = inherit
                        border_style.margin_bottom = style.margin_bottom
                style.clear_borders()
                if p is not run[-1]:
                    style.apply_between_border()
            if has_visible_border:
-                border_style.margin_left, border_style.margin_right = max_left,max_right
+                border_style.margin_left = max_left
                border_style.margin_right = max_right
                self.block_runs.append((border_style, run))
        run = []
--- a/ebook_converter/ebooks/docx/writer/container.py
+++ b/ebook_converter/ebooks/docx/writer/container.py
@@ -1,5 +1,6 @@
 import mimetypes
-import textwrap, os
+import os
 import textwrap
 from lxml import etree
 from lxml.builder import ElementMaker
@@ -9,22 +10,48 @@ from ebook_converter.ebooks.docx.names import DOCXNamespace
 from ebook_converter.ebooks.metadata import authors_to_string
 from ebook_converter.ebooks.pdf.render.common import PAPER_SIZES
 from ebook_converter.utils.date import utcnow
-from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1
+from ebook_converter.utils.localization import canonicalize_lang
 from ebook_converter.utils.localization import lang_as_iso639_1
 from ebook_converter.utils.zipfile import ZipFile
 WORD_TYPES = {"/word/footnotes.xml": "application/vnd.openxmlformats-"
              "officedocument.wordprocessingml.footnotes+xml",
              "/word/document.xml": "application/vnd.openxmlformats-"
              "officedocument.wordprocessingml.document.main+xml",
              "/word/numbering.xml": "application/vnd.openxmlformats-"
              "officedocument.wordprocessingml.numbering+xml",
              "/word/styles.xml": "application/vnd.openxmlformats-"
              "officedocument.wordprocessingml.styles+xml",
              "/word/endnotes.xml": "application/vnd.openxmlformats-"
              "officedocument.wordprocessingml.endnotes+xml",
              "/word/settings.xml": "application/vnd.openxmlformats-"
              "officedocument.wordprocessingml.settings+xml",
              "/word/theme/theme1.xml": "application/vnd.openxmlformats-"
              "officedocument.theme+xml",
              "/word/fontTable.xml": "application/vnd.openxmlformats-"
              "officedocument.wordprocessingml.fontTable+xml",
              "/word/webSettings.xml": "application/vnd.openxmlformats-"
              "officedocument.wordprocessingml.webSettings+xml",
              "/docProps/core.xml": "application/vnd.openxmlformats-package."
              "core-properties+xml",
              "/docProps/app.xml": "application/vnd.openxmlformats-"
              "officedocument.extended-properties+xml"}
 def xml2str(root, pretty_print=False, with_tail=False):
    if hasattr(etree, 'cleanup_namespaces'):
        etree.cleanup_namespaces(root)
    ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
-                          pretty_print=pretty_print, with_tail=with_tail)
+                         pretty_print=pretty_print, with_tail=with_tail)
    return ans
 def page_size(opts):
    width, height = PAPER_SIZES[opts.docx_page_size]
    if opts.docx_custom_page_size is not None:
-        width, height = map(float, opts.docx_custom_page_size.partition('x')[0::2])
+        width, height = map(float,
                            opts.docx_custom_page_size.partition('x')[0::2])
    return width, height
@@ -47,7 +74,9 @@ def create_skeleton(opts, namespaces=None):
    def w(x):
        return '{%s}%s' % (namespaces['w'], x)
-    dn = {k:v for k, v in namespaces.items() if k in {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne', 'a', 'pic'}}
+    dn = {k: v for k, v in namespaces.items() if k in {'w', 'r', 'm', 've',
                                                       'o', 'wp', 'w10', 'wne',
                                                       'a', 'pic'}}
    E = ElementMaker(namespace=dn['w'], nsmap=dn)
    doc = E.document()
    body = E.body()
@@ -59,27 +88,32 @@ def create_skeleton(opts, namespaces=None):
        val = page_margin(opts, which)
        return w(which), str(int(val * 20))
    body.append(E.sectPr(
-        E.pgSz(**{w('w'):str(width), w('h'):str(height)}),
+        E.pgSz(**{w('w'): str(width), w('h'): str(height)}),
        E.pgMar(**dict(map(margin, 'left top right bottom'.split()))),
-        E.cols(**{w('space'):'720'}),
+        E.cols(**{w('space'): '720'}),
-        E.docGrid(**{w('linePitch'):"360"}),
+        E.docGrid(**{w('linePitch'): "360"}),
    ))
-    dn = {k:v for k, v in namespaces.items() if k in tuple('wra') + ('wp',)}
+    dn = {k: v for k, v in namespaces.items() if k in tuple('wra') + ('wp',)}
    E = ElementMaker(namespace=dn['w'], nsmap=dn)
    styles = E.styles(
        E.docDefaults(
            E.rPrDefault(
                E.rPr(
-                    E.rFonts(**{w('asciiTheme'):"minorHAnsi", w('eastAsiaTheme'):"minorEastAsia", w('hAnsiTheme'):"minorHAnsi", w('cstheme'):"minorBidi"}),
+                    E.rFonts(**{w('asciiTheme'): "minorHAnsi",
-                    E.sz(**{w('val'):'22'}),
+                                w('eastAsiaTheme'): "minorEastAsia",
-                    E.szCs(**{w('val'):'22'}),
+                                w('hAnsiTheme'): "minorHAnsi",
-                    E.lang(**{w('val'):'en-US', w('eastAsia'):"en-US", w('bidi'):"ar-SA"})
+                                w('cstheme'): "minorBidi"}),
                    E.sz(**{w('val'): '22'}),
                    E.szCs(**{w('val'): '22'}),
                    E.lang(**{w('val'): 'en-US', w('eastAsia'): "en-US",
                              w('bidi'): "ar-SA"})
                )
            ),
            E.pPrDefault(
                E.pPr(
-                    E.spacing(**{w('after'):"0", w('line'):"276", w('lineRule'):"auto"})
+                    E.spacing(**{w('after'): "0", w('line'): "276",
                                 w('lineRule'): "auto"})
                )
            )
        )
@@ -103,8 +137,8 @@ def update_doc_props(root, mi, namespace):
    if mi.comments:
        setm('description', mi.comments)
    if mi.languages:
-        l = canonicalize_lang(mi.languages[0])
+        _l = canonicalize_lang(mi.languages[0])
-        setm('language', lang_as_iso639_1(l) or l)
+        setm('language', lang_as_iso639_1(_l) or _l)
 class DocumentRelationships(object):
@@ -115,8 +149,7 @@ class DocumentRelationships(object):
        for typ, target in {namespace.names['STYLES']: 'styles.xml',
                            namespace.names['NUMBERING']: 'numbering.xml',
                            namespace.names['WEB_SETTINGS']: 'webSettings.xml',
-                            namespace.names['FONTS']: 'fontTable.xml',
+                            namespace.names['FONTS']: 'fontTable.xml'}.items():
-                           }.items():
            self.add_relationship(target, typ)
    def get_relationship_id(self, target, rtype, target_mode=None):
@@ -134,7 +167,8 @@ class DocumentRelationships(object):
    def serialize(self):
        namespaces = self.namespace.namespaces
-        E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']})
+        E = ElementMaker(namespace=namespaces['pr'],
+                         nsmap={None: namespaces['pr']})
        relationships = E.Relationships()
        for (target, rtype, target_mode), rid in self.rmap.items():
            r = E.Relationship(Id=rid, Type=rtype, Target=target)
@@ -151,9 +185,12 @@ class DOCX(object):
        namespaces = self.namespace.namespaces
        self.opts, self.log = opts, log
        self.document_relationships = DocumentRelationships(self.namespace)
-        self.font_table = etree.Element('{%s}fonts' % namespaces['w'], nsmap={k:namespaces[k] for k in 'wr'})
-        self.numbering = etree.Element('{%s}numbering' % namespaces['w'], nsmap={k:namespaces[k] for k in 'wr'})
-        E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']})
+        self.font_table = etree.Element('{%s}fonts' % namespaces['w'],
+                                        nsmap={k: namespaces[k] for k in 'wr'})
+        self.numbering = etree.Element('{%s}numbering' % namespaces['w'],
+                                       nsmap={k: namespaces[k] for k in 'wr'})
+        E = ElementMaker(namespace=namespaces['pr'],
+                         nsmap={None: namespaces['pr']})
        self.embedded_fonts = E.Relationships()
        self.fonts = {}
        self.images = {}
@@ -161,21 +198,10 @@ class DOCX(object):
    # Boilerplate {{{
    @property
    def contenttypes(self):
-        E = ElementMaker(namespace=self.namespace.namespaces['ct'], nsmap={None:self.namespace.namespaces['ct']})
+        E = ElementMaker(namespace=self.namespace.namespaces['ct'],
+                         nsmap={None: self.namespace.namespaces['ct']})
        types = E.Types()
-        for partname, mt in {
-            "/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
-            "/word/document.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
-            "/word/numbering.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
-            "/word/styles.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
-            "/word/endnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
-            "/word/settings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
-            "/word/theme/theme1.xml": "application/vnd.openxmlformats-officedocument.theme+xml",
-            "/word/fontTable.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
-            "/word/webSettings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
-            "/docProps/core.xml": "application/vnd.openxmlformats-package.core-properties+xml",
-            "/docProps/app.xml": "application/vnd.openxmlformats-officedocument.extended-properties+xml",
-        }.items():
+        for partname, mt in WORD_TYPES.items():
            types.append(E.Override(PartName=partname, ContentType=mt))
        added = {'png', 'gif', 'jpeg', 'jpg', 'svg', 'xml'}
        for ext in added:
@@ -199,7 +225,8 @@ class DOCX(object):
    @property
    def appproperties(self):
-        E = ElementMaker(namespace=self.namespace.namespaces['ep'], nsmap={None:self.namespace.namespaces['ep']})
+        E = ElementMaker(namespace=self.namespace.namespaces['ep'],
+                         nsmap={None: self.namespace.namespaces['ep']})
        props = E.Properties(
            E.Application(__appname__),
            E.AppVersion('%02d.%04d' % numeric_version[:2]),
@@ -216,16 +243,17 @@ class DOCX(object):
    @property
    def containerrels(self):
        return textwrap.dedent('''\
-        <?xml version='1.0' encoding='utf-8'?>
+<?xml version='1.0' encoding='utf-8'?>
-        <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
+<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
-            <Relationship Id="rId3" Type="{APPPROPS}" Target="docProps/app.xml"/>
+    <Relationship Id="rId3" Type="{APPPROPS}" Target="docProps/app.xml"/>
-            <Relationship Id="rId2" Type="{DOCPROPS}" Target="docProps/core.xml"/>
+    <Relationship Id="rId2" Type="{DOCPROPS}" Target="docProps/core.xml"/>
-            <Relationship Id="rId1" Type="{DOCUMENT}" Target="word/document.xml"/>
+    <Relationship Id="rId1" Type="{DOCUMENT}" Target="word/document.xml"/>
-        </Relationships>'''.format(**self.namespace.names)).encode('utf-8')
+</Relationships>'''.format(**self.namespace.names)).encode('utf-8')  # noqa
    @property
    def websettings(self):
-        E = ElementMaker(namespace=self.namespace.namespaces['w'], nsmap={'w':self.namespace.namespaces['w']})
+        E = ElementMaker(namespace=self.namespace.namespaces['w'],
+                         nsmap={'w': self.namespace.namespaces['w']})
        ws = E.webSettings(
            E.optimizeForBrowser, E.allowPNG, E.doNotSaveAsSingleFile)
        return xml2str(ws)
@@ -234,11 +262,15 @@ class DOCX(object):
    def convert_metadata(self, mi):
        namespaces = self.namespace.namespaces
-        E = ElementMaker(namespace=namespaces['cp'], nsmap={x:namespaces[x] for x in 'cp dc dcterms xsi'.split()})
+        E = ElementMaker(namespace=namespaces['cp'],
+                         nsmap={x: namespaces[x]
+                                for x in 'cp dc dcterms xsi'.split()})
        cp = E.coreProperties(E.revision("1"), E.lastModifiedBy('calibre'))
        ts = utcnow().isoformat('T').rpartition('.')[0] + 'Z'
        for x in 'created modified'.split():
-            x = cp.makeelement('{%s}%s' % (namespaces['dcterms'], x), **{'{%s}type' % namespaces['xsi']:'dcterms:W3CDTF'})
+            x = cp.makeelement('{%s}%s' % (namespaces['dcterms'], x),
+                               **{'{%s}type' %
+                                  namespaces['xsi']: 'dcterms:W3CDTF'})
            x.text = ts
            cp.append(x)
        self.mi = mi
@@ -261,8 +293,10 @@ class DOCX(object):
            zf.writestr('word/styles.xml', xml2str(self.styles))
            zf.writestr('word/numbering.xml', xml2str(self.numbering))
            zf.writestr('word/fontTable.xml', xml2str(self.font_table))
-            zf.writestr('word/_rels/document.xml.rels', self.document_relationships.serialize())
-            zf.writestr('word/_rels/fontTable.xml.rels', xml2str(self.embedded_fonts))
+            zf.writestr('word/_rels/document.xml.rels',
+                        self.document_relationships.serialize())
+            zf.writestr('word/_rels/fontTable.xml.rels',
+                        xml2str(self.embedded_fonts))
            for fname, data_getter in self.images.items():
                zf.writestr(fname, data_getter())
            for fname, data in self.fonts.items():
--- a/ebook_converter/ebooks/metadata/init.py
+++ b/ebook_converter/ebooks/metadata/init.py
@@ -18,7 +18,7 @@ try:
    _author_pat = re.compile(tweaks['authors_split_regex'])
 except Exception:
    prints('Author split regexp:', tweaks['authors_split_regex'],
-            'is invalid, using default')
+           'is invalid, using default')
    _author_pat = re.compile(r'(?i),?\s+(and|with)\s+')
@@ -76,7 +76,8 @@ def author_to_author_sort(author, method=None):
    if method == 'copy':
        return author
-    prefixes = {force_unicode(y).lower() for y in tweaks['author_name_prefixes']}
+    prefixes = {force_unicode(y).lower()
+                for y in tweaks['author_name_prefixes']}
    prefixes |= {y+'.' for y in prefixes}
    while True:
        if not tokens:
@@ -87,7 +88,8 @@ def author_to_author_sort(author, method=None):
        else:
            break
-    suffixes = {force_unicode(y).lower() for y in tweaks['author_name_suffixes']}
+    suffixes = {force_unicode(y).lower()
+                for y in tweaks['author_name_suffixes']}
    suffixes |= {y+'.' for y in suffixes}
    suffix = ''
@@ -144,7 +146,7 @@ def get_title_sort_pat(lang=None):
    except:
        ans = frozenset((r'A\s+', r'The\s+', r'An\s+'))
    ans = '|'.join(ans)
-    ans = '^(%s)'%ans
+    ans = '^(%s)' % ans
    try:
        ans = re.compile(ans, re.IGNORECASE)
    except:
@@ -154,7 +156,7 @@ def get_title_sort_pat(lang=None):
 _ignore_starts = '\'"'+''.join(chr(x) for x in
-        list(range(0x2018, 0x201e))+[0x2032, 0x2033])
+                               list(range(0x2018, 0x201e))+[0x2032, 0x2033])
 def title_sort(title, order=None, lang=None):
--- a/ebook_converter/ebooks/metadata/fb2.py
+++ b/ebook_converter/ebooks/metadata/fb2.py
@@ -12,8 +12,7 @@ from lxml import etree
 from ebook_converter.utils.date import parse_only_date
 from ebook_converter.utils.img import save_cover_data_to
 from ebook_converter.utils.imghdr import identify
-from ebook_converter import guess_type, guess_all_extensions, prints, \
+from ebook_converter import guess_all_extensions, prints, force_unicode
-        force_unicode
 from ebook_converter.ebooks.metadata import MetaInformation, check_isbn
 from ebook_converter.ebooks.chardet import xml_to_unicode
 from ebook_converter.polyglot.binary import as_base64_unicode
--- a/ebook_converter/ebooks/metadata/opf2.py
+++ b/ebook_converter/ebooks/metadata/opf2.py
@@ -10,11 +10,11 @@ import mimetypes
 import os
 import re
 import sys
+import textwrap
+import traceback
 import unittest
 import urllib.parse
 import uuid
-import traceback
-import textwrap
 from lxml import etree
 from lxml.builder import ElementMaker
@@ -32,7 +32,7 @@ from ebook_converter.ebooks.metadata import string_to_authors, \
 from ebook_converter.ebooks.metadata.book.base import Metadata
 from ebook_converter.utils.date import parse_date, isoformat
 from ebook_converter.utils.localization import get_lang, canonicalize_lang
-from ebook_converter import prints, guess_type
+from ebook_converter import prints
 from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
 from ebook_converter.utils.config import tweaks
 from ebook_converter.polyglot.urllib import unquote
@@ -1807,8 +1807,7 @@ def test_m2o():
 class OPFTest(unittest.TestCase):
    def setUp(self):
-        self.stream = io.BytesIO(
+        self.stream = io.BytesIO(b'''\
-b'''\
 <?xml version="1.0"  encoding="UTF-8"?>
 <package version="2.0" xmlns="http://www.idpf.org/2007/opf" >
 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
@@ -1827,8 +1826,7 @@ b'''\
    <item id="1" href="a%20%7E%20b" media-type="text/txt" />
 </manifest>
 </package>
-'''
+''')
-        )
        self.opf = OPF(self.stream, os.getcwd())
    def testReading(self, opf=None):
--- a/ebook_converter/ebooks/mobi/reader/mobi6.py
+++ b/ebook_converter/ebooks/mobi/reader/mobi6.py
@@ -1,10 +1,15 @@
-import shutil, os, re, struct, textwrap, io
+import io
 import logging
 import mimetypes
 import os
 import re
 import shutil
 import struct
 import textwrap
 from lxml import html, etree
-from ebook_converter import xml_entity_to_unicode, entity_to_unicode, guess_type
+from ebook_converter import xml_entity_to_unicode, entity_to_unicode
 from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
 from ebook_converter.ebooks import DRMError, unit_convert
 from ebook_converter.ebooks.chardet import strip_encoding_declarations
@@ -15,15 +20,11 @@ from ebook_converter.ebooks.metadata import MetaInformation
 from ebook_converter.ebooks.metadata.opf2 import OPFCreator, OPF
 from ebook_converter.ebooks.metadata.toc import TOC
 from ebook_converter.ebooks.mobi.reader.headers import BookHeader
-from ebook_converter.utils.img import save_cover_data_to, gif_data_to_png_data, AnimatedGIF
+from ebook_converter.utils.img import save_cover_data_to, gif_data_to_png_data
+from ebook_converter.utils.img import AnimatedGIF
 from ebook_converter.utils.imghdr import what
 __license__ = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 class TopazError(ValueError):
    pass
@@ -38,13 +39,14 @@ class KFXError(ValueError):
 class MobiReader(object):
-    PAGE_BREAK_PAT = re.compile(
-        r'<\s*/{0,1}\s*mbp:pagebreak((?:\s+[^/>]*){0,1})/{0,1}\s*>\s*(?:<\s*/{0,1}\s*mbp:pagebreak\s*/{0,1}\s*>)*',
-        re.IGNORECASE)
+    PAGE_BREAK_PAT = re.compile(r'<\s*/{0,1}\s*mbp:pagebreak((?:\s+[^/>]*)'
+                                r'{0,1})/{0,1}\s*>\s*(?:<\s*/{0,1}'
+                                r'\s*mbp:pagebreak\s*/{0,1}\s*>)*',
+                                re.IGNORECASE)
    IMAGE_ATTRS = ('lowrecindex', 'recindex', 'hirecindex')
    def __init__(self, filename_or_stream, log, user_encoding=None, debug=None,
-            try_extra_data_fix=False):
+                 try_extra_data_fix=False):
        self.log = log
        self.debug = debug
        self.embedded_mi = None
@@ -83,8 +85,8 @@ class MobiReader(object):
        if raw.startswith(b'\xeaDRMION\xee'):
            raise KFXError()
-        self.header   = raw[0:72]
+        self.header = raw[0:72]
-        self.name     = self.header[:32].replace(b'\x00', b'')
+        self.name = self.header[:32].replace(b'\x00', b'')
        self.num_sections, = struct.unpack('>H', raw[76:78])
        self.ident = self.header[0x3C:0x3C + 8].upper()
@@ -94,7 +96,9 @@ class MobiReader(object):
        self.sections = []
        self.section_headers = []
        for i in range(self.num_sections):
-            offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', raw[78 + i * 8:78 + i * 8 + 8])
+            offset, a1, a2, a3, a4 = struct.unpack('>LBBBB',
+                                                   raw[78 + i * 8:78 +
+                                                       i * 8 + 8])
            flags, val = a1, a2 << 16 | a3 << 8 | a4
            self.section_headers.append((offset, flags, val))
@@ -109,8 +113,9 @@ class MobiReader(object):
        for i in range(self.num_sections):
            self.sections.append((section(i), self.section_headers[i]))
-        self.book_header = bh = BookHeader(self.sections[0][0], self.ident,
-            user_encoding, self.log, try_extra_data_fix=try_extra_data_fix)
+        bh = BookHeader(self.sections[0][0], self.ident, user_encoding,
+                        self.log, try_extra_data_fix=try_extra_data_fix)
+        self.book_header = bh
        self.name = self.name.decode(self.book_header.codec, 'replace')
        self.kf8_type = None
        k8i = getattr(self.book_header.exth, 'kf8_header', None)
@@ -118,18 +123,20 @@ class MobiReader(object):
        # Ancient PRC files from Baen can have random values for
        # mobi_version, so be conservative
        if (self.book_header.mobi_version == 8 and hasattr(self.book_header,
-            'skelidx')):
+                                                           'skelidx')):
            self.kf8_type = 'standalone'
        elif k8i is not None:  # Check for joint mobi 6 and kf 8 file
            try:
                raw = self.sections[k8i-1][0]
-            except:
+            except Exception:
                raw = None
            if raw == b'BOUNDARY':
                try:
                    self.book_header = BookHeader(self.sections[k8i][0],
-                            self.ident, user_encoding, self.log)
-                    self.book_header.kf8_first_image_index = self.book_header.first_image_index + k8i
+                                                  self.ident, user_encoding,
+                                                  self.log)
+                    _kfii = self.book_header.first_image_index + k8i
+                    self.book_header.kf8_first_image_index = _kfii
                    self.book_header.mobi6_records = bh.records
                    # Need the first_image_index from the mobi 6 header as well
@@ -143,14 +150,14 @@ class MobiReader(object):
                    self.kf8_type = 'joint'
                    self.kf8_boundary = k8i-1
-                except:
+                except Exception:
                    self.book_header = bh
    def check_for_drm(self):
        if self.book_header.encryption_type != 0:
            try:
                name = self.book_header.exth.mi.title
-            except:
+            except Exception:
                name = self.name
            if not name:
                name = self.name
@@ -163,20 +170,20 @@ class MobiReader(object):
        if self.debug is not None:
            parse_cache['calibre_raw_mobi_markup'] = self.mobi_html
        self.add_anchors()
-        self.processed_html = self.processed_html.decode(self.book_header.codec,
+        self.processed_html = self.processed_html.decode(
-            'ignore')
+            self.book_header.codec, 'ignore')
        self.processed_html = self.processed_html.replace('</</', '</')
        self.processed_html = re.sub(r'</([a-zA-Z]+)<', r'</\1><',
-                self.processed_html)
+                                     self.processed_html)
        self.processed_html = self.processed_html.replace('\ufeff', '')
        # Remove tags of the form <xyz: ...> as they can cause issues further
        # along the pipeline
        self.processed_html = re.sub(r'</{0,1}[a-zA-Z]+:\s+[^>]*>', '',
-                self.processed_html)
+                                     self.processed_html)
        self.processed_html = strip_encoding_declarations(self.processed_html)
        self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
-            self.processed_html)
+                                     self.processed_html)
        image_name_map = self.extract_images(processed_records, output_dir)
        self.replace_page_breaks()
        self.cleanup_html()
@@ -186,31 +193,41 @@ class MobiReader(object):
        try:
            root = html.fromstring(self.processed_html)
            if len(root.xpath('//html')) > 5:
-                root = html.fromstring(self.processed_html.replace('\x0c',
-                    '').replace('\x14', ''))
+                root = html.fromstring(self.processed_html
+                                       .replace('\x0c', '')
+                                       .replace('\x14', ''))
        except Exception:
-            self.log.warning('MOBI markup appears to contain random bytes. Stripping.')
+            self.log.warning('MOBI markup appears to contain random bytes. '
+                             'Stripping.')
            self.processed_html = self.remove_random_bytes(self.processed_html)
            root = html.fromstring(self.processed_html)
        if root.xpath('descendant::p/descendant::p'):
            from html5_parser import parse
            self.log.warning('Malformed markup, parsing using html5-parser')
-            self.processed_html = strip_encoding_declarations(self.processed_html)
+            self.processed_html = strip_encoding_declarations(
+                self.processed_html)
            # These trip up the html5 parser causing all content to be placed
            # under the <guide> tag
-            self.processed_html = re.sub(r'<metadata>.+?</metadata>', '', self.processed_html, flags=re.I)
-            self.processed_html = re.sub(r'<guide>.+?</guide>', '', self.processed_html, flags=re.I)
+            self.processed_html = re.sub(r'<metadata>.+?</metadata>', '',
+                                         self.processed_html, flags=re.I)
+            self.processed_html = re.sub(r'<guide>.+?</guide>', '',
+                                         self.processed_html, flags=re.I)
            try:
-                root = parse(self.processed_html, maybe_xhtml=False, keep_doctype=False, sanitize_names=True)
+                root = parse(self.processed_html, maybe_xhtml=False,
+                             keep_doctype=False, sanitize_names=True)
            except Exception:
-                self.log.warning('MOBI markup appears to contain random bytes. Stripping.')
-                self.processed_html = self.remove_random_bytes(self.processed_html)
-                root = parse(self.processed_html, maybe_xhtml=False, keep_doctype=False, sanitize_names=True)
+                self.log.warning('MOBI markup appears to contain random '
+                                 'bytes. Stripping.')
+                self.processed_html = self.remove_random_bytes(
+                    self.processed_html)
+                root = parse(self.processed_html, maybe_xhtml=False,
+                             keep_doctype=False, sanitize_names=True)
            if len(root.xpath('body/descendant::*')) < 1:
                # There are probably stray </html>s in the markup
                self.processed_html = self.processed_html.replace('</html>',
-                        '')
-                root = parse(self.processed_html, maybe_xhtml=False, keep_doctype=False, sanitize_names=True)
+                                                                  '')
+                root = parse(self.processed_html, maybe_xhtml=False,
+                             keep_doctype=False, sanitize_names=True)
        if root.tag != 'html':
            self.log.warn('File does not have opening <html> tag')
@@ -253,13 +270,14 @@ class MobiReader(object):
            head = root.makeelement('head', {})
            root.insert(0, head)
        head.text = '\n\t'
-        link = head.makeelement('link', {'type':'text/css',
-            'href':'styles.css', 'rel':'stylesheet'})
+        link = head.makeelement('link', {'type': 'text/css',
+                                         'href': 'styles.css',
+                                         'rel': 'stylesheet'})
        head.insert(0, link)
        link.tail = '\n\t'
        title = head.xpath('descendant::title')
-        m = head.makeelement('meta', {'http-equiv':'Content-Type',
+        m = head.makeelement('meta', {'http-equiv': 'Content-Type',
-            'content':'text/html; charset=utf-8'})
+                                      'content': 'text/html; charset=utf-8'})
        head.insert(0, m)
        if not title:
            title = head.makeelement('title', {})
@@ -283,7 +301,8 @@ class MobiReader(object):
        try:
            for ref in guide.xpath('descendant::reference'):
                if 'href' in ref.attrib:
-                    ref.attrib['href'] = os.path.basename(htmlfile) + ref.attrib['href']
+                    ref.attrib['href'] = (os.path.basename(htmlfile) +
+                                          ref.attrib['href'])
        except AttributeError:
            pass
@@ -299,7 +318,7 @@ class MobiReader(object):
        opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
        self.created_opf_path = os.path.splitext(htmlfile)[0] + '.opf'
        opf.render(open(self.created_opf_path, 'wb'), ncx,
-            ncx_manifest_entry=ncx_manifest_entry)
+                   ncx_manifest_entry=ncx_manifest_entry)
        ncx = ncx.getvalue()
        if ncx:
            ncx_path = os.path.join(os.path.dirname(htmlfile), 'toc.ncx')
@@ -313,9 +332,9 @@ class MobiReader(object):
        if self.book_header.exth is not None or self.embedded_mi is not None:
            self.log.debug('Creating OPF...')
            ncx = io.BytesIO()
-            opf, ncx_manifest_entry  = self.create_opf(htmlfile, guide, root)
+            opf, ncx_manifest_entry = self.create_opf(htmlfile, guide, root)
            opf.render(open(os.path.splitext(htmlfile)[0] + '.opf', 'wb'), ncx,
-                ncx_manifest_entry)
+                       ncx_manifest_entry)
            ncx = ncx.getvalue()
            if ncx:
                write_as_utf8(os.path.splitext(htmlfile)[0] + '.ncx', ncx)
@@ -348,28 +367,46 @@ class MobiReader(object):
    def cleanup_html(self):
        self.log.debug('Cleaning up HTML...')
-        self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}"></div>', '', self.processed_html)
-        if self.book_header.ancient and b'<html' not in self.mobi_html[:300].lower():
-            self.processed_html = '<html><p>' + self.processed_html.replace('\n\n', '<p>') + '</html>'
+        self.processed_html = re.sub(r'<div height="0(pt|px|ex|em|%){0,1}">'
+                                     '</div>', '', self.processed_html)
+        if (self.book_header.ancient and
+                b'<html' not in self.mobi_html[:300].lower()):
+            self.processed_html = ('<html><p>' +
+                                   self.processed_html.replace('\n\n', '<p>') +
+                                   '</html>')
        self.processed_html = self.processed_html.replace('\r\n', '\n')
        self.processed_html = self.processed_html.replace('> <', '>\n<')
        self.processed_html = self.processed_html.replace('<mbp: ', '<mbp:')
        self.processed_html = re.sub(r'<\?xml[^>]*>', '', self.processed_html)
-        self.processed_html = re.sub(r'<\s*(/?)\s*o:p[^>]*>', r'', self.processed_html)
-        # Swap inline and block level elements, and order block level elements according to priority
-        # - lxml and beautifulsoup expect/assume a specific order based on xhtml spec
-        self.processed_html = re.sub(
-            r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})(?P<para><p[^>]*>)', r'\g<para>'+r'\g<styletags>', self.processed_html)
-        self.processed_html = re.sub(
-            r'(?i)(?P<para></p[^>]*>)\s*(?P<styletags>(</(h\d+|i|b|u|em|small|big|strong|tt)>\s*){1,})', r'\g<styletags>'+r'\g<para>', self.processed_html)
-        self.processed_html = re.sub(
-            r'(?i)(?P<blockquote>(</(blockquote|div)[^>]*>\s*){1,})(?P<para></p[^>]*>)', r'\g<para>'+r'\g<blockquote>', self.processed_html)
-        self.processed_html = re.sub(
-            r'(?i)(?P<para><p[^>]*>)\s*(?P<blockquote>(<(blockquote|div)[^>]*>\s*){1,})', r'\g<blockquote>'+r'\g<para>', self.processed_html)
+        self.processed_html = re.sub(r'<\s*(/?)\s*o:p[^>]*>', r'',
+                                     self.processed_html)
+        # Swap inline and block level elements, and order block level elements
+        # according to priority
+        # - lxml and beautifulsoup expect/assume a specific order based on
+        #   xhtml spec
+        self.processed_html = re.sub(r'(?i)(?P<styletags>(<(h\d+|i|b|u|em|'
+                                     r'small|big|strong|tt)>\s*){1,})'
+                                     r'(?P<para><p[^>]*>)',
+                                     r'\g<para>' + r'\g<styletags>',
+                                     self.processed_html)
+        self.processed_html = re.sub(r'(?i)(?P<para></p[^>]*>)\s*'
+                                     r'(?P<styletags>(</(h\d+|i|b|u|em|small|'
+                                     r'big|strong|tt)>\s*){1,})',
+                                     r'\g<styletags>' + r'\g<para>',
+                                     self.processed_html)
+        self.processed_html = re.sub(r'(?i)(?P<blockquote>(</(blockquote|div)'
+                                     r'[^>]*>\s*){1,})(?P<para></p[^>]*>)',
+                                     r'\g<para>' + r'\g<blockquote>',
+                                     self.processed_html)
+        self.processed_html = re.sub(r'(?i)(?P<para><p[^>]*>)\s*'
+                                     r'(?P<blockquote>(<(blockquote|div)[^>]*>'
+                                     r'\s*){1,})',
+                                     r'\g<blockquote>' + r'\g<para>',
+                                     self.processed_html)
        bods = htmls = 0
        for x in re.finditer('</body>|</html>', self.processed_html):
            if x == '</body>':
-                bods +=1
+                bods += 1
            else:
                htmls += 1
            if bods > 1 and htmls > 1:
@@ -380,8 +417,8 @@ class MobiReader(object):
            self.processed_html = self.processed_html.replace('</html>', '')
    def remove_random_bytes(self, html):
-        return re.sub('\x14|\x15|\x19|\x1c|\x1d|\xef|\x12|\x13|\xec|\x08|\x01|\x02|\x03|\x04|\x05|\x06|\x07',
+        return re.sub('\x14|\x15|\x19|\x1c|\x1d|\xef|\x12|\x13|\xec|\x08|\x01'
-                    '', html)
+                      '|\x02|\x03|\x04|\x05|\x06|\x07', '', html)
    def ensure_unit(self, raw, unit='px'):
        if re.search(r'\d+$', raw) is not None:
@@ -448,9 +485,10 @@ class MobiReader(object):
                            # discarded by a renderer
                            tag.text = '\u00a0'  # nbsp
                            styles.append('height: %s' %
-                                    self.ensure_unit(height))
+                                          self.ensure_unit(height))
                        else:
-                            styles.append('margin-top: %s' % self.ensure_unit(height))
+                            styles.append('margin-top: %s' %
+                                          self.ensure_unit(height))
            if 'width' in attrib:
                width = attrib.pop('width').strip()
                if width and re.search(r'\d+', width):
@@ -464,14 +502,16 @@ class MobiReader(object):
                        try:
                            ewidth_val = unit_convert(ewidth, 12, 500, 166)
                            self.text_indents[tag] = ewidth_val
-                        except:
+                        except Exception:
                            pass
                        if width.startswith('-'):
-                            styles.append('margin-left: %s' % self.ensure_unit(width[1:]))
+                            styles.append('margin-left: %s' %
+                                          self.ensure_unit(width[1:]))
                            try:
-                                ewidth_val = unit_convert(ewidth[1:], 12, 500, 166)
+                                ewidth_val = unit_convert(ewidth[1:],
+                                                          12, 500, 166)
                                self.left_margins[tag] = ewidth_val
-                            except:
+                            except Exception:
                                pass
            if 'align' in attrib:
@@ -514,16 +554,20 @@ class MobiReader(object):
                    except Exception:
                        pass
                    else:
-                        attrib['src'] = 'images/' + image_name_map.get(recindex, '%05d.jpg' % recindex)
+                        attrib['src'] = ('images/' +
+                                         image_name_map.get(recindex,
+                                                            '%05d.jpg' %
+                                                            recindex))
                for attr in ('width', 'height'):
                    if attr in attrib:
                        val = attrib[attr]
                        if val.lower().endswith('em'):
                            try:
                                nval = float(val[:-2])
-                                nval *= 16 * (168.451/72)  # Assume this was set using the Kindle profile
-                                attrib[attr] = "%dpx"%int(nval)
-                            except:
+                                # Assume this was set using the Kindle profile
+                                nval *= 16 * (168.451/72)
+                                attrib[attr] = "%dpx" % int(nval)
+                            except Exception:
                                del attrib[attr]
                        elif val.lower().endswith('%'):
                            del attrib[attr]
@@ -550,10 +594,12 @@ class MobiReader(object):
                    attrib['href'] = "#filepos%d" % int(filepos)
                except ValueError:
                    pass
-            if (tag.tag == 'a' and attrib.get('id', '').startswith('filepos') and
+            if (tag.tag == 'a' and
-                    not tag.text and len(tag) == 0 and (tag.tail is None or not
+                    attrib.get('id', '').startswith('filepos') and
-                        tag.tail.strip()) and getattr(tag.getnext(), 'tag',
+                    not tag.text and len(tag) == 0 and
-                            None) in BLOCK_TAGS):
+                    (tag.tail is None or
                     not tag.tail.strip()) and
                    getattr(tag.getnext(), 'tag', None) in BLOCK_TAGS):
                # This is an empty anchor immediately before a block tag, move
                # the id onto the block tag instead
                forwardable_anchors.append(tag)
@@ -625,11 +671,11 @@ class MobiReader(object):
            ti = self.text_indents.get(tag, ti)
            try:
                lm = float(lm)
-            except:
+            except Exception:
                lm = 0.0
            try:
                ti = float(ti)
-            except:
+            except Exception:
                ti = 0.0
            return lm + ti
@@ -647,13 +693,14 @@ class MobiReader(object):
            mi = MetaInformation(self.book_header.title, ['Unknown'])
        opf = OPFCreator(os.path.dirname(htmlfile), mi)
        if hasattr(self.book_header.exth, 'cover_offset'):
-            opf.cover = 'images/%05d.jpg' % (self.book_header.exth.cover_offset + 1)
+            opf.cover = 'images/%05d.jpg' % (self.book_header
                                             .exth.cover_offset + 1)
        elif mi.cover is not None:
            opf.cover = mi.cover
        else:
            opf.cover = 'images/%05d.jpg' % 1
            if not os.path.exists(os.path.join(os.path.dirname(htmlfile),
-                * opf.cover.split('/'))):
+                                               * opf.cover.split('/'))):
                opf.cover = None
        cover = opf.cover
@@ -669,7 +716,7 @@ class MobiReader(object):
                opf.cover = ncover.replace(os.sep, '/')
        manifest = [(htmlfile, 'application/xhtml+xml'),
-            (os.path.abspath('styles.css'), 'text/css')]
+                    (os.path.abspath('styles.css'), 'text/css')]
        bp = os.path.dirname(htmlfile)
        added = set()
        for i in getattr(self, 'image_names', []):
@@ -708,15 +755,17 @@ class MobiReader(object):
                        if href and re.match(r'\w+://', href) is None:
                            try:
                                text = ' '.join([t.strip() for t in
-                                    x.xpath('descendant::text()')])
+                                                 x.xpath('descendant:'
-                            except:
+                                                         ':text()')])
                            except Exception:
                                text = ''
                            text = ent_pat.sub(entity_to_unicode, text)
-                            item = tocobj.add_item(toc.partition('#')[0], href[1:],
+                            item = tocobj.add_item(toc.partition('#')[0],
-                                text)
+                                                   href[1:], text)
                            item.left_space = int(self.get_left_whitespace(x))
                            found = True
-                    if reached and found and x.get('class', None) == 'mbp_pagebreak':
+                    if (reached and found and
                            x.get('class', None) == 'mbp_pagebreak'):
                        break
            if tocobj is not None:
                tocobj = self.structure_toc(tocobj)
@@ -748,7 +797,7 @@ class MobiReader(object):
            level = indent_vals.index(item.left_space)
            parent = find_parent(level)
            last_found[level] = parent.add_item(item.href, item.fragment,
-                        item.text)
+                                                item.text)
        return newtoc
@@ -782,7 +831,9 @@ class MobiReader(object):
    def warn_about_trailing_entry_corruption(self):
        # Emit the "trailing data entries are corrupted" warning at most once
        # per instance: the flag is set before logging, so every later call
        # falls through without logging again.
        if not self.warned_about_trailing_entry_corruption:
            self.warned_about_trailing_entry_corruption = True
-            self.log.warn('The trailing data entries in this MOBI file are corrupted, you might see corrupted text in the output')
+            self.log.warn('The trailing data entries in this MOBI file are '
                          'corrupted, you might see corrupted text in the '
                          'output')
    def text_section(self, index):
        data = self.sections[index][0]
@@ -791,19 +842,23 @@ class MobiReader(object):
    def extract_text(self, offset=1):
        # Unpack the book's text records, join them into self.mobi_html and
        # return the list of section indices this method accounted for.
        # NOTE(review): one branch of the compression dispatch is elided by
        # the hunk boundary below; the comments only cover the visible lines.
        self.log.debug('Extracting text...')
        # Text records are sections [offset, offset + records), clipped to the
        # number of sections actually present.
-        text_sections = [self.text_section(i) for i in range(offset,
+        text_sections = [self.text_section(i)
-            min(self.book_header.records + offset, len(self.sections)))]
+                         for i in range(offset, min(self.book_header.records
                                                    + offset,
                                                    len(self.sections)))]
        # Unlike text_sections, this range starts at offset-1 and is not
        # clipped to len(self.sections).
        processed_records = list(range(offset-1, self.book_header.records +
-            offset))
+                                       offset))
        self.mobi_html = b''
        # b'DH': the huff_number sections starting at huff_offset are handed
        # to HuffReader and are also recorded as processed.
        if self.book_header.compression_type == b'DH':
-            huffs = [self.sections[i][0] for i in
+            huffs = [self.sections[i][0]
-                range(self.book_header.huff_offset,
+                     for i in range(self.book_header.huff_offset,
-                    self.book_header.huff_offset + self.book_header.huff_number)]
+                                    self.book_header.huff_offset +
                                    self.book_header.huff_number)]
            processed_records += list(range(self.book_header.huff_offset,
-                self.book_header.huff_offset + self.book_header.huff_number))
+                                            self.book_header.huff_offset +
                                            self.book_header.huff_number))
            huff = HuffReader(huffs)
            unpack = huff.unpack
@@ -811,19 +866,23 @@ class MobiReader(object):
            unpack = decompress_doc
        # b'\x00\x01': records are used as-is (identity unpack).
        elif self.book_header.compression_type == b'\x00\x01':
-            unpack = lambda x: x
+            unpack = lambda x: x  # noqa
        else:
-            raise MobiError('Unknown compression algorithm: %r' % self.book_header.compression_type)
+            raise MobiError('Unknown compression algorithm: %r' %
                            self.book_header.compression_type)
        self.mobi_html = b''.join(map(unpack, text_sections))
        # Drop a single trailing b'#' from the joined text.
        if self.mobi_html.endswith(b'#'):
            self.mobi_html = self.mobi_html[:-1]
        # For "ancient" books with no b'<html' in the first 300 bytes,
        # b'\r ' is rewritten to b'\n\n '.
-        if self.book_header.ancient and b'<html' not in self.mobi_html[:300].lower():
+        if (self.book_header.ancient and
                b'<html' not in self.mobi_html[:300].lower()):
            self.mobi_html = self.mobi_html.replace(b'\r ', b'\n\n ')
        # NUL bytes are always removed; the two control bytes below are only
        # removed when the header codec is cp1252.
        self.mobi_html = self.mobi_html.replace(b'\0', b'')
        if self.book_header.codec == 'cp1252':
-            self.mobi_html = self.mobi_html.replace(b'\x1e', b'')  # record separator
+            # record separator
-            self.mobi_html = self.mobi_html.replace(b'\x02', b'')  # start of text
+            self.mobi_html = self.mobi_html.replace(b'\x1e', b'')
            # start of text
            self.mobi_html = self.mobi_html.replace(b'\x02', b'')
        return processed_records
    def replace_page_breaks(self):
@@ -835,7 +894,7 @@ class MobiReader(object):
        self.log.debug('Adding anchors...')
        positions = set()
        link_pattern = re.compile(br'''<[^<>]+filepos=['"]{0,1}(\d+)[^<>]*>''',
-            re.IGNORECASE)
+                                  re.IGNORECASE)
        for match in link_pattern.finditer(self.mobi_html):
            positions.add(int(match.group(1)))
        pos = 0
@@ -845,12 +904,13 @@ class MobiReader(object):
            if end == 0:
                continue
            oend = end
-            l = self.mobi_html.find(b'<', end)
+            _l = self.mobi_html.find(b'<', end)
            r = self.mobi_html.find(b'>', end)
            anchor = b'<a id="filepos%d"></a>'
-            if r > -1 and (r < l or l == end or l == -1):
+            if r > -1 and (r < _l or _l == end or _l == -1):
                p = self.mobi_html.rfind(b'<', 0, end + 1)
-                if (pos < end and p > -1 and not end_tag_re.match(self.mobi_html[p:r]) and
+                if (pos < end and p > -1 and
                        not end_tag_re.match(self.mobi_html[p:r]) and
                        not self.mobi_html[p:r + 1].endswith(b'/>')):
                    anchor = b' filepos-id="filepos%d"'
                    end = r
@@ -862,8 +922,9 @@ class MobiReader(object):
        processed_html = b''.join(processed_html)
        # Remove anchors placed inside entities
-        self.processed_html = re.sub(br'&([^;]*?)(<a id="filepos\d+"></a>)([^;]*);',
+        self.processed_html = re.sub(br'&([^;]*?)(<a id="filepos\d+"></a>)'
-                br'&\1\3;\2', processed_html)
+                                     br'([^;]*);', br'&\1\3;\2',
                                     processed_html)
    def extract_images(self, processed_records, output_dir):
        self.log.debug('Extracting images...')
@@ -881,10 +942,11 @@ class MobiReader(object):
            if i in processed_records:
                continue
            processed_records.append(i)
-            data  = self.sections[i][0]
+            data = self.sections[i][0]
            image_index += 1
            if data[:4] in {b'FLIS', b'FCIS', b'SRCS', b'\xe9\x8e\r\n',
-                    b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI', b'VIDE'}:
+                            b'RESC', b'BOUN', b'FDST', b'DATP', b'AUDI',
                            b'VIDE'}:
                # This record is a known non image type, no need to try to
                # load the image
                continue
@@ -920,16 +982,17 @@ class MobiReader(object):
 def test_mbp_regex():
    # Self-check for MobiReader.PAGE_BREAK_PAT. Each key is a sample of
    # markup and its value is the text expected after every match has been
    # replaced by its first capture group (r'\1'). Raises a plain Exception
    # naming the first sample whose result differs from the expectation.
-    for raw, m in {'<mbp:pagebreak></mbp:pagebreak>':'',
+    for raw, m in {'<mbp:pagebreak></mbp:pagebreak>': '',
-                   '<mbp:pagebreak xxx></mbp:pagebreak>yyy':' xxxyyy',
+                   '<mbp:pagebreak xxx></mbp:pagebreak>yyy': ' xxxyyy',
-                   '<mbp:pagebreak> </mbp:pagebreak>':'',
+                   '<mbp:pagebreak> </mbp:pagebreak>': '',
-                   '<mbp:pagebreak>xxx':'xxx',
+                   '<mbp:pagebreak>xxx': 'xxx',
-                   '<mbp:pagebreak/>xxx':'xxx',
+                   '<mbp:pagebreak/>xxx': 'xxx',
-                   '<mbp:pagebreak sdf/ >xxx':' sdfxxx',
+                   '<mbp:pagebreak sdf/ >xxx': ' sdfxxx',
-                   '<mbp:pagebreak / >':' ',
+                   '<mbp:pagebreak / >': ' ',
-                   '</mbp:pagebreak>':'',
+                   '</mbp:pagebreak>': '',
-                   '</mbp:pagebreak sdf>':' sdf',
+                   '</mbp:pagebreak sdf>': ' sdf',
-                   '</mbp:pagebreak><mbp:pagebreak></mbp:pagebreak>xxx':'xxx'}.items():
+                   '</mbp:pagebreak><mbp:pagebreak></mbp:pagebreak>xxx':
                   'xxx'}.items():
        ans = MobiReader.PAGE_BREAK_PAT.sub(r'\1', raw)
        if ans != m:
-            raise Exception('%r != %r for %r'%(ans, m, raw))
+            raise Exception('%r != %r for %r' % (ans, m, raw))
--- a/ebook_converter/ebooks/oeb/transforms/data_url.py
+++ b/ebook_converter/ebooks/oeb/transforms/data_url.py
@@ -1,13 +1,11 @@
 import mimetypes
 import re
 from ebook_converter.ebooks.oeb.base import XPath, urlunquote
 from ebook_converter.polyglot.binary import from_base64_bytes
 from ebook_converter.polyglot.builtins import as_bytes
 __license__ = 'GPL v3'
 __copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
 class DataURL(object):
    # NOTE(review): the start of __call__ is elided by the hunk boundary, so
    # the comments below describe only the visible lines. From those, the
    # class decodes image payloads carried in data URIs, stores them as
    # manifest items and re-points the referencing element's src at them.
    def __call__(self, oeb, opts):
@@ -27,25 +25,29 @@ class DataURL(object):
                    continue
                # base64 payloads: strip all whitespace, then decode. A
                # payload that fails to decode is logged and skipped.
                # NOTE(review): from_base64_bytes is also imported at module
                # level, so the local import below looks redundant.
                if ';base64' in header:
                    data = re.sub(r'\s+', '', data)
                    from ebook_converter.polyglot.binary import from_base64_bytes
                    try:
                        data = from_base64_bytes(data)
                    except Exception:
-                        self.log.error('Found invalid base64 encoded data URI, ignoring it')
+                        self.log.error('Found invalid base64 encoded data '
                                       'URI, ignoring it')
                        continue
                else:
                    data = urlunquote(data)
                data = as_bytes(data)
                # A falsy format means the image type was not recognised; the
                # element is then left untouched. Otherwise fmt is reused as
                # the file extension of the new manifest item.
                fmt = what(None, data)
                if not fmt:
-                    self.log.warn('Image encoded as data URL has unknown format, ignoring')
+                    self.log.warn('Image encoded as data URL has unknown '
                                  'format, ignoring')
                    continue
-                img.set('src', item.relhref(self.convert_image_data_uri(data, fmt, oeb)))
+                img.set('src',
                        item.relhref(self.convert_image_data_uri(data, fmt,
                                                                 oeb)))
    def convert_image_data_uri(self, data, fmt, oeb):
        # Add `data` to oeb.manifest as a new item and return that item's
        # href. The id/href pair comes from oeb.manifest.generate(), seeded
        # with 'data-url-image' and 'data-url-image.<fmt>'; the media type is
        # guessed by mimetypes from the generated href.
-        self.log('Found image encoded as data URI converting it to normal image')
+        self.log('Found image encoded as data URI converting it to normal '
-        from ebook_converter import guess_type
+                 'image')
-        item_id, item_href = oeb.manifest.generate('data-url-image', 'data-url-image.' + fmt)
+        item_id, item_href = oeb.manifest.generate('data-url-image',
                                                   'data-url-image.' + fmt)
        oeb.manifest.add(item_id, item_href,
                         mimetypes.guess_type(item_href)[0], data=data)
        return item_href
--- a/ebook_converter/ebooks/oeb/transforms/jacket.py
+++ b/ebook_converter/ebooks/oeb/transforms/jacket.py
@@ -1,9 +1,11 @@
 import mimetypes
-import sys, os, re
+import os
 from xml.sax.saxutils import escape
 from string import Formatter
 import pkg_resources
 import re
 import string
 import sys
 import urllib.parse
 from xml.sax import saxutils
 from ebook_converter import constants as const
 from ebook_converter import strftime
@@ -16,18 +18,14 @@ from ebook_converter.ebooks.chardet import strip_encoding_declarations
 from ebook_converter.ebooks.metadata import fmt_sidx, rating_to_stars
 __license__ = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]'
-class SafeFormatter(Formatter):
+class SafeFormatter(string.Formatter):
    # Formatter whose field lookup yields an empty string when the base
    # get_value() raises KeyError, instead of propagating the error. Only
    # KeyError is handled; any other exception still propagates.
    def get_value(self, *args, **kwargs):
        try:
-            return Formatter.get_value(self, *args, **kwargs)
+            return string.Formatter.get_value(self, *args, **kwargs)
        except KeyError:
            return ''
@@ -40,7 +38,7 @@ class Base(object):
        for img in path(item.data):
            if removed >= limit:
                break
-            href  = item.abshref(img.get('src'))
+            href = item.abshref(img.get('src'))
            image = self.oeb.manifest.hrefs.get(href)
            if image is None:
                href = urlnormalize(href)
@@ -68,7 +66,8 @@ class RemoveFirstImage(Base):
                    raw = xml2text(body[0]).strip()
                    imgs = XPath('//h:img|//svg:svg')(item.data)
                    if not raw and not imgs:
-                        self.log('Removing %s as it has no content'%item.href)
+                        self.log('Removing %s as it has no content' %
                                 item.href)
                        self.oeb.manifest.remove(item)
                        deleted_item = item
                break
@@ -82,20 +81,20 @@ class RemoveFirstImage(Base):
            self.oeb.guide.remove_by_href(deleted_item.href)
    def __call__(self, oeb, opts, metadata):
-        '''
+        """
        Add metadata in jacket.xhtml if specified in opts
        If not specified, remove previous jacket instance
-        '''
+        """
        # NOTE(review): the docstring above does not describe this body. The
        # code only stores oeb, opts and oeb.log on the instance and calls
        # remove_first_image() when opts.remove_first_image is truthy; no
        # jacket is added or removed here and `metadata` is not used.
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        if opts.remove_first_image:
            self.remove_first_image()
 class Jacket(Base):
-    '''
+    """
-    Book jacket manipulation. Remove first image and insert comments at start of
+    Book jacket manipulation. Remove first image and insert comments at start
-    book.
+    of book.
-    '''
+    """
    def insert_metadata(self, mi):
        self.log('Inserting metadata into book...')
@@ -107,22 +106,24 @@ class Jacket(Base):
        try:
            comments = str(self.oeb.metadata.description[0])
-        except:
+        except Exception:
            comments = ''
        try:
            title = str(self.oeb.metadata.title[0])
-        except:
+        except Exception:
            title = 'Unknown'
        try:
            authors = list(map(str, self.oeb.metadata.creator))
-        except:
+        except Exception:
            authors = ['Unknown']
        root = render_jacket(mi, self.opts.output_profile,
-                alt_title=title, alt_tags=tags, alt_authors=authors,
+                             alt_title=title, alt_tags=tags,
-                alt_comments=comments, rescale_fonts=True)
+                             alt_authors=authors,
                             alt_comments=comments,
                             rescale_fonts=True)
        id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml')
        jacket = self.oeb.manifest.add(id, href, mimetypes.guess_type(href)[0],
@@ -132,7 +133,8 @@ class Jacket(Base):
        for img, path in referenced_images(root):
            self.oeb.log('Embedding referenced image %s into jacket' % path)
            ext = path.rpartition('.')[-1].lower()
-            item_id, href = self.oeb.manifest.generate('jacket_image', 'jacket_img.'+ext)
+            item_id, href = self.oeb.manifest.generate('jacket_image',
                                                       'jacket_img.' + ext)
            with open(path, 'rb') as f:
                item = self.oeb.manifest.add(
                    item_id, href, mimetypes.guess_type(href)[0],
@@ -149,10 +151,10 @@ class Jacket(Base):
                break
    def __call__(self, oeb, opts, metadata):
-        '''
+        """
        Add metadata in jacket.xhtml if specified in opts
        If not specified, remove previous jacket instance
-        '''
+        """
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        self.remove_existing_jacket()
        if opts.insert_metadata:
@@ -164,8 +166,8 @@ class Jacket(Base):
 def get_rating(rating, rchar, e_rchar):
    ans = ''
    try:
-        num = float(rating)/2
+        num = float(rating) / 2
-    except:
+    except Exception:
        return ans
    num = max(0, num)
    num = min(num, 5)
@@ -180,25 +182,29 @@ class Series(str):
    def __new__(self, series, series_index):
        # Build the str value for a series label, with every component passed
        # through saxutils.escape. With a truthy series and a non-None index
        # the text is '<index> of <em><series></em>': the string value uses
        # the non-roman index and the .roman attribute the roman-numeral one.
        # Otherwise both are just the escaped series name ('' when falsy).
        # The first parameter is named `self` but, this being __new__, it
        # receives the class being instantiated.
        if series and series_index is not None:
-            roman = '{1} of <em>{0}</em>'.format(
+            _roman = saxutils.escape(fmt_sidx(series_index, use_roman=True))
-                escape(series), escape(fmt_sidx(series_index, use_roman=True)))
+            _no_roman = saxutils.escape(fmt_sidx(series_index,
-            combined = '{1} of <em>{0}</em>'.format(
+                                                 use_roman=False))
-                escape(series), escape(fmt_sidx(series_index,
+            roman = '{1} of <em>{0}</em>'.format(saxutils.escape(series),
-                                                use_roman=False)))
+                                                 _roman)
            combined = '{1} of <em>{0}</em>'.format(saxutils.escape(series),
                                                    _no_roman)
        else:
-            combined = roman = escape(series or u'')
+            combined = roman = saxutils.escape(series or u'')
        # The individual pieces are attached as attributes. number and
        # roman_number format `series_index or 1.0`, so a missing or zero
        # index is formatted as 1.0.
        s = str.__new__(self, combined)
        s.roman = roman
-        s.name = escape(series or '')
+        s.name = saxutils.escape(series or '')
-        s.number = escape(fmt_sidx(series_index or 1.0, use_roman=False))
+        s.number = saxutils.escape(fmt_sidx(series_index or 1.0,
-        s.roman_number = escape(fmt_sidx(series_index or 1.0, use_roman=True))
+                                            use_roman=False))
        s.roman_number = saxutils.escape(fmt_sidx(series_index or 1.0,
                                                  use_roman=True))
        return s
 class Tags(str):
    def __new__(self, tags, output_profile):
-        tags = [escape(x) for x in tags or ()]
+        tags = [saxutils.escape(x) for x in tags or ()]
        t = str.__new__(self, ', '.join(tags))
        t.alphabetical = ', '.join(sorted(tags))
        t.tags_list = tags
@@ -233,9 +239,9 @@ def postprocess_jacket(root, output_profile, has_data):
        extract_class('cbj_kindle_banner_hr')
-def render_jacket(mi, output_profile,
+def render_jacket(mi, output_profile, alt_title='Unknown', alt_tags=[],
-        alt_title='Unknown', alt_tags=[], alt_comments='',
+                  alt_comments='', alt_publisher='', rescale_fonts=False,
-        alt_publisher='', rescale_fonts=False, alt_authors=None):
+                  alt_authors=None):
    with open(pkg_resources.resource_filename('ebook_converter',
                                              'data/jacket/stylesheet.css'),
              'rb') as fobj:
@@ -250,17 +256,20 @@ def render_jacket(mi, output_profile,
    try:
        title_str = alt_title if mi.is_null('title') else mi.title
-    except:
+    except Exception:
        title_str = 'Unknown'
-    title_str = escape(title_str)
+    title_str = saxutils.escape(title_str)
    title = '<span class="title">%s</span>' % title_str
    series = Series(mi.series, mi.series_index)
    try:
-        publisher = mi.publisher if not mi.is_null('publisher') else alt_publisher
+        if not mi.is_null('publisher'):
-    except:
+            publisher = mi.publisher
        else:
            publisher = alt_publisher
    except Exception:
        publisher = ''
-    publisher = escape(publisher)
+    publisher = saxutils.escape(publisher)
    try:
        if is_date_undefined(mi.pubdate):
@@ -268,10 +277,11 @@ def render_jacket(mi, output_profile,
        else:
            dt = as_local_time(mi.pubdate)
            pubdate = strftime('%Y', dt.timetuple())
-    except:
+    except Exception:
        pubdate = ''
-    rating = get_rating(mi.rating, output_profile.ratings_char, output_profile.empty_ratings_char)
+    rating = get_rating(mi.rating, output_profile.ratings_char,
                        output_profile.empty_ratings_char)
    tags = Tags((mi.tags if mi.tags else alt_tags), output_profile)
@@ -285,10 +295,10 @@ def render_jacket(mi, output_profile,
        mi.authors = list(alt_authors or ('Unknown',))
    try:
        author = mi.format_authors()
-    except:
+    except Exception:
        author = ''
    mi.authors = orig
-    author = escape(author)
+    author = saxutils.escape(author)
    has_data = {}
    def generate_html(comments):
@@ -301,7 +311,7 @@ def render_jacket(mi, output_profile,
                'publisher': publisher,
                'rating': rating,
                'rating_label': 'Rating',
-                'searchable_tags': ' '.join(escape(t) + 'ttt'
+                'searchable_tags': ' '.join(saxutils.escape(t) + 'ttt'
                                            for t in tags.tags_list),
                'series': series,
                'series_label': 'Series',
@@ -320,25 +330,30 @@ def render_jacket(mi, output_profile,
                if dt == 'series':
                    args[dkey] = Series(mi.get(key), mi.get(key + '_index'))
                elif dt == 'rating':
-                    args[dkey] = rating_to_stars(mi.get(key), m.get('display', {}).get('allow_half_stars', False))
+                    args[dkey] = rating_to_stars(mi.get(key),
                                                 m.get('display', {})
                                                 .get('allow_half_stars',
                                                      False))
                elif dt == 'comments':
                    val = val or ''
                    display = m.get('display', {})
                    ctype = display.get('interpret_as') or 'html'
                    if ctype == 'long-text':
-                        val = '<pre style="white-space:pre-wrap">%s</pre>' % escape(val)
+                        val = ('<pre style="white-space:pre-wrap">%s</pre>' %
                               saxutils.escape(val))
                    elif ctype == 'short-text':
-                        val = '<span>%s</span>' % escape(val)
+                        val = '<span>%s</span>' % saxutils.escape(val)
                    elif ctype == 'markdown':
                        val = markdown(val)
                    else:
                        val = comments_to_html(val)
                    args[dkey] = val
                else:
-                    args[dkey] = escape(val)
+                    args[dkey] = saxutils.escape(val)
-                args[dkey+'_label'] = escape(display_name)
+                args[dkey+'_label'] = saxutils.escape(display_name)
            except Exception:
-                # if the val (custom column contents) is None, don't add to args
+                # if the val (custom column contents) is None, don't add to
                # args
                pass
        if False:
@@ -371,10 +386,11 @@ def render_jacket(mi, output_profile,
        # the text in the book. That means that as long as the jacket uses
        # relative font sizes (em or %), the post conversion font size will be
        # the same as for text in the main book. So text with size x em will
-        # be rescaled to the same value in both the jacket and the main content.
+        # be rescaled to the same value in both the jacket and the main
        # content.
        #
-        # We cannot use data-calibre-rescale 100 on the body tag as that will just
+        # We cannot use data-calibre-rescale 100 on the body tag as that will
-        # give the body tag a font size of 1em, which is useless.
+        # just give the body tag a font size of 1em, which is useless.
        for body in root.xpath('//*[local-name()="body"]'):
            fw = body.makeelement(base.tag('xhtml', 'div'))
            fw.set('data-calibre-rescale', '100')