Moved misc functions from the polyglot package to a single polyglot module.

2021-05-25 19:06:31 +02:00
parent f46984267e
commit f47376830f
32 changed files with 244 additions and 219 deletions
@@ -5,17 +5,12 @@ import os
 from lxml import html
 from lxml.html import builder
 from ebook_converter.polyglot.urllib import unquote as _unquote
 from ebook_converter.ebooks.oeb.base import urlquote
 from ebook_converter.ebooks.chardet import xml_to_unicode
 from ebook_converter.customize.conversion import InputFormatPlugin
 from ebook_converter.ptempfile import TemporaryDirectory
 from ebook_converter.constants_old import filesystem_encoding
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 __license__ = 'GPL v3'
 __copyright__ = ('2008, Kovid Goyal <kovid at kovidgoyal.net>, '
                 'and Alex Bramley <a.bramley at gmail.com>.')
 class CHMInput(InputFormatPlugin):
@@ -133,7 +128,7 @@ class CHMInput(InputFormatPlugin):
        def unquote(x):
            if isinstance(x, str):
                x = x.encode('utf-8')
-            return _unquote(x).decode('utf-8')
+            return polyglot.unquote(x).decode('utf-8')
        def unquote_path(x):
            y = unquote(x)
@@ -175,7 +170,7 @@ class CHMInput(InputFormatPlugin):
                                    pretty_print=True)
                f.write(raw)
            else:
-                f.write(as_bytes(hhcdata))
+                f.write(polyglot.as_bytes(hhcdata))
        return htmlpath, toc
    def _read_file(self, name):
@@ -9,7 +9,7 @@ from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.customize.conversion import OutputFormatPlugin
 from ebook_converter.customize.conversion import OptionRecommendation
 from ebook_converter.ptempfile import TemporaryDirectory
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 from ebook_converter.utils import directory
@@ -266,7 +266,8 @@ class EPUBOutput(OutputFormatPlugin):
                    extra_entries=extra_entries) as epub:
                epub.add_dir(tdir)
                if encryption is not None:
-                    epub.writestr('META-INF/encryption.xml', as_bytes(encryption))
+                    epub.writestr('META-INF/encryption.xml',
                                  polyglot.as_bytes(encryption))
                if metadata_xml is not None:
                    epub.writestr('META-INF/metadata.xml',
                            metadata_xml.encode('utf-8'))
@@ -308,12 +309,10 @@ class EPUBOutput(OutputFormatPlugin):
            pass
    def encrypt_fonts(self, uris, tdir, _uuid):  # {{{
        from ebook_converter.polyglot.binary import from_hex_bytes
        key = re.sub(r'[^a-fA-F0-9]', '', _uuid)
        if len(key) < 16:
            raise ValueError('UUID identifier %r is invalid'% _uuid)
-        key = bytearray(from_hex_bytes((key + key)[:32]))
+        key = bytearray(polyglot.from_hex_bytes((key + key)[:32]))
        paths = []
        with directory.CurrentDir(tdir):
            paths = [os.path.join(*x.split('/')) for x in uris]
@@ -7,7 +7,7 @@ from lxml import etree
 from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
 from ebook_converter.ebooks.oeb.base import element
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 from ebook_converter.ptempfile import PersistentTemporaryDirectory
 from ebook_converter.utils.cleantext import clean_xml_chars
 from ebook_converter.utils import directory
@@ -56,7 +56,8 @@ class HTMLOutput(OutputFormatPlugin):
                    parent = element(parent, ('ul'))
                for node in current_node.nodes:
                    point = element(parent, 'li')
-                    href = relpath(os.path.abspath(unquote(node.href)),
+                    href = relpath(os.path.abspath(polyglot
                                                   .unquote(node.href)),
                                   os.path.dirname(ref_url))
                    if isinstance(href, bytes):
                        href = href.decode('utf-8')
@@ -84,7 +85,6 @@ class HTMLOutput(OutputFormatPlugin):
        from lxml import etree
        from ebook_converter.utils import zipfile
        from templite import Templite
        from ebook_converter.polyglot.urllib import unquote
        from ebook_converter.ebooks.html.meta import EasyMeta
        # read template files
@@ -156,7 +156,7 @@ class HTMLOutput(OutputFormatPlugin):
        with directory.CurrentDir(output_dir):
            for item in oeb_book.manifest:
-                path = os.path.abspath(unquote(item.href))
+                path = os.path.abspath(polyglot.unquote(item.href))
                dir = os.path.dirname(path)
                if not os.path.exists(dir):
                    os.makedirs(dir)
@@ -169,7 +169,7 @@ class HTMLOutput(OutputFormatPlugin):
                    item.unload_data_from_memory(memory=path)
            for item in oeb_book.spine:
-                path = os.path.abspath(unquote(item.href))
+                path = os.path.abspath(polyglot.unquote(item.href))
                dir = os.path.dirname(path)
                root = item.data.getroottree()
@@ -5,7 +5,7 @@ from lxml import etree
 from ebook_converter.customize.conversion import (OutputFormatPlugin,
        OptionRecommendation)
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
 from ebook_converter.ebooks.oeb.normalize_css import condense_sheet
 from ebook_converter.utils import directory
@@ -56,7 +56,7 @@ class OEBOutput(OutputFormatPlugin):
                        not self.opts.expand_css and item.media_type in OEB_STYLES and hasattr(
                            item.data, 'cssText') and 'nook' not in self.opts.output_profile.short_name):
                    condense_sheet(item.data)
-                path = os.path.abspath(unquote(item.href))
+                path = os.path.abspath(polyglot.unquote(item.href))
                dir = os.path.dirname(path)
                if not os.path.exists(dir):
                    os.makedirs(dir)
@@ -1,12 +1,7 @@
 import os
 from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 class PDFInput(InputFormatPlugin):
@@ -72,7 +67,8 @@ class PDFInput(InputFormatPlugin):
            ncxid = opf.manifest.id_for_path('toc.ncx')
            if ncxid:
                with open('metadata.opf', 'r+b') as f:
-                    raw = f.read().replace(b'<spine', b'<spine toc="%s"' % as_bytes(ncxid))
+                    raw = f.read().replace(b'<spine', b'<spine toc="%s"' %
                                           polyglot.as_bytes(ncxid))
                    f.seek(0)
                    f.write(raw)
@@ -8,7 +8,7 @@ from lxml import etree
 from ebook_converter.customize.conversion import InputFormatPlugin
 from ebook_converter.customize.conversion import OptionRecommendation
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 border_style_map = {'single': 'solid',
@@ -296,7 +296,7 @@ class RTFInput(InputFormatPlugin):
        result = transform(doc)
        html = u'index.xhtml'
        with open(html, 'wb') as f:
-            res = as_bytes(transform.tostring(result))
+            res = polyglot.as_bytes(transform.tostring(result))
            # res = res[:100].replace('xmlns:html', 'xmlns') + res[100:]
            # clean multiple \n
            res = re.sub(b'\n+', b'\n', res)
@@ -1,22 +1,20 @@
-__license__ = 'GPL v3'
+import io
-__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+
-__docformat__ = 'restructuredtext en'
+from ebook_converter import polyglot
 def base64_decode(raw):
    from io import BytesIO
    from ebook_converter.polyglot.binary import from_base64_bytes
    # First try the python implementation as it is faster
    try:
-        return from_base64_bytes(raw)
+        return polyglot.from_base64_bytes(raw)
    except Exception:
        pass
    # Try a more robust version (adapted from FBReader sources)
    A, Z, a, z, zero, nine, plus, slash, equal = bytearray(b'AZaz09+/=')
    raw = bytearray(raw)
-    out = BytesIO()
+    out = io.BytesIO()
    pos = 0
    while pos < len(raw):
        tot = 0
@@ -32,7 +30,7 @@ def base64_decode(raw):
            elif zero <= byt <= nine:
                num = byt - zero + 52
            else:
-                num = {plus:62, slash:63, equal:64}.get(byt, None)
+                num = {plus: 62, slash: 63, equal: 64}.get(byt, None)
                if num is None:
                    # Ignore this byte
                    continue
@@ -13,7 +13,7 @@ from ebook_converter import constants as const
 from ebook_converter.constants_old import __appname__, __version__
 from ebook_converter.ebooks.oeb import base
 from ebook_converter.ebooks.oeb import parse_utils
-from ebook_converter.polyglot.binary import as_base64_unicode
+from ebook_converter import polyglot
 from ebook_converter.utils import entities
 from ebook_converter.utils.img import save_cover_data_to
 from ebook_converter.utils.localization import lang_as_iso639_1
@@ -355,10 +355,10 @@ class FB2MLizer(object):
                    if item.media_type not in ('image/jpeg', 'image/png'):
                        imdata = save_cover_data_to(item.data,
                                                    compression_quality=70)
-                        raw_data = as_base64_unicode(imdata)
+                        raw_data = polyglot.as_base64_unicode(imdata)
                        content_type = 'image/jpeg'
                    else:
-                        raw_data = as_base64_unicode(item.data)
+                        raw_data = polyglot.as_base64_unicode(item.data)
                        content_type = item.media_type
                    # Don't put the encoded image on a single line.
                    step = 72
@@ -14,26 +14,24 @@ from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.ebooks.oeb.stylizer import Stylizer
 from ebook_converter.utils import entities
 from ebook_converter.utils.logging import default_log
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
-__license__ = 'GPL 3'
+SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img',
-__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+                     'link', 'meta'}
 __docformat__ = 'restructuredtext en'
 SELF_CLOSING_TAGS = {'area', 'base', 'basefont', 'br', 'hr', 'input', 'img', 'link', 'meta'}
 class OEB2HTML(object):
-    '''
+    """
-    Base class. All subclasses should implement dump_text to actually transform
+    Base class. All subclasses should implement dump_text to actually
-    content. Also, callers should use oeb2html to get the transformed html.
+    transform content. Also, callers should use oeb2html to get the
-    links and images can be retrieved after calling oeb2html to get the mapping
+    transformed html. Links and images can be retrieved after calling oeb2html
-    of OEB links and images to the new names used in the html returned by oeb2html.
+    to get the mapping of OEB links and images to the new names used in the
-    Images will always be referenced as if they are in an images directory.
+    html returned by oeb2html. Images will always be referenced as if they are
    in an images directory.
    Use get_css to get the CSS classes for the OEB document as a string.
-    '''
+    """
    def __init__(self, log=None):
        self.log = default_log if log is None else log
@@ -55,16 +53,18 @@ class OEB2HTML(object):
        return self.mlize_spine(oeb_book)
    def mlize_spine(self, oeb_book):
-        output = [
+        output = ['<html><head><meta http-equiv="Content-Type" '
-            u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>%s</title></head><body>' % (
+                  'content="text/html;charset=utf-8" />'
-                entities.prepare_string_for_xml(self.book_title))
+                  '<title>%s</title></head>'
-        ]
+                  '<body>' % entities.prepare_string_for_xml(self.book_title)]
        for item in oeb_book.spine:
            self.log.debug('Converting %s to HTML...' % item.href)
            self.rewrite_ids(item.data, item)
-            base.rewrite_links(item.data, partial(self.rewrite_link, page=item))
+            base.rewrite_links(item.data, partial(self.rewrite_link,
                                                  page=item))
            stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
-            output += self.dump_text(item.data.find(base.tag('xhtml', 'body')), stylizer, item)
+            output += self.dump_text(item.data.find(base.tag('xhtml', 'body')),
                                     stylizer, item)
            output.append('\n\n')
        output.append('</body></html>')
        return ''.join(output)
@@ -126,13 +126,14 @@ class OEB2HTML(object):
                el.attrib['id'] = self.get_link_id(page.href)[1:]
                continue
            if 'id' in el.attrib:
-                el.attrib['id'] = self.get_link_id(page.href, el.attrib['id'])[1:]
+                el.attrib['id'] = self.get_link_id(page.href,
                                                   el.attrib['id'])[1:]
    def get_css(self, oeb_book):
        css = b''
        for item in oeb_book.manifest:
            if item.media_type == 'text/css':
-                css += as_bytes(item.data.cssText) + b'\n\n'
+                css += polyglot.as_bytes(item.data.cssText) + b'\n\n'
        return css
    def prepare_string_for_html(self, raw):
@@ -157,10 +158,14 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
        # We can only processes tags. If there isn't a tag return any text.
        if not isinstance(elem.tag, (str, bytes)) \
-           or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS):
+           or parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
                                                      const.SVG_NS):
            p = elem.getparent()
-            if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \
+            if (p is not None and
-                    and elem.tail:
+                    isinstance(p.tag, (str, bytes)) and
                    parse_utils.namespace(p.tag) in (const.XHTML_NS,
                                                     const.SVG_NS) and
                    elem.tail):
                return [elem.tail]
            return ['']
@@ -176,8 +181,8 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
        tags.append(tag)
        # Ignore anything that is set to not be displayed.
-        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
+        if (style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or
-           or style['visibility'] == 'hidden':
+                style['visibility'] == 'hidden'):
            return ['']
        # Remove attributes we won't want.
@@ -186,11 +191,13 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
        if 'style' in attribs:
            del attribs['style']
-        # Turn the rest of the attributes into a string we can write with the tag.
+        # Turn the rest of the attributes into a string we can write with the
        # tag.
        at = ''
-        for k, v in attribs.items():
+        for key, value in attribs.items():
-            at += ' %s="%s"' % (k, entities
+            at += (' %s="%s"' %
-                                .prepare_string_for_xml(v, attribute=True))
+                   (key, entities.prepare_string_for_xml(value,
                                                         attribute=True)))
        # Write the tag.
        text.append('<%s%s' % (tag, at))
@@ -246,11 +253,15 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
        '''
        # We can only processes tags. If there isn't a tag return any text.
-        if not isinstance(elem.tag, (str, bytes)) \
+        if (not isinstance(elem.tag, (str, bytes)) or
-           or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS):
+                parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
                                                        const.SVG_NS)):
            p = elem.getparent()
-            if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \
+            if (p is not None and
-                    and elem.tail:
+                    isinstance(p.tag, (str, bytes)) and
                    parse_utils.namespace(p.tag) in (const.XHTML_NS,
                                                     const.SVG_NS) and
                    elem.tail):
                return [elem.tail]
            return ['']
@@ -266,9 +277,11 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
        if tag == 'body':
            # Change the body to a div so we can merge multiple files.
            tag = 'div'
-            # Add page-break-brefore: always because renders typically treat a new file (we're merging files)
+            # Add page-break-before: always because renderers typically treat
-            # as a page break and remove all other page break types that might be set.
+            # a new file (we're merging files) as a page break and remove all
-            style_a = 'page-break-before: always; %s' % re.sub('page-break-[^:]+:[^;]+;?', '', style_a)
+            # other page break types that might be set.
            style_a = ('page-break-before: always; %s' %
                       re.sub('page-break-[^:]+:[^;]+;?', '', style_a))
        # Remove unnecessary spaces.
        style_a = re.sub(r'\s{2,}', ' ', style_a).strip()
        tags.append(tag)
@@ -279,7 +292,8 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
        if 'style' in attribs:
            del attribs['style']
-        # Turn the rest of the attributes into a string we can write with the tag.
+        # Turn the rest of the attributes into a string we can write with
        # the tag.
        at = ''
        for k, v in attribs.items():
            at += ' %s="%s"' % (k, entities
@@ -319,43 +333,51 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
 class OEB2HTMLClassCSSizer(OEB2HTML):
-    '''
+    """
-    Use CSS classes. css_style option can specify whether to use
+    Use CSS classes. css_style option can specify whether to use inline
-    inline classes (style tag in the head) or reference an external
+    classes (style tag in the head) or reference an external CSS file called
-    CSS file called style.css.
+    style.css.
-    '''
+    """
    def mlize_spine(self, oeb_book):
        output = []
        for item in oeb_book.spine:
            self.log.debug('Converting %s to HTML...' % item.href)
            self.rewrite_ids(item.data, item)
-            base.rewrite_links(item.data, partial(self.rewrite_link, page=item))
+            base.rewrite_links(item.data, partial(self.rewrite_link,
                                                  page=item))
            stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
-            output += self.dump_text(item.data.find(base.tag('xhtml', 'body')), stylizer, item)
+            output += self.dump_text(item.data.find(base.tag('xhtml', 'body')),
                                     stylizer, item)
            output.append('\n\n')
        if self.opts.htmlz_class_style == 'external':
-            css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
+            css = '<link href="style.css" rel="stylesheet" type="text/css" />'
        else:
-            css =  u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
+            css =  ('<style type="text/css">' + self.get_css(oeb_book) +
-        title = (u'<title>%s</title>' %
+                    '</style>')
        title = ('<title>%s</title>' %
                 entities.prepare_string_for_xml(self.book_title))
-        output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + \
+        output = (['<html><head><meta http-equiv="Content-Type" '
-            [css] + [title, u'</head><body>'] + output + [u'</body></html>']
+                  'content="text/html;charset=utf-8" />'] + [css] +
                  [title, '</head><body>'] + output + ['</body></html>'])
        return ''.join(output)
    def dump_text(self, elem, stylizer, page):
-        '''
+        """
        @elem: The element in the etree that we are working on.
        @stylizer: The style information attached to the element.
-        '''
+        """
        # We can only processes tags. If there isn't a tag return any text.
-        if not isinstance(elem.tag, (str, bytes)) \
+        if (not isinstance(elem.tag, (str, bytes)) or
-           or parse_utils.namespace(elem.tag) not in (const.XHTML_NS, const.SVG_NS):
+                parse_utils.namespace(elem.tag) not in (const.XHTML_NS,
                                                        const.SVG_NS)):
            p = elem.getparent()
-            if p is not None and isinstance(p.tag, (str, bytes)) and parse_utils.namespace(p.tag) in (const.XHTML_NS, const.SVG_NS) \
+            if (p is not None and
-                    and elem.tail:
+                    isinstance(p.tag, (str, bytes)) and
                    parse_utils.namespace(p.tag) in (const.XHTML_NS,
                                                     const.SVG_NS) and
                    elem.tail):
                return [elem.tail]
            return ['']
@@ -373,11 +395,12 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
        if 'style' in attribs:
            del attribs['style']
-        # Turn the rest of the attributes into a string we can write with the tag.
+        # Turn the rest of the attributes into a string we can write with
        # the tag.
        at = ''
        for k, v in attribs.items():
-            at += ' %s="%s"' % (k,
+            at += ' %s="%s"' % (k, entities
-                entities.prepare_string_for_xml(v, attribute=True))
+                                .prepare_string_for_xml(v, attribute=True))
        # Write the tag.
        text.append('<%s%s' % (tag, at))
@@ -5,7 +5,7 @@ import textwrap
 from lxml import etree
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 class Canvas(etree.XSLTExtension):
@@ -292,7 +292,7 @@ class Styles(etree.XSLTExtension):
            return '\n\t'.join(ans)
        with open(name, 'wb') as f:
-            f.write(as_bytes(self.CSS))
+            f.write(polyglot.as_bytes(self.CSS))
            for (w, sel) in [(self.text_styles, 'ts'), (self.block_styles,
                'bs')]:
                for i, s in enumerate(w):
@@ -300,7 +300,7 @@ class Styles(etree.XSLTExtension):
                        continue
                    rsel = '.%s%d'%(sel, i)
                    s = join(s)
-                    f.write(as_bytes(rsel + ' {\n\t' + s + '\n}\n\n'))
+                    f.write(polyglot.as_bytes(rsel + ' {\n\t' + s + '\n}\n\n'))
    def execute(self, context, self_node, input_node, output_parent):
        if input_node.tag == 'TextStyle':
@@ -9,7 +9,7 @@ import sys
 import urllib.parse
 from ebook_converter.utils.config_base import tweaks
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 from ebook_converter.utils import encoding as uenc
@@ -248,9 +248,11 @@ class Resource(object):
                pc = url[2]
                if isinstance(pc, str):
                    pc = pc.encode('utf-8')
-                pc = unquote(pc).decode('utf-8')
+                pc = polyglot.unquote(pc).decode('utf-8')
-                self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
+                self.path = os.path.abspath(os.path.join(basedir,
-                self.fragment = unquote(url[-1])
+                                                         pc.replace('/',
                                                                    os.sep)))
                self.fragment = polyglot.unquote(url[-1])
    def href(self, basedir=None):
        '''
@@ -14,7 +14,7 @@ from ebook_converter.utils.img import save_cover_data_to
 from ebook_converter.utils.imghdr import identify
 from ebook_converter.ebooks.metadata import MetaInformation, check_isbn
 from ebook_converter.ebooks.chardet import xml_to_unicode
-from ebook_converter.polyglot.binary import as_base64_unicode
+from ebook_converter import polyglot
 from ebook_converter.utils import encoding as uenc
@@ -389,7 +389,7 @@ def _rnd_pic_file_name(prefix='calibre_cover_', size=32, ext='jpg'):
 def _encode_into_jpeg(data):
    data = save_cover_data_to(data)
-    return as_base64_unicode(data)
+    return polyglot.as_base64_unicode(data)
 def _set_cover(title_info, mi, ctx):
@@ -30,11 +30,11 @@ from ebook_converter.ebooks.metadata.utils import parse_opf, \
 from ebook_converter.ebooks.metadata import string_to_authors, \
        MetaInformation, check_isbn
 from ebook_converter.ebooks.metadata.book.base import Metadata
 from ebook_converter import polyglot
 from ebook_converter.utils.date import parse_date, isoformat
 from ebook_converter.utils.localization import get_lang, canonicalize_lang
 from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
 from ebook_converter.utils.config_base import tweaks
 from ebook_converter.polyglot.urllib import unquote
 pretty_print_opf = False
@@ -838,7 +838,7 @@ class OPF(object):  # {{{
    def unquote_urls(self):
        def get_href(item):
-            raw = unquote(item.get('href', ''))
+            raw = polyglot.unquote(item.get('href', ''))
            if not isinstance(raw, str):
                raw = raw.decode('utf-8')
            return raw
@@ -11,7 +11,7 @@ from lxml.builder import ElementMaker
 from ebook_converter.constants_old import __appname__, __version__
 from ebook_converter.ebooks.chardet import xml_to_unicode
 from ebook_converter.utils.cleantext import clean_xml_chars
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
@@ -31,7 +31,7 @@ def parse_html_toc(data):
    root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False,
                 sanitize_names=True)
    for a in root.xpath('//*[@href and local-name()="a"]'):
-        purl = urllib.parse.urlparse(unquote(a.get('href')))
+        purl = urllib.parse.urlparse(polyglot.unquote(a.get('href')))
        href, fragment = purl[2], purl[5]
        if not fragment:
            fragment = None
@@ -149,7 +149,7 @@ class TOC(list):
        if toc is not None:
            if toc.lower() not in ('ncx', 'ncxtoc'):
-                toc = urllib.parse.urlparse(unquote(toc))[2]
+                toc = urllib.parse.urlparse(polyglot.unquote(toc))[2]
                toc = toc.replace('/', os.sep)
                if not os.path.isabs(toc):
                    toc = os.path.join(self.base_path, toc)
@@ -219,7 +219,8 @@ class TOC(list):
                    content = content[0]
                    # if get_attr(content, attr='src'):
                    purl = urllib.parse.urlparse(content.get('src'))
-                    href, fragment = unquote(purl[2]), unquote(purl[5])
+                    href = polyglot.unquote(purl[2])
                    fragment = polyglot.unquote(purl[5])
                    nd = dest.add_item(href, fragment, text)
                    nd.play_order = play_order
@@ -5,7 +5,7 @@ from io import BytesIO
 from ebook_converter.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data
 from ebook_converter.utils.imghdr import what
 from ebook_converter.ebooks import normalize
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 from ebook_converter.tinycss.color3 import parse_color_string
@@ -61,7 +61,7 @@ def decode_hex_number(raw, codec='utf-8'):
 def encode_string(raw):
-    ans = bytearray(as_bytes(raw))
+    ans = bytearray(polyglot.as_bytes(raw))
    ans.insert(0, len(ans))
    return bytes(ans)
@@ -15,7 +15,7 @@ from odf.namespaces import TEXTNS as odTEXTNS
 from ebook_converter.utils import directory
 from ebook_converter.ebooks.oeb.base import _css_logger
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 class Extract(ODF2XHTML):
@@ -292,7 +292,7 @@ class Extract(ODF2XHTML):
            except:
                log.exception('Failed to filter CSS, conversion may be slow')
            with open('index.xhtml', 'wb') as f:
-                f.write(as_bytes(html))
+                f.write(polyglot.as_bytes(html))
            zf = ZipFile(stream, 'r')
            self.extract_pictures(zf)
            opf = OPFCreator(os.path.abspath(os.getcwd()), mi)
@@ -24,7 +24,7 @@ from ebook_converter.utils.localization import get_lang
 from ebook_converter.ptempfile import TemporaryDirectory
 from ebook_converter.constants_old import __appname__, __version__
 from ebook_converter.utils import entities
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 class OEBReader(object):
@@ -641,7 +641,7 @@ class OEBReader(object):
        with TemporaryDirectory('_html_cover') as tdir:
            writer = OEBWriter()
            writer(self.oeb, tdir)
-            path = os.path.join(tdir, unquote(hcover.href))
+            path = os.path.join(tdir, polyglot.unquote(hcover.href))
            data = render_html_svg_workaround(path, self.logger)
            if not data:
                data = b''
@@ -5,7 +5,7 @@ import urllib.parse
 from lxml import etree
 from ebook_converter.utils.imghdr import identify
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 class CoverManager(object):
@@ -113,7 +113,7 @@ class CoverManager(object):
            if href is not None:
                templ = self.non_svg_template if self.no_svg_cover \
                        else self.svg_template
-                tp = templ % unquote(href)
+                tp = templ % polyglot.unquote(href)
                id, href = m.generate('titlepage', 'titlepage.xhtml')
                item = m.add(id, href, mimetypes.guess_type('t.xhtml')[0],
                             data=etree.fromstring(tp))
@@ -3,8 +3,7 @@ import re
 import urllib.parse
 from ebook_converter.ebooks.oeb.base import XPath
-from ebook_converter.polyglot.binary import from_base64_bytes
+from ebook_converter import polyglot
 from ebook_converter.polyglot.builtins import as_bytes
 class DataURL(object):
@@ -27,14 +26,14 @@ class DataURL(object):
                if ';base64' in header:
                    data = re.sub(r'\s+', '', data)
                    try:
-                        data = from_base64_bytes(data)
+                        data = polyglot.from_base64_bytes(data)
                    except Exception:
                        self.log.error('Found invalid base64 encoded data '
                                       'URI, ignoring it')
                        continue
                else:
                    data = urllib.parse.unquote(data)
-                data = as_bytes(data)
+                data = polyglot.as_bytes(data)
                fmt = what(None, data)
                if not fmt:
                    self.log.warn('Image encoded as data URL has unknown '
@@ -17,7 +17,7 @@ from ebook_converter import constants as const
 from ebook_converter.ebooks.epub import rules
 from ebook_converter.ebooks.oeb import base
 from ebook_converter.ebooks.oeb.polish.split import do_split
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 from ebook_converter.css_selectors import Select, SelectorError
 from ebook_converter.utils import encoding as uenc
@@ -189,7 +189,7 @@ class Split(object):
            nhref = anchor_map[frag if frag else None]
            nhref = self.current_item.relhref(nhref)
            if frag:
-                nhref = '#'.join((unquote(nhref), frag))
+                nhref = '#'.join((polyglot.unquote(nhref), frag))
            return nhref
        return url
@@ -1,20 +1,18 @@
-import codecs, zlib, numbers
+import codecs
 from io import BytesIO
 from datetime import datetime
 import io
 import numbers
 import zlib
 from ebook_converter.utils.logging import default_log
-from ebook_converter.polyglot.binary import as_hex_bytes
+from ebook_converter import polyglot
 __license__ = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 pdf_float = lambda x: f"{x:.1f}"
 EOL = b'\n'
-# Sizes {{{
+# Sizes
 inch = 72.0
 cm = inch / 2.54
 mm = cm * 0.1
@@ -45,10 +43,9 @@ B2 = (_BW*2, _BH*2)
 B1 = (_BH*4, _BW*2)
 B0 = (_BW*4, _BH*4)
-PAPER_SIZES = {k:globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 b2'
+PAPER_SIZES = {k: globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 '
-               ' b3 b4 b5 b6 letter legal').split()}
+                                                 'b2 b3 b4 b5 b6 letter '
-
+                                                 'legal').split()}
 # }}}
 def fmtnum(o):
@@ -70,12 +67,12 @@ def serialize(o, stream):
    elif o is None:
        stream.write_raw(b'null')
    elif isinstance(o, datetime):
-        val = o.strftime("D:%Y%m%d%H%M%%02d%z")%min(59, o.second)
+        val = o.strftime("D:%Y%m%d%H%M%%02d%z") % min(59, o.second)
        if datetime.tzinfo is not None:
-            val = "(%s'%s')"%(val[:-2], val[-2:])
+            val = "(%s'%s')" % (val[:-2], val[-2:])
        stream.write(val.encode('ascii'))
    else:
-        raise ValueError('Unknown object: %r'%o)
+        raise ValueError('Unknown object: %r' % o)
 class Name(str):
@@ -83,7 +80,7 @@ class Name(str):
    def pdf_serialize(self, stream):
        raw = self.encode('ascii')
        if len(raw) > 126:
-            raise ValueError('Name too long: %r'%self)
+            raise ValueError('Name too long: %r' % self)
        raw = bytearray(raw)
        sharp = ord(b'#')
        buf = (
@@ -96,7 +93,8 @@ def escape_pdf_string(bytestring):
    indices = []
    bad = []
    ba = bytearray(bytestring)
-    bad_map = {10:ord('n'), 13:ord('r'), 12:ord('f'), 8:ord('b'), 9:ord('\t'), 92:ord('\\')}
+    bad_map = {10: ord('n'), 13: ord('r'), 12: ord('f'),
               8: ord('b'), 9: ord('\t'), 92: ord('\\')}
    for i, num in enumerate(ba):
        if num == 40:  # (
            indices.append((i, 40))
@@ -134,7 +132,7 @@ class UTF16String(str):
        if False:
            # Disabled as the parentheses based strings give easier to debug
            # PDF files
-            stream.write(b'<' + as_hex_bytes(raw) + b'>')
+            stream.write(b'<' + polyglot.as_hex_bytes(raw) + b'>')
        else:
            stream.write(b'('+escape_pdf_string(raw)+b')')
@@ -143,9 +141,9 @@ class Dictionary(dict):
    def pdf_serialize(self, stream):
        stream.write(b'<<' + EOL)
-        sorted_keys = sorted(self,
+        sorted_keys = sorted(self, key=lambda x: ({'Type': '1',
-                             key=lambda x:({'Type':'1', 'Subtype':'2'}.get(
+                                                  'Subtype': '2'}
-                                 x, x)+x))
+                                                  .get(x, x) + x))
        for k in sorted_keys:
            serialize(Name(k), stream)
            stream.write(b' ')
@@ -177,10 +175,10 @@ class Array(list):
        stream.write(b']')
-class Stream(BytesIO):
+class Stream(io.BytesIO):
    def __init__(self, compress=False):
-        BytesIO.__init__(self)
+        io.BytesIO.__init__(self)
        self.compress = compress
        self.filters = Array()
@@ -213,7 +211,7 @@ class Stream(BytesIO):
                                  raw.encode('ascii'))
    def write_raw(self, raw):
-        BytesIO.write(self, raw)
+        io.BytesIO.write(self, raw)
 class Reference(object):
@@ -222,11 +220,11 @@ class Reference(object):
        self.num, self.obj = num, obj
    def pdf_serialize(self, stream):
-        raw = '%d 0 R'%self.num
+        raw = '%d 0 R' % self.num
        stream.write(raw.encode('ascii'))
    def __repr__(self):
-        return '%d 0 R'%self.num
+        return '%d 0 R' % self.num
    def __str__(self):
        return repr(self)
@@ -0,0 +1,59 @@
 """
 Misc converting functions from polyglot module.
 Most of them have something to do with converting between string and binary
 """
 import base64
 import binascii
 import urllib
 def as_base64_unicode(x, enc='utf-8'):
    """Return the standard base64 form of ``x`` as an ASCII ``str``.

    A ``str`` argument is first encoded to bytes with ``enc``; any other
    argument is handed to ``base64.standard_b64encode`` unchanged.
    """
    raw = x.encode(enc) if isinstance(x, str) else x
    encoded = base64.standard_b64encode(raw)
    return encoded.decode('ascii')
 def from_base64_bytes(x):
    """Decode standard base64 data and return the resulting ``bytes``.

    A ``str`` argument is encoded as ASCII before decoding; any other
    argument is handed to ``base64.standard_b64decode`` unchanged.
    """
    payload = x.encode('ascii') if isinstance(x, str) else x
    return base64.standard_b64decode(payload)
 def as_hex_bytes(x, enc='utf-8'):
    """Return the hexadecimal representation of ``x`` as ``bytes``.

    A ``str`` argument is first encoded to bytes with ``enc``; any other
    argument is handed to ``binascii.hexlify`` unchanged.
    """
    raw = x.encode(enc) if isinstance(x, str) else x
    return binascii.hexlify(raw)
 def from_hex_bytes(x):
    """Decode hexadecimal data and return the resulting ``bytes``.

    A ``str`` argument is encoded as ASCII before decoding; any other
    argument is handed to ``binascii.unhexlify`` unchanged.
    """
    digits = x.encode('ascii') if isinstance(x, str) else x
    return binascii.unhexlify(digits)
 def as_bytes(x, encoding='utf-8'):
    """Coerce ``x`` to ``bytes``.

    ``bytes`` is returned as is, ``bytearray`` and ``memoryview`` are
    copied into a new ``bytes`` object, ``str`` is encoded with
    ``encoding``, and anything else is converted with ``str()`` and then
    encoded with ``encoding``.
    """
    if isinstance(x, bytes):
        return x
    if isinstance(x, memoryview):
        return x.tobytes()
    if isinstance(x, bytearray):
        return bytes(x)
    # Everything left is text, or gets turned into text first.
    text = x if isinstance(x, str) else str(x)
    return text.encode(encoding)
 def unquote(x, encoding='utf-8', errors='replace'):
    """Percent-decode ``x``, returning the same type that was passed in.

    :param x: URL-quoted value, either ``str`` or ``bytes``.
    :param encoding: codec used to decode/encode ``bytes`` input and to
        interpret the percent-escaped octets.
    :param errors: codec error handler used for every decode/encode step.
    :returns: ``bytes`` when ``x`` is ``bytes``; otherwise the result of
        ``urllib.parse.unquote`` for ``x``.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` is reachable as an attribute, so
    # relying on it only works when some other module loaded it first.
    import urllib.parse

    # TODO(gryf): this works like that: if x is a binary, convert it to
    # string using encoding and make unquote. After that make it binary again.
    # If x is string, just pass it to the unquote.
    # This approach is mostly used within lxml etree strings, which suppose to
    # be binary because of its inner representation. I'm wondering, if
    # xml.etree could be used instead - to be checked.
    binary = isinstance(x, bytes)
    if binary:
        x = x.decode(encoding, errors)
    ans = urllib.parse.unquote(x, encoding, errors)
    if binary:
        ans = ans.encode(encoding, errors)
    return ans
@@ -1,26 +0,0 @@
 from base64 import standard_b64decode, standard_b64encode
 from binascii import hexlify, unhexlify
 def as_base64_unicode(x, enc='utf-8'):
    if isinstance(x, str):
        x = x.encode(enc)
    return standard_b64encode(x).decode('ascii')
 def from_base64_bytes(x):
    if isinstance(x, str):
        x = x.encode('ascii')
    return standard_b64decode(x)
 def as_hex_bytes(x, enc='utf-8'):
    if isinstance(x, str):
        x = x.encode(enc)
    return hexlify(x)
 def from_hex_bytes(x):
    if isinstance(x, str):
        x = x.encode('ascii')
    return unhexlify(x)
@@ -1,10 +0,0 @@
 def as_bytes(x, encoding='utf-8'):
    if isinstance(x, str):
        return x.encode(encoding)
    if isinstance(x, bytes):
        return x
    if isinstance(x, bytearray):
        return bytes(x)
    if isinstance(x, memoryview):
        return x.tobytes()
    return str(x).encode(encoding)
@@ -1,17 +0,0 @@
 import urllib.parse
 def unquote(x, encoding='utf-8', errors='replace'):
    # TODO(gryf): this works like that: if x is a binary, convert it to
    # string using encoding and make unquote. After that make it binary again.
    # If x is string, just pass it to the unquote.
    # This approach is mostly used within lxml etree strings, which suppose to
    # be binary because of its inner representation. I'm wondering, if
    # xml.etree could be used instead - to be checked.
    binary = isinstance(x, bytes)
    if binary:
        x = x.decode(encoding, errors)
    ans = urllib.parse.unquote(x, encoding, errors)
    if binary:
        ans = ans.encode(encoding, errors)
    return ans
@@ -8,6 +8,7 @@ import tempfile
 from ebook_converter.constants_old import __version__, __appname__, \
        filesystem_encoding
 from ebook_converter import polyglot
 def cleanup(path):
@@ -90,9 +91,8 @@ def base_dir():
        td = os.environ.get('CALIBRE_WORKER_TEMP_DIR', None)
        if td is not None:
            from ebook_converter.utils.serialize import msgpack_loads
            from ebook_converter.polyglot.binary import from_hex_bytes
            try:
-                td = msgpack_loads(from_hex_bytes(td))
+                td = msgpack_loads(polyglot.from_hex_bytes(td))
            except Exception:
                td = None
        if td and os.path.exists(td):
@@ -11,7 +11,7 @@
 import operator
 import re
-from ebook_converter.polyglot.binary import from_hex_bytes
+from ebook_converter import polyglot
 __all__ = ['decode']  # Everything else is implementation detail
@@ -94,7 +94,8 @@ def try_encoding(css_bytes, encoding, fallback=True):
 def hex2re(hex_data):
-    return re.escape(from_hex_bytes(hex_data.replace(' ', '').encode('ascii')))
+    return re.escape(polyglot.from_hex_bytes(hex_data.replace(' ', '')
                                             .encode('ascii')))
 class Slicer(object):
@@ -2,7 +2,7 @@ import struct
 from io import BytesIO
 from collections import defaultdict
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 __license__ = 'GPL v3'
@@ -38,7 +38,7 @@ def get_tables(raw):
 def get_table(raw, name):
    ''' Get the raw table bytes for the specified table in the font '''
-    name = as_bytes(name.lower())
+    name = polyglot.as_bytes(name.lower())
    for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
        if table_tag.lower() == name:
            return table, table_index, table_offset, table_checksum
@@ -1,3 +1,6 @@
 from ebook_converter import polyglot
 MSGPACK_MIME = 'application/x-msgpack'
 CANARY = 'jPoAv3zOyHvQ5JFNYg4hJ9'
@@ -56,11 +59,11 @@ def json_dumps(data, **kw):
 def decode_metadata(x, for_json):
    from ebook_converter.polyglot.binary import from_base64_bytes
    from ebook_converter.ebooks.metadata.book.serialize import metadata_from_dict
    obj = metadata_from_dict(x)
    if for_json and obj.cover_data and obj.cover_data[1]:
-        obj.cover_data = obj.cover_data[0], from_base64_bytes(obj.cover_data[1])
+        obj.cover_data = (obj.cover_data[0],
                          polyglot.from_base64_bytes(obj.cover_data[1]))
    return obj
@@ -1,5 +1,9 @@
-import os, sys, re
+import fcntl
-import fcntl, termios, struct
+import os
 import re
 import struct
 import sys
 import termios
 def fmt(code):
@@ -10,7 +10,7 @@ from tempfile import SpooledTemporaryFile
 from ebook_converter.utils import filenames as fms
 from ebook_converter.constants_old import filesystem_encoding
 from ebook_converter.ebooks.chardet import detect
-from ebook_converter.polyglot.builtins import as_bytes
+from ebook_converter import polyglot
 try:
    import zlib  # We may need its compression method
@@ -330,7 +330,7 @@ class ZipInfo (object):
        if os.sep != '/':
            os_sep, sep = os.sep, '/'
            if isinstance(filename, bytes):
-                os_sep, sep = as_bytes(os_sep), b'/'
+                os_sep, sep = polyglot.as_bytes(os_sep), b'/'
            if os_sep in filename:
                filename = filename.replace(os_sep, sep)