Moved misc functions from the polyglot package to a single polyglot module.

2021-05-25 19:06:31 +02:00
parent f46984267e
commit f47376830f
32 changed files with 244 additions and 219 deletions
@@ -9,7 +9,7 @@ import sys
 import urllib.parse

 from ebook_converter.utils.config_base import tweaks
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot
 from ebook_converter.utils import encoding as uenc


@@ -248,9 +248,11 @@ class Resource(object):
                pc = url[2]
                if isinstance(pc, str):
                    pc = pc.encode('utf-8')
-                pc = unquote(pc).decode('utf-8')
-                self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
-                self.fragment = unquote(url[-1])
+                pc = polyglot.unquote(pc).decode('utf-8')
+                self.path = os.path.abspath(os.path.join(basedir,
+                                                         pc.replace('/',
+                                                                    os.sep)))
+                self.fragment = polyglot.unquote(url[-1])

    def href(self, basedir=None):
        '''
@@ -14,7 +14,7 @@ from ebook_converter.utils.img import save_cover_data_to
 from ebook_converter.utils.imghdr import identify
 from ebook_converter.ebooks.metadata import MetaInformation, check_isbn
 from ebook_converter.ebooks.chardet import xml_to_unicode
-from ebook_converter.polyglot.binary import as_base64_unicode
+from ebook_converter import polyglot
 from ebook_converter.utils import encoding as uenc


@@ -389,7 +389,7 @@ def _rnd_pic_file_name(prefix='calibre_cover_', size=32, ext='jpg'):

 def _encode_into_jpeg(data):
    data = save_cover_data_to(data)
-    return as_base64_unicode(data)
+    return polyglot.as_base64_unicode(data)


 def _set_cover(title_info, mi, ctx):
@@ -30,11 +30,11 @@ from ebook_converter.ebooks.metadata.utils import parse_opf, \
 from ebook_converter.ebooks.metadata import string_to_authors, \
        MetaInformation, check_isbn
 from ebook_converter.ebooks.metadata.book.base import Metadata
+from ebook_converter import polyglot
 from ebook_converter.utils.date import parse_date, isoformat
 from ebook_converter.utils.localization import get_lang, canonicalize_lang
 from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
 from ebook_converter.utils.config_base import tweaks
-from ebook_converter.polyglot.urllib import unquote


 pretty_print_opf = False
@@ -838,7 +838,7 @@ class OPF(object):  # {{{

    def unquote_urls(self):
        def get_href(item):
-            raw = unquote(item.get('href', ''))
+            raw = polyglot.unquote(item.get('href', ''))
            if not isinstance(raw, str):
                raw = raw.decode('utf-8')
            return raw
@@ -11,7 +11,7 @@ from lxml.builder import ElementMaker
 from ebook_converter.constants_old import __appname__, __version__
 from ebook_converter.ebooks.chardet import xml_to_unicode
 from ebook_converter.utils.cleantext import clean_xml_chars
-from ebook_converter.polyglot.urllib import unquote
+from ebook_converter import polyglot


 NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
@@ -31,7 +31,7 @@ def parse_html_toc(data):
    root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False,
                 sanitize_names=True)
    for a in root.xpath('//*[@href and local-name()="a"]'):
-        purl = urllib.parse.urlparse(unquote(a.get('href')))
+        purl = urllib.parse.urlparse(polyglot.unquote(a.get('href')))
        href, fragment = purl[2], purl[5]
        if not fragment:
            fragment = None
@@ -149,7 +149,7 @@ class TOC(list):

        if toc is not None:
            if toc.lower() not in ('ncx', 'ncxtoc'):
-                toc = urllib.parse.urlparse(unquote(toc))[2]
+                toc = urllib.parse.urlparse(polyglot.unquote(toc))[2]
                toc = toc.replace('/', os.sep)
                if not os.path.isabs(toc):
                    toc = os.path.join(self.base_path, toc)
@@ -219,7 +219,8 @@ class TOC(list):
                    content = content[0]
                    # if get_attr(content, attr='src'):
                    purl = urllib.parse.urlparse(content.get('src'))
-                    href, fragment = unquote(purl[2]), unquote(purl[5])
+                    href = polyglot.unquote(purl[2])
+                    fragment = polyglot.unquote(purl[5])
                    nd = dest.add_item(href, fragment, text)
                    nd.play_order = play_order