Get rid of polyglot as_unicode

2026-02-22 01:45:58 +01:00 · 2020-06-17 17:45:07 +02:00
parent 4b27f55f5b
commit 976ef5ce36
5 changed files with 11 additions and 22 deletions
--- a/ebook_converter/ebooks/conversion/plugins/html_input.py
+++ b/ebook_converter/ebooks/conversion/plugins/html_input.py
@@ -11,7 +11,6 @@ from ebook_converter.customize.conversion import OptionRecommendation
 from ebook_converter.utils.localization import get_lang
 from ebook_converter.utils.filenames import ascii_filename
 from ebook_converter.utils.imghdr import what
 from ebook_converter.polyglot.builtins import as_unicode
 def sanitize_file_name(x):
@@ -281,9 +280,8 @@ class HTMLInput(InputFormatPlugin):
            # bhref refers to an already existing file. The read() method of
            # DirContainer will call unquote on it before trying to read the
            # file, therefore we quote it here.
-            if isinstance(bhref, str):
+            # XXX(gryf): why the heck it was changed to bytes?
-                bhref = bhref.encode('utf-8')
+            item.html_input_href = urllib.parse.quote(bhref)
            item.html_input_href = as_unicode(urllib.parse.quote(bhref))
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = functools.partial(
                        self.css_import_handler, os.path.dirname(link))
--- a/ebook_converter/ebooks/metadata/__init__.py
+++ b/ebook_converter/ebooks/metadata/__init__.py
@@ -10,7 +10,6 @@ import urllib.parse
 from ebook_converter import relpath, prints, force_unicode
 from ebook_converter.utils.config_base import tweaks
 from ebook_converter.polyglot.builtins import as_unicode
 from ebook_converter.polyglot.urllib import unquote
@@ -268,17 +267,14 @@ class Resource(object):
                basedir = os.getcwd()
        if self.path is None:
            return self._href
-        f = self.fragment.encode('utf-8') if isinstance(self.fragment, str) else self.fragment
+        frag = '#' + urllib.parse.quote(self.fragment) if self.fragment else ''
        frag = '#'+as_unicode(urllib.parse.quote(f)) if self.fragment else ''
        if self.path == basedir:
-            return ''+frag
+            return '' + frag
        try:
            rpath = relpath(self.path, basedir)
        except OSError:  # On windows path and basedir could be on different drives
            rpath = self.path
-        if isinstance(rpath, str):
+        return urllib.parse.quote(rpath.replace(os.sep, '/')) + frag
            rpath = rpath.encode('utf-8')
        return as_unicode(urllib.parse.quote(rpath.replace(os.sep, '/')))+frag
    def set_basedir(self, path):
        self._basedir = path
--- a/ebook_converter/ebooks/metadata/odt.py
+++ b/ebook_converter/ebooks/metadata/odt.py
@@ -32,7 +32,6 @@ from ebook_converter.utils.zipfile import ZipFile, safe_replace
 from odf.draw import Frame as odFrame, Image as odImage
 from odf.namespaces import DCNS, METANS, OFFICENS
 from odf.opendocument import load as odLoad
 from ebook_converter.polyglot.builtins import as_unicode
 fields = {
@@ -240,7 +239,7 @@ def _set_metadata(raw, mi):
        add_user_metadata('opf.seriesindex', '{}'.format(mi.series_index))
    if not mi.is_null('identifiers'):
        remove_user_metadata('opf.identifiers')
-        add_user_metadata('opf.identifiers', as_unicode(json.dumps(mi.identifiers)))
+        add_user_metadata('opf.identifiers', str(json.dumps(mi.identifiers)))
    if not mi.is_null('rating'):
        remove_user_metadata('opf.rating')
        add_user_metadata('opf.rating', '%.2g' % mi.rating)
--- a/ebook_converter/ebooks/mobi/reader/mobi8.py
+++ b/ebook_converter/ebooks/mobi/reader/mobi8.py
@@ -18,7 +18,6 @@ from ebook_converter.ebooks.metadata.toc import TOC
 from ebook_converter.ebooks.mobi.utils import read_font_record
 from ebook_converter.ebooks.oeb.parse_utils import parse_html
 from ebook_converter.ebooks.oeb import base
 from ebook_converter.polyglot.builtins import as_unicode
 ID_RE = re.compile(br'''<[^>]+\s(?:id|ID)\s*=\s*['"]([^'"]+)['"]''')
 NAME_RE = re.compile(br'''<\s*a\s*\s(?:name|NAME)\s*=\s*['"]([^'"]+)['"]''')
@@ -403,7 +402,11 @@ class Mobi8Reader(object):
                    continue
            entry['href'] = href
-            entry['idtag'] = as_unicode(idtag, self.header.codec or 'utf-8')
+            if isinstance(idtag, bytes):
                entry['idtag'] = idtag.decode(self.header.codec or 'utf-8',
                                              'strict')
            else:
                entry['idtag'] = idtag
        for e in remove:
            index_entries.remove(e)
--- a/ebook_converter/polyglot/builtins.py
+++ b/ebook_converter/polyglot/builtins.py
@@ -13,13 +13,6 @@ def as_bytes(x, encoding='utf-8'):
    return str(x).encode(encoding)
 def as_unicode(x, encoding='utf-8', errors='strict'):
    return str(x)
    if isinstance(x, bytes):
        return x.decode(encoding, errors)
    return str(x)
 def reraise(tp, value, tb=None):
    try:
        if value is None: