From 976ef5ce363bc29a4cb16f373bdb9367d404a787 Mon Sep 17 00:00:00 2001 From: gryf Date: Wed, 17 Jun 2020 17:45:07 +0200 Subject: [PATCH] Get rid of polyglot as_unicode --- .../ebooks/conversion/plugins/html_input.py | 6 ++---- ebook_converter/ebooks/metadata/__init__.py | 10 +++------- ebook_converter/ebooks/metadata/odt.py | 3 +-- ebook_converter/ebooks/mobi/reader/mobi8.py | 7 +++++-- ebook_converter/polyglot/builtins.py | 7 ------- 5 files changed, 11 insertions(+), 22 deletions(-) diff --git a/ebook_converter/ebooks/conversion/plugins/html_input.py b/ebook_converter/ebooks/conversion/plugins/html_input.py index 76eab84..637030d 100644 --- a/ebook_converter/ebooks/conversion/plugins/html_input.py +++ b/ebook_converter/ebooks/conversion/plugins/html_input.py @@ -11,7 +11,6 @@ from ebook_converter.customize.conversion import OptionRecommendation from ebook_converter.utils.localization import get_lang from ebook_converter.utils.filenames import ascii_filename from ebook_converter.utils.imghdr import what -from ebook_converter.polyglot.builtins import as_unicode def sanitize_file_name(x): @@ -281,9 +280,8 @@ class HTMLInput(InputFormatPlugin): # bhref refers to an already existing file. The read() method of # DirContainer will call unquote on it before trying to read the # file, therefore we quote it here. - if isinstance(bhref, str): - bhref = bhref.encode('utf-8') - item.html_input_href = as_unicode(urllib.parse.quote(bhref)) + # XXX(gryf): why the heck it was changed to bytes? + item.html_input_href = urllib.parse.quote(bhref) if guessed in self.OEB_STYLES: item.override_css_fetch = functools.partial( self.css_import_handler, os.path.dirname(link)) diff --git a/ebook_converter/ebooks/metadata/__init__.py b/ebook_converter/ebooks/metadata/__init__.py index 918b764..42b590b 100644 --- a/ebook_converter/ebooks/metadata/__init__.py +++ b/ebook_converter/ebooks/metadata/__init__.py @@ -10,7 +10,6 @@ import urllib.parse from ebook_converter import relpath, prints, force_unicode from ebook_converter.utils.config_base import tweaks -from ebook_converter.polyglot.builtins import as_unicode from ebook_converter.polyglot.urllib import unquote @@ -268,17 +267,14 @@ class Resource(object): basedir = os.getcwd() if self.path is None: return self._href - f = self.fragment.encode('utf-8') if isinstance(self.fragment, str) else self.fragment - frag = '#'+as_unicode(urllib.parse.quote(f)) if self.fragment else '' + frag = '#' + urllib.parse.quote(self.fragment) if self.fragment else '' if self.path == basedir: - return ''+frag + return '' + frag try: rpath = relpath(self.path, basedir) except OSError: # On windows path and basedir could be on different drives rpath = self.path - if isinstance(rpath, str): - rpath = rpath.encode('utf-8') - return as_unicode(urllib.parse.quote(rpath.replace(os.sep, '/')))+frag + return urllib.parse.quote(rpath.replace(os.sep, '/')) + frag def set_basedir(self, path): self._basedir = path diff --git a/ebook_converter/ebooks/metadata/odt.py b/ebook_converter/ebooks/metadata/odt.py index 11388a3..7da0a3f 100644 --- a/ebook_converter/ebooks/metadata/odt.py +++ b/ebook_converter/ebooks/metadata/odt.py @@ -32,7 +32,6 @@ from ebook_converter.utils.zipfile import ZipFile, safe_replace from odf.draw import Frame as odFrame, Image as odImage from odf.namespaces import DCNS, METANS, OFFICENS from odf.opendocument import load as odLoad -from ebook_converter.polyglot.builtins import as_unicode fields = { @@ -240,7 +239,7 @@ def _set_metadata(raw, mi): add_user_metadata('opf.seriesindex', '{}'.format(mi.series_index)) if not mi.is_null('identifiers'): remove_user_metadata('opf.identifiers') - add_user_metadata('opf.identifiers', as_unicode(json.dumps(mi.identifiers))) + add_user_metadata('opf.identifiers', str(json.dumps(mi.identifiers))) if not mi.is_null('rating'): remove_user_metadata('opf.rating') add_user_metadata('opf.rating', '%.2g' % mi.rating) diff --git a/ebook_converter/ebooks/mobi/reader/mobi8.py b/ebook_converter/ebooks/mobi/reader/mobi8.py index 8665cbe..53f96ef 100644 --- a/ebook_converter/ebooks/mobi/reader/mobi8.py +++ b/ebook_converter/ebooks/mobi/reader/mobi8.py @@ -18,7 +18,6 @@ from ebook_converter.ebooks.metadata.toc import TOC from ebook_converter.ebooks.mobi.utils import read_font_record from ebook_converter.ebooks.oeb.parse_utils import parse_html from ebook_converter.ebooks.oeb import base -from ebook_converter.polyglot.builtins import as_unicode ID_RE = re.compile(br'''<[^>]+\s(?:id|ID)\s*=\s*['"]([^'"]+)['"]''') NAME_RE = re.compile(br'''<\s*a\s*\s(?:name|NAME)\s*=\s*['"]([^'"]+)['"]''') @@ -403,7 +402,11 @@ class Mobi8Reader(object): continue entry['href'] = href - entry['idtag'] = as_unicode(idtag, self.header.codec or 'utf-8') + if isinstance(idtag, bytes): + entry['idtag'] = idtag.decode(self.header.codec or 'utf-8', + 'strict') + else: + entry['idtag'] = idtag for e in remove: index_entries.remove(e) diff --git a/ebook_converter/polyglot/builtins.py b/ebook_converter/polyglot/builtins.py index b9ec1c1..0a82017 100644 --- a/ebook_converter/polyglot/builtins.py +++ b/ebook_converter/polyglot/builtins.py @@ -13,13 +13,6 @@ def as_bytes(x, encoding='utf-8'): return str(x).encode(encoding) -def as_unicode(x, encoding='utf-8', errors='strict'): - return str(x) - if isinstance(x, bytes): - return x.decode(encoding, errors) - return str(x) - - def reraise(tp, value, tb=None): try: if value is None: