diff --git a/ebook_converter/__init__.py b/ebook_converter/__init__.py index 831bf3c..07a972e 100644 --- a/ebook_converter/__init__.py +++ b/ebook_converter/__init__.py @@ -111,22 +111,3 @@ def prepare_string_for_xml(raw, attribute=False): if attribute: raw = raw.replace('"', '"').replace("'", ''') return raw - - -def force_unicode(obj, enc=constants_old.preferred_encoding): - if isinstance(obj, bytes): - try: - obj = obj.decode(enc) - except Exception: - try: - obj = obj.decode(constants_old.filesystem_encoding - if enc == constants_old.preferred_encoding - else constants_old.preferred_encoding) - except Exception: - try: - obj = obj.decode('utf-8') - except Exception: - obj = repr(obj) - if isinstance(obj, bytes): - obj = obj.decode('utf-8') - return obj diff --git a/ebook_converter/ebooks/lrf/html/convert_from.py b/ebook_converter/ebooks/lrf/html/convert_from.py index 92462c1..f8b9d75 100644 --- a/ebook_converter/ebooks/lrf/html/convert_from.py +++ b/ebook_converter/ebooks/lrf/html/convert_from.py @@ -21,7 +21,7 @@ import math import bs4 from PIL import Image as PILImage -from ebook_converter import entity_to_unicode, force_unicode +from ebook_converter import entity_to_unicode from ebook_converter.constants_old import __appname__, filesystem_encoding, \ preferred_encoding from ebook_converter.devices.interface import DevicePlugin as Device @@ -37,6 +37,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import ( RuledLine, Span, Sub, Sup, TextBlock ) from ebook_converter.ptempfile import PersistentTemporaryFile +from ebook_converter.utils import encoding as uenc from ebook_converter.utils import img as uimg @@ -1935,8 +1936,8 @@ class HTMLConverter(object): def process_file(path, options, logger): path = os.path.abspath(path) - default_title = force_unicode(os.path.splitext(os.path.basename(path))[0], - filesystem_encoding) + default_title = os.path.splitext(os.path .basename(path))[0] + default_title = uenc.force_unicode(default_title, filesystem_encoding) dirpath = os.path.dirname(path) tpath = '' diff --git a/ebook_converter/ebooks/metadata/__init__.py b/ebook_converter/ebooks/metadata/__init__.py index c03c0bb..dd8ff9b 100644 --- a/ebook_converter/ebooks/metadata/__init__.py +++ b/ebook_converter/ebooks/metadata/__init__.py @@ -8,9 +8,9 @@ import re import sys import urllib.parse -from ebook_converter import force_unicode from ebook_converter.utils.config_base import tweaks from ebook_converter.polyglot.urllib import unquote +from ebook_converter.utils import encoding as uenc try: @@ -43,7 +43,7 @@ def remove_bracketed_text(src, brackets=None): from collections import Counter counts = Counter() buf = [] - src = force_unicode(src) + src = uenc.force_unicode(src) rmap = {v: k for k, v in brackets.items()} for char in src: if char in brackets: @@ -75,7 +75,7 @@ def author_to_author_sort(author, method=None): if method == 'copy': return author - prefixes = {force_unicode(y).lower() + prefixes = {uenc.force_unicode(y).lower() for y in tweaks['author_name_prefixes']} prefixes |= {y+'.' for y in prefixes} while True: @@ -87,7 +87,7 @@ def author_to_author_sort(author, method=None): else: break - suffixes = {force_unicode(y).lower() + suffixes = {uenc.force_unicode(y).lower() for y in tweaks['author_name_suffixes']} suffixes |= {y+'.' for y in suffixes} diff --git a/ebook_converter/ebooks/metadata/fb2.py b/ebook_converter/ebooks/metadata/fb2.py index 8cadba9..8ee0a06 100644 --- a/ebook_converter/ebooks/metadata/fb2.py +++ b/ebook_converter/ebooks/metadata/fb2.py @@ -12,10 +12,10 @@ from lxml import etree from ebook_converter.utils.date import parse_only_date from ebook_converter.utils.img import save_cover_data_to from ebook_converter.utils.imghdr import identify -from ebook_converter import force_unicode from ebook_converter.ebooks.metadata import MetaInformation, check_isbn from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.polyglot.binary import as_base64_unicode +from ebook_converter.utils import encoding as uenc NAMESPACES = {'fb2': 'http://www.gribuser.ru/xml/fictionbook/2.0', @@ -110,7 +110,7 @@ def get_metadata(stream): if book_title: book_title = str(book_title) else: - book_title = force_unicode(os.path.splitext( + book_title = uenc.force_unicode(os.path.splitext( os.path.basename(getattr(stream, 'name', 'Unknown')))[0]) mi = MetaInformation(book_title, authors) diff --git a/ebook_converter/ebooks/metadata/rtf.py b/ebook_converter/ebooks/metadata/rtf.py index ee3cea0..63c3432 100644 --- a/ebook_converter/ebooks/metadata/rtf.py +++ b/ebook_converter/ebooks/metadata/rtf.py @@ -4,8 +4,8 @@ Edit metadata in RTF files. import codecs import re -from ebook_converter import force_unicode from ebook_converter.ebooks.metadata import MetaInformation +from ebook_converter.utils import encoding as uenc title_pat = re.compile(br'\{\\info.*?\{\\title(.*?)(?