mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-03 11:03:32 +02:00
Move force_unicode to utils package
This commit is contained in:
@@ -111,22 +111,3 @@ def prepare_string_for_xml(raw, attribute=False):
|
|||||||
if attribute:
|
if attribute:
|
||||||
raw = raw.replace('"', '&quot;').replace("'", '&apos;')
|
raw = raw.replace('"', '&quot;').replace("'", '&apos;')
|
||||||
return raw
|
return raw
|
||||||
|
|
||||||
|
|
||||||
def force_unicode(obj, enc=constants_old.preferred_encoding):
|
|
||||||
if isinstance(obj, bytes):
|
|
||||||
try:
|
|
||||||
obj = obj.decode(enc)
|
|
||||||
except Exception:
|
|
||||||
try:
|
|
||||||
obj = obj.decode(constants_old.filesystem_encoding
|
|
||||||
if enc == constants_old.preferred_encoding
|
|
||||||
else constants_old.preferred_encoding)
|
|
||||||
except Exception:
|
|
||||||
try:
|
|
||||||
obj = obj.decode('utf-8')
|
|
||||||
except Exception:
|
|
||||||
obj = repr(obj)
|
|
||||||
if isinstance(obj, bytes):
|
|
||||||
obj = obj.decode('utf-8')
|
|
||||||
return obj
|
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import math
|
|||||||
import bs4
|
import bs4
|
||||||
from PIL import Image as PILImage
|
from PIL import Image as PILImage
|
||||||
|
|
||||||
from ebook_converter import entity_to_unicode, force_unicode
|
from ebook_converter import entity_to_unicode
|
||||||
from ebook_converter.constants_old import __appname__, filesystem_encoding, \
|
from ebook_converter.constants_old import __appname__, filesystem_encoding, \
|
||||||
preferred_encoding
|
preferred_encoding
|
||||||
from ebook_converter.devices.interface import DevicePlugin as Device
|
from ebook_converter.devices.interface import DevicePlugin as Device
|
||||||
@@ -37,6 +37,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import (
|
|||||||
RuledLine, Span, Sub, Sup, TextBlock
|
RuledLine, Span, Sub, Sup, TextBlock
|
||||||
)
|
)
|
||||||
from ebook_converter.ptempfile import PersistentTemporaryFile
|
from ebook_converter.ptempfile import PersistentTemporaryFile
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
from ebook_converter.utils import img as uimg
|
from ebook_converter.utils import img as uimg
|
||||||
|
|
||||||
|
|
||||||
@@ -1935,8 +1936,8 @@ class HTMLConverter(object):
|
|||||||
|
|
||||||
def process_file(path, options, logger):
|
def process_file(path, options, logger):
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
default_title = force_unicode(os.path.splitext(os.path.basename(path))[0],
|
default_title = os.path.splitext(os.path.basename(path))[0]
|
||||||
filesystem_encoding)
|
default_title = uenc.force_unicode(default_title, filesystem_encoding)
|
||||||
dirpath = os.path.dirname(path)
|
dirpath = os.path.dirname(path)
|
||||||
|
|
||||||
tpath = ''
|
tpath = ''
|
||||||
|
|||||||
@@ -8,9 +8,9 @@ import re
|
|||||||
import sys
|
import sys
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from ebook_converter import force_unicode
|
|
||||||
from ebook_converter.utils.config_base import tweaks
|
from ebook_converter.utils.config_base import tweaks
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
from ebook_converter.polyglot.urllib import unquote
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -43,7 +43,7 @@ def remove_bracketed_text(src, brackets=None):
|
|||||||
from collections import Counter
|
from collections import Counter
|
||||||
counts = Counter()
|
counts = Counter()
|
||||||
buf = []
|
buf = []
|
||||||
src = force_unicode(src)
|
src = uenc.force_unicode(src)
|
||||||
rmap = {v: k for k, v in brackets.items()}
|
rmap = {v: k for k, v in brackets.items()}
|
||||||
for char in src:
|
for char in src:
|
||||||
if char in brackets:
|
if char in brackets:
|
||||||
@@ -75,7 +75,7 @@ def author_to_author_sort(author, method=None):
|
|||||||
if method == 'copy':
|
if method == 'copy':
|
||||||
return author
|
return author
|
||||||
|
|
||||||
prefixes = {force_unicode(y).lower()
|
prefixes = {uenc.force_unicode(y).lower()
|
||||||
for y in tweaks['author_name_prefixes']}
|
for y in tweaks['author_name_prefixes']}
|
||||||
prefixes |= {y+'.' for y in prefixes}
|
prefixes |= {y+'.' for y in prefixes}
|
||||||
while True:
|
while True:
|
||||||
@@ -87,7 +87,7 @@ def author_to_author_sort(author, method=None):
|
|||||||
else:
|
else:
|
||||||
break
|
break
|
||||||
|
|
||||||
suffixes = {force_unicode(y).lower()
|
suffixes = {uenc.force_unicode(y).lower()
|
||||||
for y in tweaks['author_name_suffixes']}
|
for y in tweaks['author_name_suffixes']}
|
||||||
suffixes |= {y+'.' for y in suffixes}
|
suffixes |= {y+'.' for y in suffixes}
|
||||||
|
|
||||||
|
|||||||
@@ -12,10 +12,10 @@ from lxml import etree
|
|||||||
from ebook_converter.utils.date import parse_only_date
|
from ebook_converter.utils.date import parse_only_date
|
||||||
from ebook_converter.utils.img import save_cover_data_to
|
from ebook_converter.utils.img import save_cover_data_to
|
||||||
from ebook_converter.utils.imghdr import identify
|
from ebook_converter.utils.imghdr import identify
|
||||||
from ebook_converter import force_unicode
|
|
||||||
from ebook_converter.ebooks.metadata import MetaInformation, check_isbn
|
from ebook_converter.ebooks.metadata import MetaInformation, check_isbn
|
||||||
from ebook_converter.ebooks.chardet import xml_to_unicode
|
from ebook_converter.ebooks.chardet import xml_to_unicode
|
||||||
from ebook_converter.polyglot.binary import as_base64_unicode
|
from ebook_converter.polyglot.binary import as_base64_unicode
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
|
|
||||||
|
|
||||||
NAMESPACES = {'fb2': 'http://www.gribuser.ru/xml/fictionbook/2.0',
|
NAMESPACES = {'fb2': 'http://www.gribuser.ru/xml/fictionbook/2.0',
|
||||||
@@ -110,7 +110,7 @@ def get_metadata(stream):
|
|||||||
if book_title:
|
if book_title:
|
||||||
book_title = str(book_title)
|
book_title = str(book_title)
|
||||||
else:
|
else:
|
||||||
book_title = force_unicode(os.path.splitext(
|
book_title = uenc.force_unicode(os.path.splitext(
|
||||||
os.path.basename(getattr(stream, 'name', 'Unknown')))[0])
|
os.path.basename(getattr(stream, 'name', 'Unknown')))[0])
|
||||||
mi = MetaInformation(book_title, authors)
|
mi = MetaInformation(book_title, authors)
|
||||||
|
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ Edit metadata in RTF files.
|
|||||||
import codecs
|
import codecs
|
||||||
import re
|
import re
|
||||||
|
|
||||||
from ebook_converter import force_unicode
|
|
||||||
from ebook_converter.ebooks.metadata import MetaInformation
|
from ebook_converter.ebooks.metadata import MetaInformation
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
|
|
||||||
title_pat = re.compile(br'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
|
title_pat = re.compile(br'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
|
||||||
author_pat = re.compile(br'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
|
author_pat = re.compile(br'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
|
||||||
@@ -74,7 +74,7 @@ def detect_codepage(stream):
|
|||||||
|
|
||||||
def encode(unistr):
|
def encode(unistr):
|
||||||
if not isinstance(unistr, str):
|
if not isinstance(unistr, str):
|
||||||
unistr = force_unicode(unistr)
|
unistr = uenc.force_unicode(unistr)
|
||||||
return ''.join(c if ord(c) < 128 else
|
return ''.join(c if ord(c) < 128 else
|
||||||
'\\u{}?'.format(ord(c)) for c in unistr)
|
'\\u{}?'.format(ord(c)) for c in unistr)
|
||||||
|
|
||||||
|
|||||||
@@ -17,12 +17,12 @@ from lxml import etree
|
|||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
from ebook_converter import constants as const
|
from ebook_converter import constants as const
|
||||||
from ebook_converter import force_unicode
|
|
||||||
from ebook_converter.constants_old import filesystem_encoding, __version__
|
from ebook_converter.constants_old import filesystem_encoding, __version__
|
||||||
from ebook_converter.ebooks.chardet import xml_to_unicode
|
from ebook_converter.ebooks.chardet import xml_to_unicode
|
||||||
from ebook_converter.ebooks.conversion.preprocess import CSSPreProcessor
|
from ebook_converter.ebooks.conversion.preprocess import CSSPreProcessor
|
||||||
from ebook_converter.ebooks.oeb import parse_utils
|
from ebook_converter.ebooks.oeb import parse_utils
|
||||||
from ebook_converter.utils.cleantext import clean_xml_chars
|
from ebook_converter.utils.cleantext import clean_xml_chars
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
from ebook_converter.utils.short_uuid import uuid4
|
from ebook_converter.utils.short_uuid import uuid4
|
||||||
|
|
||||||
|
|
||||||
@@ -1074,7 +1074,7 @@ class Manifest(object):
|
|||||||
def sort_key(self):
|
def sort_key(self):
|
||||||
href = self.href
|
href = self.href
|
||||||
if isinstance(href, bytes):
|
if isinstance(href, bytes):
|
||||||
href = force_unicode(href)
|
href = uenc.force_unicode(href)
|
||||||
|
|
||||||
if isinstance(self.spine_position, numbers.Number):
|
if isinstance(self.spine_position, numbers.Number):
|
||||||
sp = self.spine_position
|
sp = self.spine_position
|
||||||
|
|||||||
@@ -5,9 +5,10 @@ from lxml import etree
|
|||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
from ebook_converter import constants as const
|
from ebook_converter import constants as const
|
||||||
from ebook_converter import xml_replace_entities, force_unicode
|
from ebook_converter import xml_replace_entities
|
||||||
from ebook_converter.constants_old import filesystem_encoding
|
from ebook_converter.constants_old import filesystem_encoding
|
||||||
from ebook_converter.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
|
from ebook_converter.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
|
|
||||||
|
|
||||||
RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True,
|
RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True,
|
||||||
@@ -159,7 +160,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
|||||||
if log is None:
|
if log is None:
|
||||||
log = LOG
|
log = LOG
|
||||||
|
|
||||||
filename = force_unicode(filename, enc=filesystem_encoding)
|
filename = uenc.force_unicode(filename, enc=filesystem_encoding)
|
||||||
|
|
||||||
if not isinstance(data, str):
|
if not isinstance(data, str):
|
||||||
if decoder is not None:
|
if decoder is not None:
|
||||||
|
|||||||
@@ -3,12 +3,12 @@ import functools
|
|||||||
|
|
||||||
from css_parser.css import CSSRule, CSSStyleDeclaration
|
from css_parser.css import CSSRule, CSSStyleDeclaration
|
||||||
|
|
||||||
from ebook_converter import force_unicode
|
|
||||||
from ebook_converter.css_selectors import parse, SelectorSyntaxError
|
from ebook_converter.css_selectors import parse, SelectorSyntaxError
|
||||||
from ebook_converter.ebooks.oeb import base
|
from ebook_converter.ebooks.oeb import base
|
||||||
from ebook_converter.ebooks.oeb.polish import pretty
|
from ebook_converter.ebooks.oeb.polish import pretty
|
||||||
from ebook_converter.utils.icu import numeric_sort_key
|
from ebook_converter.utils.icu import numeric_sort_key
|
||||||
from ebook_converter.css_selectors import Select, SelectorError
|
from ebook_converter.css_selectors import Select, SelectorError
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
|
|
||||||
|
|
||||||
def filter_used_rules(rules, log, select):
|
def filter_used_rules(rules, log, select):
|
||||||
@@ -137,7 +137,7 @@ def remove_unused_css(container, report=None, remove_unused_classes=False,
|
|||||||
if unused_rules:
|
if unused_rules:
|
||||||
num_of_removed_rules += len(unused_rules)
|
num_of_removed_rules += len(unused_rules)
|
||||||
[sheet.cssRules.remove(r) for r in unused_rules]
|
[sheet.cssRules.remove(r) for r in unused_rules]
|
||||||
style.text = force_unicode(sheet.cssText, 'utf-8')
|
style.text = uenc.force_unicode(sheet.cssText, 'utf-8')
|
||||||
pretty.pretty_script_or_style(container, style)
|
pretty.pretty_script_or_style(container, style)
|
||||||
container.dirty(name)
|
container.dirty(name)
|
||||||
|
|
||||||
@@ -241,7 +241,7 @@ def transform_inline_styles(container, name, transform_sheet, transform_style):
|
|||||||
sheet = container.parse_css(style.text)
|
sheet = container.parse_css(style.text)
|
||||||
if transform_sheet(sheet):
|
if transform_sheet(sheet):
|
||||||
changed = True
|
changed = True
|
||||||
style.text = force_unicode(sheet.cssText, 'utf-8')
|
style.text = uenc.force_unicode(sheet.cssText, 'utf-8')
|
||||||
pretty.pretty_script_or_style(container, style)
|
pretty.pretty_script_or_style(container, style)
|
||||||
for elem in root.xpath('//*[@style]'):
|
for elem in root.xpath('//*[@style]'):
|
||||||
text = elem.get('style', None)
|
text = elem.get('style', None)
|
||||||
@@ -253,8 +253,9 @@ def transform_inline_styles(container, name, transform_sheet, transform_style):
|
|||||||
del elem.attrib['style']
|
del elem.attrib['style']
|
||||||
else:
|
else:
|
||||||
elem.set('style',
|
elem.set('style',
|
||||||
force_unicode(style.getCssText(separator=' '),
|
uenc.force_unicode(style
|
||||||
'utf-8'))
|
.getCssText(separator=' '),
|
||||||
|
'utf-8'))
|
||||||
return changed
|
return changed
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
import textwrap
|
import textwrap
|
||||||
|
|
||||||
from ebook_converter import constants as const
|
from ebook_converter import constants as const
|
||||||
from ebook_converter import force_unicode
|
|
||||||
from ebook_converter.ebooks.oeb import parse_utils
|
from ebook_converter.ebooks.oeb import parse_utils
|
||||||
from ebook_converter.ebooks.oeb import base
|
from ebook_converter.ebooks.oeb import base
|
||||||
from ebook_converter.ebooks.oeb.polish.utils import guess_type
|
from ebook_converter.ebooks.oeb.polish.utils import guess_type
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
from ebook_converter.utils.icu import sort_key
|
from ebook_converter.utils.icu import sort_key
|
||||||
|
|
||||||
|
|
||||||
@@ -157,8 +157,9 @@ def pretty_script_or_style(container, child):
|
|||||||
if child.text:
|
if child.text:
|
||||||
indent = indent_for_tag(child)
|
indent = indent_for_tag(child)
|
||||||
if child.tag.endswith('style'):
|
if child.tag.endswith('style'):
|
||||||
child.text = force_unicode(pretty_css(container, '', child.text),
|
child.text = uenc.force_unicode(pretty_css(container, '',
|
||||||
'utf-8')
|
child.text),
|
||||||
|
'utf-8')
|
||||||
child.text = textwrap.dedent(child.text)
|
child.text = textwrap.dedent(child.text)
|
||||||
child.text = '\n' + '\n'.join([(indent + x) if x else ''
|
child.text = '\n' + '\n'.join([(indent + x) if x else ''
|
||||||
for x in child.text.splitlines()])
|
for x in child.text.splitlines()])
|
||||||
|
|||||||
@@ -12,12 +12,12 @@ from css_parser import (profile as cssprofiles, parseString, parseStyle, log as
|
|||||||
css_parser_log, CSSParser, profiles, replaceUrls)
|
css_parser_log, CSSParser, profiles, replaceUrls)
|
||||||
|
|
||||||
from ebook_converter import constants as const
|
from ebook_converter import constants as const
|
||||||
from ebook_converter import force_unicode
|
|
||||||
from ebook_converter.ebooks import unit_convert
|
from ebook_converter.ebooks import unit_convert
|
||||||
from ebook_converter.ebooks.oeb import base
|
from ebook_converter.ebooks.oeb import base
|
||||||
from ebook_converter.ebooks.oeb.normalize_css import DEFAULTS, normalizers
|
from ebook_converter.ebooks.oeb.normalize_css import DEFAULTS, normalizers
|
||||||
from ebook_converter.css_selectors import Select, SelectorError, INAPPROPRIATE_PSEUDO_CLASSES
|
from ebook_converter.css_selectors import Select, SelectorError, INAPPROPRIATE_PSEUDO_CLASSES
|
||||||
from ebook_converter.tinycss.media3 import CSSMedia3Parser
|
from ebook_converter.tinycss.media3 import CSSMedia3Parser
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
|
|
||||||
|
|
||||||
css_parser_log.setLevel(logging.WARN)
|
css_parser_log.setLevel(logging.WARN)
|
||||||
@@ -223,10 +223,10 @@ class Stylizer(object):
|
|||||||
for x in elem:
|
for x in elem:
|
||||||
t = getattr(x, 'text', None)
|
t = getattr(x, 'text', None)
|
||||||
if t:
|
if t:
|
||||||
text += '\n\n' + force_unicode(t, 'utf-8')
|
text += '\n\n' + uenc.force_unicode(t, 'utf-8')
|
||||||
t = getattr(x, 'tail', None)
|
t = getattr(x, 'tail', None)
|
||||||
if t:
|
if t:
|
||||||
text += '\n\n' + force_unicode(t, 'utf-8')
|
text += '\n\n' + uenc.force_unicode(t, 'utf-8')
|
||||||
if text:
|
if text:
|
||||||
text = oeb.css_preprocessor(text)
|
text = oeb.css_preprocessor(text)
|
||||||
# We handle @import rules separately
|
# We handle @import rules separately
|
||||||
|
|||||||
@@ -14,12 +14,12 @@ from lxml.etree import XPath as _XPath
|
|||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
from ebook_converter import constants as const
|
from ebook_converter import constants as const
|
||||||
from ebook_converter import force_unicode
|
|
||||||
from ebook_converter.ebooks.epub import rules
|
from ebook_converter.ebooks.epub import rules
|
||||||
from ebook_converter.ebooks.oeb import base
|
from ebook_converter.ebooks.oeb import base
|
||||||
from ebook_converter.ebooks.oeb.polish.split import do_split
|
from ebook_converter.ebooks.oeb.polish.split import do_split
|
||||||
from ebook_converter.polyglot.urllib import unquote
|
from ebook_converter.polyglot.urllib import unquote
|
||||||
from ebook_converter.css_selectors import Select, SelectorError
|
from ebook_converter.css_selectors import Select, SelectorError
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
@@ -86,10 +86,12 @@ class Split(object):
|
|||||||
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
|
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
|
||||||
base.OEB_STYLES]
|
base.OEB_STYLES]
|
||||||
for rule in rules(stylesheets):
|
for rule in rules(stylesheets):
|
||||||
before = force_unicode(getattr(rule.style.getPropertyCSSValue(
|
before = uenc.force_unicode(
|
||||||
'page-break-before'), 'cssText', '').strip().lower())
|
getattr(rule.style.getPropertyCSSValue(
|
||||||
after = force_unicode(getattr(rule.style.getPropertyCSSValue(
|
'page-break-before'), 'cssText', '').strip().lower())
|
||||||
'page-break-after'), 'cssText', '').strip().lower())
|
after = uenc.force_unicode(
|
||||||
|
getattr(rule.style.getPropertyCSSValue(
|
||||||
|
'page-break-after'), 'cssText', '').strip().lower())
|
||||||
try:
|
try:
|
||||||
if before and before not in {'avoid', 'auto', 'inherit'}:
|
if before and before not in {'avoid', 'auto', 'inherit'}:
|
||||||
self.page_break_selectors.add((rule.selectorText,
|
self.page_break_selectors.add((rule.selectorText,
|
||||||
|
|||||||
@@ -119,21 +119,24 @@ def reset_base_dir():
|
|||||||
base_dir()
|
base_dir()
|
||||||
|
|
||||||
|
|
||||||
def force_unicode(x):
|
def _force_unicode(x):
|
||||||
# Cannot use the implementation in calibre.__init__ as it causes a circular
|
# Cannot use the implementation in calibre.__init__ as it causes a circular
|
||||||
# dependency
|
# dependency
|
||||||
|
# NOTE(gryf): Congratulations! that's a 3rd function in this codebase
|
||||||
|
# called force_unicode! I guess that forcing unicode on text objects is
|
||||||
|
# some kind of hobby.
|
||||||
if isinstance(x, bytes):
|
if isinstance(x, bytes):
|
||||||
x = x.decode(filesystem_encoding)
|
x = x.decode(filesystem_encoding)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
||||||
def _make_file(suffix, prefix, base):
|
def _make_file(suffix, prefix, base):
|
||||||
suffix, prefix = map(force_unicode, (suffix, prefix)) # no2to3
|
suffix, prefix = map(_force_unicode, (suffix, prefix)) # no2to3
|
||||||
return tempfile.mkstemp(suffix, prefix, dir=base)
|
return tempfile.mkstemp(suffix, prefix, dir=base)
|
||||||
|
|
||||||
|
|
||||||
def _make_dir(suffix, prefix, base):
|
def _make_dir(suffix, prefix, base):
|
||||||
suffix, prefix = map(force_unicode, (suffix, prefix)) # no2to3
|
suffix, prefix = map(_force_unicode, (suffix, prefix)) # no2to3
|
||||||
return tempfile.mkdtemp(suffix, prefix, base)
|
return tempfile.mkdtemp(suffix, prefix, base)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -7,10 +7,11 @@ import json
|
|||||||
import numbers
|
import numbers
|
||||||
import os
|
import os
|
||||||
import pickle
|
import pickle
|
||||||
import pkg_resources
|
|
||||||
import re
|
import re
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
|
import pkg_resources
|
||||||
|
|
||||||
from ebook_converter.constants_old import config_dir
|
from ebook_converter.constants_old import config_dir
|
||||||
from ebook_converter.constants_old import filesystem_encoding
|
from ebook_converter.constants_old import filesystem_encoding
|
||||||
from ebook_converter.constants_old import preferred_encoding
|
from ebook_converter.constants_old import preferred_encoding
|
||||||
@@ -75,7 +76,8 @@ def from_json(obj):
|
|||||||
return obj
|
return obj
|
||||||
|
|
||||||
|
|
||||||
def force_unicode(x):
|
def _force_unicode(x):
|
||||||
|
# TODO(gryf): eliminate cases, when this kind of functions are needed.
|
||||||
try:
|
try:
|
||||||
return x.decode(preferred_encoding)
|
return x.decode(preferred_encoding)
|
||||||
except UnicodeDecodeError:
|
except UnicodeDecodeError:
|
||||||
@@ -87,7 +89,7 @@ def force_unicode(x):
|
|||||||
|
|
||||||
def force_unicode_recursive(obj):
|
def force_unicode_recursive(obj):
|
||||||
if isinstance(obj, bytes):
|
if isinstance(obj, bytes):
|
||||||
return force_unicode(obj)
|
return _force_unicode(obj)
|
||||||
if isinstance(obj, (list, tuple)):
|
if isinstance(obj, (list, tuple)):
|
||||||
return type(obj)(map(force_unicode_recursive, obj))
|
return type(obj)(map(force_unicode_recursive, obj))
|
||||||
if isinstance(obj, dict):
|
if isinstance(obj, dict):
|
||||||
|
|||||||
20
ebook_converter/utils/encoding.py
Normal file
20
ebook_converter/utils/encoding.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
from ebook_converter import constants_old
|
||||||
|
|
||||||
|
|
||||||
|
def force_unicode(obj, enc=constants_old.preferred_encoding):
|
||||||
|
if isinstance(obj, bytes):
|
||||||
|
try:
|
||||||
|
obj = obj.decode(enc)
|
||||||
|
except Exception:
|
||||||
|
try:
|
||||||
|
obj = obj.decode(constants_old.filesystem_encoding
|
||||||
|
if enc == constants_old.preferred_encoding
|
||||||
|
else constants_old.preferred_encoding)
|
||||||
|
except Exception:
|
||||||
|
try:
|
||||||
|
obj = obj.decode('utf-8')
|
||||||
|
except Exception:
|
||||||
|
obj = repr(obj)
|
||||||
|
if isinstance(obj, bytes):
|
||||||
|
obj = obj.decode('utf-8')
|
||||||
|
return obj
|
||||||
@@ -9,9 +9,9 @@ import shutil
|
|||||||
from math import ceil
|
from math import ceil
|
||||||
|
|
||||||
from ebook_converter import constants_old
|
from ebook_converter import constants_old
|
||||||
from ebook_converter import force_unicode
|
|
||||||
from ebook_converter.constants_old import (filesystem_encoding,
|
from ebook_converter.constants_old import (filesystem_encoding,
|
||||||
preferred_encoding)
|
preferred_encoding)
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
from ebook_converter.utils.localization import get_udc
|
from ebook_converter.utils.localization import get_udc
|
||||||
|
|
||||||
|
|
||||||
@@ -183,7 +183,7 @@ def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
|
|||||||
|
|
||||||
path = os.path.abspath(path)
|
path = os.path.abspath(path)
|
||||||
|
|
||||||
sep = force_unicode(os.sep, 'ascii')
|
sep = uenc.force_unicode(os.sep, 'ascii')
|
||||||
|
|
||||||
if path.endswith(sep):
|
if path.endswith(sep):
|
||||||
path = path[:-1]
|
path = path[:-1]
|
||||||
|
|||||||
@@ -12,11 +12,11 @@ from threading import Thread
|
|||||||
#from PyQt5.QtCore import QBuffer, QByteArray, Qt
|
#from PyQt5.QtCore import QBuffer, QByteArray, Qt
|
||||||
#from PyQt5.QtGui import QColor, QImage, QImageReader, QImageWriter, QPixmap, QTransform
|
#from PyQt5.QtGui import QColor, QImage, QImageReader, QImageWriter, QPixmap, QTransform
|
||||||
|
|
||||||
from ebook_converter import force_unicode
|
|
||||||
from ebook_converter.constants_old import plugins
|
from ebook_converter.constants_old import plugins
|
||||||
from ebook_converter.ptempfile import TemporaryDirectory
|
from ebook_converter.ptempfile import TemporaryDirectory
|
||||||
from ebook_converter.utils.config_base import tweaks
|
from ebook_converter.utils.config_base import tweaks
|
||||||
from ebook_converter.utils.filenames import atomic_rename
|
from ebook_converter.utils.filenames import atomic_rename
|
||||||
|
from ebook_converter.utils import encoding as uenc
|
||||||
from ebook_converter.utils.imghdr import what
|
from ebook_converter.utils.imghdr import what
|
||||||
|
|
||||||
# Utilities {{{
|
# Utilities {{{
|
||||||
@@ -586,7 +586,7 @@ def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
|
|||||||
outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
|
outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
|
||||||
outw.daemon = True
|
outw.daemon = True
|
||||||
outw.start()
|
outw.start()
|
||||||
raw = force_unicode(stderr.read())
|
raw = uenc.force_unicode(stderr.read())
|
||||||
if p.wait() != 0:
|
if p.wait() != 0:
|
||||||
return raw
|
return raw
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user