1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-14 21:53:36 +01:00

Removed polyglot's unicode_type usage

This commit is contained in:
2020-04-20 19:25:28 +02:00
parent ef7e2b10be
commit 128705f258
130 changed files with 657 additions and 716 deletions

View File

@@ -10,7 +10,7 @@ import pkg_resources
from ebook_converter.utils.lock import ExclusiveFile
from ebook_converter.constants import config_dir, CONFIG_DIR_MODE, ispy3, preferred_encoding, filesystem_encoding, iswindows
from ebook_converter.polyglot.builtins import unicode_type, iteritems
from ebook_converter.polyglot.builtins import iteritems
plugin_dir = os.path.join(config_dir, 'plugins')
@@ -22,7 +22,7 @@ def parse_old_style(src):
import cPickle
options = {'cPickle':cPickle}
try:
if not isinstance(src, unicode_type):
if not isinstance(src, str):
src = src.decode('utf-8')
src = src.replace('PyQt%d.QtCore' % 4, 'PyQt5.QtCore')
src = re.sub(r'cPickle\.loads\(([\'"])', r'cPickle.loads(b\1', src)
@@ -296,7 +296,7 @@ class OptionSet(object):
def parse_string(self, src):
options = {}
if src:
is_old_style = (isinstance(src, bytes) and src.startswith(b'#')) or (isinstance(src, unicode_type) and src.startswith(u'#'))
is_old_style = (isinstance(src, bytes) and src.startswith(b'#')) or (isinstance(src, str) and src.startswith(u'#'))
if is_old_style:
options = parse_old_style(src)
else:
@@ -400,7 +400,7 @@ class Config(ConfigInterface):
src = self.option_set.serialize(opts)
f.seek(0)
f.truncate()
if isinstance(src, unicode_type):
if isinstance(src, str):
src = src.encode('utf-8')
f.write(src)
@@ -551,7 +551,7 @@ def create_global_prefs(conf_obj=None):
prefs = ConfigProxy(create_global_prefs())
if prefs['installation_uuid'] is None:
import uuid
prefs['installation_uuid'] = unicode_type(uuid.uuid4())
prefs['installation_uuid'] = str(uuid.uuid4())
# Read tweaks

View File

@@ -6,7 +6,7 @@ from ebook_converter import strftime
from ebook_converter.constants import iswindows, isosx, plugins, preferred_encoding
from ebook_converter.utils.iso8601 import utc_tz, local_tz, UNDEFINED_DATE
from ebook_converter.utils.localization import lcdata
from ebook_converter.polyglot.builtins import unicode_type, native_string_type
from ebook_converter.polyglot.builtins import native_string_type
__license__ = 'GPL v3'
@@ -186,13 +186,13 @@ def fromordinal(day, as_utc=True):
def isoformat(date_time, assume_utc=False, as_utc=True, sep='T'):
if not hasattr(date_time, 'tzinfo'):
return unicode_type(date_time.isoformat())
return str(date_time.isoformat())
if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
date_time = date_time.astimezone(_utc_tz if as_utc else _local_tz)
# native_string_type(sep) because isoformat barfs with unicode sep on python 2.x
return unicode_type(date_time.isoformat(native_string_type(sep)))
return str(date_time.isoformat(native_string_type(sep)))
def internal_iso_format_string():
@@ -205,7 +205,7 @@ def w3cdtf(date_time, assume_utc=False):
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
date_time = date_time.astimezone(_utc_tz if as_utc else _local_tz)
return unicode_type(date_time.strftime('%Y-%m-%dT%H:%M:%SZ'))
return str(date_time.strftime('%Y-%m-%dT%H:%M:%SZ'))
def as_local_time(date_time, assume_utc=True):

View File

@@ -13,7 +13,7 @@ from ebook_converter.constants import (
filesystem_encoding, iswindows, plugins, preferred_encoding, isosx, ispy3
)
from ebook_converter.utils.localization import get_udc
from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type
from ebook_converter.polyglot.builtins import iteritems, itervalues
def ascii_text(orig):
@@ -21,7 +21,7 @@ def ascii_text(orig):
try:
ascii = udc.decode(orig)
except Exception:
if isinstance(orig, unicode_type):
if isinstance(orig, str):
orig = orig.encode('ascii', 'replace')
ascii = orig.decode(preferred_encoding, 'replace')
if isinstance(ascii, bytes):

View File

@@ -3,7 +3,6 @@ from struct import calcsize, unpack, unpack_from
from collections import namedtuple
from ebook_converter.utils.fonts.utils import get_font_names2, get_font_characteristics
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3'
@@ -45,7 +44,7 @@ class FontMetadata(object):
elif wt == 700:
wt = 'bold'
else:
wt = unicode_type(wt)
wt = str(wt)
self.font_weight = wt
self.font_stretch = ('ultra-condensed', 'extra-condensed',

View File

@@ -6,7 +6,7 @@ from ebook_converter import walk, prints, as_unicode
from ebook_converter.constants import (config_dir, iswindows, isosx, plugins, DEBUG,
isworker, filesystem_encoding)
from ebook_converter.utils.fonts.metadata import FontMetadata, UnsupportedFont
from ebook_converter.polyglot.builtins import itervalues, unicode_type
from ebook_converter.polyglot.builtins import itervalues
__license__ = 'GPL v3'
@@ -261,7 +261,7 @@ class FontScanner(Thread):
'''
from ebook_converter.utils.fonts.utils import (supports_text,
panose_to_css_generic_family, get_printable_characters)
if not isinstance(text, unicode_type):
if not isinstance(text, str):
raise TypeError(u'%r is not unicode'%text)
text = get_printable_characters(text)
found = {}

View File

@@ -1,5 +1,4 @@
from struct import pack, unpack_from
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3'
@@ -68,7 +67,7 @@ class ByteCode(dict):
return float(number), index
def write_float(self, f, encoding='ignored'):
s = unicode_type(f).upper()
s = str(f).upper()
if s[:2] == "0.":
s = s[1:]
elif s[:3] == "-0.":

View File

@@ -6,7 +6,7 @@ from functools import partial
from ebook_converter.utils.icu import safe_chr, ord_string
from ebook_converter.utils.fonts.sfnt.container import Sfnt
from ebook_converter.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
from ebook_converter.polyglot.builtins import unicode_type, iteritems, itervalues
from ebook_converter.polyglot.builtins import iteritems, itervalues
__license__ = 'GPL v3'
@@ -106,7 +106,7 @@ def pdf_subset(sfnt, glyphs):
def safe_ord(x):
return ord_string(unicode_type(x))[0]
return ord_string(str(x))[0]
def subset(raw, individual_chars, ranges=(), warnings=None):
@@ -343,12 +343,12 @@ def all():
print('No glyphs!')
continue
except UnsupportedFont as e:
unsupported.append((font['full_name'], font['path'], unicode_type(e)))
unsupported.append((font['full_name'], font['path'], str(e)))
print('Unsupported!')
continue
except Exception as e:
print('Failed!')
failed.append((font['full_name'], font['path'], unicode_type(e)))
failed.append((font['full_name'], font['path'], str(e)))
else:
averages.append(sum(itervalues(new_stats))/sum(itervalues(old_stats)) * 100)
print('Reduced to:', '%.1f'%averages[-1] , '%')

View File

@@ -2,7 +2,7 @@ import struct
from io import BytesIO
from collections import defaultdict
from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type, as_bytes
from ebook_converter.polyglot.builtins import iteritems, itervalues, as_bytes
__license__ = 'GPL v3'
@@ -394,7 +394,7 @@ def get_bmp_glyph_ids(table, bmp, codes):
def get_glyph_ids(raw, text, raw_is_table=False):
if not isinstance(text, unicode_type):
if not isinstance(text, str):
raise TypeError('%r is not a unicode object'%text)
if raw_is_table:
table = raw
@@ -420,7 +420,7 @@ def get_glyph_ids(raw, text, raw_is_table=False):
def supports_text(raw, text, has_only_printable_chars=False):
if not isinstance(text, unicode_type):
if not isinstance(text, str):
raise TypeError('%r is not a unicode object'%text)
if not has_only_printable_chars:
text = get_printable_characters(text)

View File

@@ -8,7 +8,7 @@ import re, string, traceback, numbers
from ebook_converter import prints
from ebook_converter.constants import DEBUG
from ebook_converter.utils.formatter_functions import formatter_functions
from ebook_converter.polyglot.builtins import unicode_type, error_message
from ebook_converter.polyglot.builtins import error_message
__license__ = 'GPL v3'
@@ -214,7 +214,7 @@ class TemplateFormatter(string.Formatter):
except:
raise ValueError(
_('format: type {0} requires a decimal (float) value, got {1}').format(typ, val))
return unicode_type(('{0:'+fmt+'}').format(val))
return str(('{0:'+fmt+'}').format(val))
def _explode_format_string(self, fmt):
try:
@@ -273,7 +273,7 @@ class TemplateFormatter(string.Formatter):
# ensure we are dealing with a string.
if isinstance(val, numbers.Number):
if val:
val = unicode_type(val)
val = str(val)
else:
val = ''
# Handle conditional text

View File

@@ -14,7 +14,7 @@ from ebook_converter.utils.titlecase import titlecase
from ebook_converter.utils.icu import capitalize, strcmp, sort_key
from ebook_converter.utils.date import parse_date, format_date, now, UNDEFINED_DATE
from ebook_converter.utils.localization import calibre_langcode_to_name, canonicalize_lang
from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type
from ebook_converter.polyglot.builtins import iteritems, itervalues
__license__ = 'GPL v3'
@@ -128,12 +128,12 @@ class FormatterFunction(object):
def eval_(self, formatter, kwargs, mi, locals, *args):
ret = self.evaluate(formatter, kwargs, mi, locals, *args)
if isinstance(ret, (bytes, unicode_type)):
if isinstance(ret, (bytes, str)):
return ret
if isinstance(ret, list):
return ','.join(ret)
if isinstance(ret, (numbers.Number, bool)):
return unicode_type(ret)
return str(ret)
class BuiltinFormatterFunction(FormatterFunction):
@@ -243,7 +243,7 @@ class BuiltinAdd(BuiltinFormatterFunction):
def evaluate(self, formatter, kwargs, mi, locals, x, y):
x = float(x if x and x != 'None' else 0)
y = float(y if y and y != 'None' else 0)
return unicode_type(x + y)
return str(x + y)
class BuiltinSubtract(BuiltinFormatterFunction):
@@ -255,7 +255,7 @@ class BuiltinSubtract(BuiltinFormatterFunction):
def evaluate(self, formatter, kwargs, mi, locals, x, y):
x = float(x if x and x != 'None' else 0)
y = float(y if y and y != 'None' else 0)
return unicode_type(x - y)
return str(x - y)
class BuiltinMultiply(BuiltinFormatterFunction):
@@ -267,7 +267,7 @@ class BuiltinMultiply(BuiltinFormatterFunction):
def evaluate(self, formatter, kwargs, mi, locals, x, y):
x = float(x if x and x != 'None' else 0)
y = float(y if y and y != 'None' else 0)
return unicode_type(x * y)
return str(x * y)
class BuiltinDivide(BuiltinFormatterFunction):
@@ -279,7 +279,7 @@ class BuiltinDivide(BuiltinFormatterFunction):
def evaluate(self, formatter, kwargs, mi, locals, x, y):
x = float(x if x and x != 'None' else 0)
y = float(y if y and y != 'None' else 0)
return unicode_type(x / y)
return str(x / y)
class BuiltinTemplate(BuiltinFormatterFunction):
@@ -371,7 +371,7 @@ class BuiltinRawField(BuiltinFormatterFunction):
if fm is None:
return ', '.join(res)
return fm['is_multiple']['list_to_ui'].join(res)
return unicode_type(res)
return str(res)
class BuiltinRawList(BuiltinFormatterFunction):
@@ -723,7 +723,7 @@ class BuiltinCount(BuiltinFormatterFunction):
'uses an ampersand. Examples: {tags:count(,)}, {authors:count(&)}')
def evaluate(self, formatter, kwargs, mi, locals, val, sep):
return unicode_type(len([v for v in val.split(sep) if v]))
return str(len([v for v in val.split(sep) if v]))
class BuiltinListitem(BuiltinFormatterFunction):
@@ -834,7 +834,7 @@ class BuiltinFormatsSizes(BuiltinFormatterFunction):
def evaluate(self, formatter, kwargs, mi, locals):
fmt_data = mi.get('format_metadata', {})
try:
return ','.join(k.upper()+':'+unicode_type(v['size']) for k,v in iteritems(fmt_data))
return ','.join(k.upper()+':'+str(v['size']) for k,v in iteritems(fmt_data))
except:
return ''
@@ -853,7 +853,7 @@ class BuiltinFormatsPaths(BuiltinFormatterFunction):
def evaluate(self, formatter, kwargs, mi, locals):
fmt_data = mi.get('format_metadata', {})
try:
return ','.join(k.upper()+':'+unicode_type(v['path']) for k,v in iteritems(fmt_data))
return ','.join(k.upper()+':'+str(v['path']) for k,v in iteritems(fmt_data))
except:
return ''
@@ -1084,7 +1084,7 @@ class BuiltinBooksize(BuiltinFormatterFunction):
try:
v = mi._proxy_metadata.book_size
if v is not None:
return unicode_type(mi._proxy_metadata.book_size)
return str(mi._proxy_metadata.book_size)
return ''
except:
pass

View File

@@ -4,7 +4,7 @@ import unicodedata
# Setup code {{{
from ebook_converter.constants import plugins
from ebook_converter.polyglot.builtins import unicode_type, cmp
from ebook_converter.polyglot.builtins import cmp
from ebook_converter.utils.config_base import tweaks
@@ -250,7 +250,7 @@ ord_string = str # _icu.ord_string
def character_name(string):
try:
return _icu.character_name(unicode_type(string)) or None
return _icu.character_name(str(string)) or None
except (TypeError, ValueError, KeyError):
pass
@@ -267,8 +267,8 @@ def normalize(text, mode='NFC'):
# that unless you have very good reasons not to. Also, its speed
# decreases on wide python builds, where conversion to/from ICU's string
# representation is slower.
# return _icu.normalize(_nmodes[mode], unicode_type(text))
return unicode.normalize(mode, unicode_type(text))
# return _icu.normalize(_nmodes[mode], str(text))
return unicode.normalize(mode, str(text))
def contractions(col=None):

View File

@@ -17,7 +17,7 @@ from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.utils.config_base import tweaks
from ebook_converter.utils.filenames import atomic_rename
from ebook_converter.utils.imghdr import what
from ebook_converter.polyglot.builtins import string_or_bytes, unicode_type
from ebook_converter.polyglot.builtins import string_or_bytes
# Utilities {{{
# imageops, imageops_err = plugins['imageops']
@@ -48,7 +48,7 @@ def get_exe_path(name):
def load_jxr_data(data):
with TemporaryDirectory() as tdir:
if iswindows and isinstance(tdir, unicode_type):
if iswindows and isinstance(tdir, str):
tdir = tdir.encode('mbcs')
with lopen(os.path.join(tdir, 'input.jxr'), 'wb') as f:
f.write(data)
@@ -131,7 +131,7 @@ def image_from_path(path):
def image_from_x(x):
' Create an image from a bytestring or a path or a file like object. '
if isinstance(x, unicode_type):
if isinstance(x, str):
return image_from_path(x)
if hasattr(x, 'read'):
return image_from_data(x.read())
@@ -550,8 +550,8 @@ def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
# subprocess in python 2 cannot handle unicode strings that are not
# encodeable in mbcs, so we fail here, where it is more explicit,
# instead.
cmd = [x.encode('mbcs') if isinstance(x, unicode_type) else x for x in cmd]
if isinstance(cwd, unicode_type):
cmd = [x.encode('mbcs') if isinstance(x, str) else x for x in cmd]
if isinstance(cwd, str):
cwd = cwd.encode('mbcs')
stdin = subprocess.PIPE if as_filter else None
stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
@@ -616,7 +616,7 @@ def encode_jpeg(file_path, quality=80):
from ebook_converter.utils.speedups import ReadOnlyFileBuffer
quality = max(0, min(100, int(quality)))
exe = get_exe_path('cjpeg')
cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [unicode_type(quality)]
cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [str(quality)]
img = QImage()
if not img.load(file_path):
raise ValueError('%s is not a valid image file' % file_path)

View File

@@ -6,7 +6,7 @@ import os
from ebook_converter.utils.speedups import ReadOnlyFileBuffer
from ebook_converter.constants import ispy3
from ebook_converter.polyglot.builtins import string_or_bytes, unicode_type
from ebook_converter.polyglot.builtins import string_or_bytes
HSIZE = 120
@@ -41,7 +41,7 @@ def identify(src):
recognized. '''
width = height = -1
if isinstance(src, unicode_type):
if isinstance(src, str):
stream = lopen(src, 'rb')
elif isinstance(src, bytes):
stream = ReadOnlyFileBuffer(src)

View File

@@ -5,7 +5,7 @@ from ebook_converter.constants import isosx, isfrozen, filesystem_encoding, ispy
from ebook_converter.utils.config import prefs
from ebook_converter.ptempfile import PersistentTemporaryFile, base_dir
from ebook_converter.utils.serialize import msgpack_dumps
from ebook_converter.polyglot.builtins import iteritems, unicode_type, string_or_bytes, environ_item, native_string_type, getcwd
from ebook_converter.polyglot.builtins import iteritems, string_or_bytes, environ_item, native_string_type, getcwd
from ebook_converter.polyglot.binary import as_hex_unicode
try:
import win32process
@@ -97,13 +97,13 @@ class Worker(object):
for key in os.environ:
try:
val = os.environ[key]
if isinstance(val, unicode_type):
if isinstance(val, str):
# On windows subprocess cannot handle unicode env vars
try:
val = val.encode(filesystem_encoding)
except ValueError:
val = val.encode('utf-8')
if isinstance(key, unicode_type):
if isinstance(key, str):
key = key.encode('ascii')
env[key] = val
except:
@@ -164,9 +164,9 @@ class Worker(object):
# Windows cannot handle unicode env vars
for k, v in iteritems(env):
try:
if isinstance(k, unicode_type):
if isinstance(k, str):
k = k.encode('ascii')
if isinstance(v, unicode_type):
if isinstance(v, str):
try:
v = v.encode(filesystem_encoding)
except:

View File

@@ -8,7 +8,7 @@ from ebook_converter.utils.ipc import eintr_retry_call
from ebook_converter.utils.ipc.launch import Worker
from ebook_converter.utils.serialize import msgpack_loads, msgpack_dumps
from ebook_converter.utils.monotonic import monotonic
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes, environ_item
from ebook_converter.polyglot.builtins import string_or_bytes, environ_item
from ebook_converter.polyglot.binary import as_hex_unicode, from_hex_bytes
@@ -267,7 +267,7 @@ def offload_worker(env={}, priority='normal', cwd=None):
def compile_code(src):
import re, io
if not isinstance(src, unicode_type):
if not isinstance(src, str):
match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
enc = match.group(1).decode('utf-8') if match else 'utf-8'
src = src.decode(enc)

View File

@@ -7,7 +7,7 @@ import json
from gettext import GNUTranslations, NullTranslations
import pkg_resources
from ebook_converter.polyglot.builtins import is_py3, iteritems, unicode_type
from ebook_converter.polyglot.builtins import is_py3, iteritems
_available_translations = None
@@ -253,7 +253,7 @@ def calibre_langcode_to_name(lc, localize=True):
def canonicalize_lang(raw):
if not raw:
return None
if not isinstance(raw, unicode_type):
if not isinstance(raw, str):
raw = raw.decode('utf-8', 'ignore')
raw = raw.lower().strip()
if not raw:

View File

@@ -6,7 +6,7 @@ from functools import partial
from threading import Lock
from ebook_converter import isbytestring, force_unicode, as_unicode, prints
from ebook_converter.polyglot.builtins import unicode_type, iteritems
from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL 3'
@@ -111,7 +111,7 @@ class UnicodeHTMLStream(HTMLStream):
for arg in args:
if isbytestring(arg):
arg = force_unicode(arg)
elif not isinstance(arg, unicode_type):
elif not isinstance(arg, str):
arg = as_unicode(arg)
self.data.append(arg+sep)
self.plain_text.append(arg+sep)

View File

@@ -1,4 +1,3 @@
from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.constants import ispy3
@@ -21,7 +20,7 @@ def create_encoder(for_json=False):
def encoder(obj):
if isinstance(obj, datetime):
return encoded(0, unicode_type(obj.isoformat()), ExtType)
return encoded(0, str(obj.isoformat()), ExtType)
if isinstance(obj, (set, frozenset)):
return encoded(1, tuple(obj), ExtType)
if getattr(obj, '__calibre_serializable__', False):

View File

@@ -3,8 +3,6 @@ Generate UUID encoded using a user specified alphabet.
'''
import string, math, uuid as _uuid
from ebook_converter.polyglot.builtins import unicode_type
def num_to_string(number, alphabet, alphabet_len, pad_to_length=None):
ans = []
@@ -30,7 +28,7 @@ class ShortUUID(object):
# We do not include zero and one in the default alphabet as they can be
# confused with the letters O and I in some fonts. And removing them
# does not change the uuid_pad_len.
self.alphabet = tuple(sorted(unicode_type(alphabet or (string.digits + string.ascii_letters)[2:])))
self.alphabet = tuple(sorted(str(alphabet or (string.digits + string.ascii_letters)[2:])))
self.alphabet_len = len(self.alphabet)
self.alphabet_map = {c:i for i, c in enumerate(self.alphabet)}
self.uuid_pad_len = int(math.ceil(math.log(1 << 128, self.alphabet_len)))

View File

@@ -1,7 +1,5 @@
import os
from ebook_converter.polyglot.builtins import unicode_type
class ReadOnlyFileBuffer(object):
@@ -64,7 +62,7 @@ def svg_path_to_painter_path(d):
# x1/y1 and x2/y2 = bezier control points
x = y = x1 = y1 = x2 = y2 = 0
if isinstance(d, unicode_type):
if isinstance(d, str):
d = d.encode('ascii')
d = d.replace(b',', b' ').replace(b'\n', b' ')
end = len(d)

View File

@@ -7,7 +7,6 @@ License: http://www.opensource.org/licenses/mit-license.php
import re
from ebook_converter.utils.icu import capitalize, upper
from ebook_converter.polyglot.builtins import unicode_type
__all__ = ['titlecase']
@@ -19,7 +18,7 @@ PUNCT = r"""!"#$%&'()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT)
CAPFIRST = re.compile(unicode_type(r"^[%s]*?(\w)" % PUNCT), flags=re.UNICODE)
CAPFIRST = re.compile(str(r"^[%s]*?(\w)" % PUNCT), flags=re.UNICODE)
SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I|re.U)
SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U)
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)

View File

@@ -10,7 +10,7 @@ from tempfile import SpooledTemporaryFile
from ebook_converter import sanitize_file_name
from ebook_converter.constants import filesystem_encoding
from ebook_converter.ebooks.chardet import detect
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes, getcwd, as_bytes
from ebook_converter.polyglot.builtins import string_or_bytes, getcwd, as_bytes
try:
import zlib # We may need its compression method
@@ -143,7 +143,7 @@ _CD64_OFFSET_START_CENTDIR = 9
def decode_arcname(name):
if not isinstance(name, unicode_type):
if not isinstance(name, str):
try:
name = name.decode('utf-8')
except Exception:
@@ -395,7 +395,7 @@ class ZipInfo (object):
return header + filename + extra
def _encodeFilenameFlags(self):
if isinstance(self.filename, unicode_type):
if isinstance(self.filename, str):
return self.filename.encode('utf-8'), self.flag_bits | 0x800
else:
return self.filename, self.flag_bits
@@ -1214,7 +1214,7 @@ class ZipFile:
arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
while arcname[0] in (os.sep, os.altsep):
arcname = arcname[1:]
if not isinstance(arcname, unicode_type):
if not isinstance(arcname, str):
arcname = arcname.decode(filesystem_encoding)
if isdir and not arcname.endswith('/'):
arcname += '/'
@@ -1292,7 +1292,7 @@ class ZipFile:
if not isinstance(byts, bytes):
byts = byts.encode('utf-8')
if not isinstance(zinfo_or_arcname, ZipInfo):
if not isinstance(zinfo_or_arcname, unicode_type):
if not isinstance(zinfo_or_arcname, str):
zinfo_or_arcname = zinfo_or_arcname.decode(filesystem_encoding)
zinfo = ZipInfo(filename=zinfo_or_arcname,
date_time=time.localtime(time.time())[:6])
@@ -1496,7 +1496,7 @@ def safe_replace(zipstream, name, datastream, extra_replacements={},
with SpooledTemporaryFile(max_size=100*1024*1024) as temp:
ztemp = ZipFile(temp, 'w')
for obj in z.infolist():
if isinstance(obj.filename, unicode_type):
if isinstance(obj.filename, str):
obj.flag_bits |= 0x16 # Set isUTF-8 bit
if obj.filename in names:
ztemp.writestr(obj, rbytes(obj.filename))