1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-28 17:54:06 +02:00

Removed usage of polyglot's unicode_type

This commit is contained in:
2020-04-20 19:25:28 +02:00
parent ef7e2b10be
commit 128705f258
130 changed files with 657 additions and 716 deletions
+10 -10
View File
@@ -11,7 +11,7 @@ from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.ebooks.oeb.transforms.flatcss import KeyMapper
from ebook_converter.ebooks.mobi.utils import convert_color_for_font_tag
from ebook_converter.utils.imghdr import identify
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
from ebook_converter.polyglot.builtins import string_or_bytes
__license__ = 'GPL v3'
@@ -151,7 +151,7 @@ class MobiMLizer(object):
return "%dem" % int(round(ptsize / embase))
def preize_text(self, text, pre_wrap=False):
text = unicode_type(text)
text = str(text)
if pre_wrap:
# Replace n consecutive spaces with n-1 NBSP + space
text = re.sub(r' {2,}', lambda m:('\xa0'*(len(m.group())-1) + ' '), text)
@@ -199,7 +199,7 @@ class MobiMLizer(object):
bstate.nested.append(para)
if tag == 'li' and len(istates) > 1:
istates[-2].list_num += 1
para.attrib['value'] = unicode_type(istates[-2].list_num)
para.attrib['value'] = str(istates[-2].list_num)
elif tag in NESTABLE_TAGS and istate.rendered:
para = wrapper = bstate.nested[-1]
elif not self.opts.mobi_ignore_margins and left > 0 and indent >= 0:
@@ -228,7 +228,7 @@ class MobiMLizer(object):
while vspace > 0:
wrapper.addprevious(etree.Element(XHTML('br')))
vspace -= 1
if istate.halign != 'auto' and isinstance(istate.halign, (bytes, unicode_type)):
if istate.halign != 'auto' and isinstance(istate.halign, (bytes, str)):
if isinstance(istate.halign, bytes):
istate.halign = istate.halign.decode('utf-8')
para.attrib['align'] = istate.halign
@@ -285,7 +285,7 @@ class MobiMLizer(object):
if fsize != 3:
inline = etree.SubElement(inline, XHTML('font'),
size=unicode_type(fsize))
size=str(fsize))
if istate.family == 'monospace':
inline = etree.SubElement(inline, XHTML('tt'))
if istate.italic:
@@ -447,7 +447,7 @@ class MobiMLizer(object):
(72/self.profile.dpi)))
except:
continue
result = unicode_type(pixs)
result = str(pixs)
istate.attrib[prop] = result
if 'width' not in istate.attrib or 'height' not in istate.attrib:
href = self.current_spine_item.abshref(elem.attrib['src'])
@@ -464,8 +464,8 @@ class MobiMLizer(object):
else:
if 'width' not in istate.attrib and 'height' not in \
istate.attrib:
istate.attrib['width'] = unicode_type(width)
istate.attrib['height'] = unicode_type(height)
istate.attrib['width'] = str(width)
istate.attrib['height'] = str(height)
else:
ar = width / height
if 'width' not in istate.attrib:
@@ -473,13 +473,13 @@ class MobiMLizer(object):
width = int(istate.attrib['height'])*ar
except:
pass
istate.attrib['width'] = unicode_type(int(width))
istate.attrib['width'] = str(int(width))
else:
try:
height = int(istate.attrib['width'])/ar
except:
pass
istate.attrib['height'] = unicode_type(int(height))
istate.attrib['height'] = str(int(height))
item.unload_data_from_memory()
elif tag == 'hr' and asfloat(style['width']) > 0 and style._get('width') not in {'100%', 'auto'}:
raww = style._get('width')
@@ -8,7 +8,6 @@ from ebook_converter.ebooks.mobi.langcodes import main_language, sub_language, m
from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
from ebook_converter.utils.localization import canonicalize_lang
from ebook_converter.utils.config_base import tweaks
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3'
@@ -245,7 +244,7 @@ class BookHeader(object):
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
self.exth = None
if not isinstance(self.title, unicode_type):
if not isinstance(self.title, str):
self.title = self.title.decode(self.codec, 'replace')
if self.exth_flag & 0x40:
try:
+1 -2
View File
@@ -1,7 +1,6 @@
import re, os
from ebook_converter.ebooks.chardet import strip_encoding_declarations
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3'
@@ -127,7 +126,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
flows.append(flow)
continue
if not isinstance(flow, unicode_type):
if not isinstance(flow, str):
try:
flow = flow.decode(mr.header.codec)
except UnicodeDecodeError:
+2 -2
View File
@@ -16,7 +16,7 @@ from ebook_converter.ebooks.metadata.toc import TOC
from ebook_converter.ebooks.mobi.reader.headers import BookHeader
from ebook_converter.utils.img import save_cover_data_to, gif_data_to_png_data, AnimatedGIF
from ebook_converter.utils.imghdr import what
from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3'
@@ -287,7 +287,7 @@ class MobiReader(object):
pass
def write_as_utf8(path, data):
    """Write *data* to *path* in binary mode.

    ``str`` input is encoded as UTF-8 first; any other input (expected to
    be bytes) is written unchanged.
    """
    if isinstance(data, str):
        data = data.encode('utf-8')
    # NOTE(review): ``lopen`` is not defined in the visible code; presumably
    # the project-wide replacement for ``open()`` -- confirm.
    with lopen(path, 'wb') as f:
        f.write(data)
+2 -2
View File
@@ -18,7 +18,7 @@ from ebook_converter.ebooks.metadata.toc import TOC
from ebook_converter.ebooks.mobi.utils import read_font_record
from ebook_converter.ebooks.oeb.parse_utils import parse_html
from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text
from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode
from ebook_converter.polyglot.builtins import getcwd, as_unicode
__license__ = 'GPL v3'
@@ -224,7 +224,7 @@ class Mobi8Reader(object):
self.parts.append(skeleton)
if divcnt < 1:
# Empty file
aidtext = unicode_type(uuid.uuid4())
aidtext = str(uuid.uuid4())
filename = aidtext + '.html'
self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
baseptr, aidtext))
+6 -6
View File
@@ -5,7 +5,7 @@ from io import BytesIO
from ebook_converter.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data
from ebook_converter.utils.imghdr import what
from ebook_converter.ebooks import normalize
from ebook_converter.polyglot.builtins import unicode_type, as_bytes
from ebook_converter.polyglot.builtins import as_bytes
from ebook_converter.tinycss.color3 import parse_color_string
@@ -20,17 +20,17 @@ RECORD_SIZE = 0x1000 # 4096 (Text record size (uncompressed))
class PolyglotDict(dict):
    """Dict that stores and looks up text keys as UTF-8 encoded bytes.

    A ``str`` key given to item assignment, item lookup or the ``in``
    operator is encoded to UTF-8 before it reaches the underlying dict, so
    ``d['x']`` and ``d[b'x']`` address the same entry.  Keys of any other
    type are used unchanged.

    Only the three methods defined here normalise keys.
    """

    @staticmethod
    def _as_key(key):
        """Return *key* encoded to UTF-8 if it is ``str``, else unchanged."""
        if isinstance(key, str):
            return key.encode('utf-8')
        return key

    def __setitem__(self, key, val):
        """Store *val* under the normalised form of *key*."""
        dict.__setitem__(self, self._as_key(key), val)

    def __getitem__(self, key):
        """Return the value stored under the normalised form of *key*.

        Raises ``KeyError`` if there is no such entry.
        """
        return dict.__getitem__(self, self._as_key(key))

    def __contains__(self, key):
        """Return True if the normalised form of *key* is present."""
        return dict.__contains__(self, self._as_key(key))
@@ -332,7 +332,7 @@ def utf8_text(text):
'''
if text and text.strip():
text = text.strip()
if not isinstance(text, unicode_type):
if not isinstance(text, str):
text = text.decode('utf-8', 'replace')
text = normalize(text).encode('utf-8')
else:
@@ -635,7 +635,7 @@ def is_guide_ref_start(ref):
def convert_color_for_font_tag(val):
rgba = parse_color_string(unicode_type(val or ''))
rgba = parse_color_string(str(val or ''))
if rgba is None or rgba == 'currentColor':
return val
clamp = lambda x: min(x, max(0, x), 1)
+5 -5
View File
@@ -10,7 +10,7 @@ from ebook_converter.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED)
from ebook_converter.ebooks.mobi.utils import (encint, encode_trailing_data,
align_block, detect_periodical, RECORD_SIZE, create_text_record)
from ebook_converter.ebooks.mobi.writer2.indexer import Indexer
from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3'
@@ -48,7 +48,7 @@ class MobiWriter(object):
self.log = oeb.log
pt = None
if oeb.metadata.publication_type:
x = unicode_type(oeb.metadata.publication_type[0]).split(':')
x = str(oeb.metadata.publication_type[0]).split(':')
if len(x) > 1:
pt = x[1].lower()
self.publication_type = pt
@@ -235,7 +235,7 @@ class MobiWriter(object):
0 # Unused
)) # 0 - 15 (0x0 - 0xf)
uid = random.randint(0, 0xffffffff)
title = normalize(unicode_type(metadata.title[0])).encode('utf-8')
title = normalize(str(metadata.title[0])).encode('utf-8')
# 0x0 - 0x3
record0.write(b'MOBI')
@@ -278,7 +278,7 @@ class MobiWriter(object):
# 0x4c - 0x4f : Language specifier
record0.write(iana2mobi(
unicode_type(metadata.language[0])))
str(metadata.language[0])))
# 0x50 - 0x57 : Input language and Output language
record0.write(b'\0' * 8)
@@ -455,7 +455,7 @@ class MobiWriter(object):
'''
Write the PalmDB header
'''
title = ascii_filename(unicode_type(self.oeb.metadata.title[0])).replace(
title = ascii_filename(str(self.oeb.metadata.title[0])).replace(
' ', '_')
if not isinstance(title, bytes):
title = title.encode('ascii')
@@ -8,7 +8,7 @@ from ebook_converter.ebooks import generate_masthead
from ebook_converter.ebooks.oeb.base import OEB_RASTER_IMAGES
from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.utils.imghdr import what
from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3'
@@ -79,7 +79,7 @@ class Resources(object):
self.image_indices.add(0)
elif self.is_periodical:
# Generate a default masthead
data = generate_masthead(unicode_type(self.oeb.metadata['title'][0]))
data = generate_masthead(str(self.oeb.metadata['title'][0]))
self.records.append(data)
self.used_image_indices.add(0)
self.image_indices.add(0)
@@ -87,8 +87,8 @@ class Resources(object):
cover_href = self.cover_offset = self.thumbnail_offset = None
if (oeb.metadata.cover and
unicode_type(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = unicode_type(oeb.metadata.cover[0])
str(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = str(oeb.metadata.cover[0])
item = oeb.manifest.ids[cover_id]
cover_href = item.href
@@ -9,7 +9,7 @@ from ebook_converter.ebooks.mobi.utils import is_guide_ref_start
from ebook_converter.ebooks.oeb.base import (
OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
)
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
from ebook_converter.polyglot.builtins import string_or_bytes
__license__ = 'GPL v3'
@@ -20,7 +20,7 @@ __docformat__ = 'restructuredtext en'
class Buf(io.BytesIO):
    """In-memory byte buffer whose ``write`` also accepts text."""

    def write(self, x):
        """Append *x* to the buffer and return the number of bytes written.

        ``str`` input is encoded as UTF-8 first; bytes-like input is
        written unchanged.
        """
        if isinstance(x, str):
            x = x.encode('utf-8')
        # Propagate the byte count so the method honours the io write()
        # contract instead of always returning None.
        return io.BytesIO.write(self, x)
@@ -226,7 +226,7 @@ class Serializer(object):
buf.write(b'<div> <div height="1em"></div>')
else:
t = tocref.title
if isinstance(t, unicode_type):
if isinstance(t, str):
t = t.encode('utf-8')
buf.write(b'<div></div> <div> <h2 height="1em"><font size="+2"><b>' + t +
b'</b></font></h2> <div height="1em"></div>')
@@ -246,7 +246,7 @@ class Serializer(object):
buf.write(b'0000000000')
buf.write(b' ><font size="+1"><b><u>')
t = tocitem.title
if isinstance(t, unicode_type):
if isinstance(t, str):
t = t.encode('utf-8')
buf.write(t)
buf.write(b'</u></b></font></a></li>')
@@ -364,7 +364,7 @@ class Serializer(object):
text = text.replace(u'\u00AD', '') # Soft-hyphen
if quot:
text = text.replace('"', '&quot;')
if isinstance(text, unicode_type):
if isinstance(text, str):
text = unicodedata.normalize('NFC', text)
self.buf.write(text.encode('utf-8'))
+11 -11
View File
@@ -6,7 +6,7 @@ from ebook_converter.constants import iswindows, isosx
from ebook_converter.ebooks.mobi.utils import (utf8_text, to_base)
from ebook_converter.utils.localization import lang_as_iso639_1
from ebook_converter.ebooks.metadata import authors_to_sort_string
from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3'
@@ -59,14 +59,14 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
items = metadata[term]
if term == 'creator':
if prefer_author_sort:
creators = [authors_to_sort_string([unicode_type(c)]) for c in
creators = [authors_to_sort_string([str(c)]) for c in
items]
else:
creators = [unicode_type(c) for c in items]
creators = [str(c) for c in items]
items = creators
elif term == 'rights':
try:
rights = utf8_text(unicode_type(metadata.rights[0]))
rights = utf8_text(str(metadata.rights[0]))
except:
rights = b'Unknown'
exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
@@ -75,7 +75,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
continue
for item in items:
data = unicode_type(item)
data = str(item)
if term != 'description':
data = COLLAPSE_RE.sub(' ', data)
if term == 'identifier':
@@ -99,14 +99,14 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
from ebook_converter.ebooks.oeb.base import OPF
for x in metadata['identifier']:
if (x.get(OPF('scheme'), None).lower() == 'uuid' or
unicode_type(x).startswith('urn:uuid:')):
uuid = unicode_type(x).split(':')[-1]
str(x).startswith('urn:uuid:')):
uuid = str(x).split(':')[-1]
break
if uuid is None:
from uuid import uuid4
uuid = unicode_type(uuid4())
uuid = str(uuid4())
if isinstance(uuid, unicode_type):
if isinstance(uuid, str):
uuid = uuid.encode('utf-8')
if not share_not_sync:
exth.write(pack(b'>II', 113, len(uuid) + 8))
@@ -134,9 +134,9 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
# Add a publication date entry
if metadata['date']:
datestr = unicode_type(metadata['date'][0])
datestr = str(metadata['date'][0])
elif metadata['timestamp']:
datestr = unicode_type(metadata['timestamp'][0])
datestr = str(metadata['timestamp'][0])
if datestr is None:
raise ValueError("missing date or timestamp")