Removed polyglot's unicode_type usage

2020-04-20 19:25:28 +02:00
parent ef7e2b10be
commit 128705f258
130 changed files with 657 additions and 716 deletions
@@ -11,7 +11,7 @@ from ebook_converter.ebooks.oeb.stylizer import Stylizer
 from ebook_converter.ebooks.oeb.transforms.flatcss import KeyMapper
 from ebook_converter.ebooks.mobi.utils import convert_color_for_font_tag
 from ebook_converter.utils.imghdr import identify
-from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
+from ebook_converter.polyglot.builtins import string_or_bytes


 __license__ = 'GPL v3'
@@ -151,7 +151,7 @@ class MobiMLizer(object):
        return "%dem" % int(round(ptsize / embase))

    def preize_text(self, text, pre_wrap=False):
-        text = unicode_type(text)
+        text = str(text)
        if pre_wrap:
            # Replace n consecutive spaces with n-1 NBSP + space
            text = re.sub(r' {2,}', lambda m:('\xa0'*(len(m.group())-1) + ' '), text)
@@ -199,7 +199,7 @@ class MobiMLizer(object):
                bstate.nested.append(para)
                if tag == 'li' and len(istates) > 1:
                    istates[-2].list_num += 1
-                    para.attrib['value'] = unicode_type(istates[-2].list_num)
+                    para.attrib['value'] = str(istates[-2].list_num)
            elif tag in NESTABLE_TAGS and istate.rendered:
                para = wrapper = bstate.nested[-1]
            elif not self.opts.mobi_ignore_margins and left > 0 and indent >= 0:
@@ -228,7 +228,7 @@ class MobiMLizer(object):
                while vspace > 0:
                    wrapper.addprevious(etree.Element(XHTML('br')))
                    vspace -= 1
-            if istate.halign != 'auto' and isinstance(istate.halign, (bytes, unicode_type)):
+            if istate.halign != 'auto' and isinstance(istate.halign, (bytes, str)):
                if isinstance(istate.halign, bytes):
                    istate.halign = istate.halign.decode('utf-8')
                para.attrib['align'] = istate.halign
@@ -285,7 +285,7 @@ class MobiMLizer(object):

            if fsize != 3:
                inline = etree.SubElement(inline, XHTML('font'),
-                                          size=unicode_type(fsize))
+                                          size=str(fsize))
            if istate.family == 'monospace':
                inline = etree.SubElement(inline, XHTML('tt'))
            if istate.italic:
@@ -447,7 +447,7 @@ class MobiMLizer(object):
                                (72/self.profile.dpi)))
                        except:
                            continue
-                        result = unicode_type(pixs)
+                        result = str(pixs)
                    istate.attrib[prop] = result
            if 'width' not in istate.attrib or 'height' not in istate.attrib:
                href = self.current_spine_item.abshref(elem.attrib['src'])
@@ -464,8 +464,8 @@ class MobiMLizer(object):
                    else:
                        if 'width' not in istate.attrib and 'height' not in \
                                    istate.attrib:
-                            istate.attrib['width'] = unicode_type(width)
-                            istate.attrib['height'] = unicode_type(height)
+                            istate.attrib['width'] = str(width)
+                            istate.attrib['height'] = str(height)
                        else:
                            ar = width / height
                            if 'width' not in istate.attrib:
@@ -473,13 +473,13 @@ class MobiMLizer(object):
                                    width = int(istate.attrib['height'])*ar
                                except:
                                    pass
-                                istate.attrib['width'] = unicode_type(int(width))
+                                istate.attrib['width'] = str(int(width))
                            else:
                                try:
                                    height = int(istate.attrib['width'])/ar
                                except:
                                    pass
-                                istate.attrib['height'] = unicode_type(int(height))
+                                istate.attrib['height'] = str(int(height))
                        item.unload_data_from_memory()
        elif tag == 'hr' and asfloat(style['width']) > 0 and style._get('width') not in {'100%', 'auto'}:
            raww = style._get('width')
@@ -8,7 +8,6 @@ from ebook_converter.ebooks.mobi.langcodes import main_language, sub_language, m
 from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
 from ebook_converter.utils.localization import canonicalize_lang
 from ebook_converter.utils.config_base import tweaks
-from ebook_converter.polyglot.builtins import unicode_type


 __license__ = 'GPL v3'
@@ -245,7 +244,7 @@ class BookHeader(object):

            self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
            self.exth = None
-            if not isinstance(self.title, unicode_type):
+            if not isinstance(self.title, str):
                self.title = self.title.decode(self.codec, 'replace')
            if self.exth_flag & 0x40:
                try:
@@ -1,7 +1,6 @@
 import re, os

 from ebook_converter.ebooks.chardet import strip_encoding_declarations
-from ebook_converter.polyglot.builtins import unicode_type


 __license__ = 'GPL v3'
@@ -127,7 +126,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
            flows.append(flow)
            continue

-        if not isinstance(flow, unicode_type):
+        if not isinstance(flow, str):
            try:
                flow = flow.decode(mr.header.codec)
            except UnicodeDecodeError:
@@ -16,7 +16,7 @@ from ebook_converter.ebooks.metadata.toc import TOC
 from ebook_converter.ebooks.mobi.reader.headers import BookHeader
 from ebook_converter.utils.img import save_cover_data_to, gif_data_to_png_data, AnimatedGIF
 from ebook_converter.utils.imghdr import what
-from ebook_converter.polyglot.builtins import iteritems, unicode_type
+from ebook_converter.polyglot.builtins import iteritems


 __license__ = 'GPL v3'
@@ -287,7 +287,7 @@ class MobiReader(object):
            pass

        def write_as_utf8(path, data):
-            if isinstance(data, unicode_type):
+            if isinstance(data, str):
                data = data.encode('utf-8')
            with lopen(path, 'wb') as f:
                f.write(data)
@@ -18,7 +18,7 @@ from ebook_converter.ebooks.metadata.toc import TOC
 from ebook_converter.ebooks.mobi.utils import read_font_record
 from ebook_converter.ebooks.oeb.parse_utils import parse_html
 from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text
-from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode
+from ebook_converter.polyglot.builtins import getcwd, as_unicode


 __license__ = 'GPL v3'
@@ -224,7 +224,7 @@ class Mobi8Reader(object):
            self.parts.append(skeleton)
            if divcnt < 1:
                # Empty file
-                aidtext = unicode_type(uuid.uuid4())
+                aidtext = str(uuid.uuid4())
                filename = aidtext + '.html'
            self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
                baseptr, aidtext))
@@ -5,7 +5,7 @@ from io import BytesIO
 from ebook_converter.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data
 from ebook_converter.utils.imghdr import what
 from ebook_converter.ebooks import normalize
-from ebook_converter.polyglot.builtins import unicode_type, as_bytes
+from ebook_converter.polyglot.builtins import as_bytes
 from ebook_converter.tinycss.color3 import parse_color_string


@@ -20,17 +20,17 @@ RECORD_SIZE = 0x1000  # 4096 (Text record size (uncompressed))
 class PolyglotDict(dict):

    def __setitem__(self, key, val):
-        if isinstance(key, unicode_type):
+        if isinstance(key, str):
            key = key.encode('utf-8')
        dict.__setitem__(self, key, val)

    def __getitem__(self, key):
-        if isinstance(key, unicode_type):
+        if isinstance(key, str):
            key = key.encode('utf-8')
        return dict.__getitem__(self, key)

    def __contains__(self, key):
-        if isinstance(key, unicode_type):
+        if isinstance(key, str):
            key = key.encode('utf-8')
        return dict.__contains__(self, key)

@@ -332,7 +332,7 @@ def utf8_text(text):
    '''
    if text and text.strip():
        text = text.strip()
-        if not isinstance(text, unicode_type):
+        if not isinstance(text, str):
            text = text.decode('utf-8', 'replace')
        text = normalize(text).encode('utf-8')
    else:
@@ -635,7 +635,7 @@ def is_guide_ref_start(ref):


 def convert_color_for_font_tag(val):
-    rgba = parse_color_string(unicode_type(val or ''))
+    rgba = parse_color_string(str(val or ''))
    if rgba is None or rgba == 'currentColor':
        return val
    clamp = lambda x: min(x, max(0, x), 1)
@@ -10,7 +10,7 @@ from ebook_converter.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED)
 from ebook_converter.ebooks.mobi.utils import (encint, encode_trailing_data,
        align_block, detect_periodical, RECORD_SIZE, create_text_record)
 from ebook_converter.ebooks.mobi.writer2.indexer import Indexer
-from ebook_converter.polyglot.builtins import iteritems, unicode_type
+from ebook_converter.polyglot.builtins import iteritems


 __license__ = 'GPL v3'
@@ -48,7 +48,7 @@ class MobiWriter(object):
        self.log = oeb.log
        pt = None
        if oeb.metadata.publication_type:
-            x = unicode_type(oeb.metadata.publication_type[0]).split(':')
+            x = str(oeb.metadata.publication_type[0]).split(':')
            if len(x) > 1:
                pt = x[1].lower()
        self.publication_type = pt
@@ -235,7 +235,7 @@ class MobiWriter(object):
            0  # Unused
        ))  # 0 - 15 (0x0 - 0xf)
        uid = random.randint(0, 0xffffffff)
-        title = normalize(unicode_type(metadata.title[0])).encode('utf-8')
+        title = normalize(str(metadata.title[0])).encode('utf-8')

        # 0x0 - 0x3
        record0.write(b'MOBI')
@@ -278,7 +278,7 @@ class MobiWriter(object):

        # 0x4c - 0x4f : Language specifier
        record0.write(iana2mobi(
-            unicode_type(metadata.language[0])))
+            str(metadata.language[0])))

        # 0x50 - 0x57 : Input language and Output language
        record0.write(b'\0' * 8)
@@ -455,7 +455,7 @@ class MobiWriter(object):
        '''
        Write the PalmDB header
        '''
-        title = ascii_filename(unicode_type(self.oeb.metadata.title[0])).replace(
+        title = ascii_filename(str(self.oeb.metadata.title[0])).replace(
                ' ', '_')
        if not isinstance(title, bytes):
            title = title.encode('ascii')
@@ -8,7 +8,7 @@ from ebook_converter.ebooks import generate_masthead
 from ebook_converter.ebooks.oeb.base import OEB_RASTER_IMAGES
 from ebook_converter.ptempfile import PersistentTemporaryFile
 from ebook_converter.utils.imghdr import what
-from ebook_converter.polyglot.builtins import iteritems, unicode_type
+from ebook_converter.polyglot.builtins import iteritems


 __license__ = 'GPL v3'
@@ -79,7 +79,7 @@ class Resources(object):
            self.image_indices.add(0)
        elif self.is_periodical:
            # Generate a default masthead
-            data = generate_masthead(unicode_type(self.oeb.metadata['title'][0]))
+            data = generate_masthead(str(self.oeb.metadata['title'][0]))
            self.records.append(data)
            self.used_image_indices.add(0)
            self.image_indices.add(0)
@@ -87,8 +87,8 @@ class Resources(object):

        cover_href = self.cover_offset = self.thumbnail_offset = None
        if (oeb.metadata.cover and
-                unicode_type(oeb.metadata.cover[0]) in oeb.manifest.ids):
-            cover_id = unicode_type(oeb.metadata.cover[0])
+                str(oeb.metadata.cover[0]) in oeb.manifest.ids):
+            cover_id = str(oeb.metadata.cover[0])
            item = oeb.manifest.ids[cover_id]
            cover_href = item.href

@@ -9,7 +9,7 @@ from ebook_converter.ebooks.mobi.utils import is_guide_ref_start
 from ebook_converter.ebooks.oeb.base import (
    OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
 )
-from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
+from ebook_converter.polyglot.builtins import string_or_bytes


 __license__ = 'GPL v3'
@@ -20,7 +20,7 @@ __docformat__ = 'restructuredtext en'
 class Buf(io.BytesIO):

    def write(self, x):
-        if isinstance(x, unicode_type):
+        if isinstance(x, str):
            x = x.encode('utf-8')
        io.BytesIO.write(self, x)

@@ -226,7 +226,7 @@ class Serializer(object):
                buf.write(b'<div> <div height="1em"></div>')
            else:
                t = tocref.title
-                if isinstance(t, unicode_type):
+                if isinstance(t, str):
                    t = t.encode('utf-8')
                buf.write(b'<div></div> <div> <h2 height="1em"><font size="+2"><b>' + t +
                          b'</b></font></h2> <div height="1em"></div>')
@@ -246,7 +246,7 @@ class Serializer(object):
                buf.write(b'0000000000')
                buf.write(b' ><font size="+1"><b><u>')
                t = tocitem.title
-                if isinstance(t, unicode_type):
+                if isinstance(t, str):
                    t = t.encode('utf-8')
                buf.write(t)
                buf.write(b'</u></b></font></a></li>')
@@ -364,7 +364,7 @@ class Serializer(object):
        text = text.replace(u'\u00AD', '')  # Soft-hyphen
        if quot:
            text = text.replace('"', '&quot;')
-        if isinstance(text, unicode_type):
+        if isinstance(text, str):
            text = unicodedata.normalize('NFC', text)
        self.buf.write(text.encode('utf-8'))

@@ -6,7 +6,7 @@ from ebook_converter.constants import iswindows, isosx
 from ebook_converter.ebooks.mobi.utils import (utf8_text, to_base)
 from ebook_converter.utils.localization import lang_as_iso639_1
 from ebook_converter.ebooks.metadata import authors_to_sort_string
-from ebook_converter.polyglot.builtins import iteritems, unicode_type
+from ebook_converter.polyglot.builtins import iteritems


 __license__ = 'GPL v3'
@@ -59,14 +59,14 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
        items = metadata[term]
        if term == 'creator':
            if prefer_author_sort:
-                creators = [authors_to_sort_string([unicode_type(c)]) for c in
+                creators = [authors_to_sort_string([str(c)]) for c in
                            items]
            else:
-                creators = [unicode_type(c) for c in items]
+                creators = [str(c) for c in items]
            items = creators
        elif term == 'rights':
            try:
-                rights = utf8_text(unicode_type(metadata.rights[0]))
+                rights = utf8_text(str(metadata.rights[0]))
            except:
                rights = b'Unknown'
            exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
@@ -75,7 +75,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
            continue

        for item in items:
-            data = unicode_type(item)
+            data = str(item)
            if term != 'description':
                data = COLLAPSE_RE.sub(' ', data)
            if term == 'identifier':
@@ -99,14 +99,14 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
    from ebook_converter.ebooks.oeb.base import OPF
    for x in metadata['identifier']:
        if (x.get(OPF('scheme'), None).lower() == 'uuid' or
-                unicode_type(x).startswith('urn:uuid:')):
-            uuid = unicode_type(x).split(':')[-1]
+                str(x).startswith('urn:uuid:')):
+            uuid = str(x).split(':')[-1]
            break
    if uuid is None:
        from uuid import uuid4
-        uuid = unicode_type(uuid4())
+        uuid = str(uuid4())

-    if isinstance(uuid, unicode_type):
+    if isinstance(uuid, str):
        uuid = uuid.encode('utf-8')
    if not share_not_sync:
        exth.write(pack(b'>II', 113, len(uuid) + 8))
@@ -134,9 +134,9 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,

    # Add a publication date entry
    if metadata['date']:
-        datestr = unicode_type(metadata['date'][0])
+        datestr = str(metadata['date'][0])
    elif metadata['timestamp']:
-        datestr = unicode_type(metadata['timestamp'][0])
+        datestr = str(metadata['timestamp'][0])

    if datestr is None:
        raise ValueError("missing date or timestamp")