Removed polyglot's unicode_type usage

2026-04-18 12:03:33 +02:00 · 2020-04-20 19:25:28 +02:00
parent ef7e2b10be
commit 128705f258
130 changed files with 657 additions and 716 deletions
--- a/ebook_converter/ebooks/lrf/html/convert_from.py
+++ b/ebook_converter/ebooks/lrf/html/convert_from.py
@@ -37,7 +37,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import (
    RuledLine, Span, Sub, Sup, TextBlock
 )
 from ebook_converter.ptempfile import PersistentTemporaryFile
-from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type
+from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes
 from ebook_converter.polyglot.urllib import unquote

 from PIL import Image as PILImage
@@ -276,7 +276,7 @@ class HTMLConverter(object):
                update_css(npcss, self.override_pcss)

        paths = [os.path.abspath(path) for path in paths]
-        paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode_type) else path for path in paths]
+        paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, str) else path for path in paths]

        while len(paths) > 0 and self.link_level <= self.link_levels:
            for path in paths:
@@ -356,7 +356,7 @@ class HTMLConverter(object):
                os.makedirs(tdir)
            try:
                with open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb') as f:
-                    f.write(unicode_type(soup).encode('utf-8'))
+                    f.write(str(soup).encode('utf-8'))
                    self.log.info(_('Written preprocessed HTML to ')+f.name)
            except:
                pass
@@ -389,7 +389,7 @@ class HTMLConverter(object):
        self.log.info(_('\tConverting to BBeB...'))
        self.current_style = {}
        self.page_break_found = False
-        if not isinstance(path, unicode_type):
+        if not isinstance(path, str):
            path = path.decode(sys.getfilesystemencoding())
        self.target_prefix = path
        self.previous_text = '\n'
@@ -399,7 +399,7 @@ class HTMLConverter(object):
    def parse_css(self, style):
        """
        Parse the contents of a <style> tag or .css file.
-        @param style: C{unicode_type(style)} should be the CSS to parse.
+        @param style: C{str(style)} should be the CSS to parse.
        @return: A dictionary with one entry per selector where the key is the
        selector name and the value is a dictionary of properties
        """
@@ -587,7 +587,7 @@ class HTMLConverter(object):
            if isinstance(c, HTMLConverter.IGNORED_TAGS):
                continue
            if isinstance(c, bs4.NavigableString):
-                text += unicode_type(c)
+                text += str(c)
            elif isinstance(c, bs4.Tag):
                if c.name.lower() == 'img' and c.has_attr('alt'):
                    alt_text += c['alt']
@@ -642,7 +642,7 @@ class HTMLConverter(object):
            para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment']
            ascii_text = text

-            if not isinstance(path, unicode_type):
+            if not isinstance(path, str):
                path = path.decode(sys.getfilesystemencoding())
            if path in self.processed_files:
                if path+fragment in self.targets.keys():
@@ -1085,7 +1085,7 @@ class HTMLConverter(object):

        s1, s2 = get('margin'), get('padding')

-        bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px'
+        bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'

        def set(default, one, two):
            fval = None
@@ -1214,7 +1214,7 @@ class HTMLConverter(object):
                    ans = 120
            if ans is not None:
                ans += int(self.font_delta * 20)
-                ans = unicode_type(ans)
+                ans = str(ans)
            return ans

        family, weight, style, variant = 'serif', 'normal', 'normal', None
@@ -1320,10 +1320,10 @@ class HTMLConverter(object):
    def text_properties(self, tag_css):
        indent = self.book.defaultTextStyle.attrs['parindent']
        if 'text-indent' in tag_css:
-            bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px'
+            bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
            if 'em' in tag_css['text-indent']:
                bl = '10pt'
-            indent = self.unit_convert(unicode_type(tag_css['text-indent']), pts=True, base_length=bl)
+            indent = self.unit_convert(str(tag_css['text-indent']), pts=True, base_length=bl)
            if not indent:
                indent = 0
            if indent > 0 and indent < 10 * self.minimum_indent:
@@ -1518,11 +1518,11 @@ class HTMLConverter(object):
                    elif not urllib.parse.urlparse(tag['src'])[0]:
                        self.log.warn('Could not find image: '+tag['src'])
                else:
-                    self.log.debug("Failed to process: %s"%unicode_type(tag))
+                    self.log.debug("Failed to process: %s"%str(tag))
            elif tagname in ['style', 'link']:
                ncss, npcss = {}, {}
                if tagname == 'style':
-                    text = ''.join([unicode_type(i) for i in tag.findAll(text=True)])
+                    text = ''.join([str(i) for i in tag.findAll(text=True)])
                    css, pcss = self.parse_css(text)
                    ncss.update(css)
                    npcss.update(pcss)
@@ -1554,7 +1554,7 @@ class HTMLConverter(object):
                if tag.contents:
                    c = tag.contents[0]
                    if isinstance(c, bs4.NavigableString):
-                        c = unicode_type(c).replace('\r\n', '\n').replace('\r', '\n')
+                        c = str(c).replace('\r\n', '\n').replace('\r', '\n')
                        if c.startswith('\n'):
                            c = c[1:]
                            tag.contents[0] = bs4.NavigableString(c)
@@ -1612,7 +1612,7 @@ class HTMLConverter(object):
                            in_ol = parent.name.lower() == 'ol'
                            break
                        parent = parent.parent
-                    prepend = unicode_type(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
+                    prepend = str(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
                    self.current_para.append(Span(prepend))
                    self.process_children(tag, tag_css, tag_pseudo_css)
                    if in_ol:
@@ -1655,7 +1655,7 @@ class HTMLConverter(object):

                if (self.anchor_ids and tag.has_attr('id')) or (self.book_designer and tag.get('class') in ('title', ['title'])):
                    if not tag.has_attr('id'):
-                        tag['id'] = __appname__+'_id_'+unicode_type(self.id_counter)
+                        tag['id'] = __appname__+'_id_'+str(self.id_counter)
                        self.id_counter += 1

                    tkey = self.target_prefix+tag['id']
@@ -1728,7 +1728,7 @@ class HTMLConverter(object):
                except Exception as err:
                    self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
                    self.log.exception('')
-                    self.log.debug(_('Bad table:\n%s')%unicode_type(tag)[:300])
+                    self.log.debug(_('Bad table:\n%s')%str(tag)[:300])
                    self.in_table = False
                    self.process_children(tag, tag_css, tag_pseudo_css)
                finally:
@@ -1824,9 +1824,9 @@ def process_file(path, options, logger):

    for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
        val = getattr(options, prop, None)
-        if val and not isinstance(val, unicode_type):
+        if val and not isinstance(val, str):
            soup = html5_parser(val)
-            setattr(options, prop, unicode_type(soup))
+            setattr(options, prop, str(soup))

    title = (options.title, options.title_sort)
    author = (options.author, options.author_sort)
@@ -1870,7 +1870,7 @@ def process_file(path, options, logger):
    options.force_page_break = fpb
    options.link_exclude = le
    options.page_break = pb
-    if not isinstance(options.chapter_regex, unicode_type):
+    if not isinstance(options.chapter_regex, str):
        options.chapter_regex = options.chapter_regex.decode(preferred_encoding)
    options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
    fpba = options.force_page_break_attr.split(',')
--- a/ebook_converter/ebooks/lrf/pylrs/elements.py
+++ b/ebook_converter/ebooks/lrf/pylrs/elements.py
@@ -1,7 +1,7 @@
 """
 elements.py -- replacements and helpers for ElementTree
 """
-from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
+from ebook_converter.polyglot.builtins import string_or_bytes


 class ElementWriter(object):
@@ -24,9 +24,9 @@ class ElementWriter(object):
        return text

    def _writeAttribute(self, f, name, value):
-        f.write(' %s="' % unicode_type(name))
+        f.write(' %s="' % str(name))
        if not isinstance(value, string_or_bytes):
-            value = unicode_type(value)
+            value = str(value)
        value = self._encodeCdata(value)
        value = value.replace('"', '&quot;')
        f.write(value)
@@ -37,7 +37,7 @@ class ElementWriter(object):
        f.write(text)

    def _write(self, f, e):
-        f.write('<' + unicode_type(e.tag))
+        f.write('<' + str(e.tag))

        attributes = e.items()
        attributes.sort()
--- a/ebook_converter/ebooks/lrf/pylrs/pylrf.py
+++ b/ebook_converter/ebooks/lrf/pylrs/pylrf.py
@@ -9,7 +9,7 @@ import codecs
 import os

 from .pylrfopt import tagListOptimizer
-from ebook_converter.polyglot.builtins import iteritems, string_or_bytes, unicode_type
+from ebook_converter.polyglot.builtins import iteritems, string_or_bytes

 PYLRF_VERSION = "1.0"

@@ -82,7 +82,7 @@ def writeWord(f, word):
    if int(word) > 65535:
        raise LrfError('Cannot encode a number greater than 65535 in a word.')
    if int(word) < 0:
-        raise LrfError('Cannot encode a number < 0 in a word: '+unicode_type(word))
+        raise LrfError('Cannot encode a number < 0 in a word: '+str(word))
    f.write(struct.pack("<H", int(word)))


@@ -508,7 +508,7 @@ class LrfObject(object):
            raise LrfError("object name %s not recognized" % name)

    def __str__(self):
-        return 'LRFObject: ' + self.name + ", " + unicode_type(self.objId)
+        return 'LRFObject: ' + self.name + ", " + str(self.objId)

    def appendLrfTag(self, tag):
        self.tags.append(tag)
--- a/ebook_converter/ebooks/lrf/pylrs/pylrs.py
+++ b/ebook_converter/ebooks/lrf/pylrs/pylrs.py
@@ -51,7 +51,7 @@ DEFAULT_GENREADING      = "fs"          # default is yes to both lrf and lrs

 from ebook_converter import __appname__, __version__
 from ebook_converter import entity_to_unicode
-from ebook_converter.polyglot.builtins import string_or_bytes, unicode_type, iteritems, native_string_type
+from ebook_converter.polyglot.builtins import string_or_bytes, iteritems, native_string_type


 class LrsError(Exception):
@@ -226,7 +226,7 @@ class LrsAttributes(object):
                raise LrsError("%s does not support setting %s" %
                        (self.__class__.__name__, name))
            if isinstance(value, int):
-                value = unicode_type(value)
+                value = str(value)
            self.attrs[name] = value


@@ -330,13 +330,13 @@ class LrsObject(object):
    def lrsObjectElement(self, name, objlabel="objlabel", labelName=None,
            labelDecorate=True, **settings):
        element = Element(name)
-        element.attrib["objid"] = unicode_type(self.objId)
+        element.attrib["objid"] = str(self.objId)
        if labelName is None:
            labelName = name
        if labelDecorate:
            label = "%s.%d" % (labelName, self.objId)
        else:
-            label = unicode_type(self.objId)
+            label = str(self.objId)
        element.attrib[objlabel] = label
        element.attrib.update(settings)
        return element
@@ -562,7 +562,7 @@ class Book(Delegator):
        factor = base_font_size / old_base_font_size

        def rescale(old):
-            return unicode_type(int(int(old) * factor))
+            return str(int(int(old) * factor))

        text_blocks = list(main.get_all(lambda x: isinstance(x, TextBlock)))
        for tb in text_blocks:
@@ -693,7 +693,7 @@ class TableOfContents(object):
    def addTocEntry(self, tocLabel, textBlock):
        if not isinstance(textBlock, (Canvas, TextBlock, ImageBlock, RuledLine)):
            raise LrsError("TOC destination must be a Canvas, TextBlock, ImageBlock or RuledLine"+
-                            " not a " + unicode_type(type(textBlock)))
+                            " not a " + str(type(textBlock)))

        if textBlock.parent is None:
            raise LrsError("TOC text block must be already appended to a page")
@@ -743,8 +743,8 @@ class TocLabel(object):

    def toElement(self, se):
        return ElementWithText("TocLabel", self.label,
-                 refobj=unicode_type(self.textBlock.objId),
-                 refpage=unicode_type(self.textBlock.parent.objId))
+                 refobj=str(self.textBlock.objId),
+                 refpage=str(self.textBlock.parent.objId))


 class BookInfo(object):
@@ -805,7 +805,7 @@ class DocInfo(object):
        self.thumbnail = None
        self.language = "en"
        self.creator  = None
-        self.creationdate = unicode_type(isoformat(date.today()))
+        self.creationdate = str(isoformat(date.today()))
        self.producer = "%s v%s"%(__appname__, __version__)
        self.numberofpages = "0"

@@ -829,7 +829,7 @@ class DocInfo(object):
        docInfo.append(ElementWithText("Creator", self.creator))
        docInfo.append(ElementWithText("CreationDate", self.creationdate))
        docInfo.append(ElementWithText("Producer", self.producer))
-        docInfo.append(ElementWithText("SumPage", unicode_type(self.numberofpages)))
+        docInfo.append(ElementWithText("SumPage", str(self.numberofpages)))
        return docInfo


@@ -1091,7 +1091,7 @@ class LrsStyle(LrsObject, LrsAttributes, LrsContainer):
        self.elementName = elementName
        self.objectsAppended = False
        # self.label = "%s.%d" % (elementName, self.objId)
-        # self.label = unicode_type(self.objId)
+        # self.label = str(self.objId)
        # self.parent = None

    def update(self, settings):
@@ -1101,11 +1101,11 @@ class LrsStyle(LrsObject, LrsAttributes, LrsContainer):
            self.attrs[name] = value

    def getLabel(self):
-        return unicode_type(self.objId)
+        return str(self.objId)

    def toElement(self, se):
        element = Element(self.elementName, stylelabel=self.getLabel(),
-                objid=unicode_type(self.objId))
+                objid=str(self.objId))
        element.attrib.update(self.attrs)
        return element

@@ -1236,14 +1236,14 @@ class PageStyle(LrsStyle):
            del settings[evenbase]
            if evenObj.parent is None:
                parent.append(evenObj)
-            settings[evenbase + "id"] = unicode_type(evenObj.objId)
+            settings[evenbase + "id"] = str(evenObj.objId)

        if oddbase in settings:
            oddObj = settings[oddbase]
            del settings[oddbase]
            if oddObj.parent is None:
                parent.append(oddObj)
-            settings[oddbase + "id"] = unicode_type(oddObj.objId)
+            settings[oddbase + "id"] = str(oddObj.objId)

    def appendReferencedObjects(self, parent):
        if self.objectsAppended:
@@ -1486,7 +1486,7 @@ class Paragraph(LrsContainer):

    def __init__(self, text=None):
        LrsContainer.__init__(self, [Text, CR, DropCaps, CharButton,
-                                     LrsSimpleChar1, bytes, unicode_type])
+                                     LrsSimpleChar1, bytes, str])
        if text is not None:
            if isinstance(text, string_or_bytes):
                text = Text(text)
@@ -1521,7 +1521,7 @@ class Paragraph(LrsContainer):
 class LrsTextTag(LrsContainer):

    def __init__(self, text, validContents):
-        LrsContainer.__init__(self, [Text, bytes, unicode_type] + validContents)
+        LrsContainer.__init__(self, [Text, bytes, str] + validContents)
        if text is not None:
            self.append(text)

@@ -1580,7 +1580,7 @@ class DropCaps(LrsTextTag):
        return self.text is None or not self.text.strip()

    def toElement(self, se):
-        elem =  Element('DrawChar', line=unicode_type(self.line))
+        elem =  Element('DrawChar', line=str(self.line))
        appendTextElements(elem, self.contents, se)
        return elem

@@ -1656,7 +1656,7 @@ class JumpTo(LrsContainer):
        self.textBlock = textBlock

    def toElement(self, se):
-        return Element("JumpTo", refpage=unicode_type(self.textBlock.parent.objId), refobj=unicode_type(self.textBlock.objId))
+        return Element("JumpTo", refpage=str(self.textBlock.parent.objId), refobj=str(self.textBlock.objId))


 class Plot(LrsSimpleChar1, LrsContainer):
@@ -1688,8 +1688,8 @@ class Plot(LrsSimpleChar1, LrsContainer):
            parent.append(self.obj)

    def toElement(self, se):
-        elem =  Element('Plot', xsize=unicode_type(self.xsize), ysize=unicode_type(self.ysize),
-                                refobj=unicode_type(self.obj.objId))
+        elem =  Element('Plot', xsize=str(self.xsize), ysize=str(self.ysize),
+                                refobj=str(self.obj.objId))
        if self.adjustment:
            elem.set('adjustment', self.adjustment)
        return elem
@@ -1771,7 +1771,7 @@ class Space(LrsSimpleChar1, LrsContainer):
        if self.xsize == 0:
            return

-        return Element("Space", xsize=unicode_type(self.xsize))
+        return Element("Space", xsize=str(self.xsize))

    def toLrfContainer(self, lrfWriter, container):
        if self.xsize != 0:
@@ -1785,7 +1785,7 @@ class Box(LrsSimpleChar1, LrsContainer):
    """

    def __init__(self, linetype="solid"):
-        LrsContainer.__init__(self, [Text, bytes, unicode_type])
+        LrsContainer.__init__(self, [Text, bytes, str])
        if linetype not in LINE_TYPE_ENCODING:
            raise LrsError(linetype + " is not a valid line type")
        self.linetype = linetype
@@ -1805,7 +1805,7 @@ class Box(LrsSimpleChar1, LrsContainer):
 class Span(LrsSimpleChar1, LrsContainer):

    def __init__(self, text=None, **attrs):
-        LrsContainer.__init__(self, [LrsSimpleChar1, Text, bytes, unicode_type])
+        LrsContainer.__init__(self, [LrsSimpleChar1, Text, bytes, str])
        if text is not None:
            if isinstance(text, string_or_bytes):
                text = Text(text)
@@ -1858,7 +1858,7 @@ class Span(LrsSimpleChar1, LrsContainer):
    def toElement(self, se):
        element = Element('Span')
        for (key, value) in self.attrs.items():
-            element.set(key, unicode_type(value))
+            element.set(key, str(value))

        appendTextElements(element, self.contents, se)
        return element
@@ -1871,9 +1871,9 @@ class EmpLine(LrsTextTag, LrsSimpleChar1):
    def __init__(self, text=None, emplineposition='before', emplinetype='solid'):
        LrsTextTag.__init__(self, text, [LrsSimpleChar1])
        if emplineposition not in self.__class__.emplinepositions:
-            raise LrsError('emplineposition for an EmpLine must be one of: '+unicode_type(self.__class__.emplinepositions))
+            raise LrsError('emplineposition for an EmpLine must be one of: '+str(self.__class__.emplinepositions))
        if emplinetype not in self.__class__.emplinetypes:
-            raise LrsError('emplinetype for an EmpLine must be one of: '+unicode_type(self.__class__.emplinetypes))
+            raise LrsError('emplinetype for an EmpLine must be one of: '+str(self.__class__.emplinetypes))

        self.emplinetype     = emplinetype
        self.emplineposition = emplineposition
@@ -1933,9 +1933,9 @@ class BlockSpace(LrsContainer):
        element = Element("BlockSpace")

        if self.xspace != 0:
-            element.attrib["xspace"] = unicode_type(self.xspace)
+            element.attrib["xspace"] = str(self.xspace)
        if self.yspace != 0:
-            element.attrib["yspace"] = unicode_type(self.yspace)
+            element.attrib["yspace"] = str(self.yspace)

        return element

@@ -1949,7 +1949,7 @@ class CharButton(LrsSimpleChar1, LrsContainer):
    """

    def __init__(self, button, text=None):
-        LrsContainer.__init__(self, [bytes, unicode_type, Text, LrsSimpleChar1])
+        LrsContainer.__init__(self, [bytes, str, Text, LrsSimpleChar1])
        self.button = None
        if button is not None:
            self.setButton(button)
@@ -1979,7 +1979,7 @@ class CharButton(LrsSimpleChar1, LrsContainer):
        container.appendLrfTag(LrfTag("CharButtonEnd"))

    def toElement(self, se):
-        cb = Element("CharButton", refobj=unicode_type(self.button.objId))
+        cb = Element("CharButton", refobj=str(self.button.objId))
        appendTextElements(cb, self.contents, se)
        return cb

@@ -2081,8 +2081,8 @@ class JumpButton(LrsObject, LrsContainer):
        b = self.lrsObjectElement("Button")
        pb = SubElement(b, "PushButton")
        SubElement(pb, "JumpTo",
-            refpage=unicode_type(self.textBlock.parent.objId),
-            refobj=unicode_type(self.textBlock.objId))
+            refpage=str(self.textBlock.parent.objId),
+            refobj=str(self.textBlock.objId))
        return b


@@ -2230,8 +2230,8 @@ class PutObj(LrsContainer):
            self.content.objId)))

    def toElement(self, se):
-        el = Element("PutObj", x1=unicode_type(self.x1), y1=unicode_type(self.y1),
-                    refobj=unicode_type(self.content.objId))
+        el = Element("PutObj", x1=str(self.x1), y1=str(self.y1),
+                    refobj=str(self.content.objId))
        return el


@@ -2313,9 +2313,9 @@ class Image(LrsObject, LrsContainer, LrsAttributes):

    def toElement(self, se):
        element = self.lrsObjectElement("Image", **self.attrs)
-        element.set("refstream", unicode_type(self.refstream.objId))
+        element.set("refstream", str(self.refstream.objId))
        for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]:
-            element.set(name, unicode_type(getattr(self, name)))
+            element.set(name, str(getattr(self, name)))
        return element

    def toLrf(self, lrfWriter):
@@ -2396,9 +2396,9 @@ class ImageBlock(LrsObject, LrsContainer, LrsAttributes):

    def toElement(self, se):
        element = self.lrsObjectElement("ImageBlock", **self.attrs)
-        element.set("refstream", unicode_type(self.refstream.objId))
+        element.set("refstream", str(self.refstream.objId))
        for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]:
-            element.set(name, unicode_type(getattr(self, name)))
+            element.set(name, str(getattr(self, name)))
        element.text = self.alttext
        return element