Removed the polyglot codepoint_to_chr alias; call sites now use the builtin chr directly

2026-02-18 23:45:50 +01:00 · 2020-04-20 19:29:05 +02:00
parent 128705f258
commit eac0b98d6f
11 changed files with 20 additions and 25 deletions
--- a/ebook_converter/init.py
+++ b/ebook_converter/init.py
@@ -14,7 +14,7 @@ import urllib.parse
 import urllib.request
 import warnings

-from ebook_converter.polyglot.builtins import codepoint_to_chr, hasenv, native_string_type
+from ebook_converter.polyglot.builtins import hasenv, native_string_type
 from functools import partial

 if not hasenv('CALIBRE_SHOW_DEPRECATION_WARNINGS'):
@@ -115,7 +115,7 @@ def confirm_config_name(name):


 _filename_sanitize_unicode = frozenset(('\\', '|', '?', '*', '<',        # no2to3
-    '"', ':', '>', '+', '/') + tuple(map(codepoint_to_chr, range(32))))  # no2to3
+    '"', ':', '>', '+', '/') + tuple(map(chr, range(32))))  # no2to3


 def sanitize_file_name(name, substitute='_'):
--- a/ebook_converter/css_selectors/parser.py
+++ b/ebook_converter/css_selectors/parser.py
@@ -15,7 +15,6 @@ import operator
 import string

 from ebook_converter.css_selectors.errors import SelectorSyntaxError, ExpressionError
-from ebook_converter.polyglot.builtins import codepoint_to_chr


 utab = {c:c+32 for c in range(ord(u'A'), ord(u'Z')+1)}
@@ -665,7 +664,7 @@ def _replace_unicode(match):
    codepoint = int(match.group(1), 16)
    if codepoint > sys.maxunicode:
        codepoint = 0xFFFD
-    return codepoint_to_chr(codepoint)
+    return chr(codepoint)


 def unescape_ident(value):
--- a/ebook_converter/ebooks/docx/fonts.py
+++ b/ebook_converter/ebooks/docx/fonts.py
@@ -6,7 +6,7 @@ from ebook_converter.utils.filenames import ascii_filename
 from ebook_converter.utils.fonts.scanner import font_scanner, NoFonts
 from ebook_converter.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font
 from ebook_converter.utils.icu import ord_string
-from ebook_converter.polyglot.builtins import codepoint_to_chr, iteritems
+from ebook_converter.polyglot.builtins import iteritems


 __license__ = 'GPL v3'
@@ -121,7 +121,7 @@ def do_map(m, points):
        if base < p < limit:
            yield m[p - base]
        else:
-            yield codepoint_to_chr(p)
+            yield chr(p)


 def map_symbol_text(text, font):
--- a/ebook_converter/ebooks/metadata/init.py
+++ b/ebook_converter/ebooks/metadata/init.py
@@ -9,7 +9,7 @@ import urllib.parse

 from ebook_converter import relpath, guess_type, prints, force_unicode
 from ebook_converter.utils.config_base import tweaks
-from ebook_converter.polyglot.builtins import codepoint_to_chr, getcwd, iteritems, itervalues, as_unicode
+from ebook_converter.polyglot.builtins import getcwd, iteritems, itervalues, as_unicode
 from ebook_converter.polyglot.urllib import unquote


@@ -157,7 +157,7 @@ def get_title_sort_pat(lang=None):
    return ans


-_ignore_starts = '\'"'+''.join(codepoint_to_chr(x) for x in
+_ignore_starts = '\'"'+''.join(chr(x) for x in
        list(range(0x2018, 0x201e))+[0x2032, 0x2033])


--- a/ebook_converter/ebooks/metadata/rtf.py
+++ b/ebook_converter/ebooks/metadata/rtf.py
@@ -6,7 +6,7 @@ import re

 from ebook_converter import force_unicode
 from ebook_converter.ebooks.metadata import MetaInformation
-from ebook_converter.polyglot.builtins import codepoint_to_chr, string_or_bytes, int_to_byte
+from ebook_converter.polyglot.builtins import string_or_bytes, int_to_byte

 title_pat    = re.compile(br'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
 author_pat   = re.compile(br'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
@@ -90,7 +90,7 @@ def decode(raw, codec):

    def uni(match):
        try:
-            return codepoint_to_chr(int(match.group(1)))
+            return chr(int(match.group(1)))
        except Exception:
            return '?'

--- a/ebook_converter/ebooks/oeb/base.py
+++ b/ebook_converter/ebooks/oeb/base.py
@@ -17,7 +17,7 @@ from ebook_converter import (isbytestring, as_unicode, get_types_map)
 from ebook_converter.ebooks.oeb.parse_utils import barename, XHTML_NS, namespace, XHTML, parse_html, NotHTML
 from ebook_converter.utils.cleantext import clean_xml_chars
 from ebook_converter.utils.short_uuid import uuid4
-from ebook_converter.polyglot.builtins import iteritems, string_or_bytes, itervalues, codepoint_to_chr
+from ebook_converter.polyglot.builtins import iteritems, string_or_bytes, itervalues
 from ebook_converter.polyglot.urllib import unquote as urlunquote


@@ -431,7 +431,7 @@ def serialize(data, media_type, pretty_print=False):
    return bytes(data)


-ASCII_CHARS   = frozenset(codepoint_to_chr(x) for x in range(128))
+ASCII_CHARS   = frozenset(chr(x) for x in range(128))
 UNIBYTE_CHARS = frozenset(x.encode('ascii') for x in ASCII_CHARS)
 USAFE         = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                 'abcdefghijklmnopqrstuvwxyz'
--- a/ebook_converter/ebooks/pdb/plucker/reader.py
+++ b/ebook_converter/ebooks/pdb/plucker/reader.py
@@ -9,7 +9,6 @@ from ebook_converter.ebooks.pdb.formatreader import FormatReader
 from ebook_converter.ebooks.compression.palmdoc import decompress_doc
 from ebook_converter.utils.imghdr import identify
 from ebook_converter.utils.img import save_cover_data_to, Canvas, image_from_data
-from ebook_converter.polyglot.builtins import codepoint_to_chr


 __license__ = 'GPL v3'
@@ -714,7 +713,7 @@ class Reader(FormatReader):
            elif c == 0xa0:
                html += '&nbsp;'
            else:
-                html += codepoint_to_chr(c)
+                html += chr(c)
            offset += 1
            if offset in paragraph_offsets:
                need_set_p_id = True
--- a/ebook_converter/ebooks/pdf/render/common.py
+++ b/ebook_converter/ebooks/pdf/render/common.py
@@ -4,7 +4,7 @@ from datetime import datetime

 from ebook_converter.constants import ispy3
 from ebook_converter.utils.logging import default_log
-from ebook_converter.polyglot.builtins import iteritems, codepoint_to_chr
+from ebook_converter.polyglot.builtins import iteritems
 from ebook_converter.polyglot.binary import as_hex_bytes


@@ -89,7 +89,7 @@ class Name(str):
        raw = bytearray(raw)
        sharp = ord(b'#')
        buf = (
-            codepoint_to_chr(x).encode('ascii') if 33 < x < 126 and x != sharp else
+            chr(x).encode('ascii') if 33 < x < 126 and x != sharp else
            '#{:x}'.format(x).encode('ascii') for x in raw)
        stream.write(b'/'+b''.join(buf))

--- a/ebook_converter/ebooks/rtf2xml/tokenize.py
+++ b/ebook_converter/ebooks/rtf2xml/tokenize.py
@@ -15,7 +15,6 @@ import os, re
 from ebook_converter.ebooks.rtf2xml import copy
 from ebook_converter.utils.mreplace import MReplace
 from ebook_converter.ptempfile import better_mktemp
-from ebook_converter.polyglot.builtins import codepoint_to_chr
 from . import open_for_read, open_for_write


@@ -95,7 +94,7 @@ class Tokenize:
            uni_len = len(match_obj.group(0))
            if uni_char < 0:
                uni_char += 65536
-            uni_char = codepoint_to_chr(uni_char).encode('ascii', 'xmlcharrefreplace').decode('ascii')
+            uni_char = chr(uni_char).encode('ascii', 'xmlcharrefreplace').decode('ascii')
            self.__uc_char = self.__uc_value[-1]
            # there is only an unicode char
            if len(token)<= uni_len:
--- a/ebook_converter/polyglot/builtins.py
+++ b/ebook_converter/polyglot/builtins.py
@@ -45,7 +45,6 @@ def reraise(tp, value, tb=None):
        tb = None


-codepoint_to_chr = chr
 string_or_bytes = str, bytes
 string_or_unicode = str
 long_type = int
--- a/ebook_converter/utils/cleantext.py
+++ b/ebook_converter/utils/cleantext.py
@@ -1,7 +1,6 @@
 import re
 import html.entities

-from ebook_converter.polyglot.builtins import codepoint_to_chr
 from ebook_converter.constants import plugins, preferred_encoding


@@ -11,7 +10,7 @@ def ascii_pat(for_binary=False):
    if ans is None:
        chars = set(range(32)) - {9, 10, 13}
        chars.add(127)
-        pat = '|'.join(map(codepoint_to_chr, chars))
+        pat = '|'.join(map(chr, chars))
        if for_binary:
            pat = pat.encode('ascii')
        ans = re.compile(pat)
@@ -32,7 +31,7 @@ def clean_ascii_chars(txt, charlist=None):
    if charlist is None:
        pat = ascii_pat(is_binary)
    else:
-        pat = '|'.join(map(codepoint_to_chr, charlist))
+        pat = '|'.join(map(chr, charlist))
        if is_binary:
            pat = pat.encode('utf-8')
    return pat.sub(empty, txt)
@@ -69,15 +68,15 @@ def unescape(text, rm=False, rchar=''):
            # character reference
            try:
                if text[:3] == "&#x":
-                    return codepoint_to_chr(int(text[3:-1], 16))
+                    return chr(int(text[3:-1], 16))
                else:
-                    return codepoint_to_chr(int(text[2:-1]))
+                    return chr(int(text[2:-1]))
            except ValueError:
                pass
        else:
            # named entity
            try:
-                text = codepoint_to_chr(html.entities
+                text = chr(html.entities
                                        .name2codepoint[text[1:-1]])
            except KeyError:
                pass