Clean up cleantext module

2020-06-17 17:44:51 +02:00
parent f31fb5e971
commit 4b27f55f5b
1 changed files with 3 additions and 11 deletions
@@ -1,8 +1,6 @@
 import re
 import html.entities

-from ebook_converter.constants_old import plugins, preferred_encoding
-

 def ascii_pat(for_binary=False):
    attr = 'binary' if for_binary else 'text'
@@ -39,7 +37,8 @@ def clean_ascii_chars(txt, charlist=None):

 def allowed(x):
    x = ord(x)
-    return (x != 127 and (31 < x < 0xd7ff or x in (9, 10, 13))) or (0xe000 < x < 0xfffd) or (0x10000 < x < 0x10ffff)
+    return ((x != 127 and (31 < x < 0xd7ff or x in (9, 10, 13))) or
+            (0xe000 < x < 0xfffd) or (0x10000 < x < 0x10ffff))


 def py_clean_xml_chars(unicode_string):
@@ -49,12 +48,6 @@ def py_clean_xml_chars(unicode_string):
 clean_xml_chars = py_clean_xml_chars


-def test_clean_xml_chars():
-    raw = 'asd\x02a\U00010437x\ud801b\udffe\ud802'
-    if native_clean_xml_chars(raw) != 'asda\U00010437xb':
-        raise ValueError('Failed to XML clean: %r' % raw)
-
-
 # Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
 # Removes HTML or XML character references and entities from a text string.
 #
@@ -76,8 +69,7 @@ def unescape(text, rm=False, rchar=''):
        else:
            # named entity
            try:
-                text = chr(html.entities
-                                        .name2codepoint[text[1:-1]])
+                text = chr(html.entities.name2codepoint[text[1:-1]])
            except KeyError:
                pass
        if rm: