Moved xml_entity_to_unicode partial to utils.entities module.

2021-01-03 19:00:17 +01:00
parent 839cc3c79a
commit 877ea68d42
4 changed files with 13 additions and 22 deletions
@@ -1,9 +1,8 @@
-import re, codecs
+import codecs
+import re

+from ebook_converter.utils import entities

-__license__ = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
-__docformat__ = 'restructuredtext en'

 _encoding_pats = (
    # XML declaration
@@ -94,8 +93,7 @@ def find_declared_encoding(raw, limit=50*1024):


 def substitute_entites(raw):
-    from ebook_converter import xml_entity_to_unicode
-    return ENTITY_PATTERN.sub(xml_entity_to_unicode, raw)
+    return ENTITY_PATTERN.sub(entities.xml_entity_to_unicode, raw)


 _CHARSET_ALIASES = {"macintosh" : "mac-roman",
@@ -1,13 +1,11 @@
 import os

-from ebook_converter import _ent_pat, xml_entity_to_unicode
-from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
+from ebook_converter import _ent_pat
+from ebook_converter.customize.conversion import InputFormatPlugin
+from ebook_converter.customize.conversion import OptionRecommendation
+from ebook_converter.utils import entities


-__license__ = 'GPL 3'
-__copyright__ = '2009, John Schember <john@nachtimwald.com>'
-__docformat__ = 'restructuredtext en'
-
 MD_EXTENSIONS = {
    'abbr': 'Abbreviations',
    'admonition': 'Support admonitions',
@@ -200,7 +198,7 @@ class TXTInput(InputFormatPlugin):
        txt = txt.decode(ienc, 'replace')

        # Replace entities
-        txt = _ent_pat.sub(xml_entity_to_unicode, txt)
+        txt = _ent_pat.sub(entities.xml_entity_to_unicode, txt)

        # Normalize line endings
        txt = normalize_line_endings(txt)
@@ -9,7 +9,6 @@ import textwrap

 from lxml import html, etree

-from ebook_converter import xml_entity_to_unicode
 from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
 from ebook_converter.ebooks import DRMError, unit_convert
 from ebook_converter.ebooks.chardet import strip_encoding_declarations
@@ -183,7 +182,8 @@ class MobiReader(object):
                                     self.processed_html)

        self.processed_html = strip_encoding_declarations(self.processed_html)
-        self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
+        self.processed_html = re.sub(r'&(\S+?);',
+                                     entities.xml_entity_to_unicode,
                                     self.processed_html)
        image_name_map = self.extract_images(processed_records, output_dir)
        self.replace_page_breaks()