Moved entity_to_unicode function to utils.entities module.

2021-01-03 18:52:13 +01:00
parent 3152c52839
commit 839cc3c79a
6 changed files with 20 additions and 76 deletions
@@ -3,14 +3,14 @@ import json
 import math
 import re

-from ebook_converter import entity_to_unicode
+from ebook_converter.utils import entities


 XMLDECL_RE = re.compile(r'^\s*<[?]xml.*?[?]>')
 SVG_NS = 'http://www.w3.org/2000/svg'
 XLINK_NS = 'http://www.w3.org/1999/xlink'

-convert_entities = functools.partial(entity_to_unicode,
+convert_entities = functools.partial(entities.entity_to_unicode,
                                     result_exceptions={'<': '&lt;',
                                                        '>': '&gt;',
                                                        "'": '&apos;',
@@ -21,7 +21,6 @@ import math
 import bs4
 from PIL import Image as PILImage

-from ebook_converter import entity_to_unicode
 from ebook_converter.constants_old import __appname__, filesystem_encoding, \
        preferred_encoding
 from ebook_converter.devices.interface import DevicePlugin as Device
@@ -39,6 +38,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import (
 from ebook_converter.ptempfile import PersistentTemporaryFile
 from ebook_converter.utils import encoding as uenc
 from ebook_converter.utils import img as uimg
+from ebook_converter.utils import entities


 def strip_style_comments(match):
@@ -90,7 +90,7 @@ MARKUP_MASSAGE = [  # Close <a /> tags

                  # Replace entities
                  (re.compile(r'&(\S+?);'),
-                   functools.partial(entity_to_unicode,
+                   functools.partial(entities.entity_to_unicode,
                                     exceptions=['lt', 'gt', 'amp', 'quot'])),

                  # Remove comments from within style tags as they can mess up
@@ -6,8 +6,9 @@ import struct
 import zlib

 from ebook_converter.ebooks.lrf import LRFParseError, PRS500_PROFILE
-from ebook_converter import entity_to_unicode, prepare_string_for_xml
+from ebook_converter import prepare_string_for_xml
 from ebook_converter.ebooks.lrf.tags import Tag
+from ebook_converter.utils import entities

 ruby_tags = {0xF575: ['rubyAlignAndAdjust', 'W'],
             0xF576: ['rubyoverhang', 'W', {0: 'none', 1: 'auto'}],
@@ -713,7 +714,8 @@ class Text(LRFStream):
        s = str(text, "utf-16-le")
        if s:
            s = s.translate(self.text_map)
-            self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
+            self.content.append(self.entity_pattern
+                                .sub(entities.entity_to_unicode, s))

    def end_container(self, tag, stream):
        self.content.append(None)
@@ -50,7 +50,7 @@ DEFAULT_SOURCE_ENCODING = "cp1252"      # default is us-windows character set
 DEFAULT_GENREADING      = "fs"          # default is yes to both lrf and lrs

 from ebook_converter.constants_old import __appname__, __version__
-from ebook_converter import entity_to_unicode
+from ebook_converter.utils import entities


 class LrsError(Exception):
@@ -737,7 +737,8 @@ class TableOfContents(object):
 class TocLabel(object):

    def __init__(self, label, textBlock):
-        self.label = escape(re.sub(r'&(\S+?);', entity_to_unicode, label))
+        self.label = escape(re.sub(r'&(\S+?);', entities.entity_to_unicode,
+                                   label))
        self.textBlock = textBlock

    def toElement(self, se):
@@ -9,7 +9,7 @@ import textwrap

 from lxml import html, etree

-from ebook_converter import xml_entity_to_unicode, entity_to_unicode
+from ebook_converter import xml_entity_to_unicode
 from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
 from ebook_converter.ebooks import DRMError, unit_convert
 from ebook_converter.ebooks.chardet import strip_encoding_declarations
@@ -20,6 +20,7 @@ from ebook_converter.ebooks.metadata import MetaInformation
 from ebook_converter.ebooks.metadata.opf2 import OPFCreator, OPF
 from ebook_converter.ebooks.metadata.toc import TOC
 from ebook_converter.ebooks.mobi.reader.headers import BookHeader
+from ebook_converter.utils import entities
 from ebook_converter.utils.img import save_cover_data_to, gif_data_to_png_data
 from ebook_converter.utils.img import AnimatedGIF
 from ebook_converter.utils.imghdr import what
@@ -759,7 +760,8 @@ class MobiReader(object):
                                                         ':text()')])
                            except Exception:
                                text = ''
-                            text = ent_pat.sub(entity_to_unicode, text)
+                            text = ent_pat.sub(entities.entity_to_unicode,
+                                               text)
                            item = tocobj.add_item(toc.partition('#')[0],
                                                   href[1:], text)
                            item.left_space = int(self.get_left_whitespace(x))