From 018676c02697573a9064f8e28fa9e2024a21f67e Mon Sep 17 00:00:00 2001
From: gryf <git@vimja.com>
Date: Sun, 3 Jan 2021 19:47:49 +0100
Subject: [PATCH] Moved prepare_string_for_xml to utils.entities.

---
 ebook_converter/__init__.py                   | 11 ------
 .../ebooks/conversion/plugins/txt_input.py    |  3 +-
 ebook_converter/ebooks/fb2/fb2ml.py           | 39 +++++++++++--------
 ebook_converter/ebooks/htmlz/oeb2html.py      | 18 +++++----
 ebook_converter/ebooks/lrf/objects.py         |  4 +-
 ebook_converter/ebooks/pdb/haodoo/reader.py   | 10 ++---
 ebook_converter/ebooks/txt/processor.py       |  5 ++-
 ebook_converter/library/comments.py           |  4 +-
 8 files changed, 45 insertions(+), 49 deletions(-)
diff --git a/ebook_converter/__init__.py b/ebook_converter/__init__.py
index 8fec1d1..7a1d0f4 100644
--- a/ebook_converter/__init__.py
+++ b/ebook_converter/__init__.py
@@ -24,14 +24,3 @@ class CurrentDir(object):
         except EnvironmentError:
             # The previous CWD no longer exists
             pass
-
-
-_ent_pat = re.compile(r'&(\S+?);')
-
-
-def prepare_string_for_xml(raw, attribute=False):
-    raw = _ent_pat.sub(entities.entity_to_unicode, raw)
-    raw = raw.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
-    if attribute:
-        raw = raw.replace('"', '&quot;').replace("'", '&apos;')
-    return raw
diff --git a/ebook_converter/ebooks/conversion/plugins/txt_input.py b/ebook_converter/ebooks/conversion/plugins/txt_input.py
index fafdfb2..363e937 100644
--- a/ebook_converter/ebooks/conversion/plugins/txt_input.py
+++ b/ebook_converter/ebooks/conversion/plugins/txt_input.py
@@ -1,6 +1,5 @@
 import os
 
-from ebook_converter import _ent_pat
 from ebook_converter.customize.conversion import InputFormatPlugin
 from ebook_converter.customize.conversion import OptionRecommendation
 from ebook_converter.utils import entities
@@ -198,7 +197,7 @@ class TXTInput(InputFormatPlugin):
         txt = txt.decode(ienc, 'replace')
 
         # Replace entities
-        txt = _ent_pat.sub(entities.xml_entity_to_unicode, txt)
+        txt = entities.ENT_PAT.sub(entities.xml_entity_to_unicode, txt)
 
         # Normalize line endings
         txt = normalize_line_endings(txt)
diff --git a/ebook_converter/ebooks/fb2/fb2ml.py b/ebook_converter/ebooks/fb2/fb2ml.py
index 6e3c086..2c84af9 100644
--- a/ebook_converter/ebooks/fb2/fb2ml.py
+++ b/ebook_converter/ebooks/fb2/fb2ml.py
@@ -10,11 +10,11 @@ import uuid
 from lxml import etree
 
 from ebook_converter import constants as const
-from ebook_converter import prepare_string_for_xml
 from ebook_converter.constants_old import __appname__, __version__
 from ebook_converter.ebooks.oeb import base
 from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.polyglot.binary import as_base64_unicode
+from ebook_converter.utils import entities
 from ebook_converter.utils.img import save_cover_data_to
 from ebook_converter.utils.localization import lang_as_iso639_1
 
@@ -149,13 +149,16 @@ class FB2MLizer(object):
                 author_middle = ' '.join(author_parts[1:-1])
                 author_last = author_parts[-1]
             metadata['author'] += '<author>'
-            metadata['author'] += ('<first-name>%s</first-name>' %
-                                   prepare_string_for_xml(author_first))
+            metadata['author'] += (
+                '<first-name>%s</first-name>' %
+                entities.prepare_string_for_xml(author_first))
             if author_middle:
-                metadata['author'] += ('<middle-name>%s</middle-name>' %
-                                       prepare_string_for_xml(author_middle))
-            metadata['author'] += ('<last-name>%s</last-name>' %
-                                   prepare_string_for_xml(author_last))
+                metadata['author'] += (
+                    '<middle-name>%s</middle-name>' %
+                    entities.prepare_string_for_xml(author_middle))
+            metadata['author'] += (
+                '<last-name>%s</last-name>' %
+                entities.prepare_string_for_xml(author_last))
             metadata['author'] += '</author>'
         if not metadata['author']:
             metadata['author'] = ('<author><first-name></first-name>'
@@ -164,7 +167,7 @@ class FB2MLizer(object):
         metadata['keywords'] = ''
         tags = list(map(str, self.oeb_book.metadata.subject))
         if tags:
-            tags = ', '.join(prepare_string_for_xml(x) for x in tags)
+            tags = ', '.join(entities.prepare_string_for_xml(x) for x in tags)
             metadata['keywords'] = '<keywords>%s</keywords>' % tags
 
         metadata['sequence'] = ''
@@ -172,7 +175,8 @@ class FB2MLizer(object):
             index = '1'
             if self.oeb_book.metadata.series_index:
                 index = self.oeb_book.metadata.series_index[0]
-            seq = prepare_string_for_xml(str(self.oeb_book.metadata.series[0]))
+            seq = entities.prepare_string_for_xml(str(self.oeb_book.metadata
+                                                      .series[0]))
             metadata['sequence'] = ('<sequence name="%s" number="%s"/>' %
                                     (seq, index))
 
@@ -193,7 +197,8 @@ class FB2MLizer(object):
             pass
         else:
             year = ('<year>%s</year>' %
-                    prepare_string_for_xml(date.value.partition('-')[0]))
+                    entities.prepare_string_for_xml(date.value
+                                                    .partition('-')[0]))
 
         try:
             publisher = self.oeb_book.metadata['publisher'][0]
@@ -201,11 +206,12 @@ class FB2MLizer(object):
             pass
         else:
             publisher = ('<publisher>%s</publisher>' %
-                         prepare_string_for_xml(publisher.value))
+                         entities.prepare_string_for_xml(publisher.value))
 
         for x in identifiers:
             if x.get(base.tag('opf', 'scheme'), None).lower() == 'isbn':
-                isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)
+                isbn = ('<isbn>%s</isbn>' %
+                        entities.prepare_string_for_xml(x.value))
 
         metadata['year'] = year
         metadata['isbn'] = isbn
@@ -213,7 +219,7 @@ class FB2MLizer(object):
         for key, value in metadata.items():
             if key not in ('author', 'cover', 'sequence', 'keywords', 'year',
                            'publisher', 'isbn'):
-                metadata[key] = prepare_string_for_xml(value)
+                metadata[key] = entities.prepare_string_for_xml(value)
 
         try:
             comments = self.oeb_book.metadata['description'][0]
@@ -221,7 +227,8 @@ class FB2MLizer(object):
             metadata['comments'] = ''
         else:
             from ebook_converter.utils.html2text import html2text
-            annot = prepare_string_for_xml(html2text(comments.value).strip())
+            annot = entities.prepare_string_for_xml(html2text(comments
+                                                              .value).strip())
             metadata['comments'] = f'<annotation><p>{annot}</p></annotation>'
 
         # Keep the indentation level of the description the same as the body.
@@ -583,7 +590,7 @@ class FB2MLizer(object):
         if hasattr(elem_tree, 'text') and elem_tree.text:
             if not self.in_p:
                 fb2_out.append('<p>')
-            fb2_out.append(prepare_string_for_xml(elem_tree.text))
+            fb2_out.append(entities.prepare_string_for_xml(elem_tree.text))
             if not self.in_p:
                 fb2_out.append('</p>')
 
@@ -600,7 +607,7 @@ class FB2MLizer(object):
         if hasattr(elem_tree, 'tail') and elem_tree.tail:
             if not self.in_p:
                 fb2_out.append('<p>')
-            fb2_out.append(prepare_string_for_xml(elem_tree.tail))
+            fb2_out.append(entities.prepare_string_for_xml(elem_tree.tail))
             if not self.in_p:
                 fb2_out.append('</p>')
 
diff --git a/ebook_converter/ebooks/htmlz/oeb2html.py b/ebook_converter/ebooks/htmlz/oeb2html.py
index 42b0742..c27e103 100644
--- a/ebook_converter/ebooks/htmlz/oeb2html.py
+++ b/ebook_converter/ebooks/htmlz/oeb2html.py
@@ -8,11 +8,11 @@ import urllib.parse
 from functools import partial
 from lxml import html
 
-from ebook_converter import prepare_string_for_xml
 from ebook_converter import constants as const
 from ebook_converter.ebooks.oeb import base
 from ebook_converter.ebooks.oeb import parse_utils
 from ebook_converter.ebooks.oeb.stylizer import Stylizer
+from ebook_converter.utils import entities
 from ebook_converter.utils.logging import default_log
 from ebook_converter.polyglot.builtins import as_bytes
 
@@ -57,7 +57,7 @@ class OEB2HTML(object):
     def mlize_spine(self, oeb_book):
         output = [
             u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /><title>%s</title></head><body>' % (
-                prepare_string_for_xml(self.book_title))
+                entities.prepare_string_for_xml(self.book_title))
         ]
         for item in oeb_book.spine:
             self.log.debug('Converting %s to HTML...' % item.href)
@@ -136,7 +136,7 @@ class OEB2HTML(object):
         return css
 
     def prepare_string_for_html(self, raw):
-        raw = prepare_string_for_xml(raw)
+        raw = entities.prepare_string_for_xml(raw)
         raw = raw.replace(u'\u00ad', '&shy;')
         raw = raw.replace(u'\u2014', '&mdash;')
         raw = raw.replace(u'\u2013', '&ndash;')
@@ -189,7 +189,8 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
         # Turn the rest of the attributes into a string we can write with the tag.
         at = ''
         for k, v in attribs.items():
-            at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
+            at += ' %s="%s"' % (k, entities
+                                .prepare_string_for_xml(v, attribute=True))
 
         # Write the tag.
         text.append('<%s%s' % (tag, at))
@@ -281,7 +282,8 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
         # Turn the rest of the attributes into a string we can write with the tag.
         at = ''
         for k, v in attribs.items():
-            at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
+            at += ' %s="%s"' % (k, entities
+                                .prepare_string_for_xml(v, attribute=True))
 
         # Turn style into strings for putting in the tag.
         style_t = ''
@@ -336,7 +338,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
             css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
         else:
             css =  u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
-        title = u'<title>%s</title>' % prepare_string_for_xml(self.book_title)
+        title = (u'<title>%s</title>' %
+                 entities.prepare_string_for_xml(self.book_title))
         output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />'] + \
             [css] + [title, u'</head><body>'] + output + [u'</body></html>']
         return ''.join(output)
@@ -373,7 +376,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
         # Turn the rest of the attributes into a string we can write with the tag.
         at = ''
         for k, v in attribs.items():
-            at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
+            at += ' %s="%s"' % (k,
+                entities.prepare_string_for_xml(v, attribute=True))
 
         # Write the tag.
         text.append('<%s%s' % (tag, at))
diff --git a/ebook_converter/ebooks/lrf/objects.py b/ebook_converter/ebooks/lrf/objects.py
index 81c93ba..b3773c9 100644
--- a/ebook_converter/ebooks/lrf/objects.py
+++ b/ebook_converter/ebooks/lrf/objects.py
@@ -6,7 +6,7 @@ import struct
 import zlib
 
 from ebook_converter.ebooks.lrf import LRFParseError, PRS500_PROFILE
-from ebook_converter import prepare_string_for_xml
+from ebook_converter.utils import entities
 from ebook_converter.ebooks.lrf.tags import Tag
 from ebook_converter.utils import entities
 
@@ -877,7 +877,7 @@ class Text(LRFStream):
         open_containers = collections.deque()
         for c in self.content:
             if isinstance(c, str):
-                s += prepare_string_for_xml(c).replace('\0', '')
+                s += entities.prepare_string_for_xml(c).replace('\0', '')
             elif c is None:
                 if open_containers:
                     p = open_containers.pop()
diff --git a/ebook_converter/ebooks/pdb/haodoo/reader.py b/ebook_converter/ebooks/pdb/haodoo/reader.py
index 74e6a18..ec611f5 100644
--- a/ebook_converter/ebooks/pdb/haodoo/reader.py
+++ b/ebook_converter/ebooks/pdb/haodoo/reader.py
@@ -1,19 +1,15 @@
 """
 Read content from Haodoo.net pdb file.
 """
-import struct
 import os
+import struct
 
-from ebook_converter import prepare_string_for_xml
 from ebook_converter.ebooks.pdb.formatreader import FormatReader
 from ebook_converter.ebooks.metadata import MetaInformation
 from ebook_converter.ebooks.txt.processor import opf_writer, HTML_TEMPLATE
+from ebook_converter.utils import entities
 
 
-__license__ = 'GPL v3'
-__copyright__ = '2012, Kan-Ru Chen <kanru@kanru.info>'
-__docformat__ = 'restructuredtext en'
-
 BPDB_IDENT = b'BOOKMTIT'
 UPDB_IDENT = b'BOOKMTIU'
 
@@ -133,7 +129,7 @@ class Reader(FormatReader):
                     line = '<h1 class="chapter">' + line + '</h1>\n'
                     title_added = True
                 else:
-                    line = prepare_string_for_xml(line)
+                    line = entities.prepare_string_for_xml(line)
                 lines.append('<p>%s</p>' % line)
             if not title_added:
                 lines.insert(0, '<h1 class="chapter">' + title + '</h1>\n')
diff --git a/ebook_converter/ebooks/txt/processor.py b/ebook_converter/ebooks/txt/processor.py
index 579159b..ecdfc93 100644
--- a/ebook_converter/ebooks/txt/processor.py
+++ b/ebook_converter/ebooks/txt/processor.py
@@ -3,11 +3,11 @@ Read content from txt file.
 """
 import os, re
 
-from ebook_converter import prepare_string_for_xml
 from ebook_converter.ebooks.metadata.opf2 import OPFCreator
 
 from ebook_converter.ebooks.conversion.preprocess import DocAnalysis
 from ebook_converter.utils.cleantext import clean_ascii_chars
+from ebook_converter.utils import entities
 
 
 HTML_TEMPLATE = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
@@ -87,7 +87,8 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
     for line in txt.split('\n'):
         if line.strip():
             blank_count = 0
-            lines.append(u'<p>%s</p>' % prepare_string_for_xml(line.replace('\n', ' ')))
+            lines.append(u'<p>%s</p>' % entities
+                         .prepare_string_for_xml(line.replace('\n', ' ')))
         else:
             blank_count += 1
             if blank_count == 2:
diff --git a/ebook_converter/library/comments.py b/ebook_converter/library/comments.py
index 05a8a64..6e550d3 100644
--- a/ebook_converter/library/comments.py
+++ b/ebook_converter/library/comments.py
@@ -2,10 +2,10 @@ import re
 
 import bs4
 
-from ebook_converter import prepare_string_for_xml
 from ebook_converter.constants_old import preferred_encoding
 from ebook_converter.ebooks.BeautifulSoup import html5_parser
 from ebook_converter.utils.html2text import html2text
+from ebook_converter.utils import entities
 
 
 # Hackish - ignoring sentences ending or beginning in numbers to avoid
@@ -51,7 +51,7 @@ def comments_to_html(comments):
         return comments
 
     if '<' not in comments:
-        comments = prepare_string_for_xml(comments)
+        comments = entities.prepare_string_for_xml(comments)
         parts = [u'<p class="description">%s</p>'%x.replace(u'\n', u'<br />')
                 for x in comments.split('\n\n')]
         return '\n'.join(parts)