diff --git a/ebook_converter/__init__.py b/ebook_converter/__init__.py
index 8fec1d1..7a1d0f4 100644
--- a/ebook_converter/__init__.py
+++ b/ebook_converter/__init__.py
@@ -24,14 +24,3 @@ class CurrentDir(object):
except EnvironmentError:
# The previous CWD no longer exists
pass
-
-
-_ent_pat = re.compile(r'&(\S+?);')
-
-
-def prepare_string_for_xml(raw, attribute=False):
- raw = _ent_pat.sub(entities.entity_to_unicode, raw)
- raw = raw.replace('&', '&').replace('<', '<').replace('>', '>')
- if attribute:
- raw = raw.replace('"', '"').replace("'", ''')
- return raw
diff --git a/ebook_converter/ebooks/conversion/plugins/txt_input.py b/ebook_converter/ebooks/conversion/plugins/txt_input.py
index fafdfb2..363e937 100644
--- a/ebook_converter/ebooks/conversion/plugins/txt_input.py
+++ b/ebook_converter/ebooks/conversion/plugins/txt_input.py
@@ -1,6 +1,5 @@
import os
-from ebook_converter import _ent_pat
from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.customize.conversion import OptionRecommendation
from ebook_converter.utils import entities
@@ -198,7 +197,7 @@ class TXTInput(InputFormatPlugin):
txt = txt.decode(ienc, 'replace')
# Replace entities
- txt = _ent_pat.sub(entities.xml_entity_to_unicode, txt)
+ txt = entities.ENT_PAT.sub(entities.xml_entity_to_unicode, txt)
# Normalize line endings
txt = normalize_line_endings(txt)
diff --git a/ebook_converter/ebooks/fb2/fb2ml.py b/ebook_converter/ebooks/fb2/fb2ml.py
index 6e3c086..2c84af9 100644
--- a/ebook_converter/ebooks/fb2/fb2ml.py
+++ b/ebook_converter/ebooks/fb2/fb2ml.py
@@ -10,11 +10,11 @@ import uuid
from lxml import etree
from ebook_converter import constants as const
-from ebook_converter import prepare_string_for_xml
from ebook_converter.constants_old import __appname__, __version__
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.polyglot.binary import as_base64_unicode
+from ebook_converter.utils import entities
from ebook_converter.utils.img import save_cover_data_to
from ebook_converter.utils.localization import lang_as_iso639_1
@@ -149,13 +149,16 @@ class FB2MLizer(object):
author_middle = ' '.join(author_parts[1:-1])
author_last = author_parts[-1]
metadata['author'] += ''
- metadata['author'] += ('%s' %
- prepare_string_for_xml(author_first))
+ metadata['author'] += (
+ '%s' %
+ entities.prepare_string_for_xml(author_first))
if author_middle:
- metadata['author'] += ('%s' %
- prepare_string_for_xml(author_middle))
- metadata['author'] += ('%s' %
- prepare_string_for_xml(author_last))
+ metadata['author'] += (
+ '%s' %
+ entities.prepare_string_for_xml(author_middle))
+ metadata['author'] += (
+ '%s' %
+ entities.prepare_string_for_xml(author_last))
metadata['author'] += ''
if not metadata['author']:
metadata['author'] = (''
@@ -164,7 +167,7 @@ class FB2MLizer(object):
metadata['keywords'] = ''
tags = list(map(str, self.oeb_book.metadata.subject))
if tags:
- tags = ', '.join(prepare_string_for_xml(x) for x in tags)
+ tags = ', '.join(entities.prepare_string_for_xml(x) for x in tags)
metadata['keywords'] = '%s' % tags
metadata['sequence'] = ''
@@ -172,7 +175,8 @@ class FB2MLizer(object):
index = '1'
if self.oeb_book.metadata.series_index:
index = self.oeb_book.metadata.series_index[0]
- seq = prepare_string_for_xml(str(self.oeb_book.metadata.series[0]))
+ seq = entities.prepare_string_for_xml(str(self.oeb_book.metadata
+ .series[0]))
metadata['sequence'] = ('' %
(seq, index))
@@ -193,7 +197,8 @@ class FB2MLizer(object):
pass
else:
year = ('%s' %
- prepare_string_for_xml(date.value.partition('-')[0]))
+ entities.prepare_string_for_xml(date.value
+ .partition('-')[0]))
try:
publisher = self.oeb_book.metadata['publisher'][0]
@@ -201,11 +206,12 @@ class FB2MLizer(object):
pass
else:
publisher = ('%s' %
- prepare_string_for_xml(publisher.value))
+ entities.prepare_string_for_xml(publisher.value))
for x in identifiers:
if x.get(base.tag('opf', 'scheme'), None).lower() == 'isbn':
- isbn = '%s' % prepare_string_for_xml(x.value)
+ isbn = ('%s' %
+ entities.prepare_string_for_xml(x.value))
metadata['year'] = year
metadata['isbn'] = isbn
@@ -213,7 +219,7 @@ class FB2MLizer(object):
for key, value in metadata.items():
if key not in ('author', 'cover', 'sequence', 'keywords', 'year',
'publisher', 'isbn'):
- metadata[key] = prepare_string_for_xml(value)
+ metadata[key] = entities.prepare_string_for_xml(value)
try:
comments = self.oeb_book.metadata['description'][0]
@@ -221,7 +227,8 @@ class FB2MLizer(object):
metadata['comments'] = ''
else:
from ebook_converter.utils.html2text import html2text
- annot = prepare_string_for_xml(html2text(comments.value).strip())
+ annot = entities.prepare_string_for_xml(html2text(comments
+ .value).strip())
metadata['comments'] = f'
{annot}
'
# Keep the indentation level of the description the same as the body.
@@ -583,7 +590,7 @@ class FB2MLizer(object):
if hasattr(elem_tree, 'text') and elem_tree.text:
if not self.in_p:
fb2_out.append('
')
- fb2_out.append(prepare_string_for_xml(elem_tree.text))
+ fb2_out.append(entities.prepare_string_for_xml(elem_tree.text))
if not self.in_p:
fb2_out.append('
')
@@ -600,7 +607,7 @@ class FB2MLizer(object):
if hasattr(elem_tree, 'tail') and elem_tree.tail:
if not self.in_p:
fb2_out.append('
')
- fb2_out.append(prepare_string_for_xml(elem_tree.tail))
+ fb2_out.append(entities.prepare_string_for_xml(elem_tree.tail))
if not self.in_p:
fb2_out.append('
')
diff --git a/ebook_converter/ebooks/htmlz/oeb2html.py b/ebook_converter/ebooks/htmlz/oeb2html.py
index 42b0742..c27e103 100644
--- a/ebook_converter/ebooks/htmlz/oeb2html.py
+++ b/ebook_converter/ebooks/htmlz/oeb2html.py
@@ -8,11 +8,11 @@ import urllib.parse
from functools import partial
from lxml import html
-from ebook_converter import prepare_string_for_xml
from ebook_converter import constants as const
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.ebooks.oeb.stylizer import Stylizer
+from ebook_converter.utils import entities
from ebook_converter.utils.logging import default_log
from ebook_converter.polyglot.builtins import as_bytes
@@ -57,7 +57,7 @@ class OEB2HTML(object):
def mlize_spine(self, oeb_book):
output = [
u'%s' % (
- prepare_string_for_xml(self.book_title))
+ entities.prepare_string_for_xml(self.book_title))
]
for item in oeb_book.spine:
self.log.debug('Converting %s to HTML...' % item.href)
@@ -136,7 +136,7 @@ class OEB2HTML(object):
return css
def prepare_string_for_html(self, raw):
- raw = prepare_string_for_xml(raw)
+ raw = entities.prepare_string_for_xml(raw)
raw = raw.replace(u'\u00ad', '')
raw = raw.replace(u'\u2014', '—')
raw = raw.replace(u'\u2013', '–')
@@ -189,7 +189,8 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
# Turn the rest of the attributes into a string we can write with the tag.
at = ''
for k, v in attribs.items():
- at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
+ at += ' %s="%s"' % (k, entities
+ .prepare_string_for_xml(v, attribute=True))
# Write the tag.
text.append('<%s%s' % (tag, at))
@@ -281,7 +282,8 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
# Turn the rest of the attributes into a string we can write with the tag.
at = ''
for k, v in attribs.items():
- at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
+ at += ' %s="%s"' % (k, entities
+ .prepare_string_for_xml(v, attribute=True))
# Turn style into strings for putting in the tag.
style_t = ''
@@ -336,7 +338,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
css = u''
else:
css = u''
- title = u'%s' % prepare_string_for_xml(self.book_title)
+ title = (u'%s' %
+ entities.prepare_string_for_xml(self.book_title))
output = [u''] + \
[css] + [title, u''] + output + [u'']
return ''.join(output)
@@ -373,7 +376,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
# Turn the rest of the attributes into a string we can write with the tag.
at = ''
for k, v in attribs.items():
- at += ' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
+ at += ' %s="%s"' % (k,
+ entities.prepare_string_for_xml(v, attribute=True))
# Write the tag.
text.append('<%s%s' % (tag, at))
diff --git a/ebook_converter/ebooks/lrf/objects.py b/ebook_converter/ebooks/lrf/objects.py
index 81c93ba..b3773c9 100644
--- a/ebook_converter/ebooks/lrf/objects.py
+++ b/ebook_converter/ebooks/lrf/objects.py
@@ -6,7 +6,7 @@ import struct
import zlib
from ebook_converter.ebooks.lrf import LRFParseError, PRS500_PROFILE
-from ebook_converter import prepare_string_for_xml
+from ebook_converter.utils import entities
from ebook_converter.ebooks.lrf.tags import Tag
from ebook_converter.utils import entities
@@ -877,7 +877,7 @@ class Text(LRFStream):
open_containers = collections.deque()
for c in self.content:
if isinstance(c, str):
- s += prepare_string_for_xml(c).replace('\0', '')
+ s += entities.prepare_string_for_xml(c).replace('\0', '')
elif c is None:
if open_containers:
p = open_containers.pop()
diff --git a/ebook_converter/ebooks/pdb/haodoo/reader.py b/ebook_converter/ebooks/pdb/haodoo/reader.py
index 74e6a18..ec611f5 100644
--- a/ebook_converter/ebooks/pdb/haodoo/reader.py
+++ b/ebook_converter/ebooks/pdb/haodoo/reader.py
@@ -1,19 +1,15 @@
"""
Read content from Haodoo.net pdb file.
"""
-import struct
import os
+import struct
-from ebook_converter import prepare_string_for_xml
from ebook_converter.ebooks.pdb.formatreader import FormatReader
from ebook_converter.ebooks.metadata import MetaInformation
from ebook_converter.ebooks.txt.processor import opf_writer, HTML_TEMPLATE
+from ebook_converter.utils import entities
-__license__ = 'GPL v3'
-__copyright__ = '2012, Kan-Ru Chen '
-__docformat__ = 'restructuredtext en'
-
BPDB_IDENT = b'BOOKMTIT'
UPDB_IDENT = b'BOOKMTIU'
@@ -133,7 +129,7 @@ class Reader(FormatReader):
line = '
' + line + '
\n'
title_added = True
else:
- line = prepare_string_for_xml(line)
+ line = entities.prepare_string_for_xml(line)
lines.append('
%s
' % line)
if not title_added:
lines.insert(0, '
' + title + '
\n')
diff --git a/ebook_converter/ebooks/txt/processor.py b/ebook_converter/ebooks/txt/processor.py
index 579159b..ecdfc93 100644
--- a/ebook_converter/ebooks/txt/processor.py
+++ b/ebook_converter/ebooks/txt/processor.py
@@ -3,11 +3,11 @@ Read content from txt file.
"""
import os, re
-from ebook_converter import prepare_string_for_xml
from ebook_converter.ebooks.metadata.opf2 import OPFCreator
from ebook_converter.ebooks.conversion.preprocess import DocAnalysis
from ebook_converter.utils.cleantext import clean_ascii_chars
+from ebook_converter.utils import entities
HTML_TEMPLATE = '%s \n%s\n'
@@ -87,7 +87,8 @@ def convert_basic(txt, title='', epub_split_size_kb=0):
for line in txt.split('\n'):
if line.strip():
blank_count = 0
- lines.append(u'