diff --git a/ebook_converter/__init__.py b/ebook_converter/__init__.py index edfaa45..d6eece9 100644 --- a/ebook_converter/__init__.py +++ b/ebook_converter/__init__.py @@ -27,12 +27,6 @@ class CurrentDir(object): _ent_pat = re.compile(r'&(\S+?);') -xml_entity_to_unicode = partial(entities.entity_to_unicode, - result_exceptions={'"': '&quot;', - "'": '&apos;', - '<': '&lt;', - '>': '&gt;', - '&': '&amp;'}) def replace_entities(raw, encoding='cp1252'): @@ -41,7 +35,8 @@ def replace_entities(raw, encoding='cp1252'): def xml_replace_entities(raw, encoding='cp1252'): - return _ent_pat.sub(partial(xml_entity_to_unicode, encoding=encoding), raw) + return _ent_pat.sub(partial(entities.xml_entity_to_unicode, + encoding=encoding), raw) def prepare_string_for_xml(raw, attribute=False): diff --git a/ebook_converter/ebooks/chardet.py b/ebook_converter/ebooks/chardet.py index ca686b7..f90962d 100644 --- a/ebook_converter/ebooks/chardet.py +++ b/ebook_converter/ebooks/chardet.py @@ -1,9 +1,8 @@ -import re, codecs +import codecs +import re +from ebook_converter.utils import entities -__license__ = 'GPL v3' -__copyright__ = '2009, Kovid Goyal ' -__docformat__ = 'restructuredtext en' _encoding_pats = ( # XML declaration @@ -94,8 +93,7 @@ def find_declared_encoding(raw, limit=50*1024): def substitute_entites(raw): - from ebook_converter import xml_entity_to_unicode - return ENTITY_PATTERN.sub(xml_entity_to_unicode, raw) + return ENTITY_PATTERN.sub(entities.xml_entity_to_unicode, raw) _CHARSET_ALIASES = {"macintosh" : "mac-roman", diff --git a/ebook_converter/ebooks/conversion/plugins/txt_input.py b/ebook_converter/ebooks/conversion/plugins/txt_input.py index 7e575a6..fafdfb2 100644 --- a/ebook_converter/ebooks/conversion/plugins/txt_input.py +++ b/ebook_converter/ebooks/conversion/plugins/txt_input.py @@ -1,13 +1,11 @@ import os -from ebook_converter import _ent_pat, xml_entity_to_unicode -from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation +from ebook_converter 
import _ent_pat +from ebook_converter.customize.conversion import InputFormatPlugin +from ebook_converter.customize.conversion import OptionRecommendation +from ebook_converter.utils import entities -__license__ = 'GPL 3' -__copyright__ = '2009, John Schember ' -__docformat__ = 'restructuredtext en' - MD_EXTENSIONS = { 'abbr': 'Abbreviations', 'admonition': 'Support admonitions', @@ -200,7 +198,7 @@ class TXTInput(InputFormatPlugin): txt = txt.decode(ienc, 'replace') # Replace entities - txt = _ent_pat.sub(xml_entity_to_unicode, txt) + txt = _ent_pat.sub(entities.xml_entity_to_unicode, txt) # Normalize line endings txt = normalize_line_endings(txt) diff --git a/ebook_converter/ebooks/mobi/reader/mobi6.py b/ebook_converter/ebooks/mobi/reader/mobi6.py index 413a753..ffdd39f 100644 --- a/ebook_converter/ebooks/mobi/reader/mobi6.py +++ b/ebook_converter/ebooks/mobi/reader/mobi6.py @@ -9,7 +9,6 @@ import textwrap from lxml import html, etree -from ebook_converter import xml_entity_to_unicode from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars from ebook_converter.ebooks import DRMError, unit_convert from ebook_converter.ebooks.chardet import strip_encoding_declarations @@ -183,7 +182,8 @@ class MobiReader(object): self.processed_html) self.processed_html = strip_encoding_declarations(self.processed_html) - self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode, + self.processed_html = re.sub(r'&(\S+?);', + entities.xml_entity_to_unicode, self.processed_html) image_name_map = self.extract_images(processed_records, output_dir) self.replace_page_breaks()