1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2025-12-18 13:10:17 +01:00

Moved xml_entity_to_unicode partial to utils.entities module.

This commit is contained in:
2021-01-03 19:00:17 +01:00
parent 839cc3c79a
commit 877ea68d42
4 changed files with 13 additions and 22 deletions

View File

@@ -27,12 +27,6 @@ class CurrentDir(object):
_ent_pat = re.compile(r'&(\S+?);')
xml_entity_to_unicode = partial(entities.entity_to_unicode,
result_exceptions={'"': '&quot;',
"'": '&apos;',
'<': '&lt;',
'>': '&gt;',
'&': '&amp;'})
def replace_entities(raw, encoding='cp1252'):
@@ -41,7 +35,8 @@ def replace_entities(raw, encoding='cp1252'):
def xml_replace_entities(raw, encoding='cp1252'):
return _ent_pat.sub(partial(xml_entity_to_unicode, encoding=encoding), raw)
return _ent_pat.sub(partial(entities.xml_entity_to_unicode,
encoding=encoding), raw)
def prepare_string_for_xml(raw, attribute=False):

View File

@@ -1,9 +1,8 @@
import re, codecs
import codecs
import re
from ebook_converter.utils import entities
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
_encoding_pats = (
# XML declaration
@@ -94,8 +93,7 @@ def find_declared_encoding(raw, limit=50*1024):
def substitute_entites(raw):
from ebook_converter import xml_entity_to_unicode
return ENTITY_PATTERN.sub(xml_entity_to_unicode, raw)
return ENTITY_PATTERN.sub(entities.xml_entity_to_unicode, raw)
_CHARSET_ALIASES = {"macintosh" : "mac-roman",

View File

@@ -1,13 +1,11 @@
import os
from ebook_converter import _ent_pat, xml_entity_to_unicode
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
from ebook_converter import _ent_pat
from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.customize.conversion import OptionRecommendation
from ebook_converter.utils import entities
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
MD_EXTENSIONS = {
'abbr': 'Abbreviations',
'admonition': 'Support admonitions',
@@ -200,7 +198,7 @@ class TXTInput(InputFormatPlugin):
txt = txt.decode(ienc, 'replace')
# Replace entities
txt = _ent_pat.sub(xml_entity_to_unicode, txt)
txt = _ent_pat.sub(entities.xml_entity_to_unicode, txt)
# Normalize line endings
txt = normalize_line_endings(txt)

View File

@@ -9,7 +9,6 @@ import textwrap
from lxml import html, etree
from ebook_converter import xml_entity_to_unicode
from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
from ebook_converter.ebooks import DRMError, unit_convert
from ebook_converter.ebooks.chardet import strip_encoding_declarations
@@ -183,7 +182,8 @@ class MobiReader(object):
self.processed_html)
self.processed_html = strip_encoding_declarations(self.processed_html)
self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
self.processed_html = re.sub(r'&(\S+?);',
entities.xml_entity_to_unicode,
self.processed_html)
image_name_map = self.extract_images(processed_records, output_dir)
self.replace_page_breaks()