1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-11 12:05:45 +01:00

Moved xml_entity_to_unicode partial to utils.entities module.

This commit is contained in:
2021-01-03 19:00:17 +01:00
parent 839cc3c79a
commit 877ea68d42
4 changed files with 13 additions and 22 deletions

View File

@@ -1,9 +1,8 @@
import re, codecs
import codecs
import re
from ebook_converter.utils import entities
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
_encoding_pats = (
# XML declaration
@@ -94,8 +93,7 @@ def find_declared_encoding(raw, limit=50*1024):
def substitute_entites(raw):
from ebook_converter import xml_entity_to_unicode
return ENTITY_PATTERN.sub(xml_entity_to_unicode, raw)
return ENTITY_PATTERN.sub(entities.xml_entity_to_unicode, raw)
_CHARSET_ALIASES = {"macintosh" : "mac-roman",

View File

@@ -1,13 +1,11 @@
import os
from ebook_converter import _ent_pat, xml_entity_to_unicode
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
from ebook_converter import _ent_pat
from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.customize.conversion import OptionRecommendation
from ebook_converter.utils import entities
__license__ = 'GPL 3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
MD_EXTENSIONS = {
'abbr': 'Abbreviations',
'admonition': 'Support admonitions',
@@ -200,7 +198,7 @@ class TXTInput(InputFormatPlugin):
txt = txt.decode(ienc, 'replace')
# Replace entities
txt = _ent_pat.sub(xml_entity_to_unicode, txt)
txt = _ent_pat.sub(entities.xml_entity_to_unicode, txt)
# Normalize line endings
txt = normalize_line_endings(txt)

View File

@@ -9,7 +9,6 @@ import textwrap
from lxml import html, etree
from ebook_converter import xml_entity_to_unicode
from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
from ebook_converter.ebooks import DRMError, unit_convert
from ebook_converter.ebooks.chardet import strip_encoding_declarations
@@ -183,7 +182,8 @@ class MobiReader(object):
self.processed_html)
self.processed_html = strip_encoding_declarations(self.processed_html)
self.processed_html = re.sub(r'&(\S+?);', xml_entity_to_unicode,
self.processed_html = re.sub(r'&(\S+?);',
entities.xml_entity_to_unicode,
self.processed_html)
image_name_map = self.extract_images(processed_records, output_dir)
self.replace_page_breaks()