From dc4352fd4cbe27dcde1eac147330e6d7e939a682 Mon Sep 17 00:00:00 2001 From: gryf Date: Sun, 3 Jan 2021 19:35:03 +0100 Subject: [PATCH] Moved replace_entities to utils.entities. --- ebook_converter/__init__.py | 5 ----- ebook_converter/ebooks/html/input.py | 4 ++-- ebook_converter/ebooks/metadata/html.py | 5 +++-- ebook_converter/ebooks/mobi/reader/headers.py | 7 ++++--- ebook_converter/ebooks/mobi/reader/ncx.py | 9 +++------ ebook_converter/ebooks/oeb/polish/utils.py | 4 ++-- 6 files changed, 14 insertions(+), 20 deletions(-) diff --git a/ebook_converter/__init__.py b/ebook_converter/__init__.py index d6eece9..86292fd 100644 --- a/ebook_converter/__init__.py +++ b/ebook_converter/__init__.py @@ -29,11 +29,6 @@ class CurrentDir(object): _ent_pat = re.compile(r'&(\S+?);') -def replace_entities(raw, encoding='cp1252'): - return _ent_pat.sub(partial(entities.entity_to_unicode, encoding=encoding), - raw) - - def xml_replace_entities(raw, encoding='cp1252'): return _ent_pat.sub(partial(entities.xml_entity_to_unicode, encoding=encoding), raw) diff --git a/ebook_converter/ebooks/html/input.py b/ebook_converter/ebooks/html/input.py index d327669..a5279cf 100644 --- a/ebook_converter/ebooks/html/input.py +++ b/ebook_converter/ebooks/html/input.py @@ -8,7 +8,7 @@ import sys import urllib.parse from ebook_converter.ebooks.chardet import detect_xml_encoding -from ebook_converter import replace_entities +from ebook_converter.utils import entities class Link(object): @@ -154,7 +154,7 @@ class HTMLFile(object): url = match.group(i) if url: break - url = replace_entities(url) + url = entities.replace_entities(url) try: link = self.resolve(url) except ValueError: diff --git a/ebook_converter/ebooks/metadata/html.py b/ebook_converter/ebooks/metadata/html.py index 46dcb95..6825323 100644 --- a/ebook_converter/ebooks/metadata/html.py +++ b/ebook_converter/ebooks/metadata/html.py @@ -11,7 +11,7 @@ from lxml.etree import Comment from ebook_converter.ebooks.metadata import string_to_authors, authors_to_string from ebook_converter.ebooks.metadata.book.base import Metadata from ebook_converter.ebooks.chardet import xml_to_unicode -from ebook_converter import replace_entities +from ebook_converter.utils import entities from ebook_converter.utils.date import parse_date, is_date_undefined @@ -73,7 +73,8 @@ def handle_comment(data, comment_tags): except KeyError: pass if field: - comment_tags[field].append(replace_entities(match.group('content'))) + comment_tags[field].append( + entities.replace_entities(match.group('content'))) def parse_metadata(src): diff --git a/ebook_converter/ebooks/mobi/reader/headers.py b/ebook_converter/ebooks/mobi/reader/headers.py index 790f90a..d6524d2 100644 --- a/ebook_converter/ebooks/mobi/reader/headers.py +++ b/ebook_converter/ebooks/mobi/reader/headers.py @@ -1,13 +1,13 @@ import struct, re, os -from ebook_converter import replace_entities from ebook_converter.utils.date import parse_date from ebook_converter.ebooks.mobi import MobiError from ebook_converter.ebooks.metadata import MetaInformation, check_isbn from ebook_converter.ebooks.mobi.langcodes import main_language, sub_language, mobi2iana from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars -from ebook_converter.utils.localization import canonicalize_lang from ebook_converter.utils.config_base import tweaks +from ebook_converter.utils import entities +from ebook_converter.utils.localization import canonicalize_lang __license__ = 'GPL v3' @@ -106,7 +106,8 @@ class EXTHHeader(object): # {{{ # else: # print 'unknown record', idx, repr(content) if title: - self.mi.title = replace_entities(clean_xml_chars(clean_ascii_chars(title))) + title = clean_xml_chars(clean_ascii_chars(title)) + self.mi.title = entities.replace_entities(title) def process_metadata(self, idx, content, codec): if idx == 100: diff --git a/ebook_converter/ebooks/mobi/reader/ncx.py b/ebook_converter/ebooks/mobi/reader/ncx.py index bd0b0d7..98a6690 100644 --- a/ebook_converter/ebooks/mobi/reader/ncx.py +++ b/ebook_converter/ebooks/mobi/reader/ncx.py @@ -1,15 +1,11 @@ import os -from ebook_converter import replace_entities from ebook_converter.ebooks.metadata.toc import TOC from ebook_converter.ebooks.mobi.reader.headers import NULL_INDEX from ebook_converter.ebooks.mobi.reader.index import read_index +from ebook_converter.utils import entities -__license__ = 'GPL v3' -__copyright__ = '2012, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - tag_fieldname_map = { 1: ['pos',0], 2: ['len',0], @@ -87,7 +83,8 @@ def build_toc(index_entries): for item in level_map[lvl]: parent = num_map[item['parent']] child = parent.add_item(item['href'], item['idtag'], - replace_entities(item['text'], encoding=None)) + entities.replace_entities(item['text'], + encoding=None)) num_map[item['num']] = child # Set play orders in depth first order diff --git a/ebook_converter/ebooks/oeb/polish/utils.py b/ebook_converter/ebooks/oeb/polish/utils.py index c3077f5..d0e2d97 100644 --- a/ebook_converter/ebooks/oeb/polish/utils.py +++ b/ebook_converter/ebooks/oeb/polish/utils.py @@ -3,7 +3,7 @@ import os import re import mimetypes -from ebook_converter import replace_entities +from ebook_converter.utils import entities def _upper(string): @@ -185,7 +185,7 @@ def parse_css(data, fname='', is_declaration=False, decode=None, log_lev def handle_entities(text, func): - return func(replace_entities(text)) + return func(entities.replace_entities(text)) def apply_func_to_match_groups(match, func=_upper,