Convert calibre modules to ebook_converter.

Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
2020-04-11 19:33:43 +02:00
parent 69d2e536c5
commit 0f9792df36
252 changed files with 1925 additions and 2344 deletions
@@ -10,11 +10,11 @@ import re

 from lxml import etree, html

-from calibre import xml_replace_entities, force_unicode
-from calibre.utils.xml_parse import safe_xml_fromstring
-from calibre.constants import filesystem_encoding
-from calibre.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
-from polyglot.builtins import iteritems, itervalues, unicode_type, string_or_bytes, map
+from ebook_converter import xml_replace_entities, force_unicode
+from ebook_converter.utils.xml_parse import safe_xml_fromstring
+from ebook_converter.constants import filesystem_encoding
+from ebook_converter.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
+from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type, string_or_bytes, map

 RECOVER_PARSER = etree.XMLParser(recover=True, no_network=True, resolve_entities=False)
 XHTML_NS     = 'http://www.w3.org/1999/xhtml'
@@ -94,7 +94,7 @@ def node_depth(node):

 def html5_parse(data, max_nesting_depth=100):
    from html5_parser import parse
-    from calibre.utils.cleantext import clean_xml_chars
+    from ebook_converter.utils.cleantext import clean_xml_chars
    data = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False, sanitize_names=True)
    # Check that the asinine HTML 5 algorithm did not result in a tree with
    # insane nesting depths
@@ -160,7 +160,7 @@ def check_for_html5(prefix, root):
 def parse_html(data, log=None, decoder=None, preprocessor=None,
        filename='<string>', non_html_file_tags=frozenset()):
    if log is None:
-        from calibre.utils.logging import default_log
+        from ebook_converter.utils.logging import default_log
        log = default_log

    filename = force_unicode(filename, enc=filesystem_encoding)