Added htmlz and pdf formats.

Added HTML reader/writer and PDF reader.
2020-04-19 13:43:16 +02:00
parent ebeca30bda
commit d2159ed60c
17 changed files with 55 additions and 65 deletions
@@ -41,8 +41,7 @@ class PDFInput(InputFormatPlugin):
        PDFDocument(xml, self.opts, self.log)
        return os.path.join(getcwd(), 'metadata.opf')

-    def convert(self, stream, options, file_ext, log,
-                accelerators):
+    def convert(self, stream, options, file_ext, log, accelerators):
        from ebook_converter.ebooks.metadata.opf2 import OPFCreator
        from ebook_converter.ebooks.pdf.pdftohtml import pdftohtml

@@ -471,7 +471,7 @@ class HTMLPreProcessor(object):
        return re.search('<H2[^><]*id=BookTitle', raw) is not None

    def is_pdftohtml(self, src):
-        return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
+        return '<!-- created by ebook-converter\'s pdftohtml -->' in src[:1000]

    def __call__(self, html, remove_special_chars=None,
            get_preprocess_html=False):
@@ -627,7 +627,7 @@ class HTMLPreProcessor(object):
            html = preprocessor(html)

        if is_pdftohtml:
-            html = html.replace('<!-- created by calibre\'s pdftohtml -->', '')
+            html = html.replace('<!-- created by ebook-converter\'s pdftohtml -->', '')

        if getattr(self.extra_opts, 'smarten_punctuation', False):
            html = smarten_punctuation(html, self.log)
@@ -43,7 +43,7 @@ class HeuristicProcessor(object):
        self.common_in_text_beginnings = '[\\w\'\"“‘‛]'

    def is_pdftohtml(self, src):
-        return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
+        return '<!-- created by ebook-converter\'s pdftohtml -->' in src[:1000]

    def is_abbyy(self, src):
        return '<meta name="generator" content="ABBYY FineReader' in src[:1000]