1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-05 00:15:54 +01:00

Added htmlz and pdf formats.

Added HTML reader/writer and PDF reader.
This commit is contained in:
2020-04-19 13:43:16 +02:00
parent ebeca30bda
commit d2159ed60c
17 changed files with 55 additions and 65 deletions

View File

@@ -41,8 +41,7 @@ class PDFInput(InputFormatPlugin):
PDFDocument(xml, self.opts, self.log)
return os.path.join(getcwd(), 'metadata.opf')
def convert(self, stream, options, file_ext, log,
accelerators):
def convert(self, stream, options, file_ext, log, accelerators):
from ebook_converter.ebooks.metadata.opf2 import OPFCreator
from ebook_converter.ebooks.pdf.pdftohtml import pdftohtml

View File

@@ -471,7 +471,7 @@ class HTMLPreProcessor(object):
return re.search('<H2[^><]*id=BookTitle', raw) is not None
def is_pdftohtml(self, src):
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
return '<!-- created by ebook-converter\'s pdftohtml -->' in src[:1000]
def __call__(self, html, remove_special_chars=None,
get_preprocess_html=False):
@@ -627,7 +627,7 @@ class HTMLPreProcessor(object):
html = preprocessor(html)
if is_pdftohtml:
html = html.replace('<!-- created by calibre\'s pdftohtml -->', '')
html = html.replace('<!-- created by ebook-converter\'s pdftohtml -->', '')
if getattr(self.extra_opts, 'smarten_punctuation', False):
html = smarten_punctuation(html, self.log)

View File

@@ -43,7 +43,7 @@ class HeuristicProcessor(object):
self.common_in_text_beginnings = '[\\w\'\"“‘‛]'
def is_pdftohtml(self, src):
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
return '<!-- created by ebook-converter\'s pdftohtml -->' in src[:1000]
def is_abbyy(self, src):
return '<meta name="generator" content="ABBYY FineReader' in src[:1000]