mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-05 00:15:54 +01:00
Added htmlz and pdf formats.
Added HTML reader/writer and PDF reader.
This commit is contained in:
@@ -41,8 +41,7 @@ class PDFInput(InputFormatPlugin):
|
||||
PDFDocument(xml, self.opts, self.log)
|
||||
return os.path.join(getcwd(), 'metadata.opf')
|
||||
|
||||
- def convert(self, stream, options, file_ext, log,
|
||||
- accelerators):
|
||||
+ def convert(self, stream, options, file_ext, log, accelerators):
|
||||
from ebook_converter.ebooks.metadata.opf2 import OPFCreator
|
||||
from ebook_converter.ebooks.pdf.pdftohtml import pdftohtml
|
||||
|
||||
|
||||
@@ -471,7 +471,7 @@ class HTMLPreProcessor(object):
|
||||
return re.search('<H2[^><]*id=BookTitle', raw) is not None
|
||||
|
||||
def is_pdftohtml(self, src):
|
||||
- return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
|
||||
+ return '<!-- created by ebook-converter\'s pdftohtml -->' in src[:1000]
|
||||
|
||||
def __call__(self, html, remove_special_chars=None,
|
||||
get_preprocess_html=False):
|
||||
@@ -627,7 +627,7 @@ class HTMLPreProcessor(object):
|
||||
html = preprocessor(html)
|
||||
|
||||
if is_pdftohtml:
|
||||
- html = html.replace('<!-- created by calibre\'s pdftohtml -->', '')
|
||||
+ html = html.replace('<!-- created by ebook-converter\'s pdftohtml -->', '')
|
||||
|
||||
if getattr(self.extra_opts, 'smarten_punctuation', False):
|
||||
html = smarten_punctuation(html, self.log)
|
||||
|
||||
@@ -43,7 +43,7 @@ class HeuristicProcessor(object):
|
||||
self.common_in_text_beginnings = '[\\w\'\"“‘‛]'
|
||||
|
||||
def is_pdftohtml(self, src):
|
||||
- return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
|
||||
+ return '<!-- created by ebook-converter\'s pdftohtml -->' in src[:1000]
|
||||
|
||||
def is_abbyy(self, src):
|
||||
return '<meta name="generator" content="ABBYY FineReader' in src[:1000]
|
||||
|
||||
Reference in New Issue
Block a user