1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-02-23 02:25:53 +01:00

Convert calibre modules to ebook_converter.

Here is the first batch of modules, which are needed for converting
several formats to LRF. Some of the logic has been changed; more cleanups
will follow.
This commit is contained in:
2020-04-11 19:33:43 +02:00
parent 69d2e536c5
commit 0f9792df36
252 changed files with 1925 additions and 2344 deletions

View File

@@ -8,10 +8,10 @@ __docformat__ = 'restructuredtext en'
import re
from math import ceil
from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from calibre.utils.logging import default_log
from calibre.utils.wordcount import get_wordcount_obj
from polyglot.builtins import unicode_type
from ebook_converter.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from ebook_converter.utils.logging import default_log
from ebook_converter.utils.wordcount import get_wordcount_obj
from ebook_converter.polyglot.builtins import unicode_type
class HeuristicProcessor(object):
@@ -49,7 +49,7 @@ class HeuristicProcessor(object):
return '<meta name="generator" content="ABBYY FineReader' in src[:1000]
def chapter_head(self, match):
from calibre.utils.html2text import html2text
from ebook_converter.utils.html2text import html2text
chap = match.group('chap')
title = match.group('title')
if not title:
@@ -400,7 +400,7 @@ class HeuristicProcessor(object):
return content
def txt_process(self, match):
from calibre.ebooks.txt.processor import convert_basic, separate_paragraphs_single_line
from ebook_converter.ebooks.txt.processor import convert_basic, separate_paragraphs_single_line
content = match.group('text')
content = separate_paragraphs_single_line(content)
content = convert_basic(content, epub_split_size_kb=0)
@@ -412,7 +412,7 @@ class HeuristicProcessor(object):
self.log.debug("Running Text Processing")
outerhtml = re.compile(r'.*?(?<=<pre>)(?P<text>.*?)</pre>', re.IGNORECASE|re.DOTALL)
html = outerhtml.sub(self.txt_process, html)
from calibre.ebooks.conversion.preprocess import convert_entities
from ebook_converter.ebooks.conversion.preprocess import convert_entities
html = re.sub(r'&(\S+?);', convert_entities, html)
else:
# Add markup naively
@@ -616,7 +616,7 @@ class HeuristicProcessor(object):
elif re.match('^<img', replacement_break):
scene_break = self.scene_break_open+replacement_break+'</p>'
else:
from calibre.utils.html2text import html2text
from ebook_converter.utils.html2text import html2text
replacement_break = html2text(replacement_break)
replacement_break = re.sub('\\s', '&nbsp;', replacement_break)
scene_break = self.scene_break_open+replacement_break+'</p>'