mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-22 21:05:45 +01:00
This is progressing refactor of the calibre code to make it more readable, and transform it to something more coherent. In this patch, there are changes regarding imports for some modules, instead of polluting namespace of each module with some other modules symbols, which often were imported from other modules. Yuck.
63 lines
2.6 KiB
Python
63 lines
2.6 KiB
Python
import copy
|
|
|
|
from lxml import etree
|
|
|
|
from ebook_converter import constants as const
|
|
from ebook_converter.customize.conversion import InputFormatPlugin
|
|
|
|
|
|
class LITInput(InputFormatPlugin):
|
|
|
|
name = 'LIT Input'
|
|
author = 'Marshall T. Vandegrift'
|
|
description = 'Convert LIT files to HTML'
|
|
file_types = {'lit'}
|
|
commit_name = 'lit_input'
|
|
|
|
def convert(self, stream, options, file_ext, log,
|
|
accelerators):
|
|
from ebook_converter.ebooks.lit.reader import LitReader
|
|
from ebook_converter.ebooks.conversion.plumber import create_oebbook
|
|
self.log = log
|
|
return create_oebbook(log, stream, options, reader=LitReader)
|
|
|
|
def postprocess_book(self, oeb, opts, log):
|
|
from ebook_converter.ebooks.oeb.base import XPath, XHTML
|
|
for item in oeb.spine:
|
|
root = item.data
|
|
if not hasattr(root, 'xpath'):
|
|
continue
|
|
for bad in ('metadata', 'guide'):
|
|
metadata = XPath('//h:'+bad)(root)
|
|
if metadata:
|
|
for x in metadata:
|
|
x.getparent().remove(x)
|
|
body = XPath('//h:body')(root)
|
|
if body:
|
|
body = body[0]
|
|
if len(body) == 1 and body[0].tag == XHTML('pre'):
|
|
pre = body[0]
|
|
from ebook_converter.ebooks.txt.processor import \
|
|
convert_basic, separate_paragraphs_single_line
|
|
from ebook_converter.ebooks.chardet import xml_to_unicode
|
|
self.log('LIT file with all text in singe <pre> tag '
|
|
'detected')
|
|
html = separate_paragraphs_single_line(pre.text)
|
|
html = convert_basic(html).replace('<html>',
|
|
'<html xmlns="%s">' %
|
|
const.XHTML_NS)
|
|
html = xml_to_unicode(html, strip_encoding_pats=True,
|
|
resolve_entities=True)[0]
|
|
if opts.smarten_punctuation:
|
|
# SmartyPants skips text inside <pre> tags
|
|
from ebook_converter.ebooks.conversion import \
|
|
preprocess
|
|
html = preprocess.smarten_punctuation(html, self.log)
|
|
root = etree.fromstring(html)
|
|
body = XPath('//h:body')(root)
|
|
pre.tag = XHTML('div')
|
|
pre.text = ''
|
|
for elem in body:
|
|
ne = copy.deepcopy(elem)
|
|
pre.append(ne)
|