mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-02 00:22:25 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
65 lines
2.8 KiB
Python
65 lines
2.8 KiB
Python
#!/usr/bin/env python2
|
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
from ebook_converter.customize.conversion import InputFormatPlugin
|
|
|
|
|
|
class LITInput(InputFormatPlugin):
    """Input plugin that turns LIT files into an OEB book.

    ``convert`` delegates the actual parsing to ``create_oebbook`` with
    ``LitReader`` as the reader; ``postprocess_book`` then tidies up the
    documents found in the book's spine.
    """

    name = 'LIT Input'
    author = 'Marshall T. Vandegrift'
    description = 'Convert LIT files to HTML'
    file_types = {'lit'}
    commit_name = 'lit_input'

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Build the OEB book for the LIT data in *stream*.

        :param stream: LIT input, handed unchanged to ``create_oebbook``.
        :param options: conversion options, handed to ``create_oebbook``.
        :param file_ext: not used by this plugin.
        :param log: logger; also remembered as ``self.log``.
        :param accelerators: not used by this plugin.
        :return: the value returned by ``create_oebbook``.
        """
        # Function-scope imports: these modules are only loaded when a
        # conversion actually runs.
        from ebook_converter.ebooks.lit.reader import LitReader
        from ebook_converter.ebooks.conversion.plumber import create_oebbook

        self.log = log
        return create_oebbook(log, stream, options, reader=LitReader)

    def postprocess_book(self, oeb, opts, log):
        """Clean up, in place, every parsed document in ``oeb.spine``.

        For each spine item whose ``data`` exposes ``xpath``:

        * every ``h:metadata`` and ``h:guide`` element is removed from the
          tree;
        * if the first ``h:body`` has exactly one child and that child is a
          ``<pre>``, its text is re-flowed into paragraph markup (see
          ``_convert_single_pre``).

        :param oeb: the book whose spine items are processed.
        :param opts: conversion options; ``smarten_punctuation`` is read.
        :param log: logger used for reporting.
        """
        from ebook_converter.ebooks.oeb.base import XPath, XHTML

        for item in oeb.spine:
            root = item.data
            if not hasattr(root, 'xpath'):
                # No XPath support on this item's data, nothing to clean.
                continue

            for bad in ('metadata', 'guide'):
                # An empty result simply means the loop body never runs.
                for stray in XPath('//h:' + bad)(root):
                    stray.getparent().remove(stray)

            bodies = XPath('//h:body')(root)
            if not bodies:
                continue
            body = bodies[0]
            if len(body) == 1 and body[0].tag == XHTML('pre'):
                self._convert_single_pre(body[0], opts, log)

    def _convert_single_pre(self, pre, opts, log):
        """Re-flow the text of a lone ``<pre>`` element into paragraphs.

        The element is modified in place: it becomes a ``<div>``, its text
        is cleared and the markup generated from the old text is appended
        to it.

        :param pre: the ``<pre>`` element that is the body's only child.
        :param opts: conversion options; ``smarten_punctuation`` is read.
        :param log: logger used for reporting.
        """
        if pre.text is None:
            # lxml reports ``None`` when an element has no leading text
            # (e.g. it only holds child elements). There is no plain text
            # to re-flow then, and the text helpers below would fail on
            # ``None``, so leave the element untouched.
            return

        from ebook_converter.ebooks.oeb.base import XHTML_NS, XPath, XHTML
        from ebook_converter.ebooks.txt.processor import (
            convert_basic, separate_paragraphs_single_line)
        from ebook_converter.ebooks.chardet import xml_to_unicode
        from ebook_converter.utils.xml_parse import safe_xml_fromstring
        import copy

        # Use the logger passed by the caller rather than ``self.log``,
        # which only exists once ``convert`` has been called.
        log('LIT file with all text in singe <pre> tag detected')

        html = separate_paragraphs_single_line(pre.text)
        # Put the generated document into the XHTML namespace so that the
        # ``h:`` XPath below can match its body.
        html = convert_basic(html).replace('<html>',
                                           '<html xmlns="%s">'%XHTML_NS)
        html = xml_to_unicode(html, strip_encoding_pats=True,
                              resolve_entities=True)[0]
        if opts.smarten_punctuation:
            # SmartyPants skips text inside <pre> tags
            from ebook_converter.ebooks.conversion.preprocess import (
                smarten_punctuation)
            html = smarten_punctuation(html, log)

        new_root = safe_xml_fromstring(html)
        new_bodies = XPath('//h:body')(new_root)
        pre.tag = XHTML('div')
        pre.text = ''
        # NOTE(review): ``new_bodies`` is the list of matching <body>
        # elements, so each <body> itself (not its children) is copied into
        # the div. Kept exactly as before -- confirm this nesting is
        # intended.
        for elem in new_bodies:
            pre.append(copy.deepcopy(elem))