mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-02 16:54:12 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
227 lines
9.4 KiB
Python
227 lines
9.4 KiB
Python
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
__license__ = 'GPL 3'
|
|
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
import os, re, shutil
|
|
from os.path import dirname, abspath, relpath as _relpath, exists, basename
|
|
|
|
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
|
|
from ebook_converter import CurrentDir
|
|
from ebook_converter.ptempfile import PersistentTemporaryDirectory
|
|
from ebook_converter.polyglot.builtins import unicode_type
|
|
|
|
|
|
def relpath(*args):
    '''
    Return ``os.path.relpath(*args)`` with every native path separator
    replaced by a forward slash, so the result can be used as a URL.
    '''
    native = _relpath(*args)
    return '/'.join(native.split(os.sep))
|
|
|
|
|
|
class HTMLOutput(OutputFormatPlugin):
    # Output plugin that writes a book as a ZIP archive containing an HTML
    # index page plus a "<name>_files" directory with one rendered page per
    # spine item, the remaining manifest items and a stylesheet.

    name = 'HTML Output'
    author = 'Fabian Grassl'
    file_type = 'zip'
    commit_name = 'html_output'

    options = {
        OptionRecommendation(name='template_css',
            help=_('CSS file used for the output instead of the default file')),

        OptionRecommendation(name='template_html_index',
            help=_('Template used for generation of the HTML index file instead of the default file')),

        OptionRecommendation(name='template_html',
            help=_('Template used for the generation of the HTML contents of the book instead of the default file')),

        OptionRecommendation(name='extract_to',
            help=_('Extract the contents of the generated ZIP file to the '
                'specified directory. WARNING: The contents of the directory '
                'will be deleted.')
        ),
    }

    recommendations = {('pretty_print', True, OptionRecommendation.HIGH)}

    def generate_toc(self, oeb_book, ref_url, output_dir):
        '''
        Generate table of contents

        Builds a ``<div>`` element wrapping nested ``<ul>``/``<li>`` lists
        that mirror ``oeb_book.toc``. Every entry becomes an ``<a>`` whose
        href is expressed relative to the directory of ``ref_url``; entry
        hrefs are resolved with ``output_dir`` as the working directory.

        :param oeb_book: book whose ``toc`` tree is rendered
        :param ref_url: path of the page that will embed the TOC
        :param output_dir: directory the TOC hrefs are resolved against
        :return: the wrapping ``<div>`` lxml element
        '''
        from lxml import etree
        from ebook_converter.polyglot.urllib import unquote

        from ebook_converter.ebooks.oeb.base import element
        from ebook_converter.utils.cleantext import clean_xml_chars

        ref_dir = dirname(ref_url)

        def build_node(current_node, parent=None):
            # The top-level call creates the root list; nested calls only
            # open a sub-list when the node actually has children.
            if parent is None:
                parent = etree.Element('ul')
            elif current_node.nodes:
                parent = element(parent, 'ul')
            for node in current_node.nodes:
                point = element(parent, 'li')
                href = relpath(abspath(unquote(node.href)), ref_dir)
                if isinstance(href, bytes):
                    href = href.decode('utf-8')
                link = element(point, 'a', href=clean_xml_chars(href))
                title = node.title
                if isinstance(title, bytes):
                    title = title.decode('utf-8')
                if title:
                    # Collapse runs of whitespace into single spaces.
                    title = re.sub(r'\s+', ' ', title)
                link.text = clean_xml_chars(title)
                build_node(node, point)
            return parent

        # abspath() inside build_node depends on the working directory.
        with CurrentDir(output_dir):
            wrap = etree.Element('div')
            wrap.append(build_node(oeb_book.toc))
            return wrap

    def generate_html_toc(self, oeb_book, ref_url, output_dir):
        '''
        Return the table of contents from :meth:`generate_toc` serialized
        as a pretty-printed unicode string without an XML declaration.
        '''
        from lxml import etree

        root = self.generate_toc(oeb_book, ref_url, output_dir)
        return etree.tostring(root, pretty_print=True, encoding='unicode',
                xml_declaration=False)

    def _read_template(self, user_path, default_resource):
        # Return template text decoded as UTF-8: the file at user_path when
        # one was given, otherwise the bundled resource default_resource.
        if user_path is not None:
            with open(user_path, 'rb') as f:
                raw = f.read()
        else:
            raw = P(default_resource, data=True)
        return raw.decode('utf-8')

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Render ``oeb_book`` as HTML and store the result in a ZIP archive.

        The pages are produced in a temporary directory: an index page
        rendered from the index template, a ``<name>_files`` directory
        holding the stylesheet, every non-spine manifest item verbatim and
        one templated page per spine item. The directory and the index page
        are then added to the ZIP at ``output_path``. When
        ``opts.extract_to`` is set, that directory is deleted, recreated and
        the archive is extracted into it.

        :param oeb_book: the book to convert
        :param output_path: path of the ZIP file to create
        :param input_plugin: not used by this method
        :param opts: conversion options (``template_css``,
            ``template_html``, ``template_html_index``, ``extract_to``)
        :param log: logger, stored on ``self.log``
        '''
        from lxml import etree
        from ebook_converter.utils import zipfile
        from templite import Templite
        from ebook_converter.polyglot.urllib import unquote
        from ebook_converter.ebooks.html.meta import EasyMeta

        # read template files
        template_html_index_data = self._read_template(
            opts.template_html_index,
            'templates/html_export_default_index.tmpl')
        template_html_data = self._read_template(
            opts.template_html, 'templates/html_export_default.tmpl')
        template_css_data = self._read_template(
            opts.template_css, 'templates/html_export_default.css')

        self.log = log
        self.opts = opts
        meta = EasyMeta(oeb_book.metadata)
        has_toc = bool(oeb_book.toc.count())

        # Patterns used for every spine page; compiled once up front.
        xhtml_ns = {'h': 'http://www.w3.org/1999/xhtml'}
        head_tag_re = re.compile(r'\<\/?head.*\>')
        style_re = re.compile(r'\<style.*\/style\>', re.M|re.S)
        empty_title_re = re.compile(r'<(title)([^>]*)/>')
        body_tag_re = re.compile(r'\<\/?body.*\>')
        empty_inline_re = re.compile(r'<(div|a|span)([^>]*)/>')

        tempdir = os.path.realpath(PersistentTemporaryDirectory())
        try:
            # Only a trailing ".zip" is dropped; a ".zip" elsewhere in the
            # name is part of the title and must be kept.
            stem = basename(re.sub(r'\.zip$', '', output_path))
            output_file = os.path.join(tempdir, stem + '.html')
            output_dir = os.path.join(tempdir, stem + '_files')
            index_dir = dirname(output_file)

            if not exists(output_dir):
                os.makedirs(output_dir)

            css_path = output_dir + os.sep + 'calibreHtmlOutBasicCss.css'
            with open(css_path, 'wb') as f:
                f.write(template_css_data.encode('utf-8'))

            # render and write the index page
            html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
            templite = Templite(template_html_index_data)
            nextLink = oeb_book.spine[0].href
            nextLink = relpath(output_dir + os.sep + nextLink, index_dir)
            cssLink = relpath(abspath(css_path), index_dir)
            tocUrl = relpath(output_file, index_dir)
            t = templite.render(has_toc=has_toc,
                    toc=html_toc, meta=meta, nextLink=nextLink,
                    tocUrl=tocUrl, cssLink=cssLink,
                    firstContentPageLink=nextLink)
            if isinstance(t, unicode_type):
                t = t.encode('utf-8')
            with open(output_file, 'wb') as f:
                f.write(t)

            # Manifest hrefs are resolved relative to output_dir.
            with CurrentDir(output_dir):
                for item in oeb_book.manifest:
                    path = abspath(unquote(item.href))
                    item_dir = dirname(path)
                    if not exists(item_dir):
                        os.makedirs(item_dir)
                    if item.spine_position is not None:
                        # Spine pages are rendered below; create an empty
                        # placeholder file for now.
                        with open(path, 'wb') as f:
                            pass
                    else:
                        with open(path, 'wb') as f:
                            f.write(item.bytes_representation)
                        item.unload_data_from_memory(memory=path)

                spine_len = len(oeb_book.spine)
                for item in oeb_book.spine:
                    path = abspath(unquote(item.href))
                    page_dir = dirname(path)
                    root = item.data.getroottree()

                    # get & clean HTML <HEAD>-data
                    head = root.xpath('//h:head', namespaces=xhtml_ns)[0]
                    head_content = etree.tostring(head, pretty_print=True, encoding='unicode')
                    head_content = head_tag_re.sub('', head_content)
                    head_content = style_re.sub('', head_content)
                    head_content = empty_title_re.sub(r'<\1\2></\1>', head_content)

                    # get & clean HTML <BODY>-data
                    body = root.xpath('//h:body', namespaces=xhtml_ns)[0]
                    ebook_content = etree.tostring(body, pretty_print=True, encoding='unicode')
                    ebook_content = body_tag_re.sub('', ebook_content)
                    ebook_content = empty_inline_re.sub(r'<\1\2></\1>', ebook_content)

                    position = item.spine_position

                    # generate link to next page
                    if position + 1 < spine_len:
                        nextLink = oeb_book.spine[position + 1].href
                        nextLink = relpath(abspath(nextLink), page_dir)
                    else:
                        nextLink = None

                    # generate link to previous page
                    if position > 0:
                        prevLink = oeb_book.spine[position - 1].href
                        prevLink = relpath(abspath(prevLink), page_dir)
                    else:
                        prevLink = None

                    cssLink = relpath(abspath(css_path), page_dir)
                    tocUrl = relpath(output_file, page_dir)
                    # Like the other links, this one has to be relative to
                    # the directory of the page being written, otherwise it
                    # breaks for pages stored in sub-directories.
                    firstContentPageLink = relpath(
                        abspath(oeb_book.spine[0].href), page_dir)

                    # render template
                    templite = Templite(template_html_data)
                    # Passed as a callable so the TOC is only generated if
                    # the template asks for it during this render call.
                    toc = lambda: self.generate_html_toc(oeb_book, path, output_dir)
                    t = templite.render(ebookContent=ebook_content,
                            prevLink=prevLink, nextLink=nextLink,
                            has_toc=has_toc, toc=toc,
                            tocUrl=tocUrl, head_content=head_content,
                            meta=meta, cssLink=cssLink,
                            firstContentPageLink=firstContentPageLink)

                    # write html to file
                    with open(path, 'wb') as f:
                        f.write(t.encode('utf-8'))
                    item.unload_data_from_memory(memory=path)

            zfile = zipfile.ZipFile(output_path, "w")
            try:
                zfile.add_dir(output_dir, basename(output_dir))
                zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)

                if opts.extract_to:
                    # WARNING: existing contents of extract_to are removed,
                    # as stated in the option's help text.
                    if os.path.exists(opts.extract_to):
                        shutil.rmtree(opts.extract_to)
                    os.makedirs(opts.extract_to)
                    zfile.extractall(opts.extract_to)
                    self.log('Zip file extracted to', opts.extract_to)
            finally:
                # Close the archive even when adding or extracting fails.
                zfile.close()
        finally:
            # cleanup temp dir, also when the conversion fails part-way
            shutil.rmtree(tempdir)
|