import os import pkg_resources import re import shutil from lxml import etree from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation from ebook_converter.ebooks.oeb.base import element from ebook_converter import polyglot from ebook_converter.ptempfile import PersistentTemporaryDirectory from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils import directory def relpath(*args): return os.path.relpath(*args).replace(os.sep, '/') class HTMLOutput(OutputFormatPlugin): name = 'HTML Output' author = 'Fabian Grassl' file_type = 'zip' commit_name = 'html_output' options = { OptionRecommendation(name='template_css', help='CSS file used for the output instead of the default file'), OptionRecommendation(name='template_html_index', help='Template used for generation of the HTML index file instead of the default file'), OptionRecommendation(name='template_html', help='Template used for the generation of the HTML contents of the book instead of the default file'), OptionRecommendation(name='extract_to', help='Extract the contents of the generated ZIP file to the ' 'specified directory. WARNING: The contents of the directory ' 'will be deleted.' ), } recommendations = {('pretty_print', True, OptionRecommendation.HIGH)} def generate_toc(self, oeb_book, ref_url, output_dir): ''' Generate table of contents ''' with directory.CurrentDir(output_dir): def build_node(current_node, parent=None): if parent is None: parent = etree.Element('ul') elif len(current_node.nodes): parent = element(parent, ('ul')) for node in current_node.nodes: point = element(parent, 'li') href = relpath(os.path.abspath(polyglot .unquote(node.href)), os.path.dirname(ref_url)) if isinstance(href, bytes): href = href.decode('utf-8') link = element(point, 'a', href=clean_xml_chars(href)) title = node.title if isinstance(title, bytes): title = title.decode('utf-8') if title: title = re.sub(r'\s+', ' ', title) link.text = clean_xml_chars(title) build_node(node, point) return parent wrap = etree.Element('div') wrap.append(build_node(oeb_book.toc)) return wrap def generate_html_toc(self, oeb_book, ref_url, output_dir): from lxml import etree root = self.generate_toc(oeb_book, ref_url, output_dir) return etree.tostring(root, pretty_print=True, encoding='unicode', xml_declaration=False) def convert(self, oeb_book, output_path, input_plugin, opts, log): from lxml import etree from ebook_converter.utils import zipfile from templite import Templite from ebook_converter.ebooks.html.meta import EasyMeta # read template files if opts.template_html_index is not None: with open(opts.template_html_index, 'rb') as f: template_html_index_data = f.read() else: with open(pkg_resources. resource_filename('ebook_converter', 'data/html_export_default_index.tmpl') ) as fobj: template_html_index_data = fobj.read().decode() if opts.template_html is not None: with open(opts.template_html, 'rb') as f: template_html_data = f.read() else: with open(pkg_resources. resource_filename('ebook_converter', 'data/html_export_default.tmpl') ) as fobj: template_html_data = fobj.read().decode() if opts.template_css is not None: with open(opts.template_css, 'rb') as f: template_css_data = f.read() else: with open(pkg_resources. resource_filename('ebook_converter', 'data/html_export_default.css') ) as fobj: template_css_data = fobj.read().decode() template_html_index_data = template_html_index_data.decode('utf-8') template_html_data = template_html_data.decode('utf-8') template_css_data = template_css_data.decode('utf-8') self.log = log self.opts = opts meta = EasyMeta(oeb_book.metadata) tempdir = os.path.realpath(PersistentTemporaryDirectory()) output_file = os.path.join(tempdir, os.path.basename(re.sub(r'\.zip', '', output_path)+'.html')) output_dir = re.sub(r'\.html', '', output_file)+'_files' if not os.path.exists(output_dir): os.makedirs(output_dir) css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css' with open(css_path, 'wb') as f: f.write(template_css_data.encode('utf-8')) with open(output_file, 'wb') as f: html_toc = self.generate_html_toc(oeb_book, output_file, output_dir) templite = Templite(template_html_index_data) nextLink = oeb_book.spine[0].href nextLink = relpath(output_dir+os.sep+nextLink, os.path.dirname(output_file)) cssLink = relpath(os.path.abspath(css_path), os.path.dirname(output_file)) tocUrl = relpath(output_file, os.path.dirname(output_file)) t = templite.render(has_toc=bool(oeb_book.toc.count()), toc=html_toc, meta=meta, nextLink=nextLink, tocUrl=tocUrl, cssLink=cssLink, firstContentPageLink=nextLink) if isinstance(t, str): t = t.encode('utf-8') f.write(t) with directory.CurrentDir(output_dir): for item in oeb_book.manifest: path = os.path.abspath(polyglot.unquote(item.href)) dir = os.path.dirname(path) if not os.path.exists(dir): os.makedirs(dir) if item.spine_position is not None: with open(path, 'wb') as f: pass else: with open(path, 'wb') as f: f.write(item.bytes_representation) item.unload_data_from_memory(memory=path) for item in oeb_book.spine: path = os.path.abspath(polyglot.unquote(item.href)) dir = os.path.dirname(path) root = item.data.getroottree() # get & clean HTML
-data head = root.xpath('//h:head', namespaces={'h': 'http://www.w3.org/1999/xhtml'})[0] head_content = etree.tostring(head, pretty_print=True, encoding='unicode') head_content = re.sub(r'\<\/?head.*\>', '', head_content) head_content = re.sub(re.compile(r'\