mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-02 16:54:12 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
166 lines
6.2 KiB
Python
166 lines
6.2 KiB
Python
# -*- coding: utf-8 -*-
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
import glob
|
|
import os
|
|
import shutil
|
|
|
|
from ebook_converter.customize.conversion import InputFormatPlugin
|
|
from ebook_converter.ptempfile import TemporaryDirectory
|
|
from ebook_converter.polyglot.builtins import getcwd
|
|
|
|
|
|
class PMLInput(InputFormatPlugin):
    """Input plugin that turns PML / PMLZ books into HTML pages plus an OPF.

    A ``.pml`` input is converted to a single ``index.html``; a ``.pmlz``
    input (a zip archive) is unpacked and every top-level ``*.pml`` member
    is converted to its own HTML page.  All output is written into the
    current working directory.
    """

    name = 'PML Input'
    author = 'John Schember'
    description = 'Convert PML to OEB'
    # pmlz is a zip file containing pml files and png images.
    file_types = {'pml', 'pmlz'}
    commit_name = 'pml_input'

    def process_pml(self, pml_path, html_path, close_all=False):
        """Convert one PML document to HTML and return its table of contents.

        :param pml_path: Path of the PML file, or an object with ``read``
            (it is then rewound with ``seek(0)`` and left open).
        :param html_path: Path of the HTML file to create, or an object with
            ``write`` (it is then left open).
        :param close_all: Unused; kept for interface compatibility.
        :return: Whatever ``PML_HTMLizer.get_toc()`` returns for this
            document.

        Reads ``self.options.input_encoding`` and ``self.log``, so these must
        be set before calling (``convert`` does this).
        """
        from ebook_converter.ebooks.pml.pmlconverter import PML_HTMLizer

        # Only streams opened here are closed here; caller-supplied streams
        # stay open.
        pclose = False
        hclose = False
        pml_stream = None
        html_stream = None

        try:
            if not hasattr(pml_path, 'read'):
                pml_stream = open(pml_path, 'rb')
                pclose = True
            else:
                pml_stream = pml_path
                pml_stream.seek(0)

            if not hasattr(html_path, 'write'):
                html_stream = open(html_path, 'wb')
                hclose = True
            else:
                html_stream = html_path

            # Encoding precedence: explicit user option, then the stream's
            # own ``encoding`` attribute, then cp1252 as the fallback.
            ienc = getattr(pml_stream, 'encoding', None)
            if ienc is None:
                ienc = 'cp1252'
            if self.options.input_encoding:
                ienc = self.options.input_encoding

            self.log.debug('Converting PML to HTML...')
            hizer = PML_HTMLizer()
            # NOTE(review): html_path is handed to the converter unchanged,
            # even when it is a stream -- confirm what parse_pml expects.
            html = hizer.parse_pml(pml_stream.read().decode(ienc), html_path)
            html = '<html><head><title></title></head><body>%s</body></html>'%html
            html_stream.write(html.encode('utf-8', 'replace'))
        finally:
            # Release our own handles even if decoding/parsing/writing fails.
            if pclose:
                pml_stream.close()
            if hclose:
                html_stream.close()

        return hizer.get_toc()

    def get_images(self, stream, tdir, top_level=False):
        """Copy the book's PNG images into ``<cwd>/images``.

        Candidate locations are tried in order and the first one that yields
        any ``*.png`` file wins:

        1. ``tdir`` itself (only when ``top_level`` is true),
        2. ``tdir/<bookname>_img`` where ``<bookname>`` is the base name of
           ``stream.name`` without extension (only if ``stream`` has a
           ``name``),
        3. ``tdir/images``.

        :param stream: Input stream; only its optional ``name`` attribute is
            used.
        :param tdir: Directory to search for images.
        :param top_level: Whether to look for images directly in ``tdir``.
        :return: List of ``'images/<file name>'`` strings, one per copied
            image.
        """
        images = []
        imgs = []

        if top_level:
            imgs = glob.glob(os.path.join(tdir, '*.png'))
        # Images not in top level try bookname_img directory because
        # that's where Dropbook likes to see them.
        if not imgs and hasattr(stream, 'name'):
            book_name = os.path.splitext(os.path.basename(stream.name))[0]
            imgs = glob.glob(os.path.join(tdir, book_name + '_img', '*.png'))
        # No images in Dropbook location try generic images directory
        if not imgs:
            imgs = glob.glob(os.path.join(tdir, 'images', '*.png'))

        if imgs:
            images_dir = os.path.join(getcwd(), 'images')
            # Do not fail when the output directory is already there.
            if not os.path.isdir(images_dir):
                os.makedirs(images_dir)

            for img in imgs:
                pimg_name = os.path.basename(img)
                images.append('images/' + pimg_name)
                shutil.copy(img, os.path.join(images_dir, pimg_name))

        return images

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Convert the input book and write HTML, images, OPF and NCX to cwd.

        :param stream: Input file object (PML text or PMLZ zip archive).
        :param options: Conversion options; stored on ``self.options``.
        :param file_ext: ``'pmlz'`` selects archive handling; anything else
            is treated as a single PML document.
        :param log: Logger; stored on ``self.log``.
        :param accelerators: Not used by this plugin.
        :return: Absolute path of the generated ``metadata.opf``.
        """
        from ebook_converter.ebooks.metadata.toc import TOC
        from ebook_converter.ebooks.metadata.opf2 import OPFCreator
        from ebook_converter.utils.zipfile import ZipFile

        self.options = options
        self.log = log
        pages, images = [], []
        toc = TOC()

        if file_ext == 'pmlz':
            log.debug('De-compressing content to temporary directory...')
            with TemporaryDirectory('_unpmlz') as tdir:
                zf = ZipFile(stream)
                zf.extractall(tdir)

                # glob order is filesystem dependent; sort so the merged TOC
                # follows the same order as the (sorted) pages below.
                pmls = sorted(glob.glob(os.path.join(tdir, '*.pml')))
                for pml in pmls:
                    html_name = os.path.splitext(os.path.basename(pml))[0]+'.html'
                    html_path = os.path.join(getcwd(), html_name)

                    pages.append(html_name)
                    log.debug('Processing PML item %s...' % pml)
                    ttoc = self.process_pml(pml, html_path)
                    toc += ttoc
                # Images must be copied before the temporary directory goes
                # away.
                images = self.get_images(stream, tdir, True)
        else:
            toc = self.process_pml(stream, 'index.html')
            pages.append('index.html')

            if hasattr(stream, 'name'):
                images = self.get_images(stream, os.path.abspath(os.path.dirname(stream.name)))

        # We want pages to be ordered alphabetically.
        pages.sort()

        manifest_items = [(item, None) for item in pages + images]

        from ebook_converter.ebooks.metadata.meta import get_metadata
        log.debug('Reading metadata from input file...')
        mi = get_metadata(stream, 'pml')
        if 'images/cover.png' in images:
            mi.cover = 'images/cover.png'
        opf = OPFCreator(getcwd(), mi)
        log.debug('Generating manifest...')
        opf.create_manifest(manifest_items)
        opf.create_spine(pages)
        opf.set_toc(toc)
        with open('metadata.opf', 'wb') as opffile:
            with open('toc.ncx', 'wb') as tocfile:
                opf.render(opffile, tocfile, 'toc.ncx')

        return os.path.join(getcwd(), 'metadata.opf')

    def postprocess_book(self, oeb, opts, log):
        """Simplify the markup of headings in every parsed spine item.

        Two patterns are flattened for each ``h1``..``h6`` element that has
        at least one child:

        * a leading empty ``span`` that only carries an ``id``: the id is
          moved onto the heading and the span is removed;
        * a single child ``div`` styled ``text-align: center; margin: auto;``:
          its text is merged into the heading, which gets
          ``text-align: center`` instead.

        :param oeb: Book object whose ``spine`` items are modified in place.
        :param opts: Not used here.
        :param log: Not used here.
        """
        from ebook_converter.ebooks.oeb.base import XHTML, barename
        for item in oeb.spine:
            # Only parsed documents (objects offering ``xpath``) are touched.
            if hasattr(item.data, 'xpath'):
                for heading in item.data.iterdescendants(*map(XHTML, 'h1 h2 h3 h4 h5 h6'.split())):
                    if not len(heading):
                        continue
                    span = heading[0]
                    if not heading.text and not span.text and not len(span) and barename(span.tag) == 'span':
                        if not heading.get('id') and span.get('id'):
                            heading.set('id', span.get('id'))
                            # The text following the span becomes the
                            # heading's own text once the span is gone.
                            heading.text = span.tail
                            heading.remove(span)
                    if len(heading) == 1 and heading[0].get('style') == 'text-align: center; margin: auto;':
                        div = heading[0]
                        if barename(div.tag) == 'div' and not len(div) and not div.get('id') and not heading.get('style'):
                            heading.text = (heading.text or '') + (div.text or '') + (div.tail or '')
                            heading.remove(div)
                            heading.set('style', 'text-align: center')