mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-24 03:05:48 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been change, more cleanups will follow.
338 lines
15 KiB
Python
338 lines
15 KiB
Python
#!/usr/bin/env python2
|
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
from ebook_converter.customize.conversion import (OutputFormatPlugin,
|
|
OptionRecommendation)
|
|
from ebook_converter.polyglot.builtins import unicode_type
|
|
|
|
|
|
def remove_html_cover(oeb, log):
|
|
from ebook_converter.ebooks.oeb.base import OEB_DOCS
|
|
|
|
if not oeb.metadata.cover \
|
|
or 'cover' not in oeb.guide:
|
|
return
|
|
href = oeb.guide['cover'].href
|
|
del oeb.guide['cover']
|
|
item = oeb.manifest.hrefs[href]
|
|
if item.spine_position is not None:
|
|
log.warn('Found an HTML cover: ', item.href, 'removing it.',
|
|
'If you find some content missing from the output MOBI, it '
|
|
'is because you misidentified the HTML cover in the input '
|
|
'document')
|
|
oeb.spine.remove(item)
|
|
if item.media_type in OEB_DOCS:
|
|
oeb.manifest.remove(item)
|
|
|
|
|
|
def extract_mobi(output_path, opts):
|
|
if opts.extract_to is not None:
|
|
from ebook_converter.ebooks.mobi.debug.main import inspect_mobi
|
|
ddir = opts.extract_to
|
|
inspect_mobi(output_path, ddir=ddir)
|
|
|
|
|
|
class MOBIOutput(OutputFormatPlugin):
|
|
|
|
name = 'MOBI Output'
|
|
author = 'Kovid Goyal'
|
|
file_type = 'mobi'
|
|
commit_name = 'mobi_output'
|
|
ui_data = {'file_types': ['old', 'both', 'new']}
|
|
|
|
options = {
|
|
OptionRecommendation(name='prefer_author_sort',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('When present, use author sort field as author.')
|
|
),
|
|
OptionRecommendation(name='no_inline_toc',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('Don\'t add Table of Contents to the book. Useful if '
|
|
'the book has its own table of contents.')),
|
|
OptionRecommendation(name='toc_title', recommended_value=None,
|
|
help=_('Title for any generated in-line table of contents.')
|
|
),
|
|
OptionRecommendation(name='dont_compress',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('Disable compression of the file contents.')
|
|
),
|
|
OptionRecommendation(name='personal_doc', recommended_value='[PDOC]',
|
|
help=_('Tag for MOBI files to be marked as personal documents.'
|
|
' This option has no effect on the conversion. It is used'
|
|
' only when sending MOBI files to a device. If the file'
|
|
' being sent has the specified tag, it will be marked as'
|
|
' a personal document when sent to the Kindle.')
|
|
),
|
|
OptionRecommendation(name='mobi_ignore_margins',
|
|
recommended_value=False,
|
|
help=_('Ignore margins in the input document. If False, then '
|
|
'the MOBI output plugin will try to convert margins specified'
|
|
' in the input document, otherwise it will ignore them.')
|
|
),
|
|
OptionRecommendation(name='mobi_toc_at_start',
|
|
recommended_value=False,
|
|
help=_('When adding the Table of Contents to the book, add it at the start of the '
|
|
'book instead of the end. Not recommended.')
|
|
),
|
|
OptionRecommendation(name='extract_to',
|
|
help=_('Extract the contents of the generated %s file to the '
|
|
'specified directory. The contents of the directory are first '
|
|
'deleted, so be careful.') % 'MOBI'
|
|
),
|
|
OptionRecommendation(name='share_not_sync', recommended_value=False,
|
|
help=_('Enable sharing of book content via Facebook etc. '
|
|
' on the Kindle. WARNING: Using this feature means that '
|
|
' the book will not auto sync its last read position '
|
|
' on multiple devices. Complain to Amazon.')
|
|
),
|
|
OptionRecommendation(name='mobi_keep_original_images',
|
|
recommended_value=False,
|
|
help=_('By default calibre converts all images to JPEG format '
|
|
'in the output MOBI file. This is for maximum compatibility '
|
|
'as some older MOBI viewers have problems with other image '
|
|
'formats. This option tells calibre not to do this. '
|
|
'Useful if your document contains lots of GIF/PNG images that '
|
|
'become very large when converted to JPEG.')),
|
|
OptionRecommendation(name='mobi_file_type', choices=ui_data['file_types'], recommended_value='old',
|
|
help=_('By default calibre generates MOBI files that contain the '
|
|
'old MOBI 6 format. This format is compatible with all '
|
|
'devices. However, by changing this setting, you can tell '
|
|
'calibre to generate MOBI files that contain both MOBI 6 and '
|
|
'the new KF8 format, or only the new KF8 format. KF8 has '
|
|
'more features than MOBI 6, but only works with newer Kindles. '
|
|
'Allowed values: {}').format('old, both, new')),
|
|
|
|
}
|
|
|
|
def check_for_periodical(self):
|
|
if self.is_periodical:
|
|
self.periodicalize_toc()
|
|
self.check_for_masthead()
|
|
self.opts.mobi_periodical = True
|
|
else:
|
|
self.opts.mobi_periodical = False
|
|
|
|
def check_for_masthead(self):
|
|
found = 'masthead' in self.oeb.guide
|
|
if not found:
|
|
from ebook_converter.ebooks import generate_masthead
|
|
self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
|
|
raw = generate_masthead(unicode_type(self.oeb.metadata['title'][0]))
|
|
id, href = self.oeb.manifest.generate('masthead', 'masthead')
|
|
self.oeb.manifest.add(id, href, 'image/gif', data=raw)
|
|
self.oeb.guide.add('masthead', 'Masthead Image', href)
|
|
else:
|
|
self.oeb.log.debug('Using mastheadImage supplied in manifest...')
|
|
|
|
def periodicalize_toc(self):
|
|
from ebook_converter.ebooks.oeb.base import TOC
|
|
toc = self.oeb.toc
|
|
if not toc or len(self.oeb.spine) < 3:
|
|
return
|
|
if toc and toc[0].klass != 'periodical':
|
|
one, two = self.oeb.spine[0], self.oeb.spine[1]
|
|
self.log('Converting TOC for MOBI periodical indexing...')
|
|
|
|
articles = {}
|
|
if toc.depth() < 3:
|
|
# single section periodical
|
|
self.oeb.manifest.remove(one)
|
|
self.oeb.manifest.remove(two)
|
|
sections = [TOC(klass='section', title=_('All articles'),
|
|
href=self.oeb.spine[0].href)]
|
|
for x in toc:
|
|
sections[0].nodes.append(x)
|
|
else:
|
|
# multi-section periodical
|
|
self.oeb.manifest.remove(one)
|
|
sections = list(toc)
|
|
for i,x in enumerate(sections):
|
|
x.klass = 'section'
|
|
articles_ = list(x)
|
|
if articles_:
|
|
self.oeb.manifest.remove(self.oeb.manifest.hrefs[x.href])
|
|
x.href = articles_[0].href
|
|
|
|
for sec in sections:
|
|
articles[id(sec)] = []
|
|
for a in list(sec):
|
|
a.klass = 'article'
|
|
articles[id(sec)].append(a)
|
|
sec.nodes.remove(a)
|
|
|
|
root = TOC(klass='periodical', href=self.oeb.spine[0].href,
|
|
title=unicode_type(self.oeb.metadata.title[0]))
|
|
|
|
for s in sections:
|
|
if articles[id(s)]:
|
|
for a in articles[id(s)]:
|
|
s.nodes.append(a)
|
|
root.nodes.append(s)
|
|
|
|
for x in list(toc.nodes):
|
|
toc.nodes.remove(x)
|
|
|
|
toc.nodes.append(root)
|
|
|
|
# Fix up the periodical href to point to first section href
|
|
toc.nodes[0].href = toc.nodes[0].nodes[0].href
|
|
|
|
def convert(self, oeb, output_path, input_plugin, opts, log):
|
|
from ebook_converter.ebooks.mobi.writer2.resources import Resources
|
|
self.log, self.opts, self.oeb = log, opts, oeb
|
|
|
|
mobi_type = opts.mobi_file_type
|
|
if self.is_periodical:
|
|
mobi_type = 'old' # Amazon does not support KF8 periodicals
|
|
create_kf8 = mobi_type in ('new', 'both')
|
|
|
|
remove_html_cover(self.oeb, self.log)
|
|
resources = Resources(oeb, opts, self.is_periodical,
|
|
add_fonts=create_kf8)
|
|
self.check_for_periodical()
|
|
|
|
if create_kf8:
|
|
from ebook_converter.ebooks.mobi.writer8.cleanup import remove_duplicate_anchors
|
|
remove_duplicate_anchors(self.oeb)
|
|
# Split on pagebreaks so that the resulting KF8 is faster to load
|
|
from ebook_converter.ebooks.oeb.transforms.split import Split
|
|
Split()(self.oeb, self.opts)
|
|
|
|
kf8 = self.create_kf8(resources, for_joint=mobi_type=='both'
|
|
) if create_kf8 else None
|
|
if mobi_type == 'new':
|
|
kf8.write(output_path)
|
|
extract_mobi(output_path, opts)
|
|
return
|
|
|
|
self.log('Creating MOBI 6 output')
|
|
self.write_mobi(input_plugin, output_path, kf8, resources)
|
|
|
|
def create_kf8(self, resources, for_joint=False):
|
|
from ebook_converter.ebooks.mobi.writer8.main import create_kf8_book
|
|
return create_kf8_book(self.oeb, self.opts, resources,
|
|
for_joint=for_joint)
|
|
|
|
def write_mobi(self, input_plugin, output_path, kf8, resources):
|
|
from ebook_converter.ebooks.mobi.mobiml import MobiMLizer
|
|
from ebook_converter.ebooks.oeb.transforms.manglecase import CaseMangler
|
|
from ebook_converter.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
|
|
from ebook_converter.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
|
|
from ebook_converter.customize.ui import plugin_for_input_format
|
|
|
|
opts, oeb = self.opts, self.oeb
|
|
if not opts.no_inline_toc:
|
|
tocadder = HTMLTOCAdder(title=opts.toc_title, position='start' if
|
|
opts.mobi_toc_at_start else 'end')
|
|
tocadder(oeb, opts)
|
|
mangler = CaseMangler()
|
|
mangler(oeb, opts)
|
|
try:
|
|
rasterizer = SVGRasterizer()
|
|
rasterizer(oeb, opts)
|
|
except Unavailable:
|
|
self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
|
|
else:
|
|
# Add rasterized SVG images
|
|
resources.add_extra_images()
|
|
if hasattr(self.oeb, 'inserted_metadata_jacket'):
|
|
self.workaround_fire_bugs(self.oeb.inserted_metadata_jacket)
|
|
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
|
|
mobimlizer(oeb, opts)
|
|
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
|
|
from ebook_converter.ebooks.mobi.writer2.main import MobiWriter
|
|
writer = MobiWriter(opts, resources, kf8,
|
|
write_page_breaks_after_item=write_page_breaks_after_item)
|
|
writer(oeb, output_path)
|
|
extract_mobi(output_path, opts)
|
|
|
|
def specialize_css_for_output(self, log, opts, item, stylizer):
|
|
from ebook_converter.ebooks.mobi.writer8.cleanup import CSSCleanup
|
|
CSSCleanup(log, opts)(item, stylizer)
|
|
|
|
def workaround_fire_bugs(self, jacket):
|
|
# The idiotic Fire crashes when trying to render the table used to
|
|
# layout the jacket
|
|
from ebook_converter.ebooks.oeb.base import XHTML
|
|
for table in jacket.data.xpath('//*[local-name()="table"]'):
|
|
table.tag = XHTML('div')
|
|
for tr in table.xpath('descendant::*[local-name()="tr"]'):
|
|
cols = tr.xpath('descendant::*[local-name()="td"]')
|
|
tr.tag = XHTML('div')
|
|
for td in cols:
|
|
td.tag = XHTML('span' if cols else 'div')
|
|
|
|
|
|
class AZW3Output(OutputFormatPlugin):
|
|
|
|
name = 'AZW3 Output'
|
|
author = 'Kovid Goyal'
|
|
file_type = 'azw3'
|
|
commit_name = 'azw3_output'
|
|
|
|
options = {
|
|
OptionRecommendation(name='prefer_author_sort',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('When present, use author sort field as author.')
|
|
),
|
|
OptionRecommendation(name='no_inline_toc',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('Don\'t add Table of Contents to the book. Useful if '
|
|
'the book has its own table of contents.')),
|
|
OptionRecommendation(name='toc_title', recommended_value=None,
|
|
help=_('Title for any generated in-line table of contents.')
|
|
),
|
|
OptionRecommendation(name='dont_compress',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('Disable compression of the file contents.')
|
|
),
|
|
OptionRecommendation(name='mobi_toc_at_start',
|
|
recommended_value=False,
|
|
help=_('When adding the Table of Contents to the book, add it at the start of the '
|
|
'book instead of the end. Not recommended.')
|
|
),
|
|
OptionRecommendation(name='extract_to',
|
|
help=_('Extract the contents of the generated %s file to the '
|
|
'specified directory. The contents of the directory are first '
|
|
'deleted, so be careful.') % 'AZW3'),
|
|
OptionRecommendation(name='share_not_sync', recommended_value=False,
|
|
help=_('Enable sharing of book content via Facebook etc. '
|
|
' on the Kindle. WARNING: Using this feature means that '
|
|
' the book will not auto sync its last read position '
|
|
' on multiple devices. Complain to Amazon.')
|
|
),
|
|
}
|
|
|
|
def convert(self, oeb, output_path, input_plugin, opts, log):
|
|
from ebook_converter.ebooks.mobi.writer2.resources import Resources
|
|
from ebook_converter.ebooks.mobi.writer8.main import create_kf8_book
|
|
from ebook_converter.ebooks.mobi.writer8.cleanup import remove_duplicate_anchors
|
|
|
|
self.oeb, self.opts, self.log = oeb, opts, log
|
|
opts.mobi_periodical = self.is_periodical
|
|
passthrough = getattr(opts, 'mobi_passthrough', False)
|
|
remove_duplicate_anchors(oeb)
|
|
|
|
resources = Resources(self.oeb, self.opts, self.is_periodical,
|
|
add_fonts=True, process_images=False)
|
|
if not passthrough:
|
|
remove_html_cover(self.oeb, self.log)
|
|
|
|
# Split on pagebreaks so that the resulting KF8 is faster to load
|
|
from ebook_converter.ebooks.oeb.transforms.split import Split
|
|
Split()(self.oeb, self.opts)
|
|
|
|
kf8 = create_kf8_book(self.oeb, self.opts, resources, for_joint=False)
|
|
|
|
kf8.write(output_path)
|
|
extract_mobi(output_path, opts)
|
|
|
|
def specialize_css_for_output(self, log, opts, item, stylizer):
|
|
from ebook_converter.ebooks.mobi.writer8.cleanup import CSSCleanup
|
|
CSSCleanup(log, opts)(item, stylizer)
|