mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-04 18:14:11 +01:00
334 lines
15 KiB
Python
334 lines
15 KiB
Python
from ebook_converter.customize.conversion import (OutputFormatPlugin,
|
|
OptionRecommendation)
|
|
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
|
|
def remove_html_cover(oeb, log):
|
|
from ebook_converter.ebooks.oeb.base import OEB_DOCS
|
|
|
|
if not oeb.metadata.cover \
|
|
or 'cover' not in oeb.guide:
|
|
return
|
|
href = oeb.guide['cover'].href
|
|
del oeb.guide['cover']
|
|
item = oeb.manifest.hrefs[href]
|
|
if item.spine_position is not None:
|
|
log.warn('Found an HTML cover: ', item.href, 'removing it.',
|
|
'If you find some content missing from the output MOBI, it '
|
|
'is because you misidentified the HTML cover in the input '
|
|
'document')
|
|
oeb.spine.remove(item)
|
|
if item.media_type in OEB_DOCS:
|
|
oeb.manifest.remove(item)
|
|
|
|
|
|
def extract_mobi(output_path, opts):
|
|
if opts.extract_to is not None:
|
|
from ebook_converter.ebooks.mobi.debug.main import inspect_mobi
|
|
ddir = opts.extract_to
|
|
inspect_mobi(output_path, ddir=ddir)
|
|
|
|
|
|
class MOBIOutput(OutputFormatPlugin):
|
|
|
|
name = 'MOBI Output'
|
|
author = 'Kovid Goyal'
|
|
file_type = 'mobi'
|
|
commit_name = 'mobi_output'
|
|
ui_data = {'file_types': ['old', 'both', 'new']}
|
|
|
|
options = {
|
|
OptionRecommendation(name='prefer_author_sort',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('When present, use author sort field as author.')
|
|
),
|
|
OptionRecommendation(name='no_inline_toc',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('Don\'t add Table of Contents to the book. Useful if '
|
|
'the book has its own table of contents.')),
|
|
OptionRecommendation(name='toc_title', recommended_value=None,
|
|
help=_('Title for any generated in-line table of contents.')
|
|
),
|
|
OptionRecommendation(name='dont_compress',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('Disable compression of the file contents.')
|
|
),
|
|
OptionRecommendation(name='personal_doc', recommended_value='[PDOC]',
|
|
help=_('Tag for MOBI files to be marked as personal documents.'
|
|
' This option has no effect on the conversion. It is used'
|
|
' only when sending MOBI files to a device. If the file'
|
|
' being sent has the specified tag, it will be marked as'
|
|
' a personal document when sent to the Kindle.')
|
|
),
|
|
OptionRecommendation(name='mobi_ignore_margins',
|
|
recommended_value=False,
|
|
help=_('Ignore margins in the input document. If False, then '
|
|
'the MOBI output plugin will try to convert margins specified'
|
|
' in the input document, otherwise it will ignore them.')
|
|
),
|
|
OptionRecommendation(name='mobi_toc_at_start',
|
|
recommended_value=False,
|
|
help=_('When adding the Table of Contents to the book, add it at the start of the '
|
|
'book instead of the end. Not recommended.')
|
|
),
|
|
OptionRecommendation(name='extract_to',
|
|
help=_('Extract the contents of the generated %s file to the '
|
|
'specified directory. The contents of the directory are first '
|
|
'deleted, so be careful.') % 'MOBI'
|
|
),
|
|
OptionRecommendation(name='share_not_sync', recommended_value=False,
|
|
help=_('Enable sharing of book content via Facebook etc. '
|
|
' on the Kindle. WARNING: Using this feature means that '
|
|
' the book will not auto sync its last read position '
|
|
' on multiple devices. Complain to Amazon.')
|
|
),
|
|
OptionRecommendation(name='mobi_keep_original_images',
|
|
recommended_value=False,
|
|
help=_('By default calibre converts all images to JPEG format '
|
|
'in the output MOBI file. This is for maximum compatibility '
|
|
'as some older MOBI viewers have problems with other image '
|
|
'formats. This option tells calibre not to do this. '
|
|
'Useful if your document contains lots of GIF/PNG images that '
|
|
'become very large when converted to JPEG.')),
|
|
OptionRecommendation(name='mobi_file_type', choices=ui_data['file_types'], recommended_value='old',
|
|
help=_('By default calibre generates MOBI files that contain the '
|
|
'old MOBI 6 format. This format is compatible with all '
|
|
'devices. However, by changing this setting, you can tell '
|
|
'calibre to generate MOBI files that contain both MOBI 6 and '
|
|
'the new KF8 format, or only the new KF8 format. KF8 has '
|
|
'more features than MOBI 6, but only works with newer Kindles. '
|
|
'Allowed values: {}').format('old, both, new')),
|
|
|
|
}
|
|
|
|
def check_for_periodical(self):
|
|
if self.is_periodical:
|
|
self.periodicalize_toc()
|
|
self.check_for_masthead()
|
|
self.opts.mobi_periodical = True
|
|
else:
|
|
self.opts.mobi_periodical = False
|
|
|
|
def check_for_masthead(self):
|
|
found = 'masthead' in self.oeb.guide
|
|
if not found:
|
|
from ebook_converter.ebooks import generate_masthead
|
|
self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
|
|
raw = generate_masthead(str(self.oeb.metadata['title'][0]))
|
|
id, href = self.oeb.manifest.generate('masthead', 'masthead')
|
|
self.oeb.manifest.add(id, href, 'image/gif', data=raw)
|
|
self.oeb.guide.add('masthead', 'Masthead Image', href)
|
|
else:
|
|
self.oeb.log.debug('Using mastheadImage supplied in manifest...')
|
|
|
|
def periodicalize_toc(self):
|
|
from ebook_converter.ebooks.oeb.base import TOC
|
|
toc = self.oeb.toc
|
|
if not toc or len(self.oeb.spine) < 3:
|
|
return
|
|
if toc and toc[0].klass != 'periodical':
|
|
one, two = self.oeb.spine[0], self.oeb.spine[1]
|
|
self.log('Converting TOC for MOBI periodical indexing...')
|
|
|
|
articles = {}
|
|
if toc.depth() < 3:
|
|
# single section periodical
|
|
self.oeb.manifest.remove(one)
|
|
self.oeb.manifest.remove(two)
|
|
sections = [TOC(klass='section', title=_('All articles'),
|
|
href=self.oeb.spine[0].href)]
|
|
for x in toc:
|
|
sections[0].nodes.append(x)
|
|
else:
|
|
# multi-section periodical
|
|
self.oeb.manifest.remove(one)
|
|
sections = list(toc)
|
|
for i,x in enumerate(sections):
|
|
x.klass = 'section'
|
|
articles_ = list(x)
|
|
if articles_:
|
|
self.oeb.manifest.remove(self.oeb.manifest.hrefs[x.href])
|
|
x.href = articles_[0].href
|
|
|
|
for sec in sections:
|
|
articles[id(sec)] = []
|
|
for a in list(sec):
|
|
a.klass = 'article'
|
|
articles[id(sec)].append(a)
|
|
sec.nodes.remove(a)
|
|
|
|
root = TOC(klass='periodical', href=self.oeb.spine[0].href,
|
|
title=str(self.oeb.metadata.title[0]))
|
|
|
|
for s in sections:
|
|
if articles[id(s)]:
|
|
for a in articles[id(s)]:
|
|
s.nodes.append(a)
|
|
root.nodes.append(s)
|
|
|
|
for x in list(toc.nodes):
|
|
toc.nodes.remove(x)
|
|
|
|
toc.nodes.append(root)
|
|
|
|
# Fix up the periodical href to point to first section href
|
|
toc.nodes[0].href = toc.nodes[0].nodes[0].href
|
|
|
|
def convert(self, oeb, output_path, input_plugin, opts, log):
|
|
from ebook_converter.ebooks.mobi.writer2.resources import Resources
|
|
self.log, self.opts, self.oeb = log, opts, oeb
|
|
|
|
mobi_type = opts.mobi_file_type
|
|
if self.is_periodical:
|
|
mobi_type = 'old' # Amazon does not support KF8 periodicals
|
|
create_kf8 = mobi_type in ('new', 'both')
|
|
|
|
remove_html_cover(self.oeb, self.log)
|
|
resources = Resources(oeb, opts, self.is_periodical,
|
|
add_fonts=create_kf8)
|
|
self.check_for_periodical()
|
|
|
|
if create_kf8:
|
|
from ebook_converter.ebooks.mobi.writer8.cleanup import remove_duplicate_anchors
|
|
remove_duplicate_anchors(self.oeb)
|
|
# Split on pagebreaks so that the resulting KF8 is faster to load
|
|
from ebook_converter.ebooks.oeb.transforms.split import Split
|
|
Split()(self.oeb, self.opts)
|
|
|
|
kf8 = self.create_kf8(resources, for_joint=mobi_type=='both'
|
|
) if create_kf8 else None
|
|
if mobi_type == 'new':
|
|
kf8.write(output_path)
|
|
extract_mobi(output_path, opts)
|
|
return
|
|
|
|
self.log('Creating MOBI 6 output')
|
|
self.write_mobi(input_plugin, output_path, kf8, resources)
|
|
|
|
def create_kf8(self, resources, for_joint=False):
|
|
from ebook_converter.ebooks.mobi.writer8.main import create_kf8_book
|
|
return create_kf8_book(self.oeb, self.opts, resources,
|
|
for_joint=for_joint)
|
|
|
|
def write_mobi(self, input_plugin, output_path, kf8, resources):
|
|
from ebook_converter.ebooks.mobi.mobiml import MobiMLizer
|
|
from ebook_converter.ebooks.oeb.transforms.manglecase import CaseMangler
|
|
from ebook_converter.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
|
|
from ebook_converter.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
|
|
from ebook_converter.customize.ui import plugin_for_input_format
|
|
|
|
opts, oeb = self.opts, self.oeb
|
|
if not opts.no_inline_toc:
|
|
tocadder = HTMLTOCAdder(title=opts.toc_title, position='start' if
|
|
opts.mobi_toc_at_start else 'end')
|
|
tocadder(oeb, opts)
|
|
mangler = CaseMangler()
|
|
mangler(oeb, opts)
|
|
try:
|
|
rasterizer = SVGRasterizer()
|
|
rasterizer(oeb, opts)
|
|
except Unavailable:
|
|
self.log.warn('SVG rasterizer unavailable, SVG will not be converted')
|
|
else:
|
|
# Add rasterized SVG images
|
|
resources.add_extra_images()
|
|
if hasattr(self.oeb, 'inserted_metadata_jacket'):
|
|
self.workaround_fire_bugs(self.oeb.inserted_metadata_jacket)
|
|
mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
|
|
mobimlizer(oeb, opts)
|
|
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
|
|
from ebook_converter.ebooks.mobi.writer2.main import MobiWriter
|
|
writer = MobiWriter(opts, resources, kf8,
|
|
write_page_breaks_after_item=write_page_breaks_after_item)
|
|
writer(oeb, output_path)
|
|
extract_mobi(output_path, opts)
|
|
|
|
def specialize_css_for_output(self, log, opts, item, stylizer):
|
|
from ebook_converter.ebooks.mobi.writer8.cleanup import CSSCleanup
|
|
CSSCleanup(log, opts)(item, stylizer)
|
|
|
|
def workaround_fire_bugs(self, jacket):
|
|
# The idiotic Fire crashes when trying to render the table used to
|
|
# layout the jacket
|
|
from ebook_converter.ebooks.oeb.base import XHTML
|
|
for table in jacket.data.xpath('//*[local-name()="table"]'):
|
|
table.tag = XHTML('div')
|
|
for tr in table.xpath('descendant::*[local-name()="tr"]'):
|
|
cols = tr.xpath('descendant::*[local-name()="td"]')
|
|
tr.tag = XHTML('div')
|
|
for td in cols:
|
|
td.tag = XHTML('span' if cols else 'div')
|
|
|
|
|
|
class AZW3Output(OutputFormatPlugin):
|
|
|
|
name = 'AZW3 Output'
|
|
author = 'Kovid Goyal'
|
|
file_type = 'azw3'
|
|
commit_name = 'azw3_output'
|
|
|
|
options = {
|
|
OptionRecommendation(name='prefer_author_sort',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('When present, use author sort field as author.')
|
|
),
|
|
OptionRecommendation(name='no_inline_toc',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('Don\'t add Table of Contents to the book. Useful if '
|
|
'the book has its own table of contents.')),
|
|
OptionRecommendation(name='toc_title', recommended_value=None,
|
|
help=_('Title for any generated in-line table of contents.')
|
|
),
|
|
OptionRecommendation(name='dont_compress',
|
|
recommended_value=False, level=OptionRecommendation.LOW,
|
|
help=_('Disable compression of the file contents.')
|
|
),
|
|
OptionRecommendation(name='mobi_toc_at_start',
|
|
recommended_value=False,
|
|
help=_('When adding the Table of Contents to the book, add it at the start of the '
|
|
'book instead of the end. Not recommended.')
|
|
),
|
|
OptionRecommendation(name='extract_to',
|
|
help=_('Extract the contents of the generated %s file to the '
|
|
'specified directory. The contents of the directory are first '
|
|
'deleted, so be careful.') % 'AZW3'),
|
|
OptionRecommendation(name='share_not_sync', recommended_value=False,
|
|
help=_('Enable sharing of book content via Facebook etc. '
|
|
' on the Kindle. WARNING: Using this feature means that '
|
|
' the book will not auto sync its last read position '
|
|
' on multiple devices. Complain to Amazon.')
|
|
),
|
|
}
|
|
|
|
def convert(self, oeb, output_path, input_plugin, opts, log):
|
|
from ebook_converter.ebooks.mobi.writer2.resources import Resources
|
|
from ebook_converter.ebooks.mobi.writer8.main import create_kf8_book
|
|
from ebook_converter.ebooks.mobi.writer8.cleanup import remove_duplicate_anchors
|
|
|
|
self.oeb, self.opts, self.log = oeb, opts, log
|
|
opts.mobi_periodical = self.is_periodical
|
|
passthrough = getattr(opts, 'mobi_passthrough', False)
|
|
remove_duplicate_anchors(oeb)
|
|
|
|
resources = Resources(self.oeb, self.opts, self.is_periodical,
|
|
add_fonts=True, process_images=False)
|
|
if not passthrough:
|
|
remove_html_cover(self.oeb, self.log)
|
|
|
|
# Split on pagebreaks so that the resulting KF8 is faster to load
|
|
from ebook_converter.ebooks.oeb.transforms.split import Split
|
|
Split()(self.oeb, self.opts)
|
|
|
|
kf8 = create_kf8_book(self.oeb, self.opts, resources, for_joint=False)
|
|
|
|
kf8.write(output_path)
|
|
extract_mobi(output_path, opts)
|
|
|
|
def specialize_css_for_output(self, log, opts, item, stylizer):
|
|
from ebook_converter.ebooks.mobi.writer8.cleanup import CSSCleanup
|
|
CSSCleanup(log, opts)(item, stylizer)
|