mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-04 09:54:11 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
67 lines
2.4 KiB
Python
67 lines
2.4 KiB
Python
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
__license__ = 'GPL 3'
|
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
import os
|
|
|
|
from ebook_converter.customize.conversion import InputFormatPlugin
|
|
from ebook_converter.polyglot.builtins import unicode_type
|
|
|
|
|
|
class MOBIInput(InputFormatPlugin):
    """Input plugin that unpacks MOBI-family e-books into HTML and an OPF.

    After ``convert`` has run, two instance flags describe what was found:

    * ``is_kf8`` -- True when the book was processed by ``Mobi8Reader``.
    * ``mobi_is_joint`` -- True when the reader reported a KF8 type of
      ``'joint'``.

    For KF8 books ``encrypted_fonts`` is additionally copied from the
    ``Mobi8Reader`` instance.
    """

    name = 'MOBI Input'
    author = 'Kovid Goyal'
    description = 'Convert MOBI files (.mobi, .prc, .azw) to HTML'
    file_types = {'mobi', 'prc', 'azw', 'azw3', 'pobi'}
    commit_name = 'mobi_input'

    def convert(self, stream, options, file_ext, log,
                accelerators):
        """Extract the book in *stream* and return the path of its OPF.

        :param stream: the MOBI data, handed straight to ``MobiReader``.
        :param options: conversion options; ``input_encoding`` and
            ``debug_pipeline`` are read from it.
        :param file_ext: not used by this method.
        :param log: callable logger, also passed on to the readers.
        :param accelerators: dict that receives a ``'pagebreaks'`` XPath
            expression for non-KF8 books.
        :return: for KF8 books the absolute path returned by
            ``Mobi8Reader``; otherwise ``MobiReader.created_opf_path``.

        Extracted files (and ``debug-raw.html`` when raw markup is
        available) are written using relative paths, i.e. into the
        current working directory.
        """
        self.is_kf8 = False
        self.mobi_is_joint = False

        # Imported lazily, as in the original code, so that loading the
        # plugin does not pull in the readers or lxml.
        from ebook_converter.ebooks.mobi.reader.mobi6 import MobiReader
        from lxml import html

        parse_cache = {}
        try:
            mr = MobiReader(stream, log, options.input_encoding,
                            options.debug_pipeline)
            if mr.kf8_type is None:
                mr.extract_content('.', parse_cache)
        except Exception as err:
            # Only real errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are allowed to propagate. Record why the first
            # attempt failed instead of discarding the error silently.
            log('Reading MOBI failed (%r), retrying with '
                'try_extra_data_fix=True' % err)
            mr = MobiReader(stream, log, options.input_encoding,
                            options.debug_pipeline, try_extra_data_fix=True)
            if mr.kf8_type is None:
                mr.extract_content('.', parse_cache)

        if mr.kf8_type is not None:
            log('Found KF8 MOBI of type %r' % mr.kf8_type)
            if mr.kf8_type == 'joint':
                self.mobi_is_joint = True
            from ebook_converter.ebooks.mobi.reader.mobi8 import Mobi8Reader
            # From here on ``mr`` is the KF8 reader wrapping the MOBI 6 one;
            # calling it performs the extraction and yields the OPF path.
            mr = Mobi8Reader(mr, log)
            opf = os.path.abspath(mr())
            self.encrypted_fonts = mr.encrypted_fonts
            self.is_kf8 = True
            return opf

        # The raw markup is stored in the cache under a reserved key; take
        # it out so that the loop below only sees file-name -> tree entries.
        raw = parse_cache.pop('calibre_raw_mobi_markup', False)
        if raw:
            if isinstance(raw, unicode_type):
                raw = raw.encode('utf-8')
            with open('debug-raw.html', 'wb') as debug_file:
                debug_file.write(raw)

        from ebook_converter.ebooks.oeb.base import close_self_closing_tags
        for file_name, root in parse_cache.items():
            markup = html.tostring(root, encoding='utf-8', method='xml',
                                   include_meta_content_type=False)
            markup = close_self_closing_tags(markup)
            with open(file_name, 'wb') as out:
                out.write(markup)

        accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]'
        return mr.created_opf_path