mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-06 11:14:12 +01:00
163 lines
7.2 KiB
Python
163 lines
7.2 KiB
Python
import os
|
|
import shutil
|
|
|
|
from ebook_converter.customize.conversion import OutputFormatPlugin, \
|
|
OptionRecommendation
|
|
from ebook_converter.ptempfile import TemporaryDirectory, TemporaryFile
|
|
|
|
|
|
__license__ = 'GPL 3'
|
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
# Newline styles accepted by the ``newline`` option of TXTOutput; the same
# list is published to user interfaces through ``TXTOutput.ui_data``.
NEWLINE_TYPES = ['system', 'unix', 'old_mac', 'windows']
|
|
|
|
|
|
class TXTOutput(OutputFormatPlugin):
    """Output plugin that writes the converted book as a text file.

    The ``txt_output_formatting`` option selects which writer produces the
    text: plain text, Markdown or Textile.
    """

    name = 'TXT Output'
    author = 'John Schember'
    file_type = 'txt'
    commit_name = 'txt_output'

    # Data published for user interfaces: the selectable newline styles and
    # the translated labels of the supported formatting types.
    ui_data = {
        'newline_types': NEWLINE_TYPES,
        'formatting_types': {
            'plain': _('Plain text'),
            'markdown': _('Markdown formatted text'),
            'textile': _('TexTile formatted text')
        },
    }

    options = {
        OptionRecommendation(name='newline', recommended_value='system',
            level=OptionRecommendation.LOW,
            short_switch='n', choices=NEWLINE_TYPES,
            help=_('Type of newline to use. Options are %s. Default is \'system\'. '
                'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
                'For macOS use \'unix\'. \'system\' will default to the newline '
                'type used by this OS.') % sorted(NEWLINE_TYPES)),
        OptionRecommendation(name='txt_output_encoding', recommended_value='utf-8',
            level=OptionRecommendation.LOW,
            help=_('Specify the character encoding of the output document. '
                'The default is utf-8.')),
        OptionRecommendation(name='inline_toc',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Add Table of Contents to beginning of the book.')),
        OptionRecommendation(name='max_line_length',
            recommended_value=0, level=OptionRecommendation.LOW,
            help=_('The maximum number of characters per line. This splits on '
                'the first space before the specified value. If no space is found '
                'the line will be broken at the space after and will exceed the '
                'specified value. Also, there is a minimum of 25 characters. '
                'Use 0 to disable line splitting.')),
        OptionRecommendation(name='force_max_line_length',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Force splitting on the max-line-length value when no space '
                'is present. Also allows max-line-length to be below the minimum')),
        OptionRecommendation(name='txt_output_formatting',
            recommended_value='plain',
            choices=list(ui_data['formatting_types']),
            help=_('Formatting used within the document.\n'
                '* plain: {plain}\n'
                '* markdown: {markdown}\n'
                '* textile: {textile}').format(**ui_data['formatting_types'])),
        OptionRecommendation(name='keep_links',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Do not remove links within the document. This is only '
                'useful when paired with a txt-output-formatting option that '
                'is not none because links are always removed with plain text output.')),
        OptionRecommendation(name='keep_image_references',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Do not remove image references within the document. This is only '
                'useful when paired with a txt-output-formatting option that '
                'is not none because links are always removed with plain text output.')),
        OptionRecommendation(name='keep_color',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Do not remove font color from output. This is only useful when '
                'txt-output-formatting is set to textile. Textile is the only '
                'formatting that supports setting font color. If this option is '
                'not specified font color will not be set and default to the '
                'color displayed by the reader (generally this is black).')),
    }

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Render ``oeb_book`` as text and write it to ``output_path``.

        The writer that was used is kept on ``self.writer``.

        :param oeb_book: book handed to the writer's ``extract_content``.
        :param output_path: either a filesystem path or an object with a
            ``write`` method. Such an object must also support ``seek`` and
            ``truncate``; it receives bytes and is left open.
        :param input_plugin: not used by this method.
        :param opts: conversion options; ``txt_output_formatting``,
            ``newline`` and ``txt_output_encoding`` are read here and the
            whole object is passed on to the writer.
        :param log: logger given to the writer and used for debug output.
        """
        from ebook_converter.ebooks.txt.txtml import TXTMLizer
        from ebook_converter.utils.cleantext import clean_ascii_chars
        from ebook_converter.ebooks.txt.newlines import specified_newlines, TxtNewlines

        formatting = opts.txt_output_formatting.lower()
        if formatting == 'markdown':
            from ebook_converter.ebooks.txt.markdownml import MarkdownMLizer
            self.writer = MarkdownMLizer(log)
        elif formatting == 'textile':
            from ebook_converter.ebooks.txt.textileml import TextileMLizer
            self.writer = TextileMLizer(log)
        else:
            self.writer = TXTMLizer(log)

        txt = self.writer.extract_content(oeb_book, opts)
        txt = clean_ascii_chars(txt)

        log.debug('\tReplacing newlines with selected type...')
        txt = specified_newlines(TxtNewlines(opts.newline).newline, txt)

        # Encode up front so that an unknown encoding name fails before the
        # destination is opened or truncated.
        data = txt.encode(opts.txt_output_encoding, 'replace')

        if hasattr(output_path, 'write'):
            # Caller-owned stream: replace its contents but do not close it.
            output_path.seek(0)
            output_path.truncate()
            output_path.write(data)
            return

        out_dir = os.path.dirname(output_path)
        if out_dir:
            # exist_ok avoids the check-then-create race.
            os.makedirs(out_dir, exist_ok=True)
        # The context manager closes the file even if the write fails.
        with open(output_path, 'wb') as out_stream:
            out_stream.write(data)
|
|
|
|
|
|
class TXTZOutput(TXTOutput):
    """Output plugin that packs the text output into a TXTZ archive.

    The archive is built from a temporary directory holding the text file
    produced by ``TXTOutput.convert``, the book's images and a
    ``metadata.opf`` file.
    """

    name = 'TXTZ Output'
    author = 'John Schember'
    file_type = 'txtz'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Build the archive contents in a temporary directory and zip them.

        :param oeb_book: book to convert; its manifest supplies the images
            and its metadata is serialized via ``to_opf1()``.
        :param output_path: destination handed to ``ZipFile`` in ``'w'`` mode.
        :param input_plugin: forwarded unchanged to ``TXTOutput.convert``.
        :param opts: conversion options; ``txt_output_formatting`` decides
            the name of the text file inside the archive.
        :param log: logger forwarded to ``TXTOutput.convert``.
        """
        from ebook_converter.ebooks.oeb.base import OEB_IMAGES
        from ebook_converter.utils.zipfile import ZipFile
        from lxml import etree

        with TemporaryDirectory('_txtz_output') as tdir:
            # TXT
            txt_name = 'index.txt'
            if opts.txt_output_formatting.lower() == 'textile':
                txt_name = 'index.text'
            with TemporaryFile(txt_name) as tf:
                # tf serves both as the output target of the text conversion
                # and as the source of the copy into the archive directory.
                TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log)
                shutil.copy(tf, os.path.join(tdir, txt_name))

            # Images
            for item in oeb_book.manifest:
                if item.media_type not in OEB_IMAGES:
                    continue
                if hasattr(self.writer, 'images'):
                    # The writer keeps its own href -> file name mapping;
                    # images missing from it are left out of the archive.
                    if item.href not in self.writer.images:
                        continue
                    path = os.path.join(tdir, 'images')
                    href = self.writer.images[item.href]
                else:
                    # Keep the directory part of the manifest href.
                    path = os.path.join(tdir, os.path.dirname(item.href))
                    href = os.path.basename(item.href)
                # exist_ok avoids the check-then-create race.
                os.makedirs(path, exist_ok=True)
                with open(os.path.join(path, href), 'wb') as imgf:
                    imgf.write(item.data)

            # Metadata
            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))

            txtz = ZipFile(output_path, 'w')
            try:
                txtz.add_dir(tdir)
            finally:
                # Close explicitly so the archive is finalized here, also
                # when add_dir fails, instead of at garbage collection.
                txtz.close()
|