import os
import shutil

from ebook_converter.customize.conversion import (OutputFormatPlugin,
                                                  OptionRecommendation)
from ebook_converter.ptempfile import TemporaryDirectory, TemporaryFile


__license__ = 'GPL 3'
__copyright__ = '2009, John Schember '
__docformat__ = 'restructuredtext en'

NEWLINE_TYPES = ['system', 'unix', 'old_mac', 'windows']


class TXTOutput(OutputFormatPlugin):
    """Output plugin that writes a book as a single text file.

    The text is produced by one of three writers chosen by the
    ``txt_output_formatting`` option: plain text, Markdown or Textile.
    """

    name = 'TXT Output'
    author = 'John Schember'
    file_type = 'txt'
    commit_name = 'txt_output'
    ui_data = {
        'newline_types': NEWLINE_TYPES,
        'formatting_types': {
            'plain': 'Plain text',
            'markdown': 'Markdown formatted text',
            'textile': 'TexTile formatted text'
        },
    }

    options = {
        OptionRecommendation(
            name='newline', recommended_value='system',
            level=OptionRecommendation.LOW,
            short_switch='n', choices=NEWLINE_TYPES,
            help='Type of newline to use. Options are %s. Default is '
            '\'system\'. '
            'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
            'For macOS use \'unix\'. \'system\' will default to the newline '
            'type used by this OS.' % sorted(NEWLINE_TYPES)),
        OptionRecommendation(
            name='txt_output_encoding', recommended_value='utf-8',
            level=OptionRecommendation.LOW,
            help='Specify the character encoding of the output document. '
            'The default is utf-8.'),
        OptionRecommendation(
            name='inline_toc', recommended_value=False,
            level=OptionRecommendation.LOW,
            help='Add Table of Contents to beginning of the book.'),
        OptionRecommendation(
            name='max_line_length', recommended_value=0,
            level=OptionRecommendation.LOW,
            help='The maximum number of characters per line. This splits on '
            'the first space before the specified value. If no space is '
            'found the line will be broken at the space after and will '
            'exceed the specified value. Also, there is a minimum of 25 '
            'characters. Use 0 to disable line splitting.'),
        OptionRecommendation(
            name='force_max_line_length', recommended_value=False,
            level=OptionRecommendation.LOW,
            help='Force splitting on the max-line-length value when no space '
            'is present. Also allows max-line-length to be below the '
            'minimum'),
        OptionRecommendation(
            name='txt_output_formatting', recommended_value='plain',
            choices=list(ui_data['formatting_types']),
            help='Formatting used within the document.\n'
            '* plain: {plain}\n'
            '* markdown: {markdown}\n'
            '* textile: {textile}'
            ''.format(**ui_data['formatting_types'])),
        OptionRecommendation(
            name='keep_links', recommended_value=False,
            level=OptionRecommendation.LOW,
            help='Do not remove links within the document. This is only '
            'useful when paired with a txt-output-formatting option that '
            'is not none because links are always removed with plain '
            'text output.'),
        OptionRecommendation(
            name='keep_image_references', recommended_value=False,
            level=OptionRecommendation.LOW,
            help='Do not remove image references within the document. This is '
            'only useful when paired with a txt-output-formatting option '
            'that is not none because links are always removed with '
            'plain text output.'),
        OptionRecommendation(
            name='keep_color', recommended_value=False,
            level=OptionRecommendation.LOW,
            help='Do not remove font color from output. This is only useful '
            'when txt-output-formatting is set to textile. Textile is '
            'the only formatting that supports setting font color. If '
            'this option is not specified font color will not be set and '
            'default to the color displayed by the reader (generally '
            'this is black).')
    }

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Render ``oeb_book`` as text and write it to ``output_path``.

        :param oeb_book: book object handed to the writer's
            ``extract_content``.
        :param output_path: either a filesystem path or an object with a
            ``write`` attribute. A path is opened in binary mode (its
            parent directory is created if missing) and closed before
            returning; a stream object is rewound, truncated, written to
            and left open for the caller.
        :param input_plugin: unused here.
        :param opts: conversion options; ``txt_output_formatting``,
            ``newline`` and ``txt_output_encoding`` are read.
        :param log: logger passed to the writer and used for debug output.

        The selected writer is stored on ``self.writer`` so subclasses can
        inspect it after the call.
        """
        from ebook_converter.ebooks.txt.txtml import TXTMLizer
        from ebook_converter.utils.cleantext import clean_ascii_chars
        from ebook_converter.ebooks.txt.newlines import (specified_newlines,
                                                         TxtNewlines)

        formatting = opts.txt_output_formatting.lower()
        if formatting == 'markdown':
            from ebook_converter.ebooks.txt.markdownml import MarkdownMLizer
            self.writer = MarkdownMLizer(log)
        elif formatting == 'textile':
            from ebook_converter.ebooks.txt.textileml import TextileMLizer
            self.writer = TextileMLizer(log)
        else:
            self.writer = TXTMLizer(log)

        txt = self.writer.extract_content(oeb_book, opts)
        txt = clean_ascii_chars(txt)

        log.debug('\tReplacing newlines with selected type...')
        txt = specified_newlines(TxtNewlines(opts.newline).newline, txt)

        # Only streams opened here are closed here; a caller-supplied
        # stream stays under the caller's control.
        owns_stream = not hasattr(output_path, 'write')
        if owns_stream:
            parent = os.path.dirname(output_path)
            if parent != '' and not os.path.exists(parent):
                os.makedirs(parent)
            out_stream = open(output_path, 'wb')
        else:
            out_stream = output_path

        try:
            out_stream.seek(0)
            out_stream.truncate()
            # Characters the target encoding cannot represent are replaced
            # rather than aborting the conversion.
            out_stream.write(txt.encode(opts.txt_output_encoding, 'replace'))
        finally:
            # Release the handle even if encoding or writing fails.
            if owns_stream:
                out_stream.close()


class TXTZOutput(TXTOutput):
    """Output plugin that bundles text, images and metadata in one archive.

    The text file is produced by :class:`TXTOutput`; images from the book
    manifest and a ``metadata.opf`` file are added next to it.
    """

    name = 'TXTZ Output'
    author = 'John Schember'
    file_type = 'txtz'

    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        """Build the archive contents in a temp directory and zip them.

        :param oeb_book: book whose text, manifest images and metadata are
            exported.
        :param output_path: destination handed to ``ZipFile`` in ``'w'``
            mode.
        :param input_plugin: forwarded unchanged to ``TXTOutput.convert``.
        :param opts: conversion options; ``txt_output_formatting`` selects
            the name of the text file inside the archive.
        :param log: logger forwarded to ``TXTOutput.convert``.
        """
        from ebook_converter.ebooks.oeb.base import OEB_IMAGES
        from ebook_converter.utils.zipfile import ZipFile
        from lxml import etree

        with TemporaryDirectory('_txtz_output') as tdir:
            # TXT
            if opts.txt_output_formatting.lower() == 'textile':
                txt_name = 'index.text'
            else:
                txt_name = 'index.txt'
            with TemporaryFile(txt_name) as tf:
                TXTOutput.convert(self, oeb_book, tf, input_plugin, opts, log)
                shutil.copy(tf, os.path.join(tdir, txt_name))

            # Images
            # When the writer exposes an ``images`` mapping, only the images
            # it lists are exported, under the names it assigned, into a
            # flat 'images' directory. Otherwise every manifest image keeps
            # its original relative location.
            writer_has_images = hasattr(self.writer, 'images')
            for item in oeb_book.manifest:
                if item.media_type not in OEB_IMAGES:
                    continue
                if writer_has_images:
                    if item.href not in self.writer.images:
                        continue
                    path = os.path.join(tdir, 'images')
                    href = self.writer.images[item.href]
                else:
                    path = os.path.join(tdir, os.path.dirname(item.href))
                    href = os.path.basename(item.href)
                if not os.path.exists(path):
                    os.makedirs(path)
                with open(os.path.join(path, href), 'wb') as imgf:
                    imgf.write(item.data)

            # Metadata
            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))

            # Close the archive explicitly instead of leaving it to garbage
            # collection, so the handle is released even if add_dir fails.
            # NOTE(review): assumes the project's ZipFile finalizes the
            # archive in close(), as the stdlib one does - confirm.
            txtz = ZipFile(output_path, 'w')
            try:
                txtz.add_dir(tdir)
            finally:
                txtz.close()