# Mirror of https://github.com/gryf/ebook-converter.git
# (synced 2026-01-22 12:54:12 +01:00; 405 lines, 15 KiB, Python)
"""
|
|
Command line interface to conversion sub-system
|
|
"""
|
|
import collections
|
|
import json
|
|
import numbers
|
|
import optparse
|
|
import os
|
|
import re
|
|
import sys
|
|
|
|
from ebook_converter.utils.config import OptionParser
|
|
from ebook_converter.utils.logging import Log
|
|
from ebook_converter.customize.conversion import OptionRecommendation
|
|
from ebook_converter import init_mimetypes
|
|
|
|
|
|
# Usage text handed to the option parser; '%prog' is optparse's placeholder
# for the program name.  Paragraphs are separated by blank lines.
USAGE = (
    '%prog input_file output_file [options]\n'
    '\n'
    'Convert an e-book from one format to another.\n'
    '\n'
    'input_file is the input and output_file is the output. Both must be '
    'specified as the first two arguments to the command.\n'
    '\n'
    'The output e-book format is guessed from the file extension of '
    'output_file. output_file can also be of the special format .EXT where '
    'EXT is the output file extension. In this case, the name of the output '
    'file is derived from the name of the input file. Note that the '
    'filenames must not start with a hyphen. Finally, if output_file has no '
    'extension, then it is treated as a directory and an "open e-book" (OEB) '
    'consisting of HTML files is written to that directory. These files are '
    'the files that would normally have been passed to the output plugin.\n'
    '\n'
    'After specifying the input and output file you can customize the '
    'conversion by specifying various options. The available options depend '
    'on the input and output file types. To get help on them specify the '
    'input and output file and then use the -h option.\n'
    '\n'
    'For full documentation of the conversion system see\n'
    'https://manual.calibre-ebook.com/conversion.html\n'
)

# Names of the individual heuristic-processing options.
HEURISTIC_OPTIONS = [
    'markup_chapter_headings',
    'italicize_common_cases',
    'fix_indents',
    'html_unwrap_factor',
    'unwrap_lines',
    'delete_blank_paragraphs',
    'format_scene_breaks',
    'dehyphenate',
    'renumber_headings',
    'replace_scene_breaks',
]

# Options that get a '--disable-<name>' switch when their recommended value
# is True (see option_recommendation_to_cli_option).
DEFAULT_TRUE_OPTIONS = [*HEURISTIC_OPTIONS, 'remove_fake_margins']
|
|
|
|
|
|
def print_help(parser):
    """Show the help text of ``parser`` by calling its ``print_help()``."""
    show_help = parser.print_help
    show_help()
|
|
|
|
|
|
def check_command_line_options(parser, args, log):
    """Validate the two positional arguments and resolve both file paths.

    ``args`` is the full argument vector: index 0 is the program name,
    index 1 the input file and index 2 the output file.

    Returns an ``(input_file, output_file)`` tuple.  Raises ``SystemExit(1)``
    when either positional argument is missing or starts with a hyphen, or
    when the input cannot be read (unless it ends in ``.recipe`` or help was
    requested with ``-h``/``--help``).
    """
    positional = args[1:3]
    if len(args) < 3 or any(item.startswith('-') for item in positional):
        print_help(parser)
        log.error('\n\nYou must specify the input AND output files')
        raise SystemExit(1)

    source = os.path.abspath(args[1])
    is_recipe = source.endswith('.recipe')
    readable = os.access(source, os.R_OK)
    wants_help = '-h' in args or '--help' in args

    if not (is_recipe or readable or wants_help):
        log.error('Cannot read from', source)
        raise SystemExit(1)
    if is_recipe and not readable:
        # An unreadable '.recipe' argument is kept exactly as typed instead
        # of as an absolute path (presumably it names a builtin recipe
        # rather than a file on disk -- confirm against the recipe input).
        source = args[1]

    target = args[2]
    # A bare '.EXT' argument (no path separators, and not '.' or '..') means
    # "name the output after the input, with this extension".
    bare_extension = (target.startswith('.') and
                      target[:2] not in {'..', '.'} and
                      '/' not in target and '\\' not in target)
    if bare_extension:
        stem = os.path.splitext(os.path.basename(source))[0]
        target = stem + target

    return source, os.path.abspath(target)
|
|
|
|
|
|
def option_recommendation_to_cli_option(add_option, rec):
    """Register the command line switch(es) for one option recommendation.

    :param add_option: callable that accepts an ``optparse.Option`` (for
        example ``parser.add_option`` or ``group.add_option``).
    :param rec: recommendation object exposing ``option`` (with ``name``,
        ``short_switch``, ``long_switch``, ``help`` and ``choices``) and
        ``recommended_value``, which is used as the option default.

    The option action/type is derived from the type of the recommended
    value: booleans become ``store_true``/``store_false`` flags (whichever
    flips the default), integers are parsed as ``int`` and other real
    numbers as ``float``.
    """
    opt = rec.option
    switches = ['-' + opt.short_switch] if opt.short_switch else []
    switches.append('--' + opt.long_switch)
    attrs = dict(dest=opt.name, help=opt.help, choices=opt.choices,
                 default=rec.recommended_value)

    if isinstance(rec.recommended_value, bool):
        # Giving the switch on the command line inverts the default.
        attrs['action'] = ('store_false' if rec.recommended_value
                           else 'store_true')
    elif isinstance(rec.recommended_value, numbers.Integral):
        # Must be tested before Real and be exclusive with it: every
        # Integral is also a Real, so two independent ``if`` checks would
        # always overwrite 'int' with 'float'.
        attrs['type'] = 'int'
    elif isinstance(rec.recommended_value, numbers.Real):
        attrs['type'] = 'float'

    if opt.long_switch == 'verbose':
        # --verbose may be repeated; a counting action takes no type.
        attrs['action'] = 'count'
        attrs.pop('type', '')
    if opt.name == 'read_metadata_from_opf':
        # Additional alias for the same option.
        switches.append('--from-opf')
    if opt.name == 'transform_css_rules':
        attrs['help'] = ('Path to a file containing rules to transform the '
                         'CSS styles in this book. The easiest way to create '
                         'such a file is to use the wizard for creating rules '
                         'in the calibre GUI. Access it in the "Look & '
                         'feel->Transform styles" section of the conversion '
                         'dialog. Once you create the rules, you can use the '
                         '"Export" button to save them to a file.')
    if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True:
        # Options that are on by default are exposed only as
        # --disable-<switch>.
        switches = ['--disable-' + opt.long_switch]
    add_option(optparse.Option(*switches, **attrs))
|
|
|
|
|
|
def group_titles():
    """Return the titles of the input and output option groups, in order."""
    input_title = 'INPUT OPTIONS'
    output_title = 'OUTPUT OPTIONS'
    return input_title, output_title
|
|
|
|
|
|
def recipe_test(option, opt_str, value, parser):
    """optparse callback that consumes up to two optional integer arguments.

    Leading integers are taken from ``parser.rargs`` (at most two) and
    removed from it.  Scanning stops at the first ``--long`` option, at the
    first ``-x`` style option that is not a number, or at the first token
    ``int()`` cannot convert.  Missing values are filled with ``2`` and the
    resulting 2-tuple is stored on ``parser.values`` under ``option.dest``.
    """
    assert value is None

    def _looks_numeric(text):
        try:
            float(text)
        except ValueError:
            return False
        return True

    collected = []
    for token in parser.rargs:
        # '--foo' always ends the argument list.
        if token.startswith('--'):
            break
        # '-a' ends it as well, but negative numbers ('-3', '-3.0') do not.
        if (token.startswith('-') and len(token) > 1 and
                not _looks_numeric(token)):
            break
        try:
            number = int(token)
        except (TypeError, ValueError, AttributeError):
            break
        collected.append(number)
        if len(collected) == 2:
            break

    # Drop the tokens that were consumed from the remaining arguments.
    del parser.rargs[:len(collected)]

    # Pad with the default value 2 up to two entries.
    collected.extend([2] * (2 - len(collected)))

    setattr(parser.values, option.dest, tuple(collected))
|
|
|
|
|
|
def add_input_output_options(parser, plumber):
    """Add the format specific input and output option groups to ``parser``.

    A group is created only when the plumber reports options for that side.
    When the input format is ``'recipe'``, the option whose long switch is
    ``test`` is registered as a ``--test`` callback option handled by
    ``recipe_test``; every other recommendation is converted with
    ``option_recommendation_to_cli_option``.
    """
    input_options = plumber.input_options
    output_options = plumber.output_options

    def _populate(register, recs):
        for rec in recs:
            is_recipe_test = (plumber.input_fmt == 'recipe' and
                              rec.option.long_switch == 'test')
            if is_recipe_test:
                register(optparse.Option('--test', dest='test',
                                         action='callback',
                                         callback=recipe_test))
            else:
                option_recommendation_to_cli_option(register, rec)

    if input_options:
        in_title = group_titles()[0]
        in_desc = ('Options to control the processing of the input %s file'
                   % plumber.input_fmt)
        in_group = optparse.OptionGroup(parser, in_title, in_desc)
        _populate(in_group.add_option, input_options)
        parser.add_option_group(in_group)

    if output_options:
        out_title = group_titles()[1]
        out_desc = ('Options to control the processing of the output %s'
                    % plumber.output_fmt)
        out_group = optparse.OptionGroup(parser, out_title, out_desc)
        _populate(out_group.add_option, output_options)
        parser.add_option_group(out_group)
|
|
|
|
|
|
def add_pipeline_options(parser, plumber):
    """Add the format independent pipeline options to ``parser``.

    Options are organised in titled groups.  The first section has an empty
    title and its options are added directly to the parser.  Each option
    name is looked up with ``plumber.get_option_by_name`` and is only
    exposed when its recommendation level is below ``HIGH``.
    """
    heuristics_desc = ('Modify the document text and structure '
                       'using common patterns. Disabled by '
                       'default. Use %(en)s to enable. Individual '
                       'actions can be disabled with the %(dis)s '
                       'options.' % dict(en='--enable-heuristics',
                                         dis='--disable-*'))

    # (group title, group description, option names), in display order.
    sections = (
        ('', '', ['input_profile', 'output_profile']),

        ('LOOK AND FEEL',
         'Options to control the look and feel of the output',
         ['base_font_size', 'disable_font_rescaling',
          'font_size_mapping', 'embed_font_family',
          'subset_embedded_fonts', 'embed_all_fonts',
          'line_height', 'minimum_line_height',
          'linearize_tables', 'extra_css', 'filter_css',
          'transform_css_rules', 'expand_css',
          'smarten_punctuation', 'unsmarten_punctuation',
          'margin_top', 'margin_left', 'margin_right',
          'margin_bottom', 'change_justification',
          'insert_blank_line', 'insert_blank_line_size',
          'remove_paragraph_spacing',
          'remove_paragraph_spacing_indent_size',
          'asciiize', 'keep_ligatures']),

        ('HEURISTIC PROCESSING',
         heuristics_desc,
         ['enable_heuristics'] + HEURISTIC_OPTIONS),

        ('SEARCH AND REPLACE',
         'Modify the document text and structure using user defined '
         'patterns.',
         ['sr1_search', 'sr1_replace', 'sr2_search',
          'sr2_replace', 'sr3_search', 'sr3_replace',
          'search_replace']),

        ('STRUCTURE DETECTION',
         'Control auto-detection of document structure.',
         ['chapter', 'chapter_mark',
          'prefer_metadata_cover',
          'remove_first_image', 'insert_metadata',
          'page_breaks_before', 'remove_fake_margins',
          'start_reading_at']),

        ('TABLE OF CONTENTS',
         'Control the automatic generation of a Table of Contents. By '
         'default, if the source file has a Table of Contents, it will be '
         'used in preference to the automatically generated one.',
         ['level1_toc', 'level2_toc', 'level3_toc',
          'toc_threshold', 'max_toc_links',
          'no_chapters_in_toc', 'use_auto_toc',
          'toc_filter', 'duplicate_links_in_toc']),

        ('METADATA',
         'Options to set metadata in the output',
         plumber.metadata_option_names + ['read_metadata_from_opf']),

        ('DEBUG',
         'Options to help with debugging the conversion',
         ['verbose', 'debug_pipeline']),
    )

    for title, description, names in sections:
        if title:
            section = optparse.OptionGroup(parser, title, description)
            parser.add_option_group(section)
            register = section.add_option
        else:
            # Untitled section: options live on the parser itself.
            register = parser.add_option

        for name in names:
            rec = plumber.get_option_by_name(name)
            if rec.level < rec.HIGH:
                option_recommendation_to_cli_option(register, rec)
|
|
|
|
|
|
def option_parser():
    """Create the base option parser with the usage text and --list-recipes.

    Format specific and pipeline options are not added here.
    """
    recipes_help = ('List builtin recipe names. You can create an e-book '
                    'from a builtin recipe like this: ebook-convert '
                    '"Recipe Name.recipe" output.epub')
    base = OptionParser(usage=USAGE)
    base.add_option('--list-recipes', action='store_true', default=False,
                    help=recipes_help)
    return base
|
|
|
|
|
|
class ProgressBar:
    """Progress reporter that writes ``'<percent>% <message>'`` to a log."""

    def __init__(self, log):
        # ``log`` is called with the formatted progress line.
        self.log = log

    def __call__(self, frac, msg=''):
        """Report progress ``frac`` (a fraction, scaled by 100 for display).

        Nothing is logged when ``msg`` is empty.
        """
        if not msg:
            return
        self.log('%d%% %s' % (int(frac * 100), msg))
|
|
|
|
|
|
def create_option_parser(args, log):
    """Build the option parser and the conversion plumber for ``args``.

    ``--version`` and ``--list-recipes`` are handled here directly: the
    requested information is logged and ``SystemExit(0)`` is raised.  With
    fewer than three arguments the help text is printed and ``SystemExit``
    is raised (status 0 if help was requested, otherwise 1).

    Returns a ``(parser, plumber)`` tuple.  Raises ``ValueError`` when the
    input and output resolve to the same path.
    """
    if '--version' in args:
        from ebook_converter.constants_old import (__appname__, __author__,
                                                   __version__)
        program = os.path.basename(args[0])
        log(program, '(' + __appname__, __version__ + ')')
        log('Created by:', __author__)
        raise SystemExit(0)

    if '--list-recipes' in args:
        from ebook_converter.web.feeds.recipes.collection import \
            get_builtin_recipe_titles
        log('Available recipes:')
        recipe_names = sorted(get_builtin_recipe_titles())
        for recipe_name in recipe_names:
            try:
                log('\t' + recipe_name)
            except Exception:
                # Fall back to the repr when the title cannot be logged
                # as-is.
                log('\t' + repr(recipe_name))
        log('%d recipes available' % len(recipe_names))
        raise SystemExit(0)

    parser = option_parser()
    if len(args) < 3:
        print_help(parser)
        help_requested = '-h' in args or '--help' in args
        raise SystemExit(0 if help_requested else 1)

    input_file, output_file = check_command_line_options(parser, args, log)

    # Imported late, only once a conversion is actually going to be set up.
    from ebook_converter.ebooks.conversion.plumber import Plumber

    if os.path.abspath(input_file) == os.path.abspath(output_file):
        raise ValueError('Input file is the same as the output file')

    plumber = Plumber(input_file, output_file, log, ProgressBar(log))
    add_input_output_options(parser, plumber)
    add_pipeline_options(parser, plumber)

    return parser, plumber
|
|
|
|
|
|
def abspath(x):
    """Return ``x`` as an absolute path with ``~`` expanded.

    Strings starting with ``http:`` or ``https:`` are returned unchanged.
    """
    if x.startswith(('http:', 'https:')):
        return x
    expanded = os.path.expanduser(x)
    return os.path.abspath(expanded)
|
|
|
|
|
|
def escape_sr_pattern(exp):
    """Return ``exp`` with every newline replaced by the character U+E123.

    ``read_sr_patterns`` performs the reverse substitution on pattern lines.
    """
    newline, placeholder = '\n', '\ue123'
    return exp.replace(newline, placeholder)
|
|
|
|
|
|
def read_sr_patterns(path, log=None):
    """Read search/replace pairs from ``path`` and return them as JSON.

    The file is UTF-8 text made of alternating lines: a regular expression
    followed by its replacement (which may be an empty line).  Blank lines
    before a regular expression are skipped.  In regular expressions the
    character U+E123 stands for a newline (see ``escape_sr_pattern``).

    :param path: path of the pattern file.
    :param log: optional logger; when given, an invalid expression is
        reported with ``log.error`` and ``SystemExit(1)`` is raised.
    :returns: a JSON array of ``[pattern, replacement]`` pairs.
    :raises ValueError: on an invalid regular expression when ``log`` is
        ``None``.
    """
    with open(path, 'rb') as f:
        # 'utf-8-sig' drops a leading byte order mark, which would otherwise
        # become part of the first pattern and stop it from ever matching.
        lines = f.read().decode('utf-8-sig').splitlines()

    pats = []
    pat = None
    for line in lines:
        if pat is not None:
            # Second line of a pair: the replacement, taken verbatim.
            pats.append((pat, line))
            pat = None
            continue
        if not line.strip():
            continue
        line = line.replace('\ue123', '\n')
        try:
            re.compile(line)
        except Exception as err:
            msg = 'Invalid regular expression: %r from file: %r' % (line,
                                                                    path)
            if log is not None:
                log.error(msg)
                raise SystemExit(1)
            raise ValueError(msg) from err
        pat = line

    if pat is not None:
        # The file ended right after a pattern.  An empty replacement on the
        # very last line is indistinguishable from "no line at all" after
        # splitlines(), so treat it as an empty replacement instead of
        # silently discarding the pattern.
        pats.append((pat, ''))
    return json.dumps(pats)
|
|
|
|
|
|
def main(args=sys.argv):
    """Run a conversion for the command line ``args``.

    :param args: full argument vector, program name first.
    :returns: ``0`` on success, ``1`` when extra positional arguments are
        given or the CSS transform rules fail validation.  Other failures
        surface as ``SystemExit`` or exceptions from the helpers called
        here.
    """
    log = Log()
    init_mimetypes()
    parser, plumber = create_option_parser(args, log)
    opts, leftover_args = parser.parse_args(args)
    if len(leftover_args) > 3:
        # Format the message up front: the other log calls in this module
        # pass print-style positional pieces, so a separate '%s' argument
        # would not be interpolated.
        log.error('Extra arguments not understood: %s' %
                  ', '.join(leftover_args[3:]))
        return 1

    # Path-like options are made absolute (URLs are left untouched).
    for x in ('read_metadata_from_opf', 'cover'):
        if getattr(opts, x, None) is not None:
            setattr(opts, x, abspath(getattr(opts, x)))

    if opts.search_replace:
        # The option holds a file name; replace it with the parsed patterns.
        opts.search_replace = read_sr_patterns(opts.search_replace, log)

    if opts.transform_css_rules:
        from ebook_converter.ebooks.css_transform_rules import import_rules
        from ebook_converter.ebooks.css_transform_rules import validate_rule
        with open(opts.transform_css_rules, 'rb') as tcr:
            opts.transform_css_rules = rules = list(import_rules(tcr.read()))
        for rule in rules:
            title, msg = validate_rule(rule)
            if title and msg:
                log.error('Failed to parse CSS transform rules')
                log.error(title)
                log.error(msg)
                return 1

    # Every parsed option is handed to the plumber at HIGH level.
    recommendations = [(n.dest, getattr(opts, n.dest),
                        OptionRecommendation.HIGH)
                       for n in parser.options_iter() if n.dest]
    plumber.merge_ui_recommendations(recommendations)

    plumber.run()

    log('Output saved to', ' ', plumber.output)

    return 0
|
|
|
|
|
|
def manual_index_strings():
    """Return the introductory text for the options section of the manual.

    The text contains a single ``%s`` placeholder for the caller to fill in.
    """
    # NOTE(review): the placeholder line follows a reST '::' marker, so it
    # is written here as an indented literal block (4 spaces).  Confirm the
    # exact indentation against the upstream file.
    template = (
        'The options and default values for the options change depending on '
        'both the\n'
        'input and output formats, so you should always check with::\n'
        '\n'
        '    %s\n'
        '\n'
        'Below are the options that are common to all conversion, followed '
        'by the\n'
        'options specific to every input and output format.'
    )
    return template
|
|
|
|
|
|
# Script entry point: the return value of main() becomes the exit status.
if __name__ == '__main__':
    raise SystemExit(main())
|