mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-02 16:54:12 +01:00
427 lines
15 KiB
Python
427 lines
15 KiB
Python
"""
|
|
Command line interface to conversion sub-system
|
|
"""
|
|
import sys, os, numbers
|
|
from optparse import OptionGroup, Option
|
|
from collections import OrderedDict
|
|
|
|
from ebook_converter.utils.config import OptionParser
|
|
from ebook_converter.utils.logging import Log
|
|
from ebook_converter.customize.conversion import OptionRecommendation
|
|
from ebook_converter import patheq
|
|
from ebook_converter.ebooks.conversion import ConversionUserFeedBack
|
|
from ebook_converter.utils.localization import localize_user_manual_link
|
|
from ebook_converter.polyglot.builtins import iteritems
|
|
|
|
|
|
__license__ = 'GPL 3'
|
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
USAGE = '%prog ' + _('''\
|
|
input_file output_file [options]
|
|
|
|
Convert an e-book from one format to another.
|
|
|
|
input_file is the input and output_file is the output. Both must be \
|
|
specified as the first two arguments to the command.
|
|
|
|
The output e-book format is guessed from the file extension of \
|
|
output_file. output_file can also be of the special format .EXT where \
|
|
EXT is the output file extension. In this case, the name of the output \
|
|
file is derived from the name of the input file. Note that the filenames must \
|
|
not start with a hyphen. Finally, if output_file has no extension, then \
|
|
it is treated as a directory and an "open e-book" (OEB) consisting of HTML \
|
|
files is written to that directory. These files are the files that would \
|
|
normally have been passed to the output plugin.
|
|
|
|
After specifying the input \
|
|
and output file you can customize the conversion by specifying various \
|
|
options. The available options depend on the input and output file types. \
|
|
To get help on them specify the input and output file and then use the -h \
|
|
option.
|
|
|
|
For full documentation of the conversion system see
|
|
''') + localize_user_manual_link('https://manual.calibre-ebook.com/conversion.html')
|
|
|
|
HEURISTIC_OPTIONS = ['markup_chapter_headings',
|
|
'italicize_common_cases', 'fix_indents',
|
|
'html_unwrap_factor', 'unwrap_lines',
|
|
'delete_blank_paragraphs', 'format_scene_breaks',
|
|
'dehyphenate', 'renumber_headings',
|
|
'replace_scene_breaks']
|
|
|
|
DEFAULT_TRUE_OPTIONS = HEURISTIC_OPTIONS + ['remove_fake_margins']
|
|
|
|
|
|
def print_help(parser, log):
|
|
parser.print_help()
|
|
|
|
|
|
def check_command_line_options(parser, args, log):
|
|
if len(args) < 3 or args[1].startswith('-') or args[2].startswith('-'):
|
|
print_help(parser, log)
|
|
log.error('\n\nYou must specify the input AND output files')
|
|
raise SystemExit(1)
|
|
|
|
input = os.path.abspath(args[1])
|
|
if not input.endswith('.recipe') and not os.access(input, os.R_OK) and not \
|
|
('-h' in args or '--help' in args):
|
|
log.error('Cannot read from', input)
|
|
raise SystemExit(1)
|
|
if input.endswith('.recipe') and not os.access(input, os.R_OK):
|
|
input = args[1]
|
|
|
|
output = args[2]
|
|
if (output.startswith('.') and output[:2] not in {'..', '.'} and '/' not in
|
|
output and '\\' not in output):
|
|
output = os.path.splitext(os.path.basename(input))[0]+output
|
|
output = os.path.abspath(output)
|
|
|
|
return input, output
|
|
|
|
|
|
def option_recommendation_to_cli_option(add_option, rec):
|
|
opt = rec.option
|
|
switches = ['-'+opt.short_switch] if opt.short_switch else []
|
|
switches.append('--'+opt.long_switch)
|
|
attrs = dict(dest=opt.name, help=opt.help,
|
|
choices=opt.choices, default=rec.recommended_value)
|
|
if isinstance(rec.recommended_value, type(True)):
|
|
attrs['action'] = 'store_false' if rec.recommended_value else \
|
|
'store_true'
|
|
else:
|
|
if isinstance(rec.recommended_value, numbers.Integral):
|
|
attrs['type'] = 'int'
|
|
if isinstance(rec.recommended_value, numbers.Real):
|
|
attrs['type'] = 'float'
|
|
|
|
if opt.long_switch == 'verbose':
|
|
attrs['action'] = 'count'
|
|
attrs.pop('type', '')
|
|
if opt.name == 'read_metadata_from_opf':
|
|
switches.append('--from-opf')
|
|
if opt.name == 'transform_css_rules':
|
|
attrs['help'] = _(
|
|
'Path to a file containing rules to transform the CSS styles'
|
|
' in this book. The easiest way to create such a file is to'
|
|
' use the wizard for creating rules in the calibre GUI. Access'
|
|
' it in the "Look & feel->Transform styles" section of the conversion'
|
|
' dialog. Once you create the rules, you can use the "Export" button'
|
|
' to save them to a file.'
|
|
)
|
|
if opt.name in DEFAULT_TRUE_OPTIONS and rec.recommended_value is True:
|
|
switches = ['--disable-'+opt.long_switch]
|
|
add_option(Option(*switches, **attrs))
|
|
|
|
|
|
def group_titles():
|
|
return _('INPUT OPTIONS'), _('OUTPUT OPTIONS')
|
|
|
|
|
|
def recipe_test(option, opt_str, value, parser):
|
|
assert value is None
|
|
value = []
|
|
|
|
def floatable(s):
|
|
try:
|
|
float(s)
|
|
return True
|
|
except ValueError:
|
|
return False
|
|
|
|
for arg in parser.rargs:
|
|
# stop on --foo like options
|
|
if arg[:2] == "--":
|
|
break
|
|
# stop on -a, but not on -3 or -3.0
|
|
if arg[:1] == "-" and len(arg) > 1 and not floatable(arg):
|
|
break
|
|
try:
|
|
value.append(int(arg))
|
|
except (TypeError, ValueError, AttributeError):
|
|
break
|
|
if len(value) == 2:
|
|
break
|
|
del parser.rargs[:len(value)]
|
|
|
|
while len(value) < 2:
|
|
value.append(2)
|
|
|
|
setattr(parser.values, option.dest, tuple(value))
|
|
|
|
|
|
def add_input_output_options(parser, plumber):
|
|
input_options, output_options = \
|
|
plumber.input_options, plumber.output_options
|
|
|
|
def add_options(group, options):
|
|
for opt in options:
|
|
if plumber.input_fmt == 'recipe' and opt.option.long_switch == 'test':
|
|
group(Option('--test', dest='test', action='callback', callback=recipe_test))
|
|
else:
|
|
option_recommendation_to_cli_option(group, opt)
|
|
|
|
if input_options:
|
|
title = group_titles()[0]
|
|
io = OptionGroup(parser, title, _('Options to control the processing'
|
|
' of the input %s file')%plumber.input_fmt)
|
|
add_options(io.add_option, input_options)
|
|
parser.add_option_group(io)
|
|
|
|
if output_options:
|
|
title = group_titles()[1]
|
|
oo = OptionGroup(parser, title, _('Options to control the processing'
|
|
' of the output %s')%plumber.output_fmt)
|
|
add_options(oo.add_option, output_options)
|
|
parser.add_option_group(oo)
|
|
|
|
|
|
def add_pipeline_options(parser, plumber):
|
|
groups = OrderedDict((
|
|
('' , ('',
|
|
[
|
|
'input_profile',
|
|
'output_profile',
|
|
]
|
|
)),
|
|
(_('LOOK AND FEEL') , (
|
|
_('Options to control the look and feel of the output'),
|
|
[
|
|
'base_font_size', 'disable_font_rescaling',
|
|
'font_size_mapping', 'embed_font_family',
|
|
'subset_embedded_fonts', 'embed_all_fonts',
|
|
'line_height', 'minimum_line_height',
|
|
'linearize_tables',
|
|
'extra_css', 'filter_css', 'transform_css_rules', 'expand_css',
|
|
'smarten_punctuation', 'unsmarten_punctuation',
|
|
'margin_top', 'margin_left', 'margin_right',
|
|
'margin_bottom', 'change_justification',
|
|
'insert_blank_line', 'insert_blank_line_size',
|
|
'remove_paragraph_spacing',
|
|
'remove_paragraph_spacing_indent_size',
|
|
'asciiize', 'keep_ligatures',
|
|
]
|
|
)),
|
|
|
|
(_('HEURISTIC PROCESSING') , (
|
|
_('Modify the document text and structure using common'
|
|
' patterns. Disabled by default. Use %(en)s to enable. '
|
|
' Individual actions can be disabled with the %(dis)s options.')
|
|
% dict(en='--enable-heuristics', dis='--disable-*'),
|
|
['enable_heuristics'] + HEURISTIC_OPTIONS
|
|
)),
|
|
|
|
(_('SEARCH AND REPLACE') , (
|
|
_('Modify the document text and structure using user defined patterns.'),
|
|
[
|
|
'sr1_search', 'sr1_replace',
|
|
'sr2_search', 'sr2_replace',
|
|
'sr3_search', 'sr3_replace',
|
|
'search_replace',
|
|
]
|
|
)),
|
|
|
|
(_('STRUCTURE DETECTION') , (
|
|
_('Control auto-detection of document structure.'),
|
|
[
|
|
'chapter', 'chapter_mark',
|
|
'prefer_metadata_cover', 'remove_first_image',
|
|
'insert_metadata', 'page_breaks_before',
|
|
'remove_fake_margins', 'start_reading_at',
|
|
]
|
|
)),
|
|
|
|
(_('TABLE OF CONTENTS') , (
|
|
_('Control the automatic generation of a Table of Contents. By '
|
|
'default, if the source file has a Table of Contents, it will '
|
|
'be used in preference to the automatically generated one.'),
|
|
[
|
|
'level1_toc', 'level2_toc', 'level3_toc',
|
|
'toc_threshold', 'max_toc_links', 'no_chapters_in_toc',
|
|
'use_auto_toc', 'toc_filter', 'duplicate_links_in_toc',
|
|
]
|
|
)),
|
|
|
|
(_('METADATA') , (_('Options to set metadata in the output'),
|
|
plumber.metadata_option_names + ['read_metadata_from_opf'],
|
|
)),
|
|
(_('DEBUG'), (_('Options to help with debugging the conversion'),
|
|
[
|
|
'verbose',
|
|
'debug_pipeline',
|
|
])),
|
|
|
|
))
|
|
|
|
for group, (desc, options) in iteritems(groups):
|
|
if group:
|
|
group = OptionGroup(parser, group, desc)
|
|
parser.add_option_group(group)
|
|
add_option = group.add_option if group != '' else parser.add_option
|
|
|
|
for name in options:
|
|
rec = plumber.get_option_by_name(name)
|
|
if rec.level < rec.HIGH:
|
|
option_recommendation_to_cli_option(add_option, rec)
|
|
|
|
|
|
def option_parser():
|
|
parser = OptionParser(usage=USAGE)
|
|
parser.add_option('--list-recipes', default=False, action='store_true',
|
|
help=_('List builtin recipe names. You can create an e-book from '
|
|
'a builtin recipe like this: ebook-convert "Recipe Name.recipe" '
|
|
'output.epub'))
|
|
return parser
|
|
|
|
|
|
class ProgressBar(object):
|
|
|
|
def __init__(self, log):
|
|
self.log = log
|
|
|
|
def __call__(self, frac, msg=''):
|
|
if msg:
|
|
percent = int(frac*100)
|
|
self.log('%d%% %s'%(percent, msg))
|
|
|
|
|
|
def create_option_parser(args, log):
|
|
if '--version' in args:
|
|
from ebook_converter.constants import __appname__, __version__, __author__
|
|
log(os.path.basename(args[0]), '('+__appname__, __version__+')')
|
|
log('Created by:', __author__)
|
|
raise SystemExit(0)
|
|
if '--list-recipes' in args:
|
|
from ebook_converter.web.feeds.recipes.collection import get_builtin_recipe_titles
|
|
log('Available recipes:')
|
|
titles = sorted(get_builtin_recipe_titles())
|
|
for title in titles:
|
|
try:
|
|
log('\t'+title)
|
|
except:
|
|
log('\t'+repr(title))
|
|
log('%d recipes available'%len(titles))
|
|
raise SystemExit(0)
|
|
|
|
parser = option_parser()
|
|
if len(args) < 3:
|
|
print_help(parser, log)
|
|
if any(x in args for x in ('-h', '--help')):
|
|
raise SystemExit(0)
|
|
else:
|
|
raise SystemExit(1)
|
|
|
|
input, output = check_command_line_options(parser, args, log)
|
|
|
|
from ebook_converter.ebooks.conversion.plumber import Plumber
|
|
|
|
reporter = ProgressBar(log)
|
|
if patheq(input, output):
|
|
raise ValueError('Input file is the same as the output file')
|
|
|
|
plumber = Plumber(input, output, log, reporter)
|
|
add_input_output_options(parser, plumber)
|
|
add_pipeline_options(parser, plumber)
|
|
|
|
return parser, plumber
|
|
|
|
|
|
def abspath(x):
|
|
if x.startswith('http:') or x.startswith('https:'):
|
|
return x
|
|
return os.path.abspath(os.path.expanduser(x))
|
|
|
|
|
|
def escape_sr_pattern(exp):
|
|
return exp.replace('\n', '\ue123')
|
|
|
|
|
|
def read_sr_patterns(path, log=None):
|
|
import json, re
|
|
pats = []
|
|
with open(path, 'rb') as f:
|
|
lines = f.read().decode('utf-8').splitlines()
|
|
pat = None
|
|
for line in lines:
|
|
if pat is None:
|
|
if not line.strip():
|
|
continue
|
|
line = line.replace('\ue123', '\n')
|
|
try:
|
|
re.compile(line)
|
|
except:
|
|
msg = 'Invalid regular expression: %r from file: %r'%(
|
|
line, path)
|
|
if log is not None:
|
|
log.error(msg)
|
|
raise SystemExit(1)
|
|
else:
|
|
raise ValueError(msg)
|
|
pat = line
|
|
else:
|
|
pats.append((pat, line))
|
|
pat = None
|
|
return json.dumps(pats)
|
|
|
|
|
|
def main(args=sys.argv):
|
|
log = Log()
|
|
parser, plumber = create_option_parser(args, log)
|
|
opts, leftover_args = parser.parse_args(args)
|
|
if len(leftover_args) > 3:
|
|
log.error('Extra arguments not understood:', u', '.join(leftover_args[3:]))
|
|
return 1
|
|
for x in ('read_metadata_from_opf', 'cover'):
|
|
if getattr(opts, x, None) is not None:
|
|
setattr(opts, x, abspath(getattr(opts, x)))
|
|
if opts.search_replace:
|
|
opts.search_replace = read_sr_patterns(opts.search_replace, log)
|
|
if opts.transform_css_rules:
|
|
from ebook_converter.ebooks.css_transform_rules import import_rules, validate_rule
|
|
with open(opts.transform_css_rules, 'rb') as tcr:
|
|
opts.transform_css_rules = rules = list(import_rules(tcr.read()))
|
|
for rule in rules:
|
|
title, msg = validate_rule(rule)
|
|
if title and msg:
|
|
log.error('Failed to parse CSS transform rules')
|
|
log.error(title)
|
|
log.error(msg)
|
|
return 1
|
|
|
|
recommendations = [(n.dest, getattr(opts, n.dest),
|
|
OptionRecommendation.HIGH)
|
|
for n in parser.options_iter()
|
|
if n.dest]
|
|
plumber.merge_ui_recommendations(recommendations)
|
|
|
|
try:
|
|
plumber.run()
|
|
except ConversionUserFeedBack as e:
|
|
ll = {'info': log.info, 'warn': log.warn,
|
|
'error':log.error}.get(e.level, log.info)
|
|
ll(e.title)
|
|
if e.det_msg:
|
|
log.debug(e.detmsg)
|
|
ll(e.msg)
|
|
raise SystemExit(1)
|
|
|
|
log(_('Output saved to'), ' ', plumber.output)
|
|
|
|
return 0
|
|
|
|
|
|
def manual_index_strings():
|
|
return _('''\
|
|
The options and default values for the options change depending on both the
|
|
input and output formats, so you should always check with::
|
|
|
|
%s
|
|
|
|
Below are the options that are common to all conversion, followed by the
|
|
options specific to every input and output format.''')
|
|
|
|
|
|
if __name__ == '__main__':
|
|
sys.exit(main())
|