diff --git a/ebook_converter/ebooks/conversion/cli.py b/ebook_converter/ebooks/conversion/cli.py index 77fc0eb..2602467 100644 --- a/ebook_converter/ebooks/conversion/cli.py +++ b/ebook_converter/ebooks/conversion/cli.py @@ -12,7 +12,7 @@ import re import sys from ebook_converter.utils.config import OptionParser -from ebook_converter.utils.logging import Log +from ebook_converter.utils import logging from ebook_converter.customize.conversion import OptionRecommendation @@ -66,7 +66,7 @@ def check_command_line_options(parser, args, log): if (not input_file.endswith('.recipe') and not os.access(input_file, os.R_OK) and not ('-h' in args or '--help' in args)): - log.error('Cannot read from', input_file) + log.error('Cannot read from %s', input_file) raise SystemExit(1) if input_file.endswith('.recipe') and not os.access(input_file, os.R_OK): input_file = args[1] @@ -267,7 +267,7 @@ class ProgressBar(object): def __call__(self, frac, msg=''): if msg: percent = int(frac*100) - self.log('%d%% %s' % (percent, msg)) + self.log.info('%d%% %s' % (percent, msg)) def create_option_parser(args, log): @@ -275,20 +275,18 @@ def create_option_parser(args, log): from ebook_converter.constants_old import __appname__ from ebook_converter.constants_old import __author__ from ebook_converter.constants_old import __version__ - log(os.path.basename(args[0]), '('+__appname__, __version__+')') - log('Created by:', __author__) + log.info("%s (%s, %s)", os.path.basename(args[0]), __appname__, + __version__) + log.info('Created by: %s', __author__) raise SystemExit(0) if '--list-recipes' in args: from ebook_converter.web.feeds.recipes.collection import \ get_builtin_recipe_titles - log('Available recipes:') + log.info('Available recipes:') titles = sorted(get_builtin_recipe_titles()) for title in titles: - try: - log('\t'+title) - except Exception: - log('\t'+repr(title)) - log('%d recipes available' % len(titles)) + log.info('\t%s', title) + log.info('%d recipes available', len(titles)) 
raise SystemExit(0) parser = option_parser() @@ -352,7 +350,7 @@ def read_sr_patterns(path, log=None): def main(args=sys.argv): - log = Log() + log = logging.default_log mimetypes.init([pkg_resources.resource_filename('ebook_converter', 'data/mime.types')]) parser, plumber = create_option_parser(args, log) @@ -386,7 +384,7 @@ def main(args=sys.argv): plumber.run() - log('Output saved to', ' ', plumber.output) + log.info('Output saved to %s', plumber.output) return 0 diff --git a/ebook_converter/ebooks/conversion/plugins/epub_input.py b/ebook_converter/ebooks/conversion/plugins/epub_input.py index 73196e3..37a9afe 100644 --- a/ebook_converter/ebooks/conversion/plugins/epub_input.py +++ b/ebook_converter/ebooks/conversion/plugins/epub_input.py @@ -146,7 +146,7 @@ class EPUBInput(InputFormatPlugin): if len(spine) > 1: for item in spine: if item.get('idref') == titlepage_id: - log('Found HTML cover', titlepage_href) + log.info('Found HTML cover %s', titlepage_href) if self.for_viewer: item.attrib.pop('linear', None) else: @@ -192,7 +192,7 @@ class EPUBInput(InputFormatPlugin): elem = [x for x in manifest if x.get('id', '') == idref] if not elem or elem[0].get('href', None) != guide_cover: return - log('Found HTML cover', guide_cover) + log.info('Found HTML cover %s', guide_cover) # Remove from spine as covers must be treated # specially @@ -272,8 +272,8 @@ class EPUBInput(InputFormatPlugin): zf = ZipFile(stream) zf.extractall(os.getcwd()) except Exception: - log.exception('EPUB appears to be invalid ZIP file, trying a' - ' more forgiving ZIP parser') + log.exception('EPUB appears to be invalid ZIP file, trying a ' + 'more forgiving ZIP parser') from ebook_converter.utils.localunzip import extractall stream.seek(0) extractall(stream) diff --git a/ebook_converter/ebooks/conversion/plugins/epub_output.py b/ebook_converter/ebooks/conversion/plugins/epub_output.py index fe87c2d..e3d1b20 100644 --- a/ebook_converter/ebooks/conversion/plugins/epub_output.py +++ 
b/ebook_converter/ebooks/conversion/plugins/epub_output.py @@ -214,8 +214,8 @@ class EPUBOutput(OutputFormatPlugin): self.workaround_sony_quirks() if self.oeb.toc.count() == 0: - self.log.warn('This EPUB file has no Table of Contents. ' - 'Creating a default TOC') + self.log.warning('This EPUB file has no Table of Contents. ' + 'Creating a default TOC') first = next(iter(self.oeb.spine)) self.oeb.toc.add('Start', first.href) @@ -229,7 +229,7 @@ class EPUBOutput(OutputFormatPlugin): encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', []) if _uuid is None: - self.log.warn('No UUID identifier found') + self.log.warning('No UUID identifier found') _uuid = str(uuid.uuid4()) oeb.metadata.add('identifier', _uuid, scheme='uuid', id=_uuid) @@ -281,7 +281,7 @@ class EPUBOutput(OutputFormatPlugin): os.mkdir(opts.extract_to) with ZipFile(output_path) as zf: zf.extractall(path=opts.extract_to) - self.log.info('EPUB extracted to', opts.extract_to) + self.log.info('EPUB extracted to %s', opts.extract_to) def upgrade_to_epub3(self, tdir, opf): self.log.info('Upgrading to EPUB 3...') @@ -323,7 +323,7 @@ class EPUBOutput(OutputFormatPlugin): if not os.path.exists(path): uris.pop(uri) continue - self.log.debug('Encrypting font:', uri) + self.log.debug('Encrypting font: %s', uri) with open(path, 'r+b') as f: data = f.read(1024) if len(data) >= 1024: @@ -332,7 +332,7 @@ class EPUBOutput(OutputFormatPlugin): f.write(bytes(bytearray(data[i] ^ key[i%16] for i in range(1024)))) else: - self.log.warn('Font', path, 'is invalid, ignoring') + self.log.warning('Font %s is invalid, ignoring', path) if not isinstance(uri, str): uri = uri.decode('utf-8') fonts.append(''' @@ -385,8 +385,9 @@ class EPUBOutput(OutputFormatPlugin): _base, _, frag = href.partition('#') frag = urllib.parse.unquote(frag) if frag and frag_pat.match(frag) is None: - self.log.warn( - 'Removing fragment identifier %r from TOC as Adobe Digital Editions cannot handle it'%frag) + self.log.warning('Removing fragment 
identifier %r from ' + 'TOC as Adobe Digital Editions cannot ' + 'handle it', frag) node.href = _base for x in self.oeb.spine: @@ -530,8 +531,8 @@ class EPUBOutput(OutputFormatPlugin): for x in self.oeb.spine: if x.href == href: if frag_is_at_top(x.data, frag): - self.log.debug('Removing anchor from TOC href:', - href+'#'+frag) + self.log.debug('Removing anchor from TOC ' + 'href: %s#%s', href, frag) toc.href = href break for x in toc: diff --git a/ebook_converter/ebooks/conversion/plugins/lrf_input.py b/ebook_converter/ebooks/conversion/plugins/lrf_input.py index 6f51f98..b5154b0 100644 --- a/ebook_converter/ebooks/conversion/plugins/lrf_input.py +++ b/ebook_converter/ebooks/conversion/plugins/lrf_input.py @@ -20,7 +20,7 @@ class LRFInput(InputFormatPlugin): from ebook_converter.ebooks.lrf.input import MediaType, Styles, \ TextBlock, Canvas, ImageBlock, RuledLine self.log = log - self.log('Generating XML') + self.log.info('Generating XML') from ebook_converter.ebooks.lrf.lrfparser import LRFDocument d = LRFDocument(stream) d.parse() @@ -50,7 +50,7 @@ class LRFInput(InputFormatPlugin): if imgstr: plot_map[ro] = imgstr[0].get('file') - self.log('Converting XML to HTML...') + self.log.info('Converting XML to HTML...') with open(pkg_resources. resource_filename('ebook_converter', diff --git a/ebook_converter/ebooks/conversion/plumber.py b/ebook_converter/ebooks/conversion/plumber.py index 76ef6be..06553a6 100644 --- a/ebook_converter/ebooks/conversion/plumber.py +++ b/ebook_converter/ebooks/conversion/plumber.py @@ -854,8 +854,8 @@ OptionRecommendation(name='search_replace', try: val = float(val) except ValueError: - self.log.warn('Values of series index and rating must' - ' be numbers. Ignoring', val) + self.log.warning('Values of series index and rating ' + 'must be numbers. 
Ignoring %s', val) continue elif x in ('timestamp', 'pubdate'): try: @@ -882,8 +882,8 @@ OptionRecommendation(name='search_replace', self.opts_to_mi(mi) if mi.cover: if mi.cover.startswith('http:') or mi.cover.startswith('https:'): - self.log.warn("TODO: Cover image is on remote server, " - "implement downloading using requests") + self.log.warning("TODO: Cover image is on remote server, " + "implement downloading using requests") ext = mi.cover.rpartition('.')[-1].lower().strip() if ext not in ('png', 'jpg', 'jpeg', 'gif'): ext = 'jpg' @@ -909,8 +909,8 @@ OptionRecommendation(name='search_replace', if x.short_name == sval: setattr(self.opts, attr, x) return - self.log.warn( - 'Profile (%s) %r is no longer available, using default'%(which, sval)) + self.log.warning('Profile (%s) %r is no longer available, using ' + 'default', which, sval) for x in profiles(): if x.short_name == 'default': setattr(self.opts, attr, x) @@ -925,14 +925,16 @@ OptionRecommendation(name='search_replace', if self.opts.verbose: self.log.filter_level = self.log.DEBUG if self.changed_options: - self.log('Conversion options changed from defaults:') + self.log.info('Conversion options changed from defaults:') for rec in self.changed_options: if rec.option.name not in ('username', 'password'): - self.log(' ', '%s:' % rec.option.name, repr(rec.recommended_value)) + self.log.info(' %s: %r', rec.option.name, + rec.recommended_value) if self.opts.verbose > 1: self.log.debug('Resolved conversion options') try: - self.log.debug('ebook_converter version:', constants.VERSION) + self.log.debug('ebook_converter version: %s', + constants.VERSION) odict = dict(self.opts.__dict__) for x in ('username', 'password'): odict.pop(x, None) @@ -968,7 +970,7 @@ OptionRecommendation(name='search_replace', self.input_plugin.save_download(zf) zf.close() - self.log.info('Input debug saved to:', out_dir) + self.log.info('Input debug saved to: %s', out_dir) def run(self): ''' @@ -1022,7 +1024,8 @@ 
OptionRecommendation(name='search_replace', from ebook_converter.ebooks.azw4.reader import unwrap unwrap(stream, self.output) self.ui_reporter(1.) - self.log(self.output_fmt.upper(), 'output written to', self.output) + self.log.info('%s output written to %s', self.output_fmt.upper(), + self.output) self.flush() return @@ -1056,7 +1059,7 @@ OptionRecommendation(name='search_replace', if self.opts.debug_pipeline is not None: out_dir = os.path.join(self.opts.debug_pipeline, 'parsed') self.dump_oeb(self.oeb, out_dir) - self.log('Parsed HTML written to:', out_dir) + self.log.info('Parsed HTML written to: %s', out_dir) self.input_plugin.specialize(self.oeb, self.opts, self.log, self.output_fmt) @@ -1105,13 +1108,13 @@ OptionRecommendation(name='search_replace', try: fkey = list(map(float, fkey.split(','))) except Exception: - self.log.error('Invalid font size key: %r ignoring'%fkey) + self.log.error('Invalid font size key: %s ignoring', fkey) fkey = self.opts.dest.fkey if self.opts.debug_pipeline is not None: out_dir = os.path.join(self.opts.debug_pipeline, 'structure') self.dump_oeb(self.oeb, out_dir) - self.log('Structured HTML written to:', out_dir) + self.log.info('Structured HTML written to: %s', out_dir) if self.opts.extra_css and os.path.exists(self.opts.extra_css): with open(self.opts.extra_css, 'rb') as f: @@ -1187,9 +1190,9 @@ OptionRecommendation(name='search_replace', if self.opts.debug_pipeline is not None: out_dir = os.path.join(self.opts.debug_pipeline, 'processed') self.dump_oeb(self.oeb, out_dir) - self.log('Processed HTML written to:', out_dir) + self.log.info('Processed HTML written to: %s', out_dir) - self.log.info('Creating %s...'%self.output_plugin.name) + self.log.info('Creating %s...', self.output_plugin.name) our = CompositeProgressReporter(0.67, 1., self.ui_reporter) self.output_plugin.report_progress = our our(0., 'Running %s plugin' % self.output_plugin.name) @@ -1200,7 +1203,8 @@ OptionRecommendation(name='search_replace', 
self.ui_reporter(1.) run_plugins_on_postprocess(self.output, self.output_fmt) - self.log(self.output_fmt.upper(), 'output written to', self.output) + self.log.info('%s output written to %s', self.output_fmt.upper(), + self.output) self.flush() @@ -1230,7 +1234,7 @@ def create_oebbook(log, path_or_stream, opts, reader=None, if specialize is not None: oeb = specialize(oeb) or oeb # Read OEB Book into OEBBook - log('Parsing all content...') + log.info('Parsing all content...') oeb.removed_items_to_ignore = removed_items if reader is None: from ebook_converter.ebooks.oeb.reader import OEBReader @@ -1241,11 +1245,11 @@ def create_oebbook(log, path_or_stream, opts, reader=None, def create_dummy_plumber(input_format, output_format): - from ebook_converter.utils.logging import Log + from ebook_converter.utils import logging input_format = input_format.lower() output_format = output_format.lower() output_path = 'dummy.'+output_format - log = Log() + log = logging.default_log log.outputs = [] input_file = 'dummy.'+input_format if input_format in ARCHIVE_FMTS: diff --git a/ebook_converter/ebooks/conversion/utils.py b/ebook_converter/ebooks/conversion/utils.py index b9af3f9..2035148 100644 --- a/ebook_converter/ebooks/conversion/utils.py +++ b/ebook_converter/ebooks/conversion/utils.py @@ -5,11 +5,6 @@ from ebook_converter.utils.logging import default_log from ebook_converter.utils.wordcount import get_wordcount_obj -__license__ = 'GPL v3' -__copyright__ = '2010, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - - class HeuristicProcessor(object): def __init__(self, extra_opts=None, log=None): @@ -50,8 +45,8 @@ class HeuristicProcessor(object): title = match.group('title') if not title: self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log.debug("marked " + str(self.html_preprocess_sections) + - " chapters. - " + str(chap)) + self.log.debug("marked %s chapters. - %s", + self.html_preprocess_sections, str(chap)) return '

'+chap+'

\n' else: delete_whitespace = re.compile('^\\s*(?P.*?)\\s*$') @@ -59,16 +54,16 @@ class HeuristicProcessor(object): txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g', html2text(chap))) txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g', html2text(title))) self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log.debug("marked " + str(self.html_preprocess_sections) + - " chapters & titles. - " + str(chap) + ", " + str(title)) + self.log.debug("marked %s chapters & titles. - %s, %s", + self.html_preprocess_sections, chap, title) return '

'+chap+'

\n

'+title+'

\n' def chapter_break(self, match): chap = match.group('section') styles = match.group('styles') self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log.debug("marked " + str(self.html_preprocess_sections) + - " section markers based on punctuation. - " + str(chap)) + self.log.debug("marked %s section markers based on punctuation. - %s", + self.html_preprocess_sections, chap) return '<'+styles+' style="page-break-before:always">'+chap def analyze_title_matches(self, match): @@ -111,8 +106,6 @@ class HeuristicProcessor(object): line_end = line_end_ere.findall(raw) tot_htm_ends = len(htm_end) tot_ln_fds = len(line_end) - # self.log.debug("There are " + str(tot_ln_fds) + " total Line feeds, and " + - # str(tot_htm_ends) + " marked up endings") if percent > 1: percent = 1 @@ -120,7 +113,6 @@ class HeuristicProcessor(object): percent = 0 min_lns = tot_ln_fds * percent - # self.log.debug("There must be fewer than " + str(min_lns) + " unmarked lines to add markup") return min_lns > tot_htm_ends def dump(self, raw, where): @@ -148,7 +140,6 @@ class HeuristicProcessor(object): return wordcount.words def markup_italicis(self, html): - # self.log.debug("\n\n\nitalicize debugging \n\n\n") ITALICIZE_WORDS = [ 'Etc.', 'etc.', 'viz.', 'ie.', 'i.e.', 'Ie.', 'I.e.', 'eg.', 'e.g.', 'Eg.', 'E.g.', 'et al.', 'et cetera', 'n.b.', 'N.b.', @@ -178,7 +169,6 @@ class HeuristicProcessor(object): for pat in ITALICIZE_STYLE_PATS: for match in re.finditer(pat, search_text): ital_string = str(match.group('words')) - # self.log.debug("italicising "+str(match.group(0))+" with "+ital_string+"") try: html = re.sub(re.escape(str(match.group(0))), '%s' % ital_string, html) except OverflowError: @@ -205,10 +195,11 @@ class HeuristicProcessor(object): if wordcount > 200000: typical_chapters = 15000. 
self.min_chapters = int(ceil(wordcount / typical_chapters)) - self.log.debug("minimum chapters required are: "+str(self.min_chapters)) + self.log.debug("minimum chapters required are: %s", self.min_chapters) heading = re.compile(']*>', re.IGNORECASE) self.html_preprocess_sections = len(heading.findall(html)) - self.log.debug("found " + str(self.html_preprocess_sections) + " pre-existing headings") + self.log.debug("found %s pre-existing headings", + self.html_preprocess_sections) # Build the Regular Expressions in pieces init_lookahead = "(?=<(p|div))" @@ -298,7 +289,8 @@ class HeuristicProcessor(object): if n_lookahead_req: n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line) if not analyze: - self.log.debug("Marked " + str(self.html_preprocess_sections) + " headings, " + log_message) + self.log.debug("Marked %s headings, %s", + self.html_preprocess_sections, log_message) chapter_marker = arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+ \ lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close @@ -311,11 +303,12 @@ class HeuristicProcessor(object): if float(self.chapters_with_title) / float(hits) > .5: title_req = True strict_title = False - self.log.debug( - str(type_name)+" had "+str(hits)+ - " hits - "+str(self.chapters_no_title)+" chapters with no title, "+ - str(self.chapters_with_title)+" chapters with titles, "+ - str(float(self.chapters_with_title) / float(hits))+" percent. 
") + self.log.debug('%s had %s hits %s chapters with no ' + 'title, %s chapters with titles, %s ' + 'percent.', type_name, hits, + self.chapters_no_title, + self.chapters_with_title, + self.chapters_with_title / hits) if type_name == 'common': analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name]) elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits: @@ -332,8 +325,9 @@ class HeuristicProcessor(object): words_per_chptr = wordcount if words_per_chptr > 0 and self.html_preprocess_sections > 0: words_per_chptr = wordcount // self.html_preprocess_sections - self.log.debug("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+ - str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters") + self.log.debug("Total wordcount is: %s, Average words per section " + "is: %s, Marked up %s chapters", wordcount, + words_per_chptr, self.html_preprocess_sections) return html def punctuation_unwrap(self, length, content, format): @@ -427,7 +421,8 @@ class HeuristicProcessor(object): txtindent = re.compile(str(r'<(?Pp|div)(?P[^>]*)>\s*(?P(]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE) html = txtindent.sub(self.insert_indent, html) if self.found_indents > 1: - self.log.debug("replaced "+str(self.found_indents)+ " nbsp indents with inline styles") + self.log.debug("replaced %s nbsp indents with inline styles", + self.found_indents) return html def cleanup_markup(self, html): @@ -475,8 +470,8 @@ class HeuristicProcessor(object): blanklines = self.blankreg.findall(html) lines = self.linereg.findall(html) if len(lines) > 1: - self.log.debug("There are " + str(len(blanklines)) + " blank lines. " + - str(float(len(blanklines)) / float(len(lines))) + " percent blank") + self.log.debug("There are %s blank lines. 
%s percent blank", + len(blanklines), len(blanklines) / len(lines)) if float(len(blanklines)) / float(len(lines)) > 0.40: return True @@ -600,8 +595,8 @@ class HeuristicProcessor(object): width = int(re.sub('.*?width(:|=)(?P\\d+).*', '\\g', replacement_break)) except: scene_break = hr_open+'
' - self.log.warn('Invalid replacement scene break' - ' expression, using default') + self.log.warning('Invalid replacement scene break' + ' expression, using default') else: replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break) divpercent = (100 - width) // 2 @@ -702,20 +697,23 @@ class HeuristicProcessor(object): blockquote_open_loop = blockquote_open if debugabby: self.log.debug('\n\n******\n') - self.log.debug('padding top is: '+str(setting[0])) - self.log.debug('padding right is:' +str(setting[1])) - self.log.debug('padding bottom is: ' + str(setting[2])) - self.log.debug('padding left is: ' +str(setting[3])) + self.log.debug('padding top is: %s', setting[0]) + self.log.debug('padding right is: %s', setting[1]) + self.log.debug('padding bottom is: %s', setting[2]) + self.log.debug('padding left is: %s', setting[3]) # print "text-align is: "+str(text_align) # print "\n***\nline is:\n "+str(match.group(0))+'\n' if debugabby: # print "this line is a paragraph = "+str(is_paragraph)+", previous line was "+str(self.previous_was_paragraph) - self.log.debug("styles for this line were:", styles) - self.log.debug('newline is:') - self.log.debug(blockquote_open_loop+blockquote_close_loop+ - paragraph_before+'

'+content+'

'+paragraph_after+'\n\n\n\n\n') + self.log.debug("styles for this line were: %s", styles) + self.log.debug('newline is: %s', blockquote_open_loop + + blockquote_close_loop + + paragraph_before + + '

%s

' % + (text_indent + text_align, content) + + paragraph_after + + '\n\n\n\n\n') # print "is_paragraph is "+str(is_paragraph)+", previous_was_paragraph is "+str(self.previous_was_paragraph) self.previous_was_paragraph = is_paragraph # print "previous_was_paragraph is now set to "+str(self.previous_was_paragraph)+"\n\n\n" @@ -731,10 +729,10 @@ class HeuristicProcessor(object): try: self.totalwords = self.get_word_count(html) except: - self.log.warn("Can't get wordcount") + self.log.warning("Can't get wordcount") if self.totalwords < 50: - self.log.warn("flow is too short, not running heuristics") + self.log.warning("flow is too short, not running heuristics") return html is_abbyy = self.is_abbyy(html) @@ -801,12 +799,13 @@ class HeuristicProcessor(object): # more of the lines break in the same region of the document then unwrapping is required docanalysis = DocAnalysis(format, html) hardbreaks = docanalysis.line_histogram(.50) - self.log.debug("Hard line breaks check returned "+str(hardbreaks)) + self.log.debug("Hard line breaks check returned %s", hardbreaks) # Calculate Length unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) length = docanalysis.line_length(unwrap_factor) - self.log.debug("Median line length is " + str(length) + ", calculated with " + format + " format") + self.log.debug("Median line length is %s, calculated with %s format", + length, format) # ##### Unwrap lines ###### if getattr(self.extra_opts, 'unwrap_lines', False): @@ -827,8 +826,9 @@ class HeuristicProcessor(object): # If still no sections after unwrapping mark split points on lines with no punctuation if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False): - self.log.debug("Looking for more split points based on punctuation," - " currently have " + str(self.html_preprocess_sections)) + self.log.debug("Looking for more split points based on " + "punctuation, currently have %s", + self.html_preprocess_sections) 
chapdetect3 = re.compile( r'<(?P(p|div)[^>]*)>\s*(?P
(]*>)?\s*(?!([\W]+\s*)+)' r'(<[ibu][^>]*>){0,2}\s*(]*>)?\s*(<[ibu][^>]*>){0,2}\s*(]*>)?\s*' diff --git a/ebook_converter/ebooks/fb2/fb2ml.py b/ebook_converter/ebooks/fb2/fb2ml.py index 696193b..9372c54 100644 --- a/ebook_converter/ebooks/fb2/fb2ml.py +++ b/ebook_converter/ebooks/fb2/fb2ml.py @@ -188,7 +188,7 @@ class FB2MLizer(object): metadata['id'] = str(x).split(':')[-1] break if metadata['id'] is None: - self.log.warn('No UUID identifier found') + self.log.warning('No UUID identifier found') metadata['id'] = str(uuid.uuid4()) try: @@ -311,7 +311,7 @@ class FB2MLizer(object): self.section_level += 1 for item in self.oeb_book.spine: - self.log.debug('Converting %s to FictionBook2 XML' % item.href) + self.log.debug('Converting %s to FictionBook2 XML', item.href) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts, self.opts.output_profile) @@ -369,7 +369,7 @@ class FB2MLizer(object): content_type, data)) except Exception as e: self.log.error('Error: Could not include file %s because ' - '%s.' % (item.href, e)) + '%s.', item.href, e) return '\n'.join(images) def create_flat_toc(self, nodes, level): @@ -528,7 +528,7 @@ class FB2MLizer(object): fb2_out.append('' % self.image_hrefs[ihref]) else: - self.log.warn(u'Ignoring image not in manifest: %s' % ihref) + self.log.warning('Ignoring image not in manifest: %s', ihref) if tag in ('br', 'hr') or ems >= 1: if ems < 1: multiplier = 1 diff --git a/ebook_converter/ebooks/htmlz/oeb2html.py b/ebook_converter/ebooks/htmlz/oeb2html.py index 6affd63..6fc78ff 100644 --- a/ebook_converter/ebooks/htmlz/oeb2html.py +++ b/ebook_converter/ebooks/htmlz/oeb2html.py @@ -58,7 +58,7 @@ class OEB2HTML(object): '%s' '' % entities.prepare_string_for_xml(self.book_title)] for item in oeb_book.spine: - self.log.debug('Converting %s to HTML...' 
% item.href) + self.log.debug('Converting %s to HTML...', item.href) self.rewrite_ids(item.data, item) base.rewrite_links(item.data, partial(self.rewrite_link, page=item)) @@ -342,7 +342,7 @@ class OEB2HTMLClassCSSizer(OEB2HTML): def mlize_spine(self, oeb_book): output = [] for item in oeb_book.spine: - self.log.debug('Converting %s to HTML...' % item.href) + self.log.debug('Converting %s to HTML...', item.href) self.rewrite_ids(item.data, item) base.rewrite_links(item.data, partial(self.rewrite_link, page=item)) diff --git a/ebook_converter/ebooks/lrf/html/convert_from.py b/ebook_converter/ebooks/lrf/html/convert_from.py index 813d8a1..73aa38e 100644 --- a/ebook_converter/ebooks/lrf/html/convert_from.py +++ b/ebook_converter/ebooks/lrf/html/convert_from.py @@ -331,9 +331,9 @@ class HTMLConverter(object): if link['path'] == path: self.links.remove(link) break - self.log.warn('Could not process '+path) + self.log.warning('Could not process %s', path) if self.verbose: - self.log.exception(' ') + self.log.exception(' ') # WAT self.links = self.process_links() self.link_level += 1 paths = [link['path'] for link in self.links] @@ -400,7 +400,7 @@ class HTMLConverter(object): with open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb') as f: f.write(str(soup).encode('utf-8')) - self.log.info('Written preprocessed HTML to '+f.name) + self.log.info('Written preprocessed HTML to %s', f.name) except Exception: pass @@ -416,8 +416,8 @@ class HTMLConverter(object): self.css[selector] = self.override_css[selector] self.file_name = os.path.basename(path) - self.log.info('Processing %s' % (path if self.verbose else - self.file_name)) + self.log.info('Processing %s', path if self.verbose else + self.file_name) if not os.path.exists(path): # convertlit replaces & with %26 in file names @@ -589,7 +589,7 @@ class HTMLConverter(object): try: index = self.book.pages().index(opage) except ValueError: - self.log.warning('%s is an empty file' % self.file_name) + self.log.warning('%s is an 
empty file', self.file_name) tb = self.book.create_text_block() self.current_page.append(tb) return tb @@ -656,7 +656,7 @@ class HTMLConverter(object): hasattr(target.parent, 'objId'): self.book.addTocEntry(ascii_text, tb) else: - self.log.debug("Cannot add link %s to TOC" % ascii_text) + self.log.debug("Cannot add link %s to TOC", ascii_text) def get_target_block(fragment, targets): '''Return the correct block for the element''' @@ -1617,7 +1617,7 @@ class HTMLConverter(object): tag[key]] = self.current_block self.current_block.must_append = True else: - self.log.debug('Could not follow link to ', + self.log.debug('Could not follow link to %s', tag['href']) self.process_children(tag, tag_css, tag_pseudo_css) elif tag.has_attr('name') or tag.has_attr('id'): @@ -1642,7 +1642,8 @@ class HTMLConverter(object): self.process_image(path, tag_css, width, height, dropcaps=dropcaps, rescale=True) elif not urllib.parse.urlparse(tag['src'])[0]: - self.log.warn('Could not find image: '+tag['src']) + self.log.warning('Could not find image: %s', + tag['src']) else: self.log.debug("Failed to process: %s", tag) elif tagname in ['style', 'link']: @@ -1665,8 +1666,8 @@ class HTMLConverter(object): self.page_break_found = True ncss, npcss = self.parse_css(src) except IOError: - self.log.warn('Could not read stylesheet: %s', - tag['href']) + self.log.warning('Could not read stylesheet: %s', + tag['href']) if ncss: update_css(ncss, self.css) self.css.update(self.override_css) @@ -1876,10 +1877,10 @@ class HTMLConverter(object): self.process_table(tag, tag_css) except Exception as err: self.log.warning('An error occurred while processing a ' - 'table: %s. Ignoring table markup.' % + 'table: %s. 
Ignoring table markup.', repr(err)) - self.log.exception('') - self.log.debug('Bad table:\n%s' % str(tag)[:300]) + self.log.exception('') # WAT + self.log.debug('Bad table:\n%s', str(tag)[:300]) self.in_table = False self.process_children(tag, tag_css, tag_pseudo_css) finally: @@ -1977,7 +1978,7 @@ def process_file(path, options, logger): tpath = tf.name # PIL sometimes fails, for example on interlaced PNG files except IOError as err: - logger.warn('Could not read cover image: %s', err) + logger.warning('Could not read cover image: %s', err) options.cover = None else: raise ConversionError('Cannot read from: %s', options.cover) diff --git a/ebook_converter/ebooks/lrf/input.py b/ebook_converter/ebooks/lrf/input.py index 731172e..11bfaf9 100644 --- a/ebook_converter/ebooks/lrf/input.py +++ b/ebook_converter/ebooks/lrf/input.py @@ -48,7 +48,8 @@ class Canvas(etree.XSLTExtension): table.append(tr) for obj, x, y in self.get_objects(canvas): if obj.tag != 'TextBlock': - self.log.warn(obj.tag, 'elements in Canvas not supported') + self.log.warning('%s elements in Canvas not supported', + obj.tag) continue td = table.makeelement('td') self.text_block.render_block(obj, td) @@ -168,7 +169,7 @@ class TextBlock(etree.XSLTExtension): if deepest < 500: return - self.log.warn('Found deeply nested spans. Flattening.') + self.log.warning('Found deeply nested spans. 
Flattening.') # with open('/t/before.xml', 'wb') as f: # f.write(etree.tostring(node, method='xml')) @@ -270,7 +271,7 @@ class TextBlock(etree.XSLTExtension): self.add_text_to = (img, 'tail') self.add_text(child.tail) else: - self.log.warn('Unhandled Text element:', child.tag) + self.log.warning('Unhandled Text element: %s', child.tag) class Styles(etree.XSLTExtension): diff --git a/ebook_converter/ebooks/odt/input.py b/ebook_converter/ebooks/odt/input.py index ba22f63..602927f 100644 --- a/ebook_converter/ebooks/odt/input.py +++ b/ebook_converter/ebooks/odt/input.py @@ -24,7 +24,8 @@ class Extract(ODF2XHTML): if not os.path.exists('Pictures'): os.makedirs('Pictures') for name in zf.namelist(): - if name.startswith('Pictures') and name not in {'Pictures', 'Pictures/'}: + if (name.startswith('Pictures') and + name not in {'Pictures', 'Pictures/'}): data = zf.read(name) with open(name, 'wb') as f: f.write(data) @@ -46,13 +47,13 @@ class Extract(ODF2XHTML): self.extract_css(root, log) self.epubify_markup(root, log) self.apply_list_starts(root, log) - html = etree.tostring(root, encoding='utf-8', - xml_declaration=True) + html = etree.tostring(root, encoding='utf-8', xml_declaration=True) return html def extract_css(self, root, log): ans = [] - for s in root.xpath('//*[local-name() = "style" and @type="text/css"]'): + for s in root.xpath('//*[local-name() = "style" and ' + '@type="text/css"]'): ans.append(s.text) s.getparent().remove(s) @@ -63,11 +64,11 @@ class Extract(ODF2XHTML): if ns: ns = '{%s}'%ns etree.SubElement(head, ns+'link', {'type':'text/css', - 'rel':'stylesheet', 'href':'odfpy.css'}) + 'rel':'stylesheet', + 'href':'odfpy.css'}) css = u'\n\n'.join(ans) - parser = CSSParser(loglevel=logging.WARNING, - log=_css_logger) + parser = CSSParser(loglevel=logging.WARNING, log=_css_logger) self.css = parser.parseString(css, validate=False) with open('odfpy.css', 'wb') as f: @@ -209,7 +210,8 @@ class Extract(ODF2XHTML): for frm in 
self.document.topnode.getElementsByType(odFrame): try: if frm.getAttrNS(odTEXTNS,u'anchor-type') == 'page': - log.warn('Document has Pictures anchored to Page, will all end up before first page!') + log.warning('Document has Pictures anchored to Page, will ' + 'all end up before first page!') break except ValueError: pass @@ -234,7 +236,8 @@ class Extract(ODF2XHTML): # now it should be safe to remove the text:p parent = para.parentNode parent.removeChild(para) - log("Removed cover image paragraph from document...") + log.info("Removed cover image paragraph from " + "document...") break def filter_load(self, odffile, mi, log): @@ -267,7 +270,7 @@ class Extract(ODF2XHTML): if not os.path.exists(odir): os.makedirs(odir) with directory.CurrentDir(odir): - log('Extracting ODT file...') + log.info('Extracting ODT file...') stream.seek(0) mi = get_metadata(stream, 'odt') if not mi.title: diff --git a/ebook_converter/ebooks/oeb/base.py b/ebook_converter/ebooks/oeb/base.py index 505e131..a3fc470 100644 --- a/ebook_converter/ebooks/oeb/base.py +++ b/ebook_converter/ebooks/oeb/base.py @@ -904,7 +904,7 @@ class Manifest(object): def _parse_xhtml(self, data): orig_data = data fname = urllib.parse.unquote(self.href) - self.oeb.log.debug('Parsing', fname, '...') + self.oeb.log.debug('Parsing %s ...', fname) self.oeb.html_preprocessor.current_href = self.href try: data = parse_utils.parse_html(data, log=self.oeb.log, @@ -924,7 +924,7 @@ class Manifest(object): if has_html in data: return self._parse_xhtml(data) - self.oeb.log.debug('Converting', self.href, '...') + self.oeb.log.debug('Converting %s ...', self.href) from ebook_converter.ebooks.txt.processor import convert_markdown @@ -941,7 +941,7 @@ class Manifest(object): from css_parser.css import CSSRule log.setLevel(logging.WARN) log.raiseExceptions = False - self.oeb.log.debug('Parsing', self.href, '...') + self.oeb.log.debug('Parsing %s ...', self.href) data = self.oeb.decode(data) data = self.oeb.css_preprocessor(data, 
add_namespace=False) parser = CSSParser(loglevel=logging.WARNING, @@ -957,11 +957,11 @@ class Manifest(object): def _fetch_css(self, path): hrefs = self.oeb.manifest.hrefs if path not in hrefs: - self.oeb.logger.warn('CSS import of missing file %r' % path) + self.oeb.logger.warning('CSS import of missing file %s', path) return (None, None) item = hrefs[path] if item.media_type not in OEB_STYLES: - self.oeb.logger.warn('CSS import of non-CSS file %r' % path) + self.oeb.logger.warning('CSS import of non-CSS file %s', path) return (None, None) data = item.data.cssText enc = None if isinstance(data, str) else 'utf-8' @@ -1002,8 +1002,8 @@ class Manifest(object): elif mt in OEB_STYLES: data = self._parse_css(data) elif mt == 'text/plain': - self.oeb.log.warn('%s contains data in TXT format' % self.href, - 'converting to HTML') + self.oeb.log.warning('%s contains data in TXT format. ' + 'Converting to HTML', self.href) data = self._parse_txt(data) self.media_type = XHTML_MIME self._data = data diff --git a/ebook_converter/ebooks/oeb/parse_utils.py b/ebook_converter/ebooks/oeb/parse_utils.py index 3d0a119..50d8b95 100644 --- a/ebook_converter/ebooks/oeb/parse_utils.py +++ b/ebook_converter/ebooks/oeb/parse_utils.py @@ -65,7 +65,7 @@ def merge_multiple_html_heads_and_bodies(root, log=None): body.append(x) tuple(map(root.append, (head, body))) if log is not None: - log.warn('Merging multiple and sections') + log.warning('Merging multiple and sections') return root @@ -122,7 +122,7 @@ def clean_word_doc(data, log): for match in re.finditer(r'xmlns:(\S+?)=".*?microsoft.*?"', data): prefixes.append(match.group(1)) if prefixes: - log.warn('Found microsoft markup, cleaning...') + log.warning('Found microsoft markup, cleaning...') # Remove empty tags as they are not rendered by browsers # but can become renderable HTML tags like

if the # document is parsed by an HTML parser @@ -214,13 +214,13 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, data = etree.fromstring(data) check_for_html5(pre, data) except (HTML5Doc, etree.XMLSyntaxError): - log.debug('Parsing %s as HTML' % filename) + log.debug('Parsing %s as HTML', filename) data = raw try: data = html5_parse(data) except Exception: - log.exception( - 'HTML 5 parsing failed, falling back to older parsers') + log.exception('HTML 5 parsing failed, falling back to older ' + 'parsers') data = _html4_parse(data) if has_html4_doctype or data.tag == 'HTML' or (len(data) and (data[-1].get('LANG') or data[-1].get('DIR'))): @@ -239,7 +239,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, if barename(data.tag) != 'html': if barename(data.tag) in non_html_file_tags: raise NotHTML(data.tag) - log.warn('File %r does not appear to be (X)HTML'%filename) + log.warning('File %s does not appear to be (X)HTML', filename) nroot = etree.fromstring('') has_body = False for child in list(data): @@ -248,7 +248,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, break parent = nroot if not has_body: - log.warn('File %r appears to be a HTML fragment'%filename) + log.warning('File %s appears to be a HTML fragment', filename) nroot = etree.fromstring('') parent = nroot[0] for child in list(data.iter()): @@ -260,7 +260,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, # Force into the XHTML namespace if not namespace(data.tag): - log.warn('Forcing', filename, 'into XHTML namespace') + log.warning('Forcing %s into XHTML namespace', filename) data.attrib['xmlns'] = const.XHTML_NS data = etree.tostring(data, encoding='unicode') @@ -272,10 +272,8 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, try: data = etree.fromstring(data) except etree.XMLSyntaxError: - log.warn('Stripping comments from %s'% - filename) - data = re.compile(r'', re.DOTALL).sub('', - data) + log.warning('Stripping 
comments from %s', filename) + data = re.compile(r'', re.DOTALL).sub('', data) data = data.replace( "", '') @@ -283,7 +281,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, try: data = etree.fromstring(data) except etree.XMLSyntaxError: - log.warn('Stripping meta tags from %s'% filename) + log.warning('Stripping meta tags from %s', filename) data = re.sub(r']+?>', '', data) data = etree.fromstring(data) elif namespace(data.tag) != const.XHTML_NS: @@ -308,7 +306,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, head = xpath(data, '/h:html/h:head') head = head[0] if head else None if head is None: - log.warn('File %s missing element' % filename) + log.warning('File %s missing element', filename) head = etree.Element(XHTML('head')) data.insert(0, head) title = etree.SubElement(head, XHTML('title')) @@ -335,7 +333,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None, body.getparent().remove(body) data.append(body) else: - log.warn('File %s missing element' % filename) + log.warning('File %s missing element', filename) etree.SubElement(data, XHTML('body')) # Remove microsoft office markup diff --git a/ebook_converter/ebooks/oeb/polish/container.py b/ebook_converter/ebooks/oeb/polish/container.py index 8ee048a..928325a 100644 --- a/ebook_converter/ebooks/oeb/polish/container.py +++ b/ebook_converter/ebooks/oeb/polish/container.py @@ -1141,8 +1141,8 @@ class EpubContainer(Container): zf = ZipFile(stream) zf.extractall(tdir) except: - log.exception('EPUB appears to be invalid ZIP file, trying a' - ' more forgiving ZIP parser') + log.exception('EPUB appears to be invalid ZIP file, ' + 'trying a more forgiving ZIP parser') from ebook_converter.utils.localunzip import extractall stream.seek(0) extractall(stream, path=tdir) @@ -1481,7 +1481,7 @@ class AZW3Container(Container): 'ebook_converter.ebooks.oeb.polish.container', 'do_explode', args=(pathtoazw3, tdir), no_output=True)['result'] except WorkerError as e: - 
log(e.orig_tb) + log.error(e.orig_tb) raise InvalidMobi('Failed to explode MOBI') super(AZW3Container, self).__init__(tdir, opf_path, log) self.obfuscated_fonts = {x.replace(os.sep, '/') for x in obfuscated_fonts} diff --git a/ebook_converter/ebooks/oeb/reader.py b/ebook_converter/ebooks/oeb/reader.py index 5cbf259..9f19704 100644 --- a/ebook_converter/ebooks/oeb/reader.py +++ b/ebook_converter/ebooks/oeb/reader.py @@ -111,14 +111,14 @@ class OEBReader(object): encoding=None) try: opf = etree.fromstring(data) - self.logger.warn('OPF contains invalid HTML named entities') + self.logger.warning('OPF contains invalid HTML named entities') except etree.XMLSyntaxError: data = re.sub(r'(?is).+', '', data) data = data.replace('', '') opf = etree.fromstring(data) - self.logger.warn('OPF contains invalid tours section') + self.logger.warning('OPF contains invalid tours section') ns = parse_utils.namespace(opf.tag) if ns not in ('', const.OPF1_NS, const.OPF2_NS): @@ -172,7 +172,7 @@ class OEBReader(object): except KeyboardInterrupt: raise except Exception: - self.logger.exception('Failed to parse content in %s' % + self.logger.exception('Failed to parse content in %s', item.href) bad.append(item) self.oeb.manifest.remove(item) @@ -195,7 +195,7 @@ class OEBReader(object): data = item.data except Exception: self.oeb.log.exception('Failed to read from manifest ' - 'entry with id: %s, ignoring' % + 'entry with id: %s, ignoring', item.id) invalid.add(item) continue @@ -216,7 +216,7 @@ class OEBReader(object): scheme = urllib.parse.urlparse(href).scheme except Exception: self.oeb.log.exception('Skipping invalid href: ' - '%r' % href) + '%s', href) continue if not scheme and href not in known: new.add(href) @@ -244,12 +244,13 @@ class OEBReader(object): continue if not self.oeb.container.exists(href): if href not in warned: - self.logger.warn('Referenced file %r not found' % href) + self.logger.warning('Referenced file %s not found', + href) warned.add(href) continue if href not in 
warned: - self.logger.warn('Referenced file %r not in manifest' % - href) + self.logger.warning('Referenced file %s not in manifest', + href) warned.add(href) id, _ = manifest.generate(id='added') guessed = mimetypes.guess_type(href)[0] @@ -275,13 +276,13 @@ class OEBReader(object): media_type = media_type.lower() fallback = elem.get('fallback') if href in manifest.hrefs: - self.logger.warn('Duplicate manifest entry for %r' % href) + self.logger.warning('Duplicate manifest entry for %s', href) continue if not self.oeb.container.exists(href): - self.logger.warn('Manifest item %r not found' % href) + self.logger.warning('Manifest item %s not found', href) continue if id in manifest.ids: - self.logger.warn('Duplicate manifest id %r' % id) + self.logger.warning('Duplicate manifest id %s', id) id, href = manifest.generate(id, href) manifest.add(id, href, media_type, fallback) invalid = self._manifest_prune_invalid() @@ -323,8 +324,8 @@ class OEBReader(object): if item.href in removed_items_to_ignore: continue if version >= 2: - self.logger.warn( - 'Spine-referenced file %r not in spine' % item.href) + self.logger.warning('Spine-referenced file %s not in spine', + item.href) spine.add(item, linear=False) def _spine_from_opf(self, opf): @@ -333,7 +334,7 @@ class OEBReader(object): for elem in base.xpath(opf, '/o2:package/o2:spine/o2:itemref'): idref = elem.get('idref') if idref not in manifest.ids: - self.logger.warn('Spine item %r not found' % idref) + self.logger.warning('Spine item %s not found', idref) continue item = manifest.ids[idref] if (item.media_type.lower() in base.OEB_DOCS and @@ -346,8 +347,8 @@ class OEBReader(object): item.media_type = base.XHTML_MIME spine.add(item, elem.get('linear')) else: - self.oeb.log.warn('The item %s is not a XML document.' - ' Removing it from spine.' % item.href) + self.oeb.log.warning('The item %s is not a XML document.' 
+ ' Removing it from spine.', item.href) if len(spine) == 0: raise base.OEBError("Spine is empty") self._spine_add_extra() @@ -369,7 +370,8 @@ class OEBReader(object): corrected_href = href break if corrected_href is None: - self.logger.warn('Guide reference %r not found' % ref_href) + self.logger.warning('Guide reference %s not found', + ref_href) continue ref_href = corrected_href typ = elem.get('type') @@ -411,7 +413,7 @@ class OEBReader(object): if path and path not in self.oeb.manifest.hrefs: path = base.urlnormalize(path) if href and path not in self.oeb.manifest.hrefs: - self.logger.warn('TOC reference %r not found' % href) + self.logger.warning('TOC reference %s not found', href) gc = base.xpath(child, 'ncx:navPoint') if not gc: # This node is useless @@ -488,7 +490,7 @@ class OEBReader(object): continue path, _ = urllib.parse.urldefrag(base.urlnormalize(href)) if path not in self.oeb.manifest.hrefs: - self.logger.warn('TOC reference %r not found' % href) + self.logger.warning('TOC reference %s not found', href) continue id = site.get('id') toc.add(title, href, id=id) @@ -528,7 +530,7 @@ class OEBReader(object): return True def _toc_from_spine(self, opf): - self.log.warn('Generating default TOC from spine...') + self.log.warning('Generating default TOC from spine...') toc = self.oeb.toc titles = [] headers = [] @@ -656,7 +658,7 @@ class OEBReader(object): if item is not None and item.media_type in base.OEB_IMAGES: return item else: - self.logger.warn('Invalid cover image @id %r' % id) + self.logger.warning('Invalid cover image @id %s', id) hcover = self.oeb.spine[0] if 'cover' in self.oeb.guide: href = self.oeb.guide['cover'].href @@ -705,8 +707,8 @@ class OEBReader(object): items = [x for x in self.oeb.manifest if x.href == href] for x in items: if x not in self.oeb.spine: - self.oeb.log.warn('Removing duplicate manifest item with ' - 'id:', x.id) + self.oeb.log.warning('Removing duplicate manifest item ' + 'with id: %s', x.id) 
self.oeb.manifest.remove_duplicate_item(x) def _all_from_opf(self, opf): diff --git a/ebook_converter/ebooks/oeb/stylizer.py b/ebook_converter/ebooks/oeb/stylizer.py index c3389df..ab06fea 100644 --- a/ebook_converter/ebooks/oeb/stylizer.py +++ b/ebook_converter/ebooks/oeb/stylizer.py @@ -241,11 +241,14 @@ class Stylizer(object): continue hrefs = self.oeb.manifest.hrefs if ihref not in hrefs: - self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href) + self.logger.warning('Ignoring missing ' + 'stylesheet in @import ' + 'rule: %s', rule.href) continue sitem = hrefs[ihref] if sitem.media_type not in base.OEB_STYLES: - self.logger.warn('CSS @import of non-CSS file %r' % rule.href) + self.logger.warning('CSS @import of non-CSS ' + 'file %s', rule.href) continue stylesheets.append(sitem.data) # Make links to resources absolute, since these rules will @@ -261,14 +264,12 @@ class Stylizer(object): path = item.abshref(href) sitem = oeb.manifest.hrefs.get(path, None) if sitem is None: - self.logger.warn( - 'Stylesheet %r referenced by file %r not in manifest' % - (path, item.href)) + self.logger.warning('Stylesheet %s referenced by file %s ' + 'not in manifest', path, item.href) continue if not hasattr(sitem.data, 'cssRules'): - self.logger.warn( - 'Stylesheet %r referenced by file %r is not CSS'%(path, - item.href)) + self.logger.warning('Stylesheet %s referenced by file %s ' + 'is not CSS', path, item.href) continue stylesheets.append(sitem.data) csses = {'extra_css':extra_css, 'user_css':user_css} @@ -280,9 +281,8 @@ class Stylizer(object): validate=False) stylesheets.append(stylesheet) except Exception: - self.logger.exception('Failed to parse %s, ignoring.'%w) - self.logger.debug('Bad css: ') - self.logger.debug(x) + self.logger.exception('Failed to parse %s, ignoring.', w) + self.logger.debug('Bad css: %s', x) # using oeb to store the rules, page rule and font face rules # and generating them again if opts, profile or stylesheets are different @@ 
-303,7 +303,8 @@ class Stylizer(object): try: matches = tuple(select(text)) except SelectorError as err: - self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, err)) + self.logger.error('Ignoring CSS rule with invalid selector: ' + '%s (%s)', text, err) continue if fl is not None: @@ -367,11 +368,11 @@ class Stylizer(object): def _fetch_css_file(self, path): hrefs = self.oeb.manifest.hrefs if path not in hrefs: - self.logger.warn('CSS import of missing file %r' % path) + self.logger.warning('CSS import of missing file %s', path) return (None, None) item = hrefs[path] if item.media_type not in base.OEB_STYLES: - self.logger.warn('CSS import of non-CSS file %r' % path) + self.logger.warning('CSS import of non-CSS file %s', path) return (None, None) data = item.data.cssText if not isinstance(data, bytes): diff --git a/ebook_converter/ebooks/oeb/transforms/data_url.py b/ebook_converter/ebooks/oeb/transforms/data_url.py index 166ae45..13bfd81 100644 --- a/ebook_converter/ebooks/oeb/transforms/data_url.py +++ b/ebook_converter/ebooks/oeb/transforms/data_url.py @@ -36,16 +36,16 @@ class DataURL(object): data = polyglot.as_bytes(data) fmt = what(None, data) if not fmt: - self.log.warn('Image encoded as data URL has unknown ' - 'format, ignoring') + self.log.warning('Image encoded as data URL has unknown ' + 'format, ignoring') continue img.set('src', item.relhref(self.convert_image_data_uri(data, fmt, oeb))) def convert_image_data_uri(self, data, fmt, oeb): - self.log('Found image encoded as data URI converting it to normal ' - 'image') + self.log.info('Found image encoded as data URI converting it to ' + 'normal image') item_id, item_href = oeb.manifest.generate('data-url-image', 'data-url-image.' 
+ fmt) oeb.manifest.add(item_id, item_href, diff --git a/ebook_converter/ebooks/oeb/transforms/filenames.py b/ebook_converter/ebooks/oeb/transforms/filenames.py index 4b975ee..1e72735 100644 --- a/ebook_converter/ebooks/oeb/transforms/filenames.py +++ b/ebook_converter/ebooks/oeb/transforms/filenames.py @@ -117,8 +117,9 @@ class UniqueFilenames(object): # {{{ self.seen_filenames.add(fname) if self.rename_map: - self.log('Found non-unique filenames, renaming to support broken' - ' EPUB readers like FBReader, Aldiko and Stanza...') + self.log.info('Found non-unique filenames, renaming to support ' + 'broken EPUB readers like FBReader, Aldiko and ' + 'Stanza...') from pprint import pformat self.log.debug(pformat(self.rename_map)) @@ -173,8 +174,8 @@ class FlatFilenames(object): # {{{ oeb.spine.insert(isp, nitem, item.linear) if self.rename_map: - self.log('Found non-flat filenames, renaming to support broken' - ' EPUB readers like FBReader...') + self.log.info('Found non-flat filenames, renaming to support ' + 'broken EPUB readers like FBReader...') from pprint import pformat self.log.debug(pformat(self.rename_map)) self.log.debug(pformat(self.renamed_items_map)) diff --git a/ebook_converter/ebooks/oeb/transforms/flatcss.py b/ebook_converter/ebooks/oeb/transforms/flatcss.py index c336bc4..5492ae3 100644 --- a/ebook_converter/ebooks/oeb/transforms/flatcss.py +++ b/ebook_converter/ebooks/oeb/transforms/flatcss.py @@ -182,8 +182,8 @@ class CSSFlattener(object): else: from ebook_converter.ebooks.oeb.normalize_css import normalize_filter_css self.filter_css = frozenset(normalize_filter_css(self.filter_css)) - self.oeb.log.debug('Filtering CSS properties: %s'% - ', '.join(self.filter_css)) + self.oeb.log.debug('Filtering CSS properties: %s', + ', '.join(self.filter_css)) for item in oeb.manifest.values(): # Make all links to resources absolute, as these sheets will be @@ -231,13 +231,13 @@ class CSSFlattener(object): msg = ('No embeddable fonts found for family: %r'%family) 
if failure_critical: raise ValueError(msg) - self.oeb.log.warn(msg) + self.oeb.log.warning(msg) return body_font_family, efi if not faces: msg = ('No embeddable fonts found for family: %r'%family) if failure_critical: raise ValueError(msg) - self.oeb.log.warn(msg) + self.oeb.log.warning(msg) return body_font_family, efi for i, font in enumerate(faces): @@ -258,7 +258,7 @@ class CSSFlattener(object): if i == 0: generic_family = panose_to_css_generic_family(font['panose']) body_font_family = "'%s',%s"%(font['font-family'], generic_family) - self.oeb.log('Embedding font: %s'%font['font-family']) + self.oeb.log.info('Embedding font: %s', font['font-family']) for k in ('font-weight', 'font-style', 'font-stretch'): if font[k] != 'normal': cfont[k] = font[k] @@ -323,8 +323,7 @@ class CSSFlattener(object): sbase = max(list(sizes.items()), key=operator.itemgetter(1))[0] except: sbase = 12.0 - self.oeb.logger.info( - "Source base font size is %0.05fpt" % sbase) + self.oeb.logger.info("Source base font size is %0.05fpt", sbase) return sbase def clean_edges(self, cssdict, style, fsize): @@ -346,8 +345,7 @@ class CSSFlattener(object): try: value = round(value / slineh) * dlineh except: - self.oeb.logger.warning( - 'Invalid length:', value) + self.oeb.logger.warning('Invalid length: %s', value) value = 0.0 cssdict[property] = "%0.5fem" % (value / fsize) diff --git a/ebook_converter/ebooks/oeb/transforms/guide.py b/ebook_converter/ebooks/oeb/transforms/guide.py index ffe6feb..f02783a 100644 --- a/ebook_converter/ebooks/oeb/transforms/guide.py +++ b/ebook_converter/ebooks/oeb/transforms/guide.py @@ -1,8 +1,3 @@ -__license__ = 'GPL v3' -__copyright__ = '2009, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - - class Clean(object): '''Clean up guide, leaving only known values ''' @@ -28,7 +23,8 @@ class Clean(object): if covers: ref = covers[0][0] if len(covers) > 1: - self.log('Choosing %s:%s as the cover'%(ref.type, ref.href)) + self.log.info('Choosing %s:%s as the cover', 
ref.type, + ref.href) ref.type = 'cover' self.oeb.guide.refs['cover'] = ref diff --git a/ebook_converter/ebooks/oeb/transforms/jacket.py b/ebook_converter/ebooks/oeb/transforms/jacket.py index 1afd078..4231809 100644 --- a/ebook_converter/ebooks/oeb/transforms/jacket.py +++ b/ebook_converter/ebooks/oeb/transforms/jacket.py @@ -34,19 +34,19 @@ class RemoveFirstImage: continue removed = self.remove_images(item) if removed > 0: - self.log('Removed first image') + self.log.info('Removed first image') body = XPath('//h:body')(item.data) if body: raw = xml2text(body[0]).strip() imgs = XPath('//h:img|//svg:svg')(item.data) if not raw and not imgs: - self.log('Removing %s as it has no content' % - item.href) + self.log.info('Removing %s as it has no content', + item.href) self.oeb.manifest.remove(item) deleted_item = item break else: - self.log.warn('Could not find first image to remove') + self.log.warning('Could not find first image to remove') if deleted_item is not None: for item in list(self.oeb.toc): href = urllib.parse.urldefrag(item.href)[0] diff --git a/ebook_converter/ebooks/oeb/transforms/metadata.py b/ebook_converter/ebooks/oeb/transforms/metadata.py index 2198276..a5e391d 100644 --- a/ebook_converter/ebooks/oeb/transforms/metadata.py +++ b/ebook_converter/ebooks/oeb/transforms/metadata.py @@ -101,7 +101,7 @@ class MergeMetadata(object): _oim = override_input_metadata self.oeb, self.log = oeb, oeb.log m = self.oeb.metadata - self.log('Merging user specified metadata...') + self.log.info('Merging user specified metadata...') meta_info_to_oeb_metadata(mi, m, oeb.log, override_input_metadata=_oim) cover_id = self.set_cover(mi, opts.prefer_metadata_cover) @@ -210,8 +210,8 @@ class MergeMetadata(object): text = '' text = re.sub(r'\s+', '', text) if not text and not XPath('//h:img|//svg:svg')(item.data): - self.log('Removing %s as it is a wrapper around the cover ' - 'image' % item.href) + self.log.info('Removing %s as it is a wrapper around the ' + 'cover image', 
item.href) self.oeb.spine.remove(item) self.oeb.manifest.remove(item) self.oeb.guide.remove_by_href(item.href) diff --git a/ebook_converter/ebooks/oeb/transforms/page_margin.py b/ebook_converter/ebooks/oeb/transforms/page_margin.py index bfd81c3..634cae0 100644 --- a/ebook_converter/ebooks/oeb/transforms/page_margin.py +++ b/ebook_converter/ebooks/oeb/transforms/page_margin.py @@ -5,11 +5,6 @@ from ebook_converter.ebooks.oeb import parse_utils from ebook_converter.ebooks.oeb.base import XPath -__license__ = 'GPL v3' -__copyright__ = '2011, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - - class RemoveAdobeMargins(object): ''' Remove margins specified in Adobe's page templates. @@ -19,12 +14,13 @@ class RemoveAdobeMargins(object): self.oeb, self.opts, self.log = oeb, opts, log for item in self.oeb.manifest: - if item.media_type in { - 'application/vnd.adobe-page-template+xml', 'application/vnd.adobe.page-template+xml', - 'application/adobe-page-template+xml', 'application/adobe.page-template+xml', - } and hasattr(item.data, 'xpath'): - self.log('Removing page margins specified in the' - ' Adobe page template') + if (item.media_type in {'application/vnd.adobe-page-template+xml', + 'application/vnd.adobe.page-template+xml', + 'application/adobe-page-template+xml', + 'application/adobe.page-template+xml'} and + hasattr(item.data, 'xpath')): + self.log.info('Removing page margins specified in the ' + 'Adobe page template') for elem in item.data.xpath( '//*[@margin-bottom or @margin-top ' 'or @margin-left or @margin-right]'): @@ -59,7 +55,7 @@ class RemoveFakeMargins(object): if stylesheet is None: return - self.log('Removing fake margins...') + self.log.info('Removing fake margins...') stylesheet = stylesheet.data @@ -73,8 +69,8 @@ class RemoveFakeMargins(object): try: self.process_level(level) except NegativeTextIndent: - self.log.debug('Negative text indent detected at level ' - ' %s, ignoring this level'%level) + self.log.debug('Negative text indent detected 
at level %s, ' + 'ignoring this level', level) def get_margins(self, elem): cls = elem.get('class', None) @@ -102,19 +98,21 @@ class RemoveFakeMargins(object): self.stats[level+'_left'][lm] += 1 self.stats[level+'_right'][rm] += 1 - self.log.debug(level, ' left margin stats:', self.stats[level+'_left']) - self.log.debug(level, ' right margin stats:', self.stats[level+'_right']) + self.log.debug('%s left margin stats: %s', level, + self.stats[level+'_left']) + self.log.debug('%s right margin stats: %s', level, + self.stats[level+'_right']) remove_left = self.analyze_stats(self.stats[level+'_left']) remove_right = self.analyze_stats(self.stats[level+'_right']) if remove_left: mcl = self.stats[level+'_left'].most_common(1)[0][0] - self.log('Removing level %s left margin of:'%level, mcl) + self.log.info('Removing level %s left margin of: %s', level, mcl) if remove_right: mcr = self.stats[level+'_right'].most_common(1)[0][0] - self.log('Removing level %s right margin of:'%level, mcr) + self.log.info('Removing level %s right margin of: %s', level, mcr) if remove_left or remove_right: for elem in elems: @@ -151,7 +149,7 @@ class RemoveFakeMargins(object): remove = set() for k, v in self.levels.items(): num = len(v) - self.log.debug('Found %d items of level:'%num, k) + self.log.debug('Found %s items of level: %s', num, k) level = int(k.split('_')[-1]) tag = k.split('_')[0] if tag == 'p' and num < 25: @@ -169,7 +167,7 @@ class RemoveFakeMargins(object): for k in remove: self.levels.pop(k) - self.log.debug('Ignoring level', k) + self.log.debug('Ignoring level %s', k) def analyze_stats(self, stats): if not stats: diff --git a/ebook_converter/ebooks/oeb/transforms/rescale.py b/ebook_converter/ebooks/oeb/transforms/rescale.py index d1677be..fa81478 100644 --- a/ebook_converter/ebooks/oeb/transforms/rescale.py +++ b/ebook_converter/ebooks/oeb/transforms/rescale.py @@ -45,12 +45,14 @@ class RescaleImages(object): try: if self.check_colorspaces and img.mode == 'CMYK': - 
self.log.warn( - 'The image %s is in the CMYK colorspace, converting it ' - 'to RGB as Adobe Digital Editions cannot display CMYK' % item.href) + self.log.warning('The image %s is in the CMYK ' + 'colorspace, converting it to RGB as ' + 'Adobe Digital Editions cannot ' + 'display CMYK', item.href) img = img.convert('RGB') except Exception: - self.log.exception('Failed to convert image %s from CMYK to RGB' % item.href) + self.log.exception('Failed to convert image %s from CMYK ' + 'to RGB', item.href) scaled, new_width, new_height = uimg.fit_image(width, height, page_width, @@ -58,18 +60,20 @@ class RescaleImages(object): if scaled: new_width = max(1, new_width) new_height = max(1, new_height) - self.log('Rescaling image from %dx%d to %dx%d'%( - width, height, new_width, new_height), item.href) + self.log.info('Rescaling image from %sx%s to %sx%s %s', width, + height, new_width, new_height, item.href) try: img = img.resize((new_width, new_height)) except Exception: - self.log.exception('Failed to rescale image: %s' % item.href) + self.log.exception('Failed to rescale image: %s', + item.href) continue buf = BytesIO() try: img.save(buf, ext) except Exception: - self.log.exception('Failed to rescale image: %s' % item.href) + self.log.exception('Failed to rescale image: %s', + item.href) else: item.data = buf.getvalue() item.unload_data_from_memory() diff --git a/ebook_converter/ebooks/oeb/transforms/split.py b/ebook_converter/ebooks/oeb/transforms/split.py index 0aeeb0b..77e26ca 100644 --- a/ebook_converter/ebooks/oeb/transforms/split.py +++ b/ebook_converter/ebooks/oeb/transforms/split.py @@ -59,7 +59,8 @@ class Split(object): def __call__(self, oeb, opts): self.oeb = oeb self.log = oeb.log - self.log('Splitting markup on page breaks and flow limits, if any...') + self.log.info('Splitting markup on page breaks and flow limits, if ' + 'any...') self.opts = opts self.map = {} for item in list(self.oeb.manifest.items): @@ -127,8 +128,7 @@ class Split(object): 
page_breaks.add(elem) except SelectorError as err: self.log.warn('Ignoring page breaks specified with invalid ' - 'CSS selector: %r (%s)' % - (selector, err) + 'CSS selector: %s (%s)', selector, err) for i, elem in enumerate(item.data.iter('*')): try: @@ -221,13 +221,13 @@ class FlowSplitter(object): if self.max_flow_size > 0: lt_found = False - self.log('\tLooking for large trees in %s...' % item.href) + self.log.info('\tLooking for large trees in %s...', item.href) trees = list(self.trees) self.tree_map = {} for i, tree in enumerate(trees): size = len(tostring(tree.getroot())) if size > self.max_flow_size: - self.log('\tFound large tree #%d' % i) + self.log.info('\tFound large tree #%s', i) lt_found = True self.split_trees = [] self.split_to_size(tree) @@ -240,7 +240,7 @@ class FlowSplitter(object): self.was_split = len(self.trees) > 1 if self.was_split: - self.log('\tSplit into %d parts' % len(self.trees)) + self.log.info('\tSplit into %s parts', len(self.trees)) self.commit() def split_on_page_breaks(self, orig_tree): @@ -259,7 +259,7 @@ class FlowSplitter(object): tree = self.trees[i] elem = pattern(tree) if elem: - self.log.debug('\t\tSplitting on page-break at id=%s' % + self.log.debug('\t\tSplitting on page-break at id=%s', elem[0].get('id')) before_tree, after_tree = self.do_split(tree, elem[0], before) @@ -322,10 +322,10 @@ class FlowSplitter(object): return True def split_text(self, text, root, size): - self.log.debug('\t\t\tSplitting text of length: %d' % len(text)) + self.log.debug('\t\t\tSplitting text of length: %d', len(text)) rest = text.replace('\r', '') parts = re.split('\n\n', rest) - self.log.debug('\t\t\t\tFound %d parts' % len(parts)) + self.log.debug('\t\t\t\tFound %d parts', len(parts)) if max(map(len, parts)) > size: raise SplitError('Cannot split as file contains a

 tag '
                              'with a very large paragraph', root)
@@ -364,7 +364,7 @@ class FlowSplitter(object):
         split_point, before = self.find_split_point(root)
         if split_point is None:
             raise SplitError(self.item.href, root)
-        self.log.debug('\t\t\tSplit point:', split_point.tag,
+        self.log.debug('\t\t\tSplit point: %s %s', split_point.tag,
                        tree.getpath(split_point))
 
         trees = self.do_split(tree, split_point, before)
@@ -380,10 +380,10 @@ class FlowSplitter(object):
                 continue
             elif size <= self.max_flow_size:
                 self.split_trees.append(t)
-                self.log.debug('\t\t\tCommitted sub-tree #%d (%d KB)' %
-                               (len(self.split_trees), size/1024.))
+                self.log.debug('\t\t\tCommitted sub-tree #%s (%s KB)',
+                               len(self.split_trees), size/1024.)
             else:
-                self.log.debug('\t\t\tSplit tree still too large: %d KB' %
+                self.log.debug('\t\t\tSplit tree still too large: %d KB',
                                size/1024)
                 self.split_to_size(t)
 
diff --git a/ebook_converter/ebooks/oeb/transforms/structure.py b/ebook_converter/ebooks/oeb/transforms/structure.py
index 3d06fce..519cb8e 100644
--- a/ebook_converter/ebooks/oeb/transforms/structure.py
+++ b/ebook_converter/ebooks/oeb/transforms/structure.py
@@ -51,7 +51,7 @@ class DetectStructure(object):
         self.log = oeb.log
         self.oeb = oeb
         self.opts = opts
-        self.log('Detecting structure...')
+        self.log.info('Detecting structure...')
 
         self.detect_chapters()
         if self.oeb.auto_generated_toc or opts.use_auto_toc:
@@ -67,15 +67,15 @@ class DetectStructure(object):
                 self.oeb.toc = orig_toc
             else:
                 self.oeb.auto_generated_toc = True
-                self.log('Auto generated TOC with %d entries.' %
-                         self.oeb.toc.count())
+                self.log.info('Auto generated TOC with %s entries.',
+                              self.oeb.toc.count())
 
         if opts.toc_filter is not None:
             regexp = re.compile(opts.toc_filter)
             for node in list(self.oeb.toc.iter()):
                 if not node.title or regexp.search(node.title) is not None:
-                    self.log('Filtering', node.title if node.title else
-                             'empty node', 'from TOC')
+                    self.log.info('Filtering %s from TOC', node.title if
+                                  node.title else 'empty node')
                     self.oeb.toc.remove(node)
 
         if opts.page_breaks_before is not None:
@@ -112,8 +112,8 @@ class DetectStructure(object):
         try:
             expr = XPath(expr)
         except Exception:
-            self.log.warn('Invalid start reading at XPath expression, '
-                          'ignoring: %s' % expr)
+            self.log.warning('Invalid start reading at XPath expression, '
+                             'ignoring: %s', expr)
             return
         for item in self.oeb.spine:
             if not hasattr(item.data, 'xpath'):
@@ -129,11 +129,11 @@ class DetectStructure(object):
                 if 'text' in self.oeb.guide:
                     self.oeb.guide.remove('text')
                 self.oeb.guide.add('text', 'Start', item.href+'#'+eid)
-                self.log('Setting start reading at position to %s in %s' %
-                         (self.opts.start_reading_at, item.href))
+                self.log.info('Setting start reading at position to %s in %s',
+                              self.opts.start_reading_at, item.href)
                 return
-        self.log.warn("Failed to find start reading at position: %s" %
-                      self.opts.start_reading_at)
+        self.log.warning("Failed to find start reading at position: %s",
+                         self.opts.start_reading_at)
 
     def get_toc_parts_for_xpath(self, expr):
         # if an attribute is selected by the xpath expr then truncate it
@@ -155,8 +155,8 @@ class DetectStructure(object):
                 len(ans)
                 return ans
             except Exception:
-                self.log.warn('Invalid chapter expression, ignoring: %s' %
-                              expr)
+                self.log.warning('Invalid chapter expression, ignoring: %s',
+                                 expr)
                 return []
 
         if self.opts.chapter:
@@ -175,7 +175,7 @@ class DetectStructure(object):
                 c[item] += 1
                 text = base.xml2text(elem).strip()
                 text = re.sub(r'\s+', ' ', text.strip())
-                self.log('\tDetected chapter:', text[:50])
+                self.log.info('\tDetected chapter: %s', text[:50])
                 if chapter_mark == 'none':
                     continue
                 if chapter_mark == 'rule':
@@ -221,7 +221,7 @@ class DetectStructure(object):
                 try:
                     purl = urllib.parse.urlparse(href)
                 except ValueError:
-                    self.log.warning('Ignoring malformed URL:', href)
+                    self.log.warning('Ignoring malformed URL: %s', href)
                     continue
                 if not purl[0] or purl[0] == 'file':
                     href, frag = purl.path, purl.fragment
@@ -240,13 +240,14 @@ class DetectStructure(object):
                                 play_order=self.oeb.toc.next_play_order())
                             num += 1
                         except ValueError:
-                            self.oeb.log.exception('Failed to process link: '
-                                                   '%r' % href)
+                            self.oeb.log.exception(
+                                'Failed to process link: %s', href)
                             # Most likely an incorrectly URL encoded link
                             continue
                         if self.opts.max_toc_links > 0 and \
                                 num >= self.opts.max_toc_links:
-                            self.log('Maximum TOC links reached, stopping.')
+                            self.log.info('Maximum TOC links reached, '
+                                          'stopping.')
                             return
 
     def elem_to_link(self, item, elem, title_attribute, counter):
@@ -277,7 +278,7 @@ class DetectStructure(object):
                 len(ans)
                 return ans
             except Exception:
-                self.log.warn('Invalid ToC expression, ignoring: %s' % expr)
+                self.log.warning('Invalid ToC expression, ignoring: %s', expr)
                 return []
 
         for document in self.oeb.spine:
diff --git a/ebook_converter/ebooks/oeb/transforms/subset.py b/ebook_converter/ebooks/oeb/transforms/subset.py
index 63044ae..f3b9f7d 100644
--- a/ebook_converter/ebooks/oeb/transforms/subset.py
+++ b/ebook_converter/ebooks/oeb/transforms/subset.py
@@ -5,11 +5,6 @@ from ebook_converter.utils.fonts.sfnt.subset import subset, NoGlyphs, Unsupporte
 from ebook_converter.tinycss.fonts3 import parse_font_family
 
 
-__license__ = 'GPL v3'
-__copyright__ = '2012, Kovid Goyal '
-__docformat__ = 'restructuredtext en'
-
-
 def get_font_properties(rule, default=None):
     '''
     Given a CSS rule, extract normalized font properties from
@@ -19,7 +14,7 @@ def get_font_properties(rule, default=None):
     props = {}
     s = rule.style
     for q in ('font-family', 'src', 'font-weight', 'font-stretch',
-            'font-style'):
+              'font-style'):
         g = 'uri' if q == 'src' else 'value'
         try:
             val = s.getProperty(q).propertyValue[0]
@@ -149,18 +144,20 @@ class SubsetFonts(object):
 
         for font in fonts.values():
             if not font['chars']:
-                self.log('The font %s is unused. Removing it.'%font['src'])
+                self.log.info('The font %s is unused. Removing it.',
+                              font['src'])
                 remove(font)
                 continue
             try:
                 raw, old_stats, new_stats = subset(font['item'].data, font['chars'])
             except NoGlyphs:
-                self.log('The font %s has no used glyphs. Removing it.'%font['src'])
+                self.log.info('The font %s has no used glyphs. Removing it.',
+                              font['src'])
                 remove(font)
                 continue
             except UnsupportedFont as e:
-                self.log.warn('The font %s is unsupported for subsetting. %s'%(
-                    font['src'], e))
+                self.log.warning('The font %s is unsupported for subsetting. '
+                                 '%s', font['src'], e)
                 sz = len(font['item'].data)
                 totals[0] += sz
                 totals[1] += sz
@@ -168,16 +164,16 @@ class SubsetFonts(object):
                 font['item'].data = raw
                 nlen = sum(new_stats.values())
                 olen = sum(old_stats.values())
-                self.log('Decreased the font %s to %.1f%% of its original size'%
-                        (font['src'], nlen/olen *100))
+                self.log.info('Decreased the font %s to %.1f%% of its '
+                              'original size', font['src'], nlen/olen * 100)
                 totals[0] += nlen
                 totals[1] += olen
 
             font['item'].unload_data_from_memory()
 
         if totals[0]:
-            self.log('Reduced total font size to %.1f%% of original'%
-                    (totals[0]/totals[1] * 100))
+            self.log.info('Reduced total font size to %.1f%% of original',
+                          totals[0]/totals[1] * 100)
 
     def find_embedded_fonts(self):
         '''
diff --git a/ebook_converter/ebooks/pdb/ereader/reader132.py b/ebook_converter/ebooks/pdb/ereader/reader132.py
index 1e0b8af..bc5241b 100644
--- a/ebook_converter/ebooks/pdb/ereader/reader132.py
+++ b/ebook_converter/ebooks/pdb/ereader/reader132.py
@@ -112,7 +112,7 @@ class Reader132(FormatReader):
 
         pml = ''
         for i in range(1, self.header_record.num_text_pages + 1):
-            self.log.debug('Extracting text page %i' % i)
+            self.log.debug('Extracting text page %s', i)
             pml += self.get_text_page(i)
         hizer = PML_HTMLizer()
         html += hizer.parse_pml(pml, 'index.html')
@@ -123,7 +123,7 @@ class Reader132(FormatReader):
             footnoteids = re.findall(
                 '\\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding))
             for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_count)):
-                self.log.debug('Extracting footnote page %i' % i)
+                self.log.debug('Extracting footnote page %s', i)
                 if fid < len(footnoteids):
                     fid = footnoteids[fid]
                 else:
@@ -135,7 +135,7 @@ class Reader132(FormatReader):
             sidebarids = re.findall(
                 '\\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
             for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_count)):
-                self.log.debug('Extracting sidebar page %i' % i)
+                self.log.debug('Extracting sidebar page %s', i)
                 if sid < len(sidebarids):
                     sid = sidebarids[sid]
                 else:
@@ -157,7 +157,7 @@ class Reader132(FormatReader):
                 name, img = self.get_image(self.header_record.image_data_offset + i)
                 images.append(name)
                 with open(name, 'wb') as imgf:
-                    self.log.debug('Writing image %s to images/' % name)
+                    self.log.debug('Writing image %s to images/', name)
                     imgf.write(img)
 
         opf_path = self.create_opf(output_dir, images, toc)
diff --git a/ebook_converter/ebooks/pdb/ereader/reader202.py b/ebook_converter/ebooks/pdb/ereader/reader202.py
index e7237fd..304fcca 100644
--- a/ebook_converter/ebooks/pdb/ereader/reader202.py
+++ b/ebook_converter/ebooks/pdb/ereader/reader202.py
@@ -87,7 +87,7 @@ class Reader202(FormatReader):
 
         pml = ''
         for i in range(1, self.header_record.num_text_pages + 1):
-            self.log.debug('Extracting text page %i' % i)
+            self.log.debug('Extracting text page %s', i)
             pml += self.get_text_page(i)
 
         title = self.mi.title
@@ -111,7 +111,7 @@ class Reader202(FormatReader):
                 if name:
                     images.append(name)
                     with open(name, 'wb') as imgf:
-                        self.log.debug('Writing image %s to images/' % name)
+                        self.log.debug('Writing image %s to images/', name)
                         imgf.write(img)
 
         opf_path = self.create_opf(output_dir, images)
diff --git a/ebook_converter/ebooks/pdb/haodoo/reader.py b/ebook_converter/ebooks/pdb/haodoo/reader.py
index ec611f5..6844b65 100644
--- a/ebook_converter/ebooks/pdb/haodoo/reader.py
+++ b/ebook_converter/ebooks/pdb/haodoo/reader.py
@@ -116,9 +116,9 @@ class Reader(FormatReader):
     def extract_content(self, output_dir):
         txt = ''
 
-        self.log.info(u'Decompressing text...')
+        self.log.info('Decompressing text...')
         for i in range(1, self.header_record.num_records + 1):
-            self.log.debug(u'\tDecompressing text section %i' % i)
+            self.log.debug('\tDecompressing text section %s', i)
             title = self.header_record.chapter_titles[i-1]
             lines = []
             title_added = False
@@ -135,7 +135,7 @@ class Reader(FormatReader):
                 lines.insert(0, '

' + title + '

\n') txt += '\n'.join(lines) - self.log.info(u'Converting text to OEB...') + self.log.info('Converting text to OEB...') html = HTML_TEMPLATE % (self.header_record.title, txt) with open(os.path.join(output_dir, 'index.html'), 'wb') as index: index.write(html.encode('utf-8')) diff --git a/ebook_converter/ebooks/pdb/palmdoc/reader.py b/ebook_converter/ebooks/pdb/palmdoc/reader.py index c6f057b..bafdb0c 100644 --- a/ebook_converter/ebooks/pdb/palmdoc/reader.py +++ b/ebook_converter/ebooks/pdb/palmdoc/reader.py @@ -55,7 +55,7 @@ class Reader(FormatReader): self.log.info('Decompressing text...') for i in range(1, self.header_record.num_records + 1): - self.log.debug('\tDecompressing text section %i' % i) + self.log.debug('\tDecompressing text section %s', i) raw_txt += self.decompress_text(i) self.log.info('Converting text to OEB...') diff --git a/ebook_converter/ebooks/pdb/plucker/reader.py b/ebook_converter/ebooks/pdb/plucker/reader.py index 5f43dc2..3eff90b 100644 --- a/ebook_converter/ebooks/pdb/plucker/reader.py +++ b/ebook_converter/ebooks/pdb/plucker/reader.py @@ -360,7 +360,8 @@ class Reader(FormatReader): # plugin assemble the order based on hyperlinks. 
with directory.CurrentDir(output_dir): for uid, num in self.uid_text_secion_number.items(): - self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid)) + self.log.debug('Writing record with uid: %s as %s.html', + uid, uid) with open('%s.html' % uid, 'wb') as htmlf: html = u'' section_header, section_data = self.sections[num] @@ -393,11 +394,14 @@ class Reader(FormatReader): try: save_cover_data_to(idata, '%s.jpg' % uid, compression_quality=70) images.add(uid) - self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid)) + self.log.debug('Wrote image with uid %s to ' + 'images/%s.jpg', uid, uid) except Exception as e: - self.log.error('Failed to write image with uid %s: %s' % (uid, e)) + self.log.error('Failed to write image with uid %s: %s', + uid, e) else: - self.log.error('Failed to write image with uid %s: No data.' % uid) + self.log.error('Failed to write image with uid %s: ' + 'No data.', uid) # Composite images. # We're going to use the already compressed .jpg images here. for uid, num in self.uid_composite_image_section_number.items(): @@ -436,9 +440,11 @@ class Reader(FormatReader): y_off += largest_height with open('%s.jpg' % uid) as out: out.write(canvas.export(compression_quality=70)) - self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid)) + self.log.debug('Wrote composite image with uid %s to ' + 'images/%s.jpg', uid, uid) except Exception as e: - self.log.error('Failed to write composite image with uid %s: %s' % (uid, e)) + self.log.error('Failed to write composite image with ' + 'uid %s: %s', uid, e) # Run the HTML through the html processing plugin. 
from ebook_converter.customize.ui import plugin_for_input_format diff --git a/ebook_converter/ebooks/pdb/ztxt/reader.py b/ebook_converter/ebooks/pdb/ztxt/reader.py index 058157b..6efd45a 100644 --- a/ebook_converter/ebooks/pdb/ztxt/reader.py +++ b/ebook_converter/ebooks/pdb/ztxt/reader.py @@ -54,7 +54,7 @@ class Reader(FormatReader): if (self.header_record.flags & 0x01) == 0: raise zTXTError('Only compression method 1 (random access) is supported') - self.log.debug('Foud ztxt version: %i.%i' % (vmajor, vminor)) + self.log.debug('Foud ztxt version: %s.%s', vmajor, vminor) # Initalize the decompressor self.uncompressor = zlib.decompressobj() @@ -73,7 +73,7 @@ class Reader(FormatReader): self.log.info('Decompressing text...') for i in range(1, self.header_record.num_records + 1): - self.log.debug('\tDecompressing text section %i' % i) + self.log.debug('\tDecompressing text section %s', i) raw_txt += self.decompress_text(i) self.log.info('Converting text to OEB...') diff --git a/ebook_converter/ebooks/txt/markdownml.py b/ebook_converter/ebooks/txt/markdownml.py index 0b862ee..5cbfd0a 100644 --- a/ebook_converter/ebooks/txt/markdownml.py +++ b/ebook_converter/ebooks/txt/markdownml.py @@ -43,7 +43,8 @@ class MarkdownMLizer(OEB2HTML): def mlize_spine(self, oeb_book): output = [''] for item in oeb_book.spine: - self.log.debug('Converting %s to Markdown formatted TXT...' 
% item.href) + self.log.debug('Converting %s to Markdown formatted TXT...', + item.href) self.rewrite_ids(item.data, item) rewrite_links(item.data, partial(self.rewrite_link, page=item)) stylizer = Stylizer(item.data, item.href, oeb_book, self.opts, self.opts.output_profile) diff --git a/ebook_converter/ebooks/txt/textileml.py b/ebook_converter/ebooks/txt/textileml.py index 7b6f626..8e8aa23 100644 --- a/ebook_converter/ebooks/txt/textileml.py +++ b/ebook_converter/ebooks/txt/textileml.py @@ -58,7 +58,7 @@ class TextileMLizer(OEB2HTML): def mlize_spine(self, oeb_book): output = [''] for item in oeb_book.spine: - self.log.debug('Converting %s to Textile formatted TXT...' % item.href) + self.log.debug('Converting %s to Textile formatted TXT...', item.href) self.rewrite_ids(item.data, item) rewrite_links(item.data, partial(self.rewrite_link, page=item)) stylizer = Stylizer(item.data, item.href, oeb_book, self.opts, self.opts.output_profile) diff --git a/ebook_converter/ebooks/txt/txtml.py b/ebook_converter/ebooks/txt/txtml.py index 9b17a11..a230f97 100644 --- a/ebook_converter/ebooks/txt/txtml.py +++ b/ebook_converter/ebooks/txt/txtml.py @@ -64,7 +64,7 @@ class TXTMLizer(object): output = [u''] output.append(self.get_toc()) for item in self.oeb_book.spine: - self.log.debug('Converting %s to TXT...' % item.href) + self.log.debug('Converting %s to TXT...', item.href) for x in item.data.iterdescendants(etree.Comment): if x.text and '--' in x.text: x.text = x.text.replace('--', '__')