1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-27 01:01:28 +02:00

Another part of logging adaptation

This commit is contained in:
2021-06-24 19:57:47 +02:00
parent 6dfcaea2dd
commit 230fe90e69
9 changed files with 56 additions and 59 deletions
+21 -19
View File
@@ -230,7 +230,8 @@ class Dehyphenator(object):
if len(firsthalf) > 4 and self.prefixes.match(firsthalf) is None: if len(firsthalf) > 4 and self.prefixes.match(firsthalf) is None:
lookupword = self.removeprefix.sub('', lookupword) lookupword = self.removeprefix.sub('', lookupword)
if self.verbose > 2: if self.verbose > 2:
self.log("lookup word is: "+lookupword+", orig is: " + hyphenated) self.log.info("lookup word is: %s, orig is: %s", lookupword,
hyphenated)
try: try:
searchresult = self.html.find(lookupword.lower()) searchresult = self.html.find(lookupword.lower())
except Exception: except Exception:
@@ -238,41 +239,42 @@ class Dehyphenator(object):
if self.format == 'html_cleanup' or self.format == 'txt_cleanup': if self.format == 'html_cleanup' or self.format == 'txt_cleanup':
if self.html.find(lookupword) != -1 or searchresult != -1: if self.html.find(lookupword) != -1 or searchresult != -1:
if self.verbose > 2: if self.verbose > 2:
self.log(" Cleanup:returned dehyphenated word: " + self.log.info(" Cleanup:returned dehyphenated word: %s",
dehyphenated) dehyphenated)
return dehyphenated return dehyphenated
elif self.html.find(hyphenated) != -1: elif self.html.find(hyphenated) != -1:
if self.verbose > 2: if self.verbose > 2:
self.log(" Cleanup:returned hyphenated word: " + self.log.info(" Cleanup:returned hyphenated word: "
hyphenated) "%s", hyphenated)
return hyphenated return hyphenated
else: else:
if self.verbose > 2: if self.verbose > 2:
self.log(" Cleanup:returning original text " + self.log.info(" Cleanup:returning original "
firsthalf + " + linefeed " + secondhalf) "text %s + linefeed %s", firsthalf,
secondhalf)
return firsthalf+'\u2014'+wraptags+secondhalf return firsthalf+'\u2014'+wraptags+secondhalf
else: else:
if (self.format == 'individual_words' and if (self.format == 'individual_words' and
len(firsthalf) + len(secondhalf) <= 6): len(firsthalf) + len(secondhalf) <= 6):
if self.verbose > 2: if self.verbose > 2:
self.log("too short, returned hyphenated word: " + self.log.info("too short, returned hyphenated word: %s",
hyphenated) hyphenated)
return hyphenated return hyphenated
if len(firsthalf) <= 2 and len(secondhalf) <= 2: if len(firsthalf) <= 2 and len(secondhalf) <= 2:
if self.verbose > 2: if self.verbose > 2:
self.log("too short, returned hyphenated word: " + self.log.info("too short, returned hyphenated word: %s",
hyphenated) hyphenated)
return hyphenated return hyphenated
if self.html.find(lookupword) != -1 or searchresult != -1: if self.html.find(lookupword) != -1 or searchresult != -1:
if self.verbose > 2: if self.verbose > 2:
self.log(" returned dehyphenated word: " + self.log.info(" returned dehyphenated word: %s",
dehyphenated) dehyphenated)
return dehyphenated return dehyphenated
else: else:
if self.verbose > 2: if self.verbose > 2:
self.log(" returned hyphenated word: " + self.log.info(" returned hyphenated word: %s",
hyphenated) hyphenated)
return hyphenated return hyphenated
def __call__(self, html, format, length=1): def __call__(self, html, format, length=1):
@@ -537,8 +539,8 @@ class HTMLPreProcessor(object):
rules.insert(0, (search_re, replace_txt)) rules.insert(0, (search_re, replace_txt))
user_sr_rules[(search_re, replace_txt)] = search_pattern user_sr_rules[(search_re, replace_txt)] = search_pattern
except Exception as e: except Exception as e:
self.log.error('Failed to parse %r regexp because %s' % self.log.error('Failed to parse %r regexp because %s',
(search, e)) search, e)
# search / replace using the sr?_search / sr?_replace options # search / replace using the sr?_search / sr?_replace options
for i in range(1, 4): for i in range(1, 4):
@@ -619,8 +621,8 @@ class HTMLPreProcessor(object):
except Exception as e: except Exception as e:
if rule in user_sr_rules: if rule in user_sr_rules:
self.log.error('User supplied search & replace rule: %s ' self.log.error('User supplied search & replace rule: %s '
'-> %s failed with error: %s, ignoring.' % '-> %s failed with error: %s, ignoring.',
(user_sr_rules[rule], rule[1], e)) user_sr_rules[rule], rule[1], e)
else: else:
raise raise
+4 -2
View File
@@ -152,7 +152,8 @@ class Fields(object):
if func is not None: if func is not None:
func(field, field_parsers[field.name], log) func(field, field_parsers[field.name], log)
elif field.name not in unknown_fields: elif field.name not in unknown_fields:
log.warn('Encountered unknown field: %s, ignoring it.' % field.name) log.warning('Encountered unknown field: %s, ignoring it.',
field.name)
unknown_fields.add(field.name) unknown_fields.add(field.name)
def get_runs(self, field): def get_runs(self, field):
@@ -187,7 +188,8 @@ class Fields(object):
for runs in self.get_runs(field): for runs in self.get_runs(field):
self.hyperlink_fields.append(({'anchor':dest}, runs)) self.hyperlink_fields.append(({'anchor':dest}, runs))
else: else:
log.warn('Unsupported reference field (%s), ignoring: %r' % (field.name, ref)) log.warning('Unsupported reference field (%s), ignoring: %r',
field.name, ref)
parse_noteref = parse_ref parse_noteref = parse_ref
+8 -8
View File
@@ -8,10 +8,6 @@ from ebook_converter.utils.img import resize_to_fit, image_to_data
from ebook_converter.utils.imghdr import what from ebook_converter.utils.imghdr import what
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
class LinkedImageNotFound(ValueError): class LinkedImageNotFound(ValueError):
def __init__(self, fname): def __init__(self, fname):
@@ -135,12 +131,14 @@ class Images(object):
ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg' ext = what(None, raw) or base.rpartition('.')[-1] or 'jpeg'
if ext == 'emf': if ext == 'emf':
# For an example, see: https://bugs.launchpad.net/bugs/1224849 # For an example, see: https://bugs.launchpad.net/bugs/1224849
self.log('Found an EMF image: %s, trying to extract embedded raster image' % fname) self.log.info('Found an EMF image: %s, trying to extract '
'embedded raster image', fname)
from ebook_converter.utils.wmf.emf import emf_unwrap from ebook_converter.utils.wmf.emf import emf_unwrap
try: try:
raw = emf_unwrap(raw) raw = emf_unwrap(raw)
except Exception: except Exception:
self.log.exception('Failed to extract embedded raster image from EMF') self.log.exception('Failed to extract embedded raster image '
'from EMF')
else: else:
ext = 'png' ext = 'png'
base = base.rpartition('.')[0] base = base.rpartition('.')[0]
@@ -216,7 +214,8 @@ class Images(object):
try: try:
src = self.generate_filename(rid, name) src = self.generate_filename(rid, name)
except LinkedImageNotFound as err: except LinkedImageNotFound as err:
self.log.warn('Linked image: %s not found, ignoring' % err.fname) self.log.warning('Linked image: %s not found, '
'ignoring', err.fname)
continue continue
img = IMG(src='images/%s' % src) img = IMG(src='images/%s' % src)
img.set('alt', alt or 'Image') img.set('alt', alt or 'Image')
@@ -277,7 +276,8 @@ class Images(object):
try: try:
src = self.generate_filename(rid) src = self.generate_filename(rid)
except LinkedImageNotFound as err: except LinkedImageNotFound as err:
self.log.warn('Linked image: %s not found, ignoring' % err.fname) self.log.warning('Linked image: %s not found, ignoring',
err.fname)
continue continue
img = IMG(src='images/%s' % src, style="display:block") img = IMG(src='images/%s' % src, style="display:block")
alt = get(imagedata, 'o:title') alt = get(imagedata, 'o:title')
+12 -12
View File
@@ -324,11 +324,11 @@ class Convert(object):
try: try:
seraw = self.docx.read(sename) seraw = self.docx.read(sename)
except KeyError: except KeyError:
self.log.warn('Settings %s do not exist' % sename) self.log.warning('Settings %s do not exist', sename)
except EnvironmentError as e: except EnvironmentError as e:
if e.errno != errno.ENOENT: if e.errno != errno.ENOENT:
raise raise
self.log.warn('Settings %s file missing' % sename) self.log.warning('Settings %s file missing', sename)
else: else:
self.settings(etree.fromstring(seraw)) self.settings(etree.fromstring(seraw))
@@ -336,14 +336,14 @@ class Convert(object):
try: try:
foraw = self.docx.read(foname) foraw = self.docx.read(foname)
except KeyError: except KeyError:
self.log.warn('Footnotes %s do not exist' % foname) self.log.warning('Footnotes %s do not exist', foname)
else: else:
forel = self.docx.get_relationships(foname) forel = self.docx.get_relationships(foname)
if enname is not None: if enname is not None:
try: try:
enraw = self.docx.read(enname) enraw = self.docx.read(enname)
except KeyError: except KeyError:
self.log.warn('Endnotes %s do not exist' % enname) self.log.warning('Endnotes %s do not exist', enname)
else: else:
enrel = self.docx.get_relationships(enname) enrel = self.docx.get_relationships(enname)
footnotes(etree.fromstring(foraw) if foraw else None, forel, footnotes(etree.fromstring(foraw) if foraw else None, forel,
@@ -354,7 +354,7 @@ class Convert(object):
try: try:
raw = self.docx.read(fname) raw = self.docx.read(fname)
except KeyError: except KeyError:
self.log.warn('Fonts table %s does not exist' % fname) self.log.warning('Fonts table %s does not exist', fname)
else: else:
fonts(etree.fromstring(raw), embed_relationships, self.docx, fonts(etree.fromstring(raw), embed_relationships, self.docx,
self.dest_dir) self.dest_dir)
@@ -363,7 +363,7 @@ class Convert(object):
try: try:
raw = self.docx.read(tname) raw = self.docx.read(tname)
except KeyError: except KeyError:
self.log.warn('Styles %s do not exist' % sname) self.log.warning('Styles %s do not exist', sname)
else: else:
self.theme(etree.fromstring(raw)) self.theme(etree.fromstring(raw))
@@ -372,7 +372,7 @@ class Convert(object):
try: try:
raw = self.docx.read(sname) raw = self.docx.read(sname)
except KeyError: except KeyError:
self.log.warn('Styles %s do not exist' % sname) self.log.warning('Styles %s do not exist', sname)
else: else:
self.styles(etree.fromstring(raw), fonts, self.theme) self.styles(etree.fromstring(raw), fonts, self.theme)
styles_loaded = True styles_loaded = True
@@ -383,7 +383,7 @@ class Convert(object):
try: try:
raw = self.docx.read(nname) raw = self.docx.read(nname)
except KeyError: except KeyError:
self.log.warn('Numbering styles %s do not exist' % nname) self.log.warning('Numbering styles %s do not exist', nname)
else: else:
numbering(etree.fromstring(raw), self.styles, numbering(etree.fromstring(raw), self.styles,
self.docx.get_relationships(nname)[0]) self.docx.get_relationships(nname)[0])
@@ -619,8 +619,8 @@ class Convert(object):
if anchor and anchor in self.anchor_map: if anchor and anchor in self.anchor_map:
span.set('href', '#' + self.anchor_map[anchor]) span.set('href', '#' + self.anchor_map[anchor])
continue continue
self.log.warn('Hyperlink with unknown target (rid=%s, anchor=%s), ' self.log.warning('Hyperlink with unknown target (rid=%s, '
'ignoring' % (rid, anchor)) 'anchor=%s), ignoring', rid, anchor)
# hrefs that point nowhere give epubcheck a hernia. The element # hrefs that point nowhere give epubcheck a hernia. The element
# should be styled explicitly by Word anyway. # should be styled explicitly by Word anyway.
# span.set('href', '#') # span.set('href', '#')
@@ -645,8 +645,8 @@ class Convert(object):
if anchor in self.anchor_map: if anchor in self.anchor_map:
span.set('href', '#' + self.anchor_map[anchor]) span.set('href', '#' + self.anchor_map[anchor])
continue continue
self.log.warn('Hyperlink field with unknown anchor: %s' % self.log.warning('Hyperlink field with unknown anchor: %s',
anchor) anchor)
else: else:
if url in self.anchor_map: if url in self.anchor_map:
span.set('href', '#' + self.anchor_map[url]) span.set('href', '#' + self.anchor_map[url])
+3 -6
View File
@@ -7,10 +7,6 @@ from ebook_converter.ebooks.metadata.toc import TOC
from ebook_converter.ebooks.oeb.polish.toc import elem_to_toc_text from ebook_converter.ebooks.oeb.polish.toc import elem_to_toc_text
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
def from_headings(body, log, namespace, num_levels=3): def from_headings(body, log, namespace, num_levels=3):
' Create a TOC from headings in the document ' ' Create a TOC from headings in the document '
tocroot = TOC() tocroot = TOC()
@@ -48,7 +44,7 @@ def from_headings(body, log, namespace, num_levels=3):
level_prev[i] = None level_prev[i] = None
if len(tuple(tocroot.flat())) > 1: if len(tuple(tocroot.flat())) > 1:
log('Generating Table of Contents from headings') log.info('Generating Table of Contents from headings')
return tocroot return tocroot
@@ -127,7 +123,8 @@ def from_toc(docx, link_map, styles, object_map, log, namespace):
ml = 0 ml = 0
toc.append(TI(txt, href[1:], ml)) toc.append(TI(txt, href[1:], ml))
if toc: if toc:
log('Found Word Table of Contents, using it to generate the Table of Contents') log.info('Found Word Table of Contents, using it to generate the '
'Table of Contents')
return structure_toc(toc) return structure_toc(toc)
@@ -430,7 +430,7 @@ class Convert(object):
self.current_link = self.current_lang = None self.current_link = self.current_lang = None
for item in self.oeb.spine: for item in self.oeb.spine:
self.log.debug('Processing', item.href) self.log.debug('Processing %s', item.href)
self.process_item(item) self.process_item(item)
if self.add_toc: if self.add_toc:
self.links_manager.process_toc_links(self.oeb) self.links_manager.process_toc_links(self.oeb)
+2 -1
View File
@@ -49,7 +49,8 @@ class ImagesManager(object):
try: try:
fmt, width, height = identify(item.data) fmt, width, height = identify(item.data)
except Exception: except Exception:
self.log.warning('Replacing corrupted image with blank: %s' % href) self.log.warning('Replacing corrupted image with blank: %s',
href)
item.data = I('blank.png', data=True, allow_user_override=False) item.data = I('blank.png', data=True, allow_user_override=False)
fmt, width, height = identify(item.data) fmt, width, height = identify(item.data)
image_fname = 'media/' + self.create_filename(href, fmt) image_fname = 'media/' + self.create_filename(href, fmt)
+2 -5
View File
@@ -6,10 +6,6 @@ import uuid
from ebook_converter.utils.filenames import ascii_text from ebook_converter.utils.filenames import ascii_text
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
def start_text(tag, prefix_len=0, top_level=True): def start_text(tag, prefix_len=0, top_level=True):
ans = tag.text or '' ans = tag.text or ''
limit = 50 - prefix_len limit = 50 - prefix_len
@@ -123,7 +119,8 @@ class LinksManager(object):
bmark = self.anchor_map[(href, self.top_anchor)] bmark = self.anchor_map[(href, self.top_anchor)]
return make_link(parent, anchor=bmark, tooltip=tooltip) return make_link(parent, anchor=bmark, tooltip=tooltip)
else: else:
self.log.warn('Ignoring internal hyperlink with href (%s) pointing to unknown destination' % url) self.log.warning('Ignoring internal hyperlink with href (%s) '
'pointing to unknown destination', url)
if purl.scheme in {'http', 'https', 'ftp'}: if purl.scheme in {'http', 'https', 'ftp'}:
if url not in self.external_links: if url not in self.external_links:
self.external_links[url] = self.document_relationships.add_relationship(url, self.namespace.names['LINKS'], target_mode='External') self.external_links[url] = self.document_relationships.add_relationship(url, self.namespace.names['LINKS'], target_mode='External')
+3 -5
View File
@@ -10,9 +10,6 @@ from ebook_converter.utils.localization import lang_as_iso639_1
from ebook_converter.tinycss.css21 import CSS21Parser from ebook_converter.tinycss.css21 import CSS21Parser
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
css_parser = CSS21Parser() css_parser = CSS21Parser()
border_edges = ('left', 'top', 'right', 'bottom') border_edges = ('left', 'top', 'right', 'bottom')
@@ -738,8 +735,9 @@ class StylesManager(object):
text_style.seq = i text_style.seq = i
self.descendant_text_styles = sorted(descendant_style_map, key=attrgetter('seq')) self.descendant_text_styles = sorted(descendant_style_map, key=attrgetter('seq'))
self.log.debug('%d Text Styles %d Combined styles' % tuple(map(len, ( self.log.debug('%d Text Styles %d Combined styles',
self.descendant_text_styles, self.combined_styles)))) len(self.descendant_text_styles),
len(self.combined_styles))
self.primary_heading_style = None self.primary_heading_style = None
if heading_styles: if heading_styles: