mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-19 20:53:35 +02:00
Added first portion of logging adaptation.
Things may be broken at this point - there are still several modules to be adapted.
This commit is contained in:
@@ -904,7 +904,7 @@ class Manifest(object):
|
||||
def _parse_xhtml(self, data):
|
||||
orig_data = data
|
||||
fname = urllib.parse.unquote(self.href)
|
||||
self.oeb.log.debug('Parsing', fname, '...')
|
||||
self.oeb.log.debug('Parsing %s ...', fname)
|
||||
self.oeb.html_preprocessor.current_href = self.href
|
||||
try:
|
||||
data = parse_utils.parse_html(data, log=self.oeb.log,
|
||||
@@ -924,7 +924,7 @@ class Manifest(object):
|
||||
if has_html in data:
|
||||
return self._parse_xhtml(data)
|
||||
|
||||
self.oeb.log.debug('Converting', self.href, '...')
|
||||
self.oeb.log.debug('Converting %s ...', self.href)
|
||||
|
||||
from ebook_converter.ebooks.txt.processor import convert_markdown
|
||||
|
||||
@@ -941,7 +941,7 @@ class Manifest(object):
|
||||
from css_parser.css import CSSRule
|
||||
log.setLevel(logging.WARN)
|
||||
log.raiseExceptions = False
|
||||
self.oeb.log.debug('Parsing', self.href, '...')
|
||||
self.oeb.log.debug('Parsing %s ...', self.href)
|
||||
data = self.oeb.decode(data)
|
||||
data = self.oeb.css_preprocessor(data, add_namespace=False)
|
||||
parser = CSSParser(loglevel=logging.WARNING,
|
||||
@@ -957,11 +957,11 @@ class Manifest(object):
|
||||
def _fetch_css(self, path):
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
if path not in hrefs:
|
||||
self.oeb.logger.warn('CSS import of missing file %r' % path)
|
||||
self.oeb.logger.warning('CSS import of missing file %s', path)
|
||||
return (None, None)
|
||||
item = hrefs[path]
|
||||
if item.media_type not in OEB_STYLES:
|
||||
self.oeb.logger.warn('CSS import of non-CSS file %r' % path)
|
||||
self.oeb.logger.warning('CSS import of non-CSS file %s', path)
|
||||
return (None, None)
|
||||
data = item.data.cssText
|
||||
enc = None if isinstance(data, str) else 'utf-8'
|
||||
@@ -1002,8 +1002,8 @@ class Manifest(object):
|
||||
elif mt in OEB_STYLES:
|
||||
data = self._parse_css(data)
|
||||
elif mt == 'text/plain':
|
||||
self.oeb.log.warn('%s contains data in TXT format' % self.href,
|
||||
'converting to HTML')
|
||||
self.oeb.log.warning('%s contains data in TXT format. '
|
||||
'Converting to HTML', self.href)
|
||||
data = self._parse_txt(data)
|
||||
self.media_type = XHTML_MIME
|
||||
self._data = data
|
||||
|
||||
@@ -65,7 +65,7 @@ def merge_multiple_html_heads_and_bodies(root, log=None):
|
||||
body.append(x)
|
||||
tuple(map(root.append, (head, body)))
|
||||
if log is not None:
|
||||
log.warn('Merging multiple <head> and <body> sections')
|
||||
log.warning('Merging multiple <head> and <body> sections')
|
||||
return root
|
||||
|
||||
|
||||
@@ -122,7 +122,7 @@ def clean_word_doc(data, log):
|
||||
for match in re.finditer(r'xmlns:(\S+?)=".*?microsoft.*?"', data):
|
||||
prefixes.append(match.group(1))
|
||||
if prefixes:
|
||||
log.warn('Found microsoft markup, cleaning...')
|
||||
log.warning('Found microsoft markup, cleaning...')
|
||||
# Remove empty tags as they are not rendered by browsers
|
||||
# but can become renderable HTML tags like <p/> if the
|
||||
# document is parsed by an HTML parser
|
||||
@@ -214,13 +214,13 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
||||
data = etree.fromstring(data)
|
||||
check_for_html5(pre, data)
|
||||
except (HTML5Doc, etree.XMLSyntaxError):
|
||||
log.debug('Parsing %s as HTML' % filename)
|
||||
log.debug('Parsing %s as HTML', filename)
|
||||
data = raw
|
||||
try:
|
||||
data = html5_parse(data)
|
||||
except Exception:
|
||||
log.exception(
|
||||
'HTML 5 parsing failed, falling back to older parsers')
|
||||
log.exception('HTML 5 parsing failed, falling back to older '
|
||||
'parsers')
|
||||
data = _html4_parse(data)
|
||||
|
||||
if has_html4_doctype or data.tag == 'HTML' or (len(data) and (data[-1].get('LANG') or data[-1].get('DIR'))):
|
||||
@@ -239,7 +239,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
||||
if barename(data.tag) != 'html':
|
||||
if barename(data.tag) in non_html_file_tags:
|
||||
raise NotHTML(data.tag)
|
||||
log.warn('File %r does not appear to be (X)HTML'%filename)
|
||||
log.warning('File %s does not appear to be (X)HTML', filename)
|
||||
nroot = etree.fromstring('<html></html>')
|
||||
has_body = False
|
||||
for child in list(data):
|
||||
@@ -248,7 +248,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
||||
break
|
||||
parent = nroot
|
||||
if not has_body:
|
||||
log.warn('File %r appears to be a HTML fragment'%filename)
|
||||
log.warning('File %s appears to be a HTML fragment', filename)
|
||||
nroot = etree.fromstring('<html><body/></html>')
|
||||
parent = nroot[0]
|
||||
for child in list(data.iter()):
|
||||
@@ -260,7 +260,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
||||
|
||||
# Force into the XHTML namespace
|
||||
if not namespace(data.tag):
|
||||
log.warn('Forcing', filename, 'into XHTML namespace')
|
||||
log.warning('Forcing %s into XHTML namespace', filename)
|
||||
data.attrib['xmlns'] = const.XHTML_NS
|
||||
data = etree.tostring(data, encoding='unicode')
|
||||
|
||||
@@ -272,10 +272,8 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
||||
try:
|
||||
data = etree.fromstring(data)
|
||||
except etree.XMLSyntaxError:
|
||||
log.warn('Stripping comments from %s'%
|
||||
filename)
|
||||
data = re.compile(r'<!--.*?-->', re.DOTALL).sub('',
|
||||
data)
|
||||
log.warning('Stripping comments from %s', filename)
|
||||
data = re.compile(r'<!--.*?-->', re.DOTALL).sub('', data)
|
||||
data = data.replace(
|
||||
"<?xml version='1.0' encoding='utf-8'?><o:p></o:p>",
|
||||
'')
|
||||
@@ -283,7 +281,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
||||
try:
|
||||
data = etree.fromstring(data)
|
||||
except etree.XMLSyntaxError:
|
||||
log.warn('Stripping meta tags from %s'% filename)
|
||||
log.warning('Stripping meta tags from %s', filename)
|
||||
data = re.sub(r'<meta\s+[^>]+?>', '', data)
|
||||
data = etree.fromstring(data)
|
||||
elif namespace(data.tag) != const.XHTML_NS:
|
||||
@@ -308,7 +306,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
||||
head = xpath(data, '/h:html/h:head')
|
||||
head = head[0] if head else None
|
||||
if head is None:
|
||||
log.warn('File %s missing <head/> element' % filename)
|
||||
log.warning('File %s missing <head/> element', filename)
|
||||
head = etree.Element(XHTML('head'))
|
||||
data.insert(0, head)
|
||||
title = etree.SubElement(head, XHTML('title'))
|
||||
@@ -335,7 +333,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
|
||||
body.getparent().remove(body)
|
||||
data.append(body)
|
||||
else:
|
||||
log.warn('File %s missing <body/> element' % filename)
|
||||
log.warning('File %s missing <body/> element', filename)
|
||||
etree.SubElement(data, XHTML('body'))
|
||||
|
||||
# Remove microsoft office markup
|
||||
|
||||
@@ -1141,8 +1141,8 @@ class EpubContainer(Container):
|
||||
zf = ZipFile(stream)
|
||||
zf.extractall(tdir)
|
||||
except:
|
||||
log.exception('EPUB appears to be invalid ZIP file, trying a'
|
||||
' more forgiving ZIP parser')
|
||||
log.exception('EPUB appears to be invalid ZIP file, '
|
||||
'trying a more forgiving ZIP parser')
|
||||
from ebook_converter.utils.localunzip import extractall
|
||||
stream.seek(0)
|
||||
extractall(stream, path=tdir)
|
||||
@@ -1481,7 +1481,7 @@ class AZW3Container(Container):
|
||||
'ebook_converter.ebooks.oeb.polish.container', 'do_explode',
|
||||
args=(pathtoazw3, tdir), no_output=True)['result']
|
||||
except WorkerError as e:
|
||||
log(e.orig_tb)
|
||||
log.error(e.orig_tb)
|
||||
raise InvalidMobi('Failed to explode MOBI')
|
||||
super(AZW3Container, self).__init__(tdir, opf_path, log)
|
||||
self.obfuscated_fonts = {x.replace(os.sep, '/') for x in obfuscated_fonts}
|
||||
|
||||
@@ -111,14 +111,14 @@ class OEBReader(object):
|
||||
encoding=None)
|
||||
try:
|
||||
opf = etree.fromstring(data)
|
||||
self.logger.warn('OPF contains invalid HTML named entities')
|
||||
self.logger.warning('OPF contains invalid HTML named entities')
|
||||
except etree.XMLSyntaxError:
|
||||
data = re.sub(r'(?is)<tours>.+</tours>', '', data)
|
||||
data = data.replace('<dc-metadata>',
|
||||
'<dc-metadata xmlns:dc="'
|
||||
'http://purl.org/metadata/dublin_core">')
|
||||
opf = etree.fromstring(data)
|
||||
self.logger.warn('OPF contains invalid tours section')
|
||||
self.logger.warning('OPF contains invalid tours section')
|
||||
|
||||
ns = parse_utils.namespace(opf.tag)
|
||||
if ns not in ('', const.OPF1_NS, const.OPF2_NS):
|
||||
@@ -172,7 +172,7 @@ class OEBReader(object):
|
||||
except KeyboardInterrupt:
|
||||
raise
|
||||
except Exception:
|
||||
self.logger.exception('Failed to parse content in %s' %
|
||||
self.logger.exception('Failed to parse content in %s',
|
||||
item.href)
|
||||
bad.append(item)
|
||||
self.oeb.manifest.remove(item)
|
||||
@@ -195,7 +195,7 @@ class OEBReader(object):
|
||||
data = item.data
|
||||
except Exception:
|
||||
self.oeb.log.exception('Failed to read from manifest '
|
||||
'entry with id: %s, ignoring' %
|
||||
'entry with id: %s, ignoring',
|
||||
item.id)
|
||||
invalid.add(item)
|
||||
continue
|
||||
@@ -216,7 +216,7 @@ class OEBReader(object):
|
||||
scheme = urllib.parse.urlparse(href).scheme
|
||||
except Exception:
|
||||
self.oeb.log.exception('Skipping invalid href: '
|
||||
'%r' % href)
|
||||
'%s', href)
|
||||
continue
|
||||
if not scheme and href not in known:
|
||||
new.add(href)
|
||||
@@ -244,12 +244,13 @@ class OEBReader(object):
|
||||
continue
|
||||
if not self.oeb.container.exists(href):
|
||||
if href not in warned:
|
||||
self.logger.warn('Referenced file %r not found' % href)
|
||||
self.logger.warning('Referenced file %s not found',
|
||||
href)
|
||||
warned.add(href)
|
||||
continue
|
||||
if href not in warned:
|
||||
self.logger.warn('Referenced file %r not in manifest' %
|
||||
href)
|
||||
self.logger.warning('Referenced file %s not in manifest',
|
||||
href)
|
||||
warned.add(href)
|
||||
id, _ = manifest.generate(id='added')
|
||||
guessed = mimetypes.guess_type(href)[0]
|
||||
@@ -275,13 +276,13 @@ class OEBReader(object):
|
||||
media_type = media_type.lower()
|
||||
fallback = elem.get('fallback')
|
||||
if href in manifest.hrefs:
|
||||
self.logger.warn('Duplicate manifest entry for %r' % href)
|
||||
self.logger.warning('Duplicate manifest entry for %s', href)
|
||||
continue
|
||||
if not self.oeb.container.exists(href):
|
||||
self.logger.warn('Manifest item %r not found' % href)
|
||||
self.logger.warning('Manifest item %s not found', href)
|
||||
continue
|
||||
if id in manifest.ids:
|
||||
self.logger.warn('Duplicate manifest id %r' % id)
|
||||
self.logger.warning('Duplicate manifest id %s', id)
|
||||
id, href = manifest.generate(id, href)
|
||||
manifest.add(id, href, media_type, fallback)
|
||||
invalid = self._manifest_prune_invalid()
|
||||
@@ -323,8 +324,8 @@ class OEBReader(object):
|
||||
if item.href in removed_items_to_ignore:
|
||||
continue
|
||||
if version >= 2:
|
||||
self.logger.warn(
|
||||
'Spine-referenced file %r not in spine' % item.href)
|
||||
self.logger.warning('Spine-referenced file %s not in spine',
|
||||
item.href)
|
||||
spine.add(item, linear=False)
|
||||
|
||||
def _spine_from_opf(self, opf):
|
||||
@@ -333,7 +334,7 @@ class OEBReader(object):
|
||||
for elem in base.xpath(opf, '/o2:package/o2:spine/o2:itemref'):
|
||||
idref = elem.get('idref')
|
||||
if idref not in manifest.ids:
|
||||
self.logger.warn('Spine item %r not found' % idref)
|
||||
self.logger.warning('Spine item %s not found', idref)
|
||||
continue
|
||||
item = manifest.ids[idref]
|
||||
if (item.media_type.lower() in base.OEB_DOCS and
|
||||
@@ -346,8 +347,8 @@ class OEBReader(object):
|
||||
item.media_type = base.XHTML_MIME
|
||||
spine.add(item, elem.get('linear'))
|
||||
else:
|
||||
self.oeb.log.warn('The item %s is not a XML document.'
|
||||
' Removing it from spine.' % item.href)
|
||||
self.oeb.log.warning('The item %s is not a XML document.'
|
||||
' Removing it from spine.', item.href)
|
||||
if len(spine) == 0:
|
||||
raise base.OEBError("Spine is empty")
|
||||
self._spine_add_extra()
|
||||
@@ -369,7 +370,8 @@ class OEBReader(object):
|
||||
corrected_href = href
|
||||
break
|
||||
if corrected_href is None:
|
||||
self.logger.warn('Guide reference %r not found' % ref_href)
|
||||
self.logger.warning('Guide reference %s not found',
|
||||
ref_href)
|
||||
continue
|
||||
ref_href = corrected_href
|
||||
typ = elem.get('type')
|
||||
@@ -411,7 +413,7 @@ class OEBReader(object):
|
||||
if path and path not in self.oeb.manifest.hrefs:
|
||||
path = base.urlnormalize(path)
|
||||
if href and path not in self.oeb.manifest.hrefs:
|
||||
self.logger.warn('TOC reference %r not found' % href)
|
||||
self.logger.warning('TOC reference %s not found', href)
|
||||
gc = base.xpath(child, 'ncx:navPoint')
|
||||
if not gc:
|
||||
# This node is useless
|
||||
@@ -488,7 +490,7 @@ class OEBReader(object):
|
||||
continue
|
||||
path, _ = urllib.parse.urldefrag(base.urlnormalize(href))
|
||||
if path not in self.oeb.manifest.hrefs:
|
||||
self.logger.warn('TOC reference %r not found' % href)
|
||||
self.logger.warning('TOC reference %s not found', href)
|
||||
continue
|
||||
id = site.get('id')
|
||||
toc.add(title, href, id=id)
|
||||
@@ -528,7 +530,7 @@ class OEBReader(object):
|
||||
return True
|
||||
|
||||
def _toc_from_spine(self, opf):
|
||||
self.log.warn('Generating default TOC from spine...')
|
||||
self.log.warning('Generating default TOC from spine...')
|
||||
toc = self.oeb.toc
|
||||
titles = []
|
||||
headers = []
|
||||
@@ -656,7 +658,7 @@ class OEBReader(object):
|
||||
if item is not None and item.media_type in base.OEB_IMAGES:
|
||||
return item
|
||||
else:
|
||||
self.logger.warn('Invalid cover image @id %r' % id)
|
||||
self.logger.warning('Invalid cover image @id %s', id)
|
||||
hcover = self.oeb.spine[0]
|
||||
if 'cover' in self.oeb.guide:
|
||||
href = self.oeb.guide['cover'].href
|
||||
@@ -705,8 +707,8 @@ class OEBReader(object):
|
||||
items = [x for x in self.oeb.manifest if x.href == href]
|
||||
for x in items:
|
||||
if x not in self.oeb.spine:
|
||||
self.oeb.log.warn('Removing duplicate manifest item with '
|
||||
'id:', x.id)
|
||||
self.oeb.log.warning('Removing duplicate manifest item '
|
||||
'with id: %s', x.id)
|
||||
self.oeb.manifest.remove_duplicate_item(x)
|
||||
|
||||
def _all_from_opf(self, opf):
|
||||
|
||||
@@ -241,11 +241,14 @@ class Stylizer(object):
|
||||
continue
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
if ihref not in hrefs:
|
||||
self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href)
|
||||
self.logger.warning('Ignoring missing '
|
||||
'stylesheet in @import '
|
||||
'rule: %s', rule.href)
|
||||
continue
|
||||
sitem = hrefs[ihref]
|
||||
if sitem.media_type not in base.OEB_STYLES:
|
||||
self.logger.warn('CSS @import of non-CSS file %r' % rule.href)
|
||||
self.logger.warning('CSS @import of non-CSS '
|
||||
'file %s', rule.href)
|
||||
continue
|
||||
stylesheets.append(sitem.data)
|
||||
# Make links to resources absolute, since these rules will
|
||||
@@ -261,14 +264,12 @@ class Stylizer(object):
|
||||
path = item.abshref(href)
|
||||
sitem = oeb.manifest.hrefs.get(path, None)
|
||||
if sitem is None:
|
||||
self.logger.warn(
|
||||
'Stylesheet %r referenced by file %r not in manifest' %
|
||||
(path, item.href))
|
||||
self.logger.warning('Stylesheet %s referenced by file %s '
|
||||
'not in manifest', path, item.href)
|
||||
continue
|
||||
if not hasattr(sitem.data, 'cssRules'):
|
||||
self.logger.warn(
|
||||
'Stylesheet %r referenced by file %r is not CSS'%(path,
|
||||
item.href))
|
||||
self.logger.warning('Stylesheet %s referenced by file %s '
|
||||
'is not CSS', path, item.href)
|
||||
continue
|
||||
stylesheets.append(sitem.data)
|
||||
csses = {'extra_css':extra_css, 'user_css':user_css}
|
||||
@@ -280,9 +281,8 @@ class Stylizer(object):
|
||||
validate=False)
|
||||
stylesheets.append(stylesheet)
|
||||
except Exception:
|
||||
self.logger.exception('Failed to parse %s, ignoring.'%w)
|
||||
self.logger.debug('Bad css: ')
|
||||
self.logger.debug(x)
|
||||
self.logger.exception('Failed to parse %s, ignoring.', w)
|
||||
self.logger.debug('Bad css: %s', x)
|
||||
|
||||
# using oeb to store the rules, page rule and font face rules
|
||||
# and generating them again if opts, profile or stylesheets are different
|
||||
@@ -303,7 +303,8 @@ class Stylizer(object):
|
||||
try:
|
||||
matches = tuple(select(text))
|
||||
except SelectorError as err:
|
||||
self.logger.error('Ignoring CSS rule with invalid selector: %r (%s)' % (text, err))
|
||||
self.logger.error('Ignoring CSS rule with invalid selector: '
|
||||
'%s (%s)', text, err)
|
||||
continue
|
||||
|
||||
if fl is not None:
|
||||
@@ -367,11 +368,11 @@ class Stylizer(object):
|
||||
def _fetch_css_file(self, path):
|
||||
hrefs = self.oeb.manifest.hrefs
|
||||
if path not in hrefs:
|
||||
self.logger.warn('CSS import of missing file %r' % path)
|
||||
self.logger.warning('CSS import of missing file %s', path)
|
||||
return (None, None)
|
||||
item = hrefs[path]
|
||||
if item.media_type not in base.OEB_STYLES:
|
||||
self.logger.warn('CSS import of non-CSS file %r' % path)
|
||||
self.logger.warning('CSS import of non-CSS file %r', path)
|
||||
return (None, None)
|
||||
data = item.data.cssText
|
||||
if not isinstance(data, bytes):
|
||||
|
||||
@@ -36,16 +36,16 @@ class DataURL(object):
|
||||
data = polyglot.as_bytes(data)
|
||||
fmt = what(None, data)
|
||||
if not fmt:
|
||||
self.log.warn('Image encoded as data URL has unknown '
|
||||
'format, ignoring')
|
||||
self.log.warning('Image encoded as data URL has unknown '
|
||||
'format, ignoring')
|
||||
continue
|
||||
img.set('src',
|
||||
item.relhref(self.convert_image_data_uri(data, fmt,
|
||||
oeb)))
|
||||
|
||||
def convert_image_data_uri(self, data, fmt, oeb):
|
||||
self.log('Found image encoded as data URI converting it to normal '
|
||||
'image')
|
||||
self.log.info('Found image encoded as data URI converting it to '
|
||||
'normal image')
|
||||
item_id, item_href = oeb.manifest.generate('data-url-image',
|
||||
'data-url-image.' + fmt)
|
||||
oeb.manifest.add(item_id, item_href,
|
||||
|
||||
@@ -117,8 +117,9 @@ class UniqueFilenames(object): # {{{
|
||||
self.seen_filenames.add(fname)
|
||||
|
||||
if self.rename_map:
|
||||
self.log('Found non-unique filenames, renaming to support broken'
|
||||
' EPUB readers like FBReader, Aldiko and Stanza...')
|
||||
self.log.info('Found non-unique filenames, renaming to support '
|
||||
'broken EPUB readers like FBReader, Aldiko and '
|
||||
'Stanza...')
|
||||
from pprint import pformat
|
||||
self.log.debug(pformat(self.rename_map))
|
||||
|
||||
@@ -173,8 +174,8 @@ class FlatFilenames(object): # {{{
|
||||
oeb.spine.insert(isp, nitem, item.linear)
|
||||
|
||||
if self.rename_map:
|
||||
self.log('Found non-flat filenames, renaming to support broken'
|
||||
' EPUB readers like FBReader...')
|
||||
self.log.info('Found non-flat filenames, renaming to support '
|
||||
'broken EPUB readers like FBReader...')
|
||||
from pprint import pformat
|
||||
self.log.debug(pformat(self.rename_map))
|
||||
self.log.debug(pformat(self.renamed_items_map))
|
||||
|
||||
@@ -182,8 +182,8 @@ class CSSFlattener(object):
|
||||
else:
|
||||
from ebook_converter.ebooks.oeb.normalize_css import normalize_filter_css
|
||||
self.filter_css = frozenset(normalize_filter_css(self.filter_css))
|
||||
self.oeb.log.debug('Filtering CSS properties: %s'%
|
||||
', '.join(self.filter_css))
|
||||
self.oeb.log.debug('Filtering CSS properties: %s',
|
||||
', '.join(self.filter_css))
|
||||
|
||||
for item in oeb.manifest.values():
|
||||
# Make all links to resources absolute, as these sheets will be
|
||||
@@ -231,13 +231,13 @@ class CSSFlattener(object):
|
||||
msg = ('No embeddable fonts found for family: %r'%family)
|
||||
if failure_critical:
|
||||
raise ValueError(msg)
|
||||
self.oeb.log.warn(msg)
|
||||
self.oeb.log.warning(msg)
|
||||
return body_font_family, efi
|
||||
if not faces:
|
||||
msg = ('No embeddable fonts found for family: %r'%family)
|
||||
if failure_critical:
|
||||
raise ValueError(msg)
|
||||
self.oeb.log.warn(msg)
|
||||
self.oeb.log.warning(msg)
|
||||
return body_font_family, efi
|
||||
|
||||
for i, font in enumerate(faces):
|
||||
@@ -258,7 +258,7 @@ class CSSFlattener(object):
|
||||
if i == 0:
|
||||
generic_family = panose_to_css_generic_family(font['panose'])
|
||||
body_font_family = "'%s',%s"%(font['font-family'], generic_family)
|
||||
self.oeb.log('Embedding font: %s'%font['font-family'])
|
||||
self.oeb.log.info('Embedding font: %s', font['font-family'])
|
||||
for k in ('font-weight', 'font-style', 'font-stretch'):
|
||||
if font[k] != 'normal':
|
||||
cfont[k] = font[k]
|
||||
@@ -323,8 +323,7 @@ class CSSFlattener(object):
|
||||
sbase = max(list(sizes.items()), key=operator.itemgetter(1))[0]
|
||||
except:
|
||||
sbase = 12.0
|
||||
self.oeb.logger.info(
|
||||
"Source base font size is %0.05fpt" % sbase)
|
||||
self.oeb.logger.info("Source base font size is %0.05fpt", sbase)
|
||||
return sbase
|
||||
|
||||
def clean_edges(self, cssdict, style, fsize):
|
||||
@@ -346,8 +345,7 @@ class CSSFlattener(object):
|
||||
try:
|
||||
value = round(value / slineh) * dlineh
|
||||
except:
|
||||
self.oeb.logger.warning(
|
||||
'Invalid length:', value)
|
||||
self.oeb.logger.warning('Invalid length: %s', value)
|
||||
value = 0.0
|
||||
cssdict[property] = "%0.5fem" % (value / fsize)
|
||||
|
||||
|
||||
@@ -1,8 +1,3 @@
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
class Clean(object):
|
||||
'''Clean up guide, leaving only known values '''
|
||||
|
||||
@@ -28,7 +23,8 @@ class Clean(object):
|
||||
if covers:
|
||||
ref = covers[0][0]
|
||||
if len(covers) > 1:
|
||||
self.log('Choosing %s:%s as the cover'%(ref.type, ref.href))
|
||||
self.log.info('Choosing %s:%s as the cover', ref.type,
|
||||
ref.href)
|
||||
ref.type = 'cover'
|
||||
self.oeb.guide.refs['cover'] = ref
|
||||
|
||||
|
||||
@@ -34,19 +34,19 @@ class RemoveFirstImage:
|
||||
continue
|
||||
removed = self.remove_images(item)
|
||||
if removed > 0:
|
||||
self.log('Removed first image')
|
||||
self.log.info('Removed first image')
|
||||
body = XPath('//h:body')(item.data)
|
||||
if body:
|
||||
raw = xml2text(body[0]).strip()
|
||||
imgs = XPath('//h:img|//svg:svg')(item.data)
|
||||
if not raw and not imgs:
|
||||
self.log('Removing %s as it has no content' %
|
||||
item.href)
|
||||
self.log.info('Removing %s as it has no content',
|
||||
item.href)
|
||||
self.oeb.manifest.remove(item)
|
||||
deleted_item = item
|
||||
break
|
||||
else:
|
||||
self.log.warn('Could not find first image to remove')
|
||||
self.log.warning('Could not find first image to remove')
|
||||
if deleted_item is not None:
|
||||
for item in list(self.oeb.toc):
|
||||
href = urllib.parse.urldefrag(item.href)[0]
|
||||
|
||||
@@ -101,7 +101,7 @@ class MergeMetadata(object):
|
||||
_oim = override_input_metadata
|
||||
self.oeb, self.log = oeb, oeb.log
|
||||
m = self.oeb.metadata
|
||||
self.log('Merging user specified metadata...')
|
||||
self.log.info('Merging user specified metadata...')
|
||||
meta_info_to_oeb_metadata(mi, m, oeb.log,
|
||||
override_input_metadata=_oim)
|
||||
cover_id = self.set_cover(mi, opts.prefer_metadata_cover)
|
||||
@@ -210,8 +210,8 @@ class MergeMetadata(object):
|
||||
text = ''
|
||||
text = re.sub(r'\s+', '', text)
|
||||
if not text and not XPath('//h:img|//svg:svg')(item.data):
|
||||
self.log('Removing %s as it is a wrapper around the cover '
|
||||
'image' % item.href)
|
||||
self.log.info('Removing %s as it is a wrapper around the '
|
||||
'cover image', item.href)
|
||||
self.oeb.spine.remove(item)
|
||||
self.oeb.manifest.remove(item)
|
||||
self.oeb.guide.remove_by_href(item.href)
|
||||
|
||||
@@ -5,11 +5,6 @@ from ebook_converter.ebooks.oeb import parse_utils
|
||||
from ebook_converter.ebooks.oeb.base import XPath
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
class RemoveAdobeMargins(object):
|
||||
'''
|
||||
Remove margins specified in Adobe's page templates.
|
||||
@@ -19,12 +14,13 @@ class RemoveAdobeMargins(object):
|
||||
self.oeb, self.opts, self.log = oeb, opts, log
|
||||
|
||||
for item in self.oeb.manifest:
|
||||
if item.media_type in {
|
||||
'application/vnd.adobe-page-template+xml', 'application/vnd.adobe.page-template+xml',
|
||||
'application/adobe-page-template+xml', 'application/adobe.page-template+xml',
|
||||
} and hasattr(item.data, 'xpath'):
|
||||
self.log('Removing page margins specified in the'
|
||||
' Adobe page template')
|
||||
if (item.media_type in {'application/vnd.adobe-page-template+xml',
|
||||
'application/vnd.adobe.page-template+xml',
|
||||
'application/adobe-page-template+xml',
|
||||
'application/adobe.page-template+xml'} and
|
||||
hasattr(item.data, 'xpath')):
|
||||
self.log.info('Removing page margins specified in the '
|
||||
'Adobe page template')
|
||||
for elem in item.data.xpath(
|
||||
'//*[@margin-bottom or @margin-top '
|
||||
'or @margin-left or @margin-right]'):
|
||||
@@ -59,7 +55,7 @@ class RemoveFakeMargins(object):
|
||||
if stylesheet is None:
|
||||
return
|
||||
|
||||
self.log('Removing fake margins...')
|
||||
self.log.info('Removing fake margins...')
|
||||
|
||||
stylesheet = stylesheet.data
|
||||
|
||||
@@ -73,8 +69,8 @@ class RemoveFakeMargins(object):
|
||||
try:
|
||||
self.process_level(level)
|
||||
except NegativeTextIndent:
|
||||
self.log.debug('Negative text indent detected at level '
|
||||
' %s, ignoring this level'%level)
|
||||
self.log.debug('Negative text indent detected at level %s, '
|
||||
'ignoring this level', level)
|
||||
|
||||
def get_margins(self, elem):
|
||||
cls = elem.get('class', None)
|
||||
@@ -102,19 +98,21 @@ class RemoveFakeMargins(object):
|
||||
self.stats[level+'_left'][lm] += 1
|
||||
self.stats[level+'_right'][rm] += 1
|
||||
|
||||
self.log.debug(level, ' left margin stats:', self.stats[level+'_left'])
|
||||
self.log.debug(level, ' right margin stats:', self.stats[level+'_right'])
|
||||
self.log.debug('%s left margin stats: %s', level,
|
||||
self.stats[level+'_left'])
|
||||
self.log.debug('%s right margin stats: %s', level,
|
||||
self.stats[level+'_right'])
|
||||
|
||||
remove_left = self.analyze_stats(self.stats[level+'_left'])
|
||||
remove_right = self.analyze_stats(self.stats[level+'_right'])
|
||||
|
||||
if remove_left:
|
||||
mcl = self.stats[level+'_left'].most_common(1)[0][0]
|
||||
self.log('Removing level %s left margin of:'%level, mcl)
|
||||
self.log.info('Removing level %s left margin of: %s', level, mcl)
|
||||
|
||||
if remove_right:
|
||||
mcr = self.stats[level+'_right'].most_common(1)[0][0]
|
||||
self.log('Removing level %s right margin of:'%level, mcr)
|
||||
self.log.info('Removing level %s right margin of: %s', level, mcr)
|
||||
|
||||
if remove_left or remove_right:
|
||||
for elem in elems:
|
||||
@@ -151,7 +149,7 @@ class RemoveFakeMargins(object):
|
||||
remove = set()
|
||||
for k, v in self.levels.items():
|
||||
num = len(v)
|
||||
self.log.debug('Found %d items of level:'%num, k)
|
||||
self.log.debug('Found %s items of level: %s', num, k)
|
||||
level = int(k.split('_')[-1])
|
||||
tag = k.split('_')[0]
|
||||
if tag == 'p' and num < 25:
|
||||
@@ -169,7 +167,7 @@ class RemoveFakeMargins(object):
|
||||
|
||||
for k in remove:
|
||||
self.levels.pop(k)
|
||||
self.log.debug('Ignoring level', k)
|
||||
self.log.debug('Ignoring level %s', k)
|
||||
|
||||
def analyze_stats(self, stats):
|
||||
if not stats:
|
||||
|
||||
@@ -45,12 +45,14 @@ class RescaleImages(object):
|
||||
|
||||
try:
|
||||
if self.check_colorspaces and img.mode == 'CMYK':
|
||||
self.log.warn(
|
||||
'The image %s is in the CMYK colorspace, converting it '
|
||||
'to RGB as Adobe Digital Editions cannot display CMYK' % item.href)
|
||||
self.log.warning('The image %s is in the CMYK '
|
||||
'colorspace, converting it to RGB as '
|
||||
'Adobe Digital Editions cannot '
|
||||
'display CMYK', item.href)
|
||||
img = img.convert('RGB')
|
||||
except Exception:
|
||||
self.log.exception('Failed to convert image %s from CMYK to RGB' % item.href)
|
||||
self.log.exception('Failed to convert image %s from CMYK '
|
||||
'to RGB', item.href)
|
||||
|
||||
scaled, new_width, new_height = uimg.fit_image(width, height,
|
||||
page_width,
|
||||
@@ -58,18 +60,20 @@ class RescaleImages(object):
|
||||
if scaled:
|
||||
new_width = max(1, new_width)
|
||||
new_height = max(1, new_height)
|
||||
self.log('Rescaling image from %dx%d to %dx%d'%(
|
||||
width, height, new_width, new_height), item.href)
|
||||
self.log('Rescaling image from %sx%s to %sx%s %s', width,
|
||||
height, new_width, new_height, item.href)
|
||||
try:
|
||||
img = img.resize((new_width, new_height))
|
||||
except Exception:
|
||||
self.log.exception('Failed to rescale image: %s' % item.href)
|
||||
self.log.exception('Failed to rescale image: %s',
|
||||
item.href)
|
||||
continue
|
||||
buf = BytesIO()
|
||||
try:
|
||||
img.save(buf, ext)
|
||||
except Exception:
|
||||
self.log.exception('Failed to rescale image: %s' % item.href)
|
||||
self.log.exception('Failed to rescale image: %s',
|
||||
item.href)
|
||||
else:
|
||||
item.data = buf.getvalue()
|
||||
item.unload_data_from_memory()
|
||||
|
||||
@@ -59,7 +59,8 @@ class Split(object):
|
||||
def __call__(self, oeb, opts):
|
||||
self.oeb = oeb
|
||||
self.log = oeb.log
|
||||
self.log('Splitting markup on page breaks and flow limits, if any...')
|
||||
self.log.info('Splitting markup on page breaks and flow limits, if '
|
||||
'any...')
|
||||
self.opts = opts
|
||||
self.map = {}
|
||||
for item in list(self.oeb.manifest.items):
|
||||
@@ -127,8 +128,7 @@ class Split(object):
|
||||
page_breaks.add(elem)
|
||||
except SelectorError as err:
|
||||
self.log.warn('Ignoring page breaks specified with invalid '
|
||||
'CSS selector: %r (%s)' %
|
||||
(selector, err))
|
||||
'CSS selector: %s (%s)', selector, err)
|
||||
|
||||
for i, elem in enumerate(item.data.iter('*')):
|
||||
try:
|
||||
@@ -221,13 +221,13 @@ class FlowSplitter(object):
|
||||
|
||||
if self.max_flow_size > 0:
|
||||
lt_found = False
|
||||
self.log('\tLooking for large trees in %s...' % item.href)
|
||||
self.log.info('\tLooking for large trees in %s...', item.href)
|
||||
trees = list(self.trees)
|
||||
self.tree_map = {}
|
||||
for i, tree in enumerate(trees):
|
||||
size = len(tostring(tree.getroot()))
|
||||
if size > self.max_flow_size:
|
||||
self.log('\tFound large tree #%d' % i)
|
||||
self.log.info('\tFound large tree #%s', i)
|
||||
lt_found = True
|
||||
self.split_trees = []
|
||||
self.split_to_size(tree)
|
||||
@@ -240,7 +240,7 @@ class FlowSplitter(object):
|
||||
|
||||
self.was_split = len(self.trees) > 1
|
||||
if self.was_split:
|
||||
self.log('\tSplit into %d parts' % len(self.trees))
|
||||
self.log('\tSplit into %s parts', len(self.trees))
|
||||
self.commit()
|
||||
|
||||
def split_on_page_breaks(self, orig_tree):
|
||||
@@ -259,7 +259,7 @@ class FlowSplitter(object):
|
||||
tree = self.trees[i]
|
||||
elem = pattern(tree)
|
||||
if elem:
|
||||
self.log.debug('\t\tSplitting on page-break at id=%s' %
|
||||
self.log.debug('\t\tSplitting on page-break at id=%s',
|
||||
elem[0].get('id'))
|
||||
before_tree, after_tree = self.do_split(tree, elem[0],
|
||||
before)
|
||||
@@ -322,10 +322,10 @@ class FlowSplitter(object):
|
||||
return True
|
||||
|
||||
def split_text(self, text, root, size):
|
||||
self.log.debug('\t\t\tSplitting text of length: %d' % len(text))
|
||||
self.log.debug('\t\t\tSplitting text of length: %d', len(text))
|
||||
rest = text.replace('\r', '')
|
||||
parts = re.split('\n\n', rest)
|
||||
self.log.debug('\t\t\t\tFound %d parts' % len(parts))
|
||||
self.log.debug('\t\t\t\tFound %d parts', len(parts))
|
||||
if max(map(len, parts)) > size:
|
||||
raise SplitError('Cannot split as file contains a <pre> tag '
|
||||
'with a very large paragraph', root)
|
||||
@@ -364,7 +364,7 @@ class FlowSplitter(object):
|
||||
split_point, before = self.find_split_point(root)
|
||||
if split_point is None:
|
||||
raise SplitError(self.item.href, root)
|
||||
self.log.debug('\t\t\tSplit point:', split_point.tag,
|
||||
self.log.debug('\t\t\tSplit point: %s %s', split_point.tag,
|
||||
tree.getpath(split_point))
|
||||
|
||||
trees = self.do_split(tree, split_point, before)
|
||||
@@ -380,10 +380,10 @@ class FlowSplitter(object):
|
||||
continue
|
||||
elif size <= self.max_flow_size:
|
||||
self.split_trees.append(t)
|
||||
self.log.debug('\t\t\tCommitted sub-tree #%d (%d KB)' %
|
||||
(len(self.split_trees), size/1024.))
|
||||
self.log.debug('\t\t\tCommitted sub-tree #%s (%s KB)',
|
||||
len(self.split_trees), size/1024.)
|
||||
else:
|
||||
self.log.debug('\t\t\tSplit tree still too large: %d KB' %
|
||||
self.log.debug('\t\t\tSplit tree still too large: %d KB',
|
||||
size/1024)
|
||||
self.split_to_size(t)
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ class DetectStructure(object):
|
||||
self.log = oeb.log
|
||||
self.oeb = oeb
|
||||
self.opts = opts
|
||||
self.log('Detecting structure...')
|
||||
self.log.info('Detecting structure...')
|
||||
|
||||
self.detect_chapters()
|
||||
if self.oeb.auto_generated_toc or opts.use_auto_toc:
|
||||
@@ -67,15 +67,15 @@ class DetectStructure(object):
|
||||
self.oeb.toc = orig_toc
|
||||
else:
|
||||
self.oeb.auto_generated_toc = True
|
||||
self.log('Auto generated TOC with %d entries.' %
|
||||
self.oeb.toc.count())
|
||||
self.log.info('Auto generated TOC with %s entries.',
|
||||
self.oeb.toc.count())
|
||||
|
||||
if opts.toc_filter is not None:
|
||||
regexp = re.compile(opts.toc_filter)
|
||||
for node in list(self.oeb.toc.iter()):
|
||||
if not node.title or regexp.search(node.title) is not None:
|
||||
self.log('Filtering', node.title if node.title else
|
||||
'empty node', 'from TOC')
|
||||
self.log.info('Filtering %s from TOC', node.title if
|
||||
node.title else 'empty node')
|
||||
self.oeb.toc.remove(node)
|
||||
|
||||
if opts.page_breaks_before is not None:
|
||||
@@ -112,8 +112,8 @@ class DetectStructure(object):
|
||||
try:
|
||||
expr = XPath(expr)
|
||||
except Exception:
|
||||
self.log.warn('Invalid start reading at XPath expression, '
|
||||
'ignoring: %s' % expr)
|
||||
self.log.warning('Invalid start reading at XPath expression, '
|
||||
'ignoring: %s', expr)
|
||||
return
|
||||
for item in self.oeb.spine:
|
||||
if not hasattr(item.data, 'xpath'):
|
||||
@@ -129,11 +129,11 @@ class DetectStructure(object):
|
||||
if 'text' in self.oeb.guide:
|
||||
self.oeb.guide.remove('text')
|
||||
self.oeb.guide.add('text', 'Start', item.href+'#'+eid)
|
||||
self.log('Setting start reading at position to %s in %s' %
|
||||
(self.opts.start_reading_at, item.href))
|
||||
self.log.info('Setting start reading at position to %s in %s',
|
||||
self.opts.start_reading_at, item.href)
|
||||
return
|
||||
self.log.warn("Failed to find start reading at position: %s" %
|
||||
self.opts.start_reading_at)
|
||||
self.log.warning("Failed to find start reading at position: %s",
|
||||
self.opts.start_reading_at)
|
||||
|
||||
def get_toc_parts_for_xpath(self, expr):
|
||||
# if an attribute is selected by the xpath expr then truncate it
|
||||
@@ -155,8 +155,8 @@ class DetectStructure(object):
|
||||
len(ans)
|
||||
return ans
|
||||
except Exception:
|
||||
self.log.warn('Invalid chapter expression, ignoring: %s' %
|
||||
expr)
|
||||
self.log.warning('Invalid chapter expression, ignoring: %s',
|
||||
expr)
|
||||
return []
|
||||
|
||||
if self.opts.chapter:
|
||||
@@ -175,7 +175,7 @@ class DetectStructure(object):
|
||||
c[item] += 1
|
||||
text = base.xml2text(elem).strip()
|
||||
text = re.sub(r'\s+', ' ', text.strip())
|
||||
self.log('\tDetected chapter:', text[:50])
|
||||
self.log.info('\tDetected chapter: %s', text[:50])
|
||||
if chapter_mark == 'none':
|
||||
continue
|
||||
if chapter_mark == 'rule':
|
||||
@@ -221,7 +221,7 @@ class DetectStructure(object):
|
||||
try:
|
||||
purl = urllib.parse.urlparse(href)
|
||||
except ValueError:
|
||||
self.log.warning('Ignoring malformed URL:', href)
|
||||
self.log.warning('Ignoring malformed URL: %s', href)
|
||||
continue
|
||||
if not purl[0] or purl[0] == 'file':
|
||||
href, frag = purl.path, purl.fragment
|
||||
@@ -240,13 +240,14 @@ class DetectStructure(object):
|
||||
play_order=self.oeb.toc.next_play_order())
|
||||
num += 1
|
||||
except ValueError:
|
||||
self.oeb.log.exception('Failed to process link: '
|
||||
'%r' % href)
|
||||
self.oeb.log.critical('Failed to process link: %s',
|
||||
href)
|
||||
# Most likely an incorrectly URL encoded link
|
||||
continue
|
||||
if self.opts.max_toc_links > 0 and \
|
||||
num >= self.opts.max_toc_links:
|
||||
self.log('Maximum TOC links reached, stopping.')
|
||||
self.log.info('Maximum TOC links reached, '
|
||||
'stopping.')
|
||||
return
|
||||
|
||||
def elem_to_link(self, item, elem, title_attribute, counter):
|
||||
@@ -277,7 +278,7 @@ class DetectStructure(object):
|
||||
len(ans)
|
||||
return ans
|
||||
except Exception:
|
||||
self.log.warn('Invalid ToC expression, ignoring: %s' % expr)
|
||||
self.log.warning('Invalid ToC expression, ignoring: %s', expr)
|
||||
return []
|
||||
|
||||
for document in self.oeb.spine:
|
||||
|
||||
@@ -5,11 +5,6 @@ from ebook_converter.utils.fonts.sfnt.subset import subset, NoGlyphs, Unsupporte
|
||||
from ebook_converter.tinycss.fonts3 import parse_font_family
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
def get_font_properties(rule, default=None):
|
||||
'''
|
||||
Given a CSS rule, extract normalized font properties from
|
||||
@@ -19,7 +14,7 @@ def get_font_properties(rule, default=None):
|
||||
props = {}
|
||||
s = rule.style
|
||||
for q in ('font-family', 'src', 'font-weight', 'font-stretch',
|
||||
'font-style'):
|
||||
'font-style'):
|
||||
g = 'uri' if q == 'src' else 'value'
|
||||
try:
|
||||
val = s.getProperty(q).propertyValue[0]
|
||||
@@ -149,18 +144,19 @@ class SubsetFonts(object):
|
||||
|
||||
for font in fonts.values():
|
||||
if not font['chars']:
|
||||
self.log('The font %s is unused. Removing it.'%font['src'])
|
||||
self.log('The font %s is unused. Removing it.', font['src'])
|
||||
remove(font)
|
||||
continue
|
||||
try:
|
||||
raw, old_stats, new_stats = subset(font['item'].data, font['chars'])
|
||||
except NoGlyphs:
|
||||
self.log('The font %s has no used glyphs. Removing it.'%font['src'])
|
||||
self.log('The font %s has no used glyphs. Removing it.',
|
||||
font['src'])
|
||||
remove(font)
|
||||
continue
|
||||
except UnsupportedFont as e:
|
||||
self.log.warn('The font %s is unsupported for subsetting. %s'%(
|
||||
font['src'], e))
|
||||
self.log.warning('The font %s is unsupported for subsetting. '
|
||||
'%s', font['src'], e)
|
||||
sz = len(font['item'].data)
|
||||
totals[0] += sz
|
||||
totals[1] += sz
|
||||
@@ -168,16 +164,16 @@ class SubsetFonts(object):
|
||||
font['item'].data = raw
|
||||
nlen = sum(new_stats.values())
|
||||
olen = sum(old_stats.values())
|
||||
self.log('Decreased the font %s to %.1f%% of its original size'%
|
||||
(font['src'], nlen/olen *100))
|
||||
self.log('Decreased the font %s to %.1f%% of its original '
|
||||
'size', font['src'], nlen/olen * 100)
|
||||
totals[0] += nlen
|
||||
totals[1] += olen
|
||||
|
||||
font['item'].unload_data_from_memory()
|
||||
|
||||
if totals[0]:
|
||||
self.log('Reduced total font size to %.1f%% of original'%
|
||||
(totals[0]/totals[1] * 100))
|
||||
self.log('Reduced total font size to %.1f%% of original',
|
||||
totals[0]/totals[1] * 100)
|
||||
|
||||
def find_embedded_fonts(self):
|
||||
'''
|
||||
|
||||
Reference in New Issue
Block a user