1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-19 04:33:34 +02:00

Use the real constants module.

This is part of the ongoing refactoring of the calibre code, to make it
more readable and to transform it into something more coherent.

This patch changes the imports in some modules: instead of polluting each
module's namespace with symbols from other modules (which were often
themselves imported from yet other modules), the modules are imported and
their symbols are referenced through the module name. Yuck.
This commit is contained in:
2020-05-29 17:04:53 +02:00
parent ee4801228f
commit ce89f5c9d1
54 changed files with 2383 additions and 2081 deletions

View File

@@ -1,66 +1,61 @@
import textwrap
import urllib.parse
from lxml import etree
from ebook_converter import guess_type
from ebook_converter.utils.imghdr import identify
from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.polyglot.urllib import unquote
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
class CoverManager(object):
SVG_TEMPLATE = textwrap.dedent('''\
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="calibre:cover" content="true" />
<title>Cover</title>
<style type="text/css" title="override_css">
@page {padding: 0pt; margin:0pt}
body { text-align: center; padding:0pt; margin: 0pt; }
</style>
</head>
<body>
<div>
<svg version="1.1" xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
width="100%%" height="100%%" viewBox="__viewbox__"
preserveAspectRatio="__ar__">
<image width="__width__" height="__height__" xlink:href="%s"/>
</svg>
</div>
</body>
</html>
''')
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="calibre:cover" content="true" />
<title>Cover</title>
<style type="text/css" title="override_css">
@page {padding: 0pt; margin:0pt}
body { text-align: center; padding:0pt; margin: 0pt; }
</style>
</head>
<body>
<div>
<svg version="1.1" xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
width="100%%" height="100%%" viewBox="__viewbox__"
preserveAspectRatio="__ar__">
<image width="__width__" height="__height__" xlink:href="%s"/>
</svg>
</div>
</body>
</html>''')
NONSVG_TEMPLATE = textwrap.dedent('''\
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="calibre:cover" content="true" />
<title>Cover</title>
<style type="text/css" title="override_css">
@page {padding: 0pt; margin:0pt}
body { text-align: center; padding:0pt; margin: 0pt }
div { padding:0pt; margin: 0pt }
img { padding:0pt; margin: 0pt }
</style>
</head>
<body>
<div>
<img src="%s" alt="cover" __style__ />
</div>
</body>
</html>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta name="calibre:cover" content="true" />
<title>Cover</title>
<style type="text/css" title="override_css">
@page {padding: 0pt; margin:0pt}
body { text-align: center; padding:0pt; margin: 0pt }
div { padding:0pt; margin: 0pt }
img { padding:0pt; margin: 0pt }
</style>
</head>
<body>
<div>
<img src="%s" alt="cover" __style__ />
</div>
</body>
</html>
''')
def __init__(self, no_default_cover=False, no_svg_cover=False,
preserve_aspect_ratio=False, fixed_size=None):
preserve_aspect_ratio=False, fixed_size=None):
self.no_default_cover = no_default_cover
self.no_svg_cover = no_svg_cover
self.preserve_aspect_ratio = preserve_aspect_ratio
@@ -72,9 +67,9 @@ class CoverManager(object):
style = 'style="height: 100%%"'
else:
width, height = fixed_size
style = 'style="height: %s; width: %s"'%(height, width)
style = 'style="height: %s; width: %s"' % (height, width)
self.non_svg_template = self.NONSVG_TEMPLATE.replace('__style__',
style)
style)
def __call__(self, oeb, opts, log):
self.oeb = oeb
@@ -108,22 +103,23 @@ class CoverManager(object):
# if self.preserve_aspect_ratio:
# width, height = 600, 800
self.svg_template = self.svg_template.replace('__viewbox__',
'0 0 %d %d'%(width, height))
'0 0 %d %d' %
(width, height))
self.svg_template = self.svg_template.replace('__width__',
str(width))
str(width))
self.svg_template = self.svg_template.replace('__height__',
str(height))
str(height))
if href is not None:
templ = self.non_svg_template if self.no_svg_cover \
else self.svg_template
tp = templ%unquote(href)
tp = templ % unquote(href)
id, href = m.generate('titlepage', 'titlepage.xhtml')
item = m.add(id, href, guess_type('t.xhtml')[0],
data=safe_xml_fromstring(tp))
data=etree.fromstring(tp))
else:
item = self.oeb.manifest.hrefs[
urllib.parse.urldefrag(self.oeb.guide['titlepage'].href)[0]]
key = urllib.parse.urldefrag(self.oeb.guide['titlepage'].href)[0]
item = self.oeb.manifest.hrefs[key]
if item is not None:
self.oeb.spine.insert(0, item, True)
if 'cover' not in self.oeb.guide.refs:

View File

@@ -1,26 +1,27 @@
"""
CSS flattening transform.
"""
import re, operator, math, numbers
from collections import defaultdict
from xml.dom import SyntaxErr
import collections
import math
import numbers
import operator
import re
from xml import dom
from lxml import etree
import css_parser
from css_parser.css import Property
from css_parser import css as cp_css
from ebook_converter import constants as const
from ebook_converter import guess_type
from ebook_converter.ebooks import unit_convert
from ebook_converter.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYLES,
namespace, barename, XPath, css_text)
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.utils.filenames import ascii_filename, ascii_text
from ebook_converter.utils.icu import numeric_sort_key
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
STRIPNUM = re.compile(r'[-0-9]+$')
@@ -121,7 +122,7 @@ class EmbedFontsCSSRules(object):
return None
if not self.href:
iid, href = oeb.manifest.generate('page_styles', 'page_styles.css')
rules = [css_text(x) for x in self.rules]
rules = [base.css_text(x) for x in self.rules]
rules = '\n\n'.join(rules)
sheet = css_parser.parseString(rules, validate=False)
self.href = oeb.manifest.add(iid, href, guess_type(href)[0],
@@ -186,7 +187,7 @@ class CSSFlattener(object):
for item in oeb.manifest.values():
# Make all links to resources absolute, as these sheets will be
# consolidated into a single stylesheet at the root of the document
if item.media_type in OEB_STYLES:
if item.media_type in base.OEB_STYLES:
css_parser.replaceUrls(item.data, item.abshref,
ignoreImportRules=True)
@@ -273,7 +274,7 @@ class CSSFlattener(object):
css = ''
for item in self.items:
html = item.data
body = html.find(XHTML('body'))
body = html.find(base.tag('xhtml', 'body'))
if 'style' in html.attrib:
b = body.attrib.get('style', '')
body.set('style', html.get('style') + ';' + b)
@@ -310,11 +311,11 @@ class CSSFlattener(object):
sizes[csize] += len(COLLAPSE.sub(' ', child.tail))
def baseline_spine(self):
sizes = defaultdict(float)
sizes = collections.defaultdict(float)
for item in self.items:
html = item.data
stylizer = self.stylizers[item]
body = html.find(XHTML('body'))
body = html.find(base.tag('xhtml', 'body'))
fsize = self.context.source.fbase
self.baseline_node(body, stylizer, sizes, fsize)
try:
@@ -351,9 +352,9 @@ class CSSFlattener(object):
def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id, recurse=True):
if not isinstance(node.tag, (str, bytes)) \
or namespace(node.tag) != XHTML_NS:
or parse_utils.namespace(node.tag) != const.XHTML_NS:
return
tag = barename(node.tag)
tag = parse_utils.barename(node.tag)
style = stylizer.style(node)
cssdict = style.cssdict()
try:
@@ -375,7 +376,7 @@ class CSSFlattener(object):
if 'margin-left' not in cssdict and 'margin-right' not in cssdict:
cssdict['margin-left'] = cssdict['margin-right'] = 'auto'
else:
for table in node.iterchildren(XHTML("table")):
for table in node.iterchildren(base.tag('xhtml', "table")):
ts = stylizer.style(table)
if ts.get('margin-left') is None and ts.get('margin-right') is None:
ts.set('margin-left', 'auto')
@@ -391,11 +392,12 @@ class CSSFlattener(object):
if cssdict.get('vertical-align') == 'inherit':
cssdict['vertical-align'] = node.attrib['valign']
del node.attrib['valign']
if node.tag == XHTML('font'):
if node.tag == base.tag('xhtml', 'font'):
tags = ['descendant::h:%s'%x for x in ('p', 'div', 'table', 'h1',
'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'dl', 'blockquote')]
tag = 'div' if XPath('|'.join(tags))(node) else 'span'
node.tag = XHTML(tag)
# TODO(gryf): this will override tag from line 355. On purpose?
tag = 'div' if base.XPath('|'.join(tags))(node) else 'span'
node.tag = base.tag('xhtml', tag)
if 'size' in node.attrib:
def force_int(raw):
return int(re.search(r'([0-9+-]+)', raw).group(1))
@@ -425,14 +427,14 @@ class CSSFlattener(object):
del node.attrib['face']
if 'color' in node.attrib:
try:
cssdict['color'] = Property('color', node.attrib['color']).value
except (ValueError, SyntaxErr):
cssdict['color'] = cp_css.Property('color', node.attrib['color']).value
except (ValueError, dom.SyntaxErr):
pass
del node.attrib['color']
if 'bgcolor' in node.attrib:
try:
cssdict['background-color'] = Property('background-color', node.attrib['bgcolor']).value
except (ValueError, SyntaxErr):
cssdict['background-color'] = cp_css.Property('background-color', node.attrib['bgcolor']).value
except (ValueError, dom.SyntaxErr):
pass
del node.attrib['bgcolor']
if tag == 'ol' and 'type' in node.attrib:
@@ -573,7 +575,7 @@ class CSSFlattener(object):
def flatten_head(self, item, href, global_href):
html = item.data
head = html.find(XHTML('head'))
head = html.find(base.tag('xhtml', 'head'))
def safe_lower(x):
try:
@@ -583,39 +585,39 @@ class CSSFlattener(object):
return x
for node in html.xpath('//*[local-name()="style" or local-name()="link"]'):
if node.tag == XHTML('link') \
if node.tag == base.tag('xhtml', 'link') \
and safe_lower(node.get('rel', 'stylesheet')) == 'stylesheet' \
and safe_lower(node.get('type', CSS_MIME)) in OEB_STYLES:
and safe_lower(node.get('type', base.CSS_MIME)) in base.OEB_STYLES:
node.getparent().remove(node)
elif node.tag == XHTML('style') \
and node.get('type', CSS_MIME) in OEB_STYLES:
elif node.tag == base.tag('xhtml', 'style') \
and node.get('type', base.CSS_MIME) in base.OEB_STYLES:
node.getparent().remove(node)
href = item.relhref(href)
l = etree.SubElement(head, XHTML('link'),
rel='stylesheet', type=CSS_MIME, href=href)
l = etree.SubElement(head, base.tag('xhtml', 'link'),
rel='stylesheet', type=base.CSS_MIME, href=href)
l.tail='\n'
if global_href:
href = item.relhref(global_href)
l = etree.SubElement(head, XHTML('link'),
rel='stylesheet', type=CSS_MIME, href=href)
l = etree.SubElement(head, base.tag('xhtml', 'link'),
rel='stylesheet', type=base.CSS_MIME, href=href)
l.tail = '\n'
def replace_css(self, css):
manifest = self.oeb.manifest
for item in manifest.values():
if item.media_type in OEB_STYLES:
if item.media_type in base.OEB_STYLES:
manifest.remove(item)
id, href = manifest.generate('css', 'stylesheet.css')
sheet = css_parser.parseString(css, validate=False)
if self.transform_css_rules:
from ebook_converter.ebooks.css_transform_rules import transform_sheet
transform_sheet(self.transform_css_rules, sheet)
item = manifest.add(id, href, CSS_MIME, data=sheet)
item = manifest.add(id, href, base.CSS_MIME, data=sheet)
self.oeb.manifest.main_stylesheet = item
return href
def collect_global_css(self):
global_css = defaultdict(list)
global_css = collections.defaultdict(list)
for item in self.items:
stylizer = self.stylizers[item]
if float(self.context.margin_top) >= 0:
@@ -627,7 +629,7 @@ class CSSFlattener(object):
items = sorted(stylizer.page_rule.items())
css = ';\n'.join("%s: %s" % (key, val) for key, val in items)
css = ('@page {\n%s\n}\n'%css) if items else ''
rules = [css_text(r) for r in stylizer.font_face_rules + self.embed_font_rules]
rules = [base.css_text(r) for r in stylizer.font_face_rules + self.embed_font_rules]
raw = '\n\n'.join(rules)
css += '\n\n' + raw
global_css[css].append(item)
@@ -642,7 +644,7 @@ class CSSFlattener(object):
if self.transform_css_rules:
from ebook_converter.ebooks.css_transform_rules import transform_sheet
transform_sheet(self.transform_css_rules, sheet)
manifest.add(id_, href, CSS_MIME, data=sheet)
manifest.add(id_, href, base.CSS_MIME, data=sheet)
gc_map[css] = href
ans = {}
@@ -652,8 +654,8 @@ class CSSFlattener(object):
return ans
def flatten_spine(self):
names = defaultdict(int)
styles, pseudo_styles = {}, defaultdict(dict)
names = collections.defaultdict(int)
styles, pseudo_styles = {}, collections.defaultdict(dict)
for item in self.items:
html = item.data
stylizer = self.stylizers[item]
@@ -661,7 +663,7 @@ class CSSFlattener(object):
self.specializer(item, stylizer)
fsize = self.context.dest.fbase
self.flatten_node(html, stylizer, names, styles, pseudo_styles, fsize, item.id, recurse=False)
self.flatten_node(html.find(XHTML('body')), stylizer, names, styles, pseudo_styles, fsize, item.id)
self.flatten_node(html.find(base.tag('xhtml', 'body')), stylizer, names, styles, pseudo_styles, fsize, item.id)
items = sorted(((key, val) for (val, key) in styles.items()))
# :hover must come after link and :active must come after :hover
psels = sorted(pseudo_styles, key=lambda x :

View File

@@ -1,46 +1,20 @@
"""
HTML-TOC-adding transform.
"""
from ebook_converter.ebooks.oeb.base import XML, XHTML, XHTML_NS
from ebook_converter.ebooks.oeb.base import XHTML_MIME, CSS_MIME
from ebook_converter.ebooks.oeb.base import element, XPath
from ebook_converter import constants as const
from ebook_converter.ebooks.oeb import base
__all__ = ['HTMLTOCAdder']
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
DEFAULT_TITLE = 'Table of Contents'
STYLE_CSS = {'nested': '.calibre_toc_header {\n text-align: center;\n}\n'
'.calibre_toc_block {\n margin-left: 1.2em;\n text-indent: '
'-1.2em;\n}\n.calibre_toc_block .calibre_toc_block {\n '
'margin-left: 2.4em;\n}\n.calibre_toc_block .calibre_toc_block '
'.calibre_toc_block {\n margin-left: 3.6em;\n}\n',
STYLE_CSS = {
'nested': """
.calibre_toc_header {
text-align: center;
}
.calibre_toc_block {
margin-left: 1.2em;
text-indent: -1.2em;
}
.calibre_toc_block .calibre_toc_block {
margin-left: 2.4em;
}
.calibre_toc_block .calibre_toc_block .calibre_toc_block {
margin-left: 3.6em;
}
""",
'centered': """
.calibre_toc_header {
text-align: center;
}
.calibre_toc_block {
text-align: center;
}
body > .calibre_toc_block {
margin-top: 1.2em;
}
"""
}
'centered': '.calibre_toc_header {\n text-align: center;\n}\n'
'.calibre_toc_block {\n text-align: center;\n}\nbody > '
'.calibre_toc_block {\n margin-top: 1.2em;\n}\n'}
class HTMLTOCAdder(object):
@@ -71,7 +45,7 @@ class HTMLTOCAdder(object):
if href in oeb.manifest.hrefs:
item = oeb.manifest.hrefs[href]
if (hasattr(item.data, 'xpath') and
XPath('//h:a[@href]')(item.data)):
base.XPath('//h:a[@href]')(item.data)):
if oeb.spine.index(item) < 0:
if self.position == 'end':
oeb.spine.add(item, linear=False)
@@ -91,23 +65,24 @@ class HTMLTOCAdder(object):
oeb.logger.error('Unknown TOC style %r' % style)
style = 'nested'
id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css')
oeb.manifest.add(id, css_href, CSS_MIME, data=STYLE_CSS[style])
oeb.manifest.add(id, css_href, base.CSS_MIME, data=STYLE_CSS[style])
language = str(oeb.metadata.language[0])
contents = element(None, XHTML('html'), nsmap={None: XHTML_NS},
attrib={XML('lang'): language})
head = element(contents, XHTML('head'))
htitle = element(head, XHTML('title'))
contents = base.element(None, base.tag('xhtml', 'html'),
nsmap={None: const.XHTML_NS},
attrib={base.tag('xml', 'lang'): language})
head = base.element(contents, base.tag('xhtml', 'head'))
htitle = base.element(head, base.tag('xhtml', 'title'))
htitle.text = title
element(head, XHTML('link'), rel='stylesheet', type=CSS_MIME,
href=css_href)
body = element(contents, XHTML('body'),
attrib={'class': 'calibre_toc'})
h1 = element(body, XHTML('h2'),
attrib={'class': 'calibre_toc_header'})
base.element(head, base.tag('xhtml', 'link'), rel='stylesheet',
type=base.CSS_MIME, href=css_href)
body = base.element(contents, base.tag('xhtml', 'body'),
attrib={'class': 'calibre_toc'})
h1 = base.element(body, base.tag('xhtml', 'h2'),
attrib={'class': 'calibre_toc_header'})
h1.text = title
self.add_toc_level(body, oeb.toc)
id, href = oeb.manifest.generate('contents', 'contents.xhtml')
item = oeb.manifest.add(id, href, XHTML_MIME, data=contents)
item = oeb.manifest.add(id, href, base.XHTML_MIME, data=contents)
if self.position == 'end':
oeb.spine.add(item, linear=False)
else:
@@ -116,10 +91,10 @@ class HTMLTOCAdder(object):
def add_toc_level(self, elem, toc):
for node in toc:
block = element(elem, XHTML('div'),
attrib={'class': 'calibre_toc_block'})
line = element(block, XHTML('a'),
attrib={'href': node.href,
'class': 'calibre_toc_line'})
block = base.element(elem, base.tag('xhtml', 'div'),
attrib={'class': 'calibre_toc_block'})
line = base.element(block, base.tag('xhtml', 'a'),
attrib={'href': node.href,
'class': 'calibre_toc_line'})
line.text = node.title
self.add_toc_level(block, node)

View File

@@ -4,9 +4,10 @@ from string import Formatter
import pkg_resources
import urllib.parse
from ebook_converter import constants as const
from ebook_converter import guess_type, strftime
from ebook_converter.constants_old import iswindows
from ebook_converter.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urlnormalize
from ebook_converter.ebooks.oeb.base import XPath, xml2text, urlnormalize
from ebook_converter.library.comments import comments_to_html, markdown
from ebook_converter.utils.date import is_date_undefined, as_local_time
from ebook_converter.ebooks.chardet import strip_encoding_declarations
@@ -303,7 +304,7 @@ def render_jacket(mi, output_profile,
'tags_label': 'Tags',
'title': title,
'title_str': title_str,
'xmlns': XHTML_NS}
'xmlns': const.XHTML_NS}
for key in mi.custom_field_keys():
m = mi.get_user_metadata(key, False) or {}
@@ -370,7 +371,7 @@ def render_jacket(mi, output_profile,
# We cannot use data-calibre-rescale 100 on the body tag as that will just
# give the body tag a font size of 1em, which is useless.
for body in root.xpath('//*[local-name()="body"]'):
fw = body.makeelement(XHTML('div'))
fw = body.makeelement(const.XHTML_DIV)
fw.set('data-calibre-rescale', '100')
for child in body:
fw.append(child)
@@ -387,9 +388,9 @@ def linearize_jacket(oeb):
for x in oeb.spine[:4]:
if XPath(JACKET_XPATH)(x.data):
for e in XPath('//h:table|//h:tr|//h:th')(x.data):
e.tag = XHTML('div')
e.tag = const.XHTML_DIV
for e in XPath('//h:td')(x.data):
e.tag = XHTML('span')
e.tag = const.XHTML_SPAN
break

View File

@@ -5,9 +5,9 @@ import string
from lxml import etree
from ebook_converter.ebooks.oeb.base import XHTML, XHTML_NS
from ebook_converter.ebooks.oeb.base import CSS_MIME
from ebook_converter.ebooks.oeb.base import namespace
from ebook_converter import constants as const
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.ebooks.oeb.stylizer import Stylizer
@@ -43,15 +43,16 @@ class CaseMangler(object):
def mangle_spine(self):
id, href = self.oeb.manifest.generate('manglecase', 'manglecase.css')
self.oeb.manifest.add(id, href, CSS_MIME, data=CASE_MANGLER_CSS)
self.oeb.manifest.add(id, href, base.CSS_MIME, data=CASE_MANGLER_CSS)
for item in self.oeb.spine:
html = item.data
relhref = item.relhref(href)
etree.SubElement(html.find(XHTML('head')), XHTML('link'),
rel='stylesheet', href=relhref, type=CSS_MIME)
etree.SubElement(html.find(base.tag('xhtml', 'head')),
base.tag('xhtml', 'link'), rel='stylesheet',
href=relhref, type=base.CSS_MIME)
stylizer = Stylizer(html, item.href, self.oeb, self.opts,
self.profile)
self.mangle_elem(html.find(XHTML('body')), stylizer)
self.mangle_elem(html.find(base.tag('xhtml', 'body')), stylizer)
def text_transform(self, transform, text):
if transform == 'capitalize':
@@ -85,7 +86,8 @@ class CaseMangler(object):
else:
last.tail = text
else:
child = elem.makeelement(XHTML('span'), attrib=attrib)
child = elem.makeelement(base.tag('xhtml', 'span'),
attrib=attrib)
child.text = text.upper()
if last is None:
elem.insert(0, child)
@@ -99,7 +101,7 @@ class CaseMangler(object):
def mangle_elem(self, elem, stylizer):
if not isinstance(elem.tag, (str, bytes)) or \
namespace(elem.tag) != XHTML_NS:
parse_utils.namespace(elem.tag) != const.XHTML_NS:
return
children = list(elem)
style = stylizer.style(elem)

View File

@@ -1,15 +1,12 @@
import os, re
import os
import re
from ebook_converter.ebooks.oeb import base
from ebook_converter.utils.date import isoformat, now
from ebook_converter import guess_type
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
from ebook_converter.ebooks.oeb.base import OPF
if not mi.is_null('title'):
m.clear('title')
m.add('title', mi.title)
@@ -19,17 +16,17 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
m.clear('title_sort')
m.add('title_sort', mi.title_sort)
if not mi.is_null('authors'):
m.filter('creator', lambda x : x.role.lower() in ['aut', ''])
m.filter('creator', lambda x: x.role.lower() in ['aut', ''])
for a in mi.authors:
attrib = {'role':'aut'}
attrib = {'role': 'aut'}
if mi.author_sort:
attrib[OPF('file-as')] = mi.author_sort
attrib[base.tag('opf', 'file-as')] = mi.author_sort
m.add('creator', a, attrib=attrib)
if not mi.is_null('book_producer'):
m.filter('contributor', lambda x : x.role.lower() == 'bkp')
m.filter('contributor', lambda x: x.role.lower() == 'bkp')
m.add('contributor', mi.book_producer, role='bkp')
elif override_input_metadata:
m.filter('contributor', lambda x : x.role.lower() == 'bkp')
m.filter('contributor', lambda x: x.role.lower() == 'bkp')
if not mi.is_null('comments'):
m.clear('description')
m.add('description', mi.comments)
@@ -71,7 +68,7 @@ def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
m.clear('series_index')
if not mi.is_null('rating'):
m.clear('rating')
m.add('rating', '%.2f'%mi.rating)
m.add('rating', '%.2f' % mi.rating)
elif override_input_metadata:
m.clear('rating')
if not mi.is_null('tags'):
@@ -101,23 +98,25 @@ class MergeMetadata(object):
'Merge in user metadata, including cover'
def __call__(self, oeb, mi, opts, override_input_metadata=False):
_oim = override_input_metadata
self.oeb, self.log = oeb, oeb.log
m = self.oeb.metadata
self.log('Merging user specified metadata...')
meta_info_to_oeb_metadata(mi, m, oeb.log,
override_input_metadata=override_input_metadata)
override_input_metadata=_oim)
cover_id = self.set_cover(mi, opts.prefer_metadata_cover)
m.clear('cover')
if cover_id is not None:
m.add('cover', cover_id)
if mi.uuid is not None:
m.filter('identifier', lambda x:x.id=='uuid_id')
m.filter('identifier', lambda x: x.id == 'uuid_id')
self.oeb.metadata.add('identifier', mi.uuid, id='uuid_id',
scheme='uuid')
self.oeb.uid = self.oeb.metadata.identifier[-1]
if mi.application_id is not None:
m.filter('identifier', lambda x:x.scheme=='calibre')
self.oeb.metadata.add('identifier', mi.application_id, scheme='calibre')
m.filter('identifier', lambda x: x.scheme == 'calibre')
self.oeb.metadata.add('identifier', mi.application_id,
scheme='calibre')
def set_cover(self, mi, prefer_metadata_cover):
cdata, ext = b'', 'jpg'
@@ -138,7 +137,8 @@ class MergeMetadata(object):
if cdata:
self.oeb.guide.remove('cover')
self.oeb.guide.remove('titlepage')
elif self.oeb.plumber_output_format in {'mobi', 'azw3'} and old_cover is not None:
elif (self.oeb.plumber_output_format in {'mobi', 'azw3'} and
old_cover is not None):
# The Amazon formats don't support HTML cover pages, so remove them
# even if no cover was specified.
self.oeb.guide.remove('titlepage')
@@ -156,7 +156,9 @@ class MergeMetadata(object):
new_cover_item = None
if cdata:
id, href = self.oeb.manifest.generate('cover', 'cover.'+ext)
new_cover_item = self.oeb.manifest.add(id, href, guess_type('cover.'+ext)[0], data=cdata)
new_cover_item = self.oeb.manifest.add(id, href,
guess_type('cover.'+ext)[0],
data=cdata)
self.oeb.guide.add('cover', 'Cover', href)
if do_remove_old_cover:
self.remove_old_cover(item, new_cover_item.href)
@@ -186,7 +188,8 @@ class MergeMetadata(object):
if href == cover_item.href:
if new_cover_href is not None:
replacement_href = item.relhref(new_cover_href)
attr = 'src' if img.tag.endswith('img') else XLINK('href')
attr = ('src' if img.tag.endswith('img')
else XLINK('href'))
img.set(attr, replacement_href)
else:
p = img.getparent()
@@ -202,13 +205,14 @@ class MergeMetadata(object):
for item in affected_items:
body = XPath('//h:body')(item.data)
if body:
text = etree.tostring(body[0], method='text', encoding='unicode')
text = etree.tostring(body[0], method='text',
encoding='unicode')
else:
text = ''
text = re.sub(r'\s+', '', text)
if not text and not XPath('//h:img|//svg:svg')(item.data):
self.log('Removing %s as it is a wrapper around'
' the cover image'%item.href)
self.log('Removing %s as it is a wrapper around the cover '
'image' % item.href)
self.oeb.spine.remove(item)
self.oeb.manifest.remove(item)
self.oeb.guide.remove_by_href(item.href)

View File

@@ -1,7 +1,8 @@
import numbers
from collections import Counter
from ebook_converter.ebooks.oeb.base import barename, XPath
from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.ebooks.oeb.base import XPath
__license__ = 'GPL v3'
@@ -142,7 +143,7 @@ class RemoveFakeMargins(object):
for p in paras(body):
level = level_of(p, body)
level = '%s_%d'%(barename(p.tag), level)
level = '%s_%d' % (parse_utils.barename(p.tag), level)
if level not in self.levels:
self.levels[level] = []
self.levels[level].append(p)

View File

@@ -5,10 +5,8 @@ import os
import re
import urllib.parse
# from PyQt5.Qt import (
# Qt, QByteArray, QBuffer, QIODevice, QColor, QImage, QPainter, QSvgRenderer)
from ebook_converter.ebooks.oeb.base import XHTML, XLINK
from ebook_converter import constants as const
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb.base import SVG_MIME, PNG_MIME
from ebook_converter.ebooks.oeb.base import xml2str, xpath
from ebook_converter.ebooks.oeb.base import urlnormalize
@@ -17,10 +15,7 @@ from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.utils.imghdr import what
__license__ = 'GPL v3'
__copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>'
IMAGE_TAGS = {XHTML('img'), XHTML('object')}
IMAGE_TAGS = {base.tag('xhtml', 'img'), base.tag('xhtml', 'object')}
KEEP_ATTRS = {'class', 'style', 'width', 'height', 'align'}
@@ -113,7 +108,7 @@ class SVGRasterizer(object):
svg = item.data
hrefs = self.oeb.manifest.hrefs
for elem in xpath(svg, '//svg:*[@xl:href]'):
href = urlnormalize(elem.attrib[XLINK('href')])
href = urlnormalize(elem.attrib[base.tag('xlink', 'href')])
path = urllib.parse.urldefrag(href)[0]
if not path:
continue
@@ -126,7 +121,7 @@ class SVGRasterizer(object):
with PersistentTemporaryFile(suffix='.'+ext) as pt:
pt.write(data)
self.temp_files.append(pt.name)
elem.attrib[XLINK('href')] = pt.name
elem.attrib[base.tag('xlink', 'href')] = pt.name
return svg
def stylizer(self, item):
@@ -171,7 +166,7 @@ class SVGRasterizer(object):
href = os.path.splitext(item.href)[0] + '.png'
id, href = manifest.generate(item.id, href)
manifest.add(id, href, PNG_MIME, data=data)
img = elem.makeelement(XHTML('img'), src=item.relhref(href))
img = elem.makeelement(base.tag('xhtml', 'img'), src=item.relhref(href))
elem.getparent().replace(elem, img)
for prop in ('width', 'height'):
if prop in elem.attrib:
@@ -208,7 +203,7 @@ class SVGRasterizer(object):
id, href = manifest.generate(svgitem.id, href)
manifest.add(id, href, PNG_MIME, data=data)
self.images[key] = href
elem.tag = XHTML('img')
elem.tag = base.tag('xhtml', 'img')
for attr in elem.attrib:
if attr not in KEEP_ATTRS:
del elem.attrib[attr]

View File

@@ -10,10 +10,11 @@ import urllib.parse
from lxml.etree import XPath as _XPath
from lxml import etree
from ebook_converter import constants as const
from ebook_converter import as_unicode, force_unicode
from ebook_converter.ebooks.epub import rules
from ebook_converter.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
rewrite_links, XHTML, urlnormalize)
from ebook_converter.ebooks.oeb.base import \
OEB_STYLES, rewrite_links, urlnormalize
from ebook_converter.ebooks.oeb.polish.split import do_split
from ebook_converter.polyglot.urllib import unquote
from ebook_converter.css_selectors import Select, SelectorError
@@ -22,7 +23,7 @@ from ebook_converter.css_selectors import Select, SelectorError
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
XPath = functools.partial(_XPath, namespaces=NAMESPACES)
XPath = functools.partial(_XPath, namespaces=const.XPNSMAP)
SPLIT_POINT_ATTR = 'csp'
@@ -104,7 +105,7 @@ class Split(object):
select = Select(item.data)
if not self.page_break_selectors:
return [], []
body = item.data.xpath('//h:body', namespaces=NAMESPACES)
body = item.data.xpath('//h:body', namespaces=const.XPNSMAP)
if not body:
return [], []
descendants = frozenset(body[0].iterdescendants('*'))
@@ -268,13 +269,13 @@ class FlowSplitter(object):
if body is not None:
existing_ids = frozenset(body.xpath('//*/@id'))
for x in ids - existing_ids:
body.insert(0, body.makeelement(XHTML('div'), id=x, style='height:0pt'))
body.insert(0, body.makeelement(const.XHTML_div, id=x, style='height:0pt'))
ids = set()
trees.append(tree)
self.trees = trees
def get_body(self, root):
body = root.xpath('//h:body', namespaces=NAMESPACES)
body = root.xpath('//h:body', namespaces=const.XPNSMAP)
if not body:
return None
return body[0]
@@ -296,7 +297,7 @@ class FlowSplitter(object):
etree.tostring(body, method='text', encoding='unicode'))
if len(txt) > 1:
return False
for img in root.xpath('//h:img', namespaces=NAMESPACES):
for img in root.xpath('//h:img', namespaces=const.XPNSMAP):
if img.get('style', '') != 'display:none':
return False
if root.xpath('//*[local-name() = "svg"]'):
@@ -401,7 +402,7 @@ class FlowSplitter(object):
'//h:br',
'//h:li',
):
elems = root.xpath(path, namespaces=NAMESPACES)
elems = root.xpath(path, namespaces=const.XPNSMAP)
elem = pick_elem(elems)
if elem is not None:
try:
@@ -436,7 +437,7 @@ class FlowSplitter(object):
spine_pos = self.item.spine_position
for current, tree in zip(*map(reversed, (self.files, self.trees))):
for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
for a in tree.getroot().xpath('//h:a[@href]', namespaces=const.XPNSMAP):
href = a.get('href').strip()
if href.startswith('#'):
anchor = href[1:]

View File

@@ -1,22 +1,19 @@
import collections
import re
import uuid
import urllib.parse
import uuid
from lxml import etree
from collections import OrderedDict, Counter
from ebook_converter.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename
from ebook_converter import constants as const
from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.ebooks.oeb.base import TOC, xml2text
from ebook_converter.ebooks import ConversionError
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
def XPath(x):
try:
return etree.XPath(x, namespaces=XPNSMAP)
return etree.XPath(x, namespaces=const.XPNSMAP)
except etree.XPathSyntaxError:
raise ConversionError(
'The syntax of the XPath expression %s is invalid.' % repr(x))
@@ -84,7 +81,7 @@ class DetectStructure(object):
try:
prev = next(elem.itersiblings(tag=etree.Element,
preceding=True))
if (barename(elem.tag) in {'h1', 'h2'} and barename(
if (parse_utils.barename(elem.tag) in {'h1', 'h2'} and parse_utils.barename(
prev.tag) in {'h1', 'h2'} and (not prev.tail or
not prev.tail.split())):
# We have two adjacent headings, do not put a page
@@ -165,7 +162,7 @@ class DetectStructure(object):
chapter_mark = self.opts.chapter_mark
page_break_before = 'display: block; page-break-before: always'
page_break_after = 'display: block; page-break-after: always'
c = Counter()
c = collections.Counter()
for item, elem in self.detected_chapters:
c[item] += 1
text = xml2text(elem).strip()
@@ -174,7 +171,7 @@ class DetectStructure(object):
if chapter_mark == 'none':
continue
if chapter_mark == 'rule':
mark = elem.makeelement(XHTML('hr'))
mark = elem.makeelement(const.XHTML_HR)
elif chapter_mark == 'pagebreak':
if c[item] < 3 and at_start(elem):
# For the first two elements in this item, check if they
@@ -184,9 +181,9 @@ class DetectStructure(object):
# feedbooks epubs match both a heading tag and its
# containing div with the default chapter expression.
continue
mark = elem.makeelement(XHTML('div'), style=page_break_after)
mark = elem.makeelement(const.XHTML_DIV, style=page_break_after)
else: # chapter_mark == 'both':
mark = elem.makeelement(XHTML('hr'), style=page_break_before)
mark = elem.makeelement(const.XHTML_HR, style=page_break_before)
try:
elem.addprevious(mark)
except TypeError:
@@ -254,8 +251,8 @@ class DetectStructure(object):
return text, href
def add_leveled_toc_items(self):
added = OrderedDict()
added2 = OrderedDict()
added = collections.OrderedDict()
added2 = collections.OrderedDict()
counter = 1
def find_matches(expr, doc):