mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-26 23:55:46 +01:00
Every mime related function in main __init__.py has a flag check for the check if initialization has already done. This is nonsense, since it should be done implicitly early on the converter is starting. This commit straight the things out, and initialization is done in cli module. Also, function guess_type was removed, since it's just a proxy for mimetypes.guess_type function.
686 lines
28 KiB
Python
686 lines
28 KiB
Python
"""
|
|
CSS flattening transform.
|
|
"""
|
|
import collections
|
|
import math
|
|
import mimetypes
|
|
import numbers
|
|
import operator
|
|
import re
|
|
from xml import dom
|
|
|
|
from lxml import etree
|
|
import css_parser
|
|
from css_parser import css as cp_css
|
|
|
|
from ebook_converter import constants as const
|
|
from ebook_converter.ebooks import unit_convert
|
|
from ebook_converter.ebooks.oeb import base
|
|
from ebook_converter.ebooks.oeb import parse_utils
|
|
|
|
from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
|
from ebook_converter.utils.filenames import ascii_filename, ascii_text
|
|
|
|
|
|
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
|
STRIPNUM = re.compile(r'[-0-9]+$')
|
|
|
|
|
|
def asfloat(value, default):
|
|
if not isinstance(value, numbers.Number):
|
|
value = default
|
|
return float(value)
|
|
|
|
|
|
class KeyMapper(object):
|
|
|
|
def __init__(self, sbase, dbase, dkey):
|
|
self.sbase = float(sbase)
|
|
self.dprop = [(self.relate(x, dbase), float(x)) for x in dkey]
|
|
self.cache = {}
|
|
|
|
@staticmethod
|
|
def relate(size, base):
|
|
if size == 0:
|
|
return base
|
|
size = float(size)
|
|
base = float(base)
|
|
if abs(size - base) < 0.1:
|
|
return 0
|
|
sign = -1 if size < base else 1
|
|
endp = 0 if size < base else 36
|
|
diff = (abs(base - size) * 3) + ((36 - size) / 100)
|
|
logb = abs(base - endp)
|
|
if logb == 1.0:
|
|
logb = 1.1
|
|
try:
|
|
result = sign * math.log(diff, logb)
|
|
except ValueError:
|
|
if diff < 0:
|
|
# Size is both very large and close to base
|
|
return 0
|
|
if logb == 0:
|
|
logb = 1e-6
|
|
if diff == 0:
|
|
diff = 1e-6
|
|
result = sign * math.log(diff, logb)
|
|
return result
|
|
|
|
def __getitem__(self, ssize):
|
|
ssize = asfloat(ssize, 0)
|
|
if ssize in self.cache:
|
|
return self.cache[ssize]
|
|
dsize = self.map(ssize)
|
|
self.cache[ssize] = dsize
|
|
return dsize
|
|
|
|
def map(self, ssize):
|
|
sbase = self.sbase
|
|
prop = self.relate(ssize, sbase)
|
|
diff = [(abs(prop - p), s) for p, s in self.dprop]
|
|
dsize = min(diff)[1]
|
|
return dsize
|
|
|
|
|
|
class ScaleMapper(object):
|
|
|
|
def __init__(self, sbase, dbase):
|
|
self.dscale = float(dbase) / float(sbase)
|
|
|
|
def __getitem__(self, ssize):
|
|
ssize = asfloat(ssize, 0)
|
|
dsize = ssize * self.dscale
|
|
return dsize
|
|
|
|
|
|
class NullMapper(object):
|
|
|
|
def __init__(self):
|
|
pass
|
|
|
|
def __getitem__(self, ssize):
|
|
return ssize
|
|
|
|
|
|
def FontMapper(sbase=None, dbase=None, dkey=None):
|
|
if sbase and dbase and dkey:
|
|
return KeyMapper(sbase, dbase, dkey)
|
|
elif sbase and dbase:
|
|
return ScaleMapper(sbase, dbase)
|
|
else:
|
|
return NullMapper()
|
|
|
|
|
|
class EmbedFontsCSSRules(object):
|
|
|
|
def __init__(self, body_font_family, rules):
|
|
self.body_font_family, self.rules = body_font_family, rules
|
|
self.href = None
|
|
|
|
def __call__(self, oeb):
|
|
if not self.body_font_family:
|
|
return None
|
|
if not self.href:
|
|
iid, href = oeb.manifest.generate('page_styles', 'page_styles.css')
|
|
rules = [base.css_text(x) for x in self.rules]
|
|
rules = '\n\n'.join(rules)
|
|
sheet = css_parser.parseString(rules, validate=False)
|
|
self.href = oeb.manifest.add(iid, href,
|
|
mimetypes.guess_type(href)[0],
|
|
data=sheet).href
|
|
return self.href
|
|
|
|
|
|
class CSSFlattener(object):
|
|
|
|
def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False,
|
|
untable=False, page_break_on_body=False, specializer=None,
|
|
transform_css_rules=()):
|
|
self.fbase = fbase
|
|
self.transform_css_rules = transform_css_rules
|
|
if self.transform_css_rules:
|
|
from ebook_converter.ebooks.css_transform_rules import compile_rules
|
|
self.transform_css_rules = compile_rules(self.transform_css_rules)
|
|
self.fkey = fkey
|
|
self.lineh = lineh
|
|
self.unfloat = unfloat
|
|
self.untable = untable
|
|
self.specializer = specializer
|
|
self.page_break_on_body = page_break_on_body
|
|
|
|
@classmethod
|
|
def config(cls, cfg):
|
|
return cfg
|
|
|
|
@classmethod
|
|
def generate(cls, opts):
|
|
return cls()
|
|
|
|
def __call__(self, oeb, context):
|
|
oeb.logger.info('Flattening CSS and remapping font sizes...')
|
|
self.context = self.opts = context
|
|
self.oeb = oeb
|
|
self.items = list(self.oeb.spine)
|
|
titlepage = self.oeb.guide.get('titlepage')
|
|
if titlepage is not None:
|
|
titlepage = titlepage.item
|
|
if titlepage is not None and titlepage not in self.items:
|
|
self.items.append(titlepage)
|
|
epub3_nav = None
|
|
if getattr(self.opts, 'epub3_nav_href', None):
|
|
epub3_nav = self.oeb.manifest.hrefs.get(self.opts.epub3_nav_href)
|
|
if epub3_nav is not None and epub3_nav not in self.items:
|
|
self.items.append(epub3_nav)
|
|
|
|
self.filter_css = frozenset()
|
|
if self.opts.filter_css:
|
|
try:
|
|
self.filter_css = {x.strip().lower() for x in
|
|
self.opts.filter_css.split(',')}
|
|
except:
|
|
self.oeb.log.warning('Failed to parse filter_css, ignoring')
|
|
else:
|
|
from ebook_converter.ebooks.oeb.normalize_css import normalize_filter_css
|
|
self.filter_css = frozenset(normalize_filter_css(self.filter_css))
|
|
self.oeb.log.debug('Filtering CSS properties: %s'%
|
|
', '.join(self.filter_css))
|
|
|
|
for item in oeb.manifest.values():
|
|
# Make all links to resources absolute, as these sheets will be
|
|
# consolidated into a single stylesheet at the root of the document
|
|
if item.media_type in base.OEB_STYLES:
|
|
css_parser.replaceUrls(item.data, item.abshref,
|
|
ignoreImportRules=True)
|
|
|
|
self.body_font_family, self.embed_font_rules = self.get_embed_font_info(
|
|
self.opts.embed_font_family)
|
|
# Store for use in output plugins/transforms that generate content,
|
|
# like the AZW3 output inline ToC.
|
|
self.oeb.store_embed_font_rules = EmbedFontsCSSRules(self.body_font_family,
|
|
self.embed_font_rules)
|
|
self.stylize_spine()
|
|
self.sbase = self.baseline_spine() if self.fbase else None
|
|
self.fmap = FontMapper(self.sbase, self.fbase, self.fkey)
|
|
self.flatten_spine()
|
|
if epub3_nav is not None:
|
|
self.opts.epub3_nav_parsed = epub3_nav.data
|
|
|
|
self.store_page_margins()
|
|
|
|
def store_page_margins(self):
|
|
self.opts._stored_page_margins = {}
|
|
for item, stylizer in self.stylizers.items():
|
|
margins = self.opts._stored_page_margins[item.href] = {}
|
|
for prop, val in stylizer.page_rule.items():
|
|
p, w = prop.partition('-')[::2]
|
|
if p == 'margin':
|
|
margins[w] = unit_convert(
|
|
val, stylizer.profile.width_pts, stylizer.body_font_size,
|
|
stylizer.profile.dpi, body_font_size=stylizer.body_font_size)
|
|
|
|
def get_embed_font_info(self, family, failure_critical=True):
|
|
efi = []
|
|
body_font_family = None
|
|
if not family:
|
|
return body_font_family, efi
|
|
from ebook_converter.utils.fonts.scanner import font_scanner, NoFonts
|
|
from ebook_converter.utils.fonts.utils import panose_to_css_generic_family
|
|
try:
|
|
faces = font_scanner.fonts_for_family(family)
|
|
except NoFonts:
|
|
msg = ('No embeddable fonts found for family: %r'%family)
|
|
if failure_critical:
|
|
raise ValueError(msg)
|
|
self.oeb.log.warn(msg)
|
|
return body_font_family, efi
|
|
if not faces:
|
|
msg = ('No embeddable fonts found for family: %r'%family)
|
|
if failure_critical:
|
|
raise ValueError(msg)
|
|
self.oeb.log.warn(msg)
|
|
return body_font_family, efi
|
|
|
|
for i, font in enumerate(faces):
|
|
ext = 'otf' if font['is_otf'] else 'ttf'
|
|
fid, href = self.oeb.manifest.generate(id=u'font',
|
|
href='fonts/%s.%s'%(ascii_filename(font['full_name']).replace(' ', '-'), ext))
|
|
item = self.oeb.manifest.add(fid, href,
|
|
mimetypes.guess_type('dummy.'+ext)[0],
|
|
data=font_scanner.get_font_data(font))
|
|
item.unload_data_from_memory()
|
|
|
|
cfont = {
|
|
'font-family': '"%s"'%font['font-family'],
|
|
'panose-1': ' '.join(map(str, font['panose'])),
|
|
'src': 'url(%s)'%item.href,
|
|
}
|
|
|
|
if i == 0:
|
|
generic_family = panose_to_css_generic_family(font['panose'])
|
|
body_font_family = "'%s',%s"%(font['font-family'], generic_family)
|
|
self.oeb.log('Embedding font: %s'%font['font-family'])
|
|
for k in ('font-weight', 'font-style', 'font-stretch'):
|
|
if font[k] != 'normal':
|
|
cfont[k] = font[k]
|
|
rule = '@font-face { %s }'%('; '.join('%s:%s'%(k, v) for k, v in
|
|
cfont.items()))
|
|
rule = css_parser.parseString(rule)
|
|
efi.append(rule)
|
|
|
|
return body_font_family, efi
|
|
|
|
def stylize_spine(self):
|
|
self.stylizers = {}
|
|
profile = self.context.source
|
|
css = ''
|
|
for item in self.items:
|
|
html = item.data
|
|
body = html.find(base.tag('xhtml', 'body'))
|
|
if 'style' in html.attrib:
|
|
b = body.attrib.get('style', '')
|
|
body.set('style', html.get('style') + ';' + b)
|
|
del html.attrib['style']
|
|
bs = body.get('style', '').split(';')
|
|
bs.append('margin-top: 0pt')
|
|
bs.append('margin-bottom: 0pt')
|
|
if float(self.context.margin_left) >= 0:
|
|
bs.append('margin-left : %gpt'%
|
|
float(self.context.margin_left))
|
|
if float(self.context.margin_right) >= 0:
|
|
bs.append('margin-right : %gpt'%
|
|
float(self.context.margin_right))
|
|
bs.extend(['padding-left: 0pt', 'padding-right: 0pt'])
|
|
if self.page_break_on_body:
|
|
bs.extend(['page-break-before: always'])
|
|
if self.context.change_justification != 'original':
|
|
bs.append('text-align: '+ self.context.change_justification)
|
|
if self.body_font_family:
|
|
bs.append('font-family: '+self.body_font_family)
|
|
body.set('style', '; '.join(bs))
|
|
stylizer = Stylizer(html, item.href, self.oeb, self.context, profile,
|
|
user_css=self.context.extra_css,
|
|
extra_css=css)
|
|
self.stylizers[item] = stylizer
|
|
|
|
def baseline_node(self, node, stylizer, sizes, csize):
|
|
csize = stylizer.style(node)['font-size']
|
|
if node.text:
|
|
sizes[csize] += len(COLLAPSE.sub(' ', node.text))
|
|
for child in node:
|
|
self.baseline_node(child, stylizer, sizes, csize)
|
|
if child.tail:
|
|
sizes[csize] += len(COLLAPSE.sub(' ', child.tail))
|
|
|
|
def baseline_spine(self):
|
|
sizes = collections.defaultdict(float)
|
|
for item in self.items:
|
|
html = item.data
|
|
stylizer = self.stylizers[item]
|
|
body = html.find(base.tag('xhtml', 'body'))
|
|
fsize = self.context.source.fbase
|
|
self.baseline_node(body, stylizer, sizes, fsize)
|
|
try:
|
|
sbase = max(list(sizes.items()), key=operator.itemgetter(1))[0]
|
|
except:
|
|
sbase = 12.0
|
|
self.oeb.logger.info(
|
|
"Source base font size is %0.05fpt" % sbase)
|
|
return sbase
|
|
|
|
def clean_edges(self, cssdict, style, fsize):
|
|
slineh = self.sbase * 1.26
|
|
dlineh = self.lineh
|
|
for kind in ('margin', 'padding'):
|
|
for edge in ('bottom', 'top'):
|
|
property = "%s-%s" % (kind, edge)
|
|
if property not in cssdict:
|
|
continue
|
|
if '%' in cssdict[property]:
|
|
continue
|
|
value = style[property]
|
|
if value == 0:
|
|
continue
|
|
elif value <= slineh:
|
|
cssdict[property] = "%0.5fem" % (dlineh / fsize)
|
|
else:
|
|
try:
|
|
value = round(value / slineh) * dlineh
|
|
except:
|
|
self.oeb.logger.warning(
|
|
'Invalid length:', value)
|
|
value = 0.0
|
|
cssdict[property] = "%0.5fem" % (value / fsize)
|
|
|
|
def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id, recurse=True):
|
|
if not isinstance(node.tag, (str, bytes)) \
|
|
or parse_utils.namespace(node.tag) != const.XHTML_NS:
|
|
return
|
|
tag = parse_utils.barename(node.tag)
|
|
style = stylizer.style(node)
|
|
cssdict = style.cssdict()
|
|
try:
|
|
font_size = style['font-size']
|
|
except:
|
|
font_size = self.sbase if self.sbase is not None else \
|
|
self.context.source.fbase
|
|
if tag == 'body' and isinstance(font_size, numbers.Number):
|
|
stylizer.body_font_size = font_size
|
|
if 'align' in node.attrib:
|
|
if tag != 'img':
|
|
cssdict['text-align'] = node.attrib['align']
|
|
if cssdict['text-align'] == 'center':
|
|
# align=center causes tables to be center aligned,
|
|
# which text-align does not. And the ever trustworthy Word
|
|
# uses this construct in its HTML output. See
|
|
# https://bugs.launchpad.net/bugs/1569583
|
|
if tag == 'table':
|
|
if 'margin-left' not in cssdict and 'margin-right' not in cssdict:
|
|
cssdict['margin-left'] = cssdict['margin-right'] = 'auto'
|
|
else:
|
|
for table in node.iterchildren(base.tag('xhtml', "table")):
|
|
ts = stylizer.style(table)
|
|
if ts.get('margin-left') is None and ts.get('margin-right') is None:
|
|
ts.set('margin-left', 'auto')
|
|
ts.set('margin-right', 'auto')
|
|
else:
|
|
val = node.attrib['align']
|
|
if val in ('middle', 'bottom', 'top'):
|
|
cssdict['vertical-align'] = val
|
|
elif val in ('left', 'right'):
|
|
cssdict['float'] = val
|
|
del node.attrib['align']
|
|
if 'valign' in node.attrib and tag == 'td':
|
|
if cssdict.get('vertical-align') == 'inherit':
|
|
cssdict['vertical-align'] = node.attrib['valign']
|
|
del node.attrib['valign']
|
|
if node.tag == base.tag('xhtml', 'font'):
|
|
tags = ['descendant::h:%s'%x for x in ('p', 'div', 'table', 'h1',
|
|
'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'dl', 'blockquote')]
|
|
# TODO(gryf): this will override tag from line 355. On purpose?
|
|
tag = 'div' if base.XPath('|'.join(tags))(node) else 'span'
|
|
node.tag = base.tag('xhtml', tag)
|
|
if 'size' in node.attrib:
|
|
def force_int(raw):
|
|
return int(re.search(r'([0-9+-]+)', raw).group(1))
|
|
size = node.attrib['size'].strip()
|
|
if size:
|
|
fnums = self.context.source.fnums
|
|
if size[0] in ('+', '-'):
|
|
# Oh, the warcrimes
|
|
try:
|
|
esize = 3 + force_int(size)
|
|
except:
|
|
esize = 3
|
|
if esize < 1:
|
|
esize = 1
|
|
if esize > 7:
|
|
esize = 7
|
|
font_size = fnums[esize]
|
|
else:
|
|
try:
|
|
font_size = fnums[force_int(size)]
|
|
except:
|
|
font_size = fnums[3]
|
|
cssdict['font-size'] = '%.1fpt'%font_size
|
|
del node.attrib['size']
|
|
if 'face' in node.attrib:
|
|
cssdict['font-family'] = node.attrib['face']
|
|
del node.attrib['face']
|
|
if 'color' in node.attrib:
|
|
try:
|
|
cssdict['color'] = cp_css.Property('color', node.attrib['color']).value
|
|
except (ValueError, dom.SyntaxErr):
|
|
pass
|
|
del node.attrib['color']
|
|
if 'bgcolor' in node.attrib:
|
|
try:
|
|
cssdict['background-color'] = cp_css.Property('background-color', node.attrib['bgcolor']).value
|
|
except (ValueError, dom.SyntaxErr):
|
|
pass
|
|
del node.attrib['bgcolor']
|
|
if tag == 'ol' and 'type' in node.attrib:
|
|
del node.attrib['type']
|
|
if cssdict.get('font-weight', '').lower() == 'medium':
|
|
cssdict['font-weight'] = 'normal' # ADE chokes on font-weight medium
|
|
|
|
fsize = font_size
|
|
is_drop_cap = (cssdict.get('float', None) == 'left' and 'font-size' in cssdict and len(node) == 0 and node.text and (
|
|
len(node.text) == 1 or (len(node.text) == 2 and 0x2000 <= ord(node.text[0]) <= 0x206f)))
|
|
# Detect drop caps generated by the docx input plugin
|
|
if node.tag and node.tag.endswith('}p') and len(node) == 0 and node.text and len(node.text.strip()) == 1 and \
|
|
not node.tail and 'line-height' in cssdict and 'font-size' in cssdict:
|
|
dp = node.getparent()
|
|
if dp.tag and dp.tag.endswith('}div') and len(dp) == 1 and not dp.text:
|
|
if stylizer.style(dp).cssdict().get('float', None) == 'left':
|
|
is_drop_cap = True
|
|
if not self.context.disable_font_rescaling and not is_drop_cap:
|
|
_sbase = self.sbase if self.sbase is not None else \
|
|
self.context.source.fbase
|
|
dyn_rescale = node.attrib.pop('data-calibre-rescale', None)
|
|
if dyn_rescale is not None:
|
|
try:
|
|
dyn_rescale = float(dyn_rescale) / 100
|
|
except Exception:
|
|
dyn_rescale = 1
|
|
fsize = self.fmap[_sbase]
|
|
fsize *= dyn_rescale
|
|
cssdict['font-size'] = '%0.5fem'%(fsize/psize)
|
|
psize = fsize
|
|
elif 'font-size' in cssdict or tag == 'body':
|
|
fsize = self.fmap[font_size]
|
|
try:
|
|
cssdict['font-size'] = "%0.5fem" % (fsize / psize)
|
|
except ZeroDivisionError:
|
|
cssdict['font-size'] = '%.1fpt'%fsize
|
|
psize = fsize
|
|
|
|
try:
|
|
minlh = self.context.minimum_line_height / 100.
|
|
slh = style['line-height']
|
|
if not is_drop_cap and isinstance(slh, numbers.Number) and slh < minlh * fsize:
|
|
cssdict['line-height'] = str(minlh)
|
|
except Exception:
|
|
self.oeb.logger.exception('Failed to set minimum line-height')
|
|
|
|
if cssdict:
|
|
for x in self.filter_css:
|
|
popval = cssdict.pop(x, None)
|
|
if self.body_font_family and popval and x == 'font-family' \
|
|
and popval.partition(',')[0][1:-1] == self.body_font_family.partition(',')[0][1:-1]:
|
|
cssdict[x] = popval
|
|
|
|
if cssdict:
|
|
if self.lineh and self.fbase and tag != 'body':
|
|
self.clean_edges(cssdict, style, psize)
|
|
if 'display' in cssdict and cssdict['display'] == 'in-line':
|
|
cssdict['display'] = 'inline'
|
|
if self.unfloat and 'float' in cssdict \
|
|
and cssdict.get('display', 'none') != 'none':
|
|
del cssdict['display']
|
|
if self.untable and 'display' in cssdict \
|
|
and cssdict['display'].startswith('table'):
|
|
display = cssdict['display']
|
|
if display == 'table-cell':
|
|
cssdict['display'] = 'inline'
|
|
else:
|
|
cssdict['display'] = 'block'
|
|
if 'vertical-align' in cssdict \
|
|
and cssdict['vertical-align'] == 'sup':
|
|
cssdict['vertical-align'] = 'super'
|
|
if self.lineh and 'line-height' not in cssdict:
|
|
lineh = self.lineh / psize
|
|
cssdict['line-height'] = "%0.5fem" % lineh
|
|
|
|
if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'):
|
|
if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle':
|
|
for prop in ('margin', 'padding', 'border'):
|
|
for edge in ('top', 'bottom'):
|
|
cssdict['%s-%s'%(prop, edge)] = '0pt'
|
|
if self.context.insert_blank_line:
|
|
cssdict['margin-top'] = cssdict['margin-bottom'] = \
|
|
'%fem'%self.context.insert_blank_line_size
|
|
indent_size = self.context.remove_paragraph_spacing_indent_size
|
|
keep_indents = indent_size < 0.0
|
|
if (self.context.remove_paragraph_spacing and not keep_indents and cssdict.get('text-align', None) not in ('center', 'right')):
|
|
cssdict['text-indent'] = "%1.1fem" % indent_size
|
|
|
|
pseudo_classes = style.pseudo_classes(self.filter_css)
|
|
if cssdict or pseudo_classes:
|
|
keep_classes = set()
|
|
|
|
if cssdict:
|
|
items = sorted(cssdict.items())
|
|
css = ';\n'.join(u'%s: %s' % (key, val) for key, val in items)
|
|
classes = node.get('class', '').strip() or 'calibre'
|
|
classes_list = classes.split()
|
|
# lower() because otherwise if the document uses the same class
|
|
# name with different case, both cases will apply, leading
|
|
# to incorrect results.
|
|
klass = ascii_text(STRIPNUM.sub('', classes_list[0])).lower().strip().replace(' ', '_')
|
|
if css in styles:
|
|
match = styles[css]
|
|
else:
|
|
match = klass + str(names[klass] or '')
|
|
styles[css] = match
|
|
names[klass] += 1
|
|
node.attrib['class'] = match
|
|
keep_classes.add(match)
|
|
|
|
for psel, cssdict in pseudo_classes.items():
|
|
items = sorted(cssdict.items())
|
|
css = ';\n'.join('%s: %s' % (key, val) for key, val in items)
|
|
pstyles = pseudo_styles[psel]
|
|
if css in pstyles:
|
|
match = pstyles[css]
|
|
else:
|
|
# We have to use a different class for each psel as
|
|
# otherwise you can have incorrect styles for a situation
|
|
# like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green }
|
|
# If the pcalibre class for a:hover and a:link is the same,
|
|
# then the class attribute for a.x tags will contain both
|
|
# that class and the class for a.x:hover, which is wrong.
|
|
klass = 'pcalibre'
|
|
match = klass + str(names[klass] or '')
|
|
pstyles[css] = match
|
|
names[klass] += 1
|
|
keep_classes.add(match)
|
|
node.attrib['class'] = ' '.join(keep_classes)
|
|
|
|
elif 'class' in node.attrib:
|
|
del node.attrib['class']
|
|
if 'style' in node.attrib:
|
|
del node.attrib['style']
|
|
if recurse:
|
|
for child in node:
|
|
self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id)
|
|
|
|
def flatten_head(self, item, href, global_href):
|
|
html = item.data
|
|
head = html.find(base.tag('xhtml', 'head'))
|
|
|
|
def safe_lower(x):
|
|
try:
|
|
x = x.lower()
|
|
except Exception:
|
|
pass
|
|
return x
|
|
|
|
for node in html.xpath('//*[local-name()="style" or local-name()="link"]'):
|
|
if node.tag == base.tag('xhtml', 'link') \
|
|
and safe_lower(node.get('rel', 'stylesheet')) == 'stylesheet' \
|
|
and safe_lower(node.get('type', base.CSS_MIME)) in base.OEB_STYLES:
|
|
node.getparent().remove(node)
|
|
elif node.tag == base.tag('xhtml', 'style') \
|
|
and node.get('type', base.CSS_MIME) in base.OEB_STYLES:
|
|
node.getparent().remove(node)
|
|
href = item.relhref(href)
|
|
l = etree.SubElement(head, base.tag('xhtml', 'link'),
|
|
rel='stylesheet', type=base.CSS_MIME, href=href)
|
|
l.tail='\n'
|
|
if global_href:
|
|
href = item.relhref(global_href)
|
|
l = etree.SubElement(head, base.tag('xhtml', 'link'),
|
|
rel='stylesheet', type=base.CSS_MIME, href=href)
|
|
l.tail = '\n'
|
|
|
|
def replace_css(self, css):
|
|
manifest = self.oeb.manifest
|
|
for item in manifest.values():
|
|
if item.media_type in base.OEB_STYLES:
|
|
manifest.remove(item)
|
|
id, href = manifest.generate('css', 'stylesheet.css')
|
|
sheet = css_parser.parseString(css, validate=False)
|
|
if self.transform_css_rules:
|
|
from ebook_converter.ebooks.css_transform_rules import transform_sheet
|
|
transform_sheet(self.transform_css_rules, sheet)
|
|
item = manifest.add(id, href, base.CSS_MIME, data=sheet)
|
|
self.oeb.manifest.main_stylesheet = item
|
|
return href
|
|
|
|
def collect_global_css(self):
|
|
global_css = collections.defaultdict(list)
|
|
for item in self.items:
|
|
stylizer = self.stylizers[item]
|
|
if float(self.context.margin_top) >= 0:
|
|
stylizer.page_rule['margin-top'] = '%gpt'%\
|
|
float(self.context.margin_top)
|
|
if float(self.context.margin_bottom) >= 0:
|
|
stylizer.page_rule['margin-bottom'] = '%gpt'%\
|
|
float(self.context.margin_bottom)
|
|
items = sorted(stylizer.page_rule.items())
|
|
css = ';\n'.join("%s: %s" % (key, val) for key, val in items)
|
|
css = ('@page {\n%s\n}\n'%css) if items else ''
|
|
rules = [base.css_text(r) for r in stylizer.font_face_rules + self.embed_font_rules]
|
|
raw = '\n\n'.join(rules)
|
|
css += '\n\n' + raw
|
|
global_css[css].append(item)
|
|
|
|
gc_map = {}
|
|
manifest = self.oeb.manifest
|
|
for css in global_css:
|
|
href = None
|
|
if css.strip():
|
|
id_, href = manifest.generate('page_css', 'page_styles.css')
|
|
sheet = css_parser.parseString(css, validate=False)
|
|
if self.transform_css_rules:
|
|
from ebook_converter.ebooks.css_transform_rules import transform_sheet
|
|
transform_sheet(self.transform_css_rules, sheet)
|
|
manifest.add(id_, href, base.CSS_MIME, data=sheet)
|
|
gc_map[css] = href
|
|
|
|
ans = {}
|
|
for css, items in global_css.items():
|
|
for item in items:
|
|
ans[item] = gc_map[css]
|
|
return ans
|
|
|
|
def flatten_spine(self):
|
|
names = collections.defaultdict(int)
|
|
styles, pseudo_styles = {}, collections.defaultdict(dict)
|
|
for item in self.items:
|
|
html = item.data
|
|
stylizer = self.stylizers[item]
|
|
if self.specializer is not None:
|
|
self.specializer(item, stylizer)
|
|
fsize = self.context.dest.fbase
|
|
self.flatten_node(html, stylizer, names, styles, pseudo_styles, fsize, item.id, recurse=False)
|
|
self.flatten_node(html.find(base.tag('xhtml', 'body')), stylizer, names, styles, pseudo_styles, fsize, item.id)
|
|
items = sorted(((key, val) for (val, key) in styles.items()))
|
|
# :hover must come after link and :active must come after :hover
|
|
psels = sorted(pseudo_styles, key=lambda x :
|
|
{'hover':1, 'active':2}.get(x, 0))
|
|
for psel in psels:
|
|
styles = pseudo_styles[psel]
|
|
if not styles:
|
|
continue
|
|
x = sorted(((k+':'+psel, v) for v, k in styles.items()))
|
|
items.extend(x)
|
|
|
|
css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items)
|
|
|
|
href = self.replace_css(css)
|
|
global_css = self.collect_global_css()
|
|
for item in self.items:
|
|
stylizer = self.stylizers[item]
|
|
self.flatten_head(item, href, global_css[item])
|