mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-08 12:44:12 +01:00
314 lines
11 KiB
Python
314 lines
11 KiB
Python
from collections import defaultdict
|
|
|
|
from ebook_converter.ebooks.oeb.base import urlnormalize, css_text
|
|
from ebook_converter.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont
|
|
from ebook_converter.polyglot.builtins import iteritems, itervalues
|
|
from ebook_converter.tinycss.fonts3 import parse_font_family
|
|
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
|
|
def get_font_properties(rule, default=None):
|
|
'''
|
|
Given a CSS rule, extract normalized font properties from
|
|
it. Note that shorthand font property should already have been expanded
|
|
by the CSS flattening code.
|
|
'''
|
|
props = {}
|
|
s = rule.style
|
|
for q in ('font-family', 'src', 'font-weight', 'font-stretch',
|
|
'font-style'):
|
|
g = 'uri' if q == 'src' else 'value'
|
|
try:
|
|
val = s.getProperty(q).propertyValue[0]
|
|
val = getattr(val, g)
|
|
if q == 'font-family':
|
|
val = parse_font_family(css_text(s.getProperty(q).propertyValue))
|
|
if val and val[0] == 'inherit':
|
|
val = None
|
|
except (IndexError, KeyError, AttributeError, TypeError, ValueError):
|
|
val = None if q in {'src', 'font-family'} else default
|
|
if q in {'font-weight', 'font-stretch', 'font-style'}:
|
|
val = str(val).lower() if (val or val == 0) else val
|
|
if val == 'inherit':
|
|
val = default
|
|
if q == 'font-weight':
|
|
val = {'normal':'400', 'bold':'700'}.get(val, val)
|
|
if val not in {'100', '200', '300', '400', '500', '600', '700',
|
|
'800', '900', 'bolder', 'lighter'}:
|
|
val = default
|
|
if val == 'normal':
|
|
val = '400'
|
|
elif q == 'font-style':
|
|
if val not in {'normal', 'italic', 'oblique'}:
|
|
val = default
|
|
elif q == 'font-stretch':
|
|
if val not in {'normal', 'ultra-condensed', 'extra-condensed',
|
|
'condensed', 'semi-condensed', 'semi-expanded',
|
|
'expanded', 'extra-expanded', 'ultra-expanded'}:
|
|
val = default
|
|
props[q] = val
|
|
return props
|
|
|
|
|
|
def find_font_face_rules(sheet, oeb):
|
|
'''
|
|
Find all @font-face rules in the given sheet and extract the relevant info from them.
|
|
sheet can be either a ManifestItem or a CSSStyleSheet.
|
|
'''
|
|
ans = []
|
|
try:
|
|
rules = sheet.data.cssRules
|
|
except AttributeError:
|
|
rules = sheet.cssRules
|
|
|
|
for i, rule in enumerate(rules):
|
|
if rule.type != rule.FONT_FACE_RULE:
|
|
continue
|
|
props = get_font_properties(rule, default='normal')
|
|
if not props['font-family'] or not props['src']:
|
|
continue
|
|
|
|
try:
|
|
path = sheet.abshref(props['src'])
|
|
except AttributeError:
|
|
path = props['src']
|
|
ff = oeb.manifest.hrefs.get(urlnormalize(path), None)
|
|
if not ff:
|
|
continue
|
|
props['item'] = ff
|
|
if props['font-weight'] in {'bolder', 'lighter'}:
|
|
props['font-weight'] = '400'
|
|
props['weight'] = int(props['font-weight'])
|
|
props['rule'] = rule
|
|
props['chars'] = set()
|
|
ans.append(props)
|
|
|
|
return ans
|
|
|
|
|
|
def elem_style(style_rules, cls, inherited_style):
|
|
'''
|
|
Find the effective style for the given element.
|
|
'''
|
|
classes = cls.split()
|
|
style = inherited_style.copy()
|
|
for cls in classes:
|
|
style.update(style_rules.get(cls, {}))
|
|
wt = style.get('font-weight', None)
|
|
pwt = inherited_style.get('font-weight', '400')
|
|
if wt == 'bolder':
|
|
style['font-weight'] = {
|
|
'100':'400',
|
|
'200':'400',
|
|
'300':'400',
|
|
'400':'700',
|
|
'500':'700',
|
|
}.get(pwt, '900')
|
|
elif wt == 'lighter':
|
|
style['font-weight'] = {
|
|
'600':'400', '700':'400',
|
|
'800':'700', '900':'700'}.get(pwt, '100')
|
|
|
|
return style
|
|
|
|
|
|
class SubsetFonts(object):
|
|
|
|
'''
|
|
Subset all embedded fonts. Must be run after CSS flattening, as it requires
|
|
CSS normalization and flattening to work.
|
|
'''
|
|
|
|
def __call__(self, oeb, log, opts):
|
|
self.oeb, self.log, self.opts = oeb, log, opts
|
|
|
|
self.find_embedded_fonts()
|
|
if not self.embedded_fonts:
|
|
self.log.debug('No embedded fonts found')
|
|
return
|
|
self.find_style_rules()
|
|
self.find_font_usage()
|
|
|
|
totals = [0, 0]
|
|
|
|
def remove(font):
|
|
totals[1] += len(font['item'].data)
|
|
self.oeb.manifest.remove(font['item'])
|
|
font['rule'].parentStyleSheet.deleteRule(font['rule'])
|
|
|
|
fonts = {}
|
|
for font in self.embedded_fonts:
|
|
item, chars = font['item'], font['chars']
|
|
if item.href in fonts:
|
|
fonts[item.href]['chars'] |= chars
|
|
else:
|
|
fonts[item.href] = font
|
|
|
|
for font in itervalues(fonts):
|
|
if not font['chars']:
|
|
self.log('The font %s is unused. Removing it.'%font['src'])
|
|
remove(font)
|
|
continue
|
|
try:
|
|
raw, old_stats, new_stats = subset(font['item'].data, font['chars'])
|
|
except NoGlyphs:
|
|
self.log('The font %s has no used glyphs. Removing it.'%font['src'])
|
|
remove(font)
|
|
continue
|
|
except UnsupportedFont as e:
|
|
self.log.warn('The font %s is unsupported for subsetting. %s'%(
|
|
font['src'], e))
|
|
sz = len(font['item'].data)
|
|
totals[0] += sz
|
|
totals[1] += sz
|
|
else:
|
|
font['item'].data = raw
|
|
nlen = sum(itervalues(new_stats))
|
|
olen = sum(itervalues(old_stats))
|
|
self.log('Decreased the font %s to %.1f%% of its original size'%
|
|
(font['src'], nlen/olen *100))
|
|
totals[0] += nlen
|
|
totals[1] += olen
|
|
|
|
font['item'].unload_data_from_memory()
|
|
|
|
if totals[0]:
|
|
self.log('Reduced total font size to %.1f%% of original'%
|
|
(totals[0]/totals[1] * 100))
|
|
|
|
def find_embedded_fonts(self):
|
|
'''
|
|
Find all @font-face rules and extract the relevant info from them.
|
|
'''
|
|
self.embedded_fonts = []
|
|
for item in self.oeb.manifest:
|
|
if not hasattr(item.data, 'cssRules'):
|
|
continue
|
|
self.embedded_fonts.extend(find_font_face_rules(item, self.oeb))
|
|
|
|
def find_style_rules(self):
|
|
'''
|
|
Extract all font related style information from all stylesheets into a
|
|
dict mapping classes to font properties specified by that class. All
|
|
the heavy lifting has already been done by the CSS flattening code.
|
|
'''
|
|
rules = defaultdict(dict)
|
|
for item in self.oeb.manifest:
|
|
if not hasattr(item.data, 'cssRules'):
|
|
continue
|
|
for i, rule in enumerate(item.data.cssRules):
|
|
if rule.type != rule.STYLE_RULE:
|
|
continue
|
|
props = {k:v for k,v in
|
|
iteritems(get_font_properties(rule)) if v}
|
|
if not props:
|
|
continue
|
|
for sel in rule.selectorList:
|
|
sel = sel.selectorText
|
|
if sel and sel.startswith('.'):
|
|
# We dont care about pseudo-selectors as the worst that
|
|
# can happen is some extra characters will remain in
|
|
# the font
|
|
sel = sel.partition(':')[0]
|
|
rules[sel[1:]].update(props)
|
|
|
|
self.style_rules = dict(rules)
|
|
|
|
def find_font_usage(self):
|
|
for item in self.oeb.manifest:
|
|
if not hasattr(item.data, 'xpath'):
|
|
continue
|
|
for body in item.data.xpath('//*[local-name()="body"]'):
|
|
base = {'font-family':['serif'], 'font-weight': '400',
|
|
'font-style':'normal', 'font-stretch':'normal'}
|
|
self.find_usage_in(body, base)
|
|
|
|
def used_font(self, style):
|
|
'''
|
|
Given a style find the embedded font that matches it. Returns None if
|
|
no match is found (can happen if no family matches).
|
|
'''
|
|
ff = style.get('font-family', [])
|
|
lnames = {str(x).lower() for x in ff}
|
|
matching_set = []
|
|
|
|
# Filter on font-family
|
|
for ef in self.embedded_fonts:
|
|
flnames = {x.lower() for x in ef.get('font-family', [])}
|
|
if not lnames.intersection(flnames):
|
|
continue
|
|
matching_set.append(ef)
|
|
if not matching_set:
|
|
return None
|
|
|
|
# Filter on font-stretch
|
|
widths = {x:i for i, x in enumerate(('ultra-condensed',
|
|
'extra-condensed', 'condensed', 'semi-condensed', 'normal',
|
|
'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded'
|
|
))}
|
|
|
|
width = widths[style.get('font-stretch', 'normal')]
|
|
for f in matching_set:
|
|
f['width'] = widths[style.get('font-stretch', 'normal')]
|
|
|
|
min_dist = min(abs(width-f['width']) for f in matching_set)
|
|
nearest = [f for f in matching_set if abs(width-f['width']) ==
|
|
min_dist]
|
|
if width <= 4:
|
|
lmatches = [f for f in nearest if f['width'] <= width]
|
|
else:
|
|
lmatches = [f for f in nearest if f['width'] >= width]
|
|
matching_set = (lmatches or nearest)
|
|
|
|
# Filter on font-style
|
|
fs = style.get('font-style', 'normal')
|
|
order = {
|
|
'oblique':['oblique', 'italic', 'normal'],
|
|
'normal':['normal', 'oblique', 'italic']
|
|
}.get(fs, ['italic', 'oblique', 'normal'])
|
|
for q in order:
|
|
matches = [f for f in matching_set if f.get('font-style', 'normal') == q]
|
|
if matches:
|
|
matching_set = matches
|
|
break
|
|
|
|
# Filter on font weight
|
|
fw = int(style.get('font-weight', '400'))
|
|
if fw == 400:
|
|
q = [400, 500, 300, 200, 100, 600, 700, 800, 900]
|
|
elif fw == 500:
|
|
q = [500, 400, 300, 200, 100, 600, 700, 800, 900]
|
|
elif fw < 400:
|
|
q = [fw] + list(range(fw-100, -100, -100)) + list(range(fw+100,
|
|
100, 1000))
|
|
else:
|
|
q = [fw] + list(range(fw+100, 100, 1000)) + list(range(fw-100,
|
|
-100, -100))
|
|
for wt in q:
|
|
matches = [f for f in matching_set if f['weight'] == wt]
|
|
if matches:
|
|
return matches[0]
|
|
|
|
def find_chars(self, elem):
|
|
ans = set()
|
|
if elem.text:
|
|
ans |= set(elem.text)
|
|
for child in elem:
|
|
if child.tail:
|
|
ans |= set(child.tail)
|
|
return ans
|
|
|
|
def find_usage_in(self, elem, inherited_style):
|
|
style = elem_style(self.style_rules, elem.get('class', '') or '', inherited_style)
|
|
for child in elem:
|
|
self.find_usage_in(child, style)
|
|
font = self.used_font(style)
|
|
if font:
|
|
chars = self.find_chars(elem)
|
|
if chars:
|
|
font['chars'] |= chars
|