mirror of
https://github.com/gryf/ebook-converter.git
synced 2025-12-27 19:42:26 +01:00
Added docx writer related modules
This commit is contained in:
9
ebook_converter/ebooks/docx/writer/__init__.py
Normal file
9
ebook_converter/ebooks/docx/writer/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
|
||||
|
||||
281
ebook_converter/ebooks/docx/writer/container.py
Normal file
281
ebook_converter/ebooks/docx/writer/container.py
Normal file
@@ -0,0 +1,281 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import textwrap, os
|
||||
|
||||
from lxml import etree
|
||||
from lxml.builder import ElementMaker
|
||||
|
||||
from calibre import guess_type
|
||||
from calibre.constants import numeric_version, __appname__
|
||||
from calibre.ebooks.docx.names import DOCXNamespace
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.ebooks.pdf.render.common import PAPER_SIZES
|
||||
from calibre.utils.date import utcnow
|
||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from polyglot.builtins import iteritems, map, unicode_type, native_string_type
|
||||
|
||||
|
||||
def xml2str(root, pretty_print=False, with_tail=False):
    """Serialize *root* to a UTF-8 encoded XML bytestring.

    Namespace declarations are cleaned up first (when the installed lxml
    supports it), then the tree is serialized with an XML declaration.
    """
    cleanup = getattr(etree, 'cleanup_namespaces', None)
    if cleanup is not None:
        cleanup(root)
    return etree.tostring(
        root, encoding='utf-8', xml_declaration=True,
        pretty_print=pretty_print, with_tail=with_tail)
|
||||
|
||||
|
||||
def page_size(opts):
    """Return the output page (width, height) in points.

    Starts from the named paper size selected by ``opts.docx_page_size``;
    a non-None ``opts.docx_custom_page_size`` ("WxH" string) overrides it.
    """
    dims = PAPER_SIZES[opts.docx_page_size]
    custom = opts.docx_custom_page_size
    if custom is not None:
        w, _, h = custom.partition('x')
        dims = float(w), float(h)
    return dims
|
||||
|
||||
|
||||
def page_margin(opts, which):
    """Return the page margin (in pts) for side *which* (e.g. 'left').

    A zero DOCX-specific margin means "not set"; the generic conversion
    margin for the same side is used as the fallback.
    """
    margin = getattr(opts, 'docx_page_margin_' + which)
    if margin == 0.0:
        margin = getattr(opts, 'margin_' + which)
    return margin
|
||||
|
||||
|
||||
def page_effective_area(opts):
    """Return the (width, height) of the printable area, in pts.

    This is the full page size minus the four page margins.
    """
    w, h = page_size(opts)
    w -= page_margin(opts, 'left') + page_margin(opts, 'right')
    h -= page_margin(opts, 'top') + page_margin(opts, 'bottom')
    return w, h
|
||||
|
||||
|
||||
def create_skeleton(opts, namespaces=None):
    """Create the basic document.xml and styles.xml trees.

    Returns ``(doc, styles, body)`` where *doc* is the <w:document>
    root (with *body* as its <w:body> child, already carrying section
    properties derived from *opts*) and *styles* is the <w:styles> root
    holding the document-wide defaults.
    """
    namespaces = namespaces or DOCXNamespace().namespaces

    def w(x):
        # Fully qualify a local name in the main wordprocessingml namespace
        return '{%s}%s' % (namespaces['w'], x)
    dn = {k:v for k, v in iteritems(namespaces) if k in {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne', 'a', 'pic'}}
    E = ElementMaker(namespace=dn['w'], nsmap=dn)
    doc = E.document()
    body = E.body()
    doc.append(body)
    width, height = page_size(opts)
    # Word measures page geometry in twentieths of a point (twips)
    width, height = int(20 * width), int(20 * height)

    def margin(which):
        # (qualified-attribute-name, value-in-twips) pair for w:pgMar
        val = page_margin(opts, which)
        return w(which), unicode_type(int(val * 20))
    body.append(E.sectPr(
        E.pgSz(**{w('w'):unicode_type(width), w('h'):unicode_type(height)}),
        E.pgMar(**dict(map(margin, 'left top right bottom'.split()))),
        E.cols(**{w('space'):'720'}),
        E.docGrid(**{w('linePitch'):"360"}),
    ))

    dn = {k:v for k, v in iteritems(namespaces) if k in tuple('wra') + ('wp',)}
    E = ElementMaker(namespace=dn['w'], nsmap=dn)
    # Document-wide default run (font/size/language) and paragraph
    # (spacing) properties
    styles = E.styles(
        E.docDefaults(
            E.rPrDefault(
                E.rPr(
                    E.rFonts(**{w('asciiTheme'):"minorHAnsi", w('eastAsiaTheme'):"minorEastAsia", w('hAnsiTheme'):"minorHAnsi", w('cstheme'):"minorBidi"}),
                    E.sz(**{w('val'):'22'}),
                    E.szCs(**{w('val'):'22'}),
                    E.lang(**{w('val'):'en-US', w('eastAsia'):"en-US", w('bidi'):"ar-SA"})
                )
            ),
            E.pPrDefault(
                E.pPr(
                    E.spacing(**{w('after'):"0", w('line'):"276", w('lineRule'):"auto"})
                )
            )
        )
    )
    return doc, styles, body
|
||||
|
||||
|
||||
def update_doc_props(root, mi, namespace):
    """Fill the core-properties element *root* from the metadata *mi*.

    Elements already present with the same tag are removed before the
    fresh one is appended, so repeated calls do not accumulate
    duplicates.
    """
    def set_metadata(name, text=None, ns='dc'):
        # Build the fully-qualified element, drop any existing children
        # with the same tag, then append the new one.
        elem = root.makeelement('{%s}%s' % (namespace.namespaces[ns], name))
        for existing in tuple(root):
            if existing.tag == elem.tag:
                root.remove(existing)
        elem.text = text
        root.append(elem)
        return elem

    set_metadata('title', mi.title)
    set_metadata('creator', authors_to_string(mi.authors))
    if mi.tags:
        set_metadata('keywords', ', '.join(mi.tags), ns='cp')
    if mi.comments:
        set_metadata('description', mi.comments)
    if mi.languages:
        lang = canonicalize_lang(mi.languages[0])
        set_metadata('language', lang_as_iso639_1(lang) or lang)
|
||||
|
||||
|
||||
class DocumentRelationships(object):
    """Map (target, type, target_mode) triples to OPC relationship ids.

    Ids are handed out sequentially as rId1, rId2, ... and the standard
    styles/numbering/webSettings/fontTable relationships are registered
    up front.
    """

    def __init__(self, namespace):
        self.rmap = {}
        self.namespace = namespace
        defaults = {
            namespace.names['STYLES']: 'styles.xml',
            namespace.names['NUMBERING']: 'numbering.xml',
            namespace.names['WEB_SETTINGS']: 'webSettings.xml',
            namespace.names['FONTS']: 'fontTable.xml',
        }
        for rtype, target in iteritems(defaults):
            self.add_relationship(target, rtype)

    def get_relationship_id(self, target, rtype, target_mode=None):
        """Return the id of an existing relationship, or None."""
        return self.rmap.get((target, rtype, target_mode))

    def add_relationship(self, target, rtype, target_mode=None):
        """Return the id for this relationship, creating it if needed."""
        key = (target, rtype, target_mode)
        rid = self.rmap.get(key)
        if rid is None:
            rid = 'rId%d' % (len(self.rmap) + 1)
            self.rmap[key] = rid
        return rid

    def add_image(self, target):
        """Register *target* as an image relationship; return its id."""
        return self.add_relationship(target, self.namespace.names['IMAGES'])

    def serialize(self):
        """Serialize all relationships as a .rels XML bytestring."""
        namespaces = self.namespace.namespaces
        E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']})
        relationships = E.Relationships()
        for (target, rtype, target_mode), rid in iteritems(self.rmap):
            rel = E.Relationship(Id=rid, Type=rtype, Target=target)
            if target_mode is not None:
                rel.set('TargetMode', target_mode)
            relationships.append(rel)
        return xml2str(relationships)
|
||||
|
||||
|
||||
class DOCX(object):
    """In-memory representation of a DOCX (OPC zip) package.

    Collects the document, styles, numbering, fonts and images produced
    by the conversion and writes them all out as a zip container in
    :meth:`write`.
    """

    def __init__(self, opts, log):
        self.namespace = DOCXNamespace()
        namespaces = self.namespace.namespaces
        self.opts, self.log = opts, log
        self.document_relationships = DocumentRelationships(self.namespace)
        # Roots for fontTable.xml and numbering.xml; populated elsewhere
        self.font_table = etree.Element('{%s}fonts' % namespaces['w'], nsmap={k:namespaces[k] for k in 'wr'})
        self.numbering = etree.Element('{%s}numbering' % namespaces['w'], nsmap={k:namespaces[k] for k in 'wr'})
        E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']})
        # Relationship root for embedded font files (fontTable.xml.rels)
        self.embedded_fonts = E.Relationships()
        # archive path -> font data bytes
        self.fonts = {}
        # archive path -> callable returning image data
        self.images = {}

    # Boilerplate {{{
    @property
    def contenttypes(self):
        """[Content_Types].xml: content-type declarations for all parts."""
        E = ElementMaker(namespace=self.namespace.namespaces['ct'], nsmap={None:self.namespace.namespaces['ct']})
        types = E.Types()
        # Explicit per-part overrides
        for partname, mt in iteritems({
            "/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
            "/word/document.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
            "/word/numbering.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
            "/word/styles.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
            "/word/endnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
            "/word/settings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
            "/word/theme/theme1.xml": "application/vnd.openxmlformats-officedocument.theme+xml",
            "/word/fontTable.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
            "/word/webSettings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
            "/docProps/core.xml": "application/vnd.openxmlformats-package.core-properties+xml",
            "/docProps/app.xml": "application/vnd.openxmlformats-officedocument.extended-properties+xml",
        }):
            types.append(E.Override(PartName=partname, ContentType=mt))
        # Extension-based defaults for common image/xml formats
        added = {'png', 'gif', 'jpeg', 'jpg', 'svg', 'xml'}
        for ext in added:
            types.append(E.Default(Extension=ext, ContentType=guess_type('a.'+ext)[0]))
        for ext, mt in iteritems({
            "rels": "application/vnd.openxmlformats-package.relationships+xml",
            "odttf": "application/vnd.openxmlformats-officedocument.obfuscatedFont",
        }):
            added.add(ext)
            types.append(E.Default(Extension=ext, ContentType=mt))
        # Register any image extensions not already covered above
        for fname in self.images:
            ext = fname.rpartition(os.extsep)[-1]
            if ext not in added:
                added.add(ext)
                mt = guess_type('a.' + ext)[0]
                if mt:
                    types.append(E.Default(Extension=ext, ContentType=mt))
        return xml2str(types)

    @property
    def appproperties(self):
        """docProps/app.xml: extended (application) properties.

        NOTE: reads ``self.mi``, which is only set by convert_metadata();
        write() calls convert_metadata() before reading this property.
        """
        E = ElementMaker(namespace=self.namespace.namespaces['ep'], nsmap={None:self.namespace.namespaces['ep']})
        props = E.Properties(
            E.Application(__appname__),
            E.AppVersion('%02d.%04d' % numeric_version[:2]),
            E.DocSecurity('0'),
            E.HyperlinksChanged('false'),
            E.LinksUpToDate('true'),
            E.ScaleCrop('false'),
            E.SharedDoc('false'),
        )
        if self.mi.publisher:
            props.append(E.Company(self.mi.publisher))
        return xml2str(props)

    @property
    def containerrels(self):
        """_rels/.rels: the package-level relationships."""
        return textwrap.dedent('''\
            <?xml version='1.0' encoding='utf-8'?>
            <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
                <Relationship Id="rId3" Type="{APPPROPS}" Target="docProps/app.xml"/>
                <Relationship Id="rId2" Type="{DOCPROPS}" Target="docProps/core.xml"/>
                <Relationship Id="rId1" Type="{DOCUMENT}" Target="word/document.xml"/>
            </Relationships>'''.format(**self.namespace.names)).encode('utf-8')

    @property
    def websettings(self):
        """word/webSettings.xml: fixed web-related settings."""
        E = ElementMaker(namespace=self.namespace.namespaces['w'], nsmap={'w':self.namespace.namespaces['w']})
        ws = E.webSettings(
            E.optimizeForBrowser, E.allowPNG, E.doNotSaveAsSingleFile)
        return xml2str(ws)
    # }}}

    def convert_metadata(self, mi):
        """Build docProps/core.xml from *mi*; also stores mi on self."""
        namespaces = self.namespace.namespaces
        E = ElementMaker(namespace=namespaces['cp'], nsmap={x:namespaces[x] for x in 'cp dc dcterms xsi'.split()})
        cp = E.coreProperties(E.revision("1"), E.lastModifiedBy('calibre'))
        # W3CDTF timestamp without fractional seconds, e.g. 2013-01-01T00:00:00Z
        ts = utcnow().isoformat(native_string_type('T')).rpartition('.')[0] + 'Z'
        for x in 'created modified'.split():
            x = cp.makeelement('{%s}%s' % (namespaces['dcterms'], x), **{'{%s}type' % namespaces['xsi']:'dcterms:W3CDTF'})
            x.text = ts
            cp.append(x)
        self.mi = mi
        update_doc_props(cp, self.mi, self.namespace)
        return xml2str(cp)

    def create_empty_document(self, mi):
        # Only the document and styles skeletons are kept; the body
        # element returned by create_skeleton is discarded here.
        self.document, self.styles = create_skeleton(self.opts)[:2]

    def write(self, path_or_stream, mi, create_empty_document=False):
        """Write the complete DOCX package to *path_or_stream*.

        Part order matters: convert_metadata() must run before the
        appproperties property is read (it sets self.mi).
        """
        if create_empty_document:
            self.create_empty_document(mi)
        with ZipFile(path_or_stream, 'w') as zf:
            zf.writestr('[Content_Types].xml', self.contenttypes)
            zf.writestr('_rels/.rels', self.containerrels)
            zf.writestr('docProps/core.xml', self.convert_metadata(mi))
            zf.writestr('docProps/app.xml', self.appproperties)
            zf.writestr('word/webSettings.xml', self.websettings)
            zf.writestr('word/document.xml', xml2str(self.document))
            zf.writestr('word/styles.xml', xml2str(self.styles))
            zf.writestr('word/numbering.xml', xml2str(self.numbering))
            zf.writestr('word/fontTable.xml', xml2str(self.font_table))
            zf.writestr('word/_rels/document.xml.rels', self.document_relationships.serialize())
            zf.writestr('word/_rels/fontTable.xml.rels', xml2str(self.embedded_fonts))
            for fname, data_getter in iteritems(self.images):
                zf.writestr(fname, data_getter())
            for fname, data in iteritems(self.fonts):
                zf.writestr(fname, data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Quick smoke test: dump the boilerplate web settings XML
    docx = DOCX(None, None)
    print(docx.websettings)
|
||||
78
ebook_converter/ebooks/docx/writer/fonts.py
Normal file
78
ebook_converter/ebooks/docx/writer/fonts.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from collections import defaultdict
|
||||
from uuid import uuid4
|
||||
|
||||
from calibre.ebooks.oeb.base import OEB_STYLES
|
||||
from calibre.ebooks.oeb.transforms.subset import find_font_face_rules
|
||||
from polyglot.builtins import range
|
||||
|
||||
|
||||
def obfuscate_font_data(data, key):
    """Obfuscate font *data* per the OOXML embedded-font scheme.

    The first 32 bytes are XORed with the reversed bytes of the UUID
    *key* (cycled); the remainder of the font is returned untouched.
    """
    key_bytes = bytearray(reversed(key.bytes))
    klen = len(key_bytes)
    header = bytearray(data[:32])
    obfuscated = bytearray(b ^ key_bytes[i % klen] for i, b in enumerate(header))
    return bytes(obfuscated) + data[32:]
|
||||
|
||||
|
||||
class FontsManager(object):
    """Builds fontTable.xml and embeds the fonts the book actually uses."""

    def __init__(self, namespace, oeb, opts):
        self.namespace = namespace
        self.oeb, self.log, self.opts = oeb, oeb.log, opts

    def serialize(self, text_styles, fonts, embed_relationships, font_data_map):
        """Populate *fonts* (the fontTable root) with every font family
        referenced by *text_styles*, embedding matching @font-face fonts
        from the book into *font_data_map* and recording their
        relationships in *embed_relationships*."""
        makeelement = self.namespace.makeelement
        # Referenced families, de-duplicated case-insensitively
        font_families, seen = set(), set()
        for ts in text_styles:
            if ts.font_family:
                lf = ts.font_family.lower()
                if lf not in seen:
                    seen.add(lf)
                    font_families.add(ts.font_family)
        family_map = {}
        for family in sorted(font_families):
            family_map[family] = makeelement(fonts, 'w:font', w_name=family)

        # Gather @font-face rules from every stylesheet in the book
        embedded_fonts = []
        for item in self.oeb.manifest:
            if item.media_type in OEB_STYLES and hasattr(item.data, 'cssRules'):
                embedded_fonts.extend(find_font_face_rules(item, self.oeb))

        num = 0
        face_map = defaultdict(set)
        rel_map = {}
        for ef in embedded_fonts:
            ff = ef['font-family'][0]
            if ff not in font_families:
                continue  # font not referenced by any text style
            num += 1
            bold = ef['weight'] > 400
            italic = ef['font-style'] != 'normal'
            # Map (bold, italic) onto the w:embed* face suffix
            tag = 'Regular'
            if bold or italic:
                tag = 'Italic'
            if bold and italic:
                tag = 'BoldItalic'
            elif bold:
                tag = 'Bold'
            if tag in face_map[ff]:
                continue  # only embed one font per face per family
            face_map[ff].add(tag)
            font = family_map[ff]
            key = uuid4()
            item = ef['item']
            rid = rel_map.get(item)
            if rid is None:
                rel_map[item] = rid = 'rId%d' % num
                fname = 'fonts/font%d.odttf' % num
                makeelement(embed_relationships, 'Relationship', Id=rid, Type=self.namespace.names['EMBEDDED_FONT'], Target=fname)
                font_data_map['word/' + fname] = obfuscate_font_data(item.data, key)
            # NOTE(review): key is regenerated per @font-face rule, but the
            # obfuscated data is only written on the first use of *item*.
            # If the same item is reused, w:fontKey here will not match the
            # key the data was obfuscated with — confirm intended.
            makeelement(font, 'w:embed' + tag, r_id=rid,
                        w_fontKey='{%s}' % key.urn.rpartition(':')[-1].upper(),
                        w_subsetted="true" if self.opts.subset_embedded_fonts else "false")
|
||||
617
ebook_converter/ebooks/docx/writer/from_html.py
Normal file
617
ebook_converter/ebooks/docx/writer/from_html.py
Normal file
@@ -0,0 +1,617 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import re
|
||||
from collections import Counter
|
||||
|
||||
from calibre.ebooks.docx.writer.container import create_skeleton, page_size, page_effective_area
|
||||
from calibre.ebooks.docx.writer.styles import StylesManager, FloatSpec
|
||||
from calibre.ebooks.docx.writer.links import LinksManager
|
||||
from calibre.ebooks.docx.writer.images import ImagesManager
|
||||
from calibre.ebooks.docx.writer.fonts import FontsManager
|
||||
from calibre.ebooks.docx.writer.tables import Table
|
||||
from calibre.ebooks.docx.writer.lists import ListsManager
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
||||
from calibre.ebooks.oeb.base import XPath, barename
|
||||
from calibre.utils.localization import lang_as_iso639_1
|
||||
from polyglot.builtins import unicode_type, string_or_bytes
|
||||
|
||||
|
||||
def lang_for_tag(tag):
    """Return the ISO 639-1 language code declared on *tag*, if any.

    Checks the plain ``lang`` attribute first, then ``xml:lang``;
    returns None when neither yields a recognizable language.
    """
    plain = lang_as_iso639_1(tag.get('lang'))
    if plain:
        return plain
    xml_lang = lang_as_iso639_1(tag.get('{http://www.w3.org/XML/1998/namespace}lang'))
    if xml_lang:
        return xml_lang
|
||||
|
||||
|
||||
class Style(St):
    """Conversion-pipeline Style with cached letter-spacing resolution."""

    def __init__(self, *args, **kwargs):
        St.__init__(self, *args, **kwargs)
        # Cache for the resolved letter-spacing ('normal' or a length)
        self._letterSpacing = None

    @property
    def letterSpacing(self):
        """The resolved CSS letter-spacing, computed once and cached.

        BUG FIX: the cache guard previously read ``is not None``, so the
        value was never computed and the property always returned None
        (letter-spacing was silently ignored). Compute on first access.
        """
        if self._letterSpacing is None:
            val = self._get('letter-spacing')
            if val == 'normal':
                self._letterSpacing = val
            else:
                self._letterSpacing = self._unit_convert(val)
        return self._letterSpacing
|
||||
|
||||
|
||||
class Stylizer(Sz):
    """Stylizer that hands out letter-spacing aware Style objects."""

    def style(self, element):
        """Return the cached Style for *element*, creating one on demand."""
        if element in self._styles:
            return self._styles[element]
        return Style(element, self)
|
||||
|
||||
|
||||
class TextRun(object):
    """A run of text, breaks and/or images sharing a single text style."""

    # Whitespace-collapsing pattern, compiled once and cached on the class
    ws_pat = None

    def __init__(self, namespace, style, first_html_parent, lang=None):
        self.first_html_parent = first_html_parent
        if self.ws_pat is None:
            TextRun.ws_pat = self.ws_pat = re.compile(r'\s+')
        self.style = style
        # Entries are (text, preserve_whitespace, bookmark) for text,
        # (None, clear, bookmark) for line breaks, and
        # (drawing_element, None, bookmark) for images.
        self.texts = []
        self.link = None
        self.lang = lang
        self.parent_style = None
        self.makeelement = namespace.makeelement
        self.descendant_style = None

    def add_text(self, text, preserve_whitespace, bookmark=None, link=None):
        """Append *text* to the run, collapsing whitespace unless asked
        to preserve it. The last *link* passed wins for the whole run."""
        if not preserve_whitespace:
            text = self.ws_pat.sub(' ', text)
            if text.strip() != text:
                # If preserve_whitespace is False, Word ignores leading and
                # trailing whitespace
                preserve_whitespace = True
        self.texts.append((text, preserve_whitespace, bookmark))
        self.link = link

    def add_break(self, clear='none', bookmark=None):
        # A None text marks a <w:br/>; second slot carries the w:clear value
        self.texts.append((None, clear, bookmark))

    def add_image(self, drawing, bookmark=None):
        # The drawing element is serialized verbatim inside the run
        self.texts.append((drawing, None, bookmark))

    def serialize(self, p, links_manager):
        """Emit this run as a <w:r> under paragraph *p*, wrapped in a
        hyperlink element when this run carries a link."""
        makeelement = self.makeelement
        parent = p if self.link is None else links_manager.serialize_hyperlink(p, self.link)
        r = makeelement(parent, 'w:r')
        rpr = makeelement(r, 'w:rPr', append=False)
        if getattr(self.descendant_style, 'id', None) is not None:
            makeelement(rpr, 'w:rStyle', w_val=self.descendant_style.id)
        if self.lang:
            makeelement(rpr, 'w:lang', w_bidi=self.lang, w_val=self.lang, w_eastAsia=self.lang)
        if len(rpr) > 0:
            # Only attach run properties when there is something in them
            r.append(rpr)

        for text, preserve_whitespace, bookmark in self.texts:
            if bookmark is not None:
                bid = links_manager.bookmark_id
                makeelement(r, 'w:bookmarkStart', w_id=unicode_type(bid), w_name=bookmark)
            if text is None:
                # Break entry: the second slot is the w:clear value
                makeelement(r, 'w:br', w_clear=preserve_whitespace)
            elif hasattr(text, 'xpath'):
                # Drawing/image element, appended as-is
                r.append(text)
            else:
                t = makeelement(r, 'w:t')
                t.text = text or ''
                if preserve_whitespace:
                    t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
            if bookmark is not None:
                makeelement(r, 'w:bookmarkEnd', w_id=unicode_type(bid))

    def __repr__(self):
        return repr(self.texts)

    def is_empty(self):
        # Empty when there are no entries, or only one empty,
        # non-whitespace-preserving text entry
        if not self.texts:
            return True
        if len(self.texts) == 1 and self.texts[0][:2] == ('', False):
            return True
        return False

    @property
    def style_weight(self):
        # Total character count of plain-text entries; used to decide
        # which run's style dominates
        ans = 0
        for text, preserve_whitespace, bookmark in self.texts:
            if isinstance(text, unicode_type):
                ans += len(text)
        return ans
|
||||
|
||||
|
||||
class Block(object):
    """A DOCX paragraph (<w:p>) built from one HTML block element.

    Holds the runs of text/images for the paragraph along with the
    paragraph-level style, bookmarks, numbering and page-break flags.
    """

    def __init__(self, namespace, styles_manager, links_manager, html_block, style, is_table_cell=False, float_spec=None, is_list_item=False, parent_bg=None):
        self.force_not_empty = False
        self.namespace = namespace
        self.bookmarks = set()
        # For list items, remember the originating tag/style so numbering
        # can be resolved later
        self.list_tag = (html_block, style) if is_list_item else None
        self.is_first_block = False
        self.numbering_id = None
        self.parent_items = None
        self.html_block = html_block
        self.html_tag = barename(html_block.tag)
        self.float_spec = float_spec
        if float_spec is not None:
            float_spec.blocks.append(self)
        self.html_style = style
        self.style = styles_manager.create_block_style(style, html_block, is_table_cell=is_table_cell, parent_bg=parent_bg)
        self.styles_manager, self.links_manager = styles_manager, links_manager
        self.keep_next = False
        self.runs = []
        self.skipped = False
        self.linked_style = None
        self.page_break_before = style['page-break-before'] == 'always'
        self.keep_lines = style['page-break-inside'] == 'avoid'
        self.page_break_after = False
        self.block_lang = None

    def resolve_skipped(self, next_block):
        """Mark this block as skipped if it is empty and *next_block* is
        its first child, handing any list-item tag over to it."""
        if not self.is_empty():
            return
        if len(self.html_block) > 0 and self.html_block[0] is next_block.html_block:
            self.skipped = True
            if self.list_tag is not None:
                next_block.list_tag = self.list_tag

    def add_text(self, text, style, ignore_leading_whitespace=False, html_parent=None, is_parent_style=False, bookmark=None, link=None, lang=None):
        """Append *text*, reusing the last run when its style, link and
        language all match, otherwise starting a new run."""
        ws = style['white-space']
        preserve_whitespace = ws in {'pre', 'pre-wrap', '-o-pre-wrap'}
        ts = self.styles_manager.create_text_style(style, is_parent_style=is_parent_style)
        if self.runs and ts == self.runs[-1].style and link == self.runs[-1].link and lang == self.runs[-1].lang:
            run = self.runs[-1]
        else:
            run = TextRun(self.namespace, ts, self.html_block if html_parent is None else html_parent, lang=lang)
            self.runs.append(run)
        if ignore_leading_whitespace and not preserve_whitespace:
            text = text.lstrip()
        if preserve_whitespace or ws == 'pre-line':
            # One run entry per source line, separated by explicit breaks
            for text in text.splitlines():
                run.add_text(text, preserve_whitespace, bookmark=bookmark, link=link)
                bookmark = None  # only attach the bookmark once
                run.add_break()
        else:
            run.add_text(text, preserve_whitespace, bookmark=bookmark, link=link)

    def add_break(self, clear='none', bookmark=None):
        """Add a line break, creating a run with this block's style if
        the block has no runs yet."""
        if self.runs:
            run = self.runs[-1]
        else:
            run = TextRun(self.namespace, self.styles_manager.create_text_style(self.html_style), self.html_block)
            self.runs.append(run)
        run.add_break(clear=clear, bookmark=bookmark)

    def add_image(self, drawing, bookmark=None):
        """Add an image, creating a run with this block's style if the
        block has no runs yet."""
        if self.runs:
            run = self.runs[-1]
        else:
            run = TextRun(self.namespace, self.styles_manager.create_text_style(self.html_style), self.html_block)
            self.runs.append(run)
        run.add_image(drawing, bookmark=bookmark)

    def serialize(self, body):
        """Emit this block as a <w:p> element under *body*."""
        makeelement = self.namespace.makeelement
        p = makeelement(body, 'w:p')
        end_bookmarks = []
        for bmark in self.bookmarks:
            end_bookmarks.append(unicode_type(self.links_manager.bookmark_id))
            makeelement(p, 'w:bookmarkStart', w_id=end_bookmarks[-1], w_name=bmark)
        if self.block_lang:
            rpr = makeelement(p, 'w:rPr')
            makeelement(rpr, 'w:lang', w_val=self.block_lang, w_bidi=self.block_lang, w_eastAsia=self.block_lang)

        ppr = makeelement(p, 'w:pPr')
        if self.keep_next:
            makeelement(ppr, 'w:keepNext')
        if self.float_spec is not None:
            self.float_spec.serialize(self, ppr)
        if self.numbering_id is not None:
            # numbering_id is (numId, indent-level)
            numpr = makeelement(ppr, 'w:numPr')
            makeelement(numpr, 'w:ilvl', w_val=unicode_type(self.numbering_id[1]))
            makeelement(numpr, 'w:numId', w_val=unicode_type(self.numbering_id[0]))
        if self.linked_style is not None:
            makeelement(ppr, 'w:pStyle', w_val=self.linked_style.id)
        elif self.style.id:
            makeelement(ppr, 'w:pStyle', w_val=self.style.id)
        if self.is_first_block:
            # The very first block never forces a page break
            makeelement(ppr, 'w:pageBreakBefore', w_val='off')
        elif self.page_break_before:
            makeelement(ppr, 'w:pageBreakBefore', w_val='on')
        if self.keep_lines:
            makeelement(ppr, 'w:keepLines', w_val='on')
        for run in self.runs:
            run.serialize(p, self.links_manager)
        for bmark in end_bookmarks:
            makeelement(p, 'w:bookmarkEnd', w_id=bmark)

    def __repr__(self):
        return 'Block(%r)' % self.runs
    __str__ = __repr__

    def is_empty(self):
        # A block is empty when all of its runs are, unless explicitly
        # forced non-empty
        if self.force_not_empty:
            return False
        for run in self.runs:
            if not run.is_empty():
                return False
        return True
|
||||
|
||||
|
||||
class Blocks(object):
    """The ordered collection of paragraphs and tables for the document.

    Used as a context manager: one ``with`` per HTML file. ``__enter__``
    records the starting position and resets the block map; ``__exit__``
    cleans up empty leading blocks and inserts the per-file page break.
    """

    def __init__(self, namespace, styles_manager, links_manager):
        self.top_bookmark = None
        self.namespace = namespace
        self.styles_manager = styles_manager
        self.links_manager = links_manager
        self.all_blocks = []
        self.pos = 0
        self.current_block = None
        self.items = []
        self.tables = []
        self.current_table = None
        self.open_html_blocks = set()
        self.html_tag_start_blocks = {}

    def current_or_new_block(self, html_tag, tag_style):
        """Return the open block, or start a new one for *html_tag*."""
        return self.current_block or self.start_new_block(html_tag, tag_style)

    def end_current_block(self):
        """Close the open block, routing it into the current table row
        or the top-level items list."""
        if self.current_block is not None:
            self.all_blocks.append(self.current_block)
            if self.current_table is not None and self.current_table.current_row is not None:
                self.current_table.add_block(self.current_block)
            else:
                self.block_map[self.current_block] = len(self.items)
                self.items.append(self.current_block)
                self.current_block.parent_items = self.items
        self.current_block = None

    def start_new_block(self, html_block, style, is_table_cell=False, float_spec=None, is_list_item=False):
        """End the open block and start a new one for *html_block*,
        inheriting the parent tag's background color when it has one."""
        parent_bg = None
        if html_block is not None:
            p = html_block.getparent()
            b = self.html_tag_start_blocks.get(p)
            if b is not None:
                ps = self.styles_manager.styles_for_html_blocks.get(p)
                if ps is not None and ps.background_color is not None:
                    parent_bg = ps.background_color
        self.end_current_block()
        self.current_block = Block(
            self.namespace, self.styles_manager, self.links_manager, html_block, style,
            is_table_cell=is_table_cell, float_spec=float_spec, is_list_item=is_list_item,
            parent_bg=parent_bg)
        self.html_tag_start_blocks[html_block] = self.current_block
        self.open_html_blocks.add(html_block)
        return self.current_block

    def start_new_table(self, html_tag, tag_style=None):
        """Push a new (possibly nested) table."""
        self.current_table = Table(self.namespace, html_tag, tag_style)
        self.tables.append(self.current_table)

    def start_new_row(self, html_tag, tag_style):
        """Start a table row, implicitly opening a table if needed."""
        if self.current_table is None:
            self.start_new_table(html_tag)
        self.current_table.start_new_row(html_tag, tag_style)

    def start_new_cell(self, html_tag, tag_style):
        """Start a table cell, implicitly opening a table if needed."""
        if self.current_table is None:
            self.start_new_table(html_tag)
        self.current_table.start_new_cell(html_tag, tag_style)

    def finish_tag(self, html_tag):
        """Close the structures opened for *html_tag*: the current block
        and, when the tag ends a table, the table itself (attaching
        nested tables to their parent)."""
        if self.current_block is not None and html_tag in self.open_html_blocks:
            start_block = self.html_tag_start_blocks.get(html_tag)
            if start_block is not None and start_block.html_style['page-break-after'] == 'always':
                self.current_block.page_break_after = True
            self.end_current_block()
            self.open_html_blocks.discard(html_tag)

        if self.current_table is not None:
            table_finished = self.current_table.finish_tag(html_tag)
            if table_finished:
                table = self.tables[-1]
                del self.tables[-1]
                if self.tables:
                    # Nested table: attach to its parent table
                    self.current_table = self.tables[-1]
                    self.current_table.add_table(table)
                else:
                    self.current_table = None
                    self.block_map[table] = len(self.items)
                    self.items.append(table)

    def serialize(self, body):
        """Serialize all top-level items (blocks and tables) into *body*."""
        for item in self.items:
            item.serialize(body)

    def delete_block_at(self, pos=None):
        """Remove the block at *pos* (default: the current file start),
        moving its bookmarks and page-break flags to the next block."""
        pos = self.pos if pos is None else pos
        block = self.all_blocks[pos]
        del self.all_blocks[pos]
        bpos = self.block_map.pop(block, None)
        if bpos is not None:
            del self.items[bpos]
        else:
            items = self.items if block.parent_items is None else block.parent_items
            items.remove(block)
        block.parent_items = None
        if block.float_spec is not None:
            block.float_spec.blocks.remove(block)
        try:
            next_block = self.all_blocks[pos]
            next_block.bookmarks.update(block.bookmarks)
            for attr in 'page_break_after page_break_before'.split():
                setattr(next_block, attr, getattr(block, attr))
        except (IndexError, KeyError):
            pass

    def __enter__(self):
        self.pos = len(self.all_blocks)
        self.block_map = {}

    def __exit__(self, etype, value, traceback):
        if value is not None:
            return  # Since there was an exception, the data structures are not in a consistent state
        if self.current_block is not None:
            self.all_blocks.append(self.current_block)
        self.current_block = None
        if len(self.all_blocks) > self.pos and self.all_blocks[self.pos].is_empty():
            # Delete the empty block corresponding to the <body> tag when the
            # body tag has no inline content before its first sub-block
            self.delete_block_at(self.pos)
        if self.pos > 0 and self.pos < len(self.all_blocks):
            # Insert a page break corresponding to the start of the html file
            self.all_blocks[self.pos].page_break_before = True
        if self.top_bookmark is not None:
            self.all_blocks[self.pos].bookmarks.add(self.top_bookmark)
        self.top_bookmark = None
        self.block_map = {}

    def apply_page_break_after(self):
        """Translate page_break_after flags into page_break_before on
        the following top-level block."""
        for i, block in enumerate(self.all_blocks):
            if block.page_break_after and i < len(self.all_blocks) - 1:
                next_block = self.all_blocks[i + 1]
                if next_block.parent_items is block.parent_items and block.parent_items is self.items:
                    next_block.page_break_before = True

    def resolve_language(self):
        """For each block, set the majority run language as the block
        language and clear it from matching runs; drop it entirely when
        it equals the document default."""
        default_lang = self.styles_manager.document_lang
        for block in self.all_blocks:
            count = Counter()
            for run in block.runs:
                count[run.lang] += 1
            if count:
                block.block_lang = bl = count.most_common(1)[0][0]
                for run in block.runs:
                    if run.lang == bl:
                        run.lang = None
                if bl == default_lang:
                    block.block_lang = None

    def __repr__(self):
        # BUG FIX: previously returned 'Block(%r)' % self.runs, but Blocks
        # has no `runs` attribute (that belongs to TextRun/Block), so
        # repr() raised AttributeError. Show the collected blocks instead.
        return 'Blocks(%r)' % self.all_blocks
|
||||
|
||||
|
||||
class Convert(object):

    """Drive the OEB -> DOCX conversion.

    Walks the spine, converting each HTML file into Block/Run data
    structures via the various manager objects, then serializes the lot
    into the docx container.
    """

    # Word does not apply default styling to hyperlinks, so we ensure they get
    # default styling (the conversion pipeline does not apply any styling to
    # them).
    base_css = '''
    a[href] { text-decoration: underline; color: blue }
    '''

    def __init__(self, oeb, docx, mi, add_cover, add_toc):
        """Store conversion inputs and size the output profile to the page area."""
        self.oeb, self.docx, self.add_cover, self.add_toc = oeb, docx, add_cover, add_toc
        self.log, self.opts = docx.log, docx.opts
        self.mi = mi
        self.cover_img = None
        p = self.opts.output_profile
        p.width_pts, p.height_pts = page_effective_area(self.opts)

    def __call__(self):
        """Run the full conversion: rasterize SVGs, process the spine, finalize and write."""
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        self.styles_manager = StylesManager(self.docx.namespace, self.log, self.mi.language)
        self.links_manager = LinksManager(self.docx.namespace, self.docx.document_relationships, self.log)
        self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships, self.opts)
        self.lists_manager = ListsManager(self.docx)
        self.fonts_manager = FontsManager(self.docx.namespace, self.oeb, self.opts)
        self.blocks = Blocks(self.docx.namespace, self.styles_manager, self.links_manager)
        self.current_link = self.current_lang = None

        for item in self.oeb.spine:
            self.log.debug('Processing', item.href)
            self.process_item(item)
        if self.add_toc:
            self.links_manager.process_toc_links(self.oeb)

        # Read the cover image only if it is actually present in the manifest
        if self.add_cover and self.oeb.metadata.cover and unicode_type(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids:
            cover_id = unicode_type(self.oeb.metadata.cover[0])
            item = self.oeb.manifest.ids[cover_id]
            self.cover_img = self.images_manager.read_image(item.href)

        # Merge/remove blocks that were marked as skipped, looking ahead one
        # block at a time; deletion is done in reverse so indices stay valid.
        all_blocks = self.blocks.all_blocks
        remove_blocks = []
        for i, block in enumerate(all_blocks):
            try:
                nb = all_blocks[i+1]
            except IndexError:
                break
            block.resolve_skipped(nb)
            if block.skipped:
                remove_blocks.append((i, block))
        for pos, block in reversed(remove_blocks):
            self.blocks.delete_block_at(pos)
        self.blocks.all_blocks[0].is_first_block = True
        self.blocks.apply_page_break_after()
        self.blocks.resolve_language()

        if self.cover_img is not None:
            self.cover_img = self.images_manager.create_cover_markup(self.cover_img, self.opts.preserve_cover_aspect_ratio, *page_size(self.opts))
        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

    def process_item(self, item):
        """Convert one spine item (HTML file) into blocks."""
        self.current_item = item
        # Reuse the stylizer computed during SVG rasterization when possible
        stylizer = self.svg_rasterizer.stylizer_cache.get(item)
        if stylizer is None:
            stylizer = Stylizer(item.data, item.href, self.oeb, self.opts, profile=self.opts.output_profile, base_css=self.base_css)
        self.abshref = self.images_manager.abshref = item.abshref

        self.current_lang = lang_for_tag(item.data) or self.styles_manager.document_lang
        for i, body in enumerate(XPath('//h:body')(item.data)):
            with self.blocks:
                self.blocks.top_bookmark = self.links_manager.bookmark_for_anchor(self.links_manager.top_anchor, self.current_item, body)
                self.process_tag(body, stylizer, is_first_tag=i == 0)

    def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None):
        """Recursively convert one HTML element and its subtree.

        Dispatches on the computed CSS display value to decide whether the
        element starts a new block, contributes inline content, or opens a
        table structure; then recurses into children and finally handles the
        element's tail text.
        """
        tagname = barename(html_tag.tag)
        tag_style = stylizer.style(html_tag)
        ignore_tag_contents = tagname in {'script', 'style', 'title', 'meta'} or tag_style.is_hidden
        display = tag_style._get('display')
        is_block = False

        if not ignore_tag_contents:
            # Track the current hyperlink/language so nested content inherits them
            previous_link = self.current_link
            if tagname == 'a' and html_tag.get('href'):
                self.current_link = (self.current_item, html_tag.get('href'), html_tag.get('title'))
            previous_lang = self.current_lang
            tag_lang = lang_for_tag(html_tag)
            if tag_lang:
                self.current_lang = tag_lang

            is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag
            if float_spec is None and is_float:
                float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style)

            if display in {'inline', 'inline-block'} or tagname == 'br':  # <br> has display:block but we dont want to start a new paragraph
                if is_float and float_spec.is_dropcaps:
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
                    float_spec = None
                else:
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
            elif display == 'list-item':
                self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_list_item=True)
            elif display.startswith('table') or display == 'inline-table':
                if display == 'table-cell':
                    self.blocks.start_new_cell(html_tag, tag_style)
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_table_cell=True)
                elif display == 'table-row':
                    self.blocks.start_new_row(html_tag, tag_style)
                elif display in {'table', 'inline-table'}:
                    self.blocks.end_current_block()
                    self.blocks.start_new_table(html_tag, tag_style)
            else:
                if tagname == 'img' and is_float:
                    # Image is floating so dont start a new paragraph for it
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
                else:
                    if tagname == 'hr':
                        # An <hr> is rendered as a paragraph with only a top border
                        for edge in 'right bottom left'.split():
                            tag_style.set('border-%s-style' % edge, 'none')
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)

            for child in html_tag.iterchildren():
                if isinstance(getattr(child, 'tag', None), string_or_bytes):
                    self.process_tag(child, stylizer, float_spec=float_spec)
                else:  # Comment/PI/etc.
                    tail = getattr(child, 'tail', None)
                    if tail:
                        block = self.create_block_from_parent(html_tag, stylizer)
                        block.add_text(tail, tag_style, is_parent_style=False, link=self.current_link, lang=self.current_lang)

            is_block = html_tag in self.blocks.open_html_blocks
            self.blocks.finish_tag(html_tag)
            if is_block and tag_style['page-break-after'] == 'avoid':
                self.blocks.all_blocks[-1].keep_next = True

            self.current_link = previous_link
            self.current_lang = previous_lang

        # Now, process the tail if any

        if display == 'table-row':
            return  # We ignore the tail for these tags

        ignore_whitespace_tail = is_block or display.startswith('table')
        if not is_first_tag and html_tag.tail and (not ignore_whitespace_tail or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            block = self.create_block_from_parent(html_tag, stylizer)
            block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True, link=self.current_link, lang=self.current_lang)

    def create_block_from_parent(self, html_tag, stylizer):
        """Return the current (or a new) block styled after html_tag's parent."""
        parent = html_tag.getparent()
        block = self.blocks.current_or_new_block(parent, stylizer.style(parent))
        # Do not inherit page-break-before from parent
        block.page_break_before = False
        return block

    def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False, float_spec=None, is_list_item=False):
        """Start a new block for a block-level element and fill in its leading content."""
        block = self.blocks.start_new_block(
            html_tag, tag_style, is_table_cell=is_table_cell, float_spec=float_spec, is_list_item=is_list_item)
        anchor = html_tag.get('id') or html_tag.get('name')
        if anchor:
            block.bookmarks.add(self.bookmark_for_anchor(anchor, html_tag))
        if tagname == 'img':
            self.images_manager.add_image(html_tag, block, stylizer, as_block=True)
        else:
            text = html_tag.text
            if text:
                block.add_text(text, tag_style, ignore_leading_whitespace=True, is_parent_style=True, link=self.current_link, lang=self.current_lang)
            elif tagname == 'li' and len(html_tag) and barename(html_tag[0].tag) in ('ul', 'ol') and len(html_tag[0]):
                # A list item that contains only a nested list must still
                # produce a paragraph, otherwise the item number is lost
                block.force_not_empty = True

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        """Add the inline content (text, <br>, or <img>) of an element to the current block."""
        anchor = html_tag.get('id') or html_tag.get('name') or None
        bmark = None
        if anchor:
            bmark = self.bookmark_for_anchor(anchor, html_tag)
        if tagname == 'br':
            # A trailing <br> that is the last child and has no tail is ignored
            if html_tag.tail or html_tag is not tuple(html_tag.getparent().iterchildren('*'))[-1]:
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_break(clear={'both':'all', 'left':'left', 'right':'right'}.get(tag_style['clear'], 'none'), bookmark=bmark)
        elif tagname == 'img':
            block = self.create_block_from_parent(html_tag, stylizer)
            self.images_manager.add_image(html_tag, block, stylizer, bookmark=bmark)
        else:
            if html_tag.text:
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_text(html_tag.text, tag_style, is_parent_style=False, bookmark=bmark, link=self.current_link, lang=self.current_lang)
            elif bmark:
                # No text, but the bookmark must still be emitted so links to it work
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_text('', tag_style, is_parent_style=False, bookmark=bmark, link=self.current_link, lang=self.current_lang)

    def bookmark_for_anchor(self, anchor, html_tag):
        """Map an HTML anchor in the current item to a Word bookmark name."""
        return self.links_manager.bookmark_for_anchor(anchor, self.current_item, html_tag)

    def write(self):
        """Serialize all accumulated data into the docx container parts."""
        self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
        self.blocks.serialize(body)
        body.append(body[0])  # Move <sectPr> to the end
        if self.links_manager.toc:
            self.links_manager.serialize_toc(body, self.styles_manager.primary_heading_style)
        if self.cover_img is not None:
            self.images_manager.write_cover_block(body, self.cover_img)
        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts, self.docx.fonts)
        self.lists_manager.serialize(self.docx.numbering)
|
||||
219
ebook_converter/ebooks/docx/writer/images.py
Normal file
219
ebook_converter/ebooks/docx/writer/images.py
Normal file
@@ -0,0 +1,219 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os
|
||||
import posixpath
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from polyglot.builtins import iteritems, itervalues, map, unicode_type
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre import fit_image
|
||||
from calibre.ebooks.oeb.base import urlunquote
|
||||
from calibre.ebooks.docx.images import pt_to_emu
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.imghdr import identify
|
||||
|
||||
# Record for one image added to the document:
#   rid    - relationship id inside document.xml.rels
#   fname  - file name of the image inside the docx (under word/)
#   width, height - pixel dimensions as reported by identify()
#   fmt    - image format (e.g. 'jpeg', 'png')
#   item   - the OEB manifest item holding the raw image data
Image = namedtuple('Image', 'rid fname width height fmt item')
||||
def as_num(x):
    """Coerce *x* to a float, treating anything unparseable as zero."""
    try:
        value = float(x)
    except Exception:
        value = 0
    return value
||||
|
||||
|
||||
def get_image_margins(style):
    """Convert a style's margins+padding into docx distT/distB/distL/distR
    attribute values (EMU, as strings) for image anchoring."""
    margins = {}
    for edge in ('Left', 'Right', 'Top', 'Bottom'):
        total = as_num(getattr(style, 'padding' + edge)) + as_num(getattr(style, 'margin' + edge))
        margins['dist' + edge[0]] = unicode_type(pt_to_emu(total))
    return margins
||||
|
||||
|
||||
class ImagesManager(object):

    """Collects the images referenced by the book and produces the DOCX
    DrawingML markup and container entries for them."""

    def __init__(self, oeb, document_relationships, opts):
        self.oeb, self.log = oeb, oeb.log
        self.page_width, self.page_height = opts.output_profile.width_pts, opts.output_profile.height_pts
        self.images = {}  # href -> Image namedtuple
        self.seen_filenames = set()
        self.document_relationships = document_relationships
        self.count = 0  # running id for wp:docPr elements

    def read_image(self, href):
        """Register the image at href (once) and return its Image record.

        Returns None if the manifest has no binary data for href. Corrupt
        images are replaced with a blank placeholder.
        """
        if href not in self.images:
            item = self.oeb.manifest.hrefs.get(href)
            if item is None or not isinstance(item.data, bytes):
                return
            try:
                fmt, width, height = identify(item.data)
            except Exception:
                self.log.warning('Replacing corrupted image with blank: %s' % href)
                item.data = I('blank.png', data=True, allow_user_override=False)
                fmt, width, height = identify(item.data)
            image_fname = 'media/' + self.create_filename(href, fmt)
            image_rid = self.document_relationships.add_image(image_fname)
            self.images[href] = Image(image_rid, image_fname, width, height, fmt, item)
            item.unload_data_from_memory()
        return self.images[href]

    def add_image(self, img, block, stylizer, bookmark=None, as_block=False):
        """Add the <img> element img to block, returning the relationship id
        (or None if the image cannot be resolved)."""
        src = img.get('src')
        if not src:
            return
        href = self.abshref(src)
        try:
            rid = self.read_image(href).rid
        except AttributeError:
            # read_image() returned None: no usable image data
            return
        drawing = self.create_image_markup(img, stylizer, href, as_block=as_block)
        block.add_image(drawing, bookmark=bookmark)
        return rid

    def create_image_markup(self, html_img, stylizer, href, as_block=False):
        """Build the w:drawing subtree for an image, choosing between inline
        and floating (anchored) placement based on the CSS."""
        # TODO: img inside a link (clickable image)
        style = stylizer.style(html_img)
        floating = style['float']
        if floating not in {'left', 'right'}:
            floating = None
        if as_block:
            # Auto margins control horizontal placement of block images
            ml, mr = style._get('margin-left'), style._get('margin-right')
            if ml == 'auto':
                floating = 'center' if mr == 'auto' else 'right'
            if mr == 'auto':
                # BUG FIX: was 'right'; margin-right:auto alone pushes the
                # image to the left per CSS auto-margin semantics
                floating = 'center' if ml == 'auto' else 'left'
        else:
            parent = html_img.getparent()
            if len(parent) == 1 and not (parent.text or '').strip() and not (html_img.tail or '').strip():
                pstyle = stylizer.style(parent)
                if 'block' in pstyle['display']:
                    # We have an inline image alone inside a block
                    as_block = True
                    floating = pstyle['float']
                    if floating not in {'left', 'right'}:
                        floating = None
                        if pstyle['text-align'] in ('center', 'right'):
                            floating = pstyle['text-align']
                    floating = floating or 'left'
        fake_margins = floating is None
        self.count += 1
        img = self.images[href]
        name = urlunquote(posixpath.basename(href))
        width, height = style.img_size(img.width, img.height)
        scaled, width, height = fit_image(width, height, self.page_width, self.page_height)
        width, height = map(pt_to_emu, (width, height))

        makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces

        root = etree.Element('root', nsmap=namespaces)
        ans = makeelement(root, 'w:drawing', append=False)
        if floating is None:
            parent = makeelement(ans, 'wp:inline')
        else:
            parent = makeelement(ans, 'wp:anchor', **get_image_margins(style))
            # The next three lines are boilerplate that Word requires, even
            # though the DOCX specs define defaults for all of them
            parent.set('simplePos', '0'), parent.set('relativeHeight', '1'), parent.set('behindDoc',"0"), parent.set('locked', "0")
            parent.set('layoutInCell', "1"), parent.set('allowOverlap', '1')
            makeelement(parent, 'wp:simplePos', x='0', y='0')
            makeelement(makeelement(parent, 'wp:positionH', relativeFrom='margin'), 'wp:align').text = floating
            makeelement(makeelement(parent, 'wp:positionV', relativeFrom='line'), 'wp:align').text = 'top'
        makeelement(parent, 'wp:extent', cx=unicode_type(width), cy=unicode_type(height))
        if fake_margins:
            # DOCX does not support setting margins for inline images, so we
            # fake it by using effect extents to simulate margins
            makeelement(parent, 'wp:effectExtent', **{k[-1].lower():v for k, v in iteritems(get_image_margins(style))})
        else:
            makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
        if floating is not None:
            # The idiotic Word requires this to be after the extent settings
            if as_block:
                makeelement(parent, 'wp:wrapTopAndBottom')
            else:
                makeelement(parent, 'wp:wrapSquare', wrapText='bothSides')
        self.create_docx_image_markup(parent, name, html_img.get('alt') or name, img.rid, width, height)
        return ans

    def create_docx_image_markup(self, parent, name, alt, img_rid, width, height):
        """Append the DrawingML picture boilerplate (a:graphic/pic:pic) under parent."""
        makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces
        makeelement(parent, 'wp:docPr', id=unicode_type(self.count), name=name, descr=alt)
        makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect="1")
        g = makeelement(parent, 'a:graphic')
        gd = makeelement(g, 'a:graphicData', uri=namespaces['pic'])
        pic = makeelement(gd, 'pic:pic')
        nvPicPr = makeelement(pic, 'pic:nvPicPr')
        makeelement(nvPicPr, 'pic:cNvPr', id='0', name=name, descr=alt)
        makeelement(nvPicPr, 'pic:cNvPicPr')
        bf = makeelement(pic, 'pic:blipFill')
        makeelement(bf, 'a:blip', r_embed=img_rid)
        makeelement(makeelement(bf, 'a:stretch'), 'a:fillRect')
        spPr = makeelement(pic, 'pic:spPr')
        xfrm = makeelement(spPr, 'a:xfrm')
        makeelement(xfrm, 'a:off', x='0', y='0'), makeelement(xfrm, 'a:ext', cx=unicode_type(width), cy=unicode_type(height))
        makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst')

    def create_filename(self, href, fmt):
        """Derive a unique, ASCII-safe file name (with extension) for href."""
        fname = ascii_filename(urlunquote(posixpath.basename(href)))
        fname = posixpath.splitext(fname)[0]
        fname = fname[:75].rstrip('.') or 'image'
        num = 0
        base = fname
        # Disambiguate case-insensitively, since the zip may land on a
        # case-insensitive filesystem
        while fname.lower() in self.seen_filenames:
            num += 1
            fname = base + unicode_type(num)
        self.seen_filenames.add(fname.lower())
        fname += os.extsep + fmt.lower()
        return fname

    def serialize(self, images_map):
        """Register lazy data producers for every image in the container map."""
        for img in itervalues(self.images):
            images_map['word/' + img.fname] = partial(self.get_data, img.item)

    def get_data(self, item):
        """Return an item's raw bytes, releasing the in-memory copy afterwards."""
        try:
            return item.data
        finally:
            item.unload_data_from_memory(False)

    def create_cover_markup(self, img, preserve_aspect_ratio, width, height):
        """Build a page-centered w:drawing for the cover image."""
        self.count += 1
        makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces
        if preserve_aspect_ratio:
            if img.width >= img.height:
                ar = img.height / img.width
                height = ar * width
            else:
                ar = img.width / img.height
                width = ar * height

        root = etree.Element('root', nsmap=namespaces)
        ans = makeelement(root, 'w:drawing', append=False)
        parent = makeelement(ans, 'wp:anchor', **{'dist'+edge:'0' for edge in 'LRTB'})
        parent.set('simplePos', '0'), parent.set('relativeHeight', '1'), parent.set('behindDoc',"0"), parent.set('locked', "0")
        parent.set('layoutInCell', "1"), parent.set('allowOverlap', '1')
        makeelement(parent, 'wp:simplePos', x='0', y='0')
        makeelement(makeelement(parent, 'wp:positionH', relativeFrom='page'), 'wp:align').text = 'center'
        makeelement(makeelement(parent, 'wp:positionV', relativeFrom='page'), 'wp:align').text = 'center'
        width, height = map(pt_to_emu, (width, height))
        makeelement(parent, 'wp:extent', cx=unicode_type(width), cy=unicode_type(height))
        makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
        makeelement(parent, 'wp:wrapTopAndBottom')
        self.create_docx_image_markup(parent, 'cover.jpg', _('Cover'), img.rid, width, height)
        return ans

    def write_cover_block(self, body, cover_image):
        """Insert the cover paragraph at the start of body, forcing a page
        break before the following content."""
        makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces
        pbb = body[0].xpath('//*[local-name()="pageBreakBefore"]')[0]
        pbb.set('{%s}val' % namespaces['w'], 'on')
        p = makeelement(body, 'w:p', append=False)
        body.insert(0, p)
        r = makeelement(p, 'w:r')
        r.append(cover_image)
||||
175
ebook_converter/ebooks/docx/writer/links.py
Normal file
175
ebook_converter/ebooks/docx/writer/links.py
Normal file
@@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import posixpath, re
|
||||
from uuid import uuid4
|
||||
|
||||
from calibre.utils.filenames import ascii_text
|
||||
from polyglot.builtins import unicode_type
|
||||
from polyglot.urllib import urlparse
|
||||
|
||||
|
||||
def start_text(tag, prefix_len=0, top_level=True):
    """Return the leading text of *tag* including descendant text/tails,
    truncated with an ellipsis once roughly 50 characters are collected."""
    text = tag.text or ''
    limit = 50 - prefix_len
    if len(text) < limit:
        for child in tag.iterchildren('*'):
            text += start_text(child, len(text), top_level=False)
            text += child.tail or ''
            if len(text) >= limit:
                break
    if top_level and len(text) > limit:
        text = text[:limit] + '...'
    return text
||||
|
||||
|
||||
class TOCItem(object):

    """One entry in the generated Table of Contents.

    Serialized as a paragraph containing a hyperlink to the entry's
    bookmark; the first entry opens the Word TOC field and the last entry
    closes it.
    """

    def __init__(self, title, bmark, level):
        self.title, self.bmark, self.level = title, bmark, level
        # Set later by LinksManager.process_toc_links() for the boundary entries
        self.is_first = self.is_last = False

    def serialize(self, body, makeelement):
        """Prepend this entry's paragraph to body (callers iterate the TOC in
        reverse, so repeated insert(0, ...) preserves order)."""
        p = makeelement(body, 'w:p', append=False)
        ppr = makeelement(p, 'w:pPr')
        makeelement(ppr, 'w:pStyle', w_val="Normal")
        # Indent 200 twentieths of a char per nesting level
        makeelement(ppr, 'w:ind', w_left='0', w_firstLineChars='0', w_firstLine='0', w_leftChars=unicode_type(200 * self.level))
        if self.is_first:
            makeelement(ppr, 'w:pageBreakBefore', w_val='off')
            # Open the TOC field: begin marker, instruction text, separator
            r = makeelement(p, 'w:r')
            makeelement(r, 'w:fldChar', w_fldCharType='begin')
            r = makeelement(p, 'w:r')
            makeelement(r, 'w:instrText').text = r' TOC \h '
            # Preserve the significant surrounding spaces in the field code
            r[0].set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
            r = makeelement(p, 'w:r')
            makeelement(r, 'w:fldChar', w_fldCharType='separate')
        hl = makeelement(p, 'w:hyperlink', w_anchor=self.bmark)
        r = makeelement(hl, 'w:r')
        rpr = makeelement(r, 'w:rPr')
        # Style the entry like a conventional hyperlink
        makeelement(rpr, 'w:color', w_val='0000FF', w_themeColor='hyperlink')
        makeelement(rpr, 'w:u', w_val='single')
        makeelement(r, 'w:t').text = self.title
        if self.is_last:
            # Close the TOC field
            r = makeelement(p, 'w:r')
            makeelement(r, 'w:fldChar', w_fldCharType='end')
        body.insert(0, p)
|
||||
|
||||
def sanitize_bookmark_name(base):
    """Reduce *base* to a Word-compatible bookmark name.

    Word appears to cap bookmark names at 40 characters; only the first 32
    are kept so a numeric suffix can later be appended for uniqueness.
    """
    cleaned = re.sub(r'[^0-9a-zA-Z]', '_', ascii_text(base))
    return cleaned[:32].rstrip('_')
||||
|
||||
|
||||
class LinksManager(object):

    """Tracks bookmarks, internal/external hyperlinks and the generated TOC
    for the output document."""

    def __init__(self, namespace, document_relationships, log):
        self.namespace = namespace
        self.log = log
        self.document_relationships = document_relationships
        # Synthetic anchor name representing the top of each HTML file
        self.top_anchor = unicode_type(uuid4().hex)
        # (item href, anchor) -> bookmark name
        self.anchor_map = {}
        self.used_bookmark_names = set()
        self.bmark_id = 0
        self.document_hrefs = set()
        # url -> relationship id for external links
        self.external_links = {}
        self.toc = []

    def bookmark_for_anchor(self, anchor, current_item, html_tag):
        """Return a unique Word bookmark name for anchor inside current_item,
        creating and caching it on first use."""
        key = (current_item.href, anchor)
        if key in self.anchor_map:
            return self.anchor_map[key]
        if anchor == self.top_anchor:
            name = ('Top of %s' % posixpath.basename(current_item.href))
            self.document_hrefs.add(current_item.href)
        else:
            # Derive a human-readable name from the tag's leading text
            name = start_text(html_tag).strip() or anchor
        name = sanitize_bookmark_name(name)
        i, bname = 0, name
        while name in self.used_bookmark_names:
            i += 1
            name = bname + ('_%d' % i)
        self.anchor_map[key] = name
        self.used_bookmark_names.add(name)
        return name

    @property
    def bookmark_id(self):
        # Monotonically increasing id for w:bookmarkStart elements
        self.bmark_id += 1
        return self.bmark_id

    def serialize_hyperlink(self, parent, link):
        """Create a w:hyperlink element under parent for link, which is a
        (item, url, tooltip) triple.

        Internal links become bookmark anchors; http/https/ftp links become
        external relationships. Returns parent unchanged for anything else.
        """
        item, url, tooltip = link
        purl = urlparse(url)
        href = purl.path

        def make_link(parent, anchor=None, id=None, tooltip=None):
            kw = {}
            if anchor is not None:
                kw['w_anchor'] = anchor
            elif id is not None:
                kw['r_id'] = id
            if tooltip:
                kw['w_tooltip'] = tooltip
            return self.namespace.makeelement(parent, 'w:hyperlink', **kw)

        if not purl.scheme:
            href = item.abshref(href)
            if href in self.document_hrefs:
                key = (href, purl.fragment or self.top_anchor)
                if key in self.anchor_map:
                    bmark = self.anchor_map[key]
                else:
                    # Unknown fragment: fall back to the top of the target file
                    bmark = self.anchor_map[(href, self.top_anchor)]
                return make_link(parent, anchor=bmark, tooltip=tooltip)
            else:
                self.log.warn('Ignoring internal hyperlink with href (%s) pointing to unknown destination' % url)
        if purl.scheme in {'http', 'https', 'ftp'}:
            if url not in self.external_links:
                self.external_links[url] = self.document_relationships.add_relationship(url, self.namespace.names['LINKS'], target_mode='External')
            return make_link(parent, id=self.external_links[url], tooltip=tooltip)
        return parent

    def process_toc_node(self, toc, level=0):
        """Recursively convert one TOC node (and children) into TOCItems."""
        href = toc.href
        if href:
            purl = urlparse(href)
            href = purl.path
            if href in self.document_hrefs:
                key = (href, purl.fragment or self.top_anchor)
                if key in self.anchor_map:
                    bmark = self.anchor_map[key]
                else:
                    bmark = self.anchor_map[(href, self.top_anchor)]
                self.toc.append(TOCItem(toc.title, bmark, level))
        for child in toc:
            self.process_toc_node(child, level+1)

    def process_toc_links(self, oeb):
        """Build self.toc from the book's TOC, marking first/last entries."""
        self.toc = []
        has_toc = oeb.toc and oeb.toc.count() > 1
        if not has_toc:
            return
        for child in oeb.toc:
            self.process_toc_node(child)
        if self.toc:
            self.toc[0].is_first = True
            self.toc[-1].is_last = True

    def serialize_toc(self, body, primary_heading_style):
        """Prepend the TOC (entries plus a heading paragraph) to body."""
        pbb = body[0].xpath('//*[local-name()="pageBreakBefore"]')[0]
        pbb.set('{%s}val' % self.namespace.namespaces['w'], 'on')
        # Entries are inserted at position 0, so iterate in reverse to keep order
        for block in reversed(self.toc):
            block.serialize(body, self.namespace.makeelement)
        title = __('Table of Contents')
        makeelement = self.namespace.makeelement
        p = makeelement(body, 'w:p', append=False)
        ppr = makeelement(p, 'w:pPr')
        if primary_heading_style is not None:
            makeelement(ppr, 'w:pStyle', w_val=primary_heading_style.id)
        makeelement(ppr, 'w:pageBreakBefore', w_val='off')
        makeelement(makeelement(p, 'w:r'), 'w:t').text = title
        body.insert(0, p)
||||
169
ebook_converter/ebooks/docx/writer/lists.py
Normal file
169
ebook_converter/ebooks/docx/writer/lists.py
Normal file
@@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from collections import defaultdict
|
||||
from operator import attrgetter
|
||||
|
||||
from polyglot.builtins import iteritems, itervalues, unicode_type
|
||||
|
||||
# CSS list-style-type values the list machinery knows how to convert;
# anything else is ignored when building numbering definitions.
LIST_STYLES = frozenset(
    'disc circle square decimal decimal-leading-zero lower-roman upper-roman'
    ' lower-greek lower-alpha lower-latin upper-alpha upper-latin hiragana hebrew'
    ' katakana-iroha cjk-ideographic'.split())

# CSS list-style-type -> DOCX w:numFmt value; for the bullet styles
# circle/square the value is instead the literal bullet character to use.
STYLE_MAP = {
    'disc': 'bullet',
    'circle': 'o',
    'square': '\uf0a7',
    'decimal': 'decimal',
    'decimal-leading-zero': 'decimalZero',
    'lower-roman': 'lowerRoman',
    'upper-roman': 'upperRoman',
    'lower-alpha': 'lowerLetter',
    'lower-latin': 'lowerLetter',
    'upper-alpha': 'upperLetter',
    'upper-latin': 'upperLetter',
    'hiragana': 'aiueo',
    'hebrew': 'hebrew1',
    'katakana-iroha': 'iroha',
    'cjk-ideographic': 'chineseCounting',
}
||||
|
||||
|
||||
def find_list_containers(list_tag, tag_style):
    """Collect the ancestors of *list_tag* that are styled as list
    containers, innermost first."""
    stylizer = tag_style._stylizer
    containers = []
    node = list_tag
    while True:
        parent = node.getparent()
        if parent is None or parent is node:
            return containers
        node = parent
        style = stylizer.style(node)
        list_style = (style._style.get('list-style-type', None) or '').lower()
        if list_style in LIST_STYLES:
            containers.append(node)
||||
|
||||
|
||||
class NumberingDefinition(object):

    """An abstract numbering definition (<w:abstractNum>) built from all the
    list blocks sharing one top-most list container."""

    def __init__(self, top_most, stylizer, namespace):
        self.namespace = namespace
        self.top_most = top_most    # outermost list container element
        self.stylizer = stylizer
        # ilvl -> [(container, list_tag, block, list_type, tag_style), ...]
        self.level_map = defaultdict(list)
        self.num_id = None          # assigned during deduplication in ListsManager

    def finalize(self):
        """Collapse level_map into a tuple of Level objects, one per indent level."""
        items_for_level = defaultdict(list)
        container_for_level = {}
        type_for_level = {}
        for ilvl, items in iteritems(self.level_map):
            for container, list_tag, block, list_type, tag_style in items:
                items_for_level[ilvl].append(list_tag)
                container_for_level[ilvl] = container
                type_for_level[ilvl] = list_type
        self.levels = tuple(
            Level(type_for_level[ilvl], container_for_level[ilvl], items_for_level[ilvl], ilvl=ilvl)
            for ilvl in sorted(self.level_map)
        )

    def __hash__(self):
        return hash(self.levels)

    def __eq__(self, other):
        # BUG FIX: __hash__ was defined without __eq__, so the dict-based
        # deduplication of equivalent definitions compared object identity
        # and never merged anything. Equality is defined consistently with
        # __hash__ (both are based on self.levels).
        return self.levels == getattr(other, 'levels', None)

    def __ne__(self, other):
        return not self.__eq__(other)

    def link_blocks(self):
        """Record (numId, ilvl) on every block using this definition."""
        for ilvl, items in iteritems(self.level_map):
            for container, list_tag, block, list_type, tag_style in items:
                block.numbering_id = (self.num_id + 1, ilvl)

    def serialize(self, parent):
        """Write this definition as a <w:abstractNum> subtree under parent."""
        makeelement = self.namespace.makeelement
        an = makeelement(parent, 'w:abstractNum', w_abstractNumId=unicode_type(self.num_id))
        makeelement(an, 'w:multiLevelType', w_val='hybridMultilevel')
        makeelement(an, 'w:name', w_val='List %d' % (self.num_id + 1))
        for level in self.levels:
            level.serialize(an, makeelement)
|
||||
|
||||
class Level(object):

    """One indent level of a numbering definition, serialized as <w:lvl>."""

    def __init__(self, list_type, container, items, ilvl=0):
        self.ilvl = ilvl
        # Start value comes from the container's start attribute...
        try:
            self.start = int(container.get('start'))
        except Exception:
            self.start = 1
        if items:
            # ...but an explicit value attribute on the first item wins
            try:
                self.start = int(items[0].get('value'))
            except Exception:
                pass
        if list_type in {'disc', 'circle', 'square'}:
            self.num_fmt = 'bullet'
            # \uf0b7 is the round bullet glyph in the Symbol font
            self.lvl_text = '\uf0b7' if list_type == 'disc' else STYLE_MAP[list_type]
        else:
            # Numbered list: placeholder %N. shows the number for this level
            self.lvl_text = '%{}.'.format(self.ilvl + 1)
            self.num_fmt = STYLE_MAP.get(list_type, 'decimal')

    def __hash__(self):
        return hash((self.start, self.num_fmt, self.lvl_text))

    def __eq__(self, other):
        # BUG FIX: __hash__ was defined without __eq__, so levels (and hence
        # the numbering definitions built from them) never compared equal and
        # deduplication in ListsManager.finalize could not merge them.
        try:
            return (self.start, self.num_fmt, self.lvl_text) == (
                other.start, other.num_fmt, other.lvl_text)
        except AttributeError:
            return NotImplemented

    def __ne__(self, other):
        result = self.__eq__(other)
        return result if result is NotImplemented else not result

    def serialize(self, parent, makeelement):
        """Write this level as a <w:lvl> subtree under parent."""
        lvl = makeelement(parent, 'w:lvl', w_ilvl=unicode_type(self.ilvl))
        makeelement(lvl, 'w:start', w_val=unicode_type(self.start))
        makeelement(lvl, 'w:numFmt', w_val=self.num_fmt)
        makeelement(lvl, 'w:lvlText', w_val=self.lvl_text)
        makeelement(lvl, 'w:lvlJc', w_val='left')
        makeelement(makeelement(lvl, 'w:pPr'), 'w:ind', w_hanging='360', w_left=unicode_type(1152 + self.ilvl * 360))
        if self.num_fmt == 'bullet':
            # Bullet glyphs live in symbol fonts
            ff = {'\uf0b7':'Symbol', '\uf0a7':'Wingdings'}.get(self.lvl_text, 'Courier New')
            makeelement(makeelement(lvl, 'w:rPr'), 'w:rFonts', w_ascii=ff, w_hAnsi=ff, w_hint="default")
|
||||
|
||||
class ListsManager(object):

    """Collects all HTML lists in the document, deduplicates their numbering
    definitions and serializes them into the DOCX numbering part."""

    def __init__(self, docx):
        self.namespace = docx.namespace
        # Maps top-most list container -> NumberingDefinition
        self.lists = {}

    def finalize(self, all_blocks):
        """Build numbering definitions from *all_blocks* and assign num ids.

        Blocks that belong to a list carry a (list_tag, tag_style) pair; each
        distinct top-most list container becomes one NumberingDefinition,
        which is then deduplicated by value.
        """
        lists = {}
        for block in all_blocks:
            if block.list_tag is not None:
                list_tag, tag_style = block.list_tag
                list_type = (tag_style['list-style-type'] or '').lower()
                if list_type not in LIST_STYLES:
                    continue
                container_tags = find_list_containers(list_tag, tag_style)
                if not container_tags:
                    continue
                # The outermost container identifies the whole list
                top_most = container_tags[-1]
                if top_most not in lists:
                    lists[top_most] = NumberingDefinition(top_most, tag_style._stylizer, self.namespace)
                l = lists[top_most]
                # Nesting depth of this block within the list
                ilvl = len(container_tags) - 1
                l.level_map[ilvl].append((container_tags[0], list_tag, block, list_type, tag_style))

        [nd.finalize() for nd in itervalues(lists)]
        # Deduplicate definitions that render identically and number them
        definitions = {}
        for defn in itervalues(lists):
            try:
                defn = definitions[defn]
            except KeyError:
                definitions[defn] = defn
                defn.num_id = len(definitions) - 1
            defn.link_blocks()
        self.definitions = sorted(itervalues(definitions), key=attrgetter('num_id'))

    def serialize(self, parent):
        """Write all abstract numbering definitions, then one <w:num> per
        definition linking the concrete numbering id to the abstract one."""
        for defn in self.definitions:
            defn.serialize(parent)
        makeelement = self.namespace.makeelement
        for defn in self.definitions:
            # w:numId is 1-based while w:abstractNumId is 0-based
            n = makeelement(parent, 'w:num', w_numId=unicode_type(defn.num_id + 1))
            makeelement(n, 'w:abstractNumId', w_val=unicode_type(defn.num_id))
|
||||
768
ebook_converter/ebooks/docx/writer/styles.py
Normal file
768
ebook_converter/ebooks/docx/writer/styles.py
Normal file
@@ -0,0 +1,768 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import numbers
|
||||
from collections import Counter, defaultdict
|
||||
from operator import attrgetter
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks import parse_css_length
|
||||
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
||||
from calibre.utils.localization import lang_as_iso639_1
|
||||
from polyglot.builtins import iteritems, filter, unicode_type
|
||||
from tinycss.css21 import CSS21Parser
|
||||
|
||||
# Shared tinycss CSS 2.1 parser instance (used for font-family parsing)
css_parser = CSS21Parser()

# The four sides a CSS box can carry padding/borders on, and the attribute
# name templates used to store the parsed values on style objects
border_edges = ('left', 'top', 'right', 'bottom')
border_props = ('padding_%s', 'border_%s_width', 'border_%s_style', 'border_%s_color')
# Sentinel marking an inline border/padding property whose value differs
# between edges (DOCX inline runs support only a single uniform border)
ignore = object()
|
||||
|
||||
|
||||
def parse_css_font_family(raw):
    """Yield the font family names from the CSS font-family value *raw*, in order."""
    declarations, _errors = css_parser.parse_style_attr('font-family:' + raw)
    if not declarations:
        return
    for tok in declarations[0].value:
        # Family names tokenize as quoted STRINGs or bare IDENTs
        if tok.type not in 'STRING IDENT':
            continue
        name = tok.value
        if name == 'inherit':
            break
        yield name
|
||||
|
||||
|
||||
def css_font_family_to_docx(raw):
    """Map the first usable family in the CSS font-family value *raw* to a
    concrete DOCX font name.

    Generic CSS families are replaced by fonts that ship with Word; returns
    None when *raw* yields no family name at all.
    """
    substitutions = {
        'serif': 'Cambria',
        'sansserif': 'Candara',
        'sans-serif': 'Candara',
        'fantasy': 'Comic Sans',
        'cursive': 'Segoe Script',
    }
    for family in parse_css_font_family(raw):
        return substitutions.get(family.lower(), family)
|
||||
|
||||
|
||||
def bmap(x):
    """Map a truthy value to the DOCX boolean string 'on', a falsy one to 'off'."""
    return 'off' if not x else 'on'
|
||||
|
||||
|
||||
def is_dropcaps(html_tag, tag_style):
    """Heuristic drop-caps detection: a left-floated tag with at most one
    child element and fewer than five characters of text."""
    if tag_style['float'] != 'left':
        return False
    if len(html_tag) >= 2:
        return False
    text = etree.tostring(html_tag, method='text', encoding='unicode', with_tail=False)
    return len(text) < 5
|
||||
|
||||
|
||||
class CombinedStyle(object):

    """A block style paired with the run style most commonly used inside it.

    Each CombinedStyle becomes one named paragraph style in styles.xml.
    """

    def __init__(self, bs, rs, blocks, namespace):
        self.bs, self.rs, self.blocks = bs, rs, blocks
        self.namespace = namespace
        self.id = self.name = self.seq = None
        # Set later for styles chosen as heading levels
        self.outline_level = None

    def apply(self):
        """Link every block (and the runs inside it) to this style."""
        for blk in self.blocks:
            blk.linked_style = self
            for run in blk.runs:
                run.parent_style = self.rs

    def serialize(self, styles, normal_style):
        """Write this style as a <w:style> child of *styles*."""
        makeelement = self.namespace.makeelement
        w_ns = self.namespace.namespaces['w']

        def w(name):
            return '{%s}%s' % (w_ns, name)

        style_el = makeelement(styles, 'w:style', w_styleId=self.id, w_type='paragraph')
        makeelement(style_el, 'w:name', w_val=self.name)
        makeelement(style_el, 'w:qFormat')
        # Everything except Normal itself inherits from Normal
        if self is not normal_style:
            makeelement(style_el, 'w:basedOn', w_val=normal_style.id)
        if self.seq == 0:
            style_el.set(w('default'), '1')
        pPr = makeelement(style_el, 'w:pPr')
        self.bs.serialize_properties(pPr, normal_style.bs)
        if self.outline_level is not None:
            makeelement(pPr, 'w:outlineLvl', w_val=unicode_type(self.outline_level + 1))
        rPr = makeelement(style_el, 'w:rPr')
        self.rs.serialize_properties(rPr, normal_style.rs)
|
||||
|
||||
|
||||
class FloatSpec(object):

    """Emulates a CSS floated element (or a drop-caps initial) with a DOCX
    text frame (<w:framePr>), the closest WordprocessingML equivalent."""

    def __init__(self, namespace, html_tag, tag_style):
        self.makeelement = namespace.makeelement
        self.is_dropcaps = is_dropcaps(html_tag, tag_style)
        # The blocks laid out inside this frame
        self.blocks = []
        if self.is_dropcaps:
            # Word drop-caps span a fixed number of text lines
            self.dropcaps_lines = 3
        else:
            self.x_align = tag_style['float']
            # Frame width/height in twips; None means automatic
            # NOTE(review): assumes tag_style lengths are in pts (20 twips/pt) -- confirm
            self.w = self.h = None
            if tag_style._get('width') != 'auto':
                self.w = int(20 * max(tag_style['min-width'], tag_style['width']))
            if tag_style._get('height') == 'auto':
                self.h_rule = 'auto'
            else:
                # min-height maps to Word's "atLeast" rule, fixed height to "exact"
                if tag_style['min-height'] > 0:
                    self.h_rule, self.h = 'atLeast', tag_style['min-height']
                else:
                    self.h_rule, self.h = 'exact', tag_style['height']
                self.h = int(20 * self.h)
            # Gap between the frame and surrounding text, in twips
            self.h_space = int(20 * max(tag_style['margin-right'], tag_style['margin-left']))
            self.v_space = int(20 * max(tag_style['margin-top'], tag_style['margin-bottom']))

        read_css_block_borders(self, tag_style)

    def serialize(self, block, parent):
        """Emit frame, spacing and border properties for *block* (one of
        self.blocks) into the paragraph properties element *parent*."""
        if self.is_dropcaps:
            attrs = dict(w_dropCap='drop', w_lines=unicode_type(self.dropcaps_lines), w_wrap='around', w_vAnchor='text', w_hAnchor='text')
        else:
            attrs = dict(
                w_wrap='around', w_vAnchor='text', w_hAnchor='text', w_xAlign=self.x_align, w_y='1',
                w_hSpace=unicode_type(self.h_space), w_vSpace=unicode_type(self.v_space), w_hRule=self.h_rule
            )
            if self.w is not None:
                attrs['w_w'] = unicode_type(self.w)
            if self.h is not None:
                attrs['w_h'] = unicode_type(self.h)
        self.makeelement(parent, 'w:framePr', **attrs)
        # Margins are already applied by the frame style, so override them to
        # be zero on individual blocks
        self.makeelement(parent, 'w:ind', w_left='0', w_leftChars='0', w_right='0', w_rightChars='0')
        attrs = {}
        # Suppress extra vertical space at the first/last block of the frame
        if block is self.blocks[0]:
            attrs.update(dict(w_before='0', w_beforeLines='0'))
        if block is self.blocks[-1]:
            attrs.update(dict(w_after='0', w_afterLines='0'))
        if attrs:
            self.makeelement(parent, 'w:spacing', **attrs)
        # Similarly apply the same border and padding properties to all blocks
        # in this floatspec
        bdr = self.makeelement(parent, 'w:pBdr')
        for edge in border_edges:
            padding = getattr(self, 'padding_' + edge)
            width = getattr(self, 'border_%s_width' % edge)
            bstyle = getattr(self, 'border_%s_style' % edge)
            self.makeelement(
                bdr, 'w:'+edge, w_space=unicode_type(padding), w_val=bstyle, w_sz=unicode_type(width), w_color=getattr(self, 'border_%s_color' % edge))
|
||||
|
||||
|
||||
class DOCXStyle(object):

    """Base class for serializable DOCX styles.

    Subclasses list the attributes that define style identity in ALL_PROPS;
    equality and hashing are derived from those so identical styles can be
    deduplicated.
    """

    ALL_PROPS = ()
    TYPE = 'paragraph'

    def __init__(self, namespace):
        self.namespace = namespace
        # Qualify a local name into the w: namespace, e.g. w('pPr')
        self.w = lambda x: '{%s}%s' % (namespace.namespaces['w'], x)
        self.id = self.name = None
        self.next_style = None
        self.calculate_hash()

    def calculate_hash(self):
        # Identity hash over the defining properties; must be recalculated if
        # any ALL_PROPS attribute is mutated after construction
        self._hash = hash(tuple(
            getattr(self, x) for x in self.ALL_PROPS))

    def makeelement(self, parent, name, **attrs):
        # Both the tag and all attribute names are namespaced into w:
        return parent.makeelement(self.w(name), **{self.w(k):v for k, v in iteritems(attrs)})

    def __hash__(self):
        return self._hash

    def __eq__(self, other):
        for x in self.ALL_PROPS:
            if getattr(self, x) != getattr(other, x, None):
                return False
        return True

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        # serialize() requires a normal_style argument; pass self so that no
        # basedOn reference is emitted (previously this argument was missing,
        # making __repr__ raise TypeError). lxml returns bytes, so decode to
        # satisfy the str contract of __repr__ on python 3.
        raw = etree.tostring(self.serialize(etree.Element(
            self.__class__.__name__, nsmap={'w':self.namespace.namespaces['w']}), self),
            pretty_print=True)
        return raw.decode('utf-8') if isinstance(raw, bytes) else raw
    __str__ = __repr__

    def serialize(self, styles, normal_style):
        """Append a <w:style> skeleton for this style to *styles* and return it."""
        makeelement = self.makeelement
        style = makeelement(styles, 'style', styleId=self.id, type=self.TYPE)
        style.append(makeelement(style, 'name', val=self.name))
        if self is not normal_style:
            style.append(makeelement(style, 'basedOn', val=normal_style.id))
        styles.append(style)
        return style
|
||||
|
||||
|
||||
# Map CSS border-style keywords to DOCX border <w:val> names; 'hidden'
# collapses to 'none' and the 3-D CSS styles map to Word's closest
# engraved/embossed equivalents
LINE_STYLES = {
    'none'  : 'none',
    'hidden': 'none',
    'dotted': 'dotted',
    'dashed': 'dashed',
    'solid' : 'single',
    'double': 'double',
    'groove': 'threeDEngrave',
    'ridge' : 'threeDEmboss',
    'inset' : 'inset',
    'outset': 'outset',
}
|
||||
|
||||
|
||||
class TextStyle(DOCXStyle):

    """Character (run) level style derived from a CSS style object."""

    ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
                 'background_color', 'underline', 'strike', 'dstrike', 'caps',
                 'shadow', 'small_caps', 'spacing', 'vertical_align', 'padding',
                 'border_style', 'border_width', 'border_color')
    TYPE = 'character'

    def __init__(self, namespace, css, is_parent_style=False):
        self.font_family = css_font_family_to_docx(css['font-family'])
        try:
            # Half-points; stylizer normalizes all font sizes into pts
            self.font_size = max(0, int(float(css['font-size']) * 2))
        except (ValueError, TypeError, AttributeError):
            self.font_size = None

        fw = css['font-weight']
        # font-weight may be a keyword or a number
        self.bold = (fw.lower() if hasattr(fw, 'lower') else fw) in {'bold', 'bolder'} or int_or_zero(fw) >= 700
        self.italic = css['font-style'].lower() in {'italic', 'oblique'}
        self.color = convert_color(css['color'])
        self.background_color = None if is_parent_style else convert_color(css.backgroundColor)
        td = set((css.effective_text_decoration or '').split())
        self.underline = 'underline' in td
        # line-through combined with overline becomes a double strike-through
        self.dstrike = 'line-through' in td and 'overline' in td
        self.strike = not self.dstrike and 'line-through' in td
        self.text_transform = css['text-transform']  # TODO: If lowercase or capitalize, transform the actual text
        self.caps = self.text_transform == 'uppercase'
        self.small_caps = css['font-variant'].lower() in {'small-caps', 'smallcaps'}
        self.shadow = css['text-shadow'] not in {'none', None}
        try:
            # letter-spacing in twips
            self.spacing = int(float(css['letter-spacing']) * 20)
        except (ValueError, TypeError, AttributeError):
            self.spacing = None
        va = css.first_vertical_align
        if isinstance(va, numbers.Number):
            # Numeric vertical-align becomes a raised/lowered position in half-points
            self.vertical_align = unicode_type(int(va * 2))
        else:
            val = {
                'top':'superscript', 'text-top':'superscript', 'sup':'superscript', 'super':'superscript',
                'bottom':'subscript', 'text-bottom':'subscript', 'sub':'subscript'}.get(va)
            self.vertical_align = val or 'baseline'

        self.padding = self.border_color = self.border_width = self.border_style = None
        if not is_parent_style:
            # DOCX does not support individual borders/padding for inline content,
            # so collapse the four edges into one value; edges that disagree are
            # marked with the `ignore` sentinel and dropped below
            for edge in border_edges:
                # In DOCX padding can only be a positive integer
                try:
                    padding = max(0, int(css['padding-' + edge]))
                except ValueError:
                    padding = 0
                if self.padding is None:
                    self.padding = padding
                elif self.padding != padding:
                    self.padding = ignore
                val = css['border-%s-width' % edge]
                if not isinstance(val, numbers.Number):
                    val = {'thin':0.2, 'medium':1, 'thick':2}.get(val, 0)
                # Border size in eighths of a point, clamped to 2..96
                val = min(96, max(2, int(val * 8)))
                if self.border_width is None:
                    self.border_width = val
                elif self.border_width != val:
                    self.border_width = ignore
                color = convert_color(css['border-%s-color' % edge])
                if self.border_color is None:
                    self.border_color = color
                elif self.border_color != color:
                    self.border_color = ignore
                style = LINE_STYLES.get(css['border-%s-style' % edge].lower(), 'none')
                if self.border_style is None:
                    self.border_style = style
                elif self.border_style != style:
                    self.border_style = ignore

        # Normalize unset/conflicting values to safe defaults
        if self.padding in (None, ignore):
            self.padding = 0
        if self.border_width in (None, ignore):
            self.border_width = 0
        if self.border_style in (None, ignore):
            self.border_style = 'none'
        if self.border_color in (None, ignore):
            self.border_color = 'auto'
        if self.border_style == 'none':
            self.border_width, self.border_color = 0, 'auto'

        DOCXStyle.__init__(self, namespace)

    def serialize_borders(self, bdr, normal_style):
        """Set border attributes on *bdr* that differ from *normal_style*."""
        w = self.w
        is_normal_style = self is normal_style
        if is_normal_style or self.padding != normal_style.padding:
            bdr.set(w('space'), unicode_type(self.padding))
        if is_normal_style or self.border_width != normal_style.border_width:
            bdr.set(w('sz'), unicode_type(self.border_width))
        if is_normal_style or self.border_style != normal_style.border_style:
            bdr.set(w('val'), self.border_style)
        if is_normal_style or self.border_color != normal_style.border_color:
            bdr.set(w('color'), self.border_color)
        return bdr

    def serialize(self, styles, normal_style):
        """Write this style; the <rPr> element is only attached if non-empty."""
        makeelement = self.makeelement
        style_root = DOCXStyle.serialize(self, styles, normal_style)
        style = makeelement(style_root, 'rPr')
        self.serialize_properties(style, normal_style)
        if len(style) > 0:
            style_root.append(style)
        return style_root

    def serialize_properties(self, rPr, normal_style):
        """Append run properties to *rPr*, emitting only values that differ
        from *normal_style* (everything is emitted for the normal style itself)."""
        makeelement = self.makeelement
        is_normal_style = self is normal_style
        if is_normal_style or self.font_family != normal_style.font_family:
            rPr.append(makeelement(
                rPr, 'rFonts', **{k:self.font_family for k in 'ascii cs eastAsia hAnsi'.split()}))

        # sz/b/i also get their complex-script twins (szCs/bCs/iCs)
        for name, attr, vmap in (('sz', 'font_size', str), ('b', 'bold', bmap), ('i', 'italic', bmap)):
            val = getattr(self, attr)
            if is_normal_style or getattr(normal_style, attr) != val:
                for suffix in ('', 'Cs'):
                    rPr.append(makeelement(rPr, name + suffix, val=vmap(val)))

        def check_attr(attr):
            # Emit when serializing the normal style or when differing from it
            val = getattr(self, attr)
            return is_normal_style or (val != getattr(normal_style, attr))

        if check_attr('color'):
            rPr.append(makeelement(rPr, 'color', val=self.color or 'auto'))
        if check_attr('background_color'):
            rPr.append(makeelement(rPr, 'shd', fill=self.background_color or 'auto'))
        if check_attr('underline'):
            rPr.append(makeelement(rPr, 'u', val='single' if self.underline else 'none'))
        if check_attr('dstrike'):
            rPr.append(makeelement(rPr, 'dstrike', val=bmap(self.dstrike)))
        if check_attr('strike'):
            rPr.append(makeelement(rPr, 'strike', val=bmap(self.strike)))
        if check_attr('caps'):
            rPr.append(makeelement(rPr, 'caps', val=bmap(self.caps)))
        if check_attr('small_caps'):
            rPr.append(makeelement(rPr, 'smallCaps', val=bmap(self.small_caps)))
        if check_attr('shadow'):
            rPr.append(makeelement(rPr, 'shadow', val=bmap(self.shadow)))
        if check_attr('spacing'):
            rPr.append(makeelement(rPr, 'spacing', val=unicode_type(self.spacing or 0)))
        if is_normal_style:
            rPr.append(makeelement(rPr, 'vertAlign', val=self.vertical_align if self.vertical_align in {'superscript', 'subscript'} else 'baseline'))
        elif self.vertical_align != normal_style.vertical_align:
            # Keyword alignments use vertAlign, numeric offsets use position
            if self.vertical_align in {'superscript', 'subscript', 'baseline'}:
                rPr.append(makeelement(rPr, 'vertAlign', val=self.vertical_align))
            else:
                rPr.append(makeelement(rPr, 'position', val=self.vertical_align))

        bdr = self.serialize_borders(makeelement(rPr, 'bdr'), normal_style)
        if bdr.attrib:
            rPr.append(bdr)
|
||||
|
||||
|
||||
class DescendantTextStyle(object):

    """The delta between a run's own style and its parent (paragraph) style.

    Only the differing properties are stored, so runs whose formatting matches
    their paragraph need no character style at all.
    """

    def __init__(self, parent_style, child_style):
        self.id = self.name = None
        self.makeelement = child_style.makeelement

        # Accumulates (element-name, frozenset-of-attrs) pairs; frozensets keep
        # the whole tuple hashable for deduplication
        p = []

        def add(name, **props):
            p.append((name, frozenset(iteritems(props))))

        def vals(attr):
            return getattr(parent_style, attr), getattr(child_style, attr)

        def check(attr):
            # True when the child differs from the parent for this attribute
            pval, cval = vals(attr)
            return pval != cval

        if parent_style.font_family != child_style.font_family:
            add('rFonts', **{k:child_style.font_family for k in 'ascii cs eastAsia hAnsi'.split()})

        for name, attr in (('sz', 'font_size'), ('b', 'bold'), ('i', 'italic')):
            pval, cval = vals(attr)
            if pval != cval:
                val = 'on' if attr in {'bold', 'italic'} else unicode_type(cval)  # bold, italic are toggle properties
                for suffix in ('', 'Cs'):
                    add(name + suffix, val=val)

        if check('color'):
            add('color', val=child_style.color or 'auto')
        if check('background_color'):
            add('shd', fill=child_style.background_color or 'auto')
        if check('underline'):
            add('u', val='single' if child_style.underline else 'none')
        if check('dstrike'):
            add('dstrike', val=bmap(child_style.dstrike))
        if check('strike'):
            add('strike', val='on')  # toggle property
        if check('caps'):
            add('caps', val='on')  # toggle property
        if check('small_caps'):
            add('smallCaps', val='on')  # toggle property
        if check('shadow'):
            add('shadow', val='on')  # toggle property
        if check('spacing'):
            add('spacing', val=unicode_type(child_style.spacing or 0))
        if check('vertical_align'):
            val = child_style.vertical_align
            # Keyword alignments use vertAlign, numeric offsets use position
            if val in {'superscript', 'subscript', 'baseline'}:
                add('vertAlign', val=val)
            else:
                add('position', val=val)

        bdr = {}
        if check('padding'):
            bdr['space'] = unicode_type(child_style.padding)
        if check('border_width'):
            bdr['sz'] = unicode_type(child_style.border_width)
        if check('border_style'):
            bdr['val'] = child_style.border_style
        if check('border_color'):
            bdr['color'] = child_style.border_color
        if bdr:
            add('bdr', **bdr)
        # The property tuple is the identity of this style
        self.properties = tuple(p)
        self._hash = hash(self.properties)

    def __hash__(self):
        return self._hash

    def __eq__(self, other):
        return self.properties == other.properties

    def __ne__(self, other):
        return self.properties != other.properties

    def serialize(self, styles):
        """Append this delta as a character <style> element to *styles*."""
        makeelement = self.makeelement
        style = makeelement(styles, 'style', styleId=self.id, type='character')
        style.append(makeelement(style, 'name', val=self.name))
        rpr = makeelement(style, 'rPr')
        style.append(rpr)
        for name, attrs in self.properties:
            rpr.append(makeelement(style, name, **dict(attrs)))
        styles.append(style)
        return style
|
||||
|
||||
|
||||
def read_css_block_borders(self, css, store_css_style=False):
    """Copy padding/margin/border values for all four edges from *css* onto
    *self* as DOCX-ready attributes (margins in twips, border widths in
    eighths of a point).

    When *css* is None, neutral defaults are installed.  With
    *store_css_style* the raw CSS border-style keyword is recorded as well.
    """
    for edge in border_edges:
        if css is None:
            # No style information available: neutral defaults
            setattr(self, 'padding_' + edge, 0)
            setattr(self, 'margin_' + edge, 0)
            setattr(self, 'css_margin_' + edge, '')
            setattr(self, 'border_%s_width' % edge, 2)
            setattr(self, 'border_%s_color' % edge, None)
            setattr(self, 'border_%s_style' % edge, 'none')
            if store_css_style:
                setattr(self, 'border_%s_css_style' % edge, 'none')
            continue
        # In DOCX padding can only be a positive integer
        try:
            pad = max(0, int(css['padding-' + edge]))
        except ValueError:
            pad = 0  # invalid value for padding
        setattr(self, 'padding_' + edge, pad)
        # In DOCX margin must be a positive integer in twips
        try:
            margin = max(0, int(css['margin-' + edge] * 20))
        except ValueError:
            margin = 0  # for e.g.: margin: auto
        setattr(self, 'margin_' + edge, margin)
        setattr(self, 'css_margin_' + edge, css._style.get('margin-' + edge, ''))
        width = css['border-%s-width' % edge]
        if not isinstance(width, numbers.Number):
            width = {'thin': 0.2, 'medium': 1, 'thick': 2}.get(width, 0)
        # Clamp to the 2..96 eighths-of-a-point range DOCX accepts
        setattr(self, 'border_%s_width' % edge, min(96, max(2, int(width * 8))))
        setattr(self, 'border_%s_color' % edge, convert_color(css['border-%s-color' % edge]) or 'auto')
        raw_style = css['border-%s-style' % edge].lower()
        setattr(self, 'border_%s_style' % edge, LINE_STYLES.get(raw_style, 'none'))
        if store_css_style:
            setattr(self, 'border_%s_css_style' % edge, raw_style)
|
||||
|
||||
|
||||
class BlockStyle(DOCXStyle):

    """Paragraph level style derived from a CSS style object."""

    ALL_PROPS = tuple(
        'text_align css_text_indent text_indent line_height background_color'.split() +
        ['margin_' + edge for edge in border_edges] +
        ['css_margin_' + edge for edge in border_edges] +
        [x % edge for edge in border_edges for x in border_props]
    )

    def __init__(self, namespace, css, html_block, is_table_cell=False, parent_bg=None):
        read_css_block_borders(self, css)
        if is_table_cell:
            # Table cell borders/spacing are handled by the table machinery
            for edge in border_edges:
                setattr(self, 'border_%s_style' % edge, 'none')
                setattr(self, 'border_%s_width' % edge, 0)
                setattr(self, 'padding_' + edge, 0)
                setattr(self, 'margin_' + edge, 0)
        if css is None:
            self.text_indent = 0
            self.css_text_indent = None
            self.line_height = 280
            self.background_color = None
            self.text_align = 'left'
        else:
            try:
                # text-indent in twips, plus the raw CSS value for em/ex handling
                self.text_indent = int(css['text-indent'] * 20)
                self.css_text_indent = css._get('text-indent')
            except (TypeError, ValueError):
                self.text_indent = 0
                self.css_text_indent = None
            try:
                # Line height in twips; fall back to 1.2 x font size
                self.line_height = max(0, int(css.lineHeight * 20))
            except (TypeError, ValueError):
                self.line_height = max(0, int(1.2 * css.fontSize * 20))
            self.background_color = None if is_table_cell else convert_color(css['background-color'])
            # Inherit the parent's background when none is set here
            if not is_table_cell and self.background_color is None:
                self.background_color = parent_bg
            try:
                ws = css['white-space'].lower()
                preserve_whitespace = ws in {'pre', 'pre-wrap'}
            except Exception:
                preserve_whitespace = False
            try:
                aval = css['text-align'].lower()
                # Justification would destroy preserved whitespace
                if preserve_whitespace:
                    aval = 'start'
                self.text_align = {'start':'left', 'left':'left', 'end':'right', 'right':'right', 'center':'center', 'justify':'both', 'centre':'center'}.get(
                    aval, 'left')
            except AttributeError:
                self.text_align = 'left'

        DOCXStyle.__init__(self, namespace)

    def serialize_borders(self, bdr, normal_style):
        """Append per-edge border elements to *bdr* for values that differ
        from *normal_style*."""
        w = self.w
        for edge in border_edges:
            e = bdr.makeelement(w(edge))
            padding = getattr(self, 'padding_' + edge)
            if (self is normal_style and padding > 0) or (padding != getattr(normal_style, 'padding_' + edge)):
                e.set(w('space'), unicode_type(padding))
            width = getattr(self, 'border_%s_width' % edge)
            bstyle = getattr(self, 'border_%s_style' % edge)
            if (self is normal_style and width > 0 and bstyle != 'none'
                ) or width != getattr(normal_style, 'border_%s_width' % edge
                ) or bstyle != getattr(normal_style, 'border_%s_style' % edge):
                e.set(w('val'), bstyle)
                e.set(w('sz'), unicode_type(width))
                e.set(w('color'), getattr(self, 'border_%s_color' % edge))
            if e.attrib:
                bdr.append(e)
        return bdr

    def serialize(self, styles, normal_style):
        """Write this style; the <pPr> element is only attached if non-empty."""
        makeelement = self.makeelement
        style_root = DOCXStyle.serialize(self, styles, normal_style)
        style = makeelement(style_root, 'pPr')
        self.serialize_properties(style, normal_style)
        if len(style) > 0:
            style_root.append(style)
        return style_root

    def serialize_properties(self, pPr, normal_style):
        """Append paragraph properties to *pPr*, emitting only values that
        differ from *normal_style*.

        Margins/indents given in em/ex in the CSS are emitted as the
        character-relative *Lines/*Chars attributes instead of fixed twips.
        """
        makeelement, w = self.makeelement, self.w
        spacing = makeelement(pPr, 'spacing')
        for edge, attr in iteritems({'top':'before', 'bottom':'after'}):
            getter = attrgetter('css_margin_' + edge)
            css_val, css_unit = parse_css_length(getter(self))
            if css_unit in ('em', 'ex'):
                # 100 hundredths-of-a-line per em, 50 per ex
                lines = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
                if (self is normal_style and lines > 0) or getter(self) != getter(normal_style):
                    spacing.set(w(attr + 'Lines'), unicode_type(lines))
            else:
                getter = attrgetter('margin_' + edge)
                val = getter(self)
                if (self is normal_style and val > 0) or val != getter(normal_style):
                    spacing.set(w(attr), unicode_type(val))

        if self is normal_style or self.line_height != normal_style.line_height:
            spacing.set(w('line'), unicode_type(self.line_height))
            spacing.set(w('lineRule'), 'atLeast')

        if spacing.attrib:
            pPr.append(spacing)

        ind = makeelement(pPr, 'ind')
        for edge in ('left', 'right'):
            getter = attrgetter('css_margin_' + edge)
            css_val, css_unit = parse_css_length(getter(self))
            if css_unit in ('em', 'ex'):
                chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
                if (self is normal_style and chars > 0) or getter(self) != getter(normal_style):
                    ind.set(w(edge + 'Chars'), unicode_type(chars))
            else:
                getter = attrgetter('margin_' + edge)
                val = getter(self)
                if (self is normal_style and val > 0) or val != getter(normal_style):
                    ind.set(w(edge), unicode_type(val))
                    ind.set(w(edge + 'Chars'), '0')  # This is needed to override any declaration in the parent style
        css_val, css_unit = parse_css_length(self.css_text_indent)
        if css_unit in ('em', 'ex'):
            chars = int(css_val * (50 if css_unit == 'ex' else 100))
            if css_val >= 0:
                if (self is normal_style and chars > 0) or self.css_text_indent != normal_style.css_text_indent:
                    ind.set(w('firstLineChars'), unicode_type(chars))
            else:
                # Negative indent becomes a hanging indent
                if (self is normal_style and chars < 0) or self.css_text_indent != normal_style.css_text_indent:
                    ind.set(w('hangingChars'), unicode_type(abs(chars)))
        else:
            val = self.text_indent
            if val >= 0:
                if (self is normal_style and val > 0) or self.text_indent != normal_style.text_indent:
                    ind.set(w('firstLine'), unicode_type(val))
                    ind.set(w('firstLineChars'), '0')  # This is needed to override any declaration in the parent style
            else:
                if (self is normal_style and val < 0) or self.text_indent != normal_style.text_indent:
                    ind.set(w('hanging'), unicode_type(abs(val)))
                    ind.set(w('hangingChars'), '0')
        if ind.attrib:
            pPr.append(ind)

        if (self is normal_style and self.background_color) or self.background_color != normal_style.background_color:
            pPr.append(makeelement(pPr, 'shd', val='clear', color='auto', fill=self.background_color or 'auto'))

        pbdr = self.serialize_borders(pPr.makeelement(w('pBdr')), normal_style)
        if len(pbdr):
            pPr.append(pbdr)

        if self is normal_style or self.text_align != normal_style.text_align:
            pPr.append(makeelement(pPr, 'jc', val=self.text_align))

        if self is not normal_style and self.next_style is not None:
            pPr.append(makeelement(pPr, 'next', val=self.next_style))
|
||||
|
||||
|
||||
class StylesManager(object):

    """Creates, deduplicates and names all block/run styles, then decides
    which combinations become named paragraph styles in the output DOCX."""

    def __init__(self, namespace, log, document_lang):
        self.namespace = namespace
        self.document_lang = lang_as_iso639_1(document_lang) or 'en'
        self.log = log
        # Identity maps used for deduplication (style -> canonical style)
        self.block_styles, self.text_styles = {}, {}
        self.styles_for_html_blocks = {}

    def create_text_style(self, css_style, is_parent_style=False):
        """Return a deduplicated TextStyle for *css_style*."""
        ans = TextStyle(self.namespace, css_style, is_parent_style=is_parent_style)
        existing = self.text_styles.get(ans, None)
        if existing is None:
            self.text_styles[ans] = ans
        else:
            ans = existing
        return ans

    def create_block_style(self, css_style, html_block, is_table_cell=False, parent_bg=None):
        """Return a deduplicated BlockStyle for *css_style* and remember the
        mapping from *html_block* to it."""
        ans = BlockStyle(self.namespace, css_style, html_block, is_table_cell=is_table_cell, parent_bg=parent_bg)
        existing = self.block_styles.get(ans, None)
        if existing is None:
            self.block_styles[ans] = ans
        else:
            ans = existing
        self.styles_for_html_blocks[html_block] = ans
        return ans

    def finalize(self, all_blocks):
        """Assign names/ids to all styles used by *all_blocks*.

        Pairs every block style with its dominant run style (CombinedStyle),
        picks heading styles by usage, names styles by popularity (the most
        used pair becomes 'Normal') and computes per-run descendant styles.
        """
        block_counts, run_counts = Counter(), Counter()
        block_rmap, run_rmap = defaultdict(list), defaultdict(list)
        used_pairs = defaultdict(list)
        heading_styles = defaultdict(list)
        headings = frozenset('h1 h2 h3 h4 h5 h6'.split())
        pure_block_styles = set()

        for block in all_blocks:
            bs = block.style
            block_counts[bs] += 1
            block_rmap[block.style].append(block)
            local_run_counts = Counter()
            for run in block.runs:
                count = run.style_weight
                run_counts[run.style] += count
                local_run_counts[run.style] += count
                run_rmap[run.style].append(run)
            if local_run_counts:
                # The most heavily used run style becomes the block's partner
                rs = local_run_counts.most_common(1)[0][0]
                used_pairs[(bs, rs)].append(block)
                if block.html_tag in headings:
                    heading_styles[block.html_tag].append((bs, rs))
            else:
                # Blocks with no runs keep a bare block style
                pure_block_styles.add(bs)

        self.pure_block_styles = sorted(pure_block_styles, key=block_counts.__getitem__)
        # Zero-pad ids so they sort lexically
        bnum = len(unicode_type(max(1, len(pure_block_styles) - 1)))
        for i, bs in enumerate(self.pure_block_styles):
            bs.id = bs.name = '%0{}d Block'.format(bnum) % i
            bs.seq = i
            if i == 0:
                self.normal_pure_block_style = bs

        counts = Counter()
        smap = {}
        for (bs, rs), blocks in iteritems(used_pairs):
            s = CombinedStyle(bs, rs, blocks, self.namespace)
            smap[(bs, rs)] = s
            # Only non-empty blocks count towards popularity
            counts[s] += sum(1 for b in blocks if not b.is_empty())
        for i, heading_tag in enumerate(sorted(heading_styles)):
            # Most used style for each hN tag becomes that outline level
            styles = sorted((smap[k] for k in heading_styles[heading_tag]), key=counts.__getitem__)
            styles = list(filter(lambda s:s.outline_level is None, styles))
            if styles:
                heading_style = styles[-1]
                heading_style.outline_level = i

        snum = len(unicode_type(max(1, len(counts) - 1)))
        heading_styles = []
        for i, (style, count) in enumerate(counts.most_common()):
            if i == 0:
                # The single most used combination is the document's Normal style
                self.normal_style = style
                style.id = style.name = 'Normal'
            else:
                if style.outline_level is None:
                    val = 'Para %0{}d'.format(snum) % i
                else:
                    val = 'Heading %d' % (style.outline_level + 1)
                    heading_styles.append(style)
                style.id = style.name = val
            style.seq = i
        self.combined_styles = sorted(counts, key=attrgetter('seq'))
        [ls.apply() for ls in self.combined_styles]

        # Runs whose style differs from their paragraph style get a
        # deduplicated character style carrying only the delta
        descendant_style_map = {}
        ds_counts = Counter()
        for block in all_blocks:
            for run in block.runs:
                if run.parent_style is not run.style and run.parent_style and run.style:
                    ds = DescendantTextStyle(run.parent_style, run.style)
                    if ds.properties:
                        run.descendant_style = descendant_style_map.get(ds)
                        if run.descendant_style is None:
                            run.descendant_style = descendant_style_map[ds] = ds
                        ds_counts[run.descendant_style] += run.style_weight
        rnum = len(unicode_type(max(1, len(ds_counts) - 1)))
        for i, (text_style, count) in enumerate(ds_counts.most_common()):
            text_style.id = 'Text%d' % i
            text_style.name = '%0{}d Text'.format(rnum) % i
            text_style.seq = i
        self.descendant_text_styles = sorted(descendant_style_map, key=attrgetter('seq'))

        self.log.debug('%d Text Styles %d Combined styles' % tuple(map(len, (
            self.descendant_text_styles, self.combined_styles))))

        # Used e.g. for the table of contents; fall back to the style with the
        # largest font size when there are no headings
        self.primary_heading_style = None
        if heading_styles:
            heading_styles.sort(key=attrgetter('outline_level'))
            self.primary_heading_style = heading_styles[0]
        else:
            ms = 0
            for s in self.combined_styles:
                if s.rs.font_size > ms:
                    self.primary_heading_style = s
                    ms = s.rs.font_size

    def serialize(self, styles):
        """Write every style into the styles.xml root element *styles*, after
        rewriting all <lang> attributes to the document language."""
        lang = styles.xpath('descendant::*[local-name()="lang"]')[0]
        for k in tuple(lang.attrib):
            lang.attrib[k] = self.document_lang
        for style in self.combined_styles:
            style.serialize(styles, self.normal_style)
        for style in self.descendant_text_styles:
            style.serialize(styles)
        for style in sorted(self.pure_block_styles, key=attrgetter('seq')):
            style.serialize(styles, self.normal_pure_block_style)
|
||||
371
ebook_converter/ebooks/docx/writer/tables.py
Normal file
371
ebook_converter/ebooks/docx/writer/tables.py
Normal file
@@ -0,0 +1,371 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from collections import namedtuple
|
||||
|
||||
from calibre.ebooks.docx.writer.utils import convert_color
|
||||
from calibre.ebooks.docx.writer.styles import read_css_block_borders as rcbb, border_edges
|
||||
from polyglot.builtins import iteritems, range, unicode_type
|
||||
|
||||
|
||||
class Dummy(object):
|
||||
pass
|
||||
|
||||
|
||||
Border = namedtuple('Border', 'css_style style width color level')
|
||||
border_style_weight = {
|
||||
x:100-i for i, x in enumerate(('double', 'solid', 'dashed', 'dotted', 'ridge', 'outset', 'groove', 'inset'))}
|
||||
|
||||
|
||||
class SpannedCell(object):
|
||||
|
||||
def __init__(self, spanning_cell, horizontal=True):
|
||||
self.spanning_cell = spanning_cell
|
||||
self.horizontal = horizontal
|
||||
self.row_span = self.col_span = 1
|
||||
|
||||
def resolve_borders(self):
|
||||
pass
|
||||
|
||||
def serialize(self, tr, makeelement):
|
||||
tc = makeelement(tr, 'w:tc')
|
||||
tcPr = makeelement(tc, 'w:tcPr')
|
||||
makeelement(tcPr, 'w:%sMerge' % ('h' if self.horizontal else 'v'), w_val='continue')
|
||||
makeelement(tc, 'w:p')
|
||||
|
||||
def applicable_borders(self, edge):
|
||||
return self.spanning_cell.applicable_borders(edge)
|
||||
|
||||
|
||||
def read_css_block_borders(self, css):
|
||||
obj = Dummy()
|
||||
rcbb(obj, css, store_css_style=True)
|
||||
for edge in border_edges:
|
||||
setattr(self, 'border_' + edge, Border(
|
||||
getattr(obj, 'border_%s_css_style' % edge),
|
||||
getattr(obj, 'border_%s_style' % edge),
|
||||
getattr(obj, 'border_%s_width' % edge),
|
||||
getattr(obj, 'border_%s_color' % edge),
|
||||
self.BLEVEL
|
||||
))
|
||||
setattr(self, 'padding_' + edge, getattr(obj, 'padding_' + edge))
|
||||
|
||||
|
||||
def as_percent(x):
|
||||
if x and x.endswith('%'):
|
||||
try:
|
||||
return float(x.rstrip('%'))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def convert_width(tag_style):
|
||||
if tag_style is not None:
|
||||
w = tag_style._get('width')
|
||||
wp = as_percent(w)
|
||||
if w == 'auto':
|
||||
return ('auto', 0)
|
||||
elif wp is not None:
|
||||
return ('pct', int(wp * 50))
|
||||
else:
|
||||
try:
|
||||
return ('dxa', int(float(tag_style['width']) * 20))
|
||||
except Exception:
|
||||
pass
|
||||
return ('auto', 0)
|
||||
|
||||
|
||||
class Cell(object):
|
||||
|
||||
BLEVEL = 2
|
||||
|
||||
def __init__(self, row, html_tag, tag_style=None):
|
||||
self.row = row
|
||||
self.table = self.row.table
|
||||
self.html_tag = html_tag
|
||||
try:
|
||||
self.row_span = max(0, int(html_tag.get('rowspan', 1)))
|
||||
except Exception:
|
||||
self.row_span = 1
|
||||
try:
|
||||
self.col_span = max(0, int(html_tag.get('colspan', 1)))
|
||||
except Exception:
|
||||
self.col_span = 1
|
||||
if tag_style is None:
|
||||
self.valign = 'center'
|
||||
else:
|
||||
self.valign = {'top':'top', 'bottom':'bottom', 'middle':'center'}.get(tag_style._get('vertical-align'))
|
||||
self.items = []
|
||||
self.width = convert_width(tag_style)
|
||||
self.background_color = None if tag_style is None else convert_color(tag_style.backgroundColor)
|
||||
read_css_block_borders(self, tag_style)
|
||||
|
||||
def add_block(self, block):
|
||||
self.items.append(block)
|
||||
block.parent_items = self.items
|
||||
|
||||
def add_table(self, table):
|
||||
self.items.append(table)
|
||||
return table
|
||||
|
||||
def serialize(self, parent, makeelement):
|
||||
tc = makeelement(parent, 'w:tc')
|
||||
tcPr = makeelement(tc, 'w:tcPr')
|
||||
makeelement(tcPr, 'w:tcW', w_type=self.width[0], w_w=unicode_type(self.width[1]))
|
||||
# For some reason, Word 2007 refuses to honor <w:shd> at the table or row
|
||||
# level, despite what the specs say, so we inherit and apply at the
|
||||
# cell level
|
||||
bc = self.background_color or self.row.background_color or self.row.table.background_color
|
||||
if bc:
|
||||
makeelement(tcPr, 'w:shd', w_val="clear", w_color="auto", w_fill=bc)
|
||||
|
||||
b = makeelement(tcPr, 'w:tcBorders', append=False)
|
||||
for edge, border in iteritems(self.borders):
|
||||
if border is not None and border.width > 0 and border.style != 'none':
|
||||
makeelement(b, 'w:' + edge, w_val=border.style, w_sz=unicode_type(border.width), w_color=border.color)
|
||||
if len(b) > 0:
|
||||
tcPr.append(b)
|
||||
|
||||
m = makeelement(tcPr, 'w:tcMar', append=False)
|
||||
for edge in border_edges:
|
||||
padding = getattr(self, 'padding_' + edge)
|
||||
if edge in {'top', 'bottom'} or (edge == 'left' and self is self.row.first_cell) or (edge == 'right' and self is self.row.last_cell):
|
||||
padding += getattr(self.row, 'padding_' + edge)
|
||||
if padding > 0:
|
||||
makeelement(m, 'w:' + edge, w_type='dxa', w_w=unicode_type(int(padding * 20)))
|
||||
if len(m) > 0:
|
||||
tcPr.append(m)
|
||||
|
||||
if self.valign is not None:
|
||||
makeelement(tcPr, 'w:vAlign', w_val=self.valign)
|
||||
|
||||
if self.row_span > 1:
|
||||
makeelement(tcPr, 'w:vMerge', w_val='restart')
|
||||
if self.col_span > 1:
|
||||
makeelement(tcPr, 'w:hMerge', w_val='restart')
|
||||
|
||||
item = None
|
||||
for item in self.items:
|
||||
item.serialize(tc)
|
||||
if item is None or isinstance(item, Table):
|
||||
# Word 2007 requires the last element in a table cell to be a paragraph
|
||||
makeelement(tc, 'w:p')
|
||||
|
||||
def applicable_borders(self, edge):
|
||||
if edge == 'left':
|
||||
items = {self.table, self.row, self} if self.row.first_cell is self else {self}
|
||||
elif edge == 'top':
|
||||
items = ({self.table} if self.table.first_row is self.row else set()) | {self, self.row}
|
||||
elif edge == 'right':
|
||||
items = {self.table, self, self.row} if self.row.last_cell is self else {self}
|
||||
elif edge == 'bottom':
|
||||
items = ({self.table} if self.table.last_row is self.row else set()) | {self, self.row}
|
||||
return {getattr(x, 'border_' + edge) for x in items}
|
||||
|
||||
def resolve_border(self, edge):
|
||||
# In Word cell borders override table borders, and Word ignores row
|
||||
# borders, so we consolidate all borders as cell borders
|
||||
# In HTML the priority is as described here:
|
||||
# http://www.w3.org/TR/CSS21/tables.html#border-conflict-resolution
|
||||
neighbor = self.neighbor(edge)
|
||||
borders = self.applicable_borders(edge)
|
||||
if neighbor is not None:
|
||||
nedge = {'left':'right', 'top':'bottom', 'right':'left', 'bottom':'top'}[edge]
|
||||
borders |= neighbor.applicable_borders(nedge)
|
||||
|
||||
for b in borders:
|
||||
if b.css_style == 'hidden':
|
||||
return None
|
||||
|
||||
def weight(border):
|
||||
return (
|
||||
0 if border.css_style == 'none' else 1,
|
||||
border.width,
|
||||
border_style_weight.get(border.css_style, 0),
|
||||
border.level)
|
||||
border = sorted(borders, key=weight)[-1]
|
||||
return border
|
||||
|
||||
def resolve_borders(self):
|
||||
self.borders = {edge:self.resolve_border(edge) for edge in border_edges}
|
||||
|
||||
def neighbor(self, edge):
|
||||
idx = self.row.cells.index(self)
|
||||
ans = None
|
||||
if edge == 'left':
|
||||
ans = self.row.cells[idx-1] if idx > 0 else None
|
||||
elif edge == 'right':
|
||||
ans = self.row.cells[idx+1] if (idx + 1) < len(self.row.cells) else None
|
||||
elif edge == 'top':
|
||||
ridx = self.table.rows.index(self.row)
|
||||
if ridx > 0 and idx < len(self.table.rows[ridx-1].cells):
|
||||
ans = self.table.rows[ridx-1].cells[idx]
|
||||
elif edge == 'bottom':
|
||||
ridx = self.table.rows.index(self.row)
|
||||
if ridx + 1 < len(self.table.rows) and idx < len(self.table.rows[ridx+1].cells):
|
||||
ans = self.table.rows[ridx+1].cells[idx]
|
||||
return getattr(ans, 'spanning_cell', ans)
|
||||
|
||||
|
||||
class Row(object):
|
||||
|
||||
BLEVEL = 1
|
||||
|
||||
def __init__(self, table, html_tag, tag_style=None):
|
||||
self.table = table
|
||||
self.html_tag = html_tag
|
||||
self.orig_tag_style = tag_style
|
||||
self.cells = []
|
||||
self.current_cell = None
|
||||
self.background_color = None if tag_style is None else convert_color(tag_style.backgroundColor)
|
||||
read_css_block_borders(self, tag_style)
|
||||
|
||||
@property
|
||||
def first_cell(self):
|
||||
return self.cells[0] if self.cells else None
|
||||
|
||||
@property
|
||||
def last_cell(self):
|
||||
return self.cells[-1] if self.cells else None
|
||||
|
||||
def start_new_cell(self, html_tag, tag_style):
|
||||
self.current_cell = Cell(self, html_tag, tag_style)
|
||||
|
||||
def finish_tag(self, html_tag):
|
||||
if self.current_cell is not None:
|
||||
if html_tag is self.current_cell.html_tag:
|
||||
self.cells.append(self.current_cell)
|
||||
self.current_cell = None
|
||||
|
||||
def add_block(self, block):
|
||||
if self.current_cell is None:
|
||||
self.start_new_cell(self.html_tag, self.orig_tag_style)
|
||||
self.current_cell.add_block(block)
|
||||
|
||||
def add_table(self, table):
|
||||
if self.current_cell is None:
|
||||
self.current_cell = Cell(self, self.html_tag, self.orig_tag_style)
|
||||
return self.current_cell.add_table(table)
|
||||
|
||||
def serialize(self, parent, makeelement):
|
||||
tr = makeelement(parent, 'w:tr')
|
||||
for cell in self.cells:
|
||||
cell.serialize(tr, makeelement)
|
||||
|
||||
|
||||
class Table(object):
|
||||
|
||||
BLEVEL = 0
|
||||
|
||||
def __init__(self, namespace, html_tag, tag_style=None):
|
||||
self.namespace = namespace
|
||||
self.html_tag = html_tag
|
||||
self.orig_tag_style = tag_style
|
||||
self.rows = []
|
||||
self.current_row = None
|
||||
self.width = convert_width(tag_style)
|
||||
self.background_color = None if tag_style is None else convert_color(tag_style.backgroundColor)
|
||||
self.jc = None
|
||||
self.float = None
|
||||
self.margin_left = self.margin_right = self.margin_top = self.margin_bottom = None
|
||||
if tag_style is not None:
|
||||
ml, mr = tag_style._get('margin-left'), tag_style.get('margin-right')
|
||||
if ml == 'auto':
|
||||
self.jc = 'center' if mr == 'auto' else 'right'
|
||||
self.float = tag_style['float']
|
||||
for edge in border_edges:
|
||||
setattr(self, 'margin_' + edge, tag_style['margin-' + edge])
|
||||
read_css_block_borders(self, tag_style)
|
||||
|
||||
@property
|
||||
def first_row(self):
|
||||
return self.rows[0] if self.rows else None
|
||||
|
||||
@property
|
||||
def last_row(self):
|
||||
return self.rows[-1] if self.rows else None
|
||||
|
||||
def finish_tag(self, html_tag):
|
||||
if self.current_row is not None:
|
||||
self.current_row.finish_tag(html_tag)
|
||||
if self.current_row.html_tag is html_tag:
|
||||
self.rows.append(self.current_row)
|
||||
self.current_row = None
|
||||
table_ended = self.html_tag is html_tag
|
||||
if table_ended:
|
||||
self.expand_spanned_cells()
|
||||
for row in self.rows:
|
||||
for cell in row.cells:
|
||||
cell.resolve_borders()
|
||||
return table_ended
|
||||
|
||||
def expand_spanned_cells(self):
|
||||
# Expand horizontally
|
||||
for row in self.rows:
|
||||
for cell in tuple(row.cells):
|
||||
idx = row.cells.index(cell)
|
||||
if cell.col_span > 1 and (cell is row.cells[-1] or not isinstance(row.cells[idx+1], SpannedCell)):
|
||||
row.cells[idx:idx+1] = [cell] + [SpannedCell(cell, horizontal=True) for i in range(1, cell.col_span)]
|
||||
|
||||
# Expand vertically
|
||||
for r, row in enumerate(self.rows):
|
||||
for idx, cell in enumerate(row.cells):
|
||||
if cell.row_span > 1:
|
||||
for nrow in self.rows[r+1:]:
|
||||
sc = SpannedCell(cell, horizontal=False)
|
||||
try:
|
||||
tcell = nrow.cells[idx]
|
||||
except Exception:
|
||||
tcell = None
|
||||
if tcell is None:
|
||||
nrow.cells.extend([SpannedCell(nrow.cells[-1], horizontal=True) for i in range(idx - len(nrow.cells))])
|
||||
nrow.cells.append(sc)
|
||||
else:
|
||||
if isinstance(tcell, SpannedCell):
|
||||
# Conflict between rowspan and colspan
|
||||
break
|
||||
else:
|
||||
nrow.cells.insert(idx, sc)
|
||||
|
||||
def start_new_row(self, html_tag, html_style):
|
||||
if self.current_row is not None:
|
||||
self.rows.append(self.current_row)
|
||||
self.current_row = Row(self, html_tag, html_style)
|
||||
|
||||
def start_new_cell(self, html_tag, html_style):
|
||||
if self.current_row is None:
|
||||
self.start_new_row(html_tag, None)
|
||||
self.current_row.start_new_cell(html_tag, html_style)
|
||||
|
||||
def add_block(self, block):
|
||||
self.current_row.add_block(block)
|
||||
|
||||
def add_table(self, table):
|
||||
if self.current_row is None:
|
||||
self.current_row = Row(self, self.html_tag, self.orig_tag_style)
|
||||
return self.current_row.add_table(table)
|
||||
|
||||
def serialize(self, parent):
|
||||
makeelement = self.namespace.makeelement
|
||||
rows = [r for r in self.rows if r.cells]
|
||||
if not rows:
|
||||
return
|
||||
tbl = makeelement(parent, 'w:tbl')
|
||||
tblPr = makeelement(tbl, 'w:tblPr')
|
||||
makeelement(tblPr, 'w:tblW', w_type=self.width[0], w_w=unicode_type(self.width[1]))
|
||||
if self.float in {'left', 'right'}:
|
||||
kw = {'w_vertAnchor':'text', 'w_horzAnchor':'text', 'w_tblpXSpec':self.float}
|
||||
for edge in border_edges:
|
||||
val = getattr(self, 'margin_' + edge) or 0
|
||||
if {self.float, edge} == {'left', 'right'}:
|
||||
val = max(val, 2)
|
||||
kw['w_' + edge + 'FromText'] = unicode_type(max(0, int(val *20)))
|
||||
makeelement(tblPr, 'w:tblpPr', **kw)
|
||||
if self.jc is not None:
|
||||
makeelement(tblPr, 'w:jc', w_val=self.jc)
|
||||
for row in rows:
|
||||
row.serialize(tbl, makeelement)
|
||||
58
ebook_converter/ebooks/docx/writer/utils.py
Normal file
58
ebook_converter/ebooks/docx/writer/utils.py
Normal file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from tinycss.color3 import parse_color_string
|
||||
|
||||
|
||||
def int_or_zero(raw):
|
||||
try:
|
||||
return int(raw)
|
||||
except (ValueError, TypeError, AttributeError):
|
||||
return 0
|
||||
|
||||
# convert_color() {{{
|
||||
|
||||
|
||||
def convert_color(value):
|
||||
if not value:
|
||||
return
|
||||
if value.lower() == 'currentcolor':
|
||||
return 'auto'
|
||||
val = parse_color_string(value)
|
||||
if val is None:
|
||||
return
|
||||
if val.alpha < 0.01:
|
||||
return
|
||||
return '%02X%02X%02X' % (int(val.red * 255), int(val.green * 255), int(val.blue * 255))
|
||||
|
||||
|
||||
def test_convert_color(return_tests=False):
|
||||
import unittest
|
||||
|
||||
class TestColors(unittest.TestCase):
|
||||
|
||||
def test_color_conversion(self):
|
||||
ae = self.assertEqual
|
||||
cc = convert_color
|
||||
ae(None, cc(None))
|
||||
ae(None, cc('transparent'))
|
||||
ae(None, cc('none'))
|
||||
ae(None, cc('#12j456'))
|
||||
ae('auto', cc('currentColor'))
|
||||
ae('F0F8FF', cc('AliceBlue'))
|
||||
ae('000000', cc('black'))
|
||||
ae('FF0000', cc('red'))
|
||||
ae('00FF00', cc('lime'))
|
||||
ae(cc('#001'), '000011')
|
||||
ae('12345D', cc('#12345d'))
|
||||
ae('FFFFFF', cc('rgb(255, 255, 255)'))
|
||||
ae('FF0000', cc('rgba(255, 0, 0, 23)'))
|
||||
tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestColors)
|
||||
if return_tests:
|
||||
return tests
|
||||
unittest.TextTestRunner(verbosity=4).run(tests)
|
||||
# }}}
|
||||
316
ebook_converter/ebooks/oeb/transforms/subset.py
Normal file
316
ebook_converter/ebooks/oeb/transforms/subset.py
Normal file
@@ -0,0 +1,316 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
from calibre.ebooks.oeb.base import urlnormalize, css_text
|
||||
from calibre.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont
|
||||
from polyglot.builtins import iteritems, itervalues, unicode_type, range
|
||||
from tinycss.fonts3 import parse_font_family
|
||||
|
||||
|
||||
def get_font_properties(rule, default=None):
|
||||
'''
|
||||
Given a CSS rule, extract normalized font properties from
|
||||
it. Note that shorthand font property should already have been expanded
|
||||
by the CSS flattening code.
|
||||
'''
|
||||
props = {}
|
||||
s = rule.style
|
||||
for q in ('font-family', 'src', 'font-weight', 'font-stretch',
|
||||
'font-style'):
|
||||
g = 'uri' if q == 'src' else 'value'
|
||||
try:
|
||||
val = s.getProperty(q).propertyValue[0]
|
||||
val = getattr(val, g)
|
||||
if q == 'font-family':
|
||||
val = parse_font_family(css_text(s.getProperty(q).propertyValue))
|
||||
if val and val[0] == 'inherit':
|
||||
val = None
|
||||
except (IndexError, KeyError, AttributeError, TypeError, ValueError):
|
||||
val = None if q in {'src', 'font-family'} else default
|
||||
if q in {'font-weight', 'font-stretch', 'font-style'}:
|
||||
val = unicode_type(val).lower() if (val or val == 0) else val
|
||||
if val == 'inherit':
|
||||
val = default
|
||||
if q == 'font-weight':
|
||||
val = {'normal':'400', 'bold':'700'}.get(val, val)
|
||||
if val not in {'100', '200', '300', '400', '500', '600', '700',
|
||||
'800', '900', 'bolder', 'lighter'}:
|
||||
val = default
|
||||
if val == 'normal':
|
||||
val = '400'
|
||||
elif q == 'font-style':
|
||||
if val not in {'normal', 'italic', 'oblique'}:
|
||||
val = default
|
||||
elif q == 'font-stretch':
|
||||
if val not in {'normal', 'ultra-condensed', 'extra-condensed',
|
||||
'condensed', 'semi-condensed', 'semi-expanded',
|
||||
'expanded', 'extra-expanded', 'ultra-expanded'}:
|
||||
val = default
|
||||
props[q] = val
|
||||
return props
|
||||
|
||||
|
||||
def find_font_face_rules(sheet, oeb):
|
||||
'''
|
||||
Find all @font-face rules in the given sheet and extract the relevant info from them.
|
||||
sheet can be either a ManifestItem or a CSSStyleSheet.
|
||||
'''
|
||||
ans = []
|
||||
try:
|
||||
rules = sheet.data.cssRules
|
||||
except AttributeError:
|
||||
rules = sheet.cssRules
|
||||
|
||||
for i, rule in enumerate(rules):
|
||||
if rule.type != rule.FONT_FACE_RULE:
|
||||
continue
|
||||
props = get_font_properties(rule, default='normal')
|
||||
if not props['font-family'] or not props['src']:
|
||||
continue
|
||||
|
||||
try:
|
||||
path = sheet.abshref(props['src'])
|
||||
except AttributeError:
|
||||
path = props['src']
|
||||
ff = oeb.manifest.hrefs.get(urlnormalize(path), None)
|
||||
if not ff:
|
||||
continue
|
||||
props['item'] = ff
|
||||
if props['font-weight'] in {'bolder', 'lighter'}:
|
||||
props['font-weight'] = '400'
|
||||
props['weight'] = int(props['font-weight'])
|
||||
props['rule'] = rule
|
||||
props['chars'] = set()
|
||||
ans.append(props)
|
||||
|
||||
return ans
|
||||
|
||||
|
||||
def elem_style(style_rules, cls, inherited_style):
|
||||
'''
|
||||
Find the effective style for the given element.
|
||||
'''
|
||||
classes = cls.split()
|
||||
style = inherited_style.copy()
|
||||
for cls in classes:
|
||||
style.update(style_rules.get(cls, {}))
|
||||
wt = style.get('font-weight', None)
|
||||
pwt = inherited_style.get('font-weight', '400')
|
||||
if wt == 'bolder':
|
||||
style['font-weight'] = {
|
||||
'100':'400',
|
||||
'200':'400',
|
||||
'300':'400',
|
||||
'400':'700',
|
||||
'500':'700',
|
||||
}.get(pwt, '900')
|
||||
elif wt == 'lighter':
|
||||
style['font-weight'] = {
|
||||
'600':'400', '700':'400',
|
||||
'800':'700', '900':'700'}.get(pwt, '100')
|
||||
|
||||
return style
|
||||
|
||||
|
||||
class SubsetFonts(object):
|
||||
|
||||
'''
|
||||
Subset all embedded fonts. Must be run after CSS flattening, as it requires
|
||||
CSS normalization and flattening to work.
|
||||
'''
|
||||
|
||||
def __call__(self, oeb, log, opts):
|
||||
self.oeb, self.log, self.opts = oeb, log, opts
|
||||
|
||||
self.find_embedded_fonts()
|
||||
if not self.embedded_fonts:
|
||||
self.log.debug('No embedded fonts found')
|
||||
return
|
||||
self.find_style_rules()
|
||||
self.find_font_usage()
|
||||
|
||||
totals = [0, 0]
|
||||
|
||||
def remove(font):
|
||||
totals[1] += len(font['item'].data)
|
||||
self.oeb.manifest.remove(font['item'])
|
||||
font['rule'].parentStyleSheet.deleteRule(font['rule'])
|
||||
|
||||
fonts = {}
|
||||
for font in self.embedded_fonts:
|
||||
item, chars = font['item'], font['chars']
|
||||
if item.href in fonts:
|
||||
fonts[item.href]['chars'] |= chars
|
||||
else:
|
||||
fonts[item.href] = font
|
||||
|
||||
for font in itervalues(fonts):
|
||||
if not font['chars']:
|
||||
self.log('The font %s is unused. Removing it.'%font['src'])
|
||||
remove(font)
|
||||
continue
|
||||
try:
|
||||
raw, old_stats, new_stats = subset(font['item'].data, font['chars'])
|
||||
except NoGlyphs:
|
||||
self.log('The font %s has no used glyphs. Removing it.'%font['src'])
|
||||
remove(font)
|
||||
continue
|
||||
except UnsupportedFont as e:
|
||||
self.log.warn('The font %s is unsupported for subsetting. %s'%(
|
||||
font['src'], e))
|
||||
sz = len(font['item'].data)
|
||||
totals[0] += sz
|
||||
totals[1] += sz
|
||||
else:
|
||||
font['item'].data = raw
|
||||
nlen = sum(itervalues(new_stats))
|
||||
olen = sum(itervalues(old_stats))
|
||||
self.log('Decreased the font %s to %.1f%% of its original size'%
|
||||
(font['src'], nlen/olen *100))
|
||||
totals[0] += nlen
|
||||
totals[1] += olen
|
||||
|
||||
font['item'].unload_data_from_memory()
|
||||
|
||||
if totals[0]:
|
||||
self.log('Reduced total font size to %.1f%% of original'%
|
||||
(totals[0]/totals[1] * 100))
|
||||
|
||||
def find_embedded_fonts(self):
|
||||
'''
|
||||
Find all @font-face rules and extract the relevant info from them.
|
||||
'''
|
||||
self.embedded_fonts = []
|
||||
for item in self.oeb.manifest:
|
||||
if not hasattr(item.data, 'cssRules'):
|
||||
continue
|
||||
self.embedded_fonts.extend(find_font_face_rules(item, self.oeb))
|
||||
|
||||
def find_style_rules(self):
|
||||
'''
|
||||
Extract all font related style information from all stylesheets into a
|
||||
dict mapping classes to font properties specified by that class. All
|
||||
the heavy lifting has already been done by the CSS flattening code.
|
||||
'''
|
||||
rules = defaultdict(dict)
|
||||
for item in self.oeb.manifest:
|
||||
if not hasattr(item.data, 'cssRules'):
|
||||
continue
|
||||
for i, rule in enumerate(item.data.cssRules):
|
||||
if rule.type != rule.STYLE_RULE:
|
||||
continue
|
||||
props = {k:v for k,v in
|
||||
iteritems(get_font_properties(rule)) if v}
|
||||
if not props:
|
||||
continue
|
||||
for sel in rule.selectorList:
|
||||
sel = sel.selectorText
|
||||
if sel and sel.startswith('.'):
|
||||
# We dont care about pseudo-selectors as the worst that
|
||||
# can happen is some extra characters will remain in
|
||||
# the font
|
||||
sel = sel.partition(':')[0]
|
||||
rules[sel[1:]].update(props)
|
||||
|
||||
self.style_rules = dict(rules)
|
||||
|
||||
def find_font_usage(self):
|
||||
for item in self.oeb.manifest:
|
||||
if not hasattr(item.data, 'xpath'):
|
||||
continue
|
||||
for body in item.data.xpath('//*[local-name()="body"]'):
|
||||
base = {'font-family':['serif'], 'font-weight': '400',
|
||||
'font-style':'normal', 'font-stretch':'normal'}
|
||||
self.find_usage_in(body, base)
|
||||
|
||||
def used_font(self, style):
|
||||
'''
|
||||
Given a style find the embedded font that matches it. Returns None if
|
||||
no match is found (can happen if no family matches).
|
||||
'''
|
||||
ff = style.get('font-family', [])
|
||||
lnames = {unicode_type(x).lower() for x in ff}
|
||||
matching_set = []
|
||||
|
||||
# Filter on font-family
|
||||
for ef in self.embedded_fonts:
|
||||
flnames = {x.lower() for x in ef.get('font-family', [])}
|
||||
if not lnames.intersection(flnames):
|
||||
continue
|
||||
matching_set.append(ef)
|
||||
if not matching_set:
|
||||
return None
|
||||
|
||||
# Filter on font-stretch
|
||||
widths = {x:i for i, x in enumerate(('ultra-condensed',
|
||||
'extra-condensed', 'condensed', 'semi-condensed', 'normal',
|
||||
'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded'
|
||||
))}
|
||||
|
||||
width = widths[style.get('font-stretch', 'normal')]
|
||||
for f in matching_set:
|
||||
f['width'] = widths[style.get('font-stretch', 'normal')]
|
||||
|
||||
min_dist = min(abs(width-f['width']) for f in matching_set)
|
||||
nearest = [f for f in matching_set if abs(width-f['width']) ==
|
||||
min_dist]
|
||||
if width <= 4:
|
||||
lmatches = [f for f in nearest if f['width'] <= width]
|
||||
else:
|
||||
lmatches = [f for f in nearest if f['width'] >= width]
|
||||
matching_set = (lmatches or nearest)
|
||||
|
||||
# Filter on font-style
|
||||
fs = style.get('font-style', 'normal')
|
||||
order = {
|
||||
'oblique':['oblique', 'italic', 'normal'],
|
||||
'normal':['normal', 'oblique', 'italic']
|
||||
}.get(fs, ['italic', 'oblique', 'normal'])
|
||||
for q in order:
|
||||
matches = [f for f in matching_set if f.get('font-style', 'normal') == q]
|
||||
if matches:
|
||||
matching_set = matches
|
||||
break
|
||||
|
||||
# Filter on font weight
|
||||
fw = int(style.get('font-weight', '400'))
|
||||
if fw == 400:
|
||||
q = [400, 500, 300, 200, 100, 600, 700, 800, 900]
|
||||
elif fw == 500:
|
||||
q = [500, 400, 300, 200, 100, 600, 700, 800, 900]
|
||||
elif fw < 400:
|
||||
q = [fw] + list(range(fw-100, -100, -100)) + list(range(fw+100,
|
||||
100, 1000))
|
||||
else:
|
||||
q = [fw] + list(range(fw+100, 100, 1000)) + list(range(fw-100,
|
||||
-100, -100))
|
||||
for wt in q:
|
||||
matches = [f for f in matching_set if f['weight'] == wt]
|
||||
if matches:
|
||||
return matches[0]
|
||||
|
||||
def find_chars(self, elem):
|
||||
ans = set()
|
||||
if elem.text:
|
||||
ans |= set(elem.text)
|
||||
for child in elem:
|
||||
if child.tail:
|
||||
ans |= set(child.tail)
|
||||
return ans
|
||||
|
||||
def find_usage_in(self, elem, inherited_style):
|
||||
style = elem_style(self.style_rules, elem.get('class', '') or '', inherited_style)
|
||||
for child in elem:
|
||||
self.find_usage_in(child, style)
|
||||
font = self.used_font(style)
|
||||
if font:
|
||||
chars = self.find_chars(elem)
|
||||
if chars:
|
||||
font['chars'] |= chars
|
||||
10
ebook_converter/ebooks/pdf/render/__init__.py
Normal file
10
ebook_converter/ebooks/pdf/render/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
|
||||
247
ebook_converter/ebooks/pdf/render/common.py
Normal file
247
ebook_converter/ebooks/pdf/render/common.py
Normal file
@@ -0,0 +1,247 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import codecs, zlib, numbers
|
||||
from io import BytesIO
|
||||
from datetime import datetime
|
||||
|
||||
from calibre.constants import plugins, ispy3
|
||||
from calibre.utils.logging import default_log
|
||||
from polyglot.builtins import iteritems, unicode_type, codepoint_to_chr
|
||||
from polyglot.binary import as_hex_bytes
|
||||
|
||||
pdf_float = plugins['speedup'][0].pdf_float
|
||||
|
||||
EOL = b'\n'
|
||||
|
||||
# Sizes {{{
|
||||
inch = 72.0
|
||||
cm = inch / 2.54
|
||||
mm = cm * 0.1
|
||||
pica = 12.0
|
||||
didot = 0.375 * mm
|
||||
cicero = 12 * didot
|
||||
|
||||
_W, _H = (21*cm, 29.7*cm)
|
||||
|
||||
A6 = (_W*.5, _H*.5)
|
||||
A5 = (_H*.5, _W)
|
||||
A4 = (_W, _H)
|
||||
A3 = (_H, _W*2)
|
||||
A2 = (_W*2, _H*2)
|
||||
A1 = (_H*2, _W*4)
|
||||
A0 = (_W*4, _H*4)
|
||||
|
||||
LETTER = (8.5*inch, 11*inch)
|
||||
LEGAL = (8.5*inch, 14*inch)
|
||||
ELEVENSEVENTEEN = (11*inch, 17*inch)
|
||||
|
||||
_BW, _BH = (25*cm, 35.3*cm)
|
||||
B6 = (_BW*.5, _BH*.5)
|
||||
B5 = (_BH*.5, _BW)
|
||||
B4 = (_BW, _BH)
|
||||
B3 = (_BH*2, _BW)
|
||||
B2 = (_BW*2, _BH*2)
|
||||
B1 = (_BH*4, _BW*2)
|
||||
B0 = (_BW*4, _BH*4)
|
||||
|
||||
PAPER_SIZES = {k:globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 b2'
|
||||
' b3 b4 b5 b6 letter legal').split()}
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
def fmtnum(o):
|
||||
if isinstance(o, float):
|
||||
return pdf_float(o)
|
||||
return unicode_type(o)
|
||||
|
||||
|
||||
def serialize(o, stream):
|
||||
if isinstance(o, float):
|
||||
stream.write_raw(pdf_float(o).encode('ascii'))
|
||||
elif isinstance(o, bool):
|
||||
# Must check bool before int as bools are subclasses of int
|
||||
stream.write_raw(b'true' if o else b'false')
|
||||
elif isinstance(o, numbers.Integral):
|
||||
stream.write_raw(unicode_type(o).encode('ascii') if ispy3 else bytes(o))
|
||||
elif hasattr(o, 'pdf_serialize'):
|
||||
o.pdf_serialize(stream)
|
||||
elif o is None:
|
||||
stream.write_raw(b'null')
|
||||
elif isinstance(o, datetime):
|
||||
val = o.strftime("D:%Y%m%d%H%M%%02d%z")%min(59, o.second)
|
||||
if datetime.tzinfo is not None:
|
||||
val = "(%s'%s')"%(val[:-2], val[-2:])
|
||||
stream.write(val.encode('ascii'))
|
||||
else:
|
||||
raise ValueError('Unknown object: %r'%o)
|
||||
|
||||
|
||||
class Name(unicode_type):
|
||||
|
||||
def pdf_serialize(self, stream):
|
||||
raw = self.encode('ascii')
|
||||
if len(raw) > 126:
|
||||
raise ValueError('Name too long: %r'%self)
|
||||
raw = bytearray(raw)
|
||||
sharp = ord(b'#')
|
||||
buf = (
|
||||
codepoint_to_chr(x).encode('ascii') if 33 < x < 126 and x != sharp else
|
||||
'#{:x}'.format(x).encode('ascii') for x in raw)
|
||||
stream.write(b'/'+b''.join(buf))
|
||||
|
||||
|
||||
def escape_pdf_string(bytestring):
|
||||
indices = []
|
||||
bad = []
|
||||
ba = bytearray(bytestring)
|
||||
bad_map = {10:ord('n'), 13:ord('r'), 12:ord('f'), 8:ord('b'), 9:ord('\t'), 92:ord('\\')}
|
||||
for i, num in enumerate(ba):
|
||||
if num == 40: # (
|
||||
indices.append((i, 40))
|
||||
elif num == 41: # )
|
||||
if indices:
|
||||
indices.pop()
|
||||
else:
|
||||
bad.append((i, 41))
|
||||
elif num in bad_map: # '\n\r\f\b\t\\' see Table 3.2 in PDF 1.7 spec
|
||||
bad.append((i, bad_map[num]))
|
||||
bad = sorted(indices + bad, reverse=True)
|
||||
if not bad:
|
||||
return bytestring
|
||||
for i, repl in bad:
|
||||
ba[i:i+1] = (92, repl) # 92 = ord('\')
|
||||
return bytes(ba)
|
||||
|
||||
|
||||
class String(unicode_type):
|
||||
|
||||
def pdf_serialize(self, stream):
|
||||
try:
|
||||
raw = self.encode('latin1')
|
||||
if raw.startswith(codecs.BOM_UTF16_BE):
|
||||
raw = codecs.BOM_UTF16_BE + self.encode('utf-16-be')
|
||||
except UnicodeEncodeError:
|
||||
raw = codecs.BOM_UTF16_BE + self.encode('utf-16-be')
|
||||
stream.write(b'('+escape_pdf_string(raw)+b')')
|
||||
|
||||
|
||||
class UTF16String(unicode_type):

    """A PDF string that is always serialized as UTF-16-BE with a BOM."""

    def pdf_serialize(self, stream):
        raw = codecs.BOM_UTF16_BE + self.encode('utf-16-be')
        if False:
            # Hex-encoded strings also work, but parenthesized strings make
            # the generated PDF easier to debug, so this path stays disabled.
            stream.write(b'<' + as_hex_bytes(raw) + b'>')
        else:
            stream.write(b'(' + escape_pdf_string(raw) + b')')
|
||||
|
||||
|
||||
class Dictionary(dict):

    """A PDF dictionary object (<< ... >>). Keys are serialized as Names;
    /Type and /Subtype are always written first, remaining keys in sorted
    order, one entry per line."""

    def pdf_serialize(self, stream):
        stream.write(b'<<' + EOL)

        def sort_key(k):
            # Prefixing with '1'/'2' forces Type and Subtype to sort first
            return {'Type': '1', 'Subtype': '2'}.get(k, k) + k

        for key in sorted(self, key=sort_key):
            serialize(Name(key), stream)
            stream.write(b' ')
            serialize(self[key], stream)
            stream.write(EOL)
        stream.write(b'>>' + EOL)
|
||||
|
||||
|
||||
class InlineDictionary(Dictionary):

    """A PDF dictionary serialized on a single line (no EOLs), suitable for
    embedding inside another object such as a stream header."""

    def pdf_serialize(self, stream):
        stream.write(b'<< ')
        for key, value in iteritems(self):
            serialize(Name(key), stream)
            stream.write(b' ')
            serialize(value, stream)
            stream.write(b' ')
        stream.write(b'>>')
|
||||
|
||||
|
||||
class Array(list):

    """A PDF array object: items serialized space-separated inside [...]."""

    def pdf_serialize(self, stream):
        stream.write(b'[')
        first = True
        for item in self:
            if not first:
                stream.write(b' ')
            first = False
            serialize(item, stream)
        stream.write(b']')
|
||||
|
||||
|
||||
class Stream(BytesIO):

    """An in-memory PDF stream object. Payload is accumulated via write();
    pdf_serialize() emits the stream dictionary followed by the (optionally
    zlib-compressed) data between the stream/endstream keywords."""

    def __init__(self, compress=False):
        BytesIO.__init__(self)
        self.compress = compress
        self.filters = Array()

    def add_extra_keys(self, d):
        # Hook for subclasses to add entries to the stream dictionary
        pass

    def pdf_serialize(self, stream):
        raw = self.getvalue()
        uncompressed_len = len(raw)
        filters = self.filters
        if self.compress:
            filters.append(Name('FlateDecode'))
            raw = zlib.compress(raw)

        # DL records the uncompressed length, Length the stored length
        d = InlineDictionary({'Length': len(raw), 'DL': uncompressed_len})
        self.add_extra_keys(d)
        if filters:
            d['Filter'] = filters
        serialize(d, stream)
        stream.write(EOL + b'stream' + EOL)
        stream.write(raw)
        stream.write(EOL + b'endstream' + EOL)

    def write_line(self, raw=b''):
        # write() handles the bytes/str distinction
        self.write(raw)
        self.write(EOL)

    def write(self, raw):
        data = raw if isinstance(raw, bytes) else raw.encode('ascii')
        super(Stream, self).write(data)

    def write_raw(self, raw):
        # Bypass the ascii-encoding logic in write()
        BytesIO.write(self, raw)
|
||||
|
||||
|
||||
class Reference(object):

    """An indirect reference to a PDF object, serialized as ``num 0 R``.
    Keeps the referenced object itself in ``self.obj``."""

    def __init__(self, num, obj):
        self.num, self.obj = num, obj

    def pdf_serialize(self, stream):
        stream.write(('%d 0 R' % self.num).encode('ascii'))

    def __repr__(self):
        return '%d 0 R' % self.num

    def __str__(self):
        return repr(self)
|
||||
# }}}
|
||||
|
||||
|
||||
def current_log(newlog=None):
    """Return the currently active log object, falling back to default_log.

    Passing a truthy `newlog` installs it as the active log (stored as a
    function attribute) before returning it.
    """
    if newlog:
        current_log.ans = newlog
    ans = current_log.ans
    return ans if ans else default_log


current_log.ans = None
|
||||
80
ebook_converter/utils/fonts/sfnt/__init__.py
Normal file
80
ebook_converter/utils/fonts/sfnt/__init__.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
||||
def align_block(raw, multiple=4, pad=b'\0'):
    '''
    Return raw with enough pad bytes appended to make its length a whole
    multiple of `multiple` (4 by default, as required for sfnt tables).
    '''
    remainder = len(raw) % multiple
    if remainder:
        return raw + pad * (multiple - remainder)
    return raw
|
||||
|
||||
|
||||
class UnknownTable(object):

    """Fallback wrapper for sfnt tables this library does not parse: the
    raw bytes are stored and returned unchanged."""

    def __init__(self, raw):
        self.raw = raw

    def __call__(self):
        # Serialization: the stored bytes are already the on-disk form
        return self.raw

    def __len__(self):
        return len(self.raw)
|
||||
|
||||
|
||||
class DateTimeProperty(object):

    """Descriptor that exposes an integer attribute holding seconds since
    the sfnt epoch (midnight, 1 January 1904) as a datetime instance."""

    def __init__(self, name):
        # Name of the instance attribute storing the raw seconds count
        self.name = name

    def __get__(self, obj, type=None):
        seconds = getattr(obj, self.name)
        return datetime(1904, 1, 1) + timedelta(seconds=seconds)

    def __set__(self, obj, val):
        delta = val - datetime(1904, 1, 1)
        setattr(obj, self.name, int(delta.total_seconds()))
|
||||
|
||||
|
||||
class FixedProperty(object):

    """Descriptor that exposes an integer attribute holding a 16.16
    fixed-point value as a float.

    Fix: ``__set__`` previously *returned* the converted fixed-point value
    instead of storing it, so assignments through the descriptor were
    silently ignored. It now stores the value on the instance, mirroring
    DateTimeProperty above.
    """

    def __init__(self, name):
        # Name of the instance attribute storing the raw 16.16 integer
        self.name = name

    def __get__(self, obj, type=None):
        val = getattr(obj, self.name)
        return val / 0x10000

    def __set__(self, obj, val):
        setattr(obj, self.name, int(round(val * 0x10000)))
|
||||
|
||||
|
||||
def max_power_of_two(x):
    """
    Return the largest exponent e >= 0 such that (2 ** e) <= x.
    Returns 0 when x is 0 (there is no valid exponent in that case).
    """
    exponent = -1
    while x:
        x >>= 1
        exponent += 1
    return max(exponent, 0)
|
||||
|
||||
|
||||
def load_font(stream_or_path):
    """Parse an sfnt font. Accepts either raw bytes or a file-like object
    opened in binary mode; returns an Sfnt instance."""
    data = stream_or_path
    if hasattr(data, 'read'):
        data = data.read()
    from calibre.utils.fonts.sfnt.container import Sfnt
    return Sfnt(data)
|
||||
|
||||
10
ebook_converter/utils/fonts/sfnt/cff/__init__.py
Normal file
10
ebook_converter/utils/fonts/sfnt/cff/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
|
||||
182
ebook_converter/utils/fonts/sfnt/cff/constants.py
Normal file
182
ebook_converter/utils/fonts/sfnt/cff/constants.py
Normal file
@@ -0,0 +1,182 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
# cff_standard_strings {{{
|
||||
# The 391 Standard Strings as used in the CFF format.
|
||||
# from Adobe Technical Note #5176, version 1.0, 18 March 1998
|
||||
|
||||
# Maps SID (string ID) 0-390 to the corresponding standard string; custom
# strings stored in a font's String INDEX continue from SID 391.
cff_standard_strings = [
    '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
    'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
    'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
    'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
    'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
    'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
    'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
    'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
    'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
    'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
    'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
    'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
    'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
    'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
    'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
    'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
    'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
    'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
    'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
    'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
    'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
    'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
    'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
    'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
    'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
    'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
    'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
    'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
    'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
    'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
    'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
    'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
    'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
    'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
    'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
    'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
    'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
    'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
    'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
    'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
    'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
    'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
    'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
    'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
    'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
    'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
    'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
    'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
    'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
    'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
    'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
    'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
    'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
    'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
    'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
    'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
    'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
    'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
    'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
    '001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
    'Semibold'
]
# }}}
|
||||
|
||||
|
||||
# The three predefined charsets from the CFF specification (Adobe Technical
# Note #5176, Appendix C). The list index is the predefined charset id
# stored in the Top DICT's `charset` operator; each tuple maps glyph id to
# glyph name.
STANDARD_CHARSETS = [ # {{{
    # 0: ISOAdobe
    (".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
     "percent", "ampersand", "quoteright", "parenleft", "parenright",
     "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
     "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
     "colon", "semicolon", "less", "equal", "greater", "question", "at",
     "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
     "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
     "bracketleft", "backslash", "bracketright", "asciicircum",
     "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
     "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
     "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
     "exclamdown", "cent", "sterling", "fraction", "yen", "florin",
     "section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
     "guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
     "daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
     "quotedblbase", "quotedblright", "guillemotright", "ellipsis",
     "perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
     "macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
     "hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
     "Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
     "oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
     "trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
     "divide", "brokenbar", "degree", "thorn", "threequarters",
     "twosuperior", "registered", "minus", "eth", "multiply",
     "threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
     "Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
     "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
     "Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
     "Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
     "Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
     "aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
     "egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
     "oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
     "uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
     "zcaron"),

    # 1: Expert
    # Fix: glyph 0 was "notdef" (missing the leading dot); the spec names
    # it ".notdef" in all predefined charsets, consistent with the other
    # two entries and with cff_standard_strings above.
    (".notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
     "dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
     "parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
     "hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
     "twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
     "sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
     "colon", "semicolon", "commasuperior", "threequartersemdash",
     "periodsuperior", "questionsmall", "asuperior", "bsuperior",
     "centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
     "msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
     "tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
     "parenrightinferior", "Circumflexsmall", "hyphensuperior",
     "Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
     "Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
     "Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
     "Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
     "colonmonetary", "onefitted", "rupiah", "Tildesmall",
     "exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
     "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
     "Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
     "Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
     "threequarters", "questiondownsmall", "oneeighth", "threeeighths",
     "fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
     "onesuperior", "twosuperior", "threesuperior", "foursuperior",
     "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
     "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
     "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
     "seveninferior", "eightinferior", "nineinferior", "centinferior",
     "dollarinferior", "periodinferior", "commainferior", "Agravesmall",
     "Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
     "Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
     "Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
     "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
     "Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
     "Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
     "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
     "Thornsmall", "Ydieresissmall"),

    # 2: Expert Subset
    (".notdef", "space", "dollaroldstyle", "dollarsuperior",
     "parenleftsuperior", "parenrightsuperior", "twodotenleader",
     "onedotenleader", "comma", "hyphen", "period", "fraction",
     "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
     "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
     "eightoldstyle", "nineoldstyle", "colon", "semicolon",
     "commasuperior", "threequartersemdash", "periodsuperior",
     "asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
     "isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
     "rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
     "ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
     "colonmonetary", "onefitted", "rupiah", "centoldstyle",
     "figuredash", "hypheninferior", "onequarter", "onehalf",
     "threequarters", "oneeighth", "threeeighths", "fiveeighths",
     "seveneighths", "onethird", "twothirds", "zerosuperior",
     "onesuperior", "twosuperior", "threesuperior", "foursuperior",
     "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
     "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
     "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
     "seveninferior", "eightinferior", "nineinferior", "centinferior",
     "dollarinferior", "periodinferior", "commainferior"),
] # }}}
|
||||
|
||||
311
ebook_converter/utils/fonts/sfnt/cff/dict_data.py
Normal file
311
ebook_converter/utils/fonts/sfnt/cff/dict_data.py
Normal file
@@ -0,0 +1,311 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import pack, unpack_from
|
||||
from polyglot.builtins import range, unicode_type
|
||||
|
||||
# Operand-encoding dispatch tables: the index is the first byte (b0) of an
# operand/operator; the value is the name of the ByteCode method that
# decodes the remaining bytes.

# Type 1 charstring encoding
t1_operand_encoding = [None] * 256
t1_operand_encoding[0:32] = (32) * ["do_operator"]
t1_operand_encoding[32:247] = (247 - 32) * ["read_byte"]
t1_operand_encoding[247:251] = (251 - 247) * ["read_small_int1"]
t1_operand_encoding[251:255] = (255 - 251) * ["read_small_int2"]
t1_operand_encoding[255] = "read_long_int"

# Type 2 charstring encoding: adds 16-bit ints and 16.16 fixed point
t2_operand_encoding = t1_operand_encoding[:]
t2_operand_encoding[28] = "read_short_int"
t2_operand_encoding[255] = "read_fixed_1616"

# CFF DICT encoding: adds 32-bit ints and packed-BCD real numbers
cff_dict_operand_encoding = t2_operand_encoding[:]
cff_dict_operand_encoding[29] = "read_long_int"
cff_dict_operand_encoding[30] = "read_real_number"
cff_dict_operand_encoding[255] = "reserved"

# Nibble values used by the packed-BCD real number encoding
# (0xd is reserved, hence None; 0xf terminates the number)
real_nibbles = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                '.', 'E', 'E-', None, '-']
real_nibbles_map = {x:i for i, x in enumerate(real_nibbles)}
|
||||
|
||||
|
||||
class ByteCode(dict):

    """Reader/writer for the primitive operand encodings shared by Type 1
    and Type 2 charstrings and CFF DICT data (see Adobe Technical Notes
    #5176 and #5177).

    Each read_* method receives the already-consumed first byte ``b0``,
    the raw data and the index of the next unread byte, and returns a
    ``(value, new_index)`` tuple. The write_* methods return the encoded
    bytes.
    """

    def read_byte(self, b0, data, index):
        # One-byte integer: b0 in 32..246 encodes b0 - 139, i.e. -107..107
        return b0 - 139, index

    def read_small_int1(self, b0, data, index):
        # Two-byte positive integer: b0 in 247..250, range 108..1131
        b1 = ord(data[index:index+1])
        return (b0-247)*256 + b1 + 108, index+1

    def read_small_int2(self, b0, data, index):
        # Two-byte negative integer: b0 in 251..254, range -1131..-108
        b1 = ord(data[index:index+1])
        return -(b0-251)*256 - b1 - 108, index+1

    def read_short_int(self, b0, data, index):
        # 16-bit big-endian signed integer (operand code 28)
        value, = unpack_from(b">h", data, index)
        return value, index+2

    def read_long_int(self, b0, data, index):
        # 32-bit big-endian signed integer
        value, = unpack_from(b">l", data, index)
        return value, index+4

    def read_fixed_1616(self, b0, data, index):
        # 16.16 fixed-point number (Type 2 operand code 255)
        value, = unpack_from(b">l", data, index)
        return value / 65536.0, index+4

    def read_real_number(self, b0, data, index):
        # Packed-BCD real number: two nibbles per byte, 0xf terminates
        number = ''
        while True:
            b = ord(data[index:index+1])
            index = index + 1
            nibble0 = (b & 0xf0) >> 4
            nibble1 = b & 0x0f
            if nibble0 == 0xf:
                break
            number = number + real_nibbles[nibble0]
            if nibble1 == 0xf:
                break
            number = number + real_nibbles[nibble1]
        return float(number), index

    def write_float(self, f, encoding='ignored'):
        # Encode a real number as packed-BCD nibbles (operand code 30).
        # `encoding` is accepted only so write_number can call either
        # writer with the same signature.
        s = unicode_type(f).upper()
        if s[:2] == "0.":
            # "0.5" -> ".5": saves a nibble
            s = s[1:]
        elif s[:3] == "-0.":
            s = "-" + s[2:]
        nibbles = []
        while s:
            c = s[0]
            s = s[1:]
            if c == "E" and s[:1] == "-":
                # "E-" has its own dedicated nibble code
                s = s[1:]
                c = "E-"
            nibbles.append(real_nibbles_map[c])
        nibbles.append(0xf)  # terminator
        if len(nibbles) % 2:
            nibbles.append(0xf)  # pad to a whole number of bytes
        d = bytearray([30])  # operand code for a real number
        for i in range(0, len(nibbles), 2):
            d.append(nibbles[i] << 4 | nibbles[i+1])
        return bytes(d)

    def write_int(self, value, encoding="cff"):
        # Choose the shortest integer encoding valid for `encoding`
        # ('cff' DICT or 't1' charstring; anything else means Type 2).
        four_byte_op = {'cff':29, 't1':255}.get(encoding, None)

        if -107 <= value <= 107:
            code = bytes(bytearray([value + 139]))
        elif 108 <= value <= 1131:
            value = value - 108
            code = bytes(bytearray([(value >> 8) + 247, (value & 0xFF)]))
        elif -1131 <= value <= -108:
            value = -value - 108
            code = bytes(bytearray([(value >> 8) + 251, (value & 0xFF)]))
        elif four_byte_op is None:
            # T2 only supports 2 byte ints
            code = bytes(bytearray([28])) + pack(b">h", value)
        else:
            code = bytes(bytearray([four_byte_op])) + pack(b">l", value)
        return code

    def write_offset(self, value):
        # Offsets always use the fixed-width 5-byte form so they can be
        # patched later without changing the layout
        return bytes(bytearray([29])) + pack(b">l", value)

    def write_number(self, value, encoding="cff"):
        # Dispatch to the float or integer writer based on the value's type
        f = self.write_float if isinstance(value, float) else self.write_int
        return f(value, encoding)
||||
|
||||
|
||||
class Dict(ByteCode):

    """Base class for CFF DICT structures (Top DICT and Private DICT).

    A DICT is a sequence of operand(s)-then-operator pairs. decompile()
    parses the binary form into this mapping, keyed by operator name;
    compile() serializes it back using the encode_* helpers inherited from
    ByteCode. Subclasses supply TABLE describing their operators.
    """

    operand_encoding = cff_dict_operand_encoding
    # Rows of (opcode, operator name, argument type, default value)
    TABLE = ()
    # Operators that compile() never writes out
    FILTERED = frozenset()
    # Operators whose numeric value is an offset (written fixed-width so
    # the value can be patched without shifting the layout)
    OFFSETS = frozenset()

    def __init__(self):
        ByteCode.__init__(self)

        # opcode -> (operator name, argument type)
        self.operators = {op:(name, arg) for op, name, arg, default in
                self.TABLE}
        # operator name -> spec default (used when the operator is absent)
        self.defaults = {name:default for op, name, arg, default in self.TABLE}

    def safe_get(self, name):
        """Return self[name], falling back to the operator's default."""
        return self.get(name, self.defaults[name])

    def decompile(self, strings, global_subrs, data):
        """Parse the binary DICT in `data` into this mapping."""
        self.strings = strings
        self.global_subrs = global_subrs
        self.stack = []
        index = 0
        while index < len(data):
            b0 = ord(data[index:index+1])
            index += 1
            # Operand bytes push values onto the stack; operator bytes
            # consume the stack (via do_operator)
            handler = getattr(self, self.operand_encoding[b0])
            value, index = handler(b0, data, index)
            if value is not None:
                self.stack.append(value)

    def do_operator(self, b0, data, index):
        if b0 == 12:
            # Escaped (two-byte) operator
            op = (b0, ord(data[index:index+1]))
            index += 1
        else:
            op = b0
        operator, arg_type = self.operators[op]
        self.handle_operator(operator, arg_type)
        return None, index

    def handle_operator(self, operator, arg_type):
        # Pop the operator's operand(s) off the stack and store them
        if isinstance(arg_type, tuple):
            value = ()
            # Pop in reverse so the tuple ends up in declaration order
            for i in range(len(arg_type)-1, -1, -1):
                arg = arg_type[i]
                arghandler = getattr(self, 'arg_' + arg)
                value = (arghandler(operator),) + value
        else:
            arghandler = getattr(self, 'arg_' + arg_type)
            value = arghandler(operator)
        self[operator] = value

    def arg_number(self, name):
        return self.stack.pop()

    def arg_SID(self, name):
        # A SID operand indexes the String INDEX
        return self.strings[self.stack.pop()]

    def arg_array(self, name):
        ans = self.stack[:]
        del self.stack[:]
        return ans

    def arg_delta(self, name):
        # Delta-encoded array: each operand is relative to the previous one
        out = []
        current = 0
        for v in self.stack:
            current = current + v
            out.append(current)
        del self.stack[:]
        return out

    def compile(self, strings):
        """Serialize this DICT; `strings` maps a string to its SID."""
        data = []
        for op, name, arg, default in self.TABLE:
            if name in self.FILTERED:
                continue
            val = self.safe_get(name)
            opcode = bytes(bytearray(op if isinstance(op, tuple) else [op]))
            if val != self.defaults[name]:
                self.encoding_offset = name in self.OFFSETS
                if isinstance(arg, tuple):
                    if len(val) != len(arg):
                        raise ValueError('Invalid argument %s for operator: %s'
                                %(val, op))
                    for typ, v in zip(arg, val):
                        if typ == 'SID':
                            # Fix: convert this element to its SID; the old
                            # code rebound the whole tuple (val = strings(val))
                            # and then encoded the unconverted element
                            v = strings(v)
                        data.append(getattr(self, 'encode_'+typ)(v))
                else:
                    if arg == 'SID':
                        val = strings(val)
                    data.append(getattr(self, 'encode_'+arg)(val))
                data.append(opcode)
        self.raw = b''.join(data)
        return self.raw

    def encode_number(self, val):
        if self.encoding_offset:
            return self.write_offset(val)
        return self.write_number(val)

    def encode_SID(self, val):
        return self.write_int(val)

    def encode_array(self, val):
        return b''.join(map(self.encode_number, val))

    def encode_delta(self, value):
        # Inverse of arg_delta: store differences between successive values
        out = []
        last = 0
        for v in value:
            out.append(v - last)
            last = v
        return self.encode_array(out)
|
||||
|
||||
|
||||
class TopDict(Dict):

    """The CFF Top DICT: font-wide metadata plus the offsets of the other
    structures inside the CFF table (charset, CharStrings, Private DICT)."""

    TABLE = (
    # opcode     name                  argument type        default
    ((12, 30), 'ROS', ('SID','SID','number'), None,),
    ((12, 20), 'SyntheticBase', 'number', None,),
    (0, 'version', 'SID', None,),
    (1, 'Notice', 'SID', None,),
    ((12, 0), 'Copyright', 'SID', None,),
    (2, 'FullName', 'SID', None,),
    ((12, 38), 'FontName', 'SID', None,),
    (3, 'FamilyName', 'SID', None,),
    (4, 'Weight', 'SID', None,),
    ((12, 1), 'isFixedPitch', 'number', 0,),
    ((12, 2), 'ItalicAngle', 'number', 0,),
    ((12, 3), 'UnderlinePosition', 'number', None,),
    ((12, 4), 'UnderlineThickness', 'number', 50,),
    ((12, 5), 'PaintType', 'number', 0,),
    ((12, 6), 'CharstringType', 'number', 2,),
    ((12, 7), 'FontMatrix', 'array', [0.001,0,0,0.001,0,0],),
    (13, 'UniqueID', 'number', None,),
    (5, 'FontBBox', 'array', [0,0,0,0],),
    ((12, 8), 'StrokeWidth', 'number', 0,),
    (14, 'XUID', 'array', None,),
    ((12, 21), 'PostScript', 'SID', None,),
    ((12, 22), 'BaseFontName', 'SID', None,),
    ((12, 23), 'BaseFontBlend', 'delta', None,),
    ((12, 31), 'CIDFontVersion', 'number', 0,),
    ((12, 32), 'CIDFontRevision', 'number', 0,),
    ((12, 33), 'CIDFontType', 'number', 0,),
    ((12, 34), 'CIDCount', 'number', 8720,),
    (15, 'charset', 'number', 0,),
    ((12, 35), 'UIDBase', 'number', None,),
    (16, 'Encoding', 'number', 0,),
    (18, 'Private', ('number','number'), None,),
    ((12, 37), 'FDSelect', 'number', None,),
    ((12, 36), 'FDArray', 'number', None,),
    (17, 'CharStrings', 'number', None,),
    )

    # We will not write these operators out (CID-keyed font machinery and
    # identifiers that do not survive subsetting)
    FILTERED = {'ROS', 'SyntheticBase', 'UniqueID', 'XUID',
        'CIDFontVersion', 'CIDFontRevision', 'CIDFontType', 'CIDCount',
        'UIDBase', 'Encoding', 'FDSelect', 'FDArray'}
    # Values that are byte offsets into the CFF table
    OFFSETS = {'charset', 'Encoding', 'CharStrings', 'Private'}
|
||||
|
||||
|
||||
class PrivateDict(Dict):

    """The CFF Private DICT: hinting parameters plus the offset of the
    local subroutine INDEX for a single font."""

    TABLE = (
    # opcode     name                  argument type  default
    (6, 'BlueValues', 'delta', None,),
    (7, 'OtherBlues', 'delta', None,),
    (8, 'FamilyBlues', 'delta', None,),
    (9, 'FamilyOtherBlues', 'delta', None,),
    ((12, 9), 'BlueScale', 'number', 0.039625,),
    ((12, 10), 'BlueShift', 'number', 7,),
    ((12, 11), 'BlueFuzz', 'number', 1,),
    (10, 'StdHW', 'number', None,),
    (11, 'StdVW', 'number', None,),
    ((12, 12), 'StemSnapH', 'delta', None,),
    ((12, 13), 'StemSnapV', 'delta', None,),
    ((12, 14), 'ForceBold', 'number', 0,),
    ((12, 15), 'ForceBoldThreshold', 'number', None,), # deprecated
    ((12, 16), 'lenIV', 'number', None,), # deprecated
    ((12, 17), 'LanguageGroup', 'number', 0,),
    ((12, 18), 'ExpansionFactor', 'number', 0.06,),
    ((12, 19), 'initialRandomSeed', 'number', 0,),
    (20, 'defaultWidthX', 'number', 0,),
    (21, 'nominalWidthX', 'number', 0,),
    (19, 'Subrs', 'number', None,),
    )

    # Subrs is an offset, relative to the start of the Private DICT data
    OFFSETS = {'Subrs'}
|
||||
221
ebook_converter/utils/fonts/sfnt/cff/table.py
Normal file
221
ebook_converter/utils/fonts/sfnt/cff/table.py
Normal file
@@ -0,0 +1,221 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import unpack_from, unpack, calcsize
|
||||
from functools import partial
|
||||
|
||||
from calibre.utils.fonts.sfnt import UnknownTable
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
|
||||
from calibre.utils.fonts.sfnt.cff.dict_data import TopDict, PrivateDict
|
||||
from calibre.utils.fonts.sfnt.cff.constants import (cff_standard_strings,
|
||||
STANDARD_CHARSETS)
|
||||
from polyglot.builtins import iteritems, itervalues, range
|
||||
|
||||
# Useful links
|
||||
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
|
||||
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
|
||||
|
||||
|
||||
class CFF(object):

    """Parses the header, INDEX structures and DICTs of a CFF table so
    that the font can be subset. Only single-font, non-CID-keyed tables
    with Type 2 charstrings are supported; anything else raises
    UnsupportedFont."""

    def __init__(self, raw):
        # CFF header: major/minor version, header size, absolute offset size
        (self.major_version, self.minor_version, self.header_size,
                self.offset_size) = unpack_from(b'>4B', raw)
        if (self.major_version, self.minor_version) != (1, 0):
            raise UnsupportedFont('The CFF table has unknown version: '
                    '(%d, %d)'%(self.major_version, self.minor_version))
        offset = self.header_size

        # Read Names Index
        self.font_names = Index(raw, offset)
        offset = self.font_names.pos
        if len(self.font_names) > 1:
            raise UnsupportedFont('CFF table has more than one font.')

        # Read Top Dict
        self.top_index = Index(raw, offset)
        self.top_dict = TopDict()
        offset = self.top_index.pos

        # Read strings
        self.strings = Strings(raw, offset)
        offset = self.strings.pos

        # Read global subroutines
        self.global_subrs = Subrs(raw, offset)
        offset = self.global_subrs.pos

        # Decompile Top Dict (the single entry of the Top DICT INDEX)
        self.top_dict.decompile(self.strings, self.global_subrs, self.top_index[0])
        # The ROS operator is only present in CID-keyed fonts
        self.is_CID = 'ROS' in self.top_dict
        if self.is_CID:
            raise UnsupportedFont('Subsetting of CID keyed fonts is not supported')

        # Read CharStrings (Glyph definitions)
        try:
            offset = self.top_dict['CharStrings']
        except KeyError:
            raise ValueError('This font has no CharStrings')
        cs_type = self.top_dict.safe_get('CharstringType')
        if cs_type != 2:
            raise UnsupportedFont('This font has unsupported CharstringType: '
                    '%s'%cs_type)
        self.char_strings = CharStringsIndex(raw, offset)
        self.num_glyphs = len(self.char_strings)

        # Read Private Dict
        self.private_dict = self.private_subrs = None
        pd = self.top_dict.safe_get('Private')
        if pd:
            # pd is the (size, offset) of the Private DICT in the table
            size, offset = pd
            self.private_dict = PrivateDict()
            self.private_dict.decompile(self.strings, self.global_subrs,
                    raw[offset:offset+size])
            if 'Subrs' in self.private_dict:
                # The local Subrs offset is relative to the Private DICT
                self.private_subrs = Subrs(raw, offset +
                        self.private_dict['Subrs'])

        # Read charset (Glyph names)
        self.charset = Charset(raw, self.top_dict.safe_get('charset'),
                self.strings, self.num_glyphs, self.is_CID)

        # import pprint
        # pprint.pprint(self.top_dict)
        # pprint.pprint(self.private_dict)
|
||||
|
||||
|
||||
class Index(list):

    '''A CFF INDEX structure: a count, an offset array and packed object data.

    Parses the INDEX found at *offset* inside *raw* and appends one bytes
    object per entry (after any *prepend* entries).  After construction,
    ``self.pos`` is the offset of the first byte past the INDEX.'''

    def __init__(self, raw, offset, prepend=()):
        list.__init__(self)
        self.extend(prepend)

        num_entries = unpack_from(b'>H', raw, offset)[0]
        offset += 2
        self.pos = offset

        if num_entries > 0:
            self.offset_size = unpack_from(b'>B', raw, offset)[0]
            offset += 1
            if self.offset_size == 3:
                # struct has no 3-byte integer code, so widen each offset to
                # four bytes before unpacking
                offsets = [unpack(b'>L', b'\0' + raw[pos:pos+3])[0]
                           for pos in range(offset, offset + 3*(num_entries+1), 3)]
            else:
                code = {1: 'B', 2: 'H', 4: 'L'}[self.offset_size]
                packed_fmt = ('>%d%s' % (num_entries + 1, code)).encode('ascii')
                offsets = unpack_from(packed_fmt, raw, offset)
            # Object offsets are 1-based relative to the byte preceding the
            # object data, hence the trailing -1
            offset += self.offset_size * (num_entries + 1) - 1

            for start, end in zip(offsets, offsets[1:]):
                self.append(raw[offset + start:offset + end])

            try:
                self.pos = offset + offsets[-1]
            except IndexError:
                self.pos = offset
|
||||
|
||||
|
||||
class Strings(Index):

    '''The CFF String INDEX, with the standard strings prepended so that a
    SID can be used to index this list directly.'''

    def __init__(self, raw, offset):
        super(Strings, self).__init__(raw, offset, prepend=[x.encode('ascii')
            for x in cff_standard_strings])
|
||||
|
||||
|
||||
class Charset(list):

    '''Maps glyph ids to glyph names (or CID strings) for a CFF font.

    If the font uses one of the predefined charsets (charset offset 0, 1
    or 2) nothing is parsed and lookups are served from STANDARD_CHARSETS.'''

    def __init__(self, raw, offset, strings, num_glyphs, is_CID):
        super(Charset, self).__init__()
        # Charset offsets 0-2 select a predefined charset rather than data
        self.standard_charset = offset if offset in {0, 1, 2} else None
        if is_CID and self.standard_charset is not None:
            raise ValueError("CID font must not use a standard charset")
        if self.standard_charset is None:
            self.append(b'.notdef')  # glyph id 0 is always .notdef
            fmt = unpack_from(b'>B', raw, offset)[0]
            offset += 1
            f = {0:self.parse_fmt0, 1:self.parse_fmt1,
                2:partial(self.parse_fmt1, is_two_byte=True)}.get(fmt, None)
            if f is None:
                raise UnsupportedFont('This font uses unsupported charset '
                        'table format: %d'%fmt)
            f(raw, offset, strings, num_glyphs, is_CID)

    def parse_fmt0(self, raw, offset, strings, num_glyphs, is_CID):
        # Format 0: one SID (or CID) per glyph, excluding .notdef
        fmt = ('>%dH'%(num_glyphs-1)).encode('ascii')
        ids = unpack_from(fmt, raw, offset)
        if is_CID:
            ids = ('cid%05d'%x for x in ids)
        else:
            ids = (strings[x] for x in ids)
        self.extend(ids)

    def parse_fmt1(self, raw, offset, strings, num_glyphs, is_CID,
            is_two_byte=False):
        # Formats 1/2: runs of consecutive ids stored as (first, nLeft)
        # records; format 2 uses a 16 bit nLeft instead of 8 bit
        fmt = b'>2H' if is_two_byte else b'>HB'
        sz = calcsize(fmt)
        count = 1
        while count < num_glyphs:
            first, nleft = unpack_from(fmt, raw, offset)
            offset += sz
            count += nleft + 1
            self.extend('cid%05d'%x if is_CID else strings[x] for x in
                    range(first, first + nleft+1))

    def lookup(self, glyph_id):
        '''Return the name for *glyph_id*, consulting the predefined charset
        tables when the font uses one.'''
        if self.standard_charset is None:
            return self[glyph_id]
        return STANDARD_CHARSETS[self.standard_charset][glyph_id].encode('ascii')

    def safe_lookup(self, glyph_id):
        '''Like lookup(), but returns None instead of raising for an unknown
        or out-of-range glyph id.'''
        try:
            return self.lookup(glyph_id)
        except (KeyError, IndexError, ValueError):
            return None
|
||||
|
||||
|
||||
class Subrs(Index):

    'An INDEX of charstring subroutines (local or global).'
    pass
|
||||
|
||||
|
||||
class CharStringsIndex(Index):

    'The INDEX containing the charstrings (glyph programs) of the font.'
    pass
|
||||
|
||||
|
||||
class CFFTable(UnknownTable):

    "The sfnt 'CFF ' table, wrapping a parsed CFF font."

    def decompile(self):
        # Parse the raw table data into a CFF object
        self.cff = CFF(self.raw)

    def subset(self, character_map, extra_glyphs):
        '''Replace self.raw with a subset CFF containing only the glyphs in
        character_map (code -> glyph id) plus extra_glyphs (glyph ids).
        character_map is rebuilt in place to refer to the new glyph ids.'''
        from calibre.utils.fonts.sfnt.cff.writer import Subset
        # Map codes from the cmap table to glyph names, this will be used to
        # reconstruct character_map for the subset font
        charset_map = {code:self.cff.charset.safe_lookup(glyph_id) for code,
                glyph_id in iteritems(character_map)}
        charset = set(itervalues(charset_map))
        charset.discard(None)
        if not charset and character_map:
            raise NoGlyphs('This font has no glyphs for the specified characters')
        charset |= {
            self.cff.charset.safe_lookup(glyph_id) for glyph_id in extra_glyphs}
        charset.discard(None)
        s = Subset(self.cff, charset)

        # Rebuild character_map with the glyph ids from the subset font
        character_map.clear()
        for code, charname in iteritems(charset_map):
            glyph_id = s.charname_map.get(charname, None)
            if glyph_id:
                character_map[code] = glyph_id

        # Check that raw is parseable
        CFF(s.raw)

        self.raw = s.raw
|
||||
290
ebook_converter/utils/fonts/sfnt/cmap.py
Normal file
290
ebook_converter/utils/fonts/sfnt/cmap.py
Normal file
@@ -0,0 +1,290 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
# Note that the code for creating a BMP table (cmap format 4) is taken with
|
||||
# thanks from the fonttools project (BSD licensed).
|
||||
|
||||
from struct import unpack_from, calcsize, pack
|
||||
from collections import OrderedDict
|
||||
|
||||
from calibre.utils.fonts.utils import read_bmp_prefix
|
||||
from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||
from polyglot.builtins import range
|
||||
|
||||
|
||||
def split_range(start_code, end_code, cmap):  # {{{
    '''Split the character code range [start_code, end_code] into cmap format
    4 segments, returning (start_codes, end_codes); start_codes omits the
    first entry, which the caller already holds.'''
    # Try to split a range of character codes into subranges with consecutive
    # glyph IDs in such a way that the cmap4 subtable can be stored "most"
    # efficiently.
    if start_code == end_code:
        return [], [end_code]

    last_id = cmap[start_code]
    last_code = start_code
    in_order = None
    ordered_begin = None
    sub_ranges = []

    # Gather subranges in which the glyph IDs are consecutive.
    for code in range(start_code + 1, end_code + 1):
        glyph_id = cmap[code]

        if glyph_id - 1 == last_id:
            if in_order is None or not in_order:
                in_order = 1
                ordered_begin = last_code
        else:
            if in_order:
                in_order = 0
                sub_ranges.append((ordered_begin, last_code))
                ordered_begin = None

        last_id = glyph_id
        last_code = code

    if in_order:
        sub_ranges.append((ordered_begin, last_code))
    assert last_code == end_code

    # Now filter out those new subranges that would only make the data bigger.
    # A new segment cost 8 bytes, not using a new segment costs 2 bytes per
    # character.
    new_ranges = []
    for b, e in sub_ranges:
        if b == start_code and e == end_code:
            break  # the whole range, we're fine
        if b == start_code or e == end_code:
            threshold = 4  # split costs one more segment
        else:
            threshold = 8  # split costs two more segments
        if (e - b + 1) > threshold:
            new_ranges.append((b, e))
    sub_ranges = new_ranges

    if not sub_ranges:
        return [], [end_code]

    # Extend the first/last kept subranges to cover the full input range
    if sub_ranges[0][0] != start_code:
        sub_ranges.insert(0, (start_code, sub_ranges[0][0] - 1))
    if sub_ranges[-1][1] != end_code:
        sub_ranges.append((sub_ranges[-1][1] + 1, end_code))

    # Fill the "holes" in the segments list -- those are the segments in which
    # the glyph IDs are _not_ consecutive.
    i = 1
    while i < len(sub_ranges):
        if sub_ranges[i-1][1] + 1 != sub_ranges[i][0]:
            sub_ranges.insert(i, (sub_ranges[i-1][1] + 1, sub_ranges[i][0] - 1))
            i = i + 1
        i = i + 1

    # Transform the ranges into start_code/end_code lists.
    start = []
    end = []
    for b, e in sub_ranges:
        start.append(b)
        end.append(e)
    start.pop(0)

    assert len(start) + 1 == len(end)
    return start, end
# }}}
|
||||
|
||||
|
||||
def set_id_delta(id_delta):  # {{{
    '''Fold *id_delta* into the range representable by a signed 16 bit int.

    The cmap format 4 idDelta field is a signed short, but the difference
    between a glyph id and a start code can lie anywhere between -(64K-2)
    and 64K-1.  Because the final glyph id is computed modulo 0x10000
    ((gid + id_delta) % 0x10000), adding or subtracting 0x10000 produces an
    equivalent delta that fits in a short.'''
    if id_delta > 0x7FFF:
        return id_delta - 0x10000
    if id_delta < -0x7FFF:
        return id_delta + 0x10000
    return id_delta
# }}}
|
||||
|
||||
|
||||
class BMPTable(object):

    'A parsed cmap format 4 (Windows BMP) subtable.'

    def __init__(self, raw):
        self.raw = raw
        # Segment arrays and glyph id array, parsed by read_bmp_prefix
        (self.start_count, self.end_count, self.range_offset, self.id_delta,
            self.glyph_id_len, self.glyph_id_map, self.array_len) = \
            read_bmp_prefix(raw, 0)

    def get_glyph_ids(self, codes):
        '''Yield the glyph id for each character code in *codes*, yielding 0
        (.notdef) for unmapped codes.'''
        for code in codes:
            found = False
            for i, ec in enumerate(self.end_count):
                if ec >= code:
                    sc = self.start_count[i]
                    if sc <= code:
                        found = True
                        ro = self.range_offset[i]
                        if ro == 0:
                            glyph_id = self.id_delta[i] + code
                        else:
                            # glyphIdArray indexing per the format 4 spec:
                            # idRangeOffset is in bytes relative to its own
                            # position in the table
                            idx = ro//2 + (code - sc) + i - self.array_len
                            glyph_id = self.glyph_id_map[idx]
                            if glyph_id != 0:
                                glyph_id += self.id_delta[i]
                        yield glyph_id % 0x10000
                    break
            if not found:
                yield 0

    def get_glyph_map(self, glyph_ids):
        '''Return a map of character code -> glyph id limited to the given
        glyph_ids; the first code found for a glyph wins.'''
        ans = {}
        for i, ec in enumerate(self.end_count):
            sc = self.start_count[i]
            for code in range(sc, ec+1):
                ro = self.range_offset[i]
                if ro == 0:
                    glyph_id = self.id_delta[i] + code
                else:
                    idx = ro//2 + (code - sc) + i - self.array_len
                    glyph_id = self.glyph_id_map[idx]
                    if glyph_id != 0:
                        glyph_id += self.id_delta[i]
                glyph_id %= 0x10000
                if glyph_id in glyph_ids and code not in ans:
                    ans[code] = glyph_id
        return ans
|
||||
|
||||
|
||||
class CmapTable(UnknownTable):

    '''The sfnt cmap table.  Only the (platform 3, encoding 1) Windows BMP
    format 4 subtable is parsed; set_character_map() regenerates the whole
    table with a single such subtable.'''

    def __init__(self, *args, **kwargs):
        super(CmapTable, self).__init__(*args, **kwargs)

        self.version, self.num_tables = unpack_from(b'>HH', self.raw)

        self.tables = {}

        offset = 4
        sz = calcsize(b'>HHL')
        recs = []
        for i in range(self.num_tables):
            platform, encoding, table_offset = unpack_from(b'>HHL', self.raw,
                    offset)
            offset += sz
            recs.append((platform, encoding, table_offset))

        self.bmp_table = None

        # NOTE(review): each subtable's end is taken as the next record's
        # offset, which assumes the records are stored in increasing offset
        # order -- confirm against real fonts
        for i in range(len(recs)):
            platform, encoding, offset = recs[i]
            try:
                next_offset = recs[i+1][-1]
            except IndexError:
                next_offset = len(self.raw)
            table = self.raw[offset:next_offset]
            if table:
                fmt = unpack_from(b'>H', table)[0]
                if platform == 3 and encoding == 1 and fmt == 4:
                    self.bmp_table = BMPTable(table)

    def get_character_map(self, chars):
        '''
        Get a mapping of character codes to glyph ids in the font.
        '''
        if self.bmp_table is None:
            raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
                    ' Most likely a special purpose font.')
        chars = sorted(set(chars))
        ans = OrderedDict()
        for i, glyph_id in enumerate(self.bmp_table.get_glyph_ids(chars)):
            if glyph_id > 0:
                ans[chars[i]] = glyph_id
        return ans

    def get_glyph_map(self, glyph_ids):
        '''
        Get a mapping of character codes to glyph ids for the specified glyph
        ids.
        '''
        if self.bmp_table is None:
            raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
                    ' Most likely a special purpose font.')
        glyph_ids = frozenset(glyph_ids)
        return self.bmp_table.get_glyph_map(glyph_ids)

    def set_character_map(self, cmap):
        '''Replace the entire table with a single format 4 (3, 1) subtable
        built from *cmap*, a mapping of character code -> glyph id.'''
        self.version, self.num_tables = 0, 1
        fmt = b'>7H'
        codes = sorted(cmap)

        if not codes:
            # An empty map still needs the mandatory terminating 0xffff segment
            start_code = [0xffff]
            end_code = [0xffff]
        else:
            last_code = codes[0]
            end_code = []
            start_code = [last_code]

            # Group the codes into contiguous runs, splitting each run into
            # storage-efficient segments via split_range()
            for code in codes[1:]:
                if code == last_code + 1:
                    last_code = code
                    continue
                start, end = split_range(start_code[-1], last_code, cmap)
                start_code.extend(start)
                end_code.extend(end)
                start_code.append(code)
                last_code = code
            end_code.append(last_code)
            start_code.append(0xffff)
            end_code.append(0xffff)

        id_delta = []
        id_range_offset = []
        glyph_index_array = []
        for i in range(len(end_code)-1):  # skip the closing codes (0xffff)
            indices = list(cmap[char_code] for char_code in range(start_code[i], end_code[i] + 1))
            if indices == list(range(indices[0], indices[0] + len(indices))):
                # indices is a contiguous list
                id_delta_temp = set_id_delta(indices[0] - start_code[i])
                id_delta.append(id_delta_temp)
                id_range_offset.append(0)
            else:
                # Non-contiguous glyphs: store them in glyphIndexArray and
                # record the byte offset (relative to the idRangeOffset slot)
                id_delta.append(0)
                id_range_offset.append(2 * (len(end_code) + len(glyph_index_array) - i))
                glyph_index_array.extend(indices)
        id_delta.append(1)  # 0xffff + 1 == 0. So this end code maps to .notdef
        id_range_offset.append(0)

        seg_count = len(end_code)
        # Binary-search helper fields required by the format 4 header
        max_exponent = max_power_of_two(seg_count)
        search_range = 2 * (2 ** max_exponent)
        entry_selector = max_exponent
        range_shift = 2 * seg_count - search_range

        # endCode[], reservedPad, startCode[]
        char_code_array = end_code + [0] + start_code
        char_code_array = pack(b'>%dH'%len(char_code_array), *char_code_array)
        id_delta_array = pack(b'>%dh'%len(id_delta), *id_delta)
        rest_array = id_range_offset + glyph_index_array
        rest_array = pack(b'>%dH'%len(rest_array), *rest_array)
        data = char_code_array + id_delta_array + rest_array

        length = calcsize(fmt) + len(data)
        header = pack(fmt, 4, length, 0, 2*seg_count, search_range, entry_selector, range_shift)
        self.bmp_table = header + data

        # Rebuild the cmap table header with one (3, 1) encoding record
        fmt = b'>4HL'
        offset = calcsize(fmt)
        self.raw = pack(fmt, self.version, self.num_tables, 3, 1, offset) + self.bmp_table
|
||||
252
ebook_converter/utils/fonts/sfnt/common.py
Normal file
252
ebook_converter/utils/fonts/sfnt/common.py
Normal file
@@ -0,0 +1,252 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import unpack_from, calcsize
|
||||
from collections import OrderedDict, namedtuple
|
||||
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||
from polyglot.builtins import range, iteritems
|
||||
|
||||
|
||||
class Unpackable(object):
|
||||
|
||||
def __init__(self, raw, offset):
|
||||
self.raw, self.offset = raw, offset
|
||||
self.start_pos = offset
|
||||
|
||||
def unpack(self, fmt, single_special=True):
|
||||
fmt = fmt.encode('ascii') if not isinstance(fmt, bytes) else fmt
|
||||
ans = unpack_from(b'>'+fmt, self.raw, self.offset)
|
||||
if single_special and len(ans) == 1:
|
||||
ans = ans[0]
|
||||
self.offset += calcsize(fmt)
|
||||
return ans
|
||||
|
||||
|
||||
class SimpleListTable(list):

    'A table that contains a list of subtables'

    # Subclasses set this to the class used to parse each subtable
    child_class = None

    def __init__(self, raw, offset):
        list.__init__(self)

        data = Unpackable(raw, offset)
        self.read_extra_header(data)

        # A count followed by that many 16 bit offsets, each relative to the
        # start of this table
        count = data.unpack('H')
        for i in range(count):
            offset = data.unpack('H')
            self.append(self.child_class(raw, data.start_pos + offset))
        self.read_extra_footer(data)

    def read_extra_header(self, data):
        # Hook for subclasses: consume fields that precede the count
        pass

    def read_extra_footer(self, data):
        # Hook for subclasses: consume fields that follow the offset array
        pass
|
||||
|
||||
|
||||
class ListTable(OrderedDict):

    'A table that contains an ordered mapping of table tag to subtable'

    # Subclasses set this to the class used to parse each subtable
    child_class = None

    def __init__(self, raw, offset):
        OrderedDict.__init__(self)

        data = Unpackable(raw, offset)
        self.read_extra_header(data)

        # A count followed by (4 byte tag, 16 bit offset) records; offsets
        # are relative to the start of this table
        count = data.unpack('H')
        for i in range(count):
            tag, coffset = data.unpack('4sH')
            self[tag] = self.child_class(raw, data.start_pos + coffset)

        self.read_extra_footer(data)

    def read_extra_header(self, data):
        # Hook for subclasses: consume fields that precede the count
        pass

    def read_extra_footer(self, data):
        # Hook for subclasses: consume fields that follow the records
        pass

    def dump(self, prefix=''):
        # Debug helper: print this table and its children as a tree
        print(prefix, self.__class__.__name__, sep='')
        prefix += ' '
        for tag, child in iteritems(self):
            print(prefix, tag, sep='')
            child.dump(prefix=prefix+' ')
|
||||
|
||||
|
||||
class IndexTable(list):

    'A table that is a plain list of 16 bit indices.'

    def __init__(self, raw, offset):
        data = Unpackable(raw, offset)
        self.read_extra_header(data)

        count = data.unpack('H')
        for i in range(count):
            self.append(data.unpack('H'))

    def read_extra_header(self, data):
        # Hook for subclasses: consume fields that precede the count
        pass

    def dump(self, prefix=''):
        # Debug helper
        print(prefix, self.__class__.__name__, sep='')
|
||||
|
||||
|
||||
class LanguageSystemTable(IndexTable):

    'A LangSys table: a list of feature indices for one language system.'

    def read_extra_header(self, data):
        self.lookup_order, self.required_feature_index = data.unpack('2H')
        # lookupOrder is reserved and must be NULL (0)
        if self.lookup_order != 0:
            raise UnsupportedFont('This LanguageSystemTable has an unknown'
                    ' lookup order: 0x%x'%self.lookup_order)
|
||||
|
||||
|
||||
class ScriptTable(ListTable):

    '''Maps language system tags to LangSys tables; the default language
    system (which may be absent) is stored under the b'default' key.'''

    child_class = LanguageSystemTable

    def __init__(self, raw, offset):
        ListTable.__init__(self, raw, offset)

    def read_extra_header(self, data):
        # The DefaultLangSys offset precedes the LangSysRecord count; a zero
        # offset means there is no default language system
        start_pos = data.offset
        default_offset = data.unpack('H')
        self[b'default'] = (LanguageSystemTable(data.raw, start_pos +
            default_offset) if default_offset else None)
|
||||
|
||||
|
||||
class ScriptListTable(ListTable):

    'Maps script tags to ScriptTable instances.'

    child_class = ScriptTable
|
||||
|
||||
|
||||
class FeatureTable(IndexTable):

    'A Feature table: a list of lookup indices for one feature.'

    def read_extra_header(self, data):
        self.feature_params = data.unpack('H')
        # NOTE(review): this validation is deliberately disabled with
        # `if False` because real fonts set FeatureParams to non NULL
        if False and self.feature_params != 0:
            # Source code pro sets this to non NULL
            raise UnsupportedFont(
                'This FeatureTable has non NULL FeatureParams: 0x%x'%self.feature_params)
|
||||
|
||||
|
||||
class FeatureListTable(ListTable):

    'Maps feature tags to FeatureTable instances.'

    child_class = FeatureTable
|
||||
|
||||
|
||||
class LookupTable(SimpleListTable):

    '''A Lookup table: a list of lookup subtables.  Subclasses choose the
    subtable class from self.lookup_type via set_child_class().'''

    def read_extra_header(self, data):
        self.lookup_type, self.lookup_flag = data.unpack('2H')
        self.set_child_class()

    def set_child_class(self):
        # Subclasses must map self.lookup_type to the subtable class
        raise NotImplementedError()

    def read_extra_footer(self, data):
        # Bit 0x0010 (UseMarkFilteringSet) means a MarkFilteringSet field
        # follows the subtable offset array
        if self.lookup_flag & 0x0010:
            self.mark_filtering_set = data.unpack('H')
|
||||
|
||||
|
||||
def ExtensionSubstitution(raw, offset, subtable_map={}):
    '''Resolve an extension subtable: read the real lookup type and a 32 bit
    offset to the actual subtable, then parse it with the class found in
    subtable_map.  (The mutable default is never mutated here, so it is
    harmless, though callers are expected to pass their own map.)'''
    data = Unpackable(raw, offset)
    subst_format, extension_lookup_type, offset = data.unpack('2HL')
    if subst_format != 1:
        raise UnsupportedFont('ExtensionSubstitution has unknown format: 0x%x'%subst_format)
    return subtable_map[extension_lookup_type](raw, offset+data.start_pos)
|
||||
|
||||
|
||||
CoverageRange = namedtuple('CoverageRange', 'start end start_coverage_index')
|
||||
|
||||
|
||||
class Coverage(object):

    'A Coverage table: maps glyph ids to coverage indices.'

    def __init__(self, raw, offset, parent_table_name):
        data = Unpackable(raw, offset)
        self.format, count = data.unpack('2H')

        if self.format not in {1, 2}:
            raise UnsupportedFont('Unknown Coverage format: 0x%x in %s'%(
                self.format, parent_table_name))
        if self.format == 1:
            # Format 1: a flat array of covered glyph ids; the coverage index
            # is the position in that array
            self.glyph_ids = data.unpack('%dH'%count, single_special=False)
            self.glyph_ids_map = {gid:i for i, gid in
                    enumerate(self.glyph_ids)}
        else:
            # Format 2: (start, end, startCoverageIndex) ranges of glyph ids
            self.ranges = []
            ranges = data.unpack('%dH'%(3*count), single_special=False)
            for i in range(count):
                start, end, start_coverage_index = ranges[i*3:(i+1)*3]
                self.ranges.append(CoverageRange(start, end, start_coverage_index))

    def coverage_indices(self, glyph_ids):
        '''Return map of glyph_id -> coverage index. Map contains only those
        glyph_ids that are covered by this table and that are present in
        glyph_ids.'''
        ans = OrderedDict()
        for gid in glyph_ids:
            if self.format == 1:
                idx = self.glyph_ids_map.get(gid, None)
                if idx is not None:
                    ans[gid] = idx
            else:
                for start, end, start_coverage_index in self.ranges:
                    if start <= gid <= end:
                        ans[gid] = start_coverage_index + (gid-start)
        return ans
|
||||
|
||||
|
||||
class UnknownLookupSubTable(object):

    '''Base class for lookup subtables.  Subclasses declare the formats they
    understand in ``formats`` and parse format-specific data in
    initialize().'''

    formats = {}

    def __init__(self, raw, offset):
        data = Unpackable(raw, offset)
        self.format = data.unpack('H')
        if self.format not in self.formats:
            raise UnsupportedFont('Unknown format for Lookup Subtable %s: 0x%x'%(
                self.__class__.__name__, self.format))
        if self.has_initial_coverage:
            # Most formats start with an offset (relative to the subtable
            # start) to a Coverage table
            coverage_offset = data.unpack('H') + data.start_pos
            self.coverage = Coverage(raw, coverage_offset, self.__class__.__name__)
        self.initialize(data)

    @property
    def has_initial_coverage(self):
        # Subclasses whose format has no leading coverage offset override this
        return True

    def all_substitutions(self, glyph_ids):
        ''' Return a set of all glyph ids that could be substituted for any
        subset of the specified glyph ids (which must be a set)'''
        raise NotImplementedError()

    def read_sets(self, data, read_item=None, set_is_index=False):
        '''Read an array of offsets to "sets", each of which is itself an
        array of offsets to items; returns a list (indexed by coverage index)
        of lists of items parsed by read_item.'''
        count = data.unpack('H')
        sets = data.unpack('%dH'%count, single_special=False)
        coverage_to_items_map = []
        for offset in sets:
            # Read items in the set
            data.offset = start_pos = offset + data.start_pos
            count = data.unpack('H')
            item_offsets = data.unpack('%dH'%count, single_special=False)
            items = []
            # NOTE(review): the inner loop reuses the name `offset`; harmless
            # since `sets` is already materialized, but easy to misread
            for offset in item_offsets:
                data.offset = offset + start_pos
                if set_is_index:
                    # NOTE(review): stores the raw offset value itself rather
                    # than calling read_item -- confirm this is intentional
                    items.append(offset)
                else:
                    items.append(read_item(data))
            coverage_to_items_map.append(items)
        return coverage_to_items_map
|
||||
171
ebook_converter/utils/fonts/sfnt/container.py
Normal file
171
ebook_converter/utils/fonts/sfnt/container.py
Normal file
@@ -0,0 +1,171 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
# License: GPLv3 Copyright: 2012, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from collections import OrderedDict
|
||||
from io import BytesIO
|
||||
from struct import calcsize, pack
|
||||
|
||||
from calibre.utils.fonts.sfnt import UnknownTable, align_block, max_power_of_two
|
||||
from calibre.utils.fonts.sfnt.cff.table import CFFTable
|
||||
from calibre.utils.fonts.sfnt.cmap import CmapTable
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||
from calibre.utils.fonts.sfnt.glyf import GlyfTable
|
||||
from calibre.utils.fonts.sfnt.gsub import GSUBTable
|
||||
from calibre.utils.fonts.sfnt.head import (
|
||||
HeadTable, HorizontalHeader, OS2Table, PostTable, VerticalHeader
|
||||
)
|
||||
from calibre.utils.fonts.sfnt.kern import KernTable
|
||||
from calibre.utils.fonts.sfnt.loca import LocaTable
|
||||
from calibre.utils.fonts.sfnt.maxp import MaxpTable
|
||||
from calibre.utils.fonts.utils import checksum_of_block, get_tables, verify_checksums
|
||||
|
||||
# OpenType spec: http://www.microsoft.com/typography/otspec/otff.htm
|
||||
|
||||
|
||||
class Sfnt(object):

    '''An sfnt container (TrueType/OpenType font): a mapping of 4-byte table
    tags to parsed table objects.  Calling the instance serializes it back
    into raw font data.'''

    # Tags with dedicated parsers; everything else becomes an UnknownTable
    TABLE_MAP = {
        b'head' : HeadTable,
        b'hhea' : HorizontalHeader,
        b'vhea' : VerticalHeader,
        b'maxp' : MaxpTable,
        b'loca' : LocaTable,
        b'glyf' : GlyfTable,
        b'cmap' : CmapTable,
        b'CFF ' : CFFTable,
        b'kern' : KernTable,
        b'GSUB' : GSUBTable,
        b'OS/2' : OS2Table,
        b'post' : PostTable,
    }

    def __init__(self, raw_or_get_table):
        '''raw_or_get_table is either the complete font as bytes, or a
        callable that returns the raw data of a single table given its tag.'''
        self.tables = {}
        if isinstance(raw_or_get_table, bytes):
            raw = raw_or_get_table
            self.sfnt_version = raw[:4]
            if self.sfnt_version not in {b'\x00\x01\x00\x00', b'OTTO', b'true',
                    b'type1'}:
                raise UnsupportedFont('Font has unknown sfnt version: %r'%self.sfnt_version)
            for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
                self.tables[table_tag] = self.TABLE_MAP.get(
                    table_tag, UnknownTable)(table)
        else:
            # No table directory available: probe every tag we know about
            for table_tag in {
                    b'cmap', b'hhea', b'head', b'hmtx', b'maxp', b'name', b'OS/2',
                    b'post', b'cvt ', b'fpgm', b'glyf', b'loca', b'prep', b'CFF ',
                    b'VORG', b'EBDT', b'EBLC', b'EBSC', b'BASE', b'GSUB', b'GPOS',
                    b'GDEF', b'JSTF', b'gasp', b'hdmx', b'kern', b'LTSH', b'PCLT',
                    b'VDMX', b'vhea', b'vmtx', b'MATH'}:
                table = bytes(raw_or_get_table(table_tag))
                if table:
                    self.tables[table_tag] = self.TABLE_MAP.get(
                        table_tag, UnknownTable)(table)
            if not self.tables:
                raise UnsupportedFont('This font has no tables')
            # TrueType outlines if a glyf table is present, else CFF (OTTO)
            self.sfnt_version = (b'\0\x01\0\0' if b'glyf' in self.tables
                                else b'OTTO')

    def __getitem__(self, key):
        return self.tables[key]

    def __contains__(self, key):
        return key in self.tables

    def __delitem__(self, key):
        del self.tables[key]

    def __iter__(self):
        '''Iterate over the table tags in order.'''
        for x in sorted(self.tables):
            yield x
        # Although the optimal order is not alphabetical, the OTF spec says
        # they should be alphabetical, so we stick with that. See
        # http://partners.adobe.com/public/developer/opentype/index_recs.html
        # for optimal order.
        # keys = list(self.tables)
        # order = {x:i for i, x in enumerate((b'head', b'hhea', b'maxp', b'OS/2',
        #     b'hmtx', b'LTSH', b'VDMX', b'hdmx', b'cmap', b'fpgm', b'prep',
        #     b'cvt ', b'loca', b'glyf', b'CFF ', b'kern', b'name', b'post',
        #     b'gasp', b'PCLT', b'DSIG'))}
        # keys.sort(key=lambda x:order.get(x, 1000))
        # for x in keys:
        #     yield x

    def pop(self, key, default=None):
        return self.tables.pop(key, default)

    def get(self, key, default=None):
        return self.tables.get(key, default)

    def sizes(self):
        '''Map of table tag -> length of the table's serialized form.'''
        ans = OrderedDict()
        for tag in self:
            ans[tag] = len(self[tag])
        return ans

    def __call__(self, stream=None):
        '''Serialize the font into *stream* (a fresh BytesIO if None) and
        return (raw_bytes, sizes) where sizes maps tag -> unpadded length.'''
        stream = BytesIO() if stream is None else stream

        def spack(*args):
            stream.write(pack(*args))

        stream.seek(0)

        # Write header (searchRange/entrySelector/rangeShift are the binary
        # search helper fields required by the sfnt header)
        num_tables = len(self.tables)
        ln2 = max_power_of_two(num_tables)
        srange = (2**ln2) * 16
        spack(b'>4s4H',
            self.sfnt_version, num_tables, srange, ln2, num_tables * 16 - srange)

        # Write tables
        head_offset = None
        table_data = []
        offset = stream.tell() + (calcsize(b'>4s3L') * num_tables)
        sizes = OrderedDict()
        for tag in self:
            table = self.tables[tag]
            raw = table()
            table_len = len(raw)
            if tag == b'head':
                head_offset = offset
                # Zero the checkSumAdjustment field before checksumming
                raw = raw[:8] + b'\0\0\0\0' + raw[12:]
            raw = align_block(raw)
            checksum = checksum_of_block(raw)
            spack(b'>4s3L', tag, checksum, offset, table_len)
            offset += len(raw)
            table_data.append(raw)
            sizes[tag] = table_len

        for x in table_data:
            stream.write(x)

        # Set checkSumAdjustment in head so the whole font checksums to the
        # magic value 0xB1B0AFBA
        checksum = checksum_of_block(stream.getvalue())
        q = (0xB1B0AFBA - checksum) & 0xffffffff
        stream.seek(head_offset + 8)
        spack(b'>L', q)

        return stream.getvalue(), sizes
|
||||
|
||||
|
||||
def test_roundtrip(ff=None):
    '''Parse and re-serialize a font, verifying that checksums, the header
    and the total size survive.  Uses the bundled Liberation Serif font when
    *ff* is None, otherwise reads the font file at path *ff*.'''
    if ff is None:
        data = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
    else:
        with open(ff, 'rb') as f:
            data = f.read()
    rd = Sfnt(data)()[0]
    verify_checksums(rd)
    if data[:12] != rd[:12]:
        raise ValueError('Roundtripping failed, font header not the same')
    if len(data) != len(rd):
        raise ValueError('Roundtripping failed, size different (%d vs. %d)'%
                (len(data), len(rd)))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Roundtrip the font file named on the command line
    import sys
    test_roundtrip(sys.argv[-1])
|
||||
16
ebook_converter/utils/fonts/sfnt/errors.py
Normal file
16
ebook_converter/utils/fonts/sfnt/errors.py
Normal file
@@ -0,0 +1,16 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
class UnsupportedFont(ValueError):

    'Raised when a font uses a format or feature the sfnt code cannot parse.'
    pass
|
||||
|
||||
|
||||
class NoGlyphs(ValueError):

    'Raised when subsetting finds no glyphs for the requested characters.'
    pass
|
||||
|
||||
95
ebook_converter/utils/fonts/sfnt/glyf.py
Normal file
95
ebook_converter/utils/fonts/sfnt/glyf.py
Normal file
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import unpack_from
|
||||
from collections import OrderedDict
|
||||
|
||||
from calibre.utils.fonts.sfnt import UnknownTable
|
||||
from polyglot.builtins import iteritems
|
||||
|
||||
ARG_1_AND_2_ARE_WORDS = 0x0001 # if set args are words otherwise they are bytes
|
||||
ARGS_ARE_XY_VALUES = 0x0002 # if set args are xy values, otherwise they are points
|
||||
ROUND_XY_TO_GRID = 0x0004 # for the xy values if above is true
|
||||
WE_HAVE_A_SCALE = 0x0008 # Sx = Sy, otherwise scale == 1.0
|
||||
NON_OVERLAPPING = 0x0010 # set to same value for all components (obsolete!)
|
||||
MORE_COMPONENTS = 0x0020 # indicates at least one more glyph after this one
|
||||
WE_HAVE_AN_X_AND_Y_SCALE = 0x0040 # Sx, Sy
|
||||
WE_HAVE_A_TWO_BY_TWO = 0x0080 # t00, t01, t10, t11
|
||||
WE_HAVE_INSTRUCTIONS = 0x0100 # instructions follow
|
||||
USE_MY_METRICS = 0x0200 # apply these metrics to parent glyph
|
||||
OVERLAP_COMPOUND = 0x0400 # used by Apple in GX fonts
|
||||
SCALED_COMPONENT_OFFSET = 0x0800 # composite designed to have the component offset scaled (designed for Apple)
|
||||
UNSCALED_COMPONENT_OFFSET = 0x1000 # composite designed not to have the component offset scaled (designed for MS)
|
||||
|
||||
|
||||
class SimpleGlyph(object):

    '''A simple (non-composite) glyph: just the raw glyph program bytes plus
    its contour count.  Calling the instance returns the raw data.'''

    def __init__(self, num_of_countours, raw):
        self.raw = raw
        self.num_of_countours = num_of_countours
        # Glyph ids referenced by this glyph: always empty for a simple
        # glyph, populated by CompositeGlyph for composites
        self.glyph_indices = []
        self.is_composite = False

    def __len__(self):
        return len(self.raw)

    def __call__(self):
        return self.raw
|
||||
|
||||
|
||||
class CompositeGlyph(SimpleGlyph):

    """A composite glyph: one that references other glyphs by id.

    Parses the component records so that the ids of all referenced
    glyphs are collected in self.glyph_indices (the subsetter needs them
    to keep the referenced glyphs alive).
    """

    def __init__(self, num_of_countours, raw):
        super(CompositeGlyph, self).__init__(num_of_countours, raw)
        self.is_composite = True

        # Component records start at byte 10, right after the glyph header
        pos = 10
        flags = MORE_COMPONENTS
        while flags & MORE_COMPONENTS:
            flags, glyph_index = unpack_from(b'>HH', raw, pos)
            self.glyph_indices.append(glyph_index)
            pos += 4  # flags word + glyph index
            # Skip the two component arguments (words or bytes)
            pos += 4 if flags & ARG_1_AND_2_ARE_WORDS else 2
            # Skip the optional transformation; its size depends on flags
            if flags & WE_HAVE_A_SCALE:
                pos += 2
            elif flags & WE_HAVE_AN_X_AND_Y_SCALE:
                pos += 4
            elif flags & WE_HAVE_A_TWO_BY_TWO:
                pos += 8
|
||||
|
||||
|
||||
class GlyfTable(UnknownTable):

    """The 'glyf' table: raw glyph outline records, addressed via 'loca'."""

    def glyph_data(self, offset, length, as_raw=False):
        """Return the glyph record at (offset, length), either as raw
        bytes or wrapped in a SimpleGlyph/CompositeGlyph instance."""
        raw = self.raw[offset:offset + length]
        if as_raw:
            return raw
        # An empty record is an empty glyph (zero contours); a negative
        # contour count marks a composite glyph.
        num_of_countours = unpack_from(b'>h', raw)[0] if raw else 0
        cls = SimpleGlyph if num_of_countours >= 0 else CompositeGlyph
        return cls(num_of_countours, raw)

    def update(self, sorted_glyph_map):
        """Rebuild self.raw to contain only the glyphs in sorted_glyph_map
        (glyph_id -> glyph, sorted by glyph id).

        Returns an OrderedDict mapping glyph_id -> (offset, size) of each
        glyph inside the rebuilt table.
        """
        ans = OrderedDict()
        pos = 0
        chunks = []
        for glyph_id, glyph in sorted_glyph_map.items():
            data = glyph()
            # Pad every record to a four byte boundary
            extra = -len(data) % 4
            if extra:
                data += b'\0' * extra
            ans[glyph_id] = (pos, len(data))
            pos += len(data)
            chunks.append(data)
        self.raw = b''.join(chunks)
        return ans
|
||||
191
ebook_converter/utils/fonts/sfnt/gsub.py
Normal file
191
ebook_converter/utils/fonts/sfnt/gsub.py
Normal file
@@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import unpack_from
|
||||
from functools import partial
|
||||
|
||||
from calibre.utils.fonts.sfnt import UnknownTable, FixedProperty
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||
from calibre.utils.fonts.sfnt.common import (ScriptListTable, FeatureListTable,
|
||||
SimpleListTable, LookupTable, ExtensionSubstitution,
|
||||
UnknownLookupSubTable)
|
||||
from polyglot.builtins import iteritems, itervalues
|
||||
|
||||
|
||||
class SingleSubstitution(UnknownLookupSubTable):

    # GSUB LookupType 1: replace one glyph with another, either by a
    # constant delta (format 1) or via an explicit substitute array
    # (format 2).
    formats = {1, 2}

    def initialize(self, data):
        if self.format == 1:
            # Format 1: substitute glyph id = covered glyph id + delta
            self.delta = data.unpack('h')
        else:
            # Format 2: one substitute glyph per covered glyph
            count = data.unpack('H')
            self.substitutes = data.unpack('%dH'%count, single_special=False)

    def all_substitutions(self, glyph_ids):
        # Return the set of glyph ids any glyph in glyph_ids can be
        # replaced by via this subtable.
        gid_index_map = self.coverage.coverage_indices(glyph_ids)
        if self.format == 1:
            return {gid + self.delta for gid in gid_index_map}
        return {self.substitutes[i] for i in itervalues(gid_index_map)}
|
||||
|
||||
|
||||
class MultipleSubstitution(UnknownLookupSubTable):

    # GSUB LookupType 2: replace one glyph with a sequence of glyphs.
    formats = {1}

    def initialize(self, data):
        # Maps coverage index -> sequence of substitute glyph ids
        self.coverage_to_subs_map = self.read_sets(data, set_is_index=True)

    def all_substitutions(self, glyph_ids):
        # Union of all glyphs any covered glyph may be replaced with
        gid_index_map = self.coverage.coverage_indices(glyph_ids)
        ans = set()
        for index in itervalues(gid_index_map):
            glyphs = set(self.coverage_to_subs_map[index])
            ans |= glyphs
        return ans
|
||||
|
||||
|
||||
class AlternateSubstitution(MultipleSubstitution):

    # GSUB LookupType 3: aesthetic alternates. The binary layout matches
    # multiple substitution, so all parsing is inherited unchanged.
    pass
|
||||
|
||||
|
||||
class LigatureSubstitution(UnknownLookupSubTable):

    # GSUB LookupType 4: replace a sequence of glyphs with a single
    # ligature glyph.
    formats = {1}

    def initialize(self, data):
        # Maps coverage index -> tuple of (ligature_glyph, components)
        self.coverage_to_lig_map = self.read_sets(data, self.read_ligature)

    def read_ligature(self, data):
        # One Ligature record: the substitute glyph followed by the
        # component glyphs beyond the first (the first component comes
        # from the coverage table).
        lig_glyph, count = data.unpack('HH')
        components = data.unpack('%dH'%(count-1), single_special=False)
        return (lig_glyph, components)

    def all_substitutions(self, glyph_ids):
        # A ligature glyph is only reachable when every component glyph
        # is present in glyph_ids.
        gid_index_map = self.coverage.coverage_indices(glyph_ids)
        ans = set()
        for start_glyph_id, index in iteritems(gid_index_map):
            for glyph_id, components in self.coverage_to_lig_map[index]:
                components = (start_glyph_id,) + components
                if set(components).issubset(glyph_ids):
                    ans.add(glyph_id)
        return ans
|
||||
|
||||
|
||||
class ContexttualSubstitution(UnknownLookupSubTable):

    # GSUB LookupType 5: contextual substitution. NOTE: the misspelled
    # class name (Contexttual) is kept as-is because subtable_map below
    # refers to it.
    formats = {1, 2, 3}

    @property
    def has_initial_coverage(self):
        # Format 3 stores its coverage tables inline rather than as a
        # single initial coverage table.
        return self.format != 3

    def initialize(self, data):
        pass  # TODO

    def all_substitutions(self, glyph_ids):
        # This table only defined substitution in terms of other tables
        return set()
|
||||
|
||||
|
||||
class ChainingContextualSubstitution(UnknownLookupSubTable):

    # GSUB LookupType 6: chaining contextual substitution.
    formats = {1, 2, 3}

    @property
    def has_initial_coverage(self):
        # Format 3 stores its coverage tables inline rather than as a
        # single initial coverage table.
        return self.format != 3

    def initialize(self, data):
        pass  # TODO

    def all_substitutions(self, glyph_ids):
        # This table only defined substitution in terms of other tables
        return set()
|
||||
|
||||
|
||||
class ReverseChainSingleSubstitution(UnknownLookupSubTable):

    # GSUB LookupType 8: reverse chaining contextual single substitution.
    formats = {1}

    def initialize(self, data):
        backtrack_count = data.unpack('H')
        backtrack_offsets = data.unpack('%dH'%backtrack_count,
                                        single_special=False)
        lookahead_count = data.unpack('H')
        lookahead_offsets = data.unpack('%dH'%lookahead_count,
                                        single_special=False)
        # Make the coverage offsets absolute (relative to subtable start)
        backtrack_offsets = [data.start_pos + x for x in backtrack_offsets]
        lookahead_offsets = [data.start_pos + x for x in lookahead_offsets]
        backtrack_offsets, lookahead_offsets  # TODO: Use these
        count = data.unpack('H')
        # single_special=False so that a one-element substitute array is
        # still returned as a sequence; all_substitutions() indexes into
        # it (the original call omitted the flag, unlike every parallel
        # use in this module, and would break for count == 1).
        self.substitutes = data.unpack('%dH'%count, single_special=False)

    def all_substitutions(self, glyph_ids):
        # Map each covered glyph to its substitute
        gid_index_map = self.coverage.coverage_indices(glyph_ids)
        return {self.substitutes[i] for i in itervalues(gid_index_map)}
|
||||
|
||||
|
||||
# Map GSUB lookup type -> subtable parser class. Type 7 (extension) is
# handled separately in GSUBLookupTable.set_child_class.
subtable_map = {
        1: SingleSubstitution,
        2: MultipleSubstitution,
        3: AlternateSubstitution,
        4: LigatureSubstitution,
        5: ContexttualSubstitution,
        6: ChainingContextualSubstitution,
        8: ReverseChainSingleSubstitution,
}
|
||||
|
||||
|
||||
class GSUBLookupTable(LookupTable):

    def set_child_class(self):
        # Lookup type 7 is an extension wrapper: the wrapped subtable's
        # real type is stored inside it, so defer to ExtensionSubstitution
        # parameterized with the same type map.
        if self.lookup_type == 7:
            self.child_class = partial(ExtensionSubstitution,
                    subtable_map=subtable_map)
        else:
            self.child_class = subtable_map[self.lookup_type]
|
||||
|
||||
|
||||
class LookupListTable(SimpleListTable):

    # The GSUB LookupList: a simple offset list of GSUB lookup tables
    child_class = GSUBLookupTable
|
||||
|
||||
|
||||
class GSUBTable(UnknownTable):

    """The OpenType glyph substitution (GSUB) table."""

    version = FixedProperty('_version')

    def decompile(self):
        """Parse the top level table structure (script, feature and lookup
        lists). Raises UnsupportedFont for any version other than 1.0."""
        (self._version, self.scriptlist_offset, self.featurelist_offset,
                self.lookuplist_offset) = unpack_from(b'>L3H', self.raw)
        if self._version != 0x10000:
            raise UnsupportedFont('The GSUB table has unknown version: 0x%x'%
                    self._version)

        self.script_list_table = ScriptListTable(self.raw,
                self.scriptlist_offset)
        self.feature_list_table = FeatureListTable(self.raw,
                self.featurelist_offset)
        self.lookup_list_table = LookupListTable(self.raw,
                self.lookuplist_offset)

    def all_substitutions(self, glyph_ids):
        """Return the set of glyph ids, beyond those already in glyph_ids,
        that any glyph in glyph_ids can be substituted by, following
        chains of substitutions to a fixed point."""
        glyph_ids = frozenset(glyph_ids)
        ans = set(glyph_ids)
        for lookup_table in self.lookup_list_table:
            for subtable in lookup_table:
                glyphs = subtable.all_substitutions(ans)
                if glyphs:
                    ans |= glyphs
        # Return only the newly discovered glyphs. The original expression
        # ``ans - {glyph_ids}`` subtracted a set containing the frozenset
        # object itself, which is never a member of a set of ints, so the
        # input glyphs were returned too.
        return ans - glyph_ids
|
||||
213
ebook_converter/utils/fonts/sfnt/head.py
Normal file
213
ebook_converter/utils/fonts/sfnt/head.py
Normal file
@@ -0,0 +1,213 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import unpack_from, pack, calcsize
|
||||
|
||||
from calibre.utils.fonts.sfnt import UnknownTable, DateTimeProperty, FixedProperty
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||
from calibre.utils.fonts.sfnt.loca import read_array
|
||||
from polyglot.builtins import zip
|
||||
|
||||
|
||||
class HeadTable(UnknownTable):

    # The 'head' table: the global font header (version, bounding box,
    # units per em, timestamps, loca format, ...).

    created = DateTimeProperty('_created')
    modified = DateTimeProperty('_modified')
    version_number = FixedProperty('_version_number')
    font_revision = FixedProperty('_font_revision')

    def __init__(self, *args, **kwargs):
        super(HeadTable, self).__init__(*args, **kwargs)

        # (field name, struct format char) pairs, in on-disk order
        field_types = (
            '_version_number' , 'l',
            '_font_revision' , 'l',
            'checksum_adjustment' , 'L',
            'magic_number' , 'L',
            'flags' , 'H',
            'units_per_em' , 'H',
            '_created' , 'q',
            '_modified' , 'q',
            'x_min' , 'h',
            'y_min' , 'h',
            'x_max' , 'h',
            'y_max' , 'h',
            'mac_style' , 'H',
            'lowest_rec_ppem' , 'H',
            'font_direction_hint' , 'h',
            'index_to_loc_format' , 'h',
            'glyph_data_format' , 'h'
        )

        # Big-endian struct format assembled from the format chars
        self._fmt = ('>%s'%(''.join(field_types[1::2]))).encode('ascii')
        self._fields = field_types[0::2]

        for f, val in zip(self._fields, unpack_from(self._fmt, self.raw)):
            setattr(self, f, val)

    def update(self):
        # Serialize the current field values back into self.raw
        vals = [getattr(self, f) for f in self._fields]
        self.raw = pack(self._fmt, *vals)
|
||||
|
||||
|
||||
class HorizontalHeader(UnknownTable):

    # The 'hhea' table, plus the horizontal metrics read out of 'hmtx'.

    version_number = FixedProperty('_version_number')

    def read_data(self, hmtx):
        """Parse this table and the accompanying hmtx table, filling in
        advance_widths and left_side_bearings. A no-op if already parsed.
        Raises UnsupportedFont if hmtx is too short for the declared
        number of metrics."""
        if hasattr(self, 'ascender'):
            return
        # (field name, struct format char) pairs, in on-disk order
        field_types = (
            '_version_number' , 'l',
            'ascender', 'h',
            'descender', 'h',
            'line_gap', 'h',
            'advance_width_max', 'H',
            'min_left_side_bearing', 'h',
            'min_right_side_bearing', 'h',
            'x_max_extent', 'h',
            'caret_slope_rise', 'h',
            'caret_slop_run', 'h',
            'caret_offset', 'h',
            'r1', 'h',
            'r2', 'h',
            'r3', 'h',
            'r4', 'h',
            'metric_data_format', 'h',
            'number_of_h_metrics', 'H',
        )

        self._fmt = ('>%s'%(''.join(field_types[1::2]))).encode('ascii')
        self._fields = field_types[0::2]

        for f, val in zip(self._fields, unpack_from(self._fmt, self.raw)):
            setattr(self, f, val)

        raw = hmtx.raw
        num = self.number_of_h_metrics
        if len(raw) < 4*num:
            raise UnsupportedFont('The hmtx table has insufficient data')
        long_hor_metric = raw[:4*num]
        # Each longHorMetric record is (advance width: uint16, lsb: int16)
        a = read_array(long_hor_metric)
        self.advance_widths = a[0::2]
        a = read_array(long_hor_metric, 'h')
        self.left_side_bearings = a[1::2]
|
||||
|
||||
|
||||
class VerticalHeader(UnknownTable):

    """The 'vhea' table, plus the vertical metrics read out of 'vmtx'."""

    version_number = FixedProperty('_version_number')

    def read_data(self, vmtx):
        """Parse this table and the accompanying vmtx table, filling in
        advance_heights and top_side_bearings. A no-op if already parsed.
        Raises UnsupportedFont if vmtx is too short for the declared
        number of metrics."""
        if hasattr(self, 'ascender'):
            return
        # (field name, struct format char) pairs, in on-disk order
        field_types = (
            '_version_number' , 'l',
            'ascender', 'h',
            'descender', 'h',
            'line_gap', 'h',
            'advance_height_max', 'H',
            'min_top_side_bearing', 'h',
            'min_bottom_side_bearing', 'h',
            'y_max_extent', 'h',
            'caret_slope_rise', 'h',
            'caret_slop_run', 'h',
            'caret_offset', 'h',
            'r1', 'h',
            'r2', 'h',
            'r3', 'h',
            'r4', 'h',
            'metric_data_format', 'h',
            'number_of_v_metrics', 'H',
        )

        self._fmt = ('>%s'%(''.join(field_types[1::2]))).encode('ascii')
        self._fields = field_types[0::2]

        for f, val in zip(self._fields, unpack_from(self._fmt, self.raw)):
            setattr(self, f, val)

        raw = vmtx.raw
        num = self.number_of_v_metrics
        if len(raw) < 4*num:
            raise UnsupportedFont('The vmtx table has insufficient data')
        # The original sliced this buffer twice (copy-paste duplicate);
        # once is enough. Also renamed from long_hor_metric: these are
        # vertical metrics.
        long_ver_metric = raw[:4*num]
        # Each longVerMetric record is (advance height: uint16, tsb: int16)
        a = read_array(long_ver_metric)
        self.advance_heights = a[0::2]
        a = read_array(long_ver_metric, 'h')
        self.top_side_bearings = a[1::2]
|
||||
|
||||
|
||||
class OS2Table(UnknownTable):

    """The 'OS/2' table: weight/width classes, embedding permissions
    (fsType), typographic metrics, etc."""

    def read_data(self):
        """Parse the version-dependent field list into attributes. A no-op
        if already parsed."""
        # The original guard tested for 'char_width', an attribute that is
        # never set (the parsed field is 'average_char_width'), so the
        # table was re-parsed on every call. Test the real attribute.
        if hasattr(self, 'average_char_width'):
            return
        ver, = unpack_from(b'>H', self.raw)
        # (field name, struct format char) pairs, in on-disk order
        field_types = [
            'version' , 'H',
            'average_char_width', 'h',
            'weight_class', 'H',
            'width_class', 'H',
            'fs_type', 'H',
            'subscript_x_size', 'h',
            'subscript_y_size', 'h',
            'subscript_x_offset', 'h',
            'subscript_y_offset', 'h',
            'superscript_x_size', 'h',
            'superscript_y_size', 'h',
            'superscript_x_offset', 'h',
            'superscript_y_offset', 'h',
            'strikeout_size', 'h',
            'strikeout_position', 'h',
            'family_class', 'h',
            'panose', '10s',
            'ranges', '16s',
            'vendor_id', '4s',
            'selection', 'H',
            'first_char_index', 'H',
            'last_char_index', 'H',
            'typo_ascender', 'h',
            'typo_descender', 'h',
            'typo_line_gap', 'h',
            'win_ascent', 'H',
            'win_descent', 'H',
        ]
        if ver > 1:
            # Later versions add code page ranges and extra metrics
            field_types += [
                'code_page_range', '8s',
                'x_height', 'h',
                'cap_height', 'h',
                'default_char', 'H',
                'break_char', 'H',
                'max_context', 'H',
            ]

        self._fmt = ('>%s'%(''.join(field_types[1::2]))).encode('ascii')
        self._fields = field_types[0::2]

        for f, val in zip(self._fields, unpack_from(self._fmt, self.raw)):
            setattr(self, f, val)

    def zero_fstype(self):
        """Clear the fsType (embedding permissions) field, both in the
        parsed attribute and in the raw table bytes."""
        prefix = calcsize(b'>HhHH')  # byte offset of fs_type in the table
        self.raw = self.raw[:prefix] + b'\0\0' + self.raw[prefix+2:]
        self.fs_type = 0
|
||||
|
||||
|
||||
class PostTable(UnknownTable):

    # The 'post' table; only the fixed header fields are parsed.

    version_number = FixedProperty('_version')
    italic_angle = FixedProperty('_italic_angle')

    def read_data(self):
        # A no-op if already parsed
        if hasattr(self, 'underline_position'):
            return
        (self._version, self._italic_angle, self.underline_position,
                self.underline_thickness) = unpack_from(b'>llhh', self.raw)
|
||||
89
ebook_converter/utils/fonts/sfnt/kern.py
Normal file
89
ebook_converter/utils/fonts/sfnt/kern.py
Normal file
@@ -0,0 +1,89 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import unpack_from, calcsize, pack, error as struct_error
|
||||
|
||||
from calibre.utils.fonts.sfnt import (UnknownTable, FixedProperty,
|
||||
max_power_of_two)
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||
from polyglot.builtins import range
|
||||
|
||||
|
||||
class KernTable(UnknownTable):

    # The 'kern' table. Both the version 0 (Microsoft/OpenType, 16-bit
    # header) and version 1 (Apple, 32-bit header) layouts are recognized,
    # but only format 0 subtables (ordered pair kerning) are subset.

    version = FixedProperty('_version')

    def __init__(self, *args, **kwargs):
        super(KernTable, self).__init__(*args, **kwargs)
        self._version, self.num_tables = unpack_from(b'>HH', self.raw)
        # An Apple version 1 table stores version and table count as
        # 32-bit values; re-read the header in that case
        if self._version == 1 and len(self.raw) >= 8:
            self._version, self.num_tables = unpack_from(b'>LL', self.raw)
        self.headerfmt = b'>HH' if self._version == 0 else b'>LL'

    def restrict_to_glyphs(self, glyph_ids):
        """Drop every kern pair that references a glyph outside glyph_ids
        and rebuild self.raw. Only format 0 subtables are kept."""
        if self._version not in {0, 0x10000}:
            raise UnsupportedFont('kern table has version: %x'%self._version)
        offset = 4 if (self._version == 0) else 8  # size of table header
        tables = []
        for i in range(self.num_tables):
            # Per-subtable header layout differs between the two versions
            if self._version == 0:
                version, length, coverage = unpack_from(b'>3H', self.raw, offset)
                table_format = version
            else:
                length, coverage = unpack_from(b'>LH', self.raw, offset)
                table_format = coverage & 0xff
            raw = self.raw[offset:offset+length]
            if table_format == 0:
                raw = self.restrict_format_0(raw, glyph_ids)
                if not raw:
                    # No pairs survived; drop this subtable.
                    # NOTE(review): this `continue` also skips the
                    # `offset += length` below, so any subtables after an
                    # emptied one are re-read from the same offset —
                    # harmless when there is a single subtable (the common
                    # case) but worth confirming against multi-subtable
                    # fonts.
                    continue
                tables.append(raw)
            offset += length
        self.raw = pack(self.headerfmt, self._version, len(tables)) + b''.join(tables)

    def restrict_format_0(self, raw, glyph_ids):
        """Return a format 0 subtable containing only the kern pairs whose
        glyphs are both in glyph_ids, or b'' if no pairs remain."""
        if self._version == 0:
            version, length, coverage, npairs = unpack_from(b'>4H', raw)
            headerfmt = b'>3H'
        else:
            length, coverage, tuple_index, npairs = unpack_from(b'>L3H', raw)
            headerfmt = b'>L2H'

        # Pair records follow the subtable header and the 4 binary-search
        # helper words (npairs, searchRange, entrySelector, rangeShift)
        offset = calcsize(headerfmt + b'4H')
        entries = []
        entrysz = calcsize(b'>2Hh')
        for i in range(npairs):
            try:
                left, right, value = unpack_from(b'>2Hh', raw, offset)
            except struct_error:
                offset = len(raw)
                break  # Buggy kern table
            if left in glyph_ids and right in glyph_ids:
                entries.append(pack(b'>2Hh', left, right, value))
            offset += entrysz

        if offset != len(raw):
            raise UnsupportedFont('This font has extra data at the end of'
                    ' a Format 0 kern subtable')

        npairs = len(entries)
        if npairs == 0:
            return b''

        # Recompute the binary-search helper fields for the new pair count
        entry_selector = max_power_of_two(npairs)
        search_range = (2 ** entry_selector) * 6
        range_shift = (npairs - (2 ** entry_selector)) * 6

        entries = b''.join(entries)
        length = calcsize(headerfmt + b'4H') + len(entries)
        if self._version == 0:
            header = pack(headerfmt, version, length, coverage)
        else:
            header = pack(headerfmt, length, coverage, tuple_index)
        return header + pack(b'>4H', npairs, search_range, entry_selector,
                range_shift) + entries
|
||||
91
ebook_converter/utils/fonts/sfnt/loca.py
Normal file
91
ebook_converter/utils/fonts/sfnt/loca.py
Normal file
@@ -0,0 +1,91 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import array, sys
|
||||
from operator import itemgetter
|
||||
from itertools import repeat
|
||||
|
||||
from calibre.utils.fonts.sfnt import UnknownTable
|
||||
from polyglot.builtins import iteritems, range
|
||||
|
||||
|
||||
def four_byte_type_code():
    """Return the array typecode ('I' or 'L') that is four bytes wide on
    this platform."""
    for code in 'IL':
        if array.array(code).itemsize == 4:
            return code
|
||||
|
||||
|
||||
def read_array(data, fmt='H'):
    """Interpret data as an array of big-endian values of type fmt and
    return it in native byte order."""
    arr = array.array(fmt, data)
    # Font data is big-endian; swap on little-endian machines
    if sys.byteorder != 'big':
        arr.byteswap()
    return arr
|
||||
|
||||
|
||||
class LocaTable(UnknownTable):

    # The 'loca' table: maps glyph ids to (offset, size) locations inside
    # the 'glyf' table. In the short (16-bit) format, offsets are stored
    # halved (head.index_to_loc_format == 0).

    def load_offsets(self, head_table, maxp_table):
        # Decode the raw offsets into self.offset_map: absolute byte
        # offsets into glyf, with one more entry than there are glyphs.
        fmt = 'H' if head_table.index_to_loc_format == 0 else four_byte_type_code()
        locs = read_array(self.raw, fmt)
        self.offset_map = locs.tolist()
        if fmt == 'H':
            # The short format stores offset // 2
            self.offset_map = [2*i for i in self.offset_map]
        self.fmt = fmt

    def glyph_location(self, glyph_id):
        # Return (offset, size) of the glyph's data inside the glyf table
        offset = self.offset_map[glyph_id]
        next_offset = self.offset_map[glyph_id+1]
        return offset, next_offset - offset

    def update(self, resolved_glyph_map):
        '''
        Update this table to contain pointers only to the glyphs in
        resolved_glyph_map which must be a map of glyph_ids to (offset, sz)
        Note that the loca table is generated for all glyphs from 0 to the
        largest glyph that is either in resolved_glyph_map or was present
        originally. The pointers to glyphs that have no data will be set to
        zero. This preserves glyph ids.
        '''
        current_max_glyph_id = len(self.offset_map) - 2
        max_glyph_id = max(resolved_glyph_map or (0,))
        max_glyph_id = max(max_glyph_id, current_max_glyph_id)
        self.offset_map = list(repeat(0, max_glyph_id + 2))
        # Lay the surviving glyphs out in offset order
        glyphs = [(glyph_id, x[0], x[1]) for glyph_id, x in
                iteritems(resolved_glyph_map)]
        glyphs.sort(key=itemgetter(1))
        for glyph_id, offset, sz in glyphs:
            self.offset_map[glyph_id] = offset
            self.offset_map[glyph_id+1] = offset + sz
        # Fix all zero entries to be the same as the previous entry, which
        # means that if the ith entry is zero, the i-1 glyph is not present.
        for i in range(1, len(self.offset_map)):
            if self.offset_map[i] == 0:
                self.offset_map[i] = self.offset_map[i-1]

        vals = self.offset_map
        max_offset = max(vals) if vals else 0
        # Use the short format when every offset is even and still fits in
        # 16 bits after halving
        if max_offset < 0x20000 and all(l % 2 == 0 for l in vals):
            self.fmt = 'H'
            vals = array.array(self.fmt, (i // 2 for i in vals))
        else:
            self.fmt = four_byte_type_code()
            vals = array.array(self.fmt, vals)

        if sys.byteorder != "big":
            vals.byteswap()
        # NOTE(review): array.tostring() was removed in Python 3.9; this
        # needs to become tobytes() once Python 2 support is dropped.
        self.raw = vals.tostring()
    subset = update  # alias used by the subsetting code

    def dump_glyphs(self, sfnt):
        # Debug helper: print the size of every non-empty glyph
        if not hasattr(self, 'offset_map'):
            self.load_offsets(sfnt[b'head'], sfnt[b'maxp'])
        for i in range(len(self.offset_map)-1):
            off, noff = self.offset_map[i], self.offset_map[i+1]
            if noff != off:
                print('Glyph id:', i, 'size:', noff-off)
|
||||
46
ebook_converter/utils/fonts/sfnt/maxp.py
Normal file
46
ebook_converter/utils/fonts/sfnt/maxp.py
Normal file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from struct import unpack_from, pack
|
||||
|
||||
from calibre.utils.fonts.sfnt import UnknownTable, FixedProperty
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
|
||||
from polyglot.builtins import zip
|
||||
|
||||
|
||||
class MaxpTable(UnknownTable):

    # The 'maxp' table: glyph count plus various maximum-usage statistics.

    version = FixedProperty('_version')

    def __init__(self, *args, **kwargs):
        super(MaxpTable, self).__init__(*args, **kwargs)

        # Version 0.5 (CFF outlines) has only version and num_glyphs
        self._fmt = b'>lH'
        self._version, self.num_glyphs = unpack_from(self._fmt, self.raw)
        self.fields = ('_version', 'num_glyphs')

        if self.version > 1.0:
            raise UnsupportedFont('This font has a maxp table with version: %s'
                    %self.version)
        if self.version == 1.0:
            # Version 1.0 (TrueType outlines) carries the full field set
            self.fields = ('_version', 'num_glyphs', 'max_points',
                    'max_contours', 'max_composite_points',
                    'max_composite_contours', 'max_zones',
                    'max_twilight_points', 'max_storage', 'max_function_defs',
                    'max_instruction_defs', 'max_stack_elements',
                    'max_size_of_instructions', 'max_component_elements',
                    'max_component_depth')
            self._fmt = b'>lH' + b'H'*(len(self.fields)-2)

        vals = unpack_from(self._fmt, self.raw)
        for f, val in zip(self.fields, vals):
            setattr(self, f, val)

    def update(self):
        # Serialize the current field values back into self.raw
        vals = [getattr(self, f) for f in self.fields]
        self.raw = pack(self._fmt, *vals)
|
||||
380
ebook_converter/utils/fonts/sfnt/subset.py
Normal file
380
ebook_converter/utils/fonts/sfnt/subset.py
Normal file
@@ -0,0 +1,380 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import traceback
|
||||
from collections import OrderedDict
|
||||
from operator import itemgetter
|
||||
from functools import partial
|
||||
|
||||
from calibre.utils.icu import safe_chr, ord_string
|
||||
from calibre.utils.fonts.sfnt.container import Sfnt
|
||||
from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
|
||||
from polyglot.builtins import unicode_type, range, iteritems, itervalues, map
|
||||
|
||||
# TrueType outlines {{{
|
||||
|
||||
|
||||
def resolve_glyphs(loca, glyf, character_map, extra_glyphs):
    """Compute the transitive closure of glyphs to keep.

    Starting from the glyph ids in character_map (a char -> glyph_id map)
    and extra_glyphs, follow composite-glyph references until closed.
    Glyph 0 (.notdef) is always included; glyph ids with no loca entry
    are silently skipped. Returns an OrderedDict of glyph_id -> glyph,
    sorted by glyph id.
    """
    pending = set(character_map.values()) | set(extra_glyphs)
    pending.add(0)  # We always want the .notdef glyph
    resolved = {}

    while pending:
        glyph_id = pending.pop()
        try:
            offset, length = loca.glyph_location(glyph_id)
        except (IndexError, ValueError, KeyError, TypeError):
            # Glyph id outside the loca table: nothing to keep
            continue
        glyph = glyf.glyph_data(offset, length)
        resolved[glyph_id] = glyph
        # Composite glyphs pull in the glyphs they reference
        pending.update(gid for gid in glyph.glyph_indices
                       if gid not in resolved)

    return OrderedDict(sorted(resolved.items(), key=itemgetter(0)))
|
||||
|
||||
|
||||
def subset_truetype(sfnt, character_map, extra_glyphs):
    """Subset a font with TrueType outlines in place.

    character_map (char code -> glyph id) is pruned in place to the codes
    that actually have glyph data; extra_glyphs is a set of additional
    glyph ids (e.g. from GSUB substitutions) to keep. Raises
    UnsupportedFont/NoGlyphs when subsetting is impossible or pointless.
    """
    loca = sfnt[b'loca']
    glyf = sfnt[b'glyf']

    try:
        head, maxp = sfnt[b'head'], sfnt[b'maxp']
    except KeyError:
        raise UnsupportedFont('This font does not contain head and/or maxp tables')
    loca.load_offsets(head, maxp)

    resolved_glyphs = resolve_glyphs(loca, glyf, character_map, extra_glyphs)
    if not resolved_glyphs or set(resolved_glyphs) == {0}:
        raise NoGlyphs('This font has no glyphs for the specified character '
                'set, subsetting it is pointless')

    # Keep only character codes that have resolved glyphs
    for code, glyph_id in tuple(iteritems(character_map)):
        if glyph_id not in resolved_glyphs:
            del character_map[code]

    # Update the glyf table
    glyph_offset_map = glyf.update(resolved_glyphs)

    # Update the loca table
    loca.subset(glyph_offset_map)
    # Record the loca format (short/long) chosen during the subset
    head.index_to_loc_format = 0 if loca.fmt == 'H' else 1
    head.update()
    maxp.num_glyphs = len(loca.offset_map) - 1
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
def subset_postscript(sfnt, character_map, extra_glyphs):
    # Subset a font with PostScript (CFF) outlines; the heavy lifting is
    # done by the CFF table implementation.
    cff = sfnt[b'CFF ']
    cff.decompile()
    cff.subset(character_map, extra_glyphs)
|
||||
|
||||
|
||||
def do_warn(warnings, *args):
    """Emit warning text line by line: append the lines to the warnings
    list when one is given, otherwise print them. A blank line is emitted
    after each call to separate successive warnings."""
    emit = print if warnings is None else warnings.append
    for arg in args:
        for line in arg.splitlines():
            emit(line)
    emit('')
|
||||
|
||||
|
||||
def pdf_subset(sfnt, glyphs):
    """Strip the font down to what PDF rendering needs: drop every
    non-core table and subset the outlines to the given glyph ids."""
    keep = {b'hhea', b'head', b'hmtx', b'maxp',
            b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca',
            b'prep', b'CFF ', b'VORG'}
    for tag in tuple(sfnt.tables):
        if tag not in keep:
            # Remove non core tables since they are unused in PDF rendering
            del sfnt[tag]
    if b'loca' in sfnt and b'glyf' in sfnt:
        # TrueType Outlines
        subset_truetype(sfnt, {}, glyphs)
    elif b'CFF ' in sfnt:
        # PostScript Outlines
        subset_postscript(sfnt, {}, glyphs)
    else:
        raise UnsupportedFont('This font does not contain TrueType '
                'or PostScript outlines')
|
||||
|
||||
|
||||
def safe_ord(x):
    # ord() via icu's ord_string; presumably handles characters outside
    # the BMP (surrogate pairs on narrow builds), which is why plain
    # ord() is not used — verify against calibre.utils.icu.ord_string.
    return ord_string(unicode_type(x))[0]
|
||||
|
||||
|
||||
def subset(raw, individual_chars, ranges=(), warnings=None):
    """Subset the sfnt font in raw, keeping only the glyphs needed for
    individual_chars plus the inclusive (start, end) character ranges in
    ranges.

    Returns (subset_font_data, old_table_sizes, new_table_sizes). Warning
    lines are appended to the warnings list if given, else printed.
    """
    warn = partial(do_warn, warnings)

    chars = set(map(safe_ord, individual_chars))
    for r in ranges:
        chars |= set(range(safe_ord(r[0]), safe_ord(r[1])+1))

    # Always add the space character for ease of use from the command line
    if safe_ord(' ') not in chars:
        chars.add(safe_ord(' '))

    sfnt = Sfnt(raw)
    old_sizes = sfnt.sizes()

    # Remove the Digital Signature table since it is useless in a subset
    # font anyway
    sfnt.pop(b'DSIG', None)

    # Remove non core tables as they aren't likely to be used by renderers
    # anyway
    core_tables = {b'cmap', b'hhea', b'head', b'hmtx', b'maxp', b'name',
            b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca', b'prep',
            b'CFF ', b'VORG', b'EBDT', b'EBLC', b'EBSC', b'BASE', b'GSUB',
            b'GPOS', b'GDEF', b'JSTF', b'gasp', b'hdmx', b'kern', b'LTSH',
            b'PCLT', b'VDMX', b'vhea', b'vmtx', b'MATH'}
    for tag in list(sfnt):
        if tag not in core_tables:
            del sfnt[tag]

    try:
        cmap = sfnt[b'cmap']
    except KeyError:
        raise UnsupportedFont('This font has no cmap table')

    # Get mapping of chars to glyph ids for all specified chars
    character_map = cmap.get_character_map(chars)

    extra_glyphs = set()

    if b'GSUB' in sfnt:
        # Parse all substitution rules to ensure that glyphs that can be
        # substituted for the specified set of glyphs are not removed
        gsub = sfnt[b'GSUB']
        try:
            gsub.decompile()
            extra_glyphs = gsub.all_substitutions(itervalues(character_map))
        except UnsupportedFont as e:
            # (fixed typo: was 'Usupported')
            warn('Unsupported GSUB table: %s'%e)
        except Exception:
            warn('Failed to decompile GSUB table:', traceback.format_exc())

    if b'loca' in sfnt and b'glyf' in sfnt:
        # TrueType Outlines
        subset_truetype(sfnt, character_map, extra_glyphs)
    elif b'CFF ' in sfnt:
        # PostScript Outlines
        subset_postscript(sfnt, character_map, extra_glyphs)
    else:
        raise UnsupportedFont('This font does not contain TrueType '
                'or PostScript outlines')

    # Restrict the cmap table to only contain entries for the resolved glyphs
    cmap.set_character_map(character_map)

    if b'kern' in sfnt:
        # Kerning is best-effort: a bad kern table should not abort the
        # whole subset operation
        try:
            sfnt[b'kern'].restrict_to_glyphs(frozenset(itervalues(character_map)))
        except UnsupportedFont as e:
            warn('kern table unsupported, ignoring: %s'%e)
        except Exception:
            warn('Subsetting of kern table failed, ignoring:',
                    traceback.format_exc())

    raw, new_sizes = sfnt()
    return raw, old_sizes, new_sizes
|
||||
|
||||
# CLI {{{
|
||||
|
||||
|
||||
def option_parser():
    # Build the command line parser for the subset-font tool
    import textwrap
    from calibre.utils.config import OptionParser
    parser = OptionParser(usage=textwrap.dedent('''\
        %prog [options] input_font_file output_font_file characters_to_keep

        Subset the specified font, keeping only the glyphs for the characters in
        characters_to_keep. characters_to_keep is a comma separated list of characters of
        the form: a,b,c,A-Z,0-9,xyz

        You can specify ranges in the list of characters, as shown above.
        '''))
    parser.add_option('-c', '--codes', default=False, action='store_true',
            help='If specified, the list of characters is interpreted as '
            'numeric unicode codes instead of characters. So to specify the '
            'characters a,b you would use 97,98 or U+0061,U+0062')
    parser.prog = 'subset-font'
    return parser
|
||||
|
||||
|
||||
def print_stats(old_stats, new_stats):
    """Print a comparison table of per-font-table sizes before and after
    subsetting, with percentages of the respective totals."""
    from calibre import prints
    prints('========= Table comparison (original vs. subset) =========')
    prints('Table', ' ', '%10s'%'Size', ' ', 'Percent', ' ', '%10s'%'New Size',
            ' New Percent')
    prints('='*80)
    old_total = sum(old_stats.values())
    new_total = sum(new_stats.values())
    # Largest original tables first
    for table in sorted(old_stats, key=old_stats.__getitem__, reverse=True):
        osz = old_stats[table]
        op = osz/old_total * 100
        nsz = new_stats.get(table, 0)
        np = nsz/new_total * 100
        if nsz == osz:
            suffix = ' | same size'
        else:
            suffix = ' | reduced to %.1f %%'%(nsz/osz * 100)
        prints('%4s'%table, ' ', '%10s'%osz, ' ', '%5.1f %%'%op, ' ',
                '%10s'%nsz, ' ', '%5.1f %%'%np, suffix)
    prints('='*80)
|
||||
|
||||
|
||||
def main(args):
    """Command line entry point for the subset-font tool.

    ``args`` is ``sys.argv`` style: after option parsing, args[1:] must be
    exactly (input_font_file, output_font_file, characters_to_keep).
    Reads the input font, subsets it to the requested characters/ranges,
    prints statistics and writes the result to the output file.
    """
    import sys, time
    from calibre import prints
    parser = option_parser()
    opts, args = parser.parse_args(args)
    # args[0] is the program name, so exactly four entries are expected
    # (was: len(args) < 4 or len(args) > 4)
    if len(args) != 4:
        parser.print_help()
        raise SystemExit(1)
    iff, off, chars = args[1:]
    with open(iff, 'rb') as f:
        orig = f.read()

    # was a no-op list comprehension over the split
    chars = chars.split(',')
    individual, ranges = set(), set()

    def not_single(c):
        # Abort unless c is a single character (range endpoints must be single)
        if len(c) > 1:
            prints(c, 'is not a single character', file=sys.stderr)
            raise SystemExit(1)

    def conv_code(c):
        # Convert a numeric code (decimal, U+xxxx or 0Xxxxx) to a character
        if c.upper()[:2] in ('U+', '0X'):
            c = int(c[2:], 16)
        return safe_chr(int(c))

    for c in chars:
        if '-' in c:
            # A range like A-Z (or 65-90 with --codes)
            parts = [x.strip() for x in c.split('-')]
            if len(parts) != 2:
                prints('Invalid range:', c, file=sys.stderr)
                raise SystemExit(1)
            if opts.codes:
                parts = tuple(map(conv_code, parts))
            tuple(map(not_single, parts))
            ranges.add(tuple(parts))
        else:
            if opts.codes:
                c = conv_code(c)
            not_single(c)
            individual.add(c)
    st = time.time()
    sf, old_stats, new_stats = subset(orig, individual, ranges)
    taken = time.time() - st
    reduced = (len(sf)/len(orig)) * 100

    def sz(x):
        # Human readable size in KB
        return '%gKB'%(len(x)/1024.)
    print_stats(old_stats, new_stats)
    prints('Original size:', sz(orig), 'Subset size:', sz(sf), 'Reduced to: %g%%'%(reduced))
    prints('Subsetting took %g seconds'%taken)
    with open(off, 'wb') as f:
        f.write(sf)
    prints('Subset font written to:', off)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    try:
        # Set up the calibre development environment when run from a source
        # checkout; the bare name reference marks the import as deliberate
        # (it is used only for its side effects).
        import init_calibre
        init_calibre
    except ImportError:
        # Not running from a development checkout; nothing to initialise.
        pass
    import sys
    main(sys.argv)
|
||||
# }}}
|
||||
|
||||
# Tests {{{
|
||||
|
||||
|
||||
def test_mem():
    # Rough leak check: run subset() many times and report the average
    # growth in resident memory per call.
    import gc
    from calibre.utils.mem import memory
    gc.collect()
    baseline = memory()
    raw = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
    calls = 1000
    for _ in range(calls):
        subset(raw, (), (('a', 'z'),))
    del raw
    # Several collection passes to settle reference cycles before measuring
    for _ in range(3):
        gc.collect()
    print('Leaked memory per call:', (memory() - baseline)/calls*1024, 'KB')
|
||||
|
||||
|
||||
def test():
    # Sanity check: subsetting down to three characters must shrink the
    # font to well under a third of its original size.
    original = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
    subset_font, old_stats, new_stats = subset(original, {'a', 'b', 'c'}, ())
    if len(subset_font) > 0.3 * len(original):
        raise Exception('Subsetting failed')
|
||||
|
||||
|
||||
def all():
    # NOTE(review): this function shadows the builtin all(); renaming would
    # break external callers, so the name is kept as-is.
    '''Try subsetting every font on the system, printing per-font results
    and a summary of failures, unsupported fonts, warnings and the average
    size reduction.'''
    from calibre.utils.fonts.scanner import font_scanner
    failed = []
    unsupported = []
    warnings = {}
    total = 0
    averages = []
    for family in font_scanner.find_font_families():
        for font in font_scanner.fonts_for_family(family):
            raw = font_scanner.get_font_data(font)
            print('Subsetting', font['full_name'], end='\t')
            total += 1
            try:
                w = []
                sf, old_stats, new_stats = subset(raw, set(('a', 'b', 'c')),
                        (), w)
                if w:
                    warnings[font['full_name'] + ' (%s)'%font['path']] = w
            except NoGlyphs:
                print('No glyphs!')
                continue
            except UnsupportedFont as e:
                unsupported.append((font['full_name'], font['path'], unicode_type(e)))
                print('Unsupported!')
                continue
            except Exception as e:
                print('Failed!')
                failed.append((font['full_name'], font['path'], unicode_type(e)))
            else:
                # Percentage of the original size the subset occupies
                averages.append(sum(itervalues(new_stats))/sum(itervalues(old_stats)) * 100)
                print('Reduced to:', '%.1f'%averages[-1] , '%')
    if unsupported:
        print('\n\nUnsupported:')
        for name, path, err in unsupported:
            print(name, path, err)
        print()
    if warnings:
        print('\n\nWarnings:')
        for name, w in iteritems(warnings):
            if w:
                print(name)
                print('', '\n\t'.join(w), sep='\t')
    if failed:
        print('\n\nFailures:')
        for name, path, err in failed:
            print(name, path, err)
        print()

    # Guard against ZeroDivisionError when no font could be subset at all
    if averages:
        print('Average reduction to: %.1f%%'%(sum(averages)/len(averages)))
    print('Total:', total, 'Unsupported:', len(unsupported), 'Failed:',
            len(failed), 'Warnings:', len(warnings))
|
||||
|
||||
|
||||
# }}}
|
||||
Reference in New Issue
Block a user