mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-15 18:43:30 +02:00
Added docx writer related modules
This commit is contained in:
9
ebook_converter/ebooks/docx/writer/__init__.py
Normal file
9
ebook_converter/ebooks/docx/writer/__init__.py
Normal file
@@ -0,0 +1,9 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
|
||||
|
||||
281
ebook_converter/ebooks/docx/writer/container.py
Normal file
281
ebook_converter/ebooks/docx/writer/container.py
Normal file
@@ -0,0 +1,281 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import textwrap, os
|
||||
|
||||
from lxml import etree
|
||||
from lxml.builder import ElementMaker
|
||||
|
||||
from calibre import guess_type
|
||||
from calibre.constants import numeric_version, __appname__
|
||||
from calibre.ebooks.docx.names import DOCXNamespace
|
||||
from calibre.ebooks.metadata import authors_to_string
|
||||
from calibre.ebooks.pdf.render.common import PAPER_SIZES
|
||||
from calibre.utils.date import utcnow
|
||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
from polyglot.builtins import iteritems, map, unicode_type, native_string_type
|
||||
|
||||
|
||||
def xml2str(root, pretty_print=False, with_tail=False):
    """Serialize the lxml element ``root`` as UTF-8 with an XML declaration.

    When the installed lxml exposes ``etree.cleanup_namespaces`` it is run on
    ``root`` first (this modifies ``root`` in place). ``pretty_print`` and
    ``with_tail`` are passed straight through to ``etree.tostring``.
    """
    if hasattr(etree, 'cleanup_namespaces'):
        etree.cleanup_namespaces(root)
    return etree.tostring(
        root,
        encoding='utf-8',
        xml_declaration=True,
        pretty_print=pretty_print,
        with_tail=with_tail,
    )
|
||||
|
||||
|
||||
def page_size(opts):
    """Return the page dimensions as a ``(width, height)`` pair.

    The named size ``opts.docx_page_size`` is always looked up in
    ``PAPER_SIZES``. If ``opts.docx_custom_page_size`` is not None it takes
    precedence and is parsed as ``<width>x<height>`` into two floats.
    """
    width, height = PAPER_SIZES[opts.docx_page_size]
    custom = opts.docx_custom_page_size
    if custom is None:
        return width, height
    width_text, _sep, height_text = custom.partition('x')
    return float(width_text), float(height_text)
|
||||
|
||||
|
||||
def page_margin(opts, which):
    """Return the margin for the side ``which`` ('left', 'top', ...).

    The DOCX specific option ``docx_page_margin_<which>`` wins unless it is
    exactly ``0.0``, in which case the generic ``margin_<which>`` option is
    used instead.
    """
    docx_specific = getattr(opts, 'docx_page_margin_' + which)
    if docx_specific == 0.0:
        return getattr(opts, 'margin_' + which)
    return docx_specific
|
||||
|
||||
|
||||
def page_effective_area(opts):
    """Return the ``(width, height)`` of the page with the margins removed."""
    full_width, full_height = page_size(opts)
    horizontal = page_margin(opts, 'left') + page_margin(opts, 'right')
    vertical = page_margin(opts, 'top') + page_margin(opts, 'bottom')
    return full_width - horizontal, full_height - vertical  # in pts
|
||||
|
||||
|
||||
def create_skeleton(opts, namespaces=None):
    """Build the empty ``w:document`` and ``w:styles`` trees for a DOCX file.

    The document gets a ``w:body`` holding a single ``w:sectPr`` with the
    page size and margins taken from ``opts`` (multiplied by 20 and truncated
    to integers). The styles tree only contains ``w:docDefaults``.

    :param opts: conversion options, read via page_size() and page_margin()
    :param namespaces: prefix -> URI mapping; defaults to the mapping from a
        fresh ``DOCXNamespace``
    :return: the tuple ``(doc, styles, body)``
    """
    namespaces = namespaces or DOCXNamespace().namespaces

    def w(x):
        # Qualified name in the ``w`` namespace
        return '{%s}%s' % (namespaces['w'], x)

    def w_attrs(*pairs):
        # Attribute mapping with w-qualified keys, built in the order given
        return dict((w(name), value) for name, value in pairs)

    doc_prefixes = {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne', 'a', 'pic'}
    doc_ns = {prefix: uri for prefix, uri in iteritems(namespaces) if prefix in doc_prefixes}
    make = ElementMaker(namespace=doc_ns['w'], nsmap=doc_ns)
    body = make.body()
    doc = make.document(body)

    page_w, page_h = page_size(opts)
    page_w, page_h = int(20 * page_w), int(20 * page_h)
    margins = w_attrs(*(
        (side, unicode_type(int(page_margin(opts, side) * 20)))
        for side in 'left top right bottom'.split()
    ))
    body.append(make.sectPr(
        make.pgSz(**w_attrs(('w', unicode_type(page_w)), ('h', unicode_type(page_h)))),
        make.pgMar(**margins),
        make.cols(**w_attrs(('space', '720'))),
        make.docGrid(**w_attrs(('linePitch', "360"))),
    ))

    # The styles part declares fewer namespaces than the main document
    style_ns = {prefix: uri for prefix, uri in iteritems(namespaces) if prefix in ('w', 'r', 'a', 'wp')}
    make = ElementMaker(namespace=style_ns['w'], nsmap=style_ns)
    run_defaults = make.rPr(
        make.rFonts(**w_attrs(
            ('asciiTheme', "minorHAnsi"),
            ('eastAsiaTheme', "minorEastAsia"),
            ('hAnsiTheme', "minorHAnsi"),
            ('cstheme', "minorBidi"),
        )),
        make.sz(**w_attrs(('val', '22'))),
        make.szCs(**w_attrs(('val', '22'))),
        make.lang(**w_attrs(('val', 'en-US'), ('eastAsia', "en-US"), ('bidi', "ar-SA"))),
    )
    paragraph_defaults = make.pPr(
        make.spacing(**w_attrs(('after', "0"), ('line', "276"), ('lineRule', "auto"))),
    )
    styles = make.styles(
        make.docDefaults(
            make.rPrDefault(run_defaults),
            make.pPrDefault(paragraph_defaults),
        )
    )
    return doc, styles, body
|
||||
|
||||
|
||||
def update_doc_props(root, mi, namespace):
    """Write title, authors, tags, comments and language from ``mi`` into
    the core properties element ``root``.

    Each property replaces any existing child of ``root`` with the same tag.
    Tags, comments and language are only written when ``mi`` has a truthy
    value for them.
    """
    def setm(name, text=None, ns='dc'):
        fresh = root.makeelement('{%s}%s' % (namespace.namespaces[ns], name))
        # Drop previous values of this property before appending the new one
        stale = [child for child in tuple(root) if child.tag == fresh.tag]
        for child in stale:
            root.remove(child)
        fresh.text = text
        root.append(fresh)
        return fresh

    setm('title', mi.title)
    setm('creator', authors_to_string(mi.authors))
    if mi.tags:
        setm('keywords', ', '.join(mi.tags), ns='cp')
    if mi.comments:
        setm('description', mi.comments)
    if mi.languages:
        canonical = canonicalize_lang(mi.languages[0])
        # Fall back to the canonical code when no two letter form is returned
        setm('language', lang_as_iso639_1(canonical) or canonical)
|
||||
|
||||
|
||||
class DocumentRelationships(object):
    """Registry of the relationships of the main document part.

    Every distinct ``(target, type, target_mode)`` triple is assigned an id of
    the form ``rId<N>``; serialize() renders the registry as the XML of
    ``word/_rels/document.xml.rels``.
    """

    def __init__(self, namespace):
        self.rmap = {}
        self.namespace = namespace
        names = namespace.names
        # Parts that every generated document links to
        standard_parts = {
            names['STYLES']: 'styles.xml',
            names['NUMBERING']: 'numbering.xml',
            names['WEB_SETTINGS']: 'webSettings.xml',
            names['FONTS']: 'fontTable.xml',
        }
        for typ, target in iteritems(standard_parts):
            self.add_relationship(target, typ)

    def get_relationship_id(self, target, rtype, target_mode=None):
        """Return the id registered for the triple, or None if there is none."""
        key = (target, rtype, target_mode)
        return self.rmap.get(key)

    def add_relationship(self, target, rtype, target_mode=None):
        """Return the id for the triple, registering it first if needed."""
        rid = self.get_relationship_id(target, rtype, target_mode)
        if rid is not None:
            return rid
        rid = 'rId%d' % (len(self.rmap) + 1)
        self.rmap[(target, rtype, target_mode)] = rid
        return rid

    def add_image(self, target):
        """Register ``target`` using the image relationship type."""
        image_type = self.namespace.names['IMAGES']
        return self.add_relationship(target, image_type)

    def serialize(self):
        """Return the relationships as serialized XML (see xml2str)."""
        pr = self.namespace.namespaces['pr']
        E = ElementMaker(namespace=pr, nsmap={None: pr})
        relationships = E.Relationships()
        for key, rid in iteritems(self.rmap):
            target, rtype, target_mode = key
            rel = E.Relationship(Id=rid, Type=rtype, Target=target)
            if target_mode is not None:
                rel.set('TargetMode', target_mode)
            relationships.append(rel)
        return xml2str(relationships)
|
||||
|
||||
|
||||
class DOCX(object):
    """Container that collects the parts of a DOCX file and writes the ZIP.

    The converter fills in ``document``, ``styles``, ``numbering``,
    ``font_table``, ``embedded_fonts``, ``images`` and ``fonts``; write()
    then stores them, together with the generated boilerplate parts, in a
    ZIP archive.
    """

    def __init__(self, opts, log):
        self.namespace = DOCXNamespace()
        namespaces = self.namespace.namespaces
        self.opts, self.log = opts, log
        # Set by convert_metadata(). Initialised here so that appproperties
        # can be read before any metadata has been converted.
        self.mi = None
        self.document_relationships = DocumentRelationships(self.namespace)
        self.font_table = etree.Element('{%s}fonts' % namespaces['w'], nsmap={k:namespaces[k] for k in 'wr'})
        self.numbering = etree.Element('{%s}numbering' % namespaces['w'], nsmap={k:namespaces[k] for k in 'wr'})
        E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']})
        self.embedded_fonts = E.Relationships()
        # archive name -> font data
        self.fonts = {}
        # archive name -> zero-argument callable returning the image data
        self.images = {}

    # Boilerplate {{{
    @property
    def contenttypes(self):
        """Serialized ``[Content_Types].xml`` for the package."""
        E = ElementMaker(namespace=self.namespace.namespaces['ct'], nsmap={None:self.namespace.namespaces['ct']})
        types = E.Types()
        for partname, mt in iteritems({
            "/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
            "/word/document.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
            "/word/numbering.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
            "/word/styles.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
            "/word/endnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
            "/word/settings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
            "/word/theme/theme1.xml": "application/vnd.openxmlformats-officedocument.theme+xml",
            "/word/fontTable.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
            "/word/webSettings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
            "/docProps/core.xml": "application/vnd.openxmlformats-package.core-properties+xml",
            "/docProps/app.xml": "application/vnd.openxmlformats-officedocument.extended-properties+xml",
        }):
            types.append(E.Override(PartName=partname, ContentType=mt))
        # Iterate a tuple rather than a set so that the order of the Default
        # elements is the same on every run
        default_extensions = ('png', 'gif', 'jpeg', 'jpg', 'svg', 'xml')
        added = set(default_extensions)
        for ext in default_extensions:
            types.append(E.Default(Extension=ext, ContentType=guess_type('a.'+ext)[0]))
        for ext, mt in iteritems({
            "rels": "application/vnd.openxmlformats-package.relationships+xml",
            "odttf": "application/vnd.openxmlformats-officedocument.obfuscatedFont",
        }):
            added.add(ext)
            types.append(E.Default(Extension=ext, ContentType=mt))
        # Declare any further extensions used by the images in the package
        for fname in self.images:
            ext = fname.rpartition(os.extsep)[-1]
            if ext not in added:
                added.add(ext)
                mt = guess_type('a.' + ext)[0]
                if mt:
                    types.append(E.Default(Extension=ext, ContentType=mt))
        return xml2str(types)

    @property
    def appproperties(self):
        """Serialized ``docProps/app.xml``.

        A ``Company`` element is added only when metadata has been set via
        convert_metadata() and it has a truthy ``publisher``.
        """
        E = ElementMaker(namespace=self.namespace.namespaces['ep'], nsmap={None:self.namespace.namespaces['ep']})
        props = E.Properties(
            E.Application(__appname__),
            E.AppVersion('%02d.%04d' % numeric_version[:2]),
            E.DocSecurity('0'),
            E.HyperlinksChanged('false'),
            E.LinksUpToDate('true'),
            E.ScaleCrop('false'),
            E.SharedDoc('false'),
        )
        if self.mi is not None and self.mi.publisher:
            props.append(E.Company(self.mi.publisher))
        return xml2str(props)

    @property
    def containerrels(self):
        """UTF-8 encoded ``_rels/.rels`` pointing at the three root parts."""
        return textwrap.dedent('''\
        <?xml version='1.0' encoding='utf-8'?>
        <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
            <Relationship Id="rId3" Type="{APPPROPS}" Target="docProps/app.xml"/>
            <Relationship Id="rId2" Type="{DOCPROPS}" Target="docProps/core.xml"/>
            <Relationship Id="rId1" Type="{DOCUMENT}" Target="word/document.xml"/>
        </Relationships>'''.format(**self.namespace.names)).encode('utf-8')

    @property
    def websettings(self):
        """Serialized ``word/webSettings.xml``."""
        E = ElementMaker(namespace=self.namespace.namespaces['w'], nsmap={'w':self.namespace.namespaces['w']})
        ws = E.webSettings(
            E.optimizeForBrowser, E.allowPNG, E.doNotSaveAsSingleFile)
        return xml2str(ws)

    # }}}

    def convert_metadata(self, mi):
        """Return the serialized ``docProps/core.xml`` for ``mi``.

        Also stores ``mi`` on ``self.mi``, which appproperties reads.
        """
        namespaces = self.namespace.namespaces
        E = ElementMaker(namespace=namespaces['cp'], nsmap={x:namespaces[x] for x in 'cp dc dcterms xsi'.split()})
        cp = E.coreProperties(E.revision("1"), E.lastModifiedBy('calibre'))
        # Current UTC time with the fractional seconds stripped
        ts = utcnow().isoformat(native_string_type('T')).rpartition('.')[0] + 'Z'
        for name in 'created modified'.split():
            stamp = cp.makeelement('{%s}%s' % (namespaces['dcterms'], name), **{'{%s}type' % namespaces['xsi']:'dcterms:W3CDTF'})
            stamp.text = ts
            cp.append(stamp)
        self.mi = mi
        update_doc_props(cp, self.mi, self.namespace)
        return xml2str(cp)

    def create_empty_document(self, mi):
        """Set ``document`` and ``styles`` to a fresh skeleton (``mi`` is unused)."""
        self.document, self.styles = create_skeleton(self.opts)[:2]

    def write(self, path_or_stream, mi, create_empty_document=False):
        """Write the complete package to ``path_or_stream`` as a ZIP archive.

        :param mi: metadata used for the core and application properties
        :param create_empty_document: when true, replace ``document`` and
            ``styles`` with an empty skeleton before writing
        """
        if create_empty_document:
            self.create_empty_document(mi)
        with ZipFile(path_or_stream, 'w') as zf:
            zf.writestr('[Content_Types].xml', self.contenttypes)
            zf.writestr('_rels/.rels', self.containerrels)
            # Must run before appproperties is read: it sets self.mi
            zf.writestr('docProps/core.xml', self.convert_metadata(mi))
            zf.writestr('docProps/app.xml', self.appproperties)
            zf.writestr('word/webSettings.xml', self.websettings)
            zf.writestr('word/document.xml', xml2str(self.document))
            zf.writestr('word/styles.xml', xml2str(self.styles))
            zf.writestr('word/numbering.xml', xml2str(self.numbering))
            zf.writestr('word/fontTable.xml', xml2str(self.font_table))
            zf.writestr('word/_rels/document.xml.rels', self.document_relationships.serialize())
            zf.writestr('word/_rels/fontTable.xml.rels', xml2str(self.embedded_fonts))
            for fname, data_getter in iteritems(self.images):
                zf.writestr(fname, data_getter())
            for fname, data in iteritems(self.fonts):
                zf.writestr(fname, data)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: print the generated webSettings part
    print(DOCX(None, None).websettings)
|
||||
78
ebook_converter/ebooks/docx/writer/fonts.py
Normal file
78
ebook_converter/ebooks/docx/writer/fonts.py
Normal file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from collections import defaultdict
|
||||
from uuid import uuid4
|
||||
|
||||
from calibre.ebooks.oeb.base import OEB_STYLES
|
||||
from calibre.ebooks.oeb.transforms.subset import find_font_face_rules
|
||||
from polyglot.builtins import range
|
||||
|
||||
|
||||
def obfuscate_font_data(data, key):
    """XOR the first 32 bytes of ``data`` with the reversed bytes of ``key``.

    ``key`` must provide a ``bytes`` attribute (as ``uuid.UUID`` does); the
    mask is repeated as needed. Everything after the first 32 bytes is
    returned unchanged, so applying the function twice restores ``data``.
    """
    mask = bytearray(reversed(key.bytes))
    head = bytearray(data[:32])
    scrambled = bytearray(
        byte ^ mask[pos % len(mask)] for pos, byte in enumerate(head))
    return bytes(scrambled) + data[32:]
|
||||
|
||||
|
||||
class FontsManager(object):
    """Writes the font table and embeds the book's @font-face fonts."""

    def __init__(self, namespace, oeb, opts):
        self.namespace = namespace
        self.oeb, self.log, self.opts = oeb, oeb.log, opts

    def serialize(self, text_styles, fonts, embed_relationships, font_data_map):
        """Populate the font table and the embedded font relationships.

        :param text_styles: iterable of objects with a ``font_family``
            attribute; one ``w:font`` element is created per distinct family
            (compared case-insensitively, the first spelling seen wins)
        :param fonts: parent element that receives the ``w:font`` elements
        :param embed_relationships: parent element that receives one
            ``Relationship`` per embedded font file
        :param font_data_map: dict filled with archive name -> obfuscated
            font data
        """
        makeelement = self.namespace.makeelement
        font_families, seen = set(), set()
        for ts in text_styles:
            if ts.font_family:
                lf = ts.font_family.lower()
                if lf not in seen:
                    seen.add(lf)
                    font_families.add(ts.font_family)
        family_map = {}
        for family in sorted(font_families):
            family_map[family] = makeelement(fonts, 'w:font', w_name=family)

        # Collect the @font-face rules from every parsed stylesheet
        embedded_fonts = []
        for item in self.oeb.manifest:
            if item.media_type in OEB_STYLES and hasattr(item.data, 'cssRules'):
                embedded_fonts.extend(find_font_face_rules(item, self.oeb))

        num = 0
        face_map = defaultdict(set)
        # font file item -> (relationship id, obfuscation key)
        rel_map = {}
        for ef in embedded_fonts:
            ff = ef['font-family'][0]
            if ff not in font_families:
                continue
            num += 1
            bold = ef['weight'] > 400
            italic = ef['font-style'] != 'normal'
            if bold and italic:
                tag = 'BoldItalic'
            elif bold:
                tag = 'Bold'
            elif italic:
                tag = 'Italic'
            else:
                tag = 'Regular'
            # Only the first rule for each family/face combination is embedded
            if tag in face_map[ff]:
                continue
            face_map[ff].add(tag)
            font = family_map[ff]
            item = ef['item']
            embedded = rel_map.get(item)
            if embedded is None:
                rid, key = 'rId%d' % num, uuid4()
                rel_map[item] = (rid, key)
                fname = 'fonts/font%d.odttf' % num
                makeelement(embed_relationships, 'Relationship', Id=rid, Type=self.namespace.names['EMBEDDED_FONT'], Target=fname)
                font_data_map['word/' + fname] = obfuscate_font_data(item.data, key)
            else:
                # The same font file is reused for another face: the fontKey
                # written below must be the key its data was obfuscated with,
                # not a freshly generated one.
                rid, key = embedded
            makeelement(font, 'w:embed' + tag, r_id=rid,
                        w_fontKey='{%s}' % key.urn.rpartition(':')[-1].upper(),
                        w_subsetted="true" if self.opts.subset_embedded_fonts else "false")
|
||||
617
ebook_converter/ebooks/docx/writer/from_html.py
Normal file
617
ebook_converter/ebooks/docx/writer/from_html.py
Normal file
@@ -0,0 +1,617 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import re
|
||||
from collections import Counter
|
||||
|
||||
from calibre.ebooks.docx.writer.container import create_skeleton, page_size, page_effective_area
|
||||
from calibre.ebooks.docx.writer.styles import StylesManager, FloatSpec
|
||||
from calibre.ebooks.docx.writer.links import LinksManager
|
||||
from calibre.ebooks.docx.writer.images import ImagesManager
|
||||
from calibre.ebooks.docx.writer.fonts import FontsManager
|
||||
from calibre.ebooks.docx.writer.tables import Table
|
||||
from calibre.ebooks.docx.writer.lists import ListsManager
|
||||
from calibre.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
|
||||
from calibre.ebooks.oeb.base import XPath, barename
|
||||
from calibre.utils.localization import lang_as_iso639_1
|
||||
from polyglot.builtins import unicode_type, string_or_bytes
|
||||
|
||||
|
||||
def lang_for_tag(tag):
    """Return the language of ``tag`` as given by ``lang_as_iso639_1``.

    The plain ``lang`` attribute is consulted before ``xml:lang``; the first
    truthy result wins. Returns None when neither attribute yields one.
    """
    attributes = ('lang', '{http://www.w3.org/XML/1998/namespace}lang')
    # Generator keeps the lookup lazy: xml:lang is only read when lang fails
    candidates = (lang_as_iso639_1(tag.get(attr)) for attr in attributes)
    return next((val for val in candidates if val), None)
|
||||
|
||||
|
||||
class Style(St):
    """Style that additionally exposes a cached ``letterSpacing`` property."""

    def __init__(self, *args, **kwargs):
        St.__init__(self, *args, **kwargs)
        # Cache for letterSpacing; None means "not computed yet"
        self._letterSpacing = None

    @property
    def letterSpacing(self):
        """The ``letter-spacing`` value: the string 'normal' or the result
        of ``_unit_convert`` on the declared value.

        The value is computed on first access and cached. (The guard used to
        be inverted, so nothing was ever computed and the property always
        returned None.)
        """
        if self._letterSpacing is None:
            val = self._get('letter-spacing')
            if val == 'normal':
                self._letterSpacing = val
            else:
                self._letterSpacing = self._unit_convert(val)
        return self._letterSpacing
|
||||
|
||||
|
||||
class Stylizer(Sz):
    """Stylizer that hands out the local ``Style`` subclass."""

    def style(self, element):
        """Return the stored style for ``element``, or a new ``Style`` for it
        when none is stored."""
        try:
            known = self._styles[element]
        except KeyError:
            return Style(element, self)
        return known
|
||||
|
||||
|
||||
class TextRun(object):
    """A run of content sharing one text style, serialized as a ``w:r``.

    ``texts`` holds 3-tuples whose meaning depends on the first item:
    ``(text, preserve_whitespace, bookmark)`` for text,
    ``(None, clear, bookmark)`` for a line break and
    ``(drawing, None, bookmark)`` for an image.
    """

    # Compiled lazily by the first instance and shared through the class
    ws_pat = None

    def __init__(self, namespace, style, first_html_parent, lang=None):
        if self.ws_pat is None:
            TextRun.ws_pat = self.ws_pat = re.compile(r'\s+')
        self.makeelement = namespace.makeelement
        self.first_html_parent = first_html_parent
        self.style = style
        self.parent_style = None
        self.descendant_style = None
        self.lang = lang
        self.link = None
        self.texts = []

    def add_text(self, text, preserve_whitespace, bookmark=None, link=None):
        """Append ``text``; also makes ``link`` the link of the whole run."""
        if not preserve_whitespace:
            text = self.ws_pat.sub(' ', text)
            if text.strip() != text:
                # If preserve_whitespace is False, Word ignores leading and
                # trailing whitespace
                preserve_whitespace = True
        entry = (text, preserve_whitespace, bookmark)
        self.texts.append(entry)
        self.link = link

    def add_break(self, clear='none', bookmark=None):
        """Append a line break."""
        self.texts.append((None, clear, bookmark))

    def add_image(self, drawing, bookmark=None):
        """Append an already built drawing element."""
        self.texts.append((drawing, None, bookmark))

    def serialize(self, p, links_manager):
        """Append this run to the paragraph ``p``, wrapped in a hyperlink
        element when the run has a link."""
        makeelement = self.makeelement
        if self.link is None:
            parent = p
        else:
            parent = links_manager.serialize_hyperlink(p, self.link)
        r = makeelement(parent, 'w:r')
        # Built detached and attached only if it ends up with children
        rpr = makeelement(r, 'w:rPr', append=False)
        if getattr(self.descendant_style, 'id', None) is not None:
            makeelement(rpr, 'w:rStyle', w_val=self.descendant_style.id)
        if self.lang:
            makeelement(rpr, 'w:lang', w_bidi=self.lang, w_val=self.lang, w_eastAsia=self.lang)
        if len(rpr) > 0:
            r.append(rpr)

        for content, flag, bookmark in self.texts:
            has_bookmark = bookmark is not None
            if has_bookmark:
                bid = links_manager.bookmark_id
                makeelement(r, 'w:bookmarkStart', w_id=unicode_type(bid), w_name=bookmark)
            if content is None:
                # Line break: flag carries the clear mode
                makeelement(r, 'w:br', w_clear=flag)
            elif hasattr(content, 'xpath'):
                # An element (a drawing) rather than a string
                r.append(content)
            else:
                t = makeelement(r, 'w:t')
                t.text = content or ''
                if flag:
                    t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
            if has_bookmark:
                makeelement(r, 'w:bookmarkEnd', w_id=unicode_type(bid))

    def __repr__(self):
        return repr(self.texts)

    def is_empty(self):
        """True when the run has no entries, or only a single empty string
        that does not preserve whitespace."""
        if not self.texts:
            return True
        return len(self.texts) == 1 and self.texts[0][:2] == ('', False)

    @property
    def style_weight(self):
        """Total number of characters of text in this run."""
        return sum(
            len(content) for content, _flag, _bookmark in self.texts
            if isinstance(content, unicode_type))
|
||||
|
||||
|
||||
class Block(object):
    """A block level HTML element, serialized as one ``w:p`` paragraph made
    up of TextRun objects."""

    def __init__(self, namespace, styles_manager, links_manager, html_block, style, is_table_cell=False, float_spec=None, is_list_item=False, parent_bg=None):
        self.namespace = namespace
        self.styles_manager, self.links_manager = styles_manager, links_manager
        self.html_block = html_block
        self.html_style = style
        self.html_tag = barename(html_block.tag)
        self.list_tag = (html_block, style) if is_list_item else None
        self.float_spec = float_spec
        if float_spec is not None:
            float_spec.blocks.append(self)
        self.style = styles_manager.create_block_style(style, html_block, is_table_cell=is_table_cell, parent_bg=parent_bg)
        self.linked_style = None
        self.runs = []
        self.bookmarks = set()
        self.numbering_id = None
        self.parent_items = None
        self.block_lang = None
        # Flags
        self.force_not_empty = False
        self.is_first_block = False
        self.keep_next = False
        self.skipped = False
        self.page_break_before = style['page-break-before'] == 'always'
        self.keep_lines = style['page-break-inside'] == 'avoid'
        self.page_break_after = False

    def resolve_skipped(self, next_block):
        """Mark this block as skipped when it is empty and ``next_block``
        comes from its first child element; a list tag is handed on to
        ``next_block`` in that case."""
        if not self.is_empty():
            return
        if len(self.html_block) == 0 or self.html_block[0] is not next_block.html_block:
            return
        self.skipped = True
        if self.list_tag is not None:
            next_block.list_tag = self.list_tag

    def add_text(self, text, style, ignore_leading_whitespace=False, html_parent=None, is_parent_style=False, bookmark=None, link=None, lang=None):
        """Add ``text`` to the last run when style, link and language all
        match it, otherwise to a new run."""
        ws = style['white-space']
        preserve_whitespace = ws in {'pre', 'pre-wrap', '-o-pre-wrap'}
        ts = self.styles_manager.create_text_style(style, is_parent_style=is_parent_style)
        last = self.runs[-1] if self.runs else None
        if last is not None and ts == last.style and link == last.link and lang == last.lang:
            run = last
        else:
            parent = self.html_block if html_parent is None else html_parent
            run = TextRun(self.namespace, ts, parent, lang=lang)
            self.runs.append(run)
        if ignore_leading_whitespace and not preserve_whitespace:
            text = text.lstrip()
        if not (preserve_whitespace or ws == 'pre-line'):
            run.add_text(text, preserve_whitespace, bookmark=bookmark, link=link)
            return
        # Line breaks are significant: emit each line followed by a break.
        # The bookmark is attached to the first line only.
        for line in text.splitlines():
            run.add_text(line, preserve_whitespace, bookmark=bookmark, link=link)
            bookmark = None
            run.add_break()

    def _current_run(self):
        # Last run of the block, creating one in the block's own style if
        # the block has no runs yet
        if self.runs:
            return self.runs[-1]
        run = TextRun(self.namespace, self.styles_manager.create_text_style(self.html_style), self.html_block)
        self.runs.append(run)
        return run

    def add_break(self, clear='none', bookmark=None):
        """Add a line break to the last run."""
        self._current_run().add_break(clear=clear, bookmark=bookmark)

    def add_image(self, drawing, bookmark=None):
        """Add a drawing element to the last run."""
        self._current_run().add_image(drawing, bookmark=bookmark)

    def serialize(self, body):
        """Append the ``w:p`` element for this block to ``body``."""
        makeelement = self.namespace.makeelement
        p = makeelement(body, 'w:p')
        # Block level bookmarks span the whole paragraph
        end_bookmarks = []
        for bmark in self.bookmarks:
            bid = unicode_type(self.links_manager.bookmark_id)
            end_bookmarks.append(bid)
            makeelement(p, 'w:bookmarkStart', w_id=bid, w_name=bmark)
        if self.block_lang:
            rpr = makeelement(p, 'w:rPr')
            makeelement(rpr, 'w:lang', w_val=self.block_lang, w_bidi=self.block_lang, w_eastAsia=self.block_lang)

        ppr = makeelement(p, 'w:pPr')
        if self.keep_next:
            makeelement(ppr, 'w:keepNext')
        if self.float_spec is not None:
            self.float_spec.serialize(self, ppr)
        if self.numbering_id is not None:
            num_id, level = self.numbering_id[0], self.numbering_id[1]
            numpr = makeelement(ppr, 'w:numPr')
            makeelement(numpr, 'w:ilvl', w_val=unicode_type(level))
            makeelement(numpr, 'w:numId', w_val=unicode_type(num_id))
        if self.linked_style is not None:
            makeelement(ppr, 'w:pStyle', w_val=self.linked_style.id)
        elif self.style.id:
            makeelement(ppr, 'w:pStyle', w_val=self.style.id)
        if self.is_first_block:
            makeelement(ppr, 'w:pageBreakBefore', w_val='off')
        elif self.page_break_before:
            makeelement(ppr, 'w:pageBreakBefore', w_val='on')
        if self.keep_lines:
            makeelement(ppr, 'w:keepLines', w_val='on')
        for run in self.runs:
            run.serialize(p, self.links_manager)
        for bid in end_bookmarks:
            makeelement(p, 'w:bookmarkEnd', w_id=bid)

    def __repr__(self):
        return 'Block(%r)' % self.runs
    __str__ = __repr__

    def is_empty(self):
        """True unless the block is forced non-empty or has a non-empty run."""
        return not self.force_not_empty and all(run.is_empty() for run in self.runs)
|
||||
|
||||
|
||||
class Blocks(object):
    """Collects the Block and Table objects produced while walking the HTML.

    ``all_blocks`` lists every block in creation order, including those that
    live inside tables; ``items`` holds the top level blocks and tables in
    output order. Used as a context manager around the processing of a
    single HTML file: on entry ``pos`` records where that file's blocks
    start, and on a clean exit the first block of the file is cleaned up and
    marked.
    """

    def __init__(self, namespace, styles_manager, links_manager):
        self.top_bookmark = None
        self.namespace = namespace
        self.styles_manager = styles_manager
        self.links_manager = links_manager
        self.all_blocks = []
        self.pos = 0
        self.current_block = None
        self.items = []
        self.tables = []
        self.current_table = None
        self.open_html_blocks = set()
        self.html_tag_start_blocks = {}
        # top level item -> index in self.items, for the file currently being
        # processed. Created here as well as in __enter__ so that the methods
        # using it also work outside a ``with`` block.
        self.block_map = {}

    def current_or_new_block(self, html_tag, tag_style):
        """Return the open block, starting a new one if there is none."""
        return self.current_block or self.start_new_block(html_tag, tag_style)

    def end_current_block(self):
        """Close the open block, filing it under the current table row if
        there is one and under ``items`` otherwise."""
        if self.current_block is not None:
            self.all_blocks.append(self.current_block)
            if self.current_table is not None and self.current_table.current_row is not None:
                self.current_table.add_block(self.current_block)
            else:
                self.block_map[self.current_block] = len(self.items)
                self.items.append(self.current_block)
                self.current_block.parent_items = self.items
        self.current_block = None

    def start_new_block(self, html_block, style, is_table_cell=False, float_spec=None, is_list_item=False):
        """Close the open block and start a new one for ``html_block``."""
        parent_bg = None
        if html_block is not None:
            # Pick up the background color of the parent element, if a block
            # was started for it
            p = html_block.getparent()
            b = self.html_tag_start_blocks.get(p)
            if b is not None:
                ps = self.styles_manager.styles_for_html_blocks.get(p)
                if ps is not None and ps.background_color is not None:
                    parent_bg = ps.background_color
        self.end_current_block()
        self.current_block = Block(
            self.namespace, self.styles_manager, self.links_manager, html_block, style,
            is_table_cell=is_table_cell, float_spec=float_spec, is_list_item=is_list_item,
            parent_bg=parent_bg)
        self.html_tag_start_blocks[html_block] = self.current_block
        self.open_html_blocks.add(html_block)
        return self.current_block

    def start_new_table(self, html_tag, tag_style=None):
        """Start a table, nested inside any table that is still open."""
        self.current_table = Table(self.namespace, html_tag, tag_style)
        self.tables.append(self.current_table)

    def start_new_row(self, html_tag, tag_style):
        """Start a table row, creating a table first if none is open."""
        if self.current_table is None:
            self.start_new_table(html_tag)
        self.current_table.start_new_row(html_tag, tag_style)

    def start_new_cell(self, html_tag, tag_style):
        """Start a table cell, creating a table first if none is open."""
        if self.current_table is None:
            self.start_new_table(html_tag)
        self.current_table.start_new_cell(html_tag, tag_style)

    def finish_tag(self, html_tag):
        """Called when ``html_tag`` closes: ends the open block if the tag
        opened one and finishes the current table if the tag ends it."""
        if self.current_block is not None and html_tag in self.open_html_blocks:
            start_block = self.html_tag_start_blocks.get(html_tag)
            if start_block is not None and start_block.html_style['page-break-after'] == 'always':
                self.current_block.page_break_after = True
            self.end_current_block()
        self.open_html_blocks.discard(html_tag)

        if self.current_table is not None:
            table_finished = self.current_table.finish_tag(html_tag)
            if table_finished:
                table = self.tables[-1]
                del self.tables[-1]
                if self.tables:
                    # Nested table: it belongs to the enclosing table
                    self.current_table = self.tables[-1]
                    self.current_table.add_table(table)
                else:
                    self.current_table = None
                    self.block_map[table] = len(self.items)
                    self.items.append(table)

    def serialize(self, body):
        """Serialize all top level items into ``body``, in order."""
        for item in self.items:
            item.serialize(body)

    def delete_block_at(self, pos=None):
        """Remove the block at index ``pos`` of ``all_blocks`` (default:
        ``self.pos``).

        The removed block's bookmarks are merged into the block that follows
        it, which also takes over its page break flags.
        """
        pos = self.pos if pos is None else pos
        block = self.all_blocks[pos]
        del self.all_blocks[pos]
        bpos = self.block_map.pop(block, None)
        if bpos is not None:
            del self.items[bpos]
        else:
            items = self.items if block.parent_items is None else block.parent_items
            items.remove(block)
        block.parent_items = None
        if block.float_spec is not None:
            block.float_spec.blocks.remove(block)
        try:
            next_block = self.all_blocks[pos]
            next_block.bookmarks.update(block.bookmarks)
            for attr in 'page_break_after page_break_before'.split():
                setattr(next_block, attr, getattr(block, attr))
        except (IndexError, KeyError):
            # There is no following block to hand the state over to
            pass

    def __enter__(self):
        self.pos = len(self.all_blocks)
        self.block_map = {}

    def __exit__(self, etype, value, traceback):
        if value is not None:
            return  # Since there was an exception, the data structures are not in a consistent state
        if self.current_block is not None:
            self.all_blocks.append(self.current_block)
        self.current_block = None
        if len(self.all_blocks) > self.pos and self.all_blocks[self.pos].is_empty():
            # Delete the empty block corresponding to the <body> tag when the
            # body tag has no inline content before its first sub-block
            self.delete_block_at(self.pos)
        if self.pos > 0 and self.pos < len(self.all_blocks):
            # Insert a page break corresponding to the start of the html file
            self.all_blocks[self.pos].page_break_before = True
        if self.top_bookmark is not None:
            self.all_blocks[self.pos].bookmarks.add(self.top_bookmark)
        self.top_bookmark = None
        self.block_map = {}

    def apply_page_break_after(self):
        """Turn ``page_break_after`` on a block into ``page_break_before`` on
        the next block, when both are top level items."""
        for i, block in enumerate(self.all_blocks):
            if block.page_break_after and i < len(self.all_blocks) - 1:
                next_block = self.all_blocks[i + 1]
                if next_block.parent_items is block.parent_items and block.parent_items is self.items:
                    next_block.page_break_before = True

    def resolve_language(self):
        """Move the most common run language of each block to the block.

        Runs in that language get ``lang`` reset to None; the block language
        is dropped again when it equals the document language.
        """
        default_lang = self.styles_manager.document_lang
        for block in self.all_blocks:
            count = Counter()
            for run in block.runs:
                count[run.lang] += 1
            if count:
                block.block_lang = bl = count.most_common(1)[0][0]
                for run in block.runs:
                    if run.lang == bl:
                        run.lang = None
                if bl == default_lang:
                    block.block_lang = None

    def __repr__(self):
        # Used to read the non-existent ``self.runs`` and raise
        # AttributeError
        return 'Blocks(%r)' % (self.all_blocks,)
|
||||
|
||||
|
||||
class Convert(object):
    """Drive the conversion of an OEB book into DOCX markup.

    Instances are callable: calling one walks every spine item, turns the
    HTML tags into blocks, post-processes those blocks and finally
    serializes everything into the ``docx`` object given to the
    constructor.
    """

    # Word does not apply default styling to hyperlinks, so we ensure they get
    # default styling (the conversion pipeline does not apply any styling to
    # them).
    base_css = '''
    a[href] { text-decoration: underline; color: blue }
    '''

    def __init__(self, oeb, docx, mi, add_cover, add_toc):
        """Remember the conversion inputs.

        :param oeb: The OEB book being converted.
        :param docx: The DOCX output object; its ``log`` and ``opts`` are
            reused by this converter.
        :param mi: Book metadata (``mi.language`` is read during conversion).
        :param add_cover: Whether a cover image should be emitted.
        :param add_toc: Whether Table of Contents links should be emitted.

        As a side effect the output profile in ``opts`` gets ``width_pts``
        and ``height_pts`` set from ``page_effective_area``.
        """
        self.oeb, self.docx = oeb, docx
        self.add_cover, self.add_toc = add_cover, add_toc
        self.log, self.opts = docx.log, docx.opts
        self.mi = mi
        self.cover_img = None
        profile = self.opts.output_profile
        profile.width_pts, profile.height_pts = page_effective_area(self.opts)

    def __call__(self):
        """Run the whole conversion and write the result into ``self.docx``."""
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer
        self.svg_rasterizer = SVGRasterizer(base_css=self.base_css)
        self.svg_rasterizer(self.oeb, self.opts)

        namespace = self.docx.namespace
        relationships = self.docx.document_relationships
        self.styles_manager = StylesManager(namespace, self.log, self.mi.language)
        self.links_manager = LinksManager(namespace, relationships, self.log)
        self.images_manager = ImagesManager(self.oeb, relationships, self.opts)
        self.lists_manager = ListsManager(self.docx)
        self.fonts_manager = FontsManager(namespace, self.oeb, self.opts)
        self.blocks = Blocks(namespace, self.styles_manager, self.links_manager)
        self.current_link = self.current_lang = None

        for item in self.oeb.spine:
            self.log.debug('Processing', item.href)
            self.process_item(item)
        if self.add_toc:
            self.links_manager.process_toc_links(self.oeb)

        if self.add_cover and self.oeb.metadata.cover:
            cover_id = unicode_type(self.oeb.metadata.cover[0])
            if cover_id in self.oeb.manifest.ids:
                item = self.oeb.manifest.ids[cover_id]
                self.cover_img = self.images_manager.read_image(item.href)

        # Let every block decide, with knowledge of its successor, whether
        # it is skipped; skipped blocks are removed back to front so the
        # recorded positions stay valid while deleting.
        all_blocks = self.blocks.all_blocks
        remove_blocks = []
        for i in range(len(all_blocks) - 1):
            block = all_blocks[i]
            block.resolve_skipped(all_blocks[i + 1])
            if block.skipped:
                remove_blocks.append((i, block))
        for pos, block in reversed(remove_blocks):
            self.blocks.delete_block_at(pos)

        # A book whose spine produced no blocks at all must not crash with
        # an IndexError here.
        if self.blocks.all_blocks:
            self.blocks.all_blocks[0].is_first_block = True
        self.blocks.apply_page_break_after()
        self.blocks.resolve_language()

        if self.cover_img is not None:
            self.cover_img = self.images_manager.create_cover_markup(
                self.cover_img, self.opts.preserve_cover_aspect_ratio, *page_size(self.opts))
        self.lists_manager.finalize(all_blocks)
        self.styles_manager.finalize(all_blocks)
        self.write()

    def process_item(self, item):
        """Convert a single spine item (one HTML file) into blocks."""
        self.current_item = item
        stylizer = self.svg_rasterizer.stylizer_cache.get(item)
        if stylizer is None:
            stylizer = Stylizer(item.data, item.href, self.oeb, self.opts,
                                profile=self.opts.output_profile, base_css=self.base_css)
        self.abshref = self.images_manager.abshref = item.abshref

        self.current_lang = lang_for_tag(item.data) or self.styles_manager.document_lang
        for i, body in enumerate(XPath('//h:body')(item.data)):
            with self.blocks:
                self.blocks.top_bookmark = self.links_manager.bookmark_for_anchor(
                    self.links_manager.top_anchor, self.current_item, body)
                self.process_tag(body, stylizer, is_first_tag=i == 0)

    def process_tag(self, html_tag, stylizer, is_first_tag=False, float_spec=None):
        """Recursively convert ``html_tag``, its children and its tail text.

        :param html_tag: The element to convert.
        :param stylizer: Resolves the computed style of elements.
        :param is_first_tag: True for the first ``<body>`` of a file; such
            a tag is never treated as floating and its tail is ignored.
        :param float_spec: Float specification inherited from an ancestor,
            if any.
        """
        tagname = barename(html_tag.tag)
        tag_style = stylizer.style(html_tag)
        ignore_tag_contents = tagname in {'script', 'style', 'title', 'meta'} or tag_style.is_hidden
        display = tag_style._get('display')
        is_block = False

        if not ignore_tag_contents:
            # Link and language apply to this tag's subtree only; both are
            # restored before the tail is processed.
            previous_link = self.current_link
            if tagname == 'a' and html_tag.get('href'):
                self.current_link = (self.current_item, html_tag.get('href'), html_tag.get('title'))
            previous_lang = self.current_lang
            tag_lang = lang_for_tag(html_tag)
            if tag_lang:
                self.current_lang = tag_lang

            is_float = tag_style['float'] in {'left', 'right'} and not is_first_tag
            if float_spec is None and is_float:
                float_spec = FloatSpec(self.docx.namespace, html_tag, tag_style)

            if display in {'inline', 'inline-block'} or tagname == 'br':
                # <br> has display:block but we don't want to start a new
                # paragraph
                if is_float and float_spec.is_dropcaps:
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)
                    float_spec = None
                else:
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
            elif display == 'list-item':
                self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_list_item=True)
            elif display.startswith('table') or display == 'inline-table':
                if display == 'table-cell':
                    self.blocks.start_new_cell(html_tag, tag_style)
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, is_table_cell=True)
                elif display == 'table-row':
                    self.blocks.start_new_row(html_tag, tag_style)
                elif display in {'table', 'inline-table'}:
                    self.blocks.end_current_block()
                    self.blocks.start_new_table(html_tag, tag_style)
            else:
                if tagname == 'img' and is_float:
                    # Image is floating so don't start a new paragraph for it
                    self.add_inline_tag(tagname, html_tag, tag_style, stylizer)
                else:
                    if tagname == 'hr':
                        for edge in 'right bottom left'.split():
                            tag_style.set('border-%s-style' % edge, 'none')
                    self.add_block_tag(tagname, html_tag, tag_style, stylizer, float_spec=float_spec)

            for child in html_tag.iterchildren():
                if isinstance(getattr(child, 'tag', None), string_or_bytes):
                    self.process_tag(child, stylizer, float_spec=float_spec)
                else:  # Comment/PI/etc.
                    tail = getattr(child, 'tail', None)
                    if tail:
                        block = self.create_block_from_parent(html_tag, stylizer)
                        block.add_text(tail, tag_style, is_parent_style=False,
                                       link=self.current_link, lang=self.current_lang)

            is_block = html_tag in self.blocks.open_html_blocks
            self.blocks.finish_tag(html_tag)
            if is_block and tag_style['page-break-after'] == 'avoid':
                self.blocks.all_blocks[-1].keep_next = True

            self.current_link = previous_link
            self.current_lang = previous_lang

        # Now, process the tail if any

        if display == 'table-row':
            return  # We ignore the tail for these tags

        ignore_whitespace_tail = is_block or display.startswith('table')
        if not is_first_tag and html_tag.tail and (not ignore_whitespace_tail or not html_tag.tail.isspace()):
            # Ignore trailing space after a block tag, as otherwise it will
            # become a new empty paragraph
            block = self.create_block_from_parent(html_tag, stylizer)
            block.add_text(html_tag.tail, stylizer.style(html_tag.getparent()), is_parent_style=True,
                           link=self.current_link, lang=self.current_lang)

    def create_block_from_parent(self, html_tag, stylizer):
        """Return the current (or a new) block for the parent of ``html_tag``."""
        parent = html_tag.getparent()
        block = self.blocks.current_or_new_block(parent, stylizer.style(parent))
        # Do not inherit page-break-before from parent
        block.page_break_before = False
        return block

    def add_block_tag(self, tagname, html_tag, tag_style, stylizer, is_table_cell=False, float_spec=None, is_list_item=False):
        """Start a new block for ``html_tag`` and add its leading content.

        The tag's ``id``/``name`` anchor becomes a bookmark on the block.
        An ``<img>`` is added as a block level image, any other tag
        contributes its leading text.
        """
        block = self.blocks.start_new_block(
            html_tag, tag_style, is_table_cell=is_table_cell, float_spec=float_spec, is_list_item=is_list_item)
        anchor = html_tag.get('id') or html_tag.get('name')
        if anchor:
            block.bookmarks.add(self.bookmark_for_anchor(anchor, html_tag))
        if tagname == 'img':
            self.images_manager.add_image(html_tag, block, stylizer, as_block=True)
        else:
            text = html_tag.text
            if text:
                block.add_text(text, tag_style, ignore_leading_whitespace=True, is_parent_style=True,
                               link=self.current_link, lang=self.current_lang)
            elif tagname == 'li' and len(html_tag) and barename(html_tag[0].tag) in ('ul', 'ol') and len(html_tag[0]):
                # A textless <li> whose first child is a non-empty list
                block.force_not_empty = True

    def add_inline_tag(self, tagname, html_tag, tag_style, stylizer):
        """Add the content of an inline ``html_tag`` to its parent's block."""
        anchor = html_tag.get('id') or html_tag.get('name') or None
        bmark = None
        if anchor:
            bmark = self.bookmark_for_anchor(anchor, html_tag)
        if tagname == 'br':
            # A trailing <br> (last element child, no tail text) is dropped
            if html_tag.tail or html_tag is not tuple(html_tag.getparent().iterchildren('*'))[-1]:
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_break(clear={'both':'all', 'left':'left', 'right':'right'}.get(tag_style['clear'], 'none'), bookmark=bmark)
        elif tagname == 'img':
            block = self.create_block_from_parent(html_tag, stylizer)
            self.images_manager.add_image(html_tag, block, stylizer, bookmark=bmark)
        else:
            if html_tag.text:
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_text(html_tag.text, tag_style, is_parent_style=False, bookmark=bmark,
                               link=self.current_link, lang=self.current_lang)
            elif bmark:
                # No text, but the bookmark still needs a run to live on
                block = self.create_block_from_parent(html_tag, stylizer)
                block.add_text('', tag_style, is_parent_style=False, bookmark=bmark,
                               link=self.current_link, lang=self.current_lang)

    def bookmark_for_anchor(self, anchor, html_tag):
        """Return the bookmark name for ``anchor`` in the current item."""
        return self.links_manager.bookmark_for_anchor(anchor, self.current_item, html_tag)

    def write(self):
        """Serialize blocks, ToC, cover and all managers into ``self.docx``."""
        self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
        self.blocks.serialize(body)
        body.append(body[0])  # Move <sectPr> to the end
        if self.links_manager.toc:
            self.links_manager.serialize_toc(body, self.styles_manager.primary_heading_style)
        if self.cover_img is not None:
            self.images_manager.write_cover_block(body, self.cover_img)
        self.styles_manager.serialize(self.docx.styles)
        self.images_manager.serialize(self.docx.images)
        self.fonts_manager.serialize(self.styles_manager.text_styles, self.docx.font_table, self.docx.embedded_fonts, self.docx.fonts)
        self.lists_manager.serialize(self.docx.numbering)
|
||||
219
ebook_converter/ebooks/docx/writer/images.py
Normal file
219
ebook_converter/ebooks/docx/writer/images.py
Normal file
@@ -0,0 +1,219 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os
|
||||
import posixpath
|
||||
from collections import namedtuple
|
||||
from functools import partial
|
||||
from polyglot.builtins import iteritems, itervalues, map, unicode_type
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre import fit_image
|
||||
from calibre.ebooks.oeb.base import urlunquote
|
||||
from calibre.ebooks.docx.images import pt_to_emu
|
||||
from calibre.utils.filenames import ascii_filename
|
||||
from calibre.utils.imghdr import identify
|
||||
|
||||
# Record describing one image stored in the DOCX: relationship id, file name
# inside the container, pixel size, detected format and the manifest item.
Image = namedtuple('Image', ['rid', 'fname', 'width', 'height', 'fmt', 'item'])
|
||||
|
||||
|
||||
def as_num(x):
    """Return ``x`` converted to a float, or 0 when it cannot be converted."""
    try:
        number = float(x)
    except Exception:
        number = 0
    return number
|
||||
|
||||
|
||||
def get_image_margins(style):
    """Build the ``distL``/``distR``/``distT``/``distB`` attributes for an image.

    For each edge, padding and margin from ``style`` are added together
    (non-numeric values count as 0), converted with ``pt_to_emu`` and
    returned as text.
    """
    margins = {}
    for edge in ('Left', 'Right', 'Top', 'Bottom'):
        padding = as_num(getattr(style, 'padding' + edge))
        margin = as_num(getattr(style, 'margin' + edge))
        margins['dist' + edge[0]] = unicode_type(pt_to_emu(padding + margin))
    return margins
|
||||
|
||||
|
||||
class ImagesManager(object):
    """Collect the images used by the book and create their DOCX markup."""

    def __init__(self, oeb, document_relationships, opts):
        """
        :param oeb: The OEB book; images are looked up in its manifest.
        :param document_relationships: Registry that hands out relationship
            ids for images and exposes the XML namespace helpers.
        :param opts: Conversion options; the output profile supplies the
            page size (in pts) that images are fitted into.
        """
        self.oeb, self.log = oeb, oeb.log
        self.page_width, self.page_height = opts.output_profile.width_pts, opts.output_profile.height_pts
        self.images = {}  # href -> Image
        self.seen_filenames = set()  # lower-cased base names already handed out
        self.document_relationships = document_relationships
        self.count = 0  # running id for wp:docPr elements

    def read_image(self, href):
        """Return the ``Image`` record for ``href``, registering it on first use.

        Returns None when ``href`` is not in the manifest or its data is
        not bytes. Image data that cannot be identified is replaced by a
        blank image.
        """
        if href not in self.images:
            item = self.oeb.manifest.hrefs.get(href)
            if item is None or not isinstance(item.data, bytes):
                return
            try:
                fmt, width, height = identify(item.data)
            except Exception:
                self.log.warning('Replacing corrupted image with blank: %s' % href)
                item.data = I('blank.png', data=True, allow_user_override=False)
                fmt, width, height = identify(item.data)
            image_fname = 'media/' + self.create_filename(href, fmt)
            image_rid = self.document_relationships.add_image(image_fname)
            self.images[href] = Image(image_rid, image_fname, width, height, fmt, item)
            item.unload_data_from_memory()
        return self.images[href]

    def add_image(self, img, block, stylizer, bookmark=None, as_block=False):
        """Add the ``<img>`` element ``img`` to ``block``.

        Returns the relationship id of the image, or None when the tag has
        no ``src`` or the image could not be read.
        """
        src = img.get('src')
        if not src:
            return
        href = self.abshref(src)
        try:
            rid = self.read_image(href).rid
        except AttributeError:
            # read_image() returned None: the image is unavailable
            return
        drawing = self.create_image_markup(img, stylizer, href, as_block=as_block)
        block.add_image(drawing, bookmark=bookmark)
        return rid

    @staticmethod
    def _alignment_for_auto_margins(margin_left, margin_right, default=None):
        """Map ``auto`` side margins of a block image to an alignment.

        Both margins ``auto`` centers the image, only the left one pushes
        it to the right, only the right one keeps it on the left. Without
        any ``auto`` margin ``default`` is returned unchanged.
        """
        left_auto, right_auto = margin_left == 'auto', margin_right == 'auto'
        if left_auto and right_auto:
            return 'center'
        if left_auto:
            return 'right'
        if right_auto:
            return 'left'
        return default

    def create_image_markup(self, html_img, stylizer, href, as_block=False):
        """Create the ``w:drawing`` element for ``html_img``.

        ``href`` must already have been registered via ``read_image``.
        Images that end up with an alignment are anchored, all others are
        placed inline.
        """
        # TODO: img inside a link (clickable image)
        style = stylizer.style(html_img)
        floating = style['float']
        if floating not in {'left', 'right'}:
            floating = None
        if as_block:
            floating = self._alignment_for_auto_margins(
                style._get('margin-left'), style._get('margin-right'), floating)
        else:
            parent = html_img.getparent()
            if len(parent) == 1 and not (parent.text or '').strip() and not (html_img.tail or '').strip():
                pstyle = stylizer.style(parent)
                if 'block' in pstyle['display']:
                    # We have an inline image alone inside a block
                    as_block = True
                    floating = pstyle['float']
                    if floating not in {'left', 'right'}:
                        floating = None
                        if pstyle['text-align'] in ('center', 'right'):
                            floating = pstyle['text-align']
                    floating = floating or 'left'
        fake_margins = floating is None
        self.count += 1
        img = self.images[href]
        name = urlunquote(posixpath.basename(href))
        width, height = style.img_size(img.width, img.height)
        scaled, width, height = fit_image(width, height, self.page_width, self.page_height)
        width, height = map(pt_to_emu, (width, height))

        makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces

        root = etree.Element('root', nsmap=namespaces)
        ans = makeelement(root, 'w:drawing', append=False)
        if floating is None:
            parent = makeelement(ans, 'wp:inline')
        else:
            parent = makeelement(ans, 'wp:anchor', **get_image_margins(style))
            # The next attributes are boilerplate that Word requires, even
            # though the DOCX specs define defaults for all of them
            parent.set('simplePos', '0')
            parent.set('relativeHeight', '1')
            parent.set('behindDoc', "0")
            parent.set('locked', "0")
            parent.set('layoutInCell', "1")
            parent.set('allowOverlap', '1')
            makeelement(parent, 'wp:simplePos', x='0', y='0')
            makeelement(makeelement(parent, 'wp:positionH', relativeFrom='margin'), 'wp:align').text = floating
            makeelement(makeelement(parent, 'wp:positionV', relativeFrom='line'), 'wp:align').text = 'top'
        makeelement(parent, 'wp:extent', cx=unicode_type(width), cy=unicode_type(height))
        if fake_margins:
            # DOCX does not support setting margins for inline images, so we
            # fake it by using effect extents to simulate margins
            makeelement(parent, 'wp:effectExtent', **{k[-1].lower():v for k, v in iteritems(get_image_margins(style))})
        else:
            makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
        if floating is not None:
            # Word requires the wrap element to come after the extent
            # settings
            if as_block:
                makeelement(parent, 'wp:wrapTopAndBottom')
            else:
                makeelement(parent, 'wp:wrapSquare', wrapText='bothSides')
        self.create_docx_image_markup(parent, name, html_img.get('alt') or name, img.rid, width, height)
        return ans

    def create_docx_image_markup(self, parent, name, alt, img_rid, width, height):
        """Append the picture markup shared by all images to ``parent``.

        ``width`` and ``height`` are written as given into the ``a:ext``
        element; ``img_rid`` is the relationship id of the image data.
        """
        makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces
        makeelement(parent, 'wp:docPr', id=unicode_type(self.count), name=name, descr=alt)
        makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect="1")
        g = makeelement(parent, 'a:graphic')
        gd = makeelement(g, 'a:graphicData', uri=namespaces['pic'])
        pic = makeelement(gd, 'pic:pic')
        nvPicPr = makeelement(pic, 'pic:nvPicPr')
        makeelement(nvPicPr, 'pic:cNvPr', id='0', name=name, descr=alt)
        makeelement(nvPicPr, 'pic:cNvPicPr')
        bf = makeelement(pic, 'pic:blipFill')
        makeelement(bf, 'a:blip', r_embed=img_rid)
        makeelement(makeelement(bf, 'a:stretch'), 'a:fillRect')
        spPr = makeelement(pic, 'pic:spPr')
        xfrm = makeelement(spPr, 'a:xfrm')
        makeelement(xfrm, 'a:off', x='0', y='0')
        makeelement(xfrm, 'a:ext', cx=unicode_type(width), cy=unicode_type(height))
        makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst')

    def create_filename(self, href, fmt):
        """Return a unique ASCII file name (with extension) for ``href``.

        Uniqueness is case-insensitive and is achieved by appending a
        counter to the (at most 75 character long) base name.
        """
        fname = ascii_filename(urlunquote(posixpath.basename(href)))
        fname = posixpath.splitext(fname)[0]
        fname = fname[:75].rstrip('.') or 'image'
        num = 0
        base = fname
        while fname.lower() in self.seen_filenames:
            num += 1
            fname = base + unicode_type(num)
        self.seen_filenames.add(fname.lower())
        fname += os.extsep + fmt.lower()
        return fname

    def serialize(self, images_map):
        """Register a lazy data getter for every collected image in ``images_map``."""
        for img in itervalues(self.images):
            images_map['word/' + img.fname] = partial(self.get_data, img.item)

    def get_data(self, item):
        """Return the data of ``item`` and unload it from memory afterwards."""
        try:
            return item.data
        finally:
            item.unload_data_from_memory(False)

    def create_cover_markup(self, img, preserve_aspect_ratio, width, height):
        """Create the ``w:drawing`` element for the cover image.

        ``width`` and ``height`` give the target size (converted with
        ``pt_to_emu``). With ``preserve_aspect_ratio`` the shorter side is
        shrunk to keep the aspect ratio of ``img``.
        """
        self.count += 1
        makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces
        if preserve_aspect_ratio:
            if img.width >= img.height:
                ar = img.height / img.width
                height = ar * width
            else:
                ar = img.width / img.height
                width = ar * height

        root = etree.Element('root', nsmap=namespaces)
        ans = makeelement(root, 'w:drawing', append=False)
        parent = makeelement(ans, 'wp:anchor', **{'dist'+edge:'0' for edge in 'LRTB'})
        parent.set('simplePos', '0')
        parent.set('relativeHeight', '1')
        parent.set('behindDoc', "0")
        parent.set('locked', "0")
        parent.set('layoutInCell', "1")
        parent.set('allowOverlap', '1')
        makeelement(parent, 'wp:simplePos', x='0', y='0')
        makeelement(makeelement(parent, 'wp:positionH', relativeFrom='page'), 'wp:align').text = 'center'
        makeelement(makeelement(parent, 'wp:positionV', relativeFrom='page'), 'wp:align').text = 'center'
        width, height = map(pt_to_emu, (width, height))
        makeelement(parent, 'wp:extent', cx=unicode_type(width), cy=unicode_type(height))
        makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
        makeelement(parent, 'wp:wrapTopAndBottom')
        self.create_docx_image_markup(parent, 'cover.jpg', _('Cover'), img.rid, width, height)
        return ans

    def write_cover_block(self, body, cover_image):
        """Insert a paragraph holding ``cover_image`` at the start of ``body``.

        The first ``pageBreakBefore`` element found in the document is
        switched on so the existing content starts on a new page.
        """
        makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces
        pbb = body[0].xpath('//*[local-name()="pageBreakBefore"]')[0]
        pbb.set('{%s}val' % namespaces['w'], 'on')
        p = makeelement(body, 'w:p', append=False)
        body.insert(0, p)
        r = makeelement(p, 'w:r')
        r.append(cover_image)
|
||||
175
ebook_converter/ebooks/docx/writer/links.py
Normal file
175
ebook_converter/ebooks/docx/writer/links.py
Normal file
@@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import posixpath, re
|
||||
from uuid import uuid4
|
||||
|
||||
from calibre.utils.filenames import ascii_text
|
||||
from polyglot.builtins import unicode_type
|
||||
from polyglot.urllib import urlparse
|
||||
|
||||
|
||||
def start_text(tag, prefix_len=0, top_level=True):
    """Return roughly the first 50 characters of text contained in ``tag``.

    Text is gathered from ``tag.text`` and, recursively, from its element
    children and their tails until the limit is reached.

    :param prefix_len: Number of characters already collected by the
        caller; it reduces the limit for this call.
    :param top_level: Only the outermost call truncates the result to the
        limit and appends ``'...'``.
    """
    limit = 50 - prefix_len
    text = tag.text or ''
    if len(text) < limit:
        for child in tag.iterchildren('*'):
            text += start_text(child, len(text), top_level=False)
            text += child.tail or ''
            if len(text) >= limit:
                break
    if top_level and len(text) > limit:
        return text[:limit] + '...'
    return text
|
||||
|
||||
|
||||
class TOCItem(object):
    """One entry of the generated Table of Contents."""

    def __init__(self, title, bmark, level):
        """
        :param title: Text shown for the entry.
        :param bmark: Name of the bookmark the entry links to.
        :param level: Nesting depth, used for the indentation.
        """
        self.title = title
        self.bmark = bmark
        self.level = level
        # Set by the owner of the list: the first entry opens the TOC
        # field and the last entry closes it.
        self.is_first = False
        self.is_last = False

    def serialize(self, body, makeelement):
        """Create the paragraph for this entry and insert it at the top of ``body``."""
        para = makeelement(body, 'w:p', append=False)
        props = makeelement(para, 'w:pPr')
        makeelement(props, 'w:pStyle', w_val="Normal")
        indent = unicode_type(200 * self.level)
        makeelement(props, 'w:ind', w_left='0', w_firstLineChars='0', w_firstLine='0', w_leftChars=indent)
        if self.is_first:
            makeelement(props, 'w:pageBreakBefore', w_val='off')
            begin_run = makeelement(para, 'w:r')
            makeelement(begin_run, 'w:fldChar', w_fldCharType='begin')
            instr_run = makeelement(para, 'w:r')
            makeelement(instr_run, 'w:instrText').text = r' TOC \h '
            # The surrounding spaces of the instruction are significant
            instr_run[0].set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
            separator_run = makeelement(para, 'w:r')
            makeelement(separator_run, 'w:fldChar', w_fldCharType='separate')
        link = makeelement(para, 'w:hyperlink', w_anchor=self.bmark)
        link_run = makeelement(link, 'w:r')
        run_props = makeelement(link_run, 'w:rPr')
        makeelement(run_props, 'w:color', w_val='0000FF', w_themeColor='hyperlink')
        makeelement(run_props, 'w:u', w_val='single')
        makeelement(link_run, 'w:t').text = self.title
        if self.is_last:
            end_run = makeelement(para, 'w:r')
            makeelement(end_run, 'w:fldChar', w_fldCharType='end')
        body.insert(0, para)
|
||||
|
||||
|
||||
def sanitize_bookmark_name(base):
    """Return ``base`` reduced to ASCII letters, digits and underscores.

    Every other character becomes an underscore, the result is cut to 32
    characters and trailing underscores are removed.
    """
    # Max length allowed by Word appears to be 40, we use 32 to leave some
    # space for making the name unique
    ascii_only = ascii_text(base)
    cleaned = re.sub(r'[^0-9a-zA-Z]', '_', ascii_only)
    return cleaned[:32].rstrip('_')
|
||||
|
||||
|
||||
class LinksManager(object):
    """Track bookmarks, hyperlinks and Table of Contents entries."""

    def __init__(self, namespace, document_relationships, log):
        """
        :param namespace: XML namespace helper (``makeelement``, ``names``,
            ``namespaces``).
        :param document_relationships: Registry used to create the
            relationships of external links.
        :param log: Logger for warnings.
        """
        self.namespace = namespace
        self.log = log
        self.document_relationships = document_relationships
        # Random pseudo anchor standing for "the top of a file"
        self.top_anchor = unicode_type(uuid4().hex)
        self.anchor_map = {}  # (href, anchor) -> bookmark name
        self.used_bookmark_names = set()
        self.bmark_id = 0
        self.document_hrefs = set()  # hrefs for which a top bookmark exists
        self.external_links = {}  # url -> relationship id
        self.toc = []

    def bookmark_for_anchor(self, anchor, current_item, html_tag):
        """Return the unique bookmark name for ``anchor`` in ``current_item``.

        The name is created on first use, from the start of the text of
        ``html_tag`` (or from the file name for the top-of-file anchor),
        and remembered for later calls.
        """
        key = (current_item.href, anchor)
        try:
            return self.anchor_map[key]
        except KeyError:
            pass
        if anchor == self.top_anchor:
            name = ('Top of %s' % posixpath.basename(current_item.href))
            self.document_hrefs.add(current_item.href)
        else:
            name = start_text(html_tag).strip() or anchor
        name = sanitize_bookmark_name(name)
        base_name, suffix = name, 0
        while name in self.used_bookmark_names:
            suffix += 1
            name = base_name + ('_%d' % suffix)
        self.anchor_map[key] = name
        self.used_bookmark_names.add(name)
        return name

    @property
    def bookmark_id(self):
        """Return a fresh bookmark id; every access increments the counter."""
        self.bmark_id += 1
        return self.bmark_id

    def _bookmark_for_target(self, href, fragment):
        """Return the bookmark for ``href#fragment``, else the top of ``href``."""
        key = (href, fragment or self.top_anchor)
        if key in self.anchor_map:
            return self.anchor_map[key]
        return self.anchor_map[(href, self.top_anchor)]

    def serialize_hyperlink(self, parent, link):
        """Create the ``w:hyperlink`` element for ``link`` inside ``parent``.

        :param link: ``(item, url, tooltip)`` tuple.
        :return: The new hyperlink element, or ``parent`` itself when the
            link cannot be represented (unknown internal destination or
            unsupported scheme).
        """
        item, url, tooltip = link
        purl = urlparse(url)
        href = purl.path

        def make_link(parent, anchor=None, rel_id=None, tooltip=None):
            attrs = {}
            if anchor is not None:
                attrs['w_anchor'] = anchor
            elif rel_id is not None:
                attrs['r_id'] = rel_id
            if tooltip:
                attrs['w_tooltip'] = tooltip
            return self.namespace.makeelement(parent, 'w:hyperlink', **attrs)

        scheme = purl.scheme
        if not scheme:
            href = item.abshref(href)
            if href in self.document_hrefs:
                bmark = self._bookmark_for_target(href, purl.fragment)
                return make_link(parent, anchor=bmark, tooltip=tooltip)
            self.log.warn('Ignoring internal hyperlink with href (%s) pointing to unknown destination' % url)
        if scheme in {'http', 'https', 'ftp'}:
            if url not in self.external_links:
                self.external_links[url] = self.document_relationships.add_relationship(
                    url, self.namespace.names['LINKS'], target_mode='External')
            return make_link(parent, rel_id=self.external_links[url], tooltip=tooltip)
        return parent

    def process_toc_node(self, toc, level=0):
        """Add ``toc`` and, recursively, its children to ``self.toc``.

        Nodes whose href does not point into the document are left out,
        their children are still visited.
        """
        target = toc.href
        if target:
            purl = urlparse(target)
            path = purl.path
            if path in self.document_hrefs:
                bmark = self._bookmark_for_target(path, purl.fragment)
                self.toc.append(TOCItem(toc.title, bmark, level))
        for child in toc:
            self.process_toc_node(child, level+1)

    def process_toc_links(self, oeb):
        """Rebuild ``self.toc`` from ``oeb.toc``.

        Nothing is collected unless ``oeb.toc`` is truthy and its
        ``count()`` is greater than one.
        """
        self.toc = []
        if not (oeb.toc and oeb.toc.count() > 1):
            return
        for child in oeb.toc:
            self.process_toc_node(child)
        if self.toc:
            self.toc[0].is_first = True
            self.toc[-1].is_last = True

    def serialize_toc(self, body, primary_heading_style):
        """Insert the Table of Contents, preceded by a title, at the top of ``body``.

        The first ``pageBreakBefore`` element found in the document is
        switched on so the existing content starts on a new page.
        """
        makeelement = self.namespace.makeelement
        pbb = body[0].xpath('//*[local-name()="pageBreakBefore"]')[0]
        pbb.set('{%s}val' % self.namespace.namespaces['w'], 'on')
        # Every entry inserts itself at position 0, hence the reversed order
        for entry in reversed(self.toc):
            entry.serialize(body, makeelement)
        title = __('Table of Contents')
        heading = makeelement(body, 'w:p', append=False)
        heading_props = makeelement(heading, 'w:pPr')
        if primary_heading_style is not None:
            makeelement(heading_props, 'w:pStyle', w_val=primary_heading_style.id)
        makeelement(heading_props, 'w:pageBreakBefore', w_val='off')
        makeelement(makeelement(heading, 'w:r'), 'w:t').text = title
        body.insert(0, heading)
|
||||
169
ebook_converter/ebooks/docx/writer/lists.py
Normal file
169
ebook_converter/ebooks/docx/writer/lists.py
Normal file
@@ -0,0 +1,169 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from collections import defaultdict
|
||||
from operator import attrgetter
|
||||
|
||||
from polyglot.builtins import iteritems, itervalues, unicode_type
|
||||
|
||||
# The CSS list-style-type values recognised as list markers.
LIST_STYLES = frozenset((
    'disc', 'circle', 'square', 'decimal', 'decimal-leading-zero',
    'lower-roman', 'upper-roman', 'lower-greek', 'lower-alpha', 'lower-latin',
    'upper-alpha', 'upper-latin', 'hiragana', 'hebrew', 'katakana-iroha',
    'cjk-ideographic',
))

# Maps a CSS list-style-type to the value written into the DOCX numbering
# (a number format name or, for bullets, the bullet text).
STYLE_MAP = {
    # bullets
    'disc': 'bullet',
    'circle': 'o',
    'square': '\uf0a7',
    # numbers
    'decimal': 'decimal',
    'decimal-leading-zero': 'decimalZero',
    'lower-roman': 'lowerRoman',
    'upper-roman': 'upperRoman',
    # letters
    'lower-alpha': 'lowerLetter',
    'lower-latin': 'lowerLetter',
    'upper-alpha': 'upperLetter',
    'upper-latin': 'upperLetter',
    # other scripts
    'hiragana': 'aiueo',
    'hebrew': 'hebrew1',
    'katakana-iroha': 'iroha',
    'cjk-ideographic': 'chineseCounting',
}
|
||||
|
||||
|
||||
def find_list_containers(list_tag, tag_style):
    """Return the ancestors of ``list_tag`` that carry a known list style.

    An ancestor qualifies when its own style declares a
    ``list-style-type`` contained in ``LIST_STYLES``. The result is ordered
    from the nearest ancestor to the outermost one.
    """
    stylizer = tag_style._stylizer
    containers = []
    node = list_tag
    parent = node.getparent()
    while parent is not None and parent is not node:
        node = parent
        declared = stylizer.style(node)._style.get('list-style-type', None)
        if (declared or '').lower() in LIST_STYLES:
            containers.append(node)
        parent = node.getparent()
    return containers
|
||||
|
||||
|
||||
class NumberingDefinition(object):
    """The numbering definition shared by all items below one top-most list."""

    def __init__(self, top_most, stylizer, namespace):
        self.namespace = namespace
        self.top_most = top_most
        self.stylizer = stylizer
        # ilvl -> list of (container, list_tag, block, list_type, tag_style)
        self.level_map = defaultdict(list)
        self.num_id = None

    def finalize(self):
        """Build ``self.levels`` from the entries collected in ``level_map``.

        For every level the container and list type of the last entry are
        used, together with the list tags of all its entries.
        """
        tags_by_level = defaultdict(list)
        container_by_level, type_by_level = {}, {}
        for ilvl, entries in iteritems(self.level_map):
            for container, list_tag, block, list_type, tag_style in entries:
                tags_by_level[ilvl].append(list_tag)
                container_by_level[ilvl] = container
                type_by_level[ilvl] = list_type
        levels = []
        for ilvl in sorted(self.level_map):
            levels.append(Level(
                type_by_level[ilvl], container_by_level[ilvl], tags_by_level[ilvl], ilvl=ilvl))
        self.levels = tuple(levels)

    def __hash__(self):
        return hash(self.levels)

    def link_blocks(self):
        """Store the ``(numId, ilvl)`` pair on every block of this definition."""
        for ilvl, entries in iteritems(self.level_map):
            for entry in entries:
                block = entry[2]
                block.numbering_id = (self.num_id + 1, ilvl)

    def serialize(self, parent):
        """Append the ``w:abstractNum`` element of this definition to ``parent``."""
        makeelement = self.namespace.makeelement
        abstract_num = makeelement(parent, 'w:abstractNum', w_abstractNumId=unicode_type(self.num_id))
        makeelement(abstract_num, 'w:multiLevelType', w_val='hybridMultilevel')
        makeelement(abstract_num, 'w:name', w_val='List %d' % (self.num_id + 1))
        for level in self.levels:
            level.serialize(abstract_num, makeelement)
|
||||
|
||||
|
||||
class Level(object):
    """A single level of a numbering definition."""

    def __init__(self, list_type, container, items, ilvl=0):
        """
        :param list_type: The CSS list-style-type of the level.
        :param container: The list container; its ``start`` attribute
            gives the first number (default 1).
        :param items: The list items of the level; a numeric ``value``
            attribute on the first one overrides the start number.
        :param ilvl: Zero based nesting level.
        """
        self.ilvl = ilvl
        self.start = 1
        try:
            self.start = int(container.get('start'))
        except Exception:
            pass
        if items:
            try:
                self.start = int(items[0].get('value'))
            except Exception:
                pass
        if list_type in ('disc', 'circle', 'square'):
            self.num_fmt = 'bullet'
            if list_type == 'disc':
                self.lvl_text = '\uf0b7'
            else:
                self.lvl_text = STYLE_MAP[list_type]
        else:
            self.lvl_text = '%{}.'.format(self.ilvl + 1)
            self.num_fmt = STYLE_MAP.get(list_type, 'decimal')

    def __hash__(self):
        return hash((self.start, self.num_fmt, self.lvl_text))

    def serialize(self, parent, makeelement):
        """Append the ``w:lvl`` element describing this level to ``parent``."""
        lvl = makeelement(parent, 'w:lvl', w_ilvl=unicode_type(self.ilvl))
        makeelement(lvl, 'w:start', w_val=unicode_type(self.start))
        makeelement(lvl, 'w:numFmt', w_val=self.num_fmt)
        makeelement(lvl, 'w:lvlText', w_val=self.lvl_text)
        makeelement(lvl, 'w:lvlJc', w_val='left')
        para_props = makeelement(lvl, 'w:pPr')
        left_indent = unicode_type(1152 + self.ilvl * 360)
        makeelement(para_props, 'w:ind', w_hanging='360', w_left=left_indent)
        if self.num_fmt == 'bullet':
            # Each bullet character is taken from a specific font
            fonts = {'\uf0b7':'Symbol', '\uf0a7':'Wingdings'}
            ff = fonts.get(self.lvl_text, 'Courier New')
            makeelement(makeelement(lvl, 'w:rPr'), 'w:rFonts', w_ascii=ff, w_hAnsi=ff, w_hint="default")
|
||||
|
||||
|
||||
class ListsManager(object):
    """Group list blocks into numbering definitions and serialize them."""

    def __init__(self, docx):
        self.namespace = docx.namespace
        self.lists = {}

    def finalize(self, all_blocks):
        """Build ``self.definitions`` from the list blocks in *all_blocks*.

        Blocks are grouped by their top-most list container; blocks that have
        no list tag, an unsupported list-style-type or no list container are
        ignored.
        """
        by_root = {}
        for block in all_blocks:
            if block.list_tag is None:
                continue
            list_tag, tag_style = block.list_tag
            list_type = (tag_style['list-style-type'] or '').lower()
            if list_type not in LIST_STYLES:
                continue
            containers = find_list_containers(list_tag, tag_style)
            if not containers:
                continue
            root = containers[-1]
            if root not in by_root:
                by_root[root] = NumberingDefinition(root, tag_style._stylizer, self.namespace)
            numdef = by_root[root]
            # Depth is given by how many containers enclose the item
            depth = len(containers) - 1
            numdef.level_map[depth].append((containers[0], list_tag, block, list_type, tag_style))

        for numdef in itervalues(by_root):
            numdef.finalize()

        unique = {}
        for numdef in itervalues(by_root):
            try:
                numdef = unique[numdef]
            except KeyError:
                unique[numdef] = numdef
                numdef.num_id = len(unique) - 1
            numdef.link_blocks()
        self.definitions = sorted(itervalues(unique), key=attrgetter('num_id'))

    def serialize(self, parent):
        """Write all abstract definitions, then one ``w:num`` per definition."""
        for numdef in self.definitions:
            numdef.serialize(parent)
        make = self.namespace.makeelement
        for numdef in self.definitions:
            num = make(parent, 'w:num', w_numId=unicode_type(numdef.num_id + 1))
            make(num, 'w:abstractNumId', w_val=unicode_type(numdef.num_id))
|
||||
768
ebook_converter/ebooks/docx/writer/styles.py
Normal file
768
ebook_converter/ebooks/docx/writer/styles.py
Normal file
@@ -0,0 +1,768 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import numbers
|
||||
from collections import Counter, defaultdict
|
||||
from operator import attrgetter
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks import parse_css_length
|
||||
from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero
|
||||
from calibre.utils.localization import lang_as_iso639_1
|
||||
from polyglot.builtins import iteritems, filter, unicode_type
|
||||
from tinycss.css21 import CSS21Parser
|
||||
|
||||
# Shared parser instance used to tokenize CSS font-family values
css_parser = CSS21Parser()

# The four box edges and the per-edge attribute name templates
border_edges = ('left', 'top', 'right', 'bottom')
border_props = (
    'padding_%s',
    'border_%s_width',
    'border_%s_style',
    'border_%s_color',
)
# Sentinel meaning "edges disagree, so the property cannot be represented"
ignore = object()
|
||||
|
||||
|
||||
def parse_css_font_family(raw):
    """Yield the family names in the CSS ``font-family`` value *raw*.

    Only string and identifier tokens are yielded, in document order.
    Iteration stops at the ``inherit`` keyword. Nothing is yielded when the
    value cannot be parsed into a declaration.
    """
    decl, errs = css_parser.parse_style_attr('font-family:' + raw)
    if not decl:
        return
    for token in decl[0].value:
        # Membership in a tuple, not a substring test against
        # 'STRING IDENT': the latter also matches the whitespace token
        # type 'S' and would yield blank "family names".
        if token.type not in ('STRING', 'IDENT'):
            continue
        val = token.value
        if val == 'inherit':
            break
        yield val
|
||||
|
||||
|
||||
def css_font_family_to_docx(raw):
    """Return the DOCX font name for the first family listed in *raw*.

    Generic CSS families are replaced by a concrete font; any other name is
    returned unchanged. Returns None when *raw* lists no family.
    """
    generic = {
        'serif': 'Cambria',
        'sansserif': 'Candara',
        'sans-serif': 'Candara',
        'fantasy': 'Comic Sans',
        'cursive': 'Segoe Script',
    }
    # Only the first family matters, so stop after one iteration
    for family in parse_css_font_family(raw):
        key = family.lower()
        return generic.get(key, family)
    return None
|
||||
|
||||
|
||||
def bmap(x):
    """Map a truth value to the strings ``'on'`` / ``'off'``."""
    if x:
        return 'on'
    return 'off'
|
||||
|
||||
|
||||
def is_dropcaps(html_tag, tag_style):
    """Tell whether *html_tag* should be treated as a drop cap.

    That is the case when the tag has fewer than two children, its text
    (without tail) is shorter than five characters and it floats left.
    """
    if not len(html_tag) < 2:
        return False
    text = etree.tostring(html_tag, method='text', encoding='unicode', with_tail=False)
    if not len(text) < 5:
        return False
    return tag_style['float'] == 'left'
|
||||
|
||||
|
||||
class CombinedStyle(object):
    """A paragraph style made of one block style and one run style."""

    def __init__(self, bs, rs, blocks, namespace):
        self.bs = bs
        self.rs = rs
        self.blocks = blocks
        self.namespace = namespace
        # Assigned later, once all styles are known
        self.id = None
        self.name = None
        self.seq = None
        self.outline_level = None

    def apply(self):
        """Link every block to this style and its runs to the run style."""
        for block in self.blocks:
            block.linked_style = self
            for run in block.runs:
                run.parent_style = self.rs

    def serialize(self, styles, normal_style):
        """Append a paragraph ``w:style`` for this combination to *styles*.

        Properties are written relative to *normal_style*; every style other
        than *normal_style* itself is declared as based on it.
        """
        makeelement = self.namespace.makeelement
        block = makeelement(styles, 'w:style', w_styleId=self.id, w_type='paragraph')
        makeelement(block, 'w:name', w_val=self.name)
        makeelement(block, 'w:qFormat')
        if self is not normal_style:
            makeelement(block, 'w:basedOn', w_val=normal_style.id)
        if self.seq == 0:
            default_attr = '{%s}%s' % (self.namespace.namespaces['w'], 'default')
            block.set(default_attr, '1')

        pPr = makeelement(block, 'w:pPr')
        self.bs.serialize_properties(pPr, normal_style.bs)
        if self.outline_level is not None:
            level = unicode_type(self.outline_level + 1)
            makeelement(pPr, 'w:outlineLvl', w_val=level)

        rPr = makeelement(block, 'w:rPr')
        self.rs.serialize_properties(rPr, normal_style.rs)
|
||||
|
||||
|
||||
class FloatSpec(object):
    """Frame properties for a floated element or a drop cap."""

    def __init__(self, namespace, html_tag, tag_style):
        self.makeelement = namespace.makeelement
        self.is_dropcaps = is_dropcaps(html_tag, tag_style)
        self.blocks = []
        if self.is_dropcaps:
            self.dropcaps_lines = 3
        else:
            self._read_frame_geometry(tag_style)

        read_css_block_borders(self, tag_style)

    def _read_frame_geometry(self, tag_style):
        """Read alignment, size and spacing of a regular float (sizes * 20)."""
        self.x_align = tag_style['float']
        self.w = self.h = None
        if tag_style._get('width') != 'auto':
            self.w = int(20 * max(tag_style['min-width'], tag_style['width']))
        if tag_style._get('height') == 'auto':
            self.h_rule = 'auto'
        else:
            if tag_style['min-height'] > 0:
                rule, height = 'atLeast', tag_style['min-height']
            else:
                rule, height = 'exact', tag_style['height']
            self.h_rule = rule
            self.h = int(20 * height)
        self.h_space = int(20 * max(tag_style['margin-right'], tag_style['margin-left']))
        self.v_space = int(20 * max(tag_style['margin-top'], tag_style['margin-bottom']))

    def serialize(self, block, parent):
        """Write frame, indent, spacing and border properties into *parent*.

        *block* must be one of ``self.blocks``; the first and last block get
        their leading respectively trailing spacing zeroed.
        """
        if self.is_dropcaps:
            frame = dict(
                w_dropCap='drop', w_lines=unicode_type(self.dropcaps_lines),
                w_wrap='around', w_vAnchor='text', w_hAnchor='text')
        else:
            frame = dict(
                w_wrap='around', w_vAnchor='text', w_hAnchor='text', w_xAlign=self.x_align, w_y='1',
                w_hSpace=unicode_type(self.h_space), w_vSpace=unicode_type(self.v_space), w_hRule=self.h_rule
            )
            if self.w is not None:
                frame['w_w'] = unicode_type(self.w)
            if self.h is not None:
                frame['w_h'] = unicode_type(self.h)
        self.makeelement(parent, 'w:framePr', **frame)

        # Margins are already applied by the frame style, so override them to
        # be zero on individual blocks
        self.makeelement(parent, 'w:ind', w_left='0', w_leftChars='0', w_right='0', w_rightChars='0')
        spacing = {}
        if block is self.blocks[0]:
            spacing.update(w_before='0', w_beforeLines='0')
        if block is self.blocks[-1]:
            spacing.update(w_after='0', w_afterLines='0')
        if spacing:
            self.makeelement(parent, 'w:spacing', **spacing)

        # Similarly apply the same border and padding properties to all blocks
        # in this floatspec
        bdr = self.makeelement(parent, 'w:pBdr')
        for edge in border_edges:
            padding = getattr(self, 'padding_' + edge)
            width = getattr(self, 'border_%s_width' % edge)
            bstyle = getattr(self, 'border_%s_style' % edge)
            color = getattr(self, 'border_%s_color' % edge)
            self.makeelement(
                bdr, 'w:' + edge, w_space=unicode_type(padding), w_val=bstyle,
                w_sz=unicode_type(width), w_color=color)
|
||||
|
||||
|
||||
class DOCXStyle(object):
    """Base class for hashable DOCX styles.

    Subclasses list the attribute names that define a style in ``ALL_PROPS``
    and must set all of them before calling ``DOCXStyle.__init__``, since the
    hash is computed there. Two styles compare equal when all of those
    attributes are equal.
    """

    ALL_PROPS = ()
    TYPE = 'paragraph'

    def __init__(self, namespace):
        self.namespace = namespace
        # Helper producing a qualified name in the "w" namespace
        self.w = lambda x: '{%s}%s' % (namespace.namespaces['w'], x)
        self.id = self.name = None
        self.next_style = None
        self.calculate_hash()

    def calculate_hash(self):
        """Cache the hash of the current ``ALL_PROPS`` values."""
        self._hash = hash(tuple(
            getattr(self, x) for x in self.ALL_PROPS))

    def makeelement(self, parent, name, **attrs):
        """Create (without attaching) a "w" namespaced element with *attrs*."""
        return parent.makeelement(self.w(name), **{self.w(k): v for k, v in iteritems(attrs)})

    def __hash__(self):
        return self._hash

    def __eq__(self, other):
        for x in self.ALL_PROPS:
            if getattr(self, x) != getattr(other, x, None):
                return False
        return True

    def __ne__(self, other):
        return not self == other

    def __repr__(self):
        """Return the style serialized as XML text, for debugging.

        The style is serialized against itself so every property is written.
        If serialization is not possible (for example the id and name have
        not been assigned yet) a plain listing of the properties is returned
        instead, so that repr() never raises.
        """
        root = etree.Element(self.__class__.__name__, nsmap={'w': self.namespace.namespaces['w']})
        try:
            # serialize() needs the reference style as second argument and
            # __repr__ must return text, hence encoding='unicode'
            return etree.tostring(self.serialize(root, self), encoding='unicode', pretty_print=True)
        except (TypeError, ValueError):
            props = ', '.join('%s=%r' % (x, getattr(self, x, None)) for x in self.ALL_PROPS)
            return '<%s %s>' % (self.__class__.__name__, props)

    __str__ = __repr__

    def serialize(self, styles, normal_style):
        """Append a ``style`` element for this style to *styles*; return it.

        A ``basedOn`` child referring to *normal_style* is added unless this
        style is *normal_style* itself.
        """
        makeelement = self.makeelement
        style = makeelement(styles, 'style', styleId=self.id, type=self.TYPE)
        style.append(makeelement(style, 'name', val=self.name))
        if self is not normal_style:
            style.append(makeelement(style, 'basedOn', val=normal_style.id))
        styles.append(style)
        return style
|
||||
|
||||
|
||||
# CSS border-style keyword -> DOCX border value
LINE_STYLES = {
    # invisible borders
    'none': 'none',
    'hidden': 'none',
    # flat borders
    'dotted': 'dotted',
    'dashed': 'dashed',
    'solid': 'single',
    'double': 'double',
    # 3D borders
    'groove': 'threeDEngrave',
    'ridge': 'threeDEmboss',
    'inset': 'inset',
    'outset': 'outset',
}
|
||||
|
||||
|
||||
class TextStyle(DOCXStyle):
    """Character (run) level style computed from resolved CSS."""

    ALL_PROPS = ('font_family', 'font_size', 'bold', 'italic', 'color',
                 'background_color', 'underline', 'strike', 'dstrike', 'caps',
                 'shadow', 'small_caps', 'spacing', 'vertical_align', 'padding',
                 'border_style', 'border_width', 'border_color')
    TYPE = 'character'

    # CSS vertical-align keywords that map onto super/subscript
    _VERTICAL_ALIGN = {
        'top': 'superscript', 'text-top': 'superscript', 'sup': 'superscript', 'super': 'superscript',
        'bottom': 'subscript', 'text-bottom': 'subscript', 'sub': 'subscript'}

    def __init__(self, namespace, css, is_parent_style=False):
        """Read all character properties from *css*.

        When *is_parent_style* is true, background colour, padding and
        borders are not read from *css* and stay at their neutral values.
        """
        self.font_family = css_font_family_to_docx(css['font-family'])
        try:
            # stylizer normalizes all font sizes into pts
            self.font_size = max(0, int(float(css['font-size']) * 2))
        except (ValueError, TypeError, AttributeError):
            self.font_size = None

        weight = css['font-weight']
        weight_key = weight.lower() if hasattr(weight, 'lower') else weight
        self.bold = weight_key in {'bold', 'bolder'} or int_or_zero(weight) >= 700
        self.italic = css['font-style'].lower() in {'italic', 'oblique'}
        self.color = convert_color(css['color'])
        if is_parent_style:
            self.background_color = None
        else:
            self.background_color = convert_color(css.backgroundColor)
        decorations = set((css.effective_text_decoration or '').split())
        self.underline = 'underline' in decorations
        self.dstrike = 'line-through' in decorations and 'overline' in decorations
        self.strike = not self.dstrike and 'line-through' in decorations
        # TODO: If lowercase or capitalize, transform the actual text
        self.text_transform = css['text-transform']
        self.caps = self.text_transform == 'uppercase'
        self.small_caps = css['font-variant'].lower() in {'small-caps', 'smallcaps'}
        self.shadow = css['text-shadow'] not in {'none', None}
        try:
            self.spacing = int(float(css['letter-spacing']) * 20)
        except (ValueError, TypeError, AttributeError):
            self.spacing = None
        va = css.first_vertical_align
        if isinstance(va, numbers.Number):
            self.vertical_align = unicode_type(int(va * 2))
        else:
            self.vertical_align = self._VERTICAL_ALIGN.get(va) or 'baseline'

        self.padding = self.border_color = self.border_width = self.border_style = None
        if not is_parent_style:
            # DOCX does not support individual borders/padding for inline
            # content, so a property is only kept if all edges agree on it
            for edge in border_edges:
                # In DOCX padding can only be a positive integer
                try:
                    padding = max(0, int(css['padding-' + edge]))
                except ValueError:
                    padding = 0
                self.padding = self._merge(self.padding, padding)

                width = css['border-%s-width' % edge]
                if not isinstance(width, numbers.Number):
                    width = {'thin': 0.2, 'medium': 1, 'thick': 2}.get(width, 0)
                width = min(96, max(2, int(width * 8)))
                self.border_width = self._merge(self.border_width, width)

                color = convert_color(css['border-%s-color' % edge])
                self.border_color = self._merge(self.border_color, color)

                style = LINE_STYLES.get(css['border-%s-style' % edge].lower(), 'none')
                self.border_style = self._merge(self.border_style, style)

        # Unset or conflicting values fall back to "no border"
        if self.padding in (None, ignore):
            self.padding = 0
        if self.border_width in (None, ignore):
            self.border_width = 0
        if self.border_style in (None, ignore):
            self.border_style = 'none'
        if self.border_color in (None, ignore):
            self.border_color = 'auto'
        if self.border_style == 'none':
            self.border_width, self.border_color = 0, 'auto'

        DOCXStyle.__init__(self, namespace)

    @staticmethod
    def _merge(current, new):
        """Combine a per-edge value with what earlier edges produced.

        Returns *new* while nothing is stored yet, the stored value when the
        edges agree, and the ``ignore`` sentinel once they disagree.
        """
        if current is None:
            return new
        if current != new:
            return ignore
        return current

    def serialize_borders(self, bdr, normal_style):
        """Set on *bdr* the border attributes differing from *normal_style*."""
        w = self.w
        write_all = self is normal_style
        if write_all or self.padding != normal_style.padding:
            bdr.set(w('space'), unicode_type(self.padding))
        if write_all or self.border_width != normal_style.border_width:
            bdr.set(w('sz'), unicode_type(self.border_width))
        if write_all or self.border_style != normal_style.border_style:
            bdr.set(w('val'), self.border_style)
        if write_all or self.border_color != normal_style.border_color:
            bdr.set(w('color'), self.border_color)
        return bdr

    def serialize(self, styles, normal_style):
        """Append this character style to *styles* and return its element."""
        style_root = DOCXStyle.serialize(self, styles, normal_style)
        rPr = self.makeelement(style_root, 'rPr')
        self.serialize_properties(rPr, normal_style)
        if len(rPr) > 0:
            style_root.append(rPr)
        return style_root

    def serialize_properties(self, rPr, normal_style):
        """Append to *rPr* the run properties differing from *normal_style*.

        When this style is *normal_style* itself, all properties are written.
        """
        makeelement = self.makeelement
        is_normal_style = self is normal_style

        def changed(attr):
            val = getattr(self, attr)
            return is_normal_style or (val != getattr(normal_style, attr))

        def emit(name, **attrs):
            rPr.append(makeelement(rPr, name, **attrs))

        if is_normal_style or self.font_family != normal_style.font_family:
            emit('rFonts', **{k: self.font_family for k in 'ascii cs eastAsia hAnsi'.split()})

        for name, attr, vmap in (('sz', 'font_size', str), ('b', 'bold', bmap), ('i', 'italic', bmap)):
            val = getattr(self, attr)
            if is_normal_style or getattr(normal_style, attr) != val:
                # Each of these has a complex script counterpart
                for suffix in ('', 'Cs'):
                    emit(name + suffix, val=vmap(val))

        if changed('color'):
            emit('color', val=self.color or 'auto')
        if changed('background_color'):
            emit('shd', fill=self.background_color or 'auto')
        if changed('underline'):
            emit('u', val='single' if self.underline else 'none')
        for attr, name in (
                ('dstrike', 'dstrike'), ('strike', 'strike'), ('caps', 'caps'),
                ('small_caps', 'smallCaps'), ('shadow', 'shadow')):
            if changed(attr):
                emit(name, val=bmap(getattr(self, attr)))
        if changed('spacing'):
            emit('spacing', val=unicode_type(self.spacing or 0))

        named_alignment = self.vertical_align in {'superscript', 'subscript'}
        if is_normal_style:
            emit('vertAlign', val=self.vertical_align if named_alignment else 'baseline')
        elif self.vertical_align != normal_style.vertical_align:
            if named_alignment or self.vertical_align == 'baseline':
                emit('vertAlign', val=self.vertical_align)
            else:
                emit('position', val=self.vertical_align)

        bdr = self.serialize_borders(makeelement(rPr, 'bdr'), normal_style)
        if bdr.attrib:
            rPr.append(bdr)
|
||||
|
||||
|
||||
class DescendantTextStyle(object):
    """The run properties by which a child style differs from its parent.

    ``properties`` is a tuple of ``(element name, frozenset of attributes)``
    pairs; instances hash and compare by that tuple.
    """

    def __init__(self, parent_style, child_style):
        self.id = self.name = None
        self.makeelement = child_style.makeelement

        collected = []

        def add(name, **props):
            collected.append((name, frozenset(iteritems(props))))

        def differs(attr):
            return getattr(parent_style, attr) != getattr(child_style, attr)

        if parent_style.font_family != child_style.font_family:
            add('rFonts', **{k: child_style.font_family for k in 'ascii cs eastAsia hAnsi'.split()})

        for name, attr in (('sz', 'font_size'), ('b', 'bold'), ('i', 'italic')):
            if differs(attr):
                if attr in {'bold', 'italic'}:
                    val = 'on'  # bold, italic are toggle properties
                else:
                    val = unicode_type(getattr(child_style, attr))
                for suffix in ('', 'Cs'):
                    add(name + suffix, val=val)

        if differs('color'):
            add('color', val=child_style.color or 'auto')
        if differs('background_color'):
            add('shd', fill=child_style.background_color or 'auto')
        if differs('underline'):
            add('u', val='single' if child_style.underline else 'none')
        if differs('dstrike'):
            add('dstrike', val=bmap(child_style.dstrike))
        # Toggle properties: 'on' flips whatever the parent style has
        for attr, name in (
                ('strike', 'strike'), ('caps', 'caps'),
                ('small_caps', 'smallCaps'), ('shadow', 'shadow')):
            if differs(attr):
                add(name, val='on')
        if differs('spacing'):
            add('spacing', val=unicode_type(child_style.spacing or 0))
        if differs('vertical_align'):
            val = child_style.vertical_align
            if val in {'superscript', 'subscript', 'baseline'}:
                add('vertAlign', val=val)
            else:
                add('position', val=val)

        bdr = {}
        if differs('padding'):
            bdr['space'] = unicode_type(child_style.padding)
        if differs('border_width'):
            bdr['sz'] = unicode_type(child_style.border_width)
        if differs('border_style'):
            bdr['val'] = child_style.border_style
        if differs('border_color'):
            bdr['color'] = child_style.border_color
        if bdr:
            add('bdr', **bdr)

        self.properties = tuple(collected)
        self._hash = hash(self.properties)

    def __hash__(self):
        return self._hash

    def __eq__(self, other):
        return self.properties == other.properties

    def __ne__(self, other):
        return self.properties != other.properties

    def serialize(self, styles):
        """Append this character style to *styles* and return its element."""
        makeelement = self.makeelement
        style = makeelement(styles, 'style', styleId=self.id, type='character')
        style.append(makeelement(style, 'name', val=self.name))
        rpr = makeelement(style, 'rPr')
        style.append(rpr)
        for name, attrs in self.properties:
            child = makeelement(style, name, **dict(attrs))
            rpr.append(child)
        styles.append(style)
        return style
|
||||
|
||||
|
||||
def read_css_block_borders(self, css, store_css_style=False):
    """Store per-edge padding, margin and border attributes on *self*.

    For every edge this sets ``padding_<edge>``, ``margin_<edge>``,
    ``css_margin_<edge>`` and ``border_<edge>_{width,color,style}``. With
    *store_css_style* the lower-cased CSS border style is additionally kept
    as ``border_<edge>_css_style``. A *css* of None stores neutral defaults.
    """
    for edge in border_edges:
        if css is None:
            defaults = (
                ('padding_' + edge, 0),
                ('margin_' + edge, 0),
                ('css_margin_' + edge, ''),
                ('border_%s_width' % edge, 2),
                ('border_%s_color' % edge, None),
                ('border_%s_style' % edge, 'none'),
            )
            for attr, value in defaults:
                setattr(self, attr, value)
            if store_css_style:
                setattr(self, 'border_%s_css_style' % edge, 'none')
            continue

        # In DOCX padding can only be a positive integer
        try:
            padding = max(0, int(css['padding-' + edge]))
        except ValueError:
            padding = 0  # invalid value for padding
        setattr(self, 'padding_' + edge, padding)

        # In DOCX margin must be a positive integer in twips (twentieth of a point)
        try:
            margin = max(0, int(css['margin-' + edge] * 20))
        except ValueError:
            margin = 0  # for e.g.: margin: auto
        setattr(self, 'margin_' + edge, margin)
        setattr(self, 'css_margin_' + edge, css._style.get('margin-' + edge, ''))

        width = css['border-%s-width' % edge]
        if not isinstance(width, numbers.Number):
            width = {'thin': 0.2, 'medium': 1, 'thick': 2}.get(width, 0)
        # Width is clamped to the range 2..96
        width = min(96, max(2, int(width * 8)))
        setattr(self, 'border_%s_width' % edge, width)

        color = convert_color(css['border-%s-color' % edge]) or 'auto'
        setattr(self, 'border_%s_color' % edge, color)

        css_style = css['border-%s-style' % edge].lower()
        setattr(self, 'border_%s_style' % edge, LINE_STYLES.get(css_style, 'none'))
        if store_css_style:
            setattr(self, 'border_%s_css_style' % edge, css_style)
|
||||
|
||||
|
||||
class BlockStyle(DOCXStyle):
    """Paragraph level style computed from resolved CSS."""

    ALL_PROPS = tuple(
        'text_align css_text_indent text_indent line_height background_color'.split(
        ) + ['margin_' + edge for edge in border_edges
        ] + ['css_margin_' + edge for edge in border_edges
        ] + [x % edge for edge in border_edges for x in border_props]
    )

    # CSS text-align value -> DOCX justification value
    _ALIGNMENTS = {
        'start': 'left', 'left': 'left', 'end': 'right', 'right': 'right',
        'center': 'center', 'justify': 'both', 'centre': 'center'}

    def __init__(self, namespace, css, html_block, is_table_cell=False, parent_bg=None):
        """Read the block properties from *css* (None gives defaults).

        For table cells the borders, padding and margins are zeroed and no
        background colour is stored. Otherwise a block without its own
        background colour takes *parent_bg*.
        """
        read_css_block_borders(self, css)
        if is_table_cell:
            for edge in border_edges:
                setattr(self, 'border_%s_style' % edge, 'none')
                setattr(self, 'border_%s_width' % edge, 0)
                setattr(self, 'padding_' + edge, 0)
                setattr(self, 'margin_' + edge, 0)
        if css is None:
            self.text_indent = 0
            self.css_text_indent = None
            self.line_height = 280
            self.background_color = None
            self.text_align = 'left'
        else:
            self._read_css(css, is_table_cell, parent_bg)

        DOCXStyle.__init__(self, namespace)

    def _read_css(self, css, is_table_cell, parent_bg):
        """Read indent, line height, background and alignment from *css*."""
        try:
            self.text_indent = int(css['text-indent'] * 20)
            self.css_text_indent = css._get('text-indent')
        except (TypeError, ValueError):
            self.text_indent = 0
            self.css_text_indent = None
        try:
            self.line_height = max(0, int(css.lineHeight * 20))
        except (TypeError, ValueError):
            # Fall back to 1.2 times the font size
            self.line_height = max(0, int(1.2 * css.fontSize * 20))

        if is_table_cell:
            self.background_color = None
        else:
            self.background_color = convert_color(css['background-color'])
            if self.background_color is None:
                self.background_color = parent_bg

        try:
            preserve_whitespace = css['white-space'].lower() in {'pre', 'pre-wrap'}
        except Exception:
            preserve_whitespace = False
        try:
            aval = css['text-align'].lower()
            if preserve_whitespace:
                aval = 'start'
            self.text_align = self._ALIGNMENTS.get(aval, 'left')
        except AttributeError:
            self.text_align = 'left'

    def serialize_borders(self, bdr, normal_style):
        """Append to *bdr* the edges whose border differs from *normal_style*."""
        w = self.w
        is_normal = self is normal_style
        for edge in border_edges:
            e = bdr.makeelement(w(edge))
            padding = getattr(self, 'padding_' + edge)
            if (is_normal and padding > 0) or (padding != getattr(normal_style, 'padding_' + edge)):
                e.set(w('space'), unicode_type(padding))

            width_attr = 'border_%s_width' % edge
            style_attr = 'border_%s_style' % edge
            width = getattr(self, width_attr)
            bstyle = getattr(self, style_attr)
            visible_on_normal = is_normal and width > 0 and bstyle != 'none'
            if (visible_on_normal
                    or width != getattr(normal_style, width_attr)
                    or bstyle != getattr(normal_style, style_attr)):
                e.set(w('val'), bstyle)
                e.set(w('sz'), unicode_type(width))
                e.set(w('color'), getattr(self, 'border_%s_color' % edge))
            if e.attrib:
                bdr.append(e)
        return bdr

    def serialize(self, styles, normal_style):
        """Append this paragraph style to *styles* and return its element."""
        style_root = DOCXStyle.serialize(self, styles, normal_style)
        pPr = self.makeelement(style_root, 'pPr')
        self.serialize_properties(pPr, normal_style)
        if len(pPr) > 0:
            style_root.append(pPr)
        return style_root

    def serialize_properties(self, pPr, normal_style):
        """Append to *pPr* the properties differing from *normal_style*.

        Children are added in the order spacing, indent, shading, borders,
        justification, next style.
        """
        makeelement, w = self.makeelement, self.w
        is_normal = self is normal_style

        self._serialize_spacing(pPr, normal_style)
        self._serialize_indent(pPr, normal_style)

        if (is_normal and self.background_color) or self.background_color != normal_style.background_color:
            pPr.append(makeelement(pPr, 'shd', val='clear', color='auto', fill=self.background_color or 'auto'))

        pbdr = self.serialize_borders(pPr.makeelement(w('pBdr')), normal_style)
        if len(pbdr):
            pPr.append(pbdr)

        if is_normal or self.text_align != normal_style.text_align:
            pPr.append(makeelement(pPr, 'jc', val=self.text_align))

        if not is_normal and self.next_style is not None:
            pPr.append(makeelement(pPr, 'next', val=self.next_style))

    def _serialize_spacing(self, pPr, normal_style):
        """Write vertical margins and line height as a ``spacing`` element."""
        makeelement, w = self.makeelement, self.w
        is_normal = self is normal_style
        spacing = makeelement(pPr, 'spacing')
        for edge, attr in iteritems({'top': 'before', 'bottom': 'after'}):
            css_name = 'css_margin_' + edge
            own_css = getattr(self, css_name)
            css_val, css_unit = parse_css_length(own_css)
            if css_unit in ('em', 'ex'):
                # Font relative margins are expressed in hundredths of a line
                lines = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
                if (is_normal and lines > 0) or own_css != getattr(normal_style, css_name):
                    spacing.set(w(attr + 'Lines'), unicode_type(lines))
            else:
                name = 'margin_' + edge
                val = getattr(self, name)
                if (is_normal and val > 0) or val != getattr(normal_style, name):
                    spacing.set(w(attr), unicode_type(val))

        if is_normal or self.line_height != normal_style.line_height:
            spacing.set(w('line'), unicode_type(self.line_height))
            spacing.set(w('lineRule'), 'atLeast')

        if spacing.attrib:
            pPr.append(spacing)

    def _serialize_indent(self, pPr, normal_style):
        """Write horizontal margins and text indent as an ``ind`` element."""
        makeelement, w = self.makeelement, self.w
        is_normal = self is normal_style
        ind = makeelement(pPr, 'ind')
        for edge in ('left', 'right'):
            css_name = 'css_margin_' + edge
            own_css = getattr(self, css_name)
            css_val, css_unit = parse_css_length(own_css)
            if css_unit in ('em', 'ex'):
                chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
                if (is_normal and chars > 0) or own_css != getattr(normal_style, css_name):
                    ind.set(w(edge + 'Chars'), unicode_type(chars))
            else:
                name = 'margin_' + edge
                val = getattr(self, name)
                if (is_normal and val > 0) or val != getattr(normal_style, name):
                    ind.set(w(edge), unicode_type(val))
                    # This is needed to override any declaration in the parent style
                    ind.set(w(edge + 'Chars'), '0')

        css_val, css_unit = parse_css_length(self.css_text_indent)
        if css_unit in ('em', 'ex'):
            chars = int(css_val * (50 if css_unit == 'ex' else 100))
            css_differs = self.css_text_indent != normal_style.css_text_indent
            if css_val >= 0:
                if (is_normal and chars > 0) or css_differs:
                    ind.set(w('firstLineChars'), unicode_type(chars))
            else:
                if (is_normal and chars < 0) or css_differs:
                    ind.set(w('hangingChars'), unicode_type(abs(chars)))
        else:
            val = self.text_indent
            differs = self.text_indent != normal_style.text_indent
            if val >= 0:
                if (is_normal and val > 0) or differs:
                    ind.set(w('firstLine'), unicode_type(val))
                    # This is needed to override any declaration in the parent style
                    ind.set(w('firstLineChars'), '0')
            else:
                if (is_normal and val < 0) or differs:
                    ind.set(w('hanging'), unicode_type(abs(val)))
                    ind.set(w('hangingChars'), '0')
        if ind.attrib:
            pPr.append(ind)
|
||||
|
||||
|
||||
class StylesManager(object):
|
||||
|
||||
def __init__(self, namespace, log, document_lang):
|
||||
self.namespace = namespace
|
||||
self.document_lang = lang_as_iso639_1(document_lang) or 'en'
|
||||
self.log = log
|
||||
self.block_styles, self.text_styles = {}, {}
|
||||
self.styles_for_html_blocks = {}
|
||||
|
||||
def create_text_style(self, css_style, is_parent_style=False):
|
||||
ans = TextStyle(self.namespace, css_style, is_parent_style=is_parent_style)
|
||||
existing = self.text_styles.get(ans, None)
|
||||
if existing is None:
|
||||
self.text_styles[ans] = ans
|
||||
else:
|
||||
ans = existing
|
||||
return ans
|
||||
|
||||
def create_block_style(self, css_style, html_block, is_table_cell=False, parent_bg=None):
|
||||
ans = BlockStyle(self.namespace, css_style, html_block, is_table_cell=is_table_cell, parent_bg=parent_bg)
|
||||
existing = self.block_styles.get(ans, None)
|
||||
if existing is None:
|
||||
self.block_styles[ans] = ans
|
||||
else:
|
||||
ans = existing
|
||||
self.styles_for_html_blocks[html_block] = ans
|
||||
return ans
|
||||
|
||||
def finalize(self, all_blocks):
|
||||
block_counts, run_counts = Counter(), Counter()
|
||||
block_rmap, run_rmap = defaultdict(list), defaultdict(list)
|
||||
used_pairs = defaultdict(list)
|
||||
heading_styles = defaultdict(list)
|
||||
headings = frozenset('h1 h2 h3 h4 h5 h6'.split())
|
||||
pure_block_styles = set()
|
||||
|
||||
for block in all_blocks:
|
||||
bs = block.style
|
||||
block_counts[bs] += 1
|
||||
block_rmap[block.style].append(block)
|
||||
local_run_counts = Counter()
|
||||
for run in block.runs:
|
||||
count = run.style_weight
|
||||
run_counts[run.style] += count
|
||||
local_run_counts[run.style] += count
|
||||
run_rmap[run.style].append(run)
|
||||
if local_run_counts:
|
||||
rs = local_run_counts.most_common(1)[0][0]
|
||||
used_pairs[(bs, rs)].append(block)
|
||||
if block.html_tag in headings:
|
||||
heading_styles[block.html_tag].append((bs, rs))
|
||||
else:
|
||||
pure_block_styles.add(bs)
|
||||
|
||||
self.pure_block_styles = sorted(pure_block_styles, key=block_counts.__getitem__)
|
||||
bnum = len(unicode_type(max(1, len(pure_block_styles) - 1)))
|
||||
for i, bs in enumerate(self.pure_block_styles):
|
||||
bs.id = bs.name = '%0{}d Block'.format(bnum) % i
|
||||
bs.seq = i
|
||||
if i == 0:
|
||||
self.normal_pure_block_style = bs
|
||||
|
||||
counts = Counter()
|
||||
smap = {}
|
||||
for (bs, rs), blocks in iteritems(used_pairs):
|
||||
s = CombinedStyle(bs, rs, blocks, self.namespace)
|
||||
smap[(bs, rs)] = s
|
||||
counts[s] += sum(1 for b in blocks if not b.is_empty())
|
||||
for i, heading_tag in enumerate(sorted(heading_styles)):
|
||||
styles = sorted((smap[k] for k in heading_styles[heading_tag]), key=counts.__getitem__)
|
||||
styles = list(filter(lambda s:s.outline_level is None, styles))
|
||||
if styles:
|
||||
heading_style = styles[-1]
|
||||
heading_style.outline_level = i
|
||||
|
||||
snum = len(unicode_type(max(1, len(counts) - 1)))
|
||||
heading_styles = []
|
||||
for i, (style, count) in enumerate(counts.most_common()):
|
||||
if i == 0:
|
||||
self.normal_style = style
|
||||
style.id = style.name = 'Normal'
|
||||
else:
|
||||
if style.outline_level is None:
|
||||
val = 'Para %0{}d'.format(snum) % i
|
||||
else:
|
||||
val = 'Heading %d' % (style.outline_level + 1)
|
||||
heading_styles.append(style)
|
||||
style.id = style.name = val
|
||||
style.seq = i
|
||||
self.combined_styles = sorted(counts, key=attrgetter('seq'))
|
||||
[ls.apply() for ls in self.combined_styles]
|
||||
|
||||
descendant_style_map = {}
|
||||
ds_counts = Counter()
|
||||
for block in all_blocks:
|
||||
for run in block.runs:
|
||||
if run.parent_style is not run.style and run.parent_style and run.style:
|
||||
ds = DescendantTextStyle(run.parent_style, run.style)
|
||||
if ds.properties:
|
||||
run.descendant_style = descendant_style_map.get(ds)
|
||||
if run.descendant_style is None:
|
||||
run.descendant_style = descendant_style_map[ds] = ds
|
||||
ds_counts[run.descendant_style] += run.style_weight
|
||||
rnum = len(unicode_type(max(1, len(ds_counts) - 1)))
|
||||
for i, (text_style, count) in enumerate(ds_counts.most_common()):
|
||||
text_style.id = 'Text%d' % i
|
||||
text_style.name = '%0{}d Text'.format(rnum) % i
|
||||
text_style.seq = i
|
||||
self.descendant_text_styles = sorted(descendant_style_map, key=attrgetter('seq'))
|
||||
|
||||
self.log.debug('%d Text Styles %d Combined styles' % tuple(map(len, (
|
||||
self.descendant_text_styles, self.combined_styles))))
|
||||
|
||||
self.primary_heading_style = None
|
||||
if heading_styles:
|
||||
heading_styles.sort(key=attrgetter('outline_level'))
|
||||
self.primary_heading_style = heading_styles[0]
|
||||
else:
|
||||
ms = 0
|
||||
for s in self.combined_styles:
|
||||
if s.rs.font_size > ms:
|
||||
self.primary_heading_style = s
|
||||
ms = s.rs.font_size
|
||||
|
||||
def serialize(self, styles):
    """Write all collected styles into the ``styles`` XML tree.

    Every attribute of the first ``lang`` element found below ``styles`` is
    overwritten with ``self.document_lang``.  The combined styles, the
    descendant text styles and finally the pure block styles (ordered by
    their ``seq`` attribute) are then asked to serialize themselves.

    Raises IndexError if ``styles`` contains no ``lang`` element.
    """
    lang_element = styles.xpath('descendant::*[local-name()="lang"]')[0]
    attributes = lang_element.attrib
    # Snapshot the attribute names first; the mapping is modified in the loop
    for attr_name in list(attributes):
        attributes[attr_name] = self.document_lang

    for combined in self.combined_styles:
        combined.serialize(styles, self.normal_style)

    for text_style in self.descendant_text_styles:
        text_style.serialize(styles)

    ordered_block_styles = list(self.pure_block_styles)
    ordered_block_styles.sort(key=lambda block_style: block_style.seq)
    for block_style in ordered_block_styles:
        block_style.serialize(styles, self.normal_pure_block_style)
|
||||
371
ebook_converter/ebooks/docx/writer/tables.py
Normal file
371
ebook_converter/ebooks/docx/writer/tables.py
Normal file
@@ -0,0 +1,371 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from collections import namedtuple
|
||||
|
||||
from calibre.ebooks.docx.writer.utils import convert_color
|
||||
from calibre.ebooks.docx.writer.styles import read_css_block_borders as rcbb, border_edges
|
||||
from polyglot.builtins import iteritems, range, unicode_type
|
||||
|
||||
|
||||
class Dummy(object):
    """Empty attribute holder.

    read_css_block_borders() passes an instance of this class to the
    styles-module border reader, which stores its results as attributes.
    """
|
||||
|
||||
|
||||
# One resolved border edge. ``level`` records which kind of object the border
# came from (the BLEVEL of the table, row or cell that declared it).
Border = namedtuple(
    'Border', ['css_style', 'style', 'width', 'color', 'level'])

# Tie-break weight of each CSS border style, highest first (double=100 down
# to inset=93). Used when borders of equal width compete for the same edge.
border_style_weight = dict(zip(
    ('double', 'solid', 'dashed', 'dotted',
     'ridge', 'outset', 'groove', 'inset'),
    range(100, 92, -1)))
|
||||
|
||||
|
||||
class SpannedCell(object):
    """Placeholder for a grid position covered by another cell's span.

    ``spanning_cell`` is the cell that owns this position and ``horizontal``
    says whether the position is covered by a column span (True) or a row
    span (False).
    """

    def __init__(self, spanning_cell, horizontal=True):
        self.spanning_cell = spanning_cell
        self.horizontal = horizontal
        # A placeholder never spans anything itself
        self.row_span = 1
        self.col_span = 1

    def resolve_borders(self):
        """Do nothing; this placeholder computes no borders of its own."""
        pass

    def serialize(self, tr, makeelement):
        """Emit a ``w:tc`` that continues the merge started by the owner."""
        if self.horizontal:
            merge_tag = 'w:hMerge'
        else:
            merge_tag = 'w:vMerge'
        cell_el = makeelement(tr, 'w:tc')
        props = makeelement(cell_el, 'w:tcPr')
        makeelement(props, merge_tag, w_val='continue')
        makeelement(cell_el, 'w:p')

    def applicable_borders(self, edge):
        """Delegate to the owning cell."""
        owner = self.spanning_cell
        return owner.applicable_borders(edge)
|
||||
|
||||
|
||||
def read_css_block_borders(self, css):
    """Copy the border and padding settings of ``css`` onto ``self``.

    The values are read by the styles-module reader (imported as ``rcbb``)
    into a scratch object. For every edge in ``border_edges`` this then sets
    ``self.border_<edge>`` to a Border tuple tagged with ``self.BLEVEL`` and
    ``self.padding_<edge>`` to the padding that was read.
    """
    scratch = Dummy()
    rcbb(scratch, css, store_css_style=True)
    for edge in border_edges:
        prefix = 'border_' + edge
        border = Border(
            css_style=getattr(scratch, prefix + '_css_style'),
            style=getattr(scratch, prefix + '_style'),
            width=getattr(scratch, prefix + '_width'),
            color=getattr(scratch, prefix + '_color'),
            level=self.BLEVEL,
        )
        setattr(self, prefix, border)
        padding_name = 'padding_' + edge
        setattr(self, padding_name, getattr(scratch, padding_name))
|
||||
|
||||
|
||||
def as_percent(x):
    """Return the number in a string such as ``'50%'`` as a float.

    Returns None when ``x`` is empty, does not end in ``%`` or the part
    before the percent sign(s) cannot be converted to a float.
    """
    if not x or not x.endswith('%'):
        return None
    number = x.rstrip('%')
    try:
        return float(number)
    except Exception:
        return None
|
||||
|
||||
|
||||
def convert_width(tag_style):
    """Turn the CSS width of ``tag_style`` into a ``(type, value)`` pair.

    Returns ``('auto', 0)`` when there is no style, the width is ``auto`` or
    the width cannot be converted; ``('pct', n)`` for percentage widths, with
    the percentage multiplied by 50; otherwise ``('dxa', n)`` with the
    numeric width multiplied by 20.
    """
    automatic = ('auto', 0)
    if tag_style is None:
        return automatic

    raw_width = tag_style._get('width')
    percent = as_percent(raw_width)
    if raw_width == 'auto':
        return automatic
    if percent is not None:
        return ('pct', int(percent * 50))

    try:
        # NOTE(review): item access presumably yields the width already
        # converted to points -- confirm against the style class
        numeric_width = float(tag_style['width'])
        return ('dxa', int(numeric_width * 20))
    except Exception:
        return automatic
|
||||
|
||||
|
||||
class Cell(object):
    """A table cell built from an HTML tag, serialized as ``w:tc``."""

    # Border level stored in the Border tuples read for a cell
    BLEVEL = 2

    def __init__(self, row, html_tag, tag_style=None):
        self.row = row
        self.table = self.row.table
        self.html_tag = html_tag
        self.row_span = self._read_span(html_tag, 'rowspan')
        self.col_span = self._read_span(html_tag, 'colspan')
        if tag_style is None:
            self.valign = 'center'
        else:
            css_to_docx = {'top': 'top', 'bottom': 'bottom', 'middle': 'center'}
            self.valign = css_to_docx.get(tag_style._get('vertical-align'))
        self.items = []
        self.width = convert_width(tag_style)
        if tag_style is None:
            self.background_color = None
        else:
            self.background_color = convert_color(tag_style.backgroundColor)
        read_css_block_borders(self, tag_style)

    @staticmethod
    def _read_span(html_tag, attribute):
        """Read a span attribute as a non-negative int, 1 on any failure."""
        try:
            return max(0, int(html_tag.get(attribute, 1)))
        except Exception:
            return 1

    def add_block(self, block):
        """Append ``block`` and tell it which list it now lives in."""
        self.items.append(block)
        block.parent_items = self.items

    def add_table(self, table):
        """Append a nested ``table`` and return it."""
        self.items.append(table)
        return table

    def serialize(self, parent, makeelement):
        """Create the ``w:tc`` element for this cell under ``parent``."""
        cell_el = makeelement(parent, 'w:tc')
        props = makeelement(cell_el, 'w:tcPr')
        makeelement(props, 'w:tcW', w_type=self.width[0],
                    w_w=unicode_type(self.width[1]))

        # For some reason, Word 2007 refuses to honor <w:shd> at the table or
        # row level, despite what the specs say, so we inherit and apply at
        # the cell level
        fill = (self.background_color or self.row.background_color or
                self.row.table.background_color)
        if fill:
            makeelement(props, 'w:shd', w_val="clear", w_color="auto",
                        w_fill=fill)

        # Borders: created detached and attached only if something was added
        borders_el = makeelement(props, 'w:tcBorders', append=False)
        for edge, border in iteritems(self.borders):
            if border is None or not border.width > 0 or border.style == 'none':
                continue
            makeelement(borders_el, 'w:' + edge, w_val=border.style,
                        w_sz=unicode_type(border.width), w_color=border.color)
        if len(borders_el) > 0:
            props.append(borders_el)

        # Margins: the row padding is added on the edges this cell shares
        # with the row
        margins_el = makeelement(props, 'w:tcMar', append=False)
        for edge in border_edges:
            padding = getattr(self, 'padding_' + edge)
            if edge in ('top', 'bottom'):
                on_row_edge = True
            elif edge == 'left':
                on_row_edge = self is self.row.first_cell
            elif edge == 'right':
                on_row_edge = self is self.row.last_cell
            else:
                on_row_edge = False
            if on_row_edge:
                padding += getattr(self.row, 'padding_' + edge)
            if padding > 0:
                makeelement(margins_el, 'w:' + edge, w_type='dxa',
                            w_w=unicode_type(int(padding * 20)))
        if len(margins_el) > 0:
            props.append(margins_el)

        if self.valign is not None:
            makeelement(props, 'w:vAlign', w_val=self.valign)

        if self.row_span > 1:
            makeelement(props, 'w:vMerge', w_val='restart')
        if self.col_span > 1:
            makeelement(props, 'w:hMerge', w_val='restart')

        last_item = None
        for last_item in self.items:
            last_item.serialize(cell_el)
        if last_item is None or isinstance(last_item, Table):
            # Word 2007 requires the last element in a table cell to be a
            # paragraph
            makeelement(cell_el, 'w:p')

    def applicable_borders(self, edge):
        """Return the set of borders declared for ``edge`` of this cell.

        The cell's own border always counts. The row's border counts on the
        top and bottom edges and, for left/right, only when this is the
        first/last cell of the row. The table's border counts only on the
        outer edge of the table.
        """
        if edge == 'left':
            if self.row.first_cell is self:
                owners = {self.table, self.row, self}
            else:
                owners = {self}
        elif edge == 'top':
            outer = {self.table} if self.table.first_row is self.row else set()
            owners = outer | {self, self.row}
        elif edge == 'right':
            if self.row.last_cell is self:
                owners = {self.table, self, self.row}
            else:
                owners = {self}
        elif edge == 'bottom':
            outer = {self.table} if self.table.last_row is self.row else set()
            owners = outer | {self, self.row}
        return {getattr(owner, 'border_' + edge) for owner in owners}

    def resolve_border(self, edge):
        """Pick the single border to use on ``edge``, or None if hidden."""
        # In Word cell borders override table borders, and Word ignores row
        # borders, so we consolidate all borders as cell borders
        # In HTML the priority is as described here:
        # http://www.w3.org/TR/CSS21/tables.html#border-conflict-resolution
        opposite = {'left': 'right', 'top': 'bottom',
                    'right': 'left', 'bottom': 'top'}
        adjacent = self.neighbor(edge)
        candidates = self.applicable_borders(edge)
        if adjacent is not None:
            candidates |= adjacent.applicable_borders(opposite[edge])

        # A single hidden border suppresses the edge entirely
        if any(b.css_style == 'hidden' for b in candidates):
            return None

        def rank(border):
            visible = 0 if border.css_style == 'none' else 1
            style_rank = border_style_weight.get(border.css_style, 0)
            return (visible, border.width, style_rank, border.level)

        return sorted(candidates, key=rank)[-1]

    def resolve_borders(self):
        """Resolve every edge and store the result in ``self.borders``."""
        resolved = {}
        for edge in border_edges:
            resolved[edge] = self.resolve_border(edge)
        self.borders = resolved

    def neighbor(self, edge):
        """Return the cell adjacent on ``edge`` or None.

        Neighbours are looked up by list position in the row and table. If
        the position holds a span placeholder, the cell owning the span is
        returned instead.
        """
        siblings = self.row.cells
        column = siblings.index(self)
        found = None
        if edge == 'left':
            if column > 0:
                found = siblings[column - 1]
        elif edge == 'right':
            if column + 1 < len(siblings):
                found = siblings[column + 1]
        elif edge in ('top', 'bottom'):
            rows = self.table.rows
            step = -1 if edge == 'top' else 1
            target = rows.index(self.row) + step
            if 0 <= target < len(rows) and column < len(rows[target].cells):
                found = rows[target].cells[column]
        return getattr(found, 'spanning_cell', found)
|
||||
|
||||
|
||||
class Row(object):
    """A table row: collects cells and serializes them as ``w:tr``."""

    # Border level stored in the Border tuples read for a row
    BLEVEL = 1

    def __init__(self, table, html_tag, tag_style=None):
        self.table = table
        self.html_tag = html_tag
        self.orig_tag_style = tag_style
        self.cells = []
        self.current_cell = None
        if tag_style is None:
            self.background_color = None
        else:
            self.background_color = convert_color(tag_style.backgroundColor)
        read_css_block_borders(self, tag_style)

    @property
    def first_cell(self):
        """First finished cell, or None while the row has no cells."""
        if not self.cells:
            return None
        return self.cells[0]

    @property
    def last_cell(self):
        """Last finished cell, or None while the row has no cells."""
        if not self.cells:
            return None
        return self.cells[-1]

    def start_new_cell(self, html_tag, tag_style):
        """Open a new cell; it joins ``cells`` once its tag is finished."""
        self.current_cell = Cell(self, html_tag, tag_style)

    def finish_tag(self, html_tag):
        """Close the open cell if ``html_tag`` is the tag that opened it."""
        open_cell = self.current_cell
        if open_cell is None:
            return
        if html_tag is open_cell.html_tag:
            self.cells.append(open_cell)
            self.current_cell = None

    def add_block(self, block):
        """Add ``block`` to the open cell, creating one from the row's own
        tag and style if no cell is open."""
        if self.current_cell is None:
            self.start_new_cell(self.html_tag, self.orig_tag_style)
        self.current_cell.add_block(block)

    def add_table(self, table):
        """Add a nested ``table`` to the open cell (creating one if needed)
        and return what the cell returns."""
        if self.current_cell is None:
            self.current_cell = Cell(self, self.html_tag, self.orig_tag_style)
        target = self.current_cell
        return target.add_table(table)

    def serialize(self, parent, makeelement):
        """Create ``w:tr`` under ``parent`` and serialize each cell into it."""
        row_el = makeelement(parent, 'w:tr')
        for cell in self.cells:
            cell.serialize(row_el, makeelement)
|
||||
|
||||
|
||||
class Table(object):
    """A table built from an HTML tag, serialized as ``w:tbl``.

    Rows and cells are collected while the HTML tree is walked
    (start_new_row / start_new_cell / add_block / add_table / finish_tag).
    When the table's own tag is finished, spans are expanded into
    placeholder cells and every cell resolves its borders.
    """

    # Border level stored in the Border tuples read for a table
    BLEVEL = 0

    def __init__(self, namespace, html_tag, tag_style=None):
        self.namespace = namespace
        self.html_tag = html_tag
        self.orig_tag_style = tag_style
        self.rows = []
        self.current_row = None
        self.width = convert_width(tag_style)
        self.background_color = None if tag_style is None else convert_color(tag_style.backgroundColor)
        self.jc = None
        self.float = None
        self.margin_left = self.margin_right = self.margin_top = self.margin_bottom = None
        if tag_style is not None:
            # NOTE(review): margin-right is read with .get() while every
            # other raw lookup in this module uses ._get() -- confirm that
            # the style class really provides .get()
            ml, mr = tag_style._get('margin-left'), tag_style.get('margin-right')
            if ml == 'auto':
                # auto on both sides centers the table, auto on the left
                # only pushes it to the right
                self.jc = 'center' if mr == 'auto' else 'right'
            self.float = tag_style['float']
            for edge in border_edges:
                setattr(self, 'margin_' + edge, tag_style['margin-' + edge])
        read_css_block_borders(self, tag_style)

    @property
    def first_row(self):
        """First finished row, or None while the table has no rows."""
        return self.rows[0] if self.rows else None

    @property
    def last_row(self):
        """Last finished row, or None while the table has no rows."""
        return self.rows[-1] if self.rows else None

    def finish_tag(self, html_tag):
        """Handle the end of ``html_tag``.

        The open row (if any) is told about the tag and is moved to
        ``rows`` when the tag is the one that opened it. Returns True when
        ``html_tag`` is the table's own tag; in that case spans are expanded
        and all cell borders are resolved before returning.
        """
        if self.current_row is not None:
            self.current_row.finish_tag(html_tag)
            if self.current_row.html_tag is html_tag:
                self.rows.append(self.current_row)
                self.current_row = None
        table_ended = self.html_tag is html_tag
        if table_ended:
            self.expand_spanned_cells()
            for row in self.rows:
                for cell in row.cells:
                    cell.resolve_borders()
        return table_ended

    def expand_spanned_cells(self):
        """Insert SpannedCell placeholders for every colspan and rowspan."""
        # Expand horizontally
        for row in self.rows:
            for cell in tuple(row.cells):
                idx = row.cells.index(cell)
                # Skip cells whose placeholders are already in place
                if cell.col_span > 1 and (cell is row.cells[-1] or not isinstance(row.cells[idx+1], SpannedCell)):
                    row.cells[idx:idx+1] = [cell] + [SpannedCell(cell, horizontal=True) for i in range(1, cell.col_span)]

        # Expand vertically
        for r, row in enumerate(self.rows):
            for idx, cell in enumerate(row.cells):
                if cell.row_span > 1:
                    # Only the rows actually covered by the span get a
                    # placeholder: a cell with rowspan=N covers its own row
                    # plus the next N-1 rows.
                    for nrow in self.rows[r+1:r+cell.row_span]:
                        sc = SpannedCell(cell, horizontal=False)
                        try:
                            tcell = nrow.cells[idx]
                        except Exception:
                            tcell = None
                        if tcell is None:
                            # Row is too short: pad it up to this column
                            # with horizontal placeholders, then add ours
                            nrow.cells.extend([SpannedCell(nrow.cells[-1], horizontal=True) for i in range(idx - len(nrow.cells))])
                            nrow.cells.append(sc)
                        else:
                            if isinstance(tcell, SpannedCell):
                                # Conflict between rowspan and colspan
                                break
                            else:
                                nrow.cells.insert(idx, sc)

    def start_new_row(self, html_tag, html_style):
        """Open a new row, first storing any row that is still open."""
        if self.current_row is not None:
            self.rows.append(self.current_row)
        self.current_row = Row(self, html_tag, html_style)

    def start_new_cell(self, html_tag, html_style):
        """Open a new cell in the open row, creating an unstyled row from
        the cell's own tag if no row is open."""
        if self.current_row is None:
            self.start_new_row(html_tag, None)
        self.current_row.start_new_cell(html_tag, html_style)

    def add_block(self, block):
        """Add ``block`` to the open row (a row must already be open)."""
        self.current_row.add_block(block)

    def add_table(self, table):
        """Add a nested ``table`` to the open row, creating a row from the
        table's own tag and style if none is open."""
        if self.current_row is None:
            self.current_row = Row(self, self.html_tag, self.orig_tag_style)
        return self.current_row.add_table(table)

    def serialize(self, parent):
        """Create ``w:tbl`` under ``parent``; nothing is written when the
        table has no row with cells."""
        makeelement = self.namespace.makeelement
        rows = [r for r in self.rows if r.cells]
        if not rows:
            return
        tbl = makeelement(parent, 'w:tbl')
        tblPr = makeelement(tbl, 'w:tblPr')
        makeelement(tblPr, 'w:tblW', w_type=self.width[0], w_w=unicode_type(self.width[1]))
        if self.float in {'left', 'right'}:
            kw = {'w_vertAnchor':'text', 'w_horzAnchor':'text', 'w_tblpXSpec':self.float}
            for edge in border_edges:
                val = getattr(self, 'margin_' + edge) or 0
                # Keep a minimum gap of 2 on the side facing the text
                # (right side of a left float and vice versa)
                if {self.float, edge} == {'left', 'right'}:
                    val = max(val, 2)
                kw['w_' + edge + 'FromText'] = unicode_type(max(0, int(val *20)))
            makeelement(tblPr, 'w:tblpPr', **kw)
        if self.jc is not None:
            makeelement(tblPr, 'w:jc', w_val=self.jc)
        for row in rows:
            row.serialize(tbl, makeelement)
|
||||
58
ebook_converter/ebooks/docx/writer/utils.py
Normal file
58
ebook_converter/ebooks/docx/writer/utils.py
Normal file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from tinycss.color3 import parse_color_string
|
||||
|
||||
|
||||
def int_or_zero(raw):
    """Return ``int(raw)``, or 0 when the conversion fails with a
    ValueError, TypeError or AttributeError."""
    try:
        value = int(raw)
    except (ValueError, TypeError, AttributeError):
        value = 0
    return value
|
||||
|
||||
# convert_color() {{{
|
||||
|
||||
|
||||
def convert_color(value):
    """Convert a CSS color string to an upper-case ``RRGGBB`` hex string.

    Returns None for an empty value, for a value that cannot be parsed and
    for a color whose alpha is below 0.01. ``currentColor`` (any case) is
    returned as ``'auto'``.
    """
    if not value:
        return None
    if value.lower() == 'currentcolor':
        return 'auto'
    parsed = parse_color_string(value)
    if parsed is None or parsed.alpha < 0.01:
        return None
    channels = tuple(
        int(component * 255)
        for component in (parsed.red, parsed.green, parsed.blue))
    return '%02X%02X%02X' % channels
|
||||
|
||||
|
||||
def test_convert_color(return_tests=False):
    """Build the unit tests for convert_color().

    With ``return_tests`` true the loaded test suite is returned; otherwise
    the suite is run with a verbose text runner and nothing is returned.
    """
    import unittest

    # (expected result, CSS input) pairs
    cases = (
        (None, None),
        (None, 'transparent'),
        (None, 'none'),
        (None, '#12j456'),
        ('auto', 'currentColor'),
        ('F0F8FF', 'AliceBlue'),
        ('000000', 'black'),
        ('FF0000', 'red'),
        ('00FF00', 'lime'),
        ('000011', '#001'),
        ('12345D', '#12345d'),
        ('FFFFFF', 'rgb(255, 255, 255)'),
        ('FF0000', 'rgba(255, 0, 0, 23)'),
    )

    class TestColors(unittest.TestCase):

        def test_color_conversion(self):
            for expected, css_value in cases:
                self.assertEqual(expected, convert_color(css_value))

    tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestColors)
    if return_tests:
        return tests
    unittest.TextTestRunner(verbosity=4).run(tests)
|
||||
# }}}
|
||||
316
ebook_converter/ebooks/oeb/transforms/subset.py
Normal file
316
ebook_converter/ebooks/oeb/transforms/subset.py
Normal file
@@ -0,0 +1,316 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
from collections import defaultdict
|
||||
|
||||
from calibre.ebooks.oeb.base import urlnormalize, css_text
|
||||
from calibre.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont
|
||||
from polyglot.builtins import iteritems, itervalues, unicode_type, range
|
||||
from tinycss.fonts3 import parse_font_family
|
||||
|
||||
|
||||
def get_font_properties(rule, default=None):
    '''
    Given a CSS rule, extract normalized font properties from
    it. Note that shorthand font property should already have been expanded
    by the CSS flattening code.

    Returns a dict with the keys font-family, src, font-weight, font-stretch
    and font-style. font-family and src are None when absent; the other
    three fall back to ``default`` when absent, ``inherit`` or not one of
    the recognised keywords.
    '''
    # Accepted values for the keyword-valued properties
    allowed = {
        'font-weight': {'100', '200', '300', '400', '500', '600', '700',
                        '800', '900', 'bolder', 'lighter'},
        'font-style': {'normal', 'italic', 'oblique'},
        'font-stretch': {'normal', 'ultra-condensed', 'extra-condensed',
                         'condensed', 'semi-condensed', 'semi-expanded',
                         'expanded', 'extra-expanded', 'ultra-expanded'},
    }
    weight_aliases = {'normal': '400', 'bold': '700'}
    style = rule.style
    props = {}
    for name in ('font-family', 'src', 'font-weight', 'font-stretch',
                 'font-style'):
        attr = 'uri' if name == 'src' else 'value'
        try:
            first = style.getProperty(name).propertyValue[0]
            value = getattr(first, attr)
            if name == 'font-family':
                # The whole value list is parsed, not just the first entry
                value = parse_font_family(
                    css_text(style.getProperty(name).propertyValue))
                if value and value[0] == 'inherit':
                    value = None
        except (IndexError, KeyError, AttributeError, TypeError, ValueError):
            value = default if name in allowed else None

        if name in allowed:
            if value or value == 0:
                value = unicode_type(value).lower()
            if value == 'inherit':
                value = default
            if name == 'font-weight':
                value = weight_aliases.get(value, value)
            if value not in allowed[name]:
                value = default
            # The fallback itself may be the keyword 'normal'
            if name == 'font-weight' and value == 'normal':
                value = '400'
        props[name] = value
    return props
|
||||
|
||||
|
||||
def find_font_face_rules(sheet, oeb):
    '''
    Find all @font-face rules in the given sheet and extract the relevant info from them.
    sheet can be either a ManifestItem or a CSSStyleSheet.

    Returns a list of property dicts (see get_font_properties) extended with
    the keys item, weight, rule and chars. Rules without a font-family or
    src, or whose src is not in the manifest, are skipped.
    '''
    try:
        rules = sheet.data.cssRules
    except AttributeError:
        rules = sheet.cssRules

    found = []
    for rule in rules:
        if rule.type != rule.FONT_FACE_RULE:
            continue
        props = get_font_properties(rule, default='normal')
        if not (props['font-family'] and props['src']):
            continue

        src = props['src']
        try:
            path = sheet.abshref(src)
        except AttributeError:
            path = src
        item = oeb.manifest.hrefs.get(urlnormalize(path), None)
        if not item:
            continue

        # Relative weights are treated as 400 here
        if props['font-weight'] in {'bolder', 'lighter'}:
            props['font-weight'] = '400'
        props.update({
            'item': item,
            'weight': int(props['font-weight']),
            'rule': rule,
            'chars': set(),
        })
        found.append(props)

    return found
|
||||
|
||||
|
||||
def elem_style(style_rules, cls, inherited_style):
    '''
    Find the effective style for the given element.

    Starts from a copy of inherited_style, applies the rules of every class
    named in cls (in order) and then resolves a font-weight of bolder or
    lighter against the inherited weight (400 if none is inherited).
    '''
    bolder_from = {
        '100': '400', '200': '400', '300': '400',
        '400': '700', '500': '700',
    }
    lighter_from = {
        '600': '400', '700': '400',
        '800': '700', '900': '700',
    }

    effective = inherited_style.copy()
    for class_name in cls.split():
        effective.update(style_rules.get(class_name, {}))

    requested = effective.get('font-weight', None)
    parent_weight = inherited_style.get('font-weight', '400')
    if requested == 'bolder':
        effective['font-weight'] = bolder_from.get(parent_weight, '900')
    elif requested == 'lighter':
        effective['font-weight'] = lighter_from.get(parent_weight, '100')

    return effective
|
||||
|
||||
|
||||
class SubsetFonts(object):

    '''
    Subset all embedded fonts. Must be run after CSS flattening, as it requires
    CSS normalization and flattening to work.
    '''

    def __call__(self, oeb, log, opts):
        '''
        Find the embedded fonts and the characters rendered with each of
        them, then subset every font file. Fonts with no used characters or
        glyphs are removed from the manifest together with their @font-face
        rule; fonts that cannot be subset are left untouched.
        '''
        self.oeb, self.log, self.opts = oeb, log, opts

        self.find_embedded_fonts()
        if not self.embedded_fonts:
            self.log.debug('No embedded fonts found')
            return
        self.find_style_rules()
        self.find_font_usage()

        # [size after subsetting, size before subsetting]
        totals = [0, 0]

        def remove(font):
            totals[1] += len(font['item'].data)
            self.oeb.manifest.remove(font['item'])
            font['rule'].parentStyleSheet.deleteRule(font['rule'])

        # Several @font-face rules may point at the same file: merge their
        # character sets so that the file is subset only once
        fonts = {}
        for font in self.embedded_fonts:
            item, chars = font['item'], font['chars']
            if item.href in fonts:
                fonts[item.href]['chars'] |= chars
            else:
                fonts[item.href] = font

        for font in itervalues(fonts):
            if not font['chars']:
                self.log('The font %s is unused. Removing it.'%font['src'])
                remove(font)
                continue
            try:
                raw, old_stats, new_stats = subset(font['item'].data, font['chars'])
            except NoGlyphs:
                self.log('The font %s has no used glyphs. Removing it.'%font['src'])
                remove(font)
                continue
            except UnsupportedFont as e:
                self.log.warn('The font %s is unsupported for subsetting. %s'%(
                    font['src'], e))
                sz = len(font['item'].data)
                totals[0] += sz
                totals[1] += sz
            else:
                font['item'].data = raw
                nlen = sum(itervalues(new_stats))
                olen = sum(itervalues(old_stats))
                self.log('Decreased the font %s to %.1f%% of its original size'%
                         (font['src'], nlen/olen *100))
                totals[0] += nlen
                totals[1] += olen

            font['item'].unload_data_from_memory()

        if totals[0]:
            self.log('Reduced total font size to %.1f%% of original'%
                     (totals[0]/totals[1] * 100))

    def find_embedded_fonts(self):
        '''
        Find all @font-face rules and extract the relevant info from them.
        '''
        self.embedded_fonts = []
        for item in self.oeb.manifest:
            if not hasattr(item.data, 'cssRules'):
                continue
            self.embedded_fonts.extend(find_font_face_rules(item, self.oeb))

    def find_style_rules(self):
        '''
        Extract all font related style information from all stylesheets into a
        dict mapping classes to font properties specified by that class. All
        the heavy lifting has already been done by the CSS flattening code.
        '''
        rules = defaultdict(dict)
        for item in self.oeb.manifest:
            if not hasattr(item.data, 'cssRules'):
                continue
            for rule in item.data.cssRules:
                if rule.type != rule.STYLE_RULE:
                    continue
                props = {k:v for k,v in
                         iteritems(get_font_properties(rule)) if v}
                if not props:
                    continue
                for sel in rule.selectorList:
                    sel = sel.selectorText
                    if sel and sel.startswith('.'):
                        # We dont care about pseudo-selectors as the worst that
                        # can happen is some extra characters will remain in
                        # the font
                        sel = sel.partition(':')[0]
                        rules[sel[1:]].update(props)

        self.style_rules = dict(rules)

    def find_font_usage(self):
        '''
        Walk every body element of every parsed document in the manifest and
        record the characters used with each embedded font.
        '''
        for item in self.oeb.manifest:
            if not hasattr(item.data, 'xpath'):
                continue
            for body in item.data.xpath('//*[local-name()="body"]'):
                base = {'font-family':['serif'], 'font-weight': '400',
                        'font-style':'normal', 'font-stretch':'normal'}
                self.find_usage_in(body, base)

    def used_font(self, style):
        '''
        Given a style find the embedded font that matches it. Returns None if
        no match is found (can happen if no family matches).

        Candidates are narrowed by family, then font-stretch, then
        font-style, then font-weight. As a side effect the ``width`` key of
        every family-matching font is set to the index of its own
        font-stretch.
        '''
        ff = style.get('font-family', [])
        lnames = {unicode_type(x).lower() for x in ff}
        matching_set = []

        # Filter on font-family
        for ef in self.embedded_fonts:
            flnames = {x.lower() for x in ef.get('font-family', [])}
            if not lnames.intersection(flnames):
                continue
            matching_set.append(ef)
        if not matching_set:
            return None

        # Filter on font-stretch
        widths = {x:i for i, x in enumerate(('ultra-condensed',
                'extra-condensed', 'condensed', 'semi-condensed', 'normal',
                'semi-expanded', 'expanded', 'extra-expanded', 'ultra-expanded'
                ))}

        width = widths[style.get('font-stretch', 'normal')]
        for f in matching_set:
            # The width of a candidate comes from its own @font-face
            # declaration, not from the style being matched; otherwise every
            # candidate would be at distance 0 and nothing would be filtered
            f['width'] = widths.get(
                f.get('font-stretch') or 'normal', widths['normal'])

        min_dist = min(abs(width-f['width']) for f in matching_set)
        nearest = [f for f in matching_set if abs(width-f['width']) ==
            min_dist]
        # Among equally distant faces prefer narrower ones for normal and
        # condensed requests and wider ones for expanded requests
        if width <= 4:
            lmatches = [f for f in nearest if f['width'] <= width]
        else:
            lmatches = [f for f in nearest if f['width'] >= width]
        matching_set = (lmatches or nearest)

        # Filter on font-style
        fs = style.get('font-style', 'normal')
        order = {
                'oblique':['oblique', 'italic', 'normal'],
                'normal':['normal', 'oblique', 'italic']
            }.get(fs, ['italic', 'oblique', 'normal'])
        for q in order:
            matches = [f for f in matching_set if f.get('font-style', 'normal') == q]
            if matches:
                matching_set = matches
                break

        # Filter on font weight
        fw = int(style.get('font-weight', '400'))
        # Fallback weights in steps of 100, staying within 100..900
        lighter = list(range(fw-100, 0, -100))
        heavier = list(range(fw+100, 1000, 100))
        if fw == 400:
            q = [400, 500, 300, 200, 100, 600, 700, 800, 900]
        elif fw == 500:
            q = [500, 400, 300, 200, 100, 600, 700, 800, 900]
        elif fw < 400:
            # Light request: try lighter faces first, then heavier ones
            q = [fw] + lighter + heavier
        else:
            # Bold request: try heavier faces first, then lighter ones
            q = [fw] + heavier + lighter
        for wt in q:
            matches = [f for f in matching_set if f['weight'] == wt]
            if matches:
                return matches[0]
        return None

    def find_chars(self, elem):
        '''
        Return the set of characters in the text of elem and in the tails of
        its direct children (text inside the children is not included).
        '''
        ans = set()
        if elem.text:
            ans |= set(elem.text)
        for child in elem:
            if child.tail:
                ans |= set(child.tail)
        return ans

    def find_usage_in(self, elem, inherited_style):
        '''
        Recursively record, for elem and all its descendants, the characters
        used with whichever embedded font matches each element's style.
        '''
        style = elem_style(self.style_rules, elem.get('class', '') or '', inherited_style)
        for child in elem:
            self.find_usage_in(child, style)
        font = self.used_font(style)
        if font:
            chars = self.find_chars(elem)
            if chars:
                font['chars'] |= chars
|
||||
10
ebook_converter/ebooks/pdf/render/__init__.py
Normal file
10
ebook_converter/ebooks/pdf/render/__init__.py
Normal file
@@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
|
||||
247
ebook_converter/ebooks/pdf/render/common.py
Normal file
247
ebook_converter/ebooks/pdf/render/common.py
Normal file
@@ -0,0 +1,247 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import codecs, zlib, numbers
|
||||
from io import BytesIO
|
||||
from datetime import datetime
|
||||
|
||||
from calibre.constants import plugins, ispy3
|
||||
from calibre.utils.logging import default_log
|
||||
from polyglot.builtins import iteritems, unicode_type, codepoint_to_chr
|
||||
from polyglot.binary import as_hex_bytes
|
||||
|
||||
pdf_float = plugins['speedup'][0].pdf_float
|
||||
|
||||
EOL = b'\n'
|
||||
|
||||
# Sizes {{{
|
||||
inch = 72.0
|
||||
cm = inch / 2.54
|
||||
mm = cm * 0.1
|
||||
pica = 12.0
|
||||
didot = 0.375 * mm
|
||||
cicero = 12 * didot
|
||||
|
||||
_W, _H = (21*cm, 29.7*cm)
|
||||
|
||||
A6 = (_W*.5, _H*.5)
|
||||
A5 = (_H*.5, _W)
|
||||
A4 = (_W, _H)
|
||||
A3 = (_H, _W*2)
|
||||
A2 = (_W*2, _H*2)
|
||||
A1 = (_H*2, _W*4)
|
||||
A0 = (_W*4, _H*4)
|
||||
|
||||
LETTER = (8.5*inch, 11*inch)
|
||||
LEGAL = (8.5*inch, 14*inch)
|
||||
ELEVENSEVENTEEN = (11*inch, 17*inch)
|
||||
|
||||
_BW, _BH = (25*cm, 35.3*cm)
|
||||
B6 = (_BW*.5, _BH*.5)
|
||||
B5 = (_BH*.5, _BW)
|
||||
B4 = (_BW, _BH)
|
||||
B3 = (_BH*2, _BW)
|
||||
B2 = (_BW*2, _BH*2)
|
||||
B1 = (_BH*4, _BW*2)
|
||||
B0 = (_BW*4, _BH*4)
|
||||
|
||||
PAPER_SIZES = {k:globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 b2'
|
||||
' b3 b4 b5 b6 letter legal').split()}
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
def fmtnum(o):
    """Format a number as text: floats go through ``pdf_float``, anything
    else is converted with ``unicode_type``."""
    formatter = pdf_float if isinstance(o, float) else unicode_type
    return formatter(o)
|
||||
|
||||
|
||||
def serialize(o, stream):
    """Write the PDF representation of ``o`` to ``stream``.

    Handles floats, bools, integers, objects providing ``pdf_serialize()``,
    None and datetimes. Raises ValueError for anything else.
    """
    if isinstance(o, float):
        stream.write_raw(pdf_float(o).encode('ascii'))
    elif isinstance(o, bool):
        # Must check bool before int as bools are subclasses of int
        stream.write_raw(b'true' if o else b'false')
    elif isinstance(o, numbers.Integral):
        stream.write_raw(unicode_type(o).encode('ascii') if ispy3 else bytes(o))
    elif hasattr(o, 'pdf_serialize'):
        o.pdf_serialize(stream)
    elif o is None:
        stream.write_raw(b'null')
    elif isinstance(o, datetime):
        # %z expands to +HHMM/-HHMM for aware datetimes and to nothing for
        # naive ones; the seconds are filled in by the second formatting step
        val = o.strftime("D:%Y%m%d%H%M%%02d%z")%min(59, o.second)
        if o.utcoffset() is not None:
            # PDF wants the offset as +HH'mm' so split off its minutes
            val = "(%s'%s')"%(val[:-2], val[-2:])
        else:
            # No offset was appended: the last two digits are the seconds
            # and must not be split off
            val = "(%s)"%val
        stream.write(val.encode('ascii'))
    else:
        raise ValueError('Unknown object: %r'%o)
|
||||
|
||||
|
||||
class Name(unicode_type):

    """A PDF name object (written as ``/Name``)."""

    def pdf_serialize(self, stream):
        """Write this name to ``stream``.

        Bytes strictly between 33 and 126 are written as they are, except
        ``#`` and the PDF delimiter characters; every other byte is written
        as ``#`` followed by its two-digit hex code. Raises ValueError when
        the name is longer than 126 bytes and UnicodeEncodeError when it is
        not pure ASCII.
        """
        raw = self.encode('ascii')
        if len(raw) > 126:
            raise ValueError('Name too long: %r'%self)
        raw = bytearray(raw)
        # '#' starts an escape sequence and the other characters are PDF
        # delimiters that would end the name if written literally
        special = frozenset(bytearray(b'#()<>[]{}/%'))
        buf = (
            codepoint_to_chr(x).encode('ascii') if 33 < x < 126 and x not in special else
            # Always two hex digits: a reader consumes exactly two after '#'
            '#{:02x}'.format(x).encode('ascii') for x in raw)
        stream.write(b'/'+b''.join(buf))
|
||||
|
||||
|
||||
def escape_pdf_string(bytestring):
    """Backslash-escape ``bytestring`` for use inside a ``(...)`` PDF string.

    Unbalanced parentheses, backslashes and the control characters LF, CR,
    FF, BS and TAB are escaped; balanced parentheses are left alone. The
    input object itself is returned when nothing needs escaping.
    """
    data = bytearray(bytestring)
    # byte -> byte written after the backslash, see Table 3.2 in the PDF 1.7
    # spec. Note that a tab is written as backslash + literal tab, not as
    # backslash + 't'.
    escapes = {10: ord('n'), 13: ord('r'), 12: ord('f'), 8: ord('b'),
               9: ord('\t'), 92: ord('\\')}
    open_parens = []      # positions of '(' still waiting for their ')'
    replacement_at = {}   # position -> byte to write after the backslash
    for pos, byte in enumerate(data):
        if byte == 40:  # (
            open_parens.append(pos)
        elif byte == 41:  # )
            if open_parens:
                open_parens.pop()
            else:
                replacement_at[pos] = 41
        elif byte in escapes:
            replacement_at[pos] = escapes[byte]
    # Whatever is still open has no partner and must be escaped
    for pos in open_parens:
        replacement_at[pos] = 40

    if not replacement_at:
        return bytestring

    escaped = bytearray()
    for pos, byte in enumerate(data):
        if pos in replacement_at:
            escaped.append(92)  # 92 = ord('\')
            escaped.append(replacement_at[pos])
        else:
            escaped.append(byte)
    return bytes(escaped)
|
||||
|
||||
|
||||
class String(unicode_type):

    """Text written as a PDF literal string ``(...)``."""

    def pdf_serialize(self, stream):
        """Write the string to ``stream``, encoded as latin1 when possible.

        UTF-16-BE with a byte order mark is used when the text cannot be
        encoded as latin1, and also when its latin1 form would itself start
        with the byte order mark.
        """
        bom = codecs.BOM_UTF16_BE
        try:
            payload = self.encode('latin1')
            needs_utf16 = payload.startswith(bom)
        except UnicodeEncodeError:
            needs_utf16 = True
        if needs_utf16:
            payload = bom + self.encode('utf-16-be')
        stream.write(b'(' + escape_pdf_string(payload) + b')')
|
||||
|
||||
|
||||
class UTF16String(unicode_type):
    """Text that is always serialized as BOM-prefixed UTF-16-BE."""

    def pdf_serialize(self, stream):
        """Write the UTF-16-BE encoded text to ``stream`` as ``(...)``."""
        payload = codecs.BOM_UTF16_BE + self.encode('utf-16-be')
        # Disabled as the parentheses based strings give easier to debug
        # PDF files
        use_hex_form = False
        if use_hex_form:
            stream.write(b'<' + as_hex_bytes(payload) + b'>')
            return
        stream.write(b'(' + escape_pdf_string(payload) + b')')
|
||||
|
||||
|
||||
class Dictionary(dict):
    """A dict serialized as a multi-line PDF dictionary, ``<< ... >>``."""

    def pdf_serialize(self, stream):
        """Write one ``/Key value`` entry per line between ``<<`` and ``>>``.

        Entries are written in sorted key order. 'Type' and 'Subtype' get
        sort keys prefixed with '1' and '2', which places them ahead of
        alphabetic keys.
        """
        priority = {'Type':'1', 'Subtype':'2'}

        def sort_key(name):
            return priority.get(name, name) + name

        stream.write(b'<<' + EOL)
        for name in sorted(self, key=sort_key):
            serialize(Name(name), stream)
            stream.write(b' ')
            serialize(self[name], stream)
            stream.write(EOL)
        stream.write(b'>>' + EOL)
|
||||
|
||||
|
||||
class InlineDictionary(Dictionary):
    """A Dictionary serialized on a single line, in iteration order."""

    def pdf_serialize(self, stream):
        """Write ``<< /Key value /Key value >>`` to ``stream``.

        Unlike the parent class, keys are not sorted and no end-of-line
        markers are written.
        """
        stream.write(b'<< ')
        for name, value in iteritems(self):
            # Key and value are each followed by a single space.
            for obj in (Name(name), value):
                serialize(obj, stream)
                stream.write(b' ')
        stream.write(b'>>')
|
||||
|
||||
|
||||
class Array(list):
    """A list serialized as a PDF array, ``[a b c]``."""

    def pdf_serialize(self, stream):
        """Write the serialized items to ``stream``, separated by spaces."""
        stream.write(b'[')
        need_space = False
        for item in self:
            if need_space:
                stream.write(b' ')
            serialize(item, stream)
            need_space = True
        stream.write(b']')
|
||||
|
||||
|
||||
class Stream(BytesIO):
    """An in-memory buffer that serializes itself as a PDF stream object.

    ``compress`` selects whether the data is deflated with zlib when it is
    serialized. ``filters`` is the Array of filter names written to the
    stream dictionary's 'Filter' entry.
    """

    def __init__(self, compress=False):
        BytesIO.__init__(self)
        self.compress = compress
        self.filters = Array()

    def add_extra_keys(self, d):
        """Hook called with the stream dictionary before it is written.

        Does nothing here; it can be overridden to add entries to ``d``.
        """
        pass

    def pdf_serialize(self, stream):
        """Write the stream dictionary and the stream data to ``stream``.

        'Length' is the number of bytes actually written and 'DL' is the
        length of the data before compression.
        """
        raw = self.getvalue()
        dl = len(raw)
        # Work on a copy: appending to self.filters directly would add one
        # more FlateDecode entry every time the object is serialized, while
        # the data is only deflated once per call.
        filters = Array(self.filters)
        if self.compress:
            filters.append(Name('FlateDecode'))
            raw = zlib.compress(raw)

        d = InlineDictionary({'Length':len(raw), 'DL':dl})
        self.add_extra_keys(d)
        if filters:
            d['Filter'] = filters
        serialize(d, stream)
        stream.write(EOL+b'stream'+EOL)
        stream.write(raw)
        stream.write(EOL+b'endstream'+EOL)

    def write_line(self, raw=b''):
        """Write ``raw`` followed by an end-of-line marker.

        Non-bytes input is encoded as ASCII.
        """
        self.write(raw if isinstance(raw, bytes) else raw.encode('ascii'))
        self.write(EOL)

    def write(self, raw):
        """Append ``raw`` to the buffer, encoding non-bytes input as ASCII."""
        super(Stream, self).write(raw if isinstance(raw, bytes) else
                                  raw.encode('ascii'))

    def write_raw(self, raw):
        """Append ``raw`` to the buffer without any conversion."""
        BytesIO.write(self, raw)
|
||||
|
||||
|
||||
class Reference(object):
    """Pairs an object with its number; serialized as ``<num> 0 R``."""

    def __init__(self, num, obj):
        self.num = num
        self.obj = obj

    def __repr__(self):
        return '%d 0 R'%self.num

    def __str__(self):
        return repr(self)

    def pdf_serialize(self, stream):
        """Write ``<num> 0 R`` to ``stream`` as ASCII bytes."""
        stream.write(('%d 0 R'%self.num).encode('ascii'))
|
||||
# }}}
|
||||
|
||||
|
||||
def current_log(newlog=None):
    """Return the active log, optionally replacing it first.

    A truthy ``newlog`` is stored on the function's ``ans`` attribute and
    returned. Otherwise the stored log is returned, falling back to
    ``default_log`` when nothing truthy has been stored.
    """
    if not newlog:
        return current_log.ans or default_log
    current_log.ans = newlog
    return current_log.ans or default_log


# No log has been set yet; current_log() falls back to default_log.
current_log.ans = None
|
||||
Reference in New Issue
Block a user