mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-13 13:15:53 +01:00
Every MIME-related function in the main __init__.py has a flag check to see whether initialization has already been done. This is nonsense, since initialization should happen implicitly, early on, when the converter starts. This commit straightens things out: initialization is now done in the cli module. Also, the function guess_type was removed, since it was just a proxy for the mimetypes.guess_type function.
275 lines
12 KiB
Python
275 lines
12 KiB
Python
import mimetypes
|
|
import textwrap, os
|
|
|
|
from lxml import etree
|
|
from lxml.builder import ElementMaker
|
|
|
|
from ebook_converter.constants_old import numeric_version, __appname__
|
|
from ebook_converter.ebooks.docx.names import DOCXNamespace
|
|
from ebook_converter.ebooks.metadata import authors_to_string
|
|
from ebook_converter.ebooks.pdf.render.common import PAPER_SIZES
|
|
from ebook_converter.utils.date import utcnow
|
|
from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1
|
|
from ebook_converter.utils.zipfile import ZipFile
|
|
|
|
|
|
def xml2str(root, pretty_print=False, with_tail=False):
    """Serialize an lxml element to UTF-8 encoded XML with a declaration.

    :param root: element to serialize. When the installed lxml exposes
        ``etree.cleanup_namespaces`` it is applied to ``root`` first, which
        modifies ``root`` in place.
    :param pretty_print: forwarded unchanged to ``etree.tostring``.
    :param with_tail: forwarded unchanged to ``etree.tostring``.
    :return: the result of ``etree.tostring`` for the given element.
    """
    # Older lxml releases may lack cleanup_namespaces, hence the probe.
    can_cleanup = hasattr(etree, 'cleanup_namespaces')
    if can_cleanup:
        etree.cleanup_namespaces(root)
    return etree.tostring(
        root,
        xml_declaration=True,
        encoding='utf-8',
        with_tail=with_tail,
        pretty_print=pretty_print,
    )
|
|
|
|
|
|
def page_size(opts):
    """Return the page ``(width, height)`` selected by the conversion options.

    A custom size always wins over the named preset, so the preset table is
    only consulted when no custom size is given. This way a stale or unknown
    ``opts.docx_page_size`` cannot raise ``KeyError`` for a value that would
    have been discarded anyway.

    :param opts: options object providing ``docx_custom_page_size`` (``None``
        or a ``'<width>x<height>'`` string) and ``docx_page_size`` (a key of
        ``PAPER_SIZES``).
    :return: ``(width, height)`` tuple; floats when a custom size is used,
        otherwise the values stored in ``PAPER_SIZES``.
    :raises ValueError: if either half of the custom size is not a number.
    :raises KeyError: if no custom size is set and the preset is unknown.
    """
    custom = opts.docx_custom_page_size
    if custom is not None:
        # Split on the first 'x' only: everything before it is the width,
        # everything after it is the height.
        raw_width, _, raw_height = custom.partition('x')
        return float(raw_width), float(raw_height)
    width, height = PAPER_SIZES[opts.docx_page_size]
    return width, height
|
|
|
|
|
|
def page_margin(opts, which):
    """Return the margin for one side of the page.

    The DOCX-specific option ``docx_page_margin_<which>`` is used unless it
    equals ``0.0``, in which case the generic ``margin_<which>`` option is
    returned instead.

    :param opts: options object carrying both margin attributes.
    :param which: side name used as the attribute suffix (the callers in
        this module pass ``'left'``, ``'right'``, ``'top'`` and ``'bottom'``).
    """
    docx_override = getattr(opts, 'docx_page_margin_' + which)
    if docx_override == 0.0:
        # Zero means "not set": fall back to the generic margin option.
        return getattr(opts, 'margin_' + which)
    return docx_override
|
|
|
|
|
|
def page_effective_area(opts):
    """Return the ``(width, height)`` of the page area left inside the margins.

    The full page size comes from ``page_size`` and each side's margin from
    ``page_margin``; the result is in points.
    """
    full_width, full_height = page_size(opts)
    horizontal = page_margin(opts, 'left') + page_margin(opts, 'right')
    vertical = page_margin(opts, 'top') + page_margin(opts, 'bottom')
    usable_width = full_width - horizontal
    usable_height = full_height - vertical
    return usable_width, usable_height  # in pts
|
|
|
|
|
|
def create_skeleton(opts, namespaces=None):
    """Build the empty document tree and default styles tree for a DOCX file.

    :param opts: conversion options, read through ``page_size`` and
        ``page_margin``.
    :param namespaces: prefix -> namespace URI mapping; when falsy, the
        mapping of a fresh ``DOCXNamespace`` is used.
    :return: ``(doc, styles, body)`` where ``body`` is the only child of
        ``doc`` and already contains the section properties element.
    """
    namespaces = namespaces or DOCXNamespace().namespaces

    def w(x):
        # Qualified name of ``x`` in the 'w' namespace.
        return '{%s}%s' % (namespaces['w'], x)

    def pick(prefixes):
        # Keep the iteration order of ``namespaces`` so nsmap order is stable.
        return {prefix: uri for prefix, uri in namespaces.items()
                if prefix in prefixes}

    # --- document.xml skeleton -------------------------------------------
    doc_ns = pick({'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne', 'a', 'pic'})
    make = ElementMaker(namespace=doc_ns['w'], nsmap=doc_ns)
    body = make.body()
    doc = make.document(body)

    # Page size and margins are multiplied by 20 and truncated to integers
    # before being written (presumably converting points to the twentieths
    # of a point used by WordprocessingML -- confirm against the spec).
    page_width, page_height = page_size(opts)
    page_width, page_height = int(20 * page_width), int(20 * page_height)

    margin_attrs = {}
    for side in 'left top right bottom'.split():
        margin_attrs[w(side)] = str(int(page_margin(opts, side) * 20))

    section = make.sectPr(
        make.pgSz(**{w('w'): str(page_width), w('h'): str(page_height)}),
        make.pgMar(**margin_attrs),
        make.cols(**{w('space'): '720'}),
        make.docGrid(**{w('linePitch'): "360"}),
    )
    body.append(section)

    # --- styles.xml skeleton ---------------------------------------------
    style_ns = pick(('w', 'r', 'a', 'wp'))
    make = ElementMaker(namespace=style_ns['w'], nsmap=style_ns)

    run_defaults = make.rPr(
        make.rFonts(**{
            w('asciiTheme'): "minorHAnsi",
            w('eastAsiaTheme'): "minorEastAsia",
            w('hAnsiTheme'): "minorHAnsi",
            w('cstheme'): "minorBidi",
        }),
        make.sz(**{w('val'): '22'}),
        make.szCs(**{w('val'): '22'}),
        make.lang(**{
            w('val'): 'en-US',
            w('eastAsia'): "en-US",
            w('bidi'): "ar-SA",
        }),
    )
    paragraph_defaults = make.pPr(
        make.spacing(**{
            w('after'): "0",
            w('line'): "276",
            w('lineRule'): "auto",
        }),
    )
    styles = make.styles(
        make.docDefaults(
            make.rPrDefault(run_defaults),
            make.pPrDefault(paragraph_defaults),
        ),
    )
    return doc, styles, body
|
|
|
|
|
|
def update_doc_props(root, mi, namespace):
    """Write book metadata into a core-properties element, replacing old values.

    Title and creator are always written; keywords, description and language
    are written only when the corresponding metadata field is truthy.

    :param root: element whose direct children are the property elements.
    :param mi: metadata object providing ``title``, ``authors``, ``tags``,
        ``comments`` and ``languages``.
    :param namespace: object whose ``namespaces`` mapping resolves the
        ``'dc'`` and ``'cp'`` prefixes.
    """
    def setm(name, text=None, ns='dc'):
        # Create the replacement first so its qualified tag can be used to
        # find and drop any existing children of the same kind.
        fresh = root.makeelement('{%s}%s' % (namespace.namespaces[ns], name))
        stale = [node for node in root if node.tag == fresh.tag]
        for node in stale:
            root.remove(node)
        fresh.text = text
        root.append(fresh)
        return fresh

    setm('title', mi.title)
    setm('creator', authors_to_string(mi.authors))
    if mi.tags:
        setm('keywords', ', '.join(mi.tags), ns='cp')
    if mi.comments:
        setm('description', mi.comments)
    if mi.languages:
        # Only the first language is recorded; prefer its ISO 639-1 form
        # when one is available.
        canonical = canonicalize_lang(mi.languages[0])
        setm('language', lang_as_iso639_1(canonical) or canonical)
|
|
|
|
|
|
class DocumentRelationships(object):
    """Registry of the relationships serialized to ``document.xml.rels``.

    Each relationship is keyed by ``(target, type, target_mode)`` and mapped
    to an id of the form ``rId<N>``, numbered in order of first registration.
    """

    def __init__(self, namespace):
        self.rmap = {}
        self.namespace = namespace
        names = namespace.names
        # Parts every document links to, registered up front.
        builtin = {
            names[key]: part for key, part in (
                ('STYLES', 'styles.xml'),
                ('NUMBERING', 'numbering.xml'),
                ('WEB_SETTINGS', 'webSettings.xml'),
                ('FONTS', 'fontTable.xml'),
            )
        }
        for rtype, part in builtin.items():
            self.add_relationship(part, rtype)

    def get_relationship_id(self, target, rtype, target_mode=None):
        """Return the id registered for this relationship, or ``None``."""
        key = (target, rtype, target_mode)
        return self.rmap.get(key)

    def add_relationship(self, target, rtype, target_mode=None):
        """Return the id for this relationship, registering it if needed."""
        existing = self.get_relationship_id(target, rtype, target_mode)
        if existing is not None:
            return existing
        new_id = 'rId%d' % (len(self.rmap) + 1)
        self.rmap[(target, rtype, target_mode)] = new_id
        return new_id

    def add_image(self, target):
        """Register ``target`` under the image relationship type."""
        image_type = self.namespace.names['IMAGES']
        return self.add_relationship(target, image_type)

    def serialize(self):
        """Return all registered relationships serialized via ``xml2str``."""
        pr_uri = self.namespace.namespaces['pr']
        make = ElementMaker(namespace=pr_uri, nsmap={None: pr_uri})
        container = make.Relationships()
        for key, rel_id in self.rmap.items():
            target, rtype, target_mode = key
            entry = make.Relationship(Id=rel_id, Type=rtype, Target=target)
            if target_mode is not None:
                entry.set('TargetMode', target_mode)
            container.append(entry)
        return xml2str(container)
|
|
|
|
|
|
class DOCX(object):
    """In-memory model of a DOCX package that ``write`` stores as a zip.

    The instance holds the XML trees and binary payloads making up the
    package; the boilerplate parts are generated on demand by properties.
    """

    def __init__(self, opts, log):
        self.namespace = DOCXNamespace()
        namespaces = self.namespace.namespaces
        self.opts, self.log = opts, log
        # Book metadata only becomes known in convert_metadata(); start with
        # None so appproperties can be read safely before that happens.
        self.mi = None
        self.document_relationships = DocumentRelationships(self.namespace)
        self.font_table = etree.Element('{%s}fonts' % namespaces['w'], nsmap={k: namespaces[k] for k in 'wr'})
        self.numbering = etree.Element('{%s}numbering' % namespaces['w'], nsmap={k: namespaces[k] for k in 'wr'})
        E = ElementMaker(namespace=namespaces['pr'], nsmap={None: namespaces['pr']})
        self.embedded_fonts = E.Relationships()
        # Archive member name -> font data, written verbatim by write().
        self.fonts = {}
        # Archive member name -> zero-argument callable returning the data.
        self.images = {}

    # Boilerplate {{{
    @property
    def contenttypes(self):
        """Serialized ``[Content_Types].xml`` for the package."""
        E = ElementMaker(namespace=self.namespace.namespaces['ct'], nsmap={None: self.namespace.namespaces['ct']})
        types = E.Types()
        for partname, mt in {
            "/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
            "/word/document.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
            "/word/numbering.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
            "/word/styles.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
            "/word/endnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
            "/word/settings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
            "/word/theme/theme1.xml": "application/vnd.openxmlformats-officedocument.theme+xml",
            "/word/fontTable.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
            "/word/webSettings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
            "/docProps/core.xml": "application/vnd.openxmlformats-package.core-properties+xml",
            "/docProps/app.xml": "application/vnd.openxmlformats-officedocument.extended-properties+xml",
        }.items():
            types.append(E.Override(PartName=partname, ContentType=mt))

        # Iterate a tuple rather than the set so the element order, and
        # therefore the generated file, is the same on every run.
        default_exts = ('png', 'gif', 'jpeg', 'jpg', 'svg', 'xml')
        added = set(default_exts)
        for ext in default_exts:
            mt = mimetypes.guess_type('a.' + ext)[0]
            # An unknown type would make the element constructor fail on a
            # None attribute value; skip it like the image loop below does.
            if mt:
                types.append(E.Default(Extension=ext, ContentType=mt))
        for ext, mt in {"rels": "application/vnd.openxmlformats-package"
                                ".relationships+xml",
                        "odttf": "application/vnd.openxmlformats-"
                                 "officedocument.obfuscatedFont"}.items():
            added.add(ext)
            types.append(E.Default(Extension=ext, ContentType=mt))
        # Declare any further extensions used by the registered images.
        for fname in self.images:
            ext = fname.rpartition(os.extsep)[-1]
            if ext not in added:
                added.add(ext)
                mt = mimetypes.guess_type('a.' + ext)[0]
                if mt:
                    types.append(E.Default(Extension=ext, ContentType=mt))
        return xml2str(types)

    @property
    def appproperties(self):
        """Serialized ``docProps/app.xml``.

        The ``Company`` element is added only when metadata has been set
        (by ``convert_metadata``) and has a truthy ``publisher``.
        """
        E = ElementMaker(namespace=self.namespace.namespaces['ep'], nsmap={None: self.namespace.namespaces['ep']})
        props = E.Properties(
            E.Application(__appname__),
            E.AppVersion('%02d.%04d' % numeric_version[:2]),
            E.DocSecurity('0'),
            E.HyperlinksChanged('false'),
            E.LinksUpToDate('true'),
            E.ScaleCrop('false'),
            E.SharedDoc('false'),
        )
        mi = self.mi
        if mi is not None and mi.publisher:
            props.append(E.Company(mi.publisher))
        return xml2str(props)

    @property
    def containerrels(self):
        """UTF-8 encoded ``_rels/.rels`` linking the three top-level parts."""
        return textwrap.dedent('''\
        <?xml version='1.0' encoding='utf-8'?>
        <Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
            <Relationship Id="rId3" Type="{APPPROPS}" Target="docProps/app.xml"/>
            <Relationship Id="rId2" Type="{DOCPROPS}" Target="docProps/core.xml"/>
            <Relationship Id="rId1" Type="{DOCUMENT}" Target="word/document.xml"/>
        </Relationships>'''.format(**self.namespace.names)).encode('utf-8')

    @property
    def websettings(self):
        """Serialized ``word/webSettings.xml``."""
        E = ElementMaker(namespace=self.namespace.namespaces['w'], nsmap={'w': self.namespace.namespaces['w']})
        ws = E.webSettings(
            E.optimizeForBrowser, E.allowPNG, E.doNotSaveAsSingleFile)
        return xml2str(ws)

    # }}}

    @staticmethod
    def _w3cdtf(moment):
        """Format a datetime as ``YYYY-MM-DDTHH:MM:SSZ``.

        Sub-second precision and any UTC offset text are dropped from the
        output. Clearing the fields before formatting, instead of cutting
        the ISO string at its last ``'.'``, keeps the result valid when the
        microsecond field happens to be zero and no ``'.'`` is emitted.
        """
        return moment.replace(microsecond=0, tzinfo=None).isoformat('T') + 'Z'

    def convert_metadata(self, mi):
        """Remember ``mi`` on the instance and return serialized core.xml.

        :param mi: book metadata, stored as ``self.mi`` and written into the
            core properties by ``update_doc_props``.
        """
        namespaces = self.namespace.namespaces
        E = ElementMaker(namespace=namespaces['cp'], nsmap={x: namespaces[x] for x in 'cp dc dcterms xsi'.split()})
        cp = E.coreProperties(E.revision("1"), E.lastModifiedBy('calibre'))
        ts = self._w3cdtf(utcnow())
        # Creation and modification stamps share the same moment.
        for name in 'created modified'.split():
            stamp = cp.makeelement('{%s}%s' % (namespaces['dcterms'], name), **{'{%s}type' % namespaces['xsi']: 'dcterms:W3CDTF'})
            stamp.text = ts
            cp.append(stamp)
        self.mi = mi
        update_doc_props(cp, self.mi, self.namespace)
        return xml2str(cp)

    def create_empty_document(self, mi):
        """Install a bare document and styles tree built from ``self.opts``.

        ``mi`` is accepted for interface compatibility and is not used.
        """
        self.document, self.styles = create_skeleton(self.opts)[:2]

    def write(self, path_or_stream, mi, create_empty_document=False):
        """Write the whole package as a zip archive.

        :param path_or_stream: destination handed to ``ZipFile`` in ``'w'``
            mode.
        :param mi: book metadata used for the document properties.
        :param create_empty_document: when true, replace the document and
            styles with an empty skeleton before writing.
        """
        if create_empty_document:
            self.create_empty_document(mi)
        with ZipFile(path_or_stream, 'w') as zf:
            zf.writestr('[Content_Types].xml', self.contenttypes)
            zf.writestr('_rels/.rels', self.containerrels)
            # convert_metadata() stores mi on the instance, so it has to run
            # before appproperties for the publisher to be picked up.
            zf.writestr('docProps/core.xml', self.convert_metadata(mi))
            zf.writestr('docProps/app.xml', self.appproperties)
            zf.writestr('word/webSettings.xml', self.websettings)
            zf.writestr('word/document.xml', xml2str(self.document))
            zf.writestr('word/styles.xml', xml2str(self.styles))
            zf.writestr('word/numbering.xml', xml2str(self.numbering))
            zf.writestr('word/fontTable.xml', xml2str(self.font_table))
            zf.writestr('word/_rels/document.xml.rels', self.document_relationships.serialize())
            zf.writestr('word/_rels/fontTable.xml.rels', xml2str(self.embedded_fonts))
            for fname, data_getter in self.images.items():
                zf.writestr(fname, data_getter())
            for fname, data in self.fonts.items():
                zf.writestr(fname, data)
|
|
|
|
|
|
if __name__ == '__main__':
    # Manual smoke test: print the generated webSettings part of a DOCX
    # object created without options or a logger.
    print(DOCX(None, None).websettings)
|