mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-23 22:51:30 +02:00
Fixed flake8 issues in several modules
This commit is contained in:
@@ -1,5 +1,11 @@
|
||||
import sys, os, re, math, errno, uuid, numbers
|
||||
from collections import OrderedDict, defaultdict
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import math
|
||||
import errno
|
||||
import uuid
|
||||
import numbers
|
||||
import collections
|
||||
import mimetypes
|
||||
|
||||
from lxml import etree
|
||||
@@ -7,23 +13,24 @@ from lxml import html
|
||||
from lxml.html.builder import (
|
||||
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, A, DT, DL, DD, H1)
|
||||
|
||||
from ebook_converter import guess_type
|
||||
from ebook_converter.ebooks.docx.container import DOCX
|
||||
from ebook_converter.ebooks.docx.names import XML, generate_anchor
|
||||
from ebook_converter.ebooks.docx.styles import Styles, inherit, PageProperties
|
||||
from ebook_converter.ebooks.docx.numbering import Numbering
|
||||
from ebook_converter.ebooks.docx.fonts import Fonts, is_symbol_font, map_symbol_text
|
||||
from ebook_converter.ebooks.docx.images import Images
|
||||
from ebook_converter.ebooks.docx.tables import Tables
|
||||
from ebook_converter.ebooks.docx.footnotes import Footnotes
|
||||
from ebook_converter.ebooks.docx.cleanup import cleanup_markup
|
||||
from ebook_converter.ebooks.docx.container import DOCX
|
||||
from ebook_converter.ebooks.docx.fields import Fields
|
||||
from ebook_converter.ebooks.docx.fonts import Fonts
|
||||
from ebook_converter.ebooks.docx.fonts import is_symbol_font
|
||||
from ebook_converter.ebooks.docx.fonts import map_symbol_text
|
||||
from ebook_converter.ebooks.docx.footnotes import Footnotes
|
||||
from ebook_converter.ebooks.docx.images import Images
|
||||
from ebook_converter.ebooks.docx.names import XML, generate_anchor
|
||||
from ebook_converter.ebooks.docx.numbering import Numbering
|
||||
from ebook_converter.ebooks.docx.settings import Settings
|
||||
from ebook_converter.ebooks.docx.styles import Styles, inherit, PageProperties
|
||||
from ebook_converter.ebooks.docx.tables import Tables
|
||||
from ebook_converter.ebooks.docx.theme import Theme
|
||||
from ebook_converter.ebooks.docx.toc import create_toc
|
||||
from ebook_converter.ebooks.docx.fields import Fields
|
||||
from ebook_converter.ebooks.docx.settings import Settings
|
||||
from ebook_converter.ebooks.metadata.opf2 import OPFCreator
|
||||
from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||
|
||||
from ebook_converter.utils.localization import canonicalize_lang
|
||||
from ebook_converter.utils.localization import lang_as_iso639_1
|
||||
|
||||
|
||||
NBSP = '\xa0'
|
||||
@@ -54,7 +61,9 @@ def html_lang(docx_lang):
|
||||
|
||||
class Convert(object):
|
||||
|
||||
def __init__(self, path_or_stream, dest_dir=None, log=None, detect_cover=True, notes_text=None, notes_nopb=False, nosupsub=False):
|
||||
def __init__(self, path_or_stream, dest_dir=None, log=None,
|
||||
detect_cover=True, notes_text=None, notes_nopb=False,
|
||||
nosupsub=False):
|
||||
self.docx = DOCX(path_or_stream, log=log)
|
||||
self.namespace = self.docx.namespace
|
||||
self.ms_pat = re.compile(r'\s{2,}')
|
||||
@@ -73,7 +82,7 @@ class Convert(object):
|
||||
self.fields = Fields(self.namespace)
|
||||
self.styles = Styles(self.namespace, self.tables)
|
||||
self.images = Images(self.namespace, self.log)
|
||||
self.object_map = OrderedDict()
|
||||
self.object_map = collections.OrderedDict()
|
||||
self.html = HTML(
|
||||
HEAD(
|
||||
META(charset='utf-8'),
|
||||
@@ -82,9 +91,9 @@ class Convert(object):
|
||||
),
|
||||
self.body
|
||||
)
|
||||
self.html.text='\n\t'
|
||||
self.html[0].text='\n\t\t'
|
||||
self.html[0].tail='\n'
|
||||
self.html.text = '\n\t'
|
||||
self.html[0].text = '\n\t\t'
|
||||
self.html[0].tail = '\n'
|
||||
for child in self.html[0]:
|
||||
child.tail = '\n\t\t'
|
||||
self.html[0][-1].tail = '\n\t'
|
||||
@@ -98,17 +107,18 @@ class Convert(object):
|
||||
|
||||
def __call__(self):
|
||||
doc = self.docx.document
|
||||
relationships_by_id, relationships_by_type = self.docx.document_relationships
|
||||
(relationships_by_id,
|
||||
relationships_by_type) = self.docx.document_relationships
|
||||
self.resolve_alternate_content(doc)
|
||||
self.fields(doc, self.log)
|
||||
self.read_styles(relationships_by_type)
|
||||
self.images(relationships_by_id)
|
||||
self.layers = OrderedDict()
|
||||
self.layers = collections.OrderedDict()
|
||||
self.framed = [[]]
|
||||
self.frame_map = {}
|
||||
self.framed_map = {}
|
||||
self.anchor_map = {}
|
||||
self.link_map = defaultdict(list)
|
||||
self.link_map = collections.defaultdict(list)
|
||||
self.link_source_map = {}
|
||||
self.toc_anchor = None
|
||||
self.block_runs = []
|
||||
@@ -142,7 +152,8 @@ class Convert(object):
|
||||
dl = DL(id=anchor)
|
||||
dl.set('class', 'footnote')
|
||||
self.body.append(dl)
|
||||
dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text)))
|
||||
dl.append(DT('[', A('←' + text, href='#back_%s' % anchor,
|
||||
title=text)))
|
||||
dl[-1][0].tail = ']'
|
||||
dl.append(DD())
|
||||
paras = []
|
||||
@@ -159,7 +170,8 @@ class Convert(object):
|
||||
self.mark_block_runs(paras)
|
||||
|
||||
for p, wp in self.object_map.items():
|
||||
if len(p) > 0 and not p.text and len(p[0]) > 0 and not p[0].text and p[0][0].get('class', None) == 'tab':
|
||||
if (len(p) > 0 and not p.text and len(p[0]) > 0 and
|
||||
not p[0].text and p[0][0].get('class', None) == 'tab'):
|
||||
# Paragraph uses tabs for indentation, convert to text-indent
|
||||
parent = p[0]
|
||||
tabs = []
|
||||
@@ -172,7 +184,9 @@ class Convert(object):
|
||||
break
|
||||
indent = len(tabs) * self.settings.default_tab_stop
|
||||
style = self.styles.resolve(wp)
|
||||
if style.text_indent is inherit or (hasattr(style.text_indent, 'endswith') and style.text_indent.endswith('pt')):
|
||||
if (style.text_indent is inherit or
|
||||
(hasattr(style.text_indent, 'endswith') and
|
||||
style.text_indent.endswith('pt'))):
|
||||
if style.text_indent is not inherit:
|
||||
indent = float(style.text_indent[:-2]) + indent
|
||||
style.text_indent = '%.3gpt' % indent
|
||||
@@ -197,7 +211,8 @@ class Convert(object):
|
||||
except (TypeError, ValueError):
|
||||
lvl = 0
|
||||
numbered.append((html_obj, num_id, lvl))
|
||||
self.numbering.apply_markup(numbered, self.body, self.styles, self.object_map, self.images)
|
||||
self.numbering.apply_markup(numbered, self.body, self.styles,
|
||||
self.object_map, self.images)
|
||||
self.apply_frames()
|
||||
|
||||
if len(self.body) > 0:
|
||||
@@ -232,13 +247,15 @@ class Convert(object):
|
||||
self.fields.polish_markup(self.object_map)
|
||||
|
||||
self.log.debug('Cleaning up redundant markup generated by Word')
|
||||
self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath)
|
||||
self.cover_image = cleanup_markup(self.log, self.html, self.styles,
|
||||
self.dest_dir, self.detect_cover,
|
||||
self.namespace.XPath)
|
||||
|
||||
return self.write(doc)
|
||||
|
||||
def read_page_properties(self, doc):
|
||||
current = []
|
||||
self.page_map = OrderedDict()
|
||||
self.page_map = collections.OrderedDict()
|
||||
self.section_starts = []
|
||||
|
||||
for p in self.namespace.descendants(doc, 'w:p', 'w:tbl'):
|
||||
@@ -267,7 +284,8 @@ class Convert(object):
|
||||
def resolve_alternate_content(self, doc):
|
||||
# For proprietary extensions in Word documents use the fallback, spec
|
||||
# compliant form
|
||||
# See https://wiki.openoffice.org/wiki/OOXML/Markup_Compatibility_and_Extensibility
|
||||
# See https://wiki.openoffice.org/wiki/OOXML/Markup_Compatibility_and_Extensibility  # noqa
|
||||
for ac in self.namespace.descendants(doc, 'mc:AlternateContent'):
|
||||
choices = self.namespace.XPath('./mc:Choice')(ac)
|
||||
fallbacks = self.namespace.XPath('./mc:Fallback')(ac)
|
||||
@@ -284,7 +302,8 @@ class Convert(object):
|
||||
cname[-1] = defname
|
||||
if self.docx.exists('/'.join(cname)):
|
||||
name = name
|
||||
if name and name.startswith('word/word') and not self.docx.exists(name):
|
||||
if (name and name.startswith('word/word') and
|
||||
not self.docx.exists(name)):
|
||||
name = name.partition('/')[2]
|
||||
return name
|
||||
|
||||
@@ -327,7 +346,8 @@ class Convert(object):
|
||||
self.log.warn('Endnotes %s do not exist' % enname)
|
||||
else:
|
||||
enrel = self.docx.get_relationships(enname)
|
||||
footnotes(etree.fromstring(foraw) if foraw else None, forel, etree.fromstring(enraw) if enraw else None, enrel)
|
||||
footnotes(etree.fromstring(foraw) if foraw else None, forel,
|
||||
etree.fromstring(enraw) if enraw else None, enrel)
|
||||
|
||||
if fname is not None:
|
||||
embed_relationships = self.docx.get_relationships(fname)[0]
|
||||
@@ -336,7 +356,8 @@ class Convert(object):
|
||||
except KeyError:
|
||||
self.log.warn('Fonts table %s does not exist' % fname)
|
||||
else:
|
||||
fonts(etree.fromstring(raw), embed_relationships, self.docx, self.dest_dir)
|
||||
fonts(etree.fromstring(raw), embed_relationships, self.docx,
|
||||
self.dest_dir)
|
||||
|
||||
if tname is not None:
|
||||
try:
|
||||
@@ -364,16 +385,20 @@ class Convert(object):
|
||||
except KeyError:
|
||||
self.log.warn('Numbering styles %s do not exist' % nname)
|
||||
else:
|
||||
numbering(etree.fromstring(raw), self.styles, self.docx.get_relationships(nname)[0])
|
||||
numbering(etree.fromstring(raw), self.styles,
|
||||
self.docx.get_relationships(nname)[0])
|
||||
|
||||
self.styles.resolve_numbering(numbering)
|
||||
|
||||
def write(self, doc):
|
||||
toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log, self.namespace)
|
||||
raw = html.tostring(self.html, encoding='utf-8', doctype='<!DOCTYPE html>')
|
||||
toc = create_toc(doc, self.body, self.resolved_link_map, self.styles,
|
||||
self.object_map, self.log, self.namespace)
|
||||
raw = html.tostring(self.html, encoding='utf-8',
|
||||
doctype='<!DOCTYPE html>')
|
||||
with open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f:
|
||||
f.write(raw)
|
||||
css = self.styles.generate_css(self.dest_dir, self.docx, self.notes_nopb, self.nosupsub)
|
||||
css = self.styles.generate_css(self.dest_dir, self.docx,
|
||||
self.notes_nopb, self.nosupsub)
|
||||
if css:
|
||||
with open(os.path.join(self.dest_dir, 'docx.css'), 'wb') as f:
|
||||
f.write(css.encode('utf-8'))
|
||||
@@ -394,23 +419,29 @@ class Convert(object):
|
||||
title='Table of Contents',
|
||||
type='toc'))
|
||||
toc_file = os.path.join(self.dest_dir, 'toc.ncx')
|
||||
with open(os.path.join(self.dest_dir, 'metadata.opf'), 'wb') as of, open(toc_file, 'wb') as ncx:
|
||||
with open(os.path.join(self.dest_dir,
|
||||
'metadata.opf'), 'wb') as of, open(toc_file,
|
||||
'wb') as ncx:
|
||||
opf.render(of, ncx, 'toc.ncx', process_guide=process_guide)
|
||||
if os.path.getsize(toc_file) == 0:
|
||||
os.remove(toc_file)
|
||||
return os.path.join(self.dest_dir, 'metadata.opf')
|
||||
|
||||
def read_block_anchors(self, doc):
|
||||
doc_anchors = frozenset(self.namespace.XPath('./w:body/w:bookmarkStart[@w:name]')(doc))
|
||||
doc_anchors = frozenset(self.namespace.XPath('./w:body/w:bookmarkStart'
|
||||
'[@w:name]')(doc))
|
||||
if doc_anchors:
|
||||
current_bm = set()
|
||||
rmap = {v:k for k, v in self.object_map.items()}
|
||||
for p in self.namespace.descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'):
|
||||
rmap = {v: k for k, v in self.object_map.items()}
|
||||
for p in self.namespace.descendants(doc, 'w:p',
|
||||
'w:bookmarkStart[@w:name]'):
|
||||
if p.tag.endswith('}p'):
|
||||
if current_bm and p in rmap:
|
||||
para = rmap[p]
|
||||
if 'id' not in para.attrib:
|
||||
para.set('id', generate_anchor(next(iter(current_bm)), frozenset(self.anchor_map.values())))
|
||||
_bm = next(iter(current_bm))
|
||||
_am = frozenset(self.anchor_map.values())
|
||||
para.set('id', generate_anchor(_bm, _am))
|
||||
for name in current_bm:
|
||||
self.anchor_map[name] = para.get('id')
|
||||
current_bm = set()
|
||||
@@ -442,13 +473,15 @@ class Convert(object):
|
||||
except AttributeError:
|
||||
break
|
||||
|
||||
for x in self.namespace.descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink', 'w:instrText'):
|
||||
for x in self.namespace.descendants(p, 'w:r', 'w:bookmarkStart',
|
||||
'w:hyperlink', 'w:instrText'):
|
||||
if p_parent(x) is not p:
|
||||
continue
|
||||
if x.tag.endswith('}r'):
|
||||
span = self.convert_run(x)
|
||||
if current_anchor is not None:
|
||||
(dest if len(dest) == 0 else span).set('id', current_anchor)
|
||||
(dest if len(dest) == 0 else span).set('id',
|
||||
current_anchor)
|
||||
current_anchor = None
|
||||
if current_hyperlink is not None:
|
||||
try:
|
||||
@@ -462,11 +495,14 @@ class Convert(object):
|
||||
self.layers[p].append(x)
|
||||
elif x.tag.endswith('}bookmarkStart'):
|
||||
anchor = self.namespace.get(x, 'w:name')
|
||||
if anchor and anchor not in self.anchor_map and anchor != '_GoBack':
|
||||
if (anchor and anchor not in self.anchor_map and
|
||||
anchor != '_GoBack'):
|
||||
# _GoBack is a special bookmark inserted by Word 2010 for
|
||||
# the return to previous edit feature, we ignore it
|
||||
old_anchor = current_anchor
|
||||
self.anchor_map[anchor] = current_anchor = generate_anchor(anchor, frozenset(self.anchor_map.values()))
|
||||
current_anchor = generate_anchor(
|
||||
anchor, frozenset(self .anchor_map.values()))
|
||||
self.anchor_map[anchor] = current_anchor
|
||||
if old_anchor is not None:
|
||||
# The previous anchor was not applied to any element
|
||||
for a, t in tuple(self.anchor_map.items()):
|
||||
@@ -474,10 +510,13 @@ class Convert(object):
|
||||
self.anchor_map[a] = current_anchor
|
||||
elif x.tag.endswith('}hyperlink'):
|
||||
current_hyperlink = x
|
||||
elif x.tag.endswith('}instrText') and x.text and x.text.strip().startswith('TOC '):
|
||||
elif (x.tag.endswith('}instrText') and x.text and
|
||||
x.text.strip().startswith('TOC ')):
|
||||
old_anchor = current_anchor
|
||||
anchor = str(uuid.uuid4())
|
||||
self.anchor_map[anchor] = current_anchor = generate_anchor('toc', frozenset(self.anchor_map.values()))
|
||||
current_anchor = generate_anchor(
|
||||
'toc', frozenset(self.anchor_map.values()))
|
||||
self.anchor_map[anchor] = current_anchor
|
||||
self.toc_anchor = current_anchor
|
||||
if old_anchor is not None:
|
||||
# The previous anchor was not applied to any element
|
||||
@@ -489,7 +528,8 @@ class Convert(object):
|
||||
dest.set('id', current_anchor)
|
||||
current_anchor = None
|
||||
|
||||
m = re.match(r'heading\s+(\d+)$', style.style_name or '', re.IGNORECASE)
|
||||
m = re.match(r'heading\s+(\d+)$', style.style_name or '',
|
||||
re.IGNORECASE)
|
||||
if m is not None:
|
||||
n = min(6, max(1, int(m.group(1))))
|
||||
dest.tag = 'h%d' % n
|
||||
@@ -533,7 +573,8 @@ class Convert(object):
|
||||
if len(dest) > 0 and not dest[-1].tail:
|
||||
if dest[-1].tag == 'br':
|
||||
dest[-1].tail = NBSP
|
||||
elif len(dest[-1]) > 0 and dest[-1][-1].tag == 'br' and not dest[-1][-1].tail:
|
||||
elif (len(dest[-1]) > 0 and dest[-1][-1].tag == 'br' and
|
||||
not dest[-1][-1].tail):
|
||||
dest[-1][-1].tail = NBSP
|
||||
|
||||
return dest
|
||||
@@ -578,12 +619,12 @@ class Convert(object):
|
||||
if anchor and anchor in self.anchor_map:
|
||||
span.set('href', '#' + self.anchor_map[anchor])
|
||||
continue
|
||||
self.log.warn('Hyperlink with unknown target (rid=%s, anchor=%s), ignoring' %
|
||||
(rid, anchor))
|
||||
self.log.warn('Hyperlink with unknown target (rid=%s, anchor=%s), '
|
||||
'ignoring' % (rid, anchor))
|
||||
# hrefs that point nowhere give epubcheck a hernia. The element
|
||||
# should be styled explicitly by Word anyway.
|
||||
# span.set('href', '#')
|
||||
rmap = {v:k for k, v in self.object_map.items()}
|
||||
rmap = {v: k for k, v in self.object_map.items()}
|
||||
for hyperlink, runs in self.fields.hyperlink_fields:
|
||||
spans = [rmap[r] for r in runs if r in rmap]
|
||||
if not spans:
|
||||
@@ -604,7 +645,8 @@ class Convert(object):
|
||||
if anchor in self.anchor_map:
|
||||
span.set('href', '#' + self.anchor_map[anchor])
|
||||
continue
|
||||
self.log.warn('Hyperlink field with unknown anchor: %s' % anchor)
|
||||
self.log.warn('Hyperlink field with unknown anchor: %s' %
|
||||
anchor)
|
||||
else:
|
||||
if url in self.anchor_map:
|
||||
span.set('href', '#' + self.anchor_map[url])
|
||||
@@ -652,7 +694,8 @@ class Convert(object):
|
||||
# actually needs it, i.e. if it has more than one
|
||||
# consecutive space or it has newlines or tabs.
|
||||
multi_spaces = self.ms_pat.search(ctext) is not None
|
||||
preserve = multi_spaces or self.ws_pat.search(ctext) is not None
|
||||
preserve = (multi_spaces or
|
||||
self.ws_pat.search(ctext) is not None)
|
||||
if preserve:
|
||||
text.add_elem(SPAN(ctext, style="white-space:pre-wrap"))
|
||||
ans.append(text.elem)
|
||||
@@ -668,24 +711,30 @@ class Convert(object):
|
||||
else:
|
||||
clear = child.get('clear', None)
|
||||
if clear in {'all', 'left', 'right'}:
|
||||
br = BR(style='clear:%s'%('both' if clear == 'all' else clear))
|
||||
br = BR(style='clear:%s' % ('both' if clear == 'all'
|
||||
else clear))
|
||||
else:
|
||||
br = BR()
|
||||
text.add_elem(br)
|
||||
ans.append(text.elem)
|
||||
elif self.namespace.is_tag(child, 'w:drawing') or self.namespace.is_tag(child, 'w:pict'):
|
||||
for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
|
||||
elif (self.namespace.is_tag(child, 'w:drawing') or
|
||||
self.namespace.is_tag(child, 'w:pict')):
|
||||
for img in self.images.to_html(child, self.current_page,
|
||||
self.docx, self.dest_dir):
|
||||
text.add_elem(img)
|
||||
ans.append(text.elem)
|
||||
elif self.namespace.is_tag(child, 'w:footnoteReference') or self.namespace.is_tag(child, 'w:endnoteReference'):
|
||||
elif (self.namespace.is_tag(child, 'w:footnoteReference') or
|
||||
self.namespace.is_tag(child, 'w:endnoteReference')):
|
||||
anchor, name = self.footnotes.get_ref(child)
|
||||
if anchor and name:
|
||||
l = A(name, id='back_%s' % anchor, href='#' + anchor, title=name)
|
||||
l.set('class', 'noteref')
|
||||
text.add_elem(l)
|
||||
_l = A(name, id='back_%s' % anchor, href='#' + anchor,
|
||||
title=name)
|
||||
_l.set('class', 'noteref')
|
||||
text.add_elem(_l)
|
||||
ans.append(text.elem)
|
||||
elif self.namespace.is_tag(child, 'w:tab'):
|
||||
spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
|
||||
spaces = int(math.ceil((self.settings.default_tab_stop / 36) *
|
||||
6))
|
||||
text.add_elem(SPAN(NBSP * spaces))
|
||||
ans.append(text.elem)
|
||||
ans[-1].set('class', 'tab')
|
||||
@@ -699,7 +748,8 @@ class Convert(object):
|
||||
style = self.styles.resolve_run(run)
|
||||
if style.vert_align in {'superscript', 'subscript'}:
|
||||
if ans.text or len(ans):
|
||||
ans.set('data-docx-vert', 'sup' if style.vert_align == 'superscript' else 'sub')
|
||||
ans.set('data-docx-vert',
|
||||
'sup' if style.vert_align == 'superscript' else 'sub')
|
||||
if style.lang is not inherit:
|
||||
lang = html_lang(style.lang)
|
||||
if lang is not None and lang != self.doc_lang:
|
||||
@@ -738,12 +788,14 @@ class Convert(object):
|
||||
idx = parent.index(paras[0])
|
||||
frame = DIV(*paras)
|
||||
parent.insert(idx, frame)
|
||||
self.framed_map[frame] = css = style.css(self.page_map[self.object_map[paras[0]]])
|
||||
self.framed_map[frame] = css = style.css(
|
||||
self.page_map[self.object_map[paras[0]]])
|
||||
self.styles.register(css, 'frame')
|
||||
|
||||
if not self.block_runs:
|
||||
return
|
||||
rmap = {v:k for k, v in self.object_map.items()}
|
||||
|
||||
rmap = {v: k for k, v in self.object_map.items()}
|
||||
for border_style, blocks in self.block_runs:
|
||||
paras = tuple(rmap[p] for p in blocks)
|
||||
for p in paras:
|
||||
@@ -796,17 +848,20 @@ class Convert(object):
|
||||
else:
|
||||
border_style = style.clone_border_styles()
|
||||
if has_visible_border:
|
||||
border_style.margin_top, style.margin_top = style.margin_top, inherit
|
||||
style.margin_top = inherit
|
||||
border_style.margin_top = style.margin_top
|
||||
if p is not run[-1]:
|
||||
style.padding_bottom = 0
|
||||
else:
|
||||
if has_visible_border:
|
||||
border_style.margin_bottom, style.margin_bottom = style.margin_bottom, inherit
|
||||
style.margin_bottom = inherit
|
||||
border_style.margin_bottom = style.margin_bottom
|
||||
style.clear_borders()
|
||||
if p is not run[-1]:
|
||||
style.apply_between_border()
|
||||
if has_visible_border:
|
||||
border_style.margin_left, border_style.margin_right = max_left,max_right
|
||||
border_style.margin_left = max_left
|
||||
border_style.margin_right = max_right
|
||||
self.block_runs.append((border_style, run))
|
||||
|
||||
run = []
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import mimetypes
|
||||
import textwrap, os
|
||||
import os
|
||||
import textwrap
|
||||
|
||||
from lxml import etree
|
||||
from lxml.builder import ElementMaker
|
||||
@@ -9,22 +10,48 @@ from ebook_converter.ebooks.docx.names import DOCXNamespace
|
||||
from ebook_converter.ebooks.metadata import authors_to_string
|
||||
from ebook_converter.ebooks.pdf.render.common import PAPER_SIZES
|
||||
from ebook_converter.utils.date import utcnow
|
||||
from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||
from ebook_converter.utils.localization import canonicalize_lang
|
||||
from ebook_converter.utils.localization import lang_as_iso639_1
|
||||
from ebook_converter.utils.zipfile import ZipFile
|
||||
|
||||
|
||||
WORD_TYPES = {"/word/footnotes.xml": "application/vnd.openxmlformats-"
|
||||
"officedocument.wordprocessingml.footnotes+xml",
|
||||
"/word/document.xml": "application/vnd.openxmlformats-"
|
||||
"officedocument.wordprocessingml.document.main+xml",
|
||||
"/word/numbering.xml": "application/vnd.openxmlformats-"
|
||||
"officedocument.wordprocessingml.numbering+xml",
|
||||
"/word/styles.xml": "application/vnd.openxmlformats-"
|
||||
"officedocument.wordprocessingml.styles+xml",
|
||||
"/word/endnotes.xml": "application/vnd.openxmlformats-"
|
||||
"officedocument.wordprocessingml.endnotes+xml",
|
||||
"/word/settings.xml": "application/vnd.openxmlformats-"
|
||||
"officedocument.wordprocessingml.settings+xml",
|
||||
"/word/theme/theme1.xml": "application/vnd.openxmlformats-"
|
||||
"officedocument.theme+xml",
|
||||
"/word/fontTable.xml": "application/vnd.openxmlformats-"
|
||||
"officedocument.wordprocessingml.fontTable+xml",
|
||||
"/word/webSettings.xml": "application/vnd.openxmlformats-"
|
||||
"officedocument.wordprocessingml.webSettings+xml",
|
||||
"/docProps/core.xml": "application/vnd.openxmlformats-package."
|
||||
"core-properties+xml",
|
||||
"/docProps/app.xml": "application/vnd.openxmlformats-"
|
||||
"officedocument.extended-properties+xml"}
|
||||
|
||||
|
||||
def xml2str(root, pretty_print=False, with_tail=False):
|
||||
if hasattr(etree, 'cleanup_namespaces'):
|
||||
etree.cleanup_namespaces(root)
|
||||
ans = etree.tostring(root, encoding='utf-8', xml_declaration=True,
|
||||
pretty_print=pretty_print, with_tail=with_tail)
|
||||
pretty_print=pretty_print, with_tail=with_tail)
|
||||
return ans
|
||||
|
||||
|
||||
def page_size(opts):
|
||||
width, height = PAPER_SIZES[opts.docx_page_size]
|
||||
if opts.docx_custom_page_size is not None:
|
||||
width, height = map(float, opts.docx_custom_page_size.partition('x')[0::2])
|
||||
width, height = map(float,
|
||||
opts.docx_custom_page_size.partition('x')[0::2])
|
||||
return width, height
|
||||
|
||||
|
||||
@@ -47,7 +74,9 @@ def create_skeleton(opts, namespaces=None):
|
||||
|
||||
def w(x):
|
||||
return '{%s}%s' % (namespaces['w'], x)
|
||||
dn = {k:v for k, v in namespaces.items() if k in {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne', 'a', 'pic'}}
|
||||
dn = {k: v for k, v in namespaces.items() if k in {'w', 'r', 'm', 've',
|
||||
'o', 'wp', 'w10', 'wne',
|
||||
'a', 'pic'}}
|
||||
E = ElementMaker(namespace=dn['w'], nsmap=dn)
|
||||
doc = E.document()
|
||||
body = E.body()
|
||||
@@ -59,27 +88,32 @@ def create_skeleton(opts, namespaces=None):
|
||||
val = page_margin(opts, which)
|
||||
return w(which), str(int(val * 20))
|
||||
body.append(E.sectPr(
|
||||
E.pgSz(**{w('w'):str(width), w('h'):str(height)}),
|
||||
E.pgSz(**{w('w'): str(width), w('h'): str(height)}),
|
||||
E.pgMar(**dict(map(margin, 'left top right bottom'.split()))),
|
||||
E.cols(**{w('space'):'720'}),
|
||||
E.docGrid(**{w('linePitch'):"360"}),
|
||||
E.cols(**{w('space'): '720'}),
|
||||
E.docGrid(**{w('linePitch'): "360"}),
|
||||
))
|
||||
|
||||
dn = {k:v for k, v in namespaces.items() if k in tuple('wra') + ('wp',)}
|
||||
dn = {k: v for k, v in namespaces.items() if k in tuple('wra') + ('wp',)}
|
||||
E = ElementMaker(namespace=dn['w'], nsmap=dn)
|
||||
styles = E.styles(
|
||||
E.docDefaults(
|
||||
E.rPrDefault(
|
||||
E.rPr(
|
||||
E.rFonts(**{w('asciiTheme'):"minorHAnsi", w('eastAsiaTheme'):"minorEastAsia", w('hAnsiTheme'):"minorHAnsi", w('cstheme'):"minorBidi"}),
|
||||
E.sz(**{w('val'):'22'}),
|
||||
E.szCs(**{w('val'):'22'}),
|
||||
E.lang(**{w('val'):'en-US', w('eastAsia'):"en-US", w('bidi'):"ar-SA"})
|
||||
E.rFonts(**{w('asciiTheme'): "minorHAnsi",
|
||||
w('eastAsiaTheme'): "minorEastAsia",
|
||||
w('hAnsiTheme'): "minorHAnsi",
|
||||
w('cstheme'): "minorBidi"}),
|
||||
E.sz(**{w('val'): '22'}),
|
||||
E.szCs(**{w('val'): '22'}),
|
||||
E.lang(**{w('val'): 'en-US', w('eastAsia'): "en-US",
|
||||
w('bidi'): "ar-SA"})
|
||||
)
|
||||
),
|
||||
E.pPrDefault(
|
||||
E.pPr(
|
||||
E.spacing(**{w('after'):"0", w('line'):"276", w('lineRule'):"auto"})
|
||||
E.spacing(**{w('after'): "0", w('line'): "276",
|
||||
w('lineRule'): "auto"})
|
||||
)
|
||||
)
|
||||
)
|
||||
@@ -103,8 +137,8 @@ def update_doc_props(root, mi, namespace):
|
||||
if mi.comments:
|
||||
setm('description', mi.comments)
|
||||
if mi.languages:
|
||||
l = canonicalize_lang(mi.languages[0])
|
||||
setm('language', lang_as_iso639_1(l) or l)
|
||||
_l = canonicalize_lang(mi.languages[0])
|
||||
setm('language', lang_as_iso639_1(_l) or _l)
|
||||
|
||||
|
||||
class DocumentRelationships(object):
|
||||
@@ -115,8 +149,7 @@ class DocumentRelationships(object):
|
||||
for typ, target in {namespace.names['STYLES']: 'styles.xml',
|
||||
namespace.names['NUMBERING']: 'numbering.xml',
|
||||
namespace.names['WEB_SETTINGS']: 'webSettings.xml',
|
||||
namespace.names['FONTS']: 'fontTable.xml',
|
||||
}.items():
|
||||
namespace.names['FONTS']: 'fontTable.xml'}.items():
|
||||
self.add_relationship(target, typ)
|
||||
|
||||
def get_relationship_id(self, target, rtype, target_mode=None):
|
||||
@@ -134,7 +167,8 @@ class DocumentRelationships(object):
|
||||
|
||||
def serialize(self):
|
||||
namespaces = self.namespace.namespaces
|
||||
E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']})
|
||||
E = ElementMaker(namespace=namespaces['pr'],
|
||||
nsmap={None: namespaces['pr']})
|
||||
relationships = E.Relationships()
|
||||
for (target, rtype, target_mode), rid in self.rmap.items():
|
||||
r = E.Relationship(Id=rid, Type=rtype, Target=target)
|
||||
@@ -151,9 +185,12 @@ class DOCX(object):
|
||||
namespaces = self.namespace.namespaces
|
||||
self.opts, self.log = opts, log
|
||||
self.document_relationships = DocumentRelationships(self.namespace)
|
||||
self.font_table = etree.Element('{%s}fonts' % namespaces['w'], nsmap={k:namespaces[k] for k in 'wr'})
|
||||
self.numbering = etree.Element('{%s}numbering' % namespaces['w'], nsmap={k:namespaces[k] for k in 'wr'})
|
||||
E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']})
|
||||
self.font_table = etree.Element('{%s}fonts' % namespaces['w'],
|
||||
nsmap={k: namespaces[k] for k in 'wr'})
|
||||
self.numbering = etree.Element('{%s}numbering' % namespaces['w'],
|
||||
nsmap={k: namespaces[k] for k in 'wr'})
|
||||
E = ElementMaker(namespace=namespaces['pr'],
|
||||
nsmap={None: namespaces['pr']})
|
||||
self.embedded_fonts = E.Relationships()
|
||||
self.fonts = {}
|
||||
self.images = {}
|
||||
@@ -161,21 +198,10 @@ class DOCX(object):
|
||||
# Boilerplate {{{
|
||||
@property
|
||||
def contenttypes(self):
|
||||
E = ElementMaker(namespace=self.namespace.namespaces['ct'], nsmap={None:self.namespace.namespaces['ct']})
|
||||
E = ElementMaker(namespace=self.namespace.namespaces['ct'],
|
||||
nsmap={None: self.namespace.namespaces['ct']})
|
||||
types = E.Types()
|
||||
for partname, mt in {
|
||||
"/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml",
|
||||
"/word/document.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml",
|
||||
"/word/numbering.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml",
|
||||
"/word/styles.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml",
|
||||
"/word/endnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml",
|
||||
"/word/settings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml",
|
||||
"/word/theme/theme1.xml": "application/vnd.openxmlformats-officedocument.theme+xml",
|
||||
"/word/fontTable.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
|
||||
"/word/webSettings.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml",
|
||||
"/docProps/core.xml": "application/vnd.openxmlformats-package.core-properties+xml",
|
||||
"/docProps/app.xml": "application/vnd.openxmlformats-officedocument.extended-properties+xml",
|
||||
}.items():
|
||||
for partname, mt in WORD_TYPES.items():
|
||||
types.append(E.Override(PartName=partname, ContentType=mt))
|
||||
added = {'png', 'gif', 'jpeg', 'jpg', 'svg', 'xml'}
|
||||
for ext in added:
|
||||
@@ -199,7 +225,8 @@ class DOCX(object):
|
||||
|
||||
@property
|
||||
def appproperties(self):
|
||||
E = ElementMaker(namespace=self.namespace.namespaces['ep'], nsmap={None:self.namespace.namespaces['ep']})
|
||||
E = ElementMaker(namespace=self.namespace.namespaces['ep'],
|
||||
nsmap={None: self.namespace.namespaces['ep']})
|
||||
props = E.Properties(
|
||||
E.Application(__appname__),
|
||||
E.AppVersion('%02d.%04d' % numeric_version[:2]),
|
||||
@@ -216,16 +243,17 @@ class DOCX(object):
|
||||
@property
|
||||
def containerrels(self):
|
||||
return textwrap.dedent('''\
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
|
||||
<Relationship Id="rId3" Type="{APPPROPS}" Target="docProps/app.xml"/>
|
||||
<Relationship Id="rId2" Type="{DOCPROPS}" Target="docProps/core.xml"/>
|
||||
<Relationship Id="rId1" Type="{DOCUMENT}" Target="word/document.xml"/>
|
||||
</Relationships>'''.format(**self.namespace.names)).encode('utf-8')
|
||||
<?xml version='1.0' encoding='utf-8'?>
|
||||
<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
|
||||
<Relationship Id="rId3" Type="{APPPROPS}" Target="docProps/app.xml"/>
|
||||
<Relationship Id="rId2" Type="{DOCPROPS}" Target="docProps/core.xml"/>
|
||||
<Relationship Id="rId1" Type="{DOCUMENT}" Target="word/document.xml"/>
|
||||
</Relationships>'''.format(**self.namespace.names)).encode('utf-8') # noqa
|
||||
|
||||
@property
|
||||
def websettings(self):
|
||||
E = ElementMaker(namespace=self.namespace.namespaces['w'], nsmap={'w':self.namespace.namespaces['w']})
|
||||
E = ElementMaker(namespace=self.namespace.namespaces['w'],
|
||||
nsmap={'w': self.namespace.namespaces['w']})
|
||||
ws = E.webSettings(
|
||||
E.optimizeForBrowser, E.allowPNG, E.doNotSaveAsSingleFile)
|
||||
return xml2str(ws)
|
||||
@@ -234,11 +262,15 @@ class DOCX(object):
|
||||
|
||||
def convert_metadata(self, mi):
|
||||
namespaces = self.namespace.namespaces
|
||||
E = ElementMaker(namespace=namespaces['cp'], nsmap={x:namespaces[x] for x in 'cp dc dcterms xsi'.split()})
|
||||
E = ElementMaker(namespace=namespaces['cp'],
|
||||
nsmap={x: namespaces[x]
|
||||
for x in 'cp dc dcterms xsi'.split()})
|
||||
cp = E.coreProperties(E.revision("1"), E.lastModifiedBy('calibre'))
|
||||
ts = utcnow().isoformat('T').rpartition('.')[0] + 'Z'
|
||||
for x in 'created modified'.split():
|
||||
x = cp.makeelement('{%s}%s' % (namespaces['dcterms'], x), **{'{%s}type' % namespaces['xsi']:'dcterms:W3CDTF'})
|
||||
x = cp.makeelement('{%s}%s' % (namespaces['dcterms'], x),
|
||||
**{'{%s}type' %
|
||||
namespaces['xsi']: 'dcterms:W3CDTF'})
|
||||
x.text = ts
|
||||
cp.append(x)
|
||||
self.mi = mi
|
||||
@@ -261,8 +293,10 @@ class DOCX(object):
|
||||
zf.writestr('word/styles.xml', xml2str(self.styles))
|
||||
zf.writestr('word/numbering.xml', xml2str(self.numbering))
|
||||
zf.writestr('word/fontTable.xml', xml2str(self.font_table))
|
||||
zf.writestr('word/_rels/document.xml.rels', self.document_relationships.serialize())
|
||||
zf.writestr('word/_rels/fontTable.xml.rels', xml2str(self.embedded_fonts))
|
||||
zf.writestr('word/_rels/document.xml.rels',
|
||||
self.document_relationships.serialize())
|
||||
zf.writestr('word/_rels/fontTable.xml.rels',
|
||||
xml2str(self.embedded_fonts))
|
||||
for fname, data_getter in self.images.items():
|
||||
zf.writestr(fname, data_getter())
|
||||
for fname, data in self.fonts.items():
|
||||
|
||||
Reference in New Issue
Block a user