"""Image handling for DOCX output: registers images and builds drawing markup."""

import collections
import functools
import os
import posixpath
import urllib.parse

from lxml import etree

from ebook_converter import fit_image
from ebook_converter.ebooks.docx.images import pt_to_emu
from ebook_converter.utils.filenames import ascii_filename
from ebook_converter.utils.imghdr import identify


# Record kept for every image registered with the document:
#   rid    - relationship id returned by document_relationships.add_image()
#   fname  - file name of the image below 'word/' (always starts with 'media/')
#   width, height, fmt - values reported by identify() for the image data
#   item   - the manifest item the image data is read from
Image = collections.namedtuple('Image', 'rid fname width height fmt item')


def as_num(x):
    """Return ``x`` converted to ``float``, or ``0`` when it cannot be converted."""
    try:
        return float(x)
    except Exception:
        # Deliberately best-effort: any value that is not number-like (for
        # example a keyword such as 'auto') counts as zero.
        return 0


def get_image_margins(style):
    """Build the ``distL``/``distR``/``distT``/``distB`` attributes for an image.

    For each edge the padding and margin of ``style`` are added together
    (non-numeric values count as 0), converted with ``pt_to_emu`` and stored
    as a string.

    :param style: object exposing ``paddingLeft``, ``marginLeft``, ... attributes
    :return: dict mapping ``'distL'``, ``'distR'``, ``'distT'``, ``'distB'`` to strings
    """
    ans = {}
    for edge in 'Left Right Top Bottom'.split():
        val = as_num(getattr(style, 'padding' + edge)) + as_num(getattr(style, 'margin' + edge))
        ans['dist' + edge[0]] = str(pt_to_emu(val))
    return ans


class ImagesManager(object):
    """Track the images used by a document and generate ``w:drawing`` markup for them."""

    def __init__(self, oeb, document_relationships, opts):
        """
        :param oeb: book object; its ``log`` and ``manifest.hrefs`` are used
        :param document_relationships: object providing ``add_image()`` and
            ``namespace`` (``makeelement`` / ``namespaces``)
        :param opts: conversion options; ``output_profile.width_pts`` and
            ``output_profile.height_pts`` give the page size images are fitted to
        """
        self.oeb, self.log = oeb, oeb.log
        self.page_width, self.page_height = opts.output_profile.width_pts, opts.output_profile.height_pts
        # href -> Image record for every image read so far
        self.images = {}
        # Lower-cased base names (without extension) already handed out
        self.seen_filenames = set()
        self.document_relationships = document_relationships
        # Number of drawings created so far; used as the wp:docPr id
        self.count = 0

    def read_image(self, href):
        """Register the image at ``href`` (once) and return its ``Image`` record.

        :return: the ``Image`` record, or ``None`` when the manifest has no
            item for ``href`` or the item's data is not ``bytes``
        """
        if href not in self.images:
            item = self.oeb.manifest.hrefs.get(href)
            if item is None or not isinstance(item.data, bytes):
                return
            try:
                fmt, width, height = identify(item.data)
            except Exception:
                self.log.warning('Replacing corrupted image with blank: %s' % href)
                # NOTE(review): ``I`` is neither defined nor imported in this
                # module; presumably it is injected as a builtin elsewhere --
                # confirm, otherwise this fallback raises NameError.
                item.data = I('blank.png', data=True, allow_user_override=False)
                fmt, width, height = identify(item.data)
            image_fname = 'media/' + self.create_filename(href, fmt)
            image_rid = self.document_relationships.add_image(image_fname)
            self.images[href] = Image(image_rid, image_fname, width, height, fmt, item)
            # The data is fetched again lazily by get_data() at serialize time
            item.unload_data_from_memory()
        return self.images[href]

    def add_image(self, img, block, stylizer, bookmark=None, as_block=False):
        """Add the HTML ``img`` element to ``block`` as a drawing.

        :param img: HTML image element; its ``src`` attribute is resolved
            with ``self.abshref``
        :param block: object whose ``add_image(drawing, bookmark=...)`` receives the markup
        :param stylizer: object providing ``style(element)``
        :param bookmark: passed through to ``block.add_image``
        :param as_block: passed through to :meth:`create_image_markup`
        :return: the relationship id of the image, or ``None`` when the image
            has no ``src`` or could not be read
        """
        src = img.get('src')
        if not src:
            return
        # NOTE(review): ``abshref`` is not defined in this class; presumably
        # the caller assigns it on the instance -- confirm.
        href = self.abshref(src)
        try:
            rid = self.read_image(href).rid
        except AttributeError:
            # read_image() returned None (or failed with AttributeError):
            # skip the image
            return
        drawing = self.create_image_markup(img, stylizer, href, as_block=as_block)
        block.add_image(drawing, bookmark=bookmark)
        return rid

    @staticmethod
    def _init_anchor(makeelement, anchor):
        """Set the attributes and ``wp:simplePos`` child shared by every ``wp:anchor``."""
        # These attributes are boilerplate that Word requires, even though
        # the DOCX specs define defaults for all of them
        for attr, value in (
            ('simplePos', '0'), ('relativeHeight', '1'), ('behindDoc', '0'),
            ('locked', '0'), ('layoutInCell', '1'), ('allowOverlap', '1'),
        ):
            anchor.set(attr, value)
        makeelement(anchor, 'wp:simplePos', x='0', y='0')

    def create_image_markup(self, html_img, stylizer, href, as_block=False):
        """Create the ``w:drawing`` element for an image already read via :meth:`read_image`.

        The image is emitted inline unless an alignment can be derived from
        its CSS (``float``, auto margins of a block image, or the ``float`` /
        ``text-align`` of a block parent that contains nothing but the
        image), in which case an anchored drawing is produced.

        :param html_img: the HTML image element
        :param stylizer: object providing ``style(element)``
        :param href: key of the image in ``self.images``
        :param as_block: whether the image is being rendered as a block
        :return: the ``w:drawing`` element
        """
        # TODO: img inside a link (clickable image)
        style = stylizer.style(html_img)
        floating = style['float']
        if floating not in {'left', 'right'}:
            floating = None
        if as_block:
            ml, mr = style._get('margin-left'), style._get('margin-right')
            # An auto margin absorbs the free space on its side: both auto
            # centers the image, only the left one auto pushes it right and
            # only the right one auto keeps it on the left.
            if ml == 'auto':
                floating = 'center' if mr == 'auto' else 'right'
            elif mr == 'auto':
                floating = 'left'
        else:
            parent = html_img.getparent()
            if len(parent) == 1 and not (parent.text or '').strip() and not (html_img.tail or '').strip():
                pstyle = stylizer.style(parent)
                if 'block' in pstyle['display']:
                    # We have an inline image alone inside a block
                    as_block = True
                    floating = pstyle['float']
                    if floating not in {'left', 'right'}:
                        floating = None
                        if pstyle['text-align'] in ('center', 'right'):
                            floating = pstyle['text-align']
                    floating = floating or 'left'
        fake_margins = floating is None
        self.count += 1
        img = self.images[href]
        name = urllib.parse.unquote(posixpath.basename(href))
        width, height = style.img_size(img.width, img.height)
        _, width, height = fit_image(width, height, self.page_width, self.page_height)
        width, height = map(pt_to_emu, (width, height))

        makeelement = self.document_relationships.namespace.makeelement
        namespaces = self.document_relationships.namespace.namespaces

        root = etree.Element('root', nsmap=namespaces)
        ans = makeelement(root, 'w:drawing', append=False)
        if floating is None:
            parent = makeelement(ans, 'wp:inline')
        else:
            parent = makeelement(ans, 'wp:anchor', **get_image_margins(style))
            self._init_anchor(makeelement, parent)
            makeelement(makeelement(parent, 'wp:positionH', relativeFrom='margin'), 'wp:align').text = floating
            makeelement(makeelement(parent, 'wp:positionV', relativeFrom='line'), 'wp:align').text = 'top'
        makeelement(parent, 'wp:extent', cx=str(width), cy=str(height))
        if fake_margins:
            # DOCX does not support setting margins for inline images, so we
            # fake it by using effect extents to simulate margins
            makeelement(parent, 'wp:effectExtent', **{k[-1].lower(): v for k, v in get_image_margins(style).items()})
        else:
            makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
        if floating is not None:
            # Word requires the wrap element to come after the extent settings
            if as_block:
                makeelement(parent, 'wp:wrapTopAndBottom')
            else:
                makeelement(parent, 'wp:wrapSquare', wrapText='bothSides')
        self.create_docx_image_markup(parent, name, html_img.get('alt') or name, img.rid, width, height)
        return ans

    def create_docx_image_markup(self, parent, name, alt, img_rid, width, height):
        """Append the ``wp:docPr`` / ``a:graphic`` / ``pic:pic`` subtree to ``parent``.

        :param parent: the ``wp:inline`` or ``wp:anchor`` element to fill
        :param name: value of the ``name`` attributes
        :param alt: value of the ``descr`` attributes
        :param img_rid: relationship id written to ``a:blip`` as ``r_embed``
        :param width: extent written as ``cx`` (already converted by the caller)
        :param height: extent written as ``cy`` (already converted by the caller)
        """
        makeelement = self.document_relationships.namespace.makeelement
        namespaces = self.document_relationships.namespace.namespaces

        makeelement(parent, 'wp:docPr', id=str(self.count), name=name, descr=alt)
        makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect="1")
        g = makeelement(parent, 'a:graphic')
        gd = makeelement(g, 'a:graphicData', uri=namespaces['pic'])
        pic = makeelement(gd, 'pic:pic')
        nvPicPr = makeelement(pic, 'pic:nvPicPr')
        makeelement(nvPicPr, 'pic:cNvPr', id='0', name=name, descr=alt)
        makeelement(nvPicPr, 'pic:cNvPicPr')
        bf = makeelement(pic, 'pic:blipFill')
        makeelement(bf, 'a:blip', r_embed=img_rid)
        makeelement(makeelement(bf, 'a:stretch'), 'a:fillRect')
        spPr = makeelement(pic, 'pic:spPr')
        xfrm = makeelement(spPr, 'a:xfrm')
        makeelement(xfrm, 'a:off', x='0', y='0')
        makeelement(xfrm, 'a:ext', cx=str(width), cy=str(height))
        makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst')

    def create_filename(self, href, fmt):
        """Return a file name for ``href`` whose base is unique, ignoring case.

        The base name comes from the unquoted basename of ``href`` passed
        through ``ascii_filename``, without its extension, cut to 75
        characters (``'image'`` if nothing is left). A counter is appended
        until the lower-cased base has not been used before, then the
        lower-cased ``fmt`` is added as extension.
        """
        fname = ascii_filename(urllib.parse.unquote(posixpath.basename(href)))
        fname = posixpath.splitext(fname)[0]
        fname = fname[:75].rstrip('.') or 'image'
        num = 0
        base = fname
        while fname.lower() in self.seen_filenames:
            num += 1
            fname = base + str(num)
        self.seen_filenames.add(fname.lower())
        fname += os.extsep + fmt.lower()
        return fname

    def serialize(self, images_map):
        """Add a ``'word/<fname>' -> callable`` entry to ``images_map`` for every image.

        Calling the stored callable returns the image data (see :meth:`get_data`).
        """
        for img in self.images.values():
            images_map['word/' + img.fname] = functools.partial(self.get_data, img.item)

    def get_data(self, item):
        """Return ``item.data`` and unload the item's data from memory afterwards."""
        try:
            return item.data
        finally:
            item.unload_data_from_memory(False)

    def create_cover_markup(self, img, preserve_aspect_ratio, width, height):
        """Create the ``w:drawing`` element for a cover image centered on the page.

        :param img: ``Image`` record of the cover
        :param preserve_aspect_ratio: when true, the shorter image side is
            recomputed from the other requested dimension so the image keeps
            its proportions
        :param width: requested width, converted with ``pt_to_emu``
        :param height: requested height, converted with ``pt_to_emu``
        :return: the ``w:drawing`` element
        """
        self.count += 1
        makeelement = self.document_relationships.namespace.makeelement
        namespaces = self.document_relationships.namespace.namespaces
        if preserve_aspect_ratio:
            if img.width >= img.height:
                ar = img.height / img.width
                height = ar * width
            else:
                ar = img.width / img.height
                width = ar * height

        root = etree.Element('root', nsmap=namespaces)
        ans = makeelement(root, 'w:drawing', append=False)
        parent = makeelement(ans, 'wp:anchor', **{'dist' + edge: '0' for edge in 'LRTB'})
        self._init_anchor(makeelement, parent)
        makeelement(makeelement(parent, 'wp:positionH', relativeFrom='page'), 'wp:align').text = 'center'
        makeelement(makeelement(parent, 'wp:positionV', relativeFrom='page'), 'wp:align').text = 'center'
        width, height = map(pt_to_emu, (width, height))
        makeelement(parent, 'wp:extent', cx=str(width), cy=str(height))
        makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
        makeelement(parent, 'wp:wrapTopAndBottom')
        self.create_docx_image_markup(parent, 'cover.jpg', 'Cover', img.rid, width, height)
        return ans

    def write_cover_block(self, body, cover_image):
        """Insert a paragraph holding ``cover_image`` as the first child of ``body``.

        The first ``pageBreakBefore`` element found by the XPath query gets
        its ``w:val`` attribute set to ``'on'``.

        :raises IndexError: if ``body`` is empty or no ``pageBreakBefore``
            element is found
        """
        makeelement = self.document_relationships.namespace.makeelement
        namespaces = self.document_relationships.namespace.namespaces
        pbb = body[0].xpath('//*[local-name()="pageBreakBefore"]')[0]
        pbb.set('{%s}val' % namespaces['w'], 'on')
        p = makeelement(body, 'w:p', append=False)
        body.insert(0, p)
        r = makeelement(p, 'w:r')
        r.append(cover_image)