import mimetypes
import os
import pkg_resources
import re
import string
import sys
import urllib.parse
from xml.sax import saxutils

from ebook_converter import constants as const
from ebook_converter.utils import date
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb.base import XPath, xml2text, urlnormalize
from ebook_converter.library.comments import comments_to_html, markdown
from ebook_converter.ebooks.chardet import strip_encoding_declarations
from ebook_converter.ebooks.metadata import fmt_sidx, rating_to_stars


# Matches the <meta> marker that identifies a document as a generated jacket.
JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]'


class SafeFormatter(string.Formatter):
    """``string.Formatter`` that renders unknown named fields as ''."""

    def get_value(self, *args, **kwargs):
        """Look up a field value, returning '' when the lookup raises
        ``KeyError`` instead of propagating the error."""
        try:
            return string.Formatter.get_value(self, *args, **kwargs)
        except KeyError:
            return ''


class Base(object):
    """Image-removal helper shared by the processors below.

    Subclasses are expected to set ``self.oeb`` before calling
    ``remove_images``.
    """

    def remove_images(self, item, limit=1):
        """Remove up to ``limit`` ``<img src=...>`` elements from ``item``.

        When the referenced image can be found in ``self.oeb.manifest.hrefs``
        (first by the href resolved through ``item.abshref``, then by its
        ``urlnormalize``d form) it is removed from the manifest and the
        guide as well. The ``<img>`` element itself is removed either way.

        :param item: object exposing ``data`` (the parsed tree) and
                     ``abshref``.
        :param limit: maximum number of elements to remove.
        :return: the number of ``<img>`` elements removed.
        """
        path = XPath('//h:img[@src]')
        removed = 0
        for img in path(item.data):
            if removed >= limit:
                break
            href = item.abshref(img.get('src'))
            image = self.oeb.manifest.hrefs.get(href)
            if image is None:
                # Retry with the normalized href before giving up.
                href = urlnormalize(href)
                image = self.oeb.manifest.hrefs.get(href)
            if image is not None:
                self.oeb.manifest.remove(image)
                self.oeb.guide.remove_by_href(href)
            img.getparent().remove(img)
            removed += 1
        return removed


class RemoveFirstImage(Base):
    """Processor that removes the first image found in the book's spine."""

    def remove_first_image(self):
        """Remove the first image of the first non-jacket spine item that
        has one.

        If the item is left with no text and no ``<img>``/``<svg>``
        afterwards, the item itself is removed from the manifest, and every
        TOC entry and guide reference pointing at it is removed too. A
        warning is logged when no image could be removed at all.
        """
        deleted_item = None
        for item in self.oeb.spine:
            if XPath(JACKET_XPATH)(item.data):
                # Never touch images that belong to a jacket page.
                continue
            removed = self.remove_images(item)
            if removed > 0:
                self.log('Removed first image')
                body = XPath('//h:body')(item.data)
                if body:
                    raw = xml2text(body[0]).strip()
                    imgs = XPath('//h:img|//svg:svg')(item.data)
                    if not raw and not imgs:
                        self.log('Removing %s as it has no content' %
                                 item.href)
                        self.oeb.manifest.remove(item)
                        deleted_item = item
                break
        else:
            # Loop finished without a ``break``: nothing was removed.
            self.log.warn('Could not find first image to remove')

        if deleted_item is not None:
            # Iterate over a copy, entries are removed while looping.
            for item in list(self.oeb.toc):
                href = urllib.parse.urldefrag(item.href)[0]
                if href == deleted_item.href:
                    self.oeb.toc.remove(item)
            self.oeb.guide.remove_by_href(deleted_item.href)

    def __call__(self, oeb, opts, metadata):
        """
        Remove the first image of the book if ``opts.remove_first_image``
        is set; otherwise do nothing. ``metadata`` is accepted but unused.
        """
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        if opts.remove_first_image:
            self.remove_first_image()


class Jacket(Base):
    """
    Book jacket manipulation. Remove first image and insert comments at start
    of book.
    """

    def insert_metadata(self, mi):
        """Render a jacket page from ``mi`` and insert it at the start of
        the spine.

        Title, authors, tags and comments read from ``self.oeb.metadata``
        are handed to ``render_jacket`` as ``alt_*`` fallback values. Images
        that the rendered jacket references through existing ``file://``
        paths are added to the manifest and the ``src`` attributes are
        rewritten to point at the embedded copies.
        """
        self.log('Inserting metadata into book...')

        # Each lookup is best effort: missing metadata yields a fallback.
        try:
            tags = list(map(str, self.oeb.metadata.subject))
        except Exception:
            tags = []

        try:
            comments = str(self.oeb.metadata.description[0])
        except Exception:
            comments = ''

        try:
            title = str(self.oeb.metadata.title[0])
        except Exception:
            title = 'Unknown'

        try:
            authors = list(map(str, self.oeb.metadata.creator))
        except Exception:
            authors = ['Unknown']

        root = render_jacket(mi, self.opts.output_profile,
                             alt_title=title, alt_tags=tags,
                             alt_authors=authors, alt_comments=comments,
                             rescale_fonts=True)
        jacket_id, href = self.oeb.manifest.generate('calibre_jacket',
                                                     'jacket.xhtml')

        jacket = self.oeb.manifest.add(jacket_id, href,
                                       mimetypes.guess_type(href)[0],
                                       data=root)
        self.oeb.spine.insert(0, jacket, True)
        self.oeb.inserted_metadata_jacket = jacket

        for img, path in referenced_images(root):
            self.oeb.log('Embedding referenced image %s into jacket' % path)
            ext = path.rpartition('.')[-1].lower()
            item_id, href = self.oeb.manifest.generate('jacket_image',
                                                       'jacket_img.' + ext)
            with open(path, 'rb') as f:
                item = self.oeb.manifest.add(
                    item_id, href, mimetypes.guess_type(href)[0],
                    data=f.read())
            item.unload_data_from_memory()
            img.set('src', jacket.relhref(item.href))

    def remove_existing_jacket(self):
        """Remove the first jacket found among the first four spine items,
        together with all images it references."""
        for x in self.oeb.spine[:4]:
            if XPath(JACKET_XPATH)(x.data):
                self.remove_images(x, limit=sys.maxsize)
                self.oeb.manifest.remove(x)
                self.log('Removed existing jacket')
                break

    def __call__(self, oeb, opts, metadata):
        """
        Add metadata in jacket.xhtml if specified in opts
        If not specified, remove previous jacket instance
        """
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        self.remove_existing_jacket()
        if opts.insert_metadata:
            self.insert_metadata(metadata)


# Render Jacket {{{

def get_rating(rating, rchar, e_rchar):
    """Return a five character rating string built from ``rchar`` (filled)
    and ``e_rchar`` (empty).

    ``rating`` is halved and clamped to the range 0..5. An empty string is
    returned when ``rating`` cannot be converted to a float or when the
    halved value is below 1.
    """
    try:
        num = float(rating) / 2
    except (TypeError, ValueError, OverflowError):
        return ''
    num = min(max(0, num), 5)
    if num < 1:
        return ''
    filled = int(num)
    return '%s%s' % (rchar * filled, e_rchar * (5 - filled))


class Series(str):
    """XML-escaped series description usable directly in the template.

    The string value is ``"<index> of <series>"`` (index in arabic
    numerals), or just the series name when there is no index. Extra
    attributes:

    * ``roman`` - same text with the index formatted with ``use_roman=True``
    * ``name`` - the escaped series name
    * ``number`` / ``roman_number`` - the index alone (1.0 is used when the
      index is ``None``)
    """

    def __new__(cls, series, series_index):
        if series and series_index is not None:
            _roman = saxutils.escape(fmt_sidx(series_index, use_roman=True))
            _no_roman = saxutils.escape(fmt_sidx(series_index,
                                                 use_roman=False))
            roman = '{1} of {0}'.format(saxutils.escape(series), _roman)
            combined = '{1} of {0}'.format(saxutils.escape(series),
                                           _no_roman)
        else:
            combined = roman = saxutils.escape(series or '')

        # Only fall back to 1.0 for a missing index; 0 is a valid index and
        # is already treated as such by the ``is not None`` check above.
        index = 1.0 if series_index is None else series_index

        s = str.__new__(cls, combined)
        s.roman = roman
        s.name = saxutils.escape(series or '')
        s.number = saxutils.escape(fmt_sidx(index, use_roman=False))
        s.roman_number = saxutils.escape(fmt_sidx(index, use_roman=True))
        return s


class Tags(str):
    """XML-escaped, comma separated tag list usable in the template.

    Extra attributes: ``alphabetical`` (the same list, sorted) and
    ``tags_list`` (the escaped tags as a list). ``output_profile`` is
    accepted but not used.
    """

    def __new__(cls, tags, output_profile):
        tags = [saxutils.escape(x) for x in tags or ()]
        t = str.__new__(cls, ', '.join(tags))
        t.alphabetical = ', '.join(sorted(tags))
        t.tags_list = tags
        return t


def postprocess_jacket(root, output_profile, has_data):
    """Strip out header items of the generated jacket that have no data.

    :param root: parsed jacket tree; modified in place.
    :param output_profile: object whose ``short_name`` decides whether the
                           kindle banner rule is kept.
    :param has_data: mapping with boolean entries for ``series``,
                     ``rating``, ``tags`` and ``pubdate``.
    """

    def extract(tag):
        # Remove ``tag`` but keep the text that followed it. The tail is
        # read back from the removed element and re-attached to whatever
        # now precedes that position.
        parent = tag.getparent()
        idx = parent.index(tag)
        parent.remove(tag)
        if tag.tail:
            if idx == 0:
                parent.text = (parent.text or '') + tag.tail
            else:
                # Attach to the preceding sibling, not to the last child of
                # the parent, so the text stays where it was.
                prev = parent[idx - 1]
                prev.tail = (prev.tail or '') + tag.tail

    def extract_class(cls):
        for tag in root.xpath('//*[@class="_"]'.replace('_', cls)):
            extract(tag)

    for key in 'series rating tags'.split():
        if not has_data[key]:
            extract_class('cbj_' + key)
    if not has_data['pubdate']:
        # NOTE(review): class name is spelled "pubdata" here; presumably it
        # mirrors the class used in template.xhtml - confirm before
        # "correcting" it.
        extract_class('cbj_pubdata')
    if output_profile.short_name != 'kindle':
        extract_class('cbj_kindle_banner_hr')


def _read_jacket_resource(name):
    """Return the decoded text of ``data/jacket/<name>`` from the package."""
    path = pkg_resources.resource_filename('ebook_converter',
                                           'data/jacket/' + name)
    with open(path, 'rb') as fobj:
        return fobj.read().decode()


def render_jacket(mi, output_profile, alt_title='Unknown', alt_tags=None,
                  alt_comments='', alt_publisher='', rescale_fonts=False,
                  alt_authors=None):
    """Render the jacket page for ``mi`` and return the parsed tree.

    The ``alt_*`` arguments are used when the corresponding field of ``mi``
    is empty.

    :param mi: metadata object describing the book.
    :param output_profile: provides ``ratings_char``,
                           ``empty_ratings_char`` and ``short_name``.
    :param alt_title: fallback title.
    :param alt_tags: fallback tags (``None`` means no tags).
    :param alt_comments: fallback comments.
    :param alt_publisher: fallback publisher.
    :param rescale_fonts: when true, wrap the body content in a
                          ``<div data-calibre-rescale="100">``.
    :param alt_authors: fallback authors, defaults to ``('Unknown',)``.
    :return: root of the parsed, post-processed jacket document.
    """
    css = _read_jacket_resource('stylesheet.css')
    template = _read_jacket_resource('template.xhtml')

    # Drop comments from both resources before formatting. The template
    # pattern used to be empty, which made the substitution a no-op.
    template = re.sub(r'<!--.*?-->', '', template, flags=re.DOTALL)
    css = re.sub(r'/\*.*?\*/', '', css, flags=re.DOTALL)

    try:
        title_str = alt_title if mi.is_null('title') else mi.title
    except Exception:
        title_str = 'Unknown'
    title_str = saxutils.escape(title_str)
    # NOTE(review): this format string adds nothing around the title; it
    # looks like wrapping markup may have been lost - confirm.
    title = '%s' % title_str

    series = Series(mi.series, mi.series_index)

    try:
        if not mi.is_null('publisher'):
            publisher = mi.publisher
        else:
            publisher = alt_publisher
    except Exception:
        publisher = ''
    publisher = saxutils.escape(publisher)

    try:
        if date.is_date_undefined(mi.pubdate):
            pubdate = ''
        else:
            dt = date.as_local_time(mi.pubdate)
            pubdate = date.strftime('%Y', dt.timetuple())
    except Exception:
        pubdate = ''

    rating = get_rating(mi.rating, output_profile.ratings_char,
                        output_profile.empty_ratings_char)

    tags = Tags((mi.tags if mi.tags else alt_tags), output_profile)

    comments = mi.comments if mi.comments else alt_comments
    comments = comments.strip()
    if comments:
        comments = comments_to_html(comments)

    # Temporarily substitute the fallback authors so that
    # ``format_authors`` can be used; always put the original value back.
    orig = mi.authors
    if mi.is_null('authors'):
        mi.authors = list(alt_authors or ('Unknown',))
    try:
        author = mi.format_authors()
    except Exception:
        author = ''
    finally:
        mi.authors = orig
    author = saxutils.escape(author)

    # Filled in by generate_html(), consumed by postprocess_jacket().
    has_data = {}

    def generate_html(comments):
        args = {'author': author,
                'comments': comments,
                'css': css,
                'footer': '',
                'pubdate': pubdate,
                'pubdate_label': 'Published',
                'publisher': publisher,
                'rating': rating,
                'rating_label': 'Rating',
                'searchable_tags': ' '.join(saxutils.escape(t) + 'ttt'
                                            for t in tags.tags_list),
                'series': series,
                'series_label': 'Series',
                'tags': tags,
                'tags_label': 'Tags',
                'title': title,
                'title_str': title_str,
                'xmlns': const.XHTML_NS}

        # Custom fields are exposed under their key with '#' replaced by
        # '_', plus a matching '<key>_label' entry.
        for key in mi.custom_field_keys():
            m = mi.get_user_metadata(key, False) or {}
            try:
                display_name, val = mi.format_field_extended(key)[:2]
                dkey = key.replace('#', '_')
                dt = m.get('datatype')
                if dt == 'series':
                    args[dkey] = Series(mi.get(key), mi.get(key + '_index'))
                elif dt == 'rating':
                    args[dkey] = rating_to_stars(
                        mi.get(key),
                        m.get('display', {}).get('allow_half_stars', False))
                elif dt == 'comments':
                    val = val or ''
                    display = m.get('display', {})
                    ctype = display.get('interpret_as') or 'html'
                    if ctype == 'long-text':
                        # NOTE(review): the literal only surrounds the text
                        # with newlines; presumably whitespace-preserving
                        # markup was lost here - confirm.
                        val = '\n%s\n' % saxutils.escape(val)
                    elif ctype == 'short-text':
                        # NOTE(review): same concern as for long-text.
                        val = '%s' % saxutils.escape(val)
                    elif ctype == 'markdown':
                        val = markdown(val)
                    else:
                        val = comments_to_html(val)
                    args[dkey] = val
                else:
                    args[dkey] = saxutils.escape(val)
                args[dkey + '_label'] = saxutils.escape(display_name)
            except Exception:
                # if the val (custom column contents) is None, don't add to
                # args
                pass

        # Used in the comment describing use of custom columns in templates
        # Don't change this unless you also change it in template.xhtml
        args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
        args['_genre'] = args.get('_genre', '{_genre}')

        formatter = SafeFormatter()
        generated_html = formatter.format(template, **args)
        has_data['series'] = bool(series)
        has_data['tags'] = bool(tags)
        has_data['rating'] = bool(rating)
        has_data['pubdate'] = bool(pubdate)

        return strip_encoding_declarations(generated_html)

    from ebook_converter.ebooks.oeb.polish.parsing import parse
    raw = generate_html(comments)
    root = parse(raw, line_numbers=False, force_html5_parse=True)

    if rescale_fonts:
        # We ensure that the conversion pipeline will set the font sizes for
        # text in the jacket to the same size as the font sizes for the rest
        # of the text in the book. That means that as long as the jacket uses
        # relative font sizes (em or %), the post conversion font size will
        # be the same as for text in the main book. So text with size x em
        # will be rescaled to the same value in both the jacket and the main
        # content.
        #
        # We cannot use data-calibre-rescale 100 on the body tag as that will
        # just give the body tag a font size of 1em, which is useless.
        for body in root.xpath('//*[local-name()="body"]'):
            fw = body.makeelement(base.tag('xhtml', 'div'))
            fw.set('data-calibre-rescale', '100')
            # Snapshot the children first: appending moves them out of
            # ``body`` while we loop.
            for child in list(body):
                fw.append(child)
            body.append(fw)

    postprocess_jacket(root, output_profile, has_data)

    from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree
    pretty_html_tree(None, root)
    return root

# }}}


def linearize_jacket(oeb):
    """Turn the table markup of the first jacket found among the first four
    spine items into ``<div>`` (table, tr, th) and ``<span>`` (td)
    elements."""
    for x in oeb.spine[:4]:
        if XPath(JACKET_XPATH)(x.data):
            for e in XPath('//h:table|//h:tr|//h:th')(x.data):
                e.tag = base.tag('xhtml', 'div')
            for e in XPath('//h:td')(x.data):
                e.tag = base.tag('xhtml', 'span')
            break


def referenced_images(root):
    """Yield ``(img, path)`` for every ``<img>`` in ``root`` whose ``src``
    starts with ``file://`` and whose remaining path exists on disk."""
    for img in XPath('//h:img[@src]')(root):
        src = img.get('src')
        if src.startswith('file://'):
            path = src[7:]
            if os.path.exists(path):
                yield img, path