1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-02-23 18:45:55 +01:00
Files
ebook-converter/ebook_converter/ebooks/oeb/transforms/jacket.py
gryf ce89f5c9d1 Use the real constants module.
This is an ongoing refactor of the calibre code to make it more
readable and to transform it into something more coherent.

In this patch, the imports of some modules are changed so that they no
longer pollute each module's namespace with other modules' symbols,
which were often themselves imported from other modules. Yuck.
2020-05-29 17:04:53 +02:00

406 lines
14 KiB
Python

import sys, os, re
from xml.sax.saxutils import escape
from string import Formatter
import pkg_resources
import urllib.parse
from ebook_converter import constants as const
from ebook_converter import guess_type, strftime
from ebook_converter.constants_old import iswindows
from ebook_converter.ebooks.oeb.base import XPath, xml2text, urlnormalize
from ebook_converter.library.comments import comments_to_html, markdown
from ebook_converter.utils.date import is_date_undefined, as_local_time
from ebook_converter.ebooks.chardet import strip_encoding_declarations
from ebook_converter.ebooks.metadata import fmt_sidx, rating_to_stars
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
# XPath expression selecting the
# <meta name="calibre-content" content="jacket"> element. The code in this
# module treats a spine item whose document matches it as a jacket page.
JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]'
class SafeFormatter(Formatter):
    '''
    ``string.Formatter`` variant that tolerates unknown field names.

    A field whose name is not among the supplied keyword arguments is
    rendered as an empty string instead of raising ``KeyError``.
    '''

    def get_value(self, *args, **kwargs):
        '''
        Look the field up as ``Formatter.get_value`` does, substituting ``''``
        when the lookup raises ``KeyError``.
        '''
        try:
            value = super().get_value(*args, **kwargs)
        except KeyError:
            value = ''
        return value
class Base:
    '''
    Common helper shared by the jacket related transforms.

    Subclasses are expected to set ``self.oeb`` before calling
    :meth:`remove_images`.
    '''

    def remove_images(self, item, limit=1):
        '''
        Remove up to ``limit`` images referenced by ``item``.

        Every ``img`` element with a ``src`` attribute is resolved against
        the manifest, first with the raw absolute href and then with its
        normalized form. Images found in the manifest are removed from the
        manifest and the guide, and their ``img`` element is dropped from the
        document. Images that cannot be resolved are left untouched.

        :param item: manifest item whose ``data`` tree is searched.
        :param limit: maximum number of images to remove.
        :return: number of images actually removed.
        '''
        count = 0
        for img in XPath('//h:img[@src]')(item.data):
            if count >= limit:
                break
            href = item.abshref(img.get('src'))
            image = self.oeb.manifest.hrefs.get(href)
            if image is None:
                # Retry with the normalized form of the URL.
                href = urlnormalize(href)
                image = self.oeb.manifest.hrefs.get(href)
            if image is None:
                continue
            self.oeb.manifest.remove(image)
            self.oeb.guide.remove_by_href(href)
            img.getparent().remove(img)
            count += 1
        return count
class RemoveFirstImage(Base):
    '''
    Transform that removes the first image found in the book, skipping any
    jacket page.
    '''

    def remove_first_image(self):
        '''
        Remove the first image of the first non-jacket spine item that has
        one.

        If the spine item is left without any text, ``img`` or ``svg``
        content, the item itself is removed from the manifest, and the table
        of contents entries and guide references pointing at it are dropped
        as well. A warning is logged when no image could be removed.
        '''
        emptied = None
        for item in self.oeb.spine:
            if XPath(JACKET_XPATH)(item.data):
                # Never touch images that belong to the jacket page.
                continue
            if not self.remove_images(item) > 0:
                continue
            self.log('Removed first image')
            bodies = XPath('//h:body')(item.data)
            if bodies:
                text = xml2text(bodies[0]).strip()
                graphics = XPath('//h:img|//svg:svg')(item.data)
                if not (text or graphics):
                    self.log('Removing %s as it has no content'%item.href)
                    self.oeb.manifest.remove(item)
                    emptied = item
            break
        else:
            self.log.warn('Could not find first image to remove')

        if emptied is None:
            return
        # Iterate over a copy because entries are removed while looping.
        for entry in list(self.oeb.toc):
            target = urllib.parse.urldefrag(entry.href)[0]
            if target == emptied.href:
                self.oeb.toc.remove(entry)
        self.oeb.guide.remove_by_href(emptied.href)

    def __call__(self, oeb, opts, metadata):
        '''
        Store ``oeb``, ``opts`` and the logger on the instance and remove the
        first image when ``opts.remove_first_image`` is set.
        '''
        self.oeb = oeb
        self.opts = opts
        self.log = oeb.log
        if opts.remove_first_image:
            self.remove_first_image()
class Jacket(Base):
    '''
    Book jacket manipulation. Remove any existing jacket page and, when
    requested, insert a freshly rendered metadata jacket at the start of the
    book.
    '''

    def insert_metadata(self, mi):
        '''
        Render a jacket page from ``mi`` and insert it as the first spine item.

        Title, authors, tags and comments read from the book's own OEB
        metadata are handed to ``render_jacket`` as the ``alt_*`` fallback
        values. Images that the rendered jacket references through existing
        ``file://`` paths are added to the manifest and their ``src``
        attributes are rewritten to point at the new manifest items.

        :param mi: metadata object passed on to ``render_jacket``.
        '''
        self.log('Inserting metadata into book...')

        # Each fallback is read on its own so that one missing or malformed
        # field does not prevent the jacket from being generated. Only
        # ``Exception`` is caught, so KeyboardInterrupt/SystemExit propagate.
        try:
            tags = list(map(str, self.oeb.metadata.subject))
        except Exception:
            tags = []

        try:
            comments = str(self.oeb.metadata.description[0])
        except Exception:
            comments = ''

        try:
            title = str(self.oeb.metadata.title[0])
        except Exception:
            title = 'Unknown'

        try:
            authors = list(map(str, self.oeb.metadata.creator))
        except Exception:
            authors = ['Unknown']

        root = render_jacket(mi, self.opts.output_profile,
                             alt_title=title, alt_tags=tags,
                             alt_authors=authors, alt_comments=comments,
                             rescale_fonts=True)
        jacket_id, href = self.oeb.manifest.generate('calibre_jacket',
                                                     'jacket.xhtml')

        jacket = self.oeb.manifest.add(jacket_id, href, guess_type(href)[0],
                                       data=root)
        self.oeb.spine.insert(0, jacket, True)
        self.oeb.inserted_metadata_jacket = jacket

        for img, path in referenced_images(root):
            self.oeb.log('Embedding referenced image %s into jacket' % path)
            ext = path.rpartition('.')[-1].lower()
            item_id, href = self.oeb.manifest.generate('jacket_image',
                                                       'jacket_img.'+ext)
            with open(path, 'rb') as f:
                item = self.oeb.manifest.add(item_id, href,
                                             guess_type(href)[0],
                                             data=f.read())
            item.unload_data_from_memory()
            img.set('src', jacket.relhref(item.href))

    def remove_existing_jacket(self):
        '''
        Remove the first jacket page found among the first four spine items,
        together with every image it references.
        '''
        for x in self.oeb.spine[:4]:
            if XPath(JACKET_XPATH)(x.data):
                self.remove_images(x, limit=sys.maxsize)
                self.oeb.manifest.remove(x)
                self.log('Removed existing jacket')
                break

    def __call__(self, oeb, opts, metadata):
        '''
        Remove any previous jacket instance, then add a new jacket built from
        ``metadata`` if ``opts.insert_metadata`` is set.
        '''
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        self.remove_existing_jacket()
        if opts.insert_metadata:
            self.insert_metadata(metadata)
# Render Jacket {{{
def get_rating(rating, rchar, e_rchar):
    '''
    Turn a numeric rating into a five character star string.

    ``rating`` is halved and clamped to the range 0-5. The integer part
    decides how many ``rchar`` characters are emitted; the string is padded
    to five characters with ``e_rchar``.

    :param rating: value convertible with ``float()``.
    :param rchar: string used for a filled star.
    :param e_rchar: string used for an empty star.
    :return: the star string, or ``''`` when ``rating`` cannot be converted
             to a float or amounts to less than one star.
    '''
    try:
        num = float(rating) / 2
    except (TypeError, ValueError, OverflowError):
        # Not a usable number (None, junk text, an integer too large for a
        # float). Anything else, e.g. KeyboardInterrupt, must propagate.
        return ''
    # Keep 0 as the first argument of max(): a NaN rating then clamps to 0
    # instead of leaking into int() below.
    num = min(max(0, num), 5)
    if num < 1:
        return ''
    filled = int(num)
    return '%s%s' % (rchar * filled, e_rchar * (5 - filled))
class Series(str):
    '''
    String holding the escaped series description of a book.

    The string value itself is "<index> of <em><name></em>" with the index
    formatted without roman numerals, or just the escaped series name when
    there is no series or no index. Extra attributes:

    * ``roman``: same text, index formatted with ``use_roman=True``
    * ``name``: escaped series name
    * ``number``: escaped index, formatted with ``use_roman=False``
    * ``roman_number``: escaped index, formatted with ``use_roman=True``
    '''

    def __new__(cls, series, series_index):
        name = escape(series or '')

        def describe(use_roman):
            # "<index> of <em><series></em>" for the requested numbering
            index = escape(fmt_sidx(series_index, use_roman=use_roman))
            return '{1} of <em>{0}</em>'.format(escape(series), index)

        if series and series_index is not None:
            roman = describe(True)
            combined = describe(False)
        else:
            combined = roman = escape(series or u'')

        obj = str.__new__(cls, combined)
        obj.roman = roman
        obj.name = name
        # A missing (or zero) index is formatted as 1.0
        fallback_index = series_index or 1.0
        obj.number = escape(fmt_sidx(fallback_index, use_roman=False))
        obj.roman_number = escape(fmt_sidx(fallback_index, use_roman=True))
        return obj
class Tags(str):
    '''
    String holding the XML escaped tags joined with ``', '``.

    Extra attributes:

    * ``alphabetical``: the same escaped tags, sorted, joined with ``', '``
    * ``tags_list``: list of the escaped tags in their original order

    ``output_profile`` is accepted but not used.
    '''

    def __new__(cls, tags, output_profile):
        escaped = list(map(escape, tags or ()))
        obj = super().__new__(cls, ', '.join(escaped))
        obj.alphabetical = ', '.join(sorted(escaped))
        obj.tags_list = escaped
        return obj
def postprocess_jacket(root, output_profile, has_data):
    '''
    Post-process the generated html to strip out empty header items.

    :param root: parsed jacket tree; must provide ``xpath()``, and its
                 elements ``getparent()``, ``index()``, ``remove()`` and
                 indexing, as lxml elements do.
    :param output_profile: object whose ``short_name`` decides whether the
                           kindle banner rule is kept.
    :param has_data: mapping with the boolean keys ``series``, ``rating``,
                     ``tags`` and ``pubdate``; elements belonging to keys
                     with a false value are removed.
    '''

    def extract(tag):
        # Remove ``tag`` but keep the text that followed it in place: the
        # tail is glued to the preceding sibling, or to the parent's text
        # when ``tag`` was the first child.
        parent = tag.getparent()
        idx = parent.index(tag)
        tail = tag.tail
        parent.remove(tag)
        if not tail:
            return
        if idx == 0:
            parent.text = (parent.text or '') + tail
        else:
            previous = parent[idx - 1]
            previous.tail = (previous.tail or '') + tail

    def extract_class(cls):
        # Only elements whose class attribute is exactly ``cls`` match.
        for tag in root.xpath('//*[@class="%s"]' % cls):
            extract(tag)

    for key in ('series', 'rating', 'tags'):
        if not has_data[key]:
            extract_class('cbj_' + key)
    if not has_data['pubdate']:
        extract_class('cbj_pubdata')
    if output_profile.short_name != 'kindle':
        extract_class('cbj_kindle_banner_hr')
def render_jacket(mi, output_profile,
                  alt_title='Unknown', alt_tags=(), alt_comments='',
                  alt_publisher='', rescale_fonts=False, alt_authors=None):
    '''
    Render the jacket page for a book and return it as a parsed tree.

    The packaged ``data/jacket/template.xhtml`` is filled with values taken
    from ``mi`` and with ``data/jacket/stylesheet.css``, parsed, stripped of
    the header entries that have no data and pretty printed.

    :param mi: metadata object; its title, series, publisher, pubdate,
               rating, tags, comments, authors and custom fields are used.
    :param output_profile: supplies ``ratings_char``, ``empty_ratings_char``
                           and ``short_name``.
    :param alt_title: title used when ``mi`` has none.
    :param alt_tags: tags used when ``mi`` has none.
    :param alt_comments: comments used when ``mi`` has none.
    :param alt_publisher: publisher used when ``mi`` has none.
    :param rescale_fonts: when true, the body content is wrapped in a
                          ``div`` carrying ``data-calibre-rescale="100"``.
    :param alt_authors: authors used when ``mi`` has none; defaults to
                        ``('Unknown',)``.
    :return: root of the parsed jacket document.
    '''
    with open(pkg_resources.resource_filename('ebook_converter',
                                              'data/jacket/stylesheet.css'),
              'rb') as fobj:
        css = fobj.read().decode()

    with open(pkg_resources.resource_filename('ebook_converter',
                                              'data/jacket/template.xhtml'),
              'rb') as fobj:
        template = fobj.read().decode()

    # Strip XML comments from the template and /* */ comments from the css
    template = re.sub(r'<!--.*?-->', '', template, flags=re.DOTALL)
    css = re.sub(r'/\*.*?\*/', '', css, flags=re.DOTALL)

    try:
        title_str = alt_title if mi.is_null('title') else mi.title
    except Exception:
        title_str = 'Unknown'
    title_str = escape(title_str)
    title = '<span class="title">%s</span>' % title_str

    series = Series(mi.series, mi.series_index)

    try:
        if mi.is_null('publisher'):
            publisher = alt_publisher
        else:
            publisher = mi.publisher
    except Exception:
        publisher = ''
    publisher = escape(publisher)

    try:
        if is_date_undefined(mi.pubdate):
            pubdate = ''
        else:
            local_date = as_local_time(mi.pubdate)
            pubdate = strftime('%Y', local_date.timetuple())
    except Exception:
        pubdate = ''

    rating = get_rating(mi.rating, output_profile.ratings_char,
                        output_profile.empty_ratings_char)

    tags = Tags((mi.tags if mi.tags else alt_tags), output_profile)

    # ``or ''`` keeps a None fallback from crashing on strip()
    comments = (mi.comments or alt_comments or '').strip()
    if comments:
        comments = comments_to_html(comments)

    # format_authors() reads mi.authors, so the fallback authors are swapped
    # in temporarily; ``finally`` guarantees the caller's metadata object is
    # restored whatever happens.
    orig_authors = mi.authors
    if mi.is_null('authors'):
        mi.authors = list(alt_authors or ('Unknown',))
    try:
        author = mi.format_authors()
    except Exception:
        author = ''
    finally:
        mi.authors = orig_authors
    author = escape(author)

    def generate_html(comments):
        args = {'author': author,
                'comments': comments,
                'css': css,
                'footer': '',
                'pubdate': pubdate,
                'pubdate_label': 'Published',
                'publisher': publisher,
                'rating': rating,
                'rating_label': 'Rating',
                # tags_list already holds escaped text, escaping it again
                # would turn "&amp;" into "&amp;amp;"
                'searchable_tags': ' '.join(t + 'ttt'
                                            for t in tags.tags_list),
                'series': series,
                'series_label': 'Series',
                'tags': tags,
                'tags_label': 'Tags',
                'title': title,
                'title_str': title_str,
                'xmlns': const.XHTML_NS}

        for key in mi.custom_field_keys():
            m = mi.get_user_metadata(key, False) or {}
            try:
                display_name, val = mi.format_field_extended(key)[:2]
                dkey = key.replace('#', '_')
                datatype = m.get('datatype')
                if datatype == 'series':
                    args[dkey] = Series(mi.get(key), mi.get(key + '_index'))
                elif datatype == 'rating':
                    allow_half = m.get('display', {}).get('allow_half_stars',
                                                          False)
                    args[dkey] = rating_to_stars(mi.get(key), allow_half)
                elif datatype == 'comments':
                    val = val or ''
                    display = m.get('display', {})
                    ctype = display.get('interpret_as') or 'html'
                    if ctype == 'long-text':
                        val = ('<pre style="white-space:pre-wrap">%s</pre>' %
                               escape(val))
                    elif ctype == 'short-text':
                        val = '<span>%s</span>' % escape(val)
                    elif ctype == 'markdown':
                        val = markdown(val)
                    else:
                        val = comments_to_html(val)
                    args[dkey] = val
                else:
                    args[dkey] = escape(val)
                args[dkey+'_label'] = escape(display_name)
            except Exception:
                # if the val (custom column contents) is None, don't add to
                # args; a broken custom column must not break the jacket
                pass

        # Used in the comment describing use of custom columns in templates
        # Don't change this unless you also change it in template.xhtml
        args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
        args['_genre'] = args.get('_genre', '{_genre}')

        # SafeFormatter renders unknown template fields as empty strings
        generated_html = SafeFormatter().format(template, **args)
        return strip_encoding_declarations(generated_html)

    from ebook_converter.ebooks.oeb.polish.parsing import parse
    raw = generate_html(comments)
    root = parse(raw, line_numbers=False, force_html5_parse=True)

    if rescale_fonts:
        # We ensure that the conversion pipeline will set the font sizes for
        # text in the jacket to the same size as the font sizes for the rest of
        # the text in the book. That means that as long as the jacket uses
        # relative font sizes (em or %), the post conversion font size will be
        # the same as for text in the main book. So text with size x em will
        # be rescaled to the same value in both the jacket and the main content.
        #
        # We cannot use data-calibre-rescale 100 on the body tag as that will just
        # give the body tag a font size of 1em, which is useless.
        for body in root.xpath('//*[local-name()="body"]'):
            fw = body.makeelement(const.XHTML_DIV)
            fw.set('data-calibre-rescale', '100')
            # Snapshot the children: appending moves them out of ``body``
            for child in list(body):
                fw.append(child)
            body.append(fw)

    has_data = {'series': bool(series),
                'tags': bool(tags),
                'rating': bool(rating),
                'pubdate': bool(pubdate)}
    postprocess_jacket(root, output_profile, has_data)

    from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree
    pretty_html_tree(None, root)
    return root
# }}}
def linearize_jacket(oeb):
    '''
    Replace the table markup of the jacket page with plain containers.

    The first of the first four spine items that is a jacket page gets its
    ``table``, ``tr`` and ``th`` elements retagged as ``div`` and its ``td``
    elements retagged as ``span``. Nothing happens if none of them is a
    jacket page.
    '''
    retag_rules = (
        ('//h:table|//h:tr|//h:th', const.XHTML_DIV),
        ('//h:td', const.XHTML_SPAN),
    )
    for spine_item in oeb.spine[:4]:
        if not XPath(JACKET_XPATH)(spine_item.data):
            continue
        for expression, new_tag in retag_rules:
            for elem in XPath(expression)(spine_item.data):
                elem.tag = new_tag
        # Only the first jacket page is processed
        break
def referenced_images(root):
    '''
    Yield ``(img, path)`` for every ``img`` element in ``root`` whose ``src``
    starts with ``file://`` and whose path exists on disk.

    ``path`` is the ``src`` value without the ``file://`` prefix; on Windows
    one leading ``/`` is dropped as well.
    '''
    scheme = 'file://'
    for img in XPath('//h:img[@src]')(root):
        src = img.get('src')
        if not src.startswith(scheme):
            continue
        path = src[len(scheme):]
        if iswindows and path.startswith('/'):
            path = path[1:]
        if not os.path.exists(path):
            continue
        yield img, path