mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-18 03:53:32 +02:00
Added epub write support
This commit is contained in:
762
ebook_converter/ebooks/covers.py
Normal file
762
ebook_converter/ebooks/covers.py
Normal file
@@ -0,0 +1,762 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import re, random, unicodedata, numbers
|
||||||
|
from collections import namedtuple
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from math import ceil, sqrt, cos, sin, atan2
|
||||||
|
from polyglot.builtins import iteritems, itervalues, map, zip, string_or_bytes
|
||||||
|
from itertools import chain
|
||||||
|
|
||||||
|
from PyQt5.Qt import (
|
||||||
|
QImage, Qt, QFont, QPainter, QPointF, QTextLayout, QTextOption,
|
||||||
|
QFontMetrics, QTextCharFormat, QColor, QRect, QBrush, QLinearGradient,
|
||||||
|
QPainterPath, QPen, QRectF, QTransform, QRadialGradient
|
||||||
|
)
|
||||||
|
|
||||||
|
from calibre import force_unicode, fit_image
|
||||||
|
from calibre.constants import __appname__, __version__
|
||||||
|
from calibre.ebooks.metadata import fmt_sidx
|
||||||
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
|
from calibre.ebooks.metadata.book.formatter import SafeFormat
|
||||||
|
from calibre.gui2 import ensure_app, config, load_builtin_fonts, pixmap_to_data
|
||||||
|
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
||||||
|
from calibre.utils.config import JSONConfig
|
||||||
|
|
||||||
|
# Default settings {{{
# Persistent preferences for cover generation (stored as cover_generation.json)
cprefs = JSONConfig('cover_generation')
cprefs.defaults['title_font_size'] = 120  # px
cprefs.defaults['subtitle_font_size'] = 80  # px
cprefs.defaults['footer_font_size'] = 80  # px
cprefs.defaults['cover_width'] = 1200  # px
cprefs.defaults['cover_height'] = 1600  # px
# None means fall back to the bundled Liberation fonts (see layout_text())
cprefs.defaults['title_font_family'] = None
cprefs.defaults['subtitle_font_family'] = None
cprefs.defaults['footer_font_family'] = None
# User-defined color themes and the names of themes/styles the user disabled
cprefs.defaults['color_themes'] = {}
cprefs.defaults['disabled_color_themes'] = []
cprefs.defaults['disabled_styles'] = []
# Templates use the calibre template language; <b>/<i> markup is parsed by
# parse_text_formatting() when the blocks are laid out
cprefs.defaults['title_template'] = '<b>{title}'
cprefs.defaults['subtitle_template'] = '''{series:'test($, strcat("<i>", $, "</i> - ", raw_field("formatted_series_index")), "")'}'''
cprefs.defaults['footer_template'] = r'''program:
# Show at most two authors, on separate lines.
authors = field('authors');
num = count(authors, ' & ');
authors = sublist(authors, 0, 2, ' & ');
authors = list_re(authors, ' & ', '(.+)', '<b>\1');
authors = re(authors, ' & ', '<br>');
re(authors, '&&', '&')
'''
# Immutable snapshot type with one attribute per preference key
Prefs = namedtuple('Prefs', ' '.join(sorted(cprefs.defaults)))

# Per-process override for roman-numeral series indices; None means use the
# global calibre config (see get_use_roman()/set_use_roman())
_use_roman = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_use_roman():
    """Return whether series indices should be rendered as roman numerals.

    Uses the process-local override when one has been set via
    set_use_roman(), otherwise falls back to the global calibre preference.
    """
    global _use_roman
    if _use_roman is not None:
        return _use_roman
    return config['use_roman_numerals_for_series_number']
|
||||||
|
|
||||||
|
|
||||||
|
def set_use_roman(val):
    """Set the process-local roman-numeral override (coerced to bool)."""
    global _use_roman
    _use_roman = True if val else False
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
# Draw text {{{
|
||||||
|
# Simple 2D point used for text-block positions on the cover
Point = namedtuple('Point', 'x y')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_text_formatting(text):
    """Parse simple markup (<b>, <strong>, <i>, <em>) embedded in *text*.

    Returns ``(plain_text, formats)`` where plain_text is the text with the
    tags stripped (HTML entities are left in place; they are resolved later
    by unescape_formatting()) and formats is a list of
    QTextLayout.FormatRange objects covering the bold/italic spans.
    """
    pos = 0
    tokens = []
    for m in re.finditer(r'</?([a-zA-Z1-6]+)/?>', text):
        q = text[pos:m.start()]
        if q:
            tokens.append((False, q))
        # (tag-name, is-closing-tag)
        tokens.append((True, (m.group(1).lower(), '/' in m.group()[:2])))
        pos = m.end()
    if tokens:
        if text[pos:]:
            tokens.append((False, text[pos:]))
    else:
        tokens = [(False, text)]

    ranges, open_ranges, text = [], [], []
    offset = 0
    for is_tag, tok in tokens:
        if is_tag:
            tag, closing = tok
            if closing:
                if open_ranges:
                    r = open_ranges.pop()
                    r[-1] = offset - r[-2]
                    # Discard empty ranges
                    if r[-1] > 0:
                        ranges.append(r)
            else:
                if tag in {'b', 'strong', 'i', 'em'}:
                    open_ranges.append([tag, offset, -1])
        else:
            # BUGFIX: count '&amp;' as the single '&' it will render as,
            # otherwise the format ranges drift out of alignment. The
            # previous code had the no-op tok.replace('&', '&').
            offset += len(tok.replace('&amp;', '&'))
            text.append(tok)
    text = ''.join(text)
    formats = []
    for tag, start, length in chain(ranges, open_ranges):
        fmt = QTextCharFormat()
        if tag in {'b', 'strong'}:
            fmt.setFontWeight(QFont.Bold)
        elif tag in {'i', 'em'}:
            fmt.setFontItalic(True)
        else:
            continue
        if length == -1:
            # Tag was never closed: extend its format to the end of the text
            length = len(text) - start
        if length > 0:
            r = QTextLayout.FormatRange()
            r.format = fmt
            r.start, r.length = start, length
            formats.append(r)
    return text, formats
|
||||||
|
|
||||||
|
|
||||||
|
class Block(object):
    """One laid-out block of rich text (title, subtitle or footer).

    Splits the input on '<br>', lays each segment out as a QTextLayout
    limited to *max_height* pixels, and remembers the leading between
    segments so the whole block can be positioned and drawn as a unit.
    """

    def __init__(self, text='', width=0, font=None, img=None, max_height=100, align=Qt.AlignCenter):
        # self.layouts holds QTextLayout objects interleaved with numeric
        # leading values (inter-segment spacing)
        self.layouts = []
        self._position = Point(0, 0)
        self.leading = self.line_spacing = 0
        if font is not None:
            fm = QFontMetrics(font, img)
            self.leading = fm.leading()
            self.line_spacing = fm.lineSpacing()
        for text in text.split('<br>') if text else ():
            text, formats = parse_text_formatting(sanitize(text))
            l = QTextLayout(unescape_formatting(text), font, img)
            l.setAdditionalFormats(formats)
            to = QTextOption(align)
            to.setWrapMode(QTextOption.WrapAtWordBoundaryOrAnywhere)
            l.setTextOption(to)

            # Lay out lines until the remaining vertical budget is used up
            l.beginLayout()
            height = 0
            while height + 3*self.leading < max_height:
                line = l.createLine()
                if not line.isValid():
                    break
                line.setLineWidth(width)
                height += self.leading
                line.setPosition(QPointF(0, height))
                height += line.height()
            max_height -= height
            l.endLayout()
            if self.layouts:
                self.layouts.append(self.leading)
            else:
                # First segment defines the block's initial position
                self._position = Point(l.position().x(), l.position().y())
            self.layouts.append(l)
        if self.layouts:
            # Trailing leading so stacked blocks do not touch
            self.layouts.append(self.leading)

    @property
    def height(self):
        # Total pixel height: layout bounding rects plus the numeric
        # leading entries interleaved in self.layouts
        return int(ceil(sum(l if isinstance(l, numbers.Number) else l.boundingRect().height() for l in self.layouts)))

    @property
    def position(self):
        return self._position

    @position.setter
    def position(self, new_pos):
        # Move the whole block: stack the layouts vertically from (x, y)
        (x, y) = new_pos
        self._position = Point(x, y)
        if self.layouts:
            self.layouts[0].setPosition(QPointF(x, y))
            y += self.layouts[0].boundingRect().height()
            for l in self.layouts[1:]:
                if isinstance(l, numbers.Number):
                    y += l
                else:
                    l.setPosition(QPointF(x, y))
                    y += l.boundingRect().height()

    def draw(self, painter):
        for l in self.layouts:
            if hasattr(l, 'draw'):
                # Etch effect for the text: a translucent white copy offset
                # by one pixel underneath the real text
                painter.save()
                painter.setRenderHints(QPainter.TextAntialiasing | QPainter.Antialiasing)
                painter.save()
                painter.setPen(QColor(255, 255, 255, 125))
                l.draw(painter, QPointF(1, 1))
                painter.restore()
                l.draw(painter, QPointF())
                painter.restore()
|
||||||
|
|
||||||
|
|
||||||
|
def layout_text(prefs, img, title, subtitle, footer, max_height, style):
    """Lay out the title, subtitle and footer text blocks on the cover.

    :param prefs: a Prefs namedtuple with font families/sizes
    :param img: the QImage being rendered (used for its dimensions)
    :param max_height: vertical budget for the title+subtitle area, px
    :param style: Style instance providing margins and alignments
    :return: (title_block, subtitle_block, footer_block) Block instances
    """
    # Removed the dead no-op ``title, subtitle, footer = title, subtitle,
    # footer`` self-assignment that was here.
    width = img.width() - 2 * style.hmargin
    title_font = QFont(prefs.title_font_family or 'Liberation Serif')
    title_font.setPixelSize(prefs.title_font_size)
    title_font.setStyleStrategy(QFont.PreferAntialias)
    title_block = Block(title, width, title_font, img, max_height, style.TITLE_ALIGN)
    title_block.position = style.hmargin, style.vmargin
    subtitle_block = Block()
    if subtitle:
        subtitle_font = QFont(prefs.subtitle_font_family or 'Liberation Sans')
        subtitle_font.setPixelSize(prefs.subtitle_font_size)
        subtitle_font.setStyleStrategy(QFont.PreferAntialias)
        # Subtitle gets whatever vertical budget the title left over
        gap = 2 * title_block.leading
        mh = max_height - title_block.height - gap
        subtitle_block = Block(subtitle, width, subtitle_font, img, mh, style.SUBTITLE_ALIGN)
        subtitle_block.position = style.hmargin, title_block.position.y + title_block.height + gap

    footer_font = QFont(prefs.footer_font_family or 'Liberation Serif')
    footer_font.setStyleStrategy(QFont.PreferAntialias)
    footer_font.setPixelSize(prefs.footer_font_size)
    footer_block = Block(footer, width, footer_font, img, max_height, style.FOOTER_ALIGN)
    # Footer is bottom-anchored
    footer_block.position = style.hmargin, img.height() - style.vmargin - footer_block.height

    return title_block, subtitle_block, footer_block
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Format text using templates {{{
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize(s):
    """Force *s* to unicode, strip control/invalid-XML characters and
    return the NFC-normalized result."""
    cleaned = clean_xml_chars(clean_ascii_chars(force_unicode(s or '')))
    return unicodedata.normalize('NFC', cleaned)
|
||||||
|
|
||||||
|
|
||||||
|
# Lazily-created process-wide Formatter instance (see formatter())
_formatter = None
# Shared compiled-template cache passed to safe_format()
_template_cache = {}
|
||||||
|
|
||||||
|
|
||||||
|
def escape_formatting(val):
    """HTML-escape *val* so literal &, < and > survive markup parsing.

    BUGFIX: the previous body contained identity replaces like
    ``replace('&', '&')`` (the entity strings had been corrupted), making
    the function a no-op. Restore the proper entity escaping; '&' must be
    escaped first so it is not double-escaped.
    """
    return val.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
|
||||||
|
|
||||||
|
|
||||||
|
def unescape_formatting(val):
    """Inverse of escape_formatting(): convert entities back to characters.

    BUGFIX: the previous body contained identity replaces (corrupted entity
    strings), making the function a no-op. '&amp;' is resolved last so the
    round-trip with escape_formatting() is lossless.
    """
    return val.replace('&lt;', '<').replace('&gt;', '>').replace('&amp;', '&')
|
||||||
|
|
||||||
|
|
||||||
|
class Formatter(SafeFormat):
    """A SafeFormat that HTML-escapes every interpolated field value, so
    metadata containing &, < or > cannot be mistaken for markup."""

    def get_value(self, orig_key, args, kwargs):
        raw = SafeFormat.get_value(self, orig_key, args, kwargs)
        return escape_formatting(raw)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter():
    """Return the process-wide Formatter, creating it on first use."""
    global _formatter
    if _formatter is None:
        _formatter = Formatter()
    return _formatter
|
||||||
|
|
||||||
|
|
||||||
|
def format_fields(mi, prefs):
    """Render the title/subtitle/footer templates against the metadata *mi*.

    Returns an iterator over the three rendered strings, in that order.
    """
    fmt = formatter()

    def render(field):
        # Each template lives in the prefs attribute named after the field
        template = getattr(prefs, field)
        return fmt.safe_format(
            template, mi, _('Template error'), mi, template_cache=_template_cache)

    return map(render, ('title_template', 'subtitle_template', 'footer_template'))
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def preserve_fields(obj, fields):
    """Context manager that restores the listed attributes of *obj* on exit.

    *fields* may be an iterable of names or a whitespace-separated string.
    Attributes that did not exist beforehand are deleted again on exit.
    """
    if isinstance(fields, string_or_bytes):
        fields = fields.split()
    missing = object()  # sentinel marking attributes that were absent
    saved = {name: getattr(obj, name, missing) for name in fields}
    try:
        yield
    finally:
        for name, old in iteritems(saved):
            if old is missing:
                delattr(obj, name)
            else:
                setattr(obj, name, old)
|
||||||
|
|
||||||
|
|
||||||
|
def format_text(mi, prefs):
    # Render the three cover text blocks from the user templates, patching
    # mi only for the duration of the rendering.
    with preserve_fields(mi, 'authors formatted_series_index'):
        # Drop placeholder authors so "Unknown" never appears on a cover
        mi.authors = [a for a in mi.authors if a != _('Unknown')]
        mi.formatted_series_index = fmt_sidx(mi.series_index or 0, use_roman=get_use_roman())
        return tuple(format_fields(mi, prefs))
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
# Colors {{{
|
||||||
|
# The four colors of a theme: two background shades and their contrasting
# text colors
ColorTheme = namedtuple('ColorTheme', 'color1 color2 contrast_color1 contrast_color2')


def to_theme(x):
    """Parse a whitespace-separated string of four hex colors into a dict
    keyed by the ColorTheme field names."""
    values = x.split()
    return dict(zip(ColorTheme._fields[:4], values))
|
||||||
|
|
||||||
|
|
||||||
|
# Black-on-white theme used when a theme entry is missing or invalid
fallback_colors = to_theme('ffffff 000000 000000 ffffff')

# Builtin color themes; users can add more via cprefs['color_themes']
default_color_themes = {
    'Earth': to_theme('e8d9ac c7b07b 564628 382d1a'),
    'Grass': to_theme('d8edb5 abc8a4 375d3b 183128'),
    'Water': to_theme('d3dcf2 829fe4 00448d 00305a'),
    'Silver': to_theme('e6f1f5 aab3b6 6e7476 3b3e40'),
}
|
||||||
|
|
||||||
|
|
||||||
|
def theme_to_colors(theme):
    """Convert a field->hex-string mapping into a ColorTheme of QColors."""
    qcolors = {field: QColor('#' + theme[field]) for field in ColorTheme._fields}
    return ColorTheme(**qcolors)
|
||||||
|
|
||||||
|
|
||||||
|
def load_color_themes(prefs):
    """Return the enabled color themes (builtin + user) as ColorTheme tuples.

    If the user disabled every theme, the builtin themes are returned so
    there is always something to draw with.
    """
    themes = default_color_themes.copy()
    themes.update(prefs.color_themes)
    disabled = frozenset(prefs.disabled_color_themes)
    enabled = [theme_to_colors(v) for k, v in iteritems(themes) if k not in disabled]
    if not enabled:
        # Ignore disabled and return only the builtin color themes
        enabled = [theme_to_colors(v) for k, v in iteritems(default_color_themes)]
    return enabled
|
||||||
|
|
||||||
|
|
||||||
|
def color(color_theme, name):
    """Return the named QColor from *color_theme*, substituting the
    fallback color when the theme value is not a valid color."""
    ans = getattr(color_theme, name)
    if ans.isValid():
        return ans
    return QColor('#' + fallback_colors[name])
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Styles {{{
|
||||||
|
|
||||||
|
|
||||||
|
class Style(object):
    """Base class for cover drawing styles.

    Subclasses set NAME/GUI_NAME and implement __call__(painter, rect, ...)
    which paints the background and returns the three pen colors to use for
    the title, subtitle and footer blocks.
    """

    # All three text blocks centered horizontally by default
    TITLE_ALIGN = SUBTITLE_ALIGN = FOOTER_ALIGN = Qt.AlignHCenter | Qt.AlignTop

    def __init__(self, color_theme, prefs):
        self.load_colors(color_theme)
        self.calculate_margins(prefs)

    def calculate_margins(self, prefs):
        # Margins scale with the cover size, relative to a 600x800 reference
        self.hmargin = int((50 / 600) * prefs.cover_width)
        self.vmargin = int((50 / 800) * prefs.cover_height)

    def load_colors(self, color_theme):
        # Background colors and their contrasting text colors
        self.color1 = color(color_theme, 'color1')
        self.color2 = color(color_theme, 'color2')
        self.ccolor1 = color(color_theme, 'contrast_color1')
        self.ccolor2 = color(color_theme, 'contrast_color2')
|
||||||
|
|
||||||
|
|
||||||
|
class Cross(Style):
    """A horizontal rounded band behind the title plus a vertical band along
    the left edge, together forming a cross."""

    NAME = 'The Cross'
    GUI_NAME = _('The Cross')

    def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
        painter.fillRect(rect, self.color1)
        # Band tall enough for title + subtitle plus some breathing room
        r = QRect(0, int(title_block.position.y), rect.width(),
                  title_block.height + subtitle_block.height + subtitle_block.line_spacing // 2 + title_block.leading)
        painter.save()
        p = QPainterPath()
        # y-radius scaled by the aspect ratio so the corners look round
        p.addRoundedRect(QRectF(r), 10, 10 * r.width()/r.height(), Qt.RelativeSize)
        painter.setClipPath(p)
        painter.setRenderHint(QPainter.Antialiasing)
        painter.fillRect(r, self.color2)
        painter.restore()
        # Vertical band from the left edge up to the title x position
        r = QRect(0, 0, int(title_block.position.x), rect.height())
        painter.fillRect(r, self.color2)
        # Pen colors for (title, subtitle, footer)
        return self.ccolor2, self.ccolor2, self.ccolor1
|
||||||
|
|
||||||
|
|
||||||
|
class Half(Style):
    """A vertical linear gradient: color1 at the top and bottom with color2
    about two-thirds of the way down."""

    NAME = 'Half and Half'
    GUI_NAME = _('Half and half')

    def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
        g = QLinearGradient(QPointF(0, 0), QPointF(0, rect.height()))
        g.setStops([(0, self.color1), (0.7, self.color2), (1, self.color1)])
        painter.fillRect(rect, QBrush(g))
        # Pen colors for (title, subtitle, footer)
        return self.ccolor1, self.ccolor1, self.ccolor1
|
||||||
|
|
||||||
|
|
||||||
|
def rotate_vector(angle, x, y):
    """Rotate the 2D vector (x, y) counter-clockwise by *angle* radians and
    return the rotated (x, y) tuple."""
    ca = cos(angle)
    sa = sin(angle)
    return x * ca - y * sa, x * sa + y * ca
|
||||||
|
|
||||||
|
|
||||||
|
def draw_curved_line(painter_path, dx, dy, c1_frac, c1_amp, c2_frac, c2_amp):
    # Append a cubic Bezier from the path's current position to
    # (current + (dx, dy)). The two control points are given as fractions
    # of the segment length along the segment (c*_frac) and perpendicular
    # amplitudes (c*_amp), rotated into the segment's direction.
    length = sqrt(dx * dx + dy * dy)
    angle = atan2(dy, dx)
    c1 = QPointF(*rotate_vector(angle, c1_frac * length, c1_amp * length))
    c2 = QPointF(*rotate_vector(angle, c2_frac * length, c2_amp * length))
    pos = painter_path.currentPosition()
    painter_path.cubicTo(pos + c1, pos + c2, pos + QPointF(dx, dy))
|
||||||
|
|
||||||
|
|
||||||
|
class Banner(Style):
    """Draws the title area as a ribbon/banner with folded ends."""

    NAME = 'Banner'
    GUI_NAME = _('Banner')
    # Slope of the banner's sides, as a fraction of its height
    GRADE = 0.07

    def calculate_margins(self, prefs):
        Style.calculate_margins(self, prefs)
        # Wider side margins leave room for the folds
        self.hmargin = int(0.15 * prefs.cover_width)
        self.fold_width = int(0.1 * prefs.cover_width)

    def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
        painter.fillRect(rect, self.color1)
        top = title_block.position.y + 2
        extra_spacing = subtitle_block.line_spacing // 2 if subtitle_block.line_spacing else title_block.line_spacing // 3
        height = title_block.height + subtitle_block.height + extra_spacing + title_block.leading
        right = rect.right() - self.hmargin
        width = right - self.hmargin

        # Draw main banner
        p = main = QPainterPath(QPointF(self.hmargin, top))
        # Slightly curved top edge
        draw_curved_line(p, rect.width() - 2 * self.hmargin, 0, 0.1, -0.1, 0.9, -0.1)
        deltax = self.GRADE * height
        p.lineTo(right + deltax, top + height)
        right_corner = p.currentPosition()
        # Curved bottom edge, drawn right-to-left
        draw_curved_line(p, - width - 2 * deltax, 0, 0.1, 0.05, 0.9, 0.05)
        left_corner = p.currentPosition()
        p.closeSubpath()

        # Draw fold rectangles
        rwidth = self.fold_width
        yfrac = 0.1
        width23 = int(0.67 * rwidth)
        rtop = top + height * yfrac

        def draw_fold(x, m=1, corner=left_corner):
            # Build the outer fold shape and the darker inner-fold triangle
            # for one side of the banner; m = -1 mirrors for the right side
            ans = p = QPainterPath(QPointF(x, rtop))
            draw_curved_line(p, rwidth*m, 0, 0.1, 0.1*m, 0.5, -0.2*m)
            fold_upper = p.currentPosition()
            p.lineTo(p.currentPosition() + QPointF(-deltax*m, height))
            fold_corner = p.currentPosition()
            draw_curved_line(p, -rwidth*m, 0, 0.2, -0.1*m, 0.8, -0.1*m)
            draw_curved_line(p, deltax*m, -height, 0.2, 0.1*m, 0.8, 0.1*m)
            p = inner_fold = QPainterPath(corner)
            dp = fold_corner - p.currentPosition()
            draw_curved_line(p, dp.x(), dp.y(), 0.5, 0.3*m, 1, 0*m)
            p.lineTo(fold_upper), p.closeSubpath()
            return ans, inner_fold

        left_fold, left_inner = draw_fold(self.hmargin - width23)
        right_fold, right_inner = draw_fold(right + width23, m=-1, corner=right_corner)

        painter.save()
        painter.setRenderHint(QPainter.Antialiasing)
        pen = QPen(self.ccolor2)
        pen.setWidth(3)
        pen.setJoinStyle(Qt.RoundJoin)
        painter.setPen(pen)
        # Folds first, then the banner on top of them
        for r in (left_fold, right_fold):
            painter.fillPath(r, QBrush(self.color2))
            painter.drawPath(r)
        for r in (left_inner, right_inner):
            # Inner folds are darker to suggest shadow
            painter.fillPath(r, QBrush(self.color2.darker()))
            painter.drawPath(r)
        painter.fillPath(main, QBrush(self.color2))
        painter.drawPath(main)
        painter.restore()
        # Pen colors for (title, subtitle, footer)
        return self.ccolor2, self.ccolor2, self.ccolor1
|
||||||
|
|
||||||
|
|
||||||
|
class Ornamental(Style):
    """Radial-gradient background decorated with corner flourishes and
    double border lines, drawn from an embedded SVG path."""

    NAME = 'Ornamental'
    GUI_NAME = _('Ornamental')

    # SVG vectors {{{
    CORNER_VECTOR = "m 67.791903,64.260958 c -4.308097,-2.07925 -4.086719,-8.29575 0.334943,-9.40552 4.119758,-1.03399 8.732363,5.05239 5.393055,7.1162 -0.55,0.33992 -1,1.04147 -1,1.55902 0,1.59332 2.597425,1.04548 5.365141,-1.1316 1.999416,-1.57274 2.634859,-2.96609 2.634859,-5.7775 0,-9.55787 -9.827495,-13.42961 -24.43221,-9.62556 -3.218823,0.83839 -5.905663,1.40089 -5.970755,1.25 -0.06509,-0.1509 -0.887601,-1.19493 -1.827799,-2.32007 -1.672708,-2.00174 -1.636693,-2.03722 1.675668,-1.65052 1.861815,0.21736 6.685863,-0.35719 10.720107,-1.27678 12.280767,-2.79934 20.195487,-0.0248 22.846932,8.0092 3.187273,9.65753 -6.423297,17.7497 -15.739941,13.25313 z m 49.881417,-20.53932 c -3.19204,-2.701 -3.72967,-6.67376 -1.24009,-9.16334 2.48236,-2.48236 5.35141,-2.67905 7.51523,-0.51523 1.85966,1.85966 2.07045,6.52954 0.37143,8.22857 -2.04025,2.04024 3.28436,1.44595 6.92316,-0.77272 9.66959,-5.89579 0.88581,-18.22422 -13.0777,-18.35516 -5.28594,-0.0496 -10.31098,1.88721 -14.26764,5.4991 -1.98835,1.81509 -2.16454,1.82692 -2.7936,0.18763 -0.40973,-1.06774 0.12141,-2.82197 1.3628,-4.50104 2.46349,-3.33205 1.67564,-4.01299 -2.891784,-2.49938 -2.85998,0.94777 -3.81038,2.05378 -5.59837,6.51495 -1.184469,2.95536 -3.346819,6.86882 -4.805219,8.69657 -1.4584,1.82776 -2.65164,4.02223 -2.65164,4.87662 0,3.24694 -4.442667,0.59094 -5.872557,-3.51085 -1.361274,-3.90495 0.408198,-8.63869 4.404043,-11.78183 5.155844,-4.05558 1.612374,-3.42079 -9.235926,1.65457 -12.882907,6.02725 -16.864953,7.18038 -24.795556,7.18038 -8.471637,0 -13.38802,-1.64157 -17.634617,-5.88816 -2.832233,-2.83224 -3.849773,-4.81378 -4.418121,-8.6038 -1.946289,-12.9787795 8.03227,-20.91713135 19.767685,-15.7259993 5.547225,2.4538018 6.993631,6.1265383 3.999564,10.1557393 -5.468513,7.35914 -15.917883,-0.19431 -10.657807,-7.7041155 1.486298,-2.1219878 1.441784,-2.2225068 -0.984223,-2.2225068 -1.397511,0 -4.010527,1.3130878 -5.806704,2.9179718 -2.773359,2.4779995 -3.265777,3.5977995 -3.265777,7.4266705 0,5.10943 2.254112,8.84197 7.492986,12.40748 8.921325,6.07175 19.286666,5.61396 37.12088,-1.63946 15.35037,-6.24321 21.294999,-7.42408 34.886123,-6.92999 11.77046,0.4279 19.35803,3.05537 24.34054,8.42878 4.97758,5.3681 2.53939,13.58271 -4.86733,16.39873 -4.17361,1.58681 -11.00702,1.19681 -13.31978,-0.76018 z m 26.50156,-0.0787 c -2.26347,-2.50111 -2.07852,-7.36311 0.39995,-10.51398 2.68134,-3.40877 10.49035,-5.69409 18.87656,-5.52426 l 6.5685,0.13301 -7.84029,0.82767 c -8.47925,0.89511 -12.76997,2.82233 -16.03465,7.20213 -1.92294,2.57976 -1.96722,3.00481 -0.57298,5.5 1.00296,1.79495 2.50427,2.81821 4.46514,3.04333 2.92852,0.33623 2.93789,0.32121 1.08045,-1.73124 -1.53602,-1.69728 -1.64654,-2.34411 -0.61324,-3.58916 2.84565,-3.4288 7.14497,-0.49759 5.03976,3.43603 -1.86726,3.48903 -8.65528,4.21532 -11.3692,1.21647 z m -4.17462,-14.20302 c -0.38836,-0.62838 -0.23556,-1.61305 0.33954,-2.18816 1.3439,-1.34389 4.47714,-0.17168 3.93038,1.47045 -0.5566,1.67168 -3.38637,2.14732 -4.26992,0.71771 z m -8.48037,-9.1829 c -12.462,-4.1101 -12.53952,-4.12156 -25.49998,-3.7694 -24.020921,0.65269 -32.338219,0.31756 -37.082166,-1.49417 -5.113999,-1.95305 -8.192504,-6.3647405 -6.485463,-9.2940713 0.566827,-0.972691 1.020091,-1.181447 1.037211,-0.477701 0.01685,0.692606 1.268676,1.2499998 2.807321,1.2499998 1.685814,0 4.868609,1.571672 8.10041,4.0000015 4.221481,3.171961 6.182506,3.999221 9.473089,3.996261 l 4.149585,-0.004 -3.249996,-1.98156 c -3.056252,-1.863441 -4.051566,-3.8760635 -2.623216,-5.3044145 0.794,-0.794 6.188222,1.901516 9.064482,4.5295635 1.858669,1.698271 3.461409,1.980521 10.559493,1.859621 11.30984,-0.19266 20.89052,1.29095 31.97905,4.95208 7.63881,2.52213 11.51931,3.16471 22.05074,3.65141 7.02931,0.32486 13.01836,0.97543 13.30902,1.44571 0.29065,0.47029 -5.2356,0.83436 -12.28056,0.80906 -12.25942,-0.044 -13.34537,-0.2229 -25.30902,-4.16865 z"  # noqa
    # }}}
    # Cache for the parsed QPainterPath, shared by all instances
    PATH_CACHE = {}
    # Logical coordinate system the vectors were authored in (width, height)
    VIEWPORT = (400, 500)

    def calculate_margins(self, prefs):
        # Margins derived from where the ornaments sit in the viewport
        self.hmargin = int((51 / self.VIEWPORT[0]) * prefs.cover_width)
        self.vmargin = int((83 / self.VIEWPORT[1]) * prefs.cover_height)

    def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
        # Parse the SVG corner ornament once per process
        if not self.PATH_CACHE:
            from calibre.utils.speedups import svg_path_to_painter_path
            try:
                self.__class__.PATH_CACHE['corner'] = svg_path_to_painter_path(self.CORNER_VECTOR)
            except Exception:
                import traceback
                traceback.print_exc()
        p = painter
        painter.setRenderHint(QPainter.Antialiasing)
        g = QRadialGradient(QPointF(rect.center()), rect.width())
        g.setColorAt(0, self.color1), g.setColorAt(1, self.color2)
        painter.fillRect(rect, QBrush(g))
        painter.save()
        # Draw in logical viewport coordinates; Qt maps them onto the cover
        painter.setWindow(0, 0, *self.VIEWPORT)
        try:
            path = self.PATH_CACHE['corner']
        except KeyError:
            # SVG parsing failed above; draw empty corners instead
            path = QPainterPath()
        pen = p.pen()
        pen.setColor(self.ccolor1)
        p.setPen(pen)

        def corner():
            # Paint the ornament and its mirror image, then reset the
            # world transform for the next corner
            b = QBrush(self.ccolor1)
            p.fillPath(path, b)
            p.rotate(90), p.translate(100, -100), p.scale(1, -1), p.translate(-103, -97)
            p.fillPath(path, b)
            p.setWorldTransform(QTransform())
        # Top-left corner
        corner()
        # Top right corner
        p.scale(-1, 1), p.translate(-400, 0), corner()
        # Bottom left corner
        p.scale(1, -1), p.translate(0, -500), corner()
        # Bottom right corner
        p.scale(-1, -1), p.translate(-400, -500), corner()
        # Decorative double border lines along the edges
        for y in (28.4, 471.7):
            p.drawLine(QPointF(160, y), QPointF(240, y))
        for x in (31.3, 368.7):
            p.drawLine(QPointF(x, 155), QPointF(x, 345))
        pen.setWidthF(1.8)
        p.setPen(pen)
        for y in (23.8, 476.7):
            p.drawLine(QPointF(160, y), QPointF(240, y))
        for x in (26.3, 373.7):
            p.drawLine(QPointF(x, 155), QPointF(x, 345))
        painter.restore()

        # Pen colors for (title, subtitle, footer)
        return self.ccolor2, self.ccolor2, self.ccolor1
|
||||||
|
|
||||||
|
|
||||||
|
class Blocks(Style):
    """Solid background with a contrasting band over the bottom third."""

    NAME = 'Blocks'
    GUI_NAME = _('Blocks')
    FOOTER_ALIGN = Qt.AlignRight | Qt.AlignTop

    def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
        # Fill the whole cover, then paint the bottom third in the accent
        # color. The original body also built a rect clipped with
        # setBottom(y) that was never used, and repeated the full-cover
        # fillRect(rect, self.color1) — dead code, removed here; the
        # rendered output is unchanged.
        painter.fillRect(rect, self.color1)
        y = rect.height() - rect.height() // 3
        r = QRect(rect)
        r.setTop(y)
        painter.fillRect(r, self.color2)
        # Pen colors for (title, subtitle, footer)
        return self.ccolor1, self.ccolor1, self.ccolor2
|
||||||
|
|
||||||
|
|
||||||
|
def all_styles():
    """Return the set of NAMEs of every Style subclass in this module."""
    names = set()
    for obj in itervalues(globals()):
        if isinstance(obj, type) and issubclass(obj, Style) and obj is not Style:
            names.add(obj.NAME)
    return names
|
||||||
|
|
||||||
|
|
||||||
|
def load_styles(prefs, respect_disabled=True):
    """Return the tuple of enabled Style subclasses.

    If every style has been disabled by the user, the disabling is ignored
    and all styles are returned.
    """
    disabled = frozenset(prefs.disabled_styles) if respect_disabled else ()

    def usable(x):
        return (isinstance(x, type) and issubclass(x, Style) and
                x is not Style and x.NAME not in disabled)

    ans = tuple(x for x in itervalues(globals()) if usable(x))
    if not ans and disabled:
        # If all styles have been disabled, ignore the disabling and return
        # all the styles
        ans = load_styles(prefs, respect_disabled=False)
    return ans
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
def init_environment():
    # A QApplication and the bundled fonts must exist before any Qt text
    # layout/painting can happen
    ensure_app()
    load_builtin_fonts()
|
||||||
|
|
||||||
|
|
||||||
|
def generate_cover(mi, prefs=None, as_qimage=False):
    # Generate a cover image for the metadata object mi, using a randomly
    # chosen enabled color theme and style. Returns PNG-ish image data via
    # pixmap_to_data(), or the QImage itself when as_qimage is True.
    init_environment()
    prefs = prefs or cprefs
    # Snapshot the prefs into an immutable namedtuple
    prefs = {k:prefs.get(k) for k in cprefs.defaults}
    prefs = Prefs(**prefs)
    color_theme = random.choice(load_color_themes(prefs))
    style = random.choice(load_styles(prefs))(color_theme, prefs)
    title, subtitle, footer = format_text(mi, prefs)
    img = QImage(prefs.cover_width, prefs.cover_height, QImage.Format_ARGB32)
    # Title + subtitle get at most the top third of the cover
    title_block, subtitle_block, footer_block = layout_text(
        prefs, img, title, subtitle, footer, img.height() // 3, style)
    p = QPainter(img)
    rect = QRect(0, 0, img.width(), img.height())
    # The style paints the background and chooses the text pen colors
    colors = style(p, rect, color_theme, title_block, subtitle_block, footer_block)
    for block, color in zip((title_block, subtitle_block, footer_block), colors):
        p.setPen(color)
        block.draw(p)
    p.end()
    img.setText('Generated cover', '%s %s' % (__appname__, __version__))
    if as_qimage:
        return img
    return pixmap_to_data(img)
|
||||||
|
|
||||||
|
|
||||||
|
def override_prefs(base_prefs, **overrides):
    """Return a prefs dict based on *base_prefs* with the given overrides.

    The special overrides 'override_color_theme' and 'override_style' force
    a single theme/style by disabling all the others.
    """
    ans = {k: overrides.get(k, base_prefs[k]) for k in cprefs.defaults}

    forced_theme = overrides.get('override_color_theme')
    if forced_theme is not None:
        all_themes = set(default_color_themes) | set(ans['color_themes'])
        if forced_theme in all_themes:
            # Disable every theme except the forced one
            all_themes.discard(forced_theme)
            ans['disabled_color_themes'] = all_themes

    forced_style = overrides.get('override_style')
    if forced_style is not None:
        styles = all_styles()
        if forced_style in styles:
            # Disable every style except the forced one
            styles.discard(forced_style)
            ans['disabled_styles'] = styles

    return ans
|
||||||
|
|
||||||
|
|
||||||
|
def create_cover(title, authors, series=None, series_index=1, prefs=None, as_qimage=False):
    """Create a cover from the specified title, author and series.

    Any user set templates are ignored, to ensure that the specified
    metadata is used.
    """
    mi = Metadata(title, authors)
    if series:
        mi.series, mi.series_index = series, series_index
    # Force the builtin default templates so user customization cannot
    # change what text ends up on the cover
    d = cprefs.defaults
    prefs = override_prefs(
        prefs or cprefs, title_template=d['title_template'], subtitle_template=d['subtitle_template'], footer_template=d['footer_template'])
    return generate_cover(mi, prefs=prefs, as_qimage=as_qimage)
|
||||||
|
|
||||||
|
|
||||||
|
def calibre_cover2(title, author_string='', series_string='', prefs=None, as_qimage=False, logo_path=None):
    # Generate the classic calibre cover: title/series/author text on white
    # with the calibre library logo (or logo_path) drawn in the middle.
    init_environment()
    title, subtitle, footer = '<b>' + escape_formatting(title), '<i>' + escape_formatting(series_string), '<b>' + escape_formatting(author_string)
    prefs = prefs or cprefs
    prefs = {k:prefs.get(k) for k in cprefs.defaults}
    # Normalize all sizes to an 800px-tall reference cover
    scale = 800. / prefs['cover_height']
    scale_cover(prefs, scale)
    prefs = Prefs(**prefs)
    img = QImage(prefs.cover_width, prefs.cover_height, QImage.Format_ARGB32)
    img.fill(Qt.white)
    # colors = to_theme('ffffff ffffff 000000 000000')
    color_theme = theme_to_colors(fallback_colors)

    # NOTE(review): "Calibe" looks like a typo for "Calibre"; the name is
    # local to this function so it is left unchanged here
    class CalibeLogoStyle(Style):
        NAME = GUI_NAME = 'calibre'

        def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
            # Draw the logo scaled to fit between the title area and footer
            top = title_block.position.y + 10
            extra_spacing = subtitle_block.line_spacing // 2 if subtitle_block.line_spacing else title_block.line_spacing // 3
            height = title_block.height + subtitle_block.height + extra_spacing + title_block.leading
            top += height + 25
            bottom = footer_block.position.y - 50
            logo = QImage(logo_path or I('library.png'))
            pwidth, pheight = rect.width(), bottom - top
            scaled, width, height = fit_image(logo.width(), logo.height(), pwidth, pheight)
            x, y = (pwidth - width) // 2, (pheight - height) // 2
            rect = QRect(x, top + y, width, height)
            painter.setRenderHint(QPainter.SmoothPixmapTransform)
            painter.drawImage(rect, logo)
            return self.ccolor1, self.ccolor1, self.ccolor1
    style = CalibeLogoStyle(color_theme, prefs)
    title_block, subtitle_block, footer_block = layout_text(
        prefs, img, title, subtitle, footer, img.height() // 3, style)
    p = QPainter(img)
    rect = QRect(0, 0, img.width(), img.height())
    colors = style(p, rect, color_theme, title_block, subtitle_block, footer_block)
    for block, color in zip((title_block, subtitle_block, footer_block), colors):
        p.setPen(color)
        block.draw(p)
    p.end()
    img.setText('Generated cover', '%s %s' % (__appname__, __version__))
    if as_qimage:
        return img
    return pixmap_to_data(img)
|
||||||
|
|
||||||
|
|
||||||
|
def message_image(text, width=500, height=400, font_size=20):
    """Render *text* word-wrapped on a white image and return the serialized
    image data (via pixmap_to_data)."""
    init_environment()
    canvas = QImage(width, height, QImage.Format_ARGB32)
    canvas.fill(Qt.white)
    painter = QPainter(canvas)
    font = QFont()
    font.setPixelSize(font_size)
    painter.setFont(font)
    # Keep a 10px margin on all sides
    text_area = canvas.rect().adjusted(10, 10, -10, -10)
    painter.drawText(text_area, Qt.AlignJustify | Qt.AlignVCenter | Qt.TextWordWrap, text)
    painter.end()
    return pixmap_to_data(canvas)
|
||||||
|
|
||||||
|
|
||||||
|
def scale_cover(prefs, scale):
    """Scale the cover geometry and font-size preferences in *prefs* in-place
    by the factor *scale*, truncating to integers."""
    scaled_keys = ('cover_width', 'cover_height', 'title_font_size',
                   'subtitle_font_size', 'footer_font_size')
    for key in scaled_keys:
        prefs[key] = int(scale * prefs[key])
|
||||||
|
|
||||||
|
|
||||||
|
def generate_masthead(title, output_path=None, width=600, height=60, as_qimage=False, font_family=None):
    """Generate a masthead image (as used for news downloads) showing *title*.

    Returns the QImage when as_qimage is True, otherwise the serialized image
    data; if output_path is given the data is written there instead of being
    returned.
    """
    init_environment()
    font_family = font_family or cprefs['title_font_family'] or 'Liberation Serif'
    img = QImage(width, height, QImage.Format_ARGB32)
    img.fill(Qt.white)
    p = QPainter(img)
    p.setRenderHints(QPainter.Antialiasing | QPainter.TextAntialiasing)
    f = QFont(font_family)
    f.setStyleStrategy(QFont.PreferAntialias)
    # Separate statements instead of the original comma-joined expression
    # tuple, which discarded both return values and obscured intent.
    f.setPixelSize((height * 3) // 4)
    f.setBold(True)
    p.setFont(f)
    p.drawText(img.rect(), Qt.AlignLeft | Qt.AlignVCenter, sanitize(title))
    p.end()
    if as_qimage:
        return img
    data = pixmap_to_data(img)
    if output_path is None:
        return data
    with open(output_path, 'wb') as f:
        f.write(data)
|
||||||
|
|
||||||
|
|
||||||
|
def test(scale=0.25):
    # Visual smoke test: renders one generated cover for every
    # (color theme, style) combination in a scrollable Qt window.
    from PyQt5.Qt import QLabel, QPixmap, QMainWindow, QWidget, QScrollArea, QGridLayout
    from calibre.gui2 import Application
    app = Application([])
    mi = Metadata('Unknown', ['Kovid Goyal', 'John & Doe', 'Author'])
    mi.series = 'A series & styles'
    m = QMainWindow()
    sa = QScrollArea(m)
    w = QWidget(m)
    sa.setWidget(w)
    l = QGridLayout(w)
    w.setLayout(l), l.setSpacing(30)
    # Account for HiDPI displays so covers render at native resolution
    scale *= w.devicePixelRatioF()
    labels = []
    for r, color in enumerate(sorted(default_color_themes)):
        for c, style in enumerate(sorted(all_styles())):
            mi.series_index = c + 1
            mi.title = 'An algorithmic cover [%s]' % color
            prefs = override_prefs(cprefs, override_color_theme=color, override_style=style)
            scale_cover(prefs, scale)
            img = generate_cover(mi, prefs=prefs, as_qimage=True)
            img.setDevicePixelRatio(w.devicePixelRatioF())
            la = QLabel()
            la.setPixmap(QPixmap.fromImage(img))
            l.addWidget(la, r, c)
            # Keep Python references alive so the labels are not GC-ed
            labels.append(la)
    m.setCentralWidget(sa)
    w.resize(w.sizeHint())
    m.show()
    app.exec_()
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this module directly for a visual test of cover generation
if __name__ == '__main__':
    test()
|
||||||
49
ebook_converter/ebooks/epub/__init__.py
Normal file
49
ebook_converter/ebooks/epub/__init__.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Conversion to EPUB.
|
||||||
|
'''
|
||||||
|
from calibre.utils.zipfile import ZipFile, ZIP_STORED
|
||||||
|
|
||||||
|
|
||||||
|
def rules(stylesheets):
    """Yield every style rule from *stylesheets*, skipping any object that
    does not look like a parsed stylesheet (no cssText attribute)."""
    for sheet in stylesheets:
        if not hasattr(sheet, 'cssText'):
            continue
        for rule in sheet:
            if rule.type == rule.STYLE_RULE:
                yield rule
|
||||||
|
|
||||||
|
|
||||||
|
def simple_container_xml(opf_path, extra_entries=''):
    # Return the text of an EPUB META-INF/container.xml declaring *opf_path*
    # as the package document. *extra_entries* is raw XML for any additional
    # <rootfile> elements and is interpolated verbatim.
    return '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
{extra_entries}
</rootfiles>
</container>
'''.format(opf_path, extra_entries=extra_entries)
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_container(path_to_container, opf_name='metadata.opf',
                         extra_entries=()):
    '''
    Create an empty EPUB document, with a default skeleton.

    :param path_to_container: Path at which the EPUB (zip) file is created.
    :param opf_name: Name of the OPF file declared in container.xml.
    :param extra_entries: Iterable of (path, mimetype, data) triples; each is
        declared as an additional rootfile and written into the archive.
    :return: The open ZipFile, ready for further writes.
    '''
    # The default used to be a mutable list ([]); use an immutable tuple to
    # avoid the shared-mutable-default pitfall. Callers may pass any iterable.
    rootfiles = ''
    for path, mimetype, _ in extra_entries:
        rootfiles += '<rootfile full-path="{0}" media-type="{1}"/>'.format(
            path, mimetype)
    CONTAINER = simple_container_xml(opf_name, rootfiles).encode('utf-8')
    zf = ZipFile(path_to_container, 'w')
    # Per the EPUB OCF spec the mimetype entry must come first and be stored
    # uncompressed
    zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
    zf.writestr('META-INF/', b'', 0o755)
    zf.writestr('META-INF/container.xml', CONTAINER)
    for path, _, data in extra_entries:
        zf.writestr(path, data)
    return zf
|
||||||
389
ebook_converter/ebooks/oeb/polish/css.py
Normal file
389
ebook_converter/ebooks/oeb/polish/css.py
Normal file
@@ -0,0 +1,389 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
from css_parser.css import CSSRule, CSSStyleDeclaration
|
||||||
|
from css_selectors import parse, SelectorSyntaxError
|
||||||
|
|
||||||
|
from calibre import force_unicode
|
||||||
|
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XHTML, css_text
|
||||||
|
from calibre.ebooks.oeb.normalize_css import normalize_filter_css, normalizers
|
||||||
|
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style, pretty_xml_tree, serialize
|
||||||
|
from calibre.utils.icu import numeric_sort_key
|
||||||
|
from css_selectors import Select, SelectorError
|
||||||
|
from polyglot.builtins import iteritems, itervalues, unicode_type, filter
|
||||||
|
|
||||||
|
|
||||||
|
def filter_used_rules(rules, log, select):
    """Yield the rules from *rules* none of whose selectors match anything in
    *select* (i.e. the unused rules)."""
    for rule in rules:
        matched = False
        for selector in rule.selectorList:
            try:
                matched = select.has_matches(selector.selectorText)
            except SelectorError:
                # Cannot parse/execute this selector, be safe and assume it
                # matches something
                matched = True
            if matched:
                break
        if not matched:
            yield rule
|
||||||
|
|
||||||
|
|
||||||
|
def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None):
    """Return the set of stylesheet names (transitively) @import-ed by *name*,
    limited to sheets present in *sheets* and to *recursion_level* levels of
    nesting. *name* itself is never included."""
    sheet = sheet or sheets[name]
    found = set()
    for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
        if not rule.href:
            continue
        imported_name = container.href_to_name(rule.href, name)
        if imported_name in sheets:
            found.add(imported_name)
    if recursion_level > 0:
        # Snapshot before recursing, since found grows during the loop
        for child in tuple(found):
            found |= get_imported_sheets(
                child, container, sheets, recursion_level=recursion_level - 1)
    found.discard(name)
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def merge_declarations(first, second):
    """Copy every property declaration from *second* into *first*,
    overwriting any duplicates."""
    for declaration in second.getProperties():
        first.setProperty(declaration)
|
||||||
|
|
||||||
|
|
||||||
|
def merge_identical_selectors(sheet):
    ' Merge rules that have identical selectors '
    by_selector = defaultdict(list)
    for rule in sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
        by_selector[rule.selectorText].append(rule)
    doomed = []
    for group in itervalues(by_selector):
        first = group[0]
        # Fold every duplicate's declarations into the first rule, then
        # schedule the duplicate for removal
        for duplicate in group[1:]:
            merge_declarations(first.style, duplicate.style)
            doomed.append(duplicate)
    for rule in doomed:
        sheet.cssRules.remove(rule)
    return len(doomed)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_unused_css(container, report=None, remove_unused_classes=False, merge_rules=False):
    '''
    Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content.

    :param report: An optional callable that takes a single argument. It is called with information about the operations being performed.
    :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed.
    :param merge_rules: If True, rules with identical selectors are merged.
    '''
    report = report or (lambda x:x)

    def safe_parse(name):
        # Returns None (implicitly) when the file cannot be parsed as CSS
        try:
            return container.parsed(name)
        except TypeError:
            pass
    # All parseable standalone stylesheets in the book
    sheets = {name:safe_parse(name) for name, mt in iteritems(container.mime_map) if mt in OEB_STYLES}
    sheets = {k:v for k, v in iteritems(sheets) if v is not None}
    num_merged = 0
    if merge_rules:
        for name, sheet in iteritems(sheets):
            num = merge_identical_selectors(sheet)
            if num:
                container.dirty(name)
                num_merged += num
    # Map of sheet name -> names of sheets it @imports (transitively)
    import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets}
    if remove_unused_classes:
        # Lower-cased class names referenced by each sheet's selectors
        class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in iteritems(sheets)}
    # style_rules[name] is progressively narrowed to the rules unused by
    # EVERY document processed so far
    style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in iteritems(sheets)}

    num_of_removed_rules = num_of_removed_classes = 0

    for name, mt in iteritems(container.mime_map):
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        used_classes = set()
        # Inline <style> tags: filtered (and possibly merged) per-document
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
                sheet = container.parse_css(style.text)
                if merge_rules:
                    num = merge_identical_selectors(sheet)
                    if num:
                        num_merged += num
                        container.dirty(name)
                if remove_unused_classes:
                    used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
                # Sheets @imported from an inline style are checked against
                # this document too
                imports = get_imported_sheets(name, container, sheets, sheet=sheet)
                for imported_sheet in imports:
                    style_rules[imported_sheet] = tuple(filter_used_rules(style_rules[imported_sheet], container.log, select))
                    if remove_unused_classes:
                        used_classes |= class_map[imported_sheet]
                rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
                unused_rules = tuple(filter_used_rules(rules, container.log, select))
                if unused_rules:
                    num_of_removed_rules += len(unused_rules)
                    [sheet.cssRules.remove(r) for r in unused_rules]
                    style.text = force_unicode(sheet.cssText, 'utf-8')
                    pretty_script_or_style(container, style)
                    container.dirty(name)

        # Linked stylesheets: narrow their unused-rule sets by this document
        for link in root.xpath('//*[local-name()="link" and @href]'):
            sname = container.href_to_name(link.get('href'), name)
            if sname not in sheets:
                continue
            style_rules[sname] = tuple(filter_used_rules(style_rules[sname], container.log, select))
            if remove_unused_classes:
                used_classes |= class_map[sname]

            for iname in import_map[sname]:
                style_rules[iname] = tuple(filter_used_rules(style_rules[iname], container.log, select))
                if remove_unused_classes:
                    used_classes |= class_map[iname]

        if remove_unused_classes:
            # Strip class tokens not referenced by any reachable stylesheet
            for elem in root.xpath('//*[@class]'):
                original_classes, classes = elem.get('class', '').split(), []
                for x in original_classes:
                    if icu_lower(x) in used_classes:
                        classes.append(x)
                if len(classes) != len(original_classes):
                    if classes:
                        elem.set('class', ' '.join(classes))
                    else:
                        del elem.attrib['class']
                    num_of_removed_classes += len(original_classes) - len(classes)
                    container.dirty(name)

    # Whatever survives in style_rules was used by no document: remove it
    for name, sheet in iteritems(sheets):
        unused_rules = style_rules[name]
        if unused_rules:
            num_of_removed_rules += len(unused_rules)
            [sheet.cssRules.remove(r) for r in unused_rules]
            container.dirty(name)

    num_changes = num_of_removed_rules + num_merged + num_of_removed_classes
    if num_changes > 0:
        if num_of_removed_rules > 0:
            report(ngettext('Removed one unused CSS style rule', 'Removed {} unused CSS style rules',
                            num_of_removed_rules).format(num_of_removed_rules))
        if num_of_removed_classes > 0:
            report(ngettext('Removed one unused class from the HTML', 'Removed {} unused classes from the HTML',
                            num_of_removed_classes).format(num_of_removed_classes))
        if num_merged > 0:
            report(ngettext('Merged one CSS style rule', 'Merged {} CSS style rules',
                            num_merged).format(num_merged))
    if num_of_removed_rules == 0:
        report(_('No unused CSS style rules found'))
    if remove_unused_classes and num_of_removed_classes == 0:
        report(_('No unused class attributes found'))
    if merge_rules and num_merged == 0:
        report(_('No style rules that could be merged found'))
    return num_changes > 0
|
||||||
|
|
||||||
|
|
||||||
|
def filter_declaration(style, properties=()):
    """Remove the given *properties* from the CSSStyleDeclaration *style*.

    Shorthand properties are expanded via the normalizers so that, e.g.,
    removing 'margin-top' from a 'margin' shorthand keeps the other sides.

    :param properties: Iterable of property names to remove.
    :return: True if the declaration was changed.
    """
    # Normalize to a set: the .intersection() call below fails with an
    # AttributeError when a plain tuple/list (or the default ()) is passed.
    properties = frozenset(properties)
    changed = False
    for prop in properties:
        if style.removeProperty(prop) != '':
            changed = True
    all_props = set(style.keys())
    for prop in style.getProperties():
        n = normalizers.get(prop.name, None)
        if n is not None:
            # Expand the shorthand, drop the targeted sub-properties, and
            # re-add the survivors that were not already declared explicitly
            normalized = n(prop.name, prop.propertyValue)
            removed = properties.intersection(set(normalized))
            if removed:
                changed = True
                style.removeProperty(prop.name)
                for prop in set(normalized) - removed - all_props:
                    style.setProperty(prop, normalized[prop])
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def filter_sheet(sheet, properties=()):
    """Remove *properties* from every style rule in *sheet*, deleting any
    rule that is left with no declarations. Returns True if changed."""
    from css_parser.css import CSSRule
    changed = False
    emptied = []
    for rule in sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
        if filter_declaration(rule.style, properties):
            changed = True
        if rule.style.length == 0:
            # Rule has no declarations left; drop it after iteration
            emptied.append(rule)
    for rule in emptied:
        sheet.cssRules.remove(rule)
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def transform_inline_styles(container, name, transform_sheet, transform_style):
    """Apply *transform_sheet* to every CSS <style> tag and *transform_style*
    to every style="" attribute in the document *name*. Returns True if the
    document was modified."""
    root = container.parsed(name)
    changed = False
    for style_tag in root.xpath('//*[local-name()="style"]'):
        is_css = (style_tag.get('type') or 'text/css').lower() == 'text/css'
        if style_tag.text and is_css:
            sheet = container.parse_css(style_tag.text)
            if transform_sheet(sheet):
                changed = True
                style_tag.text = force_unicode(sheet.cssText, 'utf-8')
                pretty_script_or_style(container, style_tag)
    for elem in root.xpath('//*[@style]'):
        raw = elem.get('style', None)
        if raw:
            decl = container.parse_css(raw, is_declaration=True)
            if transform_style(decl):
                changed = True
                if decl.length == 0:
                    # Every property was removed: drop the attribute entirely
                    del elem.attrib['style']
                else:
                    elem.set('style', force_unicode(decl.getCssText(separator=' '), 'utf-8'))
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def transform_css(container, transform_sheet=None, transform_style=None, names=()):
    """Apply *transform_sheet* to stylesheets and *transform_style* to inline
    styles in the files *names* (defaults to every CSS and HTML file in the
    container). Returns True if anything changed."""
    if not names:
        wanted = OEB_STYLES | OEB_DOCS
        names = [name for name, mt in iteritems(container.mime_map) if mt in wanted]

    doc_changed = False

    for name in names:
        mt = container.mime_map[name]
        if mt in OEB_STYLES:
            if transform_sheet(container.parsed(name)):
                container.dirty(name)
                doc_changed = True
        elif mt in OEB_DOCS:
            if transform_inline_styles(container, name, transform_sheet, transform_style):
                container.dirty(name)
                doc_changed = True

    return doc_changed
|
||||||
|
|
||||||
|
|
||||||
|
def filter_css(container, properties, names=()):
    '''
    Remove the specified CSS properties from all CSS rules in the book.

    :param properties: Set of properties to remove. For example: :code:`{'font-family', 'color'}`.
    :param names: The files from which to remove the properties. Defaults to all HTML and CSS files in the book.
    '''
    normalized = normalize_filter_css(properties)
    sheet_transform = partial(filter_sheet, properties=normalized)
    style_transform = partial(filter_declaration, properties=normalized)
    return transform_css(container, transform_sheet=sheet_transform,
                         transform_style=style_transform, names=names)
|
||||||
|
|
||||||
|
|
||||||
|
def _classes_in_selector(selector, classes):
|
||||||
|
for attr in ('selector', 'subselector', 'parsed_tree'):
|
||||||
|
s = getattr(selector, attr, None)
|
||||||
|
if s is not None:
|
||||||
|
_classes_in_selector(s, classes)
|
||||||
|
cn = getattr(selector, 'class_name', None)
|
||||||
|
if cn is not None:
|
||||||
|
classes.add(cn)
|
||||||
|
|
||||||
|
|
||||||
|
def classes_in_selector(text):
    """Return the set of class names referenced by the selector string *text*.
    A selector that fails to parse contributes whatever was collected before
    the error."""
    found = set()
    try:
        for sel in parse(text):
            _classes_in_selector(sel, found)
    except SelectorSyntaxError:
        pass
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def classes_in_rule_list(css_rules):
    """Return every class name used by selectors anywhere in *css_rules*,
    recursing into nested rule lists (e.g. @media blocks)."""
    found = set()
    for rule in css_rules:
        if rule.type == rule.STYLE_RULE:
            found |= classes_in_selector(rule.selectorText)
        elif hasattr(rule, 'cssRules'):
            found |= classes_in_rule_list(rule.cssRules)
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def iter_declarations(sheet_or_rule):
    """Yield every style declaration reachable from a stylesheet, a rule, or
    a bare CSSStyleDeclaration."""
    if hasattr(sheet_or_rule, 'cssRules'):
        # A sheet or grouping rule: recurse into its child rules
        for child in sheet_or_rule.cssRules:
            for decl in iter_declarations(child):
                yield decl
    elif hasattr(sheet_or_rule, 'style'):
        # A style rule: its declaration block
        yield sheet_or_rule.style
    elif isinstance(sheet_or_rule, CSSStyleDeclaration):
        yield sheet_or_rule
|
||||||
|
|
||||||
|
|
||||||
|
def remove_property_value(prop, predicate):
    ''' Remove the Values that match the predicate from this property. If all
    values of the property would be removed, the property is removed from its
    parent instead. Note that this means the property must have a parent (a
    CSSStyleDeclaration). '''
    doomed = [v for v in prop.propertyValue if predicate(v)]
    if len(doomed) == len(prop.propertyValue):
        # Nothing would remain: delete the whole property from its parent
        prop.parent.removeProperty(prop.name)
    else:
        remaining = css_text(prop.propertyValue)
        for value in doomed:
            remaining = remaining.replace(css_text(value), '').strip()
        prop.propertyValue.cssText = remaining
    return bool(doomed)
|
||||||
|
|
||||||
|
|
||||||
|
RULE_PRIORITIES = {t:i for i, t in enumerate((CSSRule.COMMENT, CSSRule.CHARSET_RULE, CSSRule.IMPORT_RULE, CSSRule.NAMESPACE_RULE))}
|
||||||
|
|
||||||
|
|
||||||
|
def sort_sheet(container, sheet_or_text):
    ''' Sort the rules in a stylesheet. Note that in the general case this can
    change the effective styles, but for most common sheets, it should be safe.
    '''
    sheet = container.parse_css(sheet_or_text) if isinstance(sheet_or_text, unicode_type) else sheet_or_text

    def text_sort_key(x):
        # Numeric-aware ("natural") ordering of arbitrary text
        return numeric_sort_key(unicode_type(x or ''))

    def selector_sort_key(x):
        # CSS specificity first, then alphabetical by selector text
        return (x.specificity, text_sort_key(x.selectorText))

    def rule_sort_key(rule):
        # primary: rule-category priority (comments/@charset/@import/
        # @namespace first); secondary: at-keyword text; tertiary: a
        # type-specific tie-breaker
        primary = RULE_PRIORITIES.get(rule.type, len(RULE_PRIORITIES))
        secondary = text_sort_key(getattr(rule, 'atkeyword', '') or '')
        tertiary = None
        if rule.type == CSSRule.STYLE_RULE:
            # Style rules sort after other unlisted rule types
            primary += 1
            selectors = sorted(rule.selectorList, key=selector_sort_key)
            tertiary = selector_sort_key(selectors[0])
            # Side effect: also normalizes the selector order inside the rule
            rule.selectorText = ', '.join(s.selectorText for s in selectors)
        elif rule.type == CSSRule.FONT_FACE_RULE:
            try:
                tertiary = text_sort_key(rule.style.getPropertyValue('font-family'))
            except Exception:
                pass

        return primary, secondary, tertiary
    sheet.cssRules.sort(key=rule_sort_key)
    return sheet
|
||||||
|
|
||||||
|
|
||||||
|
def add_stylesheet_links(container, name, text):
    """Append a <link rel="stylesheet"> for every stylesheet in the container
    to the <head> of the HTML in *text* (parsed as file *name*) and return the
    serialized document. Returns None when there is no <head> or there are no
    stylesheets."""
    root = container.parse_xhtml(text, name)
    heads = root.xpath('//*[local-name() = "head"]')
    if not heads:
        return
    head = heads[0]
    sheet_names = tuple(container.manifest_items_of_type(lambda mt: mt in OEB_STYLES))
    if not sheet_names:
        return
    for sheet_name in sheet_names:
        link = head.makeelement(
            XHTML('link'), type='text/css', rel='stylesheet',
            href=container.name_to_href(sheet_name, name))
        head.append(link)
    pretty_xml_tree(head)
    return serialize(root, 'text/html')
|
||||||
404
ebook_converter/ebooks/oeb/polish/replace.py
Normal file
404
ebook_converter/ebooks/oeb/polish/replace.py
Normal file
@@ -0,0 +1,404 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import codecs, shutil, os, posixpath
|
||||||
|
from polyglot.builtins import iteritems, itervalues, map
|
||||||
|
from functools import partial
|
||||||
|
from collections import Counter, defaultdict
|
||||||
|
|
||||||
|
from calibre import sanitize_file_name
|
||||||
|
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||||
|
from calibre.ebooks.oeb.base import css_text
|
||||||
|
from calibre.ebooks.oeb.polish.css import iter_declarations, remove_property_value
|
||||||
|
from calibre.ebooks.oeb.polish.utils import extract
|
||||||
|
from polyglot.urllib import urlparse, urlunparse
|
||||||
|
|
||||||
|
|
||||||
|
class LinkReplacer(object):
    # Callable passed to container.replace_links() that rewrites hrefs in the
    # file *base* according to link_map (old canonical name -> new name) and
    # rewrites fragments via frag_map(name, anchor). Sets self.replaced when
    # any URL was actually changed.

    def __init__(self, base, container, link_map, frag_map):
        self.base = base
        self.frag_map = frag_map
        self.link_map = link_map
        self.container = container
        self.replaced = False

    def __call__(self, url):
        # Pure fragment reference within the same file: only remap the anchor
        if url and url.startswith('#'):
            repl = self.frag_map(self.base, url[1:])
            if not repl or repl == url[1:]:
                return url
            self.replaced = True
            return '#' + repl
        name = self.container.href_to_name(url, self.base)
        if not name:
            # Not a file inside the container (external URL etc.)
            return url
        nname = self.link_map.get(name, None)
        if not nname:
            # Target file is not being renamed
            return url
        purl = urlparse(url)
        href = self.container.name_to_href(nname, self.base)
        if purl.fragment:
            # Remap the anchor relative to the ORIGINAL target name
            nfrag = self.frag_map(name, purl.fragment)
            if nfrag:
                href += '#%s'%nfrag
        if href != url:
            self.replaced = True
        return href
|
||||||
|
|
||||||
|
|
||||||
|
class IdReplacer(object):
    # Callable passed to container.replace_links() that rewrites URL fragments
    # in the file *base* according to id_map: {file name: {old_id: new_id}}.
    # Sets self.replaced when any URL was actually changed.

    def __init__(self, base, container, id_map):
        self.base, self.container, self.replaced = base, container, False
        self.id_map = id_map

    def __call__(self, url):
        # Same-file fragment reference: look up the id map for this file
        if url and url.startswith('#'):
            repl = self.id_map.get(self.base, {}).get(url[1:])
            if repl is None or repl == url[1:]:
                return url
            self.replaced = True
            return '#' + repl
        name = self.container.href_to_name(url, self.base)
        if not name:
            # Not a file inside the container
            return url
        id_map = self.id_map.get(name)
        if id_map is None:
            # No ids changed in the target file
            return url
        purl = urlparse(url)
        nfrag = id_map.get(purl.fragment)
        if nfrag is None:
            return url
        # Rebuild the URL with only the fragment replaced
        purl = purl._replace(fragment=nfrag)
        href = urlunparse(purl)
        if href != url:
            self.replaced = True
        return href
|
||||||
|
|
||||||
|
|
||||||
|
class LinkRebaser(object):
    # Callable passed to container.replace_links() that recomputes relative
    # hrefs when a file is moved from old_name to new_name, so that links in
    # the moved file still resolve. Sets self.replaced when a URL changed.

    def __init__(self, container, old_name, new_name):
        self.old_name, self.new_name = old_name, new_name
        self.container = container
        self.replaced = False

    def __call__(self, url):
        # Pure fragment references need no rebasing
        if url and url.startswith('#'):
            return url
        purl = urlparse(url)
        frag = purl.fragment
        # Resolve relative to the OLD location
        name = self.container.href_to_name(url, self.old_name)
        if not name:
            return url
        if name == self.old_name:
            # Self-reference: point at the new location
            name = self.new_name
        # Re-relativize against the NEW location
        href = self.container.name_to_href(name, self.new_name)
        if frag:
            href += '#' + frag
        if href != url:
            self.replaced = True
        return href
|
||||||
|
|
||||||
|
|
||||||
|
def replace_links(container, link_map, frag_map=lambda name, frag:frag, replace_in_opf=False):
    '''
    Replace links to files in the container. Will iterate over all files in the container and change the specified links in them.

    :param link_map: A mapping of old canonical name to new canonical name. For example: :code:`{'images/old.png': 'images/new.png'}`
    :param frag_map: A callable that takes two arguments ``(name, anchor)`` and
        returns a new anchor. This is useful if you need to change the anchors in
        HTML files. By default, it does nothing.
    :param replace_in_opf: If False, links are not replaced in the OPF file.

    '''
    # Iterate over keys only; the media type was fetched but never used.
    for name in container.mime_map:
        if name == container.opf_name and not replace_in_opf:
            continue
        repl = LinkReplacer(name, container, link_map, frag_map)
        container.replace_links(name, repl)
|
||||||
|
|
||||||
|
|
||||||
|
def replace_ids(container, id_map):
    '''
    Replace all links in the container that pointed to the changed ids.

    :param id_map: A mapping of {name:id_map} where each id_map is a mapping of {old_id:new_id}
    :return: True iff at least one link was changed

    '''
    changed = False
    # Iterate over keys only; the media type was fetched but never used.
    for name in container.mime_map:
        repl = IdReplacer(name, container, id_map)
        container.replace_links(name, repl)
        if name == container.opf_name:
            # The OPF references ids via idref attributes (e.g. in the
            # spine) rather than via hrefs, so update those directly
            imap = id_map.get(name, {})
            for item in container.opf_xpath('//*[@idref]'):
                old_id = item.get('idref')
                if old_id is not None:
                    new_id = imap.get(old_id)
                    if new_id is not None:
                        item.set('idref', new_id)
        if repl.replaced:
            changed = True
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def smarten_punctuation(container, report):
    # Convert plain quotes/dashes/ellipses to their typographic equivalents
    # in every spine document, reporting each changed file via report().
    # Returns True iff at least one file was changed.
    from calibre.ebooks.conversion.preprocess import smarten_punctuation
    smartened = False
    for path in container.spine_items:
        name = container.abspath_to_name(path)
        changed = False
        with container.open(name, 'r+b') as f:
            html = container.decode(f.read())
            newhtml = smarten_punctuation(html, container.log)
            if newhtml != html:
                changed = True
                report(_('Smartened punctuation in: %s')%name)
                newhtml = strip_encoding_declarations(newhtml)
                # Rewrite the file in place with a UTF-8 BOM so the encoding
                # is unambiguous when re-read
                f.seek(0)
                f.truncate()
                f.write(codecs.BOM_UTF8 + newhtml.encode('utf-8'))
        if changed:
            # Add an encoding declaration (it will be added automatically when
            # serialized)
            root = container.parsed(name)
            for m in root.xpath('descendant::*[local-name()="meta" and @http-equiv]'):
                m.getparent().remove(m)
            container.dirty(name)
            smartened = True
    if not smartened:
        report(_('No punctuation that could be smartened found'))
    return smartened
|
||||||
|
|
||||||
|
|
||||||
|
def rename_files(container, file_map):
    '''
    Rename files in the container, automatically updating all links to them.

    :param file_map: A mapping of old canonical name to new canonical name, for
        example: :code:`{'text/chapter1.html': 'chapter1.html'}`.
    :raises ValueError: if a name is both a rename source and a destination,
        if a destination already exists, or if two sources map to the same
        destination.
    '''
    # A name that is both a source and a destination would make the result
    # depend on rename order, so refuse it outright
    overlap = set(file_map).intersection(set(itervalues(file_map)))
    if overlap:
        raise ValueError('Circular rename detected. The files %s are both rename targets and destinations' % ', '.join(overlap))
    for name, dest in iteritems(file_map):
        if container.exists(dest):
            if name != dest and name.lower() == dest.lower():
                # A case change on an OS with a case insensitive file-system.
                continue
            raise ValueError('Cannot rename {0} to {1} as {1} already exists'.format(name, dest))
    # Two sources mapping to one destination would silently clobber a file
    if len(tuple(itervalues(file_map))) != len(set(itervalues(file_map))):
        raise ValueError('Cannot rename, the set of destination files contains duplicates')
    link_map = {}
    for current_name, new_name in iteritems(file_map):
        container.rename(current_name, new_name)
        if new_name != container.opf_name:  # OPF is handled by the container
            link_map[current_name] = new_name
    replace_links(container, link_map, replace_in_opf=True)
|
||||||
|
|
||||||
|
|
||||||
|
def replace_file(container, name, path, basename, force_mt=None):
    # Replace the container file *name* with the contents of the local file
    # *path*, renaming it to a sanitized *basename* (kept in the same folder,
    # made unique if needed) and keeping the OPF manifest media-type in sync.
    dirname, base = name.rpartition('/')[0::2]
    nname = sanitize_file_name(basename)
    if dirname:
        nname = dirname + '/' + nname
    with open(path, 'rb') as src:
        if name != nname:
            # Find a non-clashing name by appending _1, _2, ... before the
            # extension, then rename (which also fixes up links)
            count = 0
            b, e = nname.rpartition('.')[0::2]
            while container.exists(nname):
                count += 1
                nname = b + ('_%d.%s' % (count, e))
            rename_files(container, {name:nname})
        mt = force_mt or container.guess_type(nname)
        container.mime_map[nname] = mt
        # Update the media-type of the corresponding manifest item(s)
        for itemid, q in iteritems(container.manifest_id_map):
            if q == nname:
                for item in container.opf_xpath('//opf:manifest/opf:item[@href and @id="%s"]' % itemid):
                    item.set('media-type', mt)
        container.dirty(container.opf_name)
        with container.open(nname, 'wb') as dest:
            shutil.copyfileobj(src, dest)
|
||||||
|
|
||||||
|
|
||||||
|
def mt_to_category(container, mt):
    '''
    Map a media type to a coarse file category: 'text', 'style', 'font',
    'opf', 'toc', or the major part of the media type (e.g. 'image').
    '''
    from calibre.ebooks.oeb.polish.utils import guess_type
    from calibre.ebooks.oeb.polish.container import OEB_FONTS
    from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
    if mt in OEB_DOCS:
        return 'text'
    if mt in OEB_STYLES:
        return 'style'
    if mt in OEB_FONTS:
        return 'font'
    if mt == guess_type('a.opf'):
        return 'opf'
    if mt == guess_type('a.ncx'):
        return 'toc'
    # Fall back to the major part of the media type, e.g. image/jpeg -> image
    return mt.partition('/')[0]
|
||||||
|
|
||||||
|
|
||||||
|
def get_recommended_folders(container, names):
    ''' Return the folders that are recommended for the given filenames. The
    recommendation is based on where the majority of files of the same type are
    located in the container. If no files of a particular type are present, the
    recommended folder is assumed to be the folder containing the OPF file. '''
    from calibre.ebooks.oeb.polish.utils import guess_type
    counts = defaultdict(Counter)
    for name, mt in iteritems(container.mime_map):
        folder = name.rpartition('/')[0] if '/' in name else ''
        counts[mt_to_category(container, mt)][folder] += 1

    try:
        opf_folder = counts['opf'].most_common(1)[0][0]
    except (KeyError, IndexError):
        # KeyError cannot actually occur since counts is a defaultdict: the
        # lookup creates an empty Counter, whose most_common(1) returns [],
        # making the [0] subscript raise IndexError. Catch both so the
        # fallback genuinely works when no OPF file was counted.
        opf_folder = ''

    # Most popular folder per category; fall back to the OPF's folder for
    # categories with no existing files.
    recommendations = {category:counter.most_common(1)[0][0] for category, counter in iteritems(counts)}
    return {n:recommendations.get(mt_to_category(container, guess_type(os.path.basename(n))), opf_folder) for n in names}
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_case(container, val):
    '''
    Return *val* with each path component replaced by the differently-cased
    name that actually exists on disk, if any. Components with no on-disk
    case variant are kept as-is.
    '''

    def safe_listdir(x):
        # A missing or unreadable directory simply contributes no candidates.
        try:
            return os.listdir(x)
        except EnvironmentError:
            return ()

    parts = val.split('/')
    fixed = []
    for pos, part in enumerate(parts):
        abspath = container.name_to_abspath('/'.join(parts[:pos + 1]))
        wanted = part.lower()
        # First directory entry that matches case-insensitively but differs
        # in case; otherwise keep the component unchanged.
        match = next((c for c in safe_listdir(os.path.dirname(abspath))
                      if c != part and c.lower() == wanted), part)
        fixed.append(match)
    return '/'.join(fixed)
|
||||||
|
|
||||||
|
|
||||||
|
def rationalize_folders(container, folder_type_map):
    '''
    Compute a rename map that moves every file into the folder configured for
    its category in *folder_type_map* (category -> folder). META-INF/ entries
    are never moved. Returns a dict of current name -> new name containing
    only the files that actually need to move.
    '''
    all_names = set(container.mime_map)
    new_names = set()
    name_map = {}
    # Normalize the target folders to the case actually used on disk, so the
    # renames below don't create case-only duplicates.
    for key in tuple(folder_type_map):
        val = folder_type_map[key]
        folder_type_map[key] = normalize_case(container, val)
    for name in all_names:
        if name.startswith('META-INF/'):
            continue
        category = mt_to_category(container, container.mime_map[name])
        folder = folder_type_map.get(category, None)
        if folder is not None:
            bn = posixpath.basename(name)
            new_name = posixpath.join(folder, bn)
            if new_name != name:
                # Avoid collisions with both existing files and renames
                # already scheduled in this pass: foo.x -> foo_1.x, ...
                c = 0
                while new_name in all_names or new_name in new_names:
                    c += 1
                    n, ext = bn.rpartition('.')[0::2]
                    new_name = posixpath.join(folder, '%s_%d.%s' % (n, c, ext))
                name_map[name] = new_name
                new_names.add(new_name)
    return name_map
|
||||||
|
|
||||||
|
|
||||||
|
def remove_links_in_sheet(href_to_name, sheet, predicate):
    '''
    Remove from the parsed stylesheet *sheet* all @import rules and url()
    references for which *predicate*(name, href, None) is True. Returns True
    if the sheet was modified.
    '''
    import_rules_to_remove = []
    changed = False
    for i, r in enumerate(sheet):
        if r.type == r.IMPORT_RULE:
            name = href_to_name(r.href)
            if predicate(name, r.href, None):
                import_rules_to_remove.append(i)
    # Delete by index from the end so earlier indices stay valid.
    for i in sorted(import_rules_to_remove, reverse=True):
        sheet.deleteRule(i)
        changed = True

    # Also strip matching url() values from all style declarations.
    for dec in iter_declarations(sheet):
        changed = remove_links_in_declaration(href_to_name, dec, predicate) or changed
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def remove_links_in_declaration(href_to_name, style, predicate):
    '''
    Remove from the CSS declaration *style* every url() property value for
    which *predicate*(name, uri, None) is True. Returns True if anything
    was removed.
    '''
    def check_pval(v):
        # Only URI-typed values are candidates for removal.
        if v.type == v.URI:
            name = href_to_name(v.uri)
            return predicate(name, v.uri, None)
        return False

    changed = False

    # Iterate over a snapshot since properties may be modified in place.
    for p in tuple(style.getProperties(all=True)):
        changed = remove_property_value(p, check_pval) or changed
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def remove_links_to(container, predicate):
    ''' predicate must be a function that takes the arguments (name, href,
    fragment=None) and returns True iff the link should be removed '''
    from calibre.ebooks.oeb.base import iterlinks, OEB_DOCS, OEB_STYLES, XPath, XHTML
    stylepath = XPath('//h:style')
    styleattrpath = XPath('//*[@style]')
    changed = set()
    for name, mt in iteritems(container.mime_map):
        removed = False
        if mt in OEB_DOCS:
            root = container.parsed(name)
            # Plain HTML links/references (CSS handled separately below).
            for el, attr, href, pos in iterlinks(root, find_links_in_css=False):
                hname = container.href_to_name(href, name)
                frag = href.partition('#')[-1]
                if predicate(hname, href, frag):
                    if attr is None:
                        # Link was in element text (e.g. inline CSS), blank it.
                        el.text = None
                    else:
                        if el.tag == XHTML('link') or el.tag == XHTML('img'):
                            # These elements are meaningless without their
                            # target, so remove them entirely.
                            extract(el)
                        else:
                            del el.attrib[attr]
                    removed = True
            # url() references inside <style> blocks.
            for tag in stylepath(root):
                if tag.text and (tag.get('type') or 'text/css').lower() == 'text/css':
                    sheet = container.parse_css(tag.text)
                    if remove_links_in_sheet(partial(container.href_to_name, base=name), sheet, predicate):
                        tag.text = css_text(sheet)
                        removed = True
            # url() references inside style="" attributes.
            for tag in styleattrpath(root):
                style = tag.get('style')
                if style:
                    style = container.parse_css(style, is_declaration=True)
                    if remove_links_in_declaration(partial(container.href_to_name, base=name), style, predicate):
                        removed = True
                        tag.set('style', css_text(style))
        elif mt in OEB_STYLES:
            removed = remove_links_in_sheet(partial(container.href_to_name, base=name), container.parsed(name), predicate)
        if removed:
            changed.add(name)
    # Mark every modified file dirty so it gets re-serialized.
    tuple(map(container.dirty, changed))
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def get_spine_order_for_all_files(container):
    '''
    Map every file to a (spine position, link index) pair. Spine files get
    link index -1; non-spine files get the position of the first spine file
    linking to them plus the index of that link. Linear spine entries sort
    before non-linear ones.
    '''
    linear, non_linear = [], []
    for name, is_linear in container.spine_names:
        (linear if is_linear else non_linear).append(name)
    ordered = linear + non_linear
    in_spine = frozenset(ordered)
    order = {}
    for spine_pos, name in enumerate(ordered):
        # setdefault keeps the first (earliest) position seen.
        order.setdefault(name, (spine_pos, -1))
        for link_idx, href in enumerate(container.iterlinks(name, get_line_numbers=False)):
            target = container.href_to_name(href, name)
            if target not in in_spine:
                order.setdefault(target, (spine_pos, link_idx))
    return order
|
||||||
517
ebook_converter/ebooks/oeb/polish/split.py
Normal file
517
ebook_converter/ebooks/oeb/polish/split.py
Normal file
@@ -0,0 +1,517 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import copy, os, re
|
||||||
|
from polyglot.builtins import map, string_or_bytes, range
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML, OEB_DOCS
|
||||||
|
from calibre.ebooks.oeb.polish.errors import MalformedMarkup
|
||||||
|
from calibre.ebooks.oeb.polish.toc import node_from_loc
|
||||||
|
from calibre.ebooks.oeb.polish.replace import LinkRebaser
|
||||||
|
from polyglot.builtins import iteritems, unicode_type
|
||||||
|
from polyglot.urllib import urlparse
|
||||||
|
|
||||||
|
|
||||||
|
class AbortError(ValueError):
    """Raised when a split or merge operation cannot proceed."""
    pass
|
||||||
|
|
||||||
|
|
||||||
|
def in_table(node):
    '''
    Return True if *node* or any of its ancestors is a (namespaced)
    <table> element.
    '''
    ancestor = node
    while ancestor is not None:
        if ancestor.tag.endswith('}table'):
            return True
        ancestor = ancestor.getparent()
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def adjust_split_point(split_point, log):
    '''
    Move the split point up its ancestor chain if it has no content
    before it. This handles the common case:
    <div id="chapter1"><h2>Chapter 1</h2>...</div> with a page break on the
    h2.
    '''
    node = split_point
    while True:
        parent = node.getparent()
        # Stop climbing at the document root...
        if parent is None:
            break
        # ...or at structural elements that must never become split points...
        if barename(parent.tag) in {'body', 'html'}:
            break
        # ...or when the parent has text content before the node...
        if parent.text and parent.text.strip():
            break
        # ...or when the node is not the parent's first child.
        if parent.index(node) > 0:
            break
        node = parent

    if node is not split_point:
        log.debug('Adjusted split point to ancestor')

    return node
|
||||||
|
|
||||||
|
|
||||||
|
def get_body(root):
    # The first <body> child of the XHTML root, or None if there is none.
    return root.find('h:body', namespaces=XPNSMAP)
|
||||||
|
|
||||||
|
|
||||||
|
def do_split(split_point, log, before=True):
    '''
    Split tree into a *before* and an *after* tree at ``split_point``.

    :param split_point: The Element at which to split
    :param before: If True tree is split before split_point, otherwise after split_point
    :return: before_tree, after_tree
    '''
    if before:
        # We cannot adjust for after since moving an after split point to a
        # parent will cause breakage if the parent contains any content
        # after the original split point
        split_point = adjust_split_point(split_point, log)
    tree = split_point.getroottree()
    path = tree.getpath(split_point)

    # Work on two copies; the split point is re-located in each copy via its
    # XPath path from the original tree.
    tree, tree2 = copy.deepcopy(tree), copy.deepcopy(tree)
    root, root2 = tree.getroot(), tree2.getroot()
    body, body2 = map(get_body, (root, root2))
    split_point = root.xpath(path)[0]
    split_point2 = root2.xpath(path)[0]

    def nix_element(elem, top=True):
        # Remove elem unless top is False in which case replace elem by its
        # children
        parent = elem.getparent()
        if top:
            parent.remove(elem)
        else:
            index = parent.index(elem)
            parent[index:index+1] = list(elem.iterchildren())

    # Tree 1: remove the split point (if splitting before) and everything
    # after it.
    hit_split_point = False
    keep_descendants = False
    split_point_descendants = frozenset(split_point.iterdescendants())
    for elem in tuple(body.iterdescendants()):
        if elem is split_point:
            hit_split_point = True
            if before:
                nix_element(elem)
            else:
                # We want to keep the descendants of the split point in
                # Tree 1
                keep_descendants = True
                # We want the split point element, but not its tail
                elem.tail = '\n'

            continue
        if hit_split_point:
            if keep_descendants:
                if elem in split_point_descendants:
                    # elem is a descendant keep it
                    continue
                else:
                    # We are out of split_point, so prevent further set
                    # lookups of split_point_descendants
                    keep_descendants = False
            nix_element(elem)

    # Tree 2: remove everything before the split point (and the split point
    # itself when splitting after it).
    ancestors = frozenset(XPath('ancestor::*')(split_point2))
    for elem in tuple(body2.iterdescendants()):
        if elem is split_point2:
            if not before:
                # Keep the split point element's tail, if it contains non-whitespace
                # text
                tail = elem.tail
                if tail and not tail.isspace():
                    parent = elem.getparent()
                    idx = parent.index(elem)
                    if idx == 0:
                        parent.text = (parent.text or '') + tail
                    else:
                        sib = parent[idx-1]
                        sib.tail = (sib.tail or '') + tail
                # Remove the element itself
                nix_element(elem)
            break
        if elem in ancestors:
            # We have to preserve the ancestors as they could have CSS
            # styles that are inherited/applicable, like font or
            # width. So we only remove the text, if any.
            elem.text = '\n'
        else:
            nix_element(elem, top=False)

    body2.text = '\n'

    return tree, tree2
|
||||||
|
|
||||||
|
|
||||||
|
class SplitLinkReplacer(object):
    '''
    Callable link replacer that retargets links pointing at anchors which
    moved into the bottom half of a split file. ``self.replaced`` records
    whether any link was rewritten.
    '''

    def __init__(self, base, bottom_anchors, top_name, bottom_name, container):
        self.bottom_anchors, self.bottom_name = bottom_anchors, bottom_name
        self.container, self.top_name = container, top_name
        self.base = base
        self.replaced = False

    def __call__(self, url):
        # Purely intra-file fragments are left alone.
        if url and url.startswith('#'):
            return url
        if self.container.href_to_name(url, self.base) != self.top_name:
            return url
        fragment = urlparse(url).fragment
        if fragment and fragment in self.bottom_anchors:
            new_href = self.container.name_to_href(self.bottom_name, self.base)
            url = new_href + '#' + fragment
            self.replaced = True
        return url
|
||||||
|
|
||||||
|
|
||||||
|
def split(container, name, loc_or_xpath, before=True, totals=None):
    '''
    Split the file specified by name at the position specified by loc_or_xpath.
    Splitting automatically migrates all links and references to the affected
    files.

    :param loc_or_xpath: Should be an XPath expression such as
        //h:div[@id="split_here"]. Can also be a *loc* which is used internally to
        implement splitting in the preview panel.
    :param before: If True the split occurs before the identified element otherwise after it.
    :param totals: Used internally
    '''

    root = container.parsed(name)
    if isinstance(loc_or_xpath, unicode_type):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        try:
            split_point = node_from_loc(root, loc_or_xpath, totals=totals)
        except MalformedMarkup:
            # The webkit HTML parser and the container parser have yielded
            # different node counts, this can happen if the file is valid XML
            # but contains constructs like nested <p> tags. So force parse it
            # with the HTML 5 parser and try again.
            raw = container.raw_data(name)
            root = container.parse_xhtml(raw, fname=name, force_html5_parse=True)
            try:
                split_point = node_from_loc(root, loc_or_xpath, totals=totals)
            except MalformedMarkup:
                raise MalformedMarkup(_('The file %s has malformed markup. Try running the Fix HTML tool'
                                        ' before splitting') % name)
            container.replace(name, root)
    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')
    tree1, tree2 = do_split(split_point, container.log, before=before)
    root1, root2 = tree1.getroot(), tree2.getroot()
    # '' is included in the top anchors so fragment-less self links stay put.
    anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(root1.xpath('//*/@name')) | {''}
    anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(root2.xpath('//*/@name'))
    # Generate a fresh, unused foo_splitN name for the bottom half.
    base, ext = name.rpartition('.')[0::2]
    base = re.sub(r'_split\d+$', '', base)
    nname, s = None, 0
    while not nname or container.exists(nname):
        s += 1
        nname = '%s_split%d.%s' % (base, s, ext)
    manifest_item = container.generate_item(nname, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(manifest_item.get('href'), container.opf_name)

    # Fix links in the split trees
    for r in (root1, root2):
        for a in r.xpath('//*[@href]'):
            url = a.get('href')
            if url.startswith('#'):
                fname = name
            else:
                fname = container.href_to_name(url, name)
            if fname == name:
                purl = urlparse(url)
                if purl.fragment in anchors_in_top:
                    if r is root2:
                        a.set('href', '%s#%s' % (container.name_to_href(name, bottom_name), purl.fragment))
                    else:
                        a.set('href', '#' + purl.fragment)
                elif purl.fragment in anchors_in_bottom:
                    if r is root1:
                        a.set('href', '%s#%s' % (container.name_to_href(bottom_name, name), purl.fragment))
                    else:
                        a.set('href', '#' + purl.fragment)

    # Fix all links in the container that point to anchors in the bottom tree
    for fname, media_type in iteritems(container.mime_map):
        if fname not in {name, bottom_name}:
            repl = SplitLinkReplacer(fname, anchors_in_bottom, name, bottom_name, container)
            container.replace_links(fname, repl)

    container.replace(name, root1)
    container.replace(bottom_name, root2)

    # Insert the bottom half into the spine immediately after the original,
    # inheriting its linearity.
    spine = container.opf_xpath('//opf:spine')[0]
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    index = spine.index(spine_item) + 1

    si = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        si.set('linear', 'no')
    container.insert_into_xml(spine, si, index=index)
    container.dirty(container.opf_name)
    return bottom_name
|
||||||
|
|
||||||
|
|
||||||
|
def multisplit(container, name, xpath, before=True):
    '''
    Split the specified file at multiple locations (all tags that match the specified XPath expression). See also: :func:`split`.
    Splitting automatically migrates all links and references to the affected
    files.

    :param before: If True the splits occur before the identified element otherwise after it.
    '''
    root = container.parsed(name)
    nodes = root.xpath(xpath, namespaces=XPNSMAP)
    if not nodes:
        raise AbortError(_('The expression %s did not match any nodes') % xpath)
    # Validate every split point up front, before modifying anything.
    for split_point in nodes:
        if in_table(split_point):
            raise AbortError('Cannot split inside tables')
        if split_point.tag.endswith('}body'):
            raise AbortError('Cannot split on the <body> tag')

    # Tag the split points so they can be found again after each split
    # re-parses the document.
    for i, tag in enumerate(nodes):
        tag.set('calibre-split-point', unicode_type(i))

    current = name
    all_names = [name]
    for i in range(len(nodes)):
        current = split(container, current, '//*[@calibre-split-point="%d"]' % i, before=before)
        all_names.append(current)

    # Strip the temporary marker attributes from every resulting file.
    for x in all_names:
        for tag in container.parsed(x).xpath('//*[@calibre-split-point]'):
            tag.attrib.pop('calibre-split-point')
        container.dirty(x)

    return all_names[1:]
|
||||||
|
|
||||||
|
|
||||||
|
class MergeLinkReplacer(object):
    '''
    Callable link replacer that rewrites links into merged files so they
    point at the corresponding (possibly renamed) anchor in the master
    file. ``self.replaced`` records whether any link was rewritten.
    '''

    def __init__(self, base, anchor_map, master, container):
        self.container, self.anchor_map = container, anchor_map
        self.master = master
        self.base = base
        self.replaced = False

    def __call__(self, url):
        # Purely intra-file fragments are left alone.
        if url and url.startswith('#'):
            return url
        target = self.container.href_to_name(url, self.base)
        renames = self.anchor_map.get(target, None)
        if renames is None:
            # Not one of the merged files.
            return url
        frag = urlparse(url).fragment or ''
        # Map through the anchor renames; '' maps to the file's top anchor.
        frag = renames.get(frag, frag)
        self.replaced = True
        return self.container.name_to_href(self.master, self.base) + '#' + frag
|
||||||
|
|
||||||
|
|
||||||
|
def add_text(body, text):
    '''
    Append *text* to the rendered content of *body*: to the tail of its
    last child, or to body.text when it has no children.
    '''
    if len(body) == 0:
        body.text = (body.text or '') + text
    else:
        last = body[-1]
        last.tail = (last.tail or '') + text
|
||||||
|
|
||||||
|
|
||||||
|
def all_anchors(root):
    '''Return the set of every id and name attribute value in the document.'''
    ids = root.xpath('//*/@id')
    names = root.xpath('//*/@name')
    return set(ids).union(names)
|
||||||
|
|
||||||
|
|
||||||
|
def all_stylesheets(container, name):
    '''
    Yield the container names of all CSS stylesheets linked from the <head>
    of the HTML file *name*.
    '''
    for link in XPath('//h:head/h:link[@href]')(container.parsed(name)):
        # Resolve each href relative to the HTML file itself. The previous
        # code rebound ``name`` here, so every href after the first was
        # resolved relative to the previously seen stylesheet instead of the
        # HTML file, giving wrong names when stylesheets live in different
        # folders.
        sheet_name = container.href_to_name(link.get('href'), name)
        typ = link.get('type', 'text/css')
        if typ == 'text/css':
            yield sheet_name
|
||||||
|
|
||||||
|
|
||||||
|
def unique_anchor(seen_anchors, current):
    '''
    Return *current*, suffixed with _N if necessary, such that the result
    is not present in *seen_anchors*.
    '''
    candidate = current
    counter = 0
    while candidate in seen_anchors:
        counter += 1
        candidate = '%s_%d' % (current, counter)
    return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def remove_name_attributes(root):
    '''Replace every legacy name attribute with an equivalent id attribute.'''
    # Where both id and name exist the id wins; drop the redundant name.
    redundant = root.xpath('//*[@id and @name]')
    for elem in redundant:
        del elem.attrib['name']
    # Any remaining name attribute becomes the element's id.
    for elem in root.xpath('//*[@name]'):
        value = elem.attrib.pop('name')
        elem.set('id', value)
|
||||||
|
|
||||||
|
|
||||||
|
def merge_html(container, names, master, insert_page_breaks=False):
    '''
    Merge the HTML files *names* into *master*, appending their body content
    to the master's body, de-duplicating anchors and migrating stylesheet
    links. Returns a map of merged file name -> the anchor in master marking
    where that file's content begins.
    '''
    p = container.parsed
    root = p(master)

    # Ensure master has a <head>
    head = root.find('h:head', namespaces=XPNSMAP)
    if head is None:
        head = root.makeelement(XHTML('head'))
        container.insert_into_xml(root, head, 0)

    seen_anchors = all_anchors(root)
    seen_stylesheets = set(all_stylesheets(container, master))
    master_body = p(master).findall('h:body', namespaces=XPNSMAP)[-1]
    master_base = os.path.dirname(master)
    # Per merged file: old anchor -> new anchor in master ('' maps to the
    # anchor marking the start of that file's content).
    anchor_map = {n:{} for n in names if n != master}
    first_anchor_map = {}

    for name in names:
        if name == master:
            continue
        # Insert new stylesheets into master
        for sheet in all_stylesheets(container, name):
            if sheet not in seen_stylesheets:
                seen_stylesheets.add(sheet)
                link = head.makeelement(XHTML('link'), rel='stylesheet', type='text/css', href=container.name_to_href(sheet, master))
                container.insert_into_xml(head, link)

        # Rebase links if master is in a different directory
        if os.path.dirname(name) != master_base:
            container.replace_links(name, LinkRebaser(container, name, master))

        root = p(name)
        # Collect the body content as a flat list of leading-text strings
        # and child elements.
        children = []
        for body in p(name).findall('h:body', namespaces=XPNSMAP):
            children.append(body.text if body.text and body.text.strip() else '\n\n')
            children.extend(body)

        first_child = ''
        for first_child in children:
            if not isinstance(first_child, string_or_bytes):
                break
        if isinstance(first_child, string_or_bytes):
            # body contained only text, no tags
            first_child = body.makeelement(XHTML('p'))
            first_child.text, children[0] = children[0], first_child

        amap = anchor_map[name]
        remove_name_attributes(root)

        # Rename any anchors that collide with ones already in master.
        for elem in root.xpath('//*[@id]'):
            val = elem.get('id')
            if not val:
                continue
            if val in seen_anchors:
                nval = unique_anchor(seen_anchors, val)
                elem.set('id', nval)
                amap[val] = nval
            else:
                seen_anchors.add(val)

        # Guarantee an anchor on the first element so links to the merged
        # file itself can be retargeted.
        if 'id' not in first_child.attrib:
            first_child.set('id', unique_anchor(seen_anchors, 'top'))
            seen_anchors.add(first_child.get('id'))
        first_anchor_map[name] = first_child.get('id')

        if insert_page_breaks:
            first_child.set('style', first_child.get('style', '') + '; page-break-before: always')

        amap[''] = first_child.get('id')

        # Fix links that point to local changed anchors
        for a in XPath('//h:a[starts-with(@href, "#")]')(root):
            q = a.get('href')[1:]
            if q in amap:
                a.set('href', '#' + amap[q])

        for child in children:
            if isinstance(child, string_or_bytes):
                add_text(master_body, child)
            else:
                master_body.append(copy.deepcopy(child))

        container.remove_item(name, remove_from_guide=False)

    # Fix all links in the container that point to merged files
    for fname, media_type in iteritems(container.mime_map):
        repl = MergeLinkReplacer(fname, anchor_map, master, container)
        container.replace_links(fname, repl)

    return first_anchor_map
|
||||||
|
|
||||||
|
|
||||||
|
def merge_css(container, names, master):
    '''
    Merge the CSS files *names* into *master*, appending their rules to the
    master sheet, then retarget all <link> references in the book's HTML
    files from the merged sheets to the master sheet.
    '''
    p = container.parsed
    msheet = p(master)
    master_base = os.path.dirname(master)
    merged = set()

    for name in names:
        if name == master:
            continue
        # Rebase links if master is in a different directory
        if os.path.dirname(name) != master_base:
            container.replace_links(name, LinkRebaser(container, name, master))

        sheet = p(name)

        # Remove charset rules (they are meaningless once merged).
        cr = [r for r in sheet.cssRules if r.type == r.CHARSET_RULE]
        for r in cr:
            # Previously done via a list comprehension used only for its side
            # effects; a plain loop is the idiomatic form. The index is
            # re-computed each time as it shifts after every deletion.
            sheet.deleteRule(sheet.cssRules.index(r))
        for rule in sheet.cssRules:
            msheet.add(rule)

        container.remove_item(name)
        merged.add(name)

    # Remove links to merged stylesheets in the html files, replacing with a
    # link to the master sheet
    for name, mt in iteritems(container.mime_map):
        if mt in OEB_DOCS:
            removed = False
            root = p(name)
            for link in XPath('//h:link[@href]')(root):
                q = container.href_to_name(link.get('href'), name)
                if q in merged:
                    container.remove_from_xml(link)
                    removed = True
            if removed:
                container.dirty(name)
            # If the file referenced a merged sheet but not the master one,
            # add a link to the master so styling is preserved.
            if removed and master not in set(all_stylesheets(container, name)):
                head = root.find('h:head', namespaces=XPNSMAP)
                if head is not None:
                    link = head.makeelement(XHTML('link'), type='text/css', rel='stylesheet', href=container.name_to_href(master, name))
                    container.insert_into_xml(head, link)
|
||||||
|
|
||||||
|
|
||||||
|
def merge(container, category, names, master):
    '''
    Merge the specified files into a single file, automatically migrating all
    links and references to the affected files. The files must all either be HTML or CSS files.

    :param category: Must be either ``'text'`` for HTML files or ``'styles'`` for CSS files
    :param names: The list of files to be merged
    :param master: Which of the merged files is the *master* file, that is, the file that will remain after merging.
    '''
    # Validate arguments up front; nothing is modified on failure.
    if category not in {'text', 'styles'}:
        raise AbortError('Cannot merge files of type: %s' % category)
    if len(names) < 2:
        raise AbortError('Must specify at least two files to be merged')
    if master not in names:
        raise AbortError('The master file (%s) must be one of the files being merged' % master)

    merger = merge_html if category == 'text' else merge_css
    merger(container, names, master)

    container.dirty(master)
|
||||||
172
ebook_converter/ebooks/oeb/transforms/cover.py
Normal file
172
ebook_converter/ebooks/oeb/transforms/cover.py
Normal file
@@ -0,0 +1,172 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import textwrap
|
||||||
|
|
||||||
|
from calibre import guess_type
|
||||||
|
from calibre.utils.imghdr import identify
|
||||||
|
from calibre.utils.xml_parse import safe_xml_fromstring
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
from polyglot.urllib import unquote
|
||||||
|
|
||||||
|
|
||||||
|
class CoverManager(object):
|
||||||
|
|
||||||
|
SVG_TEMPLATE = textwrap.dedent('''\
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="calibre:cover" content="true" />
|
||||||
|
<title>Cover</title>
|
||||||
|
<style type="text/css" title="override_css">
|
||||||
|
@page {padding: 0pt; margin:0pt}
|
||||||
|
body { text-align: center; padding:0pt; margin: 0pt; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div>
|
||||||
|
<svg version="1.1" xmlns="http://www.w3.org/2000/svg"
|
||||||
|
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||||
|
width="100%%" height="100%%" viewBox="__viewbox__"
|
||||||
|
preserveAspectRatio="__ar__">
|
||||||
|
<image width="__width__" height="__height__" xlink:href="%s"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
''')
|
||||||
|
|
||||||
|
NONSVG_TEMPLATE = textwrap.dedent('''\
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="calibre:cover" content="true" />
|
||||||
|
<title>Cover</title>
|
||||||
|
<style type="text/css" title="override_css">
|
||||||
|
@page {padding: 0pt; margin:0pt}
|
||||||
|
body { text-align: center; padding:0pt; margin: 0pt }
|
||||||
|
div { padding:0pt; margin: 0pt }
|
||||||
|
img { padding:0pt; margin: 0pt }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div>
|
||||||
|
<img src="%s" alt="cover" __style__ />
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
''')
|
||||||
|
|
||||||
|
    def __init__(self, no_default_cover=False, no_svg_cover=False,
            preserve_aspect_ratio=False, fixed_size=None):
        '''
        :param no_default_cover: If True, no cover is generated for books
            that lack one.
        :param no_svg_cover: If True, the plain <img> cover template is used
            instead of the SVG wrapper.
        :param preserve_aspect_ratio: Controls the SVG preserveAspectRatio
            attribute.
        :param fixed_size: Optional (width, height) CSS sizes for the
            non-SVG cover image; defaults to height: 100%.
        '''
        self.no_default_cover = no_default_cover
        self.no_svg_cover = no_svg_cover
        self.preserve_aspect_ratio = preserve_aspect_ratio

        ar = 'xMidYMid meet' if preserve_aspect_ratio else 'none'
        self.svg_template = self.SVG_TEMPLATE.replace('__ar__', ar)

        # %% survives as a literal % after the template is later %-formatted
        # with the image href.
        if fixed_size is None:
            style = 'style="height: 100%%"'
        else:
            width, height = fixed_size
            style = 'style="height: %s; width: %s"'%(height, width)
        self.non_svg_template = self.NONSVG_TEMPLATE.replace('__style__',
                style)
|
||||||
|
|
||||||
|
    def __call__(self, oeb, opts, log):
        # Transform entry point: stash the book and logger, then do the work.
        self.oeb = oeb
        self.log = log
        self.insert_cover()
|
||||||
|
|
||||||
|
def default_cover(self):
|
||||||
|
'''
|
||||||
|
Create a generic cover for books that dont have a cover
|
||||||
|
'''
|
||||||
|
if self.no_default_cover:
|
||||||
|
return None
|
||||||
|
self.log('Generating default cover')
|
||||||
|
m = self.oeb.metadata
|
||||||
|
title = unicode_type(m.title[0])
|
||||||
|
authors = [unicode_type(x) for x in m.creator if x.role == 'aut']
|
||||||
|
try:
|
||||||
|
from calibre.ebooks.covers import create_cover
|
||||||
|
series = series_index = None
|
||||||
|
if m.series:
|
||||||
|
try:
|
||||||
|
series, series_index = unicode_type(m.series[0]), m.series_index[0]
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
|
img_data = create_cover(title, authors, series, series_index)
|
||||||
|
id, href = self.oeb.manifest.generate('cover',
|
||||||
|
'cover_image.jpg')
|
||||||
|
item = self.oeb.manifest.add(id, href, guess_type('t.jpg')[0],
|
||||||
|
data=img_data)
|
||||||
|
m.clear('cover')
|
||||||
|
m.add('cover', item.id)
|
||||||
|
|
||||||
|
return item.href
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to generate default cover')
|
||||||
|
return None
|
||||||
|
|
||||||
|
    def inspect_cover(self, href):
        """Return the (width, height) in pixels of the manifest image at
        ``href``, or (-1, -1) when the item is missing or unreadable."""
        from calibre.ebooks.oeb.base import urlnormalize
        for x in self.oeb.manifest:
            if x.href == urlnormalize(href):
                try:
                    raw = x.data
                    # identify() returns (fmt, width, height); drop the format.
                    return identify(raw)[1:]
                except Exception:
                    self.log.exception('Failed to read cover image dimensions')
        return -1, -1
|
||||||
|
|
||||||
|
    def insert_cover(self):
        """Insert a title page (SVG or plain <img>) at the start of the spine
        and point the guide 'cover'/'titlepage' references at it.

        If the book already has a guide 'titlepage', that existing page is
        reused; otherwise one is built around the guide 'cover' image (or a
        generated default cover).
        """
        from calibre.ebooks.oeb.base import urldefrag
        g, m = self.oeb.guide, self.oeb.manifest
        item = None
        if 'titlepage' not in g:
            if 'cover' in g:
                href = g['cover'].href
            else:
                href = self.default_cover()
                if href is None:
                    # No cover and default generation disabled/failed.
                    return
            width, height = self.inspect_cover(href)
            if width == -1 or height == -1:
                self.log.warning('Failed to read cover dimensions')
                # Fall back to a typical 3:4 cover size.
                width, height = 600, 800
            # if self.preserve_aspect_ratio:
            #     width, height = 600, 800
            # Bake the actual image dimensions into the SVG wrapper.
            self.svg_template = self.svg_template.replace('__viewbox__',
                    '0 0 %d %d'%(width, height))
            self.svg_template = self.svg_template.replace('__width__',
                    unicode_type(width))
            self.svg_template = self.svg_template.replace('__height__',
                    unicode_type(height))

            if href is not None:
                templ = self.non_svg_template if self.no_svg_cover \
                        else self.svg_template
                tp = templ%unquote(href)
                id, href = m.generate('titlepage', 'titlepage.xhtml')
                item = m.add(id, href, guess_type('t.xhtml')[0],
                        data=safe_xml_fromstring(tp))
        else:
            # Reuse the existing title page from the guide.
            item = self.oeb.manifest.hrefs[
                    urldefrag(self.oeb.guide['titlepage'].href)[0]]
        if item is not None:
            # Title page always goes first in the spine, marked linear.
            self.oeb.spine.insert(0, item, True)
            if 'cover' not in self.oeb.guide.refs:
                self.oeb.guide.add('cover', 'Title Page', 'a')
            self.oeb.guide.refs['cover'].href = item.href
            if 'titlepage' in self.oeb.guide.refs:
                self.oeb.guide.refs['titlepage'].href = item.href
            # Re-point any TOC entry that referred to the old cover.
            titem = getattr(self.oeb.toc, 'item_that_refers_to_cover', None)
            if titem is not None:
                titem.href = item.href
|
||||||
187
ebook_converter/ebooks/oeb/transforms/filenames.py
Normal file
187
ebook_converter/ebooks/oeb/transforms/filenames.py
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import posixpath
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import rewrite_links, urlnormalize
|
||||||
|
from polyglot.urllib import urldefrag, urlparse
|
||||||
|
|
||||||
|
|
||||||
|
class RenameFiles(object):  # {{{

    '''
    Rename files and adjust all links pointing to them. Note that the spine
    and manifest are not touched by this transform.
    '''

    def __init__(self, rename_map, renamed_items_map=None):
        # rename_map: {old absolute href: new absolute href}
        # renamed_items_map: {new href: original item} — supplied by
        # FlatFilenames so relative links can be resolved against the
        # item's ORIGINAL location.
        self.rename_map = rename_map
        self.renamed_items_map = renamed_items_map

    def __call__(self, oeb, opts):
        import css_parser
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb

        # Rewrite links inside every XML document and CSS stylesheet.
        for item in oeb.manifest.items:
            self.current_item = item
            if etree.iselement(item.data):
                rewrite_links(self.current_item.data, self.url_replacer)
            elif hasattr(item.data, 'cssText'):
                css_parser.replaceUrls(item.data, self.url_replacer)

        # Guide hrefs are absolute, so they can be mapped directly.
        if self.oeb.guide:
            for ref in self.oeb.guide.values():
                href = urlnormalize(ref.href)
                href, frag = urldefrag(href)
                replacement = self.rename_map.get(href, None)
                if replacement is not None:
                    nhref = replacement
                    if frag:
                        nhref += '#' + frag
                    ref.href = nhref

        if self.oeb.toc:
            self.fix_toc_entry(self.oeb.toc)

    def fix_toc_entry(self, toc):
        # Recursively remap this TOC node and all of its children,
        # preserving any fragment identifier.
        if toc.href:
            href = urlnormalize(toc.href)
            href, frag = urldefrag(href)
            replacement = self.rename_map.get(href, None)

            if replacement is not None:
                nhref = replacement
                if frag:
                    nhref = '#'.join((nhref, frag))
                toc.href = nhref

        for x in toc:
            self.fix_toc_entry(x)

    def url_replacer(self, orig_url):
        # Callback for rewrite_links/replaceUrls: map one (possibly
        # relative) URL found in self.current_item to its renamed target.
        url = urlnormalize(orig_url)
        parts = urlparse(url)
        if parts.scheme:
            # Only rewrite local URLs
            return orig_url
        path, frag = urldefrag(url)
        if self.renamed_items_map:
            # Resolve relative URLs against the item's pre-rename location.
            orig_item = self.renamed_items_map.get(self.current_item.href, self.current_item)
        else:
            orig_item = self.current_item

        href = orig_item.abshref(path)
        # Map to the new target (if renamed) and make it relative to the
        # current (possibly renamed) item.
        replacement = self.current_item.relhref(self.rename_map.get(href, href))
        if frag:
            replacement += '#' + frag
        return replacement

# }}}
|
||||||
|
|
||||||
|
|
||||||
|
class UniqueFilenames(object):  # {{{

    'Ensure that every item in the manifest has a unique filename'

    def __call__(self, oeb, opts):
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb

        # Filenames (basenames) already claimed, across all directories.
        self.seen_filenames = set()
        # {old href: new href} for the RenameFiles pass at the end.
        self.rename_map = {}

        for item in list(oeb.manifest.items):
            fname = posixpath.basename(item.href)
            if fname in self.seen_filenames:
                suffix = self.unique_suffix(fname)
                # Capture data before removing the item: removal may drop it.
                data = item.data
                base, ext = posixpath.splitext(item.href)
                nhref = base + suffix + ext
                nhref = oeb.manifest.generate(href=nhref)[1]
                spine_pos = item.spine_position
                # Replace the item in the manifest under the new href,
                # restoring its spine position afterwards.
                oeb.manifest.remove(item)
                nitem = oeb.manifest.add(item.id, nhref, item.media_type, data=data,
                        fallback=item.fallback)
                self.seen_filenames.add(posixpath.basename(nhref))
                self.rename_map[item.href] = nhref
                if spine_pos is not None:
                    oeb.spine.insert(spine_pos, nitem, item.linear)
            else:
                self.seen_filenames.add(fname)

        if self.rename_map:
            self.log('Found non-unique filenames, renaming to support broken'
                    ' EPUB readers like FBReader, Aldiko and Stanza...')
            from pprint import pformat
            self.log.debug(pformat(self.rename_map))

            # Fix all links that pointed at the renamed files.
            renamer = RenameFiles(self.rename_map)
            renamer(oeb, opts)

    def unique_suffix(self, fname):
        # Smallest '_u<N>' suffix that makes fname unique among seen names.
        base, ext = posixpath.splitext(fname)
        c = 0
        while True:
            c += 1
            suffix = '_u%d'%c
            candidate = base + suffix + ext
            if candidate not in self.seen_filenames:
                return suffix
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
class FlatFilenames(object):  # {{{

    'Ensure that every item in the manifest has a unique filename without subdirectories.'

    def __call__(self, oeb, opts):
        """Flatten every manifest href (a/b/c.html -> a_b_c.html) and then
        rewrite all links via RenameFiles."""
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb

        self.rename_map = {}
        self.renamed_items_map = {}

        for item in list(oeb.manifest.items):
            # Flatten URL by removing directories.
            # Example: a/b/c/index.html -> a_b_c_index.html
            flat_href = item.href.replace("/", "_")

            if item.href == flat_href:
                # URL hasn't changed, skip item.
                continue

            payload = item.data
            spine_idx = item.spine_position
            flat_href = oeb.manifest.generate(href=flat_href)[1]
            if spine_idx is not None:
                oeb.spine.remove(item)
            oeb.manifest.remove(item)

            replacement = oeb.manifest.add(item.id, flat_href,
                    item.media_type, data=payload, fallback=item.fallback)
            self.rename_map[item.href] = flat_href
            # Keep the original item so relative links can be resolved
            # against its pre-rename location.
            self.renamed_items_map[flat_href] = item
            if spine_idx is not None:
                oeb.spine.insert(spine_idx, replacement, item.linear)

        if self.rename_map:
            self.log('Found non-flat filenames, renaming to support broken'
                    ' EPUB readers like FBReader...')
            from pprint import pformat
            self.log.debug(pformat(self.rename_map))
            self.log.debug(pformat(self.renamed_items_map))

            renamer = RenameFiles(self.rename_map, self.renamed_items_map)
            renamer(oeb, opts)
# }}}
|
||||||
81
ebook_converter/ebooks/oeb/transforms/rescale.py
Normal file
81
ebook_converter/ebooks/oeb/transforms/rescale.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre import fit_image
|
||||||
|
|
||||||
|
|
||||||
|
class RescaleImages(object):

    'Rescale all images to fit inside given screen size'

    def __init__(self, check_colorspaces=False):
        # When True, CMYK images are converted to RGB (Adobe Digital
        # Editions cannot display CMYK).
        self.check_colorspaces = check_colorspaces

    def __call__(self, oeb, opts):
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        self.rescale()

    def rescale(self):
        from PIL import Image
        from io import BytesIO

        is_image_collection = getattr(self.opts, 'is_image_collection', False)

        # Target page size: comics use the full comic screen; normal books
        # subtract the page margins (converted from pt to output-device px).
        if is_image_collection:
            page_width, page_height = self.opts.dest.comic_screen_size
        else:
            page_width, page_height = self.opts.dest.width, self.opts.dest.height
            page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72
            page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72

        for item in self.oeb.manifest:
            if item.media_type.startswith('image'):
                # Normalize the format name for PIL; anything exotic is
                # re-saved as JPEG.
                ext = item.media_type.split('/')[-1].upper()
                if ext == 'JPG':
                    ext = 'JPEG'
                if ext not in ('PNG', 'JPEG', 'GIF'):
                    ext = 'JPEG'

                raw = item.data
                if hasattr(raw, 'xpath') or not raw:
                    # Probably an svg image
                    continue
                try:
                    img = Image.open(BytesIO(raw))
                except Exception:
                    # Undecodable image data: leave it untouched.
                    continue
                width, height = img.size

                try:
                    if self.check_colorspaces and img.mode == 'CMYK':
                        self.log.warn(
                            'The image %s is in the CMYK colorspace, converting it '
                            'to RGB as Adobe Digital Editions cannot display CMYK' % item.href)
                        img = img.convert('RGB')
                except Exception:
                    self.log.exception('Failed to convert image %s from CMYK to RGB' % item.href)

                # fit_image returns (needs_scaling, new_w, new_h) preserving
                # aspect ratio within the page box.
                scaled, new_width, new_height = fit_image(width, height, page_width, page_height)
                if scaled:
                    new_width = max(1, new_width)
                    new_height = max(1, new_height)
                    self.log('Rescaling image from %dx%d to %dx%d'%(
                        width, height, new_width, new_height), item.href)
                    try:
                        img = img.resize((new_width, new_height))
                    except Exception:
                        self.log.exception('Failed to rescale image: %s' % item.href)
                        continue
                    buf = BytesIO()
                    try:
                        img.save(buf, ext)
                    except Exception:
                        self.log.exception('Failed to rescale image: %s' % item.href)
                    else:
                        # Only replace the data if the re-save succeeded.
                        item.data = buf.getvalue()
                        item.unload_data_from_memory()
||||||
488
ebook_converter/ebooks/oeb/transforms/split.py
Normal file
488
ebook_converter/ebooks/oeb/transforms/split.py
Normal file
@@ -0,0 +1,488 @@
|
|||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Splitting of the XHTML flows. Splitting can happen on page boundaries or can be
|
||||||
|
forced at "likely" locations to conform to size limitations. This transform
|
||||||
|
assumes a prior call to the flatcss transform.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os, functools, collections, re, copy
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
from lxml.etree import XPath as _XPath
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre import as_unicode, force_unicode
|
||||||
|
from calibre.ebooks.epub import rules
|
||||||
|
from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
|
||||||
|
urldefrag, rewrite_links, XHTML, urlnormalize)
|
||||||
|
from calibre.ebooks.oeb.polish.split import do_split
|
||||||
|
from polyglot.builtins import iteritems, range, map, unicode_type
|
||||||
|
from polyglot.urllib import unquote
|
||||||
|
from css_selectors import Select, SelectorError
|
||||||
|
|
||||||
|
# XPath factory with the OEB namespace map pre-bound.
XPath = functools.partial(_XPath, namespaces=NAMESPACES)

# Attribute used to mark elements already tried as split points, so
# find_split_point does not pick the same element twice.
SPLIT_POINT_ATTR = 'csp'
||||||
|
|
||||||
|
def tostring(root):
    """Serialize *root* to UTF-8 encoded bytes (used to measure flow size)."""
    return etree.tostring(root, encoding='utf-8')
||||||
|
|
||||||
|
|
||||||
|
class SplitError(ValueError):
    """Raised when no reasonable split point exists in an oversized tree."""

    def __init__(self, path, root):
        kb = len(tostring(root)) / 1024.
        message = _('Could not find reasonable point at which to split: '
                '%(path)s Sub-tree size: %(size)d KB') % dict(
                        path=path, size=kb)
        ValueError.__init__(self, message)
|
|
||||||
|
|
||||||
|
class Split(object):
    """Split XHTML flows on page breaks and/or a maximum flow size.

    Page breaks come either from an explicit XPath or from CSS
    page-break-before/after rules; size splitting is delegated to
    FlowSplitter. Assumes a prior flatcss transform.
    """

    def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None,
            max_flow_size=0, remove_css_pagebreaks=True):
        self.split_on_page_breaks = split_on_page_breaks
        self.page_breaks_xpath = page_breaks_xpath
        # 0 disables size-based splitting.
        self.max_flow_size = max_flow_size
        self.page_break_selectors = None
        self.remove_css_pagebreaks = remove_css_pagebreaks
        if self.page_breaks_xpath is not None:
            # Explicit XPath overrides CSS-derived selectors; False means
            # "split after" the matched element.
            self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)]

    def __call__(self, oeb, opts):
        self.oeb = oeb
        self.log = oeb.log
        self.log('Splitting markup on page breaks and flow limits, if any...')
        self.opts = opts
        # {original href: anchor -> new file href} for link fixing.
        self.map = {}
        for item in list(self.oeb.manifest.items):
            if item.spine_position is not None and etree.iselement(item.data):
                self.split_item(item)

        self.fix_links()

    def split_item(self, item):
        page_breaks, page_break_ids = [], []
        if self.split_on_page_breaks:
            page_breaks, page_break_ids = self.find_page_breaks(item)

        splitter = FlowSplitter(item, page_breaks, page_break_ids,
                self.max_flow_size, self.oeb, self.opts)
        if splitter.was_split:
            am = splitter.anchor_map
            # Copy the defaultdict so later mutations of the splitter do
            # not affect the stored map.
            self.map[item.href] = collections.defaultdict(
                    am.default_factory, am)

    def find_page_breaks(self, item):
        """Return ([(xpath, split_before)], [ids]) for page-break points in
        ``item``, deriving selectors from CSS rules on first use."""
        if self.page_break_selectors is None:
            self.page_break_selectors = set()
            stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
                    OEB_STYLES]
            for rule in rules(stylesheets):
                before = force_unicode(getattr(rule.style.getPropertyCSSValue(
                    'page-break-before'), 'cssText', '').strip().lower())
                after = force_unicode(getattr(rule.style.getPropertyCSSValue(
                    'page-break-after'), 'cssText', '').strip().lower())
                # NOTE(review): bare excepts below keep CSS parsing
                # best-effort; a malformed rule must not abort the split.
                try:
                    if before and before not in {'avoid', 'auto', 'inherit'}:
                        self.page_break_selectors.add((rule.selectorText, True))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty('page-break-before')
                except:
                    pass
                try:
                    if after and after not in {'avoid', 'auto', 'inherit'}:
                        self.page_break_selectors.add((rule.selectorText, False))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty('page-break-after')
                except:
                    pass
        page_breaks = set()
        select = Select(item.data)
        if not self.page_break_selectors:
            return [], []
        body = item.data.xpath('//h:body', namespaces=NAMESPACES)
        if not body:
            return [], []
        # Only elements inside <body> are valid split points.
        descendants = frozenset(body[0].iterdescendants('*'))

        for selector, before in self.page_break_selectors:
            try:
                for elem in select(selector):
                    if elem in descendants and elem.tag.rpartition('}')[2].lower() not in {'html', 'body', 'head', 'style', 'script', 'meta', 'link'}:
                        elem.set('pb_before', '1' if before else '0')
                        page_breaks.add(elem)
            except SelectorError as err:
                self.log.warn('Ignoring page breaks specified with invalid CSS selector: %r (%s)' % (selector, as_unicode(err)))

        # Tag every element with its document order so the break points can
        # be sorted into document order below.
        for i, elem in enumerate(item.data.iter('*')):
            try:
                elem.set('pb_order', unicode_type(i))
            except TypeError:  # Cant set attributes on comment nodes etc.
                continue

        page_breaks = list(page_breaks)
        page_breaks.sort(key=lambda x:int(x.get('pb_order')))
        page_break_ids, page_breaks_ = [], []
        for i, x in enumerate(page_breaks):
            # Ensure each break element has an id we can find again after
            # the tree has been copied/split.
            x.set('id', x.get('id', 'calibre_pb_%d'%i))
            id = x.get('id')
            try:
                xp = XPath('//*[@id="%s"]'%id)
            except:
                try:
                    xp = XPath("//*[@id='%s']"%id)
                except:
                    # The id has both a quote and an apostrophe or some other
                    # Just replace it since I doubt its going to work anywhere else
                    # either
                    id = 'calibre_pb_%d'%i
                    x.set('id', id)
                    xp = XPath('//*[@id=%r]'%id)
            page_breaks_.append((xp, x.get('pb_before', '0') == '1'))
            page_break_ids.append(id)

        # Remove the temporary bookkeeping attributes.
        for elem in item.data.iter(etree.Element):
            elem.attrib.pop('pb_order', False)
            elem.attrib.pop('pb_before', False)

        return page_breaks_, page_break_ids

    def fix_links(self):
        '''
        Fix references to the split files in other content files.
        '''
        for item in self.oeb.manifest:
            if etree.iselement(item.data):
                self.current_item = item
                rewrite_links(item.data, self.rewrite_links)

    def rewrite_links(self, url):
        # Callback for rewrite_links: re-point links into a split file at
        # the fragment's new home.
        href, frag = urldefrag(url)
        try:
            href = self.current_item.abshref(href)
        except ValueError:
            # Unparseable URL
            return url
        try:
            href = urlnormalize(href)
        except ValueError:
            # href has non utf-8 quoting
            return url
        if href in self.map:
            anchor_map = self.map[href]
            # None key maps to the first split part.
            nhref = anchor_map[frag if frag else None]
            nhref = self.current_item.relhref(nhref)
            if frag:
                nhref = '#'.join((unquote(nhref), frag))

            return nhref
        return url
|
class FlowSplitter(object):
    'The actual splitting logic'

    def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb,
            opts):
        self.item = item
        self.oeb = oeb
        self.opts = opts
        self.log = oeb.log
        self.page_breaks = page_breaks
        self.page_break_ids = page_break_ids
        self.max_flow_size = max_flow_size
        self.base = item.href
        self.csp_counter = 0

        # 'name.html' -> 'name_split_%.3d.html'; literal % in the name is
        # escaped so the later self.base%i substitution is safe.
        base, ext = os.path.splitext(self.base)
        self.base = base.replace('%', '%%')+'_split_%.3d'+ext

        self.trees = [self.item.data.getroottree()]
        self.splitting_on_page_breaks = True
        if self.page_breaks:
            self.split_on_page_breaks(self.trees[0])
        self.splitting_on_page_breaks = False

        if self.max_flow_size > 0:
            lt_found = False
            self.log('\tLooking for large trees in %s...'%item.href)
            trees = list(self.trees)
            self.tree_map = {}
            for i, tree in enumerate(trees):
                size = len(tostring(tree.getroot()))
                if size > self.max_flow_size:
                    self.log('\tFound large tree #%d'%i)
                    lt_found = True
                    self.split_trees = []
                    self.split_to_size(tree)
                    self.tree_map[tree] = self.split_trees
            if not lt_found:
                self.log('\tNo large trees found')
            # Replace each oversized tree with its split parts, in order.
            self.trees = []
            for x in trees:
                self.trees.extend(self.tree_map.get(x, [x]))

        self.was_split = len(self.trees) > 1
        if self.was_split:
            self.log('\tSplit into %d parts'%len(self.trees))
        self.commit()

    def split_on_page_breaks(self, orig_tree):
        # Process break points in document order (ordered_ids preserves the
        # order in which ids appear in the tree).
        ordered_ids = OrderedDict()
        all_page_break_ids = frozenset(self.page_break_ids)
        for elem_id in orig_tree.xpath('//*/@id'):
            if elem_id in all_page_break_ids:
                ordered_ids[elem_id] = self.page_breaks[
                    self.page_break_ids.index(elem_id)]

        self.trees = [orig_tree]
        while ordered_ids:
            pb_id, (pattern, before) = next(iteritems(ordered_ids))
            del ordered_ids[pb_id]
            # Search from the last part backwards: later break points live
            # in later parts, so this finds the break quickly.
            for i in range(len(self.trees)-1, -1, -1):
                tree = self.trees[i]
                elem = pattern(tree)
                if elem:
                    self.log.debug('\t\tSplitting on page-break at id=%s'%
                            elem[0].get('id'))
                    before_tree, after_tree = self.do_split(tree, elem[0], before)
                    self.trees[i:i+1] = [before_tree, after_tree]
                    break

        # Drop empty parts, but preserve their (non-calibre) anchors by
        # injecting invisible placeholder divs into the next non-empty part.
        trees, ids = [], set()
        for tree in self.trees:
            root = tree.getroot()
            if self.is_page_empty(root):
                discarded_ids = root.xpath('//*[@id]')
                for x in discarded_ids:
                    x = x.get('id')
                    if not x.startswith('calibre_'):
                        ids.add(x)
            else:
                if ids:
                    body = self.get_body(root)
                    if body is not None:
                        existing_ids = frozenset(body.xpath('//*/@id'))
                        for x in ids - existing_ids:
                            body.insert(0, body.makeelement(XHTML('div'), id=x, style='height:0pt'))
                ids = set()
                trees.append(tree)
        self.trees = trees

    def get_body(self, root):
        # Return the <body> element of root, or None.
        body = root.xpath('//h:body', namespaces=NAMESPACES)
        if not body:
            return None
        return body[0]

    def do_split(self, tree, split_point, before):
        '''
        Split ``tree`` into a *before* and *after* tree at ``split_point``.

        :param before: If True tree is split before split_point, otherwise after split_point
        :return: before_tree, after_tree
        '''
        return do_split(split_point, self.log, before=before)

    def is_page_empty(self, root):
        # A page is "empty" if it has at most one visible text character,
        # no visible images and no SVG content.
        body = self.get_body(root)
        if body is None:
            return False
        txt = re.sub(r'\s+|\xa0', '',
                etree.tostring(body, method='text', encoding='unicode'))
        if len(txt) > 1:
            return False
        for img in root.xpath('//h:img', namespaces=NAMESPACES):
            if img.get('style', '') != 'display:none':
                return False
        if root.xpath('//*[local-name() = "svg"]'):
            return False
        return True

    def split_text(self, text, root, size):
        # Chunk a long text run on blank lines so each chunk fits in
        # ``size`` characters; raises SplitError if any single paragraph is
        # already too large.
        self.log.debug('\t\t\tSplitting text of length: %d'%len(text))
        rest = text.replace('\r', '')
        parts = re.split('\n\n', rest)
        self.log.debug('\t\t\t\tFound %d parts'%len(parts))
        if max(map(len, parts)) > size:
            raise SplitError('Cannot split as file contains a <pre> tag '
                'with a very large paragraph', root)
        ans = []
        buf = ''
        for part in parts:
            if len(buf) + len(part) < size:
                buf += '\n\n'+part
            else:
                ans.append(buf)
                buf = part
        return ans

    def split_to_size(self, tree):
        # Recursively split ``tree`` until every part is under
        # self.max_flow_size, collecting parts into self.split_trees.
        self.log.debug('\t\tSplitting...')
        root = tree.getroot()
        # Split large <pre> tags if they contain only text
        for pre in XPath('//h:pre')(root):
            if len(tuple(pre.iterchildren(etree.Element))) > 0:
                continue
            if pre.text and len(pre.text) > self.max_flow_size*0.5:
                self.log.debug('\t\tSplitting large <pre> tag')
                frags = self.split_text(pre.text, root, int(0.2*self.max_flow_size))
                new_pres = []
                for frag in frags:
                    pre2 = copy.copy(pre)
                    pre2.text = frag
                    pre2.tail = ''
                    new_pres.append(pre2)
                new_pres[-1].tail = pre.tail
                p = pre.getparent()
                i = p.index(pre)
                p[i:i+1] = new_pres

        split_point, before = self.find_split_point(root)
        if split_point is None:
            raise SplitError(self.item.href, root)
        self.log.debug('\t\t\tSplit point:', split_point.tag, tree.getpath(split_point))

        trees = self.do_split(tree, split_point, before)
        sizes = [len(tostring(t.getroot())) for t in trees]
        if min(sizes) < 5*1024:
            # One side is tiny: re-split the original at a different point
            # (the used split point is marked via SPLIT_POINT_ATTR).
            self.log.debug('\t\t\tSplit tree too small')
            self.split_to_size(tree)
            return

        for t, size in zip(trees, sizes):
            r = t.getroot()
            if self.is_page_empty(r):
                continue
            elif size <= self.max_flow_size:
                self.split_trees.append(t)
                self.log.debug(
                    '\t\t\tCommitted sub-tree #%d (%d KB)'%(
                        len(self.split_trees), size/1024.))
            else:
                self.log.debug(
                    '\t\t\tSplit tree still too large: %d KB' % (size/1024.))
                self.split_to_size(t)

    def find_split_point(self, root):
        '''
        Find the tag at which to split the tree rooted at `root`.
        Search order is:
            * Heading tags
            * <div> tags
            * <pre> tags
            * <hr> tags
            * <p> tags
            * <br> tags
            * <li> tags

        We try to split in the "middle" of the file (as defined by tag counts.
        '''
        def pick_elem(elems):
            # Middle not-yet-tried candidate; mark it as tried.
            if elems:
                elems = [i for i in elems if i.get(SPLIT_POINT_ATTR, '0') !=
                        '1']
                if elems:
                    i = int(len(elems)//2)
                    elems[i].set(SPLIT_POINT_ATTR, '1')
                    return elems[i]

        for path in (
                '//*[re:match(name(), "h[1-6]", "i")]',
                '/h:html/h:body/h:div',
                '//h:pre',
                '//h:hr',
                '//h:p',
                '//h:div',
                '//h:br',
                '//h:li',
                ):
            elems = root.xpath(path, namespaces=NAMESPACES)
            elem = pick_elem(elems)
            if elem is not None:
                try:
                    # Sanity check: the element must be addressable by path
                    # after the tree is copied during the split.
                    XPath(elem.getroottree().getpath(elem))
                except:
                    continue
                return elem, True

        return None, True

    def commit(self):
        '''
        Commit all changes caused by the split. Calculates an *anchor_map* for
        all anchors in the original tree. Internal links are re-directed. The
        original file is deleted and the split files are saved.
        '''
        if not self.was_split:
            return
        # Anchors not found anywhere default to the first split file.
        self.anchor_map = collections.defaultdict(lambda :self.base%0)
        self.files = []

        for i, tree in enumerate(self.trees):
            root = tree.getroot()
            self.files.append(self.base%i)
            for elem in root.xpath('//*[@id or @name]'):
                for anchor in elem.get('id', ''), elem.get('name', ''):
                    if anchor != '' and anchor not in self.anchor_map:
                        self.anchor_map[anchor] = self.files[-1]
            # Strip the split-point bookkeeping attribute.
            for elem in root.xpath('//*[@%s]'%SPLIT_POINT_ATTR):
                elem.attrib.pop(SPLIT_POINT_ATTR, '0')

        spine_pos = self.item.spine_position

        # Insert in reverse so repeated inserts at spine_pos end up in
        # document order.
        for current, tree in zip(*map(reversed, (self.files, self.trees))):
            # Fix intra-file '#anchor' links that now point across files.
            for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
                href = a.get('href').strip()
                if href.startswith('#'):
                    anchor = href[1:]
                    file = self.anchor_map[anchor]
                    file = self.item.relhref(file)
                    if file != current:
                        a.set('href', file+href)

            new_id = self.oeb.manifest.generate(id=self.item.id)[0]
            new_item = self.oeb.manifest.add(new_id, current,
                    self.item.media_type, data=tree.getroot())
            self.oeb.spine.insert(spine_pos, new_item, self.item.linear)

        # Re-point guide, TOC and page-list references at the part that now
        # contains their target anchor.
        if self.oeb.guide:
            for ref in self.oeb.guide.values():
                href, frag = urldefrag(ref.href)
                if href == self.item.href:
                    nhref = self.anchor_map[frag if frag else None]
                    if frag:
                        nhref = '#'.join((nhref, frag))
                    ref.href = nhref

        def fix_toc_entry(toc):
            if toc.href:
                href, frag = urldefrag(toc.href)
                if href == self.item.href:
                    nhref = self.anchor_map[frag if frag else None]
                    if frag:
                        nhref = '#'.join((nhref, frag))
                    toc.href = nhref
            for x in toc:
                fix_toc_entry(x)

        if self.oeb.toc:
            fix_toc_entry(self.oeb.toc)

        if self.oeb.pages:
            for page in self.oeb.pages:
                href, frag = urldefrag(page.href)
                if href == self.item.href:
                    nhref = self.anchor_map[frag if frag else None]
                    if frag:
                        nhref = '#'.join((nhref, frag))
                    page.href = nhref

        # The original (unsplit) file is no longer needed.
        self.oeb.manifest.remove(self.item)
||||||
Reference in New Issue
Block a user