mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-18 03:53:32 +02:00
Added epub write support
This commit is contained in:
762
ebook_converter/ebooks/covers.py
Normal file
762
ebook_converter/ebooks/covers.py
Normal file
@@ -0,0 +1,762 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import re, random, unicodedata, numbers
|
||||||
|
from collections import namedtuple
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from math import ceil, sqrt, cos, sin, atan2
|
||||||
|
from polyglot.builtins import iteritems, itervalues, map, zip, string_or_bytes
|
||||||
|
from itertools import chain
|
||||||
|
|
||||||
|
from PyQt5.Qt import (
|
||||||
|
QImage, Qt, QFont, QPainter, QPointF, QTextLayout, QTextOption,
|
||||||
|
QFontMetrics, QTextCharFormat, QColor, QRect, QBrush, QLinearGradient,
|
||||||
|
QPainterPath, QPen, QRectF, QTransform, QRadialGradient
|
||||||
|
)
|
||||||
|
|
||||||
|
from calibre import force_unicode, fit_image
|
||||||
|
from calibre.constants import __appname__, __version__
|
||||||
|
from calibre.ebooks.metadata import fmt_sidx
|
||||||
|
from calibre.ebooks.metadata.book.base import Metadata
|
||||||
|
from calibre.ebooks.metadata.book.formatter import SafeFormat
|
||||||
|
from calibre.gui2 import ensure_app, config, load_builtin_fonts, pixmap_to_data
|
||||||
|
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
|
||||||
|
from calibre.utils.config import JSONConfig
|
||||||
|
|
||||||
|
# Default settings {{{
# Persistent preferences for cover generation (stored as cover_generation.json)
cprefs = JSONConfig('cover_generation')
cprefs.defaults['title_font_size'] = 120  # px
cprefs.defaults['subtitle_font_size'] = 80  # px
cprefs.defaults['footer_font_size'] = 80  # px
cprefs.defaults['cover_width'] = 1200  # px
cprefs.defaults['cover_height'] = 1600  # px
# None means fall back to the bundled Liberation fonts (see layout_text())
cprefs.defaults['title_font_family'] = None
cprefs.defaults['subtitle_font_family'] = None
cprefs.defaults['footer_font_family'] = None
# User-defined color themes and the names of themes/styles the user disabled
cprefs.defaults['color_themes'] = {}
cprefs.defaults['disabled_color_themes'] = []
cprefs.defaults['disabled_styles'] = []
# Templates use the calibre template language; <b>/<i> markup is parsed by
# parse_text_formatting() when the blocks are laid out
cprefs.defaults['title_template'] = '<b>{title}'
cprefs.defaults['subtitle_template'] = '''{series:'test($, strcat("<i>", $, "</i> - ", raw_field("formatted_series_index")), "")'}'''
cprefs.defaults['footer_template'] = r'''program:
# Show at most two authors, on separate lines.
authors = field('authors');
num = count(authors, ' & ');
authors = sublist(authors, 0, 2, ' & ');
authors = list_re(authors, ' & ', '(.+)', '<b>\1');
authors = re(authors, ' & ', '<br>');
re(authors, '&&', '&')
'''
# Immutable snapshot type with one attribute per preference key
Prefs = namedtuple('Prefs', ' '.join(sorted(cprefs.defaults)))

# Per-process override for roman-numeral series indices; None means use the
# global calibre config (see get_use_roman()/set_use_roman())
_use_roman = None
|
||||||
|
|
||||||
|
|
||||||
|
def get_use_roman():
    """Return whether series indices should be rendered as roman numerals.

    Uses the process-local override when one has been set via
    set_use_roman(), otherwise falls back to the global calibre preference.
    """
    global _use_roman
    if _use_roman is not None:
        return _use_roman
    return config['use_roman_numerals_for_series_number']
|
||||||
|
|
||||||
|
|
||||||
|
def set_use_roman(val):
    """Set the process-local roman-numeral override (coerced to bool)."""
    global _use_roman
    _use_roman = True if val else False
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
# Draw text {{{
|
||||||
|
# Simple 2D point used for text-block positions on the cover
Point = namedtuple('Point', 'x y')
|
||||||
|
|
||||||
|
|
||||||
|
def parse_text_formatting(text):
    """Parse simple markup (<b>, <strong>, <i>, <em>) embedded in *text*.

    Returns ``(plain_text, formats)`` where plain_text is the text with the
    tags stripped (HTML entities are left in place; they are resolved later
    by unescape_formatting()) and formats is a list of
    QTextLayout.FormatRange objects covering the bold/italic spans.
    """
    pos = 0
    tokens = []
    for m in re.finditer(r'</?([a-zA-Z1-6]+)/?>', text):
        q = text[pos:m.start()]
        if q:
            tokens.append((False, q))
        # (tag-name, is-closing-tag)
        tokens.append((True, (m.group(1).lower(), '/' in m.group()[:2])))
        pos = m.end()
    if tokens:
        if text[pos:]:
            tokens.append((False, text[pos:]))
    else:
        tokens = [(False, text)]

    ranges, open_ranges, text = [], [], []
    offset = 0
    for is_tag, tok in tokens:
        if is_tag:
            tag, closing = tok
            if closing:
                if open_ranges:
                    r = open_ranges.pop()
                    r[-1] = offset - r[-2]
                    # Discard empty ranges
                    if r[-1] > 0:
                        ranges.append(r)
            else:
                if tag in {'b', 'strong', 'i', 'em'}:
                    open_ranges.append([tag, offset, -1])
        else:
            # BUGFIX: count '&amp;' as the single '&' it will render as,
            # otherwise the format ranges drift out of alignment. The
            # previous code had the no-op tok.replace('&', '&').
            offset += len(tok.replace('&amp;', '&'))
            text.append(tok)
    text = ''.join(text)
    formats = []
    for tag, start, length in chain(ranges, open_ranges):
        fmt = QTextCharFormat()
        if tag in {'b', 'strong'}:
            fmt.setFontWeight(QFont.Bold)
        elif tag in {'i', 'em'}:
            fmt.setFontItalic(True)
        else:
            continue
        if length == -1:
            # Tag was never closed: extend its format to the end of the text
            length = len(text) - start
        if length > 0:
            r = QTextLayout.FormatRange()
            r.format = fmt
            r.start, r.length = start, length
            formats.append(r)
    return text, formats
|
||||||
|
|
||||||
|
|
||||||
|
class Block(object):
    """One laid-out block of rich text (title, subtitle or footer).

    Splits the input on '<br>', lays each segment out as a QTextLayout
    limited to *max_height* pixels, and remembers the leading between
    segments so the whole block can be positioned and drawn as a unit.
    """

    def __init__(self, text='', width=0, font=None, img=None, max_height=100, align=Qt.AlignCenter):
        # self.layouts holds QTextLayout objects interleaved with numeric
        # leading values (inter-segment spacing)
        self.layouts = []
        self._position = Point(0, 0)
        self.leading = self.line_spacing = 0
        if font is not None:
            fm = QFontMetrics(font, img)
            self.leading = fm.leading()
            self.line_spacing = fm.lineSpacing()
        for text in text.split('<br>') if text else ():
            text, formats = parse_text_formatting(sanitize(text))
            l = QTextLayout(unescape_formatting(text), font, img)
            l.setAdditionalFormats(formats)
            to = QTextOption(align)
            to.setWrapMode(QTextOption.WrapAtWordBoundaryOrAnywhere)
            l.setTextOption(to)

            # Lay out lines until the remaining vertical budget is used up
            l.beginLayout()
            height = 0
            while height + 3*self.leading < max_height:
                line = l.createLine()
                if not line.isValid():
                    break
                line.setLineWidth(width)
                height += self.leading
                line.setPosition(QPointF(0, height))
                height += line.height()
            max_height -= height
            l.endLayout()
            if self.layouts:
                self.layouts.append(self.leading)
            else:
                # First segment defines the block's initial position
                self._position = Point(l.position().x(), l.position().y())
            self.layouts.append(l)
        if self.layouts:
            # Trailing leading so stacked blocks do not touch
            self.layouts.append(self.leading)

    @property
    def height(self):
        # Total pixel height: layout bounding rects plus the numeric
        # leading entries interleaved in self.layouts
        return int(ceil(sum(l if isinstance(l, numbers.Number) else l.boundingRect().height() for l in self.layouts)))

    @property
    def position(self):
        return self._position

    @position.setter
    def position(self, new_pos):
        # Move the whole block: stack the layouts vertically from (x, y)
        (x, y) = new_pos
        self._position = Point(x, y)
        if self.layouts:
            self.layouts[0].setPosition(QPointF(x, y))
            y += self.layouts[0].boundingRect().height()
            for l in self.layouts[1:]:
                if isinstance(l, numbers.Number):
                    y += l
                else:
                    l.setPosition(QPointF(x, y))
                    y += l.boundingRect().height()

    def draw(self, painter):
        for l in self.layouts:
            if hasattr(l, 'draw'):
                # Etch effect for the text: a translucent white copy offset
                # by one pixel underneath the real text
                painter.save()
                painter.setRenderHints(QPainter.TextAntialiasing | QPainter.Antialiasing)
                painter.save()
                painter.setPen(QColor(255, 255, 255, 125))
                l.draw(painter, QPointF(1, 1))
                painter.restore()
                l.draw(painter, QPointF())
                painter.restore()
|
||||||
|
|
||||||
|
|
||||||
|
def layout_text(prefs, img, title, subtitle, footer, max_height, style):
    """Lay out the title, subtitle and footer text blocks on the cover.

    :param prefs: a Prefs namedtuple with font families/sizes
    :param img: the QImage being rendered (used for its dimensions)
    :param max_height: vertical budget for the title+subtitle area, px
    :param style: Style instance providing margins and alignments
    :return: (title_block, subtitle_block, footer_block) Block instances
    """
    # Removed the dead no-op ``title, subtitle, footer = title, subtitle,
    # footer`` self-assignment that was here.
    width = img.width() - 2 * style.hmargin
    title_font = QFont(prefs.title_font_family or 'Liberation Serif')
    title_font.setPixelSize(prefs.title_font_size)
    title_font.setStyleStrategy(QFont.PreferAntialias)
    title_block = Block(title, width, title_font, img, max_height, style.TITLE_ALIGN)
    title_block.position = style.hmargin, style.vmargin
    subtitle_block = Block()
    if subtitle:
        subtitle_font = QFont(prefs.subtitle_font_family or 'Liberation Sans')
        subtitle_font.setPixelSize(prefs.subtitle_font_size)
        subtitle_font.setStyleStrategy(QFont.PreferAntialias)
        # Subtitle gets whatever vertical budget the title left over
        gap = 2 * title_block.leading
        mh = max_height - title_block.height - gap
        subtitle_block = Block(subtitle, width, subtitle_font, img, mh, style.SUBTITLE_ALIGN)
        subtitle_block.position = style.hmargin, title_block.position.y + title_block.height + gap

    footer_font = QFont(prefs.footer_font_family or 'Liberation Serif')
    footer_font.setStyleStrategy(QFont.PreferAntialias)
    footer_font.setPixelSize(prefs.footer_font_size)
    footer_block = Block(footer, width, footer_font, img, max_height, style.FOOTER_ALIGN)
    # Footer is bottom-anchored
    footer_block.position = style.hmargin, img.height() - style.vmargin - footer_block.height

    return title_block, subtitle_block, footer_block
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Format text using templates {{{
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize(s):
    """Force *s* to unicode, strip control/invalid-XML characters and
    return the NFC-normalized result."""
    cleaned = clean_xml_chars(clean_ascii_chars(force_unicode(s or '')))
    return unicodedata.normalize('NFC', cleaned)
|
||||||
|
|
||||||
|
|
||||||
|
# Lazily-created process-wide Formatter instance (see formatter())
_formatter = None
# Shared compiled-template cache passed to safe_format()
_template_cache = {}
|
||||||
|
|
||||||
|
|
||||||
|
def escape_formatting(val):
    """HTML-escape *val* so literal &, < and > survive markup parsing.

    BUGFIX: the previous body contained identity replaces like
    ``replace('&', '&')`` (the entity strings had been corrupted), making
    the function a no-op. Restore the proper entity escaping; '&' must be
    escaped first so it is not double-escaped.
    """
    return val.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
|
||||||
|
|
||||||
|
|
||||||
|
def unescape_formatting(val):
    """Inverse of escape_formatting(): convert entities back to characters.

    BUGFIX: the previous body contained identity replaces (corrupted entity
    strings), making the function a no-op. '&amp;' is resolved last so the
    round-trip with escape_formatting() is lossless.
    """
    return val.replace('&lt;', '<').replace('&gt;', '>').replace('&amp;', '&')
|
||||||
|
|
||||||
|
|
||||||
|
class Formatter(SafeFormat):
    """A SafeFormat that HTML-escapes every interpolated field value, so
    metadata containing &, < or > cannot be mistaken for markup."""

    def get_value(self, orig_key, args, kwargs):
        raw = SafeFormat.get_value(self, orig_key, args, kwargs)
        return escape_formatting(raw)
|
||||||
|
|
||||||
|
|
||||||
|
def formatter():
    """Return the process-wide Formatter, creating it on first use."""
    global _formatter
    if _formatter is None:
        _formatter = Formatter()
    return _formatter
|
||||||
|
|
||||||
|
|
||||||
|
def format_fields(mi, prefs):
    """Render the title/subtitle/footer templates against the metadata *mi*.

    Returns an iterator over the three rendered strings, in that order.
    """
    fmt = formatter()

    def render(field):
        # Each template lives in the prefs attribute named after the field
        template = getattr(prefs, field)
        return fmt.safe_format(
            template, mi, _('Template error'), mi, template_cache=_template_cache)

    return map(render, ('title_template', 'subtitle_template', 'footer_template'))
|
||||||
|
|
||||||
|
|
||||||
|
@contextmanager
def preserve_fields(obj, fields):
    """Context manager that restores the listed attributes of *obj* on exit.

    *fields* may be an iterable of names or a whitespace-separated string.
    Attributes that did not exist beforehand are deleted again on exit.
    """
    if isinstance(fields, string_or_bytes):
        fields = fields.split()
    missing = object()  # sentinel marking attributes that were absent
    saved = {name: getattr(obj, name, missing) for name in fields}
    try:
        yield
    finally:
        for name, old in iteritems(saved):
            if old is missing:
                delattr(obj, name)
            else:
                setattr(obj, name, old)
|
||||||
|
|
||||||
|
|
||||||
|
def format_text(mi, prefs):
    # Render the three cover text blocks from the user templates, patching
    # mi only for the duration of the rendering.
    with preserve_fields(mi, 'authors formatted_series_index'):
        # Drop placeholder authors so "Unknown" never appears on a cover
        mi.authors = [a for a in mi.authors if a != _('Unknown')]
        mi.formatted_series_index = fmt_sidx(mi.series_index or 0, use_roman=get_use_roman())
        return tuple(format_fields(mi, prefs))
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
# Colors {{{
|
||||||
|
# The four colors of a theme: two background shades and their contrasting
# text colors
ColorTheme = namedtuple('ColorTheme', 'color1 color2 contrast_color1 contrast_color2')


def to_theme(x):
    """Parse a whitespace-separated string of four hex colors into a dict
    keyed by the ColorTheme field names."""
    values = x.split()
    return dict(zip(ColorTheme._fields[:4], values))
|
||||||
|
|
||||||
|
|
||||||
|
# Black-on-white theme used when a theme entry is missing or invalid
fallback_colors = to_theme('ffffff 000000 000000 ffffff')

# Builtin color themes; users can add more via cprefs['color_themes']
default_color_themes = {
    'Earth': to_theme('e8d9ac c7b07b 564628 382d1a'),
    'Grass': to_theme('d8edb5 abc8a4 375d3b 183128'),
    'Water': to_theme('d3dcf2 829fe4 00448d 00305a'),
    'Silver': to_theme('e6f1f5 aab3b6 6e7476 3b3e40'),
}
|
||||||
|
|
||||||
|
|
||||||
|
def theme_to_colors(theme):
    """Convert a field->hex-string mapping into a ColorTheme of QColors."""
    qcolors = {field: QColor('#' + theme[field]) for field in ColorTheme._fields}
    return ColorTheme(**qcolors)
|
||||||
|
|
||||||
|
|
||||||
|
def load_color_themes(prefs):
    """Return the enabled color themes (builtin + user) as ColorTheme tuples.

    If the user disabled every theme, the builtin themes are returned so
    there is always something to draw with.
    """
    themes = default_color_themes.copy()
    themes.update(prefs.color_themes)
    disabled = frozenset(prefs.disabled_color_themes)
    enabled = [theme_to_colors(v) for k, v in iteritems(themes) if k not in disabled]
    if not enabled:
        # Ignore disabled and return only the builtin color themes
        enabled = [theme_to_colors(v) for k, v in iteritems(default_color_themes)]
    return enabled
|
||||||
|
|
||||||
|
|
||||||
|
def color(color_theme, name):
    """Return the named QColor from *color_theme*, substituting the
    fallback color when the theme value is not a valid color."""
    ans = getattr(color_theme, name)
    if ans.isValid():
        return ans
    return QColor('#' + fallback_colors[name])
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# Styles {{{
|
||||||
|
|
||||||
|
|
||||||
|
class Style(object):
    """Base class for cover drawing styles.

    Subclasses set NAME/GUI_NAME and implement __call__(painter, rect, ...)
    which paints the background and returns the three pen colors to use for
    the title, subtitle and footer blocks.
    """

    # All three text blocks centered horizontally by default
    TITLE_ALIGN = SUBTITLE_ALIGN = FOOTER_ALIGN = Qt.AlignHCenter | Qt.AlignTop

    def __init__(self, color_theme, prefs):
        self.load_colors(color_theme)
        self.calculate_margins(prefs)

    def calculate_margins(self, prefs):
        # Margins scale with the cover size, relative to a 600x800 reference
        self.hmargin = int((50 / 600) * prefs.cover_width)
        self.vmargin = int((50 / 800) * prefs.cover_height)

    def load_colors(self, color_theme):
        # Background colors and their contrasting text colors
        self.color1 = color(color_theme, 'color1')
        self.color2 = color(color_theme, 'color2')
        self.ccolor1 = color(color_theme, 'contrast_color1')
        self.ccolor2 = color(color_theme, 'contrast_color2')
|
||||||
|
|
||||||
|
|
||||||
|
class Cross(Style):
    """A horizontal rounded band behind the title plus a vertical band along
    the left edge, together forming a cross."""

    NAME = 'The Cross'
    GUI_NAME = _('The Cross')

    def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
        painter.fillRect(rect, self.color1)
        # Band tall enough for title + subtitle plus some breathing room
        r = QRect(0, int(title_block.position.y), rect.width(),
                  title_block.height + subtitle_block.height + subtitle_block.line_spacing // 2 + title_block.leading)
        painter.save()
        p = QPainterPath()
        # y-radius scaled by the aspect ratio so the corners look round
        p.addRoundedRect(QRectF(r), 10, 10 * r.width()/r.height(), Qt.RelativeSize)
        painter.setClipPath(p)
        painter.setRenderHint(QPainter.Antialiasing)
        painter.fillRect(r, self.color2)
        painter.restore()
        # Vertical band from the left edge up to the title x position
        r = QRect(0, 0, int(title_block.position.x), rect.height())
        painter.fillRect(r, self.color2)
        # Pen colors for (title, subtitle, footer)
        return self.ccolor2, self.ccolor2, self.ccolor1
|
||||||
|
|
||||||
|
|
||||||
|
class Half(Style):
    """A vertical linear gradient: color1 at the top and bottom with color2
    about two-thirds of the way down."""

    NAME = 'Half and Half'
    GUI_NAME = _('Half and half')

    def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
        g = QLinearGradient(QPointF(0, 0), QPointF(0, rect.height()))
        g.setStops([(0, self.color1), (0.7, self.color2), (1, self.color1)])
        painter.fillRect(rect, QBrush(g))
        # Pen colors for (title, subtitle, footer)
        return self.ccolor1, self.ccolor1, self.ccolor1
|
||||||
|
|
||||||
|
|
||||||
|
def rotate_vector(angle, x, y):
    """Rotate the 2D vector (x, y) counter-clockwise by *angle* radians and
    return the rotated (x, y) tuple."""
    ca = cos(angle)
    sa = sin(angle)
    return x * ca - y * sa, x * sa + y * ca
|
||||||
|
|
||||||
|
|
||||||
|
def draw_curved_line(painter_path, dx, dy, c1_frac, c1_amp, c2_frac, c2_amp):
    # Append a cubic Bezier from the path's current position to
    # (current + (dx, dy)). The two control points are given as fractions
    # of the segment length along the segment (c*_frac) and perpendicular
    # amplitudes (c*_amp), rotated into the segment's direction.
    length = sqrt(dx * dx + dy * dy)
    angle = atan2(dy, dx)
    c1 = QPointF(*rotate_vector(angle, c1_frac * length, c1_amp * length))
    c2 = QPointF(*rotate_vector(angle, c2_frac * length, c2_amp * length))
    pos = painter_path.currentPosition()
    painter_path.cubicTo(pos + c1, pos + c2, pos + QPointF(dx, dy))
|
||||||
|
|
||||||
|
|
||||||
|
class Banner(Style):
    """Draws the title area as a ribbon/banner with folded ends."""

    NAME = 'Banner'
    GUI_NAME = _('Banner')
    # Slope of the banner's sides, as a fraction of its height
    GRADE = 0.07

    def calculate_margins(self, prefs):
        Style.calculate_margins(self, prefs)
        # Wider side margins leave room for the folds
        self.hmargin = int(0.15 * prefs.cover_width)
        self.fold_width = int(0.1 * prefs.cover_width)

    def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
        painter.fillRect(rect, self.color1)
        top = title_block.position.y + 2
        extra_spacing = subtitle_block.line_spacing // 2 if subtitle_block.line_spacing else title_block.line_spacing // 3
        height = title_block.height + subtitle_block.height + extra_spacing + title_block.leading
        right = rect.right() - self.hmargin
        width = right - self.hmargin

        # Draw main banner
        p = main = QPainterPath(QPointF(self.hmargin, top))
        # Slightly curved top edge
        draw_curved_line(p, rect.width() - 2 * self.hmargin, 0, 0.1, -0.1, 0.9, -0.1)
        deltax = self.GRADE * height
        p.lineTo(right + deltax, top + height)
        right_corner = p.currentPosition()
        # Curved bottom edge, drawn right-to-left
        draw_curved_line(p, - width - 2 * deltax, 0, 0.1, 0.05, 0.9, 0.05)
        left_corner = p.currentPosition()
        p.closeSubpath()

        # Draw fold rectangles
        rwidth = self.fold_width
        yfrac = 0.1
        width23 = int(0.67 * rwidth)
        rtop = top + height * yfrac

        def draw_fold(x, m=1, corner=left_corner):
            # Build the outer fold shape and the darker inner-fold triangle
            # for one side of the banner; m = -1 mirrors for the right side
            ans = p = QPainterPath(QPointF(x, rtop))
            draw_curved_line(p, rwidth*m, 0, 0.1, 0.1*m, 0.5, -0.2*m)
            fold_upper = p.currentPosition()
            p.lineTo(p.currentPosition() + QPointF(-deltax*m, height))
            fold_corner = p.currentPosition()
            draw_curved_line(p, -rwidth*m, 0, 0.2, -0.1*m, 0.8, -0.1*m)
            draw_curved_line(p, deltax*m, -height, 0.2, 0.1*m, 0.8, 0.1*m)
            p = inner_fold = QPainterPath(corner)
            dp = fold_corner - p.currentPosition()
            draw_curved_line(p, dp.x(), dp.y(), 0.5, 0.3*m, 1, 0*m)
            p.lineTo(fold_upper), p.closeSubpath()
            return ans, inner_fold

        left_fold, left_inner = draw_fold(self.hmargin - width23)
        right_fold, right_inner = draw_fold(right + width23, m=-1, corner=right_corner)

        painter.save()
        painter.setRenderHint(QPainter.Antialiasing)
        pen = QPen(self.ccolor2)
        pen.setWidth(3)
        pen.setJoinStyle(Qt.RoundJoin)
        painter.setPen(pen)
        # Folds first, then the banner on top of them
        for r in (left_fold, right_fold):
            painter.fillPath(r, QBrush(self.color2))
            painter.drawPath(r)
        for r in (left_inner, right_inner):
            # Inner folds are darker to suggest shadow
            painter.fillPath(r, QBrush(self.color2.darker()))
            painter.drawPath(r)
        painter.fillPath(main, QBrush(self.color2))
        painter.drawPath(main)
        painter.restore()
        # Pen colors for (title, subtitle, footer)
        return self.ccolor2, self.ccolor2, self.ccolor1
|
||||||
|
|
||||||
|
|
||||||
|
class Ornamental(Style):
    """Radial-gradient background decorated with corner flourishes and
    double border lines, drawn from an embedded SVG path."""

    NAME = 'Ornamental'
    GUI_NAME = _('Ornamental')

    # SVG vectors {{{
    CORNER_VECTOR = "m 67.791903,64.260958 c -4.308097,-2.07925 -4.086719,-8.29575 0.334943,-9.40552 4.119758,-1.03399 8.732363,5.05239 5.393055,7.1162 -0.55,0.33992 -1,1.04147 -1,1.55902 0,1.59332 2.597425,1.04548 5.365141,-1.1316 1.999416,-1.57274 2.634859,-2.96609 2.634859,-5.7775 0,-9.55787 -9.827495,-13.42961 -24.43221,-9.62556 -3.218823,0.83839 -5.905663,1.40089 -5.970755,1.25 -0.06509,-0.1509 -0.887601,-1.19493 -1.827799,-2.32007 -1.672708,-2.00174 -1.636693,-2.03722 1.675668,-1.65052 1.861815,0.21736 6.685863,-0.35719 10.720107,-1.27678 12.280767,-2.79934 20.195487,-0.0248 22.846932,8.0092 3.187273,9.65753 -6.423297,17.7497 -15.739941,13.25313 z m 49.881417,-20.53932 c -3.19204,-2.701 -3.72967,-6.67376 -1.24009,-9.16334 2.48236,-2.48236 5.35141,-2.67905 7.51523,-0.51523 1.85966,1.85966 2.07045,6.52954 0.37143,8.22857 -2.04025,2.04024 3.28436,1.44595 6.92316,-0.77272 9.66959,-5.89579 0.88581,-18.22422 -13.0777,-18.35516 -5.28594,-0.0496 -10.31098,1.88721 -14.26764,5.4991 -1.98835,1.81509 -2.16454,1.82692 -2.7936,0.18763 -0.40973,-1.06774 0.12141,-2.82197 1.3628,-4.50104 2.46349,-3.33205 1.67564,-4.01299 -2.891784,-2.49938 -2.85998,0.94777 -3.81038,2.05378 -5.59837,6.51495 -1.184469,2.95536 -3.346819,6.86882 -4.805219,8.69657 -1.4584,1.82776 -2.65164,4.02223 -2.65164,4.87662 0,3.24694 -4.442667,0.59094 -5.872557,-3.51085 -1.361274,-3.90495 0.408198,-8.63869 4.404043,-11.78183 5.155844,-4.05558 1.612374,-3.42079 -9.235926,1.65457 -12.882907,6.02725 -16.864953,7.18038 -24.795556,7.18038 -8.471637,0 -13.38802,-1.64157 -17.634617,-5.88816 -2.832233,-2.83224 -3.849773,-4.81378 -4.418121,-8.6038 -1.946289,-12.9787795 8.03227,-20.91713135 19.767685,-15.7259993 5.547225,2.4538018 6.993631,6.1265383 3.999564,10.1557393 -5.468513,7.35914 -15.917883,-0.19431 -10.657807,-7.7041155 1.486298,-2.1219878 1.441784,-2.2225068 -0.984223,-2.2225068 -1.397511,0 -4.010527,1.3130878 -5.806704,2.9179718 -2.773359,2.4779995 -3.265777,3.5977995 -3.265777,7.4266705 0,5.10943 2.254112,8.84197 7.492986,12.40748 8.921325,6.07175 19.286666,5.61396 37.12088,-1.63946 15.35037,-6.24321 21.294999,-7.42408 34.886123,-6.92999 11.77046,0.4279 19.35803,3.05537 24.34054,8.42878 4.97758,5.3681 2.53939,13.58271 -4.86733,16.39873 -4.17361,1.58681 -11.00702,1.19681 -13.31978,-0.76018 z m 26.50156,-0.0787 c -2.26347,-2.50111 -2.07852,-7.36311 0.39995,-10.51398 2.68134,-3.40877 10.49035,-5.69409 18.87656,-5.52426 l 6.5685,0.13301 -7.84029,0.82767 c -8.47925,0.89511 -12.76997,2.82233 -16.03465,7.20213 -1.92294,2.57976 -1.96722,3.00481 -0.57298,5.5 1.00296,1.79495 2.50427,2.81821 4.46514,3.04333 2.92852,0.33623 2.93789,0.32121 1.08045,-1.73124 -1.53602,-1.69728 -1.64654,-2.34411 -0.61324,-3.58916 2.84565,-3.4288 7.14497,-0.49759 5.03976,3.43603 -1.86726,3.48903 -8.65528,4.21532 -11.3692,1.21647 z m -4.17462,-14.20302 c -0.38836,-0.62838 -0.23556,-1.61305 0.33954,-2.18816 1.3439,-1.34389 4.47714,-0.17168 3.93038,1.47045 -0.5566,1.67168 -3.38637,2.14732 -4.26992,0.71771 z m -8.48037,-9.1829 c -12.462,-4.1101 -12.53952,-4.12156 -25.49998,-3.7694 -24.020921,0.65269 -32.338219,0.31756 -37.082166,-1.49417 -5.113999,-1.95305 -8.192504,-6.3647405 -6.485463,-9.2940713 0.566827,-0.972691 1.020091,-1.181447 1.037211,-0.477701 0.01685,0.692606 1.268676,1.2499998 2.807321,1.2499998 1.685814,0 4.868609,1.571672 8.10041,4.0000015 4.221481,3.171961 6.182506,3.999221 9.473089,3.996261 l 4.149585,-0.004 -3.249996,-1.98156 c -3.056252,-1.863441 -4.051566,-3.8760635 -2.623216,-5.3044145 0.794,-0.794 6.188222,1.901516 9.064482,4.5295635 1.858669,1.698271 3.461409,1.980521 10.559493,1.859621 11.30984,-0.19266 20.89052,1.29095 31.97905,4.95208 7.63881,2.52213 11.51931,3.16471 22.05074,3.65141 7.02931,0.32486 13.01836,0.97543 13.30902,1.44571 0.29065,0.47029 -5.2356,0.83436 -12.28056,0.80906 -12.25942,-0.044 -13.34537,-0.2229 -25.30902,-4.16865 z"  # noqa
    # }}}
    # Cache for the parsed QPainterPath, shared by all instances
    PATH_CACHE = {}
    # Logical coordinate system the vectors were authored in (width, height)
    VIEWPORT = (400, 500)

    def calculate_margins(self, prefs):
        # Margins derived from where the ornaments sit in the viewport
        self.hmargin = int((51 / self.VIEWPORT[0]) * prefs.cover_width)
        self.vmargin = int((83 / self.VIEWPORT[1]) * prefs.cover_height)

    def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
        # Parse the SVG corner ornament once per process
        if not self.PATH_CACHE:
            from calibre.utils.speedups import svg_path_to_painter_path
            try:
                self.__class__.PATH_CACHE['corner'] = svg_path_to_painter_path(self.CORNER_VECTOR)
            except Exception:
                import traceback
                traceback.print_exc()
        p = painter
        painter.setRenderHint(QPainter.Antialiasing)
        g = QRadialGradient(QPointF(rect.center()), rect.width())
        g.setColorAt(0, self.color1), g.setColorAt(1, self.color2)
        painter.fillRect(rect, QBrush(g))
        painter.save()
        # Draw in logical viewport coordinates; Qt maps them onto the cover
        painter.setWindow(0, 0, *self.VIEWPORT)
        try:
            path = self.PATH_CACHE['corner']
        except KeyError:
            # SVG parsing failed above; draw empty corners instead
            path = QPainterPath()
        pen = p.pen()
        pen.setColor(self.ccolor1)
        p.setPen(pen)

        def corner():
            # Paint the ornament and its mirror image, then reset the
            # world transform for the next corner
            b = QBrush(self.ccolor1)
            p.fillPath(path, b)
            p.rotate(90), p.translate(100, -100), p.scale(1, -1), p.translate(-103, -97)
            p.fillPath(path, b)
            p.setWorldTransform(QTransform())
        # Top-left corner
        corner()
        # Top right corner
        p.scale(-1, 1), p.translate(-400, 0), corner()
        # Bottom left corner
        p.scale(1, -1), p.translate(0, -500), corner()
        # Bottom right corner
        p.scale(-1, -1), p.translate(-400, -500), corner()
        # Decorative double border lines along the edges
        for y in (28.4, 471.7):
            p.drawLine(QPointF(160, y), QPointF(240, y))
        for x in (31.3, 368.7):
            p.drawLine(QPointF(x, 155), QPointF(x, 345))
        pen.setWidthF(1.8)
        p.setPen(pen)
        for y in (23.8, 476.7):
            p.drawLine(QPointF(160, y), QPointF(240, y))
        for x in (26.3, 373.7):
            p.drawLine(QPointF(x, 155), QPointF(x, 345))
        painter.restore()

        # Pen colors for (title, subtitle, footer)
        return self.ccolor2, self.ccolor2, self.ccolor1
|
||||||
|
|
||||||
|
|
||||||
|
class Blocks(Style):
    """Solid background with a contrasting band over the bottom third."""

    NAME = 'Blocks'
    GUI_NAME = _('Blocks')
    FOOTER_ALIGN = Qt.AlignRight | Qt.AlignTop

    def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
        # Fill the whole cover, then paint the bottom third in the accent
        # color. The original body also built a rect clipped with
        # setBottom(y) that was never used, and repeated the full-cover
        # fillRect(rect, self.color1) — dead code, removed here; the
        # rendered output is unchanged.
        painter.fillRect(rect, self.color1)
        y = rect.height() - rect.height() // 3
        r = QRect(rect)
        r.setTop(y)
        painter.fillRect(r, self.color2)
        # Pen colors for (title, subtitle, footer)
        return self.ccolor1, self.ccolor1, self.ccolor2
|
||||||
|
|
||||||
|
|
||||||
|
def all_styles():
    """Return the set of NAMEs of every Style subclass in this module."""
    names = set()
    for obj in itervalues(globals()):
        if isinstance(obj, type) and issubclass(obj, Style) and obj is not Style:
            names.add(obj.NAME)
    return names
|
||||||
|
|
||||||
|
|
||||||
|
def load_styles(prefs, respect_disabled=True):
    """Return the tuple of enabled Style subclasses.

    If every style has been disabled by the user, the disabling is ignored
    and all styles are returned.
    """
    disabled = frozenset(prefs.disabled_styles) if respect_disabled else ()

    def usable(x):
        return (isinstance(x, type) and issubclass(x, Style) and
                x is not Style and x.NAME not in disabled)

    ans = tuple(x for x in itervalues(globals()) if usable(x))
    if not ans and disabled:
        # If all styles have been disabled, ignore the disabling and return
        # all the styles
        ans = load_styles(prefs, respect_disabled=False)
    return ans
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
def init_environment():
    # A QApplication and the bundled fonts must exist before any Qt text
    # layout/painting can happen
    ensure_app()
    load_builtin_fonts()
|
||||||
|
|
||||||
|
|
||||||
|
def generate_cover(mi, prefs=None, as_qimage=False):
    # Generate a cover image for the metadata object mi, using a randomly
    # chosen enabled color theme and style. Returns PNG-ish image data via
    # pixmap_to_data(), or the QImage itself when as_qimage is True.
    init_environment()
    prefs = prefs or cprefs
    # Snapshot the prefs into an immutable namedtuple
    prefs = {k:prefs.get(k) for k in cprefs.defaults}
    prefs = Prefs(**prefs)
    color_theme = random.choice(load_color_themes(prefs))
    style = random.choice(load_styles(prefs))(color_theme, prefs)
    title, subtitle, footer = format_text(mi, prefs)
    img = QImage(prefs.cover_width, prefs.cover_height, QImage.Format_ARGB32)
    # Title + subtitle get at most the top third of the cover
    title_block, subtitle_block, footer_block = layout_text(
        prefs, img, title, subtitle, footer, img.height() // 3, style)
    p = QPainter(img)
    rect = QRect(0, 0, img.width(), img.height())
    # The style paints the background and chooses the text pen colors
    colors = style(p, rect, color_theme, title_block, subtitle_block, footer_block)
    for block, color in zip((title_block, subtitle_block, footer_block), colors):
        p.setPen(color)
        block.draw(p)
    p.end()
    img.setText('Generated cover', '%s %s' % (__appname__, __version__))
    if as_qimage:
        return img
    return pixmap_to_data(img)
|
||||||
|
|
||||||
|
|
||||||
|
def override_prefs(base_prefs, **overrides):
    """Return a prefs dict based on *base_prefs* with the given overrides.

    The special overrides 'override_color_theme' and 'override_style' force
    a single theme/style by disabling all the others.
    """
    ans = {k: overrides.get(k, base_prefs[k]) for k in cprefs.defaults}

    forced_theme = overrides.get('override_color_theme')
    if forced_theme is not None:
        all_themes = set(default_color_themes) | set(ans['color_themes'])
        if forced_theme in all_themes:
            # Disable every theme except the forced one
            all_themes.discard(forced_theme)
            ans['disabled_color_themes'] = all_themes

    forced_style = overrides.get('override_style')
    if forced_style is not None:
        styles = all_styles()
        if forced_style in styles:
            # Disable every style except the forced one
            styles.discard(forced_style)
            ans['disabled_styles'] = styles

    return ans
|
||||||
|
|
||||||
|
|
||||||
|
def create_cover(title, authors, series=None, series_index=1, prefs=None, as_qimage=False):
    """Create a cover from the specified title, author and series.

    Any user set templates are ignored, to ensure that the specified
    metadata is used.
    """
    mi = Metadata(title, authors)
    if series:
        mi.series, mi.series_index = series, series_index
    # Force the builtin default templates so user customization cannot
    # change what text ends up on the cover
    d = cprefs.defaults
    prefs = override_prefs(
        prefs or cprefs, title_template=d['title_template'], subtitle_template=d['subtitle_template'], footer_template=d['footer_template'])
    return generate_cover(mi, prefs=prefs, as_qimage=as_qimage)
|
||||||
|
|
||||||
|
|
||||||
|
def calibre_cover2(title, author_string='', series_string='', prefs=None, as_qimage=False, logo_path=None):
    # Generate the classic calibre cover: title/series/author text on white
    # with the calibre library logo (or logo_path) drawn in the middle.
    init_environment()
    title, subtitle, footer = '<b>' + escape_formatting(title), '<i>' + escape_formatting(series_string), '<b>' + escape_formatting(author_string)
    prefs = prefs or cprefs
    prefs = {k:prefs.get(k) for k in cprefs.defaults}
    # Normalize all sizes to an 800px-tall reference cover
    scale = 800. / prefs['cover_height']
    scale_cover(prefs, scale)
    prefs = Prefs(**prefs)
    img = QImage(prefs.cover_width, prefs.cover_height, QImage.Format_ARGB32)
    img.fill(Qt.white)
    # colors = to_theme('ffffff ffffff 000000 000000')
    color_theme = theme_to_colors(fallback_colors)

    # NOTE(review): "Calibe" looks like a typo for "Calibre"; the name is
    # local to this function so it is left unchanged here
    class CalibeLogoStyle(Style):
        NAME = GUI_NAME = 'calibre'

        def __call__(self, painter, rect, color_theme, title_block, subtitle_block, footer_block):
            # Draw the logo scaled to fit between the title area and footer
            top = title_block.position.y + 10
            extra_spacing = subtitle_block.line_spacing // 2 if subtitle_block.line_spacing else title_block.line_spacing // 3
            height = title_block.height + subtitle_block.height + extra_spacing + title_block.leading
            top += height + 25
            bottom = footer_block.position.y - 50
            logo = QImage(logo_path or I('library.png'))
            pwidth, pheight = rect.width(), bottom - top
            scaled, width, height = fit_image(logo.width(), logo.height(), pwidth, pheight)
            x, y = (pwidth - width) // 2, (pheight - height) // 2
            rect = QRect(x, top + y, width, height)
            painter.setRenderHint(QPainter.SmoothPixmapTransform)
            painter.drawImage(rect, logo)
            return self.ccolor1, self.ccolor1, self.ccolor1
    style = CalibeLogoStyle(color_theme, prefs)
    title_block, subtitle_block, footer_block = layout_text(
        prefs, img, title, subtitle, footer, img.height() // 3, style)
    p = QPainter(img)
    rect = QRect(0, 0, img.width(), img.height())
    colors = style(p, rect, color_theme, title_block, subtitle_block, footer_block)
    for block, color in zip((title_block, subtitle_block, footer_block), colors):
        p.setPen(color)
        block.draw(p)
    p.end()
    img.setText('Generated cover', '%s %s' % (__appname__, __version__))
    if as_qimage:
        return img
    return pixmap_to_data(img)
|
||||||
|
|
||||||
|
|
||||||
|
def message_image(text, width=500, height=400, font_size=20):
    """Render *text* word-wrapped on a white image and return the serialized
    image data (via pixmap_to_data)."""
    init_environment()
    canvas = QImage(width, height, QImage.Format_ARGB32)
    canvas.fill(Qt.white)
    painter = QPainter(canvas)
    font = QFont()
    font.setPixelSize(font_size)
    painter.setFont(font)
    # Keep a 10px margin on all sides
    text_area = canvas.rect().adjusted(10, 10, -10, -10)
    painter.drawText(text_area, Qt.AlignJustify | Qt.AlignVCenter | Qt.TextWordWrap, text)
    painter.end()
    return pixmap_to_data(canvas)
|
||||||
|
|
||||||
|
|
||||||
|
def scale_cover(prefs, scale):
    """Scale the cover geometry and font-size preferences in *prefs* in-place
    by the factor *scale*, truncating to integers."""
    scaled_keys = ('cover_width', 'cover_height', 'title_font_size',
                   'subtitle_font_size', 'footer_font_size')
    for key in scaled_keys:
        prefs[key] = int(scale * prefs[key])
|
||||||
|
|
||||||
|
|
||||||
|
def generate_masthead(title, output_path=None, width=600, height=60, as_qimage=False, font_family=None):
    """Generate a masthead image (as used for news downloads) showing *title*.

    Returns the QImage when as_qimage is True, otherwise the serialized image
    data; if output_path is given the data is written there instead of being
    returned.
    """
    init_environment()
    font_family = font_family or cprefs['title_font_family'] or 'Liberation Serif'
    img = QImage(width, height, QImage.Format_ARGB32)
    img.fill(Qt.white)
    p = QPainter(img)
    p.setRenderHints(QPainter.Antialiasing | QPainter.TextAntialiasing)
    f = QFont(font_family)
    f.setStyleStrategy(QFont.PreferAntialias)
    # Separate statements instead of the original comma-joined expression
    # tuple, which discarded both return values and obscured intent.
    f.setPixelSize((height * 3) // 4)
    f.setBold(True)
    p.setFont(f)
    p.drawText(img.rect(), Qt.AlignLeft | Qt.AlignVCenter, sanitize(title))
    p.end()
    if as_qimage:
        return img
    data = pixmap_to_data(img)
    if output_path is None:
        return data
    with open(output_path, 'wb') as f:
        f.write(data)
|
||||||
|
|
||||||
|
|
||||||
|
def test(scale=0.25):
    # Visual smoke test: renders one generated cover for every
    # (color theme, style) combination in a scrollable Qt window.
    from PyQt5.Qt import QLabel, QPixmap, QMainWindow, QWidget, QScrollArea, QGridLayout
    from calibre.gui2 import Application
    app = Application([])
    mi = Metadata('Unknown', ['Kovid Goyal', 'John & Doe', 'Author'])
    mi.series = 'A series & styles'
    m = QMainWindow()
    sa = QScrollArea(m)
    w = QWidget(m)
    sa.setWidget(w)
    l = QGridLayout(w)
    w.setLayout(l), l.setSpacing(30)
    # Account for HiDPI displays so covers render at native resolution
    scale *= w.devicePixelRatioF()
    labels = []
    for r, color in enumerate(sorted(default_color_themes)):
        for c, style in enumerate(sorted(all_styles())):
            mi.series_index = c + 1
            mi.title = 'An algorithmic cover [%s]' % color
            prefs = override_prefs(cprefs, override_color_theme=color, override_style=style)
            scale_cover(prefs, scale)
            img = generate_cover(mi, prefs=prefs, as_qimage=True)
            img.setDevicePixelRatio(w.devicePixelRatioF())
            la = QLabel()
            la.setPixmap(QPixmap.fromImage(img))
            l.addWidget(la, r, c)
            # Keep Python references alive so the labels are not GC-ed
            labels.append(la)
    m.setCentralWidget(sa)
    w.resize(w.sizeHint())
    m.show()
    app.exec_()
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this module directly for a visual test of cover generation
if __name__ == '__main__':
    test()
|
||||||
49
ebook_converter/ebooks/epub/__init__.py
Normal file
49
ebook_converter/ebooks/epub/__init__.py
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Conversion to EPUB.
|
||||||
|
'''
|
||||||
|
from calibre.utils.zipfile import ZipFile, ZIP_STORED
|
||||||
|
|
||||||
|
|
||||||
|
def rules(stylesheets):
    """Yield every style rule from *stylesheets*, skipping any object that
    does not look like a parsed stylesheet (no cssText attribute)."""
    for sheet in stylesheets:
        if not hasattr(sheet, 'cssText'):
            continue
        for rule in sheet:
            if rule.type == rule.STYLE_RULE:
                yield rule
|
||||||
|
|
||||||
|
|
||||||
|
def simple_container_xml(opf_path, extra_entries=''):
    # Return the text of an EPUB META-INF/container.xml declaring *opf_path*
    # as the package document. *extra_entries* is raw XML for any additional
    # <rootfile> elements and is interpolated verbatim.
    return '''\
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="{0}" media-type="application/oebps-package+xml"/>
{extra_entries}
</rootfiles>
</container>
'''.format(opf_path, extra_entries=extra_entries)
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_container(path_to_container, opf_name='metadata.opf',
                         extra_entries=()):
    '''
    Create an empty EPUB document, with a default skeleton.

    :param path_to_container: Path at which the EPUB (zip) file is created.
    :param opf_name: Name of the OPF file declared in container.xml.
    :param extra_entries: Iterable of (path, mimetype, data) triples; each is
        declared as an additional rootfile and written into the archive.
    :return: The open ZipFile, ready for further writes.
    '''
    # The default used to be a mutable list ([]); use an immutable tuple to
    # avoid the shared-mutable-default pitfall. Callers may pass any iterable.
    rootfiles = ''
    for path, mimetype, _ in extra_entries:
        rootfiles += '<rootfile full-path="{0}" media-type="{1}"/>'.format(
            path, mimetype)
    CONTAINER = simple_container_xml(opf_name, rootfiles).encode('utf-8')
    zf = ZipFile(path_to_container, 'w')
    # Per the EPUB OCF spec the mimetype entry must come first and be stored
    # uncompressed
    zf.writestr('mimetype', b'application/epub+zip', compression=ZIP_STORED)
    zf.writestr('META-INF/', b'', 0o755)
    zf.writestr('META-INF/container.xml', CONTAINER)
    for path, _, data in extra_entries:
        zf.writestr(path, data)
    return zf
|
||||||
389
ebook_converter/ebooks/oeb/polish/css.py
Normal file
389
ebook_converter/ebooks/oeb/polish/css.py
Normal file
@@ -0,0 +1,389 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
from css_parser.css import CSSRule, CSSStyleDeclaration
|
||||||
|
from css_selectors import parse, SelectorSyntaxError
|
||||||
|
|
||||||
|
from calibre import force_unicode
|
||||||
|
from calibre.ebooks.oeb.base import OEB_STYLES, OEB_DOCS, XHTML, css_text
|
||||||
|
from calibre.ebooks.oeb.normalize_css import normalize_filter_css, normalizers
|
||||||
|
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style, pretty_xml_tree, serialize
|
||||||
|
from calibre.utils.icu import numeric_sort_key
|
||||||
|
from css_selectors import Select, SelectorError
|
||||||
|
from polyglot.builtins import iteritems, itervalues, unicode_type, filter
|
||||||
|
|
||||||
|
|
||||||
|
def filter_used_rules(rules, log, select):
    """Yield the rules from *rules* none of whose selectors match anything in
    *select* (i.e. the unused rules)."""
    for rule in rules:
        matched = False
        for selector in rule.selectorList:
            try:
                matched = select.has_matches(selector.selectorText)
            except SelectorError:
                # Cannot parse/execute this selector, be safe and assume it
                # matches something
                matched = True
            if matched:
                break
        if not matched:
            yield rule
|
||||||
|
|
||||||
|
|
||||||
|
def get_imported_sheets(name, container, sheets, recursion_level=10, sheet=None):
    """Return the set of stylesheet names (transitively) @import-ed by *name*,
    limited to sheets present in *sheets* and to *recursion_level* levels of
    nesting. *name* itself is never included."""
    sheet = sheet or sheets[name]
    found = set()
    for rule in sheet.cssRules.rulesOfType(CSSRule.IMPORT_RULE):
        if not rule.href:
            continue
        imported_name = container.href_to_name(rule.href, name)
        if imported_name in sheets:
            found.add(imported_name)
    if recursion_level > 0:
        # Snapshot before recursing, since found grows during the loop
        for child in tuple(found):
            found |= get_imported_sheets(
                child, container, sheets, recursion_level=recursion_level - 1)
    found.discard(name)
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def merge_declarations(first, second):
    """Copy every property declaration from *second* into *first*,
    overwriting any duplicates."""
    for declaration in second.getProperties():
        first.setProperty(declaration)
|
||||||
|
|
||||||
|
|
||||||
|
def merge_identical_selectors(sheet):
    ' Merge rules that have identical selectors '
    by_selector = defaultdict(list)
    for rule in sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
        by_selector[rule.selectorText].append(rule)
    doomed = []
    for group in itervalues(by_selector):
        first = group[0]
        # Fold every duplicate's declarations into the first rule, then
        # schedule the duplicate for removal
        for duplicate in group[1:]:
            merge_declarations(first.style, duplicate.style)
            doomed.append(duplicate)
    for rule in doomed:
        sheet.cssRules.remove(rule)
    return len(doomed)
|
||||||
|
|
||||||
|
|
||||||
|
def remove_unused_css(container, report=None, remove_unused_classes=False, merge_rules=False):
    '''
    Remove all unused CSS rules from the book. An unused CSS rule is one that does not match any actual content.

    :param report: An optional callable that takes a single argument. It is called with information about the operations being performed.
    :param remove_unused_classes: If True, class attributes in the HTML that do not match any CSS rules are also removed.
    :param merge_rules: If True, rules with identical selectors are merged.
    '''
    report = report or (lambda x:x)

    def safe_parse(name):
        # Returns None (implicitly) when the file cannot be parsed as CSS
        try:
            return container.parsed(name)
        except TypeError:
            pass
    # All parseable standalone stylesheets in the book
    sheets = {name:safe_parse(name) for name, mt in iteritems(container.mime_map) if mt in OEB_STYLES}
    sheets = {k:v for k, v in iteritems(sheets) if v is not None}
    num_merged = 0
    if merge_rules:
        for name, sheet in iteritems(sheets):
            num = merge_identical_selectors(sheet)
            if num:
                container.dirty(name)
                num_merged += num
    # Map of sheet name -> names of sheets it @imports (transitively)
    import_map = {name:get_imported_sheets(name, container, sheets) for name in sheets}
    if remove_unused_classes:
        # Lower-cased class names referenced by each sheet's selectors
        class_map = {name:{icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)} for name, sheet in iteritems(sheets)}
    # style_rules[name] is progressively narrowed to the rules unused by
    # EVERY document processed so far
    style_rules = {name:tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE)) for name, sheet in iteritems(sheets)}

    num_of_removed_rules = num_of_removed_classes = 0

    for name, mt in iteritems(container.mime_map):
        if mt not in OEB_DOCS:
            continue
        root = container.parsed(name)
        select = Select(root, ignore_inappropriate_pseudo_classes=True)
        used_classes = set()
        # Inline <style> tags: filtered (and possibly merged) per-document
        for style in root.xpath('//*[local-name()="style"]'):
            if style.get('type', 'text/css') == 'text/css' and style.text:
                sheet = container.parse_css(style.text)
                if merge_rules:
                    num = merge_identical_selectors(sheet)
                    if num:
                        num_merged += num
                        container.dirty(name)
                if remove_unused_classes:
                    used_classes |= {icu_lower(x) for x in classes_in_rule_list(sheet.cssRules)}
                # Sheets @imported from an inline style are checked against
                # this document too
                imports = get_imported_sheets(name, container, sheets, sheet=sheet)
                for imported_sheet in imports:
                    style_rules[imported_sheet] = tuple(filter_used_rules(style_rules[imported_sheet], container.log, select))
                    if remove_unused_classes:
                        used_classes |= class_map[imported_sheet]
                rules = tuple(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
                unused_rules = tuple(filter_used_rules(rules, container.log, select))
                if unused_rules:
                    num_of_removed_rules += len(unused_rules)
                    [sheet.cssRules.remove(r) for r in unused_rules]
                    style.text = force_unicode(sheet.cssText, 'utf-8')
                    pretty_script_or_style(container, style)
                    container.dirty(name)

        # Linked stylesheets: narrow their unused-rule sets by this document
        for link in root.xpath('//*[local-name()="link" and @href]'):
            sname = container.href_to_name(link.get('href'), name)
            if sname not in sheets:
                continue
            style_rules[sname] = tuple(filter_used_rules(style_rules[sname], container.log, select))
            if remove_unused_classes:
                used_classes |= class_map[sname]

            for iname in import_map[sname]:
                style_rules[iname] = tuple(filter_used_rules(style_rules[iname], container.log, select))
                if remove_unused_classes:
                    used_classes |= class_map[iname]

        if remove_unused_classes:
            # Strip class tokens not referenced by any reachable stylesheet
            for elem in root.xpath('//*[@class]'):
                original_classes, classes = elem.get('class', '').split(), []
                for x in original_classes:
                    if icu_lower(x) in used_classes:
                        classes.append(x)
                if len(classes) != len(original_classes):
                    if classes:
                        elem.set('class', ' '.join(classes))
                    else:
                        del elem.attrib['class']
                    num_of_removed_classes += len(original_classes) - len(classes)
                    container.dirty(name)

    # Whatever survives in style_rules was used by no document: remove it
    for name, sheet in iteritems(sheets):
        unused_rules = style_rules[name]
        if unused_rules:
            num_of_removed_rules += len(unused_rules)
            [sheet.cssRules.remove(r) for r in unused_rules]
            container.dirty(name)

    num_changes = num_of_removed_rules + num_merged + num_of_removed_classes
    if num_changes > 0:
        if num_of_removed_rules > 0:
            report(ngettext('Removed one unused CSS style rule', 'Removed {} unused CSS style rules',
                            num_of_removed_rules).format(num_of_removed_rules))
        if num_of_removed_classes > 0:
            report(ngettext('Removed one unused class from the HTML', 'Removed {} unused classes from the HTML',
                            num_of_removed_classes).format(num_of_removed_classes))
        if num_merged > 0:
            report(ngettext('Merged one CSS style rule', 'Merged {} CSS style rules',
                            num_merged).format(num_merged))
    if num_of_removed_rules == 0:
        report(_('No unused CSS style rules found'))
    if remove_unused_classes and num_of_removed_classes == 0:
        report(_('No unused class attributes found'))
    if merge_rules and num_merged == 0:
        report(_('No style rules that could be merged found'))
    return num_changes > 0
|
||||||
|
|
||||||
|
|
||||||
|
def filter_declaration(style, properties=()):
    """Remove the given *properties* from the CSSStyleDeclaration *style*.

    Shorthand properties are expanded via the normalizers so that, e.g.,
    removing 'margin-top' from a 'margin' shorthand keeps the other sides.

    :param properties: Iterable of property names to remove.
    :return: True if the declaration was changed.
    """
    # Normalize to a set: the .intersection() call below fails with an
    # AttributeError when a plain tuple/list (or the default ()) is passed.
    properties = frozenset(properties)
    changed = False
    for prop in properties:
        if style.removeProperty(prop) != '':
            changed = True
    all_props = set(style.keys())
    for prop in style.getProperties():
        n = normalizers.get(prop.name, None)
        if n is not None:
            # Expand the shorthand, drop the targeted sub-properties, and
            # re-add the survivors that were not already declared explicitly
            normalized = n(prop.name, prop.propertyValue)
            removed = properties.intersection(set(normalized))
            if removed:
                changed = True
                style.removeProperty(prop.name)
                for prop in set(normalized) - removed - all_props:
                    style.setProperty(prop, normalized[prop])
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def filter_sheet(sheet, properties=()):
    """Remove *properties* from every style rule in *sheet*, deleting any
    rule that is left with no declarations. Returns True if changed."""
    from css_parser.css import CSSRule
    changed = False
    emptied = []
    for rule in sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE):
        if filter_declaration(rule.style, properties):
            changed = True
        if rule.style.length == 0:
            # Rule has no declarations left; drop it after iteration
            emptied.append(rule)
    for rule in emptied:
        sheet.cssRules.remove(rule)
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def transform_inline_styles(container, name, transform_sheet, transform_style):
    """Apply *transform_sheet* to every CSS <style> tag and *transform_style*
    to every style="" attribute in the document *name*. Returns True if the
    document was modified."""
    root = container.parsed(name)
    changed = False
    for style_tag in root.xpath('//*[local-name()="style"]'):
        is_css = (style_tag.get('type') or 'text/css').lower() == 'text/css'
        if style_tag.text and is_css:
            sheet = container.parse_css(style_tag.text)
            if transform_sheet(sheet):
                changed = True
                style_tag.text = force_unicode(sheet.cssText, 'utf-8')
                pretty_script_or_style(container, style_tag)
    for elem in root.xpath('//*[@style]'):
        raw = elem.get('style', None)
        if raw:
            decl = container.parse_css(raw, is_declaration=True)
            if transform_style(decl):
                changed = True
                if decl.length == 0:
                    # Every property was removed: drop the attribute entirely
                    del elem.attrib['style']
                else:
                    elem.set('style', force_unicode(decl.getCssText(separator=' '), 'utf-8'))
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def transform_css(container, transform_sheet=None, transform_style=None, names=()):
    """Apply *transform_sheet* to stylesheets and *transform_style* to inline
    styles in the files *names* (defaults to every CSS and HTML file in the
    container). Returns True if anything changed."""
    if not names:
        wanted = OEB_STYLES | OEB_DOCS
        names = [name for name, mt in iteritems(container.mime_map) if mt in wanted]

    doc_changed = False

    for name in names:
        mt = container.mime_map[name]
        if mt in OEB_STYLES:
            if transform_sheet(container.parsed(name)):
                container.dirty(name)
                doc_changed = True
        elif mt in OEB_DOCS:
            if transform_inline_styles(container, name, transform_sheet, transform_style):
                container.dirty(name)
                doc_changed = True

    return doc_changed
|
||||||
|
|
||||||
|
|
||||||
|
def filter_css(container, properties, names=()):
    '''
    Remove the specified CSS properties from all CSS rules in the book.

    :param properties: Set of properties to remove. For example: :code:`{'font-family', 'color'}`.
    :param names: The files from which to remove the properties. Defaults to all HTML and CSS files in the book.
    '''
    normalized = normalize_filter_css(properties)
    sheet_transform = partial(filter_sheet, properties=normalized)
    style_transform = partial(filter_declaration, properties=normalized)
    return transform_css(container, transform_sheet=sheet_transform,
                         transform_style=style_transform, names=names)
|
||||||
|
|
||||||
|
|
||||||
|
def _classes_in_selector(selector, classes):
|
||||||
|
for attr in ('selector', 'subselector', 'parsed_tree'):
|
||||||
|
s = getattr(selector, attr, None)
|
||||||
|
if s is not None:
|
||||||
|
_classes_in_selector(s, classes)
|
||||||
|
cn = getattr(selector, 'class_name', None)
|
||||||
|
if cn is not None:
|
||||||
|
classes.add(cn)
|
||||||
|
|
||||||
|
|
||||||
|
def classes_in_selector(text):
    """Return the set of class names referenced by the selector string *text*.
    A selector that fails to parse contributes whatever was collected before
    the error."""
    found = set()
    try:
        for sel in parse(text):
            _classes_in_selector(sel, found)
    except SelectorSyntaxError:
        pass
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def classes_in_rule_list(css_rules):
    """Return every class name used by selectors anywhere in *css_rules*,
    recursing into nested rule lists (e.g. @media blocks)."""
    found = set()
    for rule in css_rules:
        if rule.type == rule.STYLE_RULE:
            found |= classes_in_selector(rule.selectorText)
        elif hasattr(rule, 'cssRules'):
            found |= classes_in_rule_list(rule.cssRules)
    return found
|
||||||
|
|
||||||
|
|
||||||
|
def iter_declarations(sheet_or_rule):
    """Yield every style declaration reachable from a stylesheet, a rule, or
    a bare CSSStyleDeclaration."""
    if hasattr(sheet_or_rule, 'cssRules'):
        # A sheet or grouping rule: recurse into its child rules
        for child in sheet_or_rule.cssRules:
            for decl in iter_declarations(child):
                yield decl
    elif hasattr(sheet_or_rule, 'style'):
        # A style rule: its declaration block
        yield sheet_or_rule.style
    elif isinstance(sheet_or_rule, CSSStyleDeclaration):
        yield sheet_or_rule
|
||||||
|
|
||||||
|
|
||||||
|
def remove_property_value(prop, predicate):
    ''' Remove the Values that match the predicate from this property. If all
    values of the property would be removed, the property is removed from its
    parent instead. Note that this means the property must have a parent (a
    CSSStyleDeclaration). '''
    doomed = [v for v in prop.propertyValue if predicate(v)]
    if len(doomed) == len(prop.propertyValue):
        # Nothing would remain: delete the whole property from its parent
        prop.parent.removeProperty(prop.name)
    else:
        remaining = css_text(prop.propertyValue)
        for value in doomed:
            remaining = remaining.replace(css_text(value), '').strip()
        prop.propertyValue.cssText = remaining
    return bool(doomed)
|
||||||
|
|
||||||
|
|
||||||
|
RULE_PRIORITIES = {t:i for i, t in enumerate((CSSRule.COMMENT, CSSRule.CHARSET_RULE, CSSRule.IMPORT_RULE, CSSRule.NAMESPACE_RULE))}
|
||||||
|
|
||||||
|
|
||||||
|
def sort_sheet(container, sheet_or_text):
    ''' Sort the rules in a stylesheet. Note that in the general case this can
    change the effective styles, but for most common sheets, it should be safe.
    '''
    sheet = container.parse_css(sheet_or_text) if isinstance(sheet_or_text, unicode_type) else sheet_or_text

    def text_sort_key(x):
        # Numeric-aware ("natural") ordering of arbitrary text
        return numeric_sort_key(unicode_type(x or ''))

    def selector_sort_key(x):
        # CSS specificity first, then alphabetical by selector text
        return (x.specificity, text_sort_key(x.selectorText))

    def rule_sort_key(rule):
        # primary: rule-category priority (comments/@charset/@import/
        # @namespace first); secondary: at-keyword text; tertiary: a
        # type-specific tie-breaker
        primary = RULE_PRIORITIES.get(rule.type, len(RULE_PRIORITIES))
        secondary = text_sort_key(getattr(rule, 'atkeyword', '') or '')
        tertiary = None
        if rule.type == CSSRule.STYLE_RULE:
            # Style rules sort after other unlisted rule types
            primary += 1
            selectors = sorted(rule.selectorList, key=selector_sort_key)
            tertiary = selector_sort_key(selectors[0])
            # Side effect: also normalizes the selector order inside the rule
            rule.selectorText = ', '.join(s.selectorText for s in selectors)
        elif rule.type == CSSRule.FONT_FACE_RULE:
            try:
                tertiary = text_sort_key(rule.style.getPropertyValue('font-family'))
            except Exception:
                pass

        return primary, secondary, tertiary
    sheet.cssRules.sort(key=rule_sort_key)
    return sheet
|
||||||
|
|
||||||
|
|
||||||
|
def add_stylesheet_links(container, name, text):
    """Append a <link rel="stylesheet"> for every stylesheet in the container
    to the <head> of the HTML in *text* (parsed as file *name*) and return the
    serialized document. Returns None when there is no <head> or there are no
    stylesheets."""
    root = container.parse_xhtml(text, name)
    heads = root.xpath('//*[local-name() = "head"]')
    if not heads:
        return
    head = heads[0]
    sheet_names = tuple(container.manifest_items_of_type(lambda mt: mt in OEB_STYLES))
    if not sheet_names:
        return
    for sheet_name in sheet_names:
        link = head.makeelement(
            XHTML('link'), type='text/css', rel='stylesheet',
            href=container.name_to_href(sheet_name, name))
        head.append(link)
    pretty_xml_tree(head)
    return serialize(root, 'text/html')
|
||||||
404
ebook_converter/ebooks/oeb/polish/replace.py
Normal file
404
ebook_converter/ebooks/oeb/polish/replace.py
Normal file
@@ -0,0 +1,404 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import codecs, shutil, os, posixpath
|
||||||
|
from polyglot.builtins import iteritems, itervalues, map
|
||||||
|
from functools import partial
|
||||||
|
from collections import Counter, defaultdict
|
||||||
|
|
||||||
|
from calibre import sanitize_file_name
|
||||||
|
from calibre.ebooks.chardet import strip_encoding_declarations
|
||||||
|
from calibre.ebooks.oeb.base import css_text
|
||||||
|
from calibre.ebooks.oeb.polish.css import iter_declarations, remove_property_value
|
||||||
|
from calibre.ebooks.oeb.polish.utils import extract
|
||||||
|
from polyglot.urllib import urlparse, urlunparse
|
||||||
|
|
||||||
|
|
||||||
|
class LinkReplacer(object):
    # Callable passed to container.replace_links() that rewrites hrefs in the
    # file *base* according to link_map (old canonical name -> new name) and
    # rewrites fragments via frag_map(name, anchor). Sets self.replaced when
    # any URL was actually changed.

    def __init__(self, base, container, link_map, frag_map):
        self.base = base
        self.frag_map = frag_map
        self.link_map = link_map
        self.container = container
        self.replaced = False

    def __call__(self, url):
        # Pure fragment reference within the same file: only remap the anchor
        if url and url.startswith('#'):
            repl = self.frag_map(self.base, url[1:])
            if not repl or repl == url[1:]:
                return url
            self.replaced = True
            return '#' + repl
        name = self.container.href_to_name(url, self.base)
        if not name:
            # Not a file inside the container (external URL etc.)
            return url
        nname = self.link_map.get(name, None)
        if not nname:
            # Target file is not being renamed
            return url
        purl = urlparse(url)
        href = self.container.name_to_href(nname, self.base)
        if purl.fragment:
            # Remap the anchor relative to the ORIGINAL target name
            nfrag = self.frag_map(name, purl.fragment)
            if nfrag:
                href += '#%s'%nfrag
        if href != url:
            self.replaced = True
        return href
|
||||||
|
|
||||||
|
|
||||||
|
class IdReplacer(object):
    # Callable passed to container.replace_links() that rewrites URL fragments
    # in the file *base* according to id_map: {file name: {old_id: new_id}}.
    # Sets self.replaced when any URL was actually changed.

    def __init__(self, base, container, id_map):
        self.base, self.container, self.replaced = base, container, False
        self.id_map = id_map

    def __call__(self, url):
        # Same-file fragment reference: look up the id map for this file
        if url and url.startswith('#'):
            repl = self.id_map.get(self.base, {}).get(url[1:])
            if repl is None or repl == url[1:]:
                return url
            self.replaced = True
            return '#' + repl
        name = self.container.href_to_name(url, self.base)
        if not name:
            # Not a file inside the container
            return url
        id_map = self.id_map.get(name)
        if id_map is None:
            # No ids changed in the target file
            return url
        purl = urlparse(url)
        nfrag = id_map.get(purl.fragment)
        if nfrag is None:
            return url
        # Rebuild the URL with only the fragment replaced
        purl = purl._replace(fragment=nfrag)
        href = urlunparse(purl)
        if href != url:
            self.replaced = True
        return href
|
||||||
|
|
||||||
|
|
||||||
|
class LinkRebaser(object):
    # Callable passed to container.replace_links() that recomputes relative
    # hrefs when a file is moved from old_name to new_name, so that links in
    # the moved file still resolve. Sets self.replaced when a URL changed.

    def __init__(self, container, old_name, new_name):
        self.old_name, self.new_name = old_name, new_name
        self.container = container
        self.replaced = False

    def __call__(self, url):
        # Pure fragment references need no rebasing
        if url and url.startswith('#'):
            return url
        purl = urlparse(url)
        frag = purl.fragment
        # Resolve relative to the OLD location
        name = self.container.href_to_name(url, self.old_name)
        if not name:
            return url
        if name == self.old_name:
            # Self-reference: point at the new location
            name = self.new_name
        # Re-relativize against the NEW location
        href = self.container.name_to_href(name, self.new_name)
        if frag:
            href += '#' + frag
        if href != url:
            self.replaced = True
        return href
|
||||||
|
|
||||||
|
|
||||||
|
def replace_links(container, link_map, frag_map=lambda name, frag:frag, replace_in_opf=False):
    '''
    Replace links to files in the container. Will iterate over all files in the container and change the specified links in them.

    :param link_map: A mapping of old canonical name to new canonical name. For example: :code:`{'images/old.png': 'images/new.png'}`
    :param frag_map: A callable that takes two arguments ``(name, anchor)`` and
        returns a new anchor. This is useful if you need to change the anchors in
        HTML files. By default, it does nothing.
    :param replace_in_opf: If False, links are not replaced in the OPF file.

    '''
    # Iterate over keys only; the media type was fetched but never used.
    for name in container.mime_map:
        if name == container.opf_name and not replace_in_opf:
            continue
        repl = LinkReplacer(name, container, link_map, frag_map)
        container.replace_links(name, repl)
|
||||||
|
|
||||||
|
|
||||||
|
def replace_ids(container, id_map):
    '''
    Replace all links in the container that pointed to the changed ids.

    :param id_map: A mapping of {name:id_map} where each id_map is a mapping of {old_id:new_id}
    :return: True iff at least one link was changed

    '''
    changed = False
    # Iterate over keys only; the media type was fetched but never used.
    for name in container.mime_map:
        repl = IdReplacer(name, container, id_map)
        container.replace_links(name, repl)
        if name == container.opf_name:
            # The OPF references ids via idref attributes (e.g. in the
            # spine) rather than via hrefs, so update those directly
            imap = id_map.get(name, {})
            for item in container.opf_xpath('//*[@idref]'):
                old_id = item.get('idref')
                if old_id is not None:
                    new_id = imap.get(old_id)
                    if new_id is not None:
                        item.set('idref', new_id)
        if repl.replaced:
            changed = True
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def smarten_punctuation(container, report):
    # Convert plain quotes/dashes/ellipses to their typographic equivalents
    # in every spine document, reporting each changed file via report().
    # Returns True iff at least one file was changed.
    from calibre.ebooks.conversion.preprocess import smarten_punctuation
    smartened = False
    for path in container.spine_items:
        name = container.abspath_to_name(path)
        changed = False
        with container.open(name, 'r+b') as f:
            html = container.decode(f.read())
            newhtml = smarten_punctuation(html, container.log)
            if newhtml != html:
                changed = True
                report(_('Smartened punctuation in: %s')%name)
                newhtml = strip_encoding_declarations(newhtml)
                # Rewrite the file in place with a UTF-8 BOM so the encoding
                # is unambiguous when re-read
                f.seek(0)
                f.truncate()
                f.write(codecs.BOM_UTF8 + newhtml.encode('utf-8'))
        if changed:
            # Add an encoding declaration (it will be added automatically when
            # serialized)
            root = container.parsed(name)
            for m in root.xpath('descendant::*[local-name()="meta" and @http-equiv]'):
                m.getparent().remove(m)
            container.dirty(name)
            smartened = True
    if not smartened:
        report(_('No punctuation that could be smartened found'))
    return smartened
|
||||||
|
|
||||||
|
|
||||||
|
def rename_files(container, file_map):
    '''
    Rename files in the container, automatically updating all links to them.

    :param file_map: A mapping of old canonical name to new canonical name, for
        example: :code:`{'text/chapter1.html': 'chapter1.html'}`.
    :raises ValueError: if a name is both a rename source and a destination,
        if a destination already exists, or if two sources map to the same
        destination.
    '''
    # A name that is both a source and a destination would make the result
    # depend on rename order, so refuse it outright
    overlap = set(file_map).intersection(set(itervalues(file_map)))
    if overlap:
        raise ValueError('Circular rename detected. The files %s are both rename targets and destinations' % ', '.join(overlap))
    for name, dest in iteritems(file_map):
        if container.exists(dest):
            if name != dest and name.lower() == dest.lower():
                # A case change on an OS with a case insensitive file-system.
                continue
            raise ValueError('Cannot rename {0} to {1} as {1} already exists'.format(name, dest))
    # Two sources mapping to one destination would silently clobber a file
    if len(tuple(itervalues(file_map))) != len(set(itervalues(file_map))):
        raise ValueError('Cannot rename, the set of destination files contains duplicates')
    link_map = {}
    for current_name, new_name in iteritems(file_map):
        container.rename(current_name, new_name)
        if new_name != container.opf_name:  # OPF is handled by the container
            link_map[current_name] = new_name
    replace_links(container, link_map, replace_in_opf=True)
|
||||||
|
|
||||||
|
|
||||||
|
def replace_file(container, name, path, basename, force_mt=None):
    # Replace the container file *name* with the contents of the local file
    # *path*, renaming it to a sanitized *basename* (kept in the same folder,
    # made unique if needed) and keeping the OPF manifest media-type in sync.
    dirname, base = name.rpartition('/')[0::2]
    nname = sanitize_file_name(basename)
    if dirname:
        nname = dirname + '/' + nname
    with open(path, 'rb') as src:
        if name != nname:
            # Find a non-clashing name by appending _1, _2, ... before the
            # extension, then rename (which also fixes up links)
            count = 0
            b, e = nname.rpartition('.')[0::2]
            while container.exists(nname):
                count += 1
                nname = b + ('_%d.%s' % (count, e))
            rename_files(container, {name:nname})
        mt = force_mt or container.guess_type(nname)
        container.mime_map[nname] = mt
        # Update the media-type of the corresponding manifest item(s)
        for itemid, q in iteritems(container.manifest_id_map):
            if q == nname:
                for item in container.opf_xpath('//opf:manifest/opf:item[@href and @id="%s"]' % itemid):
                    item.set('media-type', mt)
        container.dirty(container.opf_name)
        with container.open(nname, 'wb') as dest:
            shutil.copyfileobj(src, dest)
|
||||||
|
|
||||||
|
|
||||||
|
def mt_to_category(container, mt):
    '''
    Map a media type to a coarse file category: 'text', 'style', 'font',
    'opf', 'toc', or the major part of the media type (e.g. 'image').
    '''
    from calibre.ebooks.oeb.polish.utils import guess_type
    from calibre.ebooks.oeb.polish.container import OEB_FONTS
    from calibre.ebooks.oeb.base import OEB_DOCS, OEB_STYLES
    if mt in OEB_DOCS:
        return 'text'
    if mt in OEB_STYLES:
        return 'style'
    if mt in OEB_FONTS:
        return 'font'
    if mt == guess_type('a.opf'):
        return 'opf'
    if mt == guess_type('a.ncx'):
        return 'toc'
    # Fall back to the major part of the media type, e.g. image/jpeg -> image
    return mt.partition('/')[0]
|
||||||
|
|
||||||
|
|
||||||
|
def get_recommended_folders(container, names):
    ''' Return the folders that are recommended for the given filenames. The
    recommendation is based on where the majority of files of the same type are
    located in the container. If no files of a particular type are present, the
    recommended folder is assumed to be the folder containing the OPF file. '''
    from calibre.ebooks.oeb.polish.utils import guess_type
    counts = defaultdict(Counter)
    for name, mt in iteritems(container.mime_map):
        folder = name.rpartition('/')[0] if '/' in name else ''
        counts[mt_to_category(container, mt)][folder] += 1

    try:
        opf_folder = counts['opf'].most_common(1)[0][0]
    except (KeyError, IndexError):
        # KeyError cannot actually occur since counts is a defaultdict: the
        # lookup creates an empty Counter, whose most_common(1) returns [],
        # making the [0] subscript raise IndexError. Catch both so the
        # fallback genuinely works when no OPF file was counted.
        opf_folder = ''

    # Most popular folder per category; fall back to the OPF's folder for
    # categories with no existing files.
    recommendations = {category:counter.most_common(1)[0][0] for category, counter in iteritems(counts)}
    return {n:recommendations.get(mt_to_category(container, guess_type(os.path.basename(n))), opf_folder) for n in names}
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_case(container, val):
    '''
    Return *val* with each path component replaced by the differently-cased
    name that actually exists on disk, if any. Components with no on-disk
    case variant are kept as-is.
    '''

    def safe_listdir(x):
        # A missing or unreadable directory simply contributes no candidates.
        try:
            return os.listdir(x)
        except EnvironmentError:
            return ()

    parts = val.split('/')
    fixed = []
    for pos, part in enumerate(parts):
        abspath = container.name_to_abspath('/'.join(parts[:pos + 1]))
        wanted = part.lower()
        # First directory entry that matches case-insensitively but differs
        # in case; otherwise keep the component unchanged.
        match = next((c for c in safe_listdir(os.path.dirname(abspath))
                      if c != part and c.lower() == wanted), part)
        fixed.append(match)
    return '/'.join(fixed)
|
||||||
|
|
||||||
|
|
||||||
|
def rationalize_folders(container, folder_type_map):
    '''
    Compute a rename map that moves every file into the folder configured for
    its category in *folder_type_map* (category -> folder). META-INF/ entries
    are never moved. Returns a dict of current name -> new name containing
    only the files that actually need to move.
    '''
    all_names = set(container.mime_map)
    new_names = set()
    name_map = {}
    # Normalize the target folders to the case actually used on disk, so the
    # renames below don't create case-only duplicates.
    for key in tuple(folder_type_map):
        val = folder_type_map[key]
        folder_type_map[key] = normalize_case(container, val)
    for name in all_names:
        if name.startswith('META-INF/'):
            continue
        category = mt_to_category(container, container.mime_map[name])
        folder = folder_type_map.get(category, None)
        if folder is not None:
            bn = posixpath.basename(name)
            new_name = posixpath.join(folder, bn)
            if new_name != name:
                # Avoid collisions with both existing files and renames
                # already scheduled in this pass: foo.x -> foo_1.x, ...
                c = 0
                while new_name in all_names or new_name in new_names:
                    c += 1
                    n, ext = bn.rpartition('.')[0::2]
                    new_name = posixpath.join(folder, '%s_%d.%s' % (n, c, ext))
                name_map[name] = new_name
                new_names.add(new_name)
    return name_map
|
||||||
|
|
||||||
|
|
||||||
|
def remove_links_in_sheet(href_to_name, sheet, predicate):
    '''
    Remove from the parsed stylesheet *sheet* all @import rules and url()
    references for which *predicate*(name, href, None) is True. Returns True
    if the sheet was modified.
    '''
    import_rules_to_remove = []
    changed = False
    for i, r in enumerate(sheet):
        if r.type == r.IMPORT_RULE:
            name = href_to_name(r.href)
            if predicate(name, r.href, None):
                import_rules_to_remove.append(i)
    # Delete by index from the end so earlier indices stay valid.
    for i in sorted(import_rules_to_remove, reverse=True):
        sheet.deleteRule(i)
        changed = True

    # Also strip matching url() values from all style declarations.
    for dec in iter_declarations(sheet):
        changed = remove_links_in_declaration(href_to_name, dec, predicate) or changed
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def remove_links_in_declaration(href_to_name, style, predicate):
    '''
    Remove from the CSS declaration *style* every url() property value for
    which *predicate*(name, uri, None) is True. Returns True if anything
    was removed.
    '''
    def check_pval(v):
        # Only URI-typed values are candidates for removal.
        if v.type == v.URI:
            name = href_to_name(v.uri)
            return predicate(name, v.uri, None)
        return False

    changed = False

    # Iterate over a snapshot since properties may be modified in place.
    for p in tuple(style.getProperties(all=True)):
        changed = remove_property_value(p, check_pval) or changed
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def remove_links_to(container, predicate):
    ''' predicate must be a function that takes the arguments (name, href,
    fragment=None) and returns True iff the link should be removed '''
    from calibre.ebooks.oeb.base import iterlinks, OEB_DOCS, OEB_STYLES, XPath, XHTML
    stylepath = XPath('//h:style')
    styleattrpath = XPath('//*[@style]')
    changed = set()
    for name, mt in iteritems(container.mime_map):
        removed = False
        if mt in OEB_DOCS:
            root = container.parsed(name)
            # Plain HTML links/references (CSS handled separately below).
            for el, attr, href, pos in iterlinks(root, find_links_in_css=False):
                hname = container.href_to_name(href, name)
                frag = href.partition('#')[-1]
                if predicate(hname, href, frag):
                    if attr is None:
                        # Link was in element text (e.g. inline CSS), blank it.
                        el.text = None
                    else:
                        if el.tag == XHTML('link') or el.tag == XHTML('img'):
                            # These elements are meaningless without their
                            # target, so remove them entirely.
                            extract(el)
                        else:
                            del el.attrib[attr]
                    removed = True
            # url() references inside <style> blocks.
            for tag in stylepath(root):
                if tag.text and (tag.get('type') or 'text/css').lower() == 'text/css':
                    sheet = container.parse_css(tag.text)
                    if remove_links_in_sheet(partial(container.href_to_name, base=name), sheet, predicate):
                        tag.text = css_text(sheet)
                        removed = True
            # url() references inside style="" attributes.
            for tag in styleattrpath(root):
                style = tag.get('style')
                if style:
                    style = container.parse_css(style, is_declaration=True)
                    if remove_links_in_declaration(partial(container.href_to_name, base=name), style, predicate):
                        removed = True
                        tag.set('style', css_text(style))
        elif mt in OEB_STYLES:
            removed = remove_links_in_sheet(partial(container.href_to_name, base=name), container.parsed(name), predicate)
        if removed:
            changed.add(name)
    # Mark every modified file dirty so it gets re-serialized.
    tuple(map(container.dirty, changed))
    return changed
|
||||||
|
|
||||||
|
|
||||||
|
def get_spine_order_for_all_files(container):
    '''
    Map every file to a (spine position, link index) pair. Spine files get
    link index -1; non-spine files get the position of the first spine file
    linking to them plus the index of that link. Linear spine entries sort
    before non-linear ones.
    '''
    linear, non_linear = [], []
    for name, is_linear in container.spine_names:
        (linear if is_linear else non_linear).append(name)
    ordered = linear + non_linear
    in_spine = frozenset(ordered)
    order = {}
    for spine_pos, name in enumerate(ordered):
        # setdefault keeps the first (earliest) position seen.
        order.setdefault(name, (spine_pos, -1))
        for link_idx, href in enumerate(container.iterlinks(name, get_line_numbers=False)):
            target = container.href_to_name(href, name)
            if target not in in_spine:
                order.setdefault(target, (spine_pos, link_idx))
    return order
|
||||||
517
ebook_converter/ebooks/oeb/polish/split.py
Normal file
517
ebook_converter/ebooks/oeb/polish/split.py
Normal file
@@ -0,0 +1,517 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
import copy, os, re
|
||||||
|
from polyglot.builtins import map, string_or_bytes, range
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML, OEB_DOCS
|
||||||
|
from calibre.ebooks.oeb.polish.errors import MalformedMarkup
|
||||||
|
from calibre.ebooks.oeb.polish.toc import node_from_loc
|
||||||
|
from calibre.ebooks.oeb.polish.replace import LinkRebaser
|
||||||
|
from polyglot.builtins import iteritems, unicode_type
|
||||||
|
from polyglot.urllib import urlparse
|
||||||
|
|
||||||
|
|
||||||
|
class AbortError(ValueError):
    """Raised when a split or merge operation cannot proceed."""
    pass
|
||||||
|
|
||||||
|
|
||||||
|
def in_table(node):
    '''
    Return True if *node* or any of its ancestors is a (namespaced)
    <table> element.
    '''
    ancestor = node
    while ancestor is not None:
        if ancestor.tag.endswith('}table'):
            return True
        ancestor = ancestor.getparent()
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def adjust_split_point(split_point, log):
    '''
    Move the split point up its ancestor chain if it has no content
    before it. This handles the common case:
    <div id="chapter1"><h2>Chapter 1</h2>...</div> with a page break on the
    h2.
    '''
    node = split_point
    while True:
        parent = node.getparent()
        # Stop climbing at the document root...
        if parent is None:
            break
        # ...or at structural elements that must never become split points...
        if barename(parent.tag) in {'body', 'html'}:
            break
        # ...or when the parent has text content before the node...
        if parent.text and parent.text.strip():
            break
        # ...or when the node is not the parent's first child.
        if parent.index(node) > 0:
            break
        node = parent

    if node is not split_point:
        log.debug('Adjusted split point to ancestor')

    return node
|
||||||
|
|
||||||
|
|
||||||
|
def get_body(root):
    # The first <body> child of the XHTML root, or None if there is none.
    return root.find('h:body', namespaces=XPNSMAP)
|
||||||
|
|
||||||
|
|
||||||
|
def do_split(split_point, log, before=True):
    '''
    Split tree into a *before* and an *after* tree at ``split_point``.

    :param split_point: The Element at which to split
    :param before: If True tree is split before split_point, otherwise after split_point
    :return: before_tree, after_tree
    '''
    if before:
        # We cannot adjust for after since moving an after split point to a
        # parent will cause breakage if the parent contains any content
        # after the original split point
        split_point = adjust_split_point(split_point, log)
    tree = split_point.getroottree()
    path = tree.getpath(split_point)

    # Work on two copies; the split point is re-located in each copy via its
    # XPath path from the original tree.
    tree, tree2 = copy.deepcopy(tree), copy.deepcopy(tree)
    root, root2 = tree.getroot(), tree2.getroot()
    body, body2 = map(get_body, (root, root2))
    split_point = root.xpath(path)[0]
    split_point2 = root2.xpath(path)[0]

    def nix_element(elem, top=True):
        # Remove elem unless top is False in which case replace elem by its
        # children
        parent = elem.getparent()
        if top:
            parent.remove(elem)
        else:
            index = parent.index(elem)
            parent[index:index+1] = list(elem.iterchildren())

    # Tree 1: remove the split point (if splitting before) and everything
    # after it.
    hit_split_point = False
    keep_descendants = False
    split_point_descendants = frozenset(split_point.iterdescendants())
    for elem in tuple(body.iterdescendants()):
        if elem is split_point:
            hit_split_point = True
            if before:
                nix_element(elem)
            else:
                # We want to keep the descendants of the split point in
                # Tree 1
                keep_descendants = True
                # We want the split point element, but not its tail
                elem.tail = '\n'

            continue
        if hit_split_point:
            if keep_descendants:
                if elem in split_point_descendants:
                    # elem is a descendant keep it
                    continue
                else:
                    # We are out of split_point, so prevent further set
                    # lookups of split_point_descendants
                    keep_descendants = False
            nix_element(elem)

    # Tree 2: remove everything before the split point (and the split point
    # itself when splitting after it).
    ancestors = frozenset(XPath('ancestor::*')(split_point2))
    for elem in tuple(body2.iterdescendants()):
        if elem is split_point2:
            if not before:
                # Keep the split point element's tail, if it contains non-whitespace
                # text
                tail = elem.tail
                if tail and not tail.isspace():
                    parent = elem.getparent()
                    idx = parent.index(elem)
                    if idx == 0:
                        parent.text = (parent.text or '') + tail
                    else:
                        sib = parent[idx-1]
                        sib.tail = (sib.tail or '') + tail
                # Remove the element itself
                nix_element(elem)
            break
        if elem in ancestors:
            # We have to preserve the ancestors as they could have CSS
            # styles that are inherited/applicable, like font or
            # width. So we only remove the text, if any.
            elem.text = '\n'
        else:
            nix_element(elem, top=False)

    body2.text = '\n'

    return tree, tree2
|
||||||
|
|
||||||
|
|
||||||
|
class SplitLinkReplacer(object):
    '''
    Callable link replacer that retargets links pointing at anchors which
    moved into the bottom half of a split file. ``self.replaced`` records
    whether any link was rewritten.
    '''

    def __init__(self, base, bottom_anchors, top_name, bottom_name, container):
        self.bottom_anchors, self.bottom_name = bottom_anchors, bottom_name
        self.container, self.top_name = container, top_name
        self.base = base
        self.replaced = False

    def __call__(self, url):
        # Purely intra-file fragments are left alone.
        if url and url.startswith('#'):
            return url
        if self.container.href_to_name(url, self.base) != self.top_name:
            return url
        fragment = urlparse(url).fragment
        if fragment and fragment in self.bottom_anchors:
            new_href = self.container.name_to_href(self.bottom_name, self.base)
            url = new_href + '#' + fragment
            self.replaced = True
        return url
|
||||||
|
|
||||||
|
|
||||||
|
def split(container, name, loc_or_xpath, before=True, totals=None):
    '''
    Split the file specified by name at the position specified by loc_or_xpath.
    Splitting automatically migrates all links and references to the affected
    files.

    :param loc_or_xpath: Should be an XPath expression such as
        //h:div[@id="split_here"]. Can also be a *loc* which is used internally to
        implement splitting in the preview panel.
    :param before: If True the split occurs before the identified element otherwise after it.
    :param totals: Used internally
    '''

    root = container.parsed(name)
    if isinstance(loc_or_xpath, unicode_type):
        split_point = root.xpath(loc_or_xpath)[0]
    else:
        try:
            split_point = node_from_loc(root, loc_or_xpath, totals=totals)
        except MalformedMarkup:
            # The webkit HTML parser and the container parser have yielded
            # different node counts, this can happen if the file is valid XML
            # but contains constructs like nested <p> tags. So force parse it
            # with the HTML 5 parser and try again.
            raw = container.raw_data(name)
            root = container.parse_xhtml(raw, fname=name, force_html5_parse=True)
            try:
                split_point = node_from_loc(root, loc_or_xpath, totals=totals)
            except MalformedMarkup:
                raise MalformedMarkup(_('The file %s has malformed markup. Try running the Fix HTML tool'
                                        ' before splitting') % name)
            container.replace(name, root)
    if in_table(split_point):
        raise AbortError('Cannot split inside tables')
    if split_point.tag.endswith('}body'):
        raise AbortError('Cannot split on the <body> tag')
    tree1, tree2 = do_split(split_point, container.log, before=before)
    root1, root2 = tree1.getroot(), tree2.getroot()
    # '' is included in the top anchors so fragment-less self links stay put.
    anchors_in_top = frozenset(root1.xpath('//*/@id')) | frozenset(root1.xpath('//*/@name')) | {''}
    anchors_in_bottom = frozenset(root2.xpath('//*/@id')) | frozenset(root2.xpath('//*/@name'))
    # Generate a fresh, unused foo_splitN name for the bottom half.
    base, ext = name.rpartition('.')[0::2]
    base = re.sub(r'_split\d+$', '', base)
    nname, s = None, 0
    while not nname or container.exists(nname):
        s += 1
        nname = '%s_split%d.%s' % (base, s, ext)
    manifest_item = container.generate_item(nname, media_type=container.mime_map[name])
    bottom_name = container.href_to_name(manifest_item.get('href'), container.opf_name)

    # Fix links in the split trees
    for r in (root1, root2):
        for a in r.xpath('//*[@href]'):
            url = a.get('href')
            if url.startswith('#'):
                fname = name
            else:
                fname = container.href_to_name(url, name)
            if fname == name:
                purl = urlparse(url)
                if purl.fragment in anchors_in_top:
                    if r is root2:
                        a.set('href', '%s#%s' % (container.name_to_href(name, bottom_name), purl.fragment))
                    else:
                        a.set('href', '#' + purl.fragment)
                elif purl.fragment in anchors_in_bottom:
                    if r is root1:
                        a.set('href', '%s#%s' % (container.name_to_href(bottom_name, name), purl.fragment))
                    else:
                        a.set('href', '#' + purl.fragment)

    # Fix all links in the container that point to anchors in the bottom tree
    for fname, media_type in iteritems(container.mime_map):
        if fname not in {name, bottom_name}:
            repl = SplitLinkReplacer(fname, anchors_in_bottom, name, bottom_name, container)
            container.replace_links(fname, repl)

    container.replace(name, root1)
    container.replace(bottom_name, root2)

    # Insert the bottom half into the spine immediately after the original,
    # inheriting its linearity.
    spine = container.opf_xpath('//opf:spine')[0]
    for spine_item, spine_name, linear in container.spine_iter:
        if spine_name == name:
            break
    index = spine.index(spine_item) + 1

    si = spine.makeelement(OPF('itemref'), idref=manifest_item.get('id'))
    if not linear:
        si.set('linear', 'no')
    container.insert_into_xml(spine, si, index=index)
    container.dirty(container.opf_name)
    return bottom_name
|
||||||
|
|
||||||
|
|
||||||
|
def multisplit(container, name, xpath, before=True):
    '''
    Split the specified file at multiple locations (all tags that match the specified XPath expression). See also: :func:`split`.
    Splitting automatically migrates all links and references to the affected
    files.

    :param before: If True the splits occur before the identified element otherwise after it.
    '''
    root = container.parsed(name)
    nodes = root.xpath(xpath, namespaces=XPNSMAP)
    if not nodes:
        raise AbortError(_('The expression %s did not match any nodes') % xpath)
    # Validate every split point up front, before modifying anything.
    for split_point in nodes:
        if in_table(split_point):
            raise AbortError('Cannot split inside tables')
        if split_point.tag.endswith('}body'):
            raise AbortError('Cannot split on the <body> tag')

    # Tag the split points so they can be found again after each split
    # re-parses the document.
    for i, tag in enumerate(nodes):
        tag.set('calibre-split-point', unicode_type(i))

    current = name
    all_names = [name]
    for i in range(len(nodes)):
        current = split(container, current, '//*[@calibre-split-point="%d"]' % i, before=before)
        all_names.append(current)

    # Strip the temporary marker attributes from every resulting file.
    for x in all_names:
        for tag in container.parsed(x).xpath('//*[@calibre-split-point]'):
            tag.attrib.pop('calibre-split-point')
        container.dirty(x)

    return all_names[1:]
|
||||||
|
|
||||||
|
|
||||||
|
class MergeLinkReplacer(object):
    '''
    Callable link replacer that rewrites links into merged files so they
    point at the corresponding (possibly renamed) anchor in the master
    file. ``self.replaced`` records whether any link was rewritten.
    '''

    def __init__(self, base, anchor_map, master, container):
        self.container, self.anchor_map = container, anchor_map
        self.master = master
        self.base = base
        self.replaced = False

    def __call__(self, url):
        # Purely intra-file fragments are left alone.
        if url and url.startswith('#'):
            return url
        target = self.container.href_to_name(url, self.base)
        renames = self.anchor_map.get(target, None)
        if renames is None:
            # Not one of the merged files.
            return url
        frag = urlparse(url).fragment or ''
        # Map through the anchor renames; '' maps to the file's top anchor.
        frag = renames.get(frag, frag)
        self.replaced = True
        return self.container.name_to_href(self.master, self.base) + '#' + frag
|
||||||
|
|
||||||
|
|
||||||
|
def add_text(body, text):
    '''
    Append *text* to the rendered content of *body*: to the tail of its
    last child, or to body.text when it has no children.
    '''
    if len(body) == 0:
        body.text = (body.text or '') + text
    else:
        last = body[-1]
        last.tail = (last.tail or '') + text
|
||||||
|
|
||||||
|
|
||||||
|
def all_anchors(root):
    '''Return the set of every id and name attribute value in the document.'''
    ids = root.xpath('//*/@id')
    names = root.xpath('//*/@name')
    return set(ids).union(names)
|
||||||
|
|
||||||
|
|
||||||
|
def all_stylesheets(container, name):
    '''
    Yield the container names of all CSS stylesheets linked from the <head>
    of the HTML file *name*.
    '''
    for link in XPath('//h:head/h:link[@href]')(container.parsed(name)):
        # Resolve each href relative to the HTML file itself. The previous
        # code rebound ``name`` here, so every href after the first was
        # resolved relative to the previously seen stylesheet instead of the
        # HTML file, giving wrong names when stylesheets live in different
        # folders.
        sheet_name = container.href_to_name(link.get('href'), name)
        typ = link.get('type', 'text/css')
        if typ == 'text/css':
            yield sheet_name
|
||||||
|
|
||||||
|
|
||||||
|
def unique_anchor(seen_anchors, current):
    '''
    Return *current*, suffixed with _N if necessary, such that the result
    is not present in *seen_anchors*.
    '''
    candidate = current
    counter = 0
    while candidate in seen_anchors:
        counter += 1
        candidate = '%s_%d' % (current, counter)
    return candidate
|
||||||
|
|
||||||
|
|
||||||
|
def remove_name_attributes(root):
    '''Replace every legacy name attribute with an equivalent id attribute.'''
    # Where both id and name exist the id wins; drop the redundant name.
    redundant = root.xpath('//*[@id and @name]')
    for elem in redundant:
        del elem.attrib['name']
    # Any remaining name attribute becomes the element's id.
    for elem in root.xpath('//*[@name]'):
        value = elem.attrib.pop('name')
        elem.set('id', value)
|
||||||
|
|
||||||
|
|
||||||
|
def merge_html(container, names, master, insert_page_breaks=False):
    '''
    Merge the HTML files *names* into *master*, appending their body content
    to the master's body, de-duplicating anchors and migrating stylesheet
    links. Returns a map of merged file name -> the anchor in master marking
    where that file's content begins.
    '''
    p = container.parsed
    root = p(master)

    # Ensure master has a <head>
    head = root.find('h:head', namespaces=XPNSMAP)
    if head is None:
        head = root.makeelement(XHTML('head'))
        container.insert_into_xml(root, head, 0)

    seen_anchors = all_anchors(root)
    seen_stylesheets = set(all_stylesheets(container, master))
    master_body = p(master).findall('h:body', namespaces=XPNSMAP)[-1]
    master_base = os.path.dirname(master)
    # Per merged file: old anchor -> new anchor in master ('' maps to the
    # anchor marking the start of that file's content).
    anchor_map = {n:{} for n in names if n != master}
    first_anchor_map = {}

    for name in names:
        if name == master:
            continue
        # Insert new stylesheets into master
        for sheet in all_stylesheets(container, name):
            if sheet not in seen_stylesheets:
                seen_stylesheets.add(sheet)
                link = head.makeelement(XHTML('link'), rel='stylesheet', type='text/css', href=container.name_to_href(sheet, master))
                container.insert_into_xml(head, link)

        # Rebase links if master is in a different directory
        if os.path.dirname(name) != master_base:
            container.replace_links(name, LinkRebaser(container, name, master))

        root = p(name)
        # Collect the body content as a flat list of leading-text strings
        # and child elements.
        children = []
        for body in p(name).findall('h:body', namespaces=XPNSMAP):
            children.append(body.text if body.text and body.text.strip() else '\n\n')
            children.extend(body)

        first_child = ''
        for first_child in children:
            if not isinstance(first_child, string_or_bytes):
                break
        if isinstance(first_child, string_or_bytes):
            # body contained only text, no tags
            first_child = body.makeelement(XHTML('p'))
            first_child.text, children[0] = children[0], first_child

        amap = anchor_map[name]
        remove_name_attributes(root)

        # Rename any anchors that collide with ones already in master.
        for elem in root.xpath('//*[@id]'):
            val = elem.get('id')
            if not val:
                continue
            if val in seen_anchors:
                nval = unique_anchor(seen_anchors, val)
                elem.set('id', nval)
                amap[val] = nval
            else:
                seen_anchors.add(val)

        # Guarantee an anchor on the first element so links to the merged
        # file itself can be retargeted.
        if 'id' not in first_child.attrib:
            first_child.set('id', unique_anchor(seen_anchors, 'top'))
            seen_anchors.add(first_child.get('id'))
        first_anchor_map[name] = first_child.get('id')

        if insert_page_breaks:
            first_child.set('style', first_child.get('style', '') + '; page-break-before: always')

        amap[''] = first_child.get('id')

        # Fix links that point to local changed anchors
        for a in XPath('//h:a[starts-with(@href, "#")]')(root):
            q = a.get('href')[1:]
            if q in amap:
                a.set('href', '#' + amap[q])

        for child in children:
            if isinstance(child, string_or_bytes):
                add_text(master_body, child)
            else:
                master_body.append(copy.deepcopy(child))

        container.remove_item(name, remove_from_guide=False)

    # Fix all links in the container that point to merged files
    for fname, media_type in iteritems(container.mime_map):
        repl = MergeLinkReplacer(fname, anchor_map, master, container)
        container.replace_links(fname, repl)

    return first_anchor_map
|
||||||
|
|
||||||
|
|
||||||
|
def merge_css(container, names, master):
    '''
    Merge the CSS files *names* into *master*, appending their rules to the
    master sheet, then retarget all <link> references in the book's HTML
    files from the merged sheets to the master sheet.
    '''
    p = container.parsed
    msheet = p(master)
    master_base = os.path.dirname(master)
    merged = set()

    for name in names:
        if name == master:
            continue
        # Rebase links if master is in a different directory
        if os.path.dirname(name) != master_base:
            container.replace_links(name, LinkRebaser(container, name, master))

        sheet = p(name)

        # Remove charset rules (they are meaningless once merged).
        cr = [r for r in sheet.cssRules if r.type == r.CHARSET_RULE]
        for r in cr:
            # Previously done via a list comprehension used only for its side
            # effects; a plain loop is the idiomatic form. The index is
            # re-computed each time as it shifts after every deletion.
            sheet.deleteRule(sheet.cssRules.index(r))
        for rule in sheet.cssRules:
            msheet.add(rule)

        container.remove_item(name)
        merged.add(name)

    # Remove links to merged stylesheets in the html files, replacing with a
    # link to the master sheet
    for name, mt in iteritems(container.mime_map):
        if mt in OEB_DOCS:
            removed = False
            root = p(name)
            for link in XPath('//h:link[@href]')(root):
                q = container.href_to_name(link.get('href'), name)
                if q in merged:
                    container.remove_from_xml(link)
                    removed = True
            if removed:
                container.dirty(name)
            # If the file referenced a merged sheet but not the master one,
            # add a link to the master so styling is preserved.
            if removed and master not in set(all_stylesheets(container, name)):
                head = root.find('h:head', namespaces=XPNSMAP)
                if head is not None:
                    link = head.makeelement(XHTML('link'), type='text/css', rel='stylesheet', href=container.name_to_href(master, name))
                    container.insert_into_xml(head, link)
|
||||||
|
|
||||||
|
|
||||||
|
def merge(container, category, names, master):
    '''
    Merge the specified files into a single file, automatically migrating all
    links and references to the affected files. The files must all either be HTML or CSS files.

    :param category: Must be either ``'text'`` for HTML files or ``'styles'`` for CSS files
    :param names: The list of files to be merged
    :param master: Which of the merged files is the *master* file, that is, the file that will remain after merging.
    '''
    # Validate arguments up front; nothing is modified on failure.
    if category not in {'text', 'styles'}:
        raise AbortError('Cannot merge files of type: %s' % category)
    if len(names) < 2:
        raise AbortError('Must specify at least two files to be merged')
    if master not in names:
        raise AbortError('The master file (%s) must be one of the files being merged' % master)

    merger = merge_html if category == 'text' else merge_css
    merger(container, names, master)

    container.dirty(master)
|
||||||
172
ebook_converter/ebooks/oeb/transforms/cover.py
Normal file
172
ebook_converter/ebooks/oeb/transforms/cover.py
Normal file
@@ -0,0 +1,172 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import textwrap
|
||||||
|
|
||||||
|
from calibre import guess_type
|
||||||
|
from calibre.utils.imghdr import identify
|
||||||
|
from calibre.utils.xml_parse import safe_xml_fromstring
|
||||||
|
from polyglot.builtins import unicode_type
|
||||||
|
from polyglot.urllib import unquote
|
||||||
|
|
||||||
|
|
||||||
|
class CoverManager(object):
|
||||||
|
|
||||||
|
SVG_TEMPLATE = textwrap.dedent('''\
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="calibre:cover" content="true" />
|
||||||
|
<title>Cover</title>
|
||||||
|
<style type="text/css" title="override_css">
|
||||||
|
@page {padding: 0pt; margin:0pt}
|
||||||
|
body { text-align: center; padding:0pt; margin: 0pt; }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div>
|
||||||
|
<svg version="1.1" xmlns="http://www.w3.org/2000/svg"
|
||||||
|
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||||
|
width="100%%" height="100%%" viewBox="__viewbox__"
|
||||||
|
preserveAspectRatio="__ar__">
|
||||||
|
<image width="__width__" height="__height__" xlink:href="%s"/>
|
||||||
|
</svg>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
''')
|
||||||
|
|
||||||
|
NONSVG_TEMPLATE = textwrap.dedent('''\
|
||||||
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
|
||||||
|
<head>
|
||||||
|
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||||
|
<meta name="calibre:cover" content="true" />
|
||||||
|
<title>Cover</title>
|
||||||
|
<style type="text/css" title="override_css">
|
||||||
|
@page {padding: 0pt; margin:0pt}
|
||||||
|
body { text-align: center; padding:0pt; margin: 0pt }
|
||||||
|
div { padding:0pt; margin: 0pt }
|
||||||
|
img { padding:0pt; margin: 0pt }
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div>
|
||||||
|
<img src="%s" alt="cover" __style__ />
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
''')
|
||||||
|
|
||||||
|
    def __init__(self, no_default_cover=False, no_svg_cover=False,
            preserve_aspect_ratio=False, fixed_size=None):
        '''
        :param no_default_cover: If True, no cover is generated for books
            that lack one.
        :param no_svg_cover: If True, the plain <img> cover template is used
            instead of the SVG wrapper.
        :param preserve_aspect_ratio: Controls the SVG preserveAspectRatio
            attribute.
        :param fixed_size: Optional (width, height) CSS sizes for the
            non-SVG cover image; defaults to height: 100%.
        '''
        self.no_default_cover = no_default_cover
        self.no_svg_cover = no_svg_cover
        self.preserve_aspect_ratio = preserve_aspect_ratio

        ar = 'xMidYMid meet' if preserve_aspect_ratio else 'none'
        self.svg_template = self.SVG_TEMPLATE.replace('__ar__', ar)

        # %% survives as a literal % after the template is later %-formatted
        # with the image href.
        if fixed_size is None:
            style = 'style="height: 100%%"'
        else:
            width, height = fixed_size
            style = 'style="height: %s; width: %s"'%(height, width)
        self.non_svg_template = self.NONSVG_TEMPLATE.replace('__style__',
                style)
|
||||||
|
|
||||||
|
    def __call__(self, oeb, opts, log):
        # Transform entry point: stash the book and logger, then do the work.
        self.oeb = oeb
        self.log = log
        self.insert_cover()
|
||||||
|
|
||||||
|
def default_cover(self):
|
||||||
|
'''
|
||||||
|
Create a generic cover for books that dont have a cover
|
||||||
|
'''
|
||||||
|
if self.no_default_cover:
|
||||||
|
return None
|
||||||
|
self.log('Generating default cover')
|
||||||
|
m = self.oeb.metadata
|
||||||
|
title = unicode_type(m.title[0])
|
||||||
|
authors = [unicode_type(x) for x in m.creator if x.role == 'aut']
|
||||||
|
try:
|
||||||
|
from calibre.ebooks.covers import create_cover
|
||||||
|
series = series_index = None
|
||||||
|
if m.series:
|
||||||
|
try:
|
||||||
|
series, series_index = unicode_type(m.series[0]), m.series_index[0]
|
||||||
|
except IndexError:
|
||||||
|
pass
|
||||||
|
img_data = create_cover(title, authors, series, series_index)
|
||||||
|
id, href = self.oeb.manifest.generate('cover',
|
||||||
|
'cover_image.jpg')
|
||||||
|
item = self.oeb.manifest.add(id, href, guess_type('t.jpg')[0],
|
||||||
|
data=img_data)
|
||||||
|
m.clear('cover')
|
||||||
|
m.add('cover', item.id)
|
||||||
|
|
||||||
|
return item.href
|
||||||
|
except:
|
||||||
|
self.log.exception('Failed to generate default cover')
|
||||||
|
return None
|
||||||
|
|
||||||
|
    def inspect_cover(self, href):
        """Return the (width, height) in pixels of the manifest image at
        ``href``, or (-1, -1) when the item is missing or unreadable."""
        from calibre.ebooks.oeb.base import urlnormalize
        for x in self.oeb.manifest:
            if x.href == urlnormalize(href):
                try:
                    raw = x.data
                    # identify() returns (fmt, width, height); drop the format.
                    return identify(raw)[1:]
                except Exception:
                    self.log.exception('Failed to read cover image dimensions')
        return -1, -1
|
||||||
|
|
||||||
|
    def insert_cover(self):
        """Insert a title page (SVG or plain <img>) at the start of the spine
        and point the guide 'cover'/'titlepage' references at it.

        If the book already has a guide 'titlepage', that existing page is
        reused; otherwise one is built around the guide 'cover' image (or a
        generated default cover).
        """
        from calibre.ebooks.oeb.base import urldefrag
        g, m = self.oeb.guide, self.oeb.manifest
        item = None
        if 'titlepage' not in g:
            if 'cover' in g:
                href = g['cover'].href
            else:
                href = self.default_cover()
                if href is None:
                    # No cover and default generation disabled/failed.
                    return
            width, height = self.inspect_cover(href)
            if width == -1 or height == -1:
                self.log.warning('Failed to read cover dimensions')
                # Fall back to a typical 3:4 cover size.
                width, height = 600, 800
            # if self.preserve_aspect_ratio:
            #     width, height = 600, 800
            # Bake the actual image dimensions into the SVG wrapper.
            self.svg_template = self.svg_template.replace('__viewbox__',
                    '0 0 %d %d'%(width, height))
            self.svg_template = self.svg_template.replace('__width__',
                    unicode_type(width))
            self.svg_template = self.svg_template.replace('__height__',
                    unicode_type(height))

            if href is not None:
                templ = self.non_svg_template if self.no_svg_cover \
                        else self.svg_template
                tp = templ%unquote(href)
                id, href = m.generate('titlepage', 'titlepage.xhtml')
                item = m.add(id, href, guess_type('t.xhtml')[0],
                        data=safe_xml_fromstring(tp))
        else:
            # Reuse the existing title page from the guide.
            item = self.oeb.manifest.hrefs[
                    urldefrag(self.oeb.guide['titlepage'].href)[0]]
        if item is not None:
            # Title page always goes first in the spine, marked linear.
            self.oeb.spine.insert(0, item, True)
            if 'cover' not in self.oeb.guide.refs:
                self.oeb.guide.add('cover', 'Title Page', 'a')
            self.oeb.guide.refs['cover'].href = item.href
            if 'titlepage' in self.oeb.guide.refs:
                self.oeb.guide.refs['titlepage'].href = item.href
            # Re-point any TOC entry that referred to the old cover.
            titem = getattr(self.oeb.toc, 'item_that_refers_to_cover', None)
            if titem is not None:
                titem.href = item.href
|
||||||
187
ebook_converter/ebooks/oeb/transforms/filenames.py
Normal file
187
ebook_converter/ebooks/oeb/transforms/filenames.py
Normal file
@@ -0,0 +1,187 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import posixpath
|
||||||
|
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import rewrite_links, urlnormalize
|
||||||
|
from polyglot.urllib import urldefrag, urlparse
|
||||||
|
|
||||||
|
|
||||||
|
class RenameFiles(object):  # {{{

    '''
    Rename files and adjust all links pointing to them. Note that the spine
    and manifest are not touched by this transform.
    '''

    def __init__(self, rename_map, renamed_items_map=None):
        # rename_map: {old absolute href: new absolute href}
        # renamed_items_map: {new href: original item} — supplied by
        # FlatFilenames so relative links can be resolved against the
        # item's ORIGINAL location.
        self.rename_map = rename_map
        self.renamed_items_map = renamed_items_map

    def __call__(self, oeb, opts):
        import css_parser
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb

        # Rewrite links inside every XML document and CSS stylesheet.
        for item in oeb.manifest.items:
            self.current_item = item
            if etree.iselement(item.data):
                rewrite_links(self.current_item.data, self.url_replacer)
            elif hasattr(item.data, 'cssText'):
                css_parser.replaceUrls(item.data, self.url_replacer)

        # Guide hrefs are absolute, so they can be mapped directly.
        if self.oeb.guide:
            for ref in self.oeb.guide.values():
                href = urlnormalize(ref.href)
                href, frag = urldefrag(href)
                replacement = self.rename_map.get(href, None)
                if replacement is not None:
                    nhref = replacement
                    if frag:
                        nhref += '#' + frag
                    ref.href = nhref

        if self.oeb.toc:
            self.fix_toc_entry(self.oeb.toc)

    def fix_toc_entry(self, toc):
        # Recursively remap this TOC node and all of its children,
        # preserving any fragment identifier.
        if toc.href:
            href = urlnormalize(toc.href)
            href, frag = urldefrag(href)
            replacement = self.rename_map.get(href, None)

            if replacement is not None:
                nhref = replacement
                if frag:
                    nhref = '#'.join((nhref, frag))
                toc.href = nhref

        for x in toc:
            self.fix_toc_entry(x)

    def url_replacer(self, orig_url):
        # Callback for rewrite_links/replaceUrls: map one (possibly
        # relative) URL found in self.current_item to its renamed target.
        url = urlnormalize(orig_url)
        parts = urlparse(url)
        if parts.scheme:
            # Only rewrite local URLs
            return orig_url
        path, frag = urldefrag(url)
        if self.renamed_items_map:
            # Resolve relative URLs against the item's pre-rename location.
            orig_item = self.renamed_items_map.get(self.current_item.href, self.current_item)
        else:
            orig_item = self.current_item

        href = orig_item.abshref(path)
        # Map to the new target (if renamed) and make it relative to the
        # current (possibly renamed) item.
        replacement = self.current_item.relhref(self.rename_map.get(href, href))
        if frag:
            replacement += '#' + frag
        return replacement

# }}}
|
||||||
|
|
||||||
|
|
||||||
|
class UniqueFilenames(object):  # {{{

    'Ensure that every item in the manifest has a unique filename'

    def __call__(self, oeb, opts):
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb

        # Filenames (basenames) already claimed, across all directories.
        self.seen_filenames = set()
        # {old href: new href} for the RenameFiles pass at the end.
        self.rename_map = {}

        for item in list(oeb.manifest.items):
            fname = posixpath.basename(item.href)
            if fname in self.seen_filenames:
                suffix = self.unique_suffix(fname)
                # Capture data before removing the item: removal may drop it.
                data = item.data
                base, ext = posixpath.splitext(item.href)
                nhref = base + suffix + ext
                nhref = oeb.manifest.generate(href=nhref)[1]
                spine_pos = item.spine_position
                # Replace the item in the manifest under the new href,
                # restoring its spine position afterwards.
                oeb.manifest.remove(item)
                nitem = oeb.manifest.add(item.id, nhref, item.media_type, data=data,
                        fallback=item.fallback)
                self.seen_filenames.add(posixpath.basename(nhref))
                self.rename_map[item.href] = nhref
                if spine_pos is not None:
                    oeb.spine.insert(spine_pos, nitem, item.linear)
            else:
                self.seen_filenames.add(fname)

        if self.rename_map:
            self.log('Found non-unique filenames, renaming to support broken'
                    ' EPUB readers like FBReader, Aldiko and Stanza...')
            from pprint import pformat
            self.log.debug(pformat(self.rename_map))

            # Fix all links that pointed at the renamed files.
            renamer = RenameFiles(self.rename_map)
            renamer(oeb, opts)

    def unique_suffix(self, fname):
        # Smallest '_u<N>' suffix that makes fname unique among seen names.
        base, ext = posixpath.splitext(fname)
        c = 0
        while True:
            c += 1
            suffix = '_u%d'%c
            candidate = base + suffix + ext
            if candidate not in self.seen_filenames:
                return suffix
# }}}
|
||||||
|
|
||||||
|
|
||||||
|
class FlatFilenames(object):  # {{{

    'Ensure that every item in the manifest has a unique filename without subdirectories.'

    def __call__(self, oeb, opts):
        """Flatten every manifest href (a/b/c.html -> a_b_c.html) and then
        rewrite all links via RenameFiles."""
        self.log = oeb.logger
        self.opts = opts
        self.oeb = oeb

        self.rename_map = {}
        self.renamed_items_map = {}

        for item in list(oeb.manifest.items):
            # Flatten URL by removing directories.
            # Example: a/b/c/index.html -> a_b_c_index.html
            flat_href = item.href.replace("/", "_")

            if item.href == flat_href:
                # URL hasn't changed, skip item.
                continue

            payload = item.data
            spine_idx = item.spine_position
            flat_href = oeb.manifest.generate(href=flat_href)[1]
            if spine_idx is not None:
                oeb.spine.remove(item)
            oeb.manifest.remove(item)

            replacement = oeb.manifest.add(item.id, flat_href,
                    item.media_type, data=payload, fallback=item.fallback)
            self.rename_map[item.href] = flat_href
            # Keep the original item so relative links can be resolved
            # against its pre-rename location.
            self.renamed_items_map[flat_href] = item
            if spine_idx is not None:
                oeb.spine.insert(spine_idx, replacement, item.linear)

        if self.rename_map:
            self.log('Found non-flat filenames, renaming to support broken'
                    ' EPUB readers like FBReader...')
            from pprint import pformat
            self.log.debug(pformat(self.rename_map))
            self.log.debug(pformat(self.renamed_items_map))

            renamer = RenameFiles(self.rename_map, self.renamed_items_map)
            renamer(oeb, opts)
# }}}
|
||||||
81
ebook_converter/ebooks/oeb/transforms/rescale.py
Normal file
81
ebook_converter/ebooks/oeb/transforms/rescale.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
#!/usr/bin/env python2
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
from calibre import fit_image
|
||||||
|
|
||||||
|
|
||||||
|
class RescaleImages(object):

    'Rescale all images to fit inside given screen size'

    def __init__(self, check_colorspaces=False):
        # When True, CMYK images are converted to RGB (Adobe Digital
        # Editions cannot display CMYK).
        self.check_colorspaces = check_colorspaces

    def __call__(self, oeb, opts):
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        self.rescale()

    def rescale(self):
        from PIL import Image
        from io import BytesIO

        is_image_collection = getattr(self.opts, 'is_image_collection', False)

        # Target page size: comics use the full comic screen; normal books
        # subtract the page margins (converted from pt to output-device px).
        if is_image_collection:
            page_width, page_height = self.opts.dest.comic_screen_size
        else:
            page_width, page_height = self.opts.dest.width, self.opts.dest.height
            page_width -= (self.opts.margin_left + self.opts.margin_right) * self.opts.dest.dpi/72
            page_height -= (self.opts.margin_top + self.opts.margin_bottom) * self.opts.dest.dpi/72

        for item in self.oeb.manifest:
            if item.media_type.startswith('image'):
                # Normalize the format name for PIL; anything exotic is
                # re-saved as JPEG.
                ext = item.media_type.split('/')[-1].upper()
                if ext == 'JPG':
                    ext = 'JPEG'
                if ext not in ('PNG', 'JPEG', 'GIF'):
                    ext = 'JPEG'

                raw = item.data
                if hasattr(raw, 'xpath') or not raw:
                    # Probably an svg image
                    continue
                try:
                    img = Image.open(BytesIO(raw))
                except Exception:
                    # Undecodable image data: leave it untouched.
                    continue
                width, height = img.size

                try:
                    if self.check_colorspaces and img.mode == 'CMYK':
                        self.log.warn(
                            'The image %s is in the CMYK colorspace, converting it '
                            'to RGB as Adobe Digital Editions cannot display CMYK' % item.href)
                        img = img.convert('RGB')
                except Exception:
                    self.log.exception('Failed to convert image %s from CMYK to RGB' % item.href)

                # fit_image returns (needs_scaling, new_w, new_h) preserving
                # aspect ratio within the page box.
                scaled, new_width, new_height = fit_image(width, height, page_width, page_height)
                if scaled:
                    new_width = max(1, new_width)
                    new_height = max(1, new_height)
                    self.log('Rescaling image from %dx%d to %dx%d'%(
                        width, height, new_width, new_height), item.href)
                    try:
                        img = img.resize((new_width, new_height))
                    except Exception:
                        self.log.exception('Failed to rescale image: %s' % item.href)
                        continue
                    buf = BytesIO()
                    try:
                        img.save(buf, ext)
                    except Exception:
                        self.log.exception('Failed to rescale image: %s' % item.href)
                    else:
                        # Only replace the data if the re-save succeeded.
                        item.data = buf.getvalue()
                        item.unload_data_from_memory()
||||||
488
ebook_converter/ebooks/oeb/transforms/split.py
Normal file
488
ebook_converter/ebooks/oeb/transforms/split.py
Normal file
@@ -0,0 +1,488 @@
|
|||||||
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
'''
|
||||||
|
Splitting of the XHTML flows. Splitting can happen on page boundaries or can be
|
||||||
|
forced at "likely" locations to conform to size limitations. This transform
|
||||||
|
assumes a prior call to the flatcss transform.
|
||||||
|
'''
|
||||||
|
|
||||||
|
import os, functools, collections, re, copy
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
from lxml.etree import XPath as _XPath
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
|
from calibre import as_unicode, force_unicode
|
||||||
|
from calibre.ebooks.epub import rules
|
||||||
|
from calibre.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
|
||||||
|
urldefrag, rewrite_links, XHTML, urlnormalize)
|
||||||
|
from calibre.ebooks.oeb.polish.split import do_split
|
||||||
|
from polyglot.builtins import iteritems, range, map, unicode_type
|
||||||
|
from polyglot.urllib import unquote
|
||||||
|
from css_selectors import Select, SelectorError
|
||||||
|
|
||||||
|
# XPath factory with the OEB namespace map pre-bound.
XPath = functools.partial(_XPath, namespaces=NAMESPACES)

# Attribute used to mark elements already tried as split points, so
# find_split_point does not pick the same element twice.
SPLIT_POINT_ATTR = 'csp'
||||||
|
|
||||||
|
def tostring(root):
    """Serialize *root* to UTF-8 encoded bytes (used to measure flow size)."""
    return etree.tostring(root, encoding='utf-8')
||||||
|
|
||||||
|
|
||||||
|
class SplitError(ValueError):
    """Raised when no reasonable split point exists in an oversized tree."""

    def __init__(self, path, root):
        kb = len(tostring(root)) / 1024.
        message = _('Could not find reasonable point at which to split: '
                '%(path)s Sub-tree size: %(size)d KB') % dict(
                        path=path, size=kb)
        ValueError.__init__(self, message)
|
|
||||||
|
|
||||||
|
class Split(object):
    """Split XHTML flows on page breaks and/or a maximum flow size.

    Page breaks come either from an explicit XPath or from CSS
    page-break-before/after rules; size splitting is delegated to
    FlowSplitter. Assumes a prior flatcss transform.
    """

    def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None,
            max_flow_size=0, remove_css_pagebreaks=True):
        self.split_on_page_breaks = split_on_page_breaks
        self.page_breaks_xpath = page_breaks_xpath
        # 0 disables size-based splitting.
        self.max_flow_size = max_flow_size
        self.page_break_selectors = None
        self.remove_css_pagebreaks = remove_css_pagebreaks
        if self.page_breaks_xpath is not None:
            # Explicit XPath overrides CSS-derived selectors; False means
            # "split after" the matched element.
            self.page_break_selectors = [(XPath(self.page_breaks_xpath), False)]

    def __call__(self, oeb, opts):
        self.oeb = oeb
        self.log = oeb.log
        self.log('Splitting markup on page breaks and flow limits, if any...')
        self.opts = opts
        # {original href: anchor -> new file href} for link fixing.
        self.map = {}
        for item in list(self.oeb.manifest.items):
            if item.spine_position is not None and etree.iselement(item.data):
                self.split_item(item)

        self.fix_links()

    def split_item(self, item):
        page_breaks, page_break_ids = [], []
        if self.split_on_page_breaks:
            page_breaks, page_break_ids = self.find_page_breaks(item)

        splitter = FlowSplitter(item, page_breaks, page_break_ids,
                self.max_flow_size, self.oeb, self.opts)
        if splitter.was_split:
            am = splitter.anchor_map
            # Copy the defaultdict so later mutations of the splitter do
            # not affect the stored map.
            self.map[item.href] = collections.defaultdict(
                    am.default_factory, am)

    def find_page_breaks(self, item):
        """Return ([(xpath, split_before)], [ids]) for page-break points in
        ``item``, deriving selectors from CSS rules on first use."""
        if self.page_break_selectors is None:
            self.page_break_selectors = set()
            stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
                    OEB_STYLES]
            for rule in rules(stylesheets):
                before = force_unicode(getattr(rule.style.getPropertyCSSValue(
                    'page-break-before'), 'cssText', '').strip().lower())
                after = force_unicode(getattr(rule.style.getPropertyCSSValue(
                    'page-break-after'), 'cssText', '').strip().lower())
                # NOTE(review): bare excepts below keep CSS parsing
                # best-effort; a malformed rule must not abort the split.
                try:
                    if before and before not in {'avoid', 'auto', 'inherit'}:
                        self.page_break_selectors.add((rule.selectorText, True))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty('page-break-before')
                except:
                    pass
                try:
                    if after and after not in {'avoid', 'auto', 'inherit'}:
                        self.page_break_selectors.add((rule.selectorText, False))
                        if self.remove_css_pagebreaks:
                            rule.style.removeProperty('page-break-after')
                except:
                    pass
        page_breaks = set()
        select = Select(item.data)
        if not self.page_break_selectors:
            return [], []
        body = item.data.xpath('//h:body', namespaces=NAMESPACES)
        if not body:
            return [], []
        # Only elements inside <body> are valid split points.
        descendants = frozenset(body[0].iterdescendants('*'))

        for selector, before in self.page_break_selectors:
            try:
                for elem in select(selector):
                    if elem in descendants and elem.tag.rpartition('}')[2].lower() not in {'html', 'body', 'head', 'style', 'script', 'meta', 'link'}:
                        elem.set('pb_before', '1' if before else '0')
                        page_breaks.add(elem)
            except SelectorError as err:
                self.log.warn('Ignoring page breaks specified with invalid CSS selector: %r (%s)' % (selector, as_unicode(err)))

        # Tag every element with its document order so the break points can
        # be sorted into document order below.
        for i, elem in enumerate(item.data.iter('*')):
            try:
                elem.set('pb_order', unicode_type(i))
            except TypeError:  # Cant set attributes on comment nodes etc.
                continue

        page_breaks = list(page_breaks)
        page_breaks.sort(key=lambda x:int(x.get('pb_order')))
        page_break_ids, page_breaks_ = [], []
        for i, x in enumerate(page_breaks):
            # Ensure each break element has an id we can find again after
            # the tree has been copied/split.
            x.set('id', x.get('id', 'calibre_pb_%d'%i))
            id = x.get('id')
            try:
                xp = XPath('//*[@id="%s"]'%id)
            except:
                try:
                    xp = XPath("//*[@id='%s']"%id)
                except:
                    # The id has both a quote and an apostrophe or some other
                    # Just replace it since I doubt its going to work anywhere else
                    # either
                    id = 'calibre_pb_%d'%i
                    x.set('id', id)
                    xp = XPath('//*[@id=%r]'%id)
            page_breaks_.append((xp, x.get('pb_before', '0') == '1'))
            page_break_ids.append(id)

        # Remove the temporary bookkeeping attributes.
        for elem in item.data.iter(etree.Element):
            elem.attrib.pop('pb_order', False)
            elem.attrib.pop('pb_before', False)

        return page_breaks_, page_break_ids

    def fix_links(self):
        '''
        Fix references to the split files in other content files.
        '''
        for item in self.oeb.manifest:
            if etree.iselement(item.data):
                self.current_item = item
                rewrite_links(item.data, self.rewrite_links)

    def rewrite_links(self, url):
        # Callback for rewrite_links: re-point links into a split file at
        # the fragment's new home.
        href, frag = urldefrag(url)
        try:
            href = self.current_item.abshref(href)
        except ValueError:
            # Unparseable URL
            return url
        try:
            href = urlnormalize(href)
        except ValueError:
            # href has non utf-8 quoting
            return url
        if href in self.map:
            anchor_map = self.map[href]
            # None key maps to the first split part.
            nhref = anchor_map[frag if frag else None]
            nhref = self.current_item.relhref(nhref)
            if frag:
                nhref = '#'.join((unquote(nhref), frag))

            return nhref
        return url
|
class FlowSplitter(object):
    'The actual splitting logic'

    def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb,
            opts):
        self.item = item
        self.oeb = oeb
        self.opts = opts
        self.log = oeb.log
        self.page_breaks = page_breaks
        self.page_break_ids = page_break_ids
        self.max_flow_size = max_flow_size
        self.base = item.href
        self.csp_counter = 0

        # 'name.html' -> 'name_split_%.3d.html'; literal % in the name is
        # escaped so the later self.base%i substitution is safe.
        base, ext = os.path.splitext(self.base)
        self.base = base.replace('%', '%%')+'_split_%.3d'+ext

        self.trees = [self.item.data.getroottree()]
        self.splitting_on_page_breaks = True
        if self.page_breaks:
            self.split_on_page_breaks(self.trees[0])
        self.splitting_on_page_breaks = False

        if self.max_flow_size > 0:
            lt_found = False
            self.log('\tLooking for large trees in %s...'%item.href)
            trees = list(self.trees)
            self.tree_map = {}
            for i, tree in enumerate(trees):
                size = len(tostring(tree.getroot()))
                if size > self.max_flow_size:
                    self.log('\tFound large tree #%d'%i)
                    lt_found = True
                    self.split_trees = []
                    self.split_to_size(tree)
                    self.tree_map[tree] = self.split_trees
            if not lt_found:
                self.log('\tNo large trees found')
            # Replace each oversized tree with its split parts, in order.
            self.trees = []
            for x in trees:
                self.trees.extend(self.tree_map.get(x, [x]))

        self.was_split = len(self.trees) > 1
        if self.was_split:
            self.log('\tSplit into %d parts'%len(self.trees))
        self.commit()

    def split_on_page_breaks(self, orig_tree):
        # Process break points in document order (ordered_ids preserves the
        # order in which ids appear in the tree).
        ordered_ids = OrderedDict()
        all_page_break_ids = frozenset(self.page_break_ids)
        for elem_id in orig_tree.xpath('//*/@id'):
            if elem_id in all_page_break_ids:
                ordered_ids[elem_id] = self.page_breaks[
                    self.page_break_ids.index(elem_id)]

        self.trees = [orig_tree]
        while ordered_ids:
            pb_id, (pattern, before) = next(iteritems(ordered_ids))
            del ordered_ids[pb_id]
            # Search from the last part backwards: later break points live
            # in later parts, so this finds the break quickly.
            for i in range(len(self.trees)-1, -1, -1):
                tree = self.trees[i]
                elem = pattern(tree)
                if elem:
                    self.log.debug('\t\tSplitting on page-break at id=%s'%
                            elem[0].get('id'))
                    before_tree, after_tree = self.do_split(tree, elem[0], before)
                    self.trees[i:i+1] = [before_tree, after_tree]
                    break

        # Drop empty parts, but preserve their (non-calibre) anchors by
        # injecting invisible placeholder divs into the next non-empty part.
        trees, ids = [], set()
        for tree in self.trees:
            root = tree.getroot()
            if self.is_page_empty(root):
                discarded_ids = root.xpath('//*[@id]')
                for x in discarded_ids:
                    x = x.get('id')
                    if not x.startswith('calibre_'):
                        ids.add(x)
            else:
                if ids:
                    body = self.get_body(root)
                    if body is not None:
                        existing_ids = frozenset(body.xpath('//*/@id'))
                        for x in ids - existing_ids:
                            body.insert(0, body.makeelement(XHTML('div'), id=x, style='height:0pt'))
                ids = set()
                trees.append(tree)
        self.trees = trees

    def get_body(self, root):
        # Return the <body> element of root, or None.
        body = root.xpath('//h:body', namespaces=NAMESPACES)
        if not body:
            return None
        return body[0]

    def do_split(self, tree, split_point, before):
        '''
        Split ``tree`` into a *before* and *after* tree at ``split_point``.

        :param before: If True tree is split before split_point, otherwise after split_point
        :return: before_tree, after_tree
        '''
        return do_split(split_point, self.log, before=before)

    def is_page_empty(self, root):
        # A page is "empty" if it has at most one visible text character,
        # no visible images and no SVG content.
        body = self.get_body(root)
        if body is None:
            return False
        txt = re.sub(r'\s+|\xa0', '',
                etree.tostring(body, method='text', encoding='unicode'))
        if len(txt) > 1:
            return False
        for img in root.xpath('//h:img', namespaces=NAMESPACES):
            if img.get('style', '') != 'display:none':
                return False
        if root.xpath('//*[local-name() = "svg"]'):
            return False
        return True

    def split_text(self, text, root, size):
        # Chunk a long text run on blank lines so each chunk fits in
        # ``size`` characters; raises SplitError if any single paragraph is
        # already too large.
        self.log.debug('\t\t\tSplitting text of length: %d'%len(text))
        rest = text.replace('\r', '')
        parts = re.split('\n\n', rest)
        self.log.debug('\t\t\t\tFound %d parts'%len(parts))
        if max(map(len, parts)) > size:
            raise SplitError('Cannot split as file contains a <pre> tag '
                'with a very large paragraph', root)
        ans = []
        buf = ''
        for part in parts:
            if len(buf) + len(part) < size:
                buf += '\n\n'+part
            else:
                ans.append(buf)
                buf = part
        return ans

    def split_to_size(self, tree):
        # Recursively split ``tree`` until every part is under
        # self.max_flow_size, collecting parts into self.split_trees.
        self.log.debug('\t\tSplitting...')
        root = tree.getroot()
        # Split large <pre> tags if they contain only text
        for pre in XPath('//h:pre')(root):
            if len(tuple(pre.iterchildren(etree.Element))) > 0:
                continue
            if pre.text and len(pre.text) > self.max_flow_size*0.5:
                self.log.debug('\t\tSplitting large <pre> tag')
                frags = self.split_text(pre.text, root, int(0.2*self.max_flow_size))
                new_pres = []
                for frag in frags:
                    pre2 = copy.copy(pre)
                    pre2.text = frag
                    pre2.tail = ''
                    new_pres.append(pre2)
                new_pres[-1].tail = pre.tail
                p = pre.getparent()
                i = p.index(pre)
                p[i:i+1] = new_pres

        split_point, before = self.find_split_point(root)
        if split_point is None:
            raise SplitError(self.item.href, root)
        self.log.debug('\t\t\tSplit point:', split_point.tag, tree.getpath(split_point))

        trees = self.do_split(tree, split_point, before)
        sizes = [len(tostring(t.getroot())) for t in trees]
        if min(sizes) < 5*1024:
            # One side is tiny: re-split the original at a different point
            # (the used split point is marked via SPLIT_POINT_ATTR).
            self.log.debug('\t\t\tSplit tree too small')
            self.split_to_size(tree)
            return

        for t, size in zip(trees, sizes):
            r = t.getroot()
            if self.is_page_empty(r):
                continue
            elif size <= self.max_flow_size:
                self.split_trees.append(t)
                self.log.debug(
                    '\t\t\tCommitted sub-tree #%d (%d KB)'%(
                        len(self.split_trees), size/1024.))
            else:
                self.log.debug(
                    '\t\t\tSplit tree still too large: %d KB' % (size/1024.))
                self.split_to_size(t)

    def find_split_point(self, root):
        '''
        Find the tag at which to split the tree rooted at `root`.
        Search order is:
            * Heading tags
            * <div> tags
            * <pre> tags
            * <hr> tags
            * <p> tags
            * <br> tags
            * <li> tags

        We try to split in the "middle" of the file (as defined by tag counts.
        '''
        def pick_elem(elems):
            # Middle not-yet-tried candidate; mark it as tried.
            if elems:
                elems = [i for i in elems if i.get(SPLIT_POINT_ATTR, '0') !=
                        '1']
                if elems:
                    i = int(len(elems)//2)
                    elems[i].set(SPLIT_POINT_ATTR, '1')
                    return elems[i]

        for path in (
                '//*[re:match(name(), "h[1-6]", "i")]',
                '/h:html/h:body/h:div',
                '//h:pre',
                '//h:hr',
                '//h:p',
                '//h:div',
                '//h:br',
                '//h:li',
                ):
            elems = root.xpath(path, namespaces=NAMESPACES)
            elem = pick_elem(elems)
            if elem is not None:
                try:
                    # Sanity check: the element must be addressable by path
                    # after the tree is copied during the split.
                    XPath(elem.getroottree().getpath(elem))
                except:
                    continue
                return elem, True

        return None, True

    def commit(self):
        '''
        Commit all changes caused by the split. Calculates an *anchor_map* for
        all anchors in the original tree. Internal links are re-directed. The
        original file is deleted and the split files are saved.
        '''
        if not self.was_split:
            return
        # Anchors not found anywhere default to the first split file.
        self.anchor_map = collections.defaultdict(lambda :self.base%0)
        self.files = []

        for i, tree in enumerate(self.trees):
            root = tree.getroot()
            self.files.append(self.base%i)
            for elem in root.xpath('//*[@id or @name]'):
                for anchor in elem.get('id', ''), elem.get('name', ''):
                    if anchor != '' and anchor not in self.anchor_map:
                        self.anchor_map[anchor] = self.files[-1]
            # Strip the split-point bookkeeping attribute.
            for elem in root.xpath('//*[@%s]'%SPLIT_POINT_ATTR):
                elem.attrib.pop(SPLIT_POINT_ATTR, '0')

        spine_pos = self.item.spine_position

        # Insert in reverse so repeated inserts at spine_pos end up in
        # document order.
        for current, tree in zip(*map(reversed, (self.files, self.trees))):
            # Fix intra-file '#anchor' links that now point across files.
            for a in tree.getroot().xpath('//h:a[@href]', namespaces=NAMESPACES):
                href = a.get('href').strip()
                if href.startswith('#'):
                    anchor = href[1:]
                    file = self.anchor_map[anchor]
                    file = self.item.relhref(file)
                    if file != current:
                        a.set('href', file+href)

            new_id = self.oeb.manifest.generate(id=self.item.id)[0]
            new_item = self.oeb.manifest.add(new_id, current,
                    self.item.media_type, data=tree.getroot())
            self.oeb.spine.insert(spine_pos, new_item, self.item.linear)

        # Re-point guide, TOC and page-list references at the part that now
        # contains their target anchor.
        if self.oeb.guide:
            for ref in self.oeb.guide.values():
                href, frag = urldefrag(ref.href)
                if href == self.item.href:
                    nhref = self.anchor_map[frag if frag else None]
                    if frag:
                        nhref = '#'.join((nhref, frag))
                    ref.href = nhref

        def fix_toc_entry(toc):
            if toc.href:
                href, frag = urldefrag(toc.href)
                if href == self.item.href:
                    nhref = self.anchor_map[frag if frag else None]
                    if frag:
                        nhref = '#'.join((nhref, frag))
                    toc.href = nhref
            for x in toc:
                fix_toc_entry(x)

        if self.oeb.toc:
            fix_toc_entry(self.oeb.toc)

        if self.oeb.pages:
            for page in self.oeb.pages:
                href, frag = urldefrag(page.href)
                if href == self.item.href:
                    nhref = self.anchor_map[frag if frag else None]
                    if frag:
                        nhref = '#'.join((nhref, frag))
                    page.href = nhref

        # The original (unsplit) file is no longer needed.
        self.oeb.manifest.remove(self.item)
||||||
Reference in New Issue
Block a user