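Revisited jacket functions: removed the Jacket transform and its rendering helpers, folded Base into RemoveFirstImage, and deleted rating_to_stars and library/comments.py.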

2026-02-09 18:35:44 +01:00 · 2021-03-27 16:35:40 +01:00
parent 6e293ff9dc
commit 6d08cf7da2
4 changed files with 1 additions and 530 deletions
--- a/ebook_converter/ebooks/conversion/plumber.py
+++ b/ebook_converter/ebooks/conversion/plumber.py
@@ -1108,11 +1108,6 @@ OptionRecommendation(name='search_replace',
                self.log.error('Invalid font size key: %r ignoring'%fkey)
                fkey = self.opts.dest.fkey

-        from ebook_converter.ebooks.oeb.transforms.jacket import Jacket
-        Jacket()(self.oeb, self.opts, self.user_metadata)
-        pr(0.4)
-        self.flush()
-
        if self.opts.debug_pipeline is not None:
            out_dir = os.path.join(self.opts.debug_pipeline, 'structure')
            self.dump_oeb(self.oeb, out_dir)
--- a/ebook_converter/ebooks/metadata/init.py
+++ b/ebook_converter/ebooks/metadata/init.py
@@ -425,11 +425,3 @@ def check_doi(doi):
    if doi_check is not None:
        return doi_check.group()
    return None
-
-
-def rating_to_stars(value, allow_half_stars=False, star='★', half='½'):
-    r = max(0, min(int(value or 0), 10))
-    ans = star * (r // 2)
-    if allow_half_stars and r % 2:
-        ans += half
-    return ans
--- a/ebook_converter/ebooks/oeb/transforms/jacket.py
+++ b/ebook_converter/ebooks/oeb/transforms/jacket.py
@@ -1,34 +1,13 @@
-import mimetypes
-import os
-import pkg_resources
-import re
-import string
-import sys
 import urllib.parse
-from xml.sax import saxutils

-from ebook_converter import constants as const
-from ebook_converter.utils import date
 from ebook_converter.ebooks.oeb import base
 from ebook_converter.ebooks.oeb.base import XPath, xml2text, urlnormalize
-from ebook_converter.library.comments import comments_to_html, markdown
-from ebook_converter.ebooks.chardet import strip_encoding_declarations
-from ebook_converter.ebooks.metadata import fmt_sidx, rating_to_stars


 JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]'


-class SafeFormatter(string.Formatter):
-
-    def get_value(self, *args, **kwargs):
-        try:
-            return string.Formatter.get_value(self, *args, **kwargs)
-        except KeyError:
-            return ''
-
-
-class Base(object):
+class RemoveFirstImage:

    def remove_images(self, item, limit=1):
        path = XPath('//h:img[@src]')
@@ -48,9 +27,6 @@ class Base(object):
                removed += 1
        return removed

-
-class RemoveFirstImage(Base):
-
    def remove_first_image(self):
        deleted_item = None
        for item in self.oeb.spine:
@@ -88,321 +64,6 @@ class RemoveFirstImage(Base):
            self.remove_first_image()


-class Jacket(Base):
-    """
-    Book jacket manipulation. Remove first image and insert comments at start
-    of book.
-    """
-
-    def insert_metadata(self, mi):
-        self.log('Inserting metadata into book...')
-
-        try:
-            tags = list(map(str, self.oeb.metadata.subject))
-        except Exception:
-            tags = []
-
-        try:
-            comments = str(self.oeb.metadata.description[0])
-        except Exception:
-            comments = ''
-
-        try:
-            title = str(self.oeb.metadata.title[0])
-        except Exception:
-            title = 'Unknown'
-
-        try:
-            authors = list(map(str, self.oeb.metadata.creator))
-        except Exception:
-            authors = ['Unknown']
-
-        root = render_jacket(mi, self.opts.output_profile,
-                             alt_title=title, alt_tags=tags,
-                             alt_authors=authors,
-                             alt_comments=comments,
-                             rescale_fonts=True)
-        id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml')
-
-        jacket = self.oeb.manifest.add(id, href, mimetypes.guess_type(href)[0],
-                                       data=root)
-        self.oeb.spine.insert(0, jacket, True)
-        self.oeb.inserted_metadata_jacket = jacket
-        for img, path in referenced_images(root):
-            self.oeb.log('Embedding referenced image %s into jacket' % path)
-            ext = path.rpartition('.')[-1].lower()
-            item_id, href = self.oeb.manifest.generate('jacket_image',
-                                                       'jacket_img.' + ext)
-            with open(path, 'rb') as f:
-                item = self.oeb.manifest.add(
-                    item_id, href, mimetypes.guess_type(href)[0],
-                    data=f.read())
-            item.unload_data_from_memory()
-            img.set('src', jacket.relhref(item.href))
-
-    def remove_existing_jacket(self):
-        for x in self.oeb.spine[:4]:
-            if XPath(JACKET_XPATH)(x.data):
-                self.remove_images(x, limit=sys.maxsize)
-                self.oeb.manifest.remove(x)
-                self.log('Removed existing jacket')
-                break
-
-    def __call__(self, oeb, opts, metadata):
-        """
-        Add metadata in jacket.xhtml if specified in opts
-        If not specified, remove previous jacket instance
-        """
-        self.oeb, self.opts, self.log = oeb, opts, oeb.log
-        self.remove_existing_jacket()
-        if opts.insert_metadata:
-            self.insert_metadata(metadata)
-
-# Render Jacket {{{
-
-
-def get_rating(rating, rchar, e_rchar):
-    ans = ''
-    try:
-        num = float(rating) / 2
-    except Exception:
-        return ans
-    num = max(0, num)
-    num = min(num, 5)
-    if num < 1:
-        return ans
-
-    ans = ("%s%s") % (rchar * int(num), e_rchar * (5 - int(num)))
-    return ans
-
-
-class Series(str):
-
-    def __new__(self, series, series_index):
-        if series and series_index is not None:
-            _roman = saxutils.escape(fmt_sidx(series_index, use_roman=True))
-            _no_roman = saxutils.escape(fmt_sidx(series_index,
-                                                 use_roman=False))
-            roman = '{1} of <em>{0}</em>'.format(saxutils.escape(series),
-                                                 _roman)
-            combined = '{1} of <em>{0}</em>'.format(saxutils.escape(series),
-                                                    _no_roman)
-        else:
-            combined = roman = saxutils.escape(series or u'')
-        s = str.__new__(self, combined)
-        s.roman = roman
-        s.name = saxutils.escape(series or '')
-        s.number = saxutils.escape(fmt_sidx(series_index or 1.0,
-                                            use_roman=False))
-        s.roman_number = saxutils.escape(fmt_sidx(series_index or 1.0,
-                                                  use_roman=True))
-        return s
-
-
-class Tags(str):
-
-    def __new__(self, tags, output_profile):
-        tags = [saxutils.escape(x) for x in tags or ()]
-        t = str.__new__(self, ', '.join(tags))
-        t.alphabetical = ', '.join(sorted(tags))
-        t.tags_list = tags
-        return t
-
-
-def postprocess_jacket(root, output_profile, has_data):
-    # Post-process the generated html to strip out empty header items
-
-    def extract(tag):
-        parent = tag.getparent()
-        idx = parent.index(tag)
-        parent.remove(tag)
-        if tag.tail:
-            if idx == 0:
-                parent.text = (parent.text or '') + tag.tail
-            else:
-                if idx >= len(parent):
-                    idx = -1
-                parent[-1].tail = (parent[-1].tail or '') + tag.tail
-
-    def extract_class(cls):
-        for tag in root.xpath('//*[@class="_"]'.replace('_', cls)):
-            extract(tag)
-
-    for key in 'series rating tags'.split():
-        if not has_data[key]:
-            extract_class('cbj_' + key)
-    if not has_data['pubdate']:
-        extract_class('cbj_pubdata')
-    if output_profile.short_name != 'kindle':
-        extract_class('cbj_kindle_banner_hr')
-
-
-def render_jacket(mi, output_profile, alt_title='Unknown', alt_tags=[],
-                  alt_comments='', alt_publisher='', rescale_fonts=False,
-                  alt_authors=None):
-    with open(pkg_resources.resource_filename('ebook_converter',
-                                              'data/jacket/stylesheet.css'),
-              'rb') as fobj:
-        css = fobj.read().decode()
-    with open(pkg_resources.resource_filename('ebook_converter',
-                                              'data/jacket/template.xhtml'),
-              'rb') as fobj:
-        template = fobj.read().decode()
-
-    template = re.sub(r'<!--.*?-->', '', template, flags=re.DOTALL)
-    css = re.sub(r'/\*.*?\*/', '', css, flags=re.DOTALL)
-
-    try:
-        title_str = alt_title if mi.is_null('title') else mi.title
-    except Exception:
-        title_str = 'Unknown'
-    title_str = saxutils.escape(title_str)
-    title = '<span class="title">%s</span>' % title_str
-
-    series = Series(mi.series, mi.series_index)
-    try:
-        if not mi.is_null('publisher'):
-            publisher = mi.publisher
-        else:
-            publisher = alt_publisher
-    except Exception:
-        publisher = ''
-    publisher = saxutils.escape(publisher)
-
-    try:
-        if date.is_date_undefined(mi.pubdate):
-            pubdate = ''
-        else:
-            dt = date.as_local_time(mi.pubdate)
-            pubdate = date.strftime('%Y', dt.timetuple())
-    except Exception:
-        pubdate = ''
-
-    rating = get_rating(mi.rating, output_profile.ratings_char,
-                        output_profile.empty_ratings_char)
-
-    tags = Tags((mi.tags if mi.tags else alt_tags), output_profile)
-
-    comments = mi.comments if mi.comments else alt_comments
-    comments = comments.strip()
-    if comments:
-        comments = comments_to_html(comments)
-
-    orig = mi.authors
-    if mi.is_null('authors'):
-        mi.authors = list(alt_authors or ('Unknown',))
-    try:
-        author = mi.format_authors()
-    except Exception:
-        author = ''
-    mi.authors = orig
-    author = saxutils.escape(author)
-    has_data = {}
-
-    def generate_html(comments):
-        args = {'author': author,
-                'comments': comments,
-                'css': css,
-                'footer': '',
-                'pubdate': pubdate,
-                'pubdate_label': 'Published',
-                'publisher': publisher,
-                'rating': rating,
-                'rating_label': 'Rating',
-                'searchable_tags': ' '.join(saxutils.escape(t) + 'ttt'
-                                            for t in tags.tags_list),
-                'series': series,
-                'series_label': 'Series',
-                'tags': tags,
-                'tags_label': 'Tags',
-                'title': title,
-                'title_str': title_str,
-                'xmlns': const.XHTML_NS}
-
-        for key in mi.custom_field_keys():
-            m = mi.get_user_metadata(key, False) or {}
-            try:
-                display_name, val = mi.format_field_extended(key)[:2]
-                dkey = key.replace('#', '_')
-                dt = m.get('datatype')
-                if dt == 'series':
-                    args[dkey] = Series(mi.get(key), mi.get(key + '_index'))
-                elif dt == 'rating':
-                    args[dkey] = rating_to_stars(mi.get(key),
-                                                 m.get('display', {})
-                                                 .get('allow_half_stars',
-                                                      False))
-                elif dt == 'comments':
-                    val = val or ''
-                    display = m.get('display', {})
-                    ctype = display.get('interpret_as') or 'html'
-                    if ctype == 'long-text':
-                        val = ('<pre style="white-space:pre-wrap">%s</pre>' %
-                               saxutils.escape(val))
-                    elif ctype == 'short-text':
-                        val = '<span>%s</span>' % saxutils.escape(val)
-                    elif ctype == 'markdown':
-                        val = markdown(val)
-                    else:
-                        val = comments_to_html(val)
-                    args[dkey] = val
-                else:
-                    args[dkey] = saxutils.escape(val)
-                args[dkey+'_label'] = saxutils.escape(display_name)
-            except Exception:
-                # if the val (custom column contents) is None, don't add to
-                # args
-                pass
-
-        if False:
-            print("Custom column values available in jacket template:")
-            for key in args.keys():
-                if key.startswith('_') and not key.endswith('_label'):
-                    print(" %s: %s" % ('#' + key[1:], args[key]))
-
-        # Used in the comment describing use of custom columns in templates
-        # Don't change this unless you also change it in template.xhtml
-        args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
-        args['_genre'] = args.get('_genre', '{_genre}')
-
-        formatter = SafeFormatter()
-        generated_html = formatter.format(template, **args)
-        has_data['series'] = bool(series)
-        has_data['tags'] = bool(tags)
-        has_data['rating'] = bool(rating)
-        has_data['pubdate'] = bool(pubdate)
-
-        return strip_encoding_declarations(generated_html)
-
-    from ebook_converter.ebooks.oeb.polish.parsing import parse
-    raw = generate_html(comments)
-    root = parse(raw, line_numbers=False, force_html5_parse=True)
-
-    if rescale_fonts:
-        # We ensure that the conversion pipeline will set the font sizes for
-        # text in the jacket to the same size as the font sizes for the rest of
-        # the text in the book. That means that as long as the jacket uses
-        # relative font sizes (em or %), the post conversion font size will be
-        # the same as for text in the main book. So text with size x em will
-        # be rescaled to the same value in both the jacket and the main
-        # content.
-        #
-        # We cannot use data-calibre-rescale 100 on the body tag as that will
-        # just give the body tag a font size of 1em, which is useless.
-        for body in root.xpath('//*[local-name()="body"]'):
-            fw = body.makeelement(base.tag('xhtml', 'div'))
-            fw.set('data-calibre-rescale', '100')
-            for child in body:
-                fw.append(child)
-            body.append(fw)
-    postprocess_jacket(root, output_profile, has_data)
-    from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree
-    pretty_html_tree(None, root)
-    return root
-
-# }}}
-
-
 def linearize_jacket(oeb):
    for x in oeb.spine[:4]:
        if XPath(JACKET_XPATH)(x.data):
@@ -411,12 +72,3 @@ def linearize_jacket(oeb):
            for e in XPath('//h:td')(x.data):
                e.tag = base.tag('xhtml', 'span')
            break
-
-
-def referenced_images(root):
-    for img in XPath('//h:img[@src]')(root):
-        src = img.get('src')
-        if src.startswith('file://'):
-            path = src[7:]
-            if os.path.exists(path):
-                yield img, path
--- a/ebook_converter/library/comments.py
+++ b/ebook_converter/library/comments.py
@@ -1,168 +0,0 @@
-import re
-
-import bs4
-
-from ebook_converter.constants_old import preferred_encoding
-from ebook_converter.ebooks.BeautifulSoup import html5_parser
-from ebook_converter.utils.html2text import html2text
-from ebook_converter.utils import entities
-
-
-# Hackish - ignoring sentences ending or beginning in numbers to avoid
-# confusion with decimal points.
-lost_cr_pat = re.compile('([a-z])([\\.\\?!])([A-Z])')
-lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')
-sanitize_pat = re.compile(r'<script|<table|<tr|<td|<th|<style|<iframe',
-        re.IGNORECASE)
-
-
-def comments_to_html(comments):
-    '''
-    Convert random comment text to normalized, xml-legal block of <p>s
-    'plain text' returns as
-    <p>plain text</p>
-
-    'plain text with <i>minimal</i> <b>markup</b>' returns as
-    <p>plain text with <i>minimal</i> <b>markup</b></p>
-
-    '<p>pre-formatted text</p> returns untouched
-
-    'A line of text\n\nFollowed by a line of text' returns as
-    <p>A line of text</p>
-    <p>Followed by a line of text</p>
-
-    'A line of text.\nA second line of text.\rA third line of text' returns as
-    <p>A line of text.<br />A second line of text.<br />A third line of text.</p>
-
-    '...end of a paragraph.Somehow the break was lost...' returns as
-    <p>...end of a paragraph.</p>
-    <p>Somehow the break was lost...</p>
-
-    Deprecated HTML returns as HTML via BeautifulSoup()
-
-    '''
-    if not comments:
-        return u'<p></p>'
-    if not isinstance(comments, str):
-        comments = comments.decode(preferred_encoding, 'replace')
-
-    if comments.lstrip().startswith('<'):
-        # Comment is already HTML do not mess with it
-        return comments
-
-    if '<' not in comments:
-        comments = entities.prepare_string_for_xml(comments)
-        parts = [u'<p class="description">%s</p>'%x.replace(u'\n', u'<br />')
-                for x in comments.split('\n\n')]
-        return '\n'.join(parts)
-
-    if sanitize_pat.search(comments) is not None:
-        try:
-            return sanitize_comments_html(comments)
-        except:
-            import traceback
-            traceback.print_exc()
-            return u'<p></p>'
-
-    # Explode lost CRs to \n\n
-    comments = lost_cr_exception_pat.sub(lambda m: m.group().replace('.',
-        '.\r'), comments)
-    for lost_cr in lost_cr_pat.finditer(comments):
-        comments = comments.replace(lost_cr.group(),
-                                    '%s%s\n\n%s' % (lost_cr.group(1),
-                                                    lost_cr.group(2),
-                                                    lost_cr.group(3)))
-
-    comments = comments.replace(u'\r', u'')
-    # Convert \n\n to <p>s
-    comments = comments.replace(u'\n\n', u'<p>')
-    # Convert solo returns to <br />
-    comments = comments.replace(u'\n', '<br />')
-    # Convert two hyphens to emdash
-    comments = comments.replace('--', '&mdash;')
-
-    soup = html5_parser('<div>' + comments + '</div>').find('div')
-    result = html5_parser('<div>')
-    container = result.find('div')
-    rtc = 0
-    open_pTag = False
-
-    all_tokens = list(soup.contents)
-    inline_tags = ('br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a', 'hr')
-    for token in all_tokens:
-        if isinstance(token, (bs4.CData, bs4.Comment, bs4.Declaration,
-                              bs4.ProcessingInstruction)):
-            continue
-        if isinstance(token, bs4.NavigableString):
-            if not open_pTag:
-                pTag = result.new_tag('p')
-                open_pTag = True
-                ptc = 0
-            pTag.insert(ptc, token)
-            ptc += 1
-        elif token.name in inline_tags:
-            if not open_pTag:
-                pTag = result.new_tag('p')
-                open_pTag = True
-                ptc = 0
-            pTag.insert(ptc, token)
-            ptc += 1
-        else:
-            if open_pTag:
-                container.insert(rtc, pTag)
-                rtc += 1
-                open_pTag = False
-                ptc = 0
-            container.insert(rtc, token)
-            rtc += 1
-
-    if open_pTag:
-        container.insert(rtc, pTag)
-
-    for p in container.findAll('p'):
-        p['class'] = 'description'
-
-    return container.decode_contents()
-
-
-def markdown(val):
-    try:
-        md = markdown.Markdown
-    except AttributeError:
-        from ebook_converter.ebooks.markdown import Markdown
-        md = markdown.Markdown = Markdown()
-    return md.convert(val)
-
-
-def merge_comments(one, two):
-    return comments_to_html(one) + '\n\n' + comments_to_html(two)
-
-
-def sanitize_comments_html(html):
-    from ebook_converter.ebooks.markdown import Markdown
-    text = html2text(html)
-    md = Markdown()
-    html = md.convert(text)
-    return html
-
-
-def find_tests():
-    import unittest
-
-    class Test(unittest.TestCase):
-
-        def test_comments_to_html(self):
-            for pat, val in [
-                    (b'lineone\n\nlinetwo',
-                        '<p class="description">lineone</p>\n<p class="description">linetwo</p>'),
-
-                    ('a <b>b&c</b>\nf',
-                        '<p class="description">a <b>b&amp;c</b><br/>f</p>'),
-
-                    ('a <?xml asd> b\n\ncd',
-                        '<p class="description">a  b</p><p class="description">cd</p>'),
-            ]:
-                cval = comments_to_html(pat)
-                self.assertEqual(cval, val)
-
-    return unittest.defaultTestLoader.loadTestsFromTestCase(Test)