Mirror of https://github.com/gryf/ebook-converter.git, synced 2026-03-30 17:03:31 +02:00.
Commit: "Revisited jacket functions."
This commit is contained in:
@@ -1108,11 +1108,6 @@ OptionRecommendation(name='search_replace',
|
|||||||
self.log.error('Invalid font size key: %r ignoring'%fkey)
|
self.log.error('Invalid font size key: %r ignoring'%fkey)
|
||||||
fkey = self.opts.dest.fkey
|
fkey = self.opts.dest.fkey
|
||||||
|
|
||||||
from ebook_converter.ebooks.oeb.transforms.jacket import Jacket
|
|
||||||
Jacket()(self.oeb, self.opts, self.user_metadata)
|
|
||||||
pr(0.4)
|
|
||||||
self.flush()
|
|
||||||
|
|
||||||
if self.opts.debug_pipeline is not None:
|
if self.opts.debug_pipeline is not None:
|
||||||
out_dir = os.path.join(self.opts.debug_pipeline, 'structure')
|
out_dir = os.path.join(self.opts.debug_pipeline, 'structure')
|
||||||
self.dump_oeb(self.oeb, out_dir)
|
self.dump_oeb(self.oeb, out_dir)
|
||||||
|
|||||||
@@ -425,11 +425,3 @@ def check_doi(doi):
|
|||||||
if doi_check is not None:
|
if doi_check is not None:
|
||||||
return doi_check.group()
|
return doi_check.group()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def rating_to_stars(value, allow_half_stars=False, star='★', half='½'):
    """Render a 0-10 rating as a run of star characters.

    The rating is clamped to [0, 10]; every two points produce one
    *star* character, and an odd remainder appends *half* when
    allow_half_stars is True.
    """
    rating = min(max(int(value or 0), 0), 10)
    full, remainder = divmod(rating, 2)
    result = star * full
    if allow_half_stars and remainder:
        result += half
    return result
|
|
||||||
|
|||||||
@@ -1,34 +1,13 @@
|
|||||||
import mimetypes
|
|
||||||
import os
|
|
||||||
import pkg_resources
|
|
||||||
import re
|
|
||||||
import string
|
|
||||||
import sys
|
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
from xml.sax import saxutils
|
|
||||||
|
|
||||||
from ebook_converter import constants as const
|
|
||||||
from ebook_converter.utils import date
|
|
||||||
from ebook_converter.ebooks.oeb import base
|
from ebook_converter.ebooks.oeb import base
|
||||||
from ebook_converter.ebooks.oeb.base import XPath, xml2text, urlnormalize
|
from ebook_converter.ebooks.oeb.base import XPath, xml2text, urlnormalize
|
||||||
from ebook_converter.library.comments import comments_to_html, markdown
|
|
||||||
from ebook_converter.ebooks.chardet import strip_encoding_declarations
|
|
||||||
from ebook_converter.ebooks.metadata import fmt_sidx, rating_to_stars
|
|
||||||
|
|
||||||
|
|
||||||
JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]'
|
JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]'
|
||||||
|
|
||||||
|
|
||||||
class SafeFormatter(string.Formatter):
    """A string.Formatter that substitutes the empty string for any
    template field with no matching keyword, instead of raising
    KeyError."""

    def get_value(self, *args, **kwargs):
        try:
            return super().get_value(*args, **kwargs)
        except KeyError:
            # Unknown field name: render it as nothing.
            return ''
|
|
||||||
|
|
||||||
|
|
||||||
class Base(object):
|
|
||||||
|
|
||||||
def remove_images(self, item, limit=1):
|
def remove_images(self, item, limit=1):
|
||||||
path = XPath('//h:img[@src]')
|
path = XPath('//h:img[@src]')
|
||||||
@@ -48,9 +27,6 @@ class Base(object):
|
|||||||
removed += 1
|
removed += 1
|
||||||
return removed
|
return removed
|
||||||
|
|
||||||
|
|
||||||
class RemoveFirstImage(Base):
|
|
||||||
|
|
||||||
def remove_first_image(self):
|
def remove_first_image(self):
|
||||||
deleted_item = None
|
deleted_item = None
|
||||||
for item in self.oeb.spine:
|
for item in self.oeb.spine:
|
||||||
@@ -88,321 +64,6 @@ class RemoveFirstImage(Base):
|
|||||||
self.remove_first_image()
|
self.remove_first_image()
|
||||||
|
|
||||||
|
|
||||||
class Jacket(Base):
    """
    Book jacket manipulation. Remove first image and insert comments at start
    of book.
    """

    def insert_metadata(self, mi):
        # Render a jacket page from *mi* and insert it at the front of the
        # spine, embedding any local images the rendered page references.
        self.log('Inserting metadata into book...')

        # Each OEB metadata field is read best-effort: a missing or
        # malformed value falls back to a default instead of aborting.
        try:
            tags = list(map(str, self.oeb.metadata.subject))
        except Exception:
            tags = []

        try:
            comments = str(self.oeb.metadata.description[0])
        except Exception:
            comments = ''

        try:
            title = str(self.oeb.metadata.title[0])
        except Exception:
            title = 'Unknown'

        try:
            authors = list(map(str, self.oeb.metadata.creator))
        except Exception:
            authors = ['Unknown']

        # The OEB values above serve only as fallbacks when *mi* is null.
        root = render_jacket(mi, self.opts.output_profile,
                             alt_title=title, alt_tags=tags,
                             alt_authors=authors,
                             alt_comments=comments,
                             rescale_fonts=True)
        # NOTE: 'id' shadows the builtin; kept as-is for compatibility.
        id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml')

        jacket = self.oeb.manifest.add(id, href, mimetypes.guess_type(href)[0],
                                       data=root)
        # Insert at spine position 0 so the jacket is the first page.
        self.oeb.spine.insert(0, jacket, True)
        self.oeb.inserted_metadata_jacket = jacket
        # Copy referenced local images into the manifest and rewrite the
        # <img src> attributes to the new in-book hrefs.
        for img, path in referenced_images(root):
            self.oeb.log('Embedding referenced image %s into jacket' % path)
            ext = path.rpartition('.')[-1].lower()
            item_id, href = self.oeb.manifest.generate('jacket_image',
                                                       'jacket_img.' + ext)
            with open(path, 'rb') as f:
                item = self.oeb.manifest.add(
                    item_id, href, mimetypes.guess_type(href)[0],
                    data=f.read())
            item.unload_data_from_memory()
            img.set('src', jacket.relhref(item.href))

    def remove_existing_jacket(self):
        # A previously inserted jacket is identified by JACKET_XPATH; only
        # the first four spine items are scanned, since a jacket is always
        # inserted at (or near) the front.
        for x in self.oeb.spine[:4]:
            if XPath(JACKET_XPATH)(x.data):
                self.remove_images(x, limit=sys.maxsize)
                self.oeb.manifest.remove(x)
                self.log('Removed existing jacket')
                break

    def __call__(self, oeb, opts, metadata):
        """
        Add metadata in jacket.xhtml if specified in opts
        If not specified, remove previous jacket instance
        """
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        # Always drop any stale jacket first so repeated conversions do not
        # accumulate jacket pages.
        self.remove_existing_jacket()
        if opts.insert_metadata:
            self.insert_metadata(metadata)
|
|
||||||
|
|
||||||
# Render Jacket {{{
|
|
||||||
|
|
||||||
|
|
||||||
def get_rating(rating, rchar, e_rchar):
    """Format a 0-10 rating as exactly five characters.

    Filled stars use *rchar*, empty ones *e_rchar*.  Returns '' when the
    rating cannot be parsed as a number or rounds below one star.
    """
    try:
        stars = float(rating) / 2
    except Exception:
        return ''
    stars = min(max(stars, 0), 5)
    if stars < 1:
        return ''
    filled = int(stars)
    return rchar * filled + e_rchar * (5 - filled)
|
|
||||||
|
|
||||||
|
|
||||||
class Series(str):
    """A str subclass whose value is the combined '<index> of <series>'
    string, with extra attributes for templates: .roman (roman-numeral
    index variant), .name, .number and .roman_number."""

    def __new__(self, series, series_index):
        if series and series_index is not None:
            idx_roman = saxutils.escape(fmt_sidx(series_index,
                                                 use_roman=True))
            idx_plain = saxutils.escape(fmt_sidx(series_index,
                                                 use_roman=False))
            escaped_series = saxutils.escape(series)
            roman = '{1} of <em>{0}</em>'.format(escaped_series, idx_roman)
            combined = '{1} of <em>{0}</em>'.format(escaped_series,
                                                    idx_plain)
        else:
            # No usable series/index: all variants collapse to the bare
            # (escaped) series name, possibly empty.
            combined = roman = saxutils.escape(series or u'')
        obj = str.__new__(self, combined)
        obj.roman = roman
        obj.name = saxutils.escape(series or '')
        obj.number = saxutils.escape(fmt_sidx(series_index or 1.0,
                                              use_roman=False))
        obj.roman_number = saxutils.escape(fmt_sidx(series_index or 1.0,
                                                    use_roman=True))
        return obj
|
|
||||||
|
|
||||||
|
|
||||||
class Tags(str):
    """A str subclass whose value is the comma-joined, XML-escaped tag
    list, with .alphabetical (sorted join) and .tags_list (escaped list)
    attributes for use in templates."""

    def __new__(self, tags, output_profile):
        escaped = [saxutils.escape(tag) for tag in tags or ()]
        obj = str.__new__(self, ', '.join(escaped))
        obj.alphabetical = ', '.join(sorted(escaped))
        obj.tags_list = escaped
        return obj
|
|
||||||
|
|
||||||
|
|
||||||
def postprocess_jacket(root, output_profile, has_data):
    # Post-process the generated html to strip out empty header items

    def extract(tag):
        # Remove *tag* from the tree while splicing its tail text back
        # into the document so no rendered text is lost.
        parent = tag.getparent()
        idx = parent.index(tag)
        parent.remove(tag)
        if tag.tail:
            if idx == 0:
                # Removed the first child: its tail becomes (part of) the
                # parent's leading text.
                parent.text = (parent.text or '') + tag.tail
            else:
                if idx >= len(parent):
                    idx = -1
                # NOTE(review): the tail is always appended to the *last*
                # remaining sibling; presumably parent[idx-1] (the previous
                # sibling) was intended -- confirm against upstream calibre.
                parent[-1].tail = (parent[-1].tail or '') + tag.tail

    def extract_class(cls):
        # Drop every element carrying exactly the given class attribute.
        for tag in root.xpath('//*[@class="_"]'.replace('_', cls)):
            extract(tag)

    # Strip the header rows for fields the jacket has no data for.
    for key in 'series rating tags'.split():
        if not has_data[key]:
            extract_class('cbj_' + key)
    if not has_data['pubdate']:
        extract_class('cbj_pubdata')
    # The banner rule is a Kindle-specific decoration.
    if output_profile.short_name != 'kindle':
        extract_class('cbj_kindle_banner_hr')
|
|
||||||
|
|
||||||
|
|
||||||
def render_jacket(mi, output_profile, alt_title='Unknown', alt_tags=[],
                  alt_comments='', alt_publisher='', rescale_fonts=False,
                  alt_authors=None):
    """Render the metadata in *mi* into an XHTML jacket page.

    Returns the parsed root element of the generated page.  The ``alt_*``
    parameters supply fallback values used when the corresponding field
    of *mi* is null.  When *rescale_fonts* is True the jacket body is
    wrapped in a div with data-calibre-rescale="100" so the conversion
    pipeline rescales its relative font sizes together with the book
    text.

    NOTE(review): ``alt_tags=[]`` is a mutable default argument; it is
    only iterated here, never mutated, but ``()`` would be safer.
    """
    # Load the packaged stylesheet and XHTML template, then strip their
    # comments, since the template comments are developer documentation.
    with open(pkg_resources.resource_filename('ebook_converter',
                                              'data/jacket/stylesheet.css'),
              'rb') as fobj:
        css = fobj.read().decode()
    with open(pkg_resources.resource_filename('ebook_converter',
                                              'data/jacket/template.xhtml'),
              'rb') as fobj:
        template = fobj.read().decode()

    template = re.sub(r'<!--.*?-->', '', template, flags=re.DOTALL)
    css = re.sub(r'/\*.*?\*/', '', css, flags=re.DOTALL)

    # Every field below is gathered best-effort: failures fall back to a
    # neutral default instead of aborting the render.
    try:
        title_str = alt_title if mi.is_null('title') else mi.title
    except Exception:
        title_str = 'Unknown'
    title_str = saxutils.escape(title_str)
    title = '<span class="title">%s</span>' % title_str

    series = Series(mi.series, mi.series_index)
    try:
        if not mi.is_null('publisher'):
            publisher = mi.publisher
        else:
            publisher = alt_publisher
    except Exception:
        publisher = ''
    publisher = saxutils.escape(publisher)

    try:
        if date.is_date_undefined(mi.pubdate):
            pubdate = ''
        else:
            # Only the publication year is shown on the jacket.
            dt = date.as_local_time(mi.pubdate)
            pubdate = date.strftime('%Y', dt.timetuple())
    except Exception:
        pubdate = ''

    rating = get_rating(mi.rating, output_profile.ratings_char,
                        output_profile.empty_ratings_char)

    tags = Tags((mi.tags if mi.tags else alt_tags), output_profile)

    comments = mi.comments if mi.comments else alt_comments
    comments = comments.strip()
    if comments:
        comments = comments_to_html(comments)

    # Temporarily substitute fallback authors so format_authors() has
    # something to work with; the original list is restored afterwards.
    orig = mi.authors
    if mi.is_null('authors'):
        mi.authors = list(alt_authors or ('Unknown',))
    try:
        author = mi.format_authors()
    except Exception:
        author = ''
    mi.authors = orig
    author = saxutils.escape(author)
    # Filled in by generate_html(); read later by postprocess_jacket().
    has_data = {}

    def generate_html(comments):
        # Build the template substitution dict and expand the template.
        args = {'author': author,
                'comments': comments,
                'css': css,
                'footer': '',
                'pubdate': pubdate,
                'pubdate_label': 'Published',
                'publisher': publisher,
                'rating': rating,
                'rating_label': 'Rating',
                'searchable_tags': ' '.join(saxutils.escape(t) + 'ttt'
                                            for t in tags.tags_list),
                'series': series,
                'series_label': 'Series',
                'tags': tags,
                'tags_label': 'Tags',
                'title': title,
                'title_str': title_str,
                'xmlns': const.XHTML_NS}

        # Expose custom (user-defined) metadata columns to the template
        # as '_<name>' / '_<name>_label' keys, formatted per datatype.
        for key in mi.custom_field_keys():
            m = mi.get_user_metadata(key, False) or {}
            try:
                display_name, val = mi.format_field_extended(key)[:2]
                dkey = key.replace('#', '_')
                dt = m.get('datatype')
                if dt == 'series':
                    args[dkey] = Series(mi.get(key), mi.get(key + '_index'))
                elif dt == 'rating':
                    args[dkey] = rating_to_stars(mi.get(key),
                                                 m.get('display', {})
                                                 .get('allow_half_stars',
                                                      False))
                elif dt == 'comments':
                    val = val or ''
                    display = m.get('display', {})
                    ctype = display.get('interpret_as') or 'html'
                    if ctype == 'long-text':
                        val = ('<pre style="white-space:pre-wrap">%s</pre>' %
                               saxutils.escape(val))
                    elif ctype == 'short-text':
                        val = '<span>%s</span>' % saxutils.escape(val)
                    elif ctype == 'markdown':
                        val = markdown(val)
                    else:
                        val = comments_to_html(val)
                    args[dkey] = val
                else:
                    args[dkey] = saxutils.escape(val)
                args[dkey+'_label'] = saxutils.escape(display_name)
            except Exception:
                # if the val (custom column contents) is None, don't add to
                # args
                pass

        # Debug aid: flip to True to dump available custom-column values.
        if False:
            print("Custom column values available in jacket template:")
            for key in args.keys():
                if key.startswith('_') and not key.endswith('_label'):
                    print(" %s: %s" % ('#' + key[1:], args[key]))

        # Used in the comment describing use of custom columns in templates
        # Don't change this unless you also change it in template.xhtml
        args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
        args['_genre'] = args.get('_genre', '{_genre}')

        # SafeFormatter maps unknown template fields to '' (see above).
        formatter = SafeFormatter()
        generated_html = formatter.format(template, **args)
        has_data['series'] = bool(series)
        has_data['tags'] = bool(tags)
        has_data['rating'] = bool(rating)
        has_data['pubdate'] = bool(pubdate)

        return strip_encoding_declarations(generated_html)

    # Imported here (not at module level), presumably to avoid an import
    # cycle with the polish package -- confirm before hoisting.
    from ebook_converter.ebooks.oeb.polish.parsing import parse
    raw = generate_html(comments)
    root = parse(raw, line_numbers=False, force_html5_parse=True)

    if rescale_fonts:
        # We ensure that the conversion pipeline will set the font sizes for
        # text in the jacket to the same size as the font sizes for the rest of
        # the text in the book. That means that as long as the jacket uses
        # relative font sizes (em or %), the post conversion font size will be
        # the same as for text in the main book. So text with size x em will
        # be rescaled to the same value in both the jacket and the main
        # content.
        #
        # We cannot use data-calibre-rescale 100 on the body tag as that will
        # just give the body tag a font size of 1em, which is useless.
        for body in root.xpath('//*[local-name()="body"]'):
            fw = body.makeelement(base.tag('xhtml', 'div'))
            fw.set('data-calibre-rescale', '100')
            for child in body:
                fw.append(child)
            body.append(fw)
    postprocess_jacket(root, output_profile, has_data)
    from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree
    pretty_html_tree(None, root)
    return root
|
|
||||||
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
|
|
||||||
def linearize_jacket(oeb):
|
def linearize_jacket(oeb):
|
||||||
for x in oeb.spine[:4]:
|
for x in oeb.spine[:4]:
|
||||||
if XPath(JACKET_XPATH)(x.data):
|
if XPath(JACKET_XPATH)(x.data):
|
||||||
@@ -411,12 +72,3 @@ def linearize_jacket(oeb):
|
|||||||
for e in XPath('//h:td')(x.data):
|
for e in XPath('//h:td')(x.data):
|
||||||
e.tag = base.tag('xhtml', 'span')
|
e.tag = base.tag('xhtml', 'span')
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
def referenced_images(root):
    """Yield (img_element, local_path) for every <img> element in *root*
    whose src is a file:// URL pointing at an existing local file."""
    for img in XPath('//h:img[@src]')(root):
        src = img.get('src')
        if not src.startswith('file://'):
            continue
        path = src[len('file://'):]
        if os.path.exists(path):
            yield img, path
|
|
||||||
|
|||||||
@@ -1,168 +0,0 @@
|
|||||||
import re
|
|
||||||
|
|
||||||
import bs4
|
|
||||||
|
|
||||||
from ebook_converter.constants_old import preferred_encoding
|
|
||||||
from ebook_converter.ebooks.BeautifulSoup import html5_parser
|
|
||||||
from ebook_converter.utils.html2text import html2text
|
|
||||||
from ebook_converter.utils import entities
|
|
||||||
|
|
||||||
|
|
||||||
# Hackish - ignoring sentences ending or beginning in numbers to avoid
# confusion with decimal points.
# Matches a lost paragraph break: a lowercase letter, sentence-ending
# punctuation, then an uppercase letter with no intervening whitespace.
lost_cr_pat = re.compile('([a-z])([\\.\\?!])([A-Z])')
# Abbreviations that legitimately match the pattern above and must NOT be
# treated as lost breaks (Ph.D, D.Phil, honorific + capitalised name).
lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')
# Markup that triggers the full sanitize_comments_html() round-trip.
sanitize_pat = re.compile(r'<script|<table|<tr|<td|<th|<style|<iframe',
                          re.IGNORECASE)
|
|
||||||
|
|
||||||
|
|
||||||
def comments_to_html(comments):
    '''
    Convert random comment text to normalized, xml-legal block of <p>s
    'plain text' returns as
    <p>plain text</p>

    'plain text with <i>minimal</i> <b>markup</b>' returns as
    <p>plain text with <i>minimal</i> <b>markup</b></p>

    '<p>pre-formatted text</p> returns untouched

    'A line of text\n\nFollowed by a line of text' returns as
    <p>A line of text</p>
    <p>Followed by a line of text</p>

    'A line of text.\nA second line of text.\rA third line of text' returns as
    <p>A line of text.<br />A second line of text.<br />A third line of text.</p>

    '...end of a paragraph.Somehow the break was lost...' returns as
    <p>...end of a paragraph.</p>
    <p>Somehow the break was lost...</p>

    Deprecated HTML returns as HTML via BeautifulSoup()

    '''
    if not comments:
        return u'<p></p>'
    # Bytes input is decoded with the platform's preferred encoding,
    # replacing undecodable bytes.
    if not isinstance(comments, str):
        comments = comments.decode(preferred_encoding, 'replace')

    if comments.lstrip().startswith('<'):
        # Comment is already HTML do not mess with it
        return comments

    if '<' not in comments:
        # Pure plain text: escape it and split paragraphs on blank lines,
        # turning single newlines into <br />.
        comments = entities.prepare_string_for_xml(comments)
        parts = [u'<p class="description">%s</p>'%x.replace(u'\n', u'<br />')
                 for x in comments.split('\n\n')]
        return '\n'.join(parts)

    # Disallowed markup (scripts, tables, styles, iframes): round-trip
    # through plain text to sanitize, best-effort.
    if sanitize_pat.search(comments) is not None:
        try:
            return sanitize_comments_html(comments)
        except:
            import traceback
            traceback.print_exc()
            return u'<p></p>'

    # Explode lost CRs to \n\n
    # First protect known abbreviations by inserting a \r after their dot;
    # the \r is stripped again below.
    comments = lost_cr_exception_pat.sub(lambda m: m.group().replace('.',
                                         '.\r'), comments)
    for lost_cr in lost_cr_pat.finditer(comments):
        comments = comments.replace(lost_cr.group(),
                                    '%s%s\n\n%s' % (lost_cr.group(1),
                                                    lost_cr.group(2),
                                                    lost_cr.group(3)))

    comments = comments.replace(u'\r', u'')
    # Convert \n\n to <p>s
    comments = comments.replace(u'\n\n', u'<p>')
    # Convert solo returns to <br />
    comments = comments.replace(u'\n', '<br />')
    # Convert two hyphens to emdash
    comments = comments.replace('--', '—')

    # Re-parse the mixed markup and rebuild it as a flat sequence of
    # <p class="description"> blocks: consecutive text/inline tokens are
    # accumulated into an open <p> (pTag), while block-level tokens are
    # inserted directly into the container, closing any open <p> first.
    soup = html5_parser('<div>' + comments + '</div>').find('div')
    result = html5_parser('<div>')
    container = result.find('div')
    rtc = 0                # insertion index into container
    open_pTag = False      # whether a <p> is currently being accumulated

    all_tokens = list(soup.contents)
    inline_tags = ('br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a', 'hr')
    for token in all_tokens:
        if isinstance(token, (bs4.CData, bs4.Comment, bs4.Declaration,
                              bs4.ProcessingInstruction)):
            # Non-content nodes are dropped entirely.
            continue
        if isinstance(token, bs4.NavigableString):
            if not open_pTag:
                pTag = result.new_tag('p')
                open_pTag = True
                ptc = 0
            pTag.insert(ptc, token)
            ptc += 1
        elif token.name in inline_tags:
            if not open_pTag:
                pTag = result.new_tag('p')
                open_pTag = True
                ptc = 0
            pTag.insert(ptc, token)
            ptc += 1
        else:
            # Block-level element: flush the open paragraph, then insert
            # the element as-is.
            if open_pTag:
                container.insert(rtc, pTag)
                rtc += 1
                open_pTag = False
                ptc = 0
            container.insert(rtc, token)
            rtc += 1

    # Flush a trailing open paragraph.
    if open_pTag:
        container.insert(rtc, pTag)

    for p in container.findAll('p'):
        p['class'] = 'description'

    return container.decode_contents()
|
|
||||||
|
|
||||||
|
|
||||||
def markdown(val):
    """Convert markdown text *val* to HTML.

    A single Markdown converter instance is built lazily on first use and
    cached as an attribute on this function, so later calls reuse it.
    """
    try:
        md = markdown.Markdown
    except AttributeError:
        # First call: import and cache the converter on the function.
        from ebook_converter.ebooks.markdown import Markdown
        md = markdown.Markdown = Markdown()
    return md.convert(val)
|
|
||||||
|
|
||||||
|
|
||||||
def merge_comments(one, two):
    """Concatenate two comment blobs as normalized HTML, separated by a
    blank line."""
    parts = (comments_to_html(one), comments_to_html(two))
    return '\n\n'.join(parts)
|
|
||||||
|
|
||||||
|
|
||||||
def sanitize_comments_html(html):
    """Sanitize untrusted comment HTML by round-tripping it through
    plain text and back to HTML via Markdown, which drops scripts,
    tables and other disallowed markup."""
    from ebook_converter.ebooks.markdown import Markdown
    plain = html2text(html)
    converter = Markdown()
    return converter.convert(plain)
|
|
||||||
|
|
||||||
|
|
||||||
def find_tests():
    # Build and return a unittest suite exercising comments_to_html with
    # representative plain-text, inline-markup and PI-containing inputs.
    import unittest

    class Test(unittest.TestCase):

        def test_comments_to_html(self):
            # NOTE(review): the second expected value contains a raw 'b&c'
            # inside <b>; comments_to_html escapes text, so 'b&amp;c' may
            # have been mangled by transport -- confirm against upstream.
            for pat, val in [
                (b'lineone\n\nlinetwo',
                 '<p class="description">lineone</p>\n<p class="description">linetwo</p>'),

                ('a <b>b&c</b>\nf',
                 '<p class="description">a <b>b&c</b><br/>f</p>'),

                ('a <?xml asd> b\n\ncd',
                 '<p class="description">a b</p><p class="description">cd</p>'),
            ]:
                cval = comments_to_html(pat)
                self.assertEqual(cval, val)

    return unittest.defaultTestLoader.loadTestsFromTestCase(Test)
|
|
||||||
Reference in New Issue
Block a user