mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-24 03:05:48 +01:00
618 lines
26 KiB
Python
618 lines
26 KiB
Python
"""
|
|
Transform XHTML/OPS-ish content into Mobipocket HTML 3.2.
|
|
"""
|
|
import copy
|
|
import re
|
|
import numbers
|
|
from lxml import etree
|
|
|
|
from ebook_converter import constants as const
|
|
from ebook_converter.ebooks.oeb import base
|
|
from ebook_converter.ebooks.oeb import parse_utils
|
|
from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
|
from ebook_converter.ebooks.oeb.transforms.flatcss import KeyMapper
|
|
from ebook_converter.ebooks.mobi.utils import convert_color_for_font_tag
|
|
from ebook_converter.utils.imghdr import identify
|
|
|
|
|
|
# Namespace URI used for Mobipocket specific elements.
MBP_NS = 'http://mobipocket.com/ns/mbp'


def MBP(name):
    """Return *name* qualified with the Mobipocket namespace.

    The result uses the ``{namespace}localname`` notation, so
    ``MBP('pagebreak')`` gives ``'{http://mobipocket.com/ns/mbp}pagebreak'``.
    """
    qualified = '{%s}%s' % (MBP_NS, name)
    return qualified
|
|
|
|
|
|
# Namespace map passed as ``nsmap=`` when the output ``html`` root elements
# are created: XHTML is the default namespace and the ``mbp`` prefix is bound
# to const.MBP_NS.
MOBI_NSMAP = {None: const.XHTML_NS, 'mbp': const.MBP_NS}
|
|
# Tags treated as inline when deciding whether whitespace-only leading text
# of an element may be dropped.
INLINE_TAGS = {
    'a', 'b', 'big', 'code', 'em', 'font', 'i', 'q', 's', 'span', 'strike',
    'strong', 'sub', 'sup', 'tt', 'u',
}
HEADER_TAGS = {'h%d' % level for level in range(1, 7)}

# GR: Added 'caption' to both sets
TABLE_TAGS = {'table', 'tr', 'td', 'th', 'caption'}
# Tags that are emitted as themselves and kept on the nesting stack.
NESTABLE_TAGS = {'ol', 'ul', 'li'} | TABLE_TAGS

SPECIAL_TAGS = {'hr', 'br'}
CONTENT_TAGS = SPECIAL_TAGS | {'img'}

# Tags that are never wrapped in <sup>/<sub> for vertical alignment.
NOT_VTAGS = (HEADER_TAGS | NESTABLE_TAGS | TABLE_TAGS | SPECIAL_TAGS |
             CONTENT_TAGS)
# Tags that cannot hold children; anchors are placed before them instead of
# inside them.
LEAF_TAGS = {
    'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', 'img', 'input',
    'link', 'meta', 'param',
}
# page-break-before/after values that trigger a page break.
PAGE_BREAKS = {'always', 'left', 'right'}

# Matches a run of whitespace that is collapsed into a single space.
COLLAPSE = re.compile(r'[ \t\r\n\v]+')
|
|
|
|
|
|
def asfloat(value):
    """Return *value* as a float, or 0.0 when it is not a number.

    Anything that is not a ``numbers.Number`` instance (strings such as
    ``'auto'``, ``None``, ...) yields ``0.0``.
    """
    return float(value) if isinstance(value, numbers.Number) else 0.0
|
|
|
|
|
|
def isspace(text):
    """Tell whether *text* is empty or consists of collapsible whitespace.

    Empty/``None`` text counts as space. Text containing a non-breaking
    space (``\\xa0``) never does, even though ``str.isspace`` would accept
    it.
    """
    return not text or ('\xa0' not in text and text.isspace())
|
|
|
|
|
|
class BlockState(object):
    """Mutable bookkeeping for the block level output being generated.

    One instance is created per output ``body`` element and is updated while
    the source tree is walked.
    """

    def __init__(self, body):
        # Output <body> element and the stack of currently open nestable
        # wrappers (lists, list items, table parts).
        self.body = body
        self.nested = []
        # Nothing is open yet: no paragraph, no innermost inline element, no
        # link element, and no format state rendered so far.
        self.para = self.inline = self.anchor = self.istate = None
        # Vertical space collected since the last emitted block.
        self.vpadding = self.vmargin = 0.
        # pbreak: a page break is pending; content: something was emitted.
        self.pbreak = self.content = False
|
|
|
|
|
|
class FormatState(object):
    """Formatting that applies to the text currently being emitted.

    Two states compare equal when every attribute that influences the
    generated inline markup (font size, italic, bold, link target,
    whitespace handling, font family, colours, strike-through, underline)
    is equal. Layout related attributes such as ``left``, ``indent``,
    ``halign``, ``ids``, ``attrib``, ``list_num`` and ``rendered`` are
    deliberately not part of the comparison.
    """

    # Names of the attributes that take part in ``==`` / ``!=``.
    _COMPARED = ('fsize', 'italic', 'bold', 'href', 'preserve', 'pre_wrap',
                 'family', 'bgcolor', 'fgcolor', 'strikethrough',
                 'underline')

    # Instances are mutable and define __eq__, so they are unhashable. Python
    # would set this implicitly; spelling it out documents the intent.
    __hash__ = None

    def __init__(self):
        self.rendered = False
        self.left = 0.
        self.halign = 'auto'
        self.indent = 0.
        self.fsize = 3
        self.ids = set()
        self.italic = False
        self.bold = False
        self.strikethrough = False
        self.underline = False
        self.preserve = False
        self.pre_wrap = False
        self.family = 'serif'
        self.bgcolor = 'transparent'
        self.fgcolor = 'black'
        self.href = None
        self.list_num = 0
        self.attrib = {}

    def __eq__(self, other):
        # Comparing with something that is not a FormatState (e.g. None)
        # must not blow up with AttributeError; let Python fall back to its
        # default handling, which makes ``state == None`` simply False.
        if not isinstance(other, FormatState):
            return NotImplemented
        return all(getattr(self, name) == getattr(other, name)
                   for name in self._COMPARED)

    def __ne__(self, other):
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
|
|
|
|
|
|
class MobiMLizer(object):
    """Rewrite the XHTML of every spine item as Mobipocket markup.

    Instances are callable: ``mobimlizer(oeb, context)`` walks ``oeb.spine``
    and replaces the ``data`` of each item with a freshly built
    ``html``/``body`` tree.

    :param ignore_tables: when true, table related tags are emitted as
        ``span`` (for ``td``) or ``div`` instead of table markup.
    """

    def __init__(self, ignore_tables=False):
        self.ignore_tables = ignore_tables

    def __call__(self, oeb, context):
        """Convert all spine items of *oeb* in place.

        *context* provides the conversion options (kept as ``self.opts``)
        and the destination profile (``context.dest``).
        """
        oeb.logger.info('Converting XHTML to Mobipocket markup...')
        self.oeb = oeb
        self.log = self.oeb.logger
        self.opts = context
        self.profile = profile = context.dest
        # profile.fnums inverted (value -> key); mobimlize_font() indexes it
        # with whatever self.fmap returns for a font size.
        self.fnums = fnums = {v: k for k, v in profile.fnums.items()}
        self.fmap = KeyMapper(profile.fbase, profile.fbase, fnums.keys())
        self.mobimlize_spine()

    def mobimlize_spine(self):
        'Iterate over the spine and convert it to MOBIML'
        for item in self.oeb.spine:
            stylizer = Stylizer(item.data, item.href, self.oeb, self.opts,
                                self.profile)
            body = item.data.find(base.tag('xhtml', 'body'))
            nroot = etree.Element(base.tag('xhtml', 'html'), nsmap=MOBI_NSMAP)
            nbody = etree.SubElement(nroot, base.tag('xhtml', 'body'))
            # Needed by mobimlize_elem() to resolve image hrefs.
            self.current_spine_item = item
            self.mobimlize_elem(body, stylizer, BlockState(nbody),
                                [FormatState()])
            item.data = nroot

    def mobimlize_font(self, ptsize):
        """Map a font size to the value stored for it in ``self.fnums``."""
        return self.fnums[self.fmap[ptsize]]

    def mobimlize_measure(self, ptsize):
        """Format a length for use in a ``width``/``height`` attribute.

        Strings (and bytes) are returned untouched. Numbers that round to
        less than the profile's base font size are given in ``pt``, anything
        else as a whole number of ``em`` (``ptsize / fbase``).
        """
        if isinstance(ptsize, (str, bytes)):
            return ptsize
        embase = self.profile.fbase
        if round(ptsize) < embase:
            return "%dpt" % int(round(ptsize))
        return "%dem" % int(round(ptsize / embase))

    def preize_text(self, text, pre_wrap=False):
        """Split preformatted *text* into strings and ``br`` elements.

        With *pre_wrap* a run of n >= 2 spaces becomes n-1 non-breaking
        spaces followed by one ordinary space; otherwise every space is made
        non-breaking. Line breaks (``\\r\\n``, ``\\r``, ``\\n``) become ``br``
        elements.

        :return: list starting with the first line, followed by a ``br``
            element and (if not empty) the text for every further line.
        """
        text = str(text)
        if pre_wrap:
            # Replace n consecutive spaces with n-1 NBSP + space
            text = re.sub(r' {2,}',
                          lambda m: ('\xa0' * (len(m.group()) - 1) + ' '),
                          text)
        else:
            text = text.replace(' ', '\xa0')

        text = text.replace('\r\n', '\n')
        text = text.replace('\r', '\n')
        lines = text.split('\n')
        result = lines[:1]
        for line in lines[1:]:
            result.append(etree.Element(base.tag('xhtml', 'br')))
            if line:
                result.append(line)
        return result

    def mobimlize_content(self, tag, text, bstate, istates):
        """Convert text content.

        Opens a new block level element in the output when none is open
        (or when *tag* is a table cell), emits content tags (``img``,
        ``hr``, ``br``), attaches pending anchors and finally appends *text*
        wrapped in the inline elements required by ``istates[-1]``.

        :param tag: bare name of the source tag the content belongs to.
        :param text: text to append; may be empty/None.
        :param bstate: the BlockState of the output being built.
        :param istates: stack of FormatState objects; the last one applies.
        """
        if text or tag != 'br':
            bstate.content = True
        istate = istates[-1]
        para = bstate.para
        if tag in SPECIAL_TAGS and not text:
            para = para if para is not None else bstate.body
        elif para is None or tag in ('td', 'th'):
            if bstate.pbreak:
                etree.SubElement(bstate.body, MBP('pagebreak'))
                bstate.pbreak = False
            bstate.istate = None
            bstate.anchor = None
            parent = bstate.nested[-1] if bstate.nested else bstate.body
            indent = istate.indent
            left = istate.left
            if isinstance(indent, (str, bytes)):
                indent = 0
            if indent < 0 and abs(indent) < left:
                # A negative indent smaller than the left offset is folded
                # into the offset.
                left += indent
                indent = 0
            elif indent != 0 and abs(indent) < self.profile.fbase:
                # Keep the sign but never use less than the base font size.
                indent = (indent / abs(indent)) * self.profile.fbase
            if tag in NESTABLE_TAGS and not istate.rendered:
                para = wrapper = etree.SubElement(
                    parent, base.tag('xhtml', tag), attrib=istate.attrib)
                bstate.nested.append(para)
                if tag == 'li' and len(istates) > 1:
                    # The counter lives in the state of the enclosing list.
                    istates[-2].list_num += 1
                    para.attrib['value'] = str(istates[-2].list_num)
            elif tag in NESTABLE_TAGS and istate.rendered:
                para = wrapper = bstate.nested[-1]
            elif (not self.opts.mobi_ignore_margins and left > 0 and
                    indent >= 0):
                # Left margins are emulated with nested blockquotes.
                ems = self.profile.mobi_ems_per_blockquote
                para = wrapper = etree.SubElement(
                    parent, base.tag('xhtml', 'blockquote'))
                emleft = int(round(left / self.profile.fbase)) - ems
                emleft = min((emleft, 10))
                while emleft > ems / 2:
                    para = etree.SubElement(para,
                                            base.tag('xhtml', 'blockquote'))
                    emleft -= ems
            else:
                para = wrapper = etree.SubElement(parent,
                                                  base.tag('xhtml', 'p'))
            bstate.inline = bstate.para = para
            vspace = bstate.vpadding + bstate.vmargin
            bstate.vpadding = bstate.vmargin = 0
            if tag not in TABLE_TAGS:
                if tag in ('ul', 'ol') and vspace > 0:
                    wrapper.addprevious(
                        etree.Element(base.tag('xhtml', 'div'),
                                      height=self.mobimlize_measure(vspace)))
                else:
                    wrapper.attrib['height'] = self.mobimlize_measure(vspace)
                para.attrib['width'] = self.mobimlize_measure(indent)
            elif tag == 'table' and vspace > 0:
                # Vertical space before a table is emulated with <br> tags.
                vspace = int(round(vspace / self.profile.fbase))
                while vspace > 0:
                    wrapper.addprevious(
                        etree.Element(base.tag('xhtml', 'br')))
                    vspace -= 1
            if istate.halign != 'auto' and isinstance(istate.halign,
                                                      (bytes, str)):
                if isinstance(istate.halign, bytes):
                    istate.halign = istate.halign.decode('utf-8')
                para.attrib['align'] = istate.halign
            istate.rendered = True
        pstate = bstate.istate
        if tag in CONTENT_TAGS:
            bstate.inline = para
            pstate = bstate.istate = None
            try:
                etree.SubElement(para, base.tag('xhtml', tag),
                                 attrib=istate.attrib)
            except Exception:
                print('Invalid subelement:', para, tag, istate.attrib)
                raise
        elif tag in TABLE_TAGS:
            para.attrib['valign'] = 'top'
        if istate.ids:
            for id_ in istate.ids:
                anchor = etree.Element(base.tag('xhtml', 'a'),
                                       attrib={'id': id_})
                if tag == 'li':
                    try:
                        last = bstate.body[-1][-1]
                    except Exception:
                        # Nothing to attach the anchor to; give up on the
                        # remaining ids as well.
                        break
                    last.insert(0, anchor)
                    anchor.tail = last.text
                    last.text = None
                else:
                    last = bstate.body[-1]
                    # We use append instead of addprevious so that inline
                    # anchors in large blocks point to the correct place. See
                    # https://bugs.launchpad.net/calibre/+bug/899831
                    # This could potentially break if inserting an anchor at
                    # this point in the markup is illegal, but I cannot think
                    # of such a case offhand.
                    if parse_utils.barename(last.tag) in LEAF_TAGS:
                        last.addprevious(anchor)
                    else:
                        last.append(anchor)

            istate.ids.clear()
        if not text:
            return
        if not pstate or istate != pstate:
            # The formatting changed: build a new chain of inline elements.
            inline = para
            fsize = istate.fsize
            href = istate.href
            if not href:
                bstate.anchor = None
            elif pstate and pstate.href == href:
                # Still inside the same link, keep using its <a> element.
                inline = bstate.anchor
            else:
                inline = etree.SubElement(inline, base.tag('xhtml', 'a'),
                                          href=href)
                bstate.anchor = inline

            if fsize != 3:
                inline = etree.SubElement(inline, base.tag('xhtml', 'font'),
                                          size=str(fsize))
            if istate.family == 'monospace':
                inline = etree.SubElement(inline, base.tag('xhtml', 'tt'))
            if istate.italic:
                inline = etree.SubElement(inline, base.tag('xhtml', 'i'))
            if istate.bold:
                inline = etree.SubElement(inline, base.tag('xhtml', 'b'))
            if istate.bgcolor is not None and istate.bgcolor != 'transparent':
                inline = etree.SubElement(
                    inline, base.tag('xhtml', 'span'),
                    bgcolor=convert_color_for_font_tag(istate.bgcolor))
            if istate.fgcolor != 'black':
                inline = etree.SubElement(
                    inline, base.tag('xhtml', 'font'),
                    color=convert_color_for_font_tag(istate.fgcolor))
            if istate.strikethrough:
                inline = etree.SubElement(inline, base.tag('xhtml', 's'))
            if istate.underline:
                inline = etree.SubElement(inline, base.tag('xhtml', 'u'))
            bstate.inline = inline
        bstate.istate = istate
        inline = bstate.inline
        if istate.preserve or istate.pre_wrap:
            content = self.preize_text(text, pre_wrap=istate.pre_wrap)
        else:
            content = [text]
        for item in content:
            if isinstance(item, (str, bytes)):
                if len(inline) == 0:
                    inline.text = (inline.text or '') + item
                else:
                    last = inline[-1]
                    last.tail = (last.tail or '') + item
            else:
                inline.append(item)

    def mobimlize_elem(self, elem, stylizer, bstate, istates,
                       ignore_valign=False):
        """Convert *elem* and, recursively, its children.

        A copy of the current format state is pushed on *istates*, updated
        from the computed style of *elem*, used while the element's text,
        children and their tails are emitted through mobimlize_content(),
        and popped again at the end.

        :param elem: source element; anything that is not an XHTML element
            is ignored.
        :param stylizer: object whose ``style(elem)`` gives the computed
            style.
        :param bstate: the BlockState of the output being built.
        :param istates: stack of FormatState objects.
        :param ignore_valign: set on the recursive call made for vertically
            aligned inline content so that it is not wrapped a second time.
        """
        if not isinstance(elem.tag, (str, bytes)) \
                or parse_utils.namespace(elem.tag) != const.XHTML_NS:
            return
        style = stylizer.style(elem)
        # <mbp:frame-set/> does not exist lalalala
        if ((style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') or
                style['visibility'] == 'hidden') and
                elem.get('data-calibre-jacket-searchable-tags', None) != '1'):
            id_ = elem.get('id', None)
            if id_:
                # Keep anchors so people can use display:none
                # to generate hidden TOCs
                tail = elem.tail
                elem.clear()
                elem.text = None
                elem.set('id', id_)
                elem.tail = tail
                elem.tag = base.tag('xhtml', 'a')
            else:
                return
        tag = parse_utils.barename(elem.tag)
        # Shallow copy: the ``ids`` set and ``attrib`` dict stay shared with
        # the parent state.
        istate = copy.copy(istates[-1])
        istate.rendered = False
        istate.list_num = 0
        if tag == 'ol' and 'start' in elem.attrib:
            try:
                istate.list_num = int(elem.attrib['start']) - 1
            except (TypeError, ValueError):
                # Not an integer: numbering starts at the default.
                pass
        istates.append(istate)
        left = 0
        display = style['display']
        if display == 'table-cell':
            display = 'inline'
        elif display.startswith('table'):
            display = 'block'
        isblock = (not display.startswith('inline') and
                   style['display'] != 'none')
        isblock = isblock and style['float'] == 'none'
        isblock = isblock and tag != 'br'
        if isblock:
            bstate.para = None
            istate.halign = style['text-align']
            rawti = style._get('text-indent')
            istate.indent = style['text-indent']
            if hasattr(rawti, 'strip') and '%' in rawti:
                # We have a percentage text indent, these can come out looking
                # too large if the user chooses a wide output profile like
                # tablet
                istate.indent = min(style._unit_convert(rawti, base=500),
                                    istate.indent)
            if style['margin-left'] == 'auto' \
                    and style['margin-right'] == 'auto':
                istate.halign = 'center'
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            if tag != 'body':
                left = margin + padding
            istate.left += left
            vmargin = asfloat(style['margin-top'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-top'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        elif not istate.href:
            # Horizontal margin/padding of inline elements is emulated with
            # non-breaking spaces around their content.
            margin = asfloat(style['margin-left'])
            padding = asfloat(style['padding-left'])
            lspace = margin + padding
            if lspace > 0:
                spaces = int(round((lspace * 3) / style['font-size']))
                elem.text = ('\xa0' * spaces) + (elem.text or '')
            margin = asfloat(style['margin-right'])
            padding = asfloat(style['padding-right'])
            rspace = margin + padding
            if rspace > 0:
                spaces = int(round((rspace * 3) / style['font-size']))
                if len(elem) == 0:
                    elem.text = (elem.text or '') + ('\xa0' * spaces)
                else:
                    # The end of the element's content is the tail of its
                    # last child. (Appending to the child's ``text`` would
                    # put the spaces before that child's own children and
                    # before any text following the child.)
                    last = elem[-1]
                    last.tail = (last.tail or '') + ('\xa0' * spaces)
        if bstate.content and style['page-break-before'] in PAGE_BREAKS:
            bstate.pbreak = True
        istate.fsize = self.mobimlize_font(style['font-size'])
        istate.italic = style['font-style'] == 'italic'
        weight = style['font-weight']
        istate.bold = weight in ('bold', 'bolder') or asfloat(weight) > 400
        istate.preserve = style['white-space'] == 'pre'
        istate.pre_wrap = style['white-space'] == 'pre-wrap'
        istate.bgcolor = style['background-color']
        istate.fgcolor = style['color']
        istate.strikethrough = (style.effective_text_decoration ==
                                'line-through')
        istate.underline = style.effective_text_decoration == 'underline'
        ff = (style['font-family'].lower()
              if hasattr(style['font-family'], 'lower') else '')
        if 'monospace' in ff or 'courier' in ff or ff.endswith(' mono'):
            istate.family = 'monospace'
        elif ('sans-serif' in ff or 'sansserif' in ff or 'verdana' in ff or
                'arial' in ff or 'helvetica' in ff):
            istate.family = 'sans-serif'
        else:
            istate.family = 'serif'
        if 'id' in elem.attrib:
            istate.ids.add(elem.attrib['id'])
        if 'name' in elem.attrib:
            istate.ids.add(elem.attrib['name'])
        if tag == 'a' and 'href' in elem.attrib:
            istate.href = elem.attrib['href']
        istate.attrib.clear()
        if tag == 'img' and 'src' in elem.attrib:
            istate.attrib['src'] = elem.attrib['src']
            istate.attrib['align'] = 'baseline'
            cssdict = style.cssdict()
            valign = cssdict.get('vertical-align', None)
            if valign in ('top', 'bottom', 'middle'):
                istate.attrib['align'] = valign
            for prop in ('width', 'height'):
                if cssdict[prop] != 'auto':
                    value = style[prop]
                    if value == getattr(self.profile, prop):
                        result = '100%'
                    else:
                        # Amazon's renderer does not support
                        # img sizes in units other than px
                        # See #7520 for test case
                        try:
                            pixs = int(round(float(value) /
                                             (72 / self.profile.dpi)))
                        except Exception:
                            continue
                        result = str(pixs)
                    istate.attrib[prop] = result
            if 'width' not in istate.attrib or 'height' not in istate.attrib:
                # At least one dimension is missing: take it from the image
                # itself, keeping the aspect ratio.
                href = self.current_spine_item.abshref(elem.attrib['src'])
                try:
                    item = self.oeb.manifest.hrefs[base.urlnormalize(href)]
                except Exception:
                    # The message is formatted here so that it comes out
                    # right both with a print-style logger and with one that
                    # treats extra arguments as %-format parameters.
                    self.oeb.logger.warning('Failed to find image: %s' % href)
                else:
                    try:
                        width, height = identify(item.data)[1:]
                    except Exception:
                        self.oeb.logger.warning('Invalid image: %s' % href)
                    else:
                        if ('width' not in istate.attrib and
                                'height' not in istate.attrib):
                            istate.attrib['width'] = str(width)
                            istate.attrib['height'] = str(height)
                        else:
                            ar = width / height
                            if 'width' not in istate.attrib:
                                try:
                                    width = int(istate.attrib['height']) * ar
                                except (TypeError, ValueError):
                                    # e.g. '100%': keep the real width.
                                    pass
                                istate.attrib['width'] = str(int(width))
                            else:
                                try:
                                    height = int(istate.attrib['width']) / ar
                                except (TypeError, ValueError):
                                    # e.g. '100%': keep the real height.
                                    pass
                                istate.attrib['height'] = str(int(height))
                        item.unload_data_from_memory()
        elif (tag == 'hr' and asfloat(style['width']) > 0 and
                style._get('width') not in {'100%', 'auto'}):
            raww = style._get('width')
            if hasattr(raww, 'strip') and '%' in raww:
                istate.attrib['width'] = raww
            else:
                prop = style['width'] / self.profile.width
                istate.attrib['width'] = "%d%%" % int(round(prop * 100))
        # NOTE(review): ``display`` was normalised above ('table-cell' ->
        # 'inline', other 'table*' values -> 'block'), so the three branches
        # below can never match. They are kept as they were; switching them
        # to style['display'] would change the generated markup.
        elif display == 'table':
            tag = 'table'
        elif display == 'table-row':
            tag = 'tr'
        elif display == 'table-cell':
            tag = 'td'
        if tag in TABLE_TAGS and self.ignore_tables:
            tag = 'span' if tag == 'td' else 'div'

        if tag in ('table', 'td', 'tr'):
            col = style.backgroundColor
            if col:
                elem.set('bgcolor', col)
            css = style.cssdict()
            if 'border' in css or 'border-width' in css:
                elem.set('border', '1')
        if tag in TABLE_TAGS:
            for attr in ('rowspan', 'colspan', 'width', 'border', 'scope',
                         'bgcolor'):
                if attr in elem.attrib:
                    istate.attrib[attr] = elem.attrib[attr]
        if tag == 'q':
            # Quotation marks are added as plain text around the element.
            elem.text = '\u201c' + (elem.text or '')
            elem.tail = '\u201d' + (elem.tail or '')
        text = None
        if elem.text:
            if istate.preserve or istate.pre_wrap:
                text = elem.text
            elif (len(elem) > 0 and isspace(elem.text) and
                    hasattr(elem[0].tag, 'rpartition') and
                    elem[0].tag.rpartition('}')[-1] not in INLINE_TAGS):
                # Whitespace in front of a non-inline first child is dropped.
                text = None
            else:
                text = COLLAPSE.sub(' ', elem.text)
        valign = style['vertical-align']
        not_baseline = valign in ('super', 'sub', 'text-top',
                                  'text-bottom', 'top', 'bottom') or (
            isinstance(valign, numbers.Number) and abs(valign) != 0)
        issup = valign in ('super', 'text-top', 'top') or (
            isinstance(valign, numbers.Number) and valign > 0)
        vtag = 'sup' if issup else 'sub'
        if (not_baseline and not ignore_valign and tag not in NOT_VTAGS and
                not isblock):
            # Render the element into a scratch tree first, then move the
            # result into <sup>/<sub><small> inside the real output.
            nroot = etree.Element(base.tag('xhtml', 'html'), nsmap=MOBI_NSMAP)
            vbstate = BlockState(etree.SubElement(nroot,
                                                  base.tag('xhtml', 'body')))
            vbstate.para = etree.SubElement(vbstate.body,
                                            base.tag('xhtml', 'p'))
            self.mobimlize_elem(elem, stylizer, vbstate, istates,
                                ignore_valign=True)
            # Drop the state pushed above, but never leave the stack empty.
            if len(istates) > 0:
                istates.pop()
            if len(istates) == 0:
                istates.append(FormatState())
            if bstate.para is None:
                self.mobimlize_content('span', '', bstate, istates)
            parent = bstate.para if bstate.inline is None else bstate.inline
            if parent is not None:
                vtag = etree.SubElement(parent, base.tag('xhtml', vtag))
                vtag = etree.SubElement(vtag, base.tag('xhtml', 'small'))
                # Add anchors
                for child in vbstate.body:
                    if child is not vbstate.para:
                        vtag.append(child)
                    else:
                        break
                if vbstate.para is not None:
                    if vbstate.para.text:
                        vtag.text = vbstate.para.text
                    for child in vbstate.para:
                        vtag.append(child)
                return

        if tag == 'blockquote':
            # Margins are always honoured inside a blockquote; the option is
            # restored once the children have been processed.
            old_mim = self.opts.mobi_ignore_margins
            self.opts.mobi_ignore_margins = False

        if (text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS or (
                # We have an id but no text and no children, the id should
                # still be added.
                istate.ids and tag in ('a', 'span', 'i', 'b', 'u') and
                len(elem) == 0)):
            if tag == 'li' and len(istates) > 1 and 'value' in elem.attrib:
                try:
                    value = int(elem.attrib['value'])
                    istates[-2].list_num = value - 1
                except (TypeError, ValueError):
                    # Not an integer: keep the running number.
                    pass
            self.mobimlize_content(tag, text, bstate, istates)
        for child in elem:
            self.mobimlize_elem(child, stylizer, bstate, istates)
            tail = None
            if child.tail:
                if istate.preserve or istate.pre_wrap:
                    tail = child.tail
                elif bstate.para is None and isspace(child.tail):
                    tail = None
                else:
                    tail = COLLAPSE.sub(' ', child.tail)
            if tail:
                self.mobimlize_content(tag, tail, bstate, istates)

        if tag == 'blockquote':
            self.opts.mobi_ignore_margins = old_mim

        if bstate.content and style['page-break-after'] in PAGE_BREAKS:
            bstate.pbreak = True
        if isblock:
            para = bstate.para
            if para is not None and para.text == '\xa0' and len(para) < 1:
                # A block holding nothing but a single NBSP.
                if style.height > 2:
                    para.getparent().replace(
                        para, etree.Element(base.tag('xhtml', 'br')))
                else:
                    # This is too small to be rendered effectively, drop it
                    para.getparent().remove(para)
            bstate.para = None
            bstate.istate = None
            vmargin = asfloat(style['margin-bottom'])
            bstate.vmargin = max((bstate.vmargin, vmargin))
            vpadding = asfloat(style['padding-bottom'])
            if vpadding > 0:
                bstate.vpadding += bstate.vmargin
                bstate.vmargin = 0
                bstate.vpadding += vpadding
        if bstate.nested and bstate.nested[-1].tag == elem.tag:
            bstate.nested.pop()
        istates.pop()
|