mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-27 08:15:46 +01:00
This is an ongoing refactor of the calibre code to make it more readable and to transform it into something more coherent. This patch changes how some modules are imported: modules are now imported explicitly, instead of polluting the namespace of each module with symbols from other modules, which were themselves often imported from somewhere else. Yuck.
623 lines
24 KiB
Python
623 lines
24 KiB
Python
"""
|
|
Transform OEB content into FB2 markup
|
|
"""
|
|
from datetime import datetime
|
|
import re
|
|
import textwrap
|
|
import urllib.parse
|
|
import uuid
|
|
|
|
from lxml import etree
|
|
|
|
from ebook_converter import constants as const
|
|
from ebook_converter import prepare_string_for_xml
|
|
from ebook_converter.constants_old import __appname__, __version__
|
|
from ebook_converter.utils.localization import lang_as_iso639_1
|
|
from ebook_converter.utils.img import save_cover_data_to
|
|
from ebook_converter.ebooks.oeb.base import urlnormalize
|
|
from ebook_converter.polyglot.binary import as_base64_unicode
|
|
|
|
|
|
__license__ = 'GPL 3'
|
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
|
|
class FB2MLizer(object):
|
|
'''
|
|
Todo: * Include more FB2 specific tags in the conversion.
|
|
* Handle notes and anchor links.
|
|
'''
|
|
|
|
def __init__(self, log):
    """
    Remember the logger and start from a clean conversion state.

    @param log: Logger object; other methods call its info, debug, warn
    and error methods.
    """
    self.log = log
    self.reset_state()
|
|
|
|
def reset_state(self):
    """
    Put all per-conversion bookkeeping back to its initial values.
    """
    # Depth of currently open <section> elements; consulted to decide
    # whether a new <section> has to be opened.
    self.section_level = 0
    # Flat table of contents: maps a page href to a dict describing the
    # TOC entries that point into that page.
    self.toc = {}
    # OEB lets images share a file name as long as they live in different
    # directories, while FB2 keeps every image in one flat namespace. Each
    # image href is therefore mapped to a sequentially numbered id so that
    # names can never collide.
    self.image_hrefs = {}
    # True while the generated markup is inside an open <p>; needed so
    # that text and inline tags always end up between <p> and </p>.
    self.in_p = False
|
|
|
|
def extract_content(self, oeb_book, opts):
    """
    Convert an OEB book into an FB2 document.

    @param oeb_book: The OEB book to convert; stored on the instance.
    @param opts: Conversion options; stored on the instance.

    @return: Whatever fb2mlize_spine() produces for the book.
    """
    self.log.info('Converting XHTML to FB2 markup...')
    self.oeb_book, self.opts = oeb_book, opts
    self.reset_state()

    # When sectionizing by TOC, flatten the TOC first so that <section>
    # and <title> elements can be emitted, letting readers rebuild a table
    # of contents from the document itself.
    if self.opts.sectionize == 'toc':
        self.create_flat_toc(self.oeb_book.toc, 1)

    return self.fb2mlize_spine()
|
|
|
|
def fb2mlize_spine(self):
    """
    Assemble the complete FB2 document as a string.

    The header, body text, binary images and footer are generated in that
    order, joined with newlines and passed through clean_text(). If the
    pretty_print option is set, the result is re-serialized through lxml.

    @return: The document, prefixed with an XML declaration.
    """
    # Evaluated left to right: header, text, images, footer.
    parts = [
        self.fb2_header(),
        self.get_text(),
        self.fb2mlize_images(),
        self.fb2_footer(),
    ]
    document = self.clean_text('\n'.join(parts))

    if self.opts.pretty_print:
        root = etree.fromstring(document)
        document = etree.tostring(root, encoding='unicode',
                                  pretty_print=True)

    return '<?xml version="1.0" encoding="UTF-8"?>\n' + document
|
|
|
|
def clean_text(self, text):
    """
    Tidy up generated FB2 markup with a fixed sequence of regex rewrites.

    The rules are applied strictly in order. When the insert_blank_line
    option is set, an <empty-line/> is added after every paragraph between
    the paragraph rules and the remaining layout rules.

    @param text: FB2 markup as a string.

    @return: The cleaned up markup.
    """
    paragraph_rules = (
        # Inline tags wrapping nothing but whitespace are pointless: drop
        # the tags, keep the whitespace.
        (r'(?mu)<(strong|emphasis|strikethrough|sub|sup)>'
         r'(\s*)</\1>', r'\2'),
        # No whitespace in front of a paragraph end.
        (r'(?mu)\s+</p>', '</p>'),
        # A run of three or more empty paragraphs becomes one line break.
        (r'(?mu)(?:<p></p>\s*){3,}', '<empty-line/>'),
        # Whatever empty paragraphs are left are dropped.
        (r'(?mu)<p></p>\s*', ''),
        # A paragraph following a paragraph starts on its own line.
        (r'(?mu)</p>\s*<p>', '</p>\n<p>'),
    )
    layout_rules = (
        # No whitespace in front of a title end.
        (r'(?mu)\s+</title>', '</title>'),
        # Empty titles are dropped.
        (r'(?mu)<title></title>\s*', ''),
        # A paragraph following a title starts on its own line.
        (r'(?mu)</title>\s*<p>', '</title>\n<p>'),
        # Line breaks sit on a line of their own between paragraphs.
        (r'(?mu)</(p|title)>\s*<empty-line/>', r'</\1>\n<empty-line/>'),
        (r'(?mu)<empty-line/>\s*<p>', '<empty-line/>\n<p>'),
        # Empty sections are dropped.
        (r'(?mu)<section>\s*</section>', ''),
        # Section starts and ends each get exactly one line of their own.
        (r'(?mu)\s*<section>', '\n<section>'),
        (r'(?mu)<section>\s*', '<section>\n'),
        (r'(?mu)\s*</section>', '\n</section>'),
        (r'(?mu)</section>\s*', '</section>\n'),
    )

    for pattern, replacement in paragraph_rules:
        text = re.sub(pattern, replacement, text)

    if self.opts.insert_blank_line:
        text = re.sub(r'(?mu)</p>', '</p><empty-line/>', text)

    for pattern, replacement in layout_rules:
        text = re.sub(pattern, replacement, text)

    return text
|
|
|
|
def fb2_header(self):
    """
    Build the opening <FictionBook> tag and the <description> block.

    Title, language, authors, keywords, series, identifiers, date,
    publisher and description are read from self.oeb_book.metadata; the
    genre comes from self.opts.fb2_genre and the cover from get_cover().
    If no UUID identifier is present a random one is generated and a
    warning is logged.

    @return: The header markup as a string with blank lines removed.
    """
    from ebook_converter.ebooks.oeb.base import OPF

    def scheme_of(identifier):
        # The scheme attribute is optional; treat a missing one as an
        # empty string instead of calling .lower() on None.
        return (identifier.get(OPF('scheme'), None) or '').lower()

    book_meta = self.oeb_book.metadata
    # Read the clock once so day, month and year always belong together.
    now = datetime.now()

    metadata = {}
    metadata['title'] = book_meta.title[0].value
    metadata['appname'] = __appname__
    metadata['version'] = __version__
    metadata['date'] = '%i.%i.%i' % (now.day, now.month, now.year)
    if book_meta.language:
        lc = lang_as_iso639_1(book_meta.language[0].value)
        if not lc:
            lc = book_meta.language[0].value
        metadata['lang'] = lc or 'en'
    else:
        metadata['lang'] = 'en'
    metadata['id'] = None
    metadata['cover'] = self.get_cover()
    metadata['genre'] = self.opts.fb2_genre

    # Authors: one word is a last name, two are first + last, and
    # anything in between the first and last word is the middle name.
    author_xml = []
    for auth in book_meta.creator:
        author_first = author_middle = author_last = ''
        author_parts = auth.value.split(' ')
        if len(author_parts) == 1:
            author_last = author_parts[0]
        elif len(author_parts) == 2:
            author_first, author_last = author_parts
        else:
            author_first = author_parts[0]
            author_middle = ' '.join(author_parts[1:-1])
            author_last = author_parts[-1]
        author_xml.append('<author>')
        author_xml.append('<first-name>%s</first-name>' %
                          prepare_string_for_xml(author_first))
        if author_middle:
            author_xml.append('<middle-name>%s</middle-name>' %
                              prepare_string_for_xml(author_middle))
        author_xml.append('<last-name>%s</last-name>' %
                          prepare_string_for_xml(author_last))
        author_xml.append('</author>')
    metadata['author'] = ''.join(author_xml)
    if not metadata['author']:
        # Emit an empty author rather than no author element at all.
        metadata['author'] = ('<author><first-name></first-name>'
                              '<last-name></last-name></author>')

    metadata['keywords'] = ''
    tags = list(map(str, book_meta.subject))
    if tags:
        tags = ', '.join(prepare_string_for_xml(x) for x in tags)
        metadata['keywords'] = '<keywords>%s</keywords>' % tags

    metadata['sequence'] = ''
    if book_meta.series:
        index = '1'
        if book_meta.series_index:
            index = book_meta.series_index[0]
        seq = prepare_string_for_xml(str(book_meta.series[0]))
        metadata['sequence'] = ('<sequence name="%s" number="%s"/>' %
                                (seq, index))

    year = publisher = isbn = ''
    identifiers = book_meta['identifier']
    for x in identifiers:
        if scheme_of(x) == 'uuid' or str(x).startswith('urn:uuid:'):
            metadata['id'] = str(x).split(':')[-1]
            break
    if metadata['id'] is None:
        self.log.warn('No UUID identifier found')
        metadata['id'] = str(uuid.uuid4())

    try:
        date = book_meta['date'][0]
    except IndexError:
        pass
    else:
        # Only the part before the first '-' is used as the year.
        year = ('<year>%s</year>' %
                prepare_string_for_xml(date.value.partition('-')[0]))

    try:
        publisher = book_meta['publisher'][0]
    except IndexError:
        pass
    else:
        publisher = ('<publisher>%s</publisher>' %
                     prepare_string_for_xml(publisher.value))

    # No break here: if several ISBN identifiers exist, the last one wins.
    for x in identifiers:
        if scheme_of(x) == 'isbn':
            isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)

    metadata['year'] = year
    metadata['isbn'] = isbn
    metadata['publisher'] = publisher

    # These entries already hold finished XML fragments and must not be
    # escaped again; everything else is plain text.
    xml_fragment_keys = ('author', 'cover', 'sequence', 'keywords', 'year',
                         'publisher', 'isbn')
    for key, value in metadata.items():
        if key not in xml_fragment_keys:
            metadata[key] = prepare_string_for_xml(value)

    # The annotation is added after the escaping pass above, so it is
    # escaped exactly once, here.
    try:
        comments = book_meta['description'][0]
    except Exception:
        metadata['comments'] = ''
    else:
        from ebook_converter.utils.html2text import html2text
        annot = prepare_string_for_xml(html2text(comments.value).strip())
        metadata['comments'] = f'<annotation><p>{annot}</p></annotation>'

    # Keep the indentation level of the description the same as the body.
    header = textwrap.dedent('''\
        <FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
        <description>
            <title-info>
                <genre>%(genre)s</genre>
                %(author)s
                <book-title>%(title)s</book-title>
                %(cover)s
                <lang>%(lang)s</lang>
                %(keywords)s
                %(sequence)s
                %(comments)s
            </title-info>
            <document-info>
                %(author)s
                <program-used>%(appname)s %(version)s</program-used>
                <date>%(date)s</date>
                <id>%(id)s</id>
                <version>1.0</version>
            </document-info>
            <publish-info>
                %(publisher)s
                %(year)s
                %(isbn)s
            </publish-info>
        </description>''') % metadata

    # Remove empty lines.
    return '\n'.join(filter(str.strip, header.splitlines()))
|
|
|
|
def fb2_footer(self):
    """
    Return the markup that closes the FB2 document.

    @return: The closing tag of the root element.
    """
    closing_tag = '</FictionBook>'
    return closing_tag
|
|
|
|
def get_cover(self):
    """
    Locate the cover image and return the FB2 <coverpage> markup for it.

    The cover named in the book metadata is used when it is a manifest
    item with a raster media type. If the metadata names no cover that is
    in the manifest, the first <img> of the guide's 'titlepage' (or,
    failing that, 'cover') page is used instead.

    The image is registered in self.image_hrefs as a side effect.

    @return: The <coverpage> element as a string, or '' when there is no
    cover or the cover image is not part of the manifest.
    """
    from ebook_converter.ebooks.oeb.base import OEB_RASTER_IMAGES

    manifest = self.oeb_book.manifest
    cover_meta = self.oeb_book.metadata.cover
    cover_href = None

    # Get the raster cover if it's available.
    if cover_meta and str(cover_meta[0]) in manifest.ids:
        cover_item = manifest.ids[str(cover_meta[0])]
        if cover_item.media_type in OEB_RASTER_IMAGES:
            cover_href = cover_item.href
    else:
        # Figure out if we have a title page or a cover page
        guide = self.oeb_book.guide
        page_name = ''
        if 'titlepage' in guide:
            page_name = 'titlepage'
        elif 'cover' in guide:
            page_name = 'cover'

        if page_name:
            cover_item = manifest.hrefs[guide[page_name].href]
            # Get the first image in the page
            for img in cover_item.xpath('//img'):
                cover_href = cover_item.abshref(img.get('src'))
                break

    # Only write the image tag if it is in the manifest. An image that
    # is not in the manifest is never registered in image_hrefs, so it
    # must not be looked up there either.
    if not cover_href or cover_href not in manifest.hrefs:
        return ''

    if cover_href not in self.image_hrefs:
        self.image_hrefs[cover_href] = 'img_%s' % len(self.image_hrefs)
    return ('<coverpage><image l:href="#%s"/></coverpage>' %
            self.image_hrefs[cover_href])
|
|
|
|
def get_text(self):
    """
    Convert every spine item and wrap the result in the FB2 <body>.

    Depending on self.opts.sectionize, one <section> is opened for the
    whole book ('nothing'), one per spine item ('files'), or one for each
    page that the flattened TOC references as a whole page. Sections
    still open at the end are closed before </body>.

    @return: The body markup as a single string.
    """
    from ebook_converter.ebooks.oeb.base import XHTML
    from ebook_converter.ebooks.oeb.stylizer import Stylizer

    chunks = ['<body>']

    # With no sectionizing requested, a single section holds everything.
    if self.opts.sectionize == 'nothing':
        chunks.append('<section>')
        self.section_level += 1

    for item in self.oeb_book.spine:
        self.log.debug('Converting %s to FictionBook2 XML' % item.href)
        stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts,
                            self.opts.output_profile)

        # A page gets its own <section> when sectionizing per file, or
        # when the TOC points at the page itself.
        page_section_open = (self.opts.sectionize == 'files' or
                             None in self.toc.get(item.href, ()))
        if page_section_open:
            chunks.append('<section>')
            self.section_level += 1

        body = item.data.find(XHTML('body'))
        chunks.extend(self.dump_text(body, stylizer, item))

        if page_section_open:
            chunks.append('</section>')
            self.section_level -= 1

    # Close whatever sections are still open.
    if self.section_level > 0:
        chunks.extend(['</section>'] * self.section_level)
        self.section_level = 0

    chunks.append('</body>')
    return ''.join(chunks)
|
|
|
|
def fb2mlize_images(self):
    """
    Emit a <binary> element for every referenced raster image.

    Only manifest items whose href is a key of self.image_hrefs are
    written. JPEG and PNG data is embedded as is; other raster types are
    first passed through save_cover_data_to() and labelled image/jpeg.
    An image that fails to encode is logged and skipped.

    @return: The <binary> elements joined with newlines.
    """
    from ebook_converter.ebooks.oeb.base import OEB_RASTER_IMAGES

    # Width of one line of base64 text, so the encoded image does not end
    # up on a single line.
    line_width = 72
    binaries = []

    for item in self.oeb_book.manifest:
        # Skip anything the document text never referenced.
        if item.href not in self.image_hrefs:
            continue
        if item.media_type not in OEB_RASTER_IMAGES:
            continue
        try:
            if item.media_type in ('image/jpeg', 'image/png'):
                encoded = as_base64_unicode(item.data)
                content_type = item.media_type
            else:
                converted = save_cover_data_to(item.data,
                                               compression_quality=70)
                encoded = as_base64_unicode(converted)
                content_type = 'image/jpeg'

            rows = [encoded[start:start + line_width]
                    for start in range(0, len(encoded), line_width)]
            binaries.append('<binary id="%s" content-type="%s">%s'
                            '</binary>' % (self.image_hrefs[item.href],
                                           content_type, '\n'.join(rows)))
        except Exception as e:
            self.log.error('Error: Could not include file %s because '
                           '%s.' % (item.href, e))

    return '\n'.join(binaries)
|
|
|
|
def create_flat_toc(self, nodes, level):
    """
    Flatten a TOC tree into self.toc.

    An entry without a fragment marks its page as {None: 'page'}. An
    entry with a fragment records the nesting level under that fragment
    for its page, and its children are processed one level deeper.

    @param nodes: Iterable of TOC nodes, each with href and nodes.
    @param level: Nesting level to record for these nodes.
    """
    for node in nodes:
        page_href, _sep, fragment = node.href.partition('#')
        if fragment:
            if not self.toc.get(page_href, None):
                self.toc[page_href] = {}
            self.toc[page_href][fragment] = level
            self.create_flat_toc(node.nodes, level + 1)
        else:
            self.toc[page_href] = {None: 'page'}
|
|
|
|
def ensure_p(self):
    """
    Open a paragraph unless one is already open.

    @return: A pair (markup, tags): the markup to emit and the tag names
    that were opened. Both lists are empty if a paragraph was already
    open.
    """
    if not self.in_p:
        self.in_p = True
        return ['<p>'], ['p']
    return [], []
|
|
|
|
def close_open_p(self, tags):
    """
    Start a fresh paragraph, closing the current one first if needed.

    If a paragraph is open, every open tag from the innermost one up to
    and including the nearest 'p' is closed and then reopened in the
    original order. Otherwise a new <p> is simply opened.

    The tags list is only read; it is not modified.

    @param tags: Currently open FB2 tags, outermost first.

    @return: A pair (text, added_p): the markup to emit (its first entry
    is an empty string) and whether a new <p> was opened that the caller
    now has to track.
    """
    if not self.in_p:
        self.in_p = True
        return ['', '<p>'], True

    # Close all up to p. Close p. Reopen all closed tags including p.
    closed_tags = []
    for t in reversed(tags):
        closed_tags.append(t)
        if t == 'p':
            break

    text = ['']
    text.extend('</%s>' % t for t in closed_tags)
    text.extend('<%s>' % t for t in reversed(closed_tags))
    return text, False
|
|
|
|
def handle_simple_tag(self, tag, tags):
    """
    Open an inline FB2 tag unless it is already open.

    A paragraph is opened first (through ensure_p) if none is open.

    @param tag: Name of the FB2 tag to open.
    @param tags: Currently open FB2 tags.

    @return: A pair (markup, opened): the markup to emit and the tag
    names that were opened. Both are empty if the tag is already open.
    """
    if tag in tags:
        return [], []

    p_markup, p_opened = self.ensure_p()
    markup = list(p_markup) + ['<%s>' % tag]
    opened = list(p_opened) + [tag]
    return markup, opened
|
|
|
|
def dump_text(self, elem_tree, stylizer, page, tag_stack=None):
    '''
    This function is intended to be used in a recursive manner. dump_text
    will run through all elements in the elem_tree and call itself on each
    element.

    self.image_hrefs will be populated by calling this function.

    @param elem_tree: etree representation of XHTML content to be
    transformed.
    @param stylizer: Used to track the style of elements within the tree.
    @param page: OEB page used to determine absolute urls.
    @param tag_stack: List of open FB2 tags to take into account. Defaults
    to no open tags. The list is never modified.

    @return: List of strings representing the XHTML converted to FB2
    markup.
    '''
    from ebook_converter.ebooks.oeb.base import barename
    from ebook_converter.ebooks.oeb.base import namespace

    # None instead of a shared list literal as the default, so no state
    # can ever leak from one call into the next.
    if tag_stack is None:
        tag_stack = []
    elem = elem_tree

    # Ensure what we are converting is not a string and that the first tag
    # is part of the XHTML namespace. For anything else (comments,
    # processing instructions, foreign elements) only the trailing text
    # is kept, escaped like any other text written to the output.
    if (not isinstance(elem_tree.tag, (str, bytes)) or
            namespace(elem_tree.tag) != const.XHTML_NS):
        p = elem.getparent()
        if (p is not None and isinstance(p.tag, (str, bytes)) and
                namespace(p.tag) == const.XHTML_NS and elem.tail):
            return [prepare_string_for_xml(elem.tail)]
        return []

    # Elements that are not displayed contribute only their trailing text.
    style = stylizer.style(elem_tree)
    if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
            or style['visibility'] == 'hidden':
        if hasattr(elem, 'tail') and elem.tail:
            return [prepare_string_for_xml(elem.tail)]
        return []

    # FB2 generated output.
    fb2_out = []
    # FB2 tags in the order they are opened. This will be used to close
    # the tags.
    tags = []
    # First tag in tree
    tag = barename(elem_tree.tag)
    elem_id = elem_tree.attrib.get('id', None)
    # Number of blank lines above tag
    try:
        ems = int(round((float(style.marginTop) / style.fontSize) - 1))
        if ems < 0:
            ems = 0
    except Exception:
        ems = 0

    # Convert TOC entries to <title>s and add <section>s
    if self.opts.sectionize == 'toc':
        # A section cannot be a child of any other element than another
        # section, so leave the tag alone if there are parents
        if not tag_stack:
            # There are two reasons to start a new section here: the TOC
            # pointed to this page (then we use the first non-<body> on
            # the page as a <title>), or the TOC pointed to a specific
            # element
            newlevel = 0
            toc_entry = self.toc.get(page.href, None)
            if toc_entry is not None:
                if None in toc_entry:
                    if (tag != 'body' and hasattr(elem_tree, 'text') and
                            elem_tree.text):
                        newlevel = 1
                        # The page-level entry is used up once a title
                        # has been produced for it.
                        self.toc[page.href] = None
                if not newlevel and elem_id is not None:
                    newlevel = toc_entry.get(elem_id, None)

            # Start a new section if necessary
            if newlevel:
                while newlevel <= self.section_level:
                    fb2_out.append('</section>')
                    self.section_level -= 1
                fb2_out.append('<section>')
                self.section_level += 1
                fb2_out.append('<title>')
                tags.append('title')
        if self.section_level == 0:
            # If none of the prior processing made a section, make one now
            # to be FB2 spec compliant
            fb2_out.append('<section>')
            self.section_level += 1

    # Process the XHTML tag and styles. Converted to an FB2 tag.
    # Use individual if statement not if else. There can be only one XHTML
    # tag but it can have multiple styles.
    if tag == 'img' and elem_tree.attrib.get('src', None):
        # Only write the image tag if it is in the manifest.
        ihref = urlnormalize(page.abshref(elem_tree.attrib['src']))
        if ihref in self.oeb_book.manifest.hrefs:
            if ihref not in self.image_hrefs:
                self.image_hrefs[ihref] = 'img_%s' % len(self.image_hrefs)
            p_txt, p_tag = self.ensure_p()
            fb2_out += p_txt
            tags += p_tag
            fb2_out.append('<image l:href="#%s"/>' %
                           self.image_hrefs[ihref])
        else:
            self.log.warn(u'Ignoring image not in manifest: %s' % ihref)
    if tag in ('br', 'hr') or ems >= 1:
        multiplier = 1 if ems < 1 else ems
        if self.in_p:
            # Close everything up to the paragraph, emit the blank
            # line(s), then reopen what was closed.
            closed_tags = []
            for t in reversed(tag_stack + tags):
                fb2_out.append('</%s>' % t)
                closed_tags.append(t)
                if t == 'p':
                    break
            fb2_out.append('<empty-line/>' * multiplier)
            closed_tags.reverse()
            for t in closed_tags:
                fb2_out.append('<%s>' % t)
        else:
            fb2_out.append('<empty-line/>' * multiplier)
    if tag in ('div', 'li', 'p'):
        p_text, added_p = self.close_open_p(tag_stack + tags)
        fb2_out += p_text
        if added_p:
            tags.append('p')
    if tag == 'a' and elem_tree.attrib.get('href', None):
        # Handle only external links for now
        if urllib.parse.urlparse(elem_tree.attrib['href']).netloc:
            p_txt, p_tag = self.ensure_p()
            fb2_out += p_txt
            tags += p_tag
            # Escape the URL: a bare '&' in a query string would make
            # the attribute, and so the whole document, malformed XML.
            link = urlnormalize(elem_tree.attrib['href'])
            fb2_out.append('<a l:href="%s">' %
                           prepare_string_for_xml(link))
            tags.append('a')
    if tag == 'b' or style['font-weight'] in ('bold', 'bolder'):
        s_out, s_tags = self.handle_simple_tag('strong', tag_stack + tags)
        fb2_out += s_out
        tags += s_tags
    if tag == 'i' or style['font-style'] == 'italic':
        s_out, s_tags = self.handle_simple_tag('emphasis',
                                               tag_stack + tags)
        fb2_out += s_out
        tags += s_tags
    if (tag in ('del', 'strike') or
            style['text-decoration'] == 'line-through'):
        s_out, s_tags = self.handle_simple_tag('strikethrough',
                                               tag_stack + tags)
        fb2_out += s_out
        tags += s_tags
    if tag == 'sub':
        s_out, s_tags = self.handle_simple_tag('sub', tag_stack + tags)
        fb2_out += s_out
        tags += s_tags
    if tag == 'sup':
        s_out, s_tags = self.handle_simple_tag('sup', tag_stack + tags)
        fb2_out += s_out
        tags += s_tags

    # Process element text.
    if hasattr(elem_tree, 'text') and elem_tree.text:
        if not self.in_p:
            fb2_out.append('<p>')
        fb2_out.append(prepare_string_for_xml(elem_tree.text))
        if not self.in_p:
            fb2_out.append('</p>')

    # Process sub-elements.
    for item in elem_tree:
        fb2_out += self.dump_text(item, stylizer, page, tag_stack + tags)

    # Close open FB2 tags.
    tags.reverse()
    fb2_out += self.close_tags(tags)

    # Process element text that comes after the close of the XHTML tag but
    # before the next XHTML tag.
    if hasattr(elem_tree, 'tail') and elem_tree.tail:
        if not self.in_p:
            fb2_out.append('<p>')
        fb2_out.append(prepare_string_for_xml(elem_tree.tail))
        if not self.in_p:
            fb2_out.append('</p>')

    return fb2_out
|
|
|
|
def close_tags(self, tags):
    """
    Produce the closing markup for the given tags, in the given order.

    Closing a 'p' tag also records that no paragraph is open any more.

    @param tags: Tag names to close, innermost first.

    @return: List with one closing tag string per entry in tags.
    """
    closing = ['</%s>' % name for name in tags]
    if 'p' in tags:
        self.in_p = False
    return closing
|