mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-25 14:55:46 +01:00
617 lines
24 KiB
Python
617 lines
24 KiB
Python
"""
|
|
Transform OEB content into FB2 markup
|
|
"""
|
|
from datetime import datetime
|
|
import re
|
|
import textwrap
|
|
import urllib.parse
|
|
import uuid
|
|
|
|
from lxml import etree
|
|
|
|
from ebook_converter import constants as const
|
|
from ebook_converter import prepare_string_for_xml
|
|
from ebook_converter.constants_old import __appname__, __version__
|
|
from ebook_converter.ebooks.oeb import base
|
|
from ebook_converter.ebooks.oeb import parse_utils
|
|
from ebook_converter.polyglot.binary import as_base64_unicode
|
|
from ebook_converter.utils.img import save_cover_data_to
|
|
from ebook_converter.utils.localization import lang_as_iso639_1
|
|
|
|
|
|
__license__ = 'GPL 3'
|
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
|
|
class FB2MLizer(object):
|
|
'''
|
|
Todo: * Include more FB2 specific tags in the conversion.
|
|
* Handle notes and anchor links.
|
|
'''
|
|
|
|
def __init__(self, log):
|
|
self.log = log
|
|
self.reset_state()
|
|
|
|
def reset_state(self):
|
|
# Used to ensure text and tags are always within <p> and </p>
|
|
self.in_p = False
|
|
# Mapping of image names. OEB allows for images to have the same name
|
|
# but be stored in different directories. FB2 images are all in a flat
|
|
# layout so we rename all images into a sequential numbering system to
|
|
# ensure there are no collisions between image names.
|
|
self.image_hrefs = {}
|
|
# Mapping of toc items and their
|
|
self.toc = {}
|
|
# Used to see whether a new <section> needs to be opened
|
|
self.section_level = 0
|
|
|
|
def extract_content(self, oeb_book, opts):
|
|
self.log.info('Converting XHTML to FB2 markup...')
|
|
self.oeb_book = oeb_book
|
|
self.opts = opts
|
|
self.reset_state()
|
|
|
|
# Used for adding <section>s and <title>s to allow readers
|
|
# to generate toc from the document.
|
|
if self.opts.sectionize == 'toc':
|
|
self.create_flat_toc(self.oeb_book.toc, 1)
|
|
|
|
return self.fb2mlize_spine()
|
|
|
|
def fb2mlize_spine(self):
|
|
output = (
|
|
self.fb2_header(),
|
|
self.get_text(),
|
|
self.fb2mlize_images(),
|
|
self.fb2_footer(),
|
|
)
|
|
output = self.clean_text('\n'.join(output))
|
|
|
|
if self.opts.pretty_print:
|
|
output = etree.tostring(etree.fromstring(output),
|
|
encoding='unicode', pretty_print=True)
|
|
|
|
return '<?xml version="1.0" encoding="UTF-8"?>\n' + output
|
|
|
|
def clean_text(self, text):
|
|
# Remove pointless tags, but keep their contents.
|
|
text = re.sub(r'(?mu)<(strong|emphasis|strikethrough|sub|sup)>'
|
|
r'(\s*)</\1>', r'\2', text)
|
|
|
|
# Clean up paragraphs endings.
|
|
text = re.sub(r'(?mu)\s+</p>', '</p>', text)
|
|
# Condense empty paragraphs into a line break.
|
|
text = re.sub(r'(?mu)(?:<p></p>\s*){3,}', '<empty-line/>', text)
|
|
# Remove empty paragraphs.
|
|
text = re.sub(r'(?mu)<p></p>\s*', '', text)
|
|
# Put the paragraph following a paragraph on a separate line.
|
|
text = re.sub(r'(?mu)</p>\s*<p>', '</p>\n<p>', text)
|
|
|
|
if self.opts.insert_blank_line:
|
|
text = re.sub(r'(?mu)</p>', '</p><empty-line/>', text)
|
|
|
|
# Clean up title endings.
|
|
text = re.sub(r'(?mu)\s+</title>', '</title>', text)
|
|
# Remove empty title elements.
|
|
text = re.sub(r'(?mu)<title></title>\s*', '', text)
|
|
# Put the paragraph following a title on a separate line.
|
|
text = re.sub(r'(?mu)</title>\s*<p>', '</title>\n<p>', text)
|
|
|
|
# Put line breaks between paragraphs on a separate line.
|
|
text = re.sub(r'(?mu)</(p|title)>\s*<empty-line/>',
|
|
r'</\1>\n<empty-line/>', text)
|
|
text = re.sub(r'(?mu)<empty-line/>\s*<p>', '<empty-line/>\n<p>', text)
|
|
|
|
# Remove empty sections.
|
|
text = re.sub(r'(?mu)<section>\s*</section>', '', text)
|
|
# Clean up sections starts and ends.
|
|
text = re.sub(r'(?mu)\s*<section>', '\n<section>', text)
|
|
text = re.sub(r'(?mu)<section>\s*', '<section>\n', text)
|
|
text = re.sub(r'(?mu)\s*</section>', '\n</section>', text)
|
|
text = re.sub(r'(?mu)</section>\s*', '</section>\n', text)
|
|
|
|
return text
|
|
|
|
def fb2_header(self):
|
|
metadata = {}
|
|
metadata['title'] = self.oeb_book.metadata.title[0].value
|
|
metadata['appname'] = __appname__
|
|
metadata['version'] = __version__
|
|
metadata['date'] = '%i.%i.%i' % (datetime.now().day,
|
|
datetime.now().month,
|
|
datetime.now().year)
|
|
if self.oeb_book.metadata.language:
|
|
lc = lang_as_iso639_1(self.oeb_book.metadata.language[0].value)
|
|
if not lc:
|
|
lc = self.oeb_book.metadata.language[0].value
|
|
metadata['lang'] = lc or 'en'
|
|
else:
|
|
metadata['lang'] = u'en'
|
|
metadata['id'] = None
|
|
metadata['cover'] = self.get_cover()
|
|
metadata['genre'] = self.opts.fb2_genre
|
|
|
|
metadata['author'] = ''
|
|
for auth in self.oeb_book.metadata.creator:
|
|
author_first = ''
|
|
author_middle = ''
|
|
author_last = ''
|
|
author_parts = auth.value.split(' ')
|
|
if len(author_parts) == 1:
|
|
author_last = author_parts[0]
|
|
elif len(author_parts) == 2:
|
|
author_first = author_parts[0]
|
|
author_last = author_parts[1]
|
|
else:
|
|
author_first = author_parts[0]
|
|
author_middle = ' '.join(author_parts[1:-1])
|
|
author_last = author_parts[-1]
|
|
metadata['author'] += '<author>'
|
|
metadata['author'] += ('<first-name>%s</first-name>' %
|
|
prepare_string_for_xml(author_first))
|
|
if author_middle:
|
|
metadata['author'] += ('<middle-name>%s</middle-name>' %
|
|
prepare_string_for_xml(author_middle))
|
|
metadata['author'] += ('<last-name>%s</last-name>' %
|
|
prepare_string_for_xml(author_last))
|
|
metadata['author'] += '</author>'
|
|
if not metadata['author']:
|
|
metadata['author'] = ('<author><first-name></first-name>'
|
|
'<last-name></last-name></author>')
|
|
|
|
metadata['keywords'] = ''
|
|
tags = list(map(str, self.oeb_book.metadata.subject))
|
|
if tags:
|
|
tags = ', '.join(prepare_string_for_xml(x) for x in tags)
|
|
metadata['keywords'] = '<keywords>%s</keywords>' % tags
|
|
|
|
metadata['sequence'] = ''
|
|
if self.oeb_book.metadata.series:
|
|
index = '1'
|
|
if self.oeb_book.metadata.series_index:
|
|
index = self.oeb_book.metadata.series_index[0]
|
|
seq = prepare_string_for_xml(str(self.oeb_book.metadata.series[0]))
|
|
metadata['sequence'] = ('<sequence name="%s" number="%s"/>' %
|
|
(seq, index))
|
|
|
|
year = publisher = isbn = ''
|
|
identifiers = self.oeb_book.metadata['identifier']
|
|
for x in identifiers:
|
|
if (x.get(base.tag('opf', 'scheme'), None).lower() == 'uuid' or
|
|
str(x).startswith('urn:uuid:')):
|
|
metadata['id'] = str(x).split(':')[-1]
|
|
break
|
|
if metadata['id'] is None:
|
|
self.log.warn('No UUID identifier found')
|
|
metadata['id'] = str(uuid.uuid4())
|
|
|
|
try:
|
|
date = self.oeb_book.metadata['date'][0]
|
|
except IndexError:
|
|
pass
|
|
else:
|
|
year = ('<year>%s</year>' %
|
|
prepare_string_for_xml(date.value.partition('-')[0]))
|
|
|
|
try:
|
|
publisher = self.oeb_book.metadata['publisher'][0]
|
|
except IndexError:
|
|
pass
|
|
else:
|
|
publisher = ('<publisher>%s</publisher>' %
|
|
prepare_string_for_xml(publisher.value))
|
|
|
|
for x in identifiers:
|
|
if x.get(base.tag('opf', 'scheme'), None).lower() == 'isbn':
|
|
isbn = '<isbn>%s</isbn>' % prepare_string_for_xml(x.value)
|
|
|
|
metadata['year'] = year
|
|
metadata['isbn'] = isbn
|
|
metadata['publisher'] = publisher
|
|
for key, value in metadata.items():
|
|
if key not in ('author', 'cover', 'sequence', 'keywords', 'year',
|
|
'publisher', 'isbn'):
|
|
metadata[key] = prepare_string_for_xml(value)
|
|
|
|
try:
|
|
comments = self.oeb_book.metadata['description'][0]
|
|
except Exception:
|
|
metadata['comments'] = ''
|
|
else:
|
|
from ebook_converter.utils.html2text import html2text
|
|
annot = prepare_string_for_xml(html2text(comments.value).strip())
|
|
metadata['comments'] = f'<annotation><p>{annot}</p></annotation>'
|
|
|
|
# Keep the indentation level of the description the same as the body.
|
|
header = textwrap.dedent('''\
|
|
<FictionBook xmlns="http://www.gribuser.ru/xml/fictionbook/2.0" xmlns:l="http://www.w3.org/1999/xlink">
|
|
<description>
|
|
<title-info>
|
|
<genre>%(genre)s</genre>
|
|
%(author)s
|
|
<book-title>%(title)s</book-title>
|
|
%(cover)s
|
|
<lang>%(lang)s</lang>
|
|
%(keywords)s
|
|
%(sequence)s
|
|
%(comments)s
|
|
</title-info>
|
|
<document-info>
|
|
%(author)s
|
|
<program-used>%(appname)s %(version)s</program-used>
|
|
<date>%(date)s</date>
|
|
<id>%(id)s</id>
|
|
<version>1.0</version>
|
|
</document-info>
|
|
<publish-info>
|
|
%(publisher)s
|
|
%(year)s
|
|
%(isbn)s
|
|
</publish-info>
|
|
</description>''') % metadata
|
|
|
|
# Remove empty lines.
|
|
return '\n'.join(filter(str.strip, header.splitlines()))
|
|
|
|
def fb2_footer(self):
|
|
return '</FictionBook>'
|
|
|
|
def get_cover(self):
|
|
cover_href = None
|
|
|
|
# Get the raster cover if it's available.
|
|
if (self.oeb_book.metadata.cover and
|
|
str(self.oeb_book.metadata.cover[0]) in
|
|
self.oeb_book.manifest.ids):
|
|
id = str(self.oeb_book.metadata.cover[0])
|
|
cover_item = self.oeb_book.manifest.ids[id]
|
|
if cover_item.media_type in base.OEB_RASTER_IMAGES:
|
|
cover_href = cover_item.href
|
|
else:
|
|
# Figure out if we have a title page or a cover page
|
|
page_name = ''
|
|
if 'titlepage' in self.oeb_book.guide:
|
|
page_name = 'titlepage'
|
|
elif 'cover' in self.oeb_book.guide:
|
|
page_name = 'cover'
|
|
|
|
if page_name:
|
|
key = self.oeb_book.guide[page_name].href
|
|
cover_item = self.oeb_book.manifest.hrefs[key]
|
|
# Get the first image in the page
|
|
for img in cover_item.xpath('//img'):
|
|
cover_href = cover_item.abshref(img.get('src'))
|
|
break
|
|
|
|
if cover_href:
|
|
# Only write the image tag if it is in the manifest.
|
|
if (cover_href in self.oeb_book.manifest.hrefs and
|
|
cover_href not in self.image_hrefs):
|
|
self.image_hrefs[cover_href] = 'img_%s' % len(self.image_hrefs)
|
|
return ('<coverpage><image l:href="#%s"/></coverpage>' %
|
|
self.image_hrefs[cover_href])
|
|
return ''
|
|
|
|
def get_text(self):
|
|
from ebook_converter.ebooks.oeb.stylizer import Stylizer
|
|
text = ['<body>']
|
|
|
|
# Create main section if there are no others to create
|
|
if self.opts.sectionize == 'nothing':
|
|
text.append('<section>')
|
|
self.section_level += 1
|
|
|
|
for item in self.oeb_book.spine:
|
|
self.log.debug('Converting %s to FictionBook2 XML' % item.href)
|
|
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts,
|
|
self.opts.output_profile)
|
|
|
|
# Start a <section> if we must sectionize each file or if the TOC
|
|
# references this page
|
|
page_section_open = False
|
|
if (self.opts.sectionize == 'files' or
|
|
None in self.toc.get(item.href, ())):
|
|
text.append('<section>')
|
|
page_section_open = True
|
|
self.section_level += 1
|
|
|
|
text += self.dump_text(item.data.find(base.tag('xhtml', 'body')),
|
|
stylizer, item)
|
|
|
|
if page_section_open:
|
|
text.append('</section>')
|
|
self.section_level -= 1
|
|
|
|
# Close any open sections
|
|
while self.section_level > 0:
|
|
text.append('</section>')
|
|
self.section_level -= 1
|
|
|
|
text.append('</body>')
|
|
return ''.join(text)
|
|
|
|
def fb2mlize_images(self):
|
|
"""
|
|
This function uses the self.image_hrefs dictionary mapping. It is
|
|
populated by the dump_text function.
|
|
"""
|
|
images = []
|
|
for item in self.oeb_book.manifest:
|
|
# Don't write the image if it's not referenced in the document's
|
|
# text.
|
|
if item.href not in self.image_hrefs:
|
|
continue
|
|
if item.media_type in base.OEB_RASTER_IMAGES:
|
|
try:
|
|
if item.media_type not in ('image/jpeg', 'image/png'):
|
|
imdata = save_cover_data_to(item.data,
|
|
compression_quality=70)
|
|
raw_data = as_base64_unicode(imdata)
|
|
content_type = 'image/jpeg'
|
|
else:
|
|
raw_data = as_base64_unicode(item.data)
|
|
content_type = item.media_type
|
|
# Don't put the encoded image on a single line.
|
|
step = 72
|
|
data = '\n'.join(raw_data[i:i+step]
|
|
for i in range(0, len(raw_data), step))
|
|
images.append('<binary id="%s" content-type="%s">%s'
|
|
'</binary>' % (self.image_hrefs[item.href],
|
|
content_type, data))
|
|
except Exception as e:
|
|
self.log.error('Error: Could not include file %s because '
|
|
'%s.' % (item.href, e))
|
|
return '\n'.join(images)
|
|
|
|
def create_flat_toc(self, nodes, level):
|
|
for item in nodes:
|
|
href, mid, id = item.href.partition('#')
|
|
if not id:
|
|
self.toc[href] = {None: 'page'}
|
|
else:
|
|
if not self.toc.get(href, None):
|
|
self.toc[href] = {}
|
|
self.toc[href][id] = level
|
|
self.create_flat_toc(item.nodes, level + 1)
|
|
|
|
def ensure_p(self):
|
|
if self.in_p:
|
|
return [], []
|
|
else:
|
|
self.in_p = True
|
|
return ['<p>'], ['p']
|
|
|
|
def close_open_p(self, tags):
|
|
text = ['']
|
|
added_p = False
|
|
|
|
if self.in_p:
|
|
# Close all up to p. Close p. Reopen all closed tags including p.
|
|
closed_tags = []
|
|
tags.reverse()
|
|
for t in tags:
|
|
text.append('</%s>' % t)
|
|
closed_tags.append(t)
|
|
if t == 'p':
|
|
break
|
|
closed_tags.reverse()
|
|
for t in closed_tags:
|
|
text.append('<%s>' % t)
|
|
else:
|
|
text.append('<p>')
|
|
added_p = True
|
|
self.in_p = True
|
|
|
|
return text, added_p
|
|
|
|
def handle_simple_tag(self, tag, tags):
|
|
s_out = []
|
|
s_tags = []
|
|
if tag not in tags:
|
|
p_out, p_tags = self.ensure_p()
|
|
s_out += p_out
|
|
s_tags += p_tags
|
|
s_out.append('<%s>' % tag)
|
|
s_tags.append(tag)
|
|
return s_out, s_tags
|
|
|
|
def dump_text(self, elem_tree, stylizer, page, tag_stack=[]):
|
|
"""
|
|
This function is intended to be used in a recursive manner. dump_text
|
|
will run though all elements in the elem_tree and call itself on each
|
|
element.
|
|
|
|
self.image_hrefs will be populated by calling this function.
|
|
|
|
@param elem_tree: etree representation of XHTML content to be
|
|
transformed.
|
|
@param stylizer: Used to track the style of elements within the tree.
|
|
@param page: OEB page used to determine absolute urls.
|
|
@param tag_stack: List of open FB2 tags to take into account.
|
|
|
|
@return: List of string representing the XHTML converted to FB2 markup.
|
|
"""
|
|
elem = elem_tree
|
|
|
|
# Ensure what we are converting is not a string and that the fist tag
|
|
# is part of the XHTML namespace.
|
|
if (not isinstance(elem_tree.tag, (str, bytes)) or
|
|
parse_utils.namespace(elem_tree.tag) != const.XHTML_NS):
|
|
p = elem.getparent()
|
|
if (p is not None and isinstance(p.tag, (str, bytes)) and
|
|
parse_utils.namespace(p.tag) == const.XHTML_NS and
|
|
elem.tail):
|
|
return [elem.tail]
|
|
return []
|
|
|
|
style = stylizer.style(elem_tree)
|
|
if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
|
|
or style['visibility'] == 'hidden':
|
|
if hasattr(elem, 'tail') and elem.tail:
|
|
return [elem.tail]
|
|
return []
|
|
|
|
# FB2 generated output.
|
|
fb2_out = []
|
|
# FB2 tags in the order they are opened. This will be used to close
|
|
# the tags.
|
|
tags = []
|
|
# First tag in tree
|
|
tag = parse_utils.barename(elem_tree.tag)
|
|
# Number of blank lines above tag
|
|
try:
|
|
ems = int(round((float(style.marginTop) / style.fontSize) - 1))
|
|
if ems < 0:
|
|
ems = 0
|
|
except Exception:
|
|
ems = 0
|
|
|
|
# Convert TOC entries to <title>s and add <section>s
|
|
if self.opts.sectionize == 'toc':
|
|
# A section cannot be a child of any other element than another
|
|
# section, so leave the tag alone if there are parents
|
|
if not tag_stack:
|
|
# There are two reasons to start a new section here: the TOC
|
|
# pointed to this page (then we use the first non-<body> on
|
|
# the page as a <title>), or the TOC pointed to a specific
|
|
# element
|
|
newlevel = 0
|
|
toc_entry = self.toc.get(page.href, None)
|
|
if toc_entry is not None:
|
|
if None in toc_entry:
|
|
if (tag != 'body' and hasattr(elem_tree, 'text') and
|
|
elem_tree.text):
|
|
newlevel = 1
|
|
self.toc[page.href] = None
|
|
if (not newlevel and
|
|
elem_tree.attrib.get('id', None) is not None):
|
|
newlevel = toc_entry.get(elem_tree.attrib.get('id',
|
|
None),
|
|
None)
|
|
|
|
# Start a new section if necessary
|
|
if newlevel:
|
|
while newlevel <= self.section_level:
|
|
fb2_out.append('</section>')
|
|
self.section_level -= 1
|
|
fb2_out.append('<section>')
|
|
self.section_level += 1
|
|
fb2_out.append('<title>')
|
|
tags.append('title')
|
|
if self.section_level == 0:
|
|
# If none of the prior processing made a section, make one now
|
|
# to be FB2 spec compliant
|
|
fb2_out.append('<section>')
|
|
self.section_level += 1
|
|
|
|
# Process the XHTML tag and styles. Converted to an FB2 tag.
|
|
# Use individual if statement not if else. There can be only one XHTML
|
|
# tag but it can have multiple styles.
|
|
if tag == 'img' and elem_tree.attrib.get('src', None):
|
|
# Only write the image tag if it is in the manifest.
|
|
ihref = base.urlnormalize(page.abshref(elem_tree.attrib['src']))
|
|
if ihref in self.oeb_book.manifest.hrefs:
|
|
if ihref not in self.image_hrefs:
|
|
self.image_hrefs[ihref] = 'img_%s' % len(self.image_hrefs)
|
|
p_txt, p_tag = self.ensure_p()
|
|
fb2_out += p_txt
|
|
tags += p_tag
|
|
fb2_out.append('<image l:href="#%s"/>' %
|
|
self.image_hrefs[ihref])
|
|
else:
|
|
self.log.warn(u'Ignoring image not in manifest: %s' % ihref)
|
|
if tag in ('br', 'hr') or ems >= 1:
|
|
if ems < 1:
|
|
multiplier = 1
|
|
else:
|
|
multiplier = ems
|
|
if self.in_p:
|
|
closed_tags = []
|
|
open_tags = tag_stack+tags
|
|
open_tags.reverse()
|
|
for t in open_tags:
|
|
fb2_out.append('</%s>' % t)
|
|
closed_tags.append(t)
|
|
if t == 'p':
|
|
break
|
|
fb2_out.append('<empty-line/>' * multiplier)
|
|
closed_tags.reverse()
|
|
for t in closed_tags:
|
|
fb2_out.append('<%s>' % t)
|
|
else:
|
|
fb2_out.append('<empty-line/>' * multiplier)
|
|
if tag in ('div', 'li', 'p'):
|
|
p_text, added_p = self.close_open_p(tag_stack+tags)
|
|
fb2_out += p_text
|
|
if added_p:
|
|
tags.append('p')
|
|
if tag == 'a' and elem_tree.attrib.get('href', None):
|
|
# Handle only external links for now
|
|
if urllib.parse.urlparse(elem_tree.attrib['href']).netloc:
|
|
p_txt, p_tag = self.ensure_p()
|
|
fb2_out += p_txt
|
|
tags += p_tag
|
|
fb2_out.append('<a l:href="%s">' %
|
|
base.urlnormalize(elem_tree.attrib['href']))
|
|
tags.append('a')
|
|
if tag == 'b' or style['font-weight'] in ('bold', 'bolder'):
|
|
s_out, s_tags = self.handle_simple_tag('strong', tag_stack+tags)
|
|
fb2_out += s_out
|
|
tags += s_tags
|
|
if tag == 'i' or style['font-style'] == 'italic':
|
|
s_out, s_tags = self.handle_simple_tag('emphasis', tag_stack+tags)
|
|
fb2_out += s_out
|
|
tags += s_tags
|
|
if (tag in ('del', 'strike') or
|
|
style['text-decoration'] == 'line-through'):
|
|
s_out, s_tags = self.handle_simple_tag('strikethrough',
|
|
tag_stack+tags)
|
|
fb2_out += s_out
|
|
tags += s_tags
|
|
if tag == 'sub':
|
|
s_out, s_tags = self.handle_simple_tag('sub', tag_stack+tags)
|
|
fb2_out += s_out
|
|
tags += s_tags
|
|
if tag == 'sup':
|
|
s_out, s_tags = self.handle_simple_tag('sup', tag_stack+tags)
|
|
fb2_out += s_out
|
|
tags += s_tags
|
|
|
|
# Process element text.
|
|
if hasattr(elem_tree, 'text') and elem_tree.text:
|
|
if not self.in_p:
|
|
fb2_out.append('<p>')
|
|
fb2_out.append(prepare_string_for_xml(elem_tree.text))
|
|
if not self.in_p:
|
|
fb2_out.append('</p>')
|
|
|
|
# Process sub-elements.
|
|
for item in elem_tree:
|
|
fb2_out += self.dump_text(item, stylizer, page, tag_stack+tags)
|
|
|
|
# Close open FB2 tags.
|
|
tags.reverse()
|
|
fb2_out += self.close_tags(tags)
|
|
|
|
# Process element text that comes after the close of the XHTML tag but
|
|
# before the next XHTML tag.
|
|
if hasattr(elem_tree, 'tail') and elem_tree.tail:
|
|
if not self.in_p:
|
|
fb2_out.append('<p>')
|
|
fb2_out.append(prepare_string_for_xml(elem_tree.tail))
|
|
if not self.in_p:
|
|
fb2_out.append('</p>')
|
|
|
|
return fb2_out
|
|
|
|
def close_tags(self, tags):
|
|
text = []
|
|
for tag in tags:
|
|
text.append('</%s>' % tag)
|
|
if tag == 'p':
|
|
self.in_p = False
|
|
|
|
return text
|