1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-02 10:23:34 +02:00

Fixing leftovers from first concept of constants

This commit is contained in:
2020-06-07 11:59:00 +02:00
parent 7419954e0c
commit a69884d724
9 changed files with 652 additions and 464 deletions

View File

@@ -7,6 +7,7 @@ import urllib.parse
from ebook_converter import constants as const
from ebook_converter import guess_type, strftime
from ebook_converter.constants_old import iswindows
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb.base import XPath, xml2text, urlnormalize
from ebook_converter.library.comments import comments_to_html, markdown
from ebook_converter.utils.date import is_date_undefined, as_local_time
@@ -371,7 +372,7 @@ def render_jacket(mi, output_profile,
# We cannot use data-calibre-rescale 100 on the body tag as that will just
# give the body tag a font size of 1em, which is useless.
for body in root.xpath('//*[local-name()="body"]'):
fw = body.makeelement(const.XHTML_DIV)
fw = body.makeelement(base.tag('xhtml', 'div'))
fw.set('data-calibre-rescale', '100')
for child in body:
fw.append(child)
@@ -388,9 +389,9 @@ def linearize_jacket(oeb):
for x in oeb.spine[:4]:
if XPath(JACKET_XPATH)(x.data):
for e in XPath('//h:table|//h:tr|//h:th')(x.data):
e.tag = const.XHTML_DIV
e.tag = base.tag('xhtml', 'div')
for e in XPath('//h:td')(x.data):
e.tag = const.XHTML_SPAN
e.tag = base.tag('xhtml', 'span')
break

View File

@@ -3,8 +3,11 @@ Splitting of the XHTML flows. Splitting can happen on page boundaries or can be
forced at "likely" locations to conform to size limitations. This transform
assumes a prior call to the flatcss transform.
"""
import os, functools, collections, re, copy
from collections import OrderedDict
import collections
import copy
import functools
import os
import re
import urllib.parse
from lxml.etree import XPath as _XPath
@@ -13,8 +16,7 @@ from lxml import etree
from ebook_converter import constants as const
from ebook_converter import as_unicode, force_unicode
from ebook_converter.ebooks.epub import rules
from ebook_converter.ebooks.oeb.base import \
OEB_STYLES, rewrite_links, urlnormalize
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb.polish.split import do_split
from ebook_converter.polyglot.urllib import unquote
from ebook_converter.css_selectors import Select, SelectorError
@@ -44,14 +46,15 @@ class SplitError(ValueError):
class Split(object):
def __init__(self, split_on_page_breaks=True, page_breaks_xpath=None,
             max_flow_size=0, remove_css_pagebreaks=True):
    """
    Store the splitting options for the transform.

    :param split_on_page_breaks: Stored unchanged on the instance.
    :param page_breaks_xpath: Optional XPath expression (string). When
                              given, it is compiled with ``XPath`` and
                              becomes the only page break selector.
    :param max_flow_size: Stored unchanged; handed to ``FlowSplitter``
                          which only splits by size when it is > 0.
    :param remove_css_pagebreaks: When true, ``page-break-before`` and
                                  ``page-break-after`` properties are
                                  removed from the CSS rules that
                                  produced a page break selector.
    """
    self.split_on_page_breaks = split_on_page_breaks
    self.page_breaks_xpath = page_breaks_xpath
    self.max_flow_size = max_flow_size
    # None means "not computed yet": find_page_breaks() fills it from the
    # stylesheets on first use.
    self.page_break_selectors = None
    self.remove_css_pagebreaks = remove_css_pagebreaks
    if self.page_breaks_xpath is not None:
        # Each selector is a (selector, before) pair; False marks the
        # break as *not* before the element (pb_before is set to '0').
        self.page_break_selectors = [(XPath(self.page_breaks_xpath),
                                      False)]
def __call__(self, oeb, opts):
self.oeb = oeb
@@ -71,7 +74,7 @@ class Split(object):
page_breaks, page_break_ids = self.find_page_breaks(item)
splitter = FlowSplitter(item, page_breaks, page_break_ids,
self.max_flow_size, self.oeb, self.opts)
self.max_flow_size, self.oeb, self.opts)
if splitter.was_split:
am = splitter.anchor_map
self.map[item.href] = collections.defaultdict(
@@ -81,25 +84,27 @@ class Split(object):
if self.page_break_selectors is None:
self.page_break_selectors = set()
stylesheets = [x.data for x in self.oeb.manifest if x.media_type in
OEB_STYLES]
base.OEB_STYLES]
for rule in rules(stylesheets):
before = force_unicode(getattr(rule.style.getPropertyCSSValue(
'page-break-before'), 'cssText', '').strip().lower())
after = force_unicode(getattr(rule.style.getPropertyCSSValue(
after = force_unicode(getattr(rule.style.getPropertyCSSValue(
'page-break-after'), 'cssText', '').strip().lower())
try:
if before and before not in {'avoid', 'auto', 'inherit'}:
self.page_break_selectors.add((rule.selectorText, True))
self.page_break_selectors.add((rule.selectorText,
True))
if self.remove_css_pagebreaks:
rule.style.removeProperty('page-break-before')
except:
except Exception:
pass
try:
if after and after not in {'avoid', 'auto', 'inherit'}:
self.page_break_selectors.add((rule.selectorText, False))
self.page_break_selectors.add((rule.selectorText,
False))
if self.remove_css_pagebreaks:
rule.style.removeProperty('page-break-after')
except:
except Exception:
pass
page_breaks = set()
select = Select(item.data)
@@ -110,14 +115,18 @@ class Split(object):
return [], []
descendants = frozenset(body[0].iterdescendants('*'))
_tags = {'html', 'body', 'head', 'style', 'script', 'meta', 'link'}
for selector, before in self.page_break_selectors:
try:
for elem in select(selector):
if elem in descendants and elem.tag.rpartition('}')[2].lower() not in {'html', 'body', 'head', 'style', 'script', 'meta', 'link'}:
if (elem in descendants and
elem.tag.rpartition('}')[2].lower() not in _tags):
elem.set('pb_before', '1' if before else '0')
page_breaks.add(elem)
except SelectorError as err:
self.log.warn('Ignoring page breaks specified with invalid CSS selector: %r (%s)' % (selector, as_unicode(err)))
self.log.warn('Ignoring page breaks specified with invalid '
'CSS selector: %r (%s)' %
(selector, as_unicode(err)))
for i, elem in enumerate(item.data.iter('*')):
try:
@@ -126,23 +135,23 @@ class Split(object):
continue
page_breaks = list(page_breaks)
page_breaks.sort(key=lambda x:int(x.get('pb_order')))
page_breaks.sort(key=lambda x: int(x.get('pb_order')))
page_break_ids, page_breaks_ = [], []
for i, x in enumerate(page_breaks):
x.set('id', x.get('id', 'calibre_pb_%d'%i))
x.set('id', x.get('id', 'calibre_pb_%d' % i))
id = x.get('id')
try:
xp = XPath('//*[@id="%s"]'%id)
except:
xp = XPath('//*[@id="%s"]' % id)
except Exception:
try:
xp = XPath("//*[@id='%s']"%id)
except:
xp = XPath("//*[@id='%s']" % id)
except Exception:
# The id has both a quote and an apostrophe, or some other problem.
# Just replace it since I doubt its going to work anywhere else
# either
id = 'calibre_pb_%d'%i
# Just replace it since I doubt it's going to work anywhere
# else either
id = 'calibre_pb_%d' % i
x.set('id', id)
xp = XPath('//*[@id=%r]'%id)
xp = XPath('//*[@id=%r]' % id)
page_breaks_.append((xp, x.get('pb_before', '0') == '1'))
page_break_ids.append(id)
@@ -159,7 +168,7 @@ class Split(object):
for item in self.oeb.manifest:
if etree.iselement(item.data):
self.current_item = item
rewrite_links(item.data, self.rewrite_links)
base.rewrite_links(item.data, self.rewrite_links)
def rewrite_links(self, url):
href, frag = urllib.parse.urldefrag(url)
@@ -169,7 +178,7 @@ class Split(object):
# Unparseable URL
return url
try:
href = urlnormalize(href)
href = base.urlnormalize(href)
except ValueError:
# href has non utf-8 quoting
return url
@@ -188,19 +197,19 @@ class FlowSplitter(object):
'The actual splitting logic'
def __init__(self, item, page_breaks, page_break_ids, max_flow_size, oeb,
opts):
self.item = item
self.oeb = oeb
self.opts = opts
self.log = oeb.log
self.page_breaks = page_breaks
opts):
self.item = item
self.oeb = oeb
self.opts = opts
self.log = oeb.log
self.page_breaks = page_breaks
self.page_break_ids = page_break_ids
self.max_flow_size = max_flow_size
self.base = item.href
self.csp_counter = 0
self.max_flow_size = max_flow_size
self.base = item.href
self.csp_counter = 0
base, ext = os.path.splitext(self.base)
self.base = base.replace('%', '%%')+'_split_%.3d'+ext
name, ext = os.path.splitext(self.base)
self.base = name.replace('%', '%%') + '_split_%.3d' + ext
self.trees = [self.item.data.getroottree()]
self.splitting_on_page_breaks = True
@@ -210,13 +219,13 @@ class FlowSplitter(object):
if self.max_flow_size > 0:
lt_found = False
self.log('\tLooking for large trees in %s...'%item.href)
self.log('\tLooking for large trees in %s...' % item.href)
trees = list(self.trees)
self.tree_map = {}
for i, tree in enumerate(trees):
size = len(tostring(tree.getroot()))
if size > self.max_flow_size:
self.log('\tFound large tree #%d'%i)
self.log('\tFound large tree #%d' % i)
lt_found = True
self.split_trees = []
self.split_to_size(tree)
@@ -229,11 +238,11 @@ class FlowSplitter(object):
self.was_split = len(self.trees) > 1
if self.was_split:
self.log('\tSplit into %d parts'%len(self.trees))
self.log('\tSplit into %d parts' % len(self.trees))
self.commit()
def split_on_page_breaks(self, orig_tree):
ordered_ids = OrderedDict()
ordered_ids = collections.OrderedDict()
all_page_break_ids = frozenset(self.page_break_ids)
for elem_id in orig_tree.xpath('//*/@id'):
if elem_id in all_page_break_ids:
@@ -248,9 +257,10 @@ class FlowSplitter(object):
tree = self.trees[i]
elem = pattern(tree)
if elem:
self.log.debug('\t\tSplitting on page-break at id=%s'%
elem[0].get('id'))
before_tree, after_tree = self.do_split(tree, elem[0], before)
self.log.debug('\t\tSplitting on page-break at id=%s' %
elem[0].get('id'))
before_tree, after_tree = self.do_split(tree, elem[0],
before)
self.trees[i:i+1] = [before_tree, after_tree]
break
@@ -269,7 +279,11 @@ class FlowSplitter(object):
if body is not None:
existing_ids = frozenset(body.xpath('//*/@id'))
for x in ids - existing_ids:
body.insert(0, body.makeelement(const.XHTML_div, id=x, style='height:0pt'))
body.insert(0,
body.makeelement(base.tag('xhtml',
'div'),
id=x,
style='height:0pt'))
ids = set()
trees.append(tree)
self.trees = trees
@@ -281,12 +295,13 @@ class FlowSplitter(object):
return body[0]
def do_split(self, tree, split_point, before):
    """
    Split ``tree`` into a *before* and *after* tree at ``split_point``.

    :param tree: The tree being split. It is not read here; only
                 ``split_point`` and ``before`` are forwarded.
    :param split_point: The element at which the split happens.
    :param before: If True tree is split before split_point, otherwise
                   after split_point
    :return: before_tree, after_tree
    """
    # Inside a method body the bare name ``do_split`` resolves to the
    # module-level function imported from oeb.polish.split, not to this
    # method, so this is a delegation and not a recursive call.
    return do_split(split_point, self.log, before=before)
def is_page_empty(self, root):
@@ -294,7 +309,7 @@ class FlowSplitter(object):
if body is None:
return False
txt = re.sub(r'\s+|\xa0', '',
etree.tostring(body, method='text', encoding='unicode'))
etree.tostring(body, method='text', encoding='unicode'))
if len(txt) > 1:
return False
for img in root.xpath('//h:img', namespaces=const.XPNSMAP):
@@ -305,13 +320,13 @@ class FlowSplitter(object):
return True
def split_text(self, text, root, size):
self.log.debug('\t\t\tSplitting text of length: %d'%len(text))
self.log.debug('\t\t\tSplitting text of length: %d' % len(text))
rest = text.replace('\r', '')
parts = re.split('\n\n', rest)
self.log.debug('\t\t\t\tFound %d parts'%len(parts))
self.log.debug('\t\t\t\tFound %d parts' % len(parts))
if max(map(len, parts)) > size:
raise SplitError('Cannot split as file contains a <pre> tag '
'with a very large paragraph', root)
'with a very large paragraph', root)
ans = []
buf = ''
for part in parts:
@@ -331,7 +346,8 @@ class FlowSplitter(object):
continue
if pre.text and len(pre.text) > self.max_flow_size*0.5:
self.log.debug('\t\tSplitting large <pre> tag')
frags = self.split_text(pre.text, root, int(0.2*self.max_flow_size))
frags = self.split_text(pre.text, root,
int(0.2 * self.max_flow_size))
new_pres = []
for frag in frags:
pre2 = copy.copy(pre)
@@ -346,7 +362,8 @@ class FlowSplitter(object):
split_point, before = self.find_split_point(root)
if split_point is None:
raise SplitError(self.item.href, root)
self.log.debug('\t\t\tSplit point:', split_point.tag, tree.getpath(split_point))
self.log.debug('\t\t\tSplit point:', split_point.tag,
tree.getpath(split_point))
trees = self.do_split(tree, split_point, before)
sizes = [len(tostring(t.getroot())) for t in trees]
@@ -361,12 +378,11 @@ class FlowSplitter(object):
continue
elif size <= self.max_flow_size:
self.split_trees.append(t)
self.log.debug(
'\t\t\tCommitted sub-tree #%d (%d KB)'%(
len(self.split_trees), size/1024.))
self.log.debug('\t\t\tCommitted sub-tree #%d (%d KB)' %
(len(self.split_trees), size/1024.))
else:
self.log.debug(
'\t\t\tSplit tree still too large: %d KB' % (size/1024.))
self.log.debug('\t\t\tSplit tree still too large: %d KB' %
(size / 1024))
self.split_to_size(t)
def find_split_point(self, root):
@@ -385,8 +401,8 @@ class FlowSplitter(object):
'''
def pick_elem(elems):
if elems:
elems = [i for i in elems if i.get(SPLIT_POINT_ATTR, '0') !=
'1']
elems = [i for i in elems
if i.get(SPLIT_POINT_ATTR, '0') != '1']
if elems:
i = int(len(elems)//2)
elems[i].set(SPLIT_POINT_ATTR, '1')
@@ -407,7 +423,7 @@ class FlowSplitter(object):
if elem is not None:
try:
XPath(elem.getroottree().getpath(elem))
except:
except Exception:
continue
return elem, True
@@ -421,23 +437,24 @@ class FlowSplitter(object):
'''
if not self.was_split:
return
self.anchor_map = collections.defaultdict(lambda :self.base%0)
self.anchor_map = collections.defaultdict(lambda: self.base % 0)
self.files = []
for i, tree in enumerate(self.trees):
root = tree.getroot()
self.files.append(self.base%i)
self.files.append(self.base % i)
for elem in root.xpath('//*[@id or @name]'):
for anchor in elem.get('id', ''), elem.get('name', ''):
if anchor != '' and anchor not in self.anchor_map:
self.anchor_map[anchor] = self.files[-1]
for elem in root.xpath('//*[@%s]'%SPLIT_POINT_ATTR):
for elem in root.xpath('//*[@%s]' % SPLIT_POINT_ATTR):
elem.attrib.pop(SPLIT_POINT_ATTR, '0')
spine_pos = self.item.spine_position
for current, tree in zip(*map(reversed, (self.files, self.trees))):
for a in tree.getroot().xpath('//h:a[@href]', namespaces=const.XPNSMAP):
for a in tree.getroot().xpath('//h:a[@href]',
namespaces=const.XPNSMAP):
href = a.get('href').strip()
if href.startswith('#'):
anchor = href[1:]
@@ -448,7 +465,8 @@ class FlowSplitter(object):
new_id = self.oeb.manifest.generate(id=self.item.id)[0]
new_item = self.oeb.manifest.add(new_id, current,
self.item.media_type, data=tree.getroot())
self.item.media_type,
data=tree.getroot())
self.oeb.spine.insert(spine_pos, new_item, self.item.linear)
if self.oeb.guide:

View File

@@ -7,7 +7,7 @@ from lxml import etree
from ebook_converter import constants as const
from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.ebooks.oeb.base import TOC, xml2text
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks import ConversionError
@@ -15,8 +15,8 @@ def XPath(x):
try:
return etree.XPath(x, namespaces=const.XPNSMAP)
except etree.XPathSyntaxError:
raise ConversionError(
'The syntax of the XPath expression %s is invalid.' % repr(x))
raise ConversionError('The syntax of the XPath expression %s is '
'invalid.' % repr(x))
def isspace(x):
@@ -33,9 +33,13 @@ def at_start(elem):
for x in body.iter():
if x is elem:
return True
if hasattr(getattr(x, 'tag', None), 'rpartition') and x.tag.rpartition('}')[-1] in {'img', 'svg'}:
if hasattr(getattr(x, 'tag', None),
'rpartition') and x.tag.rpartition('}')[-1] in {'img',
'svg'}:
return False
if isspace(getattr(x, 'text', None)) and (x in ancestors or isspace(getattr(x, 'tail', None))):
if isspace(getattr(x, 'text', None)) and (x in ancestors or
isspace(getattr(x, 'tail',
None))):
continue
return False
return False
@@ -52,7 +56,7 @@ class DetectStructure(object):
self.detect_chapters()
if self.oeb.auto_generated_toc or opts.use_auto_toc:
orig_toc = self.oeb.toc
self.oeb.toc = TOC()
self.oeb.toc = base.TOC()
self.create_level_based_toc()
if self.oeb.toc.count() < 1:
if not opts.no_chapters_in_toc and self.detected_chapters:
@@ -64,14 +68,14 @@ class DetectStructure(object):
else:
self.oeb.auto_generated_toc = True
self.log('Auto generated TOC with %d entries.' %
self.oeb.toc.count())
self.oeb.toc.count())
if opts.toc_filter is not None:
regexp = re.compile(opts.toc_filter)
for node in list(self.oeb.toc.iter()):
if not node.title or regexp.search(node.title) is not None:
self.log('Filtering', node.title if node.title else
'empty node', 'from TOC')
'empty node', 'from TOC')
self.oeb.toc.remove(node)
if opts.page_breaks_before is not None:
@@ -80,10 +84,11 @@ class DetectStructure(object):
for elem in pb_xpath(item.data):
try:
prev = next(elem.itersiblings(tag=etree.Element,
preceding=True))
if (parse_utils.barename(elem.tag) in {'h1', 'h2'} and parse_utils.barename(
prev.tag) in {'h1', 'h2'} and (not prev.tail or
not prev.tail.split())):
preceding=True))
if (parse_utils.barename(elem.tag) in {'h1', 'h2'} and
parse_utils.barename(prev.tag) in {'h1',
'h2'} and
(not prev.tail or not prev.tail.split())):
# We have two adjacent headings, do not put a page
# break on the second one
continue
@@ -106,9 +111,9 @@ class DetectStructure(object):
expr = self.opts.start_reading_at
try:
expr = XPath(expr)
except:
self.log.warn(
'Invalid start reading at XPath expression, ignoring: %s'%expr)
except Exception:
self.log.warn('Invalid start reading at XPath expression, '
'ignoring: %s' % expr)
return
for item in self.oeb.spine:
if not hasattr(item.data, 'xpath'):
@@ -118,16 +123,17 @@ class DetectStructure(object):
elem = matches[0]
eid = elem.get('id', None)
if not eid:
eid = 'start_reading_at_'+str(uuid.uuid4()).replace('-', '')
eid = 'start_reading_at_' + str(uuid.uuid4()).replace('-',
'')
elem.set('id', eid)
if 'text' in self.oeb.guide:
self.oeb.guide.remove('text')
self.oeb.guide.add('text', 'Start', item.href+'#'+eid)
self.log('Setting start reading at position to %s in %s'%(
self.opts.start_reading_at, item.href))
self.log('Setting start reading at position to %s in %s' %
(self.opts.start_reading_at, item.href))
return
self.log.warn("Failed to find start reading at position: %s"%
self.opts.start_reading_at)
self.log.warn("Failed to find start reading at position: %s" %
self.opts.start_reading_at)
def get_toc_parts_for_xpath(self, expr):
# if an attribute is selected by the xpath expr then truncate it
@@ -148,12 +154,14 @@ class DetectStructure(object):
ans = XPath(expr)(doc)
len(ans)
return ans
except:
self.log.warn('Invalid chapter expression, ignoring: %s'%expr)
except Exception:
self.log.warn('Invalid chapter expression, ignoring: %s' %
expr)
return []
if self.opts.chapter:
chapter_path, title_attribute = self.get_toc_parts_for_xpath(self.opts.chapter)
chapter_path, title_attribute = (
self.get_toc_parts_for_xpath(self.opts.chapter))
self.chapter_title_attribute = title_attribute
for item in self.oeb.spine:
for x in find_matches(chapter_path, item.data):
@@ -165,25 +173,28 @@ class DetectStructure(object):
c = collections.Counter()
for item, elem in self.detected_chapters:
c[item] += 1
text = xml2text(elem).strip()
text = base.xml2text(elem).strip()
text = re.sub(r'\s+', ' ', text.strip())
self.log('\tDetected chapter:', text[:50])
if chapter_mark == 'none':
continue
if chapter_mark == 'rule':
mark = elem.makeelement(const.XHTML_HR)
mark = elem.makeelement(base.tag('xhtml', 'hr'))
elif chapter_mark == 'pagebreak':
if c[item] < 3 and at_start(elem):
# For the first two elements in this item, check if they
# are at the start of the file, in which case inserting a
# page break in unnecessary and can lead to extra blank
# pages in the PDF Output plugin. We need to use two as
# feedbooks epubs match both a heading tag and its
# containing div with the default chapter expression.
# For the first two elements in this item, check if
# they are at the start of the file, in which case
# inserting a page break is unnecessary and can lead
# to extra blank pages in the PDF Output plugin. We
# need to use two as feedbooks epubs match both a
# heading tag and its containing div with the default
# chapter expression.
continue
mark = elem.makeelement(const.XHTML_DIV, style=page_break_after)
mark = elem.makeelement(base.tag('xhtml', 'div'),
style=page_break_after)
else: # chapter_mark == 'both':
mark = elem.makeelement(const.XHTML_HR, style=page_break_before)
mark = elem.makeelement(base.tag('xhtml', 'hr'),
style=page_break_before)
try:
elem.addprevious(mark)
except TypeError:
@@ -196,7 +207,9 @@ class DetectStructure(object):
def create_toc_from_chapters(self):
    """
    Add one TOC entry for every detected chapter.

    Play orders are consecutive and start at the value returned by
    ``self.oeb.toc.next_play_order()``. The same number is passed to
    ``elem_to_link`` as its counter argument and to ``toc.add`` as the
    entry's ``play_order``.
    """
    first = self.oeb.toc.next_play_order()
    for counter, (item, elem) in enumerate(self.detected_chapters, first):
        text, href = self.elem_to_link(item, elem,
                                       self.chapter_title_attribute,
                                       counter)
        self.oeb.toc.add(text, href, play_order=counter)
@@ -216,18 +229,21 @@ class DetectStructure(object):
if frag:
href = '#'.join((href, frag))
if not self.oeb.toc.has_href(href):
text = xml2text(a)
text = base.xml2text(a)
text = text[:100].strip()
if (not self.opts.duplicate_links_in_toc and
self.oeb.toc.has_text(text)):
continue
try:
self.oeb.toc.add(text, href,
self.oeb.toc.add(
text, href,
play_order=self.oeb.toc.next_play_order())
num += 1
except ValueError:
self.oeb.log.exception('Failed to process link: %r' % href)
continue # Most likely an incorrectly URL encoded link
self.oeb.log.exception('Failed to process link: '
'%r' % href)
# Most likely an incorrectly URL encoded link
continue
if self.opts.max_toc_links > 0 and \
num >= self.opts.max_toc_links:
self.log('Maximum TOC links reached, stopping.')
@@ -238,14 +254,14 @@ class DetectStructure(object):
if title_attribute is not None:
text = elem.get(title_attribute, '')
if not text:
text = xml2text(elem).strip()
text = base.xml2text(elem).strip()
if not text:
text = elem.get('title', '')
if not text:
text = elem.get('alt', '')
text = re.sub(r'\s+', ' ', text.strip())
text = text[:1000].strip()
id = elem.get('id', 'calibre_toc_%d'%counter)
id = elem.get('id', 'calibre_toc_%d' % counter)
elem.set('id', id)
href = '#'.join((item.href, id))
return text, href
@@ -260,26 +276,29 @@ class DetectStructure(object):
ans = XPath(expr)(doc)
len(ans)
return ans
except:
self.log.warn('Invalid ToC expression, ignoring: %s'%expr)
except Exception:
self.log.warn('Invalid ToC expression, ignoring: %s' % expr)
return []
for document in self.oeb.spine:
previous_level1 = list(added.values())[-1] if added else None
previous_level2 = list(added2.values())[-1] if added2 else None
level1_toc, level1_title = self.get_toc_parts_for_xpath(self.opts.level1_toc)
(level1_toc,
level1_title) = self.get_toc_parts_for_xpath(self.opts.level1_toc)
for elem in find_matches(level1_toc, document.data):
text, _href = self.elem_to_link(document, elem, level1_title, counter)
text, _href = self.elem_to_link(document, elem, level1_title,
counter)
counter += 1
if text:
node = self.oeb.toc.add(text, _href,
play_order=self.oeb.toc.next_play_order())
node = self.oeb.toc.add(
text, _href, play_order=self.oeb.toc.next_play_order())
added[elem] = node
# node.add('Top', _href)
if self.opts.level2_toc is not None and added:
level2_toc, level2_title = self.get_toc_parts_for_xpath(self.opts.level2_toc)
level2_toc, level2_title = self.get_toc_parts_for_xpath(
self.opts.level2_toc)
for elem in find_matches(level2_toc, document.data):
level1 = None
for item in document.data.iterdescendants():
@@ -290,15 +309,19 @@ class DetectStructure(object):
if previous_level1 is None:
break
level1 = previous_level1
text, _href = self.elem_to_link(document, elem, level2_title, counter)
text, _href = self.elem_to_link(document, elem,
level2_title,
counter)
counter += 1
if text:
added2[elem] = level1.add(text, _href,
added2[elem] = level1.add(
text, _href,
play_order=self.oeb.toc.next_play_order())
break
if self.opts.level3_toc is not None and added2:
level3_toc, level3_title = self.get_toc_parts_for_xpath(self.opts.level3_toc)
level3_toc, level3_title = self.get_toc_parts_for_xpath(
self.opts.level3_toc)
for elem in find_matches(level3_toc, document.data):
level2 = None
for item in document.data.iterdescendants():
@@ -309,10 +332,13 @@ class DetectStructure(object):
if previous_level2 is None:
break
level2 = previous_level2
text, _href = \
self.elem_to_link(document, elem, level3_title, counter)
text, _href = self.elem_to_link(document,
elem,
level3_title,
counter)
counter += 1
if text:
level2.add(text, _href,
play_order=self.oeb.toc.next_play_order())
play_order=self.oeb
.toc.next_play_order())
break