1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-24 15:11:30 +02:00

Use the real constants module.

This is part of an ongoing refactor of the calibre code to make it more
readable and to transform it into something more coherent.

In this patch, the imports of some modules are changed so that each module's
namespace is no longer polluted with symbols from other modules, which were
often themselves imported from yet other modules. Yuck.
This commit is contained in:
2020-05-29 17:04:53 +02:00
parent ee4801228f
commit ce89f5c9d1
54 changed files with 2383 additions and 2081 deletions
+44 -40
View File
@@ -10,17 +10,13 @@ from lxml.builder import ElementMaker
from ebook_converter.constants_old import __appname__, __version__
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.polyglot.urllib import unquote
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid at kovidgoyal.net>'
NCX_NS = "http://www.daisy.org/z3986/2005/ncx/"
CALIBRE_NS = "http://calibre.kovidgoyal.net/2009/metadata"
NSMAP = {None: NCX_NS, 'calibre':CALIBRE_NS}
NSMAP = {None: NCX_NS, 'calibre': CALIBRE_NS}
E = ElementMaker(namespace=NCX_NS, nsmap=NSMAP)
C = ElementMaker(namespace=CALIBRE_NS, nsmap=NSMAP)
@@ -30,8 +26,10 @@ def parse_html_toc(data):
from ebook_converter.utils.cleantext import clean_xml_chars
from lxml import etree
if isinstance(data, bytes):
data = xml_to_unicode(data, strip_encoding_pats=True, resolve_entities=True)[0]
root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False, sanitize_names=True)
data = xml_to_unicode(data, strip_encoding_pats=True,
resolve_entities=True)[0]
root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False,
sanitize_names=True)
for a in root.xpath('//*[@href and local-name()="a"]'):
purl = urllib.parse.urlparse(unquote(a.get('href')))
href, fragment = purl[2], purl[5]
@@ -48,8 +46,8 @@ def parse_html_toc(data):
class TOC(list):
def __init__(self, href=None, fragment=None, text=None, parent=None,
play_order=0, base_path=os.getcwd(), type='unknown', author=None,
description=None, toc_thumbnail=None):
play_order=0, base_path=os.getcwd(), type='unknown',
author=None, description=None, toc_thumbnail=None):
self.href = href
self.fragment = fragment
if not self.fragment:
@@ -64,7 +62,7 @@ class TOC(list):
self.toc_thumbnail = toc_thumbnail
def __str__(self):
lines = ['TOC: %s#%s %s'%(self.href, self.fragment, self.text)]
lines = ['TOC: %s#%s %s' % (self.href, self.fragment, self.text)]
for child in self:
c = str(child).splitlines()
for l in c:
@@ -91,12 +89,14 @@ class TOC(list):
entry.parent = None
def add_item(self, href, fragment, text, play_order=None, type='unknown',
author=None, description=None, toc_thumbnail=None):
author=None, description=None, toc_thumbnail=None):
if play_order is None:
play_order = (self[-1].play_order if len(self) else self.play_order) + 1
play_order = (self[-1].play_order
if len(self) else self.play_order) + 1
self.append(TOC(href=href, fragment=fragment, text=text, parent=self,
base_path=self.base_path, play_order=play_order,
type=type, author=author, description=description, toc_thumbnail=toc_thumbnail))
type=type, author=author, description=description,
toc_thumbnail=toc_thumbnail))
return self[-1]
def top_level_items(self):
@@ -121,7 +121,10 @@ class TOC(list):
@property
def abspath(self):
'Return the file this toc entry points to as a absolute path to a file on the system.'
"""
Return the file this toc entry points to as a absolute path to a file
on the system.
"""
if self.href is None:
return None
@@ -136,8 +139,9 @@ class TOC(list):
toc = toc['toc']
if toc is None:
try:
toc = opfreader.soup.find('guide').find('reference', attrs={'type':'toc'})['href']
except:
toc = (opfreader.soup.find('guide')
.find('reference', attrs={'type': 'toc'})['href'])
except Exception:
for item in opfreader.manifest:
if 'toc' in item.href().lower():
toc = item.href()
@@ -151,13 +155,15 @@ class TOC(list):
toc = os.path.join(self.base_path, toc)
try:
if not os.path.exists(toc):
bn = os.path.basename(toc)
bn = bn.replace('_top.htm', '_toc.htm') # Bug in BAEN OPF files
bn = os.path.basename(toc)
# Bug in BAEN OPF files
bn = bn.replace('_top.htm', '_toc.htm')
toc = os.path.join(os.path.dirname(toc), bn)
self.read_html_toc(toc)
except:
print('WARNING: Could not read Table of Contents. Continuing anyway.')
except Exception:
print('WARNING: Could not read Table of Contents. '
'Continuing anyway.')
else:
path = opfreader.manifest.item(toc.lower())
path = getattr(path, 'path', path)
@@ -177,9 +183,9 @@ class TOC(list):
self.base_path = os.path.dirname(toc)
if root is None:
with open(toc, 'rb') as f:
raw = xml_to_unicode(f.read(), assume_utf8=True,
strip_encoding_pats=True)[0]
root = safe_xml_fromstring(raw)
raw = xml_to_unicode(f.read(), assume_utf8=True,
strip_encoding_pats=True)[0]
root = etree.fromstring(raw)
xpn = {'re': 'http://exslt.org/regular-expressions'}
XPath = functools.partial(etree.XPath, namespaces=xpn)
@@ -197,7 +203,7 @@ class TOC(list):
def process_navpoint(np, dest):
try:
play_order = int(get_attr(np, 1))
except:
except Exception:
play_order = 1
href = fragment = text = None
nd = dest
@@ -207,7 +213,7 @@ class TOC(list):
text = ''
for txt in txt_path(nl):
text += etree.tostring(txt, method='text',
encoding='unicode', with_tail=False)
encoding='unicode', with_tail=False)
content = content_path(np)
if content and text:
content = content[0]
@@ -242,17 +248,14 @@ class TOC(list):
self.add_item(href, fragment, txt)
def render(self, stream, uid):
root = E.ncx(
E.head(
E.meta(name='dtb:uid', content=str(uid)),
E.meta(name='dtb:depth', content=str(self.depth())),
E.meta(name='dtb:generator', content='%s (%s)'%(__appname__,
__version__)),
E.meta(name='dtb:totalPageCount', content='0'),
E.meta(name='dtb:maxPageNumber', content='0'),
),
E.docTitle(E.text('Table of Contents')),
)
root = E.ncx(E.head(E.meta(name='dtb:uid', content=str(uid)),
E.meta(name='dtb:depth',
content=str(self.depth())),
E.meta(name='dtb:generator', content='%s (%s)' %
(__appname__, __version__)),
E.meta(name='dtb:totalPageCount', content='0'),
E.meta(name='dtb:maxPageNumber', content='0')),
E.docTitle(E.text('Table of Contents')))
navmap = E.navMap()
root.append(navmap)
root.set('{http://www.w3.org/XML/1998/namespace}lang', 'en')
@@ -263,12 +266,12 @@ class TOC(list):
if not text:
text = ''
c[1] += 1
item_id = 'num_%d'%c[1]
item_id = 'num_%d' % c[1]
text = clean_xml_chars(text)
elem = E.navPoint(
E.navLabel(E.text(re.sub(r'\s+', ' ', text))),
E.content(src=str(np.href)+(('#' + str(np.fragment))
if np.fragment else '')),
if np.fragment else '')),
id=item_id,
playOrder=str(np.play_order)
)
@@ -282,7 +285,8 @@ class TOC(list):
try:
elem.append(C.meta(desc, name='description'))
except ValueError:
elem.append(C.meta(clean_xml_chars(desc), name='description'))
elem.append(C.meta(clean_xml_chars(desc),
name='description'))
idx = getattr(np, 'toc_thumbnail', None)
if idx:
elem.append(C.meta(idx, name='toc_thumbnail'))
@@ -293,5 +297,5 @@ class TOC(list):
for np in self:
navpoint(navmap, np)
raw = etree.tostring(root, encoding='utf-8', xml_declaration=True,
pretty_print=True)
pretty_print=True)
stream.write(raw)