1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-28 06:23:32 +01:00

Use the real constants module.

This is part of the ongoing refactor of the calibre code to make it more
readable and to transform it into something more coherent.

In this patch, the imports of some modules are changed so that they no longer
pollute each module's namespace with symbols from other modules, which were
often themselves imported from yet other modules. Yuck.
This commit is contained in:
2020-05-29 17:04:53 +02:00
parent ee4801228f
commit ce89f5c9d1
54 changed files with 2383 additions and 2081 deletions

View File

@@ -1,14 +1,19 @@
import os, re, posixpath
from itertools import cycle
import hashlib
import itertools
import os
import re
import traceback
import uuid
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
from lxml import etree
from ebook_converter.ebooks.metadata import opf2 as opf_meta
from ebook_converter.ebooks.oeb import base
from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.customize.conversion import OptionRecommendation
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
ADOBE_OBFUSCATION = 'http://ns.adobe.com/pdf/enc#RC'
IDPF_OBFUSCATION = 'http://www.idpf.org/2008/embedding'
@@ -16,8 +21,8 @@ def decrypt_font_data(key, data, algorithm):
is_adobe = algorithm == ADOBE_OBFUSCATION
crypt_len = 1024 if is_adobe else 1040
crypt = bytearray(data[:crypt_len])
key = cycle(iter(bytearray(key)))
decrypt = bytes(bytearray(x^next(key) for x in crypt))
key = itertools.cycle(iter(bytearray(key)))
decrypt = bytes(bytearray(x ^ next(key) for x in crypt))
return decrypt + data[crypt_len:]
@@ -29,18 +34,16 @@ def decrypt_font(key, path, algorithm):
class EPUBInput(InputFormatPlugin):
name = 'EPUB Input'
author = 'Kovid Goyal'
name = 'EPUB Input'
author = 'Kovid Goyal'
description = 'Convert EPUB files (.epub) to HTML'
file_types = {'epub'}
file_types = {'epub'}
output_encoding = None
commit_name = 'epub_input'
recommendations = {('page_breaks_before', '/', OptionRecommendation.MED)}
def process_encryption(self, encfile, opf, log):
from lxml import etree
import uuid, hashlib
idpf_key = opf.raw_unique_identifier
if idpf_key:
idpf_key = re.sub('[\u0020\u0009\u000d\u000a]', '', idpf_key)
@@ -56,27 +59,28 @@ class EPUBInput(InputFormatPlugin):
try:
key = item.text.rpartition(':')[-1]
key = uuid.UUID(key).bytes
except:
import traceback
except Exception:
traceback.print_exc()
key = None
try:
root = etree.parse(encfile)
for em in root.xpath('descendant::*[contains(name(), "EncryptionMethod")]'):
for em in root.xpath('descendant::*[contains(name(), '
'"EncryptionMethod")]'):
algorithm = em.get('Algorithm', '')
if algorithm not in {ADOBE_OBFUSCATION, IDPF_OBFUSCATION}:
return False
cr = em.getparent().xpath('descendant::*[contains(name(), "CipherReference")]')[0]
cr = em.getparent().xpath('descendant::*[contains(name(), '
'"CipherReference")]')[0]
uri = cr.get('URI')
path = os.path.abspath(os.path.join(os.path.dirname(encfile), '..', *uri.split('/')))
path = os.path.abspath(os.path.join(os.path.dirname(encfile),
'..', *uri.split('/')))
tkey = (key if algorithm == ADOBE_OBFUSCATION else idpf_key)
if (tkey and os.path.exists(path)):
self._encrypted_font_uris.append(uri)
decrypt_font(tkey, path, algorithm)
return True
except:
import traceback
except Exception:
traceback.print_exc()
return False
@@ -97,8 +101,11 @@ class EPUBInput(InputFormatPlugin):
return t
def rationalize_cover3(self, opf, log):
''' If there is a reference to the cover/titlepage via manifest properties, convert to
entries in the <guide> so that the rest of the pipeline picks it up. '''
"""
If there is a reference to the cover/titlepage via manifest
properties, convert to entries in the <guide> so that the rest of the
pipeline picks it up.
"""
from ebook_converter.ebooks.metadata.opf3 import items_with_property
removed = guide_titlepage_href = guide_titlepage_id = None
@@ -128,7 +135,8 @@ class EPUBInput(InputFormatPlugin):
titlepage_id, titlepage_href = tid, href.partition('#')[0]
break
if titlepage_href is None:
titlepage_href, titlepage_id = guide_titlepage_href, guide_titlepage_id
titlepage_href = guide_titlepage_href
titlepage_id = guide_titlepage_id
if titlepage_href is not None:
self.set_guide_type(opf, 'titlepage', titlepage_href, 'Title Page')
spine = list(opf.iterspine())
@@ -148,7 +156,6 @@ class EPUBInput(InputFormatPlugin):
means, at most one entry with type="cover" that points to a raster
cover and at most one entry with type="titlepage" that points to an
HTML titlepage. '''
from ebook_converter.ebooks.oeb.base import OPF
removed = None
from lxml import etree
guide_cover, guide_elem = None, None
@@ -160,12 +167,14 @@ class EPUBInput(InputFormatPlugin):
raster_cover = opf.raster_cover
if raster_cover:
if guide_elem is None:
g = opf.root.makeelement(OPF('guide'))
g = opf.root.makeelement(base.tag('opf', 'guide'))
opf.root.append(g)
else:
g = guide_elem.getparent()
guide_cover = raster_cover
guide_elem = g.makeelement(OPF('reference'), attrib={'href':raster_cover, 'type':'cover'})
guide_elem = g.makeelement(base.tag('opf', 'reference'),
attrib={'href': raster_cover,
'type': 'cover'})
g.append(guide_elem)
return
spine = list(opf.iterspine())
@@ -186,7 +195,8 @@ class EPUBInput(InputFormatPlugin):
# specially
if not self.for_viewer:
if len(spine) == 1:
log.warn('There is only a single spine item and it is marked as the cover. Removing cover marking.')
log.warn('There is only a single spine item and it is marked '
'as the cover. Removing cover marking.')
for guide_elem in tuple(opf.iterguide()):
if guide_elem.get('type', '').lower() == 'cover':
guide_elem.getparent().remove(guide_elem)
@@ -215,8 +225,9 @@ class EPUBInput(InputFormatPlugin):
# Render the titlepage to create a raster cover
from ebook_converter.ebooks import render_html_svg_workaround
guide_elem.set('href', 'calibre_raster_cover.jpg')
t = etree.SubElement(
elem[0].getparent(), OPF('item'), href=guide_elem.get('href'), id='calibre_raster_cover')
t = etree.SubElement(elem[0].getparent(), base.tag('opf', 'item'),
href=guide_elem.get('href'),
id='calibre_raster_cover')
t.set('media-type', 'image/jpeg')
if os.path.exists(guide_cover):
renderer = render_html_svg_workaround(guide_cover, log)
@@ -229,17 +240,16 @@ class EPUBInput(InputFormatPlugin):
return removed
def find_opf(self):
from ebook_converter.utils.xml_parse import safe_xml_fromstring
def attr(n, attr):
for k, v in n.attrib.items():
if k.endswith(attr):
return v
try:
with open('META-INF/container.xml', 'rb') as f:
root = safe_xml_fromstring(f.read())
root = etree.fromstring(f.read())
for r in root.xpath('//*[local-name()="rootfile"]'):
if attr(r, 'media-type') != "application/oebps-package+xml":
if (attr(r, 'media-type') !=
"application/oebps-package+xml"):
continue
path = attr(r, 'full-path')
if not path:
@@ -248,20 +258,18 @@ class EPUBInput(InputFormatPlugin):
if os.path.exists(path):
return path
except Exception:
import traceback
traceback.print_exc()
def convert(self, stream, options, file_ext, log, accelerators):
from ebook_converter.utils.zipfile import ZipFile
from ebook_converter import walk
from ebook_converter.ebooks import DRMError
from ebook_converter.ebooks.metadata.opf2 import OPF
try:
zf = ZipFile(stream)
zf.extractall(os.getcwd())
except:
except Exception:
log.exception('EPUB appears to be invalid ZIP file, trying a'
' more forgiving ZIP parser')
' more forgiving ZIP parser')
from ebook_converter.utils.localunzip import extractall
stream.seek(0)
extractall(stream)
@@ -276,11 +284,12 @@ class EPUBInput(InputFormatPlugin):
path = getattr(stream, 'name', 'stream')
if opf is None:
raise ValueError('%s is not a valid EPUB file (could not find opf)'%path)
raise ValueError('%s is not a valid EPUB file (could not find '
'opf)' % path)
opf = os.path.relpath(opf, os.getcwd())
parts = os.path.split(opf)
opf = OPF(opf, os.path.dirname(os.path.abspath(opf)))
# parts = os.path.split(opf)
opf = opf_meta.OPF(opf, os.path.dirname(os.path.abspath(opf)))
self._encrypted_font_uris = []
if os.path.exists(encfile):
@@ -288,18 +297,23 @@ class EPUBInput(InputFormatPlugin):
raise DRMError(os.path.basename(path))
self.encrypted_fonts = self._encrypted_font_uris
if len(parts) > 1 and parts[0]:
delta = '/'.join(parts[:-1])+'/'
# XXX(gryf): this code would fail pretty ugly, thus, this part was
# never used.
# if len(parts) > 1 and parts[0]:
# delta = '/'.join(parts[:-1])+'/'
def normpath(x):
return posixpath.normpath(delta + elem.get('href'))
# def normpath(x):
# return posixpath.normpath(delta + elem.get('href'))
for elem in opf.itermanifest():
elem.set('href', normpath(elem.get('href')))
for elem in opf.iterguide():
elem.set('href', normpath(elem.get('href')))
# for elem in opf.itermanifest():
# elem.set('href', normpath(elem.get('href')))
# for elem in opf.iterguide():
# elem.set('href', normpath(elem.get('href')))
f = self.rationalize_cover3 if opf.package_version >= 3.0 else self.rationalize_cover2
if opf.package_version >= 3.0:
f = self.rationalize_cover3
else:
f = self.rationalize_cover2
self.removed_cover = f(opf, log)
if self.removed_cover:
self.removed_items_to_ignore = (self.removed_cover,)
@@ -352,15 +366,18 @@ class EPUBInput(InputFormatPlugin):
from lxml import etree
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.ebooks.oeb.polish.parsing import parse
from ebook_converter.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, NCX, urlnormalize, urlunquote, serialize
from ebook_converter.ebooks.oeb.base import EPUB_NS, XHTML, NCX_MIME, \
NCX, urlnormalize, urlunquote, serialize
from ebook_converter.ebooks.oeb.polish.toc import first_child
from ebook_converter.utils.xml_parse import safe_xml_fromstring
from tempfile import NamedTemporaryFile
with open(nav_path, 'rb') as f:
raw = f.read()
raw = xml_to_unicode(raw, strip_encoding_pats=True, assume_utf8=True)[0]
raw = xml_to_unicode(raw, strip_encoding_pats=True,
assume_utf8=True)[0]
root = parse(raw, log=log)
ncx = safe_xml_fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="eng"><navMap/></ncx>')
ncx = etree.fromstring('<ncx xmlns="http://www.daisy.org/z3986/2005/'
'ncx/" version="2005-1" xml:lang="eng">'
'<navMap/></ncx>')
navmap = ncx[0]
et = '{%s}type' % EPUB_NS
bn = os.path.basename(nav_path)
@@ -368,8 +385,8 @@ class EPUBInput(InputFormatPlugin):
def add_from_li(li, parent):
href = text = None
for x in li.iterchildren(XHTML('a'), XHTML('span')):
text = etree.tostring(
x, method='text', encoding='unicode', with_tail=False).strip() or ' '.join(
text = etree.tostring(x, method='text', encoding='unicode',
with_tail=False).strip() or ' '.join(
x.xpath('descendant-or-self::*/@title')).strip()
href = x.get('href')
if href:
@@ -382,7 +399,7 @@ class EPUBInput(InputFormatPlugin):
np[0].append(np.makeelement(NCX('text')))
np[0][0].text = text
if href:
np.append(np.makeelement(NCX('content'), attrib={'src':href}))
np.append(np.makeelement(NCX('content'), attrib={'src': href}))
return np
def process_nav_node(node, toc_parent):
@@ -401,20 +418,25 @@ class EPUBInput(InputFormatPlugin):
else:
return
with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path), delete=False) as f:
with NamedTemporaryFile(suffix='.ncx', dir=os.path.dirname(nav_path),
delete=False) as f:
f.write(etree.tostring(ncx, encoding='utf-8'))
ncx_href = os.path.relpath(f.name, os.getcwd()).replace(os.sep, '/')
ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME, append=True).get('id')
ncx_id = opf.create_manifest_item(ncx_href, NCX_MIME,
append=True).get('id')
for spine in opf.root.xpath('//*[local-name()="spine"]'):
spine.set('toc', ncx_id)
opts.epub3_nav_href = urlnormalize(os.path.relpath(nav_path).replace(os.sep, '/'))
url = os.path.relpath(nav_path).replace(os.sep, '/')
opts.epub3_nav_href = urlnormalize(url)
opts.epub3_nav_parsed = root
if getattr(self, 'removed_cover', None):
changed = False
base_path = os.path.dirname(nav_path)
for elem in root.xpath('//*[@href]'):
href, frag = elem.get('href').partition('#')[::2]
link_path = os.path.relpath(os.path.join(base_path, urlunquote(href)), base_path)
link_path = os.path.relpath(os.path.join(base_path,
urlunquote(href)),
base_path)
abs_href = urlnormalize(link_path)
if abs_href == self.removed_cover:
changed = True