1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-22 22:21:31 +02:00

Fixing leftovers from first concept of constants

This commit is contained in:
2020-06-07 11:59:00 +02:00
parent 7419954e0c
commit a69884d724
9 changed files with 652 additions and 464 deletions
+33 -36
View File
@@ -1,5 +1,8 @@
import collections
import errno
import hashlib
import io
import itertools
import logging
import os
import re
@@ -7,13 +10,10 @@ import shutil
import sys
import time
import unicodedata
import uuid
from collections import defaultdict
from io import BytesIO
from itertools import count
import urllib.parse
import uuid
from css_parser import getUrls, replaceUrls
import css_parser
from lxml import etree
from ebook_converter import constants as const
@@ -35,10 +35,7 @@ from ebook_converter.ebooks.metadata.utils import parse_opf_version
from ebook_converter.ebooks.mobi import MobiError
from ebook_converter.ebooks.mobi.reader.headers import MetadataHeader
from ebook_converter.ebooks.mobi.tweak import set_cover
from ebook_converter.ebooks.oeb.base import (
OEB_DOCS, OEB_STYLES, Manifest, itercsslinks, iterlinks,
rewrite_links, serialize, urlquote, urlunquote
)
from ebook_converter.ebooks.oeb import base as oeb_base
from ebook_converter.ebooks.oeb.parse_utils import NotHTML, parse_html
from ebook_converter.ebooks.oeb.polish.errors import DRMError, InvalidBook
from ebook_converter.ebooks.oeb.polish.parsing import parse as parse_html_tweak
@@ -96,7 +93,7 @@ def abspath_to_name(path, root):
return relpath(os.path.abspath(path), root).replace(os.sep, '/')
def name_to_href(name, root, base=None, quote=urlquote):
def name_to_href(name, root, base=None, quote=oeb_base.urlquote):
fullpath = name_to_abspath(name, root)
basepath = root if base is None else os.path.dirname(name_to_abspath(base, root))
path = relpath(fullpath, basepath).replace(os.sep, '/')
@@ -111,7 +108,7 @@ def href_to_name(href, root, base=None):
return None
if purl.scheme or not purl.path:
return None
href = urlunquote(purl.path)
href = oeb_base.urlunquote(purl.path)
if iswindows and ':' in href:
# path manipulations on windows fail for paths with : in them, so we
# assume all such paths are invalid/absolute paths.
@@ -324,7 +321,7 @@ class Container(ContainerBase): # {{{
item_id = 'id' + '%d'%c
manifest = self.opf_xpath('//opf:manifest')[0]
href = self.name_to_href(name, self.opf_name)
item = manifest.makeelement(const.OPF_ITEM,
item = manifest.makeelement(oeb_base.tag('opf', 'item'),
id=item_id, href=href)
item.set('media-type', self.mime_map[name])
self.insert_into_xml(manifest, item)
@@ -340,7 +337,7 @@ class Container(ContainerBase): # {{{
def make_name_unique(self, name):
''' Ensure that `name` does not already exist in this book. If it does, return a modified version that does not exist. '''
counter = count()
counter = itertools.count()
while self.has_name_case_insensitive(name) or self.manifest_has_name(name):
c = next(counter) + 1
base, ext = name.rpartition('.')[::2]
@@ -377,10 +374,10 @@ class Container(ContainerBase): # {{{
if self.ok_to_be_unmanifested(name):
return name
item_id = self.add_name_to_manifest(name, process_manifest_item=process_manifest_item)
if mt in OEB_DOCS:
if mt in oeb_base.OEB_DOCS:
manifest = self.opf_xpath('//opf:manifest')[0]
spine = self.opf_xpath('//opf:spine')[0]
si = manifest.makeelement(const.OPF_ITEMREF, idref=item_id)
si = manifest.makeelement(oeb_base.tag('opf', 'itemref'), idref=item_id)
self.insert_into_xml(spine, si, index=spine_index)
return name
@@ -442,12 +439,12 @@ class Container(ContainerBase): # {{{
replace_func.file_type = 'opf'
for elem in self.opf_xpath('//*[@href]'):
elem.set('href', replace_func(elem.get('href')))
elif media_type.lower() in OEB_DOCS:
elif media_type.lower() in oeb_base.OEB_DOCS:
replace_func.file_type = 'text'
rewrite_links(self.parsed(name), replace_func)
elif media_type.lower() in OEB_STYLES:
oeb_base.rewrite_links(self.parsed(name), replace_func)
elif media_type.lower() in oeb_base.OEB_STYLES:
replace_func.file_type = 'style'
replaceUrls(self.parsed(name), replace_func)
css_parser.replaceUrls(self.parsed(name), replace_func)
elif media_type.lower() == guess_type('toc.ncx'):
replace_func.file_type = 'ncx'
for elem in self.parsed(name).xpath('//*[@src]'):
@@ -467,21 +464,21 @@ class Container(ContainerBase): # {{{
if name == self.opf_name:
for elem in self.opf_xpath('//*[@href]'):
yield (elem.get('href'), elem.sourceline, 0) if get_line_numbers else elem.get('href')
elif media_type.lower() in OEB_DOCS:
for el, attr, link, pos in iterlinks(self.parsed(name)):
elif media_type.lower() in oeb_base.OEB_DOCS:
for el, attr, link, pos in oeb_base.iterlinks(self.parsed(name)):
yield (link, el.sourceline, pos) if get_line_numbers else link
elif media_type.lower() in OEB_STYLES:
elif media_type.lower() in oeb_base.OEB_STYLES:
if get_line_numbers:
with self.open(name, 'rb') as f:
raw = self.decode(f.read()).replace('\r\n', '\n').replace('\r', '\n')
position = PositionFinder(raw)
is_in_comment = CommentFinder(raw)
for link, offset in itercsslinks(raw):
for link, offset in oeb_base.itercsslinks(raw):
if not is_in_comment(offset):
lnum, col = position(offset)
yield link, lnum, col
else:
for link in getUrls(self.parsed(name)):
for link in css_parser.getUrls(self.parsed(name)):
yield link
elif media_type.lower() == guess_type('toc.ncx'):
for elem in self.parsed(name).xpath('//*[@src]'):
@@ -533,7 +530,7 @@ class Container(ContainerBase): # {{{
def opf_xpath(self, expr):
' Convenience method to evaluate an XPath expression on the OPF file, has the opf: and dc: namespace prefixes pre-defined. '
return self.opf.xpath(expr, namespaces=const.OPF_NAMESPACES)
return self.opf.xpath(expr, namespaces=oeb_base.tag('opf', 'namespaces'))
def has_name(self, name):
''' Return True iff a file with the same canonical name as that specified exists. Unlike :meth:`exists` this method is always case-sensitive. '''
@@ -580,11 +577,11 @@ class Container(ContainerBase): # {{{
def parse(self, path, mime):
with open(path, 'rb') as src:
data = src.read()
if mime in OEB_DOCS:
if mime in oeb_base.OEB_DOCS:
data = self.parse_xhtml(data, self.relpath(path))
elif mime[-4:] in {'+xml', '/xml'}:
data = self.parse_xml(data)
elif mime in OEB_STYLES:
elif mime in oeb_base.OEB_STYLES:
data = self.parse_css(data, self.relpath(path))
return data
@@ -597,7 +594,7 @@ class Container(ContainerBase): # {{{
'''
ans = self.open(name).read()
mime = self.mime_map.get(name, guess_type(name))
if decode and (mime in OEB_STYLES or mime in OEB_DOCS or mime == 'text/plain' or mime[-4:] in {'+xml', '/xml'}):
if decode and (mime in oeb_base.OEB_STYLES or mime in oeb_base.OEB_DOCS or mime == 'text/plain' or mime[-4:] in {'+xml', '/xml'}):
ans = self.decode(ans, normalize_to_nfc=normalize_to_nfc)
return ans
@@ -637,7 +634,7 @@ class Container(ContainerBase): # {{{
so use it sparingly. '''
from ebook_converter.ebooks.metadata.opf2 import OPF as O
mi = self.serialize_item(self.opf_name)
return O(BytesIO(mi), basedir=self.opf_dir, unquote_urls=False,
return O(io.BytesIO(mi), basedir=self.opf_dir, unquote_urls=False,
populate_spine=False).to_book_metadata()
@property
@@ -662,7 +659,7 @@ class Container(ContainerBase): # {{{
@property
def manifest_type_map(self):
' Mapping of manifest media-type to list of canonical names of that media-type '
ans = defaultdict(list)
ans = collections.defaultdict(list)
for item in self.opf_xpath('//opf:manifest/opf:item[@href and @media-type]'):
ans[item.get('media-type').lower()].append(self.href_to_name(
item.get('href'), self.opf_name))
@@ -813,7 +810,7 @@ class Container(ContainerBase): # {{{
spine = self.opf_xpath('//opf:spine')[0]
spine.text = tail
for name, linear in spine_items:
i = spine.makeelement(const.OPF_ITEMREF,
i = spine.makeelement(oeb_base.tag('opf', 'itemref'),
nsmap={'opf': const.OPF2_NS})
i.tail = tail
i.set('idref', imap[name])
@@ -922,7 +919,7 @@ class Container(ContainerBase): # {{{
return ans[0]
self.dirty(self.opf_name)
package = self.opf_xpath('//opf:package')[0]
item = package.makeelement(OPF(name))
item = package.makeelement(oeb_base.tag('opf', name))
item.tail = '\n'
package.append(item)
return item
@@ -945,7 +942,7 @@ class Container(ContainerBase): # {{{
item_id = id_prefix + '%d'%c
manifest = self.opf_xpath('//opf:manifest')[0]
item = manifest.makeelement(const.OPF_ITEM,
item = manifest.makeelement(oeb_base.tag('opf', 'item'),
id=item_id, href=href)
item.set('media-type', media_type)
self.insert_into_xml(manifest, item)
@@ -992,7 +989,7 @@ class Container(ContainerBase): # {{{
data = root = self.parsed(name)
if name == self.opf_name:
self.format_opf()
data = serialize(data, self.mime_map[name], pretty_print=name in
data = oeb_base.serialize(data, self.mime_map[name], pretty_print=name in
self.pretty_print)
if name == self.opf_name and root.nsmap.get(None) == const.OPF2_NS:
# Needed as I can't get lxml to output opf:role and
@@ -1181,7 +1178,7 @@ class EpubContainer(Container):
)
if not opf_files:
raise InvalidEpub('META-INF/container.xml contains no link to OPF file')
opf_path = os.path.join(self.root, *(urlunquote(opf_files[0].get('full-path')).split('/')))
opf_path = os.path.join(self.root, *(oeb_base.urlunquote(opf_files[0].get('full-path')).split('/')))
if not exists(opf_path):
raise InvalidEpub('OPF file does not exist at location pointed to'
' by META-INF/container.xml')
@@ -1412,7 +1409,7 @@ def do_explode(path, dest):
def opf_to_azw3(opf, outpath, container):
from ebook_converter.ebooks.conversion.plumber import Plumber, create_oebbook
class Item(Manifest.Item):
class Item(oeb_base.Manifest.Item):
def _parse_css(self, data):
# The default CSS parser used by oeb.base inserts the h namespace