mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-02 02:13:38 +02:00
Fixing leftovers from first concept of constants
This commit is contained in:
@@ -1,14 +1,16 @@
|
||||
import collections
|
||||
import functools
|
||||
import json
|
||||
import re
|
||||
from collections import defaultdict, namedtuple
|
||||
from functools import wraps
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter import constants as const
|
||||
from ebook_converter import prints
|
||||
from ebook_converter.ebooks.metadata import authors_to_string, check_isbn, string_to_authors
|
||||
from ebook_converter.ebooks.metadata.book.base import Metadata
|
||||
from ebook_converter.ebooks.metadata import authors_to_string
|
||||
from ebook_converter.ebooks.metadata import check_isbn
|
||||
from ebook_converter.ebooks.metadata import string_to_authors
|
||||
from ebook_converter.ebooks.metadata.book import base
|
||||
from ebook_converter.ebooks.metadata.book.json_codec import (
|
||||
decode_is_multiple, encode_is_multiple, object_to_unicode
|
||||
)
|
||||
@@ -17,17 +19,30 @@ from ebook_converter.ebooks.metadata.utils import (
|
||||
pretty_print_opf
|
||||
)
|
||||
from ebook_converter.utils.config import from_json, to_json
|
||||
from ebook_converter.utils.date import (
|
||||
fix_only_date, is_date_undefined, isoformat, parse_date as parse_date_, utcnow,
|
||||
w3cdtf
|
||||
)
|
||||
from ebook_converter.utils.date import (fix_only_date, is_date_undefined,
|
||||
isoformat, parse_date as parse_date_,
|
||||
utcnow, w3cdtf)
|
||||
from ebook_converter.utils.iso8601 import parse_iso8601
|
||||
from ebook_converter.utils.localization import canonicalize_lang
|
||||
|
||||
|
||||
# Default prefix -> vocabulary URI table.  read_prefixes() starts from a copy
# of it, and ensure_prefix() leaves entries that match it out of the
# ``prefix`` attribute it writes.
RES_PREFIXES = {'dcterms': 'http://purl.org/dc/terms/',
                'epubsc': 'http://idpf.org/epub/vocab/sc/#',
                'marc': 'http://id.loc.gov/vocabulary/',
                'media': 'http://www.idpf.org/epub/vocab/overlays/#',
                'onix': 'http://www.editeur.org/ONIX/book/codelists/'
                        'current.html#',
                'rendition': 'http://www.idpf.org/vocab/rendition/#',
                'schema': 'http://schema.org/',
                'xsd': 'http://www.w3.org/2001/XMLSchema#'}

# URI used for the ``calibre`` prefix.
CALIBRE_PREFIX = 'https://calibre-ebook.com'
# RES_PREFIXES plus the ``calibre`` prefix; kept as a separate copy so that
# RES_PREFIXES itself never contains ``calibre``.
KNOWN_PREFIXES = RES_PREFIXES.copy()
KNOWN_PREFIXES['calibre'] = CALIBRE_PREFIX
|
||||
|
||||
# Utils {{{
|
||||
_xpath_cache = {}
|
||||
_re_cache = {}
|
||||
_XPATH_CACHE = {}
|
||||
_RE_CACHE = {}
|
||||
|
||||
|
||||
def uniq(vals):
|
||||
@@ -39,22 +54,23 @@ def uniq(vals):
|
||||
|
||||
|
||||
def dump_dict(cats):
    """Serialize ``cats`` to a JSON string.

    A falsy ``cats`` (``None`` or an empty mapping) is serialized as an
    empty object.  The value is first passed through ``object_to_unicode``.
    Non-ASCII characters are written as-is (``ensure_ascii=False``) and
    keys that JSON cannot represent are dropped (``skipkeys=True``).
    """
    return json.dumps(object_to_unicode(cats or {}), ensure_ascii=False,
                      skipkeys=True)
|
||||
|
||||
|
||||
def XPath(x):
    """Return a compiled ``etree.XPath`` object for the expression ``x``.

    Expressions are compiled with the ``const.OPF2_NSMAP`` namespace map and
    memoized in ``_XPATH_CACHE``, so a given expression string is compiled
    only on first use.
    """
    try:
        return _XPATH_CACHE[x]
    except KeyError:
        _XPATH_CACHE[x] = ans = etree.XPath(x, namespaces=const.OPF2_NSMAP)
        return ans
|
||||
|
||||
|
||||
def regex(r, flags=0):
    """Return the compiled regular expression for ``r`` with ``flags``.

    Compiled patterns are memoized in ``_RE_CACHE`` under the key
    ``(r, flags)``, so the same pattern with different flags gets its own
    entry.
    """
    try:
        return _RE_CACHE[(r, flags)]
    except KeyError:
        _RE_CACHE[(r, flags)] = ans = re.compile(r, flags)
        return ans
|
||||
|
||||
|
||||
@@ -82,7 +98,7 @@ def properties_for_id(item_id, refines):
|
||||
|
||||
|
||||
def properties_for_id_with_scheme(item_id, prefixes, refines):
|
||||
ans = defaultdict(list)
|
||||
ans = collections.defaultdict(list)
|
||||
if item_id:
|
||||
for elem in refines[item_id]:
|
||||
key = elem.get('property')
|
||||
@@ -126,7 +142,7 @@ def normalize_whitespace(text):
|
||||
|
||||
|
||||
def simple_text(f):
    """Decorator that whitespace-normalizes the return value of ``f``.

    The wrapped function is called with the arguments unchanged and its
    result is passed through ``normalize_whitespace``.  ``functools.wraps``
    keeps the name and docstring of ``f`` on the wrapper.
    """
    @functools.wraps(f)
    def wrapper(*args, **kw):
        return normalize_whitespace(f(*args, **kw))
    return wrapper
|
||||
@@ -135,7 +151,7 @@ def simple_text(f):
|
||||
def items_with_property(root, q, prefixes=None):
|
||||
if prefixes is None:
|
||||
prefixes = read_prefixes(root)
|
||||
q = expand_prefix(q, known_prefixes).lower()
|
||||
q = expand_prefix(q, KNOWN_PREFIXES).lower()
|
||||
for item in XPath("./opf:manifest/opf:item[@properties]")(root):
|
||||
for prop in (item.get('properties') or '').lower().split():
|
||||
prop = expand_prefix(prop, prefixes)
|
||||
@@ -150,43 +166,32 @@ def items_with_property(root, q, prefixes=None):
|
||||
# http://www.idpf.org/epub/vocab/package/pfx/
|
||||
|
||||
|
||||
# Lowercase spellings of the prefix tables; the contents are the same as
# those of RES_PREFIXES / KNOWN_PREFIXES.
reserved_prefixes = {
    'dcterms': 'http://purl.org/dc/terms/',
    'epubsc': 'http://idpf.org/epub/vocab/sc/#',
    'marc': 'http://id.loc.gov/vocabulary/',
    'media': 'http://www.idpf.org/epub/vocab/overlays/#',
    'onix': 'http://www.editeur.org/ONIX/book/codelists/current.html#',
    'rendition': 'http://www.idpf.org/vocab/rendition/#',
    'schema': 'http://schema.org/',
    'xsd': 'http://www.w3.org/2001/XMLSchema#',
}

# URI used for the ``calibre`` prefix.
CALIBRE_PREFIX = 'https://calibre-ebook.com'
# Reserved prefixes plus ``calibre``, kept as a separate copy.
known_prefixes = reserved_prefixes.copy()
known_prefixes['calibre'] = CALIBRE_PREFIX
|
||||
|
||||
|
||||
def parse_prefixes(x):
    """Parse the text of a ``prefix`` attribute into a ``{name: uri}`` dict.

    Each ``name: uri`` pair is recognised as a run of non-space characters,
    a colon followed by at least one space, and another run of non-space
    characters.  When a name occurs more than once the last value wins.
    An empty string yields an empty dict.
    """
    return {m.group(1): m.group(2)
            for m in re.finditer(r'(\S+): \s*(\S+)', x)}
|
||||
|
||||
|
||||
def read_prefixes(root):
    """Return the prefix table in effect for ``root``.

    The result is a fresh copy of ``RES_PREFIXES`` updated with whatever is
    declared in the ``prefix`` attribute of ``root``, so declarations in the
    document override the reserved defaults.  A missing attribute is treated
    as an empty declaration.
    """
    ans = RES_PREFIXES.copy()
    ans.update(parse_prefixes(root.get('prefix') or ''))
    return ans
|
||||
|
||||
|
||||
def expand_prefix(raw, prefixes):
    """Expand the prefix of every ``prefix:name`` token found in ``raw``.

    For each token the prefix is looked up in ``prefixes``; a known prefix is
    replaced by its mapped value and an unknown one is kept as written.  The
    expanded prefix and the name are re-joined with a single ``:``, dropping
    any whitespace that surrounded the colon.  A falsy ``raw`` is treated as
    the empty string.
    """
    def expand(m):
        return prefixes.get(m.group(1), m.group(1)) + ':' + m.group(2)

    return regex(r'(\S+)\s*:\s*(\S+)').sub(expand, raw or '')
|
||||
|
||||
|
||||
def ensure_prefix(root, prefixes, prefix, value=None):
    """Make sure ``prefix`` is usable in the document rooted at ``root``.

    ``prefixes`` is the current prefix table; when it is ``None`` it is read
    from ``root``.  The entry for ``prefix`` is set in that table (the passed
    dict is modified in place) to ``value``, or to the reserved value from
    ``RES_PREFIXES`` when ``value`` is falsy.  A ``KeyError`` is raised if no
    ``value`` is given and ``prefix`` is not a reserved one.

    The ``prefix`` attribute of ``root`` is then rewritten from the table, or
    removed when nothing needs declaring.
    """
    if prefixes is None:
        prefixes = read_prefixes(root)
    prefixes[prefix] = value or RES_PREFIXES[prefix]
    # Entries identical to the reserved defaults are not written out.
    declared = {k: v for k, v in prefixes.items()
                if RES_PREFIXES.get(k) != v}
    if declared:
        root.set('prefix', ' '.join('%s: %s' % (k, v)
                                    for k, v in declared.items()))
    else:
        root.attrib.pop('prefix', None)
|
||||
|
||||
@@ -196,7 +201,7 @@ def ensure_prefix(root, prefixes, prefix, value=None):
|
||||
|
||||
|
||||
def read_refines(root):
|
||||
ans = defaultdict(list)
|
||||
ans = collections.defaultdict(list)
|
||||
for meta in XPath('./opf:metadata/opf:meta[@refines]')(root):
|
||||
r = meta.get('refines') or ''
|
||||
if r.startswith('#'):
|
||||
@@ -213,7 +218,7 @@ def set_refines(elem, existing_refines, *new_refines):
|
||||
remove_refines(elem, existing_refines)
|
||||
for ref in reversed(new_refines):
|
||||
prop, val, scheme = ref
|
||||
r = elem.makeelement(const.OPF_META)
|
||||
r = elem.makeelement(base.tag('opf', 'meta'))
|
||||
r.set('refines', '#' + eid), r.set('property', prop)
|
||||
r.text = val.strip()
|
||||
if scheme:
|
||||
@@ -249,7 +254,7 @@ def parse_identifier(ident, val, refines):
|
||||
# Try the OPF 2 style opf:scheme attribute, which will be present, for
|
||||
# example, in EPUB 3 files that have had their metadata set by an
|
||||
# application that only understands EPUB 2.
|
||||
scheme = ident.get(const.OPF_SCHEME)
|
||||
scheme = ident.get(base.tag('opf', 'scheme'))
|
||||
if scheme and not lval.startswith('urn:'):
|
||||
return finalize(scheme, val)
|
||||
|
||||
@@ -267,7 +272,7 @@ def parse_identifier(ident, val, refines):
|
||||
|
||||
|
||||
def read_identifiers(root, prefixes, refines):
|
||||
ans = defaultdict(list)
|
||||
ans = collections.defaultdict(list)
|
||||
for ident in XPath('./opf:metadata/dc:identifier')(root):
|
||||
val = (ident.text or '').strip()
|
||||
if val:
|
||||
@@ -277,7 +282,8 @@ def read_identifiers(root, prefixes, refines):
|
||||
return ans
|
||||
|
||||
|
||||
def set_identifiers(root, prefixes, refines, new_identifiers, force_identifiers=False):
|
||||
def set_identifiers(root, prefixes, refines, new_identifiers,
|
||||
force_identifiers=False):
|
||||
uid = root.get('unique-identifier')
|
||||
package_identifier = None
|
||||
for ident in XPath('./opf:metadata/dc:identifier')(root):
|
||||
@@ -289,12 +295,15 @@ def set_identifiers(root, prefixes, refines, new_identifiers, force_identifiers=
|
||||
ident.getparent().remove(ident)
|
||||
continue
|
||||
scheme, val = parse_identifier(ident, val, refines)
|
||||
if not scheme or not val or force_identifiers or scheme in new_identifiers:
|
||||
if (not scheme or
|
||||
not val or
|
||||
force_identifiers or
|
||||
scheme in new_identifiers):
|
||||
remove_element(ident, refines)
|
||||
continue
|
||||
metadata = XPath('./opf:metadata')(root)[0]
|
||||
for scheme, val in new_identifiers.items():
|
||||
ident = metadata.makeelement(const.DC_IDENT)
|
||||
ident = metadata.makeelement(base.tag('dc', 'ident'))
|
||||
ident.text = '%s:%s' % (scheme, val)
|
||||
if package_identifier is None:
|
||||
metadata.append(ident)
|
||||
@@ -312,11 +321,12 @@ def identifier_writer(name):
|
||||
if is_package_id:
|
||||
package_identifier = ident
|
||||
val = (ident.text or '').strip()
|
||||
if (val.startswith(name + ':') or ident.get(const.OPF_SCHEME) == name) and not is_package_id:
|
||||
if (val.startswith(name + ':') or
|
||||
ident.get(base.tag('opf', 'scheme')) == name) and not is_package_id:
|
||||
remove_element(ident, refines)
|
||||
metadata = XPath('./opf:metadata')(root)[0]
|
||||
if ival:
|
||||
ident = metadata.makeelement(const.DC_IDENT)
|
||||
ident = metadata.makeelement(base.tag('dc', 'ident'))
|
||||
ident.text = '%s:%s' % (name, ival)
|
||||
if package_identifier is None:
|
||||
metadata.append(ident)
|
||||
@@ -366,7 +376,8 @@ def read_title_sort(root, prefixes, refines):
|
||||
if fa:
|
||||
return fa
|
||||
# Look for OPF 2.0 style title_sort
|
||||
for m in XPath('./opf:metadata/opf:meta[@name="calibre:title_sort"]')(root):
|
||||
for m in XPath('./opf:metadata/opf:meta[@name="calibre:'
|
||||
'title_sort"]')(root):
|
||||
ans = m.get('content')
|
||||
if ans:
|
||||
return ans
|
||||
@@ -376,12 +387,13 @@ def set_title(root, prefixes, refines, title, title_sort=None):
|
||||
main_title = find_main_title(root, refines, remove_blanks=True)
|
||||
if main_title is None:
|
||||
m = XPath('./opf:metadata')(root)[0]
|
||||
main_title = m.makeelement(const.DC_TITLE)
|
||||
main_title = m.makeelement(base.tag('dc', 'title'))
|
||||
m.insert(0, main_title)
|
||||
main_title.text = title or None
|
||||
ts = [refdef('file-as', title_sort)] if title_sort else ()
|
||||
set_refines(main_title, refines, refdef('title-type', 'main'), *ts)
|
||||
for m in XPath('./opf:metadata/opf:meta[@name="calibre:title_sort"]')(root):
|
||||
for m in XPath('./opf:metadata/opf:meta[@name="calibre:'
|
||||
'title_sort"]')(root):
|
||||
remove_element(m, refines)
|
||||
|
||||
# }}}
|
||||
@@ -405,28 +417,32 @@ def set_languages(root, prefixes, refines, languages):
|
||||
val = (lang.text or '').strip()
|
||||
if val:
|
||||
opf_languages.append(val)
|
||||
languages = list(filter(lambda x: x and x != 'und', normalize_languages(opf_languages, languages)))
|
||||
languages = list(filter(lambda x: x and x != 'und',
|
||||
normalize_languages(opf_languages, languages)))
|
||||
if not languages:
|
||||
# EPUB spec says dc:language is required
|
||||
languages = ['und']
|
||||
metadata = XPath('./opf:metadata')(root)[0]
|
||||
for lang in uniq(languages):
|
||||
l = metadata.makeelement(const.DC_LANG)
|
||||
l.text = lang
|
||||
metadata.append(l)
|
||||
dc_lang = metadata.makeelement(base.tag('dc', 'lang'))
|
||||
dc_lang.text = lang
|
||||
metadata.append(dc_lang)
|
||||
# }}}
|
||||
|
||||
# Creator/Contributor {{{
|
||||
|
||||
|
||||
# (name, sort) record for one author; ``sort`` is the file-as form of the
# name and may be None.
Author = collections.namedtuple('Author', 'name sort')
|
||||
|
||||
|
||||
def is_relators_role(props, q):
    """Tell whether ``props`` carries the role ``q``.

    ``props`` maps property names to lists of ``(scheme_ns, scheme, value)``
    triples.  A ``role`` entry matches when its value equals ``q``
    case-insensitively (``q`` is expected in lower case) and it either has no
    scheme namespace or uses the ``relators`` scheme of the reserved ``marc``
    prefix.  Empty entries are ignored.

    Returns ``False`` when ``props`` has no ``role`` entry at all.
    ``dict.get`` returns ``None`` for a missing key, even on a
    ``defaultdict``, so that case is guarded instead of raising ``TypeError``.
    """
    for entry in props.get('role') or ():
        if not entry:
            continue
        scheme_ns, scheme, role = entry
        if (role.lower() == q and
                (scheme_ns is None or
                 (scheme_ns, scheme) == (RES_PREFIXES['marc'],
                                         'relators'))):
            return True
    return False
|
||||
|
||||
@@ -440,15 +456,16 @@ def read_authors(root, prefixes, refines):
|
||||
if file_as:
|
||||
aus = file_as[0][-1]
|
||||
else:
|
||||
aus = item.get(const.OPF_FILE_AS) or None
|
||||
aus = item.get(base.tag('opf', 'file_as')) or None
|
||||
return Author(normalize_whitespace(val), normalize_whitespace(aus))
|
||||
|
||||
for item in XPath('./opf:metadata/dc:creator')(root):
|
||||
val = (item.text or '').strip()
|
||||
if val:
|
||||
props = properties_for_id_with_scheme(item.get('id'), prefixes, refines)
|
||||
props = properties_for_id_with_scheme(item.get('id'), prefixes,
|
||||
refines)
|
||||
role = props.get('role')
|
||||
opf_role = item.get(const.OPF_ROLE)
|
||||
opf_role = item.get(base.tag('opf', 'role'))
|
||||
if role:
|
||||
if is_relators_role(props, 'aut'):
|
||||
roled_authors.append(author(item, props, val))
|
||||
@@ -464,23 +481,30 @@ def read_authors(root, prefixes, refines):
|
||||
def set_authors(root, prefixes, refines, authors):
    """Replace the author ``dc:creator`` elements of ``root`` with ``authors``.

    Existing ``dc:creator`` elements are removed unless they are marked with
    a role other than ``aut``, either through the ``opf:role`` attribute or
    through a refining ``role`` property.  For every entry of ``authors``
    with a non-empty ``name`` a new ``dc:creator`` is appended to the
    metadata element, together with a refining ``role`` meta (``aut``, scheme
    ``marc:relators``) and, when ``sort`` is set, a refining ``file-as``
    meta.

    ``authors`` is an iterable of objects with ``name`` and ``sort``
    attributes.
    """
    # The role metas written below use the ``marc:`` prefix.
    ensure_prefix(root, prefixes, 'marc')
    for item in XPath('./opf:metadata/dc:creator')(root):
        props = properties_for_id_with_scheme(item.get('id'), prefixes,
                                              refines)
        opf_role = item.get(base.tag('opf', 'role'))
        if ((opf_role and opf_role.lower() != 'aut') or
                (props.get('role') and not is_relators_role(props, 'aut'))):
            # A creator with some other role: leave it in place.
            continue
        remove_element(item, refines)
    metadata = XPath('./opf:metadata')(root)[0]
    for author in authors:
        if author.name:
            a = metadata.makeelement(base.tag('dc', 'creator'))
            aid = ensure_id(a)
            a.text = author.name
            metadata.append(a)
            m = metadata.makeelement(base.tag('opf', 'meta'),
                                     attrib={'refines': '#' + aid,
                                             'property': 'role',
                                             'scheme': 'marc:relators'})
            m.text = 'aut'
            metadata.append(m)
            if author.sort:
                m = metadata.makeelement(base.tag('opf', 'meta'),
                                         attrib={'refines': '#' + aid,
                                                 'property': 'file-as'})
                m.text = author.sort
                metadata.append(m)
|
||||
|
||||
@@ -490,9 +514,10 @@ def read_book_producers(root, prefixes, refines):
|
||||
for item in XPath('./opf:metadata/dc:contributor')(root):
|
||||
val = (item.text or '').strip()
|
||||
if val:
|
||||
props = properties_for_id_with_scheme(item.get('id'), prefixes, refines)
|
||||
props = properties_for_id_with_scheme(item.get('id'), prefixes,
|
||||
refines)
|
||||
role = props.get('role')
|
||||
opf_role = item.get(const.OPF_ROLE)
|
||||
opf_role = item.get(base.tag('opf', 'role'))
|
||||
if role:
|
||||
if is_relators_role(props, 'bkp'):
|
||||
ans.append(normalize_whitespace(val))
|
||||
@@ -503,19 +528,24 @@ def read_book_producers(root, prefixes, refines):
|
||||
|
||||
def set_book_producers(root, prefixes, refines, producers):
    """Replace the book-producer ``dc:contributor`` elements of ``root``.

    Existing ``dc:contributor`` elements are removed unless they are marked
    with a role other than ``bkp``, either through the ``opf:role`` attribute
    or through a refining ``role`` property.  For every non-empty entry of
    ``producers`` a new ``dc:contributor`` is appended to the metadata
    element, together with a refining ``role`` meta (``bkp``, scheme
    ``marc:relators``).
    """
    for item in XPath('./opf:metadata/dc:contributor')(root):
        props = properties_for_id_with_scheme(item.get('id'), prefixes,
                                              refines)
        opf_role = item.get(base.tag('opf', 'role'))
        if ((opf_role and opf_role.lower() != 'bkp') or
                (props.get('role') and not is_relators_role(props, 'bkp'))):
            # A contributor with some other role: leave it in place.
            continue
        remove_element(item, refines)
    metadata = XPath('./opf:metadata')(root)[0]
    for bkp in producers:
        if bkp:
            a = metadata.makeelement(base.tag('dc', 'contributor'))
            aid = ensure_id(a)
            a.text = bkp
            metadata.append(a)
            m = metadata.makeelement(base.tag('opf', 'meta'),
                                     attrib={'refines': '#' + aid,
                                             'property': 'role',
                                             'scheme': 'marc:relators'})
            m.text = 'bkp'
            metadata.append(m)
|
||||
# }}}
|
||||
@@ -531,7 +561,9 @@ def parse_date(raw, is_w3cdtf=False):
|
||||
ans = fix_only_date(ans)
|
||||
else:
|
||||
ans = parse_date_(raw, assume_utc=True)
|
||||
if ' ' not in raw and 'T' not in raw and (ans.hour, ans.minute, ans.second) == (0, 0, 0):
|
||||
if (' ' not in raw and
|
||||
'T' not in raw and
|
||||
(ans.hour, ans.minute, ans.second) == (0, 0, 0)):
|
||||
ans = fix_only_date(ans)
|
||||
return ans
|
||||
|
||||
@@ -552,14 +584,14 @@ def set_pubdate(root, prefixes, refines, val):
|
||||
if not is_date_undefined(val):
|
||||
val = isoformat(val)
|
||||
m = XPath('./opf:metadata')(root)[0]
|
||||
d = m.makeelement(const.DC_DATE)
|
||||
d = m.makeelement(base.tag('dc', 'date'))
|
||||
d.text = val
|
||||
m.append(d)
|
||||
|
||||
|
||||
def read_timestamp(root, prefixes, refines):
|
||||
pq = '%s:timestamp' % CALIBRE_PREFIX
|
||||
sq = '%s:w3cdtf' % reserved_prefixes['dcterms']
|
||||
sq = '%s:w3cdtf' % RES_PREFIXES['dcterms']
|
||||
for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
|
||||
val = (meta.text or '').strip()
|
||||
if val:
|
||||
@@ -570,7 +602,8 @@ def read_timestamp(root, prefixes, refines):
|
||||
return parse_date(val, is_w3cdtf=scheme == sq)
|
||||
except Exception:
|
||||
continue
|
||||
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:timestamp"]')(root):
|
||||
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:'
|
||||
'timestamp"]')(root):
|
||||
val = meta.get('content')
|
||||
if val:
|
||||
try:
|
||||
@@ -584,7 +617,9 @@ def create_timestamp(root, prefixes, m, val):
|
||||
ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
|
||||
ensure_prefix(root, prefixes, 'dcterms')
|
||||
val = w3cdtf(val)
|
||||
d = m.makeelement(const.OPF_META, attrib={'property':'calibre:timestamp', 'scheme':'dcterms:W3CDTF'})
|
||||
d = m.makeelement(base.tag('opf', 'meta'),
|
||||
attrib={'property': 'calibre:timestamp',
|
||||
'scheme': 'dcterms:W3CDTF'})
|
||||
d.text = val
|
||||
m.append(d)
|
||||
|
||||
@@ -599,8 +634,8 @@ def set_timestamp(root, prefixes, refines, val):
|
||||
|
||||
|
||||
def read_last_modified(root, prefixes, refines):
|
||||
pq = '%s:modified' % reserved_prefixes['dcterms']
|
||||
sq = '%s:w3cdtf' % reserved_prefixes['dcterms']
|
||||
pq = '%s:modified' % RES_PREFIXES['dcterms']
|
||||
sq = '%s:w3cdtf' % RES_PREFIXES['dcterms']
|
||||
for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
|
||||
val = (meta.text or '').strip()
|
||||
if val:
|
||||
@@ -614,7 +649,7 @@ def read_last_modified(root, prefixes, refines):
|
||||
|
||||
|
||||
def set_last_modified(root, prefixes, refines, val=None):
|
||||
pq = '%s:modified' % reserved_prefixes['dcterms']
|
||||
pq = '%s:modified' % RES_PREFIXES['dcterms']
|
||||
val = w3cdtf(val or utcnow())
|
||||
for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
|
||||
prop = expand_prefix(meta.get('property'), prefixes)
|
||||
@@ -625,7 +660,9 @@ def set_last_modified(root, prefixes, refines, val=None):
|
||||
else:
|
||||
ensure_prefix(root, prefixes, 'dcterms')
|
||||
m = XPath('./opf:metadata')(root)[0]
|
||||
meta = m.makeelement(const.OPF_META, attrib={'property':'dcterms:modified', 'scheme':'dcterms:W3CDTF'})
|
||||
meta = m.makeelement(base.tag('opf', 'meta'),
|
||||
attrib={'property': 'dcterms:modified',
|
||||
'scheme': 'dcterms:W3CDTF'})
|
||||
m.append(meta)
|
||||
meta.text = val
|
||||
# }}}
|
||||
@@ -648,7 +685,7 @@ def set_comments(root, prefixes, refines, val):
|
||||
if val:
|
||||
val = val.strip()
|
||||
if val:
|
||||
c = m.makeelement(const.DC_DESC)
|
||||
c = m.makeelement(base.tag('dc', 'desc'))
|
||||
c.text = val
|
||||
m.append(c)
|
||||
# }}}
|
||||
@@ -670,7 +707,7 @@ def set_publisher(root, prefixes, refines, val):
|
||||
if val:
|
||||
val = val.strip()
|
||||
if val:
|
||||
c = m.makeelement(const.DC_PUBLISHER('publisher'))
|
||||
c = m.makeelement(base.tag('dc', 'publisher'))
|
||||
c.text = normalize_whitespace(val)
|
||||
m.append(c)
|
||||
# }}}
|
||||
@@ -693,7 +730,7 @@ def set_tags(root, prefixes, refines, val):
|
||||
if val:
|
||||
val = uniq(list(filter(None, val)))
|
||||
for x in val:
|
||||
c = m.makeelement(const.DC_SUBJ)
|
||||
c = m.makeelement(base.tag('dc', 'subj'))
|
||||
c.text = normalize_whitespace(x)
|
||||
if c.text:
|
||||
m.append(c)
|
||||
@@ -725,7 +762,7 @@ def read_rating(root, prefixes, refines):
|
||||
def create_rating(root, prefixes, val):
    """Append a ``calibre:rating`` meta element with text ``val``.

    The ``calibre`` prefix is declared on ``root`` first, then the new
    element is added at the end of the metadata element.  ``val`` is
    assigned to the element text unchanged.
    """
    # NOTE(review): val is presumably already a string - confirm in caller.
    ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
    m = XPath('./opf:metadata')(root)[0]
    d = m.makeelement(base.tag('opf', 'meta'),
                      attrib={'property': 'calibre:rating'})
    d.text = val
    m.append(d)
|
||||
|
||||
@@ -747,7 +784,8 @@ def set_rating(root, prefixes, refines, val):
|
||||
|
||||
def read_series(root, prefixes, refines):
|
||||
series_index = 1.0
|
||||
for meta in XPath('./opf:metadata/opf:meta[@property="belongs-to-collection" and @id]')(root):
|
||||
for meta in XPath('./opf:metadata/opf:meta[@property="'
|
||||
'belongs-to-collection" and @id]')(root):
|
||||
val = (meta.text or '').strip()
|
||||
if val:
|
||||
props = properties_for_id(meta.get('id'), refines)
|
||||
@@ -757,13 +795,15 @@ def read_series(root, prefixes, refines):
|
||||
except Exception:
|
||||
pass
|
||||
return normalize_whitespace(val), series_index
|
||||
for si in XPath('./opf:metadata/opf:meta[@name="calibre:series_index"]/@content')(root):
|
||||
for si in XPath('./opf:metadata/opf:meta[@name="calibre:series_index"]'
|
||||
'/@content')(root):
|
||||
try:
|
||||
series_index = float(si)
|
||||
break
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
for s in XPath('./opf:metadata/opf:meta[@name="calibre:series"]/@content')(root):
|
||||
for s in XPath('./opf:metadata/opf:meta[@name="calibre:series"]'
|
||||
'/@content')(root):
|
||||
s = normalize_whitespace(s)
|
||||
if s:
|
||||
return s, series_index
|
||||
@@ -772,16 +812,20 @@ def read_series(root, prefixes, refines):
|
||||
|
||||
def create_series(root, refines, series, series_index):
    """Append a ``belongs-to-collection`` meta describing ``series``.

    The new element gets ``series`` as its text and two refinements:
    ``collection-type`` set to ``series`` and ``group-position`` set to
    ``series_index``.  ``series_index`` is handed to ``refdef`` as given;
    set_series() passes it already formatted as a string.
    """
    m = XPath('./opf:metadata')(root)[0]
    d = m.makeelement(base.tag('opf', 'meta'),
                      attrib={'property': 'belongs-to-collection'})
    d.text = series
    m.append(d)
    set_refines(d, refines, refdef('collection-type', 'series'),
                refdef('group-position', series_index))
|
||||
|
||||
|
||||
def set_series(root, prefixes, refines, series, series_index):
|
||||
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:series" or @name="calibre:series_index"]')(root):
|
||||
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:series" or '
|
||||
'@name="calibre:series_index"]')(root):
|
||||
remove_element(meta, refines)
|
||||
for meta in XPath('./opf:metadata/opf:meta[@property="belongs-to-collection"]')(root):
|
||||
for meta in XPath('./opf:metadata/opf:meta[@property="'
|
||||
'belongs-to-collection"]')(root):
|
||||
remove_element(meta, refines)
|
||||
if series:
|
||||
create_series(root, refines, series, '%.2g' % series_index)
|
||||
@@ -806,7 +850,8 @@ def dict_reader(name, load=json.loads, try2=True):
|
||||
except Exception:
|
||||
continue
|
||||
if try2:
|
||||
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' % name)(root):
|
||||
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' %
|
||||
name)(root):
|
||||
val = meta.get('content')
|
||||
if val:
|
||||
try:
|
||||
@@ -827,7 +872,8 @@ def dict_writer(name, serialize=dump_dict, remove2=True):
|
||||
|
||||
def writer(root, prefixes, refines, val):
|
||||
if remove2:
|
||||
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' % name)(root):
|
||||
for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' %
|
||||
name)(root):
|
||||
remove_element(meta, refines)
|
||||
for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
|
||||
prop = expand_prefix(meta.get('property'), prefixes)
|
||||
@@ -836,7 +882,8 @@ def dict_writer(name, serialize=dump_dict, remove2=True):
|
||||
if val:
|
||||
ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
|
||||
m = XPath('./opf:metadata')(root)[0]
|
||||
d = m.makeelement(const.OPF_META, attrib={'property':'calibre:%s' % name})
|
||||
d = m.makeelement(base.tag('opf', 'meta'),
|
||||
attrib={'property': 'calibre:%s' % name})
|
||||
d.text = serialize(val)
|
||||
m.append(d)
|
||||
return writer
|
||||
@@ -855,12 +902,14 @@ def deserialize_user_metadata(val):
|
||||
return ans
|
||||
|
||||
|
||||
# Reader for the ``user_metadata`` dictionary, decoded with
# deserialize_user_metadata.  ``try2=False`` turns off dict_reader's second
# lookup through ``meta[@name="calibre:..."]`` elements.
read_user_metadata3 = dict_reader('user_metadata',
                                  load=deserialize_user_metadata, try2=False)
|
||||
|
||||
|
||||
def read_user_metadata2(root, remove_tags=False):
|
||||
ans = {}
|
||||
for meta in XPath('./opf:metadata/opf:meta[starts-with(@name, "calibre:user_metadata:")]')(root):
|
||||
for meta in XPath('./opf:metadata/opf:meta[starts-with(@name, '
|
||||
'"calibre:user_metadata:")]')(root):
|
||||
name = meta.get('name')
|
||||
name = ':'.join(name.split(':')[2:])
|
||||
if not name or not name.startswith('#'):
|
||||
@@ -881,18 +930,23 @@ def read_user_metadata2(root, remove_tags=False):
|
||||
|
||||
|
||||
def read_user_metadata(root, prefixes, refines):
    """Return the user metadata stored in ``root``.

    ``read_user_metadata3`` is tried first; if its result is falsy the
    result of ``read_user_metadata2`` is returned instead.
    """
    found = read_user_metadata3(root, prefixes, refines)
    return found or read_user_metadata2(root)
|
||||
|
||||
|
||||
def serialize_user_metadata(val):
    """Serialize user metadata ``val`` to a JSON string.

    ``val`` is passed through ``object_to_unicode`` and dumped with sorted
    keys and a two-space indent.  Non-ASCII characters are written as-is and
    objects JSON cannot encode natively are converted by ``to_json``.
    """
    return json.dumps(object_to_unicode(val), ensure_ascii=False,
                      default=to_json, indent=2, sort_keys=True)
|
||||
|
||||
|
||||
# Writer for the ``user_metadata`` dictionary, encoded with
# serialize_user_metadata.  ``remove2=False`` turns off dict_writer's removal
# of ``meta[@name="calibre:user_metadata"]`` elements.
set_user_metadata3 = dict_writer('user_metadata',
                                 serialize=serialize_user_metadata,
                                 remove2=False)
|
||||
|
||||
|
||||
def set_user_metadata(root, prefixes, refines, val):
|
||||
for meta in XPath('./opf:metadata/opf:meta[starts-with(@name, "calibre:user_metadata:")]')(root):
|
||||
for meta in XPath('./opf:metadata/opf:meta[starts-with(@name, '
|
||||
'"calibre:user_metadata:")]')(root):
|
||||
remove_element(meta, refines)
|
||||
if val:
|
||||
nval = {}
|
||||
@@ -921,26 +975,32 @@ def read_raster_cover(root, prefixes, refines):
|
||||
if href:
|
||||
return href
|
||||
|
||||
for item_id in XPath('./opf:metadata/opf:meta[@name="cover"]/@content')(root):
|
||||
for item in XPath('./opf:manifest/opf:item[@id and @href and @media-type]')(root):
|
||||
for item_id in XPath('./opf:metadata/opf:meta[@name="cover"]'
|
||||
'/@content')(root):
|
||||
for item in XPath('./opf:manifest/opf:item[@id and @href and '
|
||||
'@media-type]')(root):
|
||||
if item.get('id') == item_id:
|
||||
href = get_href(item)
|
||||
if href:
|
||||
return href
|
||||
|
||||
|
||||
def ensure_is_only_raster_cover(root, prefixes, refines,
                                raster_cover_item_href):
    """Make ``raster_cover_item_href`` the only item marked as cover image.

    Three passes over ``root``:

    1. every ``meta[@name="cover"]`` element is removed;
    2. ``cover-image`` is stripped from the ``properties`` of every manifest
       item that carries it, and the attribute is dropped if nothing else
       remains;
    3. ``cover-image`` is added to the ``properties`` of every manifest item
       whose ``href`` equals ``raster_cover_item_href``.
    """
    for item in XPath('./opf:metadata/opf:meta[@name="cover"]')(root):
        remove_element(item, refines)
    for item in items_with_property(root, 'cover-image', prefixes):
        prop = normalize_whitespace(item.get('properties')
                                    .replace('cover-image', ''))
        if prop:
            item.set('properties', prop)
        else:
            del item.attrib['properties']
    for item in XPath('./opf:manifest/opf:item')(root):
        if item.get('href') == raster_cover_item_href:
            item.set('properties',
                     normalize_whitespace((item.get('properties')
                                           or '') + ' cover-image'))
|
||||
|
||||
# }}}
|
||||
|
||||
@@ -960,7 +1020,7 @@ def set_last_modified_in_opf(root):
|
||||
|
||||
|
||||
def read_metadata(root, ver=None, return_extra_data=False):
|
||||
ans = Metadata('Unknown', ['Unknown'])
|
||||
ans = base.Metadata('Unknown', ['Unknown'])
|
||||
prefixes, refines = read_prefixes(root), read_refines(root)
|
||||
identifiers = read_identifiers(root, prefixes, refines)
|
||||
ids = {}
|
||||
@@ -1000,12 +1060,16 @@ def read_metadata(root, ver=None, return_extra_data=False):
|
||||
s, si = read_series(root, prefixes, refines)
|
||||
if s:
|
||||
ans.series, ans.series_index = s, si
|
||||
ans.author_link_map = read_author_link_map(root, prefixes, refines) or ans.author_link_map
|
||||
ans.user_categories = read_user_categories(root, prefixes, refines) or ans.user_categories
|
||||
for name, fm in (read_user_metadata(root, prefixes, refines) or {}).items():
|
||||
ans.author_link_map = read_author_link_map(root, prefixes,
|
||||
refines) or ans.author_link_map
|
||||
ans.user_categories = read_user_categories(root, prefixes,
|
||||
refines) or ans.user_categories
|
||||
for name, fm in (read_user_metadata(root, prefixes,
|
||||
refines) or {}).items():
|
||||
ans.set_user_metadata(name, fm)
|
||||
if return_extra_data:
|
||||
ans = ans, ver, read_raster_cover(root, prefixes, refines), first_spine_item(root, prefixes, refines)
|
||||
ans = (ans, ver, read_raster_cover(root, prefixes, refines),
|
||||
first_spine_item(root, prefixes, refines))
|
||||
return ans
|
||||
|
||||
|
||||
@@ -1014,7 +1078,9 @@ def get_metadata(stream):
|
||||
return read_metadata(root)
|
||||
|
||||
|
||||
def apply_metadata(root, mi, cover_prefix='', cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False, add_missing_cover=True):
|
||||
def apply_metadata(root, mi, cover_prefix='', cover_data=None,
|
||||
apply_null=False, update_timestamp=False,
|
||||
force_identifiers=False, add_missing_cover=True):
|
||||
prefixes, refines = read_prefixes(root), read_refines(root)
|
||||
current_mi = read_metadata(root)
|
||||
if apply_null:
|
||||
@@ -1024,7 +1090,8 @@ def apply_metadata(root, mi, cover_prefix='', cover_data=None, apply_null=False,
|
||||
def ok(x):
|
||||
return not mi.is_null(x)
|
||||
if ok('identifiers'):
|
||||
set_identifiers(root, prefixes, refines, mi.identifiers, force_identifiers=force_identifiers)
|
||||
set_identifiers(root, prefixes, refines, mi.identifiers,
|
||||
force_identifiers=force_identifiers)
|
||||
if ok('title'):
|
||||
set_title(root, prefixes, refines, mi.title, mi.title_sort)
|
||||
if ok('languages'):
|
||||
@@ -1052,16 +1119,21 @@ def apply_metadata(root, mi, cover_prefix='', cover_data=None, apply_null=False,
|
||||
if ok('series'):
|
||||
set_series(root, prefixes, refines, mi.series, mi.series_index or 1)
|
||||
if ok('author_link_map'):
|
||||
set_author_link_map(root, prefixes, refines, getattr(mi, 'author_link_map', None))
|
||||
set_author_link_map(root, prefixes, refines,
|
||||
getattr(mi, 'author_link_map', None))
|
||||
if ok('user_categories'):
|
||||
set_user_categories(root, prefixes, refines, getattr(mi, 'user_categories', None))
|
||||
set_user_categories(root, prefixes, refines,
|
||||
getattr(mi, 'user_categories', None))
|
||||
# We ignore apply_null for the next two to match the behavior with opf2.py
|
||||
if mi.application_id:
|
||||
set_application_id(root, prefixes, refines, mi.application_id)
|
||||
if mi.uuid:
|
||||
set_uuid(root, prefixes, refines, mi.uuid)
|
||||
new_user_metadata, current_user_metadata = mi.get_all_user_metadata(True), current_mi.get_all_user_metadata(True)
|
||||
|
||||
new_user_metadata = mi.get_all_user_metadata(True)
|
||||
current_user_metadata = current_mi.get_all_user_metadata(True)
|
||||
missing = object()
|
||||
|
||||
for key in tuple(new_user_metadata):
|
||||
meta = new_user_metadata.get(key)
|
||||
if meta is None:
|
||||
@@ -1098,7 +1170,9 @@ def apply_metadata(root, mi, cover_prefix='', cover_data=None, apply_null=False,
|
||||
return raster_cover
|
||||
|
||||
|
||||
def set_metadata(stream, mi, cover_prefix='', cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False, add_missing_cover=True):
|
||||
def set_metadata(stream, mi, cover_prefix='', cover_data=None,
|
||||
apply_null=False, update_timestamp=False,
|
||||
force_identifiers=False, add_missing_cover=True):
|
||||
root = parse_opf(stream)
|
||||
return apply_metadata(
|
||||
root, mi, cover_prefix=cover_prefix, cover_data=cover_data,
|
||||
|
||||
Reference in New Issue
Block a user