1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-01-24 14:05:46 +01:00
Files
ebook-converter/ebook_converter/ebooks/metadata/utils.py
gryf ce89f5c9d1 Use the real constants module.
This is part of an ongoing refactor of the calibre code to make it more
readable, and to transform it into something more coherent.

In this patch, there are changes to the imports of some modules, so that
the namespace of each module is no longer polluted with symbols from other
modules, which were themselves often imported from yet other modules. Yuck.
2020-05-29 17:04:53 +02:00

104 lines
3.0 KiB
Python

from collections import namedtuple
from lxml import etree
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb.polish.utils import guess_type
from ebook_converter.spell import parse_lang_code
from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.utils.localization import lang_as_iso639_1
# Three-part OPF package version; every field is an int.
OPFVersion = namedtuple('OPFVersion', ('major', 'minor', 'patch'))


def parse_opf_version(raw):
    """
    Turn a dotted version string into an ``OPFVersion``.

    * If the first dot-separated component of ``raw`` is not an integer
      (this includes ``raw`` being empty or ``None``), ``OPFVersion(2, 0, 0)``
      is returned.
    * If the first component is an integer but any other component is not,
      the result is ``OPFVersion(major, 0, 0)``.
    * Otherwise missing components are filled with ``0`` and components
      beyond the third are dropped.
    """
    pieces = (raw or '').split('.')
    try:
        major = int(pieces[0])
    except Exception:
        return OPFVersion(2, 0, 0)
    try:
        # Every component is converted, including those past the third, so
        # a bad trailing component still triggers the fallback below.
        numbers = [int(piece) for piece in pieces]
    except Exception:
        numbers = [major, 0, 0]
    # Pad to at least three entries, then keep exactly three.
    numbers = (numbers + [0, 0])[:3]
    return OPFVersion(*numbers)
def parse_opf(stream_or_path):
    """
    Parse an OPF document and return the root element of the parsed tree.

    ``stream_or_path`` is either an object with a ``read()`` method or a
    value accepted by ``open()``. A stream supplied by the caller is read
    and left open; a file opened by this function is closed before the
    data is parsed.

    Raises ``ValueError`` if nothing could be read from the input, or if
    parsing yields no root element. Errors raised by the parser itself are
    propagated unchanged.
    """
    if hasattr(stream_or_path, 'read'):
        raw = stream_or_path.read()
        name = getattr(stream_or_path, 'name', 'stream')
    else:
        # Opened here, so closed here: the context manager releases the
        # file handle even if read() fails.
        with open(stream_or_path, 'rb') as stream:
            raw = stream.read()
            name = getattr(stream, 'name', 'stream')
    if not raw:
        # str() guards against a non-string ``name`` (e.g. an integer file
        # descriptor), which would otherwise fail the concatenation.
        raise ValueError('Empty file: ' + str(name))
    raw, _encoding = xml_to_unicode(raw, strip_encoding_pats=True,
                                    resolve_entities=True, assume_utf8=True)
    # Discard anything that precedes the first '<'.
    raw = raw[raw.find('<'):]
    root = etree.fromstring(clean_xml_chars(raw))
    if root is None:
        raise ValueError('Not an OPF file')
    return root
def normalize_languages(opf_languages, mi_languages):
    """
    Preserve original country codes and use 2-letter lang codes where possible

    Each entry of both inputs is run through ``parse_lang_code``; entries
    that raise ``ValueError`` or parse to a falsy value are skipped.

    For every remaining entry of ``mi_languages`` the language code is
    replaced by ``lang_as_iso639_1(code)`` when that returns a truthy value.
    A country code is appended as ``'-' + country``; it is taken from the
    entry itself or, failing that, from the ``opf_languages`` entry with the
    same language code.

    Returns a list of strings in the order of ``mi_languages``.
    """
    def parse_or_none(code):
        try:
            return parse_lang_code(code)
        except ValueError:
            return None

    # Language code -> country code as found in the OPF; for a repeated
    # language code the last occurrence wins.
    country_by_lang = {}
    for code in opf_languages:
        parsed = parse_or_none(code)
        if parsed:
            country_by_lang[parsed.langcode] = parsed.countrycode

    normalized = []
    for code in mi_languages:
        parsed = parse_or_none(code)
        if not parsed:
            continue
        lang = parsed.langcode
        # The lookup uses the language code as parsed, before it is
        # converted by lang_as_iso639_1.
        country = parsed.countrycode or country_by_lang.get(lang, None)
        lang = lang_as_iso639_1(lang) or lang
        if country:
            lang += '-' + country
        normalized.append(lang)
    return normalized
def ensure_unique(template, existing):
    """
    Return a name based on ``template`` that is not contained in ``existing``.

    ``template`` itself is returned when it is not in ``existing``.
    Otherwise ``-1``, ``-2``, ... is tried in turn until a free name is
    found. The counter is inserted before the last ``.suffix`` when the
    template has non-empty text on both sides of its last dot
    (``'a.txt'`` -> ``'a-1.txt'``), and appended to the end otherwise
    (``'id'`` -> ``'id-1'``, ``'.hidden'`` -> ``'.hidden-1'``).
    """
    stem, _, suffix = template.rpartition('.')
    if stem and suffix:
        suffix = '.' + suffix
    else:
        # No dot, or a dot at the very start or end: there is no suffix to
        # split off.
        stem, suffix = template, ''

    candidate = template
    counter = 0
    while candidate in existing:
        counter += 1
        candidate = '%s-%d%s' % (stem, counter, suffix)
    return candidate
def create_manifest_item(root, href_template, id_template, media_type=None):
    """
    Append a new ``item`` element to the OPF manifest found under ``root``.

    The ``href`` and ``id`` of the new item are derived from
    ``href_template`` and ``id_template`` via ``ensure_unique`` so that they
    do not clash with any ``href`` / ``id`` attribute value already present
    in the tree. When ``media_type`` is falsy, the ``media-type`` attribute
    is set to ``guess_type(href_template)`` instead.

    Returns the new element, or ``None`` if ``root`` has no manifest child.
    """
    used_ids = frozenset(root.xpath('//*/@id'))
    used_hrefs = frozenset(root.xpath('//*/@href'))
    href = ensure_unique(href_template, used_hrefs)
    item_id = ensure_unique(id_template, used_ids)

    manifest = root.find(base.tag('opf', 'manifest'))
    if manifest is None:
        return None

    item = manifest.makeelement(base.tag('opf', 'item'))
    item.set('href', href)
    item.set('id', item_id)
    # The type is guessed from the template, not from the uniquified href.
    item.set('media-type', media_type or guess_type(href_template))
    manifest.append(item)
    return item
def pretty_print_opf(root):
    """
    Pass ``root`` through ``pretty_opf`` and then ``pretty_xml_tree`` from
    the ``oeb.polish.pretty`` module.

    Nothing is returned; presumably both helpers reformat the tree in place
    (confirm against their definitions).
    """
    # Imported at call time rather than at module level, as before.
    from ebook_converter.ebooks.oeb.polish import pretty
    pretty.pretty_opf(root)
    pretty.pretty_xml_tree(root)