mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-08 12:44:12 +01:00
216 lines
8.4 KiB
Python
216 lines
8.4 KiB
Python
import os, re
|
|
from ebook_converter.utils.date import isoformat, now
|
|
from ebook_converter import guess_type
|
|
from ebook_converter.polyglot.builtins import iteritems
|
|
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
|
|
def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
    """Copy the fields that are set on ``mi`` into the OEB metadata ``m``.

    Every field of ``mi`` that is not null replaces the corresponding
    term(s) in ``m``. Fields that are null leave ``m`` untouched unless
    ``override_input_metadata`` is true, in which case the book producer,
    description, publisher, series, series index, rating, subjects and ISBN
    identifier entries are removed from ``m`` instead. Finally, a
    ``timestamp`` term holding the current time is added when ``m`` has
    none.

    :param mi: Source metadata. Must provide ``is_null()``,
        ``get_identifiers()``, ``format_series_index()`` and the attributes
        read below.
    :param m: Destination metadata, modified in place. Must provide
        ``clear()``, ``add()``, ``filter()`` and the ``title``,
        ``identifier`` and ``timestamp`` attributes.
    :param log: Not used by this function; kept so that existing callers
        keep working.
    :param override_input_metadata: Remove terms from ``m`` whose field in
        ``mi`` is null (see above) rather than keeping the input values.
    """
    from ebook_converter.ebooks.oeb.base import OPF

    def is_book_producer(item):
        return item.role.lower() == 'bkp'

    def replace_term(term, field, make_value=None, drop_if_null=False):
        # Replace ``term`` in ``m`` with the value of ``mi.<field>``.
        # ``make_value`` (a zero-argument callable) supplies the content
        # when it is not simply the attribute itself; it is only invoked
        # after the old entries were cleared, and only for non-null fields.
        if mi.is_null(field):
            if drop_if_null and override_input_metadata:
                m.clear(term)
            return
        m.clear(term)
        m.add(term, getattr(mi, field) if make_value is None else make_value())

    replace_term('title', 'title')
    if mi.title_sort:
        # Fall back to the sort title when there is no title at all.
        if not m.title:
            m.add('title', mi.title_sort)
        m.clear('title_sort')
        m.add('title_sort', mi.title_sort)

    if not mi.is_null('authors'):
        # NOTE(review): filter() presumably drops the entries for which the
        # predicate is true -- confirm against the OEB Metadata class.
        m.filter('creator', lambda x: x.role.lower() in ['aut', ''])
        for author in mi.authors:
            attrib = {'role': 'aut'}
            if mi.author_sort:
                attrib[OPF('file-as')] = mi.author_sort
            m.add('creator', author, attrib=attrib)

    if not mi.is_null('book_producer'):
        m.filter('contributor', is_book_producer)
        m.add('contributor', mi.book_producer, role='bkp')
    elif override_input_metadata:
        m.filter('contributor', is_book_producer)

    replace_term('description', 'comments', drop_if_null=True)
    replace_term('publisher', 'publisher', drop_if_null=True)
    replace_term('series', 'series', drop_if_null=True)

    identifiers = mi.get_identifiers()
    set_isbn = False
    for typ, val in iteritems(identifiers):
        wanted = typ.lower()
        if wanted == 'isbn':
            set_isbn = True
        # Update every existing identifier with the same scheme; only add a
        # new one when none matched.
        has = False
        for existing in m.identifier:
            if existing.scheme.lower() == wanted:
                existing.content = val
                has = True
        if not has:
            m.add('identifier', val, scheme=typ.upper())
    if override_input_metadata and not set_isbn:
        m.filter('identifier', lambda x: x.scheme.lower() == 'isbn')

    if not mi.is_null('languages'):
        m.clear('language')
        for lang in mi.languages:
            # Skip empty and "undetermined" language codes.
            if lang and lang.lower() not in ('und', ''):
                m.add('language', lang)

    replace_term('series_index', 'series_index',
                 lambda: mi.format_series_index(), drop_if_null=True)
    replace_term('rating', 'rating',
                 lambda: '%.2f' % mi.rating, drop_if_null=True)

    if not mi.is_null('tags'):
        m.clear('subject')
        for tag in mi.tags:
            m.add('subject', tag)
    elif override_input_metadata:
        m.clear('subject')

    replace_term('date', 'pubdate', lambda: isoformat(mi.pubdate))
    replace_term('timestamp', 'timestamp', lambda: isoformat(mi.timestamp))
    replace_term('rights', 'rights')
    replace_term('publication_type', 'publication_type')

    # Always leave a timestamp behind, defaulting to "now".
    if not m.timestamp:
        m.add('timestamp', isoformat(now()))
|
|
|
|
|
|
class MergeMetadata(object):
    """Merge in user metadata, including cover.

    Instances are callable: calling one with an OEB book and a metadata
    object updates the book's metadata, cover and identifiers in place.
    """

    def __call__(self, oeb, mi, opts, override_input_metadata=False):
        """Merge ``mi`` into ``oeb``.

        :param oeb: The book being processed; stored on ``self.oeb``. Its
            ``log`` attribute is stored on ``self.log``.
        :param mi: The user specified metadata.
        :param opts: Options object; only ``prefer_metadata_cover`` is read.
        :param override_input_metadata: Forwarded to
            ``meta_info_to_oeb_metadata``.
        """
        self.oeb, self.log = oeb, oeb.log
        m = self.oeb.metadata
        self.log('Merging user specified metadata...')
        meta_info_to_oeb_metadata(
            mi, m, oeb.log, override_input_metadata=override_input_metadata)

        # The cover term always reflects what set_cover() decided.
        cover_id = self.set_cover(mi, opts.prefer_metadata_cover)
        m.clear('cover')
        if cover_id is not None:
            m.add('cover', cover_id)

        if mi.uuid is not None:
            m.filter('identifier', lambda x: x.id == 'uuid_id')
            m.add('identifier', mi.uuid, id='uuid_id', scheme='uuid')
            # The identifier that was just added becomes the book's uid.
            self.oeb.uid = m.identifier[-1]

        if mi.application_id is not None:
            m.filter('identifier', lambda x: x.scheme == 'calibre')
            m.add('identifier', mi.application_id, scheme='calibre')

    @staticmethod
    def _user_cover(mi):
        """Return ``(data, ext)`` for the cover supplied through ``mi``.

        A readable file at ``mi.cover`` wins over ``mi.cover_data``. ``data``
        is ``b''`` when neither is available. ``ext`` is always one of
        ``'png'``, ``'jpg'`` or ``'jpeg'``; anything else becomes ``'jpg'``.
        """
        cdata, ext = b'', 'jpg'
        if mi.cover and os.access(mi.cover, os.R_OK):
            with open(mi.cover, 'rb') as f:
                cdata = f.read()
            ext = mi.cover.rpartition('.')[-1].lower().strip()
        elif mi.cover_data and mi.cover_data[-1]:
            cdata = mi.cover_data[1]
            ext = mi.cover_data[0]
        if ext not in ('png', 'jpg', 'jpeg'):
            ext = 'jpg'
        return cdata, ext

    def set_cover(self, mi, prefer_metadata_cover):
        """Install the cover from ``mi`` and return its manifest id.

        :param mi: Metadata whose ``cover`` / ``cover_data`` may hold a user
            supplied cover image.
        :param prefer_metadata_cover: When true and the book already has a
            ``cover`` guide entry, the user supplied cover is ignored.
        :return: The manifest id of the cover that is in effect, or ``None``
            when the book ends up without a cover.
        """
        cdata, ext = self._user_cover(mi)
        guide, manifest = self.oeb.guide, self.oeb.manifest

        old_cover = guide['cover'] if 'cover' in guide else None
        if prefer_metadata_cover and old_cover is not None:
            cdata = b''

        if cdata:
            guide.remove('cover')
            guide.remove('titlepage')
        elif (self.oeb.plumber_output_format in {'mobi', 'azw3'}
                and old_cover is not None):
            # The amazon formats don't support html cover pages, so remove
            # them even if no cover was specified.
            guide.remove('titlepage')

        old_item = None
        replace_old_cover = False
        if old_cover is not None:
            if old_cover.href in manifest.hrefs:
                old_item = manifest.hrefs[old_cover.href]
                if not cdata:
                    # Nothing new to install: keep the existing cover.
                    return old_item.id
                replace_old_cover = True
            elif not cdata:
                # The guide points at a file the manifest does not know;
                # register it (the media type is hard-coded to JPEG).
                cover_id = manifest.generate(id='cover')[0]
                manifest.add(cover_id, old_cover.href, 'image/jpeg')
                return cover_id

        if not cdata:
            return None

        cover_id, href = manifest.generate('cover', 'cover.' + ext)
        new_cover_item = manifest.add(
            cover_id, href, guess_type('cover.' + ext)[0], data=cdata)
        guide.add('cover', 'Cover', href)
        if replace_old_cover:
            self.remove_old_cover(old_item, new_cover_item.href)
        return cover_id

    def remove_old_cover(self, cover_item, new_cover_href=None):
        """Remove ``cover_item`` from the book and fix references to it.

        Image references in the spine that resolve to ``cover_item.href``
        are re-pointed at ``new_cover_href`` when it is given; otherwise the
        referencing element is removed. Spine items that are left without
        any text or image after such a removal are dropped from the spine,
        the manifest and the guide.

        :param cover_item: Manifest item of the cover being replaced.
        :param new_cover_href: href of the replacement cover, or ``None`` to
            delete the references instead of rewriting them.
        """
        from ebook_converter.ebooks.oeb.base import XPath, XLINK
        from lxml import etree

        self.oeb.manifest.remove(cover_item)

        # Remove any references to the cover in the HTML
        affected_items = set()
        find_images = XPath('//h:img[@src]|//svg:image[@xl:href]')
        for item in self.oeb.spine:
            try:
                images = find_images(item.data)
            except Exception:
                # Best effort: items that cannot be queried are skipped.
                images = ()
            removed = False
            for img in images:
                href = img.get('src') or img.get(XLINK('href'))
                try:
                    href = item.abshref(href)
                except Exception:
                    continue  # Invalid URL, ignore
                if href != cover_item.href:
                    continue
                if new_cover_href is not None:
                    replacement_href = item.relhref(new_cover_href)
                    attr = 'src' if img.tag.endswith('img') else XLINK('href')
                    img.set(attr, replacement_href)
                else:
                    parent = img.getparent()
                    if parent.tag.endswith('}svg'):
                        # Drop the whole <svg> wrapper, not just the image.
                        parent.getparent().remove(parent)
                    else:
                        parent.remove(img)
                    removed = True
            if removed:
                affected_items.add(item)

        # Check if the resulting HTML has no content, if so remove it
        find_body = XPath('//h:body')
        find_visuals = XPath('//h:img|//svg:svg')
        for item in affected_items:
            body = find_body(item.data)
            if body:
                text = etree.tostring(body[0], method='text',
                                      encoding='unicode')
            else:
                text = ''
            text = re.sub(r'\s+', '', text)
            if not text and not find_visuals(item.data):
                self.log('Removing %s as it is a wrapper around'
                        ' the cover image'%item.href)
                self.oeb.spine.remove(item)
                self.oeb.manifest.remove(item)
                self.oeb.guide.remove_by_href(item.href)
|