mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-17 23:05:45 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
219 lines
8.5 KiB
Python
219 lines
8.5 KiB
Python
#!/usr/bin/env python2
|
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
import os, re
|
|
from ebook_converter.utils.date import isoformat, now
|
|
from ebook_converter import guess_type
|
|
from ebook_converter.polyglot.builtins import iteritems
|
|
|
|
|
|
def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
    """Copy the fields that are set on ``mi`` onto the OEB metadata ``m``.

    Every field is probed with ``mi.is_null()`` (``title_sort`` and
    ``author_sort`` by plain truthiness).  A populated field replaces the
    existing entries of the matching term in ``m``; ``m`` is modified in
    place through its ``clear()``, ``add()`` and ``filter()`` methods and
    nothing is returned.

    :param mi: Source metadata.  Read through ``is_null()``,
        ``get_identifiers()``, ``format_series_index()`` and attributes.
    :param m: Target OEB metadata container.
    :param log: Accepted for interface compatibility; not used here.
    :param override_input_metadata: When true, the description, publisher,
        series, series_index, rating and subject terms are cleared even if
        ``mi`` has no value for them, the ``bkp`` contributor filter is
        applied without a replacement, and the ISBN identifier filter is
        applied when ``mi`` supplies no ISBN.
    """
    from ebook_converter.ebooks.oeb.base import OPF

    def replace(term, field, make_values, droppable):
        # Swap the entries of ``term`` for the values derived from
        # ``mi.<field>``.  ``make_values`` is only invoked after the old
        # entries were cleared, so values are computed lazily and only for
        # populated fields.
        if mi.is_null(field):
            if droppable and override_input_metadata:
                m.clear(term)
            return
        m.clear(term)
        for value in make_values():
            m.add(term, value)

    # Predicates handed to m.filter(); presumably matching entries are the
    # ones dropped -- confirm against the Metadata.filter implementation.
    def is_author(entry):
        return entry.role.lower() in ['aut', '']

    def is_book_producer(entry):
        return entry.role.lower() == 'bkp'

    def is_isbn(entry):
        return entry.scheme.lower() == 'isbn'

    replace('title', 'title', lambda: (mi.title,), False)

    sort_title = mi.title_sort
    if sort_title:
        # Without any title at all, the sort title doubles as the title.
        if not m.title:
            m.add('title', sort_title)
        m.clear('title_sort')
        m.add('title_sort', sort_title)

    if not mi.is_null('authors'):
        m.filter('creator', is_author)
        for author in mi.authors:
            # A fresh dict per author: it is handed over to m.add().
            attributes = dict(role='aut')
            sort_key = mi.author_sort
            if sort_key:
                attributes[OPF('file-as')] = sort_key
            m.add('creator', author, attrib=attributes)

    if mi.is_null('book_producer'):
        if override_input_metadata:
            m.filter('contributor', is_book_producer)
    else:
        m.filter('contributor', is_book_producer)
        m.add('contributor', mi.book_producer, role='bkp')

    replace('description', 'comments', lambda: (mi.comments,), True)
    replace('publisher', 'publisher', lambda: (mi.publisher,), True)
    replace('series', 'series', lambda: (mi.series,), True)

    # Identifiers: update entries whose scheme matches (case-insensitively),
    # otherwise append a new entry with an upper-cased scheme.
    isbn_supplied = False
    for scheme, value in iteritems(mi.get_identifiers()):
        wanted = scheme.lower()
        if wanted == 'isbn':
            isbn_supplied = True
        updated = False
        for entry in m.identifier:
            if entry.scheme.lower() == wanted:
                entry.content = value
                updated = True
        if not updated:
            m.add('identifier', value, scheme=scheme.upper())
    if override_input_metadata and not isbn_supplied:
        m.filter('identifier', is_isbn)

    # Empty and 'und' language codes are skipped.
    replace(
        'language', 'languages',
        lambda: (code for code in mi.languages
                 if code and code.lower() not in ('und', '')),
        False)

    replace('series_index', 'series_index',
            lambda: (mi.format_series_index(),), True)
    replace('rating', 'rating', lambda: ('%.2f' % mi.rating,), True)
    replace('subject', 'tags', lambda: mi.tags, True)
    replace('date', 'pubdate', lambda: (isoformat(mi.pubdate),), False)
    replace('timestamp', 'timestamp',
            lambda: (isoformat(mi.timestamp),), False)
    replace('rights', 'rights', lambda: (mi.rights,), False)
    replace('publication_type', 'publication_type',
            lambda: (mi.publication_type,), False)

    # Always leave a timestamp behind, falling back to the current time.
    if not m.timestamp:
        m.add('timestamp', isoformat(now()))
|
|
|
|
|
|
class MergeMetadata(object):
    """Merge in user metadata, including cover.

    An instance is called with the OEB book, the user supplied metadata and
    the conversion options; it keeps the book and its logger on ``self.oeb``
    and ``self.log`` for use by the helper methods.
    """

    def __call__(self, oeb, mi, opts, override_input_metadata=False):
        """Merge ``mi`` into ``oeb.metadata`` and set the book cover.

        :param oeb: The OEB book; its ``metadata``, ``guide``, ``manifest``,
            ``spine`` and ``log`` attributes are used.
        :param mi: User supplied metadata.
        :param opts: Conversion options; only ``prefer_metadata_cover`` is
            read.
        :param override_input_metadata: Forwarded unchanged to
            ``meta_info_to_oeb_metadata()``.
        """
        self.oeb, self.log = oeb, oeb.log
        m = self.oeb.metadata
        self.log('Merging user specified metadata...')
        meta_info_to_oeb_metadata(
            mi, m, oeb.log, override_input_metadata=override_input_metadata)

        # The cover entry is always rebuilt from what set_cover() decided.
        cover_id = self.set_cover(mi, opts.prefer_metadata_cover)
        m.clear('cover')
        if cover_id is not None:
            m.add('cover', cover_id)

        if mi.uuid is not None:
            m.filter('identifier', lambda x: x.id == 'uuid_id')
            m.add('identifier', mi.uuid, id='uuid_id', scheme='uuid')
            # The identifier that was just added becomes the book's uid.
            self.oeb.uid = m.identifier[-1]

        if mi.application_id is not None:
            m.filter('identifier', lambda x: x.scheme == 'calibre')
            m.add('identifier', mi.application_id, scheme='calibre')

    def set_cover(self, mi, prefer_metadata_cover):
        """Pick the cover for the book and return its manifest id.

        Cover data is taken from the readable file ``mi.cover`` or, failing
        that, from ``mi.cover_data`` (an ``(extension, data)`` pair).  It is
        discarded again when ``prefer_metadata_cover`` is true and the guide
        already has a ``cover`` entry.

        :param mi: User supplied metadata.
        :param prefer_metadata_cover: Keep the cover referenced by the
            book's guide, if there is one, over the one from ``mi``.
        :return: The manifest id of the cover item, or ``None`` when the
            book ends up without a cover.
        """
        cdata, ext = b'', 'jpg'
        if mi.cover and os.access(mi.cover, os.R_OK):
            with open(mi.cover, 'rb') as f:
                cdata = f.read()
            ext = mi.cover.rpartition('.')[-1].lower().strip()
        elif mi.cover_data and mi.cover_data[-1]:
            ext, cdata = mi.cover_data[0], mi.cover_data[1]
        if ext not in ('png', 'jpg', 'jpeg'):
            ext = 'jpg'

        old_cover = None
        if 'cover' in self.oeb.guide:
            old_cover = self.oeb.guide['cover']
        if prefer_metadata_cover and old_cover is not None:
            cdata = b''

        if cdata:
            self.oeb.guide.remove('cover')
            self.oeb.guide.remove('titlepage')
        elif (self.oeb.plumber_output_format in {'mobi', 'azw3'} and
                old_cover is not None):
            # The amazon formats don't support html cover pages, so remove
            # them even if no cover was specified.
            self.oeb.guide.remove('titlepage')

        # Manifest item of the previous cover; only set when it has to be
        # replaced by the new cover further down.
        old_item = None
        if old_cover is not None:
            if old_cover.href in self.oeb.manifest.hrefs:
                existing = self.oeb.manifest.hrefs[old_cover.href]
                if not cdata:
                    return existing.id
                old_item = existing
            elif not cdata:
                # The guide points at a file that is missing from the
                # manifest: register it and use it as the cover.
                cover_id = self.oeb.manifest.generate(id='cover')[0]
                self.oeb.manifest.add(cover_id, old_cover.href, 'image/jpeg')
                return cover_id

        if not cdata:
            return None

        cover_id, href = self.oeb.manifest.generate('cover', 'cover.' + ext)
        new_cover_item = self.oeb.manifest.add(
            cover_id, href, guess_type('cover.' + ext)[0], data=cdata)
        self.oeb.guide.add('cover', 'Cover', href)
        if old_item is not None:
            self.remove_old_cover(old_item, new_cover_item.href)
        return cover_id

    def remove_old_cover(self, cover_item, new_cover_href=None):
        """Drop ``cover_item`` from the manifest and fix references to it.

        Images in spine items that reference the old cover are re-pointed to
        ``new_cover_href`` when one is given.  Otherwise they are removed
        from the markup, and spine items that are left without any text or
        image are removed from the book as well.

        :param cover_item: Manifest item of the cover being replaced.
        :param new_cover_href: href of the replacement cover, or ``None`` to
            remove the referencing images instead.
        """
        from ebook_converter.ebooks.oeb.base import XPath, XLINK
        from lxml import etree

        self.oeb.manifest.remove(cover_item)

        # Loop-invariant helpers, built once.
        find_images = XPath('//h:img[@src]|//svg:image[@xl:href]')
        find_body = XPath('//h:body')
        find_graphics = XPath('//h:img|//svg:svg')
        xlink_href = XLINK('href')

        # Remove any references to the cover in the HTML
        affected_items = set()
        for item in self.oeb.spine:
            try:
                images = find_images(item.data)
            except Exception:
                # Best effort: the XPath cannot be evaluated on this item's
                # data, so treat it as having no images.
                images = ()
            removed = False
            for img in images:
                href = img.get('src') or img.get(xlink_href)
                try:
                    href = item.abshref(href)
                except Exception:
                    continue  # Invalid URL, ignore
                if href != cover_item.href:
                    continue
                if new_cover_href is not None:
                    attr = 'src' if img.tag.endswith('img') else xlink_href
                    img.set(attr, item.relhref(new_cover_href))
                    continue
                parent = img.getparent()
                if parent.tag.endswith('}svg'):
                    # An <svg> wrapper is removed together with its image.
                    parent.getparent().remove(parent)
                else:
                    parent.remove(img)
                removed = True
            if removed:
                affected_items.add(item)

        # Check if the resulting HTML has no content, if so remove it
        for item in affected_items:
            body = find_body(item.data)
            if body:
                text = etree.tostring(body[0], method='text',
                                      encoding='unicode')
            else:
                text = ''
            text = re.sub(r'\s+', '', text)
            if not text and not find_graphics(item.data):
                self.log('Removing %s as it is a wrapper around'
                         ' the cover image' % item.href)
                self.oeb.spine.remove(item)
                self.oeb.manifest.remove(item)
                self.oeb.guide.remove_by_href(item.href)
|