mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-15 21:45:52 +01:00
75 lines
2.6 KiB
Python
75 lines
2.6 KiB
Python
import urllib.parse
|
|
|
|
from ebook_converter.ebooks.oeb import base
|
|
from ebook_converter.ebooks.oeb.base import XPath, xml2text, urlnormalize
|
|
|
|
|
|
# XPath expression selecting <meta name="calibre-content" content="jacket">
# elements. The code below treats a spine item whose document matches this
# expression as the jacket page (presumably a generated metadata page --
# confirm against the code that writes the jacket).
JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]'
|
|
|
|
|
|
class RemoveFirstImage:
    """Transform that removes the first image found in the book's spine.

    Instances are callable: ``RemoveFirstImage()(oeb, opts, metadata)``.
    The work is only done when ``opts.remove_first_image`` is true.
    """

    @staticmethod
    def _detach_keeping_tail(node):
        """Remove *node* from its parent while keeping the text after it.

        In lxml the text that follows an element (its ``tail``) belongs to
        that element, so a plain ``parent.remove(node)`` silently drops it.
        The tail is therefore re-attached to the previous sibling's tail,
        or to the parent's text when *node* is the first child.
        """
        parent = node.getparent()
        tail = node.tail
        if tail:
            previous = node.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or '') + tail
            else:
                parent.text = (parent.text or '') + tail
        parent.remove(node)

    def remove_images(self, item, limit=1):
        """Remove up to *limit* manifest-backed images from *item*.

        Every ``<img src=...>`` in ``item.data`` is looked up in the
        manifest, first by its absolute href and then by the normalized
        form of that href. Images that are not in the manifest are left
        alone and do not count towards *limit*.

        For each match the manifest entry, any guide reference to it and
        the ``<img>`` element itself are removed.

        Returns the number of images removed.
        """
        path = XPath('//h:img[@src]')
        removed = 0
        for img in path(item.data):
            if removed >= limit:
                break
            href = item.abshref(img.get('src'))
            image = self.oeb.manifest.hrefs.get(href)
            if image is None:
                # Retry with the normalized form of the href.
                href = urlnormalize(href)
                image = self.oeb.manifest.hrefs.get(href)
            if image is not None:
                self.oeb.manifest.remove(image)
                self.oeb.guide.remove_by_href(href)
                # Keep any text that follows the image in the document.
                self._detach_keeping_tail(img)
                removed += 1
        return removed

    def remove_first_image(self):
        """Remove the first image in the spine, skipping the jacket page.

        Spine items are visited in order; items matching ``JACKET_XPATH``
        are skipped. The search stops at the first item from which an image
        was actually removed. If that leaves the item with no text and no
        remaining ``<img>``/``<svg>`` element, the item itself is removed
        from the manifest together with the TOC entries and guide
        references that point at it.

        A warning is logged when no image could be removed from any item.
        """
        deleted_item = None
        for item in self.oeb.spine:
            if XPath(JACKET_XPATH)(item.data):
                continue
            removed = self.remove_images(item)
            if removed > 0:
                self.log.info('Removed first image')
                body = XPath('//h:body')(item.data)
                if body:
                    raw = xml2text(body[0]).strip()
                    imgs = XPath('//h:img|//svg:svg')(item.data)
                    if not raw and not imgs:
                        self.log.info('Removing %s as it has no content',
                                      item.href)
                        self.oeb.manifest.remove(item)
                        deleted_item = item
                break
        else:
            # The loop ended without a ``break``: nothing was removed.
            self.log.warning('Could not find first image to remove')

        if deleted_item is not None:
            # Iterate over a copy because entries are removed on the way.
            for item in list(self.oeb.toc):
                # Compare without the fragment part of the TOC href.
                href = urllib.parse.urldefrag(item.href)[0]
                if href == deleted_item.href:
                    self.oeb.toc.remove(item)
            self.oeb.guide.remove_by_href(deleted_item.href)

    def __call__(self, oeb, opts, metadata):
        """Run the transform on *oeb*.

        Stores *oeb*, *opts* and ``oeb.log`` on the instance and removes
        the first image of the book when ``opts.remove_first_image`` is
        true. *metadata* is accepted but not used by this method.
        """
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        if opts.remove_first_image:
            self.remove_first_image()
|
|
|
|
|
|
def linearize_jacket(oeb):
    """Replace table markup in the jacket page with div/span elements.

    Only the first four spine items are inspected. In the first one whose
    document matches ``JACKET_XPATH``, every ``table``, ``tr`` and ``th``
    element is retagged as an XHTML ``div`` and every ``td`` as an XHTML
    ``span``; the search then stops. Documents are modified in place and
    nothing is returned.
    """
    for x in oeb.spine[:4]:
        if XPath(JACKET_XPATH)(x.data):
            for e in XPath('//h:table|//h:tr|//h:th')(x.data):
                e.tag = base.tag('xhtml', 'div')
            for e in XPath('//h:td')(x.data):
                e.tag = base.tag('xhtml', 'span')
            # Only the first jacket page found is rewritten.
            break
|