mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-07 04:04:12 +01:00
50 lines
2.0 KiB
Python
50 lines
2.0 KiB
Python
import re
|
|
from ebook_converter.ebooks.oeb.base import XPath, urlunquote
|
|
from ebook_converter.polyglot.builtins import as_bytes
|
|
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
|
|
|
|
|
|
class DataURL(object):
    """Transform that replaces inline ``data:`` image URIs with manifest items.

    Calling an instance walks every spine item of the book, finds ``<img>``
    elements whose ``src`` is a ``data:image/...`` URI, decodes the embedded
    payload, registers it in the manifest as a separate image and points the
    ``src`` attribute at the newly created item.
    """

    def __call__(self, oeb, opts):
        """Rewrite all ``data:image/`` sources found in the spine of *oeb*.

        :param oeb: the book being processed; its ``log``, ``spine`` and
            ``manifest`` attributes are used.
        :param opts: conversion options; not used by this transform.
        """
        from ebook_converter.utils.imghdr import what

        self.log = oeb.log
        find_images = XPath('//h:img[@src]')

        for spine_item in oeb.spine:
            tree = spine_item.data
            # Spine items whose data is not a parsed tree cannot be queried.
            if not hasattr(tree, 'xpath'):
                continue

            for img in find_images(tree):
                src = img.get('src', '')
                if not src.startswith('data:'):
                    continue

                # Everything before the first comma is the header (media type
                # and optional encoding marker), everything after is payload.
                header, _, payload = src.partition(',')
                if not (header.startswith('data:image/') and payload):
                    continue

                if ';base64' in header:
                    # Drop any whitespace embedded in the base64 text before
                    # handing it to the decoder.
                    payload = re.sub(r'\s+', '', payload)
                    from ebook_converter.polyglot.binary import (
                        from_base64_bytes
                    )
                    try:
                        payload = from_base64_bytes(payload)
                    except Exception:
                        self.log.error('Found invalid base64 encoded data URI, ignoring it')
                        continue
                else:
                    # Payload without the base64 marker is percent-encoded.
                    payload = urlunquote(payload)

                image_bytes = as_bytes(payload)
                image_format = what(None, image_bytes)
                if not image_format:
                    self.log.warn('Image encoded as data URL has unknown format, ignoring')
                    continue

                new_href = self.convert_image_data_uri(
                    image_bytes, image_format, oeb)
                img.set('src', spine_item.relhref(new_href))

    def convert_image_data_uri(self, data, fmt, oeb):
        """Add *data* to the manifest of *oeb* as a new image item.

        :param data: the decoded image content.
        :param fmt: the detected image format, used as the file extension of
            the generated href.
        :param oeb: the book whose manifest receives the new item.
        :return: the href of the manifest item that was created.
        """
        self.log('Found image encoded as data URI converting it to normal image')
        from ebook_converter import guess_type

        new_id, new_href = oeb.manifest.generate(
            'data-url-image', 'data-url-image.' + fmt)
        media_type = guess_type(new_href)[0]
        oeb.manifest.add(new_id, new_href, media_type, data=data)
        return new_href
|