1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-05 08:25:46 +01:00
Files
ebook-converter/ebook_converter/ebooks/oeb/transforms/data_url.py
gryf 1465e4267f Sorted out mime initialization.
Every mime-related function in the main __init__.py checks a flag to see
whether initialization has already been done. This is nonsense, since
initialization should be done implicitly, early on, when the converter starts.

This commit straightens things out: initialization is now done in the cli
module.

Also, the guess_type function was removed, since it was just a proxy for
the mimetypes.guess_type function.
2020-06-14 15:41:18 +02:00

52 lines
2.0 KiB
Python

import mimetypes
import re
from ebook_converter.ebooks.oeb.base import XPath, urlunquote
from ebook_converter.polyglot.builtins import as_bytes
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
class DataURL(object):
    """Transform that replaces ``data:`` URI images with manifest items.

    Calling an instance walks every spine item, finds ``<img>`` elements
    whose ``src`` is a ``data:image/...`` URI, decodes the payload, adds it
    to the manifest as a separate image and rewrites ``src`` to point at it.
    """

    def __call__(self, oeb, opts):
        """Convert all image data URIs found in the spine of *oeb*.

        :param oeb: book object; this method reads ``oeb.log``, iterates
            ``oeb.spine`` and (through :meth:`convert_image_data_uri`)
            adds entries to ``oeb.manifest``.
        :param opts: conversion options; not used by this transform.
        """
        # Imported once per call rather than once per image.
        from ebook_converter.polyglot.binary import from_base64_bytes
        from ebook_converter.utils.imghdr import what

        self.log = oeb.log
        attr_path = XPath('//h:img[@src]')
        for item in oeb.spine:
            root = item.data
            if not hasattr(root, 'xpath'):
                # Data that cannot be queried with xpath is skipped.
                continue
            for img in attr_path(root):
                raw = img.get('src', '')
                if not raw.startswith('data:'):
                    continue
                # Split "<header>,<payload>" at the first comma only.
                header, data = raw.partition(',')[0::2]
                if not header.startswith('data:image/') or not data:
                    continue
                if ';base64' in header:
                    # Drop any whitespace embedded in the base64 payload
                    # before decoding.
                    data = re.sub(r'\s+', '', data)
                    try:
                        data = from_base64_bytes(data)
                    except Exception:
                        # Best effort: a broken image must not abort the
                        # whole conversion.
                        self.log.error('Found invalid base64 encoded data URI, ignoring it')
                        continue
                else:
                    # Payload without the base64 marker is URL-quoted.
                    data = urlunquote(data)
                data = as_bytes(data)
                fmt = what(None, data)
                if not fmt:
                    self.log.warn('Image encoded as data URL has unknown format, ignoring')
                    continue
                img.set('src', item.relhref(self.convert_image_data_uri(data, fmt, oeb)))

    def convert_image_data_uri(self, data, fmt, oeb):
        """Add *data* to the manifest of *oeb* as a new image item.

        :param data: decoded image payload.
        :param fmt: image format name, used as the file extension of the
            generated href.
        :param oeb: book object whose ``manifest`` receives the new item.
        :return: the href generated for the new manifest item.
        """
        self.log('Found image encoded as data URI converting it to normal image')
        item_id, item_href = oeb.manifest.generate('data-url-image', 'data-url-image.' + fmt)
        # The media type is guessed from the generated href's extension
        # using the standard library directly.
        oeb.manifest.add(item_id, item_href,
                         mimetypes.guess_type(item_href)[0], data=data)
        return item_href