mirror of
https://github.com/gryf/ebook-converter.git
synced 2025-12-29 04:52:26 +01:00
Here is the first batch of modules, which are needed for converting several formats to LRF. Some of the logic has been changed; more cleanups will follow.
264 lines
6.4 KiB
Python
264 lines
6.4 KiB
Python
#!/usr/bin/env python2
|
|
# vim:fileencoding=utf-8
|
|
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
from struct import unpack, error
|
|
import os
|
|
from ebook_converter.utils.speedups import ReadOnlyFileBuffer
|
|
from ebook_converter.constants import ispy3
|
|
from ebook_converter.polyglot.builtins import string_or_bytes, unicode_type
|
|
|
|
""" Recognize image file formats and sizes based on their first few bytes."""
|
|
|
|
# Number of leading bytes read from a file or stream for format detection.
HSIZE = 120
|
|
|
|
|
|
def what(file, h=None):
    """Recognize an image format from the first bytes of a file.

    :param file: path of the file to inspect, or an open binary file-like
        object supporting ``tell``/``read``/``seek`` (its position is put
        back after reading). Ignored when ``h`` is given.
    :param h: optional header data (``bytes``, ``bytearray`` or
        ``memoryview``). When ``None`` the first ``HSIZE`` bytes are read
        from ``file``.
    :return: the name returned by the first matching detector in ``tests``,
        ``'jpeg'`` for data that merely starts with the JPEG SOI marker, or
        ``None`` when nothing matches.
    """
    if h is None:
        if isinstance(file, string_or_bytes):
            # NOTE(review): ``lopen`` is not defined or imported in this
            # module; presumably it is injected as a builtin -- confirm.
            with lopen(file, 'rb') as f:
                h = f.read(HSIZE)
        else:
            location = file.tell()
            h = file.read(HSIZE)
            file.seek(location)
    # The detectors slice the header and call ``.tobytes()`` on the slices,
    # so any raw byte buffer (bytearray included) is wrapped in a memoryview.
    if isinstance(h, (bytes, bytearray)):
        h = memoryview(h)
    for tf in tests:
        res = tf(h)
        if res:
            return res
    # There exist some jpeg files with no headers, only the starting two
    # bytes (the SOI marker). If we cannot identify the data as anything
    # else, identify it as jpeg.
    if h[:2] == b'\xff\xd8':
        return 'jpeg'
    return None
|
|
|
|
|
|
def identify(src):
    """Recognize an image format and its pixel dimensions.

    :param src: a text path to an image file, the raw image ``bytes``, or an
        open seekable binary stream. A stream passed in by the caller is left
        open and its position is restored.
    :return: ``(fmt, width, height)``. ``fmt`` is ``None`` when the image is
        not recognized; ``width`` and ``height`` are ``-1`` when they could
        not be determined.
    """
    width = height = -1

    # Only a file opened here is ours to close; caller supplied streams and
    # in-memory buffers are left alone.
    opened_here = False
    if isinstance(src, unicode_type):
        # NOTE(review): ``lopen`` is not defined or imported in this module;
        # presumably it is injected as a builtin -- confirm.
        stream = lopen(src, 'rb')
        opened_here = True
    elif isinstance(src, bytes):
        stream = ReadOnlyFileBuffer(src)
    else:
        stream = src

    try:
        pos = stream.tell()
        head = stream.read(HSIZE)
        stream.seek(pos)
        fmt = what(None, head)
        size = len(head)

        if fmt == 'png':
            # Dimensions follow the IHDR chunk tag when it is present,
            # otherwise they are read directly after the 8 byte signature.
            if size >= 24 and head[12:16] == b'IHDR':
                s = head[16:24]
            else:
                s = head[8:16]
            try:
                width, height = unpack(b">LL", s)
            except error:
                pass
        elif fmt == 'jpeg':
            # The SOFn segment can lie beyond the header, so scan the stream
            # itself and put the position back afterwards.
            pos = stream.tell()
            try:
                height, width = jpeg_dimensions(stream)
            except Exception:
                # Best effort: unreadable dimensions are reported as -1.
                pass
            finally:
                stream.seek(pos)
        elif fmt == 'gif':
            # Logical screen width/height, little-endian 16 bit each.
            try:
                width, height = unpack(b"<HH", head[6:10])
            except error:
                pass
        elif fmt == 'jpeg2000' and size >= 56:
            # Height precedes width at offset 48 of the header.
            try:
                height, width = unpack(b'>LL', head[48:56])
            except error:
                pass
        return fmt, width, height
    finally:
        if opened_here:
            stream.close()
|
|
|
|
# ---------------------------------#
|
|
# Subroutines per image file type #
|
|
# ---------------------------------#
|
|
|
|
|
|
# Registry of detector functions; the ``test`` decorator appends to it and
# ``what`` tries the entries in registration order.
tests = []
|
|
|
|
|
|
def test(f):
    """Decorator: register ``f`` in ``tests`` and hand it back unchanged."""
    tests.append(f)
    return f
|
|
|
|
|
|
@test
|
|
def jpeg(h):
    """Detect JPEG data (JFIF/Exif tagged, or SOI plus a known tag nearby).

    Mimics the ``file`` utility: besides the JFIF/Exif tag at offset 6, data
    starting with the SOI marker is accepted when ``JFIF`` or ``8BIM`` occurs
    anywhere in the first 32 bytes (covers JPEGs carrying ICC_PROFILE data).
    ``h`` is expected to be a memoryview of the header.
    """
    app_tag = h[6:10]
    if app_tag == b'JFIF' or app_tag == b'Exif':
        return 'jpeg'
    if h[:2] != b'\xff\xd8':
        return None
    prefix = h[:32].tobytes()
    for tag in (b'JFIF', b'8BIM'):
        if tag in prefix:
            return 'jpeg'
    return None
|
|
|
|
|
|
def jpeg_dimensions(stream):
    """Read the image dimensions from the first SOFn segment of a JPEG.

    :param stream: seekable binary stream positioned at the start of the JPEG
        data. The first two bytes (the SOI marker) are skipped, not checked.
        The stream position is not restored.
    :return: ``(height, width)`` of the first SOFn segment, or ``(-1, -1)``
        when a start-of-image, end-of-image or start-of-scan marker, a zero
        marker byte, or an impossible segment length is met first.
    :raises ValueError: if the stream ends before a result is found.
    """
    # A JPEG marker is two bytes of the form 0xff x where 0 < x < 0xff
    # See section B.1.1.2 of https://www.w3.org/Graphics/JPEG/itu-t81.pdf
    stream.seek(2, os.SEEK_CUR)

    def read(n):
        ans = stream.read(n)
        if len(ans) != n:
            raise ValueError('Truncated JPEG data')
        return ans

    def read_byte():
        # ord() takes a one byte ``bytes`` on Python 3 and a one character
        # ``str`` on Python 2, so no version switch is needed.
        return ord(read(1))

    while True:
        # Find the next marker. The scan restarts for every marker, so stray
        # bytes between segments are skipped instead of being taken for a
        # marker code.
        while read_byte() != 0xff:
            pass
        # Soak up padding (any number of 0xff fill bytes)
        marker = 0xff
        while marker == 0xff:
            marker = read_byte()
        if 0xc0 <= marker <= 0xcf and marker not in (0xc4, 0xcc):
            # SOFn marker: skip segment length (2) and sample precision (1)
            stream.seek(3, os.SEEK_CUR)
            return unpack(b'>HH', read(4))
        if 0xd8 <= marker <= 0xda:
            # start of image, end of image, start of scan: no point going on
            return -1, -1
        if marker == 0:
            return -1, -1  # Corrupted JPEG
        if marker == 0x01 or 0xd0 <= marker <= 0xd7:
            # Standalone marker without a payload, keep going
            continue
        # Any other segment: skip its payload. The length field counts its
        # own two bytes, so anything below 2 is corrupt and would otherwise
        # seek backwards.
        size = unpack(b'>H', read(2))[0]
        if size < 2:
            return -1, -1
        stream.seek(size - 2, os.SEEK_CUR)
|
|
|
|
|
|
@test
|
|
def png(h):
    """Detect PNG data by its fixed eight byte signature."""
    signature = b"\211PNG\r\n\032\n"
    return 'png' if h[:8] == signature else None
|
|
|
|
|
|
@test
|
|
def gif(h):
    """Detect GIF data (both the '87a and '89a signatures)."""
    magic = h[:6]
    for variant in (b'GIF87a', b'GIF89a'):
        if magic == variant:
            return 'gif'
    return None
|
|
|
|
|
|
@test
|
|
def tiff(h):
    """Detect TIFF data in either byte order (``MM`` or ``II``).

    Data with one of those byte-order tags followed by ``\\xbc\\x01`` is
    reported as ``'jxr'`` instead of ``'tiff'``.
    """
    if h[:2] not in (b'MM', b'II'):
        return None
    return 'jxr' if h[2:4] == b'\xbc\x01' else 'tiff'
|
|
|
|
|
|
@test
|
|
def webp(h):
    """Detect WebP data: a RIFF container whose form type is ``WEBP``."""
    if h[:4] != b'RIFF':
        return None
    if h[8:12] != b'WEBP':
        return None
    return 'webp'
|
|
|
|
|
|
@test
|
|
def rgb(h):
    """Detect SGI image library data by its two byte magic number."""
    sgi_magic = b'\001\332'
    return 'rgb' if h[:2] == sgi_magic else None
|
|
|
|
|
|
@test
|
|
def pbm(h):
    """Detect PBM (portable bitmap) data: ``P1`` or ``P4`` plus whitespace.

    ``h`` is the header as ``bytes`` or a memoryview. One byte slices are
    compared rather than single items, because indexing a byte buffer yields
    an int on Python 3 and an int never equals ``b'P'``.

    :return: ``'pbm'`` on a match, otherwise ``None``.
    """
    if len(h) < 3:
        return None
    if (h[:1] == b'P' and h[1:2] in (b'1', b'4') and
            h[2:3] in (b' ', b'\t', b'\n', b'\r')):
        return 'pbm'
    return None
|
|
|
|
|
|
@test
|
|
def pgm(h):
    """Detect PGM (portable graymap) data: ``P2`` or ``P5`` plus whitespace.

    ``h`` is the header as ``bytes`` or a memoryview. One byte slices are
    compared rather than single items, because indexing a byte buffer yields
    an int on Python 3 and an int never equals ``b'P'``.

    :return: ``'pgm'`` on a match, otherwise ``None``.
    """
    if len(h) < 3:
        return None
    if (h[:1] == b'P' and h[1:2] in (b'2', b'5') and
            h[2:3] in (b' ', b'\t', b'\n', b'\r')):
        return 'pgm'
    return None
|
|
|
|
|
|
@test
|
|
def ppm(h):
    """Detect PPM (portable pixmap) data: ``P3`` or ``P6`` plus whitespace.

    ``h`` is the header as ``bytes`` or a memoryview. One byte slices are
    compared rather than single items, because indexing a byte buffer yields
    an int on Python 3 and an int never equals ``b'P'``.

    :return: ``'ppm'`` on a match, otherwise ``None``.
    """
    if len(h) < 3:
        return None
    if (h[:1] == b'P' and h[1:2] in (b'3', b'6') and
            h[2:3] in (b' ', b'\t', b'\n', b'\r')):
        return 'ppm'
    return None
|
|
|
|
|
|
@test
|
|
def rast(h):
    """Detect a Sun raster file by its four byte magic number."""
    sun_magic = b'\x59\xA6\x6A\x95'
    return 'rast' if h[:4] == sun_magic else None
|
|
|
|
|
|
@test
|
|
def xbm(h):
    """Detect an X bitmap (X10 or X11): data opening with ``#define ``."""
    prefix = b'#define '
    if h[:len(prefix)] != prefix:
        return None
    return 'xbm'
|
|
|
|
|
|
@test
|
|
def bmp(h):
    """Detect BMP data by its leading ``BM`` tag."""
    return 'bmp' if h[:2] == b'BM' else None
|
|
|
|
|
|
@test
|
|
def emf(h):
    """Detect EMF data: leading ``\\x01\\0\\0\\0`` and `` EMF`` at offset 40."""
    if h[:4] != b'\x01\0\0\0':
        return None
    if h[40:44] != b' EMF':
        return None
    return 'emf'
|
|
|
|
|
|
@test
|
|
def jpeg2000(h):
    """Detect JPEG 2000 data by its twelve byte signature box.

    The box is the length ``0x0000000c``, the type ``jP`` followed by two
    0x20 (space) bytes, and the content ``\\r\\n\\x87\\n``. The two spaces are
    spelled as escapes so the literal keeps its full twelve bytes even if
    whitespace in the source gets collapsed.

    :return: ``'jpeg2000'`` on a match, otherwise ``None``.
    """
    if h[:12] == b'\x00\x00\x00\x0cjP\x20\x20\r\n\x87\n':
        return 'jpeg2000'
    return None
|
|
|
|
|
|
@test
|
|
def svg(h):
    """Detect SVG data from the header bytes.

    Matches data that opens with ``<svg``, or that opens with an XML
    declaration (``<?xml``, case-insensitive) and contains ``<svg`` somewhere
    in the header. ``h`` is expected to be a memoryview.
    """
    if h[:4] == b'<svg':
        return 'svg'
    if h[:2] != b'<?':
        return None
    if h[2:5].tobytes().lower() != b'xml':
        return None
    if b'<svg' in h.tobytes():
        return 'svg'
    return None
|
|
|
|
|
|
# Freeze the registry into a tuple now that the detectors above are
# registered; a tuple has no ``append``, so ``test`` cannot add more.
tests = tuple(tests)
|