mirror of
https://github.com/gryf/ebook-converter.git
synced 2025-12-28 20:42:27 +01:00
264 lines
6.3 KiB
Python
264 lines
6.3 KiB
Python
#!/usr/bin/env python2
|
|
# vim:fileencoding=utf-8
|
|
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
from struct import unpack, error
|
|
import os
|
|
from calibre.utils.speedups import ReadOnlyFileBuffer
|
|
from calibre.constants import ispy3
|
|
from polyglot.builtins import string_or_bytes, unicode_type
|
|
|
|
""" Recognize image file formats and sizes based on their first few bytes."""
|
|
|
|
HSIZE = 120
|
|
|
|
|
|
def what(file, h=None):
|
|
' Recognize image headers '
|
|
if h is None:
|
|
if isinstance(file, string_or_bytes):
|
|
with lopen(file, 'rb') as f:
|
|
h = f.read(HSIZE)
|
|
else:
|
|
location = file.tell()
|
|
h = file.read(HSIZE)
|
|
file.seek(location)
|
|
if isinstance(h, bytes):
|
|
h = memoryview(h)
|
|
for tf in tests:
|
|
res = tf(h)
|
|
if res:
|
|
return res
|
|
# There exist some jpeg files with no headers, only the starting two bits
|
|
# If we cannot identify as anything else, identify as jpeg.
|
|
if h[:2] == b'\xff\xd8':
|
|
return 'jpeg'
|
|
return None
|
|
|
|
|
|
def identify(src):
|
|
''' Recognize file format and sizes. Returns format, width, height. width
|
|
and height will be -1 if not found and fmt will be None if the image is not
|
|
recognized. '''
|
|
width = height = -1
|
|
|
|
if isinstance(src, unicode_type):
|
|
stream = lopen(src, 'rb')
|
|
elif isinstance(src, bytes):
|
|
stream = ReadOnlyFileBuffer(src)
|
|
else:
|
|
stream = src
|
|
|
|
pos = stream.tell()
|
|
head = stream.read(HSIZE)
|
|
stream.seek(pos)
|
|
fmt = what(None, head)
|
|
|
|
if fmt in {'jpeg', 'gif', 'png', 'jpeg2000'}:
|
|
size = len(head)
|
|
if fmt == 'png':
|
|
# PNG
|
|
s = head[16:24] if size >= 24 and head[12:16] == b'IHDR' else head[8:16]
|
|
try:
|
|
width, height = unpack(b">LL", s)
|
|
except error:
|
|
return fmt, width, height
|
|
elif fmt == 'jpeg':
|
|
# JPEG
|
|
pos = stream.tell()
|
|
try:
|
|
height, width = jpeg_dimensions(stream)
|
|
except Exception:
|
|
return fmt, width, height
|
|
finally:
|
|
stream.seek(pos)
|
|
elif fmt == 'gif':
|
|
# GIF
|
|
try:
|
|
width, height = unpack(b"<HH", head[6:10])
|
|
except error:
|
|
return fmt, width, height
|
|
elif size >= 56 and fmt == 'jpeg2000':
|
|
# JPEG2000
|
|
try:
|
|
height, width = unpack(b'>LL', head[48:56])
|
|
except error:
|
|
return fmt, width, height
|
|
return fmt, width, height
|
|
|
|
# ---------------------------------#
|
|
# Subroutines per image file type #
|
|
# ---------------------------------#
|
|
|
|
|
|
tests = []
|
|
|
|
|
|
def test(f):
|
|
tests.append(f)
|
|
return f
|
|
|
|
|
|
@test
|
|
def jpeg(h):
|
|
"""JPEG data in JFIF format (Changed by Kovid to mimic the file utility,
|
|
the original code was failing with some jpegs that included ICC_PROFILE
|
|
data, for example: http://nationalpostnews.files.wordpress.com/2013/03/budget.jpeg?w=300&h=1571)"""
|
|
if h[6:10] in (b'JFIF', b'Exif'):
|
|
return 'jpeg'
|
|
if h[:2] == b'\xff\xd8':
|
|
q = h[:32].tobytes()
|
|
if b'JFIF' in q or b'8BIM' in q:
|
|
return 'jpeg'
|
|
|
|
|
|
def jpeg_dimensions(stream):
|
|
# A JPEG marker is two bytes of the form 0xff x where 0 < x < 0xff
|
|
# See section B.1.1.2 of https://www.w3.org/Graphics/JPEG/itu-t81.pdf
|
|
# We read the dimensions from the first SOFn section we come across
|
|
stream.seek(2, os.SEEK_CUR)
|
|
|
|
def read(n):
|
|
ans = stream.read(n)
|
|
if len(ans) != n:
|
|
raise ValueError('Truncated JPEG data')
|
|
return ans
|
|
|
|
if ispy3:
|
|
def read_byte():
|
|
return read(1)[0]
|
|
else:
|
|
def read_byte():
|
|
return ord(read(1)[0])
|
|
|
|
x = None
|
|
while True:
|
|
# Find next marker
|
|
while x != 0xff:
|
|
x = read_byte()
|
|
# Soak up padding
|
|
marker = 0xff
|
|
while marker == 0xff:
|
|
marker = read_byte()
|
|
q = marker
|
|
if 0xc0 <= q <= 0xcf and q != 0xc4 and q != 0xcc:
|
|
# SOFn marker
|
|
stream.seek(3, os.SEEK_CUR)
|
|
return unpack(b'>HH', read(4))
|
|
elif 0xd8 <= q <= 0xda:
|
|
break # start of image, end of image, start of scan, no point
|
|
elif q == 0:
|
|
return -1, -1 # Corrupted JPEG
|
|
elif q == 0x01 or 0xd0 <= q <= 0xd7:
|
|
# Standalone marker
|
|
continue
|
|
else:
|
|
# skip this section
|
|
size = unpack(b'>H', read(2))[0]
|
|
stream.seek(size - 2, os.SEEK_CUR)
|
|
# standalone marker, keep going
|
|
|
|
return -1, -1
|
|
|
|
|
|
@test
|
|
def png(h):
|
|
if h[:8] == b"\211PNG\r\n\032\n":
|
|
return 'png'
|
|
|
|
|
|
@test
|
|
def gif(h):
|
|
"""GIF ('87 and '89 variants)"""
|
|
if h[:6] in (b'GIF87a', b'GIF89a'):
|
|
return 'gif'
|
|
|
|
|
|
@test
|
|
def tiff(h):
|
|
"""TIFF (can be in Motorola or Intel byte order)"""
|
|
if h[:2] in (b'MM', b'II'):
|
|
if h[2:4] == b'\xbc\x01':
|
|
return 'jxr'
|
|
return 'tiff'
|
|
|
|
|
|
@test
|
|
def webp(h):
|
|
if h[:4] == b'RIFF' and h[8:12] == b'WEBP':
|
|
return 'webp'
|
|
|
|
|
|
@test
|
|
def rgb(h):
|
|
"""SGI image library"""
|
|
if h[:2] == b'\001\332':
|
|
return 'rgb'
|
|
|
|
|
|
@test
|
|
def pbm(h):
|
|
"""PBM (portable bitmap)"""
|
|
if len(h) >= 3 and \
|
|
h[0] == b'P' and h[1] in b'14' and h[2] in b' \t\n\r':
|
|
return 'pbm'
|
|
|
|
|
|
@test
|
|
def pgm(h):
|
|
"""PGM (portable graymap)"""
|
|
if len(h) >= 3 and \
|
|
h[0] == b'P' and h[1] in b'25' and h[2] in b' \t\n\r':
|
|
return 'pgm'
|
|
|
|
|
|
@test
|
|
def ppm(h):
|
|
"""PPM (portable pixmap)"""
|
|
if len(h) >= 3 and \
|
|
h[0] == b'P' and h[1] in b'36' and h[2] in b' \t\n\r':
|
|
return 'ppm'
|
|
|
|
|
|
@test
|
|
def rast(h):
|
|
"""Sun raster file"""
|
|
if h[:4] == b'\x59\xA6\x6A\x95':
|
|
return 'rast'
|
|
|
|
|
|
@test
|
|
def xbm(h):
|
|
"""X bitmap (X10 or X11)"""
|
|
s = b'#define '
|
|
if h[:len(s)] == s:
|
|
return 'xbm'
|
|
|
|
|
|
@test
|
|
def bmp(h):
|
|
if h[:2] == b'BM':
|
|
return 'bmp'
|
|
|
|
|
|
@test
|
|
def emf(h):
|
|
if h[:4] == b'\x01\0\0\0' and h[40:44] == b' EMF':
|
|
return 'emf'
|
|
|
|
|
|
@test
|
|
def jpeg2000(h):
|
|
if h[:12] == b'\x00\x00\x00\x0cjP \r\n\x87\n':
|
|
return 'jpeg2000'
|
|
|
|
|
|
@test
|
|
def svg(h):
|
|
if h[:4] == b'<svg' or (h[:2] == b'<?' and h[2:5].tobytes().lower() == b'xml' and b'<svg' in h.tobytes()):
|
|
return 'svg'
|
|
|
|
|
|
tests = tuple(tests)
|