1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-28 22:53:43 +01:00

Initial import

This commit is contained in:
2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@@ -0,0 +1,122 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from io import BytesIO
from struct import calcsize, unpack, unpack_from
from collections import namedtuple
from calibre.utils.fonts.utils import get_font_names2, get_font_characteristics
from polyglot.builtins import range, unicode_type
class UnsupportedFont(ValueError):
    '''
    Raised when font data cannot be handled, for example because it has an
    unknown sfnt signature or lacks a required table.
    '''
# Field order matters: both tuples are built positionally from the values
# returned by get_font_characteristics() and get_font_names2().
FontCharacteristics = namedtuple(
    'FontCharacteristics',
    ('weight', 'is_italic', 'is_bold', 'is_regular', 'fs_type', 'panose',
     'width', 'is_oblique', 'is_wws', 'os2_version'))

FontNames = namedtuple(
    'FontNames',
    ('family_name', 'subfamily_name', 'full_name', 'preferred_family_name',
     'preferred_subfamily_name', 'wws_family_name', 'wws_subfamily_name'))
class FontMetadata(object):
    '''
    Metadata read from the ``name`` and ``OS/2`` tables of a TrueType or
    OpenType (CFF) font.

    After construction the following attributes are available: ``is_otf``,
    ``tables`` (lower-cased tag -> (offset, length, checksum)), ``names``
    (a FontNames tuple), ``characteristics`` (a FontCharacteristics tuple)
    and the CSS style values ``font_family``, ``font_weight``,
    ``font_stretch`` and ``font_style``.
    '''

    # CSS font-stretch keywords, indexed by (OS/2 width - 1)
    _FONT_STRETCH = (
        'ultra-condensed', 'extra-condensed', 'condensed', 'semi-condensed',
        'normal', 'semi-expanded', 'expanded', 'extra-expanded',
        'ultra-expanded')

    def __init__(self, bytes_or_stream):
        '''
        :param bytes_or_stream: The raw font data, or a seekable binary
            stream (anything with a ``read`` attribute) positioned anywhere.
            The stream is rewound to the start before returning.
        :raises UnsupportedFont: If the data does not start with a known
            sfnt signature or a required table is missing/truncated.
        '''
        if not hasattr(bytes_or_stream, 'read'):
            bytes_or_stream = BytesIO(bytes_or_stream)
        f = bytes_or_stream
        f.seek(0)
        header = f.read(4)
        if header not in {b'\x00\x01\x00\x00', b'OTTO'}:
            raise UnsupportedFont('Not a supported sfnt variant')

        self.is_otf = header == b'OTTO'
        self.read_table_metadata(f)
        self.read_names(f)
        self.read_characteristics(f)

        f.seek(0)
        self.font_family = self.names.family_name

        wt = self.characteristics.weight
        if wt == 400:
            wt = 'normal'
        elif wt == 700:
            wt = 'bold'
        else:
            wt = unicode_type(wt)
        self.font_weight = wt

        # The width must be in 1..9 to index the keyword table. Plain
        # indexing with (width - 1) would silently map 0 to the last entry
        # and raise IndexError above 9, so fall back to 'normal' instead.
        width = self.characteristics.width
        if 1 <= width <= len(self._FONT_STRETCH):
            self.font_stretch = self._FONT_STRETCH[width - 1]
        else:
            self.font_stretch = 'normal'

        if self.characteristics.is_oblique:
            self.font_style = 'oblique'
        elif self.characteristics.is_italic:
            self.font_style = 'italic'
        else:
            self.font_style = 'normal'

    def read_table_metadata(self, f):
        '''
        Read the table directory from the stream ``f`` into ``self.tables``,
        a dict mapping the lower-cased table tag (bytes) to a tuple of
        (offset, length, checksum).
        '''
        f.seek(4)
        num_tables = unpack(b'>H', f.read(2))[0]
        # Start of table record entries
        f.seek(4 + 4*2)
        table_record = b'>4s3L'
        sz = calcsize(table_record)
        self.tables = {}
        block = f.read(sz * num_tables)
        for i in range(num_tables):
            table_tag, table_checksum, table_offset, table_length = \
                unpack_from(table_record, block, i*sz)
            self.tables[table_tag.lower()] = (table_offset, table_length,
                                              table_checksum)

    def read_names(self, f):
        '''
        Read the name table from the stream ``f`` into ``self.names``.

        :raises UnsupportedFont: If there is no name table or it is shorter
            than the table directory claims.
        '''
        if b'name' not in self.tables:
            raise UnsupportedFont('This font has no name table')
        toff, tlen = self.tables[b'name'][:2]
        f.seek(toff)
        table = f.read(tlen)
        if len(table) != tlen:
            raise UnsupportedFont('This font has a name table of incorrect length')
        vals = get_font_names2(table, raw_is_table=True)
        self.names = FontNames(*vals)

    def read_characteristics(self, f):
        '''
        Read the OS/2 table from the stream ``f`` into
        ``self.characteristics``.

        :raises UnsupportedFont: If there is no OS/2 table or it is shorter
            than the table directory claims.
        '''
        if b'os/2' not in self.tables:
            raise UnsupportedFont('This font has no OS/2 table')
        toff, tlen = self.tables[b'os/2'][:2]
        f.seek(toff)
        table = f.read(tlen)
        if len(table) != tlen:
            raise UnsupportedFont('This font has an OS/2 table of incorrect length')
        vals = get_font_characteristics(table, raw_is_table=True)
        self.characteristics = FontCharacteristics(*vals)

    def to_dict(self):
        '''
        Return all the metadata as a flat dictionary: the CSS style values
        under hyphenated keys plus one key per field of ``self.names`` and
        ``self.characteristics``.
        '''
        ans = {
            'is_otf': self.is_otf,
            'font-family': self.font_family,
            'font-weight': self.font_weight,
            'font-style': self.font_style,
            'font-stretch': self.font_stretch,
        }
        for source in (self.names, self.characteristics):
            for field in source._fields:
                ans[field] = getattr(source, field)
        return ans
if __name__ == '__main__':
    # Pretty-print the metadata of the font file named by the last command
    # line argument.
    import sys
    from pprint import pprint
    with open(sys.argv[-1], 'rb') as stream:
        metadata = FontMetadata(stream)
    pprint(metadata.to_dict())

View File

@@ -0,0 +1,412 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from collections import defaultdict
from threading import Thread
from calibre import walk, prints, as_unicode
from calibre.constants import (config_dir, iswindows, isosx, plugins, DEBUG,
isworker, filesystem_encoding)
from calibre.utils.fonts.metadata import FontMetadata, UnsupportedFont
from calibre.utils.icu import sort_key
from polyglot.builtins import itervalues, unicode_type, filter
class NoFonts(ValueError):
    ''' Raised when no fonts are known for a requested font family. '''
# Font dirs {{{
def default_font_dirs():
    '''
    Return a fresh list of hard-coded font directories: three system wide
    locations followed by two per-user locations under the home directory.
    '''
    system_wide = [
        '/opt/share/fonts',
        '/usr/share/fonts',
        '/usr/local/share/fonts',
    ]
    per_user = [os.path.expanduser(location)
                for location in ('~/.local/share/fonts', '~/.fonts')]
    return system_wide + per_user
def fc_list():
    '''
    Return the font directories known to the fontconfig library, loaded via
    ctypes.

    Falls back to default_font_dirs() when fontconfig cannot be found or
    loaded, when any of the needed functions is missing, when it returns no
    directory list, or when it reports fewer than three directories.
    Directories that lie inside a directory seen earlier in the list are
    dropped from the result.
    '''
    import ctypes
    from ctypes.util import find_library

    lib = find_library('fontconfig')
    if lib is None:
        return default_font_dirs()
    try:
        lib = ctypes.CDLL(lib)
    except Exception:
        # Loading is best effort, but do not swallow KeyboardInterrupt or
        # SystemExit the way a bare except would.
        return default_font_dirs()

    try:
        get_font_dirs = ctypes.CFUNCTYPE(
            ctypes.c_void_p, ctypes.c_void_p)(('FcConfigGetFontDirs', lib))
        next_dir = ctypes.CFUNCTYPE(
            ctypes.c_char_p, ctypes.c_void_p)(('FcStrListNext', lib))
        end = ctypes.CFUNCTYPE(
            None, ctypes.c_void_p)(('FcStrListDone', lib))
    except AttributeError:
        return default_font_dirs()

    str_list = get_font_dirs(ctypes.c_void_p())
    if not str_list:
        return default_font_dirs()

    ans = []
    try:
        while True:
            d = next_dir(str_list)
            if not d:
                break
            try:
                ans.append(d.decode(filesystem_encoding))
            except ValueError:
                prints('Ignoring undecodeable font path: %r' % d)
    finally:
        # Always hand the list back to fontconfig, even if reading it failed
        end(str_list)

    if len(ans) < 3:
        return default_font_dirs()

    parents, visited = [], set()
    for f in ans:
        path = os.path.normpath(os.path.abspath(os.path.realpath(f)))
        if path == '/':
            continue
        head, tail = os.path.split(path)
        # Walk up the ancestors of path; if one of them has already been
        # seen, path is nested inside it and is not reported separately.
        while head and tail:
            if head in visited:
                break
            head, tail = os.path.split(head)
        else:
            parents.append(path)
        visited.add(path)
    return parents
def font_dirs():
    '''
    Return the list of system font directories for the current platform.

    On Windows the fonts special folder is queried through the winutil
    plugin (RuntimeError if the plugin failed to load), on macOS a fixed
    list is used and everywhere else fc_list() is consulted.
    '''
    if iswindows:
        winutil, load_error = plugins['winutil']
        if load_error:
            raise RuntimeError('Failed to load winutil: %s'%load_error)
        try:
            fonts_folder = winutil.special_folder_path(winutil.CSIDL_FONTS)
        except ValueError:
            fonts_folder = r'C:\Windows\Fonts'
        return [fonts_folder]

    if not isosx:
        return fc_list()

    per_user = [os.path.expanduser(location)
                for location in ('~/.fonts', '~/Library/Fonts')]
    return [
        '/Library/Fonts',
        '/System/Library/Fonts',
        '/usr/share/fonts',
        '/var/root/Library/Fonts',
    ] + per_user
# }}}
# Build font family maps {{{
def font_priority(font):
    '''
    Sort key that tries to put the "Regular" face of a family first.

    Returns 0 for a face whose style, stretch and weight are all normal and
    whose sub-family is named Regular, 1 if only the three properties are
    normal, 2 if only the sub-family is named Regular and otherwise 3 plus
    the number of properties that are not normal.
    '''
    normal_count = sum(
        1 for key in ('font-style', 'font-stretch', 'font-weight')
        if font[key] == 'normal')
    subfamily = (font['wws_subfamily_name'] or
                 font['preferred_subfamily_name'] or font['subfamily_name'])
    named_regular = subfamily == 'Regular'

    if normal_count == 3:
        return 0 if named_regular else 1
    if named_regular:
        return 2
    return 6 - normal_count
def path_significance(path, folders):
    '''
    Return the index of the first entry of ``folders`` that contains
    ``path``, or -1 if none does.

    ``path`` is made absolute and case-normalised before comparing;
    ``folders`` are compared as given. A folder only matches on a path
    component boundary, so ``/x/fonts2/a.ttf`` is not treated as being
    inside ``/x/fonts``.
    '''
    path = os.path.normcase(os.path.abspath(path))
    for i, folder in enumerate(folders):
        # Normalise to exactly one trailing separator (also handles a
        # filesystem root such as '/').
        prefix = folder.rstrip(os.sep) + os.sep
        if path == folder or path.startswith(prefix):
            return i
    return -1
def build_families(cached_fonts, folders, family_attr='font-family'):
    '''
    Group the font metadata dictionaries in ``cached_fonts`` into families.

    :param cached_fonts: Mapping whose values are font metadata
        dictionaries; empty values are skipped.
    :param folders: Folder list passed to path_significance() to choose
        between duplicate faces.
    :param family_attr: The metadata key used as the family name.
    :return: ``(font_family_map, font_families)`` where the map goes from
        lower-cased family name to the list of faces sorted by
        font_priority(), and font_families is a sorted tuple of the
        family names taken from the first face of every family.
    '''
    families = defaultdict(list)
    for font in itervalues(cached_fonts):
        if not font:
            continue
        family_key = icu_lower(font.get(family_attr) or '')
        if family_key:
            families[family_key].append(font)

    for faces in itervalues(families):
        # When the same face (family, weight, stretch, style) is present
        # more than once keep only the copy whose folder has the highest
        # significance; on a tie the copy seen later wins.
        chosen = {}
        discarded = []
        for face in faces:
            fingerprint = (icu_lower(face['font-family']), face['font-weight'],
                           face['font-stretch'], face['font-style'])
            if fingerprint not in chosen:
                chosen[fingerprint] = face
                continue
            incumbent = chosen[fingerprint]
            old_path = incumbent['path']
            new_path = face['path']
            if path_significance(new_path, folders) >= path_significance(old_path, folders):
                discarded.append(incumbent)
                chosen[fingerprint] = face
            else:
                discarded.append(face)
        for face in discarded:
            faces.remove(face)
        faces.sort(key=font_priority)

    # Hand back a plain dict rather than the defaultdict
    font_family_map = dict(families)
    display_names = (faces[0]['font-family']
                     for faces in itervalues(font_family_map))
    font_families = tuple(sorted(display_names, key=sort_key))
    return font_family_map, font_families
# }}}
class FontScanner(Thread):
    '''
    Thread that scans a set of folders for font files and groups the fonts
    found into families.

    The metadata of every scanned file is stored in a JSON cache keyed by
    path, size and modification time, so files that have not changed are
    not parsed again. The query methods join() the thread first, so they
    block until the scan has finished.
    '''

    CACHE_VERSION = 2

    def __init__(self, folders=None, allowed_extensions=None):
        '''
        :param folders: Extra folders to scan, in addition to the system
            font folders, the fonts folder in the config directory and the
            bundled liberation fonts. Defaults to no extra folders.
        :param allowed_extensions: Lower-case file extensions (without the
            dot) of the files to scan. Defaults to ``{'ttf', 'otf'}``.
        '''
        Thread.__init__(self)
        # None is used as the default so that no mutable object is shared
        # between instances.
        if allowed_extensions is None:
            allowed_extensions = {'ttf', 'otf'}
        self.folders = list(folders or ()) + font_dirs() + [
            os.path.join(config_dir, 'fonts'), P('fonts/liberation')]
        self.folders = [os.path.normcase(os.path.abspath(f)) for f in
                        self.folders]
        self.font_families = ()
        self.allowed_extensions = allowed_extensions

    # API {{{
    def find_font_families(self):
        ''' Wait for the scan to finish and return the family names. '''
        self.join()
        return self.font_families

    def fonts_for_family(self, family):
        '''
        Return a list of the faces belonging to the specified family. The first
        face is the "Regular" face of family. Each face is a dictionary with
        many keys, the most important of which are: path, font-family,
        font-weight, font-style, font-stretch. The font-* properties follow the
        CSS 3 Fonts specification.

        :raises NoFonts: If the family is not known.
        '''
        self.join()
        try:
            return self.font_family_map[icu_lower(family)]
        except KeyError:
            raise NoFonts('No fonts found for the family: %r'%family)

    def legacy_fonts_for_family(self, family):
        '''
        Return a simple set of regular, bold, italic and bold-italic faces for
        the specified family. Returns a dictionary with each element being a
        2-tuple of (path to font, full font name) and the keys being: normal,
        bold, italic, bi. The dictionary is empty if the family is unknown.
        '''
        ans = {}
        try:
            faces = self.fonts_for_family(family)
        except NoFonts:
            return ans
        for i, face in enumerate(faces):
            if i == 0:
                # The first face is always used as the normal face
                key = 'normal'
            elif face['font-style'] in {'italic', 'oblique'}:
                key = 'bi' if face['font-weight'] == 'bold' else 'italic'
            elif face['font-weight'] == 'bold':
                key = 'bold'
            else:
                continue
            ans[key] = (face['path'], face['full_name'])
        return ans

    def get_font_data(self, font_or_path):
        '''
        Return the raw bytes of a font file, given either its path or a
        face dictionary with a ``path`` key.
        '''
        path = font_or_path
        if isinstance(font_or_path, dict):
            path = font_or_path['path']
        with lopen(path, 'rb') as f:
            return f.read()

    def find_font_for_text(self, text,
            allowed_families=frozenset(('serif', 'sans-serif')),
            preferred_families=('serif', 'sans-serif', 'monospace', 'cursive', 'fantasy')):
        '''
        Find a font on the system capable of rendering the given text.

        Returns a font family (as given by fonts_for_family()) that has a
        "normal" font and that can render the supplied text. If no such font
        exists, returns None.

        :param allowed_families: Generic CSS families that are accepted as
            soon as a matching family is found.
        :param preferred_families: Order in which the generic families are
            tried when no allowed family was found.
        :return: (family name, faces) or None, None
        :raises TypeError: If text is not a unicode string.
        '''
        from calibre.utils.fonts.utils import (supports_text,
                panose_to_css_generic_family, get_printable_characters)
        if not isinstance(text, unicode_type):
            raise TypeError(u'%r is not unicode'%text)
        text = get_printable_characters(text)
        found = {}

        def filter_faces(font):
            # A font file that cannot be read or parsed simply does not
            # support the text.
            try:
                raw = self.get_font_data(font)
                return supports_text(raw, text)
            except Exception:
                pass
            return False

        for family in self.find_font_families():
            faces = list(filter(filter_faces, self.fonts_for_family(family)))
            if not faces:
                continue
            generic_family = panose_to_css_generic_family(faces[0]['panose'])
            if generic_family in allowed_families or generic_family == preferred_families[0]:
                return (family, faces)
            elif generic_family not in found:
                found[generic_family] = (family, faces)

        for f in preferred_families:
            if f in found:
                return found[f]
        return None, None
    # }}}

    def reload_cache(self):
        '''
        Load (or refresh) the on-disk scanner cache into ``self.cache`` and
        ``self.cached_fonts``, discarding it if it was written with a
        different CACHE_VERSION.
        '''
        if not hasattr(self, 'cache'):
            from calibre.utils.config import JSONConfig
            self.cache = JSONConfig('fonts/scanner_cache')
        else:
            self.cache.refresh()
        if self.cache.get('version', None) != self.CACHE_VERSION:
            self.cache.clear()
        self.cached_fonts = self.cache.get('fonts', {})

    def run(self):
        self.do_scan()

    def do_scan(self):
        '''
        Scan all folders for font files, re-using cached metadata for files
        whose path, size and modification time are unchanged, then rebuild
        the family maps.
        '''
        self.reload_cache()

        if isworker:
            # Do not scan font files in worker processes, use whatever is
            # cached. Font files typically do not change frequently enough
            # to justify a rescan in a worker process.
            self.build_families()
            return

        cached_fonts = self.cached_fonts.copy()
        self.cached_fonts.clear()
        for folder in self.folders:
            if not os.path.isdir(folder):
                continue
            try:
                files = tuple(walk(folder))
            except EnvironmentError as e:
                if DEBUG:
                    prints('Failed to walk font folder:', folder,
                            as_unicode(e))
                continue
            for candidate in files:
                if (candidate.rpartition('.')[-1].lower() not in self.allowed_extensions or not os.path.isfile(candidate)):
                    continue
                candidate = os.path.normcase(os.path.abspath(candidate))
                try:
                    s = os.stat(candidate)
                except EnvironmentError:
                    continue
                fileid = '{0}||{1}:{2}'.format(candidate, s.st_size, s.st_mtime)
                if fileid in cached_fonts:
                    # Use previously cached metadata, since the file size and
                    # last modified timestamp have not changed.
                    self.cached_fonts[fileid] = cached_fonts[fileid]
                    continue
                try:
                    self.read_font_metadata(candidate, fileid)
                except Exception as e:
                    if DEBUG:
                        prints('Failed to read metadata from font file:',
                                candidate, as_unicode(e))
                    continue

        if frozenset(cached_fonts) != frozenset(self.cached_fonts):
            # Write out the cache only if some font files have changed
            self.write_cache()

        self.build_families()

    def build_families(self):
        ''' Rebuild the family map and family name list from the cache. '''
        self.font_family_map, self.font_families = build_families(self.cached_fonts, self.folders)

    def write_cache(self):
        ''' Store the cache version and the cached fonts in the cache. '''
        with self.cache:
            self.cache['version'] = self.CACHE_VERSION
            self.cache['fonts'] = self.cached_fonts

    def force_rescan(self):
        ''' Empty the font cache and write the empty cache out. '''
        self.cached_fonts = {}
        self.write_cache()

    def read_font_metadata(self, path, fileid):
        '''
        Read the metadata of the font file at ``path`` and store it in the
        cache under ``fileid``. Unsupported fonts are stored as an empty
        dictionary.
        '''
        with lopen(path, 'rb') as f:
            try:
                fm = FontMetadata(f)
            except UnsupportedFont:
                self.cached_fonts[fileid] = {}
            else:
                data = fm.to_dict()
                data['path'] = path
                self.cached_fonts[fileid] = data

    def dump_fonts(self):
        ''' Print all families and their faces, for debugging. '''
        self.join()
        for family in self.font_families:
            prints(family)
            for font in self.fonts_for_family(family):
                prints('\t%s: %s'%(font['full_name'], font['path']))
                prints(end='\t')
                for key in ('font-stretch', 'font-weight', 'font-style'):
                    prints('%s: %s'%(key, font[key]), end=' ')
                prints()
                prints('\tSub-family:', font['wws_subfamily_name'] or
                        font['preferred_subfamily_name'] or
                        font['subfamily_name'])
                prints()
            prints()
# Module level scanner instance. It is a thread and is started as soon as
# this module is imported, so the scan runs in the background.
font_scanner = FontScanner()
font_scanner.start()
def force_rescan():
    '''
    Wait for the module level scanner to finish, empty its cache and then
    run the scan again synchronously in the calling thread.
    '''
    scanner = font_scanner
    scanner.join()
    scanner.force_rescan()
    scanner.run()
if __name__ == '__main__':
    # When run as a script, print every font family found by the scanner
    font_scanner.dump_fonts()

View File

@@ -0,0 +1,503 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct
from io import BytesIO
from collections import defaultdict
from polyglot.builtins import iteritems, itervalues, unicode_type, range, as_bytes
class UnsupportedFont(ValueError):
    '''
    Raised when font data cannot be processed, for example because it is
    not an sfnt font or a required table is missing.
    '''
def get_printable_characters(text):
    '''
    Return text normalised to NFC with all characters removed whose Unicode
    category starts with C (other), Z (separator) or M (mark).
    '''
    import unicodedata
    skipped = {'C', 'Z', 'M'}
    normalised = unicodedata.normalize('NFC', text)
    kept = (ch for ch in normalised
            if unicodedata.category(ch)[0] not in skipped)
    return u''.join(kept)
def is_truetype_font(raw):
    '''
    Return ``(ok, signature)`` where signature is the first four bytes of
    raw and ok tells whether it is one of the two supported sfnt
    signatures (TrueType outlines or ``OTTO``).
    '''
    signature = raw[:4]
    supported = signature in {b'OTTO', b'\x00\x01\x00\x00'}
    return (supported, signature)
def get_tables(raw):
    '''
    Iterate over the table directory of the font data raw, yielding for
    every table the tuple ``(tag, table bytes, offset of the directory
    record, offset of the table, checksum)``.
    '''
    record_format = b'>4s3L'
    record_size = 4*4
    first_record = 4*3  # the table records follow the 12 byte header
    (table_count,) = struct.unpack_from(b'>H', raw, 4)
    for index in range(table_count):
        record_offset = first_record + index*record_size
        tag, checksum, data_offset, data_length = struct.unpack_from(
            record_format, raw, record_offset)
        data = raw[data_offset:data_offset+data_length]
        yield (tag, data, record_offset, data_offset, checksum)
def get_table(raw, name):
    '''
    Get the raw table bytes for the specified table in the font.

    The table name is matched case insensitively. Returns the tuple
    ``(table, directory record offset, table offset, checksum)``, or four
    Nones if the font has no such table.
    '''
    wanted = as_bytes(name.lower())
    for entry in get_tables(raw):
        if entry[0].lower() == wanted:
            table_tag, table, table_index, table_offset, table_checksum = entry
            return table, table_index, table_offset, table_checksum
    return None, None, None, None
def get_font_characteristics(raw, raw_is_table=False, return_all=False):
    '''
    Return (weight, is_italic, is_bold, is_regular, fs_type, panose, width,
    is_oblique, is_wws, version). These values are taken from the OS/2
    table of the font. See
    http://www.microsoft.com/typography/otspec/os2.htm for details

    :param raw: The font data, or only the OS/2 table if raw_is_table.
    :param return_all: If True return instead the sixteen leading fields of
        the table followed by panose, selection, is_italic, is_bold and
        is_regular.
    :raises UnsupportedFont: If the font has no OS/2 table.
    '''
    os2_table = raw if raw_is_table else get_table(raw, 'os/2')[0]
    if os2_table is None:
        raise UnsupportedFont('Not a supported font, has no OS/2 table')

    common_fields = b'>Hh3H11h'
    leading = struct.unpack_from(common_fields, os2_table)
    version, char_width, weight, width, fs_type = leading[:5]

    pos = struct.calcsize(common_fields)
    panose = struct.unpack_from(b'>10B', os2_table, pos)
    # Skip panose (10 bytes), the four unicode range fields and the four
    # byte vendor id to get to the selection flags.
    pos += 10 + struct.calcsize(b'>4L') + 4
    selection, = struct.unpack_from(b'>H', os2_table, pos)

    is_italic = bool(selection & (1 << 0))
    is_bold = bool(selection & (1 << 5))
    is_regular = bool(selection & (1 << 6))
    is_wws = bool(selection & (1 << 8))
    is_oblique = bool(selection & (1 << 9))

    if return_all:
        return leading + (panose, selection, is_italic, is_bold, is_regular)

    return (weight, is_italic, is_bold, is_regular, fs_type, panose, width,
            is_oblique, is_wws, version)
def panose_to_css_generic_family(panose):
    '''
    Map a panose classification (a sequence of numbers) to one of the
    generic CSS font families: monospace, cursive, fantasy, sans-serif or
    serif.
    '''
    if panose[3] == 9:
        return 'monospace'
    by_family_type = {3: 'cursive', 4: 'fantasy'}
    family_type = panose[0]
    if family_type in by_family_type:
        return by_family_type[family_type]
    return 'sans-serif' if panose[1] in (11, 12, 13) else 'serif'
def decode_name_record(recs):
    '''
    Get the English names of this font. See
    http://www.microsoft.com/typography/otspec/name.htm for details.

    :param recs: A list of (platform_id, encoding_id, language_id, raw
        bytes) tuples, all for the same name id.
    :return: The decoded name or None if there is no usable record.
        Windows names are preferred (US English first, then other English
        variants), then the Macintosh name with language id 0 and finally
        any name from the Unicode or ISO platforms.
    '''
    if not recs:
        return None
    unicode_names = {}
    windows_names = {}
    mac_names = {}
    iso_codecs = {0: 'ascii', 1: 'utf-16-be', 2: 'iso-8859-1'}
    windows_codecs = {1: 'utf-16-be', 10: 'utf-32-be'}

    for platform_id, encoding_id, language_id, src in recs:
        if language_id > 0x8000:
            continue
        if platform_id == 0:
            if encoding_id < 4:
                try:
                    unicode_names[language_id] = src.decode('utf-16-be')
                except ValueError:
                    continue
        elif platform_id == 1:
            try:
                mac_names[language_id] = src.decode('utf-8')
            except ValueError:
                # Encoding 0 of the Macintosh platform is Mac Roman, so a
                # name with non-ASCII characters is usually not valid
                # UTF-8. Decode it as Mac Roman instead of dropping it.
                if encoding_id != 0:
                    continue
                mac_names[language_id] = src.decode('mac_roman')
        elif platform_id == 2:
            codec = iso_codecs.get(encoding_id)
            if codec is None:
                continue
            try:
                unicode_names[language_id] = src.decode(codec)
            except ValueError:
                continue
        elif platform_id == 3:
            codec = windows_codecs.get(encoding_id)
            if codec is None:
                continue
            try:
                windows_names[language_id] = src.decode(codec)
            except ValueError:
                continue

    # First try the windows names
    # First look for the US English name
    if 1033 in windows_names:
        return windows_names[1033]
    # Look for some other english name variant
    for lang in (3081, 10249, 4105, 9225, 16393, 6153, 8201, 17417, 5129,
                 13321, 18441, 7177, 11273, 2057, 12297):
        if lang in windows_names:
            return windows_names[lang]

    # Look for Mac name
    if 0 in mac_names:
        return mac_names[0]

    # Use unicode names
    for val in unicode_names.values():
        return val

    return None
def _get_font_names(raw, raw_is_table=False):
if raw_is_table:
table = raw
else:
table = get_table(raw, 'name')[0]
if table is None:
raise UnsupportedFont('Not a supported font, has no name table')
table_type, count, string_offset = struct.unpack_from(b'>3H', table)
records = defaultdict(list)
for i in range(count):
try:
platform_id, encoding_id, language_id, name_id, length, offset = \
struct.unpack_from(b'>6H', table, 6+i*12)
except struct.error:
break
offset += string_offset
src = table[offset:offset+length]
records[name_id].append((platform_id, encoding_id, language_id,
src))
return records
def get_font_names(raw, raw_is_table=False):
    '''
    Return the tuple (family_name, subfamily_name, full_name), decoded from
    the name records with the ids 1, 2 and 4. An entry is None if the
    corresponding name could not be decoded.
    '''
    records = _get_font_names(raw, raw_is_table)
    return tuple(decode_name_record(records[name_id])
                 for name_id in (1, 2, 4))
def get_font_names2(raw, raw_is_table=False):
    '''
    Return the tuple (family_name, subfamily_name, full_name,
    preferred_family_name, preferred_subfamily_name, wws_family_name,
    wws_subfamily_name), decoded from the name records with the ids 1, 2,
    4, 16, 17, 21 and 22. An entry is None if the corresponding name could
    not be decoded.
    '''
    records = _get_font_names(raw, raw_is_table)
    return tuple(decode_name_record(records[name_id])
                 for name_id in (1, 2, 4, 16, 17, 21, 22))
def get_all_font_names(raw, raw_is_table=False):
    '''
    Return a dictionary with the names that could be decoded from the name
    table. Possible keys are family_name, subfamily_name, full_name,
    preferred_family_name, preferred_subfamily_name, wws_family_name,
    wws_subfamily_name and postscript_name. Names that are missing or empty
    are left out.
    '''
    records = _get_font_names(raw, raw_is_table)
    name_ids = (
        ('family_name', 1), ('subfamily_name', 2), ('full_name', 4),
        ('preferred_family_name', 16), ('preferred_subfamily_name', 17),
        ('wws_family_name', 21), ('wws_subfamily_name', 22),
    )
    ans = {}
    for name, num in name_ids:
        try:
            value = decode_name_record(records[num])
        except (IndexError, KeyError, ValueError):
            continue
        if value:
            ans[name] = value

    # The postscript name is taken from the first record that is either
    # Macintosh/Roman/English or Windows/Unicode BMP/US English and that
    # can be decoded.
    for platform_id, encoding_id, language_id, src in records[6]:
        kind = (platform_id, encoding_id, language_id)
        if kind == (1, 0, 0):
            codec = 'utf-8'
        elif kind == (3, 1, 1033):
            codec = 'utf-16-be'
        else:
            continue
        try:
            ans['postscript_name'] = src.decode(codec)
        except ValueError:
            continue
        break

    return ans
def checksum_of_block(raw):
    '''
    Return the sum, modulo 2**32, of the big-endian unsigned 32 bit words
    in raw, after padding raw with zero bytes to a multiple of four bytes.
    '''
    padded = raw + b'\0' * (-len(raw) % 4)
    word_count = len(padded) // 4
    words = struct.unpack(b'>%dI' % word_count, padded)
    return sum(words) & 0xffffffff
def verify_checksums(raw):
    '''
    Check the checksum of every table in the font data raw and, if the
    font has a head table, also the checksum of the whole font.

    :raises ValueError: If any of the checksums is wrong.
    '''
    head = None
    for tag, data, record_offset, data_offset, declared in get_tables(raw):
        if tag.lower() != b'head':
            if checksum_of_block(data) != declared:
                raise ValueError('The %r table has an incorrect checksum'%tag)
            continue
        # The head table is checked below, with its adjustment field zeroed
        version, fontrev, checksum_adj = struct.unpack_from(b'>ffL', data)
        head = (data, data_offset, declared)

    if head is None:
        return

    data, data_offset, declared = head
    zeroed = data[:8] + struct.pack(b'>I', 0) + data[12:]
    raw = raw[:data_offset] + zeroed + raw[data_offset+len(zeroed):]

    # Check the checksum of the head table
    if checksum_of_block(zeroed) != declared:
        raise ValueError('Checksum of head table not correct')

    # Check the checksum of the entire font
    expected_adj = (0xB1B0AFBA - checksum_of_block(raw)) & 0xffffffff
    if expected_adj != checksum_adj:
        raise ValueError('Checksum of entire font incorrect')
def set_checksum_adjustment(f):
    '''
    Recalculate the checksum adjustment stored at offset 8 of the head
    table of the font held in f and write it in place. f must provide
    getvalue(), seek() and write().
    '''
    font_data = f.getvalue()
    adjustment_pos = get_table(font_data, 'head')[2] + 8

    # The checksum of the whole font is calculated with the field zeroed
    f.seek(adjustment_pos)
    f.write(struct.pack(b'>I', 0))
    total = checksum_of_block(f.getvalue())

    f.seek(adjustment_pos)
    f.write(struct.pack(b'>I', (0xB1B0AFBA - total) & 0xffffffff))
def set_table_checksum(f, name):
    '''
    Recalculate the checksum of the table called name in the font held in
    f and, if it differs from the one stored in the table directory,
    update the directory in place.
    '''
    table, record_offset, table_offset, stored = get_table(f.getvalue(), name)
    actual = checksum_of_block(table)
    if actual == stored:
        return
    # The checksum follows the four byte tag in the directory record
    f.seek(record_offset + 4)
    f.write(struct.pack(b'>I', actual))
def remove_embed_restriction(raw):
    '''
    Return the font data raw with the fs_type field of its OS/2 table set
    to zero and the affected checksums updated. The data is returned
    unchanged if fs_type is already zero.

    :raises UnsupportedFont: If raw is not a supported font or has no OS/2
        table.
    '''
    ok, sig = is_truetype_font(raw)
    if not ok:
        raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sig)

    table, table_index, table_offset = get_table(raw, 'os/2')[:3]
    if table is None:
        raise UnsupportedFont('Not a supported font, has no OS/2 table')

    # fs_type comes after the version, char width, weight and width fields
    fs_type_offset = struct.calcsize(b'>HhHH')
    (fs_type,) = struct.unpack_from(b'>H', table, fs_type_offset)
    if fs_type == 0:
        return raw

    stream = BytesIO(raw)
    stream.seek(table_offset + fs_type_offset)
    stream.write(struct.pack(b'>H', 0))

    set_table_checksum(stream, 'os/2')
    set_checksum_adjustment(stream)
    patched = stream.getvalue()
    verify_checksums(patched)
    return patched
def is_font_embeddable(raw):
    '''
    Return ``(embeddable, fs_type)`` based on the fs_type field of the OS/2
    table of the font data raw.

    A font is reported as embeddable if fs_type is zero or allows editable
    embedding (0x8). Otherwise it is reported as not embeddable if it is
    flagged as restricted license embedding (0x2) or bitmap embedding only
    (0x200).

    :raises UnsupportedFont: If raw is not a supported font or has no OS/2
        table.
    '''
    # https://www.microsoft.com/typography/otspec/os2.htm#fst
    ok, sig = is_truetype_font(raw)
    if not ok:
        raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sig)
    table, table_index, table_offset = get_table(raw, 'os/2')[:3]
    if table is None:
        raise UnsupportedFont('Not a supported font, has no OS/2 table')
    fs_type_offset = struct.calcsize(b'>HhHH')
    fs_type = struct.unpack_from(b'>H', table, fs_type_offset)[0]
    if fs_type == 0 or fs_type & 0x8:
        return True, fs_type
    # Restricted license embedding is the mask 0x2 (bit 1). The reserved
    # bit 0 used to be tested instead; it is still treated as a
    # restriction so that no font becomes embeddable that was not before.
    if fs_type & 0x3:
        return False, fs_type
    if fs_type & 0x200:
        return False, fs_type
    return True, fs_type
def read_bmp_prefix(table, bmp):
    '''
    Read the arrays of the cmap subtable that starts at offset bmp in the
    cmap table.

    :return: (start_count, end_count, range_offset, id_delta, glyph_id_len,
        glyph_id_map, array_len)
    :raises ValueError: If the length declared by the subtable is too small
        to hold its four arrays.
    '''
    length, language, segcount = struct.unpack_from(b'>3H', table, bmp+2)
    array_len = segcount // 2
    array_sz = 2*array_len
    unsigned_array = b'>%dH' % array_len
    signed_array = b'>%dh' % array_len

    pos = bmp + 7*2  # the arrays follow the seven 16 bit header fields
    end_count = struct.unpack_from(unsigned_array, table, pos)
    pos += array_sz + 2  # skip two more bytes after the end counts
    start_count = struct.unpack_from(unsigned_array, table, pos)
    pos += array_sz
    id_delta = struct.unpack_from(signed_array, table, pos)
    pos += array_sz
    range_offset = struct.unpack_from(unsigned_array, table, pos)

    glyph_ids_start = pos + array_sz
    subtable_end = length + bmp
    if subtable_end < glyph_ids_start:
        raise ValueError('cmap subtable length is too small')
    glyph_id_len = (subtable_end - glyph_ids_start)//2
    glyph_id_map = struct.unpack_from(b'>%dH' % glyph_id_len, table,
                                      glyph_ids_start)
    return (start_count, end_count, range_offset, id_delta, glyph_id_len,
            glyph_id_map, array_len)
def get_bmp_glyph_ids(table, bmp, codes):
    '''
    Yield a glyph id for every character code in codes, looked up in the
    cmap subtable that starts at offset bmp in table. 0 is yielded for
    codes that are not covered by any segment.
    '''
    (start_count, end_count, range_offset, id_delta, glyph_id_len,
     glyph_id_map, array_len) = read_bmp_prefix(table, bmp)

    for code in codes:
        found = False
        # Use the first segment whose start and end codes enclose the code
        for i, ec in enumerate(end_count):
            if ec >= code:
                sc = start_count[i]
                if sc <= code:
                    found = True
                    ro = range_offset[i]
                    if ro == 0:
                        # No range offset: the glyph id is code plus delta
                        glyph_id = id_delta[i] + code
                    else:
                        # NOTE(review): this index arithmetic presumably
                        # follows the idRangeOffset convention of cmap
                        # format 4 subtables - confirm against the spec
                        idx = ro//2 + (code - sc) + i - array_len
                        glyph_id = glyph_id_map[idx]
                        if glyph_id != 0:
                            glyph_id += id_delta[i]
                    # Glyph ids wrap around at 16 bits
                    yield glyph_id % 0x10000
                    break
        if not found:
            yield 0
def get_glyph_ids(raw, text, raw_is_table=False):
    '''
    Yield the glyph id of every character of text, using the format 4
    subtable with platform id 3 and encoding id 1 of the cmap table.

    Since this is a generator, the exceptions are raised when iteration
    starts.

    :param raw: The font data, or only the cmap table if raw_is_table.
    :raises TypeError: If text is not a unicode string.
    :raises UnsupportedFont: If the font has no cmap table or no suitable
        format 4 subtable.
    '''
    if not isinstance(text, unicode_type):
        raise TypeError('%r is not a unicode object'%text)
    table = raw if raw_is_table else get_table(raw, 'cmap')[0]
    if table is None:
        raise UnsupportedFont('Not a supported font, has no cmap table')

    version, num_tables = struct.unpack_from(b'>HH', table)
    bmp_table = None
    for index in range(num_tables):
        platform_id, encoding_id, offset = struct.unpack_from(
            b'>HHL', table, 4 + (index*8))
        if platform_id != 3 or encoding_id != 1:
            continue
        (table_format,) = struct.unpack_from(b'>H', table, offset)
        if table_format == 4:
            bmp_table = offset
            break

    if bmp_table is None:
        raise UnsupportedFont('Not a supported font, has no format 4 cmap table')

    for glyph_id in get_bmp_glyph_ids(table, bmp_table, map(ord, text)):
        yield glyph_id
def supports_text(raw, text, has_only_printable_chars=False):
    '''
    Return True if the font data raw has a glyph for every printable
    character of text. Fonts that cannot be parsed are reported as not
    supporting the text.

    :param has_only_printable_chars: Set to True if text has already been
        passed through get_printable_characters().
    :raises TypeError: If text is not a unicode string.
    '''
    if not isinstance(text, unicode_type):
        raise TypeError('%r is not a unicode object'%text)
    if not has_only_printable_chars:
        text = get_printable_characters(text)
    try:
        for glyph_id in get_glyph_ids(raw, text):
            if glyph_id == 0:
                return False
    except Exception:
        # Any parsing failure means the font is unusable, but do not
        # swallow KeyboardInterrupt or SystemExit the way a bare except
        # would.
        return False
    return True
def get_font_for_text(text, candidate_font_data=None):
    '''
    Return the data of a font that can render text.

    candidate_font_data is returned as is if it supports the text.
    Otherwise the font scanner is asked for a suitable family and the data
    of its first face is returned. If no family is found either,
    candidate_font_data is returned unchanged.
    '''
    if candidate_font_data is not None and supports_text(candidate_font_data, text):
        return candidate_font_data

    from calibre.utils.fonts.scanner import font_scanner
    family, faces = font_scanner.find_font_for_text(text)
    if not faces:
        return candidate_font_data
    with lopen(faces[0]['path'], 'rb') as f:
        return f.read()
def test_glyph_ids():
    '''
    Check that get_glyph_ids() and FreeType report the same glyph ids for a
    sample text in the bundled Liberation Serif font.
    '''
    from calibre.utils.fonts.free_type import FreeType
    data = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
    font = FreeType().load_font(data)
    text = u'诶йab'
    expected = tuple(font.glyph_ids(text))
    actual = tuple(get_glyph_ids(data, text))
    if actual != expected:
        raise Exception('My code and FreeType differ on the glyph ids')
def test_supports_text():
    '''
    Check supports_text() against the bundled symbols font, which is
    expected to cover a few symbols but not plain latin letters.
    '''
    font_data = P('fonts/calibreSymbols.otf', data=True)
    symbols_supported = supports_text(font_data, '.★½')
    if not symbols_supported:
        raise RuntimeError('Incorrectly returning that text is not supported')
    letters_supported = supports_text(font_data, 'abc')
    if letters_supported:
        raise RuntimeError('Incorrectly claiming that text is supported')
def test_find_font():
    '''
    Print the font families that the font scanner finds for a Chinese and
    for an Arabic sample text.
    '''
    from calibre.utils.fonts.scanner import font_scanner
    abcd = '诶比西迪'
    family = font_scanner.find_font_for_text(abcd)[0]
    print('Family for Chinese text:', family)
    # The sample text has to be replaced before the second lookup,
    # otherwise the Chinese text is simply looked up again.
    abcd = 'لوحة المفاتيح العربية'
    family = font_scanner.find_font_for_text(abcd)[0]
    print('Family for Arabic text:', family)
def test():
    ''' Run all the checks of this module, stopping at the first failure. '''
    for check in (test_glyph_ids, test_supports_text, test_find_font):
        check()
def main():
    '''
    For every font file named on the command line print its names, its
    characteristics and its generic CSS family, then verify its checksums
    and try removing its embedding restriction.
    '''
    import sys
    import os
    for font_path in sys.argv[1:]:
        print(os.path.basename(font_path))
        with open(font_path, 'rb') as f:
            raw = f.read()
        print(get_font_names(raw))
        characs = get_font_characteristics(raw)
        print(characs)
        panose = characs[5]
        print(panose_to_css_generic_family(panose))
        verify_checksums(raw)
        remove_embed_restriction(raw)
if __name__ == '__main__':
    # When run as a script, inspect the font files named on the command line
    main()