mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-01-09 05:04:12 +01:00
219 lines
7.6 KiB
Python
219 lines
7.6 KiB
Python
from struct import unpack_from, unpack, calcsize
|
|
from functools import partial
|
|
|
|
from ebook_converter.utils.fonts.sfnt import UnknownTable
|
|
from ebook_converter.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
|
|
from ebook_converter.utils.fonts.sfnt.cff.dict_data import TopDict, PrivateDict
|
|
from ebook_converter.utils.fonts.sfnt.cff.constants import (cff_standard_strings,
|
|
STANDARD_CHARSETS)
|
|
from ebook_converter.polyglot.builtins import iteritems, itervalues, range
|
|
|
|
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
|
|
__docformat__ = 'restructuredtext en'
|
|
|
|
# Useful links
|
|
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
|
|
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
|
|
|
|
|
|
class CFF(object):
|
|
|
|
def __init__(self, raw):
|
|
(self.major_version, self.minor_version, self.header_size,
|
|
self.offset_size) = unpack_from(b'>4B', raw)
|
|
if (self.major_version, self.minor_version) != (1, 0):
|
|
raise UnsupportedFont('The CFF table has unknown version: '
|
|
'(%d, %d)'%(self.major_version, self.minor_version))
|
|
offset = self.header_size
|
|
|
|
# Read Names Index
|
|
self.font_names = Index(raw, offset)
|
|
offset = self.font_names.pos
|
|
if len(self.font_names) > 1:
|
|
raise UnsupportedFont('CFF table has more than one font.')
|
|
|
|
# Read Top Dict
|
|
self.top_index = Index(raw, offset)
|
|
self.top_dict = TopDict()
|
|
offset = self.top_index.pos
|
|
|
|
# Read strings
|
|
self.strings = Strings(raw, offset)
|
|
offset = self.strings.pos
|
|
|
|
# Read global subroutines
|
|
self.global_subrs = Subrs(raw, offset)
|
|
offset = self.global_subrs.pos
|
|
|
|
# Decompile Top Dict
|
|
self.top_dict.decompile(self.strings, self.global_subrs, self.top_index[0])
|
|
self.is_CID = 'ROS' in self.top_dict
|
|
if self.is_CID:
|
|
raise UnsupportedFont('Subsetting of CID keyed fonts is not supported')
|
|
|
|
# Read CharStrings (Glyph definitions)
|
|
try:
|
|
offset = self.top_dict['CharStrings']
|
|
except KeyError:
|
|
raise ValueError('This font has no CharStrings')
|
|
cs_type = self.top_dict.safe_get('CharstringType')
|
|
if cs_type != 2:
|
|
raise UnsupportedFont('This font has unsupported CharstringType: '
|
|
'%s'%cs_type)
|
|
self.char_strings = CharStringsIndex(raw, offset)
|
|
self.num_glyphs = len(self.char_strings)
|
|
|
|
# Read Private Dict
|
|
self.private_dict = self.private_subrs = None
|
|
pd = self.top_dict.safe_get('Private')
|
|
if pd:
|
|
size, offset = pd
|
|
self.private_dict = PrivateDict()
|
|
self.private_dict.decompile(self.strings, self.global_subrs,
|
|
raw[offset:offset+size])
|
|
if 'Subrs' in self.private_dict:
|
|
self.private_subrs = Subrs(raw, offset +
|
|
self.private_dict['Subrs'])
|
|
|
|
# Read charset (Glyph names)
|
|
self.charset = Charset(raw, self.top_dict.safe_get('charset'),
|
|
self.strings, self.num_glyphs, self.is_CID)
|
|
|
|
# import pprint
|
|
# pprint.pprint(self.top_dict)
|
|
# pprint.pprint(self.private_dict)
|
|
|
|
|
|
class Index(list):
|
|
|
|
def __init__(self, raw, offset, prepend=()):
|
|
list.__init__(self)
|
|
self.extend(prepend)
|
|
|
|
count = unpack_from(b'>H', raw, offset)[0]
|
|
offset += 2
|
|
self.pos = offset
|
|
|
|
if count > 0:
|
|
self.offset_size = unpack_from(b'>B', raw, offset)[0]
|
|
offset += 1
|
|
if self.offset_size == 3:
|
|
offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
|
|
for i in range(offset, offset+3*(count+1), 3)]
|
|
else:
|
|
fmt = {1:'B', 2:'H', 4:'L'}[self.offset_size]
|
|
fmt = ('>%d%s'%(count+1, fmt)).encode('ascii')
|
|
offsets = unpack_from(fmt, raw, offset)
|
|
offset += self.offset_size * (count+1) - 1
|
|
|
|
for i in range(len(offsets)-1):
|
|
off, noff = offsets[i:i+2]
|
|
obj = raw[offset+off:offset+noff]
|
|
self.append(obj)
|
|
|
|
try:
|
|
self.pos = offset + offsets[-1]
|
|
except IndexError:
|
|
self.pos = offset
|
|
|
|
|
|
class Strings(Index):
|
|
|
|
def __init__(self, raw, offset):
|
|
super(Strings, self).__init__(raw, offset, prepend=[x.encode('ascii')
|
|
for x in cff_standard_strings])
|
|
|
|
|
|
class Charset(list):
|
|
|
|
def __init__(self, raw, offset, strings, num_glyphs, is_CID):
|
|
super(Charset, self).__init__()
|
|
self.standard_charset = offset if offset in {0, 1, 2} else None
|
|
if is_CID and self.standard_charset is not None:
|
|
raise ValueError("CID font must not use a standard charset")
|
|
if self.standard_charset is None:
|
|
self.append(b'.notdef')
|
|
fmt = unpack_from(b'>B', raw, offset)[0]
|
|
offset += 1
|
|
f = {0:self.parse_fmt0, 1:self.parse_fmt1,
|
|
2:partial(self.parse_fmt1, is_two_byte=True)}.get(fmt, None)
|
|
if f is None:
|
|
raise UnsupportedFont('This font uses unsupported charset '
|
|
'table format: %d'%fmt)
|
|
f(raw, offset, strings, num_glyphs, is_CID)
|
|
|
|
def parse_fmt0(self, raw, offset, strings, num_glyphs, is_CID):
|
|
fmt = ('>%dH'%(num_glyphs-1)).encode('ascii')
|
|
ids = unpack_from(fmt, raw, offset)
|
|
if is_CID:
|
|
ids = ('cid%05d'%x for x in ids)
|
|
else:
|
|
ids = (strings[x] for x in ids)
|
|
self.extend(ids)
|
|
|
|
def parse_fmt1(self, raw, offset, strings, num_glyphs, is_CID,
|
|
is_two_byte=False):
|
|
fmt = b'>2H' if is_two_byte else b'>HB'
|
|
sz = calcsize(fmt)
|
|
count = 1
|
|
while count < num_glyphs:
|
|
first, nleft = unpack_from(fmt, raw, offset)
|
|
offset += sz
|
|
count += nleft + 1
|
|
self.extend('cid%05d'%x if is_CID else strings[x] for x in
|
|
range(first, first + nleft+1))
|
|
|
|
def lookup(self, glyph_id):
|
|
if self.standard_charset is None:
|
|
return self[glyph_id]
|
|
return STANDARD_CHARSETS[self.standard_charset][glyph_id].encode('ascii')
|
|
|
|
def safe_lookup(self, glyph_id):
|
|
try:
|
|
return self.lookup(glyph_id)
|
|
except (KeyError, IndexError, ValueError):
|
|
return None
|
|
|
|
|
|
class Subrs(Index):
|
|
pass
|
|
|
|
|
|
class CharStringsIndex(Index):
|
|
pass
|
|
|
|
|
|
class CFFTable(UnknownTable):
|
|
|
|
def decompile(self):
|
|
self.cff = CFF(self.raw)
|
|
|
|
def subset(self, character_map, extra_glyphs):
|
|
from ebook_converter.utils.fonts.sfnt.cff.writer import Subset
|
|
# Map codes from the cmap table to glyph names, this will be used to
|
|
# reconstruct character_map for the subset font
|
|
charset_map = {code:self.cff.charset.safe_lookup(glyph_id) for code,
|
|
glyph_id in iteritems(character_map)}
|
|
charset = set(itervalues(charset_map))
|
|
charset.discard(None)
|
|
if not charset and character_map:
|
|
raise NoGlyphs('This font has no glyphs for the specified characters')
|
|
charset |= {
|
|
self.cff.charset.safe_lookup(glyph_id) for glyph_id in extra_glyphs}
|
|
charset.discard(None)
|
|
s = Subset(self.cff, charset)
|
|
|
|
# Rebuild character_map with the glyph ids from the subset font
|
|
character_map.clear()
|
|
for code, charname in iteritems(charset_map):
|
|
glyph_id = s.charname_map.get(charname, None)
|
|
if glyph_id:
|
|
character_map[code] = glyph_id
|
|
|
|
# Check that raw is parseable
|
|
CFF(s.raw)
|
|
|
|
self.raw = s.raw
|