1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-27 22:03:32 +01:00

Added docx writer related modules

This commit is contained in:
2020-04-13 16:33:15 +02:00
parent ae80ae5640
commit 98b2dd8d4f
29 changed files with 5956 additions and 0 deletions

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from datetime import datetime, timedelta
def align_block(raw, multiple=4, pad=b'\0'):
    '''
    Return raw with enough copies of pad appended to make its length a
    multiple of ``multiple``. Data that is already aligned is returned
    unchanged.
    '''
    remainder = len(raw) % multiple
    if remainder:
        return raw + pad * (multiple - remainder)
    return raw
class UnknownTable(object):
    '''
    A table that is kept as an opaque blob: calling the instance returns the
    raw data and len() gives the size of that data.
    '''

    def __init__(self, raw):
        self.raw = raw

    def __len__(self):
        return len(self.raw)

    def __call__(self):
        return self.raw
class DateTimeProperty(object):
    '''
    Descriptor that presents an integer attribute (seconds elapsed since
    1904-01-01) as a datetime object. ``name`` is the attribute on the owning
    instance that stores the integer.
    '''

    def __init__(self, name):
        self.name = name

    def __get__(self, obj, type=None):
        seconds = getattr(obj, self.name)
        return datetime(1904, 1, 1) + timedelta(seconds=seconds)

    def __set__(self, obj, val):
        elapsed = val - datetime(1904, 1, 1)
        # Fractions of a second are truncated
        setattr(obj, self.name, int(elapsed.total_seconds()))
class FixedProperty(object):
    '''
    Descriptor that presents an integer attribute holding a 16.16 fixed point
    value as an ordinary number. ``name`` is the attribute on the owning
    instance that stores the raw integer.
    '''

    def __init__(self, name):
        self.name = name

    def __get__(self, obj, type=None):
        val = getattr(obj, self.name)
        return val / 0x10000

    def __set__(self, obj, val):
        # The converted value has to be stored on the instance; merely
        # returning it (as was done before) silently discards the assignment,
        # since the return value of __set__ is ignored.
        setattr(obj, self.name, int(round(val * 0x10000)))
def max_power_of_two(x):
    """
    Return the highest exponent of two, so that
    (2 ** exponent) <= x

    Zero and negative values have no such exponent; 0 is returned for them.
    """
    if x <= 0:
        # Shifting a negative integer right never reaches zero (-1 >> 1 == -1)
        # so the loop below would never terminate for negative input.
        return 0
    exponent = 0
    while x:
        x >>= 1
        exponent += 1
    return exponent - 1
def load_font(stream_or_path):
    '''
    Build an Sfnt object from font data. If the argument has a ``read``
    attribute it is read first, otherwise it is handed to Sfnt unchanged.
    '''
    if hasattr(stream_or_path, 'read'):
        data = stream_or_path.read()
    else:
        data = stream_or_path
    # Imported here rather than at module level, as in the original code
    from calibre.utils.fonts.sfnt.container import Sfnt
    return Sfnt(data)

View File

@@ -0,0 +1,10 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@@ -0,0 +1,182 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# From Adobe Technical Note #5176, version 1.0, 18 March 1998
# The position of a name in this list is its string id (SID), so the order
# of the entries must not be changed.
cff_standard_strings = [
    '.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
    'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
    'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
    'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
    'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
    'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
    'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
    'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
    'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
    'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
    'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
    'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
    'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
    'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
    'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
    'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
    'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
    'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
    'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
    'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
    'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
    'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
    'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
    'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
    'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
    'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
    'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
    'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
    'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
    'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
    'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
    'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
    'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
    'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
    'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
    'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
    'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
    'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
    'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
    'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
    'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
    'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
    'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
    'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
    'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
    'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
    'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
    'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
    'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
    'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
    'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
    'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
    'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
    'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
    'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
    'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
    'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
    'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
    'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
    'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
    'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
    '001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
    'Semibold'
]
# }}}
# The predefined charsets of the CFF format, indexed by the charset id (0, 1
# or 2) that a font stores in place of a charset offset. Each charset maps a
# glyph id (the position in the tuple) to a glyph name. Glyph 0 is always
# .notdef, in all three charsets.
STANDARD_CHARSETS = [  # {{{
    # ISOAdobe
    (".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
     "percent", "ampersand", "quoteright", "parenleft", "parenright",
     "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
     "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
     "colon", "semicolon", "less", "equal", "greater", "question", "at",
     "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
     "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
     "bracketleft", "backslash", "bracketright", "asciicircum",
     "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
     "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
     "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
     "exclamdown", "cent", "sterling", "fraction", "yen", "florin",
     "section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
     "guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
     "daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
     "quotedblbase", "quotedblright", "guillemotright", "ellipsis",
     "perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
     "macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
     "hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
     "Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
     "oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
     "trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
     "divide", "brokenbar", "degree", "thorn", "threequarters",
     "twosuperior", "registered", "minus", "eth", "multiply",
     "threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
     "Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
     "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
     "Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
     "Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
     "Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
     "aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
     "egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
     "oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
     "uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
     "zcaron"),
    # Expert
    # The first entry used to be "notdef" (no leading period), which does not
    # match the name used for glyph 0 everywhere else.
    (".notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
     "dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
     "parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
     "hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
     "twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
     "sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
     "colon", "semicolon", "commasuperior", "threequartersemdash",
     "periodsuperior", "questionsmall", "asuperior", "bsuperior",
     "centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
     "msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
     "tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
     "parenrightinferior", "Circumflexsmall", "hyphensuperior",
     "Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
     "Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
     "Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
     "Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
     "colonmonetary", "onefitted", "rupiah", "Tildesmall",
     "exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
     "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
     "Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
     "Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
     "threequarters", "questiondownsmall", "oneeighth", "threeeighths",
     "fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
     "onesuperior", "twosuperior", "threesuperior", "foursuperior",
     "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
     "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
     "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
     "seveninferior", "eightinferior", "nineinferior", "centinferior",
     "dollarinferior", "periodinferior", "commainferior", "Agravesmall",
     "Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
     "Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
     "Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
     "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
     "Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
     "Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
     "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
     "Thornsmall", "Ydieresissmall"),
    # Expert Subset
    (".notdef", "space", "dollaroldstyle", "dollarsuperior",
     "parenleftsuperior", "parenrightsuperior", "twodotenleader",
     "onedotenleader", "comma", "hyphen", "period", "fraction",
     "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
     "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
     "eightoldstyle", "nineoldstyle", "colon", "semicolon",
     "commasuperior", "threequartersemdash", "periodsuperior",
     "asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
     "isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
     "rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
     "ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
     "colonmonetary", "onefitted", "rupiah", "centoldstyle",
     "figuredash", "hypheninferior", "onequarter", "onehalf",
     "threequarters", "oneeighth", "threeeighths", "fiveeighths",
     "seveneighths", "onethird", "twothirds", "zerosuperior",
     "onesuperior", "twosuperior", "threesuperior", "foursuperior",
     "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
     "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
     "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
     "seveninferior", "eightinferior", "nineinferior", "centinferior",
     "dollarinferior", "periodinferior", "commainferior"),
]  # }}}

View File

@@ -0,0 +1,311 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import pack, unpack_from
from polyglot.builtins import range, unicode_type
# Operand encoding tables. Entry N is the name of the method that handles a
# value whose leading byte is N.
t1_operand_encoding = (
    32 * ["do_operator"] +
    (247 - 32) * ["read_byte"] +
    (251 - 247) * ["read_small_int1"] +
    (255 - 251) * ["read_small_int2"] +
    ["read_long_int"]
)

# Type 2 charstrings: same as Type 1 apart from two leading bytes
t2_operand_encoding = list(t1_operand_encoding)
t2_operand_encoding[28] = "read_short_int"
t2_operand_encoding[255] = "read_fixed_1616"

# CFF DICT data: derived from the Type 2 table
cff_dict_operand_encoding = list(t2_operand_encoding)
cff_dict_operand_encoding[29] = "read_long_int"
cff_dict_operand_encoding[30] = "read_real_number"
cff_dict_operand_encoding[255] = "reserved"

# Text represented by each nibble value of an encoded real number, and the
# reverse mapping from text to nibble value
real_nibbles = list('0123456789') + ['.', 'E', 'E-', None, '-']
real_nibbles_map = dict((text, value) for value, text in enumerate(real_nibbles))
class ByteCode(dict):
    '''
    Readers and writers for the number encodings used by CFF DICT data and
    charstrings.

    Every ``read_*`` method is called with the leading byte ``b0``, the data
    being parsed and the index of the byte that follows ``b0``. It returns a
    ``(value, new_index)`` tuple. The ``write_*`` methods return the encoded
    bytes for a value.
    '''

    def read_byte(self, b0, data, index):
        # The value is carried entirely by the leading byte
        return b0 - 139, index

    def read_small_int1(self, b0, data, index):
        # Positive integer spread over the leading byte and one more byte
        b1 = ord(data[index:index+1])
        return (b0-247)*256 + b1 + 108, index+1

    def read_small_int2(self, b0, data, index):
        # Negative counterpart of read_small_int1
        b1 = ord(data[index:index+1])
        return -(b0-251)*256 - b1 - 108, index+1

    def read_short_int(self, b0, data, index):
        # Big endian signed 16 bit integer
        value, = unpack_from(b">h", data, index)
        return value, index+2

    def read_long_int(self, b0, data, index):
        # Big endian signed 32 bit integer
        value, = unpack_from(b">l", data, index)
        return value, index+4

    def read_fixed_1616(self, b0, data, index):
        # Big endian signed 32 bit integer, interpreted as 16.16 fixed point
        value, = unpack_from(b">l", data, index)
        return value / 65536.0, index+4

    def read_real_number(self, b0, data, index):
        # The number is stored as a sequence of nibbles, two per byte, each
        # standing for one entry of real_nibbles. A nibble of 0xf terminates
        # the sequence.
        number = ''
        while True:
            b = ord(data[index:index+1])
            index = index + 1
            nibble0 = (b & 0xf0) >> 4
            nibble1 = b & 0x0f
            if nibble0 == 0xf:
                break
            number = number + real_nibbles[nibble0]
            if nibble1 == 0xf:
                break
            number = number + real_nibbles[nibble1]
        return float(number), index

    def write_float(self, f, encoding='ignored'):
        '''
        Encode f as a nibble packed real number, prefixed by the byte 30.
        ``encoding`` is accepted only so that the signature matches
        write_int(); it is not used.
        '''
        s = unicode_type(f).upper()
        # A leading zero before the decimal point is not stored
        if s[:2] == "0.":
            s = s[1:]
        elif s[:3] == "-0.":
            s = "-" + s[2:]
        nibbles = []
        while s:
            c = s[0]
            s = s[1:]
            if c == "E":
                sign = s[:1]
                if sign == "-":
                    # A negative exponent is a single nibble
                    s = s[1:]
                    c = "E-"
                elif sign == "+":
                    # There is no nibble for "+": a positive exponent is
                    # written as a plain "E", so the sign is dropped. Without
                    # this, large floats (text form like 1E+20) fail with a
                    # KeyError.
                    s = s[1:]
            nibbles.append(real_nibbles_map[c])
        # Terminator, doubled if needed to fill the last byte
        nibbles.append(0xf)
        if len(nibbles) % 2:
            nibbles.append(0xf)
        d = bytearray([30])
        for i in range(0, len(nibbles), 2):
            d.append(nibbles[i] << 4 | nibbles[i+1])
        return bytes(d)

    def write_int(self, value, encoding="cff"):
        '''
        Encode the integer value using the shortest available form.
        ``encoding`` selects the operator used for values that need four
        bytes: 29 for 'cff' and 255 for 't1'. Any other encoding has no four
        byte form and falls back to a two byte integer.
        '''
        four_byte_op = {'cff':29, 't1':255}.get(encoding, None)
        if -107 <= value <= 107:
            code = bytes(bytearray([value + 139]))
        elif 108 <= value <= 1131:
            value = value - 108
            code = bytes(bytearray([(value >> 8) + 247, (value & 0xFF)]))
        elif -1131 <= value <= -108:
            value = -value - 108
            code = bytes(bytearray([(value >> 8) + 251, (value & 0xFF)]))
        elif four_byte_op is None:
            # T2 only supports 2 byte ints
            code = bytes(bytearray([28])) + pack(b">h", value)
        else:
            code = bytes(bytearray([four_byte_op])) + pack(b">l", value)
        return code

    def write_offset(self, value):
        # Always uses the five byte form, so the size of the result does not
        # depend on the value
        return bytes(bytearray([29])) + pack(b">l", value)

    def write_number(self, value, encoding="cff"):
        # Floats are written as real numbers, everything else as an integer
        f = self.write_float if isinstance(value, float) else self.write_int
        return f(value, encoding)
class Dict(ByteCode):
    '''
    Base class for CFF DICT structures. The decoded operators are stored as
    items of the dictionary itself, keyed by operator name.

    Subclasses describe their operators by setting:

    TABLE    -- rows of (opcode, name, argument type, default)
    FILTERED -- names of operators that compile() never writes
    OFFSETS  -- names of operators whose numeric value is written with
                write_offset() instead of write_number()
    '''

    operand_encoding = cff_dict_operand_encoding
    TABLE = ()
    FILTERED = frozenset()
    OFFSETS = frozenset()

    def __init__(self):
        ByteCode.__init__(self)
        self.operators = {op:(name, arg) for op, name, arg, default in
                          self.TABLE}
        self.defaults = {name:default for op, name, arg, default in self.TABLE}

    def safe_get(self, name):
        # Value of the operator, or its default from TABLE if it is not set
        return self.get(name, self.defaults[name])

    def decompile(self, strings, global_subrs, data):
        '''
        Parse the DICT contained in data, storing every operator found as an
        item of self. strings is used to resolve SID arguments.
        '''
        self.strings = strings
        self.global_subrs = global_subrs
        self.stack = []
        index = 0
        while index < len(data):
            b0 = ord(data[index:index+1])
            index += 1
            handler = getattr(self, self.operand_encoding[b0])
            value, index = handler(b0, data, index)
            # Operands are collected on the stack until an operator (which
            # returns None as its value) consumes them
            if value is not None:
                self.stack.append(value)

    def do_operator(self, b0, data, index):
        if b0 == 12:
            # Escape byte: the operator is identified by the next byte as well
            op = (b0, ord(data[index:index+1]))
            index += 1
        else:
            op = b0
        operator, arg_type = self.operators[op]
        self.handle_operator(operator, arg_type)
        return None, index

    def handle_operator(self, operator, arg_type):
        if isinstance(arg_type, tuple):
            # The operands are popped off the stack, so they have to be
            # fetched last to first
            value = ()
            for arg in reversed(arg_type):
                arghandler = getattr(self, 'arg_' + arg)
                value = (arghandler(operator),) + value
        else:
            arghandler = getattr(self, 'arg_' + arg_type)
            value = arghandler(operator)
        self[operator] = value

    def arg_number(self, name):
        return self.stack.pop()

    def arg_SID(self, name):
        return self.strings[self.stack.pop()]

    def arg_array(self, name):
        # An array consumes the entire stack
        ans = self.stack[:]
        del self.stack[:]
        return ans

    def arg_delta(self, name):
        # Deltas are stored as differences between successive values, turn
        # them back into absolute values
        out = []
        current = 0
        for v in self.stack:
            current = current + v
            out.append(current)
        del self.stack[:]
        return out

    def compile(self, strings):
        '''
        Serialize every operator that is not in FILTERED and whose value
        differs from its default, in TABLE order. strings is called with a
        string to get the SID to write for it. The result is stored in
        self.raw and returned.
        '''
        data = []
        for op, name, arg, default in self.TABLE:
            if name in self.FILTERED:
                continue
            val = self.safe_get(name)
            opcode = bytes(bytearray(op if isinstance(op, tuple) else [op]))
            if val != self.defaults[name]:
                self.encoding_offset = name in self.OFFSETS
                if isinstance(arg, tuple):
                    if len(val) != len(arg):
                        raise ValueError('Invalid argument %s for operator: %s'
                                         %(val, op))
                    for typ, v in zip(arg, val):
                        if typ == 'SID':
                            # Convert the individual element. This used to
                            # rebind val (the whole tuple), leaving v as an
                            # unconverted string.
                            v = strings(v)
                        data.append(getattr(self, 'encode_'+typ)(v))
                else:
                    if arg == 'SID':
                        val = strings(val)
                    data.append(getattr(self, 'encode_'+arg)(val))
                data.append(opcode)
        self.raw = b''.join(data)
        return self.raw

    def encode_number(self, val):
        # self.encoding_offset is set by compile() for the current operator
        if self.encoding_offset:
            return self.write_offset(val)
        return self.write_number(val)

    def encode_SID(self, val):
        return self.write_int(val)

    def encode_array(self, val):
        return b''.join(map(self.encode_number, val))

    def encode_delta(self, value):
        # Inverse of arg_delta(): absolute values back to differences
        out = []
        last = 0
        for v in value:
            out.append(v - last)
            last = v
        return self.encode_array(out)
class TopDict(Dict):
    '''
    Operator table for the Top DICT of a CFF font.

    Each TABLE row is (opcode, name, argument type, default). An opcode that
    is a tuple denotes a two byte operator, whose first byte is the escape
    value 12. Operators are written out in the order they are listed here.
    '''

    TABLE = (
        # opcode name argument type default
        ((12, 30), 'ROS', ('SID','SID','number'), None,),
        ((12, 20), 'SyntheticBase', 'number', None,),
        (0, 'version', 'SID', None,),
        (1, 'Notice', 'SID', None,),
        ((12, 0), 'Copyright', 'SID', None,),
        (2, 'FullName', 'SID', None,),
        ((12, 38), 'FontName', 'SID', None,),
        (3, 'FamilyName', 'SID', None,),
        (4, 'Weight', 'SID', None,),
        ((12, 1), 'isFixedPitch', 'number', 0,),
        ((12, 2), 'ItalicAngle', 'number', 0,),
        ((12, 3), 'UnderlinePosition', 'number', None,),
        ((12, 4), 'UnderlineThickness', 'number', 50,),
        ((12, 5), 'PaintType', 'number', 0,),
        ((12, 6), 'CharstringType', 'number', 2,),
        ((12, 7), 'FontMatrix', 'array', [0.001,0,0,0.001,0,0],),
        (13, 'UniqueID', 'number', None,),
        (5, 'FontBBox', 'array', [0,0,0,0],),
        ((12, 8), 'StrokeWidth', 'number', 0,),
        (14, 'XUID', 'array', None,),
        ((12, 21), 'PostScript', 'SID', None,),
        ((12, 22), 'BaseFontName', 'SID', None,),
        ((12, 23), 'BaseFontBlend', 'delta', None,),
        ((12, 31), 'CIDFontVersion', 'number', 0,),
        ((12, 32), 'CIDFontRevision', 'number', 0,),
        ((12, 33), 'CIDFontType', 'number', 0,),
        ((12, 34), 'CIDCount', 'number', 8720,),
        (15, 'charset', 'number', 0,),
        ((12, 35), 'UIDBase', 'number', None,),
        (16, 'Encoding', 'number', 0,),
        (18, 'Private', ('number','number'), None,),
        ((12, 37), 'FDSelect', 'number', None,),
        ((12, 36), 'FDArray', 'number', None,),
        (17, 'CharStrings', 'number', None,),
    )

    # We will not write these operators out
    FILTERED = {'ROS', 'SyntheticBase', 'UniqueID', 'XUID',
                'CIDFontVersion', 'CIDFontRevision', 'CIDFontType', 'CIDCount',
                'UIDBase', 'Encoding', 'FDSelect', 'FDArray'}

    # The numeric values of these operators are written in the fixed size
    # offset form
    OFFSETS = {'charset', 'Encoding', 'CharStrings', 'Private'}
class PrivateDict(Dict):
    '''
    Operator table for the Private DICT of a CFF font.

    Each TABLE row is (opcode, name, argument type, default). An opcode that
    is a tuple denotes a two byte operator, whose first byte is the escape
    value 12. Operators are written out in the order they are listed here.
    '''

    TABLE = (
        # opcode name argument type default
        (6, 'BlueValues', 'delta', None,),
        (7, 'OtherBlues', 'delta', None,),
        (8, 'FamilyBlues', 'delta', None,),
        (9, 'FamilyOtherBlues', 'delta', None,),
        ((12, 9), 'BlueScale', 'number', 0.039625,),
        ((12, 10), 'BlueShift', 'number', 7,),
        ((12, 11), 'BlueFuzz', 'number', 1,),
        (10, 'StdHW', 'number', None,),
        (11, 'StdVW', 'number', None,),
        ((12, 12), 'StemSnapH', 'delta', None,),
        ((12, 13), 'StemSnapV', 'delta', None,),
        ((12, 14), 'ForceBold', 'number', 0,),
        ((12, 15), 'ForceBoldThreshold', 'number', None,), # deprecated
        ((12, 16), 'lenIV', 'number', None,), # deprecated
        ((12, 17), 'LanguageGroup', 'number', 0,),
        ((12, 18), 'ExpansionFactor', 'number', 0.06,),
        ((12, 19), 'initialRandomSeed', 'number', 0,),
        (20, 'defaultWidthX', 'number', 0,),
        (21, 'nominalWidthX', 'number', 0,),
        (19, 'Subrs', 'number', None,),
    )

    # The value of Subrs is written in the fixed size offset form
    OFFSETS = {'Subrs'}

View File

@@ -0,0 +1,221 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from, unpack, calcsize
from functools import partial
from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
from calibre.utils.fonts.sfnt.cff.dict_data import TopDict, PrivateDict
from calibre.utils.fonts.sfnt.cff.constants import (cff_standard_strings,
STANDARD_CHARSETS)
from polyglot.builtins import iteritems, itervalues, range
# Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
class CFF(object):
    '''
    Parsed form of a CFF table.

    Only version 1.0 tables containing a single, non CID keyed font with
    Type 2 charstrings are accepted. Anything else raises UnsupportedFont. A
    font without a CharStrings entry raises ValueError.
    '''

    def __init__(self, raw):
        # The header consists of four unsigned bytes
        (self.major_version, self.minor_version, self.header_size,
         self.offset_size) = unpack_from(b'>4B', raw)
        if (self.major_version, self.minor_version) != (1, 0):
            raise UnsupportedFont('The CFF table has unknown version: '
                    '(%d, %d)'%(self.major_version, self.minor_version))
        offset = self.header_size

        # The next four structures follow each other directly: the pos
        # attribute of each one is where the next one starts

        # Read Names Index
        self.font_names = Index(raw, offset)
        offset = self.font_names.pos
        if len(self.font_names) > 1:
            raise UnsupportedFont('CFF table has more than one font.')

        # Read Top Dict
        self.top_index = Index(raw, offset)
        self.top_dict = TopDict()
        offset = self.top_index.pos

        # Read strings
        self.strings = Strings(raw, offset)
        offset = self.strings.pos

        # Read global subroutines
        self.global_subrs = Subrs(raw, offset)
        offset = self.global_subrs.pos

        # Decompile Top Dict
        # This can only happen now, as it needs the strings read above
        self.top_dict.decompile(self.strings, self.global_subrs, self.top_index[0])
        self.is_CID = 'ROS' in self.top_dict
        if self.is_CID:
            raise UnsupportedFont('Subsetting of CID keyed fonts is not supported')

        # Read CharStrings (Glyph definitions)
        try:
            offset = self.top_dict['CharStrings']
        except KeyError:
            raise ValueError('This font has no CharStrings')
        cs_type = self.top_dict.safe_get('CharstringType')
        if cs_type != 2:
            raise UnsupportedFont('This font has unsupported CharstringType: '
                    '%s'%cs_type)
        self.char_strings = CharStringsIndex(raw, offset)
        self.num_glyphs = len(self.char_strings)

        # Read Private Dict
        self.private_dict = self.private_subrs = None
        pd = self.top_dict.safe_get('Private')
        if pd:
            # The Private operator carries a (size, offset) pair
            size, offset = pd
            self.private_dict = PrivateDict()
            self.private_dict.decompile(self.strings, self.global_subrs,
                    raw[offset:offset+size])
            if 'Subrs' in self.private_dict:
                # The Subrs value is added to the position of the private
                # dict itself to locate the local subroutines
                self.private_subrs = Subrs(raw, offset +
                        self.private_dict['Subrs'])

        # Read charset (Glyph names)
        self.charset = Charset(raw, self.top_dict.safe_get('charset'),
                self.strings, self.num_glyphs, self.is_CID)

        # import pprint
        # pprint.pprint(self.top_dict)
        # pprint.pprint(self.private_dict)
class Index(list):
    '''
    List of the objects stored in the INDEX structure located at offset in
    raw, preceded by the items of prepend. After construction, pos is the
    position in raw of the first byte following the INDEX.
    '''

    def __init__(self, raw, offset, prepend=()):
        list.__init__(self)
        self.extend(prepend)

        (count,) = unpack_from(b'>H', raw, offset)
        offset += 2
        self.pos = offset
        if count <= 0:
            # An empty INDEX consists of nothing but the count
            return

        (self.offset_size,) = unpack_from(b'>B', raw, offset)
        offset += 1
        num_offsets = count + 1
        if self.offset_size == 3:
            # struct has no three byte integer, so pad each one to four bytes
            offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
                       for i in range(offset, offset + 3*num_offsets, 3)]
        else:
            code = {1:'B', 2:'H', 4:'L'}[self.offset_size]
            offsets = unpack_from(
                ('>%d%s'%(num_offsets, code)).encode('ascii'), raw, offset)

        # The stored offsets are relative to the byte preceding the object data
        offset += self.offset_size * num_offsets - 1
        for begin, end in zip(offsets, offsets[1:]):
            self.append(raw[offset+begin:offset+end])

        try:
            self.pos = offset + offsets[-1]
        except IndexError:
            self.pos = offset
class Strings(Index):
    '''
    String INDEX. The standard strings (encoded as ASCII bytes) come first,
    followed by the strings stored in the font.
    '''

    def __init__(self, raw, offset):
        standard = [name.encode('ascii') for name in cff_standard_strings]
        super(Strings, self).__init__(raw, offset, prepend=standard)
class Charset(list):
    '''
    Glyph names of a font, indexed by glyph id.

    When offset is 0, 1 or 2 the font uses one of the predefined charsets:
    the list stays empty, standard_charset holds that id and lookup() answers
    from STANDARD_CHARSETS. Otherwise standard_charset is None and the list
    is filled from the charset table found at offset in raw.
    '''

    def __init__(self, raw, offset, strings, num_glyphs, is_CID):
        super(Charset, self).__init__()
        self.standard_charset = offset if offset in {0, 1, 2} else None
        if self.standard_charset is not None:
            if is_CID:
                raise ValueError("CID font must not use a standard charset")
            return

        # Glyph 0 is not stored in the charset table
        self.append(b'.notdef')
        (fmt,) = unpack_from(b'>B', raw, offset)
        offset += 1
        if fmt == 0:
            parser = self.parse_fmt0
        elif fmt == 1:
            parser = self.parse_fmt1
        elif fmt == 2:
            # Same layout as format 1, with wider range lengths
            parser = partial(self.parse_fmt1, is_two_byte=True)
        else:
            raise UnsupportedFont('This font uses unsupported charset '
                    'table format: %d'%fmt)
        parser(raw, offset, strings, num_glyphs, is_CID)

    def parse_fmt0(self, raw, offset, strings, num_glyphs, is_CID):
        # One two byte id for every glyph apart from glyph 0
        id_fmt = ('>%dH'%(num_glyphs-1)).encode('ascii')
        ids = unpack_from(id_fmt, raw, offset)
        if is_CID:
            self.extend('cid%05d'%x for x in ids)
        else:
            self.extend(strings[x] for x in ids)

    def parse_fmt1(self, raw, offset, strings, num_glyphs, is_CID,
            is_two_byte=False):
        # A sequence of ranges, each one given as its first id and the number
        # of ids that follow the first
        range_fmt = b'>2H' if is_two_byte else b'>HB'
        range_size = calcsize(range_fmt)
        seen = 1
        while seen < num_glyphs:
            first, nleft = unpack_from(range_fmt, raw, offset)
            offset += range_size
            seen += nleft + 1
            ids = range(first, first + nleft+1)
            if is_CID:
                self.extend('cid%05d'%x for x in ids)
            else:
                self.extend(strings[x] for x in ids)

    def lookup(self, glyph_id):
        if self.standard_charset is not None:
            names = STANDARD_CHARSETS[self.standard_charset]
            return names[glyph_id].encode('ascii')
        return self[glyph_id]

    def safe_lookup(self, glyph_id):
        # Same as lookup(), with None for glyph ids that cannot be resolved
        try:
            return self.lookup(glyph_id)
        except (KeyError, IndexError, ValueError):
            return None
class Subrs(Index):
    '''INDEX of subroutines. Adds nothing to Index.'''
    pass
class CharStringsIndex(Index):
    '''INDEX of charstrings (glyph definitions). Adds nothing to Index.'''
    pass
class CFFTable(UnknownTable):
    '''The CFF table of a font, with support for subsetting it.'''

    def decompile(self):
        self.cff = CFF(self.raw)

    def subset(self, character_map, extra_glyphs):
        '''
        Replace the raw data of this table with a subset that contains only
        the glyphs referred to by character_map (character code -> glyph id)
        and by extra_glyphs (glyph ids). character_map is modified in place
        to refer to the glyph ids of the subset font. Raises NoGlyphs if
        character_map is not empty but none of its glyphs has a name.
        '''
        from calibre.utils.fonts.sfnt.cff.writer import Subset
        glyph_name = self.cff.charset.safe_lookup

        # Map codes from the cmap table to glyph names, this will be used to
        # reconstruct character_map for the subset font
        names_by_code = {}
        for code, glyph_id in iteritems(character_map):
            names_by_code[code] = glyph_name(glyph_id)

        wanted = {name for name in itervalues(names_by_code)
                  if name is not None}
        if character_map and not wanted:
            raise NoGlyphs('This font has no glyphs for the specified characters')
        for glyph_id in extra_glyphs:
            name = glyph_name(glyph_id)
            if name is not None:
                wanted.add(name)

        subsetter = Subset(self.cff, wanted)

        # Rebuild character_map with the glyph ids from the subset font
        character_map.clear()
        for code, charname in iteritems(names_by_code):
            new_id = subsetter.charname_map.get(charname, None)
            # Codes whose glyph is missing or is glyph 0 are left out
            if new_id:
                character_map[code] = new_id

        # Check that raw is parseable
        CFF(subsetter.raw)
        self.raw = subsetter.raw

View File

@@ -0,0 +1,290 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
# Note that the code for creating a BMP table (cmap format 4) is taken with
# thanks from the fonttools project (BSD licensed).
from struct import unpack_from, calcsize, pack
from collections import OrderedDict
from calibre.utils.fonts.utils import read_bmp_prefix
from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from polyglot.builtins import range
def split_range(start_code, end_code, cmap):  # {{{
    # Split the character codes start_code..end_code (inclusive) into
    # subranges for a cmap format 4 subtable, such that the subtable is stored
    # "most" efficiently. cmap maps character codes to glyph IDs.
    #
    # Returns (start, end): end has the last code of every subrange, start
    # has the first code of every subrange except the first one.
    if start_code == end_code:
        return [], [end_code]

    # Collect the runs of codes whose glyph IDs are consecutive.
    runs = []
    run_begin = None
    in_run = False
    prev_id = cmap[start_code]
    prev_code = start_code
    for code in range(start_code + 1, end_code + 1):
        glyph_id = cmap[code]
        if glyph_id - 1 == prev_id:
            if not in_run:
                in_run = True
                run_begin = prev_code
        elif in_run:
            in_run = False
            runs.append((run_begin, prev_code))
            run_begin = None
        prev_id = glyph_id
        prev_code = code
    if in_run:
        runs.append((run_begin, prev_code))
    assert prev_code == end_code

    # Drop the runs that are too short to be worth a segment of their own.
    # A new segment costs 8 bytes, not using a new segment costs 2 bytes per
    # character.
    kept = []
    for first, last in runs:
        if first == start_code and last == end_code:
            break  # the whole range, we're fine
        at_edge = first == start_code or last == end_code
        # A split at the edge costs one more segment, elsewhere two
        threshold = 4 if at_edge else 8
        if (last - first + 1) > threshold:
            kept.append((first, last))

    if not kept:
        return [], [end_code]

    # Make sure the subranges reach both ends of the range
    if kept[0][0] != start_code:
        kept.insert(0, (start_code, kept[0][0] - 1))
    if kept[-1][1] != end_code:
        kept.append((kept[-1][1] + 1, end_code))

    # Fill the "holes" between the subranges -- those are the segments in
    # which the glyph IDs are _not_ consecutive.
    pos = 1
    while pos < len(kept):
        expected = kept[pos-1][1] + 1
        if expected != kept[pos][0]:
            kept.insert(pos, (expected, kept[pos][0] - 1))
            pos += 1
        pos += 1

    # Transform the subranges into start_code/end_code lists.
    start = [first for first, _ in kept[1:]]
    end = [last for _, last in kept]
    assert len(start) + 1 == len(end)
    return start, end
# }}}
def set_id_delta(id_delta):  # {{{
    # Return id_delta wrapped into the range of a signed 16 bit integer.
    #
    # id_delta is stored as a short, so it must be between -32768 and 32767.
    # However, start codes lie between 0 and 64K-1 and glyph ids between 1 and
    # 64K-1, so the difference between them can be anything between -(64K-2)
    # and 64K-1.
    # This works out because the final glyph id is reconstructed as:
    #   (gid + id_delta) % 0x10000
    # Adding or subtracting 0x10000 to id_delta therefore does not change the
    # final glyph id. For example, a start code of 0 maps to a glyph id of
    # 64K-1 with an id_delta of -1, and a start code of 64K-1 maps to a glyph
    # id of 1 with an id_delta of 2.
    if id_delta > 0x7FFF:
        id_delta -= 0x10000
    elif id_delta < -0x8000:
        # -0x8000 itself is a valid short and must be left alone: wrapping it
        # would give 0x8000, which is out of range.
        id_delta += 0x10000
    return id_delta
# }}}
class BMPTable(object):
    '''
    Lookup of glyph ids in a cmap subtable, whose segment arrays are obtained
    from read_bmp_prefix().
    '''

    def __init__(self, raw):
        self.raw = raw
        (self.start_count, self.end_count, self.range_offset, self.id_delta,
         self.glyph_id_len, self.glyph_id_map, self.array_len) = \
                read_bmp_prefix(raw, 0)

    def _segment_glyph_id(self, seg, code):
        # Glyph id of code, which must lie inside segment number seg
        ro = self.range_offset[seg]
        if ro == 0:
            # The glyph id is computed directly from the code
            glyph_id = self.id_delta[seg] + code
        else:
            # The glyph id is looked up in the glyph id array
            idx = ro//2 + (code - self.start_count[seg]) + seg - self.array_len
            glyph_id = self.glyph_id_map[idx]
            if glyph_id != 0:
                glyph_id += self.id_delta[seg]
        return glyph_id % 0x10000

    def get_glyph_ids(self, codes):
        '''
        Yield the glyph id for every code in codes, in order. Codes that are
        not part of any segment yield 0.
        '''
        for code in codes:
            for seg, last in enumerate(self.end_count):
                if last >= code and self.start_count[seg] <= code:
                    yield self._segment_glyph_id(seg, code)
                    break
            else:
                yield 0

    def get_glyph_map(self, glyph_ids):
        '''
        Return a mapping of character code to glyph id, for the codes whose
        glyph id is in glyph_ids. If a code is present in more than one
        segment, the first match wins.
        '''
        ans = {}
        for seg, last in enumerate(self.end_count):
            first = self.start_count[seg]
            for code in range(first, last+1):
                glyph_id = self._segment_glyph_id(seg, code)
                if glyph_id in glyph_ids and code not in ans:
                    ans[code] = glyph_id
        return ans
class CmapTable(UnknownTable):

    '''
    The cmap table. Only the Windows BMP subtable (platform 3, encoding 1,
    format 4) is understood, all other subtables are ignored.
    '''

    def __init__(self, *args, **kwargs):
        super(CmapTable, self).__init__(*args, **kwargs)

        self.version, self.num_tables = unpack_from(b'>HH', self.raw)

        self.tables = {}

        # Read the encoding records: (platform, encoding, subtable offset)
        offset = 4
        sz = calcsize(b'>HHL')
        recs = []
        for i in range(self.num_tables):
            platform, encoding, table_offset = unpack_from(b'>HHL', self.raw,
                    offset)
            offset += sz
            recs.append((platform, encoding, table_offset))

        self.bmp_table = None

        for i, (platform, encoding, offset) in enumerate(recs):
            # A subtable is taken to extend up to the offset of the next
            # record, or to the end of the data for the last record
            try:
                next_offset = recs[i+1][-1]
            except IndexError:
                next_offset = len(self.raw)
            table = self.raw[offset:next_offset]
            if table:
                fmt = unpack_from(b'>H', table)[0]
                if platform == 3 and encoding == 1 and fmt == 4:
                    self.bmp_table = BMPTable(table)

    def get_character_map(self, chars):
        '''
        Get a mapping of character codes to glyph ids in the font.

        Only characters that map to a non-zero glyph id are present in the
        result, which is ordered by character code.
        '''
        if self.bmp_table is None:
            raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
                    ' Most likely a special purpose font.')
        chars = sorted(set(chars))
        ans = OrderedDict()
        for i, glyph_id in enumerate(self.bmp_table.get_glyph_ids(chars)):
            if glyph_id > 0:
                ans[chars[i]] = glyph_id
        return ans

    def get_glyph_map(self, glyph_ids):
        '''
        Get a mapping of character codes to glyph ids for the specified glyph
        ids.
        '''
        if self.bmp_table is None:
            raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
                    ' Most likely a special purpose font.')
        glyph_ids = frozenset(glyph_ids)
        return self.bmp_table.get_glyph_map(glyph_ids)

    def set_character_map(self, cmap):
        '''
        Replace the contents of this table with a single format 4 subtable
        (platform 3, encoding 1) built from ``cmap``, a mapping of character
        code to glyph id.
        '''
        self.version, self.num_tables = 0, 1
        fmt = b'>7H'
        codes = sorted(cmap)

        if not codes:
            start_code = [0xffff]
            end_code = [0xffff]
        else:
            # Group the codes into runs of consecutive values, letting
            # split_range() sub-divide each run further
            last_code = codes[0]
            end_code = []
            start_code = [last_code]

            for code in codes[1:]:
                if code == last_code + 1:
                    last_code = code
                    continue
                start, end = split_range(start_code[-1], last_code, cmap)
                start_code.extend(start)
                end_code.extend(end)
                start_code.append(code)
                last_code = code
            end_code.append(last_code)
            start_code.append(0xffff)
            end_code.append(0xffff)

        id_delta = []
        id_range_offset = []
        glyph_index_array = []
        for i in range(len(end_code)-1):  # skip the closing codes (0xffff)
            indices = list(cmap[char_code] for char_code in range(start_code[i], end_code[i] + 1))
            if indices == list(range(indices[0], indices[0] + len(indices))):
                # indices is a contiguous list
                id_delta_temp = set_id_delta(indices[0] - start_code[i])
                id_delta.append(id_delta_temp)
                id_range_offset.append(0)
            else:
                id_delta.append(0)
                id_range_offset.append(2 * (len(end_code) + len(glyph_index_array) - i))
                glyph_index_array.extend(indices)
        id_delta.append(1)  # 0xffff + 1 == 0. So this end code maps to .notdef
        id_range_offset.append(0)

        seg_count = len(end_code)
        max_exponent = max_power_of_two(seg_count)
        search_range = 2 * (2 ** max_exponent)
        entry_selector = max_exponent
        range_shift = 2 * seg_count - search_range

        char_code_array = end_code + [0] + start_code
        char_code_array = pack(b'>%dH'%len(char_code_array), *char_code_array)
        id_delta_array = pack(b'>%dh'%len(id_delta), *id_delta)
        rest_array = id_range_offset + glyph_index_array
        rest_array = pack(b'>%dH'%len(rest_array), *rest_array)
        data = char_code_array + id_delta_array + rest_array

        length = calcsize(fmt) + len(data)
        header = pack(fmt, 4, length, 0, 2*seg_count, search_range, entry_selector, range_shift)
        bmp_raw = header + data

        fmt = b'>4HL'
        offset = calcsize(fmt)
        self.raw = pack(fmt, self.version, self.num_tables, 3, 1, offset) + bmp_raw
        # Keep bmp_table a BMPTable, as __init__ does, rather than raw bytes.
        # Otherwise get_character_map()/get_glyph_map() fail with an
        # AttributeError once the character map has been replaced. The
        # serialized subtable remains available as self.bmp_table.raw
        self.bmp_table = BMPTable(bmp_raw)

View File

@@ -0,0 +1,252 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from, calcsize
from collections import OrderedDict, namedtuple
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from polyglot.builtins import range, iteritems
class Unpackable(object):

    '''
    A cursor over ``raw`` for reading consecutive big-endian struct fields.

    ``start_pos`` keeps the offset the cursor was created at, ``offset`` is
    the current read position and advances with every call to unpack().
    '''

    def __init__(self, raw, offset):
        self.raw, self.offset = raw, offset
        self.start_pos = offset

    def unpack(self, fmt, single_special=True):
        '''
        Read the fields described by ``fmt`` (a struct format without a byte
        order prefix, str or bytes) at the current offset and advance past
        them.

        :param single_special: If True a format with exactly one field
            returns the bare value instead of a 1-tuple.
        '''
        fmt = fmt.encode('ascii') if not isinstance(fmt, bytes) else fmt
        fmt = b'>' + fmt
        ans = unpack_from(fmt, self.raw, self.offset)
        if single_special and len(ans) == 1:
            ans = ans[0]
        # The size must be computed with the same byte order prefix as was
        # used for unpacking. Without a prefix struct uses native sizes and
        # alignment, so e.g. 'L' can be 8 bytes wide and padded.
        self.offset += calcsize(fmt)
        return ans
class SimpleListTable(list):

    '''
    A table that contains a list of subtables.

    The data consists of an optional extra header, a 16 bit count and that
    many 16 bit offsets, relative to the start of the table. One
    ``child_class`` instance is created per offset.
    '''

    child_class = None

    def __init__(self, raw, offset):
        super(SimpleListTable, self).__init__()
        reader = Unpackable(raw, offset)
        self.read_extra_header(reader)
        num_children = reader.unpack('H')
        for _ in range(num_children):
            child_offset = reader.unpack('H')
            child = self.child_class(raw, reader.start_pos + child_offset)
            self.append(child)
        self.read_extra_footer(reader)

    def read_extra_header(self, data):
        ''' Hook for subclasses to read fields that precede the count. '''

    def read_extra_footer(self, data):
        ''' Hook for subclasses to read fields that follow the offsets. '''
class ListTable(OrderedDict):

    '''
    A table that contains an ordered mapping of table tag to subtable.

    The data consists of an optional extra header, a 16 bit count and that
    many (4 byte tag, 16 bit offset) records. Offsets are relative to the
    start of the table.
    '''

    child_class = None

    def __init__(self, raw, offset):
        super(ListTable, self).__init__()
        reader = Unpackable(raw, offset)
        self.read_extra_header(reader)
        num_records = reader.unpack('H')
        for _ in range(num_records):
            tag, child_offset = reader.unpack('4sH')
            self[tag] = self.child_class(raw, reader.start_pos + child_offset)
        self.read_extra_footer(reader)

    def read_extra_header(self, data):
        ''' Hook for subclasses to read fields that precede the count. '''

    def read_extra_footer(self, data):
        ''' Hook for subclasses to read fields that follow the records. '''

    def dump(self, prefix=''):
        ''' Print the class name and, indented below it, every tag followed
        by the dump of its child. '''
        print(prefix, self.__class__.__name__, sep='')
        inner = prefix + ' '
        for tag, child in iteritems(self):
            print(inner, tag, sep='')
            child.dump(prefix=inner+' ')
class IndexTable(list):

    '''
    A list of 16 bit values, read from a 16 bit count followed by that many
    values. Subclasses can read extra fields before the count.
    '''

    def __init__(self, raw, offset):
        reader = Unpackable(raw, offset)
        self.read_extra_header(reader)
        num_entries = reader.unpack('H')
        for _ in range(num_entries):
            value = reader.unpack('H')
            self.append(value)

    def read_extra_header(self, data):
        ''' Hook for subclasses to read fields that precede the count. '''

    def dump(self, prefix=''):
        ''' Print the class name, preceded by ``prefix``. '''
        print(prefix, self.__class__.__name__, sep='')
class LanguageSystemTable(IndexTable):

    '''
    An IndexTable preceded by a lookup order and a required feature index.
    Only a lookup order of zero is supported.
    '''

    def read_extra_header(self, data):
        header = data.unpack('2H')
        self.lookup_order, self.required_feature_index = header
        if self.lookup_order == 0:
            return
        raise UnsupportedFont('This LanguageSystemTable has an unknown'
                ' lookup order: 0x%x'%self.lookup_order)
class ScriptTable(ListTable):

    '''
    Mapping of tag to LanguageSystemTable. The optional default language
    system is stored under the key b'default', which is None when the
    default offset is zero.
    '''

    child_class = LanguageSystemTable

    def __init__(self, raw, offset):
        super(ScriptTable, self).__init__(raw, offset)

    def read_extra_header(self, data):
        # The default offset is relative to the position at which it is read
        base = data.offset
        default_offset = data.unpack('H')
        if default_offset:
            default = LanguageSystemTable(data.raw, base + default_offset)
        else:
            default = None
        self[b'default'] = default
class ScriptListTable(ListTable):

    ''' Mapping of tag to ScriptTable. '''

    child_class = ScriptTable
class FeatureTable(IndexTable):

    '''
    An IndexTable preceded by a 16 bit FeatureParams value, which is stored
    as ``feature_params`` but otherwise ignored.
    '''

    def read_extra_header(self, data):
        self.feature_params = data.unpack('H')
        # This check is deliberately disabled: Source code pro sets
        # FeatureParams to non NULL
        if False and self.feature_params != 0:
            raise UnsupportedFont(
                'This FeatureTable has non NULL FeatureParams: 0x%x'%self.feature_params)
class FeatureListTable(ListTable):

    ''' Mapping of tag to FeatureTable. '''

    child_class = FeatureTable
class LookupTable(SimpleListTable):

    '''
    A list of lookup subtables, preceded by the lookup type and the lookup
    flag. Subclasses must implement set_child_class() to select the subtable
    class for ``lookup_type``.
    '''

    def read_extra_header(self, data):
        lookup_type, lookup_flag = data.unpack('2H')
        self.lookup_type = lookup_type
        self.lookup_flag = lookup_flag
        # The subtable class depends on the lookup type, so it can only be
        # chosen now, before the subtables are read
        self.set_child_class()

    def set_child_class(self):
        raise NotImplementedError()

    def read_extra_footer(self, data):
        # The mark filtering set is only present when bit 0x0010 of the
        # lookup flag is set
        has_mark_filtering_set = self.lookup_flag & 0x0010
        if has_mark_filtering_set:
            self.mark_filtering_set = data.unpack('H')
def ExtensionSubstitution(raw, offset, subtable_map=None):
    '''
    Read an extension substitution subtable and return the subtable it
    points to.

    :param raw: The table data.
    :param offset: Offset of the extension subtable in ``raw``.
    :param subtable_map: Mapping of lookup type to the callable used to
        construct the subtable that is pointed to.
    :raises UnsupportedFont: If the extension subtable format is not 1.
    :raises KeyError: If the extension lookup type is not in ``subtable_map``.
    '''
    # Use None instead of a shared mutable {} as the default
    if subtable_map is None:
        subtable_map = {}
    data = Unpackable(raw, offset)
    subst_format, extension_lookup_type, extension_offset = data.unpack('2HL')
    if subst_format != 1:
        raise UnsupportedFont('ExtensionSubstitution has unknown format: 0x%x'%subst_format)
    # The extension offset is relative to the start of the extension subtable
    return subtable_map[extension_lookup_type](raw, extension_offset+data.start_pos)
CoverageRange = namedtuple('CoverageRange', 'start end start_coverage_index')
class Coverage(object):

    '''
    A coverage table. Format 1 is a list of glyph ids, format 2 is a list
    of (start, end, start_coverage_index) ranges.
    '''

    def __init__(self, raw, offset, parent_table_name):
        reader = Unpackable(raw, offset)
        self.format, count = reader.unpack('2H')

        if self.format not in {1, 2}:
            raise UnsupportedFont('Unknown Coverage format: 0x%x in %s'%(
                self.format, parent_table_name))

        if self.format == 1:
            # The coverage index of a glyph is its position in the list
            self.glyph_ids = reader.unpack('%dH'%count, single_special=False)
            self.glyph_ids_map = dict(
                (gid, idx) for idx, gid in enumerate(self.glyph_ids))
        else:
            flat = reader.unpack('%dH'%(3*count), single_special=False)
            self.ranges = []
            for k in range(count):
                first, last, first_index = flat[3*k:3*k+3]
                self.ranges.append(CoverageRange(first, last, first_index))

    def coverage_indices(self, glyph_ids):
        '''Return map of glyph_id -> coverage index. Map contains only those
        glyph_ids that are covered by this table and that are present in
        glyph_ids.'''
        covered = OrderedDict()
        for gid in glyph_ids:
            if self.format == 1:
                index = self.glyph_ids_map.get(gid)
                if index is not None:
                    covered[gid] = index
                continue
            for first, last, first_index in self.ranges:
                if first <= gid <= last:
                    covered[gid] = first_index + (gid - first)
        return covered
class UnknownLookupSubTable(object):

    '''
    Base class for lookup subtables. Reads the format and, when
    has_initial_coverage is true, the coverage table, then hands over to
    the initialize() method of the subclass.
    '''

    formats = {}

    def __init__(self, raw, offset):
        reader = Unpackable(raw, offset)
        self.format = reader.unpack('H')
        if self.format not in self.formats:
            raise UnsupportedFont('Unknown format for Lookup Subtable %s: 0x%x'%(
                self.__class__.__name__, self.format))
        if self.has_initial_coverage:
            # The coverage offset is relative to the start of the subtable
            coverage_offset = reader.unpack('H') + reader.start_pos
            self.coverage = Coverage(raw, coverage_offset, self.__class__.__name__)
        self.initialize(reader)

    @property
    def has_initial_coverage(self):
        return True

    def all_substitutions(self, glyph_ids):
        ''' Return a set of all glyph ids that could be substituted for any
        subset of the specified glyph ids (which must be a set)'''
        raise NotImplementedError()

    def read_sets(self, data, read_item=None, set_is_index=False):
        '''
        Read a counted list of sets, each of which is itself a counted list
        of 16 bit values, and return one list of items per set.

        :param read_item: Called with ``data`` positioned at every item to
            read it. Item positions are relative to the start of their set.
        :param set_is_index: If True the 16 bit values are themselves the
            items and ``read_item`` is not used.
        '''
        num_sets = data.unpack('H')
        set_offsets = data.unpack('%dH'%num_sets, single_special=False)
        coverage_to_items_map = []
        for set_offset in set_offsets:
            # Set offsets are relative to the start of the subtable
            set_start = set_offset + data.start_pos
            data.offset = set_start
            num_items = data.unpack('H')
            entries = data.unpack('%dH'%num_items, single_special=False)
            items = []
            for entry in entries:
                data.offset = entry + set_start
                items.append(entry if set_is_index else read_item(data))
            coverage_to_items_map.append(items)
        return coverage_to_items_map

View File

@@ -0,0 +1,171 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
# License: GPLv3 Copyright: 2012, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from collections import OrderedDict
from io import BytesIO
from struct import calcsize, pack
from calibre.utils.fonts.sfnt import UnknownTable, align_block, max_power_of_two
from calibre.utils.fonts.sfnt.cff.table import CFFTable
from calibre.utils.fonts.sfnt.cmap import CmapTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre.utils.fonts.sfnt.glyf import GlyfTable
from calibre.utils.fonts.sfnt.gsub import GSUBTable
from calibre.utils.fonts.sfnt.head import (
HeadTable, HorizontalHeader, OS2Table, PostTable, VerticalHeader
)
from calibre.utils.fonts.sfnt.kern import KernTable
from calibre.utils.fonts.sfnt.loca import LocaTable
from calibre.utils.fonts.sfnt.maxp import MaxpTable
from calibre.utils.fonts.utils import checksum_of_block, get_tables, verify_checksums
# OpenType spec: http://www.microsoft.com/typography/otspec/otff.htm
class Sfnt(object):

    '''
    Container for the tables of an sfnt font.

    Can be created either from the raw bytes of a font file or from a
    callable that returns the data of a table given its tag. Tables are
    wrapped in the classes from TABLE_MAP, tables without an entry there
    are kept as UnknownTable. Calling the object serializes the font.
    '''

    TABLE_MAP = {
        b'head' : HeadTable,
        b'hhea' : HorizontalHeader,
        b'vhea' : VerticalHeader,
        b'maxp' : MaxpTable,
        b'loca' : LocaTable,
        b'glyf' : GlyfTable,
        b'cmap' : CmapTable,
        b'CFF ' : CFFTable,
        b'kern' : KernTable,
        b'GSUB' : GSUBTable,
        b'OS/2' : OS2Table,
        b'post' : PostTable,
    }

    def __init__(self, raw_or_get_table):
        '''
        :param raw_or_get_table: Either the bytes of a font file or a
            callable that takes a table tag and returns the table data
            (something falsy after conversion to bytes when the table is
            missing).
        :raises UnsupportedFont: If the sfnt version is not recognized or no
            tables are found.
        '''
        self.tables = {}
        if isinstance(raw_or_get_table, bytes):
            raw = raw_or_get_table
            self.sfnt_version = raw[:4]
            # The version tag is always four bytes long, so the Apple Type 1
            # tag is b'typ1'. The five byte b'type1' could never match.
            if self.sfnt_version not in {b'\x00\x01\x00\x00', b'OTTO', b'true',
                    b'typ1'}:
                raise UnsupportedFont('Font has unknown sfnt version: %r'%self.sfnt_version)
            for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
                self.tables[table_tag] = self.TABLE_MAP.get(
                    table_tag, UnknownTable)(table)
        else:
            for table_tag in {
                b'cmap', b'hhea', b'head', b'hmtx', b'maxp', b'name', b'OS/2',
                b'post', b'cvt ', b'fpgm', b'glyf', b'loca', b'prep', b'CFF ',
                b'VORG', b'EBDT', b'EBLC', b'EBSC', b'BASE', b'GSUB', b'GPOS',
                b'GDEF', b'JSTF', b'gasp', b'hdmx', b'kern', b'LTSH', b'PCLT',
                b'VDMX', b'vhea', b'vmtx', b'MATH'}:
                table = bytes(raw_or_get_table(table_tag))
                if table:
                    self.tables[table_tag] = self.TABLE_MAP.get(
                        table_tag, UnknownTable)(table)
            if not self.tables:
                raise UnsupportedFont('This font has no tables')
            # There is no header to read the version from, so derive it from
            # the presence of a glyf table
            self.sfnt_version = (b'\0\x01\0\0' if b'glyf' in self.tables
                                    else b'OTTO')

    def __getitem__(self, key):
        return self.tables[key]

    def __contains__(self, key):
        return key in self.tables

    def __delitem__(self, key):
        del self.tables[key]

    def __iter__(self):
        '''Iterate over the table tags in order.'''
        for x in sorted(self.tables):
            yield x
        # Although the optimal order is not alphabetical, the OTF spec says
        # they should be alphabetical, so we stick with that. See
        # http://partners.adobe.com/public/developer/opentype/index_recs.html
        # for optimal order.
        # keys = list(self.tables)
        # order = {x:i for i, x in enumerate((b'head', b'hhea', b'maxp', b'OS/2',
        #     b'hmtx', b'LTSH', b'VDMX', b'hdmx', b'cmap', b'fpgm', b'prep',
        #     b'cvt ', b'loca', b'glyf', b'CFF ', b'kern', b'name', b'post',
        #     b'gasp', b'PCLT', b'DSIG'))}
        # keys.sort(key=lambda x:order.get(x, 1000))
        # for x in keys:
        #     yield x

    def pop(self, key, default=None):
        return self.tables.pop(key, default)

    def get(self, key, default=None):
        return self.tables.get(key, default)

    def sizes(self):
        ''' Return an ordered mapping of table tag to len() of the table. '''
        ans = OrderedDict()
        for tag in self:
            ans[tag] = len(self[tag])
        return ans

    def __call__(self, stream=None):
        '''
        Serialize the font.

        :param stream: Stream to write to, it is written from position 0 and
            must support getvalue(). A new BytesIO is used when None.
        :return: A tuple of the contents of the stream and an ordered
            mapping of table tag to the unpadded length of the table.
        '''
        stream = BytesIO() if stream is None else stream

        def spack(*args):
            stream.write(pack(*args))

        stream.seek(0)

        # Write header
        num_tables = len(self.tables)
        ln2 = max_power_of_two(num_tables)
        srange = (2**ln2) * 16
        spack(b'>4s4H',
            self.sfnt_version, num_tables, srange, ln2, num_tables * 16 - srange)

        # Write tables
        head_offset = None
        table_data = []
        # The table data starts after the table records
        offset = stream.tell() + (calcsize(b'>4s3L') * num_tables)
        sizes = OrderedDict()
        for tag in self:
            table = self.tables[tag]
            raw = table()
            table_len = len(raw)
            if tag == b'head':
                head_offset = offset
                # Zero bytes 8-12 of the head table before checksumming, the
                # final value is written to that position below
                raw = raw[:8] + b'\0\0\0\0' + raw[12:]
            raw = align_block(raw)
            checksum = checksum_of_block(raw)
            spack(b'>4s3L', tag, checksum, offset, table_len)
            offset += len(raw)
            table_data.append(raw)
            sizes[tag] = table_len

        for x in table_data:
            stream.write(x)

        checksum = checksum_of_block(stream.getvalue())
        q = (0xB1B0AFBA - checksum) & 0xffffffff
        stream.seek(head_offset + 8)
        spack(b'>L', q)

        return stream.getvalue(), sizes
def test_roundtrip(ff=None):
    '''
    Parse a font, serialize it again and check that the checksums verify
    and that the first 12 bytes and the total size are unchanged.

    :param ff: Path to the font file. The bundled Liberation Serif font is
        used when None.
    :raises ValueError: If the serialized font differs in header or size.
    '''
    if ff is not None:
        with open(ff, 'rb') as f:
            data = f.read()
    else:
        data = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
    rd, _sizes = Sfnt(data)()
    verify_checksums(rd)
    if rd[:12] != data[:12]:
        raise ValueError('Roundtripping failed, font header not the same')
    if len(rd) != len(data):
        raise ValueError('Roundtripping failed, size different (%d vs. %d)'%
                         (len(data), len(rd)))
if __name__ == '__main__':
import sys
test_roundtrip(sys.argv[-1])

View File

@@ -0,0 +1,16 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
class UnsupportedFont(ValueError):
    ''' Raised for fonts whose data is not supported by this package. '''
class NoGlyphs(ValueError):
    ''' A ValueError subclass; going by its name, raised when no glyphs
    are found or left (its uses are outside this module). '''

View File

@@ -0,0 +1,95 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from
from collections import OrderedDict
from calibre.utils.fonts.sfnt import UnknownTable
from polyglot.builtins import iteritems
ARG_1_AND_2_ARE_WORDS = 0x0001 # if set args are words otherwise they are bytes
ARGS_ARE_XY_VALUES = 0x0002 # if set args are xy values, otherwise they are points
ROUND_XY_TO_GRID = 0x0004 # for the xy values if above is true
WE_HAVE_A_SCALE = 0x0008 # Sx = Sy, otherwise scale == 1.0
NON_OVERLAPPING = 0x0010 # set to same value for all components (obsolete!)
MORE_COMPONENTS = 0x0020 # indicates at least one more glyph after this one
WE_HAVE_AN_X_AND_Y_SCALE = 0x0040 # Sx, Sy
WE_HAVE_A_TWO_BY_TWO = 0x0080 # t00, t01, t10, t11
WE_HAVE_INSTRUCTIONS = 0x0100 # instructions follow
USE_MY_METRICS = 0x0200 # apply these metrics to parent glyph
OVERLAP_COMPOUND = 0x0400 # used by Apple in GX fonts
SCALED_COMPONENT_OFFSET = 0x0800 # composite designed to have the component offset scaled (designed for Apple)
UNSCALED_COMPONENT_OFFSET = 0x1000 # composite designed not to have the component offset scaled (designed for MS)
class SimpleGlyph(object):

    '''
    The raw data of a single glyph. Calling the object returns the raw data
    and len() gives its size.
    '''

    def __init__(self, num_of_countours, raw):
        self.raw = raw
        self.num_of_countours = num_of_countours
        self.is_composite = False
        # The list of glyph indices referred to by this glyph, will always be
        # empty for a simple glyph and not empty for a composite glyph
        self.glyph_indices = []

    def __call__(self):
        return self.raw

    def __len__(self):
        return len(self.raw)
class CompositeGlyph(SimpleGlyph):

    '''
    A glyph built from other glyphs. The indices of the component glyphs
    are collected in ``glyph_indices``.
    '''

    def __init__(self, num_of_countours, raw):
        super(CompositeGlyph, self).__init__(num_of_countours, raw)
        self.is_composite = True

        # The component records start after the 10 byte glyph header
        pos = 10
        while True:
            flags, glyph_index = unpack_from(b'>HH', raw, pos)
            self.glyph_indices.append(glyph_index)
            pos += 4
            # Skip the two arguments, which are either words or bytes
            pos += 4 if flags & ARG_1_AND_2_ARE_WORDS else 2
            # Skip the transformation, at most one of these is honoured
            if flags & WE_HAVE_A_SCALE:
                pos += 2
            elif flags & WE_HAVE_AN_X_AND_Y_SCALE:
                pos += 4
            elif flags & WE_HAVE_A_TWO_BY_TWO:
                pos += 8
            if not flags & MORE_COMPONENTS:
                break
class GlyfTable(UnknownTable):

    ''' The glyf table, which holds the data of the glyphs. '''

    def glyph_data(self, offset, length, as_raw=False):
        '''
        Return the glyph stored in the ``length`` bytes at ``offset``.

        :param as_raw: If True return the bytes instead of a glyph object.
        :return: A CompositeGlyph if the number of contours is negative,
            otherwise a SimpleGlyph. Empty data counts as zero contours.
        '''
        raw = self.raw[offset:offset+length]
        if as_raw:
            return raw
        if raw:
            num_of_countours, = unpack_from(b'>h', raw)
        else:
            num_of_countours = 0
        glyph_class = CompositeGlyph if num_of_countours < 0 else SimpleGlyph
        return glyph_class(num_of_countours, raw)

    def update(self, sorted_glyph_map):
        '''
        Replace the table data with the glyphs in ``sorted_glyph_map`` (a
        mapping of glyph id to glyph), each padded with NUL bytes to a
        multiple of four bytes.

        :return: An ordered mapping of glyph id to (offset, padded size).
        '''
        locations = OrderedDict()
        chunks = []
        position = 0
        for glyph_id, glyph in iteritems(sorted_glyph_map):
            raw = glyph()
            remainder = len(raw) % 4
            if remainder:
                raw += b'\0' * (4 - remainder)
            size = len(raw)
            locations[glyph_id] = position, size
            position += size
            chunks.append(raw)
        self.raw = b''.join(chunks)
        return locations

View File

@@ -0,0 +1,191 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from
from functools import partial
from calibre.utils.fonts.sfnt import UnknownTable, FixedProperty
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre.utils.fonts.sfnt.common import (ScriptListTable, FeatureListTable,
SimpleListTable, LookupTable, ExtensionSubstitution,
UnknownLookupSubTable)
from polyglot.builtins import iteritems, itervalues
class SingleSubstitution(UnknownLookupSubTable):

    '''
    Substitution of one glyph by one other glyph. Format 1 adds a constant
    delta to the glyph id, format 2 lists the substitute for every coverage
    index.
    '''

    formats = {1, 2}

    def initialize(self, data):
        if self.format == 1:
            self.delta = data.unpack('h')
        else:
            count = data.unpack('H')
            self.substitutes = data.unpack('%dH'%count, single_special=False)

    def all_substitutions(self, glyph_ids):
        ''' Return the set of substitutes for those glyphs in ``glyph_ids``
        that are covered by this subtable. '''
        gid_index_map = self.coverage.coverage_indices(glyph_ids)
        if self.format == 1:
            # Glyph ids are unsigned 16 bit numbers and the delta is signed,
            # so the addition has to wrap around modulo 65536
            return {(gid + self.delta) % 0x10000 for gid in gid_index_map}
        return {self.substitutes[i] for i in itervalues(gid_index_map)}
class MultipleSubstitution(UnknownLookupSubTable):

    ''' Substitution of one glyph by a sequence of glyphs. '''

    formats = {1}

    def initialize(self, data):
        # The values stored in every set are used as the items themselves
        self.coverage_to_subs_map = self.read_sets(data, set_is_index=True)

    def all_substitutions(self, glyph_ids):
        ''' Return the union of the item sets of all covered glyphs in
        ``glyph_ids``. '''
        covered = self.coverage.coverage_indices(glyph_ids)
        substitutes = set()
        for coverage_index in itervalues(covered):
            substitutes.update(self.coverage_to_subs_map[coverage_index])
        return substitutes
class AlternateSubstitution(MultipleSubstitution):
    ''' Read and queried exactly like MultipleSubstitution. '''
class LigatureSubstitution(UnknownLookupSubTable):

    ''' Substitution of a sequence of glyphs by a single ligature glyph. '''

    formats = {1}

    def initialize(self, data):
        self.coverage_to_lig_map = self.read_sets(data, self.read_ligature)

    def read_ligature(self, data):
        ''' Read one ligature and return (ligature glyph id, component
        glyph ids). The stored count includes the first component, which is
        not part of the stored components. '''
        lig_glyph, component_count = data.unpack('HH')
        components = data.unpack('%dH'%(component_count-1), single_special=False)
        return (lig_glyph, components)

    def all_substitutions(self, glyph_ids):
        ''' Return the set of ligature glyphs all of whose components are in
        ``glyph_ids``. '''
        covered = self.coverage.coverage_indices(glyph_ids)
        ligatures = set()
        for first_glyph_id, coverage_index in iteritems(covered):
            for lig_glyph, rest in self.coverage_to_lig_map[coverage_index]:
                # The covered glyph is the first component of the ligature
                needed = set((first_glyph_id,) + rest)
                if needed.issubset(glyph_ids):
                    ligatures.add(lig_glyph)
        return ligatures
class ContexttualSubstitution(UnknownLookupSubTable):

    ''' Contextual substitution. The subtable data is not parsed yet. '''

    formats = {1, 2, 3}

    @property
    def has_initial_coverage(self):
        # Every format other than 3 is read as starting with a coverage
        # offset
        return not self.format == 3

    def initialize(self, data):
        pass  # TODO

    def all_substitutions(self, glyph_ids):
        # This table only defined substitution in terms of other tables
        return set()
class ChainingContextualSubstitution(UnknownLookupSubTable):

    ''' Chaining contextual substitution. The subtable data is not parsed
    yet. '''

    formats = {1, 2, 3}

    @property
    def has_initial_coverage(self):
        # Every format other than 3 is read as starting with a coverage
        # offset
        return not self.format == 3

    def initialize(self, data):
        pass  # TODO

    def all_substitutions(self, glyph_ids):
        # This table only defined substitution in terms of other tables
        return set()
class ReverseChainSingleSubstitution(UnknownLookupSubTable):

    '''
    Reverse chaining single substitution. The backtrack and lookahead
    coverage offsets are read but not used yet, only the list of substitute
    glyphs is.
    '''

    formats = {1}

    def initialize(self, data):
        backtrack_count = data.unpack('H')
        backtrack_offsets = data.unpack('%dH'%backtrack_count,
                single_special=False)
        lookahead_count = data.unpack('H')
        lookahead_offsets = data.unpack('%dH'%lookahead_count,
                single_special=False)
        backtrack_offsets = [data.start_pos + x for x in backtrack_offsets]
        lookahead_offsets = [data.start_pos + x for x in lookahead_offsets]
        backtrack_offsets, lookahead_offsets  # TODO: Use these
        count = data.unpack('H')
        # single_special must be off, otherwise a table with exactly one
        # substitute yields a bare int that cannot be indexed in
        # all_substitutions()
        self.substitutes = data.unpack('%dH'%count, single_special=False)

    def all_substitutions(self, glyph_ids):
        ''' Return the set of substitutes for those glyphs in ``glyph_ids``
        that are covered by this subtable. '''
        gid_index_map = self.coverage.coverage_indices(glyph_ids)
        return {self.substitutes[i] for i in itervalues(gid_index_map)}
subtable_map = {
1: SingleSubstitution,
2: MultipleSubstitution,
3: AlternateSubstitution,
4: LigatureSubstitution,
5: ContexttualSubstitution,
6: ChainingContextualSubstitution,
8: ReverseChainSingleSubstitution,
}
class GSUBLookupTable(LookupTable):

    ''' A lookup table whose subtable class is chosen from subtable_map. '''

    def set_child_class(self):
        if self.lookup_type != 7:
            self.child_class = subtable_map[self.lookup_type]
            return
        # Lookup type 7 is handled by ExtensionSubstitution, which picks the
        # actual subtable class from subtable_map itself
        self.child_class = partial(ExtensionSubstitution,
                                   subtable_map=subtable_map)
class LookupListTable(SimpleListTable):

    ''' A list of GSUBLookupTable. '''

    child_class = GSUBLookupTable
class GSUBTable(UnknownTable):

    '''
    The GSUB (glyph substitution) table. decompile() must be called before
    all_substitutions() can be used.
    '''

    version = FixedProperty('_version')

    def decompile(self):
        '''
        Parse the script list, feature list and lookup list.

        :raises UnsupportedFont: If the table version is not 0x10000.
        '''
        (self._version, self.scriptlist_offset, self.featurelist_offset,
                self.lookuplist_offset) = unpack_from(b'>L3H', self.raw)
        if self._version != 0x10000:
            raise UnsupportedFont('The GSUB table has unknown version: 0x%x'%
                    self._version)

        self.script_list_table = ScriptListTable(self.raw,
                self.scriptlist_offset)
        # self.script_list_table.dump()

        self.feature_list_table = FeatureListTable(self.raw,
                self.featurelist_offset)
        # self.feature_list_table.dump()

        self.lookup_list_table = LookupListTable(self.raw,
                self.lookuplist_offset)

    def all_substitutions(self, glyph_ids):
        '''
        Return the set of glyph ids that the lookups in this table could
        substitute for the specified glyph ids. The specified glyph ids
        themselves are not part of the result.
        '''
        glyph_ids = frozenset(glyph_ids)
        ans = set(glyph_ids)
        for lookup_table in self.lookup_list_table:
            for subtable in lookup_table:
                # Substitutes found so far are fed back in, as they can be
                # substituted in turn by later subtables
                glyphs = subtable.all_substitutions(ans)
                if glyphs:
                    ans |= glyphs
        # Subtract the input glyph ids themselves. {glyph_ids} would be a
        # set containing the frozenset as its only member, and subtracting
        # that removes nothing.
        return ans - glyph_ids

View File

@@ -0,0 +1,213 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from, pack, calcsize
from calibre.utils.fonts.sfnt import UnknownTable, DateTimeProperty, FixedProperty
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from calibre.utils.fonts.sfnt.loca import read_array
from polyglot.builtins import zip
class HeadTable(UnknownTable):

    '''
    The head table. All fields are read into attributes on creation,
    update() writes the attributes back into the raw data.
    '''

    created = DateTimeProperty('_created')
    modified = DateTimeProperty('_modified')
    version_number = FixedProperty('_version_number')
    font_revision = FixedProperty('_font_revision')

    def __init__(self, *args, **kwargs):
        super(HeadTable, self).__init__(*args, **kwargs)

        # (attribute name, struct code) in the order the fields are stored
        layout = (
            ('_version_number', 'l'),
            ('_font_revision', 'l'),
            ('checksum_adjustment', 'L'),
            ('magic_number', 'L'),
            ('flags', 'H'),
            ('units_per_em', 'H'),
            ('_created', 'q'),
            ('_modified', 'q'),
            ('x_min', 'h'),
            ('y_min', 'h'),
            ('x_max', 'h'),
            ('y_max', 'h'),
            ('mac_style', 'H'),
            ('lowest_rec_ppem', 'H'),
            ('font_direction_hint', 'h'),
            ('index_to_loc_format', 'h'),
            ('glyph_data_format', 'h'),
        )
        codes = ''.join(code for _, code in layout)
        self._fmt = ('>%s'%codes).encode('ascii')
        self._fields = tuple(name for name, _ in layout)

        values = unpack_from(self._fmt, self.raw)
        for name, value in zip(self._fields, values):
            setattr(self, name, value)

    def update(self):
        ''' Rebuild the raw data from the current attribute values. '''
        self.raw = pack(self._fmt, *[getattr(self, name) for name in self._fields])
class HorizontalHeader(UnknownTable):

    '''
    The hhea table. Nothing is parsed until read_data() is called with the
    hmtx table, which also yields the advance widths and left side
    bearings.
    '''

    version_number = FixedProperty('_version_number')

    def read_data(self, hmtx):
        '''
        Read the header fields into attributes and the horizontal metrics
        from ``hmtx`` into ``advance_widths`` and ``left_side_bearings``.
        Does nothing if the data has already been read.

        :raises UnsupportedFont: If ``hmtx`` is too short for the number of
            metrics declared in the header.
        '''
        if hasattr(self, 'ascender'):
            return

        # (attribute name, struct code) in the order the fields are stored
        layout = (
            ('_version_number', 'l'),
            ('ascender', 'h'),
            ('descender', 'h'),
            ('line_gap', 'h'),
            ('advance_width_max', 'H'),
            ('min_left_side_bearing', 'h'),
            ('min_right_side_bearing', 'h'),
            ('x_max_extent', 'h'),
            ('caret_slope_rise', 'h'),
            ('caret_slop_run', 'h'),
            ('caret_offset', 'h'),
            ('r1', 'h'),
            ('r2', 'h'),
            ('r3', 'h'),
            ('r4', 'h'),
            ('metric_data_format', 'h'),
            ('number_of_h_metrics', 'H'),
        )
        codes = ''.join(code for _, code in layout)
        self._fmt = ('>%s'%codes).encode('ascii')
        self._fields = tuple(name for name, _ in layout)
        for name, value in zip(self._fields, unpack_from(self._fmt, self.raw)):
            setattr(self, name, value)

        metrics_size = 4 * self.number_of_h_metrics
        if len(hmtx.raw) < metrics_size:
            raise UnsupportedFont('The hmtx table has insufficient data')
        long_hor_metric = hmtx.raw[:metrics_size]
        # Every metric is an unsigned advance followed by a signed bearing,
        # so read the data once as each type and keep alternate entries
        self.advance_widths = read_array(long_hor_metric)[0::2]
        self.left_side_bearings = read_array(long_hor_metric, 'h')[1::2]
class VerticalHeader(UnknownTable):

    '''
    The vhea table. Nothing is parsed until read_data() is called with the
    vmtx table, which also yields the advance heights and top side
    bearings.
    '''

    version_number = FixedProperty('_version_number')

    def read_data(self, vmtx):
        '''
        Read the header fields into attributes and the vertical metrics
        from ``vmtx`` into ``advance_heights`` and ``top_side_bearings``.
        Does nothing if the data has already been read.

        :raises UnsupportedFont: If ``vmtx`` is too short for the number of
            metrics declared in the header.
        '''
        if hasattr(self, 'ascender'):
            return

        # (attribute name, struct code) in the order the fields are stored
        layout = (
            ('_version_number', 'l'),
            ('ascender', 'h'),
            ('descender', 'h'),
            ('line_gap', 'h'),
            ('advance_height_max', 'H'),
            ('min_top_side_bearing', 'h'),
            ('min_bottom_side_bearing', 'h'),
            ('y_max_extent', 'h'),
            ('caret_slope_rise', 'h'),
            ('caret_slop_run', 'h'),
            ('caret_offset', 'h'),
            ('r1', 'h'),
            ('r2', 'h'),
            ('r3', 'h'),
            ('r4', 'h'),
            ('metric_data_format', 'h'),
            ('number_of_v_metrics', 'H'),
        )
        codes = ''.join(code for _, code in layout)
        self._fmt = ('>%s'%codes).encode('ascii')
        self._fields = tuple(name for name, _ in layout)
        for name, value in zip(self._fields, unpack_from(self._fmt, self.raw)):
            setattr(self, name, value)

        metrics_size = 4 * self.number_of_v_metrics
        if len(vmtx.raw) < metrics_size:
            raise UnsupportedFont('The vmtx table has insufficient data')
        long_ver_metric = vmtx.raw[:metrics_size]
        # Every metric is an unsigned advance followed by a signed bearing,
        # so read the data once as each type and keep alternate entries
        self.advance_heights = read_array(long_ver_metric)[0::2]
        self.top_side_bearings = read_array(long_ver_metric, 'h')[1::2]
class OS2Table(UnknownTable):

    '''
    The OS/2 table. Nothing is parsed until read_data() is called.
    '''

    def read_data(self):
        '''
        Read the fields of the table into attributes. The fields from
        ``code_page_range`` onwards are only read for table versions
        greater than 1. Does nothing if the data has already been read.
        '''
        # The guard has to test an attribute that is actually set below.
        # There is no char_width field, so testing for it never matched and
        # the table was parsed again on every call.
        if hasattr(self, 'average_char_width'):
            return
        ver, = unpack_from(b'>H', self.raw)
        field_types = [
            'version' , 'H',
            'average_char_width', 'h',
            'weight_class', 'H',
            'width_class', 'H',
            'fs_type', 'H',
            'subscript_x_size', 'h',
            'subscript_y_size', 'h',
            'subscript_x_offset', 'h',
            'subscript_y_offset', 'h',
            'superscript_x_size', 'h',
            'superscript_y_size', 'h',
            'superscript_x_offset', 'h',
            'superscript_y_offset', 'h',
            'strikeout_size', 'h',
            'strikeout_position', 'h',
            'family_class', 'h',
            'panose', '10s',
            'ranges', '16s',
            'vendor_id', '4s',
            'selection', 'H',
            'first_char_index', 'H',
            'last_char_index', 'H',
            'typo_ascender', 'h',
            'typo_descender', 'h',
            'typo_line_gap', 'h',
            'win_ascent', 'H',
            'win_descent', 'H',
        ]
        if ver > 1:
            field_types += [
                'code_page_range', '8s',
                'x_height', 'h',
                'cap_height', 'h',
                'default_char', 'H',
                'break_char', 'H',
                'max_context', 'H',
            ]

        self._fmt = ('>%s'%(''.join(field_types[1::2]))).encode('ascii')
        self._fields = field_types[0::2]

        for f, val in zip(self._fields, unpack_from(self._fmt, self.raw)):
            setattr(self, f, val)

    def zero_fstype(self):
        ''' Set the fs_type field to zero, both in the raw data and in the
        ``fs_type`` attribute. '''
        # fs_type is the two bytes that follow the first four fields
        prefix = calcsize(b'>HhHH')
        self.raw = self.raw[:prefix] + b'\0\0' + self.raw[prefix+2:]
        self.fs_type = 0
class PostTable(UnknownTable):

    '''
    The post table. Only the version, italic angle and underline metrics
    are read, and only when read_data() is called.
    '''

    version_number = FixedProperty('_version')
    italic_angle = FixedProperty('_italic_angle')

    def read_data(self):
        ''' Read the leading fields of the table into attributes. Does
        nothing if the data has already been read. '''
        if hasattr(self, 'underline_position'):
            return
        values = unpack_from(b'>llhh', self.raw)
        (self._version, self._italic_angle, self.underline_position,
         self.underline_thickness) = values

View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from, calcsize, pack, error as struct_error
from calibre.utils.fonts.sfnt import (UnknownTable, FixedProperty,
max_power_of_two)
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from polyglot.builtins import range
class KernTable(UnknownTable):

    '''
    The kern table. Supports removing the kerning pairs of glyphs that are
    no longer needed from format 0 subtables.
    '''

    version = FixedProperty('_version')

    def __init__(self, *args, **kwargs):
        super(KernTable, self).__init__(*args, **kwargs)
        self._version, self.num_tables = unpack_from(b'>HH', self.raw)
        # If the first 16 bits are 1, the header is re-read as two 32 bit
        # fields instead, provided there is enough data for that
        if self._version == 1 and len(self.raw) >= 8:
            self._version, self.num_tables = unpack_from(b'>LL', self.raw)
        self.headerfmt = b'>HH' if self._version == 0 else b'>LL'

    def restrict_to_glyphs(self, glyph_ids):
        '''
        Remove all kerning pairs that involve a glyph not in ``glyph_ids``
        from the format 0 subtables. Format 0 subtables that end up without
        any pairs are dropped, subtables in other formats are kept as is.

        :raises UnsupportedFont: If the table version is not 0 or 0x10000.
        '''
        if self._version not in {0, 0x10000}:
            raise UnsupportedFont('kern table has version: %x'%self._version)
        offset = 4 if (self._version == 0) else 8
        tables = []
        for i in range(self.num_tables):
            if self._version == 0:
                version, length, coverage = unpack_from(b'>3H', self.raw, offset)
                table_format = version
            else:
                length, coverage = unpack_from(b'>LH', self.raw, offset)
                table_format = coverage & 0xff
            raw = self.raw[offset:offset+length]
            # Advance to the next subtable before possibly dropping this
            # one. Skipping the increment for a dropped subtable would make
            # every following iteration read the same subtable again.
            offset += length
            if table_format == 0:
                raw = self.restrict_format_0(raw, glyph_ids)
                if not raw:
                    continue
            tables.append(raw)
        self.raw = pack(self.headerfmt, self._version, len(tables)) + b''.join(tables)
        # Keep the attribute in sync with the header that was just written
        self.num_tables = len(tables)

    def restrict_format_0(self, raw, glyph_ids):
        '''
        Return the format 0 subtable ``raw`` with only those pairs left
        whose glyphs are both in ``glyph_ids``, or empty bytes if no pairs
        are left.

        :raises UnsupportedFont: If there is data after the last pair.
        '''
        if self._version == 0:
            version, length, coverage, npairs = unpack_from(b'>4H', raw)
            headerfmt = b'>3H'
        else:
            length, coverage, tuple_index, npairs = unpack_from(b'>L3H', raw)
            headerfmt = b'>L2H'

        # The pairs follow the subtable header and the four search fields
        offset = calcsize(headerfmt + b'4H')
        entries = []
        entrysz = calcsize(b'>2Hh')
        for i in range(npairs):
            try:
                left, right, value = unpack_from(b'>2Hh', raw, offset)
            except struct_error:
                offset = len(raw)
                break  # Buggy kern table
            if left in glyph_ids and right in glyph_ids:
                entries.append(pack(b'>2Hh', left, right, value))
            offset += entrysz

        if offset != len(raw):
            raise UnsupportedFont('This font has extra data at the end of'
                    ' a Format 0 kern subtable')

        npairs = len(entries)
        if npairs == 0:
            return b''

        # Recompute the binary search fields for the new number of pairs
        entry_selector = max_power_of_two(npairs)
        search_range = (2 ** entry_selector) * 6
        range_shift = (npairs - (2 ** entry_selector)) * 6

        entries = b''.join(entries)
        length = calcsize(headerfmt + b'4H') + len(entries)
        if self._version == 0:
            header = pack(headerfmt, version, length, coverage)
        else:
            header = pack(headerfmt, length, coverage, tuple_index)
        return header + pack(b'>4H', npairs, search_range, entry_selector,
                range_shift) + entries

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import array, sys
from operator import itemgetter
from itertools import repeat
from calibre.utils.fonts.sfnt import UnknownTable
from polyglot.builtins import iteritems, range
def four_byte_type_code():
    '''
    Return the first array type code among 'I' and 'L' whose items are four
    bytes wide on this platform, or None if neither is.
    '''
    matching = (code for code in 'IL' if array.array(code).itemsize == 4)
    return next(matching, None)
def read_array(data, fmt='H'):
    '''
    Build an array of type fmt from data, byte swapping the items when the
    host is not big-endian.
    '''
    values = array.array(fmt, data)
    if sys.byteorder == 'big':
        return values
    values.byteswap()
    return values
class LocaTable(UnknownTable):
    '''
    The loca table: a list of offsets, one per glyph plus a final end
    offset. Offsets are kept in self.offset_map once load_offsets() or
    update() has run; self.fmt holds the array type code used to store them.
    '''

    def load_offsets(self, head_table, maxp_table):
        '''
        Parse self.raw into self.offset_map.

        head_table.index_to_loc_format selects the storage: 0 means two byte
        items, anything else four byte items. maxp_table is not used.
        '''
        fmt = 'H' if head_table.index_to_loc_format == 0 else four_byte_type_code()
        locs = read_array(self.raw, fmt)
        self.offset_map = locs.tolist()
        if fmt == 'H':
            # Two byte items hold half of the real offset
            self.offset_map = [2*i for i in self.offset_map]
        self.fmt = fmt

    def glyph_location(self, glyph_id):
        '''
        Return (offset, size) for glyph_id, the size being the distance to
        the next entry in the offset map.
        '''
        offset = self.offset_map[glyph_id]
        next_offset = self.offset_map[glyph_id+1]
        return offset, next_offset - offset

    def update(self, resolved_glyph_map):
        '''
        Update this table to contain pointers only to the glyphs in
        resolved_glyph_map which must be a map of glyph_ids to (offset, sz)
        Note that the loca table is generated for all glyphs from 0 to the
        largest glyph that is either in resolved_glyph_map or was present
        originally. The pointers to glyphs that have no data will be set to
        zero. This preserves glyph ids.
        '''
        current_max_glyph_id = len(self.offset_map) - 2
        max_glyph_id = max(resolved_glyph_map or (0,))
        max_glyph_id = max(max_glyph_id, current_max_glyph_id)
        self.offset_map = list(repeat(0, max_glyph_id + 2))
        glyphs = [(glyph_id, x[0], x[1]) for glyph_id, x in
                    iteritems(resolved_glyph_map)]
        # Process glyphs in increasing offset order
        glyphs.sort(key=itemgetter(1))
        for glyph_id, offset, sz in glyphs:
            self.offset_map[glyph_id] = offset
            self.offset_map[glyph_id+1] = offset + sz
        # Fix all zero entries to be the same as the previous entry, which
        # means that if the ith entry is zero, the i-1 glyph is not present.
        for i in range(1, len(self.offset_map)):
            if self.offset_map[i] == 0:
                self.offset_map[i] = self.offset_map[i-1]

        vals = self.offset_map
        max_offset = max(vals) if vals else 0
        # Two byte items store offset // 2, so they can only be used when
        # every offset is even and its half fits in 16 bits.
        if max_offset < 0x20000 and all(off % 2 == 0 for off in vals):
            self.fmt = 'H'
            vals = array.array(self.fmt, (i // 2 for i in vals))
        else:
            self.fmt = four_byte_type_code()
            vals = array.array(self.fmt, vals)

        if sys.byteorder != "big":
            vals.byteswap()
        # array.tostring() was removed in Python 3.9, tobytes() replaces it.
        # Fall back to tostring() on interpreters that lack tobytes().
        serialize = getattr(vals, 'tobytes', None) or vals.tostring
        self.raw = serialize()

    subset = update

    def dump_glyphs(self, sfnt):
        '''
        Print the id and size of every glyph with a non-zero size, loading
        the offsets from sfnt's head and maxp tables first if needed.
        '''
        if not hasattr(self, 'offset_map'):
            self.load_offsets(sfnt[b'head'], sfnt[b'maxp'])
        for i in range(len(self.offset_map)-1):
            off, noff = self.offset_map[i], self.offset_map[i+1]
            if noff != off:
                print('Glyph id:', i, 'size:', noff-off)

View File

@@ -0,0 +1,46 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from, pack
from calibre.utils.fonts.sfnt import UnknownTable, FixedProperty
from calibre.utils.fonts.sfnt.errors import UnsupportedFont
from polyglot.builtins import zip
class MaxpTable(UnknownTable):
    '''
    The maxp table. The version and glyph count are always parsed; for
    version 1.0 tables the remaining maximum-value fields are parsed too.
    self.fields lists the attribute names in the order they are packed.
    '''

    version = FixedProperty('_version')

    def __init__(self, *args, **kwargs):
        super(MaxpTable, self).__init__(*args, **kwargs)

        base_fmt = b'>lH'
        base_fields = ('_version', 'num_glyphs')
        self._fmt = base_fmt
        self._version, self.num_glyphs = unpack_from(base_fmt, self.raw)
        self.fields = base_fields

        table_version = self.version
        if table_version > 1.0:
            raise UnsupportedFont('This font has a maxp table with version: %s'
                    %table_version)
        if table_version != 1.0:
            # Only the two base fields are parsed for other versions
            return

        extra_fields = ('max_points',
                'max_contours', 'max_composite_points',
                'max_composite_contours', 'max_zones',
                'max_twilight_points', 'max_storage', 'max_function_defs',
                'max_instruction_defs', 'max_stack_elements',
                'max_size_of_instructions', 'max_component_elements',
                'max_component_depth')
        self.fields = base_fields + extra_fields
        # Every extra field is an unsigned 16 bit value
        self._fmt = base_fmt + b'H'*len(extra_fields)

        for name, value in zip(self.fields, unpack_from(self._fmt, self.raw)):
            setattr(self, name, value)

    def update(self):
        '''
        Re-serialize the attributes named in self.fields into self.raw.
        '''
        self.raw = pack(self._fmt, *[getattr(self, name) for name in self.fields])

View File

@@ -0,0 +1,380 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import traceback
from collections import OrderedDict
from operator import itemgetter
from functools import partial
from calibre.utils.icu import safe_chr, ord_string
from calibre.utils.fonts.sfnt.container import Sfnt
from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
from polyglot.builtins import unicode_type, range, iteritems, itervalues, map
# TrueType outlines {{{
def resolve_glyphs(loca, glyf, character_map, extra_glyphs):
    '''
    Collect the glyph data for every glyph id in character_map's values and
    in extra_glyphs, plus glyph 0, following the glyph_indices of each
    glyph found. Glyph ids that loca cannot locate are skipped.

    Returns an OrderedDict of glyph id to glyph, sorted by glyph id.
    '''
    pending = set(itervalues(character_map)) | extra_glyphs
    pending.add(0)  # We always want the .notdef glyph
    found = {}

    while pending:
        glyph_id = pending.pop()
        try:
            offset, length = loca.glyph_location(glyph_id)
        except (IndexError, ValueError, KeyError, TypeError):
            continue
        glyph = glyf.glyph_data(offset, length)
        found[glyph_id] = glyph
        # Queue the glyphs this one refers to that are not resolved yet
        pending.update(gid for gid in glyph.glyph_indices if gid not in found)

    by_glyph_id = sorted(iteritems(found), key=itemgetter(0))
    return OrderedDict(by_glyph_id)
def subset_truetype(sfnt, character_map, extra_glyphs):
    '''
    Subset the glyf and loca tables of sfnt to the glyphs needed for
    character_map and extra_glyphs, then update the head and maxp tables to
    match. Entries of character_map whose glyph could not be resolved are
    removed from it in place.

    :raises UnsupportedFont: If the head or maxp table is missing
    :raises NoGlyphs: If nothing but glyph 0 (or nothing at all) resolves
    '''
    loca = sfnt[b'loca']
    glyf = sfnt[b'glyf']

    try:
        head, maxp = sfnt[b'head'], sfnt[b'maxp']
    except KeyError:
        raise UnsupportedFont('This font does not contain head and/or maxp tables')
    loca.load_offsets(head, maxp)

    resolved = resolve_glyphs(loca, glyf, character_map, extra_glyphs)
    only_notdef = set(resolved) == {0}
    if not resolved or only_notdef:
        raise NoGlyphs('This font has no glyphs for the specified character '
                'set, subsetting it is pointless')

    # Keep only character codes that have resolved glyphs
    unresolved_codes = [code for code, glyph_id in iteritems(character_map)
                        if glyph_id not in resolved]
    for code in unresolved_codes:
        del character_map[code]

    # Update the glyf table
    glyph_offset_map = glyf.update(resolved)

    # Update the loca table
    loca.subset(glyph_offset_map)
    if loca.fmt == 'H':
        head.index_to_loc_format = 0
    else:
        head.index_to_loc_format = 1
    head.update()
    maxp.num_glyphs = len(loca.offset_map) - 1
# }}}
def subset_postscript(sfnt, character_map, extra_glyphs):
    '''
    Decompile the CFF table of sfnt and subset it with character_map and
    extra_glyphs.
    '''
    cff_table = sfnt[b'CFF ']
    cff_table.decompile()
    cff_table.subset(character_map, extra_glyphs)
def do_warn(warnings, *args):
    '''
    Emit every line of every string in args, followed by one empty line.
    Lines are appended to warnings, or printed when warnings is None.
    '''
    emit = print if warnings is None else warnings.append
    for text in args:
        for line in text.splitlines():
            emit(line)
    # Trailing empty line separates this warning from the next one
    emit('')
def pdf_subset(sfnt, glyphs):
    '''
    Strip sfnt down to a fixed set of core tables and subset its outlines,
    passing glyphs as the extra glyphs to keep and an empty character map.

    :raises UnsupportedFont: If sfnt has neither loca and glyf tables nor
        a CFF table
    '''
    keep = {b'hhea', b'head', b'hmtx', b'maxp',
            b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca',
            b'prep', b'CFF ', b'VORG'}
    # Remove non core tables since they are unused in PDF rendering
    for tag in tuple(sfnt.tables):
        if tag in keep:
            continue
        del sfnt[tag]

    if b'loca' in sfnt and b'glyf' in sfnt:
        # TrueType Outlines
        subset_truetype(sfnt, {}, glyphs)
        return
    if b'CFF ' in sfnt:
        # PostScript Outlines
        subset_postscript(sfnt, {}, glyphs)
        return
    raise UnsupportedFont('This font does not contain TrueType '
            'or PostScript outlines')
def safe_ord(x):
    '''
    Return the first value ord_string() yields for the text form of x.
    '''
    text = unicode_type(x)
    codes = ord_string(text)
    return codes[0]
def subset(raw, individual_chars, ranges=(), warnings=None):
    '''
    Subset the font in raw so that it only covers the requested characters.

    :param raw: Font data, passed to Sfnt
    :param individual_chars: Iterable of single characters to keep
    :param ranges: Iterable of (first, last) character pairs, both ends
        inclusive
    :param warnings: List that receives warning lines, or None to print
        them instead
    :return: (subset font data, old table sizes, new table sizes)
    :raises UnsupportedFont: If the font has no cmap table or no TrueType
        or PostScript outlines
    '''
    warn = partial(do_warn, warnings)

    chars = set(map(safe_ord, individual_chars))
    for r in ranges:
        chars.update(range(safe_ord(r[0]), safe_ord(r[1])+1))

    # Always add the space character for ease of use from the command line
    chars.add(safe_ord(' '))

    sfnt = Sfnt(raw)
    old_sizes = sfnt.sizes()

    # Remove the Digital Signature table since it is useless in a subset
    # font anyway
    sfnt.pop(b'DSIG', None)

    # Remove non core tables as they aren't likely to be used by renderers
    # anyway
    core_tables = {b'cmap', b'hhea', b'head', b'hmtx', b'maxp', b'name',
            b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca', b'prep',
            b'CFF ', b'VORG', b'EBDT', b'EBLC', b'EBSC', b'BASE', b'GSUB',
            b'GPOS', b'GDEF', b'JSTF', b'gasp', b'hdmx', b'kern', b'LTSH',
            b'PCLT', b'VDMX', b'vhea', b'vmtx', b'MATH'}
    for tag in list(sfnt):
        if tag not in core_tables:
            del sfnt[tag]

    try:
        cmap = sfnt[b'cmap']
    except KeyError:
        raise UnsupportedFont('This font has no cmap table')

    # Get mapping of chars to glyph ids for all specified chars
    character_map = cmap.get_character_map(chars)

    extra_glyphs = set()

    if b'GSUB' in sfnt:
        # Parse all substitution rules to ensure that glyphs that can be
        # substituted for the specified set of glyphs are not removed
        gsub = sfnt[b'GSUB']
        try:
            gsub.decompile()
            extra_glyphs = gsub.all_substitutions(itervalues(character_map))
        except UnsupportedFont as e:
            warn('Unsupported GSUB table: %s'%e)
        except Exception:
            # Best effort: a broken GSUB table must not abort subsetting
            warn('Failed to decompile GSUB table:', traceback.format_exc())

    if b'loca' in sfnt and b'glyf' in sfnt:
        # TrueType Outlines
        subset_truetype(sfnt, character_map, extra_glyphs)
    elif b'CFF ' in sfnt:
        # PostScript Outlines
        subset_postscript(sfnt, character_map, extra_glyphs)
    else:
        raise UnsupportedFont('This font does not contain TrueType '
                'or PostScript outlines')

    # Restrict the cmap table to only contain entries for the resolved glyphs
    cmap.set_character_map(character_map)

    if b'kern' in sfnt:
        try:
            sfnt[b'kern'].restrict_to_glyphs(frozenset(itervalues(character_map)))
        except UnsupportedFont as e:
            warn('kern table unsupported, ignoring: %s'%e)
        except Exception:
            # Best effort: the kern table is left as it is on failure
            warn('Subsetting of kern table failed, ignoring:',
                    traceback.format_exc())

    raw, new_sizes = sfnt()
    return raw, old_sizes, new_sizes
# CLI {{{
def option_parser():
    '''
    Build the command line option parser for the subset-font program.
    '''
    import textwrap
    from calibre.utils.config import OptionParser
    usage = textwrap.dedent('''\
%prog [options] input_font_file output_font_file characters_to_keep
Subset the specified font, keeping only the glyphs for the characters in
characters_to_keep. characters_to_keep is a comma separated list of characters of
the form: a,b,c,A-Z,0-9,xyz
You can specify ranges in the list of characters, as shown above.
''')
    codes_help = ('If specified, the list of characters is interpreted as '
        'numeric unicode codes instead of characters. So to specify the '
        'characters a,b you would use 97,98 or U+0061,U+0062')
    parser = OptionParser(usage=usage)
    parser.add_option('-c', '--codes', default=False, action='store_true',
            help=codes_help)
    parser.prog = 'subset-font'
    return parser
def print_stats(old_stats, new_stats):
    '''
    Print a per table comparison of the sizes in old_stats and new_stats,
    largest original table first. Tables missing from new_stats count as
    size zero.
    '''
    from calibre import prints
    rule = '='*80
    prints('========= Table comparison (original vs. subset) =========')
    prints('Table', ' ', '%10s'%'Size', ' ', 'Percent', ' ', '%10s'%'New Size',
            ' New Percent')
    prints(rule)
    old_total = sum(itervalues(old_stats))
    new_total = sum(itervalues(new_stats))
    by_old_size = sorted(old_stats, key=old_stats.__getitem__, reverse=True)
    for table in by_old_size:
        old_size = old_stats[table]
        old_percent = old_size/old_total * 100
        new_size = new_stats.get(table, 0)
        new_percent = new_size/new_total * 100
        if new_size == old_size:
            suffix = ' | same size'
        else:
            suffix = ' | reduced to %.1f %%'%(new_size/old_size * 100)
        prints('%4s'%table, ' ', '%10s'%old_size, ' ', '%5.1f %%'%old_percent, ' ',
                '%10s'%new_size, ' ', '%5.1f %%'%new_percent, suffix)
    prints(rule)
def main(args):
    '''
    Command line entry point: subset the font named by the first argument
    to the characters listed in the third and write it to the path given by
    the second.

    :param args: Full argument list, program name first
    :raises SystemExit: With status 1 on a wrong argument count or an
        invalid character specification
    '''
    import sys
    import time
    from calibre import prints
    parser = option_parser()
    opts, args = parser.parse_args(args)
    if len(args) != 4:
        parser.print_help()
        raise SystemExit(1)
    iff, off, chars = args[1:]
    with open(iff, 'rb') as f:
        orig = f.read()

    chars = chars.split(',')
    individual, ranges = set(), set()

    def not_single(c):
        # Empty strings (e.g. from a trailing comma or a lone "-") are
        # rejected as well; they would otherwise be handed to subset(),
        # which presumably cannot take the ordinal of an empty string.
        if len(c) != 1:
            prints(c, 'is not a single character', file=sys.stderr)
            raise SystemExit(1)

    def conv_code(c):
        # Accept decimal codes as well as U+XXXX and 0xXXXX hex codes
        if c.upper()[:2] in ('U+', '0X'):
            c = int(c[2:], 16)
        return safe_chr(int(c))

    for c in chars:
        if '-' in c:
            parts = [x.strip() for x in c.split('-')]
            if len(parts) != 2:
                prints('Invalid range:', c, file=sys.stderr)
                raise SystemExit(1)
            if opts.codes:
                parts = tuple(map(conv_code, parts))
            for part in parts:
                not_single(part)
            ranges.add(tuple(parts))
        else:
            if opts.codes:
                c = conv_code(c)
            not_single(c)
            individual.add(c)
    st = time.time()
    sf, old_stats, new_stats = subset(orig, individual, ranges)
    taken = time.time() - st
    reduced = (len(sf)/len(orig)) * 100

    def sz(x):
        return '%gKB'%(len(x)/1024.)
    print_stats(old_stats, new_stats)
    prints('Original size:', sz(orig), 'Subset size:', sz(sf), 'Reduced to: %g%%'%(reduced))
    prints('Subsetting took %g seconds'%taken)
    with open(off, 'wb') as f:
        f.write(sf)
    prints('Subset font written to:', off)
# Script entry point: runs main() with the process arguments when this file
# is executed directly.
if __name__ == '__main__':
    try:
        # init_calibre is optional; when it cannot be imported the script
        # carries on without it.
        import init_calibre
        # Bare reference to the name -- presumably only there so the import
        # is not reported as unused; confirm before removing.
        init_calibre
    except ImportError:
        pass
    import sys
    main(sys.argv)
# }}}
# Tests {{{
def test_mem():
    '''
    Subset a bundled font 1000 times and print the growth in memory use per
    call, as reported by calibre.utils.mem.memory().
    '''
    from calibre.utils.mem import memory
    import gc
    gc.collect()
    baseline = memory()
    font_data = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
    calls = 1000
    for _ in range(calls):
        subset(font_data, (), (('a', 'z'),))
    del font_data
    # Collect repeatedly before measuring
    for _ in range(3):
        gc.collect()
    growth = memory() - baseline
    print('Leaked memory per call:', growth/calls*1024, 'KB')
def test():
    '''
    Subset a bundled font to the characters a, b and c and fail unless the
    result is at most 30% of the original size.
    '''
    original = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
    result = subset(original, {'a', 'b', 'c'}, ())
    subset_font = result[0]
    if len(subset_font) > 0.3 * len(original):
        raise Exception('Subsetting failed')
def all():
    '''
    Subset every font known to the font scanner to the characters a, b and
    c, then print the unsupported fonts, the warnings, the failures and
    summary counts. The average reduction is printed only when at least
    one font was subset successfully.
    '''
    from calibre.utils.fonts.scanner import font_scanner
    failed = []
    unsupported = []
    warnings = {}
    total = 0
    averages = []
    for family in font_scanner.find_font_families():
        for font in font_scanner.fonts_for_family(family):
            raw = font_scanner.get_font_data(font)
            print('Subsetting', font['full_name'], end='\t')
            total += 1
            try:
                w = []
                sf, old_stats, new_stats = subset(raw, {'a', 'b', 'c'},
                        (), w)
                if w:
                    warnings[font['full_name'] + ' (%s)'%font['path']] = w
            except NoGlyphs:
                print('No glyphs!')
                continue
            except UnsupportedFont as e:
                unsupported.append((font['full_name'], font['path'], unicode_type(e)))
                print('Unsupported!')
                continue
            except Exception as e:
                # Record the failure and carry on with the next font
                print('Failed!')
                failed.append((font['full_name'], font['path'], unicode_type(e)))
            else:
                averages.append(sum(itervalues(new_stats))/sum(itervalues(old_stats)) * 100)
                print('Reduced to:', '%.1f'%averages[-1] , '%')
    if unsupported:
        print('\n\nUnsupported:')
        for name, path, err in unsupported:
            print(name, path, err)
            print()

    if warnings:
        print('\n\nWarnings:')
        for name, w in iteritems(warnings):
            if w:
                print(name)
                print('', '\n\t'.join(w), sep='\t')

    if failed:
        print('\n\nFailures:')
        for name, path, err in failed:
            print(name, path, err)
            print()

    # No font may have been subset successfully (no fonts found, or every
    # one was skipped or failed); dividing by len(averages) would then
    # raise ZeroDivisionError.
    if averages:
        print('Average reduction to: %.1f%%'%(sum(averages)/len(averages)))
    print('Total:', total, 'Unsupported:', len(unsupported), 'Failed:',
            len(failed), 'Warnings:', len(warnings))
# }}}