1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-20 13:11:27 +02:00

Added docx writer related modules

This commit is contained in:
2020-04-13 16:33:15 +02:00
parent ae80ae5640
commit 98b2dd8d4f
29 changed files with 5956 additions and 0 deletions
@@ -0,0 +1,10 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
@@ -0,0 +1,182 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
# cff_standard_strings {{{
# The 391 Standard Strings as used in the CFF format.
# from Adobe Technical Note #5176, version 1.0, 18 March 1998
#
# Order is significant: the CFF parser's Strings index prepends this list to
# the strings stored in the font, so the position of a name in this list is
# the index used to look it up.
cff_standard_strings = [
'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
'Semibold'
]
# }}}
# Glyph names of the three predefined CFF charsets.  The list is indexed by
# the predefined charset id (0, 1 or 2) and every tuple is indexed by glyph
# id, so the order of the names must not be changed.  Glyph 0 is always
# ".notdef".
STANDARD_CHARSETS = [  # {{{
    # ISOAdobe
    (".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
     "percent", "ampersand", "quoteright", "parenleft", "parenright",
     "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
     "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
     "colon", "semicolon", "less", "equal", "greater", "question", "at",
     "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
     "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
     "bracketleft", "backslash", "bracketright", "asciicircum",
     "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
     "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
     "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
     "exclamdown", "cent", "sterling", "fraction", "yen", "florin",
     "section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
     "guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
     "daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
     "quotedblbase", "quotedblright", "guillemotright", "ellipsis",
     "perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
     "macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
     "hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
     "Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
     "oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
     "trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
     "divide", "brokenbar", "degree", "thorn", "threequarters",
     "twosuperior", "registered", "minus", "eth", "multiply",
     "threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
     "Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
     "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
     "Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
     "Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
     "Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
     "aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
     "egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
     "oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
     "uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
     "zcaron"),
    # Expert
    # The first name used to be "notdef" (without the leading dot), which
    # disagreed with the other charsets and gave glyph 0 the wrong name.
    (".notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
     "dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
     "parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
     "hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
     "twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
     "sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
     "colon", "semicolon", "commasuperior", "threequartersemdash",
     "periodsuperior", "questionsmall", "asuperior", "bsuperior",
     "centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
     "msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
     "tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
     "parenrightinferior", "Circumflexsmall", "hyphensuperior",
     "Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
     "Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
     "Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
     "Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
     "colonmonetary", "onefitted", "rupiah", "Tildesmall",
     "exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
     "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
     "Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
     "Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
     "threequarters", "questiondownsmall", "oneeighth", "threeeighths",
     "fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
     "onesuperior", "twosuperior", "threesuperior", "foursuperior",
     "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
     "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
     "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
     "seveninferior", "eightinferior", "nineinferior", "centinferior",
     "dollarinferior", "periodinferior", "commainferior", "Agravesmall",
     "Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
     "Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
     "Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
     "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
     "Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
     "Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
     "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
     "Thornsmall", "Ydieresissmall"),
    # Expert Subset
    (".notdef", "space", "dollaroldstyle", "dollarsuperior",
     "parenleftsuperior", "parenrightsuperior", "twodotenleader",
     "onedotenleader", "comma", "hyphen", "period", "fraction",
     "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
     "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
     "eightoldstyle", "nineoldstyle", "colon", "semicolon",
     "commasuperior", "threequartersemdash", "periodsuperior",
     "asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
     "isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
     "rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
     "ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
     "colonmonetary", "onefitted", "rupiah", "centoldstyle",
     "figuredash", "hypheninferior", "onequarter", "onehalf",
     "threequarters", "oneeighth", "threeeighths", "fiveeighths",
     "seveneighths", "onethird", "twothirds", "zerosuperior",
     "onesuperior", "twosuperior", "threesuperior", "foursuperior",
     "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
     "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
     "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
     "seveninferior", "eightinferior", "nineinferior", "centinferior",
     "dollarinferior", "periodinferior", "commainferior"),
]  # }}}
@@ -0,0 +1,311 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import pack, unpack_from
from polyglot.builtins import range, unicode_type
# Dispatch tables: entry N holds the name of the method that handles a token
# whose first byte is N.  The names are resolved with getattr() while
# decompiling.

# Type 1 charstrings
t1_operand_encoding = (
    ["do_operator"] * 32 +
    ["read_byte"] * (247 - 32) +
    ["read_small_int1"] * (251 - 247) +
    ["read_small_int2"] * (255 - 251) +
    ["read_long_int"]
)

# Type 2 charstrings: same as Type 1 except for bytes 28 and 255
t2_operand_encoding = list(t1_operand_encoding)
t2_operand_encoding[28] = "read_short_int"
t2_operand_encoding[255] = "read_fixed_1616"

# CFF DICT data: same as Type 2 except for bytes 29, 30 and 255
cff_dict_operand_encoding = list(t2_operand_encoding)
cff_dict_operand_encoding[29] = "read_long_int"
cff_dict_operand_encoding[30] = "read_real_number"
cff_dict_operand_encoding[255] = "reserved"

# Text of each nibble value used in real number operands; the list index is
# the nibble value and 13 has no text.
real_nibbles = list('0123456789') + ['.', 'E', 'E-', None, '-']

# Reverse mapping: text -> nibble value
real_nibbles_map = dict(
    (text, nibble) for nibble, text in enumerate(real_nibbles))
class ByteCode(dict):
    """Read and write the operand encodings used in CFF byte code.

    Every ``read_*`` method receives the first byte of the operand (``b0``,
    already consumed), the raw ``data`` and the ``index`` of the next unread
    byte. It returns a ``(value, new_index)`` tuple. Every ``write_*`` method
    returns the encoded operand as ``bytes``.
    """

    def read_byte(self, b0, data, index):
        """Decode a single byte integer; the value is stored in ``b0``."""
        return b0 - 139, index

    def read_small_int1(self, b0, data, index):
        """Decode a two byte positive integer (108 and above)."""
        b1 = ord(data[index:index+1])
        return (b0-247)*256 + b1 + 108, index+1

    def read_small_int2(self, b0, data, index):
        """Decode a two byte negative integer (-108 and below)."""
        b1 = ord(data[index:index+1])
        return -(b0-251)*256 - b1 - 108, index+1

    def read_short_int(self, b0, data, index):
        """Decode a big-endian signed 16 bit integer."""
        value, = unpack_from(b">h", data, index)
        return value, index+2

    def read_long_int(self, b0, data, index):
        """Decode a big-endian signed 32 bit integer."""
        value, = unpack_from(b">l", data, index)
        return value, index+4

    def read_fixed_1616(self, b0, data, index):
        """Decode a 16.16 fixed point number into a float."""
        value, = unpack_from(b">l", data, index)
        return value / 65536.0, index+4

    def read_real_number(self, b0, data, index):
        """Decode a real number stored as a sequence of 4 bit nibbles.

        Each byte holds two nibbles that are translated with
        ``real_nibbles``; the nibble 0xf terminates the number.
        """
        number = ''
        while True:
            b = ord(data[index:index+1])
            index = index + 1
            nibble0 = (b & 0xf0) >> 4
            nibble1 = b & 0x0f
            if nibble0 == 0xf:
                break
            number = number + real_nibbles[nibble0]
            if nibble1 == 0xf:
                break
            number = number + real_nibbles[nibble1]
        return float(number), index

    def write_float(self, f, encoding='ignored'):
        """Encode the float ``f`` as a nibble based real number operand.

        ``encoding`` is accepted only so that the signature matches
        :meth:`write_int`; it is not used.
        """
        s = unicode_type(f).upper()
        # The leading zero of a fraction is not stored
        if s[:2] == "0.":
            s = s[1:]
        elif s[:3] == "-0.":
            s = "-" + s[2:]
        nibbles = []
        while s:
            c = s[0]
            s = s[1:]
            if c == "E":
                sign = s[:1]
                if sign == "-":
                    # Negative exponents have a dedicated nibble
                    s = s[1:]
                    c = "E-"
                elif sign == "+":
                    # A plain "E" nibble already means a positive exponent
                    # and there is no nibble for "+", so drop the sign
                    # (large floats are formatted like "1E+16")
                    s = s[1:]
            nibbles.append(real_nibbles_map[c])
        # Terminator, doubled if needed to fill the last byte
        nibbles.append(0xf)
        if len(nibbles) % 2:
            nibbles.append(0xf)
        d = bytearray([30])
        for i in range(0, len(nibbles), 2):
            d.append(nibbles[i] << 4 | nibbles[i+1])
        return bytes(d)

    def write_int(self, value, encoding="cff"):
        """Encode the integer ``value`` using the shortest available form.

        ``encoding`` selects the operator used for integers that need four
        bytes: 29 for ``'cff'`` and 255 for ``'t1'``. For any other encoding
        such integers are written as a two byte integer with operator 28.
        """
        four_byte_op = {'cff':29, 't1':255}.get(encoding, None)
        if -107 <= value <= 107:
            code = bytes(bytearray([value + 139]))
        elif 108 <= value <= 1131:
            value = value - 108
            code = bytes(bytearray([(value >> 8) + 247, (value & 0xFF)]))
        elif -1131 <= value <= -108:
            value = -value - 108
            code = bytes(bytearray([(value >> 8) + 251, (value & 0xFF)]))
        elif four_byte_op is None:
            # T2 only supports 2 byte ints
            code = bytes(bytearray([28])) + pack(b">h", value)
        else:
            code = bytes(bytearray([four_byte_op])) + pack(b">l", value)
        return code

    def write_offset(self, value):
        """Encode ``value`` as a fixed size (operator 29, four byte) integer."""
        return bytes(bytearray([29])) + pack(b">l", value)

    def write_number(self, value, encoding="cff"):
        """Encode ``value`` as a real number if it is a float, else as an
        integer."""
        f = self.write_float if isinstance(value, float) else self.write_int
        return f(value, encoding)
class Dict(ByteCode):
    """Base class for CFF DICT structures.

    Subclasses describe their operators in ``TABLE`` as tuples of
    ``(opcode, name, argument type, default)``. Decompiled values are stored
    in the dictionary itself, keyed by operator name.
    """

    operand_encoding = cff_dict_operand_encoding
    TABLE = ()
    # Names of operators that compile() never writes out
    FILTERED = frozenset()
    # Names of operators whose numbers are written with write_offset()
    OFFSETS = frozenset()
    # Set by compile() for every operator; the default lets the encode_*
    # methods be used on their own
    encoding_offset = False

    def __init__(self):
        ByteCode.__init__(self)
        self.operators = {op:(name, arg) for op, name, arg, default in
                self.TABLE}
        self.defaults = {name:default for op, name, arg, default in self.TABLE}

    def safe_get(self, name):
        """Return the value of operator ``name`` or its default from TABLE.

        Raises KeyError if ``name`` is not an operator listed in TABLE.
        """
        return self.get(name, self.defaults[name])

    def decompile(self, strings, global_subrs, data):
        """Parse the DICT byte code in ``data`` into this dictionary.

        ``strings`` is the sequence used to resolve SID operands and
        ``global_subrs`` is stored unchanged on the instance.
        """
        self.strings = strings
        self.global_subrs = global_subrs
        self.stack = []
        index = 0
        while index < len(data):
            b0 = ord(data[index:index+1])
            index += 1
            handler = getattr(self, self.operand_encoding[b0])
            value, index = handler(b0, data, index)
            # Operators return None, operands are collected on the stack
            # until the next operator consumes them
            if value is not None:
                self.stack.append(value)

    def do_operator(self, b0, data, index):
        """Handle the operator starting with ``b0``, consuming the stack."""
        if b0 == 12:
            # Two byte (escaped) operator
            op = (b0, ord(data[index:index+1]))
            index += 1
        else:
            op = b0
        operator, arg_type = self.operators[op]
        self.handle_operator(operator, arg_type)
        return None, index

    def handle_operator(self, operator, arg_type):
        """Convert the operands on the stack and store them as ``operator``."""
        if isinstance(arg_type, tuple):
            # The last operand is on top of the stack, so pop in reverse
            value = ()
            for arg in reversed(arg_type):
                arghandler = getattr(self, 'arg_' + arg)
                value = (arghandler(operator),) + value
        else:
            arghandler = getattr(self, 'arg_' + arg_type)
            value = arghandler(operator)
        self[operator] = value

    def arg_number(self, name):
        return self.stack.pop()

    def arg_SID(self, name):
        return self.strings[self.stack.pop()]

    def arg_array(self, name):
        ans = self.stack[:]
        del self.stack[:]
        return ans

    def arg_delta(self, name):
        # Deltas are stored as differences, return the running totals
        out = []
        current = 0
        for v in self.stack:
            current = current + v
            out.append(current)
        del self.stack[:]
        return out

    def compile(self, strings):
        """Return the byte code for all values that differ from the defaults.

        ``strings`` is called with the value of every SID argument and must
        return the number to write for it. The result is also stored in
        ``self.raw``. Raises ValueError if the value of an operator with
        several arguments has the wrong number of items.
        """
        data = []
        for op, name, arg, default in self.TABLE:
            if name in self.FILTERED:
                continue
            val = self.safe_get(name)
            opcode = bytes(bytearray(op if isinstance(op, tuple) else [op]))
            if val != self.defaults[name]:
                self.encoding_offset = name in self.OFFSETS
                if isinstance(arg, tuple):
                    if len(val) != len(arg):
                        raise ValueError('Invalid argument %s for operator: %s'
                                %(val, op))
                    for typ, v in zip(arg, val):
                        if typ == 'SID':
                            # Convert only this item; this used to replace
                            # the whole tuple (val) and left v unconverted
                            v = strings(v)
                        data.append(getattr(self, 'encode_'+typ)(v))
                else:
                    if arg == 'SID':
                        val = strings(val)
                    data.append(getattr(self, 'encode_'+arg)(val))
                data.append(opcode)
        self.raw = b''.join(data)
        return self.raw

    def encode_number(self, val):
        if self.encoding_offset:
            return self.write_offset(val)
        return self.write_number(val)

    def encode_SID(self, val):
        return self.write_int(val)

    def encode_array(self, val):
        return b''.join(map(self.encode_number, val))

    def encode_delta(self, value):
        # Inverse of arg_delta(): store the differences between the values
        out = []
        last = 0
        for v in value:
            out.append(v - last)
            last = v
        return self.encode_array(out)
class TopDict(Dict):
    """Operator table of the Top DICT of a CFF font.

    Every TABLE row is ``(opcode, name, argument type, default)``. Opcodes
    written as a tuple are two byte operators whose first byte is 12. The
    rows are processed in this order when the DICT is compiled, and a value
    is only written out when it differs from its default.
    """

    TABLE = (
        # opcode name argument type default
        ((12, 30), 'ROS', ('SID','SID','number'), None,),
        ((12, 20), 'SyntheticBase', 'number', None,),
        (0, 'version', 'SID', None,),
        (1, 'Notice', 'SID', None,),
        ((12, 0), 'Copyright', 'SID', None,),
        (2, 'FullName', 'SID', None,),
        ((12, 38), 'FontName', 'SID', None,),
        (3, 'FamilyName', 'SID', None,),
        (4, 'Weight', 'SID', None,),
        ((12, 1), 'isFixedPitch', 'number', 0,),
        ((12, 2), 'ItalicAngle', 'number', 0,),
        ((12, 3), 'UnderlinePosition', 'number', None,),
        ((12, 4), 'UnderlineThickness', 'number', 50,),
        ((12, 5), 'PaintType', 'number', 0,),
        ((12, 6), 'CharstringType', 'number', 2,),
        ((12, 7), 'FontMatrix', 'array', [0.001,0,0,0.001,0,0],),
        (13, 'UniqueID', 'number', None,),
        (5, 'FontBBox', 'array', [0,0,0,0],),
        ((12, 8), 'StrokeWidth', 'number', 0,),
        (14, 'XUID', 'array', None,),
        ((12, 21), 'PostScript', 'SID', None,),
        ((12, 22), 'BaseFontName', 'SID', None,),
        ((12, 23), 'BaseFontBlend', 'delta', None,),
        ((12, 31), 'CIDFontVersion', 'number', 0,),
        ((12, 32), 'CIDFontRevision', 'number', 0,),
        ((12, 33), 'CIDFontType', 'number', 0,),
        ((12, 34), 'CIDCount', 'number', 8720,),
        (15, 'charset', 'number', 0,),
        ((12, 35), 'UIDBase', 'number', None,),
        (16, 'Encoding', 'number', 0,),
        (18, 'Private', ('number','number'), None,),
        ((12, 37), 'FDSelect', 'number', None,),
        ((12, 36), 'FDArray', 'number', None,),
        (17, 'CharStrings', 'number', None,),
    )

    # We will not write these operators out
    FILTERED = {'ROS', 'SyntheticBase', 'UniqueID', 'XUID',
                'CIDFontVersion', 'CIDFontRevision', 'CIDFontType', 'CIDCount',
                'UIDBase', 'Encoding', 'FDSelect', 'FDArray'}

    # The numbers of these operators are offsets, they are written with
    # write_offset() when the DICT is compiled
    OFFSETS = {'charset', 'Encoding', 'CharStrings', 'Private'}
class PrivateDict(Dict):
    """Operator table of the Private DICT of a CFF font.

    Every TABLE row is ``(opcode, name, argument type, default)``. Opcodes
    written as a tuple are two byte operators whose first byte is 12. The
    rows are processed in this order when the DICT is compiled, and a value
    is only written out when it differs from its default.
    """

    TABLE = (
        # opcode name argument type default
        (6, 'BlueValues', 'delta', None,),
        (7, 'OtherBlues', 'delta', None,),
        (8, 'FamilyBlues', 'delta', None,),
        (9, 'FamilyOtherBlues', 'delta', None,),
        ((12, 9), 'BlueScale', 'number', 0.039625,),
        ((12, 10), 'BlueShift', 'number', 7,),
        ((12, 11), 'BlueFuzz', 'number', 1,),
        (10, 'StdHW', 'number', None,),
        (11, 'StdVW', 'number', None,),
        ((12, 12), 'StemSnapH', 'delta', None,),
        ((12, 13), 'StemSnapV', 'delta', None,),
        ((12, 14), 'ForceBold', 'number', 0,),
        ((12, 15), 'ForceBoldThreshold', 'number', None,), # deprecated
        ((12, 16), 'lenIV', 'number', None,), # deprecated
        ((12, 17), 'LanguageGroup', 'number', 0,),
        ((12, 18), 'ExpansionFactor', 'number', 0.06,),
        ((12, 19), 'initialRandomSeed', 'number', 0,),
        (20, 'defaultWidthX', 'number', 0,),
        (21, 'nominalWidthX', 'number', 0,),
        (19, 'Subrs', 'number', None,),
    )

    # The number of this operator is an offset, it is written with
    # write_offset() when the DICT is compiled
    OFFSETS = {'Subrs'}
@@ -0,0 +1,221 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from struct import unpack_from, unpack, calcsize
from functools import partial
from calibre.utils.fonts.sfnt import UnknownTable
from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
from calibre.utils.fonts.sfnt.cff.dict_data import TopDict, PrivateDict
from calibre.utils.fonts.sfnt.cff.constants import (cff_standard_strings,
STANDARD_CHARSETS)
from polyglot.builtins import iteritems, itervalues, range
# Useful links
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
class CFF(object):
    """Parsed contents of a raw CFF table.

    Only version 1.0 tables that contain a single, non CID keyed font with
    Type 2 charstrings are accepted; anything else raises UnsupportedFont.
    A font without a CharStrings entry raises ValueError.
    """

    def __init__(self, raw):
        header = unpack_from(b'>4B', raw)
        (self.major_version, self.minor_version, self.header_size,
            self.offset_size) = header
        version = (self.major_version, self.minor_version)
        if version != (1, 0):
            raise UnsupportedFont('The CFF table has unknown version: '
                    '(%d, %d)'%version)

        # The header is followed by four INDEX structures stored back to
        # back: font names, top dicts, strings and global subroutines.
        self.font_names = Index(raw, self.header_size)
        if len(self.font_names) > 1:
            raise UnsupportedFont('CFF table has more than one font.')

        self.top_index = Index(raw, self.font_names.pos)
        self.top_dict = TopDict()
        self.strings = Strings(raw, self.top_index.pos)
        self.global_subrs = Subrs(raw, self.strings.pos)

        # The top dict can only be decompiled now, as it refers to strings
        self.top_dict.decompile(self.strings, self.global_subrs,
                                self.top_index[0])
        self.is_CID = 'ROS' in self.top_dict
        if self.is_CID:
            raise UnsupportedFont('Subsetting of CID keyed fonts is not supported')

        # Glyph definitions
        try:
            charstrings_pos = self.top_dict['CharStrings']
        except KeyError:
            raise ValueError('This font has no CharStrings')
        cs_type = self.top_dict.safe_get('CharstringType')
        if cs_type != 2:
            raise UnsupportedFont('This font has unsupported CharstringType: '
                    '%s'%cs_type)
        self.char_strings = CharStringsIndex(raw, charstrings_pos)
        self.num_glyphs = len(self.char_strings)

        # Private dict and its subroutines; the position of the subroutines
        # is stored relative to the start of the private dict
        self.private_dict = None
        self.private_subrs = None
        private = self.top_dict.safe_get('Private')
        if private:
            private_size, private_pos = private
            self.private_dict = PrivateDict()
            self.private_dict.decompile(
                self.strings, self.global_subrs,
                raw[private_pos:private_pos+private_size])
            if 'Subrs' in self.private_dict:
                subrs_pos = private_pos + self.private_dict['Subrs']
                self.private_subrs = Subrs(raw, subrs_pos)

        # Glyph names
        charset_pos = self.top_dict.safe_get('charset')
        self.charset = Charset(raw, charset_pos, self.strings,
                               self.num_glyphs, self.is_CID)
class Index(list):
    """A CFF INDEX: a list of the raw byte strings stored in it.

    The structure starts at ``offset`` in ``raw`` and consists of a two byte
    object count, a one byte offset size, ``count + 1`` offsets and the
    object data. The offsets are relative to the byte preceding the data.
    An empty INDEX consists only of the count.

    The items of ``prepend`` are placed before the parsed objects. After
    construction ``self.pos`` is the position of the first byte after the
    INDEX and ``self.offset_size`` is the size of the stored offsets (None
    for an empty INDEX).

    Raises ValueError if the offset size is not 1, 2, 3 or 4.
    """

    def __init__(self, raw, offset, prepend=()):
        list.__init__(self)
        self.extend(prepend)

        count = unpack_from(b'>H', raw, offset)[0]
        offset += 2
        self.pos = offset
        # Always defined, so that reading it on an empty INDEX does not fail
        self.offset_size = None
        if count == 0:
            return

        self.offset_size = unpack_from(b'>B', raw, offset)[0]
        offset += 1
        if self.offset_size == 3:
            # struct cannot read three byte integers, so pad them to four
            offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
                        for i in range(offset, offset+3*(count+1), 3)]
        else:
            try:
                fmt = {1:'B', 2:'H', 4:'L'}[self.offset_size]
            except KeyError:
                raise ValueError('Invalid offset size in CFF INDEX: %d'
                        % self.offset_size)
            fmt = ('>%d%s'%(count+1, fmt)).encode('ascii')
            offsets = unpack_from(fmt, raw, offset)
        # Move to the byte preceding the object data
        offset += self.offset_size * (count+1) - 1

        for start, end in zip(offsets, offsets[1:]):
            self.append(raw[offset+start:offset+end])

        # There are always at least two offsets here, the last one points
        # just past the final object
        self.pos = offset + offsets[-1]
class Strings(Index):
    """String INDEX of a font, preceded by the CFF standard strings.

    The standard strings are stored as ASCII encoded bytes, like the
    strings read from the font.
    """

    def __init__(self, raw, offset):
        standard = [name.encode('ascii') for name in cff_standard_strings]
        super(Strings, self).__init__(raw, offset, prepend=standard)
class Charset(list):
    """Glyph names of a font, indexed by glyph id.

    If ``offset`` is 0, 1 or 2 the font uses the corresponding predefined
    charset: the list stays empty and names are taken from
    STANDARD_CHARSETS. Otherwise the charset table at ``offset`` in ``raw``
    is parsed into this list, starting with ``.notdef`` for glyph 0.
    """

    def __init__(self, raw, offset, strings, num_glyphs, is_CID):
        super(Charset, self).__init__()
        self.standard_charset = None
        if offset in {0, 1, 2}:
            self.standard_charset = offset

        if self.standard_charset is not None:
            if is_CID:
                raise ValueError("CID font must not use a standard charset")
            return

        self.append(b'.notdef')
        fmt = unpack_from(b'>B', raw, offset)[0]
        offset += 1
        if fmt == 0:
            self.parse_fmt0(raw, offset, strings, num_glyphs, is_CID)
        elif fmt == 1:
            self.parse_fmt1(raw, offset, strings, num_glyphs, is_CID)
        elif fmt == 2:
            # Same layout as format 1, but with a two byte range length
            self.parse_fmt1(raw, offset, strings, num_glyphs, is_CID,
                            is_two_byte=True)
        else:
            raise UnsupportedFont('This font uses unsupported charset '
                    'table format: %d'%fmt)

    def parse_fmt0(self, raw, offset, strings, num_glyphs, is_CID):
        """Format 0: one two byte string id per glyph, except glyph 0."""
        layout = ('>%dH'%(num_glyphs-1)).encode('ascii')
        for sid in unpack_from(layout, raw, offset):
            if is_CID:
                self.append('cid%05d'%sid)
            else:
                self.append(strings[sid])

    def parse_fmt1(self, raw, offset, strings, num_glyphs, is_CID,
            is_two_byte=False):
        """Formats 1 and 2: ranges of consecutive string ids.

        Every range is stored as the first id followed by the number of ids
        left in the range (one byte, or two if ``is_two_byte`` is set).
        """
        if is_two_byte:
            range_fmt = b'>2H'
        else:
            range_fmt = b'>HB'
        range_size = calcsize(range_fmt)
        # Glyph 0 is not part of the table
        covered = 1
        while covered < num_glyphs:
            first, nleft = unpack_from(range_fmt, raw, offset)
            offset += range_size
            covered += nleft + 1
            for sid in range(first, first + nleft+1):
                self.append('cid%05d'%sid if is_CID else strings[sid])

    def lookup(self, glyph_id):
        """Return the name of the glyph with id ``glyph_id``."""
        if self.standard_charset is not None:
            name = STANDARD_CHARSETS[self.standard_charset][glyph_id]
            return name.encode('ascii')
        return self[glyph_id]

    def safe_lookup(self, glyph_id):
        """Like lookup(), but return None if the name cannot be found."""
        try:
            name = self.lookup(glyph_id)
        except (KeyError, IndexError, ValueError):
            name = None
        return name
class Subrs(Index):
    """INDEX holding subroutines (global or private)."""
class CharStringsIndex(Index):
    """INDEX holding the charstrings (glyph definitions) of a font."""
class CFFTable(UnknownTable):
    """The CFF table of a font, with support for subsetting."""

    def decompile(self):
        """Parse the raw table data into ``self.cff``."""
        self.cff = CFF(self.raw)

    def subset(self, character_map, extra_glyphs):
        """Reduce the table to the glyphs that are actually needed.

        ``character_map`` maps character codes to glyph ids and is updated
        in place with the glyph ids of the subset font. ``extra_glyphs``
        contains the ids of additional glyphs to keep. Raises NoGlyphs if
        the font has no glyph for any of the requested characters.
        """
        from calibre.utils.fonts.sfnt.cff.writer import Subset
        name_of = self.cff.charset.safe_lookup

        # Remember the glyph name of every character code, as the glyph ids
        # are different in the subset font
        charset_map = {}
        for code, glyph_id in iteritems(character_map):
            charset_map[code] = name_of(glyph_id)

        wanted = set(itervalues(charset_map))
        wanted.discard(None)
        if character_map and not wanted:
            raise NoGlyphs('This font has no glyphs for the specified characters')
        wanted.update([name_of(glyph_id) for glyph_id in extra_glyphs])
        wanted.discard(None)

        subset_font = Subset(self.cff, wanted)

        # Rebuild character_map with the glyph ids from the subset font
        character_map.clear()
        for code, charname in iteritems(charset_map):
            new_id = subset_font.charname_map.get(charname, None)
            if new_id:
                character_map[code] = new_id

        # Check that raw is parseable
        CFF(subset_font.raw)

        self.raw = subset_font.raw