Added docx writer related modules

2020-04-13 16:33:15 +02:00
parent ae80ae5640
commit 98b2dd8d4f
29 changed files with 5956 additions and 0 deletions
@@ -0,0 +1,10 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+
@@ -0,0 +1,182 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+# cff_standard_strings {{{
+# The 391 Standard Strings as used in the CFF format.
+# from Adobe Technical Note #5176, version 1.0, 18 March 1998
+# The position of a name in this list is its String ID (SID): the Strings
+# index of the CFF table parser prepends this list to the strings stored in
+# the font, so the lowest SIDs resolve to the names below.
+
+cff_standard_strings = [
+'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
+'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
+'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
+'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
+'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
+'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
+'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
+'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
+'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
+'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
+'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
+'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
+'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
+'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
+'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
+'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
+'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
+'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
+'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
+'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
+'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
+'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
+'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
+'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
+'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
+'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
+'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
+'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
+'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
+'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
+'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
+'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
+'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
+'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
+'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
+'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
+'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
+'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
+'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
+'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
+'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
+'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
+'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
+'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
+'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
+'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
+'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
+'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
+'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
+'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
+'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
+'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
+'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
+'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
+'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
+'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
+'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
+'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
+'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
+'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
+'Semibold'
+]
+# }}}
+
+
+# Predefined charsets, indexed by the charset id a font stores in place of a
+# charset offset (0 = ISOAdobe, 1 = Expert, 2 = Expert Subset). Each tuple is
+# indexed by glyph id and yields the glyph name; glyph 0 is always ".notdef".
+STANDARD_CHARSETS = [  # {{{
+# ISOAdobe
+(".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
+    "percent", "ampersand", "quoteright", "parenleft", "parenright",
+    "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
+    "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
+    "colon", "semicolon", "less", "equal", "greater", "question", "at",
+    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
+    "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
+    "bracketleft", "backslash", "bracketright", "asciicircum",
+    "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
+    "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
+    "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
+    "exclamdown", "cent", "sterling", "fraction", "yen", "florin",
+    "section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
+    "guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
+    "daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
+    "quotedblbase", "quotedblright", "guillemotright", "ellipsis",
+    "perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
+    "macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
+    "hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
+    "Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
+    "oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
+    "trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
+    "divide", "brokenbar", "degree", "thorn", "threequarters",
+    "twosuperior", "registered", "minus", "eth", "multiply",
+    "threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
+    "Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
+    "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
+    "Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
+    "Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
+    "Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
+    "aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
+    "egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
+    "oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
+    "uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
+    "zcaron"),
+
+# Expert
+(".notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
+    "dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
+    "parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
+    "hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
+    "twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
+    "sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
+    "colon", "semicolon", "commasuperior", "threequartersemdash",
+    "periodsuperior", "questionsmall", "asuperior", "bsuperior",
+    "centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
+    "msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
+    "tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
+    "parenrightinferior", "Circumflexsmall", "hyphensuperior",
+    "Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
+    "Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
+    "Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
+    "Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
+    "colonmonetary", "onefitted", "rupiah", "Tildesmall",
+    "exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
+    "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
+    "Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
+    "Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
+    "threequarters", "questiondownsmall", "oneeighth", "threeeighths",
+    "fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
+    "onesuperior", "twosuperior", "threesuperior", "foursuperior",
+    "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
+    "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
+    "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
+    "seveninferior", "eightinferior", "nineinferior", "centinferior",
+    "dollarinferior", "periodinferior", "commainferior", "Agravesmall",
+    "Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
+    "Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
+    "Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
+    "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
+    "Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
+    "Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
+    "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
+    "Thornsmall", "Ydieresissmall"),
+
+# Expert Subset
+(".notdef", "space", "dollaroldstyle", "dollarsuperior",
+        "parenleftsuperior", "parenrightsuperior", "twodotenleader",
+        "onedotenleader", "comma", "hyphen", "period", "fraction",
+        "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
+        "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
+        "eightoldstyle", "nineoldstyle", "colon", "semicolon",
+        "commasuperior", "threequartersemdash", "periodsuperior",
+        "asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
+        "isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
+        "rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
+        "ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
+        "colonmonetary", "onefitted", "rupiah", "centoldstyle",
+        "figuredash", "hypheninferior", "onequarter", "onehalf",
+        "threequarters", "oneeighth", "threeeighths", "fiveeighths",
+        "seveneighths", "onethird", "twothirds", "zerosuperior",
+        "onesuperior", "twosuperior", "threesuperior", "foursuperior",
+        "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
+        "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
+        "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
+        "seveninferior", "eightinferior", "nineinferior", "centinferior",
+        "dollarinferior", "periodinferior", "commainferior"),
+]  # }}}
+
@@ -0,0 +1,311 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import pack, unpack_from
+from polyglot.builtins import range, unicode_type
+
+# Dispatch tables: entry ``b0`` is the name of the method that decodes a token
+# whose first byte is ``b0``.
+t1_operand_encoding = (
+    ["do_operator"] * 32 +
+    ["read_byte"] * (247 - 32) +
+    ["read_small_int1"] * (251 - 247) +
+    ["read_small_int2"] * (255 - 251) +
+    ["read_long_int"]
+)
+
+# Same as the table above, except for bytes 28 and 255
+t2_operand_encoding = list(t1_operand_encoding)
+t2_operand_encoding[28] = "read_short_int"
+t2_operand_encoding[255] = "read_fixed_1616"
+
+# Table used when decoding DICT data: bytes 29, 30 and 255 differ
+cff_dict_operand_encoding = list(t2_operand_encoding)
+cff_dict_operand_encoding[29:31] = ["read_long_int", "read_real_number"]
+cff_dict_operand_encoding[255] = "reserved"
+
+# Text fragment for each nibble value of a real number operand; the entry at
+# position 0xd is None and 0xf (end of number) has no entry at all.
+real_nibbles = list('0123456789') + ['.', 'E', 'E-', None, '-']
+real_nibbles_map = dict(
+    (fragment, nibble) for nibble, fragment in enumerate(real_nibbles))
+
+
+class ByteCode(dict):
+    '''
+    Readers and writers for the number encodings used in CFF data.
+
+    Every ``read_*`` method receives the first byte of a token (``b0``), the
+    raw data and the index of the byte that follows ``b0``. It returns a
+    ``(value, new_index)`` tuple. Every ``write_*`` method returns the
+    encoded number as bytes.
+    '''
+
+    def read_byte(self, b0, data, index):
+        # Single byte operand, stored biased by 139
+        return b0 - 139, index
+
+    def read_small_int1(self, b0, data, index):
+        # Two byte operand holding a positive value (108 and up)
+        b1 = ord(data[index:index+1])
+        return (b0-247)*256 + b1 + 108, index+1
+
+    def read_small_int2(self, b0, data, index):
+        # Two byte operand holding a negative value (-108 and down)
+        b1 = ord(data[index:index+1])
+        return -(b0-251)*256 - b1 - 108, index+1
+
+    def read_short_int(self, b0, data, index):
+        # Big-endian signed 16 bit integer
+        value, = unpack_from(b">h", data, index)
+        return value, index+2
+
+    def read_long_int(self, b0, data, index):
+        # Big-endian signed 32 bit integer
+        value, = unpack_from(b">l", data, index)
+        return value, index+4
+
+    def read_fixed_1616(self, b0, data, index):
+        # Signed 32 bit value scaled by 65536 (16.16 fixed point)
+        value, = unpack_from(b">l", data, index)
+        return value / 65536.0, index+4
+
+    def read_real_number(self, b0, data, index):
+        '''
+        Decode a real number stored as a sequence of nibbles, two per byte.
+        The nibble 0xf terminates the number.
+        '''
+        number = ''
+        while True:
+            b = ord(data[index:index+1])
+            index = index + 1
+            nibble0 = (b & 0xf0) >> 4
+            nibble1 = b & 0x0f
+            if nibble0 == 0xf:
+                break
+            number = number + real_nibbles[nibble0]
+            if nibble1 == 0xf:
+                break
+            number = number + real_nibbles[nibble1]
+        return float(number), index
+
+    def write_float(self, f, encoding='ignored'):
+        '''
+        Encode the float ``f`` as a real number operand: the byte 30 followed
+        by the nibbles of its text form, terminated by the nibble 0xf and
+        padded with a second 0xf when needed to fill the last byte.
+
+        :param encoding: Unused, accepted so that the signature matches
+            :meth:`write_int`.
+        :raises KeyError: If the text form of ``f`` contains a character that
+            has no nibble (for example ``inf`` or ``nan``).
+        '''
+        s = unicode_type(f).upper()
+        # Drop the redundant leading zero: 0.5 -> .5 and -0.5 -> -.5
+        if s[:2] == "0.":
+            s = s[1:]
+        elif s[:3] == "-0.":
+            s = "-" + s[2:]
+        nibbles = []
+        while s:
+            c = s[0]
+            s = s[1:]
+            if c == "E":
+                sign = s[:1]
+                if sign == "-":
+                    # Negative exponents have a dedicated nibble
+                    s = s[1:]
+                    c = "E-"
+                elif sign == "+":
+                    # Large floats are formatted as 1E+20. There is no nibble
+                    # for "+", a plain "E" already means a positive exponent.
+                    s = s[1:]
+            nibbles.append(real_nibbles_map[c])
+        nibbles.append(0xf)
+        if len(nibbles) % 2:
+            nibbles.append(0xf)
+        d = bytearray([30])
+        for i in range(0, len(nibbles), 2):
+            d.append(nibbles[i] << 4 | nibbles[i+1])
+        return bytes(d)
+
+    def write_int(self, value, encoding="cff"):
+        '''
+        Encode the integer ``value`` using the shortest available form.
+
+        :param encoding: ``'cff'`` and ``'t1'`` select the operator used for
+            four byte integers (29 and 255 respectively). With any other
+            value, integers that do not fit the one and two byte forms are
+            written as a 16 bit integer after the byte 28.
+        '''
+        four_byte_op = {'cff':29, 't1':255}.get(encoding, None)
+
+        if -107 <= value <= 107:
+            code = bytes(bytearray([value + 139]))
+        elif 108 <= value <= 1131:
+            value = value - 108
+            code = bytes(bytearray([(value >> 8) + 247, (value & 0xFF)]))
+        elif -1131 <= value <= -108:
+            value = -value - 108
+            code = bytes(bytearray([(value >> 8) + 251, (value & 0xFF)]))
+        elif four_byte_op is None:
+            # T2 only supports 2 byte ints
+            code = bytes(bytearray([28])) + pack(b">h", value)
+        else:
+            code = bytes(bytearray([four_byte_op])) + pack(b">l", value)
+        return code
+
+    def write_offset(self, value):
+        # Always use the five byte form, so the encoded size of an offset
+        # does not depend on its value
+        return bytes(bytearray([29])) + pack(b">l", value)
+
+    def write_number(self, value, encoding="cff"):
+        # Floats are written as real numbers, everything else as an integer
+        f = self.write_float if isinstance(value, float) else self.write_int
+        return f(value, encoding)
+
+
+class Dict(ByteCode):
+    '''
+    A DICT structure: a mapping of operator names to values that can be
+    decoded from and encoded to its binary form.
+
+    Subclasses describe their operators in ``TABLE``, as rows of
+    ``(opcode, name, argument type, default)``. The argument type is one of
+    ``'number'``, ``'SID'``, ``'array'`` and ``'delta'``, or a tuple of
+    these for operators that take several operands.
+    '''
+
+    operand_encoding = cff_dict_operand_encoding
+    TABLE = ()
+    # Names of operators that compile() never writes out
+    FILTERED = frozenset()
+    # Names of operators whose numbers are written with write_offset()
+    OFFSETS = frozenset()
+
+    def __init__(self):
+        ByteCode.__init__(self)
+
+        self.operators = {op:(name, arg) for op, name, arg, default in
+                self.TABLE}
+        self.defaults = {name:default for op, name, arg, default in self.TABLE}
+
+    def safe_get(self, name):
+        '''
+        Return the value of ``name``, or its default from ``TABLE`` when it
+        has not been set. Raises KeyError for names that are not in ``TABLE``.
+        '''
+        return self.get(name, self.defaults[name])
+
+    def decompile(self, strings, global_subrs, data):
+        '''
+        Decode the binary DICT ``data`` and store the value of every operator
+        found in it on this mapping.
+
+        :param strings: Indexable by SID, used to resolve ``'SID'`` operands
+        :param global_subrs: Stored on the instance, not used while decoding
+        '''
+        self.strings = strings
+        self.global_subrs = global_subrs
+        self.stack = []
+        index = 0
+        while index < len(data):
+            b0 = ord(data[index:index+1])
+            index += 1
+            handler = getattr(self, self.operand_encoding[b0])
+            value, index = handler(b0, data, index)
+            # Operators return None, operands are kept for the next operator
+            if value is not None:
+                self.stack.append(value)
+
+    def do_operator(self, b0, data, index):
+        # The byte 12 introduces a two byte operator, keyed as (12, b1)
+        if b0 == 12:
+            op = (b0, ord(data[index:index+1]))
+            index += 1
+        else:
+            op = b0
+        operator, arg_type = self.operators[op]
+        self.handle_operator(operator, arg_type)
+        return None, index
+
+    def handle_operator(self, operator, arg_type):
+        if isinstance(arg_type, tuple):
+            # Operands are popped off the stack, so convert them last to
+            # first and build the result front to back
+            value = ()
+            for i in range(len(arg_type)-1, -1, -1):
+                arg = arg_type[i]
+                arghandler = getattr(self, 'arg_' + arg)
+                value = (arghandler(operator),) + value
+        else:
+            arghandler = getattr(self, 'arg_' + arg_type)
+            value = arghandler(operator)
+        self[operator] = value
+
+    def arg_number(self, name):
+        return self.stack.pop()
+
+    def arg_SID(self, name):
+        return self.strings[self.stack.pop()]
+
+    def arg_array(self, name):
+        # An array consumes the whole stack
+        ans = self.stack[:]
+        del self.stack[:]
+        return ans
+
+    def arg_delta(self, name):
+        # Each stored number is the difference to the previous value
+        out = []
+        current = 0
+        for v in self.stack:
+            current = current + v
+            out.append(current)
+        del self.stack[:]
+        return out
+
+    def compile(self, strings):
+        '''
+        Encode all values that differ from their default, in ``TABLE`` order,
+        skipping the operators in ``FILTERED``. The result is also stored as
+        ``self.raw``.
+
+        :param strings: Callable that maps a string to its SID
+        :raises ValueError: If the value of an operator with several operands
+            does not have one item per operand
+        '''
+        data = []
+        for op, name, arg, default in self.TABLE:
+            if name in self.FILTERED:
+                continue
+            val = self.safe_get(name)
+            opcode = bytes(bytearray(op if isinstance(op, tuple) else [op]))
+            if val != self.defaults[name]:
+                self.encoding_offset = name in self.OFFSETS
+                if isinstance(arg, tuple):
+                    if len(val) != len(arg):
+                        raise ValueError('Invalid argument %s for operator: %s'
+                                %(val, op))
+                    for typ, v in zip(arg, val):
+                        if typ == 'SID':
+                            # Convert this operand only, not the whole tuple
+                            v = strings(v)
+                        data.append(getattr(self, 'encode_'+typ)(v))
+                else:
+                    if arg == 'SID':
+                        val = strings(val)
+                    data.append(getattr(self, 'encode_'+arg)(val))
+                # Operands come first, the operator follows them
+                data.append(opcode)
+        self.raw = b''.join(data)
+        return self.raw
+
+    def encode_number(self, val):
+        if self.encoding_offset:
+            return self.write_offset(val)
+        return self.write_number(val)
+
+    def encode_SID(self, val):
+        return self.write_int(val)
+
+    def encode_array(self, val):
+        return b''.join(map(self.encode_number, val))
+
+    def encode_delta(self, value):
+        # Inverse of arg_delta(): store differences between successive values
+        out = []
+        last = 0
+        for v in value:
+            out.append(v - last)
+            last = v
+        return self.encode_array(out)
+
+
+class TopDict(Dict):
+    '''
+    Operator table for the Top DICT of a CFF font. Operators are written out
+    in the order of the rows below. Two byte operators are given as
+    ``(12, n)`` tuples.
+    '''
+
+    TABLE = (
+    # opcode     name                  argument type   default
+    ((12, 30), 'ROS',        ('SID','SID','number'), None,),
+    ((12, 20), 'SyntheticBase',      'number',       None,),
+    (0,        'version',            'SID',          None,),
+    (1,        'Notice',             'SID',          None,),
+    ((12, 0),  'Copyright',          'SID',          None,),
+    (2,        'FullName',           'SID',          None,),
+    ((12, 38), 'FontName',           'SID',          None,),
+    (3,        'FamilyName',         'SID',          None,),
+    (4,        'Weight',             'SID',          None,),
+    ((12, 1),  'isFixedPitch',       'number',       0,),
+    ((12, 2),  'ItalicAngle',        'number',       0,),
+    ((12, 3),  'UnderlinePosition',  'number',       None,),
+    ((12, 4),  'UnderlineThickness', 'number',       50,),
+    ((12, 5),  'PaintType',          'number',       0,),
+    ((12, 6),  'CharstringType',     'number',       2,),
+    ((12, 7),  'FontMatrix',         'array',  [0.001,0,0,0.001,0,0],),
+    (13,       'UniqueID',           'number',       None,),
+    (5,        'FontBBox',           'array',  [0,0,0,0],),
+    ((12, 8),  'StrokeWidth',        'number',       0,),
+    (14,       'XUID',               'array',        None,),
+    ((12, 21), 'PostScript',         'SID',          None,),
+    ((12, 22), 'BaseFontName',       'SID',          None,),
+    ((12, 23), 'BaseFontBlend',      'delta',        None,),
+    ((12, 31), 'CIDFontVersion',     'number',       0,),
+    ((12, 32), 'CIDFontRevision',    'number',       0,),
+    ((12, 33), 'CIDFontType',        'number',       0,),
+    ((12, 34), 'CIDCount',           'number',       8720,),
+    (15,       'charset',            'number',       0,),
+    ((12, 35), 'UIDBase',            'number',       None,),
+    (16,       'Encoding',           'number',       0,),
+    (18,       'Private',       ('number','number'), None,),
+    ((12, 37), 'FDSelect',           'number',       None,),
+    ((12, 36), 'FDArray',            'number',       None,),
+    (17,       'CharStrings',        'number',       None,),
+    )
+
+    # We will not write these operators out
+    FILTERED = {'ROS', 'SyntheticBase', 'UniqueID', 'XUID',
+            'CIDFontVersion', 'CIDFontRevision', 'CIDFontType', 'CIDCount',
+            'UIDBase', 'Encoding', 'FDSelect', 'FDArray'}
+    # The numbers of these operators are written in the fixed size offset
+    # form (see Dict.encode_number)
+    OFFSETS = {'charset', 'Encoding', 'CharStrings', 'Private'}
+
+
+class PrivateDict(Dict):
+    '''
+    Operator table for the Private DICT of a CFF font. Operators are written
+    out in the order of the rows below. Two byte operators are given as
+    ``(12, n)`` tuples.
+    '''
+
+    TABLE = (
+    #   opcode     name                  argument type   default
+    (6,        'BlueValues',         'delta',        None,),
+    (7,        'OtherBlues',         'delta',        None,),
+    (8,        'FamilyBlues',        'delta',        None,),
+    (9,        'FamilyOtherBlues',   'delta',        None,),
+    ((12, 9),  'BlueScale',          'number',       0.039625,),
+    ((12, 10), 'BlueShift',          'number',       7,),
+    ((12, 11), 'BlueFuzz',           'number',       1,),
+    (10,       'StdHW',              'number',       None,),
+    (11,       'StdVW',              'number',       None,),
+    ((12, 12), 'StemSnapH',          'delta',        None,),
+    ((12, 13), 'StemSnapV',          'delta',        None,),
+    ((12, 14), 'ForceBold',          'number',       0,),
+    ((12, 15), 'ForceBoldThreshold', 'number',       None,),  # deprecated
+    ((12, 16), 'lenIV',              'number',       None,),  # deprecated
+    ((12, 17), 'LanguageGroup',      'number',       0,),
+    ((12, 18), 'ExpansionFactor',    'number',       0.06,),
+    ((12, 19), 'initialRandomSeed',  'number',       0,),
+    (20,       'defaultWidthX',      'number',       0,),
+    (21,       'nominalWidthX',      'number',       0,),
+    (19,       'Subrs',              'number',       None,),
+    )
+
+    # Written in the fixed size offset form (see Dict.encode_number)
+    OFFSETS = {'Subrs'}
@@ -0,0 +1,221 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import unpack_from, unpack, calcsize
+from functools import partial
+
+from calibre.utils.fonts.sfnt import UnknownTable
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
+from calibre.utils.fonts.sfnt.cff.dict_data import TopDict, PrivateDict
+from calibre.utils.fonts.sfnt.cff.constants import (cff_standard_strings,
+        STANDARD_CHARSETS)
+from polyglot.builtins import iteritems, itervalues, range
+
+# Useful links
+# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
+# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
+
+
+class CFF(object):
+    '''
+    Parsed form of a CFF table. Only version 1.0 tables that contain a single,
+    non CID keyed font with Type 2 charstrings are accepted, anything else
+    raises UnsupportedFont.
+    '''
+
+    def __init__(self, raw):
+        header = unpack_from(b'>4B', raw)
+        self.major_version, self.minor_version = header[0], header[1]
+        self.header_size, self.offset_size = header[2], header[3]
+        version = (self.major_version, self.minor_version)
+        if version != (1, 0):
+            raise UnsupportedFont('The CFF table has unknown version: '
+                    '(%d, %d)'%version)
+
+        # The Name index follows the header
+        self.font_names = Index(raw, self.header_size)
+        if len(self.font_names) > 1:
+            raise UnsupportedFont('CFF table has more than one font.')
+
+        # Each index starts where the previous one ended: Top Dict, strings
+        # and then the global subroutines
+        self.top_index = Index(raw, self.font_names.pos)
+        self.top_dict = TopDict()
+        self.strings = Strings(raw, self.top_index.pos)
+        self.global_subrs = Subrs(raw, self.strings.pos)
+
+        # The Top Dict can only be decoded once the strings are available
+        self.top_dict.decompile(self.strings, self.global_subrs, self.top_index[0])
+        self.is_CID = 'ROS' in self.top_dict
+        if self.is_CID:
+            raise UnsupportedFont('Subsetting of CID keyed fonts is not supported')
+
+        # CharStrings (glyph definitions)
+        try:
+            charstrings_offset = self.top_dict['CharStrings']
+        except KeyError:
+            raise ValueError('This font has no CharStrings')
+        cs_type = self.top_dict.safe_get('CharstringType')
+        if cs_type != 2:
+            raise UnsupportedFont('This font has unsupported CharstringType: '
+                    '%s'%cs_type)
+        self.char_strings = CharStringsIndex(raw, charstrings_offset)
+        self.num_glyphs = len(self.char_strings)
+
+        # Private Dict, stored in the Top Dict as a (size, offset) pair
+        self.private_dict = None
+        self.private_subrs = None
+        private = self.top_dict.safe_get('Private')
+        if private:
+            pd_size, pd_offset = private
+            self.private_dict = PrivateDict()
+            self.private_dict.decompile(self.strings, self.global_subrs,
+                    raw[pd_offset:pd_offset+pd_size])
+            if 'Subrs' in self.private_dict:
+                # The Subrs value is added to the offset of the Private Dict
+                self.private_subrs = Subrs(
+                    raw, pd_offset + self.private_dict['Subrs'])
+
+        # charset (glyph names)
+        charset_offset = self.top_dict.safe_get('charset')
+        self.charset = Charset(raw, charset_offset, self.strings,
+                self.num_glyphs, self.is_CID)
+
+
+class Index(list):
+    '''
+    A list of the byte strings stored in an INDEX structure located at
+    ``offset`` in ``raw``. The items of ``prepend`` are placed before the
+    objects read from the data. After construction ``self.pos`` is the
+    position of the first byte after the INDEX.
+    '''
+
+    def __init__(self, raw, offset, prepend=()):
+        list.__init__(self)
+        self.extend(prepend)
+
+        count, = unpack_from(b'>H', raw, offset)
+        offset += 2
+        self.pos = offset
+        if count == 0:
+            # An empty INDEX consists of the count field only
+            return
+
+        self.offset_size, = unpack_from(b'>B', raw, offset)
+        offset += 1
+        num_offsets = count + 1
+        if self.offset_size == 3:
+            # struct has no 3 byte integer, so pad each value to 4 bytes
+            offsets = [
+                unpack(b'>L', b'\0' + raw[pos:pos+3])[0]
+                for pos in range(offset, offset + 3*num_offsets, 3)]
+        else:
+            code = {1:'B', 2:'H', 4:'L'}[self.offset_size]
+            offsets = unpack_from(
+                ('>%d%s'%(num_offsets, code)).encode('ascii'), raw, offset)
+
+        # Stored offsets are relative to the byte before the object data
+        base = offset + self.offset_size * num_offsets - 1
+        for start, end in zip(offsets[:-1], offsets[1:]):
+            self.append(raw[base+start:base+end])
+
+        self.pos = base + offsets[-1]
+
+
+class Strings(Index):
+    '''
+    The String INDEX, with the standard strings (encoded as ASCII bytes)
+    placed before the strings read from the font, so that the object can be
+    indexed directly by SID.
+    '''
+
+    def __init__(self, raw, offset):
+        standard = [name.encode('ascii') for name in cff_standard_strings]
+        Index.__init__(self, raw, offset, prepend=standard)
+
+
+class Charset(list):
+    '''
+    Glyph names indexed by glyph id. When ``offset`` is 0, 1 or 2 the font
+    uses a predefined charset: the list stays empty, ``standard_charset`` is
+    set to that id and names are looked up in STANDARD_CHARSETS. Otherwise
+    the charset table at ``offset`` in ``raw`` is parsed.
+    '''
+
+    def __init__(self, raw, offset, strings, num_glyphs, is_CID):
+        super(Charset, self).__init__()
+        predefined = offset in {0, 1, 2}
+        self.standard_charset = offset if predefined else None
+        if predefined:
+            if is_CID:
+                raise ValueError("CID font must not use a standard charset")
+            return
+
+        # Glyph 0 is not stored in the table
+        self.append(b'.notdef')
+        fmt, = unpack_from(b'>B', raw, offset)
+        if fmt == 0:
+            parser = self.parse_fmt0
+        elif fmt == 1:
+            parser = self.parse_fmt1
+        elif fmt == 2:
+            parser = partial(self.parse_fmt1, is_two_byte=True)
+        else:
+            raise UnsupportedFont('This font uses unsupported charset '
+                    'table format: %d'%fmt)
+        parser(raw, offset + 1, strings, num_glyphs, is_CID)
+
+    def parse_fmt0(self, raw, offset, strings, num_glyphs, is_CID):
+        # Format 0: one 16 bit id for every glyph except glyph 0
+        layout = ('>%dH'%(num_glyphs-1)).encode('ascii')
+        sids = unpack_from(layout, raw, offset)
+        if is_CID:
+            self.extend('cid%05d'%sid for sid in sids)
+        else:
+            self.extend(strings[sid] for sid in sids)
+
+    def parse_fmt1(self, raw, offset, strings, num_glyphs, is_CID,
+            is_two_byte=False):
+        # Formats 1 and 2: ranges of consecutive ids, stored as the first id
+        # and the number of ids that follow it (8 or 16 bit wide)
+        if is_two_byte:
+            layout = b'>2H'
+        else:
+            layout = b'>HB'
+        record_size = calcsize(layout)
+        covered = 1
+        while covered < num_glyphs:
+            first, nleft = unpack_from(layout, raw, offset)
+            offset += record_size
+            covered += nleft + 1
+            for sid in range(first, first + nleft+1):
+                self.append('cid%05d'%sid if is_CID else strings[sid])
+
+    def lookup(self, glyph_id):
+        '''Return the name of the glyph ``glyph_id``.'''
+        if self.standard_charset is not None:
+            names = STANDARD_CHARSETS[self.standard_charset]
+            return names[glyph_id].encode('ascii')
+        return self[glyph_id]
+
+    def safe_lookup(self, glyph_id):
+        '''Same as :meth:`lookup`, but return None for unknown glyph ids.'''
+        try:
+            name = self.lookup(glyph_id)
+        except (KeyError, IndexError, ValueError):
+            name = None
+        return name
+
+
+class Subrs(Index):
+    '''An Index holding subroutines, adds nothing to Index.'''
+    pass
+
+
+class CharStringsIndex(Index):
+    '''An Index holding the charstrings (glyph definitions), adds nothing to Index.'''
+    pass
+
+
+class CFFTable(UnknownTable):
+    '''The CFF table of a font, with support for subsetting.'''
+
+    def decompile(self):
+        self.cff = CFF(self.raw)
+
+    def subset(self, character_map, extra_glyphs):
+        '''
+        Replace the table with a subset that contains only the glyphs for the
+        characters in ``character_map`` and the glyph ids in ``extra_glyphs``.
+
+        ``character_map`` maps character codes to glyph ids. It is modified
+        in place to refer to the glyph ids of the subset font.
+
+        :raises NoGlyphs: If none of the requested characters has a glyph
+        '''
+        from calibre.utils.fonts.sfnt.cff.writer import Subset
+        name_for = self.cff.charset.safe_lookup
+
+        # Map codes from the cmap table to glyph names, this will be used to
+        # reconstruct character_map for the subset font
+        charset_map = {}
+        for code, glyph_id in iteritems(character_map):
+            charset_map[code] = name_for(glyph_id)
+        charset = {name for name in itervalues(charset_map)
+                   if name is not None}
+        if character_map and not charset:
+            raise NoGlyphs('This font has no glyphs for the specified characters')
+        for glyph_id in extra_glyphs:
+            name = name_for(glyph_id)
+            if name is not None:
+                charset.add(name)
+        s = Subset(self.cff, charset)
+
+        # Rebuild character_map with the glyph ids from the subset font.
+        # Codes whose glyph is missing or maps to glyph 0 are dropped.
+        character_map.clear()
+        for code, charname in iteritems(charset_map):
+            new_id = s.charname_map.get(charname, None)
+            if new_id:
+                character_map[code] = new_id
+
+        # Check that raw is parseable
+        CFF(s.raw)
+
+        self.raw = s.raw