Added docx writer related modules

2026-04-04 11:43:33 +02:00 · 2020-04-13 16:33:15 +02:00
parent ae80ae5640
commit 98b2dd8d4f
29 changed files with 5956 additions and 0 deletions
--- a/ebook_converter/utils/fonts/sfnt/__init__.py
+++ b/ebook_converter/utils/fonts/sfnt/__init__.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from datetime import datetime, timedelta
+
+
+def align_block(raw, multiple=4, pad=b'\0'):
+    '''
+    Return raw with enough copies of pad appended to make its length a
+    multiple of ``multiple`` (4 by default). raw is returned unchanged when
+    its length is already aligned.
+    '''
+    remainder = len(raw) % multiple
+    if remainder:
+        return raw + pad * (multiple - remainder)
+    return raw
+
+
+class UnknownTable(object):
+    '''
+    Holder for raw table data that is kept as an opaque blob. Calling the
+    instance returns the raw data and len() reports its length.
+    '''
+
+    def __init__(self, raw):
+        self.raw = raw
+
+    def __len__(self):
+        return len(self.raw)
+
+    def __call__(self):
+        return self.raw
+
+
+class DateTimeProperty(object):
+    '''
+    Descriptor that exposes an attribute holding a number of seconds elapsed
+    since 1904-01-01 00:00 as a datetime object.
+    '''
+
+    def __init__(self, name):
+        # Name of the instance attribute that stores the seconds count
+        self.name = name
+
+    def __get__(self, obj, type=None):
+        seconds = getattr(obj, self.name)
+        return datetime(1904, 1, 1) + timedelta(seconds=seconds)
+
+    def __set__(self, obj, val):
+        # Fractions of a second are truncated by int()
+        elapsed = val - datetime(1904, 1, 1)
+        setattr(obj, self.name, int(elapsed.total_seconds()))
+
+
+class FixedProperty(object):
+    '''
+    Descriptor that exposes an integer attribute holding a value scaled by
+    0x10000 (16.16 fixed point) as a plain number.
+    '''
+
+    def __init__(self, name):
+        # Name of the instance attribute that stores the scaled integer
+        self.name = name
+
+    def __get__(self, obj, type=None):
+        val = getattr(obj, self.name)
+        # True division: this module imports division from __future__
+        return val / 0x10000
+
+    def __set__(self, obj, val):
+        # The scaled value has to be stored on the instance. Returning it (as
+        # was done before) has no effect, since the return value of __set__
+        # is discarded, so assignments were silently lost.
+        setattr(obj, self.name, int(round(val * 0x10000)))
+
+
+def max_power_of_two(x):
+    """
+    Return the highest exponent of two, so that
+    (2 ** exponent) <= x
+
+    0 is returned when no such exponent exists (x < 1).
+    """
+    exponent = 0
+    # Stop at 1 instead of 0: a negative number never becomes 0 when shifted
+    # right, so looping until x is falsy would never terminate for x < 0.
+    while x > 1:
+        x >>= 1
+        exponent += 1
+    return exponent
+
+
+def load_font(stream_or_path):
+    '''
+    Create an Sfnt object from font data. When stream_or_path has a read()
+    method the data is read from it, otherwise the argument itself is passed
+    to Sfnt unchanged.
+
+    NOTE(review): nothing here opens a file, so despite the parameter name a
+    filesystem path does not appear to be supported -- confirm against Sfnt.
+    '''
+    if hasattr(stream_or_path, 'read'):
+        data = stream_or_path.read()
+    else:
+        data = stream_or_path
+    from calibre.utils.fonts.sfnt.container import Sfnt
+    return Sfnt(data)
+
--- a/ebook_converter/utils/fonts/sfnt/cff/__init__.py
+++ b/ebook_converter/utils/fonts/sfnt/cff/__init__.py
@@ -0,0 +1,10 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+
--- a/ebook_converter/utils/fonts/sfnt/cff/constants.py
+++ b/ebook_converter/utils/fonts/sfnt/cff/constants.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+# cff_standard_strings {{{
+# The 391 Standard Strings as used in the CFF format.
+# from Adobe Technical Note #5176, version 1.0, 18 March 1998
+
+cff_standard_strings = [
+'.notdef', 'space', 'exclam', 'quotedbl', 'numbersign', 'dollar', 'percent',
+'ampersand', 'quoteright', 'parenleft', 'parenright', 'asterisk', 'plus',
+'comma', 'hyphen', 'period', 'slash', 'zero', 'one', 'two', 'three', 'four',
+'five', 'six', 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
+'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
+'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+'bracketleft', 'backslash', 'bracketright', 'asciicircum', 'underscore',
+'quoteleft', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft',
+'bar', 'braceright', 'asciitilde', 'exclamdown', 'cent', 'sterling',
+'fraction', 'yen', 'florin', 'section', 'currency', 'quotesingle',
+'quotedblleft', 'guillemotleft', 'guilsinglleft', 'guilsinglright', 'fi', 'fl',
+'endash', 'dagger', 'daggerdbl', 'periodcentered', 'paragraph', 'bullet',
+'quotesinglbase', 'quotedblbase', 'quotedblright', 'guillemotright',
+'ellipsis', 'perthousand', 'questiondown', 'grave', 'acute', 'circumflex',
+'tilde', 'macron', 'breve', 'dotaccent', 'dieresis', 'ring', 'cedilla',
+'hungarumlaut', 'ogonek', 'caron', 'emdash', 'AE', 'ordfeminine', 'Lslash',
+'Oslash', 'OE', 'ordmasculine', 'ae', 'dotlessi', 'lslash', 'oslash', 'oe',
+'germandbls', 'onesuperior', 'logicalnot', 'mu', 'trademark', 'Eth', 'onehalf',
+'plusminus', 'Thorn', 'onequarter', 'divide', 'brokenbar', 'degree', 'thorn',
+'threequarters', 'twosuperior', 'registered', 'minus', 'eth', 'multiply',
+'threesuperior', 'copyright', 'Aacute', 'Acircumflex', 'Adieresis', 'Agrave',
+'Aring', 'Atilde', 'Ccedilla', 'Eacute', 'Ecircumflex', 'Edieresis', 'Egrave',
+'Iacute', 'Icircumflex', 'Idieresis', 'Igrave', 'Ntilde', 'Oacute',
+'Ocircumflex', 'Odieresis', 'Ograve', 'Otilde', 'Scaron', 'Uacute',
+'Ucircumflex', 'Udieresis', 'Ugrave', 'Yacute', 'Ydieresis', 'Zcaron',
+'aacute', 'acircumflex', 'adieresis', 'agrave', 'aring', 'atilde', 'ccedilla',
+'eacute', 'ecircumflex', 'edieresis', 'egrave', 'iacute', 'icircumflex',
+'idieresis', 'igrave', 'ntilde', 'oacute', 'ocircumflex', 'odieresis',
+'ograve', 'otilde', 'scaron', 'uacute', 'ucircumflex', 'udieresis', 'ugrave',
+'yacute', 'ydieresis', 'zcaron', 'exclamsmall', 'Hungarumlautsmall',
+'dollaroldstyle', 'dollarsuperior', 'ampersandsmall', 'Acutesmall',
+'parenleftsuperior', 'parenrightsuperior', 'twodotenleader', 'onedotenleader',
+'zerooldstyle', 'oneoldstyle', 'twooldstyle', 'threeoldstyle', 'fouroldstyle',
+'fiveoldstyle', 'sixoldstyle', 'sevenoldstyle', 'eightoldstyle',
+'nineoldstyle', 'commasuperior', 'threequartersemdash', 'periodsuperior',
+'questionsmall', 'asuperior', 'bsuperior', 'centsuperior', 'dsuperior',
+'esuperior', 'isuperior', 'lsuperior', 'msuperior', 'nsuperior', 'osuperior',
+'rsuperior', 'ssuperior', 'tsuperior', 'ff', 'ffi', 'ffl', 'parenleftinferior',
+'parenrightinferior', 'Circumflexsmall', 'hyphensuperior', 'Gravesmall',
+'Asmall', 'Bsmall', 'Csmall', 'Dsmall', 'Esmall', 'Fsmall', 'Gsmall', 'Hsmall',
+'Ismall', 'Jsmall', 'Ksmall', 'Lsmall', 'Msmall', 'Nsmall', 'Osmall', 'Psmall',
+'Qsmall', 'Rsmall', 'Ssmall', 'Tsmall', 'Usmall', 'Vsmall', 'Wsmall', 'Xsmall',
+'Ysmall', 'Zsmall', 'colonmonetary', 'onefitted', 'rupiah', 'Tildesmall',
+'exclamdownsmall', 'centoldstyle', 'Lslashsmall', 'Scaronsmall', 'Zcaronsmall',
+'Dieresissmall', 'Brevesmall', 'Caronsmall', 'Dotaccentsmall', 'Macronsmall',
+'figuredash', 'hypheninferior', 'Ogoneksmall', 'Ringsmall', 'Cedillasmall',
+'questiondownsmall', 'oneeighth', 'threeeighths', 'fiveeighths',
+'seveneighths', 'onethird', 'twothirds', 'zerosuperior', 'foursuperior',
+'fivesuperior', 'sixsuperior', 'sevensuperior', 'eightsuperior',
+'ninesuperior', 'zeroinferior', 'oneinferior', 'twoinferior', 'threeinferior',
+'fourinferior', 'fiveinferior', 'sixinferior', 'seveninferior',
+'eightinferior', 'nineinferior', 'centinferior', 'dollarinferior',
+'periodinferior', 'commainferior', 'Agravesmall', 'Aacutesmall',
+'Acircumflexsmall', 'Atildesmall', 'Adieresissmall', 'Aringsmall', 'AEsmall',
+'Ccedillasmall', 'Egravesmall', 'Eacutesmall', 'Ecircumflexsmall',
+'Edieresissmall', 'Igravesmall', 'Iacutesmall', 'Icircumflexsmall',
+'Idieresissmall', 'Ethsmall', 'Ntildesmall', 'Ogravesmall', 'Oacutesmall',
+'Ocircumflexsmall', 'Otildesmall', 'Odieresissmall', 'OEsmall', 'Oslashsmall',
+'Ugravesmall', 'Uacutesmall', 'Ucircumflexsmall', 'Udieresissmall',
+'Yacutesmall', 'Thornsmall', 'Ydieresissmall', '001.000', '001.001', '001.002',
+'001.003', 'Black', 'Bold', 'Book', 'Light', 'Medium', 'Regular', 'Roman',
+'Semibold'
+]
+# }}}
+
+
+# The predefined charsets, indexed by charset id (0, 1, 2). Each entry lists
+# the glyph names in glyph id order, so glyph 0 is always ".notdef".
+STANDARD_CHARSETS = [  # {{{
+# ISOAdobe
+(".notdef", "space", "exclam", "quotedbl", "numbersign", "dollar",
+    "percent", "ampersand", "quoteright", "parenleft", "parenright",
+    "asterisk", "plus", "comma", "hyphen", "period", "slash", "zero",
+    "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
+    "colon", "semicolon", "less", "equal", "greater", "question", "at",
+    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N",
+    "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
+    "bracketleft", "backslash", "bracketright", "asciicircum",
+    "underscore", "quoteleft", "a", "b", "c", "d", "e", "f", "g", "h", "i",
+    "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w",
+    "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde",
+    "exclamdown", "cent", "sterling", "fraction", "yen", "florin",
+    "section", "currency", "quotesingle", "quotedblleft", "guillemotleft",
+    "guilsinglleft", "guilsinglright", "fi", "fl", "endash", "dagger",
+    "daggerdbl", "periodcentered", "paragraph", "bullet", "quotesinglbase",
+    "quotedblbase", "quotedblright", "guillemotright", "ellipsis",
+    "perthousand", "questiondown", "grave", "acute", "circumflex", "tilde",
+    "macron", "breve", "dotaccent", "dieresis", "ring", "cedilla",
+    "hungarumlaut", "ogonek", "caron", "emdash", "AE", "ordfeminine",
+    "Lslash", "Oslash", "OE", "ordmasculine", "ae", "dotlessi", "lslash",
+    "oslash", "oe", "germandbls", "onesuperior", "logicalnot", "mu",
+    "trademark", "Eth", "onehalf", "plusminus", "Thorn", "onequarter",
+    "divide", "brokenbar", "degree", "thorn", "threequarters",
+    "twosuperior", "registered", "minus", "eth", "multiply",
+    "threesuperior", "copyright", "Aacute", "Acircumflex", "Adieresis",
+    "Agrave", "Aring", "Atilde", "Ccedilla", "Eacute", "Ecircumflex",
+    "Edieresis", "Egrave", "Iacute", "Icircumflex", "Idieresis", "Igrave",
+    "Ntilde", "Oacute", "Ocircumflex", "Odieresis", "Ograve", "Otilde",
+    "Scaron", "Uacute", "Ucircumflex", "Udieresis", "Ugrave", "Yacute",
+    "Ydieresis", "Zcaron", "aacute", "acircumflex", "adieresis", "agrave",
+    "aring", "atilde", "ccedilla", "eacute", "ecircumflex", "edieresis",
+    "egrave", "iacute", "icircumflex", "idieresis", "igrave", "ntilde",
+    "oacute", "ocircumflex", "odieresis", "ograve", "otilde", "scaron",
+    "uacute", "ucircumflex", "udieresis", "ugrave", "yacute", "ydieresis",
+    "zcaron"),
+
+# Expert
+(".notdef", "space", "exclamsmall", "Hungarumlautsmall", "dollaroldstyle",
+    "dollarsuperior", "ampersandsmall", "Acutesmall", "parenleftsuperior",
+    "parenrightsuperior", "twodotenleader", "onedotenleader", "comma",
+    "hyphen", "period", "fraction", "zerooldstyle", "oneoldstyle",
+    "twooldstyle", "threeoldstyle", "fouroldstyle", "fiveoldstyle",
+    "sixoldstyle", "sevenoldstyle", "eightoldstyle", "nineoldstyle",
+    "colon", "semicolon", "commasuperior", "threequartersemdash",
+    "periodsuperior", "questionsmall", "asuperior", "bsuperior",
+    "centsuperior", "dsuperior", "esuperior", "isuperior", "lsuperior",
+    "msuperior", "nsuperior", "osuperior", "rsuperior", "ssuperior",
+    "tsuperior", "ff", "fi", "fl", "ffi", "ffl", "parenleftinferior",
+    "parenrightinferior", "Circumflexsmall", "hyphensuperior",
+    "Gravesmall", "Asmall", "Bsmall", "Csmall", "Dsmall", "Esmall",
+    "Fsmall", "Gsmall", "Hsmall", "Ismall", "Jsmall", "Ksmall", "Lsmall",
+    "Msmall", "Nsmall", "Osmall", "Psmall", "Qsmall", "Rsmall", "Ssmall",
+    "Tsmall", "Usmall", "Vsmall", "Wsmall", "Xsmall", "Ysmall", "Zsmall",
+    "colonmonetary", "onefitted", "rupiah", "Tildesmall",
+    "exclamdownsmall", "centoldstyle", "Lslashsmall", "Scaronsmall",
+    "Zcaronsmall", "Dieresissmall", "Brevesmall", "Caronsmall",
+    "Dotaccentsmall", "Macronsmall", "figuredash", "hypheninferior",
+    "Ogoneksmall", "Ringsmall", "Cedillasmall", "onequarter", "onehalf",
+    "threequarters", "questiondownsmall", "oneeighth", "threeeighths",
+    "fiveeighths", "seveneighths", "onethird", "twothirds", "zerosuperior",
+    "onesuperior", "twosuperior", "threesuperior", "foursuperior",
+    "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
+    "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
+    "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
+    "seveninferior", "eightinferior", "nineinferior", "centinferior",
+    "dollarinferior", "periodinferior", "commainferior", "Agravesmall",
+    "Aacutesmall", "Acircumflexsmall", "Atildesmall", "Adieresissmall",
+    "Aringsmall", "AEsmall", "Ccedillasmall", "Egravesmall", "Eacutesmall",
+    "Ecircumflexsmall", "Edieresissmall", "Igravesmall", "Iacutesmall",
+    "Icircumflexsmall", "Idieresissmall", "Ethsmall", "Ntildesmall",
+    "Ogravesmall", "Oacutesmall", "Ocircumflexsmall", "Otildesmall",
+    "Odieresissmall", "OEsmall", "Oslashsmall", "Ugravesmall",
+    "Uacutesmall", "Ucircumflexsmall", "Udieresissmall", "Yacutesmall",
+    "Thornsmall", "Ydieresissmall"),
+
+# Expert Subset
+(".notdef", "space", "dollaroldstyle", "dollarsuperior",
+        "parenleftsuperior", "parenrightsuperior", "twodotenleader",
+        "onedotenleader", "comma", "hyphen", "period", "fraction",
+        "zerooldstyle", "oneoldstyle", "twooldstyle", "threeoldstyle",
+        "fouroldstyle", "fiveoldstyle", "sixoldstyle", "sevenoldstyle",
+        "eightoldstyle", "nineoldstyle", "colon", "semicolon",
+        "commasuperior", "threequartersemdash", "periodsuperior",
+        "asuperior", "bsuperior", "centsuperior", "dsuperior", "esuperior",
+        "isuperior", "lsuperior", "msuperior", "nsuperior", "osuperior",
+        "rsuperior", "ssuperior", "tsuperior", "ff", "fi", "fl", "ffi",
+        "ffl", "parenleftinferior", "parenrightinferior", "hyphensuperior",
+        "colonmonetary", "onefitted", "rupiah", "centoldstyle",
+        "figuredash", "hypheninferior", "onequarter", "onehalf",
+        "threequarters", "oneeighth", "threeeighths", "fiveeighths",
+        "seveneighths", "onethird", "twothirds", "zerosuperior",
+        "onesuperior", "twosuperior", "threesuperior", "foursuperior",
+        "fivesuperior", "sixsuperior", "sevensuperior", "eightsuperior",
+        "ninesuperior", "zeroinferior", "oneinferior", "twoinferior",
+        "threeinferior", "fourinferior", "fiveinferior", "sixinferior",
+        "seveninferior", "eightinferior", "nineinferior", "centinferior",
+        "dollarinferior", "periodinferior", "commainferior"),
+]  # }}}
+
--- a/ebook_converter/utils/fonts/sfnt/cff/dict_data.py
+++ b/ebook_converter/utils/fonts/sfnt/cff/dict_data.py
@@ -0,0 +1,311 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import pack, unpack_from
+from polyglot.builtins import range, unicode_type
+
+# Dispatch tables: the position in the list is the first byte (b0) of an
+# encoded item, the entry is the name of the method that handles it.
+t1_operand_encoding = (
+    ["do_operator"] * 32 +                  # 0 - 31
+    ["read_byte"] * (247 - 32) +            # 32 - 246
+    ["read_small_int1"] * (251 - 247) +     # 247 - 250
+    ["read_small_int2"] * (255 - 251) +     # 251 - 254
+    ["read_long_int"])                      # 255
+
+t2_operand_encoding = list(t1_operand_encoding)
+t2_operand_encoding[28] = "read_short_int"
+t2_operand_encoding[255] = "read_fixed_1616"
+
+cff_dict_operand_encoding = list(t2_operand_encoding)
+cff_dict_operand_encoding[29] = "read_long_int"
+cff_dict_operand_encoding[30] = "read_real_number"
+cff_dict_operand_encoding[255] = "reserved"
+
+# Text fragment for every nibble value used when encoding real numbers (the
+# position in the list is the nibble value, 13 has no fragment), and the
+# reverse mapping from fragment to nibble value.
+real_nibbles = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+        '.', 'E', 'E-', None, '-']
+real_nibbles_map = dict((sym, idx) for idx, sym in enumerate(real_nibbles))
+
+
+class ByteCode(dict):
+    '''
+    Readers and writers for byte encoded numbers.
+
+    Every read_* method receives the already consumed first byte (b0), the
+    data and the index of the byte following b0. It returns a tuple of the
+    decoded value and the index of the first byte it did not consume. The
+    write_* methods return the encoded value as bytes.
+    '''
+
+    def read_byte(self, b0, data, index):
+        # The value is stored in b0 itself, nothing more is consumed
+        return b0 - 139, index
+
+    def read_small_int1(self, b0, data, index):
+        # Two byte encoding of positive integers starting at 108
+        b1 = ord(data[index:index+1])
+        return (b0-247)*256 + b1 + 108, index+1
+
+    def read_small_int2(self, b0, data, index):
+        # Two byte encoding of negative integers starting at -108
+        b1 = ord(data[index:index+1])
+        return -(b0-251)*256 - b1 - 108, index+1
+
+    def read_short_int(self, b0, data, index):
+        # Big endian signed 16 bit integer
+        value, = unpack_from(b">h", data, index)
+        return value, index+2
+
+    def read_long_int(self, b0, data, index):
+        # Big endian signed 32 bit integer
+        value, = unpack_from(b">l", data, index)
+        return value, index+4
+
+    def read_fixed_1616(self, b0, data, index):
+        # Big endian signed 32 bit integer scaled by 65536
+        value, = unpack_from(b">l", data, index)
+        return value / 65536.0, index+4
+
+    def read_real_number(self, b0, data, index):
+        # The number is stored as text, one nibble per fragment (see
+        # real_nibbles), terminated by a nibble with the value 0xf
+        number = ''
+        while True:
+            b = ord(data[index:index+1])
+            index = index + 1
+            nibble0 = (b & 0xf0) >> 4
+            nibble1 = b & 0x0f
+            if nibble0 == 0xf:
+                break
+            number = number + real_nibbles[nibble0]
+            if nibble1 == 0xf:
+                break
+            number = number + real_nibbles[nibble1]
+        return float(number), index
+
+    def write_float(self, f, encoding='ignored'):
+        '''
+        Return f encoded as a real number: the byte 30 followed by the text
+        form of f packed as nibbles. encoding is accepted only so that the
+        signature matches write_int() and is not used.
+        '''
+        s = unicode_type(f).upper()
+        # Drop the redundant leading zero
+        if s[:2] == "0.":
+            s = s[1:]
+        elif s[:3] == "-0.":
+            s = "-" + s[2:]
+        nibbles = []
+        while s:
+            c = s[0]
+            s = s[1:]
+            if c == "E":
+                sign = s[:1]
+                if sign == "-":
+                    s = s[1:]
+                    c = "E-"
+                elif sign == "+":
+                    # Large floats are formatted like 1e+16. There is no
+                    # nibble for '+', a positive exponent is a plain 'E'.
+                    s = s[1:]
+            nibbles.append(real_nibbles_map[c])
+        # Terminator, doubled if needed to fill the last byte
+        nibbles.append(0xf)
+        if len(nibbles) % 2:
+            nibbles.append(0xf)
+        d = bytearray([30])
+        for i in range(0, len(nibbles), 2):
+            d.append(nibbles[i] << 4 | nibbles[i+1])
+        return bytes(d)
+
+    def write_int(self, value, encoding="cff"):
+        '''
+        Return value encoded as an integer, using the shortest form that can
+        hold it. encoding selects the prefix byte of the four byte form
+        ('cff' or 't1'), any other encoding falls back to the two byte form.
+        '''
+        four_byte_op = {'cff':29, 't1':255}.get(encoding, None)
+
+        if -107 <= value <= 107:
+            code = bytes(bytearray([value + 139]))
+        elif 108 <= value <= 1131:
+            value = value - 108
+            code = bytes(bytearray([(value >> 8) + 247, (value & 0xFF)]))
+        elif -1131 <= value <= -108:
+            value = -value - 108
+            code = bytes(bytearray([(value >> 8) + 251, (value & 0xFF)]))
+        elif four_byte_op is None:
+            # T2 only supports 2 byte ints
+            code = bytes(bytearray([28])) + pack(b">h", value)
+        else:
+            code = bytes(bytearray([four_byte_op])) + pack(b">l", value)
+        return code
+
+    def write_offset(self, value):
+        # Always use the four byte form, whatever the magnitude of value
+        return bytes(bytearray([29])) + pack(b">l", value)
+
+    def write_number(self, value, encoding="cff"):
+        # Floats are written as real numbers, everything else as an integer
+        f = self.write_float if isinstance(value, float) else self.write_int
+        return f(value, encoding)
+
+
+class Dict(ByteCode):
+    '''
+    A dictionary of operator name -> value that can be read from
+    (decompile()) and written to (compile()) its byte encoded form.
+
+    Subclasses describe their operators in TABLE, one row per operator:
+    (opcode, name, argument type, default). An argument type is either a
+    string or a tuple of strings, each naming a pair of arg_<type> /
+    encode_<type> methods.
+    '''
+
+    operand_encoding = cff_dict_operand_encoding
+    TABLE = ()
+    # Names of operators that compile() never writes out
+    FILTERED = frozenset()
+    # Names of operators whose numbers compile() writes with write_offset()
+    OFFSETS = frozenset()
+
+    def __init__(self):
+        ByteCode.__init__(self)
+
+        self.operators = {op:(name, arg) for op, name, arg, default in
+                self.TABLE}
+        self.defaults = {name:default for op, name, arg, default in self.TABLE}
+
+    def safe_get(self, name):
+        '''
+        Return the value stored for name, or the default from TABLE if there
+        is none. Raises KeyError if name is not in TABLE.
+        '''
+        return self.get(name, self.defaults[name])
+
+    def decompile(self, strings, global_subrs, data):
+        '''
+        Decode data and store the value of every operator found in it in
+        this dictionary. strings is used to resolve SID arguments.
+        '''
+        self.strings = strings
+        self.global_subrs = global_subrs
+        # Operands are collected here until an operator consumes them
+        self.stack = []
+        index = 0
+        while index < len(data):
+            b0 = ord(data[index:index+1])
+            index += 1
+            handler = getattr(self, self.operand_encoding[b0])
+            value, index = handler(b0, data, index)
+            if value is not None:
+                self.stack.append(value)
+
+    def do_operator(self, b0, data, index):
+        # 12 introduces a two byte operator, which is keyed by a tuple
+        if b0 == 12:
+            op = (b0, ord(data[index:index+1]))
+            index += 1
+        else:
+            op = b0
+        operator, arg_type = self.operators[op]
+        self.handle_operator(operator, arg_type)
+        # None tells decompile() that there is no operand to push
+        return None, index
+
+    def handle_operator(self, operator, arg_type):
+        if isinstance(arg_type, tuple):
+            # The last argument is on top of the stack, so the arguments are
+            # popped in reverse order and prepended to the result
+            value = ()
+            for i in range(len(arg_type)-1, -1, -1):
+                arg = arg_type[i]
+                arghandler = getattr(self, 'arg_' + arg)
+                value = (arghandler(operator),) + value
+        else:
+            arghandler = getattr(self, 'arg_' + arg_type)
+            value = arghandler(operator)
+        self[operator] = value
+
+    def arg_number(self, name):
+        return self.stack.pop()
+
+    def arg_SID(self, name):
+        return self.strings[self.stack.pop()]
+
+    def arg_array(self, name):
+        # Consumes the whole stack
+        ans = self.stack[:]
+        del self.stack[:]
+        return ans
+
+    def arg_delta(self, name):
+        # The stack holds differences between successive values, return the
+        # running totals. Consumes the whole stack.
+        out = []
+        current = 0
+        for v in self.stack:
+            current = current + v
+            out.append(current)
+        del self.stack[:]
+        return out
+
+    def compile(self, strings):
+        '''
+        Return the byte encoded form of this dictionary (also stored in
+        self.raw). Operators listed in FILTERED and operators whose value
+        equals the default are left out. strings is called with every SID
+        value and must return the number to write for it.
+
+        Raises ValueError if the value of an operator taking several
+        arguments has the wrong number of items.
+        '''
+        data = []
+        for op, name, arg, default in self.TABLE:
+            if name in self.FILTERED:
+                continue
+            val = self.safe_get(name)
+            opcode = bytes(bytearray(op if isinstance(op, tuple) else [op]))
+            if val != self.defaults[name]:
+                # Read by encode_number()
+                self.encoding_offset = name in self.OFFSETS
+                if isinstance(arg, tuple):
+                    if len(val) != len(arg):
+                        raise ValueError('Invalid argument %s for operator: %s'
+                                %(val, op))
+                    for typ, v in zip(arg, val):
+                        if typ == 'SID':
+                            # Convert the individual item, not the whole
+                            # tuple of arguments
+                            v = strings(v)
+                        data.append(getattr(self, 'encode_'+typ)(v))
+                else:
+                    if arg == 'SID':
+                        val = strings(val)
+                    data.append(getattr(self, 'encode_'+arg)(val))
+                # Operands come first, the operator follows them
+                data.append(opcode)
+        self.raw = b''.join(data)
+        return self.raw
+
+    def encode_number(self, val):
+        if self.encoding_offset:
+            return self.write_offset(val)
+        return self.write_number(val)
+
+    def encode_SID(self, val):
+        return self.write_int(val)
+
+    def encode_array(self, val):
+        return b''.join(map(self.encode_number, val))
+
+    def encode_delta(self, value):
+        # Inverse of arg_delta(): write the differences between successive
+        # values
+        out = []
+        last = 0
+        for v in value:
+            out.append(v - last)
+            last = v
+        return self.encode_array(out)
+
+
+class TopDict(Dict):
+    '''
+    Dict subclass declaring the operators of the top dict. Every row of TABLE
+    is (opcode, name, argument type, default); two byte opcodes are written
+    as a tuple.
+    '''
+
+    TABLE = (
+    # opcode     name                  argument type   default
+    ((12, 30), 'ROS',        ('SID','SID','number'), None,),
+    ((12, 20), 'SyntheticBase',      'number',       None,),
+    (0,        'version',            'SID',          None,),
+    (1,        'Notice',             'SID',          None,),
+    ((12, 0),  'Copyright',          'SID',          None,),
+    (2,        'FullName',           'SID',          None,),
+    ((12, 38), 'FontName',           'SID',          None,),
+    (3,        'FamilyName',         'SID',          None,),
+    (4,        'Weight',             'SID',          None,),
+    ((12, 1),  'isFixedPitch',       'number',       0,),
+    ((12, 2),  'ItalicAngle',        'number',       0,),
+    ((12, 3),  'UnderlinePosition',  'number',       None,),
+    ((12, 4),  'UnderlineThickness', 'number',       50,),
+    ((12, 5),  'PaintType',          'number',       0,),
+    ((12, 6),  'CharstringType',     'number',       2,),
+    ((12, 7),  'FontMatrix',         'array',  [0.001,0,0,0.001,0,0],),
+    (13,       'UniqueID',           'number',       None,),
+    (5,        'FontBBox',           'array',  [0,0,0,0],),
+    ((12, 8),  'StrokeWidth',        'number',       0,),
+    (14,       'XUID',               'array',        None,),
+    ((12, 21), 'PostScript',         'SID',          None,),
+    ((12, 22), 'BaseFontName',       'SID',          None,),
+    ((12, 23), 'BaseFontBlend',      'delta',        None,),
+    ((12, 31), 'CIDFontVersion',     'number',       0,),
+    ((12, 32), 'CIDFontRevision',    'number',       0,),
+    ((12, 33), 'CIDFontType',        'number',       0,),
+    ((12, 34), 'CIDCount',           'number',       8720,),
+    (15,       'charset',            'number',       0,),
+    ((12, 35), 'UIDBase',            'number',       None,),
+    (16,       'Encoding',           'number',       0,),
+    (18,       'Private',       ('number','number'), None,),
+    ((12, 37), 'FDSelect',           'number',       None,),
+    ((12, 36), 'FDArray',            'number',       None,),
+    (17,       'CharStrings',        'number',       None,),
+    )
+
+    # We will not write these operators out
+    FILTERED = {'ROS', 'SyntheticBase', 'UniqueID', 'XUID',
+            'CIDFontVersion', 'CIDFontRevision', 'CIDFontType', 'CIDCount',
+            'UIDBase', 'Encoding', 'FDSelect', 'FDArray'}
+    # Numbers of these operators are always written in the fixed size form
+    # produced by write_offset()
+    OFFSETS = {'charset', 'Encoding', 'CharStrings', 'Private'}
+
+
+class PrivateDict(Dict):
+    '''
+    Dict subclass declaring the operators of the private dict. Every row of
+    TABLE is (opcode, name, argument type, default); two byte opcodes are
+    written as a tuple.
+    '''
+
+    TABLE = (
+    #   opcode     name                  argument type   default
+    (6,        'BlueValues',         'delta',        None,),
+    (7,        'OtherBlues',         'delta',        None,),
+    (8,        'FamilyBlues',        'delta',        None,),
+    (9,        'FamilyOtherBlues',   'delta',        None,),
+    ((12, 9),  'BlueScale',          'number',       0.039625,),
+    ((12, 10), 'BlueShift',          'number',       7,),
+    ((12, 11), 'BlueFuzz',           'number',       1,),
+    (10,       'StdHW',              'number',       None,),
+    (11,       'StdVW',              'number',       None,),
+    ((12, 12), 'StemSnapH',          'delta',        None,),
+    ((12, 13), 'StemSnapV',          'delta',        None,),
+    ((12, 14), 'ForceBold',          'number',       0,),
+    ((12, 15), 'ForceBoldThreshold', 'number',       None,),  # deprecated
+    ((12, 16), 'lenIV',              'number',       None,),  # deprecated
+    ((12, 17), 'LanguageGroup',      'number',       0,),
+    ((12, 18), 'ExpansionFactor',    'number',       0.06,),
+    ((12, 19), 'initialRandomSeed',  'number',       0,),
+    (20,       'defaultWidthX',      'number',       0,),
+    (21,       'nominalWidthX',      'number',       0,),
+    (19,       'Subrs',              'number',       None,),
+    )
+
+    # Numbers of these operators are always written in the fixed size form
+    # produced by write_offset()
+    OFFSETS = {'Subrs'}
--- a/ebook_converter/utils/fonts/sfnt/cff/table.py
+++ b/ebook_converter/utils/fonts/sfnt/cff/table.py
@@ -0,0 +1,221 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import unpack_from, unpack, calcsize
+from functools import partial
+
+from calibre.utils.fonts.sfnt import UnknownTable
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
+from calibre.utils.fonts.sfnt.cff.dict_data import TopDict, PrivateDict
+from calibre.utils.fonts.sfnt.cff.constants import (cff_standard_strings,
+        STANDARD_CHARSETS)
+from polyglot.builtins import iteritems, itervalues, range
+
+# Useful links
+# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5176.CFF.pdf
+# http://www.adobe.com/content/dam/Adobe/en/devnet/font/pdfs/5177.Type2.pdf
+
+
+class CFF(object):
+    '''
+    Parsed form of the raw data of a CFF table.
+
+    The constructor reads, in this order: the header, the names index, the
+    top dict index, the strings, the global subroutines, the char strings,
+    the private dict (with its subroutines, if any) and the charset.
+
+    Raises UnsupportedFont for an unknown table version, more than one font,
+    CID keyed fonts and a CharstringType other than 2. Raises ValueError if
+    the top dict has no CharStrings entry.
+    '''
+
+    def __init__(self, raw):
+        # Header: four unsigned bytes
+        (self.major_version, self.minor_version, self.header_size,
+                self.offset_size) = unpack_from(b'>4B', raw)
+        if (self.major_version, self.minor_version) != (1, 0):
+            raise UnsupportedFont('The CFF table has unknown version: '
+                    '(%d, %d)'%(self.major_version, self.minor_version))
+        # The indexes below follow each other, every one starts where the
+        # previous one ended (its .pos)
+        offset = self.header_size
+
+        # Read Names Index
+        self.font_names = Index(raw, offset)
+        offset = self.font_names.pos
+        if len(self.font_names) > 1:
+            raise UnsupportedFont('CFF table has more than one font.')
+
+        # Read Top Dict
+        self.top_index = Index(raw, offset)
+        self.top_dict = TopDict()
+        offset = self.top_index.pos
+
+        # Read strings
+        self.strings = Strings(raw, offset)
+        offset = self.strings.pos
+
+        # Read global subroutines
+        self.global_subrs = Subrs(raw, offset)
+        offset = self.global_subrs.pos
+
+        # Decompile Top Dict (this needs the strings, which are stored after
+        # the top dict index)
+        self.top_dict.decompile(self.strings, self.global_subrs, self.top_index[0])
+        self.is_CID = 'ROS' in self.top_dict
+        if self.is_CID:
+            raise UnsupportedFont('Subsetting of CID keyed fonts is not supported')
+
+        # Read CharStrings (Glyph definitions)
+        try:
+            offset = self.top_dict['CharStrings']
+        except KeyError:
+            raise ValueError('This font has no CharStrings')
+        cs_type = self.top_dict.safe_get('CharstringType')
+        if cs_type != 2:
+            raise UnsupportedFont('This font has unsupported CharstringType: '
+                    '%s'%cs_type)
+        self.char_strings = CharStringsIndex(raw, offset)
+        self.num_glyphs = len(self.char_strings)
+
+        # Read Private Dict
+        self.private_dict = self.private_subrs = None
+        pd = self.top_dict.safe_get('Private')
+        if pd:
+            size, offset = pd
+            self.private_dict = PrivateDict()
+            self.private_dict.decompile(self.strings, self.global_subrs,
+                    raw[offset:offset+size])
+            if 'Subrs' in self.private_dict:
+                # The Subrs value is added to the start of the private dict
+                self.private_subrs = Subrs(raw, offset +
+                        self.private_dict['Subrs'])
+
+        # Read charset (Glyph names)
+        self.charset = Charset(raw, self.top_dict.safe_get('charset'),
+                self.strings, self.num_glyphs, self.is_CID)
+
+        # import pprint
+        # pprint.pprint(self.top_dict)
+        # pprint.pprint(self.private_dict)
+
+
+class Index(list):
+    '''
+    List of the objects stored in an index structure that starts at offset
+    in raw, preceded by the items of prepend. After construction self.pos is
+    the position in raw of the first byte after the index.
+
+    Layout as read here: a two byte count, followed (only if the count is
+    not zero) by a one byte offset size, count + 1 offsets of that size and
+    the object data.
+    '''
+
+    def __init__(self, raw, offset, prepend=()):
+        list.__init__(self)
+        self.extend(prepend)
+
+        count, = unpack_from(b'>H', raw, offset)
+        offset += 2
+        self.pos = offset
+        if count == 0:
+            # An empty index consists of the count only
+            return
+
+        self.offset_size, = unpack_from(b'>B', raw, offset)
+        offset += 1
+        num_offsets = count + 1
+        if self.offset_size == 3:
+            # struct has no three byte type, pad every offset to four bytes
+            offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
+                       for i in range(offset, offset + 3*num_offsets, 3)]
+        else:
+            code = {1:'B', 2:'H', 4:'L'}[self.offset_size]
+            fmt = ('>%d%s' % (num_offsets, code)).encode('ascii')
+            offsets = unpack_from(fmt, raw, offset)
+        # The stored offsets are relative to the byte before the object data
+        base = offset + self.offset_size * num_offsets - 1
+
+        for start, end in zip(offsets, offsets[1:]):
+            self.append(raw[base + start:base + end])
+
+        self.pos = base + offsets[-1]
+
+
+class Strings(Index):
+    '''
+    Index whose first entries are the standard strings (encoded as ASCII),
+    followed by the strings read from raw.
+    '''
+
+    def __init__(self, raw, offset):
+        standard = [name.encode('ascii') for name in cff_standard_strings]
+        super(Strings, self).__init__(raw, offset, prepend=standard)
+
+
+class Charset(list):
+    '''
+    Glyph names in glyph id order.
+
+    An offset of 0, 1 or 2 selects one of the STANDARD_CHARSETS. The list
+    itself then stays empty and lookup() reads the names from
+    STANDARD_CHARSETS. Any other offset is the position in raw of a charset
+    table, whose names are stored in this list after b'.notdef'.
+    '''
+
+    def __init__(self, raw, offset, strings, num_glyphs, is_CID):
+        super(Charset, self).__init__()
+        self.standard_charset = offset if offset in {0, 1, 2} else None
+        if is_CID and self.standard_charset is not None:
+            raise ValueError("CID font must not use a standard charset")
+        if self.standard_charset is not None:
+            return
+
+        # Glyph 0 is not part of the table
+        self.append(b'.notdef')
+        fmt = unpack_from(b'>B', raw, offset)[0]
+        offset += 1
+        if fmt == 0:
+            parser = self.parse_fmt0
+        elif fmt == 1:
+            parser = self.parse_fmt1
+        elif fmt == 2:
+            # Same as format 1, with a two byte count per range
+            parser = partial(self.parse_fmt1, is_two_byte=True)
+        else:
+            raise UnsupportedFont('This font uses unsupported charset '
+                    'table format: %d'%fmt)
+        parser(raw, offset, strings, num_glyphs, is_CID)
+
+    def parse_fmt0(self, raw, offset, strings, num_glyphs, is_CID):
+        # One two byte id for every glyph except glyph 0
+        layout = ('>%dH'%(num_glyphs-1)).encode('ascii')
+        ids = unpack_from(layout, raw, offset)
+        if is_CID:
+            names = ('cid%05d'%x for x in ids)
+        else:
+            names = (strings[x] for x in ids)
+        self.extend(names)
+
+    def parse_fmt1(self, raw, offset, strings, num_glyphs, is_CID,
+            is_two_byte=False):
+        # A sequence of ranges, each stored as the first id and the number
+        # of ids following it
+        layout = b'>2H' if is_two_byte else b'>HB'
+        step = calcsize(layout)
+        seen = 1
+        while seen < num_glyphs:
+            first, nleft = unpack_from(layout, raw, offset)
+            offset += step
+            seen += nleft + 1
+            ids = range(first, first + nleft+1)
+            self.extend('cid%05d'%x if is_CID else strings[x] for x in ids)
+
+    def lookup(self, glyph_id):
+        if self.standard_charset is not None:
+            names = STANDARD_CHARSETS[self.standard_charset]
+            return names[glyph_id].encode('ascii')
+        return self[glyph_id]
+
+    def safe_lookup(self, glyph_id):
+        # Like lookup(), but returns None for glyph ids without a name
+        try:
+            return self.lookup(glyph_id)
+        except (KeyError, IndexError, ValueError):
+            return None
+
+
+class Subrs(Index):
+    # Index without any behaviour of its own, used by CFF for the global and
+    # the private subroutines
+    pass
+
+
+class CharStringsIndex(Index):
+    # Index without any behaviour of its own, used by CFF for the
+    # CharStrings (glyph definitions)
+    pass
+
+
+class CFFTable(UnknownTable):
+    '''
+    The CFF table of a font: self.raw holds the table data, decompile()
+    parses it into self.cff and subset() replaces it with a subset.
+    '''
+
+    def decompile(self):
+        self.cff = CFF(self.raw)
+
+    def subset(self, character_map, extra_glyphs):
+        '''
+        Replace self.raw with a subset built for the glyphs that
+        character_map (character code -> glyph id) and extra_glyphs (glyph
+        ids) refer to. character_map is updated in place to hold the glyph
+        ids of the subset. Requires decompile() to have been called.
+
+        Raises NoGlyphs if character_map is not empty but none of its glyph
+        ids has a name in the charset.
+        '''
+        from calibre.utils.fonts.sfnt.cff.writer import Subset
+        lookup = self.cff.charset.safe_lookup
+
+        # Glyph names survive subsetting, glyph ids do not: remember the name
+        # of the glyph of every character code so that character_map can be
+        # rebuilt afterwards
+        charset_map = {}
+        for code, glyph_id in iteritems(character_map):
+            charset_map[code] = lookup(glyph_id)
+
+        charset = set(itervalues(charset_map))
+        charset.discard(None)
+        if character_map and not charset:
+            raise NoGlyphs('This font has no glyphs for the specified characters')
+        charset.update(lookup(glyph_id) for glyph_id in extra_glyphs)
+        charset.discard(None)
+        s = Subset(self.cff, charset)
+
+        # Rebuild character_map with the glyph ids from the subset font.
+        # Codes whose glyph is missing or has the id 0 are dropped.
+        character_map.clear()
+        for code, charname in iteritems(charset_map):
+            glyph_id = s.charname_map.get(charname, None)
+            if glyph_id:
+                character_map[code] = glyph_id
+
+        # Check that raw is parseable
+        CFF(s.raw)
+
+        self.raw = s.raw
--- a/ebook_converter/utils/fonts/sfnt/cmap.py
+++ b/ebook_converter/utils/fonts/sfnt/cmap.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+# Note that the code for creating a BMP table (cmap format 4) is taken with
+# thanks from the fonttools project (BSD licensed).
+
+from struct import unpack_from, calcsize, pack
+from collections import OrderedDict
+
+from calibre.utils.fonts.utils import read_bmp_prefix
+from calibre.utils.fonts.sfnt import UnknownTable, max_power_of_two
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+from polyglot.builtins import range
+
+
+def split_range(start_code, end_code, cmap):  # {{{
+    '''
+    Split the contiguous range of character codes start_code..end_code into
+    sub-segments such that a cmap format 4 subtable can store it "most"
+    efficiently.
+
+    :param cmap: Mapping of character code to glyph id, must contain every
+        code in the range.
+    :return: A tuple ``(start, end)`` of lists. ``end`` has the last code of
+        every sub-segment, ``start`` has the first code of every sub-segment
+        except the first one, so ``len(start) + 1 == len(end)``.
+    '''
+    if start_code == end_code:
+        return [], [end_code]
+
+    # Step 1: find the maximal runs of codes whose glyph ids are consecutive
+    runs = []
+    run_start = None
+    prev_code, prev_gid = start_code, cmap[start_code]
+    for code in range(start_code + 1, end_code + 1):
+        gid = cmap[code]
+        if gid == prev_gid + 1:
+            if run_start is None:
+                run_start = prev_code
+        elif run_start is not None:
+            runs.append((run_start, prev_code))
+            run_start = None
+        prev_code, prev_gid = code, gid
+    if run_start is not None:
+        runs.append((run_start, prev_code))
+    assert prev_code == end_code
+
+    # Step 2: keep only the runs that are worth a segment of their own. A new
+    # segment costs 8 bytes, not using a new segment costs 2 bytes per
+    # character.
+    kept = []
+    for first, last in runs:
+        at_start = first == start_code
+        at_end = last == end_code
+        if at_start and at_end:
+            break  # the whole range is one run, nothing to split
+        # A run touching one edge adds one segment, an inner run adds two
+        threshold = 4 if (at_start or at_end) else 8
+        if last - first + 1 > threshold:
+            kept.append((first, last))
+
+    if not kept:
+        return [], [end_code]
+
+    # Step 3: make the kept runs cover the whole range, by adding the pieces
+    # before, after and between them. Those are the segments in which the
+    # glyph ids are _not_ consecutive.
+    if kept[0][0] != start_code:
+        kept.insert(0, (start_code, kept[0][0] - 1))
+    if kept[-1][1] != end_code:
+        kept.append((kept[-1][1] + 1, end_code))
+
+    segments = [kept[0]]
+    for segment in kept[1:]:
+        gap_start = segments[-1][1] + 1
+        if gap_start != segment[0]:
+            segments.append((gap_start, segment[0] - 1))
+        segments.append(segment)
+
+    # Step 4: convert to the start/end lists. The start of the first segment
+    # is dropped, it is always start_code.
+    start = [first for first, _ in segments[1:]]
+    end = [last for _, last in segments]
+
+    assert len(start) + 1 == len(end)
+    return start, end
+# }}}
+
+
+def set_id_delta(id_delta):  # {{{
+    '''
+    Return id_delta wrapped into the range of a signed 16 bit integer
+    (-0x8000 to 0x7FFF), so that it can be stored in the idDelta array of a
+    cmap format 4 subtable.
+    '''
+    # id_delta is stored as a short. startCode can be between 0 and 64K-1 and
+    # the first glyph index can be between 1 and 64K-1, so the difference of
+    # the two can be anywhere between -(64K-2) and 64K-1, which does not fit.
+    # However the final glyph id is reconstructed as:
+    #    final_gid = (code + id_delta) % 0x10000
+    # so any delta can be replaced by the equivalent value modulo 0x10000
+    # that does fit. For example a startCode of 0 is mapped to a final GID of
+    # 64K-1 by subtracting 1, and a startCode of 64K-1 is mapped to a final
+    # GID of 1 by adding 2.
+    #
+    # Note that -0x8000 is itself a valid short and must be left unchanged,
+    # +0x8000 cannot be packed.
+    return ((id_delta + 0x8000) % 0x10000) - 0x8000
+# }}}
+
+
+class BMPTable(object):
+    '''
+    A cmap subtable, parsed by read_bmp_prefix() into parallel per segment
+    sequences (start_count, end_count, range_offset, id_delta) and the
+    glyph_id_map array.
+    '''
+
+    def __init__(self, raw):
+        self.raw = raw
+        prefix = read_bmp_prefix(raw, 0)
+        (self.start_count, self.end_count, self.range_offset, self.id_delta,
+         self.glyph_id_len, self.glyph_id_map, self.array_len) = prefix
+
+    def _segment_glyph_id(self, i, code):
+        ' Return the glyph id that segment number i assigns to code '
+        ro = self.range_offset[i]
+        if ro == 0:
+            # The glyph id is computed directly from the code
+            glyph_id = self.id_delta[i] + code
+        else:
+            # The glyph id is read from glyph_id_map, a stored value of 0
+            # means "no glyph" and is not shifted by the delta
+            idx = ro//2 + (code - self.start_count[i]) + i - self.array_len
+            glyph_id = self.glyph_id_map[idx]
+            if glyph_id != 0:
+                glyph_id += self.id_delta[i]
+        return glyph_id % 0x10000
+
+    def get_glyph_ids(self, codes):
+        '''
+        Yield, for every code in codes and in the same order, the glyph id it
+        maps to, or 0 if no segment contains the code.
+        '''
+        for code in codes:
+            for i, ec in enumerate(self.end_count):
+                if ec >= code and self.start_count[i] <= code:
+                    yield self._segment_glyph_id(i, code)
+                    break
+            else:
+                yield 0
+
+    def get_glyph_map(self, glyph_ids):
+        '''
+        Return a dict mapping character code to glyph id, for all codes in
+        this table whose glyph id is in glyph_ids. If a code occurs in more
+        than one segment, the first segment wins.
+        '''
+        ans = {}
+        for i, ec in enumerate(self.end_count):
+            for code in range(self.start_count[i], ec + 1):
+                glyph_id = self._segment_glyph_id(i, code)
+                if glyph_id in glyph_ids and code not in ans:
+                    ans[code] = glyph_id
+        return ans
+
+
+class CmapTable(UnknownTable):
+    '''
+    The cmap table of a font. Only the Windows BMP subtable (platform 3,
+    encoding 1, format 4) is understood, it is available as self.bmp_table
+    (None if the font has no such subtable).
+    '''
+
+    def __init__(self, *args, **kwargs):
+        super(CmapTable, self).__init__(*args, **kwargs)
+
+        self.version, self.num_tables = unpack_from(b'>HH', self.raw)
+
+        self.tables = {}
+
+        # Read the encoding records: (platform id, encoding id, offset of the
+        # subtable from the start of the cmap table)
+        offset = 4
+        sz = calcsize(b'>HHL')
+        recs = []
+        for i in range(self.num_tables):
+            platform, encoding, table_offset = unpack_from(b'>HHL', self.raw,
+                    offset)
+            offset += sz
+            recs.append((platform, encoding, table_offset))
+
+        self.bmp_table = None
+
+        # A subtable extends up to the nearest larger subtable offset, or to
+        # the end of the cmap table. The next record cannot be used for this,
+        # as several records may point to the same subtable and subtables
+        # need not be stored in record order.
+        boundaries = [rec[-1] for rec in recs]
+        boundaries.append(len(self.raw))
+
+        for platform, encoding, offset in recs:
+            following = [b for b in boundaries if b > offset]
+            if not following:
+                continue  # The offset points past the end of the table
+            table = self.raw[offset:min(following)]
+            if table:
+                fmt = unpack_from(b'>H', table)[0]
+                if platform == 3 and encoding == 1 and fmt == 4:
+                    self.bmp_table = BMPTable(table)
+
+    def get_character_map(self, chars):
+        '''
+        Get a mapping of character codes to glyph ids in the font.
+
+        :param chars: Iterable of character codes. Codes for which the font
+            has no glyph are left out of the result.
+        :return: An OrderedDict sorted by character code.
+
+        Raises UnsupportedFont if the font has no Windows BMP subtable.
+        '''
+        if self.bmp_table is None:
+            raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
+                    ' Most likely a special purpose font.')
+        chars = sorted(set(chars))
+        ans = OrderedDict()
+        for i, glyph_id in enumerate(self.bmp_table.get_glyph_ids(chars)):
+            if glyph_id > 0:
+                ans[chars[i]] = glyph_id
+        return ans
+
+    def get_glyph_map(self, glyph_ids):
+        '''
+        Get a mapping of character codes to glyph ids for the specified glyph
+        ids.
+
+        Raises UnsupportedFont if the font has no Windows BMP subtable.
+        '''
+        if self.bmp_table is None:
+            raise UnsupportedFont('This font has no Windows BMP cmap subtable.'
+                    ' Most likely a special purpose font.')
+        glyph_ids = frozenset(glyph_ids)
+        return self.bmp_table.get_glyph_map(glyph_ids)
+
+    def set_character_map(self, cmap):
+        '''
+        Replace the contents of this table with a single Windows BMP (format
+        4) subtable built from cmap.
+
+        :param cmap: Mapping of character code to glyph id.
+
+        Sets self.raw to the new table data.
+        '''
+        self.version, self.num_tables = 0, 1
+        fmt = b'>7H'
+        codes = sorted(cmap)
+
+        # Build the segments: every run of consecutive character codes is
+        # further divided by split_range()
+        if not codes:
+            start_code = [0xffff]
+            end_code = [0xffff]
+        else:
+            last_code = codes[0]
+            end_code = []
+            start_code = [last_code]
+
+            for code in codes[1:]:
+                if code == last_code + 1:
+                    last_code = code
+                    continue
+                start, end = split_range(start_code[-1], last_code, cmap)
+                start_code.extend(start)
+                end_code.extend(end)
+                start_code.append(code)
+                last_code = code
+            end_code.append(last_code)
+            # The closing segment
+            start_code.append(0xffff)
+            end_code.append(0xffff)
+
+        id_delta = []
+        id_range_offset = []
+        glyph_index_array = []
+        for i in range(len(end_code)-1):  # skip the closing codes (0xffff)
+            indices = [cmap[char_code] for char_code in range(start_code[i], end_code[i] + 1)]
+            if indices == list(range(indices[0], indices[0] + len(indices))):
+                # indices is a contiguous list, a delta is all that is needed
+                id_delta.append(set_id_delta(indices[0] - start_code[i]))
+                id_range_offset.append(0)
+            else:
+                # Store the glyph ids in the glyph index array. The offset is
+                # in bytes, counted from the position of this segment's entry
+                # in the range offset array.
+                id_delta.append(0)
+                id_range_offset.append(2 * (len(end_code) + len(glyph_index_array) - i))
+                glyph_index_array.extend(indices)
+        id_delta.append(1)  # 0xffff + 1 == 0. So this end code maps to .notdef
+        id_range_offset.append(0)
+
+        # Binary search parameters of the subtable header
+        seg_count = len(end_code)
+        max_exponent = max_power_of_two(seg_count)
+        search_range = 2 * (2 ** max_exponent)
+        entry_selector = max_exponent
+        range_shift = 2 * seg_count - search_range
+
+        # The end codes and the start codes are separated by a padding word
+        char_code_array = end_code + [0] + start_code
+        char_code_array = pack(b'>%dH'%len(char_code_array), *char_code_array)
+        id_delta_array = pack(b'>%dh'%len(id_delta), *id_delta)
+        rest_array = id_range_offset + glyph_index_array
+        rest_array = pack(b'>%dH'%len(rest_array), *rest_array)
+        data = char_code_array + id_delta_array + rest_array
+
+        length = calcsize(fmt) + len(data)
+        header = pack(fmt, 4, length, 0, 2*seg_count, search_range, entry_selector, range_shift)
+        # NOTE(review): from here on bmp_table is the raw bytes of the
+        # subtable, not a BMPTable as set by __init__, so get_character_map()
+        # and get_glyph_map() cannot be used after this method -- confirm
+        # that no caller needs that.
+        self.bmp_table = header + data
+
+        # cmap header followed by the single encoding record (3, 1)
+        fmt = b'>4HL'
+        offset = calcsize(fmt)
+        self.raw = pack(fmt, self.version, self.num_tables, 3, 1, offset) + self.bmp_table
--- a/ebook_converter/utils/fonts/sfnt/common.py
+++ b/ebook_converter/utils/fonts/sfnt/common.py
@@ -0,0 +1,252 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import unpack_from, calcsize
+from collections import OrderedDict, namedtuple
+
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+from polyglot.builtins import range, iteritems
+
+
+class Unpackable(object):
+    '''
+    A cursor over binary data, from which big-endian values are read one
+    after the other.
+
+    ``offset`` is the current read position and is advanced by every call to
+    unpack(). ``start_pos`` is the position the cursor was created with and
+    never changes.
+    '''
+
+    def __init__(self, raw, offset):
+        self.raw, self.offset = raw, offset
+        self.start_pos = offset
+
+    def unpack(self, fmt, single_special=True):
+        '''
+        Read the values described by fmt at the current position and advance
+        past them.
+
+        :param fmt: A struct format (str or bytes) without a byte order
+            prefix, the data is always read as big-endian.
+        :param single_special: If True and fmt describes exactly one value,
+            return that value instead of a tuple with one element.
+        '''
+        if not isinstance(fmt, bytes):
+            fmt = fmt.encode('ascii')
+        # The size must be calculated with the byte order prefix as well.
+        # Without it struct uses native sizes and alignment, which for
+        # example makes '2HL' 16 bytes long instead of 8 on 64 bit Linux.
+        fmt = b'>' + fmt
+        ans = unpack_from(fmt, self.raw, self.offset)
+        self.offset += calcsize(fmt)
+        if single_special and len(ans) == 1:
+            ans = ans[0]
+        return ans
+
+
+class SimpleListTable(list):
+
+    '''
+    A table that contains a list of subtables.
+
+    The data is a count, followed by that many 16 bit offsets, relative to
+    the start of this table. Every offset is parsed with child_class, which
+    subclasses must set. Subclasses can read extra data before the count and
+    after the offsets by overriding read_extra_header() and
+    read_extra_footer().
+    '''
+
+    child_class = None
+
+    def __init__(self, raw, offset):
+        list.__init__(self)
+
+        data = Unpackable(raw, offset)
+        self.read_extra_header(data)
+
+        num_children = data.unpack('H')
+        for _ in range(num_children):
+            child_pos = data.start_pos + data.unpack('H')
+            self.append(self.child_class(raw, child_pos))
+        self.read_extra_footer(data)
+
+    def read_extra_header(self, data):
+        ' Hook for reading data stored before the count, does nothing '
+        pass
+
+    def read_extra_footer(self, data):
+        ' Hook for reading data stored after the offsets, does nothing '
+        pass
+
+
+class ListTable(OrderedDict):
+
+    '''
+    A table that contains an ordered mapping of table tag to subtable.
+
+    The data is a count, followed by that many records of a four byte tag
+    and a 16 bit offset, relative to the start of this table. Every offset is
+    parsed with child_class, which subclasses must set. Subclasses can read
+    extra data before the count by overriding read_extra_header(), and after
+    the records by overriding read_extra_footer().
+    '''
+
+    child_class = None
+
+    def __init__(self, raw, offset):
+        OrderedDict.__init__(self)
+
+        data = Unpackable(raw, offset)
+        self.read_extra_header(data)
+
+        count = data.unpack('H')
+        for i in range(count):
+            tag, coffset = data.unpack('4sH')
+            self[tag] = self.child_class(raw, data.start_pos + coffset)
+
+        self.read_extra_footer(data)
+
+    def read_extra_header(self, data):
+        ' Hook for reading data stored before the count, does nothing '
+        pass
+
+    def read_extra_footer(self, data):
+        ' Hook for reading data stored after the records, does nothing '
+        pass
+
+    def dump(self, prefix=''):
+        '''
+        Print the name of this class and, indented below it, the tags of all
+        subtables, each followed by the dump of the subtable.
+        '''
+        print(prefix, self.__class__.__name__, sep='')
+        prefix += '  '
+        for tag, child in self.items():
+            print(prefix, tag, sep='')
+            # Entries can be None (ScriptTable stores None under b'default'
+            # when there is no default language system), in which case there
+            # is nothing further to dump
+            if child is not None:
+                child.dump(prefix=prefix+'  ')
+
+
+class IndexTable(list):
+
+    '''
+    A list of 16 bit unsigned values, stored as a count followed by the
+    values. Subclasses can read data stored before the count by overriding
+    read_extra_header().
+    '''
+
+    def __init__(self, raw, offset):
+        data = Unpackable(raw, offset)
+        self.read_extra_header(data)
+
+        num_values = data.unpack('H')
+        for _ in range(num_values):
+            value = data.unpack('H')
+            self.append(value)
+
+    def read_extra_header(self, data):
+        ' Hook for reading data stored before the count, does nothing '
+        pass
+
+    def dump(self, prefix=''):
+        ' Print the name of this class, the values are not printed '
+        name = self.__class__.__name__
+        print(prefix, name, sep='')
+
+
+class LanguageSystemTable(IndexTable):
+
+    '''
+    An IndexTable preceded by the lookup order and the required feature
+    index. Only a lookup order of zero is supported.
+    '''
+
+    def read_extra_header(self, data):
+        lookup_order, required_feature_index = data.unpack('2H')
+        self.lookup_order = lookup_order
+        self.required_feature_index = required_feature_index
+        if lookup_order:
+            raise UnsupportedFont('This LanguageSystemTable has an unknown'
+                    ' lookup order: 0x%x'%lookup_order)
+
+
+class ScriptTable(ListTable):
+
+    '''
+    Mapping of tag to LanguageSystemTable. The default language system is
+    stored under the key b'default', with a value of None if there is none.
+    '''
+
+    child_class = LanguageSystemTable
+
+    def read_extra_header(self, data):
+        # The offset of the default language system comes first, it is
+        # relative to the position at which this table starts. Zero means
+        # there is no default.
+        base = data.offset
+        default_offset = data.unpack('H')
+        default = None
+        if default_offset:
+            default = LanguageSystemTable(data.raw, base + default_offset)
+        self[b'default'] = default
+
+
+class ScriptListTable(ListTable):
+
+    'A mapping of tag to ScriptTable'
+
+    child_class = ScriptTable
+
+
+class FeatureTable(IndexTable):
+
+    'An IndexTable preceded by a FeatureParams value'
+
+    def read_extra_header(self, data):
+        # The FeatureParams value is only recorded, it is never interpreted.
+        # A non NULL value is deliberately not treated as an error, since
+        # fonts such as Source Code Pro set it.
+        self.feature_params = data.unpack('H')
+
+
+class FeatureListTable(ListTable):
+
+    'A mapping of tag to FeatureTable'
+
+    child_class = FeatureTable
+
+
+class LookupTable(SimpleListTable):
+
+    '''
+    A list of lookup subtables, preceded by the lookup type and the lookup
+    flag. Subclasses must implement set_child_class(), which is called once
+    the type and flag are known and before any subtable is parsed.
+    '''
+
+    def read_extra_header(self, data):
+        header = data.unpack('2H')
+        self.lookup_type, self.lookup_flag = header
+        self.set_child_class()
+
+    def set_child_class(self):
+        ' Set self.child_class, typically based on self.lookup_type '
+        raise NotImplementedError()
+
+    def read_extra_footer(self, data):
+        # The mark filtering set is present only if bit 0x0010 of the lookup
+        # flag is set, otherwise the attribute is not created at all
+        has_mark_filtering_set = self.lookup_flag & 0x0010
+        if has_mark_filtering_set:
+            self.mark_filtering_set = data.unpack('H')
+
+
+def ExtensionSubstitution(raw, offset, subtable_map=None):
+    '''
+    Parse the extension subtable at offset and return the subtable it points
+    to.
+
+    :param subtable_map: Mapping of lookup type to the callable used to parse
+        a subtable of that type, called as ``callable(raw, offset)``.
+
+    Raises UnsupportedFont if the format of the extension subtable is not 1
+    and KeyError if the lookup type is not in subtable_map.
+    '''
+    if subtable_map is None:
+        subtable_map = {}
+    data = Unpackable(raw, offset)
+    subst_format, extension_lookup_type, extension_offset = data.unpack('2HL')
+    if subst_format != 1:
+        raise UnsupportedFont('ExtensionSubstitution has unknown format: 0x%x'%subst_format)
+    # The offset is relative to the start of the extension subtable
+    return subtable_map[extension_lookup_type](raw, extension_offset + data.start_pos)
+
+
+# A range of glyph ids start..end (inclusive), the coverage index of start is
+# start_coverage_index
+CoverageRange = namedtuple('CoverageRange', 'start end start_coverage_index')
+
+
+class Coverage(object):
+    '''
+    A coverage table, which assigns a coverage index to some glyph ids.
+
+    Format 1 tables store a list of glyph ids (self.glyph_ids, with
+    self.glyph_ids_map mapping glyph id to position in that list), format 2
+    tables store a list of CoverageRange (self.ranges).
+    '''
+
+    def __init__(self, raw, offset, parent_table_name):
+        '''
+        :param parent_table_name: Used only in the error message. Raises
+            UnsupportedFont if the format is neither 1 nor 2.
+        '''
+        data = Unpackable(raw, offset)
+        self.format, count = data.unpack('2H')
+
+        if self.format == 1:
+            self.glyph_ids = data.unpack('%dH'%count, single_special=False)
+            self.glyph_ids_map = dict(
+                (gid, i) for i, gid in enumerate(self.glyph_ids))
+        elif self.format == 2:
+            # Every range is three consecutive values
+            flat = data.unpack('%dH'%(3*count), single_special=False)
+            self.ranges = [
+                CoverageRange(*flat[pos:pos+3]) for pos in range(0, 3*count, 3)]
+        else:
+            raise UnsupportedFont('Unknown Coverage format: 0x%x in %s'%(
+                self.format, parent_table_name))
+
+    def coverage_indices(self, glyph_ids):
+        '''Return map of glyph_id -> coverage index. Map contains only those
+        glyph_ids that are covered by this table and that are present in
+        glyph_ids. The order of the map is the order of glyph_ids.'''
+        ans = OrderedDict()
+        by_list = self.format == 1
+        for gid in glyph_ids:
+            if by_list:
+                idx = self.glyph_ids_map.get(gid, None)
+                if idx is not None:
+                    ans[gid] = idx
+                continue
+            for first, last, first_index in self.ranges:
+                if first <= gid <= last:
+                    ans[gid] = first_index + (gid - first)
+        return ans
+
+
+class UnknownLookupSubTable(object):
+    '''
+    Base class for lookup subtables. Subclasses set ``formats`` to the
+    formats they support and implement initialize(data), which is called
+    with the data positioned after the format and, unless
+    has_initial_coverage is False, the coverage offset.
+    '''
+
+    formats = {}
+
+    def __init__(self, raw, offset):
+        data = Unpackable(raw, offset)
+        fmt = self.format = data.unpack('H')
+        name = self.__class__.__name__
+        if fmt not in self.formats:
+            raise UnsupportedFont('Unknown format for Lookup Subtable %s: 0x%x'%(
+                name, fmt))
+        if self.has_initial_coverage:
+            # The coverage offset is relative to the start of this subtable
+            coverage_pos = data.unpack('H') + data.start_pos
+            self.coverage = Coverage(raw, coverage_pos, name)
+        self.initialize(data)
+
+    @property
+    def has_initial_coverage(self):
+        ' True if the format is immediately followed by a coverage offset '
+        return True
+
+    def all_substitutions(self, glyph_ids):
+        ''' Return a set of all glyph ids that could be substituted for any
+        subset of the specified glyph ids (which must be a set)'''
+        raise NotImplementedError()
+
+    def read_sets(self, data, read_item=None, set_is_index=False):
+        '''
+        Read a count and that many offsets of sets (relative to the start of
+        the subtable). Every set is in turn a count followed by that many 16
+        bit values.
+
+        If set_is_index is True, the values are the items themselves.
+        Otherwise they are offsets relative to the start of the set, and the
+        item is whatever read_item(data) returns with data positioned at
+        that offset.
+
+        :return: A list with one list of items per set, in stored order.
+        '''
+        num_sets = data.unpack('H')
+        set_offsets = data.unpack('%dH'%num_sets, single_special=False)
+        coverage_to_items_map = []
+        for set_offset in set_offsets:
+            data.offset = set_start = set_offset + data.start_pos
+            num_entries = data.unpack('H')
+            entries = data.unpack('%dH'%num_entries, single_special=False)
+            items = []
+            for entry in entries:
+                data.offset = entry + set_start
+                items.append(entry if set_is_index else read_item(data))
+            coverage_to_items_map.append(items)
+        return coverage_to_items_map
--- a/ebook_converter/utils/fonts/sfnt/container.py
+++ b/ebook_converter/utils/fonts/sfnt/container.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+# License: GPLv3 Copyright: 2012, Kovid Goyal <kovid at kovidgoyal.net>
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+from collections import OrderedDict
+from io import BytesIO
+from struct import calcsize, pack
+
+from calibre.utils.fonts.sfnt import UnknownTable, align_block, max_power_of_two
+from calibre.utils.fonts.sfnt.cff.table import CFFTable
+from calibre.utils.fonts.sfnt.cmap import CmapTable
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+from calibre.utils.fonts.sfnt.glyf import GlyfTable
+from calibre.utils.fonts.sfnt.gsub import GSUBTable
+from calibre.utils.fonts.sfnt.head import (
+    HeadTable, HorizontalHeader, OS2Table, PostTable, VerticalHeader
+)
+from calibre.utils.fonts.sfnt.kern import KernTable
+from calibre.utils.fonts.sfnt.loca import LocaTable
+from calibre.utils.fonts.sfnt.maxp import MaxpTable
+from calibre.utils.fonts.utils import checksum_of_block, get_tables, verify_checksums
+
+# OpenType spec: http://www.microsoft.com/typography/otspec/otff.htm
+
+
+class Sfnt(object):
+    '''
+    A font, as a mapping of table tag to table object.
+
+    Tables whose tag is in TABLE_MAP are represented by the corresponding
+    class, all other tables by UnknownTable. Calling the object serializes
+    the font.
+    '''
+
+    TABLE_MAP = {
+        b'head' : HeadTable,
+        b'hhea' : HorizontalHeader,
+        b'vhea' : VerticalHeader,
+        b'maxp' : MaxpTable,
+        b'loca' : LocaTable,
+        b'glyf' : GlyfTable,
+        b'cmap' : CmapTable,
+        b'CFF ' : CFFTable,
+        b'kern' : KernTable,
+        b'GSUB' : GSUBTable,
+        b'OS/2' : OS2Table,
+        b'post' : PostTable,
+    }
+
+    def __init__(self, raw_or_get_table):
+        '''
+        :param raw_or_get_table: Either the contents of a font file as bytes,
+            or a callable that is called with a table tag and returns the
+            data of that table (something empty if the font does not have
+            the table).
+
+        Raises UnsupportedFont if the sfnt version is not recognized, or if
+        the callable does not return data for any of the known tables.
+        '''
+        self.tables = {}
+        if isinstance(raw_or_get_table, bytes):
+            raw = raw_or_get_table
+            self.sfnt_version = raw[:4]
+            # The version is always four bytes long: 1.0 for TrueType, OTTO
+            # for CFF outlines, and the two tags used by Apple (true, typ1)
+            if self.sfnt_version not in {b'\x00\x01\x00\x00', b'OTTO', b'true',
+                    b'typ1'}:
+                raise UnsupportedFont('Font has unknown sfnt version: %r'%self.sfnt_version)
+            for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
+                self.tables[table_tag] = self.TABLE_MAP.get(
+                    table_tag, UnknownTable)(table)
+        else:
+            for table_tag in {
+                b'cmap', b'hhea', b'head', b'hmtx', b'maxp', b'name', b'OS/2',
+                b'post', b'cvt ', b'fpgm', b'glyf', b'loca', b'prep', b'CFF ',
+                b'VORG', b'EBDT', b'EBLC', b'EBSC', b'BASE', b'GSUB', b'GPOS',
+                b'GDEF', b'JSTF', b'gasp', b'hdmx', b'kern', b'LTSH', b'PCLT',
+                b'VDMX', b'vhea', b'vmtx', b'MATH'}:
+                table = bytes(raw_or_get_table(table_tag))
+                if table:
+                    self.tables[table_tag] = self.TABLE_MAP.get(
+                        table_tag, UnknownTable)(table)
+            if not self.tables:
+                raise UnsupportedFont('This font has no tables')
+            # There is no header to read the version from, so it is derived
+            # from the kind of outlines present
+            self.sfnt_version = (b'\0\x01\0\0' if b'glyf' in self.tables
+                                    else b'OTTO')
+
+    def __getitem__(self, key):
+        return self.tables[key]
+
+    def __contains__(self, key):
+        return key in self.tables
+
+    def __delitem__(self, key):
+        del self.tables[key]
+
+    def __iter__(self):
+        '''Iterate over the table tags in order.'''
+        # Although the optimal order is not alphabetical, the OTF spec says
+        # they should be alphabetical, so we stick with that. See
+        # http://partners.adobe.com/public/developer/opentype/index_recs.html
+        # for optimal order.
+        for x in sorted(self.tables):
+            yield x
+
+    def pop(self, key, default=None):
+        return self.tables.pop(key, default)
+
+    def get(self, key, default=None):
+        return self.tables.get(key, default)
+
+    def sizes(self):
+        ''' Return an OrderedDict of table tag to the length of the table '''
+        ans = OrderedDict()
+        for tag in self:
+            ans[tag] = len(self[tag])
+        return ans
+
+    def __call__(self, stream=None):
+        '''
+        Serialize the font.
+
+        :param stream: The stream to write to, starting at position zero. A
+            new BytesIO is used if not specified. As the result is read back
+            with getvalue(), the stream must provide that method.
+        :return: A tuple of the serialized font and an OrderedDict of table
+            tag to the unpadded length of the table.
+        '''
+        stream = BytesIO() if stream is None else stream
+
+        def spack(*args):
+            stream.write(pack(*args))
+
+        stream.seek(0)
+
+        # Write header
+        num_tables = len(self.tables)
+        ln2 = max_power_of_two(num_tables)
+        srange = (2**ln2) * 16
+        spack(b'>4s4H',
+            self.sfnt_version, num_tables, srange, ln2, num_tables * 16 - srange)
+
+        # Write the table directory. The data of the tables is collected and
+        # written after it, so the first table starts where the directory
+        # ends.
+        head_offset = None
+        table_data = []
+        offset = stream.tell() + (calcsize(b'>4s3L') * num_tables)
+        sizes = OrderedDict()
+        for tag in self:
+            table = self.tables[tag]
+            raw = table()
+            table_len = len(raw)
+            if tag == b'head':
+                head_offset = offset
+                # Zero the checksum adjustment (bytes 8-12 of the head
+                # table), it is filled in below once the whole font has been
+                # written
+                raw = raw[:8] + b'\0\0\0\0' + raw[12:]
+            raw = align_block(raw)
+            checksum = checksum_of_block(raw)
+            spack(b'>4s3L', tag, checksum, offset, table_len)
+            offset += len(raw)
+            table_data.append(raw)
+            sizes[tag] = table_len
+
+        for x in table_data:
+            stream.write(x)
+
+        # Fill in the checksum adjustment of the head table.
+        # NOTE(review): head_offset is still None here if the font has no
+        # head table, in which case the seek fails with a TypeError.
+        checksum = checksum_of_block(stream.getvalue())
+        q = (0xB1B0AFBA - checksum) & 0xffffffff
+        stream.seek(head_offset + 8)
+        spack(b'>L', q)
+
+        return stream.getvalue(), sizes
+
+
+def test_roundtrip(ff=None):
+    '''
+    Check that parsing and re-serializing a font gives a font with valid
+    checksums, the same first twelve bytes and the same size.
+
+    :param ff: Path to the font file to test with. If None, the font is
+        loaded with P(), which is not defined in this module (presumably a
+        builtin provided by the application -- TODO confirm).
+
+    Raises ValueError if the header or the size differ.
+    '''
+    if ff is not None:
+        with open(ff, 'rb') as f:
+            data = f.read()
+    else:
+        data = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
+    rd, _sizes = Sfnt(data)()
+    verify_checksums(rd)
+    if rd[:12] != data[:12]:
+        raise ValueError('Roundtripping failed, font header not the same')
+    if len(rd) != len(data):
+        raise ValueError('Roundtripping failed, size different (%d vs. %d)'%
+                         (len(data), len(rd)))
+
+
+if __name__ == '__main__':
+    import sys
+    # Test with the font named by the last command line argument. Without
+    # arguments sys.argv[-1] is the path of this script itself, so pass None
+    # to fall back to the default font instead.
+    test_roundtrip(sys.argv[-1] if len(sys.argv) > 1 else None)
--- a/ebook_converter/utils/fonts/sfnt/errors.py
+++ b/ebook_converter/utils/fonts/sfnt/errors.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+class UnsupportedFont(ValueError):
+    ' Raised when a font uses a format or structure that is not supported '
+
+
+class NoGlyphs(ValueError):
+    ' Raised when a font has no glyphs for any of the requested characters '
+
--- a/ebook_converter/utils/fonts/sfnt/glyf.py
+++ b/ebook_converter/utils/fonts/sfnt/glyf.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import unpack_from
+from collections import OrderedDict
+
+from calibre.utils.fonts.sfnt import UnknownTable
+from polyglot.builtins import iteritems
+
+# Bits of the flags field that starts every component record of a composite
+# glyph. CompositeGlyph uses them to work out how long each record is.
+ARG_1_AND_2_ARE_WORDS      = 0x0001  # if set args are words otherwise they are bytes
+ARGS_ARE_XY_VALUES         = 0x0002  # if set args are xy values, otherwise they are points
+ROUND_XY_TO_GRID           = 0x0004  # for the xy values if above is true
+WE_HAVE_A_SCALE            = 0x0008  # Sx = Sy, otherwise scale == 1.0
+NON_OVERLAPPING            = 0x0010  # set to same value for all components (obsolete!)
+MORE_COMPONENTS            = 0x0020  # indicates at least one more glyph after this one
+WE_HAVE_AN_X_AND_Y_SCALE   = 0x0040  # Sx, Sy
+WE_HAVE_A_TWO_BY_TWO       = 0x0080  # t00, t01, t10, t11
+WE_HAVE_INSTRUCTIONS       = 0x0100  # instructions follow
+USE_MY_METRICS             = 0x0200  # apply these metrics to parent glyph
+OVERLAP_COMPOUND           = 0x0400  # used by Apple in GX fonts
+SCALED_COMPONENT_OFFSET    = 0x0800  # composite designed to have the component offset scaled (designed for Apple)
+UNSCALED_COMPONENT_OFFSET  = 0x1000  # composite designed not to have the component offset scaled (designed for MS)
+
+
+class SimpleGlyph(object):
+    '''
+    A glyph that does not refer to any other glyph. The glyph data is kept
+    as is, calling the object returns it and len() is its length.
+    '''
+
+    def __init__(self, num_of_countours, raw):
+        self.raw = raw
+        self.num_of_countours = num_of_countours
+        self.is_composite = False
+        # The list of glyph indices referred to by this glyph, will always be
+        # empty for a simple glyph and not empty for a composite glyph
+        self.glyph_indices = []
+
+    def __call__(self):
+        return self.raw
+
+    def __len__(self):
+        return len(self.raw)
+
+
+class CompositeGlyph(SimpleGlyph):
+    '''
+    A glyph built from other glyphs. The indices of the glyphs it refers to
+    are collected in self.glyph_indices, in the order they are stored.
+    '''
+
+    def __init__(self, num_of_countours, raw):
+        super(CompositeGlyph, self).__init__(num_of_countours, raw)
+        self.is_composite = True
+
+        # The component records start at byte 10 of the glyph data. Walk
+        # over them, until a record without the MORE_COMPONENTS flag is
+        # found.
+        pos = 10
+        more = True
+        while more:
+            flags, glyph_index = unpack_from(b'>HH', raw, pos)
+            self.glyph_indices.append(glyph_index)
+            # The flags and glyph index are followed by two arguments, that
+            # are either words or bytes
+            pos += 4 + (4 if flags & ARG_1_AND_2_ARE_WORDS else 2)
+            # and then by at most one of the three kinds of transformation
+            if flags & WE_HAVE_A_SCALE:
+                pos += 2
+            elif flags & WE_HAVE_AN_X_AND_Y_SCALE:
+                pos += 4
+            elif flags & WE_HAVE_A_TWO_BY_TWO:
+                pos += 8
+            more = flags & MORE_COMPONENTS
+
+
+class GlyfTable(UnknownTable):
+    ' The glyf table, which holds the data of all glyphs '
+
+    def glyph_data(self, offset, length, as_raw=False):
+        '''
+        Return the glyph stored in the length bytes at offset, as bytes if
+        as_raw is True, otherwise as a SimpleGlyph or CompositeGlyph. A glyph
+        with no data is a SimpleGlyph with zero contours, a glyph with a
+        negative number of contours is a CompositeGlyph.
+        '''
+        raw = self.raw[offset:offset+length]
+        if as_raw:
+            return raw
+        if not raw:
+            return SimpleGlyph(0, raw)
+        num_of_countours, = unpack_from(b'>h', raw)
+        glyph_class = CompositeGlyph if num_of_countours < 0 else SimpleGlyph
+        return glyph_class(num_of_countours, raw)
+
+    def update(self, sorted_glyph_map):
+        '''
+        Replace the contents of this table with the glyphs in
+        sorted_glyph_map (a mapping of glyph id to glyph object), stored in
+        iteration order and each padded with zero bytes to a multiple of four
+        bytes.
+
+        :return: An OrderedDict of glyph id to ``(offset, length)`` of the
+            padded glyph in the new table.
+        '''
+        ans = OrderedDict()
+        chunks = []
+        pos = 0
+        for glyph_id, glyph in iteritems(sorted_glyph_map):
+            raw = glyph()
+            padding = -len(raw) % 4
+            if padding:
+                raw += b'\0' * padding
+            ans[glyph_id] = pos, len(raw)
+            pos += len(raw)
+            chunks.append(raw)
+        self.raw = b''.join(chunks)
+        return ans
--- a/ebook_converter/utils/fonts/sfnt/gsub.py
+++ b/ebook_converter/utils/fonts/sfnt/gsub.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import unpack_from
+from functools import partial
+
+from calibre.utils.fonts.sfnt import UnknownTable, FixedProperty
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+from calibre.utils.fonts.sfnt.common import (ScriptListTable, FeatureListTable,
+        SimpleListTable, LookupTable, ExtensionSubstitution,
+        UnknownLookupSubTable)
+from polyglot.builtins import iteritems, itervalues
+
+
+class SingleSubstitution(UnknownLookupSubTable):
+    '''
+    GSUB lookup type 1 subtable. Format 1 stores a single signed delta that
+    is added to the glyph id, format 2 stores an array of substitute glyph
+    ids.
+    '''
+
+    formats = {1, 2}
+
+    def initialize(self, data):
+        '''Read self.delta (format 1) or self.substitutes (format 2).'''
+        if self.format == 1:
+            self.delta = data.unpack('h')
+        else:
+            count = data.unpack('H')
+            self.substitutes = data.unpack('%dH'%count, single_special=False)
+
+    def all_substitutions(self, glyph_ids):
+        '''
+        Return the set of glyph ids that the glyphs in glyph_ids can be
+        replaced with by this subtable.
+        '''
+        # Iterating gid_index_map yields glyph ids, its values are used as
+        # indices into self.substitutes
+        gid_index_map = self.coverage.coverage_indices(glyph_ids)
+        if self.format == 1:
+            # Glyph ids are unsigned 16 bit numbers and the OpenType
+            # specification defines this addition as modulo 65536, so wrap
+            # instead of producing negative or too large ids
+            return {(gid + self.delta) & 0xFFFF for gid in gid_index_map}
+        return {self.substitutes[i] for i in itervalues(gid_index_map)}
+
+
+class MultipleSubstitution(UnknownLookupSubTable):
+    '''
+    GSUB lookup type 2 subtable. Keeps, per coverage index, the glyph ids
+    read by read_sets().
+    '''
+
+    formats = {1}
+
+    def initialize(self, data):
+        self.coverage_to_subs_map = self.read_sets(data, set_is_index=True)
+
+    def all_substitutions(self, glyph_ids):
+        '''
+        Return the union of the substitute glyph ids of every glyph in
+        glyph_ids that is covered by this subtable.
+        '''
+        covered = self.coverage.coverage_indices(glyph_ids)
+        ans = set()
+        for idx in itervalues(covered):
+            ans.update(self.coverage_to_subs_map[idx])
+        return ans
+
+
+class AlternateSubstitution(MultipleSubstitution):
+    '''
+    GSUB lookup type 3 subtable. Parsed and queried exactly like
+    MultipleSubstitution.
+    '''
+
+
+class LigatureSubstitution(UnknownLookupSubTable):
+    '''
+    GSUB lookup type 4 subtable. Keeps, per coverage index, the
+    (ligature glyph, remaining components) pairs read by read_ligature().
+    '''
+
+    formats = {1}
+
+    def initialize(self, data):
+        self.coverage_to_lig_map = self.read_sets(data, self.read_ligature)
+
+    def read_ligature(self, data):
+        '''
+        Read one ligature record and return (ligature glyph id, components).
+        The stored count includes the first component, which is not part of
+        the record, hence count-1 ids are read.
+        '''
+        lig_glyph, count = data.unpack('HH')
+        rest = data.unpack('%dH'%(count-1), single_special=False)
+        return (lig_glyph, rest)
+
+    def all_substitutions(self, glyph_ids):
+        '''
+        Return the ligature glyph ids all of whose components (the covered
+        first glyph plus the remaining ones) are present in glyph_ids.
+        '''
+        covered = self.coverage.coverage_indices(glyph_ids)
+        ans = set()
+        for first_glyph, idx in iteritems(covered):
+            for lig_glyph, rest in self.coverage_to_lig_map[idx]:
+                if set((first_glyph,) + rest).issubset(glyph_ids):
+                    ans.add(lig_glyph)
+        return ans
+
+
+class ContexttualSubstitution(UnknownLookupSubTable):
+    '''
+    GSUB lookup type 5 subtable. Its contents are not parsed, it never
+    contributes any substitute glyphs.
+    '''
+
+    formats = {1, 2, 3}
+
+    def initialize(self, data):
+        pass  # TODO
+
+    def all_substitutions(self, glyph_ids):
+        # This table only defines substitutions in terms of other tables
+        return set()
+
+    @property
+    def has_initial_coverage(self):
+        # True for every format except format 3
+        return self.format != 3
+
+
+class ChainingContextualSubstitution(UnknownLookupSubTable):
+    '''
+    GSUB lookup type 6 subtable. Its contents are not parsed, it never
+    contributes any substitute glyphs.
+    '''
+
+    formats = {1, 2, 3}
+
+    def initialize(self, data):
+        pass  # TODO
+
+    def all_substitutions(self, glyph_ids):
+        # This table only defines substitutions in terms of other tables
+        return set()
+
+    @property
+    def has_initial_coverage(self):
+        # True for every format except format 3
+        return self.format != 3
+
+
+class ReverseChainSingleSubstitution(UnknownLookupSubTable):
+    '''
+    GSUB lookup type 8 subtable. The backtrack and lookahead coverage
+    offsets are read but not used yet, only the array of substitute glyph
+    ids is kept.
+    '''
+
+    formats = {1}
+
+    def initialize(self, data):
+        '''Read the coverage offsets and self.substitutes from data.'''
+        backtrack_count = data.unpack('H')
+        backtrack_offsets = data.unpack('%dH'%backtrack_count,
+                single_special=False)
+        lookahead_count = data.unpack('H')
+        lookahead_offsets = data.unpack('%dH'%lookahead_count,
+                single_special=False)
+        backtrack_offsets = [data.start_pos + x for x in backtrack_offsets]
+        lookahead_offsets = [data.start_pos + x for x in lookahead_offsets]
+        backtrack_offsets, lookahead_offsets  # TODO: Use these
+        count = data.unpack('H')
+        # self.substitutes is indexed in all_substitutions(), so it has to be
+        # a sequence even when count is 1. Every other array in this module
+        # is read with single_special=False for that reason (presumably the
+        # default returns a lone value as a scalar -- verify against
+        # data.unpack).
+        self.substitutes = data.unpack('%dH'%count, single_special=False)
+
+    def all_substitutions(self, glyph_ids):
+        '''
+        Return the set of substitute glyph ids for the glyphs in glyph_ids
+        that are covered by this subtable.
+        '''
+        gid_index_map = self.coverage.coverage_indices(glyph_ids)
+        return {self.substitutes[i] for i in itervalues(gid_index_map)}
+
+
+# Maps a GSUB lookup type to the class used for its subtables. Lookup type 7
+# is deliberately absent, GSUBLookupTable.set_child_class() handles it by
+# wrapping this map in ExtensionSubstitution.
+subtable_map = {
+        1: SingleSubstitution,
+        2: MultipleSubstitution,
+        3: AlternateSubstitution,
+        4: LigatureSubstitution,
+        5: ContexttualSubstitution,
+        6: ChainingContextualSubstitution,
+        8: ReverseChainSingleSubstitution,
+}
+
+
+class GSUBLookupTable(LookupTable):
+    '''A lookup table whose subtable class is chosen from subtable_map.'''
+
+    def set_child_class(self):
+        '''
+        Set self.child_class based on self.lookup_type. Type 7 gets
+        ExtensionSubstitution bound to subtable_map, every other type is
+        looked up in subtable_map directly (KeyError for unknown types).
+        '''
+        lookup_type = self.lookup_type
+        if lookup_type != 7:
+            self.child_class = subtable_map[lookup_type]
+            return
+        self.child_class = partial(ExtensionSubstitution,
+                subtable_map=subtable_map)
+
+
+class LookupListTable(SimpleListTable):
+    '''A SimpleListTable whose child class is GSUBLookupTable.'''
+
+    child_class = GSUBLookupTable
+
+
+class GSUBTable(UnknownTable):
+    '''
+    The GSUB table. After decompile() has been called, all_substitutions()
+    can be used to find the glyphs that a set of glyphs may be replaced
+    with.
+    '''
+
+    version = FixedProperty('_version')
+
+    def decompile(self):
+        '''
+        Parse the table header and the script, feature and lookup lists.
+        Raises UnsupportedFont if the version is not 0x10000.
+        '''
+        (self._version, self.scriptlist_offset, self.featurelist_offset,
+                self.lookuplist_offset) = unpack_from(b'>L3H', self.raw)
+        if self._version != 0x10000:
+            raise UnsupportedFont('The GSUB table has unknown version: 0x%x'%
+                    self._version)
+
+        self.script_list_table = ScriptListTable(self.raw,
+                self.scriptlist_offset)
+        # self.script_list_table.dump()
+
+        self.feature_list_table = FeatureListTable(self.raw,
+                self.featurelist_offset)
+        # self.feature_list_table.dump()
+
+        self.lookup_list_table = LookupListTable(self.raw,
+                self.lookuplist_offset)
+
+    def all_substitutions(self, glyph_ids):
+        '''
+        Return the set of glyph ids that can be produced from glyph_ids by
+        the substitutions in this table, excluding glyph_ids themselves.
+        Substitutions are applied cumulatively: glyphs found by earlier
+        subtables are fed to later ones.
+        '''
+        glyph_ids = frozenset(glyph_ids)
+        ans = set(glyph_ids)
+        for lookup_table in self.lookup_list_table:
+            for subtable in lookup_table:
+                glyphs = subtable.all_substitutions(ans)
+                if glyphs:
+                    ans |= glyphs
+        # Subtract the input glyphs themselves. {glyph_ids} would be a set
+        # containing the frozenset as its only element, so subtracting it
+        # removes nothing.
+        return ans - glyph_ids
--- a/ebook_converter/utils/fonts/sfnt/head.py
+++ b/ebook_converter/utils/fonts/sfnt/head.py
@@ -0,0 +1,213 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import unpack_from, pack, calcsize
+
+from calibre.utils.fonts.sfnt import UnknownTable, DateTimeProperty, FixedProperty
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+from calibre.utils.fonts.sfnt.loca import read_array
+from polyglot.builtins import zip
+
+
+class HeadTable(UnknownTable):
+    '''
+    The head table. The fields listed in __init__ are unpacked from self.raw
+    into attributes of the same name, update() packs the current attribute
+    values back into self.raw.
+    '''
+
+    created = DateTimeProperty('_created')
+    modified = DateTimeProperty('_modified')
+    version_number = FixedProperty('_version_number')
+    font_revision = FixedProperty('_font_revision')
+
+    def __init__(self, *args, **kwargs):
+        super(HeadTable, self).__init__(*args, **kwargs)
+
+        # (attribute name, struct format code), in storage order
+        layout = (
+            ('_version_number', 'l'),
+            ('_font_revision', 'l'),
+            ('checksum_adjustment', 'L'),
+            ('magic_number', 'L'),
+            ('flags', 'H'),
+            ('units_per_em', 'H'),
+            ('_created', 'q'),
+            ('_modified', 'q'),
+            ('x_min', 'h'),
+            ('y_min', 'h'),
+            ('x_max', 'h'),
+            ('y_max', 'h'),
+            ('mac_style', 'H'),
+            ('lowest_rec_ppem', 'H'),
+            ('font_direction_hint', 'h'),
+            ('index_to_loc_format', 'h'),
+            ('glyph_data_format', 'h'),
+        )
+        names = tuple(name for name, code in layout)
+        codes = ''.join(code for name, code in layout)
+
+        self._fmt = ('>' + codes).encode('ascii')
+        self._fields = names
+
+        for name, value in zip(names, unpack_from(self._fmt, self.raw)):
+            setattr(self, name, value)
+
+    def update(self):
+        '''Rebuild self.raw from the current values of the fields.'''
+        self.raw = pack(self._fmt,
+                *(getattr(self, name) for name in self._fields))
+
+
+class HorizontalHeader(UnknownTable):
+    '''
+    A horizontal header table. read_data() unpacks its fields and the
+    per-glyph horizontal metrics from the accompanying hmtx table.
+    '''
+
+    version_number = FixedProperty('_version_number')
+
+    def read_data(self, hmtx):
+        '''
+        Unpack the header fields into attributes and read advance_widths and
+        left_side_bearings from hmtx.raw. Does nothing if the data has
+        already been read. Raises UnsupportedFont if hmtx is too short.
+        '''
+        if hasattr(self, 'ascender'):
+            return
+        # (attribute name, struct format code), in storage order
+        layout = (
+            ('_version_number', 'l'),
+            ('ascender', 'h'),
+            ('descender', 'h'),
+            ('line_gap', 'h'),
+            ('advance_width_max', 'H'),
+            ('min_left_side_bearing', 'h'),
+            ('min_right_side_bearing', 'h'),
+            ('x_max_extent', 'h'),
+            ('caret_slope_rise', 'h'),
+            ('caret_slop_run', 'h'),
+            ('caret_offset', 'h'),
+            ('r1', 'h'),
+            ('r2', 'h'),
+            ('r3', 'h'),
+            ('r4', 'h'),
+            ('metric_data_format', 'h'),
+            ('number_of_h_metrics', 'H'),
+        )
+        names = tuple(name for name, code in layout)
+        codes = ''.join(code for name, code in layout)
+
+        self._fmt = ('>' + codes).encode('ascii')
+        self._fields = names
+
+        for name, value in zip(names, unpack_from(self._fmt, self.raw)):
+            setattr(self, name, value)
+
+        # Every metric record is 4 bytes: an unsigned advance width followed
+        # by a signed left side bearing
+        metrics = hmtx.raw
+        nbytes = 4 * self.number_of_h_metrics
+        if len(metrics) < nbytes:
+            raise UnsupportedFont('The hmtx table has insufficient data')
+        long_hor_metric = metrics[:nbytes]
+        self.advance_widths = read_array(long_hor_metric)[0::2]
+        self.left_side_bearings = read_array(long_hor_metric, 'h')[1::2]
+
+
+class VerticalHeader(UnknownTable):
+    '''
+    A vertical header table. read_data() unpacks its fields and the
+    per-glyph vertical metrics from the accompanying vmtx table.
+    '''
+
+    version_number = FixedProperty('_version_number')
+
+    def read_data(self, vmtx):
+        '''
+        Unpack the header fields into attributes and read advance_heights
+        and top_side_bearings from vmtx.raw. Does nothing if the data has
+        already been read. Raises UnsupportedFont if vmtx is too short.
+        '''
+        if hasattr(self, 'ascender'):
+            return
+        field_types = (
+            '_version_number' , 'l',
+            'ascender', 'h',
+            'descender', 'h',
+            'line_gap', 'h',
+            'advance_height_max', 'H',
+            'min_top_side_bearing', 'h',
+            'min_bottom_side_bearing', 'h',
+            'y_max_extent', 'h',
+            'caret_slope_rise', 'h',
+            'caret_slop_run', 'h',
+            'caret_offset', 'h',
+            'r1', 'h',
+            'r2', 'h',
+            'r3', 'h',
+            'r4', 'h',
+            'metric_data_format', 'h',
+            'number_of_v_metrics', 'H',
+        )
+
+        # Odd entries are the struct format codes, even entries the names
+        self._fmt = ('>%s'%(''.join(field_types[1::2]))).encode('ascii')
+        self._fields = field_types[0::2]
+
+        for f, val in zip(self._fields, unpack_from(self._fmt, self.raw)):
+            setattr(self, f, val)
+
+        raw = vmtx.raw
+        num = self.number_of_v_metrics
+        if len(raw) < 4*num:
+            raise UnsupportedFont('The vmtx table has insufficient data')
+        # Every metric record is 4 bytes: an unsigned advance height followed
+        # by a signed top side bearing
+        long_ver_metric = raw[:4*num]
+        a = read_array(long_ver_metric)
+        self.advance_heights = a[0::2]
+        a = read_array(long_ver_metric, 'h')
+        self.top_side_bearings = a[1::2]
+
+
+class OS2Table(UnknownTable):
+    '''
+    The OS/2 table. read_data() unpacks its fields into attributes,
+    zero_fstype() clears the fs_type field.
+    '''
+
+    def read_data(self):
+        '''
+        Unpack the fields of the table into attributes of the same name.
+        Does nothing if the data has already been read. The trailing fields
+        starting at code_page_range are only read when the table version is
+        greater than 1.
+        '''
+        # average_char_width is set by the loop below, so its presence means
+        # the table has already been parsed. (The previous check looked for
+        # char_width, which is never set, so it never short-circuited.)
+        if hasattr(self, 'average_char_width'):
+            return
+        ver, = unpack_from(b'>H', self.raw)
+        field_types = [
+            'version' , 'H',
+            'average_char_width', 'h',
+            'weight_class', 'H',
+            'width_class', 'H',
+            'fs_type', 'H',
+            'subscript_x_size', 'h',
+            'subscript_y_size', 'h',
+            'subscript_x_offset', 'h',
+            'subscript_y_offset', 'h',
+            'superscript_x_size', 'h',
+            'superscript_y_size', 'h',
+            'superscript_x_offset', 'h',
+            'superscript_y_offset', 'h',
+            'strikeout_size', 'h',
+            'strikeout_position', 'h',
+            'family_class', 'h',
+            'panose', '10s',
+            'ranges', '16s',
+            'vendor_id', '4s',
+            'selection', 'H',
+            'first_char_index', 'H',
+            'last_char_index', 'H',
+            'typo_ascender', 'h',
+            'typo_descender', 'h',
+            'typo_line_gap', 'h',
+            'win_ascent', 'H',
+            'win_descent', 'H',
+        ]
+        if ver > 1:
+            field_types += [
+                'code_page_range', '8s',
+                'x_height', 'h',
+                'cap_height', 'h',
+                'default_char', 'H',
+                'break_char', 'H',
+                'max_context', 'H',
+            ]
+
+        # Odd entries are the struct format codes, even entries the names
+        self._fmt = ('>%s'%(''.join(field_types[1::2]))).encode('ascii')
+        self._fields = field_types[0::2]
+
+        for f, val in zip(self._fields, unpack_from(self._fmt, self.raw)):
+            setattr(self, f, val)
+
+    def zero_fstype(self):
+        '''Set the fs_type field to zero, both in self.raw and on self.'''
+        # fs_type is the two bytes right after version, average_char_width,
+        # weight_class and width_class
+        prefix = calcsize(b'>HhHH')
+        self.raw = self.raw[:prefix] + b'\0\0' + self.raw[prefix+2:]
+        self.fs_type = 0
+
+
+class PostTable(UnknownTable):
+    '''
+    The post table. read_data() unpacks the first four fields of the table.
+    '''
+
+    version_number = FixedProperty('_version')
+    italic_angle = FixedProperty('_italic_angle')
+
+    def read_data(self):
+        '''
+        Unpack the version, italic angle, underline position and underline
+        thickness from self.raw. Does nothing if already done.
+        '''
+        if hasattr(self, 'underline_position'):
+            return
+        header = unpack_from(b'>llhh', self.raw)
+        self._version, self._italic_angle = header[0], header[1]
+        self.underline_position, self.underline_thickness = header[2], header[3]
--- a/ebook_converter/utils/fonts/sfnt/kern.py
+++ b/ebook_converter/utils/fonts/sfnt/kern.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import unpack_from, calcsize, pack, error as struct_error
+
+from calibre.utils.fonts.sfnt import (UnknownTable, FixedProperty,
+        max_power_of_two)
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+from polyglot.builtins import range
+
+
+class KernTable(UnknownTable):
+    '''
+    The kern table. restrict_to_glyphs() removes, from Format 0 subtables,
+    all kerning pairs that refer to glyphs outside a given set.
+    '''
+
+    version = FixedProperty('_version')
+
+    def __init__(self, *args, **kwargs):
+        super(KernTable, self).__init__(*args, **kwargs)
+        # Try the header made of two 16 bit fields first
+        self._version, self.num_tables = unpack_from(b'>HH', self.raw)
+        if self._version == 1 and len(self.raw) >= 8:
+            # A leading 16 bit value of 1 is the upper half of the 32 bit
+            # version 0x10000, re-read the header as two 32 bit fields
+            self._version, self.num_tables = unpack_from(b'>LL', self.raw)
+        self.headerfmt = b'>HH' if self._version == 0 else b'>LL'
+
+    def restrict_to_glyphs(self, glyph_ids):
+        '''
+        Rebuild self.raw keeping only kerning pairs both of whose glyphs are
+        in glyph_ids. Only Format 0 subtables are filtered, and dropped
+        entirely when no pairs remain; subtables in other formats are copied
+        unchanged. Raises UnsupportedFont for unknown table versions.
+        '''
+        if self._version not in {0, 0x10000}:
+            raise UnsupportedFont('kern table has version: %x'%self._version)
+        offset = 4 if (self._version == 0) else 8
+        tables = []
+        for _ in range(self.num_tables):
+            if self._version == 0:
+                version, length, coverage = unpack_from(b'>3H', self.raw, offset)
+                # The subtable format lives in bits 8-15 of coverage, the
+                # leading field is the subtable version, not its format
+                table_format = coverage >> 8
+            else:
+                length, coverage = unpack_from(b'>LH', self.raw, offset)
+                table_format = coverage & 0xff
+            raw = self.raw[offset:offset+length]
+            # Move to the next subtable before deciding what to do with this
+            # one, otherwise dropping an empty subtable would make the loop
+            # read the same subtable again and lose the ones after it
+            offset += length
+            if table_format == 0:
+                raw = self.restrict_format_0(raw, glyph_ids)
+                if not raw:
+                    continue
+            tables.append(raw)
+        self.raw = pack(self.headerfmt, self._version, len(tables)) + b''.join(tables)
+
+    def restrict_format_0(self, raw, glyph_ids):
+        '''
+        Return a copy of the Format 0 subtable raw that contains only the
+        pairs both of whose glyphs are in glyph_ids, or b'' if no pairs
+        remain. Raises UnsupportedFont if the pairs do not end exactly at
+        the end of raw.
+        '''
+        if self._version == 0:
+            version, length, coverage, npairs = unpack_from(b'>4H', raw)
+            headerfmt = b'>3H'
+        else:
+            length, coverage, tuple_index, npairs = unpack_from(b'>L3H', raw)
+            headerfmt = b'>L2H'
+
+        # The pairs follow the subtable header and four 16 bit search fields
+        offset = calcsize(headerfmt + b'4H')
+        entries = []
+        entrysz = calcsize(b'>2Hh')
+        for _ in range(npairs):
+            try:
+                left, right, value = unpack_from(b'>2Hh', raw, offset)
+            except struct_error:
+                offset = len(raw)
+                break  # Buggy kern table
+            if left in glyph_ids and right in glyph_ids:
+                entries.append(pack(b'>2Hh', left, right, value))
+            offset += entrysz
+
+        if offset != len(raw):
+            raise UnsupportedFont('This font has extra data at the end of'
+                    ' a Format 0 kern subtable')
+
+        npairs = len(entries)
+        if npairs == 0:
+            return b''
+
+        # Recompute the search fields for the new number of pairs, each pair
+        # occupies 6 bytes
+        entry_selector = max_power_of_two(npairs)
+        search_range = (2 ** entry_selector) * 6
+        range_shift = (npairs - (2 ** entry_selector)) * 6
+
+        entries = b''.join(entries)
+        length = calcsize(headerfmt + b'4H') + len(entries)
+        if self._version == 0:
+            header = pack(headerfmt, version, length, coverage)
+        else:
+            header = pack(headerfmt, length, coverage, tuple_index)
+        return header + pack(b'>4H', npairs, search_range, entry_selector,
+                range_shift) + entries
--- a/ebook_converter/utils/fonts/sfnt/loca.py
+++ b/ebook_converter/utils/fonts/sfnt/loca.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import array, sys
+from operator import itemgetter
+from itertools import repeat
+
+from calibre.utils.fonts.sfnt import UnknownTable
+from polyglot.builtins import iteritems, range
+
+
+def four_byte_type_code():
+    '''
+    Return the first array type code among 'I' and 'L' whose items are four
+    bytes wide on this platform, or None if neither is.
+    '''
+    return next((c for c in 'IL' if array.array(c).itemsize == 4), None)
+
+
+def read_array(data, fmt='H'):
+    '''
+    Return data, interpreted as big-endian items of array type code fmt, as
+    an array.array in native byte order.
+    '''
+    values = array.array(fmt, data)
+    if sys.byteorder == 'big':
+        return values
+    values.byteswap()
+    return values
+
+
+class LocaTable(UnknownTable):
+    '''
+    The loca table: the list of offsets of the glyphs. Entry i is the
+    offset of glyph i, the following entry marks its end.
+    '''
+
+    def load_offsets(self, head_table, maxp_table):
+        '''
+        Read self.offset_map (a list of byte offsets) from self.raw. The
+        item size is chosen by head_table.index_to_loc_format: 0 means 16
+        bit items holding half of the real offset, anything else means 32
+        bit items. maxp_table is not used.
+        '''
+        fmt = 'H' if head_table.index_to_loc_format == 0 else four_byte_type_code()
+        locs = read_array(self.raw, fmt)
+        self.offset_map = locs.tolist()
+        if fmt == 'H':
+            self.offset_map = [2*i for i in self.offset_map]
+        self.fmt = fmt
+
+    def glyph_location(self, glyph_id):
+        '''Return (offset, length) of the data for glyph_id.'''
+        offset = self.offset_map[glyph_id]
+        next_offset = self.offset_map[glyph_id+1]
+        return offset, next_offset - offset
+
+    def update(self, resolved_glyph_map):
+        '''
+        Update this table to contain pointers only to the glyphs in
+        resolved_glyph_map which must be a map of glyph_ids to (offset, sz)
+        Note that the loca table is generated for all glyphs from 0 to the
+        largest glyph that is either in resolved_glyph_map or was present
+        originally. The pointers to glyphs that have no data will be set to
+        zero. This preserves glyph ids.
+        '''
+        current_max_glyph_id = len(self.offset_map) - 2
+        max_glyph_id = max(resolved_glyph_map or (0,))
+        max_glyph_id = max(max_glyph_id, current_max_glyph_id)
+        self.offset_map = list(repeat(0, max_glyph_id + 2))
+        glyphs = [(glyph_id, x[0], x[1]) for glyph_id, x in
+                    iteritems(resolved_glyph_map)]
+        # Process the glyphs in order of increasing offset
+        glyphs.sort(key=itemgetter(1))
+        for glyph_id, offset, sz in glyphs:
+            self.offset_map[glyph_id] = offset
+            self.offset_map[glyph_id+1] = offset + sz
+        # Fix all zero entries to be the same as the previous entry, which
+        # means that if the ith entry is zero, the i-1 glyph is not present.
+        for i in range(1, len(self.offset_map)):
+            if self.offset_map[i] == 0:
+                self.offset_map[i] = self.offset_map[i-1]
+
+        vals = self.offset_map
+        max_offset = max(vals) if vals else 0
+        # The 16 bit format stores offset/2, so it can only be used when all
+        # offsets are even and small enough
+        if max_offset < 0x20000 and all(l % 2 == 0 for l in vals):
+            self.fmt = 'H'
+            vals = array.array(self.fmt, (i // 2 for i in vals))
+        else:
+            self.fmt = four_byte_type_code()
+            vals = array.array(self.fmt, vals)
+
+        if sys.byteorder != "big":
+            vals.byteswap()
+        # array.tostring() was removed in Python 3.9, prefer tobytes() and
+        # fall back to tostring() only where tobytes() does not exist
+        if hasattr(vals, 'tobytes'):
+            self.raw = vals.tobytes()
+        else:
+            self.raw = vals.tostring()
+    subset = update
+
+    def dump_glyphs(self, sfnt):
+        '''Print the id and size of every glyph that has data.'''
+        if not hasattr(self, 'offset_map'):
+            self.load_offsets(sfnt[b'head'], sfnt[b'maxp'])
+        for i in range(len(self.offset_map)-1):
+            off, noff = self.offset_map[i], self.offset_map[i+1]
+            if noff != off:
+                print('Glyph id:', i, 'size:', noff-off)
--- a/ebook_converter/utils/fonts/sfnt/maxp.py
+++ b/ebook_converter/utils/fonts/sfnt/maxp.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from struct import unpack_from, pack
+
+from calibre.utils.fonts.sfnt import UnknownTable, FixedProperty
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont
+from polyglot.builtins import zip
+
+
+class MaxpTable(UnknownTable):
+    '''
+    The maxp table. The version and number of glyphs are always read, the
+    remaining fields only when the version is exactly 1.0.
+    '''
+
+    version = FixedProperty('_version')
+
+    def __init__(self, *args, **kwargs):
+        super(MaxpTable, self).__init__(*args, **kwargs)
+
+        self._fmt = b'>lH'
+        self._version, self.num_glyphs = unpack_from(self._fmt, self.raw)
+        self.fields = ('_version', 'num_glyphs')
+
+        version = self.version
+        if version > 1.0:
+            raise UnsupportedFont('This font has a maxp table with version: %s'
+                    %version)
+        if version != 1.0:
+            return
+
+        # Version 1.0 adds a series of unsigned 16 bit fields
+        self.fields += (
+            'max_points', 'max_contours', 'max_composite_points',
+            'max_composite_contours', 'max_zones', 'max_twilight_points',
+            'max_storage', 'max_function_defs', 'max_instruction_defs',
+            'max_stack_elements', 'max_size_of_instructions',
+            'max_component_elements', 'max_component_depth')
+        self._fmt += b'H'*(len(self.fields)-2)
+
+        for name, value in zip(self.fields, unpack_from(self._fmt, self.raw)):
+            setattr(self, name, value)
+
+    def update(self):
+        '''Rebuild self.raw from the current values of the fields.'''
+        self.raw = pack(self._fmt,
+                *(getattr(self, name) for name in self.fields))
--- a/ebook_converter/utils/fonts/sfnt/subset.py
+++ b/ebook_converter/utils/fonts/sfnt/subset.py
@@ -0,0 +1,380 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import traceback
+from collections import OrderedDict
+from operator import itemgetter
+from functools import partial
+
+from calibre.utils.icu import safe_chr, ord_string
+from calibre.utils.fonts.sfnt.container import Sfnt
+from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
+from polyglot.builtins import unicode_type, range, iteritems, itervalues, map
+
+# TrueType outlines {{{
+
+
+def resolve_glyphs(loca, glyf, character_map, extra_glyphs):
+    '''
+    Return an OrderedDict, sorted by glyph id, of glyph id to glyph object
+    for glyph 0, the glyphs in character_map and extra_glyphs and all the
+    glyphs these refer to (transitively). Glyph ids that loca cannot locate
+    are silently skipped.
+    '''
+    pending = set(itervalues(character_map)) | extra_glyphs
+    pending.add(0)  # We always want the .notdef glyph
+    found = {}
+
+    while pending:
+        current = pending.pop()
+        try:
+            offset, length = loca.glyph_location(current)
+        except (IndexError, ValueError, KeyError, TypeError):
+            continue
+        glyph = glyf.glyph_data(offset, length)
+        found[current] = glyph
+        # Queue the components of this glyph that have not been seen yet
+        pending.update(gid for gid in glyph.glyph_indices if gid not in found)
+
+    return OrderedDict(sorted(iteritems(found), key=itemgetter(0)))
+
+
+def subset_truetype(sfnt, character_map, extra_glyphs):
+    '''
+    Subset the glyf and loca tables of sfnt to the glyphs needed for
+    character_map and extra_glyphs, and update the head and maxp tables to
+    match. Entries of character_map whose glyph could not be resolved are
+    removed from it. Raises UnsupportedFont if the head or maxp tables are
+    missing and NoGlyphs if nothing but glyph 0 is left.
+    '''
+    loca = sfnt[b'loca']
+    glyf = sfnt[b'glyf']
+
+    try:
+        head = sfnt[b'head']
+        maxp = sfnt[b'maxp']
+    except KeyError:
+        raise UnsupportedFont('This font does not contain head and/or maxp tables')
+    loca.load_offsets(head, maxp)
+
+    resolved = resolve_glyphs(loca, glyf, character_map, extra_glyphs)
+    if not resolved or set(resolved) == {0}:
+        raise NoGlyphs('This font has no glyphs for the specified character '
+                'set, subsetting it is pointless')
+
+    # Drop the character codes whose glyph could not be resolved
+    unresolved_codes = [code for code, glyph_id in iteritems(character_map)
+                        if glyph_id not in resolved]
+    for code in unresolved_codes:
+        del character_map[code]
+
+    # Rebuild glyf first, loca is then rebuilt from the new glyph offsets
+    new_offsets = glyf.update(resolved)
+    loca.subset(new_offsets)
+
+    head.index_to_loc_format = 0 if loca.fmt == 'H' else 1
+    head.update()
+    maxp.num_glyphs = len(loca.offset_map) - 1
+
+# }}}
+
+
+def subset_postscript(sfnt, character_map, extra_glyphs):
+    '''
+    Decompile the CFF table of sfnt and subset it to character_map and
+    extra_glyphs.
+    '''
+    cff_table = sfnt[b'CFF ']
+    cff_table.decompile()
+    cff_table.subset(character_map, extra_glyphs)
+
+
+def do_warn(warnings, *args):
+    '''
+    Report every line of every string in args, followed by an empty line.
+    The lines are appended to warnings, or printed if warnings is None.
+    '''
+    emit = print if warnings is None else warnings.append
+    for text in args:
+        for line in text.splitlines():
+            emit(line)
+    emit('')
+
+
+def pdf_subset(sfnt, glyphs):
+    '''
+    Remove all tables not in the list below from sfnt and subset its
+    outlines to glyphs. Raises UnsupportedFont if the font has neither
+    TrueType nor PostScript outlines.
+    '''
+    keep = {b'hhea', b'head', b'hmtx', b'maxp',
+            b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca',
+            b'prep', b'CFF ', b'VORG'}
+    # Remove non core tables since they are unused in PDF rendering
+    for tag in tuple(sfnt.tables):
+        if tag not in keep:
+            del sfnt[tag]
+
+    has_truetype = b'loca' in sfnt and b'glyf' in sfnt
+    if not has_truetype and b'CFF ' not in sfnt:
+        raise UnsupportedFont('This font does not contain TrueType '
+                'or PostScript outlines')
+    if has_truetype:
+        subset_truetype(sfnt, {}, glyphs)
+    else:
+        subset_postscript(sfnt, {}, glyphs)
+
+
+def safe_ord(x):
+    '''Return the first item of ord_string() applied to x as text.'''
+    codes = ord_string(unicode_type(x))
+    return codes[0]
+
+
+def subset(raw, individual_chars, ranges=(), warnings=None):
+    '''
+    Subset the font in raw so that it only contains the glyphs needed for
+    the characters in individual_chars and in ranges (an iterable of
+    (first, last) pairs, both ends included). The space character is always
+    kept. Non fatal problems are appended to warnings, or printed if
+    warnings is None.
+
+    Returns (raw data of the subset font, old table sizes, new table sizes).
+    Raises UnsupportedFont if the font has no cmap table or no outlines.
+    '''
+    warn = partial(do_warn, warnings)
+
+    chars = set(map(safe_ord, individual_chars))
+    for r in ranges:
+        chars.update(range(safe_ord(r[0]), safe_ord(r[1])+1))
+
+    # Always add the space character for ease of use from the command line
+    chars.add(safe_ord(' '))
+
+    sfnt = Sfnt(raw)
+    old_sizes = sfnt.sizes()
+
+    # Remove the Digital Signature table since it is useless in a subset
+    # font anyway
+    sfnt.pop(b'DSIG', None)
+
+    # Remove non core tables as they aren't likely to be used by renderers
+    # anyway
+    core_tables = {b'cmap', b'hhea', b'head', b'hmtx', b'maxp', b'name',
+            b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca', b'prep',
+            b'CFF ', b'VORG', b'EBDT', b'EBLC', b'EBSC', b'BASE', b'GSUB',
+            b'GPOS', b'GDEF', b'JSTF', b'gasp', b'hdmx', b'kern', b'LTSH',
+            b'PCLT', b'VDMX', b'vhea', b'vmtx', b'MATH'}
+    for tag in list(sfnt):
+        if tag not in core_tables:
+            del sfnt[tag]
+
+    try:
+        cmap = sfnt[b'cmap']
+    except KeyError:
+        raise UnsupportedFont('This font has no cmap table')
+
+    # Get mapping of chars to glyph ids for all specified chars
+    character_map = cmap.get_character_map(chars)
+
+    extra_glyphs = set()
+
+    if b'GSUB' in sfnt:
+        # Parse all substitution rules to ensure that glyphs that can be
+        # substituted for the specified set of glyphs are not removed
+        gsub = sfnt[b'GSUB']
+        try:
+            gsub.decompile()
+            extra_glyphs = gsub.all_substitutions(itervalues(character_map))
+        except UnsupportedFont as e:
+            warn('Unsupported GSUB table: %s'%e)
+        except Exception:
+            # Best effort: a broken GSUB table only costs the extra glyphs
+            warn('Failed to decompile GSUB table:', traceback.format_exc())
+
+    if b'loca' in sfnt and b'glyf' in sfnt:
+        # TrueType Outlines
+        subset_truetype(sfnt, character_map, extra_glyphs)
+    elif b'CFF ' in sfnt:
+        # PostScript Outlines
+        subset_postscript(sfnt, character_map, extra_glyphs)
+    else:
+        raise UnsupportedFont('This font does not contain TrueType '
+                'or PostScript outlines')
+
+    # Restrict the cmap table to only contain entries for the resolved glyphs
+    cmap.set_character_map(character_map)
+
+    if b'kern' in sfnt:
+        try:
+            sfnt[b'kern'].restrict_to_glyphs(frozenset(itervalues(character_map)))
+        except UnsupportedFont as e:
+            warn('kern table unsupported, ignoring: %s'%e)
+        except Exception:
+            # Best effort: the kern table is left as it was
+            warn('Subsetting of kern table failed, ignoring:',
+                    traceback.format_exc())
+
+    raw, new_sizes = sfnt()
+    return raw, old_sizes, new_sizes
+
+# CLI {{{
+
+
+def option_parser():
+    '''Return the OptionParser for the subset-font command line tool.'''
+    import textwrap
+    from calibre.utils.config import OptionParser
+    usage = textwrap.dedent('''\
+            %prog [options] input_font_file output_font_file characters_to_keep
+
+            Subset the specified font, keeping only the glyphs for the characters in
+            characters_to_keep. characters_to_keep is a comma separated list of characters of
+            the form: a,b,c,A-Z,0-9,xyz
+
+            You can specify ranges in the list of characters, as shown above.
+            ''')
+    parser = OptionParser(usage=usage)
+    codes_help = ('If specified, the list of characters is interpreted as '
+            'numeric unicode codes instead of characters. So to specify the '
+            'characters a,b you would use 97,98 or U+0061,U+0062')
+    parser.add_option('-c', '--codes', default=False, action='store_true',
+            help=codes_help)
+    parser.prog = 'subset-font'
+    return parser
+
+
+def print_stats(old_stats, new_stats):
+    '''
+    Print a comparison of the table sizes in old_stats and new_stats (maps
+    of table name to size), largest original table first. Tables missing
+    from new_stats are shown with a new size of zero.
+    '''
+    from calibre import prints
+    rule = '='*80
+    prints('========= Table comparison (original vs. subset) =========')
+    prints('Table', ' ', '%10s'%'Size', '  ', 'Percent', '   ', '%10s'%'New Size',
+            ' New Percent')
+    prints(rule)
+    old_total = sum(itervalues(old_stats))
+    new_total = sum(itervalues(new_stats))
+    for table in sorted(old_stats, key=old_stats.__getitem__, reverse=True):
+        old_size = old_stats[table]
+        new_size = new_stats.get(table, 0)
+        old_percent = old_size/old_total * 100
+        new_percent = new_size/new_total * 100
+        if new_size == old_size:
+            suffix = ' | same size'
+        else:
+            suffix = ' | reduced to %.1f %%'%(new_size/old_size * 100)
+        prints('%4s'%table, '  ', '%10s'%old_size, '  ', '%5.1f %%'%old_percent, '   ',
+                '%10s'%new_size, '  ', '%5.1f %%'%new_percent, suffix)
+    prints(rule)
+
+
+def main(args):
+    '''
+    Command line entry point. args is the full argument list, including the
+    program name: the input font, the output font and the comma separated
+    list of characters to keep. Exits with status 1 on invalid usage.
+    '''
+    import sys, time
+    from calibre import prints
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    if len(args) != 4:
+        parser.print_help()
+        raise SystemExit(1)
+    iff, off, chars = args[1:]
+    with open(iff, 'rb') as f:
+        orig = f.read()
+
+    individual, ranges = set(), set()
+
+    def not_single(c):
+        # Abort if c is more than one character long
+        if len(c) > 1:
+            prints(c, 'is not a single character', file=sys.stderr)
+            raise SystemExit(1)
+
+    def conv_code(c):
+        # Turn a numeric code (decimal, U+hex or 0xhex) into its character
+        if c.upper()[:2] in ('U+', '0X'):
+            c = int(c[2:], 16)
+        return safe_chr(int(c))
+
+    for spec in chars.split(','):
+        if '-' not in spec:
+            if opts.codes:
+                spec = conv_code(spec)
+            not_single(spec)
+            individual.add(spec)
+            continue
+        parts = [x.strip() for x in spec.split('-')]
+        if len(parts) != 2:
+            prints('Invalid range:', spec, file=sys.stderr)
+            raise SystemExit(1)
+        if opts.codes:
+            parts = tuple(conv_code(part) for part in parts)
+        for part in parts:
+            not_single(part)
+        ranges.add(tuple(parts))
+
+    st = time.time()
+    sf, old_stats, new_stats = subset(orig, individual, ranges)
+    taken = time.time() - st
+    reduced = (len(sf)/len(orig)) * 100
+
+    def sz(x):
+        return '%gKB'%(len(x)/1024.)
+
+    print_stats(old_stats, new_stats)
+    prints('Original size:', sz(orig), 'Subset size:', sz(sf), 'Reduced to: %g%%'%(reduced))
+    prints('Subsetting took %g seconds'%taken)
+    with open(off, 'wb') as f:
+        f.write(sf)
+    prints('Subset font written to:', off)
+
+
+if __name__ == '__main__':
+    try:
+        import init_calibre
+        init_calibre
+    except ImportError:
+        pass
+    import sys
+    main(sys.argv)
+# }}}
+
+# Tests {{{
+
+
+def test_mem():
+    from calibre.utils.mem import memory
+    import gc
+    gc.collect()
+    start_mem = memory()
+    raw = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
+    calls = 1000
+    for i in range(calls):
+        subset(raw, (), (('a', 'z'),))
+    del raw
+    for i in range(3):
+        gc.collect()
+    print('Leaked memory per call:', (memory() - start_mem)/calls*1024, 'KB')
+
+
+def test():
+    raw = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
+    sf, old_stats, new_stats = subset(raw, set(('a', 'b', 'c')), ())
+    if len(sf) > 0.3 * len(raw):
+        raise Exception('Subsetting failed')
+
+
+def all():
+    from calibre.utils.fonts.scanner import font_scanner
+    failed = []
+    unsupported = []
+    warnings = {}
+    total = 0
+    averages = []
+    for family in font_scanner.find_font_families():
+        for font in font_scanner.fonts_for_family(family):
+            raw = font_scanner.get_font_data(font)
+            print('Subsetting', font['full_name'], end='\t')
+            total += 1
+            try:
+                w = []
+                sf, old_stats, new_stats = subset(raw, set(('a', 'b', 'c')),
+                        (), w)
+                if w:
+                    warnings[font['full_name'] + ' (%s)'%font['path']] = w
+            except NoGlyphs:
+                print('No glyphs!')
+                continue
+            except UnsupportedFont as e:
+                unsupported.append((font['full_name'], font['path'], unicode_type(e)))
+                print('Unsupported!')
+                continue
+            except Exception as e:
+                print('Failed!')
+                failed.append((font['full_name'], font['path'], unicode_type(e)))
+            else:
+                averages.append(sum(itervalues(new_stats))/sum(itervalues(old_stats)) * 100)
+                print('Reduced to:', '%.1f'%averages[-1] , '%')
+    if unsupported:
+        print('\n\nUnsupported:')
+        for name, path, err in unsupported:
+            print(name, path, err)
+            print()
+    if warnings:
+        print('\n\nWarnings:')
+    for name, w in iteritems(warnings):
+        if w:
+            print(name)
+            print('', '\n\t'.join(w), sep='\t')
+    if failed:
+        print('\n\nFailures:')
+        for name, path, err in failed:
+            print(name, path, err)
+            print()
+
+    print('Average reduction to: %.1f%%'%(sum(averages)/len(averages)))
+    print('Total:', total, 'Unsupported:', len(unsupported), 'Failed:',
+            len(failed), 'Warnings:', len(warnings))
+
+
+# }}}