Initial import

2026-04-04 20:03:34 +02:00 · 2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions
--- a/ebook_converter/utils/fonts/init.py
+++ b/ebook_converter/utils/fonts/init.py
@@ -0,0 +1,7 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
--- a/ebook_converter/utils/fonts/metadata.py
+++ b/ebook_converter/utils/fonts/metadata.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from io import BytesIO
+from struct import calcsize, unpack, unpack_from
+from collections import namedtuple
+
+from calibre.utils.fonts.utils import get_font_names2, get_font_characteristics
+from polyglot.builtins import range, unicode_type
+
+
+class UnsupportedFont(ValueError):
+    '''Raised when the data cannot be read as a supported sfnt font.'''
+
+
+# Field order matches the tuple returned by get_font_characteristics();
+# FontMetadata.read_characteristics() fills it positionally from that result.
+FontCharacteristics = namedtuple('FontCharacteristics',
+    'weight, is_italic, is_bold, is_regular, fs_type, panose, width, is_oblique, is_wws, os2_version')
+# Field order matches the tuple returned by get_font_names2();
+# FontMetadata.read_names() fills it positionally from that result.
+FontNames = namedtuple('FontNames',
+    'family_name, subfamily_name, full_name, preferred_family_name, preferred_subfamily_name, wws_family_name, wws_subfamily_name')
+
+
+class FontMetadata(object):
+    '''
+    Metadata read from the table directory, the name table and the OS/2
+    table of an sfnt font (signature ``\\x00\\x01\\x00\\x00`` or ``OTTO``).
+
+    After construction the following attributes are available: is_otf,
+    tables, names, characteristics, font_family, font_weight, font_stretch
+    and font_style.
+
+    :raises UnsupportedFont: if the signature is not recognised, the table
+        directory is truncated, or the name or OS/2 table is missing or
+        shorter than the directory claims.
+    '''
+
+    # CSS font-stretch keywords, indexed by (OS/2 width class - 1)
+    FONT_STRETCH_NAMES = (
+        'ultra-condensed', 'extra-condensed', 'condensed', 'semi-condensed',
+        'normal', 'semi-expanded', 'expanded', 'extra-expanded',
+        'ultra-expanded')
+
+    def __init__(self, bytes_or_stream):
+        '''
+        :param bytes_or_stream: The raw font data, either as a bytestring or
+            as a seekable binary stream. A stream is left positioned at 0.
+        '''
+        if not hasattr(bytes_or_stream, 'read'):
+            bytes_or_stream = BytesIO(bytes_or_stream)
+        f = bytes_or_stream
+        f.seek(0)
+        header = f.read(4)
+        if header not in {b'\x00\x01\x00\x00', b'OTTO'}:
+            raise UnsupportedFont('Not a supported sfnt variant')
+
+        self.is_otf = header == b'OTTO'
+        self.read_table_metadata(f)
+        self.read_names(f)
+        self.read_characteristics(f)
+
+        f.seek(0)
+        self.font_family = self.names.family_name
+        # Weights 400 and 700 get their CSS keyword, every other weight is
+        # kept as a numeric string
+        wt = self.characteristics.weight
+        if wt == 400:
+            wt = 'normal'
+        elif wt == 700:
+            wt = 'bold'
+        else:
+            wt = unicode_type(wt)
+        self.font_weight = wt
+
+        self.font_stretch = self._css_font_stretch(self.characteristics.width)
+        # Oblique takes precedence over italic when both flags are set
+        if self.characteristics.is_oblique:
+            self.font_style = 'oblique'
+        elif self.characteristics.is_italic:
+            self.font_style = 'italic'
+        else:
+            self.font_style = 'normal'
+
+    @classmethod
+    def _css_font_stretch(cls, width):
+        '''
+        Map an OS/2 width class (1-9) to its CSS font-stretch keyword. Values
+        outside that range are treated as 'normal' instead of silently
+        wrapping around (0) or raising IndexError (> 9).
+        '''
+        names = cls.FONT_STRETCH_NAMES
+        if 1 <= width <= len(names):
+            return names[width - 1]
+        return 'normal'
+
+    def read_table_metadata(self, f):
+        '''
+        Read the table directory into self.tables, a dict mapping the lower
+        cased table tag (bytes) to (offset, length, checksum).
+
+        :raises UnsupportedFont: if the table directory is truncated.
+        '''
+        f.seek(4)
+        raw = f.read(2)
+        if len(raw) != 2:
+            raise UnsupportedFont('This font has a truncated header')
+        num_tables = unpack(b'>H', raw)[0]
+        # Start of table record entries
+        f.seek(4 + 4*2)
+        table_record = b'>4s3L'
+        sz = calcsize(table_record)
+        self.tables = {}
+        block = f.read(sz * num_tables)
+        if len(block) != sz * num_tables:
+            raise UnsupportedFont('This font has a truncated table directory')
+        for i in range(num_tables):
+            table_tag, table_checksum, table_offset, table_length = \
+                    unpack_from(table_record, block, i*sz)
+            self.tables[table_tag.lower()] = (table_offset, table_length,
+                    table_checksum)
+
+    def read_names(self, f):
+        '''
+        Read the name table and store the decoded names in self.names.
+
+        :raises UnsupportedFont: if there is no name table or it is shorter
+            than the table directory claims.
+        '''
+        if b'name' not in self.tables:
+            raise UnsupportedFont('This font has no name table')
+        toff, tlen = self.tables[b'name'][:2]
+        f.seek(toff)
+        table = f.read(tlen)
+        if len(table) != tlen:
+            raise UnsupportedFont('This font has a name table of incorrect length')
+        vals = get_font_names2(table, raw_is_table=True)
+        self.names = FontNames(*vals)
+
+    def read_characteristics(self, f):
+        '''
+        Read the OS/2 table and store its values in self.characteristics.
+
+        :raises UnsupportedFont: if there is no OS/2 table or it is shorter
+            than the table directory claims.
+        '''
+        if b'os/2' not in self.tables:
+            raise UnsupportedFont('This font has no OS/2 table')
+        toff, tlen = self.tables[b'os/2'][:2]
+        f.seek(toff)
+        table = f.read(tlen)
+        if len(table) != tlen:
+            raise UnsupportedFont('This font has an OS/2 table of incorrect length')
+        vals = get_font_characteristics(table, raw_is_table=True)
+        self.characteristics = FontCharacteristics(*vals)
+
+    def to_dict(self):
+        '''
+        Return all the metadata as a flat dict: the CSS style properties,
+        is_otf and every field of self.names and self.characteristics.
+        '''
+        ans = {
+                'is_otf':self.is_otf,
+                'font-family':self.font_family,
+                'font-weight':self.font_weight,
+                'font-style':self.font_style,
+                'font-stretch':self.font_stretch
+        }
+        for f in self.names._fields:
+            ans[f] = getattr(self.names, f)
+        for f in self.characteristics._fields:
+            ans[f] = getattr(self.characteristics, f)
+        return ans
+
+
+if __name__ == '__main__':
+    # Debugging aid: pretty print the metadata of the font file named by the
+    # last command line argument.
+    import sys
+    with open(sys.argv[-1], 'rb') as f:
+        fm = FontMetadata(f)
+        import pprint
+        pprint.pprint(fm.to_dict())
--- a/ebook_converter/utils/fonts/scanner.py
+++ b/ebook_converter/utils/fonts/scanner.py
@@ -0,0 +1,412 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import os
+from collections import defaultdict
+from threading import Thread
+
+from calibre import walk, prints, as_unicode
+from calibre.constants import (config_dir, iswindows, isosx, plugins, DEBUG,
+        isworker, filesystem_encoding)
+from calibre.utils.fonts.metadata import FontMetadata, UnsupportedFont
+from calibre.utils.icu import sort_key
+from polyglot.builtins import itervalues, unicode_type, filter
+
+
+class NoFonts(ValueError):
+    '''Raised when no fonts are known for a requested font family.'''
+
+# Font dirs {{{
+
+
+def default_font_dirs():
+    '''
+    Return the hard-coded list of font folders: three system wide locations
+    followed by two locations in the home directory of the current user.
+    '''
+    system_wide = ['/opt/share/fonts', '/usr/share/fonts',
+                   '/usr/local/share/fonts']
+    per_user = [os.path.expanduser(location) for location in
+                ('~/.local/share/fonts', '~/.fonts')]
+    return system_wide + per_user
+
+
+def fc_list():
+    '''
+    Return the font directories reported by the fontconfig library.
+
+    Falls back to default_font_dirs() when the library or one of the needed
+    functions cannot be loaded, when fontconfig returns no directory list, or
+    when fewer than three directories could be decoded.
+
+    Each directory is resolved to its real, absolute, normalized path. The
+    root directory, directories that were already returned and directories
+    located inside a previously returned directory are left out.
+    '''
+    import ctypes
+    from ctypes.util import find_library
+
+    lib = find_library('fontconfig')
+    if lib is None:
+        return default_font_dirs()
+    try:
+        lib = ctypes.CDLL(lib)
+    except Exception:
+        # Best effort: any failure to load the library means we use the
+        # defaults, but KeyboardInterrupt/SystemExit are no longer swallowed
+        return default_font_dirs()
+
+    prototype = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p)
+    try:
+        get_font_dirs = prototype(('FcConfigGetFontDirs', lib))
+    except (AttributeError):
+        return default_font_dirs()
+    prototype = ctypes.CFUNCTYPE(ctypes.c_char_p, ctypes.c_void_p)
+    try:
+        next_dir = prototype(('FcStrListNext', lib))
+    except (AttributeError):
+        return default_font_dirs()
+
+    prototype = ctypes.CFUNCTYPE(None, ctypes.c_void_p)
+    try:
+        end = prototype(('FcStrListDone', lib))
+    except (AttributeError):
+        return default_font_dirs()
+
+    # A NULL config pointer is passed to FcConfigGetFontDirs
+    str_list = get_font_dirs(ctypes.c_void_p())
+    if not str_list:
+        return default_font_dirs()
+
+    ans = []
+    try:
+        while True:
+            d = next_dir(str_list)
+            if not d:
+                break
+            try:
+                ans.append(d.decode(filesystem_encoding))
+            except ValueError:
+                prints('Ignoring undecodeable font path: %r' % d)
+    finally:
+        # Always release the string list, even if the loop above fails
+        end(str_list)
+    if len(ans) < 3:
+        return default_font_dirs()
+
+    parents, visited = [], set()
+    for f in ans:
+        path = os.path.normpath(os.path.abspath(os.path.realpath(f)))
+        if path == '/' or path in visited:
+            # Never scan the whole filesystem, never scan a folder twice
+            continue
+        head, tail = os.path.split(path)
+        # Walk up the ancestors of path; stop as soon as one of them has
+        # already been accepted, as scanning it covers path as well
+        while head and tail:
+            if head in visited:
+                break
+            head, tail = os.path.split(head)
+        else:
+            parents.append(path)
+            visited.add(path)
+    return parents
+
+
+def font_dirs():
+    '''
+    Return the list of system font folders for the current platform.
+
+    On windows the folder is queried via the winutil plugin, with a fixed
+    path used if the query fails. On macOS a fixed list is returned. On all
+    other platforms the result of fc_list() is returned.
+
+    :raises RuntimeError: on windows, if the winutil plugin failed to load.
+    '''
+    if iswindows:
+        winutil, err = plugins['winutil']
+        if err:
+            raise RuntimeError('Failed to load winutil: %s'%err)
+        try:
+            fonts_folder = winutil.special_folder_path(winutil.CSIDL_FONTS)
+        except ValueError:
+            return [r'C:\Windows\Fonts']
+        return [fonts_folder]
+    if not isosx:
+        return fc_list()
+    osx_folders = [
+        '/Library/Fonts',
+        '/System/Library/Fonts',
+        '/usr/share/fonts',
+        '/var/root/Library/Fonts',
+    ]
+    osx_folders.append(os.path.expanduser('~/.fonts'))
+    osx_folders.append(os.path.expanduser('~/Library/Fonts'))
+    return osx_folders
+# }}}
+
+# Build font family maps {{{
+
+
+def font_priority(font):
+    '''
+    Sort key that puts the "Regular" face of a family first.
+
+    Returns 0 for a face whose style, stretch and weight are all normal and
+    whose sub-family name is Regular, 1 if only the three properties are
+    normal, 2 if only the sub-family name is Regular and otherwise 3 plus the
+    number of properties that are not normal.
+    '''
+    num_normal = sum(1 for prop in ('font-style', 'font-stretch', 'font-weight')
+                     if font[prop] == 'normal')
+    subfamily_name = (font['wws_subfamily_name'] or
+            font['preferred_subfamily_name'] or font['subfamily_name'])
+    named_regular = subfamily_name == 'Regular'
+    if num_normal == 3:
+        return 0 if named_regular else 1
+    if named_regular:
+        return 2
+    return 3 + (3 - num_normal)
+
+
+def path_significance(path, folders):
+    '''
+    Return the index of the first entry of folders that is path itself or
+    contains path, or -1 if there is no such entry.
+
+    The match is made on a directory boundary, so that a folder is not
+    mistaken for the parent of a sibling whose name merely starts with the
+    same characters (``/usr/share/fonts`` vs. ``/usr/share/fonts2``).
+
+    :param path: Path to a file or folder, it is made absolute and case
+        normalized before comparing.
+    :param folders: Folder paths, presumably already absolute and case
+        normalized by the caller.
+    '''
+    path = os.path.normcase(os.path.abspath(path))
+    for i, q in enumerate(folders):
+        if path == q:
+            return i
+        prefix = q if q.endswith(os.sep) else q + os.sep
+        if path.startswith(prefix):
+            return i
+    return -1
+
+
+def build_families(cached_fonts, folders, family_attr='font-family'):
+    '''
+    Group the cached font faces into families.
+
+    :param cached_fonts: Mapping whose values are face dicts, empty values
+        are skipped.
+    :param folders: Font folders, used to decide which copy of a duplicated
+        face to keep (see path_significance()).
+    :param family_attr: The key of the face dicts to group by.
+    :return: (font_family_map, font_families). The map goes from the lower
+        cased family name to the list of faces, sorted by font_priority().
+        font_families is a sorted tuple with the ``font-family`` value of
+        the first face of every family.
+    '''
+    families = defaultdict(list)
+    for face in itervalues(cached_fonts):
+        if face:
+            family_key = icu_lower(face.get(family_attr) or '')
+            if family_key:
+                families[family_key].append(face)
+
+    for faces in itervalues(families):
+        # Faces with the same family, weight, stretch and style are treated
+        # as duplicates. Keep the copy from the folder with the higher
+        # significance, on a tie the copy seen last wins.
+        kept = {}
+        duplicates = []
+        for face in faces:
+            fingerprint = (icu_lower(face['font-family']), face['font-weight'],
+                    face['font-stretch'], face['font-style'])
+            if fingerprint not in kept:
+                kept[fingerprint] = face
+                continue
+            current = kept[fingerprint]
+            opath = current['path']
+            npath = face['path']
+            if path_significance(npath, folders) >= path_significance(opath, folders):
+                duplicates.append(current)
+                kept[fingerprint] = face
+            else:
+                duplicates.append(face)
+        for duplicate in duplicates:
+            faces.remove(duplicate)
+        faces.sort(key=font_priority)
+
+    # A plain dict, so that looking up an unknown family raises KeyError
+    font_family_map = dict(families)
+    family_names = [faces[0]['font-family'] for faces in
+                    itervalues(font_family_map)]
+    family_names.sort(key=sort_key)
+    return font_family_map, tuple(family_names)
+# }}}
+
+
+class FontScanner(Thread):
+    '''
+    Thread that scans a set of folders for font files, caches the metadata
+    of every font found and groups the fonts into families.
+
+    The query methods call join() first, so they block until the scan has
+    finished.
+    '''
+
+    # Changing this number discards previously cached font metadata
+    CACHE_VERSION = 2
+
+    def __init__(self, folders=None, allowed_extensions=None):
+        '''
+        :param folders: Optional folders to scan before the folders returned
+            by font_dirs(), the ``fonts`` folder in the config directory and
+            the ``fonts/liberation`` resource folder.
+        :param allowed_extensions: Lower case extensions (without the dot)
+            of the files to scan. Defaults to ttf and otf.
+        '''
+        Thread.__init__(self)
+        # None is used as the default to avoid sharing mutable default
+        # arguments between instances
+        folders = list(folders or ())
+        if allowed_extensions is None:
+            allowed_extensions = {'ttf', 'otf'}
+        folders = folders + font_dirs() + [os.path.join(config_dir, 'fonts'),
+                P('fonts/liberation')]
+        self.folders = [os.path.normcase(os.path.abspath(f)) for f in folders]
+        self.font_families = ()
+        self.allowed_extensions = allowed_extensions
+
+    # API {{{
+    def find_font_families(self):
+        ''' Return the sorted tuple of font family names found by the scan '''
+        self.join()
+        return self.font_families
+
+    def fonts_for_family(self, family):
+        '''
+        Return a list of the faces belonging to the specified family. The first
+        face is the "Regular" face of family. Each face is a dictionary with
+        many keys, the most important of which are: path, font-family,
+        font-weight, font-style, font-stretch. The font-* properties follow the
+        CSS 3 Fonts specification.
+
+        :raises NoFonts: if the family is not known
+        '''
+        self.join()
+        try:
+            return self.font_family_map[icu_lower(family)]
+        except KeyError:
+            raise NoFonts('No fonts found for the family: %r'%family)
+
+    def legacy_fonts_for_family(self, family):
+        '''
+        Return a simple set of regular, bold, italic and bold-italic faces for
+        the specified family. Returns a dictionary with each element being a
+        2-tuple of (path to font, full font name) and the keys being: normal,
+        bold, italic, bi. The dictionary is empty if the family is not known.
+        '''
+        ans = {}
+        try:
+            faces = self.fonts_for_family(family)
+        except NoFonts:
+            return ans
+        for i, face in enumerate(faces):
+            if i == 0:
+                # The faces are sorted with the most "Regular" face first
+                key = 'normal'
+            elif face['font-style'] in {'italic', 'oblique'}:
+                key = 'bi' if face['font-weight'] == 'bold' else 'italic'
+            elif face['font-weight'] == 'bold':
+                key = 'bold'
+            else:
+                continue
+            ans[key] = (face['path'], face['full_name'])
+        return ans
+
+    def get_font_data(self, font_or_path):
+        '''
+        Return the raw bytes of a font file.
+
+        :param font_or_path: Either a path or a face dictionary with a
+            ``path`` key.
+        '''
+        path = font_or_path
+        if isinstance(font_or_path, dict):
+            path = font_or_path['path']
+        with lopen(path, 'rb') as f:
+            return f.read()
+
+    def find_font_for_text(self, text, allowed_families=('serif', 'sans-serif'),
+            preferred_families=('serif', 'sans-serif', 'monospace', 'cursive', 'fantasy')):
+        '''
+        Find a font on the system capable of rendering the given text.
+
+        Returns a font family (as given by fonts_for_family()) that has a
+        "normal" font and that can render the supplied text. If no such font
+        exists, returns None.
+
+        The first family whose generic family is in allowed_families (or is
+        the first entry of preferred_families) is returned immediately.
+        Otherwise the first family found for the earliest matching entry of
+        preferred_families is returned.
+
+        :raises TypeError: if text is not a unicode string
+        :return: (family name, faces) or None, None
+        '''
+        from calibre.utils.fonts.utils import (supports_text,
+                panose_to_css_generic_family, get_printable_characters)
+        if not isinstance(text, unicode_type):
+            raise TypeError(u'%r is not unicode'%text)
+        text = get_printable_characters(text)
+        found = {}
+
+        def filter_faces(font):
+            # Best effort: a face that cannot be read or parsed is simply
+            # treated as not supporting the text
+            try:
+                raw = self.get_font_data(font)
+                return supports_text(raw, text)
+            except Exception:
+                return False
+
+        for family in self.find_font_families():
+            faces = list(filter(filter_faces, self.fonts_for_family(family)))
+            if not faces:
+                continue
+            generic_family = panose_to_css_generic_family(faces[0]['panose'])
+            if generic_family in allowed_families or generic_family == preferred_families[0]:
+                return (family, faces)
+            elif generic_family not in found:
+                found[generic_family] = (family, faces)
+
+        for f in preferred_families:
+            if f in found:
+                return found[f]
+        return None, None
+    # }}}
+
+    def reload_cache(self):
+        '''
+        Load the scanner cache (or refresh it if already loaded) and set
+        self.cached_fonts from it. A cache with a different version number
+        is cleared first.
+        '''
+        if not hasattr(self, 'cache'):
+            from calibre.utils.config import JSONConfig
+            self.cache = JSONConfig('fonts/scanner_cache')
+        else:
+            self.cache.refresh()
+        if self.cache.get('version', None) != self.CACHE_VERSION:
+            self.cache.clear()
+        self.cached_fonts = self.cache.get('fonts', {})
+
+    def run(self):
+        ''' Thread entry point, performs the scan '''
+        self.do_scan()
+
+    def do_scan(self):
+        '''
+        Scan all folders for font files, reusing cached metadata for files
+        whose path, size and modification time are unchanged, then rebuild
+        the family maps. The cache is written only if the set of font files
+        changed. In worker processes no scan is done, only the cached data
+        is used.
+        '''
+        self.reload_cache()
+
+        if isworker:
+            # Dont scan font files in worker processes, use whatever is
+            # cached. Font files typically dont change frequently enough to
+            # justify a rescan in a worker process.
+            self.build_families()
+            return
+
+        cached_fonts = self.cached_fonts.copy()
+        self.cached_fonts.clear()
+        for folder in self.folders:
+            if not os.path.isdir(folder):
+                continue
+            try:
+                files = tuple(walk(folder))
+            except EnvironmentError as e:
+                if DEBUG:
+                    prints('Failed to walk font folder:', folder,
+                            as_unicode(e))
+                continue
+            for candidate in files:
+                if (candidate.rpartition('.')[-1].lower() not in self.allowed_extensions or not os.path.isfile(candidate)):
+                    continue
+                candidate = os.path.normcase(os.path.abspath(candidate))
+                try:
+                    s = os.stat(candidate)
+                except EnvironmentError:
+                    continue
+                # The cache key changes whenever the size or the last
+                # modified time of the file changes
+                fileid = '{0}||{1}:{2}'.format(candidate, s.st_size, s.st_mtime)
+                if fileid in cached_fonts:
+                    # Use previously cached metadata, since the file size and
+                    # last modified timestamp have not changed.
+                    self.cached_fonts[fileid] = cached_fonts[fileid]
+                    continue
+                try:
+                    self.read_font_metadata(candidate, fileid)
+                except Exception as e:
+                    if DEBUG:
+                        prints('Failed to read metadata from font file:',
+                                candidate, as_unicode(e))
+                    continue
+
+        if frozenset(cached_fonts) != frozenset(self.cached_fonts):
+            # Write out the cache only if some font files have changed
+            self.write_cache()
+
+        self.build_families()
+
+    def build_families(self):
+        ''' Rebuild font_family_map and font_families from cached_fonts '''
+        self.font_family_map, self.font_families = build_families(self.cached_fonts, self.folders)
+
+    def write_cache(self):
+        ''' Store the cache version and cached_fonts in the cache '''
+        with self.cache:
+            self.cache['version'] = self.CACHE_VERSION
+            self.cache['fonts'] = self.cached_fonts
+
+    def force_rescan(self):
+        '''
+        Empty the cached font metadata and write the empty cache out, so
+        that the next scan re-reads every font file.
+        '''
+        self.cached_fonts = {}
+        self.write_cache()
+
+    def read_font_metadata(self, path, fileid):
+        '''
+        Read the metadata of the font at path and store it in cached_fonts
+        under fileid. An unsupported font is stored as an empty dict.
+        '''
+        with lopen(path, 'rb') as f:
+            try:
+                fm = FontMetadata(f)
+            except UnsupportedFont:
+                self.cached_fonts[fileid] = {}
+            else:
+                data = fm.to_dict()
+                data['path'] = path
+                self.cached_fonts[fileid] = data
+
+    def dump_fonts(self):
+        ''' Print all families and their faces, for debugging '''
+        self.join()
+        for family in self.font_families:
+            prints(family)
+            for font in self.fonts_for_family(family):
+                prints('\t%s: %s'%(font['full_name'], font['path']))
+                prints(end='\t')
+                for key in ('font-stretch', 'font-weight', 'font-style'):
+                    prints('%s: %s'%(key, font[key]), end=' ')
+                prints()
+                prints('\tSub-family:', font['wws_subfamily_name'] or
+                        font['preferred_subfamily_name'] or
+                        font['subfamily_name'])
+                prints()
+            prints()
+
+
+# Module level scanner instance. Its thread is started as soon as this module
+# is imported; the query methods of FontScanner join() it before answering.
+font_scanner = FontScanner()
+font_scanner.start()
+
+
+def force_rescan():
+    '''
+    Discard the cached font metadata of the module level font_scanner and
+    scan again. The new scan runs synchronously in the calling thread, after
+    waiting for the scanner thread to finish.
+    '''
+    font_scanner.join()
+    font_scanner.force_rescan()
+    font_scanner.run()
+
+
+if __name__ == '__main__':
+    # Debugging aid: print every font family and face that was found
+    font_scanner.dump_fonts()
--- a/ebook_converter/utils/fonts/utils.py
+++ b/ebook_converter/utils/fonts/utils.py
@@ -0,0 +1,503 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import struct
+from io import BytesIO
+from collections import defaultdict
+
+from polyglot.builtins import iteritems, itervalues, unicode_type, range, as_bytes
+
+
+class UnsupportedFont(ValueError):
+    '''Raised when the font data is not a supported font or lacks a needed table.'''
+
+
+def get_printable_characters(text):
+    '''
+    Return text, normalized to NFC, without the characters whose unicode
+    category starts with C, Z or M.
+    '''
+    import unicodedata
+    excluded = {'C', 'Z', 'M'}
+    normalized = unicodedata.normalize('NFC', text)
+    kept = [ch for ch in normalized
+            if unicodedata.category(ch)[0] not in excluded]
+    return u''.join(kept)
+
+
+def is_truetype_font(raw):
+    '''
+    Return (ok, sfnt_version) where sfnt_version is the first four bytes of
+    raw and ok is True iff it is one of the two supported signatures.
+    '''
+    signature = raw[:4]
+    supported = signature in {b'\x00\x01\x00\x00', b'OTTO'}
+    return (supported, signature)
+
+
+def get_tables(raw):
+    '''
+    Iterate over the table records of the font in raw, yielding for every
+    table: (tag, table bytes, offset of the table record, offset of the
+    table, table checksum).
+    '''
+    record_format = b'>4s3L'
+    record_size = 4*4
+    first_record = 4*3  # start of the table record entries
+    num_tables, = struct.unpack_from(b'>H', raw, 4)
+    for index in range(num_tables):
+        record_offset = first_record + index * record_size
+        tag, checksum, start, length = struct.unpack_from(
+            record_format, raw, record_offset)
+        yield (tag, raw[start:start+length], record_offset, start, checksum)
+
+
+def get_table(raw, name):
+    '''
+    Find the table called name (compared case insensitively) in the font.
+
+    :return: (table bytes, offset of the table record, offset of the table,
+        table checksum) or (None, None, None, None) if there is no such table
+    '''
+    wanted = as_bytes(name.lower())
+    matches = (
+        (data, record_offset, data_offset, checksum)
+        for tag, data, record_offset, data_offset, checksum in get_tables(raw)
+        if tag.lower() == wanted)
+    return next(matches, (None, None, None, None))
+
+
+def get_font_characteristics(raw, raw_is_table=False, return_all=False):
+    '''
+    Read values from the OS/2 table of the font. See
+    http://www.microsoft.com/typography/otspec/os2.htm for details
+
+    :param raw: The raw font, or the raw OS/2 table if raw_is_table is True
+    :param return_all: If True return instead the sixteen leading fields of
+        the table followed by panose, selection, is_italic, is_bold and
+        is_regular.
+    :return: (weight, is_italic, is_bold, is_regular, fs_type, panose, width,
+        is_oblique, is_wws, version)
+    :raises UnsupportedFont: if raw is a font without an OS/2 table
+    '''
+    if raw_is_table:
+        os2_table = raw
+    else:
+        os2_table = get_table(raw, 'os/2')[0]
+        if os2_table is None:
+            raise UnsupportedFont('Not a supported font, has no OS/2 table')
+
+    common_fields = b'>Hh3H11h'
+    common = struct.unpack_from(common_fields, os2_table)
+    version, char_width, weight, width, fs_type = common[:5]
+    position = struct.calcsize(common_fields)
+    panose = struct.unpack_from(b'>10B', os2_table, position)
+    position += 10
+    # The four range fields are not used, they are unpacked only so that a
+    # table truncated here fails at this point
+    struct.unpack_from(b'>4L', os2_table, position)
+    position += struct.calcsize(b'>4L')
+    position += 4  # skip the four byte vendor id
+    selection, = struct.unpack_from(b'>H', os2_table, position)
+
+    def flag(bit):
+        return (selection & (1 << bit)) != 0
+
+    is_italic, is_bold, is_regular = flag(0), flag(5), flag(6)
+    if return_all:
+        return common + (panose, selection, is_italic, is_bold, is_regular)
+
+    is_wws, is_oblique = flag(8), flag(9)
+    return weight, is_italic, is_bold, is_regular, fs_type, panose, width, is_oblique, is_wws, version
+
+
+def panose_to_css_generic_family(panose):
+    '''
+    Map a panose classification to a CSS generic family name. The fourth
+    value decides monospace, then the first decides cursive and fantasy and
+    finally the second decides between sans-serif and serif.
+    '''
+    if panose[3] == 9:
+        return 'monospace'
+    by_family_type = {3: 'cursive', 4: 'fantasy'}
+    generic = by_family_type.get(panose[0])
+    if generic is not None:
+        return generic
+    return 'sans-serif' if panose[1] in (11, 12, 13) else 'serif'
+
+
+def decode_name_record(recs):
+    '''
+    Get the English names of this font. See
+    http://www.microsoft.com/typography/otspec/name.htm for details.
+
+    :param recs: Iterable of (platform_id, encoding_id, language_id, raw
+        bytes) tuples, all for the same name id.
+    :return: The decoded name or None. Windows names are preferred (US
+        English first, then other English variants), then the Mac name with
+        language id 0 and finally any name from platforms 0 and 2.
+    '''
+    if not recs:
+        return None
+    unicode_names, windows_names, mac_names = {}, {}, {}
+    iso_codecs = {0: 'ascii', 1: 'utf-16-be', 2: 'iso-8859-1'}
+    windows_codecs = {1: 'utf-16-be', 10: 'utf-32-be'}
+    for platform_id, encoding_id, language_id, src in recs:
+        if language_id > 0x8000:
+            continue
+        # Pick the dict to store into and the codec to decode with
+        if platform_id == 0:
+            if encoding_id >= 4:
+                continue
+            destination, codec = unicode_names, 'utf-16-be'
+        elif platform_id == 1:
+            destination, codec = mac_names, 'utf-8'
+        elif platform_id == 2:
+            destination, codec = unicode_names, iso_codecs.get(encoding_id)
+        elif platform_id == 3:
+            destination, codec = windows_names, windows_codecs.get(encoding_id)
+        else:
+            continue
+        if codec is None:
+            continue
+        try:
+            destination[language_id] = src.decode(codec)
+        except ValueError:
+            continue
+
+    # First try the windows names: US English (1033) and then the other
+    # english name variants
+    english_language_ids = (
+        1033, 3081, 10249, 4105, 9225, 16393, 6153, 8201, 17417, 5129,
+        13321, 18441, 7177, 11273, 2057, 12297)
+    for lang in english_language_ids:
+        if lang in windows_names:
+            return windows_names[lang]
+
+    # Look for Mac name
+    if 0 in mac_names:
+        return mac_names[0]
+
+    # Use unicode names, whichever comes first
+    return next(iter(unicode_names.values()), None)
+
+
+def _get_font_names(raw, raw_is_table=False):
+    '''
+    Read the records of the name table.
+
+    :param raw: The raw font, or the raw name table if raw_is_table is True
+    :return: A defaultdict(list) mapping every name id to a list of
+        (platform_id, encoding_id, language_id, raw bytes) tuples
+    :raises UnsupportedFont: if raw is a font without a name table
+    '''
+    if raw_is_table:
+        table = raw
+    else:
+        table = get_table(raw, 'name')[0]
+        if table is None:
+            raise UnsupportedFont('Not a supported font, has no name table')
+    _, count, string_offset = struct.unpack_from(b'>3H', table)
+
+    records = defaultdict(list)
+
+    for index in range(count):
+        try:
+            fields = struct.unpack_from(b'>6H', table, 6+index*12)
+        except struct.error:
+            # The table ends before the promised number of records
+            break
+        platform_id, encoding_id, language_id, name_id, length, offset = fields
+        start = string_offset + offset
+        records[name_id].append(
+            (platform_id, encoding_id, language_id, table[start:start+length]))
+
+    return records
+
+
+def get_font_names(raw, raw_is_table=False):
+    '''
+    Return (family_name, subfamily_name, full_name), decoded from the name
+    records with the ids 1, 2 and 4. A name that is absent is None.
+    '''
+    records = _get_font_names(raw, raw_is_table)
+    return tuple(decode_name_record(records[name_id])
+                 for name_id in (1, 2, 4))
+
+
+def get_font_names2(raw, raw_is_table=False):
+    '''
+    Return (family_name, subfamily_name, full_name, preferred_family_name,
+    preferred_subfamily_name, wws_family_name, wws_subfamily_name), decoded
+    from the name records with the ids 1, 2, 4, 16, 17, 21 and 22. A name
+    that is absent is None.
+    '''
+    records = _get_font_names(raw, raw_is_table)
+    name_ids = (
+        1, 2, 4,  # family, subfamily, full name
+        16, 17,   # preferred family and subfamily
+        21, 22,   # wws family and subfamily
+    )
+    return tuple(decode_name_record(records[name_id]) for name_id in name_ids)
+
+
+def get_all_font_names(raw, raw_is_table=False):
+    '''
+    Return a dict with the names present in the font. Possible keys are
+    family_name, subfamily_name, full_name, preferred_family_name,
+    preferred_subfamily_name, wws_family_name, wws_subfamily_name and
+    postscript_name. Names that are missing or empty are left out.
+    '''
+    records = _get_font_names(raw, raw_is_table)
+    name_ids = {'family_name':1, 'subfamily_name':2, 'full_name':4,
+            'preferred_family_name':16, 'preferred_subfamily_name':17,
+            'wws_family_name':21, 'wws_subfamily_name':22}
+    ans = {}
+
+    for name, num in iteritems(name_ids):
+        try:
+            value = decode_name_record(records[num])
+        except (IndexError, KeyError, ValueError):
+            continue
+        if value:
+            ans[name] = value
+
+    # The postscript name (name id 6) is only taken from these two
+    # (platform, encoding, language) combinations; the first record that
+    # decodes wins
+    postscript_codecs = {(1, 0, 0): 'utf-8', (3, 1, 1033): 'utf-16-be'}
+    for platform_id, encoding_id, language_id, src in records[6]:
+        codec = postscript_codecs.get((platform_id, encoding_id, language_id))
+        if codec is None:
+            continue
+        try:
+            ans['postscript_name'] = src.decode(codec)
+        except ValueError:
+            continue
+        break
+
+    return ans
+
+
+def checksum_of_block(raw):
+    '''
+    Return the sum, modulo 2**32, of the big-endian unsigned 32 bit integers
+    that make up raw. raw is treated as if padded with zero bytes to a
+    multiple of four bytes.
+
+    The padded data is built as a new object, so a mutable buffer (such as
+    a bytearray) passed in by the caller is never modified.
+    '''
+    padding = -len(raw) % 4
+    padded = raw + b'\0'*padding if padding else raw
+    num = len(padded)//4
+    return sum(struct.unpack(b'>%dI'%num, padded)) % (1<<32)
+
+
+def verify_checksums(raw):
+    '''
+    Check the checksum of every table in the font and, if the font has a
+    head table, the checksum adjustment stored in it.
+
+    :param raw: The raw font data
+    :raises ValueError: if the checksum of a table or the checksum
+        adjustment of the whole font is incorrect
+    '''
+    head_table = None
+    for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
+        if table_tag.lower() == b'head':
+            # The head table is checked after the loop, as its checksum has
+            # to be computed with the checksum adjustment field (the four
+            # bytes at offset 8) set to zero
+            version, fontrev, checksum_adj = struct.unpack_from(b'>ffL', table)
+            head_table = table
+            offset = table_offset
+            checksum = table_checksum
+        elif checksum_of_block(table) != table_checksum:
+            raise ValueError('The %r table has an incorrect checksum'%table_tag)
+
+    if head_table is not None:
+        table = head_table
+        # Zero out the checksum adjustment field, in the table and in the
+        # copy of the whole font
+        table = table[:8] + struct.pack(b'>I', 0) + table[12:]
+        raw = raw[:offset] + table + raw[offset+len(table):]
+        # Check the checksum of the head table
+        if checksum_of_block(table) != checksum:
+            raise ValueError('Checksum of head table not correct')
+        # Check the checksum of the entire font
+        checksum = checksum_of_block(raw)
+        q = (0xB1B0AFBA - checksum) & 0xffffffff
+        if q != checksum_adj:
+            raise ValueError('Checksum of entire font incorrect')
+
+
+def set_checksum_adjustment(f):
+    '''
+    Recalculate the checksum adjustment of the font and write it into the
+    head table, in place.
+
+    :param f: A stream containing the font that supports getvalue(), seek()
+        and write(), for example a BytesIO object
+    '''
+    offset = get_table(f.getvalue(), 'head')[2]
+    # The checksum adjustment is stored 8 bytes into the head table
+    offset += 8
+    f.seek(offset)
+    # The checksum of the font has to be calculated with the field zeroed
+    f.write(struct.pack(b'>I', 0))
+    checksum = checksum_of_block(f.getvalue())
+    q = (0xB1B0AFBA - checksum) & 0xffffffff
+    f.seek(offset)
+    f.write(struct.pack(b'>I', q))
+
+
+def set_table_checksum(f, name):
+    '''
+    Update the checksum stored in the table record of the table called name,
+    if it does not match the actual checksum of the table.
+
+    :param f: A stream containing the font that supports getvalue(), seek()
+        and write(), for example a BytesIO object
+    '''
+    table, record_offset, _, stored_checksum = get_table(f.getvalue(), name)
+    actual_checksum = checksum_of_block(table)
+    if actual_checksum == stored_checksum:
+        return
+    # The checksum follows the four byte tag in the table record
+    f.seek(record_offset + 4)
+    f.write(struct.pack(b'>I', actual_checksum))
+
+
+def remove_embed_restriction(raw):
+    '''
+    Return the font with the fs_type field of its OS/2 table set to zero and
+    the affected checksums updated. The font is returned unchanged if the
+    field is already zero.
+
+    :raises UnsupportedFont: if raw does not have a supported signature or
+        has no OS/2 table
+    :raises ValueError: if the checksums of the modified font do not verify
+    '''
+    ok, sig = is_truetype_font(raw)
+    if not ok:
+        raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sig)
+
+    os2_table, _, os2_offset = get_table(raw, 'os/2')[:3]
+    if os2_table is None:
+        raise UnsupportedFont('Not a supported font, has no OS/2 table')
+
+    # fs_type comes right after the four leading fields of the table
+    fs_type_offset = struct.calcsize(b'>HhHH')
+    fs_type, = struct.unpack_from(b'>H', os2_table, fs_type_offset)
+    if fs_type == 0:
+        return raw
+
+    stream = BytesIO(raw)
+    stream.seek(fs_type_offset + os2_offset)
+    stream.write(struct.pack(b'>H', 0))
+
+    set_table_checksum(stream, 'os/2')
+    set_checksum_adjustment(stream)
+    patched = stream.getvalue()
+    verify_checksums(patched)
+    return patched
+
+
+def is_font_embeddable(raw):
+    '''
+    Decide from the fs_type field of the OS/2 table whether the font may be
+    embedded. See https://www.microsoft.com/typography/otspec/os2.htm#fst
+
+    :return: (embeddable, fs_type)
+    :raises UnsupportedFont: if raw does not have a supported signature or
+        has no OS/2 table
+    '''
+    ok, sig = is_truetype_font(raw)
+    if not ok:
+        raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sig)
+
+    table, table_index, table_offset = get_table(raw, 'os/2')[:3]
+    if table is None:
+        raise UnsupportedFont('Not a supported font, has no OS/2 table')
+    fs_type_offset = struct.calcsize(b'>HhHH')
+    fs_type = struct.unpack_from(b'>H', table, fs_type_offset)[0]
+    # No restrictions at all, or editable embedding (0x8) allowed
+    if fs_type == 0 or fs_type & 0x8:
+        return True, fs_type
+    # Bit 0 is reserved in the specification, fonts that set it continue to
+    # be refused, as before
+    if fs_type & 0x1:
+        return False, fs_type
+    # Restricted license embedding is bit 1 (0x2). It only applies when the
+    # less restrictive preview & print embedding (0x4) is not also granted
+    if fs_type & 0x2 and not fs_type & 0x4:
+        return False, fs_type
+    # Bitmap embedding only (0x200)
+    if fs_type & 0x200:
+        return False, fs_type
+    return True, fs_type
+
+
+def read_bmp_prefix(table, bmp):
+    '''
+    Read the segment arrays of the cmap subtable that starts at the offset
+    bmp in table.
+
+    :return: (start_count, end_count, range_offset, id_delta, glyph_id_len,
+        glyph_id_map, array_len), where the first four are tuples of
+        array_len items each and glyph_id_map is a tuple of glyph_id_len
+        items
+    :raises ValueError: if the length declared by the subtable is too small
+        to hold the four arrays
+    '''
+    # NOTE(review): the layout read here looks like cmap subtable format 4,
+    # where the third field is twice the segment count -- confirm
+    length, language, segcount = struct.unpack_from(b'>3H', table, bmp+2)
+    array_len = segcount //2
+    # The arrays start after the seven leading 16 bit fields of the subtable
+    offset = bmp + 7*2
+    array_sz = 2*array_len
+    array = b'>%dH'%array_len
+    end_count = struct.unpack_from(array, table, offset)
+    # Two extra bytes are skipped between the end_count and start_count arrays
+    offset += array_sz + 2
+    start_count = struct.unpack_from(array, table, offset)
+    offset += array_sz
+    # id_delta is read as signed 16 bit values, unlike the other arrays
+    id_delta = struct.unpack_from(array.replace(b'H', b'h'), table, offset)
+    offset += array_sz
+    range_offset = struct.unpack_from(array, table, offset)
+    if length + bmp < offset + array_sz:
+        raise ValueError('cmap subtable length is too small')
+    # Whatever remains of the declared length is the glyph id array
+    glyph_id_len = (length + bmp - (offset + array_sz))//2
+    glyph_id_map = struct.unpack_from(b'>%dH'%glyph_id_len, table, offset +
+            array_sz)
+    return (start_count, end_count, range_offset, id_delta, glyph_id_len,
+            glyph_id_map, array_len)
+
+
+def get_bmp_glyph_ids(table, bmp, codes):
+    (start_count, end_count, range_offset, id_delta, glyph_id_len,
+     glyph_id_map, array_len) = read_bmp_prefix(table, bmp)
+
+    for code in codes:
+        found = False
+        for i, ec in enumerate(end_count):
+            if ec >= code:
+                sc = start_count[i]
+                if sc <= code:
+                    found = True
+                    ro = range_offset[i]
+                    if ro == 0:
+                        glyph_id = id_delta[i] + code
+                    else:
+                        idx = ro//2 + (code - sc) + i - array_len
+                        glyph_id = glyph_id_map[idx]
+                        if glyph_id != 0:
+                            glyph_id += id_delta[i]
+                    yield glyph_id % 0x10000
+                    break
+        if not found:
+            yield 0
+
+
+def get_glyph_ids(raw, text, raw_is_table=False):
+    if not isinstance(text, unicode_type):
+        raise TypeError('%r is not a unicode object'%text)
+    if raw_is_table:
+        table = raw
+    else:
+        table = get_table(raw, 'cmap')[0]
+        if table is None:
+            raise UnsupportedFont('Not a supported font, has no cmap table')
+    version, num_tables = struct.unpack_from(b'>HH', table)
+    bmp_table = None
+    for i in range(num_tables):
+        platform_id, encoding_id, offset = struct.unpack_from(b'>HHL', table,
+                4 + (i*8))
+        if platform_id == 3 and encoding_id == 1:
+            table_format = struct.unpack_from(b'>H', table, offset)[0]
+            if table_format == 4:
+                bmp_table = offset
+                break
+    if bmp_table is None:
+        raise UnsupportedFont('Not a supported font, has no format 4 cmap table')
+
+    for glyph_id in get_bmp_glyph_ids(table, bmp_table, map(ord, text)):
+        yield glyph_id
+
+
+def supports_text(raw, text, has_only_printable_chars=False):
+    if not isinstance(text, unicode_type):
+        raise TypeError('%r is not a unicode object'%text)
+    if not has_only_printable_chars:
+        text = get_printable_characters(text)
+    try:
+        for glyph_id in get_glyph_ids(raw, text):
+            if glyph_id == 0:
+                return False
+    except:
+        return False
+    return True
+
+
+def get_font_for_text(text, candidate_font_data=None):
+    ok = False
+    if candidate_font_data is not None:
+        ok = supports_text(candidate_font_data, text)
+    if not ok:
+        from calibre.utils.fonts.scanner import font_scanner
+        family, faces = font_scanner.find_font_for_text(text)
+        if faces:
+            with lopen(faces[0]['path'], 'rb') as f:
+                candidate_font_data = f.read()
+    return candidate_font_data
+
+
+def test_glyph_ids():
+    from calibre.utils.fonts.free_type import FreeType
+    data = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
+    ft = FreeType()
+    font = ft.load_font(data)
+    text = u'诶йab'
+    ft_glyphs = tuple(font.glyph_ids(text))
+    glyphs = tuple(get_glyph_ids(data, text))
+    if ft_glyphs != glyphs:
+        raise Exception('My code and FreeType differ on the glyph ids')
+
+
+def test_supports_text():
+    data = P('fonts/calibreSymbols.otf', data=True)
+    if not supports_text(data, '.★½'):
+        raise RuntimeError('Incorrectly returning that text is not supported')
+    if supports_text(data, 'abc'):
+        raise RuntimeError('Incorrectly claiming that text is supported')
+
+
+def test_find_font():
+    from calibre.utils.fonts.scanner import font_scanner
+    abcd = '诶比西迪'
+    family = font_scanner.find_font_for_text(abcd)[0]
+    print('Family for Chinese text:', family)
+    family = font_scanner.find_font_for_text(abcd)[0]
+    abcd = 'لوحة المفاتيح العربية'
+    print('Family for Arabic text:', family)
+
+
+def test():
+    test_glyph_ids()
+    test_supports_text()
+    test_find_font()
+
+
+def main():
+    import sys, os
+    for arg in sys.argv[1:]:
+        print(os.path.basename(arg))
+        with open(arg, 'rb') as f:
+            raw = f.read()
+        print(get_font_names(raw))
+        characs = get_font_characteristics(raw)
+        print(characs)
+        print(panose_to_css_generic_family(characs[5]))
+        verify_checksums(raw)
+        remove_embed_restriction(raw)
+
+
+# Only run main() when this file is executed as a script, not when imported
+if __name__ == '__main__':
+    main()