mirror of https://github.com/gryf/ebook-converter.git synced 2026-02-22 18:15:49 +01:00

Initial import

This commit is contained in:
2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Miscellaneous utilities.
'''
from time import time
from polyglot.builtins import as_bytes
def join_with_timeout(q, timeout=2):
''' Join the queue q with a specified timeout. Blocks until all tasks on
the queue are done, or raises a RuntimeError on timeout. '''
q.all_tasks_done.acquire()
try:
endtime = time() + timeout
while q.unfinished_tasks:
remaining = endtime - time()
if remaining <= 0.0:
raise RuntimeError('Waiting for queue to clear timed out')
q.all_tasks_done.wait(remaining)
finally:
q.all_tasks_done.release()
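# A minimal usage sketch (illustrative, not part of the original module; uses
# the Python 3 spelling of the stdlib imports):
#
#     from queue import Queue
#     from threading import Thread
#     q = Queue()
#     for i in range(5):
#         q.put(i)
#     def worker():
#         while True:
#             q.get()
#             q.task_done()
#     t = Thread(target=worker)
#     t.daemon = True
#     t.start()
#     join_with_timeout(q, timeout=5)  # raises RuntimeError if tasks remain after 5s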
def unpickle_binary_string(data):
# Maintains compatibility with python's pickle module protocol version 2
import struct
PROTO, SHORT_BINSTRING, BINSTRING = b'\x80', b'U', b'T'
if data.startswith(PROTO + b'\x02'):
offset = 2
which = data[offset:offset+1]
offset += 1
if which == BINSTRING:
sz, = struct.unpack_from('<i', data, offset)
offset += struct.calcsize('<i')
elif which == SHORT_BINSTRING:
sz = ord(data[offset:offset+1])
offset += 1
else:
return
return data[offset:offset + sz]
def pickle_binary_string(data):
# Maintains compatibility with python's pickle module protocol version 2
import struct
PROTO, STOP, BINSTRING = b'\x80', b'.', b'T'
data = as_bytes(data)
return PROTO + b'\x02' + BINSTRING + struct.pack(b'<i', len(data)) + data + STOP
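# Round-trip sketch (illustrative): pickle_binary_string emits a protocol-2
# BINSTRING opcode stream that unpickle_binary_string (and pickle.loads) can
# read back.
#
#     payload = pickle_binary_string(b'hello')  # b'\x80\x02T\x05\x00\x00\x00hello.'
#     assert unpickle_binary_string(payload) == b'hello'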

View File

@@ -0,0 +1,98 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import re
from polyglot.builtins import codepoint_to_chr, map, range, filter
from polyglot.html_entities import name2codepoint
from calibre.constants import plugins, preferred_encoding
try:
    _ncxc = plugins['speedup'][0].clean_xml_chars
except AttributeError:
    # speedup plugin without clean_xml_chars; fall back to the pure-python
    # implementation below (see the `or` at the clean_xml_chars assignment)
    native_clean_xml_chars = None
else:
    def native_clean_xml_chars(x):
        if isinstance(x, bytes):
            x = x.decode(preferred_encoding)
        return _ncxc(x)
def ascii_pat(for_binary=False):
attr = 'binary' if for_binary else 'text'
ans = getattr(ascii_pat, attr, None)
if ans is None:
chars = set(range(32)) - {9, 10, 13}
chars.add(127)
pat = '|'.join(map(codepoint_to_chr, chars))
if for_binary:
pat = pat.encode('ascii')
ans = re.compile(pat)
setattr(ascii_pat, attr, ans)
return ans
def clean_ascii_chars(txt, charlist=None):
r'''
Remove ASCII control chars.
This is all control chars except \t, \n and \r
'''
is_binary = isinstance(txt, bytes)
empty = b'' if is_binary else ''
if not txt:
return empty
if charlist is None:
pat = ascii_pat(is_binary)
else:
pat = '|'.join(map(codepoint_to_chr, charlist))
if is_binary:
pat = pat.encode('utf-8')
return pat.sub(empty, txt)
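# Behaviour sketch (illustrative only):
#
#     assert clean_ascii_chars('a\x00b\tc') == 'ab\tc'  # NUL stripped, tab kept
#     assert clean_ascii_chars(b'x\x1by') == b'xy'      # bytes input gives bytes output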
def allowed(x):
x = ord(x)
return (x != 127 and (31 < x < 0xd7ff or x in (9, 10, 13))) or (0xe000 < x < 0xfffd) or (0x10000 < x < 0x10ffff)
def py_clean_xml_chars(unicode_string):
return ''.join(filter(allowed, unicode_string))
clean_xml_chars = native_clean_xml_chars or py_clean_xml_chars
def test_clean_xml_chars():
raw = 'asd\x02a\U00010437x\ud801b\udffe\ud802'
if native_clean_xml_chars(raw) != 'asda\U00010437xb':
raise ValueError('Failed to XML clean: %r' % raw)
# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text, rm=False, rchar=''):
def fixup(m, rm=rm, rchar=rchar):
text = m.group(0)
if text[:2] == "&#":
# character reference
try:
if text[:3] == "&#x":
return codepoint_to_chr(int(text[3:-1], 16))
else:
return codepoint_to_chr(int(text[2:-1]))
except ValueError:
pass
else:
# named entity
try:
text = codepoint_to_chr(name2codepoint[text[1:-1]])
except KeyError:
pass
if rm:
return rchar # replace by char
return text # leave as is
return re.sub("&#?\\w+;", fixup, text)
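# Behaviour sketch (illustrative only):
#
#     assert unescape('&amp; &#65; &#x42;') == '& A B'
#     assert unescape('&bogus;', rm=True, rchar='?') == '?'  # unknown entity replaced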

View File

@@ -0,0 +1,464 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Manage application-wide preferences.
'''
import optparse
import os
from copy import deepcopy
from calibre.constants import (
CONFIG_DIR_MODE, __appname__, __author__, config_dir, get_version, iswindows
)
from calibre.utils.config_base import (
Config, ConfigInterface, ConfigProxy, Option, OptionSet, OptionValues,
StringConfig, json_dumps, json_loads, make_config_dir, plugin_dir, prefs,
tweaks, from_json, to_json
)
from calibre.utils.lock import ExclusiveFile
from polyglot.builtins import string_or_bytes, native_string_type
# optparse uses gettext.gettext instead of _ from builtins, so we
# monkey patch it.
optparse._ = _
if False:
# Make pyflakes happy
Config, ConfigProxy, Option, OptionValues, StringConfig, OptionSet,
ConfigInterface, tweaks, plugin_dir, prefs, from_json, to_json
def check_config_write_access():
return os.access(config_dir, os.W_OK) and os.access(config_dir, os.X_OK)
class CustomHelpFormatter(optparse.IndentedHelpFormatter):
def format_usage(self, usage):
from calibre.utils.terminal import colored
parts = usage.split(' ')
if parts:
parts[0] = colored(parts[0], fg='yellow', bold=True)
usage = ' '.join(parts)
return colored(_('Usage'), fg='blue', bold=True) + ': ' + usage
def format_heading(self, heading):
from calibre.utils.terminal import colored
return "%*s%s:\n" % (self.current_indent, '',
colored(heading, fg='blue', bold=True))
def format_option(self, option):
import textwrap
from calibre.utils.terminal import colored
result = []
opts = self.option_strings[option]
opt_width = self.help_position - self.current_indent - 2
if len(opts) > opt_width:
opts = "%*s%s\n" % (self.current_indent, "",
colored(opts, fg='green'))
indent_first = self.help_position
else: # start help on same line as opts
opts = "%*s%-*s " % (self.current_indent, "", opt_width +
len(colored('', fg='green')), colored(opts, fg='green'))
indent_first = 0
result.append(opts)
if option.help:
help_text = self.expand_default(option).split('\n')
help_lines = []
for line in help_text:
help_lines.extend(textwrap.wrap(line, self.help_width))
result.append("%*s%s\n" % (indent_first, "", help_lines[0]))
result.extend(["%*s%s\n" % (self.help_position, "", line)
for line in help_lines[1:]])
elif opts[-1] != "\n":
result.append("\n")
return "".join(result)+'\n'
class OptionParser(optparse.OptionParser):
def __init__(self,
usage='%prog [options] filename',
version=None,
epilog=None,
gui_mode=False,
conflict_handler='resolve',
**kwds):
import textwrap
from calibre.utils.terminal import colored
usage = textwrap.dedent(usage)
if epilog is None:
epilog = _('Created by ')+colored(__author__, fg='cyan')
usage += '\n\n'+_('''Whenever you pass arguments to %prog that have spaces in them, '''
'''enclose the arguments in quotation marks. For example: "{}"''').format(
"C:\\some path with spaces" if iswindows else '/some path/with spaces') +'\n'
if version is None:
version = '%%prog (%s %s)'%(__appname__, get_version())
optparse.OptionParser.__init__(self, usage=usage, version=version, epilog=epilog,
formatter=CustomHelpFormatter(),
conflict_handler=conflict_handler, **kwds)
self.gui_mode = gui_mode
if False:
# Translatable string from optparse
_("Options")
_("show this help message and exit")
_("show program's version number and exit")
def print_usage(self, file=None):
from calibre.utils.terminal import ANSIStream
s = ANSIStream(file)
optparse.OptionParser.print_usage(self, file=s)
def print_help(self, file=None):
from calibre.utils.terminal import ANSIStream
s = ANSIStream(file)
optparse.OptionParser.print_help(self, file=s)
def print_version(self, file=None):
from calibre.utils.terminal import ANSIStream
s = ANSIStream(file)
optparse.OptionParser.print_version(self, file=s)
def error(self, msg):
if self.gui_mode:
raise Exception(msg)
optparse.OptionParser.error(self, msg)
def merge(self, parser):
'''
Add options from parser to self. In case of conflicts, conflicting options from
parser are skipped.
'''
opts = list(parser.option_list)
groups = list(parser.option_groups)
def merge_options(options, container):
for opt in deepcopy(options):
if not self.has_option(opt.get_opt_string()):
container.add_option(opt)
merge_options(opts, self)
for group in groups:
g = self.add_option_group(group.title)
merge_options(group.option_list, g)
def subsume(self, group_name, msg=''):
'''
Move all existing options into a subgroup named
C{group_name} with description C{msg}.
'''
opts = [opt for opt in self.options_iter() if opt.get_opt_string() not in ('--version', '--help')]
self.option_groups = []
subgroup = self.add_option_group(group_name, msg)
for opt in opts:
self.remove_option(opt.get_opt_string())
subgroup.add_option(opt)
def options_iter(self):
for opt in self.option_list:
if native_string_type(opt).strip():
yield opt
for gr in self.option_groups:
for opt in gr.option_list:
if native_string_type(opt).strip():
yield opt
def option_by_dest(self, dest):
for opt in self.options_iter():
if opt.dest == dest:
return opt
def merge_options(self, lower, upper):
'''
Merge options in lower and upper option lists into upper.
Default values in upper are overridden by
non default values in lower.
'''
for dest in lower.__dict__.keys():
if dest not in upper.__dict__:
continue
opt = self.option_by_dest(dest)
if lower.__dict__[dest] != opt.default and \
upper.__dict__[dest] == opt.default:
upper.__dict__[dest] = lower.__dict__[dest]
def add_option_group(self, *args, **kwargs):
if isinstance(args[0], string_or_bytes):
args = list(args)
args[0] = native_string_type(args[0])
return optparse.OptionParser.add_option_group(self, *args, **kwargs)
class DynamicConfig(dict):
'''
A replacement for QSettings that supports dynamic config keys.
Returns `None` if a config key is not found. Note that the config
data is stored in a JSON file.
'''
def __init__(self, name='dynamic'):
dict.__init__(self, {})
self.name = name
self.defaults = {}
self.refresh()
@property
def file_path(self):
return os.path.join(config_dir, self.name+'.pickle.json')
def decouple(self, prefix):
self.name = prefix + self.name
self.refresh()
def read_old_serialized_representation(self):
from calibre.utils.shared_file import share_open
from calibre.utils.serialize import pickle_loads
path = self.file_path.rpartition('.')[0]
try:
with share_open(path, 'rb') as f:
raw = f.read()
except EnvironmentError:
raw = b''
try:
d = pickle_loads(raw).copy()
except Exception:
d = {}
return d
def refresh(self, clear_current=True):
d = {}
migrate = False
if clear_current:
self.clear()
if os.path.exists(self.file_path):
with ExclusiveFile(self.file_path) as f:
raw = f.read()
if raw:
try:
d = json_loads(raw)
except Exception as err:
print('Failed to de-serialize JSON representation of stored dynamic data for {} with error: {}'.format(
self.name, err))
else:
d = self.read_old_serialized_representation()
migrate = bool(d)
else:
d = self.read_old_serialized_representation()
migrate = bool(d)
if migrate and d:
raw = json_dumps(d, ignore_unserializable=True)
with ExclusiveFile(self.file_path) as f:
f.seek(0), f.truncate()
f.write(raw)
self.update(d)
def __getitem__(self, key):
try:
return dict.__getitem__(self, key)
except KeyError:
return self.defaults.get(key, None)
def get(self, key, default=None):
try:
return dict.__getitem__(self, key)
except KeyError:
return self.defaults.get(key, default)
def __setitem__(self, key, val):
dict.__setitem__(self, key, val)
self.commit()
def set(self, key, val):
self.__setitem__(key, val)
def commit(self):
if not getattr(self, 'name', None):
return
if not os.path.exists(self.file_path):
make_config_dir()
raw = json_dumps(self)
with ExclusiveFile(self.file_path) as f:
f.seek(0)
f.truncate()
f.write(raw)
dynamic = DynamicConfig()
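# Usage sketch (illustrative; the key name is hypothetical): dynamic behaves
# like a dict persisted to a JSON file under config_dir, and missing keys fall
# back to defaults or None instead of raising KeyError.
#
#     dynamic['recently_used'] = ['a.epub', 'b.mobi']  # committed to disk at once
#     dynamic.get('no_such_key')                       # -> None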
class XMLConfig(dict):
'''
Similar to :class:`DynamicConfig`, except that it uses an XML storage
backend instead of a pickle file.
See `https://docs.python.org/dev/library/plistlib.html`_ for the supported
data types.
'''
EXTENSION = '.plist'
def __init__(self, rel_path_to_cf_file, base_path=config_dir):
dict.__init__(self)
self.no_commit = False
self.defaults = {}
self.file_path = os.path.join(base_path,
*(rel_path_to_cf_file.split('/')))
self.file_path = os.path.abspath(self.file_path)
if not self.file_path.endswith(self.EXTENSION):
self.file_path += self.EXTENSION
self.refresh()
def mtime(self):
try:
return os.path.getmtime(self.file_path)
except EnvironmentError:
return 0
def touch(self):
try:
os.utime(self.file_path, None)
except EnvironmentError:
pass
def raw_to_object(self, raw):
from polyglot.plistlib import loads
return loads(raw)
def to_raw(self):
from polyglot.plistlib import dumps
return dumps(self)
def decouple(self, prefix):
self.file_path = os.path.join(os.path.dirname(self.file_path), prefix + os.path.basename(self.file_path))
self.refresh()
def refresh(self, clear_current=True):
d = {}
if os.path.exists(self.file_path):
with ExclusiveFile(self.file_path) as f:
raw = f.read()
try:
d = self.raw_to_object(raw) if raw.strip() else {}
except SystemError:
pass
except:
import traceback
traceback.print_exc()
d = {}
if clear_current:
self.clear()
self.update(d)
def __getitem__(self, key):
from polyglot.plistlib import Data
try:
ans = dict.__getitem__(self, key)
if isinstance(ans, Data):
ans = ans.data
return ans
except KeyError:
return self.defaults.get(key, None)
def get(self, key, default=None):
from polyglot.plistlib import Data
try:
ans = dict.__getitem__(self, key)
if isinstance(ans, Data):
ans = ans.data
return ans
except KeyError:
return self.defaults.get(key, default)
def __setitem__(self, key, val):
from polyglot.plistlib import Data
if isinstance(val, bytes):
val = Data(val)
dict.__setitem__(self, key, val)
self.commit()
def set(self, key, val):
self.__setitem__(key, val)
def __delitem__(self, key):
try:
dict.__delitem__(self, key)
except KeyError:
pass # ignore missing keys
else:
self.commit()
def commit(self):
if self.no_commit:
return
if hasattr(self, 'file_path') and self.file_path:
dpath = os.path.dirname(self.file_path)
if not os.path.exists(dpath):
os.makedirs(dpath, mode=CONFIG_DIR_MODE)
with ExclusiveFile(self.file_path) as f:
raw = self.to_raw()
f.seek(0)
f.truncate()
f.write(raw)
def __enter__(self):
self.no_commit = True
def __exit__(self, *args):
self.no_commit = False
self.commit()
class JSONConfig(XMLConfig):
EXTENSION = '.json'
def raw_to_object(self, raw):
return json_loads(raw)
def to_raw(self):
return json_dumps(self)
def __getitem__(self, key):
try:
return dict.__getitem__(self, key)
except KeyError:
return self.defaults[key]
def get(self, key, default=None):
try:
return dict.__getitem__(self, key)
except KeyError:
return self.defaults.get(key, default)
def __setitem__(self, key, val):
dict.__setitem__(self, key, val)
self.commit()
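# Usage sketch (illustrative; the plugin name is hypothetical). Unlike
# XMLConfig, JSONConfig.__getitem__ raises KeyError when a key is missing and
# has no default.
#
#     c = JSONConfig('plugins/my_plugin')  # -> <config_dir>/plugins/my_plugin.json
#     c.defaults['count'] = 0
#     c['count'] += 1                      # read default, write back, commit to disk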
class DevicePrefs:
def __init__(self, global_prefs):
self.global_prefs = global_prefs
self.overrides = {}
def set_overrides(self, **kwargs):
self.overrides = kwargs.copy()
def __getitem__(self, key):
return self.overrides.get(key, self.global_prefs[key])
device_prefs = DevicePrefs(prefs)

View File

@@ -0,0 +1,674 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re, traceback, numbers
from functools import partial
from collections import defaultdict
from copy import deepcopy
from calibre.utils.lock import ExclusiveFile
from calibre.constants import config_dir, CONFIG_DIR_MODE, ispy3, preferred_encoding, filesystem_encoding, iswindows
from polyglot.builtins import unicode_type, iteritems, map
plugin_dir = os.path.join(config_dir, 'plugins')
def parse_old_style(src):
if ispy3:
import pickle as cPickle
else:
import cPickle
options = {'cPickle':cPickle}
try:
if not isinstance(src, unicode_type):
src = src.decode('utf-8')
src = src.replace('PyQt%d.QtCore' % 4, 'PyQt5.QtCore')
src = re.sub(r'cPickle\.loads\(([\'"])', r'cPickle.loads(b\1', src)
exec(src, options)
except Exception as err:
try:
print('Failed to parse old style options string with error: {}'.format(err))
except Exception:
pass
return options
def to_json(obj):
import datetime
if isinstance(obj, bytearray):
from base64 import standard_b64encode
return {'__class__': 'bytearray',
'__value__': standard_b64encode(bytes(obj)).decode('ascii')}
if isinstance(obj, datetime.datetime):
from calibre.utils.date import isoformat
return {'__class__': 'datetime.datetime',
'__value__': isoformat(obj, as_utc=True)}
if isinstance(obj, (set, frozenset)):
return {'__class__': 'set', '__value__': tuple(obj)}
if isinstance(obj, bytes):
return obj.decode('utf-8')
if hasattr(obj, 'toBase64'): # QByteArray
return {'__class__': 'bytearray',
'__value__': bytes(obj.toBase64()).decode('ascii')}
raise TypeError(repr(obj) + ' is not JSON serializable')
def safe_to_json(obj):
try:
return to_json(obj)
except Exception:
pass
def from_json(obj):
custom = obj.get('__class__')
if custom is not None:
if custom == 'bytearray':
from base64 import standard_b64decode
return bytearray(standard_b64decode(obj['__value__'].encode('ascii')))
if custom == 'datetime.datetime':
from calibre.utils.iso8601 import parse_iso8601
return parse_iso8601(obj['__value__'], assume_utc=True)
if custom == 'set':
return set(obj['__value__'])
return obj
def force_unicode(x):
try:
return x.decode('mbcs' if iswindows else preferred_encoding)
except UnicodeDecodeError:
try:
return x.decode(filesystem_encoding)
except UnicodeDecodeError:
return x.decode('utf-8', 'replace')
def force_unicode_recursive(obj):
if isinstance(obj, bytes):
return force_unicode(obj)
if isinstance(obj, (list, tuple)):
return type(obj)(map(force_unicode_recursive, obj))
if isinstance(obj, dict):
return {force_unicode_recursive(k): force_unicode_recursive(v) for k, v in iteritems(obj)}
return obj
def json_dumps(obj, ignore_unserializable=False):
import json
try:
ans = json.dumps(obj, indent=2, default=safe_to_json if ignore_unserializable else to_json, sort_keys=True, ensure_ascii=False)
except UnicodeDecodeError:
obj = force_unicode_recursive(obj)
ans = json.dumps(obj, indent=2, default=safe_to_json if ignore_unserializable else to_json, sort_keys=True, ensure_ascii=False)
if not isinstance(ans, bytes):
ans = ans.encode('utf-8')
return ans
def json_loads(raw):
import json
if isinstance(raw, bytes):
raw = raw.decode('utf-8')
return json.loads(raw, object_hook=from_json)
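# Round-trip sketch (illustrative): to_json/from_json extend plain JSON with a
# few Python types via the '__class__' marker.
#
#     raw = json_dumps({'tags': {'a', 'b'}})  # set -> {"__class__": "set", ...}
#     assert json_loads(raw) == {'tags': {'a', 'b'}}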
def make_config_dir():
if not os.path.exists(plugin_dir):
os.makedirs(plugin_dir, mode=CONFIG_DIR_MODE)
class Option(object):
def __init__(self, name, switches=[], help='', type=None, choices=None,
check=None, group=None, default=None, action=None, metavar=None):
if choices:
type = 'choice'
self.name = name
self.switches = switches
self.help = help.replace('%default', repr(default)) if help else None
self.type = type
if self.type is None and action is None and choices is None:
if isinstance(default, float):
self.type = 'float'
elif isinstance(default, numbers.Integral) and not isinstance(default, bool):
self.type = 'int'
self.choices = choices
self.check = check
self.group = group
self.default = default
self.action = action
self.metavar = metavar
def __eq__(self, other):
return self.name == getattr(other, 'name', other)
def __repr__(self):
return 'Option: '+self.name
def __str__(self):
return repr(self)
class OptionValues(object):
def copy(self):
return deepcopy(self)
class OptionSet(object):
OVERRIDE_PAT = re.compile(r'#{3,100} Override Options #{15}(.*?)#{3,100} End Override #{3,100}',
re.DOTALL|re.IGNORECASE)
def __init__(self, description=''):
self.description = description
self.defaults = {}
self.preferences = []
self.group_list = []
self.groups = {}
self.set_buffer = {}
self.loads_pat = None
def has_option(self, name_or_option_object):
if name_or_option_object in self.preferences:
return True
for p in self.preferences:
if p.name == name_or_option_object:
return True
return False
def get_option(self, name_or_option_object):
idx = self.preferences.index(name_or_option_object)
if idx > -1:
return self.preferences[idx]
for p in self.preferences:
if p.name == name_or_option_object:
return p
def add_group(self, name, description=''):
if name in self.group_list:
raise ValueError('A group by the name %s already exists in this set'%name)
self.groups[name] = description
self.group_list.append(name)
return partial(self.add_opt, group=name)
def update(self, other):
for name in other.groups.keys():
self.groups[name] = other.groups[name]
if name not in self.group_list:
self.group_list.append(name)
for pref in other.preferences:
if pref in self.preferences:
self.preferences.remove(pref)
self.preferences.append(pref)
def smart_update(self, opts1, opts2):
'''
Updates the preference values in opts1 using only the non-default preference values in opts2.
'''
for pref in self.preferences:
new = getattr(opts2, pref.name, pref.default)
if new != pref.default:
setattr(opts1, pref.name, new)
def remove_opt(self, name):
if name in self.preferences:
self.preferences.remove(name)
def add_opt(self, name, switches=[], help=None, type=None, choices=None,
group=None, default=None, action=None, metavar=None):
'''
Add an option to this section.
:param name: The name of this option. Must be a valid Python identifier.
Must also be unique in this OptionSet and all its subsets.
:param switches: List of command line switches for this option
(as supplied to :module:`optparse`). If empty, this
option will not be added to the command line parser.
:param help: Help text.
:param type: Type checking of option values. Supported types are:
`None, 'choice', 'complex', 'float', 'int', 'string'`.
:param choices: List of strings or `None`.
:param group: Group this option belongs to. You must previously
have created this group with a call to :method:`add_group`.
:param default: The default value for this option.
:param action: The action to pass to optparse. Supported values are:
`None, 'count'`. For choices and boolean options,
action is automatically set correctly.
'''
pref = Option(name, switches=switches, help=help, type=type, choices=choices,
group=group, default=default, action=action, metavar=None)
if group is not None and group not in self.groups.keys():
raise ValueError('Group %s has not been added to this section'%group)
if pref in self.preferences:
raise ValueError('An option with the name %s already exists in this set.'%name)
self.preferences.append(pref)
self.defaults[name] = default
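    # Usage sketch (illustrative; the option name is hypothetical):
    #
    #     oset = OptionSet()
    #     oset.add_opt('margin', ['--margin'], default=5.0,
    #                  help='Page margin in pts. Default: %default')
    #     opts = oset.parse_string(None)  # falsy src -> all defaults
    #     assert opts.margin == 5.0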
def retranslate_help(self):
t = _
for opt in self.preferences:
if opt.help:
opt.help = t(opt.help)
if opt.name == 'use_primary_find_in_search':
opt.help = opt.help.format(u'ñ')
def option_parser(self, user_defaults=None, usage='', gui_mode=False):
from calibre.utils.config import OptionParser
parser = OptionParser(usage, gui_mode=gui_mode)
groups = defaultdict(lambda : parser)
for group, desc in self.groups.items():
groups[group] = parser.add_option_group(group.upper(), desc)
for pref in self.preferences:
if not pref.switches:
continue
g = groups[pref.group]
action = pref.action
if action is None:
action = 'store'
if pref.default is True or pref.default is False:
action = 'store_' + ('false' if pref.default else 'true')
args = dict(
dest=pref.name,
help=pref.help,
metavar=pref.metavar,
type=pref.type,
choices=pref.choices,
default=getattr(user_defaults, pref.name, pref.default),
action=action,
)
g.add_option(*pref.switches, **args)
return parser
def get_override_section(self, src):
match = self.OVERRIDE_PAT.search(src)
if match:
return match.group()
return ''
def parse_string(self, src):
options = {}
if src:
is_old_style = (isinstance(src, bytes) and src.startswith(b'#')) or (isinstance(src, unicode_type) and src.startswith(u'#'))
if is_old_style:
options = parse_old_style(src)
else:
try:
options = json_loads(src)
if not isinstance(options, dict):
raise Exception('options is not a dictionary')
except Exception as err:
try:
print('Failed to parse options string with error: {}'.format(err))
except Exception:
pass
opts = OptionValues()
for pref in self.preferences:
val = options.get(pref.name, pref.default)
# this module is imported, so __builtins__ is a dict here; look up the
# type name (e.g. 'int', 'float') among the builtins as a coercion function
formatter = __builtins__.get(pref.type, None)
if callable(formatter):
val = formatter(val)
setattr(opts, pref.name, val)
return opts
def serialize(self, opts, ignore_unserializable=False):
data = {pref.name: getattr(opts, pref.name, pref.default) for pref in self.preferences}
return json_dumps(data, ignore_unserializable=ignore_unserializable)
class ConfigInterface(object):
def __init__(self, description):
self.option_set = OptionSet(description=description)
self.add_opt = self.option_set.add_opt
self.add_group = self.option_set.add_group
self.remove_opt = self.remove = self.option_set.remove_opt
self.parse_string = self.option_set.parse_string
self.get_option = self.option_set.get_option
self.preferences = self.option_set.preferences
def update(self, other):
self.option_set.update(other.option_set)
def option_parser(self, usage='', gui_mode=False):
return self.option_set.option_parser(user_defaults=self.parse(),
usage=usage, gui_mode=gui_mode)
def smart_update(self, opts1, opts2):
self.option_set.smart_update(opts1, opts2)
class Config(ConfigInterface):
'''
A file based configuration.
'''
def __init__(self, basename, description=''):
ConfigInterface.__init__(self, description)
self.filename_base = basename
@property
def config_file_path(self):
return os.path.join(config_dir, self.filename_base + '.py.json')
def parse(self):
src = ''
migrate = False
path = self.config_file_path
if os.path.exists(path):
with ExclusiveFile(path) as f:
try:
src = f.read().decode('utf-8')
except ValueError:
print("Failed to parse", path)
traceback.print_exc()
if not src:
path = path.rpartition('.')[0]
from calibre.utils.shared_file import share_open
try:
with share_open(path, 'rb') as f:
src = f.read().decode('utf-8')
except Exception:
pass
else:
migrate = bool(src)
ans = self.option_set.parse_string(src)
if migrate:
new_src = self.option_set.serialize(ans, ignore_unserializable=True)
with ExclusiveFile(self.config_file_path) as f:
f.seek(0), f.truncate()
f.write(new_src)
return ans
def set(self, name, val):
if not self.option_set.has_option(name):
raise ValueError('The option %s is not defined.'%name)
if not os.path.exists(config_dir):
make_config_dir()
with ExclusiveFile(self.config_file_path) as f:
src = f.read()
opts = self.option_set.parse_string(src)
setattr(opts, name, val)
src = self.option_set.serialize(opts)
f.seek(0)
f.truncate()
if isinstance(src, unicode_type):
src = src.encode('utf-8')
f.write(src)
class StringConfig(ConfigInterface):
'''
A string based configuration
'''
def __init__(self, src, description=''):
ConfigInterface.__init__(self, description)
self.set_src(src)
def set_src(self, src):
self.src = src
if isinstance(self.src, bytes):
self.src = self.src.decode('utf-8')
def parse(self):
return self.option_set.parse_string(self.src)
def set(self, name, val):
if not self.option_set.has_option(name):
raise ValueError('The option %s is not defined.'%name)
opts = self.option_set.parse_string(self.src)
setattr(opts, name, val)
self.set_src(self.option_set.serialize(opts))
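# Usage sketch (illustrative): StringConfig round-trips option values through
# an in-memory string instead of a config file.
#
#     c = StringConfig('')
#     c.add_opt('level', default=1)
#     c.set('level', 3)
#     assert c.parse().level == 3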
class ConfigProxy(object):
'''
A Proxy to minimize file reads for widely used config settings
'''
def __init__(self, config):
self.__config = config
self.__opts = None
@property
def defaults(self):
return self.__config.option_set.defaults
def refresh(self):
self.__opts = self.__config.parse()
def retranslate_help(self):
self.__config.option_set.retranslate_help()
def __getitem__(self, key):
return self.get(key)
def __setitem__(self, key, val):
return self.set(key, val)
def __delitem__(self, key):
self.set(key, self.defaults[key])
def get(self, key):
if self.__opts is None:
self.refresh()
return getattr(self.__opts, key)
def set(self, key, val):
if self.__opts is None:
self.refresh()
setattr(self.__opts, key, val)
return self.__config.set(key, val)
def help(self, key):
return self.__config.get_option(key).help
def create_global_prefs(conf_obj=None):
c = Config('global', 'calibre wide preferences') if conf_obj is None else conf_obj
c.add_opt('database_path',
default=os.path.expanduser('~/library1.db'),
help=_('Path to the database in which books are stored'))
c.add_opt('filename_pattern', default=u'(?P<title>.+) - (?P<author>[^_]+)',
help=_('Pattern to guess metadata from filenames'))
c.add_opt('isbndb_com_key', default='',
help=_('Access key for isbndb.com'))
c.add_opt('network_timeout', default=5,
help=_('Default timeout for network operations (seconds)'))
c.add_opt('library_path', default=None,
help=_('Path to directory in which your library of books is stored'))
c.add_opt('language', default=None,
help=_('The language in which to display the user interface'))
c.add_opt('output_format', default='EPUB',
help=_('The default output format for e-book conversions. When auto-converting'
' to send to a device this can be overridden by individual device preferences.'
' These can be changed by right clicking the device icon in calibre and'
' choosing "Configure".'))
c.add_opt('input_format_order', default=['EPUB', 'AZW3', 'MOBI', 'LIT', 'PRC',
'FB2', 'HTML', 'HTM', 'XHTM', 'SHTML', 'XHTML', 'ZIP', 'DOCX', 'ODT', 'RTF', 'PDF',
'TXT'],
help=_('Ordered list of formats to prefer for input.'))
c.add_opt('read_file_metadata', default=True,
help=_('Read metadata from files'))
c.add_opt('worker_process_priority', default='normal',
help=_('The priority of worker processes. A higher priority '
'means they run faster and consume more resources. '
'Most tasks like conversion/news download/adding books/etc. '
'are affected by this setting.'))
c.add_opt('swap_author_names', default=False,
help=_('Swap author first and last names when reading metadata'))
c.add_opt('add_formats_to_existing', default=False,
help=_('Add new formats to existing book records'))
c.add_opt('check_for_dupes_on_ctl', default=False,
help=_('Check for duplicates when copying to another library'))
c.add_opt('installation_uuid', default=None, help='Installation UUID')
c.add_opt('new_book_tags', default=[], help=_('Tags to apply to books added to the library'))
c.add_opt('mark_new_books', default=False, help=_(
'Mark newly added books. The mark is a temporary mark that is automatically removed when calibre is restarted.'))
# these are here instead of the gui preferences because calibredb and
# calibre server can execute searches
c.add_opt('saved_searches', default={}, help=_('List of named saved searches'))
c.add_opt('user_categories', default={}, help=_('User-created Tag browser categories'))
c.add_opt('manage_device_metadata', default='manual',
help=_('How and when calibre updates metadata on the device.'))
c.add_opt('limit_search_columns', default=False,
help=_('When searching for text without using lookup '
'prefixes, as for example, Red instead of title:Red, '
'limit the columns searched to those named below.'))
c.add_opt('limit_search_columns_to',
default=['title', 'authors', 'tags', 'series', 'publisher'],
help=_('Choose columns to be searched when not using prefixes, '
'as for example, when searching for Red instead of '
'title:Red. Enter a list of search/lookup names '
'separated by commas. Only takes effect if you set the option '
'to limit search columns above.'))
c.add_opt('use_primary_find_in_search', default=True,
help=_(u'Characters typed in the search box will match their '
'accented versions, based on the language you have chosen '
'for the calibre interface. For example, in '
u'English, searching for n will match both {} and n, but if '
'your language is Spanish it will only match n. Note that '
'this is much slower than a simple search on very large '
'libraries. Also, this option will have no effect if you turn '
'on case-sensitive searching'))
c.add_opt('case_sensitive', default=False, help=_(
'Make searches case-sensitive'))
c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.')
return c
prefs = ConfigProxy(create_global_prefs())
if prefs['installation_uuid'] is None:
import uuid
prefs['installation_uuid'] = unicode_type(uuid.uuid4())
# Read tweaks
def tweaks_file():
return os.path.join(config_dir, 'tweaks.json')
def make_unicode(obj):
if isinstance(obj, bytes):
try:
return obj.decode('utf-8')
except UnicodeDecodeError:
return obj.decode(preferred_encoding, errors='replace')
if isinstance(obj, (list, tuple)):
return list(map(make_unicode, obj))
if isinstance(obj, dict):
return {make_unicode(k): make_unicode(v) for k, v in iteritems(obj)}
return obj
def normalize_tweak(val):
if isinstance(val, (list, tuple)):
return tuple(map(normalize_tweak, val))
if isinstance(val, dict):
return {k: normalize_tweak(v) for k, v in iteritems(val)}
return val
def write_custom_tweaks(tweaks_dict):
make_config_dir()
tweaks_dict = make_unicode(tweaks_dict)
changed_tweaks = {}
default_tweaks = exec_tweaks(default_tweaks_raw())
for key, cval in iteritems(tweaks_dict):
if key in default_tweaks and normalize_tweak(cval) == normalize_tweak(default_tweaks[key]):
continue
changed_tweaks[key] = cval
raw = json_dumps(changed_tweaks)
with open(tweaks_file(), 'wb') as f:
f.write(raw)
def exec_tweaks(path):
if isinstance(path, bytes):
raw = path
fname = '<string>'
else:
with open(path, 'rb') as f:
raw = f.read()
fname = f.name
code = compile(raw, fname, 'exec')
l = {}
g = {'__file__': fname}
exec(code, g, l)
return l
def read_custom_tweaks():
make_config_dir()
tf = tweaks_file()
ans = {}
if os.path.exists(tf):
with open(tf, 'rb') as f:
raw = f.read()
raw = raw.strip()
if not raw:
return ans
try:
return json_loads(raw)
except Exception:
import traceback
traceback.print_exc()
return ans
old_tweaks_file = tf.rpartition('.')[0] + '.py'
if os.path.exists(old_tweaks_file):
ans = exec_tweaks(old_tweaks_file)
ans = make_unicode(ans)
write_custom_tweaks(ans)
return ans
def default_tweaks_raw():
return P('default_tweaks.py', data=True, allow_user_override=False)
def read_tweaks():
default_tweaks = exec_tweaks(default_tweaks_raw())
try:
custom_tweaks = read_custom_tweaks()
except Exception:
custom_tweaks = {}
default_tweaks.update(custom_tweaks)
return default_tweaks
tweaks = read_tweaks()
def reset_tweaks_to_default():
default_tweaks = exec_tweaks(default_tweaks_raw())
tweaks.clear()
tweaks.update(default_tweaks)
class Tweak(object):
def __init__(self, name, value):
self.name, self.value = name, value
def __enter__(self):
self.origval = tweaks[self.name]
tweaks[self.name] = self.value
def __exit__(self, *args):
tweaks[self.name] = self.origval
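# Usage sketch (illustrative): temporarily override a tweak for the duration
# of a block; the original value is restored on exit.
#
#     with Tweak('series_index_auto_increment', 'next_free'):
#         ...  # code here sees the overridden value in tweaks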

View File

@@ -0,0 +1,485 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from datetime import datetime, time as dtime, timedelta, MINYEAR, MAXYEAR
from functools import partial
from calibre import strftime
from calibre.constants import iswindows, isosx, plugins, preferred_encoding
from calibre.utils.iso8601 import utc_tz, local_tz, UNDEFINED_DATE
from calibre.utils.localization import lcdata
from polyglot.builtins import unicode_type, native_string_type
_utc_tz = utc_tz
_local_tz = local_tz
# When parsing ambiguous dates that could be either dd-MM or MM-dd, use the
# user's locale preferences
if iswindows:
import ctypes
LOCALE_SSHORTDATE, LOCALE_USER_DEFAULT = 0x1f, 0
buf = ctypes.create_string_buffer(b'\0', 255)
try:
ctypes.windll.kernel32.GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SSHORTDATE, buf, 255)
parse_date_day_first = buf.value.index(b'd') < buf.value.index(b'M')
except:
parse_date_day_first = False
del ctypes, LOCALE_SSHORTDATE, buf, LOCALE_USER_DEFAULT
elif isosx:
try:
date_fmt = plugins['usbobserver'][0].date_format()
parse_date_day_first = date_fmt.index('d') < date_fmt.index('M')
except:
parse_date_day_first = False
else:
try:
def first_index(raw, queries):
for q in queries:
try:
return raw.index(native_string_type(q))
except ValueError:
pass
return -1
import locale
raw = locale.nl_langinfo(locale.D_FMT)
parse_date_day_first = first_index(raw, ('%d', '%a', '%A')) < first_index(raw, ('%m', '%b', '%B'))
del raw, first_index
except:
parse_date_day_first = False
DEFAULT_DATE = datetime(2000,1,1, tzinfo=utc_tz)
EPOCH = datetime(1970, 1, 1, tzinfo=_utc_tz)
def is_date_undefined(qt_or_dt):
d = qt_or_dt
if d is None:
return True
if hasattr(d, 'toString'):
if hasattr(d, 'date'):
d = d.date()
try:
d = datetime(d.year(), d.month(), d.day(), tzinfo=utc_tz)
except ValueError:
return True # Undefined QDate
return d.year < UNDEFINED_DATE.year or (
d.year == UNDEFINED_DATE.year and
d.month == UNDEFINED_DATE.month and
d.day == UNDEFINED_DATE.day)
_iso_pat = None
def iso_pat():
global _iso_pat
if _iso_pat is None:
_iso_pat = re.compile(r'\d{4}[/.-]\d{1,2}[/.-]\d{1,2}')
return _iso_pat
def parse_date(date_string, assume_utc=False, as_utc=True, default=None):
'''
Parse a date/time string into a timezone aware datetime object. The timezone
is always either UTC or the local timezone.
:param assume_utc: If True and date_string does not specify a timezone,
assume UTC, otherwise assume local timezone.
:param as_utc: If True, return a UTC datetime
:param default: Missing fields are filled in from default. If None, the
current month and year are used.
'''
from dateutil.parser import parse
if not date_string:
return UNDEFINED_DATE
if isinstance(date_string, bytes):
date_string = date_string.decode(preferred_encoding, 'replace')
if default is None:
func = datetime.utcnow if assume_utc else datetime.now
default = func().replace(day=15, hour=0, minute=0, second=0, microsecond=0,
tzinfo=_utc_tz if assume_utc else _local_tz)
if iso_pat().match(date_string) is not None:
dt = parse(date_string, default=default)
else:
dt = parse(date_string, default=default, dayfirst=parse_date_day_first)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz)
return dt.astimezone(_utc_tz if as_utc else _local_tz)
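# Behaviour sketch (illustrative; exact fields depend on the current date and
# locale, since missing parts come from `default`):
#
#     dt = parse_date('2010-01-09')               # -> timezone-aware, converted to UTC
#     dt = parse_date('9/1/10', assume_utc=True)  # day/month order follows the locale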
def fix_only_date(val):
n = val + timedelta(days=1)
if n.month > val.month:
val = val.replace(day=val.day-1)
if val.day == 1:
val = val.replace(day=2)
return val
def parse_only_date(raw, assume_utc=True, as_utc=True):
'''
Parse a date string that contains no time information in a manner that
guarantees that the month and year are always correct in all timezones, and
the day is at most one day wrong.
'''
f = utcnow if assume_utc else now
default = f().replace(hour=0, minute=0, second=0, microsecond=0,
day=15)
return fix_only_date(parse_date(raw, default=default, assume_utc=assume_utc, as_utc=as_utc))
def strptime(val, fmt, assume_utc=False, as_utc=True):
dt = datetime.strptime(val, fmt)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz)
return dt.astimezone(_utc_tz if as_utc else _local_tz)
def dt_factory(time_t, assume_utc=False, as_utc=True):
dt = datetime(*(time_t[0:6]))
if dt.tzinfo is None:
dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz)
return dt.astimezone(_utc_tz if as_utc else _local_tz)
safeyear = lambda x: min(max(x, MINYEAR), MAXYEAR)
def qt_to_dt(qdate_or_qdatetime, as_utc=True):
o = qdate_or_qdatetime
if hasattr(o, 'toUTC'):
# QDateTime
o = o.toUTC()
d, t = o.date(), o.time()
try:
ans = datetime(safeyear(d.year()), d.month(), d.day(), t.hour(), t.minute(), t.second(), t.msec()*1000, utc_tz)
except ValueError:
ans = datetime(safeyear(d.year()), d.month(), 1, t.hour(), t.minute(), t.second(), t.msec()*1000, utc_tz)
if not as_utc:
ans = ans.astimezone(local_tz)
return ans
try:
dt = datetime(safeyear(o.year()), o.month(), o.day()).replace(tzinfo=_local_tz)
except ValueError:
dt = datetime(safeyear(o.year()), o.month(), 1).replace(tzinfo=_local_tz)
return dt.astimezone(_utc_tz if as_utc else _local_tz)
def fromtimestamp(ctime, as_utc=True):
dt = datetime.utcfromtimestamp(ctime).replace(tzinfo=_utc_tz)
if not as_utc:
dt = dt.astimezone(_local_tz)
return dt
def fromordinal(day, as_utc=True):
return datetime.fromordinal(day).replace(
tzinfo=_utc_tz if as_utc else _local_tz)
def isoformat(date_time, assume_utc=False, as_utc=True, sep='T'):
if not hasattr(date_time, 'tzinfo'):
return unicode_type(date_time.isoformat())
if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
date_time = date_time.astimezone(_utc_tz if as_utc else _local_tz)
# native_string_type(sep) because isoformat barfs with unicode sep on python 2.x
return unicode_type(date_time.isoformat(native_string_type(sep)))
def internal_iso_format_string():
return 'yyyy-MM-ddThh:mm:ss'
def w3cdtf(date_time, assume_utc=False):
if hasattr(date_time, 'tzinfo'):
if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
date_time = date_time.astimezone(_utc_tz)  # trailing 'Z' below means UTC
return unicode_type(date_time.strftime('%Y-%m-%dT%H:%M:%SZ'))
def as_local_time(date_time, assume_utc=True):
if not hasattr(date_time, 'tzinfo'):
return date_time
if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
return date_time.astimezone(_local_tz)
def dt_as_local(dt):
if dt.tzinfo is local_tz:
return dt
return dt.astimezone(local_tz)
def as_utc(date_time, assume_utc=True):
if not hasattr(date_time, 'tzinfo'):
return date_time
if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
return date_time.astimezone(_utc_tz)
def now():
return datetime.now().replace(tzinfo=_local_tz)
def utcnow():
return datetime.utcnow().replace(tzinfo=_utc_tz)
def utcfromtimestamp(stamp):
try:
return datetime.utcfromtimestamp(stamp).replace(tzinfo=_utc_tz)
except ValueError:
# Raised if stamp is out of range for the platform's gmtime function.
# For example, this happens with negative values on Windows.
try:
return EPOCH + timedelta(seconds=stamp)
except (ValueError, OverflowError):
# datetime can only represent years between 1 and 9999
import traceback
traceback.print_exc()
return utcnow()
def timestampfromdt(dt, assume_utc=True):
return (as_utc(dt, assume_utc=assume_utc) - EPOCH).total_seconds()
# Format date functions {{{
def fd_format_hour(dt, ampm, hr):
l = len(hr)
h = dt.hour
if ampm:
h = h%12
if l == 1:
return '%d'%h
return '%02d'%h
def fd_format_minute(dt, ampm, min):
l = len(min)
if l == 1:
return '%d'%dt.minute
return '%02d'%dt.minute
def fd_format_second(dt, ampm, sec):
l = len(sec)
if l == 1:
return '%d'%dt.second
return '%02d'%dt.second
def fd_format_ampm(dt, ampm, ap):
res = strftime('%p', t=dt.timetuple())
if ap == 'AP':
return res
return res.lower()
def fd_format_day(dt, ampm, dy):
l = len(dy)
if l == 1:
return '%d'%dt.day
if l == 2:
return '%02d'%dt.day
return lcdata['abday' if l == 3 else 'day'][(dt.weekday() + 1) % 7]
def fd_format_month(dt, ampm, mo):
l = len(mo)
if l == 1:
return '%d'%dt.month
if l == 2:
return '%02d'%dt.month
return lcdata['abmon' if l == 3 else 'mon'][dt.month - 1]
def fd_format_year(dt, ampm, yr):
if len(yr) == 2:
return '%02d'%(dt.year % 100)
return '%04d'%dt.year
fd_function_index = {
'd': fd_format_day,
'M': fd_format_month,
'y': fd_format_year,
'h': fd_format_hour,
'm': fd_format_minute,
's': fd_format_second,
'a': fd_format_ampm,
'A': fd_format_ampm,
}
def fd_repl_func(dt, ampm, mo):
s = mo.group(0)
if not s:
return ''
return fd_function_index[s[0]](dt, ampm, s)
def format_date(dt, format, assume_utc=False, as_utc=False):
''' Return a date formatted as a string using a subset of Qt's formatting codes '''
if not format:
format = 'dd MMM yyyy'
if not isinstance(dt, datetime):
dt = datetime.combine(dt, dtime())
if hasattr(dt, 'tzinfo'):
if dt.tzinfo is None:
dt = dt.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
dt = dt.astimezone(_utc_tz if as_utc else _local_tz)
if format == 'iso':
return isoformat(dt, assume_utc=assume_utc, as_utc=as_utc)
if dt == UNDEFINED_DATE:
return ''
repl_func = partial(fd_repl_func, dt, 'ap' in format.lower())
return re.sub(
'(s{1,2})|(m{1,2})|(h{1,2})|(ap)|(AP)|(d{1,4}|M{1,4}|(?:yyyy|yy))',
repl_func, format)
# }}}
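# Behaviour sketch for format_date (illustrative; month names come from lcdata,
# shown here for an English locale):
#
#     from datetime import datetime
#     d = datetime(2010, 1, 9, 14, 5, 0)
#     format_date(d, 'dd MMM yyyy')  # -> '09 Jan 2010'
#     format_date(d, 'd/M/yy')       # -> '9/1/10'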
# Clean date functions {{{
def cd_has_hour(tt, dt):
tt['hour'] = dt.hour
return ''
def cd_has_minute(tt, dt):
tt['min'] = dt.minute
return ''
def cd_has_second(tt, dt):
tt['sec'] = dt.second
return ''
def cd_has_day(tt, dt):
tt['day'] = dt.day
return ''
def cd_has_month(tt, dt):
tt['mon'] = dt.month
return ''
def cd_has_year(tt, dt):
tt['year'] = dt.year
return ''
cd_function_index = {
'd': cd_has_day,
'M': cd_has_month,
'y': cd_has_year,
'h': cd_has_hour,
'm': cd_has_minute,
's': cd_has_second
}
def cd_repl_func(tt, dt, match_object):
s = match_object.group(0)
if not s:
return ''
return cd_function_index[s[0]](tt, dt)
def clean_date_for_sort(dt, fmt=None):
''' Return dt with fields not shown in the format set to a default '''
if not fmt:
fmt = 'yyMd'
if not isinstance(dt, datetime):
dt = datetime.combine(dt, dtime())
if hasattr(dt, 'tzinfo'):
if dt.tzinfo is None:
dt = dt.replace(tzinfo=_local_tz)
dt = as_local_time(dt)
if fmt == 'iso':
fmt = 'yyMdhms'
tt = {'year':UNDEFINED_DATE.year, 'mon':UNDEFINED_DATE.month,
'day':UNDEFINED_DATE.day, 'hour':UNDEFINED_DATE.hour,
'min':UNDEFINED_DATE.minute, 'sec':UNDEFINED_DATE.second}
repl_func = partial(cd_repl_func, tt, dt)
re.sub('(s{1,2})|(m{1,2})|(h{1,2})|(d{1,4}|M{1,4}|(?:yyyy|yy))', repl_func, fmt)
return dt.replace(year=tt['year'], month=tt['mon'], day=tt['day'], hour=tt['hour'],
minute=tt['min'], second=tt['sec'], microsecond=0)
# }}}
def replace_months(datestr, clang):
# Replace months by english equivalent for parse_date
frtoen = {
'[jJ]anvier': 'jan',
'[fF].vrier': 'feb',
'[mM]ars': 'mar',
'[aA]vril': 'apr',
'[mM]ai': 'may',
'[jJ]uin': 'jun',
'[jJ]uillet': 'jul',
'[aA]o.t': 'aug',
'[sS]eptembre': 'sep',
'[Oo]ctobre': 'oct',
'[nN]ovembre': 'nov',
'[dD].cembre': 'dec'}
detoen = {
'[jJ]anuar': 'jan',
'[fF]ebruar': 'feb',
'[mM].rz': 'mar',
'[aA]pril': 'apr',
'[mM]ai': 'may',
'[jJ]uni': 'jun',
'[jJ]uli': 'jul',
'[aA]ugust': 'aug',
'[sS]eptember': 'sep',
'[Oo]ktober': 'oct',
'[nN]ovember': 'nov',
'[dD]ezember': 'dec'}
if clang == 'fr':
dictoen = frtoen
elif clang == 'de':
dictoen = detoen
else:
return datestr
for k in dictoen:
tmp = re.sub(k, dictoen[k], datestr)
if tmp != datestr:
break
return tmp
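# Behaviour sketch (illustrative): month names are mapped to their English
# abbreviations so parse_date/dateutil can handle them.
#
#     replace_months('10 janvier 2010', 'fr')   # -> '10 jan 2010'
#     replace_months('10 Dezember 2010', 'de')  # -> '10 dec 2010'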

View File

@@ -0,0 +1,568 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
# License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import unicode_literals
# Contains various tweaks that affect calibre behavior. Only edit this file if
# you know what you are doing. If you delete this file, it will be recreated from
# defaults.
#: Auto increment series index
# The algorithm used to assign a book added to an existing series a series number.
# New series numbers assigned using this tweak are always integer values, except
# if a constant non-integer is specified.
# Possible values are:
# next - First available integer larger than the largest existing number
# first_free - First available integer larger than 0
# next_free - First available integer larger than the smallest existing number
# last_free - First available integer smaller than the largest existing number. Return largest existing + 1 if no free number is found
# const - Assign the number 1 always
# no_change - Do not change the series index
# a number - Assign that number always. The number is not in quotes. Note that 0.0 can be used here.
# Examples:
# series_index_auto_increment = 'next'
# series_index_auto_increment = 'next_free'
# series_index_auto_increment = 16.5
#
# Set the use_series_auto_increment_tweak_when_importing tweak to True to
# use the above values when importing/adding books. If this tweak is set to
# False (the default) then the series number will be set to 1 if it is not
# explicitly set during the import. If set to True, then the
# series index will be set according to the series_index_auto_increment setting.
# Note that the use_series_auto_increment_tweak_when_importing tweak is used
# only when a value is not provided during import. If the importing regular
# expression produces a value for series_index, or if you are reading metadata
# from books and the import plugin produces a value, then that value will
# be used irrespective of the setting of the tweak.
series_index_auto_increment = 'next'
use_series_auto_increment_tweak_when_importing = False
#: Add separator after completing an author name
# Should the completion separator be appended to the end of the completed
# text to automatically begin a new completion operation for authors.
# Can be either True or False
authors_completer_append_separator = False
#: Author sort name algorithm
# The algorithm used to copy author to author_sort.
# Possible values are:
# invert: use "fn ln" -> "ln, fn"
# copy : copy author to author_sort without modification
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
# nocomma : "fn ln" -> "ln fn" (without the comma)
# When this tweak is changed, the author_sort values stored with each author
# must be recomputed by right-clicking on an author in the left-hand tags pane,
# selecting 'manage authors', and pressing 'Recalculate all author sort values'.
# The author name suffixes are words that are ignored when they occur at the
# end of an author name. The case of the suffix is ignored and trailing
# periods are automatically handled. The same is true for prefixes.
# The author name copy words are a set of words which if they occur in an
# author name cause the automatically generated author sort string to be
# identical to the author name. This means that the sort for a string like Acme
# Inc. will be Acme Inc. instead of Inc., Acme
author_sort_copy_method = 'comma'
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
'MD', 'M.D', 'I', 'II', 'III', 'IV',
'Junior', 'Senior')
author_name_prefixes = ('Mr', 'Mrs', 'Ms', 'Dr', 'Prof')
author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')
#: Splitting multiple author names
# By default, calibre splits a string containing multiple author names on
# ampersands and the words "and" and "with". You can customize the splitting
# by changing the regular expression below. Strings are split on whatever the
# specified regular expression matches, in addition to ampersands.
# Default: r'(?i),?\s+(and|with)\s+'
authors_split_regex = r'(?i),?\s+(and|with)\s+'
#: Use author sort in Tag browser
# Set which author field to display in the tags pane (the list of authors,
# series, publishers etc on the left hand side). The choices are author and
# author_sort. This tweak affects only what is displayed under the authors
# category in the tags pane and Content server. Please note that if you set this
# to author_sort, it is very possible to see duplicate names in the list because
# although it is guaranteed that author names are unique, there is no such
# guarantee for author_sort values. Showing duplicates won't break anything, but
# it could lead to some confusion. When using 'author_sort', the tooltip will
# show the author's name.
# Examples:
# categories_use_field_for_author_name = 'author'
# categories_use_field_for_author_name = 'author_sort'
categories_use_field_for_author_name = 'author'
#: Control partitioning of Tag browser
# When partitioning the tags browser, the format of the subcategory label is
# controlled by a template: categories_collapsed_name_template if sorting by
# name, categories_collapsed_rating_template if sorting by average rating, and
# categories_collapsed_popularity_template if sorting by popularity. There are
# two variables available to the template: first and last. The variable 'first'
# is the initial item in the subcategory, and the variable 'last' is the final
# item in the subcategory. Both variables are 'objects'; they each have multiple
# values that are obtained by using a suffix. For example, first.name for an
# author category will be the name of the author. The sub-values available are:
# name: the printable name of the item
# count: the number of books that reference this item
# avg_rating: the average rating of all the books referencing this item
# sort: the sort value. For authors, this is the author_sort for that author
# category: the category (e.g., authors, series) that the item is in.
# Note that the "r'" in front of the { is necessary if there are backslashes
# (\ characters) in the template. It doesn't hurt anything to leave it there
# even if there aren't any backslashes.
categories_collapsed_name_template = r'{first.sort:shorten(4,,0)} - {last.sort:shorten(4,,0)}'
categories_collapsed_rating_template = r'{first.avg_rating:4.2f:ifempty(0)} - {last.avg_rating:4.2f:ifempty(0)}'
categories_collapsed_popularity_template = r'{first.count:d} - {last.count:d}'
#: Control order of categories in the Tag browser
# Change the following dict to change the order that categories are displayed in
# the Tag browser. Items are named using their lookup name, and will be sorted
# using the number supplied. The lookup name '*' stands for all names that
# otherwise do not appear. Two names with the same value will be sorted
# using the default order; the one used when the dict is empty.
# Example: tag_browser_category_order = {'series':1, 'tags':2, '*':3}
# resulting in the order series, tags, then everything else in default order.
tag_browser_category_order = {'*':1}
#: Specify columns to sort the booklist by on startup
# Provide a set of columns to be sorted on when calibre starts.
# The argument is None if saved sort history is to be used
# otherwise it is a list of column,order pairs. Column is the
# lookup/search name, found using the tooltip for the column
# Order is 0 for ascending, 1 for descending.
# For example, set it to [('authors',0),('title',0)] to sort by
# title within authors.
sort_columns_at_startup = None
#: Control how dates are displayed
# Format to be used for publication date and the timestamp (date).
# A string controlling how the publication date is displayed in the GUI
# d the day as number without a leading zero (1 to 31)
# dd the day as number with a leading zero (01 to 31)
# ddd the abbreviated localized day name (e.g. 'Mon' to 'Sun').
# dddd the long localized day name (e.g. 'Monday' to 'Sunday').
# M the month as number without a leading zero (1-12)
# MM the month as number with a leading zero (01-12)
# MMM the abbreviated localized month name (e.g. 'Jan' to 'Dec').
# MMMM the long localized month name (e.g. 'January' to 'December').
# yy the year as two digit number (00-99)
# yyyy the year as four digit number
# h the hours without a leading 0 (0 to 11 or 0 to 23, depending on am/pm)
# hh the hours with a leading 0 (00 to 11 or 00 to 23, depending on am/pm)
# m the minutes without a leading 0 (0 to 59)
# mm the minutes with a leading 0 (00 to 59)
# s the seconds without a leading 0 (0 to 59)
# ss the seconds with a leading 0 (00 to 59)
# ap use a 12-hour clock instead of a 24-hour clock, with "ap" replaced by the localized string for am or pm
# AP use a 12-hour clock instead of a 24-hour clock, with "AP" replaced by the localized string for AM or PM
# iso the date with time and timezone. Must be the only format present
# For example, given the date of 9 Jan 2010, the following formats show
# MMM yyyy ==> Jan 2010 yyyy ==> 2010 dd MMM yyyy ==> 09 Jan 2010
# MM/yyyy ==> 01/2010 d/M/yy ==> 9/1/10 yy ==> 10
#
# publication default if not set: MMM yyyy
# timestamp default if not set: dd MMM yyyy
# last_modified_display_format if not set: dd MMM yyyy
gui_pubdate_display_format = 'MMM yyyy'
gui_timestamp_display_format = 'dd MMM yyyy'
gui_last_modified_display_format = 'dd MMM yyyy'
#: Control sorting of titles and series in the library display
# Control title and series sorting in the library view. If set to
# 'library_order', the title sort field will be used instead of the title.
# Unless you have manually edited the title sort field, leading articles such as
# The and A will be ignored. If set to 'strictly_alphabetic', the titles will be
# sorted as-is (sort by title instead of title sort). For example, with
# library_order, The Client will sort under 'C'. With strictly_alphabetic, the
# book will sort under 'T'.
# This flag affects calibre's library display. It has no effect on devices. In
# addition, titles for books added before changing the flag will retain their
# order until the title is edited. Editing a title and hitting return
# without changing anything is sufficient to change the sort. Or you can use
# the 'Update title sort' action in the Bulk metadata edit dialog to update
# it for many books at once.
title_series_sorting = 'library_order'
#: Control formatting of title and series when used in templates
# Control how title and series names are formatted when saving to disk/sending
# to device. The behavior depends on the field being processed. If processing
# title, then if this tweak is set to 'library_order', the title will be
# replaced with title_sort. If it is set to 'strictly_alphabetic', then the
# title will not be changed. If processing series, then if set to
# 'library_order', articles such as 'The' and 'An' will be moved to the end. If
# set to 'strictly_alphabetic', the series will be sent without change.
# For example, if the tweak is set to library_order, "The Lord of the Rings"
# will become "Lord of the Rings, The". If the tweak is set to
# strictly_alphabetic, it would remain "The Lord of the Rings". Note that the
# formatter function raw_field will return the base value for title and
# series regardless of the setting of this tweak.
save_template_title_series_sorting = 'library_order'
#: Set the list of words considered to be "articles" for sort strings
# Set the list of words that are to be considered 'articles' when computing the
# title sort strings. The articles differ by language. By default, calibre uses
# a combination of articles from English and whatever language the calibre user
# interface is set to. In addition, in some contexts where the book language is
# available, the language of the book is used. You can change the list of
# articles for a given language or add a new language by editing
# per_language_title_sort_articles. To tell calibre to use a language other
# than the user interface language, set default_language_for_title_sort. For
# example, to use German, set it to 'deu'. A value of None means the user
# interface language is used. The setting title_sort_articles is ignored
# (present only for legacy reasons).
per_language_title_sort_articles = {
# English
'eng' : (r'A\s+', r'The\s+', r'An\s+'),
# Esperanto
'epo': (r'La\s+', r"L'", 'L´'),
# Spanish
'spa' : (r'El\s+', r'La\s+', r'Lo\s+', r'Los\s+', r'Las\s+', r'Un\s+',
r'Una\s+', r'Unos\s+', r'Unas\s+'),
# French
'fra' : (r'Le\s+', r'La\s+', r"L'", u'L´', u'L’', r'Les\s+', r'Un\s+', r'Une\s+',
r'Des\s+', r'De\s+La\s+', r'De\s+', r"D'", u'D´', u'D’'),
# Italian
'ita': ('Lo\\s+', 'Il\\s+', "L'", 'L´', 'La\\s+', 'Gli\\s+',
'I\\s+', 'Le\\s+', 'Uno\\s+', 'Un\\s+', 'Una\\s+', "Un'",
'Un´', 'Dei\\s+', 'Degli\\s+', 'Delle\\s+', 'Del\\s+',
'Della\\s+', 'Dello\\s+', "Dell'", 'Dell´'),
# Portuguese
'por' : (r'A\s+', r'O\s+', r'Os\s+', r'As\s+', r'Um\s+', r'Uns\s+',
r'Uma\s+', r'Umas\s+', ),
# Romanian
'ron' : (r'Un\s+', r'O\s+', r'Nişte\s+', ),
# German
'deu' : (r'Der\s+', r'Die\s+', r'Das\s+', r'Den\s+', r'Ein\s+',
r'Eine\s+', r'Einen\s+', r'Dem\s+', r'Des\s+', r'Einem\s+',
r'Eines\s+'),
# Dutch
'nld' : (r'De\s+', r'Het\s+', r'Een\s+', r"'n\s+", r"'s\s+", r'Ene\s+',
r'Ener\s+', r'Enes\s+', r'Den\s+', r'Der\s+', r'Des\s+',
r"'t\s+"),
# Swedish
'swe' : (r'En\s+', r'Ett\s+', r'Det\s+', r'Den\s+', r'De\s+', ),
# Turkish
'tur' : (r'Bir\s+', ),
# Afrikaans
'afr' : (r"'n\s+", r'Die\s+', ),
# Greek
'ell' : (r'O\s+', r'I\s+', r'To\s+', r'Ta\s+', r'Tus\s+', r'Tis\s+',
r"'Enas\s+", r"'Mia\s+", r"'Ena\s+", r"'Enan\s+", ),
# Hungarian
'hun' : (r'A\s+', r'Az\s+', r'Egy\s+',),
}
default_language_for_title_sort = None
title_sort_articles=r'^(A|The|An)\s+'
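# A minimal illustration (not part of calibre itself) of how one of these
# article patterns matches a leading article when a sort string is computed:
#   import re
#   re.match(r'^(A|The|An)\s+', 'The Client').group()  # -> 'The '
# The per-language tuples above supply the alternatives used to build such a
# pattern for the language in effect.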
#: Specify a folder calibre should connect to at startup
# Specify a folder that calibre should connect to at startup using
# connect_to_folder. This must be a full path to the folder. If the folder does
# not exist when calibre starts, it is ignored.
# Example for Windows:
# auto_connect_to_folder = 'C:/Users/someone/Desktop/testlib'
# Example for other operating systems:
# auto_connect_to_folder = '/home/dropbox/My Dropbox/someone/library'
auto_connect_to_folder = ''
#: Specify renaming rules for SONY collections
# Specify renaming rules for sony collections. This tweak is only applicable if
# metadata management is set to automatic. Collections on Sonys are named
# depending upon whether the field is standard or custom. A collection derived
# from a standard field is named for the value in that field. For example, if
# the standard 'series' column contains the value 'Darkover', then the
# collection name is 'Darkover'. A collection derived from a custom field will
# have the name of the field added to the value. For example, if a custom series
# column named 'My Series' contains the name 'Darkover', then the collection
# will by default be named 'Darkover (My Series)'. For purposes of this
# documentation, 'Darkover' is called the value and 'My Series' is called the
# category. If two books have fields that generate the same collection name,
# then both books will be in that collection.
# This set of tweaks lets you specify for a standard or custom field how
# the collections are to be named. You can use it to add a description to a
# standard field, for example 'Foo (Tag)' instead of the 'Foo'. You can also use
# it to force multiple fields to end up in the same collection. For example, you
# could force the values in 'series', '#my_series_1', and '#my_series_2' to
# appear in collections named 'some_value (Series)', thereby merging all of the
# fields into one set of collections.
# There are two related tweaks. The first determines the category name to use
# for a metadata field. The second is a template, used to determine how the
# value and category are combined to create the collection name.
# The syntax of the first tweak, sony_collection_renaming_rules, is:
# {'field_lookup_name':'category_name_to_use', 'lookup_name':'name', ...}
# The second tweak, sony_collection_name_template, is a template. It uses the
# same template language as plugboards and save templates. This tweak controls
# how the value and category are combined together to make the collection name.
# The only two fields available are {category} and {value}. The {value} field is
# never empty. The {category} field can be empty. The default is to put the
# value first, then the category enclosed in parentheses, if it isn't empty:
# '{value} {category:|(|)}'
# Examples: The first three examples assume that the second tweak
# has not been changed.
# 1: I want three series columns to be merged into one set of collections. The
# column lookup names are 'series', '#series_1' and '#series_2'. I want nothing
# in the parenthesis. The value to use in the tweak value would be:
# sony_collection_renaming_rules={'series':'', '#series_1':'', '#series_2':''}
# 2: I want the word '(Series)' to appear on collections made from series, and
# the word '(Tag)' to appear on collections made from tags. Use:
# sony_collection_renaming_rules={'series':'Series', 'tags':'Tag'}
# 3: I want 'series' and '#myseries' to be merged, and for the collection name
# to have '(Series)' appended. The renaming rule is:
# sony_collection_renaming_rules={'series':'Series', '#myseries':'Series'}
# 4: Same as example 2, but instead of having the category name in parentheses
# and appended to the value, I want it prepended and separated by a colon, such
# as in 'Series: Darkover'. I must change the template used to format the category name.
# The resulting two tweaks are:
# sony_collection_renaming_rules={'series':'Series', 'tags':'Tag'}
# sony_collection_name_template='{category:||: }{value}'
sony_collection_renaming_rules={}
sony_collection_name_template='{value}{category:| (|)}'
#: Specify how SONY collections are sorted
# Specify how sony collections are sorted. This tweak is only applicable if
# metadata management is set to automatic. You can indicate which metadata is to
# be used to sort on a collection-by-collection basis. The format of the tweak
# is a list of metadata fields from which collections are made, followed by the
# name of the metadata field containing the sort value.
# Example: The following indicates that collections built from pubdate and tags
# are to be sorted by the value in the custom column '#mydate', that collections
# built from 'series' are to be sorted by 'series_index', and that all other
# collections are to be sorted by title. If a collection metadata field is not
# named, then if it is a series-based collection it is sorted by series order,
# otherwise it is sorted by title order.
# [(['pubdate', 'tags'],'#mydate'), (['series'],'series_index'), (['*'], 'title')]
# Note that the bracketing and parentheses are required. The syntax is
# [ ( [list of fields], sort field ) , ( [ list of fields ] , sort field ) ]
# Default: empty (no rules), so no collection attributes are named.
sony_collection_sorting_rules = []
#: Control how tags are applied when copying books to another library
# Set this to True to ensure that tags in 'Tags to add when adding
# a book' are added when copying books to another library
add_new_book_tags_when_importing_books = False
#: Set custom metadata fields that the Content server will or will not display.
# Controls what fields are displayed when clicking the "Search" button in the
# browser to search your calibre library.
# content_server_will_display is a list of custom fields to be displayed.
# content_server_wont_display is a list of custom fields not to be displayed.
# wont_display has priority over will_display.
# The special value '*' means all custom fields. The value [] means no entries.
# Defaults:
# content_server_will_display = ['*']
# content_server_wont_display = []
#
# Examples:
#
# To display only the custom fields #mytags and #genre:
# content_server_will_display = ['#mytags', '#genre']
# content_server_wont_display = []
#
# To display all fields except #mycomments:
# content_server_will_display = ['*']
# content_server_wont_display = ['#mycomments']
content_server_will_display = ['*']
content_server_wont_display = []
#: Set the maximum number of sort 'levels'
# Set the maximum number of sort 'levels' that calibre will use to resort the
# library after certain operations such as searches or device insertion. Each
# sort level adds a performance penalty. If the database is large (thousands of
# books) the penalty might be noticeable. If you are not concerned about multi-
# level sorts, and if you are seeing a slowdown, reduce the value of this tweak.
maximum_resort_levels = 5
#: Choose whether dates are sorted using visible fields
# Date values contain both a date and a time. When sorted, all the fields are
# used, regardless of what is displayed. Set this tweak to True to use only
# the fields that are being displayed.
sort_dates_using_visible_fields = False
#: Fuzz value for trimming covers
# The value used for the fuzz distance when trimming a cover.
# Colors within this distance are considered equal.
# The distance is in absolute intensity units.
cover_trim_fuzz_value = 10
#: Control behavior of the book list
# You can control the behavior of double clicks and pressing enter on the books list.
# Choices: open_viewer, do_nothing,
# edit_cell, edit_metadata. Selecting anything other than open_viewer has the
# side effect of disabling editing a field using a single click.
# Default: open_viewer.
# Example: doubleclick_on_library_view = 'do_nothing'
# You can also control whether the book list scrolls horizontally per column or
# per pixel. Default is per column.
doubleclick_on_library_view = 'open_viewer'
enter_key_behavior = 'do_nothing'
horizontal_scrolling_per_column = True
#: Language to use when sorting
# Setting this tweak will force sorting to use the
# collating order for the specified language. This might be useful if you run
# calibre in English but want sorting to work in the language where you live.
# Set the tweak to the desired ISO 639-1 language code, in lower case.
# You can find the list of supported locales at
# https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
# Default: locale_for_sorting = '' -- use the language calibre displays in
# Example: locale_for_sorting = 'fr' -- sort using French rules.
# Example: locale_for_sorting = 'nb' -- sort using Norwegian rules.
locale_for_sorting = ''
#: Number of columns for custom metadata in the edit metadata dialog
# Set whether to use one or two columns for custom metadata when editing
# metadata one book at a time. If True, then the fields are laid out using two
# columns. If False, one column is used.
metadata_single_use_2_cols_for_custom_fields = True
#: Order of custom column(s) in edit metadata
# Controls the order that custom columns are listed in edit metadata single
# and bulk. The columns listed in the tweak are displayed first and in the
# order provided. Any columns not listed are displayed after the listed ones,
# in alphabetical order. Do note that this tweak does not change the size of
# the edit widgets. Putting comments widgets in this list may result in some
# odd widget spacing when using two-column mode.
# Enter a comma-separated list of custom field lookup names, as in
# metadata_edit_custom_column_order = ['#genre', '#mytags', '#etc']
metadata_edit_custom_column_order = []
#: The number of seconds to wait before sending emails
# The number of seconds to wait before sending emails when using a
# public email server like gmx/hotmail/gmail. Default is: 5 minutes
# Setting it lower may cause the server's SPAM controls to kick in,
# making email sending fail. Changes will take effect only after a restart of
# calibre. You can also change the list of hosts that calibre considers
# to be public relays here. Any relay host ending with one of the suffixes
# in the list below will be considered a public email server.
public_smtp_relay_delay = 301
public_smtp_relay_host_suffixes = ['gmail.com', 'live.com', 'gmx.com']
#: The maximum width and height for covers saved in the calibre library
# All covers in the calibre library will be resized, preserving aspect ratio,
# to fit within this size. This is to prevent slowdowns caused by extremely
# large covers
maximum_cover_size = (1650, 2200)
#: Where to send downloaded news
# When automatically sending downloaded news to a connected device, calibre
# will by default send it to the main memory. By changing this tweak, you can
# control where it is sent. Valid values are "main", "carda", "cardb". Note
# that if there isn't enough free space available on the location you choose,
# the files will be sent to the location with the most free space.
send_news_to_device_location = "main"
#: Unified toolbar on macOS
# If you enable this option and restart calibre, the toolbar will be 'unified'
# with the titlebar as is normal for macOS applications. However, doing this has
# various bugs, for instance the minimum width of the toolbar becomes twice
# what it should be and it causes other random bugs on some systems, so turn it
# on at your own risk!
unified_title_toolbar_on_osx = False
#: Save original file when converting/polishing from same format to same format
# When calibre does a conversion from the same format to the same format, for
# example, from EPUB to EPUB, the original file is saved, so that in case the
# conversion is poor, you can tweak the settings and run it again. By setting
# this to False you can prevent calibre from saving the original file.
# Similarly, by setting save_original_format_when_polishing to False you can
# prevent calibre from saving the original file when polishing.
save_original_format = True
save_original_format_when_polishing = True
#: Number of recently viewed books to show
# Right-clicking the "View" button shows a list of recently viewed books. Control
# how many are shown here.
gui_view_history_size = 15
#: Change the font size of book details in the interface
# Change the font size at which book details are rendered in the side panel and
# comments are rendered in the metadata edit dialog. Set it to a positive or
# negative number to increase or decrease the font size.
change_book_details_font_size_by = 0
#: What format to default to when using the "Unpack book" feature
# The "Unpack book" feature of calibre allows direct editing of a book format.
# If multiple formats are available, calibre will offer you a choice
# of formats, defaulting to your preferred output format if it is available.
# Set this tweak to a specific value of 'EPUB' or 'AZW3' to always default
# to that format rather than your output format preference.
# Set to a value of 'remember' to use whichever format you chose last time you
# used the "Unpack book" feature.
# Examples:
# default_tweak_format = None (Use output format)
# default_tweak_format = 'EPUB'
# default_tweak_format = 'remember'
default_tweak_format = None
#: Do not preselect a completion when editing authors/tags/series/etc.
# This means that you can make changes and press Enter and your changes will
# not be overwritten by a matching completion. However, if you wish to use the
# completions you will now have to press Tab to select one before pressing
# Enter. Which technique you prefer will depend on the state of metadata in
# your library and your personal editing style.
preselect_first_completion = False
#: Completion mode when editing authors/tags/series/etc.
# By default, when completing items, calibre will show you all the candidates
# that start with the text you have already typed. You can instead have it show
# all candidates that contain the text you have already typed. To do this, set
# completion_mode to 'contains'. For example, if you type asi it will match both
# Asimov and Quasimodo, whereas the default behavior would match only Asimov.
completion_mode = 'prefix'
#: Recognize numbers inside text when sorting
# This means that when sorting on text fields like title the text "Book 2"
# will sort before the text "Book 100". If you want this behavior, set
# numeric_collation = True. Note that doing so will cause problems with text
# that starts with numbers and is a little slower.
numeric_collation = False
#: Sort the list of libraries alphabetically
# The list of libraries in the Copy to library and Quick switch menus are
# normally sorted by most used. However, if there are more than a certain
# number of such libraries, the sorting becomes alphabetic. You can set that
# number here. The default is ten libraries.
many_libraries = 10
#: Choose available output formats for conversion
# Restrict the list of available output formats in the conversion dialogs.
# For example, if you only want to convert to EPUB and AZW3, change this to
# restrict_output_formats = ['EPUB', 'AZW3']. The default value of None causes
# all available output formats to be present.
restrict_output_formats = None
#: Set the thumbnail image quality used by the Content server
# The quality of a thumbnail is largely controlled by the compression quality
# used when creating it. Set this to a larger number to improve the quality.
# Note that the thumbnails get much larger with larger compression quality
# numbers.
# The value can be between 50 and 99
content_server_thumbnail_compression_quality = 75
#: Image file types to treat as e-books when dropping onto the "Book details" panel
# Normally, if you drop any image file in a format known to calibre onto the
# "Book details" panel, it will be used to set the cover. If you want to store
# some image types as e-books instead, you can set this tweak.
# Examples:
# cover_drop_exclude = {'tiff', 'webp'}
cover_drop_exclude = ()
#: Show the Saved searches box in the Search bar
# In newer versions of calibre, only a single button that allows you to add a
# new Saved search is shown in the Search bar. If you would like to have the
# old Saved searches box with its two buttons back, set this tweak to True.
show_saved_search_box = False
#: Exclude fields when copy/pasting metadata
# You can ask calibre to not paste some metadata fields when using the
# Edit metadata->Copy metadata/Paste metadata actions. For example,
# exclude_fields_on_paste = ['cover', 'timestamp', '#mycolumn']
# to prevent pasting of the cover, the Date (timestamp) and the custom column #mycolumn.
exclude_fields_on_paste = []
#: Skip internet connected check
# Skip checking whether the internet is available before downloading news.
# Useful if for some reason your operating system's network-checking
# facilities are not reliable (for example NetworkManager on Linux).
skip_network_check = False

View File

@@ -0,0 +1,642 @@
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Make strings safe for use as ASCII filenames, while trying to preserve as much
meaning as possible.
'''
import errno
import os
import shutil
import time
from math import ceil
from calibre import force_unicode, isbytestring, prints, sanitize_file_name
from calibre.constants import (
filesystem_encoding, iswindows, plugins, preferred_encoding, isosx, ispy3
)
from calibre.utils.localization import get_udc
from polyglot.builtins import iteritems, itervalues, unicode_type, range
def ascii_text(orig):
udc = get_udc()
try:
ascii = udc.decode(orig)
except Exception:
if isinstance(orig, unicode_type):
orig = orig.encode('ascii', 'replace')
ascii = orig.decode(preferred_encoding, 'replace')
if isinstance(ascii, bytes):
ascii = ascii.decode('ascii', 'replace')
return ascii
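# Illustrative usage (exact output depends on the transliteration tables
# returned by get_udc()):
#   ascii_text(u'Pelé')  # -> u'Pele'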
def ascii_filename(orig, substitute='_'):
if isinstance(substitute, bytes):
substitute = substitute.decode(filesystem_encoding)
orig = ascii_text(orig).replace('?', '_')
ans = ''.join(x if ord(x) >= 32 else substitute for x in orig)
return sanitize_file_name(ans, substitute=substitute)
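# Illustrative usage ('Café: Menu?' is a placeholder input); the result is
# ASCII only, with '?' and filesystem-unsafe characters replaced by the
# substitute, roughly u'Cafe_ Menu_':
#   ascii_filename(u'Café: Menu?')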
def shorten_component(s, by_what):
l = len(s)
if l < by_what:
return s
l = (l - by_what)//2
if l <= 0:
return s
return s[:l] + s[-l:]
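# Example: shorten_component('abcdefghij', 4) keeps three characters from
# each end and drops the middle, returning 'abchij'.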
def limit_component(x, limit=254):
# Windows and macOS use UTF-16 code points for length; Linux uses arbitrary
# binary data, but we will assume UTF-8
filename_encoding_for_length = 'utf-16' if iswindows or isosx else 'utf-8'
def encoded_length():
q = x if isinstance(x, bytes) else x.encode(filename_encoding_for_length)
return len(q)
while encoded_length() > limit:
delta = encoded_length() - limit
x = shorten_component(x, max(2, delta // 2))
return x
def shorten_components_to(length, components, more_to_take=0, last_has_extension=True):
components = [limit_component(cx) for cx in components]
filepath = os.sep.join(components)
extra = len(filepath) - (length - more_to_take)
if extra < 1:
return components
deltas = []
for x in components:
pct = len(x)/float(len(filepath))
deltas.append(int(ceil(pct*extra)))
ans = []
for i, x in enumerate(components):
delta = deltas[i]
if delta > len(x):
r = x[0] if x is components[-1] else ''
else:
if last_has_extension and x is components[-1]:
b, e = os.path.splitext(x)
if e == '.':
e = ''
r = shorten_component(b, delta)+e
if r.startswith('.'):
r = x[0]+r
else:
r = shorten_component(x, delta)
r = r.strip()
if not r:
r = x.strip()[0] if x.strip() else 'x'
ans.append(r)
if len(os.sep.join(ans)) > length:
return shorten_components_to(length, components, more_to_take+2)
return ans
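# Illustrative usage (components are placeholders): each component is
# shortened proportionally until the os.sep-joined length fits the limit,
# preserving the extension of the final component:
#   shorten_components_to(15, ['authors', 'a very long book title.epub'])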
def find_executable_in_path(name, path=None):
if path is None:
path = os.environ.get('PATH', '')
exts = '.exe .cmd .bat'.split() if iswindows and not name.endswith('.exe') else ('',)
path = path.split(os.pathsep)
for x in path:
for ext in exts:
q = os.path.abspath(os.path.join(x, name)) + ext
if os.access(q, os.X_OK):
return q
def is_case_sensitive(path):
'''
Return True if the filesystem is case sensitive.
path must be the path to an existing directory. You must have permission
to create and delete files in this directory. The results of this test
apply to the filesystem containing the directory in path.
'''
is_case_sensitive = False
if not iswindows:
name1, name2 = ('calibre_test_case_sensitivity.txt',
'calibre_TesT_CaSe_sensitiVitY.Txt')
f1, f2 = os.path.join(path, name1), os.path.join(path, name2)
if os.path.exists(f1):
os.remove(f1)
open(f1, 'w').close()
is_case_sensitive = not os.path.exists(f2)
os.remove(f1)
return is_case_sensitive
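# Illustrative usage:
#   import tempfile
#   is_case_sensitive(tempfile.gettempdir())  # True on typical Linux systems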
def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
'''
Open the file pointed to by path with the specified mode. If any
directories in path do not exist, they are created. Returns the
opened file object and the path to the opened file object. This path is
guaranteed to have the same case as the on disk path. For case insensitive
filesystems, the returned path may be different from the passed in path.
The returned path is always unicode and always an absolute path.
If mode is None, then this function assumes that path points to a directory
and return the path to the directory as the file object.
mkdir_mode specifies the mode with which any missing directories in path
are created.
'''
if isbytestring(path):
path = path.decode(filesystem_encoding)
path = os.path.abspath(path)
sep = force_unicode(os.sep, 'ascii')
if path.endswith(sep):
path = path[:-1]
if not path:
raise ValueError('Path must not point to root')
components = path.split(sep)
if not components:
raise ValueError('Invalid path: %r'%path)
cpath = sep
if iswindows:
# Always upper case the drive letter and add a trailing slash so that
# the first os.listdir works correctly
cpath = components[0].upper() + sep
bdir = path if mode is None else os.path.dirname(path)
if not os.path.exists(bdir):
os.makedirs(bdir, mkdir_mode)
# Walk all the directories in path, putting the on disk case version of
# the directory into cpath
dirs = components[1:] if mode is None else components[1:-1]
for comp in dirs:
cdir = os.path.join(cpath, comp)
cl = comp.lower()
try:
candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
except:
# Don't have permission to do the listdir, assume the case is
# correct as we have no way to check it.
pass
else:
if len(candidates) == 1:
cdir = os.path.join(cpath, candidates[0])
# else: We are on a case sensitive file system so cdir must already
# be correct
cpath = cdir
if mode is None:
ans = fpath = cpath
else:
fname = components[-1]
ans = lopen(os.path.join(cpath, fname), mode)
# Ensure file and all its metadata is written to disk so that subsequent
# listdir() has file name in it. I don't know if this is actually
# necessary, but given the diversity of platforms, best to be safe.
ans.flush()
os.fsync(ans.fileno())
cl = fname.lower()
try:
candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
except EnvironmentError:
# The containing directory, somehow disappeared?
candidates = []
if len(candidates) == 1:
fpath = os.path.join(cpath, candidates[0])
else:
# We are on a case sensitive filesystem
fpath = os.path.join(cpath, fname)
return ans, fpath
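# Illustrative usage ('/tmp/Some/Dir/book.epub' is a placeholder); missing
# directories are created, and fpath has the case actually present on disk:
#   f, fpath = case_preserving_open_file('/tmp/Some/Dir/book.epub')
#   f.write(b'data'); f.close()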
def windows_get_fileid(path):
''' The fileid uniquely identifies actual file contents (it is the same for
all hardlinks to a file). Similar to inode number on linux. '''
import win32file
from pywintypes import error
if isbytestring(path):
path = path.decode(filesystem_encoding)
try:
h = win32file.CreateFileW(path, 0, 0, None, win32file.OPEN_EXISTING,
win32file.FILE_FLAG_BACKUP_SEMANTICS, 0)
try:
data = win32file.GetFileInformationByHandle(h)
finally:
win32file.CloseHandle(h)
except (error, EnvironmentError):
return None
return data[4], data[8], data[9]
def samefile_windows(src, dst):
samestring = (os.path.normcase(os.path.abspath(src)) ==
os.path.normcase(os.path.abspath(dst)))
if samestring:
return True
a, b = windows_get_fileid(src), windows_get_fileid(dst)
if a is None and b is None:
return False
return a == b
def samefile(src, dst):
'''
Check if two paths point to the same actual file on the filesystem. Handles
symlinks, case insensitivity, mapped drives, etc.
Returns True iff both paths exist and point to the same file on disk.
Note: On windows will return True if the two string are identical (up to
case) even if the file does not exist. This is because I have no way of
knowing how reliable the GetFileInformationByHandle method is.
'''
if iswindows:
return samefile_windows(src, dst)
if hasattr(os.path, 'samefile'):
# Unix
try:
return os.path.samefile(src, dst)
except EnvironmentError:
return False
# All other platforms: check for same pathname.
samestring = (os.path.normcase(os.path.abspath(src)) ==
os.path.normcase(os.path.abspath(dst)))
return samestring
def windows_get_size(path):
''' On windows file sizes are only accurately stored in the actual file,
not in the directory entry (which could be out of date). So we open the
file, and get the actual size. '''
import win32file
if isbytestring(path):
path = path.decode(filesystem_encoding)
h = win32file.CreateFileW(
path, 0, win32file.FILE_SHARE_READ | win32file.FILE_SHARE_WRITE | win32file.FILE_SHARE_DELETE,
None, win32file.OPEN_EXISTING, 0, None)
try:
return win32file.GetFileSize(h)
finally:
win32file.CloseHandle(h)
def windows_hardlink(src, dest):
import win32file, pywintypes
try:
win32file.CreateHardLink(dest, src)
except pywintypes.error as e:
msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
raise OSError(msg % e)
src_size = os.path.getsize(src)
# We open and close dest, to ensure its directory entry is updated
# see http://blogs.msdn.com/b/oldnewthing/archive/2011/12/26/10251026.aspx
for i in range(10):
# If we are on a network filesystem, we have to wait for some indeterminate time, since
# network file systems are the best thing since sliced bread
try:
if windows_get_size(dest) == src_size:
return
except EnvironmentError:
pass
time.sleep(0.3)
sz = windows_get_size(dest)
if sz != src_size:
msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
raise OSError(msg % ('hardlink size: %d not the same as source size' % sz))
def windows_fast_hardlink(src, dest):
import win32file, pywintypes
try:
win32file.CreateHardLink(dest, src)
except pywintypes.error as e:
msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
raise OSError(msg % e)
ssz, dsz = windows_get_size(src), windows_get_size(dest)
if ssz != dsz:
msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
raise OSError(msg % ('hardlink size: %d not the same as source size: %s' % (dsz, ssz)))
def windows_nlinks(path):
import win32file
dwFlagsAndAttributes = win32file.FILE_FLAG_BACKUP_SEMANTICS if os.path.isdir(path) else 0
if isbytestring(path):
path = path.decode(filesystem_encoding)
handle = win32file.CreateFileW(path, win32file.GENERIC_READ, win32file.FILE_SHARE_READ, None, win32file.OPEN_EXISTING, dwFlagsAndAttributes, None)
try:
return win32file.GetFileInformationByHandle(handle)[7]
finally:
handle.Close()
class WindowsAtomicFolderMove(object):
'''
Move all the files inside a specified folder in an atomic fashion,
preventing any other process from locking a file while the operation is
incomplete. Raises an IOError if another process has locked a file before
the operation starts. Note that this only operates on the files in the
folder, not any sub-folders.
'''
def __init__(self, path):
self.handle_map = {}
import win32file, winerror
from pywintypes import error
from collections import defaultdict
if isbytestring(path):
path = path.decode(filesystem_encoding)
if not os.path.exists(path):
return
names = os.listdir(path)
name_to_fileid = {x:windows_get_fileid(os.path.join(path, x)) for x in names}
fileid_to_names = defaultdict(set)
for name, fileid in iteritems(name_to_fileid):
fileid_to_names[fileid].add(name)
for x in names:
f = os.path.normcase(os.path.abspath(os.path.join(path, x)))
if not os.path.isfile(f):
continue
try:
# Ensure the file is not read-only
win32file.SetFileAttributes(f, win32file.FILE_ATTRIBUTE_NORMAL)
except:
pass
try:
h = win32file.CreateFileW(f, win32file.GENERIC_READ,
win32file.FILE_SHARE_DELETE, None,
win32file.OPEN_EXISTING, win32file.FILE_FLAG_SEQUENTIAL_SCAN, 0)
except error as e:
if getattr(e, 'winerror', 0) == winerror.ERROR_SHARING_VIOLATION:
# The file could be a hardlink to an already opened file,
# in which case we use the same handle for both files
fileid = name_to_fileid[x]
found = False
if fileid is not None:
for other in fileid_to_names[fileid]:
other = os.path.normcase(os.path.abspath(os.path.join(path, other)))
if other in self.handle_map:
self.handle_map[f] = self.handle_map[other]
found = True
break
if found:
continue
self.close_handles()
if getattr(e, 'winerror', 0) == winerror.ERROR_SHARING_VIOLATION:
err = IOError(errno.EACCES,
_('File is open in another process'))
err.filename = f
raise err
prints('CreateFile failed for: %r' % f)
raise
except:
self.close_handles()
prints('CreateFile failed for: %r' % f)
raise
self.handle_map[f] = h
def copy_path_to(self, path, dest):
import win32file
handle = None
for p, h in iteritems(self.handle_map):
if samefile_windows(path, p):
handle = h
break
if handle is None:
if os.path.exists(path):
raise ValueError('The file %r did not exist when this move'
' operation was started'%path)
else:
raise ValueError('The file %r does not exist'%path)
try:
windows_hardlink(path, dest)
return
except:
pass
win32file.SetFilePointer(handle, 0, win32file.FILE_BEGIN)
with lopen(dest, 'wb') as f:
while True:
hr, raw = win32file.ReadFile(handle, 1024*1024)
if hr != 0:
raise IOError(hr, 'Error while reading from %r'%path)
if not raw:
break
f.write(raw)
def release_file(self, path):
' Release the lock on the file pointed to by path. Will also release the lock on any hardlinks to path '
key = None
for p, h in iteritems(self.handle_map):
if samefile_windows(path, p):
key = (p, h)
break
if key is not None:
import win32file
win32file.CloseHandle(key[1])
remove = [f for f, h in iteritems(self.handle_map) if h is key[1]]
for x in remove:
self.handle_map.pop(x)
def close_handles(self):
import win32file
for h in itervalues(self.handle_map):
win32file.CloseHandle(h)
self.handle_map = {}
def delete_originals(self):
import win32file
for path in self.handle_map:
win32file.DeleteFile(path)
self.close_handles()
def hardlink_file(src, dest):
if iswindows:
windows_hardlink(src, dest)
return
os.link(src, dest)
def nlinks_file(path):
' Return number of hardlinks to the file '
if iswindows:
return windows_nlinks(path)
return os.stat(path).st_nlink
if iswindows:
def rename_file(a, b):
move_file = plugins['winutil'][0].move_file
if isinstance(a, bytes):
a = a.decode('mbcs')
if isinstance(b, bytes):
b = b.decode('mbcs')
move_file(a, b)
def atomic_rename(oldpath, newpath):
'''Replace the file newpath with the file oldpath. Can fail if the files
are on different volumes. If succeeds, guaranteed to be atomic. newpath may
or may not exist. If it exists, it is replaced. '''
if iswindows:
for i in range(10):
try:
rename_file(oldpath, newpath)
break
except Exception:
if i > 8:
raise
# Try the rename repeatedly in case something like a virus
# scanner has opened one of the files (I love windows)
time.sleep(1)
else:
os.rename(oldpath, newpath)
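# Illustrative usage, the common write-then-replace pattern (path and
# new_data are placeholders):
#   with lopen(path + '.tmp', 'wb') as f:
#       f.write(new_data)
#   atomic_rename(path + '.tmp', path)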
def remove_dir_if_empty(path, ignore_metadata_caches=False):
''' Remove a directory if it is empty or contains only the folder metadata
caches from different OSes. To delete the folder if it contains only
metadata caches, set ignore_metadata_caches to True.'''
try:
os.rmdir(path)
except OSError as e:
if e.errno == errno.ENOTEMPTY or len(os.listdir(path)) > 0:
# Some linux systems appear to raise an EPERM instead of an
# ENOTEMPTY, see https://bugs.launchpad.net/bugs/1240797
if ignore_metadata_caches:
try:
found = False
for x in os.listdir(path):
if x.lower() in {'.ds_store', 'thumbs.db'}:
found = True
x = os.path.join(path, x)
if os.path.isdir(x):
import shutil
shutil.rmtree(x)
else:
os.remove(x)
except Exception: # We could get an error, if, for example, windows has locked Thumbs.db
found = False
if found:
remove_dir_if_empty(path)
return
raise
expanduser = os.path.expanduser
def format_permissions(st_mode):
import stat
for func, letter in (x.split(':') for x in 'REG:- DIR:d BLK:b CHR:c FIFO:p LNK:l SOCK:s'.split()):
if getattr(stat, 'S_IS' + func)(st_mode):
break
else:
letter = '?'
rwx = ('---', '--x', '-w-', '-wx', 'r--', 'r-x', 'rw-', 'rwx')
ans = [letter] + list(rwx[(st_mode >> 6) & 7]) + list(rwx[(st_mode >> 3) & 7]) + list(rwx[(st_mode & 7)])
if st_mode & stat.S_ISUID:
ans[3] = 's' if (st_mode & stat.S_IXUSR) else 'S'
if st_mode & stat.S_ISGID:
ans[6] = 's' if (st_mode & stat.S_IXGRP) else 'l'
if st_mode & stat.S_ISVTX:
ans[9] = 't' if (st_mode & stat.S_IXUSR) else 'T'
return ''.join(ans)
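# Examples:
#   import stat
#   format_permissions(stat.S_IFREG | 0o644)   # -> '-rw-r--r--'
#   format_permissions(stat.S_IFDIR | 0o1777)  # -> 'drwxrwxrwt'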
def copyfile(src, dest):
shutil.copyfile(src, dest)
try:
shutil.copystat(src, dest)
except Exception:
pass
def get_hardlink_function(src, dest):
if iswindows:
import win32file, win32api
colon = b':' if isinstance(dest, bytes) else ':'
root = dest[0] + colon
try:
is_suitable = win32file.GetDriveType(root) not in (win32file.DRIVE_REMOTE, win32file.DRIVE_CDROM)
# See https://msdn.microsoft.com/en-us/library/windows/desktop/aa364993(v=vs.85).aspx
supports_hard_links = win32api.GetVolumeInformation(root + os.sep)[3] & 0x00400000
except Exception:
supports_hard_links = is_suitable = False
hardlink = windows_fast_hardlink if is_suitable and supports_hard_links and src[0].lower() == dest[0].lower() else None
else:
hardlink = os.link
return hardlink
def copyfile_using_links(path, dest, dest_is_dir=True, filecopyfunc=copyfile):
path, dest = os.path.abspath(path), os.path.abspath(dest)
if dest_is_dir:
dest = os.path.join(dest, os.path.basename(path))
hardlink = get_hardlink_function(path, dest)
try:
hardlink(path, dest)
except Exception:
filecopyfunc(path, dest)
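# Illustrative usage (paths are placeholders); a hardlink is attempted first,
# with a plain copy as the fallback:
#   copyfile_using_links('/books/a.epub', '/backup')  # dest is a directory
#   copyfile_using_links('/books/a.epub', '/backup/b.epub', dest_is_dir=False)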
def copytree_using_links(path, dest, dest_is_parent=True, filecopyfunc=copyfile):
path, dest = os.path.abspath(path), os.path.abspath(dest)
if dest_is_parent:
dest = os.path.join(dest, os.path.basename(path))
hardlink = get_hardlink_function(path, dest)
try:
os.makedirs(dest)
except EnvironmentError as e:
if e.errno != errno.EEXIST:
raise
for dirpath, dirnames, filenames in os.walk(path):
base = os.path.relpath(dirpath, path)
dest_base = os.path.join(dest, base)
for dname in dirnames:
try:
os.mkdir(os.path.join(dest_base, dname))
except EnvironmentError as e:
if e.errno != errno.EEXIST:
raise
for fname in filenames:
src, df = os.path.join(dirpath, fname), os.path.join(dest_base, fname)
try:
hardlink(src, df)
except Exception:
filecopyfunc(src, df)
if not ispy3 and not iswindows:
# On POSIX in python2 if you pass a unicode path to rmtree
# it tries to decode all filenames it encounters while walking
# the tree which leads to unicode errors on Linux where there
# can be non-decodeable filenames.
def rmtree(x, **kw):
if not isinstance(x, bytes):
x = x.encode('utf-8')
return shutil.rmtree(x, **kw)
else:
rmtree = shutil.rmtree

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@@ -0,0 +1,122 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from io import BytesIO
from struct import calcsize, unpack, unpack_from
from collections import namedtuple
from calibre.utils.fonts.utils import get_font_names2, get_font_characteristics
from polyglot.builtins import range, unicode_type
class UnsupportedFont(ValueError):
pass
FontCharacteristics = namedtuple('FontCharacteristics',
'weight, is_italic, is_bold, is_regular, fs_type, panose, width, is_oblique, is_wws, os2_version')
FontNames = namedtuple('FontNames',
'family_name, subfamily_name, full_name, preferred_family_name, preferred_subfamily_name, wws_family_name, wws_subfamily_name')
class FontMetadata(object):
def __init__(self, bytes_or_stream):
if not hasattr(bytes_or_stream, 'read'):
bytes_or_stream = BytesIO(bytes_or_stream)
f = bytes_or_stream
f.seek(0)
header = f.read(4)
if header not in {b'\x00\x01\x00\x00', b'OTTO'}:
raise UnsupportedFont('Not a supported sfnt variant')
self.is_otf = header == b'OTTO'
self.read_table_metadata(f)
self.read_names(f)
self.read_characteristics(f)
f.seek(0)
self.font_family = self.names.family_name
wt = self.characteristics.weight
if wt == 400:
wt = 'normal'
elif wt == 700:
wt = 'bold'
else:
wt = unicode_type(wt)
self.font_weight = wt
self.font_stretch = ('ultra-condensed', 'extra-condensed',
'condensed', 'semi-condensed', 'normal', 'semi-expanded',
'expanded', 'extra-expanded', 'ultra-expanded')[
self.characteristics.width-1]
if self.characteristics.is_oblique:
self.font_style = 'oblique'
elif self.characteristics.is_italic:
self.font_style = 'italic'
else:
self.font_style = 'normal'
def read_table_metadata(self, f):
f.seek(4)
num_tables = unpack(b'>H', f.read(2))[0]
# Start of table record entries
f.seek(4 + 4*2)
table_record = b'>4s3L'
sz = calcsize(table_record)
self.tables = {}
block = f.read(sz * num_tables)
for i in range(num_tables):
table_tag, table_checksum, table_offset, table_length = \
unpack_from(table_record, block, i*sz)
self.tables[table_tag.lower()] = (table_offset, table_length,
table_checksum)
def read_names(self, f):
if b'name' not in self.tables:
raise UnsupportedFont('This font has no name table')
toff, tlen = self.tables[b'name'][:2]
f.seek(toff)
table = f.read(tlen)
if len(table) != tlen:
raise UnsupportedFont('This font has a name table of incorrect length')
vals = get_font_names2(table, raw_is_table=True)
self.names = FontNames(*vals)
def read_characteristics(self, f):
if b'os/2' not in self.tables:
raise UnsupportedFont('This font has no OS/2 table')
toff, tlen = self.tables[b'os/2'][:2]
f.seek(toff)
table = f.read(tlen)
if len(table) != tlen:
raise UnsupportedFont('This font has an OS/2 table of incorrect length')
vals = get_font_characteristics(table, raw_is_table=True)
self.characteristics = FontCharacteristics(*vals)
def to_dict(self):
ans = {
'is_otf':self.is_otf,
'font-family':self.font_family,
'font-weight':self.font_weight,
'font-style':self.font_style,
'font-stretch':self.font_stretch
}
for f in self.names._fields:
ans[f] = getattr(self.names, f)
for f in self.characteristics._fields:
ans[f] = getattr(self.characteristics, f)
return ans
if __name__ == '__main__':
import sys
with open(sys.argv[-1], 'rb') as f:
fm = FontMetadata(f)
import pprint
pprint.pprint(fm.to_dict())

View File

@@ -0,0 +1,412 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from collections import defaultdict
from threading import Thread
from calibre import walk, prints, as_unicode
from calibre.constants import (config_dir, iswindows, isosx, plugins, DEBUG,
isworker, filesystem_encoding)
from calibre.utils.fonts.metadata import FontMetadata, UnsupportedFont
from calibre.utils.icu import sort_key
from polyglot.builtins import itervalues, unicode_type, filter
class NoFonts(ValueError):
pass
# Font dirs {{{
def default_font_dirs():
return [
'/opt/share/fonts',
'/usr/share/fonts',
'/usr/local/share/fonts',
os.path.expanduser('~/.local/share/fonts'),
os.path.expanduser('~/.fonts')
]
def fc_list():
import ctypes
from ctypes.util import find_library
lib = find_library('fontconfig')
if lib is None:
return default_font_dirs()
try:
lib = ctypes.CDLL(lib)
except:
return default_font_dirs()
prototype = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p)
try:
get_font_dirs = prototype(('FcConfigGetFontDirs', lib))
except (AttributeError):
return default_font_dirs()
prototype = ctypes.CFUNCTYPE(ctypes.c_char_p, ctypes.c_void_p)
try:
next_dir = prototype(('FcStrListNext', lib))
except (AttributeError):
return default_font_dirs()
prototype = ctypes.CFUNCTYPE(None, ctypes.c_void_p)
try:
end = prototype(('FcStrListDone', lib))
except (AttributeError):
return default_font_dirs()
str_list = get_font_dirs(ctypes.c_void_p())
if not str_list:
return default_font_dirs()
ans = []
while True:
d = next_dir(str_list)
if not d:
break
if d:
try:
ans.append(d.decode(filesystem_encoding))
except ValueError:
prints('Ignoring undecodeable font path: %r' % d)
continue
end(str_list)
if len(ans) < 3:
return default_font_dirs()
parents, visited = [], set()
for f in ans:
path = os.path.normpath(os.path.abspath(os.path.realpath(f)))
if path == '/':
continue
head, tail = os.path.split(path)
while head and tail:
if head in visited:
break
head, tail = os.path.split(head)
else:
parents.append(path)
visited.add(path)
return parents
def font_dirs():
if iswindows:
winutil, err = plugins['winutil']
if err:
raise RuntimeError('Failed to load winutil: %s'%err)
try:
return [winutil.special_folder_path(winutil.CSIDL_FONTS)]
except ValueError:
return [r'C:\Windows\Fonts']
if isosx:
return [
'/Library/Fonts',
'/System/Library/Fonts',
'/usr/share/fonts',
'/var/root/Library/Fonts',
os.path.expanduser('~/.fonts'),
os.path.expanduser('~/Library/Fonts'),
]
return fc_list()
# }}}
# Build font family maps {{{
def font_priority(font):
'''
Try to ensure that the "Regular" face is the first font for a given
family.
'''
style_normal = font['font-style'] == 'normal'
width_normal = font['font-stretch'] == 'normal'
weight_normal = font['font-weight'] == 'normal'
num_normal = sum(filter(None, (style_normal, width_normal,
weight_normal)))
subfamily_name = (font['wws_subfamily_name'] or
font['preferred_subfamily_name'] or font['subfamily_name'])
if num_normal == 3 and subfamily_name == 'Regular':
return 0
if num_normal == 3:
return 1
if subfamily_name == 'Regular':
return 2
return 3 + (3 - num_normal)
def path_significance(path, folders):
path = os.path.normcase(os.path.abspath(path))
for i, q in enumerate(folders):
if path.startswith(q):
return i
return -1
def build_families(cached_fonts, folders, family_attr='font-family'):
families = defaultdict(list)
for f in itervalues(cached_fonts):
if not f:
continue
lf = icu_lower(f.get(family_attr) or '')
if lf:
families[lf].append(f)
for fonts in itervalues(families):
# Look for duplicate font files and choose the copy that is from a
# more significant font directory (prefer user directories over
# system directories).
fmap = {}
remove = []
for f in fonts:
fingerprint = (icu_lower(f['font-family']), f['font-weight'],
f['font-stretch'], f['font-style'])
if fingerprint in fmap:
opath = fmap[fingerprint]['path']
npath = f['path']
if path_significance(npath, folders) >= path_significance(opath, folders):
remove.append(fmap[fingerprint])
fmap[fingerprint] = f
else:
remove.append(f)
else:
fmap[fingerprint] = f
for font in remove:
fonts.remove(font)
fonts.sort(key=font_priority)
font_family_map = dict.copy(families)
font_families = tuple(sorted((f[0]['font-family'] for f in
itervalues(font_family_map)), key=sort_key))
return font_family_map, font_families
# }}}
class FontScanner(Thread):
CACHE_VERSION = 2
def __init__(self, folders=[], allowed_extensions={'ttf', 'otf'}):
Thread.__init__(self)
self.folders = folders + font_dirs() + [os.path.join(config_dir, 'fonts'),
P('fonts/liberation')]
self.folders = [os.path.normcase(os.path.abspath(f)) for f in
self.folders]
self.font_families = ()
self.allowed_extensions = allowed_extensions
# API {{{
def find_font_families(self):
self.join()
return self.font_families
def fonts_for_family(self, family):
'''
Return a list of the faces belonging to the specified family. The first
face is the "Regular" face of family. Each face is a dictionary with
many keys, the most important of which are: path, font-family,
font-weight, font-style, font-stretch. The font-* properties follow the
CSS 3 Fonts specification.
'''
self.join()
try:
return self.font_family_map[icu_lower(family)]
except KeyError:
raise NoFonts('No fonts found for the family: %r'%family)
def legacy_fonts_for_family(self, family):
'''
Return a simple set of regular, bold, italic and bold-italic faces for
the specified family. Returns a dictionary with each element being a
2-tuple of (path to font, full font name) and the keys being: normal,
bold, italic, bi.
'''
ans = {}
try:
faces = self.fonts_for_family(family)
except NoFonts:
return ans
for i, face in enumerate(faces):
if i == 0:
key = 'normal'
elif face['font-style'] in {'italic', 'oblique'}:
key = 'bi' if face['font-weight'] == 'bold' else 'italic'
elif face['font-weight'] == 'bold':
key = 'bold'
else:
continue
ans[key] = (face['path'], face['full_name'])
return ans
def get_font_data(self, font_or_path):
path = font_or_path
if isinstance(font_or_path, dict):
path = font_or_path['path']
with lopen(path, 'rb') as f:
return f.read()
def find_font_for_text(self, text, allowed_families={'serif', 'sans-serif'},
preferred_families=('serif', 'sans-serif', 'monospace', 'cursive', 'fantasy')):
'''
Find a font on the system capable of rendering the given text.
Returns a font family (as given by fonts_for_family()) that has a
"normal" font and that can render the supplied text. If no such font
exists, returns None.
:return: (family name, faces) or None, None
'''
from calibre.utils.fonts.utils import (supports_text,
panose_to_css_generic_family, get_printable_characters)
if not isinstance(text, unicode_type):
raise TypeError(u'%r is not unicode'%text)
text = get_printable_characters(text)
found = {}
def filter_faces(font):
try:
raw = self.get_font_data(font)
return supports_text(raw, text)
except:
pass
return False
for family in self.find_font_families():
faces = list(filter(filter_faces, self.fonts_for_family(family)))
if not faces:
continue
generic_family = panose_to_css_generic_family(faces[0]['panose'])
if generic_family in allowed_families or generic_family == preferred_families[0]:
return (family, faces)
elif generic_family not in found:
found[generic_family] = (family, faces)
for f in preferred_families:
if f in found:
return found[f]
return None, None
# }}}
def reload_cache(self):
if not hasattr(self, 'cache'):
from calibre.utils.config import JSONConfig
self.cache = JSONConfig('fonts/scanner_cache')
else:
self.cache.refresh()
if self.cache.get('version', None) != self.CACHE_VERSION:
self.cache.clear()
self.cached_fonts = self.cache.get('fonts', {})
def run(self):
self.do_scan()
def do_scan(self):
self.reload_cache()
if isworker:
# Don't scan font files in worker processes, use whatever is
# cached. Font files typically don't change frequently enough to
# justify a rescan in a worker process.
self.build_families()
return
cached_fonts = self.cached_fonts.copy()
self.cached_fonts.clear()
for folder in self.folders:
if not os.path.isdir(folder):
continue
try:
files = tuple(walk(folder))
except EnvironmentError as e:
if DEBUG:
prints('Failed to walk font folder:', folder,
as_unicode(e))
continue
for candidate in files:
if (candidate.rpartition('.')[-1].lower() not in self.allowed_extensions or not os.path.isfile(candidate)):
continue
candidate = os.path.normcase(os.path.abspath(candidate))
try:
s = os.stat(candidate)
except EnvironmentError:
continue
fileid = '{0}||{1}:{2}'.format(candidate, s.st_size, s.st_mtime)
if fileid in cached_fonts:
# Use previously cached metadata, since the file size and
# last modified timestamp have not changed.
self.cached_fonts[fileid] = cached_fonts[fileid]
continue
try:
self.read_font_metadata(candidate, fileid)
except Exception as e:
if DEBUG:
prints('Failed to read metadata from font file:',
candidate, as_unicode(e))
continue
if frozenset(cached_fonts) != frozenset(self.cached_fonts):
# Write out the cache only if some font files have changed
self.write_cache()
self.build_families()
def build_families(self):
self.font_family_map, self.font_families = build_families(self.cached_fonts, self.folders)
def write_cache(self):
with self.cache:
self.cache['version'] = self.CACHE_VERSION
self.cache['fonts'] = self.cached_fonts
def force_rescan(self):
self.cached_fonts = {}
self.write_cache()
def read_font_metadata(self, path, fileid):
with lopen(path, 'rb') as f:
try:
fm = FontMetadata(f)
except UnsupportedFont:
self.cached_fonts[fileid] = {}
else:
data = fm.to_dict()
data['path'] = path
self.cached_fonts[fileid] = data
def dump_fonts(self):
self.join()
for family in self.font_families:
prints(family)
for font in self.fonts_for_family(family):
prints('\t%s: %s'%(font['full_name'], font['path']))
prints(end='\t')
for key in ('font-stretch', 'font-weight', 'font-style'):
prints('%s: %s'%(key, font[key]), end=' ')
prints()
prints('\tSub-family:', font['wws_subfamily_name'] or
font['preferred_subfamily_name'] or
font['subfamily_name'])
prints()
prints()
font_scanner = FontScanner()
font_scanner.start()
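# Illustrative usage of the module-level scanner (the family name is a
# placeholder); each call joins the scan thread before answering:
#   families = font_scanner.find_font_families()
#   faces = font_scanner.fonts_for_family('Liberation Serif')
#   family, faces = font_scanner.find_font_for_text(u'Hello')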
def force_rescan():
font_scanner.join()
font_scanner.force_rescan()
font_scanner.run()
if __name__ == '__main__':
font_scanner.dump_fonts()

View File

@@ -0,0 +1,503 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct
from io import BytesIO
from collections import defaultdict
from polyglot.builtins import iteritems, itervalues, unicode_type, range, as_bytes
class UnsupportedFont(ValueError):
pass
def get_printable_characters(text):
import unicodedata
return u''.join(x for x in unicodedata.normalize('NFC', text)
if unicodedata.category(x)[0] not in {'C', 'Z', 'M'})
def is_truetype_font(raw):
sfnt_version = raw[:4]
return (sfnt_version in {b'\x00\x01\x00\x00', b'OTTO'}, sfnt_version)
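# Illustrative usage (path is a placeholder):
#   with open(path, 'rb') as f:
#       ok, version = is_truetype_font(f.read())
#   # ok is True for sfnt version b'\x00\x01\x00\x00' (TrueType) or b'OTTO' (CFF)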
def get_tables(raw):
num_tables = struct.unpack_from(b'>H', raw, 4)[0]
offset = 4*3 # start of the table record entries
for i in range(num_tables):
table_tag, table_checksum, table_offset, table_length = struct.unpack_from(
b'>4s3L', raw, offset)
yield (table_tag, raw[table_offset:table_offset+table_length], offset,
table_offset, table_checksum)
offset += 4*4
def get_table(raw, name):
''' Get the raw table bytes for the specified table in the font '''
name = as_bytes(name.lower())
for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
if table_tag.lower() == name:
return table, table_index, table_offset, table_checksum
return None, None, None, None
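# Illustrative usage:
#   table, index, offset, checksum = get_table(raw, 'head')
#   # table is None if the font has no 'head' table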
def get_font_characteristics(raw, raw_is_table=False, return_all=False):
'''
Return (weight, is_italic, is_bold, is_regular, fs_type, panose, width,
is_oblique, is_wws). These
values are taken from the OS/2 table of the font. See
http://www.microsoft.com/typography/otspec/os2.htm for details
'''
if raw_is_table:
os2_table = raw
else:
os2_table = get_table(raw, 'os/2')[0]
if os2_table is None:
raise UnsupportedFont('Not a supported font, has no OS/2 table')
common_fields = b'>Hh3H11h'
(version, char_width, weight, width, fs_type, subscript_x_size,
subscript_y_size, subscript_x_offset, subscript_y_offset,
superscript_x_size, superscript_y_size, superscript_x_offset,
superscript_y_offset, strikeout_size, strikeout_position,
family_class) = struct.unpack_from(common_fields, os2_table)
offset = struct.calcsize(common_fields)
panose = struct.unpack_from(b'>10B', os2_table, offset)
offset += 10
(range1, range2, range3, range4) = struct.unpack_from(b'>4L', os2_table, offset)
offset += struct.calcsize(b'>4L')
vendor_id = os2_table[offset:offset+4]
vendor_id
offset += 4
selection, = struct.unpack_from(b'>H', os2_table, offset)
is_italic = (selection & (1 << 0)) != 0
is_bold = (selection & (1 << 5)) != 0
is_regular = (selection & (1 << 6)) != 0
is_wws = (selection & (1 << 8)) != 0
is_oblique = (selection & (1 << 9)) != 0
if return_all:
return (version, char_width, weight, width, fs_type, subscript_x_size,
subscript_y_size, subscript_x_offset, subscript_y_offset,
superscript_x_size, superscript_y_size, superscript_x_offset,
superscript_y_offset, strikeout_size, strikeout_position,
family_class, panose, selection, is_italic, is_bold, is_regular)
return weight, is_italic, is_bold, is_regular, fs_type, panose, width, is_oblique, is_wws, version
def panose_to_css_generic_family(panose):
proportion = panose[3]
if proportion == 9:
return 'monospace'
family_type = panose[0]
if family_type == 3:
return 'cursive'
if family_type == 4:
return 'fantasy'
serif_style = panose[1]
if serif_style in (11, 12, 13):
return 'sans-serif'
return 'serif'
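# Example: a PANOSE tuple whose proportion byte (index 3) is 9 indicates a
# monospaced design (the other bytes below are placeholder values):
#   panose_to_css_generic_family((2, 11, 6, 9, 2, 2, 2, 2, 2, 4))  # -> 'monospace'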
def decode_name_record(recs):
'''
Get the English names of this font. See
http://www.microsoft.com/typography/otspec/name.htm for details.
'''
if not recs:
return None
unicode_names = {}
windows_names = {}
mac_names = {}
for platform_id, encoding_id, language_id, src in recs:
if language_id > 0x8000:
continue
if platform_id == 0:
if encoding_id < 4:
try:
unicode_names[language_id] = src.decode('utf-16-be')
except ValueError:
continue
elif platform_id == 1:
try:
mac_names[language_id] = src.decode('utf-8')
except ValueError:
continue
elif platform_id == 2:
codec = {0:'ascii', 1:'utf-16-be', 2:'iso-8859-1'}.get(encoding_id,
None)
if codec is None:
continue
try:
unicode_names[language_id] = src.decode(codec)
except ValueError:
continue
elif platform_id == 3:
codec = {1:16, 10:32}.get(encoding_id, None)
if codec is None:
continue
try:
windows_names[language_id] = src.decode('utf-%d-be'%codec)
except ValueError:
continue
    # First try the windows names, looking for the US English name
if 1033 in windows_names:
return windows_names[1033]
    # Look for some other English name variant
for lang in (3081, 10249, 4105, 9225, 16393, 6153, 8201, 17417, 5129,
13321, 18441, 7177, 11273, 2057, 12297):
if lang in windows_names:
return windows_names[lang]
# Look for Mac name
if 0 in mac_names:
return mac_names[0]
# Use unicode names
for val in itervalues(unicode_names):
return val
return None
def _get_font_names(raw, raw_is_table=False):
if raw_is_table:
table = raw
else:
table = get_table(raw, 'name')[0]
if table is None:
raise UnsupportedFont('Not a supported font, has no name table')
table_type, count, string_offset = struct.unpack_from(b'>3H', table)
records = defaultdict(list)
for i in range(count):
try:
platform_id, encoding_id, language_id, name_id, length, offset = \
struct.unpack_from(b'>6H', table, 6+i*12)
except struct.error:
break
offset += string_offset
src = table[offset:offset+length]
records[name_id].append((platform_id, encoding_id, language_id,
src))
return records
def get_font_names(raw, raw_is_table=False):
records = _get_font_names(raw, raw_is_table)
family_name = decode_name_record(records[1])
subfamily_name = decode_name_record(records[2])
full_name = decode_name_record(records[4])
return family_name, subfamily_name, full_name
def get_font_names2(raw, raw_is_table=False):
records = _get_font_names(raw, raw_is_table)
family_name = decode_name_record(records[1])
subfamily_name = decode_name_record(records[2])
full_name = decode_name_record(records[4])
preferred_family_name = decode_name_record(records[16])
preferred_subfamily_name = decode_name_record(records[17])
wws_family_name = decode_name_record(records[21])
wws_subfamily_name = decode_name_record(records[22])
return (family_name, subfamily_name, full_name, preferred_family_name,
preferred_subfamily_name, wws_family_name, wws_subfamily_name)
def get_all_font_names(raw, raw_is_table=False):
records = _get_font_names(raw, raw_is_table)
ans = {}
for name, num in iteritems({'family_name':1, 'subfamily_name':2, 'full_name':4,
'preferred_family_name':16, 'preferred_subfamily_name':17,
'wws_family_name':21, 'wws_subfamily_name':22}):
try:
ans[name] = decode_name_record(records[num])
except (IndexError, KeyError, ValueError):
continue
if not ans[name]:
del ans[name]
for platform_id, encoding_id, language_id, src in records[6]:
if (platform_id, encoding_id, language_id) == (1, 0, 0):
try:
ans['postscript_name'] = src.decode('utf-8')
break
except ValueError:
continue
elif (platform_id, encoding_id, language_id) == (3, 1, 1033):
try:
ans['postscript_name'] = src.decode('utf-16-be')
break
except ValueError:
continue
return ans
def checksum_of_block(raw):
extra = 4 - len(raw)%4
raw += b'\0'*extra
num = len(raw)//4
return sum(struct.unpack(b'>%dI'%num, raw)) % (1<<32)
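# Worked example: the sfnt checksum is just the sum of the big-endian 32-bit
# words, modulo 2**32; NUL padding cannot change it. The eight bytes below
# form the words 0x00000005 and 0x01000000, so:
#
#   >>> checksum_of_block(b'\x00\x00\x00\x05\x01\x00\x00\x00')
#   16777221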
def verify_checksums(raw):
head_table = None
for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
if table_tag.lower() == b'head':
version, fontrev, checksum_adj = struct.unpack_from(b'>ffL', table)
head_table = table
offset = table_offset
checksum = table_checksum
elif checksum_of_block(table) != table_checksum:
raise ValueError('The %r table has an incorrect checksum'%table_tag)
if head_table is not None:
table = head_table
table = table[:8] + struct.pack(b'>I', 0) + table[12:]
raw = raw[:offset] + table + raw[offset+len(table):]
# Check the checksum of the head table
if checksum_of_block(table) != checksum:
raise ValueError('Checksum of head table not correct')
# Check the checksum of the entire font
checksum = checksum_of_block(raw)
q = (0xB1B0AFBA - checksum) & 0xffffffff
if q != checksum_adj:
raise ValueError('Checksum of entire font incorrect')
def set_checksum_adjustment(f):
offset = get_table(f.getvalue(), 'head')[2]
offset += 8
f.seek(offset)
f.write(struct.pack(b'>I', 0))
checksum = checksum_of_block(f.getvalue())
q = (0xB1B0AFBA - checksum) & 0xffffffff
f.seek(offset)
f.write(struct.pack(b'>I', q))
def set_table_checksum(f, name):
table, table_index, table_offset, table_checksum = get_table(f.getvalue(), name)
checksum = checksum_of_block(table)
if checksum != table_checksum:
f.seek(table_index + 4)
f.write(struct.pack(b'>I', checksum))
def remove_embed_restriction(raw):
ok, sig = is_truetype_font(raw)
if not ok:
raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sig)
table, table_index, table_offset = get_table(raw, 'os/2')[:3]
if table is None:
raise UnsupportedFont('Not a supported font, has no OS/2 table')
fs_type_offset = struct.calcsize(b'>HhHH')
fs_type = struct.unpack_from(b'>H', table, fs_type_offset)[0]
if fs_type == 0:
return raw
f = BytesIO(raw)
f.seek(fs_type_offset + table_offset)
f.write(struct.pack(b'>H', 0))
set_table_checksum(f, 'os/2')
set_checksum_adjustment(f)
raw = f.getvalue()
verify_checksums(raw)
return raw
def is_font_embeddable(raw):
# https://www.microsoft.com/typography/otspec/os2.htm#fst
ok, sig = is_truetype_font(raw)
if not ok:
raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sig)
table, table_index, table_offset = get_table(raw, 'os/2')[:3]
if table is None:
raise UnsupportedFont('Not a supported font, has no OS/2 table')
fs_type_offset = struct.calcsize(b'>HhHH')
fs_type = struct.unpack_from(b'>H', table, fs_type_offset)[0]
if fs_type == 0 or fs_type & 0x8:
return True, fs_type
if fs_type & 1:
return False, fs_type
if fs_type & 0x200:
return False, fs_type
return True, fs_type
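# Worked examples for the fsType checks above: a font whose OS/2 fsType is 0
# is installable, and one with bit 0x8 set allows editable embedding, so both
# report True; bit 0x200 (bitmap embedding only) makes the font unusable for
# our purposes, so it reports False.
#
#   fsType 0x0000 -> (True, 0x0000)
#   fsType 0x0008 -> (True, 0x0008)
#   fsType 0x0200 -> (False, 0x0200)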
def read_bmp_prefix(table, bmp):
length, language, segcount = struct.unpack_from(b'>3H', table, bmp+2)
array_len = segcount //2
offset = bmp + 7*2
array_sz = 2*array_len
array = b'>%dH'%array_len
end_count = struct.unpack_from(array, table, offset)
offset += array_sz + 2
start_count = struct.unpack_from(array, table, offset)
offset += array_sz
id_delta = struct.unpack_from(array.replace(b'H', b'h'), table, offset)
offset += array_sz
range_offset = struct.unpack_from(array, table, offset)
if length + bmp < offset + array_sz:
raise ValueError('cmap subtable length is too small')
glyph_id_len = (length + bmp - (offset + array_sz))//2
glyph_id_map = struct.unpack_from(b'>%dH'%glyph_id_len, table, offset +
array_sz)
return (start_count, end_count, range_offset, id_delta, glyph_id_len,
glyph_id_map, array_len)
def get_bmp_glyph_ids(table, bmp, codes):
(start_count, end_count, range_offset, id_delta, glyph_id_len,
glyph_id_map, array_len) = read_bmp_prefix(table, bmp)
for code in codes:
found = False
for i, ec in enumerate(end_count):
if ec >= code:
sc = start_count[i]
if sc <= code:
found = True
ro = range_offset[i]
if ro == 0:
glyph_id = id_delta[i] + code
else:
idx = ro//2 + (code - sc) + i - array_len
glyph_id = glyph_id_map[idx]
if glyph_id != 0:
glyph_id += id_delta[i]
yield glyph_id % 0x10000
break
if not found:
yield 0
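# Worked example of the segment arithmetic above (hypothetical segment): for
# a segment with start_count 0x41, end_count 0x5A, id_delta -0x3F and
# range_offset 0, the code point for 'A' (0x41) maps to glyph
# (0x41 + -0x3F) % 0x10000 == 2, i.e. glyph_id is id_delta + code, modulo 64k.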
def get_glyph_ids(raw, text, raw_is_table=False):
if not isinstance(text, unicode_type):
raise TypeError('%r is not a unicode object'%text)
if raw_is_table:
table = raw
else:
table = get_table(raw, 'cmap')[0]
if table is None:
raise UnsupportedFont('Not a supported font, has no cmap table')
version, num_tables = struct.unpack_from(b'>HH', table)
bmp_table = None
for i in range(num_tables):
platform_id, encoding_id, offset = struct.unpack_from(b'>HHL', table,
4 + (i*8))
if platform_id == 3 and encoding_id == 1:
table_format = struct.unpack_from(b'>H', table, offset)[0]
if table_format == 4:
bmp_table = offset
break
if bmp_table is None:
raise UnsupportedFont('Not a supported font, has no format 4 cmap table')
for glyph_id in get_bmp_glyph_ids(table, bmp_table, map(ord, text)):
yield glyph_id
def supports_text(raw, text, has_only_printable_chars=False):
if not isinstance(text, unicode_type):
raise TypeError('%r is not a unicode object'%text)
if not has_only_printable_chars:
text = get_printable_characters(text)
try:
for glyph_id in get_glyph_ids(raw, text):
if glyph_id == 0:
return False
    except Exception:
return False
return True
def get_font_for_text(text, candidate_font_data=None):
ok = False
if candidate_font_data is not None:
ok = supports_text(candidate_font_data, text)
if not ok:
from calibre.utils.fonts.scanner import font_scanner
family, faces = font_scanner.find_font_for_text(text)
if faces:
with lopen(faces[0]['path'], 'rb') as f:
candidate_font_data = f.read()
return candidate_font_data
def test_glyph_ids():
from calibre.utils.fonts.free_type import FreeType
data = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
ft = FreeType()
font = ft.load_font(data)
text = u'诶йab'
ft_glyphs = tuple(font.glyph_ids(text))
glyphs = tuple(get_glyph_ids(data, text))
if ft_glyphs != glyphs:
raise Exception('My code and FreeType differ on the glyph ids')
def test_supports_text():
data = P('fonts/calibreSymbols.otf', data=True)
if not supports_text(data, '.★½'):
raise RuntimeError('Incorrectly returning that text is not supported')
if supports_text(data, 'abc'):
raise RuntimeError('Incorrectly claiming that text is supported')
def test_find_font():
from calibre.utils.fonts.scanner import font_scanner
abcd = '诶比西迪'
    family = font_scanner.find_font_for_text(abcd)[0]
    print('Family for Chinese text:', family)
    abcd = 'لوحة المفاتيح العربية'
    family = font_scanner.find_font_for_text(abcd)[0]
    print('Family for Arabic text:', family)
def test():
test_glyph_ids()
test_supports_text()
test_find_font()
def main():
import sys, os
for arg in sys.argv[1:]:
print(os.path.basename(arg))
with open(arg, 'rb') as f:
raw = f.read()
print(get_font_names(raw))
characs = get_font_characteristics(raw)
print(characs)
print(panose_to_css_generic_family(characs[5]))
verify_checksums(raw)
remove_embed_restriction(raw)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,416 @@
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Created on 23 Sep 2010
@author: charles
'''
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, string, traceback, numbers
from calibre import prints
from calibre.constants import DEBUG
from calibre.utils.formatter_functions import formatter_functions
from polyglot.builtins import unicode_type, error_message
class _Parser(object):
LEX_OP = 1
LEX_ID = 2
LEX_STR = 3
LEX_NUM = 4
LEX_EOF = 5
LEX_CONSTANTS = frozenset((LEX_STR, LEX_NUM))
def __init__(self, val, prog, funcs, parent):
self.lex_pos = 0
self.prog = prog[0]
self.prog_len = len(self.prog)
if prog[1] != '':
self.error(_('failed to scan program. Invalid input {0}').format(prog[1]))
self.parent = parent
self.parent_kwargs = parent.kwargs
self.parent_book = parent.book
self.locals = {'$':val}
self.funcs = funcs
def error(self, message):
m = 'Formatter: ' + message + _(' near ')
if self.lex_pos > 0:
m = '{0} {1}'.format(m, self.prog[self.lex_pos-1][1])
        elif self.lex_pos < self.prog_len:
            m = '{0} {1}'.format(m, self.prog[self.lex_pos][1])
else:
m = '{0} {1}'.format(m, _('end of program'))
raise ValueError(m)
def token(self):
if self.lex_pos >= self.prog_len:
return None
token = self.prog[self.lex_pos][1]
self.lex_pos += 1
return token
def consume(self):
self.lex_pos += 1
def token_op_is_a_equals(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == '='
def token_op_is_a_lparen(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == '('
def token_op_is_a_rparen(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == ')'
def token_op_is_a_comma(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == ','
def token_op_is_a_semicolon(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == ';'
def token_is_id(self):
if self.lex_pos >= self.prog_len:
return False
return self.prog[self.lex_pos][0] == self.LEX_ID
def token_is_constant(self):
if self.lex_pos >= self.prog_len:
return False
return self.prog[self.lex_pos][0] in self.LEX_CONSTANTS
def token_is_eof(self):
if self.lex_pos >= self.prog_len:
return True
token = self.prog[self.lex_pos]
return token[0] == self.LEX_EOF
def program(self):
val = self.statement()
if not self.token_is_eof():
self.error(_('syntax error - program ends before EOF'))
return val
def statement(self):
while True:
val = self.expr()
if self.token_is_eof():
return val
if not self.token_op_is_a_semicolon():
return val
self.consume()
if self.token_is_eof():
return val
def expr(self):
if self.token_is_id():
# We have an identifier. Determine if it is a function
id = self.token()
if not self.token_op_is_a_lparen():
if self.token_op_is_a_equals():
# classic assignment statement
self.consume()
cls = self.funcs['assign']
return cls.eval_(self.parent, self.parent_kwargs,
self.parent_book, self.locals, id, self.expr())
val = self.locals.get(id, None)
if val is None:
self.error(_('Unknown identifier ') + id)
return val
# We have a function.
# Check if it is a known one. We do this here so error reporting is
# better, as it can identify the tokens near the problem.
id = id.strip()
if id not in self.funcs:
self.error(_('unknown function {0}').format(id))
# Eat the paren
self.consume()
args = list()
while not self.token_op_is_a_rparen():
if id == 'assign' and len(args) == 0:
# Must handle the lvalue semantics of the assign function.
# The first argument is the name of the destination, not
# the value.
if not self.token_is_id():
                        self.error(_('assign requires the first parameter to be an id'))
args.append(self.token())
else:
# evaluate the argument (recursive call)
args.append(self.statement())
if not self.token_op_is_a_comma():
break
self.consume()
if self.token() != ')':
self.error(_('missing closing parenthesis'))
# Evaluate the function
cls = self.funcs[id]
if cls.arg_count != -1 and len(args) != cls.arg_count:
                self.error(_('incorrect number of arguments for function {0}').format(id))
return cls.eval_(self.parent, self.parent_kwargs,
self.parent_book, self.locals, *args)
elif self.token_is_constant():
# String or number
return self.token()
else:
self.error(_('expression is not function or constant'))
class TemplateFormatter(string.Formatter):
'''
Provides a format function that substitutes '' for any missing value
'''
_validation_string = 'This Is Some Text THAT SHOULD be LONG Enough.%^&*'
# Dict to do recursion detection. It is up to the individual get_value
# method to use it. It is cleared when starting to format a template
composite_values = {}
def __init__(self):
string.Formatter.__init__(self)
self.book = None
self.kwargs = None
self.strip_results = True
self.locals = {}
self.funcs = formatter_functions().get_functions()
def _do_format(self, val, fmt):
if not fmt or not val:
return val
if val == self._validation_string:
val = '0'
typ = fmt[-1]
if typ == 's':
pass
elif 'bcdoxXn'.find(typ) >= 0:
try:
val = int(val)
except Exception:
raise ValueError(
_('format: type {0} requires an integer value, got {1}').format(typ, val))
elif 'eEfFgGn%'.find(typ) >= 0:
try:
val = float(val)
            except Exception:
raise ValueError(
_('format: type {0} requires a decimal (float) value, got {1}').format(typ, val))
return unicode_type(('{0:'+fmt+'}').format(val))
def _explode_format_string(self, fmt):
try:
matches = self.format_string_re.match(fmt)
if matches is None or matches.lastindex != 3:
return fmt, '', ''
return matches.groups()
        except Exception:
if DEBUG:
traceback.print_exc()
return fmt, '', ''
format_string_re = re.compile(r'^(.*)\|([^\|]*)\|(.*)$', re.DOTALL)
compress_spaces = re.compile(r'\s+')
backslash_comma_to_comma = re.compile(r'\\,')
arg_parser = re.Scanner([
(r',', lambda x,t: ''),
(r'.*?((?<!\\),)', lambda x,t: t[:-1]),
(r'.*?\)', lambda x,t: t[:-1]),
])
# ################# 'Functional' template language ######################
lex_scanner = re.Scanner([
(r'[(),=;]', lambda x,t: (1, t)),
(r'-?[\d\.]+', lambda x,t: (3, t)),
(r'\$', lambda x,t: (2, t)),
(r'\w+', lambda x,t: (2, t)),
(r'".*?((?<!\\)")', lambda x,t: (3, t[1:-1])),
(r'\'.*?((?<!\\)\')', lambda x,t: (3, t[1:-1])),
(r'\n#.*?(?:(?=\n)|$)', None),
(r'\s', None)
], flags=re.DOTALL)
def _eval_program(self, val, prog, column_name):
# keep a cache of the lex'ed program under the theory that re-lexing
# is much more expensive than the cache lookup. This is certainly true
# for more than a few tokens, but it isn't clear for simple programs.
if column_name is not None and self.template_cache is not None:
lprog = self.template_cache.get(column_name, None)
if not lprog:
lprog = self.lex_scanner.scan(prog)
self.template_cache[column_name] = lprog
else:
lprog = self.lex_scanner.scan(prog)
parser = _Parser(val, lprog, self.funcs, self)
return parser.program()
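    # Illustrative program in the template language parsed above (hypothetical
    # column value; 'test' is one of the built-in formatter functions): assign
    # the current value to a local, then branch on whether it is empty.
    #
    #   a = $; test(a, 'has a value', 'empty')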
# ################# Override parent classes methods #####################
def get_value(self, key, args, kwargs):
raise Exception('get_value must be implemented in the subclass')
def format_field(self, val, fmt):
# ensure we are dealing with a string.
if isinstance(val, numbers.Number):
if val:
val = unicode_type(val)
else:
val = ''
# Handle conditional text
fmt, prefix, suffix = self._explode_format_string(fmt)
# Handle functions
# First see if we have a functional-style expression
if fmt.startswith('\''):
p = 0
else:
p = fmt.find(':\'')
if p >= 0:
p += 1
if p >= 0 and fmt[-1] == '\'':
val = self._eval_program(val, fmt[p+1:-1], None)
colon = fmt[0:p].find(':')
if colon < 0:
dispfmt = ''
else:
dispfmt = fmt[0:colon]
else:
# check for old-style function references
p = fmt.find('(')
dispfmt = fmt
if p >= 0 and fmt[-1] == ')':
colon = fmt[0:p].find(':')
if colon < 0:
dispfmt = ''
colon = 0
else:
dispfmt = fmt[0:colon]
colon += 1
fname = fmt[colon:p].strip()
if fname in self.funcs:
func = self.funcs[fname]
if func.arg_count == 2:
# only one arg expected. Don't bother to scan. Avoids need
# for escaping characters
args = [fmt[p+1:-1]]
else:
args = self.arg_parser.scan(fmt[p+1:])[0]
args = [self.backslash_comma_to_comma.sub(',', a) for a in args]
if (func.arg_count == 1 and (len(args) != 1 or args[0])) or \
(func.arg_count > 1 and func.arg_count != len(args)+1):
raise ValueError('Incorrect number of arguments for function '+ fmt[0:p])
if func.arg_count == 1:
val = func.eval_(self, self.kwargs, self.book, self.locals, val)
if self.strip_results:
val = val.strip()
else:
val = func.eval_(self, self.kwargs, self.book, self.locals, val, *args)
if self.strip_results:
val = val.strip()
else:
return _('%s: unknown function')%fname
if val:
val = self._do_format(val, dispfmt)
if not val:
return ''
return prefix + val + suffix
def evaluate(self, fmt, args, kwargs):
if fmt.startswith('program:'):
ans = self._eval_program(kwargs.get('$', None), fmt[8:], self.column_name)
else:
ans = self.vformat(fmt, args, kwargs)
if self.strip_results:
return self.compress_spaces.sub(' ', ans).strip()
return ans
# ######### a formatter that throws exceptions ############
def unsafe_format(self, fmt, kwargs, book, strip_results=True):
self.strip_results = strip_results
self.column_name = self.template_cache = None
self.kwargs = kwargs
self.book = book
self.composite_values = {}
self.locals = {}
return self.evaluate(fmt, [], kwargs)
# ######### a formatter guaranteed not to throw an exception ############
def safe_format(self, fmt, kwargs, error_value, book,
column_name=None, template_cache=None,
strip_results=True, template_functions=None):
self.strip_results = strip_results
self.column_name = column_name
self.template_cache = template_cache
self.kwargs = kwargs
self.book = book
if template_functions:
self.funcs = template_functions
else:
self.funcs = formatter_functions().get_functions()
self.composite_values = {}
self.locals = {}
try:
ans = self.evaluate(fmt, [], kwargs)
except Exception as e:
if DEBUG: # and getattr(e, 'is_locking_error', False):
traceback.print_exc()
if column_name:
prints('Error evaluating column named:', column_name)
ans = error_value + ' ' + error_message(e)
return ans
class ValidateFormatter(TemplateFormatter):
'''
Provides a formatter that substitutes the validation string for every value
'''
def get_value(self, key, args, kwargs):
return self._validation_string
def validate(self, x):
from calibre.ebooks.metadata.book.base import Metadata
return self.safe_format(x, {}, 'VALIDATE ERROR', Metadata(''))
validation_formatter = ValidateFormatter()
class EvalFormatter(TemplateFormatter):
'''
A template formatter that uses a simple dict instead of an mi instance
'''
def get_value(self, key, args, kwargs):
if key == '':
return ''
key = key.lower()
return kwargs.get(key, _('No such variable ') + key)
# DEPRECATED. This is not thread safe. Do not use.
eval_formatter = EvalFormatter()
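# Illustrative usage sketch (hypothetical values): EvalFormatter looks keys up
# in the supplied dict, so a simple template can be evaluated without a book.
#
#   f = EvalFormatter()
#   f.safe_format('{title} - {author}', {'title': 'T', 'author': 'A'}, 'ERR', None)
#   # -> 'T - A'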

File diff suppressed because it is too large

View File

@@ -0,0 +1,42 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
def html2text(html):
from html2text import HTML2Text
import re
if isinstance(html, bytes):
from calibre.ebooks.chardet import xml_to_unicode
html = xml_to_unicode(html, strip_encoding_pats=True, resolve_entities=True)[0]
# replace <u> tags with <span> as <u> becomes emphasis in html2text
html = re.sub(
r'<\s*(?P<solidus>/?)\s*[uU]\b(?P<rest>[^>]*)>',
r'<\g<solidus>span\g<rest>>', html)
h2t = HTML2Text()
h2t.default_image_alt = _('Unnamed image')
h2t.body_width = 0
h2t.single_line_break = True
h2t.emphasis_mark = '*'
return h2t.handle(html)
def find_tests():
import unittest
class TestH2T(unittest.TestCase):
def test_html2text_behavior(self):
for src, expected in {
'<u>test</U>': 'test\n',
'<i>test</i>': '*test*\n',
'<a href="http://else.where/other">other</a>': '[other](http://else.where/other)\n',
'<img src="test.jpeg">': '![Unnamed image](test.jpeg)\n',
'<a href="#t">test</a> <span id="t">dest</span>': 'test dest\n',
'<>a': '<>a\n',
'<p>a<p>b': 'a\nb\n',
}.items():
self.assertEqual(html2text(src), expected)
return unittest.defaultTestLoader.loadTestsFromTestCase(TestH2T)

View File

@@ -0,0 +1,323 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys
from polyglot.builtins import filter
is_narrow_build = sys.maxunicode < 0x10ffff
# Setup code {{{
import codecs
from calibre.constants import plugins
from calibre.utils.config_base import tweaks
from polyglot.builtins import unicode_type, cmp
_locale = _collator = _primary_collator = _sort_collator = _numeric_collator = _case_sensitive_collator = None
cmp  # referenced only inside the exec'd template strings below; this keeps it "used"
_none = u''
_none2 = b''
_cmap = {}
_icu, err = plugins['icu']
if _icu is None:
raise RuntimeError('Failed to load icu with error: %s' % err)
del err
icu_unicode_version = getattr(_icu, 'unicode_version', None)
_nmodes = {m:getattr(_icu, m) for m in ('NFC', 'NFD', 'NFKC', 'NFKD')}
# Ensure that the python internal filesystem and default encodings are not ASCII
def is_ascii(name):
try:
return codecs.lookup(name).name == b'ascii'
except (TypeError, LookupError):
return True
try:
if is_ascii(sys.getdefaultencoding()):
_icu.set_default_encoding(b'utf-8')
except Exception:
import traceback
traceback.print_exc()
try:
if is_ascii(sys.getfilesystemencoding()):
_icu.set_filesystem_encoding(b'utf-8')
except Exception:
import traceback
traceback.print_exc()
del is_ascii
def collator():
global _collator, _locale
if _collator is None:
if _locale is None:
from calibre.utils.localization import get_lang
if tweaks['locale_for_sorting']:
_locale = tweaks['locale_for_sorting']
else:
_locale = get_lang()
try:
_collator = _icu.Collator(_locale)
except Exception as e:
print('Failed to load collator for locale: %r with error %r, using English' % (_locale, e))
_collator = _icu.Collator('en')
return _collator
def change_locale(locale=None):
global _locale, _collator, _primary_collator, _sort_collator, _numeric_collator, _case_sensitive_collator
_collator = _primary_collator = _sort_collator = _numeric_collator = _case_sensitive_collator = None
_locale = locale
def primary_collator():
'Ignores case differences and accented characters'
global _primary_collator
if _primary_collator is None:
_primary_collator = collator().clone()
_primary_collator.strength = _icu.UCOL_PRIMARY
return _primary_collator
def sort_collator():
'Ignores case differences and recognizes numbers in strings (if the tweak is set)'
global _sort_collator
if _sort_collator is None:
_sort_collator = collator().clone()
_sort_collator.strength = _icu.UCOL_SECONDARY
_sort_collator.numeric = tweaks['numeric_collation']
return _sort_collator
def numeric_collator():
'Uses natural sorting for numbers inside strings so something2 will sort before something10'
global _numeric_collator
if _numeric_collator is None:
_numeric_collator = collator().clone()
_numeric_collator.strength = _icu.UCOL_SECONDARY
_numeric_collator.numeric = True
return _numeric_collator
def case_sensitive_collator():
'Always sorts upper case letter before lower case'
global _case_sensitive_collator
if _case_sensitive_collator is None:
_case_sensitive_collator = collator().clone()
_case_sensitive_collator.numeric = sort_collator().numeric
_case_sensitive_collator.upper_first = True
return _case_sensitive_collator
# Templates that will be used to generate various concrete
# function implementations based on different collators, to allow lazy loading
# of collators, with maximum runtime performance
_sort_key_template = '''
def {name}(obj):
try:
try:
return {collator}.{func}(obj)
except AttributeError:
pass
return {collator_func}().{func}(obj)
except TypeError:
if isinstance(obj, bytes):
try:
obj = obj.decode(sys.getdefaultencoding())
except ValueError:
return obj
return {collator}.{func}(obj)
return b''
'''
_strcmp_template = '''
def {name}(a, b):
try:
try:
return {collator}.{func}(a, b)
except AttributeError:
pass
return {collator_func}().{func}(a, b)
except TypeError:
if isinstance(a, bytes):
try:
a = a.decode(sys.getdefaultencoding())
except ValueError:
return cmp(a, b)
elif a is None:
a = u''
if isinstance(b, bytes):
try:
b = b.decode(sys.getdefaultencoding())
except ValueError:
return cmp(a, b)
elif b is None:
b = u''
return {collator}.{func}(a, b)
'''
_change_case_template = '''
def {name}(x):
try:
try:
return _icu.change_case(x, _icu.{which}, _locale)
except NotImplementedError:
pass
collator() # sets _locale
return _icu.change_case(x, _icu.{which}, _locale)
except TypeError:
if isinstance(x, bytes):
try:
x = x.decode(sys.getdefaultencoding())
except ValueError:
return x
return _icu.change_case(x, _icu.{which}, _locale)
raise
'''
def _make_func(template, name, **kwargs):
l = globals()
kwargs['name'] = name
kwargs['func'] = kwargs.get('func', 'sort_key')
exec(template.format(**kwargs), l)
return l[name]
# }}}
# ################ The string functions ########################################
sort_key = _make_func(_sort_key_template, 'sort_key', collator='_sort_collator', collator_func='sort_collator')
numeric_sort_key = _make_func(_sort_key_template, 'numeric_sort_key', collator='_numeric_collator', collator_func='numeric_collator')
primary_sort_key = _make_func(_sort_key_template, 'primary_sort_key', collator='_primary_collator', collator_func='primary_collator')
case_sensitive_sort_key = _make_func(_sort_key_template, 'case_sensitive_sort_key',
collator='_case_sensitive_collator', collator_func='case_sensitive_collator')
collation_order = _make_func(_sort_key_template, 'collation_order', collator='_sort_collator', collator_func='sort_collator', func='collation_order')
strcmp = _make_func(_strcmp_template, 'strcmp', collator='_sort_collator', collator_func='sort_collator', func='strcmp')
case_sensitive_strcmp = _make_func(
_strcmp_template, 'case_sensitive_strcmp', collator='_case_sensitive_collator', collator_func='case_sensitive_collator', func='strcmp')
primary_strcmp = _make_func(_strcmp_template, 'primary_strcmp', collator='_primary_collator', collator_func='primary_collator', func='strcmp')
upper = _make_func(_change_case_template, 'upper', which='UPPER_CASE')
lower = _make_func(_change_case_template, 'lower', which='LOWER_CASE')
title_case = _make_func(_change_case_template, 'title_case', which='TITLE_CASE')
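# Illustrative examples (results assume an English locale): the ICU-backed
# keys give case-insensitive, and optionally numeric, ordering that a plain
# byte-wise sort cannot.
#
#   sorted(['Banana', 'apple'], key=sort_key)          # ['apple', 'Banana']
#   sorted(['file10', 'file2'], key=numeric_sort_key)  # ['file2', 'file10']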
def capitalize(x):
try:
return upper(x[0]) + lower(x[1:])
except (IndexError, TypeError, AttributeError):
return x
try:
swapcase = _icu.swap_case
except AttributeError: # For people running from source
swapcase = lambda x:x.swapcase()
find = _make_func(_strcmp_template, 'find', collator='_collator', collator_func='collator', func='find')
primary_find = _make_func(_strcmp_template, 'primary_find', collator='_primary_collator', collator_func='primary_collator', func='find')
contains = _make_func(_strcmp_template, 'contains', collator='_collator', collator_func='collator', func='contains')
primary_contains = _make_func(_strcmp_template, 'primary_contains', collator='_primary_collator', collator_func='primary_collator', func='contains')
startswith = _make_func(_strcmp_template, 'startswith', collator='_collator', collator_func='collator', func='startswith')
primary_startswith = _make_func(_strcmp_template, 'primary_startswith', collator='_primary_collator', collator_func='primary_collator', func='startswith')
safe_chr = _icu.chr
ord_string = _icu.ord_string
def character_name(string):
try:
return _icu.character_name(unicode_type(string)) or None
except (TypeError, ValueError, KeyError):
pass
def character_name_from_code(code):
try:
return _icu.character_name_from_code(code) or ''
except (TypeError, ValueError, KeyError):
return ''
def normalize(text, mode='NFC'):
    # This is very slightly slower than using unicodedata.normalize, so stick
    # with that unless you have very good reasons not to. Also, its speed
    # decreases on wide python builds, where conversion to/from ICU's string
    # representation is slower.
return _icu.normalize(_nmodes[mode], unicode_type(text))
def contractions(col=None):
global _cmap
col = col or _collator
if col is None:
col = collator()
    ans = _cmap.get(col, None)
if ans is None:
ans = col.contractions()
ans = frozenset(filter(None, ans))
_cmap[col] = ans
return ans
def partition_by_first_letter(items, reverse=False, key=lambda x:x):
# Build a list of 'equal' first letters by noticing changes
# in ICU's 'ordinal' for the first letter.
from collections import OrderedDict
items = sorted(items, key=lambda x:sort_key(key(x)), reverse=reverse)
ans = OrderedDict()
last_c, last_ordnum = ' ', 0
for item in items:
c = icu_upper(key(item) or ' ')
ordnum, ordlen = collation_order(c)
if last_ordnum != ordnum:
if not is_narrow_build:
ordlen = 1
last_c = c[0:ordlen]
last_ordnum = ordnum
try:
ans[last_c].append(item)
except KeyError:
ans[last_c] = [item]
return ans
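# Illustrative example (grouping depends on the active locale): titles that
# share a first letter under the collator end up in the same bucket.
#
#   partition_by_first_letter(['apple', 'Ant', 'banana'])
#   # -> OrderedDict([('A', ['Ant', 'apple']), ('B', ['banana'])])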
# Return the number of unicode codepoints in a string
string_length = _icu.string_length if is_narrow_build else len
# Return the number of UTF-16 code units in a string
utf16_length = len if is_narrow_build else _icu.utf16_length
################################################################################
if __name__ == '__main__':
from calibre.utils.icu_test import run
run(verbosity=4)

View File

@@ -0,0 +1,690 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015-2019, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import errno
import os
import shutil
import subprocess
import sys
import tempfile
from io import BytesIO
from threading import Thread
# We use explicit module imports so tracebacks when importing are more useful
from PyQt5.QtCore import QBuffer, QByteArray, Qt
from PyQt5.QtGui import QColor, QImage, QImageReader, QImageWriter, QPixmap, QTransform
from calibre import fit_image, force_unicode
from calibre.constants import iswindows, plugins, ispy3
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.config_base import tweaks
from calibre.utils.filenames import atomic_rename
from calibre.utils.imghdr import what
from polyglot.builtins import string_or_bytes, unicode_type
# Utilities {{{
imageops, imageops_err = plugins['imageops']
if imageops is None:
raise RuntimeError(imageops_err)
class NotImage(ValueError):
pass
def normalize_format_name(fmt):
fmt = fmt.lower()
if fmt == 'jpg':
fmt = 'jpeg'
return fmt
def get_exe_path(name):
from calibre.ebooks.pdf.pdftohtml import PDFTOHTML
base = os.path.dirname(PDFTOHTML)
if iswindows:
name += '-calibre.exe'
if not base:
return name
return os.path.join(base, name)
def load_jxr_data(data):
with TemporaryDirectory() as tdir:
if iswindows and isinstance(tdir, unicode_type):
tdir = tdir.encode('mbcs')
with lopen(os.path.join(tdir, 'input.jxr'), 'wb') as f:
f.write(data)
cmd = [get_exe_path('JxrDecApp'), '-i', 'input.jxr', '-o', 'output.tif']
creationflags = 0x08 if iswindows else 0
subprocess.Popen(cmd, cwd=tdir, stdout=lopen(os.devnull, 'wb'), stderr=subprocess.STDOUT, creationflags=creationflags).wait()
i = QImage()
if not i.load(os.path.join(tdir, 'output.tif')):
raise NotImage('Failed to convert JPEG-XR image')
return i
# }}}
# png <-> gif {{{
def png_data_to_gif_data(data):
from PIL import Image
img = Image.open(BytesIO(data))
buf = BytesIO()
if img.mode in ('p', 'P'):
transparency = img.info.get('transparency')
if transparency is not None:
img.save(buf, 'gif', transparency=transparency)
else:
img.save(buf, 'gif')
elif img.mode in ('rgba', 'RGBA'):
alpha = img.split()[3]
mask = Image.eval(alpha, lambda a: 255 if a <=128 else 0)
img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE, colors=255)
img.paste(255, mask)
img.save(buf, 'gif', transparency=255)
else:
img = img.convert('P', palette=Image.ADAPTIVE)
img.save(buf, 'gif')
return buf.getvalue()
class AnimatedGIF(ValueError):
pass
def gif_data_to_png_data(data, discard_animation=False):
from PIL import Image
img = Image.open(BytesIO(data))
if img.is_animated and not discard_animation:
raise AnimatedGIF()
buf = BytesIO()
img.save(buf, 'png')
return buf.getvalue()
# }}}
# Loading images {{{
def null_image():
' Create an invalid image. For internal use. '
return QImage()
def image_from_data(data):
' Create an image object from data, which should be a bytestring. '
if isinstance(data, QImage):
return data
i = QImage()
if not i.loadFromData(data):
q = what(None, data)
if q == 'jxr':
return load_jxr_data(data)
raise NotImage('Not a valid image (detected type: {})'.format(q))
return i
def image_from_path(path):
' Load an image from the specified path. '
with lopen(path, 'rb') as f:
return image_from_data(f.read())
def image_from_x(x):
' Create an image from a bytestring or a path or a file like object. '
if isinstance(x, unicode_type):
return image_from_path(x)
if hasattr(x, 'read'):
return image_from_data(x.read())
if isinstance(x, (bytes, QImage)):
return image_from_data(x)
if isinstance(x, bytearray):
return image_from_data(bytes(x))
if isinstance(x, QPixmap):
return x.toImage()
raise TypeError('Unknown image src type: %s' % type(x))
def image_and_format_from_data(data):
' Create an image object from the specified data which should be a bytestring and also return the format of the image '
ba = QByteArray(data)
buf = QBuffer(ba)
buf.open(QBuffer.ReadOnly)
r = QImageReader(buf)
fmt = bytes(r.format()).decode('utf-8')
return r.read(), fmt
# }}}
# Saving images {{{
def image_to_data(img, compression_quality=95, fmt='JPEG', png_compression_level=9, jpeg_optimized=True, jpeg_progressive=False):
'''
Serialize image to bytestring in the specified format.
:param compression_quality: is for JPEG and goes from 0 to 100. 100 being lowest compression, highest image quality
:param png_compression_level: is for PNG and goes from 0-9. 9 being highest compression.
    :param jpeg_optimized: Turns on the 'optimize' option for libjpeg which losslessly reduces file size
:param jpeg_progressive: Turns on the 'progressive scan' option for libjpeg which allows JPEG images to be downloaded in streaming fashion
'''
fmt = fmt.upper()
ba = QByteArray()
buf = QBuffer(ba)
buf.open(QBuffer.WriteOnly)
if fmt == 'GIF':
w = QImageWriter(buf, b'PNG')
w.setQuality(90)
if not w.write(img):
raise ValueError('Failed to export image as ' + fmt + ' with error: ' + w.errorString())
return png_data_to_gif_data(ba.data())
is_jpeg = fmt in ('JPG', 'JPEG')
w = QImageWriter(buf, fmt.encode('ascii'))
if is_jpeg:
if img.hasAlphaChannel():
img = blend_image(img)
# QImageWriter only gained the following options in Qt 5.5
if jpeg_optimized:
w.setOptimizedWrite(True)
if jpeg_progressive:
w.setProgressiveScanWrite(True)
w.setQuality(compression_quality)
elif fmt == 'PNG':
cl = min(9, max(0, png_compression_level))
w.setQuality(10 * (9-cl))
if not w.write(img):
raise ValueError('Failed to export image as ' + fmt + ' with error: ' + w.errorString())
return ba.data()
def save_image(img, path, **kw):
''' Save image to the specified path. Image format is taken from the file
extension. You can pass the same keyword arguments as for the
`image_to_data()` function. '''
fmt = path.rpartition('.')[-1]
kw['fmt'] = kw.get('fmt', fmt)
with lopen(path, 'wb') as f:
f.write(image_to_data(image_from_data(img), **kw))
def save_cover_data_to(
data, path=None,
bgcolor='#ffffff',
resize_to=None,
compression_quality=90,
minify_to=None,
grayscale=False,
eink=False, letterbox=False,
data_fmt='jpeg'
):
'''
Saves image in data to path, in the format specified by the path
extension. Removes any transparency. If there is no transparency and no
resize and the input and output image formats are the same, no changes are
made.
:param data: Image data as bytestring
:param path: If None img data is returned, in JPEG format
:param data_fmt: The fmt to return data in when path is None. Defaults to JPEG
:param compression_quality: The quality of the image after compression.
Number between 1 and 100. 1 means highest compression, 100 means no
compression (lossless). When generating PNG this number is divided by 10
for the png_compression_level.
:param bgcolor: The color for transparent pixels. Must be specified in hex.
:param resize_to: A tuple (width, height) or None for no resizing
:param minify_to: A tuple (width, height) to specify maximum target size.
The image will be resized to fit into this target size. If None the
value from the tweak is used.
:param grayscale: If True, the image is converted to grayscale,
if that's not already the case.
:param eink: If True, the image is dithered down to the 16 specific shades
of gray of the eInk palette.
Works best with formats that actually support color indexing (i.e., PNG)
    :param letterbox: If True, in addition to being resized to fit inside minify_to,
        the image will be letterboxed (i.e., centered on a black background).
'''
fmt = normalize_format_name(data_fmt if path is None else os.path.splitext(path)[1][1:])
if isinstance(data, QImage):
img = data
changed = True
else:
img, orig_fmt = image_and_format_from_data(data)
orig_fmt = normalize_format_name(orig_fmt)
changed = fmt != orig_fmt
if resize_to is not None:
changed = True
img = img.scaled(resize_to[0], resize_to[1], Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
owidth, oheight = img.width(), img.height()
nwidth, nheight = tweaks['maximum_cover_size'] if minify_to is None else minify_to
if letterbox:
img = blend_on_canvas(img, nwidth, nheight, bgcolor='#000000')
# Check if we were minified
if oheight != nheight or owidth != nwidth:
changed = True
else:
scaled, nwidth, nheight = fit_image(owidth, oheight, nwidth, nheight)
if scaled:
changed = True
img = img.scaled(nwidth, nheight, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
if img.hasAlphaChannel():
changed = True
img = blend_image(img, bgcolor)
if grayscale and not eink:
if not img.allGray():
changed = True
img = grayscale_image(img)
if eink:
# NOTE: Keep in mind that JPG does NOT actually support indexed colors, so the JPG algorithm will then smush everything back into a 256c mess...
# Thankfully, Nickel handles PNG just fine, and we potentially generate smaller files to boot, because they can be properly color indexed ;).
img = eink_dither_image(img)
changed = True
if path is None:
return image_to_data(img, compression_quality, fmt, compression_quality // 10) if changed else data
with lopen(path, 'wb') as f:
f.write(image_to_data(img, compression_quality, fmt, compression_quality // 10) if changed else data)
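# Minimal usage sketch (hypothetical paths): flatten any transparency onto
# white and write the cover as a JPEG no larger than 600x800.
#
#   with lopen('cover.png', 'rb') as f:
#       save_cover_data_to(f.read(), 'cover.jpg', minify_to=(600, 800))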
# }}}
# Overlaying images {{{
def blend_on_canvas(img, width, height, bgcolor='#ffffff'):
' Blend the `img` onto a canvas with the specified background color and size '
w, h = img.width(), img.height()
scaled, nw, nh = fit_image(w, h, width, height)
if scaled:
img = img.scaled(nw, nh, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
w, h = nw, nh
canvas = QImage(width, height, QImage.Format_RGB32)
canvas.fill(QColor(bgcolor))
overlay_image(img, canvas, (width - w)//2, (height - h)//2)
return canvas
class Canvas(object):
def __init__(self, width, height, bgcolor='#ffffff'):
self.img = QImage(width, height, QImage.Format_RGB32)
self.img.fill(QColor(bgcolor))
def __enter__(self):
return self
def __exit__(self, *args):
pass
def compose(self, img, x=0, y=0):
img = image_from_data(img)
overlay_image(img, self.img, x, y)
def export(self, fmt='JPEG', compression_quality=95):
return image_to_data(self.img, compression_quality=compression_quality, fmt=fmt)
def create_canvas(width, height, bgcolor='#ffffff'):
'Create a blank canvas of the specified size and color '
img = QImage(width, height, QImage.Format_RGB32)
img.fill(QColor(bgcolor))
return img
def overlay_image(img, canvas=None, left=0, top=0):
' Overlay the `img` onto the canvas at the specified position '
if canvas is None:
canvas = QImage(img.size(), QImage.Format_RGB32)
canvas.fill(Qt.white)
left, top = int(left), int(top)
imageops.overlay(img, canvas, left, top)
return canvas
def texture_image(canvas, texture):
' Repeatedly tile the image `texture` across and down the image `canvas` '
if canvas.hasAlphaChannel():
canvas = blend_image(canvas)
return imageops.texture_image(canvas, texture)
def blend_image(img, bgcolor='#ffffff'):
' Used to convert images that have semi-transparent pixels to opaque by blending with the specified color '
canvas = QImage(img.size(), QImage.Format_RGB32)
canvas.fill(QColor(bgcolor))
overlay_image(img, canvas)
return canvas
# }}}
# Image borders {{{
def add_borders_to_image(img, left=0, top=0, right=0, bottom=0, border_color='#ffffff'):
img = image_from_data(img)
if not (left > 0 or right > 0 or top > 0 or bottom > 0):
return img
canvas = QImage(img.width() + left + right, img.height() + top + bottom, QImage.Format_RGB32)
canvas.fill(QColor(border_color))
overlay_image(img, canvas, left, top)
return canvas
def remove_borders_from_image(img, fuzz=None):
''' Try to auto-detect and remove any borders from the image. Returns
the image itself if no borders could be removed. `fuzz` is a measure of
what colors are considered identical (must be a number between 0 and 255 in
absolute intensity units). Default is from a tweak whose default value is 10. '''
fuzz = tweaks['cover_trim_fuzz_value'] if fuzz is None else fuzz
img = image_from_data(img)
ans = imageops.remove_borders(img, max(0, fuzz))
return ans if ans.size() != img.size() else img
# }}}
# Cropping/scaling of images {{{
def resize_image(img, width, height):
return img.scaled(int(width), int(height), Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
def resize_to_fit(img, width, height):
img = image_from_data(img)
resize_needed, nw, nh = fit_image(img.width(), img.height(), width, height)
if resize_needed:
img = resize_image(img, nw, nh)
return resize_needed, img
def clone_image(img):
''' Returns a shallow copy of the image. However, the underlying data buffer
will be automatically copied-on-write '''
return QImage(img)
def scale_image(data, width=60, height=80, compression_quality=70, as_png=False, preserve_aspect_ratio=True):
''' Scale an image, returning it as either JPEG or PNG data (bytestring).
Transparency is alpha blended with white when converting to JPEG. Is thread
safe and does not require a QApplication. '''
# We use Qt instead of ImageMagick here because ImageMagick seems to use
    # some kind of memory pool, causing memory consumption to skyrocket.
img = image_from_data(data)
if preserve_aspect_ratio:
scaled, nwidth, nheight = fit_image(img.width(), img.height(), width, height)
if scaled:
img = img.scaled(nwidth, nheight, Qt.KeepAspectRatio, Qt.SmoothTransformation)
else:
if img.width() != width or img.height() != height:
img = img.scaled(width, height, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
fmt = 'PNG' if as_png else 'JPEG'
w, h = img.width(), img.height()
return w, h, image_to_data(img, compression_quality=compression_quality, fmt=fmt)
def crop_image(img, x, y, width, height):
'''
Return the specified section of the image.
:param x, y: The top left corner of the crop box
:param width, height: The width and height of the crop box. Note that if
    the crop box exceeds the source image's dimensions, width and height will be
auto-truncated.
'''
img = image_from_data(img)
width = min(width, img.width() - x)
height = min(height, img.height() - y)
return img.copy(x, y, width, height)
# }}}
# Image transformations {{{
def grayscale_image(img):
return imageops.grayscale(image_from_data(img))
def set_image_opacity(img, alpha=0.5):
    ''' Change the opacity of `img`. Note that the alpha value is multiplied by
any existing alpha values, so you cannot use this function to convert a
semi-transparent image to an opaque one. For that use `blend_image()`. '''
return imageops.set_opacity(image_from_data(img), alpha)
def flip_image(img, horizontal=False, vertical=False):
return image_from_data(img).mirrored(horizontal, vertical)
def image_has_transparent_pixels(img):
' Return True iff the image has at least one semi-transparent pixel '
img = image_from_data(img)
if img.isNull():
return False
return imageops.has_transparent_pixels(img)
def rotate_image(img, degrees):
t = QTransform()
t.rotate(degrees)
return image_from_data(img).transformed(t)
def gaussian_sharpen_image(img, radius=0, sigma=3, high_quality=True):
return imageops.gaussian_sharpen(image_from_data(img), max(0, radius), sigma, high_quality)
def gaussian_blur_image(img, radius=-1, sigma=3):
return imageops.gaussian_blur(image_from_data(img), max(0, radius), sigma)
def despeckle_image(img):
return imageops.despeckle(image_from_data(img))
def oil_paint_image(img, radius=-1, high_quality=True):
return imageops.oil_paint(image_from_data(img), radius, high_quality)
def normalize_image(img):
return imageops.normalize(image_from_data(img))
def quantize_image(img, max_colors=256, dither=True, palette=''):
    ''' Quantize the image to contain a maximum of `max_colors` colors. By
    default a palette is chosen automatically; if you want to use a fixed
    palette, pass in a list of color names in the `palette` variable. If you
    specify a palette, `max_colors` is ignored. Note that it is possible for
    the actual number of colors used to be less than max_colors.
    :param max_colors: Max. number of colors in the auto-generated palette. Must be between 2 and 256.
    :param dither: Whether to use dithering or not. Dithering is almost always a good thing.
:param palette: Use a manually specified palette instead. For example: palette='red green blue #eee'
'''
img = image_from_data(img)
if img.hasAlphaChannel():
img = blend_image(img)
if palette and isinstance(palette, string_or_bytes):
palette = palette.split()
return imageops.quantize(img, max_colors, dither, [QColor(x).rgb() for x in palette])
def eink_dither_image(img):
''' Dither the source image down to the eInk palette of 16 shades of grey,
using ImageMagick's OrderedDither algorithm.
NOTE: No need to call grayscale_image first, as this will inline a grayscaling pass if need be.
Returns a QImage in Grayscale8 pixel format.
'''
img = image_from_data(img)
if img.hasAlphaChannel():
img = blend_image(img)
return imageops.ordered_dither(img)
# }}}
# Optimization of images {{{
def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
file_path = os.path.abspath(file_path)
cwd = os.path.dirname(file_path)
ext = os.path.splitext(file_path)[1]
if not ext or len(ext) > 10 or not ext.startswith('.'):
ext = '.jpg'
fd, outfile = tempfile.mkstemp(dir=cwd, suffix=ext)
try:
if as_filter:
outf = os.fdopen(fd, 'wb')
else:
os.close(fd)
iname, oname = os.path.basename(file_path), os.path.basename(outfile)
def repl(q, r):
cmd[cmd.index(q)] = r
if not as_filter:
repl(True, iname), repl(False, oname)
if iswindows and not ispy3:
# subprocess in python 2 cannot handle unicode strings that are not
# encodeable in mbcs, so we fail here, where it is more explicit,
# instead.
cmd = [x.encode('mbcs') if isinstance(x, unicode_type) else x for x in cmd]
if isinstance(cwd, unicode_type):
cwd = cwd.encode('mbcs')
stdin = subprocess.PIPE if as_filter else None
stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
creationflags = 0x08 if iswindows else 0
p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=stderr, stdin=stdin, creationflags=creationflags)
stderr = p.stderr if as_filter else p.stdout
if as_filter:
src = input_data or open(file_path, 'rb')
def copy(src, dest):
try:
shutil.copyfileobj(src, dest)
finally:
src.close(), dest.close()
inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
inw.daemon = True
inw.start()
outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
outw.daemon = True
outw.start()
raw = force_unicode(stderr.read())
if p.wait() != 0:
return raw
else:
if as_filter:
outw.join(60.0), inw.join(60.0)
try:
sz = os.path.getsize(outfile)
except EnvironmentError:
sz = 0
if sz < 1:
return '%s returned a zero size image' % cmd[0]
shutil.copystat(file_path, outfile)
atomic_rename(outfile, file_path)
finally:
try:
os.remove(outfile)
except EnvironmentError as err:
if err.errno != errno.ENOENT:
raise
try:
os.remove(outfile + '.bak') # optipng creates these files
except EnvironmentError as err:
if err.errno != errno.ENOENT:
raise
def optimize_jpeg(file_path):
exe = get_exe_path('jpegtran')
cmd = [exe] + '-copy none -optimize -progressive -maxmemory 100M -outfile'.split() + [False, True]
return run_optimizer(file_path, cmd)
def optimize_png(file_path, level=7):
' level goes from 1 to 7 with 7 being maximum compression '
exe = get_exe_path('optipng')
cmd = [exe] + '-fix -clobber -strip all -o{} -out'.format(level).split() + [False, True]
return run_optimizer(file_path, cmd)
def encode_jpeg(file_path, quality=80):
from calibre.utils.speedups import ReadOnlyFileBuffer
quality = max(0, min(100, int(quality)))
exe = get_exe_path('cjpeg')
cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [unicode_type(quality)]
img = QImage()
if not img.load(file_path):
raise ValueError('%s is not a valid image file' % file_path)
ba = QByteArray()
buf = QBuffer(ba)
buf.open(QBuffer.WriteOnly)
if not img.save(buf, 'PPM'):
raise ValueError('Failed to export image to PPM')
return run_optimizer(file_path, cmd, as_filter=True, input_data=ReadOnlyFileBuffer(ba.data()))
# }}}
def test(): # {{{
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from glob import glob
img = image_from_data(I('lt.png', data=True, allow_user_override=False))
with TemporaryDirectory() as tdir, CurrentDir(tdir):
save_image(img, 'test.jpg')
ret = optimize_jpeg('test.jpg')
if ret is not None:
raise SystemExit('optimize_jpeg failed: %s' % ret)
ret = encode_jpeg('test.jpg')
if ret is not None:
raise SystemExit('encode_jpeg failed: %s' % ret)
shutil.copyfile(I('lt.png'), 'test.png')
ret = optimize_png('test.png')
if ret is not None:
raise SystemExit('optimize_png failed: %s' % ret)
if glob('*.bak'):
raise SystemExit('Spurious .bak files left behind')
quantize_image(img)
oil_paint_image(img)
gaussian_sharpen_image(img)
gaussian_blur_image(img)
despeckle_image(img)
remove_borders_from_image(img)
image_to_data(img, fmt='GIF')
raw = subprocess.Popen([get_exe_path('JxrDecApp'), '-h'], creationflags=0x08 if iswindows else 0, stdout=subprocess.PIPE).stdout.read()
if b'JPEG XR Decoder Utility' not in raw:
raise SystemExit('Failed to run JxrDecApp')
# }}}
if __name__ == '__main__': # {{{
args = sys.argv[1:]
infile = args.pop(0)
img = image_from_data(lopen(infile, 'rb').read())
func = globals()[args[0]]
kw = {}
args.pop(0)
outf = None
while args:
k = args.pop(0)
if '=' in k:
n, v = k.partition('=')[::2]
if v in ('True', 'False'):
v = True if v == 'True' else False
try:
v = int(v)
except Exception:
try:
v = float(v)
except Exception:
pass
kw[n] = v
else:
outf = k
if outf is None:
bn = os.path.basename(infile)
        outf = bn.rpartition('.')[0] + '-output.' + bn.rpartition('.')[-1]
img = func(img, **kw)
with lopen(outf, 'wb') as f:
f.write(image_to_data(img, fmt=outf.rpartition('.')[-1]))
# }}}

View File

@@ -0,0 +1,263 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from struct import unpack, error
import os
from calibre.utils.speedups import ReadOnlyFileBuffer
from calibre.constants import ispy3
from polyglot.builtins import string_or_bytes, unicode_type
""" Recognize image file formats and sizes based on their first few bytes."""
HSIZE = 120
def what(file, h=None):
' Recognize image headers '
if h is None:
if isinstance(file, string_or_bytes):
with lopen(file, 'rb') as f:
h = f.read(HSIZE)
else:
location = file.tell()
h = file.read(HSIZE)
file.seek(location)
if isinstance(h, bytes):
h = memoryview(h)
for tf in tests:
res = tf(h)
if res:
return res
    # There exist some jpeg files with no headers, only the starting two bytes
# If we cannot identify as anything else, identify as jpeg.
if h[:2] == b'\xff\xd8':
return 'jpeg'
return None
def identify(src):
''' Recognize file format and sizes. Returns format, width, height. width
and height will be -1 if not found and fmt will be None if the image is not
recognized. '''
width = height = -1
if isinstance(src, unicode_type):
stream = lopen(src, 'rb')
elif isinstance(src, bytes):
stream = ReadOnlyFileBuffer(src)
else:
stream = src
pos = stream.tell()
head = stream.read(HSIZE)
stream.seek(pos)
fmt = what(None, head)
if fmt in {'jpeg', 'gif', 'png', 'jpeg2000'}:
size = len(head)
if fmt == 'png':
# PNG
s = head[16:24] if size >= 24 and head[12:16] == b'IHDR' else head[8:16]
try:
width, height = unpack(b">LL", s)
except error:
return fmt, width, height
elif fmt == 'jpeg':
# JPEG
pos = stream.tell()
try:
height, width = jpeg_dimensions(stream)
except Exception:
return fmt, width, height
finally:
stream.seek(pos)
elif fmt == 'gif':
# GIF
try:
width, height = unpack(b"<HH", head[6:10])
except error:
return fmt, width, height
elif size >= 56 and fmt == 'jpeg2000':
# JPEG2000
try:
height, width = unpack(b'>LL', head[48:56])
except error:
return fmt, width, height
return fmt, width, height
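# Illustrative example: identify() accepts raw bytes as well as paths or
# streams. GIF stores its dimensions in the first 10 bytes, so a bare header
# (pixel data elided) is enough:
#
#   >>> identify(b'GIF89a\x01\x00\x01\x00')
#   ('gif', 1, 1)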
# ---------------------------------#
# Subroutines per image file type #
# ---------------------------------#
tests = []
def test(f):
tests.append(f)
return f
@test
def jpeg(h):
"""JPEG data in JFIF format (Changed by Kovid to mimic the file utility,
the original code was failing with some jpegs that included ICC_PROFILE
data, for example: http://nationalpostnews.files.wordpress.com/2013/03/budget.jpeg?w=300&h=1571)"""
if h[6:10] in (b'JFIF', b'Exif'):
return 'jpeg'
if h[:2] == b'\xff\xd8':
q = h[:32].tobytes()
if b'JFIF' in q or b'8BIM' in q:
return 'jpeg'
def jpeg_dimensions(stream):
# A JPEG marker is two bytes of the form 0xff x where 0 < x < 0xff
# See section B.1.1.2 of https://www.w3.org/Graphics/JPEG/itu-t81.pdf
# We read the dimensions from the first SOFn section we come across
stream.seek(2, os.SEEK_CUR)
def read(n):
ans = stream.read(n)
if len(ans) != n:
raise ValueError('Truncated JPEG data')
return ans
if ispy3:
def read_byte():
return read(1)[0]
else:
def read_byte():
return ord(read(1)[0])
x = None
while True:
# Find next marker
while x != 0xff:
x = read_byte()
# Soak up padding
marker = 0xff
while marker == 0xff:
marker = read_byte()
q = marker
if 0xc0 <= q <= 0xcf and q != 0xc4 and q != 0xcc:
# SOFn marker
stream.seek(3, os.SEEK_CUR)
return unpack(b'>HH', read(4))
elif 0xd8 <= q <= 0xda:
break # start of image, end of image, start of scan, no point
elif q == 0:
return -1, -1 # Corrupted JPEG
elif q == 0x01 or 0xd0 <= q <= 0xd7:
# Standalone marker
continue
        else:
            # Not a marker we need; skip this segment and keep scanning
            size = unpack(b'>H', read(2))[0]
            stream.seek(size - 2, os.SEEK_CUR)
return -1, -1
@test
def png(h):
if h[:8] == b"\211PNG\r\n\032\n":
return 'png'
@test
def gif(h):
"""GIF ('87 and '89 variants)"""
if h[:6] in (b'GIF87a', b'GIF89a'):
return 'gif'
@test
def tiff(h):
"""TIFF (can be in Motorola or Intel byte order)"""
if h[:2] in (b'MM', b'II'):
if h[2:4] == b'\xbc\x01':
return 'jxr'
return 'tiff'
@test
def webp(h):
if h[:4] == b'RIFF' and h[8:12] == b'WEBP':
return 'webp'
@test
def rgb(h):
"""SGI image library"""
if h[:2] == b'\001\332':
return 'rgb'
@test
def pbm(h):
"""PBM (portable bitmap)"""
    # Index via slices so the checks also work on memoryviews under python 3
    if len(h) >= 3 and h[0:1].tobytes() == b'P' and \
            h[1:2].tobytes() in b'14' and h[2:3].tobytes() in b' \t\n\r':
return 'pbm'
@test
def pgm(h):
"""PGM (portable graymap)"""
    if len(h) >= 3 and h[0:1].tobytes() == b'P' and \
            h[1:2].tobytes() in b'25' and h[2:3].tobytes() in b' \t\n\r':
return 'pgm'
@test
def ppm(h):
"""PPM (portable pixmap)"""
    if len(h) >= 3 and h[0:1].tobytes() == b'P' and \
            h[1:2].tobytes() in b'36' and h[2:3].tobytes() in b' \t\n\r':
return 'ppm'
@test
def rast(h):
"""Sun raster file"""
if h[:4] == b'\x59\xA6\x6A\x95':
return 'rast'
@test
def xbm(h):
"""X bitmap (X10 or X11)"""
s = b'#define '
if h[:len(s)] == s:
return 'xbm'
@test
def bmp(h):
if h[:2] == b'BM':
return 'bmp'
@test
def emf(h):
if h[:4] == b'\x01\0\0\0' and h[40:44] == b' EMF':
return 'emf'
@test
def jpeg2000(h):
if h[:12] == b'\x00\x00\x00\x0cjP \r\n\x87\n':
return 'jpeg2000'
@test
def svg(h):
if h[:4] == b'<svg' or (h[:2] == b'<?' and h[2:5].tobytes().lower() == b'xml' and b'<svg' in h.tobytes()):
return 'svg'
tests = tuple(tests)
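def _identify_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module). A 24-byte PNG header is enough for identify() to recognize
    # the format and read the IHDR dimensions.
    sample = (b'\x89PNG\r\n\x1a\n'    # PNG signature
              b'\x00\x00\x00\rIHDR'   # IHDR chunk length + type
              b'\x00\x00\x00\x10'     # width = 16
              b'\x00\x00\x00\x20')    # height = 32
    assert what(None, sample) == 'png'
    return identify(sample)  # -> ('png', 16, 32)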

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, errno, sys
from threading import Thread
from calibre import force_unicode
from calibre.constants import iswindows, get_windows_username, islinux, filesystem_encoding, ispy3
from calibre.utils.filenames import ascii_filename
from polyglot.functools import lru_cache
VADDRESS = None
def eintr_retry_call(func, *args, **kwargs):
while True:
try:
return func(*args, **kwargs)
except EnvironmentError as e:
if getattr(e, 'errno', None) == errno.EINTR:
continue
raise
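def _eintr_retry_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): retry a signal-interruptible system call until it completes.
    r, w = os.pipe()
    try:
        os.write(w, b'x')
        return eintr_retry_call(os.read, r, 1)  # -> b'x'
    finally:
        os.close(r)
        os.close(w)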
@lru_cache()
def socket_address(which):
if iswindows:
ans = r'\\.\pipe\Calibre' + which
try:
user = get_windows_username()
except Exception:
user = None
if user:
user = ascii_filename(user).replace(' ', '_')
if user:
ans += '-' + user[:100] + 'x'
else:
user = force_unicode(os.environ.get('USER') or os.path.basename(os.path.expanduser('~')), filesystem_encoding)
sock_name = '{}-calibre-{}.socket'.format(ascii_filename(user).replace(' ', '_'), which)
if islinux:
ans = '\0' + sock_name
else:
from tempfile import gettempdir
tmp = force_unicode(gettempdir(), filesystem_encoding)
ans = os.path.join(tmp, sock_name)
if not ispy3 and not isinstance(ans, bytes):
ans = ans.encode(filesystem_encoding)
return ans
def gui_socket_address():
return socket_address('GUI' if iswindows else 'gui')
def viewer_socket_address():
return socket_address('Viewer' if iswindows else 'viewer')
class RC(Thread):
def __init__(self, print_error=True, socket_address=None):
self.print_error = print_error
self.socket_address = socket_address or gui_socket_address()
Thread.__init__(self)
self.conn = None
self.daemon = True
def run(self):
from multiprocessing.connection import Client
self.done = False
try:
self.conn = Client(self.socket_address)
self.done = True
except Exception:
if self.print_error:
                print('Failed to connect to address {}'.format(repr(self.socket_address)), file=sys.stderr)
import traceback
traceback.print_exc()

View File

@@ -0,0 +1,237 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import subprocess, os, sys, time
from functools import partial
from calibre.constants import iswindows, isosx, isfrozen, filesystem_encoding, ispy3
from calibre.utils.config import prefs
from calibre.ptempfile import PersistentTemporaryFile, base_dir
from calibre.utils.serialize import msgpack_dumps
from polyglot.builtins import iteritems, unicode_type, string_or_bytes, environ_item, native_string_type, getcwd
from polyglot.binary import as_hex_unicode
if iswindows:
import win32process
try:
windows_null_file = open(os.devnull, 'wb')
except:
        raise RuntimeError('NUL file missing in windows. This indicates a'
                ' corrupted Windows installation. You should contact Microsoft'
                ' for assistance and/or follow the steps described here: https://bytes.com/topic/net/answers/264804-compile-error-null-device-missing')
def renice(niceness):
try:
os.nice(niceness)
except:
pass
class Worker(object):
'''
Platform independent object for launching child processes. All processes
have the environment variable :envvar:`CALIBRE_WORKER` set.
Useful attributes: ``is_alive``, ``returncode``, ``pid``
Useful methods: ``kill``
To launch child simply call the Worker object. By default, the child's
output is redirected to an on disk file, the path to which is returned by
the call.
'''
exe_name = 'calibre-parallel'
@property
def executable(self):
if hasattr(sys, 'running_from_setup'):
return [sys.executable, os.path.join(sys.setup_dir, 'run-calibre-worker.py')]
if getattr(sys, 'run_local', False):
return [sys.executable, sys.run_local, self.exe_name]
e = self.exe_name
if iswindows:
return os.path.join(os.path.dirname(sys.executable),
e+'.exe' if isfrozen else 'Scripts\\%s.exe'%e)
if isosx:
return os.path.join(sys.binaries_path, e)
if isfrozen:
return os.path.join(sys.executables_location, e)
if hasattr(sys, 'executables_location'):
c = os.path.join(sys.executables_location, e)
if os.access(c, os.X_OK):
return c
return e
@property
def gui_executable(self):
if isosx and not hasattr(sys, 'running_from_setup'):
if self.job_name == 'ebook-viewer':
base = os.path.dirname(sys.binaries_path)
return os.path.join(base, 'ebook-viewer.app/Contents/MacOS/', self.exe_name)
if self.job_name == 'ebook-edit':
base = os.path.dirname(sys.binaries_path)
return os.path.join(base, 'ebook-viewer.app/Contents/ebook-edit.app/Contents/MacOS/', self.exe_name)
return os.path.join(sys.binaries_path, self.exe_name)
return self.executable
@property
def env(self):
if ispy3:
env = os.environ.copy()
else:
            # We use this inefficient method of copying the environment
            # variables because of non-ASCII env vars on Windows. See
            # https://bugs.launchpad.net/bugs/811191
env = {}
for key in os.environ:
try:
val = os.environ[key]
if isinstance(val, unicode_type):
# On windows subprocess cannot handle unicode env vars
try:
val = val.encode(filesystem_encoding)
except ValueError:
val = val.encode('utf-8')
if isinstance(key, unicode_type):
key = key.encode('ascii')
env[key] = val
except:
pass
env[native_string_type('CALIBRE_WORKER')] = environ_item('1')
td = as_hex_unicode(msgpack_dumps(base_dir()))
env[native_string_type('CALIBRE_WORKER_TEMP_DIR')] = environ_item(td)
env.update(self._env)
return env
@property
def is_alive(self):
return hasattr(self, 'child') and self.child.poll() is None
@property
def returncode(self):
if not hasattr(self, 'child'):
return None
self.child.poll()
return self.child.returncode
@property
def pid(self):
if not hasattr(self, 'child'):
return None
return getattr(self.child, 'pid', None)
def close_log_file(self):
try:
self._file.close()
except:
pass
def kill(self):
self.close_log_file()
try:
if self.is_alive:
if iswindows:
return self.child.kill()
try:
self.child.terminate()
st = time.time()
while self.is_alive and time.time()-st < 2:
time.sleep(0.2)
finally:
if self.is_alive:
self.child.kill()
except:
pass
def __init__(self, env, gui=False, job_name=None):
self._env = {}
self.gui = gui
self.job_name = job_name
if ispy3:
self._env = env.copy()
else:
# Windows cannot handle unicode env vars
for k, v in iteritems(env):
try:
if isinstance(k, unicode_type):
k = k.encode('ascii')
if isinstance(v, unicode_type):
try:
v = v.encode(filesystem_encoding)
except:
v = v.encode('utf-8')
self._env[k] = v
except:
pass
def __call__(self, redirect_output=True, cwd=None, priority=None):
'''
If redirect_output is True, output from the child is redirected
to a file on disk and this method returns the path to that file.
'''
exe = self.gui_executable if self.gui else self.executable
env = self.env
try:
origwd = cwd or os.path.abspath(getcwd())
except EnvironmentError:
# cwd no longer exists
origwd = cwd or os.path.expanduser('~')
env[native_string_type('ORIGWD')] = environ_item(as_hex_unicode(msgpack_dumps(origwd)))
_cwd = cwd
if priority is None:
priority = prefs['worker_process_priority']
cmd = [exe] if isinstance(exe, string_or_bytes) else exe
args = {
'env' : env,
'cwd' : _cwd,
}
if iswindows:
priority = {
'high' : win32process.HIGH_PRIORITY_CLASS,
'normal' : win32process.NORMAL_PRIORITY_CLASS,
'low' : win32process.IDLE_PRIORITY_CLASS}[priority]
args['creationflags'] = win32process.CREATE_NO_WINDOW|priority
else:
niceness = {
'normal' : 0,
'low' : 10,
'high' : 20,
}[priority]
args['preexec_fn'] = partial(renice, niceness)
ret = None
if redirect_output:
self._file = PersistentTemporaryFile('_worker_redirect.log')
args['stdout'] = self._file._fd
args['stderr'] = subprocess.STDOUT
if iswindows:
args['stdin'] = subprocess.PIPE
ret = self._file.name
if iswindows and 'stdin' not in args:
# On windows when using the pythonw interpreter,
# stdout, stderr and stdin may not be valid
args['stdin'] = subprocess.PIPE
args['stdout'] = windows_null_file
args['stderr'] = subprocess.STDOUT
if not iswindows:
# Close inherited file descriptors in worker
# On windows, this is done in the worker process
# itself
args['close_fds'] = True
self.child = subprocess.Popen(cmd, **args)
if 'stdin' in args:
self.child.stdin.close()
self.log_path = ret
return ret
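def _worker_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): launch a low priority worker with an extra environment
    # variable and return its pid and redirected log path. MY_EXTRA_VAR is
    # a made-up name for illustration.
    w = Worker({'MY_EXTRA_VAR': 'value'})
    log_path = w(priority='low')  # spawns calibre-parallel
    return w.pid, log_path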

View File

@@ -0,0 +1,348 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, time, traceback, importlib
from multiprocessing.connection import Client
from threading import Thread
from contextlib import closing
from calibre.constants import iswindows
from calibre.utils.ipc import eintr_retry_call
from calibre.utils.ipc.launch import Worker
from calibre.utils.serialize import msgpack_loads, msgpack_dumps
from calibre.utils.monotonic import monotonic
from polyglot.builtins import unicode_type, string_or_bytes, environ_item
from polyglot.binary import as_hex_unicode, from_hex_bytes
class WorkerError(Exception):
def __init__(self, msg, orig_tb='', log_path=None):
Exception.__init__(self, msg)
self.orig_tb = orig_tb
self.log_path = log_path
class ConnectedWorker(Thread):
def __init__(self, listener, args):
Thread.__init__(self)
self.daemon = True
self.listener = listener
self.args = args
self.accepted = False
self.tb = None
self.res = None
def run(self):
conn = None
try:
conn = eintr_retry_call(self.listener.accept)
except BaseException:
self.tb = traceback.format_exc()
return
self.accepted = True
with closing(conn):
try:
eintr_retry_call(conn.send, self.args)
self.res = eintr_retry_call(conn.recv)
except BaseException:
self.tb = traceback.format_exc()
class OffloadWorker(object):
def __init__(self, listener, worker):
self.listener = listener
self.worker = worker
self.conn = None
self.kill_thread = t = Thread(target=self.worker.kill)
t.daemon = True
def __call__(self, module, func, *args, **kwargs):
if self.conn is None:
self.conn = eintr_retry_call(self.listener.accept)
eintr_retry_call(self.conn.send, (module, func, args, kwargs))
return eintr_retry_call(self.conn.recv)
def shutdown(self):
try:
eintr_retry_call(self.conn.send, None)
except IOError:
pass
except:
import traceback
traceback.print_exc()
finally:
self.conn = None
try:
os.remove(self.worker.log_path)
except:
pass
self.kill_thread.start()
def is_alive(self):
return self.worker.is_alive or self.kill_thread.is_alive()
def communicate(ans, worker, listener, args, timeout=300, heartbeat=None,
abort=None):
cw = ConnectedWorker(listener, args)
cw.start()
st = monotonic()
check_heartbeat = callable(heartbeat)
while worker.is_alive and cw.is_alive():
cw.join(0.01)
delta = monotonic() - st
if not cw.accepted and delta > min(10, timeout):
break
hung = not heartbeat() if check_heartbeat else delta > timeout
if hung:
raise WorkerError('Worker appears to have hung')
if abort is not None and abort.is_set():
# The worker process will be killed by fork_job, after we return
return
if not cw.accepted:
if not cw.tb:
raise WorkerError('Failed to connect to worker process')
raise WorkerError('Failed to connect to worker process', cw.tb)
if cw.tb:
raise WorkerError('Failed to communicate with worker process', cw.tb)
if cw.res is None:
raise WorkerError('Something strange happened. The worker process was aborted without an exception.')
if cw.res.get('tb', None):
raise WorkerError('Worker failed', cw.res['tb'])
ans['result'] = cw.res['result']
def create_worker(env, priority='normal', cwd=None, func='main'):
from calibre.utils.ipc.server import create_listener
auth_key = os.urandom(32)
address, listener = create_listener(auth_key)
env = dict(env)
env.update({
'CALIBRE_WORKER_ADDRESS': environ_item(as_hex_unicode(msgpack_dumps(address))),
'CALIBRE_WORKER_KEY': environ_item(as_hex_unicode(auth_key)),
'CALIBRE_SIMPLE_WORKER': environ_item('calibre.utils.ipc.simple_worker:%s' % func),
})
w = Worker(env)
w(cwd=cwd, priority=priority)
return listener, w
def start_pipe_worker(command, env=None, priority='normal', **process_args):
import subprocess
from functools import partial
w = Worker(env or {})
args = {'stdout':subprocess.PIPE, 'stdin':subprocess.PIPE, 'env':w.env}
args.update(process_args)
if iswindows:
import win32process
priority = {
'high' : win32process.HIGH_PRIORITY_CLASS,
'normal' : win32process.NORMAL_PRIORITY_CLASS,
'low' : win32process.IDLE_PRIORITY_CLASS}[priority]
args['creationflags'] = win32process.CREATE_NO_WINDOW|priority
else:
def renice(niceness):
try:
os.nice(niceness)
except:
pass
niceness = {'normal' : 0, 'low' : 10, 'high' : 20}[priority]
args['preexec_fn'] = partial(renice, niceness)
args['close_fds'] = True
exe = w.executable
cmd = [exe] if isinstance(exe, string_or_bytes) else exe
p = subprocess.Popen(cmd + ['--pipe-worker', command], **args)
return p
def two_part_fork_job(env=None, priority='normal', cwd=None):
env = env or {}
listener, w = create_worker(env, priority, cwd)
def run_job(
mod_name, func_name, args=(), kwargs=None, timeout=300, # seconds
no_output=False, heartbeat=None, abort=None, module_is_source_code=False
):
ans = {'result':None, 'stdout_stderr':None}
kwargs = kwargs or {}
try:
communicate(ans, w, listener, (mod_name, func_name, args, kwargs,
module_is_source_code), timeout=timeout, heartbeat=heartbeat,
abort=abort)
except WorkerError as e:
if not no_output:
e.log_path = w.log_path
raise
finally:
t = Thread(target=w.kill)
t.daemon=True
t.start()
if no_output:
try:
os.remove(w.log_path)
except:
pass
if not no_output:
ans['stdout_stderr'] = w.log_path
return ans
run_job.worker = w
return run_job
def fork_job(mod_name, func_name, args=(), kwargs=None, timeout=300, # seconds
cwd=None, priority='normal', env={}, no_output=False, heartbeat=None,
abort=None, module_is_source_code=False):
'''
Run a job in a worker process. A job is simply a function that will be
called with the supplied arguments, in the worker process.
The result of the function will be returned.
If an error occurs a WorkerError is raised.
:param mod_name: Module to import in the worker process
:param func_name: Function to call in the worker process from the imported
module
:param args: Positional arguments to pass to the function
:param kwargs: Keyword arguments to pass to the function
:param timeout: The time in seconds to wait for the worker process to
complete. If it takes longer a WorkerError is raised and the process is
killed.
:param cwd: The working directory for the worker process. I recommend
against using this, unless you are sure the path is pure ASCII.
:param priority: The process priority for the worker process
:param env: Extra environment variables to set for the worker process
:param no_output: If True, the stdout and stderr of the worker process are
discarded
:param heartbeat: If not None, it is used to check if the worker has hung,
instead of a simple timeout. It must be a callable that takes no
arguments and returns True or False. The worker will be assumed to have
hung if this function returns False. At that point, the process will be
killed and a WorkerError will be raised.
:param abort: If not None, it must be an Event. As soon as abort.is_set()
returns True, the worker process is killed. No error is raised.
:param module_is_source_code: If True, the ``mod`` is treated as python
source rather than a module name to import. The source is executed as a
module. Useful if you want to use fork_job from within a script to run some
dynamically generated python.
:return: A dictionary with the keys result and stdout_stderr. result is the
return value of the function (it must be picklable). stdout_stderr is the
path to a file that contains the stdout and stderr of the worker process.
If you set no_output=True, then this will not be present.
'''
return two_part_fork_job(env, priority, cwd)(
mod_name, func_name, args=args, kwargs=kwargs, timeout=timeout,
no_output=no_output, heartbeat=heartbeat, abort=abort,
module_is_source_code=module_is_source_code
)
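def _fork_job_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): run os.getpid in a worker process and collect the (picklable)
    # result, discarding the worker's stdout/stderr.
    res = fork_job('os', 'getpid', no_output=True, timeout=60)
    return res['result']  # the worker's pid, different from our own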
def offload_worker(env={}, priority='normal', cwd=None):
listener, w = create_worker(env=env, priority=priority, cwd=cwd, func='offload')
return OffloadWorker(listener, w)
def compile_code(src):
import re, io
if not isinstance(src, unicode_type):
match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
enc = match.group(1).decode('utf-8') if match else 'utf-8'
src = src.decode(enc)
# Python complains if there is a coding declaration in a unicode string
src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE)
# Translate newlines to \n
src = io.StringIO(src, newline=None).getvalue()
namespace = {
'time':time, 're':re, 'os':os, 'io':io,
}
exec(src, namespace)
return namespace
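def _compile_code_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): execute dynamically generated source and pull a name out of
    # the resulting namespace, as module_is_source_code jobs do.
    ns = compile_code('def double(x):\n    return 2 * x\n')
    return ns['double'](21)  # -> 42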
def main():
# The entry point for the simple worker process
address = msgpack_loads(from_hex_bytes(os.environ['CALIBRE_WORKER_ADDRESS']))
key = from_hex_bytes(os.environ['CALIBRE_WORKER_KEY'])
with closing(Client(address, authkey=key)) as conn:
args = eintr_retry_call(conn.recv)
try:
mod, func, args, kwargs, module_is_source_code = args
if module_is_source_code:
importlib.import_module('calibre.customize.ui') # Load plugins
mod = compile_code(mod)
func = mod[func]
else:
try:
mod = importlib.import_module(mod)
except ImportError:
importlib.import_module('calibre.customize.ui') # Load plugins
mod = importlib.import_module(mod)
func = getattr(mod, func)
res = {'result':func(*args, **kwargs)}
except:
res = {'tb': traceback.format_exc()}
try:
conn.send(res)
except:
# Maybe EINTR
conn.send(res)
def offload():
# The entry point for the offload worker process
address = msgpack_loads(from_hex_bytes(os.environ['CALIBRE_WORKER_ADDRESS']))
key = from_hex_bytes(os.environ['CALIBRE_WORKER_KEY'])
func_cache = {}
with closing(Client(address, authkey=key)) as conn:
while True:
args = eintr_retry_call(conn.recv)
if args is None:
break
res = {'result':None, 'tb':None}
try:
mod, func, args, kwargs = args
if mod is None:
eintr_retry_call(conn.send, res)
continue
f = func_cache.get((mod, func), None)
if f is None:
try:
m = importlib.import_module(mod)
except ImportError:
importlib.import_module('calibre.customize.ui') # Load plugins
m = importlib.import_module(mod)
func_cache[(mod, func)] = f = getattr(m, func)
res['result'] = f(*args, **kwargs)
except:
import traceback
res['tb'] = traceback.format_exc()
eintr_retry_call(conn.send, res)

View File

@@ -0,0 +1,65 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from datetime import datetime
from dateutil.tz import tzlocal, tzutc, tzoffset
from calibre.constants import plugins
speedup, err = plugins['speedup']
if not speedup:
raise RuntimeError(err)
class SafeLocalTimeZone(tzlocal):
def _isdst(self, dt):
# This method in tzlocal raises ValueError if dt is out of range (in
# older versions of dateutil)
# In such cases, just assume that dt is not DST.
try:
return super(SafeLocalTimeZone, self)._isdst(dt)
except Exception:
pass
return False
def _naive_is_dst(self, dt):
# This method in tzlocal raises ValueError if dt is out of range (in
# newer versions of dateutil)
# In such cases, just assume that dt is not DST.
try:
return super(SafeLocalTimeZone, self)._naive_is_dst(dt)
except Exception:
pass
return False
utc_tz = tzutc()
local_tz = SafeLocalTimeZone()
del tzutc, tzlocal
UNDEFINED_DATE = datetime(101,1,1, tzinfo=utc_tz)
def parse_iso8601(date_string, assume_utc=False, as_utc=True):
if not date_string:
return UNDEFINED_DATE
dt, aware, tzseconds = speedup.parse_iso8601(date_string)
tz = utc_tz if assume_utc else local_tz
if aware: # timezone was specified
if tzseconds == 0:
tz = utc_tz
else:
sign = '-' if tzseconds < 0 else '+'
description = "%s%02d:%02d" % (sign, abs(tzseconds) // 3600, (abs(tzseconds) % 3600) // 60)
tz = tzoffset(description, tzseconds)
dt = dt.replace(tzinfo=tz)
if as_utc and tz is utc_tz:
return dt
return dt.astimezone(utc_tz if as_utc else local_tz)
if __name__ == '__main__':
import sys
print(parse_iso8601(sys.argv[-1]))

View File

@@ -0,0 +1,548 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, locale, re, io, sys
from gettext import GNUTranslations, NullTranslations
from polyglot.builtins import is_py3, iteritems, unicode_type
_available_translations = None
def available_translations():
global _available_translations
if _available_translations is None:
stats = P('localization/stats.calibre_msgpack', allow_user_override=False)
if os.path.exists(stats):
from calibre.utils.serialize import msgpack_loads
with open(stats, 'rb') as f:
stats = msgpack_loads(f.read())
else:
stats = {}
_available_translations = [x for x in stats if stats[x] > 0.1]
return _available_translations
def get_system_locale():
from calibre.constants import iswindows, isosx, plugins
lang = None
if iswindows:
try:
from calibre.constants import get_windows_user_locale_name
lang = get_windows_user_locale_name()
lang = lang.strip()
if not lang:
lang = None
except:
pass # Windows XP does not have the GetUserDefaultLocaleName fn
elif isosx:
try:
lang = plugins['usbobserver'][0].user_locale() or None
except:
# Fallback to environment vars if something bad happened
import traceback
traceback.print_exc()
if lang is None:
try:
envvars = ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES', 'LANG']
lang = locale.getdefaultlocale(envvars)[0]
# lang is None in two cases: either the environment variable is not
# set or it's "C". Stop looking for a language in the latter case.
if lang is None:
for var in envvars:
if os.environ.get(var) == 'C':
lang = 'en_US'
break
except:
pass # This happens on Ubuntu apparently
if lang is None and 'LANG' in os.environ: # Needed for OS X
try:
lang = os.environ['LANG']
except:
pass
if lang:
lang = lang.replace('-', '_')
lang = '_'.join(lang.split('_')[:2])
return lang
def sanitize_lang(lang):
if lang:
match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
if match:
lang = match.group()
if lang == 'zh':
lang = 'zh_CN'
if not lang:
lang = 'en'
return lang
def get_lang():
'Try to figure out what language to display the interface in'
from calibre.utils.config_base import prefs
lang = prefs['language']
lang = os.environ.get('CALIBRE_OVERRIDE_LANG', lang)
if lang:
return lang
try:
lang = get_system_locale()
except:
import traceback
traceback.print_exc()
lang = None
return sanitize_lang(lang)
def is_rtl():
return get_lang()[:2].lower() in {'he', 'ar'}
def get_lc_messages_path(lang):
hlang = None
if zf_exists():
if lang in available_translations():
hlang = lang
else:
xlang = lang.split('_')[0].lower()
if xlang in available_translations():
hlang = xlang
return hlang
def zf_exists():
return os.path.exists(P('localization/locales.zip',
allow_user_override=False))
_lang_trans = None
def get_all_translators():
from zipfile import ZipFile
with ZipFile(P('localization/locales.zip', allow_user_override=False), 'r') as zf:
for lang in available_translations():
mpath = get_lc_messages_path(lang)
if mpath is not None:
buf = io.BytesIO(zf.read(mpath + '/messages.mo'))
yield lang, GNUTranslations(buf)
def get_single_translator(mpath, which='messages'):
from zipfile import ZipFile
with ZipFile(P('localization/locales.zip', allow_user_override=False), 'r') as zf:
path = '{}/{}.mo'.format(mpath, which)
data = zf.read(path)
buf = io.BytesIO(data)
try:
return GNUTranslations(buf)
except Exception as e:
import traceback
traceback.print_exc()
import hashlib
sig = hashlib.sha1(data).hexdigest()
raise ValueError('Failed to load translations for: {} (size: {} and signature: {}) with error: {}'.format(
path, len(data), sig, e))
def get_iso639_translator(lang):
lang = sanitize_lang(lang)
mpath = get_lc_messages_path(lang) if lang else None
return get_single_translator(mpath, 'iso639') if mpath else None
def get_translator(bcp_47_code):
parts = bcp_47_code.replace('-', '_').split('_')[:2]
parts[0] = lang_as_iso639_1(parts[0].lower()) or 'en'
if len(parts) > 1:
parts[1] = parts[1].upper()
lang = '_'.join(parts)
lang = {'pt':'pt_BR', 'zh':'zh_CN'}.get(lang, lang)
available = available_translations()
found = True
if lang == 'en' or lang.startswith('en_'):
return found, lang, NullTranslations()
if lang not in available:
lang = {'pt':'pt_BR', 'zh':'zh_CN'}.get(parts[0], parts[0])
if lang not in available:
lang = get_lang()
if lang not in available:
lang = 'en'
found = False
if lang == 'en':
return True, lang, NullTranslations()
return found, lang, get_single_translator(lang)
lcdata = {
'abday': ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'),
'abmon': ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
'd_fmt': '%m/%d/%Y',
'd_t_fmt': '%a %d %b %Y %r %Z',
'day': ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'),
'mon': ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'),
'noexpr': '^[nN].*',
'radixchar': '.',
't_fmt': '%r',
't_fmt_ampm': '%I:%M:%S %p',
'thousep': ',',
'yesexpr': '^[yY].*'
}
def load_po(path):
from calibre.translations.msgfmt import make
buf = io.BytesIO()
try:
make(path, buf)
except Exception:
        print('Failed to compile translations file: %s, ignoring' % path)
buf = None
else:
buf = io.BytesIO(buf.getvalue())
return buf
def set_translators():
global _lang_trans, lcdata
# To test different translations invoke as
# CALIBRE_OVERRIDE_LANG=de_DE.utf8 program
lang = get_lang()
t = buf = iso639 = None
if 'CALIBRE_TEST_TRANSLATION' in os.environ:
buf = load_po(os.path.expanduser(os.environ['CALIBRE_TEST_TRANSLATION']))
if lang:
mpath = get_lc_messages_path(lang)
if buf is None and mpath and os.access(mpath + '.po', os.R_OK):
buf = load_po(mpath + '.po')
if mpath is not None:
from zipfile import ZipFile
with ZipFile(P('localization/locales.zip',
allow_user_override=False), 'r') as zf:
if buf is None:
buf = io.BytesIO(zf.read(mpath + '/messages.mo'))
if mpath == 'nds':
mpath = 'de'
isof = mpath + '/iso639.mo'
try:
iso639 = io.BytesIO(zf.read(isof))
except:
pass # No iso639 translations for this lang
if buf is not None:
from calibre.utils.serialize import msgpack_loads
try:
lcdata = msgpack_loads(zf.read(mpath + '/lcdata.calibre_msgpack'))
except:
pass # No lcdata
if buf is not None:
t = GNUTranslations(buf)
if iso639 is not None:
iso639 = _lang_trans = GNUTranslations(iso639)
t.add_fallback(iso639)
if t is None:
t = NullTranslations()
try:
set_translators.lang = t.info().get('language')
except Exception:
pass
if is_py3:
t.install(names=('ngettext',))
else:
t.install(unicode=True, names=('ngettext',))
# Now that we have installed a translator, we have to retranslate the help
# for the global prefs object as it was instantiated in get_lang(), before
# the translator was installed.
from calibre.utils.config_base import prefs
prefs.retranslate_help()
set_translators.lang = None
_iso639 = None
_extra_lang_codes = {
'pt_BR' : _('Brazilian Portuguese'),
'en_GB' : _('English (UK)'),
'zh_CN' : _('Simplified Chinese'),
'zh_TW' : _('Traditional Chinese'),
'en' : _('English'),
'en_US' : _('English (United States)'),
'en_AR' : _('English (Argentina)'),
'en_AU' : _('English (Australia)'),
'en_JP' : _('English (Japan)'),
'en_DE' : _('English (Germany)'),
'en_BG' : _('English (Bulgaria)'),
'en_EG' : _('English (Egypt)'),
'en_NZ' : _('English (New Zealand)'),
'en_CA' : _('English (Canada)'),
'en_GR' : _('English (Greece)'),
'en_IN' : _('English (India)'),
'en_NP' : _('English (Nepal)'),
'en_TH' : _('English (Thailand)'),
'en_TR' : _('English (Turkey)'),
'en_CY' : _('English (Cyprus)'),
'en_CZ' : _('English (Czech Republic)'),
'en_PH' : _('English (Philippines)'),
'en_PK' : _('English (Pakistan)'),
'en_PL' : _('English (Poland)'),
'en_HR' : _('English (Croatia)'),
'en_HU' : _('English (Hungary)'),
'en_ID' : _('English (Indonesia)'),
'en_IL' : _('English (Israel)'),
'en_RU' : _('English (Russia)'),
'en_SG' : _('English (Singapore)'),
'en_YE' : _('English (Yemen)'),
'en_IE' : _('English (Ireland)'),
'en_CN' : _('English (China)'),
'en_TW' : _('English (Taiwan)'),
'en_ZA' : _('English (South Africa)'),
'es_PY' : _('Spanish (Paraguay)'),
'es_UY' : _('Spanish (Uruguay)'),
'es_AR' : _('Spanish (Argentina)'),
'es_CR' : _('Spanish (Costa Rica)'),
'es_MX' : _('Spanish (Mexico)'),
'es_CU' : _('Spanish (Cuba)'),
'es_CL' : _('Spanish (Chile)'),
'es_EC' : _('Spanish (Ecuador)'),
'es_HN' : _('Spanish (Honduras)'),
'es_VE' : _('Spanish (Venezuela)'),
'es_BO' : _('Spanish (Bolivia)'),
'es_NI' : _('Spanish (Nicaragua)'),
'es_CO' : _('Spanish (Colombia)'),
'de_AT' : _('German (AT)'),
'fr_BE' : _('French (BE)'),
'nl' : _('Dutch (NL)'),
'nl_BE' : _('Dutch (BE)'),
'und' : _('Unknown')
}
if False:
# Extra strings needed for Qt
    # NOTE: Ante Meridiem (i.e. like 10:00 AM)
    _('AM')
    # NOTE: Post Meridiem (i.e. like 10:00 PM)
    _('PM')
    # NOTE: Ante Meridiem (i.e. like 10:00 am)
    _('am')
    # NOTE: Post Meridiem (i.e. like 10:00 pm)
    _('pm')
_('&Copy')
_('Select All')
_('Copy Link')
_('&Select All')
_('Copy &Link Location')
_('&Undo')
_('&Redo')
_('Cu&t')
_('&Paste')
_('Paste and Match Style')
_('Directions')
_('Left to Right')
_('Right to Left')
_('Fonts')
_('&Step up')
_('Step &down')
_('Close without Saving')
_('Close Tab')
_lcase_map = {}
for k in _extra_lang_codes:
_lcase_map[k.lower()] = k
def _load_iso639():
global _iso639
if _iso639 is None:
ip = P('localization/iso639.calibre_msgpack', allow_user_override=False, data=True)
from calibre.utils.serialize import msgpack_loads
_iso639 = msgpack_loads(ip)
if 'by_3' not in _iso639:
_iso639['by_3'] = _iso639['by_3t']
return _iso639
def get_iso_language(lang_trans, lang):
iso639 = _load_iso639()
ans = lang
lang = lang.split('_')[0].lower()
if len(lang) == 2:
ans = iso639['by_2'].get(lang, ans)
elif len(lang) == 3:
if lang in iso639['by_3']:
ans = iso639['by_3'][lang]
return lang_trans(ans)
def get_language(lang):
translate = _
lang = _lcase_map.get(lang, lang)
if lang in _extra_lang_codes:
# The translator was not active when _extra_lang_codes was defined, so
# re-translate
return translate(_extra_lang_codes[lang])
attr = 'gettext' if sys.version_info.major > 2 else 'ugettext'
return get_iso_language(getattr(_lang_trans, attr, translate), lang)
def calibre_langcode_to_name(lc, localize=True):
iso639 = _load_iso639()
translate = _ if localize else lambda x: x
try:
return translate(iso639['by_3'][lc])
except:
pass
return lc
def canonicalize_lang(raw):
if not raw:
return None
if not isinstance(raw, unicode_type):
raw = raw.decode('utf-8', 'ignore')
raw = raw.lower().strip()
if not raw:
return None
raw = raw.replace('_', '-').partition('-')[0].strip()
if not raw:
return None
iso639 = _load_iso639()
m2to3 = iso639['2to3']
if len(raw) == 2:
ans = m2to3.get(raw, None)
if ans is not None:
return ans
elif len(raw) == 3:
if raw in iso639['by_3']:
return raw
return iso639['name_map'].get(raw, None)
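def _canonicalize_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): assorted language specifiers normalize to ISO 639-3 codes,
    # e.g. a BCP 47 style tag reduces to its primary subtag first.
    return canonicalize_lang('en-US')  # -> 'eng'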
_lang_map = None
def lang_map():
' Return mapping of ISO 639 3 letter codes to localized language names '
iso639 = _load_iso639()
translate = _
global _lang_map
if _lang_map is None:
_lang_map = {k:translate(v) for k, v in iteritems(iso639['by_3'])}
return _lang_map
def lang_map_for_ui():
ans = getattr(lang_map_for_ui, 'ans', None)
if ans is None:
ans = lang_map().copy()
for x in ('zxx', 'mis', 'mul'):
ans.pop(x, None)
lang_map_for_ui.ans = ans
return ans
def langnames_to_langcodes(names):
'''
Given a list of localized language names return a mapping of the names to 3
letter ISO 639 language codes. If a name is not recognized, it is mapped to
None.
'''
iso639 = _load_iso639()
translate = _
ans = {}
names = set(names)
for k, v in iteritems(iso639['by_3']):
tv = translate(v)
if tv in names:
names.remove(tv)
ans[tv] = k
if not names:
break
for x in names:
ans[x] = None
return ans
def lang_as_iso639_1(name_or_code):
code = canonicalize_lang(name_or_code)
if code is not None:
iso639 = _load_iso639()
return iso639['3to2'].get(code, None)
_udc = None
def get_udc():
global _udc
if _udc is None:
from calibre.ebooks.unihandecode import Unihandecoder
_udc = Unihandecoder(lang=get_lang())
return _udc
def user_manual_stats():
stats = getattr(user_manual_stats, 'stats', None)
if stats is None:
import json
try:
stats = json.loads(P('user-manual-translation-stats.json', allow_user_override=False, data=True))
except EnvironmentError:
stats = {}
user_manual_stats.stats = stats
return stats
def localize_user_manual_link(url):
lc = lang_as_iso639_1(get_lang())
if lc == 'en':
return url
stats = user_manual_stats()
if stats.get(lc, 0) < 0.3:
return url
from polyglot.urllib import urlparse, urlunparse
parts = urlparse(url)
path = re.sub(r'/generated/[a-z]+/', '/generated/%s/' % lc, parts.path or '')
path = '/%s%s' % (lc, path)
parts = list(parts)
parts[2] = path
return urlunparse(parts)
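def _localize_link_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): the URL is returned unchanged for English, or when the user
    # manual translation for the UI language is under 30% complete.
    return localize_user_manual_link('https://manual.calibre-ebook.com/gui.html')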
def website_languages():
stats = getattr(website_languages, 'stats', None)
if stats is None:
try:
stats = frozenset(P('localization/website-languages.txt', allow_user_override=False, data=True).split())
except EnvironmentError:
stats = frozenset()
website_languages.stats = stats
return stats
def localize_website_link(url):
lc = lang_as_iso639_1(get_lang())
langs = website_languages()
if lc == 'en' or lc not in langs:
return url
from polyglot.urllib import urlparse, urlunparse
parts = urlparse(url)
path = '/{}{}'.format(lc, parts.path)
parts = list(parts)
parts[2] = path
return urlunparse(parts)

View File

@@ -0,0 +1,340 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Try to read invalid zip files with missing or damaged central directories.
These are apparently produced in large numbers by the fruitcakes over at B&N.
Tries to use only the local headers to extract data from the damaged zip file.
'''
import os, sys, zlib, shutil
from struct import calcsize, unpack, pack
from collections import namedtuple, OrderedDict
from tempfile import SpooledTemporaryFile
from polyglot.builtins import itervalues, getcwd
HEADER_SIG = 0x04034b50
HEADER_BYTE_SIG = pack(b'<L', HEADER_SIG)
local_header_fmt = b'<L5HL2L2H'
local_header_sz = calcsize(local_header_fmt)
ZIP_STORED, ZIP_DEFLATED = 0, 8
DATA_DESCRIPTOR_SIG = pack(b'<L', 0x08074b50)
LocalHeader = namedtuple('LocalHeader',
'signature min_version flags compression_method mod_time mod_date '
'crc32 compressed_size uncompressed_size filename_length extra_length '
'filename extra')
if hasattr(sys, 'getwindowsversion'):
windows_reserved_filenames = (
        'CON', 'PRN', 'AUX', 'CLOCK$', 'NUL', 'COM0', 'COM1', 'COM2', 'COM3',
        'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9', 'LPT0', 'LPT1', 'LPT2',
        'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9')
def is_reserved_filename(x):
base = x.partition('.')[0].upper()
return base in windows_reserved_filenames
else:
def is_reserved_filename(x):
return False
def decode_arcname(name):
if isinstance(name, bytes):
from calibre.ebooks.chardet import detect
try:
name = name.decode('utf-8')
except:
res = detect(name)
encoding = res['encoding']
try:
name = name.decode(encoding)
except:
name = name.decode('utf-8', 'replace')
return name
def find_local_header(f):
pos = f.tell()
raw = f.read(50*1024)
try:
f.seek(pos + raw.index(HEADER_BYTE_SIG))
except ValueError:
f.seek(pos)
return
raw = f.read(local_header_sz)
if len(raw) != local_header_sz:
f.seek(pos)
return
header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
if header.signature == HEADER_SIG:
return header
f.seek(pos)
def find_data_descriptor(f):
pos = f.tell()
DD = namedtuple('DataDescriptor', 'crc32 compressed_size uncompressed_size')
raw = b'a'*16
try:
while len(raw) >= 16:
raw = f.read(50*1024)
idx = raw.find(DATA_DESCRIPTOR_SIG)
if idx != -1:
f.seek(f.tell() - len(raw) + idx + len(DATA_DESCRIPTOR_SIG))
return DD(*unpack(b'<LLL', f.read(12)))
# Rewind to handle the case of the signature being cut off
# by the 50K boundary
f.seek(f.tell()-len(DATA_DESCRIPTOR_SIG))
raise ValueError('Failed to find data descriptor signature. '
'Data descriptors without signatures are not '
'supported.')
finally:
f.seek(pos)
def read_local_file_header(f):
pos = f.tell()
raw = f.read(local_header_sz)
if len(raw) != local_header_sz:
f.seek(pos)
return
header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
if header.signature != HEADER_SIG:
f.seek(pos)
header = find_local_header(f)
if header is None:
return
if header.min_version > 20:
raise ValueError('This ZIP file uses unsupported features')
if header.flags & 0b1:
raise ValueError('This ZIP file is encrypted')
if header.flags & (1 << 13):
raise ValueError('This ZIP file uses masking, unsupported.')
if header.compression_method not in {ZIP_STORED, ZIP_DEFLATED}:
raise ValueError('This ZIP file uses an unsupported compression method')
has_data_descriptors = header.flags & (1 << 3)
fname = extra = None
if header.filename_length > 0:
fname = f.read(header.filename_length)
if len(fname) != header.filename_length:
return
try:
fname = fname.decode('ascii')
except UnicodeDecodeError:
if header.flags & (1 << 11):
try:
fname = fname.decode('utf-8')
except UnicodeDecodeError:
pass
fname = decode_arcname(fname).replace('\\', '/')
if header.extra_length > 0:
extra = f.read(header.extra_length)
if len(extra) != header.extra_length:
return
if has_data_descriptors:
desc = find_data_descriptor(f)
header = header._replace(crc32=desc.crc32,
compressed_size=desc.compressed_size,
uncompressed_size=desc.uncompressed_size)
return LocalHeader(*(
header[:-2] + (fname, extra)
))
def read_compressed_data(f, header):
cdata = f.read(header.compressed_size)
return cdata
def copy_stored_file(src, size, dest):
read = 0
amt = min(size, 20*1024)
while read < size:
raw = src.read(min(size-read, amt))
if not raw:
raise ValueError('Premature end of file')
dest.write(raw)
read += len(raw)
def copy_compressed_file(src, size, dest):
d = zlib.decompressobj(-15)
read = 0
amt = min(size, 20*1024)
while read < size:
raw = src.read(min(size-read, amt))
if not raw and read < size:
raise ValueError('Invalid ZIP file, local header is damaged')
read += len(raw)
dest.write(d.decompress(raw, 200*1024))
count = 0
while d.unconsumed_tail:
count += 1
dest.write(d.decompress(d.unconsumed_tail, 200*1024))
if count > 100:
raise ValueError('This ZIP file contains a ZIP bomb in %s'%
os.path.basename(dest.name))
def _extractall(f, path=None, file_info=None):
found = False
while True:
header = read_local_file_header(f)
if not header:
break
has_data_descriptors = header.flags & (1 << 3)
seekval = header.compressed_size + (16 if has_data_descriptors else 0)
found = True
# Sanitize path changing absolute to relative paths and removing .. and
# .
fname = header.filename.replace(os.sep, '/')
fname = os.path.splitdrive(fname)[1]
parts = [x for x in fname.split('/') if x not in {'', os.path.pardir, os.path.curdir}]
if not parts:
continue
if header.uncompressed_size == 0:
# Directory
f.seek(f.tell()+seekval)
if path is not None:
bdir = os.path.join(path, *parts)
if not os.path.exists(bdir):
os.makedirs(bdir)
continue
# File
if file_info is not None:
file_info[header.filename] = (f.tell(), header)
if path is not None:
bdir = os.path.join(path, *(parts[:-1]))
if not os.path.exists(bdir):
os.makedirs(bdir)
dest = os.path.join(path, *parts)
try:
df = open(dest, 'wb')
except EnvironmentError:
if is_reserved_filename(os.path.basename(dest)):
raise ValueError('This ZIP file contains a file with a reserved filename'
' that cannot be processed on Windows: {}'.format(os.path.basename(dest)))
raise
with df:
if header.compression_method == ZIP_STORED:
copy_stored_file(f, header.compressed_size, df)
else:
copy_compressed_file(f, header.compressed_size, df)
else:
f.seek(f.tell()+seekval)
if not found:
raise ValueError('Not a ZIP file')
def extractall(path_or_stream, path=None):
f = path_or_stream
close_at_end = False
if not hasattr(f, 'read'):
f = open(f, 'rb')
close_at_end = True
if path is None:
path = getcwd()
pos = f.tell()
try:
_extractall(f, path)
finally:
f.seek(pos)
if close_at_end:
f.close()
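def _extractall_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): recover the contents of a damaged ZIP into a fresh directory
    # using only the local file headers. 'damaged.zip' is a made-up name.
    import tempfile
    outdir = tempfile.mkdtemp()
    extractall('damaged.zip', path=outdir)
    return outdir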
class LocalZipFile(object):
def __init__(self, stream):
self.file_info = OrderedDict()
_extractall(stream, file_info=self.file_info)
self.stream = stream
def _get_file_info(self, name):
fi = self.file_info.get(name)
if fi is None:
raise ValueError('This ZIP container has no file named: %s'%name)
return fi
def open(self, name, spool_size=5*1024*1024):
if isinstance(name, LocalHeader):
name = name.filename
offset, header = self._get_file_info(name)
self.stream.seek(offset)
dest = SpooledTemporaryFile(max_size=spool_size)
if header.compression_method == ZIP_STORED:
copy_stored_file(self.stream, header.compressed_size, dest)
else:
copy_compressed_file(self.stream, header.compressed_size, dest)
dest.seek(0)
return dest
def getinfo(self, name):
offset, header = self._get_file_info(name)
return header
def read(self, name, spool_size=5*1024*1024):
with self.open(name, spool_size=spool_size) as f:
return f.read()
def extractall(self, path=None):
self.stream.seek(0)
_extractall(self.stream, path=(path or getcwd()))
def close(self):
pass
def safe_replace(self, name, datastream, extra_replacements={},
add_missing=False):
from calibre.utils.zipfile import ZipFile, ZipInfo
replacements = {name:datastream}
replacements.update(extra_replacements)
names = frozenset(list(replacements.keys()))
found = set()
def rbytes(name):
r = replacements[name]
if not isinstance(r, bytes):
r = r.read()
return r
with SpooledTemporaryFile(max_size=100*1024*1024) as temp:
ztemp = ZipFile(temp, 'w')
for offset, header in itervalues(self.file_info):
if header.filename in names:
zi = ZipInfo(header.filename)
zi.compress_type = header.compression_method
ztemp.writestr(zi, rbytes(header.filename))
found.add(header.filename)
else:
ztemp.writestr(header.filename, self.read(header.filename,
spool_size=0))
if add_missing:
for name in names - found:
ztemp.writestr(name, rbytes(name))
ztemp.close()
zipstream = self.stream
temp.seek(0)
zipstream.seek(0)
zipstream.truncate()
shutil.copyfileobj(temp, zipstream)
zipstream.flush()
if __name__ == '__main__':
extractall(sys.argv[-1])

View File

@@ -0,0 +1,205 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import atexit
import errno
import os
import stat
import tempfile
import time
from functools import partial
from calibre.constants import (
__appname__, fcntl, filesystem_encoding, islinux, isosx, iswindows, plugins, ispy3
)
from calibre.utils.monotonic import monotonic
speedup = plugins['speedup'][0]
if iswindows:
import msvcrt, win32file, pywintypes, winerror, win32api, win32event
from calibre.constants import get_windows_username
excl_file_mode = stat.S_IREAD | stat.S_IWRITE
else:
excl_file_mode = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
def unix_open(path):
flags = os.O_RDWR | os.O_CREAT
has_cloexec = False
if hasattr(speedup, 'O_CLOEXEC'):
try:
fd = os.open(path, flags | speedup.O_CLOEXEC, excl_file_mode)
has_cloexec = True
except EnvironmentError as err:
# Kernel may not support O_CLOEXEC
if err.errno != errno.EINVAL:
raise
if not has_cloexec:
fd = os.open(path, flags, excl_file_mode)
fcntl.fcntl(fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
return os.fdopen(fd, 'r+b')
def unix_retry(err):
return err.errno in (errno.EACCES, errno.EAGAIN, errno.ENOLCK, errno.EINTR)
def windows_open(path):
if isinstance(path, bytes):
path = path.decode('mbcs')
try:
h = win32file.CreateFileW(
path,
win32file.GENERIC_READ |
win32file.GENERIC_WRITE, # Open for reading and writing
0, # Open exclusive
None, # No security attributes, ensures handle is not inherited by children
win32file.OPEN_ALWAYS, # If file does not exist, create it
win32file.FILE_ATTRIBUTE_NORMAL, # Normal attributes
None, # No template file
)
except pywintypes.error as err:
raise WindowsError(err[0], err[2], path)
fd = msvcrt.open_osfhandle(h.Detach(), 0)
return os.fdopen(fd, 'r+b')
def windows_retry(err):
return err.winerror in (
winerror.ERROR_SHARING_VIOLATION, winerror.ERROR_LOCK_VIOLATION
)
def retry_for_a_time(timeout, sleep_time, func, error_retry, *args):
limit = monotonic() + timeout
while True:
try:
return func(*args)
except EnvironmentError as err:
if not error_retry(err) or monotonic() > limit:
raise
time.sleep(sleep_time)
def lock_file(path, timeout=15, sleep_time=0.2):
if iswindows:
return retry_for_a_time(
timeout, sleep_time, windows_open, windows_retry, path
)
f = unix_open(path)
retry_for_a_time(
timeout, sleep_time, fcntl.flock, unix_retry,
f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB
)
return f
class ExclusiveFile(object):
def __init__(self, path, timeout=15, sleep_time=0.2):
if iswindows and isinstance(path, bytes):
path = path.decode(filesystem_encoding)
self.path = path
self.timeout = timeout
self.sleep_time = sleep_time
def __enter__(self):
self.file = lock_file(self.path, self.timeout, self.sleep_time)
return self.file
def __exit__(self, type, value, traceback):
self.file.close()
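def _exclusive_file_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): hold an exclusive lock for the duration of the with block;
    # concurrent lockers retry for up to ``timeout`` seconds before failing.
    # 'my.lock' is a made-up file name.
    with ExclusiveFile('my.lock', timeout=5) as f:
        f.seek(0)
        f.write(b'owned by this process\n')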
def _clean_lock_file(file_obj):
try:
os.remove(file_obj.name)
except EnvironmentError:
pass
try:
file_obj.close()
except EnvironmentError:
pass
if iswindows:
def create_single_instance_mutex(name, per_user=True):
mutexname = '{}-singleinstance-{}-{}'.format(
__appname__, (get_windows_username() if per_user else ''), name
)
mutex = win32event.CreateMutex(None, False, mutexname)
if not mutex:
return
err = win32api.GetLastError()
if err == winerror.ERROR_ALREADY_EXISTS:
            # Close this handle, otherwise it will prevent the mutex
            # from being deleted when the process that created it exits.
win32api.CloseHandle(mutex)
return
return partial(win32api.CloseHandle, mutex)
elif islinux:
def create_single_instance_mutex(name, per_user=True):
import socket
from calibre.utils.ipc import eintr_retry_call
name = '%s-singleinstance-%s-%s' % (
__appname__, (os.geteuid() if per_user else ''), name
)
address = '\0' + name.replace(' ', '_')
if not ispy3:
address = address.encode('utf-8')
sock = socket.socket(family=socket.AF_UNIX)
try:
eintr_retry_call(sock.bind, address)
except socket.error as err:
if getattr(err, 'errno', None) == errno.EADDRINUSE:
return
raise
fd = sock.fileno()
old_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC)
return sock.close
else:
def singleinstance_path(name, per_user=True):
name = '%s-singleinstance-%s-%s.lock' % (
__appname__, (os.geteuid() if per_user else ''), name
)
home = os.path.expanduser('~')
locs = ['/var/lock', home, tempfile.gettempdir()]
if isosx:
locs.insert(0, '/Library/Caches')
for loc in locs:
if os.access(loc, os.W_OK | os.R_OK | os.X_OK):
return os.path.join(loc, ('.' if loc is home else '') + name)
raise EnvironmentError(
'Failed to find a suitable filesystem location for the lock file'
)
def create_single_instance_mutex(name, per_user=True):
from calibre.utils.ipc import eintr_retry_call
path = singleinstance_path(name, per_user)
f = lopen(path, 'w')
try:
eintr_retry_call(fcntl.lockf, f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
return partial(_clean_lock_file, f)
except EnvironmentError as err:
if err.errno not in (errno.EAGAIN, errno.EACCES):
raise
def singleinstance(name):
    ' Ensure that only a single process holding the specified mutex exists '
release_mutex = create_single_instance_mutex(name)
if release_mutex is None:
return False
atexit.register(release_mutex)
return True
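def _singleinstance_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): refuse to start a second copy of an application. 'my-app' is
    # a made-up mutex name.
    if not singleinstance('my-app'):
        raise SystemExit('Another instance is already running')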

View File

@@ -0,0 +1,275 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'A simplified logging system'
DEBUG = 0
INFO = 1
WARN = 2
ERROR = 3
import sys, traceback, io
from functools import partial
from threading import Lock
from calibre import isbytestring, force_unicode, as_unicode, prints
from polyglot.builtins import unicode_type, iteritems
class Stream(object):
def __init__(self, stream=None):
if stream is None:
stream = io.BytesIO()
self.stream = getattr(stream, 'buffer', stream)
self._prints = partial(prints, safe_encode=True, file=stream)
def flush(self):
self.stream.flush()
def prints(self, level, *args, **kwargs):
self._prints(*args, **kwargs)
class ANSIStream(Stream):
def __init__(self, stream=sys.stdout):
Stream.__init__(self, stream)
self.color = {
DEBUG: u'green',
INFO: None,
WARN: u'yellow',
ERROR: u'red',
}
def prints(self, level, *args, **kwargs):
from calibre.utils.terminal import ColoredStream
with ColoredStream(self.stream, self.color[level]):
self._prints(*args, **kwargs)
def flush(self):
self.stream.flush()
class FileStream(Stream):
def __init__(self, stream=None):
Stream.__init__(self, stream)
def prints(self, level, *args, **kwargs):
self._prints(*args, **kwargs)
class HTMLStream(Stream):
color = {
DEBUG: b'<span style="color:green">',
INFO: b'<span>',
WARN: b'<span style="color:blue">',
ERROR: b'<span style="color:red">'
}
normal = b'</span>'
def __init__(self, stream=sys.stdout):
Stream.__init__(self, stream)
def prints(self, level, *args, **kwargs):
self.stream.write(self.color[level])
kwargs['file'] = self.stream
self._prints(*args, **kwargs)
self.stream.write(self.normal)
def flush(self):
self.stream.flush()
class UnicodeHTMLStream(HTMLStream):
color = {k: v.decode('ascii') for k, v in iteritems(HTMLStream.color)}
normal = HTMLStream.normal.decode('ascii')
def __init__(self):
self.clear()
def flush(self):
pass
def prints(self, level, *args, **kwargs):
col = self.color[level]
if col != self.last_col:
if self.data:
self.data.append(self.normal)
self.data.append(col)
self.last_col = col
sep = kwargs.get(u'sep', u' ')
end = kwargs.get(u'end', u'\n')
for arg in args:
if isbytestring(arg):
arg = force_unicode(arg)
elif not isinstance(arg, unicode_type):
arg = as_unicode(arg)
self.data.append(arg+sep)
self.plain_text.append(arg+sep)
self.data.append(end)
self.plain_text.append(end)
def clear(self):
self.data = []
self.plain_text = []
self.last_col = self.color[INFO]
@property
def html(self):
end = self.normal if self.data else u''
return u''.join(self.data) + end
def dump(self):
return [self.data, self.plain_text, self.last_col]
def load(self, dump):
self.data, self.plain_text, self.last_col = dump
def append_dump(self, dump):
d, p, lc = dump
self.data.extend(d)
self.plain_text.extend(p)
self.last_col = lc
class Log(object):
DEBUG = DEBUG
INFO = INFO
WARN = WARN
ERROR = ERROR
def __init__(self, level=INFO):
self.filter_level = level
default_output = ANSIStream()
self.outputs = [default_output]
self.debug = partial(self.print_with_flush, DEBUG)
self.info = partial(self.print_with_flush, INFO)
self.warn = self.warning = partial(self.print_with_flush, WARN)
self.error = partial(self.print_with_flush, ERROR)
def prints(self, level, *args, **kwargs):
if level < self.filter_level:
return
for output in self.outputs:
output.prints(level, *args, **kwargs)
def print_with_flush(self, level, *args, **kwargs):
if level < self.filter_level:
return
for output in self.outputs:
output.prints(level, *args, **kwargs)
self.flush()
def exception(self, *args, **kwargs):
limit = kwargs.pop('limit', None)
self.print_with_flush(ERROR, *args, **kwargs)
self.print_with_flush(DEBUG, traceback.format_exc(limit))
def __call__(self, *args, **kwargs):
self.info(*args, **kwargs)
def __enter__(self):
self.orig_filter_level = self.filter_level
self.filter_level = self.ERROR + 100
def __exit__(self, *args):
self.filter_level = self.orig_filter_level
def flush(self):
for o in self.outputs:
if hasattr(o, 'flush'):
o.flush()
def close(self):
for o in self.outputs:
if hasattr(o, 'close'):
o.close()
class DevNull(Log):
def __init__(self):
Log.__init__(self, level=Log.ERROR)
self.outputs = []
class ThreadSafeLog(Log):
exception_traceback_level = Log.DEBUG
def __init__(self, level=Log.INFO):
Log.__init__(self, level=level)
self._lock = Lock()
def prints(self, *args, **kwargs):
with self._lock:
Log.prints(self, *args, **kwargs)
def print_with_flush(self, *args, **kwargs):
with self._lock:
Log.print_with_flush(self, *args, **kwargs)
def exception(self, *args, **kwargs):
limit = kwargs.pop('limit', None)
with self._lock:
Log.print_with_flush(self, ERROR, *args, **kwargs)
Log.print_with_flush(self, self.exception_traceback_level, traceback.format_exc(limit))
class ThreadSafeWrapper(Log):
def __init__(self, other_log):
Log.__init__(self, level=other_log.filter_level)
self.outputs = list(other_log.outputs)
self._lock = Lock()
def prints(self, *args, **kwargs):
with self._lock:
Log.prints(self, *args, **kwargs)
def print_with_flush(self, *args, **kwargs):
with self._lock:
Log.print_with_flush(self, *args, **kwargs)
class GUILog(ThreadSafeLog):
'''
Logs in HTML and plain text as unicode. Ideal for display in a GUI context.
'''
def __init__(self):
ThreadSafeLog.__init__(self, level=self.DEBUG)
self.outputs = [UnicodeHTMLStream()]
def clear(self):
self.outputs[0].clear()
@property
def html(self):
return self.outputs[0].html
@property
def plain_text(self):
return u''.join(self.outputs[0].plain_text)
def dump(self):
return self.outputs[0].dump()
def load(self, dump):
return self.outputs[0].load(dump)
def append_dump(self, dump):
return self.outputs[0].append_dump(dump)
default_log = Log()
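def _log_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): GUILog records every message as both HTML and plain text,
    # which suits display in a dialog after a background job finishes.
    log = GUILog()
    log('processing')  # __call__ is an alias for log.info
    log.warn('low disk space')
    return log.html, log.plain_text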

View File

@@ -0,0 +1,13 @@
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
try:
from time import monotonic
except ImportError:
from calibre.constants import plugins
monotonicp, err = plugins['monotonic']
if err:
raise RuntimeError('Failed to load the monotonic module with error: ' + err)
monotonic = monotonicp.monotonic
del monotonicp, err

View File

@@ -0,0 +1,44 @@
# Multiple replace from a dictionary: http://code.activestate.com/recipes/81330/
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2010, sengian <sengian1 @ gmail.com>'
__docformat__ = 'restructuredtext en'
import re
try:
from collections import UserDict
except ImportError:
from UserDict import UserDict
class MReplace(UserDict):
def __init__(self, data=None, case_sensitive=True):
UserDict.__init__(self, data)
self.re = None
self.regex = None
self.case_sensitive = case_sensitive
self.compile_regex()
def compile_regex(self):
if len(self.data) > 0:
keys = sorted(self.data, key=len, reverse=True)
if isinstance(keys[0], bytes):
tmp = b"(%s)" % b"|".join(map(re.escape, keys))
else:
tmp = "(%s)" % "|".join(map(re.escape, keys))
if self.re != tmp:
self.re = tmp
if self.case_sensitive:
self.regex = re.compile(self.re)
else:
self.regex = re.compile(self.re, re.I)
def __call__(self, mo):
return self[mo.string[mo.start():mo.end()]]
def mreplace(self, text):
# Replace without regex compile
if len(self.data) < 1 or self.re is None:
return text
return self.regex.sub(self, text)

View File

@@ -0,0 +1,106 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os
from calibre import config_dir
from polyglot.builtins import builtins
user_dir = os.path.join(config_dir, 'resources')
class PathResolver(object):
def __init__(self):
self.locations = [sys.resources_location]
self.cache = {}
def suitable(path):
try:
return os.path.exists(path) and os.path.isdir(path) and \
os.listdir(path)
except:
pass
return False
self.default_path = sys.resources_location
dev_path = os.environ.get('CALIBRE_DEVELOP_FROM', None)
self.using_develop_from = False
if dev_path is not None:
dev_path = os.path.join(os.path.abspath(
os.path.dirname(dev_path)), 'resources')
if suitable(dev_path):
self.locations.insert(0, dev_path)
self.default_path = dev_path
self.using_develop_from = True
self.user_path = None
if suitable(user_dir):
self.locations.insert(0, user_dir)
self.user_path = user_dir
def __call__(self, path, allow_user_override=True):
path = path.replace(os.sep, '/')
key = (path, allow_user_override)
ans = self.cache.get(key, None)
if ans is None:
for base in self.locations:
if not allow_user_override and base == self.user_path:
continue
fpath = os.path.join(base, *path.split('/'))
if os.path.exists(fpath):
ans = fpath
break
if ans is None:
ans = os.path.join(self.default_path, *path.split('/'))
self.cache[key] = ans
return ans
def set_data(self, path, data=None):
self.cache.pop((path, True), None)
fpath = os.path.join(user_dir, *path.split('/'))
if data is None:
if os.path.exists(fpath):
os.remove(fpath)
else:
base = os.path.dirname(fpath)
if not os.path.exists(base):
os.makedirs(base)
with open(fpath, 'wb') as f:
f.write(data)
_resolver = PathResolver()
def get_path(path, data=False, allow_user_override=True):
fpath = _resolver(path, allow_user_override=allow_user_override)
if data:
with open(fpath, 'rb') as f:
return f.read()
return fpath
def get_image_path(path, data=False, allow_user_override=True):
if not path:
return get_path('images', allow_user_override=allow_user_override)
return get_path('images/'+path, data=data, allow_user_override=allow_user_override)
def set_data(path, data=None):
return _resolver.set_data(path, data)
builtins.__dict__['P'] = get_path
builtins.__dict__['I'] = get_image_path

View File

@@ -0,0 +1,139 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from polyglot.builtins import unicode_type
from calibre.constants import ispy3
MSGPACK_MIME = 'application/x-msgpack'
CANARY = 'jPoAv3zOyHvQ5JFNYg4hJ9'
def encoded(typ, data, ExtType):
if ExtType is None:
return {CANARY: typ, 'v': data}
return ExtType(typ, msgpack_dumps(data))
def create_encoder(for_json=False):
from datetime import datetime
ExtType = None
if not for_json:
import msgpack
ExtType = msgpack.ExtType
def encoder(obj):
if isinstance(obj, datetime):
return encoded(0, unicode_type(obj.isoformat()), ExtType)
if isinstance(obj, (set, frozenset)):
return encoded(1, tuple(obj), ExtType)
if getattr(obj, '__calibre_serializable__', False):
from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.field_metadata import FieldMetadata, fm_as_dict
from calibre.db.categories import Tag
if isinstance(obj, Metadata):
from calibre.ebooks.metadata.book.serialize import metadata_as_dict
return encoded(
2, metadata_as_dict(obj, encode_cover_data=for_json), ExtType
)
elif isinstance(obj, FieldMetadata):
return encoded(3, fm_as_dict(obj), ExtType)
elif isinstance(obj, Tag):
return encoded(4, obj.as_dict(), ExtType)
if for_json and isinstance(obj, bytes):
return obj.decode('utf-8')
raise TypeError('Cannot serialize objects of type {}'.format(type(obj)))
return encoder
def msgpack_dumps(obj):
import msgpack
return msgpack.packb(obj, default=create_encoder(), use_bin_type=True)
def json_dumps(data, **kw):
import json
kw['default'] = create_encoder(for_json=True)
kw['ensure_ascii'] = False
ans = json.dumps(data, **kw)
if not isinstance(ans, bytes):
ans = ans.encode('utf-8')
return ans
def decode_metadata(x, for_json):
from polyglot.binary import from_base64_bytes
from calibre.ebooks.metadata.book.serialize import metadata_from_dict
obj = metadata_from_dict(x)
if for_json and obj.cover_data and obj.cover_data[1]:
obj.cover_data = obj.cover_data[0], from_base64_bytes(obj.cover_data[1])
return obj
def decode_field_metadata(x, for_json):
from calibre.library.field_metadata import fm_from_dict
return fm_from_dict(x)
def decode_category_tag(x, for_json):
from calibre.db.categories import Tag
return Tag.from_dict(x)
def decode_datetime(x, fj):
from calibre.utils.iso8601 import parse_iso8601
return parse_iso8601(x, assume_utc=True)
decoders = (
decode_datetime,
lambda x, fj: set(x),
decode_metadata, decode_field_metadata, decode_category_tag
)
def json_decoder(obj):
typ = obj.get(CANARY)
if typ is None:
return obj
return decoders[typ](obj['v'], True)
def msgpack_decoder(code, data):
return decoders[code](msgpack_loads(data), False)
def msgpack_loads(dump, use_list=True):
# use_list controls whether msgpack arrays are unpacked as lists or tuples
import msgpack
return msgpack.unpackb(dump, ext_hook=msgpack_decoder, raw=False, use_list=use_list)
def json_loads(data):
import json
return json.loads(data, object_hook=json_decoder)
if ispy3:
def pickle_dumps(data):
import pickle
return pickle.dumps(data, -1)
def pickle_loads(dump):
import pickle
return pickle.loads(dump, encoding='utf-8')
else:
def pickle_dumps(data):
import cPickle as pickle
return pickle.dumps(data, -1)
def pickle_loads(dump):
import cPickle as pickle
return pickle.loads(dump)

View File

@@ -0,0 +1,222 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import os, sys
from polyglot.builtins import reraise
from calibre.constants import iswindows, plugins, ispy3
'''
This module defines a share_open() function which is a replacement for
Python's builtin open() function.
This replacement opens 'shareable' files on all platforms, that is, files that
can be read from, written to, and deleted at the same time by multiple
processes. All file handles are non-inheritable, as in Python 3 but unlike
Python 2. Non-inheritance is atomic.
Caveats on Windows: sharing is co-operative, i.e. it only works if
all processes involved open the file with share_open(). Also while you can
delete a file that is open, you cannot open a new file with the same filename
until all open file handles are closed. You also cannot delete the containing
directory until all file handles are closed. To get around this, rename the
file before deleting it.
'''
speedup, err = plugins['speedup']
if not speedup:
raise RuntimeError('Failed to load the speedup plugin with error: %s' % err)
valid_modes = {'a', 'a+', 'a+b', 'ab', 'r', 'rb', 'r+', 'r+b', 'w', 'wb', 'w+', 'w+b'}
def validate_mode(mode):
return mode in valid_modes
class FlagConstants(object):
def __init__(self):
for x in 'APPEND CREAT TRUNC EXCL RDWR RDONLY WRONLY'.split():
x = 'O_' + x
setattr(self, x, getattr(os, x))
for x in 'RANDOM SEQUENTIAL TEXT BINARY'.split():
x = 'O_' + x
setattr(self, x, getattr(os, x, 0))
fc = FlagConstants()
def flags_from_mode(mode):
if not validate_mode(mode):
raise ValueError('The mode is invalid')
m = mode[0]
random = '+' in mode
binary = 'b' in mode
if m == 'a':
flags = fc.O_APPEND | fc.O_CREAT
if random:
flags |= fc.O_RDWR | fc.O_RANDOM
else:
flags |= fc.O_WRONLY | fc.O_SEQUENTIAL
elif m == 'r':
if random:
flags = fc.O_RDWR | fc.O_RANDOM
else:
flags = fc.O_RDONLY | fc.O_SEQUENTIAL
elif m == 'w':
if random:
flags = fc.O_RDWR | fc.O_RANDOM
else:
flags = fc.O_WRONLY | fc.O_SEQUENTIAL
flags |= fc.O_TRUNC | fc.O_CREAT
flags |= (fc.O_BINARY if binary else fc.O_TEXT)
return flags
if iswindows:
from numbers import Integral
import msvcrt
import win32file, pywintypes
CREATE_NEW = win32file.CREATE_NEW
CREATE_ALWAYS = win32file.CREATE_ALWAYS
OPEN_EXISTING = win32file.OPEN_EXISTING
OPEN_ALWAYS = win32file.OPEN_ALWAYS
TRUNCATE_EXISTING = win32file.TRUNCATE_EXISTING
FILE_SHARE_READ = win32file.FILE_SHARE_READ
FILE_SHARE_WRITE = win32file.FILE_SHARE_WRITE
FILE_SHARE_DELETE = win32file.FILE_SHARE_DELETE
FILE_SHARE_VALID_FLAGS = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE
FILE_ATTRIBUTE_READONLY = win32file.FILE_ATTRIBUTE_READONLY
FILE_ATTRIBUTE_NORMAL = win32file.FILE_ATTRIBUTE_NORMAL
FILE_ATTRIBUTE_TEMPORARY = win32file.FILE_ATTRIBUTE_TEMPORARY
FILE_FLAG_DELETE_ON_CLOSE = win32file.FILE_FLAG_DELETE_ON_CLOSE
FILE_FLAG_SEQUENTIAL_SCAN = win32file.FILE_FLAG_SEQUENTIAL_SCAN
FILE_FLAG_RANDOM_ACCESS = win32file.FILE_FLAG_RANDOM_ACCESS
GENERIC_READ = win32file.GENERIC_READ & 0xffffffff
GENERIC_WRITE = win32file.GENERIC_WRITE & 0xffffffff
DELETE = 0x00010000
_ACCESS_MASK = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
_ACCESS_MAP = {
os.O_RDONLY : GENERIC_READ,
os.O_WRONLY : GENERIC_WRITE,
os.O_RDWR : GENERIC_READ | GENERIC_WRITE
}
_CREATE_MASK = os.O_CREAT | os.O_EXCL | os.O_TRUNC
_CREATE_MAP = {
0 : OPEN_EXISTING,
os.O_EXCL : OPEN_EXISTING,
os.O_CREAT : OPEN_ALWAYS,
os.O_CREAT | os.O_EXCL : CREATE_NEW,
os.O_CREAT | os.O_TRUNC | os.O_EXCL : CREATE_NEW,
os.O_TRUNC : TRUNCATE_EXISTING,
os.O_TRUNC | os.O_EXCL : TRUNCATE_EXISTING,
os.O_CREAT | os.O_TRUNC : CREATE_ALWAYS
}
def raise_winerror(pywinerr):
reraise(
WindowsError,
WindowsError(pywinerr.winerror,
                (pywinerr.funcname or '') + ': ' + (pywinerr.strerror or '')),
sys.exc_info()[2])
def os_open(path, flags, mode=0o777, share_flags=FILE_SHARE_VALID_FLAGS):
'''
Replacement for os.open() allowing moving or unlinking before closing
'''
if not isinstance(flags, Integral):
raise TypeError('flags must be an integer')
if not isinstance(mode, Integral):
raise TypeError('mode must be an integer')
if share_flags & ~FILE_SHARE_VALID_FLAGS:
raise ValueError('bad share_flags: %r' % share_flags)
access_flags = _ACCESS_MAP[flags & _ACCESS_MASK]
create_flags = _CREATE_MAP[flags & _CREATE_MASK]
attrib_flags = FILE_ATTRIBUTE_NORMAL
if flags & os.O_CREAT and mode & ~0o444 == 0:
attrib_flags = FILE_ATTRIBUTE_READONLY
if flags & os.O_TEMPORARY:
share_flags |= FILE_SHARE_DELETE
attrib_flags |= FILE_FLAG_DELETE_ON_CLOSE
access_flags |= DELETE
if flags & os.O_SHORT_LIVED:
attrib_flags |= FILE_ATTRIBUTE_TEMPORARY
if flags & os.O_SEQUENTIAL:
attrib_flags |= FILE_FLAG_SEQUENTIAL_SCAN
if flags & os.O_RANDOM:
attrib_flags |= FILE_FLAG_RANDOM_ACCESS
try:
h = win32file.CreateFileW(
path, access_flags, share_flags, None, create_flags, attrib_flags, None)
except pywintypes.error as e:
raise_winerror(e)
ans = msvcrt.open_osfhandle(h, flags | os.O_NOINHERIT)
        h.Detach()  # We don't want the handle to be automatically closed when h is deleted
return ans
def share_open(path, mode='r', buffering=-1):
flags = flags_from_mode(mode)
return speedup.fdopen(os_open(path, flags), path, mode, buffering)
else:
if ispy3:
# See PEP 446
share_open = open
else:
def share_open(path, mode='r', buffering=-1):
flags = flags_from_mode(mode) | speedup.O_CLOEXEC
return speedup.fdopen(os.open(path, flags), path, mode, buffering)
def raise_winerror(x):
reraise(NotImplementedError, None, sys.exc_info()[2])
def find_tests():
import unittest
from calibre.ptempfile import TemporaryDirectory
class SharedFileTest(unittest.TestCase):
def test_shared_file(self):
eq = self.assertEqual
with TemporaryDirectory() as tdir:
fname = os.path.join(tdir, 'test.txt')
with share_open(fname, 'wb') as f:
f.write(b'a' * 20 * 1024)
eq(fname, f.name)
f = share_open(fname, 'rb')
eq(f.read(1), b'a')
if iswindows:
os.rename(fname, fname+'.moved')
os.remove(fname+'.moved')
else:
os.remove(fname)
eq(f.read(1), b'a')
f2 = share_open(fname, 'w+b')
f2.write(b'b' * 10 * 1024)
f2.seek(0)
eq(f.read(10000), b'a'*10000)
eq(f2.read(100), b'b' * 100)
f3 = share_open(fname, 'rb')
eq(f3.read(100), b'b' * 100)
return unittest.defaultTestLoader.loadTestsFromTestCase(SharedFileTest)

View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Generate UUID encoded using a user specified alphabet.
'''
import string, math, uuid as _uuid
from polyglot.builtins import unicode_type
def num_to_string(number, alphabet, alphabet_len, pad_to_length=None):
ans = []
number = max(0, number)
while number:
number, digit = divmod(number, alphabet_len)
ans.append(alphabet[digit])
if pad_to_length is not None and pad_to_length > len(ans):
ans.append(alphabet[0] * (pad_to_length - len(ans)))
return ''.join(ans)
def string_to_num(string, alphabet_map, alphabet_len):
ans = 0
for char in reversed(string):
ans = ans * alphabet_len + alphabet_map[char]
return ans
class ShortUUID(object):
def __init__(self, alphabet=None):
# We do not include zero and one in the default alphabet as they can be
# confused with the letters O and I in some fonts. And removing them
# does not change the uuid_pad_len.
self.alphabet = tuple(sorted(unicode_type(alphabet or (string.digits + string.ascii_letters)[2:])))
self.alphabet_len = len(self.alphabet)
self.alphabet_map = {c:i for i, c in enumerate(self.alphabet)}
self.uuid_pad_len = int(math.ceil(math.log(1 << 128, self.alphabet_len)))
def uuid4(self, pad_to_length=None):
if pad_to_length is None:
pad_to_length = self.uuid_pad_len
return num_to_string(_uuid.uuid4().int, self.alphabet, self.alphabet_len, pad_to_length)
def uuid5(self, namespace, name, pad_to_length=None):
if pad_to_length is None:
pad_to_length = self.uuid_pad_len
return num_to_string(_uuid.uuid5(namespace, name).int, self.alphabet, self.alphabet_len, pad_to_length)
def decode(self, encoded):
return _uuid.UUID(int=string_to_num(encoded, self.alphabet_map, self.alphabet_len))
_global_instance = ShortUUID()
uuid4 = _global_instance.uuid4
uuid5 = _global_instance.uuid5
decode = _global_instance.decode
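# A minimal round-trip sketch using the module-level helpers above.
def _short_uuid_example():
    encoded = uuid4()          # compact string over the default 60-character alphabet
    as_uuid = decode(encoded)  # recover the underlying uuid.UUID
    return encoded, as_uuid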

View File

@@ -0,0 +1,888 @@
#!/usr/bin/python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__author__ = "Chad Miller <smartypantspy@chad.org>, Kovid Goyal <kovid at kovidgoyal.net>"
__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom"
r"""
==============
smartypants.py
==============
----------------------------
SmartyPants ported to Python
----------------------------
Ported by `Chad Miller`_
Copyright (c) 2004, 2007 Chad Miller
original `SmartyPants`_ by `John Gruber`_
Copyright (c) 2003 John Gruber
Synopsis
========
A smart-quotes plugin for Pyblosxom_.
The original "SmartyPants" is a free web publishing plug-in for Movable Type,
Blosxom, and BBEdit that easily translates plain ASCII punctuation characters
into "smart" typographic punctuation HTML entities.
This software, *smartypants.py*, endeavours to be a functional port of
SmartyPants to Python, for use with Pyblosxom_.
Description
===========
SmartyPants can perform the following transformations:
- Straight quotes ( " and ' ) into "curly" quote HTML entities
- Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities
- Dashes (``--`` and ``---``) into en- and em-dash entities
- Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity
This means you can write, edit, and save your posts using plain old
ASCII straight quotes, plain dashes, and plain dots, but your published
posts (and final HTML output) will appear with smart quotes, em-dashes,
and proper ellipses.
SmartyPants does not modify characters within ``<pre>``, ``<code>``, ``<kbd>``,
``<math>`` or ``<script>`` tag blocks. Typically, these tags are used to
display text where smart quotes and other "smart punctuation" would not be
appropriate, such as source code or example markup.
Backslash Escapes
=================
If you need to use literal straight quotes (or plain hyphens and
periods), SmartyPants accepts the following backslash escape sequences
to force non-smart punctuation. It does so by transforming the escape
sequence into a decimal-encoded HTML entity:
(FIXME: table here.)
.. comment It sucks that there's a disconnect between the visual layout and table markup when special characters are involved.
.. comment ====== ===== =========
.. comment Escape Value Character
.. comment ====== ===== =========
.. comment \\\\\\\\ &#92; \\\\
.. comment \\\\" &#34; "
.. comment \\\\' &#39; '
.. comment \\\\. &#46; .
.. comment \\\\- &#45; \-
.. comment \\\\` &#96; \`
.. comment ====== ===== =========
This is useful, for example, when you want to use straight quotes as
foot and inch marks: 6'2" tall; a 17" iMac.
Options
=======
For Pyblosxom users, the ``smartypants_attributes`` attribute is where you
specify configuration options.
Numeric values are the easiest way to configure SmartyPants' behavior:
"0"
Suppress all transformations. (Do nothing.)
"1"
Performs default SmartyPants transformations: quotes (including
\`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash)
is used to signify an em-dash; there is no support for en-dashes.
"2"
Same as smarty_pants="1", except that it uses the old-school typewriter
shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``"
(dash dash dash)
for em-dashes.
"3"
Same as smarty_pants="2", but inverts the shorthand for dashes:
"``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for
en-dashes.
"-1"
Stupefy mode. Reverses the SmartyPants transformation process, turning
the HTML entities produced by SmartyPants into their ASCII equivalents.
E.g. "&#8220;" is turned into a simple double-quote ("), "&#8212;" is
turned into two dashes, etc.
The following single-character attribute values can be combined to toggle
individual transformations from within the smarty_pants attribute. For
example, to educate normal quotes and em-dashes, but not ellipses or
\`\`backticks'' -style quotes:
``py['smartypants_attributes'] = "qd"``
"q"
Educates normal quote characters: (") and (').
"b"
Educates \`\`backticks'' -style double quotes.
"B"
Educates \`\`backticks'' -style double quotes and \`single' quotes.
"d"
Educates em-dashes.
"D"
Educates em-dashes and en-dashes, using old-school typewriter shorthand:
(dash dash) for en-dashes, (dash dash dash) for em-dashes.
"i"
Educates em-dashes and en-dashes, using inverted old-school typewriter
shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes.
"e"
Educates ellipses.
"w"
Translates any instance of ``&quot;`` into a normal double-quote character.
This should be of no interest to most people, but of particular interest
to anyone who writes their posts using Dreamweaver, as Dreamweaver
inexplicably uses this entity to represent a literal double-quote
character. SmartyPants only educates normal quotes, not entities (because
ordinarily, entities are used for the explicit purpose of representing the
specific character they represent). The "w" option must be used in
conjunction with one (or both) of the other quote options ("q" or "b").
Thus, if you wish to apply all SmartyPants transformations (quotes, en-
and em-dashes, and ellipses) and also translate ``&quot;`` entities into
regular quotes so SmartyPants can educate them, you should pass the
following to the smarty_pants attribute: ``"qDew"``.
The ``smartypants_forbidden_flavours`` list contains pyblosxom flavours for
which no Smarty Pants rendering will occur.
Caveats
=======
Why You Might Not Want to Use Smart Quotes in Your Weblog
---------------------------------------------------------
For one thing, you might not care.
Most normal, mentally stable individuals do not take notice of proper
typographic punctuation. Many design and typography nerds, however, break
out in a nasty rash when they encounter, say, a restaurant sign that uses
a straight apostrophe to spell "Joe's".
If you're the sort of person who just doesn't care, you might well want to
continue not caring. Using straight quotes -- and sticking to the 7-bit
ASCII character set in general -- is certainly a simpler way to live.
Even if you *do* care about accurate typography, you still might want to
think twice before educating the quote characters in your weblog. One side
effect of publishing curly quote HTML entities is that it makes your
weblog a bit harder for others to quote from using copy-and-paste. What
happens is that when someone copies text from your blog, the copied text
contains the 8-bit curly quote characters (as well as the 8-bit characters
for em-dashes and ellipses, if you use these options). These characters
are not standard across different text encoding methods, which is why they
need to be encoded as HTML entities.
People copying text from your weblog, however, may not notice that you're
using curly quotes, and they'll go ahead and paste the unencoded 8-bit
characters copied from their browser into an email message or their own
weblog. When pasted as raw "smart quotes", these characters are likely to
get mangled beyond recognition.
That said, my own opinion is that any decent text editor or email client
makes it easy to stupefy smart quote characters into their 7-bit
equivalents, and I don't consider it my problem if you're using an
indecent text editor or email client.
Algorithmic Shortcomings
------------------------
One situation in which quotes will get curled the wrong way is when
apostrophes are used at the start of leading contractions. For example:
``'Twas the night before Christmas.``
In the case above, SmartyPants will turn the apostrophe into an opening
single-quote, when in fact it should be a closing one. I don't think
this problem can be solved in the general case -- every word processor
I've tried gets this wrong as well. In such cases, it's best to use the
proper HTML entity for closing single-quotes (``&#8217;``) by hand.
Bugs
====
To file bug reports or feature requests (other than topics listed in the
Caveats section above) please send email to: mailto:smartypantspy@chad.org
If the bug involves quotes being curled the wrong way, please send example
text to illustrate.
To Do list
----------
- Provide a function for use within templates to quote anything at all.
Version History
===============
1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400
- Fixed bug where blocks of precious unalterable text were instead
interpreted. Thanks to Le Roux and Dirk van Oosterbosch.
1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400
- Fix bogus magical quotation when there is no hint that the
user wants it, e.g., in "21st century". Thanks to Nathan Hamblen.
- Be smarter about quotes before terminating numbers in an en-dash'ed
range.
1.5_1.4: Thu, 10 Feb 2005 20:24:36 -0500
- Fix a date-processing bug, as reported by jacob childress.
- Begin a test-suite for ensuring correct output.
- Removed import of "string", since I didn't really need it.
(This was my first ever Python program. Sue me!)
1.5_1.3: Wed, 15 Sep 2004 18:25:58 -0400
- Abort processing if the flavour is in forbidden-list. Default of
[ "rss" ] (Idea of Wolfgang SCHNERRING.)
- Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING.
1.5_1.2: Mon, 24 May 2004 08:14:54 -0400
- Some single quotes weren't replaced properly. Diff-tesuji played
by Benjamin GEIGER.
1.5_1.1: Sun, 14 Mar 2004 14:38:28 -0500
- Support upcoming pyblosxom 0.9 plugin verification feature.
1.5_1.0: Tue, 09 Mar 2004 08:08:35 -0500
- Initial release
Version Information
-------------------
Version numbers will track the SmartyPants_ version numbers, with the addition
of an underscore and the smartypants.py version on the end.
New versions will be available at `http://wiki.chad.org/SmartyPantsPy`_
.. _http://wiki.chad.org/SmartyPantsPy: http://wiki.chad.org/SmartyPantsPy
Authors
=======
`John Gruber`_ did all of the hard work of writing this software in Perl for
`Movable Type`_ and almost all of this useful documentation. `Chad Miller`_
ported it to Python to use with Pyblosxom_.
Additional Credits
==================
Portions of the SmartyPants original work are based on Brad Choate's nifty
MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to
this plug-in. Brad Choate is a fine hacker indeed.
`Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta
testing of the original SmartyPants.
`Rael Dornfest`_ ported SmartyPants to Blosxom.
.. _Brad Choate: http://bradchoate.com/
.. _Jeremy Hedley: http://antipixel.com/
.. _Charles Wiltgen: http://playbacktime.com/
.. _Rael Dornfest: http://raelity.org/
Copyright and License
=====================
SmartyPants_ license::
Copyright (c) 2003 John Gruber
(https://daringfireball.net/)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name "SmartyPants" nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
This software is provided by the copyright holders and contributors "as
is" and any express or implied warranties, including, but not limited
to, the implied warranties of merchantability and fitness for a
particular purpose are disclaimed. In no event shall the copyright
owner or contributors be liable for any direct, indirect, incidental,
special, exemplary, or consequential damages (including, but not
limited to, procurement of substitute goods or services; loss of use,
data, or profits; or business interruption) however caused and on any
theory of liability, whether in contract, strict liability, or tort
(including negligence or otherwise) arising in any way out of the use
of this software, even if advised of the possibility of such damage.
smartypants.py license::
smartypants.py is a derivative work of SmartyPants.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
This software is provided by the copyright holders and contributors "as
is" and any express or implied warranties, including, but not limited
to, the implied warranties of merchantability and fitness for a
particular purpose are disclaimed. In no event shall the copyright
owner or contributors be liable for any direct, indirect, incidental,
special, exemplary, or consequential damages (including, but not
limited to, procurement of substitute goods or services; loss of use,
data, or profits; or business interruption) however caused and on any
theory of liability, whether in contract, strict liability, or tort
(including negligence or otherwise) arising in any way out of the use
of this software, even if advised of the possibility of such damage.
.. _John Gruber: https://daringfireball.net/
.. _Chad Miller: http://web.chad.org/
.. _Pyblosxom: http://roughingit.subtlehints.net/pyblosxom
.. _SmartyPants: https://daringfireball.net/projects/smartypants/
.. _Movable Type: http://www.movabletype.org/
"""
import re
# style added by Kovid
tags_to_skip_regex = re.compile(r"<(/)?(style|pre|code|kbd|script|math)[^>]*>", re.I)
self_closing_regex = re.compile(r'/\s*>$')
# internal functions below here
def parse_attr(attr):
do_dashes = do_backticks = do_quotes = do_ellipses = do_stupefy = 0
if attr == "1":
do_quotes = 1
do_backticks = 1
do_dashes = 1
do_ellipses = 1
elif attr == "2":
# Do everything, turn all options on, use old school dash shorthand.
do_quotes = 1
do_backticks = 1
do_dashes = 2
do_ellipses = 1
elif attr == "3":
# Do everything, turn all options on, use inverted old school dash shorthand.
do_quotes = 1
do_backticks = 1
do_dashes = 3
do_ellipses = 1
elif attr == "-1":
# Special "stupefy" mode.
do_stupefy = 1
else:
for c in attr:
if c == "q":
do_quotes = 1
elif c == "b":
do_backticks = 1
elif c == "B":
do_backticks = 2
elif c == "d":
do_dashes = 1
elif c == "D":
do_dashes = 2
elif c == "i":
do_dashes = 3
elif c == "e":
do_ellipses = 1
else:
pass
# ignore unknown option
return do_dashes, do_backticks, do_quotes, do_ellipses, do_stupefy
def smartyPants(text, attr='1'):
# Parse attributes:
# 0 : do nothing
# 1 : set all
# 2 : set all, using old school en- and em- dash shortcuts
# 3 : set all, using inverted old school en and em- dash shortcuts
#
# q : quotes
# b : backtick quotes (``double'' only)
# B : backtick quotes (``double'' and `single')
# d : dashes
# D : old school dashes
# i : inverted old school dashes
# e : ellipses
if attr == "0":
# Do nothing.
return text
do_dashes, do_backticks, do_quotes, do_ellipses, do_stupefy = parse_attr(attr)
dashes_func = {1: educateDashes, 2: educateDashesOldSchool, 3: educateDashesOldSchoolInverted}.get(do_dashes, lambda x: x)
backticks_func = {1: educateBackticks, 2: lambda x: educateSingleBackticks(educateBackticks(x))}.get(do_backticks, lambda x: x)
ellipses_func = {1: educateEllipses}.get(do_ellipses, lambda x: x)
stupefy_func = {1: stupefyEntities}.get(do_stupefy, lambda x: x)
skipped_tag_stack = []
tokens = _tokenize(text)
result = []
in_pre = False
prev_token_last_char = ""
# This is a cheat, used to get some context
# for one-character tokens that consist of
# just a quote char. What we do is remember
# the last character of the previous text
# token, to use as context to curl single-
# character quote tokens correctly.
for cur_token in tokens:
if cur_token[0] == "tag":
            # Don't mess with quotes inside some tags; self-closing <tag/> forms are detected below.
result.append(cur_token[1])
skip_match = tags_to_skip_regex.match(cur_token[1])
if skip_match is not None:
is_self_closing = self_closing_regex.search(skip_match.group()) is not None
if not is_self_closing:
if not skip_match.group(1):
skipped_tag_stack.append(skip_match.group(2).lower())
in_pre = True
else:
if len(skipped_tag_stack) > 0:
if skip_match.group(2).lower() == skipped_tag_stack[-1]:
skipped_tag_stack.pop()
else:
pass
# This close doesn't match the open. This isn't XHTML. We should barf here.
if len(skipped_tag_stack) == 0:
in_pre = False
else:
t = cur_token[1]
last_char = t[-1:] # Remember last char of this token before processing.
if not in_pre:
t = processEscapes(t)
t = re.sub('&quot;', '"', t)
t = dashes_func(t)
t = ellipses_func(t)
# Note: backticks need to be processed before quotes.
t = backticks_func(t)
if do_quotes != 0:
if t == "'":
# Special case: single-character ' token
if re.match(r"\S", prev_token_last_char):
t = "&#8217;"
else:
t = "&#8216;"
elif t == '"':
# Special case: single-character " token
if re.match(r"\S", prev_token_last_char):
t = "&#8221;"
else:
t = "&#8220;"
else:
# Normal case:
t = educateQuotes(t)
t = stupefy_func(t)
prev_token_last_char = last_char
result.append(t)
return "".join(result)
def educateQuotes(text):
"""
Parameter: String.
Returns: The string, with "educated" curly quote HTML entities.
Example input: "Isn't this fun?"
Example output: &#8220;Isn&#8217;t this fun?&#8221;
"""
punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
# Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force:
text = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""&#8217;""", text)
text = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""&#8221;""", text)
# Special case for double sets of quotes, e.g.:
# <p>He said, "'Quoted' words in a larger quote."</p>
text = re.sub(r""""'(?=\w)""", """&#8220;&#8216;""", text)
text = re.sub(r"""'"(?=\w)""", """&#8216;&#8220;""", text)
text = re.sub(r'''""(?=\w)''', """&#8220;&#8220;""", text)
text = re.sub(r"""''(?=\w)""", """&#8216;&#8216;""", text)
text = re.sub(r'''\"\'''', """&#8221;&#8217;""", text)
text = re.sub(r'''\'\"''', """&#8217;&#8221;""", text)
text = re.sub(r'''""''', """&#8221;&#8221;""", text)
text = re.sub(r"""''""", """&#8217;&#8217;""", text)
# Special case for decade abbreviations (the '80s --> 80s):
# See http://practicaltypography.com/apostrophes.html
text = re.sub(r"""(\W|^)'(?=\d{2}s)""", r"""\1&#8217;""", text)
    # Measurements in feet and inches or longitude/latitude: 19' 43.5" --> 19′ 43.5″
text = re.sub(r'''(\W|^)([-0-9.]+\s*)'(\s*[-0-9.]+)"''', r'\1\2&#8242;\3&#8243;', text)
# Special case for Quotes at inside of other entities, e.g.:
# <p>A double quote--"within dashes"--would be nice.</p>
text = re.sub(r"""(?<=\W)"(?=\w)""", r"""&#8220;""", text)
text = re.sub(r"""(?<=\W)'(?=\w)""", r"""&#8216;""", text)
text = re.sub(r"""(?<=\w)"(?=\W)""", r"""&#8221;""", text)
text = re.sub(r"""(?<=\w)'(?=\W)""", r"""&#8217;""", text)
# The following are commented out as smartypants tokenizes text by
# stripping out html tags. Therefore, there is no guarantee that the
    # start-of-line and end-of-line regex operators will match anything
    # meaningful
    # Special case for Quotes at end of line with a preceding space (may change just to end of line)
# text = re.sub(r"""(?<=\s)"$""", r"""&#8221;""", text)
# text = re.sub(r"""(?<=\s)'$""", r"""&#8217;""", text)
# Special case for Quotes at beginning of line with a space - multiparagraph quoted text:
# text = re.sub(r"""^"(?=\s)""", r"""&#8220;""", text)
# text = re.sub(r"""^'(?=\s)""", r"""&#8216;""", text)
close_class = r"""[^\ \t\r\n\[\{\(\-]"""
dec_dashes = r"""&#8211;|&#8212;"""
# Get most opening single quotes:
opening_single_quotes_regex = re.compile(r"""
(
\s | # a whitespace char, or
&nbsp; | # a non-breaking space entity, or
-- | # dashes, or
&[mn]dash; | # named dash entities
%s | # or decimal entities
&\#x201[34]; # or hex
)
' # the quote
(?=\w) # followed by a word character
""" % (dec_dashes,), re.VERBOSE)
text = opening_single_quotes_regex.sub(r"""\1&#8216;""", text)
closing_single_quotes_regex = re.compile(r"""
(%s)
'
(?!\s | s\b | \d)
""" % (close_class,), re.VERBOSE)
text = closing_single_quotes_regex.sub(r"""\1&#8217;""", text)
closing_single_quotes_regex = re.compile(r"""
(%s)
'
(\s | s\b)
""" % (close_class,), re.VERBOSE)
text = closing_single_quotes_regex.sub(r"""\1&#8217;\2""", text)
# Any remaining single quotes should be opening ones:
text = re.sub(r"""'""", r"""&#8216;""", text)
# Get most opening double quotes:
opening_double_quotes_regex = re.compile(r"""
(
\s | # a whitespace char, or
&nbsp; | # a non-breaking space entity, or
-- | # dashes, or
&[mn]dash; | # named dash entities
%s | # or decimal entities
&\#x201[34]; # or hex
)
" # the quote
(?=\w) # followed by a word character
""" % (dec_dashes,), re.VERBOSE)
text = opening_double_quotes_regex.sub(r"""\1&#8220;""", text)
# Double closing quotes:
closing_double_quotes_regex = re.compile(r"""
#(%s)? # character that indicates the quote should be closing
"
(?=\s)
""" % (close_class,), re.VERBOSE)
text = closing_double_quotes_regex.sub(r"""&#8221;""", text)
closing_double_quotes_regex = re.compile(r"""
(%s) # character that indicates the quote should be closing
"
""" % (close_class,), re.VERBOSE)
text = closing_double_quotes_regex.sub(r"""\1&#8221;""", text)
if text.endswith('-"'):
        # A string that ends with -" is sometimes used for dialogue
text = text[:-1] + '&#8221;'
# Any remaining quotes should be opening ones.
text = re.sub(r'"', r"""&#8220;""", text)
return text
def educateBackticks(text):
"""
Parameter: String.
Returns: The string, with ``backticks'' -style double quotes
translated into HTML curly quote entities.
Example input: ``Isn't this fun?''
Example output: &#8220;Isn't this fun?&#8221;
"""
text = re.sub(r"""``""", r"""&#8220;""", text)
text = re.sub(r"""''""", r"""&#8221;""", text)
return text
def educateSingleBackticks(text):
"""
Parameter: String.
Returns: The string, with `backticks' -style single quotes
translated into HTML curly quote entities.
Example input: `Isn't this fun?'
Example output: &#8216;Isn&#8217;t this fun?&#8217;
"""
text = re.sub(r"""`""", r"""&#8216;""", text)
text = re.sub(r"""'""", r"""&#8217;""", text)
return text
def educateDashes(text):
"""
Parameter: String.
    Returns: The string, with each instance of "--" translated to
             an em-dash HTML entity, and each "---" translated to
             an en-dash HTML entity.
"""
text = re.sub(r"""---""", r"""&#8211;""", text) # en (yes, backwards)
text = re.sub(r"""--""", r"""&#8212;""", text) # em (yes, backwards)
return text
def educateDashesOldSchool(text):
"""
Parameter: String.
Returns: The string, with each instance of "--" translated to
an en-dash HTML entity, and each "---" translated to
an em-dash HTML entity.
"""
text = re.sub(r"""---""", r"""&#8212;""", text) # em (yes, backwards)
text = re.sub(r"""--""", r"""&#8211;""", text) # en (yes, backwards)
return text
def educateDashesOldSchoolInverted(text):
"""
Parameter: String.
Returns: The string, with each instance of "--" translated to
an em-dash HTML entity, and each "---" translated to
an en-dash HTML entity. Two reasons why: First, unlike the
en- and em-dash syntax supported by
EducateDashesOldSchool(), it's compatible with existing
entries written before SmartyPants 1.1, back when "--" was
only used for em-dashes. Second, em-dashes are more
common than en-dashes, and so it sort of makes sense that
the shortcut should be shorter to type. (Thanks to Aaron
Swartz for the idea.)
"""
text = re.sub(r"""---""", r"""&#8211;""", text) # em
text = re.sub(r"""--""", r"""&#8212;""", text) # en
return text
def educateEllipses(text):
"""
Parameter: String.
Returns: The string, with each instance of "..." translated to
an ellipsis HTML entity.
Example input: Huh...?
Example output: Huh&#8230;?
"""
text = re.sub(r"""\.\.\.""", r"""&#8230;""", text)
text = re.sub(r"""\. \. \.""", r"""&#8230;""", text)
return text
def stupefyEntities(text):
"""
Parameter: String.
Returns: The string, with each SmartyPants HTML entity translated to
its ASCII counterpart.
Example input: &#8220;Hello &#8212; world.&#8221;
Example output: "Hello -- world."
"""
text = re.sub(r"""&#8211;""", r"""-""", text) # en-dash
text = re.sub(r"""&#8212;""", r"""--""", text) # em-dash
text = re.sub(r"""&#8216;""", r"""'""", text) # open single quote
text = re.sub(r"""&#8217;""", r"""'""", text) # close single quote
text = re.sub(r"""&#8220;""", r'''"''', text) # open double quote
text = re.sub(r"""&#8221;""", r'''"''', text) # close double quote
text = re.sub(r"""&#8230;""", r"""...""", text) # ellipsis
return text
def processEscapes(text):
r"""
Parameter: String.
    Returns: The string, after processing the following backslash
escape sequences. This is useful if you want to force a "dumb"
quote or other character to appear.
Escape Value
------ -----
\\ &#92;
\" &#34;
\' &#39;
\. &#46;
\- &#45;
\` &#96;
"""
text = re.sub(r"""\\\\""", r"""&#92;""", text)
text = re.sub(r'''\\"''', r"""&#34;""", text)
text = re.sub(r"""\\'""", r"""&#39;""", text)
text = re.sub(r"""\\\.""", r"""&#46;""", text)
text = re.sub(r"""\\-""", r"""&#45;""", text)
text = re.sub(r"""\\`""", r"""&#96;""", text)
return text
def _tokenize(html):
"""
Parameter: String containing HTML markup.
Returns: Reference to an array of the tokens comprising the input
                string. Each token is either a tag (possibly with nested
                tags contained therein, such as <a href="<MTFoo>">), or a
run of text between tags. Each element of the array is a
two-element array; the first is either 'tag' or 'text';
the second is the actual value.
Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
<http://www.bradchoate.com/past/mtregex.php>
"""
tokens = []
# depth = 6
# nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth)
# match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments
# (?: <\? .*? \?> ) | # directives
# %s # nested tags """ % (nested_tags,)
tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""")
token_match = tag_soup.search(html)
previous_end = 0
while token_match is not None:
if token_match.group(1):
tokens.append(['text', token_match.group(1)])
tokens.append(['tag', token_match.group(2)])
previous_end = token_match.end()
token_match = tag_soup.search(html, token_match.end())
if previous_end < len(html):
tokens.append(['text', html[previous_end:]])
return tokens
def run_tests(return_tests=False):
import unittest
sp = smartyPants
class TestSmartypantsAllAttributes(unittest.TestCase):
# the default attribute is "1", which means "all".
def test_dates(self):
self.assertEqual(sp("one two '60s"), "one two &#8217;60s")
self.assertEqual(sp("1440-80's"), "1440-80&#8217;s")
self.assertEqual(sp("1440-'80s"), "1440-&#8217;80s")
self.assertEqual(sp("1440---'80s"), "1440&#8211;&#8217;80s")
self.assertEqual(sp("1960s"), "1960s") # no effect.
self.assertEqual(sp("1960's"), "1960&#8217;s")
self.assertEqual(sp("one two '60s"), "one two &#8217;60s")
self.assertEqual(sp("'60s"), "&#8217;60s")
def test_measurements(self):
ae = self.assertEqual
ae(sp("one two 1.1'2.2\""), "one two 1.1&#8242;2.2&#8243;")
ae(sp("1' 2\""), "1&#8242; 2&#8243;")
def test_skip_tags(self):
self.assertEqual(
sp("""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>"""), # noqa
"""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>""") # noqa
self.assertEqual(
sp("""<p>He said &quot;Let's write some code.&quot; This code here <code>if True:\n\tprint &quot;Okay&quot;</code> is python code.</p>"""),
"""<p>He said &#8220;Let&#8217;s write some code.&#8221; This code here <code>if True:\n\tprint &quot;Okay&quot;</code> is python code.</p>""") # noqa
self.assertEqual(
sp('''<script/><p>It's ok</p>'''),
'''<script/><p>It&#8217;s ok</p>''')
def test_ordinal_numbers(self):
self.assertEqual(sp("21st century"), "21st century") # no effect.
self.assertEqual(sp("3rd"), "3rd") # no effect.
def test_educated_quotes(self):
self.assertEqual(sp('''"Isn't this fun?"'''), '''&#8220;Isn&#8217;t this fun?&#8221;''')
tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestSmartypantsAllAttributes)
if return_tests:
return tests
unittest.TextTestRunner(verbosity=4).run(tests)
if __name__ == "__main__":
run_tests()

View File

@@ -0,0 +1,205 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import os
from polyglot.builtins import range, unicode_type
class ReadOnlyFileBuffer(object):
''' A zero copy implementation of a file like object. Uses memoryviews for efficiency. '''
def __init__(self, raw):
self.sz, self.mv = len(raw), (raw if isinstance(raw, memoryview) else memoryview(raw))
self.pos = 0
def tell(self):
return self.pos
def read(self, n=None):
if n is None:
ans = self.mv[self.pos:]
self.pos = self.sz
return ans
ans = self.mv[self.pos:self.pos+n]
self.pos = min(self.pos + n, self.sz)
return ans
def seek(self, pos, whence=os.SEEK_SET):
if whence == os.SEEK_SET:
self.pos = pos
elif whence == os.SEEK_END:
self.pos = self.sz + pos
else:
self.pos += pos
self.pos = max(0, min(self.pos, self.sz))
return self.pos
def getvalue(self):
return self.mv
def close(self):
pass
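# A quick sketch of the zero-copy behaviour: read() hands back memoryview
# slices over the original buffer rather than copies.
def _buffer_example():
    buf = ReadOnlyFileBuffer(b'hello world')
    head = buf.read(5)  # memoryview over b'hello'
    buf.seek(-5, os.SEEK_END)
    tail = buf.read()   # memoryview over b'world'
    return head.tobytes(), tail.tobytes()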
def svg_path_to_painter_path(d):
'''
Convert a tiny SVG 1.2 path into a QPainterPath.
:param d: The value of the d attribute of an SVG <path> tag
'''
from PyQt5.Qt import QPainterPath
cmd = last_cmd = b''
path = QPainterPath()
moveto_abs, moveto_rel = b'M', b'm'
closepath1, closepath2 = b'Z', b'z'
lineto_abs, lineto_rel = b'L', b'l'
hline_abs, hline_rel = b'H', b'h'
vline_abs, vline_rel = b'V', b'v'
curveto_abs, curveto_rel = b'C', b'c'
smoothcurveto_abs, smoothcurveto_rel = b'S', b's'
quadcurveto_abs, quadcurveto_rel = b'Q', b'q'
smoothquadcurveto_abs, smoothquadcurveto_rel = b'T', b't'
# Store the last parsed values
# x/y = end position
# x1/y1 and x2/y2 = bezier control points
x = y = x1 = y1 = x2 = y2 = 0
if isinstance(d, unicode_type):
d = d.encode('ascii')
d = d.replace(b',', b' ').replace(b'\n', b' ')
end = len(d)
pos = [0]
def read_byte():
p = pos[0]
pos[0] += 1
return d[p:p+1]
def parse_float():
chars = []
while pos[0] < end:
c = read_byte()
if c == b' ' and not chars:
continue
if c in b'-.0123456789':
chars.append(c)
else:
break
if not chars:
raise ValueError('Premature end of input while expecting a number')
return float(b''.join(chars))
def parse_floats(num, x_offset=0, y_offset=0):
for i in range(num):
val = parse_float()
yield val + (x_offset if i % 2 == 0 else y_offset)
repeated_command = None
while pos[0] < end:
last_cmd = cmd
cmd = read_byte() if repeated_command is None else repeated_command
repeated_command = None
if cmd == b' ':
continue
if cmd == moveto_abs:
x, y = parse_float(), parse_float()
path.moveTo(x, y)
elif cmd == moveto_rel:
x += parse_float()
y += parse_float()
path.moveTo(x, y)
elif cmd == closepath1 or cmd == closepath2:
path.closeSubpath()
elif cmd == lineto_abs:
x, y = parse_floats(2)
path.lineTo(x, y)
elif cmd == lineto_rel:
x += parse_float()
y += parse_float()
path.lineTo(x, y)
elif cmd == hline_abs:
x = parse_float()
path.lineTo(x, y)
elif cmd == hline_rel:
x += parse_float()
path.lineTo(x, y)
elif cmd == vline_abs:
y = parse_float()
path.lineTo(x, y)
elif cmd == vline_rel:
y += parse_float()
path.lineTo(x, y)
elif cmd == curveto_abs:
x1, y1, x2, y2, x, y = parse_floats(6)
path.cubicTo(x1, y1, x2, y2, x, y)
elif cmd == curveto_rel:
x1, y1, x2, y2, x, y = parse_floats(6, x, y)
path.cubicTo(x1, y1, x2, y2, x, y)
elif cmd == smoothcurveto_abs:
if last_cmd == curveto_abs or last_cmd == curveto_rel or last_cmd == smoothcurveto_abs or last_cmd == smoothcurveto_rel:
x1 = 2 * x - x2
y1 = 2 * y - y2
else:
x1, y1 = x, y
x2, y2, x, y = parse_floats(4)
path.cubicTo(x1, y1, x2, y2, x, y)
elif cmd == smoothcurveto_rel:
if last_cmd == curveto_abs or last_cmd == curveto_rel or last_cmd == smoothcurveto_abs or last_cmd == smoothcurveto_rel:
x1 = 2 * x - x2
y1 = 2 * y - y2
else:
x1, y1 = x, y
x2, y2, x, y = parse_floats(4, x, y)
path.cubicTo(x1, y1, x2, y2, x, y)
elif cmd == quadcurveto_abs:
x1, y1, x, y = parse_floats(4)
path.quadTo(x1, y1, x, y)
elif cmd == quadcurveto_rel:
x1, y1, x, y = parse_floats(4, x, y)
path.quadTo(x1, y1, x, y)
elif cmd == smoothquadcurveto_abs:
if last_cmd in (quadcurveto_abs, quadcurveto_rel, smoothquadcurveto_abs, smoothquadcurveto_rel):
x1 = 2 * x - x1
y1 = 2 * y - y1
else:
x1, y1 = x, y
x, y = parse_floats(2)
path.quadTo(x1, y1, x, y)
elif cmd == smoothquadcurveto_rel:
if last_cmd in (quadcurveto_abs, quadcurveto_rel, smoothquadcurveto_abs, smoothquadcurveto_rel):
x1 = 2 * x - x1
y1 = 2 * y - y1
else:
x1, y1 = x, y
x, y = parse_floats(2, x, y)
path.quadTo(x1, y1, x, y)
elif cmd in b'-.0123456789':
# A new number begins
# In this case, multiple parameters tuples are specified for the last command
# We rewind to reparse data correctly
pos[0] -= 1
# Handle extra parameters
if last_cmd == moveto_abs:
repeated_command = cmd = lineto_abs
elif last_cmd == moveto_rel:
repeated_command = cmd = lineto_rel
elif last_cmd in (closepath1, closepath2):
raise ValueError('Extra parameters after close path command')
elif last_cmd in (
lineto_abs, lineto_rel, hline_abs, hline_rel, vline_abs,
vline_rel, curveto_abs, curveto_rel,smoothcurveto_abs,
smoothcurveto_rel, quadcurveto_abs, quadcurveto_rel,
smoothquadcurveto_abs, smoothquadcurveto_rel
):
repeated_command = cmd = last_cmd
else:
raise ValueError('Unknown path command: %s' % cmd)
return path
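# A tiny illustrative call (needs PyQt5 at runtime, as imported inside the
# function above): a triangle built from absolute moveto/lineto and closepath.
def _svg_path_example():
    return svg_path_to_painter_path('M 10 10 L 90 10 L 50 80 Z')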

View File

@@ -0,0 +1,443 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, sys, re
from calibre.constants import iswindows, ispy3
from polyglot.builtins import iteritems, range, zip, native_string_type
if iswindows:
import ctypes.wintypes
class CONSOLE_SCREEN_BUFFER_INFO(ctypes.Structure):
_fields_ = [
('dwSize', ctypes.wintypes._COORD),
('dwCursorPosition', ctypes.wintypes._COORD),
('wAttributes', ctypes.wintypes.WORD),
('srWindow', ctypes.wintypes._SMALL_RECT),
('dwMaximumWindowSize', ctypes.wintypes._COORD)
]
def fmt(code):
return '\033[%dm' % code
RATTRIBUTES = dict(
zip(range(1, 9), (
'bold',
'dark',
'',
'underline',
'blink',
'',
'reverse',
'concealed'
)
))
ATTRIBUTES = {v:fmt(k) for k, v in iteritems(RATTRIBUTES)}
del ATTRIBUTES['']
RBACKGROUNDS = dict(
zip(range(41, 48), (
'red',
'green',
'yellow',
'blue',
'magenta',
'cyan',
'white'
),
))
BACKGROUNDS = {v:fmt(k) for k, v in iteritems(RBACKGROUNDS)}
RCOLORS = dict(
zip(range(31, 38), (
'red',
'green',
'yellow',
'blue',
'magenta',
'cyan',
'white',
),
))
COLORS = {v:fmt(k) for k, v in iteritems(RCOLORS)}
RESET = fmt(0)
if iswindows:
# From wincon.h
WCOLORS = {c:i for i, c in enumerate((
'black', 'blue', 'green', 'cyan', 'red', 'magenta', 'yellow', 'white'))}
def to_flag(fg, bg, bold):
val = 0
if bold:
val |= 0x08
if fg in WCOLORS:
val |= WCOLORS[fg]
if bg in WCOLORS:
val |= (WCOLORS[bg] << 4)
return val
def colored(text, fg=None, bg=None, bold=False):
prefix = []
if fg is not None:
prefix.append(COLORS[fg])
if bg is not None:
prefix.append(BACKGROUNDS[bg])
if bold:
prefix.append(ATTRIBUTES['bold'])
prefix = ''.join(prefix)
suffix = RESET
if isinstance(text, bytes):
prefix = prefix.encode('ascii')
suffix = suffix.encode('ascii')
return prefix + text + suffix
class Detect(object):
def __init__(self, stream):
self.stream = stream or sys.stdout
self.isatty = getattr(self.stream, 'isatty', lambda : False)()
force_ansi = 'CALIBRE_FORCE_ANSI' in os.environ
if not self.isatty and force_ansi:
self.isatty = True
self.isansi = force_ansi or not iswindows
self.set_console = self.write_console = None
self.is_console = False
if not self.isansi:
try:
import msvcrt
self.msvcrt = msvcrt
self.file_handle = msvcrt.get_osfhandle(self.stream.fileno())
from ctypes import windll, wintypes, byref, POINTER, WinDLL
mode = wintypes.DWORD(0)
f = windll.kernel32.GetConsoleMode
f.argtypes, f.restype = [wintypes.HANDLE, POINTER(wintypes.DWORD)], wintypes.BOOL
if f(self.file_handle, byref(mode)):
# Stream is a console
self.set_console = windll.kernel32.SetConsoleTextAttribute
self.default_console_text_attributes = WCOLORS['white']
kernel32 = WinDLL(native_string_type('kernel32'), use_last_error=True)
self.write_console = kernel32.WriteConsoleW
self.write_console.argtypes = [wintypes.HANDLE, wintypes.c_wchar_p, wintypes.DWORD, POINTER(wintypes.DWORD), wintypes.LPVOID]
self.write_console.restype = wintypes.BOOL
kernel32.GetConsoleScreenBufferInfo.argtypes = [wintypes.HANDLE, ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO)]
kernel32.GetConsoleScreenBufferInfo.restype = wintypes.BOOL
csbi = CONSOLE_SCREEN_BUFFER_INFO()
if kernel32.GetConsoleScreenBufferInfo(self.file_handle, byref(csbi)):
self.default_console_text_attributes = csbi.wAttributes
self.is_console = True
except:
pass
def write_unicode_text(self, text, ignore_errors=False):
        ' Windows-only method that writes unicode strings correctly to the windows console using the Win32 API '
if self.is_console:
from ctypes import wintypes, byref, c_wchar_p
written = wintypes.DWORD(0)
text = text.replace('\0', '')
chunk = len(text)
while text:
t, text = text[:chunk], text[chunk:]
wt = c_wchar_p(t)
if ispy3:
text_len = len(t.encode('utf-16'))
else:
# Use the fact that len(t) == wcslen(wt) in python 2.7 on
# windows where the python unicode type uses UTF-16
text_len = len(t)
if not self.write_console(self.file_handle, wt, text_len, byref(written), None):
# Older versions of windows can fail to write large strings
# to console with WriteConsoleW (seen it happen on Win XP)
import winerror
err = ctypes.get_last_error()
if err == winerror.ERROR_NOT_ENOUGH_MEMORY and chunk >= 128:
# Retry with a smaller chunk size (give up if chunk < 128)
chunk = chunk // 2
text = t + text
continue
if err == winerror.ERROR_GEN_FAILURE:
# On newer windows, this happens when trying to write
# non-ascii chars to the console and the console is set
# to use raster fonts (the default). In this case
# rather than failing, write an informative error
# message and the asciized version of the text.
print('Non-ASCII text detected. You must set your Console\'s font to'
' Lucida Console or Consolas or some other TrueType font to see this text', file=self.stream, end=' -- ')
from calibre.utils.filenames import ascii_text
print(ascii_text(t + text), file=self.stream, end='')
continue
if not ignore_errors:
raise ctypes.WinError(err)
class ColoredStream(Detect):
def __init__(self, stream=None, fg=None, bg=None, bold=False):
stream = getattr(stream, 'buffer', stream)
Detect.__init__(self, stream)
self.fg, self.bg, self.bold = fg, bg, bold
if self.set_console is not None:
self.wval = to_flag(self.fg, self.bg, bold)
if not self.bg:
self.wval |= self.default_console_text_attributes & 0xF0
def cwrite(self, what):
if not isinstance(what, bytes):
what = what.encode('ascii')
self.stream.write(what)
def __enter__(self):
if not self.isatty:
return self
if self.isansi:
if self.bold:
self.cwrite(ATTRIBUTES['bold'])
if self.bg is not None:
self.cwrite(BACKGROUNDS[self.bg])
if self.fg is not None:
self.cwrite(COLORS[self.fg])
elif self.set_console is not None:
if self.wval != 0:
self.set_console(self.file_handle, self.wval)
return self
def __exit__(self, *args, **kwargs):
if not self.isatty:
return
if not self.fg and not self.bg and not self.bold:
return
if self.isansi:
self.cwrite(RESET)
self.stream.flush()
elif self.set_console is not None:
self.set_console(self.file_handle, self.default_console_text_attributes)
class ANSIStream(Detect):
ANSI_RE = r'\033\[((?:\d|;)*)([a-zA-Z])'
def __init__(self, stream=None):
super(ANSIStream, self).__init__(stream)
self.encoding = getattr(self.stream, 'encoding', 'utf-8') or 'utf-8'
self.stream_takes_unicode = hasattr(self.stream, 'buffer')
self.last_state = (None, None, False)
self._ansi_re_bin = self._ansi_re_unicode = None
def ansi_re(self, binary=False):
attr = '_ansi_re_bin' if binary else '_ansi_re_unicode'
ans = getattr(self, attr)
if ans is None:
expr = self.ANSI_RE
if binary:
expr = expr.encode('ascii')
ans = re.compile(expr)
setattr(self, attr, ans)
return ans
def write(self, text):
if not self.isatty:
return self.strip_and_write(text)
if self.isansi:
return self.stream.write(text)
if not self.isansi and self.set_console is None:
return self.strip_and_write(text)
self.write_and_convert(text)
def polyglot_write(self, text):
binary = isinstance(text, bytes)
stream = self.stream
if self.stream_takes_unicode:
if binary:
stream = self.stream.buffer
else:
if not binary:
text = text.encode(self.encoding, 'replace')
stream.write(text)
def strip_and_write(self, text):
binary = isinstance(text, bytes)
pat = self.ansi_re(binary)
repl = b'' if binary else ''
self.polyglot_write(pat.sub(repl, text))
def write_and_convert(self, text):
'''
Write the given text to our wrapped stream, stripping any ANSI
sequences from the text, and optionally converting them into win32
calls.
'''
cursor = 0
binary = isinstance(text, bytes)
for match in self.ansi_re(binary).finditer(text):
start, end = match.span()
self.write_plain_text(text, cursor, start)
self.convert_ansi(*match.groups())
cursor = end
self.write_plain_text(text, cursor, len(text))
self.set_console(self.file_handle, self.default_console_text_attributes)
self.stream.flush()
def write_plain_text(self, text, start, end):
if start < end:
text = text[start:end]
if self.is_console and isinstance(text, bytes):
try:
utext = text.decode(self.encoding)
except ValueError:
pass
else:
return self.write_unicode_text(utext)
self.polyglot_write(text)
def convert_ansi(self, paramstring, command):
if isinstance(paramstring, bytes):
paramstring = paramstring.decode('ascii', 'replace')
if isinstance(command, bytes):
command = command.decode('ascii', 'replace')
params = self.extract_params(paramstring)
self.call_win32(command, params)
def extract_params(self, paramstring):
def split(paramstring):
for p in paramstring.split(';'):
if p:
yield int(p)
return tuple(split(paramstring))
def call_win32(self, command, params):
if command != 'm':
return
fg, bg, bold = self.last_state
for param in params:
if param in RCOLORS:
fg = RCOLORS[param]
elif param in RBACKGROUNDS:
bg = RBACKGROUNDS[param]
elif param == 1:
bold = True
elif param == 0:
fg, bg, bold = None, None, False
self.last_state = (fg, bg, bold)
if fg or bg or bold:
val = to_flag(fg, bg, bold)
if not bg:
val |= self.default_console_text_attributes & 0xF0
self.set_console(self.file_handle, val)
else:
self.set_console(self.file_handle, self.default_console_text_attributes)
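# Usage sketch: wrap a stream and write text containing ANSI escapes.
# On an ANSI-capable tty the escapes pass through unchanged, on a legacy
# Windows console they are converted to console-attribute calls, and on
# a non-tty they are stripped:
#
#   s = ANSIStream(sys.stdout)
#   s.write('\033[1;31mbold red\033[0m back to normal\n')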
def windows_terminfo():
from ctypes import Structure, byref
from ctypes.wintypes import SHORT, WORD
class COORD(Structure):
"""struct in wincon.h"""
_fields_ = [
('X', SHORT),
('Y', SHORT),
]
class SMALL_RECT(Structure):
"""struct in wincon.h."""
_fields_ = [
("Left", SHORT),
("Top", SHORT),
("Right", SHORT),
("Bottom", SHORT),
]
class CONSOLE_SCREEN_BUFFER_INFO(Structure):
"""struct in wincon.h."""
_fields_ = [
("dwSize", COORD),
("dwCursorPosition", COORD),
("wAttributes", WORD),
("srWindow", SMALL_RECT),
("dwMaximumWindowSize", COORD),
]
csbi = CONSOLE_SCREEN_BUFFER_INFO()
import msvcrt
file_handle = msvcrt.get_osfhandle(sys.stdout.fileno())
from ctypes import windll
success = windll.kernel32.GetConsoleScreenBufferInfo(file_handle,
byref(csbi))
if not success:
raise Exception('stdout is not a console?')
return csbi
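# e.g. windows_terminfo().dwSize is a COORD whose X is the buffer width
# in columns and Y its height in rows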
def get_term_geometry():
import fcntl, termios, struct
def ioctl_GWINSZ(fd):
try:
return struct.unpack(b'HHHH', fcntl.ioctl(fd, termios.TIOCGWINSZ, b'\0'*8))[:2]
except Exception:
return None, None
for f in (sys.stdin, sys.stdout, sys.stderr):
lines, cols = ioctl_GWINSZ(f.fileno())
if lines is not None:
return lines, cols
try:
fd = os.open(os.ctermid(), os.O_RDONLY)
try:
lines, cols = ioctl_GWINSZ(fd)
if lines is not None:
return lines, cols
finally:
os.close(fd)
except Exception:
pass
return None, None
def geometry():
if iswindows:
try:
ti = windows_terminfo()
return (ti.dwSize.X or 80, ti.dwSize.Y or 25)
except Exception:
return 80, 25
else:
try:
lines, cols = get_term_geometry()
if lines is not None:
return cols, lines
except Exception:
pass
return 80, 25
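# Example: fit output to the terminal, with an 80x25 fallback when the
# size cannot be determined:
#
#   cols, lines = geometry()
#   print('-' * cols)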
def test():
s = ANSIStream()
text = [colored(t, fg=t)+'. '+colored(t, fg=t, bold=True)+'.' for t in
('red', 'yellow', 'green', 'white', 'cyan', 'magenta', 'blue',)]
s.write('\n'.join(text))
u = u'\u041c\u0438\u0445\u0430\u0438\u043b fällen'
print()
s.write_unicode_text(u)
print()

View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
"""
Original Perl version by: John Gruber https://daringfireball.net/ 10 May 2008
Python version by Stuart Colville http://muffinresearch.co.uk
Modifications to make it work with non-ascii chars by Kovid Goyal
License: http://www.opensource.org/licenses/mit-license.php
"""
import re
from calibre.utils.icu import capitalize, upper
from polyglot.builtins import unicode_type
__all__ = ['titlecase']
__version__ = '0.5'
SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\\.?|via|vs\\.?'
PUNCT = r"""!"#$%&'()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT)
CAPFIRST = re.compile(unicode_type(r"^[%s]*?(\w)" % PUNCT), flags=re.UNICODE)
SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I|re.U)
SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U)
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)
APOS_SECOND = re.compile(r"^[dol]{1}[']{1}[a-z]+$", re.I)
UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")
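# e.g. APOS_SECOND matches "d'artagnan", UC_INITIALS matches 'J.F.K.'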
_lang = None
def lang():
global _lang
if _lang is None:
from calibre.utils.localization import get_lang
_lang = get_lang().lower()
return _lang
def titlecase(text):
"""
Titlecases the input text.
This filter changes all words to Title Caps, and attempts to be clever
about *un*capitalizing SMALL words like a/an/the in the input.
The list of "SMALL words" which are not capped comes from
the New York Times Manual of Style, plus 'vs' and 'v'.
"""
all_caps = upper(text) == text
pat = re.compile(r'(\s+)')
line = []
for word in pat.split(text):
if not word:
continue
if pat.match(word) is not None:
line.append(word)
continue
if all_caps:
if UC_INITIALS.match(word):
line.append(word)
continue
else:
word = icu_lower(word)
if APOS_SECOND.match(word):
word = word.replace(word[0], icu_upper(word[0]), 1)
word = word[:2] + icu_upper(word[2]) + word[3:]
line.append(word)
continue
if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
line.append(word)
continue
if SMALL_WORDS.match(word):
line.append(icu_lower(word))
continue
hyphenated = []
for item in word.split('-'):
hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item))
line.append("-".join(hyphenated))
result = "".join(line)
result = SMALL_FIRST.sub(lambda m: '%s%s' % (
m.group(1),
capitalize(m.group(2))
), result)
result = SMALL_AFTER_NUM.sub(lambda m: '%s%s' % (m.group(1),
capitalize(m.group(2))
), result)
result = SMALL_LAST.sub(lambda m: capitalize(m.group(0)), result)
result = SUBPHRASE.sub(lambda m: '%s%s' % (
m.group(1),
capitalize(m.group(2))
), result)
return result
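# Expected behaviour, as a sketch:
#   titlecase('the quick brown fox jumps over the lazy dog')
#   -> 'The Quick Brown Fox Jumps Over the Lazy Dog'
#   titlecase('small word at the end is nothing to be afraid of')
#   -> 'Small Word at the End Is Nothing to Be Afraid Of'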

View File

@@ -0,0 +1,95 @@
#!/usr/bin/python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
"""
Get word, character, and Asian character counts
1. Get a word count as a dictionary:
wc = get_wordcount(text)
words = wc['words'] # etc.
2. Get a word count as an object
wc = get_wordcount_obj(text)
words = wc.words # etc.
properties counted:
* characters
* chars_no_spaces
* asian_chars
* non_asian_words
* words
Sourced from:
http://ginstrom.com/scribbles/2008/05/17/counting-words-etc-in-an-html-file-with-python/
http://ginstrom.com/scribbles/2007/10/06/counting-words-characters-and-asian-characters-with-python/
"""
__version__ = 0.1
__author__ = "Ryan Ginstrom"
IDEOGRAPHIC_SPACE = 0x3000
def is_asian(char):
"""Is the character Asian?"""
# 0x3000 is the ideographic space (i.e. double-byte space)
# anything above it is treated as an Asian character (a rough heuristic)
return ord(char) > IDEOGRAPHIC_SPACE
def filter_jchars(c):
"""Filters Asian characters to spaces"""
if is_asian(c):
return ' '
return c
def nonj_len(word):
"""Returns number of non-Asian words in {word}
- 日本語AアジアンB -> 2
- hello -> 1
@param word: A word, possibly containing Asian characters
"""
# Here are the steps:
# 本spam日eggs
# -> [' ', 's', 'p', 'a', 'm', ' ', 'e', 'g', 'g', 's']
# -> ' spam eggs'
# -> ['spam', 'eggs']
# The length of which is 2!
chars = [filter_jchars(c) for c in word]
return len(''.join(chars).split())
def get_wordcount(text):
"""Get the word/character count for text
@param text: The text of the segment
"""
characters = len(text)
chars_no_spaces = sum(not x.isspace() for x in text)
asian_chars = sum(is_asian(x) for x in text)
non_asian_words = nonj_len(text)
words = non_asian_words + asian_chars
return dict(characters=characters,
chars_no_spaces=chars_no_spaces,
asian_chars=asian_chars,
non_asian_words=non_asian_words,
words=words)
def dict2obj(dictionary):
"""Transform a dictionary into an object"""
class Obj(object):
def __init__(self, dictionary):
self.__dict__.update(dictionary)
return Obj(dictionary)
def get_wordcount_obj(text):
"""Get the wordcount as an object rather than a dictionary"""
return dict2obj(get_wordcount(text))
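# Example sketch:
#   wc = get_wordcount('日本語 spam eggs')
#   wc['asian_chars']      # -> 3
#   wc['non_asian_words']  # -> 2
#   wc['words']            # -> 5 (asian_chars + non_asian_words)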

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from lxml import etree
# resolving of SYSTEM entities is turned off as entities can cause
# reads of local files, for example:
# <!DOCTYPE foo [ <!ENTITY passwd SYSTEM "file:///etc/passwd" >]>
fs = etree.fromstring
class Resolver(etree.Resolver):
def resolve(self, url, id, context):
return self.resolve_string('', context)
def create_parser(recover):
parser = etree.XMLParser(recover=recover, no_network=True)
parser.resolvers.add(Resolver())
return parser
def safe_xml_fromstring(string_or_bytes, recover=True):
return fs(string_or_bytes, parser=create_parser(recover))
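# Usage sketch: SYSTEM entities resolve to the empty string instead of
# reading local files:
#
#   raw = b'<!DOCTYPE d [<!ENTITY e SYSTEM "file:///etc/passwd">]><r>&e;</r>'
#   safe_xml_fromstring(raw).text  # -> None, the entity is not expanded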
def find_tests():
import unittest, tempfile, os
class TestXMLParse(unittest.TestCase):
def setUp(self):
with tempfile.NamedTemporaryFile(delete=False) as tf:
tf.write(b'external')
self.temp_file = tf.name
def tearDown(self):
os.remove(self.temp_file)
def test_safe_xml_fromstring(self):
templ = '''<!DOCTYPE foo [ <!ENTITY e {id} "{val}" > ]><r>&e;</r>'''
external = 'file:///' + self.temp_file.replace(os.sep, '/')
self.assertEqual(etree.fromstring(templ.format(id='SYSTEM', val=external)).text, 'external')
for eid, val, expected in (
('', 'normal entity', 'normal entity'),
('', external, external),
('SYSTEM', external, None),
('SYSTEM', 'http://example.com', None),
('PUBLIC', external, None),
('PUBLIC', 'http://example.com', None),
):
got = getattr(safe_xml_fromstring(templ.format(id=eid, val=val)), 'text', None)
self.assertEqual(got, expected)
return unittest.defaultTestLoader.loadTestsFromTestCase(TestXMLParse)
if __name__ == '__main__':
from calibre.utils.run_tests import run_tests
run_tests(find_tests)

File diff suppressed because it is too large