mirror of https://github.com/gryf/ebook-converter.git
synced 2026-02-22 18:15:49 +01:00
Initial import
56  ebook_converter/utils/__init__.py  Normal file
@@ -0,0 +1,56 @@
#!/usr/bin/env python2

from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
Miscellaneous utilities.
'''

from time import time

from polyglot.builtins import as_bytes


def join_with_timeout(q, timeout=2):
    ''' Join the queue q with a specified timeout. Blocks until all tasks on
    the queue are done or times out with a runtime error. '''
    q.all_tasks_done.acquire()
    try:
        endtime = time() + timeout
        while q.unfinished_tasks:
            remaining = endtime - time()
            if remaining <= 0.0:
                raise RuntimeError('Waiting for queue to clear timed out')
            q.all_tasks_done.wait(remaining)
    finally:
        q.all_tasks_done.release()


def unpickle_binary_string(data):
    # Maintains compatibility with python's pickle module protocol version 2
    import struct
    PROTO, SHORT_BINSTRING, BINSTRING = b'\x80', b'U', b'T'
    if data.startswith(PROTO + b'\x02'):
        offset = 2
        which = data[offset:offset+1]
        offset += 1
        if which == BINSTRING:
            sz, = struct.unpack_from('<i', data, offset)
            offset += struct.calcsize('<i')
        elif which == SHORT_BINSTRING:
            sz = ord(data[offset:offset+1])
            offset += 1
        else:
            return
        return data[offset:offset + sz]


def pickle_binary_string(data):
    # Maintains compatibility with python's pickle module protocol version 2
    import struct
    PROTO, STOP, BINSTRING = b'\x80', b'.', b'T'
    data = as_bytes(data)
    return PROTO + b'\x02' + BINSTRING + struct.pack(b'<i', len(data)) + data + STOP
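Usage sketch (not part of the diff above; the import path is assumed from this repository's layout, and queue.Queue is the standard-library class whose all_tasks_done condition join_with_timeout relies on):

# Sketch only: import path assumed; on Python 2 the stdlib module is Queue.
from queue import Queue

from ebook_converter.utils import (
    join_with_timeout, pickle_binary_string, unpickle_binary_string)

q = Queue()
q.put('job')
q.get()
q.task_done()          # no unfinished tasks remain...
join_with_timeout(q)   # ...so this returns immediately instead of raising

# Round-trip through the minimal pickle protocol-2 subset:
blob = pickle_binary_string(b'hello')
assert unpickle_binary_string(blob) == b'hello'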
98  ebook_converter/utils/cleantext.py  Normal file
@@ -0,0 +1,98 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals

import re
from polyglot.builtins import codepoint_to_chr, map, range, filter
from polyglot.html_entities import name2codepoint
from calibre.constants import plugins, preferred_encoding


_ncxc = plugins['speedup'][0].clean_xml_chars


def native_clean_xml_chars(x):
    if isinstance(x, bytes):
        x = x.decode(preferred_encoding)
    return _ncxc(x)


def ascii_pat(for_binary=False):
    attr = 'binary' if for_binary else 'text'
    ans = getattr(ascii_pat, attr, None)
    if ans is None:
        chars = set(range(32)) - {9, 10, 13}
        chars.add(127)
        pat = '|'.join(map(codepoint_to_chr, chars))
        if for_binary:
            pat = pat.encode('ascii')
        ans = re.compile(pat)
        setattr(ascii_pat, attr, ans)
    return ans


def clean_ascii_chars(txt, charlist=None):
    r'''
    Remove ASCII control chars.
    This is all control chars except \t, \n and \r
    '''
    is_binary = isinstance(txt, bytes)
    empty = b'' if is_binary else ''
    if not txt:
        return empty

    if charlist is None:
        pat = ascii_pat(is_binary)
    else:
        pat = '|'.join(map(codepoint_to_chr, charlist))
        if is_binary:
            pat = pat.encode('utf-8')
        pat = re.compile(pat)  # compile the joined alternation so .sub() below works
    return pat.sub(empty, txt)


def allowed(x):
    x = ord(x)
    return (x != 127 and (31 < x < 0xd7ff or x in (9, 10, 13))) or (0xe000 < x < 0xfffd) or (0x10000 < x < 0x10ffff)


def py_clean_xml_chars(unicode_string):
    return ''.join(filter(allowed, unicode_string))


clean_xml_chars = native_clean_xml_chars or py_clean_xml_chars


def test_clean_xml_chars():
    raw = 'asd\x02a\U00010437x\ud801b\udffe\ud802'
    if native_clean_xml_chars(raw) != 'asda\U00010437xb':
        raise ValueError('Failed to XML clean: %r' % raw)


# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.

def unescape(text, rm=False, rchar=''):
    def fixup(m, rm=rm, rchar=rchar):
        text = m.group(0)
        if text[:2] == "&#":
            # character reference
            try:
                if text[:3] == "&#x":
                    return codepoint_to_chr(int(text[3:-1], 16))
                else:
                    return codepoint_to_chr(int(text[2:-1]))
            except ValueError:
                pass
        else:
            # named entity
            try:
                text = codepoint_to_chr(name2codepoint[text[1:-1]])
            except KeyError:
                pass
        if rm:
            return rchar  # replace by char
        return text  # leave as is
    return re.sub("&#?\\w+;", fixup, text)
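A short behavioral sketch for the two public helpers (not part of the diff above; the import path is assumed from this repository's layout):

# Sketch only: import path assumed.
from ebook_converter.utils.cleantext import clean_ascii_chars, unescape

# Control characters other than \t, \n and \r are removed:
assert clean_ascii_chars('a\x00b\tc\x1fd') == 'ab\tcd'

# Numeric character references are always resolved; named entities are
# resolved too, unless rm=True, in which case they are replaced by rchar:
assert unescape('Tom &amp; Jerry &#38; friends') == 'Tom & Jerry & friends'
assert unescape('Tom &amp; Jerry', rm=True, rchar='+') == 'Tom + Jerry'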
464  ebook_converter/utils/config.py  Normal file
@@ -0,0 +1,464 @@
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'

'''
Manage application-wide preferences.
'''

import optparse
import os
from copy import deepcopy

from calibre.constants import (
    CONFIG_DIR_MODE, __appname__, __author__, config_dir, get_version, iswindows
)
from calibre.utils.config_base import (
    Config, ConfigInterface, ConfigProxy, Option, OptionSet, OptionValues,
    StringConfig, json_dumps, json_loads, make_config_dir, plugin_dir, prefs,
    tweaks, from_json, to_json
)
from calibre.utils.lock import ExclusiveFile
from polyglot.builtins import string_or_bytes, native_string_type


# optparse uses gettext.gettext instead of _ from builtins, so we
# monkey patch it.
optparse._ = _

if False:
    # Make pyflakes happy
    Config, ConfigProxy, Option, OptionValues, StringConfig, OptionSet,
    ConfigInterface, tweaks, plugin_dir, prefs, from_json, to_json


def check_config_write_access():
    return os.access(config_dir, os.W_OK) and os.access(config_dir, os.X_OK)


class CustomHelpFormatter(optparse.IndentedHelpFormatter):

    def format_usage(self, usage):
        from calibre.utils.terminal import colored
        parts = usage.split(' ')
        if parts:
            parts[0] = colored(parts[0], fg='yellow', bold=True)
        usage = ' '.join(parts)
        return colored(_('Usage'), fg='blue', bold=True) + ': ' + usage

    def format_heading(self, heading):
        from calibre.utils.terminal import colored
        return "%*s%s:\n" % (self.current_indent, '',
                             colored(heading, fg='blue', bold=True))

    def format_option(self, option):
        import textwrap
        from calibre.utils.terminal import colored

        result = []
        opts = self.option_strings[option]
        opt_width = self.help_position - self.current_indent - 2
        if len(opts) > opt_width:
            opts = "%*s%s\n" % (self.current_indent, "",
                                colored(opts, fg='green'))
            indent_first = self.help_position
        else:  # start help on same line as opts
            opts = "%*s%-*s  " % (self.current_indent, "", opt_width +
                                  len(colored('', fg='green')), colored(opts, fg='green'))
            indent_first = 0
        result.append(opts)
        if option.help:
            help_text = self.expand_default(option).split('\n')
            help_lines = []

            for line in help_text:
                help_lines.extend(textwrap.wrap(line, self.help_width))
            result.append("%*s%s\n" % (indent_first, "", help_lines[0]))
            result.extend(["%*s%s\n" % (self.help_position, "", line)
                           for line in help_lines[1:]])
        elif opts[-1] != "\n":
            result.append("\n")
        return "".join(result) + '\n'


class OptionParser(optparse.OptionParser):

    def __init__(self,
                 usage='%prog [options] filename',
                 version=None,
                 epilog=None,
                 gui_mode=False,
                 conflict_handler='resolve',
                 **kwds):
        import textwrap
        from calibre.utils.terminal import colored

        usage = textwrap.dedent(usage)
        if epilog is None:
            epilog = _('Created by ') + colored(__author__, fg='cyan')
        usage += '\n\n' + _('''Whenever you pass arguments to %prog that have spaces in them, '''
                            '''enclose the arguments in quotation marks. For example: "{}"''').format(
                                "C:\\some path with spaces" if iswindows else '/some path/with spaces') + '\n'
        if version is None:
            version = '%%prog (%s %s)' % (__appname__, get_version())
        optparse.OptionParser.__init__(self, usage=usage, version=version, epilog=epilog,
                                       formatter=CustomHelpFormatter(),
                                       conflict_handler=conflict_handler, **kwds)
        self.gui_mode = gui_mode
        if False:
            # Translatable string from optparse
            _("Options")
            _("show this help message and exit")
            _("show program's version number and exit")

    def print_usage(self, file=None):
        from calibre.utils.terminal import ANSIStream
        s = ANSIStream(file)
        optparse.OptionParser.print_usage(self, file=s)

    def print_help(self, file=None):
        from calibre.utils.terminal import ANSIStream
        s = ANSIStream(file)
        optparse.OptionParser.print_help(self, file=s)

    def print_version(self, file=None):
        from calibre.utils.terminal import ANSIStream
        s = ANSIStream(file)
        optparse.OptionParser.print_version(self, file=s)

    def error(self, msg):
        if self.gui_mode:
            raise Exception(msg)
        optparse.OptionParser.error(self, msg)

    def merge(self, parser):
        '''
        Add options from parser to self. In case of conflicts, conflicting options from
        parser are skipped.
        '''
        opts = list(parser.option_list)
        groups = list(parser.option_groups)

        def merge_options(options, container):
            for opt in deepcopy(options):
                if not self.has_option(opt.get_opt_string()):
                    container.add_option(opt)

        merge_options(opts, self)

        for group in groups:
            g = self.add_option_group(group.title)
            merge_options(group.option_list, g)

    def subsume(self, group_name, msg=''):
        '''
        Move all existing options into a subgroup named
        C{group_name} with description C{msg}.
        '''
        opts = [opt for opt in self.options_iter() if opt.get_opt_string() not in ('--version', '--help')]
        self.option_groups = []
        subgroup = self.add_option_group(group_name, msg)
        for opt in opts:
            self.remove_option(opt.get_opt_string())
            subgroup.add_option(opt)

    def options_iter(self):
        for opt in self.option_list:
            if native_string_type(opt).strip():
                yield opt
        for gr in self.option_groups:
            for opt in gr.option_list:
                if native_string_type(opt).strip():
                    yield opt

    def option_by_dest(self, dest):
        for opt in self.options_iter():
            if opt.dest == dest:
                return opt

    def merge_options(self, lower, upper):
        '''
        Merge options in lower and upper option lists into upper.
        Default values in upper are overridden by
        non-default values in lower.
        '''
        for dest in lower.__dict__.keys():
            if dest not in upper.__dict__:
                continue
            opt = self.option_by_dest(dest)
            if lower.__dict__[dest] != opt.default and \
                    upper.__dict__[dest] == opt.default:
                upper.__dict__[dest] = lower.__dict__[dest]

    def add_option_group(self, *args, **kwargs):
        if isinstance(args[0], string_or_bytes):
            args = list(args)
            args[0] = native_string_type(args[0])
        return optparse.OptionParser.add_option_group(self, *args, **kwargs)


class DynamicConfig(dict):
    '''
    A replacement for QSettings that supports dynamic config keys.
    Returns `None` if a config key is not found. Note that the config
    data is stored in a JSON file.
    '''

    def __init__(self, name='dynamic'):
        dict.__init__(self, {})
        self.name = name
        self.defaults = {}
        self.refresh()

    @property
    def file_path(self):
        return os.path.join(config_dir, self.name + '.pickle.json')

    def decouple(self, prefix):
        self.name = prefix + self.name
        self.refresh()

    def read_old_serialized_representation(self):
        from calibre.utils.shared_file import share_open
        from calibre.utils.serialize import pickle_loads
        path = self.file_path.rpartition('.')[0]
        try:
            with share_open(path, 'rb') as f:
                raw = f.read()
        except EnvironmentError:
            raw = b''
        try:
            d = pickle_loads(raw).copy()
        except Exception:
            d = {}
        return d

    def refresh(self, clear_current=True):
        d = {}
        migrate = False
        if clear_current:
            self.clear()
        if os.path.exists(self.file_path):
            with ExclusiveFile(self.file_path) as f:
                raw = f.read()
            if raw:
                try:
                    d = json_loads(raw)
                except Exception as err:
                    print('Failed to de-serialize JSON representation of stored dynamic data for {} with error: {}'.format(
                        self.name, err))
            else:
                d = self.read_old_serialized_representation()
                migrate = bool(d)
        else:
            d = self.read_old_serialized_representation()
            migrate = bool(d)
        if migrate and d:
            raw = json_dumps(d, ignore_unserializable=True)
            with ExclusiveFile(self.file_path) as f:
                f.seek(0), f.truncate()
                f.write(raw)

        self.update(d)

    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self.defaults.get(key, None)

    def get(self, key, default=None):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self.defaults.get(key, default)

    def __setitem__(self, key, val):
        dict.__setitem__(self, key, val)
        self.commit()

    def set(self, key, val):
        self.__setitem__(key, val)

    def commit(self):
        if not getattr(self, 'name', None):
            return
        if not os.path.exists(self.file_path):
            make_config_dir()
        raw = json_dumps(self)
        with ExclusiveFile(self.file_path) as f:
            f.seek(0)
            f.truncate()
            f.write(raw)


dynamic = DynamicConfig()


class XMLConfig(dict):

    '''
    Similar to :class:`DynamicConfig`, except that it uses an XML storage
    backend instead of a pickle file.

    See `https://docs.python.org/dev/library/plistlib.html`_ for the supported
    data types.
    '''

    EXTENSION = '.plist'

    def __init__(self, rel_path_to_cf_file, base_path=config_dir):
        dict.__init__(self)
        self.no_commit = False
        self.defaults = {}
        self.file_path = os.path.join(base_path,
                                      *(rel_path_to_cf_file.split('/')))
        self.file_path = os.path.abspath(self.file_path)
        if not self.file_path.endswith(self.EXTENSION):
            self.file_path += self.EXTENSION

        self.refresh()

    def mtime(self):
        try:
            return os.path.getmtime(self.file_path)
        except EnvironmentError:
            return 0

    def touch(self):
        try:
            os.utime(self.file_path, None)
        except EnvironmentError:
            pass

    def raw_to_object(self, raw):
        from polyglot.plistlib import loads
        return loads(raw)

    def to_raw(self):
        from polyglot.plistlib import dumps
        return dumps(self)

    def decouple(self, prefix):
        self.file_path = os.path.join(os.path.dirname(self.file_path), prefix + os.path.basename(self.file_path))
        self.refresh()

    def refresh(self, clear_current=True):
        d = {}
        if os.path.exists(self.file_path):
            with ExclusiveFile(self.file_path) as f:
                raw = f.read()
                try:
                    d = self.raw_to_object(raw) if raw.strip() else {}
                except SystemError:
                    pass
                except:
                    import traceback
                    traceback.print_exc()
                    d = {}
        if clear_current:
            self.clear()
        self.update(d)

    def __getitem__(self, key):
        from polyglot.plistlib import Data
        try:
            ans = dict.__getitem__(self, key)
            if isinstance(ans, Data):
                ans = ans.data
            return ans
        except KeyError:
            return self.defaults.get(key, None)

    def get(self, key, default=None):
        from polyglot.plistlib import Data
        try:
            ans = dict.__getitem__(self, key)
            if isinstance(ans, Data):
                ans = ans.data
            return ans
        except KeyError:
            return self.defaults.get(key, default)

    def __setitem__(self, key, val):
        from polyglot.plistlib import Data
        if isinstance(val, bytes):
            val = Data(val)
        dict.__setitem__(self, key, val)
        self.commit()

    def set(self, key, val):
        self.__setitem__(key, val)

    def __delitem__(self, key):
        try:
            dict.__delitem__(self, key)
        except KeyError:
            pass  # ignore missing keys
        else:
            self.commit()

    def commit(self):
        if self.no_commit:
            return
        if hasattr(self, 'file_path') and self.file_path:
            dpath = os.path.dirname(self.file_path)
            if not os.path.exists(dpath):
                os.makedirs(dpath, mode=CONFIG_DIR_MODE)
            with ExclusiveFile(self.file_path) as f:
                raw = self.to_raw()
                f.seek(0)
                f.truncate()
                f.write(raw)

    def __enter__(self):
        self.no_commit = True

    def __exit__(self, *args):
        self.no_commit = False
        self.commit()


class JSONConfig(XMLConfig):

    EXTENSION = '.json'

    def raw_to_object(self, raw):
        return json_loads(raw)

    def to_raw(self):
        return json_dumps(self)

    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self.defaults[key]

    def get(self, key, default=None):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self.defaults.get(key, default)

    def __setitem__(self, key, val):
        dict.__setitem__(self, key, val)
        self.commit()


class DevicePrefs:

    def __init__(self, global_prefs):
        self.global_prefs = global_prefs
        self.overrides = {}

    def set_overrides(self, **kwargs):
        self.overrides = kwargs.copy()

    def __getitem__(self, key):
        return self.overrides.get(key, self.global_prefs[key])


device_prefs = DevicePrefs(prefs)
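A hypothetical usage sketch for these classes (not part of the diff above; 'myplugin' and its options are invented names):

# Hypothetical sketch; import path assumed from this repository's layout.
from ebook_converter.utils.config import Config, JSONConfig

c = Config('myplugin', 'example preferences')   # stored as myplugin.py.json
c.add_opt('font_size', default=12, help='Base font size')
opts = c.parse()                                # an OptionValues instance
print(opts.font_size)                           # -> 12 until changed
c.set('font_size', 14)                          # written back to disk

# JSONConfig is a dict that commits to its JSON file on every assignment:
prefs = JSONConfig('plugins/myplugin')          # plugins/myplugin.json
prefs.defaults['theme'] = 'light'
prefs['theme'] = 'dark'                         # persisted immediately
print(prefs.get('missing', 'fallback'))         # -> 'fallback'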
674  ebook_converter/utils/config_base.py  Normal file
@@ -0,0 +1,674 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, re, traceback, numbers
from functools import partial
from collections import defaultdict
from copy import deepcopy

from calibre.utils.lock import ExclusiveFile
from calibre.constants import config_dir, CONFIG_DIR_MODE, ispy3, preferred_encoding, filesystem_encoding, iswindows
from polyglot.builtins import unicode_type, iteritems, map

plugin_dir = os.path.join(config_dir, 'plugins')


def parse_old_style(src):
    if ispy3:
        import pickle as cPickle
    else:
        import cPickle
    options = {'cPickle': cPickle}
    try:
        if not isinstance(src, unicode_type):
            src = src.decode('utf-8')
        src = src.replace('PyQt%d.QtCore' % 4, 'PyQt5.QtCore')
        src = re.sub(r'cPickle\.loads\(([\'"])', r'cPickle.loads(b\1', src)
        exec(src, options)
    except Exception as err:
        try:
            print('Failed to parse old style options string with error: {}'.format(err))
        except Exception:
            pass
    return options


def to_json(obj):
    import datetime
    if isinstance(obj, bytearray):
        from base64 import standard_b64encode
        return {'__class__': 'bytearray',
                '__value__': standard_b64encode(bytes(obj)).decode('ascii')}
    if isinstance(obj, datetime.datetime):
        from calibre.utils.date import isoformat
        return {'__class__': 'datetime.datetime',
                '__value__': isoformat(obj, as_utc=True)}
    if isinstance(obj, (set, frozenset)):
        return {'__class__': 'set', '__value__': tuple(obj)}
    if isinstance(obj, bytes):
        return obj.decode('utf-8')
    if hasattr(obj, 'toBase64'):  # QByteArray
        return {'__class__': 'bytearray',
                '__value__': bytes(obj.toBase64()).decode('ascii')}
    raise TypeError(repr(obj) + ' is not JSON serializable')


def safe_to_json(obj):
    try:
        return to_json(obj)
    except Exception:
        pass


def from_json(obj):
    custom = obj.get('__class__')
    if custom is not None:
        if custom == 'bytearray':
            from base64 import standard_b64decode
            return bytearray(standard_b64decode(obj['__value__'].encode('ascii')))
        if custom == 'datetime.datetime':
            from calibre.utils.iso8601 import parse_iso8601
            return parse_iso8601(obj['__value__'], assume_utc=True)
        if custom == 'set':
            return set(obj['__value__'])
    return obj


def force_unicode(x):
    try:
        return x.decode('mbcs' if iswindows else preferred_encoding)
    except UnicodeDecodeError:
        try:
            return x.decode(filesystem_encoding)
        except UnicodeDecodeError:
            return x.decode('utf-8', 'replace')


def force_unicode_recursive(obj):
    if isinstance(obj, bytes):
        return force_unicode(obj)
    if isinstance(obj, (list, tuple)):
        return type(obj)(map(force_unicode_recursive, obj))
    if isinstance(obj, dict):
        return {force_unicode_recursive(k): force_unicode_recursive(v) for k, v in iteritems(obj)}
    return obj


def json_dumps(obj, ignore_unserializable=False):
    import json
    try:
        ans = json.dumps(obj, indent=2, default=safe_to_json if ignore_unserializable else to_json, sort_keys=True, ensure_ascii=False)
    except UnicodeDecodeError:
        obj = force_unicode_recursive(obj)
        ans = json.dumps(obj, indent=2, default=safe_to_json if ignore_unserializable else to_json, sort_keys=True, ensure_ascii=False)
    if not isinstance(ans, bytes):
        ans = ans.encode('utf-8')
    return ans


def json_loads(raw):
    import json
    if isinstance(raw, bytes):
        raw = raw.decode('utf-8')
    return json.loads(raw, object_hook=from_json)


def make_config_dir():
    if not os.path.exists(plugin_dir):
        os.makedirs(plugin_dir, mode=CONFIG_DIR_MODE)


class Option(object):

    def __init__(self, name, switches=[], help='', type=None, choices=None,
                 check=None, group=None, default=None, action=None, metavar=None):
        if choices:
            type = 'choice'

        self.name = name
        self.switches = switches
        self.help = help.replace('%default', repr(default)) if help else None
        self.type = type
        if self.type is None and action is None and choices is None:
            if isinstance(default, float):
                self.type = 'float'
            elif isinstance(default, numbers.Integral) and not isinstance(default, bool):
                self.type = 'int'

        self.choices = choices
        self.check = check
        self.group = group
        self.default = default
        self.action = action
        self.metavar = metavar

    def __eq__(self, other):
        return self.name == getattr(other, 'name', other)

    def __repr__(self):
        return 'Option: ' + self.name

    def __str__(self):
        return repr(self)


class OptionValues(object):

    def copy(self):
        return deepcopy(self)


class OptionSet(object):

    OVERRIDE_PAT = re.compile(r'#{3,100} Override Options #{15}(.*?)#{3,100} End Override #{3,100}',
                              re.DOTALL | re.IGNORECASE)

    def __init__(self, description=''):
        self.description = description
        self.defaults = {}
        self.preferences = []
        self.group_list = []
        self.groups = {}
        self.set_buffer = {}
        self.loads_pat = None

    def has_option(self, name_or_option_object):
        if name_or_option_object in self.preferences:
            return True
        for p in self.preferences:
            if p.name == name_or_option_object:
                return True
        return False

    def get_option(self, name_or_option_object):
        idx = self.preferences.index(name_or_option_object)
        if idx > -1:
            return self.preferences[idx]
        for p in self.preferences:
            if p.name == name_or_option_object:
                return p

    def add_group(self, name, description=''):
        if name in self.group_list:
            raise ValueError('A group by the name %s already exists in this set' % name)
        self.groups[name] = description
        self.group_list.append(name)
        return partial(self.add_opt, group=name)

    def update(self, other):
        for name in other.groups.keys():
            self.groups[name] = other.groups[name]
            if name not in self.group_list:
                self.group_list.append(name)
        for pref in other.preferences:
            if pref in self.preferences:
                self.preferences.remove(pref)
            self.preferences.append(pref)

    def smart_update(self, opts1, opts2):
        '''
        Updates the preference values in opts1 using only the non-default preference values in opts2.
        '''
        for pref in self.preferences:
            new = getattr(opts2, pref.name, pref.default)
            if new != pref.default:
                setattr(opts1, pref.name, new)

    def remove_opt(self, name):
        if name in self.preferences:
            self.preferences.remove(name)

    def add_opt(self, name, switches=[], help=None, type=None, choices=None,
                group=None, default=None, action=None, metavar=None):
        '''
        Add an option to this section.

        :param name:     The name of this option. Must be a valid Python identifier.
                         Must also be unique in this OptionSet and all its subsets.
        :param switches: List of command line switches for this option
                         (as supplied to :module:`optparse`). If empty, this
                         option will not be added to the command line parser.
        :param help:     Help text.
        :param type:     Type checking of option values. Supported types are:
                         `None, 'choice', 'complex', 'float', 'int', 'string'`.
        :param choices:  List of strings or `None`.
        :param group:    Group this option belongs to. You must previously
                         have created this group with a call to :method:`add_group`.
        :param default:  The default value for this option.
        :param action:   The action to pass to optparse. Supported values are:
                         `None, 'count'`. For choices and boolean options,
                         action is automatically set correctly.
        '''
        pref = Option(name, switches=switches, help=help, type=type, choices=choices,
                      group=group, default=default, action=action, metavar=None)
        if group is not None and group not in self.groups.keys():
            raise ValueError('Group %s has not been added to this section' % group)
        if pref in self.preferences:
            raise ValueError('An option with the name %s already exists in this set.' % name)
        self.preferences.append(pref)
        self.defaults[name] = default

    def retranslate_help(self):
        t = _
        for opt in self.preferences:
            if opt.help:
                opt.help = t(opt.help)
                if opt.name == 'use_primary_find_in_search':
                    opt.help = opt.help.format(u'ñ')

    def option_parser(self, user_defaults=None, usage='', gui_mode=False):
        from calibre.utils.config import OptionParser
        parser = OptionParser(usage, gui_mode=gui_mode)
        groups = defaultdict(lambda: parser)
        for group, desc in self.groups.items():
            groups[group] = parser.add_option_group(group.upper(), desc)

        for pref in self.preferences:
            if not pref.switches:
                continue
            g = groups[pref.group]
            action = pref.action
            if action is None:
                action = 'store'
                if pref.default is True or pref.default is False:
                    action = 'store_' + ('false' if pref.default else 'true')
            args = dict(
                dest=pref.name,
                help=pref.help,
                metavar=pref.metavar,
                type=pref.type,
                choices=pref.choices,
                default=getattr(user_defaults, pref.name, pref.default),
                action=action,
            )
            g.add_option(*pref.switches, **args)

        return parser

    def get_override_section(self, src):
        match = self.OVERRIDE_PAT.search(src)
        if match:
            return match.group()
        return ''

    def parse_string(self, src):
        options = {}
        if src:
            is_old_style = (isinstance(src, bytes) and src.startswith(b'#')) or (isinstance(src, unicode_type) and src.startswith(u'#'))
            if is_old_style:
                options = parse_old_style(src)
            else:
                try:
                    options = json_loads(src)
                    if not isinstance(options, dict):
                        raise Exception('options is not a dictionary')
                except Exception as err:
                    try:
                        print('Failed to parse options string with error: {}'.format(err))
                    except Exception:
                        pass
        opts = OptionValues()
        for pref in self.preferences:
            val = options.get(pref.name, pref.default)
            formatter = __builtins__.get(pref.type, None)
            if callable(formatter):
                val = formatter(val)
            setattr(opts, pref.name, val)

        return opts

    def serialize(self, opts, ignore_unserializable=False):
        data = {pref.name: getattr(opts, pref.name, pref.default) for pref in self.preferences}
        return json_dumps(data, ignore_unserializable=ignore_unserializable)


class ConfigInterface(object):

    def __init__(self, description):
        self.option_set = OptionSet(description=description)
        self.add_opt = self.option_set.add_opt
        self.add_group = self.option_set.add_group
        self.remove_opt = self.remove = self.option_set.remove_opt
        self.parse_string = self.option_set.parse_string
        self.get_option = self.option_set.get_option
        self.preferences = self.option_set.preferences

    def update(self, other):
        self.option_set.update(other.option_set)

    def option_parser(self, usage='', gui_mode=False):
        return self.option_set.option_parser(user_defaults=self.parse(),
                                             usage=usage, gui_mode=gui_mode)

    def smart_update(self, opts1, opts2):
        self.option_set.smart_update(opts1, opts2)


class Config(ConfigInterface):
    '''
    A file based configuration.
    '''

    def __init__(self, basename, description=''):
        ConfigInterface.__init__(self, description)
        self.filename_base = basename

    @property
    def config_file_path(self):
        return os.path.join(config_dir, self.filename_base + '.py.json')

    def parse(self):
        src = ''
        migrate = False
        path = self.config_file_path
        if os.path.exists(path):
            with ExclusiveFile(path) as f:
                try:
                    src = f.read().decode('utf-8')
                except ValueError:
                    print("Failed to parse", path)
                    traceback.print_exc()
        if not src:
            path = path.rpartition('.')[0]
            from calibre.utils.shared_file import share_open
            try:
                with share_open(path, 'rb') as f:
                    src = f.read().decode('utf-8')
            except Exception:
                pass
            else:
                migrate = bool(src)
        ans = self.option_set.parse_string(src)
        if migrate:
            new_src = self.option_set.serialize(ans, ignore_unserializable=True)
            with ExclusiveFile(self.config_file_path) as f:
                f.seek(0), f.truncate()
                f.write(new_src)
        return ans

    def set(self, name, val):
        if not self.option_set.has_option(name):
            raise ValueError('The option %s is not defined.' % name)
        if not os.path.exists(config_dir):
            make_config_dir()
        with ExclusiveFile(self.config_file_path) as f:
            src = f.read()
            opts = self.option_set.parse_string(src)
            setattr(opts, name, val)
            src = self.option_set.serialize(opts)
            f.seek(0)
            f.truncate()
            if isinstance(src, unicode_type):
                src = src.encode('utf-8')
            f.write(src)


class StringConfig(ConfigInterface):
    '''
    A string based configuration
    '''

    def __init__(self, src, description=''):
        ConfigInterface.__init__(self, description)
        self.set_src(src)

    def set_src(self, src):
        self.src = src
        if isinstance(self.src, bytes):
            self.src = self.src.decode('utf-8')

    def parse(self):
        return self.option_set.parse_string(self.src)

    def set(self, name, val):
        if not self.option_set.has_option(name):
            raise ValueError('The option %s is not defined.' % name)
        opts = self.option_set.parse_string(self.src)
        setattr(opts, name, val)
        self.set_src(self.option_set.serialize(opts))


class ConfigProxy(object):
    '''
    A Proxy to minimize file reads for widely used config settings
    '''

    def __init__(self, config):
        self.__config = config
        self.__opts = None

    @property
    def defaults(self):
        return self.__config.option_set.defaults

    def refresh(self):
        self.__opts = self.__config.parse()

    def retranslate_help(self):
        self.__config.option_set.retranslate_help()

    def __getitem__(self, key):
        return self.get(key)

    def __setitem__(self, key, val):
        return self.set(key, val)

    def __delitem__(self, key):
        self.set(key, self.defaults[key])

    def get(self, key):
        if self.__opts is None:
            self.refresh()
        return getattr(self.__opts, key)

    def set(self, key, val):
        if self.__opts is None:
            self.refresh()
        setattr(self.__opts, key, val)
        return self.__config.set(key, val)

    def help(self, key):
        return self.__config.get_option(key).help


def create_global_prefs(conf_obj=None):
    c = Config('global', 'calibre wide preferences') if conf_obj is None else conf_obj
    c.add_opt('database_path',
              default=os.path.expanduser('~/library1.db'),
              help=_('Path to the database in which books are stored'))
    c.add_opt('filename_pattern', default=u'(?P<title>.+) - (?P<author>[^_]+)',
              help=_('Pattern to guess metadata from filenames'))
    c.add_opt('isbndb_com_key', default='',
              help=_('Access key for isbndb.com'))
    c.add_opt('network_timeout', default=5,
              help=_('Default timeout for network operations (seconds)'))
    c.add_opt('library_path', default=None,
              help=_('Path to directory in which your library of books is stored'))
    c.add_opt('language', default=None,
              help=_('The language in which to display the user interface'))
    c.add_opt('output_format', default='EPUB',
              help=_('The default output format for e-book conversions. When auto-converting'
                     ' to send to a device this can be overridden by individual device preferences.'
                     ' These can be changed by right clicking the device icon in calibre and'
                     ' choosing "Configure".'))
    c.add_opt('input_format_order', default=['EPUB', 'AZW3', 'MOBI', 'LIT', 'PRC',
              'FB2', 'HTML', 'HTM', 'XHTM', 'SHTML', 'XHTML', 'ZIP', 'DOCX', 'ODT', 'RTF', 'PDF',
              'TXT'],
              help=_('Ordered list of formats to prefer for input.'))
    c.add_opt('read_file_metadata', default=True,
              help=_('Read metadata from files'))
    c.add_opt('worker_process_priority', default='normal',
              help=_('The priority of worker processes. A higher priority '
                     'means they run faster and consume more resources. '
                     'Most tasks like conversion/news download/adding books/etc. '
                     'are affected by this setting.'))
    c.add_opt('swap_author_names', default=False,
              help=_('Swap author first and last names when reading metadata'))
    c.add_opt('add_formats_to_existing', default=False,
              help=_('Add new formats to existing book records'))
    c.add_opt('check_for_dupes_on_ctl', default=False,
              help=_('Check for duplicates when copying to another library'))
    c.add_opt('installation_uuid', default=None, help='Installation UUID')
    c.add_opt('new_book_tags', default=[], help=_('Tags to apply to books added to the library'))
    c.add_opt('mark_new_books', default=False, help=_(
        'Mark newly added books. The mark is a temporary mark that is automatically removed when calibre is restarted.'))

    # these are here instead of the gui preferences because calibredb and
    # calibre server can execute searches
    c.add_opt('saved_searches', default={}, help=_('List of named saved searches'))
    c.add_opt('user_categories', default={}, help=_('User-created Tag browser categories'))
    c.add_opt('manage_device_metadata', default='manual',
              help=_('How and when calibre updates metadata on the device.'))
    c.add_opt('limit_search_columns', default=False,
              help=_('When searching for text without using lookup '
                     'prefixes, as for example, Red instead of title:Red, '
                     'limit the columns searched to those named below.'))
    c.add_opt('limit_search_columns_to',
              default=['title', 'authors', 'tags', 'series', 'publisher'],
              help=_('Choose columns to be searched when not using prefixes, '
                     'as for example, when searching for Red instead of '
                     'title:Red. Enter a list of search/lookup names '
                     'separated by commas. Only takes effect if you set the option '
                     'to limit search columns above.'))
    c.add_opt('use_primary_find_in_search', default=True,
              help=_(u'Characters typed in the search box will match their '
                     'accented versions, based on the language you have chosen '
                     'for the calibre interface. For example, in '
                     u'English, searching for n will match both {} and n, but if '
                     'your language is Spanish it will only match n. Note that '
                     'this is much slower than a simple search on very large '
                     'libraries. Also, this option will have no effect if you turn '
                     'on case-sensitive searching'))
    c.add_opt('case_sensitive', default=False, help=_(
        'Make searches case-sensitive'))

    c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.')
    return c


prefs = ConfigProxy(create_global_prefs())
if prefs['installation_uuid'] is None:
    import uuid
    prefs['installation_uuid'] = unicode_type(uuid.uuid4())

# Read tweaks


def tweaks_file():
    return os.path.join(config_dir, 'tweaks.json')


def make_unicode(obj):
    if isinstance(obj, bytes):
        try:
            return obj.decode('utf-8')
        except UnicodeDecodeError:
            return obj.decode(preferred_encoding, errors='replace')
    if isinstance(obj, (list, tuple)):
        return list(map(make_unicode, obj))
    if isinstance(obj, dict):
        return {make_unicode(k): make_unicode(v) for k, v in iteritems(obj)}
    return obj


def normalize_tweak(val):
    if isinstance(val, (list, tuple)):
        return tuple(map(normalize_tweak, val))
    if isinstance(val, dict):
        return {k: normalize_tweak(v) for k, v in iteritems(val)}
    return val


def write_custom_tweaks(tweaks_dict):
    make_config_dir()
    tweaks_dict = make_unicode(tweaks_dict)
    changed_tweaks = {}
    default_tweaks = exec_tweaks(default_tweaks_raw())
    for key, cval in iteritems(tweaks_dict):
        if key in default_tweaks and normalize_tweak(cval) == normalize_tweak(default_tweaks[key]):
            continue
        changed_tweaks[key] = cval
    raw = json_dumps(changed_tweaks)
    with open(tweaks_file(), 'wb') as f:
        f.write(raw)


def exec_tweaks(path):
    if isinstance(path, bytes):
        raw = path
        fname = '<string>'
    else:
        with open(path, 'rb') as f:
            raw = f.read()
            fname = f.name
    code = compile(raw, fname, 'exec')
    l = {}
    g = {'__file__': fname}
    exec(code, g, l)
    return l


def read_custom_tweaks():
    make_config_dir()
    tf = tweaks_file()
    ans = {}
    if os.path.exists(tf):
        with open(tf, 'rb') as f:
            raw = f.read()
        raw = raw.strip()
        if not raw:
            return ans
        try:
            return json_loads(raw)
        except Exception:
            import traceback
            traceback.print_exc()
            return ans
    old_tweaks_file = tf.rpartition('.')[0] + '.py'
    if os.path.exists(old_tweaks_file):
        ans = exec_tweaks(old_tweaks_file)
        ans = make_unicode(ans)
        write_custom_tweaks(ans)
    return ans


def default_tweaks_raw():
    return P('default_tweaks.py', data=True, allow_user_override=False)


def read_tweaks():
    default_tweaks = exec_tweaks(default_tweaks_raw())
    try:
        custom_tweaks = read_custom_tweaks()
    except Exception:
        custom_tweaks = {}
    default_tweaks.update(custom_tweaks)
    return default_tweaks


tweaks = read_tweaks()


def reset_tweaks_to_default():
    default_tweaks = exec_tweaks(default_tweaks_raw())
    tweaks.clear()
    tweaks.update(default_tweaks)


class Tweak(object):

    def __init__(self, name, value):
        self.name, self.value = name, value

    def __enter__(self):
        self.origval = tweaks[self.name]
        tweaks[self.name] = self.value

    def __exit__(self, *args):
        tweaks[self.name] = self.origval
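Two behaviors worth illustrating (a sketch, not part of the diff above; the tweak name comes from default_tweaks.py below):

# Sketch: the to_json/from_json codec round-trips types plain JSON cannot hold.
from ebook_converter.utils.config_base import json_dumps, json_loads, Tweak

raw = json_dumps({'tags': {'a', 'b'}, 'blob': bytearray(b'\x00\x01')})
data = json_loads(raw)
assert data['tags'] == {'a', 'b'}              # restored as a set
assert data['blob'] == bytearray(b'\x00\x01')  # restored as a bytearray

# Tweak is a context manager that overrides a tweak value, then restores it:
with Tweak('series_index_auto_increment', 'first_free'):
    pass  # code in this block sees the overridden value in tweaks[...]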
485  ebook_converter/utils/date.py  Normal file
@@ -0,0 +1,485 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
from datetime import datetime, time as dtime, timedelta, MINYEAR, MAXYEAR
|
||||
from functools import partial
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.constants import iswindows, isosx, plugins, preferred_encoding
|
||||
from calibre.utils.iso8601 import utc_tz, local_tz, UNDEFINED_DATE
|
||||
from calibre.utils.localization import lcdata
|
||||
from polyglot.builtins import unicode_type, native_string_type
|
||||
|
||||
_utc_tz = utc_tz
|
||||
_local_tz = local_tz
|
||||
|
||||
# When parsing ambiguous dates that could be either dd-MM Or MM-dd use the
|
||||
# user's locale preferences
|
||||
if iswindows:
|
||||
import ctypes
|
||||
LOCALE_SSHORTDATE, LOCALE_USER_DEFAULT = 0x1f, 0
|
||||
buf = ctypes.create_string_buffer(b'\0', 255)
|
||||
try:
|
||||
ctypes.windll.kernel32.GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SSHORTDATE, buf, 255)
|
||||
parse_date_day_first = buf.value.index(b'd') < buf.value.index(b'M')
|
||||
except:
|
||||
parse_date_day_first = False
|
||||
del ctypes, LOCALE_SSHORTDATE, buf, LOCALE_USER_DEFAULT
|
||||
elif isosx:
|
||||
try:
|
||||
date_fmt = plugins['usbobserver'][0].date_format()
|
||||
parse_date_day_first = date_fmt.index('d') < date_fmt.index('M')
|
||||
except:
|
||||
parse_date_day_first = False
|
||||
else:
|
||||
try:
|
||||
def first_index(raw, queries):
|
||||
for q in queries:
|
||||
try:
|
||||
return raw.index(native_string_type(q))
|
||||
except ValueError:
|
||||
pass
|
||||
return -1
|
||||
|
||||
import locale
|
||||
raw = locale.nl_langinfo(locale.D_FMT)
|
||||
parse_date_day_first = first_index(raw, ('%d', '%a', '%A')) < first_index(raw, ('%m', '%b', '%B'))
|
||||
del raw, first_index
|
||||
except:
|
||||
parse_date_day_first = False
|
||||
|
||||
DEFAULT_DATE = datetime(2000,1,1, tzinfo=utc_tz)
|
||||
EPOCH = datetime(1970, 1, 1, tzinfo=_utc_tz)
|
||||
|
||||
|
||||
def is_date_undefined(qt_or_dt):
|
||||
d = qt_or_dt
|
||||
if d is None:
|
||||
return True
|
||||
if hasattr(d, 'toString'):
|
||||
if hasattr(d, 'date'):
|
||||
d = d.date()
|
||||
try:
|
||||
d = datetime(d.year(), d.month(), d.day(), tzinfo=utc_tz)
|
||||
except ValueError:
|
||||
return True # Undefined QDate
|
||||
return d.year < UNDEFINED_DATE.year or (
|
||||
d.year == UNDEFINED_DATE.year and
|
||||
d.month == UNDEFINED_DATE.month and
|
||||
d.day == UNDEFINED_DATE.day)
|
||||
|
||||
|
||||
_iso_pat = None
|
||||
|
||||
|
||||
def iso_pat():
|
||||
global _iso_pat
|
||||
if _iso_pat is None:
|
||||
_iso_pat = re.compile(r'\d{4}[/.-]\d{1,2}[/.-]\d{1,2}')
|
||||
return _iso_pat
|
||||
|
||||
|
||||
def parse_date(date_string, assume_utc=False, as_utc=True, default=None):
|
||||
'''
|
||||
Parse a date/time string into a timezone aware datetime object. The timezone
|
||||
is always either UTC or the local timezone.
|
||||
|
||||
:param assume_utc: If True and date_string does not specify a timezone,
|
||||
assume UTC, otherwise assume local timezone.
|
||||
|
||||
:param as_utc: If True, return a UTC datetime
|
||||
|
||||
:param default: Missing fields are filled in from default. If None, the
|
||||
current month and year are used.
|
||||
'''
|
||||
from dateutil.parser import parse
|
||||
if not date_string:
|
||||
return UNDEFINED_DATE
|
||||
if isinstance(date_string, bytes):
|
||||
date_string = date_string.decode(preferred_encoding, 'replace')
|
||||
if default is None:
|
||||
func = datetime.utcnow if assume_utc else datetime.now
|
||||
default = func().replace(day=15, hour=0, minute=0, second=0, microsecond=0,
|
||||
tzinfo=_utc_tz if assume_utc else _local_tz)
|
||||
if iso_pat().match(date_string) is not None:
|
||||
dt = parse(date_string, default=default)
|
||||
else:
|
||||
dt = parse(date_string, default=default, dayfirst=parse_date_day_first)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz)
|
||||
return dt.astimezone(_utc_tz if as_utc else _local_tz)
|
||||
|
||||
|
||||
def fix_only_date(val):
|
||||
n = val + timedelta(days=1)
|
||||
if n.month > val.month:
|
||||
val = val.replace(day=val.day-1)
|
||||
if val.day == 1:
|
||||
val = val.replace(day=2)
|
||||
return val
|
||||
|
||||
|
||||
def parse_only_date(raw, assume_utc=True, as_utc=True):
|
||||
'''
|
||||
Parse a date string that contains no time information in a manner that
|
||||
guarantees that the month and year are always correct in all timezones, and
|
||||
the day is at most one day wrong.
|
||||
'''
|
||||
f = utcnow if assume_utc else now
|
||||
default = f().replace(hour=0, minute=0, second=0, microsecond=0,
|
||||
day=15)
|
||||
return fix_only_date(parse_date(raw, default=default, assume_utc=assume_utc, as_utc=as_utc))
|
||||
|
||||
|
||||
def strptime(val, fmt, assume_utc=False, as_utc=True):
|
||||
dt = datetime.strptime(val, fmt)
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz)
|
||||
return dt.astimezone(_utc_tz if as_utc else _local_tz)
|
||||
|
||||
|
||||
def dt_factory(time_t, assume_utc=False, as_utc=True):
|
||||
dt = datetime(*(time_t[0:6]))
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz)
|
||||
return dt.astimezone(_utc_tz if as_utc else _local_tz)
|
||||
|
||||
|
||||
safeyear = lambda x: min(max(x, MINYEAR), MAXYEAR)
|
||||
|
||||
|
||||
def qt_to_dt(qdate_or_qdatetime, as_utc=True):
|
||||
o = qdate_or_qdatetime
|
||||
if hasattr(o, 'toUTC'):
|
||||
# QDateTime
|
||||
o = o.toUTC()
|
||||
d, t = o.date(), o.time()
|
||||
try:
|
||||
ans = datetime(safeyear(d.year()), d.month(), d.day(), t.hour(), t.minute(), t.second(), t.msec()*1000, utc_tz)
|
||||
except ValueError:
|
||||
ans = datetime(safeyear(d.year()), d.month(), 1, t.hour(), t.minute(), t.second(), t.msec()*1000, utc_tz)
|
||||
if not as_utc:
|
||||
ans = ans.astimezone(local_tz)
|
||||
return ans
|
||||
|
||||
try:
|
||||
dt = datetime(safeyear(o.year()), o.month(), o.day()).replace(tzinfo=_local_tz)
|
||||
except ValueError:
|
||||
dt = datetime(safeyear(o.year()), o.month(), 1).replace(tzinfo=_local_tz)
|
||||
return dt.astimezone(_utc_tz if as_utc else _local_tz)
|
||||
|
||||
|
||||
def fromtimestamp(ctime, as_utc=True):
|
||||
dt = datetime.utcfromtimestamp(ctime).replace(tzinfo=_utc_tz)
|
||||
if not as_utc:
|
||||
dt = dt.astimezone(_local_tz)
|
||||
return dt
|
||||
|
||||
|
||||
def fromordinal(day, as_utc=True):
|
||||
return datetime.fromordinal(day).replace(
|
||||
tzinfo=_utc_tz if as_utc else _local_tz)
|
||||
|
||||
|
||||
def isoformat(date_time, assume_utc=False, as_utc=True, sep='T'):
|
||||
if not hasattr(date_time, 'tzinfo'):
|
||||
return unicode_type(date_time.isoformat())
|
||||
if date_time.tzinfo is None:
|
||||
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
|
||||
_local_tz)
|
||||
date_time = date_time.astimezone(_utc_tz if as_utc else _local_tz)
|
||||
# native_string_type(sep) because isoformat barfs with unicode sep on python 2.x
|
||||
return unicode_type(date_time.isoformat(native_string_type(sep)))
|
||||
|
||||
|
||||
def internal_iso_format_string():
|
||||
return 'yyyy-MM-ddThh:mm:ss'
|
||||
|
||||
|
||||
def w3cdtf(date_time, assume_utc=False):
|
||||
if hasattr(date_time, 'tzinfo'):
|
||||
if date_time.tzinfo is None:
|
||||
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
|
||||
_local_tz)
|
||||
date_time = date_time.astimezone(_utc_tz if as_utc else _local_tz)
|
||||
return unicode_type(date_time.strftime('%Y-%m-%dT%H:%M:%SZ'))
|
||||
|
||||
|
||||
def as_local_time(date_time, assume_utc=True):
|
||||
if not hasattr(date_time, 'tzinfo'):
|
||||
return date_time
|
||||
if date_time.tzinfo is None:
|
||||
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
|
||||
_local_tz)
|
||||
return date_time.astimezone(_local_tz)
|
||||
|
||||
|
||||
def dt_as_local(dt):
|
||||
if dt.tzinfo is local_tz:
|
||||
return dt
|
||||
return dt.astimezone(local_tz)
|
||||
|
||||
|
||||
def as_utc(date_time, assume_utc=True):
|
||||
if not hasattr(date_time, 'tzinfo'):
|
||||
return date_time
|
||||
if date_time.tzinfo is None:
|
||||
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
|
||||
_local_tz)
|
||||
return date_time.astimezone(_utc_tz)
|
||||
|
||||
|
||||
def now():
|
||||
return datetime.now().replace(tzinfo=_local_tz)
|
||||
|
||||
|
||||
def utcnow():
|
||||
return datetime.utcnow().replace(tzinfo=_utc_tz)
|
||||
|
||||
|
||||
def utcfromtimestamp(stamp):
|
||||
try:
|
||||
return datetime.utcfromtimestamp(stamp).replace(tzinfo=_utc_tz)
|
||||
except ValueError:
|
||||
# Raised if stamp is out of range for the platforms gmtime function
|
||||
# For example, this happens with negative values on windows
|
||||
try:
|
||||
return EPOCH + timedelta(seconds=stamp)
|
||||
except (ValueError, OverflowError):
|
||||
# datetime can only represent years between 1 and 9999
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return utcnow()
|
||||
|
||||
|
||||
def timestampfromdt(dt, assume_utc=True):
|
||||
return (as_utc(dt, assume_utc=assume_utc) - EPOCH).total_seconds()
|
||||
|
||||
# Format date functions {{{
|
||||
|
||||
|
||||
def fd_format_hour(dt, ampm, hr):
|
||||
l = len(hr)
|
||||
h = dt.hour
|
||||
if ampm:
|
||||
h = h%12
|
||||
if l == 1:
|
||||
return '%d'%h
|
||||
return '%02d'%h
|
||||
|
||||
|
||||
def fd_format_minute(dt, ampm, min):
|
||||
l = len(min)
|
||||
if l == 1:
|
||||
return '%d'%dt.minute
|
||||
return '%02d'%dt.minute
|
||||
|
||||
|
||||
def fd_format_second(dt, ampm, sec):
|
||||
l = len(sec)
|
||||
if l == 1:
|
||||
return '%d'%dt.second
|
||||
return '%02d'%dt.second
|
||||
|
||||
|
||||
def fd_format_ampm(dt, ampm, ap):
|
||||
res = strftime('%p', t=dt.timetuple())
|
||||
if ap == 'AP':
|
||||
return res
|
||||
return res.lower()
|
||||
|
||||
|
||||
def fd_format_day(dt, ampm, dy):
|
||||
l = len(dy)
|
||||
if l == 1:
|
||||
return '%d'%dt.day
|
||||
if l == 2:
|
||||
return '%02d'%dt.day
|
||||
return lcdata['abday' if l == 3 else 'day'][(dt.weekday() + 1) % 7]
|
||||
|
||||
|
||||
def fd_format_month(dt, ampm, mo):
|
||||
l = len(mo)
|
||||
if l == 1:
|
||||
return '%d'%dt.month
|
||||
if l == 2:
|
||||
return '%02d'%dt.month
|
||||
return lcdata['abmon' if l == 3 else 'mon'][dt.month - 1]
|
||||
|
||||
|
||||
def fd_format_year(dt, ampm, yr):
|
||||
if len(yr) == 2:
|
||||
return '%02d'%(dt.year % 100)
|
||||
return '%04d'%dt.year
|
||||
|
||||
|
||||
fd_function_index = {
|
||||
'd': fd_format_day,
|
||||
'M': fd_format_month,
|
||||
'y': fd_format_year,
|
||||
'h': fd_format_hour,
|
||||
'm': fd_format_minute,
|
||||
's': fd_format_second,
|
||||
'a': fd_format_ampm,
|
||||
'A': fd_format_ampm,
|
||||
}
|
||||
|
||||
|
||||
def fd_repl_func(dt, ampm, mo):
|
||||
s = mo.group(0)
|
||||
if not s:
|
||||
return ''
|
||||
return fd_function_index[s[0]](dt, ampm, s)
|
||||
|
||||
|
||||
def format_date(dt, format, assume_utc=False, as_utc=False):
|
||||
''' Return a date formatted as a string using a subset of Qt's formatting codes '''
|
||||
if not format:
|
||||
format = 'dd MMM yyyy'
|
||||
|
||||
if not isinstance(dt, datetime):
|
||||
dt = datetime.combine(dt, dtime())
|
||||
|
||||
if hasattr(dt, 'tzinfo'):
|
||||
if dt.tzinfo is None:
|
||||
dt = dt.replace(tzinfo=_utc_tz if assume_utc else
|
||||
_local_tz)
|
||||
dt = dt.astimezone(_utc_tz if as_utc else _local_tz)
|
||||
|
||||
if format == 'iso':
|
||||
return isoformat(dt, assume_utc=assume_utc, as_utc=as_utc)
|
||||
|
||||
if dt == UNDEFINED_DATE:
|
||||
return ''
|
||||
|
||||
repl_func = partial(fd_repl_func, dt, 'ap' in format.lower())
|
||||
return re.sub(
|
||||
'(s{1,2})|(m{1,2})|(h{1,2})|(ap)|(AP)|(d{1,4}|M{1,4}|(?:yyyy|yy))',
|
||||
repl_func, format)
|
||||
|
||||
# }}}
|
||||
|
||||
# Clean date functions {{{


def cd_has_hour(tt, dt):
    tt['hour'] = dt.hour
    return ''


def cd_has_minute(tt, dt):
    tt['min'] = dt.minute
    return ''


def cd_has_second(tt, dt):
    tt['sec'] = dt.second
    return ''


def cd_has_day(tt, dt):
    tt['day'] = dt.day
    return ''


def cd_has_month(tt, dt):
    tt['mon'] = dt.month
    return ''


def cd_has_year(tt, dt):
    tt['year'] = dt.year
    return ''


cd_function_index = {
    'd': cd_has_day,
    'M': cd_has_month,
    'y': cd_has_year,
    'h': cd_has_hour,
    'm': cd_has_minute,
    's': cd_has_second,
}


def cd_repl_func(tt, dt, match_object):
    s = match_object.group(0)
    if not s:
        return ''
    return cd_function_index[s[0]](tt, dt)


def clean_date_for_sort(dt, fmt=None):
    ''' Return dt with fields not shown in the format set to a default '''
    if not fmt:
        fmt = 'yyMd'

    if not isinstance(dt, datetime):
        dt = datetime.combine(dt, dtime())

    if hasattr(dt, 'tzinfo'):
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=_local_tz)
        dt = as_local_time(dt)

    if fmt == 'iso':
        fmt = 'yyMdhms'

    tt = {'year': UNDEFINED_DATE.year, 'mon': UNDEFINED_DATE.month,
          'day': UNDEFINED_DATE.day, 'hour': UNDEFINED_DATE.hour,
          'min': UNDEFINED_DATE.minute, 'sec': UNDEFINED_DATE.second}

    repl_func = partial(cd_repl_func, tt, dt)
    re.sub('(s{1,2})|(m{1,2})|(h{1,2})|(d{1,4}|M{1,4}|(?:yyyy|yy))', repl_func, fmt)
    return dt.replace(year=tt['year'], month=tt['mon'], day=tt['day'], hour=tt['hour'],
                      minute=tt['min'], second=tt['sec'], microsecond=0)
# }}}


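# Illustrative sketch (not part of the original file): with the default
# fmt='yyMd', the time-of-day fields are reset to UNDEFINED_DATE's values, so
# dates differing only in time compare equal when sorting.
#
#   d = clean_date_for_sort(datetime(2014, 5, 6, 7, 8, 9, tzinfo=_local_tz))
#   (d.year, d.month, d.day)      # -> (2014, 5, 6)
#   (d.hour, d.minute, d.second)  # -> UNDEFINED_DATE's hour, minute, second

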
def replace_months(datestr, clang):
    # Replace month names with their English equivalents, for parse_date
    frtoen = {
        '[jJ]anvier': 'jan',
        '[fF].vrier': 'feb',
        '[mM]ars': 'mar',
        '[aA]vril': 'apr',
        '[mM]ai': 'may',
        '[jJ]uin': 'jun',
        '[jJ]uillet': 'jul',
        '[aA]o.t': 'aug',
        '[sS]eptembre': 'sep',
        '[Oo]ctobre': 'oct',
        '[nN]ovembre': 'nov',
        '[dD].cembre': 'dec'}
    detoen = {
        '[jJ]anuar': 'jan',
        '[fF]ebruar': 'feb',
        '[mM].rz': 'mar',
        '[aA]pril': 'apr',
        '[mM]ai': 'may',
        '[jJ]uni': 'jun',
        '[jJ]uli': 'jul',
        '[aA]ugust': 'aug',
        '[sS]eptember': 'sep',
        '[Oo]ktober': 'oct',
        '[nN]ovember': 'nov',
        '[dD]ezember': 'dec'}

    if clang == 'fr':
        dictoen = frtoen
    elif clang == 'de':
        dictoen = detoen
    else:
        return datestr

    for k in dictoen:
        tmp = re.sub(k, dictoen[k], datestr)
        if tmp != datestr:
            break
    return tmp
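# Illustrative sketch (not part of the original file):
#
#   replace_months('12 décembre 2014', 'fr')  # -> '12 dec 2014'
#   replace_months('3 Oktober 1990', 'de')    # -> '3 oct 1990'
#   replace_months('1 May 2000', 'en')        # -> returned unchanged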
568
ebook_converter/utils/default_tweaks.py
Normal file
@@ -0,0 +1,568 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
# License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import unicode_literals

# Contains various tweaks that affect calibre behavior. Only edit this file if
# you know what you are doing. If you delete this file, it will be recreated from
# defaults.

#: Auto increment series index
# The algorithm used to assign a book added to an existing series a series number.
# New series numbers assigned using this tweak are always integer values, except
# if a constant non-integer is specified.
# Possible values are:
#   next - First available integer larger than the largest existing number
#   first_free - First available integer larger than 0
#   next_free - First available integer larger than the smallest existing number
#   last_free - First available integer smaller than the largest existing number.
#       Returns largest existing + 1 if no free number is found
#   const - Assign the number 1 always
#   no_change - Do not change the series index
#   a number - Assign that number always. The number is not in quotes. Note
#       that 0.0 can be used here.
# Examples:
#   series_index_auto_increment = 'next'
#   series_index_auto_increment = 'next_free'
#   series_index_auto_increment = 16.5
#
# Set the use_series_auto_increment_tweak_when_importing tweak to True to
# use the above values when importing/adding books. If this tweak is set to
# False (the default) then the series number will be set to 1 if it is not
# explicitly set during the import. If set to True, then the
# series index will be set according to the series_index_auto_increment setting.
# Note that the use_series_auto_increment_tweak_when_importing tweak is used
# only when a value is not provided during import. If the importing regular
# expression produces a value for series_index, or if you are reading metadata
# from books and the import plugin produces a value, then that value will
# be used irrespective of the setting of the tweak.
series_index_auto_increment = 'next'
use_series_auto_increment_tweak_when_importing = False

#: Add separator after completing an author name
# Whether the completion separator should be appended to the end of the
# completed text to automatically begin a new completion operation for
# authors. Can be either True or False.
authors_completer_append_separator = False

#: Author sort name algorithm
# The algorithm used to copy author to author_sort.
# Possible values are:
#   invert: use "fn ln" -> "ln, fn"
#   copy : copy author to author_sort without modification
#   comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
#   nocomma : "fn ln" -> "ln fn" (without the comma)
# When this tweak is changed, the author_sort values stored with each author
# must be recomputed by right-clicking on an author in the left-hand tags pane,
# selecting 'manage authors', and pressing 'Recalculate all author sort values'.
# The author name suffixes are words that are ignored when they occur at the
# end of an author name. The case of the suffix is ignored and trailing
# periods are automatically handled. The same is true for prefixes.
# The author name copy words are a set of words which, if they occur in an
# author name, cause the automatically generated author sort string to be
# identical to the author name. This means that the sort for a string like
# Acme Inc. will be Acme Inc. instead of Inc., Acme.
author_sort_copy_method = 'comma'
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
                        'MD', 'M.D', 'I', 'II', 'III', 'IV',
                        'Junior', 'Senior')
author_name_prefixes = ('Mr', 'Mrs', 'Ms', 'Dr', 'Prof')
author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
                         'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')

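# Illustrative examples (not part of the original file), assuming the stock
# author-sort algorithm and the defaults above:
#
#   'John Doe'   -> 'Doe, John'   ('comma' behaves like 'invert' here)
#   'Doe, John'  -> 'Doe, John'   ('comma' falls back to 'copy')
#   'Acme Inc.'  -> 'Acme Inc.'   ('Inc.' is a copyword)
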
#: Splitting multiple author names
# By default, calibre splits a string containing multiple author names on
# ampersands and the words "and" and "with". You can customize the splitting
# by changing the regular expression below. Strings are split on whatever the
# specified regular expression matches, in addition to ampersands.
# Default: r'(?i),?\s+(and|with)\s+'
authors_split_regex = r'(?i),?\s+(and|with)\s+'

#: Use author sort in Tag browser
# Set which author field to display in the tags pane (the list of authors,
# series, publishers etc on the left hand side). The choices are author and
# author_sort. This tweak affects only what is displayed under the authors
# category in the tags pane and Content server. Please note that if you set this
# to author_sort, it is very possible to see duplicate names in the list because
# although it is guaranteed that author names are unique, there is no such
# guarantee for author_sort values. Showing duplicates won't break anything, but
# it could lead to some confusion. When using 'author_sort', the tooltip will
# show the author's name.
# Examples:
#   categories_use_field_for_author_name = 'author'
#   categories_use_field_for_author_name = 'author_sort'
categories_use_field_for_author_name = 'author'

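# Illustrative sketch (not part of the original file) of the effect of the
# pattern above, assuming matches are normalized to ampersands before the
# final split on '&':
#
#   import re
#   re.sub(authors_split_regex, ' & ', 'John Doe and Jane Roe, with Jim Poe')
#   # -> 'John Doe & Jane Roe & Jim Poe'
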
#: Control partitioning of Tag browser
# When partitioning the tags browser, the format of the subcategory label is
# controlled by a template: categories_collapsed_name_template if sorting by
# name, categories_collapsed_rating_template if sorting by average rating, and
# categories_collapsed_popularity_template if sorting by popularity. There are
# two variables available to the template: first and last. The variable 'first'
# is the initial item in the subcategory, and the variable 'last' is the final
# item in the subcategory. Both variables are 'objects'; they each have multiple
# values that are obtained by using a suffix. For example, first.name for an
# author category will be the name of the author. The sub-values available are:
#   name: the printable name of the item
#   count: the number of books that reference this item
#   avg_rating: the average rating of all the books referencing this item
#   sort: the sort value. For authors, this is the author_sort for that author
#   category: the category (e.g., authors, series) that the item is in.
# Note that the "r'" in front of the { is necessary if there are backslashes
# (\ characters) in the template. It doesn't hurt anything to leave it there
# even if there aren't any backslashes.
categories_collapsed_name_template = r'{first.sort:shorten(4,,0)} - {last.sort:shorten(4,,0)}'
categories_collapsed_rating_template = r'{first.avg_rating:4.2f:ifempty(0)} - {last.avg_rating:4.2f:ifempty(0)}'
categories_collapsed_popularity_template = r'{first.count:d} - {last.count:d}'

#: Control order of categories in the Tag browser
# Change the following dict to change the order that categories are displayed in
# the Tag browser. Items are named using their lookup name, and will be sorted
# using the number supplied. The lookup name '*' stands for all names that
# otherwise do not appear. Two names with the same value will be sorted
# using the default order; the one used when the dict is empty.
# Example: tag_browser_category_order = {'series':1, 'tags':2, '*':3}
# resulting in the order series, tags, then everything else in default order.
tag_browser_category_order = {'*':1}

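# For example (illustrative, not part of the original file): with
# first.sort == 'Asimov' and last.sort == 'Bronte', the name template above
# renders the subcategory label as 'Asim - Bron'.
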
#: Specify columns to sort the booklist by on startup
# Provide a set of columns to be sorted on when calibre starts.
# The argument is None if saved sort history is to be used,
# otherwise it is a list of column,order pairs. Column is the
# lookup/search name, found using the tooltip for the column.
# Order is 0 for ascending, 1 for descending.
# For example, set it to [('authors',0),('title',0)] to sort by
# title within authors.
sort_columns_at_startup = None

#: Control how dates are displayed
# Format to be used for publication date and the timestamp (date).
# A string controlling how the publication date is displayed in the GUI:
#   d      the day as number without a leading zero (1 to 31)
#   dd     the day as number with a leading zero (01 to 31)
#   ddd    the abbreviated localized day name (e.g. 'Mon' to 'Sun').
#   dddd   the long localized day name (e.g. 'Monday' to 'Sunday').
#   M      the month as number without a leading zero (1-12)
#   MM     the month as number with a leading zero (01-12)
#   MMM    the abbreviated localized month name (e.g. 'Jan' to 'Dec').
#   MMMM   the long localized month name (e.g. 'January' to 'December').
#   yy     the year as two digit number (00-99)
#   yyyy   the year as four digit number
#   h      the hours without a leading 0 (0 to 11 or 0 to 23, depending on am/pm)
#   hh     the hours with a leading 0 (00 to 11 or 00 to 23, depending on am/pm)
#   m      the minutes without a leading 0 (0 to 59)
#   mm     the minutes with a leading 0 (00 to 59)
#   s      the seconds without a leading 0 (0 to 59)
#   ss     the seconds with a leading 0 (00 to 59)
#   ap     use a 12-hour clock instead of a 24-hour clock, with "ap" replaced by the localized string for am or pm
#   AP     use a 12-hour clock instead of a 24-hour clock, with "AP" replaced by the localized string for AM or PM
#   iso    the date with time and timezone. Must be the only format present
# For example, given the date of 9 Jan 2010, the following formats show
#   MMM yyyy ==> Jan 2010    yyyy ==> 2010    dd MMM yyyy ==> 09 Jan 2010
#   MM/yyyy ==> 01/2010      d/M/yy ==> 9/1/10    yy ==> 10
#
# publication default if not set: MMM yyyy
# timestamp default if not set: dd MMM yyyy
# last_modified_display_format if not set: dd MMM yyyy
gui_pubdate_display_format = 'MMM yyyy'
gui_timestamp_display_format = 'dd MMM yyyy'
gui_last_modified_display_format = 'dd MMM yyyy'

#: Control sorting of titles and series in the library display
# Control title and series sorting in the library view. If set to
# 'library_order', the title sort field will be used instead of the title.
# Unless you have manually edited the title sort field, leading articles such as
# The and A will be ignored. If set to 'strictly_alphabetic', the titles will be
# sorted as-is (sort by title instead of title sort). For example, with
# library_order, The Client will sort under 'C'. With strictly_alphabetic, the
# book will sort under 'T'.
# This flag affects calibre's library display. It has no effect on devices. In
# addition, titles for books added before changing the flag will retain their
# order until the title is edited. Editing a title and hitting return
# without changing anything is sufficient to change the sort. Or you can use
# the 'Update title sort' action in the Bulk metadata edit dialog to update
# it for many books at once.
title_series_sorting = 'library_order'

#: Control formatting of title and series when used in templates
# Control how title and series names are formatted when saving to disk/sending
# to device. The behavior depends on the field being processed. If processing
# title, then if this tweak is set to 'library_order', the title will be
# replaced with title_sort. If it is set to 'strictly_alphabetic', then the
# title will not be changed. If processing series, then if set to
# 'library_order', articles such as 'The' and 'An' will be moved to the end. If
# set to 'strictly_alphabetic', the series will be sent without change.
# For example, if the tweak is set to library_order, "The Lord of the Rings"
# will become "Lord of the Rings, The". If the tweak is set to
# strictly_alphabetic, it would remain "The Lord of the Rings". Note that the
# formatter function raw_field will return the base value for title and
# series regardless of the setting of this tweak.
save_template_title_series_sorting = 'library_order'

#: Set the list of words considered to be "articles" for sort strings
# Set the list of words that are to be considered 'articles' when computing the
# title sort strings. The articles differ by language. By default, calibre uses
# a combination of articles from English and whatever language the calibre user
# interface is set to. In addition, in some contexts where the book language is
# available, the language of the book is used. You can change the list of
# articles for a given language or add a new language by editing
# per_language_title_sort_articles. To tell calibre to use a language other
# than the user interface language, set default_language_for_title_sort. For
# example, to use German, set it to 'deu'. A value of None means the user
# interface language is used. The setting title_sort_articles is ignored
# (present only for legacy reasons).
per_language_title_sort_articles = {
    # English
    'eng': (r'A\s+', r'The\s+', r'An\s+'),
    # Esperanto
    'epo': (r'La\s+', r"L'", 'L´'),
    # Spanish
    'spa': (r'El\s+', r'La\s+', r'Lo\s+', r'Los\s+', r'Las\s+', r'Un\s+',
            r'Una\s+', r'Unos\s+', r'Unas\s+'),
    # French
    'fra': (r'Le\s+', r'La\s+', r"L'", u'L´', u'L’', r'Les\s+', r'Un\s+', r'Une\s+',
            r'Des\s+', r'De\s+La\s+', r'De\s+', r"D'", u'D´', u'L’'),
    # Italian
    'ita': ('Lo\\s+', 'Il\\s+', "L'", 'L´', 'La\\s+', 'Gli\\s+',
            'I\\s+', 'Le\\s+', 'Uno\\s+', 'Un\\s+', 'Una\\s+', "Un'",
            'Un´', 'Dei\\s+', 'Degli\\s+', 'Delle\\s+', 'Del\\s+',
            'Della\\s+', 'Dello\\s+', "Dell'", 'Dell´'),
    # Portuguese
    'por': (r'A\s+', r'O\s+', r'Os\s+', r'As\s+', r'Um\s+', r'Uns\s+',
            r'Uma\s+', r'Umas\s+'),
    # Romanian
    'ron': (r'Un\s+', r'O\s+', r'Nişte\s+'),
    # German
    'deu': (r'Der\s+', r'Die\s+', r'Das\s+', r'Den\s+', r'Ein\s+',
            r'Eine\s+', r'Einen\s+', r'Dem\s+', r'Des\s+', r'Einem\s+',
            r'Eines\s+'),
    # Dutch
    'nld': (r'De\s+', r'Het\s+', r'Een\s+', r"'n\s+", r"'s\s+", r'Ene\s+',
            r'Ener\s+', r'Enes\s+', r'Den\s+', r'Der\s+', r'Des\s+',
            r"'t\s+"),
    # Swedish
    'swe': (r'En\s+', r'Ett\s+', r'Det\s+', r'Den\s+', r'De\s+'),
    # Turkish
    'tur': (r'Bir\s+',),
    # Afrikaans
    'afr': (r"'n\s+", r'Die\s+'),
    # Greek
    'ell': (r'O\s+', r'I\s+', r'To\s+', r'Ta\s+', r'Tus\s+', r'Tis\s+',
            r"'Enas\s+", r"'Mia\s+", r"'Ena\s+", r"'Enan\s+"),
    # Hungarian
    'hun': (r'A\s+', r'Az\s+', r'Egy\s+'),
}
default_language_for_title_sort = None
title_sort_articles = r'^(A|The|An)\s+'

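# Illustrative example (not part of the original file): with the English
# articles above, calibre's title sort turns 'The Client' into 'Client, The',
# so the book files under 'C' when library_order sorting is in effect.
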
#: Specify a folder calibre should connect to at startup
# Specify a folder that calibre should connect to at startup using
# connect_to_folder. This must be a full path to the folder. If the folder does
# not exist when calibre starts, it is ignored.
# Example for Windows:
#   auto_connect_to_folder = 'C:/Users/someone/Desktop/testlib'
# Example for other operating systems:
#   auto_connect_to_folder = '/home/dropbox/My Dropbox/someone/library'
auto_connect_to_folder = ''

#: Specify renaming rules for SONY collections
# Specify renaming rules for sony collections. This tweak is only applicable if
# metadata management is set to automatic. Collections on Sonys are named
# depending upon whether the field is standard or custom. A collection derived
# from a standard field is named for the value in that field. For example, if
# the standard 'series' column contains the value 'Darkover', then the
# collection name is 'Darkover'. A collection derived from a custom field will
# have the name of the field added to the value. For example, if a custom series
# column named 'My Series' contains the name 'Darkover', then the collection
# will by default be named 'Darkover (My Series)'. For purposes of this
# documentation, 'Darkover' is called the value and 'My Series' is called the
# category. If two books have fields that generate the same collection name,
# then both books will be in that collection.
# This set of tweaks lets you specify for a standard or custom field how
# the collections are to be named. You can use it to add a description to a
# standard field, for example 'Foo (Tag)' instead of the 'Foo'. You can also use
# it to force multiple fields to end up in the same collection. For example, you
# could force the values in 'series', '#my_series_1', and '#my_series_2' to
# appear in collections named 'some_value (Series)', thereby merging all of the
# fields into one set of collections.
# There are two related tweaks. The first determines the category name to use
# for a metadata field. The second is a template that determines how the
# value and category are combined to create the collection name.
# The syntax of the first tweak, sony_collection_renaming_rules, is:
#   {'field_lookup_name':'category_name_to_use', 'lookup_name':'name', ...}
# The second tweak, sony_collection_name_template, is a template. It uses the
# same template language as plugboards and save templates. This tweak controls
# how the value and category are combined together to make the collection name.
# The only two fields available are {category} and {value}. The {value} field is
# never empty. The {category} field can be empty. The default is to put the
# value first, then the category enclosed in parentheses if it isn't empty:
#   '{value} {category:|(|)}'
# Examples: The first three examples assume that the second tweak
# has not been changed.
# 1: I want three series columns to be merged into one set of collections. The
#    column lookup names are 'series', '#series_1' and '#series_2'. I want nothing
#    in the parentheses. The value to use in the tweak would be:
#      sony_collection_renaming_rules={'series':'', '#series_1':'', '#series_2':''}
# 2: I want the word '(Series)' to appear on collections made from series, and
#    the word '(Tag)' to appear on collections made from tags. Use:
#      sony_collection_renaming_rules={'series':'Series', 'tags':'Tag'}
# 3: I want 'series' and '#myseries' to be merged, and for the collection name
#    to have '(Series)' appended. The renaming rule is:
#      sony_collection_renaming_rules={'series':'Series', '#myseries':'Series'}
# 4: Same as example 2, but instead of having the category name in parentheses
#    and appended to the value, I want it prepended and separated by a colon,
#    as in Series: Darkover. I must change the template used to format the
#    category name. The resulting two tweaks are:
#      sony_collection_renaming_rules={'series':'Series', 'tags':'Tag'}
#      sony_collection_name_template='{category:||: }{value}'
sony_collection_renaming_rules = {}
sony_collection_name_template = '{value}{category:| (|)}'

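# Illustrative expansion (not part of the original file) of the default
# template above:
#
#   value 'Darkover', category 'My Series' -> 'Darkover (My Series)'
#   value 'Darkover', empty category       -> 'Darkover'
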
#: Specify how SONY collections are sorted
# Specify how sony collections are sorted. This tweak is only applicable if
# metadata management is set to automatic. You can indicate which metadata is to
# be used to sort on a collection-by-collection basis. The format of the tweak
# is a list of metadata fields from which collections are made, followed by the
# name of the metadata field containing the sort value.
# Example: The following indicates that collections built from pubdate and tags
# are to be sorted by the value in the custom column '#mydate', that collections
# built from 'series' are to be sorted by 'series_index', and that all other
# collections are to be sorted by title. If a collection metadata field is not
# named, then if it is a series-based collection it is sorted by series order,
# otherwise it is sorted by title order.
#   [(['pubdate', 'tags'],'#mydate'), (['series'],'series_index'), (['*'], 'title')]
# Note that the bracketing and parentheses are required. The syntax is
#   [ ( [list of fields], sort field ) , ( [ list of fields ] , sort field ) ]
# Default: empty (no rules), so no collection attributes are named.
sony_collection_sorting_rules = []

#: Control how tags are applied when copying books to another library
# Set this to True to ensure that tags in 'Tags to add when adding
# a book' are added when copying books to another library.
add_new_book_tags_when_importing_books = False

#: Set custom metadata fields that the Content server will or will not display.
# Controls what fields are displayed when clicking the "Search" button in the
# browser to search your calibre library.
# content_server_will_display is a list of custom fields to be displayed.
# content_server_wont_display is a list of custom fields not to be displayed.
# wont_display has priority over will_display.
# The special value '*' means all custom fields. The value [] means no entries.
# Defaults:
#   content_server_will_display = ['*']
#   content_server_wont_display = []
#
# Examples:
#
# To display only the custom fields #mytags and #genre:
#   content_server_will_display = ['#mytags', '#genre']
#   content_server_wont_display = []
#
# To display all fields except #mycomments:
#   content_server_will_display = ['*']
#   content_server_wont_display = ['#mycomments']
content_server_will_display = ['*']
content_server_wont_display = []

#: Set the maximum number of sort 'levels'
# Set the maximum number of sort 'levels' that calibre will use to resort the
# library after certain operations such as searches or device insertion. Each
# sort level adds a performance penalty. If the database is large (thousands of
# books) the penalty might be noticeable. If you are not concerned about multi-
# level sorts, and if you are seeing a slowdown, reduce the value of this tweak.
maximum_resort_levels = 5

#: Choose whether dates are sorted using visible fields
# Date values contain both a date and a time. When sorted, all the fields are
# used, regardless of what is displayed. Set this tweak to True to use only
# the fields that are being displayed.
sort_dates_using_visible_fields = False

#: Fuzz value for trimming covers
# The value used for the fuzz distance when trimming a cover.
# Colors within this distance are considered equal.
# The distance is in absolute intensity units.
cover_trim_fuzz_value = 10

#: Control behavior of the book list
# You can control the behavior of double clicks and pressing enter on the books
# list. Choices: open_viewer, do_nothing, edit_cell, edit_metadata. Selecting
# anything other than open_viewer has the side effect of disabling editing a
# field using a single click.
# Default: open_viewer.
# Example: doubleclick_on_library_view = 'do_nothing'
# You can also control whether the book list scrolls horizontally per column
# or per pixel. Default is per column.
doubleclick_on_library_view = 'open_viewer'
enter_key_behavior = 'do_nothing'
horizontal_scrolling_per_column = True

#: Language to use when sorting
# Setting this tweak will force sorting to use the
# collating order for the specified language. This might be useful if you run
# calibre in English but want sorting to work in the language where you live.
# Set the tweak to the desired ISO 639-1 language code, in lower case.
# You can find the list of supported locales at
# https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
# Default: locale_for_sorting = ''  -- use the language calibre displays in
# Example: locale_for_sorting = 'fr' -- sort using French rules.
# Example: locale_for_sorting = 'nb' -- sort using Norwegian rules.
locale_for_sorting = ''

#: Number of columns for custom metadata in the edit metadata dialog
# Set whether to use one or two columns for custom metadata when editing
# metadata one book at a time. If True, then the fields are laid out using two
# columns. If False, one column is used.
metadata_single_use_2_cols_for_custom_fields = True

#: Order of custom column(s) in edit metadata
# Controls the order that custom columns are listed in edit metadata single
# and bulk. The columns listed in the tweak are displayed first and in the
# order provided. Any columns not listed are displayed after the listed ones,
# in alphabetical order. Do note that this tweak does not change the size of
# the edit widgets. Putting comments widgets in this list may result in some
# odd widget spacing when using two-column mode.
# Enter a comma-separated list of custom field lookup names, as in
#   metadata_edit_custom_column_order = ['#genre', '#mytags', '#etc']
metadata_edit_custom_column_order = []

#: The number of seconds to wait before sending emails
# The number of seconds to wait before sending emails when using a
# public email server like gmx/hotmail/gmail. Default is: 5 minutes.
# Setting it lower may cause the server's SPAM controls to kick in,
# making email sending fail. Changes will take effect only after a restart of
# calibre. You can also change the list of hosts that calibre considers
# to be public relays here. Any relay host ending with one of the suffixes
# in the list below will be considered a public email server.
public_smtp_relay_delay = 301
public_smtp_relay_host_suffixes = ['gmail.com', 'live.com', 'gmx.com']

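# Illustrative check (not part of the original file): a relay such as
# 'smtp.gmail.com' ends with the 'gmail.com' suffix above, so calibre treats
# it as a public email server and waits public_smtp_relay_delay seconds
# before sending.
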
#: The maximum width and height for covers saved in the calibre library
# All covers in the calibre library will be resized, preserving aspect ratio,
# to fit within this size. This is to prevent slowdowns caused by extremely
# large covers.
maximum_cover_size = (1650, 2200)

#: Where to send downloaded news
# When automatically sending downloaded news to a connected device, calibre
# will by default send it to the main memory. By changing this tweak, you can
# control where it is sent. Valid values are "main", "carda", "cardb". Note
# that if there isn't enough free space available on the location you choose,
# the files will be sent to the location with the most free space.
send_news_to_device_location = "main"

#: Unified toolbar on macOS
# If you enable this option and restart calibre, the toolbar will be 'unified'
# with the titlebar as is normal for macOS applications. However, doing this
# causes various bugs, for instance the minimum width of the toolbar becomes
# twice what it should be and it causes other random bugs on some systems, so
# turn it on at your own risk!
unified_title_toolbar_on_osx = False

#: Save original file when converting/polishing from same format to same format
# When calibre does a conversion from the same format to the same format, for
# example, from EPUB to EPUB, the original file is saved, so that in case the
# conversion is poor, you can tweak the settings and run it again. By setting
# this to False you can prevent calibre from saving the original file.
# Similarly, by setting save_original_format_when_polishing to False you can
# prevent calibre from saving the original file when polishing.
save_original_format = True
save_original_format_when_polishing = True

#: Number of recently viewed books to show
# Right-clicking the "View" button shows a list of recently viewed books.
# Control how many should be shown here.
gui_view_history_size = 15

#: Change the font size of book details in the interface
# Change the font size at which book details are rendered in the side panel and
# comments are rendered in the metadata edit dialog. Set it to a positive or
# negative number to increase or decrease the font size.
change_book_details_font_size_by = 0

#: What format to default to when using the "Unpack book" feature
# The "Unpack book" feature of calibre allows direct editing of a book format.
# If multiple formats are available, calibre will offer you a choice
# of formats, defaulting to your preferred output format if it is available.
# Set this tweak to a specific value of 'EPUB' or 'AZW3' to always default
# to that format rather than your output format preference.
# Set to a value of 'remember' to use whichever format you chose last time you
# used the "Unpack book" feature.
# Examples:
#   default_tweak_format = None (Use output format)
#   default_tweak_format = 'EPUB'
#   default_tweak_format = 'remember'
default_tweak_format = None

#: Do not preselect a completion when editing authors/tags/series/etc.
# This means that you can make changes and press Enter and your changes will
# not be overwritten by a matching completion. However, if you wish to use the
# completions you will now have to press Tab to select one before pressing
# Enter. Which technique you prefer will depend on the state of metadata in
# your library and your personal editing style.
preselect_first_completion = False

#: Completion mode when editing authors/tags/series/etc.
# By default, when completing items, calibre will show you all the candidates
# that start with the text you have already typed. You can instead have it show
# all candidates that contain the text you have already typed. To do this, set
# completion_mode to 'contains'. For example, if you type asi it will match both
# Asimov and Quasimodo, whereas the default behavior would match only Asimov.
completion_mode = 'prefix'

#: Recognize numbers inside text when sorting
# This means that when sorting on text fields like title the text "Book 2"
# will sort before the text "Book 100". If you want this behavior, set
# numeric_collation = True. Note that doing so will cause problems with text
# that starts with numbers and is a little slower.
numeric_collation = False

#: Sort the list of libraries alphabetically
# The list of libraries in the Copy to library and Quick switch menus are
# normally sorted by most used. However, if there are more than a certain
# number of such libraries, the sorting becomes alphabetic. You can set that
# number here. The default is ten libraries.
many_libraries = 10

#: Choose available output formats for conversion
# Restrict the list of available output formats in the conversion dialogs.
# For example, if you only want to convert to EPUB and AZW3, change this to
# restrict_output_formats = ['EPUB', 'AZW3']. The default value of None causes
# all available output formats to be present.
restrict_output_formats = None

#: Set the thumbnail image quality used by the Content server
# The quality of a thumbnail is largely controlled by the compression quality
# used when creating it. Set this to a larger number to improve the quality.
# Note that the thumbnails get much larger with larger compression quality
# numbers.
# The value can be between 50 and 99.
content_server_thumbnail_compression_quality = 75

#: Image file types to treat as e-books when dropping onto the "Book details" panel
# Normally, if you drop any image file in a format known to calibre onto the
# "Book details" panel, it will be used to set the cover. If you want to store
# some image types as e-books instead, you can set this tweak.
# Examples:
#   cover_drop_exclude = {'tiff', 'webp'}
cover_drop_exclude = ()

#: Show the Saved searches box in the Search bar
# In newer versions of calibre, only a single button that allows you to add a
# new Saved search is shown in the Search bar. If you would like to have the
# old Saved searches box with its two buttons back, set this tweak to True.
show_saved_search_box = False

#: Exclude fields when copy/pasting metadata
# You can ask calibre to not paste some metadata fields when using the
# Edit metadata->Copy metadata/Paste metadata actions. For example:
#   exclude_fields_on_paste = ['cover', 'timestamp', '#mycolumn']
# to prevent pasting of the cover, Date and the custom column mycolumn.
exclude_fields_on_paste = []

#: Skip internet connected check
# Skip checking whether the internet is available before downloading news.
# Useful if for some reason your operating system's network checking
# facilities are not reliable (for example NetworkManager on Linux).
skip_network_check = False
642
ebook_converter/utils/filenames.py
Normal file
@@ -0,0 +1,642 @@
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Make strings safe for use as ASCII filenames, while trying to preserve as much
meaning as possible.
'''

import errno
import os
import shutil
import time
from math import ceil

from calibre import force_unicode, isbytestring, prints, sanitize_file_name
from calibre.constants import (
    filesystem_encoding, iswindows, plugins, preferred_encoding, isosx, ispy3
)
from calibre.utils.localization import get_udc
from polyglot.builtins import iteritems, itervalues, unicode_type, range


def ascii_text(orig):
    udc = get_udc()
    try:
        ascii = udc.decode(orig)
    except Exception:
        if isinstance(orig, unicode_type):
            orig = orig.encode('ascii', 'replace')
        ascii = orig.decode(preferred_encoding, 'replace')
    if isinstance(ascii, bytes):
        ascii = ascii.decode('ascii', 'replace')
    return ascii


def ascii_filename(orig, substitute='_'):
    if isinstance(substitute, bytes):
        substitute = substitute.decode(filesystem_encoding)
    orig = ascii_text(orig).replace('?', '_')
    ans = ''.join(x if ord(x) >= 32 else substitute for x in orig)
    return sanitize_file_name(ans, substitute=substitute)


def shorten_component(s, by_what):
    l = len(s)
    if l < by_what:
        return s
    l = (l - by_what) // 2
    if l <= 0:
        return s
    return s[:l] + s[-l:]


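# Illustrative sketch (not part of the original file), assuming a
# unidecode-style transliterator behind get_udc():
#
#   ascii_filename('çatalhöyük?')       # -> 'catalhoyuk_' ('?' substituted)
#   shorten_component('abcdefghij', 4)  # -> 'abchij' (middle 4 chars dropped)

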
def limit_component(x, limit=254):
    # windows and macs use utf-16 code points for length, linux uses arbitrary
    # binary data, but we will assume utf-8
    filename_encoding_for_length = 'utf-16' if iswindows or isosx else 'utf-8'

    def encoded_length():
        q = x if isinstance(x, bytes) else x.encode(filename_encoding_for_length)
        return len(q)

    while encoded_length() > limit:
        delta = encoded_length() - limit
        x = shorten_component(x, max(2, delta // 2))

    return x


def shorten_components_to(length, components, more_to_take=0, last_has_extension=True):
    components = [limit_component(cx) for cx in components]
    filepath = os.sep.join(components)
    extra = len(filepath) - (length - more_to_take)
    if extra < 1:
        return components
    deltas = []
    for x in components:
        pct = len(x) / float(len(filepath))
        deltas.append(int(ceil(pct * extra)))
    ans = []

    for i, x in enumerate(components):
        delta = deltas[i]
        if delta > len(x):
            r = x[0] if x is components[-1] else ''
        else:
            if last_has_extension and x is components[-1]:
                b, e = os.path.splitext(x)
                if e == '.':
                    e = ''
                r = shorten_component(b, delta) + e
                if r.startswith('.'):
                    r = x[0] + r
            else:
                r = shorten_component(x, delta)
            r = r.strip()
        if not r:
            r = x.strip()[0] if x.strip() else 'x'
        ans.append(r)
    if len(os.sep.join(ans)) > length:
        return shorten_components_to(length, components, more_to_take + 2)
    return ans


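# Illustrative sketch (not part of the original file): shrink the path
# components so the joined path fits in 24 characters, while keeping the
# file extension of the last component intact.
#
#   shorten_components_to(24, ['Some Author', 'A Rather Long Title', 'book.epub'])
#   # -> something like ['Soor', 'A Raitle', 'book.epub']

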
def find_executable_in_path(name, path=None):
    if path is None:
        path = os.environ.get('PATH', '')
    exts = '.exe .cmd .bat'.split() if iswindows and not name.endswith('.exe') else ('',)
    path = path.split(os.pathsep)
    for x in path:
        for ext in exts:
            q = os.path.abspath(os.path.join(x, name)) + ext
            if os.access(q, os.X_OK):
                return q


def is_case_sensitive(path):
    '''
    Return True if the filesystem is case sensitive.

    path must be the path to an existing directory. You must have permission
    to create and delete files in this directory. The results of this test
    apply to the filesystem containing the directory in path.
    '''
    is_case_sensitive = False
    if not iswindows:
        name1, name2 = ('calibre_test_case_sensitivity.txt',
                        'calibre_TesT_CaSe_sensitiVitY.Txt')
        f1, f2 = os.path.join(path, name1), os.path.join(path, name2)
        if os.path.exists(f1):
            os.remove(f1)
        open(f1, 'w').close()
        is_case_sensitive = not os.path.exists(f2)
        os.remove(f1)
    return is_case_sensitive


def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
    '''
    Open the file pointed to by path with the specified mode. If any
    directories in path do not exist, they are created. Returns the
    opened file object and the path to the opened file object. This path is
    guaranteed to have the same case as the on disk path. For case insensitive
    filesystems, the returned path may be different from the passed in path.
    The returned path is always unicode and always an absolute path.

    If mode is None, then this function assumes that path points to a directory
    and returns the path to the directory as the file object.

    mkdir_mode specifies the mode with which any missing directories in path
    are created.
    '''
    if isbytestring(path):
        path = path.decode(filesystem_encoding)

    path = os.path.abspath(path)

    sep = force_unicode(os.sep, 'ascii')

    if path.endswith(sep):
        path = path[:-1]
    if not path:
        raise ValueError('Path must not point to root')

    components = path.split(sep)
    if not components:
        raise ValueError('Invalid path: %r' % path)

    cpath = sep
    if iswindows:
        # Always upper case the drive letter and add a trailing slash so that
        # the first os.listdir works correctly
        cpath = components[0].upper() + sep

    bdir = path if mode is None else os.path.dirname(path)
    if not os.path.exists(bdir):
        os.makedirs(bdir, mkdir_mode)

    # Walk all the directories in path, putting the on disk case version of
    # the directory into cpath
    dirs = components[1:] if mode is None else components[1:-1]
    for comp in dirs:
        cdir = os.path.join(cpath, comp)
        cl = comp.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except:
            # Don't have permission to do the listdir, assume the case is
            # correct as we have no way to check it.
            pass
        else:
            if len(candidates) == 1:
                cdir = os.path.join(cpath, candidates[0])
            # else: We are on a case sensitive file system so cdir must already
            # be correct
        cpath = cdir

    if mode is None:
        ans = fpath = cpath
    else:
        fname = components[-1]
        ans = lopen(os.path.join(cpath, fname), mode)
        # Ensure the file and all its metadata are written to disk so that a
        # subsequent listdir() has the file name in it. I don't know if this is
        # actually necessary, but given the diversity of platforms, best to be
        # safe.
        ans.flush()
        os.fsync(ans.fileno())

        cl = fname.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except EnvironmentError:
            # The containing directory somehow disappeared?
            candidates = []
        if len(candidates) == 1:
            fpath = os.path.join(cpath, candidates[0])
        else:
            # We are on a case sensitive filesystem
            fpath = os.path.join(cpath, fname)
    return ans, fpath


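# Illustrative usage sketch (not part of the original file):
#
#   f, on_disk_path = case_preserving_open_file('/tmp/MyDir/Book.TXT')
#   try:
#       f.write(b'data')
#   finally:
#       f.close()
#   # on_disk_path reflects the case actually stored by the filesystem,
#   # which may differ from the path that was passed in.

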
def windows_get_fileid(path):
    ''' The fileid uniquely identifies actual file contents (it is the same for
    all hardlinks to a file). Similar to inode number on linux. '''
    import win32file
    from pywintypes import error
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    try:
        h = win32file.CreateFileW(path, 0, 0, None, win32file.OPEN_EXISTING,
                                  win32file.FILE_FLAG_BACKUP_SEMANTICS, 0)
        try:
            data = win32file.GetFileInformationByHandle(h)
        finally:
            win32file.CloseHandle(h)
    except (error, EnvironmentError):
        return None
    return data[4], data[8], data[9]


def samefile_windows(src, dst):
    samestring = (os.path.normcase(os.path.abspath(src)) ==
                  os.path.normcase(os.path.abspath(dst)))
    if samestring:
        return True

    a, b = windows_get_fileid(src), windows_get_fileid(dst)
    if a is None and b is None:
        return False
    return a == b


def samefile(src, dst):
    '''
    Check if two paths point to the same actual file on the filesystem. Handles
    symlinks, case insensitivity, mapped drives, etc.

    Returns True iff both paths exist and point to the same file on disk.

    Note: On windows this will return True if the two strings are identical (up
    to case) even if the file does not exist. This is because I have no way of
    knowing how reliable the GetFileInformationByHandle method is.
    '''
    if iswindows:
        return samefile_windows(src, dst)

    if hasattr(os.path, 'samefile'):
        # Unix
        try:
            return os.path.samefile(src, dst)
        except EnvironmentError:
            return False

    # All other platforms: check for same pathname.
    samestring = (os.path.normcase(os.path.abspath(src)) ==
                  os.path.normcase(os.path.abspath(dst)))
    return samestring


def windows_get_size(path):
    ''' On windows file sizes are only accurately stored in the actual file,
    not in the directory entry (which could be out of date). So we open the
    file, and get the actual size. '''
    import win32file
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    h = win32file.CreateFileW(
        path, 0, win32file.FILE_SHARE_READ | win32file.FILE_SHARE_WRITE | win32file.FILE_SHARE_DELETE,
        None, win32file.OPEN_EXISTING, 0, None)
    try:
        return win32file.GetFileSize(h)
    finally:
        win32file.CloseHandle(h)


def windows_hardlink(src, dest):
    import win32file, pywintypes
    try:
        win32file.CreateHardLink(dest, src)
    except pywintypes.error as e:
        msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
        raise OSError(msg % e)
    src_size = os.path.getsize(src)
    # We open and close dest, to ensure its directory entry is updated, see
    # http://blogs.msdn.com/b/oldnewthing/archive/2011/12/26/10251026.aspx
    for i in range(10):
        # If we are on a network filesystem, we have to wait for some
        # indeterminate time, since network file systems are the best thing
        # since sliced bread
        try:
            if windows_get_size(dest) == src_size:
                return
        except EnvironmentError:
            pass
        time.sleep(0.3)

    sz = windows_get_size(dest)
    if sz != src_size:
        msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
        raise OSError(msg % ('hardlink size: %d not the same as source size' % sz))


def windows_fast_hardlink(src, dest):
    import win32file, pywintypes
    try:
        win32file.CreateHardLink(dest, src)
    except pywintypes.error as e:
        msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
        raise OSError(msg % e)
    ssz, dsz = windows_get_size(src), windows_get_size(dest)
    if ssz != dsz:
        msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
        raise OSError(msg % ('hardlink size: %d not the same as source size: %s' % (dsz, ssz)))


def windows_nlinks(path):
    import win32file
    dwFlagsAndAttributes = win32file.FILE_FLAG_BACKUP_SEMANTICS if os.path.isdir(path) else 0
    if isbytestring(path):
        path = path.decode(filesystem_encoding)
    handle = win32file.CreateFileW(path, win32file.GENERIC_READ, win32file.FILE_SHARE_READ,
                                   None, win32file.OPEN_EXISTING, dwFlagsAndAttributes, None)
    try:
        return win32file.GetFileInformationByHandle(handle)[7]
    finally:
        handle.Close()


class WindowsAtomicFolderMove(object):

    '''
    Move all the files inside a specified folder in an atomic fashion,
    preventing any other process from locking a file while the operation is
    incomplete. Raises an IOError if another process has locked a file before
    the operation starts. Note that this only operates on the files in the
    folder, not any sub-folders.
    '''

    def __init__(self, path):
        self.handle_map = {}

        import win32file, winerror
        from pywintypes import error
        from collections import defaultdict

        if isbytestring(path):
            path = path.decode(filesystem_encoding)

        if not os.path.exists(path):
            return

        names = os.listdir(path)
        name_to_fileid = {x: windows_get_fileid(os.path.join(path, x)) for x in names}
        fileid_to_names = defaultdict(set)
        for name, fileid in iteritems(name_to_fileid):
            fileid_to_names[fileid].add(name)

        for x in names:
            f = os.path.normcase(os.path.abspath(os.path.join(path, x)))
            if not os.path.isfile(f):
                continue
            try:
                # Ensure the file is not read-only
                win32file.SetFileAttributes(f, win32file.FILE_ATTRIBUTE_NORMAL)
            except:
                pass

            try:
                h = win32file.CreateFileW(f, win32file.GENERIC_READ,
                        win32file.FILE_SHARE_DELETE, None,
                        win32file.OPEN_EXISTING, win32file.FILE_FLAG_SEQUENTIAL_SCAN, 0)
            except error as e:
                if getattr(e, 'winerror', 0) == winerror.ERROR_SHARING_VIOLATION:
                    # The file could be a hardlink to an already opened file,
                    # in which case we use the same handle for both files
                    fileid = name_to_fileid[x]
                    found = False
                    if fileid is not None:
                        for other in fileid_to_names[fileid]:
                            other = os.path.normcase(os.path.abspath(os.path.join(path, other)))
                            if other in self.handle_map:
                                self.handle_map[f] = self.handle_map[other]
                                found = True
                                break
                    if found:
                        continue

                self.close_handles()
                if getattr(e, 'winerror', 0) == winerror.ERROR_SHARING_VIOLATION:
                    err = IOError(errno.EACCES,
                            _('File is open in another process'))
                    err.filename = f
                    raise err
                prints('CreateFile failed for: %r' % f)
                raise
            except:
                self.close_handles()
                prints('CreateFile failed for: %r' % f)
                raise
            self.handle_map[f] = h

    def copy_path_to(self, path, dest):
        import win32file
        handle = None
        for p, h in iteritems(self.handle_map):
            if samefile_windows(path, p):
                handle = h
                break
        if handle is None:
            if os.path.exists(path):
                raise ValueError('The file %r did not exist when this move'
                        ' operation was started' % path)
            else:
                raise ValueError('The file %r does not exist' % path)
        try:
            windows_hardlink(path, dest)
            return
        except:
            pass

        win32file.SetFilePointer(handle, 0, win32file.FILE_BEGIN)
        with lopen(dest, 'wb') as f:
            while True:
                hr, raw = win32file.ReadFile(handle, 1024*1024)
                if hr != 0:
                    raise IOError(hr, 'Error while reading from %r' % path)
                if not raw:
                    break
                f.write(raw)

    def release_file(self, path):
        ' Release the lock on the file pointed to by path. Will also release the lock on any hardlinks to path '
        key = None
        for p, h in iteritems(self.handle_map):
            if samefile_windows(path, p):
                key = (p, h)
                break
        if key is not None:
            import win32file
            win32file.CloseHandle(key[1])
            remove = [f for f, h in iteritems(self.handle_map) if h is key[1]]
            for x in remove:
                self.handle_map.pop(x)

    def close_handles(self):
        import win32file
        for h in itervalues(self.handle_map):
            win32file.CloseHandle(h)
        self.handle_map = {}

    def delete_originals(self):
        import win32file
        for path in self.handle_map:
            win32file.DeleteFile(path)
        self.close_handles()


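# Illustrative usage sketch (not part of the original file), assuming
# src_folder and dst_folder already exist:
#
#   move = WindowsAtomicFolderMove(src_folder)  # locks every file in the folder
#   try:
#       for name in os.listdir(src_folder):
#           move.copy_path_to(os.path.join(src_folder, name),
#                             os.path.join(dst_folder, name))
#   except Exception:
#       move.close_handles()  # abort: release locks, keep the sources
#       raise
#   else:
#       move.delete_originals()  # deletes the sources and releases the locks

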
def hardlink_file(src, dest):
    if iswindows:
        windows_hardlink(src, dest)
        return
    os.link(src, dest)


def nlinks_file(path):
    ' Return the number of hardlinks to the file '
    if iswindows:
        return windows_nlinks(path)
    return os.stat(path).st_nlink


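# Illustrative sketch (not part of the original file):
#
#   hardlink_file('book.epub', 'copy.epub')
#   nlinks_file('book.epub')  # -> 2 (both names now point at the same data)

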
if iswindows:
    def rename_file(a, b):
        move_file = plugins['winutil'][0].move_file
        if isinstance(a, bytes):
            a = a.decode('mbcs')
        if isinstance(b, bytes):
            b = b.decode('mbcs')
        move_file(a, b)


def atomic_rename(oldpath, newpath):
    '''Replace the file newpath with the file oldpath. Can fail if the files
    are on different volumes. If it succeeds, it is guaranteed to be atomic.
    newpath may or may not exist. If it exists, it is replaced. '''
    if iswindows:
        for i in range(10):
            try:
                rename_file(oldpath, newpath)
                break
            except Exception:
                if i > 8:
                    raise
                # Try the rename repeatedly in case something like a virus
                # scanner has opened one of the files (I love windows)
                time.sleep(1)
    else:
        os.rename(oldpath, newpath)


def remove_dir_if_empty(path, ignore_metadata_caches=False):
    ''' Remove a directory if it is empty or contains only the folder metadata
    caches from different OSes. To delete the folder if it contains only
    metadata caches, set ignore_metadata_caches to True.'''
    try:
        os.rmdir(path)
    except OSError as e:
        if e.errno == errno.ENOTEMPTY or len(os.listdir(path)) > 0:
            # Some linux systems appear to raise an EPERM instead of an
            # ENOTEMPTY, see https://bugs.launchpad.net/bugs/1240797
            if ignore_metadata_caches:
                try:
                    found = False
                    for x in os.listdir(path):
                        if x.lower() in {'.ds_store', 'thumbs.db'}:
                            found = True
                            x = os.path.join(path, x)
                            if os.path.isdir(x):
                                import shutil
                                shutil.rmtree(x)
                            else:
                                os.remove(x)
                except Exception:  # We could get an error if, for example, windows has locked Thumbs.db
                    found = False
                if found:
                    remove_dir_if_empty(path)
                return
            raise


expanduser = os.path.expanduser
|
||||
|
||||
|
||||
def format_permissions(st_mode):
|
||||
import stat
|
||||
for func, letter in (x.split(':') for x in 'REG:- DIR:d BLK:b CHR:c FIFO:p LNK:l SOCK:s'.split()):
|
||||
if getattr(stat, 'S_IS' + func)(st_mode):
|
||||
break
|
||||
else:
|
||||
letter = '?'
|
||||
rwx = ('---', '--x', '-w-', '-wx', 'r--', 'r-x', 'rw-', 'rwx')
|
||||
ans = [letter] + list(rwx[(st_mode >> 6) & 7]) + list(rwx[(st_mode >> 3) & 7]) + list(rwx[(st_mode & 7)])
|
||||
if st_mode & stat.S_ISUID:
|
||||
ans[3] = 's' if (st_mode & stat.S_IXUSR) else 'S'
|
||||
if st_mode & stat.S_ISGID:
|
||||
ans[6] = 's' if (st_mode & stat.S_IXGRP) else 'l'
|
||||
if st_mode & stat.S_ISVTX:
|
||||
ans[9] = 't' if (st_mode & stat.S_IXUSR) else 'T'
|
||||
return ''.join(ans)
|
||||
|
||||
|
||||
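

# Worked example for format_permissions (illustration, not part of the
# original module): a regular file with mode 0o755 renders the familiar
# ls -l style string.
#
#   format_permissions(0o100755)  # -> '-rwxr-xr-x'
#   format_permissions(0o104755)  # setuid + executable -> '-rwsr-xr-x'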


def copyfile(src, dest):
    shutil.copyfile(src, dest)
    try:
        shutil.copystat(src, dest)
    except Exception:
        pass


def get_hardlink_function(src, dest):
    if iswindows:
        import win32file, win32api
        colon = b':' if isinstance(dest, bytes) else ':'
        root = dest[0] + colon
        try:
            is_suitable = win32file.GetDriveType(root) not in (win32file.DRIVE_REMOTE, win32file.DRIVE_CDROM)
            # See https://msdn.microsoft.com/en-us/library/windows/desktop/aa364993(v=vs.85).aspx
            supports_hard_links = win32api.GetVolumeInformation(root + os.sep)[3] & 0x00400000
        except Exception:
            supports_hard_links = is_suitable = False
        hardlink = windows_fast_hardlink if is_suitable and supports_hard_links and src[0].lower() == dest[0].lower() else None
    else:
        hardlink = os.link
    return hardlink


def copyfile_using_links(path, dest, dest_is_dir=True, filecopyfunc=copyfile):
    path, dest = os.path.abspath(path), os.path.abspath(dest)
    if dest_is_dir:
        dest = os.path.join(dest, os.path.basename(path))
    hardlink = get_hardlink_function(path, dest)
    try:
        hardlink(path, dest)
    except Exception:
        filecopyfunc(path, dest)


def copytree_using_links(path, dest, dest_is_parent=True, filecopyfunc=copyfile):
    path, dest = os.path.abspath(path), os.path.abspath(dest)
    if dest_is_parent:
        dest = os.path.join(dest, os.path.basename(path))
    hardlink = get_hardlink_function(path, dest)
    try:
        os.makedirs(dest)
    except EnvironmentError as e:
        if e.errno != errno.EEXIST:
            raise
    for dirpath, dirnames, filenames in os.walk(path):
        base = os.path.relpath(dirpath, path)
        dest_base = os.path.join(dest, base)
        for dname in dirnames:
            try:
                os.mkdir(os.path.join(dest_base, dname))
            except EnvironmentError as e:
                if e.errno != errno.EEXIST:
                    raise
        for fname in filenames:
            src, df = os.path.join(dirpath, fname), os.path.join(dest_base, fname)
            try:
                hardlink(src, df)
            except Exception:
                filecopyfunc(src, df)


if not ispy3 and not iswindows:
    # On POSIX in python2 if you pass a unicode path to rmtree
    # it tries to decode all filenames it encounters while walking
    # the tree which leads to unicode errors on Linux where there
    # can be non-decodeable filenames.
    def rmtree(x, **kw):
        if not isinstance(x, bytes):
            x = x.encode('utf-8')
        return shutil.rmtree(x, **kw)
else:
    rmtree = shutil.rmtree
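

# Usage sketch (hypothetical paths, not from the original source): both
# helpers attempt a hardlink first and silently fall back to a plain copy
# when linking is unsupported (different volume, network drive, etc.).
#
#   copyfile_using_links('/data/book.epub', '/backup')  # file into a directory
#   copytree_using_links('/data/library', '/backup')    # tree under a parent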
7
ebook_converter/utils/fonts/__init__.py
Normal file
@@ -0,0 +1,7 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
122
ebook_converter/utils/fonts/metadata.py
Normal file
@@ -0,0 +1,122 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

from io import BytesIO
from struct import calcsize, unpack, unpack_from
from collections import namedtuple

from calibre.utils.fonts.utils import get_font_names2, get_font_characteristics
from polyglot.builtins import range, unicode_type


class UnsupportedFont(ValueError):
    pass


FontCharacteristics = namedtuple('FontCharacteristics',
    'weight, is_italic, is_bold, is_regular, fs_type, panose, width, is_oblique, is_wws, os2_version')
FontNames = namedtuple('FontNames',
    'family_name, subfamily_name, full_name, preferred_family_name, preferred_subfamily_name, wws_family_name, wws_subfamily_name')


class FontMetadata(object):

    def __init__(self, bytes_or_stream):
        if not hasattr(bytes_or_stream, 'read'):
            bytes_or_stream = BytesIO(bytes_or_stream)
        f = bytes_or_stream
        f.seek(0)
        header = f.read(4)
        if header not in {b'\x00\x01\x00\x00', b'OTTO'}:
            raise UnsupportedFont('Not a supported sfnt variant')

        self.is_otf = header == b'OTTO'
        self.read_table_metadata(f)
        self.read_names(f)
        self.read_characteristics(f)

        f.seek(0)
        self.font_family = self.names.family_name
        wt = self.characteristics.weight
        if wt == 400:
            wt = 'normal'
        elif wt == 700:
            wt = 'bold'
        else:
            wt = unicode_type(wt)
        self.font_weight = wt

        self.font_stretch = ('ultra-condensed', 'extra-condensed',
                'condensed', 'semi-condensed', 'normal', 'semi-expanded',
                'expanded', 'extra-expanded', 'ultra-expanded')[
                        self.characteristics.width-1]
        if self.characteristics.is_oblique:
            self.font_style = 'oblique'
        elif self.characteristics.is_italic:
            self.font_style = 'italic'
        else:
            self.font_style = 'normal'

    def read_table_metadata(self, f):
        f.seek(4)
        num_tables = unpack(b'>H', f.read(2))[0]
        # Start of table record entries
        f.seek(4 + 4*2)
        table_record = b'>4s3L'
        sz = calcsize(table_record)
        self.tables = {}
        block = f.read(sz * num_tables)
        for i in range(num_tables):
            table_tag, table_checksum, table_offset, table_length = \
                    unpack_from(table_record, block, i*sz)
            self.tables[table_tag.lower()] = (table_offset, table_length,
                                              table_checksum)

    def read_names(self, f):
        if b'name' not in self.tables:
            raise UnsupportedFont('This font has no name table')
        toff, tlen = self.tables[b'name'][:2]
        f.seek(toff)
        table = f.read(tlen)
        if len(table) != tlen:
            raise UnsupportedFont('This font has a name table of incorrect length')
        vals = get_font_names2(table, raw_is_table=True)
        self.names = FontNames(*vals)

    def read_characteristics(self, f):
        if b'os/2' not in self.tables:
            raise UnsupportedFont('This font has no OS/2 table')
        toff, tlen = self.tables[b'os/2'][:2]
        f.seek(toff)
        table = f.read(tlen)
        if len(table) != tlen:
            raise UnsupportedFont('This font has an OS/2 table of incorrect length')
        vals = get_font_characteristics(table, raw_is_table=True)
        self.characteristics = FontCharacteristics(*vals)

    def to_dict(self):
        ans = {
            'is_otf': self.is_otf,
            'font-family': self.font_family,
            'font-weight': self.font_weight,
            'font-style': self.font_style,
            'font-stretch': self.font_stretch
        }
        for f in self.names._fields:
            ans[f] = getattr(self.names, f)
        for f in self.characteristics._fields:
            ans[f] = getattr(self.characteristics, f)
        return ans


if __name__ == '__main__':
    import sys
    with open(sys.argv[-1], 'rb') as f:
        fm = FontMetadata(f)
    import pprint
    pprint.pprint(fm.to_dict())
412
ebook_converter/utils/fonts/scanner.py
Normal file
@@ -0,0 +1,412 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os
from collections import defaultdict
from threading import Thread

from calibre import walk, prints, as_unicode
from calibre.constants import (config_dir, iswindows, isosx, plugins, DEBUG,
        isworker, filesystem_encoding)
from calibre.utils.fonts.metadata import FontMetadata, UnsupportedFont
from calibre.utils.icu import sort_key
from polyglot.builtins import itervalues, unicode_type, filter


class NoFonts(ValueError):
    pass

# Font dirs {{{


def default_font_dirs():
    return [
        '/opt/share/fonts',
        '/usr/share/fonts',
        '/usr/local/share/fonts',
        os.path.expanduser('~/.local/share/fonts'),
        os.path.expanduser('~/.fonts')
    ]


def fc_list():
    import ctypes
    from ctypes.util import find_library

    lib = find_library('fontconfig')
    if lib is None:
        return default_font_dirs()
    try:
        lib = ctypes.CDLL(lib)
    except:
        return default_font_dirs()

    prototype = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p)
    try:
        get_font_dirs = prototype(('FcConfigGetFontDirs', lib))
    except AttributeError:
        return default_font_dirs()
    prototype = ctypes.CFUNCTYPE(ctypes.c_char_p, ctypes.c_void_p)
    try:
        next_dir = prototype(('FcStrListNext', lib))
    except AttributeError:
        return default_font_dirs()

    prototype = ctypes.CFUNCTYPE(None, ctypes.c_void_p)
    try:
        end = prototype(('FcStrListDone', lib))
    except AttributeError:
        return default_font_dirs()

    str_list = get_font_dirs(ctypes.c_void_p())
    if not str_list:
        return default_font_dirs()

    ans = []
    while True:
        d = next_dir(str_list)
        if not d:
            break
        try:
            ans.append(d.decode(filesystem_encoding))
        except ValueError:
            prints('Ignoring undecodeable font path: %r' % d)
            continue
    end(str_list)
    if len(ans) < 3:
        return default_font_dirs()

    parents, visited = [], set()
    for f in ans:
        path = os.path.normpath(os.path.abspath(os.path.realpath(f)))
        if path == '/':
            continue
        head, tail = os.path.split(path)
        while head and tail:
            if head in visited:
                break
            head, tail = os.path.split(head)
        else:
            parents.append(path)
            visited.add(path)
    return parents


def font_dirs():
    if iswindows:
        winutil, err = plugins['winutil']
        if err:
            raise RuntimeError('Failed to load winutil: %s' % err)
        try:
            return [winutil.special_folder_path(winutil.CSIDL_FONTS)]
        except ValueError:
            return [r'C:\Windows\Fonts']
    if isosx:
        return [
            '/Library/Fonts',
            '/System/Library/Fonts',
            '/usr/share/fonts',
            '/var/root/Library/Fonts',
            os.path.expanduser('~/.fonts'),
            os.path.expanduser('~/Library/Fonts'),
        ]
    return fc_list()
# }}}

# Build font family maps {{{


def font_priority(font):
    '''
    Try to ensure that the "Regular" face is the first font for a given
    family.
    '''
    style_normal = font['font-style'] == 'normal'
    width_normal = font['font-stretch'] == 'normal'
    weight_normal = font['font-weight'] == 'normal'
    num_normal = sum(filter(None, (style_normal, width_normal,
        weight_normal)))
    subfamily_name = (font['wws_subfamily_name'] or
            font['preferred_subfamily_name'] or font['subfamily_name'])
    if num_normal == 3 and subfamily_name == 'Regular':
        return 0
    if num_normal == 3:
        return 1
    if subfamily_name == 'Regular':
        return 2
    return 3 + (3 - num_normal)
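

# Worked example for font_priority (minimal hypothetical face dicts): the
# plain "Regular" face gets the lowest value, so it sorts first when the
# faces are ordered with fonts.sort(key=font_priority).
#
#   regular = {'font-style': 'normal', 'font-stretch': 'normal',
#              'font-weight': 'normal', 'wws_subfamily_name': None,
#              'preferred_subfamily_name': None, 'subfamily_name': 'Regular'}
#   bold = dict(regular, **{'font-weight': 'bold', 'subfamily_name': 'Bold'})
#   font_priority(regular)  # -> 0
#   font_priority(bold)     # -> 3 + (3 - 2) = 4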


def path_significance(path, folders):
    path = os.path.normcase(os.path.abspath(path))
    for i, q in enumerate(folders):
        if path.startswith(q):
            return i
    return -1


def build_families(cached_fonts, folders, family_attr='font-family'):
    families = defaultdict(list)
    for f in itervalues(cached_fonts):
        if not f:
            continue
        lf = icu_lower(f.get(family_attr) or '')
        if lf:
            families[lf].append(f)

    for fonts in itervalues(families):
        # Look for duplicate font files and choose the copy that is from a
        # more significant font directory (prefer user directories over
        # system directories).
        fmap = {}
        remove = []
        for f in fonts:
            fingerprint = (icu_lower(f['font-family']), f['font-weight'],
                    f['font-stretch'], f['font-style'])
            if fingerprint in fmap:
                opath = fmap[fingerprint]['path']
                npath = f['path']
                if path_significance(npath, folders) >= path_significance(opath, folders):
                    remove.append(fmap[fingerprint])
                    fmap[fingerprint] = f
                else:
                    remove.append(f)
            else:
                fmap[fingerprint] = f
        for font in remove:
            fonts.remove(font)
        fonts.sort(key=font_priority)

    font_family_map = dict.copy(families)
    font_families = tuple(sorted((f[0]['font-family'] for f in
        itervalues(font_family_map)), key=sort_key))
    return font_family_map, font_families
# }}}


class FontScanner(Thread):

    CACHE_VERSION = 2

    def __init__(self, folders=[], allowed_extensions={'ttf', 'otf'}):
        Thread.__init__(self)
        self.folders = folders + font_dirs() + [os.path.join(config_dir, 'fonts'),
                P('fonts/liberation')]
        self.folders = [os.path.normcase(os.path.abspath(f)) for f in
                self.folders]
        self.font_families = ()
        self.allowed_extensions = allowed_extensions

    # API {{{
    def find_font_families(self):
        self.join()
        return self.font_families

    def fonts_for_family(self, family):
        '''
        Return a list of the faces belonging to the specified family. The first
        face is the "Regular" face of family. Each face is a dictionary with
        many keys, the most important of which are: path, font-family,
        font-weight, font-style, font-stretch. The font-* properties follow the
        CSS 3 Fonts specification.
        '''
        self.join()
        try:
            return self.font_family_map[icu_lower(family)]
        except KeyError:
            raise NoFonts('No fonts found for the family: %r' % family)

    def legacy_fonts_for_family(self, family):
        '''
        Return a simple set of regular, bold, italic and bold-italic faces for
        the specified family. Returns a dictionary with each element being a
        2-tuple of (path to font, full font name) and the keys being: normal,
        bold, italic, bi.
        '''
        ans = {}
        try:
            faces = self.fonts_for_family(family)
        except NoFonts:
            return ans
        for i, face in enumerate(faces):
            if i == 0:
                key = 'normal'
            elif face['font-style'] in {'italic', 'oblique'}:
                key = 'bi' if face['font-weight'] == 'bold' else 'italic'
            elif face['font-weight'] == 'bold':
                key = 'bold'
            else:
                continue
            ans[key] = (face['path'], face['full_name'])
        return ans

    def get_font_data(self, font_or_path):
        path = font_or_path
        if isinstance(font_or_path, dict):
            path = font_or_path['path']
        with lopen(path, 'rb') as f:
            return f.read()

    def find_font_for_text(self, text, allowed_families={'serif', 'sans-serif'},
            preferred_families=('serif', 'sans-serif', 'monospace', 'cursive', 'fantasy')):
        '''
        Find a font on the system capable of rendering the given text.

        Returns a font family (as given by fonts_for_family()) that has a
        "normal" font and that can render the supplied text. If no such font
        exists, returns None.

        :return: (family name, faces) or None, None
        '''
        from calibre.utils.fonts.utils import (supports_text,
                panose_to_css_generic_family, get_printable_characters)
        if not isinstance(text, unicode_type):
            raise TypeError(u'%r is not unicode' % text)
        text = get_printable_characters(text)
        found = {}

        def filter_faces(font):
            try:
                raw = self.get_font_data(font)
                return supports_text(raw, text)
            except:
                pass
            return False

        for family in self.find_font_families():
            faces = list(filter(filter_faces, self.fonts_for_family(family)))
            if not faces:
                continue
            generic_family = panose_to_css_generic_family(faces[0]['panose'])
            if generic_family in allowed_families or generic_family == preferred_families[0]:
                return (family, faces)
            elif generic_family not in found:
                found[generic_family] = (family, faces)

        for f in preferred_families:
            if f in found:
                return found[f]
        return None, None
    # }}}

    def reload_cache(self):
        if not hasattr(self, 'cache'):
            from calibre.utils.config import JSONConfig
            self.cache = JSONConfig('fonts/scanner_cache')
        else:
            self.cache.refresh()
        if self.cache.get('version', None) != self.CACHE_VERSION:
            self.cache.clear()
        self.cached_fonts = self.cache.get('fonts', {})

    def run(self):
        self.do_scan()

    def do_scan(self):
        self.reload_cache()

        if isworker:
            # Dont scan font files in worker processes, use whatever is
            # cached. Font files typically dont change frequently enough to
            # justify a rescan in a worker process.
            self.build_families()
            return

        cached_fonts = self.cached_fonts.copy()
        self.cached_fonts.clear()
        for folder in self.folders:
            if not os.path.isdir(folder):
                continue
            try:
                files = tuple(walk(folder))
            except EnvironmentError as e:
                if DEBUG:
                    prints('Failed to walk font folder:', folder,
                            as_unicode(e))
                continue
            for candidate in files:
                if (candidate.rpartition('.')[-1].lower() not in self.allowed_extensions or not os.path.isfile(candidate)):
                    continue
                candidate = os.path.normcase(os.path.abspath(candidate))
                try:
                    s = os.stat(candidate)
                except EnvironmentError:
                    continue
                fileid = '{0}||{1}:{2}'.format(candidate, s.st_size, s.st_mtime)
                if fileid in cached_fonts:
                    # Use previously cached metadata, since the file size and
                    # last modified timestamp have not changed.
                    self.cached_fonts[fileid] = cached_fonts[fileid]
                    continue
                try:
                    self.read_font_metadata(candidate, fileid)
                except Exception as e:
                    if DEBUG:
                        prints('Failed to read metadata from font file:',
                                candidate, as_unicode(e))
                    continue

        if frozenset(cached_fonts) != frozenset(self.cached_fonts):
            # Write out the cache only if some font files have changed
            self.write_cache()

        self.build_families()

    def build_families(self):
        self.font_family_map, self.font_families = build_families(self.cached_fonts, self.folders)

    def write_cache(self):
        with self.cache:
            self.cache['version'] = self.CACHE_VERSION
            self.cache['fonts'] = self.cached_fonts

    def force_rescan(self):
        self.cached_fonts = {}
        self.write_cache()

    def read_font_metadata(self, path, fileid):
        with lopen(path, 'rb') as f:
            try:
                fm = FontMetadata(f)
            except UnsupportedFont:
                self.cached_fonts[fileid] = {}
            else:
                data = fm.to_dict()
                data['path'] = path
                self.cached_fonts[fileid] = data

    def dump_fonts(self):
        self.join()
        for family in self.font_families:
            prints(family)
            for font in self.fonts_for_family(family):
                prints('\t%s: %s' % (font['full_name'], font['path']))
                prints(end='\t')
                for key in ('font-stretch', 'font-weight', 'font-style'):
                    prints('%s: %s' % (key, font[key]), end=' ')
                prints()
                prints('\tSub-family:', font['wws_subfamily_name'] or
                        font['preferred_subfamily_name'] or
                        font['subfamily_name'])
                prints()
            prints()


font_scanner = FontScanner()
font_scanner.start()


def force_rescan():
    font_scanner.join()
    font_scanner.force_rescan()
    font_scanner.run()


if __name__ == '__main__':
    font_scanner.dump_fonts()
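

# Usage sketch for the module-level scanner (output values illustrative):
# the scanner thread is started above and every public method joins it
# first, so callers simply block until the initial scan has completed.
#
#   families = font_scanner.find_font_families()        # sorted family names
#   faces = font_scanner.fonts_for_family(families[0])  # 'Regular' face first
#   family, faces = font_scanner.find_font_for_text('Hello')  # or (None, None)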
503
ebook_converter/utils/fonts/utils.py
Normal file
@@ -0,0 +1,503 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import struct
from io import BytesIO
from collections import defaultdict

from polyglot.builtins import iteritems, itervalues, unicode_type, range, as_bytes


class UnsupportedFont(ValueError):
    pass


def get_printable_characters(text):
    import unicodedata
    return u''.join(x for x in unicodedata.normalize('NFC', text)
            if unicodedata.category(x)[0] not in {'C', 'Z', 'M'})


def is_truetype_font(raw):
    sfnt_version = raw[:4]
    return (sfnt_version in {b'\x00\x01\x00\x00', b'OTTO'}, sfnt_version)


def get_tables(raw):
    num_tables = struct.unpack_from(b'>H', raw, 4)[0]
    offset = 4*3  # start of the table record entries
    for i in range(num_tables):
        table_tag, table_checksum, table_offset, table_length = struct.unpack_from(
                b'>4s3L', raw, offset)
        yield (table_tag, raw[table_offset:table_offset+table_length], offset,
                table_offset, table_checksum)
        offset += 4*4


def get_table(raw, name):
    ''' Get the raw table bytes for the specified table in the font '''
    name = as_bytes(name.lower())
    for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
        if table_tag.lower() == name:
            return table, table_index, table_offset, table_checksum
    return None, None, None, None


def get_font_characteristics(raw, raw_is_table=False, return_all=False):
    '''
    Return (weight, is_italic, is_bold, is_regular, fs_type, panose, width,
    is_oblique, is_wws). These values are taken from the OS/2 table of the
    font. See http://www.microsoft.com/typography/otspec/os2.htm for details
    '''
    if raw_is_table:
        os2_table = raw
    else:
        os2_table = get_table(raw, 'os/2')[0]
        if os2_table is None:
            raise UnsupportedFont('Not a supported font, has no OS/2 table')

    common_fields = b'>Hh3H11h'
    (version, char_width, weight, width, fs_type, subscript_x_size,
            subscript_y_size, subscript_x_offset, subscript_y_offset,
            superscript_x_size, superscript_y_size, superscript_x_offset,
            superscript_y_offset, strikeout_size, strikeout_position,
            family_class) = struct.unpack_from(common_fields, os2_table)
    offset = struct.calcsize(common_fields)
    panose = struct.unpack_from(b'>10B', os2_table, offset)
    offset += 10
    (range1, range2, range3, range4) = struct.unpack_from(b'>4L', os2_table, offset)
    offset += struct.calcsize(b'>4L')
    vendor_id = os2_table[offset:offset+4]
    vendor_id
    offset += 4
    selection, = struct.unpack_from(b'>H', os2_table, offset)

    is_italic = (selection & (1 << 0)) != 0
    is_bold = (selection & (1 << 5)) != 0
    is_regular = (selection & (1 << 6)) != 0
    is_wws = (selection & (1 << 8)) != 0
    is_oblique = (selection & (1 << 9)) != 0
    if return_all:
        return (version, char_width, weight, width, fs_type, subscript_x_size,
                subscript_y_size, subscript_x_offset, subscript_y_offset,
                superscript_x_size, superscript_y_size, superscript_x_offset,
                superscript_y_offset, strikeout_size, strikeout_position,
                family_class, panose, selection, is_italic, is_bold, is_regular)

    return weight, is_italic, is_bold, is_regular, fs_type, panose, width, is_oblique, is_wws, version


def panose_to_css_generic_family(panose):
    proportion = panose[3]
    if proportion == 9:
        return 'monospace'
    family_type = panose[0]
    if family_type == 3:
        return 'cursive'
    if family_type == 4:
        return 'fantasy'
    serif_style = panose[1]
    if serif_style in (11, 12, 13):
        return 'sans-serif'
    return 'serif'
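

# Worked example (hypothetical PANOSE tuples; only digits 0 (family type),
# 1 (serif style) and 3 (proportion) are consulted by the checks above):
#
#   panose_to_css_generic_family((2, 11, 6, 9, 0, 0, 0, 0, 0, 0))  # 'monospace'
#   panose_to_css_generic_family((2, 11, 6, 2, 0, 0, 0, 0, 0, 0))  # 'sans-serif'
#   panose_to_css_generic_family((3, 0, 0, 0, 0, 0, 0, 0, 0, 0))   # 'cursive'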


def decode_name_record(recs):
    '''
    Get the English names of this font. See
    http://www.microsoft.com/typography/otspec/name.htm for details.
    '''
    if not recs:
        return None
    unicode_names = {}
    windows_names = {}
    mac_names = {}
    for platform_id, encoding_id, language_id, src in recs:
        if language_id > 0x8000:
            continue
        if platform_id == 0:
            if encoding_id < 4:
                try:
                    unicode_names[language_id] = src.decode('utf-16-be')
                except ValueError:
                    continue
        elif platform_id == 1:
            try:
                mac_names[language_id] = src.decode('utf-8')
            except ValueError:
                continue
        elif platform_id == 2:
            codec = {0: 'ascii', 1: 'utf-16-be', 2: 'iso-8859-1'}.get(encoding_id,
                    None)
            if codec is None:
                continue
            try:
                unicode_names[language_id] = src.decode(codec)
            except ValueError:
                continue
        elif platform_id == 3:
            codec = {1: 16, 10: 32}.get(encoding_id, None)
            if codec is None:
                continue
            try:
                windows_names[language_id] = src.decode('utf-%d-be' % codec)
            except ValueError:
                continue

    # First try the windows names
    # First look for the US English name
    if 1033 in windows_names:
        return windows_names[1033]
    # Look for some other english name variant
    for lang in (3081, 10249, 4105, 9225, 16393, 6153, 8201, 17417, 5129,
            13321, 18441, 7177, 11273, 2057, 12297):
        if lang in windows_names:
            return windows_names[lang]

    # Look for Mac name
    if 0 in mac_names:
        return mac_names[0]

    # Use unicode names
    for val in itervalues(unicode_names):
        return val

    return None


def _get_font_names(raw, raw_is_table=False):
    if raw_is_table:
        table = raw
    else:
        table = get_table(raw, 'name')[0]
        if table is None:
            raise UnsupportedFont('Not a supported font, has no name table')
    table_type, count, string_offset = struct.unpack_from(b'>3H', table)

    records = defaultdict(list)

    for i in range(count):
        try:
            platform_id, encoding_id, language_id, name_id, length, offset = \
                    struct.unpack_from(b'>6H', table, 6+i*12)
        except struct.error:
            break
        offset += string_offset
        src = table[offset:offset+length]
        records[name_id].append((platform_id, encoding_id, language_id,
            src))

    return records


def get_font_names(raw, raw_is_table=False):
    records = _get_font_names(raw, raw_is_table)
    family_name = decode_name_record(records[1])
    subfamily_name = decode_name_record(records[2])
    full_name = decode_name_record(records[4])

    return family_name, subfamily_name, full_name


def get_font_names2(raw, raw_is_table=False):
    records = _get_font_names(raw, raw_is_table)

    family_name = decode_name_record(records[1])
    subfamily_name = decode_name_record(records[2])
    full_name = decode_name_record(records[4])

    preferred_family_name = decode_name_record(records[16])
    preferred_subfamily_name = decode_name_record(records[17])

    wws_family_name = decode_name_record(records[21])
    wws_subfamily_name = decode_name_record(records[22])

    return (family_name, subfamily_name, full_name, preferred_family_name,
            preferred_subfamily_name, wws_family_name, wws_subfamily_name)


def get_all_font_names(raw, raw_is_table=False):
    records = _get_font_names(raw, raw_is_table)
    ans = {}

    for name, num in iteritems({'family_name': 1, 'subfamily_name': 2, 'full_name': 4,
            'preferred_family_name': 16, 'preferred_subfamily_name': 17,
            'wws_family_name': 21, 'wws_subfamily_name': 22}):
        try:
            ans[name] = decode_name_record(records[num])
        except (IndexError, KeyError, ValueError):
            continue
        if not ans[name]:
            del ans[name]

    for platform_id, encoding_id, language_id, src in records[6]:
        if (platform_id, encoding_id, language_id) == (1, 0, 0):
            try:
                ans['postscript_name'] = src.decode('utf-8')
                break
            except ValueError:
                continue
        elif (platform_id, encoding_id, language_id) == (3, 1, 1033):
            try:
                ans['postscript_name'] = src.decode('utf-16-be')
                break
            except ValueError:
                continue

    return ans


def checksum_of_block(raw):
    extra = 4 - len(raw) % 4
    raw += b'\0'*extra
    num = len(raw)//4
    return sum(struct.unpack(b'>%dI' % num, raw)) % (1 << 32)
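

# Worked example for checksum_of_block: the block is zero-padded to a
# multiple of four bytes and summed as big-endian 32-bit words, modulo 2**32.
#
#   checksum_of_block(b'\x00\x00\x00\x02\x00\x01')
#   # pads to b'\x00\x00\x00\x02\x00\x01\x00\x00'
#   # -> 0x00000002 + 0x00010000 = 0x00010002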


def verify_checksums(raw):
    head_table = None
    for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
        if table_tag.lower() == b'head':
            version, fontrev, checksum_adj = struct.unpack_from(b'>ffL', table)
            head_table = table
            offset = table_offset
            checksum = table_checksum
        elif checksum_of_block(table) != table_checksum:
            raise ValueError('The %r table has an incorrect checksum' % table_tag)

    if head_table is not None:
        table = head_table
        table = table[:8] + struct.pack(b'>I', 0) + table[12:]
        raw = raw[:offset] + table + raw[offset+len(table):]
        # Check the checksum of the head table
        if checksum_of_block(table) != checksum:
            raise ValueError('Checksum of head table not correct')
        # Check the checksum of the entire font
        checksum = checksum_of_block(raw)
        q = (0xB1B0AFBA - checksum) & 0xffffffff
        if q != checksum_adj:
            raise ValueError('Checksum of entire font incorrect')


def set_checksum_adjustment(f):
    offset = get_table(f.getvalue(), 'head')[2]
    offset += 8
    f.seek(offset)
    f.write(struct.pack(b'>I', 0))
    checksum = checksum_of_block(f.getvalue())
    q = (0xB1B0AFBA - checksum) & 0xffffffff
    f.seek(offset)
    f.write(struct.pack(b'>I', q))


def set_table_checksum(f, name):
    table, table_index, table_offset, table_checksum = get_table(f.getvalue(), name)
    checksum = checksum_of_block(table)
    if checksum != table_checksum:
        f.seek(table_index + 4)
        f.write(struct.pack(b'>I', checksum))


def remove_embed_restriction(raw):
    ok, sig = is_truetype_font(raw)
    if not ok:
        raise UnsupportedFont('Not a supported font, sfnt_version: %r' % sig)

    table, table_index, table_offset = get_table(raw, 'os/2')[:3]
    if table is None:
        raise UnsupportedFont('Not a supported font, has no OS/2 table')

    fs_type_offset = struct.calcsize(b'>HhHH')
    fs_type = struct.unpack_from(b'>H', table, fs_type_offset)[0]
    if fs_type == 0:
        return raw

    f = BytesIO(raw)
    f.seek(fs_type_offset + table_offset)
    f.write(struct.pack(b'>H', 0))

    set_table_checksum(f, 'os/2')
    set_checksum_adjustment(f)
    raw = f.getvalue()
    verify_checksums(raw)
    return raw


def is_font_embeddable(raw):
    # https://www.microsoft.com/typography/otspec/os2.htm#fst
    ok, sig = is_truetype_font(raw)
    if not ok:
        raise UnsupportedFont('Not a supported font, sfnt_version: %r' % sig)

    table, table_index, table_offset = get_table(raw, 'os/2')[:3]
    if table is None:
        raise UnsupportedFont('Not a supported font, has no OS/2 table')
    fs_type_offset = struct.calcsize(b'>HhHH')
    fs_type = struct.unpack_from(b'>H', table, fs_type_offset)[0]
    if fs_type == 0 or fs_type & 0x8:
        return True, fs_type
    if fs_type & 1:
        return False, fs_type
    if fs_type & 0x200:
        return False, fs_type
    return True, fs_type
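

# Usage sketch (hypothetical raw_* font blobs; results follow the branches
# above): the second element is the raw fsType, so callers can report the
# exact embedding flags.
#
#   is_font_embeddable(raw_installable)  # fsType 0x0000 -> (True, 0)
#   is_font_embeddable(raw_editable)     # fsType 0x0008 -> (True, 8)
#   is_font_embeddable(raw_restricted)   # fsType & 1    -> (False, fs_type)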


def read_bmp_prefix(table, bmp):
    length, language, segcount = struct.unpack_from(b'>3H', table, bmp+2)
    array_len = segcount//2
    offset = bmp + 7*2
    array_sz = 2*array_len
    array = b'>%dH' % array_len
    end_count = struct.unpack_from(array, table, offset)
    offset += array_sz + 2
    start_count = struct.unpack_from(array, table, offset)
    offset += array_sz
    id_delta = struct.unpack_from(array.replace(b'H', b'h'), table, offset)
    offset += array_sz
    range_offset = struct.unpack_from(array, table, offset)
    if length + bmp < offset + array_sz:
        raise ValueError('cmap subtable length is too small')
    glyph_id_len = (length + bmp - (offset + array_sz))//2
    glyph_id_map = struct.unpack_from(b'>%dH' % glyph_id_len, table, offset +
            array_sz)
    return (start_count, end_count, range_offset, id_delta, glyph_id_len,
            glyph_id_map, array_len)


def get_bmp_glyph_ids(table, bmp, codes):
    (start_count, end_count, range_offset, id_delta, glyph_id_len,
            glyph_id_map, array_len) = read_bmp_prefix(table, bmp)

    for code in codes:
        found = False
        for i, ec in enumerate(end_count):
            if ec >= code:
                sc = start_count[i]
                if sc <= code:
                    found = True
                    ro = range_offset[i]
                    if ro == 0:
                        glyph_id = id_delta[i] + code
                    else:
                        idx = ro//2 + (code - sc) + i - array_len
                        glyph_id = glyph_id_map[idx]
                        if glyph_id != 0:
                            glyph_id += id_delta[i]
                    yield glyph_id % 0x10000
                break
        if not found:
            yield 0


def get_glyph_ids(raw, text, raw_is_table=False):
    if not isinstance(text, unicode_type):
        raise TypeError('%r is not a unicode object' % text)
    if raw_is_table:
        table = raw
    else:
        table = get_table(raw, 'cmap')[0]
        if table is None:
            raise UnsupportedFont('Not a supported font, has no cmap table')
    version, num_tables = struct.unpack_from(b'>HH', table)
    bmp_table = None
    for i in range(num_tables):
        platform_id, encoding_id, offset = struct.unpack_from(b'>HHL', table,
                4 + (i*8))
        if platform_id == 3 and encoding_id == 1:
            table_format = struct.unpack_from(b'>H', table, offset)[0]
            if table_format == 4:
                bmp_table = offset
                break
    if bmp_table is None:
        raise UnsupportedFont('Not a supported font, has no format 4 cmap table')

    for glyph_id in get_bmp_glyph_ids(table, bmp_table, map(ord, text)):
        yield glyph_id


def supports_text(raw, text, has_only_printable_chars=False):
    if not isinstance(text, unicode_type):
        raise TypeError('%r is not a unicode object' % text)
    if not has_only_printable_chars:
        text = get_printable_characters(text)
    try:
        for glyph_id in get_glyph_ids(raw, text):
            if glyph_id == 0:
                return False
    except:
        return False
    return True


def get_font_for_text(text, candidate_font_data=None):
    ok = False
    if candidate_font_data is not None:
        ok = supports_text(candidate_font_data, text)
    if not ok:
        from calibre.utils.fonts.scanner import font_scanner
        family, faces = font_scanner.find_font_for_text(text)
        if faces:
            with lopen(faces[0]['path'], 'rb') as f:
                candidate_font_data = f.read()
    return candidate_font_data


def test_glyph_ids():
    from calibre.utils.fonts.free_type import FreeType
    data = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
    ft = FreeType()
    font = ft.load_font(data)
    text = u'诶йab'
    ft_glyphs = tuple(font.glyph_ids(text))
    glyphs = tuple(get_glyph_ids(data, text))
    if ft_glyphs != glyphs:
        raise Exception('My code and FreeType differ on the glyph ids')


def test_supports_text():
    data = P('fonts/calibreSymbols.otf', data=True)
    if not supports_text(data, '.★½'):
        raise RuntimeError('Incorrectly returning that text is not supported')
    if supports_text(data, 'abc'):
        raise RuntimeError('Incorrectly claiming that text is supported')


def test_find_font():
    from calibre.utils.fonts.scanner import font_scanner
    abcd = '诶比西迪'
    family = font_scanner.find_font_for_text(abcd)[0]
    print('Family for Chinese text:', family)
    abcd = 'لوحة المفاتيح العربية'
    family = font_scanner.find_font_for_text(abcd)[0]
    print('Family for Arabic text:', family)


def test():
    test_glyph_ids()
    test_supports_text()
    test_find_font()


def main():
    import sys, os
    for arg in sys.argv[1:]:
        print(os.path.basename(arg))
        with open(arg, 'rb') as f:
            raw = f.read()
        print(get_font_names(raw))
        characs = get_font_characteristics(raw)
        print(characs)
        print(panose_to_css_generic_family(characs[5]))
        verify_checksums(raw)
        remove_embed_restriction(raw)


if __name__ == '__main__':
    main()
416
ebook_converter/utils/formatter.py
Normal file
@@ -0,0 +1,416 @@
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Created on 23 Sep 2010

@author: charles
'''

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re, string, traceback, numbers

from calibre import prints
from calibre.constants import DEBUG
from calibre.utils.formatter_functions import formatter_functions
from polyglot.builtins import unicode_type, error_message


class _Parser(object):
    LEX_OP = 1
    LEX_ID = 2
    LEX_STR = 3
    LEX_NUM = 4
    LEX_EOF = 5

    LEX_CONSTANTS = frozenset((LEX_STR, LEX_NUM))

    def __init__(self, val, prog, funcs, parent):
        self.lex_pos = 0
        self.prog = prog[0]
        self.prog_len = len(self.prog)
        if prog[1] != '':
            self.error(_('failed to scan program. Invalid input {0}').format(prog[1]))
        self.parent = parent
        self.parent_kwargs = parent.kwargs
        self.parent_book = parent.book
        self.locals = {'$': val}
        self.funcs = funcs

    def error(self, message):
        m = 'Formatter: ' + message + _(' near ')
        if self.lex_pos > 0:
            m = '{0} {1}'.format(m, self.prog[self.lex_pos-1][1])
        elif self.lex_pos < self.prog_len:
            m = '{0} {1}'.format(m, self.prog[self.lex_pos+1][1])
        else:
            m = '{0} {1}'.format(m, _('end of program'))
        raise ValueError(m)

    def token(self):
        if self.lex_pos >= self.prog_len:
            return None
        token = self.prog[self.lex_pos][1]
        self.lex_pos += 1
        return token

    def consume(self):
        self.lex_pos += 1

    def token_op_is_a_equals(self):
        if self.lex_pos >= self.prog_len:
            return False
        token = self.prog[self.lex_pos]
        return token[0] == self.LEX_OP and token[1] == '='

    def token_op_is_a_lparen(self):
        if self.lex_pos >= self.prog_len:
            return False
        token = self.prog[self.lex_pos]
        return token[0] == self.LEX_OP and token[1] == '('

    def token_op_is_a_rparen(self):
        if self.lex_pos >= self.prog_len:
            return False
        token = self.prog[self.lex_pos]
        return token[0] == self.LEX_OP and token[1] == ')'

    def token_op_is_a_comma(self):
        if self.lex_pos >= self.prog_len:
            return False
        token = self.prog[self.lex_pos]
        return token[0] == self.LEX_OP and token[1] == ','

    def token_op_is_a_semicolon(self):
        if self.lex_pos >= self.prog_len:
            return False
        token = self.prog[self.lex_pos]
        return token[0] == self.LEX_OP and token[1] == ';'

    def token_is_id(self):
        if self.lex_pos >= self.prog_len:
            return False
        return self.prog[self.lex_pos][0] == self.LEX_ID

    def token_is_constant(self):
        if self.lex_pos >= self.prog_len:
            return False
        return self.prog[self.lex_pos][0] in self.LEX_CONSTANTS

    def token_is_eof(self):
        if self.lex_pos >= self.prog_len:
            return True
        token = self.prog[self.lex_pos]
        return token[0] == self.LEX_EOF

    def program(self):
        val = self.statement()
        if not self.token_is_eof():
            self.error(_('syntax error - program ends before EOF'))
        return val

    def statement(self):
        while True:
            val = self.expr()
            if self.token_is_eof():
                return val
            if not self.token_op_is_a_semicolon():
                return val
            self.consume()
            if self.token_is_eof():
                return val

    def expr(self):
        if self.token_is_id():
            # We have an identifier. Determine if it is a function
            id = self.token()
            if not self.token_op_is_a_lparen():
                if self.token_op_is_a_equals():
                    # classic assignment statement
                    self.consume()
                    cls = self.funcs['assign']
                    return cls.eval_(self.parent, self.parent_kwargs,
                            self.parent_book, self.locals, id, self.expr())
                val = self.locals.get(id, None)
                if val is None:
                    self.error(_('Unknown identifier ') + id)
                return val
            # We have a function.
            # Check if it is a known one. We do this here so error reporting is
            # better, as it can identify the tokens near the problem.
            id = id.strip()
            if id not in self.funcs:
                self.error(_('unknown function {0}').format(id))

            # Eat the paren
            self.consume()
            args = list()
            while not self.token_op_is_a_rparen():
                if id == 'assign' and len(args) == 0:
                    # Must handle the lvalue semantics of the assign function.
                    # The first argument is the name of the destination, not
                    # the value.
                    if not self.token_is_id():
                        self.error('assign requires the first parameter be an id')
                    args.append(self.token())
                else:
                    # evaluate the argument (recursive call)
                    args.append(self.statement())
                if not self.token_op_is_a_comma():
                    break
                self.consume()
            if self.token() != ')':
                self.error(_('missing closing parenthesis'))

            # Evaluate the function
            cls = self.funcs[id]
            if cls.arg_count != -1 and len(args) != cls.arg_count:
                self.error('incorrect number of arguments for function {}'.format(id))
            return cls.eval_(self.parent, self.parent_kwargs,
                    self.parent_book, self.locals, *args)
        elif self.token_is_constant():
            # String or number
            return self.token()
        else:
            self.error(_('expression is not function or constant'))
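

# Illustrative inputs for the grammar accepted by _Parser (the template
# strings are examples, not from the original source; function names are
# assumed to exist in self.funcs):
#
#   "$"                  # the current value, seeded as locals['$']
#   "x = 'hello'; x"     # an assignment, then the variable; ';' chains
#   "uppercase($)"       # a function call; arguments are parsed recursively
#
# A program is a ';'-separated sequence of expressions and evaluates to the
# value of the last one.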


class TemplateFormatter(string.Formatter):
    '''
    Provides a format function that substitutes '' for any missing value
    '''

    _validation_string = 'This Is Some Text THAT SHOULD be LONG Enough.%^&*'

    # Dict to do recursion detection. It is up to the individual get_value
    # method to use it. It is cleared when starting to format a template
    composite_values = {}

    def __init__(self):
        string.Formatter.__init__(self)
        self.book = None
        self.kwargs = None
        self.strip_results = True
        self.locals = {}
        self.funcs = formatter_functions().get_functions()

    def _do_format(self, val, fmt):
        if not fmt or not val:
            return val
        if val == self._validation_string:
            val = '0'
        typ = fmt[-1]
        if typ == 's':
            pass
        elif 'bcdoxXn'.find(typ) >= 0:
            try:
                val = int(val)
            except Exception:
                raise ValueError(
                    _('format: type {0} requires an integer value, got {1}').format(typ, val))
        elif 'eEfFgGn%'.find(typ) >= 0:
            try:
                val = float(val)
            except:
                raise ValueError(
                    _('format: type {0} requires a decimal (float) value, got {1}').format(typ, val))
        return unicode_type(('{0:'+fmt+'}').format(val))

    def _explode_format_string(self, fmt):
        try:
            matches = self.format_string_re.match(fmt)
            if matches is None or matches.lastindex != 3:
                return fmt, '', ''
            return matches.groups()
        except:
            if DEBUG:
                traceback.print_exc()
            return fmt, '', ''

    format_string_re = re.compile(r'^(.*)\|([^\|]*)\|(.*)$', re.DOTALL)
    compress_spaces = re.compile(r'\s+')
    backslash_comma_to_comma = re.compile(r'\\,')

    arg_parser = re.Scanner([
        (r',', lambda x,t: ''),
        (r'.*?((?<!\\),)', lambda x,t: t[:-1]),
        (r'.*?\)', lambda x,t: t[:-1]),
    ])

    # ################# 'Functional' template language ######################

    lex_scanner = re.Scanner([
        (r'[(),=;]', lambda x,t: (1, t)),
        (r'-?[\d\.]+', lambda x,t: (3, t)),
        (r'\$', lambda x,t: (2, t)),
        (r'\w+', lambda x,t: (2, t)),
        (r'".*?((?<!\\)")', lambda x,t: (3, t[1:-1])),
        (r'\'.*?((?<!\\)\')', lambda x,t: (3, t[1:-1])),
        (r'\n#.*?(?:(?=\n)|$)', None),
        (r'\s', None)
    ], flags=re.DOTALL)

    def _eval_program(self, val, prog, column_name):
        # keep a cache of the lex'ed program under the theory that re-lexing
        # is much more expensive than the cache lookup. This is certainly true
        # for more than a few tokens, but it isn't clear for simple programs.
        if column_name is not None and self.template_cache is not None:
            lprog = self.template_cache.get(column_name, None)
            if not lprog:
                lprog = self.lex_scanner.scan(prog)
                self.template_cache[column_name] = lprog
        else:
            lprog = self.lex_scanner.scan(prog)
        parser = _Parser(val, lprog, self.funcs, self)
        return parser.program()

    # ################# Override parent classes methods #####################

    def get_value(self, key, args, kwargs):
        raise Exception('get_value must be implemented in the subclass')

    def format_field(self, val, fmt):
        # ensure we are dealing with a string.
        if isinstance(val, numbers.Number):
            if val:
                val = unicode_type(val)
            else:
                val = ''
        # Handle conditional text
        fmt, prefix, suffix = self._explode_format_string(fmt)

        # Handle functions
        # First see if we have a functional-style expression
        if fmt.startswith('\''):
            p = 0
        else:
            p = fmt.find(':\'')
            if p >= 0:
                p += 1
        if p >= 0 and fmt[-1] == '\'':
            val = self._eval_program(val, fmt[p+1:-1], None)
            colon = fmt[0:p].find(':')
            if colon < 0:
                dispfmt = ''
            else:
                dispfmt = fmt[0:colon]
        else:
            # check for old-style function references
            p = fmt.find('(')
            dispfmt = fmt
            if p >= 0 and fmt[-1] == ')':
                colon = fmt[0:p].find(':')
                if colon < 0:
                    dispfmt = ''
                    colon = 0
                else:
                    dispfmt = fmt[0:colon]
                    colon += 1

                fname = fmt[colon:p].strip()
                if fname in self.funcs:
                    func = self.funcs[fname]
                    if func.arg_count == 2:
                        # only one arg expected. Don't bother to scan. Avoids need
                        # for escaping characters
                        args = [fmt[p+1:-1]]
                    else:
                        args = self.arg_parser.scan(fmt[p+1:])[0]
                        args = [self.backslash_comma_to_comma.sub(',', a) for a in args]
                    if (func.arg_count == 1 and (len(args) != 1 or args[0])) or \
                            (func.arg_count > 1 and func.arg_count != len(args)+1):
                        raise ValueError('Incorrect number of arguments for function ' + fmt[0:p])
                    if func.arg_count == 1:
                        val = func.eval_(self, self.kwargs, self.book, self.locals, val)
                        if self.strip_results:
                            val = val.strip()
                    else:
                        val = func.eval_(self, self.kwargs, self.book, self.locals, val, *args)
                        if self.strip_results:
                            val = val.strip()
                else:
                    return _('%s: unknown function')%fname
        if val:
            val = self._do_format(val, dispfmt)
        if not val:
            return ''
        return prefix + val + suffix

    def evaluate(self, fmt, args, kwargs):
        if fmt.startswith('program:'):
            ans = self._eval_program(kwargs.get('$', None), fmt[8:], self.column_name)
        else:
            ans = self.vformat(fmt, args, kwargs)
        if self.strip_results:
            return self.compress_spaces.sub(' ', ans).strip()
        return ans

    # ######### a formatter that throws exceptions ############

    def unsafe_format(self, fmt, kwargs, book, strip_results=True):
        self.strip_results = strip_results
        self.column_name = self.template_cache = None
        self.kwargs = kwargs
        self.book = book
        self.composite_values = {}
        self.locals = {}
        return self.evaluate(fmt, [], kwargs)

    # ######### a formatter guaranteed not to throw an exception ############

    def safe_format(self, fmt, kwargs, error_value, book,
            column_name=None, template_cache=None,
            strip_results=True, template_functions=None):
        self.strip_results = strip_results
        self.column_name = column_name
        self.template_cache = template_cache
        self.kwargs = kwargs
        self.book = book
        if template_functions:
            self.funcs = template_functions
        else:
            self.funcs = formatter_functions().get_functions()
        self.composite_values = {}
        self.locals = {}
        try:
            ans = self.evaluate(fmt, [], kwargs)
        except Exception as e:
            if DEBUG:  # and getattr(e, 'is_locking_error', False):
                traceback.print_exc()
                if column_name:
                    prints('Error evaluating column named:', column_name)
            ans = error_value + ' ' + error_message(e)
        return ans


class ValidateFormatter(TemplateFormatter):
    '''
    Provides a formatter that substitutes the validation string for every value
    '''

    def get_value(self, key, args, kwargs):
        return self._validation_string

    def validate(self, x):
        from calibre.ebooks.metadata.book.base import Metadata
        return self.safe_format(x, {}, 'VALIDATE ERROR', Metadata(''))


validation_formatter = ValidateFormatter()


class EvalFormatter(TemplateFormatter):
    '''
    A template formatter that uses a simple dict instead of an mi instance
    '''

    def get_value(self, key, args, kwargs):
        if key == '':
            return ''
        key = key.lower()
        return kwargs.get(key, _('No such variable ') + key)


# DEPRECATED. This is not thread safe. Do not use.
eval_formatter = EvalFormatter()
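

# Usage sketch (values illustrative): EvalFormatter resolves {name} fields
# from a plain dict, so simple templates can be expanded without a book
# metadata object. Create a fresh instance rather than using the deprecated
# module-level one.
#
#   EvalFormatter().safe_format(
#       '{title} by {author}', {'title': 'T', 'author': 'A'}, 'ERR', None)
#   # -> 'T by A'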
1692
ebook_converter/utils/formatter_functions.py
Normal file
File diff suppressed because it is too large
42
ebook_converter/utils/html2text.py
Normal file
@@ -0,0 +1,42 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals


def html2text(html):
    from html2text import HTML2Text
    import re
    if isinstance(html, bytes):
        from calibre.ebooks.chardet import xml_to_unicode
        html = xml_to_unicode(html, strip_encoding_pats=True, resolve_entities=True)[0]
    # replace <u> tags with <span> as <u> becomes emphasis in html2text
    html = re.sub(
            r'<\s*(?P<solidus>/?)\s*[uU]\b(?P<rest>[^>]*)>',
            r'<\g<solidus>span\g<rest>>', html)
    h2t = HTML2Text()
    h2t.default_image_alt = _('Unnamed image')
    h2t.body_width = 0
    h2t.single_line_break = True
    h2t.emphasis_mark = '*'
    return h2t.handle(html)


def find_tests():
    import unittest

    class TestH2T(unittest.TestCase):

        def test_html2text_behavior(self):
            for src, expected in {
                '<u>test</U>': 'test\n',
                '<i>test</i>': '*test*\n',
                '<a href="http://else.where/other">other</a>': '[other](http://else.where/other)\n',
                '<img src="test.jpeg">': '\n',
                '<a href="#t">test</a> <span id="t">dest</span>': 'test dest\n',
                '<>a': '<>a\n',
                '<p>a<p>b': 'a\nb\n',
            }.items():
                self.assertEqual(html2text(src), expected)

    return unittest.defaultTestLoader.loadTestsFromTestCase(TestH2T)
323
ebook_converter/utils/icu.py
Normal file
@@ -0,0 +1,323 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import sys
from polyglot.builtins import filter

is_narrow_build = sys.maxunicode < 0x10ffff

# Setup code {{{
import codecs

from calibre.constants import plugins
from calibre.utils.config_base import tweaks
from polyglot.builtins import unicode_type, cmp

_locale = _collator = _primary_collator = _sort_collator = _numeric_collator = _case_sensitive_collator = None
cmp

_none = u''
_none2 = b''
_cmap = {}

_icu, err = plugins['icu']
if _icu is None:
    raise RuntimeError('Failed to load icu with error: %s' % err)
del err
icu_unicode_version = getattr(_icu, 'unicode_version', None)
_nmodes = {m: getattr(_icu, m) for m in ('NFC', 'NFD', 'NFKC', 'NFKD')}

# Ensure that the python internal filesystem and default encodings are not ASCII


def is_ascii(name):
    try:
        return codecs.lookup(name).name == b'ascii'
    except (TypeError, LookupError):
        return True


try:
    if is_ascii(sys.getdefaultencoding()):
        _icu.set_default_encoding(b'utf-8')
except:
    import traceback
    traceback.print_exc()

try:
    if is_ascii(sys.getfilesystemencoding()):
        _icu.set_filesystem_encoding(b'utf-8')
except:
    import traceback
    traceback.print_exc()
del is_ascii


def collator():
    global _collator, _locale
    if _collator is None:
        if _locale is None:
            from calibre.utils.localization import get_lang
            if tweaks['locale_for_sorting']:
                _locale = tweaks['locale_for_sorting']
            else:
                _locale = get_lang()
        try:
            _collator = _icu.Collator(_locale)
        except Exception as e:
            print('Failed to load collator for locale: %r with error %r, using English' % (_locale, e))
            _collator = _icu.Collator('en')
    return _collator


def change_locale(locale=None):
    global _locale, _collator, _primary_collator, _sort_collator, _numeric_collator, _case_sensitive_collator
    _collator = _primary_collator = _sort_collator = _numeric_collator = _case_sensitive_collator = None
    _locale = locale


def primary_collator():
    'Ignores case differences and accented characters'
    global _primary_collator
    if _primary_collator is None:
        _primary_collator = collator().clone()
        _primary_collator.strength = _icu.UCOL_PRIMARY
    return _primary_collator


def sort_collator():
    'Ignores case differences and recognizes numbers in strings (if the tweak is set)'
    global _sort_collator
    if _sort_collator is None:
        _sort_collator = collator().clone()
        _sort_collator.strength = _icu.UCOL_SECONDARY
        _sort_collator.numeric = tweaks['numeric_collation']
    return _sort_collator


def numeric_collator():
    'Uses natural sorting for numbers inside strings so something2 will sort before something10'
    global _numeric_collator
    if _numeric_collator is None:
        _numeric_collator = collator().clone()
        _numeric_collator.strength = _icu.UCOL_SECONDARY
        _numeric_collator.numeric = True
    return _numeric_collator


def case_sensitive_collator():
    'Always sorts upper case letter before lower case'
    global _case_sensitive_collator
    if _case_sensitive_collator is None:
        _case_sensitive_collator = collator().clone()
        _case_sensitive_collator.numeric = sort_collator().numeric
        _case_sensitive_collator.upper_first = True
    return _case_sensitive_collator
# Templates that will be used to generate various concrete
|
||||
# function implementations based on different collators, to allow lazy loading
|
||||
# of collators, with maximum runtime performance
|
||||
|
||||
|
||||
_sort_key_template = '''
|
||||
def {name}(obj):
|
||||
try:
|
||||
try:
|
||||
return {collator}.{func}(obj)
|
||||
except AttributeError:
|
||||
pass
|
||||
return {collator_func}().{func}(obj)
|
||||
except TypeError:
|
||||
if isinstance(obj, bytes):
|
||||
try:
|
||||
obj = obj.decode(sys.getdefaultencoding())
|
||||
except ValueError:
|
||||
return obj
|
||||
return {collator}.{func}(obj)
|
||||
return b''
|
||||
'''
|
||||
|
||||
_strcmp_template = '''
|
||||
def {name}(a, b):
|
||||
try:
|
||||
try:
|
||||
return {collator}.{func}(a, b)
|
||||
except AttributeError:
|
||||
pass
|
||||
return {collator_func}().{func}(a, b)
|
||||
except TypeError:
|
||||
if isinstance(a, bytes):
|
||||
try:
|
||||
a = a.decode(sys.getdefaultencoding())
|
||||
except ValueError:
|
||||
return cmp(a, b)
|
||||
elif a is None:
|
||||
a = u''
|
||||
if isinstance(b, bytes):
|
||||
try:
|
||||
b = b.decode(sys.getdefaultencoding())
|
||||
except ValueError:
|
||||
return cmp(a, b)
|
||||
elif b is None:
|
||||
b = u''
|
||||
return {collator}.{func}(a, b)
|
||||
'''
|
||||
|
||||
_change_case_template = '''
|
||||
def {name}(x):
|
||||
try:
|
||||
try:
|
||||
return _icu.change_case(x, _icu.{which}, _locale)
|
||||
except NotImplementedError:
|
||||
pass
|
||||
collator() # sets _locale
|
||||
return _icu.change_case(x, _icu.{which}, _locale)
|
||||
except TypeError:
|
||||
if isinstance(x, bytes):
|
||||
try:
|
||||
x = x.decode(sys.getdefaultencoding())
|
||||
except ValueError:
|
||||
return x
|
||||
return _icu.change_case(x, _icu.{which}, _locale)
|
||||
raise
|
||||
'''
|
||||
|
||||
|
||||
def _make_func(template, name, **kwargs):
|
||||
l = globals()
|
||||
kwargs['name'] = name
|
||||
kwargs['func'] = kwargs.get('func', 'sort_key')
|
||||
exec(template.format(**kwargs), l)
|
||||
return l[name]
|
||||
|
||||
|
||||
# }}}
|
||||
|
||||
# ################ The string functions ########################################
|
||||
sort_key = _make_func(_sort_key_template, 'sort_key', collator='_sort_collator', collator_func='sort_collator')
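# Illustrative usage: sorted(names, key=sort_key) sorts case-insensitively and,
# if the numeric_collation tweak is set, treats embedded numbers naturally
# (behavior varies with the active locale).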

numeric_sort_key = _make_func(_sort_key_template, 'numeric_sort_key', collator='_numeric_collator', collator_func='numeric_collator')

primary_sort_key = _make_func(_sort_key_template, 'primary_sort_key', collator='_primary_collator', collator_func='primary_collator')

case_sensitive_sort_key = _make_func(_sort_key_template, 'case_sensitive_sort_key',
                collator='_case_sensitive_collator', collator_func='case_sensitive_collator')

collation_order = _make_func(_sort_key_template, 'collation_order', collator='_sort_collator', collator_func='sort_collator', func='collation_order')

strcmp = _make_func(_strcmp_template, 'strcmp', collator='_sort_collator', collator_func='sort_collator', func='strcmp')

case_sensitive_strcmp = _make_func(
    _strcmp_template, 'case_sensitive_strcmp', collator='_case_sensitive_collator', collator_func='case_sensitive_collator', func='strcmp')

primary_strcmp = _make_func(_strcmp_template, 'primary_strcmp', collator='_primary_collator', collator_func='primary_collator', func='strcmp')

upper = _make_func(_change_case_template, 'upper', which='UPPER_CASE')

lower = _make_func(_change_case_template, 'lower', which='LOWER_CASE')

title_case = _make_func(_change_case_template, 'title_case', which='TITLE_CASE')


def capitalize(x):
    try:
        return upper(x[0]) + lower(x[1:])
    except (IndexError, TypeError, AttributeError):
        return x


try:
    swapcase = _icu.swap_case
except AttributeError:  # For people running from source
    swapcase = lambda x:x.swapcase()

find = _make_func(_strcmp_template, 'find', collator='_collator', collator_func='collator', func='find')

primary_find = _make_func(_strcmp_template, 'primary_find', collator='_primary_collator', collator_func='primary_collator', func='find')

contains = _make_func(_strcmp_template, 'contains', collator='_collator', collator_func='collator', func='contains')

primary_contains = _make_func(_strcmp_template, 'primary_contains', collator='_primary_collator', collator_func='primary_collator', func='contains')

startswith = _make_func(_strcmp_template, 'startswith', collator='_collator', collator_func='collator', func='startswith')

primary_startswith = _make_func(_strcmp_template, 'primary_startswith', collator='_primary_collator', collator_func='primary_collator', func='startswith')

safe_chr = _icu.chr

ord_string = _icu.ord_string


def character_name(string):
    try:
        return _icu.character_name(unicode_type(string)) or None
    except (TypeError, ValueError, KeyError):
        pass


def character_name_from_code(code):
    try:
        return _icu.character_name_from_code(code) or ''
    except (TypeError, ValueError, KeyError):
        return ''


def normalize(text, mode='NFC'):
    # This is very slightly slower than using unicodedata.normalize, so stick
    # with that unless you have very good reasons not to. Also, its speed
    # decreases on wide python builds, where conversion to/from ICU's string
    # representation is slower.
    return _icu.normalize(_nmodes[mode], unicode_type(text))


def contractions(col=None):
    global _cmap
    col = col or _collator
    if col is None:
        col = collator()
    ans = _cmap.get(col, None)
    if ans is None:
        ans = col.contractions()
        ans = frozenset(filter(None, ans))
        _cmap[col] = ans
    return ans


def partition_by_first_letter(items, reverse=False, key=lambda x:x):
    # Build a list of 'equal' first letters by noticing changes
    # in ICU's 'ordinal' for the first letter.
    from collections import OrderedDict
    items = sorted(items, key=lambda x:sort_key(key(x)), reverse=reverse)
    ans = OrderedDict()
    last_c, last_ordnum = ' ', 0
    for item in items:
        c = icu_upper(key(item) or ' ')
        ordnum, ordlen = collation_order(c)
        if last_ordnum != ordnum:
            if not is_narrow_build:
                ordlen = 1
            last_c = c[0:ordlen]
            last_ordnum = ordnum
        try:
            ans[last_c].append(item)
        except KeyError:
            ans[last_c] = [item]
    return ans


# Return the number of unicode codepoints in a string
string_length = _icu.string_length if is_narrow_build else len

# Return the number of UTF-16 codepoints in a string
utf16_length = len if is_narrow_build else _icu.utf16_length

################################################################################

if __name__ == '__main__':
    from calibre.utils.icu_test import run
    run(verbosity=4)
690
ebook_converter/utils/img.py
Normal file
@@ -0,0 +1,690 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015-2019, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals

import errno
import os
import shutil
import subprocess
import sys
import tempfile
from io import BytesIO
from threading import Thread

# We use explicit module imports so tracebacks when importing are more useful
from PyQt5.QtCore import QBuffer, QByteArray, Qt
from PyQt5.QtGui import QColor, QImage, QImageReader, QImageWriter, QPixmap, QTransform

from calibre import fit_image, force_unicode
from calibre.constants import iswindows, plugins, ispy3
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.config_base import tweaks
from calibre.utils.filenames import atomic_rename
from calibre.utils.imghdr import what
from polyglot.builtins import string_or_bytes, unicode_type

# Utilities {{{
imageops, imageops_err = plugins['imageops']
if imageops is None:
    raise RuntimeError(imageops_err)


class NotImage(ValueError):
    pass


def normalize_format_name(fmt):
    fmt = fmt.lower()
    if fmt == 'jpg':
        fmt = 'jpeg'
    return fmt


def get_exe_path(name):
    from calibre.ebooks.pdf.pdftohtml import PDFTOHTML
    base = os.path.dirname(PDFTOHTML)
    if iswindows:
        name += '-calibre.exe'
    if not base:
        return name
    return os.path.join(base, name)


def load_jxr_data(data):
    with TemporaryDirectory() as tdir:
        if iswindows and isinstance(tdir, unicode_type):
            tdir = tdir.encode('mbcs')
        with lopen(os.path.join(tdir, 'input.jxr'), 'wb') as f:
            f.write(data)
        cmd = [get_exe_path('JxrDecApp'), '-i', 'input.jxr', '-o', 'output.tif']
        creationflags = 0x08 if iswindows else 0
        subprocess.Popen(cmd, cwd=tdir, stdout=lopen(os.devnull, 'wb'), stderr=subprocess.STDOUT, creationflags=creationflags).wait()
        i = QImage()
        if not i.load(os.path.join(tdir, 'output.tif')):
            raise NotImage('Failed to convert JPEG-XR image')
        return i

# }}}

# png <-> gif {{{


def png_data_to_gif_data(data):
    from PIL import Image
    img = Image.open(BytesIO(data))
    buf = BytesIO()
    if img.mode in ('p', 'P'):
        transparency = img.info.get('transparency')
        if transparency is not None:
            img.save(buf, 'gif', transparency=transparency)
        else:
            img.save(buf, 'gif')
    elif img.mode in ('rgba', 'RGBA'):
        alpha = img.split()[3]
        mask = Image.eval(alpha, lambda a: 255 if a <= 128 else 0)
        img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE, colors=255)
        img.paste(255, mask)
        img.save(buf, 'gif', transparency=255)
    else:
        img = img.convert('P', palette=Image.ADAPTIVE)
        img.save(buf, 'gif')
    return buf.getvalue()


class AnimatedGIF(ValueError):
    pass


def gif_data_to_png_data(data, discard_animation=False):
    from PIL import Image
    img = Image.open(BytesIO(data))
    if img.is_animated and not discard_animation:
        raise AnimatedGIF()
    buf = BytesIO()
    img.save(buf, 'png')
    return buf.getvalue()

# }}}

# Loading images {{{


def null_image():
    ' Create an invalid image. For internal use. '
    return QImage()


def image_from_data(data):
    ' Create an image object from data, which should be a bytestring. '
    if isinstance(data, QImage):
        return data
    i = QImage()
    if not i.loadFromData(data):
        q = what(None, data)
        if q == 'jxr':
            return load_jxr_data(data)
        raise NotImage('Not a valid image (detected type: {})'.format(q))
    return i


def image_from_path(path):
    ' Load an image from the specified path. '
    with lopen(path, 'rb') as f:
        return image_from_data(f.read())


def image_from_x(x):
    ' Create an image from a bytestring or a path or a file like object. '
    if isinstance(x, unicode_type):
        return image_from_path(x)
    if hasattr(x, 'read'):
        return image_from_data(x.read())
    if isinstance(x, (bytes, QImage)):
        return image_from_data(x)
    if isinstance(x, bytearray):
        return image_from_data(bytes(x))
    if isinstance(x, QPixmap):
        return x.toImage()
    raise TypeError('Unknown image src type: %s' % type(x))


def image_and_format_from_data(data):
    ' Create an image object from the specified data which should be a bytestring and also return the format of the image '
    ba = QByteArray(data)
    buf = QBuffer(ba)
    buf.open(QBuffer.ReadOnly)
    r = QImageReader(buf)
    fmt = bytes(r.format()).decode('utf-8')
    return r.read(), fmt
# }}}

# Saving images {{{


def image_to_data(img, compression_quality=95, fmt='JPEG', png_compression_level=9, jpeg_optimized=True, jpeg_progressive=False):
    '''
    Serialize image to bytestring in the specified format.

    :param compression_quality: is for JPEG and goes from 0 to 100. 100 being lowest compression, highest image quality
    :param png_compression_level: is for PNG and goes from 0-9. 9 being highest compression.
    :param jpeg_optimized: Turns on the 'optimize' option for libjpeg which losslessly reduces file size
    :param jpeg_progressive: Turns on the 'progressive scan' option for libjpeg which allows JPEG images to be downloaded in streaming fashion
    '''
    fmt = fmt.upper()
    ba = QByteArray()
    buf = QBuffer(ba)
    buf.open(QBuffer.WriteOnly)
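    # Qt cannot write GIF files, so render to PNG here and convert the
    # PNG data to GIF with PIL below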
    if fmt == 'GIF':
        w = QImageWriter(buf, b'PNG')
        w.setQuality(90)
        if not w.write(img):
            raise ValueError('Failed to export image as ' + fmt + ' with error: ' + w.errorString())
        return png_data_to_gif_data(ba.data())
    is_jpeg = fmt in ('JPG', 'JPEG')
    w = QImageWriter(buf, fmt.encode('ascii'))
    if is_jpeg:
        if img.hasAlphaChannel():
            img = blend_image(img)
        # QImageWriter only gained the following options in Qt 5.5
        if jpeg_optimized:
            w.setOptimizedWrite(True)
        if jpeg_progressive:
            w.setProgressiveScanWrite(True)
        w.setQuality(compression_quality)
    elif fmt == 'PNG':
        cl = min(9, max(0, png_compression_level))
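        # Qt expresses PNG compression as a quality value where lower means
        # smaller files, so map compression level 0-9 onto quality 90-0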
        w.setQuality(10 * (9-cl))
    if not w.write(img):
        raise ValueError('Failed to export image as ' + fmt + ' with error: ' + w.errorString())
    return ba.data()


def save_image(img, path, **kw):
    ''' Save image to the specified path. Image format is taken from the file
    extension. You can pass the same keyword arguments as for the
    `image_to_data()` function. '''
    fmt = path.rpartition('.')[-1]
    kw['fmt'] = kw.get('fmt', fmt)
    with lopen(path, 'wb') as f:
        f.write(image_to_data(image_from_data(img), **kw))


def save_cover_data_to(
    data, path=None,
    bgcolor='#ffffff',
    resize_to=None,
    compression_quality=90,
    minify_to=None,
    grayscale=False,
    eink=False, letterbox=False,
    data_fmt='jpeg'
):
    '''
    Saves image in data to path, in the format specified by the path
    extension. Removes any transparency. If there is no transparency and no
    resize and the input and output image formats are the same, no changes are
    made.

    :param data: Image data as bytestring
    :param path: If None, image data is returned in the format given by data_fmt
    :param data_fmt: The fmt to return data in when path is None. Defaults to JPEG
    :param compression_quality: The quality of the image after compression.
        Number between 1 and 100. 1 means highest compression, 100 means no
        compression (lossless). When generating PNG this number is divided by 10
        for the png_compression_level.
    :param bgcolor: The color for transparent pixels. Must be specified in hex.
    :param resize_to: A tuple (width, height) or None for no resizing
    :param minify_to: A tuple (width, height) to specify maximum target size.
        The image will be resized to fit into this target size. If None the
        value from the tweak is used.
    :param grayscale: If True, the image is converted to grayscale,
        if that's not already the case.
    :param eink: If True, the image is dithered down to the 16 specific shades
        of gray of the eInk palette.
        Works best with formats that actually support color indexing (i.e., PNG)
    :param letterbox: If True, in addition to fitting the image inside minify_to,
        it will be letterboxed (i.e., centered on a black background).
    '''
    fmt = normalize_format_name(data_fmt if path is None else os.path.splitext(path)[1][1:])
    if isinstance(data, QImage):
        img = data
        changed = True
    else:
        img, orig_fmt = image_and_format_from_data(data)
        orig_fmt = normalize_format_name(orig_fmt)
        changed = fmt != orig_fmt
    if resize_to is not None:
        changed = True
        img = img.scaled(resize_to[0], resize_to[1], Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
    owidth, oheight = img.width(), img.height()
    nwidth, nheight = tweaks['maximum_cover_size'] if minify_to is None else minify_to
    if letterbox:
        img = blend_on_canvas(img, nwidth, nheight, bgcolor='#000000')
        # Check if we were minified
        if oheight != nheight or owidth != nwidth:
            changed = True
    else:
        scaled, nwidth, nheight = fit_image(owidth, oheight, nwidth, nheight)
        if scaled:
            changed = True
            img = img.scaled(nwidth, nheight, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
    if img.hasAlphaChannel():
        changed = True
        img = blend_image(img, bgcolor)
    if grayscale and not eink:
        if not img.allGray():
            changed = True
            img = grayscale_image(img)
    if eink:
        # NOTE: Keep in mind that JPG does NOT actually support indexed colors, so the JPG algorithm will then smush everything back into a 256c mess...
        # Thankfully, Nickel handles PNG just fine, and we potentially generate smaller files to boot, because they can be properly color indexed ;).
        img = eink_dither_image(img)
        changed = True
    if path is None:
        return image_to_data(img, compression_quality, fmt, compression_quality // 10) if changed else data
    with lopen(path, 'wb') as f:
        f.write(image_to_data(img, compression_quality, fmt, compression_quality // 10) if changed else data)
# }}}

# Overlaying images {{{


def blend_on_canvas(img, width, height, bgcolor='#ffffff'):
    ' Blend the `img` onto a canvas with the specified background color and size '
    w, h = img.width(), img.height()
    scaled, nw, nh = fit_image(w, h, width, height)
    if scaled:
        img = img.scaled(nw, nh, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
        w, h = nw, nh
    canvas = QImage(width, height, QImage.Format_RGB32)
    canvas.fill(QColor(bgcolor))
    overlay_image(img, canvas, (width - w)//2, (height - h)//2)
    return canvas


class Canvas(object):

    def __init__(self, width, height, bgcolor='#ffffff'):
        self.img = QImage(width, height, QImage.Format_RGB32)
        self.img.fill(QColor(bgcolor))

    def __enter__(self):
        return self

    def __exit__(self, *args):
        pass

    def compose(self, img, x=0, y=0):
        img = image_from_data(img)
        overlay_image(img, self.img, x, y)

    def export(self, fmt='JPEG', compression_quality=95):
        return image_to_data(self.img, compression_quality=compression_quality, fmt=fmt)


def create_canvas(width, height, bgcolor='#ffffff'):
    ' Create a blank canvas of the specified size and color '
    img = QImage(width, height, QImage.Format_RGB32)
    img.fill(QColor(bgcolor))
    return img


def overlay_image(img, canvas=None, left=0, top=0):
    ' Overlay the `img` onto the canvas at the specified position '
    if canvas is None:
        canvas = QImage(img.size(), QImage.Format_RGB32)
        canvas.fill(Qt.white)
    left, top = int(left), int(top)
    imageops.overlay(img, canvas, left, top)
    return canvas


def texture_image(canvas, texture):
    ' Repeatedly tile the image `texture` across and down the image `canvas` '
    if canvas.hasAlphaChannel():
        canvas = blend_image(canvas)
    return imageops.texture_image(canvas, texture)


def blend_image(img, bgcolor='#ffffff'):
    ' Used to convert images that have semi-transparent pixels to opaque by blending with the specified color '
    canvas = QImage(img.size(), QImage.Format_RGB32)
    canvas.fill(QColor(bgcolor))
    overlay_image(img, canvas)
    return canvas
# }}}

# Image borders {{{


def add_borders_to_image(img, left=0, top=0, right=0, bottom=0, border_color='#ffffff'):
    img = image_from_data(img)
    if not (left > 0 or right > 0 or top > 0 or bottom > 0):
        return img
    canvas = QImage(img.width() + left + right, img.height() + top + bottom, QImage.Format_RGB32)
    canvas.fill(QColor(border_color))
    overlay_image(img, canvas, left, top)
    return canvas


def remove_borders_from_image(img, fuzz=None):
    ''' Try to auto-detect and remove any borders from the image. Returns
    the image itself if no borders could be removed. `fuzz` is a measure of
    what colors are considered identical (must be a number between 0 and 255 in
    absolute intensity units). Default is from a tweak whose default value is 10. '''
    fuzz = tweaks['cover_trim_fuzz_value'] if fuzz is None else fuzz
    img = image_from_data(img)
    ans = imageops.remove_borders(img, max(0, fuzz))
    return ans if ans.size() != img.size() else img
# }}}

# Cropping/scaling of images {{{


def resize_image(img, width, height):
    return img.scaled(int(width), int(height), Qt.IgnoreAspectRatio, Qt.SmoothTransformation)


def resize_to_fit(img, width, height):
    img = image_from_data(img)
    resize_needed, nw, nh = fit_image(img.width(), img.height(), width, height)
    if resize_needed:
        img = resize_image(img, nw, nh)
    return resize_needed, img


def clone_image(img):
    ''' Returns a shallow copy of the image. However, the underlying data buffer
    will be automatically copied-on-write '''
    return QImage(img)


def scale_image(data, width=60, height=80, compression_quality=70, as_png=False, preserve_aspect_ratio=True):
    ''' Scale an image, returning a (width, height, data) tuple, where data is
    the scaled image as either JPEG or PNG data (bytestring). Transparency is
    alpha blended with white when converting to JPEG. Is thread safe and does
    not require a QApplication. '''
    # We use Qt instead of ImageMagick here because ImageMagick seems to use
    # some kind of memory pool, causing memory consumption to sky rocket.
    img = image_from_data(data)
    if preserve_aspect_ratio:
        scaled, nwidth, nheight = fit_image(img.width(), img.height(), width, height)
        if scaled:
            img = img.scaled(nwidth, nheight, Qt.KeepAspectRatio, Qt.SmoothTransformation)
    else:
        if img.width() != width or img.height() != height:
            img = img.scaled(width, height, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
    fmt = 'PNG' if as_png else 'JPEG'
    w, h = img.width(), img.height()
    return w, h, image_to_data(img, compression_quality=compression_quality, fmt=fmt)


def crop_image(img, x, y, width, height):
    '''
    Return the specified section of the image.

    :param x, y: The top left corner of the crop box
    :param width, height: The width and height of the crop box. Note that if
        the crop box exceeds the source image's dimensions, width and height
        will be auto-truncated.
    '''
    img = image_from_data(img)
    width = min(width, img.width() - x)
    height = min(height, img.height() - y)
    return img.copy(x, y, width, height)

# }}}

# Image transformations {{{


def grayscale_image(img):
    return imageops.grayscale(image_from_data(img))


def set_image_opacity(img, alpha=0.5):
    ''' Change the opacity of `img`. Note that the alpha value is multiplied to
    any existing alpha values, so you cannot use this function to convert a
    semi-transparent image to an opaque one. For that use `blend_image()`. '''
    return imageops.set_opacity(image_from_data(img), alpha)


def flip_image(img, horizontal=False, vertical=False):
    return image_from_data(img).mirrored(horizontal, vertical)


def image_has_transparent_pixels(img):
    ' Return True iff the image has at least one semi-transparent pixel '
    img = image_from_data(img)
    if img.isNull():
        return False
    return imageops.has_transparent_pixels(img)


def rotate_image(img, degrees):
    t = QTransform()
    t.rotate(degrees)
    return image_from_data(img).transformed(t)


def gaussian_sharpen_image(img, radius=0, sigma=3, high_quality=True):
    return imageops.gaussian_sharpen(image_from_data(img), max(0, radius), sigma, high_quality)


def gaussian_blur_image(img, radius=-1, sigma=3):
    return imageops.gaussian_blur(image_from_data(img), max(0, radius), sigma)


def despeckle_image(img):
    return imageops.despeckle(image_from_data(img))


def oil_paint_image(img, radius=-1, high_quality=True):
    return imageops.oil_paint(image_from_data(img), radius, high_quality)


def normalize_image(img):
    return imageops.normalize(image_from_data(img))


def quantize_image(img, max_colors=256, dither=True, palette=''):
    ''' Quantize the image to contain a maximum of `max_colors` colors. By
    default a palette is chosen automatically, if you want to use a fixed
    palette, then pass in a list of color names in the `palette` variable. If
    you specify a palette, `max_colors` is ignored. Note that it is possible
    for the actual number of colors used to be less than max_colors.

    :param max_colors: Max. number of colors in the auto-generated palette. Must be between 2 and 256.
    :param dither: Whether to use dithering or not. Dithering is almost always a good thing.
    :param palette: Use a manually specified palette instead. For example: palette='red green blue #eee'
    '''
    img = image_from_data(img)
    if img.hasAlphaChannel():
        img = blend_image(img)
    if palette and isinstance(palette, string_or_bytes):
        palette = palette.split()
    return imageops.quantize(img, max_colors, dither, [QColor(x).rgb() for x in palette])


def eink_dither_image(img):
    ''' Dither the source image down to the eInk palette of 16 shades of grey,
    using ImageMagick's OrderedDither algorithm.

    NOTE: No need to call grayscale_image first, as this will inline a grayscaling pass if need be.

    Returns a QImage in Grayscale8 pixel format.
    '''
    img = image_from_data(img)
    if img.hasAlphaChannel():
        img = blend_image(img)
    return imageops.ordered_dither(img)

# }}}

# Optimization of images {{{


def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
    file_path = os.path.abspath(file_path)
    cwd = os.path.dirname(file_path)
    ext = os.path.splitext(file_path)[1]
    if not ext or len(ext) > 10 or not ext.startswith('.'):
        ext = '.jpg'
    fd, outfile = tempfile.mkstemp(dir=cwd, suffix=ext)
    try:
        if as_filter:
            outf = os.fdopen(fd, 'wb')
        else:
            os.close(fd)
        iname, oname = os.path.basename(file_path), os.path.basename(outfile)

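        # cmd uses the sentinels True/False as placeholders for the input and
        # output file names; swap the real basenames in before running it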
        def repl(q, r):
            cmd[cmd.index(q)] = r
        if not as_filter:
            repl(True, iname), repl(False, oname)
        if iswindows and not ispy3:
            # subprocess in python 2 cannot handle unicode strings that are not
            # encodeable in mbcs, so we fail here, where it is more explicit,
            # instead.
            cmd = [x.encode('mbcs') if isinstance(x, unicode_type) else x for x in cmd]
            if isinstance(cwd, unicode_type):
                cwd = cwd.encode('mbcs')
        stdin = subprocess.PIPE if as_filter else None
        stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
        creationflags = 0x08 if iswindows else 0
        p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=stderr, stdin=stdin, creationflags=creationflags)
        stderr = p.stderr if as_filter else p.stdout
        if as_filter:
            src = input_data or open(file_path, 'rb')

            def copy(src, dest):
                try:
                    shutil.copyfileobj(src, dest)
                finally:
                    src.close(), dest.close()
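            # pump input and output on background threads so that neither pipe
            # fills up and deadlocks the child process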
            inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
            inw.daemon = True
            inw.start()
            outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
            outw.daemon = True
            outw.start()
        raw = force_unicode(stderr.read())
        if p.wait() != 0:
            return raw
        else:
            if as_filter:
                outw.join(60.0), inw.join(60.0)
            try:
                sz = os.path.getsize(outfile)
            except EnvironmentError:
                sz = 0
            if sz < 1:
                return '%s returned a zero size image' % cmd[0]
            shutil.copystat(file_path, outfile)
            atomic_rename(outfile, file_path)
    finally:
        try:
            os.remove(outfile)
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise
        try:
            os.remove(outfile + '.bak')  # optipng creates these files
        except EnvironmentError as err:
            if err.errno != errno.ENOENT:
                raise


def optimize_jpeg(file_path):
    exe = get_exe_path('jpegtran')
    cmd = [exe] + '-copy none -optimize -progressive -maxmemory 100M -outfile'.split() + [False, True]
    return run_optimizer(file_path, cmd)


def optimize_png(file_path, level=7):
    ' level goes from 1 to 7 with 7 being maximum compression '
    exe = get_exe_path('optipng')
    cmd = [exe] + '-fix -clobber -strip all -o{} -out'.format(level).split() + [False, True]
    return run_optimizer(file_path, cmd)


def encode_jpeg(file_path, quality=80):
    from calibre.utils.speedups import ReadOnlyFileBuffer
    quality = max(0, min(100, int(quality)))
    exe = get_exe_path('cjpeg')
    cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [unicode_type(quality)]
    img = QImage()
    if not img.load(file_path):
        raise ValueError('%s is not a valid image file' % file_path)
    ba = QByteArray()
    buf = QBuffer(ba)
    buf.open(QBuffer.WriteOnly)
    if not img.save(buf, 'PPM'):
        raise ValueError('Failed to export image to PPM')
    return run_optimizer(file_path, cmd, as_filter=True, input_data=ReadOnlyFileBuffer(ba.data()))
# }}}


def test():  # {{{
    from calibre.ptempfile import TemporaryDirectory
    from calibre import CurrentDir
    from glob import glob
    img = image_from_data(I('lt.png', data=True, allow_user_override=False))
    with TemporaryDirectory() as tdir, CurrentDir(tdir):
        save_image(img, 'test.jpg')
        ret = optimize_jpeg('test.jpg')
        if ret is not None:
            raise SystemExit('optimize_jpeg failed: %s' % ret)
        ret = encode_jpeg('test.jpg')
        if ret is not None:
            raise SystemExit('encode_jpeg failed: %s' % ret)
        shutil.copyfile(I('lt.png'), 'test.png')
        ret = optimize_png('test.png')
        if ret is not None:
            raise SystemExit('optimize_png failed: %s' % ret)
        if glob('*.bak'):
            raise SystemExit('Spurious .bak files left behind')
        quantize_image(img)
        oil_paint_image(img)
        gaussian_sharpen_image(img)
        gaussian_blur_image(img)
        despeckle_image(img)
        remove_borders_from_image(img)
        image_to_data(img, fmt='GIF')
        raw = subprocess.Popen([get_exe_path('JxrDecApp'), '-h'], creationflags=0x08 if iswindows else 0, stdout=subprocess.PIPE).stdout.read()
        if b'JPEG XR Decoder Utility' not in raw:
            raise SystemExit('Failed to run JxrDecApp')
# }}}


if __name__ == '__main__':  # {{{
    args = sys.argv[1:]
    infile = args.pop(0)
    img = image_from_data(lopen(infile, 'rb').read())
    func = globals()[args[0]]
    kw = {}
    args.pop(0)
    outf = None
    while args:
        k = args.pop(0)
        if '=' in k:
            n, v = k.partition('=')[::2]
            if v in ('True', 'False'):
                v = True if v == 'True' else False
            try:
                v = int(v)
            except Exception:
                try:
                    v = float(v)
                except Exception:
                    pass
            kw[n] = v
        else:
            outf = k
    if outf is None:
        bn = os.path.basename(infile)
        outf = bn.rpartition('.')[0] + '-output.' + bn.rpartition('.')[-1]
    img = func(img, **kw)
    with lopen(outf, 'wb') as f:
        f.write(image_to_data(img, fmt=outf.rpartition('.')[-1]))
# }}}
263
ebook_converter/utils/imghdr.py
Normal file
@@ -0,0 +1,263 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals

from struct import unpack, error
import os
from calibre.utils.speedups import ReadOnlyFileBuffer
from calibre.constants import ispy3
from polyglot.builtins import string_or_bytes, unicode_type

""" Recognize image file formats and sizes based on their first few bytes."""

HSIZE = 120


def what(file, h=None):
    ' Recognize image headers '
    if h is None:
        if isinstance(file, string_or_bytes):
            with lopen(file, 'rb') as f:
                h = f.read(HSIZE)
        else:
            location = file.tell()
            h = file.read(HSIZE)
            file.seek(location)
    if isinstance(h, bytes):
        h = memoryview(h)
    for tf in tests:
        res = tf(h)
        if res:
            return res
    # There exist some jpeg files with no headers, only the starting two bytes.
    # If we cannot identify as anything else, identify as jpeg.
    if h[:2] == b'\xff\xd8':
        return 'jpeg'
    return None


def identify(src):
    ''' Recognize file format and sizes. Returns format, width, height. width
    and height will be -1 if not found and fmt will be None if the image is not
    recognized. '''
    width = height = -1

    if isinstance(src, unicode_type):
        stream = lopen(src, 'rb')
    elif isinstance(src, bytes):
        stream = ReadOnlyFileBuffer(src)
    else:
        stream = src

    pos = stream.tell()
    head = stream.read(HSIZE)
    stream.seek(pos)
    fmt = what(None, head)

    if fmt in {'jpeg', 'gif', 'png', 'jpeg2000'}:
        size = len(head)
        if fmt == 'png':
            # PNG
            s = head[16:24] if size >= 24 and head[12:16] == b'IHDR' else head[8:16]
            try:
                width, height = unpack(b">LL", s)
            except error:
                return fmt, width, height
        elif fmt == 'jpeg':
            # JPEG
            pos = stream.tell()
            try:
                height, width = jpeg_dimensions(stream)
            except Exception:
                return fmt, width, height
            finally:
                stream.seek(pos)
        elif fmt == 'gif':
            # GIF
            try:
                width, height = unpack(b"<HH", head[6:10])
            except error:
                return fmt, width, height
        elif size >= 56 and fmt == 'jpeg2000':
            # JPEG2000
            try:
                height, width = unpack(b'>LL', head[48:56])
            except error:
                return fmt, width, height
    return fmt, width, height

# ---------------------------------#
# Subroutines per image file type  #
# ---------------------------------#


tests = []


def test(f):
    tests.append(f)
    return f


@test
def jpeg(h):
    """JPEG data in JFIF format (Changed by Kovid to mimic the file utility,
    the original code was failing with some jpegs that included ICC_PROFILE
    data, for example: http://nationalpostnews.files.wordpress.com/2013/03/budget.jpeg?w=300&h=1571)"""
    if h[6:10] in (b'JFIF', b'Exif'):
        return 'jpeg'
    if h[:2] == b'\xff\xd8':
        q = h[:32].tobytes()
        if b'JFIF' in q or b'8BIM' in q:
            return 'jpeg'


def jpeg_dimensions(stream):
    # A JPEG marker is two bytes of the form 0xff x where 0 < x < 0xff
    # See section B.1.1.2 of https://www.w3.org/Graphics/JPEG/itu-t81.pdf
    # We read the dimensions from the first SOFn section we come across
    stream.seek(2, os.SEEK_CUR)

    def read(n):
        ans = stream.read(n)
        if len(ans) != n:
            raise ValueError('Truncated JPEG data')
        return ans

    if ispy3:
        def read_byte():
            return read(1)[0]
    else:
        def read_byte():
            return ord(read(1)[0])

    x = None
    while True:
        # Find next marker
        while x != 0xff:
            x = read_byte()
        # Soak up padding
        marker = 0xff
        while marker == 0xff:
            marker = read_byte()
        q = marker
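        # 0xc4 (DHT) and 0xcc (DAC) fall inside the SOF marker range but are
        # not SOFn markers, so they are excluded below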
        if 0xc0 <= q <= 0xcf and q != 0xc4 and q != 0xcc:
            # SOFn marker
            stream.seek(3, os.SEEK_CUR)
            return unpack(b'>HH', read(4))
        elif 0xd8 <= q <= 0xda:
            break  # start of image, end of image, start of scan, no point
        elif q == 0:
            return -1, -1  # Corrupted JPEG
        elif q == 0x01 or 0xd0 <= q <= 0xd7:
            # Standalone marker
            continue
        else:
            # skip this section
            size = unpack(b'>H', read(2))[0]
            stream.seek(size - 2, os.SEEK_CUR)
        # not a SOFn marker, keep looking

    return -1, -1


@test
def png(h):
    if h[:8] == b"\211PNG\r\n\032\n":
        return 'png'


@test
def gif(h):
    """GIF ('87 and '89 variants)"""
    if h[:6] in (b'GIF87a', b'GIF89a'):
        return 'gif'


@test
def tiff(h):
    """TIFF (can be in Motorola or Intel byte order)"""
    if h[:2] in (b'MM', b'II'):
        if h[2:4] == b'\xbc\x01':
            return 'jxr'
        return 'tiff'


@test
def webp(h):
    if h[:4] == b'RIFF' and h[8:12] == b'WEBP':
        return 'webp'


@test
def rgb(h):
    """SGI image library"""
    if h[:2] == b'\001\332':
        return 'rgb'


@test
def pbm(h):
    """PBM (portable bitmap)"""
    if len(h) >= 3 and \
            h[0] == b'P' and h[1] in b'14' and h[2] in b' \t\n\r':
        return 'pbm'


@test
def pgm(h):
    """PGM (portable graymap)"""
    if len(h) >= 3 and \
            h[0] == b'P' and h[1] in b'25' and h[2] in b' \t\n\r':
        return 'pgm'


@test
def ppm(h):
    """PPM (portable pixmap)"""
    if len(h) >= 3 and \
            h[0] == b'P' and h[1] in b'36' and h[2] in b' \t\n\r':
        return 'ppm'


@test
def rast(h):
    """Sun raster file"""
    if h[:4] == b'\x59\xA6\x6A\x95':
        return 'rast'


@test
def xbm(h):
    """X bitmap (X10 or X11)"""
    s = b'#define '
    if h[:len(s)] == s:
        return 'xbm'


@test
def bmp(h):
    if h[:2] == b'BM':
        return 'bmp'


@test
def emf(h):
    if h[:4] == b'\x01\0\0\0' and h[40:44] == b' EMF':
        return 'emf'


@test
def jpeg2000(h):
    if h[:12] == b'\x00\x00\x00\x0cjP  \r\n\x87\n':
        return 'jpeg2000'


@test
def svg(h):
    if h[:4] == b'<svg' or (h[:2] == b'<?' and h[2:5].tobytes().lower() == b'xml' and b'<svg' in h.tobytes()):
        return 'svg'


tests = tuple(tests)
83
ebook_converter/utils/ipc/__init__.py
Normal file
@@ -0,0 +1,83 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, errno, sys
from threading import Thread

from calibre import force_unicode
from calibre.constants import iswindows, get_windows_username, islinux, filesystem_encoding, ispy3
from calibre.utils.filenames import ascii_filename
from polyglot.functools import lru_cache

VADDRESS = None

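# Retry a system call if it is interrupted by a signal (EINTR)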
def eintr_retry_call(func, *args, **kwargs):
    while True:
        try:
            return func(*args, **kwargs)
        except EnvironmentError as e:
            if getattr(e, 'errno', None) == errno.EINTR:
                continue
            raise


@lru_cache()
def socket_address(which):
    if iswindows:
        ans = r'\\.\pipe\Calibre' + which
        try:
            user = get_windows_username()
        except Exception:
            user = None
        if user:
            user = ascii_filename(user).replace(' ', '_')
            if user:
                ans += '-' + user[:100] + 'x'
    else:
        user = force_unicode(os.environ.get('USER') or os.path.basename(os.path.expanduser('~')), filesystem_encoding)
        sock_name = '{}-calibre-{}.socket'.format(ascii_filename(user).replace(' ', '_'), which)
        if islinux:
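            # a leading NUL byte selects the Linux abstract socket namespace,
            # so no filesystem entry is created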
            ans = '\0' + sock_name
        else:
            from tempfile import gettempdir
            tmp = force_unicode(gettempdir(), filesystem_encoding)
            ans = os.path.join(tmp, sock_name)
    if not ispy3 and not isinstance(ans, bytes):
        ans = ans.encode(filesystem_encoding)
    return ans


def gui_socket_address():
    return socket_address('GUI' if iswindows else 'gui')


def viewer_socket_address():
    return socket_address('Viewer' if iswindows else 'viewer')


class RC(Thread):

    def __init__(self, print_error=True, socket_address=None):
        self.print_error = print_error
        self.socket_address = socket_address or gui_socket_address()
        Thread.__init__(self)
        self.conn = None
        self.daemon = True

    def run(self):
        from multiprocessing.connection import Client
        self.done = False
        try:
            self.conn = Client(self.socket_address)
            self.done = True
        except Exception:
            if self.print_error:
                print('Failed to connect to address {}'.format(repr(self.socket_address)), file=sys.stderr)
                import traceback
                traceback.print_exc()
237
ebook_converter/utils/ipc/launch.py
Normal file
@@ -0,0 +1,237 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import subprocess, os, sys, time
from functools import partial

from calibre.constants import iswindows, isosx, isfrozen, filesystem_encoding, ispy3
from calibre.utils.config import prefs
from calibre.ptempfile import PersistentTemporaryFile, base_dir
from calibre.utils.serialize import msgpack_dumps
from polyglot.builtins import iteritems, unicode_type, string_or_bytes, environ_item, native_string_type, getcwd
from polyglot.binary import as_hex_unicode

if iswindows:
    import win32process
    try:
        windows_null_file = open(os.devnull, 'wb')
    except:
        raise RuntimeError('NUL file missing in windows. This indicates a'
                ' corrupted windows. You should contact Microsoft'
                ' for assistance and/or follow the steps described here: https://bytes.com/topic/net/answers/264804-compile-error-null-device-missing')


def renice(niceness):
    try:
        os.nice(niceness)
    except:
        pass


class Worker(object):
    '''
    Platform independent object for launching child processes. All processes
    have the environment variable :envvar:`CALIBRE_WORKER` set.

    Useful attributes: ``is_alive``, ``returncode``, ``pid``
    Useful methods: ``kill``

    To launch a child, simply call the Worker object. By default, the child's
    output is redirected to an on disk file, the path to which is returned by
    the call.
    '''

    exe_name = 'calibre-parallel'

    @property
    def executable(self):
        if hasattr(sys, 'running_from_setup'):
            return [sys.executable, os.path.join(sys.setup_dir, 'run-calibre-worker.py')]
        if getattr(sys, 'run_local', False):
            return [sys.executable, sys.run_local, self.exe_name]
        e = self.exe_name
        if iswindows:
            return os.path.join(os.path.dirname(sys.executable),
                    e+'.exe' if isfrozen else 'Scripts\\%s.exe'%e)
        if isosx:
            return os.path.join(sys.binaries_path, e)

        if isfrozen:
            return os.path.join(sys.executables_location, e)

        if hasattr(sys, 'executables_location'):
            c = os.path.join(sys.executables_location, e)
            if os.access(c, os.X_OK):
                return c
        return e

    @property
    def gui_executable(self):
        if isosx and not hasattr(sys, 'running_from_setup'):
            if self.job_name == 'ebook-viewer':
                base = os.path.dirname(sys.binaries_path)
                return os.path.join(base, 'ebook-viewer.app/Contents/MacOS/', self.exe_name)
            if self.job_name == 'ebook-edit':
                base = os.path.dirname(sys.binaries_path)
                return os.path.join(base, 'ebook-viewer.app/Contents/ebook-edit.app/Contents/MacOS/', self.exe_name)

            return os.path.join(sys.binaries_path, self.exe_name)

        return self.executable

    @property
    def env(self):
        if ispy3:
            env = os.environ.copy()
        else:
            # We use this inefficient method of copying the environment variables
            # because of non ascii env vars on windows. See https://bugs.launchpad.net/bugs/811191
            env = {}
            for key in os.environ:
                try:
                    val = os.environ[key]
                    if isinstance(val, unicode_type):
                        # On windows subprocess cannot handle unicode env vars
                        try:
                            val = val.encode(filesystem_encoding)
                        except ValueError:
                            val = val.encode('utf-8')
                    if isinstance(key, unicode_type):
                        key = key.encode('ascii')
                    env[key] = val
                except:
                    pass
        env[native_string_type('CALIBRE_WORKER')] = environ_item('1')
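        # the temp dir is serialized with msgpack and hex-encoded so that
        # arbitrary paths survive the round trip through the environment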
        td = as_hex_unicode(msgpack_dumps(base_dir()))
        env[native_string_type('CALIBRE_WORKER_TEMP_DIR')] = environ_item(td)
        env.update(self._env)
        return env

    @property
    def is_alive(self):
        return hasattr(self, 'child') and self.child.poll() is None

    @property
    def returncode(self):
        if not hasattr(self, 'child'):
            return None
        self.child.poll()
        return self.child.returncode

    @property
    def pid(self):
        if not hasattr(self, 'child'):
            return None
        return getattr(self.child, 'pid', None)

    def close_log_file(self):
        try:
            self._file.close()
        except:
            pass

    def kill(self):
        self.close_log_file()
        try:
            if self.is_alive:
                if iswindows:
                    return self.child.kill()
                try:
                    self.child.terminate()
                    st = time.time()
                    while self.is_alive and time.time()-st < 2:
                        time.sleep(0.2)
                finally:
                    if self.is_alive:
                        self.child.kill()
        except:
            pass

    def __init__(self, env, gui=False, job_name=None):
        self._env = {}
        self.gui = gui
        self.job_name = job_name
        if ispy3:
            self._env = env.copy()
        else:
            # Windows cannot handle unicode env vars
            for k, v in iteritems(env):
                try:
                    if isinstance(k, unicode_type):
                        k = k.encode('ascii')
                    if isinstance(v, unicode_type):
                        try:
                            v = v.encode(filesystem_encoding)
                        except:
                            v = v.encode('utf-8')
                    self._env[k] = v
                except:
                    pass

    def __call__(self, redirect_output=True, cwd=None, priority=None):
        '''
        If redirect_output is True, output from the child is redirected
        to a file on disk and this method returns the path to that file.
        '''
        exe = self.gui_executable if self.gui else self.executable
        env = self.env
        try:
            origwd = cwd or os.path.abspath(getcwd())
        except EnvironmentError:
            # cwd no longer exists
            origwd = cwd or os.path.expanduser('~')
        env[native_string_type('ORIGWD')] = environ_item(as_hex_unicode(msgpack_dumps(origwd)))
        _cwd = cwd
        if priority is None:
            priority = prefs['worker_process_priority']
        cmd = [exe] if isinstance(exe, string_or_bytes) else exe
        args = {
            'env' : env,
            'cwd' : _cwd,
        }
        if iswindows:
            priority = {
                'high' : win32process.HIGH_PRIORITY_CLASS,
                'normal' : win32process.NORMAL_PRIORITY_CLASS,
                'low' : win32process.IDLE_PRIORITY_CLASS}[priority]
            args['creationflags'] = win32process.CREATE_NO_WINDOW|priority
        else:
            niceness = {
                'normal' : 0,
                'low' : 10,
                'high' : 20,
            }[priority]
            args['preexec_fn'] = partial(renice, niceness)
        ret = None
        if redirect_output:
            self._file = PersistentTemporaryFile('_worker_redirect.log')
            args['stdout'] = self._file._fd
            args['stderr'] = subprocess.STDOUT
            if iswindows:
                args['stdin'] = subprocess.PIPE
            ret = self._file.name

        if iswindows and 'stdin' not in args:
            # On windows when using the pythonw interpreter,
            # stdout, stderr and stdin may not be valid
            args['stdin'] = subprocess.PIPE
            args['stdout'] = windows_null_file
            args['stderr'] = subprocess.STDOUT

        if not iswindows:
            # Close inherited file descriptors in worker
            # On windows, this is done in the worker process
            # itself
            args['close_fds'] = True

        self.child = subprocess.Popen(cmd, **args)
        if 'stdin' in args:
            self.child.stdin.close()

        self.log_path = ret
        return ret
348
ebook_converter/utils/ipc/simple_worker.py
Normal file
@@ -0,0 +1,348 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, time, traceback, importlib
from multiprocessing.connection import Client
from threading import Thread
from contextlib import closing

from calibre.constants import iswindows
from calibre.utils.ipc import eintr_retry_call
from calibre.utils.ipc.launch import Worker
from calibre.utils.serialize import msgpack_loads, msgpack_dumps
from calibre.utils.monotonic import monotonic
from polyglot.builtins import unicode_type, string_or_bytes, environ_item
from polyglot.binary import as_hex_unicode, from_hex_bytes


class WorkerError(Exception):

    def __init__(self, msg, orig_tb='', log_path=None):
        Exception.__init__(self, msg)
        self.orig_tb = orig_tb
        self.log_path = log_path


class ConnectedWorker(Thread):

    def __init__(self, listener, args):
        Thread.__init__(self)
        self.daemon = True

        self.listener = listener
        self.args = args
        self.accepted = False
        self.tb = None
        self.res = None

    def run(self):
        conn = None
        try:
            conn = eintr_retry_call(self.listener.accept)
        except BaseException:
            self.tb = traceback.format_exc()
            return
        self.accepted = True
        with closing(conn):
            try:
                eintr_retry_call(conn.send, self.args)
                self.res = eintr_retry_call(conn.recv)
            except BaseException:
                self.tb = traceback.format_exc()


class OffloadWorker(object):

    def __init__(self, listener, worker):
        self.listener = listener
        self.worker = worker
        self.conn = None
        self.kill_thread = t = Thread(target=self.worker.kill)
        t.daemon = True

    def __call__(self, module, func, *args, **kwargs):
        if self.conn is None:
            self.conn = eintr_retry_call(self.listener.accept)
        eintr_retry_call(self.conn.send, (module, func, args, kwargs))
        return eintr_retry_call(self.conn.recv)

    def shutdown(self):
        try:
            eintr_retry_call(self.conn.send, None)
        except IOError:
            pass
        except:
            import traceback
            traceback.print_exc()
        finally:
            self.conn = None
            try:
                os.remove(self.worker.log_path)
            except:
                pass
            self.kill_thread.start()

    def is_alive(self):
        return self.worker.is_alive or self.kill_thread.is_alive()


def communicate(ans, worker, listener, args, timeout=300, heartbeat=None,
        abort=None):
    cw = ConnectedWorker(listener, args)
    cw.start()
    st = monotonic()
    check_heartbeat = callable(heartbeat)

    while worker.is_alive and cw.is_alive():
        cw.join(0.01)
        delta = monotonic() - st
        if not cw.accepted and delta > min(10, timeout):
            break
hung = not heartbeat() if check_heartbeat else delta > timeout
|
||||
if hung:
|
||||
raise WorkerError('Worker appears to have hung')
|
||||
if abort is not None and abort.is_set():
|
||||
# The worker process will be killed by fork_job, after we return
|
||||
return
|
||||
|
||||
if not cw.accepted:
|
||||
if not cw.tb:
|
||||
raise WorkerError('Failed to connect to worker process')
|
||||
raise WorkerError('Failed to connect to worker process', cw.tb)
|
||||
|
||||
if cw.tb:
|
||||
raise WorkerError('Failed to communicate with worker process', cw.tb)
|
||||
if cw.res is None:
|
||||
raise WorkerError('Something strange happened. The worker process was aborted without an exception.')
|
||||
if cw.res.get('tb', None):
|
||||
raise WorkerError('Worker failed', cw.res['tb'])
|
||||
ans['result'] = cw.res['result']
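The heartbeat parameter replaces the flat timeout with a liveness check: the worker is considered hung as soon as the callable returns False. A minimal sketch of a heartbeat based on a file the worker is expected to touch periodically (the path and interval are illustrative):

import os
import time

def make_file_heartbeat(path, max_age=60):
    # Returns a callable suitable for communicate()'s heartbeat argument:
    # it reports the worker as alive while the file was touched recently.
    def heartbeat():
        try:
            return time.time() - os.path.getmtime(path) < max_age
        except OSError:
            return False
    return heartbeat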


def create_worker(env, priority='normal', cwd=None, func='main'):
    from calibre.utils.ipc.server import create_listener
    auth_key = os.urandom(32)
    address, listener = create_listener(auth_key)

    env = dict(env)
    env.update({
        'CALIBRE_WORKER_ADDRESS': environ_item(as_hex_unicode(msgpack_dumps(address))),
        'CALIBRE_WORKER_KEY': environ_item(as_hex_unicode(auth_key)),
        'CALIBRE_SIMPLE_WORKER': environ_item('calibre.utils.ipc.simple_worker:%s' % func),
    })

    w = Worker(env)
    w(cwd=cwd, priority=priority)
    return listener, w


def start_pipe_worker(command, env=None, priority='normal', **process_args):
    import subprocess
    from functools import partial
    w = Worker(env or {})
    args = {'stdout':subprocess.PIPE, 'stdin':subprocess.PIPE, 'env':w.env}
    args.update(process_args)
    if iswindows:
        import win32process
        priority = {
            'high' : win32process.HIGH_PRIORITY_CLASS,
            'normal' : win32process.NORMAL_PRIORITY_CLASS,
            'low' : win32process.IDLE_PRIORITY_CLASS}[priority]
        args['creationflags'] = win32process.CREATE_NO_WINDOW|priority
    else:
        def renice(niceness):
            try:
                os.nice(niceness)
            except:
                pass
        niceness = {'normal' : 0, 'low' : 10, 'high' : 20}[priority]
        args['preexec_fn'] = partial(renice, niceness)
        args['close_fds'] = True

    exe = w.executable
    cmd = [exe] if isinstance(exe, string_or_bytes) else exe
    p = subprocess.Popen(cmd + ['--pipe-worker', command], **args)
    return p


def two_part_fork_job(env=None, priority='normal', cwd=None):
    env = env or {}
    listener, w = create_worker(env, priority, cwd)

    def run_job(
        mod_name, func_name, args=(), kwargs=None, timeout=300,  # seconds
        no_output=False, heartbeat=None, abort=None, module_is_source_code=False
    ):
        ans = {'result':None, 'stdout_stderr':None}
        kwargs = kwargs or {}
        try:
            communicate(ans, w, listener, (mod_name, func_name, args, kwargs,
                module_is_source_code), timeout=timeout, heartbeat=heartbeat,
                abort=abort)
        except WorkerError as e:
            if not no_output:
                e.log_path = w.log_path
            raise
        finally:
            t = Thread(target=w.kill)
            t.daemon = True
            t.start()
            if no_output:
                try:
                    os.remove(w.log_path)
                except:
                    pass
        if not no_output:
            ans['stdout_stderr'] = w.log_path
        return ans
    run_job.worker = w

    return run_job
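two_part_fork_job separates worker startup from job submission, so the comparatively expensive process launch can happen ahead of time and the same worker can be reused. A usage sketch (the module and function names are illustrative placeholders):

run_job = two_part_fork_job(env={}, priority='low')
# ... later, when the work is actually needed:
result = run_job('mypkg.tasks', 'slow_function', args=(1, 2), timeout=60)
print(result['result'], result['stdout_stderr'])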


def fork_job(mod_name, func_name, args=(), kwargs=None, timeout=300,  # seconds
        cwd=None, priority='normal', env={}, no_output=False, heartbeat=None,
        abort=None, module_is_source_code=False):
    '''
    Run a job in a worker process. A job is simply a function that will be
    called with the supplied arguments, in the worker process.
    The result of the function will be returned.
    If an error occurs a WorkerError is raised.

    :param mod_name: Module to import in the worker process

    :param func_name: Function to call in the worker process from the
        imported module

    :param args: Positional arguments to pass to the function

    :param kwargs: Keyword arguments to pass to the function

    :param timeout: The time in seconds to wait for the worker process to
        complete. If it takes longer a WorkerError is raised and the process
        is killed.

    :param cwd: The working directory for the worker process. I recommend
        against using this, unless you are sure the path is pure ASCII.

    :param priority: The process priority for the worker process

    :param env: Extra environment variables to set for the worker process

    :param no_output: If True, the stdout and stderr of the worker process
        are discarded

    :param heartbeat: If not None, it is used to check if the worker has
        hung, instead of a simple timeout. It must be a callable that takes
        no arguments and returns True or False. The worker will be assumed
        to have hung if this function returns False. At that point, the
        process will be killed and a WorkerError will be raised.

    :param abort: If not None, it must be an Event. As soon as abort.is_set()
        returns True, the worker process is killed. No error is raised.

    :param module_is_source_code: If True, ``mod_name`` is treated as python
        source code rather than the name of a module to import. The source is
        executed as a module. Useful if you want to use fork_job from within
        a script to run some dynamically generated python.

    :return: A dictionary with the keys result and stdout_stderr. result is
        the return value of the function (it must be picklable).
        stdout_stderr is the path to a file that contains the stdout and
        stderr of the worker process. If you set no_output=True, then this
        will not be present.
    '''
    return two_part_fork_job(env, priority, cwd)(
        mod_name, func_name, args=args, kwargs=kwargs, timeout=timeout,
        no_output=no_output, heartbeat=heartbeat, abort=abort,
        module_is_source_code=module_is_source_code
    )
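A usage sketch for fork_job (the module and function names are hypothetical; the called function must be importable in the worker process and its return value picklable):

from threading import Event

abort = Event()
try:
    job = fork_job('mypkg.tasks', 'slow_function',  # hypothetical target
                   args=(42,), timeout=120, abort=abort)
except WorkerError as e:
    print('Worker failed:', e.orig_tb)
else:
    print('Result:', job['result'])
    print('Worker log at:', job['stdout_stderr'])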


def offload_worker(env={}, priority='normal', cwd=None):
    listener, w = create_worker(env=env, priority=priority, cwd=cwd, func='offload')
    return OffloadWorker(listener, w)


def compile_code(src):
    import re, io
    if not isinstance(src, unicode_type):
        match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
        enc = match.group(1).decode('utf-8') if match else 'utf-8'
        src = src.decode(enc)
    # Python complains if there is a coding declaration in a unicode string
    src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE)
    # Translate newlines to \n
    src = io.StringIO(src, newline=None).getvalue()

    namespace = {
        'time':time, 're':re, 'os':os, 'io':io,
    }
    exec(src, namespace)
    return namespace
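A small sketch of what compile_code returns, namely the namespace produced by executing the source as a module:

src = '''
def greet(name):
    return 'Hello, %s' % name
'''
ns = compile_code(src)
print(ns['greet']('world'))  # -> Hello, world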


def main():
    # The entry point for the simple worker process
    address = msgpack_loads(from_hex_bytes(os.environ['CALIBRE_WORKER_ADDRESS']))
    key = from_hex_bytes(os.environ['CALIBRE_WORKER_KEY'])
    with closing(Client(address, authkey=key)) as conn:
        args = eintr_retry_call(conn.recv)
        try:
            mod, func, args, kwargs, module_is_source_code = args
            if module_is_source_code:
                importlib.import_module('calibre.customize.ui')  # Load plugins
                mod = compile_code(mod)
                func = mod[func]
            else:
                try:
                    mod = importlib.import_module(mod)
                except ImportError:
                    importlib.import_module('calibre.customize.ui')  # Load plugins
                    mod = importlib.import_module(mod)
                func = getattr(mod, func)
            res = {'result':func(*args, **kwargs)}
        except:
            res = {'tb': traceback.format_exc()}

        try:
            conn.send(res)
        except:
            # Maybe EINTR
            conn.send(res)


def offload():
    # The entry point for the offload worker process
    address = msgpack_loads(from_hex_bytes(os.environ['CALIBRE_WORKER_ADDRESS']))
    key = from_hex_bytes(os.environ['CALIBRE_WORKER_KEY'])
    func_cache = {}
    with closing(Client(address, authkey=key)) as conn:
        while True:
            args = eintr_retry_call(conn.recv)
            if args is None:
                break
            res = {'result':None, 'tb':None}
            try:
                mod, func, args, kwargs = args
                if mod is None:
                    eintr_retry_call(conn.send, res)
                    continue
                f = func_cache.get((mod, func), None)
                if f is None:
                    try:
                        m = importlib.import_module(mod)
                    except ImportError:
                        importlib.import_module('calibre.customize.ui')  # Load plugins
                        m = importlib.import_module(mod)
                    func_cache[(mod, func)] = f = getattr(m, func)
                res['result'] = f(*args, **kwargs)
            except:
                import traceback
                res['tb'] = traceback.format_exc()

            eintr_retry_call(conn.send, res)
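The offload() loop above pairs with the OffloadWorker client: one long-lived worker process accepts many (module, function) calls over a single connection, and caches the resolved functions. A client-side usage sketch (names are hypothetical; each call returns the response dict built in the loop above):

worker = offload_worker(priority='low')
try:
    res = worker('mypkg.tasks', 'slow_function', 42)  # hypothetical target
    if res['tb']:
        print('Call failed:', res['tb'])
    else:
        print('Result:', res['result'])
finally:
    worker.shutdown()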
65
ebook_converter/utils/iso8601.py
Normal file
@@ -0,0 +1,65 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>

from __future__ import absolute_import, division, print_function, unicode_literals
from datetime import datetime

from dateutil.tz import tzlocal, tzutc, tzoffset

from calibre.constants import plugins
speedup, err = plugins['speedup']
if not speedup:
    raise RuntimeError(err)


class SafeLocalTimeZone(tzlocal):

    def _isdst(self, dt):
        # This method in tzlocal raises ValueError if dt is out of range (in
        # older versions of dateutil).
        # In such cases, just assume that dt is not DST.
        try:
            return super(SafeLocalTimeZone, self)._isdst(dt)
        except Exception:
            pass
        return False

    def _naive_is_dst(self, dt):
        # This method in tzlocal raises ValueError if dt is out of range (in
        # newer versions of dateutil).
        # In such cases, just assume that dt is not DST.
        try:
            return super(SafeLocalTimeZone, self)._naive_is_dst(dt)
        except Exception:
            pass
        return False


utc_tz = tzutc()
local_tz = SafeLocalTimeZone()
del tzutc, tzlocal
UNDEFINED_DATE = datetime(101, 1, 1, tzinfo=utc_tz)


def parse_iso8601(date_string, assume_utc=False, as_utc=True):
    if not date_string:
        return UNDEFINED_DATE
    dt, aware, tzseconds = speedup.parse_iso8601(date_string)
    tz = utc_tz if assume_utc else local_tz
    if aware:  # timezone was specified
        if tzseconds == 0:
            tz = utc_tz
        else:
            sign = '-' if tzseconds < 0 else '+'
            description = "%s%02d:%02d" % (sign, abs(tzseconds) // 3600, (abs(tzseconds) % 3600) // 60)
            tz = tzoffset(description, tzseconds)
    dt = dt.replace(tzinfo=tz)
    if as_utc and tz is utc_tz:
        return dt
    return dt.astimezone(utc_tz if as_utc else local_tz)
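A sketch of the intended behavior (the raw parsing is done by the C speedup extension, so the commented results below are illustrative rather than exact reprs):

# Aware timestamp with an offset: converted to UTC when as_utc=True.
parse_iso8601('2020-01-01T12:00:00+05:30')
# -> 2020-01-01 06:30:00 UTC

# Naive timestamp: interpreted in the local zone, or as UTC if assume_utc=True.
parse_iso8601('2020-01-01T12:00:00', assume_utc=True)
# -> 2020-01-01 12:00:00 UTC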


if __name__ == '__main__':
    import sys
    print(parse_iso8601(sys.argv[-1]))
548
ebook_converter/utils/localization.py
Normal file
@@ -0,0 +1,548 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, locale, re, io, sys
from gettext import GNUTranslations, NullTranslations

from polyglot.builtins import is_py3, iteritems, unicode_type

_available_translations = None


def available_translations():
    global _available_translations
    if _available_translations is None:
        stats = P('localization/stats.calibre_msgpack', allow_user_override=False)
        if os.path.exists(stats):
            from calibre.utils.serialize import msgpack_loads
            with open(stats, 'rb') as f:
                stats = msgpack_loads(f.read())
        else:
            stats = {}
        _available_translations = [x for x in stats if stats[x] > 0.1]
    return _available_translations


def get_system_locale():
    from calibre.constants import iswindows, isosx, plugins
    lang = None
    if iswindows:
        try:
            from calibre.constants import get_windows_user_locale_name
            lang = get_windows_user_locale_name()
            lang = lang.strip()
            if not lang:
                lang = None
        except:
            pass  # Windows XP does not have the GetUserDefaultLocaleName fn
    elif isosx:
        try:
            lang = plugins['usbobserver'][0].user_locale() or None
        except:
            # Fallback to environment vars if something bad happened
            import traceback
            traceback.print_exc()
    if lang is None:
        try:
            envvars = ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES', 'LANG']
            lang = locale.getdefaultlocale(envvars)[0]

            # lang is None in two cases: either the environment variable is
            # not set or it's "C". Stop looking for a language in the latter
            # case.
            if lang is None:
                for var in envvars:
                    if os.environ.get(var) == 'C':
                        lang = 'en_US'
                        break
        except:
            pass  # This happens on Ubuntu apparently
    if lang is None and 'LANG' in os.environ:  # Needed for OS X
        try:
            lang = os.environ['LANG']
        except:
            pass
    if lang:
        lang = lang.replace('-', '_')
        lang = '_'.join(lang.split('_')[:2])
    return lang


def sanitize_lang(lang):
    if lang:
        match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
        if match:
            lang = match.group()
        if lang == 'zh':
            lang = 'zh_CN'
    if not lang:
        lang = 'en'
    return lang


def get_lang():
    'Try to figure out what language to display the interface in'
    from calibre.utils.config_base import prefs
    lang = prefs['language']
    lang = os.environ.get('CALIBRE_OVERRIDE_LANG', lang)
    if lang:
        return lang
    try:
        lang = get_system_locale()
    except:
        import traceback
        traceback.print_exc()
        lang = None
    return sanitize_lang(lang)


def is_rtl():
    return get_lang()[:2].lower() in {'he', 'ar'}


def get_lc_messages_path(lang):
    hlang = None
    if zf_exists():
        if lang in available_translations():
            hlang = lang
        else:
            xlang = lang.split('_')[0].lower()
            if xlang in available_translations():
                hlang = xlang
    return hlang


def zf_exists():
    return os.path.exists(P('localization/locales.zip',
                allow_user_override=False))


_lang_trans = None


def get_all_translators():
    from zipfile import ZipFile
    with ZipFile(P('localization/locales.zip', allow_user_override=False), 'r') as zf:
        for lang in available_translations():
            mpath = get_lc_messages_path(lang)
            if mpath is not None:
                buf = io.BytesIO(zf.read(mpath + '/messages.mo'))
                yield lang, GNUTranslations(buf)


def get_single_translator(mpath, which='messages'):
    from zipfile import ZipFile
    with ZipFile(P('localization/locales.zip', allow_user_override=False), 'r') as zf:
        path = '{}/{}.mo'.format(mpath, which)
        data = zf.read(path)
        buf = io.BytesIO(data)
        try:
            return GNUTranslations(buf)
        except Exception as e:
            import traceback
            traceback.print_exc()
            import hashlib
            sig = hashlib.sha1(data).hexdigest()
            raise ValueError('Failed to load translations for: {} (size: {} and signature: {}) with error: {}'.format(
                path, len(data), sig, e))


def get_iso639_translator(lang):
    lang = sanitize_lang(lang)
    mpath = get_lc_messages_path(lang) if lang else None
    return get_single_translator(mpath, 'iso639') if mpath else None


def get_translator(bcp_47_code):
    parts = bcp_47_code.replace('-', '_').split('_')[:2]
    parts[0] = lang_as_iso639_1(parts[0].lower()) or 'en'
    if len(parts) > 1:
        parts[1] = parts[1].upper()
    lang = '_'.join(parts)
    lang = {'pt':'pt_BR', 'zh':'zh_CN'}.get(lang, lang)
    available = available_translations()
    found = True
    if lang == 'en' or lang.startswith('en_'):
        return found, lang, NullTranslations()
    if lang not in available:
        lang = {'pt':'pt_BR', 'zh':'zh_CN'}.get(parts[0], parts[0])
        if lang not in available:
            lang = get_lang()
            if lang not in available:
                lang = 'en'
        found = False
    if lang == 'en':
        return True, lang, NullTranslations()
    return found, lang, get_single_translator(lang)
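get_translator implements a cascading fallback: the requested BCP 47 code, then its bare language part, then the interface language, and finally English. A sketch of the expected results (illustrative; they depend on which translations are actually installed):

# Regional variant not available, but the bare language is:
found, lang, trans = get_translator('fr-FR')
# -> (True, 'fr', <GNUTranslations>), assuming a French translation exists

# Nothing available for the language, English interface:
found, lang, trans = get_translator('vo')
# -> (True, 'en', <NullTranslations>)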


lcdata = {
    'abday': ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'),
    'abmon': ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
    'd_fmt': '%m/%d/%Y',
    'd_t_fmt': '%a %d %b %Y %r %Z',
    'day': ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'),
    'mon': ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'),
    'noexpr': '^[nN].*',
    'radixchar': '.',
    't_fmt': '%r',
    't_fmt_ampm': '%I:%M:%S %p',
    'thousep': ',',
    'yesexpr': '^[yY].*'
}


def load_po(path):
    from calibre.translations.msgfmt import make
    buf = io.BytesIO()
    try:
        make(path, buf)
    except Exception:
        print('Failed to compile translations file: %s, ignoring' % path)
        buf = None
    else:
        buf = io.BytesIO(buf.getvalue())
    return buf


def set_translators():
    global _lang_trans, lcdata
    # To test different translations invoke as
    # CALIBRE_OVERRIDE_LANG=de_DE.utf8 program
    lang = get_lang()
    t = buf = iso639 = None

    if 'CALIBRE_TEST_TRANSLATION' in os.environ:
        buf = load_po(os.path.expanduser(os.environ['CALIBRE_TEST_TRANSLATION']))

    if lang:
        mpath = get_lc_messages_path(lang)
        if buf is None and mpath and os.access(mpath + '.po', os.R_OK):
            buf = load_po(mpath + '.po')

        if mpath is not None:
            from zipfile import ZipFile
            with ZipFile(P('localization/locales.zip',
                    allow_user_override=False), 'r') as zf:
                if buf is None:
                    buf = io.BytesIO(zf.read(mpath + '/messages.mo'))
                if mpath == 'nds':
                    mpath = 'de'
                isof = mpath + '/iso639.mo'
                try:
                    iso639 = io.BytesIO(zf.read(isof))
                except:
                    pass  # No iso639 translations for this lang
                if buf is not None:
                    from calibre.utils.serialize import msgpack_loads
                    try:
                        lcdata = msgpack_loads(zf.read(mpath + '/lcdata.calibre_msgpack'))
                    except:
                        pass  # No lcdata

    if buf is not None:
        t = GNUTranslations(buf)
        if iso639 is not None:
            iso639 = _lang_trans = GNUTranslations(iso639)
            t.add_fallback(iso639)

    if t is None:
        t = NullTranslations()

    try:
        set_translators.lang = t.info().get('language')
    except Exception:
        pass
    if is_py3:
        t.install(names=('ngettext',))
    else:
        t.install(unicode=True, names=('ngettext',))
    # Now that we have installed a translator, we have to retranslate the
    # help for the global prefs object as it was instantiated in get_lang(),
    # before the translator was installed.
    from calibre.utils.config_base import prefs
    prefs.retranslate_help()


set_translators.lang = None


_iso639 = None
_extra_lang_codes = {
    'pt_BR' : _('Brazilian Portuguese'),
    'en_GB' : _('English (UK)'),
    'zh_CN' : _('Simplified Chinese'),
    'zh_TW' : _('Traditional Chinese'),
    'en' : _('English'),
    'en_US' : _('English (United States)'),
    'en_AR' : _('English (Argentina)'),
    'en_AU' : _('English (Australia)'),
    'en_JP' : _('English (Japan)'),
    'en_DE' : _('English (Germany)'),
    'en_BG' : _('English (Bulgaria)'),
    'en_EG' : _('English (Egypt)'),
    'en_NZ' : _('English (New Zealand)'),
    'en_CA' : _('English (Canada)'),
    'en_GR' : _('English (Greece)'),
    'en_IN' : _('English (India)'),
    'en_NP' : _('English (Nepal)'),
    'en_TH' : _('English (Thailand)'),
    'en_TR' : _('English (Turkey)'),
    'en_CY' : _('English (Cyprus)'),
    'en_CZ' : _('English (Czech Republic)'),
    'en_PH' : _('English (Philippines)'),
    'en_PK' : _('English (Pakistan)'),
    'en_PL' : _('English (Poland)'),
    'en_HR' : _('English (Croatia)'),
    'en_HU' : _('English (Hungary)'),
    'en_ID' : _('English (Indonesia)'),
    'en_IL' : _('English (Israel)'),
    'en_RU' : _('English (Russia)'),
    'en_SG' : _('English (Singapore)'),
    'en_YE' : _('English (Yemen)'),
    'en_IE' : _('English (Ireland)'),
    'en_CN' : _('English (China)'),
    'en_TW' : _('English (Taiwan)'),
    'en_ZA' : _('English (South Africa)'),
    'es_PY' : _('Spanish (Paraguay)'),
    'es_UY' : _('Spanish (Uruguay)'),
    'es_AR' : _('Spanish (Argentina)'),
    'es_CR' : _('Spanish (Costa Rica)'),
    'es_MX' : _('Spanish (Mexico)'),
    'es_CU' : _('Spanish (Cuba)'),
    'es_CL' : _('Spanish (Chile)'),
    'es_EC' : _('Spanish (Ecuador)'),
    'es_HN' : _('Spanish (Honduras)'),
    'es_VE' : _('Spanish (Venezuela)'),
    'es_BO' : _('Spanish (Bolivia)'),
    'es_NI' : _('Spanish (Nicaragua)'),
    'es_CO' : _('Spanish (Colombia)'),
    'de_AT' : _('German (AT)'),
    'fr_BE' : _('French (BE)'),
    'nl' : _('Dutch (NL)'),
    'nl_BE' : _('Dutch (BE)'),
    'und' : _('Unknown')
}

if False:
    # Extra strings needed for Qt

    # NOTE: Ante Meridian (i.e. like 10:00 AM)
    _('AM')
    # NOTE: Post Meridian (i.e. like 10:00 PM)
    _('PM')
    # NOTE: Ante Meridian (i.e. like 10:00 am)
    _('am')
    # NOTE: Post Meridian (i.e. like 10:00 pm)
    _('pm')
    _('&Copy')
    _('Select All')
    _('Copy Link')
    _('&Select All')
    _('Copy &Link Location')
    _('&Undo')
    _('&Redo')
    _('Cu&t')
    _('&Paste')
    _('Paste and Match Style')
    _('Directions')
    _('Left to Right')
    _('Right to Left')
    _('Fonts')
    _('&Step up')
    _('Step &down')
    _('Close without Saving')
    _('Close Tab')

_lcase_map = {}
for k in _extra_lang_codes:
    _lcase_map[k.lower()] = k


def _load_iso639():
    global _iso639
    if _iso639 is None:
        ip = P('localization/iso639.calibre_msgpack', allow_user_override=False, data=True)
        from calibre.utils.serialize import msgpack_loads
        _iso639 = msgpack_loads(ip)
        if 'by_3' not in _iso639:
            _iso639['by_3'] = _iso639['by_3t']
    return _iso639


def get_iso_language(lang_trans, lang):
    iso639 = _load_iso639()
    ans = lang
    lang = lang.split('_')[0].lower()
    if len(lang) == 2:
        ans = iso639['by_2'].get(lang, ans)
    elif len(lang) == 3:
        if lang in iso639['by_3']:
            ans = iso639['by_3'][lang]
    return lang_trans(ans)


def get_language(lang):
    translate = _
    lang = _lcase_map.get(lang, lang)
    if lang in _extra_lang_codes:
        # The translator was not active when _extra_lang_codes was defined,
        # so re-translate
        return translate(_extra_lang_codes[lang])
    attr = 'gettext' if sys.version_info.major > 2 else 'ugettext'
    return get_iso_language(getattr(_lang_trans, attr, translate), lang)


def calibre_langcode_to_name(lc, localize=True):
    iso639 = _load_iso639()
    translate = _ if localize else lambda x: x
    try:
        return translate(iso639['by_3'][lc])
    except:
        pass
    return lc


def canonicalize_lang(raw):
    if not raw:
        return None
    if not isinstance(raw, unicode_type):
        raw = raw.decode('utf-8', 'ignore')
    raw = raw.lower().strip()
    if not raw:
        return None
    raw = raw.replace('_', '-').partition('-')[0].strip()
    if not raw:
        return None
    iso639 = _load_iso639()
    m2to3 = iso639['2to3']

    if len(raw) == 2:
        ans = m2to3.get(raw, None)
        if ans is not None:
            return ans
    elif len(raw) == 3:
        if raw in iso639['by_3']:
            return raw

    return iso639['name_map'].get(raw, None)
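A sketch of canonicalize_lang's normalization (return values assume the standard ISO 639 data shipped with the application):

canonicalize_lang('EN')      # -> 'eng'  (2-letter code mapped to 3-letter)
canonicalize_lang('en-US')   # -> 'eng'  (region stripped first)
canonicalize_lang('deu')     # -> 'deu'  (already a valid 3-letter code)
canonicalize_lang('German')  # -> 'deu'  (full name looked up in name_map)
canonicalize_lang('')        # -> None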


_lang_map = None


def lang_map():
    ' Return mapping of ISO 639 3 letter codes to localized language names '
    iso639 = _load_iso639()
    translate = _
    global _lang_map
    if _lang_map is None:
        _lang_map = {k:translate(v) for k, v in iteritems(iso639['by_3'])}
    return _lang_map


def lang_map_for_ui():
    ans = getattr(lang_map_for_ui, 'ans', None)
    if ans is None:
        ans = lang_map().copy()
        for x in ('zxx', 'mis', 'mul'):
            ans.pop(x, None)
        lang_map_for_ui.ans = ans
    return ans


def langnames_to_langcodes(names):
    '''
    Given a list of localized language names return a mapping of the names
    to 3 letter ISO 639 language codes. If a name is not recognized, it is
    mapped to None.
    '''
    iso639 = _load_iso639()
    translate = _
    ans = {}
    names = set(names)
    for k, v in iteritems(iso639['by_3']):
        tv = translate(v)
        if tv in names:
            names.remove(tv)
            ans[tv] = k
        if not names:
            break
    for x in names:
        ans[x] = None

    return ans
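For example (assuming an English interface, so the localized names are the English ones):

langnames_to_langcodes(['English', 'German', 'Gibberish'])
# -> {'English': 'eng', 'German': 'deu', 'Gibberish': None}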


def lang_as_iso639_1(name_or_code):
    code = canonicalize_lang(name_or_code)
    if code is not None:
        iso639 = _load_iso639()
        return iso639['3to2'].get(code, None)


_udc = None


def get_udc():
    global _udc
    if _udc is None:
        from calibre.ebooks.unihandecode import Unihandecoder
        _udc = Unihandecoder(lang=get_lang())
    return _udc


def user_manual_stats():
    stats = getattr(user_manual_stats, 'stats', None)
    if stats is None:
        import json
        try:
            stats = json.loads(P('user-manual-translation-stats.json', allow_user_override=False, data=True))
        except EnvironmentError:
            stats = {}
        user_manual_stats.stats = stats
    return stats


def localize_user_manual_link(url):
    lc = lang_as_iso639_1(get_lang())
    if lc == 'en':
        return url
    stats = user_manual_stats()
    if stats.get(lc, 0) < 0.3:
        return url
    from polyglot.urllib import urlparse, urlunparse
    parts = urlparse(url)
    path = re.sub(r'/generated/[a-z]+/', '/generated/%s/' % lc, parts.path or '')
    path = '/%s%s' % (lc, path)
    parts = list(parts)
    parts[2] = path
    return urlunparse(parts)
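A sketch of the URL rewrite, assuming the interface language is French and its user-manual translation is past the 30% threshold:

localize_user_manual_link('https://manual.calibre-ebook.com/gui.html')
# -> 'https://manual.calibre-ebook.com/fr/gui.html'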


def website_languages():
    stats = getattr(website_languages, 'stats', None)
    if stats is None:
        try:
            stats = frozenset(P('localization/website-languages.txt', allow_user_override=False, data=True).split())
        except EnvironmentError:
            stats = frozenset()
        website_languages.stats = stats
    return stats


def localize_website_link(url):
    lc = lang_as_iso639_1(get_lang())
    langs = website_languages()
    if lc == 'en' or lc not in langs:
        return url
    from polyglot.urllib import urlparse, urlunparse
    parts = urlparse(url)
    path = '/{}{}'.format(lc, parts.path)
    parts = list(parts)
    parts[2] = path
    return urlunparse(parts)
340
ebook_converter/utils/localunzip.py
Normal file
@@ -0,0 +1,340 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

'''
Try to read invalid zip files with missing or damaged central directories.
These are apparently produced in large numbers by the fruitcakes over at B&N.

Tries to only use the local headers to extract data from the damaged zip file.
'''

import os, sys, zlib, shutil
from struct import calcsize, unpack, pack
from collections import namedtuple, OrderedDict
from tempfile import SpooledTemporaryFile

from polyglot.builtins import itervalues, getcwd

HEADER_SIG = 0x04034b50
HEADER_BYTE_SIG = pack(b'<L', HEADER_SIG)
local_header_fmt = b'<L5HL2L2H'
local_header_sz = calcsize(local_header_fmt)
ZIP_STORED, ZIP_DEFLATED = 0, 8
DATA_DESCRIPTOR_SIG = pack(b'<L', 0x08074b50)

LocalHeader = namedtuple('LocalHeader',
    'signature min_version flags compression_method mod_time mod_date '
    'crc32 compressed_size uncompressed_size filename_length extra_length '
    'filename extra')


if hasattr(sys, 'getwindowsversion'):
    windows_reserved_filenames = (
        'CON', 'PRN', 'AUX', 'CLOCK$', 'NUL', 'COM0', 'COM1', 'COM2', 'COM3',
        'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9', 'LPT0', 'LPT1', 'LPT2',
        'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9')

    def is_reserved_filename(x):
        base = x.partition('.')[0].upper()
        return base in windows_reserved_filenames
else:
    def is_reserved_filename(x):
        return False


def decode_arcname(name):
    if isinstance(name, bytes):
        from calibre.ebooks.chardet import detect
        try:
            name = name.decode('utf-8')
        except:
            res = detect(name)
            encoding = res['encoding']
            try:
                name = name.decode(encoding)
            except:
                name = name.decode('utf-8', 'replace')
    return name


def find_local_header(f):
    pos = f.tell()
    raw = f.read(50*1024)
    try:
        f.seek(pos + raw.index(HEADER_BYTE_SIG))
    except ValueError:
        f.seek(pos)
        return
    raw = f.read(local_header_sz)
    if len(raw) != local_header_sz:
        f.seek(pos)
        return
    header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
    if header.signature == HEADER_SIG:
        return header
    f.seek(pos)


def find_data_descriptor(f):
    pos = f.tell()
    DD = namedtuple('DataDescriptor', 'crc32 compressed_size uncompressed_size')
    raw = b'a'*16
    try:
        while len(raw) >= 16:
            raw = f.read(50*1024)
            idx = raw.find(DATA_DESCRIPTOR_SIG)
            if idx != -1:
                f.seek(f.tell() - len(raw) + idx + len(DATA_DESCRIPTOR_SIG))
                return DD(*unpack(b'<LLL', f.read(12)))
            # Rewind to handle the case of the signature being cut off
            # by the 50K boundary
            f.seek(f.tell()-len(DATA_DESCRIPTOR_SIG))

        raise ValueError('Failed to find data descriptor signature. '
                         'Data descriptors without signatures are not '
                         'supported.')
    finally:
        f.seek(pos)


def read_local_file_header(f):
    pos = f.tell()
    raw = f.read(local_header_sz)
    if len(raw) != local_header_sz:
        f.seek(pos)
        return
    header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
    if header.signature != HEADER_SIG:
        f.seek(pos)
        header = find_local_header(f)
        if header is None:
            return
    if header.min_version > 20:
        raise ValueError('This ZIP file uses unsupported features')
    if header.flags & 0b1:
        raise ValueError('This ZIP file is encrypted')
    if header.flags & (1 << 13):
        raise ValueError('This ZIP file uses masking, unsupported.')
    if header.compression_method not in {ZIP_STORED, ZIP_DEFLATED}:
        raise ValueError('This ZIP file uses an unsupported compression method')
    has_data_descriptors = header.flags & (1 << 3)
    fname = extra = None
    if header.filename_length > 0:
        fname = f.read(header.filename_length)
        if len(fname) != header.filename_length:
            return
        try:
            fname = fname.decode('ascii')
        except UnicodeDecodeError:
            if header.flags & (1 << 11):
                try:
                    fname = fname.decode('utf-8')
                except UnicodeDecodeError:
                    pass
            fname = decode_arcname(fname).replace('\\', '/')

    if header.extra_length > 0:
        extra = f.read(header.extra_length)
        if len(extra) != header.extra_length:
            return
    if has_data_descriptors:
        desc = find_data_descriptor(f)
        header = header._replace(crc32=desc.crc32,
                compressed_size=desc.compressed_size,
                uncompressed_size=desc.uncompressed_size)
    return LocalHeader(*(
        header[:-2] + (fname, extra)
    ))
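For illustration, this helper can be exercised against a well-formed archive too, since any valid zip starts with exactly such a 30-byte local header (`<L5HL2L2H`: 32-bit signature, five 16-bit fields, CRC, the two sizes, and the two name/extra lengths). A sketch:

import io
import zipfile

buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w', zipfile.ZIP_STORED) as zf:
    zf.writestr('hello.txt', b'hello world')
buf.seek(0)

header = read_local_file_header(buf)
print(header.filename)           # -> hello.txt
print(header.uncompressed_size)  # -> 11
# The stream is now positioned at the start of the file data:
print(buf.read(header.compressed_size))  # -> b'hello world'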


def read_compressed_data(f, header):
    cdata = f.read(header.compressed_size)
    return cdata


def copy_stored_file(src, size, dest):
    read = 0
    amt = min(size, 20*1024)
    while read < size:
        raw = src.read(min(size-read, amt))
        if not raw:
            raise ValueError('Premature end of file')
        dest.write(raw)
        read += len(raw)


def copy_compressed_file(src, size, dest):
    d = zlib.decompressobj(-15)
    read = 0
    amt = min(size, 20*1024)
    while read < size:
        raw = src.read(min(size-read, amt))
        if not raw and read < size:
            raise ValueError('Invalid ZIP file, local header is damaged')
        read += len(raw)
        dest.write(d.decompress(raw, 200*1024))
        count = 0
        while d.unconsumed_tail:
            count += 1
            dest.write(d.decompress(d.unconsumed_tail, 200*1024))

            if count > 100:
                raise ValueError('This ZIP file contains a ZIP bomb in %s'%
                        os.path.basename(dest.name))


def _extractall(f, path=None, file_info=None):
    found = False
    while True:
        header = read_local_file_header(f)
        if not header:
            break
        has_data_descriptors = header.flags & (1 << 3)
        seekval = header.compressed_size + (16 if has_data_descriptors else 0)
        found = True
        # Sanitize the path, changing absolute paths to relative and
        # removing . and ..
        fname = header.filename.replace(os.sep, '/')
        fname = os.path.splitdrive(fname)[1]
        parts = [x for x in fname.split('/') if x not in {'', os.path.pardir, os.path.curdir}]
        if not parts:
            continue
        if header.uncompressed_size == 0:
            # Directory
            f.seek(f.tell()+seekval)
            if path is not None:
                bdir = os.path.join(path, *parts)
                if not os.path.exists(bdir):
                    os.makedirs(bdir)
            continue

        # File
        if file_info is not None:
            file_info[header.filename] = (f.tell(), header)
        if path is not None:
            bdir = os.path.join(path, *(parts[:-1]))
            if not os.path.exists(bdir):
                os.makedirs(bdir)
            dest = os.path.join(path, *parts)
            try:
                df = open(dest, 'wb')
            except EnvironmentError:
                if is_reserved_filename(os.path.basename(dest)):
                    raise ValueError('This ZIP file contains a file with a reserved filename'
                            ' that cannot be processed on Windows: {}'.format(os.path.basename(dest)))
                raise
            with df:
                if header.compression_method == ZIP_STORED:
                    copy_stored_file(f, header.compressed_size, df)
                else:
                    copy_compressed_file(f, header.compressed_size, df)
        else:
            f.seek(f.tell()+seekval)

    if not found:
        raise ValueError('Not a ZIP file')


def extractall(path_or_stream, path=None):
    f = path_or_stream
    close_at_end = False
    if not hasattr(f, 'read'):
        f = open(f, 'rb')
        close_at_end = True
    if path is None:
        path = getcwd()
    pos = f.tell()
    try:
        _extractall(f, path)
    finally:
        f.seek(pos)
        if close_at_end:
            f.close()
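Usage sketch: extractall accepts either a path or an open binary stream, and restores the stream position when done; LocalZipFile (defined below) offers random access without extracting to disk. Paths here are illustrative:

# Extract a (possibly damaged) zip into a directory:
extractall('/tmp/broken.epub', path='/tmp/out')

# Or read a single member in memory:
with open('/tmp/broken.epub', 'rb') as f:
    zf = LocalZipFile(f)
    data = zf.read('META-INF/container.xml')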


class LocalZipFile(object):

    def __init__(self, stream):
        self.file_info = OrderedDict()
        _extractall(stream, file_info=self.file_info)
        self.stream = stream

    def _get_file_info(self, name):
        fi = self.file_info.get(name)
        if fi is None:
            raise ValueError('This ZIP container has no file named: %s'%name)
        return fi

    def open(self, name, spool_size=5*1024*1024):
        if isinstance(name, LocalHeader):
            name = name.filename
        offset, header = self._get_file_info(name)
        self.stream.seek(offset)
        dest = SpooledTemporaryFile(max_size=spool_size)

        if header.compression_method == ZIP_STORED:
            copy_stored_file(self.stream, header.compressed_size, dest)
        else:
            copy_compressed_file(self.stream, header.compressed_size, dest)
        dest.seek(0)
        return dest

    def getinfo(self, name):
        offset, header = self._get_file_info(name)
        return header

    def read(self, name, spool_size=5*1024*1024):
        with self.open(name, spool_size=spool_size) as f:
            return f.read()

    def extractall(self, path=None):
        self.stream.seek(0)
        _extractall(self.stream, path=(path or getcwd()))

    def close(self):
        pass

    def safe_replace(self, name, datastream, extra_replacements={},
        add_missing=False):
        from calibre.utils.zipfile import ZipFile, ZipInfo
        replacements = {name:datastream}
        replacements.update(extra_replacements)
        names = frozenset(list(replacements.keys()))
        found = set()

        def rbytes(name):
            r = replacements[name]
            if not isinstance(r, bytes):
                r = r.read()
            return r

        with SpooledTemporaryFile(max_size=100*1024*1024) as temp:
            ztemp = ZipFile(temp, 'w')
            for offset, header in itervalues(self.file_info):
                if header.filename in names:
                    zi = ZipInfo(header.filename)
                    zi.compress_type = header.compression_method
                    ztemp.writestr(zi, rbytes(header.filename))
                    found.add(header.filename)
                else:
                    ztemp.writestr(header.filename, self.read(header.filename,
                        spool_size=0))
            if add_missing:
                for name in names - found:
                    ztemp.writestr(name, rbytes(name))
            ztemp.close()
            zipstream = self.stream
            temp.seek(0)
            zipstream.seek(0)
            zipstream.truncate()
            shutil.copyfileobj(temp, zipstream)
            zipstream.flush()


if __name__ == '__main__':
    extractall(sys.argv[-1])
205
ebook_converter/utils/lock.py
Normal file
@@ -0,0 +1,205 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>

from __future__ import absolute_import, division, print_function, unicode_literals

import atexit
import errno
import os
import stat
import tempfile
import time
from functools import partial

from calibre.constants import (
    __appname__, fcntl, filesystem_encoding, islinux, isosx, iswindows, plugins, ispy3
)
from calibre.utils.monotonic import monotonic

speedup = plugins['speedup'][0]
if iswindows:
    import msvcrt, win32file, pywintypes, winerror, win32api, win32event
    from calibre.constants import get_windows_username
    excl_file_mode = stat.S_IREAD | stat.S_IWRITE
else:
    excl_file_mode = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH


def unix_open(path):
    flags = os.O_RDWR | os.O_CREAT
    has_cloexec = False
    if hasattr(speedup, 'O_CLOEXEC'):
        try:
            fd = os.open(path, flags | speedup.O_CLOEXEC, excl_file_mode)
            has_cloexec = True
        except EnvironmentError as err:
            # Kernel may not support O_CLOEXEC
            if err.errno != errno.EINVAL:
                raise

    if not has_cloexec:
        fd = os.open(path, flags, excl_file_mode)
        fcntl.fcntl(fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
    return os.fdopen(fd, 'r+b')


def unix_retry(err):
    return err.errno in (errno.EACCES, errno.EAGAIN, errno.ENOLCK, errno.EINTR)


def windows_open(path):
    if isinstance(path, bytes):
        path = path.decode('mbcs')
    try:
        h = win32file.CreateFileW(
            path,
            win32file.GENERIC_READ |
            win32file.GENERIC_WRITE,  # Open for reading and writing
            0,  # Open exclusive
            None,  # No security attributes, ensures handle is not inherited by children
            win32file.OPEN_ALWAYS,  # If file does not exist, create it
            win32file.FILE_ATTRIBUTE_NORMAL,  # Normal attributes
            None,  # No template file
        )
    except pywintypes.error as err:
        raise WindowsError(err[0], err[2], path)
    fd = msvcrt.open_osfhandle(h.Detach(), 0)
    return os.fdopen(fd, 'r+b')


def windows_retry(err):
    return err.winerror in (
        winerror.ERROR_SHARING_VIOLATION, winerror.ERROR_LOCK_VIOLATION
    )


def retry_for_a_time(timeout, sleep_time, func, error_retry, *args):
    limit = monotonic() + timeout
    while True:
        try:
            return func(*args)
        except EnvironmentError as err:
            if not error_retry(err) or monotonic() > limit:
                raise
        time.sleep(sleep_time)


def lock_file(path, timeout=15, sleep_time=0.2):
    if iswindows:
        return retry_for_a_time(
            timeout, sleep_time, windows_open, windows_retry, path
        )
    f = unix_open(path)
    retry_for_a_time(
        timeout, sleep_time, fcntl.flock, unix_retry,
        f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB
    )
    return f


class ExclusiveFile(object):

    def __init__(self, path, timeout=15, sleep_time=0.2):
        if iswindows and isinstance(path, bytes):
            path = path.decode(filesystem_encoding)
        self.path = path
        self.timeout = timeout
        self.sleep_time = sleep_time

    def __enter__(self):
        self.file = lock_file(self.path, self.timeout, self.sleep_time)
        return self.file

    def __exit__(self, type, value, traceback):
        self.file.close()
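ExclusiveFile is a thin context manager around lock_file: the lock is acquired on entry (retrying up to the timeout) and released when the file is closed on exit. A usage sketch (the path is illustrative):

with ExclusiveFile('/tmp/myapp.lock') as f:
    # Only one process at a time gets here; f is the open lock file.
    f.seek(0)
    f.write(b'%d' % os.getpid())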


def _clean_lock_file(file_obj):
    try:
        os.remove(file_obj.name)
    except EnvironmentError:
        pass
    try:
        file_obj.close()
    except EnvironmentError:
        pass


if iswindows:

    def create_single_instance_mutex(name, per_user=True):
        mutexname = '{}-singleinstance-{}-{}'.format(
            __appname__, (get_windows_username() if per_user else ''), name
        )
        mutex = win32event.CreateMutex(None, False, mutexname)
        if not mutex:
            return
        err = win32api.GetLastError()
        if err == winerror.ERROR_ALREADY_EXISTS:
            # Close this handle, otherwise it will prevent the mutex
            # from being deleted when the process that created it exits.
            win32api.CloseHandle(mutex)
            return
        return partial(win32api.CloseHandle, mutex)

elif islinux:

    def create_single_instance_mutex(name, per_user=True):
        import socket
        from calibre.utils.ipc import eintr_retry_call
        name = '%s-singleinstance-%s-%s' % (
            __appname__, (os.geteuid() if per_user else ''), name
        )
        address = '\0' + name.replace(' ', '_')
        if not ispy3:
            address = address.encode('utf-8')
        sock = socket.socket(family=socket.AF_UNIX)
        try:
            eintr_retry_call(sock.bind, address)
        except socket.error as err:
            if getattr(err, 'errno', None) == errno.EADDRINUSE:
                return
            raise
        fd = sock.fileno()
        old_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
        fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC)
        return sock.close

else:

    def singleinstance_path(name, per_user=True):
        name = '%s-singleinstance-%s-%s.lock' % (
            __appname__, (os.geteuid() if per_user else ''), name
        )
        home = os.path.expanduser('~')
        locs = ['/var/lock', home, tempfile.gettempdir()]
        if isosx:
            locs.insert(0, '/Library/Caches')
        for loc in locs:
            if os.access(loc, os.W_OK | os.R_OK | os.X_OK):
                return os.path.join(loc, ('.' if loc is home else '') + name)
        raise EnvironmentError(
            'Failed to find a suitable filesystem location for the lock file'
        )

    def create_single_instance_mutex(name, per_user=True):
        from calibre.utils.ipc import eintr_retry_call
        path = singleinstance_path(name, per_user)
        f = lopen(path, 'w')
        try:
            eintr_retry_call(fcntl.lockf, f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
            return partial(_clean_lock_file, f)
        except EnvironmentError as err:
            if err.errno not in (errno.EAGAIN, errno.EACCES):
                raise


def singleinstance(name):
    ' Ensure that only a single process exists holding the specified mutex key '
    release_mutex = create_single_instance_mutex(name)
    if release_mutex is None:
        return False
    atexit.register(release_mutex)
    return True
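Usage sketch: call once at startup; the mutex is released automatically at exit via atexit:

if not singleinstance('gui'):
    print('Another instance is already running')
    raise SystemExit(1)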
275
ebook_converter/utils/logging.py
Normal file
@@ -0,0 +1,275 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

'A simplified logging system'

DEBUG = 0
INFO = 1
WARN = 2
ERROR = 3

import sys, traceback, io
from functools import partial
from threading import Lock

from calibre import isbytestring, force_unicode, as_unicode, prints
from polyglot.builtins import unicode_type, iteritems


class Stream(object):

    def __init__(self, stream=None):
        if stream is None:
            stream = io.BytesIO()
        self.stream = getattr(stream, 'buffer', stream)
        self._prints = partial(prints, safe_encode=True, file=stream)

    def flush(self):
        self.stream.flush()

    def prints(self, level, *args, **kwargs):
        self._prints(*args, **kwargs)


class ANSIStream(Stream):

    def __init__(self, stream=sys.stdout):
        Stream.__init__(self, stream)
        self.color = {
            DEBUG: u'green',
            INFO: None,
            WARN: u'yellow',
            ERROR: u'red',
        }

    def prints(self, level, *args, **kwargs):
        from calibre.utils.terminal import ColoredStream
        with ColoredStream(self.stream, self.color[level]):
            self._prints(*args, **kwargs)

    def flush(self):
        self.stream.flush()


class FileStream(Stream):

    def __init__(self, stream=None):
        Stream.__init__(self, stream)

    def prints(self, level, *args, **kwargs):
        self._prints(*args, **kwargs)


class HTMLStream(Stream):

    color = {
        DEBUG: b'<span style="color:green">',
        INFO: b'<span>',
        WARN: b'<span style="color:blue">',
        ERROR: b'<span style="color:red">'
    }
    normal = b'</span>'

    def __init__(self, stream=sys.stdout):
        Stream.__init__(self, stream)

    def prints(self, level, *args, **kwargs):
        self.stream.write(self.color[level])
        kwargs['file'] = self.stream
        self._prints(*args, **kwargs)
        self.stream.write(self.normal)

    def flush(self):
        self.stream.flush()


class UnicodeHTMLStream(HTMLStream):

    color = {k: v.decode('ascii') for k, v in iteritems(HTMLStream.color)}
    normal = HTMLStream.normal.decode('ascii')

    def __init__(self):
        self.clear()

    def flush(self):
        pass

    def prints(self, level, *args, **kwargs):
        col = self.color[level]
        if col != self.last_col:
            if self.data:
                self.data.append(self.normal)
            self.data.append(col)
            self.last_col = col

        sep = kwargs.get(u'sep', u' ')
        end = kwargs.get(u'end', u'\n')

        for arg in args:
            if isbytestring(arg):
                arg = force_unicode(arg)
            elif not isinstance(arg, unicode_type):
                arg = as_unicode(arg)
            self.data.append(arg+sep)
            self.plain_text.append(arg+sep)
        self.data.append(end)
        self.plain_text.append(end)

    def clear(self):
        self.data = []
        self.plain_text = []
        self.last_col = self.color[INFO]

    @property
    def html(self):
        end = self.normal if self.data else u''
        return u''.join(self.data) + end

    def dump(self):
        return [self.data, self.plain_text, self.last_col]

    def load(self, dump):
        self.data, self.plain_text, self.last_col = dump

    def append_dump(self, dump):
        d, p, lc = dump
        self.data.extend(d)
        self.plain_text.extend(p)
        self.last_col = lc


class Log(object):

    DEBUG = DEBUG
    INFO = INFO
    WARN = WARN
    ERROR = ERROR

    def __init__(self, level=INFO):
        self.filter_level = level
        default_output = ANSIStream()
        self.outputs = [default_output]

        self.debug = partial(self.print_with_flush, DEBUG)
        self.info = partial(self.print_with_flush, INFO)
        self.warn = self.warning = partial(self.print_with_flush, WARN)
        self.error = partial(self.print_with_flush, ERROR)

    def prints(self, level, *args, **kwargs):
        if level < self.filter_level:
            return
        for output in self.outputs:
            output.prints(level, *args, **kwargs)

    def print_with_flush(self, level, *args, **kwargs):
        if level < self.filter_level:
            return
        for output in self.outputs:
            output.prints(level, *args, **kwargs)
        self.flush()

    def exception(self, *args, **kwargs):
        limit = kwargs.pop('limit', None)
        self.print_with_flush(ERROR, *args, **kwargs)
        self.print_with_flush(DEBUG, traceback.format_exc(limit))

    def __call__(self, *args, **kwargs):
        self.info(*args, **kwargs)

    def __enter__(self):
        self.orig_filter_level = self.filter_level
        self.filter_level = self.ERROR + 100

    def __exit__(self, *args):
        self.filter_level = self.orig_filter_level

    def flush(self):
        for o in self.outputs:
            if hasattr(o, 'flush'):
                o.flush()

    def close(self):
        for o in self.outputs:
            if hasattr(o, 'close'):
                o.close()
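Usage sketch: a Log fans out to any list of stream objects, dropping messages below filter_level. For example, logging to both the terminal and a file (this assumes, as elsewhere in this module, that calibre's prints helper can write to the given file object):

log = Log(level=Log.DEBUG)
with open('/tmp/run.log', 'wb') as f:
    log.outputs.append(FileStream(f))
    log.info('Starting conversion')
    log.warn('Something looks odd')
    log('shorthand for log.info')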


class DevNull(Log):

    def __init__(self):
        Log.__init__(self, level=Log.ERROR)
        self.outputs = []


class ThreadSafeLog(Log):
    exception_traceback_level = Log.DEBUG

    def __init__(self, level=Log.INFO):
        Log.__init__(self, level=level)
        self._lock = Lock()

    def prints(self, *args, **kwargs):
        with self._lock:
            Log.prints(self, *args, **kwargs)

    def print_with_flush(self, *args, **kwargs):
        with self._lock:
            Log.print_with_flush(self, *args, **kwargs)

    def exception(self, *args, **kwargs):
        limit = kwargs.pop('limit', None)
        with self._lock:
            Log.print_with_flush(self, ERROR, *args, **kwargs)
            Log.print_with_flush(self, self.exception_traceback_level, traceback.format_exc(limit))


class ThreadSafeWrapper(Log):

    def __init__(self, other_log):
        Log.__init__(self, level=other_log.filter_level)
        self.outputs = list(other_log.outputs)
        self._lock = Lock()

    def prints(self, *args, **kwargs):
        with self._lock:
            Log.prints(self, *args, **kwargs)

    def print_with_flush(self, *args, **kwargs):
        with self._lock:
            Log.print_with_flush(self, *args, **kwargs)


class GUILog(ThreadSafeLog):

    '''
    Logs in HTML and plain text as unicode. Ideal for display in a GUI context.
    '''

    def __init__(self):
        ThreadSafeLog.__init__(self, level=self.DEBUG)
        self.outputs = [UnicodeHTMLStream()]

    def clear(self):
        self.outputs[0].clear()

    @property
    def html(self):
        return self.outputs[0].html

    @property
    def plain_text(self):
        return u''.join(self.outputs[0].plain_text)

    def dump(self):
        return self.outputs[0].dump()

    def load(self, dump):
        return self.outputs[0].load(dump)

    def append_dump(self, dump):
        return self.outputs[0].append_dump(dump)


default_log = Log()
13
ebook_converter/utils/monotonic.py
Normal file
@@ -0,0 +1,13 @@
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

try:
    from time import monotonic
except ImportError:
    from calibre.constants import plugins

    monotonicp, err = plugins['monotonic']
    if err:
        raise RuntimeError('Failed to load the monotonic module with error: ' + err)
    monotonic = monotonicp.monotonic
    del monotonicp, err
44
ebook_converter/utils/mreplace.py
Normal file
@@ -0,0 +1,44 @@
# multiple replace from dictionnary : http://code.activestate.com/recipes/81330/
|
||||
from __future__ import unicode_literals
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2010, sengian <sengian1 @ gmail.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re
|
||||
try:
|
||||
from collections import UserDict
|
||||
except ImportError:
|
||||
from UserDict import UserDict
|
||||
|
||||
|
||||
class MReplace(UserDict):
|
||||
|
||||
def __init__(self, data=None, case_sensitive=True):
|
||||
UserDict.__init__(self, data)
|
||||
self.re = None
|
||||
self.regex = None
|
||||
self.case_sensitive = case_sensitive
|
||||
self.compile_regex()
|
||||
|
||||
def compile_regex(self):
|
||||
if len(self.data) > 0:
|
||||
keys = sorted(self.data, key=len, reverse=True)
|
||||
if isinstance(keys[0], bytes):
|
||||
tmp = b"(%s)" % b"|".join(map(re.escape, keys))
|
||||
else:
|
||||
tmp = "(%s)" % "|".join(map(re.escape, keys))
|
||||
if self.re != tmp:
|
||||
self.re = tmp
|
||||
if self.case_sensitive:
|
||||
self.regex = re.compile(self.re)
|
||||
else:
|
||||
self.regex = re.compile(self.re, re.I)
|
||||
|
||||
def __call__(self, mo):
|
||||
return self[mo.string[mo.start():mo.end()]]
|
||||
|
||||
def mreplace(self, text):
|
||||
# Replace without regex compile
|
||||
if len(self.data) < 1 or self.re is None:
|
||||
return text
|
||||
return self.regex.sub(self, text)
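A brief usage sketch: all replacements happen in a single regex pass, and the reverse length sort in compile_regex() guarantees that longer keys win over their prefixes:

# Sketch: multi-substring replacement in one pass.
mr = MReplace({'cat': 'dog', 'catalog': 'index'})
print(mr.mreplace('the catalog lists a cat'))
# -> 'the index lists a dog'  ('catalog' matches before 'cat')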
|
||||
106
ebook_converter/utils/resources.py
Normal file
@@ -0,0 +1,106 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
import sys, os
|
||||
|
||||
from calibre import config_dir
|
||||
from polyglot.builtins import builtins
|
||||
|
||||
|
||||
user_dir = os.path.join(config_dir, 'resources')
|
||||
|
||||
|
||||
class PathResolver(object):
|
||||
|
||||
def __init__(self):
|
||||
self.locations = [sys.resources_location]
|
||||
self.cache = {}
|
||||
|
||||
def suitable(path):
|
||||
try:
|
||||
return os.path.exists(path) and os.path.isdir(path) and \
|
||||
os.listdir(path)
|
||||
except:
|
||||
pass
|
||||
return False
|
||||
|
||||
self.default_path = sys.resources_location
|
||||
|
||||
dev_path = os.environ.get('CALIBRE_DEVELOP_FROM', None)
|
||||
self.using_develop_from = False
|
||||
if dev_path is not None:
|
||||
dev_path = os.path.join(os.path.abspath(
|
||||
os.path.dirname(dev_path)), 'resources')
|
||||
if suitable(dev_path):
|
||||
self.locations.insert(0, dev_path)
|
||||
self.default_path = dev_path
|
||||
self.using_develop_from = True
|
||||
|
||||
self.user_path = None
|
||||
if suitable(user_dir):
|
||||
self.locations.insert(0, user_dir)
|
||||
self.user_path = user_dir
|
||||
|
||||
def __call__(self, path, allow_user_override=True):
|
||||
path = path.replace(os.sep, '/')
|
||||
key = (path, allow_user_override)
|
||||
ans = self.cache.get(key, None)
|
||||
if ans is None:
|
||||
for base in self.locations:
|
||||
if not allow_user_override and base == self.user_path:
|
||||
continue
|
||||
fpath = os.path.join(base, *path.split('/'))
|
||||
if os.path.exists(fpath):
|
||||
ans = fpath
|
||||
break
|
||||
|
||||
if ans is None:
|
||||
ans = os.path.join(self.default_path, *path.split('/'))
|
||||
|
||||
self.cache[key] = ans
|
||||
|
||||
return ans
|
||||
|
||||
def set_data(self, path, data=None):
|
||||
self.cache.pop((path, True), None)
|
||||
fpath = os.path.join(user_dir, *path.split('/'))
|
||||
if data is None:
|
||||
if os.path.exists(fpath):
|
||||
os.remove(fpath)
|
||||
else:
|
||||
base = os.path.dirname(fpath)
|
||||
if not os.path.exists(base):
|
||||
os.makedirs(base)
|
||||
with open(fpath, 'wb') as f:
|
||||
f.write(data)
|
||||
|
||||
|
||||
_resolver = PathResolver()
|
||||
|
||||
|
||||
def get_path(path, data=False, allow_user_override=True):
|
||||
fpath = _resolver(path, allow_user_override=allow_user_override)
|
||||
if data:
|
||||
with open(fpath, 'rb') as f:
|
||||
return f.read()
|
||||
return fpath
|
||||
|
||||
|
||||
def get_image_path(path, data=False, allow_user_override=True):
|
||||
if not path:
|
||||
return get_path('images', allow_user_override=allow_user_override)
|
||||
return get_path('images/'+path, data=data, allow_user_override=allow_user_override)
|
||||
|
||||
|
||||
def set_data(path, data=None):
|
||||
return _resolver.set_data(path, data)
|
||||
|
||||
|
||||
builtins.__dict__['P'] = get_path
|
||||
builtins.__dict__['I'] = get_image_path
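A usage sketch for the resolver and the P()/I() builtins it installs; the resource names below are hypothetical:

# Sketch: resolve a resource path, read its contents, and manage an override.
css_path = P('templates/html.css')              # absolute path to the resource
css_bytes = P('templates/html.css', data=True)  # the file contents, as bytes
icon_path = I('book.png')                       # shorthand for images/book.png
set_data('templates/html.css', b'body {}')      # install a user override
set_data('templates/html.css')                  # data=None removes it again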
|
||||
139
ebook_converter/utils/serialize.py
Normal file
@@ -0,0 +1,139 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
from polyglot.builtins import unicode_type
|
||||
from calibre.constants import ispy3
|
||||
|
||||
|
||||
MSGPACK_MIME = 'application/x-msgpack'
|
||||
CANARY = 'jPoAv3zOyHvQ5JFNYg4hJ9'
|
||||
|
||||
|
||||
def encoded(typ, data, ExtType):
|
||||
if ExtType is None:
|
||||
return {CANARY: typ, 'v': data}
|
||||
return ExtType(typ, msgpack_dumps(data))
|
||||
|
||||
|
||||
def create_encoder(for_json=False):
|
||||
from datetime import datetime
|
||||
ExtType = None
|
||||
if not for_json:
|
||||
import msgpack
|
||||
ExtType = msgpack.ExtType
|
||||
|
||||
def encoder(obj):
|
||||
if isinstance(obj, datetime):
|
||||
return encoded(0, unicode_type(obj.isoformat()), ExtType)
|
||||
if isinstance(obj, (set, frozenset)):
|
||||
return encoded(1, tuple(obj), ExtType)
|
||||
if getattr(obj, '__calibre_serializable__', False):
|
||||
from calibre.ebooks.metadata.book.base import Metadata
|
||||
from calibre.library.field_metadata import FieldMetadata, fm_as_dict
|
||||
from calibre.db.categories import Tag
|
||||
if isinstance(obj, Metadata):
|
||||
from calibre.ebooks.metadata.book.serialize import metadata_as_dict
|
||||
return encoded(
|
||||
2, metadata_as_dict(obj, encode_cover_data=for_json), ExtType
|
||||
)
|
||||
elif isinstance(obj, FieldMetadata):
|
||||
return encoded(3, fm_as_dict(obj), ExtType)
|
||||
elif isinstance(obj, Tag):
|
||||
return encoded(4, obj.as_dict(), ExtType)
|
||||
if for_json and isinstance(obj, bytes):
|
||||
return obj.decode('utf-8')
|
||||
raise TypeError('Cannot serialize objects of type {}'.format(type(obj)))
|
||||
|
||||
return encoder
|
||||
|
||||
|
||||
def msgpack_dumps(obj):
|
||||
import msgpack
|
||||
return msgpack.packb(obj, default=create_encoder(), use_bin_type=True)
|
||||
|
||||
|
||||
def json_dumps(data, **kw):
|
||||
import json
|
||||
kw['default'] = create_encoder(for_json=True)
|
||||
kw['ensure_ascii'] = False
|
||||
ans = json.dumps(data, **kw)
|
||||
if not isinstance(ans, bytes):
|
||||
ans = ans.encode('utf-8')
|
||||
return ans
|
||||
|
||||
|
||||
def decode_metadata(x, for_json):
|
||||
from polyglot.binary import from_base64_bytes
|
||||
from calibre.ebooks.metadata.book.serialize import metadata_from_dict
|
||||
obj = metadata_from_dict(x)
|
||||
if for_json and obj.cover_data and obj.cover_data[1]:
|
||||
obj.cover_data = obj.cover_data[0], from_base64_bytes(obj.cover_data[1])
|
||||
return obj
|
||||
|
||||
|
||||
def decode_field_metadata(x, for_json):
|
||||
from calibre.library.field_metadata import fm_from_dict
|
||||
return fm_from_dict(x)
|
||||
|
||||
|
||||
def decode_category_tag(x, for_json):
|
||||
from calibre.db.categories import Tag
|
||||
return Tag.from_dict(x)
|
||||
|
||||
|
||||
def decode_datetime(x, fj):
|
||||
from calibre.utils.iso8601 import parse_iso8601
|
||||
return parse_iso8601(x, assume_utc=True)
|
||||
|
||||
|
||||
decoders = (
|
||||
decode_datetime,
|
||||
lambda x, fj: set(x),
|
||||
decode_metadata, decode_field_metadata, decode_category_tag
|
||||
)
|
||||
|
||||
|
||||
def json_decoder(obj):
|
||||
typ = obj.get(CANARY)
|
||||
if typ is None:
|
||||
return obj
|
||||
return decoders[typ](obj['v'], True)
|
||||
|
||||
|
||||
def msgpack_decoder(code, data):
|
||||
return decoders[code](msgpack_loads(data), False)
|
||||
|
||||
|
||||
def msgpack_loads(dump, use_list=True):
|
||||
# use_list controls whether msgpack arrays are unpacked as lists or tuples
|
||||
import msgpack
|
||||
return msgpack.unpackb(dump, ext_hook=msgpack_decoder, raw=False, use_list=use_list)
|
||||
|
||||
|
||||
def json_loads(data):
|
||||
import json
|
||||
return json.loads(data, object_hook=json_decoder)
|
||||
|
||||
|
||||
if ispy3:
|
||||
|
||||
def pickle_dumps(data):
|
||||
import pickle
|
||||
return pickle.dumps(data, -1)
|
||||
|
||||
def pickle_loads(dump):
|
||||
import pickle
|
||||
return pickle.loads(dump, encoding='utf-8')
|
||||
|
||||
else:
|
||||
|
||||
def pickle_dumps(data):
|
||||
import cPickle as pickle
|
||||
return pickle.dumps(data, -1)
|
||||
|
||||
def pickle_loads(dump):
|
||||
import cPickle as pickle
|
||||
return pickle.loads(dump)
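A round-trip sketch of the JSON codec above (the msgpack variant behaves the same but needs the msgpack package; decoding datetimes assumes calibre.utils.iso8601 is importable):

# Sketch: types with registered encoders survive a dumps/loads round trip.
from datetime import datetime

value = {'tags': {'a', 'b'}, 'when': datetime(2020, 1, 1)}
raw = json_dumps(value)                   # returns UTF-8 encoded bytes
back = json_loads(raw)
assert back['tags'] == {'a', 'b'}         # set restored via decoders[1]
assert back['when'].year == 2020          # datetime restored via decoders[0]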
|
||||
222
ebook_converter/utils/shared_file.py
Normal file
@@ -0,0 +1,222 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import os, sys
|
||||
|
||||
from polyglot.builtins import reraise
|
||||
|
||||
from calibre.constants import iswindows, plugins, ispy3
|
||||
|
||||
'''
|
||||
This module defines a share_open() function which is a replacement for
|
||||
python's builtin open() function.
|
||||
|
||||
This replacement opens 'shareable' files on all platforms, that is, files that
|
||||
can be read from, written to, and deleted at the same time by multiple
|
||||
processes. All file handles are non-inheritable, as in Python 3 but unlike
|
||||
Python 2. Non-inheritance is atomic.
|
||||
|
||||
Caveats on windows: On windows sharing is co-operative, i.e. it only works if
|
||||
all processes involved open the file with share_open(). Also while you can
|
||||
delete a file that is open, you cannot open a new file with the same filename
|
||||
until all open file handles are closed. You also cannot delete the containing
|
||||
directory until all file handles are closed. To get around this, rename the
|
||||
file before deleting it.
|
||||
'''
|
||||
|
||||
speedup, err = plugins['speedup']
|
||||
|
||||
if not speedup:
|
||||
raise RuntimeError('Failed to load the speedup plugin with error: %s' % err)
|
||||
|
||||
valid_modes = {'a', 'a+', 'a+b', 'ab', 'r', 'rb', 'r+', 'r+b', 'w', 'wb', 'w+', 'w+b'}
|
||||
|
||||
|
||||
def validate_mode(mode):
|
||||
return mode in valid_modes
|
||||
|
||||
|
||||
class FlagConstants(object):
|
||||
|
||||
def __init__(self):
|
||||
for x in 'APPEND CREAT TRUNC EXCL RDWR RDONLY WRONLY'.split():
|
||||
x = 'O_' + x
|
||||
setattr(self, x, getattr(os, x))
|
||||
for x in 'RANDOM SEQUENTIAL TEXT BINARY'.split():
|
||||
x = 'O_' + x
|
||||
setattr(self, x, getattr(os, x, 0))
|
||||
|
||||
|
||||
fc = FlagConstants()
|
||||
|
||||
|
||||
def flags_from_mode(mode):
|
||||
if not validate_mode(mode):
|
||||
raise ValueError('The mode is invalid')
|
||||
m = mode[0]
|
||||
random = '+' in mode
|
||||
binary = 'b' in mode
|
||||
if m == 'a':
|
||||
flags = fc.O_APPEND | fc.O_CREAT
|
||||
if random:
|
||||
flags |= fc.O_RDWR | fc.O_RANDOM
|
||||
else:
|
||||
flags |= fc.O_WRONLY | fc.O_SEQUENTIAL
|
||||
elif m == 'r':
|
||||
if random:
|
||||
flags = fc.O_RDWR | fc.O_RANDOM
|
||||
else:
|
||||
flags = fc.O_RDONLY | fc.O_SEQUENTIAL
|
||||
elif m == 'w':
|
||||
if random:
|
||||
flags = fc.O_RDWR | fc.O_RANDOM
|
||||
else:
|
||||
flags = fc.O_WRONLY | fc.O_SEQUENTIAL
|
||||
flags |= fc.O_TRUNC | fc.O_CREAT
|
||||
flags |= (fc.O_BINARY if binary else fc.O_TEXT)
|
||||
return flags
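A quick sanity check of the mode-to-flags mapping (a sketch only; O_RANDOM, O_SEQUENTIAL, O_TEXT and O_BINARY exist only on Windows, so FlagConstants makes them 0 elsewhere):

# Sketch: 'w+b' requests create + truncate + read/write access.
f = flags_from_mode('w+b')
assert f & os.O_CREAT and f & os.O_TRUNC
assert (f & os.O_RDWR) == os.O_RDWR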
|
||||
|
||||
|
||||
if iswindows:
|
||||
from numbers import Integral
|
||||
import msvcrt
|
||||
import win32file, pywintypes
|
||||
CREATE_NEW = win32file.CREATE_NEW
|
||||
CREATE_ALWAYS = win32file.CREATE_ALWAYS
|
||||
OPEN_EXISTING = win32file.OPEN_EXISTING
|
||||
OPEN_ALWAYS = win32file.OPEN_ALWAYS
|
||||
TRUNCATE_EXISTING = win32file.TRUNCATE_EXISTING
|
||||
FILE_SHARE_READ = win32file.FILE_SHARE_READ
|
||||
FILE_SHARE_WRITE = win32file.FILE_SHARE_WRITE
|
||||
FILE_SHARE_DELETE = win32file.FILE_SHARE_DELETE
|
||||
FILE_SHARE_VALID_FLAGS = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE
|
||||
FILE_ATTRIBUTE_READONLY = win32file.FILE_ATTRIBUTE_READONLY
|
||||
FILE_ATTRIBUTE_NORMAL = win32file.FILE_ATTRIBUTE_NORMAL
|
||||
FILE_ATTRIBUTE_TEMPORARY = win32file.FILE_ATTRIBUTE_TEMPORARY
|
||||
FILE_FLAG_DELETE_ON_CLOSE = win32file.FILE_FLAG_DELETE_ON_CLOSE
|
||||
FILE_FLAG_SEQUENTIAL_SCAN = win32file.FILE_FLAG_SEQUENTIAL_SCAN
|
||||
FILE_FLAG_RANDOM_ACCESS = win32file.FILE_FLAG_RANDOM_ACCESS
|
||||
GENERIC_READ = win32file.GENERIC_READ & 0xffffffff
|
||||
GENERIC_WRITE = win32file.GENERIC_WRITE & 0xffffffff
|
||||
DELETE = 0x00010000
|
||||
|
||||
_ACCESS_MASK = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
|
||||
_ACCESS_MAP = {
|
||||
os.O_RDONLY : GENERIC_READ,
|
||||
os.O_WRONLY : GENERIC_WRITE,
|
||||
os.O_RDWR : GENERIC_READ | GENERIC_WRITE
|
||||
}
|
||||
|
||||
_CREATE_MASK = os.O_CREAT | os.O_EXCL | os.O_TRUNC
|
||||
_CREATE_MAP = {
|
||||
0 : OPEN_EXISTING,
|
||||
os.O_EXCL : OPEN_EXISTING,
|
||||
os.O_CREAT : OPEN_ALWAYS,
|
||||
os.O_CREAT | os.O_EXCL : CREATE_NEW,
|
||||
os.O_CREAT | os.O_TRUNC | os.O_EXCL : CREATE_NEW,
|
||||
os.O_TRUNC : TRUNCATE_EXISTING,
|
||||
os.O_TRUNC | os.O_EXCL : TRUNCATE_EXISTING,
|
||||
os.O_CREAT | os.O_TRUNC : CREATE_ALWAYS
|
||||
}
|
||||
|
||||
def raise_winerror(pywinerr):
|
||||
reraise(
|
||||
WindowsError,
|
||||
WindowsError(pywinerr.winerror,
|
||||
(pywinerr.funcname or '') + b': ' + (pywinerr.strerror or '')),
|
||||
sys.exc_info()[2])
|
||||
|
||||
def os_open(path, flags, mode=0o777, share_flags=FILE_SHARE_VALID_FLAGS):
|
||||
'''
|
||||
Replacement for os.open() allowing moving or unlinking before closing
|
||||
'''
|
||||
if not isinstance(flags, Integral):
|
||||
raise TypeError('flags must be an integer')
|
||||
if not isinstance(mode, Integral):
|
||||
raise TypeError('mode must be an integer')
|
||||
|
||||
if share_flags & ~FILE_SHARE_VALID_FLAGS:
|
||||
raise ValueError('bad share_flags: %r' % share_flags)
|
||||
|
||||
access_flags = _ACCESS_MAP[flags & _ACCESS_MASK]
|
||||
create_flags = _CREATE_MAP[flags & _CREATE_MASK]
|
||||
attrib_flags = FILE_ATTRIBUTE_NORMAL
|
||||
|
||||
if flags & os.O_CREAT and mode & ~0o444 == 0:
|
||||
attrib_flags = FILE_ATTRIBUTE_READONLY
|
||||
|
||||
if flags & os.O_TEMPORARY:
|
||||
share_flags |= FILE_SHARE_DELETE
|
||||
attrib_flags |= FILE_FLAG_DELETE_ON_CLOSE
|
||||
access_flags |= DELETE
|
||||
|
||||
if flags & os.O_SHORT_LIVED:
|
||||
attrib_flags |= FILE_ATTRIBUTE_TEMPORARY
|
||||
|
||||
if flags & os.O_SEQUENTIAL:
|
||||
attrib_flags |= FILE_FLAG_SEQUENTIAL_SCAN
|
||||
|
||||
if flags & os.O_RANDOM:
|
||||
attrib_flags |= FILE_FLAG_RANDOM_ACCESS
|
||||
|
||||
try:
|
||||
h = win32file.CreateFileW(
|
||||
path, access_flags, share_flags, None, create_flags, attrib_flags, None)
|
||||
except pywintypes.error as e:
|
||||
raise_winerror(e)
|
||||
ans = msvcrt.open_osfhandle(h, flags | os.O_NOINHERIT)
|
||||
h.Detach()  # We don't want the handle to be automatically closed when h is deleted
|
||||
return ans
|
||||
|
||||
def share_open(path, mode='r', buffering=-1):
|
||||
flags = flags_from_mode(mode)
|
||||
return speedup.fdopen(os_open(path, flags), path, mode, buffering)
|
||||
|
||||
else:
|
||||
if ispy3:
|
||||
# See PEP 446
|
||||
share_open = open
|
||||
else:
|
||||
def share_open(path, mode='r', buffering=-1):
|
||||
flags = flags_from_mode(mode) | speedup.O_CLOEXEC
|
||||
return speedup.fdopen(os.open(path, flags), path, mode, buffering)
|
||||
|
||||
def raise_winerror(x):
|
||||
reraise(NotImplementedError, None, sys.exc_info()[2])
|
||||
|
||||
|
||||
def find_tests():
|
||||
import unittest
|
||||
from calibre.ptempfile import TemporaryDirectory
|
||||
|
||||
class SharedFileTest(unittest.TestCase):
|
||||
|
||||
def test_shared_file(self):
|
||||
eq = self.assertEqual
|
||||
|
||||
with TemporaryDirectory() as tdir:
|
||||
fname = os.path.join(tdir, 'test.txt')
|
||||
with share_open(fname, 'wb') as f:
|
||||
f.write(b'a' * 20 * 1024)
|
||||
eq(fname, f.name)
|
||||
f = share_open(fname, 'rb')
|
||||
eq(f.read(1), b'a')
|
||||
if iswindows:
|
||||
os.rename(fname, fname+'.moved')
|
||||
os.remove(fname+'.moved')
|
||||
else:
|
||||
os.remove(fname)
|
||||
eq(f.read(1), b'a')
|
||||
f2 = share_open(fname, 'w+b')
|
||||
f2.write(b'b' * 10 * 1024)
|
||||
f2.seek(0)
|
||||
eq(f.read(10000), b'a'*10000)
|
||||
eq(f2.read(100), b'b' * 100)
|
||||
f3 = share_open(fname, 'rb')
|
||||
eq(f3.read(100), b'b' * 100)
|
||||
|
||||
return unittest.defaultTestLoader.loadTestsFromTestCase(SharedFileTest)
|
||||
61
ebook_converter/utils/short_uuid.py
Normal file
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Generate UUIDs encoded using a user-specified alphabet.
|
||||
'''
|
||||
|
||||
import string, math, uuid as _uuid
|
||||
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
def num_to_string(number, alphabet, alphabet_len, pad_to_length=None):
|
||||
ans = []
|
||||
number = max(0, number)
|
||||
while number:
|
||||
number, digit = divmod(number, alphabet_len)
|
||||
ans.append(alphabet[digit])
|
||||
if pad_to_length is not None and pad_to_length > len(ans):
|
||||
ans.append(alphabet[0] * (pad_to_length - len(ans)))
|
||||
return ''.join(ans)
|
||||
|
||||
|
||||
def string_to_num(string, alphabet_map, alphabet_len):
|
||||
ans = 0
|
||||
for char in reversed(string):
|
||||
ans = ans * alphabet_len + alphabet_map[char]
|
||||
return ans
|
||||
|
||||
|
||||
class ShortUUID(object):
|
||||
|
||||
def __init__(self, alphabet=None):
|
||||
# We do not include zero and one in the default alphabet as they can be
|
||||
# confused with the letters O and I in some fonts. And removing them
|
||||
# does not change the uuid_pad_len.
|
||||
self.alphabet = tuple(sorted(unicode_type(alphabet or (string.digits + string.ascii_letters)[2:])))
|
||||
self.alphabet_len = len(self.alphabet)
|
||||
self.alphabet_map = {c:i for i, c in enumerate(self.alphabet)}
|
||||
self.uuid_pad_len = int(math.ceil(math.log(1 << 128, self.alphabet_len)))
|
||||
|
||||
def uuid4(self, pad_to_length=None):
|
||||
if pad_to_length is None:
|
||||
pad_to_length = self.uuid_pad_len
|
||||
return num_to_string(_uuid.uuid4().int, self.alphabet, self.alphabet_len, pad_to_length)
|
||||
|
||||
def uuid5(self, namespace, name, pad_to_length=None):
|
||||
if pad_to_length is None:
|
||||
pad_to_length = self.uuid_pad_len
|
||||
return num_to_string(_uuid.uuid5(namespace, name).int, self.alphabet, self.alphabet_len, pad_to_length)
|
||||
|
||||
def decode(self, encoded):
|
||||
return _uuid.UUID(int=string_to_num(encoded, self.alphabet_map, self.alphabet_len))
|
||||
|
||||
|
||||
_global_instance = ShortUUID()
|
||||
uuid4 = _global_instance.uuid4
|
||||
uuid5 = _global_instance.uuid5
|
||||
decode = _global_instance.decode
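A usage sketch of the module-level helpers; decode() inverts uuid4()/uuid5() provided the same alphabet is used:

# Sketch: compact UUID strings that round-trip back to uuid.UUID.
token = uuid4()           # 22 chars with the default 60-character alphabet
original = decode(token)  # a standard uuid.UUID instance
assert decode(uuid4()) != decode(uuid4())  # vanishingly unlikely to collide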
|
||||
888
ebook_converter/utils/smartypants.py
Normal file
@@ -0,0 +1,888 @@
|
||||
#!/usr/bin/python2
|
||||
# vim:fileencoding=utf-8
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__author__ = "Chad Miller <smartypantspy@chad.org>, Kovid Goyal <kovid at kovidgoyal.net>"
|
||||
__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom"
|
||||
|
||||
r"""
|
||||
==============
|
||||
smartypants.py
|
||||
==============
|
||||
|
||||
----------------------------
|
||||
SmartyPants ported to Python
|
||||
----------------------------
|
||||
|
||||
Ported by `Chad Miller`_
|
||||
Copyright (c) 2004, 2007 Chad Miller
|
||||
|
||||
original `SmartyPants`_ by `John Gruber`_
|
||||
Copyright (c) 2003 John Gruber
|
||||
|
||||
|
||||
Synopsis
|
||||
========
|
||||
|
||||
A smart-quotes plugin for Pyblosxom_.
|
||||
|
||||
The original "SmartyPants" is a free web publishing plug-in for Movable Type,
|
||||
Blosxom, and BBEdit that easily translates plain ASCII punctuation characters
|
||||
into "smart" typographic punctuation HTML entities.
|
||||
|
||||
This software, *smartypants.py*, endeavours to be a functional port of
|
||||
SmartyPants to Python, for use with Pyblosxom_.
|
||||
|
||||
|
||||
Description
|
||||
===========
|
||||
|
||||
SmartyPants can perform the following transformations:
|
||||
|
||||
- Straight quotes ( " and ' ) into "curly" quote HTML entities
|
||||
- Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities
|
||||
- Dashes (``--`` and ``---``) into en- and em-dash entities
|
||||
- Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity
|
||||
|
||||
This means you can write, edit, and save your posts using plain old
|
||||
ASCII straight quotes, plain dashes, and plain dots, but your published
|
||||
posts (and final HTML output) will appear with smart quotes, em-dashes,
|
||||
and proper ellipses.
|
||||
|
||||
SmartyPants does not modify characters within ``<pre>``, ``<code>``, ``<kbd>``,
|
||||
``<math>`` or ``<script>`` tag blocks. Typically, these tags are used to
|
||||
display text where smart quotes and other "smart punctuation" would not be
|
||||
appropriate, such as source code or example markup.
|
||||
|
||||
|
||||
Backslash Escapes
|
||||
=================
|
||||
|
||||
If you need to use literal straight quotes (or plain hyphens and
|
||||
periods), SmartyPants accepts the following backslash escape sequences
|
||||
to force non-smart punctuation. It does so by transforming the escape
|
||||
sequence into a decimal-encoded HTML entity:
|
||||
|
||||
(FIXME: table here.)
|
||||
|
||||
.. comment It sucks that there's a disconnect between the visual layout and table markup when special characters are involved.
|
||||
.. comment ====== ===== =========
|
||||
.. comment Escape Value Character
|
||||
.. comment ====== ===== =========
|
||||
.. comment \\\\\\\\ \ \\\\
|
||||
.. comment \\\\" " "
|
||||
.. comment \\\\' ' '
|
||||
.. comment \\\\. . .
|
||||
.. comment \\\\- - \-
|
||||
.. comment \\\\` ` \`
|
||||
.. comment ====== ===== =========
|
||||
|
||||
This is useful, for example, when you want to use straight quotes as
|
||||
foot and inch marks: 6'2" tall; a 17" iMac.
|
||||
|
||||
Options
|
||||
=======
|
||||
|
||||
For Pyblosxom users, the ``smartypants_attributes`` attribute is where you
|
||||
specify configuration options.
|
||||
|
||||
Numeric values are the easiest way to configure SmartyPants' behavior:
|
||||
|
||||
"0"
|
||||
Suppress all transformations. (Do nothing.)
|
||||
"1"
|
||||
Performs default SmartyPants transformations: quotes (including
|
||||
\`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash)
|
||||
is used to signify an em-dash; there is no support for en-dashes.
|
||||
|
||||
"2"
|
||||
Same as smarty_pants="1", except that it uses the old-school typewriter
|
||||
shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``"
|
||||
(dash dash dash)
|
||||
for em-dashes.
|
||||
|
||||
"3"
|
||||
Same as smarty_pants="2", but inverts the shorthand for dashes:
|
||||
"``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for
|
||||
en-dashes.
|
||||
|
||||
"-1"
|
||||
Stupefy mode. Reverses the SmartyPants transformation process, turning
|
||||
the HTML entities produced by SmartyPants into their ASCII equivalents.
|
||||
E.g. "“" is turned into a simple double-quote ("), "—" is
|
||||
turned into two dashes, etc.
|
||||
|
||||
|
||||
The following single-character attribute values can be combined to toggle
|
||||
individual transformations from within the smarty_pants attribute. For
|
||||
example, to educate normal quotes and em-dashes, but not ellipses or
|
||||
\`\`backticks'' -style quotes:
|
||||
|
||||
``py['smartypants_attributes'] = "1"``
|
||||
|
||||
"q"
|
||||
Educates normal quote characters: (") and (').
|
||||
|
||||
"b"
|
||||
Educates \`\`backticks'' -style double quotes.
|
||||
|
||||
"B"
|
||||
Educates \`\`backticks'' -style double quotes and \`single' quotes.
|
||||
|
||||
"d"
|
||||
Educates em-dashes.
|
||||
|
||||
"D"
|
||||
Educates em-dashes and en-dashes, using old-school typewriter shorthand:
|
||||
(dash dash) for en-dashes, (dash dash dash) for em-dashes.
|
||||
|
||||
"i"
|
||||
Educates em-dashes and en-dashes, using inverted old-school typewriter
|
||||
shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes.
|
||||
|
||||
"e"
|
||||
Educates ellipses.
|
||||
|
||||
"w"
|
||||
Translates any instance of ``&quot;`` into a normal double-quote character.
|
||||
This should be of no interest to most people, but of particular interest
|
||||
to anyone who writes their posts using Dreamweaver, as Dreamweaver
|
||||
inexplicably uses this entity to represent a literal double-quote
|
||||
character. SmartyPants only educates normal quotes, not entities (because
|
||||
ordinarily, entities are used for the explicit purpose of representing the
|
||||
specific character they represent). The "w" option must be used in
|
||||
conjunction with one (or both) of the other quote options ("q" or "b").
|
||||
Thus, if you wish to apply all SmartyPants transformations (quotes, en-
|
||||
and em-dashes, and ellipses) and also translate ``&quot;`` entities into
|
||||
regular quotes so SmartyPants can educate them, you should pass the
|
||||
following to the smarty_pants attribute: "qDew"
|
||||
|
||||
The ``smartypants_forbidden_flavours`` list contains pyblosxom flavours for
|
||||
which no Smarty Pants rendering will occur.
|
||||
|
||||
|
||||
Caveats
|
||||
=======
|
||||
|
||||
Why You Might Not Want to Use Smart Quotes in Your Weblog
|
||||
---------------------------------------------------------
|
||||
|
||||
For one thing, you might not care.
|
||||
|
||||
Most normal, mentally stable individuals do not take notice of proper
|
||||
typographic punctuation. Many design and typography nerds, however, break
|
||||
out in a nasty rash when they encounter, say, a restaurant sign that uses
|
||||
a straight apostrophe to spell "Joe's".
|
||||
|
||||
If you're the sort of person who just doesn't care, you might well want to
|
||||
continue not caring. Using straight quotes -- and sticking to the 7-bit
|
||||
ASCII character set in general -- is certainly a simpler way to live.
|
||||
|
||||
Even if you *do* care about accurate typography, you still might want to
|
||||
think twice before educating the quote characters in your weblog. One side
|
||||
effect of publishing curly quote HTML entities is that it makes your
|
||||
weblog a bit harder for others to quote from using copy-and-paste. What
|
||||
happens is that when someone copies text from your blog, the copied text
|
||||
contains the 8-bit curly quote characters (as well as the 8-bit characters
|
||||
for em-dashes and ellipses, if you use these options). These characters
|
||||
are not standard across different text encoding methods, which is why they
|
||||
need to be encoded as HTML entities.
|
||||
|
||||
People copying text from your weblog, however, may not notice that you're
|
||||
using curly quotes, and they'll go ahead and paste the unencoded 8-bit
|
||||
characters copied from their browser into an email message or their own
|
||||
weblog. When pasted as raw "smart quotes", these characters are likely to
|
||||
get mangled beyond recognition.
|
||||
|
||||
That said, my own opinion is that any decent text editor or email client
|
||||
makes it easy to stupefy smart quote characters into their 7-bit
|
||||
equivalents, and I don't consider it my problem if you're using an
|
||||
indecent text editor or email client.
|
||||
|
||||
|
||||
Algorithmic Shortcomings
|
||||
------------------------
|
||||
|
||||
One situation in which quotes will get curled the wrong way is when
|
||||
apostrophes are used at the start of leading contractions. For example:
|
||||
|
||||
``'Twas the night before Christmas.``
|
||||
|
||||
In the case above, SmartyPants will turn the apostrophe into an opening
|
||||
single-quote, when in fact it should be a closing one. I don't think
|
||||
this problem can be solved in the general case -- every word processor
|
||||
I've tried gets this wrong as well. In such cases, it's best to use the
|
||||
proper HTML entity for closing single-quotes (``’``) by hand.
|
||||
|
||||
|
||||
Bugs
|
||||
====
|
||||
|
||||
To file bug reports or feature requests (other than topics listed in the
|
||||
Caveats section above) please send email to: mailto:smartypantspy@chad.org
|
||||
|
||||
If the bug involves quotes being curled the wrong way, please send example
|
||||
text to illustrate.
|
||||
|
||||
To Do list
|
||||
----------
|
||||
|
||||
- Provide a function for use within templates to quote anything at all.
|
||||
|
||||
|
||||
Version History
|
||||
===============
|
||||
|
||||
1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400
|
||||
- Fixed bug where blocks of precious unalterable text were instead
|
||||
interpreted. Thanks to Le Roux and Dirk van Oosterbosch.
|
||||
|
||||
1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400
|
||||
- Fix bogus magical quotation when there is no hint that the
|
||||
user wants it, e.g., in "21st century". Thanks to Nathan Hamblen.
|
||||
- Be smarter about quotes before terminating numbers in an en-dash'ed
|
||||
range.
|
||||
|
||||
1.5_1.4: Thu, 10 Feb 2005 20:24:36 -0500
|
||||
- Fix a date-processing bug, as reported by jacob childress.
|
||||
- Begin a test-suite for ensuring correct output.
|
||||
- Removed import of "string", since I didn't really need it.
|
||||
(This was my first ever Python program. Sue me!)
|
||||
|
||||
1.5_1.3: Wed, 15 Sep 2004 18:25:58 -0400
|
||||
- Abort processing if the flavour is in forbidden-list. Default of
|
||||
[ "rss" ] (Idea of Wolfgang SCHNERRING.)
|
||||
- Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING.
|
||||
|
||||
1.5_1.2: Mon, 24 May 2004 08:14:54 -0400
|
||||
- Some single quotes weren't replaced properly. Diff-tesuji played
|
||||
by Benjamin GEIGER.
|
||||
|
||||
1.5_1.1: Sun, 14 Mar 2004 14:38:28 -0500
|
||||
- Support upcoming pyblosxom 0.9 plugin verification feature.
|
||||
|
||||
1.5_1.0: Tue, 09 Mar 2004 08:08:35 -0500
|
||||
- Initial release
|
||||
|
||||
Version Information
|
||||
-------------------
|
||||
|
||||
Version numbers will track the SmartyPants_ version numbers, with the addition
|
||||
of an underscore and the smartypants.py version on the end.
|
||||
|
||||
New versions will be available at `http://wiki.chad.org/SmartyPantsPy`_
|
||||
|
||||
.. _http://wiki.chad.org/SmartyPantsPy: http://wiki.chad.org/SmartyPantsPy
|
||||
|
||||
Authors
|
||||
=======
|
||||
|
||||
`John Gruber`_ did all of the hard work of writing this software in Perl for
|
||||
`Movable Type`_ and almost all of this useful documentation. `Chad Miller`_
|
||||
ported it to Python to use with Pyblosxom_.
|
||||
|
||||
|
||||
Additional Credits
|
||||
==================
|
||||
|
||||
Portions of the SmartyPants original work are based on Brad Choate's nifty
|
||||
MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to
|
||||
this plug-in. Brad Choate is a fine hacker indeed.
|
||||
|
||||
`Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta
|
||||
testing of the original SmartyPants.
|
||||
|
||||
`Rael Dornfest`_ ported SmartyPants to Blosxom.
|
||||
|
||||
.. _Brad Choate: http://bradchoate.com/
|
||||
.. _Jeremy Hedley: http://antipixel.com/
|
||||
.. _Charles Wiltgen: http://playbacktime.com/
|
||||
.. _Rael Dornfest: http://raelity.org/
|
||||
|
||||
|
||||
Copyright and License
|
||||
=====================
|
||||
|
||||
SmartyPants_ license::
|
||||
|
||||
Copyright (c) 2003 John Gruber
|
||||
(https://daringfireball.net/)
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
* Neither the name "SmartyPants" nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
This software is provided by the copyright holders and contributors "as
|
||||
is" and any express or implied warranties, including, but not limited
|
||||
to, the implied warranties of merchantability and fitness for a
|
||||
particular purpose are disclaimed. In no event shall the copyright
|
||||
owner or contributors be liable for any direct, indirect, incidental,
|
||||
special, exemplary, or consequential damages (including, but not
|
||||
limited to, procurement of substitute goods or services; loss of use,
|
||||
data, or profits; or business interruption) however caused and on any
|
||||
theory of liability, whether in contract, strict liability, or tort
|
||||
(including negligence or otherwise) arising in any way out of the use
|
||||
of this software, even if advised of the possibility of such damage.
|
||||
|
||||
|
||||
smartypants.py license::
|
||||
|
||||
smartypants.py is a derivative work of SmartyPants.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
This software is provided by the copyright holders and contributors "as
|
||||
is" and any express or implied warranties, including, but not limited
|
||||
to, the implied warranties of merchantability and fitness for a
|
||||
particular purpose are disclaimed. In no event shall the copyright
|
||||
owner or contributors be liable for any direct, indirect, incidental,
|
||||
special, exemplary, or consequential damages (including, but not
|
||||
limited to, procurement of substitute goods or services; loss of use,
|
||||
data, or profits; or business interruption) however caused and on any
|
||||
theory of liability, whether in contract, strict liability, or tort
|
||||
(including negligence or otherwise) arising in any way out of the use
|
||||
of this software, even if advised of the possibility of such damage.
|
||||
|
||||
|
||||
|
||||
.. _John Gruber: https://daringfireball.net/
|
||||
.. _Chad Miller: http://web.chad.org/
|
||||
|
||||
.. _Pyblosxom: http://roughingit.subtlehints.net/pyblosxom
|
||||
.. _SmartyPants: https://daringfireball.net/projects/smartypants/
|
||||
.. _Movable Type: http://www.movabletype.org/
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
# style added by Kovid
|
||||
tags_to_skip_regex = re.compile(r"<(/)?(style|pre|code|kbd|script|math)[^>]*>", re.I)
|
||||
self_closing_regex = re.compile(r'/\s*>$')
|
||||
|
||||
|
||||
# internal functions below here
|
||||
|
||||
def parse_attr(attr):
|
||||
do_dashes = do_backticks = do_quotes = do_ellipses = do_stupefy = 0
|
||||
|
||||
if attr == "1":
|
||||
do_quotes = 1
|
||||
do_backticks = 1
|
||||
do_dashes = 1
|
||||
do_ellipses = 1
|
||||
elif attr == "2":
|
||||
# Do everything, turn all options on, use old school dash shorthand.
|
||||
do_quotes = 1
|
||||
do_backticks = 1
|
||||
do_dashes = 2
|
||||
do_ellipses = 1
|
||||
elif attr == "3":
|
||||
# Do everything, turn all options on, use inverted old school dash shorthand.
|
||||
do_quotes = 1
|
||||
do_backticks = 1
|
||||
do_dashes = 3
|
||||
do_ellipses = 1
|
||||
elif attr == "-1":
|
||||
# Special "stupefy" mode.
|
||||
do_stupefy = 1
|
||||
else:
|
||||
for c in attr:
|
||||
if c == "q":
|
||||
do_quotes = 1
|
||||
elif c == "b":
|
||||
do_backticks = 1
|
||||
elif c == "B":
|
||||
do_backticks = 2
|
||||
elif c == "d":
|
||||
do_dashes = 1
|
||||
elif c == "D":
|
||||
do_dashes = 2
|
||||
elif c == "i":
|
||||
do_dashes = 3
|
||||
elif c == "e":
|
||||
do_ellipses = 1
|
||||
else:
|
||||
pass
|
||||
# ignore unknown option
|
||||
return do_dashes, do_backticks, do_quotes, do_ellipses, do_stupefy
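For illustration, the flag tuples parse_attr() produces (order is dashes, backticks, quotes, ellipses, stupefy):

# Sketch: a few representative attribute strings.
assert parse_attr("1") == (1, 1, 1, 1, 0)    # default: everything on
assert parse_attr("qe") == (0, 0, 1, 1, 0)   # quotes and ellipses only
assert parse_attr("-1") == (0, 0, 0, 0, 1)   # stupefy (reverse) mode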
|
||||
|
||||
|
||||
def smartyPants(text, attr='1'):
|
||||
# Parse attributes:
|
||||
# 0 : do nothing
|
||||
# 1 : set all
|
||||
# 2 : set all, using old school en- and em- dash shortcuts
|
||||
# 3 : set all, using inverted old school en and em- dash shortcuts
|
||||
#
|
||||
# q : quotes
|
||||
# b : backtick quotes (``double'' only)
|
||||
# B : backtick quotes (``double'' and `single')
|
||||
# d : dashes
|
||||
# D : old school dashes
|
||||
# i : inverted old school dashes
|
||||
# e : ellipses
|
||||
|
||||
if attr == "0":
|
||||
# Do nothing.
|
||||
return text
|
||||
|
||||
do_dashes, do_backticks, do_quotes, do_ellipses, do_stupefy = parse_attr(attr)
|
||||
dashes_func = {1: educateDashes, 2: educateDashesOldSchool, 3: educateDashesOldSchoolInverted}.get(do_dashes, lambda x: x)
|
||||
backticks_func = {1: educateBackticks, 2: lambda x: educateSingleBackticks(educateBackticks(x))}.get(do_backticks, lambda x: x)
|
||||
ellipses_func = {1: educateEllipses}.get(do_ellipses, lambda x: x)
|
||||
stupefy_func = {1: stupefyEntities}.get(do_stupefy, lambda x: x)
|
||||
skipped_tag_stack = []
|
||||
tokens = _tokenize(text)
|
||||
result = []
|
||||
in_pre = False
|
||||
|
||||
prev_token_last_char = ""
|
||||
# This is a cheat, used to get some context
|
||||
# for one-character tokens that consist of
|
||||
# just a quote char. What we do is remember
|
||||
# the last character of the previous text
|
||||
# token, to use as context to curl single-
|
||||
# character quote tokens correctly.
|
||||
|
||||
for cur_token in tokens:
|
||||
if cur_token[0] == "tag":
|
||||
# Don't mess with quotes inside some tags. This does not handle self <closing/> tags!
|
||||
result.append(cur_token[1])
|
||||
skip_match = tags_to_skip_regex.match(cur_token[1])
|
||||
if skip_match is not None:
|
||||
is_self_closing = self_closing_regex.search(skip_match.group()) is not None
|
||||
if not is_self_closing:
|
||||
if not skip_match.group(1):
|
||||
skipped_tag_stack.append(skip_match.group(2).lower())
|
||||
in_pre = True
|
||||
else:
|
||||
if len(skipped_tag_stack) > 0:
|
||||
if skip_match.group(2).lower() == skipped_tag_stack[-1]:
|
||||
skipped_tag_stack.pop()
|
||||
else:
|
||||
pass
|
||||
# This close doesn't match the open. This isn't XHTML. We should barf here.
|
||||
if len(skipped_tag_stack) == 0:
|
||||
in_pre = False
|
||||
else:
|
||||
t = cur_token[1]
|
||||
last_char = t[-1:] # Remember last char of this token before processing.
|
||||
if not in_pre:
|
||||
t = processEscapes(t)
|
||||
|
||||
t = re.sub('"', '"', t)
|
||||
t = dashes_func(t)
|
||||
t = ellipses_func(t)
|
||||
# Note: backticks need to be processed before quotes.
|
||||
t = backticks_func(t)
|
||||
|
||||
if do_quotes != 0:
|
||||
if t == "'":
|
||||
# Special case: single-character ' token
|
||||
if re.match(r"\S", prev_token_last_char):
|
||||
t = "’"
|
||||
else:
|
||||
t = "‘"
|
||||
elif t == '"':
|
||||
# Special case: single-character " token
|
||||
if re.match(r"\S", prev_token_last_char):
|
||||
t = "”"
|
||||
else:
|
||||
t = "“"
|
||||
|
||||
else:
|
||||
# Normal case:
|
||||
t = educateQuotes(t)
|
||||
|
||||
t = stupefy_func(t)
|
||||
|
||||
prev_token_last_char = last_char
|
||||
result.append(t)
|
||||
|
||||
return "".join(result)
|
||||
|
||||
|
||||
def educateQuotes(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
|
||||
Returns: The string, with "educated" curly quote HTML entities.
|
||||
|
||||
Example input: "Isn't this fun?"
|
||||
Example output: “Isn’t this fun?”
|
||||
"""
|
||||
|
||||
punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
|
||||
|
||||
# Special case if the very first character is a quote
|
||||
# followed by punctuation at a non-word-break. Close the quotes by brute force:
|
||||
text = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""’""", text)
|
||||
text = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""”""", text)
|
||||
|
||||
# Special case for double sets of quotes, e.g.:
|
||||
# <p>He said, "'Quoted' words in a larger quote."</p>
|
||||
text = re.sub(r""""'(?=\w)""", """“‘""", text)
|
||||
text = re.sub(r"""'"(?=\w)""", """‘“""", text)
|
||||
text = re.sub(r'''""(?=\w)''', """““""", text)
|
||||
text = re.sub(r"""''(?=\w)""", """‘‘""", text)
|
||||
text = re.sub(r'''\"\'''', """”’""", text)
|
||||
text = re.sub(r'''\'\"''', """’”""", text)
|
||||
text = re.sub(r'''""''', """””""", text)
|
||||
text = re.sub(r"""''""", """’’""", text)
|
||||
|
||||
# Special case for decade abbreviations (the '80s --> ’80s):
|
||||
# See http://practicaltypography.com/apostrophes.html
|
||||
text = re.sub(r"""(\W|^)'(?=\d{2}s)""", r"""\1’""", text)
|
||||
# Measurements in feet and inches or longitude/latitude: 19' 43.5" --> 19′ 43.5″
|
||||
text = re.sub(r'''(\W|^)([-0-9.]+\s*)'(\s*[-0-9.]+)"''', r'\1\2′\3″', text)
|
||||
|
||||
# Special case for Quotes at inside of other entities, e.g.:
|
||||
# <p>A double quote--"within dashes"--would be nice.</p>
|
||||
text = re.sub(r"""(?<=\W)"(?=\w)""", r"""“""", text)
|
||||
text = re.sub(r"""(?<=\W)'(?=\w)""", r"""‘""", text)
|
||||
text = re.sub(r"""(?<=\w)"(?=\W)""", r"""”""", text)
|
||||
text = re.sub(r"""(?<=\w)'(?=\W)""", r"""’""", text)
|
||||
|
||||
# The following are commented out as smartypants tokenizes text by
|
||||
# stripping out html tags. Therefore, there is no guarantee that the
|
||||
# start-of-line and end-of-line regex operators will match anything
|
||||
# meaningful
|
||||
|
||||
# Special case for Quotes at end of line with a preceding space (may change just to end of line)
|
||||
# text = re.sub(r"""(?<=\s)"$""", r"""”""", text)
|
||||
# text = re.sub(r"""(?<=\s)'$""", r"""’""", text)
|
||||
|
||||
# Special case for Quotes at beginning of line with a space - multiparagraph quoted text:
|
||||
# text = re.sub(r"""^"(?=\s)""", r"""“""", text)
|
||||
# text = re.sub(r"""^'(?=\s)""", r"""‘""", text)
|
||||
|
||||
close_class = r"""[^\ \t\r\n\[\{\(\-]"""
|
||||
dec_dashes = r"""–|—"""
|
||||
|
||||
# Get most opening single quotes:
|
||||
opening_single_quotes_regex = re.compile(r"""
|
||||
(
|
||||
\s | # a whitespace char, or
|
||||
&nbsp; | # a non-breaking space entity, or
|
||||
-- | # dashes, or
|
||||
&[mn]dash; | # named dash entities
|
||||
%s | # or decimal entities
|
||||
&\#x201[34]; # or hex
|
||||
)
|
||||
' # the quote
|
||||
(?=\w) # followed by a word character
|
||||
""" % (dec_dashes,), re.VERBOSE)
|
||||
text = opening_single_quotes_regex.sub(r"""\1‘""", text)
|
||||
|
||||
closing_single_quotes_regex = re.compile(r"""
|
||||
(%s)
|
||||
'
|
||||
(?!\s | s\b | \d)
|
||||
""" % (close_class,), re.VERBOSE)
|
||||
text = closing_single_quotes_regex.sub(r"""\1’""", text)
|
||||
|
||||
closing_single_quotes_regex = re.compile(r"""
|
||||
(%s)
|
||||
'
|
||||
(\s | s\b)
|
||||
""" % (close_class,), re.VERBOSE)
|
||||
text = closing_single_quotes_regex.sub(r"""\1’\2""", text)
|
||||
|
||||
# Any remaining single quotes should be opening ones:
|
||||
text = re.sub(r"""'""", r"""‘""", text)
|
||||
|
||||
# Get most opening double quotes:
|
||||
opening_double_quotes_regex = re.compile(r"""
|
||||
(
|
||||
\s | # a whitespace char, or
|
||||
&nbsp; | # a non-breaking space entity, or
|
||||
-- | # dashes, or
|
||||
&[mn]dash; | # named dash entities
|
||||
%s | # or decimal entities
|
||||
&\#x201[34]; # or hex
|
||||
)
|
||||
" # the quote
|
||||
(?=\w) # followed by a word character
|
||||
""" % (dec_dashes,), re.VERBOSE)
|
||||
text = opening_double_quotes_regex.sub(r"""\1“""", text)
|
||||
|
||||
# Double closing quotes:
|
||||
closing_double_quotes_regex = re.compile(r"""
|
||||
#(%s)? # character that indicates the quote should be closing
|
||||
"
|
||||
(?=\s)
|
||||
""" % (close_class,), re.VERBOSE)
|
||||
text = closing_double_quotes_regex.sub(r"""”""", text)
|
||||
|
||||
closing_double_quotes_regex = re.compile(r"""
|
||||
(%s) # character that indicates the quote should be closing
|
||||
"
|
||||
""" % (close_class,), re.VERBOSE)
|
||||
text = closing_double_quotes_regex.sub(r"""\1”""", text)
|
||||
|
||||
if text.endswith('-"'):
|
||||
# A string that endswith -" is sometimes used for dialogue
|
||||
text = text[:-1] + '”'
|
||||
|
||||
# Any remaining quotes should be opening ones.
|
||||
text = re.sub(r'"', r"""“""", text)
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def educateBackticks(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
Returns: The string, with ``backticks'' -style double quotes
|
||||
translated into HTML curly quote entities.
|
||||
Example input: ``Isn't this fun?''
|
||||
Example output: “Isn't this fun?”
|
||||
"""
|
||||
|
||||
text = re.sub(r"""``""", r"""“""", text)
|
||||
text = re.sub(r"""''""", r"""”""", text)
|
||||
return text
|
||||
|
||||
|
||||
def educateSingleBackticks(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
Returns: The string, with `backticks' -style single quotes
|
||||
translated into HTML curly quote entities.
|
||||
|
||||
Example input: `Isn't this fun?'
|
||||
Example output: ‘Isn’t this fun?’
|
||||
"""
|
||||
|
||||
text = re.sub(r"""`""", r"""‘""", text)
|
||||
text = re.sub(r"""'""", r"""’""", text)
|
||||
return text
|
||||
|
||||
|
||||
def educateDashes(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
|
||||
Returns: The string, with each instance of "--" translated to
|
||||
an em-dash HTML entity.
|
||||
"""
|
||||
|
||||
text = re.sub(r"""---""", r"""–""", text) # en (yes, backwards)
|
||||
text = re.sub(r"""--""", r"""—""", text) # em (yes, backwards)
|
||||
return text
|
||||
|
||||
|
||||
def educateDashesOldSchool(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
|
||||
Returns: The string, with each instance of "--" translated to
|
||||
an en-dash HTML entity, and each "---" translated to
|
||||
an em-dash HTML entity.
|
||||
"""
|
||||
|
||||
text = re.sub(r"""---""", r"""—""", text) # em (yes, backwards)
|
||||
text = re.sub(r"""--""", r"""–""", text) # en (yes, backwards)
|
||||
return text
|
||||
|
||||
|
||||
def educateDashesOldSchoolInverted(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
|
||||
Returns: The string, with each instance of "--" translated to
|
||||
an em-dash HTML entity, and each "---" translated to
|
||||
an en-dash HTML entity. Two reasons why: First, unlike the
|
||||
en- and em-dash syntax supported by
|
||||
EducateDashesOldSchool(), it's compatible with existing
|
||||
entries written before SmartyPants 1.1, back when "--" was
|
||||
only used for em-dashes. Second, em-dashes are more
|
||||
common than en-dashes, and so it sort of makes sense that
|
||||
the shortcut should be shorter to type. (Thanks to Aaron
|
||||
Swartz for the idea.)
|
||||
"""
|
||||
text = re.sub(r"""---""", r"""–""", text) # em
|
||||
text = re.sub(r"""--""", r"""—""", text) # en
|
||||
return text
|
||||
|
||||
|
||||
def educateEllipses(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
Returns: The string, with each instance of "..." translated to
|
||||
an ellipsis HTML entity.
|
||||
|
||||
Example input: Huh...?
|
||||
Example output: Huh…?
|
||||
"""
|
||||
|
||||
text = re.sub(r"""\.\.\.""", r"""…""", text)
|
||||
text = re.sub(r"""\. \. \.""", r"""…""", text)
|
||||
return text
|
||||
|
||||
|
||||
def stupefyEntities(text):
|
||||
"""
|
||||
Parameter: String.
|
||||
Returns: The string, with each SmartyPants HTML entity translated to
|
||||
its ASCII counterpart.
|
||||
|
||||
Example input: “Hello — world.”
|
||||
Example output: "Hello -- world."
|
||||
"""
|
||||
|
||||
text = re.sub(r"""–""", r"""-""", text) # en-dash
|
||||
text = re.sub(r"""—""", r"""--""", text) # em-dash
|
||||
|
||||
text = re.sub(r"""‘""", r"""'""", text) # open single quote
|
||||
text = re.sub(r"""’""", r"""'""", text) # close single quote
|
||||
|
||||
text = re.sub(r"""“""", r'''"''', text) # open double quote
|
||||
text = re.sub(r"""”""", r'''"''', text) # close double quote
|
||||
|
||||
text = re.sub(r"""…""", r"""...""", text) # ellipsis
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def processEscapes(text):
|
||||
r"""
|
||||
Parameter: String.
|
||||
Returns: The string, after processing the following backslash
|
||||
escape sequences. This is useful if you want to force a "dumb"
|
||||
quote or other character to appear.
|
||||
|
||||
Escape Value
|
||||
------ -----
|
||||
\\ \
|
||||
\" "
|
||||
\' '
|
||||
\. .
|
||||
\- -
|
||||
\` `
|
||||
"""
|
||||
text = re.sub(r"""\\\\""", r"""\""", text)
|
||||
text = re.sub(r'''\\"''', r""""""", text)
|
||||
text = re.sub(r"""\\'""", r"""'""", text)
|
||||
text = re.sub(r"""\\\.""", r""".""", text)
|
||||
text = re.sub(r"""\\-""", r"""-""", text)
|
||||
text = re.sub(r"""\\`""", r"""`""", text)
|
||||
|
||||
return text
|
||||
|
||||
|
||||
def _tokenize(html):
|
||||
"""
|
||||
Parameter: String containing HTML markup.
|
||||
Returns: Reference to an array of the tokens comprising the input
|
||||
string. Each token is either a tag (possibly with nested
|
||||
tags contained therein, such as <a href="<MTFoo>">), or a
|
||||
run of text between tags. Each element of the array is a
|
||||
two-element array; the first is either 'tag' or 'text';
|
||||
the second is the actual value.
|
||||
|
||||
Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
|
||||
<http://www.bradchoate.com/past/mtregex.php>
|
||||
"""
|
||||
|
||||
tokens = []
|
||||
|
||||
# depth = 6
|
||||
# nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth)
|
||||
# match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments
|
||||
# (?: <\? .*? \?> ) | # directives
|
||||
# %s # nested tags """ % (nested_tags,)
|
||||
tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""")
|
||||
|
||||
token_match = tag_soup.search(html)
|
||||
|
||||
previous_end = 0
|
||||
while token_match is not None:
|
||||
if token_match.group(1):
|
||||
tokens.append(['text', token_match.group(1)])
|
||||
|
||||
tokens.append(['tag', token_match.group(2)])
|
||||
|
||||
previous_end = token_match.end()
|
||||
token_match = tag_soup.search(html, token_match.end())
|
||||
|
||||
if previous_end < len(html):
|
||||
tokens.append(['text', html[previous_end:]])
|
||||
|
||||
return tokens
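For illustration, the token stream produced for a small fragment:

# Sketch: tags and text runs come back as labelled two-element lists.
print(_tokenize('<p>Hi <b>there</b></p>'))
# -> [['tag', '<p>'], ['text', 'Hi '], ['tag', '<b>'],
#     ['text', 'there'], ['tag', '</b>'], ['tag', '</p>']]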
|
||||
|
||||
|
||||
def run_tests(return_tests=False):
|
||||
import unittest
|
||||
sp = smartyPants
|
||||
|
||||
class TestSmartypantsAllAttributes(unittest.TestCase):
|
||||
# the default attribute is "1", which means "all".
|
||||
|
||||
def test_dates(self):
|
||||
self.assertEqual(sp("one two '60s"), "one two ’60s")
|
||||
self.assertEqual(sp("1440-80's"), "1440-80’s")
|
||||
self.assertEqual(sp("1440-'80s"), "1440-’80s")
|
||||
self.assertEqual(sp("1440---'80s"), "1440–’80s")
|
||||
self.assertEqual(sp("1960s"), "1960s") # no effect.
|
||||
self.assertEqual(sp("1960's"), "1960’s")
|
||||
self.assertEqual(sp("one two '60s"), "one two ’60s")
|
||||
self.assertEqual(sp("'60s"), "’60s")
|
||||
|
||||
def test_measurements(self):
|
||||
ae = self.assertEqual
|
||||
ae(sp("one two 1.1'2.2\""), "one two 1.1′2.2″")
|
||||
ae(sp("1' 2\""), "1′ 2″")
|
||||
|
||||
def test_skip_tags(self):
|
||||
self.assertEqual(
|
||||
sp("""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>"""), # noqa
|
||||
"""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>""") # noqa
|
||||
self.assertEqual(
|
||||
sp("""<p>He said "Let's write some code." This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>"""),
|
||||
"""<p>He said “Let’s write some code.” This code here <code>if True:\n\tprint "Okay"</code> is python code.</p>""") # noqa
|
||||
|
||||
self.assertEqual(
|
||||
sp('''<script/><p>It's ok</p>'''),
|
||||
'''<script/><p>It’s ok</p>''')
|
||||
|
||||
def test_ordinal_numbers(self):
|
||||
self.assertEqual(sp("21st century"), "21st century") # no effect.
|
||||
self.assertEqual(sp("3rd"), "3rd") # no effect.
|
||||
|
||||
def test_educated_quotes(self):
|
||||
self.assertEqual(sp('''"Isn't this fun?"'''), '''“Isn’t this fun?”''')
|
||||
|
||||
tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestSmartypantsAllAttributes)
|
||||
if return_tests:
|
||||
return tests
|
||||
unittest.TextTestRunner(verbosity=4).run(tests)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
run_tests()
|
||||
205
ebook_converter/utils/speedups.py
Normal file
@@ -0,0 +1,205 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import os
|
||||
from polyglot.builtins import range, unicode_type
|
||||
|
||||
|
||||
class ReadOnlyFileBuffer(object):
|
||||
|
||||
''' A zero copy implementation of a file like object. Uses memoryviews for efficiency. '''
|
||||
|
||||
def __init__(self, raw):
|
||||
self.sz, self.mv = len(raw), (raw if isinstance(raw, memoryview) else memoryview(raw))
|
||||
self.pos = 0
|
||||
|
||||
def tell(self):
|
||||
return self.pos
|
||||
|
||||
def read(self, n=None):
|
||||
if n is None:
|
||||
ans = self.mv[self.pos:]
|
||||
self.pos = self.sz
|
||||
return ans
|
||||
ans = self.mv[self.pos:self.pos+n]
|
||||
self.pos = min(self.pos + n, self.sz)
|
||||
return ans
|
||||
|
||||
def seek(self, pos, whence=os.SEEK_SET):
|
||||
if whence == os.SEEK_SET:
|
||||
self.pos = pos
|
||||
elif whence == os.SEEK_END:
|
||||
self.pos = self.sz + pos
|
||||
else:
|
||||
self.pos += pos
|
||||
self.pos = max(0, min(self.pos, self.sz))
|
||||
return self.pos
|
||||
|
||||
def getvalue(self):
|
||||
return self.mv
|
||||
|
||||
def close(self):
|
||||
pass
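A small usage sketch; note that read() returns memoryview slices, so callers needing bytes must convert explicitly:

# Sketch: zero-copy, file-like reads over an in-memory buffer.
buf = ReadOnlyFileBuffer(b'hello world')
assert bytes(buf.read(5)) == b'hello'
buf.seek(-5, os.SEEK_END)             # os is already imported at module top
assert bytes(buf.read()) == b'world'
assert buf.tell() == 11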
|
||||
|
||||


def svg_path_to_painter_path(d):
    '''
    Convert a tiny SVG 1.2 path into a QPainterPath.

    :param d: The value of the d attribute of an SVG <path> tag
    '''
    from PyQt5.Qt import QPainterPath
    cmd = last_cmd = b''
    path = QPainterPath()
    moveto_abs, moveto_rel = b'M', b'm'
    closepath1, closepath2 = b'Z', b'z'
    lineto_abs, lineto_rel = b'L', b'l'
    hline_abs, hline_rel = b'H', b'h'
    vline_abs, vline_rel = b'V', b'v'
    curveto_abs, curveto_rel = b'C', b'c'
    smoothcurveto_abs, smoothcurveto_rel = b'S', b's'
    quadcurveto_abs, quadcurveto_rel = b'Q', b'q'
    smoothquadcurveto_abs, smoothquadcurveto_rel = b'T', b't'

    # Store the last parsed values
    # x/y = end position
    # x1/y1 and x2/y2 = bezier control points
    x = y = x1 = y1 = x2 = y2 = 0

    if isinstance(d, unicode_type):
        d = d.encode('ascii')
    d = d.replace(b',', b' ').replace(b'\n', b' ')
    end = len(d)
    pos = [0]

    def read_byte():
        p = pos[0]
        pos[0] += 1
        return d[p:p + 1]

    def parse_float():
        chars = []
        while pos[0] < end:
            c = read_byte()
            if c == b' ' and not chars:
                continue
            if c in b'-.0123456789':
                chars.append(c)
            else:
                break
        if not chars:
            raise ValueError('Premature end of input while expecting a number')
        return float(b''.join(chars))

    def parse_floats(num, x_offset=0, y_offset=0):
        for i in range(num):
            val = parse_float()
            yield val + (x_offset if i % 2 == 0 else y_offset)

    repeated_command = None

    while pos[0] < end:
        last_cmd = cmd
        cmd = read_byte() if repeated_command is None else repeated_command
        repeated_command = None

        if cmd == b' ':
            continue
        if cmd == moveto_abs:
            x, y = parse_float(), parse_float()
            path.moveTo(x, y)
        elif cmd == moveto_rel:
            x += parse_float()
            y += parse_float()
            path.moveTo(x, y)
        elif cmd == closepath1 or cmd == closepath2:
            path.closeSubpath()
        elif cmd == lineto_abs:
            x, y = parse_floats(2)
            path.lineTo(x, y)
        elif cmd == lineto_rel:
            x += parse_float()
            y += parse_float()
            path.lineTo(x, y)
        elif cmd == hline_abs:
            x = parse_float()
            path.lineTo(x, y)
        elif cmd == hline_rel:
            x += parse_float()
            path.lineTo(x, y)
        elif cmd == vline_abs:
            y = parse_float()
            path.lineTo(x, y)
        elif cmd == vline_rel:
            y += parse_float()
            path.lineTo(x, y)
        elif cmd == curveto_abs:
            x1, y1, x2, y2, x, y = parse_floats(6)
            path.cubicTo(x1, y1, x2, y2, x, y)
        elif cmd == curveto_rel:
            x1, y1, x2, y2, x, y = parse_floats(6, x, y)
            path.cubicTo(x1, y1, x2, y2, x, y)
        elif cmd == smoothcurveto_abs:
            if last_cmd in (curveto_abs, curveto_rel, smoothcurveto_abs, smoothcurveto_rel):
                x1 = 2 * x - x2
                y1 = 2 * y - y2
            else:
                x1, y1 = x, y
            x2, y2, x, y = parse_floats(4)
            path.cubicTo(x1, y1, x2, y2, x, y)
        elif cmd == smoothcurveto_rel:
            if last_cmd in (curveto_abs, curveto_rel, smoothcurveto_abs, smoothcurveto_rel):
                x1 = 2 * x - x2
                y1 = 2 * y - y2
            else:
                x1, y1 = x, y
            x2, y2, x, y = parse_floats(4, x, y)
            path.cubicTo(x1, y1, x2, y2, x, y)
        elif cmd == quadcurveto_abs:
            x1, y1, x, y = parse_floats(4)
            path.quadTo(x1, y1, x, y)
        elif cmd == quadcurveto_rel:
            x1, y1, x, y = parse_floats(4, x, y)
            path.quadTo(x1, y1, x, y)
        elif cmd == smoothquadcurveto_abs:
            if last_cmd in (quadcurveto_abs, quadcurveto_rel, smoothquadcurveto_abs, smoothquadcurveto_rel):
                x1 = 2 * x - x1
                y1 = 2 * y - y1
            else:
                x1, y1 = x, y
            x, y = parse_floats(2)
            path.quadTo(x1, y1, x, y)
        elif cmd == smoothquadcurveto_rel:
            if last_cmd in (quadcurveto_abs, quadcurveto_rel, smoothquadcurveto_abs, smoothquadcurveto_rel):
                x1 = 2 * x - x1
                y1 = 2 * y - y1
            else:
                x1, y1 = x, y
            x, y = parse_floats(2, x, y)
            path.quadTo(x1, y1, x, y)
        elif cmd in b'-.0123456789':
            # A new number begins
            # In this case, multiple parameter tuples are specified for the last command
            # We rewind to reparse the data correctly
            pos[0] -= 1

            # Handle extra parameters
            if last_cmd == moveto_abs:
                repeated_command = cmd = lineto_abs
            elif last_cmd == moveto_rel:
                repeated_command = cmd = lineto_rel
            elif last_cmd in (closepath1, closepath2):
                raise ValueError('Extra parameters after close path command')
            elif last_cmd in (
                lineto_abs, lineto_rel, hline_abs, hline_rel, vline_abs,
                vline_rel, curveto_abs, curveto_rel, smoothcurveto_abs,
                smoothcurveto_rel, quadcurveto_abs, quadcurveto_rel,
                smoothquadcurveto_abs, smoothquadcurveto_rel
            ):
                repeated_command = cmd = last_cmd
        else:
            raise ValueError('Unknown path command: %s' % cmd)
    return path
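

# Editor's sketch (not part of the original file): converting a small closed
# triangle. Requires PyQt5 at runtime; the coordinates here are arbitrary.
def _demo_svg_path():
    path = svg_path_to_painter_path('M 10 10 L 90 10 L 50 80 Z')
    return path  # one closed subpath with three vertices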
443
ebook_converter/utils/terminal.py
Normal file
@@ -0,0 +1,443 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals

__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os, sys, re

from calibre.constants import iswindows, ispy3
from polyglot.builtins import iteritems, range, zip, native_string_type

if iswindows:
    import ctypes.wintypes

    class CONSOLE_SCREEN_BUFFER_INFO(ctypes.Structure):
        _fields_ = [
            ('dwSize', ctypes.wintypes._COORD),
            ('dwCursorPosition', ctypes.wintypes._COORD),
            ('wAttributes', ctypes.wintypes.WORD),
            ('srWindow', ctypes.wintypes._SMALL_RECT),
            ('dwMaximumWindowSize', ctypes.wintypes._COORD)
        ]


def fmt(code):
    return '\033[%dm' % code


RATTRIBUTES = dict(
    zip(range(1, 9), (
        'bold',
        'dark',
        '',
        'underline',
        'blink',
        '',
        'reverse',
        'concealed'
    ))
)
ATTRIBUTES = {v: fmt(k) for k, v in iteritems(RATTRIBUTES)}
del ATTRIBUTES['']

RBACKGROUNDS = dict(
    zip(range(41, 48), (
        'red',
        'green',
        'yellow',
        'blue',
        'magenta',
        'cyan',
        'white'
    ))
)
BACKGROUNDS = {v: fmt(k) for k, v in iteritems(RBACKGROUNDS)}

RCOLORS = dict(
    zip(range(31, 38), (
        'red',
        'green',
        'yellow',
        'blue',
        'magenta',
        'cyan',
        'white'
    ))
)
COLORS = {v: fmt(k) for k, v in iteritems(RCOLORS)}

RESET = fmt(0)

if iswindows:
    # From wincon.h
    WCOLORS = {c: i for i, c in enumerate((
        'black', 'blue', 'green', 'cyan', 'red', 'magenta', 'yellow', 'white'))}

    def to_flag(fg, bg, bold):
        val = 0
        if bold:
            val |= 0x08
        if fg in WCOLORS:
            val |= WCOLORS[fg]
        if bg in WCOLORS:
            val |= (WCOLORS[bg] << 4)
        return val


def colored(text, fg=None, bg=None, bold=False):
    prefix = []
    if fg is not None:
        prefix.append(COLORS[fg])
    if bg is not None:
        prefix.append(BACKGROUNDS[bg])
    if bold:
        prefix.append(ATTRIBUTES['bold'])
    prefix = ''.join(prefix)
    suffix = RESET
    if isinstance(text, bytes):
        prefix = prefix.encode('ascii')
        suffix = suffix.encode('ascii')
    return prefix + text + suffix
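

# Editor's sketch (not part of the original file): colored() simply wraps the
# text in ANSI SGR escapes, e.g. '\033[31m' + text + '\033[0m' for red.
def _demo_colored():
    print(colored('error', fg='red', bold=True))
    print(colored('ok', fg='green'))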


class Detect(object):

    def __init__(self, stream):
        self.stream = stream or sys.stdout
        self.isatty = getattr(self.stream, 'isatty', lambda: False)()
        force_ansi = 'CALIBRE_FORCE_ANSI' in os.environ
        if not self.isatty and force_ansi:
            self.isatty = True
        self.isansi = force_ansi or not iswindows
        self.set_console = self.write_console = None
        self.is_console = False
        if not self.isansi:
            try:
                import msvcrt
                self.msvcrt = msvcrt
                self.file_handle = msvcrt.get_osfhandle(self.stream.fileno())
                from ctypes import windll, wintypes, byref, POINTER, WinDLL
                mode = wintypes.DWORD(0)
                f = windll.kernel32.GetConsoleMode
                f.argtypes, f.restype = [wintypes.HANDLE, POINTER(wintypes.DWORD)], wintypes.BOOL
                if f(self.file_handle, byref(mode)):
                    # Stream is a console
                    self.set_console = windll.kernel32.SetConsoleTextAttribute
                    self.default_console_text_attributes = WCOLORS['white']
                    kernel32 = WinDLL(native_string_type('kernel32'), use_last_error=True)
                    self.write_console = kernel32.WriteConsoleW
                    # c_wchar_p lives in ctypes itself, not in ctypes.wintypes
                    self.write_console.argtypes = [wintypes.HANDLE, ctypes.c_wchar_p, wintypes.DWORD, POINTER(wintypes.DWORD), wintypes.LPVOID]
                    self.write_console.restype = wintypes.BOOL
                    kernel32.GetConsoleScreenBufferInfo.argtypes = [wintypes.HANDLE, ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO)]
                    kernel32.GetConsoleScreenBufferInfo.restype = wintypes.BOOL
                    csbi = CONSOLE_SCREEN_BUFFER_INFO()
                    if kernel32.GetConsoleScreenBufferInfo(self.file_handle, byref(csbi)):
                        self.default_console_text_attributes = csbi.wAttributes
                    self.is_console = True
            except Exception:
                pass

    def write_unicode_text(self, text, ignore_errors=False):
        ' Windows only method that writes unicode strings correctly to the windows console using the Win32 API '
        if self.is_console:
            from ctypes import wintypes, byref, c_wchar_p
            written = wintypes.DWORD(0)
            text = text.replace('\0', '')
            chunk = len(text)
            while text:
                t, text = text[:chunk], text[chunk:]
                wt = c_wchar_p(t)
                if ispy3:
                    # WriteConsoleW expects the length in UTF-16 code units
                    text_len = len(t.encode('utf-16-le')) // 2
                else:
                    # Use the fact that len(t) == wcslen(wt) in python 2.7 on
                    # windows where the python unicode type uses UTF-16
                    text_len = len(t)
                if not self.write_console(self.file_handle, wt, text_len, byref(written), None):
                    # Older versions of windows can fail to write large strings
                    # to console with WriteConsoleW (seen it happen on Win XP)
                    import winerror
                    err = ctypes.get_last_error()
                    if err == winerror.ERROR_NOT_ENOUGH_MEMORY and chunk >= 128:
                        # Retry with a smaller chunk size (give up if chunk < 128)
                        chunk = chunk // 2
                        text = t + text
                        continue
                    if err == winerror.ERROR_GEN_FAILURE:
                        # On newer windows, this happens when trying to write
                        # non-ascii chars to the console and the console is set
                        # to use raster fonts (the default). In this case
                        # rather than failing, write an informative error
                        # message and the asciized version of the text.
                        print('Non-ASCII text detected. You must set your Console\'s font to'
                              ' Lucida Console or Consolas or some other TrueType font to see this text', file=self.stream, end=' -- ')
                        from calibre.utils.filenames import ascii_text
                        print(ascii_text(t + text), file=self.stream, end='')
                        break  # the rest of the text has already been written, asciized
                    if not ignore_errors:
                        raise ctypes.WinError(err)


class ColoredStream(Detect):

    def __init__(self, stream=None, fg=None, bg=None, bold=False):
        stream = getattr(stream, 'buffer', stream)
        Detect.__init__(self, stream)
        self.fg, self.bg, self.bold = fg, bg, bold
        if self.set_console is not None:
            self.wval = to_flag(self.fg, self.bg, bold)
            if not self.bg:
                self.wval |= self.default_console_text_attributes & 0xF0

    def cwrite(self, what):
        if not isinstance(what, bytes):
            what = what.encode('ascii')
        self.stream.write(what)

    def __enter__(self):
        if not self.isatty:
            return self
        if self.isansi:
            if self.bold:
                self.cwrite(ATTRIBUTES['bold'])
            if self.bg is not None:
                self.cwrite(BACKGROUNDS[self.bg])
            if self.fg is not None:
                self.cwrite(COLORS[self.fg])
        elif self.set_console is not None:
            if self.wval != 0:
                self.set_console(self.file_handle, self.wval)
        return self

    def __exit__(self, *args, **kwargs):
        if not self.isatty:
            return
        if not self.fg and not self.bg and not self.bold:
            return
        if self.isansi:
            self.cwrite(RESET)
            self.stream.flush()
        elif self.set_console is not None:
            self.set_console(self.file_handle, self.default_console_text_attributes)
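

# Editor's sketch (not part of the original file): ColoredStream is a context
# manager; output written inside the block is colored, and the attributes are
# restored on exit (via an ANSI reset, or SetConsoleTextAttribute on Windows).
def _demo_colored_stream():
    with ColoredStream(sys.stdout, fg='cyan', bold=True):
        print('highlighted text')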


class ANSIStream(Detect):

    ANSI_RE = r'\033\[((?:\d|;)*)([a-zA-Z])'

    def __init__(self, stream=None):
        super(ANSIStream, self).__init__(stream)
        self.encoding = getattr(self.stream, 'encoding', 'utf-8') or 'utf-8'
        self.stream_takes_unicode = hasattr(self.stream, 'buffer')
        self.last_state = (None, None, False)
        self._ansi_re_bin = self._ansi_re_unicode = None

    def ansi_re(self, binary=False):
        attr = '_ansi_re_bin' if binary else '_ansi_re_unicode'
        ans = getattr(self, attr)
        if ans is None:
            expr = self.ANSI_RE
            if binary:
                expr = expr.encode('ascii')
            ans = re.compile(expr)
            setattr(self, attr, ans)
        return ans

    def write(self, text):
        if not self.isatty:
            return self.strip_and_write(text)

        if self.isansi:
            return self.stream.write(text)

        if self.set_console is None:
            # Not an ANSI terminal and no Win32 console available, so strip
            # the escape codes
            return self.strip_and_write(text)

        self.write_and_convert(text)

    def polyglot_write(self, text):
        binary = isinstance(text, bytes)
        stream = self.stream
        if self.stream_takes_unicode:
            if binary:
                stream = self.stream.buffer
        else:
            if not binary:
                text = text.encode(self.encoding, 'replace')
        stream.write(text)

    def strip_and_write(self, text):
        binary = isinstance(text, bytes)
        pat = self.ansi_re(binary)
        repl = b'' if binary else ''
        self.polyglot_write(pat.sub(repl, text))

    def write_and_convert(self, text):
        '''
        Write the given text to our wrapped stream, stripping any ANSI
        sequences from the text, and optionally converting them into win32
        calls.
        '''
        cursor = 0
        binary = isinstance(text, bytes)
        for match in self.ansi_re(binary).finditer(text):
            start, end = match.span()
            self.write_plain_text(text, cursor, start)
            self.convert_ansi(*match.groups())
            cursor = end
        self.write_plain_text(text, cursor, len(text))
        self.set_console(self.file_handle, self.default_console_text_attributes)
        self.stream.flush()

    def write_plain_text(self, text, start, end):
        if start < end:
            text = text[start:end]
            if self.is_console and isinstance(text, bytes):
                try:
                    utext = text.decode(self.encoding)
                except ValueError:
                    pass
                else:
                    return self.write_unicode_text(utext)
            self.polyglot_write(text)

    def convert_ansi(self, paramstring, command):
        if isinstance(paramstring, bytes):
            paramstring = paramstring.decode('ascii', 'replace')
        if isinstance(command, bytes):
            command = command.decode('ascii', 'replace')
        params = self.extract_params(paramstring)
        self.call_win32(command, params)

    def extract_params(self, paramstring):
        def split(paramstring):
            for p in paramstring.split(';'):
                if p:
                    yield int(p)
        return tuple(split(paramstring))

    def call_win32(self, command, params):
        if command != 'm':
            return
        fg, bg, bold = self.last_state

        for param in params:
            if param in RCOLORS:
                fg = RCOLORS[param]
            elif param in RBACKGROUNDS:
                bg = RBACKGROUNDS[param]
            elif param == 1:
                bold = True
            elif param == 0:
                fg, bg, bold = None, None, False

        self.last_state = (fg, bg, bold)
        if fg or bg or bold:
            val = to_flag(fg, bg, bold)
            if not bg:
                val |= self.default_console_text_attributes & 0xF0
            self.set_console(self.file_handle, val)
        else:
            self.set_console(self.file_handle, self.default_console_text_attributes)
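

# Editor's sketch (not part of the original file): how ANSI_RE decomposes an
# SGR escape into its parameter string and command letter; '\033[1;31m' means
# bold (1) plus red foreground (31), which call_win32() maps to console flags.
def _demo_ansi_re():
    m = re.match(ANSIStream.ANSI_RE, '\033[1;31m')
    paramstring, command = m.groups()
    assert command == 'm'
    assert tuple(int(p) for p in paramstring.split(';') if p) == (1, 31)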


def windows_terminfo():
    from ctypes import Structure, byref
    from ctypes.wintypes import SHORT, WORD

    class COORD(Structure):

        """struct in wincon.h"""
        _fields_ = [
            ('X', SHORT),
            ('Y', SHORT),
        ]

    class SMALL_RECT(Structure):

        """struct in wincon.h."""
        _fields_ = [
            ("Left", SHORT),
            ("Top", SHORT),
            ("Right", SHORT),
            ("Bottom", SHORT),
        ]

    class CONSOLE_SCREEN_BUFFER_INFO(Structure):

        """struct in wincon.h."""
        _fields_ = [
            ("dwSize", COORD),
            ("dwCursorPosition", COORD),
            ("wAttributes", WORD),
            ("srWindow", SMALL_RECT),
            ("dwMaximumWindowSize", COORD),
        ]

    csbi = CONSOLE_SCREEN_BUFFER_INFO()
    import msvcrt
    file_handle = msvcrt.get_osfhandle(sys.stdout.fileno())
    from ctypes import windll
    success = windll.kernel32.GetConsoleScreenBufferInfo(file_handle,
                                                         byref(csbi))
    if not success:
        raise Exception('stdout is not a console?')
    return csbi


def get_term_geometry():
    import fcntl, termios, struct

    def ioctl_GWINSZ(fd):
        try:
            return struct.unpack(b'HHHH', fcntl.ioctl(fd, termios.TIOCGWINSZ, b'\0' * 8))[:2]
        except Exception:
            return None, None

    for f in (sys.stdin, sys.stdout, sys.stderr):
        lines, cols = ioctl_GWINSZ(f.fileno())
        if lines is not None:
            return lines, cols
    try:
        fd = os.open(os.ctermid(), os.O_RDONLY)
        try:
            lines, cols = ioctl_GWINSZ(fd)
            if lines is not None:
                return lines, cols
        finally:
            os.close(fd)
    except Exception:
        pass
    return None, None


def geometry():
    if iswindows:
        try:
            ti = windows_terminfo()
            return (ti.dwSize.X or 80, ti.dwSize.Y or 25)
        except Exception:
            return 80, 25
    else:
        try:
            lines, cols = get_term_geometry()
            if lines is not None:
                return cols, lines
        except Exception:
            pass
        return 80, 25
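

# Editor's sketch (not part of the original file): geometry() returns
# (columns, lines) and falls back to 80x25 when no terminal is detected.
def _demo_geometry():
    cols, lines = geometry()
    print('Terminal is %d columns x %d lines' % (cols, lines))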


def test():
    s = ANSIStream()

    text = [colored(t, fg=t) + '. ' + colored(t, fg=t, bold=True) + '.' for t in
            ('red', 'yellow', 'green', 'white', 'cyan', 'magenta', 'blue',)]
    s.write('\n'.join(text))
    u = u'\u041c\u0438\u0445\u0430\u0438\u043b fällen'
    print()
    s.write_unicode_text(u)
    print()
109
ebook_converter/utils/titlecase.py
Normal file
@@ -0,0 +1,109 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals

"""
Original Perl version by: John Gruber https://daringfireball.net/ 10 May 2008
Python version by Stuart Colville http://muffinresearch.co.uk
Modifications to make it work with non-ascii chars by Kovid Goyal
License: http://www.opensource.org/licenses/mit-license.php
"""

import re

from calibre.utils.icu import capitalize, upper
# icu_lower/icu_upper are injected as builtins when running inside calibre;
# import them explicitly so this module also works standalone
from calibre.utils.icu import lower as icu_lower, upper as icu_upper
from polyglot.builtins import unicode_type

__all__ = ['titlecase']
__version__ = '0.5'

SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\\.?|via|vs\\.?'
PUNCT = r"""!"#$%&'‘’()*+,\-‒–—―./:;?@[\\\]_`{|}~"""

SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT)
CAPFIRST = re.compile(unicode_type(r"^[%s]*?(\w)" % PUNCT), flags=re.UNICODE)
SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I | re.U)
SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I | re.U)
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I | re.U)
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)
APOS_SECOND = re.compile(r"^[dol]{1}['‘]{1}[a-z]+$", re.I)
UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")

_lang = None


def lang():
    global _lang
    if _lang is None:
        from calibre.utils.localization import get_lang
        _lang = get_lang().lower()
    return _lang


def titlecase(text):
    """
    Titlecases input text

    This filter changes all words to Title Caps, and attempts to be clever
    about *un*capitalizing SMALL words like a/an/the in the input.

    The list of "SMALL words" which are not capped comes from
    the New York Times Manual of Style, plus 'vs' and 'v'.
    """
    all_caps = upper(text) == text

    pat = re.compile(r'(\s+)')
    line = []
    for word in pat.split(text):
        if not word:
            continue
        if pat.match(word) is not None:
            line.append(word)
            continue
        if all_caps:
            if UC_INITIALS.match(word):
                line.append(word)
                continue
            else:
                word = icu_lower(word)

        if APOS_SECOND.match(word):
            word = word.replace(word[0], icu_upper(word[0]), 1)
            word = word[:2] + icu_upper(word[2]) + word[3:]
            line.append(word)
            continue
        if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
            line.append(word)
            continue
        if SMALL_WORDS.match(word):
            line.append(icu_lower(word))
            continue

        hyphenated = []
        for item in word.split('-'):
            hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item))
        line.append("-".join(hyphenated))

    result = "".join(line)

    result = SMALL_FIRST.sub(lambda m: '%s%s' % (
        m.group(1),
        capitalize(m.group(2))
    ), result)

    result = SMALL_AFTER_NUM.sub(lambda m: '%s%s' % (
        m.group(1),
        capitalize(m.group(2))
    ), result)

    result = SMALL_LAST.sub(lambda m: capitalize(m.group(0)), result)

    result = SUBPHRASE.sub(lambda m: '%s%s' % (
        m.group(1),
        capitalize(m.group(2))
    ), result)

    return result
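

# Editor's sketch (not part of the original file): expected behaviour on a
# couple of inputs, assuming the calibre ICU helpers are importable.
def _demo_titlecase():
    print(titlecase('the lord of the rings'))  # -> The Lord of the Rings
    print(titlecase('iTunes for e.g. macs'))   # mixed-case and dotted words are left alone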
95
ebook_converter/utils/wordcount.py
Normal file
@@ -0,0 +1,95 @@
#!/usr/bin/python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai

from __future__ import absolute_import, division, print_function, unicode_literals

"""
Get word, character, and Asian character counts

1. Get a word count as a dictionary:
    wc = get_wordcount(text)
    words = wc['words']  # etc.

2. Get a word count as an object:
    wc = get_wordcount_obj(text)
    words = wc.words  # etc.

Properties counted:
    * characters
    * chars_no_spaces
    * asian_chars
    * non_asian_words
    * words

Sourced from:
http://ginstrom.com/scribbles/2008/05/17/counting-words-etc-in-an-html-file-with-python/
http://ginstrom.com/scribbles/2007/10/06/counting-words-characters-and-asian-characters-with-python/
"""
__version__ = 0.1
__author__ = "Ryan Ginstrom"

IDEOGRAPHIC_SPACE = 0x3000


def is_asian(char):
    """Is the character Asian?"""

    # 0x3000 is ideographic space (i.e. double-byte space)
    # Anything over is an Asian character
    return ord(char) > IDEOGRAPHIC_SPACE


def filter_jchars(c):
    """Filters Asian characters to spaces"""
    if is_asian(c):
        return ' '
    return c


def nonj_len(word):
    """Returns the number of non-Asian words in {word}

    - 日本語AアジアンB -> 2
    - hello -> 1

    @param word: A word, possibly containing Asian characters
    """
    # Here are the steps:
    # 本spam日eggs
    # -> [' ', 's', 'p', 'a', 'm', ' ', 'e', 'g', 'g', 's']
    # -> ' spam eggs'
    # -> ['spam', 'eggs']
    # The length of which is 2!
    chars = [filter_jchars(c) for c in word]
    return len(''.join(chars).split())


def get_wordcount(text):
    """Get the word/character count for text

    @param text: The text of the segment
    """

    characters = len(text)
    chars_no_spaces = sum(not x.isspace() for x in text)
    asian_chars = sum(is_asian(x) for x in text)
    non_asian_words = nonj_len(text)
    words = non_asian_words + asian_chars

    return dict(characters=characters,
                chars_no_spaces=chars_no_spaces,
                asian_chars=asian_chars,
                non_asian_words=non_asian_words,
                words=words)


def dict2obj(dictionary):
    """Transform a dictionary into an object"""
    class Obj(object):

        def __init__(self, dictionary):
            self.__dict__.update(dictionary)
    return Obj(dictionary)


def get_wordcount_obj(text):
    """Get the wordcount as an object rather than a dictionary"""
    return dict2obj(get_wordcount(text))
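

# Editor's sketch (not part of the original file): each ideograph counts as a
# word, so 'hello 世界' is one non-Asian word plus two Asian characters.
def _demo_wordcount():
    wc = get_wordcount_obj('hello 世界')
    assert wc.non_asian_words == 1
    assert wc.asian_chars == 2
    assert wc.words == 3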
67
ebook_converter/utils/xml_parse.py
Normal file
@@ -0,0 +1,67 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>

from __future__ import absolute_import, division, print_function, unicode_literals

from lxml import etree

# Resolving of SYSTEM entities is turned off as entities can cause
# reads of local files, for example:
# <!DOCTYPE foo [ <!ENTITY passwd SYSTEM "file:///etc/passwd" >]>

fs = etree.fromstring


class Resolver(etree.Resolver):

    def resolve(self, url, id, context):
        return self.resolve_string('', context)


def create_parser(recover):
    parser = etree.XMLParser(recover=recover, no_network=True)
    parser.resolvers.add(Resolver())
    return parser


def safe_xml_fromstring(string_or_bytes, recover=True):
    return fs(string_or_bytes, parser=create_parser(recover))
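

# Editor's sketch (not part of the original file): a SYSTEM entity resolves
# to the empty string, so the local file is never read (see find_tests below).
def _demo_safe_parse():
    markup = '<!DOCTYPE foo [ <!ENTITY e SYSTEM "file:///etc/passwd" >]><r>&e;</r>'
    root = safe_xml_fromstring(markup)
    assert root.text is None  # the entity was defused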


def find_tests():
    import unittest, tempfile, os

    class TestXMLParse(unittest.TestCase):

        def setUp(self):
            with tempfile.NamedTemporaryFile(delete=False) as tf:
                tf.write(b'external')
                self.temp_file = tf.name

        def tearDown(self):
            os.remove(self.temp_file)

        def test_safe_xml_fromstring(self):
            templ = '''<!DOCTYPE foo [ <!ENTITY e {id} "{val}" > ]><r>&e;</r>'''
            external = 'file:///' + self.temp_file.replace(os.sep, '/')
            self.assertEqual(etree.fromstring(templ.format(id='SYSTEM', val=external)).text, 'external')
            for eid, val, expected in (
                ('', 'normal entity', 'normal entity'),
                ('', external, external),

                ('SYSTEM', external, None),
                ('SYSTEM', 'http://example.com', None),

                ('PUBLIC', external, None),
                ('PUBLIC', 'http://example.com', None),
            ):
                got = getattr(safe_xml_fromstring(templ.format(id=eid, val=val)), 'text', None)
                self.assertEqual(got, expected)

    return unittest.defaultTestLoader.loadTestsFromTestCase(TestXMLParse)


if __name__ == '__main__':
    from calibre.utils.run_tests import run_tests
    run_tests(find_tests)
1694
ebook_converter/utils/zipfile.py
Normal file
File diff suppressed because it is too large