mirror of https://github.com/gryf/ebook-converter.git synced 2026-02-22 18:15:49 +01:00

Initial import

This commit is contained in:
2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
#!/usr/bin/env python2
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Miscellaneous utilities.
'''
from time import time
from polyglot.builtins import as_bytes
def join_with_timeout(q, timeout=2):
''' Join the queue q with a specified timeout. Blocks until all tasks on
the queue are done, or raises a RuntimeError on timeout. '''
q.all_tasks_done.acquire()
try:
endtime = time() + timeout
while q.unfinished_tasks:
remaining = endtime - time()
if remaining <= 0.0:
raise RuntimeError('Waiting for queue to clear timed out')
q.all_tasks_done.wait(remaining)
finally:
q.all_tasks_done.release()
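# A minimal usage sketch (illustrative, not part of the original module; uses
# the Python 3 spelling of the stdlib imports):
#
#     from queue import Queue
#     from threading import Thread
#     q = Queue()
#     for i in range(5):
#         q.put(i)
#     def worker():
#         while True:
#             q.get()
#             q.task_done()
#     t = Thread(target=worker)
#     t.daemon = True
#     t.start()
#     join_with_timeout(q, timeout=5)  # raises RuntimeError if tasks remain after 5s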
def unpickle_binary_string(data):
# Maintains compatibility with python's pickle module protocol version 2
import struct
PROTO, SHORT_BINSTRING, BINSTRING = b'\x80', b'U', b'T'
if data.startswith(PROTO + b'\x02'):
offset = 2
which = data[offset:offset+1]
offset += 1
if which == BINSTRING:
sz, = struct.unpack_from('<i', data, offset)
offset += struct.calcsize('<i')
elif which == SHORT_BINSTRING:
sz = ord(data[offset:offset+1])
offset += 1
else:
return
return data[offset:offset + sz]
def pickle_binary_string(data):
# Maintains compatibility with python's pickle module protocol version 2
import struct
PROTO, STOP, BINSTRING = b'\x80', b'.', b'T'
data = as_bytes(data)
return PROTO + b'\x02' + BINSTRING + struct.pack(b'<i', len(data)) + data + STOP
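# Round-trip sketch (illustrative): pickle_binary_string emits a protocol-2
# BINSTRING opcode stream that unpickle_binary_string (and pickle.loads) can
# read back.
#
#     payload = pickle_binary_string(b'hello')  # b'\x80\x02T\x05\x00\x00\x00hello.'
#     assert unpickle_binary_string(payload) == b'hello'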

View File

@@ -0,0 +1,98 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import re
from polyglot.builtins import codepoint_to_chr, map, range, filter
from polyglot.html_entities import name2codepoint
from calibre.constants import plugins, preferred_encoding
try:
    _ncxc = plugins['speedup'][0].clean_xml_chars
except AttributeError:
    # speedup plugin without clean_xml_chars; fall back to the pure-python
    # implementation below (see the `or` at the clean_xml_chars assignment)
    native_clean_xml_chars = None
else:
    def native_clean_xml_chars(x):
        if isinstance(x, bytes):
            x = x.decode(preferred_encoding)
        return _ncxc(x)
def ascii_pat(for_binary=False):
attr = 'binary' if for_binary else 'text'
ans = getattr(ascii_pat, attr, None)
if ans is None:
chars = set(range(32)) - {9, 10, 13}
chars.add(127)
pat = '|'.join(map(codepoint_to_chr, chars))
if for_binary:
pat = pat.encode('ascii')
ans = re.compile(pat)
setattr(ascii_pat, attr, ans)
return ans
def clean_ascii_chars(txt, charlist=None):
r'''
Remove ASCII control chars.
This is all control chars except \t, \n and \r
'''
is_binary = isinstance(txt, bytes)
empty = b'' if is_binary else ''
if not txt:
return empty
if charlist is None:
pat = ascii_pat(is_binary)
else:
pat = '|'.join(map(codepoint_to_chr, charlist))
if is_binary:
pat = pat.encode('utf-8')
return pat.sub(empty, txt)
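# Behaviour sketch (illustrative only):
#
#     assert clean_ascii_chars('a\x00b\tc') == 'ab\tc'  # NUL stripped, tab kept
#     assert clean_ascii_chars(b'x\x1by') == b'xy'      # bytes input gives bytes output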
def allowed(x):
x = ord(x)
return (x != 127 and (31 < x < 0xd7ff or x in (9, 10, 13))) or (0xe000 < x < 0xfffd) or (0x10000 < x < 0x10ffff)
def py_clean_xml_chars(unicode_string):
return ''.join(filter(allowed, unicode_string))
clean_xml_chars = native_clean_xml_chars or py_clean_xml_chars
def test_clean_xml_chars():
raw = 'asd\x02a\U00010437x\ud801b\udffe\ud802'
if native_clean_xml_chars(raw) != 'asda\U00010437xb':
raise ValueError('Failed to XML clean: %r' % raw)
# Fredrik Lundh: http://effbot.org/zone/re-sub.htm#unescape-html
# Removes HTML or XML character references and entities from a text string.
#
# @param text The HTML (or XML) source text.
# @return The plain text, as a Unicode string, if necessary.
def unescape(text, rm=False, rchar=''):
def fixup(m, rm=rm, rchar=rchar):
text = m.group(0)
if text[:2] == "&#":
# character reference
try:
if text[:3] == "&#x":
return codepoint_to_chr(int(text[3:-1], 16))
else:
return codepoint_to_chr(int(text[2:-1]))
except ValueError:
pass
else:
# named entity
try:
text = codepoint_to_chr(name2codepoint[text[1:-1]])
except KeyError:
pass
if rm:
return rchar # replace by char
return text # leave as is
return re.sub("&#?\\w+;", fixup, text)
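# Behaviour sketch (illustrative only):
#
#     assert unescape('&amp; &#65; &#x42;') == '& A B'
#     assert unescape('&bogus;', rm=True, rchar='?') == '?'  # unknown entity replaced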

View File

@@ -0,0 +1,464 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
Manage application-wide preferences.
'''
import optparse
import os
from copy import deepcopy
from calibre.constants import (
CONFIG_DIR_MODE, __appname__, __author__, config_dir, get_version, iswindows
)
from calibre.utils.config_base import (
Config, ConfigInterface, ConfigProxy, Option, OptionSet, OptionValues,
StringConfig, json_dumps, json_loads, make_config_dir, plugin_dir, prefs,
tweaks, from_json, to_json
)
from calibre.utils.lock import ExclusiveFile
from polyglot.builtins import string_or_bytes, native_string_type
# optparse uses gettext.gettext instead of _ from builtins, so we
# monkey patch it.
optparse._ = _
if False:
# Make pyflakes happy
Config, ConfigProxy, Option, OptionValues, StringConfig, OptionSet,
ConfigInterface, tweaks, plugin_dir, prefs, from_json, to_json
def check_config_write_access():
return os.access(config_dir, os.W_OK) and os.access(config_dir, os.X_OK)
class CustomHelpFormatter(optparse.IndentedHelpFormatter):
def format_usage(self, usage):
from calibre.utils.terminal import colored
parts = usage.split(' ')
if parts:
parts[0] = colored(parts[0], fg='yellow', bold=True)
usage = ' '.join(parts)
return colored(_('Usage'), fg='blue', bold=True) + ': ' + usage
def format_heading(self, heading):
from calibre.utils.terminal import colored
return "%*s%s:\n" % (self.current_indent, '',
colored(heading, fg='blue', bold=True))
def format_option(self, option):
import textwrap
from calibre.utils.terminal import colored
result = []
opts = self.option_strings[option]
opt_width = self.help_position - self.current_indent - 2
if len(opts) > opt_width:
opts = "%*s%s\n" % (self.current_indent, "",
colored(opts, fg='green'))
indent_first = self.help_position
else: # start help on same line as opts
opts = "%*s%-*s " % (self.current_indent, "", opt_width +
len(colored('', fg='green')), colored(opts, fg='green'))
indent_first = 0
result.append(opts)
if option.help:
help_text = self.expand_default(option).split('\n')
help_lines = []
for line in help_text:
help_lines.extend(textwrap.wrap(line, self.help_width))
result.append("%*s%s\n" % (indent_first, "", help_lines[0]))
result.extend(["%*s%s\n" % (self.help_position, "", line)
for line in help_lines[1:]])
elif opts[-1] != "\n":
result.append("\n")
return "".join(result)+'\n'
class OptionParser(optparse.OptionParser):
def __init__(self,
usage='%prog [options] filename',
version=None,
epilog=None,
gui_mode=False,
conflict_handler='resolve',
**kwds):
import textwrap
from calibre.utils.terminal import colored
usage = textwrap.dedent(usage)
if epilog is None:
epilog = _('Created by ')+colored(__author__, fg='cyan')
usage += '\n\n'+_('''Whenever you pass arguments to %prog that have spaces in them, '''
'''enclose the arguments in quotation marks. For example: "{}"''').format(
"C:\\some path with spaces" if iswindows else '/some path/with spaces') +'\n'
if version is None:
version = '%%prog (%s %s)'%(__appname__, get_version())
optparse.OptionParser.__init__(self, usage=usage, version=version, epilog=epilog,
formatter=CustomHelpFormatter(),
conflict_handler=conflict_handler, **kwds)
self.gui_mode = gui_mode
if False:
# Translatable string from optparse
_("Options")
_("show this help message and exit")
_("show program's version number and exit")
def print_usage(self, file=None):
from calibre.utils.terminal import ANSIStream
s = ANSIStream(file)
optparse.OptionParser.print_usage(self, file=s)
def print_help(self, file=None):
from calibre.utils.terminal import ANSIStream
s = ANSIStream(file)
optparse.OptionParser.print_help(self, file=s)
def print_version(self, file=None):
from calibre.utils.terminal import ANSIStream
s = ANSIStream(file)
optparse.OptionParser.print_version(self, file=s)
def error(self, msg):
if self.gui_mode:
raise Exception(msg)
optparse.OptionParser.error(self, msg)
def merge(self, parser):
'''
Add options from parser to self. In case of conflicts, conflicting options from
parser are skipped.
'''
opts = list(parser.option_list)
groups = list(parser.option_groups)
def merge_options(options, container):
for opt in deepcopy(options):
if not self.has_option(opt.get_opt_string()):
container.add_option(opt)
merge_options(opts, self)
for group in groups:
g = self.add_option_group(group.title)
merge_options(group.option_list, g)
def subsume(self, group_name, msg=''):
'''
Move all existing options into a subgroup named
C{group_name} with description C{msg}.
'''
opts = [opt for opt in self.options_iter() if opt.get_opt_string() not in ('--version', '--help')]
self.option_groups = []
subgroup = self.add_option_group(group_name, msg)
for opt in opts:
self.remove_option(opt.get_opt_string())
subgroup.add_option(opt)
def options_iter(self):
for opt in self.option_list:
if native_string_type(opt).strip():
yield opt
for gr in self.option_groups:
for opt in gr.option_list:
if native_string_type(opt).strip():
yield opt
def option_by_dest(self, dest):
for opt in self.options_iter():
if opt.dest == dest:
return opt
def merge_options(self, lower, upper):
'''
Merge options in lower and upper option lists into upper.
Default values in upper are overridden by
non default values in lower.
'''
for dest in lower.__dict__.keys():
if dest not in upper.__dict__:
continue
opt = self.option_by_dest(dest)
if lower.__dict__[dest] != opt.default and \
upper.__dict__[dest] == opt.default:
upper.__dict__[dest] = lower.__dict__[dest]
def add_option_group(self, *args, **kwargs):
if isinstance(args[0], string_or_bytes):
args = list(args)
args[0] = native_string_type(args[0])
return optparse.OptionParser.add_option_group(self, *args, **kwargs)
class DynamicConfig(dict):
'''
A replacement for QSettings that supports dynamic config keys.
Returns `None` if a config key is not found. Note that the config
data is stored in a JSON file.
'''
def __init__(self, name='dynamic'):
dict.__init__(self, {})
self.name = name
self.defaults = {}
self.refresh()
@property
def file_path(self):
return os.path.join(config_dir, self.name+'.pickle.json')
def decouple(self, prefix):
self.name = prefix + self.name
self.refresh()
def read_old_serialized_representation(self):
from calibre.utils.shared_file import share_open
from calibre.utils.serialize import pickle_loads
path = self.file_path.rpartition('.')[0]
try:
with share_open(path, 'rb') as f:
raw = f.read()
except EnvironmentError:
raw = b''
try:
d = pickle_loads(raw).copy()
except Exception:
d = {}
return d
def refresh(self, clear_current=True):
d = {}
migrate = False
if clear_current:
self.clear()
if os.path.exists(self.file_path):
with ExclusiveFile(self.file_path) as f:
raw = f.read()
if raw:
try:
d = json_loads(raw)
except Exception as err:
print('Failed to de-serialize JSON representation of stored dynamic data for {} with error: {}'.format(
self.name, err))
else:
d = self.read_old_serialized_representation()
migrate = bool(d)
else:
d = self.read_old_serialized_representation()
migrate = bool(d)
if migrate and d:
raw = json_dumps(d, ignore_unserializable=True)
with ExclusiveFile(self.file_path) as f:
f.seek(0), f.truncate()
f.write(raw)
self.update(d)
def __getitem__(self, key):
try:
return dict.__getitem__(self, key)
except KeyError:
return self.defaults.get(key, None)
def get(self, key, default=None):
try:
return dict.__getitem__(self, key)
except KeyError:
return self.defaults.get(key, default)
def __setitem__(self, key, val):
dict.__setitem__(self, key, val)
self.commit()
def set(self, key, val):
self.__setitem__(key, val)
def commit(self):
if not getattr(self, 'name', None):
return
if not os.path.exists(self.file_path):
make_config_dir()
raw = json_dumps(self)
with ExclusiveFile(self.file_path) as f:
f.seek(0)
f.truncate()
f.write(raw)
dynamic = DynamicConfig()
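# Usage sketch (illustrative; the key name is hypothetical): dynamic behaves
# like a dict persisted to a JSON file under config_dir, and missing keys fall
# back to defaults or None instead of raising KeyError.
#
#     dynamic['recently_used'] = ['a.epub', 'b.mobi']  # committed to disk at once
#     dynamic.get('no_such_key')                       # -> None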
class XMLConfig(dict):
'''
Similar to :class:`DynamicConfig`, except that it uses an XML storage
backend instead of a pickle file.
See `https://docs.python.org/dev/library/plistlib.html`_ for the supported
data types.
'''
EXTENSION = '.plist'
def __init__(self, rel_path_to_cf_file, base_path=config_dir):
dict.__init__(self)
self.no_commit = False
self.defaults = {}
self.file_path = os.path.join(base_path,
*(rel_path_to_cf_file.split('/')))
self.file_path = os.path.abspath(self.file_path)
if not self.file_path.endswith(self.EXTENSION):
self.file_path += self.EXTENSION
self.refresh()
def mtime(self):
try:
return os.path.getmtime(self.file_path)
except EnvironmentError:
return 0
def touch(self):
try:
os.utime(self.file_path, None)
except EnvironmentError:
pass
def raw_to_object(self, raw):
from polyglot.plistlib import loads
return loads(raw)
def to_raw(self):
from polyglot.plistlib import dumps
return dumps(self)
def decouple(self, prefix):
self.file_path = os.path.join(os.path.dirname(self.file_path), prefix + os.path.basename(self.file_path))
self.refresh()
def refresh(self, clear_current=True):
d = {}
if os.path.exists(self.file_path):
with ExclusiveFile(self.file_path) as f:
raw = f.read()
try:
d = self.raw_to_object(raw) if raw.strip() else {}
except SystemError:
pass
except:
import traceback
traceback.print_exc()
d = {}
if clear_current:
self.clear()
self.update(d)
def __getitem__(self, key):
from polyglot.plistlib import Data
try:
ans = dict.__getitem__(self, key)
if isinstance(ans, Data):
ans = ans.data
return ans
except KeyError:
return self.defaults.get(key, None)
def get(self, key, default=None):
from polyglot.plistlib import Data
try:
ans = dict.__getitem__(self, key)
if isinstance(ans, Data):
ans = ans.data
return ans
except KeyError:
return self.defaults.get(key, default)
def __setitem__(self, key, val):
from polyglot.plistlib import Data
if isinstance(val, bytes):
val = Data(val)
dict.__setitem__(self, key, val)
self.commit()
def set(self, key, val):
self.__setitem__(key, val)
def __delitem__(self, key):
try:
dict.__delitem__(self, key)
except KeyError:
pass # ignore missing keys
else:
self.commit()
def commit(self):
if self.no_commit:
return
if hasattr(self, 'file_path') and self.file_path:
dpath = os.path.dirname(self.file_path)
if not os.path.exists(dpath):
os.makedirs(dpath, mode=CONFIG_DIR_MODE)
with ExclusiveFile(self.file_path) as f:
raw = self.to_raw()
f.seek(0)
f.truncate()
f.write(raw)
def __enter__(self):
self.no_commit = True
def __exit__(self, *args):
self.no_commit = False
self.commit()
class JSONConfig(XMLConfig):
EXTENSION = '.json'
def raw_to_object(self, raw):
return json_loads(raw)
def to_raw(self):
return json_dumps(self)
def __getitem__(self, key):
try:
return dict.__getitem__(self, key)
except KeyError:
return self.defaults[key]
def get(self, key, default=None):
try:
return dict.__getitem__(self, key)
except KeyError:
return self.defaults.get(key, default)
def __setitem__(self, key, val):
dict.__setitem__(self, key, val)
self.commit()
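# Usage sketch (illustrative; the plugin name is hypothetical). Unlike
# XMLConfig, JSONConfig.__getitem__ raises KeyError when a key is missing and
# has no default.
#
#     c = JSONConfig('plugins/my_plugin')  # -> <config_dir>/plugins/my_plugin.json
#     c.defaults['count'] = 0
#     c['count'] += 1                      # read default, write back, commit to disk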
class DevicePrefs:
def __init__(self, global_prefs):
self.global_prefs = global_prefs
self.overrides = {}
def set_overrides(self, **kwargs):
self.overrides = kwargs.copy()
def __getitem__(self, key):
return self.overrides.get(key, self.global_prefs[key])
device_prefs = DevicePrefs(prefs)

View File

@@ -0,0 +1,674 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re, traceback, numbers
from functools import partial
from collections import defaultdict
from copy import deepcopy
from calibre.utils.lock import ExclusiveFile
from calibre.constants import config_dir, CONFIG_DIR_MODE, ispy3, preferred_encoding, filesystem_encoding, iswindows
from polyglot.builtins import unicode_type, iteritems, map
plugin_dir = os.path.join(config_dir, 'plugins')
def parse_old_style(src):
if ispy3:
import pickle as cPickle
else:
import cPickle
options = {'cPickle':cPickle}
try:
if not isinstance(src, unicode_type):
src = src.decode('utf-8')
src = src.replace('PyQt%d.QtCore' % 4, 'PyQt5.QtCore')
src = re.sub(r'cPickle\.loads\(([\'"])', r'cPickle.loads(b\1', src)
exec(src, options)
except Exception as err:
try:
print('Failed to parse old style options string with error: {}'.format(err))
except Exception:
pass
return options
def to_json(obj):
import datetime
if isinstance(obj, bytearray):
from base64 import standard_b64encode
return {'__class__': 'bytearray',
'__value__': standard_b64encode(bytes(obj)).decode('ascii')}
if isinstance(obj, datetime.datetime):
from calibre.utils.date import isoformat
return {'__class__': 'datetime.datetime',
'__value__': isoformat(obj, as_utc=True)}
if isinstance(obj, (set, frozenset)):
return {'__class__': 'set', '__value__': tuple(obj)}
if isinstance(obj, bytes):
return obj.decode('utf-8')
if hasattr(obj, 'toBase64'): # QByteArray
return {'__class__': 'bytearray',
'__value__': bytes(obj.toBase64()).decode('ascii')}
raise TypeError(repr(obj) + ' is not JSON serializable')
def safe_to_json(obj):
try:
return to_json(obj)
except Exception:
pass
def from_json(obj):
custom = obj.get('__class__')
if custom is not None:
if custom == 'bytearray':
from base64 import standard_b64decode
return bytearray(standard_b64decode(obj['__value__'].encode('ascii')))
if custom == 'datetime.datetime':
from calibre.utils.iso8601 import parse_iso8601
return parse_iso8601(obj['__value__'], assume_utc=True)
if custom == 'set':
return set(obj['__value__'])
return obj
def force_unicode(x):
try:
return x.decode('mbcs' if iswindows else preferred_encoding)
except UnicodeDecodeError:
try:
return x.decode(filesystem_encoding)
except UnicodeDecodeError:
return x.decode('utf-8', 'replace')
def force_unicode_recursive(obj):
if isinstance(obj, bytes):
return force_unicode(obj)
if isinstance(obj, (list, tuple)):
return type(obj)(map(force_unicode_recursive, obj))
if isinstance(obj, dict):
return {force_unicode_recursive(k): force_unicode_recursive(v) for k, v in iteritems(obj)}
return obj
def json_dumps(obj, ignore_unserializable=False):
import json
try:
ans = json.dumps(obj, indent=2, default=safe_to_json if ignore_unserializable else to_json, sort_keys=True, ensure_ascii=False)
except UnicodeDecodeError:
obj = force_unicode_recursive(obj)
ans = json.dumps(obj, indent=2, default=safe_to_json if ignore_unserializable else to_json, sort_keys=True, ensure_ascii=False)
if not isinstance(ans, bytes):
ans = ans.encode('utf-8')
return ans
def json_loads(raw):
import json
if isinstance(raw, bytes):
raw = raw.decode('utf-8')
return json.loads(raw, object_hook=from_json)
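# Round-trip sketch (illustrative): to_json/from_json extend plain JSON with a
# few Python types via the '__class__' marker.
#
#     raw = json_dumps({'tags': {'a', 'b'}})  # set -> {"__class__": "set", ...}
#     assert json_loads(raw) == {'tags': {'a', 'b'}}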
def make_config_dir():
if not os.path.exists(plugin_dir):
os.makedirs(plugin_dir, mode=CONFIG_DIR_MODE)
class Option(object):
def __init__(self, name, switches=[], help='', type=None, choices=None,
check=None, group=None, default=None, action=None, metavar=None):
if choices:
type = 'choice'
self.name = name
self.switches = switches
self.help = help.replace('%default', repr(default)) if help else None
self.type = type
if self.type is None and action is None and choices is None:
if isinstance(default, float):
self.type = 'float'
elif isinstance(default, numbers.Integral) and not isinstance(default, bool):
self.type = 'int'
self.choices = choices
self.check = check
self.group = group
self.default = default
self.action = action
self.metavar = metavar
def __eq__(self, other):
return self.name == getattr(other, 'name', other)
def __repr__(self):
return 'Option: '+self.name
def __str__(self):
return repr(self)
class OptionValues(object):
def copy(self):
return deepcopy(self)
class OptionSet(object):
OVERRIDE_PAT = re.compile(r'#{3,100} Override Options #{15}(.*?)#{3,100} End Override #{3,100}',
re.DOTALL|re.IGNORECASE)
def __init__(self, description=''):
self.description = description
self.defaults = {}
self.preferences = []
self.group_list = []
self.groups = {}
self.set_buffer = {}
self.loads_pat = None
def has_option(self, name_or_option_object):
if name_or_option_object in self.preferences:
return True
for p in self.preferences:
if p.name == name_or_option_object:
return True
return False
def get_option(self, name_or_option_object):
idx = self.preferences.index(name_or_option_object)
if idx > -1:
return self.preferences[idx]
for p in self.preferences:
if p.name == name_or_option_object:
return p
def add_group(self, name, description=''):
if name in self.group_list:
raise ValueError('A group by the name %s already exists in this set'%name)
self.groups[name] = description
self.group_list.append(name)
return partial(self.add_opt, group=name)
def update(self, other):
for name in other.groups.keys():
self.groups[name] = other.groups[name]
if name not in self.group_list:
self.group_list.append(name)
for pref in other.preferences:
if pref in self.preferences:
self.preferences.remove(pref)
self.preferences.append(pref)
def smart_update(self, opts1, opts2):
'''
Updates the preference values in opts1 using only the non-default preference values in opts2.
'''
for pref in self.preferences:
new = getattr(opts2, pref.name, pref.default)
if new != pref.default:
setattr(opts1, pref.name, new)
def remove_opt(self, name):
if name in self.preferences:
self.preferences.remove(name)
def add_opt(self, name, switches=[], help=None, type=None, choices=None,
group=None, default=None, action=None, metavar=None):
'''
Add an option to this section.
:param name: The name of this option. Must be a valid Python identifier.
Must also be unique in this OptionSet and all its subsets.
:param switches: List of command line switches for this option
(as supplied to :module:`optparse`). If empty, this
option will not be added to the command line parser.
:param help: Help text.
:param type: Type checking of option values. Supported types are:
`None, 'choice', 'complex', 'float', 'int', 'string'`.
:param choices: List of strings or `None`.
:param group: Group this option belongs to. You must previously
have created this group with a call to :method:`add_group`.
:param default: The default value for this option.
:param action: The action to pass to optparse. Supported values are:
`None, 'count'`. For choices and boolean options,
action is automatically set correctly.
'''
pref = Option(name, switches=switches, help=help, type=type, choices=choices,
group=group, default=default, action=action, metavar=None)
if group is not None and group not in self.groups.keys():
raise ValueError('Group %s has not been added to this section'%group)
if pref in self.preferences:
raise ValueError('An option with the name %s already exists in this set.'%name)
self.preferences.append(pref)
self.defaults[name] = default
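    # Usage sketch (illustrative; the option name is hypothetical):
    #
    #     oset = OptionSet()
    #     oset.add_opt('margin', ['--margin'], default=5.0,
    #                  help='Page margin in pts. Default: %default')
    #     opts = oset.parse_string(None)  # falsy src -> all defaults
    #     assert opts.margin == 5.0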
def retranslate_help(self):
t = _
for opt in self.preferences:
if opt.help:
opt.help = t(opt.help)
if opt.name == 'use_primary_find_in_search':
opt.help = opt.help.format(u'ñ')
def option_parser(self, user_defaults=None, usage='', gui_mode=False):
from calibre.utils.config import OptionParser
parser = OptionParser(usage, gui_mode=gui_mode)
groups = defaultdict(lambda : parser)
for group, desc in self.groups.items():
groups[group] = parser.add_option_group(group.upper(), desc)
for pref in self.preferences:
if not pref.switches:
continue
g = groups[pref.group]
action = pref.action
if action is None:
action = 'store'
if pref.default is True or pref.default is False:
action = 'store_' + ('false' if pref.default else 'true')
args = dict(
dest=pref.name,
help=pref.help,
metavar=pref.metavar,
type=pref.type,
choices=pref.choices,
default=getattr(user_defaults, pref.name, pref.default),
action=action,
)
g.add_option(*pref.switches, **args)
return parser
def get_override_section(self, src):
match = self.OVERRIDE_PAT.search(src)
if match:
return match.group()
return ''
def parse_string(self, src):
options = {}
if src:
is_old_style = (isinstance(src, bytes) and src.startswith(b'#')) or (isinstance(src, unicode_type) and src.startswith(u'#'))
if is_old_style:
options = parse_old_style(src)
else:
try:
options = json_loads(src)
if not isinstance(options, dict):
raise Exception('options is not a dictionary')
except Exception as err:
try:
print('Failed to parse options string with error: {}'.format(err))
except Exception:
pass
opts = OptionValues()
for pref in self.preferences:
val = options.get(pref.name, pref.default)
# this module is imported, so __builtins__ is a dict here; look up the
# type name (e.g. 'int', 'float') among the builtins as a coercion function
formatter = __builtins__.get(pref.type, None)
if callable(formatter):
val = formatter(val)
setattr(opts, pref.name, val)
return opts
def serialize(self, opts, ignore_unserializable=False):
data = {pref.name: getattr(opts, pref.name, pref.default) for pref in self.preferences}
return json_dumps(data, ignore_unserializable=ignore_unserializable)
class ConfigInterface(object):
def __init__(self, description):
self.option_set = OptionSet(description=description)
self.add_opt = self.option_set.add_opt
self.add_group = self.option_set.add_group
self.remove_opt = self.remove = self.option_set.remove_opt
self.parse_string = self.option_set.parse_string
self.get_option = self.option_set.get_option
self.preferences = self.option_set.preferences
def update(self, other):
self.option_set.update(other.option_set)
def option_parser(self, usage='', gui_mode=False):
return self.option_set.option_parser(user_defaults=self.parse(),
usage=usage, gui_mode=gui_mode)
def smart_update(self, opts1, opts2):
self.option_set.smart_update(opts1, opts2)
class Config(ConfigInterface):
'''
A file based configuration.
'''
def __init__(self, basename, description=''):
ConfigInterface.__init__(self, description)
self.filename_base = basename
@property
def config_file_path(self):
return os.path.join(config_dir, self.filename_base + '.py.json')
def parse(self):
src = ''
migrate = False
path = self.config_file_path
if os.path.exists(path):
with ExclusiveFile(path) as f:
try:
src = f.read().decode('utf-8')
except ValueError:
print("Failed to parse", path)
traceback.print_exc()
if not src:
path = path.rpartition('.')[0]
from calibre.utils.shared_file import share_open
try:
with share_open(path, 'rb') as f:
src = f.read().decode('utf-8')
except Exception:
pass
else:
migrate = bool(src)
ans = self.option_set.parse_string(src)
if migrate:
new_src = self.option_set.serialize(ans, ignore_unserializable=True)
with ExclusiveFile(self.config_file_path) as f:
f.seek(0), f.truncate()
f.write(new_src)
return ans
def set(self, name, val):
if not self.option_set.has_option(name):
raise ValueError('The option %s is not defined.'%name)
if not os.path.exists(config_dir):
make_config_dir()
with ExclusiveFile(self.config_file_path) as f:
src = f.read()
opts = self.option_set.parse_string(src)
setattr(opts, name, val)
src = self.option_set.serialize(opts)
f.seek(0)
f.truncate()
if isinstance(src, unicode_type):
src = src.encode('utf-8')
f.write(src)
class StringConfig(ConfigInterface):
'''
A string based configuration
'''
def __init__(self, src, description=''):
ConfigInterface.__init__(self, description)
self.set_src(src)
def set_src(self, src):
self.src = src
if isinstance(self.src, bytes):
self.src = self.src.decode('utf-8')
def parse(self):
return self.option_set.parse_string(self.src)
def set(self, name, val):
if not self.option_set.has_option(name):
raise ValueError('The option %s is not defined.'%name)
opts = self.option_set.parse_string(self.src)
setattr(opts, name, val)
self.set_src(self.option_set.serialize(opts))
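# Usage sketch (illustrative): StringConfig round-trips option values through
# an in-memory string instead of a config file.
#
#     c = StringConfig('')
#     c.add_opt('level', default=1)
#     c.set('level', 3)
#     assert c.parse().level == 3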
class ConfigProxy(object):
'''
A Proxy to minimize file reads for widely used config settings
'''
def __init__(self, config):
self.__config = config
self.__opts = None
@property
def defaults(self):
return self.__config.option_set.defaults
def refresh(self):
self.__opts = self.__config.parse()
def retranslate_help(self):
self.__config.option_set.retranslate_help()
def __getitem__(self, key):
return self.get(key)
def __setitem__(self, key, val):
return self.set(key, val)
def __delitem__(self, key):
self.set(key, self.defaults[key])
def get(self, key):
if self.__opts is None:
self.refresh()
return getattr(self.__opts, key)
def set(self, key, val):
if self.__opts is None:
self.refresh()
setattr(self.__opts, key, val)
return self.__config.set(key, val)
def help(self, key):
return self.__config.get_option(key).help
def create_global_prefs(conf_obj=None):
c = Config('global', 'calibre wide preferences') if conf_obj is None else conf_obj
c.add_opt('database_path',
default=os.path.expanduser('~/library1.db'),
help=_('Path to the database in which books are stored'))
c.add_opt('filename_pattern', default=u'(?P<title>.+) - (?P<author>[^_]+)',
help=_('Pattern to guess metadata from filenames'))
c.add_opt('isbndb_com_key', default='',
help=_('Access key for isbndb.com'))
c.add_opt('network_timeout', default=5,
help=_('Default timeout for network operations (seconds)'))
c.add_opt('library_path', default=None,
help=_('Path to directory in which your library of books is stored'))
c.add_opt('language', default=None,
help=_('The language in which to display the user interface'))
c.add_opt('output_format', default='EPUB',
help=_('The default output format for e-book conversions. When auto-converting'
' to send to a device this can be overridden by individual device preferences.'
' These can be changed by right clicking the device icon in calibre and'
' choosing "Configure".'))
c.add_opt('input_format_order', default=['EPUB', 'AZW3', 'MOBI', 'LIT', 'PRC',
'FB2', 'HTML', 'HTM', 'XHTM', 'SHTML', 'XHTML', 'ZIP', 'DOCX', 'ODT', 'RTF', 'PDF',
'TXT'],
help=_('Ordered list of formats to prefer for input.'))
c.add_opt('read_file_metadata', default=True,
help=_('Read metadata from files'))
c.add_opt('worker_process_priority', default='normal',
help=_('The priority of worker processes. A higher priority '
'means they run faster and consume more resources. '
'Most tasks like conversion/news download/adding books/etc. '
'are affected by this setting.'))
c.add_opt('swap_author_names', default=False,
help=_('Swap author first and last names when reading metadata'))
c.add_opt('add_formats_to_existing', default=False,
help=_('Add new formats to existing book records'))
c.add_opt('check_for_dupes_on_ctl', default=False,
help=_('Check for duplicates when copying to another library'))
c.add_opt('installation_uuid', default=None, help='Installation UUID')
c.add_opt('new_book_tags', default=[], help=_('Tags to apply to books added to the library'))
c.add_opt('mark_new_books', default=False, help=_(
'Mark newly added books. The mark is a temporary mark that is automatically removed when calibre is restarted.'))
# these are here instead of the gui preferences because calibredb and
# calibre server can execute searches
c.add_opt('saved_searches', default={}, help=_('List of named saved searches'))
c.add_opt('user_categories', default={}, help=_('User-created Tag browser categories'))
c.add_opt('manage_device_metadata', default='manual',
help=_('How and when calibre updates metadata on the device.'))
c.add_opt('limit_search_columns', default=False,
help=_('When searching for text without using lookup '
'prefixes, as for example, Red instead of title:Red, '
'limit the columns searched to those named below.'))
c.add_opt('limit_search_columns_to',
default=['title', 'authors', 'tags', 'series', 'publisher'],
help=_('Choose columns to be searched when not using prefixes, '
'as for example, when searching for Red instead of '
'title:Red. Enter a list of search/lookup names '
'separated by commas. Only takes effect if you set the option '
'to limit search columns above.'))
c.add_opt('use_primary_find_in_search', default=True,
help=_(u'Characters typed in the search box will match their '
'accented versions, based on the language you have chosen '
'for the calibre interface. For example, in '
u'English, searching for n will match both {} and n, but if '
'your language is Spanish it will only match n. Note that '
'this is much slower than a simple search on very large '
'libraries. Also, this option will have no effect if you turn '
'on case-sensitive searching'))
c.add_opt('case_sensitive', default=False, help=_(
'Make searches case-sensitive'))
c.add_opt('migrated', default=False, help='For Internal use. Don\'t modify.')
return c
prefs = ConfigProxy(create_global_prefs())
if prefs['installation_uuid'] is None:
import uuid
prefs['installation_uuid'] = unicode_type(uuid.uuid4())
# Read tweaks
def tweaks_file():
return os.path.join(config_dir, 'tweaks.json')
def make_unicode(obj):
if isinstance(obj, bytes):
try:
return obj.decode('utf-8')
except UnicodeDecodeError:
return obj.decode(preferred_encoding, errors='replace')
if isinstance(obj, (list, tuple)):
return list(map(make_unicode, obj))
if isinstance(obj, dict):
return {make_unicode(k): make_unicode(v) for k, v in iteritems(obj)}
return obj
def normalize_tweak(val):
if isinstance(val, (list, tuple)):
return tuple(map(normalize_tweak, val))
if isinstance(val, dict):
return {k: normalize_tweak(v) for k, v in iteritems(val)}
return val
def write_custom_tweaks(tweaks_dict):
make_config_dir()
tweaks_dict = make_unicode(tweaks_dict)
changed_tweaks = {}
default_tweaks = exec_tweaks(default_tweaks_raw())
for key, cval in iteritems(tweaks_dict):
if key in default_tweaks and normalize_tweak(cval) == normalize_tweak(default_tweaks[key]):
continue
changed_tweaks[key] = cval
raw = json_dumps(changed_tweaks)
with open(tweaks_file(), 'wb') as f:
f.write(raw)
def exec_tweaks(path):
if isinstance(path, bytes):
raw = path
fname = '<string>'
else:
with open(path, 'rb') as f:
raw = f.read()
fname = f.name
code = compile(raw, fname, 'exec')
l = {}
g = {'__file__': fname}
exec(code, g, l)
return l
def read_custom_tweaks():
make_config_dir()
tf = tweaks_file()
ans = {}
if os.path.exists(tf):
with open(tf, 'rb') as f:
raw = f.read()
raw = raw.strip()
if not raw:
return ans
try:
return json_loads(raw)
except Exception:
import traceback
traceback.print_exc()
return ans
old_tweaks_file = tf.rpartition('.')[0] + '.py'
if os.path.exists(old_tweaks_file):
ans = exec_tweaks(old_tweaks_file)
ans = make_unicode(ans)
write_custom_tweaks(ans)
return ans
def default_tweaks_raw():
return P('default_tweaks.py', data=True, allow_user_override=False)
def read_tweaks():
default_tweaks = exec_tweaks(default_tweaks_raw())
try:
custom_tweaks = read_custom_tweaks()
except Exception:
custom_tweaks = {}
default_tweaks.update(custom_tweaks)
return default_tweaks
tweaks = read_tweaks()
def reset_tweaks_to_default():
default_tweaks = exec_tweaks(default_tweaks_raw())
tweaks.clear()
tweaks.update(default_tweaks)
class Tweak(object):
def __init__(self, name, value):
self.name, self.value = name, value
def __enter__(self):
self.origval = tweaks[self.name]
tweaks[self.name] = self.value
def __exit__(self, *args):
tweaks[self.name] = self.origval
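# Usage sketch (illustrative): temporarily override a tweak for the duration
# of a block; the original value is restored on exit.
#
#     with Tweak('series_index_auto_increment', 'next_free'):
#         ...  # code here sees the overridden value in tweaks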

View File

@@ -0,0 +1,485 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re
from datetime import datetime, time as dtime, timedelta, MINYEAR, MAXYEAR
from functools import partial
from calibre import strftime
from calibre.constants import iswindows, isosx, plugins, preferred_encoding
from calibre.utils.iso8601 import utc_tz, local_tz, UNDEFINED_DATE
from calibre.utils.localization import lcdata
from polyglot.builtins import unicode_type, native_string_type
_utc_tz = utc_tz
_local_tz = local_tz
# When parsing ambiguous dates that could be either dd-MM or MM-dd, use the
# user's locale preferences
if iswindows:
import ctypes
LOCALE_SSHORTDATE, LOCALE_USER_DEFAULT = 0x1f, 0
buf = ctypes.create_string_buffer(b'\0', 255)
try:
ctypes.windll.kernel32.GetLocaleInfoA(LOCALE_USER_DEFAULT, LOCALE_SSHORTDATE, buf, 255)
parse_date_day_first = buf.value.index(b'd') < buf.value.index(b'M')
except:
parse_date_day_first = False
del ctypes, LOCALE_SSHORTDATE, buf, LOCALE_USER_DEFAULT
elif isosx:
try:
date_fmt = plugins['usbobserver'][0].date_format()
parse_date_day_first = date_fmt.index('d') < date_fmt.index('M')
except:
parse_date_day_first = False
else:
try:
def first_index(raw, queries):
for q in queries:
try:
return raw.index(native_string_type(q))
except ValueError:
pass
return -1
import locale
raw = locale.nl_langinfo(locale.D_FMT)
parse_date_day_first = first_index(raw, ('%d', '%a', '%A')) < first_index(raw, ('%m', '%b', '%B'))
del raw, first_index
except:
parse_date_day_first = False
DEFAULT_DATE = datetime(2000,1,1, tzinfo=utc_tz)
EPOCH = datetime(1970, 1, 1, tzinfo=_utc_tz)
def is_date_undefined(qt_or_dt):
d = qt_or_dt
if d is None:
return True
if hasattr(d, 'toString'):
if hasattr(d, 'date'):
d = d.date()
try:
d = datetime(d.year(), d.month(), d.day(), tzinfo=utc_tz)
except ValueError:
return True # Undefined QDate
return d.year < UNDEFINED_DATE.year or (
d.year == UNDEFINED_DATE.year and
d.month == UNDEFINED_DATE.month and
d.day == UNDEFINED_DATE.day)
_iso_pat = None
def iso_pat():
global _iso_pat
if _iso_pat is None:
_iso_pat = re.compile(r'\d{4}[/.-]\d{1,2}[/.-]\d{1,2}')
return _iso_pat
def parse_date(date_string, assume_utc=False, as_utc=True, default=None):
'''
Parse a date/time string into a timezone aware datetime object. The timezone
is always either UTC or the local timezone.
:param assume_utc: If True and date_string does not specify a timezone,
assume UTC, otherwise assume local timezone.
:param as_utc: If True, return a UTC datetime
:param default: Missing fields are filled in from default. If None, the
current month and year are used.
'''
from dateutil.parser import parse
if not date_string:
return UNDEFINED_DATE
if isinstance(date_string, bytes):
date_string = date_string.decode(preferred_encoding, 'replace')
if default is None:
func = datetime.utcnow if assume_utc else datetime.now
default = func().replace(day=15, hour=0, minute=0, second=0, microsecond=0,
tzinfo=_utc_tz if assume_utc else _local_tz)
if iso_pat().match(date_string) is not None:
dt = parse(date_string, default=default)
else:
dt = parse(date_string, default=default, dayfirst=parse_date_day_first)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz)
return dt.astimezone(_utc_tz if as_utc else _local_tz)
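# Behaviour sketch (illustrative; exact fields depend on the current date and
# locale, since missing parts come from `default`):
#
#     dt = parse_date('2010-01-09')               # -> timezone-aware, converted to UTC
#     dt = parse_date('9/1/10', assume_utc=True)  # day/month order follows the locale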
def fix_only_date(val):
n = val + timedelta(days=1)
if n.month > val.month:
val = val.replace(day=val.day-1)
if val.day == 1:
val = val.replace(day=2)
return val
def parse_only_date(raw, assume_utc=True, as_utc=True):
'''
Parse a date string that contains no time information in a manner that
guarantees that the month and year are always correct in all timezones, and
the day is at most one day wrong.
'''
f = utcnow if assume_utc else now
default = f().replace(hour=0, minute=0, second=0, microsecond=0,
day=15)
return fix_only_date(parse_date(raw, default=default, assume_utc=assume_utc, as_utc=as_utc))
def strptime(val, fmt, assume_utc=False, as_utc=True):
dt = datetime.strptime(val, fmt)
if dt.tzinfo is None:
dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz)
return dt.astimezone(_utc_tz if as_utc else _local_tz)
def dt_factory(time_t, assume_utc=False, as_utc=True):
dt = datetime(*(time_t[0:6]))
if dt.tzinfo is None:
dt = dt.replace(tzinfo=_utc_tz if assume_utc else _local_tz)
return dt.astimezone(_utc_tz if as_utc else _local_tz)
safeyear = lambda x: min(max(x, MINYEAR), MAXYEAR)
def qt_to_dt(qdate_or_qdatetime, as_utc=True):
o = qdate_or_qdatetime
if hasattr(o, 'toUTC'):
# QDateTime
o = o.toUTC()
d, t = o.date(), o.time()
try:
ans = datetime(safeyear(d.year()), d.month(), d.day(), t.hour(), t.minute(), t.second(), t.msec()*1000, utc_tz)
except ValueError:
ans = datetime(safeyear(d.year()), d.month(), 1, t.hour(), t.minute(), t.second(), t.msec()*1000, utc_tz)
if not as_utc:
ans = ans.astimezone(local_tz)
return ans
try:
dt = datetime(safeyear(o.year()), o.month(), o.day()).replace(tzinfo=_local_tz)
except ValueError:
dt = datetime(safeyear(o.year()), o.month(), 1).replace(tzinfo=_local_tz)
return dt.astimezone(_utc_tz if as_utc else _local_tz)
def fromtimestamp(ctime, as_utc=True):
dt = datetime.utcfromtimestamp(ctime).replace(tzinfo=_utc_tz)
if not as_utc:
dt = dt.astimezone(_local_tz)
return dt
def fromordinal(day, as_utc=True):
return datetime.fromordinal(day).replace(
tzinfo=_utc_tz if as_utc else _local_tz)
def isoformat(date_time, assume_utc=False, as_utc=True, sep='T'):
if not hasattr(date_time, 'tzinfo'):
return unicode_type(date_time.isoformat())
if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
date_time = date_time.astimezone(_utc_tz if as_utc else _local_tz)
# native_string_type(sep) because isoformat barfs with unicode sep on python 2.x
return unicode_type(date_time.isoformat(native_string_type(sep)))
def internal_iso_format_string():
return 'yyyy-MM-ddThh:mm:ss'
def w3cdtf(date_time, assume_utc=False):
if hasattr(date_time, 'tzinfo'):
if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
date_time = date_time.astimezone(_utc_tz)  # trailing 'Z' below means UTC
return unicode_type(date_time.strftime('%Y-%m-%dT%H:%M:%SZ'))
def as_local_time(date_time, assume_utc=True):
if not hasattr(date_time, 'tzinfo'):
return date_time
if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
return date_time.astimezone(_local_tz)
def dt_as_local(dt):
if dt.tzinfo is local_tz:
return dt
return dt.astimezone(local_tz)
def as_utc(date_time, assume_utc=True):
if not hasattr(date_time, 'tzinfo'):
return date_time
if date_time.tzinfo is None:
date_time = date_time.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
return date_time.astimezone(_utc_tz)
def now():
return datetime.now().replace(tzinfo=_local_tz)
def utcnow():
return datetime.utcnow().replace(tzinfo=_utc_tz)
def utcfromtimestamp(stamp):
try:
return datetime.utcfromtimestamp(stamp).replace(tzinfo=_utc_tz)
except ValueError:
# Raised if stamp is out of range for the platform's gmtime function.
# For example, this happens with negative values on Windows.
try:
return EPOCH + timedelta(seconds=stamp)
except (ValueError, OverflowError):
# datetime can only represent years between 1 and 9999
import traceback
traceback.print_exc()
return utcnow()
def timestampfromdt(dt, assume_utc=True):
return (as_utc(dt, assume_utc=assume_utc) - EPOCH).total_seconds()
# Format date functions {{{
def fd_format_hour(dt, ampm, hr):
l = len(hr)
h = dt.hour
if ampm:
h = h%12
if l == 1:
return '%d'%h
return '%02d'%h
def fd_format_minute(dt, ampm, min):
l = len(min)
if l == 1:
return '%d'%dt.minute
return '%02d'%dt.minute
def fd_format_second(dt, ampm, sec):
l = len(sec)
if l == 1:
return '%d'%dt.second
return '%02d'%dt.second
def fd_format_ampm(dt, ampm, ap):
res = strftime('%p', t=dt.timetuple())
if ap == 'AP':
return res
return res.lower()
def fd_format_day(dt, ampm, dy):
l = len(dy)
if l == 1:
return '%d'%dt.day
if l == 2:
return '%02d'%dt.day
return lcdata['abday' if l == 3 else 'day'][(dt.weekday() + 1) % 7]
def fd_format_month(dt, ampm, mo):
l = len(mo)
if l == 1:
return '%d'%dt.month
if l == 2:
return '%02d'%dt.month
return lcdata['abmon' if l == 3 else 'mon'][dt.month - 1]
def fd_format_year(dt, ampm, yr):
if len(yr) == 2:
return '%02d'%(dt.year % 100)
return '%04d'%dt.year
fd_function_index = {
'd': fd_format_day,
'M': fd_format_month,
'y': fd_format_year,
'h': fd_format_hour,
'm': fd_format_minute,
's': fd_format_second,
'a': fd_format_ampm,
'A': fd_format_ampm,
}
def fd_repl_func(dt, ampm, mo):
s = mo.group(0)
if not s:
return ''
return fd_function_index[s[0]](dt, ampm, s)
def format_date(dt, format, assume_utc=False, as_utc=False):
''' Return a date formatted as a string using a subset of Qt's formatting codes '''
if not format:
format = 'dd MMM yyyy'
if not isinstance(dt, datetime):
dt = datetime.combine(dt, dtime())
if hasattr(dt, 'tzinfo'):
if dt.tzinfo is None:
dt = dt.replace(tzinfo=_utc_tz if assume_utc else
_local_tz)
dt = dt.astimezone(_utc_tz if as_utc else _local_tz)
if format == 'iso':
return isoformat(dt, assume_utc=assume_utc, as_utc=as_utc)
if dt == UNDEFINED_DATE:
return ''
repl_func = partial(fd_repl_func, dt, 'ap' in format.lower())
return re.sub(
'(s{1,2})|(m{1,2})|(h{1,2})|(ap)|(AP)|(d{1,4}|M{1,4}|(?:yyyy|yy))',
repl_func, format)
# }}}
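# Behaviour sketch for format_date (illustrative; month names come from lcdata,
# shown here for an English locale):
#
#     from datetime import datetime
#     d = datetime(2010, 1, 9, 14, 5, 0)
#     format_date(d, 'dd MMM yyyy')  # -> '09 Jan 2010'
#     format_date(d, 'd/M/yy')       # -> '9/1/10'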
# Clean date functions {{{
def cd_has_hour(tt, dt):
tt['hour'] = dt.hour
return ''
def cd_has_minute(tt, dt):
tt['min'] = dt.minute
return ''
def cd_has_second(tt, dt):
tt['sec'] = dt.second
return ''
def cd_has_day(tt, dt):
tt['day'] = dt.day
return ''
def cd_has_month(tt, dt):
tt['mon'] = dt.month
return ''
def cd_has_year(tt, dt):
tt['year'] = dt.year
return ''
cd_function_index = {
'd': cd_has_day,
'M': cd_has_month,
'y': cd_has_year,
'h': cd_has_hour,
'm': cd_has_minute,
's': cd_has_second
}
def cd_repl_func(tt, dt, match_object):
s = match_object.group(0)
if not s:
return ''
return cd_function_index[s[0]](tt, dt)
def clean_date_for_sort(dt, fmt=None):
''' Return dt with fields not shown in the format set to a default '''
if not fmt:
fmt = 'yyMd'
if not isinstance(dt, datetime):
dt = datetime.combine(dt, dtime())
if hasattr(dt, 'tzinfo'):
if dt.tzinfo is None:
dt = dt.replace(tzinfo=_local_tz)
dt = as_local_time(dt)
if fmt == 'iso':
fmt = 'yyMdhms'
tt = {'year':UNDEFINED_DATE.year, 'mon':UNDEFINED_DATE.month,
'day':UNDEFINED_DATE.day, 'hour':UNDEFINED_DATE.hour,
'min':UNDEFINED_DATE.minute, 'sec':UNDEFINED_DATE.second}
repl_func = partial(cd_repl_func, tt, dt)
re.sub('(s{1,2})|(m{1,2})|(h{1,2})|(d{1,4}|M{1,4}|(?:yyyy|yy))', repl_func, fmt)
return dt.replace(year=tt['year'], month=tt['mon'], day=tt['day'], hour=tt['hour'],
minute=tt['min'], second=tt['sec'], microsecond=0)
# }}}
def replace_months(datestr, clang):
# Replace months by english equivalent for parse_date
frtoen = {
'[jJ]anvier': 'jan',
'[fF].vrier': 'feb',
'[mM]ars': 'mar',
'[aA]vril': 'apr',
'[mM]ai': 'may',
'[jJ]uin': 'jun',
'[jJ]uillet': 'jul',
'[aA]o.t': 'aug',
'[sS]eptembre': 'sep',
'[Oo]ctobre': 'oct',
'[nN]ovembre': 'nov',
'[dD].cembre': 'dec'}
detoen = {
'[jJ]anuar': 'jan',
'[fF]ebruar': 'feb',
'[mM].rz': 'mar',
'[aA]pril': 'apr',
'[mM]ai': 'may',
'[jJ]uni': 'jun',
'[jJ]uli': 'jul',
'[aA]ugust': 'aug',
'[sS]eptember': 'sep',
'[Oo]ktober': 'oct',
'[nN]ovember': 'nov',
'[dD]ezember': 'dec'}
if clang == 'fr':
dictoen = frtoen
elif clang == 'de':
dictoen = detoen
else:
return datestr
for k in dictoen:
tmp = re.sub(k, dictoen[k], datestr)
if tmp != datestr:
break
return tmp
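# Behaviour sketch (illustrative): month names are mapped to their English
# abbreviations so parse_date/dateutil can handle them.
#
#     replace_months('10 janvier 2010', 'fr')   # -> '10 jan 2010'
#     replace_months('10 Dezember 2010', 'de')  # -> '10 dec 2010'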

View File

@@ -0,0 +1,568 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
# License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import unicode_literals
# Contains various tweaks that affect calibre behavior. Only edit this file if
# you know what you are doing. If you delete this file, it will be recreated from
# defaults.
#: Auto increment series index
# The algorithm used to assign a book added to an existing series a series number.
# New series numbers assigned using this tweak are always integer values, except
# if a constant non-integer is specified.
# Possible values are:
# next - First available integer larger than the largest existing number
# first_free - First available integer larger than 0
# next_free - First available integer larger than the smallest existing number
# last_free - First available integer smaller than the largest existing number. Return largest existing + 1 if no free number is found
# const - Assign the number 1 always
# no_change - Do not change the series index
# a number - Assign that number always. The number is not in quotes. Note that 0.0 can be used here.
# Examples:
# series_index_auto_increment = 'next'
# series_index_auto_increment = 'next_free'
# series_index_auto_increment = 16.5
#
# Set the use_series_auto_increment_tweak_when_importing tweak to True to
# use the above values when importing/adding books. If this tweak is set to
# False (the default) then the series number will be set to 1 if it is not
# explicitly set during the import. If set to True, then the
# series index will be set according to the series_index_auto_increment setting.
# Note that the use_series_auto_increment_tweak_when_importing tweak is used
# only when a value is not provided during import. If the importing regular
# expression produces a value for series_index, or if you are reading metadata
# from books and the import plugin produces a value, then that value will
# be used irrespective of the setting of the tweak.
series_index_auto_increment = 'next'
use_series_auto_increment_tweak_when_importing = False
#: Add separator after completing an author name
# Should the completion separator be appended to the end of the completed
# text to automatically begin a new completion operation for authors.
# Can be either True or False
authors_completer_append_separator = False
#: Author sort name algorithm
# The algorithm used to copy author to author_sort.
# Possible values are:
# invert: use "fn ln" -> "ln, fn"
# copy : copy author to author_sort without modification
# comma : use 'copy' if there is a ',' in the name, otherwise use 'invert'
# nocomma : "fn ln" -> "ln fn" (without the comma)
# When this tweak is changed, the author_sort values stored with each author
# must be recomputed by right-clicking on an author in the left-hand tags pane,
# selecting 'manage authors', and pressing 'Recalculate all author sort values'.
# The author name suffixes are words that are ignored when they occur at the
# end of an author name. The case of the suffix is ignored and trailing
# periods are automatically handled. The same is true for prefixes.
# The author name copy words are a set of words which if they occur in an
# author name cause the automatically generated author sort string to be
# identical to the author name. This means that the sort for a string like Acme
# Inc. will be Acme Inc. instead of Inc., Acme
author_sort_copy_method = 'comma'
author_name_suffixes = ('Jr', 'Sr', 'Inc', 'Ph.D', 'Phd',
'MD', 'M.D', 'I', 'II', 'III', 'IV',
'Junior', 'Senior')
author_name_prefixes = ('Mr', 'Mrs', 'Ms', 'Dr', 'Prof')
author_name_copywords = ('Corporation', 'Company', 'Co.', 'Agency', 'Council',
'Committee', 'Inc.', 'Institute', 'Society', 'Club', 'Team')
#: Splitting multiple author names
# By default, calibre splits a string containing multiple author names on
# ampersands and the words "and" and "with". You can customize the splitting
# by changing the regular expression below. Strings are split on whatever the
# specified regular expression matches, in addition to ampersands.
# Default: r'(?i),?\s+(and|with)\s+'
authors_split_regex = r'(?i),?\s+(and|with)\s+'
#: Use author sort in Tag browser
# Set which author field to display in the tags pane (the list of authors,
# series, publishers etc on the left hand side). The choices are author and
# author_sort. This tweak affects only what is displayed under the authors
# category in the tags pane and Content server. Please note that if you set this
# to author_sort, it is very possible to see duplicate names in the list because
# although it is guaranteed that author names are unique, there is no such
# guarantee for author_sort values. Showing duplicates won't break anything, but
# it could lead to some confusion. When using 'author_sort', the tooltip will
# show the author's name.
# Examples:
# categories_use_field_for_author_name = 'author'
# categories_use_field_for_author_name = 'author_sort'
categories_use_field_for_author_name = 'author'
#: Control partitioning of Tag browser
# When partitioning the tags browser, the format of the subcategory label is
# controlled by a template: categories_collapsed_name_template if sorting by
# name, categories_collapsed_rating_template if sorting by average rating, and
# categories_collapsed_popularity_template if sorting by popularity. There are
# two variables available to the template: first and last. The variable 'first'
# is the initial item in the subcategory, and the variable 'last' is the final
# item in the subcategory. Both variables are 'objects'; they each have multiple
# values that are obtained by using a suffix. For example, first.name for an
# author category will be the name of the author. The sub-values available are:
# name: the printable name of the item
# count: the number of books that reference this item
# avg_rating: the average rating of all the books referencing this item
# sort: the sort value. For authors, this is the author_sort for that author
# category: the category (e.g., authors, series) that the item is in.
# Note that the "r'" in front of the { is necessary if there are backslashes
# (\ characters) in the template. It doesn't hurt anything to leave it there
# even if there aren't any backslashes.
categories_collapsed_name_template = r'{first.sort:shorten(4,,0)} - {last.sort:shorten(4,,0)}'
categories_collapsed_rating_template = r'{first.avg_rating:4.2f:ifempty(0)} - {last.avg_rating:4.2f:ifempty(0)}'
categories_collapsed_popularity_template = r'{first.count:d} - {last.count:d}'
#: Control order of categories in the Tag browser
# Change the following dict to change the order that categories are displayed in
# the Tag browser. Items are named using their lookup name, and will be sorted
# using the number supplied. The lookup name '*' stands for all names that
# otherwise do not appear. Two names with the same value will be sorted
# using the default order; the one used when the dict is empty.
# Example: tag_browser_category_order = {'series':1, 'tags':2, '*':3}
# resulting in the order series, tags, then everything else in default order.
tag_browser_category_order = {'*':1}
#: Specify columns to sort the booklist by on startup
# Provide a set of columns to be sorted on when calibre starts.
# The argument is None if saved sort history is to be used
# otherwise it is a list of column,order pairs. Column is the
# lookup/search name, found using the tooltip for the column
# Order is 0 for ascending, 1 for descending.
# For example, set it to [('authors',0),('title',0)] to sort by
# title within authors.
sort_columns_at_startup = None
#: Control how dates are displayed
# Format to be used for publication date and the timestamp (date).
# A string controlling how the publication date is displayed in the GUI
# d the day as number without a leading zero (1 to 31)
# dd the day as number with a leading zero (01 to 31)
# ddd the abbreviated localized day name (e.g. 'Mon' to 'Sun').
# dddd the long localized day name (e.g. 'Monday' to 'Sunday').
# M the month as number without a leading zero (1-12)
# MM the month as number with a leading zero (01-12)
# MMM the abbreviated localized month name (e.g. 'Jan' to 'Dec').
# MMMM the long localized month name (e.g. 'January' to 'December').
# yy the year as two digit number (00-99)
# yyyy the year as four digit number
# h the hours without a leading 0 (0 to 11 or 0 to 23, depending on am/pm)
# hh the hours with a leading 0 (00 to 11 or 00 to 23, depending on am/pm)
# m the minutes without a leading 0 (0 to 59)
# mm the minutes with a leading 0 (00 to 59)
# s the seconds without a leading 0 (0 to 59)
# ss the seconds with a leading 0 (00 to 59)
# ap use a 12-hour clock instead of a 24-hour clock, with "ap" replaced by the localized string for am or pm
# AP use a 12-hour clock instead of a 24-hour clock, with "AP" replaced by the localized string for AM or PM
# iso the date with time and timezone. Must be the only format present
# For example, given the date of 9 Jan 2010, the following formats show
# MMM yyyy ==> Jan 2010 yyyy ==> 2010 dd MMM yyyy ==> 09 Jan 2010
# MM/yyyy ==> 01/2010 d/M/yy ==> 9/1/10 yy ==> 10
#
# publication default if not set: MMM yyyy
# timestamp default if not set: dd MMM yyyy
# last_modified_display_format if not set: dd MMM yyyy
gui_pubdate_display_format = 'MMM yyyy'
gui_timestamp_display_format = 'dd MMM yyyy'
gui_last_modified_display_format = 'dd MMM yyyy'
#: Control sorting of titles and series in the library display
# Control title and series sorting in the library view. If set to
# 'library_order', the title sort field will be used instead of the title.
# Unless you have manually edited the title sort field, leading articles such as
# The and A will be ignored. If set to 'strictly_alphabetic', the titles will be
# sorted as-is (sort by title instead of title sort). For example, with
# library_order, The Client will sort under 'C'. With strictly_alphabetic, the
# book will sort under 'T'.
# This flag affects calibre's library display. It has no effect on devices. In
# addition, titles for books added before changing the flag will retain their
# order until the title is edited. Editing a title and hitting return
# without changing anything is sufficient to change the sort. Or you can use
# the 'Update title sort' action in the Bulk metadata edit dialog to update
# it for many books at once.
title_series_sorting = 'library_order'
#: Control formatting of title and series when used in templates
# Control how title and series names are formatted when saving to disk/sending
# to device. The behavior depends on the field being processed. If processing
# title, then if this tweak is set to 'library_order', the title will be
# replaced with title_sort. If it is set to 'strictly_alphabetic', then the
# title will not be changed. If processing series, then if set to
# 'library_order', articles such as 'The' and 'An' will be moved to the end. If
# set to 'strictly_alphabetic', the series will be sent without change.
# For example, if the tweak is set to library_order, "The Lord of the Rings"
# will become "Lord of the Rings, The". If the tweak is set to
# strictly_alphabetic, it would remain "The Lord of the Rings". Note that the
# formatter function raw_field will return the base value for title and
# series regardless of the setting of this tweak.
save_template_title_series_sorting = 'library_order'
#: Set the list of words considered to be "articles" for sort strings
# Set the list of words that are to be considered 'articles' when computing the
# title sort strings. The articles differ by language. By default, calibre uses
# a combination of articles from English and whatever language the calibre user
# interface is set to. In addition, in some contexts where the book language is
# available, the language of the book is used. You can change the list of
# articles for a given language or add a new language by editing
# per_language_title_sort_articles. To tell calibre to use a language other
# than the user interface language, set default_language_for_title_sort. For
# example, to use German, set it to 'deu'. A value of None means the user
# interface language is used. The setting title_sort_articles is ignored
# (present only for legacy reasons).
per_language_title_sort_articles = {
# English
'eng' : (r'A\s+', r'The\s+', r'An\s+'),
# Esperanto
'epo': (r'La\s+', r"L'", 'L´'),
# Spanish
'spa' : (r'El\s+', r'La\s+', r'Lo\s+', r'Los\s+', r'Las\s+', r'Un\s+',
r'Una\s+', r'Unos\s+', r'Unas\s+'),
# French
'fra' : (r'Le\s+', r'La\s+', r"L'", u'L´', u'L’', r'Les\s+', r'Un\s+', r'Une\s+',
r'Des\s+', r'De\s+La\s+', r'De\s+', r"D'", u'D´', u'D’'),
# Italian
'ita': ('Lo\\s+', 'Il\\s+', "L'", 'L´', 'La\\s+', 'Gli\\s+',
'I\\s+', 'Le\\s+', 'Uno\\s+', 'Un\\s+', 'Una\\s+', "Un'",
'Un´', 'Dei\\s+', 'Degli\\s+', 'Delle\\s+', 'Del\\s+',
'Della\\s+', 'Dello\\s+', "Dell'", 'Dell´'),
# Portuguese
'por' : (r'A\s+', r'O\s+', r'Os\s+', r'As\s+', r'Um\s+', r'Uns\s+',
r'Uma\s+', r'Umas\s+', ),
# Romanian
'ron' : (r'Un\s+', r'O\s+', r'Nişte\s+', ),
# German
'deu' : (r'Der\s+', r'Die\s+', r'Das\s+', r'Den\s+', r'Ein\s+',
r'Eine\s+', r'Einen\s+', r'Dem\s+', r'Des\s+', r'Einem\s+',
r'Eines\s+'),
# Dutch
'nld' : (r'De\s+', r'Het\s+', r'Een\s+', r"'n\s+", r"'s\s+", r'Ene\s+',
r'Ener\s+', r'Enes\s+', r'Den\s+', r'Der\s+', r'Des\s+',
r"'t\s+"),
# Swedish
'swe' : (r'En\s+', r'Ett\s+', r'Det\s+', r'Den\s+', r'De\s+', ),
# Turkish
'tur' : (r'Bir\s+', ),
# Afrikaans
'afr' : (r"'n\s+", r'Die\s+', ),
# Greek
'ell' : (r'O\s+', r'I\s+', r'To\s+', r'Ta\s+', r'Tus\s+', r'Tis\s+',
r"'Enas\s+", r"'Mia\s+", r"'Ena\s+", r"'Enan\s+", ),
# Hungarian
'hun' : (r'A\s+', r'Az\s+', r'Egy\s+',),
}
default_language_for_title_sort = None
title_sort_articles=r'^(A|The|An)\s+'
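# A minimal illustration (not part of calibre itself) of how one of these
# article patterns matches a leading article when a sort string is computed:
#   import re
#   re.match(r'^(A|The|An)\s+', 'The Client').group()  # -> 'The '
# The per-language tuples above supply the alternatives used to build such a
# pattern for the language in effect.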
#: Specify a folder calibre should connect to at startup
# Specify a folder that calibre should connect to at startup using
# connect_to_folder. This must be a full path to the folder. If the folder does
# not exist when calibre starts, it is ignored.
# Example for Windows:
# auto_connect_to_folder = 'C:/Users/someone/Desktop/testlib'
# Example for other operating systems:
# auto_connect_to_folder = '/home/dropbox/My Dropbox/someone/library'
auto_connect_to_folder = ''
#: Specify renaming rules for SONY collections
# Specify renaming rules for sony collections. This tweak is only applicable if
# metadata management is set to automatic. Collections on Sonys are named
# depending upon whether the field is standard or custom. A collection derived
# from a standard field is named for the value in that field. For example, if
# the standard 'series' column contains the value 'Darkover', then the
# collection name is 'Darkover'. A collection derived from a custom field will
# have the name of the field added to the value. For example, if a custom series
# column named 'My Series' contains the name 'Darkover', then the collection
# will by default be named 'Darkover (My Series)'. For purposes of this
# documentation, 'Darkover' is called the value and 'My Series' is called the
# category. If two books have fields that generate the same collection name,
# then both books will be in that collection.
# This set of tweaks lets you specify for a standard or custom field how
# the collections are to be named. You can use it to add a description to a
# standard field, for example 'Foo (Tag)' instead of the 'Foo'. You can also use
# it to force multiple fields to end up in the same collection. For example, you
# could force the values in 'series', '#my_series_1', and '#my_series_2' to
# appear in collections named 'some_value (Series)', thereby merging all of the
# fields into one set of collections.
# There are two related tweaks. The first determines the category name to use
# for a metadata field. The second is a template, used to determine how the
# value and category are combined to create the collection name.
# The syntax of the first tweak, sony_collection_renaming_rules, is:
# {'field_lookup_name':'category_name_to_use', 'lookup_name':'name', ...}
# The second tweak, sony_collection_name_template, is a template. It uses the
# same template language as plugboards and save templates. This tweak controls
# how the value and category are combined together to make the collection name.
# The only two fields available are {category} and {value}. The {value} field is
# never empty. The {category} field can be empty. The default is to put the
# value first, then the category enclosed in parentheses, if it isn't empty:
# '{value} {category:|(|)}'
# Examples: The first three examples assume that the second tweak
# has not been changed.
# 1: I want three series columns to be merged into one set of collections. The
# column lookup names are 'series', '#series_1' and '#series_2'. I want nothing
# in the parenthesis. The value to use in the tweak value would be:
# sony_collection_renaming_rules={'series':'', '#series_1':'', '#series_2':''}
# 2: I want the word '(Series)' to appear on collections made from series, and
# the word '(Tag)' to appear on collections made from tags. Use:
# sony_collection_renaming_rules={'series':'Series', 'tags':'Tag'}
# 3: I want 'series' and '#myseries' to be merged, and for the collection name
# to have '(Series)' appended. The renaming rule is:
# sony_collection_renaming_rules={'series':'Series', '#myseries':'Series'}
# 4: Same as example 2, but instead of having the category name in parentheses
# and appended to the value, I want it prepended and separated by a colon, such
# as in 'Series: Darkover'. I must change the template used to format the category name.
# The resulting two tweaks are:
# sony_collection_renaming_rules={'series':'Series', 'tags':'Tag'}
# sony_collection_name_template='{category:||: }{value}'
sony_collection_renaming_rules={}
sony_collection_name_template='{value}{category:| (|)}'
#: Specify how SONY collections are sorted
# Specify how sony collections are sorted. This tweak is only applicable if
# metadata management is set to automatic. You can indicate which metadata is to
# be used to sort on a collection-by-collection basis. The format of the tweak
# is a list of metadata fields from which collections are made, followed by the
# name of the metadata field containing the sort value.
# Example: The following indicates that collections built from pubdate and tags
# are to be sorted by the value in the custom column '#mydate', that collections
# built from 'series' are to be sorted by 'series_index', and that all other
# collections are to be sorted by title. If a collection metadata field is not
# named, then if it is a series-based collection it is sorted by series order,
# otherwise it is sorted by title order.
# [(['pubdate', 'tags'],'#mydate'), (['series'],'series_index'), (['*'], 'title')]
# Note that the bracketing and parentheses are required. The syntax is
# [ ( [list of fields], sort field ) , ( [ list of fields ] , sort field ) ]
# Default: empty (no rules), so no collection attributes are named.
sony_collection_sorting_rules = []
#: Control how tags are applied when copying books to another library
# Set this to True to ensure that tags in 'Tags to add when adding
# a book' are added when copying books to another library
add_new_book_tags_when_importing_books = False
#: Set custom metadata fields that the Content server will or will not display.
# Controls what fields are displayed when clicking the "Search" button in the
# browser to search your calibre library.
# content_server_will_display is a list of custom fields to be displayed.
# content_server_wont_display is a list of custom fields not to be displayed.
# wont_display has priority over will_display.
# The special value '*' means all custom fields. The value [] means no entries.
# Defaults:
# content_server_will_display = ['*']
# content_server_wont_display = []
#
# Examples:
#
# To display only the custom fields #mytags and #genre:
# content_server_will_display = ['#mytags', '#genre']
# content_server_wont_display = []
#
# To display all fields except #mycomments:
# content_server_will_display = ['*']
# content_server_wont_display = ['#mycomments']
content_server_will_display = ['*']
content_server_wont_display = []
#: Set the maximum number of sort 'levels'
# Set the maximum number of sort 'levels' that calibre will use to resort the
# library after certain operations such as searches or device insertion. Each
# sort level adds a performance penalty. If the database is large (thousands of
# books) the penalty might be noticeable. If you are not concerned about multi-
# level sorts, and if you are seeing a slowdown, reduce the value of this tweak.
maximum_resort_levels = 5
#: Choose whether dates are sorted using visible fields
# Date values contain both a date and a time. When sorted, all the fields are
# used, regardless of what is displayed. Set this tweak to True to use only
# the fields that are being displayed.
sort_dates_using_visible_fields = False
#: Fuzz value for trimming covers
# The value used for the fuzz distance when trimming a cover.
# Colors within this distance are considered equal.
# The distance is in absolute intensity units.
cover_trim_fuzz_value = 10
#: Control behavior of the book list
# You can control the behavior of double clicks and pressing enter on the books list.
# Choices: open_viewer, do_nothing,
# edit_cell, edit_metadata. Selecting anything other than open_viewer has the
# side effect of disabling editing a field using a single click.
# Default: open_viewer.
# Example: doubleclick_on_library_view = 'do_nothing'
# You can also control whether the book list scrolls horizontally per column or
# per pixel. Default is per column.
doubleclick_on_library_view = 'open_viewer'
enter_key_behavior = 'do_nothing'
horizontal_scrolling_per_column = True
#: Language to use when sorting
# Setting this tweak will force sorting to use the
# collating order for the specified language. This might be useful if you run
# calibre in English but want sorting to work in the language where you live.
# Set the tweak to the desired ISO 639-1 language code, in lower case.
# You can find the list of supported locales at
# https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
# Default: locale_for_sorting = '' -- use the language calibre displays in
# Example: locale_for_sorting = 'fr' -- sort using French rules.
# Example: locale_for_sorting = 'nb' -- sort using Norwegian rules.
locale_for_sorting = ''
#: Number of columns for custom metadata in the edit metadata dialog
# Set whether to use one or two columns for custom metadata when editing
# metadata one book at a time. If True, then the fields are laid out using two
# columns. If False, one column is used.
metadata_single_use_2_cols_for_custom_fields = True
#: Order of custom column(s) in edit metadata
# Controls the order that custom columns are listed in edit metadata single
# and bulk. The columns listed in the tweak are displayed first and in the
# order provided. Any columns not listed are displayed after the listed ones,
# in alphabetical order. Do note that this tweak does not change the size of
# the edit widgets. Putting comments widgets in this list may result in some
# odd widget spacing when using two-column mode.
# Enter a comma-separated list of custom field lookup names, as in
# metadata_edit_custom_column_order = ['#genre', '#mytags', '#etc']
metadata_edit_custom_column_order = []
#: The number of seconds to wait before sending emails
# The number of seconds to wait before sending emails when using a
# public email server like gmx/hotmail/gmail. Default is: 5 minutes
# Setting it lower may cause the server's SPAM controls to kick in,
# making email sending fail. Changes will take effect only after a restart of
# calibre. You can also change the list of hosts that calibre considers
# to be public relays here. Any relay host ending with one of the suffixes
# in the list below will be considered a public email server.
public_smtp_relay_delay = 301
public_smtp_relay_host_suffixes = ['gmail.com', 'live.com', 'gmx.com']
#: The maximum width and height for covers saved in the calibre library
# All covers in the calibre library will be resized, preserving aspect ratio,
# to fit within this size. This is to prevent slowdowns caused by extremely
# large covers
maximum_cover_size = (1650, 2200)
#: Where to send downloaded news
# When automatically sending downloaded news to a connected device, calibre
# will by default send it to the main memory. By changing this tweak, you can
# control where it is sent. Valid values are "main", "carda", "cardb". Note
# that if there isn't enough free space available on the location you choose,
# the files will be sent to the location with the most free space.
send_news_to_device_location = "main"
#: Unified toolbar on macOS
# If you enable this option and restart calibre, the toolbar will be 'unified'
# with the titlebar as is normal for macOS applications. However, doing this has
# various bugs, for instance the minimum width of the toolbar becomes twice
# what it should be and it causes other random bugs on some systems, so turn it
# on at your own risk!
unified_title_toolbar_on_osx = False
#: Save original file when converting/polishing from same format to same format
# When calibre does a conversion from the same format to the same format, for
# example, from EPUB to EPUB, the original file is saved, so that in case the
# conversion is poor, you can tweak the settings and run it again. By setting
# this to False you can prevent calibre from saving the original file.
# Similarly, by setting save_original_format_when_polishing to False you can
# prevent calibre from saving the original file when polishing.
save_original_format = True
save_original_format_when_polishing = True
#: Number of recently viewed books to show
# Right-clicking the "View" button shows a list of recently viewed books. Control
# how many are shown here.
gui_view_history_size = 15
#: Change the font size of book details in the interface
# Change the font size at which book details are rendered in the side panel and
# comments are rendered in the metadata edit dialog. Set it to a positive or
# negative number to increase or decrease the font size.
change_book_details_font_size_by = 0
#: What format to default to when using the "Unpack book" feature
# The "Unpack book" feature of calibre allows direct editing of a book format.
# If multiple formats are available, calibre will offer you a choice
# of formats, defaulting to your preferred output format if it is available.
# Set this tweak to a specific value of 'EPUB' or 'AZW3' to always default
# to that format rather than your output format preference.
# Set to a value of 'remember' to use whichever format you chose last time you
# used the "Unpack book" feature.
# Examples:
# default_tweak_format = None (Use output format)
# default_tweak_format = 'EPUB'
# default_tweak_format = 'remember'
default_tweak_format = None
#: Do not preselect a completion when editing authors/tags/series/etc.
# This means that you can make changes and press Enter and your changes will
# not be overwritten by a matching completion. However, if you wish to use the
# completions you will now have to press Tab to select one before pressing
# Enter. Which technique you prefer will depend on the state of metadata in
# your library and your personal editing style.
preselect_first_completion = False
#: Completion mode when editing authors/tags/series/etc.
# By default, when completing items, calibre will show you all the candidates
# that start with the text you have already typed. You can instead have it show
# all candidates that contain the text you have already typed. To do this, set
# completion_mode to 'contains'. For example, if you type asi it will match both
# Asimov and Quasimodo, whereas the default behavior would match only Asimov.
completion_mode = 'prefix'
#: Recognize numbers inside text when sorting
# This means that when sorting on text fields like title the text "Book 2"
# will sort before the text "Book 100". If you want this behavior, set
# numeric_collation = True. Note that doing so will cause problems with text
# that starts with numbers and is a little slower.
numeric_collation = False
#: Sort the list of libraries alphabetically
# The list of libraries in the Copy to library and Quick switch menus are
# normally sorted by most used. However, if there are more than a certain
# number of such libraries, the sorting becomes alphabetic. You can set that
# number here. The default is ten libraries.
many_libraries = 10
#: Choose available output formats for conversion
# Restrict the list of available output formats in the conversion dialogs.
# For example, if you only want to convert to EPUB and AZW3, change this to
# restrict_output_formats = ['EPUB', 'AZW3']. The default value of None causes
# all available output formats to be present.
restrict_output_formats = None
#: Set the thumbnail image quality used by the Content server
# The quality of a thumbnail is largely controlled by the compression quality
# used when creating it. Set this to a larger number to improve the quality.
# Note that the thumbnails get much larger with larger compression quality
# numbers.
# The value can be between 50 and 99
content_server_thumbnail_compression_quality = 75
#: Image file types to treat as e-books when dropping onto the "Book details" panel
# Normally, if you drop any image file in a format known to calibre onto the
# "Book details" panel, it will be used to set the cover. If you want to store
# some image types as e-books instead, you can set this tweak.
# Examples:
# cover_drop_exclude = {'tiff', 'webp'}
cover_drop_exclude = ()
#: Show the Saved searches box in the Search bar
# In newer versions of calibre, only a single button that allows you to add a
# new Saved search is shown in the Search bar. If you would like to have the
# old Saved searches box with its two buttons back, set this tweak to True.
show_saved_search_box = False
#: Exclude fields when copy/pasting metadata
# You can ask calibre to not paste some metadata fields when using the
# Edit metadata->Copy metadata/Paste metadata actions. For example,
# exclude_fields_on_paste = ['cover', 'timestamp', '#mycolumn']
# to prevent pasting of the cover, the Date (timestamp) and the custom column #mycolumn.
exclude_fields_on_paste = []
#: Skip internet connected check
# Skip checking whether the internet is available before downloading news.
# Useful if for some reason your operating system's network-checking
# facilities are not reliable (for example NetworkManager on Linux).
skip_network_check = False

View File

@@ -0,0 +1,642 @@
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Make strings safe for use as ASCII filenames, while trying to preserve as much
meaning as possible.
'''
import errno
import os
import shutil
import time
from math import ceil
from calibre import force_unicode, isbytestring, prints, sanitize_file_name
from calibre.constants import (
filesystem_encoding, iswindows, plugins, preferred_encoding, isosx, ispy3
)
from calibre.utils.localization import get_udc
from polyglot.builtins import iteritems, itervalues, unicode_type, range
def ascii_text(orig):
udc = get_udc()
try:
ascii = udc.decode(orig)
except Exception:
if isinstance(orig, unicode_type):
orig = orig.encode('ascii', 'replace')
ascii = orig.decode(preferred_encoding, 'replace')
if isinstance(ascii, bytes):
ascii = ascii.decode('ascii', 'replace')
return ascii
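# Illustrative usage (exact output depends on the transliteration tables
# returned by get_udc()):
#   ascii_text(u'Pelé')  # -> u'Pele'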
def ascii_filename(orig, substitute='_'):
if isinstance(substitute, bytes):
substitute = substitute.decode(filesystem_encoding)
orig = ascii_text(orig).replace('?', '_')
ans = ''.join(x if ord(x) >= 32 else substitute for x in orig)
return sanitize_file_name(ans, substitute=substitute)
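# Illustrative usage ('Café: Menu?' is a placeholder input); the result is
# ASCII only, with '?' and filesystem-unsafe characters replaced by the
# substitute, roughly u'Cafe_ Menu_':
#   ascii_filename(u'Café: Menu?')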
def shorten_component(s, by_what):
l = len(s)
if l < by_what:
return s
l = (l - by_what)//2
if l <= 0:
return s
return s[:l] + s[-l:]
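# Example: shorten_component('abcdefghij', 4) keeps three characters from
# each end and drops the middle, returning 'abchij'.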
def limit_component(x, limit=254):
# Windows and macOS use UTF-16 code points for length; Linux uses arbitrary
# binary data, but we will assume UTF-8
filename_encoding_for_length = 'utf-16' if iswindows or isosx else 'utf-8'
def encoded_length():
q = x if isinstance(x, bytes) else x.encode(filename_encoding_for_length)
return len(q)
while encoded_length() > limit:
delta = encoded_length() - limit
x = shorten_component(x, max(2, delta // 2))
return x
def shorten_components_to(length, components, more_to_take=0, last_has_extension=True):
components = [limit_component(cx) for cx in components]
filepath = os.sep.join(components)
extra = len(filepath) - (length - more_to_take)
if extra < 1:
return components
deltas = []
for x in components:
pct = len(x)/float(len(filepath))
deltas.append(int(ceil(pct*extra)))
ans = []
for i, x in enumerate(components):
delta = deltas[i]
if delta > len(x):
r = x[0] if x is components[-1] else ''
else:
if last_has_extension and x is components[-1]:
b, e = os.path.splitext(x)
if e == '.':
e = ''
r = shorten_component(b, delta)+e
if r.startswith('.'):
r = x[0]+r
else:
r = shorten_component(x, delta)
r = r.strip()
if not r:
r = x.strip()[0] if x.strip() else 'x'
ans.append(r)
if len(os.sep.join(ans)) > length:
return shorten_components_to(length, components, more_to_take+2)
return ans
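# Illustrative usage (components are placeholders): each component is
# shortened proportionally until the os.sep-joined length fits the limit,
# preserving the extension of the final component:
#   shorten_components_to(15, ['authors', 'a very long book title.epub'])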
def find_executable_in_path(name, path=None):
if path is None:
path = os.environ.get('PATH', '')
exts = '.exe .cmd .bat'.split() if iswindows and not name.endswith('.exe') else ('',)
path = path.split(os.pathsep)
for x in path:
for ext in exts:
q = os.path.abspath(os.path.join(x, name)) + ext
if os.access(q, os.X_OK):
return q
def is_case_sensitive(path):
'''
Return True if the filesystem is case sensitive.
path must be the path to an existing directory. You must have permission
to create and delete files in this directory. The results of this test
apply to the filesystem containing the directory in path.
'''
is_case_sensitive = False
if not iswindows:
name1, name2 = ('calibre_test_case_sensitivity.txt',
'calibre_TesT_CaSe_sensitiVitY.Txt')
f1, f2 = os.path.join(path, name1), os.path.join(path, name2)
if os.path.exists(f1):
os.remove(f1)
open(f1, 'w').close()
is_case_sensitive = not os.path.exists(f2)
os.remove(f1)
return is_case_sensitive
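# Illustrative usage:
#   import tempfile
#   is_case_sensitive(tempfile.gettempdir())  # True on typical Linux systems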
def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
'''
Open the file pointed to by path with the specified mode. If any
directories in path do not exist, they are created. Returns the
opened file object and the path to the opened file object. This path is
guaranteed to have the same case as the on disk path. For case insensitive
filesystems, the returned path may be different from the passed in path.
The returned path is always unicode and always an absolute path.
If mode is None, then this function assumes that path points to a directory
and return the path to the directory as the file object.
mkdir_mode specifies the mode with which any missing directories in path
are created.
'''
if isbytestring(path):
path = path.decode(filesystem_encoding)
path = os.path.abspath(path)
sep = force_unicode(os.sep, 'ascii')
if path.endswith(sep):
path = path[:-1]
if not path:
raise ValueError('Path must not point to root')
components = path.split(sep)
if not components:
raise ValueError('Invalid path: %r'%path)
cpath = sep
if iswindows:
# Always upper case the drive letter and add a trailing slash so that
# the first os.listdir works correctly
cpath = components[0].upper() + sep
bdir = path if mode is None else os.path.dirname(path)
if not os.path.exists(bdir):
os.makedirs(bdir, mkdir_mode)
# Walk all the directories in path, putting the on disk case version of
# the directory into cpath
dirs = components[1:] if mode is None else components[1:-1]
for comp in dirs:
cdir = os.path.join(cpath, comp)
cl = comp.lower()
try:
candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
except:
# Don't have permission to do the listdir, assume the case is
# correct as we have no way to check it.
pass
else:
if len(candidates) == 1:
cdir = os.path.join(cpath, candidates[0])
# else: We are on a case sensitive file system so cdir must already
# be correct
cpath = cdir
if mode is None:
ans = fpath = cpath
else:
fname = components[-1]
ans = lopen(os.path.join(cpath, fname), mode)
# Ensure file and all its metadata is written to disk so that subsequent
# listdir() has file name in it. I don't know if this is actually
# necessary, but given the diversity of platforms, best to be safe.
ans.flush()
os.fsync(ans.fileno())
cl = fname.lower()
try:
candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
except EnvironmentError:
# The containing directory, somehow disappeared?
candidates = []
if len(candidates) == 1:
fpath = os.path.join(cpath, candidates[0])
else:
# We are on a case sensitive filesystem
fpath = os.path.join(cpath, fname)
return ans, fpath
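# Illustrative usage ('/tmp/Some/Dir/book.epub' is a placeholder); missing
# directories are created, and fpath has the case actually present on disk:
#   f, fpath = case_preserving_open_file('/tmp/Some/Dir/book.epub')
#   f.write(b'data'); f.close()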
def windows_get_fileid(path):
''' The fileid uniquely identifies actual file contents (it is the same for
all hardlinks to a file). Similar to inode number on linux. '''
import win32file
from pywintypes import error
if isbytestring(path):
path = path.decode(filesystem_encoding)
try:
h = win32file.CreateFileW(path, 0, 0, None, win32file.OPEN_EXISTING,
win32file.FILE_FLAG_BACKUP_SEMANTICS, 0)
try:
data = win32file.GetFileInformationByHandle(h)
finally:
win32file.CloseHandle(h)
except (error, EnvironmentError):
return None
return data[4], data[8], data[9]
def samefile_windows(src, dst):
samestring = (os.path.normcase(os.path.abspath(src)) ==
os.path.normcase(os.path.abspath(dst)))
if samestring:
return True
a, b = windows_get_fileid(src), windows_get_fileid(dst)
if a is None and b is None:
return False
return a == b
def samefile(src, dst):
'''
Check if two paths point to the same actual file on the filesystem. Handles
symlinks, case insensitivity, mapped drives, etc.
Returns True iff both paths exist and point to the same file on disk.
Note: On windows will return True if the two string are identical (up to
case) even if the file does not exist. This is because I have no way of
knowing how reliable the GetFileInformationByHandle method is.
'''
if iswindows:
return samefile_windows(src, dst)
if hasattr(os.path, 'samefile'):
# Unix
try:
return os.path.samefile(src, dst)
except EnvironmentError:
return False
# All other platforms: check for same pathname.
samestring = (os.path.normcase(os.path.abspath(src)) ==
os.path.normcase(os.path.abspath(dst)))
return samestring
def windows_get_size(path):
''' On windows file sizes are only accurately stored in the actual file,
not in the directory entry (which could be out of date). So we open the
file, and get the actual size. '''
import win32file
if isbytestring(path):
path = path.decode(filesystem_encoding)
h = win32file.CreateFileW(
path, 0, win32file.FILE_SHARE_READ | win32file.FILE_SHARE_WRITE | win32file.FILE_SHARE_DELETE,
None, win32file.OPEN_EXISTING, 0, None)
try:
return win32file.GetFileSize(h)
finally:
win32file.CloseHandle(h)
def windows_hardlink(src, dest):
import win32file, pywintypes
try:
win32file.CreateHardLink(dest, src)
except pywintypes.error as e:
msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
raise OSError(msg % e)
src_size = os.path.getsize(src)
# We open and close dest, to ensure its directory entry is updated
# see http://blogs.msdn.com/b/oldnewthing/archive/2011/12/26/10251026.aspx
for i in range(10):
# If we are on a network filesystem, we have to wait for some indeterminate time, since
# network file systems are the best thing since sliced bread
try:
if windows_get_size(dest) == src_size:
return
except EnvironmentError:
pass
time.sleep(0.3)
sz = windows_get_size(dest)
if sz != src_size:
msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
raise OSError(msg % ('hardlink size: %d not the same as source size' % sz))
def windows_fast_hardlink(src, dest):
import win32file, pywintypes
try:
win32file.CreateHardLink(dest, src)
except pywintypes.error as e:
msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
raise OSError(msg % e)
ssz, dsz = windows_get_size(src), windows_get_size(dest)
if ssz != dsz:
msg = 'Creating hardlink from %s to %s failed: %%s' % (src, dest)
raise OSError(msg % ('hardlink size: %d not the same as source size: %s' % (dsz, ssz)))
def windows_nlinks(path):
import win32file
dwFlagsAndAttributes = win32file.FILE_FLAG_BACKUP_SEMANTICS if os.path.isdir(path) else 0
if isbytestring(path):
path = path.decode(filesystem_encoding)
handle = win32file.CreateFileW(path, win32file.GENERIC_READ, win32file.FILE_SHARE_READ, None, win32file.OPEN_EXISTING, dwFlagsAndAttributes, None)
try:
return win32file.GetFileInformationByHandle(handle)[7]
finally:
handle.Close()
class WindowsAtomicFolderMove(object):
'''
Move all the files inside a specified folder in an atomic fashion,
preventing any other process from locking a file while the operation is
incomplete. Raises an IOError if another process has locked a file before
the operation starts. Note that this only operates on the files in the
folder, not any sub-folders.
'''
def __init__(self, path):
self.handle_map = {}
import win32file, winerror
from pywintypes import error
from collections import defaultdict
if isbytestring(path):
path = path.decode(filesystem_encoding)
if not os.path.exists(path):
return
names = os.listdir(path)
name_to_fileid = {x:windows_get_fileid(os.path.join(path, x)) for x in names}
fileid_to_names = defaultdict(set)
for name, fileid in iteritems(name_to_fileid):
fileid_to_names[fileid].add(name)
for x in names:
f = os.path.normcase(os.path.abspath(os.path.join(path, x)))
if not os.path.isfile(f):
continue
try:
# Ensure the file is not read-only
win32file.SetFileAttributes(f, win32file.FILE_ATTRIBUTE_NORMAL)
except:
pass
try:
h = win32file.CreateFileW(f, win32file.GENERIC_READ,
win32file.FILE_SHARE_DELETE, None,
win32file.OPEN_EXISTING, win32file.FILE_FLAG_SEQUENTIAL_SCAN, 0)
except error as e:
if getattr(e, 'winerror', 0) == winerror.ERROR_SHARING_VIOLATION:
# The file could be a hardlink to an already opened file,
# in which case we use the same handle for both files
fileid = name_to_fileid[x]
found = False
if fileid is not None:
for other in fileid_to_names[fileid]:
other = os.path.normcase(os.path.abspath(os.path.join(path, other)))
if other in self.handle_map:
self.handle_map[f] = self.handle_map[other]
found = True
break
if found:
continue
self.close_handles()
if getattr(e, 'winerror', 0) == winerror.ERROR_SHARING_VIOLATION:
err = IOError(errno.EACCES,
_('File is open in another process'))
err.filename = f
raise err
prints('CreateFile failed for: %r' % f)
raise
except:
self.close_handles()
prints('CreateFile failed for: %r' % f)
raise
self.handle_map[f] = h
def copy_path_to(self, path, dest):
import win32file
handle = None
for p, h in iteritems(self.handle_map):
if samefile_windows(path, p):
handle = h
break
if handle is None:
if os.path.exists(path):
raise ValueError('The file %r did not exist when this move'
' operation was started'%path)
else:
raise ValueError('The file %r does not exist'%path)
try:
windows_hardlink(path, dest)
return
except:
pass
win32file.SetFilePointer(handle, 0, win32file.FILE_BEGIN)
with lopen(dest, 'wb') as f:
while True:
hr, raw = win32file.ReadFile(handle, 1024*1024)
if hr != 0:
raise IOError(hr, 'Error while reading from %r'%path)
if not raw:
break
f.write(raw)
def release_file(self, path):
' Release the lock on the file pointed to by path. Will also release the lock on any hardlinks to path '
key = None
for p, h in iteritems(self.handle_map):
if samefile_windows(path, p):
key = (p, h)
break
if key is not None:
import win32file
win32file.CloseHandle(key[1])
remove = [f for f, h in iteritems(self.handle_map) if h is key[1]]
for x in remove:
self.handle_map.pop(x)
def close_handles(self):
import win32file
for h in itervalues(self.handle_map):
win32file.CloseHandle(h)
self.handle_map = {}
def delete_originals(self):
import win32file
for path in self.handle_map:
win32file.DeleteFile(path)
self.close_handles()
def hardlink_file(src, dest):
if iswindows:
windows_hardlink(src, dest)
return
os.link(src, dest)
def nlinks_file(path):
' Return number of hardlinks to the file '
if iswindows:
return windows_nlinks(path)
return os.stat(path).st_nlink
if iswindows:
def rename_file(a, b):
move_file = plugins['winutil'][0].move_file
if isinstance(a, bytes):
a = a.decode('mbcs')
if isinstance(b, bytes):
b = b.decode('mbcs')
move_file(a, b)
def atomic_rename(oldpath, newpath):
'''Replace the file newpath with the file oldpath. Can fail if the files
are on different volumes. If succeeds, guaranteed to be atomic. newpath may
or may not exist. If it exists, it is replaced. '''
if iswindows:
for i in range(10):
try:
rename_file(oldpath, newpath)
break
except Exception:
if i > 8:
raise
# Try the rename repeatedly in case something like a virus
# scanner has opened one of the files (I love windows)
time.sleep(1)
else:
os.rename(oldpath, newpath)
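# Illustrative usage, the common write-then-replace pattern (path and
# new_data are placeholders):
#   with lopen(path + '.tmp', 'wb') as f:
#       f.write(new_data)
#   atomic_rename(path + '.tmp', path)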
def remove_dir_if_empty(path, ignore_metadata_caches=False):
''' Remove a directory if it is empty or contains only the folder metadata
caches from different OSes. To delete the folder if it contains only
metadata caches, set ignore_metadata_caches to True.'''
try:
os.rmdir(path)
except OSError as e:
if e.errno == errno.ENOTEMPTY or len(os.listdir(path)) > 0:
# Some linux systems appear to raise an EPERM instead of an
# ENOTEMPTY, see https://bugs.launchpad.net/bugs/1240797
if ignore_metadata_caches:
try:
found = False
for x in os.listdir(path):
if x.lower() in {'.ds_store', 'thumbs.db'}:
found = True
x = os.path.join(path, x)
if os.path.isdir(x):
import shutil
shutil.rmtree(x)
else:
os.remove(x)
except Exception: # We could get an error, if, for example, windows has locked Thumbs.db
found = False
if found:
remove_dir_if_empty(path)
return
raise
expanduser = os.path.expanduser
def format_permissions(st_mode):
import stat
for func, letter in (x.split(':') for x in 'REG:- DIR:d BLK:b CHR:c FIFO:p LNK:l SOCK:s'.split()):
if getattr(stat, 'S_IS' + func)(st_mode):
break
else:
letter = '?'
rwx = ('---', '--x', '-w-', '-wx', 'r--', 'r-x', 'rw-', 'rwx')
ans = [letter] + list(rwx[(st_mode >> 6) & 7]) + list(rwx[(st_mode >> 3) & 7]) + list(rwx[(st_mode & 7)])
if st_mode & stat.S_ISUID:
ans[3] = 's' if (st_mode & stat.S_IXUSR) else 'S'
if st_mode & stat.S_ISGID:
ans[6] = 's' if (st_mode & stat.S_IXGRP) else 'l'
if st_mode & stat.S_ISVTX:
ans[9] = 't' if (st_mode & stat.S_IXUSR) else 'T'
return ''.join(ans)
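# Examples:
#   import stat
#   format_permissions(stat.S_IFREG | 0o644)   # -> '-rw-r--r--'
#   format_permissions(stat.S_IFDIR | 0o1777)  # -> 'drwxrwxrwt'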
def copyfile(src, dest):
shutil.copyfile(src, dest)
try:
shutil.copystat(src, dest)
except Exception:
pass
def get_hardlink_function(src, dest):
if iswindows:
import win32file, win32api
colon = b':' if isinstance(dest, bytes) else ':'
root = dest[0] + colon
try:
is_suitable = win32file.GetDriveType(root) not in (win32file.DRIVE_REMOTE, win32file.DRIVE_CDROM)
# See https://msdn.microsoft.com/en-us/library/windows/desktop/aa364993(v=vs.85).aspx
supports_hard_links = win32api.GetVolumeInformation(root + os.sep)[3] & 0x00400000
except Exception:
supports_hard_links = is_suitable = False
hardlink = windows_fast_hardlink if is_suitable and supports_hard_links and src[0].lower() == dest[0].lower() else None
else:
hardlink = os.link
return hardlink
def copyfile_using_links(path, dest, dest_is_dir=True, filecopyfunc=copyfile):
path, dest = os.path.abspath(path), os.path.abspath(dest)
if dest_is_dir:
dest = os.path.join(dest, os.path.basename(path))
hardlink = get_hardlink_function(path, dest)
try:
hardlink(path, dest)
except Exception:
filecopyfunc(path, dest)
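# Illustrative usage (paths are placeholders); a hardlink is attempted first,
# with a plain copy as the fallback:
#   copyfile_using_links('/books/a.epub', '/backup')  # dest is a directory
#   copyfile_using_links('/books/a.epub', '/backup/b.epub', dest_is_dir=False)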
def copytree_using_links(path, dest, dest_is_parent=True, filecopyfunc=copyfile):
path, dest = os.path.abspath(path), os.path.abspath(dest)
if dest_is_parent:
dest = os.path.join(dest, os.path.basename(path))
hardlink = get_hardlink_function(path, dest)
try:
os.makedirs(dest)
except EnvironmentError as e:
if e.errno != errno.EEXIST:
raise
for dirpath, dirnames, filenames in os.walk(path):
base = os.path.relpath(dirpath, path)
dest_base = os.path.join(dest, base)
for dname in dirnames:
try:
os.mkdir(os.path.join(dest_base, dname))
except EnvironmentError as e:
if e.errno != errno.EEXIST:
raise
for fname in filenames:
src, df = os.path.join(dirpath, fname), os.path.join(dest_base, fname)
try:
hardlink(src, df)
except Exception:
filecopyfunc(src, df)
if not ispy3 and not iswindows:
# On POSIX in python2 if you pass a unicode path to rmtree
# it tries to decode all filenames it encounters while walking
# the tree which leads to unicode errors on Linux where there
# can be non-decodeable filenames.
def rmtree(x, **kw):
if not isinstance(x, bytes):
x = x.encode('utf-8')
return shutil.rmtree(x, **kw)
else:
rmtree = shutil.rmtree

View File

@@ -0,0 +1,7 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@@ -0,0 +1,122 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from io import BytesIO
from struct import calcsize, unpack, unpack_from
from collections import namedtuple
from calibre.utils.fonts.utils import get_font_names2, get_font_characteristics
from polyglot.builtins import range, unicode_type
class UnsupportedFont(ValueError):
pass
FontCharacteristics = namedtuple('FontCharacteristics',
'weight, is_italic, is_bold, is_regular, fs_type, panose, width, is_oblique, is_wws, os2_version')
FontNames = namedtuple('FontNames',
'family_name, subfamily_name, full_name, preferred_family_name, preferred_subfamily_name, wws_family_name, wws_subfamily_name')
class FontMetadata(object):
def __init__(self, bytes_or_stream):
if not hasattr(bytes_or_stream, 'read'):
bytes_or_stream = BytesIO(bytes_or_stream)
f = bytes_or_stream
f.seek(0)
header = f.read(4)
if header not in {b'\x00\x01\x00\x00', b'OTTO'}:
raise UnsupportedFont('Not a supported sfnt variant')
self.is_otf = header == b'OTTO'
self.read_table_metadata(f)
self.read_names(f)
self.read_characteristics(f)
f.seek(0)
self.font_family = self.names.family_name
wt = self.characteristics.weight
if wt == 400:
wt = 'normal'
elif wt == 700:
wt = 'bold'
else:
wt = unicode_type(wt)
self.font_weight = wt
self.font_stretch = ('ultra-condensed', 'extra-condensed',
'condensed', 'semi-condensed', 'normal', 'semi-expanded',
'expanded', 'extra-expanded', 'ultra-expanded')[
self.characteristics.width-1]
if self.characteristics.is_oblique:
self.font_style = 'oblique'
elif self.characteristics.is_italic:
self.font_style = 'italic'
else:
self.font_style = 'normal'
def read_table_metadata(self, f):
f.seek(4)
num_tables = unpack(b'>H', f.read(2))[0]
# Start of table record entries
f.seek(4 + 4*2)
table_record = b'>4s3L'
sz = calcsize(table_record)
self.tables = {}
block = f.read(sz * num_tables)
for i in range(num_tables):
table_tag, table_checksum, table_offset, table_length = \
unpack_from(table_record, block, i*sz)
self.tables[table_tag.lower()] = (table_offset, table_length,
table_checksum)
def read_names(self, f):
if b'name' not in self.tables:
raise UnsupportedFont('This font has no name table')
toff, tlen = self.tables[b'name'][:2]
f.seek(toff)
table = f.read(tlen)
if len(table) != tlen:
raise UnsupportedFont('This font has a name table of incorrect length')
vals = get_font_names2(table, raw_is_table=True)
self.names = FontNames(*vals)
def read_characteristics(self, f):
if b'os/2' not in self.tables:
raise UnsupportedFont('This font has no OS/2 table')
toff, tlen = self.tables[b'os/2'][:2]
f.seek(toff)
table = f.read(tlen)
if len(table) != tlen:
raise UnsupportedFont('This font has an OS/2 table of incorrect length')
vals = get_font_characteristics(table, raw_is_table=True)
self.characteristics = FontCharacteristics(*vals)
def to_dict(self):
ans = {
'is_otf':self.is_otf,
'font-family':self.font_family,
'font-weight':self.font_weight,
'font-style':self.font_style,
'font-stretch':self.font_stretch
}
for f in self.names._fields:
ans[f] = getattr(self.names, f)
for f in self.characteristics._fields:
ans[f] = getattr(self.characteristics, f)
return ans
if __name__ == '__main__':
import sys
with open(sys.argv[-1], 'rb') as f:
fm = FontMetadata(f)
import pprint
pprint.pprint(fm.to_dict())

View File

@@ -0,0 +1,412 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os
from collections import defaultdict
from threading import Thread
from calibre import walk, prints, as_unicode
from calibre.constants import (config_dir, iswindows, isosx, plugins, DEBUG,
isworker, filesystem_encoding)
from calibre.utils.fonts.metadata import FontMetadata, UnsupportedFont
from calibre.utils.icu import sort_key
from polyglot.builtins import itervalues, unicode_type, filter
class NoFonts(ValueError):
pass
# Font dirs {{{
def default_font_dirs():
return [
'/opt/share/fonts',
'/usr/share/fonts',
'/usr/local/share/fonts',
os.path.expanduser('~/.local/share/fonts'),
os.path.expanduser('~/.fonts')
]
def fc_list():
import ctypes
from ctypes.util import find_library
lib = find_library('fontconfig')
if lib is None:
return default_font_dirs()
try:
lib = ctypes.CDLL(lib)
except:
return default_font_dirs()
prototype = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p)
try:
get_font_dirs = prototype(('FcConfigGetFontDirs', lib))
except (AttributeError):
return default_font_dirs()
prototype = ctypes.CFUNCTYPE(ctypes.c_char_p, ctypes.c_void_p)
try:
next_dir = prototype(('FcStrListNext', lib))
except (AttributeError):
return default_font_dirs()
prototype = ctypes.CFUNCTYPE(None, ctypes.c_void_p)
try:
end = prototype(('FcStrListDone', lib))
except (AttributeError):
return default_font_dirs()
str_list = get_font_dirs(ctypes.c_void_p())
if not str_list:
return default_font_dirs()
ans = []
while True:
d = next_dir(str_list)
if not d:
break
if d:
try:
ans.append(d.decode(filesystem_encoding))
except ValueError:
prints('Ignoring undecodeable font path: %r' % d)
continue
end(str_list)
if len(ans) < 3:
return default_font_dirs()
parents, visited = [], set()
for f in ans:
path = os.path.normpath(os.path.abspath(os.path.realpath(f)))
if path == '/':
continue
head, tail = os.path.split(path)
while head and tail:
if head in visited:
break
head, tail = os.path.split(head)
else:
parents.append(path)
visited.add(path)
return parents
def font_dirs():
if iswindows:
winutil, err = plugins['winutil']
if err:
raise RuntimeError('Failed to load winutil: %s'%err)
try:
return [winutil.special_folder_path(winutil.CSIDL_FONTS)]
except ValueError:
return [r'C:\Windows\Fonts']
if isosx:
return [
'/Library/Fonts',
'/System/Library/Fonts',
'/usr/share/fonts',
'/var/root/Library/Fonts',
os.path.expanduser('~/.fonts'),
os.path.expanduser('~/Library/Fonts'),
]
return fc_list()
# }}}
# Build font family maps {{{
def font_priority(font):
'''
Try to ensure that the "Regular" face is the first font for a given
family.
'''
style_normal = font['font-style'] == 'normal'
width_normal = font['font-stretch'] == 'normal'
weight_normal = font['font-weight'] == 'normal'
num_normal = sum(filter(None, (style_normal, width_normal,
weight_normal)))
subfamily_name = (font['wws_subfamily_name'] or
font['preferred_subfamily_name'] or font['subfamily_name'])
if num_normal == 3 and subfamily_name == 'Regular':
return 0
if num_normal == 3:
return 1
if subfamily_name == 'Regular':
return 2
return 3 + (3 - num_normal)
def path_significance(path, folders):
path = os.path.normcase(os.path.abspath(path))
for i, q in enumerate(folders):
if path.startswith(q):
return i
return -1
def build_families(cached_fonts, folders, family_attr='font-family'):
families = defaultdict(list)
for f in itervalues(cached_fonts):
if not f:
continue
lf = icu_lower(f.get(family_attr) or '')
if lf:
families[lf].append(f)
for fonts in itervalues(families):
# Look for duplicate font files and choose the copy that is from a
# more significant font directory (prefer user directories over
# system directories).
fmap = {}
remove = []
for f in fonts:
fingerprint = (icu_lower(f['font-family']), f['font-weight'],
f['font-stretch'], f['font-style'])
if fingerprint in fmap:
opath = fmap[fingerprint]['path']
npath = f['path']
if path_significance(npath, folders) >= path_significance(opath, folders):
remove.append(fmap[fingerprint])
fmap[fingerprint] = f
else:
remove.append(f)
else:
fmap[fingerprint] = f
for font in remove:
fonts.remove(font)
fonts.sort(key=font_priority)
font_family_map = dict.copy(families)
font_families = tuple(sorted((f[0]['font-family'] for f in
itervalues(font_family_map)), key=sort_key))
return font_family_map, font_families
# }}}
class FontScanner(Thread):
CACHE_VERSION = 2
def __init__(self, folders=[], allowed_extensions={'ttf', 'otf'}):
Thread.__init__(self)
self.folders = folders + font_dirs() + [os.path.join(config_dir, 'fonts'),
P('fonts/liberation')]
self.folders = [os.path.normcase(os.path.abspath(f)) for f in
self.folders]
self.font_families = ()
self.allowed_extensions = allowed_extensions
# API {{{
def find_font_families(self):
self.join()
return self.font_families
def fonts_for_family(self, family):
'''
Return a list of the faces belonging to the specified family. The first
face is the "Regular" face of family. Each face is a dictionary with
many keys, the most important of which are: path, font-family,
font-weight, font-style, font-stretch. The font-* properties follow the
CSS 3 Fonts specification.
'''
self.join()
try:
return self.font_family_map[icu_lower(family)]
except KeyError:
raise NoFonts('No fonts found for the family: %r'%family)
def legacy_fonts_for_family(self, family):
'''
Return a simple set of regular, bold, italic and bold-italic faces for
the specified family. Returns a dictionary with each element being a
2-tuple of (path to font, full font name) and the keys being: normal,
bold, italic, bi.
'''
ans = {}
try:
faces = self.fonts_for_family(family)
except NoFonts:
return ans
for i, face in enumerate(faces):
if i == 0:
key = 'normal'
elif face['font-style'] in {'italic', 'oblique'}:
key = 'bi' if face['font-weight'] == 'bold' else 'italic'
elif face['font-weight'] == 'bold':
key = 'bold'
else:
continue
ans[key] = (face['path'], face['full_name'])
return ans
def get_font_data(self, font_or_path):
path = font_or_path
if isinstance(font_or_path, dict):
path = font_or_path['path']
with lopen(path, 'rb') as f:
return f.read()
def find_font_for_text(self, text, allowed_families={'serif', 'sans-serif'},
preferred_families=('serif', 'sans-serif', 'monospace', 'cursive', 'fantasy')):
'''
Find a font on the system capable of rendering the given text.
Returns a font family (as given by fonts_for_family()) that has a
"normal" font and that can render the supplied text. If no such font
exists, returns None.
:return: (family name, faces) or None, None
'''
from calibre.utils.fonts.utils import (supports_text,
panose_to_css_generic_family, get_printable_characters)
if not isinstance(text, unicode_type):
raise TypeError(u'%r is not unicode'%text)
text = get_printable_characters(text)
found = {}
def filter_faces(font):
try:
raw = self.get_font_data(font)
return supports_text(raw, text)
except:
pass
return False
for family in self.find_font_families():
faces = list(filter(filter_faces, self.fonts_for_family(family)))
if not faces:
continue
generic_family = panose_to_css_generic_family(faces[0]['panose'])
if generic_family in allowed_families or generic_family == preferred_families[0]:
return (family, faces)
elif generic_family not in found:
found[generic_family] = (family, faces)
for f in preferred_families:
if f in found:
return found[f]
return None, None
# }}}
def reload_cache(self):
if not hasattr(self, 'cache'):
from calibre.utils.config import JSONConfig
self.cache = JSONConfig('fonts/scanner_cache')
else:
self.cache.refresh()
if self.cache.get('version', None) != self.CACHE_VERSION:
self.cache.clear()
self.cached_fonts = self.cache.get('fonts', {})
def run(self):
self.do_scan()
def do_scan(self):
self.reload_cache()
if isworker:
# Don't scan font files in worker processes, use whatever is
# cached. Font files typically don't change frequently enough to
# justify a rescan in a worker process.
self.build_families()
return
cached_fonts = self.cached_fonts.copy()
self.cached_fonts.clear()
for folder in self.folders:
if not os.path.isdir(folder):
continue
try:
files = tuple(walk(folder))
except EnvironmentError as e:
if DEBUG:
prints('Failed to walk font folder:', folder,
as_unicode(e))
continue
for candidate in files:
if (candidate.rpartition('.')[-1].lower() not in self.allowed_extensions or not os.path.isfile(candidate)):
continue
candidate = os.path.normcase(os.path.abspath(candidate))
try:
s = os.stat(candidate)
except EnvironmentError:
continue
fileid = '{0}||{1}:{2}'.format(candidate, s.st_size, s.st_mtime)
if fileid in cached_fonts:
# Use previously cached metadata, since the file size and
# last modified timestamp have not changed.
self.cached_fonts[fileid] = cached_fonts[fileid]
continue
try:
self.read_font_metadata(candidate, fileid)
except Exception as e:
if DEBUG:
prints('Failed to read metadata from font file:',
candidate, as_unicode(e))
continue
if frozenset(cached_fonts) != frozenset(self.cached_fonts):
# Write out the cache only if some font files have changed
self.write_cache()
self.build_families()
def build_families(self):
self.font_family_map, self.font_families = build_families(self.cached_fonts, self.folders)
def write_cache(self):
with self.cache:
self.cache['version'] = self.CACHE_VERSION
self.cache['fonts'] = self.cached_fonts
def force_rescan(self):
self.cached_fonts = {}
self.write_cache()
def read_font_metadata(self, path, fileid):
with lopen(path, 'rb') as f:
try:
fm = FontMetadata(f)
except UnsupportedFont:
self.cached_fonts[fileid] = {}
else:
data = fm.to_dict()
data['path'] = path
self.cached_fonts[fileid] = data
def dump_fonts(self):
self.join()
for family in self.font_families:
prints(family)
for font in self.fonts_for_family(family):
prints('\t%s: %s'%(font['full_name'], font['path']))
prints(end='\t')
for key in ('font-stretch', 'font-weight', 'font-style'):
prints('%s: %s'%(key, font[key]), end=' ')
prints()
prints('\tSub-family:', font['wws_subfamily_name'] or
font['preferred_subfamily_name'] or
font['subfamily_name'])
prints()
prints()
font_scanner = FontScanner()
font_scanner.start()
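# Illustrative usage of the module-level scanner (the family name is a
# placeholder); each call joins the scan thread before answering:
#   families = font_scanner.find_font_families()
#   faces = font_scanner.fonts_for_family('Liberation Serif')
#   family, faces = font_scanner.find_font_for_text(u'Hello')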
def force_rescan():
font_scanner.join()
font_scanner.force_rescan()
font_scanner.run()
if __name__ == '__main__':
font_scanner.dump_fonts()

View File

@@ -0,0 +1,503 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct
from io import BytesIO
from collections import defaultdict
from polyglot.builtins import iteritems, itervalues, unicode_type, range, as_bytes
class UnsupportedFont(ValueError):
pass
def get_printable_characters(text):
import unicodedata
return u''.join(x for x in unicodedata.normalize('NFC', text)
if unicodedata.category(x)[0] not in {'C', 'Z', 'M'})
def is_truetype_font(raw):
sfnt_version = raw[:4]
return (sfnt_version in {b'\x00\x01\x00\x00', b'OTTO'}, sfnt_version)
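# Illustrative usage (path is a placeholder):
#   with open(path, 'rb') as f:
#       ok, version = is_truetype_font(f.read())
#   # ok is True for sfnt version b'\x00\x01\x00\x00' (TrueType) or b'OTTO' (CFF)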
def get_tables(raw):
num_tables = struct.unpack_from(b'>H', raw, 4)[0]
offset = 4*3 # start of the table record entries
for i in range(num_tables):
table_tag, table_checksum, table_offset, table_length = struct.unpack_from(
b'>4s3L', raw, offset)
yield (table_tag, raw[table_offset:table_offset+table_length], offset,
table_offset, table_checksum)
offset += 4*4
def get_table(raw, name):
''' Get the raw table bytes for the specified table in the font '''
name = as_bytes(name.lower())
for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
if table_tag.lower() == name:
return table, table_index, table_offset, table_checksum
return None, None, None, None
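# Illustrative usage:
#   table, index, offset, checksum = get_table(raw, 'head')
#   # table is None if the font has no 'head' table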
def get_font_characteristics(raw, raw_is_table=False, return_all=False):
'''
Return (weight, is_italic, is_bold, is_regular, fs_type, panose, width,
is_oblique, is_wws). These
values are taken from the OS/2 table of the font. See
http://www.microsoft.com/typography/otspec/os2.htm for details
'''
if raw_is_table:
os2_table = raw
else:
os2_table = get_table(raw, 'os/2')[0]
if os2_table is None:
raise UnsupportedFont('Not a supported font, has no OS/2 table')
common_fields = b'>Hh3H11h'
(version, char_width, weight, width, fs_type, subscript_x_size,
subscript_y_size, subscript_x_offset, subscript_y_offset,
superscript_x_size, superscript_y_size, superscript_x_offset,
superscript_y_offset, strikeout_size, strikeout_position,
family_class) = struct.unpack_from(common_fields, os2_table)
offset = struct.calcsize(common_fields)
panose = struct.unpack_from(b'>10B', os2_table, offset)
offset += 10
(range1, range2, range3, range4) = struct.unpack_from(b'>4L', os2_table, offset)
offset += struct.calcsize(b'>4L')
vendor_id = os2_table[offset:offset+4]
vendor_id
offset += 4
selection, = struct.unpack_from(b'>H', os2_table, offset)
is_italic = (selection & (1 << 0)) != 0
is_bold = (selection & (1 << 5)) != 0
is_regular = (selection & (1 << 6)) != 0
is_wws = (selection & (1 << 8)) != 0
is_oblique = (selection & (1 << 9)) != 0
if return_all:
return (version, char_width, weight, width, fs_type, subscript_x_size,
subscript_y_size, subscript_x_offset, subscript_y_offset,
superscript_x_size, superscript_y_size, superscript_x_offset,
superscript_y_offset, strikeout_size, strikeout_position,
family_class, panose, selection, is_italic, is_bold, is_regular)
return weight, is_italic, is_bold, is_regular, fs_type, panose, width, is_oblique, is_wws, version
def panose_to_css_generic_family(panose):
proportion = panose[3]
if proportion == 9:
return 'monospace'
family_type = panose[0]
if family_type == 3:
return 'cursive'
if family_type == 4:
return 'fantasy'
serif_style = panose[1]
if serif_style in (11, 12, 13):
return 'sans-serif'
return 'serif'
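# Example: a PANOSE tuple whose proportion byte (index 3) is 9 indicates a
# monospaced design (the other bytes below are placeholder values):
#   panose_to_css_generic_family((2, 11, 6, 9, 2, 2, 2, 2, 2, 4))  # -> 'monospace'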
def decode_name_record(recs):
'''
Get the English names of this font. See
http://www.microsoft.com/typography/otspec/name.htm for details.
'''
if not recs:
return None
unicode_names = {}
windows_names = {}
mac_names = {}
for platform_id, encoding_id, language_id, src in recs:
if language_id > 0x8000:
continue
if platform_id == 0:
if encoding_id < 4:
try:
unicode_names[language_id] = src.decode('utf-16-be')
except ValueError:
continue
elif platform_id == 1:
try:
mac_names[language_id] = src.decode('utf-8')
except ValueError:
continue
elif platform_id == 2:
codec = {0:'ascii', 1:'utf-16-be', 2:'iso-8859-1'}.get(encoding_id,
None)
if codec is None:
continue
try:
unicode_names[language_id] = src.decode(codec)
except ValueError:
continue
elif platform_id == 3:
codec = {1:16, 10:32}.get(encoding_id, None)
if codec is None:
continue
try:
windows_names[language_id] = src.decode('utf-%d-be'%codec)
except ValueError:
continue
    # First try the windows names, looking for the US English name
if 1033 in windows_names:
return windows_names[1033]
    # Look for some other English name variant
for lang in (3081, 10249, 4105, 9225, 16393, 6153, 8201, 17417, 5129,
13321, 18441, 7177, 11273, 2057, 12297):
if lang in windows_names:
return windows_names[lang]
# Look for Mac name
if 0 in mac_names:
return mac_names[0]
# Use unicode names
for val in itervalues(unicode_names):
return val
return None
def _get_font_names(raw, raw_is_table=False):
if raw_is_table:
table = raw
else:
table = get_table(raw, 'name')[0]
if table is None:
raise UnsupportedFont('Not a supported font, has no name table')
table_type, count, string_offset = struct.unpack_from(b'>3H', table)
records = defaultdict(list)
for i in range(count):
try:
platform_id, encoding_id, language_id, name_id, length, offset = \
struct.unpack_from(b'>6H', table, 6+i*12)
except struct.error:
break
offset += string_offset
src = table[offset:offset+length]
records[name_id].append((platform_id, encoding_id, language_id,
src))
return records
def get_font_names(raw, raw_is_table=False):
records = _get_font_names(raw, raw_is_table)
family_name = decode_name_record(records[1])
subfamily_name = decode_name_record(records[2])
full_name = decode_name_record(records[4])
return family_name, subfamily_name, full_name
def get_font_names2(raw, raw_is_table=False):
records = _get_font_names(raw, raw_is_table)
family_name = decode_name_record(records[1])
subfamily_name = decode_name_record(records[2])
full_name = decode_name_record(records[4])
preferred_family_name = decode_name_record(records[16])
preferred_subfamily_name = decode_name_record(records[17])
wws_family_name = decode_name_record(records[21])
wws_subfamily_name = decode_name_record(records[22])
return (family_name, subfamily_name, full_name, preferred_family_name,
preferred_subfamily_name, wws_family_name, wws_subfamily_name)
def get_all_font_names(raw, raw_is_table=False):
records = _get_font_names(raw, raw_is_table)
ans = {}
for name, num in iteritems({'family_name':1, 'subfamily_name':2, 'full_name':4,
'preferred_family_name':16, 'preferred_subfamily_name':17,
'wws_family_name':21, 'wws_subfamily_name':22}):
try:
ans[name] = decode_name_record(records[num])
except (IndexError, KeyError, ValueError):
continue
if not ans[name]:
del ans[name]
for platform_id, encoding_id, language_id, src in records[6]:
if (platform_id, encoding_id, language_id) == (1, 0, 0):
try:
ans['postscript_name'] = src.decode('utf-8')
break
except ValueError:
continue
elif (platform_id, encoding_id, language_id) == (3, 1, 1033):
try:
ans['postscript_name'] = src.decode('utf-16-be')
break
except ValueError:
continue
return ans
def checksum_of_block(raw):
extra = 4 - len(raw)%4
raw += b'\0'*extra
num = len(raw)//4
return sum(struct.unpack(b'>%dI'%num, raw)) % (1<<32)
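# Worked example: the sfnt checksum is just the sum of the big-endian 32-bit
# words, modulo 2**32; NUL padding cannot change it. The eight bytes below
# form the words 0x00000005 and 0x01000000, so:
#
#   >>> checksum_of_block(b'\x00\x00\x00\x05\x01\x00\x00\x00')
#   16777221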
def verify_checksums(raw):
head_table = None
for table_tag, table, table_index, table_offset, table_checksum in get_tables(raw):
if table_tag.lower() == b'head':
version, fontrev, checksum_adj = struct.unpack_from(b'>ffL', table)
head_table = table
offset = table_offset
checksum = table_checksum
elif checksum_of_block(table) != table_checksum:
raise ValueError('The %r table has an incorrect checksum'%table_tag)
if head_table is not None:
table = head_table
table = table[:8] + struct.pack(b'>I', 0) + table[12:]
raw = raw[:offset] + table + raw[offset+len(table):]
# Check the checksum of the head table
if checksum_of_block(table) != checksum:
raise ValueError('Checksum of head table not correct')
# Check the checksum of the entire font
checksum = checksum_of_block(raw)
q = (0xB1B0AFBA - checksum) & 0xffffffff
if q != checksum_adj:
raise ValueError('Checksum of entire font incorrect')
def set_checksum_adjustment(f):
offset = get_table(f.getvalue(), 'head')[2]
offset += 8
f.seek(offset)
f.write(struct.pack(b'>I', 0))
checksum = checksum_of_block(f.getvalue())
q = (0xB1B0AFBA - checksum) & 0xffffffff
f.seek(offset)
f.write(struct.pack(b'>I', q))
def set_table_checksum(f, name):
table, table_index, table_offset, table_checksum = get_table(f.getvalue(), name)
checksum = checksum_of_block(table)
if checksum != table_checksum:
f.seek(table_index + 4)
f.write(struct.pack(b'>I', checksum))
def remove_embed_restriction(raw):
ok, sig = is_truetype_font(raw)
if not ok:
raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sig)
table, table_index, table_offset = get_table(raw, 'os/2')[:3]
if table is None:
raise UnsupportedFont('Not a supported font, has no OS/2 table')
fs_type_offset = struct.calcsize(b'>HhHH')
fs_type = struct.unpack_from(b'>H', table, fs_type_offset)[0]
if fs_type == 0:
return raw
f = BytesIO(raw)
f.seek(fs_type_offset + table_offset)
f.write(struct.pack(b'>H', 0))
set_table_checksum(f, 'os/2')
set_checksum_adjustment(f)
raw = f.getvalue()
verify_checksums(raw)
return raw
def is_font_embeddable(raw):
# https://www.microsoft.com/typography/otspec/os2.htm#fst
ok, sig = is_truetype_font(raw)
if not ok:
raise UnsupportedFont('Not a supported font, sfnt_version: %r'%sig)
table, table_index, table_offset = get_table(raw, 'os/2')[:3]
if table is None:
raise UnsupportedFont('Not a supported font, has no OS/2 table')
fs_type_offset = struct.calcsize(b'>HhHH')
fs_type = struct.unpack_from(b'>H', table, fs_type_offset)[0]
if fs_type == 0 or fs_type & 0x8:
return True, fs_type
if fs_type & 1:
return False, fs_type
if fs_type & 0x200:
return False, fs_type
return True, fs_type
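# Worked examples for the fsType checks above: a font whose OS/2 fsType is 0
# is installable, and one with bit 0x8 set allows editable embedding, so both
# report True; bit 0x200 (bitmap embedding only) makes the font unusable for
# our purposes, so it reports False.
#
#   fsType 0x0000 -> (True, 0x0000)
#   fsType 0x0008 -> (True, 0x0008)
#   fsType 0x0200 -> (False, 0x0200)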
def read_bmp_prefix(table, bmp):
length, language, segcount = struct.unpack_from(b'>3H', table, bmp+2)
array_len = segcount //2
offset = bmp + 7*2
array_sz = 2*array_len
array = b'>%dH'%array_len
end_count = struct.unpack_from(array, table, offset)
offset += array_sz + 2
start_count = struct.unpack_from(array, table, offset)
offset += array_sz
id_delta = struct.unpack_from(array.replace(b'H', b'h'), table, offset)
offset += array_sz
range_offset = struct.unpack_from(array, table, offset)
if length + bmp < offset + array_sz:
raise ValueError('cmap subtable length is too small')
glyph_id_len = (length + bmp - (offset + array_sz))//2
glyph_id_map = struct.unpack_from(b'>%dH'%glyph_id_len, table, offset +
array_sz)
return (start_count, end_count, range_offset, id_delta, glyph_id_len,
glyph_id_map, array_len)
def get_bmp_glyph_ids(table, bmp, codes):
(start_count, end_count, range_offset, id_delta, glyph_id_len,
glyph_id_map, array_len) = read_bmp_prefix(table, bmp)
for code in codes:
found = False
for i, ec in enumerate(end_count):
if ec >= code:
sc = start_count[i]
if sc <= code:
found = True
ro = range_offset[i]
if ro == 0:
glyph_id = id_delta[i] + code
else:
idx = ro//2 + (code - sc) + i - array_len
glyph_id = glyph_id_map[idx]
if glyph_id != 0:
glyph_id += id_delta[i]
yield glyph_id % 0x10000
break
if not found:
yield 0
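# Worked example of the segment arithmetic above (hypothetical segment): for
# a segment with start_count 0x41, end_count 0x5A, id_delta -0x3F and
# range_offset 0, the code point for 'A' (0x41) maps to glyph
# (0x41 + -0x3F) % 0x10000 == 2, i.e. glyph_id is id_delta + code, modulo 64k.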
def get_glyph_ids(raw, text, raw_is_table=False):
if not isinstance(text, unicode_type):
raise TypeError('%r is not a unicode object'%text)
if raw_is_table:
table = raw
else:
table = get_table(raw, 'cmap')[0]
if table is None:
raise UnsupportedFont('Not a supported font, has no cmap table')
version, num_tables = struct.unpack_from(b'>HH', table)
bmp_table = None
for i in range(num_tables):
platform_id, encoding_id, offset = struct.unpack_from(b'>HHL', table,
4 + (i*8))
if platform_id == 3 and encoding_id == 1:
table_format = struct.unpack_from(b'>H', table, offset)[0]
if table_format == 4:
bmp_table = offset
break
if bmp_table is None:
raise UnsupportedFont('Not a supported font, has no format 4 cmap table')
for glyph_id in get_bmp_glyph_ids(table, bmp_table, map(ord, text)):
yield glyph_id
def supports_text(raw, text, has_only_printable_chars=False):
if not isinstance(text, unicode_type):
raise TypeError('%r is not a unicode object'%text)
if not has_only_printable_chars:
text = get_printable_characters(text)
try:
for glyph_id in get_glyph_ids(raw, text):
if glyph_id == 0:
return False
    except Exception:
return False
return True
def get_font_for_text(text, candidate_font_data=None):
ok = False
if candidate_font_data is not None:
ok = supports_text(candidate_font_data, text)
if not ok:
from calibre.utils.fonts.scanner import font_scanner
family, faces = font_scanner.find_font_for_text(text)
if faces:
with lopen(faces[0]['path'], 'rb') as f:
candidate_font_data = f.read()
return candidate_font_data
def test_glyph_ids():
from calibre.utils.fonts.free_type import FreeType
data = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
ft = FreeType()
font = ft.load_font(data)
text = u'诶йab'
ft_glyphs = tuple(font.glyph_ids(text))
glyphs = tuple(get_glyph_ids(data, text))
if ft_glyphs != glyphs:
raise Exception('My code and FreeType differ on the glyph ids')
def test_supports_text():
data = P('fonts/calibreSymbols.otf', data=True)
if not supports_text(data, '.★½'):
raise RuntimeError('Incorrectly returning that text is not supported')
if supports_text(data, 'abc'):
raise RuntimeError('Incorrectly claiming that text is supported')
def test_find_font():
from calibre.utils.fonts.scanner import font_scanner
abcd = '诶比西迪'
    family = font_scanner.find_font_for_text(abcd)[0]
    print('Family for Chinese text:', family)
    abcd = 'لوحة المفاتيح العربية'
    family = font_scanner.find_font_for_text(abcd)[0]
    print('Family for Arabic text:', family)
def test():
test_glyph_ids()
test_supports_text()
test_find_font()
def main():
import sys, os
for arg in sys.argv[1:]:
print(os.path.basename(arg))
with open(arg, 'rb') as f:
raw = f.read()
print(get_font_names(raw))
characs = get_font_characteristics(raw)
print(characs)
print(panose_to_css_generic_family(characs[5]))
verify_checksums(raw)
remove_embed_restriction(raw)
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,416 @@
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Created on 23 Sep 2010
@author: charles
'''
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, string, traceback, numbers
from calibre import prints
from calibre.constants import DEBUG
from calibre.utils.formatter_functions import formatter_functions
from polyglot.builtins import unicode_type, error_message
class _Parser(object):
LEX_OP = 1
LEX_ID = 2
LEX_STR = 3
LEX_NUM = 4
LEX_EOF = 5
LEX_CONSTANTS = frozenset((LEX_STR, LEX_NUM))
def __init__(self, val, prog, funcs, parent):
self.lex_pos = 0
self.prog = prog[0]
self.prog_len = len(self.prog)
if prog[1] != '':
self.error(_('failed to scan program. Invalid input {0}').format(prog[1]))
self.parent = parent
self.parent_kwargs = parent.kwargs
self.parent_book = parent.book
self.locals = {'$':val}
self.funcs = funcs
def error(self, message):
m = 'Formatter: ' + message + _(' near ')
if self.lex_pos > 0:
m = '{0} {1}'.format(m, self.prog[self.lex_pos-1][1])
        elif self.lex_pos < self.prog_len:
            m = '{0} {1}'.format(m, self.prog[self.lex_pos][1])
else:
m = '{0} {1}'.format(m, _('end of program'))
raise ValueError(m)
def token(self):
if self.lex_pos >= self.prog_len:
return None
token = self.prog[self.lex_pos][1]
self.lex_pos += 1
return token
def consume(self):
self.lex_pos += 1
def token_op_is_a_equals(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == '='
def token_op_is_a_lparen(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == '('
def token_op_is_a_rparen(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == ')'
def token_op_is_a_comma(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == ','
def token_op_is_a_semicolon(self):
if self.lex_pos >= self.prog_len:
return False
token = self.prog[self.lex_pos]
return token[0] == self.LEX_OP and token[1] == ';'
def token_is_id(self):
if self.lex_pos >= self.prog_len:
return False
return self.prog[self.lex_pos][0] == self.LEX_ID
def token_is_constant(self):
if self.lex_pos >= self.prog_len:
return False
return self.prog[self.lex_pos][0] in self.LEX_CONSTANTS
def token_is_eof(self):
if self.lex_pos >= self.prog_len:
return True
token = self.prog[self.lex_pos]
return token[0] == self.LEX_EOF
def program(self):
val = self.statement()
if not self.token_is_eof():
self.error(_('syntax error - program ends before EOF'))
return val
def statement(self):
while True:
val = self.expr()
if self.token_is_eof():
return val
if not self.token_op_is_a_semicolon():
return val
self.consume()
if self.token_is_eof():
return val
def expr(self):
if self.token_is_id():
# We have an identifier. Determine if it is a function
id = self.token()
if not self.token_op_is_a_lparen():
if self.token_op_is_a_equals():
# classic assignment statement
self.consume()
cls = self.funcs['assign']
return cls.eval_(self.parent, self.parent_kwargs,
self.parent_book, self.locals, id, self.expr())
val = self.locals.get(id, None)
if val is None:
self.error(_('Unknown identifier ') + id)
return val
# We have a function.
# Check if it is a known one. We do this here so error reporting is
# better, as it can identify the tokens near the problem.
id = id.strip()
if id not in self.funcs:
self.error(_('unknown function {0}').format(id))
# Eat the paren
self.consume()
args = list()
while not self.token_op_is_a_rparen():
if id == 'assign' and len(args) == 0:
# Must handle the lvalue semantics of the assign function.
# The first argument is the name of the destination, not
# the value.
if not self.token_is_id():
                        self.error(_('assign requires the first parameter to be an id'))
args.append(self.token())
else:
# evaluate the argument (recursive call)
args.append(self.statement())
if not self.token_op_is_a_comma():
break
self.consume()
if self.token() != ')':
self.error(_('missing closing parenthesis'))
# Evaluate the function
cls = self.funcs[id]
if cls.arg_count != -1 and len(args) != cls.arg_count:
                self.error(_('incorrect number of arguments for function {0}').format(id))
return cls.eval_(self.parent, self.parent_kwargs,
self.parent_book, self.locals, *args)
elif self.token_is_constant():
# String or number
return self.token()
else:
self.error(_('expression is not function or constant'))
class TemplateFormatter(string.Formatter):
'''
Provides a format function that substitutes '' for any missing value
'''
_validation_string = 'This Is Some Text THAT SHOULD be LONG Enough.%^&*'
# Dict to do recursion detection. It is up to the individual get_value
# method to use it. It is cleared when starting to format a template
composite_values = {}
def __init__(self):
string.Formatter.__init__(self)
self.book = None
self.kwargs = None
self.strip_results = True
self.locals = {}
self.funcs = formatter_functions().get_functions()
def _do_format(self, val, fmt):
if not fmt or not val:
return val
if val == self._validation_string:
val = '0'
typ = fmt[-1]
if typ == 's':
pass
elif 'bcdoxXn'.find(typ) >= 0:
try:
val = int(val)
except Exception:
raise ValueError(
_('format: type {0} requires an integer value, got {1}').format(typ, val))
elif 'eEfFgGn%'.find(typ) >= 0:
try:
val = float(val)
            except Exception:
raise ValueError(
_('format: type {0} requires a decimal (float) value, got {1}').format(typ, val))
return unicode_type(('{0:'+fmt+'}').format(val))
def _explode_format_string(self, fmt):
try:
matches = self.format_string_re.match(fmt)
if matches is None or matches.lastindex != 3:
return fmt, '', ''
return matches.groups()
        except Exception:
if DEBUG:
traceback.print_exc()
return fmt, '', ''
format_string_re = re.compile(r'^(.*)\|([^\|]*)\|(.*)$', re.DOTALL)
compress_spaces = re.compile(r'\s+')
backslash_comma_to_comma = re.compile(r'\\,')
arg_parser = re.Scanner([
(r',', lambda x,t: ''),
(r'.*?((?<!\\),)', lambda x,t: t[:-1]),
(r'.*?\)', lambda x,t: t[:-1]),
])
# ################# 'Functional' template language ######################
lex_scanner = re.Scanner([
(r'[(),=;]', lambda x,t: (1, t)),
(r'-?[\d\.]+', lambda x,t: (3, t)),
(r'\$', lambda x,t: (2, t)),
(r'\w+', lambda x,t: (2, t)),
(r'".*?((?<!\\)")', lambda x,t: (3, t[1:-1])),
(r'\'.*?((?<!\\)\')', lambda x,t: (3, t[1:-1])),
(r'\n#.*?(?:(?=\n)|$)', None),
(r'\s', None)
], flags=re.DOTALL)
def _eval_program(self, val, prog, column_name):
# keep a cache of the lex'ed program under the theory that re-lexing
# is much more expensive than the cache lookup. This is certainly true
# for more than a few tokens, but it isn't clear for simple programs.
if column_name is not None and self.template_cache is not None:
lprog = self.template_cache.get(column_name, None)
if not lprog:
lprog = self.lex_scanner.scan(prog)
self.template_cache[column_name] = lprog
else:
lprog = self.lex_scanner.scan(prog)
parser = _Parser(val, lprog, self.funcs, self)
return parser.program()
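    # Illustrative program in the template language parsed above (hypothetical
    # column value; 'test' is one of the built-in formatter functions): assign
    # the current value to a local, then branch on whether it is empty.
    #
    #   a = $; test(a, 'has a value', 'empty')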
# ################# Override parent classes methods #####################
def get_value(self, key, args, kwargs):
raise Exception('get_value must be implemented in the subclass')
def format_field(self, val, fmt):
# ensure we are dealing with a string.
if isinstance(val, numbers.Number):
if val:
val = unicode_type(val)
else:
val = ''
# Handle conditional text
fmt, prefix, suffix = self._explode_format_string(fmt)
# Handle functions
# First see if we have a functional-style expression
if fmt.startswith('\''):
p = 0
else:
p = fmt.find(':\'')
if p >= 0:
p += 1
if p >= 0 and fmt[-1] == '\'':
val = self._eval_program(val, fmt[p+1:-1], None)
colon = fmt[0:p].find(':')
if colon < 0:
dispfmt = ''
else:
dispfmt = fmt[0:colon]
else:
# check for old-style function references
p = fmt.find('(')
dispfmt = fmt
if p >= 0 and fmt[-1] == ')':
colon = fmt[0:p].find(':')
if colon < 0:
dispfmt = ''
colon = 0
else:
dispfmt = fmt[0:colon]
colon += 1
fname = fmt[colon:p].strip()
if fname in self.funcs:
func = self.funcs[fname]
if func.arg_count == 2:
# only one arg expected. Don't bother to scan. Avoids need
# for escaping characters
args = [fmt[p+1:-1]]
else:
args = self.arg_parser.scan(fmt[p+1:])[0]
args = [self.backslash_comma_to_comma.sub(',', a) for a in args]
if (func.arg_count == 1 and (len(args) != 1 or args[0])) or \
(func.arg_count > 1 and func.arg_count != len(args)+1):
raise ValueError('Incorrect number of arguments for function '+ fmt[0:p])
if func.arg_count == 1:
val = func.eval_(self, self.kwargs, self.book, self.locals, val)
if self.strip_results:
val = val.strip()
else:
val = func.eval_(self, self.kwargs, self.book, self.locals, val, *args)
if self.strip_results:
val = val.strip()
else:
return _('%s: unknown function')%fname
if val:
val = self._do_format(val, dispfmt)
if not val:
return ''
return prefix + val + suffix
def evaluate(self, fmt, args, kwargs):
if fmt.startswith('program:'):
ans = self._eval_program(kwargs.get('$', None), fmt[8:], self.column_name)
else:
ans = self.vformat(fmt, args, kwargs)
if self.strip_results:
return self.compress_spaces.sub(' ', ans).strip()
return ans
# ######### a formatter that throws exceptions ############
def unsafe_format(self, fmt, kwargs, book, strip_results=True):
self.strip_results = strip_results
self.column_name = self.template_cache = None
self.kwargs = kwargs
self.book = book
self.composite_values = {}
self.locals = {}
return self.evaluate(fmt, [], kwargs)
# ######### a formatter guaranteed not to throw an exception ############
def safe_format(self, fmt, kwargs, error_value, book,
column_name=None, template_cache=None,
strip_results=True, template_functions=None):
self.strip_results = strip_results
self.column_name = column_name
self.template_cache = template_cache
self.kwargs = kwargs
self.book = book
if template_functions:
self.funcs = template_functions
else:
self.funcs = formatter_functions().get_functions()
self.composite_values = {}
self.locals = {}
try:
ans = self.evaluate(fmt, [], kwargs)
except Exception as e:
if DEBUG: # and getattr(e, 'is_locking_error', False):
traceback.print_exc()
if column_name:
prints('Error evaluating column named:', column_name)
ans = error_value + ' ' + error_message(e)
return ans
class ValidateFormatter(TemplateFormatter):
'''
Provides a formatter that substitutes the validation string for every value
'''
def get_value(self, key, args, kwargs):
return self._validation_string
def validate(self, x):
from calibre.ebooks.metadata.book.base import Metadata
return self.safe_format(x, {}, 'VALIDATE ERROR', Metadata(''))
validation_formatter = ValidateFormatter()
class EvalFormatter(TemplateFormatter):
'''
A template formatter that uses a simple dict instead of an mi instance
'''
def get_value(self, key, args, kwargs):
if key == '':
return ''
key = key.lower()
return kwargs.get(key, _('No such variable ') + key)
# DEPRECATED. This is not thread safe. Do not use.
eval_formatter = EvalFormatter()
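# Illustrative usage sketch (hypothetical values): EvalFormatter looks keys up
# in the supplied dict, so a simple template can be evaluated without a book.
#
#   f = EvalFormatter()
#   f.safe_format('{title} - {author}', {'title': 'T', 'author': 'A'}, 'ERR', None)
#   # -> 'T - A'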

File diff suppressed because it is too large

View File

@@ -0,0 +1,42 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
def html2text(html):
from html2text import HTML2Text
import re
if isinstance(html, bytes):
from calibre.ebooks.chardet import xml_to_unicode
html = xml_to_unicode(html, strip_encoding_pats=True, resolve_entities=True)[0]
# replace <u> tags with <span> as <u> becomes emphasis in html2text
html = re.sub(
r'<\s*(?P<solidus>/?)\s*[uU]\b(?P<rest>[^>]*)>',
r'<\g<solidus>span\g<rest>>', html)
h2t = HTML2Text()
h2t.default_image_alt = _('Unnamed image')
h2t.body_width = 0
h2t.single_line_break = True
h2t.emphasis_mark = '*'
return h2t.handle(html)
def find_tests():
import unittest
class TestH2T(unittest.TestCase):
def test_html2text_behavior(self):
for src, expected in {
'<u>test</U>': 'test\n',
'<i>test</i>': '*test*\n',
'<a href="http://else.where/other">other</a>': '[other](http://else.where/other)\n',
'<img src="test.jpeg">': '![Unnamed image](test.jpeg)\n',
'<a href="#t">test</a> <span id="t">dest</span>': 'test dest\n',
'<>a': '<>a\n',
'<p>a<p>b': 'a\nb\n',
}.items():
self.assertEqual(html2text(src), expected)
return unittest.defaultTestLoader.loadTestsFromTestCase(TestH2T)

View File

@@ -0,0 +1,323 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys
from polyglot.builtins import filter
is_narrow_build = sys.maxunicode < 0x10ffff
# Setup code {{{
import codecs
from calibre.constants import plugins
from calibre.utils.config_base import tweaks
from polyglot.builtins import unicode_type, cmp
_locale = _collator = _primary_collator = _sort_collator = _numeric_collator = _case_sensitive_collator = None
cmp  # referenced only inside the exec'd template strings below; this keeps it "used"
_none = u''
_none2 = b''
_cmap = {}
_icu, err = plugins['icu']
if _icu is None:
raise RuntimeError('Failed to load icu with error: %s' % err)
del err
icu_unicode_version = getattr(_icu, 'unicode_version', None)
_nmodes = {m:getattr(_icu, m) for m in ('NFC', 'NFD', 'NFKC', 'NFKD')}
# Ensure that the python internal filesystem and default encodings are not ASCII
def is_ascii(name):
try:
return codecs.lookup(name).name == b'ascii'
except (TypeError, LookupError):
return True
try:
if is_ascii(sys.getdefaultencoding()):
_icu.set_default_encoding(b'utf-8')
except Exception:
import traceback
traceback.print_exc()
try:
if is_ascii(sys.getfilesystemencoding()):
_icu.set_filesystem_encoding(b'utf-8')
except Exception:
import traceback
traceback.print_exc()
del is_ascii
def collator():
global _collator, _locale
if _collator is None:
if _locale is None:
from calibre.utils.localization import get_lang
if tweaks['locale_for_sorting']:
_locale = tweaks['locale_for_sorting']
else:
_locale = get_lang()
try:
_collator = _icu.Collator(_locale)
except Exception as e:
print('Failed to load collator for locale: %r with error %r, using English' % (_locale, e))
_collator = _icu.Collator('en')
return _collator
def change_locale(locale=None):
global _locale, _collator, _primary_collator, _sort_collator, _numeric_collator, _case_sensitive_collator
_collator = _primary_collator = _sort_collator = _numeric_collator = _case_sensitive_collator = None
_locale = locale
def primary_collator():
'Ignores case differences and accented characters'
global _primary_collator
if _primary_collator is None:
_primary_collator = collator().clone()
_primary_collator.strength = _icu.UCOL_PRIMARY
return _primary_collator
def sort_collator():
'Ignores case differences and recognizes numbers in strings (if the tweak is set)'
global _sort_collator
if _sort_collator is None:
_sort_collator = collator().clone()
_sort_collator.strength = _icu.UCOL_SECONDARY
_sort_collator.numeric = tweaks['numeric_collation']
return _sort_collator
def numeric_collator():
'Uses natural sorting for numbers inside strings so something2 will sort before something10'
global _numeric_collator
if _numeric_collator is None:
_numeric_collator = collator().clone()
_numeric_collator.strength = _icu.UCOL_SECONDARY
_numeric_collator.numeric = True
return _numeric_collator
def case_sensitive_collator():
'Always sorts upper case letter before lower case'
global _case_sensitive_collator
if _case_sensitive_collator is None:
_case_sensitive_collator = collator().clone()
_case_sensitive_collator.numeric = sort_collator().numeric
_case_sensitive_collator.upper_first = True
return _case_sensitive_collator
# Templates that will be used to generate various concrete
# function implementations based on different collators, to allow lazy loading
# of collators, with maximum runtime performance
_sort_key_template = '''
def {name}(obj):
try:
try:
return {collator}.{func}(obj)
except AttributeError:
pass
return {collator_func}().{func}(obj)
except TypeError:
if isinstance(obj, bytes):
try:
obj = obj.decode(sys.getdefaultencoding())
except ValueError:
return obj
return {collator}.{func}(obj)
return b''
'''
_strcmp_template = '''
def {name}(a, b):
try:
try:
return {collator}.{func}(a, b)
except AttributeError:
pass
return {collator_func}().{func}(a, b)
except TypeError:
if isinstance(a, bytes):
try:
a = a.decode(sys.getdefaultencoding())
except ValueError:
return cmp(a, b)
elif a is None:
a = u''
if isinstance(b, bytes):
try:
b = b.decode(sys.getdefaultencoding())
except ValueError:
return cmp(a, b)
elif b is None:
b = u''
return {collator}.{func}(a, b)
'''
_change_case_template = '''
def {name}(x):
try:
try:
return _icu.change_case(x, _icu.{which}, _locale)
except NotImplementedError:
pass
collator() # sets _locale
return _icu.change_case(x, _icu.{which}, _locale)
except TypeError:
if isinstance(x, bytes):
try:
x = x.decode(sys.getdefaultencoding())
except ValueError:
return x
return _icu.change_case(x, _icu.{which}, _locale)
raise
'''
def _make_func(template, name, **kwargs):
l = globals()
kwargs['name'] = name
kwargs['func'] = kwargs.get('func', 'sort_key')
exec(template.format(**kwargs), l)
return l[name]
# }}}
# ################ The string functions ########################################
sort_key = _make_func(_sort_key_template, 'sort_key', collator='_sort_collator', collator_func='sort_collator')
numeric_sort_key = _make_func(_sort_key_template, 'numeric_sort_key', collator='_numeric_collator', collator_func='numeric_collator')
primary_sort_key = _make_func(_sort_key_template, 'primary_sort_key', collator='_primary_collator', collator_func='primary_collator')
case_sensitive_sort_key = _make_func(_sort_key_template, 'case_sensitive_sort_key',
collator='_case_sensitive_collator', collator_func='case_sensitive_collator')
collation_order = _make_func(_sort_key_template, 'collation_order', collator='_sort_collator', collator_func='sort_collator', func='collation_order')
strcmp = _make_func(_strcmp_template, 'strcmp', collator='_sort_collator', collator_func='sort_collator', func='strcmp')
case_sensitive_strcmp = _make_func(
_strcmp_template, 'case_sensitive_strcmp', collator='_case_sensitive_collator', collator_func='case_sensitive_collator', func='strcmp')
primary_strcmp = _make_func(_strcmp_template, 'primary_strcmp', collator='_primary_collator', collator_func='primary_collator', func='strcmp')
upper = _make_func(_change_case_template, 'upper', which='UPPER_CASE')
lower = _make_func(_change_case_template, 'lower', which='LOWER_CASE')
title_case = _make_func(_change_case_template, 'title_case', which='TITLE_CASE')
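# Illustrative examples (results assume an English locale): the ICU-backed
# keys give case-insensitive, and optionally numeric, ordering that a plain
# byte-wise sort cannot.
#
#   sorted(['Banana', 'apple'], key=sort_key)          # ['apple', 'Banana']
#   sorted(['file10', 'file2'], key=numeric_sort_key)  # ['file2', 'file10']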
def capitalize(x):
try:
return upper(x[0]) + lower(x[1:])
except (IndexError, TypeError, AttributeError):
return x
try:
swapcase = _icu.swap_case
except AttributeError: # For people running from source
swapcase = lambda x:x.swapcase()
find = _make_func(_strcmp_template, 'find', collator='_collator', collator_func='collator', func='find')
primary_find = _make_func(_strcmp_template, 'primary_find', collator='_primary_collator', collator_func='primary_collator', func='find')
contains = _make_func(_strcmp_template, 'contains', collator='_collator', collator_func='collator', func='contains')
primary_contains = _make_func(_strcmp_template, 'primary_contains', collator='_primary_collator', collator_func='primary_collator', func='contains')
startswith = _make_func(_strcmp_template, 'startswith', collator='_collator', collator_func='collator', func='startswith')
primary_startswith = _make_func(_strcmp_template, 'primary_startswith', collator='_primary_collator', collator_func='primary_collator', func='startswith')
safe_chr = _icu.chr
ord_string = _icu.ord_string
def character_name(string):
try:
return _icu.character_name(unicode_type(string)) or None
except (TypeError, ValueError, KeyError):
pass
def character_name_from_code(code):
try:
return _icu.character_name_from_code(code) or ''
except (TypeError, ValueError, KeyError):
return ''
def normalize(text, mode='NFC'):
    # This is very slightly slower than using unicodedata.normalize, so stick
    # with that unless you have very good reasons not to. Also, its speed
    # decreases on wide python builds, where conversion to/from ICU's string
    # representation is slower.
return _icu.normalize(_nmodes[mode], unicode_type(text))
def contractions(col=None):
global _cmap
col = col or _collator
if col is None:
col = collator()
    ans = _cmap.get(col, None)
if ans is None:
ans = col.contractions()
ans = frozenset(filter(None, ans))
_cmap[col] = ans
return ans
def partition_by_first_letter(items, reverse=False, key=lambda x:x):
# Build a list of 'equal' first letters by noticing changes
# in ICU's 'ordinal' for the first letter.
from collections import OrderedDict
items = sorted(items, key=lambda x:sort_key(key(x)), reverse=reverse)
ans = OrderedDict()
last_c, last_ordnum = ' ', 0
for item in items:
c = icu_upper(key(item) or ' ')
ordnum, ordlen = collation_order(c)
if last_ordnum != ordnum:
if not is_narrow_build:
ordlen = 1
last_c = c[0:ordlen]
last_ordnum = ordnum
try:
ans[last_c].append(item)
except KeyError:
ans[last_c] = [item]
return ans
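# Illustrative example (grouping depends on the active locale): titles that
# share a first letter under the collator end up in the same bucket.
#
#   partition_by_first_letter(['apple', 'Ant', 'banana'])
#   # -> OrderedDict([('A', ['Ant', 'apple']), ('B', ['banana'])])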
# Return the number of unicode codepoints in a string
string_length = _icu.string_length if is_narrow_build else len
# Return the number of UTF-16 code units in a string
utf16_length = len if is_narrow_build else _icu.utf16_length
################################################################################
if __name__ == '__main__':
from calibre.utils.icu_test import run
run(verbosity=4)

View File

@@ -0,0 +1,690 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015-2019, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import errno
import os
import shutil
import subprocess
import sys
import tempfile
from io import BytesIO
from threading import Thread
# We use explicit module imports so tracebacks when importing are more useful
from PyQt5.QtCore import QBuffer, QByteArray, Qt
from PyQt5.QtGui import QColor, QImage, QImageReader, QImageWriter, QPixmap, QTransform
from calibre import fit_image, force_unicode
from calibre.constants import iswindows, plugins, ispy3
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.config_base import tweaks
from calibre.utils.filenames import atomic_rename
from calibre.utils.imghdr import what
from polyglot.builtins import string_or_bytes, unicode_type
# Utilities {{{
imageops, imageops_err = plugins['imageops']
if imageops is None:
raise RuntimeError(imageops_err)
class NotImage(ValueError):
pass
def normalize_format_name(fmt):
fmt = fmt.lower()
if fmt == 'jpg':
fmt = 'jpeg'
return fmt
def get_exe_path(name):
from calibre.ebooks.pdf.pdftohtml import PDFTOHTML
base = os.path.dirname(PDFTOHTML)
if iswindows:
name += '-calibre.exe'
if not base:
return name
return os.path.join(base, name)
def load_jxr_data(data):
with TemporaryDirectory() as tdir:
if iswindows and isinstance(tdir, unicode_type):
tdir = tdir.encode('mbcs')
with lopen(os.path.join(tdir, 'input.jxr'), 'wb') as f:
f.write(data)
cmd = [get_exe_path('JxrDecApp'), '-i', 'input.jxr', '-o', 'output.tif']
creationflags = 0x08 if iswindows else 0
subprocess.Popen(cmd, cwd=tdir, stdout=lopen(os.devnull, 'wb'), stderr=subprocess.STDOUT, creationflags=creationflags).wait()
i = QImage()
if not i.load(os.path.join(tdir, 'output.tif')):
raise NotImage('Failed to convert JPEG-XR image')
return i
# }}}
# png <-> gif {{{
def png_data_to_gif_data(data):
from PIL import Image
img = Image.open(BytesIO(data))
buf = BytesIO()
if img.mode in ('p', 'P'):
transparency = img.info.get('transparency')
if transparency is not None:
img.save(buf, 'gif', transparency=transparency)
else:
img.save(buf, 'gif')
elif img.mode in ('rgba', 'RGBA'):
alpha = img.split()[3]
mask = Image.eval(alpha, lambda a: 255 if a <=128 else 0)
img = img.convert('RGB').convert('P', palette=Image.ADAPTIVE, colors=255)
img.paste(255, mask)
img.save(buf, 'gif', transparency=255)
else:
img = img.convert('P', palette=Image.ADAPTIVE)
img.save(buf, 'gif')
return buf.getvalue()
class AnimatedGIF(ValueError):
pass
def gif_data_to_png_data(data, discard_animation=False):
from PIL import Image
img = Image.open(BytesIO(data))
if img.is_animated and not discard_animation:
raise AnimatedGIF()
buf = BytesIO()
img.save(buf, 'png')
return buf.getvalue()
# }}}
# Loading images {{{
def null_image():
' Create an invalid image. For internal use. '
return QImage()
def image_from_data(data):
' Create an image object from data, which should be a bytestring. '
if isinstance(data, QImage):
return data
i = QImage()
if not i.loadFromData(data):
q = what(None, data)
if q == 'jxr':
return load_jxr_data(data)
raise NotImage('Not a valid image (detected type: {})'.format(q))
return i
def image_from_path(path):
' Load an image from the specified path. '
with lopen(path, 'rb') as f:
return image_from_data(f.read())
def image_from_x(x):
' Create an image from a bytestring or a path or a file like object. '
if isinstance(x, unicode_type):
return image_from_path(x)
if hasattr(x, 'read'):
return image_from_data(x.read())
if isinstance(x, (bytes, QImage)):
return image_from_data(x)
if isinstance(x, bytearray):
return image_from_data(bytes(x))
if isinstance(x, QPixmap):
return x.toImage()
raise TypeError('Unknown image src type: %s' % type(x))
def image_and_format_from_data(data):
' Create an image object from the specified data which should be a bytestring and also return the format of the image '
ba = QByteArray(data)
buf = QBuffer(ba)
buf.open(QBuffer.ReadOnly)
r = QImageReader(buf)
fmt = bytes(r.format()).decode('utf-8')
return r.read(), fmt
# }}}
# Saving images {{{
def image_to_data(img, compression_quality=95, fmt='JPEG', png_compression_level=9, jpeg_optimized=True, jpeg_progressive=False):
'''
Serialize image to bytestring in the specified format.
:param compression_quality: is for JPEG and goes from 0 to 100. 100 being lowest compression, highest image quality
:param png_compression_level: is for PNG and goes from 0-9. 9 being highest compression.
    :param jpeg_optimized: Turns on the 'optimize' option for libjpeg which losslessly reduces file size
:param jpeg_progressive: Turns on the 'progressive scan' option for libjpeg which allows JPEG images to be downloaded in streaming fashion
'''
fmt = fmt.upper()
ba = QByteArray()
buf = QBuffer(ba)
buf.open(QBuffer.WriteOnly)
if fmt == 'GIF':
w = QImageWriter(buf, b'PNG')
w.setQuality(90)
if not w.write(img):
raise ValueError('Failed to export image as ' + fmt + ' with error: ' + w.errorString())
return png_data_to_gif_data(ba.data())
is_jpeg = fmt in ('JPG', 'JPEG')
w = QImageWriter(buf, fmt.encode('ascii'))
if is_jpeg:
if img.hasAlphaChannel():
img = blend_image(img)
# QImageWriter only gained the following options in Qt 5.5
if jpeg_optimized:
w.setOptimizedWrite(True)
if jpeg_progressive:
w.setProgressiveScanWrite(True)
w.setQuality(compression_quality)
elif fmt == 'PNG':
cl = min(9, max(0, png_compression_level))
w.setQuality(10 * (9-cl))
if not w.write(img):
raise ValueError('Failed to export image as ' + fmt + ' with error: ' + w.errorString())
return ba.data()
def save_image(img, path, **kw):
''' Save image to the specified path. Image format is taken from the file
extension. You can pass the same keyword arguments as for the
`image_to_data()` function. '''
fmt = path.rpartition('.')[-1]
kw['fmt'] = kw.get('fmt', fmt)
with lopen(path, 'wb') as f:
f.write(image_to_data(image_from_data(img), **kw))
def save_cover_data_to(
data, path=None,
bgcolor='#ffffff',
resize_to=None,
compression_quality=90,
minify_to=None,
grayscale=False,
eink=False, letterbox=False,
data_fmt='jpeg'
):
'''
Saves image in data to path, in the format specified by the path
extension. Removes any transparency. If there is no transparency and no
resize and the input and output image formats are the same, no changes are
made.
:param data: Image data as bytestring
:param path: If None img data is returned, in JPEG format
:param data_fmt: The fmt to return data in when path is None. Defaults to JPEG
:param compression_quality: The quality of the image after compression.
Number between 1 and 100. 1 means highest compression, 100 means no
compression (lossless). When generating PNG this number is divided by 10
for the png_compression_level.
:param bgcolor: The color for transparent pixels. Must be specified in hex.
:param resize_to: A tuple (width, height) or None for no resizing
:param minify_to: A tuple (width, height) to specify maximum target size.
The image will be resized to fit into this target size. If None the
value from the tweak is used.
:param grayscale: If True, the image is converted to grayscale,
if that's not already the case.
:param eink: If True, the image is dithered down to the 16 specific shades
of gray of the eInk palette.
Works best with formats that actually support color indexing (i.e., PNG)
    :param letterbox: If True, in addition to being resized to fit inside minify_to,
        the image will be letterboxed (i.e., centered on a black background).
'''
fmt = normalize_format_name(data_fmt if path is None else os.path.splitext(path)[1][1:])
if isinstance(data, QImage):
img = data
changed = True
else:
img, orig_fmt = image_and_format_from_data(data)
orig_fmt = normalize_format_name(orig_fmt)
changed = fmt != orig_fmt
if resize_to is not None:
changed = True
img = img.scaled(resize_to[0], resize_to[1], Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
owidth, oheight = img.width(), img.height()
nwidth, nheight = tweaks['maximum_cover_size'] if minify_to is None else minify_to
if letterbox:
img = blend_on_canvas(img, nwidth, nheight, bgcolor='#000000')
# Check if we were minified
if oheight != nheight or owidth != nwidth:
changed = True
else:
scaled, nwidth, nheight = fit_image(owidth, oheight, nwidth, nheight)
if scaled:
changed = True
img = img.scaled(nwidth, nheight, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
if img.hasAlphaChannel():
changed = True
img = blend_image(img, bgcolor)
if grayscale and not eink:
if not img.allGray():
changed = True
img = grayscale_image(img)
if eink:
# NOTE: Keep in mind that JPG does NOT actually support indexed colors, so the JPG algorithm will then smush everything back into a 256c mess...
# Thankfully, Nickel handles PNG just fine, and we potentially generate smaller files to boot, because they can be properly color indexed ;).
img = eink_dither_image(img)
changed = True
if path is None:
return image_to_data(img, compression_quality, fmt, compression_quality // 10) if changed else data
with lopen(path, 'wb') as f:
f.write(image_to_data(img, compression_quality, fmt, compression_quality // 10) if changed else data)
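# Minimal usage sketch (hypothetical paths): flatten any transparency onto
# white and write the cover as a JPEG no larger than 600x800.
#
#   with lopen('cover.png', 'rb') as f:
#       save_cover_data_to(f.read(), 'cover.jpg', minify_to=(600, 800))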
# }}}
# Overlaying images {{{
def blend_on_canvas(img, width, height, bgcolor='#ffffff'):
' Blend the `img` onto a canvas with the specified background color and size '
w, h = img.width(), img.height()
scaled, nw, nh = fit_image(w, h, width, height)
if scaled:
img = img.scaled(nw, nh, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
w, h = nw, nh
canvas = QImage(width, height, QImage.Format_RGB32)
canvas.fill(QColor(bgcolor))
overlay_image(img, canvas, (width - w)//2, (height - h)//2)
return canvas
class Canvas(object):
def __init__(self, width, height, bgcolor='#ffffff'):
self.img = QImage(width, height, QImage.Format_RGB32)
self.img.fill(QColor(bgcolor))
def __enter__(self):
return self
def __exit__(self, *args):
pass
def compose(self, img, x=0, y=0):
img = image_from_data(img)
overlay_image(img, self.img, x, y)
def export(self, fmt='JPEG', compression_quality=95):
return image_to_data(self.img, compression_quality=compression_quality, fmt=fmt)
def create_canvas(width, height, bgcolor='#ffffff'):
'Create a blank canvas of the specified size and color '
img = QImage(width, height, QImage.Format_RGB32)
img.fill(QColor(bgcolor))
return img
def overlay_image(img, canvas=None, left=0, top=0):
' Overlay the `img` onto the canvas at the specified position '
if canvas is None:
canvas = QImage(img.size(), QImage.Format_RGB32)
canvas.fill(Qt.white)
left, top = int(left), int(top)
imageops.overlay(img, canvas, left, top)
return canvas
def texture_image(canvas, texture):
' Repeatedly tile the image `texture` across and down the image `canvas` '
if canvas.hasAlphaChannel():
canvas = blend_image(canvas)
return imageops.texture_image(canvas, texture)
def blend_image(img, bgcolor='#ffffff'):
' Used to convert images that have semi-transparent pixels to opaque by blending with the specified color '
canvas = QImage(img.size(), QImage.Format_RGB32)
canvas.fill(QColor(bgcolor))
overlay_image(img, canvas)
return canvas
# }}}
# Image borders {{{
def add_borders_to_image(img, left=0, top=0, right=0, bottom=0, border_color='#ffffff'):
img = image_from_data(img)
if not (left > 0 or right > 0 or top > 0 or bottom > 0):
return img
canvas = QImage(img.width() + left + right, img.height() + top + bottom, QImage.Format_RGB32)
canvas.fill(QColor(border_color))
overlay_image(img, canvas, left, top)
return canvas
def remove_borders_from_image(img, fuzz=None):
''' Try to auto-detect and remove any borders from the image. Returns
the image itself if no borders could be removed. `fuzz` is a measure of
what colors are considered identical (must be a number between 0 and 255 in
absolute intensity units). Default is from a tweak whose default value is 10. '''
fuzz = tweaks['cover_trim_fuzz_value'] if fuzz is None else fuzz
img = image_from_data(img)
ans = imageops.remove_borders(img, max(0, fuzz))
return ans if ans.size() != img.size() else img
# }}}
# Cropping/scaling of images {{{
def resize_image(img, width, height):
return img.scaled(int(width), int(height), Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
def resize_to_fit(img, width, height):
img = image_from_data(img)
resize_needed, nw, nh = fit_image(img.width(), img.height(), width, height)
if resize_needed:
img = resize_image(img, nw, nh)
return resize_needed, img
def clone_image(img):
''' Returns a shallow copy of the image. However, the underlying data buffer
will be automatically copied-on-write '''
return QImage(img)
def scale_image(data, width=60, height=80, compression_quality=70, as_png=False, preserve_aspect_ratio=True):
''' Scale an image, returning it as either JPEG or PNG data (bytestring).
Transparency is alpha blended with white when converting to JPEG. Is thread
safe and does not require a QApplication. '''
# We use Qt instead of ImageMagick here because ImageMagick seems to use
    # some kind of memory pool, causing memory consumption to skyrocket.
img = image_from_data(data)
if preserve_aspect_ratio:
scaled, nwidth, nheight = fit_image(img.width(), img.height(), width, height)
if scaled:
img = img.scaled(nwidth, nheight, Qt.KeepAspectRatio, Qt.SmoothTransformation)
else:
if img.width() != width or img.height() != height:
img = img.scaled(width, height, Qt.IgnoreAspectRatio, Qt.SmoothTransformation)
fmt = 'PNG' if as_png else 'JPEG'
w, h = img.width(), img.height()
return w, h, image_to_data(img, compression_quality=compression_quality, fmt=fmt)
def crop_image(img, x, y, width, height):
'''
Return the specified section of the image.
:param x, y: The top left corner of the crop box
:param width, height: The width and height of the crop box. Note that if
    the crop box exceeds the source image's dimensions, width and height will be
auto-truncated.
'''
img = image_from_data(img)
width = min(width, img.width() - x)
height = min(height, img.height() - y)
return img.copy(x, y, width, height)
# }}}
# Image transformations {{{
def grayscale_image(img):
return imageops.grayscale(image_from_data(img))
def set_image_opacity(img, alpha=0.5):
    ''' Change the opacity of `img`. Note that the alpha value is multiplied by
any existing alpha values, so you cannot use this function to convert a
semi-transparent image to an opaque one. For that use `blend_image()`. '''
return imageops.set_opacity(image_from_data(img), alpha)
def flip_image(img, horizontal=False, vertical=False):
return image_from_data(img).mirrored(horizontal, vertical)
def image_has_transparent_pixels(img):
' Return True iff the image has at least one semi-transparent pixel '
img = image_from_data(img)
if img.isNull():
return False
return imageops.has_transparent_pixels(img)
def rotate_image(img, degrees):
t = QTransform()
t.rotate(degrees)
return image_from_data(img).transformed(t)
def gaussian_sharpen_image(img, radius=0, sigma=3, high_quality=True):
return imageops.gaussian_sharpen(image_from_data(img), max(0, radius), sigma, high_quality)
def gaussian_blur_image(img, radius=-1, sigma=3):
return imageops.gaussian_blur(image_from_data(img), max(0, radius), sigma)
def despeckle_image(img):
return imageops.despeckle(image_from_data(img))
def oil_paint_image(img, radius=-1, high_quality=True):
return imageops.oil_paint(image_from_data(img), radius, high_quality)
def normalize_image(img):
return imageops.normalize(image_from_data(img))
def quantize_image(img, max_colors=256, dither=True, palette=''):
    ''' Quantize the image to contain a maximum of `max_colors` colors. By
    default a palette is chosen automatically; if you want to use a fixed
    palette, pass in a list of color names in the `palette` variable. If you
    specify a palette, `max_colors` is ignored. Note that it is possible for
    the actual number of colors used to be less than max_colors.
    :param max_colors: Max. number of colors in the auto-generated palette. Must be between 2 and 256.
    :param dither: Whether to use dithering or not. Dithering is almost always a good thing.
:param palette: Use a manually specified palette instead. For example: palette='red green blue #eee'
'''
img = image_from_data(img)
if img.hasAlphaChannel():
img = blend_image(img)
if palette and isinstance(palette, string_or_bytes):
palette = palette.split()
return imageops.quantize(img, max_colors, dither, [QColor(x).rgb() for x in palette])
def eink_dither_image(img):
''' Dither the source image down to the eInk palette of 16 shades of grey,
using ImageMagick's OrderedDither algorithm.
NOTE: No need to call grayscale_image first, as this will inline a grayscaling pass if need be.
Returns a QImage in Grayscale8 pixel format.
'''
img = image_from_data(img)
if img.hasAlphaChannel():
img = blend_image(img)
return imageops.ordered_dither(img)
# }}}
# Optimization of images {{{
def run_optimizer(file_path, cmd, as_filter=False, input_data=None):
file_path = os.path.abspath(file_path)
cwd = os.path.dirname(file_path)
ext = os.path.splitext(file_path)[1]
if not ext or len(ext) > 10 or not ext.startswith('.'):
ext = '.jpg'
fd, outfile = tempfile.mkstemp(dir=cwd, suffix=ext)
try:
if as_filter:
outf = os.fdopen(fd, 'wb')
else:
os.close(fd)
iname, oname = os.path.basename(file_path), os.path.basename(outfile)
def repl(q, r):
cmd[cmd.index(q)] = r
if not as_filter:
repl(True, iname), repl(False, oname)
if iswindows and not ispy3:
# subprocess in python 2 cannot handle unicode strings that are not
# encodeable in mbcs, so we fail here, where it is more explicit,
# instead.
cmd = [x.encode('mbcs') if isinstance(x, unicode_type) else x for x in cmd]
if isinstance(cwd, unicode_type):
cwd = cwd.encode('mbcs')
stdin = subprocess.PIPE if as_filter else None
stderr = subprocess.PIPE if as_filter else subprocess.STDOUT
creationflags = 0x08 if iswindows else 0
p = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=stderr, stdin=stdin, creationflags=creationflags)
stderr = p.stderr if as_filter else p.stdout
if as_filter:
src = input_data or open(file_path, 'rb')
def copy(src, dest):
try:
shutil.copyfileobj(src, dest)
finally:
src.close(), dest.close()
inw = Thread(name='CopyInput', target=copy, args=(src, p.stdin))
inw.daemon = True
inw.start()
outw = Thread(name='CopyOutput', target=copy, args=(p.stdout, outf))
outw.daemon = True
outw.start()
raw = force_unicode(stderr.read())
if p.wait() != 0:
return raw
else:
if as_filter:
outw.join(60.0), inw.join(60.0)
try:
sz = os.path.getsize(outfile)
except EnvironmentError:
sz = 0
if sz < 1:
return '%s returned a zero size image' % cmd[0]
shutil.copystat(file_path, outfile)
atomic_rename(outfile, file_path)
finally:
try:
os.remove(outfile)
except EnvironmentError as err:
if err.errno != errno.ENOENT:
raise
try:
os.remove(outfile + '.bak') # optipng creates these files
except EnvironmentError as err:
if err.errno != errno.ENOENT:
raise
def optimize_jpeg(file_path):
exe = get_exe_path('jpegtran')
cmd = [exe] + '-copy none -optimize -progressive -maxmemory 100M -outfile'.split() + [False, True]
return run_optimizer(file_path, cmd)
def optimize_png(file_path, level=7):
' level goes from 1 to 7 with 7 being maximum compression '
exe = get_exe_path('optipng')
cmd = [exe] + '-fix -clobber -strip all -o{} -out'.format(level).split() + [False, True]
return run_optimizer(file_path, cmd)
def encode_jpeg(file_path, quality=80):
from calibre.utils.speedups import ReadOnlyFileBuffer
quality = max(0, min(100, int(quality)))
exe = get_exe_path('cjpeg')
cmd = [exe] + '-optimize -progressive -maxmemory 100M -quality'.split() + [unicode_type(quality)]
img = QImage()
if not img.load(file_path):
raise ValueError('%s is not a valid image file' % file_path)
ba = QByteArray()
buf = QBuffer(ba)
buf.open(QBuffer.WriteOnly)
if not img.save(buf, 'PPM'):
raise ValueError('Failed to export image to PPM')
return run_optimizer(file_path, cmd, as_filter=True, input_data=ReadOnlyFileBuffer(ba.data()))
# }}}
def test(): # {{{
from calibre.ptempfile import TemporaryDirectory
from calibre import CurrentDir
from glob import glob
img = image_from_data(I('lt.png', data=True, allow_user_override=False))
with TemporaryDirectory() as tdir, CurrentDir(tdir):
save_image(img, 'test.jpg')
ret = optimize_jpeg('test.jpg')
if ret is not None:
raise SystemExit('optimize_jpeg failed: %s' % ret)
ret = encode_jpeg('test.jpg')
if ret is not None:
raise SystemExit('encode_jpeg failed: %s' % ret)
shutil.copyfile(I('lt.png'), 'test.png')
ret = optimize_png('test.png')
if ret is not None:
raise SystemExit('optimize_png failed: %s' % ret)
if glob('*.bak'):
raise SystemExit('Spurious .bak files left behind')
quantize_image(img)
oil_paint_image(img)
gaussian_sharpen_image(img)
gaussian_blur_image(img)
despeckle_image(img)
remove_borders_from_image(img)
image_to_data(img, fmt='GIF')
raw = subprocess.Popen([get_exe_path('JxrDecApp'), '-h'], creationflags=0x08 if iswindows else 0, stdout=subprocess.PIPE).stdout.read()
if b'JPEG XR Decoder Utility' not in raw:
raise SystemExit('Failed to run JxrDecApp')
# }}}
if __name__ == '__main__': # {{{
args = sys.argv[1:]
infile = args.pop(0)
img = image_from_data(lopen(infile, 'rb').read())
func = globals()[args[0]]
kw = {}
args.pop(0)
outf = None
while args:
k = args.pop(0)
if '=' in k:
n, v = k.partition('=')[::2]
if v in ('True', 'False'):
v = True if v == 'True' else False
try:
v = int(v)
except Exception:
try:
v = float(v)
except Exception:
pass
kw[n] = v
else:
outf = k
if outf is None:
bn = os.path.basename(infile)
        outf = bn.rpartition('.')[0] + '-output.' + bn.rpartition('.')[-1]
img = func(img, **kw)
with lopen(outf, 'wb') as f:
f.write(image_to_data(img, fmt=outf.rpartition('.')[-1]))
# }}}

View File

@@ -0,0 +1,263 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from struct import unpack, error
import os
from calibre.utils.speedups import ReadOnlyFileBuffer
from calibre.constants import ispy3
from polyglot.builtins import string_or_bytes, unicode_type
""" Recognize image file formats and sizes based on their first few bytes."""
HSIZE = 120
def what(file, h=None):
' Recognize image headers '
if h is None:
if isinstance(file, string_or_bytes):
with lopen(file, 'rb') as f:
h = f.read(HSIZE)
else:
location = file.tell()
h = file.read(HSIZE)
file.seek(location)
if isinstance(h, bytes):
h = memoryview(h)
for tf in tests:
res = tf(h)
if res:
return res
    # There exist some jpeg files with no headers, only the starting two bytes
# If we cannot identify as anything else, identify as jpeg.
if h[:2] == b'\xff\xd8':
return 'jpeg'
return None
def identify(src):
''' Recognize file format and sizes. Returns format, width, height. width
and height will be -1 if not found and fmt will be None if the image is not
recognized. '''
width = height = -1
if isinstance(src, unicode_type):
stream = lopen(src, 'rb')
elif isinstance(src, bytes):
stream = ReadOnlyFileBuffer(src)
else:
stream = src
pos = stream.tell()
head = stream.read(HSIZE)
stream.seek(pos)
fmt = what(None, head)
if fmt in {'jpeg', 'gif', 'png', 'jpeg2000'}:
size = len(head)
if fmt == 'png':
# PNG
s = head[16:24] if size >= 24 and head[12:16] == b'IHDR' else head[8:16]
try:
width, height = unpack(b">LL", s)
except error:
return fmt, width, height
elif fmt == 'jpeg':
# JPEG
pos = stream.tell()
try:
height, width = jpeg_dimensions(stream)
except Exception:
return fmt, width, height
finally:
stream.seek(pos)
elif fmt == 'gif':
# GIF
try:
width, height = unpack(b"<HH", head[6:10])
except error:
return fmt, width, height
elif size >= 56 and fmt == 'jpeg2000':
# JPEG2000
try:
height, width = unpack(b'>LL', head[48:56])
except error:
return fmt, width, height
return fmt, width, height
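# Illustrative example: identify() accepts raw bytes as well as paths or
# streams. GIF stores its dimensions in the first 10 bytes, so a bare header
# (pixel data elided) is enough:
#
#   >>> identify(b'GIF89a\x01\x00\x01\x00')
#   ('gif', 1, 1)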
# ---------------------------------#
# Subroutines per image file type #
# ---------------------------------#
tests = []
def test(f):
tests.append(f)
return f
@test
def jpeg(h):
"""JPEG data in JFIF format (Changed by Kovid to mimic the file utility,
the original code was failing with some jpegs that included ICC_PROFILE
data, for example: http://nationalpostnews.files.wordpress.com/2013/03/budget.jpeg?w=300&h=1571)"""
if h[6:10] in (b'JFIF', b'Exif'):
return 'jpeg'
if h[:2] == b'\xff\xd8':
q = h[:32].tobytes()
if b'JFIF' in q or b'8BIM' in q:
return 'jpeg'
def jpeg_dimensions(stream):
# A JPEG marker is two bytes of the form 0xff x where 0 < x < 0xff
# See section B.1.1.2 of https://www.w3.org/Graphics/JPEG/itu-t81.pdf
# We read the dimensions from the first SOFn section we come across
stream.seek(2, os.SEEK_CUR)
def read(n):
ans = stream.read(n)
if len(ans) != n:
raise ValueError('Truncated JPEG data')
return ans
if ispy3:
def read_byte():
return read(1)[0]
else:
def read_byte():
return ord(read(1)[0])
x = None
while True:
# Find next marker
while x != 0xff:
x = read_byte()
# Soak up padding
marker = 0xff
while marker == 0xff:
marker = read_byte()
q = marker
if 0xc0 <= q <= 0xcf and q != 0xc4 and q != 0xcc:
# SOFn marker
stream.seek(3, os.SEEK_CUR)
return unpack(b'>HH', read(4))
elif 0xd8 <= q <= 0xda:
break # start of image, end of image, start of scan, no point
elif q == 0:
return -1, -1 # Corrupted JPEG
elif q == 0x01 or 0xd0 <= q <= 0xd7:
# Standalone marker
continue
        else:
            # Not a marker we need; skip this segment and keep scanning
            size = unpack(b'>H', read(2))[0]
            stream.seek(size - 2, os.SEEK_CUR)
return -1, -1
@test
def png(h):
if h[:8] == b"\211PNG\r\n\032\n":
return 'png'
@test
def gif(h):
"""GIF ('87 and '89 variants)"""
if h[:6] in (b'GIF87a', b'GIF89a'):
return 'gif'
@test
def tiff(h):
"""TIFF (can be in Motorola or Intel byte order)"""
if h[:2] in (b'MM', b'II'):
if h[2:4] == b'\xbc\x01':
return 'jxr'
return 'tiff'
@test
def webp(h):
if h[:4] == b'RIFF' and h[8:12] == b'WEBP':
return 'webp'
@test
def rgb(h):
"""SGI image library"""
if h[:2] == b'\001\332':
return 'rgb'
@test
def pbm(h):
"""PBM (portable bitmap)"""
    # Index via slices so the checks also work on memoryviews under python 3
    if len(h) >= 3 and h[0:1].tobytes() == b'P' and \
            h[1:2].tobytes() in b'14' and h[2:3].tobytes() in b' \t\n\r':
return 'pbm'
@test
def pgm(h):
"""PGM (portable graymap)"""
    if len(h) >= 3 and h[0:1].tobytes() == b'P' and \
            h[1:2].tobytes() in b'25' and h[2:3].tobytes() in b' \t\n\r':
return 'pgm'
@test
def ppm(h):
"""PPM (portable pixmap)"""
    if len(h) >= 3 and h[0:1].tobytes() == b'P' and \
            h[1:2].tobytes() in b'36' and h[2:3].tobytes() in b' \t\n\r':
return 'ppm'
@test
def rast(h):
"""Sun raster file"""
if h[:4] == b'\x59\xA6\x6A\x95':
return 'rast'
@test
def xbm(h):
"""X bitmap (X10 or X11)"""
s = b'#define '
if h[:len(s)] == s:
return 'xbm'
@test
def bmp(h):
if h[:2] == b'BM':
return 'bmp'
@test
def emf(h):
if h[:4] == b'\x01\0\0\0' and h[40:44] == b' EMF':
return 'emf'
@test
def jpeg2000(h):
if h[:12] == b'\x00\x00\x00\x0cjP \r\n\x87\n':
return 'jpeg2000'
@test
def svg(h):
if h[:4] == b'<svg' or (h[:2] == b'<?' and h[2:5].tobytes().lower() == b'xml' and b'<svg' in h.tobytes()):
return 'svg'
tests = tuple(tests)
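def _identify_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module). A 24-byte PNG header is enough for identify() to recognize
    # the format and read the IHDR dimensions.
    sample = (b'\x89PNG\r\n\x1a\n'    # PNG signature
              b'\x00\x00\x00\rIHDR'   # IHDR chunk length + type
              b'\x00\x00\x00\x10'     # width = 16
              b'\x00\x00\x00\x20')    # height = 32
    assert what(None, sample) == 'png'
    return identify(sample)  # -> ('png', 16, 32)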

View File

@@ -0,0 +1,83 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, errno, sys
from threading import Thread
from calibre import force_unicode
from calibre.constants import iswindows, get_windows_username, islinux, filesystem_encoding, ispy3
from calibre.utils.filenames import ascii_filename
from polyglot.functools import lru_cache
VADDRESS = None
def eintr_retry_call(func, *args, **kwargs):
while True:
try:
return func(*args, **kwargs)
except EnvironmentError as e:
if getattr(e, 'errno', None) == errno.EINTR:
continue
raise
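def _eintr_retry_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): retry a signal-interruptible system call until it completes.
    r, w = os.pipe()
    try:
        os.write(w, b'x')
        return eintr_retry_call(os.read, r, 1)  # -> b'x'
    finally:
        os.close(r)
        os.close(w)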
@lru_cache()
def socket_address(which):
if iswindows:
ans = r'\\.\pipe\Calibre' + which
try:
user = get_windows_username()
except Exception:
user = None
if user:
user = ascii_filename(user).replace(' ', '_')
if user:
ans += '-' + user[:100] + 'x'
else:
user = force_unicode(os.environ.get('USER') or os.path.basename(os.path.expanduser('~')), filesystem_encoding)
sock_name = '{}-calibre-{}.socket'.format(ascii_filename(user).replace(' ', '_'), which)
if islinux:
ans = '\0' + sock_name
else:
from tempfile import gettempdir
tmp = force_unicode(gettempdir(), filesystem_encoding)
ans = os.path.join(tmp, sock_name)
if not ispy3 and not isinstance(ans, bytes):
ans = ans.encode(filesystem_encoding)
return ans
def gui_socket_address():
return socket_address('GUI' if iswindows else 'gui')
def viewer_socket_address():
return socket_address('Viewer' if iswindows else 'viewer')
class RC(Thread):
def __init__(self, print_error=True, socket_address=None):
self.print_error = print_error
self.socket_address = socket_address or gui_socket_address()
Thread.__init__(self)
self.conn = None
self.daemon = True
def run(self):
from multiprocessing.connection import Client
self.done = False
try:
self.conn = Client(self.socket_address)
self.done = True
except Exception:
if self.print_error:
                print('Failed to connect to address {}'.format(repr(self.socket_address)), file=sys.stderr)
import traceback
traceback.print_exc()

View File

@@ -0,0 +1,237 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import subprocess, os, sys, time
from functools import partial
from calibre.constants import iswindows, isosx, isfrozen, filesystem_encoding, ispy3
from calibre.utils.config import prefs
from calibre.ptempfile import PersistentTemporaryFile, base_dir
from calibre.utils.serialize import msgpack_dumps
from polyglot.builtins import iteritems, unicode_type, string_or_bytes, environ_item, native_string_type, getcwd
from polyglot.binary import as_hex_unicode
if iswindows:
import win32process
try:
windows_null_file = open(os.devnull, 'wb')
except:
        raise RuntimeError('NUL file missing in windows. This indicates a'
                ' corrupted Windows installation. You should contact Microsoft'
                ' for assistance and/or follow the steps described here: https://bytes.com/topic/net/answers/264804-compile-error-null-device-missing')
def renice(niceness):
try:
os.nice(niceness)
except:
pass
class Worker(object):
'''
Platform independent object for launching child processes. All processes
have the environment variable :envvar:`CALIBRE_WORKER` set.
Useful attributes: ``is_alive``, ``returncode``, ``pid``
Useful methods: ``kill``
To launch child simply call the Worker object. By default, the child's
output is redirected to an on disk file, the path to which is returned by
the call.
'''
exe_name = 'calibre-parallel'
@property
def executable(self):
if hasattr(sys, 'running_from_setup'):
return [sys.executable, os.path.join(sys.setup_dir, 'run-calibre-worker.py')]
if getattr(sys, 'run_local', False):
return [sys.executable, sys.run_local, self.exe_name]
e = self.exe_name
if iswindows:
return os.path.join(os.path.dirname(sys.executable),
e+'.exe' if isfrozen else 'Scripts\\%s.exe'%e)
if isosx:
return os.path.join(sys.binaries_path, e)
if isfrozen:
return os.path.join(sys.executables_location, e)
if hasattr(sys, 'executables_location'):
c = os.path.join(sys.executables_location, e)
if os.access(c, os.X_OK):
return c
return e
@property
def gui_executable(self):
if isosx and not hasattr(sys, 'running_from_setup'):
if self.job_name == 'ebook-viewer':
base = os.path.dirname(sys.binaries_path)
return os.path.join(base, 'ebook-viewer.app/Contents/MacOS/', self.exe_name)
if self.job_name == 'ebook-edit':
base = os.path.dirname(sys.binaries_path)
return os.path.join(base, 'ebook-viewer.app/Contents/ebook-edit.app/Contents/MacOS/', self.exe_name)
return os.path.join(sys.binaries_path, self.exe_name)
return self.executable
@property
def env(self):
if ispy3:
env = os.environ.copy()
else:
            # We use this inefficient method of copying the environment
            # variables because of non-ASCII env vars on Windows. See
            # https://bugs.launchpad.net/bugs/811191
env = {}
for key in os.environ:
try:
val = os.environ[key]
if isinstance(val, unicode_type):
# On windows subprocess cannot handle unicode env vars
try:
val = val.encode(filesystem_encoding)
except ValueError:
val = val.encode('utf-8')
if isinstance(key, unicode_type):
key = key.encode('ascii')
env[key] = val
except:
pass
env[native_string_type('CALIBRE_WORKER')] = environ_item('1')
td = as_hex_unicode(msgpack_dumps(base_dir()))
env[native_string_type('CALIBRE_WORKER_TEMP_DIR')] = environ_item(td)
env.update(self._env)
return env
@property
def is_alive(self):
return hasattr(self, 'child') and self.child.poll() is None
@property
def returncode(self):
if not hasattr(self, 'child'):
return None
self.child.poll()
return self.child.returncode
@property
def pid(self):
if not hasattr(self, 'child'):
return None
return getattr(self.child, 'pid', None)
def close_log_file(self):
try:
self._file.close()
except:
pass
def kill(self):
self.close_log_file()
try:
if self.is_alive:
if iswindows:
return self.child.kill()
try:
self.child.terminate()
st = time.time()
while self.is_alive and time.time()-st < 2:
time.sleep(0.2)
finally:
if self.is_alive:
self.child.kill()
except:
pass
def __init__(self, env, gui=False, job_name=None):
self._env = {}
self.gui = gui
self.job_name = job_name
if ispy3:
self._env = env.copy()
else:
# Windows cannot handle unicode env vars
for k, v in iteritems(env):
try:
if isinstance(k, unicode_type):
k = k.encode('ascii')
if isinstance(v, unicode_type):
try:
v = v.encode(filesystem_encoding)
except:
v = v.encode('utf-8')
self._env[k] = v
except:
pass
def __call__(self, redirect_output=True, cwd=None, priority=None):
'''
If redirect_output is True, output from the child is redirected
to a file on disk and this method returns the path to that file.
'''
exe = self.gui_executable if self.gui else self.executable
env = self.env
try:
origwd = cwd or os.path.abspath(getcwd())
except EnvironmentError:
# cwd no longer exists
origwd = cwd or os.path.expanduser('~')
env[native_string_type('ORIGWD')] = environ_item(as_hex_unicode(msgpack_dumps(origwd)))
_cwd = cwd
if priority is None:
priority = prefs['worker_process_priority']
cmd = [exe] if isinstance(exe, string_or_bytes) else exe
args = {
'env' : env,
'cwd' : _cwd,
}
if iswindows:
priority = {
'high' : win32process.HIGH_PRIORITY_CLASS,
'normal' : win32process.NORMAL_PRIORITY_CLASS,
'low' : win32process.IDLE_PRIORITY_CLASS}[priority]
args['creationflags'] = win32process.CREATE_NO_WINDOW|priority
else:
niceness = {
'normal' : 0,
'low' : 10,
'high' : 20,
}[priority]
args['preexec_fn'] = partial(renice, niceness)
ret = None
if redirect_output:
self._file = PersistentTemporaryFile('_worker_redirect.log')
args['stdout'] = self._file._fd
args['stderr'] = subprocess.STDOUT
if iswindows:
args['stdin'] = subprocess.PIPE
ret = self._file.name
if iswindows and 'stdin' not in args:
# On windows when using the pythonw interpreter,
# stdout, stderr and stdin may not be valid
args['stdin'] = subprocess.PIPE
args['stdout'] = windows_null_file
args['stderr'] = subprocess.STDOUT
if not iswindows:
# Close inherited file descriptors in worker
# On windows, this is done in the worker process
# itself
args['close_fds'] = True
self.child = subprocess.Popen(cmd, **args)
if 'stdin' in args:
self.child.stdin.close()
self.log_path = ret
return ret
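def _worker_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): launch a low priority worker with an extra environment
    # variable and return its pid and redirected log path. MY_EXTRA_VAR is
    # a made-up name for illustration.
    w = Worker({'MY_EXTRA_VAR': 'value'})
    log_path = w(priority='low')  # spawns calibre-parallel
    return w.pid, log_path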

View File

@@ -0,0 +1,348 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, time, traceback, importlib
from multiprocessing.connection import Client
from threading import Thread
from contextlib import closing
from calibre.constants import iswindows
from calibre.utils.ipc import eintr_retry_call
from calibre.utils.ipc.launch import Worker
from calibre.utils.serialize import msgpack_loads, msgpack_dumps
from calibre.utils.monotonic import monotonic
from polyglot.builtins import unicode_type, string_or_bytes, environ_item
from polyglot.binary import as_hex_unicode, from_hex_bytes
class WorkerError(Exception):
def __init__(self, msg, orig_tb='', log_path=None):
Exception.__init__(self, msg)
self.orig_tb = orig_tb
self.log_path = log_path
class ConnectedWorker(Thread):
def __init__(self, listener, args):
Thread.__init__(self)
self.daemon = True
self.listener = listener
self.args = args
self.accepted = False
self.tb = None
self.res = None
def run(self):
conn = None
try:
conn = eintr_retry_call(self.listener.accept)
except BaseException:
self.tb = traceback.format_exc()
return
self.accepted = True
with closing(conn):
try:
eintr_retry_call(conn.send, self.args)
self.res = eintr_retry_call(conn.recv)
except BaseException:
self.tb = traceback.format_exc()
class OffloadWorker(object):
def __init__(self, listener, worker):
self.listener = listener
self.worker = worker
self.conn = None
self.kill_thread = t = Thread(target=self.worker.kill)
t.daemon = True
def __call__(self, module, func, *args, **kwargs):
if self.conn is None:
self.conn = eintr_retry_call(self.listener.accept)
eintr_retry_call(self.conn.send, (module, func, args, kwargs))
return eintr_retry_call(self.conn.recv)
def shutdown(self):
try:
eintr_retry_call(self.conn.send, None)
except IOError:
pass
except:
import traceback
traceback.print_exc()
finally:
self.conn = None
try:
os.remove(self.worker.log_path)
except:
pass
self.kill_thread.start()
def is_alive(self):
return self.worker.is_alive or self.kill_thread.is_alive()
def communicate(ans, worker, listener, args, timeout=300, heartbeat=None,
abort=None):
cw = ConnectedWorker(listener, args)
cw.start()
st = monotonic()
check_heartbeat = callable(heartbeat)
while worker.is_alive and cw.is_alive():
cw.join(0.01)
delta = monotonic() - st
if not cw.accepted and delta > min(10, timeout):
break
hung = not heartbeat() if check_heartbeat else delta > timeout
if hung:
raise WorkerError('Worker appears to have hung')
if abort is not None and abort.is_set():
# The worker process will be killed by fork_job, after we return
return
if not cw.accepted:
if not cw.tb:
raise WorkerError('Failed to connect to worker process')
raise WorkerError('Failed to connect to worker process', cw.tb)
if cw.tb:
raise WorkerError('Failed to communicate with worker process', cw.tb)
if cw.res is None:
raise WorkerError('Something strange happened. The worker process was aborted without an exception.')
if cw.res.get('tb', None):
raise WorkerError('Worker failed', cw.res['tb'])
ans['result'] = cw.res['result']
def create_worker(env, priority='normal', cwd=None, func='main'):
from calibre.utils.ipc.server import create_listener
auth_key = os.urandom(32)
address, listener = create_listener(auth_key)
env = dict(env)
env.update({
'CALIBRE_WORKER_ADDRESS': environ_item(as_hex_unicode(msgpack_dumps(address))),
'CALIBRE_WORKER_KEY': environ_item(as_hex_unicode(auth_key)),
'CALIBRE_SIMPLE_WORKER': environ_item('calibre.utils.ipc.simple_worker:%s' % func),
})
w = Worker(env)
w(cwd=cwd, priority=priority)
return listener, w
def start_pipe_worker(command, env=None, priority='normal', **process_args):
import subprocess
from functools import partial
w = Worker(env or {})
args = {'stdout':subprocess.PIPE, 'stdin':subprocess.PIPE, 'env':w.env}
args.update(process_args)
if iswindows:
import win32process
priority = {
'high' : win32process.HIGH_PRIORITY_CLASS,
'normal' : win32process.NORMAL_PRIORITY_CLASS,
'low' : win32process.IDLE_PRIORITY_CLASS}[priority]
args['creationflags'] = win32process.CREATE_NO_WINDOW|priority
else:
def renice(niceness):
try:
os.nice(niceness)
except:
pass
niceness = {'normal' : 0, 'low' : 10, 'high' : 20}[priority]
args['preexec_fn'] = partial(renice, niceness)
args['close_fds'] = True
exe = w.executable
cmd = [exe] if isinstance(exe, string_or_bytes) else exe
p = subprocess.Popen(cmd + ['--pipe-worker', command], **args)
return p
def two_part_fork_job(env=None, priority='normal', cwd=None):
env = env or {}
listener, w = create_worker(env, priority, cwd)
def run_job(
mod_name, func_name, args=(), kwargs=None, timeout=300, # seconds
no_output=False, heartbeat=None, abort=None, module_is_source_code=False
):
ans = {'result':None, 'stdout_stderr':None}
kwargs = kwargs or {}
try:
communicate(ans, w, listener, (mod_name, func_name, args, kwargs,
module_is_source_code), timeout=timeout, heartbeat=heartbeat,
abort=abort)
except WorkerError as e:
if not no_output:
e.log_path = w.log_path
raise
finally:
t = Thread(target=w.kill)
t.daemon=True
t.start()
if no_output:
try:
os.remove(w.log_path)
except:
pass
if not no_output:
ans['stdout_stderr'] = w.log_path
return ans
run_job.worker = w
return run_job
def fork_job(mod_name, func_name, args=(), kwargs=None, timeout=300, # seconds
cwd=None, priority='normal', env={}, no_output=False, heartbeat=None,
abort=None, module_is_source_code=False):
'''
Run a job in a worker process. A job is simply a function that will be
called with the supplied arguments, in the worker process.
The result of the function will be returned.
If an error occurs a WorkerError is raised.
:param mod_name: Module to import in the worker process
:param func_name: Function to call in the worker process from the imported
module
:param args: Positional arguments to pass to the function
:param kwargs: Keyword arguments to pass to the function
:param timeout: The time in seconds to wait for the worker process to
complete. If it takes longer a WorkerError is raised and the process is
killed.
:param cwd: The working directory for the worker process. I recommend
against using this, unless you are sure the path is pure ASCII.
:param priority: The process priority for the worker process
:param env: Extra environment variables to set for the worker process
:param no_output: If True, the stdout and stderr of the worker process are
discarded
:param heartbeat: If not None, it is used to check if the worker has hung,
instead of a simple timeout. It must be a callable that takes no
arguments and returns True or False. The worker will be assumed to have
hung if this function returns False. At that point, the process will be
killed and a WorkerError will be raised.
:param abort: If not None, it must be an Event. As soon as abort.is_set()
returns True, the worker process is killed. No error is raised.
:param module_is_source_code: If True, the ``mod`` is treated as python
source rather than a module name to import. The source is executed as a
module. Useful if you want to use fork_job from within a script to run some
dynamically generated python.
:return: A dictionary with the keys result and stdout_stderr. result is the
return value of the function (it must be picklable). stdout_stderr is the
path to a file that contains the stdout and stderr of the worker process.
If you set no_output=True, then this will not be present.
'''
return two_part_fork_job(env, priority, cwd)(
mod_name, func_name, args=args, kwargs=kwargs, timeout=timeout,
no_output=no_output, heartbeat=heartbeat, abort=abort,
module_is_source_code=module_is_source_code
)
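def _fork_job_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): run os.getpid in a worker process and collect the (picklable)
    # result, discarding the worker's stdout/stderr.
    res = fork_job('os', 'getpid', no_output=True, timeout=60)
    return res['result']  # the worker's pid, different from our own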
def offload_worker(env={}, priority='normal', cwd=None):
listener, w = create_worker(env=env, priority=priority, cwd=cwd, func='offload')
return OffloadWorker(listener, w)
def compile_code(src):
import re, io
if not isinstance(src, unicode_type):
match = re.search(br'coding[:=]\s*([-\w.]+)', src[:200])
enc = match.group(1).decode('utf-8') if match else 'utf-8'
src = src.decode(enc)
# Python complains if there is a coding declaration in a unicode string
src = re.sub(r'^#.*coding\s*[:=]\s*([-\w.]+)', '#', src, flags=re.MULTILINE)
# Translate newlines to \n
src = io.StringIO(src, newline=None).getvalue()
namespace = {
'time':time, 're':re, 'os':os, 'io':io,
}
exec(src, namespace)
return namespace
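def _compile_code_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): execute dynamically generated source and pull a name out of
    # the resulting namespace, as module_is_source_code jobs do.
    ns = compile_code('def double(x):\n    return 2 * x\n')
    return ns['double'](21)  # -> 42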
def main():
# The entry point for the simple worker process
address = msgpack_loads(from_hex_bytes(os.environ['CALIBRE_WORKER_ADDRESS']))
key = from_hex_bytes(os.environ['CALIBRE_WORKER_KEY'])
with closing(Client(address, authkey=key)) as conn:
args = eintr_retry_call(conn.recv)
try:
mod, func, args, kwargs, module_is_source_code = args
if module_is_source_code:
importlib.import_module('calibre.customize.ui') # Load plugins
mod = compile_code(mod)
func = mod[func]
else:
try:
mod = importlib.import_module(mod)
except ImportError:
importlib.import_module('calibre.customize.ui') # Load plugins
mod = importlib.import_module(mod)
func = getattr(mod, func)
res = {'result':func(*args, **kwargs)}
except:
res = {'tb': traceback.format_exc()}
try:
conn.send(res)
except:
# Maybe EINTR
conn.send(res)
def offload():
# The entry point for the offload worker process
address = msgpack_loads(from_hex_bytes(os.environ['CALIBRE_WORKER_ADDRESS']))
key = from_hex_bytes(os.environ['CALIBRE_WORKER_KEY'])
func_cache = {}
with closing(Client(address, authkey=key)) as conn:
while True:
args = eintr_retry_call(conn.recv)
if args is None:
break
res = {'result':None, 'tb':None}
try:
mod, func, args, kwargs = args
if mod is None:
eintr_retry_call(conn.send, res)
continue
f = func_cache.get((mod, func), None)
if f is None:
try:
m = importlib.import_module(mod)
except ImportError:
importlib.import_module('calibre.customize.ui') # Load plugins
m = importlib.import_module(mod)
func_cache[(mod, func)] = f = getattr(m, func)
res['result'] = f(*args, **kwargs)
except:
import traceback
res['tb'] = traceback.format_exc()
eintr_retry_call(conn.send, res)

View File

@@ -0,0 +1,65 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from datetime import datetime
from dateutil.tz import tzlocal, tzutc, tzoffset
from calibre.constants import plugins
speedup, err = plugins['speedup']
if not speedup:
raise RuntimeError(err)
class SafeLocalTimeZone(tzlocal):
def _isdst(self, dt):
# This method in tzlocal raises ValueError if dt is out of range (in
# older versions of dateutil)
# In such cases, just assume that dt is not DST.
try:
return super(SafeLocalTimeZone, self)._isdst(dt)
except Exception:
pass
return False
def _naive_is_dst(self, dt):
# This method in tzlocal raises ValueError if dt is out of range (in
# newer versions of dateutil)
# In such cases, just assume that dt is not DST.
try:
return super(SafeLocalTimeZone, self)._naive_is_dst(dt)
except Exception:
pass
return False
utc_tz = tzutc()
local_tz = SafeLocalTimeZone()
del tzutc, tzlocal
UNDEFINED_DATE = datetime(101,1,1, tzinfo=utc_tz)
def parse_iso8601(date_string, assume_utc=False, as_utc=True):
if not date_string:
return UNDEFINED_DATE
dt, aware, tzseconds = speedup.parse_iso8601(date_string)
tz = utc_tz if assume_utc else local_tz
if aware: # timezone was specified
if tzseconds == 0:
tz = utc_tz
else:
sign = '-' if tzseconds < 0 else '+'
description = "%s%02d:%02d" % (sign, abs(tzseconds) // 3600, (abs(tzseconds) % 3600) // 60)
tz = tzoffset(description, tzseconds)
dt = dt.replace(tzinfo=tz)
if as_utc and tz is utc_tz:
return dt
return dt.astimezone(utc_tz if as_utc else local_tz)
if __name__ == '__main__':
import sys
print(parse_iso8601(sys.argv[-1]))

View File

@@ -0,0 +1,548 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, locale, re, io, sys
from gettext import GNUTranslations, NullTranslations
from polyglot.builtins import is_py3, iteritems, unicode_type
_available_translations = None
def available_translations():
global _available_translations
if _available_translations is None:
stats = P('localization/stats.calibre_msgpack', allow_user_override=False)
if os.path.exists(stats):
from calibre.utils.serialize import msgpack_loads
with open(stats, 'rb') as f:
stats = msgpack_loads(f.read())
else:
stats = {}
_available_translations = [x for x in stats if stats[x] > 0.1]
return _available_translations
def get_system_locale():
from calibre.constants import iswindows, isosx, plugins
lang = None
if iswindows:
try:
from calibre.constants import get_windows_user_locale_name
lang = get_windows_user_locale_name()
lang = lang.strip()
if not lang:
lang = None
except:
pass # Windows XP does not have the GetUserDefaultLocaleName fn
elif isosx:
try:
lang = plugins['usbobserver'][0].user_locale() or None
except:
# Fallback to environment vars if something bad happened
import traceback
traceback.print_exc()
if lang is None:
try:
envvars = ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES', 'LANG']
lang = locale.getdefaultlocale(envvars)[0]
# lang is None in two cases: either the environment variable is not
# set or it's "C". Stop looking for a language in the latter case.
if lang is None:
for var in envvars:
if os.environ.get(var) == 'C':
lang = 'en_US'
break
except:
pass # This happens on Ubuntu apparently
if lang is None and 'LANG' in os.environ: # Needed for OS X
try:
lang = os.environ['LANG']
except:
pass
if lang:
lang = lang.replace('-', '_')
lang = '_'.join(lang.split('_')[:2])
return lang
def sanitize_lang(lang):
if lang:
match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang)
if match:
lang = match.group()
if lang == 'zh':
lang = 'zh_CN'
if not lang:
lang = 'en'
return lang
def get_lang():
'Try to figure out what language to display the interface in'
from calibre.utils.config_base import prefs
lang = prefs['language']
lang = os.environ.get('CALIBRE_OVERRIDE_LANG', lang)
if lang:
return lang
try:
lang = get_system_locale()
except:
import traceback
traceback.print_exc()
lang = None
return sanitize_lang(lang)
def is_rtl():
return get_lang()[:2].lower() in {'he', 'ar'}
def get_lc_messages_path(lang):
hlang = None
if zf_exists():
if lang in available_translations():
hlang = lang
else:
xlang = lang.split('_')[0].lower()
if xlang in available_translations():
hlang = xlang
return hlang
def zf_exists():
return os.path.exists(P('localization/locales.zip',
allow_user_override=False))
_lang_trans = None
def get_all_translators():
from zipfile import ZipFile
with ZipFile(P('localization/locales.zip', allow_user_override=False), 'r') as zf:
for lang in available_translations():
mpath = get_lc_messages_path(lang)
if mpath is not None:
buf = io.BytesIO(zf.read(mpath + '/messages.mo'))
yield lang, GNUTranslations(buf)
def get_single_translator(mpath, which='messages'):
from zipfile import ZipFile
with ZipFile(P('localization/locales.zip', allow_user_override=False), 'r') as zf:
path = '{}/{}.mo'.format(mpath, which)
data = zf.read(path)
buf = io.BytesIO(data)
try:
return GNUTranslations(buf)
except Exception as e:
import traceback
traceback.print_exc()
import hashlib
sig = hashlib.sha1(data).hexdigest()
raise ValueError('Failed to load translations for: {} (size: {} and signature: {}) with error: {}'.format(
path, len(data), sig, e))
def get_iso639_translator(lang):
lang = sanitize_lang(lang)
mpath = get_lc_messages_path(lang) if lang else None
return get_single_translator(mpath, 'iso639') if mpath else None
def get_translator(bcp_47_code):
parts = bcp_47_code.replace('-', '_').split('_')[:2]
parts[0] = lang_as_iso639_1(parts[0].lower()) or 'en'
if len(parts) > 1:
parts[1] = parts[1].upper()
lang = '_'.join(parts)
lang = {'pt':'pt_BR', 'zh':'zh_CN'}.get(lang, lang)
available = available_translations()
found = True
if lang == 'en' or lang.startswith('en_'):
return found, lang, NullTranslations()
if lang not in available:
lang = {'pt':'pt_BR', 'zh':'zh_CN'}.get(parts[0], parts[0])
if lang not in available:
lang = get_lang()
if lang not in available:
lang = 'en'
found = False
if lang == 'en':
return True, lang, NullTranslations()
return found, lang, get_single_translator(lang)
lcdata = {
'abday': ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'),
'abmon': ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'),
'd_fmt': '%m/%d/%Y',
'd_t_fmt': '%a %d %b %Y %r %Z',
'day': ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'),
'mon': ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'),
'noexpr': '^[nN].*',
'radixchar': '.',
't_fmt': '%r',
't_fmt_ampm': '%I:%M:%S %p',
'thousep': ',',
'yesexpr': '^[yY].*'
}
def load_po(path):
from calibre.translations.msgfmt import make
buf = io.BytesIO()
try:
make(path, buf)
except Exception:
        print('Failed to compile translations file: %s, ignoring' % path)
buf = None
else:
buf = io.BytesIO(buf.getvalue())
return buf
def set_translators():
global _lang_trans, lcdata
# To test different translations invoke as
# CALIBRE_OVERRIDE_LANG=de_DE.utf8 program
lang = get_lang()
t = buf = iso639 = None
if 'CALIBRE_TEST_TRANSLATION' in os.environ:
buf = load_po(os.path.expanduser(os.environ['CALIBRE_TEST_TRANSLATION']))
if lang:
mpath = get_lc_messages_path(lang)
if buf is None and mpath and os.access(mpath + '.po', os.R_OK):
buf = load_po(mpath + '.po')
if mpath is not None:
from zipfile import ZipFile
with ZipFile(P('localization/locales.zip',
allow_user_override=False), 'r') as zf:
if buf is None:
buf = io.BytesIO(zf.read(mpath + '/messages.mo'))
if mpath == 'nds':
mpath = 'de'
isof = mpath + '/iso639.mo'
try:
iso639 = io.BytesIO(zf.read(isof))
except:
pass # No iso639 translations for this lang
if buf is not None:
from calibre.utils.serialize import msgpack_loads
try:
lcdata = msgpack_loads(zf.read(mpath + '/lcdata.calibre_msgpack'))
except:
pass # No lcdata
if buf is not None:
t = GNUTranslations(buf)
if iso639 is not None:
iso639 = _lang_trans = GNUTranslations(iso639)
t.add_fallback(iso639)
if t is None:
t = NullTranslations()
try:
set_translators.lang = t.info().get('language')
except Exception:
pass
if is_py3:
t.install(names=('ngettext',))
else:
t.install(unicode=True, names=('ngettext',))
# Now that we have installed a translator, we have to retranslate the help
# for the global prefs object as it was instantiated in get_lang(), before
# the translator was installed.
from calibre.utils.config_base import prefs
prefs.retranslate_help()
set_translators.lang = None
_iso639 = None
_extra_lang_codes = {
'pt_BR' : _('Brazilian Portuguese'),
'en_GB' : _('English (UK)'),
'zh_CN' : _('Simplified Chinese'),
'zh_TW' : _('Traditional Chinese'),
'en' : _('English'),
'en_US' : _('English (United States)'),
'en_AR' : _('English (Argentina)'),
'en_AU' : _('English (Australia)'),
'en_JP' : _('English (Japan)'),
'en_DE' : _('English (Germany)'),
'en_BG' : _('English (Bulgaria)'),
'en_EG' : _('English (Egypt)'),
'en_NZ' : _('English (New Zealand)'),
'en_CA' : _('English (Canada)'),
'en_GR' : _('English (Greece)'),
'en_IN' : _('English (India)'),
'en_NP' : _('English (Nepal)'),
'en_TH' : _('English (Thailand)'),
'en_TR' : _('English (Turkey)'),
'en_CY' : _('English (Cyprus)'),
'en_CZ' : _('English (Czech Republic)'),
'en_PH' : _('English (Philippines)'),
'en_PK' : _('English (Pakistan)'),
'en_PL' : _('English (Poland)'),
'en_HR' : _('English (Croatia)'),
'en_HU' : _('English (Hungary)'),
'en_ID' : _('English (Indonesia)'),
'en_IL' : _('English (Israel)'),
'en_RU' : _('English (Russia)'),
'en_SG' : _('English (Singapore)'),
'en_YE' : _('English (Yemen)'),
'en_IE' : _('English (Ireland)'),
'en_CN' : _('English (China)'),
'en_TW' : _('English (Taiwan)'),
'en_ZA' : _('English (South Africa)'),
'es_PY' : _('Spanish (Paraguay)'),
'es_UY' : _('Spanish (Uruguay)'),
'es_AR' : _('Spanish (Argentina)'),
'es_CR' : _('Spanish (Costa Rica)'),
'es_MX' : _('Spanish (Mexico)'),
'es_CU' : _('Spanish (Cuba)'),
'es_CL' : _('Spanish (Chile)'),
'es_EC' : _('Spanish (Ecuador)'),
'es_HN' : _('Spanish (Honduras)'),
'es_VE' : _('Spanish (Venezuela)'),
'es_BO' : _('Spanish (Bolivia)'),
'es_NI' : _('Spanish (Nicaragua)'),
'es_CO' : _('Spanish (Colombia)'),
'de_AT' : _('German (AT)'),
'fr_BE' : _('French (BE)'),
'nl' : _('Dutch (NL)'),
'nl_BE' : _('Dutch (BE)'),
'und' : _('Unknown')
}
if False:
# Extra strings needed for Qt
    # NOTE: Ante Meridiem (i.e. like 10:00 AM)
    _('AM')
    # NOTE: Post Meridiem (i.e. like 10:00 PM)
    _('PM')
    # NOTE: Ante Meridiem (i.e. like 10:00 am)
    _('am')
    # NOTE: Post Meridiem (i.e. like 10:00 pm)
    _('pm')
_('&Copy')
_('Select All')
_('Copy Link')
_('&Select All')
_('Copy &Link Location')
_('&Undo')
_('&Redo')
_('Cu&t')
_('&Paste')
_('Paste and Match Style')
_('Directions')
_('Left to Right')
_('Right to Left')
_('Fonts')
_('&Step up')
_('Step &down')
_('Close without Saving')
_('Close Tab')
_lcase_map = {}
for k in _extra_lang_codes:
_lcase_map[k.lower()] = k
def _load_iso639():
global _iso639
if _iso639 is None:
ip = P('localization/iso639.calibre_msgpack', allow_user_override=False, data=True)
from calibre.utils.serialize import msgpack_loads
_iso639 = msgpack_loads(ip)
if 'by_3' not in _iso639:
_iso639['by_3'] = _iso639['by_3t']
return _iso639
def get_iso_language(lang_trans, lang):
iso639 = _load_iso639()
ans = lang
lang = lang.split('_')[0].lower()
if len(lang) == 2:
ans = iso639['by_2'].get(lang, ans)
elif len(lang) == 3:
if lang in iso639['by_3']:
ans = iso639['by_3'][lang]
return lang_trans(ans)
def get_language(lang):
translate = _
lang = _lcase_map.get(lang, lang)
if lang in _extra_lang_codes:
# The translator was not active when _extra_lang_codes was defined, so
# re-translate
return translate(_extra_lang_codes[lang])
attr = 'gettext' if sys.version_info.major > 2 else 'ugettext'
return get_iso_language(getattr(_lang_trans, attr, translate), lang)
def calibre_langcode_to_name(lc, localize=True):
iso639 = _load_iso639()
translate = _ if localize else lambda x: x
try:
return translate(iso639['by_3'][lc])
except:
pass
return lc
def canonicalize_lang(raw):
if not raw:
return None
if not isinstance(raw, unicode_type):
raw = raw.decode('utf-8', 'ignore')
raw = raw.lower().strip()
if not raw:
return None
raw = raw.replace('_', '-').partition('-')[0].strip()
if not raw:
return None
iso639 = _load_iso639()
m2to3 = iso639['2to3']
if len(raw) == 2:
ans = m2to3.get(raw, None)
if ans is not None:
return ans
elif len(raw) == 3:
if raw in iso639['by_3']:
return raw
return iso639['name_map'].get(raw, None)
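def _canonicalize_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): assorted language specifiers normalize to ISO 639-3 codes,
    # e.g. a BCP 47 style tag reduces to its primary subtag first.
    return canonicalize_lang('en-US')  # -> 'eng'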
_lang_map = None
def lang_map():
' Return mapping of ISO 639 3 letter codes to localized language names '
iso639 = _load_iso639()
translate = _
global _lang_map
if _lang_map is None:
_lang_map = {k:translate(v) for k, v in iteritems(iso639['by_3'])}
return _lang_map
def lang_map_for_ui():
ans = getattr(lang_map_for_ui, 'ans', None)
if ans is None:
ans = lang_map().copy()
for x in ('zxx', 'mis', 'mul'):
ans.pop(x, None)
lang_map_for_ui.ans = ans
return ans
def langnames_to_langcodes(names):
'''
Given a list of localized language names return a mapping of the names to 3
letter ISO 639 language codes. If a name is not recognized, it is mapped to
None.
'''
iso639 = _load_iso639()
translate = _
ans = {}
names = set(names)
for k, v in iteritems(iso639['by_3']):
tv = translate(v)
if tv in names:
names.remove(tv)
ans[tv] = k
if not names:
break
for x in names:
ans[x] = None
return ans
def lang_as_iso639_1(name_or_code):
code = canonicalize_lang(name_or_code)
if code is not None:
iso639 = _load_iso639()
return iso639['3to2'].get(code, None)
_udc = None
def get_udc():
global _udc
if _udc is None:
from calibre.ebooks.unihandecode import Unihandecoder
_udc = Unihandecoder(lang=get_lang())
return _udc
def user_manual_stats():
stats = getattr(user_manual_stats, 'stats', None)
if stats is None:
import json
try:
stats = json.loads(P('user-manual-translation-stats.json', allow_user_override=False, data=True))
except EnvironmentError:
stats = {}
user_manual_stats.stats = stats
return stats
def localize_user_manual_link(url):
lc = lang_as_iso639_1(get_lang())
if lc == 'en':
return url
stats = user_manual_stats()
if stats.get(lc, 0) < 0.3:
return url
from polyglot.urllib import urlparse, urlunparse
parts = urlparse(url)
path = re.sub(r'/generated/[a-z]+/', '/generated/%s/' % lc, parts.path or '')
path = '/%s%s' % (lc, path)
parts = list(parts)
parts[2] = path
return urlunparse(parts)
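def _localize_link_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): the URL is returned unchanged for English, or when the user
    # manual translation for the UI language is under 30% complete.
    return localize_user_manual_link('https://manual.calibre-ebook.com/gui.html')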
def website_languages():
stats = getattr(website_languages, 'stats', None)
if stats is None:
try:
stats = frozenset(P('localization/website-languages.txt', allow_user_override=False, data=True).split())
except EnvironmentError:
stats = frozenset()
website_languages.stats = stats
return stats
def localize_website_link(url):
lc = lang_as_iso639_1(get_lang())
langs = website_languages()
if lc == 'en' or lc not in langs:
return url
from polyglot.urllib import urlparse, urlunparse
parts = urlparse(url)
path = '/{}{}'.format(lc, parts.path)
parts = list(parts)
parts[2] = path
return urlunparse(parts)

View File

@@ -0,0 +1,340 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'''
Try to read invalid zip files with missing or damaged central directories.
These are apparently produced in large numbers by the fruitcakes over at B&N.
Tries to use only the local headers to extract data from the damaged zip file.
'''
import os, sys, zlib, shutil
from struct import calcsize, unpack, pack
from collections import namedtuple, OrderedDict
from tempfile import SpooledTemporaryFile
from polyglot.builtins import itervalues, getcwd
HEADER_SIG = 0x04034b50
HEADER_BYTE_SIG = pack(b'<L', HEADER_SIG)
local_header_fmt = b'<L5HL2L2H'
local_header_sz = calcsize(local_header_fmt)
ZIP_STORED, ZIP_DEFLATED = 0, 8
DATA_DESCRIPTOR_SIG = pack(b'<L', 0x08074b50)
LocalHeader = namedtuple('LocalHeader',
'signature min_version flags compression_method mod_time mod_date '
'crc32 compressed_size uncompressed_size filename_length extra_length '
'filename extra')
if hasattr(sys, 'getwindowsversion'):
windows_reserved_filenames = (
        'CON', 'PRN', 'AUX', 'CLOCK$', 'NUL', 'COM0', 'COM1', 'COM2', 'COM3',
        'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9', 'LPT0', 'LPT1', 'LPT2',
        'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9')
def is_reserved_filename(x):
base = x.partition('.')[0].upper()
return base in windows_reserved_filenames
else:
def is_reserved_filename(x):
return False
def decode_arcname(name):
if isinstance(name, bytes):
from calibre.ebooks.chardet import detect
try:
name = name.decode('utf-8')
except:
res = detect(name)
encoding = res['encoding']
try:
name = name.decode(encoding)
except:
name = name.decode('utf-8', 'replace')
return name
def find_local_header(f):
pos = f.tell()
raw = f.read(50*1024)
try:
f.seek(pos + raw.index(HEADER_BYTE_SIG))
except ValueError:
f.seek(pos)
return
raw = f.read(local_header_sz)
if len(raw) != local_header_sz:
f.seek(pos)
return
header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
if header.signature == HEADER_SIG:
return header
f.seek(pos)
def find_data_descriptor(f):
pos = f.tell()
DD = namedtuple('DataDescriptor', 'crc32 compressed_size uncompressed_size')
raw = b'a'*16
try:
while len(raw) >= 16:
raw = f.read(50*1024)
idx = raw.find(DATA_DESCRIPTOR_SIG)
if idx != -1:
f.seek(f.tell() - len(raw) + idx + len(DATA_DESCRIPTOR_SIG))
return DD(*unpack(b'<LLL', f.read(12)))
# Rewind to handle the case of the signature being cut off
# by the 50K boundary
f.seek(f.tell()-len(DATA_DESCRIPTOR_SIG))
raise ValueError('Failed to find data descriptor signature. '
'Data descriptors without signatures are not '
'supported.')
finally:
f.seek(pos)
def read_local_file_header(f):
pos = f.tell()
raw = f.read(local_header_sz)
if len(raw) != local_header_sz:
f.seek(pos)
return
header = LocalHeader(*(unpack(local_header_fmt, raw) + (None, None)))
if header.signature != HEADER_SIG:
f.seek(pos)
header = find_local_header(f)
if header is None:
return
if header.min_version > 20:
raise ValueError('This ZIP file uses unsupported features')
if header.flags & 0b1:
raise ValueError('This ZIP file is encrypted')
if header.flags & (1 << 13):
raise ValueError('This ZIP file uses masking, unsupported.')
if header.compression_method not in {ZIP_STORED, ZIP_DEFLATED}:
raise ValueError('This ZIP file uses an unsupported compression method')
has_data_descriptors = header.flags & (1 << 3)
fname = extra = None
if header.filename_length > 0:
fname = f.read(header.filename_length)
if len(fname) != header.filename_length:
return
try:
fname = fname.decode('ascii')
except UnicodeDecodeError:
if header.flags & (1 << 11):
try:
fname = fname.decode('utf-8')
except UnicodeDecodeError:
pass
fname = decode_arcname(fname).replace('\\', '/')
if header.extra_length > 0:
extra = f.read(header.extra_length)
if len(extra) != header.extra_length:
return
if has_data_descriptors:
desc = find_data_descriptor(f)
header = header._replace(crc32=desc.crc32,
compressed_size=desc.compressed_size,
uncompressed_size=desc.uncompressed_size)
return LocalHeader(*(
header[:-2] + (fname, extra)
))
def read_compressed_data(f, header):
cdata = f.read(header.compressed_size)
return cdata
def copy_stored_file(src, size, dest):
read = 0
amt = min(size, 20*1024)
while read < size:
raw = src.read(min(size-read, amt))
if not raw:
raise ValueError('Premature end of file')
dest.write(raw)
read += len(raw)
def copy_compressed_file(src, size, dest):
d = zlib.decompressobj(-15)
read = 0
amt = min(size, 20*1024)
while read < size:
raw = src.read(min(size-read, amt))
if not raw and read < size:
raise ValueError('Invalid ZIP file, local header is damaged')
read += len(raw)
dest.write(d.decompress(raw, 200*1024))
count = 0
while d.unconsumed_tail:
count += 1
dest.write(d.decompress(d.unconsumed_tail, 200*1024))
if count > 100:
raise ValueError('This ZIP file contains a ZIP bomb in %s'%
os.path.basename(dest.name))
def _extractall(f, path=None, file_info=None):
found = False
while True:
header = read_local_file_header(f)
if not header:
break
has_data_descriptors = header.flags & (1 << 3)
seekval = header.compressed_size + (16 if has_data_descriptors else 0)
found = True
# Sanitize path changing absolute to relative paths and removing .. and
# .
fname = header.filename.replace(os.sep, '/')
fname = os.path.splitdrive(fname)[1]
parts = [x for x in fname.split('/') if x not in {'', os.path.pardir, os.path.curdir}]
if not parts:
continue
if header.uncompressed_size == 0:
# Directory
f.seek(f.tell()+seekval)
if path is not None:
bdir = os.path.join(path, *parts)
if not os.path.exists(bdir):
os.makedirs(bdir)
continue
# File
if file_info is not None:
file_info[header.filename] = (f.tell(), header)
if path is not None:
bdir = os.path.join(path, *(parts[:-1]))
if not os.path.exists(bdir):
os.makedirs(bdir)
dest = os.path.join(path, *parts)
try:
df = open(dest, 'wb')
except EnvironmentError:
if is_reserved_filename(os.path.basename(dest)):
raise ValueError('This ZIP file contains a file with a reserved filename'
' that cannot be processed on Windows: {}'.format(os.path.basename(dest)))
raise
with df:
if header.compression_method == ZIP_STORED:
copy_stored_file(f, header.compressed_size, df)
else:
copy_compressed_file(f, header.compressed_size, df)
else:
f.seek(f.tell()+seekval)
if not found:
raise ValueError('Not a ZIP file')
def extractall(path_or_stream, path=None):
f = path_or_stream
close_at_end = False
if not hasattr(f, 'read'):
f = open(f, 'rb')
close_at_end = True
if path is None:
path = getcwd()
pos = f.tell()
try:
_extractall(f, path)
finally:
f.seek(pos)
if close_at_end:
f.close()
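def _extractall_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): recover the contents of a damaged ZIP into a fresh directory
    # using only the local file headers. 'damaged.zip' is a made-up name.
    import tempfile
    outdir = tempfile.mkdtemp()
    extractall('damaged.zip', path=outdir)
    return outdir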
class LocalZipFile(object):
def __init__(self, stream):
self.file_info = OrderedDict()
_extractall(stream, file_info=self.file_info)
self.stream = stream
def _get_file_info(self, name):
fi = self.file_info.get(name)
if fi is None:
raise ValueError('This ZIP container has no file named: %s'%name)
return fi
def open(self, name, spool_size=5*1024*1024):
if isinstance(name, LocalHeader):
name = name.filename
offset, header = self._get_file_info(name)
self.stream.seek(offset)
dest = SpooledTemporaryFile(max_size=spool_size)
if header.compression_method == ZIP_STORED:
copy_stored_file(self.stream, header.compressed_size, dest)
else:
copy_compressed_file(self.stream, header.compressed_size, dest)
dest.seek(0)
return dest
def getinfo(self, name):
offset, header = self._get_file_info(name)
return header
def read(self, name, spool_size=5*1024*1024):
with self.open(name, spool_size=spool_size) as f:
return f.read()
def extractall(self, path=None):
self.stream.seek(0)
_extractall(self.stream, path=(path or getcwd()))
def close(self):
pass
def safe_replace(self, name, datastream, extra_replacements={},
add_missing=False):
from calibre.utils.zipfile import ZipFile, ZipInfo
replacements = {name:datastream}
replacements.update(extra_replacements)
names = frozenset(list(replacements.keys()))
found = set()
def rbytes(name):
r = replacements[name]
if not isinstance(r, bytes):
r = r.read()
return r
with SpooledTemporaryFile(max_size=100*1024*1024) as temp:
ztemp = ZipFile(temp, 'w')
for offset, header in itervalues(self.file_info):
if header.filename in names:
zi = ZipInfo(header.filename)
zi.compress_type = header.compression_method
ztemp.writestr(zi, rbytes(header.filename))
found.add(header.filename)
else:
ztemp.writestr(header.filename, self.read(header.filename,
spool_size=0))
if add_missing:
for name in names - found:
ztemp.writestr(name, rbytes(name))
ztemp.close()
zipstream = self.stream
temp.seek(0)
zipstream.seek(0)
zipstream.truncate()
shutil.copyfileobj(temp, zipstream)
zipstream.flush()
if __name__ == '__main__':
extractall(sys.argv[-1])

View File

@@ -0,0 +1,205 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import atexit
import errno
import os
import stat
import tempfile
import time
from functools import partial
from calibre.constants import (
__appname__, fcntl, filesystem_encoding, islinux, isosx, iswindows, plugins, ispy3
)
from calibre.utils.monotonic import monotonic
speedup = plugins['speedup'][0]
if iswindows:
import msvcrt, win32file, pywintypes, winerror, win32api, win32event
from calibre.constants import get_windows_username
excl_file_mode = stat.S_IREAD | stat.S_IWRITE
else:
excl_file_mode = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
def unix_open(path):
flags = os.O_RDWR | os.O_CREAT
has_cloexec = False
if hasattr(speedup, 'O_CLOEXEC'):
try:
fd = os.open(path, flags | speedup.O_CLOEXEC, excl_file_mode)
has_cloexec = True
except EnvironmentError as err:
# Kernel may not support O_CLOEXEC
if err.errno != errno.EINVAL:
raise
if not has_cloexec:
fd = os.open(path, flags, excl_file_mode)
fcntl.fcntl(fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
return os.fdopen(fd, 'r+b')
def unix_retry(err):
return err.errno in (errno.EACCES, errno.EAGAIN, errno.ENOLCK, errno.EINTR)
def windows_open(path):
if isinstance(path, bytes):
path = path.decode('mbcs')
try:
h = win32file.CreateFileW(
path,
win32file.GENERIC_READ |
win32file.GENERIC_WRITE, # Open for reading and writing
0, # Open exclusive
None, # No security attributes, ensures handle is not inherited by children
win32file.OPEN_ALWAYS, # If file does not exist, create it
win32file.FILE_ATTRIBUTE_NORMAL, # Normal attributes
None, # No template file
)
except pywintypes.error as err:
raise WindowsError(err[0], err[2], path)
fd = msvcrt.open_osfhandle(h.Detach(), 0)
return os.fdopen(fd, 'r+b')
def windows_retry(err):
return err.winerror in (
winerror.ERROR_SHARING_VIOLATION, winerror.ERROR_LOCK_VIOLATION
)
def retry_for_a_time(timeout, sleep_time, func, error_retry, *args):
limit = monotonic() + timeout
while True:
try:
return func(*args)
except EnvironmentError as err:
if not error_retry(err) or monotonic() > limit:
raise
time.sleep(sleep_time)
def lock_file(path, timeout=15, sleep_time=0.2):
if iswindows:
return retry_for_a_time(
timeout, sleep_time, windows_open, windows_retry, path
)
f = unix_open(path)
retry_for_a_time(
timeout, sleep_time, fcntl.flock, unix_retry,
f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB
)
return f
class ExclusiveFile(object):
def __init__(self, path, timeout=15, sleep_time=0.2):
if iswindows and isinstance(path, bytes):
path = path.decode(filesystem_encoding)
self.path = path
self.timeout = timeout
self.sleep_time = sleep_time
def __enter__(self):
self.file = lock_file(self.path, self.timeout, self.sleep_time)
return self.file
def __exit__(self, type, value, traceback):
self.file.close()
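def _exclusive_file_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): hold an exclusive lock for the duration of the with block;
    # concurrent lockers retry for up to ``timeout`` seconds before failing.
    # 'my.lock' is a made-up file name.
    with ExclusiveFile('my.lock', timeout=5) as f:
        f.seek(0)
        f.write(b'owned by this process\n')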
def _clean_lock_file(file_obj):
try:
os.remove(file_obj.name)
except EnvironmentError:
pass
try:
file_obj.close()
except EnvironmentError:
pass
if iswindows:
def create_single_instance_mutex(name, per_user=True):
mutexname = '{}-singleinstance-{}-{}'.format(
__appname__, (get_windows_username() if per_user else ''), name
)
mutex = win32event.CreateMutex(None, False, mutexname)
if not mutex:
return
err = win32api.GetLastError()
if err == winerror.ERROR_ALREADY_EXISTS:
            # Close this handle, otherwise it will prevent the mutex
            # from being deleted when the process that created it exits.
win32api.CloseHandle(mutex)
return
return partial(win32api.CloseHandle, mutex)
elif islinux:
def create_single_instance_mutex(name, per_user=True):
import socket
from calibre.utils.ipc import eintr_retry_call
name = '%s-singleinstance-%s-%s' % (
__appname__, (os.geteuid() if per_user else ''), name
)
address = '\0' + name.replace(' ', '_')
if not ispy3:
address = address.encode('utf-8')
sock = socket.socket(family=socket.AF_UNIX)
try:
eintr_retry_call(sock.bind, address)
except socket.error as err:
if getattr(err, 'errno', None) == errno.EADDRINUSE:
return
raise
fd = sock.fileno()
old_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
fcntl.fcntl(fd, fcntl.F_SETFD, old_flags | fcntl.FD_CLOEXEC)
return sock.close
else:
def singleinstance_path(name, per_user=True):
name = '%s-singleinstance-%s-%s.lock' % (
__appname__, (os.geteuid() if per_user else ''), name
)
home = os.path.expanduser('~')
locs = ['/var/lock', home, tempfile.gettempdir()]
if isosx:
locs.insert(0, '/Library/Caches')
for loc in locs:
if os.access(loc, os.W_OK | os.R_OK | os.X_OK):
return os.path.join(loc, ('.' if loc is home else '') + name)
raise EnvironmentError(
'Failed to find a suitable filesystem location for the lock file'
)
def create_single_instance_mutex(name, per_user=True):
from calibre.utils.ipc import eintr_retry_call
path = singleinstance_path(name, per_user)
f = lopen(path, 'w')
try:
eintr_retry_call(fcntl.lockf, f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
return partial(_clean_lock_file, f)
except EnvironmentError as err:
if err.errno not in (errno.EAGAIN, errno.EACCES):
raise
def singleinstance(name):
    ' Ensure that only a single process holding the specified mutex exists '
release_mutex = create_single_instance_mutex(name)
if release_mutex is None:
return False
atexit.register(release_mutex)
return True
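def _singleinstance_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): refuse to start a second copy of an application. 'my-app' is
    # a made-up mutex name.
    if not singleinstance('my-app'):
        raise SystemExit('Another instance is already running')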

View File

@@ -0,0 +1,275 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL 3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
'A simplified logging system'
DEBUG = 0
INFO = 1
WARN = 2
ERROR = 3
import sys, traceback, io
from functools import partial
from threading import Lock
from calibre import isbytestring, force_unicode, as_unicode, prints
from polyglot.builtins import unicode_type, iteritems
class Stream(object):
def __init__(self, stream=None):
if stream is None:
stream = io.BytesIO()
self.stream = getattr(stream, 'buffer', stream)
self._prints = partial(prints, safe_encode=True, file=stream)
def flush(self):
self.stream.flush()
def prints(self, level, *args, **kwargs):
self._prints(*args, **kwargs)
class ANSIStream(Stream):
def __init__(self, stream=sys.stdout):
Stream.__init__(self, stream)
self.color = {
DEBUG: u'green',
INFO: None,
WARN: u'yellow',
ERROR: u'red',
}
def prints(self, level, *args, **kwargs):
from calibre.utils.terminal import ColoredStream
with ColoredStream(self.stream, self.color[level]):
self._prints(*args, **kwargs)
def flush(self):
self.stream.flush()
class FileStream(Stream):
def __init__(self, stream=None):
Stream.__init__(self, stream)
def prints(self, level, *args, **kwargs):
self._prints(*args, **kwargs)
class HTMLStream(Stream):
color = {
DEBUG: b'<span style="color:green">',
INFO: b'<span>',
WARN: b'<span style="color:blue">',
ERROR: b'<span style="color:red">'
}
normal = b'</span>'
def __init__(self, stream=sys.stdout):
Stream.__init__(self, stream)
def prints(self, level, *args, **kwargs):
self.stream.write(self.color[level])
kwargs['file'] = self.stream
self._prints(*args, **kwargs)
self.stream.write(self.normal)
def flush(self):
self.stream.flush()
class UnicodeHTMLStream(HTMLStream):
color = {k: v.decode('ascii') for k, v in iteritems(HTMLStream.color)}
normal = HTMLStream.normal.decode('ascii')
def __init__(self):
self.clear()
def flush(self):
pass
def prints(self, level, *args, **kwargs):
col = self.color[level]
if col != self.last_col:
if self.data:
self.data.append(self.normal)
self.data.append(col)
self.last_col = col
sep = kwargs.get(u'sep', u' ')
end = kwargs.get(u'end', u'\n')
for arg in args:
if isbytestring(arg):
arg = force_unicode(arg)
elif not isinstance(arg, unicode_type):
arg = as_unicode(arg)
self.data.append(arg+sep)
self.plain_text.append(arg+sep)
self.data.append(end)
self.plain_text.append(end)
def clear(self):
self.data = []
self.plain_text = []
self.last_col = self.color[INFO]
@property
def html(self):
end = self.normal if self.data else u''
return u''.join(self.data) + end
def dump(self):
return [self.data, self.plain_text, self.last_col]
def load(self, dump):
self.data, self.plain_text, self.last_col = dump
def append_dump(self, dump):
d, p, lc = dump
self.data.extend(d)
self.plain_text.extend(p)
self.last_col = lc
class Log(object):
DEBUG = DEBUG
INFO = INFO
WARN = WARN
ERROR = ERROR
def __init__(self, level=INFO):
self.filter_level = level
default_output = ANSIStream()
self.outputs = [default_output]
self.debug = partial(self.print_with_flush, DEBUG)
self.info = partial(self.print_with_flush, INFO)
self.warn = self.warning = partial(self.print_with_flush, WARN)
self.error = partial(self.print_with_flush, ERROR)
def prints(self, level, *args, **kwargs):
if level < self.filter_level:
return
for output in self.outputs:
output.prints(level, *args, **kwargs)
def print_with_flush(self, level, *args, **kwargs):
if level < self.filter_level:
return
for output in self.outputs:
output.prints(level, *args, **kwargs)
self.flush()
def exception(self, *args, **kwargs):
limit = kwargs.pop('limit', None)
self.print_with_flush(ERROR, *args, **kwargs)
self.print_with_flush(DEBUG, traceback.format_exc(limit))
def __call__(self, *args, **kwargs):
self.info(*args, **kwargs)
def __enter__(self):
self.orig_filter_level = self.filter_level
self.filter_level = self.ERROR + 100
def __exit__(self, *args):
self.filter_level = self.orig_filter_level
def flush(self):
for o in self.outputs:
if hasattr(o, 'flush'):
o.flush()
def close(self):
for o in self.outputs:
if hasattr(o, 'close'):
o.close()
class DevNull(Log):
def __init__(self):
Log.__init__(self, level=Log.ERROR)
self.outputs = []
class ThreadSafeLog(Log):
exception_traceback_level = Log.DEBUG
def __init__(self, level=Log.INFO):
Log.__init__(self, level=level)
self._lock = Lock()
def prints(self, *args, **kwargs):
with self._lock:
Log.prints(self, *args, **kwargs)
def print_with_flush(self, *args, **kwargs):
with self._lock:
Log.print_with_flush(self, *args, **kwargs)
def exception(self, *args, **kwargs):
limit = kwargs.pop('limit', None)
with self._lock:
Log.print_with_flush(self, ERROR, *args, **kwargs)
Log.print_with_flush(self, self.exception_traceback_level, traceback.format_exc(limit))
class ThreadSafeWrapper(Log):
def __init__(self, other_log):
Log.__init__(self, level=other_log.filter_level)
self.outputs = list(other_log.outputs)
self._lock = Lock()
def prints(self, *args, **kwargs):
with self._lock:
Log.prints(self, *args, **kwargs)
def print_with_flush(self, *args, **kwargs):
with self._lock:
Log.print_with_flush(self, *args, **kwargs)
class GUILog(ThreadSafeLog):
'''
Logs in HTML and plain text as unicode. Ideal for display in a GUI context.
'''
def __init__(self):
ThreadSafeLog.__init__(self, level=self.DEBUG)
self.outputs = [UnicodeHTMLStream()]
def clear(self):
self.outputs[0].clear()
@property
def html(self):
return self.outputs[0].html
@property
def plain_text(self):
return u''.join(self.outputs[0].plain_text)
def dump(self):
return self.outputs[0].dump()
def load(self, dump):
return self.outputs[0].load(dump)
def append_dump(self, dump):
return self.outputs[0].append_dump(dump)
default_log = Log()
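def _log_example():  # pragma: no cover
    # A minimal usage sketch (hypothetical helper, not part of the original
    # module): GUILog records every message as both HTML and plain text,
    # which suits display in a dialog after a background job finishes.
    log = GUILog()
    log('processing')  # __call__ is an alias for log.info
    log.warn('low disk space')
    return log.html, log.plain_text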

View File

@@ -0,0 +1,13 @@
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
try:
from time import monotonic
except ImportError:
from calibre.constants import plugins
monotonicp, err = plugins['monotonic']
if err:
raise RuntimeError('Failed to load the monotonic module with error: ' + err)
monotonic = monotonicp.monotonic
del monotonicp, err

View File

@@ -0,0 +1,44 @@
# Multiple replace from a dictionary: http://code.activestate.com/recipes/81330/
from __future__ import unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2010, sengian <sengian1 @ gmail.com>'
__docformat__ = 'restructuredtext en'
import re
try:
from collections import UserDict
except ImportError:
from UserDict import UserDict
class MReplace(UserDict):
def __init__(self, data=None, case_sensitive=True):
UserDict.__init__(self, data)
self.re = None
self.regex = None
self.case_sensitive = case_sensitive
self.compile_regex()
def compile_regex(self):
if len(self.data) > 0:
keys = sorted(self.data, key=len, reverse=True)
if isinstance(keys[0], bytes):
tmp = b"(%s)" % b"|".join(map(re.escape, keys))
else:
tmp = "(%s)" % "|".join(map(re.escape, keys))
if self.re != tmp:
self.re = tmp
if self.case_sensitive:
self.regex = re.compile(self.re)
else:
self.regex = re.compile(self.re, re.I)
def __call__(self, mo):
return self[mo.string[mo.start():mo.end()]]
def mreplace(self, text):
# Replace without regex compile
if len(self.data) < 1 or self.re is None:
return text
return self.regex.sub(self, text)

View File

@@ -0,0 +1,106 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os
from calibre import config_dir
from polyglot.builtins import builtins
user_dir = os.path.join(config_dir, 'resources')
class PathResolver(object):
def __init__(self):
self.locations = [sys.resources_location]
self.cache = {}
def suitable(path):
try:
return os.path.exists(path) and os.path.isdir(path) and \
os.listdir(path)
except:
pass
return False
self.default_path = sys.resources_location
dev_path = os.environ.get('CALIBRE_DEVELOP_FROM', None)
self.using_develop_from = False
if dev_path is not None:
dev_path = os.path.join(os.path.abspath(
os.path.dirname(dev_path)), 'resources')
if suitable(dev_path):
self.locations.insert(0, dev_path)
self.default_path = dev_path
self.using_develop_from = True
self.user_path = None
if suitable(user_dir):
self.locations.insert(0, user_dir)
self.user_path = user_dir
def __call__(self, path, allow_user_override=True):
path = path.replace(os.sep, '/')
key = (path, allow_user_override)
ans = self.cache.get(key, None)
if ans is None:
for base in self.locations:
if not allow_user_override and base == self.user_path:
continue
fpath = os.path.join(base, *path.split('/'))
if os.path.exists(fpath):
ans = fpath
break
if ans is None:
ans = os.path.join(self.default_path, *path.split('/'))
self.cache[key] = ans
return ans
def set_data(self, path, data=None):
self.cache.pop((path, True), None)
fpath = os.path.join(user_dir, *path.split('/'))
if data is None:
if os.path.exists(fpath):
os.remove(fpath)
else:
base = os.path.dirname(fpath)
if not os.path.exists(base):
os.makedirs(base)
with open(fpath, 'wb') as f:
f.write(data)
_resolver = PathResolver()
def get_path(path, data=False, allow_user_override=True):
fpath = _resolver(path, allow_user_override=allow_user_override)
if data:
with open(fpath, 'rb') as f:
return f.read()
return fpath
def get_image_path(path, data=False, allow_user_override=True):
if not path:
return get_path('images', allow_user_override=allow_user_override)
return get_path('images/'+path, data=data, allow_user_override=allow_user_override)
def set_data(path, data=None):
return _resolver.set_data(path, data)
builtins.__dict__['P'] = get_path
builtins.__dict__['I'] = get_image_path

View File

@@ -0,0 +1,139 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from polyglot.builtins import unicode_type
from calibre.constants import ispy3
MSGPACK_MIME = 'application/x-msgpack'
CANARY = 'jPoAv3zOyHvQ5JFNYg4hJ9'
def encoded(typ, data, ExtType):
if ExtType is None:
return {CANARY: typ, 'v': data}
return ExtType(typ, msgpack_dumps(data))
def create_encoder(for_json=False):
from datetime import datetime
ExtType = None
if not for_json:
import msgpack
ExtType = msgpack.ExtType
def encoder(obj):
if isinstance(obj, datetime):
return encoded(0, unicode_type(obj.isoformat()), ExtType)
if isinstance(obj, (set, frozenset)):
return encoded(1, tuple(obj), ExtType)
if getattr(obj, '__calibre_serializable__', False):
from calibre.ebooks.metadata.book.base import Metadata
from calibre.library.field_metadata import FieldMetadata, fm_as_dict
from calibre.db.categories import Tag
if isinstance(obj, Metadata):
from calibre.ebooks.metadata.book.serialize import metadata_as_dict
return encoded(
2, metadata_as_dict(obj, encode_cover_data=for_json), ExtType
)
elif isinstance(obj, FieldMetadata):
return encoded(3, fm_as_dict(obj), ExtType)
elif isinstance(obj, Tag):
return encoded(4, obj.as_dict(), ExtType)
if for_json and isinstance(obj, bytes):
return obj.decode('utf-8')
raise TypeError('Cannot serialize objects of type {}'.format(type(obj)))
return encoder
def msgpack_dumps(obj):
import msgpack
return msgpack.packb(obj, default=create_encoder(), use_bin_type=True)
def json_dumps(data, **kw):
import json
kw['default'] = create_encoder(for_json=True)
kw['ensure_ascii'] = False
ans = json.dumps(data, **kw)
if not isinstance(ans, bytes):
ans = ans.encode('utf-8')
return ans
def decode_metadata(x, for_json):
from polyglot.binary import from_base64_bytes
from calibre.ebooks.metadata.book.serialize import metadata_from_dict
obj = metadata_from_dict(x)
if for_json and obj.cover_data and obj.cover_data[1]:
obj.cover_data = obj.cover_data[0], from_base64_bytes(obj.cover_data[1])
return obj
def decode_field_metadata(x, for_json):
from calibre.library.field_metadata import fm_from_dict
return fm_from_dict(x)
def decode_category_tag(x, for_json):
from calibre.db.categories import Tag
return Tag.from_dict(x)
def decode_datetime(x, fj):
from calibre.utils.iso8601 import parse_iso8601
return parse_iso8601(x, assume_utc=True)
decoders = (
decode_datetime,
lambda x, fj: set(x),
decode_metadata, decode_field_metadata, decode_category_tag
)
def json_decoder(obj):
typ = obj.get(CANARY)
if typ is None:
return obj
return decoders[typ](obj['v'], True)
def msgpack_decoder(code, data):
return decoders[code](msgpack_loads(data), False)
def msgpack_loads(dump, use_list=True):
# use_list controls whether msgpack arrays are unpacked as lists or tuples
import msgpack
return msgpack.unpackb(dump, ext_hook=msgpack_decoder, raw=False, use_list=use_list)
def json_loads(data):
import json
return json.loads(data, object_hook=json_decoder)
if ispy3:
def pickle_dumps(data):
import pickle
return pickle.dumps(data, -1)
def pickle_loads(dump):
import pickle
return pickle.loads(dump, encoding='utf-8')
else:
def pickle_dumps(data):
import cPickle as pickle
return pickle.dumps(data, -1)
def pickle_loads(dump):
import cPickle as pickle
return pickle.loads(dump)

View File

@@ -0,0 +1,222 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
import os, sys
from polyglot.builtins import reraise
from calibre.constants import iswindows, plugins, ispy3
'''
This module defines a share_open() function which is a replacement for
Python's builtin open() function.
This replacement opens 'shareable' files on all platforms, that is, files that
can be read from, written to, and deleted at the same time by multiple
processes. All file handles are non-inheritable, as in Python 3 but unlike
Python 2. Non-inheritance is atomic.
Caveats on Windows: sharing is co-operative, i.e. it only works if
all processes involved open the file with share_open(). Also while you can
delete a file that is open, you cannot open a new file with the same filename
until all open file handles are closed. You also cannot delete the containing
directory until all file handles are closed. To get around this, rename the
file before deleting it.
'''
speedup, err = plugins['speedup']
if not speedup:
raise RuntimeError('Failed to load the speedup plugin with error: %s' % err)
valid_modes = {'a', 'a+', 'a+b', 'ab', 'r', 'rb', 'r+', 'r+b', 'w', 'wb', 'w+', 'w+b'}
def validate_mode(mode):
return mode in valid_modes
class FlagConstants(object):
def __init__(self):
for x in 'APPEND CREAT TRUNC EXCL RDWR RDONLY WRONLY'.split():
x = 'O_' + x
setattr(self, x, getattr(os, x))
for x in 'RANDOM SEQUENTIAL TEXT BINARY'.split():
x = 'O_' + x
setattr(self, x, getattr(os, x, 0))
fc = FlagConstants()
def flags_from_mode(mode):
if not validate_mode(mode):
raise ValueError('The mode is invalid')
m = mode[0]
random = '+' in mode
binary = 'b' in mode
if m == 'a':
flags = fc.O_APPEND | fc.O_CREAT
if random:
flags |= fc.O_RDWR | fc.O_RANDOM
else:
flags |= fc.O_WRONLY | fc.O_SEQUENTIAL
elif m == 'r':
if random:
flags = fc.O_RDWR | fc.O_RANDOM
else:
flags = fc.O_RDONLY | fc.O_SEQUENTIAL
elif m == 'w':
if random:
flags = fc.O_RDWR | fc.O_RANDOM
else:
flags = fc.O_WRONLY | fc.O_SEQUENTIAL
flags |= fc.O_TRUNC | fc.O_CREAT
flags |= (fc.O_BINARY if binary else fc.O_TEXT)
return flags
if iswindows:
from numbers import Integral
import msvcrt
import win32file, pywintypes
CREATE_NEW = win32file.CREATE_NEW
CREATE_ALWAYS = win32file.CREATE_ALWAYS
OPEN_EXISTING = win32file.OPEN_EXISTING
OPEN_ALWAYS = win32file.OPEN_ALWAYS
TRUNCATE_EXISTING = win32file.TRUNCATE_EXISTING
FILE_SHARE_READ = win32file.FILE_SHARE_READ
FILE_SHARE_WRITE = win32file.FILE_SHARE_WRITE
FILE_SHARE_DELETE = win32file.FILE_SHARE_DELETE
FILE_SHARE_VALID_FLAGS = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE
FILE_ATTRIBUTE_READONLY = win32file.FILE_ATTRIBUTE_READONLY
FILE_ATTRIBUTE_NORMAL = win32file.FILE_ATTRIBUTE_NORMAL
FILE_ATTRIBUTE_TEMPORARY = win32file.FILE_ATTRIBUTE_TEMPORARY
FILE_FLAG_DELETE_ON_CLOSE = win32file.FILE_FLAG_DELETE_ON_CLOSE
FILE_FLAG_SEQUENTIAL_SCAN = win32file.FILE_FLAG_SEQUENTIAL_SCAN
FILE_FLAG_RANDOM_ACCESS = win32file.FILE_FLAG_RANDOM_ACCESS
GENERIC_READ = win32file.GENERIC_READ & 0xffffffff
GENERIC_WRITE = win32file.GENERIC_WRITE & 0xffffffff
DELETE = 0x00010000
_ACCESS_MASK = os.O_RDONLY | os.O_WRONLY | os.O_RDWR
_ACCESS_MAP = {
os.O_RDONLY : GENERIC_READ,
os.O_WRONLY : GENERIC_WRITE,
os.O_RDWR : GENERIC_READ | GENERIC_WRITE
}
_CREATE_MASK = os.O_CREAT | os.O_EXCL | os.O_TRUNC
_CREATE_MAP = {
0 : OPEN_EXISTING,
os.O_EXCL : OPEN_EXISTING,
os.O_CREAT : OPEN_ALWAYS,
os.O_CREAT | os.O_EXCL : CREATE_NEW,
os.O_CREAT | os.O_TRUNC | os.O_EXCL : CREATE_NEW,
os.O_TRUNC : TRUNCATE_EXISTING,
os.O_TRUNC | os.O_EXCL : TRUNCATE_EXISTING,
os.O_CREAT | os.O_TRUNC : CREATE_ALWAYS
}
def raise_winerror(pywinerr):
reraise(
WindowsError,
WindowsError(pywinerr.winerror,
                (pywinerr.funcname or '') + ': ' + (pywinerr.strerror or '')),
sys.exc_info()[2])
def os_open(path, flags, mode=0o777, share_flags=FILE_SHARE_VALID_FLAGS):
'''
Replacement for os.open() allowing moving or unlinking before closing
'''
if not isinstance(flags, Integral):
raise TypeError('flags must be an integer')
if not isinstance(mode, Integral):
raise TypeError('mode must be an integer')
if share_flags & ~FILE_SHARE_VALID_FLAGS:
raise ValueError('bad share_flags: %r' % share_flags)
access_flags = _ACCESS_MAP[flags & _ACCESS_MASK]
create_flags = _CREATE_MAP[flags & _CREATE_MASK]
attrib_flags = FILE_ATTRIBUTE_NORMAL
if flags & os.O_CREAT and mode & ~0o444 == 0:
attrib_flags = FILE_ATTRIBUTE_READONLY
if flags & os.O_TEMPORARY:
share_flags |= FILE_SHARE_DELETE
attrib_flags |= FILE_FLAG_DELETE_ON_CLOSE
access_flags |= DELETE
if flags & os.O_SHORT_LIVED:
attrib_flags |= FILE_ATTRIBUTE_TEMPORARY
if flags & os.O_SEQUENTIAL:
attrib_flags |= FILE_FLAG_SEQUENTIAL_SCAN
if flags & os.O_RANDOM:
attrib_flags |= FILE_FLAG_RANDOM_ACCESS
try:
h = win32file.CreateFileW(
path, access_flags, share_flags, None, create_flags, attrib_flags, None)
except pywintypes.error as e:
raise_winerror(e)
ans = msvcrt.open_osfhandle(h, flags | os.O_NOINHERIT)
        h.Detach()  # We don't want the handle to be automatically closed when h is deleted
return ans
def share_open(path, mode='r', buffering=-1):
flags = flags_from_mode(mode)
return speedup.fdopen(os_open(path, flags), path, mode, buffering)
else:
if ispy3:
# See PEP 446
share_open = open
else:
def share_open(path, mode='r', buffering=-1):
flags = flags_from_mode(mode) | speedup.O_CLOEXEC
return speedup.fdopen(os.open(path, flags), path, mode, buffering)
def raise_winerror(x):
reraise(NotImplementedError, None, sys.exc_info()[2])
def find_tests():
import unittest
from calibre.ptempfile import TemporaryDirectory
class SharedFileTest(unittest.TestCase):
def test_shared_file(self):
eq = self.assertEqual
with TemporaryDirectory() as tdir:
fname = os.path.join(tdir, 'test.txt')
with share_open(fname, 'wb') as f:
f.write(b'a' * 20 * 1024)
eq(fname, f.name)
f = share_open(fname, 'rb')
eq(f.read(1), b'a')
if iswindows:
os.rename(fname, fname+'.moved')
os.remove(fname+'.moved')
else:
os.remove(fname)
eq(f.read(1), b'a')
f2 = share_open(fname, 'w+b')
f2.write(b'b' * 10 * 1024)
f2.seek(0)
eq(f.read(10000), b'a'*10000)
eq(f2.read(100), b'b' * 100)
f3 = share_open(fname, 'rb')
eq(f3.read(100), b'b' * 100)
return unittest.defaultTestLoader.loadTestsFromTestCase(SharedFileTest)

View File

@@ -0,0 +1,61 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Generate UUID encoded using a user specified alphabet.
'''
import string, math, uuid as _uuid
from polyglot.builtins import unicode_type
def num_to_string(number, alphabet, alphabet_len, pad_to_length=None):
ans = []
number = max(0, number)
while number:
number, digit = divmod(number, alphabet_len)
ans.append(alphabet[digit])
if pad_to_length is not None and pad_to_length > len(ans):
ans.append(alphabet[0] * (pad_to_length - len(ans)))
return ''.join(ans)
def string_to_num(string, alphabet_map, alphabet_len):
ans = 0
for char in reversed(string):
ans = ans * alphabet_len + alphabet_map[char]
return ans
class ShortUUID(object):
def __init__(self, alphabet=None):
# We do not include zero and one in the default alphabet as they can be
# confused with the letters O and I in some fonts. And removing them
# does not change the uuid_pad_len.
self.alphabet = tuple(sorted(unicode_type(alphabet or (string.digits + string.ascii_letters)[2:])))
self.alphabet_len = len(self.alphabet)
self.alphabet_map = {c:i for i, c in enumerate(self.alphabet)}
self.uuid_pad_len = int(math.ceil(math.log(1 << 128, self.alphabet_len)))
def uuid4(self, pad_to_length=None):
if pad_to_length is None:
pad_to_length = self.uuid_pad_len
return num_to_string(_uuid.uuid4().int, self.alphabet, self.alphabet_len, pad_to_length)
def uuid5(self, namespace, name, pad_to_length=None):
if pad_to_length is None:
pad_to_length = self.uuid_pad_len
return num_to_string(_uuid.uuid5(namespace, name).int, self.alphabet, self.alphabet_len, pad_to_length)
def decode(self, encoded):
return _uuid.UUID(int=string_to_num(encoded, self.alphabet_map, self.alphabet_len))
_global_instance = ShortUUID()
uuid4 = _global_instance.uuid4
uuid5 = _global_instance.uuid5
decode = _global_instance.decode
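# A minimal round-trip sketch using the module-level helpers above.
def _short_uuid_example():
    encoded = uuid4()          # compact string over the default 60-character alphabet
    as_uuid = decode(encoded)  # recover the underlying uuid.UUID
    return encoded, as_uuid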

View File

@@ -0,0 +1,888 @@
#!/usr/bin/python2
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__author__ = "Chad Miller <smartypantspy@chad.org>, Kovid Goyal <kovid at kovidgoyal.net>"
__description__ = "Smart-quotes, smart-ellipses, and smart-dashes for weblog entries in pyblosxom"
r"""
==============
smartypants.py
==============
----------------------------
SmartyPants ported to Python
----------------------------
Ported by `Chad Miller`_
Copyright (c) 2004, 2007 Chad Miller
original `SmartyPants`_ by `John Gruber`_
Copyright (c) 2003 John Gruber
Synopsis
========
A smart-quotes plugin for Pyblosxom_.
The original "SmartyPants" is a free web publishing plug-in for Movable Type,
Blosxom, and BBEdit that easily translates plain ASCII punctuation characters
into "smart" typographic punctuation HTML entities.
This software, *smartypants.py*, endeavours to be a functional port of
SmartyPants to Python, for use with Pyblosxom_.
Description
===========
SmartyPants can perform the following transformations:
- Straight quotes ( " and ' ) into "curly" quote HTML entities
- Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities
- Dashes (``--`` and ``---``) into en- and em-dash entities
- Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity
This means you can write, edit, and save your posts using plain old
ASCII straight quotes, plain dashes, and plain dots, but your published
posts (and final HTML output) will appear with smart quotes, em-dashes,
and proper ellipses.
SmartyPants does not modify characters within ``<pre>``, ``<code>``, ``<kbd>``,
``<math>`` or ``<script>`` tag blocks. Typically, these tags are used to
display text where smart quotes and other "smart punctuation" would not be
appropriate, such as source code or example markup.
Backslash Escapes
=================
If you need to use literal straight quotes (or plain hyphens and
periods), SmartyPants accepts the following backslash escape sequences
to force non-smart punctuation. It does so by transforming the escape
sequence into a decimal-encoded HTML entity:
(FIXME: table here.)
.. comment It sucks that there's a disconnect between the visual layout and table markup when special characters are involved.
.. comment ====== ===== =========
.. comment Escape Value Character
.. comment ====== ===== =========
.. comment \\\\\\\\ &#92; \\\\
.. comment \\\\" &#34; "
.. comment \\\\' &#39; '
.. comment \\\\. &#46; .
.. comment \\\\- &#45; \-
.. comment \\\\` &#96; \`
.. comment ====== ===== =========
This is useful, for example, when you want to use straight quotes as
foot and inch marks: 6'2" tall; a 17" iMac.
Options
=======
For Pyblosxom users, the ``smartypants_attributes`` attribute is where you
specify configuration options.
Numeric values are the easiest way to configure SmartyPants' behavior:
"0"
Suppress all transformations. (Do nothing.)
"1"
Performs default SmartyPants transformations: quotes (including
\`\`backticks'' -style), em-dashes, and ellipses. "``--``" (dash dash)
is used to signify an em-dash; there is no support for en-dashes.
"2"
Same as smarty_pants="1", except that it uses the old-school typewriter
shorthand for dashes: "``--``" (dash dash) for en-dashes, "``---``"
(dash dash dash)
for em-dashes.
"3"
Same as smarty_pants="2", but inverts the shorthand for dashes:
"``--``" (dash dash) for em-dashes, and "``---``" (dash dash dash) for
en-dashes.
"-1"
Stupefy mode. Reverses the SmartyPants transformation process, turning
the HTML entities produced by SmartyPants into their ASCII equivalents.
E.g. "&#8220;" is turned into a simple double-quote ("), "&#8212;" is
turned into two dashes, etc.
The following single-character attribute values can be combined to toggle
individual transformations from within the smarty_pants attribute. For
example, to educate normal quotes and em-dashes, but not ellipses or
\`\`backticks'' -style quotes:
``py['smartypants_attributes'] = "qd"``
"q"
Educates normal quote characters: (") and (').
"b"
Educates \`\`backticks'' -style double quotes.
"B"
Educates \`\`backticks'' -style double quotes and \`single' quotes.
"d"
Educates em-dashes.
"D"
Educates em-dashes and en-dashes, using old-school typewriter shorthand:
(dash dash) for en-dashes, (dash dash dash) for em-dashes.
"i"
Educates em-dashes and en-dashes, using inverted old-school typewriter
shorthand: (dash dash) for em-dashes, (dash dash dash) for en-dashes.
"e"
Educates ellipses.
"w"
Translates any instance of ``&quot;`` into a normal double-quote character.
This should be of no interest to most people, but of particular interest
to anyone who writes their posts using Dreamweaver, as Dreamweaver
inexplicably uses this entity to represent a literal double-quote
character. SmartyPants only educates normal quotes, not entities (because
ordinarily, entities are used for the explicit purpose of representing the
specific character they represent). The "w" option must be used in
conjunction with one (or both) of the other quote options ("q" or "b").
Thus, if you wish to apply all SmartyPants transformations (quotes, en-
and em-dashes, and ellipses) and also translate ``&quot;`` entities into
regular quotes so SmartyPants can educate them, you should pass the
following to the smarty_pants attribute: ``"qDew"``.
The ``smartypants_forbidden_flavours`` list contains pyblosxom flavours for
which no Smarty Pants rendering will occur.
Caveats
=======
Why You Might Not Want to Use Smart Quotes in Your Weblog
---------------------------------------------------------
For one thing, you might not care.
Most normal, mentally stable individuals do not take notice of proper
typographic punctuation. Many design and typography nerds, however, break
out in a nasty rash when they encounter, say, a restaurant sign that uses
a straight apostrophe to spell "Joe's".
If you're the sort of person who just doesn't care, you might well want to
continue not caring. Using straight quotes -- and sticking to the 7-bit
ASCII character set in general -- is certainly a simpler way to live.
Even if you *do* care about accurate typography, you still might want to
think twice before educating the quote characters in your weblog. One side
effect of publishing curly quote HTML entities is that it makes your
weblog a bit harder for others to quote from using copy-and-paste. What
happens is that when someone copies text from your blog, the copied text
contains the 8-bit curly quote characters (as well as the 8-bit characters
for em-dashes and ellipses, if you use these options). These characters
are not standard across different text encoding methods, which is why they
need to be encoded as HTML entities.
People copying text from your weblog, however, may not notice that you're
using curly quotes, and they'll go ahead and paste the unencoded 8-bit
characters copied from their browser into an email message or their own
weblog. When pasted as raw "smart quotes", these characters are likely to
get mangled beyond recognition.
That said, my own opinion is that any decent text editor or email client
makes it easy to stupefy smart quote characters into their 7-bit
equivalents, and I don't consider it my problem if you're using an
indecent text editor or email client.
Algorithmic Shortcomings
------------------------
One situation in which quotes will get curled the wrong way is when
apostrophes are used at the start of leading contractions. For example:
``'Twas the night before Christmas.``
In the case above, SmartyPants will turn the apostrophe into an opening
single-quote, when in fact it should be a closing one. I don't think
this problem can be solved in the general case -- every word processor
I've tried gets this wrong as well. In such cases, it's best to use the
proper HTML entity for closing single-quotes (``&#8217;``) by hand.
Bugs
====
To file bug reports or feature requests (other than topics listed in the
Caveats section above) please send email to: mailto:smartypantspy@chad.org
If the bug involves quotes being curled the wrong way, please send example
text to illustrate.
To Do list
----------
- Provide a function for use within templates to quote anything at all.
Version History
===============
1.5_1.6: Fri, 27 Jul 2007 07:06:40 -0400
- Fixed bug where blocks of precious unalterable text were instead
interpreted. Thanks to Le Roux and Dirk van Oosterbosch.
1.5_1.5: Sat, 13 Aug 2005 15:50:24 -0400
- Fix bogus magical quotation when there is no hint that the
user wants it, e.g., in "21st century". Thanks to Nathan Hamblen.
- Be smarter about quotes before terminating numbers in an en-dash'ed
range.
1.5_1.4: Thu, 10 Feb 2005 20:24:36 -0500
- Fix a date-processing bug, as reported by jacob childress.
- Begin a test-suite for ensuring correct output.
- Removed import of "string", since I didn't really need it.
(This was my first ever Python program. Sue me!)
1.5_1.3: Wed, 15 Sep 2004 18:25:58 -0400
- Abort processing if the flavour is in forbidden-list. Default of
[ "rss" ] (Idea of Wolfgang SCHNERRING.)
- Remove stray virgules from en-dashes. Patch by Wolfgang SCHNERRING.
1.5_1.2: Mon, 24 May 2004 08:14:54 -0400
- Some single quotes weren't replaced properly. Diff-tesuji played
by Benjamin GEIGER.
1.5_1.1: Sun, 14 Mar 2004 14:38:28 -0500
- Support upcoming pyblosxom 0.9 plugin verification feature.
1.5_1.0: Tue, 09 Mar 2004 08:08:35 -0500
- Initial release
Version Information
-------------------
Version numbers will track the SmartyPants_ version numbers, with the addition
of an underscore and the smartypants.py version on the end.
New versions will be available at `http://wiki.chad.org/SmartyPantsPy`_
.. _http://wiki.chad.org/SmartyPantsPy: http://wiki.chad.org/SmartyPantsPy
Authors
=======
`John Gruber`_ did all of the hard work of writing this software in Perl for
`Movable Type`_ and almost all of this useful documentation. `Chad Miller`_
ported it to Python to use with Pyblosxom_.
Additional Credits
==================
Portions of the SmartyPants original work are based on Brad Choate's nifty
MTRegex plug-in. `Brad Choate`_ also contributed a few bits of source code to
this plug-in. Brad Choate is a fine hacker indeed.
`Jeremy Hedley`_ and `Charles Wiltgen`_ deserve mention for exemplary beta
testing of the original SmartyPants.
`Rael Dornfest`_ ported SmartyPants to Blosxom.
.. _Brad Choate: http://bradchoate.com/
.. _Jeremy Hedley: http://antipixel.com/
.. _Charles Wiltgen: http://playbacktime.com/
.. _Rael Dornfest: http://raelity.org/
Copyright and License
=====================
SmartyPants_ license::
Copyright (c) 2003 John Gruber
(https://daringfireball.net/)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
* Neither the name "SmartyPants" nor the names of its contributors
may be used to endorse or promote products derived from this
software without specific prior written permission.
This software is provided by the copyright holders and contributors "as
is" and any express or implied warranties, including, but not limited
to, the implied warranties of merchantability and fitness for a
particular purpose are disclaimed. In no event shall the copyright
owner or contributors be liable for any direct, indirect, incidental,
special, exemplary, or consequential damages (including, but not
limited to, procurement of substitute goods or services; loss of use,
data, or profits; or business interruption) however caused and on any
theory of liability, whether in contract, strict liability, or tort
(including negligence or otherwise) arising in any way out of the use
of this software, even if advised of the possibility of such damage.
smartypants.py license::
smartypants.py is a derivative work of SmartyPants.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
This software is provided by the copyright holders and contributors "as
is" and any express or implied warranties, including, but not limited
to, the implied warranties of merchantability and fitness for a
particular purpose are disclaimed. In no event shall the copyright
owner or contributors be liable for any direct, indirect, incidental,
special, exemplary, or consequential damages (including, but not
limited to, procurement of substitute goods or services; loss of use,
data, or profits; or business interruption) however caused and on any
theory of liability, whether in contract, strict liability, or tort
(including negligence or otherwise) arising in any way out of the use
of this software, even if advised of the possibility of such damage.
.. _John Gruber: https://daringfireball.net/
.. _Chad Miller: http://web.chad.org/
.. _Pyblosxom: http://roughingit.subtlehints.net/pyblosxom
.. _SmartyPants: https://daringfireball.net/projects/smartypants/
.. _Movable Type: http://www.movabletype.org/
"""
import re
# style added by Kovid
tags_to_skip_regex = re.compile(r"<(/)?(style|pre|code|kbd|script|math)[^>]*>", re.I)
self_closing_regex = re.compile(r'/\s*>$')
# internal functions below here
def parse_attr(attr):
do_dashes = do_backticks = do_quotes = do_ellipses = do_stupefy = 0
if attr == "1":
do_quotes = 1
do_backticks = 1
do_dashes = 1
do_ellipses = 1
elif attr == "2":
# Do everything, turn all options on, use old school dash shorthand.
do_quotes = 1
do_backticks = 1
do_dashes = 2
do_ellipses = 1
elif attr == "3":
# Do everything, turn all options on, use inverted old school dash shorthand.
do_quotes = 1
do_backticks = 1
do_dashes = 3
do_ellipses = 1
elif attr == "-1":
# Special "stupefy" mode.
do_stupefy = 1
else:
for c in attr:
if c == "q":
do_quotes = 1
elif c == "b":
do_backticks = 1
elif c == "B":
do_backticks = 2
elif c == "d":
do_dashes = 1
elif c == "D":
do_dashes = 2
elif c == "i":
do_dashes = 3
elif c == "e":
do_ellipses = 1
else:
pass
# ignore unknown option
return do_dashes, do_backticks, do_quotes, do_ellipses, do_stupefy
def smartyPants(text, attr='1'):
# Parse attributes:
# 0 : do nothing
# 1 : set all
# 2 : set all, using old school en- and em- dash shortcuts
# 3 : set all, using inverted old school en and em- dash shortcuts
#
# q : quotes
# b : backtick quotes (``double'' only)
# B : backtick quotes (``double'' and `single')
# d : dashes
# D : old school dashes
# i : inverted old school dashes
# e : ellipses
if attr == "0":
# Do nothing.
return text
do_dashes, do_backticks, do_quotes, do_ellipses, do_stupefy = parse_attr(attr)
dashes_func = {1: educateDashes, 2: educateDashesOldSchool, 3: educateDashesOldSchoolInverted}.get(do_dashes, lambda x: x)
backticks_func = {1: educateBackticks, 2: lambda x: educateSingleBackticks(educateBackticks(x))}.get(do_backticks, lambda x: x)
ellipses_func = {1: educateEllipses}.get(do_ellipses, lambda x: x)
stupefy_func = {1: stupefyEntities}.get(do_stupefy, lambda x: x)
skipped_tag_stack = []
tokens = _tokenize(text)
result = []
in_pre = False
prev_token_last_char = ""
# This is a cheat, used to get some context
# for one-character tokens that consist of
# just a quote char. What we do is remember
# the last character of the previous text
# token, to use as context to curl single-
# character quote tokens correctly.
for cur_token in tokens:
if cur_token[0] == "tag":
            # Don't mess with quotes inside some tags; self-closing <tag/> forms are detected below.
result.append(cur_token[1])
skip_match = tags_to_skip_regex.match(cur_token[1])
if skip_match is not None:
is_self_closing = self_closing_regex.search(skip_match.group()) is not None
if not is_self_closing:
if not skip_match.group(1):
skipped_tag_stack.append(skip_match.group(2).lower())
in_pre = True
else:
if len(skipped_tag_stack) > 0:
if skip_match.group(2).lower() == skipped_tag_stack[-1]:
skipped_tag_stack.pop()
else:
pass
# This close doesn't match the open. This isn't XHTML. We should barf here.
if len(skipped_tag_stack) == 0:
in_pre = False
else:
t = cur_token[1]
last_char = t[-1:] # Remember last char of this token before processing.
if not in_pre:
t = processEscapes(t)
t = re.sub('&quot;', '"', t)
t = dashes_func(t)
t = ellipses_func(t)
# Note: backticks need to be processed before quotes.
t = backticks_func(t)
if do_quotes != 0:
if t == "'":
# Special case: single-character ' token
if re.match(r"\S", prev_token_last_char):
t = "&#8217;"
else:
t = "&#8216;"
elif t == '"':
# Special case: single-character " token
if re.match(r"\S", prev_token_last_char):
t = "&#8221;"
else:
t = "&#8220;"
else:
# Normal case:
t = educateQuotes(t)
t = stupefy_func(t)
prev_token_last_char = last_char
result.append(t)
return "".join(result)
def educateQuotes(text):
"""
Parameter: String.
Returns: The string, with "educated" curly quote HTML entities.
Example input: "Isn't this fun?"
Example output: &#8220;Isn&#8217;t this fun?&#8221;
"""
punct_class = r"""[!"#\$\%'()*+,-.\/:;<=>?\@\[\\\]\^_`{|}~]"""
# Special case if the very first character is a quote
# followed by punctuation at a non-word-break. Close the quotes by brute force:
text = re.sub(r"""^'(?=%s\\B)""" % (punct_class,), r"""&#8217;""", text)
text = re.sub(r"""^"(?=%s\\B)""" % (punct_class,), r"""&#8221;""", text)
# Special case for double sets of quotes, e.g.:
# <p>He said, "'Quoted' words in a larger quote."</p>
text = re.sub(r""""'(?=\w)""", """&#8220;&#8216;""", text)
text = re.sub(r"""'"(?=\w)""", """&#8216;&#8220;""", text)
text = re.sub(r'''""(?=\w)''', """&#8220;&#8220;""", text)
text = re.sub(r"""''(?=\w)""", """&#8216;&#8216;""", text)
text = re.sub(r'''\"\'''', """&#8221;&#8217;""", text)
text = re.sub(r'''\'\"''', """&#8217;&#8221;""", text)
text = re.sub(r'''""''', """&#8221;&#8221;""", text)
text = re.sub(r"""''""", """&#8217;&#8217;""", text)
# Special case for decade abbreviations (the '80s --> 80s):
# See http://practicaltypography.com/apostrophes.html
text = re.sub(r"""(\W|^)'(?=\d{2}s)""", r"""\1&#8217;""", text)
    # Measurements in feet and inches or longitude/latitude: 19' 43.5" --> 19′ 43.5″
text = re.sub(r'''(\W|^)([-0-9.]+\s*)'(\s*[-0-9.]+)"''', r'\1\2&#8242;\3&#8243;', text)
# Special case for Quotes at inside of other entities, e.g.:
# <p>A double quote--"within dashes"--would be nice.</p>
text = re.sub(r"""(?<=\W)"(?=\w)""", r"""&#8220;""", text)
text = re.sub(r"""(?<=\W)'(?=\w)""", r"""&#8216;""", text)
text = re.sub(r"""(?<=\w)"(?=\W)""", r"""&#8221;""", text)
text = re.sub(r"""(?<=\w)'(?=\W)""", r"""&#8217;""", text)
# The following are commented out as smartypants tokenizes text by
# stripping out html tags. Therefore, there is no guarantee that the
    # start-of-line and end-of-line regex operators will match anything
    # meaningful
    # Special case for Quotes at end of line with a preceding space (may change just to end of line)
# text = re.sub(r"""(?<=\s)"$""", r"""&#8221;""", text)
# text = re.sub(r"""(?<=\s)'$""", r"""&#8217;""", text)
# Special case for Quotes at beginning of line with a space - multiparagraph quoted text:
# text = re.sub(r"""^"(?=\s)""", r"""&#8220;""", text)
# text = re.sub(r"""^'(?=\s)""", r"""&#8216;""", text)
close_class = r"""[^\ \t\r\n\[\{\(\-]"""
dec_dashes = r"""&#8211;|&#8212;"""
# Get most opening single quotes:
opening_single_quotes_regex = re.compile(r"""
(
\s | # a whitespace char, or
&nbsp; | # a non-breaking space entity, or
-- | # dashes, or
&[mn]dash; | # named dash entities
%s | # or decimal entities
&\#x201[34]; # or hex
)
' # the quote
(?=\w) # followed by a word character
""" % (dec_dashes,), re.VERBOSE)
text = opening_single_quotes_regex.sub(r"""\1&#8216;""", text)
closing_single_quotes_regex = re.compile(r"""
(%s)
'
(?!\s | s\b | \d)
""" % (close_class,), re.VERBOSE)
text = closing_single_quotes_regex.sub(r"""\1&#8217;""", text)
closing_single_quotes_regex = re.compile(r"""
(%s)
'
(\s | s\b)
""" % (close_class,), re.VERBOSE)
text = closing_single_quotes_regex.sub(r"""\1&#8217;\2""", text)
# Any remaining single quotes should be opening ones:
text = re.sub(r"""'""", r"""&#8216;""", text)
# Get most opening double quotes:
opening_double_quotes_regex = re.compile(r"""
(
\s | # a whitespace char, or
&nbsp; | # a non-breaking space entity, or
-- | # dashes, or
&[mn]dash; | # named dash entities
%s | # or decimal entities
&\#x201[34]; # or hex
)
" # the quote
(?=\w) # followed by a word character
""" % (dec_dashes,), re.VERBOSE)
text = opening_double_quotes_regex.sub(r"""\1&#8220;""", text)
# Double closing quotes:
closing_double_quotes_regex = re.compile(r"""
#(%s)? # character that indicates the quote should be closing
"
(?=\s)
""" % (close_class,), re.VERBOSE)
text = closing_double_quotes_regex.sub(r"""&#8221;""", text)
closing_double_quotes_regex = re.compile(r"""
(%s) # character that indicates the quote should be closing
"
""" % (close_class,), re.VERBOSE)
text = closing_double_quotes_regex.sub(r"""\1&#8221;""", text)
if text.endswith('-"'):
        # A string that ends with -" is sometimes used for dialogue
text = text[:-1] + '&#8221;'
# Any remaining quotes should be opening ones.
text = re.sub(r'"', r"""&#8220;""", text)
return text
def educateBackticks(text):
"""
Parameter: String.
Returns: The string, with ``backticks'' -style double quotes
translated into HTML curly quote entities.
Example input: ``Isn't this fun?''
Example output: &#8220;Isn't this fun?&#8221;
"""
text = re.sub(r"""``""", r"""&#8220;""", text)
text = re.sub(r"""''""", r"""&#8221;""", text)
return text
def educateSingleBackticks(text):
"""
Parameter: String.
Returns: The string, with `backticks' -style single quotes
translated into HTML curly quote entities.
Example input: `Isn't this fun?'
Example output: &#8216;Isn&#8217;t this fun?&#8217;
"""
text = re.sub(r"""`""", r"""&#8216;""", text)
text = re.sub(r"""'""", r"""&#8217;""", text)
return text
def educateDashes(text):
"""
Parameter: String.
    Returns: The string, with each instance of "--" translated to
             an em-dash HTML entity, and each "---" translated to
             an en-dash HTML entity.
"""
text = re.sub(r"""---""", r"""&#8211;""", text) # en (yes, backwards)
text = re.sub(r"""--""", r"""&#8212;""", text) # em (yes, backwards)
return text
def educateDashesOldSchool(text):
"""
Parameter: String.
Returns: The string, with each instance of "--" translated to
an en-dash HTML entity, and each "---" translated to
an em-dash HTML entity.
"""
text = re.sub(r"""---""", r"""&#8212;""", text) # em (yes, backwards)
text = re.sub(r"""--""", r"""&#8211;""", text) # en (yes, backwards)
return text
def educateDashesOldSchoolInverted(text):
"""
Parameter: String.
Returns: The string, with each instance of "--" translated to
an em-dash HTML entity, and each "---" translated to
an en-dash HTML entity. Two reasons why: First, unlike the
en- and em-dash syntax supported by
EducateDashesOldSchool(), it's compatible with existing
entries written before SmartyPants 1.1, back when "--" was
only used for em-dashes. Second, em-dashes are more
common than en-dashes, and so it sort of makes sense that
the shortcut should be shorter to type. (Thanks to Aaron
Swartz for the idea.)
"""
text = re.sub(r"""---""", r"""&#8211;""", text) # em
text = re.sub(r"""--""", r"""&#8212;""", text) # en
return text
def educateEllipses(text):
"""
Parameter: String.
Returns: The string, with each instance of "..." translated to
an ellipsis HTML entity.
Example input: Huh...?
Example output: Huh&#8230;?
"""
text = re.sub(r"""\.\.\.""", r"""&#8230;""", text)
text = re.sub(r"""\. \. \.""", r"""&#8230;""", text)
return text
def stupefyEntities(text):
"""
Parameter: String.
Returns: The string, with each SmartyPants HTML entity translated to
its ASCII counterpart.
Example input: &#8220;Hello &#8212; world.&#8221;
Example output: "Hello -- world."
"""
text = re.sub(r"""&#8211;""", r"""-""", text) # en-dash
text = re.sub(r"""&#8212;""", r"""--""", text) # em-dash
text = re.sub(r"""&#8216;""", r"""'""", text) # open single quote
text = re.sub(r"""&#8217;""", r"""'""", text) # close single quote
text = re.sub(r"""&#8220;""", r'''"''', text) # open double quote
text = re.sub(r"""&#8221;""", r'''"''', text) # close double quote
text = re.sub(r"""&#8230;""", r"""...""", text) # ellipsis
return text
def processEscapes(text):
r"""
Parameter: String.
    Returns: The string, after processing the following backslash
escape sequences. This is useful if you want to force a "dumb"
quote or other character to appear.
Escape Value
------ -----
\\ &#92;
\" &#34;
\' &#39;
\. &#46;
\- &#45;
\` &#96;
"""
text = re.sub(r"""\\\\""", r"""&#92;""", text)
text = re.sub(r'''\\"''', r"""&#34;""", text)
text = re.sub(r"""\\'""", r"""&#39;""", text)
text = re.sub(r"""\\\.""", r"""&#46;""", text)
text = re.sub(r"""\\-""", r"""&#45;""", text)
text = re.sub(r"""\\`""", r"""&#96;""", text)
return text
def _tokenize(html):
"""
Parameter: String containing HTML markup.
Returns: Reference to an array of the tokens comprising the input
                string. Each token is either a tag (possibly with nested
                tags contained therein, such as <a href="<MTFoo>">), or a
run of text between tags. Each element of the array is a
two-element array; the first is either 'tag' or 'text';
the second is the actual value.
Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
<http://www.bradchoate.com/past/mtregex.php>
"""
tokens = []
# depth = 6
# nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth)
# match = r"""(?: <! ( -- .*? -- \s* )+ > ) | # comments
# (?: <\? .*? \?> ) | # directives
# %s # nested tags """ % (nested_tags,)
tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""")
token_match = tag_soup.search(html)
previous_end = 0
while token_match is not None:
if token_match.group(1):
tokens.append(['text', token_match.group(1)])
tokens.append(['tag', token_match.group(2)])
previous_end = token_match.end()
token_match = tag_soup.search(html, token_match.end())
if previous_end < len(html):
tokens.append(['text', html[previous_end:]])
return tokens
def run_tests(return_tests=False):
import unittest
sp = smartyPants
class TestSmartypantsAllAttributes(unittest.TestCase):
# the default attribute is "1", which means "all".
def test_dates(self):
self.assertEqual(sp("one two '60s"), "one two &#8217;60s")
self.assertEqual(sp("1440-80's"), "1440-80&#8217;s")
self.assertEqual(sp("1440-'80s"), "1440-&#8217;80s")
self.assertEqual(sp("1440---'80s"), "1440&#8211;&#8217;80s")
self.assertEqual(sp("1960s"), "1960s") # no effect.
self.assertEqual(sp("1960's"), "1960&#8217;s")
self.assertEqual(sp("one two '60s"), "one two &#8217;60s")
self.assertEqual(sp("'60s"), "&#8217;60s")
def test_measurements(self):
ae = self.assertEqual
ae(sp("one two 1.1'2.2\""), "one two 1.1&#8242;2.2&#8243;")
ae(sp("1' 2\""), "1&#8242; 2&#8243;")
def test_skip_tags(self):
self.assertEqual(
sp("""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>"""), # noqa
"""<script type="text/javascript">\n<!--\nvar href = "http://www.google.com";\nvar linktext = "google";\ndocument.write('<a href="' + href + '">' + linktext + "</a>");\n//-->\n</script>""") # noqa
self.assertEqual(
sp("""<p>He said &quot;Let's write some code.&quot; This code here <code>if True:\n\tprint &quot;Okay&quot;</code> is python code.</p>"""),
"""<p>He said &#8220;Let&#8217;s write some code.&#8221; This code here <code>if True:\n\tprint &quot;Okay&quot;</code> is python code.</p>""") # noqa
self.assertEqual(
sp('''<script/><p>It's ok</p>'''),
'''<script/><p>It&#8217;s ok</p>''')
def test_ordinal_numbers(self):
self.assertEqual(sp("21st century"), "21st century") # no effect.
self.assertEqual(sp("3rd"), "3rd") # no effect.
def test_educated_quotes(self):
self.assertEqual(sp('''"Isn't this fun?"'''), '''&#8220;Isn&#8217;t this fun?&#8221;''')
tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestSmartypantsAllAttributes)
if return_tests:
return tests
unittest.TextTestRunner(verbosity=4).run(tests)
if __name__ == "__main__":
run_tests()

View File

@@ -0,0 +1,205 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import os
from polyglot.builtins import range, unicode_type
class ReadOnlyFileBuffer(object):
''' A zero copy implementation of a file like object. Uses memoryviews for efficiency. '''
def __init__(self, raw):
self.sz, self.mv = len(raw), (raw if isinstance(raw, memoryview) else memoryview(raw))
self.pos = 0
def tell(self):
return self.pos
def read(self, n=None):
if n is None:
ans = self.mv[self.pos:]
self.pos = self.sz
return ans
ans = self.mv[self.pos:self.pos+n]
self.pos = min(self.pos + n, self.sz)
return ans
def seek(self, pos, whence=os.SEEK_SET):
if whence == os.SEEK_SET:
self.pos = pos
elif whence == os.SEEK_END:
self.pos = self.sz + pos
else:
self.pos += pos
self.pos = max(0, min(self.pos, self.sz))
return self.pos
def getvalue(self):
return self.mv
def close(self):
pass
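# A quick sketch of the zero-copy behaviour: read() hands back memoryview
# slices over the original buffer rather than copies.
def _buffer_example():
    buf = ReadOnlyFileBuffer(b'hello world')
    head = buf.read(5)  # memoryview over b'hello'
    buf.seek(-5, os.SEEK_END)
    tail = buf.read()   # memoryview over b'world'
    return head.tobytes(), tail.tobytes()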
def svg_path_to_painter_path(d):
'''
Convert a tiny SVG 1.2 path into a QPainterPath.
:param d: The value of the d attribute of an SVG <path> tag
'''
from PyQt5.Qt import QPainterPath
cmd = last_cmd = b''
path = QPainterPath()
moveto_abs, moveto_rel = b'M', b'm'
closepath1, closepath2 = b'Z', b'z'
lineto_abs, lineto_rel = b'L', b'l'
hline_abs, hline_rel = b'H', b'h'
vline_abs, vline_rel = b'V', b'v'
curveto_abs, curveto_rel = b'C', b'c'
smoothcurveto_abs, smoothcurveto_rel = b'S', b's'
quadcurveto_abs, quadcurveto_rel = b'Q', b'q'
smoothquadcurveto_abs, smoothquadcurveto_rel = b'T', b't'
# Store the last parsed values
# x/y = end position
# x1/y1 and x2/y2 = bezier control points
x = y = x1 = y1 = x2 = y2 = 0
if isinstance(d, unicode_type):
d = d.encode('ascii')
d = d.replace(b',', b' ').replace(b'\n', b' ')
end = len(d)
pos = [0]
def read_byte():
p = pos[0]
pos[0] += 1
return d[p:p+1]
def parse_float():
chars = []
while pos[0] < end:
c = read_byte()
if c == b' ' and not chars:
continue
if c in b'-.0123456789':
chars.append(c)
else:
break
if not chars:
raise ValueError('Premature end of input while expecting a number')
return float(b''.join(chars))
def parse_floats(num, x_offset=0, y_offset=0):
for i in range(num):
val = parse_float()
yield val + (x_offset if i % 2 == 0 else y_offset)
repeated_command = None
while pos[0] < end:
last_cmd = cmd
cmd = read_byte() if repeated_command is None else repeated_command
repeated_command = None
if cmd == b' ':
continue
if cmd == moveto_abs:
x, y = parse_float(), parse_float()
path.moveTo(x, y)
elif cmd == moveto_rel:
x += parse_float()
y += parse_float()
path.moveTo(x, y)
elif cmd == closepath1 or cmd == closepath2:
path.closeSubpath()
elif cmd == lineto_abs:
x, y = parse_floats(2)
path.lineTo(x, y)
elif cmd == lineto_rel:
x += parse_float()
y += parse_float()
path.lineTo(x, y)
elif cmd == hline_abs:
x = parse_float()
path.lineTo(x, y)
elif cmd == hline_rel:
x += parse_float()
path.lineTo(x, y)
elif cmd == vline_abs:
y = parse_float()
path.lineTo(x, y)
elif cmd == vline_rel:
y += parse_float()
path.lineTo(x, y)
elif cmd == curveto_abs:
x1, y1, x2, y2, x, y = parse_floats(6)
path.cubicTo(x1, y1, x2, y2, x, y)
elif cmd == curveto_rel:
x1, y1, x2, y2, x, y = parse_floats(6, x, y)
path.cubicTo(x1, y1, x2, y2, x, y)
elif cmd == smoothcurveto_abs:
if last_cmd == curveto_abs or last_cmd == curveto_rel or last_cmd == smoothcurveto_abs or last_cmd == smoothcurveto_rel:
x1 = 2 * x - x2
y1 = 2 * y - y2
else:
x1, y1 = x, y
x2, y2, x, y = parse_floats(4)
path.cubicTo(x1, y1, x2, y2, x, y)
elif cmd == smoothcurveto_rel:
if last_cmd == curveto_abs or last_cmd == curveto_rel or last_cmd == smoothcurveto_abs or last_cmd == smoothcurveto_rel:
x1 = 2 * x - x2
y1 = 2 * y - y2
else:
x1, y1 = x, y
x2, y2, x, y = parse_floats(4, x, y)
path.cubicTo(x1, y1, x2, y2, x, y)
elif cmd == quadcurveto_abs:
x1, y1, x, y = parse_floats(4)
path.quadTo(x1, y1, x, y)
elif cmd == quadcurveto_rel:
x1, y1, x, y = parse_floats(4, x, y)
path.quadTo(x1, y1, x, y)
elif cmd == smoothquadcurveto_abs:
if last_cmd in (quadcurveto_abs, quadcurveto_rel, smoothquadcurveto_abs, smoothquadcurveto_rel):
x1 = 2 * x - x1
y1 = 2 * y - y1
else:
x1, y1 = x, y
x, y = parse_floats(2)
path.quadTo(x1, y1, x, y)
elif cmd == smoothquadcurveto_rel:
if last_cmd in (quadcurveto_abs, quadcurveto_rel, smoothquadcurveto_abs, smoothquadcurveto_rel):
x1 = 2 * x - x1
y1 = 2 * y - y1
else:
x1, y1 = x, y
x, y = parse_floats(2, x, y)
path.quadTo(x1, y1, x, y)
elif cmd in b'-.0123456789':
# A new number begins
# In this case, multiple parameters tuples are specified for the last command
# We rewind to reparse data correctly
pos[0] -= 1
# Handle extra parameters
if last_cmd == moveto_abs:
repeated_command = cmd = lineto_abs
elif last_cmd == moveto_rel:
repeated_command = cmd = lineto_rel
elif last_cmd in (closepath1, closepath2):
raise ValueError('Extra parameters after close path command')
elif last_cmd in (
lineto_abs, lineto_rel, hline_abs, hline_rel, vline_abs,
vline_rel, curveto_abs, curveto_rel,smoothcurveto_abs,
smoothcurveto_rel, quadcurveto_abs, quadcurveto_rel,
smoothquadcurveto_abs, smoothquadcurveto_rel
):
repeated_command = cmd = last_cmd
else:
raise ValueError('Unknown path command: %s' % cmd)
return path
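# A tiny illustrative call (needs PyQt5 at runtime, as imported inside the
# function above): a triangle built from absolute moveto/lineto and closepath.
def _svg_path_example():
    return svg_path_to_painter_path('M 10 10 L 90 10 L 50 80 Z')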

View File

@@ -0,0 +1,443 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, sys, re
from calibre.constants import iswindows, ispy3
from polyglot.builtins import iteritems, range, zip, native_string_type
if iswindows:
import ctypes.wintypes
class CONSOLE_SCREEN_BUFFER_INFO(ctypes.Structure):
_fields_ = [
('dwSize', ctypes.wintypes._COORD),
('dwCursorPosition', ctypes.wintypes._COORD),
('wAttributes', ctypes.wintypes.WORD),
('srWindow', ctypes.wintypes._SMALL_RECT),
('dwMaximumWindowSize', ctypes.wintypes._COORD)
]
def fmt(code):
return '\033[%dm' % code
RATTRIBUTES = dict(
zip(range(1, 9), (
'bold',
'dark',
'',
'underline',
'blink',
'',
'reverse',
'concealed'
)
))
ATTRIBUTES = {v:fmt(k) for k, v in iteritems(RATTRIBUTES)}
del ATTRIBUTES['']
RBACKGROUNDS = dict(
zip(range(41, 48), (
'red',
'green',
'yellow',
'blue',
'magenta',
'cyan',
'white'
),
))
BACKGROUNDS = {v:fmt(k) for k, v in iteritems(RBACKGROUNDS)}
RCOLORS = dict(
zip(range(31, 38), (
'red',
'green',
'yellow',
'blue',
'magenta',
'cyan',
'white',
),
))
COLORS = {v:fmt(k) for k, v in iteritems(RCOLORS)}
RESET = fmt(0)
if iswindows:
# From wincon.h
WCOLORS = {c:i for i, c in enumerate((
'black', 'blue', 'green', 'cyan', 'red', 'magenta', 'yellow', 'white'))}
def to_flag(fg, bg, bold):
val = 0
if bold:
val |= 0x08
if fg in WCOLORS:
val |= WCOLORS[fg]
if bg in WCOLORS:
val |= (WCOLORS[bg] << 4)
return val
def colored(text, fg=None, bg=None, bold=False):
prefix = []
if fg is not None:
prefix.append(COLORS[fg])
if bg is not None:
prefix.append(BACKGROUNDS[bg])
if bold:
prefix.append(ATTRIBUTES['bold'])
prefix = ''.join(prefix)
suffix = RESET
if isinstance(text, bytes):
prefix = prefix.encode('ascii')
suffix = suffix.encode('ascii')
return prefix + text + suffix
class Detect(object):
def __init__(self, stream):
self.stream = stream or sys.stdout
self.isatty = getattr(self.stream, 'isatty', lambda : False)()
force_ansi = 'CALIBRE_FORCE_ANSI' in os.environ
if not self.isatty and force_ansi:
self.isatty = True
self.isansi = force_ansi or not iswindows
self.set_console = self.write_console = None
self.is_console = False
if not self.isansi:
try:
import msvcrt
self.msvcrt = msvcrt
self.file_handle = msvcrt.get_osfhandle(self.stream.fileno())
from ctypes import windll, wintypes, byref, POINTER, WinDLL
mode = wintypes.DWORD(0)
f = windll.kernel32.GetConsoleMode
f.argtypes, f.restype = [wintypes.HANDLE, POINTER(wintypes.DWORD)], wintypes.BOOL
if f(self.file_handle, byref(mode)):
# Stream is a console
self.set_console = windll.kernel32.SetConsoleTextAttribute
self.default_console_text_attributes = WCOLORS['white']
kernel32 = WinDLL(native_string_type('kernel32'), use_last_error=True)
self.write_console = kernel32.WriteConsoleW
self.write_console.argtypes = [wintypes.HANDLE, wintypes.c_wchar_p, wintypes.DWORD, POINTER(wintypes.DWORD), wintypes.LPVOID]
self.write_console.restype = wintypes.BOOL
kernel32.GetConsoleScreenBufferInfo.argtypes = [wintypes.HANDLE, ctypes.POINTER(CONSOLE_SCREEN_BUFFER_INFO)]
kernel32.GetConsoleScreenBufferInfo.restype = wintypes.BOOL
csbi = CONSOLE_SCREEN_BUFFER_INFO()
if kernel32.GetConsoleScreenBufferInfo(self.file_handle, byref(csbi)):
self.default_console_text_attributes = csbi.wAttributes
self.is_console = True
except:
pass
def write_unicode_text(self, text, ignore_errors=False):
        ' Windows-only method that writes unicode strings correctly to the windows console using the Win32 API '
if self.is_console:
from ctypes import wintypes, byref, c_wchar_p
written = wintypes.DWORD(0)
text = text.replace('\0', '')
chunk = len(text)
while text:
t, text = text[:chunk], text[chunk:]
wt = c_wchar_p(t)
if ispy3:
text_len = len(t.encode('utf-16'))
else:
# Use the fact that len(t) == wcslen(wt) in python 2.7 on
# windows where the python unicode type uses UTF-16
text_len = len(t)
if not self.write_console(self.file_handle, wt, text_len, byref(written), None):
# Older versions of windows can fail to write large strings
# to console with WriteConsoleW (seen it happen on Win XP)
import winerror
err = ctypes.get_last_error()
if err == winerror.ERROR_NOT_ENOUGH_MEMORY and chunk >= 128:
# Retry with a smaller chunk size (give up if chunk < 128)
chunk = chunk // 2
text = t + text
continue
if err == winerror.ERROR_GEN_FAILURE:
# On newer windows, this happens when trying to write
# non-ascii chars to the console and the console is set
# to use raster fonts (the default). In this case
# rather than failing, write an informative error
# message and the asciized version of the text.
print('Non-ASCII text detected. You must set your Console\'s font to'
' Lucida Console or Consolas or some other TrueType font to see this text', file=self.stream, end=' -- ')
from calibre.utils.filenames import ascii_text
print(ascii_text(t + text), file=self.stream, end='')
continue
if not ignore_errors:
raise ctypes.WinError(err)
class ColoredStream(Detect):
def __init__(self, stream=None, fg=None, bg=None, bold=False):
stream = getattr(stream, 'buffer', stream)
Detect.__init__(self, stream)
self.fg, self.bg, self.bold = fg, bg, bold
if self.set_console is not None:
self.wval = to_flag(self.fg, self.bg, bold)
if not self.bg:
self.wval |= self.default_console_text_attributes & 0xF0
def cwrite(self, what):
if not isinstance(what, bytes):
what = what.encode('ascii')
self.stream.write(what)
def __enter__(self):
if not self.isatty:
return self
if self.isansi:
if self.bold:
self.cwrite(ATTRIBUTES['bold'])
if self.bg is not None:
self.cwrite(BACKGROUNDS[self.bg])
if self.fg is not None:
self.cwrite(COLORS[self.fg])
elif self.set_console is not None:
if self.wval != 0:
self.set_console(self.file_handle, self.wval)
return self
def __exit__(self, *args, **kwargs):
if not self.isatty:
return
if not self.fg and not self.bg and not self.bold:
return
if self.isansi:
self.cwrite(RESET)
self.stream.flush()
elif self.set_console is not None:
self.set_console(self.file_handle, self.default_console_text_attributes)
class ANSIStream(Detect):
ANSI_RE = r'\033\[((?:\d|;)*)([a-zA-Z])'
def __init__(self, stream=None):
super(ANSIStream, self).__init__(stream)
self.encoding = getattr(self.stream, 'encoding', 'utf-8') or 'utf-8'
self.stream_takes_unicode = hasattr(self.stream, 'buffer')
self.last_state = (None, None, False)
self._ansi_re_bin = self._ansi_re_unicode = None
def ansi_re(self, binary=False):
attr = '_ansi_re_bin' if binary else '_ansi_re_unicode'
ans = getattr(self, attr)
if ans is None:
expr = self.ANSI_RE
if binary:
expr = expr.encode('ascii')
ans = re.compile(expr)
setattr(self, attr, ans)
return ans
def write(self, text):
if not self.isatty:
return self.strip_and_write(text)
if self.isansi:
return self.stream.write(text)
if not self.isansi and self.set_console is None:
return self.strip_and_write(text)
self.write_and_convert(text)
def polyglot_write(self, text):
binary = isinstance(text, bytes)
stream = self.stream
if self.stream_takes_unicode:
if binary:
stream = self.stream.buffer
else:
if not binary:
text = text.encode(self.encoding, 'replace')
stream.write(text)
def strip_and_write(self, text):
binary = isinstance(text, bytes)
pat = self.ansi_re(binary)
repl = b'' if binary else ''
self.polyglot_write(pat.sub(repl, text))
def write_and_convert(self, text):
'''
Write the given text to our wrapped stream, stripping any ANSI
sequences from the text, and optionally converting them into win32
calls.
'''
cursor = 0
binary = isinstance(text, bytes)
for match in self.ansi_re(binary).finditer(text):
start, end = match.span()
self.write_plain_text(text, cursor, start)
self.convert_ansi(*match.groups())
cursor = end
self.write_plain_text(text, cursor, len(text))
self.set_console(self.file_handle, self.default_console_text_attributes)
self.stream.flush()
def write_plain_text(self, text, start, end):
if start < end:
text = text[start:end]
if self.is_console and isinstance(text, bytes):
try:
utext = text.decode(self.encoding)
except ValueError:
pass
else:
return self.write_unicode_text(utext)
self.polyglot_write(text)
def convert_ansi(self, paramstring, command):
if isinstance(paramstring, bytes):
paramstring = paramstring.decode('ascii', 'replace')
if isinstance(command, bytes):
command = command.decode('ascii', 'replace')
params = self.extract_params(paramstring)
self.call_win32(command, params)
def extract_params(self, paramstring):
def split(paramstring):
for p in paramstring.split(';'):
if p:
yield int(p)
return tuple(split(paramstring))
def call_win32(self, command, params):
if command != 'm':
return
fg, bg, bold = self.last_state
for param in params:
if param in RCOLORS:
fg = RCOLORS[param]
elif param in RBACKGROUNDS:
bg = RBACKGROUNDS[param]
elif param == 1:
bold = True
elif param == 0:
fg, bg, bold = None, None, False
self.last_state = (fg, bg, bold)
if fg or bg or bold:
val = to_flag(fg, bg, bold)
if not bg:
val |= self.default_console_text_attributes & 0xF0
self.set_console(self.file_handle, val)
else:
self.set_console(self.file_handle, self.default_console_text_attributes)
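# Usage sketch: wrap a stream and write text containing ANSI escapes.
# On an ANSI-capable tty the escapes pass through unchanged, on a legacy
# Windows console they are converted to console-attribute calls, and on
# a non-tty they are stripped:
#
#   s = ANSIStream(sys.stdout)
#   s.write('\033[1;31mbold red\033[0m back to normal\n')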
def windows_terminfo():
from ctypes import Structure, byref
from ctypes.wintypes import SHORT, WORD
class COORD(Structure):
"""struct in wincon.h"""
_fields_ = [
('X', SHORT),
('Y', SHORT),
]
class SMALL_RECT(Structure):
"""struct in wincon.h."""
_fields_ = [
("Left", SHORT),
("Top", SHORT),
("Right", SHORT),
("Bottom", SHORT),
]
class CONSOLE_SCREEN_BUFFER_INFO(Structure):
"""struct in wincon.h."""
_fields_ = [
("dwSize", COORD),
("dwCursorPosition", COORD),
("wAttributes", WORD),
("srWindow", SMALL_RECT),
("dwMaximumWindowSize", COORD),
]
csbi = CONSOLE_SCREEN_BUFFER_INFO()
import msvcrt
file_handle = msvcrt.get_osfhandle(sys.stdout.fileno())
from ctypes import windll
success = windll.kernel32.GetConsoleScreenBufferInfo(file_handle,
byref(csbi))
if not success:
raise Exception('stdout is not a console?')
return csbi
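# e.g. windows_terminfo().dwSize is a COORD whose X is the buffer width
# in columns and Y its height in rows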
def get_term_geometry():
import fcntl, termios, struct
def ioctl_GWINSZ(fd):
try:
return struct.unpack(b'HHHH', fcntl.ioctl(fd, termios.TIOCGWINSZ, b'\0'*8))[:2]
except Exception:
return None, None
for f in (sys.stdin, sys.stdout, sys.stderr):
lines, cols = ioctl_GWINSZ(f.fileno())
if lines is not None:
return lines, cols
try:
fd = os.open(os.ctermid(), os.O_RDONLY)
try:
lines, cols = ioctl_GWINSZ(fd)
if lines is not None:
return lines, cols
finally:
os.close(fd)
except Exception:
pass
return None, None
def geometry():
if iswindows:
try:
ti = windows_terminfo()
return (ti.dwSize.X or 80, ti.dwSize.Y or 25)
except Exception:
return 80, 25
else:
try:
lines, cols = get_term_geometry()
if lines is not None:
return cols, lines
except Exception:
pass
return 80, 25
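# Example: fit output to the terminal, with an 80x25 fallback when the
# size cannot be determined:
#
#   cols, lines = geometry()
#   print('-' * cols)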
def test():
s = ANSIStream()
text = [colored(t, fg=t)+'. '+colored(t, fg=t, bold=True)+'.' for t in
('red', 'yellow', 'green', 'white', 'cyan', 'magenta', 'blue',)]
s.write('\n'.join(text))
u = u'\u041c\u0438\u0445\u0430\u0438\u043b fällen'
print()
s.write_unicode_text(u)
print()

View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
"""
Original Perl version by: John Gruber https://daringfireball.net/ 10 May 2008
Python version by Stuart Colville http://muffinresearch.co.uk
Modifications to make it work with non-ascii chars by Kovid Goyal
License: http://www.opensource.org/licenses/mit-license.php
"""
import re
from calibre.utils.icu import capitalize, upper
from polyglot.builtins import unicode_type
__all__ = ['titlecase']
__version__ = '0.5'
SMALL = 'a|an|and|as|at|but|by|en|for|if|in|of|on|or|the|to|v\\.?|via|vs\\.?'
PUNCT = r"""!"#$%&'()*+,\-‒–—―./:;?@[\\\]_`{|}~"""
SMALL_WORDS = re.compile(r'^(%s)$' % SMALL, re.I)
INLINE_PERIOD = re.compile(r'[a-z][.][a-z]', re.I)
UC_ELSEWHERE = re.compile(r'[%s]*?[a-zA-Z]+[A-Z]+?' % PUNCT)
CAPFIRST = re.compile(unicode_type(r"^[%s]*?(\w)" % PUNCT), flags=re.UNICODE)
SMALL_FIRST = re.compile(r'^([%s]*)(%s)\b' % (PUNCT, SMALL), re.I|re.U)
SMALL_LAST = re.compile(r'\b(%s)[%s]?$' % (SMALL, PUNCT), re.I|re.U)
SMALL_AFTER_NUM = re.compile(r'(\d+\s+)(a|an|the)\b', re.I|re.U)
SUBPHRASE = re.compile(r'([:.;?!][ ])(%s)' % SMALL)
APOS_SECOND = re.compile(r"^[dol]{1}[']{1}[a-z]+$", re.I)
UC_INITIALS = re.compile(r"^(?:[A-Z]{1}\.{1}|[A-Z]{1}\.{1}[A-Z]{1})+$")
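# e.g. APOS_SECOND matches "d'artagnan", UC_INITIALS matches 'J.F.K.'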
_lang = None
def lang():
global _lang
if _lang is None:
from calibre.utils.localization import get_lang
_lang = get_lang().lower()
return _lang
def titlecase(text):
"""
Titlecases the input text.
This filter changes all words to Title Caps, and attempts to be clever
about *un*capitalizing SMALL words like a/an/the in the input.
The list of "SMALL words" which are not capped comes from
the New York Times Manual of Style, plus 'vs' and 'v'.
"""
all_caps = upper(text) == text
pat = re.compile(r'(\s+)')
line = []
for word in pat.split(text):
if not word:
continue
if pat.match(word) is not None:
line.append(word)
continue
if all_caps:
if UC_INITIALS.match(word):
line.append(word)
continue
else:
word = icu_lower(word)
if APOS_SECOND.match(word):
word = word.replace(word[0], icu_upper(word[0]), 1)
word = word[:2] + icu_upper(word[2]) + word[3:]
line.append(word)
continue
if INLINE_PERIOD.search(word) or UC_ELSEWHERE.match(word):
line.append(word)
continue
if SMALL_WORDS.match(word):
line.append(icu_lower(word))
continue
hyphenated = []
for item in word.split('-'):
hyphenated.append(CAPFIRST.sub(lambda m: icu_upper(m.group(0)), item))
line.append("-".join(hyphenated))
result = "".join(line)
result = SMALL_FIRST.sub(lambda m: '%s%s' % (
m.group(1),
capitalize(m.group(2))
), result)
result = SMALL_AFTER_NUM.sub(lambda m: '%s%s' % (m.group(1),
capitalize(m.group(2))
), result)
result = SMALL_LAST.sub(lambda m: capitalize(m.group(0)), result)
result = SUBPHRASE.sub(lambda m: '%s%s' % (
m.group(1),
capitalize(m.group(2))
), result)
return result
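# Expected behaviour, as a sketch:
#   titlecase('the quick brown fox jumps over the lazy dog')
#   -> 'The Quick Brown Fox Jumps Over the Lazy Dog'
#   titlecase('small word at the end is nothing to be afraid of')
#   -> 'Small Word at the End Is Nothing to Be Afraid Of'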

View File

@@ -0,0 +1,95 @@
#!/usr/bin/python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
"""
Get word, character, and Asian character counts
1. Get a word count as a dictionary:
wc = get_wordcount(text)
words = wc['words'] # etc.
2. Get a word count as an object
wc = get_wordcount_obj(text)
words = wc.words # etc.
properties counted:
* characters
* chars_no_spaces
* asian_chars
* non_asian_words
* words
Sourced from:
http://ginstrom.com/scribbles/2008/05/17/counting-words-etc-in-an-html-file-with-python/
http://ginstrom.com/scribbles/2007/10/06/counting-words-characters-and-asian-characters-with-python/
"""
__version__ = 0.1
__author__ = "Ryan Ginstrom"
IDEOGRAPHIC_SPACE = 0x3000
def is_asian(char):
"""Is the character Asian?"""
# 0x3000 is the ideographic space (i.e. double-byte space)
# anything above it is treated as an Asian character (a rough heuristic)
return ord(char) > IDEOGRAPHIC_SPACE
def filter_jchars(c):
"""Filters Asian characters to spaces"""
if is_asian(c):
return ' '
return c
def nonj_len(word):
"""Returns number of non-Asian words in {word}
- 日本語AアジアンB -> 2
- hello -> 1
@param word: A word, possibly containing Asian characters
"""
# Here are the steps:
# 本spam日eggs
# -> [' ', 's', 'p', 'a', 'm', ' ', 'e', 'g', 'g', 's']
# -> ' spam eggs'
# -> ['spam', 'eggs']
# The length of which is 2!
chars = [filter_jchars(c) for c in word]
return len(''.join(chars).split())
def get_wordcount(text):
"""Get the word/character count for text
@param text: The text of the segment
"""
characters = len(text)
chars_no_spaces = sum(not x.isspace() for x in text)
asian_chars = sum(is_asian(x) for x in text)
non_asian_words = nonj_len(text)
words = non_asian_words + asian_chars
return dict(characters=characters,
chars_no_spaces=chars_no_spaces,
asian_chars=asian_chars,
non_asian_words=non_asian_words,
words=words)
def dict2obj(dictionary):
"""Transform a dictionary into an object"""
class Obj(object):
def __init__(self, dictionary):
self.__dict__.update(dictionary)
return Obj(dictionary)
def get_wordcount_obj(text):
"""Get the wordcount as an object rather than a dictionary"""
return dict2obj(get_wordcount(text))
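# Example sketch:
#   wc = get_wordcount('日本語 spam eggs')
#   wc['asian_chars']      # -> 3
#   wc['non_asian_words']  # -> 2
#   wc['words']            # -> 5 (asian_chars + non_asian_words)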

View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from lxml import etree
# resolving of SYSTEM entities is turned off as entities can cause
# reads of local files, for example:
# <!DOCTYPE foo [ <!ENTITY passwd SYSTEM "file:///etc/passwd" >]>
fs = etree.fromstring
class Resolver(etree.Resolver):
def resolve(self, url, id, context):
return self.resolve_string('', context)
def create_parser(recover):
parser = etree.XMLParser(recover=recover, no_network=True)
parser.resolvers.add(Resolver())
return parser
def safe_xml_fromstring(string_or_bytes, recover=True):
return fs(string_or_bytes, parser=create_parser(recover))
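# Usage sketch: SYSTEM entities resolve to the empty string instead of
# reading local files:
#
#   raw = b'<!DOCTYPE d [<!ENTITY e SYSTEM "file:///etc/passwd">]><r>&e;</r>'
#   safe_xml_fromstring(raw).text  # -> None, the entity is not expanded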
def find_tests():
import unittest, tempfile, os
class TestXMLParse(unittest.TestCase):
def setUp(self):
with tempfile.NamedTemporaryFile(delete=False) as tf:
tf.write(b'external')
self.temp_file = tf.name
def tearDown(self):
os.remove(self.temp_file)
def test_safe_xml_fromstring(self):
templ = '''<!DOCTYPE foo [ <!ENTITY e {id} "{val}" > ]><r>&e;</r>'''
external = 'file:///' + self.temp_file.replace(os.sep, '/')
self.assertEqual(etree.fromstring(templ.format(id='SYSTEM', val=external)).text, 'external')
for eid, val, expected in (
('', 'normal entity', 'normal entity'),
('', external, external),
('SYSTEM', external, None),
('SYSTEM', 'http://example.com', None),
('PUBLIC', external, None),
('PUBLIC', 'http://example.com', None),
):
got = getattr(safe_xml_fromstring(templ.format(id=eid, val=val)), 'text', None)
self.assertEqual(got, expected)
return unittest.defaultTestLoader.loadTestsFromTestCase(TestXMLParse)
if __name__ == '__main__':
from calibre.utils.run_tests import run_tests
run_tests(find_tests)

File diff suppressed because it is too large