1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-29 08:03:31 +02:00

Initial import

This commit is contained in:
2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions

View File

@@ -0,0 +1,87 @@
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
''' Code to manage ebook library'''
import os
from polyglot.builtins import range
def db(path=None, read_only=False):
    """Open a legacy ``LibraryDatabase``.

    :param path: Library location; ``~`` is expanded. When falsy, the
        ``library_path`` preference is used instead.
    :param read_only: Forwarded unchanged to ``LibraryDatabase``.
    :return: The ``LibraryDatabase`` instance.
    """
    from calibre.db.legacy import LibraryDatabase
    from calibre.utils.config import prefs

    if path:
        location = os.path.expanduser(path)
    else:
        location = prefs['library_path']
    return LibraryDatabase(location, read_only=read_only)
def generate_test_db(library_path,  # {{{
        num_of_records=20000,
        num_of_authors=6000,
        num_of_tags=10000,
        tag_length=7,
        author_length=7,
        title_length=10,
        max_authors=10,
        max_tags=10
        ):
    """Populate a library at ``library_path`` with random test records.

    Random tag, author and title strings are generated up front; each record
    then gets 1..``max_authors`` authors and 0..``max_tags`` tags picked at
    random and is imported (without any format files) through ``db()``.
    Progress and timing are printed to stdout.

    :param library_path: Directory of the library; created if missing.
    :param num_of_records: Number of books (and titles) to create.
    :param num_of_authors: Size of the pool of author names.
    :param num_of_tags: Size of the pool of tags.
    :param tag_length: Length of every generated tag.
    :param author_length: Length of every generated author name.
    :param title_length: Length of every generated title.
    :param max_authors: Upper bound on authors per record.
    :param max_tags: Upper bound on tags per record.
    """
    import random, string, os, sys, time
    # Imported once here instead of on every loop iteration.
    from calibre.ebooks.metadata.book.base import Metadata

    if not os.path.exists(library_path):
        os.makedirs(library_path)

    # ``string.letters`` only exists on Python 2; ``ascii_letters`` is
    # available on both major versions.
    letters = string.ascii_letters

    def randstr(length):
        return ''.join(random.choice(letters) for _ in range(length))

    all_tags = [randstr(tag_length) for _ in range(num_of_tags)]
    print('Generated', num_of_tags, 'tags')
    all_authors = [randstr(author_length) for _ in range(num_of_authors)]
    print('Generated', num_of_authors, 'authors')
    all_titles = [randstr(title_length) for _ in range(num_of_records)]
    print('Generated', num_of_records, 'titles')

    testdb = db(library_path)
    print('Creating', num_of_records, 'records...')

    start = time.time()

    for i, title in enumerate(all_titles):
        print(i+1, end=' ')
        sys.stdout.flush()
        author_count = random.randint(1, max_authors)
        authors = [random.choice(all_authors) for _ in range(author_count)]
        tag_count = random.randint(0, max_tags)
        tags = [random.choice(all_tags) for _ in range(tag_count)]
        mi = Metadata(title, authors)
        mi.tags = tags
        testdb.import_book(mi, [])

    t = time.time() - start
    print('\nGenerated', num_of_records, 'records in:', t, 'seconds')
    # Avoid ZeroDivisionError when asked to generate an empty library.
    if num_of_records:
        print('Time per record:', t/num_of_records)
# }}}
def current_library_path():
    """Return the ``library_path`` preference in normalised form.

    Backslashes are converted to forward slashes and all trailing slashes
    are removed. A falsy preference value is returned unchanged.
    """
    from calibre.utils.config import prefs

    library = prefs['library_path']
    if not library:
        return library
    return library.replace('\\', '/').rstrip('/')
def current_library_name():
    """Return the last path component of the current library path.

    Returns ``None`` when ``current_library_path()`` yields a falsy value.
    """
    import posixpath

    library = current_library_path()
    return posixpath.basename(library) if library else None

View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
# Field names offered for catalog output; the first entry, 'all', is the
# default value of the --fields option of the catalog plugins.
FIELDS = [
    'all',
    'title', 'title_sort',
    'author_sort', 'authors',
    'comments', 'cover', 'formats',
    'id', 'isbn',
    'library_name', 'ondevice',
    'pubdate', 'publisher',
    'rating',
    'series_index', 'series',
    'size', 'tags', 'timestamp',
    'uuid', 'languages', 'identifiers',
]

# Allowed fields for template
TEMPLATE_ALLOWED_FIELDS = [
    'author_sort', 'authors',
    'id', 'isbn',
    'pubdate', 'title_sort', 'publisher',
    'series_index', 'series',
    'tags', 'timestamp', 'title', 'uuid',
]
class AuthorSortMismatchException(Exception):
    """Catalog error type; raised outside this module.

    NOTE(review): presumably signals inconsistent author / author_sort
    values -- confirm against the raising code.
    """
class EmptyCatalogException(Exception):
    """Catalog error type; raised outside this module.

    NOTE(review): presumably signals that no books were selected for the
    catalog -- confirm against the raising code.
    """
class InvalidGenresSourceFieldException(Exception):
    """Catalog error type; raised outside this module.

    NOTE(review): presumably signals an unusable genre source field --
    confirm against the raising code.
    """

View File

@@ -0,0 +1,402 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, codecs, os, numbers
from collections import namedtuple
from calibre import strftime
from calibre.customize import CatalogPlugin
from calibre.library.catalogs import FIELDS, TEMPLATE_ALLOWED_FIELDS
from calibre.customize.conversion import DummyReporter
from calibre.ebooks.metadata import format_isbn
from polyglot.builtins import filter, string_or_bytes, unicode_type
class BIBTEX(CatalogPlugin):
    """BIBTEX catalog generator.

    Catalog plugin that writes the selected library entries to a ``.bib``
    file, one ``@book`` or ``@misc`` record per entry.
    """

    Option = namedtuple('Option', 'option, default, dest, action, help')

    name = 'Catalog_BIBTEX'
    description = 'BIBTEX catalog generator'
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Sengian'
    version = (1, 0, 0)
    file_types = {'bib'}

    cli_options = [
        Option('--fields',
               default='all',
               dest='fields',
               action=None,
               help=_('The fields to output when cataloging books in the '
                      'database. Should be a comma-separated list of fields.\n'
                      'Available fields: %(fields)s.\n'
                      'plus user-created custom fields.\n'
                      'Example: %(opt)s=title,authors,tags\n'
                      "Default: '%%default'\n"
                      "Applies to: BIBTEX output format")%dict(
                          fields=', '.join(FIELDS), opt='--fields')),

        Option('--sort-by',
               default='id',
               dest='sort_by',
               action=None,
               help=_('Output field to sort on.\n'
                      'Available fields: author_sort, id, rating, size, timestamp, title.\n'
                      "Default: '%default'\n"
                      "Applies to: BIBTEX output format")),

        Option('--create-citation',
               default='True',
               dest='impcit',
               action=None,
               help=_('Create a citation for BibTeX entries.\n'
                      'Boolean value: True, False\n'
                      "Default: '%default'\n"
                      "Applies to: BIBTEX output format")),

        Option('--add-files-path',
               default='True',
               dest='addfiles',
               action=None,
               help=_('Create a file entry if formats is selected for BibTeX entries.\n'
                      'Boolean value: True, False\n'
                      "Default: '%default'\n"
                      "Applies to: BIBTEX output format")),

        Option('--citation-template',
               default='{authors}{id}',
               dest='bib_cit',
               action=None,
               help=_('The template for citation creation from database fields.\n'
                      'Should be a template with {} enclosed fields.\n'
                      'Available fields: %s.\n'
                      "Default: '%%default'\n"
                      "Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),

        Option('--choose-encoding',
               default='utf8',
               dest='bibfile_enc',
               action=None,
               help=_('BibTeX file encoding output.\n'
                      'Available types: utf8, cp1252, ascii.\n'
                      "Default: '%default'\n"
                      "Applies to: BIBTEX output format")),

        Option('--choose-encoding-configuration',
               default='strict',
               dest='bibfile_enctag',
               action=None,
               help=_('BibTeX file encoding flag.\n'
                      'Available types: strict, replace, ignore, backslashreplace.\n'
                      "Default: '%default'\n"
                      "Applies to: BIBTEX output format")),

        Option('--entry-type',
               default='book',
               dest='bib_entry',
               action=None,
               help=_('Entry type for BibTeX catalog.\n'
                      'Available types: book, misc, mixed.\n'
                      "Default: '%default'\n"
                      "Applies to: BIBTEX output format"))]

    def run(self, path_to_output, opts, db, notification=DummyReporter()):
        """Generate the BibTeX catalog and write it to ``path_to_output``.

        :param path_to_output: Destination file; its extension is stored in
            ``self.fmt``.
        :param opts: Parsed catalog options (the ``dest`` names listed in
            ``cli_options`` plus ``ids``, ``search_text``, ``verbose`` and
            ``connected_device``).
        :param db: Library database used for searching and for reading
            custom (``#``-prefixed) fields.
        :param notification: Progress reporter, stored on
            ``self.notification``.
        """
        from calibre.utils.date import isoformat
        from calibre.utils.html2text import html2text
        from calibre.utils.bibtex import BibTeX
        from calibre.library.save_to_disk import preprocess_template
        from calibre.utils.logging import default_log as log
        from calibre.utils.filenames import ascii_text

        library_name = os.path.basename(db.library_path)

        def check_entry_book_valid(entry):
            # Check that the required fields are ok for a book entry
            for field in ('title', 'authors', 'publisher'):
                if entry[field] is None or len(entry[field]) == 0:
                    return False
            return entry['pubdate'] is not None

        def make_bibtex_citation(entry, template_citation, bibtexclass):
            # Build the citation key from the template; fall back to the
            # ISBN digits, then to the book id, when the template yields
            # nothing.

            def tpl_replace(objtplname):
                # Replace one {field} placeholder by the entry's value.
                tpl_field = re.sub(r'[\{\}]', '', objtplname.group())
                if tpl_field not in TEMPLATE_ALLOWED_FIELDS:
                    return ''
                value = entry[tpl_field]
                # A missing value contributes nothing to the key instead
                # of raising while it is being formatted.
                if value is None:
                    return ''
                if tpl_field in ('pubdate', 'timestamp'):
                    value = isoformat(value).partition('T')[0]
                elif tpl_field in ('tags', 'authors'):
                    # Only the first tag/author is used; an empty list
                    # has none to offer.
                    if not value:
                        return ''
                    value = value[0]
                elif tpl_field in ('id', 'series_index'):
                    value = unicode_type(value)
                return ascii_text(value)

            if len(template_citation) > 0:
                tpl_citation = bibtexclass.utf8ToBibtex(
                    bibtexclass.ValidateCitationKey(re.sub(r'\{[^{}]*\}',
                        tpl_replace, template_citation)))
                if len(tpl_citation) > 0:
                    return tpl_citation

            isbn = entry["isbn"]
            # Truthiness test so that a missing (None) ISBN falls back to
            # the id instead of raising on len(None).
            if isbn:
                template_citation = '%s' % re.sub(r'[\D]', '', isbn)
            else:
                template_citation = '%s' % unicode_type(entry["id"])

            return bibtexclass.ValidateCitationKey(template_citation)

        def create_bibtex_entry(entry, fields, mode, template_citation,
                                bibtexdict, db, citation_bibtex=True, calibre_files=True):
            # Bibtex doesn't like UTF-8 but keep unicode until writing
            # Returns '' when mode is "book" and the entry lacks the
            # fields required for a book record.
            bibtex_entry = []
            if mode != "misc" and check_entry_book_valid(entry):
                bibtex_entry.append('@book{')
            elif mode != "book":
                bibtex_entry.append('@misc{')
            else:
                # case strict book
                return ''

            if citation_bibtex:
                # Citation tag
                bibtex_entry.append(make_bibtex_citation(entry, template_citation,
                    bibtexdict))
                bibtex_entry = [' '.join(bibtex_entry)]

            for field in fields:
                if field.startswith('#'):
                    item = db.get_field(entry['id'], field, index_is_id=True)
                    if isinstance(item, (bool, numbers.Number)):
                        item = repr(item)
                elif field == 'title_sort':
                    item = entry['sort']
                elif field == 'library_name':
                    item = library_name
                else:
                    item = entry[field]

                # check if the field should be included (none or empty)
                if item is None:
                    continue
                try:
                    if len(item) == 0:
                        continue
                except TypeError:
                    # Values without a length (numbers, dates) are kept.
                    pass

                if field == 'authors':
                    bibtex_entry.append('author = "%s"' % bibtexdict.bibtex_author_format(item))

                elif field == 'id':
                    bibtex_entry.append('calibreid = "%s"' % int(item))

                elif field == 'rating':
                    bibtex_entry.append('rating = "%s"' % int(item))

                elif field == 'size':
                    bibtex_entry.append('%s = "%s octets"' % (field, int(item)))

                elif field == 'tags':
                    # A list to flatten
                    bibtex_entry.append('tags = "%s"' % bibtexdict.utf8ToBibtex(', '.join(item)))

                elif field == 'comments':
                    # \n removal
                    item = item.replace('\r\n', ' ')
                    item = item.replace('\n', ' ')
                    # unmatched brace removal (users should use \leftbrace or \rightbrace for single braces)
                    item = bibtexdict.stripUnmatchedSyntax(item, '{', '}')
                    # html to text; best effort, the unconverted text is
                    # written when the conversion fails
                    try:
                        item = html2text(item)
                    except Exception:
                        log.warn("Failed to convert comments to text")
                    bibtex_entry.append('note = "%s"' % bibtexdict.utf8ToBibtex(item))

                elif field == 'isbn':
                    # Could be 9, 10 or 13 digits
                    bibtex_entry.append('isbn = "%s"' % format_isbn(item))

                elif field == 'formats':
                    # Add file path if format is selected
                    formats = [fmt_path.rpartition('.')[2].lower() for fmt_path in item]
                    bibtex_entry.append('formats = "%s"' % ', '.join(formats))
                    if calibre_files:
                        files = [':%s:%s' % (fmt_path, fmt_path.rpartition('.')[2].upper())
                                 for fmt_path in item]
                        bibtex_entry.append('file = "%s"' % ', '.join(files))

                elif field == 'series_index':
                    bibtex_entry.append('volume = "%s"' % int(item))

                elif field == 'timestamp':
                    bibtex_entry.append('timestamp = "%s"' % isoformat(item).partition('T')[0])

                elif field == 'pubdate':
                    bibtex_entry.append('year = "%s"' % item.year)
                    bibtex_entry.append('month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))

                elif field.startswith('#') and isinstance(item, string_or_bytes):
                    bibtex_entry.append('custom_%s = "%s"' % (field[1:],
                        bibtexdict.utf8ToBibtex(item)))

                elif isinstance(item, string_or_bytes):
                    # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
                    # 'author_sort', 'series', 'title_sort'] :
                    bibtex_entry.append('%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))

            bibtex_entry = ',\n '.join(bibtex_entry)
            bibtex_entry += ' }\n\n'

            return bibtex_entry

        def resolve_choice(value, choices, flag):
            # Needed because the CLI returns the value as str while the
            # GUI widget returns an int index into the combobox choices.
            try:
                return choices[value]
            except (TypeError, IndexError):
                pass
            if value in choices:
                return value
            log.warn("Incorrect %s flag, revert to default" % flag)
            return choices[0]

        def parse_bool_option(value, flag):
            # CLI booleans arrive as the strings 'True'/'False'; any other
            # string falls back to True with a warning. Non-string values
            # are passed through unchanged.
            if not isinstance(value, string_or_bytes):
                return value
            if value == 'False':
                return False
            if value != 'True':
                log.warn("Incorrect %s, revert to default" % flag)
            return True

        self.fmt = path_to_output.rpartition('.')[2]
        self.notification = notification

        # Combobox options. Each option is resolved independently, so a
        # failing lookup for one can no longer corrupt the others.
        bibfile_enc = resolve_choice(
            opts.bibfile_enc, ['utf8', 'cp1252', 'ascii'], '--choose-encoding')
        bibfile_enctag = resolve_choice(
            opts.bibfile_enctag,
            ['strict', 'replace', 'ignore', 'backslashreplace'],
            '--choose-encoding-configuration')
        bib_entry = resolve_choice(
            opts.bib_entry, ['mixed', 'misc', 'book'], '--entry-type')

        if opts.verbose:
            opts_dict = vars(opts)
            log("%s(): Generating %s" % (self.name,self.fmt))
            if opts.connected_device['is_device_connected']:
                log(" connected_device: %s" % opts.connected_device['name'])
            if opts_dict['search_text']:
                log(" --search='%s'" % opts_dict['search_text'])

            if opts_dict['ids']:
                log(" Book count: %d" % len(opts_dict['ids']))
                if opts_dict['search_text']:
                    log(" (--search ignored when a subset of the database is specified)")

            if opts_dict['fields']:
                if opts_dict['fields'] == 'all':
                    log(" Fields: %s" % ', '.join(FIELDS[1:]))
                else:
                    log(" Fields: %s" % opts_dict['fields'])

            log(" Output file will be encoded in %s with %s flag" % (bibfile_enc, bibfile_enctag))

            log(" BibTeX entry type is %s with a citation like '%s' flag" % (bib_entry, opts_dict['bib_cit']))

        # If a list of ids are provided, don't use search_text
        if opts.ids:
            opts.search_text = None

        data = self.search_sort_db(db, opts)

        # Reported once; the catalog is still written with a header only.
        if not len(data):
            log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)

        # Get the requested output fields as a list
        fields = self.get_output_fields(db, opts)

        # Initialize BibTeX class
        bibtexc = BibTeX()

        # Entries writing after Bibtex formatting (or not)
        bibtexc.ascii_bibtex = (bibfile_enc == 'ascii')

        # Check citation choice and go to default in case of bad CLI
        citation_bibtex = parse_bool_option(opts.impcit, '--create-citation')

        # Check add file entry and go to default in case of bad CLI
        addfiles_bibtex = parse_bool_option(opts.addfiles, '--add-files-path')

        # Preprocess for error and light correction
        template_citation = preprocess_template(opts.bib_cit)

        # Open output and write entries
        with codecs.open(path_to_output, 'w', bibfile_enc, bibfile_enctag) as outfile:
            # File header
            nb_entries = len(data)

            # check in book strict if all is ok else throw a warning into log
            if bib_entry == 'book':
                nb_books = sum(1 for entry in data if check_entry_book_valid(entry))
                if nb_books < nb_entries:
                    log.warn("Only %d entries in %d are book compatible" % (nb_books, nb_entries))
                    nb_entries = nb_books

            # If connected device, add 'On Device' values to data
            if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
                for entry in data:
                    entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']

            outfile.write('%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
            outfile.write('@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
                          % (nb_entries, strftime("%A, %d. %B %Y %H:%M")))

            for entry in data:
                outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
                    bibtexc, db, citation_bibtex, addfiles_bibtex))

View File

@@ -0,0 +1,241 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, codecs, os
from collections import namedtuple
from calibre.customize import CatalogPlugin
from calibre.library.catalogs import FIELDS
from calibre.customize.conversion import DummyReporter
from polyglot.builtins import unicode_type
class CSV_XML(CatalogPlugin):
    """CSV/XML catalog generator.

    Catalog plugin that writes the selected library entries either as a
    UTF-8 CSV file (with BOM) or as an XML document, depending on the
    extension of the output path.
    """

    Option = namedtuple('Option', 'option, default, dest, action, help')

    name = 'Catalog_CSV_XML'
    description = 'CSV/XML catalog generator'
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Greg Riker'
    version = (1, 0, 0)
    file_types = {'csv', 'xml'}

    cli_options = [
        Option('--fields',
               default='all',
               dest='fields',
               action=None,
               help=_('The fields to output when cataloging books in the '
                      'database. Should be a comma-separated list of fields.\n'
                      'Available fields: %(fields)s,\n'
                      'plus user-created custom fields.\n'
                      'Example: %(opt)s=title,authors,tags\n'
                      "Default: '%%default'\n"
                      "Applies to: CSV, XML output formats") % dict(
                          fields=', '.join(FIELDS), opt='--fields')),

        Option('--sort-by',
               default='id',
               dest='sort_by',
               action=None,
               help=_('Output field to sort on.\n'
                      'Available fields: author_sort, id, rating, size, timestamp, title_sort\n'
                      "Default: '%default'\n"
                      "Applies to: CSV, XML output formats"))]

    def run(self, path_to_output, opts, db, notification=DummyReporter()):
        """Generate the CSV or XML catalog and write it to ``path_to_output``.

        :param path_to_output: Destination file; its extension (``csv`` or
            ``xml``) selects the output format and is stored in ``self.fmt``.
            Any other extension produces no output.
        :param opts: Parsed catalog options (``fields``, ``sort_by``,
            ``ids``, ``search_text``, ``verbose``, ``connected_device`` and
            optionally ``library_path``).
        :param db: Library database used for searching, field metadata and
            custom (``#``-prefixed) fields.
        :param notification: Progress reporter, stored on
            ``self.notification``.
        """
        from calibre.library import current_library_name
        from calibre.utils.date import isoformat
        from calibre.utils.html2text import html2text
        from calibre.utils.logging import default_log as log
        from lxml import etree
        from calibre.ebooks.metadata import authors_to_string

        self.fmt = path_to_output.rpartition('.')[2]
        self.notification = notification
        current_library = current_library_name()
        # An explicit library path on the options overrides the preference.
        if getattr(opts, 'library_path', None):
            current_library = os.path.basename(opts.library_path)

        if opts.verbose:
            opts_dict = vars(opts)
            log("%s('%s'): Generating %s" % (self.name, current_library, self.fmt.upper()))
            if opts.connected_device['is_device_connected']:
                log(" connected_device: %s" % opts.connected_device['name'])
            if opts_dict['search_text']:
                log(" --search='%s'" % opts_dict['search_text'])

            if opts_dict['ids']:
                log(" Book count: %d" % len(opts_dict['ids']))
                if opts_dict['search_text']:
                    log(" (--search ignored when a subset of the database is specified)")

            if opts_dict['fields']:
                if opts_dict['fields'] == 'all':
                    log(" Fields: %s" % ', '.join(FIELDS[1:]))
                else:
                    log(" Fields: %s" % opts_dict['fields'])

        # If a list of ids are provided, don't use search_text
        if opts.ids:
            opts.search_text = None

        data = self.search_sort_db(db, opts)

        if not len(data):
            log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)
            # raise SystemExit(1)

        # Get the requested output fields as a list
        fields = self.get_output_fields(db, opts)

        # If connected device, add 'On Device' values to data
        if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
            for entry in data:
                entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']

        fm = {x: db.field_metadata.get(x, {}) for x in fields}

        if self.fmt == 'csv':
            # The context manager closes the file even when formatting a
            # row raises.
            with codecs.open(path_to_output, 'w', 'utf8') as outfile:
                # Write a UTF-8 BOM
                outfile.write('\ufeff')

                # Output the field headers
                outfile.write('%s\n' % ','.join(fields))

                # Output the entry fields
                for entry in data:
                    outstr = []
                    for field in fields:
                        meta = fm.get(field, {})
                        if field.startswith('#'):
                            item = db.get_field(entry['id'], field, index_is_id=True)
                            if isinstance(item, (list, tuple)):
                                if meta.get('display', {}).get('is_names', False):
                                    item = ' & '.join(item)
                                else:
                                    item = ', '.join(item)
                        elif field == 'library_name':
                            item = current_library
                        elif field == 'title_sort':
                            item = entry['sort']
                        else:
                            item = entry[field]

                        if item is None:
                            outstr.append('""')
                            continue
                        elif field == 'formats':
                            item = ', '.join(fmt_path.rpartition('.')[2].lower()
                                             for fmt_path in item)
                        elif field == 'authors':
                            item = authors_to_string(item)
                        elif field == 'tags':
                            item = ', '.join(item)
                        elif field == 'isbn':
                            # Could be 9, 10 or 13 digits, with hyphens, possibly ending in 'X'
                            item = '%s' % re.sub(r'[^\dX-]', '', item)
                        elif meta.get('datatype') == 'datetime':
                            item = isoformat(item, as_utc=False)
                        elif field == 'comments':
                            item = item.replace('\r\n', ' ')
                            item = item.replace('\n', ' ')
                        elif meta.get('datatype', None) == 'rating' and item:
                            # The stored rating is halved for output.
                            item = '%.2g' % (item / 2)

                        # Convert HTML to markdown text, but only when the
                        # value ends with the closing tag that matches its
                        # first opening tag
                        if isinstance(item, unicode_type):
                            opening_tag = re.search(r'<(\w+)( |>)', item)
                            if opening_tag:
                                closing_tag = re.search(r'<\/%s>$' % opening_tag.group(1), item)
                                if closing_tag:
                                    item = html2text(item)

                        # CSV quoting: wrap in quotes, double embedded quotes
                        outstr.append('"%s"' % unicode_type(item).replace('"', '""'))

                    outfile.write(','.join(outstr) + '\n')

        elif self.fmt == 'xml':
            from lxml.builder import E

            root = E.calibredb()
            for r in data:
                record = E.record()
                root.append(record)

                # Custom columns: '#name' becomes the element '_name'
                for field in fields:
                    if field.startswith('#'):
                        val = db.get_field(r['id'], field, index_is_id=True)
                        if not isinstance(val, unicode_type):
                            val = unicode_type(val)
                        item = getattr(E, field.replace('#', '_'))(val)
                        record.append(item)

                # Simple scalar fields; falsy values are omitted
                for field in ('id', 'uuid', 'publisher', 'rating', 'size',
                              'isbn', 'ondevice', 'identifiers'):
                    if field in fields:
                        val = r[field]
                        if not val:
                            continue
                        if not isinstance(val, (bytes, unicode_type)):
                            if (fm.get(field, {}).get('datatype', None) ==
                                    'rating' and val):
                                val = '%.2g' % (val / 2)
                            val = unicode_type(val)
                        item = getattr(E, field)(val)
                        record.append(item)

                if 'title' in fields:
                    title = E.title(r['title'], sort=r['sort'])
                    record.append(title)

                if 'authors' in fields:
                    aus = E.authors(sort=r['author_sort'])
                    for au in r['authors']:
                        aus.append(E.author(au))
                    record.append(aus)

                for field in ('timestamp', 'pubdate'):
                    if field in fields:
                        record.append(getattr(E, field)(isoformat(r[field], as_utc=False)))

                if 'tags' in fields and r['tags']:
                    tags = E.tags()
                    for tag in r['tags']:
                        tags.append(E.tag(tag))
                    record.append(tags)

                if 'comments' in fields and r['comments']:
                    record.append(E.comments(r['comments']))

                if 'series' in fields and r['series']:
                    record.append(E.series(r['series'],
                        index=unicode_type(r['series_index'])))

                if 'cover' in fields and r['cover']:
                    record.append(E.cover(r['cover'].replace(os.sep, '/')))

                if 'formats' in fields and r['formats']:
                    formats_el = E.formats()
                    for fmt_path in r['formats']:
                        formats_el.append(E.format(fmt_path.replace(os.sep, '/')))
                    record.append(formats_el)

                if 'library_name' in fields:
                    record.append(E.library_name(current_library))

            with open(path_to_output, 'wb') as f:
                f.write(etree.tostring(root, encoding='utf-8',
                    xml_declaration=True, pretty_print=True))

View File

@@ -0,0 +1,506 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import datetime, os, time
from collections import namedtuple
from calibre import strftime
from calibre.customize import CatalogPlugin
from calibre.customize.conversion import OptionRecommendation, DummyReporter
from calibre.library import current_library_name
from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException
from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.localization import calibre_langcode_to_name, canonicalize_lang, get_lang
from polyglot.builtins import unicode_type
Option = namedtuple('Option', 'option, default, dest, action, help')
class EPUB_MOBI(CatalogPlugin):
'EPUB catalog generator'
name = 'Catalog_EPUB_MOBI'
description = 'AZW3/EPUB/MOBI catalog generator'
supported_platforms = ['windows', 'osx', 'linux']
minimum_calibre_version = (0, 7, 40)
author = 'Greg Riker'
version = (1, 0, 0)
file_types = {'azw3', 'epub', 'mobi'}
THUMB_SMALLEST = "1.0"
THUMB_LARGEST = "2.0"
cli_options = [Option('--catalog-title', # {{{
default='My Books',
dest='catalog_title',
action=None,
help=_('Title of generated catalog used as title in metadata.\n'
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--cross-reference-authors',
default=False,
dest='cross_reference_authors',
action='store_true',
help=_("Create cross-references in Authors section for books with multiple authors.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--debug-pipeline',
default=None,
dest='debug_pipeline',
action=None,
help=_("Save the output from different stages of the conversion "
"pipeline to the specified "
"directory. Useful if you are unsure at which stage "
"of the conversion process a bug is occurring.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--exclude-genre',
default=r'\[.+\]|^\+$',
dest='exclude_genre',
action=None,
help=_("Regex describing tags to exclude as genres.\n"
"Default: '%default' excludes bracketed tags, e.g. '[Project Gutenberg]', and '+', the default tag for read books.\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--exclusion-rules',
default="(('Catalogs','Tags','Catalog'),)",
dest='exclusion_rules',
action=None,
help=_("Specifies the rules used to exclude books from the generated catalog.\n"
"The model for an exclusion rule is either\n('<rule name>','Tags','<comma-separated list of tags>') or\n"
"('<rule name>','<custom column>','<pattern>').\n"
"For example:\n"
"(('Archived books','#status','Archived'),)\n"
"will exclude a book with a value of 'Archived' in the custom column 'status'.\n"
"When multiple rules are defined, all rules will be applied.\n"
"Default: \n" + '"' + '%default' + '"' + "\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--generate-authors',
default=False,
dest='generate_authors',
action='store_true',
help=_("Include 'Authors' section in catalog.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--generate-descriptions',
default=False,
dest='generate_descriptions',
action='store_true',
help=_("Include 'Descriptions' section in catalog.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--generate-genres',
default=False,
dest='generate_genres',
action='store_true',
help=_("Include 'Genres' section in catalog.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--generate-titles',
default=False,
dest='generate_titles',
action='store_true',
help=_("Include 'Titles' section in catalog.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--generate-series',
default=False,
dest='generate_series',
action='store_true',
help=_("Include 'Series' section in catalog.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--generate-recently-added',
default=False,
dest='generate_recently_added',
action='store_true',
help=_("Include 'Recently Added' section in catalog.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--genre-source-field',
default=_('Tags'),
dest='genre_source_field',
action=None,
help=_("Source field for 'Genres' section.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--header-note-source-field',
default='',
dest='header_note_source_field',
action=None,
help=_("Custom field containing note text to insert in Description header.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--merge-comments-rule',
default='::',
dest='merge_comments_rule',
action=None,
help=_("#<custom field>:[before|after]:[True|False] specifying:\n"
" <custom field> Custom field containing notes to merge with Comments\n"
" [before|after] Placement of notes with respect to Comments\n"
" [True|False] - A horizontal rule is inserted between notes and Comments\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--output-profile',
default=None,
dest='output_profile',
action=None,
help=_("Specifies the output profile. In some cases, an output profile is required to optimize"
" the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured"
" Table of Contents with Sections and Articles.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--prefix-rules',
default="(('Read books','tags','+','\u2713'),('Wishlist item','tags','Wishlist','\u00d7'))",
dest='prefix_rules',
action=None,
help=_("Specifies the rules used to include prefixes indicating read books, wishlist items and other user-specified prefixes.\n"
"The model for a prefix rule is ('<rule name>','<source field>','<pattern>','<prefix>').\n"
"When multiple rules are defined, the first matching rule will be used.\n"
"Default:\n" + '"' + '%default' + '"' + "\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--preset',
default=None,
dest='preset',
action=None,
help=_("Use a named preset created with the GUI catalog builder.\n"
"A preset specifies all settings for building a catalog.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--use-existing-cover',
default=False,
dest='use_existing_cover',
action='store_true',
help=_("Replace existing cover when generating the catalog.\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
Option('--thumb-width',
default='1.0',
dest='thumb_width',
action=None,
help=_("Size hint (in inches) for book covers in catalog.\n"
"Range: 1.0 - 2.0\n"
"Default: '%default'\n"
"Applies to: AZW3, EPUB, MOBI output formats")),
]
# }}}
def run(self, path_to_output, opts, db, notification=DummyReporter()):
    """Generate a catalog of the library and convert it to ``path_to_output``.

    The method finalizes ``opts`` (preset expansion, output profile, section
    switches, thumbnail width, prefix/exclusion rules), asks ``CatalogBuilder``
    to build the catalog sources and then runs the conversion pipeline
    (``Plumber``) on the generated OPF.

    :param path_to_output: destination file; its extension selects the format.
    :param opts: parsed options object; it is mutated in place and stored on
        ``self.opts``.
    :param db: library database, used by the builder and to look up an
        existing catalog cover.
    :param notification: progress callback handed to the builder and pipeline.
    :return: ``1`` when a preset named on the command line does not exist; a
        two item list ``[title, message]`` when no section is enabled in the
        GUI environment; otherwise ``catalog.error``.
    """
    from calibre.library.catalogs.epub_mobi_builder import CatalogBuilder
    from calibre.utils.logging import default_log as log
    from calibre.utils.config import JSONConfig

    # If preset specified from the cli, insert stored options from JSON file
    if hasattr(opts, 'preset') and opts.preset:
        available_presets = JSONConfig("catalog_presets")
        if opts.preset not in available_presets:
            if available_presets:
                # Translate the template first and interpolate afterwards,
                # otherwise the msgid can never match a catalog entry.
                print(_('Error: Preset "%s" not found.') % opts.preset)
                print(_('Stored presets: %s') % ', '.join(sorted(available_presets.keys())))
            else:
                print(_('Error: No stored presets.'))
            return 1

        # Copy the relevant preset values to the opts object. The preset is
        # looked up once so that copying cannot change which preset is read.
        preset = available_presets[opts.preset]
        for item in preset:
            if item not in ('exclusion_rules_tw', 'format', 'prefix_rules_tw'):
                setattr(opts, item, preset[item])

        # Provide an unconnected device
        opts.connected_device = {
            'is_device_connected': False,
            'kind': None,
            'name': None,
            'save_template': None,
            'serial': None,
            'storage': None,
        }

        # Convert prefix_rules and exclusion_rules from JSON lists to tuples
        opts.prefix_rules = tuple(tuple(rule) for rule in opts.prefix_rules)
        opts.exclusion_rules = tuple(tuple(rule) for rule in opts.exclusion_rules)

    opts.log = log
    opts.fmt = self.fmt = path_to_output.rpartition('.')[2]

    # Add local options
    opts.creator = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y'))
    opts.creator_sort_as = '%s %s' % ('calibre', strftime('%Y-%m-%d'))
    opts.connected_kindle = False

    # Finalize output_profile
    op = opts.output_profile
    if op is None:
        op = 'default'

    device = opts.connected_device
    if device['name'] and 'kindle' in device['name'].lower():
        opts.connected_kindle = True
        if device['serial'] and device['serial'][:4] in ('B004', 'B005'):
            op = "kindle_dx"
        else:
            op = "kindle"

    # Non-kindle and DX profiles get the longer clip lengths
    roomy_profile = op.endswith('dx') or 'kindle' not in op
    opts.description_clip = 380 if roomy_profile else 100
    opts.author_clip = 100 if roomy_profile else 60
    opts.output_profile = op

    opts.basename = "Catalog"
    opts.cli_environment = not hasattr(opts, 'sync')

    # Hard-wired to always sort descriptions by author, with series after non-series
    opts.sort_descriptions_by_author = True

    build_log = []
    build_log.append("%s('%s'): Generating %s %sin %s environment, locale: '%s'" %
        (self.name,
         current_library_name(),
         self.fmt,
         'for %s ' % opts.output_profile if opts.output_profile else '',
         'CLI' if opts.cli_environment else 'GUI',
         calibre_langcode_to_name(canonicalize_lang(get_lang()), localize=False))
    )

    # If exclude_genre is blank, assume user wants all tags as genres.
    # 'a^' is a pattern that can never match.
    if opts.exclude_genre.strip() == '':
        opts.exclude_genre = 'a^'
        build_log.append(" converting empty exclude_genre to 'a^'")

    if device['is_device_connected'] and device['kind'] == 'device':
        if device['serial']:
            # Only the first four characters of the serial are logged
            build_log.append(" connected_device: '%s' #%s%s " %
                (device['name'],
                 device['serial'][0:4],
                 'x' * (len(device['serial']) - 4)))
            for storage in device['storage']:
                if storage:
                    build_log.append(" mount point: %s" % storage)
        else:
            build_log.append(" connected_device: '%s'" % device['name'])
            try:
                for storage in device['storage']:
                    if storage:
                        build_log.append(" mount point: %s" % storage)
            except (KeyError, TypeError):
                # 'storage' missing or not iterable (e.g. None)
                build_log.append(" (no mount points)")
    else:
        build_log.append(" connected_device: '%s'" % device['name'])

    opts_dict = vars(opts)
    if opts_dict['ids']:
        build_log.append(" book count: %d" % len(opts_dict['ids']))

    section_switches = (
        (opts.generate_authors, 'Authors'),
        (opts.generate_titles, 'Titles'),
        (opts.generate_series, 'Series'),
        (opts.generate_genres, 'Genres'),
        (opts.generate_recently_added, 'Recently Added'),
        (opts.generate_descriptions, 'Descriptions'),
    )
    sections_list = [label for enabled, label in section_switches if enabled]

    if not sections_list:
        if opts.cli_environment:
            opts.log.warn('*** No Section switches specified, enabling all Sections ***')
            opts.generate_authors = True
            opts.generate_titles = True
            opts.generate_series = True
            opts.generate_genres = True
            opts.generate_recently_added = True
            opts.generate_descriptions = True
            sections_list = ['Authors', 'Titles', 'Series', 'Genres', 'Recently Added', 'Descriptions']
        else:
            opts.log.warn('\n*** No enabled Sections, terminating catalog generation ***')
            return ["No Included Sections", "No enabled Sections.\nCheck E-book options tab\n'Included sections'\n"]

    if opts.fmt == 'mobi' and sections_list == ['Descriptions']:
        warning = _("\n*** Adding 'By authors' section required for MOBI output ***")
        opts.log.warn(warning)
        sections_list.insert(0, 'Authors')
        opts.generate_authors = True

    opts.log(" Sections: %s" % ', '.join(sections_list))
    opts.section_list = sections_list

    # Limit thumb_width to 1.0" - 2.0"
    try:
        if float(opts.thumb_width) < float(self.THUMB_SMALLEST):
            log.warning("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_SMALLEST))
            opts.thumb_width = self.THUMB_SMALLEST
        if float(opts.thumb_width) > float(self.THUMB_LARGEST):
            log.warning("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_LARGEST))
            opts.thumb_width = self.THUMB_LARGEST
        opts.thumb_width = "%.2f" % float(opts.thumb_width)
    except (TypeError, ValueError):
        # Value was not convertible to a float
        log.error("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_SMALLEST))
        opts.thumb_width = "1.0"

    # eval prefix_rules if passed from command line
    # NOTE(review): eval() executes arbitrary expressions supplied on the
    # command line; ast.literal_eval would accept the documented tuple syntax
    # without that risk - confirm no caller relies on non-literal input.
    if not isinstance(opts.prefix_rules, tuple):
        try:
            opts.prefix_rules = eval(opts.prefix_rules)
        except Exception:
            log.error("malformed --prefix-rules: %s" % opts.prefix_rules)
            raise
        for rule in opts.prefix_rules:
            if len(rule) != 4:
                log.error("incorrect number of args for --prefix-rules: %s" % repr(rule))

    # eval exclusion_rules if passed from command line
    # NOTE(review): same eval() concern as for prefix_rules above.
    if not isinstance(opts.exclusion_rules, tuple):
        try:
            opts.exclusion_rules = eval(opts.exclusion_rules)
        except Exception:
            log.error("malformed --exclusion-rules: %s" % opts.exclusion_rules)
            raise
        for rule in opts.exclusion_rules:
            if len(rule) != 3:
                log.error("incorrect number of args for --exclusion-rules: %s" % repr(rule))

    # Display opts
    displayed_opts = (
        'catalog_title', 'author_clip', 'connected_kindle', 'creator',
        'cross_reference_authors', 'description_clip', 'exclude_book_marker',
        'exclude_genre', 'exclude_tags', 'exclusion_rules', 'fmt',
        'genre_source_field', 'header_note_source_field', 'merge_comments_rule',
        'output_profile', 'prefix_rules', 'preset', 'read_book_marker',
        'search_text', 'sort_by', 'sort_descriptions_by_author', 'sync',
        'thumb_width', 'use_existing_cover', 'wishlist_tag')
    build_log.append(" opts:")
    for key in sorted(opts_dict.keys()):
        if key in displayed_opts:
            build_log.append(" %s: %s" % (key, repr(opts_dict[key])))
    if opts.verbose:
        log('\n'.join(build_log))

    # Capture start_time
    opts.start_time = time.time()

    self.opts = opts

    if opts.verbose:
        log.info(" Begin catalog source generation (%s)" %
                 unicode_type(datetime.timedelta(seconds=int(time.time() - opts.start_time))))

    # Launch the Catalog builder
    catalog = CatalogBuilder(db, opts, self, report_progress=notification)

    try:
        catalog.build_sources()
        if opts.verbose:
            log.info(" Completed catalog source generation (%s)\n" %
                     unicode_type(datetime.timedelta(seconds=int(time.time() - opts.start_time))))
    except (AuthorSortMismatchException, EmptyCatalogException) as e:
        # Expected failures: report them and fall through to the return below
        log.error(" *** Terminated catalog generation: %s ***" % e)
    except Exception:
        log.error(" unhandled exception in catalog generator")
        raise
    else:
        recommendations = []
        recommendations.append(('remove_fake_margins', False,
            OptionRecommendation.HIGH))
        recommendations.append(('comments', '', OptionRecommendation.HIGH))

        # >>> Use to debug generated catalog code before pipeline conversion <<<
        GENERATE_DEBUG_EPUB = False
        if GENERATE_DEBUG_EPUB:
            catalog_debug_path = os.path.join(os.path.expanduser('~'), 'Desktop', 'Catalog debug')
            setattr(opts, 'debug_pipeline', os.path.expanduser(catalog_debug_path))

        dp = getattr(opts, 'debug_pipeline', None)
        if dp is not None:
            recommendations.append(('debug_pipeline', dp,
                OptionRecommendation.HIGH))

        if opts.output_profile and opts.output_profile.startswith("kindle"):
            recommendations.append(('output_profile', opts.output_profile,
                OptionRecommendation.HIGH))
            recommendations.append(('book_producer', opts.output_profile,
                OptionRecommendation.HIGH))
            if opts.fmt == 'mobi':
                recommendations.append(('no_inline_toc', True,
                    OptionRecommendation.HIGH))
        recommendations.append(('verbose', 2,
            OptionRecommendation.HIGH))

        # Use existing cover or generate new cover
        cpath = None
        existing_cover = False
        try:
            search_text = 'title:"%s" author:%s' % (
                opts.catalog_title.replace('"', '\\"'), 'calibre')
            matches = db.search(search_text, return_matches=True, sort_results=False)
            if matches:
                cpath = db.cover(matches[0], index_is_id=True, as_path=True)
                if cpath and os.path.exists(cpath):
                    existing_cover = True
        except Exception:
            # Best effort: any lookup failure just means "no existing cover"
            pass

        if self.opts.use_existing_cover and not existing_cover:
            log.warning("no existing catalog cover found")

        if self.opts.use_existing_cover and existing_cover:
            recommendations.append(('cover', cpath, OptionRecommendation.HIGH))
            log.info("using existing catalog cover")
        else:
            from calibre.ebooks.covers import calibre_cover2
            log.info("replacing catalog cover")
            new_cover_path = PersistentTemporaryFile(suffix='.jpg')
            new_cover = calibre_cover2(opts.catalog_title, 'calibre')
            new_cover_path.write(new_cover)
            new_cover_path.close()
            recommendations.append(('cover', new_cover_path.name, OptionRecommendation.HIGH))

        # Run ebook-convert
        from calibre.ebooks.conversion.plumber import Plumber
        plumber = Plumber(os.path.join(catalog.catalog_path, opts.basename + '.opf'),
                          path_to_output, log, report_progress=notification,
                          abort_after_input_dump=False)
        plumber.merge_ui_recommendations(recommendations)
        plumber.run()

        # Remove the cover file fetched from the db, if there was one
        if cpath:
            try:
                os.remove(cpath)
            except OSError:
                pass

        if GENERATE_DEBUG_EPUB:
            from calibre.ebooks.epub import initialize_container
            from calibre.ebooks.tweak import zip_rebuilder
            from calibre.utils.zipfile import ZipFile
            input_path = os.path.join(catalog_debug_path, 'input')
            epub_shell = os.path.join(catalog_debug_path, 'epub_shell.zip')
            initialize_container(epub_shell, opf_name='content.opf')
            with ZipFile(epub_shell, 'r') as zf:
                zf.extractall(path=input_path)
            os.remove(epub_shell)
            zip_rebuilder(input_path, os.path.join(catalog_debug_path, 'input.epub'))

    if opts.verbose:
        log.info(" Catalog creation complete (%s)\n" %
                 unicode_type(datetime.timedelta(seconds=int(time.time() - opts.start_time))))

    # returns to gui2.actions.catalog:catalog_generated()
    return catalog.error

View File

@@ -0,0 +1,174 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
# License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
import re
from calibre import prepare_string_for_xml
from calibre.constants import preferred_encoding
from calibre.ebooks.BeautifulSoup import (
BeautifulSoup, CData, Comment, Declaration, NavigableString,
ProcessingInstruction
)
from calibre.utils.html2text import html2text
from polyglot.builtins import unicode_type
# Patterns used by comments_to_html().
#
# lost_cr_pat: a lowercase letter, one of '.', '?' or '!', then an uppercase
# letter with nothing in between, i.e. a sentence boundary whose line break
# was lost.
# Hackish - ignoring sentences ending or beginning in numbers to avoid
# confusion with decimal points.
lost_cr_pat = re.compile('([a-z])([\\.\\?!])([A-Z])')
# lost_cr_exception_pat: abbreviations (Ph.D, D.Phil, Dr./Mr./Mrs./Ms. followed
# by a capital) that comments_to_html() shields from lost_cr_pat by temporarily
# inserting a '\r' after each '.'.
lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')
# sanitize_pat: opening of script/table/tr/td/th/style/iframe tags, matched
# case-insensitively; comments containing any of them are routed through
# sanitize_comments_html().
sanitize_pat = re.compile(r'<script|<table|<tr|<td|<th|<style|<iframe',
                          re.IGNORECASE)
def comments_to_html(comments):
    r'''
    Convert free-form comment text to a block of HTML paragraphs.

    Behaviour, in order of precedence:

      * Empty/None input returns ``<p></p>``.
      * Byte strings are decoded with the preferred encoding.
      * Text whose first non-blank character is ``<`` is assumed to be HTML
        already and is returned untouched.
      * Text without any ``<`` is XML-escaped; blank lines (``\n\n``) separate
        ``<p class="description">`` paragraphs and single ``\n`` become
        ``<br />``.
      * Text containing script/table/style/iframe style markup is passed to
        sanitize_comments_html(); if that fails the traceback is printed and
        ``<p></p>`` is returned.
      * Anything else (plain text with some inline markup) is repaired:
        sentence breaks that lost their newline
        (``...end of a paragraph.Somehow the break was lost...``) are split
        into paragraphs, ``--`` becomes ``&mdash;``, and top-level text and
        inline tags are wrapped in ``<p class="description">`` elements while
        other tags are kept as siblings.

    :param comments: comment text (unicode or encoded bytes), or None.
    :return: HTML markup as a string.
    '''
    if not comments:
        return u'<p></p>'
    if not isinstance(comments, unicode_type):
        comments = comments.decode(preferred_encoding, 'replace')

    if comments.lstrip().startswith('<'):
        # Comment is already HTML do not mess with it
        return comments

    if '<' not in comments:
        # Pure text: escape it and build the paragraphs directly
        comments = prepare_string_for_xml(comments)
        parts = [u'<p class="description">%s</p>'%x.replace(u'\n', u'<br />')
                for x in comments.split('\n\n')]
        return '\n'.join(parts)

    if sanitize_pat.search(comments) is not None:
        try:
            return sanitize_comments_html(comments)
        except Exception:
            # Best effort: report the failure and return an empty paragraph
            # instead of propagating. KeyboardInterrupt/SystemExit are not
            # swallowed.
            import traceback
            traceback.print_exc()
            return u'<p></p>'

    # Explode lost CRs to \n\n
    # Abbreviations are protected by a temporary '\r' after each '.', which is
    # removed again once the lost breaks have been expanded.
    comments = lost_cr_exception_pat.sub(lambda m: m.group().replace('.',
        '.\r'), comments)
    for lost_cr in lost_cr_pat.finditer(comments):
        comments = comments.replace(lost_cr.group(),
                                    '%s%s\n\n%s' % (lost_cr.group(1),
                                                    lost_cr.group(2),
                                                    lost_cr.group(3)))

    comments = comments.replace(u'\r', u'')
    # Convert \n\n to <p>s
    comments = comments.replace(u'\n\n', u'<p>')
    # Convert solo returns to <br />
    comments = comments.replace(u'\n', '<br />')
    # Convert two hyphens to emdash
    comments = comments.replace('--', '&mdash;')

    soup = BeautifulSoup('<div>' + comments + '</div>').find('div')
    result = BeautifulSoup('<div>')
    container = result.find('div')
    rtc = 0  # insertion index into the result container
    open_pTag = False  # True while text/inline nodes are being collected

    all_tokens = list(soup.contents)
    inline_tags = ('br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a', 'hr')
    for token in all_tokens:
        if isinstance(token, (CData, Comment, Declaration, ProcessingInstruction)):
            continue
        if isinstance(token, NavigableString) or token.name in inline_tags:
            # Text and inline markup accumulate in the current paragraph
            if not open_pTag:
                pTag = result.new_tag('p')
                open_pTag = True
                ptc = 0
            pTag.insert(ptc, token)
            ptc += 1
        else:
            # Any other tag closes the pending paragraph and is kept as a
            # sibling of the paragraphs
            if open_pTag:
                container.insert(rtc, pTag)
                rtc += 1
                open_pTag = False
                ptc = 0
            container.insert(rtc, token)
            rtc += 1

    if open_pTag:
        container.insert(rtc, pTag)

    for p in container.findAll('p'):
        p['class'] = 'description'

    return container.decode_contents()
def markdown(val):
    """Convert ``val`` with a Markdown converter cached on this function.

    The converter is created on first use and stored as the function
    attribute ``markdown.Markdown`` so later calls reuse it.
    """
    if not hasattr(markdown, 'Markdown'):
        from calibre.ebooks.markdown import Markdown
        markdown.Markdown = Markdown()
    converter = markdown.Markdown
    return converter.convert(val)
def merge_comments(one, two):
    """Return both comments converted to HTML, separated by a blank line."""
    converted = (comments_to_html(one), comments_to_html(two))
    return '\n\n'.join(converted)
def sanitize_comments_html(html):
    """Round-trip ``html`` through html2text and Markdown.

    The markup is first reduced to text with html2text() and that text is
    then rendered back to HTML by a fresh Markdown converter.
    """
    from calibre.ebooks.markdown import Markdown
    as_text = html2text(html)
    return Markdown().convert(as_text)
def find_tests():
    """Return a unittest suite exercising comments_to_html()."""
    import unittest

    # (input, expected output) pairs
    cases = (
        (b'lineone\n\nlinetwo',
         '<p class="description">lineone</p>\n<p class="description">linetwo</p>'),
        ('a <b>b&c</b>\nf',
         '<p class="description">a <b>b&amp;c</b><br/>f</p>'),
        ('a <?xml asd> b\n\ncd',
         '<p class="description">a b</p><p class="description">cd</p>'),
    )

    class Test(unittest.TestCase):

        def test_comments_to_html(self):
            for raw, expected in cases:
                actual = comments_to_html(raw)
                self.assertEqual(actual, expected)

    loader = unittest.defaultTestLoader
    return loader.loadTestsFromTestCase(Test)

View File

@@ -0,0 +1,702 @@
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Created on 25 May 2010
@author: charles
'''
import traceback
from collections import OrderedDict
from calibre.utils.config_base import tweaks
from polyglot.builtins import iteritems, itervalues
# Maps a category name to the file name of its icon.
# NOTE(review): the 'custom:' and 'user:' keys end in a colon and look like
# prefixes shared by all custom-column / user-category keys - confirm against
# the code that reads this map.
category_icon_map = {
    'authors' : 'user_profile.png',
    'series' : 'series.png',
    'formats' : 'book.png',
    'publisher' : 'publisher.png',
    'rating' : 'rating.png',
    'news' : 'news.png',
    'tags' : 'tags.png',
    'custom:' : 'column.png',
    'user:' : 'tb_folder.png',
    'search' : 'search.png',
    'identifiers': 'identifiers.png',
    'gst' : 'catalog.png',
    'languages' : 'languages.png',
}
# Builtin metadata {{{
def _builtin_field_metadata():
# This is a function so that changing the UI language allows newly created
# field metadata objects to have correctly translated labels for builtin
# fields.
return [
('authors', {'table':'authors',
'column':'name',
'link_column':'author',
'category_sort':'sort',
'datatype':'text',
'is_multiple':{'cache_to_list': ',',
'ui_to_list': '&',
'list_to_ui': ' & '},
'kind':'field',
'name':_('Authors'),
'search_terms':['authors', 'author'],
'is_custom':False,
'is_category':True,
'is_csp': False}),
('languages', {'table':'languages',
'column':'lang_code',
'link_column':'lang_code',
'category_sort':'lang_code',
'datatype':'text',
'is_multiple':{'cache_to_list': ',',
'ui_to_list': ',',
'list_to_ui': ', '},
'kind':'field',
'name':_('Languages'),
'search_terms':['languages', 'language'],
'is_custom':False,
'is_category':True,
'is_csp': False}),
('series', {'table':'series',
'column':'name',
'link_column':'series',
'category_sort':'(title_sort(name))',
'datatype':'series',
'is_multiple':{},
'kind':'field',
'name':ngettext('Series', 'Series', 1),
'search_terms':['series'],
'is_custom':False,
'is_category':True,
'is_csp': False}),
('formats', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':{'cache_to_list': ',',
'ui_to_list': ',',
'list_to_ui': ', '},
'kind':'field',
'name':_('Formats'),
'search_terms':['formats', 'format'],
'is_custom':False,
'is_category':True,
'is_csp': False}),
('publisher', {'table':'publishers',
'column':'name',
'link_column':'publisher',
'category_sort':'name',
'datatype':'text',
'is_multiple':{},
'kind':'field',
'name':_('Publisher'),
'search_terms':['publisher'],
'is_custom':False,
'is_category':True,
'is_csp': False}),
('rating', {'table':'ratings',
'column':'rating',
'link_column':'rating',
'category_sort':'rating',
'datatype':'rating',
'is_multiple':{},
'kind':'field',
'name':_('Rating'),
'search_terms':['rating'],
'is_custom':False,
'is_category':True,
'is_csp': False}),
('news', {'table':'news',
'column':'name',
'category_sort':'name',
'datatype':None,
'is_multiple':{},
'kind':'category',
'name':_('News'),
'search_terms':[],
'is_custom':False,
'is_category':True,
'is_csp': False}),
('tags', {'table':'tags',
'column':'name',
'link_column': 'tag',
'category_sort':'name',
'datatype':'text',
'is_multiple':{'cache_to_list': ',',
'ui_to_list': ',',
'list_to_ui': ', '},
'kind':'field',
'name':_('Tags'),
'search_terms':['tags', 'tag'],
'is_custom':False,
'is_category':True,
'is_csp': False}),
('identifiers', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':{'cache_to_list': ',',
'ui_to_list': ',',
'list_to_ui': ', '},
'kind':'field',
'name':_('Identifiers'),
'search_terms':['identifiers', 'identifier', 'isbn'],
'is_custom':False,
'is_category':True,
'is_csp': True}),
('author_sort',{'table':None,
'column':None,
'datatype':'text',
'is_multiple':{},
'kind':'field',
'name':_('Author sort'),
'search_terms':['author_sort'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('au_map', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':{'cache_to_list': ',',
'ui_to_list': None,
'list_to_ui': None},
'kind':'field',
'name':None,
'search_terms':[],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('comments', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':{},
'kind':'field',
'name':_('Comments'),
'search_terms':['comments', 'comment'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('cover', {'table':None,
'column':None,
'datatype':'int',
'is_multiple':{},
'kind':'field',
'name':_('Cover'),
'search_terms':['cover'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('id', {'table':None,
'column':None,
'datatype':'int',
'is_multiple':{},
'kind':'field',
'name':None,
'search_terms':['id'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('last_modified', {'table':None,
'column':None,
'datatype':'datetime',
'is_multiple':{},
'kind':'field',
'name':_('Modified'),
'search_terms':['last_modified'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('ondevice', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':{},
'kind':'field',
'name':_('On device'),
'search_terms':['ondevice'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('path', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':{},
'kind':'field',
'name':_('Path'),
'search_terms':[],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('pubdate', {'table':None,
'column':None,
'datatype':'datetime',
'is_multiple':{},
'kind':'field',
'name':_('Published'),
'search_terms':['pubdate'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('marked', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':{},
'kind':'field',
'name': None,
'search_terms':['marked'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('series_index',{'table':None,
'column':None,
'datatype':'float',
'is_multiple':{},
'kind':'field',
'name':None,
'search_terms':['series_index'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('series_sort', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':{},
'kind':'field',
'name':_('Series sort'),
'search_terms':['series_sort'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('sort', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':{},
'kind':'field',
'name':_('Title sort'),
'search_terms':['title_sort'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('size', {'table':None,
'column':None,
'datatype':'float',
'is_multiple':{},
'kind':'field',
'name':_('Size'),
'search_terms':['size'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('timestamp', {'table':None,
'column':None,
'datatype':'datetime',
'is_multiple':{},
'kind':'field',
'name':_('Date'),
'search_terms':['date'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('title', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':{},
'kind':'field',
'name':_('Title'),
'search_terms':['title'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
('uuid', {'table':None,
'column':None,
'datatype':'text',
'is_multiple':{},
'kind':'field',
'name':None,
'search_terms':['uuid'],
'is_custom':False,
'is_category':False,
'is_csp': False}),
]
# }}}
class FieldMetadata(object):
'''
key: the key to the dictionary is:
- for standard fields, the metadata field name.
- for custom fields, the metadata field name prefixed by '#'
This is done to create two 'namespaces' so the names don't clash
label: the actual column label. No prefixing.
datatype: the type of information in the field. Valid values are listed in
VALID_DATA_TYPES below.
is_multiple: valid for the text datatype. If {}, the field is to be
treated as a single term. If not None, it contains a dict of the form
{'cache_to_list': ',',
'ui_to_list': ',',
'list_to_ui': ', '}
where the cache_to_list contains the character used to split the value in
the meta2 table, ui_to_list contains the character used to create a list
from a value shown in the ui (each resulting value must be strip()ed and
empty values removed), and list_to_ui contains the string used in join()
to create a displayable string from the list.
kind == field: is a db field.
kind == category: standard tag category that isn't a field. see news.
kind == user: user-defined tag category.
kind == search: saved-searches category.
is_category: is a tag browser category. If true, then:
table: name of the db table used to construct item list
column: name of the column in the normalized table to join on
link_column: name of the column in the connection table to join on. This
key should not be present if there is no link table
category_sort: the field in the normalized table to sort on. This
key must be present if is_category is True
If these are None, then the category constructor must know how
to build the item list (e.g., formats, news).
The order below is the order that the categories will
appear in the tags pane.
name: the text that is to be used when displaying the field. Column headings
in the GUI, etc.
search_terms: the terms that can be used to identify the field when
searching. They can be thought of as aliases for metadata keys, but are only
valid when passed to search().
is_custom: the field has been added by the user.
rec_index: the index of the field in the db metadata record.
is_csp: field contains colon-separated pairs. Must also be text, is_multiple
'''
VALID_DATA_TYPES = frozenset([None, 'rating', 'text', 'comments', 'datetime',
'int', 'float', 'bool', 'series', 'composite', 'enumeration'])
# search labels that are not db columns
search_items = ['all', 'search', 'vl']
__calibre_serializable__ = True
def __init__(self):
self._field_metadata = _builtin_field_metadata()
self._tb_cats = OrderedDict()
self._tb_custom_fields = {}
self._search_term_map = {}
self.custom_label_to_key_map = {}
for k,v in self._field_metadata:
if v['kind'] == 'field' and v['datatype'] not in self.VALID_DATA_TYPES:
raise ValueError('Unknown datatype %s for field %s'%(v['datatype'], k))
self._tb_cats[k] = v
self._tb_cats[k]['label'] = k
self._tb_cats[k]['display'] = {}
self._tb_cats[k]['is_editable'] = True
self._add_search_terms_to_map(k, v['search_terms'])
self._tb_cats['timestamp']['display'] = {
'date_format': tweaks['gui_timestamp_display_format']}
self._tb_cats['pubdate']['display'] = {
'date_format': tweaks['gui_pubdate_display_format']}
self._tb_cats['last_modified']['display'] = {
'date_format': tweaks['gui_last_modified_display_format']}
self.custom_field_prefix = '#'
self.get = self._tb_cats.get
def __getitem__(self, key):
if key == 'title_sort':
return self._tb_cats['sort']
return self._tb_cats[key]
def __setitem__(self, key, val):
raise AttributeError('Assigning to this object is forbidden')
def __delitem__(self, key):
del self._tb_cats[key]
def __iter__(self):
for key in self._tb_cats:
yield key
def __contains__(self, key):
return key in self._tb_cats or key == 'title_sort'
def has_key(self, key):
return key in self
def keys(self):
return list(self._tb_cats.keys())
def __eq__(self, other):
if not isinstance(other, FieldMetadata):
return False
for attr in ('_tb_custom_fields', '_search_term_map', 'custom_label_to_key_map', 'custom_field_prefix'):
if getattr(self, attr) != getattr(other, attr):
return False
return dict(self._tb_cats) == dict(other._tb_cats)
def __ne__(self, other):
return not self.__eq__(other)
def sortable_field_keys(self):
return [k for k in self._tb_cats.keys()
if self._tb_cats[k]['kind']=='field' and
self._tb_cats[k]['datatype'] is not None]
def ui_sortable_field_keys(self):
ans = {k:self._tb_cats[k]['name'] for k in set(self.sortable_field_keys()) - {
'sort', 'author_sort', 'au_map', 'series_sort', 'marked',
'series_index', 'path', 'formats', 'identifiers', 'uuid',
'comments',
} if self._tb_cats[k]['name']}
ans['cover'] = _('Has cover')
return ans
def displayable_field_keys(self):
return [k for k in self._tb_cats.keys()
if self._tb_cats[k]['kind']=='field' and
self._tb_cats[k]['datatype'] is not None and
k not in ('au_map', 'marked', 'ondevice', 'cover', 'series_sort') and
not self.is_series_index(k)]
def standard_field_keys(self):
return [k for k in self._tb_cats.keys()
if self._tb_cats[k]['kind']=='field' and
not self._tb_cats[k]['is_custom']]
def custom_field_keys(self, include_composites=True):
res = []
for k in self._tb_cats.keys():
fm = self._tb_cats[k]
if fm['kind']=='field' and fm['is_custom'] and \
(fm['datatype'] != 'composite' or include_composites):
res.append(k)
return res
def all_field_keys(self):
return [k for k in self._tb_cats.keys() if self._tb_cats[k]['kind']=='field']
def iterkeys(self):
for key in self._tb_cats:
yield key
def itervalues(self):
return itervalues(self._tb_cats)
def values(self):
return list(self._tb_cats.values())
def iteritems(self):
for key in self._tb_cats:
yield (key, self._tb_cats[key])
iter_items = iteritems
def custom_iteritems(self):
for key, meta in iteritems(self._tb_custom_fields):
yield (key, meta)
def items(self):
return list(self.iter_items())
def is_custom_field(self, key):
return key.startswith(self.custom_field_prefix)
def is_ignorable_field(self, key):
'Custom fields and user categories are ignorable'
return self.is_custom_field(key) or key.startswith('@')
def ignorable_field_keys(self):
return [k for k in self._tb_cats if self.is_ignorable_field(k)]
def is_series_index(self, key):
try:
m = self._tb_cats[key]
return (m['datatype'] == 'float' and key.endswith('_index') and
key[:-6] in self._tb_cats)
except (KeyError, ValueError, TypeError, AttributeError):
return False
def key_to_label(self, key):
if 'label' not in self._tb_cats[key]:
return key
return self._tb_cats[key]['label']
def label_to_key(self, label, prefer_custom=False):
if prefer_custom:
if label in self.custom_label_to_key_map:
return self.custom_label_to_key_map[label]
if 'label' in self._tb_cats:
return label
if not prefer_custom:
if label in self.custom_label_to_key_map:
return self.custom_label_to_key_map[label]
raise ValueError('Unknown key [%s]'%(label))
def all_metadata(self):
l = {}
for k in self._tb_cats:
l[k] = self._tb_cats[k]
return l
def custom_field_metadata(self, include_composites=True):
if include_composites:
return self._tb_custom_fields
l = {}
for k in self.custom_field_keys(include_composites):
l[k] = self._tb_cats[k]
return l
def add_custom_field(self, label, table, column, datatype, colnum, name,
display, is_editable, is_multiple, is_category,
is_csp=False):
key = self.custom_field_prefix + label
if key in self._tb_cats:
raise ValueError('Duplicate custom field [%s]'%(label))
if datatype not in self.VALID_DATA_TYPES:
raise ValueError('Unknown datatype %s for field %s'%(datatype, key))
self._tb_cats[key] = {'table':table, 'column':column,
'datatype':datatype, 'is_multiple':is_multiple,
'kind':'field', 'name':name,
'search_terms':[key], 'label':label,
'colnum':colnum, 'display':display,
'is_custom':True, 'is_category':is_category,
'link_column':'value','category_sort':'value',
'is_csp' : is_csp, 'is_editable': is_editable,}
self._tb_custom_fields[key] = self._tb_cats[key]
self._add_search_terms_to_map(key, [key])
self.custom_label_to_key_map[label] = key
if datatype == 'series':
key += '_index'
self._tb_cats[key] = {'table':None, 'column':None,
'datatype':'float', 'is_multiple':{},
'kind':'field', 'name':'',
'search_terms':[key], 'label':label+'_index',
'colnum':None, 'display':{},
'is_custom':False, 'is_category':False,
'link_column':None, 'category_sort':None,
'is_editable': False, 'is_csp': False}
self._add_search_terms_to_map(key, [key])
self.custom_label_to_key_map[label+'_index'] = key
def remove_dynamic_categories(self):
for key in list(self._tb_cats.keys()):
val = self._tb_cats[key]
if val['is_category'] and val['kind'] in ('user', 'search'):
for k in self._tb_cats[key]['search_terms']:
if k in self._search_term_map:
del self._search_term_map[k]
del self._tb_cats[key]
def remove_user_categories(self):
for key in list(self._tb_cats.keys()):
val = self._tb_cats[key]
if val['is_category'] and val['kind'] == 'user':
for k in self._tb_cats[key]['search_terms']:
if k in self._search_term_map:
del self._search_term_map[k]
del self._tb_cats[key]
def _remove_grouped_search_terms(self):
to_remove = [v for v in self._search_term_map
if isinstance(self._search_term_map[v], list)]
for v in to_remove:
del self._search_term_map[v]
def add_grouped_search_terms(self, gst):
self._remove_grouped_search_terms()
for t in gst:
try:
self._add_search_terms_to_map(gst[t], [t])
except ValueError:
traceback.print_exc()
def cc_series_index_column_for(self, key):
return self._tb_cats[key]['rec_index'] + 1
def add_user_category(self, label, name):
if label in self._tb_cats:
raise ValueError('Duplicate user field [%s]'%(label))
st = [label]
if icu_lower(label) != label:
st.append(icu_lower(label))
self._tb_cats[label] = {'table':None, 'column':None,
'datatype':None, 'is_multiple':{},
'kind':'user', 'name':name,
'search_terms':st, 'is_custom':False,
'is_category':True, 'is_csp': False}
self._add_search_terms_to_map(label, st)
def add_search_category(self, label, name, fail_on_existing=True):
    '''
    Register a category of kind 'search' under `label` with display `name`.

    The new entry has no search terms. When `label` is already a known
    field, ValueError is raised if `fail_on_existing` is true; otherwise
    the call returns without changing anything.
    '''
    already_known = label in self._tb_cats
    if already_known and fail_on_existing:
        raise ValueError('Duplicate user field [%s]' % label)
    if already_known:
        return
    self._tb_cats[label] = {
        'table': None,
        'column': None,
        'datatype': None,
        'is_multiple': {},
        'kind': 'search',
        'name': name,
        'search_terms': [],
        'is_custom': False,
        'is_category': True,
        'is_csp': False,
    }
def set_field_record_index(self, label, index, prefer_custom=False):
    '''
    Store `index` as the 'rec_index' of the field identified by `label`.

    With `prefer_custom` the prefixed (custom) key is tried first and the
    plain label is the fallback; without it the plain label is tried first
    and the prefixed key is the fallback. If the fallback key is unknown
    too, the resulting KeyError propagates to the caller.
    '''
    known = self._tb_cats
    if prefer_custom:
        prefixed = self.custom_field_prefix + label
        key = prefixed if prefixed in known else label
    elif label in known:
        key = label
    else:
        key = self.custom_field_prefix + label
    # Deliberately unguarded: an unknown key raises KeyError.
    known[key]['rec_index'] = index
def get_search_terms(self):
    '''
    Return a new list of search terms: the keys of the search term map in
    sorted order, followed by the entries of ``self.search_items`` in their
    existing order.
    '''
    terms = sorted(self._search_term_map)
    terms.extend(self.search_items)
    return terms
def _add_search_terms_to_map(self, key, terms):
    '''
    Map each term in `terms` to `key` in ``self._search_term_map``.

    `terms` may be None, in which case nothing happens. Raises ValueError
    at the first term that is already mapped; terms processed before it
    remain in the map.
    '''
    if terms is None:
        return
    term_map = self._search_term_map
    for term in terms:
        if term in term_map:
            raise ValueError('Attempt to add duplicate search term "%s"' % term)
        term_map[term] = key
def search_term_to_field_key(self, term):
    '''
    Return the field key that `term` is mapped to in the search term map,
    or `term` itself when it has no mapping.
    '''
    term_map = self._search_term_map
    return term_map[term] if term in term_map else term
def searchable_fields(self):
    '''
    Return the keys of all entries whose kind is 'field' and that have at
    least one search term, in the iteration order of ``self._tb_cats``.
    '''
    fields = []
    for name in self._tb_cats:
        meta = self._tb_cats[name]
        if meta['kind'] != 'field':
            continue
        if len(meta['search_terms']) > 0:
            fields.append(name)
    return fields
# The following two methods are to support serialization
# Note that they do not create copies of internal structures, for performance,
# so they are not safe to use for anything else
def fm_as_dict(self):
    '''
    Return a dict describing the custom fields, search term map, custom
    label map, and the user and search categories of this object.

    The first three values are the internal structures themselves, not
    copies; the two category dicts are built fresh but refer to the same
    per-field metadata objects.
    '''
    def categories_of_kind(kind):
        return {k: v for k, v in iteritems(self._tb_cats) if v['kind'] == kind}

    return {
        'custom_fields': self._tb_custom_fields,
        'search_term_map': self._search_term_map,
        'custom_label_to_key_map': self.custom_label_to_key_map,
        'user_categories': categories_of_kind('user'),
        'search_categories': categories_of_kind('search'),
    }
def fm_from_dict(src):
    '''
    Build a FieldMetadata object from a dict shaped like the one returned
    by ``fm_as_dict``.

    The custom field, search term and custom label structures from `src`
    are attached directly (not copied), and every entry of the custom
    fields, user categories and search categories is stored in the new
    object's ``_tb_cats``.
    '''
    restored = FieldMetadata()
    restored._tb_custom_fields = src['custom_fields']
    restored._search_term_map = src['search_term_map']
    restored.custom_label_to_key_map = src['custom_label_to_key_map']
    categories = restored._tb_cats
    for section in ('custom_fields', 'user_categories', 'search_categories'):
        for field_key, field_meta in iteritems(src[section]):
            categories[field_key] = field_meta
    return restored