mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-03-29 08:03:31 +02:00
Initial import
This commit is contained in:
87
ebook_converter/library/__init__.py
Normal file
87
ebook_converter/library/__init__.py
Normal file
@@ -0,0 +1,87 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
''' Code to manage ebook library'''
|
||||
|
||||
|
||||
import os
|
||||
from polyglot.builtins import range
|
||||
|
||||
|
||||
def db(path=None, read_only=False):
    """Open and return a LibraryDatabase.

    :param path: library directory (``~`` is expanded); when falsy, the
        path stored in the user preferences is used instead.
    :param read_only: open the database read-only when True.
    """
    from calibre.db.legacy import LibraryDatabase
    from calibre.utils.config import prefs

    if path:
        location = os.path.expanduser(path)
    else:
        location = prefs['library_path']
    return LibraryDatabase(location, read_only=read_only)
|
||||
|
||||
|
||||
def generate_test_db(library_path,  # {{{
                     num_of_records=20000,
                     num_of_authors=6000,
                     num_of_tags=10000,
                     tag_length=7,
                     author_length=7,
                     title_length=10,
                     max_authors=10,
                     max_tags=10
                     ):
    """Create (or populate) a test library at *library_path* with random
    records.

    Tags, authors and titles are random ASCII-letter strings; each record
    is given 1..max_authors random authors and 0..max_tags random tags.
    Progress and timing information is printed to stdout.
    """
    import random, string, os, sys, time

    if not os.path.exists(library_path):
        os.makedirs(library_path)

    # BUG FIX: the original used string.letters.decode(preferred_encoding),
    # which is Python 2 only (string.letters was removed in Python 3, and
    # text strings have no .decode).  string.ascii_letters is already text
    # on both Python 2 and 3.
    letters = string.ascii_letters

    def randstr(length):
        # Random string of ASCII letters of the given length.
        return ''.join(random.choice(letters) for i in range(length))

    all_tags = [randstr(tag_length) for j in range(num_of_tags)]
    print('Generated', num_of_tags, 'tags')
    all_authors = [randstr(author_length) for j in range(num_of_authors)]
    print('Generated', num_of_authors, 'authors')
    all_titles = [randstr(title_length) for j in range(num_of_records)]
    print('Generated', num_of_records, 'titles')

    testdb = db(library_path)

    print('Creating', num_of_records, 'records...')

    start = time.time()

    # Hoisted out of the loop: imports are cached anyway, but importing once
    # keeps the hot loop clean.
    from calibre.ebooks.metadata.book.base import Metadata

    for i, title in enumerate(all_titles):
        print(i+1, end=' ')
        sys.stdout.flush()
        authors = random.randint(1, max_authors)
        authors = [random.choice(all_authors) for i in range(authors)]
        tags = random.randint(0, max_tags)
        tags = [random.choice(all_tags) for i in range(tags)]
        mi = Metadata(title, authors)
        mi.tags = tags
        testdb.import_book(mi, [])

    t = time.time() - start
    print('\nGenerated', num_of_records, 'records in:', t, 'seconds')
    print('Time per record:', t/num_of_records)
# }}}
|
||||
|
||||
|
||||
def current_library_path():
    """Return the configured library path normalised to forward slashes
    and stripped of any trailing slashes; returns the raw (falsy) value
    unchanged when no library path is configured."""
    from calibre.utils.config import prefs

    raw = prefs['library_path']
    if not raw:
        return raw
    normalised = raw.replace('\\', '/')
    return normalised.rstrip('/')
|
||||
|
||||
|
||||
def current_library_name():
    """Return the base name of the current library directory, or None
    when no library path is configured."""
    import posixpath

    path = current_library_path()
    # current_library_path() yields forward-slash paths, hence posixpath.
    return posixpath.basename(path) if path else None
|
||||
30
ebook_converter/library/catalogs/__init__.py
Normal file
30
ebook_converter/library/catalogs/__init__.py
Normal file
@@ -0,0 +1,30 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
|
||||
# Catalogue fields selectable via --fields ('all' is the wildcard entry).
FIELDS = [
    'all', 'title', 'title_sort', 'author_sort', 'authors', 'comments',
    'cover', 'formats', 'id', 'isbn', 'library_name', 'ondevice',
    'pubdate', 'publisher', 'rating', 'series_index', 'series', 'size',
    'tags', 'timestamp', 'uuid', 'languages', 'identifiers',
]

# Fields that may appear inside a {field} citation template.
TEMPLATE_ALLOWED_FIELDS = [
    'author_sort', 'authors', 'id', 'isbn', 'pubdate', 'title_sort',
    'publisher', 'series_index', 'series', 'tags', 'timestamp', 'title',
    'uuid',
]
|
||||
|
||||
|
||||
class AuthorSortMismatchException(Exception):
    """Catalog-generation error.

    NOTE(review): the name suggests it is raised when author sort values
    conflict; confirm at the raising sites (not visible in this file).
    """
    pass
|
||||
|
||||
|
||||
class EmptyCatalogException(Exception):
    """Catalog-generation error.

    NOTE(review): presumably raised when the generated catalog would
    contain no books; confirm at the raising sites (not visible here).
    """
    pass
|
||||
|
||||
|
||||
class InvalidGenresSourceFieldException(Exception):
    """Catalog-generation error.

    NOTE(review): presumably raised when the field configured as the
    genres source is invalid; confirm at the raising sites (not visible
    here).
    """
    pass
|
||||
|
||||
402
ebook_converter/library/catalogs/bibtex.py
Normal file
402
ebook_converter/library/catalogs/bibtex.py
Normal file
@@ -0,0 +1,402 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, codecs, os, numbers
|
||||
from collections import namedtuple
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.customize import CatalogPlugin
|
||||
from calibre.library.catalogs import FIELDS, TEMPLATE_ALLOWED_FIELDS
|
||||
from calibre.customize.conversion import DummyReporter
|
||||
from calibre.ebooks.metadata import format_isbn
|
||||
from polyglot.builtins import filter, string_or_bytes, unicode_type
|
||||
|
||||
|
||||
class BIBTEX(CatalogPlugin):
    'BIBTEX catalog generator'

    # Record type mirroring optparse's add_option arguments.
    Option = namedtuple('Option', 'option, default, dest, action, help')

    name = 'Catalog_BIBTEX'
    description = 'BIBTEX catalog generator'
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Sengian'
    version = (1, 0, 0)
    file_types = {'bib'}

    cli_options = [
            Option('--fields',
                default='all',
                dest='fields',
                action=None,
                help=_('The fields to output when cataloging books in the '
                    'database. Should be a comma-separated list of fields.\n'
                    'Available fields: %(fields)s.\n'
                    'plus user-created custom fields.\n'
                    'Example: %(opt)s=title,authors,tags\n'
                    "Default: '%%default'\n"
                    "Applies to: BIBTEX output format")%dict(
                        fields=', '.join(FIELDS), opt='--fields')),

            Option('--sort-by',
                default='id',
                dest='sort_by',
                action=None,
                help=_('Output field to sort on.\n'
                'Available fields: author_sort, id, rating, size, timestamp, title.\n'
                "Default: '%default'\n"
                "Applies to: BIBTEX output format")),

            Option('--create-citation',
                default='True',
                dest='impcit',
                action=None,
                help=_('Create a citation for BibTeX entries.\n'
                'Boolean value: True, False\n'
                "Default: '%default'\n"
                "Applies to: BIBTEX output format")),

            Option('--add-files-path',
                default='True',
                dest='addfiles',
                action=None,
                help=_('Create a file entry if formats is selected for BibTeX entries.\n'
                'Boolean value: True, False\n'
                "Default: '%default'\n"
                "Applies to: BIBTEX output format")),

            Option('--citation-template',
                default='{authors}{id}',
                dest='bib_cit',
                action=None,
                help=_('The template for citation creation from database fields.\n'
                    'Should be a template with {} enclosed fields.\n'
                    'Available fields: %s.\n'
                    "Default: '%%default'\n"
                    "Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),

            Option('--choose-encoding',
                default='utf8',
                dest='bibfile_enc',
                action=None,
                help=_('BibTeX file encoding output.\n'
                'Available types: utf8, cp1252, ascii.\n'
                "Default: '%default'\n"
                "Applies to: BIBTEX output format")),

            Option('--choose-encoding-configuration',
                default='strict',
                dest='bibfile_enctag',
                action=None,
                help=_('BibTeX file encoding flag.\n'
                'Available types: strict, replace, ignore, backslashreplace.\n'
                "Default: '%default'\n"
                "Applies to: BIBTEX output format")),

            Option('--entry-type',
                default='book',
                dest='bib_entry',
                action=None,
                help=_('Entry type for BibTeX catalog.\n'
                'Available types: book, misc, mixed.\n'
                "Default: '%default'\n"
                "Applies to: BIBTEX output format"))]

    def run(self, path_to_output, opts, db, notification=DummyReporter()):
        """Generate a .bib catalog of the library at *path_to_output*.

        Validates the CLI/GUI options (which may arrive as strings from
        the CLI or as indices/booleans from the GUI widget), collects the
        matching records from *db*, then writes one BibTeX entry per
        record using the nested helpers below.
        """
        from calibre.utils.date import isoformat
        from calibre.utils.html2text import html2text
        from calibre.utils.bibtex import BibTeX
        from calibre.library.save_to_disk import preprocess_template
        from calibre.utils.logging import default_log as log
        from calibre.utils.filenames import ascii_text

        library_name = os.path.basename(db.library_path)

        def create_bibtex_entry(entry, fields, mode, template_citation,
                bibtexdict, db, citation_bibtex=True, calibre_files=True):
            # Render one record as a BibTeX entry string.  *mode* is the
            # --entry-type value ('book'/'misc'/'mixed'); in strict 'book'
            # mode an entry missing the required book fields yields ''.

            # Bibtex doesn't like UTF-8 but keep unicode until writing
            # Define starting chain or if book valid strict and not book return a Fail string

            bibtex_entry = []
            if mode != "misc" and check_entry_book_valid(entry) :
                bibtex_entry.append('@book{')
            elif mode != "book" :
                bibtex_entry.append('@misc{')
            else :
                # case strict book
                return ''

            if citation_bibtex :
                # Citation tag
                bibtex_entry.append(make_bibtex_citation(entry, template_citation,
                    bibtexdict))
                bibtex_entry = [' '.join(bibtex_entry)]

            for field in fields:
                if field.startswith('#'):
                    # Custom (user-defined) column.
                    item = db.get_field(entry['id'],field,index_is_id=True)
                    if isinstance(item, (bool, numbers.Number)):
                        item = repr(item)
                elif field == 'title_sort':
                    item = entry['sort']
                elif field == 'library_name':
                    item = library_name
                else:
                    item = entry[field]

                # check if the field should be included (none or empty)
                if item is None:
                    continue
                try:
                    if len(item) == 0 :
                        continue
                except TypeError:
                    # Numeric fields have no len(); keep them.
                    pass

                if field == 'authors' :
                    bibtex_entry.append('author = "%s"' % bibtexdict.bibtex_author_format(item))

                elif field == 'id' :
                    bibtex_entry.append('calibreid = "%s"' % int(item))

                elif field == 'rating' :
                    bibtex_entry.append('rating = "%s"' % int(item))

                elif field == 'size' :
                    bibtex_entry.append('%s = "%s octets"' % (field, int(item)))

                elif field == 'tags' :
                    # A list to flatten
                    bibtex_entry.append('tags = "%s"' % bibtexdict.utf8ToBibtex(', '.join(item)))

                elif field == 'comments' :
                    # \n removal
                    item = item.replace('\r\n', ' ')
                    item = item.replace('\n', ' ')
                    # unmatched brace removal (users should use \leftbrace or \rightbrace for single braces)
                    item = bibtexdict.stripUnmatchedSyntax(item, '{', '}')
                    # html to text
                    try:
                        item = html2text(item)
                    except:
                        log.warn("Failed to convert comments to text")
                    bibtex_entry.append('note = "%s"' % bibtexdict.utf8ToBibtex(item))

                elif field == 'isbn' :
                    # Could be 9, 10 or 13 digits
                    bibtex_entry.append('isbn = "%s"' % format_isbn(item))

                elif field == 'formats' :
                    # Add file path if format is selected
                    formats = [format.rpartition('.')[2].lower() for format in item]
                    bibtex_entry.append('formats = "%s"' % ', '.join(formats))
                    if calibre_files:
                        files = [':%s:%s' % (format, format.rpartition('.')[2].upper())
                            for format in item]
                        bibtex_entry.append('file = "%s"' % ', '.join(files))

                elif field == 'series_index' :
                    bibtex_entry.append('volume = "%s"' % int(item))

                elif field == 'timestamp' :
                    # Date part only (drop the time after 'T').
                    bibtex_entry.append('timestamp = "%s"' % isoformat(item).partition('T')[0])

                elif field == 'pubdate' :
                    bibtex_entry.append('year = "%s"' % item.year)
                    bibtex_entry.append('month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))

                elif field.startswith('#') and isinstance(item, string_or_bytes):
                    bibtex_entry.append('custom_%s = "%s"' % (field[1:],
                        bibtexdict.utf8ToBibtex(item)))

                elif isinstance(item, string_or_bytes):
                    # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
                    #    'author_sort', 'series', 'title_sort'] :
                    bibtex_entry.append('%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))

            bibtex_entry = ',\n    '.join(bibtex_entry)
            bibtex_entry += ' }\n\n'

            return bibtex_entry

        def check_entry_book_valid(entry):
            # Check that the required fields are ok for a book entry
            for field in ['title', 'authors', 'publisher'] :
                if entry[field] is None or len(entry[field]) == 0 :
                    return False
            if entry['pubdate'] is None :
                return False
            else :
                return True

        def make_bibtex_citation(entry, template_citation, bibtexclass):
            # Build the citation key for an entry from the user template;
            # falls back to the ISBN digits, then to the calibre id.

            # define a function to replace the template entry by its value
            def tpl_replace(objtplname) :

                tpl_field = re.sub(r'[\{\}]', '', objtplname.group())

                if tpl_field in TEMPLATE_ALLOWED_FIELDS :
                    if tpl_field in ['pubdate', 'timestamp'] :
                        tpl_field = isoformat(entry[tpl_field]).partition('T')[0]
                    elif tpl_field in ['tags', 'authors'] :
                        tpl_field =entry[tpl_field][0]
                    elif tpl_field in ['id', 'series_index'] :
                        tpl_field = unicode_type(entry[tpl_field])
                    else :
                        tpl_field = entry[tpl_field]
                    return ascii_text(tpl_field)
                else:
                    return ''

            if len(template_citation) >0 :
                tpl_citation = bibtexclass.utf8ToBibtex(
                    bibtexclass.ValidateCitationKey(re.sub(r'\{[^{}]*\}',
                        tpl_replace, template_citation)))

                if len(tpl_citation) >0 :
                    return tpl_citation

            if len(entry["isbn"]) > 0 :
                template_citation = '%s' % re.sub(r'[\D]','', entry["isbn"])

            else :
                template_citation = '%s' % unicode_type(entry["id"])

            return bibtexclass.ValidateCitationKey(template_citation)

        self.fmt = path_to_output.rpartition('.')[2]
        self.notification = notification

        # Combobox options
        bibfile_enc = ['utf8', 'cp1252', 'ascii']
        bibfile_enctag = ['strict', 'replace', 'ignore', 'backslashreplace']
        bib_entry = ['mixed', 'misc', 'book']

        # Needed beacause CLI return str vs int by widget
        try:
            bibfile_enc = bibfile_enc[opts.bibfile_enc]
            bibfile_enctag = bibfile_enctag[opts.bibfile_enctag]
            bib_entry = bib_entry[opts.bib_entry]
        except:
            # Options came from the CLI as strings: validate each against
            # the allowed values, falling back to the first (default).
            if opts.bibfile_enc in bibfile_enc :
                bibfile_enc = opts.bibfile_enc
            else :
                log.warn("Incorrect --choose-encoding flag, revert to default")
                bibfile_enc = bibfile_enc[0]
            if opts.bibfile_enctag in bibfile_enctag :
                bibfile_enctag = opts.bibfile_enctag
            else :
                log.warn("Incorrect --choose-encoding-configuration flag, revert to default")
                bibfile_enctag = bibfile_enctag[0]
            if opts.bib_entry in bib_entry :
                bib_entry = opts.bib_entry
            else :
                log.warn("Incorrect --entry-type flag, revert to default")
                bib_entry = bib_entry[0]

        if opts.verbose:
            opts_dict = vars(opts)
            log("%s(): Generating %s" % (self.name,self.fmt))
            if opts.connected_device['is_device_connected']:
                log(" connected_device: %s" % opts.connected_device['name'])
            if opts_dict['search_text']:
                log(" --search='%s'" % opts_dict['search_text'])

            if opts_dict['ids']:
                log(" Book count: %d" % len(opts_dict['ids']))
                if opts_dict['search_text']:
                    log(" (--search ignored when a subset of the database is specified)")

            if opts_dict['fields']:
                if opts_dict['fields'] == 'all':
                    log(" Fields: %s" % ', '.join(FIELDS[1:]))
                else:
                    log(" Fields: %s" % opts_dict['fields'])

            log(" Output file will be encoded in %s with %s flag" % (bibfile_enc, bibfile_enctag))

            log(" BibTeX entry type is %s with a citation like '%s' flag" % (bib_entry, opts_dict['bib_cit']))

        # If a list of ids are provided, don't use search_text
        if opts.ids:
            opts.search_text = None

        data = self.search_sort_db(db, opts)

        if not len(data):
            log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)

        # Get the requested output fields as a list
        fields = self.get_output_fields(db, opts)

        if not len(data):
            log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)

        # Initialize BibTeX class
        bibtexc = BibTeX()

        # Entries writing after Bibtex formating (or not)
        if bibfile_enc != 'ascii' :
            bibtexc.ascii_bibtex = False
        else :
            bibtexc.ascii_bibtex = True

        # Check citation choice and go to default in case of bad CLI
        if isinstance(opts.impcit, string_or_bytes) :
            if opts.impcit == 'False' :
                citation_bibtex= False
            elif opts.impcit == 'True' :
                citation_bibtex= True
            else :
                log.warn("Incorrect --create-citation, revert to default")
                citation_bibtex= True
        else :
            citation_bibtex= opts.impcit

        # Check add file entry and go to default in case of bad CLI
        if isinstance(opts.addfiles, string_or_bytes) :
            if opts.addfiles == 'False' :
                addfiles_bibtex = False
            elif opts.addfiles == 'True' :
                addfiles_bibtex = True
            else :
                log.warn("Incorrect --add-files-path, revert to default")
                addfiles_bibtex= True
        else :
            addfiles_bibtex = opts.addfiles

        # Preprocess for error and light correction
        template_citation = preprocess_template(opts.bib_cit)

        # Open output and write entries
        with codecs.open(path_to_output, 'w', bibfile_enc, bibfile_enctag)\
            as outfile:
            # File header
            nb_entries = len(data)

            # check in book strict if all is ok else throw a warning into log
            if bib_entry == 'book' :
                nb_books = len(list(filter(check_entry_book_valid, data)))
                if nb_books < nb_entries :
                    log.warn("Only %d entries in %d are book compatible" % (nb_books, nb_entries))
                    nb_entries = nb_books

            # If connected device, add 'On Device' values to data
            if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
                for entry in data:
                    entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']

            outfile.write('%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
            outfile.write('@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
                % (nb_entries, strftime("%A, %d. %B %Y %H:%M")))

            for entry in data:
                outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
                    bibtexc, db, citation_bibtex, addfiles_bibtex))
|
||||
241
ebook_converter/library/catalogs/csv_xml.py
Normal file
241
ebook_converter/library/catalogs/csv_xml.py
Normal file
@@ -0,0 +1,241 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import re, codecs, os
|
||||
from collections import namedtuple
|
||||
|
||||
from calibre.customize import CatalogPlugin
|
||||
from calibre.library.catalogs import FIELDS
|
||||
from calibre.customize.conversion import DummyReporter
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
|
||||
class CSV_XML(CatalogPlugin):

    'CSV/XML catalog generator'

    # Record type mirroring optparse's add_option arguments.
    Option = namedtuple('Option', 'option, default, dest, action, help')

    name = 'Catalog_CSV_XML'
    description = 'CSV/XML catalog generator'
    supported_platforms = ['windows', 'osx', 'linux']
    author = 'Greg Riker'
    version = (1, 0, 0)
    file_types = {'csv', 'xml'}

    cli_options = [
        Option('--fields',
               default='all',
               dest='fields',
               action=None,
               help=_('The fields to output when cataloging books in the '
                      'database. Should be a comma-separated list of fields.\n'
                      'Available fields: %(fields)s,\n'
                      'plus user-created custom fields.\n'
                      'Example: %(opt)s=title,authors,tags\n'
                      "Default: '%%default'\n"
                      "Applies to: CSV, XML output formats") % dict(
                          fields=', '.join(FIELDS), opt='--fields')),

        Option('--sort-by',
               default='id',
               dest='sort_by',
               action=None,
               help=_('Output field to sort on.\n'
                      'Available fields: author_sort, id, rating, size, timestamp, title_sort\n'
                      "Default: '%default'\n"
                      "Applies to: CSV, XML output formats"))]

    def run(self, path_to_output, opts, db, notification=DummyReporter()):
        """Generate a CSV or XML catalog at *path_to_output*.

        The output format is chosen from the file extension of
        *path_to_output* ('csv' or 'xml').  The matching records from
        *db* are fetched via self.search_sort_db and written field by
        field.
        """
        from calibre.library import current_library_name
        from calibre.utils.date import isoformat
        from calibre.utils.html2text import html2text
        from calibre.utils.logging import default_log as log
        from lxml import etree
        from calibre.ebooks.metadata import authors_to_string

        self.fmt = path_to_output.rpartition('.')[2]
        self.notification = notification
        current_library = current_library_name()
        if getattr(opts, 'library_path', None):
            current_library = os.path.basename(opts.library_path)

        if opts.verbose:
            opts_dict = vars(opts)
            log("%s('%s'): Generating %s" % (self.name, current_library, self.fmt.upper()))
            if opts.connected_device['is_device_connected']:
                log(" connected_device: %s" % opts.connected_device['name'])
            if opts_dict['search_text']:
                log(" --search='%s'" % opts_dict['search_text'])

            if opts_dict['ids']:
                log(" Book count: %d" % len(opts_dict['ids']))
                if opts_dict['search_text']:
                    log(" (--search ignored when a subset of the database is specified)")

            if opts_dict['fields']:
                if opts_dict['fields'] == 'all':
                    log(" Fields: %s" % ', '.join(FIELDS[1:]))
                else:
                    log(" Fields: %s" % opts_dict['fields'])

        # If a list of ids are provided, don't use search_text
        if opts.ids:
            opts.search_text = None

        data = self.search_sort_db(db, opts)

        if not len(data):
            log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)
            # raise SystemExit(1)

        # Get the requested output fields as a list
        fields = self.get_output_fields(db, opts)

        # If connected device, add 'On Device' values to data
        if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
            for entry in data:
                entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']

        # Field metadata lookup used below for datatype-specific formatting.
        fm = {x: db.field_metadata.get(x, {}) for x in fields}

        if self.fmt == 'csv':
            outfile = codecs.open(path_to_output, 'w', 'utf8')

            # Write a UTF-8 BOM
            outfile.write('\ufeff')

            # Output the field headers
            outfile.write('%s\n' % ','.join(fields))

            # Output the entry fields
            for entry in data:
                outstr = []
                for field in fields:
                    if field.startswith('#'):
                        # Custom (user-defined) column.
                        item = db.get_field(entry['id'], field, index_is_id=True)
                        if isinstance(item, (list, tuple)):
                            if fm.get(field, {}).get('display', {}).get('is_names', False):
                                item = ' & '.join(item)
                            else:
                                item = ', '.join(item)
                    elif field == 'library_name':
                        item = current_library
                    elif field == 'title_sort':
                        item = entry['sort']
                    else:
                        item = entry[field]

                    if item is None:
                        outstr.append('""')
                        continue
                    elif field == 'formats':
                        fmt_list = []
                        for format in item:
                            fmt_list.append(format.rpartition('.')[2].lower())
                        item = ', '.join(fmt_list)
                    elif field == 'authors':
                        item = authors_to_string(item)
                    elif field == 'tags':
                        item = ', '.join(item)
                    elif field == 'isbn':
                        # Could be 9, 10 or 13 digits, with hyphens, possibly ending in 'X'
                        item = '%s' % re.sub(r'[^\dX-]', '', item)
                    elif fm.get(field, {}).get('datatype') == 'datetime':
                        item = isoformat(item, as_utc=False)
                    elif field == 'comments':
                        item = item.replace('\r\n', ' ')
                        item = item.replace('\n', ' ')
                    elif fm.get(field, {}).get('datatype', None) == 'rating' and item:
                        # Stored ratings are 0-10; display scale is 0-5.
                        item = '%.2g' % (item / 2)

                    # Convert HTML to markdown text
                    if isinstance(item, unicode_type):
                        opening_tag = re.search(r'<(\w+)( |>)', item)
                        if opening_tag:
                            closing_tag = re.search(r'<\/%s>$' % opening_tag.group(1), item)
                            if closing_tag:
                                item = html2text(item)

                    # CSV-quote: wrap in double quotes, doubling embedded quotes.
                    outstr.append('"%s"' % unicode_type(item).replace('"', '""'))

                outfile.write(','.join(outstr) + '\n')
            outfile.close()

        elif self.fmt == 'xml':
            from lxml.builder import E

            root = E.calibredb()
            for r in data:
                record = E.record()
                root.append(record)

                for field in fields:
                    if field.startswith('#'):
                        # Custom column: '#name' becomes a '_name' element.
                        val = db.get_field(r['id'], field, index_is_id=True)
                        if not isinstance(val, unicode_type):
                            val = unicode_type(val)
                        item = getattr(E, field.replace('#', '_'))(val)
                        record.append(item)

                for field in ('id', 'uuid', 'publisher', 'rating', 'size',
                              'isbn', 'ondevice', 'identifiers'):
                    if field in fields:
                        val = r[field]
                        if not val:
                            continue
                        if not isinstance(val, (bytes, unicode_type)):
                            if (fm.get(field, {}).get('datatype', None) ==
                                    'rating' and val):
                                # Stored ratings are 0-10; display scale is 0-5.
                                val = '%.2g' % (val / 2)
                            val = unicode_type(val)
                        item = getattr(E, field)(val)
                        record.append(item)

                if 'title' in fields:
                    title = E.title(r['title'], sort=r['sort'])
                    record.append(title)

                if 'authors' in fields:
                    aus = E.authors(sort=r['author_sort'])
                    for au in r['authors']:
                        aus.append(E.author(au))
                    record.append(aus)

                for field in ('timestamp', 'pubdate'):
                    if field in fields:
                        record.append(getattr(E, field)(isoformat(r[field], as_utc=False)))

                if 'tags' in fields and r['tags']:
                    tags = E.tags()
                    for tag in r['tags']:
                        tags.append(E.tag(tag))
                    record.append(tags)

                if 'comments' in fields and r['comments']:
                    record.append(E.comments(r['comments']))

                if 'series' in fields and r['series']:
                    record.append(E.series(r['series'],
                        index=unicode_type(r['series_index'])))

                if 'cover' in fields and r['cover']:
                    # Normalise path separators to forward slashes.
                    record.append(E.cover(r['cover'].replace(os.sep, '/')))

                if 'formats' in fields and r['formats']:
                    fmt = E.formats()
                    for f in r['formats']:
                        fmt.append(E.format(f.replace(os.sep, '/')))
                    record.append(fmt)

                if 'library_name' in fields:
                    record.append(E.library_name(current_library))

            with open(path_to_output, 'wb') as f:
                f.write(etree.tostring(root, encoding='utf-8',
                    xml_declaration=True, pretty_print=True))
|
||||
506
ebook_converter/library/catalogs/epub_mobi.py
Normal file
506
ebook_converter/library/catalogs/epub_mobi.py
Normal file
@@ -0,0 +1,506 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import datetime, os, time
|
||||
from collections import namedtuple
|
||||
|
||||
from calibre import strftime
|
||||
from calibre.customize import CatalogPlugin
|
||||
from calibre.customize.conversion import OptionRecommendation, DummyReporter
|
||||
from calibre.library import current_library_name
|
||||
from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.localization import calibre_langcode_to_name, canonicalize_lang, get_lang
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
# Shared CLI option record: mirrors optparse's add_option arguments.
Option = namedtuple('Option', 'option, default, dest, action, help')
|
||||
|
||||
|
||||
class EPUB_MOBI(CatalogPlugin):
|
||||
|
||||
'EPUB catalog generator'
|
||||
|
||||
name = 'Catalog_EPUB_MOBI'
|
||||
description = 'AZW3/EPUB/MOBI catalog generator'
|
||||
supported_platforms = ['windows', 'osx', 'linux']
|
||||
minimum_calibre_version = (0, 7, 40)
|
||||
author = 'Greg Riker'
|
||||
version = (1, 0, 0)
|
||||
file_types = {'azw3', 'epub', 'mobi'}
|
||||
|
||||
THUMB_SMALLEST = "1.0"
|
||||
THUMB_LARGEST = "2.0"
|
||||
|
||||
cli_options = [Option('--catalog-title', # {{{
|
||||
default='My Books',
|
||||
dest='catalog_title',
|
||||
action=None,
|
||||
help=_('Title of generated catalog used as title in metadata.\n'
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--cross-reference-authors',
|
||||
default=False,
|
||||
dest='cross_reference_authors',
|
||||
action='store_true',
|
||||
help=_("Create cross-references in Authors section for books with multiple authors.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--debug-pipeline',
|
||||
default=None,
|
||||
dest='debug_pipeline',
|
||||
action=None,
|
||||
help=_("Save the output from different stages of the conversion "
|
||||
"pipeline to the specified "
|
||||
"directory. Useful if you are unsure at which stage "
|
||||
"of the conversion process a bug is occurring.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--exclude-genre',
|
||||
default=r'\[.+\]|^\+$',
|
||||
dest='exclude_genre',
|
||||
action=None,
|
||||
help=_("Regex describing tags to exclude as genres.\n"
|
||||
"Default: '%default' excludes bracketed tags, e.g. '[Project Gutenberg]', and '+', the default tag for read books.\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--exclusion-rules',
|
||||
default="(('Catalogs','Tags','Catalog'),)",
|
||||
dest='exclusion_rules',
|
||||
action=None,
|
||||
help=_("Specifies the rules used to exclude books from the generated catalog.\n"
|
||||
"The model for an exclusion rule is either\n('<rule name>','Tags','<comma-separated list of tags>') or\n"
|
||||
"('<rule name>','<custom column>','<pattern>').\n"
|
||||
"For example:\n"
|
||||
"(('Archived books','#status','Archived'),)\n"
|
||||
"will exclude a book with a value of 'Archived' in the custom column 'status'.\n"
|
||||
"When multiple rules are defined, all rules will be applied.\n"
|
||||
"Default: \n" + '"' + '%default' + '"' + "\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--generate-authors',
|
||||
default=False,
|
||||
dest='generate_authors',
|
||||
action='store_true',
|
||||
help=_("Include 'Authors' section in catalog.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--generate-descriptions',
|
||||
default=False,
|
||||
dest='generate_descriptions',
|
||||
action='store_true',
|
||||
help=_("Include 'Descriptions' section in catalog.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--generate-genres',
|
||||
default=False,
|
||||
dest='generate_genres',
|
||||
action='store_true',
|
||||
help=_("Include 'Genres' section in catalog.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--generate-titles',
|
||||
default=False,
|
||||
dest='generate_titles',
|
||||
action='store_true',
|
||||
help=_("Include 'Titles' section in catalog.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--generate-series',
|
||||
default=False,
|
||||
dest='generate_series',
|
||||
action='store_true',
|
||||
help=_("Include 'Series' section in catalog.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--generate-recently-added',
|
||||
default=False,
|
||||
dest='generate_recently_added',
|
||||
action='store_true',
|
||||
help=_("Include 'Recently Added' section in catalog.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--genre-source-field',
|
||||
default=_('Tags'),
|
||||
dest='genre_source_field',
|
||||
action=None,
|
||||
help=_("Source field for 'Genres' section.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--header-note-source-field',
|
||||
default='',
|
||||
dest='header_note_source_field',
|
||||
action=None,
|
||||
help=_("Custom field containing note text to insert in Description header.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--merge-comments-rule',
|
||||
default='::',
|
||||
dest='merge_comments_rule',
|
||||
action=None,
|
||||
help=_("#<custom field>:[before|after]:[True|False] specifying:\n"
|
||||
" <custom field> Custom field containing notes to merge with Comments\n"
|
||||
" [before|after] Placement of notes with respect to Comments\n"
|
||||
" [True|False] - A horizontal rule is inserted between notes and Comments\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--output-profile',
|
||||
default=None,
|
||||
dest='output_profile',
|
||||
action=None,
|
||||
help=_("Specifies the output profile. In some cases, an output profile is required to optimize"
|
||||
" the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured"
|
||||
" Table of Contents with Sections and Articles.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--prefix-rules',
|
||||
default="(('Read books','tags','+','\u2713'),('Wishlist item','tags','Wishlist','\u00d7'))",
|
||||
dest='prefix_rules',
|
||||
action=None,
|
||||
help=_("Specifies the rules used to include prefixes indicating read books, wishlist items and other user-specified prefixes.\n"
|
||||
"The model for a prefix rule is ('<rule name>','<source field>','<pattern>','<prefix>').\n"
|
||||
"When multiple rules are defined, the first matching rule will be used.\n"
|
||||
"Default:\n" + '"' + '%default' + '"' + "\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--preset',
|
||||
default=None,
|
||||
dest='preset',
|
||||
action=None,
|
||||
help=_("Use a named preset created with the GUI catalog builder.\n"
|
||||
"A preset specifies all settings for building a catalog.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--use-existing-cover',
|
||||
default=False,
|
||||
dest='use_existing_cover',
|
||||
action='store_true',
|
||||
help=_("Replace existing cover when generating the catalog.\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
Option('--thumb-width',
|
||||
default='1.0',
|
||||
dest='thumb_width',
|
||||
action=None,
|
||||
help=_("Size hint (in inches) for book covers in catalog.\n"
|
||||
"Range: 1.0 - 2.0\n"
|
||||
"Default: '%default'\n"
|
||||
"Applies to: AZW3, EPUB, MOBI output formats")),
|
||||
]
|
||||
# }}}
|
||||
|
||||
def run(self, path_to_output, opts, db, notification=DummyReporter()):
|
||||
from calibre.library.catalogs.epub_mobi_builder import CatalogBuilder
|
||||
from calibre.utils.logging import default_log as log
|
||||
from calibre.utils.config import JSONConfig
|
||||
|
||||
# If preset specified from the cli, insert stored options from JSON file
|
||||
if hasattr(opts, 'preset') and opts.preset:
|
||||
available_presets = JSONConfig("catalog_presets")
|
||||
if opts.preset not in available_presets:
|
||||
if available_presets:
|
||||
print(_('Error: Preset "%s" not found.' % opts.preset))
|
||||
print(_('Stored presets: %s' % ', '.join([p for p in sorted(available_presets.keys())])))
|
||||
else:
|
||||
print(_('Error: No stored presets.'))
|
||||
return 1
|
||||
|
||||
# Copy the relevant preset values to the opts object
|
||||
for item in available_presets[opts.preset]:
|
||||
if item not in ['exclusion_rules_tw', 'format', 'prefix_rules_tw']:
|
||||
setattr(opts, item, available_presets[opts.preset][item])
|
||||
|
||||
# Provide an unconnected device
|
||||
opts.connected_device = {
|
||||
'is_device_connected': False,
|
||||
'kind': None,
|
||||
'name': None,
|
||||
'save_template': None,
|
||||
'serial': None,
|
||||
'storage': None,
|
||||
}
|
||||
|
||||
# Convert prefix_rules and exclusion_rules from JSON lists to tuples
|
||||
prs = []
|
||||
for rule in opts.prefix_rules:
|
||||
prs.append(tuple(rule))
|
||||
opts.prefix_rules = tuple(prs)
|
||||
|
||||
ers = []
|
||||
for rule in opts.exclusion_rules:
|
||||
ers.append(tuple(rule))
|
||||
opts.exclusion_rules = tuple(ers)
|
||||
|
||||
opts.log = log
|
||||
opts.fmt = self.fmt = path_to_output.rpartition('.')[2]
|
||||
|
||||
# Add local options
|
||||
opts.creator = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y'))
|
||||
opts.creator_sort_as = '%s %s' % ('calibre', strftime('%Y-%m-%d'))
|
||||
opts.connected_kindle = False
|
||||
|
||||
# Finalize output_profile
|
||||
op = opts.output_profile
|
||||
if op is None:
|
||||
op = 'default'
|
||||
|
||||
if opts.connected_device['name'] and 'kindle' in opts.connected_device['name'].lower():
|
||||
opts.connected_kindle = True
|
||||
if opts.connected_device['serial'] and \
|
||||
opts.connected_device['serial'][:4] in ['B004', 'B005']:
|
||||
op = "kindle_dx"
|
||||
else:
|
||||
op = "kindle"
|
||||
|
||||
opts.description_clip = 380 if op.endswith('dx') or 'kindle' not in op else 100
|
||||
opts.author_clip = 100 if op.endswith('dx') or 'kindle' not in op else 60
|
||||
opts.output_profile = op
|
||||
|
||||
opts.basename = "Catalog"
|
||||
opts.cli_environment = not hasattr(opts, 'sync')
|
||||
|
||||
# Hard-wired to always sort descriptions by author, with series after non-series
|
||||
opts.sort_descriptions_by_author = True
|
||||
|
||||
build_log = []
|
||||
|
||||
build_log.append("%s('%s'): Generating %s %sin %s environment, locale: '%s'" %
|
||||
(self.name,
|
||||
current_library_name(),
|
||||
self.fmt,
|
||||
'for %s ' % opts.output_profile if opts.output_profile else '',
|
||||
'CLI' if opts.cli_environment else 'GUI',
|
||||
calibre_langcode_to_name(canonicalize_lang(get_lang()), localize=False))
|
||||
)
|
||||
|
||||
# If exclude_genre is blank, assume user wants all tags as genres
|
||||
if opts.exclude_genre.strip() == '':
|
||||
# opts.exclude_genre = '\[^.\]'
|
||||
# build_log.append(" converting empty exclude_genre to '\[^.\]'")
|
||||
opts.exclude_genre = 'a^'
|
||||
build_log.append(" converting empty exclude_genre to 'a^'")
|
||||
if opts.connected_device['is_device_connected'] and \
|
||||
opts.connected_device['kind'] == 'device':
|
||||
if opts.connected_device['serial']:
|
||||
build_log.append(" connected_device: '%s' #%s%s " %
|
||||
(opts.connected_device['name'],
|
||||
opts.connected_device['serial'][0:4],
|
||||
'x' * (len(opts.connected_device['serial']) - 4)))
|
||||
for storage in opts.connected_device['storage']:
|
||||
if storage:
|
||||
build_log.append(" mount point: %s" % storage)
|
||||
else:
|
||||
build_log.append(" connected_device: '%s'" % opts.connected_device['name'])
|
||||
try:
|
||||
for storage in opts.connected_device['storage']:
|
||||
if storage:
|
||||
build_log.append(" mount point: %s" % storage)
|
||||
except:
|
||||
build_log.append(" (no mount points)")
|
||||
else:
|
||||
build_log.append(" connected_device: '%s'" % opts.connected_device['name'])
|
||||
|
||||
opts_dict = vars(opts)
|
||||
if opts_dict['ids']:
|
||||
build_log.append(" book count: %d" % len(opts_dict['ids']))
|
||||
|
||||
sections_list = []
|
||||
if opts.generate_authors:
|
||||
sections_list.append('Authors')
|
||||
if opts.generate_titles:
|
||||
sections_list.append('Titles')
|
||||
if opts.generate_series:
|
||||
sections_list.append('Series')
|
||||
if opts.generate_genres:
|
||||
sections_list.append('Genres')
|
||||
if opts.generate_recently_added:
|
||||
sections_list.append('Recently Added')
|
||||
if opts.generate_descriptions:
|
||||
sections_list.append('Descriptions')
|
||||
|
||||
if not sections_list:
|
||||
if opts.cli_environment:
|
||||
opts.log.warn('*** No Section switches specified, enabling all Sections ***')
|
||||
opts.generate_authors = True
|
||||
opts.generate_titles = True
|
||||
opts.generate_series = True
|
||||
opts.generate_genres = True
|
||||
opts.generate_recently_added = True
|
||||
opts.generate_descriptions = True
|
||||
sections_list = ['Authors', 'Titles', 'Series', 'Genres', 'Recently Added', 'Descriptions']
|
||||
else:
|
||||
opts.log.warn('\n*** No enabled Sections, terminating catalog generation ***')
|
||||
return ["No Included Sections", "No enabled Sections.\nCheck E-book options tab\n'Included sections'\n"]
|
||||
if opts.fmt == 'mobi' and sections_list == ['Descriptions']:
|
||||
warning = _("\n*** Adding 'By authors' section required for MOBI output ***")
|
||||
opts.log.warn(warning)
|
||||
sections_list.insert(0, 'Authors')
|
||||
opts.generate_authors = True
|
||||
|
||||
opts.log(" Sections: %s" % ', '.join(sections_list))
|
||||
opts.section_list = sections_list
|
||||
|
||||
# Limit thumb_width to 1.0" - 2.0"
|
||||
try:
|
||||
if float(opts.thumb_width) < float(self.THUMB_SMALLEST):
|
||||
log.warning("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_SMALLEST))
|
||||
opts.thumb_width = self.THUMB_SMALLEST
|
||||
if float(opts.thumb_width) > float(self.THUMB_LARGEST):
|
||||
log.warning("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_LARGEST))
|
||||
opts.thumb_width = self.THUMB_LARGEST
|
||||
opts.thumb_width = "%.2f" % float(opts.thumb_width)
|
||||
except:
|
||||
log.error("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_SMALLEST))
|
||||
opts.thumb_width = "1.0"
|
||||
|
||||
# eval prefix_rules if passed from command line
|
||||
if type(opts.prefix_rules) is not tuple:
|
||||
try:
|
||||
opts.prefix_rules = eval(opts.prefix_rules)
|
||||
except:
|
||||
log.error("malformed --prefix-rules: %s" % opts.prefix_rules)
|
||||
raise
|
||||
for rule in opts.prefix_rules:
|
||||
if len(rule) != 4:
|
||||
log.error("incorrect number of args for --prefix-rules: %s" % repr(rule))
|
||||
|
||||
# eval exclusion_rules if passed from command line
|
||||
if type(opts.exclusion_rules) is not tuple:
|
||||
try:
|
||||
opts.exclusion_rules = eval(opts.exclusion_rules)
|
||||
except:
|
||||
log.error("malformed --exclusion-rules: %s" % opts.exclusion_rules)
|
||||
raise
|
||||
for rule in opts.exclusion_rules:
|
||||
if len(rule) != 3:
|
||||
log.error("incorrect number of args for --exclusion-rules: %s" % repr(rule))
|
||||
|
||||
# Display opts
|
||||
keys = sorted(opts_dict.keys())
|
||||
build_log.append(" opts:")
|
||||
for key in keys:
|
||||
if key in ['catalog_title', 'author_clip', 'connected_kindle', 'creator',
|
||||
'cross_reference_authors', 'description_clip', 'exclude_book_marker',
|
||||
'exclude_genre', 'exclude_tags', 'exclusion_rules', 'fmt',
|
||||
'genre_source_field', 'header_note_source_field', 'merge_comments_rule',
|
||||
'output_profile', 'prefix_rules', 'preset', 'read_book_marker',
|
||||
'search_text', 'sort_by', 'sort_descriptions_by_author', 'sync',
|
||||
'thumb_width', 'use_existing_cover', 'wishlist_tag']:
|
||||
build_log.append(" %s: %s" % (key, repr(opts_dict[key])))
|
||||
if opts.verbose:
|
||||
log('\n'.join(line for line in build_log))
|
||||
|
||||
# Capture start_time
|
||||
opts.start_time = time.time()
|
||||
|
||||
self.opts = opts
|
||||
|
||||
if opts.verbose:
|
||||
log.info(" Begin catalog source generation (%s)" %
|
||||
unicode_type(datetime.timedelta(seconds=int(time.time() - opts.start_time))))
|
||||
|
||||
# Launch the Catalog builder
|
||||
catalog = CatalogBuilder(db, opts, self, report_progress=notification)
|
||||
|
||||
try:
|
||||
catalog.build_sources()
|
||||
if opts.verbose:
|
||||
log.info(" Completed catalog source generation (%s)\n" %
|
||||
unicode_type(datetime.timedelta(seconds=int(time.time() - opts.start_time))))
|
||||
except (AuthorSortMismatchException, EmptyCatalogException) as e:
|
||||
log.error(" *** Terminated catalog generation: %s ***" % e)
|
||||
except:
|
||||
log.error(" unhandled exception in catalog generator")
|
||||
raise
|
||||
|
||||
else:
|
||||
recommendations = []
|
||||
recommendations.append(('remove_fake_margins', False,
|
||||
OptionRecommendation.HIGH))
|
||||
recommendations.append(('comments', '', OptionRecommendation.HIGH))
|
||||
|
||||
"""
|
||||
>>> Use to debug generated catalog code before pipeline conversion <<<
|
||||
"""
|
||||
GENERATE_DEBUG_EPUB = False
|
||||
if GENERATE_DEBUG_EPUB:
|
||||
catalog_debug_path = os.path.join(os.path.expanduser('~'), 'Desktop', 'Catalog debug')
|
||||
setattr(opts, 'debug_pipeline', os.path.expanduser(catalog_debug_path))
|
||||
|
||||
dp = getattr(opts, 'debug_pipeline', None)
|
||||
if dp is not None:
|
||||
recommendations.append(('debug_pipeline', dp,
|
||||
OptionRecommendation.HIGH))
|
||||
|
||||
if opts.output_profile and opts.output_profile.startswith("kindle"):
|
||||
recommendations.append(('output_profile', opts.output_profile,
|
||||
OptionRecommendation.HIGH))
|
||||
recommendations.append(('book_producer', opts.output_profile,
|
||||
OptionRecommendation.HIGH))
|
||||
if opts.fmt == 'mobi':
|
||||
recommendations.append(('no_inline_toc', True,
|
||||
OptionRecommendation.HIGH))
|
||||
recommendations.append(('verbose', 2,
|
||||
OptionRecommendation.HIGH))
|
||||
|
||||
# Use existing cover or generate new cover
|
||||
cpath = None
|
||||
existing_cover = False
|
||||
try:
|
||||
search_text = 'title:"%s" author:%s' % (
|
||||
opts.catalog_title.replace('"', '\\"'), 'calibre')
|
||||
matches = db.search(search_text, return_matches=True, sort_results=False)
|
||||
if matches:
|
||||
cpath = db.cover(matches[0], index_is_id=True, as_path=True)
|
||||
if cpath and os.path.exists(cpath):
|
||||
existing_cover = True
|
||||
except:
|
||||
pass
|
||||
|
||||
if self.opts.use_existing_cover and not existing_cover:
|
||||
log.warning("no existing catalog cover found")
|
||||
|
||||
if self.opts.use_existing_cover and existing_cover:
|
||||
recommendations.append(('cover', cpath, OptionRecommendation.HIGH))
|
||||
log.info("using existing catalog cover")
|
||||
else:
|
||||
from calibre.ebooks.covers import calibre_cover2
|
||||
log.info("replacing catalog cover")
|
||||
new_cover_path = PersistentTemporaryFile(suffix='.jpg')
|
||||
new_cover = calibre_cover2(opts.catalog_title, 'calibre')
|
||||
new_cover_path.write(new_cover)
|
||||
new_cover_path.close()
|
||||
recommendations.append(('cover', new_cover_path.name, OptionRecommendation.HIGH))
|
||||
|
||||
# Run ebook-convert
|
||||
from calibre.ebooks.conversion.plumber import Plumber
|
||||
plumber = Plumber(os.path.join(catalog.catalog_path, opts.basename + '.opf'),
|
||||
path_to_output, log, report_progress=notification,
|
||||
abort_after_input_dump=False)
|
||||
plumber.merge_ui_recommendations(recommendations)
|
||||
plumber.run()
|
||||
|
||||
try:
|
||||
os.remove(cpath)
|
||||
except:
|
||||
pass
|
||||
|
||||
if GENERATE_DEBUG_EPUB:
|
||||
from calibre.ebooks.epub import initialize_container
|
||||
from calibre.ebooks.tweak import zip_rebuilder
|
||||
from calibre.utils.zipfile import ZipFile
|
||||
input_path = os.path.join(catalog_debug_path, 'input')
|
||||
epub_shell = os.path.join(catalog_debug_path, 'epub_shell.zip')
|
||||
initialize_container(epub_shell, opf_name='content.opf')
|
||||
with ZipFile(epub_shell, 'r') as zf:
|
||||
zf.extractall(path=input_path)
|
||||
os.remove(epub_shell)
|
||||
zip_rebuilder(input_path, os.path.join(catalog_debug_path, 'input.epub'))
|
||||
|
||||
if opts.verbose:
|
||||
log.info(" Catalog creation complete (%s)\n" %
|
||||
unicode_type(datetime.timedelta(seconds=int(time.time() - opts.start_time))))
|
||||
|
||||
# returns to gui2.actions.catalog:catalog_generated()
|
||||
return catalog.error
|
||||
174
ebook_converter/library/comments.py
Normal file
174
ebook_converter/library/comments.py
Normal file
@@ -0,0 +1,174 @@
|
||||
#!/usr/bin/env python2
|
||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||
# License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
import re
|
||||
|
||||
from calibre import prepare_string_for_xml
|
||||
from calibre.constants import preferred_encoding
|
||||
from calibre.ebooks.BeautifulSoup import (
|
||||
BeautifulSoup, CData, Comment, Declaration, NavigableString,
|
||||
ProcessingInstruction
|
||||
)
|
||||
from calibre.utils.html2text import html2text
|
||||
from polyglot.builtins import unicode_type
|
||||
|
||||
# Hackish - ignoring sentences ending or beginning in numbers to avoid
|
||||
# confusion with decimal points.
|
||||
lost_cr_pat = re.compile('([a-z])([\\.\\?!])([A-Z])')
|
||||
lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')
|
||||
sanitize_pat = re.compile(r'<script|<table|<tr|<td|<th|<style|<iframe',
|
||||
re.IGNORECASE)
|
||||
|
||||
|
||||
def comments_to_html(comments):
|
||||
'''
|
||||
Convert random comment text to normalized, xml-legal block of <p>s
|
||||
'plain text' returns as
|
||||
<p>plain text</p>
|
||||
|
||||
'plain text with <i>minimal</i> <b>markup</b>' returns as
|
||||
<p>plain text with <i>minimal</i> <b>markup</b></p>
|
||||
|
||||
'<p>pre-formatted text</p> returns untouched
|
||||
|
||||
'A line of text\n\nFollowed by a line of text' returns as
|
||||
<p>A line of text</p>
|
||||
<p>Followed by a line of text</p>
|
||||
|
||||
'A line of text.\nA second line of text.\rA third line of text' returns as
|
||||
<p>A line of text.<br />A second line of text.<br />A third line of text.</p>
|
||||
|
||||
'...end of a paragraph.Somehow the break was lost...' returns as
|
||||
<p>...end of a paragraph.</p>
|
||||
<p>Somehow the break was lost...</p>
|
||||
|
||||
Deprecated HTML returns as HTML via BeautifulSoup()
|
||||
|
||||
'''
|
||||
if not comments:
|
||||
return u'<p></p>'
|
||||
if not isinstance(comments, unicode_type):
|
||||
comments = comments.decode(preferred_encoding, 'replace')
|
||||
|
||||
if comments.lstrip().startswith('<'):
|
||||
# Comment is already HTML do not mess with it
|
||||
return comments
|
||||
|
||||
if '<' not in comments:
|
||||
comments = prepare_string_for_xml(comments)
|
||||
parts = [u'<p class="description">%s</p>'%x.replace(u'\n', u'<br />')
|
||||
for x in comments.split('\n\n')]
|
||||
return '\n'.join(parts)
|
||||
|
||||
if sanitize_pat.search(comments) is not None:
|
||||
try:
|
||||
return sanitize_comments_html(comments)
|
||||
except:
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
return u'<p></p>'
|
||||
|
||||
# Explode lost CRs to \n\n
|
||||
comments = lost_cr_exception_pat.sub(lambda m: m.group().replace('.',
|
||||
'.\r'), comments)
|
||||
for lost_cr in lost_cr_pat.finditer(comments):
|
||||
comments = comments.replace(lost_cr.group(),
|
||||
'%s%s\n\n%s' % (lost_cr.group(1),
|
||||
lost_cr.group(2),
|
||||
lost_cr.group(3)))
|
||||
|
||||
comments = comments.replace(u'\r', u'')
|
||||
# Convert \n\n to <p>s
|
||||
comments = comments.replace(u'\n\n', u'<p>')
|
||||
# Convert solo returns to <br />
|
||||
comments = comments.replace(u'\n', '<br />')
|
||||
# Convert two hyphens to emdash
|
||||
comments = comments.replace('--', '—')
|
||||
|
||||
soup = BeautifulSoup('<div>' + comments + '</div>').find('div')
|
||||
result = BeautifulSoup('<div>')
|
||||
container = result.find('div')
|
||||
rtc = 0
|
||||
open_pTag = False
|
||||
|
||||
all_tokens = list(soup.contents)
|
||||
inline_tags = ('br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a', 'hr')
|
||||
for token in all_tokens:
|
||||
if isinstance(token, (CData, Comment, Declaration, ProcessingInstruction)):
|
||||
continue
|
||||
if isinstance(token, NavigableString):
|
||||
if not open_pTag:
|
||||
pTag = result.new_tag('p')
|
||||
open_pTag = True
|
||||
ptc = 0
|
||||
pTag.insert(ptc, token)
|
||||
ptc += 1
|
||||
elif token.name in inline_tags:
|
||||
if not open_pTag:
|
||||
pTag = result.new_tag('p')
|
||||
open_pTag = True
|
||||
ptc = 0
|
||||
pTag.insert(ptc, token)
|
||||
ptc += 1
|
||||
else:
|
||||
if open_pTag:
|
||||
container.insert(rtc, pTag)
|
||||
rtc += 1
|
||||
open_pTag = False
|
||||
ptc = 0
|
||||
container.insert(rtc, token)
|
||||
rtc += 1
|
||||
|
||||
if open_pTag:
|
||||
container.insert(rtc, pTag)
|
||||
|
||||
for p in container.findAll('p'):
|
||||
p['class'] = 'description'
|
||||
|
||||
return container.decode_contents()
|
||||
|
||||
|
||||
def markdown(val):
|
||||
try:
|
||||
md = markdown.Markdown
|
||||
except AttributeError:
|
||||
from calibre.ebooks.markdown import Markdown
|
||||
md = markdown.Markdown = Markdown()
|
||||
return md.convert(val)
|
||||
|
||||
|
||||
def merge_comments(one, two):
|
||||
return comments_to_html(one) + '\n\n' + comments_to_html(two)
|
||||
|
||||
|
||||
def sanitize_comments_html(html):
|
||||
from calibre.ebooks.markdown import Markdown
|
||||
text = html2text(html)
|
||||
md = Markdown()
|
||||
html = md.convert(text)
|
||||
return html
|
||||
|
||||
|
||||
def find_tests():
|
||||
import unittest
|
||||
|
||||
class Test(unittest.TestCase):
|
||||
|
||||
def test_comments_to_html(self):
|
||||
for pat, val in [
|
||||
(b'lineone\n\nlinetwo',
|
||||
'<p class="description">lineone</p>\n<p class="description">linetwo</p>'),
|
||||
|
||||
('a <b>b&c</b>\nf',
|
||||
'<p class="description">a <b>b&c</b><br/>f</p>'),
|
||||
|
||||
('a <?xml asd> b\n\ncd',
|
||||
'<p class="description">a b</p><p class="description">cd</p>'),
|
||||
]:
|
||||
cval = comments_to_html(pat)
|
||||
self.assertEqual(cval, val)
|
||||
|
||||
return unittest.defaultTestLoader.loadTestsFromTestCase(Test)
|
||||
702
ebook_converter/library/field_metadata.py
Normal file
702
ebook_converter/library/field_metadata.py
Normal file
@@ -0,0 +1,702 @@
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Created on 25 May 2010
|
||||
|
||||
@author: charles
|
||||
'''
|
||||
|
||||
import traceback
|
||||
from collections import OrderedDict
|
||||
|
||||
from calibre.utils.config_base import tweaks
|
||||
from polyglot.builtins import iteritems, itervalues
|
||||
|
||||
category_icon_map = {
|
||||
'authors' : 'user_profile.png',
|
||||
'series' : 'series.png',
|
||||
'formats' : 'book.png',
|
||||
'publisher' : 'publisher.png',
|
||||
'rating' : 'rating.png',
|
||||
'news' : 'news.png',
|
||||
'tags' : 'tags.png',
|
||||
'custom:' : 'column.png',
|
||||
'user:' : 'tb_folder.png',
|
||||
'search' : 'search.png',
|
||||
'identifiers': 'identifiers.png',
|
||||
'gst' : 'catalog.png',
|
||||
'languages' : 'languages.png',
|
||||
}
|
||||
|
||||
# Builtin metadata {{{
|
||||
|
||||
|
||||
def _builtin_field_metadata():
|
||||
# This is a function so that changing the UI language allows newly created
|
||||
# field metadata objects to have correctly translated labels for builtin
|
||||
# fields.
|
||||
return [
|
||||
('authors', {'table':'authors',
|
||||
'column':'name',
|
||||
'link_column':'author',
|
||||
'category_sort':'sort',
|
||||
'datatype':'text',
|
||||
'is_multiple':{'cache_to_list': ',',
|
||||
'ui_to_list': '&',
|
||||
'list_to_ui': ' & '},
|
||||
'kind':'field',
|
||||
'name':_('Authors'),
|
||||
'search_terms':['authors', 'author'],
|
||||
'is_custom':False,
|
||||
'is_category':True,
|
||||
'is_csp': False}),
|
||||
('languages', {'table':'languages',
|
||||
'column':'lang_code',
|
||||
'link_column':'lang_code',
|
||||
'category_sort':'lang_code',
|
||||
'datatype':'text',
|
||||
'is_multiple':{'cache_to_list': ',',
|
||||
'ui_to_list': ',',
|
||||
'list_to_ui': ', '},
|
||||
'kind':'field',
|
||||
'name':_('Languages'),
|
||||
'search_terms':['languages', 'language'],
|
||||
'is_custom':False,
|
||||
'is_category':True,
|
||||
'is_csp': False}),
|
||||
|
||||
('series', {'table':'series',
|
||||
'column':'name',
|
||||
'link_column':'series',
|
||||
'category_sort':'(title_sort(name))',
|
||||
'datatype':'series',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':ngettext('Series', 'Series', 1),
|
||||
'search_terms':['series'],
|
||||
'is_custom':False,
|
||||
'is_category':True,
|
||||
'is_csp': False}),
|
||||
('formats', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{'cache_to_list': ',',
|
||||
'ui_to_list': ',',
|
||||
'list_to_ui': ', '},
|
||||
'kind':'field',
|
||||
'name':_('Formats'),
|
||||
'search_terms':['formats', 'format'],
|
||||
'is_custom':False,
|
||||
'is_category':True,
|
||||
'is_csp': False}),
|
||||
('publisher', {'table':'publishers',
|
||||
'column':'name',
|
||||
'link_column':'publisher',
|
||||
'category_sort':'name',
|
||||
'datatype':'text',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Publisher'),
|
||||
'search_terms':['publisher'],
|
||||
'is_custom':False,
|
||||
'is_category':True,
|
||||
'is_csp': False}),
|
||||
('rating', {'table':'ratings',
|
||||
'column':'rating',
|
||||
'link_column':'rating',
|
||||
'category_sort':'rating',
|
||||
'datatype':'rating',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Rating'),
|
||||
'search_terms':['rating'],
|
||||
'is_custom':False,
|
||||
'is_category':True,
|
||||
'is_csp': False}),
|
||||
('news', {'table':'news',
|
||||
'column':'name',
|
||||
'category_sort':'name',
|
||||
'datatype':None,
|
||||
'is_multiple':{},
|
||||
'kind':'category',
|
||||
'name':_('News'),
|
||||
'search_terms':[],
|
||||
'is_custom':False,
|
||||
'is_category':True,
|
||||
'is_csp': False}),
|
||||
('tags', {'table':'tags',
|
||||
'column':'name',
|
||||
'link_column': 'tag',
|
||||
'category_sort':'name',
|
||||
'datatype':'text',
|
||||
'is_multiple':{'cache_to_list': ',',
|
||||
'ui_to_list': ',',
|
||||
'list_to_ui': ', '},
|
||||
'kind':'field',
|
||||
'name':_('Tags'),
|
||||
'search_terms':['tags', 'tag'],
|
||||
'is_custom':False,
|
||||
'is_category':True,
|
||||
'is_csp': False}),
|
||||
('identifiers', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{'cache_to_list': ',',
|
||||
'ui_to_list': ',',
|
||||
'list_to_ui': ', '},
|
||||
'kind':'field',
|
||||
'name':_('Identifiers'),
|
||||
'search_terms':['identifiers', 'identifier', 'isbn'],
|
||||
'is_custom':False,
|
||||
'is_category':True,
|
||||
'is_csp': True}),
|
||||
('author_sort',{'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Author sort'),
|
||||
'search_terms':['author_sort'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('au_map', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{'cache_to_list': ',',
|
||||
'ui_to_list': None,
|
||||
'list_to_ui': None},
|
||||
'kind':'field',
|
||||
'name':None,
|
||||
'search_terms':[],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('comments', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Comments'),
|
||||
'search_terms':['comments', 'comment'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('cover', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'int',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Cover'),
|
||||
'search_terms':['cover'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('id', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'int',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':None,
|
||||
'search_terms':['id'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('last_modified', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'datetime',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Modified'),
|
||||
'search_terms':['last_modified'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('ondevice', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('On device'),
|
||||
'search_terms':['ondevice'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('path', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Path'),
|
||||
'search_terms':[],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('pubdate', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'datetime',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Published'),
|
||||
'search_terms':['pubdate'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('marked', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name': None,
|
||||
'search_terms':['marked'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('series_index',{'table':None,
|
||||
'column':None,
|
||||
'datatype':'float',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':None,
|
||||
'search_terms':['series_index'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('series_sort', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Series sort'),
|
||||
'search_terms':['series_sort'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('sort', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Title sort'),
|
||||
'search_terms':['title_sort'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('size', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'float',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Size'),
|
||||
'search_terms':['size'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('timestamp', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'datetime',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Date'),
|
||||
'search_terms':['date'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('title', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':_('Title'),
|
||||
'search_terms':['title'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
('uuid', {'table':None,
|
||||
'column':None,
|
||||
'datatype':'text',
|
||||
'is_multiple':{},
|
||||
'kind':'field',
|
||||
'name':None,
|
||||
'search_terms':['uuid'],
|
||||
'is_custom':False,
|
||||
'is_category':False,
|
||||
'is_csp': False}),
|
||||
]
|
||||
# }}}
|
||||
|
||||
|
||||
class FieldMetadata(object):

    '''
    Registry of metadata-field descriptors for a calibre-style library
    database.  Behaves like a read-mostly mapping from field key to a dict of
    field properties.  The meaning of the per-field properties:

    key: the key to the dictionary is:
    - for standard fields, the metadata field name.
    - for custom fields, the metadata field name prefixed by '#'
    This is done to create two 'namespaces' so the names don't clash

    label: the actual column label. No prefixing.

    datatype: the type of information in the field. Valid values are listed in
    VALID_DATA_TYPES below.
    is_multiple: valid for the text datatype. If {}, the field is to be
    treated as a single term. If not None, it contains a dict of the form
    {'cache_to_list': ',',
     'ui_to_list': ',',
     'list_to_ui': ', '}
    where the cache_to_list contains the character used to split the value in
    the meta2 table, ui_to_list contains the character used to create a list
    from a value shown in the ui (each resulting value must be strip()ed and
    empty values removed), and list_to_ui contains the string used in join()
    to create a displayable string from the list.

    kind == field: is a db field.
    kind == category: standard tag category that isn't a field. see news.
    kind == user: user-defined tag category.
    kind == search: saved-searches category.

    is_category: is a tag browser category. If true, then:
    table: name of the db table used to construct item list
    column: name of the column in the normalized table to join on
    link_column: name of the column in the connection table to join on. This
    key should not be present if there is no link table
    category_sort: the field in the normalized table to sort on. This
    key must be present if is_category is True
    If these are None, then the category constructor must know how
    to build the item list (e.g., formats, news).
    The order below is the order that the categories will
    appear in the tags pane.

    name: the text that is to be used when displaying the field. Column headings
    in the GUI, etc.

    search_terms: the terms that can be used to identify the field when
    searching. They can be thought of as aliases for metadata keys, but are only
    valid when passed to search().

    is_custom: the field has been added by the user.

    rec_index: the index of the field in the db metadata record.

    is_csp: field contains colon-separated pairs. Must also be text, is_multiple
    '''

    # Datatypes permitted for 'kind' == 'field' entries; __init__ and
    # add_custom_field() reject anything outside this set.
    VALID_DATA_TYPES = frozenset([None, 'rating', 'text', 'comments', 'datetime',
        'int', 'float', 'bool', 'series', 'composite', 'enumeration'])

    # search labels that are not db columns
    search_items = ['all', 'search', 'vl']
    # Marker used by the (de)serialization machinery (see fm_as_dict).
    __calibre_serializable__ = True

    def __init__(self):
        # Load the built-in field definitions and index them. Insertion order
        # is significant (the class docstring notes it drives tag-pane order),
        # hence the OrderedDict.
        self._field_metadata = _builtin_field_metadata()
        self._tb_cats = OrderedDict()
        # Subset of _tb_cats holding only user-added (custom) fields.
        self._tb_custom_fields = {}
        # Maps every search term/alias back to its owning field key.
        self._search_term_map = {}
        # Maps a custom field's bare label to its '#'-prefixed key.
        self.custom_label_to_key_map = {}
        for k,v in self._field_metadata:
            if v['kind'] == 'field' and v['datatype'] not in self.VALID_DATA_TYPES:
                raise ValueError('Unknown datatype %s for field %s'%(v['datatype'], k))
            # NOTE: the builtin dicts are stored (not copied) and then
            # mutated in place below.
            self._tb_cats[k] = v
            self._tb_cats[k]['label'] = k
            self._tb_cats[k]['display'] = {}
            self._tb_cats[k]['is_editable'] = True
            self._add_search_terms_to_map(k, v['search_terms'])
        # Date-display formats come from user-configurable tweaks.
        self._tb_cats['timestamp']['display'] = {
            'date_format': tweaks['gui_timestamp_display_format']}
        self._tb_cats['pubdate']['display'] = {
            'date_format': tweaks['gui_pubdate_display_format']}
        self._tb_cats['last_modified']['display'] = {
            'date_format': tweaks['gui_last_modified_display_format']}
        self.custom_field_prefix = '#'
        # Expose dict-style .get() directly from the underlying mapping.
        self.get = self._tb_cats.get

    def __getitem__(self, key):
        # 'title_sort' is an alias for the 'sort' field.
        if key == 'title_sort':
            return self._tb_cats['sort']
        return self._tb_cats[key]

    def __setitem__(self, key, val):
        # Fields must be added via the add_* methods, never by assignment.
        raise AttributeError('Assigning to this object is forbidden')

    def __delitem__(self, key):
        del self._tb_cats[key]

    def __iter__(self):
        for key in self._tb_cats:
            yield key

    def __contains__(self, key):
        # Mirror __getitem__'s 'title_sort' aliasing.
        return key in self._tb_cats or key == 'title_sort'

    def has_key(self, key):
        # py2-era dict API retained for callers; equivalent to `key in self`.
        return key in self

    def keys(self):
        return list(self._tb_cats.keys())

    def __eq__(self, other):
        # Equality compares the auxiliary maps plus the full field table.
        if not isinstance(other, FieldMetadata):
            return False
        for attr in ('_tb_custom_fields', '_search_term_map', 'custom_label_to_key_map', 'custom_field_prefix'):
            if getattr(self, attr) != getattr(other, attr):
                return False
        return dict(self._tb_cats) == dict(other._tb_cats)

    def __ne__(self, other):
        return not self.__eq__(other)

    def sortable_field_keys(self):
        # All real db fields that carry a concrete datatype.
        return [k for k in self._tb_cats.keys()
                if self._tb_cats[k]['kind']=='field' and
                self._tb_cats[k]['datatype'] is not None]

    def ui_sortable_field_keys(self):
        # Map of key -> display name for fields offered as sort choices in
        # the UI; internal/bookkeeping fields are excluded.
        ans = {k:self._tb_cats[k]['name'] for k in set(self.sortable_field_keys()) - {
            'sort', 'author_sort', 'au_map', 'series_sort', 'marked',
            'series_index', 'path', 'formats', 'identifiers', 'uuid',
            'comments',
        } if self._tb_cats[k]['name']}
        ans['cover'] = _('Has cover')
        return ans

    def displayable_field_keys(self):
        # Fields suitable for display columns: typed db fields, minus
        # internal ones and per-series index companions.
        return [k for k in self._tb_cats.keys()
                if self._tb_cats[k]['kind']=='field' and
                self._tb_cats[k]['datatype'] is not None and
                k not in ('au_map', 'marked', 'ondevice', 'cover', 'series_sort') and
                not self.is_series_index(k)]

    def standard_field_keys(self):
        # Built-in (non-custom) db fields only.
        return [k for k in self._tb_cats.keys()
                if self._tb_cats[k]['kind']=='field' and
                not self._tb_cats[k]['is_custom']]

    def custom_field_keys(self, include_composites=True):
        # User-added fields; composite columns can be filtered out.
        res = []
        for k in self._tb_cats.keys():
            fm = self._tb_cats[k]
            if fm['kind']=='field' and fm['is_custom'] and \
                    (fm['datatype'] != 'composite' or include_composites):
                res.append(k)
        return res

    def all_field_keys(self):
        return [k for k in self._tb_cats.keys() if self._tb_cats[k]['kind']=='field']

    def iterkeys(self):
        # py2-era iterator API retained; same as iter(self).
        for key in self._tb_cats:
            yield key

    def itervalues(self):
        return itervalues(self._tb_cats)

    def values(self):
        return list(self._tb_cats.values())

    def iteritems(self):
        for key in self._tb_cats:
            yield (key, self._tb_cats[key])
    # py3-friendly alias that does not shadow the method name at call sites.
    iter_items = iteritems

    def custom_iteritems(self):
        # Iterate only the user-added (custom) field descriptors.
        for key, meta in iteritems(self._tb_custom_fields):
            yield (key, meta)

    def items(self):
        return list(self.iter_items())

    def is_custom_field(self, key):
        return key.startswith(self.custom_field_prefix)

    def is_ignorable_field(self, key):
        'Custom fields and user categories are ignorable'
        return self.is_custom_field(key) or key.startswith('@')

    def ignorable_field_keys(self):
        return [k for k in self._tb_cats if self.is_ignorable_field(k)]

    def is_series_index(self, key):
        # True for the float '<series>_index' companion of a known series
        # field; any lookup/attribute failure simply means "no".
        try:
            m = self._tb_cats[key]
            return (m['datatype'] == 'float' and key.endswith('_index') and
                    key[:-6] in self._tb_cats)
        except (KeyError, ValueError, TypeError, AttributeError):
            return False

    def key_to_label(self, key):
        # Fall back to the key itself for entries without a 'label'
        # (e.g. categories added via add_user_category/add_search_category).
        if 'label' not in self._tb_cats[key]:
            return key
        return self._tb_cats[key]['label']

    def label_to_key(self, label, prefer_custom=False):
        # Resolve a bare label to its field key, preferring either the
        # custom or the standard namespace depending on prefer_custom.
        if prefer_custom:
            if label in self.custom_label_to_key_map:
                return self.custom_label_to_key_map[label]
        # NOTE(review): this tests the literal string 'label', not the
        # `label` argument — presumably `label in self._tb_cats` was
        # intended; preserved as-is. TODO confirm against upstream.
        if 'label' in self._tb_cats:
            return label
        if not prefer_custom:
            if label in self.custom_label_to_key_map:
                return self.custom_label_to_key_map[label]
        raise ValueError('Unknown key [%s]'%(label))

    def all_metadata(self):
        # Shallow copy of the key->descriptor mapping (descriptors shared).
        l = {}
        for k in self._tb_cats:
            l[k] = self._tb_cats[k]
        return l

    def custom_field_metadata(self, include_composites=True):
        # With composites included the internal dict is returned directly
        # (no copy); otherwise a filtered shallow copy is built.
        if include_composites:
            return self._tb_custom_fields
        l = {}
        for k in self.custom_field_keys(include_composites):
            l[k] = self._tb_cats[k]
        return l

    def add_custom_field(self, label, table, column, datatype, colnum, name,
                         display, is_editable, is_multiple, is_category,
                         is_csp=False):
        # Register a user-defined column under the '#'-prefixed namespace.
        # Raises ValueError on duplicate labels or unknown datatypes.
        key = self.custom_field_prefix + label
        if key in self._tb_cats:
            raise ValueError('Duplicate custom field [%s]'%(label))
        if datatype not in self.VALID_DATA_TYPES:
            raise ValueError('Unknown datatype %s for field %s'%(datatype, key))
        self._tb_cats[key] = {'table':table,       'column':column,
                              'datatype':datatype, 'is_multiple':is_multiple,
                              'kind':'field',      'name':name,
                              'search_terms':[key], 'label':label,
                              'colnum':colnum,     'display':display,
                              'is_custom':True,    'is_category':is_category,
                              'link_column':'value','category_sort':'value',
                              'is_csp' : is_csp,   'is_editable': is_editable,}
        self._tb_custom_fields[key] = self._tb_cats[key]
        self._add_search_terms_to_map(key, [key])
        self.custom_label_to_key_map[label] = key
        if datatype == 'series':
            # A series field automatically gets a hidden float companion
            # '<key>_index' holding the position within the series.
            key += '_index'
            self._tb_cats[key] = {'table':None,        'column':None,
                                  'datatype':'float',  'is_multiple':{},
                                  'kind':'field',      'name':'',
                                  'search_terms':[key], 'label':label+'_index',
                                  'colnum':None,       'display':{},
                                  'is_custom':False,   'is_category':False,
                                  'link_column':None,  'category_sort':None,
                                  'is_editable': False, 'is_csp': False}
            self._add_search_terms_to_map(key, [key])
            self.custom_label_to_key_map[label+'_index'] = key

    def remove_dynamic_categories(self):
        # Drop user and saved-search categories plus their search terms.
        # Iterate over a list() copy since entries are deleted in the loop.
        for key in list(self._tb_cats.keys()):
            val = self._tb_cats[key]
            if val['is_category'] and val['kind'] in ('user', 'search'):
                for k in self._tb_cats[key]['search_terms']:
                    if k in self._search_term_map:
                        del self._search_term_map[k]
                del self._tb_cats[key]

    def remove_user_categories(self):
        # Like remove_dynamic_categories() but only for kind == 'user'.
        for key in list(self._tb_cats.keys()):
            val = self._tb_cats[key]
            if val['is_category'] and val['kind'] == 'user':
                for k in self._tb_cats[key]['search_terms']:
                    if k in self._search_term_map:
                        del self._search_term_map[k]
                del self._tb_cats[key]

    def _remove_grouped_search_terms(self):
        # Grouped search terms map a term to a *list* of keys; plain terms
        # map to a single key. Only the list-valued entries are removed.
        to_remove = [v for v in self._search_term_map
                     if isinstance(self._search_term_map[v], list)]
        for v in to_remove:
            del self._search_term_map[v]

    def add_grouped_search_terms(self, gst):
        # Replace all grouped search terms with the ones in gst
        # (a mapping of term -> list of keys). Duplicate-term errors are
        # logged and skipped rather than aborting the whole update.
        self._remove_grouped_search_terms()
        for t in gst:
            try:
                self._add_search_terms_to_map(gst[t], [t])
            except ValueError:
                traceback.print_exc()

    def cc_series_index_column_for(self, key):
        # The record index of a custom series' companion index column is,
        # by construction, the slot right after the series column itself.
        return self._tb_cats[key]['rec_index'] + 1

    def add_user_category(self, label, name):
        # Register a user-defined tag-browser category (kind == 'user').
        if label in self._tb_cats:
            raise ValueError('Duplicate user field [%s]'%(label))
        st = [label]
        # Also register the case-folded form as a search alias when distinct.
        if icu_lower(label) != label:
            st.append(icu_lower(label))
        self._tb_cats[label] = {'table':None,        'column':None,
                                'datatype':None,     'is_multiple':{},
                                'kind':'user',       'name':name,
                                'search_terms':st,   'is_custom':False,
                                'is_category':True,  'is_csp': False}
        self._add_search_terms_to_map(label, st)

    def add_search_category(self, label, name, fail_on_existing=True):
        # Register a saved-searches category (kind == 'search'); it gets no
        # search terms of its own.
        if label in self._tb_cats:
            if not fail_on_existing:
                return
            raise ValueError('Duplicate user field [%s]'%(label))
        self._tb_cats[label] = {'table':None,        'column':None,
                                'datatype':None,     'is_multiple':{},
                                'kind':'search',     'name':name,
                                'search_terms':[],   'is_custom':False,
                                'is_category':True,  'is_csp': False}

    def set_field_record_index(self, label, index, prefer_custom=False):
        # Record the position of a field within the db metadata record.
        # The label is resolved in the standard or custom namespace first,
        # depending on prefer_custom; an unknown label raises KeyError.
        if prefer_custom:
            key = self.custom_field_prefix+label
            if key not in self._tb_cats:
                key = label
        else:
            if label in self._tb_cats:
                key = label
            else:
                key = self.custom_field_prefix+label
        self._tb_cats[key]['rec_index'] = index  # let the exception fly ...

    def get_search_terms(self):
        # All known search terms: mapped aliases (sorted) plus the fixed
        # non-column items ('all', 'search', 'vl') appended at the end.
        s_keys = sorted(self._search_term_map.keys())
        for v in self.search_items:
            s_keys.append(v)
        return s_keys

    def _add_search_terms_to_map(self, key, terms):
        # Register terms as aliases for key; a term may belong to only one
        # field, so duplicates raise ValueError.
        if terms is not None:
            for t in terms:
                if t in self._search_term_map:
                    raise ValueError('Attempt to add duplicate search term "%s"'%t)
                self._search_term_map[t] = key

    def search_term_to_field_key(self, term):
        # Unknown terms are returned unchanged.
        return self._search_term_map.get(term, term)

    def searchable_fields(self):
        return [k for k in self._tb_cats.keys()
                if self._tb_cats[k]['kind']=='field' and
                len(self._tb_cats[k]['search_terms']) > 0]

    # The following two methods are to support serialization
    # Note that they do not create copies of internal structures, for performance,
    # so they are not safe to use for anything else
    def fm_as_dict(self):
        return {
            'custom_fields': self._tb_custom_fields,
            'search_term_map': self._search_term_map,
            'custom_label_to_key_map': self.custom_label_to_key_map,
            'user_categories': {k:v for k, v in iteritems(self._tb_cats) if v['kind'] == 'user'},
            'search_categories': {k:v for k, v in iteritems(self._tb_cats) if v['kind'] == 'search'},
        }
|
||||
|
||||
|
||||
def fm_from_dict(src):
    """Rebuild a FieldMetadata instance from a FieldMetadata.fm_as_dict()
    mapping.

    Like fm_as_dict(), this shares the structures held in *src* rather than
    copying them, so *src* must not be reused for anything else afterwards.
    """
    restored = FieldMetadata()
    # Re-attach the auxiliary maps directly (no copies, by design).
    restored._tb_custom_fields = src['custom_fields']
    restored._search_term_map = src['search_term_map']
    restored.custom_label_to_key_map = src['custom_label_to_key_map']
    # Fold every serialized section back into the main field table.
    for section in ('custom_fields', 'user_categories', 'search_categories'):
        for field_key, descriptor in iteritems(src[section]):
            restored._tb_cats[field_key] = descriptor
    return restored
|
||||
Reference in New Issue
Block a user