Initial import

2026-04-04 20:03:34 +02:00 · 2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions
--- a/ebook_converter/library/init.py
+++ b/ebook_converter/library/init.py
@@ -0,0 +1,87 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+
+''' Code to manage ebook library'''
+
+
+import os
+from polyglot.builtins import range
+
+
+def db(path=None, read_only=False):
+    '''
+    Open a library database.
+
+    :param path: Location of the library; a leading ``~`` is expanded. When
+        it is empty or None the ``library_path`` preference is used instead.
+    :param read_only: Handed through unchanged to ``LibraryDatabase``.
+    :return: The ``LibraryDatabase`` created for the chosen location.
+    '''
+    from calibre.db.legacy import LibraryDatabase
+    from calibre.utils.config import prefs
+
+    if path:
+        location = os.path.expanduser(path)
+    else:
+        location = prefs['library_path']
+    return LibraryDatabase(location, read_only=read_only)
+
+
+def generate_test_db(library_path,  # {{{
+        num_of_records=20000,
+        num_of_authors=6000,
+        num_of_tags=10000,
+        tag_length=7,
+        author_length=7,
+        title_length=10,
+        max_authors=10,
+        max_tags=10
+        ):
+    '''
+    Populate a library with randomly generated book records.
+
+    Progress and timing information is printed to stdout.
+
+    :param library_path: Directory of the library; created when missing.
+    :param num_of_records: Number of books (one random title each) to import.
+    :param num_of_authors: Size of the pool of random author names.
+    :param num_of_tags: Size of the pool of random tags.
+    :param tag_length: Number of characters in every generated tag.
+    :param author_length: Number of characters in every generated author.
+    :param title_length: Number of characters in every generated title.
+    :param max_authors: Each book gets between 1 and this many authors.
+    :param max_tags: Each book gets between 0 and this many tags.
+    '''
+    import random, string, sys, time
+    from calibre.constants import preferred_encoding
+    # Imported once up front instead of on every loop iteration
+    from calibre.ebooks.metadata.book.base import Metadata
+
+    if not os.path.exists(library_path):
+        os.makedirs(library_path)
+
+    try:
+        # Python 2: string.letters is a byte string
+        letters = string.letters.decode(preferred_encoding)
+    except AttributeError:
+        # Python 3 removed string.letters
+        letters = string.ascii_letters
+
+    def randstr(length):
+        return ''.join(random.choice(letters) for _ in range(length))
+
+    all_tags = [randstr(tag_length) for _ in range(num_of_tags)]
+    print('Generated', num_of_tags, 'tags')
+    all_authors = [randstr(author_length) for _ in range(num_of_authors)]
+    print('Generated', num_of_authors, 'authors')
+    all_titles = [randstr(title_length) for _ in range(num_of_records)]
+    print('Generated', num_of_records, 'titles')
+
+    testdb = db(library_path)
+
+    print('Creating', num_of_records, 'records...')
+
+    start = time.time()
+
+    for count, title in enumerate(all_titles, 1):
+        print(count, end=' ')
+        sys.stdout.flush()
+        # The comprehensions use their own throwaway variable so that the
+        # record counter is never clobbered (list comprehensions leak their
+        # variable on Python 2)
+        authors = [random.choice(all_authors)
+                   for _ in range(random.randint(1, max_authors))]
+        tags = [random.choice(all_tags)
+                for _ in range(random.randint(0, max_tags))]
+        mi = Metadata(title, authors)
+        mi.tags = tags
+        testdb.import_book(mi, [])
+
+    t = time.time() - start
+    print('\nGenerated', num_of_records, 'records in:', t, 'seconds')
+    if num_of_records:
+        # No per-record average for an empty run (would divide by zero)
+        print('Time per record:', t/num_of_records)
+# }}}
+
+
+def current_library_path():
+    '''
+    Return the ``library_path`` preference with backslashes turned into
+    forward slashes and all trailing slashes removed.
+
+    Returns None when the preference is empty.
+    '''
+    from calibre.utils.config import prefs
+    configured = prefs['library_path']
+    if not configured:
+        return None
+    return configured.replace('\\', '/').rstrip('/')
+
+
+def current_library_name():
+    '''
+    Return the last component of the current library path, or None when
+    no library path is available.
+    '''
+    import posixpath
+    library = current_library_path()
+    return posixpath.basename(library) if library else None
--- a/ebook_converter/library/catalogs/init.py
+++ b/ebook_converter/library/catalogs/init.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+
+# Field names offered for catalog output; 'all' is the catch-all entry
+FIELDS = [
+    'all', 'title', 'title_sort', 'author_sort', 'authors', 'comments',
+    'cover', 'formats', 'id', 'isbn', 'library_name', 'ondevice',
+    'pubdate', 'publisher', 'rating', 'series_index', 'series', 'size',
+    'tags', 'timestamp', 'uuid', 'languages', 'identifiers',
+]
+
+# Allowed fields for template
+TEMPLATE_ALLOWED_FIELDS = [
+    'author_sort', 'authors', 'id', 'isbn', 'pubdate', 'title_sort',
+    'publisher', 'series_index', 'series', 'tags', 'timestamp', 'title',
+    'uuid',
+]
+
+
+class AuthorSortMismatchException(Exception):
+    '''Error type for the catalog generators; it carries no extra state.'''
+
+
+class EmptyCatalogException(Exception):
+    '''Error type for the catalog generators; it carries no extra state.'''
+
+
+class InvalidGenresSourceFieldException(Exception):
+    '''Error type for the catalog generators; it carries no extra state.'''
+
--- a/ebook_converter/library/catalogs/bibtex.py
+++ b/ebook_converter/library/catalogs/bibtex.py
@@ -0,0 +1,402 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re, codecs, os, numbers
+from collections import namedtuple
+
+from calibre import strftime
+from calibre.customize import CatalogPlugin
+from calibre.library.catalogs import FIELDS, TEMPLATE_ALLOWED_FIELDS
+from calibre.customize.conversion import DummyReporter
+from calibre.ebooks.metadata import format_isbn
+from polyglot.builtins import filter, string_or_bytes, unicode_type
+
+
+class BIBTEX(CatalogPlugin):
+    'BIBTEX catalog generator'
+
+    # Record describing one command-line option of this plugin
+    Option = namedtuple('Option', 'option, default, dest, action, help')
+
+    name = 'Catalog_BIBTEX'
+    description = 'BIBTEX catalog generator'
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Sengian'
+    version = (1, 0, 0)
+    file_types = {'bib'}
+
+    cli_options = [
+            Option('--fields',
+                default='all',
+                dest='fields',
+                action=None,
+                help=_('The fields to output when cataloging books in the '
+                    'database.  Should be a comma-separated list of fields.\n'
+                    'Available fields: %(fields)s.\n'
+                    'plus user-created custom fields.\n'
+                    'Example: %(opt)s=title,authors,tags\n'
+                    "Default: '%%default'\n"
+                    "Applies to: BIBTEX output format")%dict(
+                        fields=', '.join(FIELDS), opt='--fields')),
+
+            Option('--sort-by',
+                default='id',
+                dest='sort_by',
+                action=None,
+                help=_('Output field to sort on.\n'
+                'Available fields: author_sort, id, rating, size, timestamp, title.\n'
+                "Default: '%default'\n"
+                "Applies to: BIBTEX output format")),
+
+            Option('--create-citation',
+                default='True',
+                dest='impcit',
+                action=None,
+                help=_('Create a citation for BibTeX entries.\n'
+                'Boolean value: True, False\n'
+                "Default: '%default'\n"
+                "Applies to: BIBTEX output format")),
+
+            Option('--add-files-path',
+                default='True',
+                dest='addfiles',
+                action=None,
+                help=_('Create a file entry if formats is selected for BibTeX entries.\n'
+                'Boolean value: True, False\n'
+                "Default: '%default'\n"
+                "Applies to: BIBTEX output format")),
+
+            Option('--citation-template',
+                default='{authors}{id}',
+                dest='bib_cit',
+                action=None,
+                help=_('The template for citation creation from database fields.\n'
+                    'Should be a template with {} enclosed fields.\n'
+                    'Available fields: %s.\n'
+                    "Default: '%%default'\n"
+                    "Applies to: BIBTEX output format")%', '.join(TEMPLATE_ALLOWED_FIELDS)),
+
+            Option('--choose-encoding',
+                default='utf8',
+                dest='bibfile_enc',
+                action=None,
+                help=_('BibTeX file encoding output.\n'
+                'Available types: utf8, cp1252, ascii.\n'
+                "Default: '%default'\n"
+                "Applies to: BIBTEX output format")),
+
+            Option('--choose-encoding-configuration',
+                default='strict',
+                dest='bibfile_enctag',
+                action=None,
+                help=_('BibTeX file encoding flag.\n'
+                'Available types: strict, replace, ignore, backslashreplace.\n'
+                "Default: '%default'\n"
+                "Applies to: BIBTEX output format")),
+
+            Option('--entry-type',
+                default='book',
+                dest='bib_entry',
+                action=None,
+                help=_('Entry type for BibTeX catalog.\n'
+                'Available types: book, misc, mixed.\n'
+                "Default: '%default'\n"
+                "Applies to: BIBTEX output format"))]
+
+    def run(self, path_to_output, opts, db, notification=DummyReporter()):
+        '''
+        Write the selected books to ``path_to_output`` as a BibTeX file.
+
+        :param path_to_output: Destination file. The text after its last
+            dot is stored in ``self.fmt``.
+        :param opts: Option object. This method reads ``bibfile_enc``,
+            ``bibfile_enctag``, ``bib_entry``, ``impcit``, ``addfiles``,
+            ``bib_cit``, ``verbose``, ``ids``, ``search_text`` and
+            ``connected_device``; ``search_text`` is reset to None when
+            ``ids`` is set.
+        :param db: Library database handed to ``search_sort_db`` and
+            ``get_output_fields``; also queried for custom (``#``) fields.
+        :param notification: Stored on ``self.notification``.
+        '''
+        from calibre.utils.date import isoformat
+        from calibre.utils.html2text import html2text
+        from calibre.utils.bibtex import BibTeX
+        from calibre.library.save_to_disk import preprocess_template
+        from calibre.utils.logging import default_log as log
+        from calibre.utils.filenames import ascii_text
+
+        library_name = os.path.basename(db.library_path)
+
+        def create_bibtex_entry(entry, fields, mode, template_citation,
+                                    bibtexdict, db, citation_bibtex=True, calibre_files=True):
+            # Build the text of one BibTeX entry. Returns '' when mode is
+            # "book" and the record lacks the fields a book entry requires.
+
+            # Bibtex doesn't like UTF-8 but keep unicode until writing
+            # Define starting chain or if book valid strict and not book return a Fail string
+
+            bibtex_entry = []
+            if mode != "misc" and check_entry_book_valid(entry):
+                bibtex_entry.append('@book{')
+            elif mode != "book":
+                bibtex_entry.append('@misc{')
+            else:
+                # case strict book
+                return ''
+
+            if citation_bibtex:
+                # Citation tag
+                bibtex_entry.append(make_bibtex_citation(entry, template_citation,
+                    bibtexdict))
+                bibtex_entry = [' '.join(bibtex_entry)]
+
+            for field in fields:
+                if field.startswith('#'):
+                    item = db.get_field(entry['id'],field,index_is_id=True)
+                    if isinstance(item, (bool, numbers.Number)):
+                        item = repr(item)
+                elif field == 'title_sort':
+                    item = entry['sort']
+                elif field == 'library_name':
+                    item = library_name
+                else:
+                    item = entry[field]
+
+                # check if the field should be included (none or empty)
+                if item is None:
+                    continue
+                try:
+                    if len(item) == 0:
+                        continue
+                except TypeError:
+                    # values without a length (numbers, dates) are kept
+                    pass
+
+                if field == 'authors':
+                    bibtex_entry.append('author = "%s"' % bibtexdict.bibtex_author_format(item))
+
+                elif field == 'id':
+                    bibtex_entry.append('calibreid = "%s"' % int(item))
+
+                elif field == 'rating':
+                    bibtex_entry.append('rating = "%s"' % int(item))
+
+                elif field == 'size':
+                    bibtex_entry.append('%s = "%s octets"' % (field, int(item)))
+
+                elif field == 'tags':
+                    # A list to flatten
+                    bibtex_entry.append('tags = "%s"' % bibtexdict.utf8ToBibtex(', '.join(item)))
+
+                elif field == 'comments':
+                    # \n removal
+                    item = item.replace('\r\n', ' ')
+                    item = item.replace('\n', ' ')
+                    # unmatched brace removal (users should use \leftbrace or \rightbrace for single braces)
+                    item = bibtexdict.stripUnmatchedSyntax(item, '{', '}')
+                    # html to text
+                    try:
+                        item = html2text(item)
+                    except Exception:
+                        # best effort: keep the unconverted comment, but do
+                        # not swallow KeyboardInterrupt/SystemExit
+                        log.warn("Failed to convert comments to text")
+                    bibtex_entry.append('note = "%s"' % bibtexdict.utf8ToBibtex(item))
+
+                elif field == 'isbn':
+                    # Could be 9, 10 or 13 digits
+                    bibtex_entry.append('isbn = "%s"' % format_isbn(item))
+
+                elif field == 'formats':
+                    # Add file path if format is selected
+                    formats = [fmt_path.rpartition('.')[2].lower() for fmt_path in item]
+                    bibtex_entry.append('formats = "%s"' % ', '.join(formats))
+                    if calibre_files:
+                        files = [':%s:%s' % (fmt_path, fmt_path.rpartition('.')[2].upper())
+                            for fmt_path in item]
+                        bibtex_entry.append('file = "%s"' % ', '.join(files))
+
+                elif field == 'series_index':
+                    bibtex_entry.append('volume = "%s"' % int(item))
+
+                elif field == 'timestamp':
+                    bibtex_entry.append('timestamp = "%s"' % isoformat(item).partition('T')[0])
+
+                elif field == 'pubdate':
+                    bibtex_entry.append('year = "%s"' % item.year)
+                    bibtex_entry.append('month = "%s"' % bibtexdict.utf8ToBibtex(strftime("%b", item)))
+
+                elif field.startswith('#') and isinstance(item, string_or_bytes):
+                    bibtex_entry.append('custom_%s = "%s"' % (field[1:],
+                        bibtexdict.utf8ToBibtex(item)))
+
+                elif isinstance(item, string_or_bytes):
+                    # elif field in ['title', 'publisher', 'cover', 'uuid', 'ondevice',
+                    # 'author_sort', 'series', 'title_sort'] :
+                    bibtex_entry.append('%s = "%s"' % (field, bibtexdict.utf8ToBibtex(item)))
+
+            bibtex_entry = ',\n    '.join(bibtex_entry)
+            bibtex_entry += ' }\n\n'
+
+            return bibtex_entry
+
+        def check_entry_book_valid(entry):
+            # Check that the required fields are ok for a book entry
+            for field in ['title', 'authors', 'publisher']:
+                if entry[field] is None or len(entry[field]) == 0:
+                    return False
+            return entry['pubdate'] is not None
+
+        def make_bibtex_citation(entry, template_citation, bibtexclass):
+            # Build the citation key from the template; fall back to the
+            # digits of the ISBN, then to the book id.
+
+            # define a function to replace the template entry by its value
+            def tpl_replace(objtplname):
+
+                tpl_field = re.sub(r'[\{\}]', '', objtplname.group())
+
+                if tpl_field not in TEMPLATE_ALLOWED_FIELDS:
+                    return ''
+
+                # create_bibtex_entry reads the title sort value from the
+                # 'sort' key, so the template does the same
+                value = entry['sort' if tpl_field == 'title_sort' else tpl_field]
+
+                if tpl_field in ('id', 'series_index'):
+                    value = unicode_type(value)
+                elif not value:
+                    # missing date, empty tags/authors list or empty text:
+                    # contribute nothing instead of raising
+                    return ''
+                elif tpl_field in ('pubdate', 'timestamp'):
+                    value = isoformat(value).partition('T')[0]
+                elif tpl_field in ('tags', 'authors'):
+                    value = value[0]
+                return ascii_text(value)
+
+            if len(template_citation) > 0:
+                tpl_citation = bibtexclass.utf8ToBibtex(
+                    bibtexclass.ValidateCitationKey(re.sub(r'\{[^{}]*\}',
+                        tpl_replace, template_citation)))
+
+                if len(tpl_citation) > 0:
+                    return tpl_citation
+
+            isbn = entry["isbn"]
+            if isbn:
+                # isbn may be None as well as empty
+                template_citation = '%s' % re.sub(r'[\D]','', isbn)
+            else:
+                template_citation = '%s' % unicode_type(entry["id"])
+
+            return bibtexclass.ValidateCitationKey(template_citation)
+
+        def resolve_choice(value, choices, flag):
+            # Needed because the CLI returns a str while the widget returns
+            # an int index. Each option is resolved on its own, so a mix of
+            # both kinds is handled as well.
+            try:
+                return choices[value]
+            except (TypeError, IndexError):
+                pass
+            if value in choices:
+                return value
+            log.warn("Incorrect %s flag, revert to default" % flag)
+            return choices[0]
+
+        def parse_bool_option(value, flag):
+            # Non-string values are used as they are; the strings 'True' and
+            # 'False' are converted, anything else falls back to True.
+            if not isinstance(value, string_or_bytes):
+                return value
+            if value == 'False':
+                return False
+            if value != 'True':
+                log.warn("Incorrect %s, revert to default" % flag)
+            return True
+
+        self.fmt = path_to_output.rpartition('.')[2]
+        self.notification = notification
+
+        # Combobox options (the first entry of each list is the default)
+        bibfile_enc = resolve_choice(opts.bibfile_enc,
+            ['utf8', 'cp1252', 'ascii'], '--choose-encoding')
+        bibfile_enctag = resolve_choice(opts.bibfile_enctag,
+            ['strict', 'replace', 'ignore', 'backslashreplace'],
+            '--choose-encoding-configuration')
+        bib_entry = resolve_choice(opts.bib_entry,
+            ['mixed', 'misc', 'book'], '--entry-type')
+
+        if opts.verbose:
+            opts_dict = vars(opts)
+            log("%s(): Generating %s" % (self.name,self.fmt))
+            if opts.connected_device['is_device_connected']:
+                log(" connected_device: %s" % opts.connected_device['name'])
+            if opts_dict['search_text']:
+                log(" --search='%s'" % opts_dict['search_text'])
+
+            if opts_dict['ids']:
+                log(" Book count: %d" % len(opts_dict['ids']))
+                if opts_dict['search_text']:
+                    log(" (--search ignored when a subset of the database is specified)")
+
+            if opts_dict['fields']:
+                if opts_dict['fields'] == 'all':
+                    log(" Fields: %s" % ', '.join(FIELDS[1:]))
+                else:
+                    log(" Fields: %s" % opts_dict['fields'])
+
+            log(" Output file will be encoded in %s with %s flag" % (bibfile_enc, bibfile_enctag))
+
+            log(" BibTeX entry type is %s with a citation like '%s' flag" % (bib_entry, opts_dict['bib_cit']))
+
+        # If a list of ids are provided, don't use search_text
+        if opts.ids:
+            opts.search_text = None
+
+        data = self.search_sort_db(db, opts)
+
+        # Reported once; the catalog (header only) is still written
+        if not len(data):
+            log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)
+
+        # Get the requested output fields as a list
+        fields = self.get_output_fields(db, opts)
+
+        # Initialize BibTeX class
+        bibtexc = BibTeX()
+
+        # Entries writing after Bibtex formatting (or not)
+        bibtexc.ascii_bibtex = bibfile_enc == 'ascii'
+
+        # Check citation choice and go to default in case of bad CLI
+        citation_bibtex = parse_bool_option(opts.impcit, '--create-citation')
+
+        # Check add file entry and go to default in case of bad CLI
+        addfiles_bibtex = parse_bool_option(opts.addfiles, '--add-files-path')
+
+        # Preprocess for error and light correction
+        template_citation = preprocess_template(opts.bib_cit)
+
+        # Open output and write entries
+        with codecs.open(path_to_output, 'w', bibfile_enc, bibfile_enctag)\
+            as outfile:
+            # File header
+            nb_entries = len(data)
+
+            # check in book strict if all is ok else throw a warning into log
+            if bib_entry == 'book':
+                nb_books = len(list(filter(check_entry_book_valid, data)))
+                if nb_books < nb_entries:
+                    log.warn("Only %d entries in %d are book compatible" % (nb_books, nb_entries))
+                    nb_entries = nb_books
+
+            # If connected device, add 'On Device' values to data
+            if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
+                for entry in data:
+                    entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
+
+            outfile.write('%%%Calibre catalog\n%%%{0} entries in catalog\n\n'.format(nb_entries))
+            outfile.write('@preamble{"This catalog of %d entries was generated by calibre on %s"}\n\n'
+                % (nb_entries, strftime("%A, %d. %B %Y %H:%M")))
+
+            for entry in data:
+                outfile.write(create_bibtex_entry(entry, fields, bib_entry, template_citation,
+                    bibtexc, db, citation_bibtex, addfiles_bibtex))
--- a/ebook_converter/library/catalogs/csv_xml.py
+++ b/ebook_converter/library/catalogs/csv_xml.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import re, codecs, os
+from collections import namedtuple
+
+from calibre.customize import CatalogPlugin
+from calibre.library.catalogs import FIELDS
+from calibre.customize.conversion import DummyReporter
+from polyglot.builtins import unicode_type
+
+
+class CSV_XML(CatalogPlugin):
+
+    'CSV/XML catalog generator'
+
+    # Record describing one command-line option of this plugin
+    Option = namedtuple('Option', 'option, default, dest, action, help')
+
+    name = 'Catalog_CSV_XML'
+    description = 'CSV/XML catalog generator'
+    supported_platforms = ['windows', 'osx', 'linux']
+    author = 'Greg Riker'
+    version = (1, 0, 0)
+    file_types = {'csv', 'xml'}
+
+    cli_options = [
+            Option('--fields',
+                default='all',
+                dest='fields',
+                action=None,
+                help=_('The fields to output when cataloging books in the '
+                    'database.  Should be a comma-separated list of fields.\n'
+                    'Available fields: %(fields)s,\n'
+                    'plus user-created custom fields.\n'
+                    'Example: %(opt)s=title,authors,tags\n'
+                    "Default: '%%default'\n"
+                    "Applies to: CSV, XML output formats") % dict(
+                        fields=', '.join(FIELDS), opt='--fields')),
+
+            Option('--sort-by',
+                default='id',
+                dest='sort_by',
+                action=None,
+                help=_('Output field to sort on.\n'
+                'Available fields: author_sort, id, rating, size, timestamp, title_sort\n'
+                "Default: '%default'\n"
+                "Applies to: CSV, XML output formats"))]
+
+    def run(self, path_to_output, opts, db, notification=DummyReporter()):
+        '''
+        Write the selected books to ``path_to_output`` as CSV or XML.
+
+        The format is taken from the text after the last dot of
+        ``path_to_output``; any extension other than ``csv`` or ``xml``
+        produces no output file.
+
+        :param path_to_output: Destination file.
+        :param opts: Option object. This method reads ``verbose``, ``ids``,
+            ``search_text``, ``connected_device`` and, when present,
+            ``library_path``; ``search_text`` is reset to None when ``ids``
+            is set.
+        :param db: Library database handed to ``search_sort_db`` and
+            ``get_output_fields``; also queried for custom (``#``) fields
+            and field metadata.
+        :param notification: Stored on ``self.notification``.
+        '''
+        from calibre.library import current_library_name
+        from calibre.utils.date import isoformat
+        from calibre.utils.html2text import html2text
+        from calibre.utils.logging import default_log as log
+        from lxml import etree
+        from calibre.ebooks.metadata import authors_to_string
+
+        self.fmt = path_to_output.rpartition('.')[2]
+        self.notification = notification
+        current_library = current_library_name()
+        # An explicitly passed library path wins over the configured one
+        if getattr(opts, 'library_path', None):
+            current_library = os.path.basename(opts.library_path)
+
+        if opts.verbose:
+            opts_dict = vars(opts)
+            log("%s('%s'): Generating %s" % (self.name, current_library, self.fmt.upper()))
+            if opts.connected_device['is_device_connected']:
+                log(" connected_device: %s" % opts.connected_device['name'])
+            if opts_dict['search_text']:
+                log(" --search='%s'" % opts_dict['search_text'])
+
+            if opts_dict['ids']:
+                log(" Book count: %d" % len(opts_dict['ids']))
+                if opts_dict['search_text']:
+                    log(" (--search ignored when a subset of the database is specified)")
+
+            if opts_dict['fields']:
+                if opts_dict['fields'] == 'all':
+                    log(" Fields: %s" % ', '.join(FIELDS[1:]))
+                else:
+                    log(" Fields: %s" % opts_dict['fields'])
+
+        # If a list of ids are provided, don't use search_text
+        if opts.ids:
+            opts.search_text = None
+
+        data = self.search_sort_db(db, opts)
+
+        if not len(data):
+            log.error("\nNo matching database entries for search criteria '%s'" % opts.search_text)
+            # raise SystemExit(1)
+
+        # Get the requested output fields as a list
+        fields = self.get_output_fields(db, opts)
+
+        # If connected device, add 'On Device' values to data
+        if opts.connected_device['is_device_connected'] and 'ondevice' in fields:
+            for entry in data:
+                entry['ondevice'] = db.catalog_plugin_on_device_temp_mapping[entry['id']]['ondevice']
+
+        # Metadata of every requested field, {} when the database has none
+        fm = {x: db.field_metadata.get(x, {}) for x in fields}
+
+        if self.fmt == 'csv':
+            # The context manager closes the file even when a record fails
+            with codecs.open(path_to_output, 'w', 'utf8') as outfile:
+                # Write a UTF-8 BOM
+                outfile.write('\ufeff')
+
+                # Output the field headers
+                outfile.write('%s\n' % ','.join(fields))
+
+                # Output the entry fields
+                for entry in data:
+                    outstr = []
+                    for field in fields:
+                        if field.startswith('#'):
+                            item = db.get_field(entry['id'], field, index_is_id=True)
+                            if isinstance(item, (list, tuple)):
+                                if fm.get(field, {}).get('display', {}).get('is_names', False):
+                                    item = ' & '.join(item)
+                                else:
+                                    item = ', '.join(item)
+                        elif field == 'library_name':
+                            item = current_library
+                        elif field == 'title_sort':
+                            item = entry['sort']
+                        else:
+                            item = entry[field]
+
+                        if item is None:
+                            outstr.append('""')
+                            continue
+                        elif field == 'formats':
+                            item = ', '.join(
+                                fmt_path.rpartition('.')[2].lower() for fmt_path in item)
+                        elif field == 'authors':
+                            item = authors_to_string(item)
+                        elif field == 'tags':
+                            item = ', '.join(item)
+                        elif field == 'isbn':
+                            # Could be 9, 10 or 13 digits, with hyphens, possibly ending in 'X'
+                            item = '%s' % re.sub(r'[^\dX-]', '', item)
+                        elif fm.get(field, {}).get('datatype') == 'datetime':
+                            item = isoformat(item, as_utc=False)
+                        elif field == 'comments':
+                            item = item.replace('\r\n', ' ')
+                            item = item.replace('\n', ' ')
+                        elif fm.get(field, {}).get('datatype', None) == 'rating' and item:
+                            item = '%.2g' % (item / 2)
+
+                        # Convert HTML to markdown text; only done when the
+                        # text ends with the closing tag of its first element
+                        if isinstance(item, unicode_type):
+                            opening_tag = re.search(r'<(\w+)( |>)', item)
+                            if opening_tag:
+                                closing_tag = re.search(r'<\/%s>$' % opening_tag.group(1), item)
+                                if closing_tag:
+                                    item = html2text(item)
+
+                        # Quote every value, doubling embedded quotes
+                        outstr.append('"%s"' % unicode_type(item).replace('"', '""'))
+
+                    outfile.write(','.join(outstr) + '\n')
+
+        elif self.fmt == 'xml':
+            from lxml.builder import E
+
+            root = E.calibredb()
+            for r in data:
+                record = E.record()
+                root.append(record)
+
+                # Custom fields: '#' is replaced by '_' in the element name
+                for field in fields:
+                    if field.startswith('#'):
+                        val = db.get_field(r['id'], field, index_is_id=True)
+                        if not isinstance(val, unicode_type):
+                            val = unicode_type(val)
+                        item = getattr(E, field.replace('#', '_'))(val)
+                        record.append(item)
+
+                for field in ('id', 'uuid', 'publisher', 'rating', 'size',
+                              'isbn', 'ondevice', 'identifiers'):
+                    if field in fields:
+                        val = r[field]
+                        if not val:
+                            continue
+                        if not isinstance(val, (bytes, unicode_type)):
+                            if (fm.get(field, {}).get('datatype', None) ==
+                                    'rating' and val):
+                                val = '%.2g' % (val / 2)
+                            val = unicode_type(val)
+                        item = getattr(E, field)(val)
+                        record.append(item)
+
+                if 'title' in fields:
+                    title = E.title(r['title'], sort=r['sort'])
+                    record.append(title)
+
+                if 'authors' in fields:
+                    aus = E.authors(sort=r['author_sort'])
+                    for au in r['authors']:
+                        aus.append(E.author(au))
+                    record.append(aus)
+
+                for field in ('timestamp', 'pubdate'):
+                    if field in fields:
+                        record.append(getattr(E, field)(isoformat(r[field], as_utc=False)))
+
+                if 'tags' in fields and r['tags']:
+                    tags = E.tags()
+                    for tag in r['tags']:
+                        tags.append(E.tag(tag))
+                    record.append(tags)
+
+                if 'comments' in fields and r['comments']:
+                    record.append(E.comments(r['comments']))
+
+                if 'series' in fields and r['series']:
+                    record.append(E.series(r['series'],
+                        index=unicode_type(r['series_index'])))
+
+                if 'cover' in fields and r['cover']:
+                    record.append(E.cover(r['cover'].replace(os.sep, '/')))
+
+                if 'formats' in fields and r['formats']:
+                    fmt = E.formats()
+                    for f in r['formats']:
+                        fmt.append(E.format(f.replace(os.sep, '/')))
+                    record.append(fmt)
+
+                if 'library_name' in fields:
+                    record.append(E.library_name(current_library))
+
+            with open(path_to_output, 'wb') as f:
+                f.write(etree.tostring(root, encoding='utf-8',
+                    xml_declaration=True, pretty_print=True))
--- a/ebook_converter/library/catalogs/epub_mobi.py
+++ b/ebook_converter/library/catalogs/epub_mobi.py
@@ -0,0 +1,506 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__ = 'GPL v3'
+__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import datetime, os, time
+from collections import namedtuple
+
+from calibre import strftime
+from calibre.customize import CatalogPlugin
+from calibre.customize.conversion import OptionRecommendation, DummyReporter
+from calibre.library import current_library_name
+from calibre.library.catalogs import AuthorSortMismatchException, EmptyCatalogException
+from calibre.ptempfile import PersistentTemporaryFile
+from calibre.utils.localization import calibre_langcode_to_name, canonicalize_lang, get_lang
+from polyglot.builtins import unicode_type
+
+# Record describing one command-line option: the option string, its default
+# value, the destination attribute name, the action and the help text.
+Option = namedtuple('Option', 'option, default, dest, action, help')
+
+
+class EPUB_MOBI(CatalogPlugin):
+
+    'EPUB catalog generator'
+
+    name = 'Catalog_EPUB_MOBI'
+    description = 'AZW3/EPUB/MOBI catalog generator'
+    supported_platforms = ['windows', 'osx', 'linux']
+    minimum_calibre_version = (0, 7, 40)
+    author = 'Greg Riker'
+    version = (1, 0, 0)
+    file_types = {'azw3', 'epub', 'mobi'}
+
+    THUMB_SMALLEST = "1.0"
+    THUMB_LARGEST = "2.0"
+
+    cli_options = [Option('--catalog-title',  # {{{
+                          default='My Books',
+                          dest='catalog_title',
+                          action=None,
+                          help=_('Title of generated catalog used as title in metadata.\n'
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--cross-reference-authors',
+                          default=False,
+                          dest='cross_reference_authors',
+                          action='store_true',
+                          help=_("Create cross-references in Authors section for books with multiple authors.\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--debug-pipeline',
+                           default=None,
+                           dest='debug_pipeline',
+                           action=None,
+                           help=_("Save the output from different stages of the conversion "
+                           "pipeline to the specified "
+                           "directory. Useful if you are unsure at which stage "
+                           "of the conversion process a bug is occurring.\n"
+                           "Default: '%default'\n"
+                           "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--exclude-genre',
+                          default=r'\[.+\]|^\+$',
+                          dest='exclude_genre',
+                          action=None,
+                          help=_("Regex describing tags to exclude as genres.\n"
+                          "Default: '%default' excludes bracketed tags, e.g. '[Project Gutenberg]', and '+', the default tag for read books.\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--exclusion-rules',
+                          default="(('Catalogs','Tags','Catalog'),)",
+                          dest='exclusion_rules',
+                          action=None,
+                          help=_("Specifies the rules used to exclude books from the generated catalog.\n"
+                          "The model for an exclusion rule is either\n('<rule name>','Tags','<comma-separated list of tags>') or\n"
+                          "('<rule name>','<custom column>','<pattern>').\n"
+                          "For example:\n"
+                          "(('Archived books','#status','Archived'),)\n"
+                          "will exclude a book with a value of 'Archived' in the custom column 'status'.\n"
+                          "When multiple rules are defined, all rules will be applied.\n"
+                          "Default: \n" + '"' + '%default' + '"' + "\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--generate-authors',
+                          default=False,
+                          dest='generate_authors',
+                          action='store_true',
+                          help=_("Include 'Authors' section in catalog.\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--generate-descriptions',
+                          default=False,
+                          dest='generate_descriptions',
+                          action='store_true',
+                          help=_("Include 'Descriptions' section in catalog.\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--generate-genres',
+                          default=False,
+                          dest='generate_genres',
+                          action='store_true',
+                          help=_("Include 'Genres' section in catalog.\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--generate-titles',
+                          default=False,
+                          dest='generate_titles',
+                          action='store_true',
+                          help=_("Include 'Titles' section in catalog.\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--generate-series',
+                          default=False,
+                          dest='generate_series',
+                          action='store_true',
+                          help=_("Include 'Series' section in catalog.\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--generate-recently-added',
+                          default=False,
+                          dest='generate_recently_added',
+                          action='store_true',
+                          help=_("Include 'Recently Added' section in catalog.\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--genre-source-field',
+                          default=_('Tags'),
+                          dest='genre_source_field',
+                          action=None,
+                          help=_("Source field for 'Genres' section.\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--header-note-source-field',
+                          default='',
+                          dest='header_note_source_field',
+                          action=None,
+                          help=_("Custom field containing note text to insert in Description header.\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--merge-comments-rule',
+                          default='::',
+                          dest='merge_comments_rule',
+                          action=None,
+                          help=_("#<custom field>:[before|after]:[True|False] specifying:\n"
+                          " <custom field> Custom field containing notes to merge with Comments\n"
+                          " [before|after] Placement of notes with respect to Comments\n"
+                          " [True|False] - A horizontal rule is inserted between notes and Comments\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--output-profile',
+                          default=None,
+                          dest='output_profile',
+                          action=None,
+                          help=_("Specifies the output profile. In some cases, an output profile is required to optimize"
+                                 " the catalog for the device. For example, 'kindle' or 'kindle_dx' creates a structured"
+                                 " Table of Contents with Sections and Articles.\n"
+                                 "Default: '%default'\n"
+                                 "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--prefix-rules',
+                          default="(('Read books','tags','+','\u2713'),('Wishlist item','tags','Wishlist','\u00d7'))",
+                          dest='prefix_rules',
+                          action=None,
+                          help=_("Specifies the rules used to include prefixes indicating read books, wishlist items and other user-specified prefixes.\n"
+                          "The model for a prefix rule is ('<rule name>','<source field>','<pattern>','<prefix>').\n"
+                          "When multiple rules are defined, the first matching rule will be used.\n"
+                          "Default:\n" + '"' + '%default' + '"' + "\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--preset',
+                          default=None,
+                          dest='preset',
+                          action=None,
+                          help=_("Use a named preset created with the GUI catalog builder.\n"
+                          "A preset specifies all settings for building a catalog.\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--use-existing-cover',
+                          default=False,
+                          dest='use_existing_cover',
+                          action='store_true',
+                          help=_("Replace existing cover when generating the catalog.\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                   Option('--thumb-width',
+                          default='1.0',
+                          dest='thumb_width',
+                          action=None,
+                          help=_("Size hint (in inches) for book covers in catalog.\n"
+                          "Range: 1.0 - 2.0\n"
+                          "Default: '%default'\n"
+                          "Applies to: AZW3, EPUB, MOBI output formats")),
+                          ]
+    # }}}
+
+    def run(self, path_to_output, opts, db, notification=DummyReporter()):
+        from calibre.library.catalogs.epub_mobi_builder import CatalogBuilder
+        from calibre.utils.logging import default_log as log
+        from calibre.utils.config import JSONConfig
+
+        # If preset specified from the cli, insert stored options from JSON file
+        if hasattr(opts, 'preset') and opts.preset:
+            available_presets = JSONConfig("catalog_presets")
+            if opts.preset not in available_presets:
+                if available_presets:
+                    print(_('Error: Preset "%s" not found.' % opts.preset))
+                    print(_('Stored presets: %s' % ', '.join([p for p in sorted(available_presets.keys())])))
+                else:
+                    print(_('Error: No stored presets.'))
+                return 1
+
+            # Copy the relevant preset values to the opts object
+            for item in available_presets[opts.preset]:
+                if item not in ['exclusion_rules_tw', 'format', 'prefix_rules_tw']:
+                    setattr(opts, item, available_presets[opts.preset][item])
+
+            # Provide an unconnected device
+            opts.connected_device = {
+                         'is_device_connected': False,
+                         'kind': None,
+                         'name': None,
+                         'save_template': None,
+                         'serial': None,
+                         'storage': None,
+                        }
+
+            # Convert prefix_rules and exclusion_rules from JSON lists to tuples
+            prs = []
+            for rule in opts.prefix_rules:
+                prs.append(tuple(rule))
+            opts.prefix_rules = tuple(prs)
+
+            ers = []
+            for rule in opts.exclusion_rules:
+                ers.append(tuple(rule))
+            opts.exclusion_rules = tuple(ers)
+
+        opts.log = log
+        opts.fmt = self.fmt = path_to_output.rpartition('.')[2]
+
+        # Add local options
+        opts.creator = '%s, %s %s, %s' % (strftime('%A'), strftime('%B'), strftime('%d').lstrip('0'), strftime('%Y'))
+        opts.creator_sort_as = '%s %s' % ('calibre', strftime('%Y-%m-%d'))
+        opts.connected_kindle = False
+
+        # Finalize output_profile
+        op = opts.output_profile
+        if op is None:
+            op = 'default'
+
+        if opts.connected_device['name'] and 'kindle' in opts.connected_device['name'].lower():
+            opts.connected_kindle = True
+            if opts.connected_device['serial'] and \
+               opts.connected_device['serial'][:4] in ['B004', 'B005']:
+                op = "kindle_dx"
+            else:
+                op = "kindle"
+
+        opts.description_clip = 380 if op.endswith('dx') or 'kindle' not in op else 100
+        opts.author_clip = 100 if op.endswith('dx') or 'kindle' not in op else 60
+        opts.output_profile = op
+
+        opts.basename = "Catalog"
+        opts.cli_environment = not hasattr(opts, 'sync')
+
+        # Hard-wired to always sort descriptions by author, with series after non-series
+        opts.sort_descriptions_by_author = True
+
+        build_log = []
+
+        build_log.append("%s('%s'): Generating %s %sin %s environment, locale: '%s'" %
+            (self.name,
+             current_library_name(),
+             self.fmt,
+             'for %s ' % opts.output_profile if opts.output_profile else '',
+             'CLI' if opts.cli_environment else 'GUI',
+             calibre_langcode_to_name(canonicalize_lang(get_lang()), localize=False))
+             )
+
+        # If exclude_genre is blank, assume user wants all tags as genres
+        if opts.exclude_genre.strip() == '':
+            # opts.exclude_genre = '\[^.\]'
+            # build_log.append(" converting empty exclude_genre to '\[^.\]'")
+            opts.exclude_genre = 'a^'
+            build_log.append(" converting empty exclude_genre to 'a^'")
+        if opts.connected_device['is_device_connected'] and \
+           opts.connected_device['kind'] == 'device':
+            if opts.connected_device['serial']:
+                build_log.append(" connected_device: '%s' #%s%s " %
+                    (opts.connected_device['name'],
+                     opts.connected_device['serial'][0:4],
+                     'x' * (len(opts.connected_device['serial']) - 4)))
+                for storage in opts.connected_device['storage']:
+                    if storage:
+                        build_log.append("  mount point: %s" % storage)
+            else:
+                build_log.append(" connected_device: '%s'" % opts.connected_device['name'])
+                try:
+                    for storage in opts.connected_device['storage']:
+                        if storage:
+                            build_log.append("  mount point: %s" % storage)
+                except:
+                    build_log.append("  (no mount points)")
+        else:
+            build_log.append(" connected_device: '%s'" % opts.connected_device['name'])
+
+        opts_dict = vars(opts)
+        if opts_dict['ids']:
+            build_log.append(" book count: %d" % len(opts_dict['ids']))
+
+        sections_list = []
+        if opts.generate_authors:
+            sections_list.append('Authors')
+        if opts.generate_titles:
+            sections_list.append('Titles')
+        if opts.generate_series:
+            sections_list.append('Series')
+        if opts.generate_genres:
+            sections_list.append('Genres')
+        if opts.generate_recently_added:
+            sections_list.append('Recently Added')
+        if opts.generate_descriptions:
+            sections_list.append('Descriptions')
+
+        if not sections_list:
+            if opts.cli_environment:
+                opts.log.warn('*** No Section switches specified, enabling all Sections ***')
+                opts.generate_authors = True
+                opts.generate_titles = True
+                opts.generate_series = True
+                opts.generate_genres = True
+                opts.generate_recently_added = True
+                opts.generate_descriptions = True
+                sections_list = ['Authors', 'Titles', 'Series', 'Genres', 'Recently Added', 'Descriptions']
+            else:
+                opts.log.warn('\n*** No enabled Sections, terminating catalog generation ***')
+                return ["No Included Sections", "No enabled Sections.\nCheck E-book options tab\n'Included sections'\n"]
+        if opts.fmt == 'mobi' and sections_list == ['Descriptions']:
+            warning = _("\n*** Adding 'By authors' section required for MOBI output ***")
+            opts.log.warn(warning)
+            sections_list.insert(0, 'Authors')
+            opts.generate_authors = True
+
+        opts.log(" Sections: %s" % ', '.join(sections_list))
+        opts.section_list = sections_list
+
+        # Limit thumb_width to 1.0" - 2.0"
+        try:
+            if float(opts.thumb_width) < float(self.THUMB_SMALLEST):
+                log.warning("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_SMALLEST))
+                opts.thumb_width = self.THUMB_SMALLEST
+            if float(opts.thumb_width) > float(self.THUMB_LARGEST):
+                log.warning("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_LARGEST))
+                opts.thumb_width = self.THUMB_LARGEST
+            opts.thumb_width = "%.2f" % float(opts.thumb_width)
+        except:
+            log.error("coercing thumb_width from '%s' to '%s'" % (opts.thumb_width, self.THUMB_SMALLEST))
+            opts.thumb_width = "1.0"
+
+        # eval prefix_rules if passed from command line
+        if type(opts.prefix_rules) is not tuple:
+            try:
+                opts.prefix_rules = eval(opts.prefix_rules)
+            except:
+                log.error("malformed --prefix-rules: %s" % opts.prefix_rules)
+                raise
+            for rule in opts.prefix_rules:
+                if len(rule) != 4:
+                    log.error("incorrect number of args for --prefix-rules: %s" % repr(rule))
+
+        # eval exclusion_rules if passed from command line
+        if type(opts.exclusion_rules) is not tuple:
+            try:
+                opts.exclusion_rules = eval(opts.exclusion_rules)
+            except:
+                log.error("malformed --exclusion-rules: %s" % opts.exclusion_rules)
+                raise
+            for rule in opts.exclusion_rules:
+                if len(rule) != 3:
+                    log.error("incorrect number of args for --exclusion-rules: %s" % repr(rule))
+
+        # Display opts
+        keys = sorted(opts_dict.keys())
+        build_log.append(" opts:")
+        for key in keys:
+            if key in ['catalog_title', 'author_clip', 'connected_kindle', 'creator',
+                       'cross_reference_authors', 'description_clip', 'exclude_book_marker',
+                       'exclude_genre', 'exclude_tags', 'exclusion_rules', 'fmt',
+                       'genre_source_field', 'header_note_source_field', 'merge_comments_rule',
+                       'output_profile', 'prefix_rules', 'preset', 'read_book_marker',
+                       'search_text', 'sort_by', 'sort_descriptions_by_author', 'sync',
+                       'thumb_width', 'use_existing_cover', 'wishlist_tag']:
+                build_log.append("  %s: %s" % (key, repr(opts_dict[key])))
+        if opts.verbose:
+            log('\n'.join(line for line in build_log))
+
+        # Capture start_time
+        opts.start_time = time.time()
+
+        self.opts = opts
+
+        if opts.verbose:
+            log.info(" Begin catalog source generation (%s)" %
+                     unicode_type(datetime.timedelta(seconds=int(time.time() - opts.start_time))))
+
+        # Launch the Catalog builder
+        catalog = CatalogBuilder(db, opts, self, report_progress=notification)
+
+        try:
+            catalog.build_sources()
+            if opts.verbose:
+                log.info(" Completed catalog source generation (%s)\n"  %
+                         unicode_type(datetime.timedelta(seconds=int(time.time() - opts.start_time))))
+        except (AuthorSortMismatchException, EmptyCatalogException) as e:
+            log.error(" *** Terminated catalog generation: %s ***" % e)
+        except:
+            log.error(" unhandled exception in catalog generator")
+            raise
+
+        else:
+            recommendations = []
+            recommendations.append(('remove_fake_margins', False,
+                OptionRecommendation.HIGH))
+            recommendations.append(('comments', '', OptionRecommendation.HIGH))
+
+            """
+            >>> Use to debug generated catalog code before pipeline conversion <<<
+            """
+            GENERATE_DEBUG_EPUB = False
+            if GENERATE_DEBUG_EPUB:
+                catalog_debug_path = os.path.join(os.path.expanduser('~'), 'Desktop', 'Catalog debug')
+                setattr(opts, 'debug_pipeline', os.path.expanduser(catalog_debug_path))
+
+            dp = getattr(opts, 'debug_pipeline', None)
+            if dp is not None:
+                recommendations.append(('debug_pipeline', dp,
+                    OptionRecommendation.HIGH))
+
+            if opts.output_profile and opts.output_profile.startswith("kindle"):
+                recommendations.append(('output_profile', opts.output_profile,
+                    OptionRecommendation.HIGH))
+                recommendations.append(('book_producer', opts.output_profile,
+                    OptionRecommendation.HIGH))
+                if opts.fmt == 'mobi':
+                    recommendations.append(('no_inline_toc', True,
+                        OptionRecommendation.HIGH))
+                    recommendations.append(('verbose', 2,
+                        OptionRecommendation.HIGH))
+
+            # Use existing cover or generate new cover
+            cpath = None
+            existing_cover = False
+            try:
+                search_text = 'title:"%s" author:%s' % (
+                        opts.catalog_title.replace('"', '\\"'), 'calibre')
+                matches = db.search(search_text, return_matches=True, sort_results=False)
+                if matches:
+                    cpath = db.cover(matches[0], index_is_id=True, as_path=True)
+                    if cpath and os.path.exists(cpath):
+                        existing_cover = True
+            except:
+                pass
+
+            if self.opts.use_existing_cover and not existing_cover:
+                log.warning("no existing catalog cover found")
+
+            if self.opts.use_existing_cover and existing_cover:
+                recommendations.append(('cover', cpath, OptionRecommendation.HIGH))
+                log.info("using existing catalog cover")
+            else:
+                from calibre.ebooks.covers import calibre_cover2
+                log.info("replacing catalog cover")
+                new_cover_path = PersistentTemporaryFile(suffix='.jpg')
+                new_cover = calibre_cover2(opts.catalog_title, 'calibre')
+                new_cover_path.write(new_cover)
+                new_cover_path.close()
+                recommendations.append(('cover', new_cover_path.name, OptionRecommendation.HIGH))
+
+            # Run ebook-convert
+            from calibre.ebooks.conversion.plumber import Plumber
+            plumber = Plumber(os.path.join(catalog.catalog_path, opts.basename + '.opf'),
+                            path_to_output, log, report_progress=notification,
+                            abort_after_input_dump=False)
+            plumber.merge_ui_recommendations(recommendations)
+            plumber.run()
+
+            try:
+                os.remove(cpath)
+            except:
+                pass
+
+            if GENERATE_DEBUG_EPUB:
+                from calibre.ebooks.epub import initialize_container
+                from calibre.ebooks.tweak import zip_rebuilder
+                from calibre.utils.zipfile import ZipFile
+                input_path = os.path.join(catalog_debug_path, 'input')
+                epub_shell = os.path.join(catalog_debug_path, 'epub_shell.zip')
+                initialize_container(epub_shell, opf_name='content.opf')
+                with ZipFile(epub_shell, 'r') as zf:
+                    zf.extractall(path=input_path)
+                os.remove(epub_shell)
+                zip_rebuilder(input_path, os.path.join(catalog_debug_path, 'input.epub'))
+
+            if opts.verbose:
+                log.info(" Catalog creation complete (%s)\n" %
+                     unicode_type(datetime.timedelta(seconds=int(time.time() - opts.start_time))))
+
+        # returns to gui2.actions.catalog:catalog_generated()
+        return catalog.error
--- a/ebook_converter/library/comments.py
+++ b/ebook_converter/library/comments.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+# License: GPLv3 Copyright: 2010, Kovid Goyal <kovid at kovidgoyal.net>
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import re
+
+from calibre import prepare_string_for_xml
+from calibre.constants import preferred_encoding
+from calibre.ebooks.BeautifulSoup import (
+    BeautifulSoup, CData, Comment, Declaration, NavigableString,
+    ProcessingInstruction
+)
+from calibre.utils.html2text import html2text
+from polyglot.builtins import unicode_type
+
+# Hackish - ignoring sentences ending or beginning in numbers to avoid
+# confusion with decimal points.
+# Matches a lowercase letter, one of . ? ! and an uppercase letter with
+# nothing in between, i.e. a sentence boundary whose line break was lost.
+lost_cr_pat = re.compile('([a-z])([\\.\\?!])([A-Z])')
+# Abbreviations (Ph.D, D.Phil, Dr./Mr./Mrs./Ms. followed by a capital letter)
+# that comments_to_html shields from lost_cr_pat.
+lost_cr_exception_pat = re.compile(r'(Ph\.D)|(D\.Phil)|((Dr|Mr|Mrs|Ms)\.[A-Z])')
+# Opening tags (script, table parts, style, iframe) that make
+# comments_to_html hand the text to sanitize_comments_html.
+sanitize_pat = re.compile(r'<script|<table|<tr|<td|<th|<style|<iframe',
+        re.IGNORECASE)
+
+
+def comments_to_html(comments):
+    '''
+    Convert random comment text to normalized, xml-legal block of <p>s
+    'plain text' returns as
+    <p>plain text</p>
+
+    'plain text with <i>minimal</i> <b>markup</b>' returns as
+    <p>plain text with <i>minimal</i> <b>markup</b></p>
+
+    '<p>pre-formatted text</p> returns untouched
+
+    'A line of text\n\nFollowed by a line of text' returns as
+    <p>A line of text</p>
+    <p>Followed by a line of text</p>
+
+    'A line of text.\nA second line of text.\rA third line of text' returns as
+    <p>A line of text.<br />A second line of text.<br />A third line of text.</p>
+
+    '...end of a paragraph.Somehow the break was lost...' returns as
+    <p>...end of a paragraph.</p>
+    <p>Somehow the break was lost...</p>
+
+    Deprecated HTML returns as HTML via BeautifulSoup()
+
+    '''
+    if not comments:
+        return u'<p></p>'
+    if not isinstance(comments, unicode_type):
+        comments = comments.decode(preferred_encoding, 'replace')
+
+    if comments.lstrip().startswith('<'):
+        # Comment is already HTML do not mess with it
+        return comments
+
+    if '<' not in comments:
+        comments = prepare_string_for_xml(comments)
+        parts = [u'<p class="description">%s</p>'%x.replace(u'\n', u'<br />')
+                for x in comments.split('\n\n')]
+        return '\n'.join(parts)
+
+    if sanitize_pat.search(comments) is not None:
+        try:
+            return sanitize_comments_html(comments)
+        except:
+            import traceback
+            traceback.print_exc()
+            return u'<p></p>'
+
+    # Explode lost CRs to \n\n
+    comments = lost_cr_exception_pat.sub(lambda m: m.group().replace('.',
+        '.\r'), comments)
+    for lost_cr in lost_cr_pat.finditer(comments):
+        comments = comments.replace(lost_cr.group(),
+                                    '%s%s\n\n%s' % (lost_cr.group(1),
+                                                    lost_cr.group(2),
+                                                    lost_cr.group(3)))
+
+    comments = comments.replace(u'\r', u'')
+    # Convert \n\n to <p>s
+    comments = comments.replace(u'\n\n', u'<p>')
+    # Convert solo returns to <br />
+    comments = comments.replace(u'\n', '<br />')
+    # Convert two hyphens to emdash
+    comments = comments.replace('--', '&mdash;')
+
+    soup = BeautifulSoup('<div>' + comments + '</div>').find('div')
+    result = BeautifulSoup('<div>')
+    container = result.find('div')
+    rtc = 0
+    open_pTag = False
+
+    all_tokens = list(soup.contents)
+    inline_tags = ('br', 'b', 'i', 'em', 'strong', 'span', 'font', 'a', 'hr')
+    for token in all_tokens:
+        if isinstance(token,  (CData, Comment, Declaration, ProcessingInstruction)):
+            continue
+        if isinstance(token, NavigableString):
+            if not open_pTag:
+                pTag = result.new_tag('p')
+                open_pTag = True
+                ptc = 0
+            pTag.insert(ptc, token)
+            ptc += 1
+        elif token.name in inline_tags:
+            if not open_pTag:
+                pTag = result.new_tag('p')
+                open_pTag = True
+                ptc = 0
+            pTag.insert(ptc, token)
+            ptc += 1
+        else:
+            if open_pTag:
+                container.insert(rtc, pTag)
+                rtc += 1
+                open_pTag = False
+                ptc = 0
+            container.insert(rtc, token)
+            rtc += 1
+
+    if open_pTag:
+        container.insert(rtc, pTag)
+
+    for p in container.findAll('p'):
+        p['class'] = 'description'
+
+    return container.decode_contents()
+
+
+def markdown(val):
+    try:
+        md = markdown.Markdown
+    except AttributeError:
+        from calibre.ebooks.markdown import Markdown
+        md = markdown.Markdown = Markdown()
+    return md.convert(val)
+
+
+def merge_comments(one, two):
+    return comments_to_html(one) + '\n\n' + comments_to_html(two)
+
+
+def sanitize_comments_html(html):
+    from calibre.ebooks.markdown import Markdown
+    text = html2text(html)
+    md = Markdown()
+    html = md.convert(text)
+    return html
+
+
+def find_tests():
+    import unittest
+
+    class Test(unittest.TestCase):
+
+        def test_comments_to_html(self):
+            for pat, val in [
+                    (b'lineone\n\nlinetwo',
+                        '<p class="description">lineone</p>\n<p class="description">linetwo</p>'),
+
+                    ('a <b>b&c</b>\nf',
+                        '<p class="description">a <b>b&amp;c</b><br/>f</p>'),
+
+                    ('a <?xml asd> b\n\ncd',
+                        '<p class="description">a  b</p><p class="description">cd</p>'),
+            ]:
+                cval = comments_to_html(pat)
+                self.assertEqual(cval, val)
+
+    return unittest.defaultTestLoader.loadTestsFromTestCase(Test)
--- a/ebook_converter/library/field_metadata.py
+++ b/ebook_converter/library/field_metadata.py
@@ -0,0 +1,702 @@
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+'''
+Created on 25 May 2010
+
+@author: charles
+'''
+
+import traceback
+from collections import OrderedDict
+
+from calibre.utils.config_base import tweaks
+from polyglot.builtins import iteritems, itervalues
+
+# Icon file name used for each category key. The 'custom:' and 'user:'
+# entries end with a colon -- presumably they are matched as key prefixes;
+# verify against the callers.
+category_icon_map = {
+    'authors': 'user_profile.png',
+    'series': 'series.png',
+    'formats': 'book.png',
+    'publisher': 'publisher.png',
+    'rating': 'rating.png',
+    'news': 'news.png',
+    'tags': 'tags.png',
+    'custom:': 'column.png',
+    'user:': 'tb_folder.png',
+    'search': 'search.png',
+    'identifiers': 'identifiers.png',
+    'gst': 'catalog.png',
+    'languages': 'languages.png',
+}
+
+# Builtin metadata {{{
+
+
+def _builtin_field_metadata():
+    '''
+    Return a newly built list of ``(key, metadata dict)`` pairs, one per
+    builtin field.
+
+    The display names are produced by ``_()`` / ``ngettext()`` each time the
+    function is called. The list order is significant: FieldMetadata inserts
+    the entries into an ordered mapping in this order.
+    '''
+    # This is a function so that changing the UI language allows newly created
+    # field metadata objects to have correctly translated labels for builtin
+    # fields.
+    return [
+            ('authors',   {'table':'authors',
+                           'column':'name',
+                           'link_column':'author',
+                           'category_sort':'sort',
+                           'datatype':'text',
+                           'is_multiple':{'cache_to_list': ',',
+                                          'ui_to_list': '&',
+                                          'list_to_ui': ' & '},
+                           'kind':'field',
+                           'name':_('Authors'),
+                           'search_terms':['authors', 'author'],
+                           'is_custom':False,
+                           'is_category':True,
+                           'is_csp': False}),
+            ('languages', {'table':'languages',
+                           'column':'lang_code',
+                           'link_column':'lang_code',
+                           'category_sort':'lang_code',
+                           'datatype':'text',
+                           'is_multiple':{'cache_to_list': ',',
+                                          'ui_to_list': ',',
+                                          'list_to_ui': ', '},
+                           'kind':'field',
+                           'name':_('Languages'),
+                           'search_terms':['languages', 'language'],
+                           'is_custom':False,
+                           'is_category':True,
+                           'is_csp': False}),
+
+            ('series',    {'table':'series',
+                           'column':'name',
+                           'link_column':'series',
+                           'category_sort':'(title_sort(name))',
+                           'datatype':'series',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':ngettext('Series', 'Series', 1),
+                           'search_terms':['series'],
+                           'is_custom':False,
+                           'is_category':True,
+                           'is_csp': False}),
+            ('formats',   {'table':None,
+                           'column':None,
+                           'datatype':'text',
+                           'is_multiple':{'cache_to_list': ',',
+                                          'ui_to_list': ',',
+                                          'list_to_ui': ', '},
+                           'kind':'field',
+                           'name':_('Formats'),
+                           'search_terms':['formats', 'format'],
+                           'is_custom':False,
+                           'is_category':True,
+                           'is_csp': False}),
+            ('publisher', {'table':'publishers',
+                           'column':'name',
+                           'link_column':'publisher',
+                           'category_sort':'name',
+                           'datatype':'text',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Publisher'),
+                           'search_terms':['publisher'],
+                           'is_custom':False,
+                           'is_category':True,
+                           'is_csp': False}),
+            ('rating',    {'table':'ratings',
+                           'column':'rating',
+                           'link_column':'rating',
+                           'category_sort':'rating',
+                           'datatype':'rating',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Rating'),
+                           'search_terms':['rating'],
+                           'is_custom':False,
+                           'is_category':True,
+                           'is_csp': False}),
+            ('news',      {'table':'news',
+                           'column':'name',
+                           'category_sort':'name',
+                           'datatype':None,
+                           'is_multiple':{},
+                           'kind':'category',
+                           'name':_('News'),
+                           'search_terms':[],
+                           'is_custom':False,
+                           'is_category':True,
+                           'is_csp': False}),
+            ('tags',      {'table':'tags',
+                           'column':'name',
+                           'link_column': 'tag',
+                           'category_sort':'name',
+                           'datatype':'text',
+                           'is_multiple':{'cache_to_list': ',',
+                                          'ui_to_list': ',',
+                                          'list_to_ui': ', '},
+                           'kind':'field',
+                           'name':_('Tags'),
+                           'search_terms':['tags', 'tag'],
+                           'is_custom':False,
+                           'is_category':True,
+                           'is_csp': False}),
+            ('identifiers',   {'table':None,
+                           'column':None,
+                           'datatype':'text',
+                           'is_multiple':{'cache_to_list': ',',
+                                          'ui_to_list': ',',
+                                          'list_to_ui': ', '},
+                           'kind':'field',
+                           'name':_('Identifiers'),
+                           'search_terms':['identifiers', 'identifier', 'isbn'],
+                           'is_custom':False,
+                           'is_category':True,
+                           'is_csp': True}),
+            ('author_sort',{'table':None,
+                            'column':None,
+                            'datatype':'text',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Author sort'),
+                           'search_terms':['author_sort'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('au_map',    {'table':None,
+                           'column':None,
+                           'datatype':'text',
+                           'is_multiple':{'cache_to_list': ',',
+                                          'ui_to_list': None,
+                                          'list_to_ui': None},
+                           'kind':'field',
+                           'name':None,
+                           'search_terms':[],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('comments',  {'table':None,
+                           'column':None,
+                           'datatype':'text',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Comments'),
+                           'search_terms':['comments', 'comment'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('cover',     {'table':None,
+                           'column':None,
+                           'datatype':'int',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Cover'),
+                           'search_terms':['cover'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('id',        {'table':None,
+                           'column':None,
+                           'datatype':'int',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':None,
+                           'search_terms':['id'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('last_modified', {'table':None,
+                           'column':None,
+                           'datatype':'datetime',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Modified'),
+                           'search_terms':['last_modified'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('ondevice',  {'table':None,
+                           'column':None,
+                           'datatype':'text',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('On device'),
+                           'search_terms':['ondevice'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('path',      {'table':None,
+                           'column':None,
+                           'datatype':'text',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Path'),
+                           'search_terms':[],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('pubdate',   {'table':None,
+                           'column':None,
+                           'datatype':'datetime',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Published'),
+                           'search_terms':['pubdate'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('marked',    {'table':None,
+                           'column':None,
+                           'datatype':'text',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name': None,
+                           'search_terms':['marked'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('series_index',{'table':None,
+                             'column':None,
+                             'datatype':'float',
+                             'is_multiple':{},
+                             'kind':'field',
+                             'name':None,
+                             'search_terms':['series_index'],
+                             'is_custom':False,
+                             'is_category':False,
+                           'is_csp': False}),
+            ('series_sort',  {'table':None,
+                           'column':None,
+                           'datatype':'text',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Series sort'),
+                           'search_terms':['series_sort'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('sort',      {'table':None,
+                           'column':None,
+                           'datatype':'text',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Title sort'),
+                           'search_terms':['title_sort'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('size',      {'table':None,
+                           'column':None,
+                           'datatype':'float',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Size'),
+                           'search_terms':['size'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('timestamp', {'table':None,
+                           'column':None,
+                           'datatype':'datetime',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Date'),
+                           'search_terms':['date'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('title',     {'table':None,
+                           'column':None,
+                           'datatype':'text',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':_('Title'),
+                           'search_terms':['title'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+            ('uuid',      {'table':None,
+                           'column':None,
+                           'datatype':'text',
+                           'is_multiple':{},
+                           'kind':'field',
+                           'name':None,
+                           'search_terms':['uuid'],
+                           'is_custom':False,
+                           'is_category':False,
+                           'is_csp': False}),
+        ]
+# }}}
+
+
+class FieldMetadata(object):
+    '''
+    Ordered, dict-like registry of field metadata.
+
+    key: the key to the dictionary is:
+    - for standard fields, the metadata field name.
+    - for custom fields, the metadata field name prefixed by '#'
+    This is done to create two 'namespaces' so the names don't clash
+
+    label: the actual column label. No prefixing.
+
+    datatype: the type of information in the field. Valid values are listed in
+    VALID_DATA_TYPES below.
+    is_multiple: valid for the text datatype. If {}, the field is to be
+    treated as a single term. Otherwise, it contains a dict of the form
+            {'cache_to_list': ',',
+             'ui_to_list': ',',
+             'list_to_ui': ', '}
+    where the cache_to_list contains the character used to split the value in
+    the meta2 table, ui_to_list contains the character used to create a list
+    from a value shown in the ui (each resulting value must be strip()ed and
+    empty values removed), and list_to_ui contains the string used in join()
+    to create a displayable string from the list.
+
+    kind == field: is a db field.
+    kind == category: standard tag category that isn't a field. see news.
+    kind == user: user-defined tag category.
+    kind == search: saved-searches category.
+
+    is_category: is a tag browser category. If true, then:
+       table: name of the db table used to construct item list
+       column: name of the column in the normalized table to join on
+       link_column: name of the column in the connection table to join on. This
+                    key should not be present if there is no link table
+       category_sort: the field in the normalized table to sort on. This
+                      key must be present if is_category is True
+       If these are None, then the category constructor must know how
+       to build the item list (e.g., formats, news).
+       The order below is the order that the categories will
+       appear in the tags pane.
+
+    name: the text that is to be used when displaying the field. Column headings
+    in the GUI, etc.
+
+    search_terms: the terms that can be used to identify the field when
+    searching. They can be thought of as aliases for metadata keys, but are only
+    valid when passed to search().
+
+    is_custom: the field has been added by the user.
+
+    rec_index: the index of the field in the db metadata record.
+
+    is_csp: field contains colon-separated pairs. Must also be text, is_multiple
+
+    '''
+
+    VALID_DATA_TYPES = frozenset([None, 'rating', 'text', 'comments', 'datetime',
+                'int', 'float', 'bool', 'series', 'composite', 'enumeration'])
+
+    # search labels that are not db columns
+    search_items = ['all', 'search', 'vl']
+    __calibre_serializable__ = True
+
+    def __init__(self):
+        '''
+        Register every builtin field and its search terms.
+
+        Raises ValueError if a builtin field of kind 'field' declares a
+        datatype that is not in VALID_DATA_TYPES, or if two builtin fields
+        share a search term.
+        '''
+        self._field_metadata = _builtin_field_metadata()
+        self._tb_cats = OrderedDict()
+        self._tb_custom_fields = {}
+        self._search_term_map = {}
+        self.custom_label_to_key_map = {}
+        for k,v in self._field_metadata:
+            if v['kind'] == 'field' and v['datatype'] not in self.VALID_DATA_TYPES:
+                raise ValueError('Unknown datatype %s for field %s'%(v['datatype'], k))
+            self._tb_cats[k] = v
+            self._tb_cats[k]['label'] = k
+            self._tb_cats[k]['display'] = {}
+            self._tb_cats[k]['is_editable'] = True
+            self._add_search_terms_to_map(k, v['search_terms'])
+        # The date fields take their display format from the tweaks.
+        self._tb_cats['timestamp']['display'] = {
+                        'date_format': tweaks['gui_timestamp_display_format']}
+        self._tb_cats['pubdate']['display'] = {
+                        'date_format': tweaks['gui_pubdate_display_format']}
+        self._tb_cats['last_modified']['display'] = {
+                        'date_format': tweaks['gui_last_modified_display_format']}
+        self.custom_field_prefix = '#'
+        # Expose the underlying mapping's get() directly. Note that, unlike
+        # __getitem__, it does not translate the 'title_sort' alias.
+        self.get = self._tb_cats.get
+
+    def __getitem__(self, key):
+        '''Return the metadata dict for key; 'title_sort' is an alias of 'sort'.'''
+        if key == 'title_sort':
+            return self._tb_cats['sort']
+        return self._tb_cats[key]
+
+    def __setitem__(self, key, val):
+        '''Item assignment is not supported; always raises AttributeError.'''
+        raise AttributeError('Assigning to this object is forbidden')
+
+    def __delitem__(self, key):
+        '''Remove the entry for key from the field map.'''
+        del self._tb_cats[key]
+
+    def __iter__(self):
+        '''Iterate over the keys in insertion order.'''
+        for key in self._tb_cats:
+            yield key
+
+    def __contains__(self, key):
+        '''True if key is a known field key or the 'title_sort' alias.'''
+        return key in self._tb_cats or key == 'title_sort'
+
+    def has_key(self, key):
+        '''Same as ``key in self``.'''
+        return key in self
+
+    def keys(self):
+        '''Return a list of all keys.'''
+        return list(self._tb_cats.keys())
+
+    def __eq__(self, other):
+        '''
+        Two instances are equal when their custom fields, search term map,
+        label map, custom prefix and field map contents all compare equal.
+        The field maps are compared as plain dicts, so ordering is ignored.
+        '''
+        if not isinstance(other, FieldMetadata):
+            return False
+        for attr in ('_tb_custom_fields', '_search_term_map', 'custom_label_to_key_map', 'custom_field_prefix'):
+            if getattr(self, attr) != getattr(other, attr):
+                return False
+        return dict(self._tb_cats) == dict(other._tb_cats)
+
+    def __ne__(self, other):
+        '''Inverse of __eq__.'''
+        return not self.__eq__(other)
+
+    def sortable_field_keys(self):
+        '''Keys of kind 'field' whose datatype is not None.'''
+        return [k for k in self._tb_cats.keys()
+                if self._tb_cats[k]['kind']=='field' and
+                   self._tb_cats[k]['datatype'] is not None]
+
+    def ui_sortable_field_keys(self):
+        '''
+        Return a {key: display name} dict of the sortable fields, leaving out
+        a fixed set of keys and any field with an empty name. The 'cover'
+        entry is always present and is labelled 'Has cover'.
+        '''
+        ans = {k:self._tb_cats[k]['name'] for k in set(self.sortable_field_keys()) - {
+            'sort', 'author_sort', 'au_map', 'series_sort', 'marked',
+            'series_index', 'path', 'formats', 'identifiers', 'uuid',
+            'comments',
+        } if self._tb_cats[k]['name']}
+        ans['cover'] = _('Has cover')
+        return ans
+
+    def displayable_field_keys(self):
+        '''
+        Keys of kind 'field' with a datatype, excluding a fixed set of
+        keys and any series index field.
+        '''
+        return [k for k in self._tb_cats.keys()
+                if self._tb_cats[k]['kind']=='field' and
+                   self._tb_cats[k]['datatype'] is not None and
+                   k not in ('au_map', 'marked', 'ondevice', 'cover', 'series_sort') and
+                   not self.is_series_index(k)]
+
+    def standard_field_keys(self):
+        '''Keys of kind 'field' that are not flagged is_custom.'''
+        return [k for k in self._tb_cats.keys()
+                if self._tb_cats[k]['kind']=='field' and
+                   not self._tb_cats[k]['is_custom']]
+
+    def custom_field_keys(self, include_composites=True):
+        '''
+        Keys of kind 'field' flagged is_custom. Fields of datatype
+        'composite' are skipped when include_composites is false.
+        '''
+        res = []
+        for k in self._tb_cats.keys():
+            fm = self._tb_cats[k]
+            if fm['kind']=='field' and fm['is_custom'] and \
+                   (fm['datatype'] != 'composite' or include_composites):
+                res.append(k)
+        return res
+
+    def all_field_keys(self):
+        '''All keys of kind 'field'.'''
+        return [k for k in self._tb_cats.keys() if self._tb_cats[k]['kind']=='field']
+
+    def iterkeys(self):
+        '''Iterate over the keys in insertion order.'''
+        for key in self._tb_cats:
+            yield key
+
+    def itervalues(self):
+        '''Iterate over the metadata dicts (delegates to the module-level itervalues).'''
+        return itervalues(self._tb_cats)
+
+    def values(self):
+        '''Return a list of all metadata dicts.'''
+        return list(self._tb_cats.values())
+
+    def iteritems(self):
+        '''Iterate over (key, metadata dict) pairs in insertion order.'''
+        for key in self._tb_cats:
+            yield (key, self._tb_cats[key])
+    iter_items = iteritems
+
+    def custom_iteritems(self):
+        '''Iterate over (key, metadata dict) pairs of the custom fields.'''
+        for key, meta in iteritems(self._tb_custom_fields):
+            yield (key, meta)
+
+    def items(self):
+        '''Return a list of (key, metadata dict) pairs.'''
+        return list(self.iter_items())
+
+    def is_custom_field(self, key):
+        '''True if key starts with the custom field prefix.'''
+        return key.startswith(self.custom_field_prefix)
+
+    def is_ignorable_field(self, key):
+        'Custom fields and user categories are ignorable'
+        return self.is_custom_field(key) or key.startswith('@')
+
+    def ignorable_field_keys(self):
+        '''All keys for which is_ignorable_field() is true.'''
+        return [k for k in self._tb_cats if self.is_ignorable_field(k)]
+
+    def is_series_index(self, key):
+        '''
+        True if key names a float field ending in '_index' whose base name
+        (key without the suffix) is itself a known key. Unknown or
+        unsuitable keys yield False rather than raising.
+        '''
+        try:
+            m = self._tb_cats[key]
+            return (m['datatype'] == 'float' and key.endswith('_index') and
+                    key[:-6] in self._tb_cats)
+        except (KeyError, ValueError, TypeError, AttributeError):
+            return False
+
+    def key_to_label(self, key):
+        '''Return the label stored for key, or key itself if none is stored.'''
+        if 'label' not in self._tb_cats[key]:
+            return key
+        return self._tb_cats[key]['label']
+
+    def label_to_key(self, label, prefer_custom=False):
+        '''
+        Map a column label to its key in the field map.
+
+        With prefer_custom true the custom label map is consulted first,
+        otherwise the field map itself is consulted first. Raises ValueError
+        if the label is found in neither.
+        '''
+        if prefer_custom:
+            if label in self.custom_label_to_key_map:
+                return self.custom_label_to_key_map[label]
+        # Look up the label that was passed in. The previous code tested the
+        # literal string 'label', so this branch practically never matched.
+        if label in self._tb_cats:
+            return label
+        if not prefer_custom:
+            if label in self.custom_label_to_key_map:
+                return self.custom_label_to_key_map[label]
+        raise ValueError('Unknown key [%s]'%(label))
+
+    def all_metadata(self):
+        '''Return a new plain dict mapping every key to its metadata dict.'''
+        return dict(self._tb_cats)
+
+    def custom_field_metadata(self, include_composites=True):
+        '''
+        Return {key: metadata} for the custom fields. With
+        include_composites true the internal dict itself is returned (not a
+        copy); otherwise a new dict without composite fields is built.
+        '''
+        if include_composites:
+            return self._tb_custom_fields
+        l = {}
+        for k in self.custom_field_keys(include_composites):
+            l[k] = self._tb_cats[k]
+        return l
+
+    def add_custom_field(self, label, table, column, datatype, colnum, name,
+                         display, is_editable, is_multiple, is_category,
+                         is_csp=False):
+        '''
+        Register a custom field under the key '#' + label.
+
+        For datatype 'series' a companion '<key>_index' float field is
+        registered as well. Raises ValueError for a duplicate key, an
+        unknown datatype, or a search term that is already taken.
+        '''
+        key = self.custom_field_prefix + label
+        if key in self._tb_cats:
+            raise ValueError('Duplicate custom field [%s]'%(label))
+        if datatype not in self.VALID_DATA_TYPES:
+            raise ValueError('Unknown datatype %s for field %s'%(datatype, key))
+        self._tb_cats[key] = {'table':table,       'column':column,
+                             'datatype':datatype,  'is_multiple':is_multiple,
+                             'kind':'field',       'name':name,
+                             'search_terms':[key], 'label':label,
+                             'colnum':colnum,      'display':display,
+                             'is_custom':True,     'is_category':is_category,
+                             'link_column':'value','category_sort':'value',
+                             'is_csp' : is_csp,     'is_editable': is_editable,}
+        self._tb_custom_fields[key] = self._tb_cats[key]
+        self._add_search_terms_to_map(key, [key])
+        self.custom_label_to_key_map[label] = key
+        if datatype == 'series':
+            # The index companion is not flagged is_custom and is not added
+            # to _tb_custom_fields.
+            key += '_index'
+            self._tb_cats[key] = {'table':None,        'column':None,
+                                 'datatype':'float',   'is_multiple':{},
+                                 'kind':'field',       'name':'',
+                                 'search_terms':[key], 'label':label+'_index',
+                                 'colnum':None,        'display':{},
+                                 'is_custom':False,    'is_category':False,
+                                 'link_column':None,   'category_sort':None,
+                                 'is_editable': False, 'is_csp': False}
+            self._add_search_terms_to_map(key, [key])
+            self.custom_label_to_key_map[label+'_index'] = key
+
+    def _remove_categories(self, kinds):
+        '''Drop every category whose kind is in kinds, with its search terms.'''
+        # Iterate over a snapshot of the keys because entries are deleted.
+        for key in list(self._tb_cats.keys()):
+            val = self._tb_cats[key]
+            if val['is_category'] and val['kind'] in kinds:
+                for k in val['search_terms']:
+                    if k in self._search_term_map:
+                        del self._search_term_map[k]
+                del self._tb_cats[key]
+
+    def remove_dynamic_categories(self):
+        '''Remove all user and saved-search categories.'''
+        self._remove_categories(('user', 'search'))
+
+    def remove_user_categories(self):
+        '''Remove all user categories.'''
+        self._remove_categories(('user',))
+
+    def _remove_grouped_search_terms(self):
+        '''Drop every search term whose mapped value is a list.'''
+        to_remove = [v for v in self._search_term_map
+                        if isinstance(self._search_term_map[v], list)]
+        for v in to_remove:
+            del self._search_term_map[v]
+
+    def add_grouped_search_terms(self, gst):
+        '''
+        Replace the grouped search terms with those in gst (term -> value).
+        A term that clashes with an existing one is skipped and the
+        traceback is printed.
+        '''
+        self._remove_grouped_search_terms()
+        for t in gst:
+            try:
+                self._add_search_terms_to_map(gst[t], [t])
+            except ValueError:
+                traceback.print_exc()
+
+    def cc_series_index_column_for(self, key):
+        '''Return the rec_index of key plus one.'''
+        return self._tb_cats[key]['rec_index'] + 1
+
+    def add_user_category(self, label, name):
+        '''
+        Register a user category under label. Its search terms are the label
+        and, when different, its icu_lower() form. Raises ValueError if the
+        label or one of the search terms already exists.
+        '''
+        if label in self._tb_cats:
+            raise ValueError('Duplicate user field [%s]'%(label))
+        st = [label]
+        if icu_lower(label) != label:
+            st.append(icu_lower(label))
+        self._tb_cats[label] = {'table':None,          'column':None,
+                                'datatype':None,       'is_multiple':{},
+                                'kind':'user',         'name':name,
+                                'search_terms':st,     'is_custom':False,
+                                'is_category':True,    'is_csp': False}
+        self._add_search_terms_to_map(label, st)
+
+    def add_search_category(self, label, name, fail_on_existing=True):
+        '''
+        Register a saved-search category under label. If the label already
+        exists, raise ValueError, or return silently when fail_on_existing
+        is false.
+        '''
+        if label in self._tb_cats:
+            if not fail_on_existing:
+                return
+            raise ValueError('Duplicate user field [%s]'%(label))
+        self._tb_cats[label] = {'table':None,        'column':None,
+                                'datatype':None,     'is_multiple':{},
+                                'kind':'search',     'name':name,
+                                'search_terms':[],   'is_custom':False,
+                                'is_category':True,  'is_csp': False}
+
+    def set_field_record_index(self, label, index, prefer_custom=False):
+        '''
+        Store index as the rec_index of the field identified by label.
+
+        prefer_custom selects whether the '#'-prefixed key or the plain
+        label is tried first; the other one is the fallback. Raises KeyError
+        if the chosen key does not exist.
+        '''
+        if prefer_custom:
+            key = self.custom_field_prefix+label
+            if key not in self._tb_cats:
+                key = label
+        else:
+            if label in self._tb_cats:
+                key = label
+            else:
+                key = self.custom_field_prefix+label
+        self._tb_cats[key]['rec_index'] = index  # let the exception fly ...
+
+    def get_search_terms(self):
+        '''Return the sorted search terms followed by the search_items.'''
+        s_keys = sorted(self._search_term_map.keys())
+        for v in self.search_items:
+            s_keys.append(v)
+        return s_keys
+
+    def _add_search_terms_to_map(self, key, terms):
+        '''
+        Map each term in terms to key. A None terms value is ignored.
+        Raises ValueError if a term is already mapped.
+        '''
+        if terms is not None:
+            for t in terms:
+                if t in self._search_term_map:
+                    raise ValueError('Attempt to add duplicate search term "%s"'%t)
+                self._search_term_map[t] = key
+
+    def search_term_to_field_key(self, term):
+        '''Return the key mapped to term, or term itself if it is unmapped.'''
+        return self._search_term_map.get(term, term)
+
+    def searchable_fields(self):
+        '''Keys of kind 'field' that have at least one search term.'''
+        return [k for k in self._tb_cats.keys()
+                if self._tb_cats[k]['kind']=='field' and
+                   len(self._tb_cats[k]['search_terms']) > 0]
+
+
+# The following two functions support serialization of FieldMetadata objects.
+# Note that they do not create copies of internal structures, for performance,
+# so they are not safe to use for anything else.
+def fm_as_dict(self):
+    '''
+    Return the serializable state of the FieldMetadata instance ``self``.
+
+    The returned dict references the instance's internal structures
+    directly; nothing is copied.
+    '''
+    user_categories = {}
+    search_categories = {}
+    for key, meta in iteritems(self._tb_cats):
+        kind = meta['kind']
+        if kind == 'user':
+            user_categories[key] = meta
+        elif kind == 'search':
+            search_categories[key] = meta
+    return {
+        'custom_fields': self._tb_custom_fields,
+        'search_term_map': self._search_term_map,
+        'custom_label_to_key_map': self.custom_label_to_key_map,
+        'user_categories': user_categories,
+        'search_categories': search_categories,
+    }
+
+
+def fm_from_dict(src):
+    '''
+    Rebuild a FieldMetadata instance from a dict produced by fm_as_dict().
+
+    The structures in ``src`` are attached to the new instance as-is, not
+    copied.
+    '''
+    restored = FieldMetadata()
+    restored._tb_custom_fields = src['custom_fields']
+    restored._search_term_map = src['search_term_map']
+    restored.custom_label_to_key_map = src['custom_label_to_key_map']
+    categories = restored._tb_cats
+    # Re-insert the non-builtin entries on top of the freshly built builtins.
+    for section in ('custom_fields', 'user_categories', 'search_categories'):
+        for key, meta in iteritems(src[section]):
+            categories[key] = meta
+    return restored