1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-30 00:33:32 +02:00

Initial import

This commit is contained in:
2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions

View File

@@ -0,0 +1,106 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
class PDBError(Exception):
    """Base exception raised for errors while handling PDB files."""
# Lazily-populated map of 8-byte PDB identity string -> reader class.
# Stays None until get_reader() triggers _import_readers().
FORMAT_READERS = None


def _import_readers():
    # Import the reader implementations on demand so that merely importing
    # this module does not pull in every format's dependencies.
    global FORMAT_READERS
    from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
    from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
    from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
    from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
    from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader
    from calibre.ebooks.pdb.haodoo.reader import Reader as haodoo_reader

    FORMAT_READERS = {
        'PNPdPPrs': ereader_reader,
        'PNRdPPrs': ereader_reader,
        'zTXTGPlm': ztxt_reader,
        'TEXtREAd': palmdoc_reader,
        '.pdfADBE': pdf_reader,
        'DataPlkr': plucker_reader,
        'BOOKMTIT': haodoo_reader,
        'BOOKMTIU': haodoo_reader,
    }
# Output extensions for which a writer implementation exists.
ALL_FORMAT_WRITERS = {'doc', 'ztxt', 'ereader'}

# Lazily-populated map of output extension -> writer class.
# Stays None until get_writer() triggers _import_writers().
FORMAT_WRITERS = None


def _import_writers():
    # Import the writer implementations on demand, mirroring _import_readers.
    global FORMAT_WRITERS
    from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
    from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer
    from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer

    FORMAT_WRITERS = {
        'doc': palmdoc_writer,
        'ztxt': ztxt_writer,
        'ereader': ereader_writer,
    }
# Human-readable format names keyed by the 8-byte PDB type/creator identity.
# Deliberately wider than FORMAT_READERS: formats listed here can be
# identified even when no reader for them exists.
IDENTITY_TO_NAME = {
    'PNPdPPrs': 'eReader',
    'PNRdPPrs': 'eReader',
    'zTXTGPlm': 'zTXT',
    'TEXtREAd': 'PalmDOC',
    '.pdfADBE': 'Adobe Reader',
    'DataPlkr': 'Plucker',
    'BOOKMTIT': 'Haodoo.net',
    'BOOKMTIU': 'Haodoo.net',

    'BVokBDIC': 'BDicty',
    'DB99DBOS': 'DB (Database program)',
    'vIMGView': 'FireViewer (ImageViewer)',
    'PmDBPmDB': 'HanDBase',
    'InfoINDB': 'InfoView',
    'ToGoToGo': 'iSilo',
    'SDocSilX': 'iSilo 3',
    'JbDbJBas': 'JFile',
    'JfDbJFil': 'JFile Pro',
    'DATALSdb': 'LIST',
    'Mdb1Mdb1': 'MobileDB',
    'BOOKMOBI': 'MobiPocket',
    'DataSprd': 'QuickSheet',
    'SM01SMem': 'SuperMemo',
    'TEXtTlDc': 'TealDoc',
    'InfoTlIf': 'TealInfo',
    'DataTlMl': 'TealMeal',
    'DataTlPt': 'TealPaint',
    'dataTDBP': 'ThinkDB',
    'TdatTide': 'Tides',
    'ToRaTRPW': 'TomeRaider',
    'BDOCWrdS': 'WordSmith',
}
def get_reader(identity):
    '''
    Return the reader class registered for the given 8-byte PDB identity
    string, or None when the format is unsupported.
    '''
    if FORMAT_READERS is None:
        _import_readers()
    return FORMAT_READERS.get(identity)
def get_writer(extension):
    '''
    Return the writer class registered for the given output extension, or
    None when no writer supports it.
    '''
    if FORMAT_WRITERS is None:
        _import_writers()
    return FORMAT_WRITERS.get(extension)

View File

@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
class EreaderError(Exception):
    """Raised for problems specific to eReader PDB files."""
def image_name(name, taken_names=()):
    """Return a 32-byte, NUL-padded image name derived from *name*.

    Over-long names are shrunk by keeping the first 10 characters and the
    tail, then suffixing '.png'. Collisions with *taken_names* are resolved
    by appending an increasing counter before the extension. The result is
    always exactly 32 characters, padded with NULs.
    """
    candidate = os.path.basename(name)

    if len(candidate) > 32:
        overflow = len(candidate) - 32
        candidate = '%s%s.png' % (candidate[:10], candidate[10 + overflow:])

    stem, ext = os.path.splitext(candidate)
    counter = 0
    while candidate in taken_names:
        counter += 1
        candidate = '%s%s%s' % (stem, counter, ext)

    return candidate.ljust(32, '\x00')[:32]

View File

@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from ereader pdb file.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.pdb.ereader import EreaderError
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ereader.reader132 import Reader132
from calibre.ebooks.pdb.ereader.reader202 import Reader202
class Reader(FormatReader):

    '''
    Dispatching reader for eReader PDB files: picks the concrete reader
    implementation based on the size of header record 0.
    '''

    def __init__(self, header, stream, log, options):
        # Record 0's size identifies the producing tool: 132 bytes ->
        # Dropbook (Reader132), 116/202 bytes -> Makebook (Reader202).
        record0_size = len(header.section_data(0))

        if record0_size == 132:
            self.reader = Reader132(header, stream, log, options)
        elif record0_size in (116, 202):
            self.reader = Reader202(header, stream, log, options)
        else:
            # NOTE(review): record0_size is a byte count; the 'KB' in this
            # message looks wrong — confirm before changing user-facing text.
            raise EreaderError('Size mismatch. eReader header record size %s KB is not supported.' % record0_size)

    def extract_content(self, output_dir):
        # Delegate to the concrete reader chosen in __init__.
        return self.reader.extract_content(output_dir)

    def dump_pml(self):
        return self.reader.dump_pml()

    def dump_images(self, out_dir):
        return self.reader.dump_images(out_dir)

View File

@@ -0,0 +1,221 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from ereader pdb file with a 132 byte header created by Dropbook.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import re
import struct
import zlib
from calibre import CurrentDir
from calibre.ebooks import DRMError
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.pdb.ereader import EreaderError
from calibre.ebooks.pdb.formatreader import FormatReader
from polyglot.builtins import unicode_type, range
class HeaderRecord(object):
    '''
    The first record in the file is always the header record. It holds
    information related to the location of text, images, and so on
    in the file. This is used in conjunction with the sections
    defined in the file header.
    '''

    # (attribute name, byte offset) pairs; every field is a big-endian
    # unsigned 16-bit integer.
    _LAYOUT = (
        ('compression', 0),
        ('non_text_offset', 12),
        ('chapter_count', 14),
        ('image_count', 20),
        ('link_count', 22),
        ('has_metadata', 24),
        ('footnote_count', 28),
        ('sidebar_count', 30),
        ('chapter_offset', 32),
        ('small_font_page_offset', 36),
        ('large_font_page_offset', 38),
        ('image_data_offset', 40),
        ('link_offset', 42),
        ('metadata_offset', 44),
        ('footnote_offset', 48),
        ('sidebar_offset', 50),
        ('last_data_offset', 52),
    )

    def __init__(self, raw):
        for attr, offset in self._LAYOUT:
            setattr(self, attr, struct.unpack_from('>H', raw, offset)[0])

        # Text records occupy sections 1 .. non_text_offset - 1.
        self.num_text_pages = self.non_text_offset - 1
        # Image records sit between image_data_offset and metadata_offset.
        self.num_image_pages = self.metadata_offset - self.image_data_offset
class Reader132(FormatReader):

    '''
    Reader for eReader PDB files whose record 0 is the 132-byte header
    produced by Dropbook.
    '''

    def __init__(self, header, stream, log, options):
        self.log = log
        # May be None; cp1252 is then assumed, per the eReader spec.
        self.encoding = options.input_encoding

        self.log.debug('132 byte header version found.')

        # Cache every PDB record up front; later access is index-based.
        self.sections = []
        for i in range(header.num_sections):
            self.sections.append(header.section_data(i))

        self.header_record = HeaderRecord(self.section_data(0))

        # 2 = PalmDoc compression, 10 = zlib; 260 and 272 indicate DRM.
        if self.header_record.compression not in (2, 10):
            if self.header_record.compression in (260, 272):
                raise DRMError('eReader DRM is not supported.')
            else:
                raise EreaderError('Unknown book compression %i.' % self.header_record.compression)

        from calibre.ebooks.metadata.pdb import get_metadata
        self.mi = get_metadata(stream, False)

    def section_data(self, number):
        # Raw bytes of PDB record `number`.
        return self.sections[number]

    def decompress_text(self, number):
        # Decompress a text record per the header's compression field and
        # decode it (eReader text is cp1252 unless overridden).
        if self.header_record.compression == 2:
            from calibre.ebooks.compression.palmdoc import decompress_doc
            return decompress_doc(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
        if self.header_record.compression == 10:
            return zlib.decompress(self.section_data(number)).decode('cp1252' if self.encoding is None else self.encoding, 'replace')

    def get_image(self, number):
        '''
        Return (name, data) for the image stored in record `number`, or
        ('empty', b'') when the record is outside the image range.
        '''
        if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
            return 'empty', b''
        data = self.section_data(number)
        # 32-byte NUL-padded name at offset 4; image payload at offset 62.
        name = data[4:4 + 32].strip(b'\x00').decode(self.encoding or 'cp1252')
        img = data[62:]
        return name, img

    def get_text_page(self, number):
        '''
        Only palmdoc and zlib compressed are supported. The text is
        assumed to be encoded as Windows-1252. The encoding is part of
        the eReader file spec and should always be this encoding.
        '''
        if not (1 <= number <= self.header_record.num_text_pages):
            return ''

        return self.decompress_text(number)

    def extract_content(self, output_dir):
        from calibre.ebooks.pml.pmlconverter import footnote_to_html, sidebar_to_html
        from calibre.ebooks.pml.pmlconverter import PML_HTMLizer

        output_dir = os.path.abspath(output_dir)

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        title = self.mi.title
        if not isinstance(title, unicode_type):
            title = title.decode('utf-8', 'replace')

        html = '<html><head><title>%s</title></head><body>' % title

        # Concatenate all text pages into a single PML document, then
        # convert that to HTML in one pass.
        pml = ''
        for i in range(1, self.header_record.num_text_pages + 1):
            self.log.debug('Extracting text page %i' % i)
            pml += self.get_text_page(i)
        hizer = PML_HTMLizer()
        html += hizer.parse_pml(pml, 'index.html')
        toc = hizer.get_toc()

        if self.header_record.footnote_count > 0:
            # _ is presumably calibre's globally-installed gettext function.
            html += '<br /><h1>%s</h1>' % _('Footnotes')
            # Footnote ids are NUL-terminated words stored in the record at
            # footnote_offset; the footnote bodies follow in later records.
            footnoteids = re.findall(
                '\\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding))
            # fid is deliberately rebound from index to id string below.
            for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_count)):
                self.log.debug('Extracting footnote page %i' % i)
                if fid < len(footnoteids):
                    fid = footnoteids[fid]
                else:
                    fid = ''
                html += footnote_to_html(fid, self.decompress_text(i))

        if self.header_record.sidebar_count > 0:
            html += '<br /><h1>%s</h1>' % _('Sidebar')
            # Same id/body layout as footnotes.
            sidebarids = re.findall(
                '\\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
            for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_count)):
                self.log.debug('Extracting sidebar page %i' % i)
                if sid < len(sidebarids):
                    sid = sidebarids[sid]
                else:
                    sid = ''
                html += sidebar_to_html(sid, self.decompress_text(i))

        html += '</body></html>'

        with CurrentDir(output_dir):
            with open('index.html', 'wb') as index:
                self.log.debug('Writing text to index.html')
                index.write(html.encode('utf-8'))

        if not os.path.exists(os.path.join(output_dir, 'images/')):
            os.makedirs(os.path.join(output_dir, 'images/'))
        images = []
        with CurrentDir(os.path.join(output_dir, 'images/')):
            for i in range(0, self.header_record.num_image_pages):
                name, img = self.get_image(self.header_record.image_data_offset + i)
                images.append(name)
                with open(name, 'wb') as imgf:
                    self.log.debug('Writing image %s to images/' % name)
                    imgf.write(img)

        opf_path = self.create_opf(output_dir, images, toc)

        return opf_path

    def create_opf(self, output_dir, images, toc):
        # Build metadata.opf + toc.ncx describing index.html and the images.
        with CurrentDir(output_dir):
            if 'cover.png' in images:
                self.mi.cover = os.path.join('images', 'cover.png')

            opf = OPFCreator(output_dir, self.mi)

            manifest = [('index.html', None)]

            for i in images:
                manifest.append((os.path.join('images', i), None))

            opf.create_manifest(manifest)
            opf.create_spine(['index.html'])
            opf.set_toc(toc)

            with open('metadata.opf', 'wb') as opffile:
                with open('toc.ncx', 'wb') as tocfile:
                    opf.render(opffile, tocfile, 'toc.ncx')

        return os.path.join(output_dir, 'metadata.opf')

    def dump_pml(self):
        '''
        This is primarily used for debugging and 3rd party tools to
        get the pml markup that comprises the text in the file.
        '''
        pml = ''

        for i in range(1, self.header_record.num_text_pages + 1):
            pml += self.get_text_page(i)

        return pml

    def dump_images(self, output_dir):
        '''
        This is primarily used for debugging and 3rd party tools to
        get the images in the file.
        '''
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        with CurrentDir(output_dir):
            for i in range(0, self.header_record.num_image_pages):
                name, img = self.get_image(self.header_record.image_data_offset + i)
                with open(name, 'wb') as imgf:
                    imgf.write(img)

View File

@@ -0,0 +1,169 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from ereader pdb file with a 116 and 202 byte header created by Makebook.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import struct
from calibre import CurrentDir
from calibre.ebooks.metadata.opf2 import OPFCreator
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ereader import EreaderError
from polyglot.builtins import unicode_type, range
class HeaderRecord(object):
    '''
    The first record in the file is always the header record. It holds
    information related to the location of text, images, and so on
    in the file. This is used in conjunction with the sections
    defined in the file header.
    '''

    def __init__(self, raw):
        # Both fields are big-endian unsigned 16-bit integers.
        version, = struct.unpack_from('>H', raw, 0)
        non_text, = struct.unpack_from('>H', raw, 8)

        self.version = version
        self.non_text_offset = non_text
        # Text records occupy sections 1 .. non_text_offset - 1.
        self.num_text_pages = non_text - 1
class Reader202(FormatReader):

    '''
    Reader for eReader PDB files whose record 0 is the 116- or 202-byte
    header written by Makebook.
    '''

    def __init__(self, header, stream, log, options):
        self.log = log
        # May be None; the eReader spec fixes cp1252, used as the fallback.
        self.encoding = options.input_encoding

        self.log.debug('202 byte header version found.')

        # Cache every PDB record up front; later access is index-based.
        self.sections = []
        for i in range(header.num_sections):
            self.sections.append(header.section_data(i))

        self.header_record = HeaderRecord(self.section_data(0))

        if self.header_record.version not in (2, 4):
            raise EreaderError('Unknown book version %i.' % self.header_record.version)

        from calibre.ebooks.metadata.pdb import get_metadata
        self.mi = get_metadata(stream, False)

    def section_data(self, number):
        # Raw bytes of PDB record `number`.
        return self.sections[number]

    def decompress_text(self, number):
        # Text records are XORed with 0xA5 and then PalmDoc compressed.
        from calibre.ebooks.compression.palmdoc import decompress_doc
        data = bytearray(self.section_data(number))
        data = bytes(bytearray(x ^ 0xA5 for x in data))
        return decompress_doc(data).decode(self.encoding or 'cp1252', 'replace')

    def get_image(self, number):
        '''
        Return (name, data) for the image in record `number`, or
        (None, None) when the record does not hold a PNG image.
        '''
        name = None
        img = None

        data = self.section_data(number)
        if data.startswith(b'PNG'):
            # 32-byte NUL-padded name at offset 4; payload at offset 62.
            name = data[4:4 + 32].strip(b'\x00')
            img = data[62:]

        return name, img

    def get_text_page(self, number):
        '''
        Only palmdoc compression is supported. The text is xored with 0xA5 and
        assumed to be encoded as Windows-1252. The encoding is part of
        the eReader file spec and should always be this encoding.
        '''
        if not (1 <= number <= self.header_record.num_text_pages):
            return ''

        return self.decompress_text(number)

    def extract_content(self, output_dir):
        '''Write index.html plus images into output_dir; return OPF path.'''
        from calibre.ebooks.pml.pmlconverter import pml_to_html

        output_dir = os.path.abspath(output_dir)

        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        pml = ''
        for i in range(1, self.header_record.num_text_pages + 1):
            self.log.debug('Extracting text page %i' % i)
            pml += self.get_text_page(i)

        title = self.mi.title
        if not isinstance(title, unicode_type):
            title = title.decode('utf-8', 'replace')

        html = '<html><head><title>%s</title></head><body>%s</body></html>' % \
            (title, pml_to_html(pml))

        with CurrentDir(output_dir):
            with open('index.html', 'wb') as index:
                self.log.debug('Writing text to index.html')
                index.write(html.encode('utf-8'))

        if not os.path.exists(os.path.join(output_dir, 'images/')):
            os.makedirs(os.path.join(output_dir, 'images/'))
        images = []
        with CurrentDir(os.path.join(output_dir, 'images/')):
            # Every record from non_text_offset onwards may hold an image.
            for i in range(self.header_record.non_text_offset, len(self.sections)):
                name, img = self.get_image(i)
                if name:
                    images.append(name)
                    with open(name, 'wb') as imgf:
                        self.log.debug('Writing image %s to images/' % name)
                        imgf.write(img)

        opf_path = self.create_opf(output_dir, images)

        return opf_path

    def create_opf(self, output_dir, images):
        # Build metadata.opf describing index.html and the images.
        with CurrentDir(output_dir):
            opf = OPFCreator(output_dir, self.mi)

            manifest = [('index.html', None)]

            for i in images:
                manifest.append((os.path.join('images/', i), None))

            opf.create_manifest(manifest)
            opf.create_spine(['index.html'])
            with open('metadata.opf', 'wb') as opffile:
                opf.render(opffile)

        return os.path.join(output_dir, 'metadata.opf')

    def dump_pml(self):
        '''
        This is primarily used for debugging and 3rd party tools to
        get the pml markup that comprises the text in the file.
        '''
        pml = ''

        for i in range(1, self.header_record.num_text_pages + 1):
            pml += self.get_text_page(i)

        return pml

    def dump_images(self, output_dir):
        '''
        This is primarily used for debugging and 3rd party tools to
        get the images in the file.
        '''
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        with CurrentDir(output_dir):
            # BUG FIX: the old code iterated over
            # self.header_record.num_image_pages starting at
            # self.header_record.image_data_offset — attributes that belong
            # to the 132-byte header and are never set by this file's
            # HeaderRecord, so dump_images always raised AttributeError.
            # Iterate the candidate image sections exactly as
            # extract_content does instead.
            for i in range(self.header_record.non_text_offset, len(self.sections)):
                name, img = self.get_image(i)
                if name:
                    with open(name, 'wb') as imgf:
                        imgf.write(img)

View File

@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Interface defining the necessary public functions for a pdb format reader.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
class FormatReader(object):

    '''
    Interface defining the necessary public functions for a pdb format
    reader. Concrete readers must override both methods.
    '''

    def __init__(self, header, stream, log, options):
        # header: parsed PDB header; stream: open file; log: logger;
        # options: conversion options object.
        raise NotImplementedError()

    def extract_content(self, output_dir):
        # Must write the book's content into output_dir and return the
        # path of an OPF file describing it.
        raise NotImplementedError()

View File

@@ -0,0 +1,10 @@
#!/usr/bin/env python2
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

View File

@@ -0,0 +1,157 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from Haodoo.net pdb file.
'''
__license__ = 'GPL v3'
__copyright__ = '2012, Kan-Ru Chen <kanru@kanru.info>'
__docformat__ = 'restructuredtext en'
import struct
import os
from calibre import prepare_string_for_xml
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.metadata import MetaInformation
from calibre.ebooks.txt.processor import opf_writer, HTML_TEMPLATE
from polyglot.builtins import range, map
# 8-byte PDB identity codes for Haodoo books: Big5-encoded (BPDB) and
# UTF-16LE-encoded (UPDB) variants.
BPDB_IDENT = b'BOOKMTIT'
UPDB_IDENT = b'BOOKMTIU'

# Punctuation substitution table applied by fix_punct().
# NOTE(review): nearly all keys and values below render as empty strings —
# the original CJK punctuation characters (presumably vertical-form to
# horizontal-form mappings) appear to have been lost in transcription; only
# u"︿" survived. Confirm against the upstream source before relying on
# this table.
punct_table = {
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"︿": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u"": u"",
    u" ": u" ",
}
def fix_punct(line):
    """Return *line* with every mapping in punct_table applied to it."""
    for src, dst in punct_table.items():
        line = line.replace(src, dst)
    return line
class LegacyHeaderRecord(object):

    '''Header record (record 0) of a Big5-encoded (BOOKMTIT) Haodoo file.'''

    def __init__(self, raw):
        # Fields are separated by ESC (0x1b); a tripled ESC is collapsed to
        # a single one before splitting. Layout: title, record count, then
        # one chapter title per remaining field.
        fields = raw.lstrip().replace(b'\x1b\x1b\x1b',
                                      b'\x1b').split(b'\x1b')
        self.title = fix_punct(fields[0].decode('cp950', 'replace'))
        self.num_records = int(fields[1])
        self.chapter_titles = list(map(
            lambda x: fix_punct(x.decode('cp950', 'replace').rstrip('\x00')),
            fields[2:]))
class UnicodeHeaderRecord(object):

    '''Header record (record 0) of a UTF-16LE (BOOKMTIU) Haodoo file.'''

    def __init__(self, raw):
        # Fields are separated by UTF-16LE ESC; a tripled ESC is collapsed
        # first. Unlike the legacy variant, all chapter titles live in
        # fields[2], separated by UTF-16LE CRLF.
        fields = raw.lstrip().replace(b'\x1b\x00\x1b\x00\x1b\x00',
                                      b'\x1b\x00').split(b'\x1b\x00')
        self.title = fix_punct(fields[0].decode('utf_16_le', 'ignore'))
        self.num_records = int(fields[1])
        self.chapter_titles = list(map(
            lambda x: fix_punct(x.decode('utf_16_le', 'replace').rstrip('\x00')),
            fields[2].split(b'\r\x00\n\x00')))
class Reader(FormatReader):

    '''
    Reader for Haodoo.net PDB/UPDB files: converts the chapter records to
    a single HTML file plus an OPF.
    '''

    def __init__(self, header, stream, log, options):
        self.stream = stream
        self.log = log

        # Cache every PDB record up front; later access is index-based.
        self.sections = []
        for i in range(header.num_sections):
            self.sections.append(header.section_data(i))

        # The identity code determines header layout and text encoding.
        if header.ident == BPDB_IDENT:
            self.header_record = LegacyHeaderRecord(self.section_data(0))
            self.encoding = 'cp950'
        else:
            self.header_record = UnicodeHeaderRecord(self.section_data(0))
            self.encoding = 'utf_16_le'

    def author(self):
        # Byte 35 appears to be a format version marker; version 2 files
        # store the author in the first 35 bytes — TODO confirm against the
        # Haodoo format documentation.
        self.stream.seek(35)
        version = struct.unpack('>b', self.stream.read(1))[0]
        if version == 2:
            self.stream.seek(0)
            author = self.stream.read(35).rstrip(b'\x00').decode(self.encoding, 'replace')
            return author
        else:
            return 'Unknown'

    def get_metadata(self):
        mi = MetaInformation(self.header_record.title,
                             [self.author()])
        mi.language = 'zh-tw'

        return mi

    def section_data(self, number):
        # Raw bytes of PDB record `number`.
        return self.sections[number]

    def decompress_text(self, number):
        # Despite the name, records are not compressed — this only decodes
        # and strips trailing NUL padding.
        return self.section_data(number).decode(self.encoding,
                                                'replace').rstrip('\x00')

    def extract_content(self, output_dir):
        txt = ''

        self.log.info(u'Decompressing text...')
        for i in range(1, self.header_record.num_records + 1):
            self.log.debug(u'\tDecompressing text section %i' % i)
            title = self.header_record.chapter_titles[i-1]
            lines = []
            title_added = False
            for line in self.decompress_text(i).splitlines():
                line = fix_punct(line)
                line = line.strip()
                # Mark the first line containing the chapter title as a
                # heading; all other lines are escaped for XML.
                if not title_added and title in line:
                    line = '<h1 class="chapter">' + line + '</h1>\n'
                    title_added = True
                else:
                    line = prepare_string_for_xml(line)
                lines.append('<p>%s</p>' % line)
            # Fall back to a synthetic heading when no line matched.
            if not title_added:
                lines.insert(0, '<h1 class="chapter">' + title + '</h1>\n')
            txt += '\n'.join(lines)

        self.log.info(u'Converting text to OEB...')
        html = HTML_TEMPLATE % (self.header_record.title, txt)

        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        mi = self.get_metadata()
        manifest = [('index.html', None)]
        spine = ['index.html']
        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

        return os.path.join(output_dir, 'metadata.opf')

View File

@@ -0,0 +1,91 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read the header data from a pdb file.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import re
import struct
import time
from polyglot.builtins import long_type
class PdbHeaderReader(object):

    '''
    Parse the fixed 78-byte Palm Database (PDB) header and the record list
    that follows it, giving access to individual record payloads.
    '''

    def __init__(self, stream):
        # stream must be a seekable binary file-like object.
        self.stream = stream
        self.ident = self.identity()
        self.num_sections = self.section_count()
        self.title = self.name()

    def identity(self):
        '''Return the 8-byte type/creator code stored at offset 60.'''
        self.stream.seek(60)
        ident = self.stream.read(8)
        return ident.decode('utf-8')

    def section_count(self):
        '''Return the record count (big-endian uint16 at offset 76).'''
        self.stream.seek(76)
        return struct.unpack('>H', self.stream.read(2))[0]

    def name(self):
        '''Return the 32-byte database name with unsafe bytes replaced.'''
        self.stream.seek(0)
        return re.sub(b'[^-A-Za-z0-9 ]+', b'_', self.stream.read(32).replace(b'\x00', b''))

    def full_section_info(self, number):
        '''Return (offset, flags, unique_id) for record `number`.'''
        if not (0 <= number < self.num_sections):
            raise ValueError('Not a valid section number %i' % number)

        self.stream.seek(78 + number * 8)
        # BUG FIX: the old code applied [0] to the unpacked 5-tuple and then
        # tried to tuple-unpack five names from a single int, which always
        # raised TypeError. Unpack the whole tuple instead.
        offset, a1, a2, a3, a4 = struct.unpack('>LBBBB', self.stream.read(8))
        # Byte layout per the PDB spec: 1 attribute byte, then a 3-byte
        # unique id.
        flags, val = a1, a2 << 16 | a3 << 8 | a4
        return (offset, flags, val)

    def section_offset(self, number):
        '''Return the file offset of record `number`'s data.'''
        if not (0 <= number < self.num_sections):
            raise ValueError('Not a valid section number %i' % number)

        self.stream.seek(78 + number * 8)
        return struct.unpack('>LBBBB', self.stream.read(8))[0]

    def section_data(self, number):
        '''Return the raw bytes of record `number`.

        A record extends from its offset to the next record's offset, or to
        end-of-file for the last record.
        '''
        if not (0 <= number < self.num_sections):
            raise ValueError('Not a valid section number %i' % number)

        start = self.section_offset(number)
        if number == self.num_sections - 1:
            self.stream.seek(0, 2)
            end = self.stream.tell()
        else:
            end = self.section_offset(number + 1)
        self.stream.seek(start)
        return self.stream.read(end - start)
class PdbHeaderBuilder(object):
def __init__(self, identity, title):
self.identity = identity.ljust(3, '\x00')[:8].encode('utf-8')
self.title = b'%s\x00' % re.sub('[^-A-Za-z0-9 ]+', '_', title).ljust(31, '\x00')[:31].encode('ascii', 'replace')
def build_header(self, section_lengths, out_stream):
'''
section_lengths = Lenght of each section in file.
'''
now = int(time.time())
nrecords = len(section_lengths)
out_stream.write(self.title + struct.pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0))
out_stream.write(self.identity + struct.pack('>IIH', nrecords, 0, nrecords))
offset = 78 + (8 * nrecords) + 2
for id, record in enumerate(section_lengths):
out_stream.write(struct.pack('>LBBBB', long_type(offset), 0, 0, 0, 0))
offset += record
out_stream.write(b'\x00\x00')

View File

@@ -0,0 +1,74 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from palmdoc pdb file.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import struct, io
from calibre.ebooks.pdb.formatreader import FormatReader
class HeaderRecord(object):
    '''
    The first record in the file is always the header record. It holds
    information related to the location of text, images, and so on
    in the file. This is used in conjunction with the sections
    defined in the file header.
    '''

    def __init__(self, raw):
        # Both fields are big-endian unsigned 16-bit integers.
        self.compression = struct.unpack_from('>H', raw, 0)[0]
        self.num_records = struct.unpack_from('>H', raw, 8)[0]
class Reader(FormatReader):

    '''
    Reader for PalmDoc (TEXtREAd) PDB files: decompresses the text records
    and hands the concatenated text to the TXT input plugin.
    '''

    def __init__(self, header, stream, log, options):
        self.stream = stream
        self.log = log
        self.options = options

        # Cache every PDB record up front; later access is index-based.
        self.sections = []
        for i in range(header.num_sections):
            self.sections.append(header.section_data(i))

        self.header_record = HeaderRecord(self.section_data(0))

    def section_data(self, number):
        # Raw bytes of PDB record `number`.
        return self.sections[number]

    def decompress_text(self, number):
        # 1 = uncompressed; 2 and 258 = PalmDoc LZ77 compression.
        if self.header_record.compression == 1:
            return self.section_data(number)
        if self.header_record.compression == 2 or self.header_record.compression == 258:
            from calibre.ebooks.compression.palmdoc import decompress_doc
            return decompress_doc(self.section_data(number))
        # Unknown compression: contribute nothing rather than crash.
        return b''

    def extract_content(self, output_dir):
        raw_txt = b''

        self.log.info('Decompressing text...')

        for i in range(1, self.header_record.num_records + 1):
            self.log.debug('\tDecompressing text section %i' % i)
            raw_txt += self.decompress_text(i)

        self.log.info('Converting text to OEB...')
        stream = io.BytesIO(raw_txt)

        from calibre.customize.ui import plugin_for_input_format
        txt_plugin = plugin_for_input_format('txt')
        # Fill in any TXT-plugin options the caller did not supply.
        for opt in txt_plugin.options:
            if not hasattr(self.options, opt.option.name):
                setattr(self.options, opt.option.name, opt.recommended_value)

        stream.seek(0)
        return txt_plugin.convert(stream, self.options, 'txt', self.log, {})

View File

@@ -0,0 +1,43 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from palmdoc pdb file.
'''
__license__ = 'GPL v3'
__copyright__ = '2010, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ptempfile import PersistentTemporaryFile
from polyglot.builtins import range
class Reader(FormatReader):

    '''
    Reader for PDF-in-PDB (.pdfADBE) files: concatenates the records into a
    temporary .pdf file and hands it to the PDF input plugin.
    '''

    def __init__(self, header, stream, log, options):
        self.header = header
        self.stream = stream
        self.log = log
        self.options = options

    def extract_content(self, output_dir):
        self.log.info('Extracting PDF...')

        pdf_file = PersistentTemporaryFile('.pdf')
        pdf_file.close()
        # BUG FIX: the old code passed the (closed) file object itself to
        # open(), which raises TypeError — open() needs the file's path.
        with open(pdf_file.name, 'wb') as pdf:
            for x in range(self.header.section_count()):
                pdf.write(self.header.section_data(x))

        from calibre.customize.ui import plugin_for_input_format

        pdf_plugin = plugin_for_input_format('pdf')
        # Fill in any PDF-plugin options the caller did not supply.
        for opt in pdf_plugin.options:
            if not hasattr(self.options, opt.option.name):
                setattr(self.options, opt.option.name, opt.recommended_value)

        return pdf_plugin.convert(open(pdf_file.name, 'rb'), self.options, 'pdf', self.log, {})

View File

@@ -0,0 +1,737 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
# Typo fix: was '20011'.
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import os
import struct
import zlib
from collections import OrderedDict
from calibre import CurrentDir
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.compression.palmdoc import decompress_doc
from calibre.utils.imghdr import identify
from calibre.utils.img import save_cover_data_to, Canvas, image_from_data
from polyglot.builtins import codepoint_to_chr, range
# Record data-type codes used in each Plucker section header.
DATATYPE_PHTML = 0
DATATYPE_PHTML_COMPRESSED = 1
DATATYPE_TBMP = 2
DATATYPE_TBMP_COMPRESSED = 3
DATATYPE_MAILTO = 4
DATATYPE_LINK_INDEX = 5
DATATYPE_LINKS = 6
DATATYPE_LINKS_COMPRESSED = 7
DATATYPE_BOOKMARKS = 8
DATATYPE_CATEGORY = 9
DATATYPE_METADATA = 10
DATATYPE_STYLE_SHEET = 11
DATATYPE_FONT_PAGE = 12
DATATYPE_TABLE = 13
DATATYPE_TABLE_COMPRESSED = 14
DATATYPE_COMPOSITE_IMAGE = 15
DATATYPE_PAGELIST_METADATA = 16
DATATYPE_SORTED_URL_INDEX = 17
DATATYPE_SORTED_URL = 18
DATATYPE_SORTED_URL_COMPRESSED = 19
DATATYPE_EXT_ANCHOR_INDEX = 20
DATATYPE_EXT_ANCHOR = 21
DATATYPE_EXT_ANCHOR_COMPRESSED = 22

# IETF IANA MIBenum value for the character set.
# See the http://www.iana.org/assignments/character-sets for valid values.
# Not all character sets are handled by Python. This is a small subset that
# the MIBenum maps to Python standard encodings
# from http://docs.python.org/library/codecs.html#standard-encodings
MIBNUM_TO_NAME = {
    3: 'ascii',
    4: 'latin_1',
    5: 'iso8859_2',
    6: 'iso8859_3',
    7: 'iso8859_4',
    8: 'iso8859_5',
    9: 'iso8859_6',
    10: 'iso8859_7',
    11: 'iso8859_8',
    12: 'iso8859_9',
    13: 'iso8859_10',
    17: 'shift_jis',
    18: 'euc_jp',
    27: 'utf_7',
    36: 'euc_kr',
    37: 'iso2022_kr',
    38: 'euc_kr',
    39: 'iso2022_jp',
    40: 'iso2022_jp_2',
    106: 'utf-8',
    109: 'iso8859_13',
    110: 'iso8859_14',
    111: 'iso8859_15',
    112: 'iso8859_16',
    1013: 'utf_16_be',
    1014: 'utf_16_le',
    1015: 'utf_16',
    2009: 'cp850',
    2010: 'cp852',
    2011: 'cp437',
    2013: 'cp862',
    2025: 'gb2312',
    2026: 'big5',
    2028: 'cp037',
    2043: 'cp424',
    2044: 'cp500',
    2046: 'cp855',
    2047: 'cp857',
    2048: 'cp860',
    2049: 'cp861',
    2050: 'cp863',
    2051: 'cp864',
    2052: 'cp865',
    2054: 'cp869',
    2063: 'cp1026',
    2085: 'hz',
    2086: 'cp866',
    2087: 'cp775',
    2089: 'cp858',
    2091: 'cp1140',
    2102: 'big5hkscs',
    2250: 'cp1250',
    2251: 'cp1251',
    2252: 'cp1252',
    2253: 'cp1253',
    2254: 'cp1254',
    2255: 'cp1255',
    2256: 'cp1256',
    2257: 'cp1257',
    2258: 'cp1258',
}
class HeaderRecord(object):
    '''
    Plucker header. PDB record 0.
    '''

    def __init__(self, raw):
        # uid, compression ("version" in the spec: 2 = zlib, 1 = PalmDoc)
        # and the count of reserved-record entries that follow.
        self.uid, self.compression, self.records = struct.unpack_from('>HHH', raw, 0)

        # uid of the first html file. This should link
        # to other files which in turn may link to others.
        self.home_html = None

        # Reserved-record table: maps record uid -> reserved name code;
        # name code 0 marks the home HTML document.
        self.reserved = {}
        for rec in range(self.records):
            name, rid = struct.unpack_from('>HH', raw, 6 + 4 * rec)
            self.reserved[rid] = name
            if name == 0:
                self.home_html = rid
class SectionHeader(object):
    '''
    Every section (record) has this header. It gives details about the
    section such as its uid.
    '''

    def __init__(self, raw):
        self.uid, = struct.unpack('>H', raw[0:2])
        self.paragraphs, = struct.unpack('>H', raw[2:4])
        self.size, = struct.unpack('>H', raw[4:6])
        # BUG FIX: slice (raw[6:7]) rather than index (raw[6]) — indexing a
        # bytes object on Python 3 yields an int, which struct.unpack
        # rejects with TypeError.
        self.type, = struct.unpack('>B', raw[6:7])
        self.flags, = struct.unpack('>B', raw[7:8])
class SectionHeaderText(object):
    '''
    Sub header for text records: per-paragraph sizes, attributes and
    cumulative offsets into the uncompressed PHTML.
    '''

    def __init__(self, section_header, raw):
        # The uncompressed size of each paragraph.
        self.sizes = []
        # Paragraph attributes.
        self.attributes = []
        for n in range(section_header.paragraphs):
            size, attr = struct.unpack_from('>HH', raw, 4 * n)
            self.sizes.append(size)
            self.attributes.append(attr)

        # Uncompressed offset of each paragraph, measured from the start
        # of the PHTML (running total of the sizes).
        self.paragraph_offsets = []
        total = 0
        for size in self.sizes:
            total += size
            self.paragraph_offsets.append(total)
class SectionMetadata(object):
    '''
    Metadata.

    This does not store metadata such as title, or author.
    That metadata would be best retrieved with the PDB (plucker)
    metadata reader.

    This stores document specific information such as the
    text encoding.

    Note: There is a default encoding but each text section
    can be assigned a different encoding.
    '''

    def __init__(self, raw):
        self.default_encoding = 'latin-1'
        self.exceptional_uid_encodings = {}
        self.owner_id = None

        record_count, = struct.unpack('>H', raw[0:2])

        # Each record is: type (uint16), length in 16-bit words (uint16),
        # followed by `length` words of payload.
        adv = 0
        for i in range(record_count):
            try:
                type, length = struct.unpack_from('>HH', raw, 2 + adv)
            except struct.error:
                # Truncated metadata record; keep what we have so far.
                break

            # CharSet
            if type == 1:
                val, = struct.unpack('>H', raw[6+adv:8+adv])
                self.default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
            # ExceptionalCharSets
            elif type == 2:
                ii_adv = 0
                # BUG FIX: `length / 2` is a float under `from __future__
                # import division`, which range() rejects on Python 3; use
                # floor division. Each (uid, MIBenum) pair is 2 words.
                for ii in range(length // 2):
                    uid, = struct.unpack('>H', raw[6+adv+ii_adv:8+adv+ii_adv])
                    mib, = struct.unpack('>H', raw[8+adv+ii_adv:10+adv+ii_adv])
                    self.exceptional_uid_encodings[uid] = MIBNUM_TO_NAME.get(mib, 'latin-1')
                    ii_adv += 4
            # OwnerID
            elif type == 3:
                # BUG FIX: unpack with a trailing comma (as everywhere else
                # in this class) so owner_id is an int, not a 1-tuple.
                self.owner_id, = struct.unpack('>I', raw[6+adv:10+adv])
            # Author, Title, PubDate
            # Ignored here. The metadata reader plugin
            # will get this info because if it's missing
            # the metadata reader plugin will use fall
            # back data from elsewhere in the file.
            elif type in (4, 5, 6):
                pass
            # Linked Documents
            elif type == 7:
                pass

            # Advance past this record's payload (length is in words).
            adv += 2*length
class SectionText(object):
    '''
    Text data. Stores a text section header and the PHTML.
    '''

    def __init__(self, section_header, raw):
        # The record starts with a 4-byte (size, attributes) entry per
        # paragraph; the PHTML stream follows immediately after it.
        phtml_start = section_header.paragraphs * 4
        self.header = SectionHeaderText(section_header, raw)
        self.data = raw[phtml_start:]
class SectionCompositeImage(object):
    '''
    A composite image consists of a 2D array
    of rows and columns. The entries in the array
    are uid's.
    '''

    def __init__(self, raw):
        self.columns, self.rows = struct.unpack('>HH', raw[0:4])

        # self.layout is a row-major grid:
        #     [[uid, uid, uid, ...],
        #      [uid, uid, uid, ...],
        #      ...]
        # Each entry sits at its final position in the composite and is
        # the uid of the image record to place there.
        self.layout = []
        pos = 4
        for _row in range(self.rows):
            row_uids = []
            for _col in range(self.columns):
                row_uids.append(struct.unpack_from('>H', raw, pos)[0])
                pos += 2
            self.layout.append(row_uids)
class Reader(FormatReader):
    '''
    Convert a plucker archive into HTML.

    TODO:
          * UTF 16 and 32 characters.
          * Margins.
          * Alignment.
          * Font color.
          * DATATYPE_MAILTO
          * DATATYPE_TABLE(_COMPRESSED)
          * DATATYPE_EXT_ANCHOR_INDEX
          * DATATYPE_EXT_ANCHOR(_COMPRESSED)
    '''

    def __init__(self, header, stream, log, options):
        self.stream = stream
        self.log = log
        self.options = options

        # Mapping of section uid to our internal
        # list of sections.
        self.uid_section_number = OrderedDict()
        self.uid_text_secion_number = OrderedDict()
        self.uid_text_secion_encoding = {}
        self.uid_image_section_number = {}
        self.uid_composite_image_section_number = {}
        self.metadata_section_number = None
        self.default_encoding = 'latin-1'
        self.owner_id = None
        self.sections = []

        # The Plucker record0 header
        self.header_record = HeaderRecord(header.section_data(0))

        for i in range(1, header.num_sections):
            section_number = len(self.sections)
            # The length of the section header.
            # Where the actual data in the section starts.
            start = 8
            section = None

            raw_data = header.section_data(i)
            # Every section has a section header.
            section_header = SectionHeader(raw_data)

            # Store the sections we care about.
            if section_header.type in (DATATYPE_PHTML, DATATYPE_PHTML_COMPRESSED):
                self.uid_text_secion_number[section_header.uid] = section_number
                section = SectionText(section_header, raw_data[start:])
            elif section_header.type in (DATATYPE_TBMP, DATATYPE_TBMP_COMPRESSED):
                self.uid_image_section_number[section_header.uid] = section_number
                section = raw_data[start:]
            elif section_header.type == DATATYPE_METADATA:
                self.metadata_section_number = section_number
                section = SectionMetadata(raw_data[start:])
            elif section_header.type == DATATYPE_COMPOSITE_IMAGE:
                self.uid_composite_image_section_number[section_header.uid] = section_number
                section = SectionCompositeImage(raw_data[start:])

            # Store the section.
            if section:
                self.uid_section_number[section_header.uid] = section_number
                self.sections.append((section_header, section))

        # Store useful information from the metadata section locally
        # to make access easier.  Compare against None explicitly: the
        # metadata section can legitimately be section number 0, which
        # is falsy.
        if self.metadata_section_number is not None:
            mdata_section = self.sections[self.metadata_section_number][1]
            for k, v in mdata_section.exceptional_uid_encodings.items():
                self.uid_text_secion_encoding[k] = v
            self.default_encoding = mdata_section.default_encoding
            self.owner_id = mdata_section.owner_id

        # Get the metadata (title, author, ...) with the metadata reader.
        from calibre.ebooks.metadata.pdb import get_metadata
        self.mi = get_metadata(stream, False)

    def extract_content(self, output_dir):
        # Each text record is independent (unless the continuation
        # value is set in the previous record). Put each converted
        # text record into a separate file. We will reference the
        # home.html file as the first file and let the HTML input
        # plugin assemble the order based on hyperlinks.
        with CurrentDir(output_dir):
            for uid, num in self.uid_text_secion_number.items():
                self.log.debug('Writing record with uid: %s as %s.html' % (uid, uid))
                with open('%s.html' % uid, 'wb') as htmlf:
                    html = u'<html><body>'
                    section_header, section_data = self.sections[num]
                    if section_header.type == DATATYPE_PHTML:
                        html += self.process_phtml(section_data.data, section_data.header.paragraph_offsets)
                    elif section_header.type == DATATYPE_PHTML_COMPRESSED:
                        d = self.decompress_phtml(section_data.data)
                        # NOTE(review): process_phtml returns text, so
                        # .decode() on its result looks wrong on Python 3 —
                        # confirm intent before changing encoding handling.
                        html += self.process_phtml(d, section_data.header.paragraph_offsets).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
                    html += '</body></html>'
                    htmlf.write(html.encode('utf-8'))

        # Images.
        # Cache the image sizes in case they are used by a composite image.
        images = set()
        if not os.path.exists(os.path.join(output_dir, 'images/')):
            os.makedirs(os.path.join(output_dir, 'images/'))
        with CurrentDir(os.path.join(output_dir, 'images/')):
            # Single images.
            for uid, num in self.uid_image_section_number.items():
                section_header, section_data = self.sections[num]
                if section_data:
                    idata = None
                    if section_header.type == DATATYPE_TBMP:
                        idata = section_data
                    elif section_header.type == DATATYPE_TBMP_COMPRESSED:
                        if self.header_record.compression == 1:
                            idata = decompress_doc(section_data)
                        elif self.header_record.compression == 2:
                            idata = zlib.decompress(section_data)
                    try:
                        save_cover_data_to(idata, '%s.jpg' % uid, compression_quality=70)
                        images.add(uid)
                        self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid))
                    except Exception as e:
                        self.log.error('Failed to write image with uid %s: %s' % (uid, e))
                else:
                    self.log.error('Failed to write image with uid %s: No data.' % uid)
            # Composite images.
            # We're going to use the already compressed .jpg images here.
            for uid, num in self.uid_composite_image_section_number.items():
                try:
                    section_header, section_data = self.sections[num]
                    # Get the final width and height.
                    width = 0
                    height = 0
                    for row in section_data.layout:
                        row_width = 0
                        col_height = 0
                        for col in row:
                            if col not in images:
                                raise Exception('Image with uid: %s missing.' % col)
                            w, h = identify(lopen('%s.jpg' % col, 'rb'))[1:]
                            row_width += w
                            if col_height < h:
                                col_height = h
                        if width < row_width:
                            width = row_width
                        height += col_height
                    # Create a new image the total size of all image
                    # parts. Put the parts into the new image.
                    with Canvas(width, height) as canvas:
                        y_off = 0
                        for row in section_data.layout:
                            x_off = 0
                            largest_height = 0
                            for col in row:
                                im = image_from_data(lopen('%s.jpg' % col, 'rb').read())
                                canvas.compose(im, x_off, y_off)
                                w, h = im.width(), im.height()
                                x_off += w
                                if largest_height < h:
                                    largest_height = h
                            y_off += largest_height
                    # Open for binary writing; the default mode is
                    # read-only and out.write() would fail.
                    with lopen('%s.jpg' % uid, 'wb') as out:
                        out.write(canvas.export(compression_quality=70))
                    self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid))
                except Exception as e:
                    self.log.error('Failed to write composite image with uid %s: %s' % (uid, e))

        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(self.options, opt.option.name, opt.recommended_value)
        self.options.input_encoding = 'utf-8'
        odi = self.options.debug_pipeline
        self.options.debug_pipeline = None
        # Determine the home.html record uid. This should be set in the
        # reserved values in the metadata record. home.html is the first
        # text record (should have hyper link references to other records)
        # in the document.
        try:
            home_html = self.header_record.home_html
            if not home_html:
                # First uid in insertion order.  dict views are not
                # subscriptable, so .items()[0][0] would raise here.
                home_html = next(iter(self.uid_text_secion_number))
        except Exception:
            raise Exception('Could not determine home.html')
        # Generate oeb from html conversion.
        oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {})
        self.options.debug_pipeline = odi

        return oeb

    def decompress_phtml(self, data):
        '''Decompress a PHTML record per the document's compression type.'''
        if self.header_record.compression == 2:
            # zlib compression; DRM (owner id) protected streams are not
            # supported.
            if self.owner_id:
                raise NotImplementedError
            return zlib.decompress(data)
        elif self.header_record.compression == 1:
            from calibre.ebooks.compression.palmdoc import decompress_doc
            return decompress_doc(data)

    def process_phtml(self, d, paragraph_offsets=()):
        '''
        Convert a raw PHTML byte stream into an HTML fragment.

        paragraph_offsets lists the uncompressed offsets at which new
        paragraphs (with stable "p<N>" ids used as link targets) begin.
        '''
        html = u'<p id="p0">'
        offset = 0
        paragraph_open = True
        link_open = False
        need_set_p_id = False
        p_num = 1
        font_specifier_close = ''

        while offset < len(d):
            if not paragraph_open:
                if need_set_p_id:
                    html += u'<p id="p%s">' % p_num
                    p_num += 1
                    need_set_p_id = False
                else:
                    html += u'<p>'
                paragraph_open = True

            c = ord(d[offset:offset+1])
            # PHTML "functions"
            if c == 0x0:
                offset += 1
                c = ord(d[offset:offset+1])
                # Page link begins
                # 2 Bytes
                # record ID
                if c == 0x0a:
                    offset += 1
                    rid = struct.unpack('>H', d[offset:offset+2])[0]
                    if rid in self.uid_text_secion_number:
                        html += '<a href="%s.html">' % rid
                        link_open = True
                    offset += 1
                # Targeted page link begins
                # 3 Bytes
                # record ID, target
                elif c == 0x0b:
                    offset += 3
                # Paragraph link begins
                # 4 Bytes
                # record ID, paragraph number
                elif c == 0x0c:
                    offset += 1
                    rid = struct.unpack('>H', d[offset:offset+2])[0]
                    offset += 2
                    pid = struct.unpack('>H', d[offset:offset+2])[0]
                    if rid in self.uid_text_secion_number:
                        html += '<a href="%s.html#p%s">' % (rid, pid)
                        link_open = True
                    offset += 1
                # Targeted paragraph link begins
                # 5 Bytes
                # record ID, paragraph number, target
                elif c == 0x0d:
                    offset += 5
                # Link ends
                # 0 Bytes
                elif c == 0x08:
                    if link_open:
                        html += '</a>'
                        link_open = False
                # Set font
                # 1 Bytes
                # font specifier
                elif c == 0x11:
                    offset += 1
                    # Use the same ord(slice) idiom as the rest of this
                    # loop: on Python 2, d[offset] is a 1-char str and
                    # would never compare equal to an int.
                    specifier = ord(d[offset:offset+1])
                    html += font_specifier_close
                    # Regular text
                    if specifier == 0:
                        font_specifier_close = ''
                    # h1
                    elif specifier == 1:
                        html += '<h1>'
                        font_specifier_close = '</h1>'
                    # h2
                    elif specifier == 2:
                        html += '<h2>'
                        font_specifier_close = '</h2>'
                    # h3
                    elif specifier == 3:
                        # Fixed typo: was the invalid tag '<h13>'.
                        html += '<h3>'
                        font_specifier_close = '</h3>'
                    # h4
                    elif specifier == 4:
                        html += '<h4>'
                        font_specifier_close = '</h4>'
                    # h5
                    elif specifier == 5:
                        html += '<h5>'
                        font_specifier_close = '</h5>'
                    # h6
                    elif specifier == 6:
                        html += '<h6>'
                        font_specifier_close = '</h6>'
                    # Bold
                    elif specifier == 7:
                        html += '<b>'
                        font_specifier_close = '</b>'
                    # Fixed-width
                    elif specifier == 8:
                        html += '<tt>'
                        font_specifier_close = '</tt>'
                    # Small
                    elif specifier == 9:
                        html += '<small>'
                        font_specifier_close = '</small>'
                    # Subscript
                    elif specifier == 10:
                        html += '<sub>'
                        font_specifier_close = '</sub>'
                    # Superscript
                    elif specifier == 11:
                        html += '<sup>'
                        font_specifier_close = '</sup>'
                # Embedded image
                # 2 Bytes
                # image record ID
                elif c == 0x1a:
                    offset += 1
                    uid = struct.unpack('>H', d[offset:offset+2])[0]
                    html += '<img src="images/%s.jpg" />' % uid
                    offset += 1
                # Set margin
                # 2 Bytes
                # left margin, right margin
                elif c == 0x22:
                    offset += 2
                # Alignment of text
                # 1 Bytes
                # alignment
                elif c == 0x29:
                    offset += 1
                # Horizontal rule
                # 3 Bytes
                # 8-bit height, 8-bit width (pixels), 8-bit width (%, 1-100)
                elif c == 0x33:
                    offset += 3
                    if paragraph_open:
                        html += u'</p>'
                        paragraph_open = False
                    html += u'<hr />'
                # New line
                # 0 Bytes
                elif c == 0x38:
                    if paragraph_open:
                        html += u'</p>\n'
                        paragraph_open = False
                # Italic text begins
                # 0 Bytes
                elif c == 0x40:
                    html += u'<i>'
                # Italic text ends
                # 0 Bytes
                elif c == 0x48:
                    html += u'</i>'
                # Set text color
                # 3 Bytes
                # 8-bit red, 8-bit green, 8-bit blue
                elif c == 0x53:
                    offset += 3
                # Multiple embedded image
                # 4 Bytes
                # alternate image record ID, image record ID
                elif c == 0x5c:
                    # Skip the alternate image ID; use the image ID.
                    offset += 3
                    uid = struct.unpack('>H', d[offset:offset+2])[0]
                    html += '<img src="images/%s.jpg" />' % uid
                    offset += 1
                # Underline text begins
                # 0 Bytes
                elif c == 0x60:
                    html += u'<u>'
                # Underline text ends
                # 0 Bytes
                elif c == 0x68:
                    html += u'</u>'
                # Strike-through text begins
                # 0 Bytes
                elif c == 0x70:
                    html += u'<s>'
                # Strike-through text ends
                # 0 Bytes
                elif c == 0x78:
                    html += u'</s>'
                # 16-bit Unicode character
                # 3 Bytes
                # alternate text length, 16-bit unicode character
                elif c == 0x83:
                    offset += 3
                # 32-bit Unicode character
                # 5 Bytes
                # alternate text length, 32-bit unicode character
                elif c == 0x85:
                    offset += 5
                # Begin custom font span
                # 6 Bytes
                # font page record ID, X page position, Y page position
                elif c == 0x8e:
                    offset += 6
                # Adjust custom font glyph position
                # 4 Bytes
                # X page position, Y page position
                elif c == 0x8c:
                    offset += 4
                # Change font page
                # 2 Bytes
                # font record ID
                elif c == 0x8a:
                    offset += 2
                # End custom font span
                # 0 Bytes
                elif c == 0x88:
                    pass
                # Begin new table row
                # 0 Bytes
                elif c == 0x90:
                    pass
                # Insert table (or table link)
                # 2 Bytes
                # table record ID
                elif c == 0x92:
                    offset += 2
                # Table cell data
                # 7 Bytes
                # 8-bit alignment, 16-bit image record ID, 8-bit columns, 8-bit rows, 16-bit text length
                elif c == 0x97:
                    offset += 7
                # Exact link modifier
                # 2 Bytes
                # Paragraph Offset (The Exact Link Modifier modifies a Paragraph Link or
                # Targeted Paragraph Link function to specify an exact byte offset within
                # the paragraph. This function must be followed immediately by the
                # function it modifies).
                elif c == 0x9a:
                    offset += 2
            elif c == 0xa0:
                html += '&nbsp;'
            else:
                html += codepoint_to_chr(c)
            offset += 1
            if offset in paragraph_offsets:
                need_set_p_id = True
                if paragraph_open:
                    html += u'</p>\n'
                    paragraph_open = False

        if paragraph_open:
            html += u'</p>'

        return html

    def get_text_uid_encoding(self, uid):
        # Return the user specified input encoding,
        # otherwise return the alternate encoding specified for the uid,
        # otherwise return the default encoding for the document.
        return self.options.input_encoding if self.options.input_encoding else self.uid_text_secion_encoding.get(uid, self.default_encoding)

View File

@@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
class zTXTError(Exception):
    # Raised for any problem encountered while handling zTXT PDB files.
    pass

View File

@@ -0,0 +1,94 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from ztxt pdb file.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
import struct
import zlib
import io
from calibre.ebooks.pdb.formatreader import FormatReader
from calibre.ebooks.pdb.ztxt import zTXTError
SUPPORTED_VERSION = (1, 40)
class HeaderRecord(object):
    '''
    The first record in the file is always the header record. It holds
    information related to the location of text, images, and so on
    in the file. This is used in conjunction with the sections
    defined in the file header.
    '''

    def __init__(self, raw):
        # Version word packs major in the high byte, minor in the low
        # byte (see the vmajor/vminor split in the reader).
        self.version, self.num_records = struct.unpack('>HH', raw[0:4])
        # Total uncompressed size of the text.
        self.size, = struct.unpack('>L', raw[4:8])
        self.record_size, = struct.unpack('>H', raw[8:10])
        # Compression/random-access flags byte.
        self.flags, = struct.unpack('>B', raw[18:19])
class Reader(FormatReader):
    '''
    Read content from a ztxt PDB file and hand it to the TXT input plugin.
    '''

    def __init__(self, header, stream, log, options):
        self.stream = stream
        self.log = log
        self.options = options

        self.sections = []
        for i in range(header.num_sections):
            self.sections.append(header.section_data(i))

        self.header_record = HeaderRecord(self.section_data(0))

        # The version word packs major in the high byte, minor in the
        # low byte.  Only 1.40 and newer is supported.
        vmajor = (self.header_record.version & 0x0000FF00) >> 8
        vminor = self.header_record.version & 0x000000FF
        if vmajor < 1 or (vmajor == 1 and vminor < 40):
            raise zTXTError('Unsupported ztxt version (%i.%i). Only versions newer than %i.%i are supported.' %
                            (vmajor, vminor, SUPPORTED_VERSION[0], SUPPORTED_VERSION[1]))

        if (self.header_record.flags & 0x01) == 0:
            raise zTXTError('Only compression method 1 (random access) is supported')

        # Fixed log typo: was 'Foud'.
        self.log.debug('Found ztxt version: %i.%i' % (vmajor, vminor))

        # Initialize the decompressor
        self.uncompressor = zlib.decompressobj()
        self.uncompressor.decompress(self.section_data(1))

    def section_data(self, number):
        '''Return the raw bytes of PDB section *number*.'''
        return self.sections[number]

    def decompress_text(self, number):
        '''Decompress text record *number* (records start at 1).'''
        # The zlib stream restarts at the first text record, so reset
        # the decompressor when it is requested.
        if number == 1:
            self.uncompressor = zlib.decompressobj()
        return self.uncompressor.decompress(self.section_data(number))

    def extract_content(self, output_dir):
        '''Decompress all text records and convert them via the TXT plugin.'''
        raw_txt = b''

        self.log.info('Decompressing text...')
        for i in range(1, self.header_record.num_records + 1):
            self.log.debug('\tDecompressing text section %i' % i)
            raw_txt += self.decompress_text(i)

        self.log.info('Converting text to OEB...')
        stream = io.BytesIO(raw_txt)

        from calibre.customize.ui import plugin_for_input_format
        txt_plugin = plugin_for_input_format('txt')
        for opt in txt_plugin.options:
            # Only fill in options the caller has not already set.
            if not hasattr(self.options, opt.option.name):
                setattr(self.options, opt.option.name, opt.recommended_value)

        stream.seek(0)
        return txt_plugin.convert(stream, self.options, 'txt', self.log, {})