1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-02 23:05:51 +01:00
Files
ebook-converter/ebook_converter/ebooks/pdb/ztxt/reader.py
2020-04-19 17:50:19 +02:00

91 lines
3.0 KiB
Python

"""
Read content from ztxt pdb file.
"""
import struct
import zlib
import io
from ebook_converter.ebooks.pdb.formatreader import FormatReader
from ebook_converter.ebooks.pdb.ztxt import zTXTError
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'
# zTXT version, as a (major, minor) tuple, that is reported in the
# "unsupported version" error raised by Reader for files older than 1.40.
SUPPORTED_VERSION = (1, 40)
class HeaderRecord:
    """
    The first record in the file is always the header record. It holds
    information related to the location of text, images, and so on
    in the file. This is used in conjunction with the sections
    defined in the file header.

    All values are read as big-endian, unsigned integers:

    ============  ======  =====
    attribute     offset  bytes
    ============  ======  =====
    version       0       2
    num_records   2       2
    size          4       4
    record_size   8       2
    flags         18      1
    ============  ======  =====
    """

    def __init__(self, raw):
        """
        Parse the raw bytes of the header record.

        :param raw: Bytes-like object holding the record; at least 19 bytes
                    are needed to reach the flags byte.
        :raises struct.error: If ``raw`` is too short to hold the fields.
        """
        # The first four fields are contiguous, so a single unpack_from reads
        # them in one go without creating intermediate slices.
        (self.version,
         self.num_records,
         self.size,
         self.record_size) = struct.unpack_from('>HHLH', raw, 0)
        # Bytes 10-17 are not used by this reader; flags is the byte at 18.
        self.flags, = struct.unpack_from('>B', raw, 18)
class Reader(FormatReader):
    """
    Reader for zTXT PDB files.

    Collects the sections of the PDB file, parses the header record stored in
    section 0 and decompresses the text records so they can be passed on to
    the txt input plugin.
    """

    def __init__(self, header, stream, log, options):
        """
        Load all sections and validate the zTXT header record.

        :param header: PDB header object; has to provide ``num_sections`` and
                       ``section_data(index)``.
        :param stream: Input stream of the file. It is only stored here.
        :param log: Logger used for debug and info messages.
        :param options: Conversion options, passed to the txt plugin by
                        ``extract_content``.
        :raises zTXTError: If the file version is older than
                           ``SUPPORTED_VERSION`` or the random access
                           compression flag (bit 0 of the flags byte) is not
                           set.
        """
        self.stream = stream
        self.log = log
        self.options = options
        self.sections = [header.section_data(i)
                         for i in range(header.num_sections)]

        self.header_record = HeaderRecord(self.section_data(0))

        # The version is a 16 bit value: major number in the high byte, minor
        # number in the low byte.
        vmajor = (self.header_record.version & 0xFF00) >> 8
        vminor = self.header_record.version & 0x00FF

        # Tuple comparison keeps the check and the error message tied to the
        # same constant.
        if (vmajor, vminor) < SUPPORTED_VERSION:
            raise zTXTError('Unsupported ztxt version (%i.%i). Only version '
                            '%i.%i and newer are supported.' %
                            (vmajor, vminor, SUPPORTED_VERSION[0],
                             SUPPORTED_VERSION[1]))

        if not self.header_record.flags & 0x01:
            raise zTXTError('Only compression method 1 (random access) is '
                            'supported')

        self.log.debug('Found ztxt version: %i.%i' % (vmajor, vminor))

        # Initialize the decompressor. The decompressed output is discarded
        # here; decompress_text() creates a fresh decompressor whenever
        # record 1 is requested.
        # NOTE(review): presumably this is kept so that a corrupt first text
        # record fails at construction time - confirm before removing.
        self.uncompressor = zlib.decompressobj()
        self.uncompressor.decompress(self.section_data(1))

    def section_data(self, number):
        """Return the raw data of the section with index ``number``."""
        return self.sections[number]

    def decompress_text(self, number):
        """
        Return the decompressed data of text record ``number``.

        Requesting record 1 starts over with a new decompressor. Any other
        record is fed to the decompressor object already in use, so records
        are expected to be requested in ascending order starting with 1, as
        ``extract_content`` does.
        """
        if number == 1:
            self.uncompressor = zlib.decompressobj()
        return self.uncompressor.decompress(self.section_data(number))

    def extract_content(self, output_dir):
        """
        Decompress all text records and convert them using the txt plugin.

        :param output_dir: Not used by this reader.
        :return: The result of the txt input plugin's ``convert`` call.
        """
        self.log.info('Decompressing text...')
        chunks = []
        for i in range(1, self.header_record.num_records + 1):
            self.log.debug('\tDecompressing text section %i' % i)
            chunks.append(self.decompress_text(i))
        # Join once at the end instead of growing a bytes object in the loop,
        # which would copy the accumulated text on every iteration.
        raw_txt = b''.join(chunks)

        self.log.info('Converting text to OEB...')
        stream = io.BytesIO(raw_txt)

        # Imported here rather than at module level, as in the original code.
        from ebook_converter.customize.ui import plugin_for_input_format

        txt_plugin = plugin_for_input_format('txt')
        # Fill in the plugin's recommended value for every option that the
        # caller did not provide.
        for opt in txt_plugin.options:
            if not hasattr(self.options, opt.option.name):
                setattr(self.options, opt.option.name, opt.recommended_value)

        return txt_plugin.convert(stream, self.options, 'txt', self.log, {})