import array
import itertools
import re
from ebook_converter.utils.config import OptionParser
from ebook_converter.utils.filenames import ascii_filename
from ebook_converter.ebooks.lrf.meta import LRFMetaFile
from ebook_converter.ebooks.lrf.objects import get_object, PageTree, \
StyleObject, Font, Text, TOCObject, BookAttr, ruby_tags
class LRFDocument(LRFMetaFile):
    """An LRF file whose object table can be parsed and dumped as text.

    Extends ``LRFMetaFile`` with the collection of objects read from the
    file's object index. Iterating over the document yields its page
    trees in the order they were encountered while parsing.
    """

    class temp(object):
        """Empty attribute holder used as a simple namespace."""
        pass

    def __init__(self, stream):
        """Initialise the metadata layer on *stream* and empty containers.

        No objects are read here; call :meth:`parse` to populate them.
        """
        LRFMetaFile.__init__(self, stream)
        self.scramble_key = self.xor_key
        self.page_trees = []
        self.font_map = {}
        self.image_map = {}
        # Replaced by the TOCObject instance if one is found while parsing.
        self.toc = ''
        # Checked between objects in _parse_objects(); setting it to False
        # makes the parsing/initialisation loops stop early.
        self.keep_parsing = True

    def _namespace_from(self, names):
        """Return a ``temp`` holding a copy of each attribute in *names*."""
        holder = LRFDocument.temp()
        for name in names:
            setattr(holder, name, getattr(self, name))
        return holder

    def parse(self):
        """Parse all objects, then snapshot header fields into namespaces.

        After this call ``self.metadata``, ``self.doc_info`` and
        ``self.device_info`` hold copies of the corresponding attributes
        of this document.
        """
        self._parse_objects()
        self.metadata = self._namespace_from(
            ('title', 'title_reading', 'author', 'author_reading',
             'book_id', 'classification', 'free_text', 'publisher',
             'label', 'category'))
        self.doc_info = self._namespace_from(
            ('thumbnail', 'language', 'creator', 'producer', 'page'))
        # NOTE(review): 'thumbail_extension' (sic) is spelled exactly as in
        # the original call; confirm against LRFMetaFile before renaming.
        self.doc_info.thumbnail_extension = self.thumbail_extension()
        self.device_info = self._namespace_from(('dpi', 'width', 'height'))

    def _parse_objects(self):
        """Read the object index and build ``self.objects``.

        The index is read as ``number_of_objects`` entries of four
        unsigned ints each; the first three items of an entry are used as
        (object id, offset, size). Once every object is loaded, each
        object exposing an ``initialize`` method has it called.
        """
        self.objects = {}
        self._file.seek(self.object_index_offset)
        # 16 bytes per entry (assumes typecode "I" is 4 bytes wide here).
        obj_array = array.array("I",
                                self._file.read(4 * 4 *
                                                self.number_of_objects))
        # Probe host byte order: the first byte of the native int 1 is 0
        # only on a big-endian host, in which case the index is swapped.
        # tobytes() is used because array.tostring() was removed in
        # Python 3.9.
        if array.array("i", [1]).tobytes()[0] == 0:  # big-endian
            obj_array.byteswap()
        for i in range(self.number_of_objects):
            if not self.keep_parsing:
                break
            base = i * 4
            objid, objoff, objsize = obj_array[base:base + 3]
            self._parse_object(objid, objoff, objsize)
        for obj in self.objects.values():
            if not self.keep_parsing:
                break
            if hasattr(obj, 'initialize'):
                obj.initialize()

    def _parse_object(self, objid, objoff, objsize):
        """Load one object and register it under *objid*.

        Page trees are appended to ``self.page_trees``, a TOC object
        replaces ``self.toc``, and a ``BookAttr`` object (re)builds
        ``self.ruby_tags`` from the attributes it defines.
        """
        obj = get_object(self, self._file, objid, objoff, objsize,
                         self.scramble_key)
        self.objects[objid] = obj
        if isinstance(obj, PageTree):
            self.page_trees.append(obj)
        elif isinstance(obj, TOCObject):
            self.toc = obj
        elif isinstance(obj, BookAttr):
            # Keyed by the first element of each ruby_tags value, keeping
            # only the attributes this BookAttr actually has.
            self.ruby_tags = {}
            for h in ruby_tags.values():
                attr = h[0]
                if hasattr(obj, attr):
                    self.ruby_tags[attr] = getattr(obj, attr)

    def __iter__(self):
        """Yield the page trees collected during parsing, in order."""
        yield from self.page_trees

    def write_files(self):
        """Write every image and font stream to its ``file`` path."""
        for obj in itertools.chain(self.image_map.values(),
                                   self.font_map.values()):
            with open(obj.file, 'wb') as f:
                f.write(obj.stream)

    def to_xml(self, write_files=True):
        """Return the document rendered as one string.

        Requires :meth:`parse` to have run (reads ``self.metadata`` and
        ``self.doc_info``). When *write_files* is true, the thumbnail (if
        any) and all image/font streams are written to disk as a side
        effect.

        NOTE(review): several templates below supply more values than
        they have ``%s`` placeholders -- the first ``bookinfo`` template
        has 7 placeholders for 10 values, and the thumbnail and non-main
        page-tree templates have none -- so the ``%`` operator raises
        TypeError when those lines execute. The literals look as if their
        markup was lost; they are reproduced unchanged here and need to
        be restored from the upstream source.
        """
        bookinfo = ('\n\n\n'
                    '%s\n'
                    '%s\n'
                    '%s\n'
                    '%s\n'
                    '\n'
                    '%s\n'
                    '%s\n'
                    '%s\n'
                    '\n\n' %
                    (self.metadata.title_reading, self.metadata.title,
                     self.metadata.author_reading, self.metadata.author,
                     self.metadata.book_id, self.metadata.publisher,
                     self.metadata.label, self.metadata.category,
                     self.metadata.classification, self.metadata.free_text))
        th = self.doc_info.thumbnail
        if th:
            # Thumbnail file name is derived from the ASCII-fied title.
            prefix = ascii_filename(self.metadata.title)
            bookinfo += ('\n' %
                         (prefix + '_thumbnail.' +
                          self.doc_info.thumbnail_extension))
            if write_files:
                with open(prefix + '_thumbnail.' +
                          self.doc_info.thumbnail_extension, 'wb') as f:
                    f.write(th)
        bookinfo += ('%s\n'
                     '%s\n'
                     '%s\n'
                     '%s\n'
                     '\n\n%s\n' %
                     (self.doc_info.language, self.doc_info.creator,
                      self.doc_info.producer, self.doc_info.page, self.toc))
        pages = ''
        done_main = False
        pt_id = -1
        for page_tree in self:
            # The first page tree is treated as the main one; its id is
            # remembered in pt_id. Later trees go through the else branch.
            if not done_main:
                done_main = True
                pages += '\n'
                close = '\n'
                pt_id = page_tree.id
            else:
                pages += '\n' % (page_tree.id,)
                close = '\n'
            for page in page_tree:
                pages += str(page)
            pages += close
        # NOTE(review): traversed_objects is computed but never read in
        # this method; presumably a loop over self.objects that consumed
        # it is missing -- confirm against upstream.
        traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"',
                                                        pages)] + [pt_id]
        objects = '\n\n'
        styles = '\n\n'
        objects += '\n'
        if write_files:
            self.write_files()
        return ('\n' + bookinfo + pages + styles +
                objects + '')
def option_parser():
    """Build the command-line option parser for the LRF to LRS converter.

    Registers, in order, ``--output``/``-o`` (dest ``out``),
    ``--dont-output-resources`` (dest ``output_resources``, a store-false
    flag defaulting to True) and ``--verbose`` (dest ``verbose``).
    """
    usage_text = ('%prog book.lrf\n'
                  'Convert an LRF file into an LRS (XML UTF-8 encoded) file')
    # (flags, keyword settings) pairs, registered in this order so the
    # generated help lists them exactly as before.
    option_table = (
        (('--output', '-o'),
         {'default': None,
          'help': 'Output LRS file',
          'dest': 'out'}),
        (('--dont-output-resources',),
         {'default': True,
          'action': 'store_false',
          'help': 'Do not save embedded image and font files to disk',
          'dest': 'output_resources'}),
        (('--verbose',),
         {'default': False,
          'action': 'store_true',
          'dest': 'verbose',
          'help': 'Be more verbose'}),
    )
    cli = OptionParser(usage=usage_text)
    for flags, settings in option_table:
        cli.add_option(*flags, **settings)
    return cli