import array import itertools import re from ebook_converter.utils.config import OptionParser from ebook_converter.utils.filenames import ascii_filename from ebook_converter.ebooks.lrf.meta import LRFMetaFile from ebook_converter.ebooks.lrf.objects import get_object, PageTree, \ StyleObject, Font, Text, TOCObject, BookAttr, ruby_tags class LRFDocument(LRFMetaFile): class temp(object): pass def __init__(self, stream): LRFMetaFile.__init__(self, stream) self.scramble_key = self.xor_key self.page_trees = [] self.font_map = {} self.image_map = {} self.toc = '' self.keep_parsing = True def parse(self): self._parse_objects() self.metadata = LRFDocument.temp() for a in ('title', 'title_reading', 'author', 'author_reading', 'book_id', 'classification', 'free_text', 'publisher', 'label', 'category'): setattr(self.metadata, a, getattr(self, a)) self.doc_info = LRFDocument.temp() for a in ('thumbnail', 'language', 'creator', 'producer', 'page'): setattr(self.doc_info, a, getattr(self, a)) self.doc_info.thumbnail_extension = self.thumbail_extension() self.device_info = LRFDocument.temp() for a in ('dpi', 'width', 'height'): setattr(self.device_info, a, getattr(self, a)) def _parse_objects(self): self.objects = {} self._file.seek(self.object_index_offset) obj_array = array.array("I", self._file.read(4 * 4 * self.number_of_objects)) if ord(array.array("i", [1]).tostring()[0:1]) == 0: # big-endian obj_array.byteswap() for i in range(self.number_of_objects): if not self.keep_parsing: break objid, objoff, objsize = obj_array[i*4:i*4+3] self._parse_object(objid, objoff, objsize) for obj in self.objects.values(): if not self.keep_parsing: break if hasattr(obj, 'initialize'): obj.initialize() def _parse_object(self, objid, objoff, objsize): obj = get_object(self, self._file, objid, objoff, objsize, self.scramble_key) self.objects[objid] = obj if isinstance(obj, PageTree): self.page_trees.append(obj) elif isinstance(obj, TOCObject): self.toc = obj elif isinstance(obj, BookAttr): self.ruby_tags = {} for h in ruby_tags.values(): attr = h[0] if hasattr(obj, attr): self.ruby_tags[attr] = getattr(obj, attr) def __iter__(self): for pt in self.page_trees: yield pt def write_files(self): for obj in itertools.chain(self.image_map.values(), self.font_map.values()): with open(obj.file, 'wb') as f: f.write(obj.stream) def to_xml(self, write_files=True): bookinfo = ('\n\n\n' '%s\n' '%s\n' '%s\n' '%s\n' '\n' '%s\n' '%s\n' '%s\n' '\n\n' % (self.metadata.title_reading, self.metadata.title, self.metadata.author_reading, self.metadata.author, self.metadata.book_id, self.metadata.publisher, self.metadata.label, self.metadata.category, self.metadata.classification, self.metadata.free_text)) th = self.doc_info.thumbnail if th: prefix = ascii_filename(self.metadata.title) bookinfo += ('\n' % (prefix + '_thumbnail.' + self.doc_info.thumbnail_extension)) if write_files: with open(prefix + '_thumbnail.' + self.doc_info.thumbnail_extension, 'wb') as f: f.write(th) bookinfo += ('%s\n' '%s\n' '%s\n' '%s\n' '\n\n%s\n' % (self.doc_info.language, self.doc_info.creator, self.doc_info.producer, self.doc_info.page, self.toc)) pages = '' done_main = False pt_id = -1 for page_tree in self: if not done_main: done_main = True pages += '
\n' close = '
\n' pt_id = page_tree.id else: pages += '\n' % (page_tree.id,) close = '\n' for page in page_tree: pages += str(page) pages += close traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id] objects = '\n\n' styles = '\n\n' objects += '\n' if write_files: self.write_files() return ('\n' + bookinfo + pages + styles + objects + '') def option_parser(): parser = OptionParser(usage='%prog book.lrf\nConvert an LRF file into ' 'an LRS (XML UTF-8 encoded) file') parser.add_option('--output', '-o', default=None, help='Output LRS file', dest='out') parser.add_option('--dont-output-resources', default=True, action='store_false', help='Do not save embedded image and font files to ' 'disk', dest='output_resources') parser.add_option('--verbose', default=False, action='store_true', dest='verbose', help='Be more verbose') return parser