import array import collections import io import re import struct import zlib from ebook_converter.ebooks.lrf import LRFParseError, PRS500_PROFILE from ebook_converter import entity_to_unicode, prepare_string_for_xml from ebook_converter.ebooks.lrf.tags import Tag ruby_tags = {0xF575: ['rubyAlignAndAdjust', 'W'], 0xF576: ['rubyoverhang', 'W', {0: 'none', 1: 'auto'}], 0xF577: ['empdotsposition', 'W', {1: 'before', 2: 'after'}], 0xF578: ['', 'parse_empdots'], 0xF579: ['emplineposition', 'W', {1: 'before', 2: 'after'}], 0xF57A: ['emplinetype', 'W', {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'}]} class LRFObject(object): tag_map = { 0xF500: ['', ''], 0xF502: ['infoLink', 'D'], 0xF501: ['', ''], } @classmethod def descramble_buffer(cls, buf, l, xorKey): i = 0 a = array.array('B', buf) while l > 0: a[i] ^= xorKey i += 1 l -= 1 return a.tostring() @classmethod def parse_empdots(self, tag, f): self.refEmpDotsFont, self.empDotsFontName, self.empDotsCode = tag.contents @staticmethod def tag_to_val(h, obj, tag, stream): val = None if h[1] == 'D': val = tag.dword elif h[1] == 'W': val = tag.word elif h[1] == 'w': val = tag.word if val > 0x8000: val -= 0x10000 elif h[1] == 'B': val = tag.byte elif h[1] == 'P': val = tag.contents elif h[1] != '': val = getattr(obj, h[1])(tag, stream) if len(h) > 2: val = h[2](val) if callable(h[2]) else h[2][val] return val def __init__(self, document, stream, id, scramble_key, boundary): self._scramble_key = scramble_key self._document = document self.id = id while stream.tell() < boundary: tag = Tag(stream) self.handle_tag(tag, stream) def parse_bg_image(self, tag, f): self.bg_image_mode, self.bg_image_id = struct.unpack(" 0x400 and (isinstance(self, ImageStream) or isinstance(self, Font) or isinstance(self, SoundStream)): l = 0x400 self.stream = self.descramble_buffer(self.stream, l, key) if self.stream_flags & 0x100 !=0: decomp_size = struct.unpack(">8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF def __str__(self): return '0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b) def __len__(self): return 4 def __getitem__(self, i): # Qt compatible ordering and values return (self.r, self.g, self.b, 0xff-self.a)[i] # In Qt 0xff is opaque while in LRS 0x00 is opaque def to_html(self): return 'rgb(%d, %d, %d)'%(self.r, self.g, self.b) class EmptyPageElement(object): def __iter__(self): for i in range(0): yield i def __str__(self): return str(self) class PageDiv(EmptyPageElement): def __init__(self, pain, spacesize, linewidth, linecolor): self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth self.linecolor = Color(linecolor) def __str__(self): return '\n\n'%\ (self.pain, self.spacesize, self.linewidth, self.color) class RuledLine(EmptyPageElement): linetype_map = {0x00: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted', 0x13: 'unknown13'} def __init__(self, linelength, linetype, linewidth, linecolor): self.linelength, self.linewidth = linelength, linewidth self.linetype = self.linetype_map[linetype] self.linecolor = Color(linecolor) self.id = -1 def __str__(self): return '\n\n'%\ (self.linelength, self.linetype, self.linewidth, self.linecolor) class Wait(EmptyPageElement): def __init__(self, time): self.time = time def __str__(self): return '\n\n'%(self.time) class Locate(EmptyPageElement): pos_map = {1:'bottomleft', 2:'bottomright', 3:'topright', 4:'topleft', 5:'base'} def __init__(self, pos): self.pos = self.pos_map[pos] def __str__(self): return '\n\n'%(self.pos) class BlockSpace(EmptyPageElement): def __init__(self, xspace, yspace): self.xspace, self.yspace = xspace, yspace def __str__(self): return '\n\n'%\ (self.xspace, self.yspace) class Page(LRFStream): tag_map = { 0xF503: ['style_id', 'D'], 0xF50B: ['obj_list', 'P'], 0xF571: ['', ''], 0xF57C: ['parent_page_tree', 'D'], } tag_map.update(PageAttr.tag_map) tag_map.update(LRFStream.tag_map) style = property(fget=lambda self : self._document.objects[self.style_id]) evenheader = property(fget=lambda self : self._document.objects[self.style.evenheaderid]) evenfooter = property(fget=lambda self : self._document.objects[self.style.evenfooterid]) oddheader = property(fget=lambda self : self._document.objects[self.style.oddheaderid]) oddfooter = property(fget=lambda self : self._document.objects[self.style.oddfooterid]) class Content(LRFContentObject): tag_map = { 0xF503: 'link', 0xF54E: 'page_div', 0xF547: 'x_space', 0xF546: 'y_space', 0xF548: 'do_pos', 0xF573: 'ruled_line', 0xF5D4: 'wait', 0xF5D6: 'sound_stop', } def __init__(self, byts, objects): self.in_blockspace = False LRFContentObject.__init__(self, byts, objects) def link(self, tag): self.close_blockspace() self._contents.append(self.objects[tag.dword]) def page_div(self, tag): self.close_blockspace() pars = struct.unpack("= 700 else 'normal')) fn = getattr(obj, 'fontfacename', None) if fn is not None: fn = cls.FONT_MAP[fn] ans += item('font-family: %s;'%fn) fg = getattr(obj, 'textcolor', None) if fg is not None: fg = fg.to_html() ans += item('color: %s;'%fg) bg = getattr(obj, 'textbgcolor', None) if bg is not None: bg = bg.to_html() ans += item('background-color: %s;'%bg) al = getattr(obj, 'align', None) if al is not None: al = dict(head='left', center='center', foot='right') ans += item('text-align: %s;'%al) lh = getattr(obj, 'linespace', None) if lh is not None: ans += item('text-align: %fpt;'%(int(lh)/10)) pi = getattr(obj, 'parindent', None) if pi is not None: ans += item('text-indent: %fpt;'%(int(pi)/10)) return ans class TextAttr(StyleObject, LRFObject, TextCSS): FONT_MAP = collections.defaultdict(lambda : 'serif') for key, value in PRS500_PROFILE.default_fonts.items(): FONT_MAP[value] = key tag_map = { 0xF511: ['fontsize', 'w'], 0xF512: ['fontwidth', 'w'], 0xF513: ['fontescapement', 'w'], 0xF514: ['fontorientation', 'w'], 0xF515: ['fontweight', 'W'], 0xF516: ['fontfacename', 'P'], 0xF517: ['textcolor', 'D', Color], 0xF518: ['textbgcolor', 'D', Color], 0xF519: ['wordspace', 'w'], 0xF51A: ['letterspace', 'w'], 0xF51B: ['baselineskip', 'w'], 0xF51C: ['linespace', 'w'], 0xF51D: ['parindent', 'w'], 0xF51E: ['parskip', 'w'], 0xF53C: ['align', 'W', {1: 'head', 4: 'center', 8: 'foot'}], 0xF53D: ['column', 'W'], 0xF53E: ['columnsep', 'W'], 0xF5DD: ['charspace', 'w'], 0xF5F1: ['textlinewidth', 'W'], 0xF5F2: ['linecolor', 'D', Color], } tag_map.update(ruby_tags) tag_map.update(LRFObject.tag_map) class Block(LRFStream, TextCSS): tag_map = { 0xF503: ['style_id', 'D'], } tag_map.update(BlockAttr.tag_map) tag_map.update(TextAttr.tag_map) tag_map.update(LRFStream.tag_map) extra_attrs = [i[0] for i in BlockAttr.tag_map.values()] extra_attrs.extend([i[0] for i in TextAttr.tag_map.values()]) style = property(fget=lambda self : self._document.objects[self.style_id]) textstyle = property(fget=lambda self : self._document.objects[self.textstyle_id]) def initialize(self): self.attrs = {} stream = io.BytesIO(self.stream) tag = Tag(stream) if tag.id != 0xF503: raise LRFParseError("Bad block content") obj = self._document.objects[tag.dword] if isinstance(obj, SimpleText): self.name = 'SimpleTextBlock' self.textstyle_id = obj.style_id elif isinstance(obj, Text): self.name = 'TextBlock' self.textstyle_id = obj.style_id elif isinstance(obj, Image): self.name = 'ImageBlock' for attr in ('x0', 'x1', 'y0', 'y1', 'xsize', 'ysize', 'refstream'): self.attrs[attr] = getattr(obj, attr) self.refstream = self._document.objects[self.attrs['refstream']] elif isinstance(obj, Button): self.name = 'ButtonBlock' else: raise LRFParseError("Unexpected block type: "+obj.__class__.__name__) self.content = obj for attr in self.extra_attrs: if hasattr(self, attr): self.attrs[attr] = getattr(self, attr) def __str__(self): s = '\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id) if hasattr(self, 'textstyle_id'): s += 'textstyle="%d" '%(self.textstyle_id,) for attr in self.attrs: s += '%s="%s" '%(attr, self.attrs[attr]) if self.name != 'ImageBlock': s = s.rstrip()+'>\n' s += str(self.content) s += '\n'%(self.name,) return s return s.rstrip() + ' />\n' def to_html(self): if self.name == 'TextBlock': return '
%s
'%(self.style_id, self.textstyle_id, self.content.to_html()) return '' class MiniPage(LRFStream): tag_map = { 0xF541: ['minipagewidth', 'W'], 0xF542: ['minipageheight', 'W'], } tag_map.update(LRFStream.tag_map) tag_map.update(BlockAttr.tag_map) class Text(LRFStream): tag_map = { 0xF503: ['style_id', 'D'], } tag_map.update(TextAttr.tag_map) tag_map.update(LRFStream.tag_map) style = property(fget=lambda self : self._document.objects[self.style_id]) text_map = {0x22: '"', 0x26: '&', 0x27: '\'', 0x3c: '<', 0x3e: '>'} entity_pattern = re.compile(r'&(\S+?);') text_tags = { 0xF581: ['simple_container', 'Italic'], 0xF582: 'end_container', 0xF5B1: ['simple_container', 'Yoko'], 0xF5B2: 'end_container', 0xF5B3: ['simple_container', 'Tate'], 0xF5B4: 'end_container', 0xF5B5: ['simple_container', 'Nekase'], 0xF5B6: 'end_container', 0xF5A1: 'start_para', 0xF5A2: 'end_para', 0xF5A7: 'char_button', 0xF5A8: 'end_container', 0xF5A9: ['simple_container', 'Rubi'], 0xF5AA: 'end_container', 0xF5AB: ['simple_container', 'Oyamoji'], 0xF5AC: 'end_container', 0xF5AD: ['simple_container', 'Rubimoji'], 0xF5AE: 'end_container', 0xF5B7: ['simple_container', 'Sup'], 0xF5B8: 'end_container', 0xF5B9: ['simple_container', 'Sub'], 0xF5BA: 'end_container', 0xF5BB: ['simple_container', 'NoBR'], 0xF5BC: 'end_container', 0xF5BD: ['simple_container', 'EmpDots'], 0xF5BE: 'end_container', 0xF5C1: 'empline', 0xF5C2: 'end_container', 0xF5C3: 'draw_char', 0xF5C4: 'end_container', 0xF5C6: 'box', 0xF5C7: 'end_container', 0xF5CA: 'space', 0xF5D1: 'plot', 0xF5D2: 'cr', } class TextTag(object): def __init__(self, name, attrs={}, self_closing=False): self.name = name self.attrs = attrs self.self_closing = self_closing def __str__(self): s = '<%s '%(self.name,) for name, val in self.attrs.items(): s += '%s="%s" '%(name, val) return s.rstrip() + (' />' if self.self_closing else '>') def to_html(self): s = '' return s def close_html(self): return '' class Span(TextTag): pass linetype_map = {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'} adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'} lineposition_map = {1:'before', 2:'after'} def add_text(self, text): s = str(text, "utf-16-le") if s: s = s.translate(self.text_map) self.content.append(self.entity_pattern.sub(entity_to_unicode, s)) def end_container(self, tag, stream): self.content.append(None) def start_para(self, tag, stream): self.content.append(self.__class__.TextTag('P')) def close_containers(self, start=0): if len(self.content) == 0: return open_containers = 0 if len(self.content) > 0 and isinstance(self.content[-1], self.__class__.Span): self.content.pop() while start < len(self.content): c = self.content[start] if c is None: open_containers -= 1 elif isinstance(c, self.__class__.TextTag) and not c.self_closing: open_containers += 1 start += 1 self.content.extend(None for i in range(open_containers)) def end_para(self, tag, stream): i = len(self.content)-1 while i > -1: if isinstance(self.content[i], Text.TextTag) and self.content[i].name == 'P': break i -= 1 self.close_containers(start=i) def cr(self, tag, stream): self.content.append(self.__class__.TextTag('CR', self_closing=True)) def char_button(self, tag, stream): self.content.append(self.__class__.TextTag( 'CharButton', attrs={'refobj':tag.dword})) def simple_container(self, tag, name): self.content.append(self.__class__.TextTag(name)) def empline(self, tag, stream): def invalid(op): stream.seek(op) # self.simple_container(None, 'EmpLine') oldpos = stream.tell() try: t = Tag(stream) if t.id not in (0xF579, 0xF57A): raise LRFParseError except LRFParseError: invalid(oldpos) return h = TextAttr.tag_map[t.id] attrs = {} attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None) oldpos = stream.tell() try: t = Tag(stream) if t.id not in (0xF579, 0xF57A): raise LRFParseError h = TextAttr.tag_map[t.id] attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None) except LRFParseError: stream.seek(oldpos) if attrs: self.content.append(self.__class__.TextTag( 'EmpLine', attrs=attrs)) def space(self, tag, stream): self.content.append(self.__class__.TextTag('Space', attrs={'xsize':tag.sword}, self_closing=True)) def plot(self, tag, stream): xsize, ysize, refobj, adjustment = struct.unpack("= start_pos: if tag_pos > start_pos: self.add_text(self.stream[start_pos:tag_pos]) stream.seek(tag_pos) else: # No tags in this stream self.add_text(self.stream) stream.seek(0, 2) break tag = Tag(stream) if tag.id == 0xF5CC: self.add_text(stream.read(tag.word)) elif tag.id in self.__class__.text_tags: # A Text tag action = self.__class__.text_tags[tag.id] if isinstance(action, str): getattr(self, action)(tag, stream) else: getattr(self, action[0])(tag, action[1]) elif tag.id in TextAttr.tag_map: # A Span attribute action = TextAttr.tag_map[tag.id] if len(self.content) == 0: current_style = style.copy() name, val = action[0], LRFObject.tag_to_val(action, self, tag, None) if name and (name not in current_style or current_style[name] != val): # No existing Span if len(self.content) > 0 and isinstance(self.content[-1], self.__class__.Span): self.content[-1].attrs[name] = val else: self.content.append(self.__class__.Span('Span', {name:val})) current_style[name] = val if len(self.content) > 0: self.close_containers() self.stream = None def __str__(self): s = '' open_containers = collections.deque() for c in self.content: if isinstance(c, str): s += prepare_string_for_xml(c).replace('\0', '') elif c is None: if open_containers: p = open_containers.pop() s += ''%(p.name,) else: s += str(c) if not c.self_closing: open_containers.append(c) if len(open_containers) > 0: if len(open_containers) == 1: s += ''%(open_containers[0].name,) else: raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],)) return s def to_html(self): s = '' open_containers = collections.deque() in_p = False for c in self.content: if isinstance(c, str): s += c elif c is None: p = open_containers.pop() s += p.close_html() else: if c.name == 'P': in_p = True elif c.name == 'CR': s += '
' if in_p else '

' else: s += c.to_html() if not c.self_closing: open_containers.append(c) if len(open_containers) > 0: raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],)) return s class Image(LRFObject): tag_map = { 0xF54A: ['', 'parse_image_rect'], 0xF54B: ['', 'parse_image_size'], 0xF54C: ['refstream', 'D'], 0xF555: ['comment', 'P'], } def parse_image_rect(self, tag, f): self.x0, self.y0, self.x1, self.y1 = struct.unpack("\n'%\ (self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream) class PutObj(EmptyPageElement): def __init__(self, objects, x1, y1, refobj): self.x1, self.y1, self.refobj = x1, y1, refobj self.object = objects[refobj] def __str__(self): return ''%(self.x1, self.y1, self.refobj) class Canvas(LRFStream): tag_map = { 0xF551: ['canvaswidth', 'W'], 0xF552: ['canvasheight', 'W'], 0xF5DA: ['', 'parse_waits'], 0xF533: ['blockrule', 'W', {0x44: "block-fixed", 0x22: "block-adjustable"}], 0xF534: ['bgcolor', 'D', Color], 0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}], 0xF536: ['framewidth', 'W'], 0xF537: ['framecolor', 'D', Color], 0xF52E: ['framemode', 'W', {0: 'none', 2: 'curve', 1:'square'}], } tag_map.update(LRFStream.tag_map) extra_attrs = ['canvaswidth', 'canvasheight', 'blockrule', 'layout', 'framewidth', 'framecolor', 'framemode'] def parse_waits(self, tag, f): val = tag.word self.setwaitprop = val&0xF self.setwaitsync = val&0xF0 def initialize(self): self.attrs = {} for attr in self.extra_attrs: if hasattr(self, attr): self.attrs[attr] = getattr(self, attr) self._contents = [] stream = io.BytesIO(self.stream) while stream.tell() < len(self.stream): tag = Tag(stream) try: self._contents.append( PutObj(self._document.objects, *struct.unpack("\n' for po in self: s += str(po) + '\n' s += '\n'%(self.__class__.__name__,) return s def __iter__(self): for i in self._contents: yield i class Header(Canvas): pass class Footer(Canvas): pass class ESound(LRFObject): pass class ImageStream(LRFStream): tag_map = { 0xF555: ['comment', 'P'], } imgext = {0x11: 'jpeg', 0x12: 'png', 0x13: 'bmp', 0x14: 'gif'} tag_map.update(LRFStream.tag_map) encoding = property(fget=lambda self : self.imgext[self.stream_flags & 0xFF].upper()) def end_stream(self, *args): LRFStream.end_stream(self, *args) self.file = str(self.id) + '.' + self.encoding.lower() if self._document is not None: self._document.image_map[self.id] = self def __str__(self): return '\n'%\ (self.id, self.encoding, self.file) class Import(LRFStream): pass class Button(LRFObject): tag_map = { 0xF503: ['', 'do_ref_image'], 0xF561: ['button_flags','W'], # \n' return s refpage = property(fget=lambda self : self.jump_action(2)[0]) refobj = property(fget=lambda self : self.jump_action(2)[1]) class Window(LRFObject): pass class PopUpWin(LRFObject): pass class Sound(LRFObject): pass class SoundStream(LRFObject): pass class Font(LRFStream): tag_map = { 0xF559: ['fontfilename', 'P'], 0xF55D: ['fontfacename', 'P'], } tag_map.update(LRFStream.tag_map) data = property(fget=lambda self: self.stream) def end_stream(self, *args): LRFStream.end_stream(self, *args) self._document.font_map[self.fontfacename] = self self.file = self.fontfacename + '.ttf' def __unicode__(self): s = '\n'%\ (self.id, self.fontfilename, self.fontfacename, self.file) return s class ObjectInfo(LRFStream): pass class BookAttr(StyleObject, LRFObject): tag_map = { 0xF57B: ['page_tree_id', 'D'], 0xF5D8: ['', 'add_font'], 0xF5DA: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}], } tag_map.update(ruby_tags) tag_map.update(LRFObject.tag_map) binding_map = {1: 'Lr', 16 : 'Rl'} def __init__(self, document, stream, id, scramble_key, boundary): self.font_link_list = [] LRFObject.__init__(self, document, stream, id, scramble_key, boundary) def add_font(self, tag, f): self.font_link_list.append(tag.dword) def __str__(self): s = '\n'%(self.id, self.id) s += '\n'%(self._tags_to_xml(),) doc = self._document s += '\n'%\ (self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth) for font in self._document.font_map.values(): s += str(font) s += '\n' return s class SimpleText(Text): pass class TocLabel(object): def __init__(self, refpage, refobject, label): self.refpage, self.refobject, self.label = refpage, refobject, label def __str__(self): return '%s\n'%(self.refpage, self.refobject, self.label) class TOCObject(LRFStream): def initialize(self): stream = io.BytesIO(self.stream) c = struct.unpack(" 0: refpage = struct.unpack("\n' object_map = [None, # 00 PageTree, # 01 Page, # 02 Header, # 03 Footer, # 04 PageAttr, # 05 Block, # 06 BlockAttr, # 07 MiniPage, # 08 None, # 09 Text, # 0A TextAttr, # 0B Image, # 0C Canvas, # 0D ESound, # 0E None, # 0F None, # 10 ImageStream, # 11 Import, # 12 Button, # 13 Window, # 14 PopUpWin, # 15 Sound, # 16 SoundStream, # 17 None, # 18 Font, # 19 ObjectInfo, # 1A None, # 1B BookAttr, # 1C SimpleText, # 1D TOCObject] # 1E def get_object(document, stream, id, offset, size, scramble_key): stream.seek(offset) start_tag = Tag(stream) if start_tag.id != 0xF500: raise LRFParseError('Bad object start') obj_id, obj_type = struct.unpack("