From 399456d9ad4abe307f51e6cbe58b80be0835dbb6 Mon Sep 17 00:00:00 2001
From: gryf <gryf73@gmail.com>
Date: Sun, 24 May 2020 12:43:33 +0200
Subject: [PATCH] Added LRF input format support.

---
 README.rst                              |    3 +-
 ebook_converter/ebooks/lrf/input.py     |  394 +++++++
 ebook_converter/ebooks/lrf/lrfparser.py |  171 +++
 ebook_converter/ebooks/lrf/meta.py      |  766 ++++++++++++++
 ebook_converter/ebooks/lrf/objects.py   | 1279 +++++++++++++++++++++++
 ebook_converter/ebooks/lrf/tags.py      |  255 +++++
 6 files changed, 2867 insertions(+), 1 deletion(-)
 create mode 100644 ebook_converter/ebooks/lrf/input.py
 create mode 100644 ebook_converter/ebooks/lrf/lrfparser.py
 create mode 100644 ebook_converter/ebooks/lrf/meta.py
 create mode 100644 ebook_converter/ebooks/lrf/objects.py
 create mode 100644 ebook_converter/ebooks/lrf/tags.py

diff --git a/README.rst b/README.rst
index d0059f7..ec7b5f5 100644
--- a/README.rst
+++ b/README.rst
@@ -54,6 +54,7 @@ Currently, I've tested following input formats:
 - fb2
 - html
 - pdf
+- lrf
 
 Note, that old Microsoft doc format is not supported, although old documents
 can be fairly easy converted using text processors programs, lik Word or
@@ -65,7 +66,7 @@ Output formats
 
 Currently, following formats are supported:
 
-- lrf (for Sony readers)
+- lrf
 - epub
 - mobi
 - docx
diff --git a/ebook_converter/ebooks/lrf/input.py b/ebook_converter/ebooks/lrf/input.py
new file mode 100644
index 0000000..d9ed86f
--- /dev/null
+++ b/ebook_converter/ebooks/lrf/input.py
@@ -0,0 +1,394 @@
+import textwrap, operator
+from copy import deepcopy, copy
+
+from lxml import etree
+
+from ebook_converter import guess_type
+from ebook_converter.polyglot.builtins import as_bytes
+
+
+class Canvas(etree.XSLTExtension):
+
+    def __init__(self, doc, styles, text_block, log):
+        self.doc = doc
+        self.styles = styles
+        self.text_block = text_block
+        self.log = log
+        self.processed = set()
+
+    def execute(self, context, self_node, input_node, output_parent):
+        cid = input_node.get('objid', None)
+        if cid is None or cid in self.processed:
+            return
+        self.processed.add(cid)
+        input_node = self.doc.xpath('//Canvas[@objid="%s"]'%cid)[0]
+
+        objects = list(self.get_objects(input_node))
+        if len(objects) == 1 and objects[0][0].tag == 'ImageBlock':
+            self.image_page(input_node, objects[0][0], output_parent)
+        else:
+            canvases = [input_node]
+            for x in input_node.itersiblings():
+                if x.tag == 'Canvas':
+                    oid = x.get('objid', None)
+                    if oid is not None:
+                        canvases.append(x)
+                        self.processed.add(oid)
+                else:
+                    break
+
+            table = etree.Element('table')
+            table.text = '\n\t'
+            for canvas in canvases:
+                oid = canvas.get('objid')
+                tr = table.makeelement('tr')
+                tr.set('id', oid)
+                tr.tail = '\n\t'
+                table.append(tr)
+                for obj, x, y in self.get_objects(canvas):
+                    if obj.tag != 'TextBlock':
+                        self.log.warn(obj.tag, 'elements in Canvas not supported')
+                        continue
+                    td = table.makeelement('td')
+                    self.text_block.render_block(obj, td)
+                    tr.append(td)
+            output_parent.append(table)
+
+    def image_page(self, input_node, block, output_parent):
+        div = etree.Element('div')
+        div.set('id', input_node.get('objid', 'scuzzy'))
+        div.set('class', 'image_page')
+        width = self.styles.to_num(block.get("xsize", None))
+        height = self.styles.to_num(block.get("ysize", None))
+        img = div.makeelement('img')
+        if width is not None:
+            img.set('width', str(int(width)))
+        if height is not None:
+            img.set('height', str(int(height)))
+        ref = block.get('refstream', None)
+        if ref is not None:
+            imstr = self.doc.xpath('//ImageStream[@objid="%s"]'%ref)
+            if imstr:
+                src = imstr[0].get('file', None)
+                if src:
+                    img.set('src', src)
+        div.append(img)
+        output_parent.append(div)
+
+    def get_objects(self, node):
+        for x in node.xpath('descendant::PutObj[@refobj and @x1 and @y1]'):
+            objs = node.xpath('//*[@objid="%s"]'%x.get('refobj'))
+            x, y = map(self.styles.to_num, (x.get('x1'), x.get('y1')))
+            if objs and x is not None and y is not None:
+                yield objs[0], int(x), int(y)
+
+
+class MediaType(etree.XSLTExtension):
+
+    def execute(self, context, self_node, input_node, output_parent):
+        name = input_node.get('file', None)
+        typ = guess_type(name)[0]
+        if not typ:
+            typ = 'application/octet-stream'
+        output_parent.text = typ
+
+
+class ImageBlock(etree.XSLTExtension):
+
+    def __init__(self, canvas):
+        etree.XSLTExtension.__init__(self)
+        self.canvas = canvas
+
+    def execute(self, context, self_node, input_node, output_parent):
+        self.canvas.image_page(input_node, input_node, output_parent)
+
+
+class RuledLine(etree.XSLTExtension):
+
+    def execute(self, context, self_node, input_node, output_parent):
+        hr = etree.Element('hr')
+        output_parent.append(hr)
+
+
+class TextBlock(etree.XSLTExtension):
+
+    def __init__(self, styles, char_button_map, plot_map, log):
+        etree.XSLTExtension.__init__(self)
+        self.styles = styles
+        self.log = log
+        self.char_button_map = char_button_map
+        self.plot_map = plot_map
+
+    def execute(self, context, self_node, input_node, output_parent):
+        input_node = deepcopy(input_node)
+        div = etree.Element('div')
+        self.render_block(input_node, div)
+        output_parent.append(div)
+
+    def render_block(self, node, root):
+        ts = node.get('textstyle', None)
+        classes = []
+        bs = node.get('blockstyle')
+        if bs in self.styles.block_style_map:
+            classes.append('bs%d'%self.styles.block_style_map[bs])
+        if ts in self.styles.text_style_map:
+            classes.append('ts%d'%self.styles.text_style_map[ts])
+        if classes:
+            root.set('class', ' '.join(classes))
+        objid = node.get('objid', None)
+        if objid:
+            root.set('id', objid)
+        root.text = node.text
+        self.root = root
+        self.parent = root
+        self.add_text_to = (self.parent, 'text')
+        self.fix_deep_nesting(node)
+        for child in node:
+            self.process_child(child)
+
+    def fix_deep_nesting(self, node):
+        deepest = 1
+
+        def depth(node):
+            parent = node.getparent()
+            ans = 1
+            while parent is not None:
+                ans += 1
+                parent = parent.getparent()
+            return ans
+
+        for span in node.xpath('descendant::Span'):
+            d = depth(span)
+            if d > deepest:
+                deepest = d
+                if d > 500:
+                    break
+
+        if deepest < 500:
+            return
+
+        self.log.warn('Found deeply nested spans. Flattening.')
+        # with open('/t/before.xml', 'wb') as f:
+        #    f.write(etree.tostring(node, method='xml'))
+
+        spans = [(depth(span), span) for span in node.xpath('descendant::Span')]
+        spans.sort(key=operator.itemgetter(0), reverse=True)
+
+        for depth, span in spans:
+            if depth < 3:
+                continue
+            p = span.getparent()
+            gp = p.getparent()
+            idx = p.index(span)
+            pidx = gp.index(p)
+            children = list(p)[idx:]
+            t = children[-1].tail
+            t = t if t else ''
+            children[-1].tail = t + (p.tail if p.tail else '')
+            p.tail = ''
+            pattrib = dict(**p.attrib) if p.tag == 'Span' else {}
+            for child in children:
+                p.remove(child)
+                if pattrib and child.tag == "Span":
+                    attrib = copy(pattrib)
+                    attrib.update(child.attrib)
+                    child.attrib.update(attrib)
+
+            for child in reversed(children):
+                gp.insert(pidx+1, child)
+
+        # with open('/t/after.xml', 'wb') as f:
+        #    f.write(etree.tostring(node, method='xml'))
+
+    def add_text(self, text):  # append text to the current (element, attr) slot
+        if text:
+            target, attr = self.add_text_to
+            # An unset text/tail reads back as None; treat it as ''.
+            current = getattr(target, attr) or ''
+            setattr(target, attr, current + text)
+
+    def process_container(self, child, tgt):
+        idx = self.styles.get_text_styles(child)
+        if idx is not None:
+            tgt.set('class', 'ts%d'%idx)
+        self.parent.append(tgt)
+        orig_parent = self.parent
+        self.parent = tgt
+        self.add_text_to = (self.parent, 'text')
+        self.add_text(child.text)
+        for gchild in child:
+            self.process_child(gchild)
+        self.parent = orig_parent
+        self.add_text_to = (tgt, 'tail')
+        self.add_text(child.tail)
+
+    def process_child(self, child):
+        if child.tag == 'CR':
+            if self.parent == self.root or self.parent.tag == 'p':
+                self.parent = self.root.makeelement('p')
+                self.root.append(self.parent)
+                self.add_text_to = (self.parent, 'text')
+            else:
+                br = self.parent.makeelement('br')
+                self.parent.append(br)
+                self.add_text_to = (br, 'tail')
+            self.add_text(child.tail)
+        elif child.tag in ('P', 'Span', 'EmpLine', 'NoBR'):
+            span = self.root.makeelement('span')
+            if child.tag == 'EmpLine':
+                td = 'underline' if child.get('emplineposition', 'before') == 'before' else 'overline'
+                span.set('style', 'text-decoration: '+td)
+            self.process_container(child, span)
+        elif child.tag == 'Sup':
+            sup = self.root.makeelement('sup')
+            self.process_container(child, sup)
+        elif child.tag == 'Sub':
+            sub = self.root.makeelement('sub')
+            self.process_container(child, sub)
+        elif child.tag == 'Italic':
+            sup = self.root.makeelement('i')
+            self.process_container(child, sup)
+        elif child.tag == 'CharButton':
+            a = self.root.makeelement('a')
+            oid = child.get('refobj', None)
+            if oid in self.char_button_map:
+                a.set('href', self.char_button_map[oid])
+            self.process_container(child, a)
+        elif child.tag == 'Plot':
+            xsize = self.styles.to_num(child.get('xsize', None), 166/720)
+            ysize = self.styles.to_num(child.get('ysize', None), 166/720)
+            img = self.root.makeelement('img')
+            if xsize is not None:
+                img.set('width', str(int(xsize)))
+            if ysize is not None:
+                img.set('height', str(int(ysize)))
+            ro = child.get('refobj', None)
+            if ro in self.plot_map:
+                img.set('src', self.plot_map[ro])
+            self.parent.append(img)
+            self.add_text_to = (img, 'tail')
+            self.add_text(child.tail)
+        else:
+            self.log.warn('Unhandled Text element:', child.tag)
+
+
+class Styles(etree.XSLTExtension):
+
+    def __init__(self):
+        etree.XSLTExtension.__init__(self)
+        self.text_styles, self.block_styles = [], []
+        self.text_style_map, self.block_style_map = {}, {}
+        self.CSS = textwrap.dedent('''
+        .image_page { text-align:center }
+        ''')
+
+    def write(self, name='styles.css'):
+
+        def join(style):
+            ans = ['%s : %s;'%(k, v) for k, v in style.items()]
+            if ans:
+                ans[-1] = ans[-1][:-1]
+            return '\n\t'.join(ans)
+
+        with open(name, 'wb') as f:
+            f.write(as_bytes(self.CSS))
+            for (w, sel) in [(self.text_styles, 'ts'), (self.block_styles,
+                'bs')]:
+                for i, s in enumerate(w):
+                    if not s:
+                        continue
+                    rsel = '.%s%d'%(sel, i)
+                    s = join(s)
+                    f.write(as_bytes(rsel + ' {\n\t' + s + '\n}\n\n'))
+
+    def execute(self, context, self_node, input_node, output_parent):
+        if input_node.tag == 'TextStyle':
+            idx = self.get_text_styles(input_node)
+            if idx is not None:
+                self.text_style_map[input_node.get('objid')] = idx
+        else:
+            idx = self.get_block_styles(input_node)
+            self.block_style_map[input_node.get('objid')] = idx
+
+    def px_to_pt(self, px):  # scale px by 72/166; None when px is not numeric
+        try:
+            return float(px) * 72 / 166  # float(): callers pass node.get() strings
+        except (TypeError, ValueError):  # attribute absent (None) or junk text
+            return None
+
+    def color(self, val):  # val: hex string; red in bits 0-7, alpha in bits 24-31
+        try:
+            val = int(val, 16)
+        except (TypeError, ValueError):  # attribute missing or not hexadecimal
+            return None
+        r, g, b, a = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF
+        if a == 255:
+            return None
+        if a == 0:
+            return 'rgb(%d,%d,%d)'%(r,g,b)
+        return 'rgba(%d,%d,%d,%f)'%(r,g,b,1.-a/255.)
+
+    def get_block_styles(self, node):
+        ans = {}
+        sm = self.px_to_pt(node.get('sidemargin', None))
+        if sm is not None:
+            ans['margin-left'] = ans['margin-right'] = '%fpt'%sm
+        ts = self.px_to_pt(node.get('topskip', None))
+        if ts is not None:
+            ans['margin-top'] = '%fpt'%ts
+        fs = self.px_to_pt(node.get('footskip', None))
+        if fs is not None:
+            ans['margin-bottom'] = '%fpt'%fs
+        fw = self.px_to_pt(node.get('framewidth', None))
+        if fw is not None:
+            ans['border-width'] = '%fpt'%fw
+            ans['border-style'] = 'solid'
+        fc = self.color(node.get('framecolor', None))
+        if fc is not None:
+            ans['border-color'] = fc
+        bc = self.color(node.get('bgcolor', None))
+        if bc is not None:
+            ans['background-color'] = bc
+        if ans not in self.block_styles:
+            self.block_styles.append(ans)
+        return self.block_styles.index(ans)
+
+    def to_num(self, val, factor=1.):  # float(val) * factor, or None if not numeric
+        try:
+            return float(val)*factor
+        except (TypeError, ValueError):  # val is None or not a number string
+            return None
+
+    def get_text_styles(self, node):
+        ans = {}
+        fs = self.to_num(node.get('fontsize', None), 0.1)
+        if fs is not None:
+            ans['font-size'] = '%fpt'%fs
+        fw = self.to_num(node.get('fontweight', None))
+        if fw is not None:
+            ans['font-weight'] = ('bold' if fw >= 700 else 'normal')
+        # fn = getattr(obj, 'fontfacename', None)
+        # if fn is not None:
+        #    fn = cls.FONT_MAP[fn]
+        #    item('font-family: %s;'%fn)
+        fg = self.color(node.get('textcolor', None))
+        if fg is not None:
+            ans['color'] = fg
+        bg = self.color(node.get('textbgcolor', None))
+        if bg is not None:
+            ans['background-color'] = bg
+        al = node.get('align', None)
+        if al is not None:
+            all = dict(head='left', center='center', foot='right')
+            ans['text-align'] = all.get(al, 'left')
+        # lh = self.to_num(node.get('linespace', None), 0.1)
+        # if lh is not None:
+        #    ans['line-height'] = '%fpt'%lh
+        pi = self.to_num(node.get('parindent', None), 0.1)
+        if pi is not None:
+            ans['text-indent'] = '%fpt'%pi
+        if not ans:
+            return None
+        if ans not in self.text_styles:
+            self.text_styles.append(ans)
+        return self.text_styles.index(ans)
diff --git a/ebook_converter/ebooks/lrf/lrfparser.py b/ebook_converter/ebooks/lrf/lrfparser.py
new file mode 100644
index 0000000..6b876a5
--- /dev/null
+++ b/ebook_converter/ebooks/lrf/lrfparser.py
@@ -0,0 +1,171 @@
+import sys, array, os, re, codecs, logging
+from itertools import chain
+
+from ebook_converter import setup_cli_handlers
+from ebook_converter.utils.config import OptionParser
+from ebook_converter.utils.filenames import ascii_filename
+from ebook_converter.ebooks.lrf.meta import LRFMetaFile
+from ebook_converter.ebooks.lrf.objects import get_object, PageTree, StyleObject, \
+                                         Font, Text, TOCObject, BookAttr, ruby_tags
+
+
+class LRFDocument(LRFMetaFile):
+
+    class temp(object):
+        pass
+
+    def __init__(self, stream):
+        LRFMetaFile.__init__(self, stream)
+        self.scramble_key = self.xor_key
+        self.page_trees = []
+        self.font_map = {}
+        self.image_map = {}
+        self.toc = ''
+        self.keep_parsing = True
+
+    def parse(self):
+        self._parse_objects()
+        self.metadata = LRFDocument.temp()
+        for a in ('title', 'title_reading', 'author', 'author_reading', 'book_id',
+                  'classification', 'free_text', 'publisher', 'label', 'category'):
+            setattr(self.metadata, a, getattr(self, a))
+        self.doc_info = LRFDocument.temp()
+        for a in ('thumbnail', 'language', 'creator', 'producer', 'page'):
+            setattr(self.doc_info, a, getattr(self, a))
+        self.doc_info.thumbnail_extension = self.thumbail_extension()
+        self.device_info = LRFDocument.temp()
+        for a in ('dpi', 'width', 'height'):
+            setattr(self.device_info, a, getattr(self, a))
+
+    def _parse_objects(self):  # read the object index, parse then initialize objects
+        self.objects = {}
+        self._file.seek(self.object_index_offset)
+        obj_array = array.array("I", self._file.read(4*4*self.number_of_objects))
+        if sys.byteorder == 'big':  # swap on big-endian hosts; tostring() is gone in 3.9
+            obj_array.byteswap()
+        for i in range(self.number_of_objects):
+            if not self.keep_parsing:
+                break
+            objid, objoff, objsize = obj_array[i*4:i*4+3]  # 4th word of entry unused
+            self._parse_object(objid, objoff, objsize)
+        for obj in self.objects.values():
+            if not self.keep_parsing:
+                break
+            if hasattr(obj, 'initialize'):
+                obj.initialize()
+
+    def _parse_object(self, objid, objoff, objsize):
+        obj = get_object(self, self._file, objid, objoff, objsize, self.scramble_key)
+        self.objects[objid] = obj
+        if isinstance(obj, PageTree):
+            self.page_trees.append(obj)
+        elif isinstance(obj, TOCObject):
+            self.toc = obj
+        elif isinstance(obj, BookAttr):
+            self.ruby_tags = {}
+            for h in ruby_tags.values():
+                attr = h[0]
+                if hasattr(obj, attr):
+                    self.ruby_tags[attr] = getattr(obj, attr)
+
+    def __iter__(self):
+        for pt in self.page_trees:
+            yield pt
+
+    def write_files(self):
+        for obj in chain(self.image_map.values(), self.font_map.values()):
+            with open(obj.file, 'wb') as f:
+                f.write(obj.stream)
+
+    def to_xml(self, write_files=True):
+        bookinfo = '<BookInformation>\n<Info version="1.1">\n<BookInfo>\n'
+        bookinfo += '<Title reading="%s">%s</Title>\n'%(self.metadata.title_reading, self.metadata.title)
+        bookinfo += '<Author reading="%s">%s</Author>\n'%(self.metadata.author_reading, self.metadata.author)
+        bookinfo += '<BookID>%s</BookID>\n'%(self.metadata.book_id,)
+        bookinfo += '<Publisher reading="">%s</Publisher>\n'%(self.metadata.publisher,)
+        bookinfo += '<Label reading="">%s</Label>\n'%(self.metadata.label,)
+        bookinfo += '<Category reading="">%s</Category>\n'%(self.metadata.category,)
+        bookinfo += '<Classification reading="">%s</Classification>\n'%(self.metadata.classification,)
+        bookinfo += '<FreeText reading="">%s</FreeText>\n</BookInfo>\n<DocInfo>\n'%(self.metadata.free_text,)
+        th = self.doc_info.thumbnail
+        if th:
+            prefix = ascii_filename(self.metadata.title)
+            bookinfo += '<CThumbnail file="%s" />\n'%(prefix+'_thumbnail.'+self.doc_info.thumbnail_extension,)
+            if write_files:
+                with open(prefix+'_thumbnail.'+self.doc_info.thumbnail_extension, 'wb') as f:
+                    f.write(th)
+        bookinfo += '<Language reading="">%s</Language>\n'%(self.doc_info.language,)
+        bookinfo += '<Creator reading="">%s</Creator>\n'%(self.doc_info.creator,)
+        bookinfo += '<Producer reading="">%s</Producer>\n'%(self.doc_info.producer,)
+        bookinfo += '<SumPage>%s</SumPage>\n</DocInfo>\n</Info>\n%s</BookInformation>\n'%(self.doc_info.page,self.toc)
+        pages = ''
+        done_main = False
+        pt_id = -1
+        for page_tree in self:
+            if not done_main:
+                done_main = True
+                pages += '<Main>\n'
+                close = '</Main>\n'
+                pt_id = page_tree.id
+            else:
+                pages += '<PageTree objid="%d">\n'%(page_tree.id,)
+                close = '</PageTree>\n'
+            for page in page_tree:
+                pages += str(page)
+            pages += close
+        traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id]
+
+        objects = '\n<Objects>\n'
+        styles  = '\n<Style>\n'
+        for obj in self.objects:
+            obj = self.objects[obj]
+            if obj.id in traversed_objects:
+                continue
+            if isinstance(obj, (Font, Text, TOCObject)):
+                continue
+            if isinstance(obj, StyleObject):
+                styles += str(obj)
+            else:
+                objects += str(obj)
+        styles += '</Style>\n'
+        objects += '</Objects>\n'
+        if write_files:
+            self.write_files()
+        return '<BBeBXylog version="1.0">\n' + bookinfo + pages + styles + objects + '</BBeBXylog>'
+
+
+def option_parser():
+    parser = OptionParser(usage=_('%prog book.lrf\nConvert an LRF file into an LRS (XML UTF-8 encoded) file'))
+    parser.add_option('--output', '-o', default=None, help=_('Output LRS file'), dest='out')
+    parser.add_option('--dont-output-resources', default=True, action='store_false',
+                      help=_('Do not save embedded image and font files to disk'),
+                      dest='output_resources')
+    parser.add_option('--verbose', default=False, action='store_true', dest='verbose', help=_('Be more verbose'))
+    return parser
+
+
+def main(args=sys.argv, logger=None):
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+    if logger is None:
+        level = logging.DEBUG if opts.verbose else logging.INFO
+        logger = logging.getLogger('lrf2lrs')
+        setup_cli_handlers(logger, level)
+    if len(args) != 2:
+        parser.print_help()
+        return 1
+    if opts.out is None:
+        opts.out = os.path.join(os.path.dirname(args[1]), os.path.splitext(os.path.basename(args[1]))[0]+".lrs")
+    logger.info(_('Parsing LRF...'))
+    d = LRFDocument(open(args[1], 'rb'))
+    d.parse()
+    logger.info(_('Creating XML...'))
+    with codecs.open(os.path.abspath(os.path.expanduser(opts.out)), 'wb', 'utf-8') as f:
+        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
+        f.write(d.to_xml(write_files=opts.output_resources))
+    logger.info(_('LRS written to ')+opts.out)
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/ebook_converter/ebooks/lrf/meta.py b/ebook_converter/ebooks/lrf/meta.py
new file mode 100644
index 0000000..248e3b7
--- /dev/null
+++ b/ebook_converter/ebooks/lrf/meta.py
@@ -0,0 +1,766 @@
+"""
+This module presents an easy to use interface for getting and setting
+meta information in LRF files.
+Just create an L{LRFMetaFile} object and use its properties
+to get and set meta information. For example:
+
+>>> lrf = LRFMetaFile("mybook.lrf")
+>>> print(lrf.title, lrf.author)
+>>> lrf.category = "History"
+"""
+
+import functools
+import io
+import os
+import shutil
+import struct
+import sys
+from xml.dom import minidom
+import zlib
+
+from ebook_converter.ebooks.chardet import xml_to_unicode
+from ebook_converter.ebooks.metadata import MetaInformation, string_to_authors
+
+BYTE = "<B"  #: Unsigned char little endian encoded in 1 byte
+WORD = "<H"  #: Unsigned short little endian encoded in 2 bytes
+DWORD = "<I"  #: Unsigned integer little endian encoded in 4 bytes
+QWORD = "<Q"  #: Unsigned long long little endian encoded in 8 bytes
+
+
+class field(object):
+    """ A U{Descriptor<http://www.cafepy.com/article/python_attributes_
+            and_methods/python_attributes_and_methods.html>},
+    that implements access to protocol packets in a human readable way.
+    """
+
+    def __init__(self, start=16, fmt=DWORD):
+        """
+        @param start: The byte at which this field is stored in the buffer
+        @param fmt:   The packing format for this field.
+        See U{struct<http://docs.python.org/lib/module-struct.html>}.
+        """
+        self._fmt, self._start = fmt, start
+
+    def __get__(self, obj, typ=None):
+        return obj.unpack(start=self._start, fmt=self._fmt)[0]
+
+    def __set__(self, obj, val):
+        obj.pack(val, start=self._start, fmt=self._fmt)
+
+    def __repr__(self):  # human-readable type name, byte size and offset
+        typ = {DWORD: 'unsigned int', QWORD: 'unsigned long long',
+               BYTE: 'unsigned char',
+               WORD: 'unsigned short'}.get(self._fmt, '')
+        return ("An " + typ + " stored in " +
+                str(struct.calcsize(self._fmt)) +
+                " bytes starting at byte " + str(self._start))
+
+
+class versioned_field(field):
+
+    def __init__(self, vfield, version, start=0, fmt=WORD):
+        field.__init__(self, start=start, fmt=fmt)
+        self.vfield, self.version = vfield, version
+
+    def enabled(self, obj):
+        return self.vfield.__get__(obj) > self.version
+
+    def __get__(self, obj, typ=None):
+        if self.enabled(obj):
+            return field.__get__(self, obj, typ=typ)
+        else:
+            return None
+
+    def __set__(self, obj, val):
+        if not self.enabled(obj):
+            raise LRFException("Trying to set disabled field")
+        else:
+            field.__set__(self, obj, val)
+
+
+class LRFException(Exception):
+    pass
+
+
+class fixed_stringfield(object):
+    """ A field storing a fixed length string. """
+
+    def __init__(self, length=8, start=0):
+        """
+        @param length: Size of this string
+        @param start: The byte at which this field is stored in the buffer
+        """
+        self._length = length
+        self._start = start
+
+    def __get__(self, obj, typ=None):
+        length = str(self._length)
+        return obj.unpack(start=self._start, fmt="<"+length+"s")[0]
+
+    def __set__(self, obj, val):
+        if not isinstance(val, (str, bytes)):
+            val = str(val)
+        if isinstance(val, str):
+            val = val.encode('utf-8')
+        if len(val) != self._length:
+            raise LRFException("Trying to set fixed_stringfield with a " +
+                               "string of  incorrect length")
+        obj.pack(val, start=self._start, fmt="<"+str(len(val))+"s")
+
+    def __repr__(self):
+        return "A string of length " + str(self._length) + \
+                " starting at byte " + str(self._start)
+
+
+class xml_attr_field(object):
+
+    def __init__(self, tag_name, attr, parent='BookInfo'):
+        self.tag_name = tag_name
+        self.parent = parent
+        self.attr = attr
+
+    def __get__(self, obj, typ=None):
+        """ Return the data in this field or '' if the field is empty """
+        document = obj.info
+        elems = document.getElementsByTagName(self.tag_name)
+        if len(elems):
+            elem = None
+            for candidate in elems:
+                if candidate.parentNode.nodeName == self.parent:
+                    elem = candidate
+            if elem and elem.hasAttribute(self.attr):
+                return elem.getAttribute(self.attr)
+        return ''
+
+    def __set__(self, obj, val):
+        if val is None:
+            val = ""
+        document = obj.info
+        # Keep the last tag_name element whose parent is self.parent; elem
+        # stays None (nothing is written) when the document has no such element.
+        elem = None
+        for candidate in document.getElementsByTagName(self.tag_name):
+            if candidate.parentNode.nodeName == self.parent:
+                elem = candidate
+        if elem:
+            elem.setAttribute(self.attr, val)
+        obj.info = document
+
+    def __repr__(self):
+        return "XML Attr Field: " + self.tag_name + " in " + self.parent
+
+    def __str__(self):
+        return self.tag_name+'.'+self.attr
+
+
+class xml_field(object):
+    """
+    Descriptor that gets and sets XML based meta information from an LRF file.
+    Works for simple XML fields of the form <tagname>data</tagname>
+    """
+
+    def __init__(self, tag_name, parent="BookInfo"):
+        """
+        @param tag_name: The XML tag whose data we operate on
+        @param parent: The tagname of the parent element of C{tag_name}
+        """
+        self.tag_name = tag_name
+        self.parent = parent
+
+    def __get__(self, obj, typ=None):
+        """ Return the data in this field or '' if the field is empty """
+        document = obj.info
+
+        elems = document.getElementsByTagName(self.tag_name)
+        if len(elems):
+            elem = None
+            for candidate in elems:
+                if candidate.parentNode.nodeName == self.parent:
+                    elem = candidate
+            if elem:
+                elem.normalize()
+                if elem.hasChildNodes():
+                    return elem.firstChild.data.strip()
+        return ''
+
+    def __set__(self, obj, val):
+        if not val:
+            val = ''
+        document = obj.info
+
+        def create_elem():
+            elem = document.createElement(self.tag_name)
+            parent = document.getElementsByTagName(self.parent)[0]
+            parent.appendChild(elem)
+            return elem
+
+        if not val:
+            val = ''
+        if not isinstance(val, str):
+            val = val.decode('utf-8')
+
+        elems = document.getElementsByTagName(self.tag_name)
+        elem = None
+        if len(elems):
+            for candidate in elems:
+                if candidate.parentNode.nodeName == self.parent:
+                    elem = candidate
+            if not elem:
+                elem = create_elem()
+            else:
+                elem.normalize()
+                while elem.hasChildNodes():
+                    elem.removeChild(elem.lastChild)
+        else:
+            elem = create_elem()
+        elem.appendChild(document.createTextNode(val))
+
+        obj.info = document
+
+    def __str__(self):
+        return self.tag_name
+
+    def __repr__(self):
+        return "XML Field: " + self.tag_name + " in " + self.parent
+
+
+def insert_into_file(fileobj, data, start, end):
+    """
+    Replace the bytes of fileobj between C{start} and C{end} with C{data}.
+
+    Everything from C{end} onwards is kept and ends up right after the newly
+    written data. If end == start no data is overwritten. Do not use this
+    function to append data to a file.
+
+    @param fileobj: file like object
+    @param data: data to be inserted into fileobj
+    @param start: The position at which to start inserting data
+    @param end: The position in fileobj of data that must not be overwritten
+    @return: C{start + len(data) - end}
+    """
+    # Set aside the tail that has to survive the rewrite
+    fileobj.seek(end)
+    tail = io.BytesIO()
+    shutil.copyfileobj(fileobj, tail, -1)
+    tail.seek(0)
+    fileobj.seek(start)
+    fileobj.write(data)
+    fileobj.flush()
+    fileobj.truncate()
+    shift = fileobj.tell() - end  # < 0 if len(data) < end-start
+    shutil.copyfileobj(tail, fileobj, -1)
+    fileobj.flush()
+    tail.close()
+    return shift
+
+
+def get_metadata(stream):
+    """
+    Return basic meta-data about the LRF file in C{stream} as a
+    L{MetaInformation} object. Empty and "unknown" values become None.
+    @param stream: A file like object or an instance of L{LRFMetaFile}
+    """
+    lrf = stream if isinstance(stream, LRFMetaFile) else LRFMetaFile(stream)
+    authors = string_to_authors(lrf.author)
+    mi = MetaInformation(lrf.title.strip(), authors)
+    mi.author = lrf.author.strip()
+    mi.comments = lrf.free_text.strip()
+    mi.category = lrf.category.strip()+', '+lrf.classification.strip()
+    tags = [x.strip() for x in mi.category.split(',') if x.strip()]
+    if tags:
+        mi.tags = tags
+    if mi.category.strip() == ',':  # category and classification both empty
+        mi.category = None
+    mi.publisher = lrf.publisher.strip()
+    mi.cover_data = lrf.get_cover()
+    try:  # best effort: any failure leaves title_sort as it was
+        mi.title_sort = lrf.title_reading.strip()
+        if not mi.title_sort:
+            mi.title_sort = None
+    except Exception:
+        pass
+
+    try:  # best effort: any failure leaves author_sort as it was
+        mi.author_sort = lrf.author_reading.strip()
+        if not mi.author_sort:
+            mi.author_sort = None
+    except Exception:
+        pass
+
+    if not mi.title or 'unknown' in mi.title.lower():  # placeholder value
+        mi.title = None
+    if not mi.authors:
+        mi.authors = None
+    if not mi.author or 'unknown' in mi.author.lower():
+        mi.author = None
+    if not mi.category or 'unknown' in mi.category.lower():
+        mi.category = None
+    if not mi.publisher or 'unknown' in mi.publisher.lower() or \
+            'some publisher' in mi.publisher.lower():
+        mi.publisher = None
+
+    return mi
+
+
+class LRFMetaFile(object):
+    """Has properties to read and write all Meta information in a LRF file."""
+    #: The first 6 bytes of all valid LRF files
+    LRF_HEADER = 'LRF'.encode('utf-16le')
+
+    lrf_header = fixed_stringfield(length=6, start=0x0)  # checked in __init__
+    version = field(fmt=WORD, start=0x8)
+    xor_key = field(fmt=WORD, start=0xa)  # handed to get_object()
+    root_object_id = field(fmt=DWORD, start=0xc)
+    number_of_objects = field(fmt=QWORD, start=0x10)
+    object_index_offset = field(fmt=QWORD, start=0x18)
+    binding = field(fmt=BYTE, start=0x24)
+    dpi = field(fmt=WORD, start=0x26)
+    width = field(fmt=WORD, start=0x2a)
+    height = field(fmt=WORD, start=0x2c)
+    color_depth = field(fmt=BYTE, start=0x2e)
+    toc_object_id = field(fmt=DWORD, start=0x44)
+    toc_object_offset = field(fmt=DWORD, start=0x48)
+    compressed_info_size = field(fmt=WORD, start=0x4c)  # 0: no meta info
+    thumbnail_type = versioned_field(version, 800, fmt=WORD, start=0x4e)
+    thumbnail_size = versioned_field(version, 800, fmt=DWORD, start=0x50)
+    uncompressed_info_size = versioned_field(compressed_info_size, 0,
+                                             fmt=DWORD, start=0x54)
+
+    title = xml_field("Title", parent="BookInfo")
+    title_reading = xml_attr_field("Title", 'reading', parent="BookInfo")
+    author = xml_field("Author", parent="BookInfo")
+    author_reading = xml_attr_field("Author", 'reading', parent="BookInfo")
+    # 16 characters. First two chars should be FB for personal use ebooks.
+    book_id = xml_field("BookID", parent="BookInfo")
+    publisher = xml_field("Publisher", parent="BookInfo")
+    label = xml_field("Label", parent="BookInfo")
+    category = xml_field("Category", parent="BookInfo")
+    classification = xml_field("Classification", parent="BookInfo")
+    free_text = xml_field("FreeText", parent="BookInfo")
+    # Should use ISO 639 language codes
+    language = xml_field("Language", parent="DocInfo")
+    creator = xml_field("Creator", parent="DocInfo")
+    # Format is %Y-%m-%d
+    creation_date = xml_field("CreationDate", parent="DocInfo")
+    producer = xml_field("Producer", parent="DocInfo")
+    page = xml_field("SumPage", parent="DocInfo")
+
+    def safe(func):
+        """
+        Decorate a method so that calling it leaves the position of the
+        underlying file (C{_file} of its first argument) unchanged
+        """
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            stream = args[0]._file
+            saved = stream.tell()
+            result = func(*args, **kwargs)
+            stream.seek(0, 2)
+            if stream.tell() >= saved:
+                stream.seek(saved)
+            return result
+        return wrapper
+
+    def safe_property(func):
+        """
+        Build a property from the C{fget}/C{fset}/C{doc} dict returned by
+        C{func}; its accessors leave the underlying file position unchanged
+        """
+        def decorator(f):
+            @functools.wraps(f)  # keep the accessor's name, as L{safe} does
+            def restore_pos(*args, **kwargs):
+                obj = args[0]
+                pos = obj._file.tell()
+                res = f(*args, **kwargs)
+                obj._file.seek(0, 2)
+                if obj._file.tell() >= pos:
+                    obj._file.seek(pos)
+                return res
+            return restore_pos
+        spec = func()
+        for accessor in ('fget', 'fset'):
+            if accessor in spec:
+                spec[accessor] = decorator(spec[accessor])
+        return property(**spec)
+
+    @safe_property
+    def info():
+        doc = """\
+        Document meta information as a minidom Document object.
+        To set use a minidom document object.
+        """
+
+        def fget(self):
+            if self.compressed_info_size == 0:
+                raise LRFException("This document has no meta info")
+            # The stored size is 4 bytes larger than the compressed data
+            size = self.compressed_info_size - 4
+            self._file.seek(self.info_start)
+            try:
+                src = zlib.decompress(self._file.read(size))
+                if len(src) != self.uncompressed_info_size:
+                    raise LRFException("Decompression of document meta info "
+                                       "yielded unexpected results")
+                src = xml_to_unicode(src, strip_encoding_pats=True,
+                                     resolve_entities=True,
+                                     assume_utf8=True)[0]
+                return minidom.parseString(src)
+            except zlib.error as err:
+                raise LRFException("Unable to decompress document meta "
+                                   "information") from err
+
+        def fset(self, document):
+            info = document.toxml('utf-8')
+            self.uncompressed_info_size = len(info)
+            stream = zlib.compress(info)
+            orig_size = self.compressed_info_size
+            self.compressed_info_size = len(stream) + 4
+            delta = insert_into_file(self._file, stream, self.info_start,
+                                     self.info_start + orig_size - 4)
+            # Everything stored after the info block moved by delta bytes
+            if self.toc_object_offset > 0:
+                self.toc_object_offset += delta
+            self.object_index_offset += delta
+            self.update_object_offsets(delta)
+
+        return {"fget": fget, "fset": fset, "doc": doc}
+
+    @safe_property
+    def thumbnail_pos():
+        doc = """The position of the thumbnail in the LRF file"""
+
+        def fget(self):  # first byte after the compressed info data
+            return self.info_start + self.compressed_info_size-4
+        return {"fget": fget, "doc": doc}
+
+    @classmethod
+    def _detect_thumbnail_type(cls, slice):
+        """
+        Guess the LRF thumbnail type code from the image's magic bytes.
+        @param slice: The first 16 bytes of the thumbnail
+        @return: 0x11 JPEG, 0x12 PNG, 0x13 BMP, otherwise 0x14 (GIF)
+        """
+        if isinstance(slice, str):  # tolerate text; the file yields bytes
+            slice = slice.encode('latin-1', 'replace')
+        known = ((b"JFIF", 0x11), (b"BM", 0x13), (b"PNG", 0x12))
+        return next((code for sig, code in known if sig in slice), 0x14)
+
+    @safe_property
+    def thumbnail():
+        doc = """\
+        The thumbnail.
+        Represented as a string.
+        The string you would get from the file read function.
+        """
+
+        def fget(self):
+            size = self.thumbnail_size
+            if size:
+                self._file.seek(self.thumbnail_pos)
+                return self._file.read(size)
+
+        def fset(self, data):
+            if self.version <= 800:
+                raise LRFException("Cannot store thumbnails in LRF files "
+                                   "of version <= 800")
+            orig_size = self.thumbnail_size
+            self.thumbnail_size = len(data)
+            delta = insert_into_file(self._file, data, self.thumbnail_pos,
+                                     self.thumbnail_pos + orig_size)
+            if self.toc_object_offset > 0:  # same guard as the info setter
+                self.toc_object_offset += delta
+            self.object_index_offset += delta
+            self.thumbnail_type = self._detect_thumbnail_type(data[0:16])
+            self.update_object_offsets(delta)
+
+        return {"fget": fget, "fset": fset, "doc": doc}
+
+    def __init__(self, file):
+        """ @param file: A file object opened in the r+b mode """
+        file.seek(0, 2)
+        self.size = file.tell()
+        self._file = file
+        if self.lrf_header != LRFMetaFile.LRF_HEADER:
+            raise LRFException("%s has an invalid LRF header. Are you sure it "
+                               "is an LRF file?" % getattr(file, 'name', file))
+        # Byte at which the compressed meta information starts
+        self.info_start = 0x58 if self.version > 800 else 0x53
+
+    @safe
+    def update_object_offsets(self, delta):
+        """
+        Shift the offset stored in every entry of the LRF object index by
+        C{delta}, raising L{LRFException} if an offset leaves the valid range.
+        """
+        self._file.seek(self.object_index_offset)
+        for _ in range(self.number_of_objects):
+            entry = self._file.read(8)
+            shifted = struct.unpack(DWORD, entry[4:8])[0] + delta
+            if not 0x4C <= shifted < 2**32:
+                raise LRFException('Invalid LRF file. Could not set metadata.')
+            # Step back over the offset just read, rewrite it, then skip ahead
+            self._file.seek(-4, os.SEEK_CUR)
+            self._file.write(struct.pack(DWORD, shifted))
+            self._file.seek(8, os.SEEK_CUR)
+        self._file.flush()
+
+    @safe
+    def unpack(self, fmt=DWORD, start=0):
+        """
+        Read C{struct.calcsize(fmt)} bytes at C{start} and return them decoded.
+
+        @param fmt: See http://docs.python.org/lib/module-struct.html
+        @param start: Position in file from which to decode
+        """
+        size = struct.calcsize(fmt)
+        self._file.seek(start)
+        raw = self._file.read(size)
+        return struct.unpack(fmt, raw)
+
+    @safe
+    def pack(self, *args, **kwargs):
+        """
+        Encode C{args} with C{struct.pack} and write the result to the file.
+        C{kwargs} must contain the keywords C{fmt} and C{start}
+
+        @param args: The values to pack
+        @param fmt: See http://docs.python.org/lib/module-struct.html
+        @param start: Position in file at which to write encoded data
+        """
+        payload = struct.pack(kwargs["fmt"], *args)
+        self._file.seek(kwargs["start"])
+        self._file.write(payload)
+        self._file.flush()
+
+    def thumbail_extension(self):
+        """
+        Return the file extension for the thumbnail image type stored in
+        L{self.thumbnail_type}; any type not listed below yields "gif".
+
+        If the LRF file was created by buggy software, the extension may be
+        incorrect. See L{self.fix_thumbnail_type}.
+        """
+        extensions = {
+            0x11: "jpeg",
+            0x12: "png",
+            0x13: "bmp",
+        }
+        ttype = self.thumbnail_type
+        # Everything else, including 0x14, is treated as GIF
+        return extensions.get(ttype, "gif")
+
+    def fix_thumbnail_type(self):
+        """
+        Set L{self.thumbnail_type} from the thumbnail's magic bytes, if any.
+        """
+        data = self.thumbnail  # None when the file stores no thumbnail
+        if data:
+            self.thumbnail_type = self._detect_thumbnail_type(data[0:16])
+
+    def seek(self, *args):
+        """ Move the position of the underlying file. See L{file.seek} """
+        return self._file.seek(*args)
+
+    def tell(self):
+        """ Return the position of the underlying file. See L{file.tell} """
+        return self._file.tell()
+
+    def read(self):
+        """ Read the underlying file with no size limit. See L{file.read} """
+        return self._file.read()
+
+    def write(self, val):
+        """ Write val to the underlying file, returns None. See L{file.write} """
+        self._file.write(val)
+
+    def _objects(self):
+        """Yield the first three ints of each 16 byte object index entry."""
+        self._file.seek(self.object_index_offset)
+        for _ in range(self.number_of_objects):
+            entry = self._file.read(16)
+            resume_at = self._file.tell()
+            yield struct.unpack('<IIII', entry)[:3]
+            # The consumer may have moved the file; return to the index
+            self._file.seek(resume_at)
+
+    def get_objects_by_type(self, type):  # -> [(id, offset, size), ...]
+        from ebook_converter.ebooks.lrf.tags import Tag
+        found = []
+        for _, offset, size in self._objects():
+            self._file.seek(offset)
+            head = Tag(self._file)
+            if head.id == 0xF500:
+                found_id, found_type = struct.unpack("<IH", head.contents)
+                if found_type == type:
+                    found.append((found_id, offset, size))
+        return found
+
+    def get_object_by_id(self, tid):  # -> (id, offset, size, type) or 4x False
+        from ebook_converter.ebooks.lrf.tags import Tag
+        for _, offset, size in self._objects():
+            self._file.seek(offset)
+            head = Tag(self._file)
+            if head.id == 0xF500:
+                found_id, found_type = struct.unpack("<IH", head.contents)
+                if found_id == tid:
+                    return found_id, offset, size, found_type
+        return (False,) * 4
+
+    @safe
+    def get_cover(self):
+        """Return (extension, data) of the first usable image, or None."""
+        from ebook_converter.ebooks.lrf.objects import get_object
+        key = self.xor_key
+        for obj_id, offset, size in self.get_objects_by_type(0x0C):
+            image = get_object(None, self._file, obj_id, offset, size, key)
+            obj_id, offset, size = self.get_object_by_id(image.refstream)[:3]
+            if obj_id is not False:  # skip images whose stream is missing
+                stream = get_object(None, self._file, obj_id, offset, size, key)
+                return stream.file.rpartition('.')[-1], stream.stream
+        return None
+
+
+def option_parser():
+    """Build the OptionParser for the LRF metadata command line tool."""
+    from ebook_converter.utils.config import OptionParser
+    from ebook_converter.constants import __appname__, __version__
+    parser = OptionParser(usage=('''%prog [options] mybook.lrf
+
+
+Show/edit the metadata in an LRF file.\n\n'''),
+                          version=__appname__+' '+__version__,
+                          epilog='Created by Kovid Goyal')
+    parser.add_option("-t", "--title", action="store", type="string",
+                      dest="title", help="Set the book title")
+    parser.add_option('--title-sort', action='store', type='string',
+                      default=None, dest='title_reading',
+                      help='Set sort key for the title')
+    parser.add_option("-a", "--author", action="store", type="string",
+                      dest="author", help="Set the author")
+    parser.add_option('--author-sort', action='store', type='string',
+                      default=None, dest='author_reading',
+                      help='Set sort key for the author')
+    parser.add_option("-c", "--category", action="store", type="string",
+                      dest="category", help="The category this book belongs "
+                      "to. E.g.: History")
+    parser.add_option("--thumbnail", action="store", type="string",
+                      dest="thumbnail", help="Path to a graphic that will be "
+                      "set as this files' thumbnail")
+    parser.add_option("--comment", action="store", type="string",
+                      dest="comment", help="Path to a TXT file containing the "
+                      "comment to be stored in the LRF file.")
+    parser.add_option("--get-thumbnail", action="store_true",
+                      dest="get_thumbnail", default=False,
+                      help="Extract thumbnail from LRF file")
+    parser.add_option('--publisher', default=None, help='Set the publisher')
+    parser.add_option('--classification', default=None,
+                      help='Set the book classification')
+    parser.add_option('--creator', default=None, help='Set the book creator')
+    parser.add_option('--producer', default=None, help='Set the book '
+                      'producer')
+    parser.add_option('--get-cover', action='store_true', default=False,
+                      help='Extract cover from LRF file. Note that the LRF '
+                      'format has no defined cover, so we use some heuristics '
+                      'to guess the cover.')
+    parser.add_option('--bookid', action='store', type='string', default=None,
+                      dest='book_id', help='Set book ID')
+    # A --page option for the SumPage element is deliberately not offered.
+    # SumPage specifies the number of "View"s (visible pages for the
+    # BookSetting element conditions) of the content: basically, the total
+    # pages per the page size, font size, etc. when the LRF is first created.
+    # Since this will change as the book is reflowed, it is probably not worth
+    # using.
+
+    return parser
+
+
+def set_metadata(stream, mi):
+    """Write the non-empty fields of C{mi} into the LRF file C{stream}."""
+    lrf = LRFMetaFile(stream)
+    if mi.title:
+        lrf.title = mi.title
+    if mi.authors:
+        lrf.author = ', '.join(mi.authors)
+    if mi.tags:
+        lrf.category = mi.tags[0]
+    if getattr(mi, 'category', False):
+        lrf.category = mi.category
+    for source, target in (('comments', 'free_text'),
+                           ('author_sort', 'author_reading'),
+                           ('publisher', 'publisher')):
+        if getattr(mi, source):
+            setattr(lrf, target, getattr(mi, source))
+
+
+def main(args=sys.argv):
+    """Command line entry point: show and/or edit the metadata of an LRF."""
+    parser = option_parser()
+    options, args = parser.parse_args(args)
+    if len(args) != 2:
+        parser.print_help()
+        print()
+        print('No lrf file specified')
+        return 1
+    with open(args[1], "r+b") as book:  # closed even if an update fails
+        lrf = LRFMetaFile(book)
+
+        if options.title:
+            lrf.title = options.title
+        if options.title_reading is not None:
+            lrf.title_reading = options.title_reading
+        if options.author_reading is not None:
+            lrf.author_reading = options.author_reading
+        if options.author:
+            lrf.author = options.author
+        if options.publisher:
+            lrf.publisher = options.publisher
+        if options.classification:
+            lrf.classification = options.classification
+        if options.category:
+            lrf.category = options.category
+        if options.creator:
+            lrf.creator = options.creator
+        if options.producer:
+            lrf.producer = options.producer
+        if options.thumbnail:
+            path = os.path.expanduser(os.path.expandvars(options.thumbnail))
+            with open(path, "rb") as f:
+                lrf.thumbnail = f.read()
+        if options.book_id is not None:
+            lrf.book_id = options.book_id
+        if options.comment:
+            path = os.path.expanduser(os.path.expandvars(options.comment))
+            with open(path, 'rb') as f:
+                lrf.free_text = f.read().decode('utf-8', 'replace')
+        if options.get_thumbnail:
+            t = lrf.thumbnail
+            td = "None"
+            if t and len(t) > 0:
+                td = (os.path.basename(args[1]) + "_thumbnail." +
+                      lrf.thumbail_extension())
+                with open(td, "wb") as f:
+                    f.write(t)
+
+        # dict views cannot be sorted in place; order the fields by name
+        fields = sorted(LRFMetaFile.__dict__.items(), key=lambda kv: kv[0])
+        for name, desc in fields:
+            if "XML" in repr(desc):
+                print(str(desc) + ":", getattr(lrf, name))
+        if options.get_thumbnail:
+            print("Thumbnail:", td)
+        if options.get_cover:
+            try:
+                ext, data = lrf.get_cover()
+            except Exception:  # Fails on books created by LRFCreator 1.0
+                ext, data = None, None
+            if data:
+                cover = (os.path.splitext(os.path.basename(args[1]))[0] +
+                         "_cover." + ext)
+                with open(cover, 'wb') as f:
+                    f.write(data)
+                print('Cover:', cover)
+            else:
+                print('Could not find cover in the LRF file')
+
+
+if __name__ == '__main__':
+    sys.exit(main())  # main()'s return value becomes the exit status
diff --git a/ebook_converter/ebooks/lrf/objects.py b/ebook_converter/ebooks/lrf/objects.py
new file mode 100644
index 0000000..306300f
--- /dev/null
+++ b/ebook_converter/ebooks/lrf/objects.py
@@ -0,0 +1,1279 @@
+import array
+import collections
+import io
+import re
+import struct
+import zlib
+
+from ebook_converter.ebooks.lrf import LRFParseError, PRS500_PROFILE
+from ebook_converter import entity_to_unicode, prepare_string_for_xml
+from ebook_converter.ebooks.lrf.tags import Tag
+
+ruby_tags = {0xF575: ['rubyAlignAndAdjust', 'W'],  # id: [attr, kind(, map)]
+             0xF576: ['rubyoverhang', 'W', {0: 'none', 1: 'auto'}],
+             0xF577: ['empdotsposition', 'W', {1: 'before', 2: 'after'}],
+             0xF578: ['', 'parse_empdots'],  # kind names a parser method
+             0xF579: ['emplineposition', 'W', {1: 'before', 2: 'after'}],
+             0xF57A: ['emplinetype', 'W', {0: 'none', 0x10: 'solid',
+                                           0x20: 'dashed', 0x30: 'double',
+                                           0x40: 'dotted'}]}
+
+
+class LRFObject(object):
+    """Base LRF object: reads tags up to a boundary and stores their values."""
+    tag_map = {
+        0xF500: ['', ''],
+        0xF502: ['infoLink', 'D'],
+        0xF501: ['', ''],
+    }
+
+    @classmethod
+    def descramble_buffer(cls, buf, l, xorKey):
+        """Return buf as bytes with its first l bytes XORed with xorKey."""
+        a = array.array('B', buf)
+        for i in range(l):
+            a[i] ^= xorKey
+        # tobytes() replaces tostring(), which Python 3.9 removed
+        return a.tobytes()
+
+    @classmethod
+    def parse_empdots(self, tag, f):
+        # NOTE(review): as a classmethod this stores on the class - intended?
+        self.refEmpDotsFont, self.empDotsFontName, self.empDotsCode = tag.contents
+
+    @staticmethod
+    def tag_to_val(h, obj, tag, stream):
+        """Decode tag per the tag_map entry h ([attr, kind, optional map])."""
+        val = None
+        if h[1] == 'D':
+            val = tag.dword
+        elif h[1] in ('W', 'w'):
+            val = tag.word
+            if h[1] == 'w' and val >= 0x8000:
+                val -= 0x10000  # signed word: 0x8000 is -32768
+        elif h[1] == 'B':
+            val = tag.byte
+        elif h[1] == 'P':
+            val = tag.contents
+        elif h[1] != '':
+            val = getattr(obj, h[1])(tag, stream)
+        if len(h) > 2:
+            val = h[2](val) if callable(h[2]) else h[2][val]
+        return val
+
+    def __init__(self, document, stream, id, scramble_key, boundary):
+        """Handle every tag read from stream until boundary is reached."""
+        self._scramble_key = scramble_key
+        self._document = document
+        self.id = id
+
+        while stream.tell() < boundary:
+            tag = Tag(stream)
+            self.handle_tag(tag, stream)
+
+    def parse_bg_image(self, tag, f):
+        self.bg_image_mode, self.bg_image_id = struct.unpack("<HI", tag.contents)
+
+    def handle_tag(self, tag, stream, tag_map=None):
+        """Store the value of a known tag; raise LRFParseError otherwise."""
+        if tag_map is None:
+            tag_map = self.__class__.tag_map
+        if tag.id in tag_map:
+            h = tag_map[tag.id]
+            val = LRFObject.tag_to_val(h, self, tag, stream)
+            if h[1] != '' and h[0] != '':
+                setattr(self, h[0], val)
+        else:
+            raise LRFParseError("Unknown tag in %s: %s" % (self.__class__.__name__, str(tag)))
+
+    def __iter__(self):
+        return iter(())
+
+    def __str__(self):
+        return self.__class__.__name__
+
+
+class LRFContentObject(LRFObject):
+    """Parses a stream of tags by dispatching each to a handler method."""
+    tag_map = {}
+
+    def __init__(self, byts, objects):
+        self.stream = io.BytesIO(byts) if not hasattr(byts, 'read') else byts
+        length = self.stream_size()
+        self.objects = objects
+        self._contents = []
+        self.current = 0
+        self.in_container = True
+        self.parse_stream(length)
+
+    def parse_stream(self, length):
+        """Handle tags until position length or in_container is cleared."""
+        while self.in_container and self.stream.tell() < length:
+            self.handle_tag(Tag(self.stream))
+
+    def stream_size(self):
+        """Return the stream's total size, restoring the current position."""
+        origin = self.stream.tell()
+        self.stream.seek(0, 2)
+        total = self.stream.tell()
+        self.stream.seek(origin)
+        return total
+
+    def handle_tag(self, tag):
+        """Call the handler tag_map names for tag, or raise LRFParseError."""
+        if tag.id not in self.tag_map:
+            raise LRFParseError("Unknown tag in %s: %s" % (self.__class__.__name__, str(tag)))
+        action = self.tag_map[tag.id]
+        if isinstance(action, str):
+            func, args = action, ()
+        else:
+            func, args = action[0], (action[1],)
+        getattr(self, func)(tag, *args)
+
+    def __iter__(self):
+        return iter(self._contents)
+
+
+class LRFStream(LRFObject):
+    """An LRF object with a stream payload, kept as bytes in self.stream."""
+    tag_map = {
+        0xF504: ['', 'read_stream_size'],
+        0xF554: ['stream_flags', 'W'],
+        0xF505: ['', 'read_stream'],
+        0xF506: ['', 'end_stream'],
+      }
+    tag_map.update(LRFObject.tag_map)
+
+    def __init__(self, document, stream, id, scramble_key, boundary):
+        self.stream = b''  # bytes, like every stream that gets read
+        self.stream_size = 0
+        self.stream_read = False
+        LRFObject.__init__(self, document, stream, id, scramble_key, boundary)
+
+    def read_stream_size(self, tag, stream):
+        self.stream_size = tag.dword
+
+    def end_stream(self, tag, stream):
+        self.stream_read = True
+
+    def read_stream(self, tag, stream):
+        """Read stream_size bytes, then descramble/decompress per the flags."""
+        if self.stream_read:
+            raise LRFParseError('There can be only one stream per object')
+        if not hasattr(self, 'stream_flags'):
+            raise LRFParseError('Stream flags not initialized')
+        self.stream = stream.read(self.stream_size)
+        if self.stream_flags & 0x200 != 0:  # scrambled
+            l = len(self.stream)
+            key = self._scramble_key & 0xFF
+            key = l % key + 0xF if 0 < key <= 0xF0 else 0
+            # Image, font and sound streams: only the first 0x400 bytes
+            if l > 0x400 and isinstance(self, (ImageStream, Font, SoundStream)):
+                l = 0x400
+            self.stream = self.descramble_buffer(self.stream, l, key)
+        if self.stream_flags & 0x100 != 0:  # zlib data after a 4 byte size
+            decomp_size = struct.unpack("<I", self.stream[:4])[0]
+            self.stream = zlib.decompress(self.stream[4:])
+            if len(self.stream) != decomp_size:
+                raise LRFParseError("Stream decompressed size is wrong!")
+        if stream.read(2) != b'\x06\xF5':
+            print("Warning: corrupted end-of-stream tag at %08X; skipping it"%(stream.tell()-2))
+        self.end_stream(None, None)
+
+
+class PageTree(LRFObject):
+    """Iterating yields the document object for each id in _contents."""
+    tag_map = {0xF55C: ['_contents', 'P']}
+    tag_map.update(LRFObject.tag_map)
+
+    def __iter__(self):
+        # _contents only exists once tag 0xF55C has been handled
+        for obj_id in getattr(self, '_contents', ()):
+            yield self._document.objects[obj_id]
+
+
+class StyleObject(object):
+    """
+    Mixin for objects with an id and a tag_map: renders the attributes the
+    tag_map names, when set on the instance, as XML or as a dict.
+    """
+
+    def _tags_to_xml(self):
+        """Return 'name="value" ' for every set attribute, concatenated."""
+        names = (h[0] for h in self.tag_map.values())
+        return ''.join('%s="%s" ' % (name, getattr(self, name))
+                       for name in names if hasattr(self, name))
+
+    def __str__(self):
+        # A class called FooAttr is written out as a <FooStyle .../> element
+        label = self.__class__.__name__.replace('Attr', 'Style')
+        head = '<%s objid="%s" stylelabel="%s" ' % (label, self.id, self.id)
+        return head + self._tags_to_xml() + '/>\n'
+
+    def as_dict(self):
+        """Return {name: value} for every tag_map attribute that is set."""
+        names = (h[0] for h in self.tag_map.values())
+        return {name: getattr(self, name)
+                for name in names if hasattr(self, name)}
+
+
+class PageAttr(StyleObject, LRFObject):  # tag id -> [attr, kind(, map)]
+    tag_map = {
+        0xF507: ['oddheaderid', 'D'],
+        0xF508: ['evenheaderid', 'D'],
+        0xF509: ['oddfooterid', 'D'],
+        0xF50A: ['evenfooterid', 'D'],
+        0xF521: ['topmargin', 'W'],
+        0xF522: ['headheight', 'W'],
+        0xF523: ['headsep', 'W'],
+        0xF524: ['oddsidemargin', 'W'],
+        0xF52C: ['evensidemargin', 'W'],
+        0xF525: ['textheight', 'W'],
+        0xF526: ['textwidth', 'W'],
+        0xF527: ['footspace', 'W'],
+        0xF528: ['footheight', 'W'],
+        0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}],
+        0xF52B: ['pageposition', 'W', {0: 'any', 1:'upper', 2: 'lower'}],
+        0xF52A: ['setemptyview', 'W', {1: 'show', 0: 'empty'}],
+        0xF5DA: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}],
+        0xF529: ['', "parse_bg_image"],  # handled by LRFObject.parse_bg_image
+      }
+    tag_map.update(LRFObject.tag_map)
+
+    @classmethod
+    def to_css(cls, obj, inline=False):
+        return ''  # always empty, whatever obj and inline are
+
+
+class Color(object):
+    """A colour unpacked from a 32 bit value; a is the lowest byte."""
+    def __init__(self, val):
+        self.a, self.r, self.g, self.b = ((val >> n) & 0xFF for n in (0, 8, 16, 24))
+
+    def __str__(self):
+        return '0x%02x%02x%02x%02x' % (self.a, self.r, self.g, self.b)
+
+    def __len__(self):
+        return 4
+
+    def __getitem__(self, i):  # Qt compatible ordering and values
+        return (self.r, self.g, self.b, 0xff - self.a)[i]  # Qt: 0xff opaque, LRS: 0x00 opaque
+
+    def to_html(self):
+        return 'rgb(%d, %d, %d)' % (self.r, self.g, self.b)
+
+
+class EmptyPageElement(object):
+    """Base for page elements without children: iterating yields nothing."""
+    def __iter__(self):
+        return iter(())
+
+    def __str__(self):
+        # str(self) here recursed forever; fall back to the class name
+        return self.__class__.__name__
+
+
+class PageDiv(EmptyPageElement):
+    """A page element that str() renders as a <PageDiv ... /> tag."""
+    def __init__(self, pain, spacesize, linewidth, linecolor):
+        self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth
+        self.linecolor = Color(linecolor)
+
+    def __str__(self):
+        return '\n<PageDiv pain="%s" spacesize="%s" linewidth="%s" linecolor="%s" />\n'%\
+                (self.pain, self.spacesize, self.linewidth, self.linecolor)
+
+
+class RuledLine(EmptyPageElement):
+    """A ruled line; a linetype code missing from linetype_map is a KeyError."""
+    linetype_map = {0x00: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted', 0x13: 'unknown13'}
+
+    def __init__(self, linelength, linetype, linewidth, linecolor):
+        self.linelength, self.linewidth = linelength, linewidth
+        self.linetype = self.linetype_map[linetype]
+        self.linecolor = Color(linecolor)
+        self.id = -1
+
+    def __str__(self):
+        attrs = (self.linelength, self.linetype, self.linewidth, self.linecolor)
+        return '\n<RuledLine linelength="%s" linetype="%s" linewidth="%s" linecolor="%s" />\n' % attrs
+
+
+class Wait(EmptyPageElement):
+    """A page element that str() renders as a <Wait time="..." /> tag."""
+    def __init__(self, time):
+        self.time = time
+
+    def __str__(self):
+        return '\n<Wait time="%d" />\n' % self.time
+
+
+class Locate(EmptyPageElement):
+    """A <Locate /> element; pos holds the name pos_map gives for a code."""
+    pos_map = dict(enumerate(('bottomleft', 'bottomright', 'topright', 'topleft', 'base'), 1))
+
+    def __init__(self, pos):
+        self.pos = self.pos_map[pos]
+
+    def __str__(self):
+        return '\n<Locate pos="%s" />\n' % self.pos
+
+
+class BlockSpace(EmptyPageElement):
+    """A <BlockSpace /> element holding an xspace and a yspace value."""
+    def __init__(self, xspace, yspace):
+        self.xspace = xspace
+        self.yspace = yspace
+
+    def __str__(self):
+        return '\n<BlockSpace xspace="%d" yspace="%d" />\n' % (self.xspace, self.yspace)
+
+
+class Page(LRFStream):
+    tag_map = {
+        0xF503: ['style_id', 'D'],
+        0xF50B: ['obj_list', 'P'],
+        0xF571: ['', ''],
+        0xF57C: ['parent_page_tree', 'D'],
+      }
+    tag_map.update(PageAttr.tag_map)
+    tag_map.update(LRFStream.tag_map)
+    style = property(fget=lambda self : self._document.objects[self.style_id])
+    evenheader = property(fget=lambda self : self._document.objects[self.style.evenheaderid])
+    evenfooter = property(fget=lambda self : self._document.objects[self.style.evenfooterid])
+    oddheader  = property(fget=lambda self : self._document.objects[self.style.oddheaderid])
+    oddfooter  = property(fget=lambda self : self._document.objects[self.style.oddfooterid])
+
+    class Content(LRFContentObject):
+        tag_map = {
+           0xF503: 'link',
+           0xF54E: 'page_div',
+           0xF547: 'x_space',
+           0xF546: 'y_space',
+           0xF548: 'do_pos',
+           0xF573: 'ruled_line',
+           0xF5D4: 'wait',
+           0xF5D6: 'sound_stop',
+          }
+
+        def __init__(self, byts, objects):
+            self.in_blockspace = False
+            LRFContentObject.__init__(self, byts, objects)
+
+        def link(self, tag):
+            self.close_blockspace()
+            self._contents.append(self.objects[tag.dword])
+
+        def page_div(self, tag):
+            self.close_blockspace()
+            pars = struct.unpack("<HIHI", tag.contents)
+            self._contents.append(PageDiv(*pars))
+
+        def x_space(self, tag):
+            self.xspace = tag.word
+            self.in_blockspace = True
+
+        def y_space(self, tag):
+            self.yspace = tag.word
+            self.in_blockspace = True
+
+        def do_pos(self, tag):
+            self.pos = tag.word  # raw code; Locate() translates it via Locate.pos_map
+            self.in_blockspace = True
+
+        def ruled_line(self, tag):
+            self.close_blockspace()
+            pars = struct.unpack("<HHHI", tag.contents)
+            self._contents.append(RuledLine(*pars))
+
+        def wait(self, tag):
+            self.close_blockspace()
+            self._contents.append(Wait(tag.word))
+
+        def sound_stop(self, tag):
+            self.close_blockspace()
+
+        def close_blockspace(self):
+            """Flush a pending Locate/BlockSpace element into self._contents."""
+            if not self.in_blockspace:
+                return
+            self.in_blockspace = False  # reset, or later calls emit empty BlockSpaces
+            if hasattr(self, 'pos'):
+                self._contents.append(Locate(self.pos))
+                del self.pos
+                return
+            self._contents.append(BlockSpace(getattr(self, 'xspace', 0),
+                                             getattr(self, 'yspace', 0)))
+            for name in ('xspace', 'yspace'):
+                vars(self).pop(name, None)
+
+    def header(self, odd):
+        objects = self._document.objects  # object table; also holds this page's style
+        return objects[getattr(objects[self.style_id], 'oddheaderid' if odd else 'evenheaderid')]
+
+    def footer(self, odd):
+        objects = self._document.objects  # object table; also holds this page's style
+        return objects[getattr(objects[self.style_id], 'oddfooterid' if odd else 'evenfooterid')]
+
+    def initialize(self):
+        self.content = Page.Content(self.stream, self._document.objects)
+
+    def __iter__(self):
+        for i in self.content:
+            yield i
+
+    def __str__(self):
+        """Serialise the page and every element it iterates over to an XML fragment."""
+        parts = ['\n<Page pagestyle="%d" objid="%d">\n' % (self.style_id, self.id)]
+        parts.extend(str(element) for element in self)
+        parts.append('\n</Page>\n')
+        return ''.join(parts)
+
+    def to_html(self):
+        """Return the concatenated HTML of every element on this page."""
+        # NOTE(review): each element must provide to_html(); one that does not
+        # raises AttributeError here -- confirm all page element types define it.
+        return ''.join(element.to_html() for element in self)
+
+
+class BlockAttr(StyleObject, LRFObject):
+    tag_map = {
+        0xF531: ['blockwidth', 'W'],
+        0xF532: ['blockheight', 'W'],
+        0xF533: ['blockrule', 'W', {
+            0x14: "horz-fixed",
+            0x12: "horz-adjustable",
+            0x41: "vert-fixed",
+            0x21: "vert-adjustable",
+            0x44: "block-fixed",
+            0x22: "block-adjustable"}],
+        0xF534: ['bgcolor', 'D', Color],
+        0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}],
+        0xF536: ['framewidth', 'W'],
+        0xF537: ['framecolor', 'D', Color],
+        0xF52E: ['framemode', 'W', {0: 'none', 2: 'curve', 1:'square'}],
+        0xF538: ['topskip', 'W'],
+        0xF539: ['sidemargin', 'W'],
+        0xF53A: ['footskip', 'W'],
+        0xF529: ['', 'parse_bg_image'],
+      }
+    tag_map.update(LRFObject.tag_map)
+
+    @classmethod
+    def to_css(cls, obj, inline=False):
+        """Return CSS declarations for the block attributes present on *obj*.
+
+        With inline=True the declarations are separated by spaces (suitable
+        for a style attribute); otherwise each is a tab-indented line.
+        """
+        lead, trail = ('', ' ') if inline else ('\t', '\n')
+        rules = []
+        if hasattr(obj, 'sidemargin'):
+            margin = '%spx' % (obj.sidemargin,)
+            rules.append('margin-left: %(m)s; margin-right: %(m)s;' % dict(m=margin))
+        if hasattr(obj, 'topskip'):
+            rules.append('margin-top: %dpx;' % obj.topskip)
+        if hasattr(obj, 'footskip'):
+            rules.append('margin-bottom: %dpx;' % obj.footskip)
+        if hasattr(obj, 'framewidth'):
+            # The trailing ';' matters: without it the declaration that
+            # follows is swallowed into this one.
+            rules.append('border: solid %dpx;' % obj.framewidth)
+        if hasattr(obj, 'framecolor') and obj.framecolor.a < 255:
+            rules.append('border-color: %s;' % obj.framecolor.to_html())
+        if hasattr(obj, 'bgcolor') and obj.bgcolor.a < 255:
+            rules.append('background-color: %s;' % obj.bgcolor.to_html())
+        return ''.join(lead + rule + trail for rule in rules)
+
+
+class TextCSS(object):
+    """Mixin that renders LRF text attributes as CSS declarations."""
+    @classmethod
+    def to_css(cls, obj, inline=False):
+        """Return CSS for the text attributes set on *obj* (unset ones are skipped).
+
+        inline=True joins declarations with spaces; otherwise each one is a
+        tab-indented line. Font names are resolved through cls.FONT_MAP.
+        """
+        lead, trail = ('', ' ') if inline else ('\t', '\n')
+        rules = []
+
+        fs = getattr(obj, 'fontsize', None)
+        if fs is not None:
+            rules.append('font-size: %fpt;' % (int(fs)/10))
+        fw = getattr(obj, 'fontweight', None)
+        if fw is not None:
+            rules.append('font-weight: %s;' % ('bold' if int(fw) >= 700 else 'normal'))
+        fn = getattr(obj, 'fontfacename', None)
+        if fn is not None:
+            rules.append('font-family: %s;' % cls.FONT_MAP[fn])
+        fg = getattr(obj, 'textcolor', None)
+        if fg is not None:
+            rules.append('color: %s;' % fg.to_html())
+        bg = getattr(obj, 'textbgcolor', None)
+        if bg is not None:
+            rules.append('background-color: %s;' % bg.to_html())
+        al = getattr(obj, 'align', None)
+        if al is not None:
+            # Translate the LRF alignment name; previously the whole lookup
+            # dict was interpolated. Unknown names fall back to 'left'.
+            al = dict(head='left', center='center', foot='right').get(al, 'left')
+            rules.append('text-align: %s;' % al)
+        lh = getattr(obj, 'linespace', None)
+        if lh is not None:
+            # Emitted as line-height; it used to produce a second text-align.
+            rules.append('line-height: %fpt;' % (int(lh)/10))
+        pi = getattr(obj, 'parindent', None)
+        if pi is not None:
+            rules.append('text-indent: %fpt;' % (int(pi)/10))
+
+        return ''.join(lead + rule + trail for rule in rules)
+
+
+class TextAttr(StyleObject, LRFObject, TextCSS):
+
+    FONT_MAP = collections.defaultdict(lambda : 'serif')
+    for key, value in PRS500_PROFILE.default_fonts.items():
+        FONT_MAP[value] = key
+
+    tag_map = {
+        0xF511: ['fontsize', 'w'],
+        0xF512: ['fontwidth', 'w'],
+        0xF513: ['fontescapement', 'w'],
+        0xF514: ['fontorientation', 'w'],
+        0xF515: ['fontweight', 'W'],
+        0xF516: ['fontfacename', 'P'],
+        0xF517: ['textcolor', 'D', Color],
+        0xF518: ['textbgcolor', 'D', Color],
+        0xF519: ['wordspace', 'w'],
+        0xF51A: ['letterspace', 'w'],
+        0xF51B: ['baselineskip', 'w'],
+        0xF51C: ['linespace', 'w'],
+        0xF51D: ['parindent', 'w'],
+        0xF51E: ['parskip', 'w'],
+        0xF53C: ['align', 'W', {1: 'head', 4: 'center', 8: 'foot'}],
+        0xF53D: ['column', 'W'],
+        0xF53E: ['columnsep', 'W'],
+        0xF5DD: ['charspace', 'w'],
+        0xF5F1: ['textlinewidth', 'W'],
+        0xF5F2: ['linecolor', 'D', Color],
+      }
+    tag_map.update(ruby_tags)
+    tag_map.update(LRFObject.tag_map)
+
+
+class Block(LRFStream, TextCSS):
+    tag_map = {
+        0xF503: ['style_id', 'D'],
+      }
+    tag_map.update(BlockAttr.tag_map)
+    tag_map.update(TextAttr.tag_map)
+    tag_map.update(LRFStream.tag_map)
+    extra_attrs = [i[0] for i in BlockAttr.tag_map.values()]
+    extra_attrs.extend([i[0] for i in TextAttr.tag_map.values()])
+
+    style = property(fget=lambda self : self._document.objects[self.style_id])
+    textstyle = property(fget=lambda self : self._document.objects[self.textstyle_id])
+
+    def initialize(self):
+        self.attrs = {}
+        stream = io.BytesIO(self.stream)
+        tag = Tag(stream)
+        if tag.id != 0xF503:
+            raise LRFParseError("Bad block content")
+        obj = self._document.objects[tag.dword]
+        if isinstance(obj, SimpleText):
+            self.name = 'SimpleTextBlock'
+            self.textstyle_id = obj.style_id
+        elif isinstance(obj, Text):
+            self.name = 'TextBlock'
+            self.textstyle_id = obj.style_id
+        elif isinstance(obj, Image):
+            self.name = 'ImageBlock'
+            for attr in ('x0', 'x1', 'y0', 'y1', 'xsize', 'ysize', 'refstream'):
+                self.attrs[attr] = getattr(obj, attr)
+            self.refstream = self._document.objects[self.attrs['refstream']]
+        elif isinstance(obj, Button):
+            self.name = 'ButtonBlock'
+        else:
+            raise LRFParseError("Unexpected block type: "+obj.__class__.__name__)
+
+        self.content = obj
+
+        for attr in self.extra_attrs:
+            if hasattr(self, attr):
+                self.attrs[attr] = getattr(self, attr)
+
+    def __str__(self):
+        s = '\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id)
+        if hasattr(self, 'textstyle_id'):
+            s += 'textstyle="%d" '%(self.textstyle_id,)
+        for attr in self.attrs:
+            s += '%s="%s" '%(attr, self.attrs[attr])
+        if self.name != 'ImageBlock':
+            s = s.rstrip()+'>\n'
+            s += str(self.content)
+            s += '</%s>\n'%(self.name,)
+            return s
+        return s.rstrip() + ' />\n'
+
+    def to_html(self):
+        if self.name == 'TextBlock':
+            return '<div class="block%s text%s">%s</div>'%(self.style_id, self.textstyle_id, self.content.to_html())
+        return ''
+
+
+class MiniPage(LRFStream):
+    tag_map = {
+        0xF541: ['minipagewidth', 'W'],
+        0xF542: ['minipageheight', 'W'],
+      }
+    tag_map.update(LRFStream.tag_map)
+    tag_map.update(BlockAttr.tag_map)
+
+
+class Text(LRFStream):
+    tag_map = {
+        0xF503: ['style_id', 'D'],
+      }
+    tag_map.update(TextAttr.tag_map)
+    tag_map.update(LRFStream.tag_map)
+
+    style = property(fget=lambda self : self._document.objects[self.style_id])
+
+    text_map = {0x22: '"', 0x26: '&amp;', 0x27: '\'', 0x3c: '&lt;', 0x3e: '&gt;'}
+    entity_pattern = re.compile(r'&amp;(\S+?);')
+
+    text_tags = {
+           0xF581: ['simple_container', 'Italic'],
+           0xF582: 'end_container',
+           0xF5B1: ['simple_container', 'Yoko'],
+           0xF5B2: 'end_container',
+           0xF5B3: ['simple_container', 'Tate'],
+           0xF5B4: 'end_container',
+           0xF5B5: ['simple_container', 'Nekase'],
+           0xF5B6: 'end_container',
+           0xF5A1: 'start_para',
+           0xF5A2: 'end_para',
+           0xF5A7: 'char_button',
+           0xF5A8: 'end_container',
+           0xF5A9: ['simple_container', 'Rubi'],
+           0xF5AA: 'end_container',
+           0xF5AB: ['simple_container', 'Oyamoji'],
+           0xF5AC: 'end_container',
+           0xF5AD: ['simple_container', 'Rubimoji'],
+           0xF5AE: 'end_container',
+           0xF5B7: ['simple_container', 'Sup'],
+           0xF5B8: 'end_container',
+           0xF5B9: ['simple_container', 'Sub'],
+           0xF5BA: 'end_container',
+           0xF5BB: ['simple_container', 'NoBR'],
+           0xF5BC: 'end_container',
+           0xF5BD: ['simple_container', 'EmpDots'],
+           0xF5BE: 'end_container',
+           0xF5C1: 'empline',
+           0xF5C2: 'end_container',
+           0xF5C3: 'draw_char',
+           0xF5C4: 'end_container',
+           0xF5C6: 'box',
+           0xF5C7: 'end_container',
+           0xF5CA: 'space',
+           0xF5D1: 'plot',
+           0xF5D2: 'cr',
+        }
+
+    class TextTag(object):
+        """An opening (or self-closing) markup tag found in a text stream."""
+        def __init__(self, name, attrs=None, self_closing=False):
+            self.name = name
+            self.attrs = {} if attrs is None else attrs  # never share a default dict
+            self.self_closing = self_closing
+
+        def __str__(self):
+            s = '<%s '%(self.name,)
+            for name, val in self.attrs.items():
+                s += '%s="%s" '%(name, val)
+            return s.rstrip() + (' />' if self.self_closing else '>')
+
+        def to_html(self):
+            # HTML rendering of individual tags is not implemented: emit nothing.
+            return ''
+
+        def close_html(self):
+            return ''
+
+    class Span(TextTag):
+        pass
+
+    linetype_map = {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'}
+    adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'}
+    lineposition_map = {1:'before', 2:'after'}
+
+    def add_text(self, text):
+        s = str(text, "utf-16-le")
+        if s:
+            s = s.translate(self.text_map)
+            self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
+
+    def end_container(self, tag, stream):
+        self.content.append(None)
+
+    def start_para(self, tag, stream):
+        self.content.append(self.__class__.TextTag('P'))
+
+    def close_containers(self, start=0):
+        """Append one ``None`` end marker per tag left unclosed from index *start* on."""
+        content = self.content
+        if not content:
+            return
+        if isinstance(content[-1], self.__class__.Span):
+            content.pop()  # nothing follows a trailing Span, so drop it
+        balance = 0
+        for idx in range(start, len(content)):
+            if content[idx] is None:
+                balance -= 1
+            elif isinstance(content[idx], self.__class__.TextTag) and not content[idx].self_closing:
+                balance += 1
+        content.extend([None] * balance)
+
+    def end_para(self, tag, stream):
+        """Close every container opened since the most recent 'P' tag."""
+        i = len(self.content)-1
+        while i > -1 and not (isinstance(self.content[i], Text.TextTag) and
+                              self.content[i].name == 'P'):
+            i -= 1
+        self.close_containers(start=max(i, 0))  # -1 (no 'P') would count the last item twice
+
+    def cr(self, tag, stream):
+        self.content.append(self.__class__.TextTag('CR', self_closing=True))
+
+    def char_button(self, tag, stream):
+        self.content.append(self.__class__.TextTag(
+                                'CharButton', attrs={'refobj':tag.dword}))
+
+    def simple_container(self, tag, name):
+        self.content.append(self.__class__.TextTag(name))
+
+    def empline(self, tag, stream):
+        def invalid(op):
+            stream.seek(op)
+            # self.simple_container(None, 'EmpLine')
+
+        oldpos = stream.tell()
+        try:
+            t = Tag(stream)
+            if t.id not in (0xF579, 0xF57A):
+                raise LRFParseError
+        except LRFParseError:
+            invalid(oldpos)
+            return
+        h = TextAttr.tag_map[t.id]
+        attrs = {}
+        attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None)
+        oldpos = stream.tell()
+        try:
+            t = Tag(stream)
+            if t.id not in (0xF579, 0xF57A):
+                raise LRFParseError
+            h = TextAttr.tag_map[t.id]
+            attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None)
+        except LRFParseError:
+            stream.seek(oldpos)
+
+        if attrs:
+            self.content.append(self.__class__.TextTag(
+                            'EmpLine', attrs=attrs))
+
+    def space(self, tag, stream):
+        self.content.append(self.__class__.TextTag('Space',
+                                        attrs={'xsize':tag.sword},
+                                        self_closing=True))
+
+    def plot(self, tag, stream):
+        xsize, ysize, refobj, adjustment = struct.unpack("<HHII", tag.contents)
+        plot = self.__class__.TextTag('Plot',
+            {'xsize': xsize, 'ysize': ysize, 'refobj':refobj,
+             'adjustment':self.adjustment_map[adjustment]}, self_closing=True)
+        plot.refobj = self._document.objects[refobj]
+        self.content.append(plot)
+
+    def draw_char(self, tag, stream):
+        self.content.append(self.__class__.TextTag('DrawChar', {'line':tag.word}))
+
+    def box(self, tag, stream):
+        self.content.append(self.__class__.TextTag('Box',
+                                     {'linetype':self.linetype_map[tag.word]}))
+
+    def initialize(self):
+        """Parse the raw text stream into self.content (strings, tags, None end markers)."""
+        self.content = collections.deque()
+        stream = io.BytesIO(self.stream)
+        length = len(self.stream)
+        style = self.style.as_dict()
+        current_style = style.copy()
+        text_tags = set(list(TextAttr.tag_map.keys()) +
+                        list(Text.text_tags.keys()) +
+                        list(ruby_tags.keys()))
+        text_tags -= {0xf500+i for i in range(10)}
+        text_tags.add(0xf5cc)
+
+        def find_first_tag(start):
+            # Offset of the first recognised tag at/after start, or -1. Iterative,
+            # so a long run of 0xf5 bytes cannot exhaust the recursion limit.
+            pos = self.stream.find(b'\xf5', start)
+            while pos != -1:
+                try:
+                    stream.seek(pos-1)
+                    if Tag(stream).id in text_tags:
+                        return pos-1
+                except Exception:
+                    pass  # not a parseable tag at this offset, keep scanning
+                pos = self.stream.find(b'\xf5', pos+1)
+            return -1
+
+        while stream.tell() < length:
+            # Is there some text before a tag?
+            start_pos = stream.tell()
+            tag_pos = find_first_tag(start_pos)
+            if tag_pos >= start_pos:
+                if tag_pos > start_pos:
+                    self.add_text(self.stream[start_pos:tag_pos])
+                stream.seek(tag_pos)
+            else:  # No more tags: only the unread tail is text
+                self.add_text(self.stream[start_pos:])
+                stream.seek(0, 2)
+                break
+
+            tag = Tag(stream)
+
+            if tag.id == 0xF5CC:
+                self.add_text(stream.read(tag.word))
+            elif tag.id in self.__class__.text_tags:  # A Text tag
+                action = self.__class__.text_tags[tag.id]
+                if isinstance(action, str):
+                    getattr(self, action)(tag, stream)
+                else:
+                    getattr(self, action[0])(tag, action[1])
+            elif tag.id in TextAttr.tag_map:  # A Span attribute
+                action = TextAttr.tag_map[tag.id]
+                if len(self.content) == 0:
+                    current_style = style.copy()
+                name, val = action[0], LRFObject.tag_to_val(action, self, tag, None)
+                if name and (name not in current_style or current_style[name] != val):
+                    # Extend a Span that is still the last item, else start one
+                    if len(self.content) > 0 and isinstance(self.content[-1], self.__class__.Span):
+                        self.content[-1].attrs[name] = val
+                    else:
+                        self.content.append(self.__class__.Span('Span', {name:val}))
+                    current_style[name] = val
+        if len(self.content) > 0:
+            self.close_containers()
+        self.stream = None
+
+    def __str__(self):
+        """Serialise the parsed content to markup, closing a tag at each None marker."""
+        pieces = []
+        pending = []  # stack of tags that still need a closing tag
+        for item in self.content:
+            if item is None:
+                if pending:
+                    pieces.append('</%s>' % (pending.pop().name,))
+            elif isinstance(item, str):
+                pieces.append(prepare_string_for_xml(item).replace('\0', ''))
+            else:
+                pieces.append(str(item))
+                if not item.self_closing:
+                    pending.append(item)
+
+        # A single dangling tag is closed silently; more than one is an error.
+        if len(pending) > 1:
+            names = [t.name for t in pending if isinstance(t, Text.TextTag)]
+            raise LRFParseError('Malformed text stream %s' % (names,))
+        pieces.extend('</%s>' % (t.name,) for t in pending)
+        return ''.join(pieces)
+
+    def to_html(self):
+        """Return a best-effort HTML rendering of the parsed content."""
+        s = ''
+        open_containers = collections.deque()
+        in_p = False
+        for c in self.content:
+            if isinstance(c, str):
+                s += c
+            elif c is None:
+                # The end marker of a 'P' (never pushed) finds an empty stack: skip it
+                if open_containers:
+                    s += open_containers.pop().close_html()
+            elif c.name == 'P':
+                in_p = True
+            elif c.name == 'CR':
+                s += '<br />' if in_p else '<p>'
+            else:
+                s += c.to_html()
+                if not c.self_closing:
+                    open_containers.append(c)
+        if len(open_containers) > 0:
+            raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],))
+        return s
+
+
+class Image(LRFObject):
+    tag_map = {
+        0xF54A: ['', 'parse_image_rect'],
+        0xF54B: ['', 'parse_image_size'],
+        0xF54C: ['refstream', 'D'],
+        0xF555: ['comment', 'P'],
+      }
+
+    def parse_image_rect(self, tag, f):
+        self.x0, self.y0, self.x1, self.y1 = struct.unpack("<HHHH", tag.contents)
+
+    def parse_image_size(self, tag, f):
+        self.xsize, self.ysize = struct.unpack("<HH", tag.contents)
+
+    encoding = property(fget=lambda self : self._document.objects[self.refstream].encoding)
+    data = property(fget=lambda self : self._document.objects[self.refstream].stream)
+
+    def __str__(self):
+        return '<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\
+        (self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream)
+
+
+class PutObj(EmptyPageElement):
+    """Element rendered as ``<PutObj>``; keeps x1/y1 and the object refobj resolves to."""
+    def __init__(self, objects, x1, y1, refobj):
+        self.object = objects[refobj]  # resolve the reference eagerly
+        self.x1, self.y1, self.refobj = x1, y1, refobj
+
+    def __str__(self):
+        return '<PutObj x1="%d" y1="%d" refobj="%d" />' % (self.x1, self.y1, self.refobj)
+
+
+class Canvas(LRFStream):
+    tag_map = {
+        0xF551: ['canvaswidth', 'W'],
+        0xF552: ['canvasheight', 'W'],
+        0xF5DA: ['', 'parse_waits'],
+        0xF533: ['blockrule', 'W', {0x44: "block-fixed", 0x22: "block-adjustable"}],
+        0xF534: ['bgcolor', 'D', Color],
+        0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}],
+        0xF536: ['framewidth', 'W'],
+        0xF537: ['framecolor', 'D', Color],
+        0xF52E: ['framemode', 'W', {0: 'none', 2: 'curve', 1:'square'}],
+      }
+    tag_map.update(LRFStream.tag_map)
+    extra_attrs = ['canvaswidth', 'canvasheight', 'blockrule', 'layout',
+                   'framewidth', 'framecolor', 'framemode']
+
+    def parse_waits(self, tag, f):
+        val = tag.word
+        self.setwaitprop = val&0xF
+        self.setwaitsync = val&0xF0
+
+    def initialize(self):
+        self.attrs = {}
+        for attr in self.extra_attrs:
+            if hasattr(self, attr):
+                self.attrs[attr] = getattr(self, attr)
+        self._contents = []
+        stream = io.BytesIO(self.stream)
+        while stream.tell() < len(self.stream):
+            tag = Tag(stream)
+            try:
+                self._contents.append(
+                    PutObj(self._document.objects,
+                        *struct.unpack("<HHI", tag.contents)))
+            except struct.error:
+                print('Canvas object has errors, skipping.')
+
+    def __str__(self):
+        s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,)
+        for attr in self.attrs:
+            s += '%s="%s" '%(attr, self.attrs[attr])
+        s = s.rstrip() + '>\n'
+        for po in self:
+            s += str(po) + '\n'
+        s += '</%s>\n'%(self.__class__.__name__,)
+        return s
+
+    def __iter__(self):
+        for i in self._contents:
+            yield i
+
+
+class Header(Canvas):
+    pass
+
+
+class Footer(Canvas):
+    pass
+
+
+class ESound(LRFObject):
+    pass
+
+
+class ImageStream(LRFStream):
+    tag_map = {
+        0xF555: ['comment', 'P'],
+      }
+    imgext = {0x11: 'jpeg', 0x12: 'png', 0x13: 'bmp', 0x14: 'gif'}
+
+    tag_map.update(LRFStream.tag_map)
+
+    encoding = property(fget=lambda self : self.imgext[self.stream_flags & 0xFF].upper())
+
+    def end_stream(self, *args):
+        LRFStream.end_stream(self, *args)
+        self.file = str(self.id) + '.' + self.encoding.lower()
+        if self._document is not None:
+            self._document.image_map[self.id] = self
+
+    def __str__(self):
+        return '<ImageStream objid="%s" encoding="%s" file="%s" />\n'%\
+            (self.id, self.encoding, self.file)
+
+
+class Import(LRFStream):
+    pass
+
+
+class Button(LRFObject):
+    tag_map = {
+        0xF503: ['', 'do_ref_image'],
+        0xF561: ['button_flags','W'],  # <Button/>
+        0xF562: ['','do_base_button'],  # <BaseButton>
+        0xF563: ['',''],  # </BaseButton>
+        0xF564: ['','do_focus_in_button'],  # <FocusinButton>
+        0xF565: ['',''],  # </FocusinButton>
+        0xF566: ['','do_push_button'],  # <PushButton>
+        0xF567: ['',''],  # </PushButton>
+        0xF568: ['','do_up_button'],  # <UpButton>
+        0xF569: ['',''],  # </UpButton>
+        0xF56A: ['','do_start_actions'],  # start actions
+        0xF56B: ['',''],  # end actions
+        0xF56C: ['','parse_jump_to'],  # JumpTo
+        0xF56D: ['','parse_send_message'],  # <SendMessage
+        0xF56E: ['','parse_close_window'],  # <CloseWindow/>
+        0xF5D6: ['','parse_sound_stop'],  # <SoundStop/>
+        0xF5F9: ['','parse_run'],  # Run
+      }
+    tag_map.update(LRFObject.tag_map)
+
+    def __init__(self, document, stream, id, scramble_key, boundary):
+        self.xml = ''
+        self.refimage = {}
+        self.actions = {}
+        self.to_dump = True
+        LRFObject.__init__(self, document, stream, id, scramble_key, boundary)
+
+    def do_ref_image(self, tag, f):
+        self.refimage[self.button_type] = tag.dword
+
+    def do_base_button(self, tag, f):
+        self.button_type = 0
+        self.actions[self.button_type] = []
+
+    def do_focus_in_button(self, tag, f):
+        self.button_type = 1
+
+    def do_push_button(self, tag, f):
+        self.button_type = 2
+
+    def do_up_button(self, tag, f):
+        self.button_type = 3
+
+    def do_start_actions(self, tag, f):
+        self.actions[self.button_type] = []
+
+    def parse_jump_to(self, tag, f):
+        self.actions[self.button_type].append((1, struct.unpack("<II", tag.contents)))
+
+    def parse_send_message(self, tag, f):
+        params = (tag.word, Tag.string_parser(f), Tag.string_parser(f))
+        self.actions[self.button_type].append((2, params))
+
+    def parse_close_window(self, tag, f):
+        self.actions[self.button_type].append((3,))
+
+    def parse_sound_stop(self, tag, f):
+        self.actions[self.button_type].append((4,))
+
+    def parse_run(self, tag, f):
+        self.actions[self.button_type].append((5, struct.unpack("<HI", tag.contents)))
+
+    def jump_action(self, button_type):
+        # Return the (refpage, refobj) pair of the first JumpTo action, or
+        # (None, None) when the button type has no such action (or no actions).
+        jumps = (a[1] for a in self.actions.get(button_type, ()) if a[0] == 1)
+        return next(jumps, (None, None))
+
+    def __str__(self):
+        s = '<Button objid="%s">\n'%(self.id,)
+        if self.button_flags & 0x10 != 0:
+            s += '<PushButton '
+            if 2 in self.refimage:
+                s += 'refimage="%s" '%(self.refimage[2],)
+            s = s.rstrip() + '>\n'
+            s += '<JumpTo refpage="%s" refobj="%s" />\n'% self.jump_action(2)
+            s += '</PushButton>\n'
+        else:
+            raise LRFParseError('Unsupported button type')
+        s += '</Button>\n'
+        return s
+
+    refpage = property(fget=lambda self : self.jump_action(2)[0])
+    refobj = property(fget=lambda self : self.jump_action(2)[1])
+
+
+class Window(LRFObject):
+    pass
+
+
+class PopUpWin(LRFObject):
+    pass
+
+
+class Sound(LRFObject):
+    pass
+
+
+class SoundStream(LRFObject):
+    pass
+
+
+class Font(LRFStream):
+    tag_map = {
+        0xF559: ['fontfilename', 'P'],
+        0xF55D: ['fontfacename', 'P'],
+      }
+    tag_map.update(LRFStream.tag_map)
+    data = property(fget=lambda self: self.stream)
+
+    def end_stream(self, *args):
+        LRFStream.end_stream(self, *args)
+        self._document.font_map[self.fontfacename] = self
+        self.file = self.fontfacename + '.ttf'
+
+    def __str__(self):
+        return '<RegistFont objid="%s" fontfilename="%s" fontname="%s" encoding="TTF" file="%s" />\n'%\
+            (self.id, self.fontfilename, self.fontfacename, self.file)
+    __unicode__ = __str__  # legacy Python 2 name; str(font) needs __str__ on Python 3
+
+
+class ObjectInfo(LRFStream):
+    pass
+
+
+class BookAttr(StyleObject, LRFObject):
+    tag_map = {
+        0xF57B: ['page_tree_id', 'D'],
+        0xF5D8: ['', 'add_font'],
+        0xF5DA: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}],
+      }
+    tag_map.update(ruby_tags)
+    tag_map.update(LRFObject.tag_map)
+    binding_map = {1: 'Lr', 16 : 'Rl'}
+
+    def __init__(self, document, stream, id, scramble_key, boundary):
+        self.font_link_list = []
+        LRFObject.__init__(self, document, stream, id, scramble_key, boundary)
+
+    def add_font(self, tag, f):
+        self.font_link_list.append(tag.dword)
+
+    def __str__(self):
+        s = '<BookStyle objid="%s" stylelabel="%s">\n'%(self.id, self.id)
+        s += '<SetDefault %s />\n'%(self._tags_to_xml(),)
+        doc = self._document
+        s += '<BookSetting bindingdirection="%s" dpi="%s" screenwidth="%s" screenheight="%s" colordepth="%s" />\n'%\
+        (self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth)
+        for font in self._document.font_map.values():
+            s += str(font)
+        s += '</BookStyle>\n'
+        return s
+
+
+class SimpleText(Text):
+    pass
+
+
+class TocLabel(object):
+    """One table-of-contents entry: a label plus the page/object ids it refers to."""
+    def __init__(self, refpage, refobject, label):
+        self.label, self.refobject, self.refpage = label, refobject, refpage
+
+    def __str__(self):
+        return '<TocLabel refpage="%s" refobj="%s">%s</TocLabel>\n' % (self.refpage, self.refobject, self.label)
+
+
+class TOCObject(LRFStream):
+
+    def initialize(self):
+        """Decode the TOC stream into a list of TocLabel entries (self._contents)."""
+        stream = io.BytesIO(self.stream)
+        (count,) = struct.unpack("<H", stream.read(2))
+        # Entries start at byte 4*(count+1); the bytes skipped are not used here.
+        stream.seek(4*(count+1))
+        self._contents = []
+        for _ in range(count):
+            (refpage,) = struct.unpack("<I", stream.read(4))
+            (refobj,) = struct.unpack("<I", stream.read(4))
+            (size,) = struct.unpack("<H", stream.read(2))
+            label = stream.read(size).decode('utf_16_le')
+            self._contents.append(TocLabel(refpage, refobj, label))
+
+    def __iter__(self):
+        for i in self._contents:
+            yield i
+
+    def __str__(self):
+        s = '<TOC>\n'
+        for i in self:
+            s += str(i)
+        return s + '</TOC>\n'
+
+
+object_map = [None,  # 00
+              PageTree,  # 01
+              Page,  # 02
+              Header,  # 03
+              Footer,  # 04
+              PageAttr,  # 05
+              Block,  # 06
+              BlockAttr,  # 07
+              MiniPage,  # 08
+              None,  # 09
+              Text,  # 0A
+              TextAttr,  # 0B
+              Image,  # 0C
+              Canvas,  # 0D
+              ESound,  # 0E
+              None,  # 0F
+              None,  # 10
+              ImageStream,  # 11
+              Import,  # 12
+              Button,  # 13
+              Window,  # 14
+              PopUpWin,  # 15
+              Sound,  # 16
+              SoundStream,  # 17
+              None,  # 18
+              Font,  # 19
+              ObjectInfo,  # 1A
+              None,  # 1B
+              BookAttr,  # 1C
+              SimpleText,  # 1D
+              TOCObject]  # 1E
+
+
+def get_object(document, stream, id, offset, size, scramble_key):
+    """Build the LRF object whose ObjectStart tag sits at *offset* in *stream*."""
+    stream.seek(offset)
+    start_tag = Tag(stream)
+    if start_tag.id != 0xF500:
+        raise LRFParseError('Bad object start')
+    obj_id, obj_type = struct.unpack("<IH", start_tag.contents)
+    if obj_type >= len(object_map) or object_map[obj_type] is None:
+        raise LRFParseError("Unknown object type: %02X!" % obj_type)
+    return object_map[obj_type](document, stream, obj_id, scramble_key, offset+size-Tag.tags[0][0])
diff --git a/ebook_converter/ebooks/lrf/tags.py b/ebook_converter/ebooks/lrf/tags.py
new file mode 100644
index 0000000..a596cc7
--- /dev/null
+++ b/ebook_converter/ebooks/lrf/tags.py
@@ -0,0 +1,303 @@
+import struct
+
+from ebook_converter.ebooks.lrf import LRFParseError
+
+
+class Tag(object):
+    """A single tag read from an LRF stream.
+
+    On disk a tag starts with two bytes: the low byte of its ID followed
+    by the constant ``0xF5``. How the payload after them is read is
+    described by the ``tags`` table.
+
+    Attributes set by ``__init__``:
+        offset: stream position at which the tag starts.
+        id: full tag ID, ``0xF500`` plus the low byte.
+        name: symbolic name from ``tags``, or ``None``.
+        contents: raw payload ``bytes`` for fixed-size tags, otherwise
+            whatever the matching ``*_parser`` method returned.
+    """
+
+    # Low byte of the tag ID -> (payload, name). ``payload`` is either the
+    # number of bytes to read or the prefix of the ``<prefix>_parser``
+    # method that reads the payload; ``name`` may be None.
+    tags = {0x00: (6, "*ObjectStart"),
+            0x01: (0, "*ObjectEnd"),
+            0x02: (4, "*ObjectInfoLink"),
+            0x03: (4, "*Link"),
+            0x04: (4, "*StreamSize"),
+            0x05: (0, "*StreamStart"),
+            0x06: (0, "*StreamEnd"),
+            0x07: (4, None),
+            0x08: (4, None),
+            0x09: (4, None),
+            0x0A: (4, None),
+            0x0B: ("type_one", "*ContainedObjectsList"),
+            0x0D: (2, None),
+            0x0E: (2, None),
+            0x11: (2, None),
+            0x12: (2, None),
+            0x13: (2, None),
+            0x14: (2, None),
+            0x15: (2, None),
+            0x16: ("string", None),
+            0x17: (4, None),
+            0x18: (4, None),
+            0x19: (2, None),
+            0x1A: (2, None),
+            0x1B: (2, None),
+            0x1C: (2, None),
+            0x1D: (2, None),
+            0x1E: (2, None),
+            0x21: (2, None),
+            0x22: (2, None),
+            0x23: (2, None),
+            0x24: (2, None),
+            0x25: (2, None),
+            0x26: (2, None),
+            0x27: (2, None),
+            0x28: (2, None),
+            0x29: (6, None),
+            0x2A: (2, None),
+            0x2B: (2, None),
+            0x2C: (2, None),
+            0x2D: (4, None),
+            0x2E: (2, None),
+            0x31: (2, None),
+            0x32: (2, None),
+            0x33: (2, None),
+            0x34: (4, None),
+            0x35: (2, None),
+            0x36: (2, None),
+            0x37: (4, None),
+            0x38: (2, None),
+            0x39: (2, None),
+            0x3A: (2, None),
+            0x3C: (2, None),
+            0x3D: (2, None),
+            0x3E: (2, None),
+            0x41: (2, None),
+            0x42: (2, None),
+            0x44: (4, None),
+            0x45: (4, None),
+            0x46: (2, None),
+            0x47: (2, None),
+            0x48: (2, None),
+            0x49: (8, None),
+            0x4A: (8, None),
+            0x4B: (4, None),
+            0x4C: (4, None),
+            0x4D: (0, None),
+            0x4E: (12, None),
+            0x51: (2, None),
+            0x52: (2, None),
+            0x53: (4, None),
+            0x54: (2, "*StreamFlags"),
+            0x55: ("string", None),
+            0x56: (2, None),
+            0x57: (2, None),
+            0x58: (2, None),
+            0x59: ("string", None),
+            0x5A: ("string", None),
+            0x5B: (4, None),
+            0x5C: ("type_one", None),
+            0x5D: ("string", None),
+            0x5E: (2, None),
+            0x61: (2, None),
+            0x62: (0, None),
+            0x63: (0, None),
+            0x64: (0, None),
+            0x65: (0, None),
+            0x66: (0, None),
+            0x67: (0, None),
+            0x68: (0, None),
+            0x69: (0, None),
+            0x6A: (0, None),
+            0x6B: (0, None),
+            0x6C: (8, None),
+            0x6D: (2, None),
+            0x6E: (0, None),
+            0x71: (0, None),
+            0x72: (0, None),
+            0x73: (10, None),
+            0x75: (2, None),
+            0x76: (2, None),
+            0x77: (2, None),
+            0x78: ("tag_78", None),
+            0x79: (2, None),
+            0x7A: (2, None),
+            0x7B: (4, None),
+            0x7C: (4, "*ParentPageTree"),
+            0x81: (0, None),
+            0x82: (0, None),
+            0xA1: (4, None),
+            0xA2: (0, None),
+            0xA5: ("unknown", None),
+            0xA6: (0, None),
+            0xA7: (4, None),
+            0xA8: (0, None),
+            0xA9: (0, None),
+            0xAA: (0, None),
+            0xAB: (0, None),
+            0xAC: (0, None),
+            0xAD: (0, None),
+            0xAE: (0, None),
+            0xB1: (0, None),
+            0xB2: (0, None),
+            0xB3: (0, None),
+            0xB4: (0, None),
+            0xB5: (0, None),
+            0xB6: (0, None),
+            0xB7: (0, None),
+            0xB8: (0, None),
+            0xB9: (0, None),
+            0xBA: (0, None),
+            0xBB: (0, None),
+            0xBC: (0, None),
+            0xBD: (0, None),
+            0xBE: (0, None),
+            0xC1: (0, None),
+            0xC2: (0, None),
+            0xC3: (2, None),
+            0xC4: (0, None),
+            0xC5: (2, None),
+            0xC6: (2, None),
+            0xC7: (0, None),
+            0xC8: (2, None),
+            0xC9: (0, None),
+            0xCA: (2, None),
+            0xCB: ("unknown", None),
+            0xCC: (2, None),
+            0xD1: (12, None),
+            0xD2: (0, None),
+            0xD4: (2, None),
+            0xD6: (0, None),
+            0xD7: (14, None),
+            0xD8: (4, None),
+            0xD9: (8, None),
+            0xDA: (2, None),
+            0xDB: (2, None),
+            0xDC: (2, None),
+            0xDD: (2, None),
+            0xF1: (2, None),
+            0xF2: (4, None),
+            0xF3: (4, None),
+            0xF4: (2, None),
+            0xF5: (4, None),
+            0xF6: (4, None),
+            0xF7: (4, None),
+            0xF8: (4, None),
+            0xF9: (6, None)}
+    # Symbolic names only, keyed like ``tags``.
+    name_map = {key: name for key, (_, name) in tags.items()
+                if name is not None}
+
+    def __init__(self, stream):
+        """Read one tag from ``stream`` at its current position.
+
+        Raises:
+            LRFParseError: if the second byte is not ``0xF5``, the tag ID
+                is not in ``tags``, or no parser exists for its payload.
+        """
+        self.offset = stream.tell()
+        low, marker = struct.unpack("<BB", stream.read(2))
+        if marker != 0xF5:
+            raise LRFParseError("Bad tag ID %02X at %d" % (marker,
+                                                           self.offset))
+        if low not in self.__class__.tags:
+            raise LRFParseError("Unknown tag ID: F5%02X" % low)
+
+        self.id = 0xF500 + low
+
+        size, self.name = self.__class__.tags[low]
+        if isinstance(size, str):
+            # Some table entries (e.g. "unknown") have no method of their
+            # own; route them to dummy_parser so the caller gets an
+            # LRFParseError instead of an AttributeError.
+            parser = getattr(self, size + '_parser', self.dummy_parser)
+            self.contents = parser(stream)
+        else:
+            self.contents = stream.read(size)
+
+    def __str__(self):
+        """Return a one-line description of the tag for debugging."""
+        s = "Tag %04X " % self.id
+        if self.name:
+            s += self.name
+        s += " at %08X, contents: %s" % (self.offset, repr(self.contents))
+        return s
+
+    def _unpack(self, fmt):
+        """Decode the whole payload as the single value ``fmt`` describes.
+
+        Raises:
+            LRFParseError: if the payload length does not match ``fmt``.
+        """
+        if len(self.contents) != struct.calcsize(fmt):
+            raise LRFParseError("Bad parameter for tag ID: %04X" % self.id)
+        return struct.unpack(fmt, self.contents)[0]
+
+    @property
+    def byte(self):
+        """Payload as an unsigned 8 bit integer."""
+        return self._unpack("<B")
+
+    @property
+    def word(self):
+        """Payload as an unsigned little-endian 16 bit integer."""
+        return self._unpack("<H")
+
+    @property
+    def sword(self):
+        """Payload as a signed little-endian 16 bit integer."""
+        return self._unpack("<h")
+
+    @property
+    def dword(self):
+        """Payload as an unsigned little-endian 32 bit integer."""
+        return self._unpack("<I")
+
+    def dummy_parser(self, stream):
+        """Reject a payload this class does not know how to read."""
+        raise LRFParseError("Unknown tag at %08X" % stream.tell())
+
+    @classmethod
+    def string_parser(cls, stream):
+        """Read a length-prefixed UTF-16 string from ``stream``.
+
+        The payload is a little-endian 16 bit byte count followed by that
+        many bytes of text.
+        """
+        size = struct.unpack("<H", stream.read(2))[0]
+        raw = stream.read(size)
+        if raw[:2] in (b'\xff\xfe', b'\xfe\xff'):
+            # A byte order mark is present: let the codec honour and strip
+            # it, as it always did.
+            return str(raw, "utf_16")
+        # Without a BOM the "utf_16" codec uses the host byte order. Every
+        # other field this class reads is little-endian, so say so here
+        # as well instead of depending on the machine.
+        return str(raw, "utf_16_le")
+
+    def type_one_parser(self, stream):
+        """Read a 16 bit count followed by that many 32 bit integers."""
+        cnt = struct.unpack("<H", stream.read(2))[0]
+        return [struct.unpack("<I", stream.read(4))[0] for _ in range(cnt)]
+
+    def tag_78_parser(self, stream):
+        """Read the compound payload of tag F578.
+
+        Returns:
+            A three element list: a 32 bit integer, the contents of the
+            embedded F516 tag and a trailing 16 bit integer.
+
+        Raises:
+            LRFParseError: if the embedded tag is not F516.
+        """
+        pos = stream.tell()
+        first = struct.unpack("<I", stream.read(4))[0]
+        tag = Tag(stream)
+        if tag.id != 0xF516:
+            raise LRFParseError("Bad tag 78 at %08X" % pos)
+        last = struct.unpack("<H", stream.read(2))[0]
+        return [first, tag.contents, last]