ebook-converter/ebook_converter/ebooks/compression/palmdoc.py

import io
import sys
from struct import pack


def decompress_doc(data):
    """Decompress PalmDoc-compressed *data* and return the result as bytes.

    Every token starts with one byte whose value selects the decoding:

    * ``0x00`` and ``0x09``-``0x7F``: the byte is copied unchanged.
    * ``0x01``-``0x08``: the byte is a count; that many following bytes
      are copied unchanged.
    * ``0x80``-``0xBF``: combined with the next byte into a 16-bit
      big-endian value. Bits 3-13 are a distance back into the output
      produced so far, the low three bits are ``length - 3``.
    * ``0xC0``-``0xFF``: a space followed by the byte XOR ``0x80``.

    :param data: bytes-like object; indexing it must yield integers.
    :returns: the decompressed data as ``bytes``.
    :raises IndexError: if *data* ends in the middle of a token, or a
        back-reference has distance zero. Distances larger than the
        output produced so far are not validated; Python's negative
        indexing rules apply to them.
    """
    # A bytearray grows in place; appending to immutable bytes would copy
    # the whole output for every decoded byte.
    output = bytearray()
    pos = 0
    end = len(data)

    while pos < end:
        token = data[pos]
        pos += 1

        if 1 <= token <= 8:
            # literal run: copy the next `token` bytes verbatim
            literal = data[pos:pos + token]
            if len(literal) < token:
                raise IndexError('truncated literal run in PalmDoc data')
            output += literal
            pos += token

        elif token < 128:
            # direct ascii copy
            output.append(token)

        elif token >= 192:
            # merged space and ascii character
            output.append(0x20)
            output.append(token ^ 128)

        else:
            # back-reference into the already decompressed output
            pair = (token << 8) + data[pos]
            pos += 1
            distance = (pair & 0x3FFF) >> 3
            # Copy one byte at a time: when the distance is shorter than
            # the length, the source overlaps the bytes being written.
            for _ in range((pair & 7) + 3):
                output.append(output[len(output) - distance])

    return bytes(output)


def _is_plain_byte(value):
    """Return True if *value* is emitted as-is (NUL or 0x09-0x7F)."""
    return value == 0 or 8 < value < 0x80


def _find_back_reference(data, pos):
    """Return ``(distance, length)`` of the longest earlier match, or None.

    Looks for the 10..3 bytes starting at *pos* inside the 2047 bytes that
    precede *pos*, preferring longer chunks and, for a given length, the
    occurrence closest to *pos*.
    """
    # 2047 is the largest distance the 11-bit field of the code can hold,
    # so nothing before this point could be referenced anyway.
    window_start = max(0, pos - 2047)
    for length in range(10, 2, -1):
        match = data.rfind(data[pos:pos + length], window_start, pos)
        if match >= 0:
            return pos - match, length
    return None


def compress_doc(data):
    """Compress *data* with the PalmDoc scheme and return it as bytes.

    The output is a sequence of tokens:

    * a two byte big-endian code ``0x8000 | distance << 3 | (length - 3)``
      for a repeat of 3-10 bytes found at most 2047 bytes back. Repeats
      are only searched for when more than 10 bytes precede and more than
      10 bytes follow the current position;
    * a single byte ``char ^ 0x80`` for a space followed by a character
      in the range ``0x40``-``0x7F``;
    * the byte itself for NUL and ``0x09``-``0x7F``;
    * a count byte (1-8) followed by that many raw bytes for everything
      else (``0x01``-``0x08`` and ``0x80``-``0xFF``).

    :param data: the ``bytes`` to compress.
    :returns: the compressed data as ``bytes``; empty input gives ``b''``.
    """
    out = bytearray()
    ldata = len(data)
    i = 0
    while i < ldata:
        if i > 10 and (ldata - i) > 10:
            found = _find_back_reference(data, i)
            if found is not None:
                distance, length = found
                code = 0x8000 + ((distance << 3) & 0x3ff8) + (length - 3)
                out += pack('>H', code)
                i += length
                continue

        byte = data[i]
        i += 1

        # `i` already points at the following byte, so `i < ldata` is the
        # right bound; this also merges a pair that ends the input.
        if byte == 0x20 and i < ldata:
            following = data[i]
            if 0x40 <= following < 0x80:
                out.append(following ^ 0x80)
                i += 1
                continue

        if _is_plain_byte(byte):
            out.append(byte)
            continue

        # Raw run: the current byte plus up to seven more bytes that also
        # cannot be emitted as-is.
        run_end = i
        while (run_end < ldata and run_end - i < 7 and
               not _is_plain_byte(data[run_end])):
            run_end += 1
        run = data[i - 1:run_end]
        out.append(len(run))
        out += run
        i = run_end

    return bytes(out)