Initial import

2026-04-27 01:01:28 +02:00 · 2020-03-31 17:15:23 +02:00
commit d97ea9b0bc
311 changed files with 131419 additions and 0 deletions
@@ -0,0 +1,6 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__ = 'GPL 3'
+__copyright__ = '2009, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
@@ -0,0 +1,238 @@
+/*
+:mod:`cPalmdoc` -- Palmdoc compression/decompression
+=====================================================
+
+.. module:: cPalmdoc
+    :platform: All
+    :synopsis: Compression and decompression of PalmDoc data, implemented in C for speed
+
+.. moduleauthor:: Kovid Goyal <kovid@kovidgoyal.net> Copyright 2009
+
+*/
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <stdio.h>
+
+// Minimum size of the output buffer allocated by cpalmdoc_decompress.
+#define BUFFER 6000
+
+// Function-like macros: each argument is evaluated more than once, so do not
+// pass expressions that have side effects.
+#define MIN(x, y) ( ((x) < (y)) ? (x) : (y) )
+#define MAX(x, y) ( ((x) > (y)) ? (x) : (y) )
+
+// Despite the name, Byte is an unsigned short. It stores single byte values
+// (0-255) and, in cpalmdoc_decompress, the combined two-byte repeat code.
+typedef unsigned short int Byte;
+// A pointer to Byte values together with their count.
+typedef struct {
+	Byte	*data;
+	Py_ssize_t len;
+} buffer;
+
+// Force bool/false/true to int/0/1, discarding any macro definitions of
+// those names that are already in effect.
+#ifdef	bool
+#undef	bool
+#endif
+#define	bool		int
+
+#ifdef	false
+#undef	false
+#endif
+#define	false		0
+
+#ifdef	true
+#undef	true
+#endif
+#define	true		1
+
+// Map a value above 127 to x-256 so that it lands in the negative half of a
+// signed char; values up to 127 pass through unchanged.
+#define CHAR(x) (( (x) > 127 ) ? (x)-256 : (x))
+
+// Format units used with PyArg_ParseTuple (BUFFER_FMT) and Py_BuildValue
+// (BYTES_FMT); the Python 2 and Python 3 builds use different units.
+#if PY_MAJOR_VERSION >= 3
+    #define BUFFER_FMT "y#"
+    #define BYTES_FMT "y#"
+#else
+    #define BUFFER_FMT "t#"
+    #define BYTES_FMT "s#"
+#endif
+
+// decompress(bytestring) -> bytestring
+//
+// Decode a PalmDoc-compressed byte string.
+//
+// Each control byte selects one of four cases:
+//   0x01-0x08  copy that many following bytes verbatim
+//   0x00, 0x09-0x7F  literal byte
+//   0xC0-0xFF  a space followed by (byte ^ 0x80)
+//   0x80-0xBF  first half of a two-byte repeat code: bits 3-13 are the
+//              distance back into the output, bits 0-2 are (length - 3)
+//
+// Returns a new bytes object. Returns NULL with MemoryError set when the
+// output buffer cannot be allocated, and NULL with ValueError set when the
+// input is truncated or contains a repeat code that points outside the data
+// decoded so far.
+static PyObject *
+cpalmdoc_decompress(PyObject *self, PyObject *args) {
+    const char *_input = NULL; Py_ssize_t input_len = 0;
+    const unsigned char *input; char *output; PyObject *ans;
+    unsigned int c;
+    Py_ssize_t i = 0, o = 0, di, n, capacity;
+
+    if (!PyArg_ParseTuple(args, BUFFER_FMT, &_input, &input_len))
+        return NULL;
+    // Read the input as unsigned bytes directly; no converted copy is needed.
+    input = (const unsigned char *)_input;
+
+    // The largest expansion is a two-byte repeat code producing ten bytes, so
+    // eight output bytes per input byte is always enough. Guard the multiply.
+    if (input_len > PY_SSIZE_T_MAX / 8) return PyErr_NoMemory();
+    capacity = MAX(BUFFER, 8*input_len);
+    output = (char *)PyMem_Malloc(sizeof(char)*capacity);
+    if (output == NULL) return PyErr_NoMemory();
+
+    while (i < input_len) {
+        c = input[i++];
+        if (c >= 1 && c <= 8) {  // copy 'c' bytes
+            // The run must lie entirely inside the input.
+            if ((Py_ssize_t)c > input_len - i) goto corrupt;
+            while (c--) output[o++] = (char)input[i++];
+        }
+        else if (c <= 0x7F)  // 0, 09-7F = self
+            output[o++] = (char)c;
+
+        else if (c >= 0xC0) { // space + ASCII char
+            output[o++] = ' ';
+            output[o++] = (char)(c ^ 0x80);
+        }
+        else { // 80-BF repeat sequences
+            // The second byte of the code must be present.
+            if (i >= input_len) goto corrupt;
+            c = (c << 8) + input[i++];
+            di = (c & 0x3FFF) >> 3;
+            // A distance of zero, or one reaching before the start of the
+            // output, would read memory that was never written.
+            if (di < 1 || di > o) goto corrupt;
+            // Byte-by-byte on purpose: source and destination may overlap.
+            for ( n = (c & 7) + 3; n--; ++o )
+                output[o] = output[o - di];
+        }
+    }
+    ans = Py_BuildValue(BYTES_FMT, output, o);
+    PyMem_Free(output);
+    return ans;
+
+corrupt:
+    PyMem_Free(output);
+    PyErr_SetString(PyExc_ValueError, "Invalid or truncated PalmDoc compressed data");
+    return NULL;
+}
+
+// Compare the first len elements of a and b.
+// Returns true when all of them are equal (also when len is not positive),
+// false as soon as one pair differs.
+static bool
+cpalmdoc_memcmp( Byte *a, Byte *b, Py_ssize_t len) {
+    Py_ssize_t k = 0;
+    while (k < len) {
+        if (a[k] != b[k]) return false;
+        k++;
+    }
+    return true;
+}
+
+// Search backwards for an earlier copy of the chunk_length elements that
+// start at data[pos]. Candidates begin at pos - chunk_length, so a match never
+// overlaps the chunk itself, and move towards index 0.
+// Returns the start index of the closest match, or pos when there is none.
+static Py_ssize_t
+cpalmdoc_rfind(Byte *data, Py_ssize_t pos, Py_ssize_t chunk_length) {
+    Py_ssize_t start = pos - chunk_length;
+    while (start >= 0) {
+        if (cpalmdoc_memcmp(data + start, data + pos, chunk_length))
+            return start;
+        start--;
+    }
+    return pos;
+}
+
+
+// PalmDoc-compress the bytes held in b, writing the result to output.
+//
+// b       input bytes; every element is expected to be in 0-255.
+// output  destination supplied by the caller. No input byte produces more
+//         than two output bytes, so 2 * b->len bytes is always sufficient.
+//
+// Returns the number of bytes written. The scratch space lives on the stack,
+// so this function cannot fail; 0 is returned only when b->len is 0.
+static Py_ssize_t
+cpalmdoc_do_compress(buffer *b, char *output) {
+    Py_ssize_t i = 0, j, chunk_len, dist, run_len;
+    unsigned int compound;
+    Byte c, n;
+    Byte run[8];  // pending binary run; at most 8 bytes are emitted at once
+    bool found;
+    char *head = output;
+
+    while (i < b->len) {
+        c = b->data[i];
+        // Repeats: only attempted away from both ends of the data. Try the
+        // longest chunk (10) first and fall back down to 3.
+        if ( i > 10 && (b->len - i) > 10) {
+            found = false;
+            for (chunk_len = 10; chunk_len > 2; chunk_len--) {
+                j = cpalmdoc_rfind(b->data, i, chunk_len);
+                dist = i - j;
+                // rfind returns i itself when nothing matched; the distance
+                // must also fit in the 11 bits the code provides.
+                if (j < i && dist <= 2047) {
+                    found = true;
+                    compound = (unsigned int)((dist << 3) + chunk_len-3);
+                    *(output++) = CHAR(0x80 + (compound >> 8 ));
+                    *(output++) = CHAR(compound & 0xFF);
+                    i += chunk_len;
+                    break;
+                }
+            }
+            if (found) continue;
+        }
+
+        // Single character
+        i++;
+        // A space followed by a byte in 0x40-0x7F folds into one byte.
+        if (c == 32 && i < b->len) {
+            n = b->data[i];
+            if ( n >= 0x40 && n <= 0x7F) {
+                *(output++) = CHAR(n^0x80); i++; continue;
+            }
+        }
+        if (c == 0 || (c > 8 && c < 0x80))
+            *(output++) = CHAR(c);
+        else { // Binary data: a length byte (1-8) followed by the raw bytes
+            run[0] = c; run_len = 1;
+            j = i;
+            while (j < b->len && run_len < 8) {
+                c = b->data[j];
+                if (c == 0 || (c > 8 && c < 0x80)) break;
+                run[run_len++] = c; j++;
+            }
+            i += run_len - 1;
+            *(output++) = (char)run_len;
+            for (j = 0; j < run_len; j++) *(output++) = (char)run[j];
+        }
+    }
+    return output - head;
+}
+
+// compress(bytestring) -> bytestring
+//
+// PalmDoc-compress a byte string and return the result as a new bytes object.
+// An empty input yields an empty result. Returns NULL with MemoryError set
+// when a working buffer cannot be allocated.
+static PyObject *
+cpalmdoc_compress(PyObject *self, PyObject *args) {
+    const char *_input = NULL; Py_ssize_t input_len = 0;
+    char *output = NULL; PyObject *ans = NULL;
+    Py_ssize_t j = 0;
+    buffer b;
+    b.data = NULL; b.len = 0;
+
+    if (!PyArg_ParseTuple(args, BUFFER_FMT, &_input, &input_len))
+		return NULL;
+    // Nothing to compress; this also keeps "0 bytes written" from being
+    // mistaken for a failure further down.
+    if (input_len == 0)
+        return Py_BuildValue(BYTES_FMT, "", (Py_ssize_t)0);
+    // Both allocations below multiply input_len; refuse sizes that overflow.
+    if (input_len > PY_SSIZE_T_MAX / 2 ||
+            (size_t)input_len > (size_t)PY_SSIZE_T_MAX / sizeof(Byte))
+        return PyErr_NoMemory();
+
+    b.data = (Byte *)PyMem_Malloc(sizeof(Byte)*input_len);
+    if (b.data == NULL) { PyErr_NoMemory(); goto end; }
+    // Map chars to bytes (0-255 regardless of the signedness of char)
+    for (j = 0; j < input_len; j++)
+        b.data[j] = (unsigned char)_input[j];
+    b.len = input_len;
+
+    // Compression can enlarge the data: an isolated binary byte is written
+    // as a length byte plus the byte itself. Two output bytes per input byte
+    // is the worst case, so allocate exactly that.
+    output = (char *)PyMem_Malloc(sizeof(char) * 2 * (size_t)input_len);
+    if (output == NULL) { PyErr_NoMemory(); goto end; }
+
+    j = cpalmdoc_do_compress(&b, output);
+    // The input is non-empty here, so 0 can only mean the compressor failed.
+    if (j == 0) { PyErr_NoMemory(); goto end; }
+    ans = Py_BuildValue(BYTES_FMT, output, j);
+
+end:
+    // PyMem_Free accepts NULL, so one exit path serves every case.
+    PyMem_Free(output);
+    PyMem_Free(b.data);
+    return ans;
+}
+
+// Module-level documentation string, used when the module object is created.
+static char cPalmdoc_doc[] = "Compress and decompress palmdoc strings.";
+
+// Functions exported by the module. Both are declared METH_VARARGS; the
+// all-NULL entry terminates the table.
+static PyMethodDef cPalmdoc_methods[] = {
+    {"decompress", cpalmdoc_decompress, METH_VARARGS,
+    "decompress(bytestring) -> decompressed bytestring\n\n"
+    		"Decompress a palmdoc compressed byte string. "
+    },
+
+    {"compress", cpalmdoc_compress, METH_VARARGS,
+    "compress(bytestring) -> compressed bytestring\n\n"
+    		"Palmdoc compress a byte string. "
+    },
+    {NULL, NULL, 0, NULL}
+};
+
+// Module initialisation for both Python major versions.
+// Python 3: defines a PyModuleDef and the entry point PyInit_cPalmdoc, which
+// returns the module object, or NULL when creation fails.
+// Python 2: the entry point is initcPalmdoc, which returns nothing.
+// INITMODULE and INITERROR hide that difference from the shared body.
+// NOTE(review): CALIBRE_MODINIT_FUNC is not defined in this file; presumably
+// the build supplies it - confirm.
+#if PY_MAJOR_VERSION >= 3
+#define INITERROR return NULL
+#define INITMODULE PyModule_Create(&cPalmdoc_module)
+static struct PyModuleDef cPalmdoc_module = {
+    /* m_base     */ PyModuleDef_HEAD_INIT,
+    /* m_name     */ "cPalmdoc",
+    /* m_doc      */ cPalmdoc_doc,
+    /* m_size     */ -1,
+    /* m_methods  */ cPalmdoc_methods,
+    /* m_slots    */ 0,
+    /* m_traverse */ 0,
+    /* m_clear    */ 0,
+    /* m_free     */ 0,
+};
+CALIBRE_MODINIT_FUNC PyInit_cPalmdoc(void) {
+#else
+#define INITERROR return
+#define INITMODULE Py_InitModule3("cPalmdoc", cPalmdoc_methods, cPalmdoc_doc)
+CALIBRE_MODINIT_FUNC initcPalmdoc(void) {
+#endif
+
+    // Shared body: create the module and bail out if that failed.
+    PyObject *m;
+    m = INITMODULE;
+    if (m == NULL) {
+        INITERROR;
+    }
+
+    // Only the Python 3 entry point hands the module back to its caller.
+#if PY_MAJOR_VERSION >= 3
+    return m;
+#endif
+}
@@ -0,0 +1,96 @@
+#!/usr/bin/env  python2
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+__license__   = 'GPL v3'
+__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
+
+import io
+from struct import pack
+
+from calibre.constants import plugins
+from polyglot.builtins import range
+# Fetch the compiled cPalmdoc extension from calibre's plugin table. Each entry
+# is used here as a pair: item 0 is the module (falsy when it could not be
+# loaded) and item 1 is interpolated into the error message.
+cPalmdoc = plugins['cPalmdoc'][0]
+if not cPalmdoc:
+    # The functions below depend on the extension, so fail at import time.
+    raise RuntimeError(('Failed to load required cPalmdoc module: '
+            '%s')%plugins['cPalmdoc'][1])
+
+
+def decompress_doc(data):
+    return cPalmdoc.decompress(data)
+
+
+def compress_doc(data):
+    return cPalmdoc.compress(data) if data else b''
+
+
+def py_compress_doc(data):
+    out = io.BytesIO()
+    i = 0
+    ldata = len(data)
+    while i < ldata:
+        if i > 10 and (ldata - i) > 10:
+            chunk = b''
+            match = -1
+            for j in range(10, 2, -1):
+                chunk = data[i:i+j]
+                try:
+                    match = data.rindex(chunk, 0, i)
+                except ValueError:
+                    continue
+                if (i - match) <= 2047:
+                    break
+                match = -1
+            if match >= 0:
+                n = len(chunk)
+                m = i - match
+                code = 0x8000 + ((m << 3) & 0x3ff8) + (n - 3)
+                out.write(pack('>H', code))
+                i += n
+                continue
+        ch = data[i:i+1]
+        och = ord(ch)
+        i += 1
+        if ch == b' ' and (i + 1) < ldata:
+            onch = ord(data[i:i+1])
+            if onch >= 0x40 and onch < 0x80:
+                out.write(pack('>B', onch ^ 0x80))
+                i += 1
+                continue
+        if och == 0 or (och > 8 and och < 0x80):
+            out.write(ch)
+        else:
+            j = i
+            binseq = [ch]
+            while j < ldata and len(binseq) < 8:
+                ch = data[j:j+1]
+                och = ord(ch)
+                if och == 0 or (och > 8 and och < 0x80):
+                    break
+                binseq.append(ch)
+                j += 1
+            out.write(pack('>B', len(binseq)))
+            out.write(b''.join(binseq))
+            i += len(binseq) - 1
+    return out.getvalue()
+
+
+def find_tests():
+    import unittest
+
+    class Test(unittest.TestCase):
+
+        def test_palmdoc_compression(self):
+            for test in [
+                b'abc\x03\x04\x05\x06ms',  # Test binary writing
+                b'a b c \xfed ',  # Test encoding of spaces
+                b'0123456789axyz2bxyz2cdfgfo9iuyerh',
+                b'0123456789asd0123456789asd|yyzzxxffhhjjkk',
+                (b'ciewacnaq eiu743 r787q 0w%  ; sa fd\xef\ffdxosac wocjp acoiecowei '
+                b'owaic jociowapjcivcjpoivjporeivjpoavca; p9aw8743y6r74%$^$^%8 ')
+            ]:
+                x = compress_doc(test)
+                self.assertEqual(py_compress_doc(test), x)
+                self.assertEqual(decompress_doc(x), test)
+
+    return unittest.defaultTestLoader.loadTestsFromTestCase(Test)