Removed palmdoc C implementation in favor of pure Python.

2020-05-17 20:30:01 +02:00
parent da010d7841
commit 178b9fd4d7
5 changed files with 36 additions and 277 deletions
@@ -18,7 +18,6 @@ To build and run ebook converter, you'll need:
 - Python 3.6 or newer
 - `Liberation fonts`_
 - setuptools
- C compiler (yes, there is a single module, which is written in C)

 No Python2 support. Even if Calibre probably still is able to run on Python2, I
 do not have an intention to support it.
@@ -1,238 +0,0 @@
-/*
-:mod:`cPalmdoc` -- Palmdoc compression/decompression
-=====================================================
-
-.. module:: cPalmdoc
-    :platform: All
-    :synopsis: Compression decompression of Palmdoc implemented in C for speed
-
-.. moduleauthor:: Kovid Goyal <kovid@kovidgoyal.net> Copyright 2009
-
-*/
-
-#define PY_SSIZE_T_CLEAN
-#include <Python.h>
-#include <stdio.h>
-
-#define BUFFER 6000
-
-#define MIN(x, y) ( ((x) < (y)) ? (x) : (y) )
-#define MAX(x, y) ( ((x) > (y)) ? (x) : (y) )
-
-typedef unsigned short int Byte;
-typedef struct {
-	Byte	*data;
-	Py_ssize_t len;
-} buffer;
-
-#ifdef	bool
-#undef	bool
-#endif
-#define	bool		int
-
-#ifdef	false
-#undef	false
-#endif
-#define	false		0
-
-#ifdef	true
-#undef	true
-#endif
-#define	true		1
-
-#define CHAR(x) (( (x) > 127 ) ? (x)-256 : (x))
-
-#if PY_MAJOR_VERSION >= 3
-    #define BUFFER_FMT "y#"
-    #define BYTES_FMT "y#"
-#else
-    #define BUFFER_FMT "t#"
-    #define BYTES_FMT "s#"
-#endif
-
-static PyObject *
-cpalmdoc_decompress(PyObject *self, PyObject *args) {
-    const char *_input = NULL; Py_ssize_t input_len = 0;
-    Byte *input; char *output; Byte c; PyObject *ans;
-    Py_ssize_t i = 0, o = 0, j = 0, di, n;
-    if (!PyArg_ParseTuple(args, BUFFER_FMT, &_input, &input_len))
-		return NULL;
-    input = (Byte *) PyMem_Malloc(sizeof(Byte)*input_len);
-    if (input == NULL) return PyErr_NoMemory();
-    // Map chars to bytes
-    for (j = 0; j < input_len; j++)
-        input[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
-    output = (char *)PyMem_Malloc(sizeof(char)*(MAX(BUFFER, 8*input_len)));
-    if (output == NULL) return PyErr_NoMemory();
-
-    while (i < input_len) {
-        c = input[i++];
-        if (c >= 1 && c <= 8)  // copy 'c' bytes
-            while (c--) output[o++] = (char)input[i++];
-
-        else if (c <= 0x7F)  // 0, 09-7F = self
-            output[o++] = (char)c;
-
-        else if (c >= 0xC0) { // space + ASCII char
-            output[o++] = ' ';
-            output[o++] = c ^ 0x80;
-        }
-        else { // 80-BF repeat sequences
-            c = (c << 8) + input[i++];
-            di = (c & 0x3FFF) >> 3;
-            for ( n = (c & 7) + 3; n--; ++o )
-                output[o] = output[o - di];
-        }
-    }
-    ans = Py_BuildValue(BYTES_FMT, output, o);
-    if (output != NULL) PyMem_Free(output);
-    if (input != NULL) PyMem_Free(input);
-    return ans;
-}
-
-static bool
-cpalmdoc_memcmp( Byte *a, Byte *b, Py_ssize_t len) {
-    Py_ssize_t i;
-    for (i = 0; i < len; i++) if (a[i] != b[i]) return false;
-    return true;
-}
-
-static Py_ssize_t
-cpalmdoc_rfind(Byte *data, Py_ssize_t pos, Py_ssize_t chunk_length) {
-    Py_ssize_t i;
-    for (i = pos - chunk_length; i > -1; i--)
-        if (cpalmdoc_memcmp(data+i, data+pos, chunk_length)) return i;
-    return pos;
-}
-
-
-static Py_ssize_t
-cpalmdoc_do_compress(buffer *b, char *output) {
-    Py_ssize_t i = 0, j, chunk_len, dist;
-    unsigned int compound;
-    Byte c, n;
-    bool found;
-    char *head;
-    buffer temp;
-    head = output;
-    temp.data = (Byte *)PyMem_Malloc(sizeof(Byte)*8); temp.len = 0;
-    if (temp.data == NULL) return 0;
-    while (i < b->len) {
-        c = b->data[i];
-        //do repeats
-        if ( i > 10 && (b->len - i) > 10) {
-            found = false;
-            for (chunk_len = 10; chunk_len > 2; chunk_len--) {
-                j = cpalmdoc_rfind(b->data, i, chunk_len);
-                dist = i - j;
-                if (j < i && dist <= 2047) {
-                    found = true;
-                    compound = (unsigned int)((dist << 3) + chunk_len-3);
-                    *(output++) = CHAR(0x80 + (compound >> 8 ));
-                    *(output++) = CHAR(compound & 0xFF);
-                    i += chunk_len;
-                    break;
-                }
-            }
-            if (found) continue;
-        }
-
-        //write single character
-        i++;
-        if (c == 32 && i < b->len) {
-            n = b->data[i];
-            if ( n >= 0x40 && n <= 0x7F) {
-                *(output++) = CHAR(n^0x80); i++; continue;
-            }
-        }
-        if (c == 0 || (c > 8 && c < 0x80))
-            *(output++) = CHAR(c);
-        else { // Write binary data
-            j = i;
-            temp.data[0] = c; temp.len = 1;
-            while (j < b->len && temp.len < 8) {
-                c = b->data[j];
-                if (c == 0 || (c > 8 && c < 0x80)) break;
-                temp.data[temp.len++] = c; j++;
-            }
-            i += temp.len - 1;
-            *(output++) = (char)temp.len;
-            for (j=0; j < temp.len; j++) *(output++) = (char)temp.data[j];
-        }
-    }
-    PyMem_Free(temp.data);
-    return output - head;
-}
-
-static PyObject *
-cpalmdoc_compress(PyObject *self, PyObject *args) {
-    const char *_input = NULL; Py_ssize_t input_len = 0;
-    char *output; PyObject *ans;
-    Py_ssize_t j = 0;
-    buffer b;
-    if (!PyArg_ParseTuple(args, BUFFER_FMT, &_input, &input_len))
-		return NULL;
-    b.data = (Byte *)PyMem_Malloc(sizeof(Byte)*input_len);
-    if (b.data == NULL) return PyErr_NoMemory();
-    // Map chars to bytes
-    for (j = 0; j < input_len; j++)
-        b.data[j] = (_input[j] < 0) ? _input[j]+256 : _input[j];
-    b.len = input_len;
-    // Make the output buffer larger than the input as sometimes
-    // compression results in a larger block
-    output = (char *)PyMem_Malloc(sizeof(char) * (int)(1.25*b.len));
-    if (output == NULL) return PyErr_NoMemory();
-    j = cpalmdoc_do_compress(&b, output);
-    if ( j == 0) return PyErr_NoMemory();
-    ans = Py_BuildValue(BYTES_FMT, output, j);
-    PyMem_Free(output);
-    PyMem_Free(b.data);
-    return ans;
-}
-
-static char cPalmdoc_doc[] = "Compress and decompress palmdoc strings.";
-
-static PyMethodDef cPalmdoc_methods[] = {
-    {"decompress", cpalmdoc_decompress, METH_VARARGS,
-    "decompress(bytestring) -> decompressed bytestring\n\n"
-    		"Decompress a palmdoc compressed byte string. "
-    },
-
-    {"compress", cpalmdoc_compress, METH_VARARGS,
-    "compress(bytestring) -> compressed bytestring\n\n"
-    		"Palmdoc compress a byte string. "
-    },
-    {NULL, NULL, 0, NULL}
-};
-
-#if PY_MAJOR_VERSION >= 3
-#define INITERROR return NULL
-#define INITMODULE PyModule_Create(&cPalmdoc_module)
-static struct PyModuleDef cPalmdoc_module = {
-    /* m_base     */ PyModuleDef_HEAD_INIT,
-    /* m_name     */ "cPalmdoc",
-    /* m_doc      */ cPalmdoc_doc,
-    /* m_size     */ -1,
-    /* m_methods  */ cPalmdoc_methods,
-    /* m_slots    */ 0,
-    /* m_traverse */ 0,
-    /* m_clear    */ 0,
-    /* m_free     */ 0,
-};
-PyObject* PyInit_cPalmdoc(void) {
-#else
-#define INITERROR return
-#define INITMODULE Py_InitModule3("cPalmdoc", cPalmdoc_methods, cPalmdoc_doc)
-PyObject* initcPalmdoc(void) {
-#endif
-
-    PyObject *m;
-    m = INITMODULE;
-    if (m == NULL) {
-        INITERROR;
-    }
-
-#if PY_MAJOR_VERSION >= 3
-    return m;
-#endif
-}
@@ -1,22 +1,46 @@
 import io
+import sys
 from struct import pack

-from ebook_converter.ebooks.compression import cPalmdoc
-
-
-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-

 def decompress_doc(data):
-    return cPalmdoc.decompress(data)
+    """Return the PalmDoc compressed *data* decompressed, as bytes.
+
+    Raises IndexError when *data* is truncated or when a back-reference
+    points outside of the output produced so far.
+    """
+    out = bytearray()  # mutable buffer, bytes += copies on every append
+    pos, end = 0, len(data)
+
+    while pos < end:
+        item = data[pos]
+        pos += 1
+        if 1 <= item <= 8:
+            # literal run: copy the next `item` bytes unchanged
+            if pos + item > end:
+                raise IndexError('truncated PalmDoc literal run')
+            out += data[pos:pos + item]
+            pos += item
+        elif item < 128:
+            out.append(item)  # 0x00 and 0x09-0x7f stand for themselves
+        elif item >= 192:
+            out += b' ' + bytes((item ^ 128,))  # space + ascii character
+        else:
+            # back-reference: 11 bit distance, 3 bit (length - 3)
+            if pos >= end:
+                raise IndexError('truncated PalmDoc back-reference')
+            item = (item << 8) + data[pos]
+            pos += 1
+            distance = (item & 0x3FFF) >> 3
+            if not 0 < distance <= len(out):
+                raise IndexError('PalmDoc back-reference outside output')
+            for _ in range((item & 7) + 3):
+                out.append(out[-distance])
+
+    return bytes(out)


 def compress_doc(data):
-    return cPalmdoc.compress(data) if data else b''
-
-
-def py_compress_doc(data):
    out = io.BytesIO()
    i = 0
    ldata = len(data)
@@ -65,24 +89,3 @@ def py_compress_doc(data):
            out.write(b''.join(binseq))
            i += len(binseq) - 1
    return out.getvalue()
-
-
-def find_tests():
-    import unittest
-
-    class Test(unittest.TestCase):
-
-        def test_palmdoc_compression(self):
-            for test in [
-                b'abc\x03\x04\x05\x06ms',  # Test binary writing
-                b'a b c \xfed ',  # Test encoding of spaces
-                b'0123456789axyz2bxyz2cdfgfo9iuyerh',
-                b'0123456789asd0123456789asd|yyzzxxffhhjjkk',
-                (b'ciewacnaq eiu743 r787q 0w%  ; sa fd\xef\ffdxosac wocjp acoiecowei '
-                b'owaic jociowapjcivcjpoivjporeivjpoavca; p9aw8743y6r74%$^$^%8 ')
-            ]:
-                x = compress_doc(test)
-                self.assertEqual(py_compress_doc(test), x)
-                self.assertEqual(decompress_doc(x), test)
-
-    return unittest.defaultTestLoader.loadTestsFromTestCase(Test)
@@ -1,9 +1,4 @@
 import setuptools

-module = setuptools.Extension('ebook_converter.ebooks.compression.cPalmdoc',
-                              sources=['ebook_converter/ebooks/compression/'
-                                       'palmdoc.c'],
-                              language='c')

-setuptools.setup(ext_modules=[module])
-# setuptools.setup()
+setuptools.setup()