From 178b9fd4d758d38e07d65fc612bd8ac55c6cdf52 Mon Sep 17 00:00:00 2001 From: gryf Date: Sun, 17 May 2020 20:30:01 +0200 Subject: [PATCH] Removed palmdoc C implementation in favor of pure Python. --- README.rst | 1 - .../cPalmdoc.cpython-36m-x86_64-linux-gnu.so | Bin 16416 -> 0 bytes ebook_converter/ebooks/compression/palmdoc.c | 238 ------------------ ebook_converter/ebooks/compression/palmdoc.py | 67 ++--- setup.py | 7 +- 5 files changed, 36 insertions(+), 277 deletions(-) delete mode 100755 ebook_converter/ebooks/compression/cPalmdoc.cpython-36m-x86_64-linux-gnu.so delete mode 100644 ebook_converter/ebooks/compression/palmdoc.c diff --git a/README.rst b/README.rst index bff77ba..d0059f7 100644 --- a/README.rst +++ b/README.rst @@ -18,7 +18,6 @@ To build and run ebook converter, you'll need: - Python 3.6 or newer - `Liberation fonts`_ - setuptools -- C compiler (yes, there is a single module, which is written in C) No Python2 support. Even if Calibre probably still is able to run on Python2, I do not have an intention to support it. diff --git a/ebook_converter/ebooks/compression/cPalmdoc.cpython-36m-x86_64-linux-gnu.so b/ebook_converter/ebooks/compression/cPalmdoc.cpython-36m-x86_64-linux-gnu.so deleted file mode 100755 index d0bbd0e2944e79ea8c248a633d21de225bf2ed04..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16416 zcmeHO3v64}89t7i)}*BlU3m=HxI79i4^yB;?MkWB*6HnN+E5ZE1n9<%ohBlQgX6RW z8X6XgYK3gw3&Dh@AfeJplU8jKRICE06k;k33=$|BFtQ1x>jkmU0rNJ7_xwOzfoW167~Au;tSRy2Q@h)_LkLji8OP3tk`7~Tpy9Wt%))r7;hQ>?CFye!vHT|yY8=%riRRj&M%$~Vf0eg?w*MB*A$BkpsY>tXgCqQUUYZu>KjaS#(HlE+}IQ7j|Hv^ ztR%3V@;8bt+qQ1pWL+P)KCrrwU&T@3VW@ep9Dl@kAejZ_aXoN;*BL^1#oslyw5#h( z0iF3YFevHy$n&o4ECu^qxS{d3pq48lm`}NIub~vuEaJdd^*{lOc;0r0|z!w%! zAD zR3lK0Ks5r@2vj3bjX*U5&Ip(zANkDD+V`%-*6BbhQIpS@BPV??6;$%8{|Hfj*$24# zFA5?@d?(4X(|M$2zayTumf4Kt-z1*4g4vYhPZ3XB!0b`UpCF#Le%W!!KTkYuL9>S> z|108Y3z!|2{7hrr8Zj;()VCF(j>dy2@~T;gn0{~YL0_0Pyp`!5za z^-qbc_|io5_JMKi%QKXminiczqalV&GbHWc?&P zTeq6Y4-<>b(e;<0O#5zB(eA|M?pJEpK+(hHGR-~!A=x0%eP(j#aWgq^C}NJzTecj| z%w#BcUCNvc9RVXik7{~M)0C!ZNynOwnq#d;%+b&xb8L0PVsbDkRqdz0%;yiJ2I?f2 zeH8JB)=|@wf`@Y{A-?Zt;_@5OIrv}d{MLHhu zs1=@>dN&F_ZH_Hk0y<@SPIV+}%#l#qBMV;}O7D9{<|d}S=K9#VLOfW<{7mh!J^)lb zpQ_$8sy8kC2liqx*-!ryReTva=pQr3nmuN+6?7@_C@AB9PIM$ghdV}w4vWN2BEG%1 z%&e-YjX@qtX+Ma`39!>}lRR~>-v3@DIQ=!Xz|B&}#&ZhHP_m zAQv%jSS1I2qSk&J_^U6;X^|YVF(u#t<7YCIqj5g78a6rI*|zL#j5|UWQTL+_Y3ly9 zClU)}g=UtbH92Zc&RiFo+Bb2GmZQ6s|B;WDW7-wVAp5H@gfR1FZkFEEpt8-eRW+&< zImo~-A|9G2%;f9#4$NaL5c5d|(_o|r1&$2m#NcnpiEhluM{OwV2pTlbQw!2@6gK^0 zJ?dnCg93ssjhVv$$My6Yr!np1M_W%y0P{AKma&_n z*iF^=zu$(ajtr$lqBh%0lbzNMOuggAz<1z5H4w|l&{oA!BT6Fj0+e6au;)r5`}bao zneYNnw$kd5L1G0O5d(N`>XSIbvboCtD0xcBChZ>(Pa%M(11b+$=|VaNys-AvzwjCMS@5~# z3M@6>Rqil~uu2W+m2w^&=3adXTD0#g+38W&RZ}$r)d*B0P>sO>i$?wie2N(!=sE0FNOi*MSz~-O@cF)l`i@H3HQLR3lK0Ks5r@2vj3bjX*U5 z|DzG$?+dr!WM0$H zzRbsDmYz+J_}j!bdKN8LHnedmS zOhH``<0<;b8j0I6tjjw?!Gg=fzbdl+hqONa-;jJEj+*b&^1o@m5LfN*zeV^vc6WI< z(%)4`jhf!BX@{nJG<`(V$23(|#cNxzm@k*xq+dN4;ogYj6+_`Yf_iVzPyf7^O6O>SuQ&Q`SfQB8hLFgwr)%b_c=1Ip&l9KA3k#l4rT9F-VJ{ob*L1_DrGnkmG_Fph4UFssaN>L znv0zX>!UJy-EvC3;w-`6o0sBCM7jCp6<-tlU3RHHR->R3QF*U8yYQX5Q|cAxh=UDI zg!Oq9E6Qjb&l8{Ii-^k6dRZeD3Z6gmofEK&G`>$SkZs^8U5g*-zpHWijtLlrh48Is zBZDO0ErH&KAK^UzuNBb?_=8-Yk8_z`aC_I65CUCOoVM-bJB zx#Qsp?WZlMZ25Xv;&c1!mr`NwI5{Ep&z=*1`XWSiTOQhcc$n!6P z&f#?7yiBA&W_9AWMJIPQ-?6=+Y3nASTei1aA=7Hy+0YcS%*{Ii;gF9N+P;}i|7_W| zV`IZMYe!?_ZJ`#crD5Z?5K#C_eC5d?b+AXC({Uf`aUI(U2Kn60DwJGu^axH$5Aakz zMpSz8r|i)od622>VWpB&NLIHVQ!5;ghX<7f=TSnqr>h;s#}ecjm6LvU;V+Z`MpX}PB%h*5f8<_IG8)`CX zcgza(vcN{DP=DTEFy-d~_V1<}VNZK%#(5vYbfqrIU4O2?t;o<;o9%hO!gN?ynC0&F z->L1X9+Y@r!}N%PrGL65y6x3{uK3(iSpW5Xz7BJ=dvJd8&(l~b7^SNNAjr;(l9dNnq?|zp(@6VXJ%w=D(Ezx28sC@1p zVSu4z$g8jwDO|bU3>-wpX>S`!!j%3KbS8KFAA`_o&-+5A7n0DC+~to$hVOLc{E}+C zf7DMjZhhvN{sKmRhdu8zegCFCeOg3ghvo+*-jDKmWojcy zkqmh)Uf7=L1Vm1I5!AXX6>Zim6SiY|3IeBnTTp4XX?yx#)|m_^gM0=*^2PP%{qjNi z=VfuevYE@Bcnbo;baAXE*xo20D#P|pS{SS`+aMa!SA&l6Sw`p02a{*KmY&$ diff --git a/ebook_converter/ebooks/compression/palmdoc.c b/ebook_converter/ebooks/compression/palmdoc.c deleted file mode 100644 index 5ff6a6f..0000000 --- a/ebook_converter/ebooks/compression/palmdoc.c +++ /dev/null @@ -1,238 +0,0 @@ -/* -:mod:`cPalmdoc` -- Palmdoc compression/decompression -===================================================== - -.. module:: cPalmdoc - :platform: All - :synopsis: Compression decompression of Palmdoc implemented in C for speed - -.. moduleauthor:: Kovid Goyal Copyright 2009 - -*/ - -#define PY_SSIZE_T_CLEAN -#include -#include - -#define BUFFER 6000 - -#define MIN(x, y) ( ((x) < (y)) ? (x) : (y) ) -#define MAX(x, y) ( ((x) > (y)) ? (x) : (y) ) - -typedef unsigned short int Byte; -typedef struct { - Byte *data; - Py_ssize_t len; -} buffer; - -#ifdef bool -#undef bool -#endif -#define bool int - -#ifdef false -#undef false -#endif -#define false 0 - -#ifdef true -#undef true -#endif -#define true 1 - -#define CHAR(x) (( (x) > 127 ) ? (x)-256 : (x)) - -#if PY_MAJOR_VERSION >= 3 - #define BUFFER_FMT "y#" - #define BYTES_FMT "y#" -#else - #define BUFFER_FMT "t#" - #define BYTES_FMT "s#" -#endif - -static PyObject * -cpalmdoc_decompress(PyObject *self, PyObject *args) { - const char *_input = NULL; Py_ssize_t input_len = 0; - Byte *input; char *output; Byte c; PyObject *ans; - Py_ssize_t i = 0, o = 0, j = 0, di, n; - if (!PyArg_ParseTuple(args, BUFFER_FMT, &_input, &input_len)) - return NULL; - input = (Byte *) PyMem_Malloc(sizeof(Byte)*input_len); - if (input == NULL) return PyErr_NoMemory(); - // Map chars to bytes - for (j = 0; j < input_len; j++) - input[j] = (_input[j] < 0) ? _input[j]+256 : _input[j]; - output = (char *)PyMem_Malloc(sizeof(char)*(MAX(BUFFER, 8*input_len))); - if (output == NULL) return PyErr_NoMemory(); - - while (i < input_len) { - c = input[i++]; - if (c >= 1 && c <= 8) // copy 'c' bytes - while (c--) output[o++] = (char)input[i++]; - - else if (c <= 0x7F) // 0, 09-7F = self - output[o++] = (char)c; - - else if (c >= 0xC0) { // space + ASCII char - output[o++] = ' '; - output[o++] = c ^ 0x80; - } - else { // 80-BF repeat sequences - c = (c << 8) + input[i++]; - di = (c & 0x3FFF) >> 3; - for ( n = (c & 7) + 3; n--; ++o ) - output[o] = output[o - di]; - } - } - ans = Py_BuildValue(BYTES_FMT, output, o); - if (output != NULL) PyMem_Free(output); - if (input != NULL) PyMem_Free(input); - return ans; -} - -static bool -cpalmdoc_memcmp( Byte *a, Byte *b, Py_ssize_t len) { - Py_ssize_t i; - for (i = 0; i < len; i++) if (a[i] != b[i]) return false; - return true; -} - -static Py_ssize_t -cpalmdoc_rfind(Byte *data, Py_ssize_t pos, Py_ssize_t chunk_length) { - Py_ssize_t i; - for (i = pos - chunk_length; i > -1; i--) - if (cpalmdoc_memcmp(data+i, data+pos, chunk_length)) return i; - return pos; -} - - -static Py_ssize_t -cpalmdoc_do_compress(buffer *b, char *output) { - Py_ssize_t i = 0, j, chunk_len, dist; - unsigned int compound; - Byte c, n; - bool found; - char *head; - buffer temp; - head = output; - temp.data = (Byte *)PyMem_Malloc(sizeof(Byte)*8); temp.len = 0; - if (temp.data == NULL) return 0; - while (i < b->len) { - c = b->data[i]; - //do repeats - if ( i > 10 && (b->len - i) > 10) { - found = false; - for (chunk_len = 10; chunk_len > 2; chunk_len--) { - j = cpalmdoc_rfind(b->data, i, chunk_len); - dist = i - j; - if (j < i && dist <= 2047) { - found = true; - compound = (unsigned int)((dist << 3) + chunk_len-3); - *(output++) = CHAR(0x80 + (compound >> 8 )); - *(output++) = CHAR(compound & 0xFF); - i += chunk_len; - break; - } - } - if (found) continue; - } - - //write single character - i++; - if (c == 32 && i < b->len) { - n = b->data[i]; - if ( n >= 0x40 && n <= 0x7F) { - *(output++) = CHAR(n^0x80); i++; continue; - } - } - if (c == 0 || (c > 8 && c < 0x80)) - *(output++) = CHAR(c); - else { // Write binary data - j = i; - temp.data[0] = c; temp.len = 1; - while (j < b->len && temp.len < 8) { - c = b->data[j]; - if (c == 0 || (c > 8 && c < 0x80)) break; - temp.data[temp.len++] = c; j++; - } - i += temp.len - 1; - *(output++) = (char)temp.len; - for (j=0; j < temp.len; j++) *(output++) = (char)temp.data[j]; - } - } - PyMem_Free(temp.data); - return output - head; -} - -static PyObject * -cpalmdoc_compress(PyObject *self, PyObject *args) { - const char *_input = NULL; Py_ssize_t input_len = 0; - char *output; PyObject *ans; - Py_ssize_t j = 0; - buffer b; - if (!PyArg_ParseTuple(args, BUFFER_FMT, &_input, &input_len)) - return NULL; - b.data = (Byte *)PyMem_Malloc(sizeof(Byte)*input_len); - if (b.data == NULL) return PyErr_NoMemory(); - // Map chars to bytes - for (j = 0; j < input_len; j++) - b.data[j] = (_input[j] < 0) ? _input[j]+256 : _input[j]; - b.len = input_len; - // Make the output buffer larger than the input as sometimes - // compression results in a larger block - output = (char *)PyMem_Malloc(sizeof(char) * (int)(1.25*b.len)); - if (output == NULL) return PyErr_NoMemory(); - j = cpalmdoc_do_compress(&b, output); - if ( j == 0) return PyErr_NoMemory(); - ans = Py_BuildValue(BYTES_FMT, output, j); - PyMem_Free(output); - PyMem_Free(b.data); - return ans; -} - -static char cPalmdoc_doc[] = "Compress and decompress palmdoc strings."; - -static PyMethodDef cPalmdoc_methods[] = { - {"decompress", cpalmdoc_decompress, METH_VARARGS, - "decompress(bytestring) -> decompressed bytestring\n\n" - "Decompress a palmdoc compressed byte string. " - }, - - {"compress", cpalmdoc_compress, METH_VARARGS, - "compress(bytestring) -> compressed bytestring\n\n" - "Palmdoc compress a byte string. " - }, - {NULL, NULL, 0, NULL} -}; - -#if PY_MAJOR_VERSION >= 3 -#define INITERROR return NULL -#define INITMODULE PyModule_Create(&cPalmdoc_module) -static struct PyModuleDef cPalmdoc_module = { - /* m_base */ PyModuleDef_HEAD_INIT, - /* m_name */ "cPalmdoc", - /* m_doc */ cPalmdoc_doc, - /* m_size */ -1, - /* m_methods */ cPalmdoc_methods, - /* m_slots */ 0, - /* m_traverse */ 0, - /* m_clear */ 0, - /* m_free */ 0, -}; -PyObject* PyInit_cPalmdoc(void) { -#else -#define INITERROR return -#define INITMODULE Py_InitModule3("cPalmdoc", cPalmdoc_methods, cPalmdoc_doc) -PyObject* initcPalmdoc(void) { -#endif - - PyObject *m; - m = INITMODULE; - if (m == NULL) { - INITERROR; - } - -#if PY_MAJOR_VERSION >= 3 - return m; -#endif -} diff --git a/ebook_converter/ebooks/compression/palmdoc.py b/ebook_converter/ebooks/compression/palmdoc.py index d15452e..87bf8f4 100644 --- a/ebook_converter/ebooks/compression/palmdoc.py +++ b/ebook_converter/ebooks/compression/palmdoc.py @@ -1,22 +1,46 @@ import io +import sys from struct import pack -from ebook_converter.ebooks.compression import cPalmdoc - - -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - def decompress_doc(data): - return cPalmdoc.decompress(data) + uncompressed = b'' + skip_next = 0 + + for idx, item in enumerate(data): + if skip_next: + skip_next -= 1 + continue + + if item in range(1, 9): + # copy amount of bytes as in item + skip_next = item + for amount in range(1, item + 1): + uncompressed += data[idx + amount].to_bytes(1, sys.byteorder) + + elif item < 128: + # direct ascii copy + uncompressed += item.to_bytes(1, sys.byteorder) + + elif item >= 192: + # merged space and ascii character + uncompressed += b' ' + (item ^ 128).to_bytes(1, sys.byteorder) + + else: + # compressed data, item contains how many characters should be + # repeated for the next one. + skip_next = 1 + item = (item << 8) + data[idx + 1] + character_index = (item & 0x3FFF) >> 3 + for _ in range((item & 7) + 3): + uncompressed += (uncompressed[len(uncompressed) - + character_index] + .to_bytes(1, sys.byteorder)) + + return uncompressed def compress_doc(data): - return cPalmdoc.compress(data) if data else b'' - - -def py_compress_doc(data): out = io.BytesIO() i = 0 ldata = len(data) @@ -65,24 +89,3 @@ def py_compress_doc(data): out.write(b''.join(binseq)) i += len(binseq) - 1 return out.getvalue() - - -def find_tests(): - import unittest - - class Test(unittest.TestCase): - - def test_palmdoc_compression(self): - for test in [ - b'abc\x03\x04\x05\x06ms', # Test binary writing - b'a b c \xfed ', # Test encoding of spaces - b'0123456789axyz2bxyz2cdfgfo9iuyerh', - b'0123456789asd0123456789asd|yyzzxxffhhjjkk', - (b'ciewacnaq eiu743 r787q 0w% ; sa fd\xef\ffdxosac wocjp acoiecowei ' - b'owaic jociowapjcivcjpoivjporeivjpoavca; p9aw8743y6r74%$^$^%8 ') - ]: - x = compress_doc(test) - self.assertEqual(py_compress_doc(test), x) - self.assertEqual(decompress_doc(x), test) - - return unittest.defaultTestLoader.loadTestsFromTestCase(Test) diff --git a/setup.py b/setup.py index 1c88d8c..056ba45 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,4 @@ import setuptools -module = setuptools.Extension('ebook_converter.ebooks.compression.cPalmdoc', - sources=['ebook_converter/ebooks/compression/' - 'palmdoc.c'], - language='c') -setuptools.setup(ext_modules=[module]) -# setuptools.setup() +setuptools.setup()