blob: 920b898b2ccc2da8392021c099fc738d5142efe2 [file] [log] [blame]
/*
 * This file is part of the MicroPython project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2023 Jim Mussared
 *
 * Based on extmod/modzlib.c
 * Copyright (c) 2014-2016 Paul Sokolovsky
 * Copyright (c) 2021-2023 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
30
31#include <stdio.h>
32#include <string.h>
33
34#include "py/runtime.h"
35#include "py/stream.h"
36#include "py/mperrno.h"
37
38#if MICROPY_PY_DEFLATE
39
40#include "lib/uzlib/uzlib.h"
41
42#if 0 // print debugging info
43#define DEBUG_printf DEBUG_printf
44#else // don't print debugging info
45#define DEBUG_printf(...) (void)0
46#endif
47
// Container formats accepted by the DeflateIO constructor. The value is stored
// in the 2-bit `format` field of mp_obj_deflateio_t, so it must stay in 0..3.
typedef enum {
    DEFLATEIO_FORMAT_MIN = 0,

    // Reading: auto-detect a zlib or gzip header.
    // Writing: no header/footer is emitted (same as RAW).
    DEFLATEIO_FORMAT_AUTO = DEFLATEIO_FORMAT_MIN,

    // Bare deflate stream, no header or footer.
    DEFLATEIO_FORMAT_RAW = 1,

    // zlib container.
    DEFLATEIO_FORMAT_ZLIB = 2,

    // gzip container.
    DEFLATEIO_FORMAT_GZIP = 3,

    DEFLATEIO_FORMAT_MAX = DEFLATEIO_FORMAT_GZIP,
} deflateio_format_t;
56
// Fallback window size, expressed as log2 of the window length in bytes, used
// whenever wbits was left unset (0) in the DeflateIO constructor. A small
// window is chosen as the default (faster compression, less RAM usage, etc).
const int DEFLATEIO_DEFAULT_WBITS = 8;
60
61typedef struct {
62 void *window;
63 uzlib_uncomp_t decomp;
64 bool eof;
65} mp_obj_deflateio_read_t;
66
67#if MICROPY_PY_DEFLATE_COMPRESS
68typedef struct {
69 void *window;
70 size_t input_len;
71 uint32_t input_checksum;
72 uzlib_lz77_state_t lz77;
73} mp_obj_deflateio_write_t;
74#endif
75
76typedef struct {
77 mp_obj_base_t base;
78 mp_obj_t stream;
79 uint8_t format : 2;
80 uint8_t window_bits : 4;
81 bool close : 1;
82 mp_obj_deflateio_read_t *read;
83 #if MICROPY_PY_DEFLATE_COMPRESS
84 mp_obj_deflateio_write_t *write;
85 #endif
86} mp_obj_deflateio_t;
87
Angus Grattondecf8e62024-02-27 15:32:29 +110088static int deflateio_read_stream(void *data) {
Jim Mussared35339242023-06-26 13:52:10 +100089 mp_obj_deflateio_t *self = data;
90 const mp_stream_p_t *stream = mp_get_stream(self->stream);
91 int err;
92 byte c;
93 mp_uint_t out_sz = stream->read(self->stream, &c, 1, &err);
94 if (out_sz == MP_STREAM_ERROR) {
95 mp_raise_OSError(err);
96 }
97 if (out_sz == 0) {
98 mp_raise_type(&mp_type_EOFError);
99 }
100 return c;
101}
102
Angus Grattondecf8e62024-02-27 15:32:29 +1100103static bool deflateio_init_read(mp_obj_deflateio_t *self) {
Jim Mussared35339242023-06-26 13:52:10 +1000104 if (self->read) {
105 return true;
106 }
107
108 mp_get_stream_raise(self->stream, MP_STREAM_OP_READ);
109
110 self->read = m_new_obj(mp_obj_deflateio_read_t);
111 memset(&self->read->decomp, 0, sizeof(self->read->decomp));
112 self->read->decomp.source_read_data = self;
113 self->read->decomp.source_read_cb = deflateio_read_stream;
114 self->read->eof = false;
115
116 // Don't modify self->window_bits as it may also be used for write.
117 int wbits = self->window_bits;
118
Jim Mussared32db4c52023-08-31 14:29:49 +1000119 if (self->format == DEFLATEIO_FORMAT_RAW) {
120 if (wbits == 0) {
121 // The docs recommends always setting wbits explicitly when using
122 // RAW, but we still allow a default.
123 wbits = DEFLATEIO_DEFAULT_WBITS;
124 }
125 } else {
126 // Parse the header if we're in NONE/ZLIB/GZIP modes.
127 int header_wbits;
Jim Mussared35339242023-06-26 13:52:10 +1000128 int header_type = uzlib_parse_zlib_gzip_header(&self->read->decomp, &header_wbits);
Jim Mussared32db4c52023-08-31 14:29:49 +1000129 if (header_type < 0) {
130 // Stream header was invalid.
Jim Mussared35339242023-06-26 13:52:10 +1000131 return false;
132 }
Jim Mussared32db4c52023-08-31 14:29:49 +1000133 if ((self->format == DEFLATEIO_FORMAT_ZLIB && header_type != UZLIB_HEADER_ZLIB) || (self->format == DEFLATEIO_FORMAT_GZIP && header_type != UZLIB_HEADER_GZIP)) {
134 // Not what we expected.
135 return false;
136 }
137 // header_wbits will either be 15 (gzip) or 8-15 (zlib).
138 if (wbits == 0 || header_wbits < wbits) {
139 // If the header specified something lower, then use that instead.
140 // No point doing a bigger allocation than we need to.
Jim Mussared35339242023-06-26 13:52:10 +1000141 wbits = header_wbits;
142 }
143 }
144
Yoctopuce61995b52024-05-13 12:11:31 +0200145 size_t window_len = (size_t)1 << wbits;
Jim Mussared35339242023-06-26 13:52:10 +1000146 self->read->window = m_new(uint8_t, window_len);
147
148 uzlib_uncompress_init(&self->read->decomp, self->read->window, window_len);
149
150 return true;
151}
152
153#if MICROPY_PY_DEFLATE_COMPRESS
Angus Grattondecf8e62024-02-27 15:32:29 +1100154static void deflateio_out_byte(void *data, uint8_t b) {
Jim Mussared35339242023-06-26 13:52:10 +1000155 mp_obj_deflateio_t *self = data;
156 const mp_stream_p_t *stream = mp_get_stream(self->stream);
157 int err;
158 mp_uint_t ret = stream->write(self->stream, &b, 1, &err);
159 if (ret == MP_STREAM_ERROR) {
160 mp_raise_OSError(err);
161 }
162}
163
Angus Grattondecf8e62024-02-27 15:32:29 +1100164static bool deflateio_init_write(mp_obj_deflateio_t *self) {
Jim Mussared35339242023-06-26 13:52:10 +1000165 if (self->write) {
166 return true;
167 }
168
169 const mp_stream_p_t *stream = mp_get_stream_raise(self->stream, MP_STREAM_OP_WRITE);
170
Jim Mussared35339242023-06-26 13:52:10 +1000171 int wbits = self->window_bits;
172 if (wbits == 0) {
Jim Mussared32db4c52023-08-31 14:29:49 +1000173 // Same default wbits for all formats.
Jim Mussared35339242023-06-26 13:52:10 +1000174 wbits = DEFLATEIO_DEFAULT_WBITS;
175 }
Damien Georgefa424872025-03-18 00:34:20 +1100176
177 // Allocate the large window before allocating the mp_obj_deflateio_write_t, in case the
178 // window allocation fails the mp_obj_deflateio_t object will remain in a consistent state.
Jim Mussared35339242023-06-26 13:52:10 +1000179 size_t window_len = 1 << wbits;
Damien Georgefa424872025-03-18 00:34:20 +1100180 uint8_t *window = m_new(uint8_t, window_len);
181
182 self->write = m_new_obj(mp_obj_deflateio_write_t);
183 self->write->window = window;
184 self->write->input_len = 0;
Jim Mussared35339242023-06-26 13:52:10 +1000185
186 uzlib_lz77_init(&self->write->lz77, self->write->window, window_len);
187 self->write->lz77.dest_write_data = self;
188 self->write->lz77.dest_write_cb = deflateio_out_byte;
189
190 // Write header if needed.
191 mp_uint_t ret = 0;
192 int err;
193 if (self->format == DEFLATEIO_FORMAT_ZLIB) {
194 // -----CMF------ ----------FLG---------------
195 // CINFO(5) CM(3) FLEVEL(2) FDICT(1) FCHECK(5)
196 uint8_t buf[] = { 0x08, 0x80 }; // CM=2 (deflate), FLEVEL=2 (default), FDICT=0 (no dictionary)
197 buf[0] |= MAX(wbits - 8, 1) << 4; // base-2 logarithm of the LZ77 window size, minus eight.
198 buf[1] |= 31 - ((buf[0] * 256 + buf[1]) % 31); // (CMF*256 + FLG) % 31 == 0.
199 ret = stream->write(self->stream, buf, sizeof(buf), &err);
200
201 self->write->input_checksum = 1; // ADLER32
202 } else if (self->format == DEFLATEIO_FORMAT_GZIP) {
203 // ID1(8) ID2(8) CM(8) ---FLG--- MTIME(32) XFL(8) OS(8)
204 // FLG: x x x FCOMMENT FNAME FEXTRA FHCRC FTEXT
205 uint8_t buf[] = { 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x03 }; // MTIME=0, XFL=4 (fastest), OS=3 (unix)
206 ret = stream->write(self->stream, buf, sizeof(buf), &err);
207
208 self->write->input_checksum = ~0; // CRC32
209 }
210 if (ret == MP_STREAM_ERROR) {
211 return false;
212 }
213
214 // Write starting block.
215 uzlib_start_block(&self->write->lz77);
216
217 return true;
218}
219#endif
220
Angus Grattondecf8e62024-02-27 15:32:29 +1100221static mp_obj_t deflateio_make_new(const mp_obj_type_t *type, size_t n_args, size_t n_kw, const mp_obj_t *args_in) {
Jim Mussared35339242023-06-26 13:52:10 +1000222 // args: stream, format=NONE, wbits=0, close=False
223 mp_arg_check_num(n_args, n_kw, 1, 4, false);
224
225 mp_int_t format = n_args > 1 ? mp_obj_get_int(args_in[1]) : DEFLATEIO_FORMAT_AUTO;
226 mp_int_t wbits = n_args > 2 ? mp_obj_get_int(args_in[2]) : 0;
227
228 if (format < DEFLATEIO_FORMAT_MIN || format > DEFLATEIO_FORMAT_MAX) {
229 mp_raise_ValueError(MP_ERROR_TEXT("format"));
230 }
231 if (wbits != 0 && (wbits < 5 || wbits > 15)) {
232 mp_raise_ValueError(MP_ERROR_TEXT("wbits"));
233 }
234
235 mp_obj_deflateio_t *self = mp_obj_malloc(mp_obj_deflateio_t, type);
236 self->stream = args_in[0];
237 self->format = format;
238 self->window_bits = wbits;
239 self->read = NULL;
240 #if MICROPY_PY_DEFLATE_COMPRESS
241 self->write = NULL;
242 #endif
243 self->close = n_args > 3 ? mp_obj_is_true(args_in[3]) : false;
244
245 return MP_OBJ_FROM_PTR(self);
246}
247
Angus Grattondecf8e62024-02-27 15:32:29 +1100248static mp_uint_t deflateio_read(mp_obj_t o_in, void *buf, mp_uint_t size, int *errcode) {
Jim Mussared35339242023-06-26 13:52:10 +1000249 mp_obj_deflateio_t *self = MP_OBJ_TO_PTR(o_in);
250
251 if (self->stream == MP_OBJ_NULL || !deflateio_init_read(self)) {
252 *errcode = MP_EINVAL;
253 return MP_STREAM_ERROR;
254 }
255
256 if (self->read->eof) {
257 return 0;
258 }
259
260 self->read->decomp.dest = buf;
261 self->read->decomp.dest_limit = (uint8_t *)buf + size;
262 int st = uzlib_uncompress_chksum(&self->read->decomp);
263 if (st == UZLIB_DONE) {
264 self->read->eof = true;
265 }
266 if (st < 0) {
267 DEBUG_printf("uncompress error=" INT_FMT "\n", st);
268 *errcode = MP_EINVAL;
269 return MP_STREAM_ERROR;
270 }
271 return self->read->decomp.dest - (uint8_t *)buf;
272}
273
274#if MICROPY_PY_DEFLATE_COMPRESS
Angus Grattondecf8e62024-02-27 15:32:29 +1100275static mp_uint_t deflateio_write(mp_obj_t self_in, const void *buf, mp_uint_t size, int *errcode) {
Jim Mussared35339242023-06-26 13:52:10 +1000276 mp_obj_deflateio_t *self = MP_OBJ_TO_PTR(self_in);
277
278 if (self->stream == MP_OBJ_NULL || !deflateio_init_write(self)) {
279 *errcode = MP_EINVAL;
280 return MP_STREAM_ERROR;
281 }
282
283 self->write->input_len += size;
284 if (self->format == DEFLATEIO_FORMAT_ZLIB) {
285 self->write->input_checksum = uzlib_adler32(buf, size, self->write->input_checksum);
286 } else if (self->format == DEFLATEIO_FORMAT_GZIP) {
287 self->write->input_checksum = uzlib_crc32(buf, size, self->write->input_checksum);
288 }
289
290 uzlib_lz77_compress(&self->write->lz77, buf, size);
291 return size;
292}
293
// Store `value` into buf[0..3] in little-endian order (least significant
// byte first).
static inline void put_le32(char *buf, uint32_t value) {
    for (int i = 0; i < 4; ++i) {
        buf[i] = (value >> (8 * i)) & 0xff;
    }
}
300
// Store `value` into buf[0..3] in big-endian order (most significant byte
// first).
static inline void put_be32(char *buf, uint32_t value) {
    for (int i = 0; i < 4; ++i) {
        buf[3 - i] = (value >> (8 * i)) & 0xff;
    }
}
307#endif
308
Angus Grattondecf8e62024-02-27 15:32:29 +1100309static mp_uint_t deflateio_ioctl(mp_obj_t self_in, mp_uint_t request, uintptr_t arg, int *errcode) {
Jim Mussared35339242023-06-26 13:52:10 +1000310 if (request == MP_STREAM_CLOSE) {
311 mp_obj_deflateio_t *self = MP_OBJ_TO_PTR(self_in);
312
313 mp_uint_t ret = 0;
314
315 if (self->stream != MP_OBJ_NULL) {
316 #if MICROPY_PY_DEFLATE_COMPRESS
317 if (self->write) {
318 uzlib_finish_block(&self->write->lz77);
319
320 const mp_stream_p_t *stream = mp_get_stream(self->stream);
321
322 // Write footer if needed.
323 if (self->format == DEFLATEIO_FORMAT_ZLIB || self->format == DEFLATEIO_FORMAT_GZIP) {
324 char footer[8];
325 size_t footer_len;
326 if (self->format == DEFLATEIO_FORMAT_ZLIB) {
327 put_be32(&footer[0], self->write->input_checksum);
328 footer_len = 4;
329 } else { // DEFLATEIO_FORMAT_GZIP
330 put_le32(&footer[0], ~self->write->input_checksum);
331 put_le32(&footer[4], self->write->input_len);
332 footer_len = 8;
333 }
334 if (stream->write(self->stream, footer, footer_len, errcode) == MP_STREAM_ERROR) {
335 ret = MP_STREAM_ERROR;
336 }
337 }
338 }
339 #endif
340
341 // Only close the stream if required. e.g. when using io.BytesIO
342 // it needs to stay open so that getvalue() can be called.
343 if (self->close) {
344 mp_stream_close(self->stream);
345 }
346
347 // Either way, free the reference to the stream.
348 self->stream = MP_OBJ_NULL;
349 }
350
351 return ret;
352 } else {
353 *errcode = MP_EINVAL;
354 return MP_STREAM_ERROR;
355 }
356}
357
Angus Grattondecf8e62024-02-27 15:32:29 +1100358static const mp_stream_p_t deflateio_stream_p = {
Jim Mussared35339242023-06-26 13:52:10 +1000359 .read = deflateio_read,
360 #if MICROPY_PY_DEFLATE_COMPRESS
361 .write = deflateio_write,
362 #endif
363 .ioctl = deflateio_ioctl,
364};
365
366#if !MICROPY_ENABLE_DYNRUNTIME
Angus Grattondecf8e62024-02-27 15:32:29 +1100367static const mp_rom_map_elem_t deflateio_locals_dict_table[] = {
Jim Mussared35339242023-06-26 13:52:10 +1000368 { MP_ROM_QSTR(MP_QSTR_read), MP_ROM_PTR(&mp_stream_read_obj) },
369 { MP_ROM_QSTR(MP_QSTR_readinto), MP_ROM_PTR(&mp_stream_readinto_obj) },
370 { MP_ROM_QSTR(MP_QSTR_readline), MP_ROM_PTR(&mp_stream_unbuffered_readline_obj) },
371 #if MICROPY_PY_DEFLATE_COMPRESS
372 { MP_ROM_QSTR(MP_QSTR_write), MP_ROM_PTR(&mp_stream_write_obj) },
373 #endif
374 { MP_ROM_QSTR(MP_QSTR_close), MP_ROM_PTR(&mp_stream_close_obj) },
375 { MP_ROM_QSTR(MP_QSTR___enter__), MP_ROM_PTR(&mp_identity_obj) },
376 { MP_ROM_QSTR(MP_QSTR___exit__), MP_ROM_PTR(&mp_stream___exit___obj) },
377};
Angus Grattondecf8e62024-02-27 15:32:29 +1100378static MP_DEFINE_CONST_DICT(deflateio_locals_dict, deflateio_locals_dict_table);
Jim Mussared35339242023-06-26 13:52:10 +1000379
Angus Grattondecf8e62024-02-27 15:32:29 +1100380static MP_DEFINE_CONST_OBJ_TYPE(
Jim Mussared35339242023-06-26 13:52:10 +1000381 deflateio_type,
382 MP_QSTR_DeflateIO,
383 MP_TYPE_FLAG_NONE,
384 make_new, deflateio_make_new,
385 protocol, &deflateio_stream_p,
386 locals_dict, &deflateio_locals_dict
387 );
388
Angus Grattondecf8e62024-02-27 15:32:29 +1100389static const mp_rom_map_elem_t mp_module_deflate_globals_table[] = {
Jim Mussared35339242023-06-26 13:52:10 +1000390 { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_deflate) },
391 { MP_ROM_QSTR(MP_QSTR_DeflateIO), MP_ROM_PTR(&deflateio_type) },
392 { MP_ROM_QSTR(MP_QSTR_AUTO), MP_ROM_INT(DEFLATEIO_FORMAT_AUTO) },
393 { MP_ROM_QSTR(MP_QSTR_RAW), MP_ROM_INT(DEFLATEIO_FORMAT_RAW) },
394 { MP_ROM_QSTR(MP_QSTR_ZLIB), MP_ROM_INT(DEFLATEIO_FORMAT_ZLIB) },
395 { MP_ROM_QSTR(MP_QSTR_GZIP), MP_ROM_INT(DEFLATEIO_FORMAT_GZIP) },
396};
Angus Grattondecf8e62024-02-27 15:32:29 +1100397static MP_DEFINE_CONST_DICT(mp_module_deflate_globals, mp_module_deflate_globals_table);
Jim Mussared35339242023-06-26 13:52:10 +1000398
399const mp_obj_module_t mp_module_deflate = {
400 .base = { &mp_type_module },
401 .globals = (mp_obj_dict_t *)&mp_module_deflate_globals,
402};
403
404MP_REGISTER_MODULE(MP_QSTR_deflate, mp_module_deflate);
405#endif // !MICROPY_ENABLE_DYNRUNTIME
406
// The uzlib source files are #include'd here so that they are compiled in
// only if this module is enabled.
409
410#include "lib/uzlib/tinflate.c"
411#include "lib/uzlib/header.c"
412#include "lib/uzlib/adler32.c"
413#include "lib/uzlib/crc32.c"
414
415#if MICROPY_PY_DEFLATE_COMPRESS
416#include "lib/uzlib/lz77.c"
417#endif
418
419#endif // MICROPY_PY_DEFLATE