blob: fd79b56f6cd2d4aeaa71013eb0622bd26022e42c [file] [log] [blame]
Damien George0699c6b2016-01-31 21:45:22 +00001#!/usr/bin/env python3
2#
3# This file is part of the MicroPython project, http://micropython.org/
4#
5# The MIT License (MIT)
6#
7# Copyright (c) 2016 Damien P. George
8#
9# Permission is hereby granted, free of charge, to any person obtaining a copy
10# of this software and associated documentation files (the "Software"), to deal
11# in the Software without restriction, including without limitation the rights
12# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the Software is
14# furnished to do so, subject to the following conditions:
15#
16# The above copyright notice and this permission notice shall be included in
17# all copies or substantial portions of the Software.
18#
19# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25# THE SOFTWARE.
26
27import sys
28from collections import namedtuple
29
30sys.path.append('../py')
31import makeqstrdata as qstrutil
32
class FreezeError(Exception):
    """Raised when a code object cannot be turned into frozen C data.

    Attributes:
        rawcode: the object being frozen; it must expose a ``source_file``
            attribute, which is used in the error message.
        msg: human-readable description of what went wrong.
    """

    def __init__(self, rawcode, msg):
        # initialise the base class so that args/repr/pickling behave normally
        super().__init__(rawcode, msg)
        self.rawcode = rawcode
        self.msg = msg

    def __str__(self):
        source = self.rawcode.source_file
        # source_file may be a qstr record (with the text in its .str field)
        # or a plain string; show the file name itself, not the record's repr
        name = getattr(source, 'str', source)
        return 'error while freezing %s: %s' % (name, self.msg)
40
class Config:
    """Holder for target/build settings.

    The class itself only defines the identifiers of the possible long-int
    implementations; other settings are attached to the shared ``config``
    instance as attributes at run time.
    """

    (
        MICROPY_LONGINT_IMPL_NONE,
        MICROPY_LONGINT_IMPL_LONGLONG,
        MICROPY_LONGINT_IMPL_MPZ,
    ) = range(3)


# single shared configuration object used throughout this module
config = Config()
46
# operand formats, as stored (two bits per opcode) in the opcode format table
MP_OPCODE_BYTE, MP_OPCODE_QSTR, MP_OPCODE_VAR_UINT, MP_OPCODE_OFFSET = range(4)

# opcodes that carry one extra byte
MP_BC_RAISE_VARARGS = 0x5c
MP_BC_MAKE_CLOSURE = 0x62
MP_BC_MAKE_CLOSURE_DEFARGS = 0x63

# opcodes that carry one extra byte if caching is enabled
MP_BC_LOAD_NAME = 0x1c
MP_BC_LOAD_GLOBAL = 0x1d
MP_BC_LOAD_ATTR = 0x1e
MP_BC_STORE_ATTR = 0x26
61
def make_opcode_format():
    """Build the packed opcode-format table.

    Returns a bytes object of 64 entries. Each entry describes four
    consecutive opcodes, two bits per opcode, with the lowest-numbered opcode
    in the least-significant bits.
    """
    # numeric code of each format letter; U and B both encode as 0
    codes = {'U': 0, 'B': 0, 'Q': 1, 'V': 2, 'O': 3}

    # one four-letter group per table byte; contents mirror the table in py/bc.c
    rows = (
        'UUUU', 'UUUU', 'UUUU', 'UUUU',  # 0x00-0x0f
        'BBBU', 'VUQV', 'BUVV', 'QQQQ',  # 0x10-0x1f
        'BBVV', 'QQQB', 'VVQQ', 'UUUU',  # 0x20-0x2f
        'BBBB', 'BOOO', 'OOUU', 'UOBO',  # 0x30-0x3f
        'OBBO', 'BBOU', 'UUUU', 'UUUU',  # 0x40-0x4f
        'VVVV', 'BVVV', 'VVVB', 'BBBU',  # 0x50-0x5f
        'VVVV', 'VVVV', 'QQBU', 'UUUU',  # 0x60-0x6f
    ) + ('BBBB',) * 34 + (               # 0x70-0xf7
        'BBBU',                          # 0xf8-0xfb
        'UUUU',                          # 0xfc-0xff
    )

    return bytes(
        sum(codes[letter] << (2 * slot) for slot, letter in enumerate(row))
        for row in rows
    )
143
144# this function mirrors that in py/bc.c
def mp_opcode_format(bytecode, ip, opcode_format=make_opcode_format()):
    """Classify the opcode at ``bytecode[ip]`` and measure its encoded size.

    Args:
        bytecode: indexable sequence of byte values.
        ip: offset of the opcode to inspect.
        opcode_format: packed format table; built once, when the function is
            defined, by make_opcode_format().

    Returns:
        A tuple ``(f, size)`` where ``f`` is one of the MP_OPCODE_* format
        codes and ``size`` is the number of bytes occupied by the opcode
        together with its operands.
    """
    opcode = bytecode[ip]
    # each table byte packs the 2-bit formats of four consecutive opcodes
    f = (opcode_format[opcode >> 2] >> (2 * (opcode & 3))) & 3

    if f == MP_OPCODE_QSTR:
        # opcode byte followed by a two-byte qstr operand
        return f, 3

    # NOTE(review): with the default table the four cache-lookup opcodes below
    # all have the QSTR format and so return above; the cache part of this
    # test is then never reached for them. Behaviour is kept as-is -- confirm
    # against py/bc.c whether the cache byte should be counted for them.
    extra_byte = opcode in (
        MP_BC_RAISE_VARARGS,
        MP_BC_MAKE_CLOSURE,
        MP_BC_MAKE_CLOSURE_DEFARGS,
    ) or (
        config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE
        and opcode in (
            MP_BC_LOAD_NAME,
            MP_BC_LOAD_GLOBAL,
            MP_BC_LOAD_ATTR,
            MP_BC_STORE_ATTR,
        )
    )

    size = 1
    if f == MP_OPCODE_VAR_UINT:
        # operand bytes continue for as long as the top bit is set
        while bytecode[ip + size] & 0x80 != 0:
            size += 1
        size += 1
    elif f == MP_OPCODE_OFFSET:
        size += 2
    return f, size + extra_byte
172
def decode_uint(bytecode, ip):
    """Decode a variable-length unsigned integer starting at ``bytecode[ip]``.

    Each byte contributes its low seven bits, most-significant group first;
    a set top bit means that another byte follows.

    Returns:
        A tuple ``(next_ip, value)`` where ``next_ip`` is the offset just past
        the encoded number.
    """
    value = 0
    more = True
    while more:
        byte = bytecode[ip]
        ip += 1
        value = (value << 7) | (byte & 0x7f)
        more = bool(byte & 0x80)
    return ip, value
182
def extract_prelude(bytecode):
    """Parse the header that precedes the opcodes of a bytecode blob.

    Returns:
        A tuple ``(ip, ip2, prelude)`` where ``ip`` is the offset of the first
        opcode, ``ip2`` is the offset of the simple_name qstr, and ``prelude``
        is the tuple ``(n_state, n_exc_stack, scope_flags, n_pos_args,
        n_kwonly_args, n_def_pos_args, code_info_size)``.
    """
    # two variable-length counters come first
    pos, n_state = decode_uint(bytecode, 0)
    pos, n_exc_stack = decode_uint(bytecode, pos)

    # followed by four single-byte fields
    scope_flags = bytecode[pos]
    n_pos_args = bytecode[pos + 1]
    n_kwonly_args = bytecode[pos + 2]
    n_def_pos_args = bytecode[pos + 3]
    pos += 4

    # code_info_size is measured from the start of its own encoding, so the
    # skip below starts at pos, while name_pos points just past the size field
    name_pos, code_info_size = decode_uint(bytecode, pos)
    pos += code_info_size

    # skip ahead to, and then past, the 0xff terminator
    while bytecode[pos] != 0xff:
        pos += 1
    pos += 1

    prelude = (
        n_state,
        n_exc_stack,
        scope_flags,
        n_pos_args,
        n_kwonly_args,
        n_def_pos_args,
        code_info_size,
    )
    return pos, name_pos, prelude
199
class RawCode:
    """A code object loaded from a .mpy file, together with its children.

    Attributes:
        bytecode: byte sequence holding the prelude followed by the opcodes;
            qstr operands are two-byte little-endian indices into global_qstrs.
        qstrs: indices into global_qstrs, emitted first in the constant table.
        objs: constant objects referenced by the code.
        raw_codes: nested RawCode instances.
        ip: offset of the first opcode.
        ip2: offset of the simple_name qstr inside the prelude.
        prelude: tuple of prelude fields as returned by extract_prelude().
        simple_name, source_file: qstr records looked up in global_qstrs.
        escaped_name: C identifier suffix; only set once freeze() has run.
    """

    def __init__(self, bytecode, qstrs, objs, raw_codes):
        # set core variables
        self.bytecode = bytecode
        self.qstrs = qstrs
        self.objs = objs
        self.raw_codes = raw_codes

        # extract prelude
        self.ip, self.ip2, self.prelude = extract_prelude(self.bytecode)
        self.simple_name = self._unpack_qstr(self.ip2)
        self.source_file = self._unpack_qstr(self.ip2 + 2)

    def _unpack_qstr(self, ip):
        """Return the qstr record whose 16-bit index is stored at bytecode[ip]."""
        qst = self.bytecode[ip] | self.bytecode[ip + 1] << 8
        return global_qstrs[qst]

    def dump(self):
        """Dump this code object (not implemented yet), children first."""
        # dump children first; this used to call rc.freeze() without its
        # required argument, which raised TypeError for any nested code
        for rc in self.raw_codes:
            rc.dump()
        # TODO

    def freeze(self, parent_name):
        """Print C source defining this code object and all of its children.

        Args:
            parent_name: prefix prepended to this object's escaped simple name
                to form the identifiers used in the generated C code.

        Raises:
            FreezeError: if a constant object cannot be frozen for the
                configured target.
        """
        self.escaped_name = parent_name + self.simple_name.qstr_esc

        # emit children first
        for rc in self.raw_codes:
            rc.freeze(self.escaped_name + '_')

        self._freeze_bytecode(parent_name)
        self._freeze_objs()
        self._freeze_const_table()
        self._freeze_raw_code()

    def _freeze_bytecode(self, parent_name):
        """Print the bytecode array, with qstr indices replaced by qstr ids."""
        print()
        print('// frozen bytecode for file %s, scope %s%s'
              % (self.source_file.str, parent_name, self.simple_name.str))
        print('STATIC const byte bytecode_data_%s[%u] = {'
              % (self.escaped_name, len(self.bytecode)))

        # prelude bytes up to the simple_name qstr
        print(' ', end='')
        for b in self.bytecode[:self.ip2]:
            print(' 0x%02x,' % b, end='')
        print()

        # simple_name and source_file are written symbolically
        print(' ', self.simple_name.qstr_id, '& 0xff,', self.simple_name.qstr_id, '>> 8,')
        print(' ', self.source_file.qstr_id, '& 0xff,', self.source_file.qstr_id, '>> 8,')

        # rest of the prelude, up to the first opcode
        print(' ', end='')
        for b in self.bytecode[self.ip2 + 4:self.ip]:
            print(' 0x%02x,' % b, end='')
        print()

        # opcodes, one per output line
        ip = self.ip
        while ip < len(self.bytecode):
            f, sz = mp_opcode_format(self.bytecode, ip)
            if f == MP_OPCODE_QSTR:
                qst = self._unpack_qstr(ip + 1).qstr_id
                print(' ', '0x%02x,' % self.bytecode[ip], qst, '& 0xff,', qst, '>> 8,')
            else:
                print(' ', ''.join('0x%02x, ' % self.bytecode[ip + i] for i in range(sz)))
            ip += sz
        print('};')

    def _freeze_objs(self):
        """Print one C definition for every constant object."""
        for i, obj in enumerate(self.objs):
            obj_name = 'const_obj_%s_%u' % (self.escaped_name, i)
            if isinstance(obj, str):
                obj = bytes(obj, 'utf8')
                print('STATIC const mp_obj_str_t %s = '
                      '{{&mp_type_str}, 0, %u, (const byte*)"%s"};'
                      % (obj_name, len(obj), ''.join(('\\x%02x' % b) for b in obj)))
            elif isinstance(obj, bytes):
                print('STATIC const mp_obj_str_t %s = '
                      '{{&mp_type_bytes}, 0, %u, (const byte*)"%s"};'
                      % (obj_name, len(obj), ''.join(('\\x%02x' % b) for b in obj)))
            elif isinstance(obj, int):
                self._freeze_int(obj_name, obj)
            elif isinstance(obj, float):
                # works for REPR A and B only
                print('STATIC const mp_obj_float_t %s = {{&mp_type_float}, %.16g};'
                      % (obj_name, obj))
            else:
                # TODO
                raise FreezeError(self, 'freezing of object %r is not implemented' % (obj,))

    def _freeze_int(self, obj_name, obj):
        """Print the C definition of an integer constant for the target."""
        if config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_NONE:
            # TODO check if we can actually fit this long-int into a small-int
            raise FreezeError(self, 'target does not support long int')
        elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_LONGLONG:
            # TODO
            raise FreezeError(self, 'freezing int to long-long is not implemented')
        elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_MPZ:
            neg = 0
            if obj < 0:
                obj = -obj
                neg = 1
            # split the magnitude into digits, least-significant digit first
            bits_per_dig = config.MPZ_DIG_SIZE
            digs = []
            z = obj
            while z:
                digs.append(z & ((1 << bits_per_dig) - 1))
                z >>= bits_per_dig
            ndigs = len(digs)
            digs = ','.join(('%#x' % d) for d in digs)
            print('STATIC const mp_obj_int_t %s = {{&mp_type_int}, '
                  '{.neg=%u, .fixed_dig=1, .alloc=%u, .len=%u, .dig=(uint%u_t[]){%s}}};'
                  % (obj_name, neg, ndigs, ndigs, bits_per_dig, digs))

    def _freeze_const_table(self):
        """Print the constant table: qstrs, then objects, then child codes."""
        print('STATIC const mp_uint_t const_table_data_%s[%u] = {'
              % (self.escaped_name, len(self.qstrs) + len(self.objs) + len(self.raw_codes)))
        for qst in self.qstrs:
            print(' (mp_uint_t)MP_OBJ_NEW_QSTR(%s),' % global_qstrs[qst].qstr_id)
        for i in range(len(self.objs)):
            print(' (mp_uint_t)&const_obj_%s_%u,' % (self.escaped_name, i))
        for rc in self.raw_codes:
            print(' (mp_uint_t)&raw_code_%s,' % rc.escaped_name)
        print('};')

    def _freeze_raw_code(self):
        """Print the mp_raw_code_t structure tying everything together."""
        # only the module-level code object is left without STATIC
        if self.simple_name.str != '<module>':
            print('STATIC ', end='')
        print('const mp_raw_code_t raw_code_%s = {' % self.escaped_name)
        print(' .kind = MP_CODE_BYTECODE,')
        print(' .scope_flags = 0x%02x,' % self.prelude[2])
        print(' .n_pos_args = %u,' % self.prelude[3])
        print(' .data.u_byte = {')
        print(' .bytecode = bytecode_data_%s,' % self.escaped_name)
        print(' .const_table = const_table_data_%s,' % self.escaped_name)
        print(' #if MICROPY_PERSISTENT_CODE_SAVE')
        print(' .bc_len = %u,' % len(self.bytecode))
        print(' .n_obj = %u,' % len(self.objs))
        print(' .n_raw_code = %u,' % len(self.raw_codes))
        print(' #endif')
        print(' },')
        print('};')
325 print('};')
326
def read_uint(f):
    """Read a variable-length unsigned integer from the binary stream ``f``.

    Bytes are consumed one at a time; each supplies seven bits of the value,
    most-significant group first, and a clear top bit marks the last byte.
    """
    value = 0
    more = True
    while more:
        byte = f.read(1)[0]
        value = (value << 7) | (byte & 0x7f)
        more = byte & 0x80 != 0
    return value
335
# every qstr read so far, in reading order; the list position is the index
# that gets packed into the bytecode
global_qstrs = []

# one qstr: its text, its escaped form, and the matching MP_QSTR_ identifier
qstr_type = namedtuple('qstr', 'str qstr_esc qstr_id')
def read_qstr(f):
    """Read one length-prefixed qstr from ``f`` and register it.

    The string is appended to global_qstrs together with its escaped form and
    its ``MP_QSTR_`` identifier.

    Returns:
        The index of the new entry in global_qstrs.
    """
    length = read_uint(f)
    text = f.read(length).decode('utf8')
    escaped = qstrutil.qstr_escape(text)
    index = len(global_qstrs)
    global_qstrs.append(qstr_type(text, escaped, 'MP_QSTR_' + escaped))
    return index
344
def read_obj(f):
    """Read one constant object from the binary stream ``f``.

    The object starts with a one-byte type tag. ``e`` stands for Ellipsis and
    has no payload; every other tag is followed by a length-prefixed payload:
    ``s`` (UTF-8 string), ``b`` (bytes), ``i`` (decimal integer), ``f``
    (float) and ``c`` (complex), the last three written as ASCII text.

    Raises:
        ValueError: if the type tag is not one of the above. This is raised
            explicitly rather than with ``assert`` so the check also runs
            under ``python -O``.
    """
    obj_type = f.read(1)
    if obj_type == b'e':
        return Ellipsis
    if obj_type not in (b's', b'b', b'i', b'f', b'c'):
        # reject the tag before trying to interpret what follows as a payload
        raise ValueError('unknown object type %r in .mpy file' % (obj_type,))

    buf = f.read(read_uint(f))
    if obj_type == b's':
        return str(buf, 'utf8')
    if obj_type == b'b':
        return buf
    if obj_type == b'i':
        return int(str(buf, 'ascii'), 10)
    if obj_type == b'f':
        return float(str(buf, 'ascii'))
    return complex(str(buf, 'ascii'))
363
def read_qstr_and_pack(f, bytecode, ip):
    """Read a qstr from ``f`` and store its index in ``bytecode`` at ``ip``.

    The index is written as two bytes, low byte first.
    """
    index = read_qstr(f)
    low, high = index & 0xff, index >> 8
    bytecode[ip] = low
    bytecode[ip + 1] = high
368
def read_bytecode_qstrs(file, bytecode, ip):
    """Read the qstr operand of every qstr-format opcode from ``file``.

    Walks the opcodes from offset ``ip`` to the end of ``bytecode``; for each
    opcode whose format is MP_OPCODE_QSTR, the next qstr is read from ``file``
    and its index is packed into the two bytes following the opcode.
    """
    end = len(bytecode)
    while ip < end:
        kind, size = mp_opcode_format(bytecode, ip)
        if kind == MP_OPCODE_QSTR:
            read_qstr_and_pack(file, bytecode, ip + 1)
        ip += size
375
def read_raw_code(f):
    """Read one code object, including all nested ones, from the stream ``f``.

    Returns:
        A RawCode built from the bytecode, argument qstrs, constant objects
        and child code objects that were read.
    """
    bytecode = bytearray(f.read(read_uint(f)))
    first_opcode, name_offset, prelude = extract_prelude(bytecode)

    # qstrs come in the order: simple_name, source_file, then opcode operands
    read_qstr_and_pack(f, bytecode, name_offset)
    read_qstr_and_pack(f, bytecode, name_offset + 2)
    read_bytecode_qstrs(f, bytecode, first_opcode)

    n_obj = read_uint(f)
    n_raw_code = read_uint(f)

    # one qstr for each positional and each keyword-only argument
    n_pos_args, n_kwonly_args = prelude[3], prelude[4]
    qstrs = [read_qstr(f) for _ in range(n_pos_args + n_kwonly_args)]
    objs = [read_obj(f) for _ in range(n_obj)]
    children = [read_raw_code(f) for _ in range(n_raw_code)]
    return RawCode(bytecode, qstrs, objs, children)
389
def read_mpy(filename):
    """Load a .mpy file and return its top-level RawCode.

    The four-byte header is validated and its feature flags and small-int
    size are recorded on the shared ``config`` object before the code is read.

    Raises:
        Exception: if the header is too short or does not start with ``M``
            ('not a valid .mpy file'), or if the version byte is not 0
            ('incompatible version').
    """
    with open(filename, 'rb') as f:
        header = f.read(4)
        # a truncated header is reported like any other invalid file instead
        # of failing with an IndexError on the accesses below
        if len(header) < 4 or header[0] != ord('M'):
            raise Exception('not a valid .mpy file')
        if header[1] != 0:
            raise Exception('incompatible version')
        feature_flags = header[2]
        config.MICROPY_OPT_CACHE_MAP_LOOKUP_IN_BYTECODE = (feature_flags & 1) != 0
        config.MICROPY_PY_BUILTINS_STR_UNICODE = (feature_flags & 2) != 0
        config.mp_small_int_bits = header[3]
        return read_raw_code(f)
402
def dump_mpy(raw_codes):
    """Call dump() on every code object in ``raw_codes``, in order."""
    for code in raw_codes:
        code.dump()
406
def freeze_mpy(qcfgs, base_qstrs, raw_codes):
    """Print a C source file that freezes ``raw_codes``.

    Args:
        qcfgs: mapping providing the 'BYTES_IN_LEN' and 'BYTES_IN_HASH'
            qstr configuration values.
        base_qstrs: container of escaped qstr names that already exist and
            must not be defined again.
        raw_codes: top-level RawCode objects, one per input file.

    The output consists of the includes, an enum and a qstr pool for the
    qstrs not found in ``base_qstrs``, the frozen code objects, and finally
    the tables of module names and module contents.
    """
    bytes_in_len = int(qcfgs['BYTES_IN_LEN'])
    bytes_in_hash = int(qcfgs['BYTES_IN_HASH'])

    # collect the qstrs that need defining, first occurrence wins
    fresh = []
    seen = set()
    for q in global_qstrs:
        esc = q.qstr_esc
        if esc in base_qstrs or esc in seen:
            continue
        seen.add(esc)
        fresh.append((esc, q.str))

    for header in ('py/mpconfig.h', 'py/objint.h', 'py/objstr.h', 'py/emitglue.h'):
        print('#include "%s"' % header)
    print()

    for line in (
        '#if MICROPY_PY_BUILTINS_FLOAT',
        'typedef struct _mp_obj_float_t {',
        ' mp_obj_base_t base;',
        ' mp_float_t value;',
        '} mp_obj_float_t;',
        '#endif',
    ):
        print(line)
    print()

    # new qstr ids continue numbering after the existing ones
    print('enum {')
    for position, (esc, _) in enumerate(fresh):
        if position == 0:
            print(' MP_QSTR_%s = MP_QSTRnumber_of,' % esc)
        else:
            print(' MP_QSTR_%s,' % esc)
    print('};')

    print()
    print('extern const qstr_pool_t mp_qstr_const_pool;')
    print('const qstr_pool_t mp_qstr_frozen_const_pool = {')
    print(' (qstr_pool_t*)&mp_qstr_const_pool, // previous pool')
    print(' MP_QSTRnumber_of, // previous pool size')
    print(' %u, // allocated entries' % len(fresh))
    print(' %u, // used entries' % len(fresh))
    print(' {')
    for _, text in fresh:
        print(' %s,' % qstrutil.make_bytes(bytes_in_len, bytes_in_hash, text))
    print(' },')
    print('};')

    # identifiers are prefixed with the source path, minus its last three
    # characters and with slashes turned into underscores
    for code in raw_codes:
        prefix = code.source_file.str.replace('/', '_')[:-3]
        code.freeze(prefix + '_')

    print()
    print('const char mp_frozen_mpy_names[] = {')
    for code in raw_codes:
        stem = code.source_file.str[:-len(".py")]
        # keep only what follows the last slash, if there is one
        print('"%s\\0"' % stem.rpartition('/')[2])
    print('"\\0"};')

    print('const mp_raw_code_t *const mp_frozen_mpy_content[] = {')
    for code in raw_codes:
        print(' &raw_code_%s,' % code.escaped_name)
    print('};')
472
def main():
    """Command-line entry point: parse arguments, load the files, then act.

    With --dump the loaded files are dumped; otherwise, with --freeze, frozen
    C source is printed. A FreezeError is reported on stderr and ends the
    program with exit status 1.
    """
    import argparse

    parser = argparse.ArgumentParser(description='A tool to work with MicroPython .mpy files.')
    parser.add_argument('-d', '--dump', action='store_true',
        help='dump contents of files')
    parser.add_argument('-f', '--freeze', action='store_true',
        help='freeze files')
    parser.add_argument('-q', '--qstr-header',
        help='qstr header file to freeze against')
    parser.add_argument('-mlongint-impl', choices=['none', 'longlong', 'mpz'], default='mpz',
        help='long-int implementation used by target (default mpz)')
    parser.add_argument('-mmpz-dig-size', metavar='N', type=int, default=16,
        help='mpz digit size used by target (default 16)')
    parser.add_argument('files', nargs='+',
        help='input .mpy files')
    args = parser.parse_args()

    # set config values relevant to target machine
    longint_impls = {
        'none': config.MICROPY_LONGINT_IMPL_NONE,
        'longlong': config.MICROPY_LONGINT_IMPL_LONGLONG,
        'mpz': config.MICROPY_LONGINT_IMPL_MPZ,
    }
    config.MICROPY_LONGINT_IMPL = longint_impls[args.mlongint_impl]
    config.MPZ_DIG_SIZE = args.mmpz_dig_size

    # without a qstr header, fall back to default sizes and no existing qstrs
    if args.qstr_header:
        qcfgs, base_qstrs = qstrutil.parse_input_headers([args.qstr_header])
    else:
        qcfgs = {'BYTES_IN_LEN': 1, 'BYTES_IN_HASH': 1}
        base_qstrs = {}

    raw_codes = [read_mpy(name) for name in args.files]

    if args.dump:
        dump_mpy(raw_codes)
        return
    if not args.freeze:
        return
    try:
        freeze_mpy(qcfgs, base_qstrs, raw_codes)
    except FreezeError as er:
        print(er, file=sys.stderr)
        sys.exit(1)


if __name__ == '__main__':
    main()