blob: f4a8ef6420f20a08cf603503514c5537e1a20d9b [file] [log] [blame]
Damien George0699c6b2016-01-31 21:45:22 +00001#!/usr/bin/env python3
2#
3# This file is part of the MicroPython project, http://micropython.org/
4#
5# The MIT License (MIT)
6#
Damien Georgefaf3d3e2019-06-04 22:13:32 +10007# Copyright (c) 2016-2019 Damien P. George
Damien George0699c6b2016-01-31 21:45:22 +00008#
9# Permission is hereby granted, free of charge, to any person obtaining a copy
10# of this software and associated documentation files (the "Software"), to deal
11# in the Software without restriction, including without limitation the rights
12# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the Software is
14# furnished to do so, subject to the following conditions:
15#
16# The above copyright notice and this permission notice shall be included in
17# all copies or substantial portions of the Software.
18#
19# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25# THE SOFTWARE.
26
Damien Georgec3beb162016-04-15 11:56:10 +010027# Python 2/3 compatibility code
28from __future__ import print_function
29import platform
Damien George69661f32020-02-27 15:36:53 +110030
if platform.python_version_tuple()[0] == "2":
    # Python 2 flavour of the helpers: text is `str`, binary data is `bytearray`.
    from binascii import hexlify as hexlify_py2

    def str_cons(val, enc=None):
        """Build a text string from val (enc is accepted but ignored)."""
        return str(val)

    def bytes_cons(val, enc=None):
        """Build a binary buffer from val (enc is accepted but ignored)."""
        return bytearray(val)

    def is_str_type(o):
        """Return True only when o is exactly a `str`."""
        return type(o) is str

    def is_bytes_type(o):
        """Return True only when o is exactly a `bytearray`."""
        return type(o) is bytearray

    def is_int_type(o):
        """Return True only when o is exactly an `int` or a `long`."""
        kind = type(o)
        return kind is int or kind is long

    def hexlify_to_str(b):
        """Return the bytes of b as two-digit hex values joined by ':'."""
        digits = hexlify_py2(b)
        pairs = [digits[pos : pos + 2] for pos in range(0, len(digits), 2)]
        return ":".join(pairs)

else:
    # Python 3 flavour of the helpers: text is `str`, binary data is `bytes`.
    from binascii import hexlify

    str_cons = str
    bytes_cons = bytes

    def is_str_type(o):
        """Return True only when o is exactly a `str`."""
        return type(o) is str

    def is_bytes_type(o):
        """Return True only when o is exactly a `bytes`."""
        return type(o) is bytes

    def is_int_type(o):
        """Return True only when o is exactly an `int` (so not a bool)."""
        return type(o) is int

    def hexlify_to_str(b):
        """Return the bytes of b as two-digit hex values joined by ':'."""
        separated = hexlify(b, ":")
        return str(separated, "ascii")
55
56
Damien Georgec3beb162016-04-15 11:56:10 +010057# end compatibility code
58
Damien George0699c6b2016-01-31 21:45:22 +000059import sys
Damien George72ae3c72016-08-10 13:26:11 +100060import struct
Damien George0699c6b2016-01-31 21:45:22 +000061
Damien George69661f32020-02-27 15:36:53 +110062sys.path.append(sys.path[0] + "/../py")
Damien George0699c6b2016-01-31 21:45:22 +000063import makeqstrdata as qstrutil
64
Damien George69661f32020-02-27 15:36:53 +110065
class MPYReadError(Exception):
    """Exception that carries a file name and a message.

    str() of the exception is "<filename>: <msg>".
    """

    def __init__(self, filename, msg):
        self.filename, self.msg = filename, msg

    def __str__(self):
        parts = (self.filename, self.msg)
        return "%s: %s" % parts
73
74
class FreezeError(Exception):
    """Exception raised when an object cannot be frozen.

    rawcode is the object being frozen when the error occurred (in this file
    it is raised with a CompiledModule) and msg describes the problem.
    """

    def __init__(self, rawcode, msg):
        self.rawcode = rawcode
        self.msg = msg

    def __str__(self):
        # The source file may be stored as a qstr object whose text lives in
        # its `.str` attribute (as CompiledModule does), as a plain string, or
        # be missing altogether.  Always show readable text instead of an
        # object repr, and never raise from inside __str__.
        source_file = getattr(self.rawcode, "source_file", None)
        name = getattr(source_file, "str", source_file)
        if name is None:
            name = "<unknown>"
        return "error while freezing %s: %s" % (name, self.msg)
82
Damien George0699c6b2016-01-31 21:45:22 +000083
class Config:
    """Holder for tool-wide settings.

    Only the constants below are defined here; other attributes that this
    file reads from the shared instance (for example MICROPY_LONGINT_IMPL and
    MPZ_DIG_SIZE) are not set in this class.
    """

    MPY_VERSION = 6

    # Possible values of MICROPY_LONGINT_IMPL.
    (
        MICROPY_LONGINT_IMPL_NONE,
        MICROPY_LONGINT_IMPL_LONGLONG,
        MICROPY_LONGINT_IMPL_MPZ,
    ) = range(3)


# Single shared configuration instance used throughout this file.
config = Config()
92
Damien George69661f32020-02-27 15:36:53 +110093
# Raw-code kinds (see RawCode.code_kind_str for the matching C names).
MP_CODE_BYTECODE = 2
MP_CODE_NATIVE_PY = 3
MP_CODE_NATIVE_VIPER = 4
MP_CODE_NATIVE_ASM = 5

# MP_NATIVE_ARCH_* identifiers.
MP_NATIVE_ARCH_NONE = 0
MP_NATIVE_ARCH_X86 = 1
MP_NATIVE_ARCH_X64 = 2
MP_NATIVE_ARCH_ARMV6 = 3
MP_NATIVE_ARCH_ARMV6M = 4
MP_NATIVE_ARCH_ARMV7M = 5
MP_NATIVE_ARCH_ARMV7EM = 6
MP_NATIVE_ARCH_ARMV7EMSP = 7
MP_NATIVE_ARCH_ARMV7EMDP = 8
MP_NATIVE_ARCH_XTENSA = 9
MP_NATIVE_ARCH_XTENSAWIN = 10

# MP_PERSISTENT_OBJ_* identifiers, one per kind of constant object.
MP_PERSISTENT_OBJ_FUN_TABLE = 0
MP_PERSISTENT_OBJ_NONE = 1
MP_PERSISTENT_OBJ_FALSE = 2
MP_PERSISTENT_OBJ_TRUE = 3
MP_PERSISTENT_OBJ_ELLIPSIS = 4
MP_PERSISTENT_OBJ_STR = 5
MP_PERSISTENT_OBJ_BYTES = 6
MP_PERSISTENT_OBJ_INT = 7
MP_PERSISTENT_OBJ_FLOAT = 8
MP_PERSISTENT_OBJ_COMPLEX = 9
MP_PERSISTENT_OBJ_TUPLE = 10

# MP_SCOPE_FLAG_* bit masks.
MP_SCOPE_FLAG_VIPERRELOC = 0x10
MP_SCOPE_FLAG_VIPERRODATA = 0x20
MP_SCOPE_FLAG_VIPERBSS = 0x40

# An opcode carries one trailing extra byte when (opcode & this mask) == 0;
# see mp_opcode_format() and mp_opcode_decode().
MP_BC_MASK_EXTRA_BYTE = 0x9E

# Opcode argument formats, as returned by mp_opcode_format() and
# mp_opcode_decode().
MP_BC_FORMAT_BYTE = 0
MP_BC_FORMAT_QSTR = 1
MP_BC_FORMAT_VAR_UINT = 2
MP_BC_FORMAT_OFFSET = 3
133
# Names of the unary operators, indexed by the operator number that is added
# to Opcodes.MP_BC_UNARY_OP_MULTI (used to build Opcodes.mapping).
mp_unary_op_method_name = (
    "__pos__",
    "__neg__",
    "__invert__",
    "<not>",
)

# Names of the binary operators, indexed by the operator number that is added
# to Opcodes.MP_BC_BINARY_OP_MULTI (used to build Opcodes.mapping).
mp_binary_op_method_name = (
    "__lt__",
    "__gt__",
    "__eq__",
    "__le__",
    "__ge__",
    "__ne__",
    "<in>",
    "<is>",
    "<exception match>",
    "__ior__",
    "__ixor__",
    "__iand__",
    "__ilshift__",
    "__irshift__",
    "__iadd__",
    "__isub__",
    "__imul__",
    "__imatmul__",
    "__ifloordiv__",
    "__itruediv__",
    "__imod__",
    "__ipow__",
    "__or__",
    "__xor__",
    "__and__",
    "__lshift__",
    "__rshift__",
    "__add__",
    "__sub__",
    "__mul__",
    "__matmul__",
    "__floordiv__",
    "__truediv__",
    "__mod__",
    "__pow__",
)
178
179
class Opcodes:
    """Bytecode opcode values plus a table mapping opcode byte to a name.

    Every MP_BC_* attribute is an integer.  `ALL_OFFSET_SIGNED` lists the
    opcodes whose jump offset is decoded as a signed value (see
    mp_opcode_decode).  `mapping` is a 256-entry list indexed by opcode byte
    that gives a printable name, or "unknown" for bytes that are not an
    opcode.
    """

    # fmt: off
    # Load, Store, Delete, Import, Make, Build, Unpack, Call, Jump, Exception, For, sTack, Return, Yield, Op
    MP_BC_BASE_RESERVED               = (0x00) # ----------------
    MP_BC_BASE_QSTR_O                 = (0x10) # LLLLLLSSSDDII---
    MP_BC_BASE_VINT_E                 = (0x20) # MMLLLLSSDDBBBBBB
    MP_BC_BASE_VINT_O                 = (0x30) # UUMMCCCC--------
    MP_BC_BASE_JUMP_E                 = (0x40) # J-JJJJJEEEEF----
    MP_BC_BASE_BYTE_O                 = (0x50) # LLLLSSDTTTTTEEFF
    MP_BC_BASE_BYTE_E                 = (0x60) # --BREEEYYI------
    MP_BC_LOAD_CONST_SMALL_INT_MULTI  = (0x70) # LLLLLLLLLLLLLLLL
    #                                 = (0x80) # LLLLLLLLLLLLLLLL
    #                                 = (0x90) # LLLLLLLLLLLLLLLL
    #                                 = (0xa0) # LLLLLLLLLLLLLLLL
    MP_BC_LOAD_FAST_MULTI             = (0xb0) # LLLLLLLLLLLLLLLL
    MP_BC_STORE_FAST_MULTI            = (0xc0) # SSSSSSSSSSSSSSSS
    MP_BC_UNARY_OP_MULTI              = (0xd0) # OOOOOOO
    MP_BC_BINARY_OP_MULTI             = (0xd7) #        OOOOOOOOO
    #                                 = (0xe0) # OOOOOOOOOOOOOOOO
    #                                 = (0xf0) # OOOOOOOOOO------

    MP_BC_LOAD_CONST_SMALL_INT_MULTI_NUM = 64
    MP_BC_LOAD_CONST_SMALL_INT_MULTI_EXCESS = 16
    MP_BC_LOAD_FAST_MULTI_NUM         = 16
    MP_BC_STORE_FAST_MULTI_NUM        = 16
    MP_BC_UNARY_OP_MULTI_NUM          = 4  # MP_UNARY_OP_NUM_BYTECODE
    MP_BC_BINARY_OP_MULTI_NUM         = 35 # MP_BINARY_OP_NUM_BYTECODE

    MP_BC_LOAD_CONST_FALSE            = (MP_BC_BASE_BYTE_O + 0x00)
    MP_BC_LOAD_CONST_NONE             = (MP_BC_BASE_BYTE_O + 0x01)
    MP_BC_LOAD_CONST_TRUE             = (MP_BC_BASE_BYTE_O + 0x02)
    MP_BC_LOAD_CONST_SMALL_INT        = (MP_BC_BASE_VINT_E + 0x02) # signed var-int
    MP_BC_LOAD_CONST_STRING           = (MP_BC_BASE_QSTR_O + 0x00) # qstr
    MP_BC_LOAD_CONST_OBJ              = (MP_BC_BASE_VINT_E + 0x03) # ptr
    MP_BC_LOAD_NULL                   = (MP_BC_BASE_BYTE_O + 0x03)

    MP_BC_LOAD_FAST_N                 = (MP_BC_BASE_VINT_E + 0x04) # uint
    MP_BC_LOAD_DEREF                  = (MP_BC_BASE_VINT_E + 0x05) # uint
    MP_BC_LOAD_NAME                   = (MP_BC_BASE_QSTR_O + 0x01) # qstr
    MP_BC_LOAD_GLOBAL                 = (MP_BC_BASE_QSTR_O + 0x02) # qstr
    MP_BC_LOAD_ATTR                   = (MP_BC_BASE_QSTR_O + 0x03) # qstr
    MP_BC_LOAD_METHOD                 = (MP_BC_BASE_QSTR_O + 0x04) # qstr
    MP_BC_LOAD_SUPER_METHOD           = (MP_BC_BASE_QSTR_O + 0x05) # qstr
    MP_BC_LOAD_BUILD_CLASS            = (MP_BC_BASE_BYTE_O + 0x04)
    MP_BC_LOAD_SUBSCR                 = (MP_BC_BASE_BYTE_O + 0x05)

    MP_BC_STORE_FAST_N                = (MP_BC_BASE_VINT_E + 0x06) # uint
    MP_BC_STORE_DEREF                 = (MP_BC_BASE_VINT_E + 0x07) # uint
    MP_BC_STORE_NAME                  = (MP_BC_BASE_QSTR_O + 0x06) # qstr
    MP_BC_STORE_GLOBAL                = (MP_BC_BASE_QSTR_O + 0x07) # qstr
    MP_BC_STORE_ATTR                  = (MP_BC_BASE_QSTR_O + 0x08) # qstr
    MP_BC_STORE_SUBSCR                = (MP_BC_BASE_BYTE_O + 0x06)

    MP_BC_DELETE_FAST                 = (MP_BC_BASE_VINT_E + 0x08) # uint
    MP_BC_DELETE_DEREF                = (MP_BC_BASE_VINT_E + 0x09) # uint
    MP_BC_DELETE_NAME                 = (MP_BC_BASE_QSTR_O + 0x09) # qstr
    MP_BC_DELETE_GLOBAL               = (MP_BC_BASE_QSTR_O + 0x0a) # qstr

    MP_BC_DUP_TOP                     = (MP_BC_BASE_BYTE_O + 0x07)
    MP_BC_DUP_TOP_TWO                 = (MP_BC_BASE_BYTE_O + 0x08)
    MP_BC_POP_TOP                     = (MP_BC_BASE_BYTE_O + 0x09)
    MP_BC_ROT_TWO                     = (MP_BC_BASE_BYTE_O + 0x0a)
    MP_BC_ROT_THREE                   = (MP_BC_BASE_BYTE_O + 0x0b)

    MP_BC_UNWIND_JUMP                 = (MP_BC_BASE_JUMP_E + 0x00) # signed relative bytecode offset; then a byte
    MP_BC_JUMP                        = (MP_BC_BASE_JUMP_E + 0x02) # signed relative bytecode offset
    MP_BC_POP_JUMP_IF_TRUE            = (MP_BC_BASE_JUMP_E + 0x03) # signed relative bytecode offset
    MP_BC_POP_JUMP_IF_FALSE           = (MP_BC_BASE_JUMP_E + 0x04) # signed relative bytecode offset
    MP_BC_JUMP_IF_TRUE_OR_POP         = (MP_BC_BASE_JUMP_E + 0x05) # unsigned relative bytecode offset
    MP_BC_JUMP_IF_FALSE_OR_POP        = (MP_BC_BASE_JUMP_E + 0x06) # unsigned relative bytecode offset
    MP_BC_SETUP_WITH                  = (MP_BC_BASE_JUMP_E + 0x07) # unsigned relative bytecode offset
    MP_BC_SETUP_EXCEPT                = (MP_BC_BASE_JUMP_E + 0x08) # unsigned relative bytecode offset
    MP_BC_SETUP_FINALLY               = (MP_BC_BASE_JUMP_E + 0x09) # unsigned relative bytecode offset
    MP_BC_POP_EXCEPT_JUMP             = (MP_BC_BASE_JUMP_E + 0x0a) # unsigned relative bytecode offset
    MP_BC_FOR_ITER                    = (MP_BC_BASE_JUMP_E + 0x0b) # unsigned relative bytecode offset
    MP_BC_WITH_CLEANUP                = (MP_BC_BASE_BYTE_O + 0x0c)
    MP_BC_END_FINALLY                 = (MP_BC_BASE_BYTE_O + 0x0d)
    MP_BC_GET_ITER                    = (MP_BC_BASE_BYTE_O + 0x0e)
    MP_BC_GET_ITER_STACK              = (MP_BC_BASE_BYTE_O + 0x0f)

    MP_BC_BUILD_TUPLE                 = (MP_BC_BASE_VINT_E + 0x0a) # uint
    MP_BC_BUILD_LIST                  = (MP_BC_BASE_VINT_E + 0x0b) # uint
    MP_BC_BUILD_MAP                   = (MP_BC_BASE_VINT_E + 0x0c) # uint
    MP_BC_STORE_MAP                   = (MP_BC_BASE_BYTE_E + 0x02)
    MP_BC_BUILD_SET                   = (MP_BC_BASE_VINT_E + 0x0d) # uint
    MP_BC_BUILD_SLICE                 = (MP_BC_BASE_VINT_E + 0x0e) # uint
    MP_BC_STORE_COMP                  = (MP_BC_BASE_VINT_E + 0x0f) # uint
    MP_BC_UNPACK_SEQUENCE             = (MP_BC_BASE_VINT_O + 0x00) # uint
    MP_BC_UNPACK_EX                   = (MP_BC_BASE_VINT_O + 0x01) # uint

    MP_BC_RETURN_VALUE                = (MP_BC_BASE_BYTE_E + 0x03)
    MP_BC_RAISE_LAST                  = (MP_BC_BASE_BYTE_E + 0x04)
    MP_BC_RAISE_OBJ                   = (MP_BC_BASE_BYTE_E + 0x05)
    MP_BC_RAISE_FROM                  = (MP_BC_BASE_BYTE_E + 0x06)
    MP_BC_YIELD_VALUE                 = (MP_BC_BASE_BYTE_E + 0x07)
    MP_BC_YIELD_FROM                  = (MP_BC_BASE_BYTE_E + 0x08)

    MP_BC_MAKE_FUNCTION               = (MP_BC_BASE_VINT_O + 0x02) # uint
    MP_BC_MAKE_FUNCTION_DEFARGS       = (MP_BC_BASE_VINT_O + 0x03) # uint
    MP_BC_MAKE_CLOSURE                = (MP_BC_BASE_VINT_E + 0x00) # uint; extra byte
    MP_BC_MAKE_CLOSURE_DEFARGS        = (MP_BC_BASE_VINT_E + 0x01) # uint; extra byte
    MP_BC_CALL_FUNCTION               = (MP_BC_BASE_VINT_O + 0x04) # uint
    MP_BC_CALL_FUNCTION_VAR_KW        = (MP_BC_BASE_VINT_O + 0x05) # uint
    MP_BC_CALL_METHOD                 = (MP_BC_BASE_VINT_O + 0x06) # uint
    MP_BC_CALL_METHOD_VAR_KW          = (MP_BC_BASE_VINT_O + 0x07) # uint

    MP_BC_IMPORT_NAME                 = (MP_BC_BASE_QSTR_O + 0x0b) # qstr
    MP_BC_IMPORT_FROM                 = (MP_BC_BASE_QSTR_O + 0x0c) # qstr
    MP_BC_IMPORT_STAR                 = (MP_BC_BASE_BYTE_E + 0x09)
    # fmt: on

    # Create sets of related opcodes.
    ALL_OFFSET_SIGNED = (
        MP_BC_UNWIND_JUMP,
        MP_BC_JUMP,
        MP_BC_POP_JUMP_IF_TRUE,
        MP_BC_POP_JUMP_IF_FALSE,
    )

    # Create a table mapping opcode value to opcode name.
    mapping = ["unknown"] * 256
    # Snapshot the class namespace before the loop adds its own variables.
    for op_name, op_value in list(locals().items()):
        if not op_name.startswith("MP_BC_"):
            continue
        # Skip helper constants that are not opcodes: range bases, range
        # sizes and the small-int excess.  Their values collide with real
        # opcode bytes (e.g. 64 == UNWIND_JUMP), so including them would make
        # the table depend on namespace iteration order and would give names
        # to bytes that are not opcodes at all.  The *_MULTI ranges are
        # filled in explicitly below.
        if op_name.startswith("MP_BC_BASE_") or op_name.endswith(("_MULTI", "_NUM", "_EXCESS")):
            continue
        mapping[op_value] = op_name[len("MP_BC_") :]
    for i in range(MP_BC_LOAD_CONST_SMALL_INT_MULTI_NUM):
        name = "LOAD_CONST_SMALL_INT %d" % (i - MP_BC_LOAD_CONST_SMALL_INT_MULTI_EXCESS)
        mapping[MP_BC_LOAD_CONST_SMALL_INT_MULTI + i] = name
    for i in range(MP_BC_LOAD_FAST_MULTI_NUM):
        mapping[MP_BC_LOAD_FAST_MULTI + i] = "LOAD_FAST %d" % i
    for i in range(MP_BC_STORE_FAST_MULTI_NUM):
        mapping[MP_BC_STORE_FAST_MULTI + i] = "STORE_FAST %d" % i
    for i in range(MP_BC_UNARY_OP_MULTI_NUM):
        mapping[MP_BC_UNARY_OP_MULTI + i] = "UNARY_OP %d %s" % (i, mp_unary_op_method_name[i])
    for i in range(MP_BC_BINARY_OP_MULTI_NUM):
        mapping[MP_BC_BINARY_OP_MULTI + i] = "BINARY_OP %d %s" % (i, mp_binary_op_method_name[i])
315
Damien George0699c6b2016-01-31 21:45:22 +0000316
Damien George9c8a5632022-04-07 23:53:37 +1000317# This definition of a small int covers all possible targets, in the sense that every
318# target can encode as a small int, an integer that passes this test. The minimum is set
319# by MICROPY_OBJ_REPR_B on a 16-bit machine, where there are 14 bits for the small int.
def mp_small_int_fits(i):
    """Return True when i lies in the 14-bit signed range [-0x2000, 0x1FFF]."""
    limit = 1 << 13
    return -limit <= i < limit
322
323
Damien George0699c6b2016-01-31 21:45:22 +0000324# this function mirrors that in py/bc.c
def mp_opcode_format(bytecode, ip, count_var_uint):
    """Return (format, size) for the opcode at bytecode[ip].

    format is one of the MP_BC_FORMAT_* values and size is the number of
    bytes the instruction occupies.  When count_var_uint is false the
    variable-length argument of a VAR_UINT opcode is not included in size.
    """
    start = ip
    opcode = bytecode[start]
    # Two format bits per group of 16 opcodes, selected by the top nibble.
    fmt = (0x000003A4 >> (2 * (opcode >> 4))) & 3

    if fmt == MP_BC_FORMAT_QSTR:
        # Fixed size: opcode byte plus two argument bytes.
        return fmt, 3

    ip += 1
    if fmt == MP_BC_FORMAT_VAR_UINT:
        if count_var_uint:
            # Skip continuation bytes (top bit set), then the final byte.
            while bytecode[ip] & 0x80 != 0:
                ip += 1
            ip += 1
    elif fmt == MP_BC_FORMAT_OFFSET:
        # The offset is one byte, or two when the top bit of the first is set.
        ip += 1 if bytecode[ip] & 0x80 == 0 else 2
    if (opcode & MP_BC_MASK_EXTRA_BYTE) == 0:
        ip += 1
    return fmt, ip - start
346
Damien George69661f32020-02-27 15:36:53 +1100347
def mp_opcode_decode(bytecode, ip):
    """Decode the opcode at bytecode[ip].

    Returns (format, size, arg): the MP_BC_FORMAT_* value, the number of
    bytes the instruction occupies and its decoded argument (0 when the
    format has no argument).
    """
    start = ip
    opcode = bytecode[ip]
    # Two format bits per group of 16 opcodes, selected by the top nibble.
    fmt = (0x000003A4 >> (2 * (opcode >> 4))) & 3
    has_extra_byte = (opcode & MP_BC_MASK_EXTRA_BYTE) == 0
    ip += 1
    arg = 0

    if fmt == MP_BC_FORMAT_QSTR or fmt == MP_BC_FORMAT_VAR_UINT:
        # Variable-length unsigned int: 7 bits per byte, most significant
        # group first, top bit set on every byte except the last.
        while True:
            b = bytecode[ip]
            ip += 1
            arg = (arg << 7) | (b & 0x7F)
            if b & 0x80 == 0:
                break
    elif fmt == MP_BC_FORMAT_OFFSET:
        is_signed = opcode in Opcodes.ALL_OFFSET_SIGNED
        first = bytecode[ip]
        if first & 0x80 == 0:
            # One-byte offset; signed offsets are stored biased by 0x40.
            arg = first
            ip += 1
            if is_signed:
                arg -= 0x40
        else:
            # Two-byte offset; signed offsets are stored biased by 0x4000.
            arg = (first & 0x7F) | (bytecode[ip + 1] << 7)
            ip += 2
            if is_signed:
                arg -= 0x4000

    if has_extra_byte:
        ip += 1
    return fmt, ip - start, arg
374
375
def read_prelude_sig(read_byte):
    """Decode a prelude signature by pulling bytes from read_byte().

    Returns the tuple (S, E, F, A, K, D); extract_prelude() unpacks it as
    (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args,
    n_def_pos_args).
    """
    byte = read_byte()
    # First byte layout: xSSSSEAA
    n_state = (byte >> 3) & 0xF
    n_exc_stack = (byte >> 2) & 0x1
    n_pos_args = byte & 0x3
    scope_flags = n_kwonly_args = n_def_pos_args = 0

    shift = 0
    while byte & 0x80:
        # Continuation byte layout: xFSSKAED
        byte = read_byte()
        n_state |= (byte & 0x30) << (2 * shift)
        n_exc_stack |= (byte & 0x02) << shift
        scope_flags |= ((byte & 0x40) >> 6) << shift
        n_pos_args |= (byte & 0x4) << shift
        n_kwonly_args |= ((byte & 0x08) >> 3) << shift
        n_def_pos_args |= (byte & 0x1) << shift
        shift += 1

    # The S field is stored minus one.
    return n_state + 1, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args
398
Damien George69661f32020-02-27 15:36:53 +1100399
def read_prelude_size(read_byte):
    """Decode the prelude size fields by pulling bytes from read_byte().

    Returns (I, C); extract_prelude() names them n_info and n_cell.
    """
    n_info = n_cell = 0
    index = 0
    more = True
    while more:
        byte = read_byte()
        # Byte layout: xIIIIIIC (top bit set means another byte follows).
        n_info |= ((byte & 0x7E) >> 1) << (6 * index)
        n_cell |= (byte & 1) << index
        more = bool(byte & 0x80)
        index += 1
    return n_info, n_cell
413
Damien George69661f32020-02-27 15:36:53 +1100414
def extract_prelude(bytecode, ip):
    """Parse the prelude that starts at bytecode[ip].

    Returns a 5-tuple:
      - offset of the first name qstr (the simple name),
      - offset of the first opcode,
      - offset just past the name qstrs,
      - (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args,
        n_def_pos_args),
      - list of qstr values: the simple name followed by one per positional
        and keyword-only argument.
    """
    # One-element list so the nested reader can advance the position on both
    # Python 2 and Python 3.
    cursor = [ip]

    def next_byte():
        value = bytecode[cursor[0]]
        cursor[0] += 1
        return value

    (
        n_state,
        n_exc_stack,
        scope_flags,
        n_pos_args,
        n_kwonly_args,
        n_def_pos_args,
    ) = read_prelude_sig(next_byte)
    n_info, n_cell = read_prelude_size(next_byte)

    offset_names = cursor[0]
    offset_opcodes = offset_names + n_info + n_cell

    # Read the simple name and the argument names, each stored as a
    # variable-length unsigned int.
    names = []
    for _ in range(1 + n_pos_args + n_kwonly_args):
        qstr_value = 0
        while True:
            b = next_byte()
            qstr_value = (qstr_value << 7) | (b & 0x7F)
            if b & 0x80 == 0:
                break
        names.append(qstr_value)

    signature = (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args, n_def_pos_args)
    return offset_names, offset_opcodes, cursor[0], signature, names
Damien George0699c6b2016-01-31 21:45:22 +0000457
Damien George69661f32020-02-27 15:36:53 +1100458
class QStrType:
    """A qstr: its text, its escaped form and its MP_QSTR_ identifier."""

    def __init__(self, str):
        escaped = qstrutil.qstr_escape(str)
        self.str = str
        self.qstr_esc = escaped
        self.qstr_id = "MP_QSTR_" + escaped
464
465
class GlobalQStrList:
    """Ordered list of qstrs, pre-populated with the static qstrs."""

    def __init__(self):
        # Slot 0 is a placeholder: MP_QSTRnull should never be referenced.
        self.qstrs = [None]
        self.qstrs.extend(QStrType(text) for text in qstrutil.static_qstr_list)

    def add(self, s):
        """Append a new qstr for the string s and return it."""
        entry = QStrType(s)
        self.qstrs.append(entry)
        return entry

    def get_by_index(self, i):
        """Return the entry stored at position i."""
        return self.qstrs[i]

    def find_by_str(self, s):
        """Return the first qstr whose text equals s, or None if there is none."""
        for entry in self.qstrs:
            if entry is None:
                continue
            if entry.str == s:
                return entry
        return None
486
Damien Georgee6479662022-04-08 14:04:21 +1000487
class MPFunTable:
    """Marker object whose repr is the text "mp_fun_table"."""

    def __repr__(self):
        text = "mp_fun_table"
        return text
Damien Georgeea3c80a2019-02-21 15:18:59 +1100491
Damien George69661f32020-02-27 15:36:53 +1100492
Damien Georgef2040bf2021-10-22 22:22:47 +1100493class CompiledModule:
494 def __init__(
495 self,
496 mpy_source_file,
497 mpy_segments,
498 header,
499 qstr_table,
500 obj_table,
501 raw_code,
502 raw_code_file_offset,
503 escaped_name,
504 ):
505 self.mpy_source_file = mpy_source_file
506 self.mpy_segments = mpy_segments
507 self.source_file = qstr_table[0]
508 self.header = header
509 self.qstr_table = qstr_table
510 self.obj_table = obj_table
511 self.raw_code_file_offset = raw_code_file_offset
512 self.raw_code = raw_code
513 self.escaped_name = escaped_name
Damien George0699c6b2016-01-31 21:45:22 +0000514
    def hexdump(self):
        """Print a coloured hex dump of the file named by self.mpy_source_file.

        The file is read WIDTH bytes at a time and one line is printed per
        chunk: offset, hex bytes, printable characters, then the names of the
        segments that start on that line.  Each entry of self.mpy_segments
        supplies `start`, `end`, `kind` and `name`; `kind` selects the colour
        pair from COL_TABLE.
        """
        with open(self.mpy_source_file, "rb") as f:
            WIDTH = 16
            COL_OFF = "\033[0m"
            # Two alternating colours per segment kind, indexed by kind.
            COL_TABLE = (
                ("", ""),  # META
                ("\033[0;31m", "\033[0;91m"),  # QSTR
                ("\033[0;32m", "\033[0;92m"),  # OBJ
                ("\033[0;34m", "\033[0;94m"),  # CODE
            )
            cur_col = ""  # colour escape currently in effect ("" for none)
            cur_col_index = 0  # which colour of the pair the next segment uses
            offset = 0  # file offset of the first byte of the current line
            segment_index = 0  # next segment that has not finished yet
            while True:
                data = bytes_cons(f.read(WIDTH))
                if not data:
                    break

                # Print out the hex dump of this line of data.
                # A colour still active from the previous line is re-applied.
                line_hex = cur_col
                line_chr = cur_col
                line_comment = ""
                for i in range(len(data)):
                    # Determine the colour of the data, if any, and the line comment.
                    while segment_index < len(self.mpy_segments):
                        if offset + i == self.mpy_segments[segment_index].start:
                            cur_col = COL_TABLE[self.mpy_segments[segment_index].kind][
                                cur_col_index
                            ]
                            # Alternate the colour so adjacent segments differ.
                            cur_col_index = 1 - cur_col_index
                            line_hex += cur_col
                            line_chr += cur_col
                            line_comment += " %s%s%s" % (
                                cur_col,
                                self.mpy_segments[segment_index].name,
                                COL_OFF,
                            )
                        if offset + i == self.mpy_segments[segment_index].end:
                            # The colour is switched off before this byte is
                            # emitted, so `end` is exclusive.  Move on and
                            # re-check, as the next segment may start here.
                            cur_col = ""
                            line_hex += COL_OFF
                            line_chr += COL_OFF
                            segment_index += 1
                        else:
                            break

                    # Add to the hex part of the line.
                    if i % 2 == 0:
                        line_hex += " "
                    line_hex += "%02x" % data[i]

                    # Add to the characters part of the line.
                    if 0x20 <= data[i] <= 0x7E:
                        line_chr += "%s" % chr(data[i])
                    else:
                        line_chr += "."

                # Print out this line.
                if cur_col:
                    line_hex += COL_OFF
                    line_chr += COL_OFF
                # Each pair of bytes takes 5 hex-column characters (a space
                # and four digits); pad a short final line to keep alignment.
                pad = " " * ((WIDTH - len(data)) * 5 // 2)
                print("%08x:%s%s %s %s" % (offset, line_hex, pad, line_chr, line_comment))
                offset += WIDTH
579
580 def disassemble(self):
581 print("mpy_source_file:", self.mpy_source_file)
582 print("source_file:", self.source_file.str)
583 print("header:", hexlify_to_str(self.header))
584 print("qstr_table[%u]:" % len(self.qstr_table))
585 for q in self.qstr_table:
586 print(" %s" % q.str)
587 print("obj_table:", self.obj_table)
588 self.raw_code.disassemble()
589
590 def freeze(self, compiled_module_index):
591 print()
592 print("/" * 80)
593 print("// frozen module %s" % self.escaped_name)
594 print("// - original source file: %s" % self.mpy_source_file)
595 print("// - frozen file name: %s" % self.source_file.str)
596 print("// - .mpy header: %s" % ":".join("%02x" % b for b in self.header))
597 print()
598
599 self.raw_code.freeze()
600 print()
601
602 self.freeze_constants()
603
604 print()
605 print("static const mp_frozen_module_t frozen_module_%s = {" % self.escaped_name)
606 print(" .constants = {")
607 if len(self.qstr_table):
608 print(
609 " .qstr_table = (qstr_short_t *)&const_qstr_table_data_%s,"
610 % self.escaped_name
611 )
612 else:
613 print(" .qstr_table = NULL,")
614 if len(self.obj_table):
615 print(" .obj_table = (mp_obj_t *)&const_obj_table_data_%s," % self.escaped_name)
616 else:
617 print(" .obj_table = NULL,")
618 print(" },")
619 print(" .rc = &raw_code_%s," % self.raw_code.escaped_name)
620 print("};")
Damien George0699c6b2016-01-31 21:45:22 +0000621
    def freeze_constant_obj(self, obj_name, obj):
        """Return the C expression that refers to the constant obj.

        Constants that need their own storage (non-empty strings/bytes with
        no existing qstr, large ints, floats, complex numbers, non-empty
        tuples) have a C definition named obj_name printed first; the returned
        expression then refers to it.  Tuples recurse, naming each element
        "<obj_name>_<index>".

        Raises FreezeError for large ints the configured long-int
        implementation cannot hold, and for unsupported object types.
        """
        # Module-level size accounting counters updated as data is emitted.
        global const_str_content, const_int_content, const_obj_content

        if isinstance(obj, MPFunTable):
            return "&mp_fun_table"
        elif obj is None:
            return "MP_ROM_NONE"
        elif obj is False:
            return "MP_ROM_FALSE"
        elif obj is True:
            return "MP_ROM_TRUE"
        elif obj is Ellipsis:
            return "MP_ROM_PTR(&mp_const_ellipsis_obj)"
        elif is_str_type(obj) or is_bytes_type(obj):
            # Empty values map to existing objects, so nothing is emitted.
            if len(obj) == 0:
                if is_str_type(obj):
                    return "MP_ROM_QSTR(MP_QSTR_)"
                else:
                    return "MP_ROM_PTR(&mp_const_empty_bytes_obj)"
            if is_str_type(obj):
                # Reuse an existing global qstr with the same text if any.
                q = global_qstrs.find_by_str(obj)
                if q:
                    return "MP_ROM_QSTR(%s)" % q.qstr_id
                obj = bytes_cons(obj, "utf8")
                obj_type = "mp_type_str"
            else:
                obj_type = "mp_type_bytes"
            # Emit the data as a C string of \xNN escapes, with hash and length.
            print(
                'static const mp_obj_str_t %s = {{&%s}, %u, %u, (const byte*)"%s"};'
                % (
                    obj_name,
                    obj_type,
                    qstrutil.compute_hash(obj, config.MICROPY_QSTR_BYTES_IN_HASH),
                    len(obj),
                    "".join(("\\x%02x" % b) for b in obj),
                )
            )
            const_str_content += len(obj)
            const_obj_content += 4 * 4
            return "MP_ROM_PTR(&%s)" % obj_name
        elif is_int_type(obj):
            if mp_small_int_fits(obj):
                # Encode directly as a small integer object.
                return "MP_ROM_INT(%d)" % obj
            elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_NONE:
                raise FreezeError(self, "target does not support long int")
            elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_LONGLONG:
                # TODO
                raise FreezeError(self, "freezing int to long-long is not implemented")
            elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_MPZ:
                # Emit sign and magnitude; the magnitude is split into
                # digits of MPZ_DIG_SIZE bits, least significant first.
                neg = 0
                if obj < 0:
                    obj = -obj
                    neg = 1
                bits_per_dig = config.MPZ_DIG_SIZE
                digs = []
                z = obj
                while z:
                    digs.append(z & ((1 << bits_per_dig) - 1))
                    z >>= bits_per_dig
                ndigs = len(digs)
                digs = ",".join(("%#x" % d) for d in digs)
                print(
                    "static const mp_obj_int_t %s = {{&mp_type_int}, "
                    "{.neg=%u, .fixed_dig=1, .alloc=%u, .len=%u, .dig=(uint%u_t*)(const uint%u_t[]){%s}}};"
                    % (obj_name, neg, ndigs, ndigs, bits_per_dig, bits_per_dig, digs)
                )
                # digs is now a comma-separated string, so commas + 1 is the
                # number of digits.
                const_int_content += (digs.count(",") + 1) * bits_per_dig // 8
                const_obj_content += 4 * 4
                return "MP_ROM_PTR(&%s)" % obj_name
            # NOTE(review): any other MICROPY_LONGINT_IMPL value falls through
            # and the method returns None -- confirm that cannot happen.
        elif type(obj) is float:
            # The encoding depends on the target's object representation, so
            # emit a macro with one definition per MICROPY_OBJ_REPR and
            # return the macro name.
            macro_name = "%s_macro" % obj_name
            print(
                "#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B"
            )
            print(
                "static const mp_obj_float_t %s = {{&mp_type_float}, (mp_float_t)%.16g};"
                % (obj_name, obj)
            )
            print("#define %s MP_ROM_PTR(&%s)" % (macro_name, obj_name))
            print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C")
            # REPR_C: the 32-bit single-precision pattern with the low two
            # bits replaced by 0b10, plus 0x80800000.
            # NOTE(review): struct.pack("<f", ...) raises OverflowError for
            # values outside the single-precision range -- confirm handling.
            n = struct.unpack("<I", struct.pack("<f", obj))[0]
            n = ((n & ~0x3) | 2) + 0x80800000
            print("#define %s ((mp_rom_obj_t)(0x%08x))" % (macro_name, n))
            print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D")
            # REPR_D: the 64-bit double-precision pattern plus
            # 0x8004000000000000.
            n = struct.unpack("<Q", struct.pack("<d", obj))[0]
            n += 0x8004000000000000
            print("#define %s ((mp_rom_obj_t)(0x%016x))" % (macro_name, n))
            print("#endif")
            const_obj_content += 3 * 4
            return macro_name
        elif type(obj) is complex:
            print(
                "static const mp_obj_complex_t %s = {{&mp_type_complex}, (mp_float_t)%.16g, (mp_float_t)%.16g};"
                % (obj_name, obj.real, obj.imag)
            )
            return "MP_ROM_PTR(&%s)" % obj_name
        elif type(obj) is tuple:
            if len(obj) == 0:
                return "MP_ROM_PTR(&mp_const_empty_tuple_obj)"
            else:
                # Freeze the elements first so their definitions are printed
                # before the tuple that refers to them.
                obj_refs = []
                for i, sub_obj in enumerate(obj):
                    sub_obj_name = "%s_%u" % (obj_name, i)
                    obj_refs.append(self.freeze_constant_obj(sub_obj_name, sub_obj))
                print(
                    "static const mp_rom_obj_tuple_t %s = {{&mp_type_tuple}, %d, {"
                    % (obj_name, len(obj))
                )
                for ref in obj_refs:
                    print(" %s," % ref)
                print("}};")
                return "MP_ROM_PTR(&%s)" % obj_name
        else:
            raise FreezeError(self, "freezing of object %r is not implemented" % (obj,))
737
738 def freeze_constants(self):
Damien Georgef2040bf2021-10-22 22:22:47 +1100739 if len(self.qstr_table):
740 print(
741 "static const qstr_short_t const_qstr_table_data_%s[%u] = {"
742 % (self.escaped_name, len(self.qstr_table))
743 )
744 for q in self.qstr_table:
745 print(" %s," % q.qstr_id)
746 print("};")
747
748 if not len(self.obj_table):
749 return
750
Damien George0699c6b2016-01-31 21:45:22 +0000751 # generate constant objects
Damien Georgef2040bf2021-10-22 22:22:47 +1100752 print()
753 print("// constants")
Damien George68b3aee2022-03-31 16:20:23 +1100754 obj_refs = []
Damien Georgef2040bf2021-10-22 22:22:47 +1100755 for i, obj in enumerate(self.obj_table):
Damien George69661f32020-02-27 15:36:53 +1100756 obj_name = "const_obj_%s_%u" % (self.escaped_name, i)
Damien George68b3aee2022-03-31 16:20:23 +1100757 obj_refs.append(self.freeze_constant_obj(obj_name, obj))
Damien George0699c6b2016-01-31 21:45:22 +0000758
Damien Georgef2040bf2021-10-22 22:22:47 +1100759 # generate constant table
760 print()
761 print("// constant table")
762 print(
763 "static const mp_rom_obj_t const_obj_table_data_%s[%u] = {"
764 % (self.escaped_name, len(self.obj_table))
765 )
Damien George68b3aee2022-03-31 16:20:23 +1100766 for ref in obj_refs:
767 print(" %s," % ref)
Damien Georgef2040bf2021-10-22 22:22:47 +1100768 print("};")
Damien George0699c6b2016-01-31 21:45:22 +0000769
Damien Georgef2040bf2021-10-22 22:22:47 +1100770 global const_table_ptr_content
771 const_table_ptr_content += len(self.obj_table)
772
773
774class RawCode(object):
775 # a set of all escaped names, to make sure they are unique
776 escaped_names = set()
777
778 # convert code kind number to string
779 code_kind_str = {
780 MP_CODE_BYTECODE: "MP_CODE_BYTECODE",
781 MP_CODE_NATIVE_PY: "MP_CODE_NATIVE_PY",
782 MP_CODE_NATIVE_VIPER: "MP_CODE_NATIVE_VIPER",
783 MP_CODE_NATIVE_ASM: "MP_CODE_NATIVE_ASM",
784 }
785
786 def __init__(self, cm_escaped_name, qstr_table, fun_data, prelude_offset, code_kind):
787 self.qstr_table = qstr_table
788 self.fun_data = fun_data
789 self.prelude_offset = prelude_offset
790 self.code_kind = code_kind
791
792 if code_kind in (MP_CODE_BYTECODE, MP_CODE_NATIVE_PY):
793 (
794 self.offset_names,
795 self.offset_opcodes,
796 self.offset_line_info,
797 self.prelude,
798 self.names,
799 ) = extract_prelude(self.fun_data, prelude_offset)
800 self.scope_flags = self.prelude[2]
801 self.n_pos_args = self.prelude[3]
802 self.simple_name = self.qstr_table[self.names[0]]
Damien Georgeb6a32892017-08-12 22:26:18 +1000803 else:
Damien Georgef2040bf2021-10-22 22:22:47 +1100804 self.simple_name = self.qstr_table[0]
805
806 escaped_name = cm_escaped_name + "_" + self.simple_name.qstr_esc
807
808 # make sure the escaped name is unique
809 i = 2
810 unique_escaped_name = escaped_name
811 while unique_escaped_name in self.escaped_names:
812 unique_escaped_name = escaped_name + str(i)
813 i += 1
814 self.escaped_names.add(unique_escaped_name)
815 self.escaped_name = unique_escaped_name
816
817 def disassemble_children(self):
818 print(" children:", [rc.simple_name.str for rc in self.children])
819 for rc in self.children:
820 rc.disassemble()
821
822 def freeze_children(self):
823 # Freeze children and generate table of children.
824 if len(self.children):
825 for rc in self.children:
826 print("// child of %s" % self.escaped_name)
827 rc.freeze()
828 print()
829 print("static const mp_raw_code_t *const children_%s[] = {" % self.escaped_name)
830 for rc in self.children:
831 print(" &raw_code_%s," % rc.escaped_name)
832 print("};")
833 print()
834
def freeze_raw_code(self, qstr_links=(), type_sig=0):
    """Print the C definition of this raw code's mp_raw_code_t.

    qstr_links is only used for its length (the .n_qstr field) and type_sig
    is printed verbatim as .type_sig.  The module-level counters
    raw_code_count and raw_code_content are updated as a side effect.
    """
    global raw_code_count, raw_code_content

    name = self.escaped_name
    n_children = len(self.children)

    print("static const mp_raw_code_t raw_code_%s = {" % name)
    print(" .kind = %s," % RawCode.code_kind_str[self.code_kind])
    print(" .scope_flags = 0x%02x," % self.scope_flags)
    print(" .n_pos_args = %u," % self.n_pos_args)
    print(" .fun_data = fun_data_%s," % name)
    print(" #if MICROPY_PERSISTENT_CODE_SAVE || MICROPY_DEBUG_PRINTERS")
    print(" .fun_data_len = %u," % len(self.fun_data))
    print(" #endif")
    if n_children:
        print(" .children = (void *)&children_%s," % name)
    else:
        print(" .children = NULL,")
    print(" #if MICROPY_PERSISTENT_CODE_SAVE")
    print(" .n_children = %u," % n_children)
    if self.code_kind == MP_CODE_BYTECODE:
        # The prelude struct is only emitted for bytecode; its first six
        # fields come straight from the parsed prelude tuple, in order.
        prelude_fields = (
            "n_state",
            "n_exc_stack",
            "scope_flags",
            "n_pos_args",
            "n_kwonly_args",
            "n_def_pos_args",
        )
        print(" #if MICROPY_PY_SYS_SETTRACE")
        print(" .prelude = {")
        for idx, field in enumerate(prelude_fields):
            print(" .%s = %u," % (field, self.prelude[idx]))
        print(" .qstr_block_name_idx = %u," % self.names[0])
        print(" .line_info = fun_data_%s + %u," % (name, self.offset_line_info))
        print(" .opcodes = fun_data_%s + %u," % (name, self.offset_opcodes))
        print(" },")
        print(" .line_of_definition = %u," % 0)  # TODO
        print(" #endif")
    print(" #if MICROPY_EMIT_MACHINE_CODE")
    print(" .prelude_offset = %u," % self.prelude_offset)
    print(" .n_qstr = %u," % len(qstr_links))
    print(" .qstr_link = NULL,")  # TODO
    print(" #endif")
    print(" #endif")
    print(" #if MICROPY_EMIT_MACHINE_CODE")
    print(" .type_sig = %u," % type_sig)
    print(" #endif")
    print("};")

    # Size accounting: each raw code is counted as 4 * 4 bytes.
    raw_code_count += 1
    raw_code_content += 4 * 4
885
Damien George0699c6b2016-01-31 21:45:22 +0000886
class RawCodeBytecode(RawCode):
    """Raw code of kind MP_CODE_BYTECODE, with its table of constant objects."""

    def __init__(self, cm_escaped_name, qstr_table, obj_table, fun_data):
        """Keep obj_table and initialise the base class with prelude offset 0."""
        self.obj_table = obj_table
        super(RawCodeBytecode, self).__init__(
            cm_escaped_name, qstr_table, fun_data, 0, MP_CODE_BYTECODE
        )

    def disassemble(self):
        """Print a human-readable listing of the bytecode, then of the children."""
        code = self.fun_data
        arg_names = [self.qstr_table[i].str for i in self.names[1:]]
        print("simple_name:", self.simple_name.str)
        print(" raw bytecode:", len(code), hexlify_to_str(code))
        print(" prelude:", self.prelude)
        print(" args:", arg_names)
        print(" line info:", hexlify_to_str(code[self.offset_line_info : self.offset_opcodes]))

        pos = self.offset_opcodes
        while pos < len(code):
            fmt, size, arg = mp_opcode_decode(code, pos)
            opcode = code[pos]
            if opcode == Opcodes.MP_BC_LOAD_CONST_OBJ:
                # Show the constant itself rather than its table index.
                arg = repr(self.obj_table[arg])
            if fmt == MP_BC_FORMAT_QSTR:
                arg = self.qstr_table[arg].str
            elif fmt != MP_BC_FORMAT_VAR_UINT and fmt != MP_BC_FORMAT_OFFSET:
                # Any other format prints with an empty argument column.
                arg = ""
            raw_hex = hexlify_to_str(code[pos : pos + size])
            print(" %-11s %s %s" % (raw_hex, Opcodes.mapping[opcode], arg))
            pos += size
        self.disassemble_children()

    def freeze(self):
        """Print the bytecode as a C byte array, then the children and raw code.

        Adds the bytecode length to the module-level bc_content counter.
        """
        global bc_content

        code = self.fun_data

        def hex_row(chunk):
            # One leading space followed by "0xNN," for every byte in chunk.
            return " " + "".join("0x%02x," % byte for byte in chunk)

        # generate bytecode data
        print(
            "// frozen bytecode for file %s, scope %s"
            % (self.qstr_table[0].str, self.escaped_name)
        )
        print("static const byte fun_data_%s[%u] = {" % (self.escaped_name, len(code)))

        print(hex_row(code[: self.offset_names]) + " // prelude")

        names_comment = ", ".join(self.qstr_table[i].str for i in self.names)
        print(
            hex_row(code[self.offset_names : self.offset_line_info])
            + " // names: %s" % names_comment
        )

        print(hex_row(code[self.offset_line_info : self.offset_opcodes]) + " // code info")

        # One output line per opcode, annotated with its name and argument.
        pos = self.offset_opcodes
        while pos < len(code):
            fmt, size, arg = mp_opcode_decode(code, pos)
            annotation = Opcodes.mapping[code[pos]]
            if fmt == MP_BC_FORMAT_QSTR:
                annotation += " " + repr(self.qstr_table[arg].str)
            elif fmt in (MP_BC_FORMAT_VAR_UINT, MP_BC_FORMAT_OFFSET):
                annotation += " %u" % arg
            opcode_bytes = ",".join("0x%02x" % byte for byte in code[pos : pos + size])
            print(" %s, // %s" % (opcode_bytes, annotation))
            pos += size

        print("};")

        self.freeze_children()
        self.freeze_raw_code()

        bc_content += len(code)
Damien Georgeea3c80a2019-02-21 15:18:59 +1100962
Damien George69661f32020-02-27 15:36:53 +1100963
class RawCodeNative(RawCode):
    """Raw code holding machine code (native Python, viper or inline asm)."""

    def __init__(
        self,
        cm_escaped_name,
        qstr_table,
        kind,
        fun_data,
        prelude_offset,
        qstr_links,
        scope_flags,
        n_pos_args,
        type_sig,
    ):
        """Initialise the base class and pick the C attributes for fun_data.

        scope_flags and n_pos_args are only stored for the viper and asm
        kinds; for other kinds the base class has already set them from the
        prelude.  The section and alignment attributes depend on
        config.native_arch.
        """
        super(RawCodeNative, self).__init__(
            cm_escaped_name, qstr_table, fun_data, prelude_offset, kind
        )

        if kind == MP_CODE_NATIVE_VIPER or kind == MP_CODE_NATIVE_ASM:
            self.scope_flags = scope_flags
            self.n_pos_args = n_pos_args

        self.qstr_links = qstr_links
        self.type_sig = type_sig

        arch = config.native_arch

        # x86/x64/Xtensa use one form of the section attribute, everything
        # else the other.
        uses_at_progbits = arch in (
            MP_NATIVE_ARCH_X86,
            MP_NATIVE_ARCH_X64,
            MP_NATIVE_ARCH_XTENSA,
            MP_NATIVE_ARCH_XTENSAWIN,
        )
        if uses_at_progbits:
            attributes = '__attribute__((section(".text,\\"ax\\",@progbits # ")))'
        else:
            attributes = '__attribute__((section(".text,\\"ax\\",%progbits @ ")))'

        # Allow single-byte alignment by default for x86/x64.
        # ARM needs word alignment, ARM Thumb needs halfword, due to instruction size.
        # Xtensa needs word alignment due to the 32-bit constant table embedded in the code.
        four_byte_archs = (
            MP_NATIVE_ARCH_ARMV6,
            MP_NATIVE_ARCH_XTENSA,
            MP_NATIVE_ARCH_XTENSAWIN,
        )
        if arch in four_byte_archs:
            # ARMV6 or Xtensa -- four byte align.
            attributes += " __attribute__ ((aligned (4)))"
        elif MP_NATIVE_ARCH_ARMV6M <= arch <= MP_NATIVE_ARCH_ARMV7EMDP:
            # ARMVxxM -- two byte align.
            attributes += " __attribute__ ((aligned (2)))"
        self.fun_data_attributes = attributes

    def disassemble(self):
        """Print a summary of the machine code.

        Only the first 32 bytes are shown in the summary line.  For kinds
        other than MP_CODE_NATIVE_PY nothing further is printed (children
        included); for native Python the prelude info, a hex dump up to
        prelude_offset and the children follow.
        """
        data = self.fun_data
        truncated_marker = "..." if len(data) > 32 else ""
        print("simple_name:", self.simple_name.str)
        print(" raw data:", len(data), hexlify_to_str(data[:32]), truncated_marker)
        if self.code_kind != MP_CODE_NATIVE_PY:
            return

        arg_names = [self.qstr_table[i].str for i in self.names[1:]]
        print(" prelude:", self.prelude)
        print(" args:", arg_names)
        print(" line info:", data[self.offset_line_info : self.offset_opcodes])

        # Hex dump in rows of 16 bytes, stopping at the prelude.
        row_size = 16
        for row_start in range(0, self.prelude_offset, row_size):
            row_end = min(row_start + row_size, self.prelude_offset)
            print(" ", hexlify_to_str(data[row_start:row_end]))
        self.disassemble_children()

    def _asm_thumb_rewrite_mov(self, pc, val):
        """Print four C byte expressions that merge val into the bytes at pc.

        Bytes pc, pc + 1 and pc + 3 of fun_data are masked and combined with
        shifted parts of val; the third expression depends on val alone.
        """
        code = self.fun_data
        pieces = (
            " (%u & 0xf0) | (%s >> 12)," % (code[pc], val),
            " (%u & 0xfb) | (%s >> 9 & 0x04)," % (code[pc + 1], val),
            " (%s & 0xff)," % (val,),
            " (%u & 0x07) | (%s >> 4 & 0x70)," % (code[pc + 3], val),
        )
        print("".join(pieces))

    def _link_qstr(self, pc, kind, qst):
        """Print the C bytes for one qstr link and return how many bytes it covers.

        kind 0 is a generic 16-bit link (2 bytes).  Any other kind is
        architecture specific, and kind 2 wraps qst as a qstr object first.
        """
        if kind == 0:
            # Generic 16-bit link
            print(" %s & 0xff, %s >> 8," % (qst, qst))
            return 2

        # Architecture-specific link
        is_obj = kind == 2
        if is_obj:
            qst = "((uintptr_t)MP_OBJ_NEW_QSTR(%s))" % qst

        arch = config.native_arch
        word_link_archs = (
            MP_NATIVE_ARCH_X86,
            MP_NATIVE_ARCH_X64,
            MP_NATIVE_ARCH_ARMV6,
            MP_NATIVE_ARCH_XTENSA,
            MP_NATIVE_ARCH_XTENSAWIN,
        )
        if arch in word_link_archs:
            # Four bytes, least-significant byte first.
            print(
                " %s & 0xff, (%s >> 8) & 0xff, (%s >> 16) & 0xff, %s >> 24,"
                % ((qst,) * 4)
            )
            return 4

        if MP_NATIVE_ARCH_ARMV6M <= arch <= MP_NATIVE_ARCH_ARMV7EMDP:
            # qstr number: movw instruction only.
            self._asm_thumb_rewrite_mov(pc, qst)
            if not is_obj:
                return 4
            # qstr object: movw and movt.
            self._asm_thumb_rewrite_mov(pc + 4, "(%s >> 16)" % qst)
            return 8

        assert 0

    def freeze(self):
        """Print the machine code as a C byte array, then children and raw code.

        Raises FreezeError when scope_flags has any bit set outside 0x0F.
        Bytes covered by a qstr link are produced by _link_qstr(); the rest
        is copied verbatim in rows of at most 16 bytes.
        """
        if self.scope_flags & ~0x0F:
            raise FreezeError("unable to freeze code with relocations")

        code = self.fun_data
        links = self.qstr_links

        # generate native code data
        print()
        print(
            "// frozen native code for file %s, scope %s"
            % (self.qstr_table[0].str, self.escaped_name)
        )
        print(
            "static const byte fun_data_%s[%u] %s = {"
            % (self.escaped_name, len(code), self.fun_data_attributes)
        )

        end = len(code)
        pos = 0
        link_idx = 0
        while pos < end:
            links_remaining = link_idx < len(links)
            if links_remaining and pos == links[link_idx][0]:
                # link qstr
                _, link_kind, link_qstr = links[link_idx]
                pos += self._link_qstr(pos, link_kind, link_qstr.qstr_id)
                link_idx += 1
                continue

            # copy machine code (max 16 bytes), stopping at the next link
            row_end = min(pos + 16, end)
            if links_remaining:
                row_end = min(row_end, links[link_idx][0])
            print(" " + "".join(" 0x%02x," % byte for byte in code[pos:row_end]))
            pos = row_end

        print("};")

        self.freeze_children()
        self.freeze_raw_code(self.qstr_links, self.type_sig)
Damien Georgeea3c80a2019-02-21 15:18:59 +11001112
Damien George69661f32020-02-27 15:36:53 +11001113
class MPYSegment:
    """A named byte range [start, end) of a .mpy file, tagged with its kind."""

    # Segment kinds.
    META, QSTR, OBJ, CODE = range(4)

    def __init__(self, kind, name, start, end):
        """Store the segment kind, its name and its start/end file offsets."""
        self.kind, self.name = kind, name
        self.start, self.end = start, end
Damien George992a6e12019-03-01 14:03:10 +11001125
Damien George69661f32020-02-27 15:36:53 +11001126
class MPYReader:
    """Thin reader over a binary file object, with .mpy variable-length ints."""

    def __init__(self, filename, fileobj):
        """Remember the file name (used in error messages) and the file object."""
        self.filename = filename
        self.fileobj = fileobj

    def tell(self):
        """Return the current position of the underlying file object."""
        position = self.fileobj.tell()
        return position

    def read_byte(self):
        """Read one byte and return it as an integer."""
        single = bytes_cons(self.fileobj.read(1))
        return single[0]

    def read_bytes(self, n):
        """Read up to n bytes and return them via bytes_cons."""
        raw = self.fileobj.read(n)
        return bytes_cons(raw)

    def read_uint(self):
        """Read a variable-length unsigned integer.

        Each byte contributes its low 7 bits, most-significant group first;
        a clear top bit marks the last byte.
        """
        value = 0
        more = True
        while more:
            byte = self.read_byte()
            value = (value << 7) | (byte & 0x7F)
            more = (byte & 0x80) != 0
        return value
Damien George992a6e12019-03-01 14:03:10 +11001149
Damien George69661f32020-02-27 15:36:53 +11001150
def read_qstr(reader, segments):
    """Read one qstr from reader, record a segment for it and return it.

    The leading uint has a flag in bit 0: when set, the remaining bits are
    an index into the global qstr table (a static qstr, recorded as a
    zero-length META segment); otherwise they are the byte length of a
    UTF-8 string that follows, terminated by one extra byte.
    """
    header_pos = reader.tell()
    header = reader.read_uint()
    is_static = header & 1
    value = header >> 1

    if is_static:
        # static qstr
        static_qstr = global_qstrs.get_by_index(value)
        segments.append(MPYSegment(MPYSegment.META, static_qstr.str, header_pos, header_pos))
        return static_qstr

    data_pos = reader.tell()
    text = str_cons(reader.read_bytes(value), "utf8")
    reader.read_byte()  # read and discard null terminator
    segments.append(MPYSegment(MPYSegment.QSTR, text, data_pos, reader.tell()))
    return global_qstrs.add(text)
Damien George0699c6b2016-01-31 21:45:22 +00001165
Damien George69661f32020-02-27 15:36:53 +11001166
def read_obj(reader, segments):
    """Read one persistent constant object from reader and return it.

    Singletons (function table, None, False, True, Ellipsis) and tuples are
    returned directly.  Every other type is length-prefixed data; for those
    an OBJ segment is appended to segments.  Raises MPYReadError for an
    unknown object type (after its length and data have been read).
    """
    obj_type = reader.read_byte()

    if obj_type == MP_PERSISTENT_OBJ_FUN_TABLE:
        return MPFunTable()

    singletons = {
        MP_PERSISTENT_OBJ_NONE: None,
        MP_PERSISTENT_OBJ_FALSE: False,
        MP_PERSISTENT_OBJ_TRUE: True,
        MP_PERSISTENT_OBJ_ELLIPSIS: Ellipsis,
    }
    if obj_type in singletons:
        return singletons[obj_type]

    if obj_type == MP_PERSISTENT_OBJ_TUPLE:
        # Items are stored back to back, each a full object.
        count = reader.read_uint()
        return tuple(read_obj(reader, segments) for _ in range(count))

    # Everything else is a length followed by that many bytes of data.
    length = reader.read_uint()
    data_pos = reader.tell()
    payload = reader.read_bytes(length)

    if obj_type == MP_PERSISTENT_OBJ_STR or obj_type == MP_PERSISTENT_OBJ_BYTES:
        reader.read_byte()  # read and discard null terminator
        if obj_type == MP_PERSISTENT_OBJ_STR:
            result = str_cons(payload, "utf8")
        else:
            result = payload
    elif obj_type == MP_PERSISTENT_OBJ_INT:
        result = int(str_cons(payload, "ascii"), 10)
    elif obj_type == MP_PERSISTENT_OBJ_FLOAT:
        result = float(str_cons(payload, "ascii"))
    elif obj_type == MP_PERSISTENT_OBJ_COMPLEX:
        result = complex(str_cons(payload, "ascii"))
    else:
        raise MPYReadError(reader.filename, "corrupt .mpy file")

    segments.append(MPYSegment(MPYSegment.OBJ, result, data_pos, reader.tell()))
    return result
Damien George0699c6b2016-01-31 21:45:22 +00001202
Damien George69661f32020-02-27 15:36:53 +11001203
def read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments):
    """Read one raw code (and, recursively, its children) from reader.

    The leading uint packs the code kind (low 2 bits, relative to
    MP_CODE_BYTECODE), a has-children flag (bit 2) and the length of the
    function data (remaining bits).  Returns a RawCodeBytecode or
    RawCodeNative whose .children list has been filled in.  A CODE segment
    for the function data is inserted into segments ahead of any segments
    produced while reading the rest of this raw code.
    """
    # Read raw code header.
    header = reader.read_uint()
    kind = (header & 3) + MP_CODE_BYTECODE
    has_children = (header >> 2) & 1
    fun_data_len = header >> 3

    # Read the body of the raw code.
    data_pos = reader.tell()
    fun_data = reader.read_bytes(fun_data_len)
    code_segment_index = len(segments)

    if kind == MP_CODE_BYTECODE:
        # Create bytecode raw code.
        rc = RawCodeBytecode(cm_escaped_name, qstr_table, obj_table, fun_data)
    else:
        # Create native raw code.
        qstr_links = []
        if kind in (MP_CODE_NATIVE_PY, MP_CODE_NATIVE_VIPER):
            # Read qstr link table; each entry packs offset (high bits) and
            # link kind (low 2 bits) ahead of the qstr itself.
            for _ in range(reader.read_uint()):
                packed = reader.read_uint()
                linked_qstr = read_qstr(reader, segments)
                qstr_links.append((packed >> 2, packed & 3, linked_qstr))

        prelude_offset = 0
        native_scope_flags = 0
        native_n_pos_args = 0
        native_type_sig = 0
        if kind == MP_CODE_NATIVE_PY:
            prelude_offset = reader.read_uint()
        else:
            native_scope_flags = reader.read_uint()
            if kind == MP_CODE_NATIVE_VIPER:
                # Read any additional sections for native viper.
                _skip_viper_sections(reader, native_scope_flags)
            else:
                assert kind == MP_CODE_NATIVE_ASM
                native_n_pos_args = reader.read_uint()
                native_type_sig = reader.read_uint()

        rc = RawCodeNative(
            cm_escaped_name,
            qstr_table,
            kind,
            fun_data,
            prelude_offset,
            qstr_links,
            native_scope_flags,
            native_n_pos_args,
            native_type_sig,
        )

    # Add a segment for the raw code data.
    code_segment = MPYSegment(
        MPYSegment.CODE, rc.simple_name.str, data_pos, data_pos + fun_data_len
    )
    segments.insert(code_segment_index, code_segment)

    # Read children, if there are any.
    rc.children = []
    if has_children:
        for _ in range(reader.read_uint()):
            child = read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments)
            rc.children.append(child)

    return rc


def _skip_viper_sections(reader, scope_flags):
    """Consume the optional viper sections selected by scope_flags.

    Everything read here is discarded; only the reader position matters.
    """
    has_rodata = scope_flags & MP_SCOPE_FLAG_VIPERRODATA
    if has_rodata:
        rodata_size = reader.read_uint()
    if scope_flags & MP_SCOPE_FLAG_VIPERBSS:
        reader.read_uint()  # bss size, not used
    if has_rodata:
        reader.read_bytes(rodata_size)
    if not (scope_flags & MP_SCOPE_FLAG_VIPERRELOC):
        return

    # Relocation entries run until a 0xFF byte.
    while True:
        op = reader.read_byte()
        if op == 0xFF:
            return
        if op & 1:
            reader.read_uint()  # optional operand, not used
        op >>= 1
        if op <= 5 and op & 1:
            reader.read_uint()  # optional operand, not used
1289
Damien George0699c6b2016-01-31 21:45:22 +00001290
def read_mpy(filename):
    """Parse the .mpy file at filename and return it as a CompiledModule.

    Side effects on the global config: MICROPY_PY_BUILTINS_STR_UNICODE and
    mp_small_int_bits are overwritten from the header, and native_arch is
    set from the file when it was previously MP_NATIVE_ARCH_NONE.

    Raises MPYReadError when the file is too short to hold the 4-byte
    header, does not start with "M", has a different .mpy version than
    config.MPY_VERSION, or targets a native architecture that conflicts
    with config.native_arch.
    """
    with open(filename, "rb") as fileobj:
        reader = MPYReader(filename, fileobj)
        segments = []

        # Read and verify the header.  The length is checked first so that an
        # empty or truncated file is reported as invalid rather than failing
        # with an IndexError on header[...].
        header = reader.read_bytes(4)
        if len(header) < 4 or header[0] != ord("M"):
            raise MPYReadError(filename, "not a valid .mpy file")
        if header[1] != config.MPY_VERSION:
            raise MPYReadError(filename, "incompatible .mpy version")

        # Feature byte: bit 1 is the unicode flag, bits 2 and up the native arch.
        feature_byte = header[2]
        config.MICROPY_PY_BUILTINS_STR_UNICODE = (feature_byte & 2) != 0
        mpy_native_arch = feature_byte >> 2
        if mpy_native_arch != MP_NATIVE_ARCH_NONE:
            if config.native_arch == MP_NATIVE_ARCH_NONE:
                # First native file seen fixes the architecture.
                config.native_arch = mpy_native_arch
            elif config.native_arch != mpy_native_arch:
                raise MPYReadError(filename, "native architecture mismatch")
        config.mp_small_int_bits = header[3]

        # Read number of qstrs, and number of objects.
        n_qstr = reader.read_uint()
        n_obj = reader.read_uint()

        # Read qstrs and construct qstr table.
        qstr_table = [read_qstr(reader, segments) for _ in range(n_qstr)]

        # Read objects and construct object table.
        obj_table = [read_obj(reader, segments) for _ in range(n_obj)]

        # Compute the compiled-module escaped name from the first qstr, with
        # "/" replaced by "_" and the last three characters dropped.
        # NOTE(review): presumably qstr 0 is the source file name ending in
        # ".py" -- confirm against the .mpy writer.
        cm_escaped_name = qstr_table[0].str.replace("/", "_")[:-3]

        # Read the outer raw code, which will in turn read all its children.
        raw_code_file_offset = reader.tell()
        raw_code = read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments)

        # Create the outer-level compiled module representing the whole .mpy file.
        return CompiledModule(
            filename,
            segments,
            header,
            qstr_table,
            obj_table,
            raw_code,
            raw_code_file_offset,
            cm_escaped_name,
        )
Damien George0699c6b2016-01-31 21:45:22 +00001344
Damien George69661f32020-02-27 15:36:53 +11001345
def hexdump_mpy(compiled_modules):
    """Call hexdump() on every compiled module, in order."""
    for module in compiled_modules:
        module.hexdump()
Damien George0699c6b2016-01-31 21:45:22 +00001349
Damien George69661f32020-02-27 15:36:53 +11001350
def disassemble_mpy(compiled_modules):
    """Call disassemble() on every compiled module, in order."""
    for module in compiled_modules:
        module.disassemble()
1354
1355
def freeze_mpy(base_qstrs, compiled_modules):
    """Print the C source that freezes compiled_modules.

    Output, in order: includes and configuration checks, the enum and pool
    of qstrs that are not already in base_qstrs, each frozen module, the
    table of module names, the table of module pointers, the optional
    MICROPY_FROZEN_LIST_ITEM list and a size summary comment.

    base_qstrs is only tested for membership of escaped qstr names.
    compiled_modules must be a sequence (it is iterated several times and
    its length is taken).  The module-level size counters are reset here
    and are expected to be updated by the freeze() calls.
    """
    global bc_content, const_str_content, const_int_content, const_obj_content, const_table_qstr_content, const_table_ptr_content, raw_code_count, raw_code_content

    # Collect the qstrs to add, in first-seen order, without duplicates.
    # Each entry is (index, escaped name, string, UTF-8 bytes).
    new = []
    seen = set()
    for q in global_qstrs.qstrs:
        # don't add duplicates
        if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in seen:
            continue
        seen.add(q.qstr_esc)
        new.append((len(new), q.qstr_esc, q.str, bytes_cons(q.str, "utf8")))

    print('#include "py/mpconfig.h"')
    print('#include "py/objint.h"')
    print('#include "py/objstr.h"')
    print('#include "py/emitglue.h"')
    print('#include "py/nativeglue.h"')
    print()

    print("#if MICROPY_LONGINT_IMPL != %u" % config.MICROPY_LONGINT_IMPL)
    print('#error "incompatible MICROPY_LONGINT_IMPL"')
    print("#endif")
    print()

    if config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_MPZ:
        print("#if MPZ_DIG_SIZE != %u" % config.MPZ_DIG_SIZE)
        print('#error "incompatible MPZ_DIG_SIZE"')
        print("#endif")
        print()

    print("#if MICROPY_PY_BUILTINS_FLOAT")
    print("typedef struct _mp_obj_float_t {")
    print(" mp_obj_base_t base;")
    print(" mp_float_t value;")
    print("} mp_obj_float_t;")
    print("#endif")
    print()

    print("#if MICROPY_PY_BUILTINS_COMPLEX")
    print("typedef struct _mp_obj_complex_t {")
    print(" mp_obj_base_t base;")
    print(" mp_float_t real;")
    print(" mp_float_t imag;")
    print("} mp_obj_complex_t;")
    print("#endif")
    print()

    if new:
        # The first new qstr continues numbering from MP_QSTRnumber_of.
        print("enum {")
        for i, (_, qstr_esc, _, _) in enumerate(new):
            if i == 0:
                print(" MP_QSTR_%s = MP_QSTRnumber_of," % qstr_esc)
            else:
                print(" MP_QSTR_%s," % qstr_esc)
        print("};")

    # As in qstr.c, set so that the first dynamically allocated pool is twice this size; must be <= the len
    qstr_pool_alloc = min(len(new), 10)

    # Reset the size counters; the module-level ones are updated by freeze().
    qstr_content = 0
    bc_content = 0
    const_str_content = 0
    const_int_content = 0
    const_obj_content = 0
    const_table_qstr_content = 0
    const_table_ptr_content = 0
    raw_code_count = 0
    raw_code_content = 0

    print()
    print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {")
    for _, _, _, qbytes in new:
        qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH)
        print(" %d," % qhash)
    print("};")
    print()
    print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {")
    for _, _, _, qbytes in new:
        print(" %d," % len(qbytes))
    print("};")
    print()
    print("extern const qstr_pool_t mp_qstr_const_pool;")
    print("const qstr_pool_t mp_qstr_frozen_const_pool = {")
    print(" &mp_qstr_const_pool, // previous pool")
    print(" MP_QSTRnumber_of, // previous pool size")
    print(" %u, // allocated entries" % qstr_pool_alloc)
    print(" %u, // used entries" % len(new))
    print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
    print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,")
    print(" {")
    for _, _, qstr, qbytes in new:
        print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes))
        # Per qstr: length field + hash field + data + one terminator byte.
        qstr_content += (
            config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1
        )
    print(" },")
    print("};")

    # Freeze all modules.
    for idx, cm in enumerate(compiled_modules):
        cm.freeze(idx)

    # Print separator, separating individual modules from global data structures.
    print()
    print("/" * 80)
    print("// collection of all frozen modules")

    # Define the string of frozen module names.
    print()
    print("const char mp_frozen_names[] = {")
    print(" #ifdef MP_FROZEN_STR_NAMES")
    # makemanifest.py might also include some frozen string content.
    print(" MP_FROZEN_STR_NAMES")
    print(" #endif")
    mp_frozen_mpy_names_content = 1  # the final "\0" entry
    for cm in compiled_modules:
        module_name = cm.source_file.str
        print(' "%s\\0"' % module_name)
        mp_frozen_mpy_names_content += len(module_name) + 1
    print(' "\\0"')
    print("};")

    # Define the array of pointers to frozen module content.
    print()
    print("const mp_frozen_module_t *const mp_frozen_mpy_content[] = {")
    for cm in compiled_modules:
        print(" &frozen_module_%s," % cm.escaped_name)
    print("};")
    # Four bytes are counted per module pointer.
    mp_frozen_mpy_content_size = len(compiled_modules) * 4

    # If a port defines MICROPY_FROZEN_LIST_ITEM then list all modules wrapped in that macro.
    print()
    print("#ifdef MICROPY_FROZEN_LIST_ITEM")
    for cm in compiled_modules:
        module_name = cm.source_file.str
        if module_name.endswith("/__init__.py"):
            # A package is listed under its directory name.
            short_name = module_name[: -len("/__init__.py")]
        else:
            short_name = module_name[: -len(".py")]
        print('MICROPY_FROZEN_LIST_ITEM("%s", "%s")' % (short_name, module_name))
    print("#endif")

    total_content = (
        qstr_content
        + bc_content
        + const_str_content
        + const_int_content
        + const_obj_content
        + const_table_qstr_content * 4
        + const_table_ptr_content * 4
        + raw_code_content
        + mp_frozen_mpy_names_content
        + mp_frozen_mpy_content_size
    )

    print()
    print("/*")
    print("byte sizes:")
    print("qstr content: %d unique, %d bytes" % (len(new), qstr_content))
    print("bc content: %d" % bc_content)
    print("const str content: %d" % const_str_content)
    print("const int content: %d" % const_int_content)
    print("const obj content: %d" % const_obj_content)
    print(
        "const table qstr content: %d entries, %d bytes"
        % (const_table_qstr_content, const_table_qstr_content * 4)
    )
    print(
        "const table ptr content: %d entries, %d bytes"
        % (const_table_ptr_content, const_table_ptr_content * 4)
    )
    print("raw code content: %d * 4 = %d" % (raw_code_count, raw_code_content))
    print("mp_frozen_mpy_names_content: %d" % mp_frozen_mpy_names_content)
    print("mp_frozen_mpy_content_size: %d" % mp_frozen_mpy_content_size)
    print("total: %d" % total_content)
    print("*/")
1537
Damien George0699c6b2016-01-31 21:45:22 +00001538
def merge_mpy(raw_codes, output_file):
    """Merge one or two loaded .mpy files into a single .mpy image.

    With a single input the output is a byte-for-byte copy of that file.
    With two inputs a new outer raw code is synthesised whose bytecode calls
    each input's top-level code in order and then returns None; the inputs'
    raw code data is appended as its children.

    Args:
        raw_codes: sequence of loaded modules.  Each item must provide
            ``mpy_source_file`` (path of the .mpy it was read from) and, when
            merging two files, ``raw_code_file_offset``, ``qstr_table`` and
            ``obj_table``.
        output_file: path to write the merged image to, or None to write the
            binary image to standard output.

    Raises:
        AssertionError: if more than two inputs are given.
        Exception: if more than one input has a populated qstr or obj table.
    """
    assert len(raw_codes) <= 2  # so var-uints all fit in 1 byte
    merged_mpy = bytearray()

    if len(raw_codes) == 1:
        # Nothing to merge: copy the single input verbatim.
        with open(raw_codes[0].mpy_source_file, "rb") as f:
            merged_mpy.extend(f.read())
    else:
        # Only one input may contribute its qstr/obj tables to the output.
        main_rc = None
        for rc in raw_codes:
            if len(rc.qstr_table) > 1 or len(rc.obj_table) > 0:
                # Must use qstr_table and obj_table from this raw_code
                if main_rc is not None:
                    raise Exception(
                        "can't merge files when more than one has a populated qstr or obj table"
                    )
                main_rc = rc
        if main_rc is None:
            main_rc = raw_codes[0]

        # Build a fresh 4-byte .mpy header from the current configuration.
        header = bytearray(4)
        header[0] = ord("M")
        header[1] = config.MPY_VERSION
        header[2] = (config.native_arch << 2) | (config.MICROPY_PY_BUILTINS_STR_UNICODE << 1)
        header[3] = config.mp_small_int_bits
        merged_mpy.extend(header)

        # Copy n_qstr, n_obj, qstr_table, obj_table from main_rc.
        with open(main_rc.mpy_source_file, "rb") as f:
            data = f.read(main_rc.raw_code_file_offset)
        # Skip main_rc's own 4-byte header; the new one was emitted above.
        merged_mpy.extend(data[4:])

        # Synthesise the outer function: 3 prelude bytes, 5 bytes per child
        # (make function, call it, pop result) and 2 bytes to return None.
        bytecode = bytearray()
        bytecode_len = 3 + len(raw_codes) * 5 + 2
        bytecode.append((bytecode_len << 3) | (1 << 2))  # kind, has_children and length
        bytecode.append(0b00000000)  # signature prelude
        bytecode.append(0b00000010)  # size prelude; n_info=1
        bytecode.extend(b"\x00")  # simple_name: qstr index 0 (will use source filename)
        for idx in range(len(raw_codes)):
            bytecode.append(0x32)  # MP_BC_MAKE_FUNCTION
            bytecode.append(idx)  # index raw code
            bytecode.extend(b"\x34\x00\x59")  # MP_BC_CALL_FUNCTION, 0 args, MP_BC_POP_TOP
        bytecode.extend(b"\x51\x63")  # MP_BC_LOAD_NONE, MP_BC_RETURN_VALUE

        merged_mpy.extend(bytecode)

        merged_mpy.append(len(raw_codes))  # n_children

        # Append each input's raw code data as a child of the outer function.
        for rc in raw_codes:
            with open(rc.mpy_source_file, "rb") as f:
                f.seek(rc.raw_code_file_offset)
                data = f.read()  # read rest of mpy file
            merged_mpy.extend(data)

    if output_file is None:
        # Binary output must bypass the text layer.  Text-mode streams expose
        # the underlying byte stream as ``.buffer``; streams without that
        # attribute (Python 2's sys.stdout, or a binary stream installed in
        # its place) are written to directly instead of failing.
        binary_stdout = getattr(sys.stdout, "buffer", sys.stdout)
        binary_stdout.write(bytes(merged_mpy))
    else:
        with open(output_file, "wb") as f:
            f.write(merged_mpy)
1598
Damien George69661f32020-02-27 15:36:53 +11001599
def main():
    """Command-line entry point.

    Parses the command line, configures the target-dependent settings in
    ``config``, loads every input .mpy file and then runs the requested
    actions in a fixed order: hexdump, disassemble, freeze, merge.
    Exits with status 1 if loading or freezing reports an error.
    """
    global global_qstrs

    import argparse

    parser = argparse.ArgumentParser(description="A tool to work with MicroPython .mpy files.")
    parser.add_argument(
        "-x", "--hexdump", action="store_true", help="output an annotated hex dump of files"
    )
    parser.add_argument(
        "-d", "--disassemble", action="store_true", help="output disassembled contents of files"
    )
    parser.add_argument("-f", "--freeze", action="store_true", help="freeze files")
    parser.add_argument("--merge", action="store_true", help="merge multiple .mpy files into one")
    parser.add_argument("-q", "--qstr-header", help="qstr header file to freeze against")
    parser.add_argument(
        "-mlongint-impl",
        choices=["none", "longlong", "mpz"],
        default="mpz",
        help="long-int implementation used by target (default mpz)",
    )
    parser.add_argument(
        "-mmpz-dig-size",
        metavar="N",
        type=int,
        default=16,
        help="mpz digit size used by target (default 16)",
    )
    parser.add_argument("-o", "--output", default=None, help="output file")
    parser.add_argument("files", nargs="+", help="input .mpy files")
    opts = parser.parse_args()

    # Apply the settings that describe the target machine.
    longint_impl_by_name = {
        "none": config.MICROPY_LONGINT_IMPL_NONE,
        "longlong": config.MICROPY_LONGINT_IMPL_LONGLONG,
        "mpz": config.MICROPY_LONGINT_IMPL_MPZ,
    }
    config.MICROPY_LONGINT_IMPL = longint_impl_by_name[opts.mlongint_impl]
    config.MPZ_DIG_SIZE = opts.mmpz_dig_size
    config.native_arch = MP_NATIVE_ARCH_NONE

    # Configure qstr encoding and obtain the base set of already-known qstrs:
    # defaults with the static qstr list, or whatever the given header says.
    if not opts.qstr_header:
        config.MICROPY_QSTR_BYTES_IN_LEN = 1
        config.MICROPY_QSTR_BYTES_IN_HASH = 1
        base_qstrs = list(qstrutil.static_qstr_list)
    else:
        qcfgs, base_qstrs = qstrutil.parse_input_headers([opts.qstr_header])
        config.MICROPY_QSTR_BYTES_IN_LEN = int(qcfgs["BYTES_IN_LEN"])
        config.MICROPY_QSTR_BYTES_IN_HASH = int(qcfgs["BYTES_IN_HASH"])

    # Start from a fresh global qstr list before any file is read.
    global_qstrs = GlobalQStrList()

    # Read every input file; a read error is reported and aborts the run.
    compiled_modules = []
    try:
        for path in opts.files:
            compiled_modules.append(read_mpy(path))
    except MPYReadError as er:
        print(er, file=sys.stderr)
        sys.exit(1)

    if opts.hexdump:
        hexdump_mpy(compiled_modules)

    if opts.disassemble:
        # Blank line separates the disassembly from a preceding hex dump.
        if opts.hexdump:
            print()
        disassemble_mpy(compiled_modules)

    if opts.freeze:
        try:
            freeze_mpy(base_qstrs, compiled_modules)
        except FreezeError as er:
            print(er, file=sys.stderr)
            sys.exit(1)

    if opts.merge:
        merge_mpy(compiled_modules, opts.output)
Damien George0699c6b2016-01-31 21:45:22 +00001680
Damien George69661f32020-02-27 15:36:53 +11001681
if __name__ == "__main__":
    # Run the tool only when executed as a script, not when imported.
    main()