blob: 3ebbdd11042ab106c780e170c81ab4fb572e4222 [file] [log] [blame]
Damien George0699c6b2016-01-31 21:45:22 +00001#!/usr/bin/env python3
2#
3# This file is part of the MicroPython project, http://micropython.org/
4#
5# The MIT License (MIT)
6#
Damien Georgefaf3d3e2019-06-04 22:13:32 +10007# Copyright (c) 2016-2019 Damien P. George
Damien George0699c6b2016-01-31 21:45:22 +00008#
9# Permission is hereby granted, free of charge, to any person obtaining a copy
10# of this software and associated documentation files (the "Software"), to deal
11# in the Software without restriction, including without limitation the rights
12# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13# copies of the Software, and to permit persons to whom the Software is
14# furnished to do so, subject to the following conditions:
15#
16# The above copyright notice and this permission notice shall be included in
17# all copies or substantial portions of the Software.
18#
19# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25# THE SOFTWARE.
26
Damien Georgec3beb162016-04-15 11:56:10 +010027# Python 2/3 compatibility code
28from __future__ import print_function
29import platform
Damien George69661f32020-02-27 15:36:53 +110030
if platform.python_version_tuple()[0] == "2":
    from binascii import hexlify as hexlify_py2

    def str_cons(val, enc=None):
        """Build a text string from val; enc is accepted for signature parity and ignored."""
        return str(val)

    def bytes_cons(val, enc=None):
        """Build a byte container (bytearray) from val; enc is accepted and ignored."""
        return bytearray(val)

    def is_str_type(o):
        """Return True if o is exactly a str."""
        return type(o) is str

    def is_bytes_type(o):
        """Return True if o is exactly a bytearray (the byte container used on Python 2)."""
        return type(o) is bytearray

    def is_int_type(o):
        """Return True if o is exactly an int or a long (bool is excluded)."""
        return type(o) is int or type(o) is long  # noqa: F821 (Python 2 only)

else:
    from binascii import hexlify

    str_cons = str
    bytes_cons = bytes

    def is_str_type(o):
        """Return True if o is exactly a str."""
        return type(o) is str

    def is_bytes_type(o):
        """Return True if o is exactly a bytes object."""
        return type(o) is bytes

    def is_int_type(o):
        """Return True if o is exactly an int (bool is excluded)."""
        return type(o) is int


def hexlify_to_str(b):
    """Return the bytes of b as lowercase two-digit hex values joined by ':'.

    b may be any object accepted by bytearray() as a buffer (bytes,
    bytearray, or a Python 2 str).  An empty input gives an empty string.

    This single implementation is used on every Python version: the
    separator argument of binascii.hexlify() only exists from Python 3.8,
    so relying on it breaks on older Python 3 interpreters.
    """
    return ":".join("%02x" % x for x in bytearray(b))
55
56
Damien Georgec3beb162016-04-15 11:56:10 +010057# end compatibility code
58
Damien George0699c6b2016-01-31 21:45:22 +000059import sys
Damien George72ae3c72016-08-10 13:26:11 +100060import struct
Damien George0699c6b2016-01-31 21:45:22 +000061
Damien George69661f32020-02-27 15:36:53 +110062sys.path.append(sys.path[0] + "/../py")
Damien George0699c6b2016-01-31 21:45:22 +000063import makeqstrdata as qstrutil
64
# Threshold of str length below which a str will be turned into a qstr when
# freezing.  This helps to reduce frozen code size because qstrs are more
# efficient to encode as objects than full mp_obj_str_t instances.
# NOTE(review): the code that applies this threshold is not in the visible part
# of the file -- confirm how the comparison is made before changing the value.
PERSISTENT_STR_INTERN_THRESHOLD = 25
69
Damien George69661f32020-02-27 15:36:53 +110070
class MPYReadError(Exception):
    """Exception that pairs a file name with a message.

    Attributes:
        filename: the file name given to the constructor.
        msg: the message given to the constructor.

    str() of the exception is "<filename>: <msg>".
    """

    def __init__(self, filename, msg):
        self.msg = msg
        self.filename = filename

    def __str__(self):
        parts = (self.filename, self.msg)
        return "%s: %s" % parts
78
79
class FreezeError(Exception):
    """Exception raised when an object cannot be frozen.

    Attributes:
        rawcode: the object that was being frozen when the error occurred.
            Within this file a CompiledModule is passed, whose ``source_file``
            is a qstr wrapper exposing the file name as ``.str``.
        msg: human-readable description of the problem.
    """

    def __init__(self, rawcode, msg):
        self.rawcode = rawcode
        self.msg = msg

    def __str__(self):
        # ``source_file`` may be a plain string or a qstr wrapper with a
        # ``.str`` attribute; show the underlying file name either way rather
        # than the wrapper's default object repr.  A missing ``source_file``
        # is reported as None instead of raising while formatting the error.
        source_file = getattr(self.rawcode, "source_file", None)
        source_name = getattr(source_file, "str", source_file)
        return "error while freezing %s: %s" % (source_name, self.msg)
87
Damien George0699c6b2016-01-31 21:45:22 +000088
class Config:
    """Container for tool configuration values.

    Only the constants below are defined here; other code in this file reads
    further attributes from the shared ``config`` instance (for example
    MICROPY_LONGINT_IMPL and MPZ_DIG_SIZE), which are assigned elsewhere.
    """

    # .mpy file format version.
    MPY_VERSION = 6

    # Possible values of the long-int implementation setting.
    (
        MICROPY_LONGINT_IMPL_NONE,
        MICROPY_LONGINT_IMPL_LONGLONG,
        MICROPY_LONGINT_IMPL_MPZ,
    ) = range(3)


# Single shared configuration object used throughout the module.
config = Config()
97
Damien George69661f32020-02-27 15:36:53 +110098
# Kinds of raw code.
(
    MP_CODE_BYTECODE,
    MP_CODE_NATIVE_PY,
    MP_CODE_NATIVE_VIPER,
    MP_CODE_NATIVE_ASM,
) = range(2, 6)

# Native architecture identifiers.
(
    MP_NATIVE_ARCH_NONE,
    MP_NATIVE_ARCH_X86,
    MP_NATIVE_ARCH_X64,
    MP_NATIVE_ARCH_ARMV6,
    MP_NATIVE_ARCH_ARMV6M,
    MP_NATIVE_ARCH_ARMV7M,
    MP_NATIVE_ARCH_ARMV7EM,
    MP_NATIVE_ARCH_ARMV7EMSP,
    MP_NATIVE_ARCH_ARMV7EMDP,
    MP_NATIVE_ARCH_XTENSA,
    MP_NATIVE_ARCH_XTENSAWIN,
) = range(11)

# Type tags for persistent (saved) constant objects.
(
    MP_PERSISTENT_OBJ_FUN_TABLE,
    MP_PERSISTENT_OBJ_NONE,
    MP_PERSISTENT_OBJ_FALSE,
    MP_PERSISTENT_OBJ_TRUE,
    MP_PERSISTENT_OBJ_ELLIPSIS,
    MP_PERSISTENT_OBJ_STR,
    MP_PERSISTENT_OBJ_BYTES,
    MP_PERSISTENT_OBJ_INT,
    MP_PERSISTENT_OBJ_FLOAT,
    MP_PERSISTENT_OBJ_COMPLEX,
    MP_PERSISTENT_OBJ_TUPLE,
) = range(11)

# Scope flag bits for viper code.
MP_SCOPE_FLAG_VIPERRELOC = 1 << 4
MP_SCOPE_FLAG_VIPERRODATA = 1 << 5
MP_SCOPE_FLAG_VIPERBSS = 1 << 6

# An opcode is followed by one extra byte when (opcode & mask) == 0.
MP_BC_MASK_EXTRA_BYTE = 0x9E

# Opcode argument formats.
(
    MP_BC_FORMAT_BYTE,
    MP_BC_FORMAT_QSTR,
    MP_BC_FORMAT_VAR_UINT,
    MP_BC_FORMAT_OFFSET,
) = range(4)
138
# Names of the unary operators, indexed by operator number.
mp_unary_op_method_name = (
    "__pos__",
    "__neg__",
    "__invert__",
    "<not>",
)

# Names of the binary operators, indexed by operator number.
mp_binary_op_method_name = (
    "__lt__",
    "__gt__",
    "__eq__",
    "__le__",
    "__ge__",
    "__ne__",
    "<in>",
    "<is>",
    "<exception match>",
    "__ior__",
    "__ixor__",
    "__iand__",
    "__ilshift__",
    "__irshift__",
    "__iadd__",
    "__isub__",
    "__imul__",
    "__imatmul__",
    "__ifloordiv__",
    "__itruediv__",
    "__imod__",
    "__ipow__",
    "__or__",
    "__xor__",
    "__and__",
    "__lshift__",
    "__rshift__",
    "__add__",
    "__sub__",
    "__mul__",
    "__matmul__",
    "__floordiv__",
    "__truediv__",
    "__mod__",
    "__pow__",
)


class Opcodes:
    """Bytecode opcode values and a table of their printable names.

    Attributes:
        MP_BC_*: numeric opcode values, plus the base values and range sizes
            they are built from.
        ALL_OFFSET_SIGNED: tuple of the jump opcodes that take a signed offset.
        mapping: 256-entry list indexed by opcode byte giving its printable
            name, or "unknown" for bytes that are not an opcode.
    """

    # fmt: off
    # Load, Store, Delete, Import, Make, Build, Unpack, Call, Jump, Exception, For, sTack, Return, Yield, Op
    MP_BC_BASE_RESERVED               = (0x00) # ----------------
    MP_BC_BASE_QSTR_O                 = (0x10) # LLLLLLSSSDDII---
    MP_BC_BASE_VINT_E                 = (0x20) # MMLLLLSSDDBBBBBB
    MP_BC_BASE_VINT_O                 = (0x30) # UUMMCCCC--------
    MP_BC_BASE_JUMP_E                 = (0x40) # J-JJJJJEEEEF----
    MP_BC_BASE_BYTE_O                 = (0x50) # LLLLSSDTTTTTEEFF
    MP_BC_BASE_BYTE_E                 = (0x60) # --BREEEYYI------
    MP_BC_LOAD_CONST_SMALL_INT_MULTI  = (0x70) # LLLLLLLLLLLLLLLL
    #                                 = (0x80) # LLLLLLLLLLLLLLLL
    #                                 = (0x90) # LLLLLLLLLLLLLLLL
    #                                 = (0xa0) # LLLLLLLLLLLLLLLL
    MP_BC_LOAD_FAST_MULTI             = (0xb0) # LLLLLLLLLLLLLLLL
    MP_BC_STORE_FAST_MULTI            = (0xc0) # SSSSSSSSSSSSSSSS
    MP_BC_UNARY_OP_MULTI              = (0xd0) # OOOOOOO
    MP_BC_BINARY_OP_MULTI             = (0xd7) #        OOOOOOOOO
    #                                 = (0xe0) # OOOOOOOOOOOOOOOO
    #                                 = (0xf0) # OOOOOOOOOO------

    MP_BC_LOAD_CONST_SMALL_INT_MULTI_NUM = 64
    MP_BC_LOAD_CONST_SMALL_INT_MULTI_EXCESS = 16
    MP_BC_LOAD_FAST_MULTI_NUM         = 16
    MP_BC_STORE_FAST_MULTI_NUM        = 16
    MP_BC_UNARY_OP_MULTI_NUM          = 4  # MP_UNARY_OP_NUM_BYTECODE
    MP_BC_BINARY_OP_MULTI_NUM         = 35 # MP_BINARY_OP_NUM_BYTECODE

    MP_BC_LOAD_CONST_FALSE            = (MP_BC_BASE_BYTE_O + 0x00)
    MP_BC_LOAD_CONST_NONE             = (MP_BC_BASE_BYTE_O + 0x01)
    MP_BC_LOAD_CONST_TRUE             = (MP_BC_BASE_BYTE_O + 0x02)
    MP_BC_LOAD_CONST_SMALL_INT        = (MP_BC_BASE_VINT_E + 0x02) # signed var-int
    MP_BC_LOAD_CONST_STRING           = (MP_BC_BASE_QSTR_O + 0x00) # qstr
    MP_BC_LOAD_CONST_OBJ              = (MP_BC_BASE_VINT_E + 0x03) # ptr
    MP_BC_LOAD_NULL                   = (MP_BC_BASE_BYTE_O + 0x03)

    MP_BC_LOAD_FAST_N                 = (MP_BC_BASE_VINT_E + 0x04) # uint
    MP_BC_LOAD_DEREF                  = (MP_BC_BASE_VINT_E + 0x05) # uint
    MP_BC_LOAD_NAME                   = (MP_BC_BASE_QSTR_O + 0x01) # qstr
    MP_BC_LOAD_GLOBAL                 = (MP_BC_BASE_QSTR_O + 0x02) # qstr
    MP_BC_LOAD_ATTR                   = (MP_BC_BASE_QSTR_O + 0x03) # qstr
    MP_BC_LOAD_METHOD                 = (MP_BC_BASE_QSTR_O + 0x04) # qstr
    MP_BC_LOAD_SUPER_METHOD           = (MP_BC_BASE_QSTR_O + 0x05) # qstr
    MP_BC_LOAD_BUILD_CLASS            = (MP_BC_BASE_BYTE_O + 0x04)
    MP_BC_LOAD_SUBSCR                 = (MP_BC_BASE_BYTE_O + 0x05)

    MP_BC_STORE_FAST_N                = (MP_BC_BASE_VINT_E + 0x06) # uint
    MP_BC_STORE_DEREF                 = (MP_BC_BASE_VINT_E + 0x07) # uint
    MP_BC_STORE_NAME                  = (MP_BC_BASE_QSTR_O + 0x06) # qstr
    MP_BC_STORE_GLOBAL                = (MP_BC_BASE_QSTR_O + 0x07) # qstr
    MP_BC_STORE_ATTR                  = (MP_BC_BASE_QSTR_O + 0x08) # qstr
    MP_BC_STORE_SUBSCR                = (MP_BC_BASE_BYTE_O + 0x06)

    MP_BC_DELETE_FAST                 = (MP_BC_BASE_VINT_E + 0x08) # uint
    MP_BC_DELETE_DEREF                = (MP_BC_BASE_VINT_E + 0x09) # uint
    MP_BC_DELETE_NAME                 = (MP_BC_BASE_QSTR_O + 0x09) # qstr
    MP_BC_DELETE_GLOBAL               = (MP_BC_BASE_QSTR_O + 0x0a) # qstr

    MP_BC_DUP_TOP                     = (MP_BC_BASE_BYTE_O + 0x07)
    MP_BC_DUP_TOP_TWO                 = (MP_BC_BASE_BYTE_O + 0x08)
    MP_BC_POP_TOP                     = (MP_BC_BASE_BYTE_O + 0x09)
    MP_BC_ROT_TWO                     = (MP_BC_BASE_BYTE_O + 0x0a)
    MP_BC_ROT_THREE                   = (MP_BC_BASE_BYTE_O + 0x0b)

    MP_BC_UNWIND_JUMP                 = (MP_BC_BASE_JUMP_E + 0x00) # signed relative bytecode offset; then a byte
    MP_BC_JUMP                        = (MP_BC_BASE_JUMP_E + 0x02) # signed relative bytecode offset
    MP_BC_POP_JUMP_IF_TRUE            = (MP_BC_BASE_JUMP_E + 0x03) # signed relative bytecode offset
    MP_BC_POP_JUMP_IF_FALSE           = (MP_BC_BASE_JUMP_E + 0x04) # signed relative bytecode offset
    MP_BC_JUMP_IF_TRUE_OR_POP         = (MP_BC_BASE_JUMP_E + 0x05) # unsigned relative bytecode offset
    MP_BC_JUMP_IF_FALSE_OR_POP        = (MP_BC_BASE_JUMP_E + 0x06) # unsigned relative bytecode offset
    MP_BC_SETUP_WITH                  = (MP_BC_BASE_JUMP_E + 0x07) # unsigned relative bytecode offset
    MP_BC_SETUP_EXCEPT                = (MP_BC_BASE_JUMP_E + 0x08) # unsigned relative bytecode offset
    MP_BC_SETUP_FINALLY               = (MP_BC_BASE_JUMP_E + 0x09) # unsigned relative bytecode offset
    MP_BC_POP_EXCEPT_JUMP             = (MP_BC_BASE_JUMP_E + 0x0a) # unsigned relative bytecode offset
    MP_BC_FOR_ITER                    = (MP_BC_BASE_JUMP_E + 0x0b) # unsigned relative bytecode offset
    MP_BC_WITH_CLEANUP                = (MP_BC_BASE_BYTE_O + 0x0c)
    MP_BC_END_FINALLY                 = (MP_BC_BASE_BYTE_O + 0x0d)
    MP_BC_GET_ITER                    = (MP_BC_BASE_BYTE_O + 0x0e)
    MP_BC_GET_ITER_STACK              = (MP_BC_BASE_BYTE_O + 0x0f)

    MP_BC_BUILD_TUPLE                 = (MP_BC_BASE_VINT_E + 0x0a) # uint
    MP_BC_BUILD_LIST                  = (MP_BC_BASE_VINT_E + 0x0b) # uint
    MP_BC_BUILD_MAP                   = (MP_BC_BASE_VINT_E + 0x0c) # uint
    MP_BC_STORE_MAP                   = (MP_BC_BASE_BYTE_E + 0x02)
    MP_BC_BUILD_SET                   = (MP_BC_BASE_VINT_E + 0x0d) # uint
    MP_BC_BUILD_SLICE                 = (MP_BC_BASE_VINT_E + 0x0e) # uint
    MP_BC_STORE_COMP                  = (MP_BC_BASE_VINT_E + 0x0f) # uint
    MP_BC_UNPACK_SEQUENCE             = (MP_BC_BASE_VINT_O + 0x00) # uint
    MP_BC_UNPACK_EX                   = (MP_BC_BASE_VINT_O + 0x01) # uint

    MP_BC_RETURN_VALUE                = (MP_BC_BASE_BYTE_E + 0x03)
    MP_BC_RAISE_LAST                  = (MP_BC_BASE_BYTE_E + 0x04)
    MP_BC_RAISE_OBJ                   = (MP_BC_BASE_BYTE_E + 0x05)
    MP_BC_RAISE_FROM                  = (MP_BC_BASE_BYTE_E + 0x06)
    MP_BC_YIELD_VALUE                 = (MP_BC_BASE_BYTE_E + 0x07)
    MP_BC_YIELD_FROM                  = (MP_BC_BASE_BYTE_E + 0x08)

    MP_BC_MAKE_FUNCTION               = (MP_BC_BASE_VINT_O + 0x02) # uint
    MP_BC_MAKE_FUNCTION_DEFARGS       = (MP_BC_BASE_VINT_O + 0x03) # uint
    MP_BC_MAKE_CLOSURE                = (MP_BC_BASE_VINT_E + 0x00) # uint; extra byte
    MP_BC_MAKE_CLOSURE_DEFARGS        = (MP_BC_BASE_VINT_E + 0x01) # uint; extra byte
    MP_BC_CALL_FUNCTION               = (MP_BC_BASE_VINT_O + 0x04) # uint
    MP_BC_CALL_FUNCTION_VAR_KW        = (MP_BC_BASE_VINT_O + 0x05) # uint
    MP_BC_CALL_METHOD                 = (MP_BC_BASE_VINT_O + 0x06) # uint
    MP_BC_CALL_METHOD_VAR_KW          = (MP_BC_BASE_VINT_O + 0x07) # uint

    MP_BC_IMPORT_NAME                 = (MP_BC_BASE_QSTR_O + 0x0b) # qstr
    MP_BC_IMPORT_FROM                 = (MP_BC_BASE_QSTR_O + 0x0c) # qstr
    MP_BC_IMPORT_STAR                 = (MP_BC_BASE_BYTE_E + 0x09)
    # fmt: on

    # Create sets of related opcodes.
    ALL_OFFSET_SIGNED = (
        MP_BC_UNWIND_JUMP,
        MP_BC_JUMP,
        MP_BC_POP_JUMP_IF_TRUE,
        MP_BC_POP_JUMP_IF_FALSE,
    )

    # Create a list mapping opcode value to opcode name.
    mapping = ["unknown"] * 256
    for op_name in list(locals()):
        if not op_name.startswith("MP_BC_"):
            continue
        # Base values, range starts and range sizes are not opcodes.  Naming
        # them would label unused bytes (0x00, 0x04, 0x60) with bogus names
        # and, where namespace order is not guaranteed, could overwrite the
        # name of a real opcode sharing the same numeric value.  The *_MULTI
        # ranges are named explicitly by the loops below.
        if op_name.startswith("MP_BC_BASE_") or op_name.endswith(
            ("_MULTI", "_MULTI_NUM", "_MULTI_EXCESS")
        ):
            continue
        mapping[locals()[op_name]] = op_name[len("MP_BC_") :]
    for i in range(MP_BC_LOAD_CONST_SMALL_INT_MULTI_NUM):
        name = "LOAD_CONST_SMALL_INT %d" % (i - MP_BC_LOAD_CONST_SMALL_INT_MULTI_EXCESS)
        mapping[MP_BC_LOAD_CONST_SMALL_INT_MULTI + i] = name
    for i in range(MP_BC_LOAD_FAST_MULTI_NUM):
        mapping[MP_BC_LOAD_FAST_MULTI + i] = "LOAD_FAST %d" % i
    for i in range(MP_BC_STORE_FAST_MULTI_NUM):
        mapping[MP_BC_STORE_FAST_MULTI + i] = "STORE_FAST %d" % i
    for i in range(MP_BC_UNARY_OP_MULTI_NUM):
        mapping[MP_BC_UNARY_OP_MULTI + i] = "UNARY_OP %d %s" % (i, mp_unary_op_method_name[i])
    for i in range(MP_BC_BINARY_OP_MULTI_NUM):
        mapping[MP_BC_BINARY_OP_MULTI + i] = "BINARY_OP %d %s" % (i, mp_binary_op_method_name[i])
320
Damien George0699c6b2016-01-31 21:45:22 +0000321
# This definition of a small int covers all possible targets, in the sense that
# every target can encode as a small int, an integer that passes this test.  The
# minimum is set by MICROPY_OBJ_REPR_B on a 16-bit machine, where there are 14
# bits for the small int.
def mp_small_int_fits(i):
    """Return True if i lies in the inclusive range -0x2000 .. 0x1FFF."""
    lowest = -0x2000
    highest = 0x1FFF
    return lowest <= i and i <= highest
327
328
# this function mirrors that in py/bc.c
def mp_opcode_format(bytecode, ip, count_var_uint):
    """Classify the opcode at bytecode[ip] and measure its encoded size.

    Args:
        bytecode: indexable sequence of integer byte values.
        ip: index of the opcode byte.
        count_var_uint: if true, the bytes of a variable-length uint argument
            are included in the size; otherwise they are not counted.

    Returns:
        (format, size), where format is one of the MP_BC_FORMAT_* values and
        size is the number of bytes counted for this instruction.
    """
    start = ip
    op = bytecode[start]
    # A two-bit format code is looked up by the high nibble of the opcode.
    fmt = (0x000003A4 >> (2 * (op >> 4))) & 3

    if fmt == MP_BC_FORMAT_QSTR:
        # qstr opcodes are counted as a fixed three bytes.
        return fmt, 3

    ip += 1
    if fmt == MP_BC_FORMAT_VAR_UINT:
        if count_var_uint:
            # Skip continuation bytes (top bit set), then the final byte.
            while bytecode[ip] & 0x80:
                ip += 1
            ip += 1
    elif fmt == MP_BC_FORMAT_OFFSET:
        # Offsets take one byte, or two when the first byte has its top bit set.
        ip += 1 if bytecode[ip] & 0x80 == 0 else 2

    if (op & MP_BC_MASK_EXTRA_BYTE) == 0:
        # This opcode carries one trailing extra byte.
        ip += 1
    return fmt, ip - start
351
Damien George69661f32020-02-27 15:36:53 +1100352
def mp_opcode_decode(bytecode, ip):
    """Decode the instruction at bytecode[ip].

    Args:
        bytecode: indexable sequence of integer byte values.
        ip: index of the opcode byte.

    Returns:
        (format, size, arg): the MP_BC_FORMAT_* value of the opcode, the total
        number of bytes the instruction occupies, and its decoded argument
        (0 when the format has no argument).
    """
    start = ip
    op = bytecode[start]
    # A two-bit format code is looked up by the high nibble of the opcode.
    fmt = (0x000003A4 >> (2 * (op >> 4))) & 3
    has_extra_byte = (op & MP_BC_MASK_EXTRA_BYTE) == 0
    ip += 1
    arg = 0

    if fmt in (MP_BC_FORMAT_QSTR, MP_BC_FORMAT_VAR_UINT):
        # Variable-length uint: 7 bits per byte, most significant group first,
        # top bit set on every byte except the last.
        byte = bytecode[ip]
        arg = byte & 0x7F
        while byte & 0x80:
            ip += 1
            byte = bytecode[ip]
            arg = (arg << 7) | (byte & 0x7F)
        ip += 1
    elif fmt == MP_BC_FORMAT_OFFSET:
        first = bytecode[ip]
        if first & 0x80:
            # Two-byte form: low 7 bits in the first byte, the rest in the second.
            arg = (first & 0x7F) | (bytecode[ip + 1] << 7)
            ip += 2
            bias = 0x4000
        else:
            # One-byte form.
            arg = first
            ip += 1
            bias = 0x40
        if op in Opcodes.ALL_OFFSET_SIGNED:
            # Signed offsets are stored with a bias; remove it.
            arg -= bias

    if has_extra_byte:
        ip += 1
    return fmt, ip - start, arg
379
380
def read_prelude_sig(read_byte):
    """Decode a variable-length prelude signature.

    Args:
        read_byte: zero-argument callable returning the next byte as an int.

    Returns:
        A 6-tuple (S, E, F, A, K, D).  The caller in this file unpacks it as
        (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args,
        n_def_pos_args).
    """
    byte = read_byte()
    # First byte layout: xSSSSEAA
    state = (byte >> 3) & 0xF
    exc_stack = (byte >> 2) & 0x1
    pos_args = byte & 0x3
    flags = 0
    kwonly_args = 0
    def_pos_args = 0

    group = 0
    while byte & 0x80:
        # The top bit signals another byte.  Layout of following bytes: xFSSKAED
        byte = read_byte()
        state |= (byte & 0x30) << (2 * group)
        exc_stack |= (byte & 0x02) << group
        flags |= ((byte & 0x40) >> 6) << group
        pos_args |= (byte & 0x4) << group
        kwonly_args |= ((byte & 0x08) >> 3) << group
        def_pos_args |= (byte & 0x1) << group
        group += 1

    # S is stored as its value minus one.
    return state + 1, exc_stack, flags, pos_args, kwonly_args, def_pos_args
403
Damien George69661f32020-02-27 15:36:53 +1100404
def read_prelude_size(read_byte):
    """Decode the variable-length pair of prelude sizes.

    Args:
        read_byte: zero-argument callable returning the next byte as an int.

    Returns:
        (I, C); the caller in this file unpacks it as (n_info, n_cell).
    """
    info = 0
    cell = 0
    group = 0
    more = True
    while more:
        byte = read_byte()
        # Byte layout: xIIIIIIC -- six bits of I and one bit of C per byte.
        info |= ((byte & 0x7E) >> 1) << (6 * group)
        cell |= (byte & 1) << group
        # The top bit signals that another byte follows.
        more = bool(byte & 0x80)
        group += 1
    return info, cell
418
Damien George69661f32020-02-27 15:36:53 +1100419
def extract_prelude(bytecode, ip):
    """Parse the function prelude that starts at bytecode[ip].

    Args:
        bytecode: indexable sequence of integer byte values.
        ip: index of the first prelude byte.

    Returns:
        A 5-tuple:
          - index of the simple_name qstr (first byte after the size fields),
          - index of the first opcode,
          - index just past the last name that was read,
          - (n_state, n_exc_stack, scope_flags, n_pos_args, n_kwonly_args,
            n_def_pos_args),
          - list of decoded var-uint values: the simple name followed by one
            entry per positional and keyword-only argument.
    """
    # One-element list so the nested reader can advance the position; this
    # closes over the position in both Python 2 and 3.
    cursor = [ip]

    def next_byte():
        value = bytecode[cursor[0]]
        cursor[0] += 1
        return value

    signature = read_prelude_sig(next_byte)
    n_pos_args = signature[3]
    n_kwonly_args = signature[4]

    n_info, n_cell = read_prelude_size(next_byte)

    # names_start points to the simple_name qstr.
    names_start = cursor[0]
    # opcodes_start points to the first opcode.
    opcodes_start = names_start + n_info + n_cell

    # Extract simple_name and argument qstrs (var uints).
    names = []
    for _ in range(1 + n_pos_args + n_kwonly_args):
        value = 0
        more = True
        while more:
            byte = next_byte()
            value = (value << 7) | (byte & 0x7F)
            more = byte & 0x80 != 0
        names.append(value)

    return (
        names_start,
        opcodes_start,
        cursor[0],
        tuple(signature),
        names,
    )
Damien George0699c6b2016-01-31 21:45:22 +0000462
Damien George69661f32020-02-27 15:36:53 +1100463
class QStrType:
    """A qstr together with its escaped forms.

    Attributes:
        str: the string value.
        qstr_esc: the value as escaped by qstrutil.qstr_escape().
        qstr_id: "MP_QSTR_" followed by the escaped value.
    """

    def __init__(self, str):
        escaped = qstrutil.qstr_escape(str)
        self.str = str
        self.qstr_esc = escaped
        self.qstr_id = "MP_QSTR_" + escaped
469
470
class GlobalQStrList:
    """Ordered list of qstrs, seeded with the static qstrs."""

    def __init__(self):
        # Index 0 is a placeholder: MP_QSTRnull should never be referenced.
        # The static qstrs follow it, in their original order.
        self.qstrs = [None]
        self.qstrs.extend(QStrType(name) for name in qstrutil.static_qstr_list)

    def add(self, s):
        """Append a new qstr for the string s and return it."""
        entry = QStrType(s)
        self.qstrs.append(entry)
        return entry

    def get_by_index(self, i):
        """Return the entry stored at position i."""
        return self.qstrs[i]

    def find_by_str(self, s):
        """Return the first qstr whose string equals s, or None if absent."""
        matches = (q for q in self.qstrs if q is not None and q.str == s)
        return next(matches, None)
491
Damien Georgee6479662022-04-08 14:04:21 +1000492
class MPFunTable:
    """Marker object whose repr is the fixed text "mp_fun_table"."""

    def __repr__(self):
        text = "mp_fun_table"
        return text
Damien Georgeea3c80a2019-02-21 15:18:59 +1100496
Damien George69661f32020-02-27 15:36:53 +1100497
class CompiledModule:
    """A loaded .mpy module that can be hex-dumped, disassembled or frozen.

    Attributes:
        mpy_source_file: path of the .mpy file (opened again by hexdump()).
        mpy_segments: sequence of objects with ``start``, ``end``, ``kind``
            and ``name`` attributes, used to annotate the hex dump.
        source_file: first entry of ``qstr_table``.
        header: the .mpy header bytes.
        qstr_table: sequence of qstr objects (with ``str`` and ``qstr_id``).
        obj_table: sequence of constant objects.
        raw_code: the top-level raw code object.
        raw_code_file_offset: stored as given; not used by this class.
        escaped_name: identifier-safe module name used in generated C names.
    """

    def __init__(
        self,
        mpy_source_file,
        mpy_segments,
        header,
        qstr_table,
        obj_table,
        raw_code,
        raw_code_file_offset,
        escaped_name,
    ):
        self.mpy_source_file = mpy_source_file
        self.mpy_segments = mpy_segments
        self.source_file = qstr_table[0]
        self.header = header
        self.qstr_table = qstr_table
        self.obj_table = obj_table
        self.raw_code_file_offset = raw_code_file_offset
        self.raw_code = raw_code
        self.escaped_name = escaped_name

    def hexdump(self):
        """Print a hex dump of the .mpy file, coloured and labelled per segment."""
        with open(self.mpy_source_file, "rb") as f:
            WIDTH = 16
            COL_OFF = "\033[0m"
            # Two alternating colours per segment kind, indexed by kind.
            COL_TABLE = (
                ("", ""),  # META
                ("\033[0;31m", "\033[0;91m"),  # QSTR
                ("\033[0;32m", "\033[0;92m"),  # OBJ
                ("\033[0;34m", "\033[0;94m"),  # CODE
            )
            cur_col = ""
            cur_col_index = 0
            offset = 0
            segment_index = 0
            while True:
                data = bytes_cons(f.read(WIDTH))
                if not data:
                    break

                # Print out the hex dump of this line of data.
                line_hex = cur_col
                line_chr = cur_col
                line_comment = ""
                for i in range(len(data)):
                    # Determine the colour of the data, if any, and the line comment.
                    while segment_index < len(self.mpy_segments):
                        if offset + i == self.mpy_segments[segment_index].start:
                            cur_col = COL_TABLE[self.mpy_segments[segment_index].kind][
                                cur_col_index
                            ]
                            # Alternate shades so adjacent segments can be told apart.
                            cur_col_index = 1 - cur_col_index
                            line_hex += cur_col
                            line_chr += cur_col
                            line_comment += " %s%s%s" % (
                                cur_col,
                                self.mpy_segments[segment_index].name,
                                COL_OFF,
                            )
                        if offset + i == self.mpy_segments[segment_index].end:
                            # Segment finished: reset colour and look at the
                            # next segment, which may start at this same byte.
                            cur_col = ""
                            line_hex += COL_OFF
                            line_chr += COL_OFF
                            segment_index += 1
                        else:
                            break

                    # Add to the hex part of the line.
                    if i % 2 == 0:
                        line_hex += " "
                    line_hex += "%02x" % data[i]

                    # Add to the characters part of the line.
                    if 0x20 <= data[i] <= 0x7E:
                        line_chr += "%s" % chr(data[i])
                    else:
                        line_chr += "."

                # Print out this line.
                if cur_col:
                    line_hex += COL_OFF
                    line_chr += COL_OFF
                # Pad a short final line so the character column stays aligned.
                pad = " " * ((WIDTH - len(data)) * 5 // 2)
                print("%08x:%s%s %s %s" % (offset, line_hex, pad, line_chr, line_comment))
                offset += WIDTH

    def disassemble(self):
        """Print the module's metadata and tables, then disassemble its raw code."""
        print("mpy_source_file:", self.mpy_source_file)
        print("source_file:", self.source_file.str)
        print("header:", hexlify_to_str(self.header))
        print("qstr_table[%u]:" % len(self.qstr_table))
        for q in self.qstr_table:
            print(" %s" % q.str)
        print("obj_table:", self.obj_table)
        self.raw_code.disassemble()

    def freeze(self, compiled_module_index):
        """Print the C source that freezes this module.

        Emits, in order: a header comment, the raw code, the constant tables
        and the ``mp_frozen_module_t`` definition.  ``compiled_module_index``
        is accepted but not used here.
        """
        print()
        print("/" * 80)
        print("// frozen module %s" % self.escaped_name)
        print("// - original source file: %s" % self.mpy_source_file)
        print("// - frozen file name: %s" % self.source_file.str)
        print("// - .mpy header: %s" % ":".join("%02x" % b for b in self.header))
        print()

        self.raw_code.freeze()
        print()

        self.freeze_constants()

        print()
        print("static const mp_frozen_module_t frozen_module_%s = {" % self.escaped_name)
        print(" .constants = {")
        if len(self.qstr_table):
            print(
                " .qstr_table = (qstr_short_t *)&const_qstr_table_data_%s,"
                % self.escaped_name
            )
        else:
            print(" .qstr_table = NULL,")
        if len(self.obj_table):
            print(" .obj_table = (mp_obj_t *)&const_obj_table_data_%s," % self.escaped_name)
        else:
            print(" .obj_table = NULL,")
        print(" },")
        print(" .rc = &raw_code_%s," % self.raw_code.escaped_name)
        print("};")

    def freeze_constant_obj(self, obj_name, obj):
        """Print any C definition needed for the constant obj and return a reference to it.

        Args:
            obj_name: C identifier to use if a standalone object is emitted.
                Tuple elements get ``<obj_name>_<index>``.
            obj: the constant (MPFunTable, None, bool, Ellipsis, str, bytes,
                int, float, complex, or a tuple of these).

        Returns:
            C expression text suitable for an entry in a ROM object table.

        Raises:
            FreezeError: if the object type is unsupported, or an int does
                not fit a small int and the configured long-int
                implementation cannot represent it.

        Updates the module-level size counters const_str_content,
        const_int_content and const_obj_content.
        """
        global const_str_content, const_int_content, const_obj_content

        if isinstance(obj, MPFunTable):
            return "&mp_fun_table"
        elif obj is None:
            return "MP_ROM_NONE"
        elif obj is False:
            return "MP_ROM_FALSE"
        elif obj is True:
            return "MP_ROM_TRUE"
        elif obj is Ellipsis:
            return "MP_ROM_PTR(&mp_const_ellipsis_obj)"
        elif is_str_type(obj) or is_bytes_type(obj):
            if len(obj) == 0:
                if is_str_type(obj):
                    return "MP_ROM_QSTR(MP_QSTR_)"
                else:
                    return "MP_ROM_PTR(&mp_const_empty_bytes_obj)"
            if is_str_type(obj):
                # Reuse an existing qstr when one has exactly this value.
                q = global_qstrs.find_by_str(obj)
                if q:
                    return "MP_ROM_QSTR(%s)" % q.qstr_id
                obj = bytes_cons(obj, "utf8")
                obj_type = "mp_type_str"
            else:
                obj_type = "mp_type_bytes"
            print(
                'static const mp_obj_str_t %s = {{&%s}, %u, %u, (const byte*)"%s"};'
                % (
                    obj_name,
                    obj_type,
                    qstrutil.compute_hash(obj, config.MICROPY_QSTR_BYTES_IN_HASH),
                    len(obj),
                    "".join(("\\x%02x" % b) for b in obj),
                )
            )
            const_str_content += len(obj)
            const_obj_content += 4 * 4
            return "MP_ROM_PTR(&%s)" % obj_name
        elif is_int_type(obj):
            if mp_small_int_fits(obj):
                # Encode directly as a small integer object.
                return "MP_ROM_INT(%d)" % obj
            elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_NONE:
                raise FreezeError(self, "target does not support long int")
            elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_LONGLONG:
                # TODO
                raise FreezeError(self, "freezing int to long-long is not implemented")
            elif config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_MPZ:
                neg = 0
                if obj < 0:
                    obj = -obj
                    neg = 1
                bits_per_dig = config.MPZ_DIG_SIZE
                # Split the magnitude into digits, least significant first.
                digs = []
                z = obj
                while z:
                    digs.append(z & ((1 << bits_per_dig) - 1))
                    z >>= bits_per_dig
                ndigs = len(digs)
                digs = ",".join(("%#x" % d) for d in digs)
                print(
                    "static const mp_obj_int_t %s = {{&mp_type_int}, "
                    "{.neg=%u, .fixed_dig=1, .alloc=%u, .len=%u, .dig=(uint%u_t*)(const uint%u_t[]){%s}}};"
                    % (obj_name, neg, ndigs, ndigs, bits_per_dig, bits_per_dig, digs)
                )
                const_int_content += (digs.count(",") + 1) * bits_per_dig // 8
                const_obj_content += 4 * 4
                return "MP_ROM_PTR(&%s)" % obj_name
        elif type(obj) is float:
            # The float's encoding depends on the object representation, so
            # emit a macro that is defined differently for each one.
            macro_name = "%s_macro" % obj_name
            print(
                "#if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_A || MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_B"
            )
            print(
                "static const mp_obj_float_t %s = {{&mp_type_float}, (mp_float_t)%.16g};"
                % (obj_name, obj)
            )
            print("#define %s MP_ROM_PTR(&%s)" % (macro_name, obj_name))
            print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_C")
            n = struct.unpack("<I", struct.pack("<f", obj))[0]
            # The sum exceeds 32 bits whenever the float's sign bit is set;
            # wrap it so the emitted literal stays within the 32 bits that
            # the %08x field is meant to hold.
            n = (((n & ~0x3) | 2) + 0x80800000) & 0xFFFFFFFF
            print("#define %s ((mp_rom_obj_t)(0x%08x))" % (macro_name, n))
            print("#elif MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D")
            n = struct.unpack("<Q", struct.pack("<d", obj))[0]
            # Likewise wrap to 64 bits: the addition overflows for any
            # double whose sign bit is set.
            n = (n + 0x8004000000000000) & 0xFFFFFFFFFFFFFFFF
            print("#define %s ((mp_rom_obj_t)(0x%016x))" % (macro_name, n))
            print("#endif")
            const_obj_content += 3 * 4
            return macro_name
        elif type(obj) is complex:
            print(
                "static const mp_obj_complex_t %s = {{&mp_type_complex}, (mp_float_t)%.16g, (mp_float_t)%.16g};"
                % (obj_name, obj.real, obj.imag)
            )
            return "MP_ROM_PTR(&%s)" % obj_name
        elif type(obj) is tuple:
            if len(obj) == 0:
                return "MP_ROM_PTR(&mp_const_empty_tuple_obj)"
            else:
                # Freeze the elements first so that their definitions are
                # printed before the tuple that refers to them.
                obj_refs = []
                for i, sub_obj in enumerate(obj):
                    sub_obj_name = "%s_%u" % (obj_name, i)
                    obj_refs.append(self.freeze_constant_obj(sub_obj_name, sub_obj))
                print(
                    "static const mp_rom_obj_tuple_t %s = {{&mp_type_tuple}, %d, {"
                    % (obj_name, len(obj))
                )
                for ref in obj_refs:
                    print(" %s," % ref)
                print("}};")
                return "MP_ROM_PTR(&%s)" % obj_name
        else:
            raise FreezeError(self, "freezing of object %r is not implemented" % (obj,))

    def freeze_constants(self):
        """Print the C qstr table and constant-object table for this module.

        Nothing is printed for a table that is empty.  Adds the number of
        constant objects to the module-level counter const_table_ptr_content.
        """
        global const_table_ptr_content

        if len(self.qstr_table):
            print(
                "static const qstr_short_t const_qstr_table_data_%s[%u] = {"
                % (self.escaped_name, len(self.qstr_table))
            )
            for q in self.qstr_table:
                print(" %s," % q.qstr_id)
            print("};")

        if not len(self.obj_table):
            return

        # generate constant objects
        print()
        print("// constants")
        obj_refs = []
        for i, obj in enumerate(self.obj_table):
            obj_name = "const_obj_%s_%u" % (self.escaped_name, i)
            obj_refs.append(self.freeze_constant_obj(obj_name, obj))

        # generate constant table
        print()
        print("// constant table")
        print(
            "static const mp_rom_obj_t const_obj_table_data_%s[%u] = {"
            % (self.escaped_name, len(self.obj_table))
        )
        for ref in obj_refs:
            print(" %s," % ref)
        print("};")

        const_table_ptr_content += len(self.obj_table)
777
778
class RawCode(object):
    """Base class for one raw-code entry (a function/scope) loaded from a .mpy file.

    Holds the function data together with the qstr table it refers to, derives
    a unique C identifier for the entry, and provides the helpers shared by the
    bytecode and native subclasses for disassembling and freezing child entries
    and for emitting the ``mp_raw_code_t`` structure.
    """

    # a set of all escaped names, to make sure they are unique
    escaped_names = set()

    # convert code kind number to string
    code_kind_str = {
        MP_CODE_BYTECODE: "MP_CODE_BYTECODE",
        MP_CODE_NATIVE_PY: "MP_CODE_NATIVE_PY",
        MP_CODE_NATIVE_VIPER: "MP_CODE_NATIVE_VIPER",
        MP_CODE_NATIVE_ASM: "MP_CODE_NATIVE_ASM",
    }

    def __init__(self, cm_escaped_name, qstr_table, fun_data, prelude_offset, code_kind):
        """Store the raw data and compute the entry's simple and escaped names.

        cm_escaped_name -- escaped name of the compiled module, used as a prefix
        qstr_table      -- qstr table of the module, indexed by the function data
        fun_data        -- the raw function data bytes
        prelude_offset  -- offset of the prelude within fun_data
        code_kind       -- one of the MP_CODE_* kinds
        """
        self.qstr_table = qstr_table
        self.fun_data = fun_data
        self.prelude_offset = prelude_offset
        self.code_kind = code_kind

        # Start with no children so the disassemble/freeze helpers are usable
        # straight after construction; the loader can assign the real list later.
        self.children = []

        if code_kind in (MP_CODE_BYTECODE, MP_CODE_NATIVE_PY):
            # These kinds carry a prelude, from which the flags, argument count
            # and the function's own name are taken.
            (
                self.offset_names,
                self.offset_opcodes,
                self.offset_line_info,
                self.prelude,
                self.names,
            ) = extract_prelude(self.fun_data, prelude_offset)
            self.scope_flags = self.prelude[2]
            self.n_pos_args = self.prelude[3]
            self.simple_name = self.qstr_table[self.names[0]]
        else:
            # No prelude to take a name from: fall back to the first qstr in the table.
            self.simple_name = self.qstr_table[0]

        escaped_name = cm_escaped_name + "_" + self.simple_name.qstr_esc

        # make sure the escaped name is unique
        i = 2
        unique_escaped_name = escaped_name
        while unique_escaped_name in self.escaped_names:
            unique_escaped_name = escaped_name + str(i)
            i += 1
        self.escaped_names.add(unique_escaped_name)
        self.escaped_name = unique_escaped_name

    def disassemble_children(self):
        """Print the names of the child entries, then disassemble each of them."""
        print(" children:", [rc.simple_name.str for rc in self.children])
        for rc in self.children:
            rc.disassemble()

    def freeze_children(self):
        """Freeze every child and print the C table of pointers to them.

        Nothing is printed when there are no children.
        """
        # Freeze children and generate table of children.
        if self.children:
            for rc in self.children:
                print("// child of %s" % self.escaped_name)
                rc.freeze()
                print()
            print("static const mp_raw_code_t *const children_%s[] = {" % self.escaped_name)
            for rc in self.children:
                print(" &raw_code_%s," % rc.escaped_name)
            print("};")
            print()

    def freeze_raw_code(self, qstr_links=(), type_sig=0):
        """Print the C ``mp_raw_code_t`` definition for this entry.

        qstr_links -- qstr link table; only its length is emitted
        type_sig   -- value emitted for the ``type_sig`` field

        Also updates the module-level ``raw_code_count`` and
        ``raw_code_content`` statistics.
        """
        global raw_code_count, raw_code_content

        # Generate mp_raw_code_t.
        print("static const mp_raw_code_t raw_code_%s = {" % self.escaped_name)
        print(" .kind = %s," % RawCode.code_kind_str[self.code_kind])
        print(" .scope_flags = 0x%02x," % self.scope_flags)
        print(" .n_pos_args = %u," % self.n_pos_args)
        print(" .fun_data = fun_data_%s," % self.escaped_name)
        print(" #if MICROPY_PERSISTENT_CODE_SAVE || MICROPY_DEBUG_PRINTERS")
        print(" .fun_data_len = %u," % len(self.fun_data))
        print(" #endif")
        if self.children:
            print(" .children = (void *)&children_%s," % self.escaped_name)
        else:
            print(" .children = NULL,")
        print(" #if MICROPY_PERSISTENT_CODE_SAVE")
        print(" .n_children = %u," % len(self.children))
        if self.code_kind == MP_CODE_BYTECODE:
            # Only bytecode entries emit the decoded prelude.
            print(" #if MICROPY_PY_SYS_SETTRACE")
            print(" .prelude = {")
            print(" .n_state = %u," % self.prelude[0])
            print(" .n_exc_stack = %u," % self.prelude[1])
            print(" .scope_flags = %u," % self.prelude[2])
            print(" .n_pos_args = %u," % self.prelude[3])
            print(" .n_kwonly_args = %u," % self.prelude[4])
            print(" .n_def_pos_args = %u," % self.prelude[5])
            print(" .qstr_block_name_idx = %u," % self.names[0])
            print(
                " .line_info = fun_data_%s + %u,"
                % (self.escaped_name, self.offset_line_info)
            )
            print(
                " .opcodes = fun_data_%s + %u," % (self.escaped_name, self.offset_opcodes)
            )
            print(" },")
            print(" .line_of_definition = %u," % 0)  # TODO
            print(" #endif")
        print(" #if MICROPY_EMIT_MACHINE_CODE")
        print(" .prelude_offset = %u," % self.prelude_offset)
        print(" .n_qstr = %u," % len(qstr_links))
        print(" .qstr_link = NULL,")  # TODO
        print(" #endif")
        print(" #endif")
        print(" #if MICROPY_EMIT_MACHINE_CODE")
        print(" .type_sig = %u," % type_sig)
        print(" #endif")
        print("};")

        raw_code_count += 1
        raw_code_content += 4 * 4
890
Damien George0699c6b2016-01-31 21:45:22 +0000891
class RawCodeBytecode(RawCode):
    """Raw code whose function data is bytecode."""

    def __init__(self, cm_escaped_name, qstr_table, obj_table, fun_data):
        # Table of constant objects, indexed by LOAD_CONST_OBJ operands.
        self.obj_table = obj_table
        super(RawCodeBytecode, self).__init__(
            cm_escaped_name, qstr_table, fun_data, 0, MP_CODE_BYTECODE
        )

    def disassemble(self):
        """Print a listing of the prelude, line info and every opcode, then the children."""
        code = self.fun_data
        print("simple_name:", self.simple_name.str)
        print(" raw bytecode:", len(code), hexlify_to_str(code))
        print(" prelude:", self.prelude)
        arg_names = [self.qstr_table[idx].str for idx in self.names[1:]]
        print(" args:", arg_names)
        line_info = code[self.offset_line_info : self.offset_opcodes]
        print(" line info:", hexlify_to_str(line_info))

        pos = self.offset_opcodes
        while pos < len(code):
            fmt, size, operand = mp_opcode_decode(code, pos)
            opcode = code[pos]
            if opcode == Opcodes.MP_BC_LOAD_CONST_OBJ:
                # Show the constant itself rather than its table index.
                operand = repr(self.obj_table[operand])
            if fmt == MP_BC_FORMAT_QSTR:
                operand = self.qstr_table[operand].str
            elif fmt not in (MP_BC_FORMAT_VAR_UINT, MP_BC_FORMAT_OFFSET):
                # Opcodes of any other format are printed without an operand.
                operand = ""
            raw_hex = hexlify_to_str(code[pos : pos + size])
            print(" %-11s %s %s" % (raw_hex, Opcodes.mapping[opcode], operand))
            pos += size

        self.disassemble_children()

    def freeze(self):
        """Print the bytecode as an annotated C byte array, then the children and raw code."""
        global bc_content

        def emit_row_start(chunk):
            # Start an output row: a leading space followed by the bytes of chunk.
            print(" ", end="")
            print("".join("0x%02x," % byte for byte in chunk), end="")

        # generate bytecode data
        code = self.fun_data
        header = "// frozen bytecode for file %s, scope %s" % (
            self.qstr_table[0].str,
            self.escaped_name,
        )
        print(header)
        print("static const byte fun_data_%s[%u] = {" % (self.escaped_name, len(code)))

        emit_row_start(code[: self.offset_names])
        print(" // prelude")

        emit_row_start(code[self.offset_names : self.offset_line_info])
        print(" // names: %s" % ", ".join(self.qstr_table[idx].str for idx in self.names))

        emit_row_start(code[self.offset_line_info : self.offset_opcodes])
        print(" // code info")

        # One output row per opcode, annotated with its name and operand.
        pos = self.offset_opcodes
        while pos < len(code):
            fmt, size, operand = mp_opcode_decode(code, pos)
            annotation = Opcodes.mapping[code[pos]]
            if fmt == MP_BC_FORMAT_QSTR:
                annotation += " " + repr(self.qstr_table[operand].str)
            elif fmt in (MP_BC_FORMAT_VAR_UINT, MP_BC_FORMAT_OFFSET):
                annotation += " %u" % operand
            row = ",".join("0x%02x" % byte for byte in code[pos : pos + size])
            print(" %s, // %s" % (row, annotation))
            pos += size

        print("};")

        self.freeze_children()
        self.freeze_raw_code()

        bc_content += len(code)
Damien Georgeea3c80a2019-02-21 15:18:59 +1100967
Damien George69661f32020-02-27 15:36:53 +1100968
class RawCodeNative(RawCode):
    """Raw code whose function data is native machine code (native, viper or asm)."""

    def __init__(
        self,
        cm_escaped_name,
        qstr_table,
        kind,
        fun_data,
        prelude_offset,
        qstr_links,
        scope_flags,
        n_pos_args,
        type_sig,
    ):
        """Store the native code and choose the C attributes for its data array.

        qstr_links -- sequence of (offset, link kind, qstr) tuples to patch into the code
        scope_flags, n_pos_args -- used only for viper/asm kinds; other kinds
                                   take these values from the prelude
        type_sig   -- value passed through to the emitted ``type_sig`` field
        """
        super(RawCodeNative, self).__init__(
            cm_escaped_name, qstr_table, fun_data, prelude_offset, kind
        )

        if kind in (MP_CODE_NATIVE_VIPER, MP_CODE_NATIVE_ASM):
            self.scope_flags = scope_flags
            self.n_pos_args = n_pos_args

        self.qstr_links = qstr_links
        self.type_sig = type_sig
        if config.native_arch in (
            MP_NATIVE_ARCH_X86,
            MP_NATIVE_ARCH_X64,
            MP_NATIVE_ARCH_XTENSA,
            MP_NATIVE_ARCH_XTENSAWIN,
        ):
            self.fun_data_attributes = '__attribute__((section(".text,\\"ax\\",@progbits # ")))'
        else:
            self.fun_data_attributes = '__attribute__((section(".text,\\"ax\\",%progbits @ ")))'

        # Allow single-byte alignment by default for x86/x64.
        # ARM needs word alignment, ARM Thumb needs halfword, due to instruction size.
        # Xtensa needs word alignment due to the 32-bit constant table embedded in the code.
        if config.native_arch in (
            MP_NATIVE_ARCH_ARMV6,
            MP_NATIVE_ARCH_XTENSA,
            MP_NATIVE_ARCH_XTENSAWIN,
        ):
            # ARMV6 or Xtensa -- four byte align.
            self.fun_data_attributes += " __attribute__ ((aligned (4)))"
        elif MP_NATIVE_ARCH_ARMV6M <= config.native_arch <= MP_NATIVE_ARCH_ARMV7EMDP:
            # ARMVxxM -- two byte align.
            self.fun_data_attributes += " __attribute__ ((aligned (2)))"

    def disassemble(self):
        """Print a summary of the native data.

        For MP_CODE_NATIVE_PY the prelude, arguments, line info, a hex dump of
        the data up to the prelude offset and the children are printed as well;
        for the other kinds only the summary line is printed.
        """
        fun_data = self.fun_data
        print("simple_name:", self.simple_name.str)
        print(
            " raw data:",
            len(fun_data),
            hexlify_to_str(fun_data[:32]),
            "..." if len(fun_data) > 32 else "",
        )
        if self.code_kind != MP_CODE_NATIVE_PY:
            return
        print(" prelude:", self.prelude)
        print(" args:", [self.qstr_table[i].str for i in self.names[1:]])
        print(" line info:", fun_data[self.offset_line_info : self.offset_opcodes])
        ip = 0
        while ip < self.prelude_offset:
            sz = 16
            print(" ", hexlify_to_str(fun_data[ip : min(ip + sz, self.prelude_offset)]))
            ip += sz
        self.disassemble_children()

    def _asm_thumb_rewrite_mov(self, pc, val):
        """Print the four bytes at pc as C expressions that merge val into them.

        val is a C expression (string); the original bytes at pc, pc + 1 and
        pc + 3 are masked and combined with parts of val.
        """
        print(" (%u & 0xf0) | (%s >> 12)," % (self.fun_data[pc], val), end="")
        print(" (%u & 0xfb) | (%s >> 9 & 0x04)," % (self.fun_data[pc + 1], val), end="")
        print(" (%s & 0xff)," % (val,), end="")
        print(" (%u & 0x07) | (%s >> 4 & 0x70)," % (self.fun_data[pc + 3], val))

    def _link_qstr(self, pc, kind, qst):
        """Print the bytes for one qstr link and return how many data bytes it replaces.

        pc   -- offset of the link within the function data
        kind -- link kind: 0 is a generic 16-bit link, 2 links a qstr object,
                any other value links a qstr number
        qst  -- C expression naming the qstr

        Raises FreezeError if the configured native architecture has no
        architecture-specific link encoding.
        """
        if kind == 0:
            # Generic 16-bit link
            print(" %s & 0xff, %s >> 8," % (qst, qst))
            return 2
        else:
            # Architecture-specific link
            is_obj = kind == 2
            if is_obj:
                qst = "((uintptr_t)MP_OBJ_NEW_QSTR(%s))" % qst
            if config.native_arch in (
                MP_NATIVE_ARCH_X86,
                MP_NATIVE_ARCH_X64,
                MP_NATIVE_ARCH_ARMV6,
                MP_NATIVE_ARCH_XTENSA,
                MP_NATIVE_ARCH_XTENSAWIN,
            ):
                print(
                    " %s & 0xff, (%s >> 8) & 0xff, (%s >> 16) & 0xff, %s >> 24,"
                    % (qst, qst, qst, qst)
                )
                return 4
            elif MP_NATIVE_ARCH_ARMV6M <= config.native_arch <= MP_NATIVE_ARCH_ARMV7EMDP:
                if is_obj:
                    # qstr object, movw and movt
                    self._asm_thumb_rewrite_mov(pc, qst)
                    self._asm_thumb_rewrite_mov(pc + 4, "(%s >> 16)" % qst)
                    return 8
                else:
                    # qstr number, movw instruction
                    self._asm_thumb_rewrite_mov(pc, qst)
                    return 4
            else:
                # Raise explicitly instead of asserting: an assert is stripped
                # under "python -O", which would make this method return None.
                raise FreezeError("unable to link qstr for this native architecture")

    def freeze(self):
        """Print the native code as a C byte array with qstr links patched in.

        Raises FreezeError if scope_flags has any bit set above the low four.
        """
        if self.scope_flags & ~0x0F:
            raise FreezeError("unable to freeze code with relocations")

        # generate native code data
        print()
        print(
            "// frozen native code for file %s, scope %s"
            % (self.qstr_table[0].str, self.escaped_name)
        )
        print(
            "static const byte fun_data_%s[%u] %s = {"
            % (self.escaped_name, len(self.fun_data), self.fun_data_attributes)
        )

        i_top = len(self.fun_data)
        i = 0
        qi = 0
        while i < i_top:
            if qi < len(self.qstr_links) and i == self.qstr_links[qi][0]:
                # link qstr
                _, qi_kind, qi_val = self.qstr_links[qi]
                i += self._link_qstr(i, qi_kind, qi_val.qstr_id)
                qi += 1
            else:
                # copy machine code (max 16 bytes)
                i16 = min(i + 16, i_top)
                if qi < len(self.qstr_links):
                    # Stop the row where the next qstr link begins.
                    i16 = min(i16, self.qstr_links[qi][0])
                print(" ", end="")
                for ii in range(i, i16):
                    print(" 0x%02x," % self.fun_data[ii], end="")
                print()
                i = i16

        print("};")

        self.freeze_children()
        self.freeze_raw_code(self.qstr_links, self.type_sig)
Damien Georgeea3c80a2019-02-21 15:18:59 +11001117
Damien George69661f32020-02-27 15:36:53 +11001118
class MPYSegment:
    """A labelled span of a .mpy file, identified by its start and end offsets."""

    # Segment kinds.
    META, QSTR, OBJ, CODE = range(4)

    def __init__(self, kind, name, start, end):
        # kind is one of the constants above; name labels the segment.
        self.kind, self.name = kind, name
        self.start, self.end = start, end
Damien George992a6e12019-03-01 14:03:10 +11001130
Damien George69661f32020-02-27 15:36:53 +11001131
class MPYReader:
    """Thin wrapper around a binary file object for reading .mpy primitives."""

    def __init__(self, filename, fileobj):
        self.filename = filename
        self.fileobj = fileobj

    def tell(self):
        """Return the current offset of the underlying file object."""
        position = self.fileobj.tell()
        return position

    def read_byte(self):
        """Read one byte and return its integer value."""
        return self.read_bytes(1)[0]

    def read_bytes(self, n):
        """Read n bytes and return them converted with bytes_cons."""
        raw = self.fileobj.read(n)
        return bytes_cons(raw)

    def read_uint(self):
        """Read a variable-length unsigned integer.

        Each byte contributes its low 7 bits, most significant group first;
        a byte with the top bit clear ends the number.
        """
        value = 0
        more = True
        while more:
            byte = self.read_byte()
            value = (value << 7) | (byte & 0x7F)
            more = (byte & 0x80) != 0
        return value
Damien George992a6e12019-03-01 14:03:10 +11001154
Damien George69661f32020-02-27 15:36:53 +11001155
def read_qstr(reader, segments):
    """Read one qstr from reader, record a segment for it and return the qstr.

    The leading uint carries a flag in its lowest bit: when set, the remaining
    bits are the index of a static qstr; otherwise they give the length of the
    string data that follows, which is added to the global qstr set.
    """
    header_pos = reader.tell()
    header = reader.read_uint()
    is_static = header & 1
    value = header >> 1

    if is_static:
        # static qstr
        static_qstr = global_qstrs.get_by_index(value)
        segments.append(MPYSegment(MPYSegment.META, static_qstr.str, header_pos, header_pos))
        return static_qstr

    data_pos = reader.tell()
    text = str_cons(reader.read_bytes(value), "utf8")
    reader.read_byte()  # read and discard null terminator
    end_pos = reader.tell()
    segments.append(MPYSegment(MPYSegment.QSTR, text, data_pos, end_pos))
    return global_qstrs.add(text)
Damien George0699c6b2016-01-31 21:45:22 +00001170
Damien George69661f32020-02-27 15:36:53 +11001171
def read_obj(reader, segments):
    """Read one persistent object from reader and return it as a Python value.

    Objects that carry a data payload (str, bytes, int, float, complex) also
    get an OBJ segment appended to segments. Raises MPYReadError for an
    unknown object type.
    """
    obj_type = reader.read_byte()

    # Objects fully described by their type byte.
    if obj_type == MP_PERSISTENT_OBJ_FUN_TABLE:
        return MPFunTable()
    if obj_type == MP_PERSISTENT_OBJ_NONE:
        return None
    if obj_type == MP_PERSISTENT_OBJ_FALSE:
        return False
    if obj_type == MP_PERSISTENT_OBJ_TRUE:
        return True
    if obj_type == MP_PERSISTENT_OBJ_ELLIPSIS:
        return Ellipsis

    # Tuples: a count followed by that many nested objects.
    if obj_type == MP_PERSISTENT_OBJ_TUPLE:
        count = reader.read_uint()
        return tuple(read_obj(reader, segments) for _ in range(count))

    # Everything else: a length followed by that many bytes of payload.
    length = reader.read_uint()
    payload_pos = reader.tell()
    payload = reader.read_bytes(length)
    if obj_type == MP_PERSISTENT_OBJ_STR or obj_type == MP_PERSISTENT_OBJ_BYTES:
        reader.read_byte()  # read and discard null terminator

    if obj_type == MP_PERSISTENT_OBJ_STR:
        value = str_cons(payload, "utf8")
        # Strings below the threshold are also registered as qstrs.
        if len(value) < PERSISTENT_STR_INTERN_THRESHOLD and not global_qstrs.find_by_str(
            value
        ):
            global_qstrs.add(value)
    elif obj_type == MP_PERSISTENT_OBJ_BYTES:
        value = payload
    elif obj_type == MP_PERSISTENT_OBJ_INT:
        value = int(str_cons(payload, "ascii"), 10)
    elif obj_type == MP_PERSISTENT_OBJ_FLOAT:
        value = float(str_cons(payload, "ascii"))
    elif obj_type == MP_PERSISTENT_OBJ_COMPLEX:
        value = complex(str_cons(payload, "ascii"))
    else:
        raise MPYReadError(reader.filename, "corrupt .mpy file")

    segments.append(MPYSegment(MPYSegment.OBJ, value, payload_pos, reader.tell()))
    return value
Damien George0699c6b2016-01-31 21:45:22 +00001210
Damien George69661f32020-02-27 15:36:53 +11001211
def read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments):
    """Read one raw-code entry, and recursively its children, from reader.

    Returns a RawCodeBytecode or RawCodeNative instance whose ``children``
    attribute lists the nested entries. A CODE segment for the function data
    is inserted into segments ahead of any segments produced while reading the
    rest of this entry.
    """
    # Read raw code header: kind in bits 0-1, children flag in bit 2, length above.
    header = reader.read_uint()
    kind = (header & 3) + MP_CODE_BYTECODE
    has_children = (header >> 2) & 1
    fun_data_len = header >> 3

    # Read the body of the raw code.
    data_offset = reader.tell()
    fun_data = reader.read_bytes(fun_data_len)
    insert_at = len(segments)

    if kind == MP_CODE_BYTECODE:
        # Create bytecode raw code.
        rc = RawCodeBytecode(cm_escaped_name, qstr_table, obj_table, fun_data)
    else:
        # Create native raw code.
        qstr_links = []
        if kind in (MP_CODE_NATIVE_PY, MP_CODE_NATIVE_VIPER):
            # Read qstr link table.
            for _ in range(reader.read_uint()):
                packed = reader.read_uint()
                linked_qstr = read_qstr(reader, segments)
                qstr_links.append((packed >> 2, packed & 3, linked_qstr))

        native_scope_flags = 0
        native_n_pos_args = 0
        native_type_sig = 0
        if kind == MP_CODE_NATIVE_PY:
            prelude_offset = reader.read_uint()
        else:
            prelude_offset = 0
            native_scope_flags = reader.read_uint()
            if kind == MP_CODE_NATIVE_VIPER:
                # Read any additional sections for native viper.
                has_rodata = native_scope_flags & MP_SCOPE_FLAG_VIPERRODATA
                if has_rodata:
                    rodata_size = reader.read_uint()
                if native_scope_flags & MP_SCOPE_FLAG_VIPERBSS:
                    reader.read_uint()  # bss size, read and discarded
                if has_rodata:
                    reader.read_bytes(rodata_size)
                if native_scope_flags & MP_SCOPE_FLAG_VIPERRELOC:
                    # Skip over the relocation records up to the 0xFF terminator.
                    while True:
                        op = reader.read_byte()
                        if op == 0xFF:
                            break
                        if op & 1:
                            reader.read_uint()  # address, read and discarded
                        op >>= 1
                        if op <= 5 and op & 1:
                            reader.read_uint()  # count, read and discarded
            else:
                assert kind == MP_CODE_NATIVE_ASM
                native_n_pos_args = reader.read_uint()
                native_type_sig = reader.read_uint()

        rc = RawCodeNative(
            cm_escaped_name,
            qstr_table,
            kind,
            fun_data,
            prelude_offset,
            qstr_links,
            native_scope_flags,
            native_n_pos_args,
            native_type_sig,
        )

    # Add a segment for the raw code data.
    code_segment = MPYSegment(
        MPYSegment.CODE, rc.simple_name.str, data_offset, data_offset + fun_data_len
    )
    segments.insert(insert_at, code_segment)

    # Read children, if there are any.
    rc.children = []
    if has_children:
        for _ in range(reader.read_uint()):
            child = read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments)
            rc.children.append(child)

    return rc
1297
Damien George0699c6b2016-01-31 21:45:22 +00001298
def read_mpy(filename):
    """Load the .mpy file at filename and return it as a CompiledModule.

    Verifies the header, records the file's feature settings in ``config``,
    then reads the qstr table, the object table and the outer raw code.
    Raises MPYReadError for a bad header, an incompatible version or a native
    architecture that differs from the one already configured.
    """
    with open(filename, "rb") as fileobj:
        reader = MPYReader(filename, fileobj)
        segments = []

        # Read and verify the header.
        header = reader.read_bytes(4)
        if header[0] != ord("M"):
            raise MPYReadError(filename, "not a valid .mpy file")
        if header[1] != config.MPY_VERSION:
            raise MPYReadError(filename, "incompatible .mpy version")

        features = header[2]
        config.MICROPY_PY_BUILTINS_STR_UNICODE = (features & 2) != 0

        # Adopt the file's native architecture, or check it against the one already set.
        file_arch = features >> 2
        if file_arch != MP_NATIVE_ARCH_NONE and config.native_arch != file_arch:
            if config.native_arch != MP_NATIVE_ARCH_NONE:
                raise MPYReadError(filename, "native architecture mismatch")
            config.native_arch = file_arch

        config.mp_small_int_bits = header[3]

        # Read number of qstrs, and number of objects.
        n_qstr = reader.read_uint()
        n_obj = reader.read_uint()

        # Read qstrs and construct qstr table.
        qstr_table = [read_qstr(reader, segments) for _ in range(n_qstr)]

        # Read objects and construct object table.
        obj_table = [read_obj(reader, segments) for _ in range(n_obj)]

        # Compute the compiled-module escaped name.
        source_name = qstr_table[0].str
        cm_escaped_name = source_name.replace("/", "_")[:-3]

        # Read the outer raw code, which will in turn read all its children.
        raw_code_file_offset = reader.tell()
        raw_code = read_raw_code(reader, cm_escaped_name, qstr_table, obj_table, segments)

    # Create the outer-level compiled module representing the whole .mpy file.
    return CompiledModule(
        filename,
        segments,
        header,
        qstr_table,
        obj_table,
        raw_code,
        raw_code_file_offset,
        cm_escaped_name,
    )
Damien George0699c6b2016-01-31 21:45:22 +00001352
Damien George69661f32020-02-27 15:36:53 +11001353
def hexdump_mpy(compiled_modules):
    """Call hexdump() on each compiled module, in order."""
    for module in compiled_modules:
        module.hexdump()
Damien George0699c6b2016-01-31 21:45:22 +00001357
Damien George69661f32020-02-27 15:36:53 +11001358
def disassemble_mpy(compiled_modules):
    """Call disassemble() on each compiled module, in order."""
    for module in compiled_modules:
        module.disassemble()
1362
1363
def freeze_mpy(base_qstrs, compiled_modules):
    """Print C source that freezes compiled_modules.

    base_qstrs       -- container of escaped qstr names that must not be
                        emitted again (tested with ``in``)
    compiled_modules -- sequence of compiled modules; each must provide
                        freeze(idx), source_file and escaped_name

    The output consists of the header includes and configuration checks, the
    enum and pool of new qstrs, each frozen module, the table of module names,
    the table of module pointers, and a closing comment with size statistics.
    The module-level size counters are reset here and then updated by the
    freeze methods called for each module.
    """
    global bc_content, const_str_content, const_int_content, const_obj_content
    global const_table_qstr_content, const_table_ptr_content, raw_code_count, raw_code_content

    # Collect the qstrs to add, in first-seen order, as
    # (escaped name, string, utf8 bytes) tuples.
    seen = set()
    new = []
    for q in global_qstrs.qstrs:
        # don't add duplicates
        if q is None or q.qstr_esc in base_qstrs or q.qstr_esc in seen:
            continue
        new.append((q.qstr_esc, q.str, bytes_cons(q.str, "utf8")))
        seen.add(q.qstr_esc)

    print('#include "py/mpconfig.h"')
    print('#include "py/objint.h"')
    print('#include "py/objstr.h"')
    print('#include "py/emitglue.h"')
    print('#include "py/nativeglue.h"')
    print()

    print("#if MICROPY_LONGINT_IMPL != %u" % config.MICROPY_LONGINT_IMPL)
    print('#error "incompatible MICROPY_LONGINT_IMPL"')
    print("#endif")
    print()

    if config.MICROPY_LONGINT_IMPL == config.MICROPY_LONGINT_IMPL_MPZ:
        print("#if MPZ_DIG_SIZE != %u" % config.MPZ_DIG_SIZE)
        print('#error "incompatible MPZ_DIG_SIZE"')
        print("#endif")
        print()

    print("#if MICROPY_PY_BUILTINS_FLOAT")
    print("typedef struct _mp_obj_float_t {")
    print(" mp_obj_base_t base;")
    print(" mp_float_t value;")
    print("} mp_obj_float_t;")
    print("#endif")
    print()

    print("#if MICROPY_PY_BUILTINS_COMPLEX")
    print("typedef struct _mp_obj_complex_t {")
    print(" mp_obj_base_t base;")
    print(" mp_float_t real;")
    print(" mp_float_t imag;")
    print("} mp_obj_complex_t;")
    print("#endif")
    print()

    if new:
        print("enum {")
        for i, (qstr_esc, _, _) in enumerate(new):
            if i == 0:
                # The first new qstr continues numbering from the base pool.
                print(" MP_QSTR_%s = MP_QSTRnumber_of," % qstr_esc)
            else:
                print(" MP_QSTR_%s," % qstr_esc)
        print("};")

    # As in qstr.c, set so that the first dynamically allocated pool is twice this size; must be <= the len
    qstr_pool_alloc = min(len(new), 10)

    # Reset the size statistics before any module is frozen.
    qstr_content = 0
    bc_content = 0
    const_str_content = 0
    const_int_content = 0
    const_obj_content = 0
    const_table_qstr_content = 0
    const_table_ptr_content = 0
    raw_code_count = 0
    raw_code_content = 0

    print()
    print("const qstr_hash_t mp_qstr_frozen_const_hashes[] = {")
    for _, _, qbytes in new:
        qhash = qstrutil.compute_hash(qbytes, config.MICROPY_QSTR_BYTES_IN_HASH)
        print(" %d," % qhash)
    print("};")
    print()
    print("const qstr_len_t mp_qstr_frozen_const_lengths[] = {")
    for _, _, qbytes in new:
        print(" %d," % len(qbytes))
    print("};")
    print()
    print("extern const qstr_pool_t mp_qstr_const_pool;")
    print("const qstr_pool_t mp_qstr_frozen_const_pool = {")
    print(" &mp_qstr_const_pool, // previous pool")
    print(" MP_QSTRnumber_of, // previous pool size")
    print(" %u, // allocated entries" % qstr_pool_alloc)
    print(" %u, // used entries" % len(new))
    print(" (qstr_hash_t *)mp_qstr_frozen_const_hashes,")
    print(" (qstr_len_t *)mp_qstr_frozen_const_lengths,")
    print(" {")
    for _, qstr, qbytes in new:
        print(' "%s",' % qstrutil.escape_bytes(qstr, qbytes))
        # Per qstr: length field + hash field + data + one terminating byte.
        qstr_content += (
            config.MICROPY_QSTR_BYTES_IN_LEN + config.MICROPY_QSTR_BYTES_IN_HASH + len(qbytes) + 1
        )
    print(" },")
    print("};")

    # Freeze all modules.
    for idx, cm in enumerate(compiled_modules):
        cm.freeze(idx)

    # Print separator, separating individual modules from global data structures.
    print()
    print("/" * 80)
    print("// collection of all frozen modules")

    # Define the string of frozen module names.
    print()
    print("const char mp_frozen_names[] = {")
    print(" #ifdef MP_FROZEN_STR_NAMES")
    # makemanifest.py might also include some frozen string content.
    print(" MP_FROZEN_STR_NAMES")
    print(" #endif")
    mp_frozen_mpy_names_content = 1
    for cm in compiled_modules:
        module_name = cm.source_file.str
        print(' "%s\\0"' % module_name)
        mp_frozen_mpy_names_content += len(module_name) + 1
    print(' "\\0"')
    print("};")

    # Define the array of pointers to frozen module content.
    print()
    print("const mp_frozen_module_t *const mp_frozen_mpy_content[] = {")
    for cm in compiled_modules:
        print(" &frozen_module_%s," % cm.escaped_name)
    print("};")
    # Counted as 4 bytes per module pointer.
    mp_frozen_mpy_content_size = len(compiled_modules) * 4

    # If a port defines MICROPY_FROZEN_LIST_ITEM then list all modules wrapped in that macro.
    print()
    print("#ifdef MICROPY_FROZEN_LIST_ITEM")
    for cm in compiled_modules:
        module_name = cm.source_file.str
        if module_name.endswith("/__init__.py"):
            short_name = module_name[: -len("/__init__.py")]
        else:
            short_name = module_name[: -len(".py")]
        print('MICROPY_FROZEN_LIST_ITEM("%s", "%s")' % (short_name, module_name))
    print("#endif")

    print()
    print("/*")
    print("byte sizes:")
    print("qstr content: %d unique, %d bytes" % (len(new), qstr_content))
    print("bc content: %d" % bc_content)
    print("const str content: %d" % const_str_content)
    print("const int content: %d" % const_int_content)
    print("const obj content: %d" % const_obj_content)
    print(
        "const table qstr content: %d entries, %d bytes"
        % (const_table_qstr_content, const_table_qstr_content * 4)
    )
    print(
        "const table ptr content: %d entries, %d bytes"
        % (const_table_ptr_content, const_table_ptr_content * 4)
    )
    print("raw code content: %d * 4 = %d" % (raw_code_count, raw_code_content))
    print("mp_frozen_mpy_names_content: %d" % mp_frozen_mpy_names_content)
    print("mp_frozen_mpy_content_size: %d" % mp_frozen_mpy_content_size)
    print(
        "total: %d"
        % (
            qstr_content
            + bc_content
            + const_str_content
            + const_int_content
            + const_obj_content
            + const_table_qstr_content * 4
            + const_table_ptr_content * 4
            + raw_code_content
            + mp_frozen_mpy_names_content
            + mp_frozen_mpy_content_size
        )
    )
    print("*/")
1545
Damien George0699c6b2016-01-31 21:45:22 +00001546
def merge_mpy(raw_codes, output_file):
    """Merge one or two .mpy files into a single .mpy image.

    A single input is copied through unchanged.  For two inputs a new outer
    module is synthesised: its bytecode makes and calls a function for each
    input in turn, and the raw code of every input is appended as a child.

    Args:
        raw_codes: sequence of loaded modules.  Each must provide
            ``mpy_source_file``; when two are given each must also provide
            ``raw_code_file_offset``, ``qstr_table`` and ``obj_table``.
        output_file: path of the file to write, or None to write the merged
            image to stdout.

    Raises:
        ValueError: if ``raw_codes`` is empty or holds more than two entries.
        Exception: if more than one input has a populated qstr or obj table.
    """
    # Validate explicitly instead of with ``assert``: asserts are stripped by
    # ``python -O``, and past two inputs the var-uints written below no longer
    # fit in 1 byte, which would silently produce a corrupt file.
    if len(raw_codes) == 0:
        raise ValueError("no .mpy files to merge")
    if len(raw_codes) > 2:
        raise ValueError("can't merge more than 2 .mpy files (got %d)" % len(raw_codes))

    merged_mpy = bytearray()

    if len(raw_codes) == 1:
        # Nothing to merge, pass the single file through verbatim.
        with open(raw_codes[0].mpy_source_file, "rb") as f:
            merged_mpy.extend(f.read())
    else:
        # At most one input may contribute its qstr_table and obj_table to the
        # merged file; that input becomes main_rc.
        populated = [
            rc for rc in raw_codes if len(rc.qstr_table) > 1 or len(rc.obj_table) > 0
        ]
        if len(populated) > 1:
            raise Exception(
                "can't merge files when more than one has a populated qstr or obj table"
            )
        main_rc = populated[0] if populated else raw_codes[0]

        header = bytearray(4)
        header[0] = ord("M")
        header[1] = config.MPY_VERSION
        header[2] = config.native_arch << 2 | config.MICROPY_PY_BUILTINS_STR_UNICODE << 1
        header[3] = config.mp_small_int_bits
        merged_mpy.extend(header)

        # Copy n_qstr, n_obj, qstr_table, obj_table from main_rc, i.e. everything
        # between its 4-byte header and the start of its raw code.
        with open(main_rc.mpy_source_file, "rb") as f:
            data = f.read(main_rc.raw_code_file_offset)
            merged_mpy.extend(data[4:])

        # Outer bytecode: 3 prelude bytes, 5 bytes per input, 2 bytes to return.
        bytecode = bytearray()
        bytecode_len = 3 + len(raw_codes) * 5 + 2
        bytecode.append(bytecode_len << 3 | 1 << 2)  # kind, has_children and length
        bytecode.append(0b00000000)  # signature prelude
        bytecode.append(0b00000010)  # size prelude; n_info=1
        bytecode.extend(b"\x00")  # simple_name: qstr index 0 (will use source filename)
        for idx in range(len(raw_codes)):
            bytecode.append(0x32)  # MP_BC_MAKE_FUNCTION
            bytecode.append(idx)  # index raw code
            bytecode.extend(b"\x34\x00\x59")  # MP_BC_CALL_FUNCTION, 0 args, MP_BC_POP_TOP
        bytecode.extend(b"\x51\x63")  # MP_BC_LOAD_NONE, MP_BC_RETURN_VALUE

        merged_mpy.extend(bytecode)

        merged_mpy.append(len(raw_codes))  # n_children

        # Append each input's raw code, in order, as the children.
        for rc in raw_codes:
            with open(rc.mpy_source_file, "rb") as f:
                f.seek(rc.raw_code_file_offset)
                data = f.read()  # read rest of mpy file
                merged_mpy.extend(data)

    if output_file is None:
        # Python 3 exposes the binary stream as sys.stdout.buffer; Python 2 has
        # no such attribute and its stdout accepts byte strings directly.
        binary_stdout = getattr(sys.stdout, "buffer", sys.stdout)
        binary_stdout.write(bytes(merged_mpy))
    else:
        with open(output_file, "wb") as f:
            f.write(merged_mpy)
1606
Damien George69661f32020-02-27 15:36:53 +11001607
def main():
    # Command-line entry point: parse the arguments, set the target-specific
    # config values, load every input .mpy file, then run each requested action
    # (hexdump, disassemble, freeze, merge) in that order.
    global global_qstrs

    import argparse

    parser = argparse.ArgumentParser(description="A tool to work with MicroPython .mpy files.")

    # Boolean action switches, registered in the order they appear in --help.
    action_switches = (
        (("-x", "--hexdump"), "output an annotated hex dump of files"),
        (("-d", "--disassemble"), "output disassembled contents of files"),
        (("-f", "--freeze"), "freeze files"),
        (("--merge",), "merge multiple .mpy files into one"),
    )
    for flags, help_text in action_switches:
        parser.add_argument(*flags, action="store_true", help=help_text)

    parser.add_argument("-q", "--qstr-header", help="qstr header file to freeze against")
    parser.add_argument(
        "-mlongint-impl",
        choices=["none", "longlong", "mpz"],
        default="mpz",
        help="long-int implementation used by target (default mpz)",
    )
    parser.add_argument(
        "-mmpz-dig-size",
        metavar="N",
        type=int,
        default=16,
        help="mpz digit size used by target (default 16)",
    )
    parser.add_argument("-o", "--output", default=None, help="output file")
    parser.add_argument("files", nargs="+", help="input .mpy files")
    opts = parser.parse_args()

    # Config values describing the target machine.
    longint_impl_by_name = {
        "none": config.MICROPY_LONGINT_IMPL_NONE,
        "longlong": config.MICROPY_LONGINT_IMPL_LONGLONG,
        "mpz": config.MICROPY_LONGINT_IMPL_MPZ,
    }
    config.MICROPY_LONGINT_IMPL = longint_impl_by_name[opts.mlongint_impl]
    config.MPZ_DIG_SIZE = opts.mmpz_dig_size
    config.native_arch = MP_NATIVE_ARCH_NONE

    # Qstr config values and the existing base set of qstrs: defaults when no
    # header is given, otherwise whatever the header declares.
    if not opts.qstr_header:
        config.MICROPY_QSTR_BYTES_IN_LEN = 1
        config.MICROPY_QSTR_BYTES_IN_HASH = 1
        base_qstrs = list(qstrutil.static_qstr_list)
    else:
        qcfgs, base_qstrs = qstrutil.parse_input_headers([opts.qstr_header])
        config.MICROPY_QSTR_BYTES_IN_LEN = int(qcfgs["BYTES_IN_LEN"])
        config.MICROPY_QSTR_BYTES_IN_HASH = int(qcfgs["BYTES_IN_HASH"])

    # Start from a fresh list of global qstrs before reading any file.
    global_qstrs = GlobalQStrList()

    # Load all .mpy files, stopping with exit status 1 on the first read error.
    compiled_modules = []
    try:
        for filename in opts.files:
            compiled_modules.append(read_mpy(filename))
    except MPYReadError as er:
        print(er, file=sys.stderr)
        sys.exit(1)

    if opts.hexdump:
        hexdump_mpy(compiled_modules)

    if opts.disassemble:
        if opts.hexdump:
            # Blank line between the hex dump and the disassembly.
            print()
        disassemble_mpy(compiled_modules)

    if opts.freeze:
        try:
            freeze_mpy(base_qstrs, compiled_modules)
        except FreezeError as er:
            print(er, file=sys.stderr)
            sys.exit(1)

    if opts.merge:
        merge_mpy(compiled_modules, opts.output)
Damien George0699c6b2016-01-31 21:45:22 +00001688
Damien George69661f32020-02-27 15:36:53 +11001689
# Run the command-line tool only when this file is executed as a script, so
# that importing it as a module has no side effects.
if __name__ == "__main__":
    main()