py: Compress load-int, load-fast, store-fast, unop, binop bytecodes.
There is a lot of potential in compressing bytecodes and making more use
of the coding space. This patch introduces "multi" bytecodes which have
their argument included in the bytecode (by addition).
UNARY_OP and BINARY_OP now no longer take a 1 byte argument for the
opcode. Rather, the opcode is included in the first byte itself.
LOAD_FAST_[0,1,2] and STORE_FAST_[0,1,2] are removed in favour of their
multi versions, which can take an argument between 0 and 15 inclusive.
The majority of LOAD_FAST/STORE_FAST codes fit in this range and so this
saves a byte for each of these.
LOAD_CONST_SMALL_INT_MULTI is used to load small ints between -16 and 47
inclusive. Such ints are quite common and now only need 1 byte to
store, and now have much faster decoding.
In all, this patch saves about 2% RAM for typical bytecode (1.8% on
64-bit test, 2.5% on pyboard test). It also reduces the binary size
(because bytecodes are simplified) and doesn't harm performance.
diff --git a/py/vm.c b/py/vm.c
index d959880..36ea10f 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -223,18 +223,6 @@
PUSH(MP_OBJ_NULL);
DISPATCH();
- ENTRY(MP_BC_LOAD_FAST_0):
- obj_shared = fastn[0];
- goto load_check;
-
- ENTRY(MP_BC_LOAD_FAST_1):
- obj_shared = fastn[-1];
- goto load_check;
-
- ENTRY(MP_BC_LOAD_FAST_2):
- obj_shared = fastn[-2];
- goto load_check;
-
ENTRY(MP_BC_LOAD_FAST_N):
DECODE_UINT;
obj_shared = fastn[-unum];
@@ -288,18 +276,6 @@
DISPATCH();
}
- ENTRY(MP_BC_STORE_FAST_0):
- fastn[0] = POP();
- DISPATCH();
-
- ENTRY(MP_BC_STORE_FAST_1):
- fastn[-1] = POP();
- DISPATCH();
-
- ENTRY(MP_BC_STORE_FAST_2):
- fastn[-2] = POP();
- DISPATCH();
-
ENTRY(MP_BC_STORE_FAST_N):
DECODE_UINT;
fastn[-unum] = POP();
@@ -606,19 +582,6 @@
}
DISPATCH();
- ENTRY(MP_BC_UNARY_OP):
- unum = *ip++;
- SET_TOP(mp_unary_op(unum, TOP()));
- DISPATCH();
-
- ENTRY(MP_BC_BINARY_OP): {
- unum = *ip++;
- mp_obj_t rhs = POP();
- mp_obj_t lhs = TOP();
- SET_TOP(mp_binary_op(unum, lhs, rhs));
- DISPATCH();
- }
-
ENTRY(MP_BC_BUILD_TUPLE):
DECODE_UINT;
sp -= unum - 1;
@@ -890,7 +853,53 @@
mp_import_all(POP());
DISPATCH();
- ENTRY_DEFAULT: {
+#if MICROPY_OPT_COMPUTED_GOTO
+ ENTRY(MP_BC_LOAD_CONST_SMALL_INT_MULTI):
+ PUSH(MP_OBJ_NEW_SMALL_INT((mp_int_t)ip[-1] - MP_BC_LOAD_CONST_SMALL_INT_MULTI - 16));
+ DISPATCH();
+
+ ENTRY(MP_BC_LOAD_FAST_MULTI):
+ obj_shared = fastn[MP_BC_LOAD_FAST_MULTI - (mp_int_t)ip[-1]];
+ goto load_check;
+
+ ENTRY(MP_BC_STORE_FAST_MULTI):
+ fastn[MP_BC_STORE_FAST_MULTI - (mp_int_t)ip[-1]] = POP();
+ DISPATCH();
+
+ ENTRY(MP_BC_UNARY_OP_MULTI):
+ SET_TOP(mp_unary_op(ip[-1] - MP_BC_UNARY_OP_MULTI, TOP()));
+ DISPATCH();
+
+ ENTRY(MP_BC_BINARY_OP_MULTI): {
+ mp_obj_t rhs = POP();
+ mp_obj_t lhs = TOP();
+ SET_TOP(mp_binary_op(ip[-1] - MP_BC_BINARY_OP_MULTI, lhs, rhs));
+ DISPATCH();
+ }
+
+ ENTRY_DEFAULT:
+#else
+ ENTRY_DEFAULT:
+ if (ip[-1] < MP_BC_LOAD_CONST_SMALL_INT_MULTI + 64) {
+ PUSH(MP_OBJ_NEW_SMALL_INT((mp_int_t)ip[-1] - MP_BC_LOAD_CONST_SMALL_INT_MULTI - 16));
+ DISPATCH();
+ } else if (ip[-1] < MP_BC_LOAD_FAST_MULTI + 16) {
+ obj_shared = fastn[MP_BC_LOAD_FAST_MULTI - (mp_int_t)ip[-1]];
+ goto load_check;
+ } else if (ip[-1] < MP_BC_STORE_FAST_MULTI + 16) {
+ fastn[MP_BC_STORE_FAST_MULTI - (mp_int_t)ip[-1]] = POP();
+ DISPATCH();
+ } else if (ip[-1] < MP_BC_UNARY_OP_MULTI + 5) {
+ SET_TOP(mp_unary_op(ip[-1] - MP_BC_UNARY_OP_MULTI, TOP()));
+ DISPATCH();
+ } else if (ip[-1] < MP_BC_BINARY_OP_MULTI + 35) {
+ mp_obj_t rhs = POP();
+ mp_obj_t lhs = TOP();
+ SET_TOP(mp_binary_op(ip[-1] - MP_BC_BINARY_OP_MULTI, lhs, rhs));
+ DISPATCH();
+ } else
+#endif
+ {
mp_obj_t obj = mp_obj_new_exception_msg(&mp_type_NotImplementedError, "byte code not implemented");
nlr_pop();
fastn[0] = obj;