py: Convert hash API to use MP_UNARY_OP_HASH instead of ad-hoc function.
Hashing is now done using mp_unary_op function with MP_UNARY_OP_HASH as
the operator argument. Hashing for int, str and bytes still go via
fast-path in mp_unary_op since they are the most common objects which
need to be hashed.
This lead to quite a bit of code cleanup, and should be more efficient
if anything. It saves 176 bytes code space on Thumb2, and 360 bytes on
x86.
The only loss is that the error message "unhashable type" is now the
more generic "unsupported type for __hash__".
diff --git a/py/bc0.h b/py/bc0.h
index 89f138e..2dfdc70 100644
--- a/py/bc0.h
+++ b/py/bc0.h
@@ -116,7 +116,7 @@
#define MP_BC_LOAD_CONST_SMALL_INT_MULTI (0x70) // + N(64)
#define MP_BC_LOAD_FAST_MULTI (0xb0) // + N(16)
#define MP_BC_STORE_FAST_MULTI (0xc0) // + N(16)
-#define MP_BC_UNARY_OP_MULTI (0xd0) // + op(5)
-#define MP_BC_BINARY_OP_MULTI (0xd5) // + op(35)
+#define MP_BC_UNARY_OP_MULTI (0xd0) // + op(6)
+#define MP_BC_BINARY_OP_MULTI (0xd6) // + op(35)
#endif // __MICROPY_INCLUDED_PY_BC0_H__
diff --git a/py/map.c b/py/map.c
index 8cfc19b..5c83913 100644
--- a/py/map.c
+++ b/py/map.c
@@ -31,7 +31,8 @@
#include "py/mpconfig.h"
#include "py/misc.h"
-#include "py/obj.h"
+#include "py/runtime0.h"
+#include "py/runtime.h"
// Fixed empty map. Useful when need to call kw-receiving functions
// without any keywords from C, etc.
@@ -200,7 +201,7 @@
}
}
- mp_uint_t hash = mp_obj_hash(index);
+ mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
mp_uint_t pos = hash % map->alloc;
mp_uint_t start_pos = pos;
mp_map_elem_t *avail_slot = NULL;
@@ -308,7 +309,7 @@
return NULL;
}
}
- mp_uint_t hash = mp_obj_hash(index);
+ mp_uint_t hash = MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, index));
mp_uint_t pos = hash % set->alloc;
mp_uint_t start_pos = pos;
mp_obj_t *avail_slot = NULL;
diff --git a/py/modbuiltins.c b/py/modbuiltins.c
index 1e91d43..76274c2 100644
--- a/py/modbuiltins.c
+++ b/py/modbuiltins.c
@@ -272,8 +272,8 @@
MP_DEFINE_CONST_FUN_OBJ_2(mp_builtin_divmod_obj, mp_builtin_divmod);
STATIC mp_obj_t mp_builtin_hash(mp_obj_t o_in) {
- // TODO hash will generally overflow small integer; can we safely truncate it?
- return mp_obj_new_int(mp_obj_hash(o_in));
+ // result is guaranteed to be a (small) int
+ return mp_unary_op(MP_UNARY_OP_HASH, o_in);
}
MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_hash_obj, mp_builtin_hash);
diff --git a/py/obj.c b/py/obj.c
index 1b42377..2b5585b 100644
--- a/py/obj.c
+++ b/py/obj.c
@@ -147,61 +147,6 @@
return mp_obj_instance_is_callable(o_in);
}
-mp_int_t mp_obj_hash(mp_obj_t o_in) {
- if (o_in == mp_const_false) {
- return 0; // needs to hash to same as the integer 0, since False==0
- } else if (o_in == mp_const_true) {
- return 1; // needs to hash to same as the integer 1, since True==1
- } else if (MP_OBJ_IS_SMALL_INT(o_in)) {
- return MP_OBJ_SMALL_INT_VALUE(o_in);
- } else if (MP_OBJ_IS_TYPE(o_in, &mp_type_int)) {
- return mp_obj_int_hash(o_in);
- } else if (MP_OBJ_IS_STR(o_in) || MP_OBJ_IS_TYPE(o_in, &mp_type_bytes)) {
- return mp_obj_str_get_hash(o_in);
- } else if (MP_OBJ_IS_TYPE(o_in, &mp_type_NoneType)) {
- return (mp_int_t)o_in;
- } else if (MP_OBJ_IS_FUN(o_in)) {
- return (mp_int_t)o_in;
- } else if (MP_OBJ_IS_TYPE(o_in, &mp_type_tuple)) {
- return mp_obj_tuple_hash(o_in);
- } else if (MP_OBJ_IS_TYPE(o_in, &mp_type_type)) {
- return (mp_int_t)o_in;
- } else if (mp_obj_is_instance_type(mp_obj_get_type(o_in))) {
- // if a valid __hash__ method exists, use it
- mp_obj_t method[2];
- mp_load_method_maybe(o_in, MP_QSTR___hash__, method);
- if (method[0] != MP_OBJ_NULL) {
- mp_obj_t hash_val = mp_call_method_n_kw(0, 0, method);
- if (MP_OBJ_IS_INT(hash_val)) {
- return mp_obj_int_get_truncated(hash_val);
- }
- goto error;
- }
-
- mp_load_method_maybe(o_in, MP_QSTR___eq__, method);
- if (method[0] == MP_OBJ_NULL) {
- // https://docs.python.org/3/reference/datamodel.html#object.__hash__
- // "User-defined classes have __eq__() and __hash__() methods by default;
- // with them, all objects compare unequal (except with themselves) and
- // x.__hash__() returns an appropriate value such that x == y implies
- // both that x is y and hash(x) == hash(y)."
- return (mp_int_t)o_in;
- }
- // "A class that overrides __eq__() and does not define __hash__() will have its __hash__() implicitly set to None.
- // When the __hash__() method of a class is None, instances of the class will raise an appropriate TypeError"
- }
-
- // TODO hash classes
-
-error:
- if (MICROPY_ERROR_REPORTING == MICROPY_ERROR_REPORTING_TERSE) {
- nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "unhashable type"));
- } else {
- nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
- "unhashable type: '%s'", mp_obj_get_type_str(o_in)));
- }
-}
-
// This function implements the '==' operator (and so the inverse of '!=').
//
// From the Python language reference:
@@ -540,3 +485,10 @@
nlr_raise(mp_obj_new_exception_msg(&mp_type_TypeError, "object with buffer protocol required"));
}
}
+
+mp_obj_t mp_generic_unary_op(mp_uint_t op, mp_obj_t o_in) {
+ switch (op) {
+ case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_uint_t)o_in);
+ default: return MP_OBJ_NULL; // op not supported
+ }
+}
diff --git a/py/obj.h b/py/obj.h
index c67c04a..b9e912a 100644
--- a/py/obj.h
+++ b/py/obj.h
@@ -508,7 +508,6 @@
bool mp_obj_is_true(mp_obj_t arg);
bool mp_obj_is_callable(mp_obj_t o_in);
-mp_int_t mp_obj_hash(mp_obj_t o_in);
bool mp_obj_equal(mp_obj_t o1, mp_obj_t o2);
mp_int_t mp_obj_get_int(mp_const_obj_t arg);
@@ -525,6 +524,7 @@
mp_obj_t mp_obj_len(mp_obj_t o_in);
mp_obj_t mp_obj_len_maybe(mp_obj_t o_in); // may return MP_OBJ_NULL
mp_obj_t mp_obj_subscr(mp_obj_t base, mp_obj_t index, mp_obj_t val);
+mp_obj_t mp_generic_unary_op(mp_uint_t op, mp_obj_t o_in);
// bool
// TODO make lower case when it has proven itself
diff --git a/py/objbool.c b/py/objbool.c
index 478491b..fe33e96 100644
--- a/py/objbool.c
+++ b/py/objbool.c
@@ -69,18 +69,21 @@
mp_int_t value = ((mp_obj_bool_t*)o_in)->value;
switch (op) {
case MP_UNARY_OP_BOOL: return o_in;
+ // needs to hash to the same value as if converting to an integer
+ case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT(value);
case MP_UNARY_OP_POSITIVE: return MP_OBJ_NEW_SMALL_INT(value);
case MP_UNARY_OP_NEGATIVE: return MP_OBJ_NEW_SMALL_INT(-value);
case MP_UNARY_OP_INVERT: return MP_OBJ_NEW_SMALL_INT(~value);
// only bool needs to implement MP_UNARY_OP_NOT
case MP_UNARY_OP_NOT:
- default: // no other cases
if (value) {
return mp_const_false;
} else {
return mp_const_true;
}
+
+ default: return MP_OBJ_NULL; // op not supported
}
}
diff --git a/py/objfun.c b/py/objfun.c
index ced8ceb..487c432 100644
--- a/py/objfun.c
+++ b/py/objfun.c
@@ -98,6 +98,7 @@
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_builtin_call,
+ .unary_op = mp_generic_unary_op,
};
/******************************************************************************/
@@ -314,6 +315,7 @@
.print = fun_bc_print,
#endif
.call = fun_bc_call,
+ .unary_op = mp_generic_unary_op,
#if MICROPY_PY_FUNCTION_ATTRS
.attr = fun_bc_attr,
#endif
@@ -366,6 +368,7 @@
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_native_call,
+ .unary_op = mp_generic_unary_op,
};
mp_obj_t mp_obj_new_fun_native(mp_uint_t scope_flags, mp_uint_t n_pos_args, mp_uint_t n_kwonly_args, mp_obj_t def_args_in, mp_obj_t def_kw_args, const void *fun_data) {
@@ -421,6 +424,7 @@
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_viper_call,
+ .unary_op = mp_generic_unary_op,
};
mp_obj_t mp_obj_new_fun_viper(mp_uint_t n_args, void *fun_data, mp_uint_t type_sig) {
@@ -533,6 +537,7 @@
{ &mp_type_type },
.name = MP_QSTR_function,
.call = fun_asm_call,
+ .unary_op = mp_generic_unary_op,
};
mp_obj_t mp_obj_new_fun_asm(mp_uint_t n_args, void *fun_data) {
diff --git a/py/objint.c b/py/objint.c
index 7c527d4..1ff89d6 100644
--- a/py/objint.c
+++ b/py/objint.c
@@ -259,10 +259,6 @@
#if MICROPY_LONGINT_IMPL == MICROPY_LONGINT_IMPL_NONE
-mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
- return MP_OBJ_SMALL_INT_VALUE(self_in);
-}
-
bool mp_obj_int_is_positive(mp_obj_t self_in) {
return mp_obj_get_int(self_in) >= 0;
}
diff --git a/py/objint_longlong.c b/py/objint_longlong.c
index 64376e9..075fabe 100644
--- a/py/objint_longlong.c
+++ b/py/objint_longlong.c
@@ -53,16 +53,6 @@
const mp_obj_int_t mp_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX};
#endif
-mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
- if (MP_OBJ_IS_SMALL_INT(self_in)) {
- return MP_OBJ_SMALL_INT_VALUE(self_in);
- }
- mp_obj_int_t *self = self_in;
- // truncate value to fit in mp_int_t, which gives the same hash as
- // small int if the value fits without truncation
- return self->val;
-}
-
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, mp_uint_t len, byte *buf) {
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_int));
mp_obj_int_t *self = self_in;
@@ -117,6 +107,11 @@
mp_obj_int_t *o = o_in;
switch (op) {
case MP_UNARY_OP_BOOL: return MP_BOOL(o->val != 0);
+
+ // truncate value to fit in mp_int_t, which gives the same hash as
+ // small int if the value fits without truncation
+ case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_int_t)o->val);
+
case MP_UNARY_OP_POSITIVE: return o_in;
case MP_UNARY_OP_NEGATIVE: return mp_obj_new_int_from_ll(-o->val);
case MP_UNARY_OP_INVERT: return mp_obj_new_int_from_ll(~o->val);
diff --git a/py/objint_mpz.c b/py/objint_mpz.c
index 369e5af..936e2cb 100644
--- a/py/objint_mpz.c
+++ b/py/objint_mpz.c
@@ -96,14 +96,6 @@
return str;
}
-mp_int_t mp_obj_int_hash(mp_obj_t self_in) {
- if (MP_OBJ_IS_SMALL_INT(self_in)) {
- return MP_OBJ_SMALL_INT_VALUE(self_in);
- }
- mp_obj_int_t *self = self_in;
- return mpz_hash(&self->mpz);
-}
-
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, mp_uint_t len, byte *buf) {
assert(MP_OBJ_IS_TYPE(self_in, &mp_type_int));
mp_obj_int_t *self = self_in;
@@ -143,6 +135,7 @@
mp_obj_int_t *o = o_in;
switch (op) {
case MP_UNARY_OP_BOOL: return MP_BOOL(!mpz_is_zero(&o->mpz));
+ case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT(mpz_hash(&o->mpz));
case MP_UNARY_OP_POSITIVE: return o_in;
case MP_UNARY_OP_NEGATIVE: { mp_obj_int_t *o2 = mp_obj_int_new_mpz(); mpz_neg_inpl(&o2->mpz, &o->mpz); return o2; }
case MP_UNARY_OP_INVERT: { mp_obj_int_t *o2 = mp_obj_int_new_mpz(); mpz_not_inpl(&o2->mpz, &o->mpz); return o2; }
diff --git a/py/objnone.c b/py/objnone.c
index d47452b..69eab03 100644
--- a/py/objnone.c
+++ b/py/objnone.c
@@ -47,6 +47,7 @@
(void)o_in;
switch (op) {
case MP_UNARY_OP_BOOL: return mp_const_false;
+ case MP_UNARY_OP_HASH: return MP_OBJ_NEW_SMALL_INT((mp_uint_t)o_in);
default: return MP_OBJ_NULL; // op not supported
}
}
diff --git a/py/objstr.c b/py/objstr.c
index 7d307f5..40e3c3b 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -1982,16 +1982,6 @@
}
}
-mp_uint_t mp_obj_str_get_hash(mp_obj_t self_in) {
- // TODO: This has too big overhead for hash accessor
- if (MP_OBJ_IS_STR_OR_BYTES(self_in)) {
- GET_STR_HASH(self_in, h);
- return h;
- } else {
- bad_implicit_conversion(self_in);
- }
-}
-
mp_uint_t mp_obj_str_get_len(mp_obj_t self_in) {
// TODO This has a double check for the type, one in obj.c and one here
if (MP_OBJ_IS_STR_OR_BYTES(self_in)) {
diff --git a/py/objtuple.c b/py/objtuple.c
index 3ba37d0..2fd1815 100644
--- a/py/objtuple.c
+++ b/py/objtuple.c
@@ -126,6 +126,14 @@
mp_obj_tuple_t *self = self_in;
switch (op) {
case MP_UNARY_OP_BOOL: return MP_BOOL(self->len != 0);
+ case MP_UNARY_OP_HASH: {
+ // start hash with pointer to empty tuple, to make it fairly unique
+ mp_int_t hash = (mp_int_t)mp_const_empty_tuple;
+ for (mp_uint_t i = 0; i < self->len; i++) {
+ hash += MP_OBJ_SMALL_INT_VALUE(mp_unary_op(MP_UNARY_OP_HASH, self->items[i]));
+ }
+ return MP_OBJ_NEW_SMALL_INT(hash);
+ }
case MP_UNARY_OP_LEN: return MP_OBJ_NEW_SMALL_INT(self->len);
default: return MP_OBJ_NULL; // op not supported
}
@@ -258,17 +266,6 @@
m_del_var(mp_obj_tuple_t, mp_obj_t, self->len, self);
}
-mp_int_t mp_obj_tuple_hash(mp_obj_t self_in) {
- assert(MP_OBJ_IS_TYPE(self_in, &mp_type_tuple));
- mp_obj_tuple_t *self = self_in;
- // start hash with pointer to empty tuple, to make it fairly unique
- mp_int_t hash = (mp_int_t)mp_const_empty_tuple;
- for (mp_uint_t i = 0; i < self->len; i++) {
- hash += mp_obj_hash(self->items[i]);
- }
- return hash;
-}
-
/******************************************************************************/
/* tuple iterator */
diff --git a/py/objtype.c b/py/objtype.c
index c2c8d2e..7b293c9 100644
--- a/py/objtype.c
+++ b/py/objtype.c
@@ -327,6 +327,7 @@
const qstr mp_unary_op_method_name[] = {
[MP_UNARY_OP_BOOL] = MP_QSTR___bool__,
[MP_UNARY_OP_LEN] = MP_QSTR___len__,
+ [MP_UNARY_OP_HASH] = MP_QSTR___hash__,
#if MICROPY_PY_ALL_SPECIAL_METHODS
[MP_UNARY_OP_POSITIVE] = MP_QSTR___pos__,
[MP_UNARY_OP_NEGATIVE] = MP_QSTR___neg__,
@@ -355,8 +356,28 @@
if (member[0] == MP_OBJ_SENTINEL) {
return mp_unary_op(op, self->subobj[0]);
} else if (member[0] != MP_OBJ_NULL) {
- return mp_call_function_1(member[0], self_in);
+ mp_obj_t val = mp_call_function_1(member[0], self_in);
+ // __hash__ must return a small int
+ if (op == MP_UNARY_OP_HASH) {
+ val = MP_OBJ_NEW_SMALL_INT(mp_obj_int_get_truncated(val));
+ }
+ return val;
} else {
+ if (op == MP_UNARY_OP_HASH) {
+ lookup.attr = MP_QSTR___eq__;
+ mp_obj_class_lookup(&lookup, self->base.type);
+ if (member[0] == MP_OBJ_NULL) {
+ // https://docs.python.org/3/reference/datamodel.html#object.__hash__
+ // "User-defined classes have __eq__() and __hash__() methods by default;
+ // with them, all objects compare unequal (except with themselves) and
+ // x.__hash__() returns an appropriate value such that x == y implies
+ // both that x is y and hash(x) == hash(y)."
+ return MP_OBJ_NEW_SMALL_INT((mp_uint_t)self_in);
+ }
+ // "A class that overrides __eq__() and does not define __hash__() will have its __hash__() implicitly set to None.
+ // When the __hash__() method of a class is None, instances of the class will raise an appropriate TypeError"
+ }
+
return MP_OBJ_NULL; // op not supported
}
}
@@ -835,6 +856,7 @@
.print = type_print,
.make_new = type_make_new,
.call = type_call,
+ .unary_op = mp_generic_unary_op,
.attr = type_attr,
};
diff --git a/py/runtime.c b/py/runtime.c
index a6cf7e0..4f2d5a1 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -32,6 +32,7 @@
#include "py/nlr.h"
#include "py/parsenum.h"
#include "py/compile.h"
+#include "py/objstr.h"
#include "py/objtuple.h"
#include "py/objlist.h"
#include "py/objmodule.h"
@@ -200,6 +201,8 @@
switch (op) {
case MP_UNARY_OP_BOOL:
return MP_BOOL(val != 0);
+ case MP_UNARY_OP_HASH:
+ return arg;
case MP_UNARY_OP_POSITIVE:
return arg;
case MP_UNARY_OP_NEGATIVE:
@@ -215,6 +218,10 @@
assert(0);
return arg;
}
+ } else if (op == MP_UNARY_OP_HASH && MP_OBJ_IS_STR_OR_BYTES(arg)) {
+ // fast path for hashing str/bytes
+ GET_STR_HASH(arg, h);
+ return MP_OBJ_NEW_SMALL_INT(h);
} else {
mp_obj_type_t *type = mp_obj_get_type(arg);
if (type->unary_op != NULL) {
diff --git a/py/runtime0.h b/py/runtime0.h
index be9fc8d..65c7df0 100644
--- a/py/runtime0.h
+++ b/py/runtime0.h
@@ -50,6 +50,7 @@
typedef enum {
MP_UNARY_OP_BOOL, // __bool__
MP_UNARY_OP_LEN, // __len__
+ MP_UNARY_OP_HASH, // __hash__; must return a small int
MP_UNARY_OP_POSITIVE,
MP_UNARY_OP_NEGATIVE,
MP_UNARY_OP_INVERT,
diff --git a/py/vm.c b/py/vm.c
index aa84bdb..5ae4180 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -1196,7 +1196,7 @@
} else if (ip[-1] < MP_BC_STORE_FAST_MULTI + 16) {
fastn[MP_BC_STORE_FAST_MULTI - (mp_int_t)ip[-1]] = POP();
DISPATCH();
- } else if (ip[-1] < MP_BC_UNARY_OP_MULTI + 5) {
+ } else if (ip[-1] < MP_BC_UNARY_OP_MULTI + 6) {
SET_TOP(mp_unary_op(ip[-1] - MP_BC_UNARY_OP_MULTI, TOP()));
DISPATCH();
} else if (ip[-1] < MP_BC_BINARY_OP_MULTI + 35) {
diff --git a/py/vmentrytable.h b/py/vmentrytable.h
index 2e3e3ab..7c8f81b 100644
--- a/py/vmentrytable.h
+++ b/py/vmentrytable.h
@@ -111,7 +111,7 @@
[MP_BC_LOAD_CONST_SMALL_INT_MULTI ... MP_BC_LOAD_CONST_SMALL_INT_MULTI + 63] = &&entry_MP_BC_LOAD_CONST_SMALL_INT_MULTI,
[MP_BC_LOAD_FAST_MULTI ... MP_BC_LOAD_FAST_MULTI + 15] = &&entry_MP_BC_LOAD_FAST_MULTI,
[MP_BC_STORE_FAST_MULTI ... MP_BC_STORE_FAST_MULTI + 15] = &&entry_MP_BC_STORE_FAST_MULTI,
- [MP_BC_UNARY_OP_MULTI ... MP_BC_UNARY_OP_MULTI + 4] = &&entry_MP_BC_UNARY_OP_MULTI,
+ [MP_BC_UNARY_OP_MULTI ... MP_BC_UNARY_OP_MULTI + 5] = &&entry_MP_BC_UNARY_OP_MULTI,
[MP_BC_BINARY_OP_MULTI ... MP_BC_BINARY_OP_MULTI + 34] = &&entry_MP_BC_BINARY_OP_MULTI,
};