py: Improve handling of long-int overflow. This removes mpz_as_int, since that was a terrible function (it implemented saturating conversion). Use mpz_as_int_checked and mpz_as_uint_checked. These now work correctly (they previously had wrong overflow checking, eg print(chr(10000000000000)) on 32-bit machine would incorrectly convert this large number to a small int).

commit: c9aa58e6381018cca2513e3468363af0b5442d1f [log] [tgz]
author: Damien George <damien.p.george@gmail.com> Thu Jul 31 13:41:43 2014 +0000
committer: Damien George <damien.p.george@gmail.com> Thu Jul 31 13:41:43 2014 +0000
tree: 47fa6e3a4971fe4053b08bb085710c325d11bfd0
parent: 94fbe9711a1179b3c4d0eb2e9822787d90231719 [diff]
diff --git a/py/builtin.c b/py/builtin.c
index dabc990..88a724f 100644
--- a/py/builtin.c
+++ b/py/builtin.c

@@ -175,7 +175,7 @@
 
 STATIC mp_obj_t mp_builtin_chr(mp_obj_t o_in) {
     #if MICROPY_PY_BUILTINS_STR_UNICODE
-    mp_int_t c = mp_obj_get_int(o_in);
+    mp_uint_t c = mp_obj_get_int(o_in);
     char str[4];
     int len = 0;
     if (c < 0x80) {

diff --git a/py/mpz.c b/py/mpz.c
index 24c83d9..cf02aa2 100644
--- a/py/mpz.c
+++ b/py/mpz.c

@@ -1229,45 +1229,16 @@
     return val;
 }
 
-// TODO check that this correctly handles overflow in all cases
-mp_int_t mpz_as_int(const mpz_t *i) {
-    mp_int_t val = 0;
-    mpz_dig_t *d = i->dig + i->len;
-
-    while (--d >= i->dig) {
-        mp_int_t oldval = val;
-        val = (val << DIG_SIZE) | *d;
-        if (val < oldval) {
-            // overflow, return +/- "infinity"
-            if (i->neg == 0) {
-                // +infinity
-                return ~WORD_MSBIT_HIGH;
-            } else {
-                // -infinity
-                return WORD_MSBIT_HIGH;
-            }
-        }
-    }
-
-    if (i->neg != 0) {
-        val = -val;
-    }
-
-    return val;
-}
-
-// TODO check that this correctly handles overflow in all cases
 bool mpz_as_int_checked(const mpz_t *i, mp_int_t *value) {
     mp_int_t val = 0;
     mpz_dig_t *d = i->dig + i->len;
 
     while (--d >= i->dig) {
-        mp_int_t oldval = val;
-        val = (val << DIG_SIZE) | *d;
-        if (val < oldval) {
-            // overflow
+        if (val > (~(WORD_MSBIT_HIGH) >> DIG_SIZE)) {
+            // will overflow
             return false;
         }
+        val = (val << DIG_SIZE) | *d;
     }
 
     if (i->neg != 0) {
@@ -1278,6 +1249,27 @@
     return true;
 }
 
+bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) {
+    if (i->neg != 0) {
+        // can't represent signed values
+        return false;
+    }
+
+    mp_uint_t val = 0;
+    mpz_dig_t *d = i->dig + i->len;
+
+    while (--d >= i->dig) {
+        if (val > ((~0) >> DIG_SIZE)) {
+            // will overflow
+            return false;
+        }
+        val = (val << DIG_SIZE) | *d;
+    }
+
+    *value = val;
+    return true;
+}
+
 #if MICROPY_PY_BUILTINS_FLOAT
 mp_float_t mpz_as_float(const mpz_t *i) {
     mp_float_t val = 0;

diff --git a/py/mpz.h b/py/mpz.h
index c0cf60f..1620a43 100644
--- a/py/mpz.h
+++ b/py/mpz.h

@@ -97,8 +97,8 @@
 mpz_t *mpz_mod(const mpz_t *lhs, const mpz_t *rhs);
 
 mp_int_t mpz_hash(const mpz_t *z);
-mp_int_t mpz_as_int(const mpz_t *z);
 bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value);
+bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value);
 #if MICROPY_PY_BUILTINS_FLOAT
 mp_float_t mpz_as_float(const mpz_t *z);
 #endif

diff --git a/py/objint_mpz.c b/py/objint_mpz.c
index c60e5c2..8233773 100644
--- a/py/objint_mpz.c
+++ b/py/objint_mpz.c

@@ -225,8 +225,7 @@
             case MP_BINARY_OP_INPLACE_LSHIFT:
             case MP_BINARY_OP_RSHIFT:
             case MP_BINARY_OP_INPLACE_RSHIFT: {
-                // TODO check conversion overflow
-                mp_int_t irhs = mpz_as_int(zrhs);
+                mp_int_t irhs = mp_obj_int_get_checked(rhs_in);
                 if (irhs < 0) {
                     nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "negative shift count"));
                 }
@@ -303,7 +302,8 @@
         return MP_OBJ_SMALL_INT_VALUE(self_in);
     } else {
         const mp_obj_int_t *self = self_in;
-        return mpz_as_int(&self->mpz);
+        // TODO this is a hack until we remove mp_obj_int_get function entirely
+        return mpz_hash(&self->mpz);
     }
 }
commit	c9aa58e6381018cca2513e3468363af0b5442d1f	[log] [tgz]
author	Damien George <damien.p.george@gmail.com>	Thu Jul 31 13:41:43 2014 +0000
committer	Damien George <damien.p.george@gmail.com>	Thu Jul 31 13:41:43 2014 +0000
tree	47fa6e3a4971fe4053b08bb085710c325d11bfd0
parent	94fbe9711a1179b3c4d0eb2e9822787d90231719 [diff]