py: Implement divmod, % and proper // for floating point.

Tested and working on unix and pyboard.
diff --git a/py/builtin.c b/py/builtin.c
index 9810270..5b70531 100644
--- a/py/builtin.c
+++ b/py/builtin.c
@@ -242,13 +242,32 @@
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_dir_obj, 0, 1, mp_builtin_dir);
 
 STATIC mp_obj_t mp_builtin_divmod(mp_obj_t o1_in, mp_obj_t o2_in) {
+    // TODO handle big int
     if (MP_OBJ_IS_SMALL_INT(o1_in) && MP_OBJ_IS_SMALL_INT(o2_in)) {
         mp_int_t i1 = MP_OBJ_SMALL_INT_VALUE(o1_in);
         mp_int_t i2 = MP_OBJ_SMALL_INT_VALUE(o2_in);
+        if (i2 == 0) {
+            zero_division_error:
+            nlr_raise(mp_obj_new_exception_msg(&mp_type_ZeroDivisionError, "division by zero"));
+        }
         mp_obj_t args[2];
         args[0] = MP_OBJ_NEW_SMALL_INT(i1 / i2);
         args[1] = MP_OBJ_NEW_SMALL_INT(i1 % i2);
         return mp_obj_new_tuple(2, args);
+#if MICROPY_PY_BUILTINS_FLOAT
+    } else if (MP_OBJ_IS_TYPE(o1_in, &mp_type_float) || MP_OBJ_IS_TYPE(o2_in, &mp_type_float)) {
+        mp_float_t f1 = mp_obj_get_float(o1_in);
+        mp_float_t f2 = mp_obj_get_float(o2_in);
+        if (f2 == 0.0) {
+            goto zero_division_error;
+        }
+        mp_obj_float_divmod(&f1, &f2);
+        mp_obj_t tuple[2] = {
+            mp_obj_new_float(f1),
+            mp_obj_new_float(f2),
+        };
+        return mp_obj_new_tuple(2, tuple);
+#endif
     } else {
         nlr_raise(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "unsupported operand type(s) for divmod(): '%s' and '%s'", mp_obj_get_type_str(o1_in), mp_obj_get_type_str(o2_in)));
     }
diff --git a/py/obj.h b/py/obj.h
index eff4a4e..c7745dc 100644
--- a/py/obj.h
+++ b/py/obj.h
@@ -487,6 +487,7 @@
 } mp_obj_float_t;
 mp_float_t mp_obj_float_get(mp_obj_t self_in);
 mp_obj_t mp_obj_float_binary_op(mp_uint_t op, mp_float_t lhs_val, mp_obj_t rhs); // can return MP_OBJ_NULL if op not supported
+void mp_obj_float_divmod(mp_float_t *x, mp_float_t *y);
 
 // complex
 void mp_obj_complex_get(mp_obj_t self_in, mp_float_t *real, mp_float_t *imag);
diff --git a/py/objfloat.c b/py/objfloat.c
index c6734ee..b75d0e4 100644
--- a/py/objfloat.c
+++ b/py/objfloat.c
@@ -143,14 +143,16 @@
         case MP_BINARY_OP_INPLACE_SUBTRACT: lhs_val -= rhs_val; break;
         case MP_BINARY_OP_MULTIPLY:
         case MP_BINARY_OP_INPLACE_MULTIPLY: lhs_val *= rhs_val; break;
-        // TODO: verify that C floor matches Python semantics
         case MP_BINARY_OP_FLOOR_DIVIDE:
         case MP_BINARY_OP_INPLACE_FLOOR_DIVIDE:
             if (rhs_val == 0) {
                 zero_division_error:
-                nlr_raise(mp_obj_new_exception_msg(&mp_type_ZeroDivisionError, "float division by zero"));
+                nlr_raise(mp_obj_new_exception_msg(&mp_type_ZeroDivisionError, "division by zero"));
             }
-            lhs_val = MICROPY_FLOAT_C_FUN(floor)(lhs_val / rhs_val);
+            // Python specs require that x == (x//y)*y + (x%y) so we must
+            // call divmod to compute the correct floor division, which
+            // returns the floor divide in lhs_val.
+            mp_obj_float_divmod(&lhs_val, &rhs_val);
             break;
         case MP_BINARY_OP_TRUE_DIVIDE:
         case MP_BINARY_OP_INPLACE_TRUE_DIVIDE:
@@ -159,6 +161,21 @@
             }
             lhs_val /= rhs_val;
             break;
+        case MP_BINARY_OP_MODULO:
+        case MP_BINARY_OP_INPLACE_MODULO:
+            if (rhs_val == 0) {
+                goto zero_division_error;
+            }
+            lhs_val = MICROPY_FLOAT_C_FUN(fmod)(lhs_val, rhs_val);
+            // Python specs require that mod has same sign as second operand
+            if (lhs_val == 0.0) {
+                lhs_val = MICROPY_FLOAT_C_FUN(copysign)(0.0, rhs_val);
+            } else {
+                if ((lhs_val < 0.0) != (rhs_val < 0.0)) {
+                    lhs_val += rhs_val;
+                }
+            }
+            break;
         case MP_BINARY_OP_POWER:
         case MP_BINARY_OP_INPLACE_POWER: lhs_val = MICROPY_FLOAT_C_FUN(pow)(lhs_val, rhs_val); break;
         case MP_BINARY_OP_LESS: return MP_BOOL(lhs_val < rhs_val);
@@ -173,4 +190,39 @@
     return mp_obj_new_float(lhs_val);
 }
 
+void mp_obj_float_divmod(mp_float_t *x, mp_float_t *y) {
+    // logic here follows that of CPython
+    // https://docs.python.org/3/reference/expressions.html#binary-arithmetic-operations
+    // x == (x//y)*y + (x%y)
+    // divmod(x, y) == (x//y, x%y)
+    mp_float_t mod = MICROPY_FLOAT_C_FUN(fmod)(*x, *y);
+    mp_float_t div = (*x - mod) / *y;
+
+    // Python specs require that mod has same sign as second operand
+    if (mod == 0.0) {
+        mod = MICROPY_FLOAT_C_FUN(copysign)(0.0, *y);
+    } else {
+        if ((mod < 0.0) != (*y < 0.0)) {
+            mod += *y;
+            div -= 1.0;
+        }
+    }
+
+    mp_float_t floordiv;
+    if (div == 0.0) {
+        // if division is zero, take the correct sign of zero
+        floordiv = MICROPY_FLOAT_C_FUN(copysign)(0.0, *x / *y);
+    } else {
+        // Python specs require that x == (x//y)*y + (x%y)
+        floordiv = MICROPY_FLOAT_C_FUN(floor)(div);
+        if (div - floordiv > 0.5) {
+            floordiv += 1.0;
+        }
+    }
+
+    // return results
+    *x = floordiv;
+    *y = mod;
+}
+
 #endif // MICROPY_PY_BUILTINS_FLOAT