py: Make UNARY_OP_NOT a first-class op, to agree with Python "not" semantics.

Fixes #1684 and makes "not" match Python semantics.  The code is also
simplified (the separate MP_BC_NOT opcode is removed) and the patch saves
68 bytes for bare-arm/ and 52 bytes for minimal/.

Previously "not x" was implemented as !mp_unary_op(x, MP_UNARY_OP_BOOL),
so any given object only needed to implement MP_UNARY_OP_BOOL (and the VM
had a special opcode to do the ! bit).

With this patch "not x" is implemented as mp_unary_op(x, MP_UNARY_OP_NOT),
but this operation is caught at the start of mp_unary_op and dispatched as
!mp_obj_is_true(x).  mp_obj_is_true has special logic to test for
truthiness, and is the correct way to handle the not operation.
diff --git a/py/bc0.h b/py/bc0.h
index c5a3015..b0b7d5c 100644
--- a/py/bc0.h
+++ b/py/bc0.h
@@ -80,8 +80,6 @@
 #define MP_BC_POP_EXCEPT         (0x45)
 #define MP_BC_UNWIND_JUMP        (0x46) // rel byte code offset, 16-bit signed, in excess; then a byte
 
-#define MP_BC_NOT                (0x47)
-
 #define MP_BC_BUILD_TUPLE        (0x50) // uint
 #define MP_BC_BUILD_LIST         (0x51) // uint
 #define MP_BC_LIST_APPEND        (0x52) // uint
@@ -115,7 +113,7 @@
 #define MP_BC_LOAD_CONST_SMALL_INT_MULTI (0x70) // + N(64)
 #define MP_BC_LOAD_FAST_MULTI            (0xb0) // + N(16)
 #define MP_BC_STORE_FAST_MULTI           (0xc0) // + N(16)
-#define MP_BC_UNARY_OP_MULTI             (0xd0) // + op(6)
-#define MP_BC_BINARY_OP_MULTI            (0xd6) // + op(36)
+#define MP_BC_UNARY_OP_MULTI             (0xd0) // + op(7)
+#define MP_BC_BINARY_OP_MULTI            (0xd7) // + op(36)
 
 #endif // __MICROPY_INCLUDED_PY_BC0_H__
diff --git a/py/emitbc.c b/py/emitbc.c
index c08501b..7957a21 100644
--- a/py/emitbc.c
+++ b/py/emitbc.c
@@ -801,15 +801,8 @@
 }
 
 void mp_emit_bc_unary_op(emit_t *emit, mp_unary_op_t op) {
-    if (op == MP_UNARY_OP_NOT) {
-        emit_bc_pre(emit, 0);
-        emit_write_bytecode_byte(emit, MP_BC_UNARY_OP_MULTI + MP_UNARY_OP_BOOL);
-        emit_bc_pre(emit, 0);
-        emit_write_bytecode_byte(emit, MP_BC_NOT);
-    } else {
-        emit_bc_pre(emit, 0);
-        emit_write_bytecode_byte(emit, MP_BC_UNARY_OP_MULTI + op);
-    }
+    emit_bc_pre(emit, 0);
+    emit_write_bytecode_byte(emit, MP_BC_UNARY_OP_MULTI + op);
 }
 
 void mp_emit_bc_binary_op(emit_t *emit, mp_binary_op_t op) {
@@ -825,7 +818,7 @@
     emit_write_bytecode_byte(emit, MP_BC_BINARY_OP_MULTI + op);
     if (invert) {
         emit_bc_pre(emit, 0);
-        emit_write_bytecode_byte(emit, MP_BC_NOT);
+        emit_write_bytecode_byte(emit, MP_BC_UNARY_OP_MULTI + MP_UNARY_OP_NOT);
     }
 }
 
diff --git a/py/emitnative.c b/py/emitnative.c
index 34e0b92..6590807 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -2061,11 +2061,6 @@
     vtype_kind_t vtype;
     emit_pre_pop_reg(emit, &vtype, REG_ARG_2);
     if (vtype == VTYPE_PYOBJ) {
-        if (op == MP_UNARY_OP_NOT) {
-            // we need to synthesise this operation by converting to bool first
-            emit_call_with_imm_arg(emit, MP_F_UNARY_OP, MP_UNARY_OP_BOOL, REG_ARG_1);
-            ASM_MOV_REG_REG(emit->as, REG_ARG_2, REG_RET);
-        }
         emit_call_with_imm_arg(emit, MP_F_UNARY_OP, op, REG_ARG_1);
         emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
     } else {
diff --git a/py/objbool.c b/py/objbool.c
index 6220056..4276335 100644
--- a/py/objbool.c
+++ b/py/objbool.c
@@ -74,15 +74,6 @@
         case MP_UNARY_OP_POSITIVE: return MP_OBJ_NEW_SMALL_INT(value);
         case MP_UNARY_OP_NEGATIVE: return MP_OBJ_NEW_SMALL_INT(-value);
         case MP_UNARY_OP_INVERT: return MP_OBJ_NEW_SMALL_INT(~value);
-
-        // only bool needs to implement MP_UNARY_OP_NOT
-        case MP_UNARY_OP_NOT:
-            if (value) {
-                return mp_const_false;
-            } else {
-                return mp_const_true;
-            }
-
         default: return MP_OBJ_NULL; // op not supported
     }
 }
diff --git a/py/runtime.c b/py/runtime.c
index 58b5a5d..c9a56f6 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -184,7 +184,10 @@
 mp_obj_t mp_unary_op(mp_uint_t op, mp_obj_t arg) {
     DEBUG_OP_printf("unary " UINT_FMT " %p\n", op, arg);
 
-    if (MP_OBJ_IS_SMALL_INT(arg)) {
+    if (op == MP_UNARY_OP_NOT) {
+        // "not x" is the negative of whether "x" is true per Python semantics
+        return mp_obj_new_bool(mp_obj_is_true(arg) == 0);
+    } else if (MP_OBJ_IS_SMALL_INT(arg)) {
         mp_int_t val = MP_OBJ_SMALL_INT_VALUE(arg);
         switch (op) {
             case MP_UNARY_OP_BOOL:
diff --git a/py/runtime0.h b/py/runtime0.h
index d00949f..6417cfd 100644
--- a/py/runtime0.h
+++ b/py/runtime0.h
@@ -45,8 +45,6 @@
     MP_UNARY_OP_POSITIVE,
     MP_UNARY_OP_NEGATIVE,
     MP_UNARY_OP_INVERT,
-    // The NOT op is only implemented by bool.  The emitter must synthesise NOT
-    // for other types by calling BOOL then inverting (eg by then calling NOT).
     MP_UNARY_OP_NOT,
 } mp_unary_op_t;
 
diff --git a/py/showbc.c b/py/showbc.c
index 9b08fa6..dd5959f 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -399,10 +399,6 @@
             printf("POP_EXCEPT");
             break;
 
-        case MP_BC_NOT:
-            printf("NOT");
-            break;
-
         case MP_BC_BUILD_TUPLE:
             DECODE_UINT;
             printf("BUILD_TUPLE " UINT_FMT, unum);
@@ -541,7 +537,7 @@
                 printf("LOAD_FAST " UINT_FMT, (mp_uint_t)ip[-1] - MP_BC_LOAD_FAST_MULTI);
             } else if (ip[-1] < MP_BC_STORE_FAST_MULTI + 16) {
                 printf("STORE_FAST " UINT_FMT, (mp_uint_t)ip[-1] - MP_BC_STORE_FAST_MULTI);
-            } else if (ip[-1] < MP_BC_UNARY_OP_MULTI + 6) {
+            } else if (ip[-1] < MP_BC_UNARY_OP_MULTI + 7) {
                 printf("UNARY_OP " UINT_FMT, (mp_uint_t)ip[-1] - MP_BC_UNARY_OP_MULTI);
             } else if (ip[-1] < MP_BC_BINARY_OP_MULTI + 36) {
                 mp_uint_t op = ip[-1] - MP_BC_BINARY_OP_MULTI;
diff --git a/py/vm.c b/py/vm.c
index f0a0bad..5bf14eb 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -760,14 +760,6 @@
                     //sp -= 3; // pop 3 exception values
                     DISPATCH();
 
-                ENTRY(MP_BC_NOT):
-                    if (TOP() == mp_const_true) {
-                        SET_TOP(mp_const_false);
-                    } else {
-                        SET_TOP(mp_const_true);
-                    }
-                    DISPATCH();
-
                 ENTRY(MP_BC_BUILD_TUPLE): {
                     MARK_EXC_IP_SELECTIVE();
                     DECODE_UINT;
@@ -1230,7 +1222,7 @@
                     } else if (ip[-1] < MP_BC_STORE_FAST_MULTI + 16) {
                         fastn[MP_BC_STORE_FAST_MULTI - (mp_int_t)ip[-1]] = POP();
                         DISPATCH();
-                    } else if (ip[-1] < MP_BC_UNARY_OP_MULTI + 6) {
+                    } else if (ip[-1] < MP_BC_UNARY_OP_MULTI + 7) {
                         SET_TOP(mp_unary_op(ip[-1] - MP_BC_UNARY_OP_MULTI, TOP()));
                         DISPATCH();
                     } else if (ip[-1] < MP_BC_BINARY_OP_MULTI + 36) {
diff --git a/py/vmentrytable.h b/py/vmentrytable.h
index d71a8d4..f3143b5 100644
--- a/py/vmentrytable.h
+++ b/py/vmentrytable.h
@@ -76,7 +76,6 @@
     [MP_BC_FOR_ITER] = &&entry_MP_BC_FOR_ITER,
     [MP_BC_POP_BLOCK] = &&entry_MP_BC_POP_BLOCK,
     [MP_BC_POP_EXCEPT] = &&entry_MP_BC_POP_EXCEPT,
-    [MP_BC_NOT] = &&entry_MP_BC_NOT,
     [MP_BC_BUILD_TUPLE] = &&entry_MP_BC_BUILD_TUPLE,
     [MP_BC_BUILD_LIST] = &&entry_MP_BC_BUILD_LIST,
     [MP_BC_LIST_APPEND] = &&entry_MP_BC_LIST_APPEND,
@@ -110,7 +109,7 @@
     [MP_BC_LOAD_CONST_SMALL_INT_MULTI ... MP_BC_LOAD_CONST_SMALL_INT_MULTI + 63] = &&entry_MP_BC_LOAD_CONST_SMALL_INT_MULTI,
     [MP_BC_LOAD_FAST_MULTI ... MP_BC_LOAD_FAST_MULTI + 15] = &&entry_MP_BC_LOAD_FAST_MULTI,
     [MP_BC_STORE_FAST_MULTI ... MP_BC_STORE_FAST_MULTI + 15] = &&entry_MP_BC_STORE_FAST_MULTI,
-    [MP_BC_UNARY_OP_MULTI ... MP_BC_UNARY_OP_MULTI + 5] = &&entry_MP_BC_UNARY_OP_MULTI,
+    [MP_BC_UNARY_OP_MULTI ... MP_BC_UNARY_OP_MULTI + 6] = &&entry_MP_BC_UNARY_OP_MULTI,
     [MP_BC_BINARY_OP_MULTI ... MP_BC_BINARY_OP_MULTI + 35] = &&entry_MP_BC_BINARY_OP_MULTI,
 };