py: Implement a simple global interpreter lock.

This makes the VM/runtime thread-safe, at the cost of not being able to
run code in parallel.
diff --git a/py/modthread.c b/py/modthread.c
index 930ca45..7efad78 100644
--- a/py/modthread.c
+++ b/py/modthread.c
@@ -146,6 +146,8 @@
 } thread_entry_args_t;
 
 STATIC void *thread_entry(void *args_in) {
+    // Execution begins here for a new thread.  We do not have the GIL.
+
     thread_entry_args_t *args = (thread_entry_args_t*)args_in;
 
     mp_state_thread_t ts;
@@ -154,6 +156,8 @@
     mp_stack_set_top(&ts + 1); // need to include ts in root-pointer scan
     mp_stack_set_limit(16 * 1024); // fixed stack limit for now
 
+    MP_THREAD_GIL_ENTER();
+
     // signal that we are set up and running
     mp_thread_start();
 
@@ -188,6 +192,8 @@
     // signal that we are finished
     mp_thread_finish();
 
+    MP_THREAD_GIL_EXIT();
+
     return NULL;
 }