py/gc: Make memory manager and garbage collector thread safe.

By using a single, global mutex, all memory-related functions (alloc,
free, realloc, collect, etc.) are made thread safe.  This means that only
one thread can be in such a function at any one time.
diff --git a/py/gc.c b/py/gc.c
index ebf6995..b5cf949 100644
--- a/py/gc.c
+++ b/py/gc.c
@@ -94,6 +94,14 @@
 #define FTB_CLEAR(block) do { MP_STATE_MEM(gc_finaliser_table_start)[(block) / BLOCKS_PER_FTB] &= (~(1 << ((block) & 7))); } while (0)
 #endif
 
+#if MICROPY_PY_THREAD
+#define GC_ENTER() mp_thread_mutex_lock(&MP_STATE_MEM(gc_mutex), 1)
+#define GC_EXIT() mp_thread_mutex_unlock(&MP_STATE_MEM(gc_mutex))
+#else
+#define GC_ENTER()
+#define GC_EXIT()
+#endif
+
 // TODO waste less memory; currently requires that all entries in alloc_table have a corresponding block in pool
 void gc_init(void *start, void *end) {
     // align end pointer on block boundary
@@ -144,6 +152,10 @@
     // allow auto collection
     MP_STATE_MEM(gc_auto_collect_enabled) = 1;
 
+    #if MICROPY_PY_THREAD
+    mp_thread_mutex_init(&MP_STATE_MEM(gc_mutex));
+    #endif
+
     DEBUG_printf("GC layout:\n");
     DEBUG_printf("  alloc table at %p, length " UINT_FMT " bytes, " UINT_FMT " blocks\n", MP_STATE_MEM(gc_alloc_table_start), MP_STATE_MEM(gc_alloc_table_byte_len), MP_STATE_MEM(gc_alloc_table_byte_len) * BLOCKS_PER_ATB);
 #if MICROPY_ENABLE_FINALISER
@@ -153,11 +165,15 @@
 }
 
 void gc_lock(void) {
+    GC_ENTER();
     MP_STATE_MEM(gc_lock_depth)++;
+    GC_EXIT();
 }
 
 void gc_unlock(void) {
+    GC_ENTER();
     MP_STATE_MEM(gc_lock_depth)--;
+    GC_EXIT();
 }
 
 bool gc_is_locked(void) {
@@ -236,6 +252,10 @@
             case AT_HEAD:
 #if MICROPY_ENABLE_FINALISER
                 if (FTB_GET(block)) {
+                    #if MICROPY_PY_THREAD
+                    // TODO need to think about reentrancy with finaliser code
+                    assert(!"finaliser with threading not implemented");
+                    #endif
                     mp_obj_base_t *obj = (mp_obj_base_t*)PTR_FROM_BLOCK(block);
                     if (obj->type != NULL) {
                         // if the object has a type then see if it has a __del__ method
@@ -272,7 +292,8 @@
 }
 
 void gc_collect_start(void) {
-    gc_lock();
+    GC_ENTER();
+    MP_STATE_MEM(gc_lock_depth)++;
     MP_STATE_MEM(gc_stack_overflow) = 0;
     MP_STATE_MEM(gc_sp) = MP_STATE_MEM(gc_stack);
     // Trace root pointers.  This relies on the root pointers being organised
@@ -294,10 +315,12 @@
     gc_deal_with_stack_overflow();
     gc_sweep();
     MP_STATE_MEM(gc_last_free_atb_index) = 0;
-    gc_unlock();
+    MP_STATE_MEM(gc_lock_depth)--;
+    GC_EXIT();
 }
 
 void gc_info(gc_info_t *info) {
+    GC_ENTER();
     info->total = MP_STATE_MEM(gc_pool_end) - MP_STATE_MEM(gc_pool_start);
     info->used = 0;
     info->free = 0;
@@ -340,19 +363,23 @@
 
     info->used *= BYTES_PER_BLOCK;
     info->free *= BYTES_PER_BLOCK;
+    GC_EXIT();
 }
 
 void *gc_alloc(size_t n_bytes, bool has_finaliser) {
     size_t n_blocks = ((n_bytes + BYTES_PER_BLOCK - 1) & (~(BYTES_PER_BLOCK - 1))) / BYTES_PER_BLOCK;
     DEBUG_printf("gc_alloc(" UINT_FMT " bytes -> " UINT_FMT " blocks)\n", n_bytes, n_blocks);
 
-    // check if GC is locked
-    if (MP_STATE_MEM(gc_lock_depth) > 0) {
+    // check for 0 allocation
+    if (n_blocks == 0) {
         return NULL;
     }
 
-    // check for 0 allocation
-    if (n_blocks == 0) {
+    GC_ENTER();
+
+    // check if GC is locked
+    if (MP_STATE_MEM(gc_lock_depth) > 0) {
+        GC_EXIT();
         return NULL;
     }
 
@@ -372,6 +399,7 @@
             if (ATB_3_IS_FREE(a)) { if (++n_free >= n_blocks) { i = i * BLOCKS_PER_ATB + 3; goto found; } } else { n_free = 0; }
         }
 
+        GC_EXIT();
         // nothing found!
         if (collected) {
             return NULL;
@@ -379,6 +407,7 @@
         DEBUG_printf("gc_alloc(" UINT_FMT "): no free mem, triggering GC\n", n_bytes);
         gc_collect();
         collected = 1;
+        GC_ENTER();
     }
 
     // found, ending at block i inclusive
@@ -405,6 +434,8 @@
         ATB_FREE_TO_TAIL(bl);
     }
 
+    GC_EXIT();
+
     // get pointer to first block
     void *ret_ptr = (void*)(MP_STATE_MEM(gc_pool_start) + start_block * BYTES_PER_BLOCK);
     DEBUG_printf("gc_alloc(%p)\n", ret_ptr);
@@ -421,7 +452,9 @@
         // clear type pointer in case it is never set
         ((mp_obj_base_t*)ret_ptr)->type = NULL;
         // set mp_obj flag only if it has a finaliser
+        GC_ENTER();
         FTB_SET(start_block);
+        GC_EXIT();
     }
     #else
     (void)has_finaliser;
@@ -447,8 +480,10 @@
 // force the freeing of a piece of memory
 // TODO: freeing here does not call finaliser
 void gc_free(void *ptr) {
+    GC_ENTER();
     if (MP_STATE_MEM(gc_lock_depth) > 0) {
         // TODO how to deal with this error?
+        GC_EXIT();
         return;
     }
 
@@ -471,18 +506,25 @@
                 block += 1;
             } while (ATB_GET_KIND(block) == AT_TAIL);
 
+            GC_EXIT();
+
             #if EXTENSIVE_HEAP_PROFILING
             gc_dump_alloc_table();
             #endif
         } else {
+            GC_EXIT();
             assert(!"bad free");
         }
     } else if (ptr != NULL) {
+        GC_EXIT();
         assert(!"bad free");
+    } else {
+        GC_EXIT();
     }
 }
 
 size_t gc_nbytes(const void *ptr) {
+    GC_ENTER();
     if (VERIFY_PTR(ptr)) {
         size_t block = BLOCK_FROM_PTR(ptr);
         if (ATB_GET_KIND(block) == AT_HEAD) {
@@ -491,11 +533,13 @@
             do {
                 n_blocks += 1;
             } while (ATB_GET_KIND(block + n_blocks) == AT_TAIL);
+            GC_EXIT();
             return n_blocks * BYTES_PER_BLOCK;
         }
     }
 
     // invalid pointer
+    GC_EXIT();
     return 0;
 }
 
@@ -529,10 +573,6 @@
 #else // Alternative gc_realloc impl
 
 void *gc_realloc(void *ptr_in, size_t n_bytes, bool allow_move) {
-    if (MP_STATE_MEM(gc_lock_depth) > 0) {
-        return NULL;
-    }
-
     // check for pure allocation
     if (ptr_in == NULL) {
         return gc_alloc(n_bytes, false);
@@ -559,6 +599,13 @@
         return NULL;
     }
 
+    GC_ENTER();
+
+    if (MP_STATE_MEM(gc_lock_depth) > 0) {
+        GC_EXIT();
+        return NULL;
+    }
+
     // compute number of new blocks that are requested
     size_t new_blocks = (n_bytes + BYTES_PER_BLOCK - 1) / BYTES_PER_BLOCK;
 
@@ -590,6 +637,7 @@
 
     // return original ptr if it already has the requested number of blocks
     if (new_blocks == n_blocks) {
+        GC_EXIT();
         return ptr_in;
     }
 
@@ -605,6 +653,8 @@
             MP_STATE_MEM(gc_last_free_atb_index) = (block + new_blocks) / BLOCKS_PER_ATB;
         }
 
+        GC_EXIT();
+
         #if EXTENSIVE_HEAP_PROFILING
         gc_dump_alloc_table();
         #endif
@@ -620,6 +670,8 @@
             ATB_FREE_TO_TAIL(bl);
         }
 
+        GC_EXIT();
+
         // zero out the additional bytes of the newly allocated blocks (see comment above in gc_alloc)
         memset((byte*)ptr_in + n_bytes, 0, new_blocks * BYTES_PER_BLOCK - n_bytes);
 
@@ -630,6 +682,8 @@
         return ptr_in;
     }
 
+    GC_EXIT();
+
     if (!allow_move) {
         // not allowed to move memory block so return failure
         return NULL;
@@ -666,6 +720,7 @@
 }
 
 void gc_dump_alloc_table(void) {
+    GC_ENTER();
     static const size_t DUMP_BYTES_PER_LINE = 64;
     #if !EXTENSIVE_HEAP_PROFILING
     // When comparing heap output we don't want to print the starting
@@ -771,6 +826,7 @@
         mp_printf(&mp_plat_print, "%c", c);
     }
     mp_print_str(&mp_plat_print, "\n");
+    GC_EXIT();
 }
 
 #if DEBUG_PRINT
diff --git a/py/mpstate.h b/py/mpstate.h
index 18bec09..f1f8fd9 100644
--- a/py/mpstate.h
+++ b/py/mpstate.h
@@ -29,6 +29,7 @@
 #include <stdint.h>
 
 #include "py/mpconfig.h"
+#include "py/mpthread.h"
 #include "py/misc.h"
 #include "py/nlr.h"
 #include "py/obj.h"
@@ -80,6 +81,11 @@
     #if MICROPY_PY_GC_COLLECT_RETVAL
     size_t gc_collected;
     #endif
+
+    #if MICROPY_PY_THREAD
+    // This is a global mutex used to make the GC thread-safe.
+    mp_thread_mutex_t gc_mutex;
+    #endif
 } mp_state_mem_t;
 
 // This structure hold runtime and VM information.  It includes a section
diff --git a/py/mpthread.h b/py/mpthread.h
index 498380a..a92f2d0 100644
--- a/py/mpthread.h
+++ b/py/mpthread.h
@@ -36,7 +36,9 @@
 #include <mpthreadport.h>
 #endif
 
-mp_state_thread_t *mp_thread_get_state(void);
+struct _mp_state_thread_t;
+
+struct _mp_state_thread_t *mp_thread_get_state(void);
 void mp_thread_set_state(void *state);
 void mp_thread_create(void *(*entry)(void*), void *arg, size_t stack_size);
 void mp_thread_mutex_init(mp_thread_mutex_t *mutex);