py: Add MP_STATE_THREAD to hold state specific to a given thread.
diff --git a/py/gc.c b/py/gc.c
index a1ba45a..ebf6995 100644
--- a/py/gc.c
+++ b/py/gc.c
@@ -279,7 +279,7 @@
     // correctly in the mp_state_ctx structure.  We scan nlr_top, dict_locals,
     // dict_globals, then the root pointer section of mp_state_vm.
     void **ptrs = (void**)(void*)&mp_state_ctx;
-    gc_collect_root(ptrs, offsetof(mp_state_ctx_t, vm.stack_top) / sizeof(void*));
+    gc_collect_root(ptrs, offsetof(mp_state_ctx_t, vm.qstr_last_chunk) / sizeof(void*));
 }
 
 void gc_collect_root(void **ptrs, size_t len) {
@@ -713,7 +713,7 @@
                 }
                 if (c == 'h') {
                     ptrs = (void**)&c;
-                    len = ((mp_uint_t)MP_STATE_VM(stack_top) - (mp_uint_t)&c) / sizeof(mp_uint_t);
+                    len = ((mp_uint_t)MP_STATE_THREAD(stack_top) - (mp_uint_t)&c) / sizeof(mp_uint_t);
                     for (mp_uint_t i = 0; i < len; i++) {
                         mp_uint_t ptr = (mp_uint_t)ptrs[i];
                         if (VERIFY_PTR(ptr) && BLOCK_FROM_PTR(ptr) == bl) {
diff --git a/py/modmicropython.c b/py/modmicropython.c
index 1ff5e25..805bda5 100644
--- a/py/modmicropython.c
+++ b/py/modmicropython.c
@@ -60,7 +60,7 @@
         (mp_uint_t)m_get_total_bytes_allocated(), (mp_uint_t)m_get_current_bytes_allocated(), (mp_uint_t)m_get_peak_bytes_allocated());
 #endif
 #if MICROPY_STACK_CHECK
-    mp_printf(&mp_plat_print, "stack: " UINT_FMT " out of " INT_FMT "\n", mp_stack_usage(), MP_STATE_VM(stack_limit));
+    mp_printf(&mp_plat_print, "stack: " UINT_FMT " out of " INT_FMT "\n", mp_stack_usage(), MP_STATE_THREAD(stack_limit));
 #else
     mp_printf(&mp_plat_print, "stack: " UINT_FMT "\n", mp_stack_usage());
 #endif
diff --git a/py/mpstate.h b/py/mpstate.h
index 0e77e65..824aba1 100644
--- a/py/mpstate.h
+++ b/py/mpstate.h
@@ -91,9 +91,6 @@
     // this must start at the start of this structure
     //
 
-    // Note: nlr asm code has the offset of this hard-coded
-    nlr_buf_t *nlr_top;
-
     qstr_pool_t *last_pool;
 
     // non-heap memory for creating an exception if we can't allocate RAM
@@ -161,14 +158,6 @@
     size_t qstr_last_alloc;
     size_t qstr_last_used;
 
-    // Stack top at the start of program
-    // Note: this entry is used to locate the end of the root pointer section.
-    char *stack_top;
-
-    #if MICROPY_STACK_CHECK
-    mp_uint_t stack_limit;
-    #endif
-
     mp_uint_t mp_optimise_value;
 
     // size of the emergency exception buf, if it's dynamically allocated
@@ -177,7 +166,22 @@
     #endif
 } mp_state_vm_t;
 
-// This structure combines the above 2 structures, and adds the local
+// This structure holds state that is specific to a given thread.
+// Everything in this structure is scanned for root pointers: it sits in mp_state_ctx_t between the dicts and mp_state_vm_t.
+typedef struct _mp_state_thread_t {
+    // Note: nlr asm code has the offset of this hard-coded
+    nlr_buf_t *nlr_top; // ROOT POINTER
+
+    // Stack top at the start of program (written by mp_stack_ctrl_init and mp_stack_set_top)
+    // Note: the GC root scan now ends at vm.qstr_last_chunk, so this entry no longer marks the end of the root pointer section.
+    char *stack_top;
+
+    #if MICROPY_STACK_CHECK
+    size_t stack_limit; // NOTE(review): assigned from an mp_uint_t and printed with INT_FMT -- confirm the widths agree on all ports
+    #endif
+} mp_state_thread_t;
+
+// This structure combines the above 3 structures, and adds the local
 // and global dicts.
 // Note: if this structure changes then revisit all nlr asm code since they
 // have the offset of nlr_top hard-coded.
@@ -185,7 +189,8 @@
     // these must come first for root pointer scanning in GC to work
     mp_obj_dict_t *dict_locals;
     mp_obj_dict_t *dict_globals;
-    // this must come next for root pointer scanning in GC to work
+    // these must come next in this order for root pointer scanning in GC to work
+    mp_state_thread_t thread;
     mp_state_vm_t vm;
     mp_state_mem_t mem;
 } mp_state_ctx_t;
@@ -196,4 +201,6 @@
 #define MP_STATE_VM(x) (mp_state_ctx.vm.x)
 #define MP_STATE_MEM(x) (mp_state_ctx.mem.x)
 
+#define MP_STATE_THREAD(x) (mp_state_ctx.thread.x)
+
 #endif // __MICROPY_INCLUDED_PY_MPSTATE_H__
diff --git a/py/stackctrl.c b/py/stackctrl.c
index 14d1fd0..1843e73 100644
--- a/py/stackctrl.c
+++ b/py/stackctrl.c
@@ -32,23 +32,23 @@
 
 void mp_stack_ctrl_init(void) {
     volatile int stack_dummy;
-    MP_STATE_VM(stack_top) = (char*)&stack_dummy;
+    MP_STATE_THREAD(stack_top) = (char*)&stack_dummy;
 }
 
 void mp_stack_set_top(void *top) {
-    MP_STATE_VM(stack_top) = top;
+    MP_STATE_THREAD(stack_top) = top;
 }
 
 mp_uint_t mp_stack_usage(void) {
     // Assumes descending stack
     volatile int stack_dummy;
-    return MP_STATE_VM(stack_top) - (char*)&stack_dummy;
+    return MP_STATE_THREAD(stack_top) - (char*)&stack_dummy;
 }
 
 #if MICROPY_STACK_CHECK
 
 void mp_stack_set_limit(mp_uint_t limit) {
-    MP_STATE_VM(stack_limit) = limit;
+    MP_STATE_THREAD(stack_limit) = limit;
 }
 
 void mp_exc_recursion_depth(void) {
@@ -57,7 +57,7 @@
 }
 
 void mp_stack_check(void) {
-    if (mp_stack_usage() >= MP_STATE_VM(stack_limit)) {
+    if (mp_stack_usage() >= MP_STATE_THREAD(stack_limit)) {
         mp_exc_recursion_depth();
     }
 }