py/asm: Remove need for dummy_data when doing initial assembler passes.

For all but the last pass the assembler only needs to count how much space
is needed for the machine code; it doesn't actually need to emit anything.
The dummy_data buffer just uses RAM unnecessarily, and without it the code
is not any more complex (and code size does not increase for the Thumb and
Xtensa archs).
diff --git a/py/asmarm.c b/py/asmarm.c
index 663f861..63963d2 100644
--- a/py/asmarm.c
+++ b/py/asmarm.c
@@ -55,7 +55,10 @@
 
 // Insert word into instruction flow
 STATIC void emit(asm_arm_t *as, uint op) {
-    *(uint*)asm_arm_get_cur_to_write_bytes(as, 4) = op;
+    uint8_t *c = mp_asm_base_get_cur_to_write_bytes(&as->base, 4);
+    if (c != NULL) {
+        *(uint32_t*)c = op;
+    }
 }
 
 // Insert word into instruction flow, add "ALWAYS" condition code
diff --git a/py/asmbase.c b/py/asmbase.c
index 8487305..c941e91 100644
--- a/py/asmbase.c
+++ b/py/asmbase.c
@@ -59,17 +59,16 @@
 }
 
 // all functions must go through this one to emit bytes
-// if as->pass < MP_ASM_PASS_EMIT, then this function returns dummy_data
+// if as->pass < MP_ASM_PASS_EMIT, then this function just counts the number
+// of bytes needed and returns NULL, and callers should not store any data
 uint8_t *mp_asm_base_get_cur_to_write_bytes(mp_asm_base_t *as, size_t num_bytes_to_write) {
-    if (as->pass < MP_ASM_PASS_EMIT) {
-        as->code_offset += num_bytes_to_write;
-        return as->dummy_data;
-    } else {
+    uint8_t *c = NULL;
+    if (as->pass == MP_ASM_PASS_EMIT) {
         assert(as->code_offset + num_bytes_to_write <= as->code_size);
-        uint8_t *c = as->code_base + as->code_offset;
-        as->code_offset += num_bytes_to_write;
-        return c;
+        c = as->code_base + as->code_offset;
     }
+    as->code_offset += num_bytes_to_write;
+    return c;
 }
 
 void mp_asm_base_label_assign(mp_asm_base_t *as, size_t label) {
@@ -92,8 +91,7 @@
 // this function assumes a little endian machine
 void mp_asm_base_data(mp_asm_base_t* as, unsigned int bytesize, uintptr_t val) {
     uint8_t *c = mp_asm_base_get_cur_to_write_bytes(as, bytesize);
-    // only write to the buffer in the emit pass (otherwise we may overflow dummy_data)
-    if (as->pass == MP_ASM_PASS_EMIT) {
+    if (c != NULL) {
         for (unsigned int i = 0; i < bytesize; i++) {
             *c++ = val;
             val >>= 8;
diff --git a/py/asmbase.h b/py/asmbase.h
index 06fdd4b..d2b4038 100644
--- a/py/asmbase.h
+++ b/py/asmbase.h
@@ -40,9 +40,6 @@
 
     size_t max_num_labels;
     size_t *label_offsets;
-
-    // must be last in struct
-    uint8_t dummy_data[4];
 } mp_asm_base_t;
 
 void mp_asm_base_init(mp_asm_base_t *as, size_t max_num_labels);
diff --git a/py/asmthumb.c b/py/asmthumb.c
index 82a226b..749c1e4 100644
--- a/py/asmthumb.c
+++ b/py/asmthumb.c
@@ -162,18 +162,22 @@
 
 void asm_thumb_op16(asm_thumb_t *as, uint op) {
     byte *c = asm_thumb_get_cur_to_write_bytes(as, 2);
-    // little endian
-    c[0] = op;
-    c[1] = op >> 8;
+    if (c != NULL) {
+        // little endian
+        c[0] = op;
+        c[1] = op >> 8;
+    }
 }
 
 void asm_thumb_op32(asm_thumb_t *as, uint op1, uint op2) {
     byte *c = asm_thumb_get_cur_to_write_bytes(as, 4);
-    // little endian, op1 then op2
-    c[0] = op1;
-    c[1] = op1 >> 8;
-    c[2] = op2;
-    c[3] = op2 >> 8;
+    if (c != NULL) {
+        // little endian, op1 then op2
+        c[0] = op1;
+        c[1] = op1 >> 8;
+        c[2] = op2;
+        c[3] = op2 >> 8;
+    }
 }
 
 #define OP_FORMAT_4(op, rlo_dest, rlo_src) ((op) | ((rlo_src) << 3) | (rlo_dest))
diff --git a/py/asmx64.c b/py/asmx64.c
index c9dad2a..cf1a86b 100644
--- a/py/asmx64.c
+++ b/py/asmx64.c
@@ -122,40 +122,50 @@
 
 STATIC void asm_x64_write_byte_1(asm_x64_t *as, byte b1) {
     byte* c = asm_x64_get_cur_to_write_bytes(as, 1);
-    c[0] = b1;
+    if (c != NULL) {
+        c[0] = b1;
+    }
 }
 
 STATIC void asm_x64_write_byte_2(asm_x64_t *as, byte b1, byte b2) {
     byte* c = asm_x64_get_cur_to_write_bytes(as, 2);
-    c[0] = b1;
-    c[1] = b2;
+    if (c != NULL) {
+        c[0] = b1;
+        c[1] = b2;
+    }
 }
 
 STATIC void asm_x64_write_byte_3(asm_x64_t *as, byte b1, byte b2, byte b3) {
     byte* c = asm_x64_get_cur_to_write_bytes(as, 3);
-    c[0] = b1;
-    c[1] = b2;
-    c[2] = b3;
+    if (c != NULL) {
+        c[0] = b1;
+        c[1] = b2;
+        c[2] = b3;
+    }
 }
 
 STATIC void asm_x64_write_word32(asm_x64_t *as, int w32) {
     byte* c = asm_x64_get_cur_to_write_bytes(as, 4);
-    c[0] = IMM32_L0(w32);
-    c[1] = IMM32_L1(w32);
-    c[2] = IMM32_L2(w32);
-    c[3] = IMM32_L3(w32);
+    if (c != NULL) {
+        c[0] = IMM32_L0(w32);
+        c[1] = IMM32_L1(w32);
+        c[2] = IMM32_L2(w32);
+        c[3] = IMM32_L3(w32);
+    }
 }
 
 STATIC void asm_x64_write_word64(asm_x64_t *as, int64_t w64) {
     byte* c = asm_x64_get_cur_to_write_bytes(as, 8);
-    c[0] = IMM32_L0(w64);
-    c[1] = IMM32_L1(w64);
-    c[2] = IMM32_L2(w64);
-    c[3] = IMM32_L3(w64);
-    c[4] = IMM64_L4(w64);
-    c[5] = IMM64_L5(w64);
-    c[6] = IMM64_L6(w64);
-    c[7] = IMM64_L7(w64);
+    if (c != NULL) {
+        c[0] = IMM32_L0(w64);
+        c[1] = IMM32_L1(w64);
+        c[2] = IMM32_L2(w64);
+        c[3] = IMM32_L3(w64);
+        c[4] = IMM64_L4(w64);
+        c[5] = IMM64_L5(w64);
+        c[6] = IMM64_L6(w64);
+        c[7] = IMM64_L7(w64);
+    }
 }
 
 /* unused
diff --git a/py/asmx64.h b/py/asmx64.h
index f80c8da..4499c53 100644
--- a/py/asmx64.h
+++ b/py/asmx64.h
@@ -72,7 +72,6 @@
 
 typedef struct _asm_x64_t {
     mp_asm_base_t base;
-    byte dummy_data[4]; // in addition to dummy_data in base
     int num_locals;
 } asm_x64_t;
 
diff --git a/py/asmx86.c b/py/asmx86.c
index cb9b30d..ef315bf 100644
--- a/py/asmx86.c
+++ b/py/asmx86.c
@@ -101,29 +101,37 @@
 #define SIGNED_FIT8(x) (((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80)
 
 STATIC void asm_x86_write_byte_1(asm_x86_t *as, byte b1) {
-    byte* c = asm_x86_get_cur_to_write_bytes(as, 1);
-    c[0] = b1;
+    byte* c = mp_asm_base_get_cur_to_write_bytes(&as->base, 1);
+    if (c != NULL) {
+        c[0] = b1;
+    }
 }
 
 STATIC void asm_x86_write_byte_2(asm_x86_t *as, byte b1, byte b2) {
-    byte* c = asm_x86_get_cur_to_write_bytes(as, 2);
-    c[0] = b1;
-    c[1] = b2;
+    byte* c = mp_asm_base_get_cur_to_write_bytes(&as->base, 2);
+    if (c != NULL) {
+        c[0] = b1;
+        c[1] = b2;
+    }
 }
 
 STATIC void asm_x86_write_byte_3(asm_x86_t *as, byte b1, byte b2, byte b3) {
-    byte* c = asm_x86_get_cur_to_write_bytes(as, 3);
-    c[0] = b1;
-    c[1] = b2;
-    c[2] = b3;
+    byte* c = mp_asm_base_get_cur_to_write_bytes(&as->base, 3);
+    if (c != NULL) {
+        c[0] = b1;
+        c[1] = b2;
+        c[2] = b3;
+    }
 }
 
 STATIC void asm_x86_write_word32(asm_x86_t *as, int w32) {
-    byte* c = asm_x86_get_cur_to_write_bytes(as, 4);
-    c[0] = IMM32_L0(w32);
-    c[1] = IMM32_L1(w32);
-    c[2] = IMM32_L2(w32);
-    c[3] = IMM32_L3(w32);
+    byte* c = mp_asm_base_get_cur_to_write_bytes(&as->base, 4);
+    if (c != NULL) {
+        c[0] = IMM32_L0(w32);
+        c[1] = IMM32_L1(w32);
+        c[2] = IMM32_L2(w32);
+        c[3] = IMM32_L3(w32);
+    }
 }
 
 STATIC void asm_x86_write_r32_disp(asm_x86_t *as, int r32, int disp_r32, int disp_offset) {
diff --git a/py/asmx86.h b/py/asmx86.h
index ac4b2ec..0b44af6 100644
--- a/py/asmx86.h
+++ b/py/asmx86.h
@@ -75,7 +75,6 @@
 
 typedef struct _asm_x86_t {
     mp_asm_base_t base;
-    byte dummy_data[4]; // in addition to dummy_data in base
     int num_locals;
 } asm_x86_t;
 
diff --git a/py/asmxtensa.c b/py/asmxtensa.c
index 00df432..00448df 100644
--- a/py/asmxtensa.c
+++ b/py/asmxtensa.c
@@ -97,15 +97,19 @@
 
 void asm_xtensa_op16(asm_xtensa_t *as, uint16_t op) {
     uint8_t *c = mp_asm_base_get_cur_to_write_bytes(&as->base, 2);
-    c[0] = op;
-    c[1] = op >> 8;
+    if (c != NULL) {
+        c[0] = op;
+        c[1] = op >> 8;
+    }
 }
 
 void asm_xtensa_op24(asm_xtensa_t *as, uint32_t op) {
     uint8_t *c = mp_asm_base_get_cur_to_write_bytes(&as->base, 3);
-    c[0] = op;
-    c[1] = op >> 8;
-    c[2] = op >> 16;
+    if (c != NULL) {
+        c[0] = op;
+        c[1] = op >> 8;
+        c[2] = op >> 16;
+    }
 }
 
 void asm_xtensa_j_label(asm_xtensa_t *as, uint label) {
@@ -147,7 +151,7 @@
         // load the constant
         asm_xtensa_op_l32r(as, reg_dest, as->base.code_offset, 4 + as->cur_const * WORD_SIZE);
         // store the constant in the table
-        if (as->base.pass == MP_ASM_PASS_EMIT) {
+        if (as->const_table != NULL) {
             as->const_table[as->cur_const] = i32;
         }
         ++as->cur_const;