Make VM stack grow upwards, so args arrays are no longer reversed.

Change the state layout in the VM so that the stack starts at state[0] and
grows upwards.  Locals are at the top end of the state and are numbered
downwards.  This cleans up a lot of the interface connecting the VM to C:
all functions that take an array of Micro Python objects now receive them
in order (i.e. no longer reversed).

Also clean up the C API for keyword arguments: call_n and call_n_kw are
replaced with a single call method that takes keyword arguments, and
make_new now takes keyword arguments as well.

emitnative.c has not yet been updated to comply with the new stack
layout; its calls to rt_call_function_n and rt_call_method_n are
commented out for now.
diff --git a/py/builtin.c b/py/builtin.c
index 04bb268..fcd58d4 100644
--- a/py/builtin.c
+++ b/py/builtin.c
@@ -45,12 +45,11 @@
     // TODO do proper metaclass resolution for multiple base objects
 
     // create the new class using a call to the meta object
-    // (arguments must be backwards in the array)
     mp_obj_t meta_args[3];
-    meta_args[2] = args[1]; // class name
+    meta_args[0] = args[1]; // class name
     meta_args[1] = mp_obj_new_tuple(n_args - 2, args + 2); // tuple of bases
-    meta_args[0] = class_locals; // dict of members
-    mp_obj_t new_class = rt_call_function_n(meta, 3, meta_args);
+    meta_args[2] = class_locals; // dict of members
+    mp_obj_t new_class = rt_call_function_n_kw(meta, 3, 0, meta_args);
 
     // store into cell if neede
     if (cell != mp_const_none) {
@@ -153,10 +152,10 @@
     if (MP_OBJ_IS_SMALL_INT(o1_in) && MP_OBJ_IS_SMALL_INT(o2_in)) {
         mp_small_int_t i1 = MP_OBJ_SMALL_INT_VALUE(o1_in);
         mp_small_int_t i2 = MP_OBJ_SMALL_INT_VALUE(o2_in);
-        mp_obj_t revs_args[2];
-        revs_args[1] = MP_OBJ_NEW_SMALL_INT(i1 / i2);
-        revs_args[0] = MP_OBJ_NEW_SMALL_INT(i1 % i2);
-        return rt_build_tuple(2, revs_args);
+        mp_obj_t args[2];
+        args[0] = MP_OBJ_NEW_SMALL_INT(i1 / i2);
+        args[1] = MP_OBJ_NEW_SMALL_INT(i1 % i2);
+        return rt_build_tuple(2, args);
     } else {
         nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "unsupported operand type(s) for divmod(): '%s' and '%s'", mp_obj_get_type_str(o1_in), mp_obj_get_type_str(o2_in)));
     }
@@ -327,20 +326,14 @@
 
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_sum_obj, 1, 2, mp_builtin_sum);
 
-static mp_obj_t mp_builtin_sorted(mp_obj_t args, mp_map_t *kwargs) {
-    mp_obj_t *args_items = NULL;
-    uint args_len = 0;
-
-    assert(MP_OBJ_IS_TYPE(args, &tuple_type));
-    mp_obj_tuple_get(args, &args_len, &args_items);
-    assert(args_len >= 1);
-    if (args_len > 1) {
+static mp_obj_t mp_builtin_sorted(uint n_args, const mp_obj_t *args, mp_map_t *kwargs) {
+    assert(n_args >= 1);
+    if (n_args > 1) {
         nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError,
                                           "must use keyword argument for key function"));
     }
-    mp_obj_t self = list_type.make_new((mp_obj_t)&list_type, 1, args_items);
-    mp_obj_t new_args = rt_build_tuple(1, &self);
-    mp_obj_list_sort(new_args, kwargs);
+    mp_obj_t self = list_type.make_new((mp_obj_t)&list_type, 1, 0, args);
+    mp_obj_list_sort(1, &self, kwargs);
 
     return self;
 }
diff --git a/py/emitnative.c b/py/emitnative.c
index 7ba4d07..aea25ac 100644
--- a/py/emitnative.c
+++ b/py/emitnative.c
@@ -1156,7 +1156,8 @@
         vtype_kind_t vtype_fun;
         emit_pre_pop_reg(emit, &vtype_fun, REG_ARG_1); // the function
         assert(vtype_fun == VTYPE_PYOBJ);
-        emit_call_with_imm_arg(emit, RT_F_CALL_FUNCTION_N, rt_call_function_n, n_positional, REG_ARG_2);
+        // XXX rt_call_function_n now merged with rt_call_function_n_kw
+        //emit_call_with_imm_arg(emit, RT_F_CALL_FUNCTION_N, rt_call_function_n, n_positional, REG_ARG_2);
     //}
     emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
 }
@@ -1181,7 +1182,8 @@
     */
         emit_pre(emit);
         emit_get_stack_pointer_to_reg_for_pop(emit, REG_ARG_2, n_positional + 2); // pointer to items in reverse order, including meth and self
-        emit_call_with_imm_arg(emit, RT_F_CALL_METHOD_N, rt_call_method_n, n_positional, REG_ARG_1);
+        // XXX rt_call_method_n now merged with rt_call_method_n_kw
+        //emit_call_with_imm_arg(emit, RT_F_CALL_METHOD_N, rt_call_method_n, n_positional, REG_ARG_1);
     //}
     emit_post_push_reg(emit, VTYPE_PYOBJ, REG_RET);
 }
diff --git a/py/obj.c b/py/obj.c
index 9ca3d5d..bf7013a 100644
--- a/py/obj.c
+++ b/py/obj.c
@@ -63,7 +63,7 @@
         return false;
     } else {
         mp_obj_base_t *o = o_in;
-        return o->type->call_n != NULL;
+        return o->type->call != NULL;
     }
 }
 
diff --git a/py/obj.h b/py/obj.h
index 99d430f..5ca0e01 100644
--- a/py/obj.h
+++ b/py/obj.h
@@ -83,19 +83,18 @@
 typedef mp_obj_t (*mp_fun_3_t)(mp_obj_t, mp_obj_t, mp_obj_t);
 typedef mp_obj_t (*mp_fun_t)(void);
 typedef mp_obj_t (*mp_fun_var_t)(int n, const mp_obj_t *);
-typedef mp_obj_t (*mp_fun_kw_t)(mp_obj_t, struct _mp_map_t*);
+typedef mp_obj_t (*mp_fun_kw_t)(uint n, const mp_obj_t *, struct _mp_map_t *);
 
 typedef enum {
     PRINT_STR, PRINT_REPR
 } mp_print_kind_t;
 
 typedef void (*mp_print_fun_t)(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t o, mp_print_kind_t kind);
-typedef mp_obj_t (*mp_make_new_fun_t)(mp_obj_t type_in, int n_args, const mp_obj_t *args); // args are in reverse order in the array
-typedef mp_obj_t (*mp_call_n_fun_t)(mp_obj_t fun, int n_args, const mp_obj_t *args); // args are in reverse order in the array
-typedef mp_obj_t (*mp_call_n_kw_fun_t)(mp_obj_t fun, int n_args, int n_kw, const mp_obj_t *args); // args are in reverse order in the array
+typedef mp_obj_t (*mp_make_new_fun_t)(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args);
+typedef mp_obj_t (*mp_call_fun_t)(mp_obj_t fun, uint n_args, uint n_kw, const mp_obj_t *args);
 typedef mp_obj_t (*mp_unary_op_fun_t)(int op, mp_obj_t);
 typedef mp_obj_t (*mp_binary_op_fun_t)(int op, mp_obj_t, mp_obj_t);
-typedef void (*mp_load_attr_fun_t)(mp_obj_t self_in, qstr attr, mp_obj_t *dest); // for fail, do nothing; for attr, dest[1] = value; for method, dest[0] = self, dest[1] = method
+typedef void (*mp_load_attr_fun_t)(mp_obj_t self_in, qstr attr, mp_obj_t *dest); // for fail, do nothing; for attr, dest[0] = value; for method, dest[0] = method, dest[1] = self
 typedef bool (*mp_store_attr_fun_t)(mp_obj_t self_in, qstr attr, mp_obj_t value); // return true if store succeeded
 
 typedef struct _mp_method_t {
@@ -144,8 +143,7 @@
     mp_print_fun_t print;
     mp_make_new_fun_t make_new;     // to make an instance of the type
 
-    mp_call_n_fun_t call_n;
-    mp_call_n_kw_fun_t call_n_kw;
+    mp_call_fun_t call;
     mp_unary_op_fun_t unary_op;     // can return NULL if op not supported
     mp_binary_op_fun_t binary_op;   // can return NULL if op not supported
 
@@ -222,13 +220,11 @@
 mp_obj_t mp_obj_new_gen_instance(const byte *bytecode, uint n_state, int n_args, const mp_obj_t *args);
 mp_obj_t mp_obj_new_closure(mp_obj_t fun, mp_obj_t closure_tuple);
 mp_obj_t mp_obj_new_tuple(uint n, const mp_obj_t *items);
-mp_obj_t mp_obj_new_tuple_reverse(uint n, const mp_obj_t *items);
 mp_obj_t mp_obj_new_list(uint n, mp_obj_t *items);
-mp_obj_t mp_obj_new_list_reverse(uint n, mp_obj_t *items);
 mp_obj_t mp_obj_new_dict(int n_args);
 mp_obj_t mp_obj_new_set(int n_args, mp_obj_t *items);
 mp_obj_t mp_obj_new_slice(mp_obj_t start, mp_obj_t stop, mp_obj_t step);
-mp_obj_t mp_obj_new_bound_meth(mp_obj_t self, mp_obj_t meth);
+mp_obj_t mp_obj_new_bound_meth(mp_obj_t meth, mp_obj_t self);
 mp_obj_t mp_obj_new_module(qstr module_name);
 
 mp_obj_t mp_obj_get_type(mp_obj_t o_in);
@@ -296,7 +292,7 @@
 mp_obj_t mp_obj_list_append(mp_obj_t self_in, mp_obj_t arg);
 void mp_obj_list_get(mp_obj_t self_in, uint *len, mp_obj_t **items);
 void mp_obj_list_store(mp_obj_t self_in, mp_obj_t index, mp_obj_t value);
-mp_obj_t mp_obj_list_sort(mp_obj_t args, struct _mp_map_t *kwargs);
+mp_obj_t mp_obj_list_sort(uint n_args, const mp_obj_t *args, struct _mp_map_t *kwargs);
 
 // map (the python builtin, not the dict implementation detail)
 extern const mp_obj_type_t map_type;
diff --git a/py/objbool.c b/py/objbool.c
index 66f9e90..fac26f3 100644
--- a/py/objbool.c
+++ b/py/objbool.c
@@ -22,8 +22,9 @@
     }
 }
 
-// args are reverse in the array
-static mp_obj_t bool_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t bool_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    // TODO check n_kw == 0
+
     switch (n_args) {
         case 0: return mp_const_false;
         case 1: if (rt_is_true(args[0])) { return mp_const_true; } else { return mp_const_false; }
diff --git a/py/objboundmeth.c b/py/objboundmeth.c
index 78e5c62..2b13200 100644
--- a/py/objboundmeth.c
+++ b/py/objboundmeth.c
@@ -1,5 +1,6 @@
 #include <stdlib.h>
-#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
 #include <assert.h>
 
 #include "nlr.h"
@@ -14,32 +15,36 @@
     mp_obj_t self;
 } mp_obj_bound_meth_t;
 
-// args are in reverse order in the array
-mp_obj_t bound_meth_call_n(mp_obj_t self_in, int n_args, const mp_obj_t *args) {
+mp_obj_t bound_meth_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args) {
     mp_obj_bound_meth_t *self = self_in;
 
-    if (n_args == 0) {
-        return rt_call_function_n(self->meth, 1, &self->self);
-    } else if (n_args == 1) {
-        mp_obj_t args2[2];
-        args2[1] = self->self;
-        args2[0] = args[0];
-        return rt_call_function_n(self->meth, 2, args2);
+    // need to insert self->self before all other args and then call self->meth
+
+    int n_total = n_args + 2 * n_kw;
+    if (n_total <= 4) {
+        // use stack to allocate temporary args array
+        mp_obj_t args2[5];
+        args2[0] = self->self;
+        memcpy(args2 + 1, args, n_total * sizeof(mp_obj_t));
+        return rt_call_function_n_kw(self->meth, n_args + 1, n_kw, &args2[0]);
     } else {
-        // TODO not implemented
-        assert(0);
-        return mp_const_none;
-        //return rt_call_function_2(self->meth, n_args + 1, self->self + args);
+        // use heap to allocate temporary args array
+        mp_obj_t *args2 = m_new(mp_obj_t, 1 + n_total);
+        args2[0] = self->self;
+        memcpy(args2 + 1, args, n_total * sizeof(mp_obj_t));
+        mp_obj_t res = rt_call_function_n_kw(self->meth, n_args + 1, n_kw, &args2[0]);
+        m_del(mp_obj_t, args2, 1 + n_total);
+        return res;
     }
 }
 
 const mp_obj_type_t bound_meth_type = {
     { &mp_const_type },
     "bound_method",
-    .call_n = bound_meth_call_n,
+    .call = bound_meth_call,
 };
 
-mp_obj_t mp_obj_new_bound_meth(mp_obj_t self, mp_obj_t meth) {
+mp_obj_t mp_obj_new_bound_meth(mp_obj_t meth, mp_obj_t self) {
     mp_obj_bound_meth_t *o = m_new_obj(mp_obj_bound_meth_t);
     o->base.type = &bound_meth_type;
     o->meth = meth;
diff --git a/py/objclosure.c b/py/objclosure.c
index b372ee6..7f6bcf4 100644
--- a/py/objclosure.c
+++ b/py/objclosure.c
@@ -16,26 +16,33 @@
     mp_obj_t *closed;
 } mp_obj_closure_t;
 
-// args are in reverse order in the array
-mp_obj_t closure_call_n(mp_obj_t self_in, int n_args, const mp_obj_t *args) {
+mp_obj_t closure_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args) {
     mp_obj_closure_t *self = self_in;
 
-    // concatenate args and closed-over-vars, in reverse order
-    // TODO perhaps cache this array so we don't need to create it each time we are called
-    mp_obj_t *args2 = m_new(mp_obj_t, self->n_closed + n_args);
-    memcpy(args2, args, n_args * sizeof(mp_obj_t));
-    for (int i = 0; i < self->n_closed; i++) {
-        args2[n_args + i] = self->closed[self->n_closed - 1 - i];
-    }
+    // need to concatenate closed-over-vars and args
 
-    // call the function with the new vars array
-    return rt_call_function_n(self->fun, n_args + self->n_closed, args2);
+    int n_total = self->n_closed + n_args + 2 * n_kw;
+    if (n_total <= 5) {
+        // use stack to allocate temporary args array
+        mp_obj_t args2[5];
+        memcpy(args2, self->closed, self->n_closed * sizeof(mp_obj_t));
+        memcpy(args2 + self->n_closed, args, (n_args + 2 * n_kw) * sizeof(mp_obj_t));
+        return rt_call_function_n_kw(self->fun, self->n_closed + n_args, n_kw, args2);
+    } else {
+        // use heap to allocate temporary args array
+        mp_obj_t *args2 = m_new(mp_obj_t, n_total);
+        memcpy(args2, self->closed, self->n_closed * sizeof(mp_obj_t));
+        memcpy(args2 + self->n_closed, args, (n_args + 2 * n_kw) * sizeof(mp_obj_t));
+        mp_obj_t res = rt_call_function_n_kw(self->fun, self->n_closed + n_args, n_kw, args2);
+        m_del(mp_obj_t, args2, n_total);
+        return res;
+    }
 }
 
 const mp_obj_type_t closure_type = {
     { &mp_const_type },
     "closure",
-    .call_n = closure_call_n,
+    .call = closure_call,
 };
 
 mp_obj_t mp_obj_new_closure(mp_obj_t fun, mp_obj_t closure_tuple) {
diff --git a/py/objcomplex.c b/py/objcomplex.c
index bd103bb..b56f75c 100644
--- a/py/objcomplex.c
+++ b/py/objcomplex.c
@@ -30,8 +30,9 @@
     }
 }
 
-// args are reverse in the array
-static mp_obj_t complex_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t complex_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    // TODO check n_kw == 0
+
     switch (n_args) {
         case 0:
             return mp_obj_new_complex(0, 0);
@@ -47,19 +48,19 @@
         case 2:
         {
             mp_float_t real, imag;
-            if (MP_OBJ_IS_TYPE(args[1], &complex_type)) {
-                mp_obj_complex_get(args[1], &real, &imag);
+            if (MP_OBJ_IS_TYPE(args[0], &complex_type)) {
+                mp_obj_complex_get(args[0], &real, &imag);
             } else {
-                real = mp_obj_get_float(args[1]);
+                real = mp_obj_get_float(args[0]);
                 imag = 0;
             }
-            if (MP_OBJ_IS_TYPE(args[0], &complex_type)) {
+            if (MP_OBJ_IS_TYPE(args[1], &complex_type)) {
                 mp_float_t real2, imag2;
-                mp_obj_complex_get(args[0], &real2, &imag2);
+                mp_obj_complex_get(args[1], &real2, &imag2);
                 real -= imag2;
                 imag += real2;
             } else {
-                imag += mp_obj_get_float(args[0]);
+                imag += mp_obj_get_float(args[1]);
             }
             return mp_obj_new_complex(real, imag);
         }
diff --git a/py/objdict.c b/py/objdict.c
index da1b5b9..0d4a60a 100644
--- a/py/objdict.c
+++ b/py/objdict.c
@@ -38,8 +38,7 @@
     print(env, "}");
 }
 
-// args are reverse in the array
-static mp_obj_t dict_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t dict_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
     // TODO create from an iterable!
     return rt_build_map(0);
 }
diff --git a/py/objenumerate.c b/py/objenumerate.c
index 5bfd8a3..3e25124 100644
--- a/py/objenumerate.c
+++ b/py/objenumerate.c
@@ -20,15 +20,12 @@
 
 /* TODO: enumerate is one of the ones that can take args or kwargs.
    Sticking to args for now */
-static mp_obj_t enumerate_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
-    /* NOTE: args are backwards */
+static mp_obj_t enumerate_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
     assert(n_args > 0);
-    args += n_args - 1;
     mp_obj_enumerate_t *o = m_new_obj(mp_obj_enumerate_t);
     o->base.type = &enumerate_type;
     o->iter = rt_getiter(args[0]);
-    o->cur = n_args > 1 ? mp_obj_get_int(args[-1]) : 0;
-
+    o->cur = n_args > 1 ? mp_obj_get_int(args[1]) : 0;
     return o;
 }
 
diff --git a/py/objexcept.c b/py/objexcept.c
index 67e6d63..1d30758 100644
--- a/py/objexcept.c
+++ b/py/objexcept.c
@@ -7,6 +7,7 @@
 #include "nlr.h"
 #include "misc.h"
 #include "mpconfig.h"
+#include "mpqstr.h"
 #include "obj.h"
 #include "objtuple.h"
 
@@ -43,20 +44,19 @@
     }
 }
 
-// args in reversed order
-static mp_obj_t exception_call(mp_obj_t self_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t exception_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args) {
     mp_obj_exception_t *base = self_in;
-    mp_obj_exception_t *o = m_new_obj_var(mp_obj_exception_t, mp_obj_t*, n_args);
+
+    if (n_kw != 0) {
+        nlr_jump(mp_obj_new_exception_msg_1_arg(MP_QSTR_TypeError, "%s does not take keyword arguments", qstr_str(base->id)));
+    }
+
+    mp_obj_exception_t *o = m_new_obj_var(mp_obj_exception_t, mp_obj_t, n_args);
     o->base.type = &exception_type;
     o->id = base->id;
     o->msg = 0;
     o->args.len = n_args;
-
-    // TODO: factor out as reusable copy_reversed()
-    int j = 0;
-    for (int i = n_args - 1; i >= 0; i--) {
-        o->args.items[i] = args[j++];
-    }
+    memcpy(o->args.items, args, n_args * sizeof(mp_obj_t));
     return o;
 }
 
@@ -64,7 +64,7 @@
     { &mp_const_type },
     "exception",
     .print = exception_print,
-    .call_n = exception_call,
+    .call = exception_call,
 };
 
 mp_obj_t mp_obj_new_exception(qstr id) {
diff --git a/py/objfilter.c b/py/objfilter.c
index 18225ac..6ef3ef6 100644
--- a/py/objfilter.c
+++ b/py/objfilter.c
@@ -14,16 +14,15 @@
     mp_obj_t iter;
 } mp_obj_filter_t;
 
-static mp_obj_t filter_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
-    /* NOTE: args are backwards */
-    if (n_args != 2) {
+static mp_obj_t filter_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    if (n_args != 2 || n_kw != 0) {
         nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "filter expected 2 arguments"));
     }
     assert(n_args == 2);
     mp_obj_filter_t *o = m_new_obj(mp_obj_filter_t);
     o->base.type = &filter_type;
-    o->fun = args[1];
-    o->iter = rt_getiter(args[0]);
+    o->fun = args[0];
+    o->iter = rt_getiter(args[1]);
     return o;
 }
 
@@ -38,7 +37,7 @@
     while ((next = rt_iternext(self->iter)) != mp_const_stop_iteration) {
         mp_obj_t val;
         if (self->fun != mp_const_none) {
-            val = rt_call_function_n(self->fun, 1, &next);
+            val = rt_call_function_n_kw(self->fun, 1, 0, &next);
         } else {
             val = next;
         }
diff --git a/py/objfloat.c b/py/objfloat.c
index 1ac8754..d397daa 100644
--- a/py/objfloat.c
+++ b/py/objfloat.c
@@ -24,8 +24,9 @@
     print(env, "%.8g", o->value);
 }
 
-// args are reverse in the array
-static mp_obj_t float_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t float_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    // TODO check n_kw == 0
+
     switch (n_args) {
         case 0:
             return mp_obj_new_float(0);
diff --git a/py/objfun.c b/py/objfun.c
index c624cf2..0bac142 100644
--- a/py/objfun.c
+++ b/py/objfun.c
@@ -43,16 +43,28 @@
     }
 }
 
-mp_obj_t fun_native_call_n_kw(mp_obj_t self_in, int n_args, int n_kw, const mp_obj_t *args);
-// args are in reverse order in the array
-mp_obj_t fun_native_call_n(mp_obj_t self_in, int n_args, const mp_obj_t *args) {
+mp_obj_t fun_native_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    assert(MP_OBJ_IS_TYPE(self_in, &fun_native_type));
     mp_obj_fun_native_t *self = self_in;
+
     // check number of arguments
-    check_nargs(self, n_args, 0);
+    check_nargs(self, n_args, n_kw);
+
     if (self->is_kw) {
-        return fun_native_call_n_kw(self_in, n_args, 0, args);
-    }
-    if (self->n_args_min == self->n_args_max) {
+        // function allows keywords
+
+        // TODO if n_kw==0 then don't allocate any memory for map (either pass NULL or allocate it on the stack)
+        mp_map_t *kw_args = mp_map_new(n_kw);
+        for (int i = 0; i < 2 * n_kw; i += 2) {
+            qstr name = mp_obj_str_get(args[n_args + i]);
+            mp_map_lookup(kw_args, MP_OBJ_NEW_QSTR(name), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = args[n_args + i + 1];
+        }
+        mp_obj_t res = ((mp_fun_kw_t)self->fun)(n_args, args, kw_args);
+        // TODO clean up kw_args
+
+        return res;
+
+    } else if (self->n_args_min == self->n_args_max) {
         // function requires a fixed number of arguments
 
         // dispatch function call
@@ -64,10 +76,10 @@
                 return ((mp_fun_1_t)self->fun)(args[0]);
 
             case 2:
-                return ((mp_fun_2_t)self->fun)(args[1], args[0]);
+                return ((mp_fun_2_t)self->fun)(args[0], args[1]);
 
             case 3:
-                return ((mp_fun_3_t)self->fun)(args[2], args[1], args[0]);
+                return ((mp_fun_3_t)self->fun)(args[0], args[1], args[2]);
 
             default:
                 assert(0);
@@ -75,42 +87,16 @@
         }
 
     } else {
-        // function takes a variable number of arguments
+        // function takes a variable number of arguments, but no keywords
 
-        // TODO really the args need to be passed in as a Python tuple, as the form f(*[1,2]) can be used to pass var args
-        mp_obj_t *args_ordered = m_new(mp_obj_t, n_args);
-        for (int i = 0; i < n_args; i++) {
-            args_ordered[i] = args[n_args - i - 1];
-        }
-
-        mp_obj_t res = ((mp_fun_var_t)self->fun)(n_args, args_ordered);
-        m_del(mp_obj_t, args_ordered, n_args);
-
-        return res;
+        return ((mp_fun_var_t)self->fun)(n_args, args);
     }
 }
 
-mp_obj_t fun_native_call_n_kw(mp_obj_t self_in, int n_args, int n_kw, const mp_obj_t *args) {
-    mp_obj_fun_native_t *self = self_in;
-
-    check_nargs(self, n_args, n_kw);
-
-    mp_obj_t *vargs = mp_obj_new_tuple_reverse(n_args, args + 2*n_kw);
-    mp_map_t *kw_args = mp_map_new(n_kw);
-    for (int i = 0; i < 2*n_kw; i+=2) {
-        qstr name = mp_obj_str_get(args[i+1]);
-        mp_map_lookup(kw_args, MP_OBJ_NEW_QSTR(name), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND)->value = args[i];
-    }
-    mp_obj_t res = ((mp_fun_kw_t)self->fun)(vargs, kw_args);
-    // TODO clean up vargs and kw_args
-    return res;
-}
-
 const mp_obj_type_t fun_native_type = {
     { &mp_const_type },
     "function",
-    .call_n = fun_native_call_n,
-    .call_n_kw = fun_native_call_n_kw,
+    .call = fun_native_call,
 };
 
 // fun must have the correct signature for n_args fixed arguments
@@ -156,13 +142,15 @@
     const byte *bytecode;   // bytecode for the function
 } mp_obj_fun_bc_t;
 
-// args are in reverse order in the array
-mp_obj_t fun_bc_call_n(mp_obj_t self_in, int n_args, const mp_obj_t *args) {
+mp_obj_t fun_bc_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args) {
     mp_obj_fun_bc_t *self = self_in;
 
     if (n_args != self->n_args) {
         nlr_jump(mp_obj_new_exception_msg_2_args(MP_QSTR_TypeError, "function takes %d positional arguments but %d were given", (const char*)(machine_int_t)self->n_args, (const char*)(machine_int_t)n_args));
     }
+    if (n_kw != 0) {
+        nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "function does not take keyword arguments"));
+    }
 
     // optimisation: allow the compiler to optimise this tail call for
     // the common case when the globals don't need to be changed
@@ -180,7 +168,7 @@
 const mp_obj_type_t fun_bc_type = {
     { &mp_const_type },
     "function",
-    .call_n = fun_bc_call_n,
+    .call = fun_bc_call,
 };
 
 mp_obj_t mp_obj_new_fun_bc(int n_args, uint n_state, const byte *code) {
@@ -257,13 +245,15 @@
     return MP_OBJ_NEW_SMALL_INT(val);
 }
 
-// args are in reverse order in the array
-mp_obj_t fun_asm_call_n(mp_obj_t self_in, int n_args, const mp_obj_t *args) {
+mp_obj_t fun_asm_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args) {
     mp_obj_fun_asm_t *self = self_in;
 
     if (n_args != self->n_args) {
         nlr_jump(mp_obj_new_exception_msg_2_args(MP_QSTR_TypeError, "function takes %d positional arguments but %d were given", (const char*)(machine_int_t)self->n_args, (const char*)(machine_int_t)n_args));
     }
+    if (n_kw != 0) {
+        nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "function does not take keyword arguments"));
+    }
 
     machine_uint_t ret;
     if (n_args == 0) {
@@ -271,9 +261,9 @@
     } else if (n_args == 1) {
         ret = ((inline_asm_fun_1_t)self->fun)(convert_obj_for_inline_asm(args[0]));
     } else if (n_args == 2) {
-        ret = ((inline_asm_fun_2_t)self->fun)(convert_obj_for_inline_asm(args[1]), convert_obj_for_inline_asm(args[0]));
+        ret = ((inline_asm_fun_2_t)self->fun)(convert_obj_for_inline_asm(args[0]), convert_obj_for_inline_asm(args[1]));
     } else if (n_args == 3) {
-        ret = ((inline_asm_fun_3_t)self->fun)(convert_obj_for_inline_asm(args[2]), convert_obj_for_inline_asm(args[1]), convert_obj_for_inline_asm(args[0]));
+        ret = ((inline_asm_fun_3_t)self->fun)(convert_obj_for_inline_asm(args[0]), convert_obj_for_inline_asm(args[1]), convert_obj_for_inline_asm(args[2]));
     } else {
         assert(0);
         ret = 0;
@@ -285,7 +275,7 @@
 static const mp_obj_type_t fun_asm_type = {
     { &mp_const_type },
     "function",
-    .call_n = fun_asm_call_n,
+    .call = fun_asm_call,
 };
 
 mp_obj_t mp_obj_new_fun_asm(uint n_args, void *fun) {
diff --git a/py/objgenerator.c b/py/objgenerator.c
index 40f202d..d58480f 100644
--- a/py/objgenerator.c
+++ b/py/objgenerator.c
@@ -20,8 +20,7 @@
     mp_obj_t *fun;
 } mp_obj_gen_wrap_t;
 
-// args are in reverse order in the array
-mp_obj_t gen_wrap_call_n(mp_obj_t self_in, int n_args, const mp_obj_t *args) {
+mp_obj_t gen_wrap_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args) {
     mp_obj_gen_wrap_t *self = self_in;
     mp_obj_t self_fun = self->fun;
     assert(MP_OBJ_IS_TYPE(self_fun, &fun_bc_type));
@@ -32,6 +31,9 @@
     if (n_args != bc_n_args) {
         nlr_jump(mp_obj_new_exception_msg_2_args(MP_QSTR_TypeError, "function takes %d positional arguments but %d were given", (const char*)(machine_int_t)bc_n_args, (const char*)(machine_int_t)n_args));
     }
+    if (n_kw != 0) {
+        nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "function does not take keyword arguments"));
+    }
 
     return mp_obj_new_gen_instance(bc_code, self->n_state, n_args, args);
 }
@@ -39,7 +41,7 @@
 const mp_obj_type_t gen_wrap_type = {
     { &mp_const_type },
     "generator",
-    .call_n = gen_wrap_call_n,
+    .call = gen_wrap_call,
 };
 
 mp_obj_t mp_obj_new_gen_wrap(uint n_locals, uint n_stack, mp_obj_t fun) {
@@ -58,6 +60,7 @@
     mp_obj_base_t base;
     const byte *ip;
     mp_obj_t *sp;
+    uint n_state;
     mp_obj_t state[];
 } mp_obj_gen_instance_t;
 
@@ -71,7 +74,7 @@
 
 mp_obj_t gen_instance_iternext(mp_obj_t self_in) {
     mp_obj_gen_instance_t *self = self_in;
-    bool yield = mp_execute_byte_code_2(&self->ip, &self->state[0], &self->sp);
+    bool yield = mp_execute_byte_code_2(&self->ip, &self->state[self->n_state - 1], &self->sp);
     if (yield) {
         return *self->sp;
     } else {
@@ -92,16 +95,16 @@
     .iternext = gen_instance_iternext,
 };
 
-// args are in reverse order in the array
 mp_obj_t mp_obj_new_gen_instance(const byte *bytecode, uint n_state, int n_args, const mp_obj_t *args) {
     mp_obj_gen_instance_t *o = m_new_obj_var(mp_obj_gen_instance_t, mp_obj_t, n_state);
     o->base.type = &gen_instance_type;
     o->ip = bytecode;
-    o->sp = o->state + n_state;
+    o->sp = &o->state[0] - 1; // sp points to top of stack, which starts off 1 below the state
+    o->n_state = n_state;
 
-    // copy args (which are in reverse order) to start of state array
+    // copy args to end of state array, in reverse (that's how mp_execute_byte_code_2 needs it)
     for (int i = 0; i < n_args; i++) {
-        o->state[i] = args[n_args - 1 - i];
+        o->state[n_state - 1 - i] = args[i];
     }
 
     // TODO
diff --git a/py/objint.c b/py/objint.c
index 937bff7..477b8aa 100644
--- a/py/objint.c
+++ b/py/objint.c
@@ -12,7 +12,9 @@
 
 // This dispatcher function is expected to be independent of the implementation
 // of long int
-static mp_obj_t int_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t int_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    // TODO check n_kw == 0
+
     switch (n_args) {
         case 0:
             return MP_OBJ_NEW_SMALL_INT(0);
@@ -28,7 +30,7 @@
         case 2:
             // should be a string, parse it
             // TODO proper error checking of argument types
-            return MP_OBJ_NEW_SMALL_INT(strtonum(qstr_str(mp_obj_get_qstr(args[1])), mp_obj_get_int(args[0])));
+            return MP_OBJ_NEW_SMALL_INT(strtonum(qstr_str(mp_obj_get_qstr(args[0])), mp_obj_get_int(args[1])));
 
         default:
             nlr_jump(mp_obj_new_exception_msg_1_arg(MP_QSTR_TypeError, "int takes at most 2 arguments, %d given", (void*)(machine_int_t)n_args));
diff --git a/py/objlist.c b/py/objlist.c
index 829677b..4822c7b 100644
--- a/py/objlist.c
+++ b/py/objlist.c
@@ -38,7 +38,9 @@
     print(env, "]");
 }
 
-static mp_obj_t list_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t list_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    // TODO check n_kw == 0
+
     switch (n_args) {
         case 0:
             // return a new, empty list
@@ -248,19 +250,14 @@
     }
 }
 
-mp_obj_t mp_obj_list_sort(mp_obj_t args, mp_map_t *kwargs) {
-    mp_obj_t *args_items = NULL;
-    uint args_len = 0;
-
-    assert(MP_OBJ_IS_TYPE(args, &tuple_type));
-    mp_obj_tuple_get(args, &args_len, &args_items);
-    assert(args_len >= 1);
-    assert(MP_OBJ_IS_TYPE(args_items[0], &list_type));
-    if (args_len > 1) {
+mp_obj_t mp_obj_list_sort(uint n_args, const mp_obj_t *args, mp_map_t *kwargs) {
+    assert(n_args >= 1);
+    assert(MP_OBJ_IS_TYPE(args[0], &list_type));
+    if (n_args > 1) {
         nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError,
                                           "list.sort takes no positional arguments"));
     }
-    mp_obj_list_t *self = args_items[0];
+    mp_obj_list_t *self = args[0];
     if (self->len > 1) {
         mp_map_elem_t *keyfun = mp_map_lookup(kwargs, MP_OBJ_NEW_QSTR(qstr_from_str_static("key")), MP_MAP_LOOKUP);
         mp_map_elem_t *reverse = mp_map_lookup(kwargs, MP_OBJ_NEW_QSTR(qstr_from_str_static("reverse")), MP_MAP_LOOKUP);
@@ -425,14 +422,6 @@
     return o;
 }
 
-mp_obj_t mp_obj_new_list_reverse(uint n, mp_obj_t *items) {
-    mp_obj_list_t *o = list_new(n);
-    for (int i = 0; i < n; i++) {
-        o->items[i] = items[n - i - 1];
-    }
-    return o;
-}
-
 void mp_obj_list_get(mp_obj_t self_in, uint *len, mp_obj_t **items) {
     mp_obj_list_t *self = self_in;
     *len = self->len;
diff --git a/py/objmap.c b/py/objmap.c
index 3657352..0c25cfd 100644
--- a/py/objmap.c
+++ b/py/objmap.c
@@ -15,18 +15,17 @@
     mp_obj_t iters[];
 } mp_obj_map_t;
 
-static mp_obj_t map_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
-    /* NOTE: args are backwards */
-    if (n_args < 2) {
-        nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "map must have at least 2 arguments"));
+static mp_obj_t map_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    if (n_args < 2 || n_kw != 0) {
+        nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "map must have at least 2 arguments and no keyword arguments"));
     }
     assert(n_args >= 2);
     mp_obj_map_t *o = m_new_obj_var(mp_obj_map_t, mp_obj_t, n_args - 1);
     o->base.type = &map_type;
     o->n_iters = n_args - 1;
-    o->fun = args[n_args - 1];
+    o->fun = args[0];
     for (int i = 0; i < n_args - 1; i++) {
-        o->iters[i] = rt_getiter(args[n_args-i-2]);
+        o->iters[i] = rt_getiter(args[i + 1]);
     }
     return o;
 }
@@ -48,7 +47,7 @@
         }
         nextses[i] = next;
     }
-    return rt_call_function_n(self->fun, self->n_iters, nextses);
+    return rt_call_function_n_kw(self->fun, self->n_iters, 0, nextses);
 }
 
 const mp_obj_type_t map_type = {
diff --git a/py/objmodule.c b/py/objmodule.c
index 50d2bb3..e97e731 100644
--- a/py/objmodule.c
+++ b/py/objmodule.c
@@ -26,7 +26,7 @@
     mp_obj_module_t *self = self_in;
     mp_map_elem_t *elem = mp_map_lookup(self->globals, MP_OBJ_NEW_QSTR(attr), MP_MAP_LOOKUP);
     if (elem != NULL) {
-        dest[1] = elem->value;
+        dest[0] = elem->value;
     }
 }
 
diff --git a/py/objset.c b/py/objset.c
index 6ea6d62..f44c5ba 100644
--- a/py/objset.c
+++ b/py/objset.c
@@ -46,7 +46,9 @@
 }
 
 
-static mp_obj_t set_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t set_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    // TODO check n_kw == 0
+
     switch (n_args) {
         case 0:
             // return a new, empty set
@@ -239,7 +241,7 @@
     if (MP_OBJ_IS_TYPE(self_in, &set_type)) {
         self = self_in;
     } else {
-        self = set_make_new(NULL, 1, &self_in);
+        self = set_make_new((mp_obj_t)&set_type, 1, 0, &self_in);
         cleanup_self = true;
     }
 
@@ -248,7 +250,7 @@
     if (MP_OBJ_IS_TYPE(other_in, &set_type)) {
         other = other_in;
     } else {
-        other = set_make_new(NULL, 1, &other_in);
+        other = set_make_new((mp_obj_t)&set_type, 1, 0, &other_in);
         cleanup_other = true;
     }
     bool out = true;
diff --git a/py/objtuple.c b/py/objtuple.c
index a64b1fa..fd6d415 100644
--- a/py/objtuple.c
+++ b/py/objtuple.c
@@ -31,8 +31,9 @@
     print(env, ")");
 }
 
-// args are in reverse order in the array
-static mp_obj_t tuple_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t tuple_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    // TODO check n_kw == 0
+
     switch (n_args) {
         case 0:
             // return a empty tuple
@@ -119,19 +120,6 @@
     return o;
 }
 
-mp_obj_t mp_obj_new_tuple_reverse(uint n, const mp_obj_t *items) {
-    if (n == 0) {
-        return mp_const_empty_tuple;
-    }
-    mp_obj_tuple_t *o = m_new_obj_var(mp_obj_tuple_t, mp_obj_t, n);
-    o->base.type = &tuple_type;
-    o->len = n;
-    for (int i = 0; i < n; i++) {
-        o->items[i] = items[n - i - 1];
-    }
-    return o;
-}
-
 void mp_obj_tuple_get(mp_obj_t self_in, uint *len, mp_obj_t **items) {
     assert(MP_OBJ_IS_TYPE(self_in, &tuple_type));
     mp_obj_tuple_t *self = self_in;
diff --git a/py/objtype.c b/py/objtype.c
index 5a2f96d..2dcca6b 100644
--- a/py/objtype.c
+++ b/py/objtype.c
@@ -70,8 +70,7 @@
     print(env, "<%s object at %p>", mp_obj_get_type_str(self_in), self_in);
 }
 
-// args are reverse in the array
-static mp_obj_t class_make_new(mp_obj_t self_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t class_make_new(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args) {
     assert(MP_OBJ_IS_TYPE(self_in, &mp_const_type));
     mp_obj_type_t *self = self_in;
 
@@ -83,14 +82,14 @@
     if (init_fn != NULL) {
         // call __init__ function
         mp_obj_t init_ret;
-        if (n_args == 0) {
-            init_ret = rt_call_function_n(init_fn->value, 1, (mp_obj_t*)&o);
+        if (n_args == 0 && n_kw == 0) {
+            init_ret = rt_call_function_n_kw(init_fn->value, 1, 0, (mp_obj_t*)&o);
         } else {
-            mp_obj_t *args2 = m_new(mp_obj_t, n_args + 1);
-            memcpy(args2, args, n_args * sizeof(mp_obj_t));
-            args2[n_args] = o;
-            init_ret = rt_call_function_n(init_fn->value, n_args + 1, args2);
-            m_del(mp_obj_t, args2, n_args + 1);
+            mp_obj_t *args2 = m_new(mp_obj_t, 1 + n_args + 2 * n_kw);
+            args2[0] = o;
+            memcpy(args2 + 1, args, (n_args + 2 * n_kw) * sizeof(mp_obj_t));
+            init_ret = rt_call_function_n_kw(init_fn->value, n_args + 1, n_kw, args2);
+            m_del(mp_obj_t, args2, 1 + n_args + 2 * n_kw);
         }
         if (init_ret != mp_const_none) {
             nlr_jump(mp_obj_new_exception_msg_1_arg(MP_QSTR_TypeError, "__init__() should return None, not '%s'", mp_obj_get_type_str(init_ret)));
@@ -112,19 +111,19 @@
     mp_map_elem_t *elem = mp_map_lookup(&self->members, MP_OBJ_NEW_QSTR(attr), MP_MAP_LOOKUP);
     if (elem != NULL) {
         // object member, always treated as a value
-        dest[1] = elem->value;
+        dest[0] = elem->value;
         return;
     }
     elem = mp_obj_class_lookup(self->base.type, attr, MP_MAP_LOOKUP);
     if (elem != NULL) {
         if (mp_obj_is_callable(elem->value)) {
             // class member is callable so build a bound method
-            dest[1] = elem->value;
-            dest[0] = self_in;
+            dest[0] = elem->value;
+            dest[1] = self_in;
             return;
         } else {
             // class member is a value, so just return that value
-            dest[1] = elem->value;
+            dest[0] = elem->value;
             return;
         }
     }
@@ -153,25 +152,25 @@
     print(env, "<class '%s'>", self->name);
 }
 
-// args are reverse in the array
-static mp_obj_t type_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t type_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    // TODO check n_kw == 0
+
     switch (n_args) {
         case 1:
             return mp_obj_get_type(args[0]);
 
         case 3:
-            // args[2] = name
+            // args[0] = name
             // args[1] = bases tuple
-            // args[0] = locals dict
-            return mp_obj_new_type(mp_obj_get_qstr(args[2]), args[1], args[0]);
+            // args[2] = locals dict
+            return mp_obj_new_type(mp_obj_get_qstr(args[0]), args[1], args[2]);
 
         default:
             nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "type takes at 1 or 3 arguments"));
     }
 }
 
-// args are in reverse order in the array
-static mp_obj_t type_call_n(mp_obj_t self_in, int n_args, const mp_obj_t *args) {
+static mp_obj_t type_call(mp_obj_t self_in, uint n_args, uint n_kw, const mp_obj_t *args) {
     // instantiate an instance of a class
 
     mp_obj_type_t *self = self_in;
@@ -181,19 +180,19 @@
     }
 
     // make new instance
-    mp_obj_t o = self->make_new(self, n_args, args);
+    mp_obj_t o = self->make_new(self, n_args, n_kw, args);
 
     // return new instance
     return o;
 }
 
-// for fail, do nothing; for attr, dest[1] = value; for method, dest[0] = self, dest[1] = method
+// for fail, do nothing; for attr, dest[0] = value; for method, dest[0] = method, dest[1] = self
 static void type_load_attr(mp_obj_t self_in, qstr attr, mp_obj_t *dest) {
     assert(MP_OBJ_IS_TYPE(self_in, &mp_const_type));
     mp_obj_type_t *self = self_in;
     mp_map_elem_t *elem = mp_obj_class_lookup(self, attr, MP_MAP_LOOKUP);
     if (elem != NULL) {
-        dest[1] = elem->value;
+        dest[0] = elem->value;
         return;
     }
 
@@ -207,15 +206,15 @@
                 // see http://docs.python.org/3.3/howto/descriptor.html
                 if (MP_OBJ_IS_TYPE(meth->fun, &mp_type_staticmethod)) {
                     // return just the function
-                    dest[1] = ((mp_obj_staticmethod_t*)meth->fun)->fun;
+                    dest[0] = ((mp_obj_staticmethod_t*)meth->fun)->fun;
                 } else if (MP_OBJ_IS_TYPE(meth->fun, &mp_type_classmethod)) {
                     // return a bound method, with self being this class
-                    dest[1] = ((mp_obj_classmethod_t*)meth->fun)->fun;
-                    dest[0] = self_in;
+                    dest[0] = ((mp_obj_classmethod_t*)meth->fun)->fun;
+                    dest[1] = self_in;
                 } else {
                     // return just the function
                     // TODO need to wrap in a type check for the first argument; eg list.append(1,1) needs to throw an exception
-                    dest[1] = (mp_obj_t)meth->fun;
+                    dest[0] = (mp_obj_t)meth->fun;
                 }
                 return;
             }
@@ -243,7 +242,7 @@
     "type",
     .print = type_print,
     .make_new = type_make_new,
-    .call_n = type_call_n,
+    .call = type_call,
     .load_attr = type_load_attr,
     .store_attr = type_store_attr,
 };
diff --git a/py/objzip.c b/py/objzip.c
index a552ff5..72db06a 100644
--- a/py/objzip.c
+++ b/py/objzip.c
@@ -12,13 +12,14 @@
     mp_obj_t iters[];
 } mp_obj_zip_t;
 
-static mp_obj_t zip_make_new(mp_obj_t type_in, int n_args, const mp_obj_t *args) {
-    /* NOTE: args are backwards */
+static mp_obj_t zip_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    // TODO check n_kw == 0
+
     mp_obj_zip_t *o = m_new_obj_var(mp_obj_zip_t, mp_obj_t, n_args);
     o->base.type = &zip_type;
     o->n_iters = n_args;
     for (int i = 0; i < n_args; i++) {
-        o->iters[i] = rt_getiter(args[n_args-i-1]);
+        o->iters[i] = rt_getiter(args[i]);
     }
     return o;
 }
diff --git a/py/runtime.c b/py/runtime.c
index d6ef6f9..aa1b83a 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -694,79 +694,53 @@
 }
 
 mp_obj_t rt_call_function_0(mp_obj_t fun) {
-    return rt_call_function_n(fun, 0, NULL);
+    return rt_call_function_n_kw(fun, 0, 0, NULL);
 }
 
 mp_obj_t rt_call_function_1(mp_obj_t fun, mp_obj_t arg) {
-    return rt_call_function_n(fun, 1, &arg);
+    return rt_call_function_n_kw(fun, 1, 0, &arg);
 }
 
 mp_obj_t rt_call_function_2(mp_obj_t fun, mp_obj_t arg1, mp_obj_t arg2) {
     mp_obj_t args[2];
-    args[1] = arg1;
-    args[0] = arg2;
-    return rt_call_function_n(fun, 2, args);
+    args[0] = arg1;
+    args[1] = arg2;
+    return rt_call_function_n_kw(fun, 2, 0, args);
 }
 
-// args are in reverse order in the array
-mp_obj_t rt_call_function_n(mp_obj_t fun_in, int n_args, const mp_obj_t *args) {
+// args contains, eg: arg0  arg1  key0  value0  key1  value1
+mp_obj_t rt_call_function_n_kw(mp_obj_t fun_in, uint n_args, uint n_kw, const mp_obj_t *args) {
     // TODO improve this: fun object can specify its type and we parse here the arguments,
     // passing to the function arrays of fixed and keyword arguments
 
-    DEBUG_OP_printf("calling function %p(n_args=%d, args=%p)\n", fun_in, n_args, args);
-
-    if (MP_OBJ_IS_SMALL_INT(fun_in)) {
-        nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "'int' object is not callable"));
-    } else {
-        mp_obj_base_t *fun = fun_in;
-        if (fun->type->call_n != NULL) {
-            return fun->type->call_n(fun_in, n_args, args);
-        } else {
-            nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "'%s' object is not callable", fun->type->name));
-        }
-    }
-}
-
-// args are in reverse order in the array; keyword arguments come first, value then key
-// eg: (value1, key1, value0, key0, arg1, arg0)
-mp_obj_t rt_call_function_n_kw(mp_obj_t fun_in, uint n_args, uint n_kw, const mp_obj_t *args) {
-    // TODO merge this and _n into a single, smarter thing
     DEBUG_OP_printf("calling function %p(n_args=%d, n_kw=%d, args=%p)\n", fun_in, n_args, n_kw, args);
 
     if (MP_OBJ_IS_SMALL_INT(fun_in)) {
         nlr_jump(mp_obj_new_exception_msg(MP_QSTR_TypeError, "'int' object is not callable"));
     } else {
         mp_obj_base_t *fun = fun_in;
-        if (fun->type->call_n_kw != NULL) {
-            return fun->type->call_n_kw(fun_in, n_args, n_kw, args);
+        if (fun->type->call != NULL) {
+            return fun->type->call(fun_in, n_args, n_kw, args);
         } else {
             nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "'%s' object is not callable", fun->type->name));
         }
     }
 }
 
-// args contains: arg(n_args-1)  arg(n_args-2)  ...  arg(0)  self/NULL  fun
-// if n_args==0 then there are only self/NULL and fun
-mp_obj_t rt_call_method_n(uint n_args, const mp_obj_t *args) {
-    DEBUG_OP_printf("call method %p(self=%p, n_args=%u)\n", args[n_args + 1], args[n_args], n_args);
-    return rt_call_function_n(args[n_args + 1], n_args + ((args[n_args] == NULL) ? 0 : 1), args);
-}
-
-// args contains: kw_val(n_kw-1)  kw_key(n_kw-1) ... kw_val(0)  kw_key(0)  arg(n_args-1)  arg(n_args-2)  ...  arg(0)  self/NULL  fun
+// args contains: fun  self/NULL  arg(0)  ...  arg(n_args-2)  arg(n_args-1)  kw_key(0)  kw_val(0)  ... kw_key(n_kw-1)  kw_val(n_kw-1)
+// if n_args==0 and n_kw==0 then there are only fun and self/NULL
 mp_obj_t rt_call_method_n_kw(uint n_args, uint n_kw, const mp_obj_t *args) {
-    uint n = n_args + 2 * n_kw;
-    DEBUG_OP_printf("call method %p(self=%p, n_args=%u, n_kw=%u)\n", args[n + 1], args[n], n_args, n_kw);
-    return rt_call_function_n_kw(args[n + 1], n_args + ((args[n] == NULL) ? 0 : 1), n_kw, args);
+    DEBUG_OP_printf("call method (fun=%p, self=%p, n_args=%u, n_kw=%u, args=%p)\n", args[0], args[1], n_args, n_kw, args);
+    int adjust = (args[1] == NULL) ? 0 : 1;
+    return rt_call_function_n_kw(args[0], n_args + adjust, n_kw, args + 2 - adjust);
 }
 
-// items are in reverse order
 mp_obj_t rt_build_tuple(int n_args, mp_obj_t *items) {
-    return mp_obj_new_tuple_reverse(n_args, items);
+    return mp_obj_new_tuple(n_args, items);
 }
 
-// items are in reverse order
 mp_obj_t rt_build_list(int n_args, mp_obj_t *items) {
-    return mp_obj_new_list_reverse(n_args, items);
+    return mp_obj_new_list(n_args, items);
 }
 
 mp_obj_t rt_build_set(int n_args, mp_obj_t *items) {
@@ -814,9 +788,9 @@
     // use load_method
     mp_obj_t dest[2];
     rt_load_method(base, attr, dest);
-    if (dest[0] == NULL) {
+    if (dest[1] == NULL) {
         // load_method returned just a normal attribute
-        return dest[1];
+        return dest[0];
     } else {
         // load_method returned a method, so build a bound method object
         return mp_obj_new_bound_meth(dest[0], dest[1]);
@@ -839,10 +813,10 @@
     }
 
     // if nothing found yet, look for built-in and generic names
-    if (dest[1] == NULL) {
+    if (dest[0] == NULL) {
         if (attr == MP_QSTR___next__ && type->iternext != NULL) {
-            dest[1] = (mp_obj_t)&mp_builtin_next_obj;
-            dest[0] = base;
+            dest[0] = (mp_obj_t)&mp_builtin_next_obj;
+            dest[1] = base;
         } else {
             // generic method lookup
             // this is a lookup in the object (ie not class or type)
@@ -854,15 +828,15 @@
                         // see http://docs.python.org/3.3/howto/descriptor.html
                         if (MP_OBJ_IS_TYPE(meth->fun, &mp_type_staticmethod)) {
                             // return just the function
-                            dest[1] = ((mp_obj_staticmethod_t*)meth->fun)->fun;
+                            dest[0] = ((mp_obj_staticmethod_t*)meth->fun)->fun;
                         } else if (MP_OBJ_IS_TYPE(meth->fun, &mp_type_classmethod)) {
                             // return a bound method, with self being the type of this object
-                            dest[1] = ((mp_obj_classmethod_t*)meth->fun)->fun;
-                            dest[0] = mp_obj_get_type(base);
+                            dest[0] = ((mp_obj_classmethod_t*)meth->fun)->fun;
+                            dest[1] = mp_obj_get_type(base);
                         } else {
                             // return a bound method, with self being this object
-                            dest[1] = (mp_obj_t)meth->fun;
-                            dest[0] = base;
+                            dest[0] = (mp_obj_t)meth->fun;
+                            dest[1] = base;
                         }
                         break;
                     }
@@ -871,7 +845,7 @@
         }
     }
 
-    if (dest[1] == NULL) {
+    if (dest[0] == NULL) {
         // no attribute/method called attr
         // following CPython, we give a more detailed error message for type objects
         if (MP_OBJ_IS_TYPE(base, &mp_const_type)) {
@@ -995,8 +969,8 @@
     rt_build_set,
     rt_store_set,
     rt_make_function_from_id,
-    rt_call_function_n,
-    rt_call_method_n,
+    rt_call_function_n_kw,
+    rt_call_method_n_kw,
     rt_binary_op,
     rt_getiter,
     rt_iternext,
diff --git a/py/runtime.h b/py/runtime.h
index 32cb476..770cac8 100644
--- a/py/runtime.h
+++ b/py/runtime.h
@@ -19,9 +19,7 @@
 mp_obj_t rt_call_function_0(mp_obj_t fun);
 mp_obj_t rt_call_function_1(mp_obj_t fun, mp_obj_t arg);
 mp_obj_t rt_call_function_2(mp_obj_t fun, mp_obj_t arg1, mp_obj_t arg2);
-mp_obj_t rt_call_function_n(mp_obj_t fun, int n_args, const mp_obj_t *args);
 mp_obj_t rt_call_function_n_kw(mp_obj_t fun, uint n_args, uint n_kw, const mp_obj_t *args);
-mp_obj_t rt_call_method_n(uint n_args, const mp_obj_t *args);
 mp_obj_t rt_call_method_n_kw(uint n_args, uint n_kw, const mp_obj_t *args);
 mp_obj_t rt_build_tuple(int n_args, mp_obj_t *items);
 mp_obj_t rt_build_list(int n_args, mp_obj_t *items);
diff --git a/py/showbc.c b/py/showbc.c
index 2ba81df..36393ce 100644
--- a/py/showbc.c
+++ b/py/showbc.c
@@ -286,12 +286,11 @@
                 printf("BUILD_MAP " UINT_FMT, unum);
                 break;
 
-                /*
             case MP_BC_STORE_MAP:
-                sp += 2;
-                rt_store_map(sp[0], sp[-2], sp[-1]);
+                printf("STORE_MAP");
                 break;
 
+                /*
             case MP_BC_MAP_ADD:
                 DECODE_UINT;
                 // I think it's guaranteed by the compiler that sp[unum + 1] is a map
@@ -310,6 +309,13 @@
                 printf("SET_ADD " UINT_FMT, unum);
                 break;
 
+#if MICROPY_ENABLE_SLICE
+            case MP_BC_BUILD_SLICE:
+                DECODE_UINT;
+                printf("BUILD_SLICE " UINT_FMT, unum);
+                break;
+#endif
+
             case MP_BC_UNPACK_SEQUENCE:
                 DECODE_UINT;
                 printf("UNPACK_SEQUENCE " UINT_FMT, unum);
diff --git a/py/vm.c b/py/vm.c
index 7cfa938..f352b12 100644
--- a/py/vm.c
+++ b/py/vm.c
@@ -19,24 +19,24 @@
 #define DECODE_ULABEL do { unum = (ip[0] | (ip[1] << 8)); ip += 2; } while (0)
 #define DECODE_SLABEL do { unum = (ip[0] | (ip[1] << 8)) - 0x8000; ip += 2; } while (0)
 #define DECODE_QSTR do { qstr = *ip++; if (qstr > 127) { qstr = ((qstr & 0x3f) << 8) | (*ip++); } } while (0)
-#define PUSH(val) *--sp = (val)
-#define POP() (*sp++)
+#define PUSH(val) *++sp = (val)
+#define POP() (*sp--)
 #define TOP() (*sp)
 #define SET_TOP(val) *sp = (val)
 
-// args are in reverse order in array
 mp_obj_t mp_execute_byte_code(const byte *code, const mp_obj_t *args, uint n_args, uint n_state) {
-    mp_obj_t temp_state[10]; // TODO allocate properly
+    // allocate state for locals and stack
+    mp_obj_t temp_state[10];
     mp_obj_t *state = &temp_state[0];
-    mp_obj_t *sp = &state[10];
     if (n_state > 10) {
         state = m_new(mp_obj_t, n_state);
-        sp = &state[n_state];
     }
+    mp_obj_t *sp = &state[0] - 1;
+
     // init args
     for (int i = 0; i < n_args; i++) {
         assert(i < 8);
-        state[i] = args[n_args - 1 - i];
+        state[n_state - 1 - i] = args[i];
     }
     const byte *ip = code;
 
@@ -45,15 +45,15 @@
         for (uint n_local = *ip++; n_local > 0; n_local--) {
             uint local_num = *ip++;
             if (local_num < n_args) {
-                state[local_num] = mp_obj_new_cell(state[local_num]);
+                state[n_state - 1 - local_num] = mp_obj_new_cell(state[n_state - 1 - local_num]);
             } else {
-                state[local_num] = mp_obj_new_cell(MP_OBJ_NULL);
+                state[n_state - 1 - local_num] = mp_obj_new_cell(MP_OBJ_NULL);
             }
         }
     }
 
     // execute the byte code
-    if (mp_execute_byte_code_2(&ip, &state[0], &sp)) {
+    if (mp_execute_byte_code_2(&ip, &state[n_state - 1], &sp)) {
         // it shouldn't yield
         assert(0);
     }
@@ -63,8 +63,8 @@
     return *sp;
 }
 
-// fastn has items in normal order
-// sp points to top of stack which grows down
+// fastn has items in reverse order (fastn[0] is local[0], fastn[-1] is local[1], etc)
+// sp points to the top item of the stack, which grows up (when the stack is empty, sp is one slot below its base)
 bool mp_execute_byte_code_2(const byte **ip_in_out, mp_obj_t *fastn, mp_obj_t **sp_in_out) {
     // careful: be sure to declare volatile any variables read in the exception handler (written is ok, I think)
 
@@ -73,7 +73,7 @@
     machine_uint_t unum;
     qstr qstr;
     mp_obj_t obj1, obj2;
-    mp_obj_t fast0 = fastn[0], fast1 = fastn[1], fast2 = fastn[2];
+    mp_obj_t fast0 = fastn[0], fast1 = fastn[-1], fast2 = fastn[-2];
     nlr_buf_t nlr;
 
     volatile machine_uint_t currently_in_except_block = 0; // 0 or 1, to detect nested exceptions
@@ -148,12 +148,21 @@
 
                     case MP_BC_LOAD_FAST_N:
                         DECODE_UINT;
-                        PUSH(fastn[unum]);
+                        PUSH(fastn[-unum]);
                         break;
 
                     case MP_BC_LOAD_DEREF:
                         DECODE_UINT;
-                        PUSH(rt_get_cell(fastn[unum]));
+                        if (unum == 0) {
+                            obj1 = fast0;
+                        } else if (unum == 1) {
+                            obj1 = fast1;
+                        } else if (unum == 2) {
+                            obj1 = fast2;
+                        } else {
+                            obj1 = fastn[-unum];
+                        }
+                        PUSH(rt_get_cell(obj1));
                         break;
 
                     case MP_BC_LOAD_NAME:
@@ -173,8 +182,8 @@
 
                     case MP_BC_LOAD_METHOD:
                         DECODE_QSTR;
-                        sp -= 1;
-                        rt_load_method(sp[1], qstr, sp);
+                        rt_load_method(*sp, qstr, sp);
+                        sp += 1;
                         break;
 
                     case MP_BC_LOAD_BUILD_CLASS:
@@ -195,12 +204,21 @@
 
                     case MP_BC_STORE_FAST_N:
                         DECODE_UINT;
-                        fastn[unum] = POP();
+                        fastn[-unum] = POP();
                         break;
 
                     case MP_BC_STORE_DEREF:
                         DECODE_UINT;
-                        rt_set_cell(fastn[unum], POP());
+                        if (unum == 0) {
+                            obj1 = fast0;
+                        } else if (unum == 1) {
+                            obj1 = fast1;
+                        } else if (unum == 2) {
+                            obj1 = fast2;
+                        } else {
+                            obj1 = fastn[-unum];
+                        }
+                        rt_set_cell(obj1, POP());
                         break;
 
                     case MP_BC_STORE_NAME:
@@ -215,13 +233,13 @@
 
                     case MP_BC_STORE_ATTR:
                         DECODE_QSTR;
-                        rt_store_attr(sp[0], qstr, sp[1]);
-                        sp += 2;
+                        rt_store_attr(sp[0], qstr, sp[-1]);
+                        sp -= 2;
                         break;
 
                     case MP_BC_STORE_SUBSCR:
-                        rt_store_subscr(sp[1], sp[0], sp[2]);
-                        sp += 3;
+                        rt_store_subscr(sp[-1], sp[0], sp[-2]);
+                        sp -= 3;
                         break;
 
                     case MP_BC_DUP_TOP:
@@ -230,26 +248,26 @@
                         break;
 
                     case MP_BC_DUP_TOP_TWO:
-                        sp -= 2;
-                        sp[0] = sp[2];
-                        sp[1] = sp[3];
+                        sp += 2;
+                        sp[0] = sp[-2];
+                        sp[-1] = sp[-3];
                         break;
 
                     case MP_BC_POP_TOP:
-                        ++sp;
+                        sp -= 1;
                         break;
 
                     case MP_BC_ROT_TWO:
                         obj1 = sp[0];
-                        sp[0] = sp[1];
-                        sp[1] = obj1;
+                        sp[0] = sp[-1];
+                        sp[-1] = obj1;
                         break;
 
                     case MP_BC_ROT_THREE:
                         obj1 = sp[0];
-                        sp[0] = sp[1];
-                        sp[1] = sp[2];
-                        sp[2] = obj1;
+                        sp[0] = sp[-1];
+                        sp[-1] = sp[-2];
+                        sp[-2] = obj1;
                         break;
 
                     case MP_BC_JUMP:
@@ -276,14 +294,14 @@
                         if (rt_is_true(TOP())) {
                             ip += unum;
                         } else {
-                            sp++;
+                            sp--;
                         }
                         break;
 
                     case MP_BC_JUMP_IF_FALSE_OR_POP:
                         DECODE_SLABEL;
                         if (rt_is_true(TOP())) {
-                            sp++;
+                            sp--;
                         } else {
                             ip += unum;
                         }
@@ -321,7 +339,7 @@
                         DECODE_ULABEL; // the jump offset if iteration finishes; for labels are always forward
                         obj1 = rt_iternext(TOP());
                         if (obj1 == mp_const_stop_iteration) {
-                            ++sp; // pop the exhausted iterator
+                            --sp; // pop the exhausted iterator
                             ip += unum; // jump to after for-block
                         } else {
                             PUSH(obj1); // push the next iteration value
@@ -345,7 +363,7 @@
                         //exc_sp--; // discard ip
                         currently_in_except_block = (exc_sp[0] & 1); // restore previous state
                         exc_sp -= 2; // pop back to previous exception handler
-                        //sp += 3; // pop 3 exception values
+                        //sp -= 3; // pop 3 exception values
                         break;
 
                     case MP_BC_UNARY_OP:
@@ -362,23 +380,21 @@
 
                     case MP_BC_BUILD_TUPLE:
                         DECODE_UINT;
-                        obj1 = rt_build_tuple(unum, sp);
-                        sp += unum - 1;
-                        SET_TOP(obj1);
+                        sp -= unum - 1;
+                        SET_TOP(rt_build_tuple(unum, sp));
                         break;
 
                     case MP_BC_BUILD_LIST:
                         DECODE_UINT;
-                        obj1 = rt_build_list(unum, sp);
-                        sp += unum - 1;
-                        SET_TOP(obj1);
+                        sp -= unum - 1;
+                        SET_TOP(rt_build_list(unum, sp));
                         break;
 
                     case MP_BC_LIST_APPEND:
                         DECODE_UINT;
-                        // I think it's guaranteed by the compiler that sp[unum] is a list
+                        // I think it's guaranteed by the compiler that sp[-unum] is a list
-                        rt_list_append(sp[unum], sp[0]);
-                        sp++;
+                        rt_list_append(sp[-unum], sp[0]);
+                        sp--;
                         break;
 
                     case MP_BC_BUILD_MAP:
@@ -387,29 +403,28 @@
                         break;
 
                     case MP_BC_STORE_MAP:
-                        sp += 2;
-                        rt_store_map(sp[0], sp[-2], sp[-1]);
+                        sp -= 2;
+                        rt_store_map(sp[0], sp[2], sp[1]);
                         break;
 
                     case MP_BC_MAP_ADD:
                         DECODE_UINT;
-                        // I think it's guaranteed by the compiler that sp[unum + 1] is a map
-                        rt_store_map(sp[unum + 1], sp[0], sp[1]);
-                        sp += 2;
+                        // I think it's guaranteed by the compiler that sp[-unum - 1] is a map
+                        rt_store_map(sp[-unum - 1], sp[0], sp[-1]);
+                        sp -= 2;
                         break;
 
                     case MP_BC_BUILD_SET:
                         DECODE_UINT;
-                        obj1 = rt_build_set(unum, sp);
-                        sp += unum - 1;
-                        SET_TOP(obj1);
+                        sp -= unum - 1;
+                        SET_TOP(rt_build_set(unum, sp));
                         break;
 
                     case MP_BC_SET_ADD:
                         DECODE_UINT;
-                        // I think it's guaranteed by the compiler that sp[unum] is a set
-                        rt_store_set(sp[unum], sp[0]);
-                        sp++;
+                        // I think it's guaranteed by the compiler that sp[-unum] is a set
+                        rt_store_set(sp[-unum], sp[0]);
+                        sp--;
                         break;
 
 #if MICROPY_ENABLE_SLICE
@@ -428,8 +443,8 @@
 
                     case MP_BC_UNPACK_SEQUENCE:
                         DECODE_UINT;
-                        rt_unpack_sequence(sp[0], unum, sp - unum + 1);
-                        sp -= unum - 1;
+                        rt_unpack_sequence(sp[0], unum, sp + unum - 1);
+                        sp += unum - 1;
                         break;
 
                     case MP_BC_MAKE_FUNCTION:
@@ -445,32 +460,18 @@
 
                     case MP_BC_CALL_FUNCTION:
                         DECODE_UINT;
-                        if ((unum & 0xff00) == 0) {
-                            // no keywords
-                            unum &= 0xff; // n_positional
-                            sp += unum;
-                            *sp = rt_call_function_n(*sp, unum, sp - unum);
-                        } else {
-                            // keywords
-                            int argsize = (unum & 0xff) + ((unum >> 7) & 0x1fe);
-                            sp += argsize;
-                            *sp = rt_call_function_n_kw(*sp, unum & 0xff, (unum >> 8) & 0xff, sp - argsize);
-                        }
+                        // unum & 0xff == n_positional
+                        // (unum >> 8) & 0xff == n_keyword
+                        sp -= (unum & 0xff) + ((unum >> 7) & 0x1fe);
+                        SET_TOP(rt_call_function_n_kw(*sp, unum & 0xff, (unum >> 8) & 0xff, sp + 1));
                         break;
 
                     case MP_BC_CALL_METHOD:
                         DECODE_UINT;
-                        if ((unum & 0xff00) == 0) {
-                            // no keywords
-                            unum &= 0xff;
-                            obj1 = rt_call_method_n(unum, sp);
-                            sp += unum + 1;
-                        } else {
-                            // keywords
-                            obj1 = rt_call_method_n_kw(unum & 0xff, (unum >> 8) & 0xff, sp);
-                            sp += (unum & 0xff) + ((unum >> 7) & 0x1fe) + 1;
-                        }
-                        SET_TOP(obj1);
+                        // unum & 0xff == n_positional
+                        // (unum >> 8) & 0xff == n_keyword
+                        sp -= (unum & 0xff) + ((unum >> 7) & 0x1fe) + 1;
+                        SET_TOP(rt_call_method_n_kw(unum & 0xff, (unum >> 8) & 0xff, sp));
                         break;
 
                     case MP_BC_RETURN_VALUE:
@@ -489,8 +490,8 @@
                         nlr_pop();
                         *ip_in_out = ip;
                         fastn[0] = fast0;
-                        fastn[1] = fast1;
-                        fastn[2] = fast2;
+                        fastn[-1] = fast1;
+                        fastn[-2] = fast2;
                         *sp_in_out = sp;
                         return true;