Implement mp_parse_node_free; print properly repr(string).

commit: b829b5caecd1ba4fbc30e03978776d2c868dd67b [log] [tgz]
author: Damien George <damien.p.george@gmail.com> Sat Jan 25 13:51:19 2014 +0000
committer: Damien George <damien.p.george@gmail.com> Sat Jan 25 13:51:19 2014 +0000
tree: b3135377105920ff382d7c7ebc2117655b4d00da
parent: e0722ee9d94c0e812a474e349ded5147f6b869f6 [diff]
diff --git a/py/builtineval.c b/py/builtineval.c
index 0e8f9e3..49d2bf1 100644
--- a/py/builtineval.c
+++ b/py/builtineval.c

@@ -24,13 +24,13 @@
     const byte *str = mp_obj_str_get_data(o_in, &str_len);
 
     // create the lexer
-    mp_lexer_t *lex = mp_lexer_new_from_str_len("<string>", (const char*)str, str_len, 0);
+    mp_lexer_t *lex = mp_lexer_new_from_str_len(MP_QSTR__lt_string_gt_, (const char*)str, str_len, 0);
+    qstr source_name = mp_lexer_source_name(lex);
 
     // parse the string
     qstr parse_exc_id;
     const char *parse_exc_msg;
     mp_parse_node_t pn = mp_parse(lex, MP_PARSE_EVAL_INPUT, &parse_exc_id, &parse_exc_msg);
-    qstr source_name = mp_lexer_source_name(lex);
     mp_lexer_free(lex);
 
     if (pn == MP_PARSE_NODE_NULL) {
@@ -40,6 +40,7 @@
 
     // compile the string
     mp_obj_t module_fun = mp_compile(pn, source_name, false);
+    mp_parse_node_free(pn);
 
     if (module_fun == mp_const_none) {
         // TODO handle compile error correctly

diff --git a/py/builtinimport.c b/py/builtinimport.c
index 3cfd64e..35e7dcb 100644
--- a/py/builtinimport.c
+++ b/py/builtinimport.c

@@ -29,9 +29,7 @@
     }
     */
 
-    uint mod_name_l;
-    const byte *mod_name_s = mp_obj_str_get_data(args[0], &mod_name_l);
-    qstr mod_name = qstr_from_strn((const char*)mod_name_s, mod_name_l);
+    qstr mod_name = mp_obj_str_get_qstr(args[0]);
 
     mp_obj_t loaded = mp_obj_module_get(mod_name);
     if (loaded != MP_OBJ_NULL) {
@@ -44,6 +42,7 @@
         // TODO handle lexer error correctly
         return mp_const_none;
     }
+    qstr source_name = mp_lexer_source_name(lex);
 
     // create a new module object
     mp_obj_t module_obj = mp_obj_new_module(mod_name);
@@ -60,7 +59,6 @@
     qstr parse_exc_id;
     const char *parse_exc_msg;
     mp_parse_node_t pn = mp_parse(lex, MP_PARSE_FILE_INPUT, &parse_exc_id, &parse_exc_msg);
-    qstr source_name = mp_lexer_source_name(lex);
     mp_lexer_free(lex);
 
     if (pn == MP_PARSE_NODE_NULL) {
@@ -72,6 +70,7 @@
 
     // compile the imported script
     mp_obj_t module_fun = mp_compile(pn, source_name, false);
+    mp_parse_node_free(pn);
 
     if (module_fun == mp_const_none) {
         // TODO handle compile error correctly

diff --git a/py/emitcpy.c b/py/emitcpy.c
index de2a578..71861c9 100644
--- a/py/emitcpy.c
+++ b/py/emitcpy.c

@@ -192,29 +192,26 @@
     if (bytes) {
         printf("b");
     }
-    bool quote_single = false;
+    int quote_char = '\'';
     if (has_single_quote && !has_double_quote) {
-        printf("\"");
-    } else {
-        quote_single = true;
-        printf("'");
+        quote_char = '"';
     }
-    for (int i = 0; i < len; i++) {
-        if (str[i] == '\n') {
-            printf("\\n");
-        } else if (str[i] == '\\') {
+    printf("%c", quote_char);
+    for (const char *s = str, *top = str + len; s < top; s++) {
+        if (*s == quote_char) {
+            printf("\\%c", quote_char);
+        } else if (*s == '\\') {
             printf("\\\\");
-        } else if (str[i] == '\'' && quote_single) {
-            printf("\\'");
+        } else if (32 <= *s && *s <= 126) {
+            printf("%c", *s);
+        } else if (*s == '\n') {
+            printf("\\n");
+        // TODO add more escape codes here
         } else {
-            printf("%c", str[i]);
+            printf("\\x%02x", (*s) & 0xff);
         }
     }
-    if (has_single_quote && !has_double_quote) {
-        printf("\"");
-    } else {
-        printf("'");
-    }
+    printf("%c", quote_char);
 }
 
 static void emit_cpy_load_const_str(emit_t *emit, qstr qstr, bool bytes) {

diff --git a/py/lexer.c b/py/lexer.c
index 9911da3..f71e355 100644
--- a/py/lexer.c
+++ b/py/lexer.c

@@ -493,8 +493,8 @@
                                 }
                                 c = num;
                             } else {
-                                // TODO error message
-                                assert(0);
+                                // unrecognised escape character; CPython lets this through verbatim as '\' and then the character
+                                vstr_add_char(&lex->vstr, '\\');
                             }
                             break;
                     }
@@ -644,10 +644,10 @@
     }
 }
 
-mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close) {
+mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close) {
     mp_lexer_t *lex = m_new(mp_lexer_t, 1);
 
-    lex->source_name = qstr_from_str(src_name);
+    lex->source_name = src_name;
     lex->stream_data = stream_data;
     lex->stream_next_char = stream_next_char;
     lex->stream_close = stream_close;

diff --git a/py/lexer.h b/py/lexer.h
index 69e9732..13fbfb5 100644
--- a/py/lexer.h
+++ b/py/lexer.h

@@ -124,8 +124,8 @@
 
 void mp_token_show(const mp_token_t *tok);
 
-mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close);
-mp_lexer_t *mp_lexer_new_from_str_len(const char *src_name, const char *str, uint len, uint free_len);
+mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close);
+mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, uint len, uint free_len);
 
 void mp_lexer_free(mp_lexer_t *lex);
 qstr mp_lexer_source_name(mp_lexer_t *lex);

diff --git a/py/lexerstr.c b/py/lexerstr.c
index 1e105d8..d53a47d 100644
--- a/py/lexerstr.c
+++ b/py/lexerstr.c

@@ -28,7 +28,7 @@
     m_del_obj(mp_lexer_str_buf_t, sb);
 }
 
-mp_lexer_t *mp_lexer_new_from_str_len(const char *src_name, const char *str, uint len, uint free_len) {
+mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, uint len, uint free_len) {
     mp_lexer_str_buf_t *sb = m_new_obj(mp_lexer_str_buf_t);
     sb->free_len = free_len;
     sb->src_beg = str;

diff --git a/py/lexerunix.c b/py/lexerunix.c
index 7846120..5d96c46 100644
--- a/py/lexerunix.c
+++ b/py/lexerunix.c

@@ -28,7 +28,7 @@
         return NULL;
     }
 
-    return mp_lexer_new_from_str_len(filename, data, size, size);
+    return mp_lexer_new_from_str_len(qstr_from_str(filename), data, size, size);
 }
 
 /******************************************************************************/

diff --git a/py/obj.h b/py/obj.h
index e122f5a..b33e3c5 100644
--- a/py/obj.h
+++ b/py/obj.h

@@ -287,6 +287,7 @@
 bool mp_obj_str_equal(mp_obj_t s1, mp_obj_t s2);
 uint mp_obj_str_get_hash(mp_obj_t self_in);
 uint mp_obj_str_get_len(mp_obj_t self_in);
+qstr mp_obj_str_get_qstr(mp_obj_t self_in); // use this if you will anyway convert the string to a qstr
 const char *mp_obj_str_get_str(mp_obj_t self_in); // use this only if you need the string to be null terminated
 const byte *mp_obj_str_get_data(mp_obj_t self_in, uint *len);
 

diff --git a/py/objstr.c b/py/objstr.c
index 3a4d69c..84ac74b 100644
--- a/py/objstr.c
+++ b/py/objstr.c

@@ -40,11 +40,39 @@
     if (kind == PRINT_STR && !is_bytes) {
         print(env, "%.*s", str_len, str_data);
     } else {
+        // this escapes characters, but it will be very slow to print (calling print many times)
+        bool has_single_quote = false;
+        bool has_double_quote = false;
+        for (const byte *s = str_data, *top = str_data + str_len; (!has_single_quote || !has_double_quote) && s < top; s++) {
+            if (*s == '\'') {
+                has_single_quote = true;
+            } else if (*s == '"') {
+                has_double_quote = true;
+            }
+        }
         if (is_bytes) {
             print(env, "b");
         }
-        // TODO need to escape chars etc
-        print(env, "'%.*s'", str_len, str_data);
+        int quote_char = '\'';
+        if (has_single_quote && !has_double_quote) {
+            quote_char = '"';
+        }
+        print(env, "%c", quote_char);
+        for (const byte *s = str_data, *top = str_data + str_len; s < top; s++) {
+            if (*s == quote_char) {
+                print(env, "\\%c", quote_char);
+            } else if (*s == '\\') {
+                print(env, "\\\\");
+            } else if (32 <= *s && *s <= 126) {
+                print(env, "%c", *s);
+            } else if (*s == '\n') {
+                print(env, "\\n");
+            // TODO add more escape codes here if we want to match CPython
+            } else {
+                print(env, "\\x%02x", *s);
+            }
+        }
+        print(env, "%c", quote_char);
     }
 }
 
@@ -474,13 +502,17 @@
     }
 }
 
+void bad_implicit_conversion(mp_obj_t self_in) __attribute__((noreturn));
+void bad_implicit_conversion(mp_obj_t self_in) {
+    nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(self_in)));
+}
+
 uint mp_obj_str_get_hash(mp_obj_t self_in) {
     if (MP_OBJ_IS_STR(self_in)) {
         GET_STR_HASH(self_in, h);
         return h;
     } else {
-        nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
-                 mp_obj_get_type_str(self_in)));
+        bad_implicit_conversion(self_in);
     }
 }
 
@@ -489,8 +521,20 @@
         GET_STR_LEN(self_in, l);
         return l;
     } else {
-        nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
-                 mp_obj_get_type_str(self_in)));
+        bad_implicit_conversion(self_in);
+    }
+}
+
+// use this if you will anyway convert the string to a qstr
+// will be more efficient for the case where it's already a qstr
+qstr mp_obj_str_get_qstr(mp_obj_t self_in) {
+    if (MP_OBJ_IS_QSTR(self_in)) {
+        return MP_OBJ_QSTR_VALUE(self_in);
+    } else if (MP_OBJ_IS_TYPE(self_in, &str_type)) {
+        mp_obj_str_t *self = self_in;
+        return qstr_from_strn((char*)self->data, self->len);
+    } else {
+        bad_implicit_conversion(self_in);
     }
 }
 
@@ -502,8 +546,7 @@
         (void)l; // len unused
         return (const char*)s;
     } else {
-        nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
-                 mp_obj_get_type_str(self_in)));
+        bad_implicit_conversion(self_in);
     }
 }
 
@@ -513,8 +556,7 @@
         *len = l;
         return s;
     } else {
-        nlr_jump(mp_obj_new_exception_msg_varg(MP_QSTR_TypeError, "Can't convert '%s' object to str implicitly",
-                 mp_obj_get_type_str(self_in)));
+        bad_implicit_conversion(self_in);
     }
 }
 

diff --git a/py/parse.c b/py/parse.c
index 7a8fd3b..d9969d6 100644
--- a/py/parse.c
+++ b/py/parse.c

@@ -26,6 +26,8 @@
 #define RULE_ARG_OPT_TOK        (0x3000)
 #define RULE_ARG_OPT_RULE       (0x4000)
 
+#define ADD_BLANK_NODE(rule_id) ((rule_id) == RULE_funcdef || (rule_id) == RULE_classdef || (rule_id) == RULE_comp_for || (rule_id) == RULE_lambdef || (rule_id) == RULE_lambdef_nocond)
+
 // (un)comment to use rule names; for debugging
 //#define USE_RULE_NAME (1)
 
@@ -135,15 +137,23 @@
     return pn;
 }
 
-int parse_node_free_struct(mp_parse_node_t pn_in) {
-    int cnt = 0;
-    if (MP_PARSE_NODE_IS_STRUCT(pn_in)) {
-        mp_parse_node_struct_t *pn = (mp_parse_node_struct_t *)pn_in;
-        int n = pn->kind_num_nodes >> 8;
-        for (int i = 0; i < n; i++) {
-            cnt += parse_node_free_struct(pn->nodes[i]);
+uint mp_parse_node_free(mp_parse_node_t pn) {
+    uint cnt = 0;
+    if (MP_PARSE_NODE_IS_STRUCT(pn)) {
+        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
+        uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
+        uint rule_id = MP_PARSE_NODE_STRUCT_KIND(pns);
+        bool adjust = ADD_BLANK_NODE(rule_id);
+        if (adjust) {
+            n--;
         }
-        m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pn);
+        for (uint i = 0; i < n; i++) {
+            cnt += mp_parse_node_free(pns->nodes[i]);
+        }
+        if (adjust) {
+            n++;
+        }
+        m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns);
         cnt++;
     }
     return cnt;
@@ -174,15 +184,15 @@
             default: assert(0);
         }
     } else {
-        mp_parse_node_struct_t *pns2 = (mp_parse_node_struct_t*)pn;
-        int n = pns2->kind_num_nodes >> 8;
+        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
+        uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
 #ifdef USE_RULE_NAME
-        printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns2)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns2), n);
+        printf("%s(%d) (n=%d)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, MP_PARSE_NODE_STRUCT_KIND(pns), n);
 #else
-        printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns2), n);
+        printf("rule(%u) (n=%d)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n);
 #endif
-        for (int i = 0; i < n; i++) {
-            mp_parse_node_print(pns2->nodes[i], indent + 2);
+        for (uint i = 0; i < n; i++) {
+            mp_parse_node_print(pns->nodes[i], indent + 2);
         }
     }
 }
@@ -472,7 +482,7 @@
                 }
 
                 // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
-                if (rule->rule_id == RULE_funcdef || rule->rule_id == RULE_classdef || rule->rule_id == RULE_comp_for || rule->rule_id == RULE_lambdef || rule->rule_id == RULE_lambdef_nocond) {
+                if (ADD_BLANK_NODE(rule->rule_id)) {
                     emit_rule = true;
                     push_result_node(parser, MP_PARSE_NODE_NULL);
                     i += 1;

diff --git a/py/parse.h b/py/parse.h
index b7f1904..9797873 100644
--- a/py/parse.h
+++ b/py/parse.h

@@ -53,7 +53,7 @@
 #define MP_PARSE_NODE_STRUCT_NUM_NODES(pns) ((pns)->kind_num_nodes >> 8)
 
 mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg);
-int parse_node_free_struct(mp_parse_node_t pn_in);
+uint mp_parse_node_free(mp_parse_node_t pn);
 
 void mp_parse_node_print(mp_parse_node_t pn, int indent);
 

diff --git a/py/qstrdefs.h b/py/qstrdefs.h
index e76efaf..8170684 100644
--- a/py/qstrdefs.h
+++ b/py/qstrdefs.h

@@ -91,4 +91,5 @@
 Q(<dictcomp>)
 Q(<setcomp>)
 Q(<genexpr>)
+Q(<string>)
 Q(<stdin>)
commit	b829b5caecd1ba4fbc30e03978776d2c868dd67b	[log] [tgz]
author	Damien George <damien.p.george@gmail.com>	Sat Jan 25 13:51:19 2014 +0000
committer	Damien George <damien.p.george@gmail.com>	Sat Jan 25 13:51:19 2014 +0000
tree	b3135377105920ff382d7c7ebc2117655b4d00da
parent	e0722ee9d94c0e812a474e349ded5147f6b869f6 [diff]