Damien George | 04b9147 | 2014-05-03 23:27:38 +0100 | [diff] [blame] | 1 | /* |
| 2 | * This file is part of the Micro Python project, http://micropython.org/ |
| 3 | * |
| 4 | * The MIT License (MIT) |
| 5 | * |
| 6 | * Copyright (c) 2013, 2014 Damien P. George |
| 7 | * |
| 8 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
| 9 | * of this software and associated documentation files (the "Software"), to deal |
| 10 | * in the Software without restriction, including without limitation the rights |
| 11 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 12 | * copies of the Software, and to permit persons to whom the Software is |
| 13 | * furnished to do so, subject to the following conditions: |
| 14 | * |
| 15 | * The above copyright notice and this permission notice shall be included in |
| 16 | * all copies or substantial portions of the Software. |
| 17 | * |
| 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 21 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 23 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
| 24 | * THE SOFTWARE. |
| 25 | */ |
| 26 | |
xbe | efe3422 | 2014-03-16 00:14:26 -0700 | [diff] [blame] | 27 | #include <stdbool.h> |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 28 | #include <stdint.h> |
| 29 | #include <stdio.h> |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 30 | #include <assert.h> |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 31 | #include <string.h> |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 32 | |
Damien George | 0bfc763 | 2015-02-07 18:33:58 +0000 | [diff] [blame] | 33 | #include "py/nlr.h" |
Damien George | 51dfcb4 | 2015-01-01 20:27:54 +0000 | [diff] [blame] | 34 | #include "py/lexer.h" |
| 35 | #include "py/parse.h" |
| 36 | #include "py/parsenum.h" |
| 37 | #include "py/smallint.h" |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 38 | |
// Layout of the rule_t.act byte:
//   bits 0-3: argument count for or/and rules, or the list variant for list rules
//   bits 4-5: rule kind (RULE_ACT_OR, RULE_ACT_AND or RULE_ACT_LIST)
//   bit 6:    RULE_ACT_ALLOW_IDENT flag
//   bit 7:    RULE_ACT_ADD_BLANK flag
#define RULE_ACT_ARG_MASK       (0x0f)
#define RULE_ACT_KIND_MASK      (0x30)
#define RULE_ACT_ALLOW_IDENT    (0x40)
#define RULE_ACT_ADD_BLANK      (0x80)
#define RULE_ACT_OR             (0x10)
#define RULE_ACT_AND            (0x20)
#define RULE_ACT_LIST           (0x30)

// Layout of each 16-bit rule_t.arg[] entry:
//   top 4 bits:  what the entry refers to (token or rule, mandatory or optional)
//   low 12 bits: the token kind or rule id itself
#define RULE_ARG_KIND_MASK      (0xf000)
#define RULE_ARG_ARG_MASK       (0x0fff)
#define RULE_ARG_TOK            (0x1000)
#define RULE_ARG_RULE           (0x2000)
#define RULE_ARG_OPT_TOK        (0x3000)
#define RULE_ARG_OPT_RULE       (0x4000)

// True if the rule (a pointer to rule_t) has the RULE_ACT_ADD_BLANK flag set.
// The argument is parenthesised so that any pointer expression can be passed.
#define ADD_BLANK_NODE(rule) ((((rule)->act) & RULE_ACT_ADD_BLANK) != 0)
Damien George | b829b5c | 2014-01-25 13:51:19 +0000 | [diff] [blame] | 55 | |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 56 | // (un)comment to use rule names; for debugging |
| 57 | //#define USE_RULE_NAME (1) |
| 58 | |
| 59 | typedef struct _rule_t { |
| 60 | byte rule_id; |
| 61 | byte act; |
| 62 | #ifdef USE_RULE_NAME |
| 63 | const char *rule_name; |
| 64 | #endif |
| 65 | uint16_t arg[]; |
| 66 | } rule_t; |
| 67 | |
| 68 | enum { |
Damien George | 00208ce | 2014-01-23 00:00:53 +0000 | [diff] [blame] | 69 | #define DEF_RULE(rule, comp, kind, ...) RULE_##rule, |
Damien George | 51dfcb4 | 2015-01-01 20:27:54 +0000 | [diff] [blame] | 70 | #include "py/grammar.h" |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 71 | #undef DEF_RULE |
| 72 | RULE_maximum_number_of, |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 73 | RULE_string, // special node for non-interned string |
Damien George | 4c81ba8 | 2015-01-13 16:21:23 +0000 | [diff] [blame] | 74 | RULE_bytes, // special node for non-interned bytes |
Damien George | 7d414a1 | 2015-02-08 01:57:40 +0000 | [diff] [blame] | 75 | RULE_const_object, // special node for a constant, generic Python object |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 76 | }; |
| 77 | |
Damien George | b47ea4e | 2014-12-20 18:37:50 +0000 | [diff] [blame] | 78 | #define ident (RULE_ACT_ALLOW_IDENT) |
| 79 | #define blank (RULE_ACT_ADD_BLANK) |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 80 | #define or(n) (RULE_ACT_OR | n) |
| 81 | #define and(n) (RULE_ACT_AND | n) |
| 82 | #define one_or_more (RULE_ACT_LIST | 2) |
| 83 | #define list (RULE_ACT_LIST | 1) |
| 84 | #define list_with_end (RULE_ACT_LIST | 3) |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 85 | #define tok(t) (RULE_ARG_TOK | MP_TOKEN_##t) |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 86 | #define rule(r) (RULE_ARG_RULE | RULE_##r) |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 87 | #define opt_tok(t) (RULE_ARG_OPT_TOK | MP_TOKEN_##t) |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 88 | #define opt_rule(r) (RULE_ARG_OPT_RULE | RULE_##r) |
| 89 | #ifdef USE_RULE_NAME |
Damien George | 00208ce | 2014-01-23 00:00:53 +0000 | [diff] [blame] | 90 | #define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } }; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 91 | #else |
Damien George | 00208ce | 2014-01-23 00:00:53 +0000 | [diff] [blame] | 92 | #define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } }; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 93 | #endif |
Damien George | 51dfcb4 | 2015-01-01 20:27:54 +0000 | [diff] [blame] | 94 | #include "py/grammar.h" |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 95 | #undef or |
| 96 | #undef and |
| 97 | #undef list |
| 98 | #undef list_with_end |
| 99 | #undef tok |
| 100 | #undef rule |
| 101 | #undef opt_tok |
| 102 | #undef opt_rule |
| 103 | #undef one_or_more |
| 104 | #undef DEF_RULE |
| 105 | |
Paul Sokolovsky | 520e2f5 | 2014-02-12 18:31:30 +0200 | [diff] [blame] | 106 | STATIC const rule_t *rules[] = { |
Damien George | 00208ce | 2014-01-23 00:00:53 +0000 | [diff] [blame] | 107 | #define DEF_RULE(rule, comp, kind, ...) &rule_##rule, |
Damien George | 51dfcb4 | 2015-01-01 20:27:54 +0000 | [diff] [blame] | 108 | #include "py/grammar.h" |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 109 | #undef DEF_RULE |
| 110 | }; |
| 111 | |
| 112 | typedef struct _rule_stack_t { |
Damien George | 5c670ac | 2015-01-24 23:12:58 +0000 | [diff] [blame] | 113 | mp_uint_t src_line : BITS_PER_WORD - 8; // maximum bits storing source line number |
| 114 | mp_uint_t rule_id : 8; // this must be large enough to fit largest rule number |
| 115 | mp_uint_t arg_i; // this dictates the maximum nodes in a "list" of things |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 116 | } rule_stack_t; |
| 117 | |
| 118 | typedef struct _parser_t { |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 119 | bool had_memory_error; |
| 120 | |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 121 | mp_uint_t rule_stack_alloc; |
| 122 | mp_uint_t rule_stack_top; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 123 | rule_stack_t *rule_stack; |
| 124 | |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 125 | mp_uint_t result_stack_alloc; |
| 126 | mp_uint_t result_stack_top; |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 127 | mp_parse_node_t *result_stack; |
Damien George | 0833500 | 2014-01-18 23:24:36 +0000 | [diff] [blame] | 128 | |
| 129 | mp_lexer_t *lexer; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 130 | } parser_t; |
| 131 | |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 132 | STATIC inline void memory_error(parser_t *parser) { |
| 133 | parser->had_memory_error = true; |
| 134 | } |
| 135 | |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 136 | STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t arg_i) { |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 137 | if (parser->had_memory_error) { |
| 138 | return; |
| 139 | } |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 140 | if (parser->rule_stack_top >= parser->rule_stack_alloc) { |
Damien George | 58ebde4 | 2014-05-21 20:32:59 +0100 | [diff] [blame] | 141 | rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC); |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 142 | if (rs == NULL) { |
| 143 | memory_error(parser); |
| 144 | return; |
| 145 | } |
| 146 | parser->rule_stack = rs; |
Damien George | 58ebde4 | 2014-05-21 20:32:59 +0100 | [diff] [blame] | 147 | parser->rule_stack_alloc += MICROPY_ALLOC_PARSE_RULE_INC; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 148 | } |
Damien George | 0833500 | 2014-01-18 23:24:36 +0000 | [diff] [blame] | 149 | rule_stack_t *rs = &parser->rule_stack[parser->rule_stack_top++]; |
| 150 | rs->src_line = src_line; |
| 151 | rs->rule_id = rule->rule_id; |
| 152 | rs->arg_i = arg_i; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 153 | } |
| 154 | |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 155 | STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 156 | assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE); |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 157 | mp_uint_t rule_id = arg & RULE_ARG_ARG_MASK; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 158 | assert(rule_id < RULE_maximum_number_of); |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 159 | push_rule(parser, parser->lexer->tok_line, rules[rule_id], 0); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 160 | } |
| 161 | |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 162 | STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) { |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 163 | assert(!parser->had_memory_error); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 164 | parser->rule_stack_top -= 1; |
| 165 | *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id]; |
| 166 | *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i; |
Damien George | 0833500 | 2014-01-18 23:24:36 +0000 | [diff] [blame] | 167 | *src_line = parser->rule_stack[parser->rule_stack_top].src_line; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 168 | } |
| 169 | |
Damien George | 40f3c02 | 2014-07-03 13:25:24 +0100 | [diff] [blame] | 170 | mp_parse_node_t mp_parse_node_new_leaf(mp_int_t kind, mp_int_t arg) { |
Paul Sokolovsky | 56e5ef2 | 2014-02-22 16:39:45 +0200 | [diff] [blame] | 171 | if (kind == MP_PARSE_NODE_SMALL_INT) { |
| 172 | return (mp_parse_node_t)(kind | (arg << 1)); |
| 173 | } |
Damien George | 7d414a1 | 2015-02-08 01:57:40 +0000 | [diff] [blame] | 174 | return (mp_parse_node_t)(kind | (arg << 4)); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 175 | } |
| 176 | |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 177 | void mp_parse_node_free(mp_parse_node_t pn) { |
Damien George | b829b5c | 2014-01-25 13:51:19 +0000 | [diff] [blame] | 178 | if (MP_PARSE_NODE_IS_STRUCT(pn)) { |
| 179 | mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn; |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 180 | mp_uint_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns); |
| 181 | mp_uint_t rule_id = MP_PARSE_NODE_STRUCT_KIND(pns); |
Damien George | 4c81ba8 | 2015-01-13 16:21:23 +0000 | [diff] [blame] | 182 | if (rule_id == RULE_string || rule_id == RULE_bytes) { |
Damien George | 52b5d76 | 2014-09-23 15:31:56 +0000 | [diff] [blame] | 183 | m_del(char, (char*)pns->nodes[0], (mp_uint_t)pns->nodes[1]); |
Damien George | 7d414a1 | 2015-02-08 01:57:40 +0000 | [diff] [blame] | 184 | } else if (rule_id == RULE_const_object) { |
| 185 | // don't free the const object since it's probably used by the compiled code |
Damien George | e7bb044 | 2014-10-23 14:13:05 +0100 | [diff] [blame] | 186 | } else { |
Damien George | b47ea4e | 2014-12-20 18:37:50 +0000 | [diff] [blame] | 187 | bool adjust = ADD_BLANK_NODE(rules[rule_id]); |
Damien George | e7bb044 | 2014-10-23 14:13:05 +0100 | [diff] [blame] | 188 | if (adjust) { |
| 189 | n--; |
| 190 | } |
| 191 | for (mp_uint_t i = 0; i < n; i++) { |
| 192 | mp_parse_node_free(pns->nodes[i]); |
| 193 | } |
| 194 | if (adjust) { |
| 195 | n++; |
| 196 | } |
Damien George | b829b5c | 2014-01-25 13:51:19 +0000 | [diff] [blame] | 197 | } |
| 198 | m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns); |
Paul Sokolovsky | aee2ba7 | 2014-01-25 00:56:19 +0200 | [diff] [blame] | 199 | } |
Paul Sokolovsky | aee2ba7 | 2014-01-25 00:56:19 +0200 | [diff] [blame] | 200 | } |
| 201 | |
#if MICROPY_DEBUG_PRINTERS
// Debug helper: print the parse tree rooted at pn to stdout, one node per line.
// Each line starts with a source-line column, followed by "indent" spaces and a
// description of the node; children are printed with the indent increased by 2.
void mp_parse_node_print(mp_parse_node_t pn, mp_uint_t indent) {
    // source-line column: "[nnnn] " for struct nodes, and the same 7 characters
    // of padding for all other nodes so that the tree stays aligned
    if (MP_PARSE_NODE_IS_STRUCT(pn)) {
        printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
    } else {
        printf("       ");
    }
    for (mp_uint_t i = 0; i < indent; i++) {
        printf(" ");
    }
    if (MP_PARSE_NODE_IS_NULL(pn)) {
        printf("NULL\n");
    } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
        mp_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        printf("int(" INT_FMT ")\n", arg);
    } else if (MP_PARSE_NODE_IS_LEAF(pn)) {
        mp_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
        switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
            case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
            // arg is unsigned, so it is printed with the unsigned format
            case MP_PARSE_NODE_TOKEN: printf("tok(" UINT_FMT ")\n", arg); break;
            default: assert(0);
        }
    } else {
        // node must be a mp_parse_node_struct_t
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
        if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
            // nodes[0] is the character data (not NUL-terminated), nodes[1] its length
            printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
        } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_bytes) {
            printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
        } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
            printf("literal const(%p)\n", (mp_obj_t)pns->nodes[0]);
        } else {
            // ordinary rule node: print a header line, then every child
            mp_uint_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
#ifdef USE_RULE_NAME
            printf("%s(" UINT_FMT ") (n=" UINT_FMT ")\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, (mp_uint_t)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#else
            printf("rule(" UINT_FMT ") (n=" UINT_FMT ")\n", (mp_uint_t)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#endif
            for (mp_uint_t i = 0; i < n; i++) {
                mp_parse_node_print(pns->nodes[i], indent + 2);
            }
        }
    }
}
#endif // MICROPY_DEBUG_PRINTERS
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 249 | |
| 250 | /* |
Paul Sokolovsky | 520e2f5 | 2014-02-12 18:31:30 +0200 | [diff] [blame] | 251 | STATIC void result_stack_show(parser_t *parser) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 252 | printf("result stack, most recent first\n"); |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 253 | for (mp_int_t i = parser->result_stack_top - 1; i >= 0; i--) { |
Damien George | cbd2f74 | 2014-01-19 11:48:48 +0000 | [diff] [blame] | 254 | mp_parse_node_print(parser->result_stack[i], 0); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 255 | } |
| 256 | } |
| 257 | */ |
| 258 | |
Paul Sokolovsky | 520e2f5 | 2014-02-12 18:31:30 +0200 | [diff] [blame] | 259 | STATIC mp_parse_node_t pop_result(parser_t *parser) { |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 260 | if (parser->had_memory_error) { |
| 261 | return MP_PARSE_NODE_NULL; |
| 262 | } |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 263 | assert(parser->result_stack_top > 0); |
| 264 | return parser->result_stack[--parser->result_stack_top]; |
| 265 | } |
| 266 | |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 267 | STATIC mp_parse_node_t peek_result(parser_t *parser, mp_uint_t pos) { |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 268 | if (parser->had_memory_error) { |
| 269 | return MP_PARSE_NODE_NULL; |
| 270 | } |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 271 | assert(parser->result_stack_top > pos); |
| 272 | return parser->result_stack[parser->result_stack_top - 1 - pos]; |
| 273 | } |
| 274 | |
Paul Sokolovsky | 520e2f5 | 2014-02-12 18:31:30 +0200 | [diff] [blame] | 275 | STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) { |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 276 | if (parser->had_memory_error) { |
| 277 | return; |
| 278 | } |
Damien George | 69a818d | 2014-01-12 13:55:24 +0000 | [diff] [blame] | 279 | if (parser->result_stack_top >= parser->result_stack_alloc) { |
Damien George | 50912e7 | 2015-01-20 11:55:10 +0000 | [diff] [blame] | 280 | mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC); |
| 281 | if (stack == NULL) { |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 282 | memory_error(parser); |
| 283 | return; |
| 284 | } |
Damien George | 50912e7 | 2015-01-20 11:55:10 +0000 | [diff] [blame] | 285 | parser->result_stack = stack; |
Damien George | 58ebde4 | 2014-05-21 20:32:59 +0100 | [diff] [blame] | 286 | parser->result_stack_alloc += MICROPY_ALLOC_PARSE_RESULT_INC; |
Damien George | 69a818d | 2014-01-12 13:55:24 +0000 | [diff] [blame] | 287 | } |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 288 | parser->result_stack[parser->result_stack_top++] = pn; |
| 289 | } |
| 290 | |
Damien George | 7d414a1 | 2015-02-08 01:57:40 +0000 | [diff] [blame] | 291 | STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, mp_uint_t src_line, mp_uint_t rule_kind, const char *str, mp_uint_t len) { |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 292 | mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 2); |
| 293 | if (pn == NULL) { |
| 294 | memory_error(parser); |
Damien George | 7d414a1 | 2015-02-08 01:57:40 +0000 | [diff] [blame] | 295 | return MP_PARSE_NODE_NULL; |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 296 | } |
| 297 | pn->source_line = src_line; |
Damien George | 4c81ba8 | 2015-01-13 16:21:23 +0000 | [diff] [blame] | 298 | pn->kind_num_nodes = rule_kind | (2 << 8); |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 299 | char *p = m_new(char, len); |
| 300 | memcpy(p, str, len); |
Damien George | 40f3c02 | 2014-07-03 13:25:24 +0100 | [diff] [blame] | 301 | pn->nodes[0] = (mp_int_t)p; |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 302 | pn->nodes[1] = len; |
Damien George | 7d414a1 | 2015-02-08 01:57:40 +0000 | [diff] [blame] | 303 | return (mp_parse_node_t)pn; |
| 304 | } |
| 305 | |
| 306 | STATIC mp_parse_node_t make_node_const_object(parser_t *parser, mp_uint_t src_line, mp_obj_t obj) { |
| 307 | mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, 1); |
| 308 | if (pn == NULL) { |
| 309 | memory_error(parser); |
| 310 | return MP_PARSE_NODE_NULL; |
| 311 | } |
| 312 | pn->source_line = src_line; |
| 313 | pn->kind_num_nodes = RULE_const_object | (1 << 8); |
| 314 | pn->nodes[0] = (mp_uint_t)obj; |
| 315 | return (mp_parse_node_t)pn; |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 316 | } |
Paul Sokolovsky | 9e76b11 | 2014-05-08 22:43:46 +0300 | [diff] [blame] | 317 | |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 318 | STATIC void push_result_token(parser_t *parser) { |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 319 | mp_parse_node_t pn; |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 320 | mp_lexer_t *lex = parser->lexer; |
| 321 | if (lex->tok_kind == MP_TOKEN_NAME) { |
| 322 | pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(lex->vstr.buf, lex->vstr.len)); |
Damien George | 7d414a1 | 2015-02-08 01:57:40 +0000 | [diff] [blame] | 323 | } else if (lex->tok_kind == MP_TOKEN_INTEGER) { |
| 324 | mp_obj_t o = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex); |
| 325 | if (MP_OBJ_IS_SMALL_INT(o)) { |
| 326 | pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(o)); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 327 | } else { |
Damien George | 7d414a1 | 2015-02-08 01:57:40 +0000 | [diff] [blame] | 328 | pn = make_node_const_object(parser, lex->tok_line, o); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 329 | } |
Damien George | 7d414a1 | 2015-02-08 01:57:40 +0000 | [diff] [blame] | 330 | } else if (lex->tok_kind == MP_TOKEN_FLOAT_OR_IMAG) { |
| 331 | mp_obj_t o = mp_parse_num_decimal(lex->vstr.buf, lex->vstr.len, true, false, lex); |
| 332 | pn = make_node_const_object(parser, lex->tok_line, o); |
Damien George | 4c81ba8 | 2015-01-13 16:21:23 +0000 | [diff] [blame] | 333 | } else if (lex->tok_kind == MP_TOKEN_STRING || lex->tok_kind == MP_TOKEN_BYTES) { |
| 334 | // Don't automatically intern all strings/bytes. doc strings (which are usually large) |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 335 | // will be discarded by the compiler, and so we shouldn't intern them. |
| 336 | qstr qst = MP_QSTR_NULL; |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 337 | if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) { |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 338 | // intern short strings |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 339 | qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len); |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 340 | } else { |
| 341 | // check if this string is already interned |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 342 | qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len); |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 343 | } |
| 344 | if (qst != MP_QSTR_NULL) { |
| 345 | // qstr exists, make a leaf node |
Damien George | 4c81ba8 | 2015-01-13 16:21:23 +0000 | [diff] [blame] | 346 | pn = mp_parse_node_new_leaf(lex->tok_kind == MP_TOKEN_STRING ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst); |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 347 | } else { |
Damien George | 4c81ba8 | 2015-01-13 16:21:23 +0000 | [diff] [blame] | 348 | // not interned, make a node holding a pointer to the string/bytes data |
Damien George | 7d414a1 | 2015-02-08 01:57:40 +0000 | [diff] [blame] | 349 | pn = make_node_string_bytes(parser, lex->tok_line, lex->tok_kind == MP_TOKEN_STRING ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len); |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 350 | } |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 351 | } else { |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 352 | pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 353 | } |
| 354 | push_result_node(parser, pn); |
| 355 | } |
| 356 | |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 357 | STATIC void push_result_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t num_args) { |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 358 | mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, num_args); |
| 359 | if (pn == NULL) { |
| 360 | memory_error(parser); |
| 361 | return; |
| 362 | } |
| 363 | pn->source_line = src_line; |
| 364 | pn->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8); |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 365 | for (mp_uint_t i = num_args; i > 0; i--) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 366 | pn->nodes[i - 1] = pop_result(parser); |
| 367 | } |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 368 | push_result_node(parser, (mp_parse_node_t)pn); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 369 | } |
| 370 | |
Damien George | 0bfc763 | 2015-02-07 18:33:58 +0000 | [diff] [blame] | 371 | mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { |
Damien George | 69a818d | 2014-01-12 13:55:24 +0000 | [diff] [blame] | 372 | |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 373 | // initialise parser and allocate memory for its stacks |
Damien George | 69a818d | 2014-01-12 13:55:24 +0000 | [diff] [blame] | 374 | |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 375 | parser_t parser; |
Damien George | 69a818d | 2014-01-12 13:55:24 +0000 | [diff] [blame] | 376 | |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 377 | parser.had_memory_error = false; |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 378 | |
Damien George | 58ebde4 | 2014-05-21 20:32:59 +0100 | [diff] [blame] | 379 | parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT; |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 380 | parser.rule_stack_top = 0; |
| 381 | parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 382 | |
Damien George | 58ebde4 | 2014-05-21 20:32:59 +0100 | [diff] [blame] | 383 | parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT; |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 384 | parser.result_stack_top = 0; |
| 385 | parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 386 | |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 387 | parser.lexer = lex; |
| 388 | |
| 389 | // check if we could allocate the stacks |
| 390 | if (parser.rule_stack == NULL || parser.result_stack == NULL) { |
| 391 | goto memory_error; |
| 392 | } |
Damien George | 0833500 | 2014-01-18 23:24:36 +0000 | [diff] [blame] | 393 | |
Damien George | 69a818d | 2014-01-12 13:55:24 +0000 | [diff] [blame] | 394 | // work out the top-level rule to use, and push it on the stack |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 395 | mp_uint_t top_level_rule; |
Damien | 5ac1b2e | 2013-10-18 19:58:12 +0100 | [diff] [blame] | 396 | switch (input_kind) { |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 397 | case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break; |
Damien George | d02c6d8 | 2014-01-15 22:14:03 +0000 | [diff] [blame] | 398 | case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break; |
Damien | 5ac1b2e | 2013-10-18 19:58:12 +0100 | [diff] [blame] | 399 | default: top_level_rule = RULE_file_input; |
| 400 | } |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 401 | push_rule(&parser, lex->tok_line, rules[top_level_rule], 0); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 402 | |
Damien George | 69a818d | 2014-01-12 13:55:24 +0000 | [diff] [blame] | 403 | // parse! |
| 404 | |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 405 | mp_uint_t n, i; // state for the current rule |
| 406 | mp_uint_t rule_src_line; // source line for the first token matched by the current rule |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 407 | bool backtrack = false; |
Damien George | 0833500 | 2014-01-18 23:24:36 +0000 | [diff] [blame] | 408 | const rule_t *rule = NULL; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 409 | |
| 410 | for (;;) { |
| 411 | next_rule: |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 412 | if (parser.rule_stack_top == 0 || parser.had_memory_error) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 413 | break; |
| 414 | } |
| 415 | |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 416 | pop_rule(&parser, &rule, &i, &rule_src_line); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 417 | n = rule->act & RULE_ACT_ARG_MASK; |
| 418 | |
| 419 | /* |
| 420 | // debugging |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 421 | printf("depth=%d ", parser.rule_stack_top); |
| 422 | for (int j = 0; j < parser.rule_stack_top; ++j) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 423 | printf(" "); |
| 424 | } |
| 425 | printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack); |
| 426 | */ |
| 427 | |
| 428 | switch (rule->act & RULE_ACT_KIND_MASK) { |
| 429 | case RULE_ACT_OR: |
| 430 | if (i > 0 && !backtrack) { |
| 431 | goto next_rule; |
| 432 | } else { |
| 433 | backtrack = false; |
| 434 | } |
| 435 | for (; i < n - 1; ++i) { |
| 436 | switch (rule->arg[i] & RULE_ARG_KIND_MASK) { |
| 437 | case RULE_ARG_TOK: |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 438 | if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) { |
| 439 | push_result_token(&parser); |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 440 | mp_lexer_to_next(lex); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 441 | goto next_rule; |
| 442 | } |
| 443 | break; |
| 444 | case RULE_ARG_RULE: |
Damien George | d2d64f0 | 2015-01-14 21:32:42 +0000 | [diff] [blame] | 445 | rule_or_no_other_choice: |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 446 | push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule |
| 447 | push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 448 | goto next_rule; |
| 449 | default: |
| 450 | assert(0); |
Damien George | d2d64f0 | 2015-01-14 21:32:42 +0000 | [diff] [blame] | 451 | goto rule_or_no_other_choice; // to help flow control analysis |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 452 | } |
| 453 | } |
| 454 | if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 455 | if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) { |
| 456 | push_result_token(&parser); |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 457 | mp_lexer_to_next(lex); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 458 | } else { |
| 459 | backtrack = true; |
| 460 | goto next_rule; |
| 461 | } |
| 462 | } else { |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 463 | push_rule_from_arg(&parser, rule->arg[i]); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 464 | } |
| 465 | break; |
| 466 | |
Damien George | 2870d85 | 2014-12-20 18:06:08 +0000 | [diff] [blame] | 467 | case RULE_ACT_AND: { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 468 | |
| 469 | // failed, backtrack if we can, else syntax error |
| 470 | if (backtrack) { |
| 471 | assert(i > 0); |
| 472 | if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) { |
| 473 | // an optional rule that failed, so continue with next arg |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 474 | push_result_node(&parser, MP_PARSE_NODE_NULL); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 475 | backtrack = false; |
| 476 | } else { |
| 477 | // a mandatory rule that failed, so propagate backtrack |
| 478 | if (i > 1) { |
| 479 | // already eaten tokens so can't backtrack |
| 480 | goto syntax_error; |
| 481 | } else { |
| 482 | goto next_rule; |
| 483 | } |
| 484 | } |
| 485 | } |
| 486 | |
| 487 | // progress through the rule |
| 488 | for (; i < n; ++i) { |
| 489 | switch (rule->arg[i] & RULE_ARG_KIND_MASK) { |
Damien George | 2870d85 | 2014-12-20 18:06:08 +0000 | [diff] [blame] | 490 | case RULE_ARG_TOK: { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 491 | // need to match a token |
Damien George | 2870d85 | 2014-12-20 18:06:08 +0000 | [diff] [blame] | 492 | mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK; |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 493 | if (lex->tok_kind == tok_kind) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 494 | // matched token |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 495 | if (tok_kind == MP_TOKEN_NAME) { |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 496 | push_result_token(&parser); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 497 | } |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 498 | mp_lexer_to_next(lex); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 499 | } else { |
| 500 | // failed to match token |
| 501 | if (i > 0) { |
| 502 | // already eaten tokens so can't backtrack |
| 503 | goto syntax_error; |
| 504 | } else { |
| 505 | // this rule failed, so backtrack |
| 506 | backtrack = true; |
| 507 | goto next_rule; |
| 508 | } |
| 509 | } |
| 510 | break; |
Damien George | d2d64f0 | 2015-01-14 21:32:42 +0000 | [diff] [blame] | 511 | } |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 512 | case RULE_ARG_RULE: |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 513 | case RULE_ARG_OPT_RULE: |
Damien George | d2d64f0 | 2015-01-14 21:32:42 +0000 | [diff] [blame] | 514 | rule_and_no_other_choice: |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 515 | push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule |
| 516 | push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 517 | goto next_rule; |
| 518 | default: |
| 519 | assert(0); |
Damien George | d2d64f0 | 2015-01-14 21:32:42 +0000 | [diff] [blame] | 520 | goto rule_and_no_other_choice; // to help flow control analysis |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 521 | } |
| 522 | } |
| 523 | |
| 524 | assert(i == n); |
| 525 | |
| 526 | // matched the rule, so now build the corresponding parse_node |
| 527 | |
| 528 | // count number of arguments for the parse_node |
| 529 | i = 0; |
Damien George | 2870d85 | 2014-12-20 18:06:08 +0000 | [diff] [blame] | 530 | bool emit_rule = false; |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 531 | for (mp_uint_t x = 0; x < n; ++x) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 532 | if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { |
Damien George | 2870d85 | 2014-12-20 18:06:08 +0000 | [diff] [blame] | 533 | mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK; |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 534 | if (tok_kind >= MP_TOKEN_NAME) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 535 | emit_rule = true; |
| 536 | } |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 537 | if (tok_kind == MP_TOKEN_NAME) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 538 | // only tokens which were names are pushed to stack |
| 539 | i += 1; |
| 540 | } |
| 541 | } else { |
| 542 | // rules are always pushed |
| 543 | i += 1; |
| 544 | } |
| 545 | } |
| 546 | |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 547 | #if !MICROPY_EMIT_CPYTHON && !MICROPY_ENABLE_DOC_STRING |
| 548 | // this code discards lonely statements, such as doc strings |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 549 | if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) { |
| 550 | mp_parse_node_t p = peek_result(&parser, 1); |
Damien George | 5042bce | 2014-05-25 22:06:06 +0100 | [diff] [blame] | 551 | if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) { |
Damien George | 52b5d76 | 2014-09-23 15:31:56 +0000 | [diff] [blame] | 552 | pop_result(&parser); // MP_PARSE_NODE_NULL |
| 553 | mp_parse_node_free(pop_result(&parser)); // RULE_string |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 554 | push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0); |
Damien George | 93afa23 | 2014-05-06 21:44:11 +0100 | [diff] [blame] | 555 | break; |
| 556 | } |
| 557 | } |
| 558 | #endif |
| 559 | |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 560 | // always emit these rules, even if they have only 1 argument |
| 561 | if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) { |
| 562 | emit_rule = true; |
| 563 | } |
| 564 | |
Damien George | b47ea4e | 2014-12-20 18:37:50 +0000 | [diff] [blame] | 565 | // if a rule has the RULE_ACT_ALLOW_IDENT bit set then this |
| 566 | // rule should not be emitted if it has only 1 argument |
| 567 | // NOTE: can't set this flag for atom_paren because we need it |
| 568 | // to distinguish, for example, [a,b] from [(a,b)] |
| 569 | // TODO possibly set for: varargslist_name, varargslist_equal |
| 570 | if (rule->act & RULE_ACT_ALLOW_IDENT) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 571 | emit_rule = false; |
| 572 | } |
| 573 | |
| 574 | // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data) |
Damien George | b47ea4e | 2014-12-20 18:37:50 +0000 | [diff] [blame] | 575 | if (ADD_BLANK_NODE(rule)) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 576 | emit_rule = true; |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 577 | push_result_node(&parser, MP_PARSE_NODE_NULL); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 578 | i += 1; |
| 579 | } |
| 580 | |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 581 | mp_uint_t num_not_nil = 0; |
| 582 | for (mp_uint_t x = 0; x < i; ++x) { |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 583 | if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 584 | num_not_nil += 1; |
| 585 | } |
| 586 | } |
| 587 | //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil); |
| 588 | if (emit_rule) { |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 589 | push_result_rule(&parser, rule_src_line, rule, i); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 590 | } else if (num_not_nil == 0) { |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 591 | push_result_rule(&parser, rule_src_line, rule, i); // needed for, eg, atom_paren, testlist_comp_3b |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 592 | //result_stack_show(parser); |
| 593 | //assert(0); |
| 594 | } else if (num_not_nil == 1) { |
| 595 | // single result, leave it on stack |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 596 | mp_parse_node_t pn = MP_PARSE_NODE_NULL; |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 597 | for (mp_uint_t x = 0; x < i; ++x) { |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 598 | mp_parse_node_t pn2 = pop_result(&parser); |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 599 | if (pn2 != MP_PARSE_NODE_NULL) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 600 | pn = pn2; |
| 601 | } |
| 602 | } |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 603 | push_result_node(&parser, pn); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 604 | } else { |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 605 | push_result_rule(&parser, rule_src_line, rule, i); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 606 | } |
| 607 | break; |
Damien George | 2870d85 | 2014-12-20 18:06:08 +0000 | [diff] [blame] | 608 | } |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 609 | |
Damien George | 2870d85 | 2014-12-20 18:06:08 +0000 | [diff] [blame] | 610 | case RULE_ACT_LIST: { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 611 | // n=2 is: item item* |
| 612 | // n=1 is: item (sep item)* |
| 613 | // n=3 is: item (sep item)* [sep] |
Damien George | 2870d85 | 2014-12-20 18:06:08 +0000 | [diff] [blame] | 614 | bool had_trailing_sep; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 615 | if (backtrack) { |
| 616 | list_backtrack: |
| 617 | had_trailing_sep = false; |
| 618 | if (n == 2) { |
| 619 | if (i == 1) { |
| 620 | // fail on item, first time round; propagate backtrack |
| 621 | goto next_rule; |
| 622 | } else { |
| 623 | // fail on item, in later rounds; finish with this rule |
| 624 | backtrack = false; |
| 625 | } |
| 626 | } else { |
| 627 | if (i == 1) { |
| 628 | // fail on item, first time round; propagate backtrack |
| 629 | goto next_rule; |
| 630 | } else if ((i & 1) == 1) { |
| 631 | // fail on item, in later rounds; have eaten tokens so can't backtrack |
| 632 | if (n == 3) { |
| 633 | // list allows trailing separator; finish parsing list |
| 634 | had_trailing_sep = true; |
| 635 | backtrack = false; |
| 636 | } else { |
| 637 | // list doesn't allow trailing separator; fail |
| 638 | goto syntax_error; |
| 639 | } |
| 640 | } else { |
| 641 | // fail on separator; finish parsing list |
| 642 | backtrack = false; |
| 643 | } |
| 644 | } |
| 645 | } else { |
| 646 | for (;;) { |
Damien George | 3816182 | 2014-07-03 14:13:33 +0100 | [diff] [blame] | 647 | mp_uint_t arg = rule->arg[i & 1 & n]; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 648 | switch (arg & RULE_ARG_KIND_MASK) { |
| 649 | case RULE_ARG_TOK: |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 650 | if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) { |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 651 | if (i & 1 & n) { |
| 652 | // separators which are tokens are not pushed to result stack |
| 653 | } else { |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 654 | push_result_token(&parser); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 655 | } |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 656 | mp_lexer_to_next(lex); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 657 | // got element of list, so continue parsing list |
| 658 | i += 1; |
| 659 | } else { |
| 660 | // couldn't get element of list |
| 661 | i += 1; |
| 662 | backtrack = true; |
| 663 | goto list_backtrack; |
| 664 | } |
| 665 | break; |
| 666 | case RULE_ARG_RULE: |
Damien George | d2d64f0 | 2015-01-14 21:32:42 +0000 | [diff] [blame] | 667 | rule_list_no_other_choice: |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 668 | push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule |
| 669 | push_rule_from_arg(&parser, arg); // push child of list-rule |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 670 | goto next_rule; |
| 671 | default: |
| 672 | assert(0); |
Damien George | d2d64f0 | 2015-01-14 21:32:42 +0000 | [diff] [blame] | 673 | goto rule_list_no_other_choice; // to help flow control analysis |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 674 | } |
| 675 | } |
| 676 | } |
| 677 | assert(i >= 1); |
| 678 | |
| 679 | // compute number of elements in list, result in i |
| 680 | i -= 1; |
| 681 | if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) { |
| 682 | // don't count separators when they are tokens |
| 683 | i = (i + 1) / 2; |
| 684 | } |
| 685 | |
| 686 | if (i == 1) { |
| 687 | // list matched single item |
| 688 | if (had_trailing_sep) { |
| 689 | // if there was a trailing separator, make a list of a single item |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 690 | push_result_rule(&parser, rule_src_line, rule, i); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 691 | } else { |
| 692 | // just leave single item on stack (ie don't wrap in a list) |
| 693 | } |
| 694 | } else { |
| 695 | //printf("done list %s %d %d\n", rule->rule_name, n, i); |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 696 | push_result_rule(&parser, rule_src_line, rule, i); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 697 | } |
| 698 | break; |
Damien George | 2870d85 | 2014-12-20 18:06:08 +0000 | [diff] [blame] | 699 | } |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 700 | |
| 701 | default: |
| 702 | assert(0); |
| 703 | } |
| 704 | } |
Damien | 91d387d | 2013-10-09 15:09:52 +0100 | [diff] [blame] | 705 | |
Damien George | f804833 | 2015-02-08 13:40:20 +0000 | [diff] [blame^] | 706 | mp_obj_t exc; |
| 707 | mp_parse_node_t result; |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 708 | |
| 709 | // check if we had a memory error |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 710 | if (parser.had_memory_error) { |
| 711 | memory_error: |
Damien George | 0bfc763 | 2015-02-07 18:33:58 +0000 | [diff] [blame] | 712 | exc = mp_obj_new_exception_msg(&mp_type_MemoryError, |
| 713 | "parser could not allocate enough memory"); |
Damien George | f804833 | 2015-02-08 13:40:20 +0000 | [diff] [blame^] | 714 | result = MP_PARSE_NODE_NULL; |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 715 | goto finished; |
Damien George | 58ba4c3 | 2014-04-10 14:27:31 +0000 | [diff] [blame] | 716 | } |
| 717 | |
Damien | 91d387d | 2013-10-09 15:09:52 +0100 | [diff] [blame] | 718 | // check we are at the end of the token stream |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 719 | if (lex->tok_kind != MP_TOKEN_END) { |
Damien | 91d387d | 2013-10-09 15:09:52 +0100 | [diff] [blame] | 720 | goto syntax_error; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 721 | } |
Damien | 91d387d | 2013-10-09 15:09:52 +0100 | [diff] [blame] | 722 | |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 723 | //printf("--------------\n"); |
| 724 | //result_stack_show(parser); |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 725 | //printf("rule stack alloc: %d\n", parser.rule_stack_alloc); |
| 726 | //printf("result stack alloc: %d\n", parser.result_stack_alloc); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 727 | //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated); |
Damien George | 69a818d | 2014-01-12 13:55:24 +0000 | [diff] [blame] | 728 | |
| 729 | // get the root parse node that we created |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 730 | assert(parser.result_stack_top == 1); |
Damien George | f804833 | 2015-02-08 13:40:20 +0000 | [diff] [blame^] | 731 | exc = MP_OBJ_NULL; |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 732 | result = parser.result_stack[0]; |
Damien George | 69a818d | 2014-01-12 13:55:24 +0000 | [diff] [blame] | 733 | |
| 734 | finished: |
| 735 | // free the memory that we don't need anymore |
Damien George | 1b82e9a | 2014-05-10 17:36:41 +0100 | [diff] [blame] | 736 | m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc); |
| 737 | m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc); |
Damien George | 0bfc763 | 2015-02-07 18:33:58 +0000 | [diff] [blame] | 738 | // we also free the lexer on behalf of the caller (see below) |
Damien George | 69a818d | 2014-01-12 13:55:24 +0000 | [diff] [blame] | 739 | |
Damien George | 0bfc763 | 2015-02-07 18:33:58 +0000 | [diff] [blame] | 740 | if (exc != MP_OBJ_NULL) { |
| 741 | // had an error so raise the exception |
| 742 | // add traceback to give info about file name and location |
| 743 | // we don't have a 'block' name, so just pass the NULL qstr to indicate this |
| 744 | mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, MP_QSTR_NULL); |
| 745 | mp_lexer_free(lex); |
| 746 | nlr_raise(exc); |
| 747 | } else { |
| 748 | mp_lexer_free(lex); |
| 749 | return result; |
| 750 | } |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 751 | |
| 752 | syntax_error: |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 753 | if (lex->tok_kind == MP_TOKEN_INDENT) { |
Damien George | 0bfc763 | 2015-02-07 18:33:58 +0000 | [diff] [blame] | 754 | exc = mp_obj_new_exception_msg(&mp_type_IndentationError, |
| 755 | "unexpected indent"); |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 756 | } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) { |
Damien George | 0bfc763 | 2015-02-07 18:33:58 +0000 | [diff] [blame] | 757 | exc = mp_obj_new_exception_msg(&mp_type_IndentationError, |
| 758 | "unindent does not match any outer indentation level"); |
Damien | 91d387d | 2013-10-09 15:09:52 +0100 | [diff] [blame] | 759 | } else { |
Damien George | 0bfc763 | 2015-02-07 18:33:58 +0000 | [diff] [blame] | 760 | exc = mp_obj_new_exception_msg(&mp_type_SyntaxError, |
| 761 | "invalid syntax"); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 762 | #ifdef USE_RULE_NAME |
Damien George | 9528cd6 | 2014-01-15 21:23:31 +0000 | [diff] [blame] | 763 | // debugging: print the rule name that failed and the token |
Damien George | c596612 | 2014-02-15 16:10:44 +0000 | [diff] [blame] | 764 | printf("rule: %s\n", rule->rule_name); |
| 765 | #if MICROPY_DEBUG_PRINTERS |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 766 | mp_token_show(lex); |
Damien George | 9528cd6 | 2014-01-15 21:23:31 +0000 | [diff] [blame] | 767 | #endif |
Damien George | c596612 | 2014-02-15 16:10:44 +0000 | [diff] [blame] | 768 | #endif |
Damien | 91d387d | 2013-10-09 15:09:52 +0100 | [diff] [blame] | 769 | } |
Damien George | f804833 | 2015-02-08 13:40:20 +0000 | [diff] [blame^] | 770 | result = MP_PARSE_NODE_NULL; |
Damien George | 69a818d | 2014-01-12 13:55:24 +0000 | [diff] [blame] | 771 | goto finished; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 772 | } |