blob: afe8711a2d29af43e663bd3a4a22bd60841857a7 [file] [log] [blame]
Damien George04b91472014-05-03 23:27:38 +01001/*
2 * This file is part of the Micro Python project, http://micropython.org/
3 *
4 * The MIT License (MIT)
5 *
Damien George4735c452015-04-21 16:43:18 +00006 * Copyright (c) 2013-2015 Damien P. George
Damien George04b91472014-05-03 23:27:38 +01007 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
xbeefe34222014-03-16 00:14:26 -070027#include <stdbool.h>
Damien429d7192013-10-04 19:53:11 +010028#include <stdint.h>
29#include <stdio.h>
Damien429d7192013-10-04 19:53:11 +010030#include <assert.h>
Damien George5042bce2014-05-25 22:06:06 +010031#include <string.h>
Damien429d7192013-10-04 19:53:11 +010032
Damien George0bfc7632015-02-07 18:33:58 +000033#include "py/nlr.h"
Damien George51dfcb42015-01-01 20:27:54 +000034#include "py/lexer.h"
35#include "py/parse.h"
36#include "py/parsenum.h"
37#include "py/smallint.h"
Damien George64f2b212015-10-08 14:58:15 +010038#include "py/runtime.h"
39#include "py/builtin.h"
Damien429d7192013-10-04 19:53:11 +010040
Damien429d7192013-10-04 19:53:11 +010041#define RULE_ACT_ARG_MASK (0x0f)
Damien Georgeb47ea4e2014-12-20 18:37:50 +000042#define RULE_ACT_KIND_MASK (0x30)
43#define RULE_ACT_ALLOW_IDENT (0x40)
44#define RULE_ACT_ADD_BLANK (0x80)
Damien429d7192013-10-04 19:53:11 +010045#define RULE_ACT_OR (0x10)
46#define RULE_ACT_AND (0x20)
47#define RULE_ACT_LIST (0x30)
48
Damien429d7192013-10-04 19:53:11 +010049#define RULE_ARG_KIND_MASK (0xf000)
50#define RULE_ARG_ARG_MASK (0x0fff)
51#define RULE_ARG_TOK (0x1000)
52#define RULE_ARG_RULE (0x2000)
Damien George4735c452015-04-21 16:43:18 +000053#define RULE_ARG_OPT_RULE (0x3000)
Damien429d7192013-10-04 19:53:11 +010054
Damien Georgeb47ea4e2014-12-20 18:37:50 +000055#define ADD_BLANK_NODE(rule) ((rule->act & RULE_ACT_ADD_BLANK) != 0)
Damien Georgeb829b5c2014-01-25 13:51:19 +000056
Damien429d7192013-10-04 19:53:11 +010057// (un)comment to use rule names; for debugging
58//#define USE_RULE_NAME (1)
59
60typedef struct _rule_t {
61 byte rule_id;
62 byte act;
63#ifdef USE_RULE_NAME
64 const char *rule_name;
65#endif
66 uint16_t arg[];
67} rule_t;
68
69enum {
Damien George00208ce2014-01-23 00:00:53 +000070#define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
Damien George51dfcb42015-01-01 20:27:54 +000071#include "py/grammar.h"
Damien429d7192013-10-04 19:53:11 +010072#undef DEF_RULE
73 RULE_maximum_number_of,
Damien George5042bce2014-05-25 22:06:06 +010074 RULE_string, // special node for non-interned string
Damien George4c81ba82015-01-13 16:21:23 +000075 RULE_bytes, // special node for non-interned bytes
Damien George7d414a12015-02-08 01:57:40 +000076 RULE_const_object, // special node for a constant, generic Python object
Damien429d7192013-10-04 19:53:11 +010077};
78
Damien Georgeb47ea4e2014-12-20 18:37:50 +000079#define ident (RULE_ACT_ALLOW_IDENT)
80#define blank (RULE_ACT_ADD_BLANK)
Damien429d7192013-10-04 19:53:11 +010081#define or(n) (RULE_ACT_OR | n)
82#define and(n) (RULE_ACT_AND | n)
83#define one_or_more (RULE_ACT_LIST | 2)
84#define list (RULE_ACT_LIST | 1)
85#define list_with_end (RULE_ACT_LIST | 3)
Damiend99b0522013-12-21 18:17:45 +000086#define tok(t) (RULE_ARG_TOK | MP_TOKEN_##t)
Damien429d7192013-10-04 19:53:11 +010087#define rule(r) (RULE_ARG_RULE | RULE_##r)
Damien429d7192013-10-04 19:53:11 +010088#define opt_rule(r) (RULE_ARG_OPT_RULE | RULE_##r)
89#ifdef USE_RULE_NAME
Damien George00208ce2014-01-23 00:00:53 +000090#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
Damien429d7192013-10-04 19:53:11 +010091#else
Damien George00208ce2014-01-23 00:00:53 +000092#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
Damien429d7192013-10-04 19:53:11 +010093#endif
Damien George51dfcb42015-01-01 20:27:54 +000094#include "py/grammar.h"
Damien429d7192013-10-04 19:53:11 +010095#undef or
96#undef and
97#undef list
98#undef list_with_end
99#undef tok
100#undef rule
Damien429d7192013-10-04 19:53:11 +0100101#undef opt_rule
102#undef one_or_more
103#undef DEF_RULE
104
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200105STATIC const rule_t *rules[] = {
Damien George00208ce2014-01-23 00:00:53 +0000106#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
Damien George51dfcb42015-01-01 20:27:54 +0000107#include "py/grammar.h"
Damien429d7192013-10-04 19:53:11 +0100108#undef DEF_RULE
109};
110
111typedef struct _rule_stack_t {
Damien George5c670ac2015-01-24 23:12:58 +0000112 mp_uint_t src_line : BITS_PER_WORD - 8; // maximum bits storing source line number
113 mp_uint_t rule_id : 8; // this must be large enough to fit largest rule number
114 mp_uint_t arg_i; // this dictates the maximum nodes in a "list" of things
Damien429d7192013-10-04 19:53:11 +0100115} rule_stack_t;
116
Damien George58e0f4a2015-09-23 10:50:43 +0100117typedef struct _mp_parse_chunk_t {
118 mp_uint_t alloc;
119 union {
120 mp_uint_t used;
121 struct _mp_parse_chunk_t *next;
122 } union_;
123 byte data[];
124} mp_parse_chunk_t;
125
Damien George64f2b212015-10-08 14:58:15 +0100126typedef enum {
127 PARSE_ERROR_NONE = 0,
128 PARSE_ERROR_MEMORY,
129 PARSE_ERROR_CONST,
130} parse_error_t;
131
Damien429d7192013-10-04 19:53:11 +0100132typedef struct _parser_t {
Damien George64f2b212015-10-08 14:58:15 +0100133 parse_error_t parse_error;
Damien George58ba4c32014-04-10 14:27:31 +0000134
Damien George38161822014-07-03 14:13:33 +0100135 mp_uint_t rule_stack_alloc;
136 mp_uint_t rule_stack_top;
Damien429d7192013-10-04 19:53:11 +0100137 rule_stack_t *rule_stack;
138
Damien George38161822014-07-03 14:13:33 +0100139 mp_uint_t result_stack_alloc;
140 mp_uint_t result_stack_top;
Damiend99b0522013-12-21 18:17:45 +0000141 mp_parse_node_t *result_stack;
Damien George08335002014-01-18 23:24:36 +0000142
143 mp_lexer_t *lexer;
Damien George58e0f4a2015-09-23 10:50:43 +0100144
145 mp_parse_tree_t tree;
146 mp_parse_chunk_t *cur_chunk;
Damien429d7192013-10-04 19:53:11 +0100147
Damien George64f2b212015-10-08 14:58:15 +0100148 #if MICROPY_COMP_CONST
149 mp_map_t consts;
150 #endif
151} parser_t;
Damien George58ba4c32014-04-10 14:27:31 +0000152
Damien George58e0f4a2015-09-23 10:50:43 +0100153STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
154 // use a custom memory allocator to store parse nodes sequentially in large chunks
155
156 mp_parse_chunk_t *chunk = parser->cur_chunk;
157
158 if (chunk != NULL && chunk->union_.used + num_bytes > chunk->alloc) {
159 // not enough room at end of previously allocated chunk so try to grow
160 mp_parse_chunk_t *new_data = (mp_parse_chunk_t*)m_renew_maybe(byte, chunk,
161 sizeof(mp_parse_chunk_t) + chunk->alloc,
162 sizeof(mp_parse_chunk_t) + chunk->alloc + num_bytes, false);
163 if (new_data == NULL) {
164 // could not grow existing memory; shrink it to fit previous
165 (void)m_renew(byte, chunk, sizeof(mp_parse_chunk_t) + chunk->alloc,
166 sizeof(mp_parse_chunk_t) + chunk->union_.used);
167 chunk->alloc = chunk->union_.used;
168 chunk->union_.next = parser->tree.chunk;
169 parser->tree.chunk = chunk;
170 chunk = NULL;
171 } else {
172 // could grow existing memory
173 chunk->alloc += num_bytes;
174 }
175 }
176
177 if (chunk == NULL) {
178 // no previous chunk, allocate a new chunk
179 size_t alloc = MICROPY_ALLOC_PARSE_CHUNK_INIT;
180 if (alloc < num_bytes) {
181 alloc = num_bytes;
182 }
183 chunk = (mp_parse_chunk_t*)m_new(byte, sizeof(mp_parse_chunk_t) + alloc);
184 chunk->alloc = alloc;
185 chunk->union_.used = 0;
186 parser->cur_chunk = chunk;
187 }
188
189 byte *ret = chunk->data + chunk->union_.used;
190 chunk->union_.used += num_bytes;
191 return ret;
192}
193
Damien George38161822014-07-03 14:13:33 +0100194STATIC void push_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t arg_i) {
Damien George64f2b212015-10-08 14:58:15 +0100195 if (parser->parse_error) {
Damien George58ba4c32014-04-10 14:27:31 +0000196 return;
197 }
Damien429d7192013-10-04 19:53:11 +0100198 if (parser->rule_stack_top >= parser->rule_stack_alloc) {
Damien Georgeade9a052015-06-13 21:53:22 +0100199 rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC, true);
Damien George58ba4c32014-04-10 14:27:31 +0000200 if (rs == NULL) {
Damien George64f2b212015-10-08 14:58:15 +0100201 parser->parse_error = PARSE_ERROR_MEMORY;
Damien George58ba4c32014-04-10 14:27:31 +0000202 return;
203 }
204 parser->rule_stack = rs;
Damien George58ebde42014-05-21 20:32:59 +0100205 parser->rule_stack_alloc += MICROPY_ALLOC_PARSE_RULE_INC;
Damien429d7192013-10-04 19:53:11 +0100206 }
Damien George08335002014-01-18 23:24:36 +0000207 rule_stack_t *rs = &parser->rule_stack[parser->rule_stack_top++];
208 rs->src_line = src_line;
209 rs->rule_id = rule->rule_id;
210 rs->arg_i = arg_i;
Damien429d7192013-10-04 19:53:11 +0100211}
212
Damien George38161822014-07-03 14:13:33 +0100213STATIC void push_rule_from_arg(parser_t *parser, mp_uint_t arg) {
Damien429d7192013-10-04 19:53:11 +0100214 assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
Damien George38161822014-07-03 14:13:33 +0100215 mp_uint_t rule_id = arg & RULE_ARG_ARG_MASK;
Damien429d7192013-10-04 19:53:11 +0100216 assert(rule_id < RULE_maximum_number_of);
Damien Georgea4c52c52014-12-05 19:35:18 +0000217 push_rule(parser, parser->lexer->tok_line, rules[rule_id], 0);
Damien429d7192013-10-04 19:53:11 +0100218}
219
Damien George38161822014-07-03 14:13:33 +0100220STATIC void pop_rule(parser_t *parser, const rule_t **rule, mp_uint_t *arg_i, mp_uint_t *src_line) {
Damien George64f2b212015-10-08 14:58:15 +0100221 assert(!parser->parse_error);
Damien429d7192013-10-04 19:53:11 +0100222 parser->rule_stack_top -= 1;
223 *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id];
224 *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i;
Damien George08335002014-01-18 23:24:36 +0000225 *src_line = parser->rule_stack[parser->rule_stack_top].src_line;
Damien429d7192013-10-04 19:53:11 +0100226}
227
Damien George40f3c022014-07-03 13:25:24 +0100228mp_parse_node_t mp_parse_node_new_leaf(mp_int_t kind, mp_int_t arg) {
Paul Sokolovsky56e5ef22014-02-22 16:39:45 +0200229 if (kind == MP_PARSE_NODE_SMALL_INT) {
230 return (mp_parse_node_t)(kind | (arg << 1));
231 }
Damien George7d414a12015-02-08 01:57:40 +0000232 return (mp_parse_node_t)(kind | (arg << 4));
Damien429d7192013-10-04 19:53:11 +0100233}
234
Damien Georgedfe944c2015-02-13 02:29:46 +0000235int mp_parse_node_extract_list(mp_parse_node_t *pn, mp_uint_t pn_kind, mp_parse_node_t **nodes) {
236 if (MP_PARSE_NODE_IS_NULL(*pn)) {
237 *nodes = NULL;
238 return 0;
239 } else if (MP_PARSE_NODE_IS_LEAF(*pn)) {
240 *nodes = pn;
241 return 1;
242 } else {
243 mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)(*pn);
244 if (MP_PARSE_NODE_STRUCT_KIND(pns) != pn_kind) {
245 *nodes = pn;
246 return 1;
247 } else {
248 *nodes = pns->nodes;
249 return MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
250 }
251 }
252}
253
Damien Georgecbd2f742014-01-19 11:48:48 +0000254#if MICROPY_DEBUG_PRINTERS
Damien George38161822014-07-03 14:13:33 +0100255void mp_parse_node_print(mp_parse_node_t pn, mp_uint_t indent) {
Damien George08335002014-01-18 23:24:36 +0000256 if (MP_PARSE_NODE_IS_STRUCT(pn)) {
257 printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
258 } else {
259 printf(" ");
260 }
Damien George38161822014-07-03 14:13:33 +0100261 for (mp_uint_t i = 0; i < indent; i++) {
Damien429d7192013-10-04 19:53:11 +0100262 printf(" ");
263 }
Damiend99b0522013-12-21 18:17:45 +0000264 if (MP_PARSE_NODE_IS_NULL(pn)) {
Damien429d7192013-10-04 19:53:11 +0100265 printf("NULL\n");
Paul Sokolovsky56e5ef22014-02-22 16:39:45 +0200266 } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
Damien George40f3c022014-07-03 13:25:24 +0100267 mp_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
Paul Sokolovsky56e5ef22014-02-22 16:39:45 +0200268 printf("int(" INT_FMT ")\n", arg);
Damiend99b0522013-12-21 18:17:45 +0000269 } else if (MP_PARSE_NODE_IS_LEAF(pn)) {
Damien George40f3c022014-07-03 13:25:24 +0100270 mp_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
Damiend99b0522013-12-21 18:17:45 +0000271 switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
272 case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
Damiend99b0522013-12-21 18:17:45 +0000273 case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
274 case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
Damien George08d07552014-01-29 18:58:52 +0000275 case MP_PARSE_NODE_TOKEN: printf("tok(" INT_FMT ")\n", arg); break;
Damien429d7192013-10-04 19:53:11 +0100276 default: assert(0);
277 }
278 } else {
Damien George5042bce2014-05-25 22:06:06 +0100279 // node must be a mp_parse_node_struct_t
Damien Georgeb829b5c2014-01-25 13:51:19 +0000280 mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
Damien George5042bce2014-05-25 22:06:06 +0100281 if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
282 printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
Damien George4c81ba82015-01-13 16:21:23 +0000283 } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_bytes) {
284 printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
Damien George7d414a12015-02-08 01:57:40 +0000285 } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
286 printf("literal const(%p)\n", (mp_obj_t)pns->nodes[0]);
Damien George5042bce2014-05-25 22:06:06 +0100287 } else {
Damien George38161822014-07-03 14:13:33 +0100288 mp_uint_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
Damien429d7192013-10-04 19:53:11 +0100289#ifdef USE_RULE_NAME
Damien George38161822014-07-03 14:13:33 +0100290 printf("%s(" UINT_FMT ") (n=" UINT_FMT ")\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, (mp_uint_t)MP_PARSE_NODE_STRUCT_KIND(pns), n);
Damien429d7192013-10-04 19:53:11 +0100291#else
Damien George38161822014-07-03 14:13:33 +0100292 printf("rule(" UINT_FMT ") (n=" UINT_FMT ")\n", (mp_uint_t)MP_PARSE_NODE_STRUCT_KIND(pns), n);
Damien429d7192013-10-04 19:53:11 +0100293#endif
Damien George38161822014-07-03 14:13:33 +0100294 for (mp_uint_t i = 0; i < n; i++) {
Damien George5042bce2014-05-25 22:06:06 +0100295 mp_parse_node_print(pns->nodes[i], indent + 2);
296 }
Damien429d7192013-10-04 19:53:11 +0100297 }
298 }
299}
Damien Georgecbd2f742014-01-19 11:48:48 +0000300#endif // MICROPY_DEBUG_PRINTERS
Damien429d7192013-10-04 19:53:11 +0100301
302/*
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200303STATIC void result_stack_show(parser_t *parser) {
Damien429d7192013-10-04 19:53:11 +0100304 printf("result stack, most recent first\n");
Damien George38161822014-07-03 14:13:33 +0100305 for (mp_int_t i = parser->result_stack_top - 1; i >= 0; i--) {
Damien Georgecbd2f742014-01-19 11:48:48 +0000306 mp_parse_node_print(parser->result_stack[i], 0);
Damien429d7192013-10-04 19:53:11 +0100307 }
308}
309*/
310
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200311STATIC mp_parse_node_t pop_result(parser_t *parser) {
Damien George64f2b212015-10-08 14:58:15 +0100312 if (parser->parse_error) {
Damien George58ba4c32014-04-10 14:27:31 +0000313 return MP_PARSE_NODE_NULL;
314 }
Damien429d7192013-10-04 19:53:11 +0100315 assert(parser->result_stack_top > 0);
316 return parser->result_stack[--parser->result_stack_top];
317}
318
Damien George38161822014-07-03 14:13:33 +0100319STATIC mp_parse_node_t peek_result(parser_t *parser, mp_uint_t pos) {
Damien George64f2b212015-10-08 14:58:15 +0100320 if (parser->parse_error) {
Damien George58ba4c32014-04-10 14:27:31 +0000321 return MP_PARSE_NODE_NULL;
322 }
Damien429d7192013-10-04 19:53:11 +0100323 assert(parser->result_stack_top > pos);
324 return parser->result_stack[parser->result_stack_top - 1 - pos];
325}
326
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200327STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
Damien George64f2b212015-10-08 14:58:15 +0100328 if (parser->parse_error) {
Damien George58ba4c32014-04-10 14:27:31 +0000329 return;
330 }
Damien George69a818d2014-01-12 13:55:24 +0000331 if (parser->result_stack_top >= parser->result_stack_alloc) {
Damien Georgeade9a052015-06-13 21:53:22 +0100332 mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC, true);
Damien George50912e72015-01-20 11:55:10 +0000333 if (stack == NULL) {
Damien George64f2b212015-10-08 14:58:15 +0100334 parser->parse_error = PARSE_ERROR_MEMORY;
Damien George58ba4c32014-04-10 14:27:31 +0000335 return;
336 }
Damien George50912e72015-01-20 11:55:10 +0000337 parser->result_stack = stack;
Damien George58ebde42014-05-21 20:32:59 +0100338 parser->result_stack_alloc += MICROPY_ALLOC_PARSE_RESULT_INC;
Damien George69a818d2014-01-12 13:55:24 +0000339 }
Damien429d7192013-10-04 19:53:11 +0100340 parser->result_stack[parser->result_stack_top++] = pn;
341}
342
Damien George7d414a12015-02-08 01:57:40 +0000343STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, mp_uint_t src_line, mp_uint_t rule_kind, const char *str, mp_uint_t len) {
Damien George58e0f4a2015-09-23 10:50:43 +0100344 mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2);
Damien George5042bce2014-05-25 22:06:06 +0100345 if (pn == NULL) {
Damien George64f2b212015-10-08 14:58:15 +0100346 parser->parse_error = PARSE_ERROR_MEMORY;
Damien George7d414a12015-02-08 01:57:40 +0000347 return MP_PARSE_NODE_NULL;
Damien George5042bce2014-05-25 22:06:06 +0100348 }
349 pn->source_line = src_line;
Damien George4c81ba82015-01-13 16:21:23 +0000350 pn->kind_num_nodes = rule_kind | (2 << 8);
Damien George5042bce2014-05-25 22:06:06 +0100351 char *p = m_new(char, len);
352 memcpy(p, str, len);
Damien George999cedb2015-11-27 17:01:44 +0000353 pn->nodes[0] = (uintptr_t)p;
Damien George5042bce2014-05-25 22:06:06 +0100354 pn->nodes[1] = len;
Damien George7d414a12015-02-08 01:57:40 +0000355 return (mp_parse_node_t)pn;
356}
357
358STATIC mp_parse_node_t make_node_const_object(parser_t *parser, mp_uint_t src_line, mp_obj_t obj) {
Damien George999cedb2015-11-27 17:01:44 +0000359 mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_obj_t));
Damien George7d414a12015-02-08 01:57:40 +0000360 if (pn == NULL) {
Damien George64f2b212015-10-08 14:58:15 +0100361 parser->parse_error = PARSE_ERROR_MEMORY;
Damien George7d414a12015-02-08 01:57:40 +0000362 return MP_PARSE_NODE_NULL;
363 }
364 pn->source_line = src_line;
365 pn->kind_num_nodes = RULE_const_object | (1 << 8);
366 pn->nodes[0] = (mp_uint_t)obj;
367 return (mp_parse_node_t)pn;
Damien George5042bce2014-05-25 22:06:06 +0100368}
Paul Sokolovsky9e76b112014-05-08 22:43:46 +0300369
Damien Georgea4c52c52014-12-05 19:35:18 +0000370STATIC void push_result_token(parser_t *parser) {
Damiend99b0522013-12-21 18:17:45 +0000371 mp_parse_node_t pn;
Damien Georgea4c52c52014-12-05 19:35:18 +0000372 mp_lexer_t *lex = parser->lexer;
373 if (lex->tok_kind == MP_TOKEN_NAME) {
Damien George64f2b212015-10-08 14:58:15 +0100374 qstr id = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
375 #if MICROPY_COMP_CONST
376 // lookup identifier in table of dynamic constants
377 mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP);
378 if (elem != NULL) {
379 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(elem->value));
380 } else
381 #endif
382 {
383 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, id);
384 }
Damien George7d414a12015-02-08 01:57:40 +0000385 } else if (lex->tok_kind == MP_TOKEN_INTEGER) {
386 mp_obj_t o = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex);
387 if (MP_OBJ_IS_SMALL_INT(o)) {
388 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(o));
Damien429d7192013-10-04 19:53:11 +0100389 } else {
Damien George7d414a12015-02-08 01:57:40 +0000390 pn = make_node_const_object(parser, lex->tok_line, o);
Damien429d7192013-10-04 19:53:11 +0100391 }
Damien George7d414a12015-02-08 01:57:40 +0000392 } else if (lex->tok_kind == MP_TOKEN_FLOAT_OR_IMAG) {
393 mp_obj_t o = mp_parse_num_decimal(lex->vstr.buf, lex->vstr.len, true, false, lex);
394 pn = make_node_const_object(parser, lex->tok_line, o);
Damien George4c81ba82015-01-13 16:21:23 +0000395 } else if (lex->tok_kind == MP_TOKEN_STRING || lex->tok_kind == MP_TOKEN_BYTES) {
396 // Don't automatically intern all strings/bytes. doc strings (which are usually large)
Damien George5042bce2014-05-25 22:06:06 +0100397 // will be discarded by the compiler, and so we shouldn't intern them.
398 qstr qst = MP_QSTR_NULL;
Damien Georgea4c52c52014-12-05 19:35:18 +0000399 if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
Damien George5042bce2014-05-25 22:06:06 +0100400 // intern short strings
Damien Georgea4c52c52014-12-05 19:35:18 +0000401 qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
Damien George5042bce2014-05-25 22:06:06 +0100402 } else {
403 // check if this string is already interned
Damien Georgea4c52c52014-12-05 19:35:18 +0000404 qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len);
Damien George5042bce2014-05-25 22:06:06 +0100405 }
406 if (qst != MP_QSTR_NULL) {
407 // qstr exists, make a leaf node
Damien George4c81ba82015-01-13 16:21:23 +0000408 pn = mp_parse_node_new_leaf(lex->tok_kind == MP_TOKEN_STRING ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst);
Damien George5042bce2014-05-25 22:06:06 +0100409 } else {
Damien George4c81ba82015-01-13 16:21:23 +0000410 // not interned, make a node holding a pointer to the string/bytes data
Damien George7d414a12015-02-08 01:57:40 +0000411 pn = make_node_string_bytes(parser, lex->tok_line, lex->tok_kind == MP_TOKEN_STRING ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len);
Damien George5042bce2014-05-25 22:06:06 +0100412 }
Damien429d7192013-10-04 19:53:11 +0100413 } else {
Damien Georgea4c52c52014-12-05 19:35:18 +0000414 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
Damien429d7192013-10-04 19:53:11 +0100415 }
416 push_result_node(parser, pn);
417}
418
Damien George64f2b212015-10-08 14:58:15 +0100419#if MICROPY_COMP_MODULE_CONST
Damien Georgecbf76742015-11-27 13:38:15 +0000420STATIC const mp_rom_map_elem_t mp_constants_table[] = {
Damien George64f2b212015-10-08 14:58:15 +0100421 #if MICROPY_PY_UCTYPES
Damien Georgecbf76742015-11-27 13:38:15 +0000422 { MP_ROM_QSTR(MP_QSTR_uctypes), MP_ROM_PTR(&mp_module_uctypes) },
Damien George64f2b212015-10-08 14:58:15 +0100423 #endif
424 // Extra constants as defined by a port
425 MICROPY_PORT_CONSTANTS
426};
427STATIC MP_DEFINE_CONST_MAP(mp_constants_map, mp_constants_table);
428#endif
429
430#if MICROPY_COMP_CONST_FOLDING
431STATIC bool fold_constants(parser_t *parser, const rule_t *rule, mp_uint_t num_args) {
432 // this code does folding of arbitrary integer expressions, eg 1 + 2 * 3 + 4
433 // it does not do partial folding, eg 1 + 2 + x -> 3 + x
434
435 mp_int_t arg0;
436 if (rule->rule_id == RULE_expr
437 || rule->rule_id == RULE_xor_expr
438 || rule->rule_id == RULE_and_expr) {
439 // folding for binary ops: | ^ &
440 mp_parse_node_t pn = peek_result(parser, num_args - 1);
441 if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
442 return false;
443 }
444 arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
445 for (mp_int_t i = num_args - 2; i >= 0; --i) {
446 pn = peek_result(parser, i);
447 if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
448 return false;
449 }
450 mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
451 if (rule->rule_id == RULE_expr) {
452 // int | int
453 arg0 |= arg1;
454 } else if (rule->rule_id == RULE_xor_expr) {
455 // int ^ int
456 arg0 ^= arg1;
457 } else if (rule->rule_id == RULE_and_expr) {
458 // int & int
459 arg0 &= arg1;
460 }
461 }
462 } else if (rule->rule_id == RULE_shift_expr
463 || rule->rule_id == RULE_arith_expr
464 || rule->rule_id == RULE_term) {
465 // folding for binary ops: << >> + - * / % //
466 mp_parse_node_t pn = peek_result(parser, num_args - 1);
467 if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
468 return false;
469 }
470 arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
471 for (mp_int_t i = num_args - 2; i >= 1; i -= 2) {
472 pn = peek_result(parser, i - 1);
473 if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
474 return false;
475 }
476 mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
477 mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, i));
478 if (tok == MP_TOKEN_OP_DBL_LESS) {
479 // int << int
480 if (arg1 >= (mp_int_t)BITS_PER_WORD
481 || arg0 > (MP_SMALL_INT_MAX >> arg1)
482 || arg0 < (MP_SMALL_INT_MIN >> arg1)) {
483 return false;
484 }
485 arg0 <<= arg1;
486 } else if (tok == MP_TOKEN_OP_DBL_MORE) {
487 // int >> int
488 if (arg1 >= (mp_int_t)BITS_PER_WORD) {
489 // Shifting to big amounts is underfined behavior
490 // in C and is CPU-dependent; propagate sign bit.
491 arg1 = BITS_PER_WORD - 1;
492 }
493 arg0 >>= arg1;
494 } else if (tok == MP_TOKEN_OP_PLUS) {
495 // int + int
496 arg0 += arg1;
497 } else if (tok == MP_TOKEN_OP_MINUS) {
498 // int - int
499 arg0 -= arg1;
500 } else if (tok == MP_TOKEN_OP_STAR) {
501 // int * int
502 if (mp_small_int_mul_overflow(arg0, arg1)) {
503 return false;
504 }
505 arg0 *= arg1;
506 } else if (tok == MP_TOKEN_OP_SLASH) {
507 // int / int
508 return false;
509 } else if (tok == MP_TOKEN_OP_PERCENT) {
510 // int % int
511 if (arg1 == 0) {
512 return false;
513 }
514 arg0 = mp_small_int_modulo(arg0, arg1);
515 } else {
516 assert(tok == MP_TOKEN_OP_DBL_SLASH); // should be
517 // int // int
518 if (arg1 == 0) {
519 return false;
520 }
521 arg0 = mp_small_int_floor_divide(arg0, arg1);
522 }
523 if (!MP_SMALL_INT_FITS(arg0)) {
524 return false;
525 }
526 }
527 } else if (rule->rule_id == RULE_factor_2) {
528 // folding for unary ops: + - ~
529 mp_parse_node_t pn = peek_result(parser, 0);
530 if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
531 return false;
532 }
533 arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
534 mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, 1));
535 if (tok == MP_TOKEN_OP_PLUS) {
536 // +int
537 } else if (tok == MP_TOKEN_OP_MINUS) {
538 // -int
539 arg0 = -arg0;
540 if (!MP_SMALL_INT_FITS(arg0)) {
541 return false;
542 }
543 } else {
544 assert(tok == MP_TOKEN_OP_TILDE); // should be
545 // ~int
546 arg0 = ~arg0;
547 }
548
549 #if MICROPY_COMP_CONST
550 } else if (rule->rule_id == RULE_expr_stmt) {
551 mp_parse_node_t pn1 = peek_result(parser, 0);
552 if (!MP_PARSE_NODE_IS_NULL(pn1)
553 && !(MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_augassign)
554 || MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_assign_list))) {
555 // this node is of the form <x> = <y>
556 mp_parse_node_t pn0 = peek_result(parser, 1);
557 if (MP_PARSE_NODE_IS_ID(pn0)
558 && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_power)
559 && MP_PARSE_NODE_IS_ID(((mp_parse_node_struct_t*)pn1)->nodes[0])
560 && MP_PARSE_NODE_LEAF_ARG(((mp_parse_node_struct_t*)pn1)->nodes[0]) == MP_QSTR_const
561 && MP_PARSE_NODE_IS_STRUCT_KIND(((mp_parse_node_struct_t*)pn1)->nodes[1], RULE_trailer_paren)
562 && MP_PARSE_NODE_IS_NULL(((mp_parse_node_struct_t*)pn1)->nodes[2])
563 ) {
564 // code to assign dynamic constants: id = const(value)
565
566 // get the id
567 qstr id = MP_PARSE_NODE_LEAF_ARG(pn0);
568
569 // get the value
570 mp_parse_node_t pn_value = ((mp_parse_node_struct_t*)((mp_parse_node_struct_t*)pn1)->nodes[1])->nodes[0];
571 if (!MP_PARSE_NODE_IS_SMALL_INT(pn_value)) {
572 parser->parse_error = PARSE_ERROR_CONST;
573 return false;
574 }
575 mp_int_t value = MP_PARSE_NODE_LEAF_SMALL_INT(pn_value);
576
577 // store the value in the table of dynamic constants
578 mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
579 assert(elem->value == MP_OBJ_NULL);
580 elem->value = MP_OBJ_NEW_SMALL_INT(value);
581
582 // replace const(value) with value
583 pop_result(parser);
584 push_result_node(parser, pn_value);
585
586 // finished folding this assignment, but we still want it to be part of the tree
587 return false;
588 }
589 }
590 return false;
591 #endif
592
593 #if MICROPY_COMP_MODULE_CONST
594 } else if (rule->rule_id == RULE_power) {
595 mp_parse_node_t pn0 = peek_result(parser, 2);
596 mp_parse_node_t pn1 = peek_result(parser, 1);
597 mp_parse_node_t pn2 = peek_result(parser, 0);
598 if (!(MP_PARSE_NODE_IS_ID(pn0)
599 && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_trailer_period)
600 && MP_PARSE_NODE_IS_NULL(pn2))) {
601 return false;
602 }
603 // id1.id2
604 // look it up in constant table, see if it can be replaced with an integer
605 mp_parse_node_struct_t *pns1 = (mp_parse_node_struct_t*)pn1;
606 assert(MP_PARSE_NODE_IS_ID(pns1->nodes[0]));
607 qstr q_base = MP_PARSE_NODE_LEAF_ARG(pn0);
608 qstr q_attr = MP_PARSE_NODE_LEAF_ARG(pns1->nodes[0]);
609 mp_map_elem_t *elem = mp_map_lookup((mp_map_t*)&mp_constants_map, MP_OBJ_NEW_QSTR(q_base), MP_MAP_LOOKUP);
610 if (elem == NULL) {
611 return false;
612 }
613 mp_obj_t dest[2];
614 mp_load_method_maybe(elem->value, q_attr, dest);
Damien George999cedb2015-11-27 17:01:44 +0000615 if (!(MP_OBJ_IS_SMALL_INT(dest[0]) && dest[1] == MP_OBJ_NULL)) {
Damien George64f2b212015-10-08 14:58:15 +0100616 return false;
617 }
618 arg0 = MP_OBJ_SMALL_INT_VALUE(dest[0]);
619 #endif
620
621 } else {
622 return false;
623 }
624
625 // success folding this rule
626
627 for (mp_uint_t i = num_args; i > 0; i--) {
628 pop_result(parser);
629 }
630 push_result_node(parser, mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg0));
631
632 return true;
633}
634#endif
635
Damien George38161822014-07-03 14:13:33 +0100636STATIC void push_result_rule(parser_t *parser, mp_uint_t src_line, const rule_t *rule, mp_uint_t num_args) {
Damien George64f2b212015-10-08 14:58:15 +0100637 #if MICROPY_COMP_CONST_FOLDING
638 if (fold_constants(parser, rule, num_args)) {
639 // we folded this rule so return straight away
640 return;
641 }
642 #endif
643
Damien George58e0f4a2015-09-23 10:50:43 +0100644 mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * num_args);
Damien George58ba4c32014-04-10 14:27:31 +0000645 if (pn == NULL) {
Damien George64f2b212015-10-08 14:58:15 +0100646 parser->parse_error = PARSE_ERROR_MEMORY;
Damien George58ba4c32014-04-10 14:27:31 +0000647 return;
648 }
649 pn->source_line = src_line;
650 pn->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);
Damien George38161822014-07-03 14:13:33 +0100651 for (mp_uint_t i = num_args; i > 0; i--) {
Damien429d7192013-10-04 19:53:11 +0100652 pn->nodes[i - 1] = pop_result(parser);
653 }
Damiend99b0522013-12-21 18:17:45 +0000654 push_result_node(parser, (mp_parse_node_t)pn);
Damien429d7192013-10-04 19:53:11 +0100655}
656
Damien George58e0f4a2015-09-23 10:50:43 +0100657mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
Damien George69a818d2014-01-12 13:55:24 +0000658
Damien George1b82e9a2014-05-10 17:36:41 +0100659 // initialise parser and allocate memory for its stacks
Damien George69a818d2014-01-12 13:55:24 +0000660
Damien George1b82e9a2014-05-10 17:36:41 +0100661 parser_t parser;
Damien George69a818d2014-01-12 13:55:24 +0000662
Damien George64f2b212015-10-08 14:58:15 +0100663 parser.parse_error = PARSE_ERROR_NONE;
Damien George58ba4c32014-04-10 14:27:31 +0000664
Damien George58ebde42014-05-21 20:32:59 +0100665 parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
Damien George1b82e9a2014-05-10 17:36:41 +0100666 parser.rule_stack_top = 0;
667 parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc);
Damien429d7192013-10-04 19:53:11 +0100668
Damien George58ebde42014-05-21 20:32:59 +0100669 parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
Damien George1b82e9a2014-05-10 17:36:41 +0100670 parser.result_stack_top = 0;
671 parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc);
Damien429d7192013-10-04 19:53:11 +0100672
Damien George1b82e9a2014-05-10 17:36:41 +0100673 parser.lexer = lex;
674
Damien George58e0f4a2015-09-23 10:50:43 +0100675 parser.tree.chunk = NULL;
676 parser.cur_chunk = NULL;
677
Damien George64f2b212015-10-08 14:58:15 +0100678 #if MICROPY_COMP_CONST
679 mp_map_init(&parser.consts, 0);
680 #endif
681
Damien George1b82e9a2014-05-10 17:36:41 +0100682 // check if we could allocate the stacks
683 if (parser.rule_stack == NULL || parser.result_stack == NULL) {
684 goto memory_error;
685 }
Damien George08335002014-01-18 23:24:36 +0000686
Damien George69a818d2014-01-12 13:55:24 +0000687 // work out the top-level rule to use, and push it on the stack
Damien George38161822014-07-03 14:13:33 +0100688 mp_uint_t top_level_rule;
Damien5ac1b2e2013-10-18 19:58:12 +0100689 switch (input_kind) {
Damiend99b0522013-12-21 18:17:45 +0000690 case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
Damien Georged02c6d82014-01-15 22:14:03 +0000691 case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
Damien5ac1b2e2013-10-18 19:58:12 +0100692 default: top_level_rule = RULE_file_input;
693 }
Damien Georgea4c52c52014-12-05 19:35:18 +0000694 push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);
Damien429d7192013-10-04 19:53:11 +0100695
Damien George69a818d2014-01-12 13:55:24 +0000696 // parse!
697
Damien George38161822014-07-03 14:13:33 +0100698 mp_uint_t n, i; // state for the current rule
699 mp_uint_t rule_src_line; // source line for the first token matched by the current rule
Damien429d7192013-10-04 19:53:11 +0100700 bool backtrack = false;
Damien George08335002014-01-18 23:24:36 +0000701 const rule_t *rule = NULL;
Damien429d7192013-10-04 19:53:11 +0100702
703 for (;;) {
704 next_rule:
Damien George64f2b212015-10-08 14:58:15 +0100705 if (parser.rule_stack_top == 0 || parser.parse_error) {
Damien429d7192013-10-04 19:53:11 +0100706 break;
707 }
708
Damien George1b82e9a2014-05-10 17:36:41 +0100709 pop_rule(&parser, &rule, &i, &rule_src_line);
Damien429d7192013-10-04 19:53:11 +0100710 n = rule->act & RULE_ACT_ARG_MASK;
711
712 /*
713 // debugging
Damien George1b82e9a2014-05-10 17:36:41 +0100714 printf("depth=%d ", parser.rule_stack_top);
715 for (int j = 0; j < parser.rule_stack_top; ++j) {
Damien429d7192013-10-04 19:53:11 +0100716 printf(" ");
717 }
718 printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
719 */
720
721 switch (rule->act & RULE_ACT_KIND_MASK) {
722 case RULE_ACT_OR:
723 if (i > 0 && !backtrack) {
724 goto next_rule;
725 } else {
726 backtrack = false;
727 }
Damien Georgefa7c61d2015-07-24 14:35:57 +0000728 for (; i < n; ++i) {
729 uint16_t kind = rule->arg[i] & RULE_ARG_KIND_MASK;
730 if (kind == RULE_ARG_TOK) {
731 if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
732 push_result_token(&parser);
733 mp_lexer_to_next(lex);
Damien429d7192013-10-04 19:53:11 +0100734 goto next_rule;
Damien Georgefa7c61d2015-07-24 14:35:57 +0000735 }
Damien429d7192013-10-04 19:53:11 +0100736 } else {
Damien Georgefa7c61d2015-07-24 14:35:57 +0000737 assert(kind == RULE_ARG_RULE);
738 if (i + 1 < n) {
739 push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
740 }
741 push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
Damien429d7192013-10-04 19:53:11 +0100742 goto next_rule;
743 }
Damien429d7192013-10-04 19:53:11 +0100744 }
Damien Georgefa7c61d2015-07-24 14:35:57 +0000745 backtrack = true;
Damien429d7192013-10-04 19:53:11 +0100746 break;
747
Damien George2870d852014-12-20 18:06:08 +0000748 case RULE_ACT_AND: {
Damien429d7192013-10-04 19:53:11 +0100749
750 // failed, backtrack if we can, else syntax error
751 if (backtrack) {
752 assert(i > 0);
753 if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
754 // an optional rule that failed, so continue with next arg
Damien George1b82e9a2014-05-10 17:36:41 +0100755 push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien429d7192013-10-04 19:53:11 +0100756 backtrack = false;
757 } else {
758 // a mandatory rule that failed, so propagate backtrack
759 if (i > 1) {
760 // already eaten tokens so can't backtrack
761 goto syntax_error;
762 } else {
763 goto next_rule;
764 }
765 }
766 }
767
768 // progress through the rule
769 for (; i < n; ++i) {
770 switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
Damien George2870d852014-12-20 18:06:08 +0000771 case RULE_ARG_TOK: {
Damien429d7192013-10-04 19:53:11 +0100772 // need to match a token
Damien George2870d852014-12-20 18:06:08 +0000773 mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
Damien Georgea4c52c52014-12-05 19:35:18 +0000774 if (lex->tok_kind == tok_kind) {
Damien429d7192013-10-04 19:53:11 +0100775 // matched token
Damiend99b0522013-12-21 18:17:45 +0000776 if (tok_kind == MP_TOKEN_NAME) {
Damien Georgea4c52c52014-12-05 19:35:18 +0000777 push_result_token(&parser);
Damien429d7192013-10-04 19:53:11 +0100778 }
Damiend99b0522013-12-21 18:17:45 +0000779 mp_lexer_to_next(lex);
Damien429d7192013-10-04 19:53:11 +0100780 } else {
781 // failed to match token
782 if (i > 0) {
783 // already eaten tokens so can't backtrack
784 goto syntax_error;
785 } else {
786 // this rule failed, so backtrack
787 backtrack = true;
788 goto next_rule;
789 }
790 }
791 break;
Damien Georged2d64f02015-01-14 21:32:42 +0000792 }
Damien429d7192013-10-04 19:53:11 +0100793 case RULE_ARG_RULE:
Damien429d7192013-10-04 19:53:11 +0100794 case RULE_ARG_OPT_RULE:
Damien Georged2d64f02015-01-14 21:32:42 +0000795 rule_and_no_other_choice:
Damien George1b82e9a2014-05-10 17:36:41 +0100796 push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
797 push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
Damien429d7192013-10-04 19:53:11 +0100798 goto next_rule;
799 default:
800 assert(0);
Damien Georged2d64f02015-01-14 21:32:42 +0000801 goto rule_and_no_other_choice; // to help flow control analysis
Damien429d7192013-10-04 19:53:11 +0100802 }
803 }
804
805 assert(i == n);
806
807 // matched the rule, so now build the corresponding parse_node
808
809 // count number of arguments for the parse_node
810 i = 0;
Damien George2870d852014-12-20 18:06:08 +0000811 bool emit_rule = false;
Damien George38161822014-07-03 14:13:33 +0100812 for (mp_uint_t x = 0; x < n; ++x) {
Damien429d7192013-10-04 19:53:11 +0100813 if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
Damien George2870d852014-12-20 18:06:08 +0000814 mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
Damiend99b0522013-12-21 18:17:45 +0000815 if (tok_kind >= MP_TOKEN_NAME) {
Damien429d7192013-10-04 19:53:11 +0100816 emit_rule = true;
817 }
Damiend99b0522013-12-21 18:17:45 +0000818 if (tok_kind == MP_TOKEN_NAME) {
Damien429d7192013-10-04 19:53:11 +0100819 // only tokens which were names are pushed to stack
820 i += 1;
821 }
822 } else {
823 // rules are always pushed
824 i += 1;
825 }
826 }
827
Damien George65dc9602015-08-14 12:24:11 +0100828 #if !MICROPY_ENABLE_DOC_STRING
Damien George5042bce2014-05-25 22:06:06 +0100829 // this code discards lonely statements, such as doc strings
Damien George1b82e9a2014-05-10 17:36:41 +0100830 if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
831 mp_parse_node_t p = peek_result(&parser, 1);
Damien George5042bce2014-05-25 22:06:06 +0100832 if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
Damien George52b5d762014-09-23 15:31:56 +0000833 pop_result(&parser); // MP_PARSE_NODE_NULL
Damien George58e0f4a2015-09-23 10:50:43 +0100834 mp_parse_node_t pn = pop_result(&parser); // possibly RULE_string
835 if (MP_PARSE_NODE_IS_STRUCT(pn)) {
836 mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
837 if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
838 m_del(char, (char*)pns->nodes[0], (mp_uint_t)pns->nodes[1]);
839 }
840 }
Damien George1b82e9a2014-05-10 17:36:41 +0100841 push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
Damien George93afa232014-05-06 21:44:11 +0100842 break;
843 }
844 }
Damien George65dc9602015-08-14 12:24:11 +0100845 #endif
Damien George93afa232014-05-06 21:44:11 +0100846
Damien429d7192013-10-04 19:53:11 +0100847 // always emit these rules, even if they have only 1 argument
848 if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
849 emit_rule = true;
850 }
851
Damien Georgeb47ea4e2014-12-20 18:37:50 +0000852 // if a rule has the RULE_ACT_ALLOW_IDENT bit set then this
853 // rule should not be emitted if it has only 1 argument
854 // NOTE: can't set this flag for atom_paren because we need it
855 // to distinguish, for example, [a,b] from [(a,b)]
Damien Georgeb47ea4e2014-12-20 18:37:50 +0000856 if (rule->act & RULE_ACT_ALLOW_IDENT) {
Damien429d7192013-10-04 19:53:11 +0100857 emit_rule = false;
858 }
859
860 // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
Damien Georgeb47ea4e2014-12-20 18:37:50 +0000861 if (ADD_BLANK_NODE(rule)) {
Damien429d7192013-10-04 19:53:11 +0100862 emit_rule = true;
Damien George1b82e9a2014-05-10 17:36:41 +0100863 push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien429d7192013-10-04 19:53:11 +0100864 i += 1;
865 }
866
Damien George38161822014-07-03 14:13:33 +0100867 mp_uint_t num_not_nil = 0;
868 for (mp_uint_t x = 0; x < i; ++x) {
Damien George1b82e9a2014-05-10 17:36:41 +0100869 if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
Damien429d7192013-10-04 19:53:11 +0100870 num_not_nil += 1;
871 }
872 }
Damien George366239b2015-10-08 23:13:18 +0100873 if (emit_rule || num_not_nil != 1) {
874 // need to add rule when num_not_nil==0 for, eg, atom_paren, testlist_comp_3b
Damien George1b82e9a2014-05-10 17:36:41 +0100875 push_result_rule(&parser, rule_src_line, rule, i);
Damien George366239b2015-10-08 23:13:18 +0100876 } else {
Damien429d7192013-10-04 19:53:11 +0100877 // single result, leave it on stack
Damiend99b0522013-12-21 18:17:45 +0000878 mp_parse_node_t pn = MP_PARSE_NODE_NULL;
Damien George38161822014-07-03 14:13:33 +0100879 for (mp_uint_t x = 0; x < i; ++x) {
Damien George1b82e9a2014-05-10 17:36:41 +0100880 mp_parse_node_t pn2 = pop_result(&parser);
Damiend99b0522013-12-21 18:17:45 +0000881 if (pn2 != MP_PARSE_NODE_NULL) {
Damien429d7192013-10-04 19:53:11 +0100882 pn = pn2;
883 }
884 }
Damien George1b82e9a2014-05-10 17:36:41 +0100885 push_result_node(&parser, pn);
Damien429d7192013-10-04 19:53:11 +0100886 }
887 break;
Damien George2870d852014-12-20 18:06:08 +0000888 }
Damien429d7192013-10-04 19:53:11 +0100889
Damien George2870d852014-12-20 18:06:08 +0000890 case RULE_ACT_LIST: {
Damien429d7192013-10-04 19:53:11 +0100891 // n=2 is: item item*
892 // n=1 is: item (sep item)*
893 // n=3 is: item (sep item)* [sep]
Damien George2870d852014-12-20 18:06:08 +0000894 bool had_trailing_sep;
Damien429d7192013-10-04 19:53:11 +0100895 if (backtrack) {
896 list_backtrack:
897 had_trailing_sep = false;
898 if (n == 2) {
899 if (i == 1) {
900 // fail on item, first time round; propagate backtrack
901 goto next_rule;
902 } else {
903 // fail on item, in later rounds; finish with this rule
904 backtrack = false;
905 }
906 } else {
907 if (i == 1) {
908 // fail on item, first time round; propagate backtrack
909 goto next_rule;
910 } else if ((i & 1) == 1) {
911 // fail on item, in later rounds; have eaten tokens so can't backtrack
912 if (n == 3) {
913 // list allows trailing separator; finish parsing list
914 had_trailing_sep = true;
915 backtrack = false;
916 } else {
917 // list doesn't allowing trailing separator; fail
918 goto syntax_error;
919 }
920 } else {
921 // fail on separator; finish parsing list
922 backtrack = false;
923 }
924 }
925 } else {
926 for (;;) {
Damien George38161822014-07-03 14:13:33 +0100927 mp_uint_t arg = rule->arg[i & 1 & n];
Damien429d7192013-10-04 19:53:11 +0100928 switch (arg & RULE_ARG_KIND_MASK) {
929 case RULE_ARG_TOK:
Damien Georgea4c52c52014-12-05 19:35:18 +0000930 if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
Damien429d7192013-10-04 19:53:11 +0100931 if (i & 1 & n) {
932 // separators which are tokens are not pushed to result stack
933 } else {
Damien Georgea4c52c52014-12-05 19:35:18 +0000934 push_result_token(&parser);
Damien429d7192013-10-04 19:53:11 +0100935 }
Damiend99b0522013-12-21 18:17:45 +0000936 mp_lexer_to_next(lex);
Damien429d7192013-10-04 19:53:11 +0100937 // got element of list, so continue parsing list
938 i += 1;
939 } else {
940 // couldn't get element of list
941 i += 1;
942 backtrack = true;
943 goto list_backtrack;
944 }
945 break;
946 case RULE_ARG_RULE:
Damien Georged2d64f02015-01-14 21:32:42 +0000947 rule_list_no_other_choice:
Damien George1b82e9a2014-05-10 17:36:41 +0100948 push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
949 push_rule_from_arg(&parser, arg); // push child of list-rule
Damien429d7192013-10-04 19:53:11 +0100950 goto next_rule;
951 default:
952 assert(0);
Damien Georged2d64f02015-01-14 21:32:42 +0000953 goto rule_list_no_other_choice; // to help flow control analysis
Damien429d7192013-10-04 19:53:11 +0100954 }
955 }
956 }
957 assert(i >= 1);
958
959 // compute number of elements in list, result in i
960 i -= 1;
961 if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
962 // don't count separators when they are tokens
963 i = (i + 1) / 2;
964 }
965
966 if (i == 1) {
967 // list matched single item
968 if (had_trailing_sep) {
969 // if there was a trailing separator, make a list of a single item
Damien George1b82e9a2014-05-10 17:36:41 +0100970 push_result_rule(&parser, rule_src_line, rule, i);
Damien429d7192013-10-04 19:53:11 +0100971 } else {
972 // just leave single item on stack (ie don't wrap in a list)
973 }
974 } else {
Damien George1b82e9a2014-05-10 17:36:41 +0100975 push_result_rule(&parser, rule_src_line, rule, i);
Damien429d7192013-10-04 19:53:11 +0100976 }
977 break;
Damien George2870d852014-12-20 18:06:08 +0000978 }
Damien429d7192013-10-04 19:53:11 +0100979
980 default:
981 assert(0);
982 }
983 }
Damien91d387d2013-10-09 15:09:52 +0100984
Damien George64f2b212015-10-08 14:58:15 +0100985 #if MICROPY_COMP_CONST
986 mp_map_deinit(&parser.consts);
987 #endif
988
Damien George58e0f4a2015-09-23 10:50:43 +0100989 // truncate final chunk and link into chain of chunks
990 if (parser.cur_chunk != NULL) {
991 (void)m_renew(byte, parser.cur_chunk,
992 sizeof(mp_parse_chunk_t) + parser.cur_chunk->alloc,
993 sizeof(mp_parse_chunk_t) + parser.cur_chunk->union_.used);
994 parser.cur_chunk->alloc = parser.cur_chunk->union_.used;
995 parser.cur_chunk->union_.next = parser.tree.chunk;
996 parser.tree.chunk = parser.cur_chunk;
997 }
998
Damien Georgef8048332015-02-08 13:40:20 +0000999 mp_obj_t exc;
Damien George58ba4c32014-04-10 14:27:31 +00001000
Damien George64f2b212015-10-08 14:58:15 +01001001 if (parser.parse_error) {
1002 #if MICROPY_COMP_CONST
1003 if (parser.parse_error == PARSE_ERROR_CONST) {
1004 exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
1005 "constant must be an integer");
1006 } else
1007 #endif
1008 {
1009 assert(parser.parse_error == PARSE_ERROR_MEMORY);
1010 memory_error:
1011 exc = mp_obj_new_exception_msg(&mp_type_MemoryError,
1012 "parser could not allocate enough memory");
1013 }
Damien George58e0f4a2015-09-23 10:50:43 +01001014 parser.tree.root = MP_PARSE_NODE_NULL;
Damien Georgefdfcee72015-10-12 12:59:18 +01001015 } else if (
1016 lex->tok_kind != MP_TOKEN_END // check we are at the end of the token stream
1017 || parser.result_stack_top == 0 // check that we got a node (can fail on empty input)
1018 ) {
1019 syntax_error:
1020 if (lex->tok_kind == MP_TOKEN_INDENT) {
1021 exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
1022 "unexpected indent");
1023 } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
1024 exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
1025 "unindent does not match any outer indentation level");
1026 } else {
1027 exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
1028 "invalid syntax");
1029 }
1030 parser.tree.root = MP_PARSE_NODE_NULL;
1031 } else {
1032 // no errors
1033
1034 //result_stack_show(parser);
1035 //printf("rule stack alloc: %d\n", parser.rule_stack_alloc);
1036 //printf("result stack alloc: %d\n", parser.result_stack_alloc);
1037 //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
1038
1039 // get the root parse node that we created
1040 assert(parser.result_stack_top == 1);
1041 exc = MP_OBJ_NULL;
1042 parser.tree.root = parser.result_stack[0];
Damien George58ba4c32014-04-10 14:27:31 +00001043 }
1044
Damien George69a818d2014-01-12 13:55:24 +00001045 // free the memory that we don't need anymore
Damien George1b82e9a2014-05-10 17:36:41 +01001046 m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
1047 m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);
Damien George0bfc7632015-02-07 18:33:58 +00001048 // we also free the lexer on behalf of the caller (see below)
Damien George69a818d2014-01-12 13:55:24 +00001049
Damien George0bfc7632015-02-07 18:33:58 +00001050 if (exc != MP_OBJ_NULL) {
1051 // had an error so raise the exception
1052 // add traceback to give info about file name and location
1053 // we don't have a 'block' name, so just pass the NULL qstr to indicate this
1054 mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, MP_QSTR_NULL);
1055 mp_lexer_free(lex);
1056 nlr_raise(exc);
1057 } else {
1058 mp_lexer_free(lex);
Damien George58e0f4a2015-09-23 10:50:43 +01001059 return parser.tree;
Damien George0bfc7632015-02-07 18:33:58 +00001060 }
Damien429d7192013-10-04 19:53:11 +01001061}
Damien George58e0f4a2015-09-23 10:50:43 +01001062
1063void mp_parse_tree_clear(mp_parse_tree_t *tree) {
1064 mp_parse_chunk_t *chunk = tree->chunk;
1065 while (chunk != NULL) {
1066 mp_parse_chunk_t *next = chunk->union_.next;
1067 m_del(byte, chunk, sizeof(mp_parse_chunk_t) + chunk->alloc);
1068 chunk = next;
1069 }
1070}