blob: ae153732279539512e99443333de1e473082b3d7 [file] [log] [blame]
Damien George04b91472014-05-03 23:27:38 +01001/*
2 * This file is part of the Micro Python project, http://micropython.org/
3 *
4 * The MIT License (MIT)
5 *
Damien George4735c452015-04-21 16:43:18 +00006 * Copyright (c) 2013-2015 Damien P. George
Damien George04b91472014-05-03 23:27:38 +01007 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
xbeefe34222014-03-16 00:14:26 -070027#include <stdbool.h>
Damien429d7192013-10-04 19:53:11 +010028#include <stdint.h>
29#include <stdio.h>
Damien429d7192013-10-04 19:53:11 +010030#include <assert.h>
Damien George5042bce2014-05-25 22:06:06 +010031#include <string.h>
Damien429d7192013-10-04 19:53:11 +010032
Damien George0bfc7632015-02-07 18:33:58 +000033#include "py/nlr.h"
Damien George51dfcb42015-01-01 20:27:54 +000034#include "py/lexer.h"
35#include "py/parse.h"
36#include "py/parsenum.h"
37#include "py/smallint.h"
Damien George64f2b212015-10-08 14:58:15 +010038#include "py/runtime.h"
39#include "py/builtin.h"
Damien429d7192013-10-04 19:53:11 +010040
Damien429d7192013-10-04 19:53:11 +010041#define RULE_ACT_ARG_MASK (0x0f)
Damien Georgeb47ea4e2014-12-20 18:37:50 +000042#define RULE_ACT_KIND_MASK (0x30)
43#define RULE_ACT_ALLOW_IDENT (0x40)
44#define RULE_ACT_ADD_BLANK (0x80)
Damien429d7192013-10-04 19:53:11 +010045#define RULE_ACT_OR (0x10)
46#define RULE_ACT_AND (0x20)
47#define RULE_ACT_LIST (0x30)
48
Damien429d7192013-10-04 19:53:11 +010049#define RULE_ARG_KIND_MASK (0xf000)
50#define RULE_ARG_ARG_MASK (0x0fff)
51#define RULE_ARG_TOK (0x1000)
52#define RULE_ARG_RULE (0x2000)
Damien George4735c452015-04-21 16:43:18 +000053#define RULE_ARG_OPT_RULE (0x3000)
Damien429d7192013-10-04 19:53:11 +010054
Damien Georgeb47ea4e2014-12-20 18:37:50 +000055#define ADD_BLANK_NODE(rule) ((rule->act & RULE_ACT_ADD_BLANK) != 0)
Damien Georgeb829b5c2014-01-25 13:51:19 +000056
Damien429d7192013-10-04 19:53:11 +010057// (un)comment to use rule names; for debugging
58//#define USE_RULE_NAME (1)
59
60typedef struct _rule_t {
61 byte rule_id;
62 byte act;
63#ifdef USE_RULE_NAME
64 const char *rule_name;
65#endif
66 uint16_t arg[];
67} rule_t;
68
69enum {
Damien George00208ce2014-01-23 00:00:53 +000070#define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
Damien George51dfcb42015-01-01 20:27:54 +000071#include "py/grammar.h"
Damien429d7192013-10-04 19:53:11 +010072#undef DEF_RULE
73 RULE_maximum_number_of,
Damien George5042bce2014-05-25 22:06:06 +010074 RULE_string, // special node for non-interned string
Damien George4c81ba82015-01-13 16:21:23 +000075 RULE_bytes, // special node for non-interned bytes
Damien George7d414a12015-02-08 01:57:40 +000076 RULE_const_object, // special node for a constant, generic Python object
Damien429d7192013-10-04 19:53:11 +010077};
78
Damien Georgeb47ea4e2014-12-20 18:37:50 +000079#define ident (RULE_ACT_ALLOW_IDENT)
80#define blank (RULE_ACT_ADD_BLANK)
Damien429d7192013-10-04 19:53:11 +010081#define or(n) (RULE_ACT_OR | n)
82#define and(n) (RULE_ACT_AND | n)
83#define one_or_more (RULE_ACT_LIST | 2)
84#define list (RULE_ACT_LIST | 1)
85#define list_with_end (RULE_ACT_LIST | 3)
Damiend99b0522013-12-21 18:17:45 +000086#define tok(t) (RULE_ARG_TOK | MP_TOKEN_##t)
Damien429d7192013-10-04 19:53:11 +010087#define rule(r) (RULE_ARG_RULE | RULE_##r)
Damien429d7192013-10-04 19:53:11 +010088#define opt_rule(r) (RULE_ARG_OPT_RULE | RULE_##r)
89#ifdef USE_RULE_NAME
Damien George00208ce2014-01-23 00:00:53 +000090#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
Damien429d7192013-10-04 19:53:11 +010091#else
Damien George00208ce2014-01-23 00:00:53 +000092#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
Damien429d7192013-10-04 19:53:11 +010093#endif
Damien George51dfcb42015-01-01 20:27:54 +000094#include "py/grammar.h"
Damien429d7192013-10-04 19:53:11 +010095#undef or
96#undef and
97#undef list
98#undef list_with_end
99#undef tok
100#undef rule
Damien429d7192013-10-04 19:53:11 +0100101#undef opt_rule
102#undef one_or_more
103#undef DEF_RULE
104
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200105STATIC const rule_t *rules[] = {
Damien George00208ce2014-01-23 00:00:53 +0000106#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
Damien George51dfcb42015-01-01 20:27:54 +0000107#include "py/grammar.h"
Damien429d7192013-10-04 19:53:11 +0100108#undef DEF_RULE
109};
110
111typedef struct _rule_stack_t {
Damien George16a6a472015-12-17 13:06:05 +0000112 size_t src_line : 8 * sizeof(size_t) - 8; // maximum bits storing source line number
113 size_t rule_id : 8; // this must be large enough to fit largest rule number
114 size_t arg_i; // this dictates the maximum nodes in a "list" of things
Damien429d7192013-10-04 19:53:11 +0100115} rule_stack_t;
116
Damien George58e0f4a2015-09-23 10:50:43 +0100117typedef struct _mp_parse_chunk_t {
Damien George16a6a472015-12-17 13:06:05 +0000118 size_t alloc;
Damien George58e0f4a2015-09-23 10:50:43 +0100119 union {
Damien George16a6a472015-12-17 13:06:05 +0000120 size_t used;
Damien George58e0f4a2015-09-23 10:50:43 +0100121 struct _mp_parse_chunk_t *next;
122 } union_;
123 byte data[];
124} mp_parse_chunk_t;
125
Damien George64f2b212015-10-08 14:58:15 +0100126typedef enum {
127 PARSE_ERROR_NONE = 0,
128 PARSE_ERROR_MEMORY,
129 PARSE_ERROR_CONST,
130} parse_error_t;
131
Damien429d7192013-10-04 19:53:11 +0100132typedef struct _parser_t {
Damien George64f2b212015-10-08 14:58:15 +0100133 parse_error_t parse_error;
Damien George58ba4c32014-04-10 14:27:31 +0000134
Damien George16a6a472015-12-17 13:06:05 +0000135 size_t rule_stack_alloc;
136 size_t rule_stack_top;
Damien429d7192013-10-04 19:53:11 +0100137 rule_stack_t *rule_stack;
138
Damien George16a6a472015-12-17 13:06:05 +0000139 size_t result_stack_alloc;
140 size_t result_stack_top;
Damiend99b0522013-12-21 18:17:45 +0000141 mp_parse_node_t *result_stack;
Damien George08335002014-01-18 23:24:36 +0000142
143 mp_lexer_t *lexer;
Damien George58e0f4a2015-09-23 10:50:43 +0100144
145 mp_parse_tree_t tree;
146 mp_parse_chunk_t *cur_chunk;
Damien429d7192013-10-04 19:53:11 +0100147
Damien George64f2b212015-10-08 14:58:15 +0100148 #if MICROPY_COMP_CONST
149 mp_map_t consts;
150 #endif
151} parser_t;
Damien George58ba4c32014-04-10 14:27:31 +0000152
Damien George58e0f4a2015-09-23 10:50:43 +0100153STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
154 // use a custom memory allocator to store parse nodes sequentially in large chunks
155
156 mp_parse_chunk_t *chunk = parser->cur_chunk;
157
158 if (chunk != NULL && chunk->union_.used + num_bytes > chunk->alloc) {
159 // not enough room at end of previously allocated chunk so try to grow
160 mp_parse_chunk_t *new_data = (mp_parse_chunk_t*)m_renew_maybe(byte, chunk,
161 sizeof(mp_parse_chunk_t) + chunk->alloc,
162 sizeof(mp_parse_chunk_t) + chunk->alloc + num_bytes, false);
163 if (new_data == NULL) {
164 // could not grow existing memory; shrink it to fit previous
165 (void)m_renew(byte, chunk, sizeof(mp_parse_chunk_t) + chunk->alloc,
166 sizeof(mp_parse_chunk_t) + chunk->union_.used);
167 chunk->alloc = chunk->union_.used;
168 chunk->union_.next = parser->tree.chunk;
169 parser->tree.chunk = chunk;
170 chunk = NULL;
171 } else {
172 // could grow existing memory
173 chunk->alloc += num_bytes;
174 }
175 }
176
177 if (chunk == NULL) {
178 // no previous chunk, allocate a new chunk
179 size_t alloc = MICROPY_ALLOC_PARSE_CHUNK_INIT;
180 if (alloc < num_bytes) {
181 alloc = num_bytes;
182 }
183 chunk = (mp_parse_chunk_t*)m_new(byte, sizeof(mp_parse_chunk_t) + alloc);
184 chunk->alloc = alloc;
185 chunk->union_.used = 0;
186 parser->cur_chunk = chunk;
187 }
188
189 byte *ret = chunk->data + chunk->union_.used;
190 chunk->union_.used += num_bytes;
191 return ret;
192}
193
Damien George16a6a472015-12-17 13:06:05 +0000194STATIC void push_rule(parser_t *parser, size_t src_line, const rule_t *rule, size_t arg_i) {
Damien George64f2b212015-10-08 14:58:15 +0100195 if (parser->parse_error) {
Damien George58ba4c32014-04-10 14:27:31 +0000196 return;
197 }
Damien429d7192013-10-04 19:53:11 +0100198 if (parser->rule_stack_top >= parser->rule_stack_alloc) {
Damien Georgeade9a052015-06-13 21:53:22 +0100199 rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC, true);
Damien George58ba4c32014-04-10 14:27:31 +0000200 if (rs == NULL) {
Damien George64f2b212015-10-08 14:58:15 +0100201 parser->parse_error = PARSE_ERROR_MEMORY;
Damien George58ba4c32014-04-10 14:27:31 +0000202 return;
203 }
204 parser->rule_stack = rs;
Damien George58ebde42014-05-21 20:32:59 +0100205 parser->rule_stack_alloc += MICROPY_ALLOC_PARSE_RULE_INC;
Damien429d7192013-10-04 19:53:11 +0100206 }
Damien George08335002014-01-18 23:24:36 +0000207 rule_stack_t *rs = &parser->rule_stack[parser->rule_stack_top++];
208 rs->src_line = src_line;
209 rs->rule_id = rule->rule_id;
210 rs->arg_i = arg_i;
Damien429d7192013-10-04 19:53:11 +0100211}
212
Damien George16a6a472015-12-17 13:06:05 +0000213STATIC void push_rule_from_arg(parser_t *parser, size_t arg) {
Damien429d7192013-10-04 19:53:11 +0100214 assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
Damien George16a6a472015-12-17 13:06:05 +0000215 size_t rule_id = arg & RULE_ARG_ARG_MASK;
Damien429d7192013-10-04 19:53:11 +0100216 assert(rule_id < RULE_maximum_number_of);
Damien Georgea4c52c52014-12-05 19:35:18 +0000217 push_rule(parser, parser->lexer->tok_line, rules[rule_id], 0);
Damien429d7192013-10-04 19:53:11 +0100218}
219
Damien George16a6a472015-12-17 13:06:05 +0000220STATIC void pop_rule(parser_t *parser, const rule_t **rule, size_t *arg_i, size_t *src_line) {
Damien George64f2b212015-10-08 14:58:15 +0100221 assert(!parser->parse_error);
Damien429d7192013-10-04 19:53:11 +0100222 parser->rule_stack_top -= 1;
223 *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id];
224 *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i;
Damien George08335002014-01-18 23:24:36 +0000225 *src_line = parser->rule_stack[parser->rule_stack_top].src_line;
Damien429d7192013-10-04 19:53:11 +0100226}
227
Damien George16a6a472015-12-17 13:06:05 +0000228mp_parse_node_t mp_parse_node_new_leaf(size_t kind, mp_int_t arg) {
Paul Sokolovsky56e5ef22014-02-22 16:39:45 +0200229 if (kind == MP_PARSE_NODE_SMALL_INT) {
230 return (mp_parse_node_t)(kind | (arg << 1));
231 }
Damien George7d414a12015-02-08 01:57:40 +0000232 return (mp_parse_node_t)(kind | (arg << 4));
Damien429d7192013-10-04 19:53:11 +0100233}
234
Damien George16a6a472015-12-17 13:06:05 +0000235int mp_parse_node_extract_list(mp_parse_node_t *pn, size_t pn_kind, mp_parse_node_t **nodes) {
Damien Georgedfe944c2015-02-13 02:29:46 +0000236 if (MP_PARSE_NODE_IS_NULL(*pn)) {
237 *nodes = NULL;
238 return 0;
239 } else if (MP_PARSE_NODE_IS_LEAF(*pn)) {
240 *nodes = pn;
241 return 1;
242 } else {
243 mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)(*pn);
244 if (MP_PARSE_NODE_STRUCT_KIND(pns) != pn_kind) {
245 *nodes = pn;
246 return 1;
247 } else {
248 *nodes = pns->nodes;
249 return MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
250 }
251 }
252}
253
Damien Georgecbd2f742014-01-19 11:48:48 +0000254#if MICROPY_DEBUG_PRINTERS
Damien George16a6a472015-12-17 13:06:05 +0000255void mp_parse_node_print(mp_parse_node_t pn, size_t indent) {
Damien George08335002014-01-18 23:24:36 +0000256 if (MP_PARSE_NODE_IS_STRUCT(pn)) {
257 printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
258 } else {
259 printf(" ");
260 }
Damien George16a6a472015-12-17 13:06:05 +0000261 for (size_t i = 0; i < indent; i++) {
Damien429d7192013-10-04 19:53:11 +0100262 printf(" ");
263 }
Damiend99b0522013-12-21 18:17:45 +0000264 if (MP_PARSE_NODE_IS_NULL(pn)) {
Damien429d7192013-10-04 19:53:11 +0100265 printf("NULL\n");
Paul Sokolovsky56e5ef22014-02-22 16:39:45 +0200266 } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
Damien George40f3c022014-07-03 13:25:24 +0100267 mp_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
Paul Sokolovsky56e5ef22014-02-22 16:39:45 +0200268 printf("int(" INT_FMT ")\n", arg);
Damiend99b0522013-12-21 18:17:45 +0000269 } else if (MP_PARSE_NODE_IS_LEAF(pn)) {
Damien George16a6a472015-12-17 13:06:05 +0000270 uintptr_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
Damiend99b0522013-12-21 18:17:45 +0000271 switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
272 case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
Damiend99b0522013-12-21 18:17:45 +0000273 case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
274 case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
Damien George16a6a472015-12-17 13:06:05 +0000275 case MP_PARSE_NODE_TOKEN: printf("tok(%u)\n", (uint)arg); break;
Damien429d7192013-10-04 19:53:11 +0100276 default: assert(0);
277 }
278 } else {
Damien George5042bce2014-05-25 22:06:06 +0100279 // node must be a mp_parse_node_struct_t
Damien Georgeb829b5c2014-01-25 13:51:19 +0000280 mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
Damien George5042bce2014-05-25 22:06:06 +0100281 if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
282 printf("literal str(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
Damien George4c81ba82015-01-13 16:21:23 +0000283 } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_bytes) {
284 printf("literal bytes(%.*s)\n", (int)pns->nodes[1], (char*)pns->nodes[0]);
Damien George7d414a12015-02-08 01:57:40 +0000285 } else if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
Damien Georgeb8cfb0d2015-11-27 17:09:11 +0000286 #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
287 printf("literal const(%016llx)\n", (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32));
288 #else
Damien George7d414a12015-02-08 01:57:40 +0000289 printf("literal const(%p)\n", (mp_obj_t)pns->nodes[0]);
Damien Georgeb8cfb0d2015-11-27 17:09:11 +0000290 #endif
Damien George5042bce2014-05-25 22:06:06 +0100291 } else {
Damien George16a6a472015-12-17 13:06:05 +0000292 size_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
Damien429d7192013-10-04 19:53:11 +0100293#ifdef USE_RULE_NAME
Damien George16a6a472015-12-17 13:06:05 +0000294 printf("%s(%u) (n=%u)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, (uint)MP_PARSE_NODE_STRUCT_KIND(pns), (uint)n);
Damien429d7192013-10-04 19:53:11 +0100295#else
Damien George16a6a472015-12-17 13:06:05 +0000296 printf("rule(%u) (n=%u)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), (uint)n);
Damien429d7192013-10-04 19:53:11 +0100297#endif
Damien George16a6a472015-12-17 13:06:05 +0000298 for (size_t i = 0; i < n; i++) {
Damien George5042bce2014-05-25 22:06:06 +0100299 mp_parse_node_print(pns->nodes[i], indent + 2);
300 }
Damien429d7192013-10-04 19:53:11 +0100301 }
302 }
303}
Damien Georgecbd2f742014-01-19 11:48:48 +0000304#endif // MICROPY_DEBUG_PRINTERS
Damien429d7192013-10-04 19:53:11 +0100305
306/*
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200307STATIC void result_stack_show(parser_t *parser) {
Damien429d7192013-10-04 19:53:11 +0100308 printf("result stack, most recent first\n");
Damien George16a6a472015-12-17 13:06:05 +0000309 for (ssize_t i = parser->result_stack_top - 1; i >= 0; i--) {
Damien Georgecbd2f742014-01-19 11:48:48 +0000310 mp_parse_node_print(parser->result_stack[i], 0);
Damien429d7192013-10-04 19:53:11 +0100311 }
312}
313*/
314
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200315STATIC mp_parse_node_t pop_result(parser_t *parser) {
Damien George64f2b212015-10-08 14:58:15 +0100316 if (parser->parse_error) {
Damien George58ba4c32014-04-10 14:27:31 +0000317 return MP_PARSE_NODE_NULL;
318 }
Damien429d7192013-10-04 19:53:11 +0100319 assert(parser->result_stack_top > 0);
320 return parser->result_stack[--parser->result_stack_top];
321}
322
Damien George16a6a472015-12-17 13:06:05 +0000323STATIC mp_parse_node_t peek_result(parser_t *parser, size_t pos) {
Damien George64f2b212015-10-08 14:58:15 +0100324 if (parser->parse_error) {
Damien George58ba4c32014-04-10 14:27:31 +0000325 return MP_PARSE_NODE_NULL;
326 }
Damien429d7192013-10-04 19:53:11 +0100327 assert(parser->result_stack_top > pos);
328 return parser->result_stack[parser->result_stack_top - 1 - pos];
329}
330
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200331STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
Damien George64f2b212015-10-08 14:58:15 +0100332 if (parser->parse_error) {
Damien George58ba4c32014-04-10 14:27:31 +0000333 return;
334 }
Damien George69a818d2014-01-12 13:55:24 +0000335 if (parser->result_stack_top >= parser->result_stack_alloc) {
Damien Georgeade9a052015-06-13 21:53:22 +0100336 mp_parse_node_t *stack = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC, true);
Damien George50912e72015-01-20 11:55:10 +0000337 if (stack == NULL) {
Damien George64f2b212015-10-08 14:58:15 +0100338 parser->parse_error = PARSE_ERROR_MEMORY;
Damien George58ba4c32014-04-10 14:27:31 +0000339 return;
340 }
Damien George50912e72015-01-20 11:55:10 +0000341 parser->result_stack = stack;
Damien George58ebde42014-05-21 20:32:59 +0100342 parser->result_stack_alloc += MICROPY_ALLOC_PARSE_RESULT_INC;
Damien George69a818d2014-01-12 13:55:24 +0000343 }
Damien429d7192013-10-04 19:53:11 +0100344 parser->result_stack[parser->result_stack_top++] = pn;
345}
346
Damien George16a6a472015-12-17 13:06:05 +0000347STATIC mp_parse_node_t make_node_string_bytes(parser_t *parser, size_t src_line, size_t rule_kind, const char *str, size_t len) {
Damien George58e0f4a2015-09-23 10:50:43 +0100348 mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * 2);
Damien George5042bce2014-05-25 22:06:06 +0100349 if (pn == NULL) {
Damien George64f2b212015-10-08 14:58:15 +0100350 parser->parse_error = PARSE_ERROR_MEMORY;
Damien George7d414a12015-02-08 01:57:40 +0000351 return MP_PARSE_NODE_NULL;
Damien George5042bce2014-05-25 22:06:06 +0100352 }
353 pn->source_line = src_line;
Damien George4c81ba82015-01-13 16:21:23 +0000354 pn->kind_num_nodes = rule_kind | (2 << 8);
Damien George5042bce2014-05-25 22:06:06 +0100355 char *p = m_new(char, len);
356 memcpy(p, str, len);
Damien George999cedb2015-11-27 17:01:44 +0000357 pn->nodes[0] = (uintptr_t)p;
Damien George5042bce2014-05-25 22:06:06 +0100358 pn->nodes[1] = len;
Damien George7d414a12015-02-08 01:57:40 +0000359 return (mp_parse_node_t)pn;
360}
361
Damien George16a6a472015-12-17 13:06:05 +0000362STATIC mp_parse_node_t make_node_const_object(parser_t *parser, size_t src_line, mp_obj_t obj) {
Damien George999cedb2015-11-27 17:01:44 +0000363 mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_obj_t));
Damien George7d414a12015-02-08 01:57:40 +0000364 if (pn == NULL) {
Damien George64f2b212015-10-08 14:58:15 +0100365 parser->parse_error = PARSE_ERROR_MEMORY;
Damien George7d414a12015-02-08 01:57:40 +0000366 return MP_PARSE_NODE_NULL;
367 }
368 pn->source_line = src_line;
Damien Georgeb8cfb0d2015-11-27 17:09:11 +0000369 #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
370 // nodes are 32-bit pointers, but need to store 64-bit object
371 pn->kind_num_nodes = RULE_const_object | (2 << 8);
372 pn->nodes[0] = (uint64_t)obj;
373 pn->nodes[1] = (uint64_t)obj >> 32;
374 #else
Damien George7d414a12015-02-08 01:57:40 +0000375 pn->kind_num_nodes = RULE_const_object | (1 << 8);
Damien George16a6a472015-12-17 13:06:05 +0000376 pn->nodes[0] = (uintptr_t)obj;
Damien Georgeb8cfb0d2015-11-27 17:09:11 +0000377 #endif
Damien George7d414a12015-02-08 01:57:40 +0000378 return (mp_parse_node_t)pn;
Damien George5042bce2014-05-25 22:06:06 +0100379}
Paul Sokolovsky9e76b112014-05-08 22:43:46 +0300380
Damien Georgea4c52c52014-12-05 19:35:18 +0000381STATIC void push_result_token(parser_t *parser) {
Damiend99b0522013-12-21 18:17:45 +0000382 mp_parse_node_t pn;
Damien Georgea4c52c52014-12-05 19:35:18 +0000383 mp_lexer_t *lex = parser->lexer;
384 if (lex->tok_kind == MP_TOKEN_NAME) {
Damien George64f2b212015-10-08 14:58:15 +0100385 qstr id = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
386 #if MICROPY_COMP_CONST
387 // lookup identifier in table of dynamic constants
388 mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP);
389 if (elem != NULL) {
390 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(elem->value));
391 } else
392 #endif
393 {
394 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, id);
395 }
Damien George7d414a12015-02-08 01:57:40 +0000396 } else if (lex->tok_kind == MP_TOKEN_INTEGER) {
397 mp_obj_t o = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex);
398 if (MP_OBJ_IS_SMALL_INT(o)) {
399 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, MP_OBJ_SMALL_INT_VALUE(o));
Damien429d7192013-10-04 19:53:11 +0100400 } else {
Damien George7d414a12015-02-08 01:57:40 +0000401 pn = make_node_const_object(parser, lex->tok_line, o);
Damien429d7192013-10-04 19:53:11 +0100402 }
Damien George7d414a12015-02-08 01:57:40 +0000403 } else if (lex->tok_kind == MP_TOKEN_FLOAT_OR_IMAG) {
404 mp_obj_t o = mp_parse_num_decimal(lex->vstr.buf, lex->vstr.len, true, false, lex);
405 pn = make_node_const_object(parser, lex->tok_line, o);
Damien George4c81ba82015-01-13 16:21:23 +0000406 } else if (lex->tok_kind == MP_TOKEN_STRING || lex->tok_kind == MP_TOKEN_BYTES) {
407 // Don't automatically intern all strings/bytes. doc strings (which are usually large)
Damien George5042bce2014-05-25 22:06:06 +0100408 // will be discarded by the compiler, and so we shouldn't intern them.
409 qstr qst = MP_QSTR_NULL;
Damien Georgea4c52c52014-12-05 19:35:18 +0000410 if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
Damien George5042bce2014-05-25 22:06:06 +0100411 // intern short strings
Damien Georgea4c52c52014-12-05 19:35:18 +0000412 qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
Damien George5042bce2014-05-25 22:06:06 +0100413 } else {
414 // check if this string is already interned
Damien Georgea4c52c52014-12-05 19:35:18 +0000415 qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len);
Damien George5042bce2014-05-25 22:06:06 +0100416 }
417 if (qst != MP_QSTR_NULL) {
418 // qstr exists, make a leaf node
Damien George4c81ba82015-01-13 16:21:23 +0000419 pn = mp_parse_node_new_leaf(lex->tok_kind == MP_TOKEN_STRING ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst);
Damien George5042bce2014-05-25 22:06:06 +0100420 } else {
Damien George4c81ba82015-01-13 16:21:23 +0000421 // not interned, make a node holding a pointer to the string/bytes data
Damien George7d414a12015-02-08 01:57:40 +0000422 pn = make_node_string_bytes(parser, lex->tok_line, lex->tok_kind == MP_TOKEN_STRING ? RULE_string : RULE_bytes, lex->vstr.buf, lex->vstr.len);
Damien George5042bce2014-05-25 22:06:06 +0100423 }
Damien429d7192013-10-04 19:53:11 +0100424 } else {
Damien Georgea4c52c52014-12-05 19:35:18 +0000425 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
Damien429d7192013-10-04 19:53:11 +0100426 }
427 push_result_node(parser, pn);
428}
429
Damien George64f2b212015-10-08 14:58:15 +0100430#if MICROPY_COMP_MODULE_CONST
Damien Georgecbf76742015-11-27 13:38:15 +0000431STATIC const mp_rom_map_elem_t mp_constants_table[] = {
Damien George64f2b212015-10-08 14:58:15 +0100432 #if MICROPY_PY_UCTYPES
Damien Georgecbf76742015-11-27 13:38:15 +0000433 { MP_ROM_QSTR(MP_QSTR_uctypes), MP_ROM_PTR(&mp_module_uctypes) },
Damien George64f2b212015-10-08 14:58:15 +0100434 #endif
435 // Extra constants as defined by a port
436 MICROPY_PORT_CONSTANTS
437};
438STATIC MP_DEFINE_CONST_MAP(mp_constants_map, mp_constants_table);
439#endif
440
441#if MICROPY_COMP_CONST_FOLDING
Damien George16a6a472015-12-17 13:06:05 +0000442STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args) {
Damien George64f2b212015-10-08 14:58:15 +0100443 // this code does folding of arbitrary integer expressions, eg 1 + 2 * 3 + 4
444 // it does not do partial folding, eg 1 + 2 + x -> 3 + x
445
446 mp_int_t arg0;
447 if (rule->rule_id == RULE_expr
448 || rule->rule_id == RULE_xor_expr
449 || rule->rule_id == RULE_and_expr) {
450 // folding for binary ops: | ^ &
451 mp_parse_node_t pn = peek_result(parser, num_args - 1);
452 if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
453 return false;
454 }
455 arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
Damien George16a6a472015-12-17 13:06:05 +0000456 for (ssize_t i = num_args - 2; i >= 0; --i) {
Damien George64f2b212015-10-08 14:58:15 +0100457 pn = peek_result(parser, i);
458 if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
459 return false;
460 }
461 mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
462 if (rule->rule_id == RULE_expr) {
463 // int | int
464 arg0 |= arg1;
465 } else if (rule->rule_id == RULE_xor_expr) {
466 // int ^ int
467 arg0 ^= arg1;
468 } else if (rule->rule_id == RULE_and_expr) {
469 // int & int
470 arg0 &= arg1;
471 }
472 }
473 } else if (rule->rule_id == RULE_shift_expr
474 || rule->rule_id == RULE_arith_expr
475 || rule->rule_id == RULE_term) {
476 // folding for binary ops: << >> + - * / % //
477 mp_parse_node_t pn = peek_result(parser, num_args - 1);
478 if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
479 return false;
480 }
481 arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
Damien George16a6a472015-12-17 13:06:05 +0000482 for (ssize_t i = num_args - 2; i >= 1; i -= 2) {
Damien George64f2b212015-10-08 14:58:15 +0100483 pn = peek_result(parser, i - 1);
484 if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
485 return false;
486 }
487 mp_int_t arg1 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
488 mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, i));
489 if (tok == MP_TOKEN_OP_DBL_LESS) {
490 // int << int
491 if (arg1 >= (mp_int_t)BITS_PER_WORD
492 || arg0 > (MP_SMALL_INT_MAX >> arg1)
493 || arg0 < (MP_SMALL_INT_MIN >> arg1)) {
494 return false;
495 }
496 arg0 <<= arg1;
497 } else if (tok == MP_TOKEN_OP_DBL_MORE) {
498 // int >> int
499 if (arg1 >= (mp_int_t)BITS_PER_WORD) {
500 // Shifting to big amounts is underfined behavior
501 // in C and is CPU-dependent; propagate sign bit.
502 arg1 = BITS_PER_WORD - 1;
503 }
504 arg0 >>= arg1;
505 } else if (tok == MP_TOKEN_OP_PLUS) {
506 // int + int
507 arg0 += arg1;
508 } else if (tok == MP_TOKEN_OP_MINUS) {
509 // int - int
510 arg0 -= arg1;
511 } else if (tok == MP_TOKEN_OP_STAR) {
512 // int * int
513 if (mp_small_int_mul_overflow(arg0, arg1)) {
514 return false;
515 }
516 arg0 *= arg1;
517 } else if (tok == MP_TOKEN_OP_SLASH) {
518 // int / int
519 return false;
520 } else if (tok == MP_TOKEN_OP_PERCENT) {
521 // int % int
522 if (arg1 == 0) {
523 return false;
524 }
525 arg0 = mp_small_int_modulo(arg0, arg1);
526 } else {
527 assert(tok == MP_TOKEN_OP_DBL_SLASH); // should be
528 // int // int
529 if (arg1 == 0) {
530 return false;
531 }
532 arg0 = mp_small_int_floor_divide(arg0, arg1);
533 }
534 if (!MP_SMALL_INT_FITS(arg0)) {
535 return false;
536 }
537 }
538 } else if (rule->rule_id == RULE_factor_2) {
539 // folding for unary ops: + - ~
540 mp_parse_node_t pn = peek_result(parser, 0);
541 if (!MP_PARSE_NODE_IS_SMALL_INT(pn)) {
542 return false;
543 }
544 arg0 = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
545 mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, 1));
546 if (tok == MP_TOKEN_OP_PLUS) {
547 // +int
548 } else if (tok == MP_TOKEN_OP_MINUS) {
549 // -int
550 arg0 = -arg0;
551 if (!MP_SMALL_INT_FITS(arg0)) {
552 return false;
553 }
554 } else {
555 assert(tok == MP_TOKEN_OP_TILDE); // should be
556 // ~int
557 arg0 = ~arg0;
558 }
559
560 #if MICROPY_COMP_CONST
561 } else if (rule->rule_id == RULE_expr_stmt) {
562 mp_parse_node_t pn1 = peek_result(parser, 0);
563 if (!MP_PARSE_NODE_IS_NULL(pn1)
564 && !(MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_augassign)
565 || MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_assign_list))) {
566 // this node is of the form <x> = <y>
567 mp_parse_node_t pn0 = peek_result(parser, 1);
568 if (MP_PARSE_NODE_IS_ID(pn0)
569 && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_power)
570 && MP_PARSE_NODE_IS_ID(((mp_parse_node_struct_t*)pn1)->nodes[0])
571 && MP_PARSE_NODE_LEAF_ARG(((mp_parse_node_struct_t*)pn1)->nodes[0]) == MP_QSTR_const
572 && MP_PARSE_NODE_IS_STRUCT_KIND(((mp_parse_node_struct_t*)pn1)->nodes[1], RULE_trailer_paren)
573 && MP_PARSE_NODE_IS_NULL(((mp_parse_node_struct_t*)pn1)->nodes[2])
574 ) {
575 // code to assign dynamic constants: id = const(value)
576
577 // get the id
578 qstr id = MP_PARSE_NODE_LEAF_ARG(pn0);
579
580 // get the value
581 mp_parse_node_t pn_value = ((mp_parse_node_struct_t*)((mp_parse_node_struct_t*)pn1)->nodes[1])->nodes[0];
582 if (!MP_PARSE_NODE_IS_SMALL_INT(pn_value)) {
583 parser->parse_error = PARSE_ERROR_CONST;
584 return false;
585 }
586 mp_int_t value = MP_PARSE_NODE_LEAF_SMALL_INT(pn_value);
587
588 // store the value in the table of dynamic constants
589 mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
590 assert(elem->value == MP_OBJ_NULL);
591 elem->value = MP_OBJ_NEW_SMALL_INT(value);
592
593 // replace const(value) with value
594 pop_result(parser);
595 push_result_node(parser, pn_value);
596
597 // finished folding this assignment, but we still want it to be part of the tree
598 return false;
599 }
600 }
601 return false;
602 #endif
603
604 #if MICROPY_COMP_MODULE_CONST
605 } else if (rule->rule_id == RULE_power) {
606 mp_parse_node_t pn0 = peek_result(parser, 2);
607 mp_parse_node_t pn1 = peek_result(parser, 1);
608 mp_parse_node_t pn2 = peek_result(parser, 0);
609 if (!(MP_PARSE_NODE_IS_ID(pn0)
610 && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_trailer_period)
611 && MP_PARSE_NODE_IS_NULL(pn2))) {
612 return false;
613 }
614 // id1.id2
615 // look it up in constant table, see if it can be replaced with an integer
616 mp_parse_node_struct_t *pns1 = (mp_parse_node_struct_t*)pn1;
617 assert(MP_PARSE_NODE_IS_ID(pns1->nodes[0]));
618 qstr q_base = MP_PARSE_NODE_LEAF_ARG(pn0);
619 qstr q_attr = MP_PARSE_NODE_LEAF_ARG(pns1->nodes[0]);
620 mp_map_elem_t *elem = mp_map_lookup((mp_map_t*)&mp_constants_map, MP_OBJ_NEW_QSTR(q_base), MP_MAP_LOOKUP);
621 if (elem == NULL) {
622 return false;
623 }
624 mp_obj_t dest[2];
625 mp_load_method_maybe(elem->value, q_attr, dest);
Damien George999cedb2015-11-27 17:01:44 +0000626 if (!(MP_OBJ_IS_SMALL_INT(dest[0]) && dest[1] == MP_OBJ_NULL)) {
Damien George64f2b212015-10-08 14:58:15 +0100627 return false;
628 }
629 arg0 = MP_OBJ_SMALL_INT_VALUE(dest[0]);
630 #endif
631
632 } else {
633 return false;
634 }
635
636 // success folding this rule
637
Damien George16a6a472015-12-17 13:06:05 +0000638 for (size_t i = num_args; i > 0; i--) {
Damien George64f2b212015-10-08 14:58:15 +0100639 pop_result(parser);
640 }
641 push_result_node(parser, mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, arg0));
642
643 return true;
644}
645#endif
646
Damien George16a6a472015-12-17 13:06:05 +0000647STATIC void push_result_rule(parser_t *parser, size_t src_line, const rule_t *rule, size_t num_args) {
Damien George64f2b212015-10-08 14:58:15 +0100648 #if MICROPY_COMP_CONST_FOLDING
649 if (fold_constants(parser, rule, num_args)) {
650 // we folded this rule so return straight away
651 return;
652 }
653 #endif
654
Damien George58e0f4a2015-09-23 10:50:43 +0100655 mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * num_args);
Damien George58ba4c32014-04-10 14:27:31 +0000656 if (pn == NULL) {
Damien George64f2b212015-10-08 14:58:15 +0100657 parser->parse_error = PARSE_ERROR_MEMORY;
Damien George58ba4c32014-04-10 14:27:31 +0000658 return;
659 }
660 pn->source_line = src_line;
661 pn->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);
Damien George16a6a472015-12-17 13:06:05 +0000662 for (size_t i = num_args; i > 0; i--) {
Damien429d7192013-10-04 19:53:11 +0100663 pn->nodes[i - 1] = pop_result(parser);
664 }
Damiend99b0522013-12-21 18:17:45 +0000665 push_result_node(parser, (mp_parse_node_t)pn);
Damien429d7192013-10-04 19:53:11 +0100666}
667
Damien George58e0f4a2015-09-23 10:50:43 +0100668mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
Damien George69a818d2014-01-12 13:55:24 +0000669
Damien George1b82e9a2014-05-10 17:36:41 +0100670 // initialise parser and allocate memory for its stacks
Damien George69a818d2014-01-12 13:55:24 +0000671
Damien George1b82e9a2014-05-10 17:36:41 +0100672 parser_t parser;
Damien George69a818d2014-01-12 13:55:24 +0000673
Damien George64f2b212015-10-08 14:58:15 +0100674 parser.parse_error = PARSE_ERROR_NONE;
Damien George58ba4c32014-04-10 14:27:31 +0000675
Damien George58ebde42014-05-21 20:32:59 +0100676 parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
Damien George1b82e9a2014-05-10 17:36:41 +0100677 parser.rule_stack_top = 0;
678 parser.rule_stack = m_new_maybe(rule_stack_t, parser.rule_stack_alloc);
Damien429d7192013-10-04 19:53:11 +0100679
Damien George58ebde42014-05-21 20:32:59 +0100680 parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
Damien George1b82e9a2014-05-10 17:36:41 +0100681 parser.result_stack_top = 0;
682 parser.result_stack = m_new_maybe(mp_parse_node_t, parser.result_stack_alloc);
Damien429d7192013-10-04 19:53:11 +0100683
Damien George1b82e9a2014-05-10 17:36:41 +0100684 parser.lexer = lex;
685
Damien George58e0f4a2015-09-23 10:50:43 +0100686 parser.tree.chunk = NULL;
687 parser.cur_chunk = NULL;
688
Damien George64f2b212015-10-08 14:58:15 +0100689 #if MICROPY_COMP_CONST
690 mp_map_init(&parser.consts, 0);
691 #endif
692
Damien George1b82e9a2014-05-10 17:36:41 +0100693 // check if we could allocate the stacks
694 if (parser.rule_stack == NULL || parser.result_stack == NULL) {
695 goto memory_error;
696 }
Damien George08335002014-01-18 23:24:36 +0000697
Damien George69a818d2014-01-12 13:55:24 +0000698 // work out the top-level rule to use, and push it on the stack
Damien George16a6a472015-12-17 13:06:05 +0000699 size_t top_level_rule;
Damien5ac1b2e2013-10-18 19:58:12 +0100700 switch (input_kind) {
Damiend99b0522013-12-21 18:17:45 +0000701 case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
Damien Georged02c6d82014-01-15 22:14:03 +0000702 case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
Damien5ac1b2e2013-10-18 19:58:12 +0100703 default: top_level_rule = RULE_file_input;
704 }
Damien Georgea4c52c52014-12-05 19:35:18 +0000705 push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);
Damien429d7192013-10-04 19:53:11 +0100706
Damien George69a818d2014-01-12 13:55:24 +0000707 // parse!
708
Damien George16a6a472015-12-17 13:06:05 +0000709 size_t n, i; // state for the current rule
710 size_t rule_src_line; // source line for the first token matched by the current rule
Damien429d7192013-10-04 19:53:11 +0100711 bool backtrack = false;
Damien George08335002014-01-18 23:24:36 +0000712 const rule_t *rule = NULL;
Damien429d7192013-10-04 19:53:11 +0100713
714 for (;;) {
715 next_rule:
Damien George64f2b212015-10-08 14:58:15 +0100716 if (parser.rule_stack_top == 0 || parser.parse_error) {
Damien429d7192013-10-04 19:53:11 +0100717 break;
718 }
719
Damien George1b82e9a2014-05-10 17:36:41 +0100720 pop_rule(&parser, &rule, &i, &rule_src_line);
Damien429d7192013-10-04 19:53:11 +0100721 n = rule->act & RULE_ACT_ARG_MASK;
722
723 /*
724 // debugging
Damien George1b82e9a2014-05-10 17:36:41 +0100725 printf("depth=%d ", parser.rule_stack_top);
726 for (int j = 0; j < parser.rule_stack_top; ++j) {
Damien429d7192013-10-04 19:53:11 +0100727 printf(" ");
728 }
729 printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
730 */
731
732 switch (rule->act & RULE_ACT_KIND_MASK) {
733 case RULE_ACT_OR:
734 if (i > 0 && !backtrack) {
735 goto next_rule;
736 } else {
737 backtrack = false;
738 }
Damien Georgefa7c61d2015-07-24 14:35:57 +0000739 for (; i < n; ++i) {
740 uint16_t kind = rule->arg[i] & RULE_ARG_KIND_MASK;
741 if (kind == RULE_ARG_TOK) {
742 if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
743 push_result_token(&parser);
744 mp_lexer_to_next(lex);
Damien429d7192013-10-04 19:53:11 +0100745 goto next_rule;
Damien Georgefa7c61d2015-07-24 14:35:57 +0000746 }
Damien429d7192013-10-04 19:53:11 +0100747 } else {
Damien Georgefa7c61d2015-07-24 14:35:57 +0000748 assert(kind == RULE_ARG_RULE);
749 if (i + 1 < n) {
750 push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
751 }
752 push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
Damien429d7192013-10-04 19:53:11 +0100753 goto next_rule;
754 }
Damien429d7192013-10-04 19:53:11 +0100755 }
Damien Georgefa7c61d2015-07-24 14:35:57 +0000756 backtrack = true;
Damien429d7192013-10-04 19:53:11 +0100757 break;
758
Damien George2870d852014-12-20 18:06:08 +0000759 case RULE_ACT_AND: {
Damien429d7192013-10-04 19:53:11 +0100760
761 // failed, backtrack if we can, else syntax error
762 if (backtrack) {
763 assert(i > 0);
764 if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
765 // an optional rule that failed, so continue with next arg
Damien George1b82e9a2014-05-10 17:36:41 +0100766 push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien429d7192013-10-04 19:53:11 +0100767 backtrack = false;
768 } else {
769 // a mandatory rule that failed, so propagate backtrack
770 if (i > 1) {
771 // already eaten tokens so can't backtrack
772 goto syntax_error;
773 } else {
774 goto next_rule;
775 }
776 }
777 }
778
779 // progress through the rule
780 for (; i < n; ++i) {
781 switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
Damien George2870d852014-12-20 18:06:08 +0000782 case RULE_ARG_TOK: {
Damien429d7192013-10-04 19:53:11 +0100783 // need to match a token
Damien George2870d852014-12-20 18:06:08 +0000784 mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
Damien Georgea4c52c52014-12-05 19:35:18 +0000785 if (lex->tok_kind == tok_kind) {
Damien429d7192013-10-04 19:53:11 +0100786 // matched token
Damiend99b0522013-12-21 18:17:45 +0000787 if (tok_kind == MP_TOKEN_NAME) {
Damien Georgea4c52c52014-12-05 19:35:18 +0000788 push_result_token(&parser);
Damien429d7192013-10-04 19:53:11 +0100789 }
Damiend99b0522013-12-21 18:17:45 +0000790 mp_lexer_to_next(lex);
Damien429d7192013-10-04 19:53:11 +0100791 } else {
792 // failed to match token
793 if (i > 0) {
794 // already eaten tokens so can't backtrack
795 goto syntax_error;
796 } else {
797 // this rule failed, so backtrack
798 backtrack = true;
799 goto next_rule;
800 }
801 }
802 break;
Damien Georged2d64f02015-01-14 21:32:42 +0000803 }
Damien429d7192013-10-04 19:53:11 +0100804 case RULE_ARG_RULE:
Damien429d7192013-10-04 19:53:11 +0100805 case RULE_ARG_OPT_RULE:
Damien Georged2d64f02015-01-14 21:32:42 +0000806 rule_and_no_other_choice:
Damien George1b82e9a2014-05-10 17:36:41 +0100807 push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
808 push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
Damien429d7192013-10-04 19:53:11 +0100809 goto next_rule;
810 default:
811 assert(0);
Damien Georged2d64f02015-01-14 21:32:42 +0000812 goto rule_and_no_other_choice; // to help flow control analysis
Damien429d7192013-10-04 19:53:11 +0100813 }
814 }
815
816 assert(i == n);
817
818 // matched the rule, so now build the corresponding parse_node
819
820 // count number of arguments for the parse_node
821 i = 0;
Damien George2870d852014-12-20 18:06:08 +0000822 bool emit_rule = false;
Damien George16a6a472015-12-17 13:06:05 +0000823 for (size_t x = 0; x < n; ++x) {
Damien429d7192013-10-04 19:53:11 +0100824 if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
Damien George2870d852014-12-20 18:06:08 +0000825 mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
Damiend99b0522013-12-21 18:17:45 +0000826 if (tok_kind >= MP_TOKEN_NAME) {
Damien429d7192013-10-04 19:53:11 +0100827 emit_rule = true;
828 }
Damiend99b0522013-12-21 18:17:45 +0000829 if (tok_kind == MP_TOKEN_NAME) {
Damien429d7192013-10-04 19:53:11 +0100830 // only tokens which were names are pushed to stack
831 i += 1;
832 }
833 } else {
834 // rules are always pushed
835 i += 1;
836 }
837 }
838
Damien George65dc9602015-08-14 12:24:11 +0100839 #if !MICROPY_ENABLE_DOC_STRING
Damien George5042bce2014-05-25 22:06:06 +0100840 // this code discards lonely statements, such as doc strings
Damien George1b82e9a2014-05-10 17:36:41 +0100841 if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
842 mp_parse_node_t p = peek_result(&parser, 1);
Damien George5042bce2014-05-25 22:06:06 +0100843 if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_string)) {
Damien George52b5d762014-09-23 15:31:56 +0000844 pop_result(&parser); // MP_PARSE_NODE_NULL
Damien George58e0f4a2015-09-23 10:50:43 +0100845 mp_parse_node_t pn = pop_result(&parser); // possibly RULE_string
846 if (MP_PARSE_NODE_IS_STRUCT(pn)) {
847 mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
848 if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_string) {
Damien George16a6a472015-12-17 13:06:05 +0000849 m_del(char, (char*)pns->nodes[0], (size_t)pns->nodes[1]);
Damien George58e0f4a2015-09-23 10:50:43 +0100850 }
851 }
Damien George1b82e9a2014-05-10 17:36:41 +0100852 push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
Damien George93afa232014-05-06 21:44:11 +0100853 break;
854 }
855 }
Damien George65dc9602015-08-14 12:24:11 +0100856 #endif
Damien George93afa232014-05-06 21:44:11 +0100857
Damien429d7192013-10-04 19:53:11 +0100858 // always emit these rules, even if they have only 1 argument
859 if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
860 emit_rule = true;
861 }
862
Damien Georgeb47ea4e2014-12-20 18:37:50 +0000863 // if a rule has the RULE_ACT_ALLOW_IDENT bit set then this
864 // rule should not be emitted if it has only 1 argument
865 // NOTE: can't set this flag for atom_paren because we need it
866 // to distinguish, for example, [a,b] from [(a,b)]
Damien Georgeb47ea4e2014-12-20 18:37:50 +0000867 if (rule->act & RULE_ACT_ALLOW_IDENT) {
Damien429d7192013-10-04 19:53:11 +0100868 emit_rule = false;
869 }
870
871 // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
Damien Georgeb47ea4e2014-12-20 18:37:50 +0000872 if (ADD_BLANK_NODE(rule)) {
Damien429d7192013-10-04 19:53:11 +0100873 emit_rule = true;
Damien George1b82e9a2014-05-10 17:36:41 +0100874 push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien429d7192013-10-04 19:53:11 +0100875 i += 1;
876 }
877
Damien George16a6a472015-12-17 13:06:05 +0000878 size_t num_not_nil = 0;
879 for (size_t x = 0; x < i; ++x) {
Damien George1b82e9a2014-05-10 17:36:41 +0100880 if (peek_result(&parser, x) != MP_PARSE_NODE_NULL) {
Damien429d7192013-10-04 19:53:11 +0100881 num_not_nil += 1;
882 }
883 }
Damien George366239b2015-10-08 23:13:18 +0100884 if (emit_rule || num_not_nil != 1) {
885 // need to add rule when num_not_nil==0 for, eg, atom_paren, testlist_comp_3b
Damien George1b82e9a2014-05-10 17:36:41 +0100886 push_result_rule(&parser, rule_src_line, rule, i);
Damien George366239b2015-10-08 23:13:18 +0100887 } else {
Damien429d7192013-10-04 19:53:11 +0100888 // single result, leave it on stack
Damiend99b0522013-12-21 18:17:45 +0000889 mp_parse_node_t pn = MP_PARSE_NODE_NULL;
Damien George16a6a472015-12-17 13:06:05 +0000890 for (size_t x = 0; x < i; ++x) {
Damien George1b82e9a2014-05-10 17:36:41 +0100891 mp_parse_node_t pn2 = pop_result(&parser);
Damiend99b0522013-12-21 18:17:45 +0000892 if (pn2 != MP_PARSE_NODE_NULL) {
Damien429d7192013-10-04 19:53:11 +0100893 pn = pn2;
894 }
895 }
Damien George1b82e9a2014-05-10 17:36:41 +0100896 push_result_node(&parser, pn);
Damien429d7192013-10-04 19:53:11 +0100897 }
898 break;
Damien George2870d852014-12-20 18:06:08 +0000899 }
Damien429d7192013-10-04 19:53:11 +0100900
Damien George2870d852014-12-20 18:06:08 +0000901 case RULE_ACT_LIST: {
Damien429d7192013-10-04 19:53:11 +0100902 // n=2 is: item item*
903 // n=1 is: item (sep item)*
904 // n=3 is: item (sep item)* [sep]
Damien George2870d852014-12-20 18:06:08 +0000905 bool had_trailing_sep;
Damien429d7192013-10-04 19:53:11 +0100906 if (backtrack) {
907 list_backtrack:
908 had_trailing_sep = false;
909 if (n == 2) {
910 if (i == 1) {
911 // fail on item, first time round; propagate backtrack
912 goto next_rule;
913 } else {
914 // fail on item, in later rounds; finish with this rule
915 backtrack = false;
916 }
917 } else {
918 if (i == 1) {
919 // fail on item, first time round; propagate backtrack
920 goto next_rule;
921 } else if ((i & 1) == 1) {
922 // fail on item, in later rounds; have eaten tokens so can't backtrack
923 if (n == 3) {
924 // list allows trailing separator; finish parsing list
925 had_trailing_sep = true;
926 backtrack = false;
927 } else {
928 // list doesn't allowing trailing separator; fail
929 goto syntax_error;
930 }
931 } else {
932 // fail on separator; finish parsing list
933 backtrack = false;
934 }
935 }
936 } else {
937 for (;;) {
Damien George16a6a472015-12-17 13:06:05 +0000938 size_t arg = rule->arg[i & 1 & n];
Damien429d7192013-10-04 19:53:11 +0100939 switch (arg & RULE_ARG_KIND_MASK) {
940 case RULE_ARG_TOK:
Damien Georgea4c52c52014-12-05 19:35:18 +0000941 if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
Damien429d7192013-10-04 19:53:11 +0100942 if (i & 1 & n) {
943 // separators which are tokens are not pushed to result stack
944 } else {
Damien Georgea4c52c52014-12-05 19:35:18 +0000945 push_result_token(&parser);
Damien429d7192013-10-04 19:53:11 +0100946 }
Damiend99b0522013-12-21 18:17:45 +0000947 mp_lexer_to_next(lex);
Damien429d7192013-10-04 19:53:11 +0100948 // got element of list, so continue parsing list
949 i += 1;
950 } else {
951 // couldn't get element of list
952 i += 1;
953 backtrack = true;
954 goto list_backtrack;
955 }
956 break;
957 case RULE_ARG_RULE:
Damien Georged2d64f02015-01-14 21:32:42 +0000958 rule_list_no_other_choice:
Damien George1b82e9a2014-05-10 17:36:41 +0100959 push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
960 push_rule_from_arg(&parser, arg); // push child of list-rule
Damien429d7192013-10-04 19:53:11 +0100961 goto next_rule;
962 default:
963 assert(0);
Damien Georged2d64f02015-01-14 21:32:42 +0000964 goto rule_list_no_other_choice; // to help flow control analysis
Damien429d7192013-10-04 19:53:11 +0100965 }
966 }
967 }
968 assert(i >= 1);
969
970 // compute number of elements in list, result in i
971 i -= 1;
972 if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
973 // don't count separators when they are tokens
974 i = (i + 1) / 2;
975 }
976
977 if (i == 1) {
978 // list matched single item
979 if (had_trailing_sep) {
980 // if there was a trailing separator, make a list of a single item
Damien George1b82e9a2014-05-10 17:36:41 +0100981 push_result_rule(&parser, rule_src_line, rule, i);
Damien429d7192013-10-04 19:53:11 +0100982 } else {
983 // just leave single item on stack (ie don't wrap in a list)
984 }
985 } else {
Damien George1b82e9a2014-05-10 17:36:41 +0100986 push_result_rule(&parser, rule_src_line, rule, i);
Damien429d7192013-10-04 19:53:11 +0100987 }
988 break;
Damien George2870d852014-12-20 18:06:08 +0000989 }
Damien429d7192013-10-04 19:53:11 +0100990
991 default:
992 assert(0);
993 }
994 }
Damien91d387d2013-10-09 15:09:52 +0100995
Damien George64f2b212015-10-08 14:58:15 +0100996 #if MICROPY_COMP_CONST
997 mp_map_deinit(&parser.consts);
998 #endif
999
Damien George58e0f4a2015-09-23 10:50:43 +01001000 // truncate final chunk and link into chain of chunks
1001 if (parser.cur_chunk != NULL) {
1002 (void)m_renew(byte, parser.cur_chunk,
1003 sizeof(mp_parse_chunk_t) + parser.cur_chunk->alloc,
1004 sizeof(mp_parse_chunk_t) + parser.cur_chunk->union_.used);
1005 parser.cur_chunk->alloc = parser.cur_chunk->union_.used;
1006 parser.cur_chunk->union_.next = parser.tree.chunk;
1007 parser.tree.chunk = parser.cur_chunk;
1008 }
1009
Damien Georgef8048332015-02-08 13:40:20 +00001010 mp_obj_t exc;
Damien George58ba4c32014-04-10 14:27:31 +00001011
Damien George64f2b212015-10-08 14:58:15 +01001012 if (parser.parse_error) {
1013 #if MICROPY_COMP_CONST
1014 if (parser.parse_error == PARSE_ERROR_CONST) {
1015 exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
1016 "constant must be an integer");
1017 } else
1018 #endif
1019 {
1020 assert(parser.parse_error == PARSE_ERROR_MEMORY);
1021 memory_error:
1022 exc = mp_obj_new_exception_msg(&mp_type_MemoryError,
1023 "parser could not allocate enough memory");
1024 }
Damien George58e0f4a2015-09-23 10:50:43 +01001025 parser.tree.root = MP_PARSE_NODE_NULL;
Damien Georgefdfcee72015-10-12 12:59:18 +01001026 } else if (
1027 lex->tok_kind != MP_TOKEN_END // check we are at the end of the token stream
1028 || parser.result_stack_top == 0 // check that we got a node (can fail on empty input)
1029 ) {
1030 syntax_error:
1031 if (lex->tok_kind == MP_TOKEN_INDENT) {
1032 exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
1033 "unexpected indent");
1034 } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
1035 exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
1036 "unindent does not match any outer indentation level");
1037 } else {
1038 exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
1039 "invalid syntax");
1040 }
1041 parser.tree.root = MP_PARSE_NODE_NULL;
1042 } else {
1043 // no errors
1044
1045 //result_stack_show(parser);
1046 //printf("rule stack alloc: %d\n", parser.rule_stack_alloc);
1047 //printf("result stack alloc: %d\n", parser.result_stack_alloc);
1048 //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
1049
1050 // get the root parse node that we created
1051 assert(parser.result_stack_top == 1);
1052 exc = MP_OBJ_NULL;
1053 parser.tree.root = parser.result_stack[0];
Damien George58ba4c32014-04-10 14:27:31 +00001054 }
1055
Damien George69a818d2014-01-12 13:55:24 +00001056 // free the memory that we don't need anymore
Damien George1b82e9a2014-05-10 17:36:41 +01001057 m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
1058 m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);
Damien George0bfc7632015-02-07 18:33:58 +00001059 // we also free the lexer on behalf of the caller (see below)
Damien George69a818d2014-01-12 13:55:24 +00001060
Damien George0bfc7632015-02-07 18:33:58 +00001061 if (exc != MP_OBJ_NULL) {
1062 // had an error so raise the exception
1063 // add traceback to give info about file name and location
1064 // we don't have a 'block' name, so just pass the NULL qstr to indicate this
1065 mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, MP_QSTR_NULL);
1066 mp_lexer_free(lex);
1067 nlr_raise(exc);
1068 } else {
1069 mp_lexer_free(lex);
Damien George58e0f4a2015-09-23 10:50:43 +01001070 return parser.tree;
Damien George0bfc7632015-02-07 18:33:58 +00001071 }
Damien429d7192013-10-04 19:53:11 +01001072}
Damien George58e0f4a2015-09-23 10:50:43 +01001073
1074void mp_parse_tree_clear(mp_parse_tree_t *tree) {
1075 mp_parse_chunk_t *chunk = tree->chunk;
1076 while (chunk != NULL) {
1077 mp_parse_chunk_t *next = chunk->union_.next;
1078 m_del(byte, chunk, sizeof(mp_parse_chunk_t) + chunk->alloc);
1079 chunk = next;
1080 }
1081}