blob: 2f16748a6ce735a6749ec78f1c5752251fb58216 [file] [log] [blame]
Damien George04b91472014-05-03 23:27:38 +01001/*
2 * This file is part of the Micro Python project, http://micropython.org/
3 *
4 * The MIT License (MIT)
5 *
Damien George4735c452015-04-21 16:43:18 +00006 * Copyright (c) 2013-2015 Damien P. George
Damien George04b91472014-05-03 23:27:38 +01007 *
8 * Permission is hereby granted, free of charge, to any person obtaining a copy
9 * of this software and associated documentation files (the "Software"), to deal
10 * in the Software without restriction, including without limitation the rights
11 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 * copies of the Software, and to permit persons to whom the Software is
13 * furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice shall be included in
16 * all copies or substantial portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24 * THE SOFTWARE.
25 */
26
xbeefe34222014-03-16 00:14:26 -070027#include <stdbool.h>
Damien429d7192013-10-04 19:53:11 +010028#include <stdint.h>
29#include <stdio.h>
Damien George7dbf74c2016-01-08 13:42:00 +000030#include <unistd.h> // for ssize_t
Damien429d7192013-10-04 19:53:11 +010031#include <assert.h>
Damien George5042bce2014-05-25 22:06:06 +010032#include <string.h>
Damien429d7192013-10-04 19:53:11 +010033
Damien George0bfc7632015-02-07 18:33:58 +000034#include "py/nlr.h"
Damien George51dfcb42015-01-01 20:27:54 +000035#include "py/lexer.h"
36#include "py/parse.h"
37#include "py/parsenum.h"
Damien George22b22652016-01-07 14:40:35 +000038#include "py/runtime0.h"
Damien George64f2b212015-10-08 14:58:15 +010039#include "py/runtime.h"
Damien George22b22652016-01-07 14:40:35 +000040#include "py/objint.h"
Damien George52552552017-02-24 13:43:43 +110041#include "py/objstr.h"
Damien George64f2b212015-10-08 14:58:15 +010042#include "py/builtin.h"
Damien429d7192013-10-04 19:53:11 +010043
Damien Georgedd5353a2015-12-18 12:35:44 +000044#if MICROPY_ENABLE_COMPILER
45
Damien429d7192013-10-04 19:53:11 +010046#define RULE_ACT_ARG_MASK (0x0f)
Damien Georgeb47ea4e2014-12-20 18:37:50 +000047#define RULE_ACT_KIND_MASK (0x30)
48#define RULE_ACT_ALLOW_IDENT (0x40)
49#define RULE_ACT_ADD_BLANK (0x80)
Damien429d7192013-10-04 19:53:11 +010050#define RULE_ACT_OR (0x10)
51#define RULE_ACT_AND (0x20)
52#define RULE_ACT_LIST (0x30)
53
Damien429d7192013-10-04 19:53:11 +010054#define RULE_ARG_KIND_MASK (0xf000)
55#define RULE_ARG_ARG_MASK (0x0fff)
56#define RULE_ARG_TOK (0x1000)
57#define RULE_ARG_RULE (0x2000)
Damien George4735c452015-04-21 16:43:18 +000058#define RULE_ARG_OPT_RULE (0x3000)
Damien429d7192013-10-04 19:53:11 +010059
60// (un)comment to use rule names; for debugging
61//#define USE_RULE_NAME (1)
62
63typedef struct _rule_t {
64 byte rule_id;
65 byte act;
66#ifdef USE_RULE_NAME
67 const char *rule_name;
68#endif
69 uint16_t arg[];
70} rule_t;
71
72enum {
Damien George71019ae2017-02-15 10:58:05 +110073// define rules with a compile function
Damien George00208ce2014-01-23 00:00:53 +000074#define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
Damien George71019ae2017-02-15 10:58:05 +110075#define DEF_RULE_NC(rule, kind, ...)
Damien George51dfcb42015-01-01 20:27:54 +000076#include "py/grammar.h"
Damien429d7192013-10-04 19:53:11 +010077#undef DEF_RULE
Damien George71019ae2017-02-15 10:58:05 +110078#undef DEF_RULE_NC
Damien George7d414a12015-02-08 01:57:40 +000079 RULE_const_object, // special node for a constant, generic Python object
Damien George71019ae2017-02-15 10:58:05 +110080
81// define rules without a compile function
82#define DEF_RULE(rule, comp, kind, ...)
83#define DEF_RULE_NC(rule, kind, ...) RULE_##rule,
84#include "py/grammar.h"
85#undef DEF_RULE
86#undef DEF_RULE_NC
Damien429d7192013-10-04 19:53:11 +010087};
88
89#define or(n) (RULE_ACT_OR | n)
90#define and(n) (RULE_ACT_AND | n)
Damien George0c1de1c2016-04-14 13:23:50 +010091#define and_ident(n) (RULE_ACT_AND | n | RULE_ACT_ALLOW_IDENT)
92#define and_blank(n) (RULE_ACT_AND | n | RULE_ACT_ADD_BLANK)
Damien429d7192013-10-04 19:53:11 +010093#define one_or_more (RULE_ACT_LIST | 2)
94#define list (RULE_ACT_LIST | 1)
95#define list_with_end (RULE_ACT_LIST | 3)
Damiend99b0522013-12-21 18:17:45 +000096#define tok(t) (RULE_ARG_TOK | MP_TOKEN_##t)
Damien429d7192013-10-04 19:53:11 +010097#define rule(r) (RULE_ARG_RULE | RULE_##r)
Damien429d7192013-10-04 19:53:11 +010098#define opt_rule(r) (RULE_ARG_OPT_RULE | RULE_##r)
99#ifdef USE_RULE_NAME
Damien George00208ce2014-01-23 00:00:53 +0000100#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
Damien George71019ae2017-02-15 10:58:05 +1100101#define DEF_RULE_NC(rule, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
Damien429d7192013-10-04 19:53:11 +0100102#else
Damien George00208ce2014-01-23 00:00:53 +0000103#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
Damien George71019ae2017-02-15 10:58:05 +1100104#define DEF_RULE_NC(rule, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
Damien429d7192013-10-04 19:53:11 +0100105#endif
Damien George51dfcb42015-01-01 20:27:54 +0000106#include "py/grammar.h"
Damien429d7192013-10-04 19:53:11 +0100107#undef or
108#undef and
109#undef list
110#undef list_with_end
111#undef tok
112#undef rule
Damien429d7192013-10-04 19:53:11 +0100113#undef opt_rule
114#undef one_or_more
115#undef DEF_RULE
Damien George71019ae2017-02-15 10:58:05 +1100116#undef DEF_RULE_NC
Damien429d7192013-10-04 19:53:11 +0100117
Damien George3ff16ff2016-05-20 12:38:15 +0100118STATIC const rule_t *const rules[] = {
Damien George71019ae2017-02-15 10:58:05 +1100119// define rules with a compile function
Damien George00208ce2014-01-23 00:00:53 +0000120#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
Damien George71019ae2017-02-15 10:58:05 +1100121#define DEF_RULE_NC(rule, kind, ...)
Damien George51dfcb42015-01-01 20:27:54 +0000122#include "py/grammar.h"
Damien429d7192013-10-04 19:53:11 +0100123#undef DEF_RULE
Damien George71019ae2017-02-15 10:58:05 +1100124#undef DEF_RULE_NC
Damien George71019ae2017-02-15 10:58:05 +1100125 NULL, // RULE_const_object
126
127// define rules without a compile function
128#define DEF_RULE(rule, comp, kind, ...)
129#define DEF_RULE_NC(rule, kind, ...) &rule_##rule,
130#include "py/grammar.h"
131#undef DEF_RULE
132#undef DEF_RULE_NC
Damien429d7192013-10-04 19:53:11 +0100133};
134
135typedef struct _rule_stack_t {
Damien George16a6a472015-12-17 13:06:05 +0000136 size_t src_line : 8 * sizeof(size_t) - 8; // maximum bits storing source line number
137 size_t rule_id : 8; // this must be large enough to fit largest rule number
138 size_t arg_i; // this dictates the maximum nodes in a "list" of things
Damien429d7192013-10-04 19:53:11 +0100139} rule_stack_t;
140
Damien George58e0f4a2015-09-23 10:50:43 +0100141typedef struct _mp_parse_chunk_t {
Damien George16a6a472015-12-17 13:06:05 +0000142 size_t alloc;
Damien George58e0f4a2015-09-23 10:50:43 +0100143 union {
Damien George16a6a472015-12-17 13:06:05 +0000144 size_t used;
Damien George58e0f4a2015-09-23 10:50:43 +0100145 struct _mp_parse_chunk_t *next;
146 } union_;
147 byte data[];
148} mp_parse_chunk_t;
149
Damien429d7192013-10-04 19:53:11 +0100150typedef struct _parser_t {
Damien George16a6a472015-12-17 13:06:05 +0000151 size_t rule_stack_alloc;
152 size_t rule_stack_top;
Damien429d7192013-10-04 19:53:11 +0100153 rule_stack_t *rule_stack;
154
Damien George16a6a472015-12-17 13:06:05 +0000155 size_t result_stack_alloc;
156 size_t result_stack_top;
Damiend99b0522013-12-21 18:17:45 +0000157 mp_parse_node_t *result_stack;
Damien George08335002014-01-18 23:24:36 +0000158
159 mp_lexer_t *lexer;
Damien George58e0f4a2015-09-23 10:50:43 +0100160
161 mp_parse_tree_t tree;
162 mp_parse_chunk_t *cur_chunk;
Damien429d7192013-10-04 19:53:11 +0100163
Damien George64f2b212015-10-08 14:58:15 +0100164 #if MICROPY_COMP_CONST
165 mp_map_t consts;
166 #endif
167} parser_t;
Damien George58ba4c32014-04-10 14:27:31 +0000168
Damien George58e0f4a2015-09-23 10:50:43 +0100169STATIC void *parser_alloc(parser_t *parser, size_t num_bytes) {
170 // use a custom memory allocator to store parse nodes sequentially in large chunks
171
172 mp_parse_chunk_t *chunk = parser->cur_chunk;
173
174 if (chunk != NULL && chunk->union_.used + num_bytes > chunk->alloc) {
175 // not enough room at end of previously allocated chunk so try to grow
176 mp_parse_chunk_t *new_data = (mp_parse_chunk_t*)m_renew_maybe(byte, chunk,
177 sizeof(mp_parse_chunk_t) + chunk->alloc,
178 sizeof(mp_parse_chunk_t) + chunk->alloc + num_bytes, false);
179 if (new_data == NULL) {
180 // could not grow existing memory; shrink it to fit previous
Damien Georged6c558c2016-02-23 13:44:29 +0000181 (void)m_renew_maybe(byte, chunk, sizeof(mp_parse_chunk_t) + chunk->alloc,
182 sizeof(mp_parse_chunk_t) + chunk->union_.used, false);
Damien George58e0f4a2015-09-23 10:50:43 +0100183 chunk->alloc = chunk->union_.used;
184 chunk->union_.next = parser->tree.chunk;
185 parser->tree.chunk = chunk;
186 chunk = NULL;
187 } else {
188 // could grow existing memory
189 chunk->alloc += num_bytes;
190 }
191 }
192
193 if (chunk == NULL) {
194 // no previous chunk, allocate a new chunk
195 size_t alloc = MICROPY_ALLOC_PARSE_CHUNK_INIT;
196 if (alloc < num_bytes) {
197 alloc = num_bytes;
198 }
199 chunk = (mp_parse_chunk_t*)m_new(byte, sizeof(mp_parse_chunk_t) + alloc);
200 chunk->alloc = alloc;
201 chunk->union_.used = 0;
202 parser->cur_chunk = chunk;
203 }
204
205 byte *ret = chunk->data + chunk->union_.used;
206 chunk->union_.used += num_bytes;
207 return ret;
208}
209
Damien George16a6a472015-12-17 13:06:05 +0000210STATIC void push_rule(parser_t *parser, size_t src_line, const rule_t *rule, size_t arg_i) {
Damien429d7192013-10-04 19:53:11 +0100211 if (parser->rule_stack_top >= parser->rule_stack_alloc) {
Damien Georgef615d822017-02-24 14:56:37 +1100212 rule_stack_t *rs = m_renew(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MICROPY_ALLOC_PARSE_RULE_INC);
Damien George58ba4c32014-04-10 14:27:31 +0000213 parser->rule_stack = rs;
Damien George58ebde42014-05-21 20:32:59 +0100214 parser->rule_stack_alloc += MICROPY_ALLOC_PARSE_RULE_INC;
Damien429d7192013-10-04 19:53:11 +0100215 }
Damien George08335002014-01-18 23:24:36 +0000216 rule_stack_t *rs = &parser->rule_stack[parser->rule_stack_top++];
217 rs->src_line = src_line;
218 rs->rule_id = rule->rule_id;
219 rs->arg_i = arg_i;
Damien429d7192013-10-04 19:53:11 +0100220}
221
Damien George16a6a472015-12-17 13:06:05 +0000222STATIC void push_rule_from_arg(parser_t *parser, size_t arg) {
Damien429d7192013-10-04 19:53:11 +0100223 assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
Damien George16a6a472015-12-17 13:06:05 +0000224 size_t rule_id = arg & RULE_ARG_ARG_MASK;
Damien Georgea4c52c52014-12-05 19:35:18 +0000225 push_rule(parser, parser->lexer->tok_line, rules[rule_id], 0);
Damien429d7192013-10-04 19:53:11 +0100226}
227
Damien George16a6a472015-12-17 13:06:05 +0000228STATIC void pop_rule(parser_t *parser, const rule_t **rule, size_t *arg_i, size_t *src_line) {
Damien429d7192013-10-04 19:53:11 +0100229 parser->rule_stack_top -= 1;
230 *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id];
231 *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i;
Damien George08335002014-01-18 23:24:36 +0000232 *src_line = parser->rule_stack[parser->rule_stack_top].src_line;
Damien429d7192013-10-04 19:53:11 +0100233}
234
Damien Georgeb0cbfb02016-11-13 15:32:05 +1100235bool mp_parse_node_is_const_false(mp_parse_node_t pn) {
236 return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_FALSE)
237 || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_SMALL_INT(pn) == 0);
238}
239
240bool mp_parse_node_is_const_true(mp_parse_node_t pn) {
241 return MP_PARSE_NODE_IS_TOKEN_KIND(pn, MP_TOKEN_KW_TRUE)
242 || (MP_PARSE_NODE_IS_SMALL_INT(pn) && MP_PARSE_NODE_LEAF_SMALL_INT(pn) != 0);
243}
244
Damien George22b22652016-01-07 14:40:35 +0000245bool mp_parse_node_get_int_maybe(mp_parse_node_t pn, mp_obj_t *o) {
246 if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
247 *o = MP_OBJ_NEW_SMALL_INT(MP_PARSE_NODE_LEAF_SMALL_INT(pn));
248 return true;
249 } else if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, RULE_const_object)) {
250 mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
251 #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
252 // nodes are 32-bit pointers, but need to extract 64-bit object
253 *o = (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32);
254 #else
255 *o = (mp_obj_t)pns->nodes[0];
256 #endif
257 return MP_OBJ_IS_INT(*o);
258 } else {
259 return false;
260 }
261}
262
Damien George16a6a472015-12-17 13:06:05 +0000263int mp_parse_node_extract_list(mp_parse_node_t *pn, size_t pn_kind, mp_parse_node_t **nodes) {
Damien Georgedfe944c2015-02-13 02:29:46 +0000264 if (MP_PARSE_NODE_IS_NULL(*pn)) {
265 *nodes = NULL;
266 return 0;
267 } else if (MP_PARSE_NODE_IS_LEAF(*pn)) {
268 *nodes = pn;
269 return 1;
270 } else {
271 mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)(*pn);
272 if (MP_PARSE_NODE_STRUCT_KIND(pns) != pn_kind) {
273 *nodes = pn;
274 return 1;
275 } else {
276 *nodes = pns->nodes;
277 return MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
278 }
279 }
280}
281
Damien Georgecbd2f742014-01-19 11:48:48 +0000282#if MICROPY_DEBUG_PRINTERS
Damien George16a6a472015-12-17 13:06:05 +0000283void mp_parse_node_print(mp_parse_node_t pn, size_t indent) {
Damien George08335002014-01-18 23:24:36 +0000284 if (MP_PARSE_NODE_IS_STRUCT(pn)) {
285 printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
286 } else {
287 printf(" ");
288 }
Damien George16a6a472015-12-17 13:06:05 +0000289 for (size_t i = 0; i < indent; i++) {
Damien429d7192013-10-04 19:53:11 +0100290 printf(" ");
291 }
Damiend99b0522013-12-21 18:17:45 +0000292 if (MP_PARSE_NODE_IS_NULL(pn)) {
Damien429d7192013-10-04 19:53:11 +0100293 printf("NULL\n");
Paul Sokolovsky56e5ef22014-02-22 16:39:45 +0200294 } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
Damien George40f3c022014-07-03 13:25:24 +0100295 mp_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
Paul Sokolovsky56e5ef22014-02-22 16:39:45 +0200296 printf("int(" INT_FMT ")\n", arg);
Damiend99b0522013-12-21 18:17:45 +0000297 } else if (MP_PARSE_NODE_IS_LEAF(pn)) {
Damien George16a6a472015-12-17 13:06:05 +0000298 uintptr_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
Damiend99b0522013-12-21 18:17:45 +0000299 switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
300 case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
Damiend99b0522013-12-21 18:17:45 +0000301 case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
302 case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
Damien George86e94232017-01-17 17:00:55 +1100303 default:
304 assert(MP_PARSE_NODE_LEAF_KIND(pn) == MP_PARSE_NODE_TOKEN);
305 printf("tok(%u)\n", (uint)arg); break;
Damien429d7192013-10-04 19:53:11 +0100306 }
307 } else {
Damien George5042bce2014-05-25 22:06:06 +0100308 // node must be a mp_parse_node_struct_t
Damien Georgeb829b5c2014-01-25 13:51:19 +0000309 mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
Damien George52552552017-02-24 13:43:43 +1100310 if (MP_PARSE_NODE_STRUCT_KIND(pns) == RULE_const_object) {
Damien Georgeb8cfb0d2015-11-27 17:09:11 +0000311 #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
312 printf("literal const(%016llx)\n", (uint64_t)pns->nodes[0] | ((uint64_t)pns->nodes[1] << 32));
313 #else
Damien George7d414a12015-02-08 01:57:40 +0000314 printf("literal const(%p)\n", (mp_obj_t)pns->nodes[0]);
Damien Georgeb8cfb0d2015-11-27 17:09:11 +0000315 #endif
Damien George5042bce2014-05-25 22:06:06 +0100316 } else {
Damien George16a6a472015-12-17 13:06:05 +0000317 size_t n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
Damien429d7192013-10-04 19:53:11 +0100318#ifdef USE_RULE_NAME
Damien George16a6a472015-12-17 13:06:05 +0000319 printf("%s(%u) (n=%u)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, (uint)MP_PARSE_NODE_STRUCT_KIND(pns), (uint)n);
Damien429d7192013-10-04 19:53:11 +0100320#else
Damien George16a6a472015-12-17 13:06:05 +0000321 printf("rule(%u) (n=%u)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), (uint)n);
Damien429d7192013-10-04 19:53:11 +0100322#endif
Damien George16a6a472015-12-17 13:06:05 +0000323 for (size_t i = 0; i < n; i++) {
Damien George5042bce2014-05-25 22:06:06 +0100324 mp_parse_node_print(pns->nodes[i], indent + 2);
325 }
Damien429d7192013-10-04 19:53:11 +0100326 }
327 }
328}
Damien Georgecbd2f742014-01-19 11:48:48 +0000329#endif // MICROPY_DEBUG_PRINTERS
Damien429d7192013-10-04 19:53:11 +0100330
331/*
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200332STATIC void result_stack_show(parser_t *parser) {
Damien429d7192013-10-04 19:53:11 +0100333 printf("result stack, most recent first\n");
Damien George16a6a472015-12-17 13:06:05 +0000334 for (ssize_t i = parser->result_stack_top - 1; i >= 0; i--) {
Damien Georgecbd2f742014-01-19 11:48:48 +0000335 mp_parse_node_print(parser->result_stack[i], 0);
Damien429d7192013-10-04 19:53:11 +0100336 }
337}
338*/
339
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200340STATIC mp_parse_node_t pop_result(parser_t *parser) {
Damien429d7192013-10-04 19:53:11 +0100341 assert(parser->result_stack_top > 0);
342 return parser->result_stack[--parser->result_stack_top];
343}
344
Damien George16a6a472015-12-17 13:06:05 +0000345STATIC mp_parse_node_t peek_result(parser_t *parser, size_t pos) {
Damien429d7192013-10-04 19:53:11 +0100346 assert(parser->result_stack_top > pos);
347 return parser->result_stack[parser->result_stack_top - 1 - pos];
348}
349
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200350STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
Damien George69a818d2014-01-12 13:55:24 +0000351 if (parser->result_stack_top >= parser->result_stack_alloc) {
Damien Georgef615d822017-02-24 14:56:37 +1100352 mp_parse_node_t *stack = m_renew(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MICROPY_ALLOC_PARSE_RESULT_INC);
Damien George50912e72015-01-20 11:55:10 +0000353 parser->result_stack = stack;
Damien George58ebde42014-05-21 20:32:59 +0100354 parser->result_stack_alloc += MICROPY_ALLOC_PARSE_RESULT_INC;
Damien George69a818d2014-01-12 13:55:24 +0000355 }
Damien429d7192013-10-04 19:53:11 +0100356 parser->result_stack[parser->result_stack_top++] = pn;
357}
358
Damien George16a6a472015-12-17 13:06:05 +0000359STATIC mp_parse_node_t make_node_const_object(parser_t *parser, size_t src_line, mp_obj_t obj) {
Damien George999cedb2015-11-27 17:01:44 +0000360 mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_obj_t));
Damien George7d414a12015-02-08 01:57:40 +0000361 pn->source_line = src_line;
Damien Georgeb8cfb0d2015-11-27 17:09:11 +0000362 #if MICROPY_OBJ_REPR == MICROPY_OBJ_REPR_D
363 // nodes are 32-bit pointers, but need to store 64-bit object
364 pn->kind_num_nodes = RULE_const_object | (2 << 8);
365 pn->nodes[0] = (uint64_t)obj;
366 pn->nodes[1] = (uint64_t)obj >> 32;
367 #else
Damien George7d414a12015-02-08 01:57:40 +0000368 pn->kind_num_nodes = RULE_const_object | (1 << 8);
Damien George16a6a472015-12-17 13:06:05 +0000369 pn->nodes[0] = (uintptr_t)obj;
Damien Georgeb8cfb0d2015-11-27 17:09:11 +0000370 #endif
Damien George7d414a12015-02-08 01:57:40 +0000371 return (mp_parse_node_t)pn;
Damien George5042bce2014-05-25 22:06:06 +0100372}
Paul Sokolovsky9e76b112014-05-08 22:43:46 +0300373
Damien George6d310a52016-09-23 17:23:16 +1000374STATIC void push_result_token(parser_t *parser, const rule_t *rule) {
Damiend99b0522013-12-21 18:17:45 +0000375 mp_parse_node_t pn;
Damien Georgea4c52c52014-12-05 19:35:18 +0000376 mp_lexer_t *lex = parser->lexer;
377 if (lex->tok_kind == MP_TOKEN_NAME) {
Damien George64f2b212015-10-08 14:58:15 +0100378 qstr id = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
379 #if MICROPY_COMP_CONST
Damien George6d310a52016-09-23 17:23:16 +1000380 // if name is a standalone identifier, look it up in the table of dynamic constants
381 mp_map_elem_t *elem;
382 if (rule->rule_id == RULE_atom
383 && (elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP)) != NULL) {
Damien George74f4d2c2017-02-24 13:03:44 +1100384 if (MP_OBJ_IS_SMALL_INT(elem->value)) {
385 pn = mp_parse_node_new_small_int(MP_OBJ_SMALL_INT_VALUE(elem->value));
386 } else {
387 pn = make_node_const_object(parser, lex->tok_line, elem->value);
388 }
Damien George6d310a52016-09-23 17:23:16 +1000389 } else {
Damien George64f2b212015-10-08 14:58:15 +0100390 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, id);
391 }
Damien George6d310a52016-09-23 17:23:16 +1000392 #else
393 (void)rule;
394 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, id);
395 #endif
Damien George7d414a12015-02-08 01:57:40 +0000396 } else if (lex->tok_kind == MP_TOKEN_INTEGER) {
397 mp_obj_t o = mp_parse_num_integer(lex->vstr.buf, lex->vstr.len, 0, lex);
398 if (MP_OBJ_IS_SMALL_INT(o)) {
Damien Georgeed9c93f2016-11-13 15:35:11 +1100399 pn = mp_parse_node_new_small_int(MP_OBJ_SMALL_INT_VALUE(o));
Damien429d7192013-10-04 19:53:11 +0100400 } else {
Damien George7d414a12015-02-08 01:57:40 +0000401 pn = make_node_const_object(parser, lex->tok_line, o);
Damien429d7192013-10-04 19:53:11 +0100402 }
Damien George7d414a12015-02-08 01:57:40 +0000403 } else if (lex->tok_kind == MP_TOKEN_FLOAT_OR_IMAG) {
404 mp_obj_t o = mp_parse_num_decimal(lex->vstr.buf, lex->vstr.len, true, false, lex);
405 pn = make_node_const_object(parser, lex->tok_line, o);
Damien George4c81ba82015-01-13 16:21:23 +0000406 } else if (lex->tok_kind == MP_TOKEN_STRING || lex->tok_kind == MP_TOKEN_BYTES) {
407 // Don't automatically intern all strings/bytes. doc strings (which are usually large)
Damien George5042bce2014-05-25 22:06:06 +0100408 // will be discarded by the compiler, and so we shouldn't intern them.
409 qstr qst = MP_QSTR_NULL;
Damien Georgea4c52c52014-12-05 19:35:18 +0000410 if (lex->vstr.len <= MICROPY_ALLOC_PARSE_INTERN_STRING_LEN) {
Damien George5042bce2014-05-25 22:06:06 +0100411 // intern short strings
Damien Georgea4c52c52014-12-05 19:35:18 +0000412 qst = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
Damien George5042bce2014-05-25 22:06:06 +0100413 } else {
414 // check if this string is already interned
Damien Georgea4c52c52014-12-05 19:35:18 +0000415 qst = qstr_find_strn(lex->vstr.buf, lex->vstr.len);
Damien George5042bce2014-05-25 22:06:06 +0100416 }
417 if (qst != MP_QSTR_NULL) {
418 // qstr exists, make a leaf node
Damien George4c81ba82015-01-13 16:21:23 +0000419 pn = mp_parse_node_new_leaf(lex->tok_kind == MP_TOKEN_STRING ? MP_PARSE_NODE_STRING : MP_PARSE_NODE_BYTES, qst);
Damien George5042bce2014-05-25 22:06:06 +0100420 } else {
Damien George52552552017-02-24 13:43:43 +1100421 // not interned, make a node holding a pointer to the string/bytes object
422 mp_obj_t o = mp_obj_new_str_of_type(
423 lex->tok_kind == MP_TOKEN_STRING ? &mp_type_str : &mp_type_bytes,
424 (const byte*)lex->vstr.buf, lex->vstr.len);
425 pn = make_node_const_object(parser, lex->tok_line, o);
Damien George5042bce2014-05-25 22:06:06 +0100426 }
Damien429d7192013-10-04 19:53:11 +0100427 } else {
Damien Georgea4c52c52014-12-05 19:35:18 +0000428 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, lex->tok_kind);
Damien429d7192013-10-04 19:53:11 +0100429 }
430 push_result_node(parser, pn);
431}
432
Damien George64f2b212015-10-08 14:58:15 +0100433#if MICROPY_COMP_MODULE_CONST
Damien Georgecbf76742015-11-27 13:38:15 +0000434STATIC const mp_rom_map_elem_t mp_constants_table[] = {
Damien Georgee36ff982016-05-10 23:23:50 +0100435 #if MICROPY_PY_UERRNO
436 { MP_ROM_QSTR(MP_QSTR_errno), MP_ROM_PTR(&mp_module_uerrno) },
437 #endif
Damien George64f2b212015-10-08 14:58:15 +0100438 #if MICROPY_PY_UCTYPES
Damien Georgecbf76742015-11-27 13:38:15 +0000439 { MP_ROM_QSTR(MP_QSTR_uctypes), MP_ROM_PTR(&mp_module_uctypes) },
Damien George64f2b212015-10-08 14:58:15 +0100440 #endif
441 // Extra constants as defined by a port
442 MICROPY_PORT_CONSTANTS
443};
444STATIC MP_DEFINE_CONST_MAP(mp_constants_map, mp_constants_table);
445#endif
446
Damien Georgeb1533c42016-06-06 17:28:32 +0100447STATIC void push_result_rule(parser_t *parser, size_t src_line, const rule_t *rule, size_t num_args);
448
Damien George64f2b212015-10-08 14:58:15 +0100449#if MICROPY_COMP_CONST_FOLDING
Damien George9b525132016-11-13 15:36:49 +1100450STATIC bool fold_logical_constants(parser_t *parser, const rule_t *rule, size_t *num_args) {
451 if (rule->rule_id == RULE_or_test
452 || rule->rule_id == RULE_and_test) {
453 // folding for binary logical ops: or and
454 size_t copy_to = *num_args;
455 for (size_t i = copy_to; i > 0;) {
456 mp_parse_node_t pn = peek_result(parser, --i);
457 parser->result_stack[parser->result_stack_top - copy_to] = pn;
458 if (i == 0) {
459 // always need to keep the last value
460 break;
461 }
462 if (rule->rule_id == RULE_or_test) {
463 if (mp_parse_node_is_const_true(pn)) {
464 //
465 break;
466 } else if (!mp_parse_node_is_const_false(pn)) {
467 copy_to -= 1;
468 }
469 } else {
470 // RULE_and_test
471 if (mp_parse_node_is_const_false(pn)) {
472 break;
473 } else if (!mp_parse_node_is_const_true(pn)) {
474 copy_to -= 1;
475 }
476 }
477 }
478 copy_to -= 1; // copy_to now contains number of args to pop
479
480 // pop and discard all the short-circuited expressions
481 for (size_t i = 0; i < copy_to; ++i) {
482 pop_result(parser);
483 }
484 *num_args -= copy_to;
485
486 // we did a complete folding if there's only 1 arg left
487 return *num_args == 1;
488
489 } else if (rule->rule_id == RULE_not_test_2) {
490 // folding for unary logical op: not
491 mp_parse_node_t pn = peek_result(parser, 0);
492 if (mp_parse_node_is_const_false(pn)) {
493 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, MP_TOKEN_KW_TRUE);
494 } else if (mp_parse_node_is_const_true(pn)) {
495 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, MP_TOKEN_KW_FALSE);
496 } else {
497 return false;
498 }
499 pop_result(parser);
500 push_result_node(parser, pn);
501 return true;
502 }
503
504 return false;
505}
506
Damien George16a6a472015-12-17 13:06:05 +0000507STATIC bool fold_constants(parser_t *parser, const rule_t *rule, size_t num_args) {
Damien George64f2b212015-10-08 14:58:15 +0100508 // this code does folding of arbitrary integer expressions, eg 1 + 2 * 3 + 4
509 // it does not do partial folding, eg 1 + 2 + x -> 3 + x
510
Damien George22b22652016-01-07 14:40:35 +0000511 mp_obj_t arg0;
Damien George64f2b212015-10-08 14:58:15 +0100512 if (rule->rule_id == RULE_expr
513 || rule->rule_id == RULE_xor_expr
514 || rule->rule_id == RULE_and_expr) {
515 // folding for binary ops: | ^ &
516 mp_parse_node_t pn = peek_result(parser, num_args - 1);
Damien George22b22652016-01-07 14:40:35 +0000517 if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
Damien George64f2b212015-10-08 14:58:15 +0100518 return false;
519 }
Damien George22b22652016-01-07 14:40:35 +0000520 mp_binary_op_t op;
521 if (rule->rule_id == RULE_expr) {
522 op = MP_BINARY_OP_OR;
523 } else if (rule->rule_id == RULE_xor_expr) {
524 op = MP_BINARY_OP_XOR;
525 } else {
526 op = MP_BINARY_OP_AND;
527 }
Damien George16a6a472015-12-17 13:06:05 +0000528 for (ssize_t i = num_args - 2; i >= 0; --i) {
Damien George64f2b212015-10-08 14:58:15 +0100529 pn = peek_result(parser, i);
Damien George22b22652016-01-07 14:40:35 +0000530 mp_obj_t arg1;
531 if (!mp_parse_node_get_int_maybe(pn, &arg1)) {
Damien George64f2b212015-10-08 14:58:15 +0100532 return false;
533 }
Damien George22b22652016-01-07 14:40:35 +0000534 arg0 = mp_binary_op(op, arg0, arg1);
Damien George64f2b212015-10-08 14:58:15 +0100535 }
536 } else if (rule->rule_id == RULE_shift_expr
537 || rule->rule_id == RULE_arith_expr
538 || rule->rule_id == RULE_term) {
539 // folding for binary ops: << >> + - * / % //
540 mp_parse_node_t pn = peek_result(parser, num_args - 1);
Damien George22b22652016-01-07 14:40:35 +0000541 if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
Damien George64f2b212015-10-08 14:58:15 +0100542 return false;
543 }
Damien George16a6a472015-12-17 13:06:05 +0000544 for (ssize_t i = num_args - 2; i >= 1; i -= 2) {
Damien George64f2b212015-10-08 14:58:15 +0100545 pn = peek_result(parser, i - 1);
Damien George22b22652016-01-07 14:40:35 +0000546 mp_obj_t arg1;
547 if (!mp_parse_node_get_int_maybe(pn, &arg1)) {
Damien George64f2b212015-10-08 14:58:15 +0100548 return false;
549 }
Damien George64f2b212015-10-08 14:58:15 +0100550 mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, i));
Damien George22b22652016-01-07 14:40:35 +0000551 static const uint8_t token_to_op[] = {
552 MP_BINARY_OP_ADD,
553 MP_BINARY_OP_SUBTRACT,
554 MP_BINARY_OP_MULTIPLY,
555 255,//MP_BINARY_OP_POWER,
556 255,//MP_BINARY_OP_TRUE_DIVIDE,
557 MP_BINARY_OP_FLOOR_DIVIDE,
558 MP_BINARY_OP_MODULO,
559 255,//MP_BINARY_OP_LESS
560 MP_BINARY_OP_LSHIFT,
561 255,//MP_BINARY_OP_MORE
562 MP_BINARY_OP_RSHIFT,
563 };
564 mp_binary_op_t op = token_to_op[tok - MP_TOKEN_OP_PLUS];
Antonin ENFRUNefc971e2016-01-12 22:06:39 +0100565 if (op == (mp_binary_op_t)255) {
Damien George64f2b212015-10-08 14:58:15 +0100566 return false;
567 }
Damien George22b22652016-01-07 14:40:35 +0000568 int rhs_sign = mp_obj_int_sign(arg1);
569 if (op <= MP_BINARY_OP_RSHIFT) {
570 // << and >> can't have negative rhs
571 if (rhs_sign < 0) {
572 return false;
573 }
574 } else if (op >= MP_BINARY_OP_FLOOR_DIVIDE) {
575 // % and // can't have zero rhs
576 if (rhs_sign == 0) {
577 return false;
578 }
579 }
580 arg0 = mp_binary_op(op, arg0, arg1);
Damien George64f2b212015-10-08 14:58:15 +0100581 }
582 } else if (rule->rule_id == RULE_factor_2) {
583 // folding for unary ops: + - ~
584 mp_parse_node_t pn = peek_result(parser, 0);
Damien George22b22652016-01-07 14:40:35 +0000585 if (!mp_parse_node_get_int_maybe(pn, &arg0)) {
Damien George64f2b212015-10-08 14:58:15 +0100586 return false;
587 }
Damien George64f2b212015-10-08 14:58:15 +0100588 mp_token_kind_t tok = MP_PARSE_NODE_LEAF_ARG(peek_result(parser, 1));
Antonin ENFRUNefc971e2016-01-12 22:06:39 +0100589 mp_unary_op_t op;
Damien George64f2b212015-10-08 14:58:15 +0100590 if (tok == MP_TOKEN_OP_PLUS) {
Damien George22b22652016-01-07 14:40:35 +0000591 op = MP_UNARY_OP_POSITIVE;
Damien George64f2b212015-10-08 14:58:15 +0100592 } else if (tok == MP_TOKEN_OP_MINUS) {
Damien George22b22652016-01-07 14:40:35 +0000593 op = MP_UNARY_OP_NEGATIVE;
Damien George64f2b212015-10-08 14:58:15 +0100594 } else {
595 assert(tok == MP_TOKEN_OP_TILDE); // should be
Damien George22b22652016-01-07 14:40:35 +0000596 op = MP_UNARY_OP_INVERT;
Damien George64f2b212015-10-08 14:58:15 +0100597 }
Damien George22b22652016-01-07 14:40:35 +0000598 arg0 = mp_unary_op(op, arg0);
Damien George64f2b212015-10-08 14:58:15 +0100599
600 #if MICROPY_COMP_CONST
601 } else if (rule->rule_id == RULE_expr_stmt) {
602 mp_parse_node_t pn1 = peek_result(parser, 0);
603 if (!MP_PARSE_NODE_IS_NULL(pn1)
604 && !(MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_augassign)
605 || MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_expr_stmt_assign_list))) {
606 // this node is of the form <x> = <y>
607 mp_parse_node_t pn0 = peek_result(parser, 1);
608 if (MP_PARSE_NODE_IS_ID(pn0)
Damien Georgeeacbd7a2016-04-13 15:21:47 +0100609 && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_atom_expr_normal)
Damien George64f2b212015-10-08 14:58:15 +0100610 && MP_PARSE_NODE_IS_ID(((mp_parse_node_struct_t*)pn1)->nodes[0])
611 && MP_PARSE_NODE_LEAF_ARG(((mp_parse_node_struct_t*)pn1)->nodes[0]) == MP_QSTR_const
612 && MP_PARSE_NODE_IS_STRUCT_KIND(((mp_parse_node_struct_t*)pn1)->nodes[1], RULE_trailer_paren)
Damien George64f2b212015-10-08 14:58:15 +0100613 ) {
614 // code to assign dynamic constants: id = const(value)
615
616 // get the id
617 qstr id = MP_PARSE_NODE_LEAF_ARG(pn0);
618
619 // get the value
620 mp_parse_node_t pn_value = ((mp_parse_node_struct_t*)((mp_parse_node_struct_t*)pn1)->nodes[1])->nodes[0];
Damien George74f4d2c2017-02-24 13:03:44 +1100621 mp_obj_t value;
622 if (!mp_parse_node_get_int_maybe(pn_value, &value)) {
Damien Georgef615d822017-02-24 14:56:37 +1100623 mp_obj_t exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
624 "constant must be an integer");
625 mp_obj_exception_add_traceback(exc, parser->lexer->source_name,
626 ((mp_parse_node_struct_t*)pn1)->source_line, MP_QSTR_NULL);
627 nlr_raise(exc);
Damien George64f2b212015-10-08 14:58:15 +0100628 }
Damien George64f2b212015-10-08 14:58:15 +0100629
630 // store the value in the table of dynamic constants
631 mp_map_elem_t *elem = mp_map_lookup(&parser->consts, MP_OBJ_NEW_QSTR(id), MP_MAP_LOOKUP_ADD_IF_NOT_FOUND);
632 assert(elem->value == MP_OBJ_NULL);
Damien George74f4d2c2017-02-24 13:03:44 +1100633 elem->value = value;
Damien George64f2b212015-10-08 14:58:15 +0100634
Damien Georgeb1533c42016-06-06 17:28:32 +0100635 // If the constant starts with an underscore then treat it as a private
636 // variable and don't emit any code to store the value to the id.
637 if (qstr_str(id)[0] == '_') {
638 pop_result(parser); // pop const(value)
639 pop_result(parser); // pop id
640 push_result_rule(parser, 0, rules[RULE_pass_stmt], 0); // replace with "pass"
641 return true;
642 }
643
Damien George64f2b212015-10-08 14:58:15 +0100644 // replace const(value) with value
645 pop_result(parser);
646 push_result_node(parser, pn_value);
647
648 // finished folding this assignment, but we still want it to be part of the tree
649 return false;
650 }
651 }
652 return false;
653 #endif
654
655 #if MICROPY_COMP_MODULE_CONST
Damien Georgeeacbd7a2016-04-13 15:21:47 +0100656 } else if (rule->rule_id == RULE_atom_expr_normal) {
657 mp_parse_node_t pn0 = peek_result(parser, 1);
658 mp_parse_node_t pn1 = peek_result(parser, 0);
Damien George64f2b212015-10-08 14:58:15 +0100659 if (!(MP_PARSE_NODE_IS_ID(pn0)
Damien Georgeeacbd7a2016-04-13 15:21:47 +0100660 && MP_PARSE_NODE_IS_STRUCT_KIND(pn1, RULE_trailer_period))) {
Damien George64f2b212015-10-08 14:58:15 +0100661 return false;
662 }
663 // id1.id2
664 // look it up in constant table, see if it can be replaced with an integer
665 mp_parse_node_struct_t *pns1 = (mp_parse_node_struct_t*)pn1;
666 assert(MP_PARSE_NODE_IS_ID(pns1->nodes[0]));
667 qstr q_base = MP_PARSE_NODE_LEAF_ARG(pn0);
668 qstr q_attr = MP_PARSE_NODE_LEAF_ARG(pns1->nodes[0]);
669 mp_map_elem_t *elem = mp_map_lookup((mp_map_t*)&mp_constants_map, MP_OBJ_NEW_QSTR(q_base), MP_MAP_LOOKUP);
670 if (elem == NULL) {
671 return false;
672 }
673 mp_obj_t dest[2];
674 mp_load_method_maybe(elem->value, q_attr, dest);
Damien George8d4d6732016-03-19 21:36:32 +0000675 if (!(dest[0] != MP_OBJ_NULL && MP_OBJ_IS_INT(dest[0]) && dest[1] == MP_OBJ_NULL)) {
Damien George64f2b212015-10-08 14:58:15 +0100676 return false;
677 }
Damien George22b22652016-01-07 14:40:35 +0000678 arg0 = dest[0];
Damien George64f2b212015-10-08 14:58:15 +0100679 #endif
680
681 } else {
682 return false;
683 }
684
685 // success folding this rule
686
Damien George16a6a472015-12-17 13:06:05 +0000687 for (size_t i = num_args; i > 0; i--) {
Damien George64f2b212015-10-08 14:58:15 +0100688 pop_result(parser);
689 }
Damien George22b22652016-01-07 14:40:35 +0000690 if (MP_OBJ_IS_SMALL_INT(arg0)) {
Damien Georgeed9c93f2016-11-13 15:35:11 +1100691 push_result_node(parser, mp_parse_node_new_small_int(MP_OBJ_SMALL_INT_VALUE(arg0)));
Damien George22b22652016-01-07 14:40:35 +0000692 } else {
693 // TODO reuse memory for parse node struct?
694 push_result_node(parser, make_node_const_object(parser, 0, arg0));
695 }
Damien George64f2b212015-10-08 14:58:15 +0100696
697 return true;
698}
699#endif
700
Damien George16a6a472015-12-17 13:06:05 +0000701STATIC void push_result_rule(parser_t *parser, size_t src_line, const rule_t *rule, size_t num_args) {
Damien George93b37262016-01-07 13:07:52 +0000702 // optimise away parenthesis around an expression if possible
703 if (rule->rule_id == RULE_atom_paren) {
704 // there should be just 1 arg for this rule
705 mp_parse_node_t pn = peek_result(parser, 0);
706 if (MP_PARSE_NODE_IS_NULL(pn)) {
707 // need to keep parenthesis for ()
708 } else if (MP_PARSE_NODE_IS_STRUCT_KIND(pn, RULE_testlist_comp)) {
709 // need to keep parenthesis for (a, b, ...)
710 } else {
711 // parenthesis around a single expression, so it's just the expression
712 return;
713 }
714 }
715
Damien George64f2b212015-10-08 14:58:15 +0100716 #if MICROPY_COMP_CONST_FOLDING
Damien George9b525132016-11-13 15:36:49 +1100717 if (fold_logical_constants(parser, rule, &num_args)) {
718 // we folded this rule so return straight away
719 return;
720 }
Damien George64f2b212015-10-08 14:58:15 +0100721 if (fold_constants(parser, rule, num_args)) {
722 // we folded this rule so return straight away
723 return;
724 }
725 #endif
726
Damien George58e0f4a2015-09-23 10:50:43 +0100727 mp_parse_node_struct_t *pn = parser_alloc(parser, sizeof(mp_parse_node_struct_t) + sizeof(mp_parse_node_t) * num_args);
Damien George58ba4c32014-04-10 14:27:31 +0000728 pn->source_line = src_line;
729 pn->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);
Damien George16a6a472015-12-17 13:06:05 +0000730 for (size_t i = num_args; i > 0; i--) {
Damien429d7192013-10-04 19:53:11 +0100731 pn->nodes[i - 1] = pop_result(parser);
732 }
Damiend99b0522013-12-21 18:17:45 +0000733 push_result_node(parser, (mp_parse_node_t)pn);
Damien429d7192013-10-04 19:53:11 +0100734}
735
Damien George58e0f4a2015-09-23 10:50:43 +0100736mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) {
Damien George69a818d2014-01-12 13:55:24 +0000737
Damien George1b82e9a2014-05-10 17:36:41 +0100738 // initialise parser and allocate memory for its stacks
Damien George69a818d2014-01-12 13:55:24 +0000739
Damien George1b82e9a2014-05-10 17:36:41 +0100740 parser_t parser;
Damien George69a818d2014-01-12 13:55:24 +0000741
Damien George58ebde42014-05-21 20:32:59 +0100742 parser.rule_stack_alloc = MICROPY_ALLOC_PARSE_RULE_INIT;
Damien George1b82e9a2014-05-10 17:36:41 +0100743 parser.rule_stack_top = 0;
Damien Georgef615d822017-02-24 14:56:37 +1100744 parser.rule_stack = m_new(rule_stack_t, parser.rule_stack_alloc);
Damien429d7192013-10-04 19:53:11 +0100745
Damien George58ebde42014-05-21 20:32:59 +0100746 parser.result_stack_alloc = MICROPY_ALLOC_PARSE_RESULT_INIT;
Damien George1b82e9a2014-05-10 17:36:41 +0100747 parser.result_stack_top = 0;
Damien Georgef615d822017-02-24 14:56:37 +1100748 parser.result_stack = m_new(mp_parse_node_t, parser.result_stack_alloc);
Damien429d7192013-10-04 19:53:11 +0100749
Damien George1b82e9a2014-05-10 17:36:41 +0100750 parser.lexer = lex;
751
Damien George58e0f4a2015-09-23 10:50:43 +0100752 parser.tree.chunk = NULL;
753 parser.cur_chunk = NULL;
754
Damien George64f2b212015-10-08 14:58:15 +0100755 #if MICROPY_COMP_CONST
756 mp_map_init(&parser.consts, 0);
757 #endif
758
Damien George69a818d2014-01-12 13:55:24 +0000759 // work out the top-level rule to use, and push it on the stack
Damien George16a6a472015-12-17 13:06:05 +0000760 size_t top_level_rule;
Damien5ac1b2e2013-10-18 19:58:12 +0100761 switch (input_kind) {
Damiend99b0522013-12-21 18:17:45 +0000762 case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
Damien Georged02c6d82014-01-15 22:14:03 +0000763 case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
Damien5ac1b2e2013-10-18 19:58:12 +0100764 default: top_level_rule = RULE_file_input;
765 }
Damien Georgea4c52c52014-12-05 19:35:18 +0000766 push_rule(&parser, lex->tok_line, rules[top_level_rule], 0);
Damien429d7192013-10-04 19:53:11 +0100767
Damien George69a818d2014-01-12 13:55:24 +0000768 // parse!
769
Damien George16a6a472015-12-17 13:06:05 +0000770 size_t n, i; // state for the current rule
771 size_t rule_src_line; // source line for the first token matched by the current rule
Damien429d7192013-10-04 19:53:11 +0100772 bool backtrack = false;
Damien George08335002014-01-18 23:24:36 +0000773 const rule_t *rule = NULL;
Damien429d7192013-10-04 19:53:11 +0100774
775 for (;;) {
776 next_rule:
Damien Georgef615d822017-02-24 14:56:37 +1100777 if (parser.rule_stack_top == 0) {
Damien429d7192013-10-04 19:53:11 +0100778 break;
779 }
780
Damien George1b82e9a2014-05-10 17:36:41 +0100781 pop_rule(&parser, &rule, &i, &rule_src_line);
Damien429d7192013-10-04 19:53:11 +0100782 n = rule->act & RULE_ACT_ARG_MASK;
783
784 /*
785 // debugging
Damien George1b82e9a2014-05-10 17:36:41 +0100786 printf("depth=%d ", parser.rule_stack_top);
787 for (int j = 0; j < parser.rule_stack_top; ++j) {
Damien429d7192013-10-04 19:53:11 +0100788 printf(" ");
789 }
790 printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
791 */
792
793 switch (rule->act & RULE_ACT_KIND_MASK) {
794 case RULE_ACT_OR:
795 if (i > 0 && !backtrack) {
796 goto next_rule;
797 } else {
798 backtrack = false;
799 }
Damien Georgefa7c61d2015-07-24 14:35:57 +0000800 for (; i < n; ++i) {
801 uint16_t kind = rule->arg[i] & RULE_ARG_KIND_MASK;
802 if (kind == RULE_ARG_TOK) {
803 if (lex->tok_kind == (rule->arg[i] & RULE_ARG_ARG_MASK)) {
Damien George6d310a52016-09-23 17:23:16 +1000804 push_result_token(&parser, rule);
Damien Georgefa7c61d2015-07-24 14:35:57 +0000805 mp_lexer_to_next(lex);
Damien429d7192013-10-04 19:53:11 +0100806 goto next_rule;
Damien Georgefa7c61d2015-07-24 14:35:57 +0000807 }
Damien429d7192013-10-04 19:53:11 +0100808 } else {
Damien Georgefa7c61d2015-07-24 14:35:57 +0000809 assert(kind == RULE_ARG_RULE);
810 if (i + 1 < n) {
811 push_rule(&parser, rule_src_line, rule, i + 1); // save this or-rule
812 }
813 push_rule_from_arg(&parser, rule->arg[i]); // push child of or-rule
Damien429d7192013-10-04 19:53:11 +0100814 goto next_rule;
815 }
Damien429d7192013-10-04 19:53:11 +0100816 }
Damien Georgefa7c61d2015-07-24 14:35:57 +0000817 backtrack = true;
Damien429d7192013-10-04 19:53:11 +0100818 break;
819
Damien George2870d852014-12-20 18:06:08 +0000820 case RULE_ACT_AND: {
Damien429d7192013-10-04 19:53:11 +0100821
822 // failed, backtrack if we can, else syntax error
823 if (backtrack) {
824 assert(i > 0);
825 if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
826 // an optional rule that failed, so continue with next arg
Damien George1b82e9a2014-05-10 17:36:41 +0100827 push_result_node(&parser, MP_PARSE_NODE_NULL);
Damien429d7192013-10-04 19:53:11 +0100828 backtrack = false;
829 } else {
830 // a mandatory rule that failed, so propagate backtrack
831 if (i > 1) {
832 // already eaten tokens so can't backtrack
833 goto syntax_error;
834 } else {
835 goto next_rule;
836 }
837 }
838 }
839
840 // progress through the rule
841 for (; i < n; ++i) {
Damien George86e94232017-01-17 17:00:55 +1100842 if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
843 // need to match a token
844 mp_token_kind_t tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
845 if (lex->tok_kind == tok_kind) {
846 // matched token
847 if (tok_kind == MP_TOKEN_NAME) {
848 push_result_token(&parser, rule);
Damien429d7192013-10-04 19:53:11 +0100849 }
Damien George86e94232017-01-17 17:00:55 +1100850 mp_lexer_to_next(lex);
851 } else {
852 // failed to match token
853 if (i > 0) {
854 // already eaten tokens so can't backtrack
855 goto syntax_error;
856 } else {
857 // this rule failed, so backtrack
858 backtrack = true;
859 goto next_rule;
860 }
Damien Georged2d64f02015-01-14 21:32:42 +0000861 }
Damien George86e94232017-01-17 17:00:55 +1100862 } else {
863 push_rule(&parser, rule_src_line, rule, i + 1); // save this and-rule
864 push_rule_from_arg(&parser, rule->arg[i]); // push child of and-rule
865 goto next_rule;
Damien429d7192013-10-04 19:53:11 +0100866 }
867 }
868
869 assert(i == n);
870
871 // matched the rule, so now build the corresponding parse_node
872
Damien George65dc9602015-08-14 12:24:11 +0100873 #if !MICROPY_ENABLE_DOC_STRING
Damien George5042bce2014-05-25 22:06:06 +0100874 // this code discards lonely statements, such as doc strings
Damien George1b82e9a2014-05-10 17:36:41 +0100875 if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(&parser, 0) == MP_PARSE_NODE_NULL) {
876 mp_parse_node_t p = peek_result(&parser, 1);
Damien George52552552017-02-24 13:43:43 +1100877 if ((MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p))
878 || MP_PARSE_NODE_IS_STRUCT_KIND(p, RULE_const_object)) {
Damien George52b5d762014-09-23 15:31:56 +0000879 pop_result(&parser); // MP_PARSE_NODE_NULL
Damien George52552552017-02-24 13:43:43 +1100880 pop_result(&parser); // const expression (leaf or RULE_const_object)
881 // Pushing the "pass" rule here will overwrite any RULE_const_object
882 // entry that was on the result stack, allowing the GC to reclaim
883 // the memory from the const object when needed.
Damien George1b82e9a2014-05-10 17:36:41 +0100884 push_result_rule(&parser, rule_src_line, rules[RULE_pass_stmt], 0);
Damien George93afa232014-05-06 21:44:11 +0100885 break;
886 }
887 }
Damien George65dc9602015-08-14 12:24:11 +0100888 #endif
Damien George93afa232014-05-06 21:44:11 +0100889
Damien George0c1de1c2016-04-14 13:23:50 +0100890 // count number of arguments for the parse node
891 i = 0;
Damien George16a6a472015-12-17 13:06:05 +0000892 size_t num_not_nil = 0;
Damien George0c1de1c2016-04-14 13:23:50 +0100893 for (size_t x = n; x > 0;) {
894 --x;
895 if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
896 mp_token_kind_t tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
897 if (tok_kind == MP_TOKEN_NAME) {
898 // only tokens which were names are pushed to stack
899 i += 1;
900 num_not_nil += 1;
901 }
902 } else {
903 // rules are always pushed
904 if (peek_result(&parser, i) != MP_PARSE_NODE_NULL) {
905 num_not_nil += 1;
906 }
907 i += 1;
Damien429d7192013-10-04 19:53:11 +0100908 }
909 }
Damien George0c1de1c2016-04-14 13:23:50 +0100910
911 if (num_not_nil == 1 && (rule->act & RULE_ACT_ALLOW_IDENT)) {
912 // this rule has only 1 argument and should not be emitted
Damiend99b0522013-12-21 18:17:45 +0000913 mp_parse_node_t pn = MP_PARSE_NODE_NULL;
Damien George16a6a472015-12-17 13:06:05 +0000914 for (size_t x = 0; x < i; ++x) {
Damien George1b82e9a2014-05-10 17:36:41 +0100915 mp_parse_node_t pn2 = pop_result(&parser);
Damiend99b0522013-12-21 18:17:45 +0000916 if (pn2 != MP_PARSE_NODE_NULL) {
Damien429d7192013-10-04 19:53:11 +0100917 pn = pn2;
918 }
919 }
Damien George1b82e9a2014-05-10 17:36:41 +0100920 push_result_node(&parser, pn);
Damien George0c1de1c2016-04-14 13:23:50 +0100921 } else {
922 // this rule must be emitted
923
924 if (rule->act & RULE_ACT_ADD_BLANK) {
925 // and add an extra blank node at the end (used by the compiler to store data)
926 push_result_node(&parser, MP_PARSE_NODE_NULL);
927 i += 1;
928 }
929
930 push_result_rule(&parser, rule_src_line, rule, i);
Damien429d7192013-10-04 19:53:11 +0100931 }
932 break;
Damien George2870d852014-12-20 18:06:08 +0000933 }
Damien429d7192013-10-04 19:53:11 +0100934
Damien George86e94232017-01-17 17:00:55 +1100935 default: {
936 assert((rule->act & RULE_ACT_KIND_MASK) == RULE_ACT_LIST);
937
Damien429d7192013-10-04 19:53:11 +0100938 // n=2 is: item item*
939 // n=1 is: item (sep item)*
940 // n=3 is: item (sep item)* [sep]
Damien George2870d852014-12-20 18:06:08 +0000941 bool had_trailing_sep;
Damien429d7192013-10-04 19:53:11 +0100942 if (backtrack) {
943 list_backtrack:
944 had_trailing_sep = false;
945 if (n == 2) {
946 if (i == 1) {
947 // fail on item, first time round; propagate backtrack
948 goto next_rule;
949 } else {
950 // fail on item, in later rounds; finish with this rule
951 backtrack = false;
952 }
953 } else {
954 if (i == 1) {
955 // fail on item, first time round; propagate backtrack
956 goto next_rule;
957 } else if ((i & 1) == 1) {
958 // fail on item, in later rounds; have eaten tokens so can't backtrack
959 if (n == 3) {
960 // list allows trailing separator; finish parsing list
961 had_trailing_sep = true;
962 backtrack = false;
963 } else {
964 // list doesn't allowing trailing separator; fail
965 goto syntax_error;
966 }
967 } else {
968 // fail on separator; finish parsing list
969 backtrack = false;
970 }
971 }
972 } else {
973 for (;;) {
Damien George16a6a472015-12-17 13:06:05 +0000974 size_t arg = rule->arg[i & 1 & n];
Damien George86e94232017-01-17 17:00:55 +1100975 if ((arg & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
976 if (lex->tok_kind == (arg & RULE_ARG_ARG_MASK)) {
977 if (i & 1 & n) {
978 // separators which are tokens are not pushed to result stack
Damien429d7192013-10-04 19:53:11 +0100979 } else {
Damien George86e94232017-01-17 17:00:55 +1100980 push_result_token(&parser, rule);
Damien429d7192013-10-04 19:53:11 +0100981 }
Damien George86e94232017-01-17 17:00:55 +1100982 mp_lexer_to_next(lex);
983 // got element of list, so continue parsing list
984 i += 1;
985 } else {
986 // couldn't get element of list
987 i += 1;
988 backtrack = true;
989 goto list_backtrack;
990 }
991 } else {
992 assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE);
993 push_rule(&parser, rule_src_line, rule, i + 1); // save this list-rule
994 push_rule_from_arg(&parser, arg); // push child of list-rule
995 goto next_rule;
Damien429d7192013-10-04 19:53:11 +0100996 }
997 }
998 }
999 assert(i >= 1);
1000
1001 // compute number of elements in list, result in i
1002 i -= 1;
1003 if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
1004 // don't count separators when they are tokens
1005 i = (i + 1) / 2;
1006 }
1007
1008 if (i == 1) {
1009 // list matched single item
1010 if (had_trailing_sep) {
1011 // if there was a trailing separator, make a list of a single item
Damien George1b82e9a2014-05-10 17:36:41 +01001012 push_result_rule(&parser, rule_src_line, rule, i);
Damien429d7192013-10-04 19:53:11 +01001013 } else {
1014 // just leave single item on stack (ie don't wrap in a list)
1015 }
1016 } else {
Damien George1b82e9a2014-05-10 17:36:41 +01001017 push_result_rule(&parser, rule_src_line, rule, i);
Damien429d7192013-10-04 19:53:11 +01001018 }
1019 break;
Damien George2870d852014-12-20 18:06:08 +00001020 }
Damien429d7192013-10-04 19:53:11 +01001021 }
1022 }
Damien91d387d2013-10-09 15:09:52 +01001023
Damien George64f2b212015-10-08 14:58:15 +01001024 #if MICROPY_COMP_CONST
1025 mp_map_deinit(&parser.consts);
1026 #endif
1027
Damien George58e0f4a2015-09-23 10:50:43 +01001028 // truncate final chunk and link into chain of chunks
1029 if (parser.cur_chunk != NULL) {
Colin Hogbenf9b6b372016-10-31 14:05:56 +00001030 (void)m_renew_maybe(byte, parser.cur_chunk,
Damien George58e0f4a2015-09-23 10:50:43 +01001031 sizeof(mp_parse_chunk_t) + parser.cur_chunk->alloc,
Colin Hogbenf9b6b372016-10-31 14:05:56 +00001032 sizeof(mp_parse_chunk_t) + parser.cur_chunk->union_.used,
1033 false);
Damien George58e0f4a2015-09-23 10:50:43 +01001034 parser.cur_chunk->alloc = parser.cur_chunk->union_.used;
1035 parser.cur_chunk->union_.next = parser.tree.chunk;
1036 parser.tree.chunk = parser.cur_chunk;
1037 }
1038
Damien Georgef615d822017-02-24 14:56:37 +11001039 if (
Damien Georgefdfcee72015-10-12 12:59:18 +01001040 lex->tok_kind != MP_TOKEN_END // check we are at the end of the token stream
1041 || parser.result_stack_top == 0 // check that we got a node (can fail on empty input)
1042 ) {
Damien Georgef615d822017-02-24 14:56:37 +11001043 syntax_error:;
1044 mp_obj_t exc;
Damien Georgefdfcee72015-10-12 12:59:18 +01001045 if (lex->tok_kind == MP_TOKEN_INDENT) {
1046 exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
1047 "unexpected indent");
1048 } else if (lex->tok_kind == MP_TOKEN_DEDENT_MISMATCH) {
1049 exc = mp_obj_new_exception_msg(&mp_type_IndentationError,
1050 "unindent does not match any outer indentation level");
1051 } else {
1052 exc = mp_obj_new_exception_msg(&mp_type_SyntaxError,
1053 "invalid syntax");
1054 }
Damien Georgef615d822017-02-24 14:56:37 +11001055 // add traceback to give info about file name and location
1056 // we don't have a 'block' name, so just pass the NULL qstr to indicate this
1057 mp_obj_exception_add_traceback(exc, lex->source_name, lex->tok_line, MP_QSTR_NULL);
1058 nlr_raise(exc);
Damien George58ba4c32014-04-10 14:27:31 +00001059 }
1060
Damien Georgef615d822017-02-24 14:56:37 +11001061 // get the root parse node that we created
1062 assert(parser.result_stack_top == 1);
1063 parser.tree.root = parser.result_stack[0];
1064
Damien George69a818d2014-01-12 13:55:24 +00001065 // free the memory that we don't need anymore
Damien George1b82e9a2014-05-10 17:36:41 +01001066 m_del(rule_stack_t, parser.rule_stack, parser.rule_stack_alloc);
1067 m_del(mp_parse_node_t, parser.result_stack, parser.result_stack_alloc);
Damien George69a818d2014-01-12 13:55:24 +00001068
Damien Georgef615d822017-02-24 14:56:37 +11001069 // we also free the lexer on behalf of the caller
1070 mp_lexer_free(lex);
1071
1072 return parser.tree;
Damien429d7192013-10-04 19:53:11 +01001073}
Damien George58e0f4a2015-09-23 10:50:43 +01001074
1075void mp_parse_tree_clear(mp_parse_tree_t *tree) {
1076 mp_parse_chunk_t *chunk = tree->chunk;
1077 while (chunk != NULL) {
1078 mp_parse_chunk_t *next = chunk->union_.next;
1079 m_del(byte, chunk, sizeof(mp_parse_chunk_t) + chunk->alloc);
1080 chunk = next;
1081 }
1082}
Damien Georgedd5353a2015-12-18 12:35:44 +00001083
1084#endif // MICROPY_ENABLE_COMPILER