/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
26
xbeefe34222014-03-16 00:14:26 -070027#include <stdbool.h>
Damien429d7192013-10-04 19:53:11 +010028#include <stdint.h>
29#include <stdio.h>
Damien429d7192013-10-04 19:53:11 +010030#include <assert.h>
31
32#include "misc.h"
Damiend99b0522013-12-21 18:17:45 +000033#include "mpconfig.h"
Damien George55baff42014-01-21 21:40:13 +000034#include "qstr.h"
Damien429d7192013-10-04 19:53:11 +010035#include "lexer.h"
Damien George06201ff2014-03-01 19:50:50 +000036#include "parsenumbase.h"
Damien429d7192013-10-04 19:53:11 +010037#include "parse.h"
38
// Encoding of rule_t.act: the high nibble selects the kind of rule and the
// low nibble carries a kind-specific argument.
#define RULE_ACT_KIND_MASK      (0xf0)
#define RULE_ACT_ARG_MASK       (0x0f)
#define RULE_ACT_OR             (0x10)
#define RULE_ACT_AND            (0x20)
#define RULE_ACT_LIST           (0x30)

// Encoding of each entry of rule_t.arg: the top 4 bits select the kind of
// argument and the low 12 bits hold a token kind or a rule id.
#define RULE_ARG_BLANK          (0x0000)
#define RULE_ARG_KIND_MASK      (0xf000)
#define RULE_ARG_ARG_MASK       (0x0fff)
#define RULE_ARG_TOK            (0x1000)
#define RULE_ARG_RULE           (0x2000)
#define RULE_ARG_OPT_TOK        (0x3000)
#define RULE_ARG_OPT_RULE       (0x4000)

// True for the rules whose parse node gets one extra blank node appended at
// the end (to be used by the compiler to store data).
// NOTE: rule_id is evaluated several times, so it must have no side effects.
#define ADD_BLANK_NODE(rule_id) ( \
    (rule_id) == RULE_funcdef \
    || (rule_id) == RULE_classdef \
    || (rule_id) == RULE_comp_for \
    || (rule_id) == RULE_lambdef \
    || (rule_id) == RULE_lambdef_nocond \
    )

// (un)comment to use rule names; for debugging
//#define USE_RULE_NAME (1)
57
58typedef struct _rule_t {
59 byte rule_id;
60 byte act;
61#ifdef USE_RULE_NAME
62 const char *rule_name;
63#endif
64 uint16_t arg[];
65} rule_t;
66
67enum {
68 RULE_none = 0,
Damien George00208ce2014-01-23 00:00:53 +000069#define DEF_RULE(rule, comp, kind, ...) RULE_##rule,
Damien429d7192013-10-04 19:53:11 +010070#include "grammar.h"
71#undef DEF_RULE
72 RULE_maximum_number_of,
73};
74
75#define or(n) (RULE_ACT_OR | n)
76#define and(n) (RULE_ACT_AND | n)
77#define one_or_more (RULE_ACT_LIST | 2)
78#define list (RULE_ACT_LIST | 1)
79#define list_with_end (RULE_ACT_LIST | 3)
Damiend99b0522013-12-21 18:17:45 +000080#define tok(t) (RULE_ARG_TOK | MP_TOKEN_##t)
Damien429d7192013-10-04 19:53:11 +010081#define rule(r) (RULE_ARG_RULE | RULE_##r)
Damiend99b0522013-12-21 18:17:45 +000082#define opt_tok(t) (RULE_ARG_OPT_TOK | MP_TOKEN_##t)
Damien429d7192013-10-04 19:53:11 +010083#define opt_rule(r) (RULE_ARG_OPT_RULE | RULE_##r)
84#ifdef USE_RULE_NAME
Damien George00208ce2014-01-23 00:00:53 +000085#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, #rule, { __VA_ARGS__ } };
Damien429d7192013-10-04 19:53:11 +010086#else
Damien George00208ce2014-01-23 00:00:53 +000087#define DEF_RULE(rule, comp, kind, ...) static const rule_t rule_##rule = { RULE_##rule, kind, { __VA_ARGS__ } };
Damien429d7192013-10-04 19:53:11 +010088#endif
89#include "grammar.h"
90#undef or
91#undef and
92#undef list
93#undef list_with_end
94#undef tok
95#undef rule
96#undef opt_tok
97#undef opt_rule
98#undef one_or_more
99#undef DEF_RULE
100
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200101STATIC const rule_t *rules[] = {
Damien429d7192013-10-04 19:53:11 +0100102 NULL,
Damien George00208ce2014-01-23 00:00:53 +0000103#define DEF_RULE(rule, comp, kind, ...) &rule_##rule,
Damien429d7192013-10-04 19:53:11 +0100104#include "grammar.h"
105#undef DEF_RULE
106};
107
// One saved entry of the rule stack: a rule that is still being parsed,
// together with the state needed to resume it.
typedef struct _rule_stack_t {
    // source line recorded for the rule when it was pushed
    unsigned int src_line : 24;
    // RULE_xxx id, used as an index into rules[]
    unsigned int rule_id : 8;
    // index of the next argument of the rule to process
    int32_t arg_i; // what should be the size and signedness?
} rule_stack_t;
113
114typedef struct _parser_t {
Damien George58ba4c32014-04-10 14:27:31 +0000115 bool had_memory_error;
116
Damien429d7192013-10-04 19:53:11 +0100117 uint rule_stack_alloc;
118 uint rule_stack_top;
119 rule_stack_t *rule_stack;
120
Damien George69a818d2014-01-12 13:55:24 +0000121 uint result_stack_alloc;
Damien429d7192013-10-04 19:53:11 +0100122 uint result_stack_top;
Damiend99b0522013-12-21 18:17:45 +0000123 mp_parse_node_t *result_stack;
Damien George08335002014-01-18 23:24:36 +0000124
125 mp_lexer_t *lexer;
Damien429d7192013-10-04 19:53:11 +0100126} parser_t;
127
Damien George58ba4c32014-04-10 14:27:31 +0000128STATIC inline void memory_error(parser_t *parser) {
129 parser->had_memory_error = true;
130}
131
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200132STATIC void push_rule(parser_t *parser, int src_line, const rule_t *rule, int arg_i) {
Damien George58ba4c32014-04-10 14:27:31 +0000133 if (parser->had_memory_error) {
134 return;
135 }
Damien429d7192013-10-04 19:53:11 +0100136 if (parser->rule_stack_top >= parser->rule_stack_alloc) {
Damien George66e18f02014-05-05 13:19:03 +0100137 rule_stack_t *rs = m_renew_maybe(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc, parser->rule_stack_alloc + MP_ALLOC_PARSE_RULE_INC);
Damien George58ba4c32014-04-10 14:27:31 +0000138 if (rs == NULL) {
139 memory_error(parser);
140 return;
141 }
142 parser->rule_stack = rs;
Damien George66e18f02014-05-05 13:19:03 +0100143 parser->rule_stack_alloc += MP_ALLOC_PARSE_RULE_INC;
Damien429d7192013-10-04 19:53:11 +0100144 }
Damien George08335002014-01-18 23:24:36 +0000145 rule_stack_t *rs = &parser->rule_stack[parser->rule_stack_top++];
146 rs->src_line = src_line;
147 rs->rule_id = rule->rule_id;
148 rs->arg_i = arg_i;
Damien429d7192013-10-04 19:53:11 +0100149}
150
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200151STATIC void push_rule_from_arg(parser_t *parser, uint arg) {
Damien429d7192013-10-04 19:53:11 +0100152 assert((arg & RULE_ARG_KIND_MASK) == RULE_ARG_RULE || (arg & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE);
153 uint rule_id = arg & RULE_ARG_ARG_MASK;
154 assert(rule_id < RULE_maximum_number_of);
Damien George08335002014-01-18 23:24:36 +0000155 push_rule(parser, mp_lexer_cur(parser->lexer)->src_line, rules[rule_id], 0);
Damien429d7192013-10-04 19:53:11 +0100156}
157
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200158STATIC void pop_rule(parser_t *parser, const rule_t **rule, uint *arg_i, uint *src_line) {
Damien George58ba4c32014-04-10 14:27:31 +0000159 assert(!parser->had_memory_error);
Damien429d7192013-10-04 19:53:11 +0100160 parser->rule_stack_top -= 1;
161 *rule = rules[parser->rule_stack[parser->rule_stack_top].rule_id];
162 *arg_i = parser->rule_stack[parser->rule_stack_top].arg_i;
Damien George08335002014-01-18 23:24:36 +0000163 *src_line = parser->rule_stack[parser->rule_stack_top].src_line;
Damien429d7192013-10-04 19:53:11 +0100164}
165
Damiend99b0522013-12-21 18:17:45 +0000166mp_parse_node_t mp_parse_node_new_leaf(machine_int_t kind, machine_int_t arg) {
Paul Sokolovsky56e5ef22014-02-22 16:39:45 +0200167 if (kind == MP_PARSE_NODE_SMALL_INT) {
168 return (mp_parse_node_t)(kind | (arg << 1));
169 }
170 return (mp_parse_node_t)(kind | (arg << 5));
Damien429d7192013-10-04 19:53:11 +0100171}
172
Damien Georgeb829b5c2014-01-25 13:51:19 +0000173uint mp_parse_node_free(mp_parse_node_t pn) {
174 uint cnt = 0;
175 if (MP_PARSE_NODE_IS_STRUCT(pn)) {
176 mp_parse_node_struct_t *pns = (mp_parse_node_struct_t *)pn;
177 uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
178 uint rule_id = MP_PARSE_NODE_STRUCT_KIND(pns);
179 bool adjust = ADD_BLANK_NODE(rule_id);
180 if (adjust) {
181 n--;
Paul Sokolovskyaee2ba72014-01-25 00:56:19 +0200182 }
Damien Georgeb829b5c2014-01-25 13:51:19 +0000183 for (uint i = 0; i < n; i++) {
184 cnt += mp_parse_node_free(pns->nodes[i]);
185 }
186 if (adjust) {
187 n++;
188 }
189 m_del_var(mp_parse_node_struct_t, mp_parse_node_t, n, pns);
Paul Sokolovskyaee2ba72014-01-25 00:56:19 +0200190 cnt++;
191 }
192 return cnt;
193}
194
#if MICROPY_DEBUG_PRINTERS
// Debugging aid: print a parse tree to stdout, one node per line.  Struct
// nodes are prefixed with their source line, and children are printed with
// the indentation increased by 2.
//
// pn      the node to print (may be the null node or a leaf)
// indent  number of spaces to print before the node
void mp_parse_node_print(mp_parse_node_t pn, int indent) {
    if (MP_PARSE_NODE_IS_STRUCT(pn)) {
        printf("[% 4d] ", (int)((mp_parse_node_struct_t*)pn)->source_line);
    } else {
        // pad to the width of the "[% 4d] " prefix so that all nodes line up
        printf("       ");
    }
    for (int i = 0; i < indent; i++) {
        printf(" ");
    }
    if (MP_PARSE_NODE_IS_NULL(pn)) {
        printf("NULL\n");
    } else if (MP_PARSE_NODE_IS_SMALL_INT(pn)) {
        machine_int_t arg = MP_PARSE_NODE_LEAF_SMALL_INT(pn);
        printf("int(" INT_FMT ")\n", arg);
    } else if (MP_PARSE_NODE_IS_LEAF(pn)) {
        machine_uint_t arg = MP_PARSE_NODE_LEAF_ARG(pn);
        switch (MP_PARSE_NODE_LEAF_KIND(pn)) {
            case MP_PARSE_NODE_ID: printf("id(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_INTEGER: printf("int(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_DECIMAL: printf("dec(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_STRING: printf("str(%s)\n", qstr_str(arg)); break;
            case MP_PARSE_NODE_BYTES: printf("bytes(%s)\n", qstr_str(arg)); break;
            // INT_FMT is used for a machine_int_t above, so convert to match it
            case MP_PARSE_NODE_TOKEN: printf("tok(" INT_FMT ")\n", (machine_int_t)arg); break;
            default: assert(0); break;
        }
    } else {
        mp_parse_node_struct_t *pns = (mp_parse_node_struct_t*)pn;
        uint n = MP_PARSE_NODE_STRUCT_NUM_NODES(pns);
        // the arguments are unsigned, so print them with %u and an explicit
        // cast to the type that %u expects
#ifdef USE_RULE_NAME
        printf("%s(%u) (n=%u)\n", rules[MP_PARSE_NODE_STRUCT_KIND(pns)]->rule_name, (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#else
        printf("rule(%u) (n=%u)\n", (uint)MP_PARSE_NODE_STRUCT_KIND(pns), n);
#endif
        for (uint i = 0; i < n; i++) {
            mp_parse_node_print(pns->nodes[i], indent + 2);
        }
    }
}
#endif // MICROPY_DEBUG_PRINTERS
Damien429d7192013-10-04 19:53:11 +0100235
/*
// debugging helper: print the whole result stack, most recent entry first
STATIC void result_stack_show(parser_t *parser) {
    printf("result stack, most recent first\n");
    for (int i = parser->result_stack_top - 1; i >= 0; i--) {
        mp_parse_node_print(parser->result_stack[i], 0);
    }
}
*/
244
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200245STATIC mp_parse_node_t pop_result(parser_t *parser) {
Damien George58ba4c32014-04-10 14:27:31 +0000246 if (parser->had_memory_error) {
247 return MP_PARSE_NODE_NULL;
248 }
Damien429d7192013-10-04 19:53:11 +0100249 assert(parser->result_stack_top > 0);
250 return parser->result_stack[--parser->result_stack_top];
251}
252
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200253STATIC mp_parse_node_t peek_result(parser_t *parser, int pos) {
Damien George58ba4c32014-04-10 14:27:31 +0000254 if (parser->had_memory_error) {
255 return MP_PARSE_NODE_NULL;
256 }
Damien429d7192013-10-04 19:53:11 +0100257 assert(parser->result_stack_top > pos);
258 return parser->result_stack[parser->result_stack_top - 1 - pos];
259}
260
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200261STATIC void push_result_node(parser_t *parser, mp_parse_node_t pn) {
Damien George58ba4c32014-04-10 14:27:31 +0000262 if (parser->had_memory_error) {
263 return;
264 }
Damien George69a818d2014-01-12 13:55:24 +0000265 if (parser->result_stack_top >= parser->result_stack_alloc) {
Damien George66e18f02014-05-05 13:19:03 +0100266 mp_parse_node_t *pn = m_renew_maybe(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc, parser->result_stack_alloc + MP_ALLOC_PARSE_RESULT_INC);
Damien George58ba4c32014-04-10 14:27:31 +0000267 if (pn == NULL) {
268 memory_error(parser);
269 return;
270 }
271 parser->result_stack = pn;
Damien George66e18f02014-05-05 13:19:03 +0100272 parser->result_stack_alloc += MP_ALLOC_PARSE_RESULT_INC;
Damien George69a818d2014-01-12 13:55:24 +0000273 }
Damien429d7192013-10-04 19:53:11 +0100274 parser->result_stack[parser->result_stack_top++] = pn;
275}
276
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200277STATIC void push_result_token(parser_t *parser, const mp_lexer_t *lex) {
Damiend99b0522013-12-21 18:17:45 +0000278 const mp_token_t *tok = mp_lexer_cur(lex);
279 mp_parse_node_t pn;
280 if (tok->kind == MP_TOKEN_NAME) {
Damien George55baff42014-01-21 21:40:13 +0000281 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_ID, qstr_from_strn(tok->str, tok->len));
Damiend99b0522013-12-21 18:17:45 +0000282 } else if (tok->kind == MP_TOKEN_NUMBER) {
Damien429d7192013-10-04 19:53:11 +0100283 bool dec = false;
284 bool small_int = true;
Paul Sokolovsky80f60e12014-01-11 02:33:29 +0200285 machine_int_t int_val = 0;
Damien429d7192013-10-04 19:53:11 +0100286 int len = tok->len;
287 const char *str = tok->str;
Damien George06201ff2014-03-01 19:50:50 +0000288 int base = 0;
289 int i = mp_parse_num_base(str, len, &base);
Paul Sokolovsky80f60e12014-01-11 02:33:29 +0200290 bool overflow = false;
Damien429d7192013-10-04 19:53:11 +0100291 for (; i < len; i++) {
Paul Sokolovsky80f60e12014-01-11 02:33:29 +0200292 machine_int_t old_val = int_val;
Damien George8cc96a32013-12-30 18:23:50 +0000293 if (unichar_isdigit(str[i]) && str[i] - '0' < base) {
Damien429d7192013-10-04 19:53:11 +0100294 int_val = base * int_val + str[i] - '0';
295 } else if (base == 16 && 'a' <= str[i] && str[i] <= 'f') {
296 int_val = base * int_val + str[i] - 'a' + 10;
Damiendd12d132013-12-29 13:03:49 +0000297 } else if (base == 16 && 'A' <= str[i] && str[i] <= 'F') {
Damien429d7192013-10-04 19:53:11 +0100298 int_val = base * int_val + str[i] - 'A' + 10;
Damien7410e442013-11-02 19:47:57 +0000299 } else if (str[i] == '.' || str[i] == 'e' || str[i] == 'E' || str[i] == 'j' || str[i] == 'J') {
Damien429d7192013-10-04 19:53:11 +0100300 dec = true;
301 break;
302 } else {
303 small_int = false;
304 break;
305 }
Paul Sokolovsky80f60e12014-01-11 02:33:29 +0200306 if (int_val < old_val) {
307 // If new value became less than previous, it's overflow
308 overflow = true;
309 } else if ((old_val ^ int_val) & WORD_MSBIT_HIGH) {
310 // If signed number changed sign - it's overflow
311 overflow = true;
312 }
Damien429d7192013-10-04 19:53:11 +0100313 }
314 if (dec) {
Damien George55baff42014-01-21 21:40:13 +0000315 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_DECIMAL, qstr_from_strn(str, len));
Paul Sokolovskybbf0e2f2014-02-21 02:04:32 +0200316 } else if (small_int && !overflow && MP_PARSE_FITS_SMALL_INT(int_val)) {
Damiend99b0522013-12-21 18:17:45 +0000317 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_SMALL_INT, int_val);
Damien429d7192013-10-04 19:53:11 +0100318 } else {
Damien George55baff42014-01-21 21:40:13 +0000319 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_INTEGER, qstr_from_strn(str, len));
Damien429d7192013-10-04 19:53:11 +0100320 }
Damiend99b0522013-12-21 18:17:45 +0000321 } else if (tok->kind == MP_TOKEN_STRING) {
Damien George55baff42014-01-21 21:40:13 +0000322 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_STRING, qstr_from_strn(tok->str, tok->len));
Damiend99b0522013-12-21 18:17:45 +0000323 } else if (tok->kind == MP_TOKEN_BYTES) {
Damien George55baff42014-01-21 21:40:13 +0000324 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_BYTES, qstr_from_strn(tok->str, tok->len));
Damien429d7192013-10-04 19:53:11 +0100325 } else {
Damiend99b0522013-12-21 18:17:45 +0000326 pn = mp_parse_node_new_leaf(MP_PARSE_NODE_TOKEN, tok->kind);
Damien429d7192013-10-04 19:53:11 +0100327 }
328 push_result_node(parser, pn);
329}
330
Paul Sokolovsky520e2f52014-02-12 18:31:30 +0200331STATIC void push_result_rule(parser_t *parser, int src_line, const rule_t *rule, int num_args) {
Damien George58ba4c32014-04-10 14:27:31 +0000332 mp_parse_node_struct_t *pn = m_new_obj_var_maybe(mp_parse_node_struct_t, mp_parse_node_t, num_args);
333 if (pn == NULL) {
334 memory_error(parser);
335 return;
336 }
337 pn->source_line = src_line;
338 pn->kind_num_nodes = (rule->rule_id & 0xff) | (num_args << 8);
Damien429d7192013-10-04 19:53:11 +0100339 for (int i = num_args; i > 0; i--) {
340 pn->nodes[i - 1] = pop_result(parser);
341 }
Damiend99b0522013-12-21 18:17:45 +0000342 push_result_node(parser, (mp_parse_node_t)pn);
Damien429d7192013-10-04 19:53:11 +0100343}
344
Damien Georgec5966122014-02-15 16:10:44 +0000345mp_parse_node_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind, mp_parse_error_kind_t *parse_error_kind_out) {
Damien George69a818d2014-01-12 13:55:24 +0000346
347 // allocate memory for the parser and its stacks
348
349 parser_t *parser = m_new_obj(parser_t);
350
Damien George58ba4c32014-04-10 14:27:31 +0000351 parser->had_memory_error = false;
352
Damien George66e18f02014-05-05 13:19:03 +0100353 parser->rule_stack_alloc = MP_ALLOC_PARSE_RULE_INIT;
Damien429d7192013-10-04 19:53:11 +0100354 parser->rule_stack_top = 0;
355 parser->rule_stack = m_new(rule_stack_t, parser->rule_stack_alloc);
356
Damien George66e18f02014-05-05 13:19:03 +0100357 parser->result_stack_alloc = MP_ALLOC_PARSE_RESULT_INIT;
Damien429d7192013-10-04 19:53:11 +0100358 parser->result_stack_top = 0;
Damien George69a818d2014-01-12 13:55:24 +0000359 parser->result_stack = m_new(mp_parse_node_t, parser->result_stack_alloc);
Damien429d7192013-10-04 19:53:11 +0100360
Damien George08335002014-01-18 23:24:36 +0000361 parser->lexer = lex;
362
Damien George69a818d2014-01-12 13:55:24 +0000363 // work out the top-level rule to use, and push it on the stack
Damien5ac1b2e2013-10-18 19:58:12 +0100364 int top_level_rule;
365 switch (input_kind) {
Damiend99b0522013-12-21 18:17:45 +0000366 case MP_PARSE_SINGLE_INPUT: top_level_rule = RULE_single_input; break;
Damien Georged02c6d82014-01-15 22:14:03 +0000367 case MP_PARSE_EVAL_INPUT: top_level_rule = RULE_eval_input; break;
Damien5ac1b2e2013-10-18 19:58:12 +0100368 default: top_level_rule = RULE_file_input;
369 }
Damien George08335002014-01-18 23:24:36 +0000370 push_rule(parser, mp_lexer_cur(lex)->src_line, rules[top_level_rule], 0);
Damien429d7192013-10-04 19:53:11 +0100371
Damien George69a818d2014-01-12 13:55:24 +0000372 // parse!
373
Damien George08335002014-01-18 23:24:36 +0000374 uint n, i; // state for the current rule
375 uint rule_src_line; // source line for the first token matched by the current rule
Damien429d7192013-10-04 19:53:11 +0100376 bool backtrack = false;
Damien George08335002014-01-18 23:24:36 +0000377 const rule_t *rule = NULL;
Damiend99b0522013-12-21 18:17:45 +0000378 mp_token_kind_t tok_kind;
Damien429d7192013-10-04 19:53:11 +0100379 bool emit_rule;
380 bool had_trailing_sep;
381
382 for (;;) {
383 next_rule:
Damien George58ba4c32014-04-10 14:27:31 +0000384 if (parser->rule_stack_top == 0 || parser->had_memory_error) {
Damien429d7192013-10-04 19:53:11 +0100385 break;
386 }
387
Damien George08335002014-01-18 23:24:36 +0000388 pop_rule(parser, &rule, &i, &rule_src_line);
Damien429d7192013-10-04 19:53:11 +0100389 n = rule->act & RULE_ACT_ARG_MASK;
390
391 /*
392 // debugging
393 printf("depth=%d ", parser->rule_stack_top);
394 for (int j = 0; j < parser->rule_stack_top; ++j) {
395 printf(" ");
396 }
397 printf("%s n=%d i=%d bt=%d\n", rule->rule_name, n, i, backtrack);
398 */
399
400 switch (rule->act & RULE_ACT_KIND_MASK) {
401 case RULE_ACT_OR:
402 if (i > 0 && !backtrack) {
403 goto next_rule;
404 } else {
405 backtrack = false;
406 }
407 for (; i < n - 1; ++i) {
408 switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
409 case RULE_ARG_TOK:
Damiend99b0522013-12-21 18:17:45 +0000410 if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
Damien429d7192013-10-04 19:53:11 +0100411 push_result_token(parser, lex);
Damiend99b0522013-12-21 18:17:45 +0000412 mp_lexer_to_next(lex);
Damien429d7192013-10-04 19:53:11 +0100413 goto next_rule;
414 }
415 break;
416 case RULE_ARG_RULE:
Damien George08335002014-01-18 23:24:36 +0000417 push_rule(parser, rule_src_line, rule, i + 1); // save this or-rule
418 push_rule_from_arg(parser, rule->arg[i]); // push child of or-rule
Damien429d7192013-10-04 19:53:11 +0100419 goto next_rule;
420 default:
421 assert(0);
422 }
423 }
424 if ((rule->arg[i] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
Damiend99b0522013-12-21 18:17:45 +0000425 if (mp_lexer_is_kind(lex, rule->arg[i] & RULE_ARG_ARG_MASK)) {
Damien429d7192013-10-04 19:53:11 +0100426 push_result_token(parser, lex);
Damiend99b0522013-12-21 18:17:45 +0000427 mp_lexer_to_next(lex);
Damien429d7192013-10-04 19:53:11 +0100428 } else {
429 backtrack = true;
430 goto next_rule;
431 }
432 } else {
433 push_rule_from_arg(parser, rule->arg[i]);
434 }
435 break;
436
437 case RULE_ACT_AND:
438
439 // failed, backtrack if we can, else syntax error
440 if (backtrack) {
441 assert(i > 0);
442 if ((rule->arg[i - 1] & RULE_ARG_KIND_MASK) == RULE_ARG_OPT_RULE) {
443 // an optional rule that failed, so continue with next arg
Damiend99b0522013-12-21 18:17:45 +0000444 push_result_node(parser, MP_PARSE_NODE_NULL);
Damien429d7192013-10-04 19:53:11 +0100445 backtrack = false;
446 } else {
447 // a mandatory rule that failed, so propagate backtrack
448 if (i > 1) {
449 // already eaten tokens so can't backtrack
450 goto syntax_error;
451 } else {
452 goto next_rule;
453 }
454 }
455 }
456
457 // progress through the rule
458 for (; i < n; ++i) {
459 switch (rule->arg[i] & RULE_ARG_KIND_MASK) {
460 case RULE_ARG_TOK:
461 // need to match a token
462 tok_kind = rule->arg[i] & RULE_ARG_ARG_MASK;
Damiend99b0522013-12-21 18:17:45 +0000463 if (mp_lexer_is_kind(lex, tok_kind)) {
Damien429d7192013-10-04 19:53:11 +0100464 // matched token
Damiend99b0522013-12-21 18:17:45 +0000465 if (tok_kind == MP_TOKEN_NAME) {
Damien429d7192013-10-04 19:53:11 +0100466 push_result_token(parser, lex);
467 }
Damiend99b0522013-12-21 18:17:45 +0000468 mp_lexer_to_next(lex);
Damien429d7192013-10-04 19:53:11 +0100469 } else {
470 // failed to match token
471 if (i > 0) {
472 // already eaten tokens so can't backtrack
473 goto syntax_error;
474 } else {
475 // this rule failed, so backtrack
476 backtrack = true;
477 goto next_rule;
478 }
479 }
480 break;
481 case RULE_ARG_RULE:
Damien429d7192013-10-04 19:53:11 +0100482 case RULE_ARG_OPT_RULE:
Damien George08335002014-01-18 23:24:36 +0000483 push_rule(parser, rule_src_line, rule, i + 1); // save this and-rule
484 push_rule_from_arg(parser, rule->arg[i]); // push child of and-rule
Damien429d7192013-10-04 19:53:11 +0100485 goto next_rule;
486 default:
487 assert(0);
488 }
489 }
490
491 assert(i == n);
492
493 // matched the rule, so now build the corresponding parse_node
494
495 // count number of arguments for the parse_node
496 i = 0;
497 emit_rule = false;
498 for (int x = 0; x < n; ++x) {
499 if ((rule->arg[x] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
500 tok_kind = rule->arg[x] & RULE_ARG_ARG_MASK;
Damiend99b0522013-12-21 18:17:45 +0000501 if (tok_kind >= MP_TOKEN_NAME) {
Damien429d7192013-10-04 19:53:11 +0100502 emit_rule = true;
503 }
Damiend99b0522013-12-21 18:17:45 +0000504 if (tok_kind == MP_TOKEN_NAME) {
Damien429d7192013-10-04 19:53:11 +0100505 // only tokens which were names are pushed to stack
506 i += 1;
507 }
508 } else {
509 // rules are always pushed
510 i += 1;
511 }
512 }
513
Damien George93afa232014-05-06 21:44:11 +0100514#if 0 && !MICROPY_ENABLE_DOC_STRING
515 // this code discards lonely statement, such as doc strings
516 // problem is that doc strings have already been interned, so this doesn't really help reduce RAM usage
517 if (input_kind != MP_PARSE_SINGLE_INPUT && rule->rule_id == RULE_expr_stmt && peek_result(parser, 0) == MP_PARSE_NODE_NULL) {
518 mp_parse_node_t p = peek_result(parser, 1);
519 if (MP_PARSE_NODE_IS_LEAF(p) && !MP_PARSE_NODE_IS_ID(p)) {
520 pop_result(parser);
521 pop_result(parser);
522 push_result_rule(parser, rule_src_line, rules[RULE_pass_stmt], 0);
523 break;
524 }
525 }
526#endif
527
Damien429d7192013-10-04 19:53:11 +0100528 // always emit these rules, even if they have only 1 argument
529 if (rule->rule_id == RULE_expr_stmt || rule->rule_id == RULE_yield_stmt) {
530 emit_rule = true;
531 }
532
533 // never emit these rules if they have only 1 argument
534 // NOTE: can't put atom_paren here because we need it to distinguisg, for example, [a,b] from [(a,b)]
Damienb14de212013-10-06 00:28:28 +0100535 // TODO possibly put varargslist_name, varargslist_equal here as well
536 if (rule->rule_id == RULE_else_stmt || rule->rule_id == RULE_testlist_comp_3b || rule->rule_id == RULE_import_as_names_paren || rule->rule_id == RULE_typedargslist_name || rule->rule_id == RULE_typedargslist_colon || rule->rule_id == RULE_typedargslist_equal || rule->rule_id == RULE_dictorsetmaker_colon || rule->rule_id == RULE_classdef_2 || rule->rule_id == RULE_with_item_as || rule->rule_id == RULE_assert_stmt_extra || rule->rule_id == RULE_as_name || rule->rule_id == RULE_raise_stmt_from || rule->rule_id == RULE_vfpdef) {
Damien429d7192013-10-04 19:53:11 +0100537 emit_rule = false;
538 }
539
540 // always emit these rules, and add an extra blank node at the end (to be used by the compiler to store data)
Damien Georgeb829b5c2014-01-25 13:51:19 +0000541 if (ADD_BLANK_NODE(rule->rule_id)) {
Damien429d7192013-10-04 19:53:11 +0100542 emit_rule = true;
Damiend99b0522013-12-21 18:17:45 +0000543 push_result_node(parser, MP_PARSE_NODE_NULL);
Damien429d7192013-10-04 19:53:11 +0100544 i += 1;
545 }
546
547 int num_not_nil = 0;
548 for (int x = 0; x < i; ++x) {
Damiend99b0522013-12-21 18:17:45 +0000549 if (peek_result(parser, x) != MP_PARSE_NODE_NULL) {
Damien429d7192013-10-04 19:53:11 +0100550 num_not_nil += 1;
551 }
552 }
553 //printf("done and %s n=%d i=%d notnil=%d\n", rule->rule_name, n, i, num_not_nil);
554 if (emit_rule) {
Damien George08335002014-01-18 23:24:36 +0000555 push_result_rule(parser, rule_src_line, rule, i);
Damien429d7192013-10-04 19:53:11 +0100556 } else if (num_not_nil == 0) {
Damien George08335002014-01-18 23:24:36 +0000557 push_result_rule(parser, rule_src_line, rule, i); // needed for, eg, atom_paren, testlist_comp_3b
Damien429d7192013-10-04 19:53:11 +0100558 //result_stack_show(parser);
559 //assert(0);
560 } else if (num_not_nil == 1) {
561 // single result, leave it on stack
Damiend99b0522013-12-21 18:17:45 +0000562 mp_parse_node_t pn = MP_PARSE_NODE_NULL;
Damien429d7192013-10-04 19:53:11 +0100563 for (int x = 0; x < i; ++x) {
Damiend99b0522013-12-21 18:17:45 +0000564 mp_parse_node_t pn2 = pop_result(parser);
565 if (pn2 != MP_PARSE_NODE_NULL) {
Damien429d7192013-10-04 19:53:11 +0100566 pn = pn2;
567 }
568 }
569 push_result_node(parser, pn);
570 } else {
Damien George08335002014-01-18 23:24:36 +0000571 push_result_rule(parser, rule_src_line, rule, i);
Damien429d7192013-10-04 19:53:11 +0100572 }
573 break;
574
575 case RULE_ACT_LIST:
576 // n=2 is: item item*
577 // n=1 is: item (sep item)*
578 // n=3 is: item (sep item)* [sep]
579 if (backtrack) {
580 list_backtrack:
581 had_trailing_sep = false;
582 if (n == 2) {
583 if (i == 1) {
584 // fail on item, first time round; propagate backtrack
585 goto next_rule;
586 } else {
587 // fail on item, in later rounds; finish with this rule
588 backtrack = false;
589 }
590 } else {
591 if (i == 1) {
592 // fail on item, first time round; propagate backtrack
593 goto next_rule;
594 } else if ((i & 1) == 1) {
595 // fail on item, in later rounds; have eaten tokens so can't backtrack
596 if (n == 3) {
597 // list allows trailing separator; finish parsing list
598 had_trailing_sep = true;
599 backtrack = false;
600 } else {
601 // list doesn't allowing trailing separator; fail
602 goto syntax_error;
603 }
604 } else {
605 // fail on separator; finish parsing list
606 backtrack = false;
607 }
608 }
609 } else {
610 for (;;) {
611 uint arg = rule->arg[i & 1 & n];
612 switch (arg & RULE_ARG_KIND_MASK) {
613 case RULE_ARG_TOK:
Damiend99b0522013-12-21 18:17:45 +0000614 if (mp_lexer_is_kind(lex, arg & RULE_ARG_ARG_MASK)) {
Damien429d7192013-10-04 19:53:11 +0100615 if (i & 1 & n) {
616 // separators which are tokens are not pushed to result stack
617 } else {
618 push_result_token(parser, lex);
619 }
Damiend99b0522013-12-21 18:17:45 +0000620 mp_lexer_to_next(lex);
Damien429d7192013-10-04 19:53:11 +0100621 // got element of list, so continue parsing list
622 i += 1;
623 } else {
624 // couldn't get element of list
625 i += 1;
626 backtrack = true;
627 goto list_backtrack;
628 }
629 break;
630 case RULE_ARG_RULE:
Damien George08335002014-01-18 23:24:36 +0000631 push_rule(parser, rule_src_line, rule, i + 1); // save this list-rule
632 push_rule_from_arg(parser, arg); // push child of list-rule
Damien429d7192013-10-04 19:53:11 +0100633 goto next_rule;
634 default:
635 assert(0);
636 }
637 }
638 }
639 assert(i >= 1);
640
641 // compute number of elements in list, result in i
642 i -= 1;
643 if ((n & 1) && (rule->arg[1] & RULE_ARG_KIND_MASK) == RULE_ARG_TOK) {
644 // don't count separators when they are tokens
645 i = (i + 1) / 2;
646 }
647
648 if (i == 1) {
649 // list matched single item
650 if (had_trailing_sep) {
651 // if there was a trailing separator, make a list of a single item
Damien George08335002014-01-18 23:24:36 +0000652 push_result_rule(parser, rule_src_line, rule, i);
Damien429d7192013-10-04 19:53:11 +0100653 } else {
654 // just leave single item on stack (ie don't wrap in a list)
655 }
656 } else {
657 //printf("done list %s %d %d\n", rule->rule_name, n, i);
Damien George08335002014-01-18 23:24:36 +0000658 push_result_rule(parser, rule_src_line, rule, i);
Damien429d7192013-10-04 19:53:11 +0100659 }
660 break;
661
662 default:
663 assert(0);
664 }
665 }
Damien91d387d2013-10-09 15:09:52 +0100666
Damien George58ba4c32014-04-10 14:27:31 +0000667 mp_parse_node_t result;
668
669 // check if we had a memory error
670 if (parser->had_memory_error) {
671 *parse_error_kind_out = MP_PARSE_ERROR_MEMORY;
672 result = MP_PARSE_NODE_NULL;
673 goto finished;
674
675 }
676
Damien91d387d2013-10-09 15:09:52 +0100677 // check we are at the end of the token stream
Damiend99b0522013-12-21 18:17:45 +0000678 if (!mp_lexer_is_kind(lex, MP_TOKEN_END)) {
Damien91d387d2013-10-09 15:09:52 +0100679 goto syntax_error;
Damien429d7192013-10-04 19:53:11 +0100680 }
Damien91d387d2013-10-09 15:09:52 +0100681
Damien429d7192013-10-04 19:53:11 +0100682 //printf("--------------\n");
683 //result_stack_show(parser);
Damien George69a818d2014-01-12 13:55:24 +0000684 //printf("rule stack alloc: %d\n", parser->rule_stack_alloc);
685 //printf("result stack alloc: %d\n", parser->result_stack_alloc);
Damien429d7192013-10-04 19:53:11 +0100686 //printf("number of parse nodes allocated: %d\n", num_parse_nodes_allocated);
Damien George69a818d2014-01-12 13:55:24 +0000687
688 // get the root parse node that we created
689 assert(parser->result_stack_top == 1);
Damien George58ba4c32014-04-10 14:27:31 +0000690 result = parser->result_stack[0];
Damien George69a818d2014-01-12 13:55:24 +0000691
692finished:
693 // free the memory that we don't need anymore
694 m_del(rule_stack_t, parser->rule_stack, parser->rule_stack_alloc);
695 m_del(mp_parse_node_t, parser->result_stack, parser->result_stack_alloc);
696 m_del_obj(parser_t, parser);
697
698 // return the result
699 return result;
Damien429d7192013-10-04 19:53:11 +0100700
701syntax_error:
Damiend99b0522013-12-21 18:17:45 +0000702 if (mp_lexer_is_kind(lex, MP_TOKEN_INDENT)) {
Damien Georgec5966122014-02-15 16:10:44 +0000703 *parse_error_kind_out = MP_PARSE_ERROR_UNEXPECTED_INDENT;
Damiend99b0522013-12-21 18:17:45 +0000704 } else if (mp_lexer_is_kind(lex, MP_TOKEN_DEDENT_MISMATCH)) {
Damien Georgec5966122014-02-15 16:10:44 +0000705 *parse_error_kind_out = MP_PARSE_ERROR_UNMATCHED_UNINDENT;
Damien91d387d2013-10-09 15:09:52 +0100706 } else {
Damien Georgec5966122014-02-15 16:10:44 +0000707 *parse_error_kind_out = MP_PARSE_ERROR_INVALID_SYNTAX;
Damien429d7192013-10-04 19:53:11 +0100708#ifdef USE_RULE_NAME
Damien George9528cd62014-01-15 21:23:31 +0000709 // debugging: print the rule name that failed and the token
Damien Georgec5966122014-02-15 16:10:44 +0000710 printf("rule: %s\n", rule->rule_name);
711#if MICROPY_DEBUG_PRINTERS
Damiend99b0522013-12-21 18:17:45 +0000712 mp_token_show(mp_lexer_cur(lex));
Damien George9528cd62014-01-15 21:23:31 +0000713#endif
Damien Georgec5966122014-02-15 16:10:44 +0000714#endif
Damien91d387d2013-10-09 15:09:52 +0100715 }
Damien George69a818d2014-01-12 13:55:24 +0000716 result = MP_PARSE_NODE_NULL;
717 goto finished;
Damien429d7192013-10-04 19:53:11 +0100718}