/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
Paul Sokolovsky8ab6f902014-12-25 23:29:19 +020026#ifndef __MICROPY_INCLUDED_PY_LEXER_H__
27#define __MICROPY_INCLUDED_PY_LEXER_H__
Damien George04b91472014-05-03 23:27:38 +010028
Damien George51dfcb42015-01-01 20:27:54 +000029#include <stdint.h>
30
31#include "py/mpconfig.h"
32#include "py/qstr.h"
33
/* lexer.h -- simple tokeniser for Micro Python
 *
 * Uses (byte) length instead of null termination.
 * Tokens are the same - UTF-8 with (byte) length.
 */
39
// Kinds of token stored in the tok_kind member of the lexer state.
//
// No enumerator has an explicit value, so they are numbered consecutively
// from 0 in declaration order. The trailing "// N" comments record the
// numeric value of selected entries as a reading aid; inserting or removing
// an enumerator shifts every value after it.
typedef enum _mp_token_kind_t {
    MP_TOKEN_END,                   // 0

    // NOTE(review): judging by their names these four report lexing errors;
    // confirm against the lexer implementation
    MP_TOKEN_INVALID,
    MP_TOKEN_DEDENT_MISMATCH,
    MP_TOKEN_LONELY_STRING_OPEN,
    MP_TOKEN_BAD_LINE_CONTINUATION,

    MP_TOKEN_NEWLINE,               // 5
    MP_TOKEN_INDENT,                // 6
    MP_TOKEN_DEDENT,                // 7

    MP_TOKEN_NAME,                  // 8
    MP_TOKEN_NUMBER,
    MP_TOKEN_STRING,
    MP_TOKEN_BYTES,

    MP_TOKEN_ELLIPSIS,

    // MP_TOKEN_KW_* group: one enumerator per keyword
    MP_TOKEN_KW_FALSE,              // 13
    MP_TOKEN_KW_NONE,
    MP_TOKEN_KW_TRUE,
    MP_TOKEN_KW_AND,
    MP_TOKEN_KW_AS,
    MP_TOKEN_KW_ASSERT,
    MP_TOKEN_KW_BREAK,
    MP_TOKEN_KW_CLASS,
    MP_TOKEN_KW_CONTINUE,
    MP_TOKEN_KW_DEF,                // 22
    MP_TOKEN_KW_DEL,
    MP_TOKEN_KW_ELIF,
    MP_TOKEN_KW_ELSE,
    MP_TOKEN_KW_EXCEPT,
    MP_TOKEN_KW_FINALLY,
    MP_TOKEN_KW_FOR,
    MP_TOKEN_KW_FROM,
    MP_TOKEN_KW_GLOBAL,
    MP_TOKEN_KW_IF,
    MP_TOKEN_KW_IMPORT,             // 32
    MP_TOKEN_KW_IN,
    MP_TOKEN_KW_IS,
    MP_TOKEN_KW_LAMBDA,
    MP_TOKEN_KW_NONLOCAL,
    MP_TOKEN_KW_NOT,
    MP_TOKEN_KW_OR,
    MP_TOKEN_KW_PASS,
    MP_TOKEN_KW_RAISE,
    MP_TOKEN_KW_RETURN,
    MP_TOKEN_KW_TRY,                // 42
    MP_TOKEN_KW_WHILE,
    MP_TOKEN_KW_WITH,
    MP_TOKEN_KW_YIELD,

    // MP_TOKEN_OP_* group
    MP_TOKEN_OP_PLUS,               // 46
    MP_TOKEN_OP_MINUS,
    MP_TOKEN_OP_STAR,
    MP_TOKEN_OP_DBL_STAR,
    MP_TOKEN_OP_SLASH,
    MP_TOKEN_OP_DBL_SLASH,
    MP_TOKEN_OP_PERCENT,
    MP_TOKEN_OP_LESS,
    MP_TOKEN_OP_DBL_LESS,
    MP_TOKEN_OP_MORE,
    MP_TOKEN_OP_DBL_MORE,           // 56
    MP_TOKEN_OP_AMPERSAND,
    MP_TOKEN_OP_PIPE,
    MP_TOKEN_OP_CARET,
    MP_TOKEN_OP_TILDE,
    MP_TOKEN_OP_LESS_EQUAL,
    MP_TOKEN_OP_MORE_EQUAL,
    MP_TOKEN_OP_DBL_EQUAL,
    MP_TOKEN_OP_NOT_EQUAL,

    // MP_TOKEN_DEL_* group
    MP_TOKEN_DEL_PAREN_OPEN,        // 65
    MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN,
    MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN,
    MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA,
    MP_TOKEN_DEL_COLON,
    MP_TOKEN_DEL_PERIOD,
    MP_TOKEN_DEL_SEMICOLON,
    MP_TOKEN_DEL_AT,                // 75
    MP_TOKEN_DEL_EQUAL,
    MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_DEL_MINUS_EQUAL,
    MP_TOKEN_DEL_STAR_EQUAL,
    MP_TOKEN_DEL_SLASH_EQUAL,
    MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_DEL_CARET_EQUAL,       // 85
    MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_DEL_DBL_STAR_EQUAL,
    MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t;
Damien429d7192013-10-04 19:53:11 +0100139
Damien George94fbe972014-07-30 11:46:05 +0100140// the next-byte function must return the next byte in the stream
141// it must return MP_LEXER_EOF if end of stream
142// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
143#define MP_LEXER_EOF (-1)
144typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*);
Damiend99b0522013-12-21 18:17:45 +0000145typedef void (*mp_lexer_stream_close_t)(void*);
Damiena5185f42013-10-20 14:41:27 +0100146
Damien Georgea4c52c52014-12-05 19:35:18 +0000147// this data structure is exposed for efficiency
148// public members are: source_name, tok_line, tok_column, tok_kind, vstr
149typedef struct _mp_lexer_t {
150 qstr source_name; // name of source
151 void *stream_data; // data for stream
152 mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte
153 mp_lexer_stream_close_t stream_close; // stream callback to free
Damien429d7192013-10-04 19:53:11 +0100154
Damien Georgea4c52c52014-12-05 19:35:18 +0000155 unichar chr0, chr1, chr2; // current cached characters from source
156
157 mp_uint_t line; // current source line
158 mp_uint_t column; // current source column
159
160 mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
161 mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines
162
163 mp_uint_t alloc_indent_level;
164 mp_uint_t num_indent_level;
165 uint16_t *indent_level;
166
167 mp_uint_t tok_line; // token source line
168 mp_uint_t tok_column; // token source column
169 mp_token_kind_t tok_kind; // token kind
170 vstr_t vstr; // token data
171} mp_lexer_t;
Damien429d7192013-10-04 19:53:11 +0100172
Damien George94fbe972014-07-30 11:46:05 +0100173mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close);
Damien George54eb4e72014-07-03 13:47:47 +0100174mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
Damien George9193f892014-01-08 15:28:26 +0000175
Damiend99b0522013-12-21 18:17:45 +0000176void mp_lexer_free(mp_lexer_t *lex);
177void mp_lexer_to_next(mp_lexer_t *lex);
Damien Georgea4c52c52014-12-05 19:35:18 +0000178void mp_lexer_show_token(const mp_lexer_t *lex);
Damien George66028ab2014-01-03 14:03:48 +0000179
/******************************************************************/
// platform specific import function; must be implemented for a specific port
// TODO tidy up, rename, or put elsewhere

//mp_lexer_t *mp_import_open_file(qstr mod_name);
185
// Result of probing an import path: nothing there, a directory, or a file.
// Values are implicit and consecutive from 0.
typedef enum {
    MP_IMPORT_STAT_NO_EXIST,    // 0
    MP_IMPORT_STAT_DIR,         // 1
    MP_IMPORT_STAT_FILE,        // 2
} mp_import_stat_t;
191
192mp_import_stat_t mp_import_stat(const char *path);
193mp_lexer_t *mp_lexer_new_from_file(const char *filename);
Paul Sokolovskyd3439d02014-06-02 19:37:55 +0300194
Paul Sokolovsky8ab6f902014-12-25 23:29:19 +0200195#endif // __MICROPY_INCLUDED_PY_LEXER_H__