/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
Paul Sokolovsky | 8ab6f90 | 2014-12-25 23:29:19 +0200 | [diff] [blame] | 26 | #ifndef __MICROPY_INCLUDED_PY_LEXER_H__ |
| 27 | #define __MICROPY_INCLUDED_PY_LEXER_H__ |
Damien George | 04b9147 | 2014-05-03 23:27:38 +0100 | [diff] [blame] | 28 | |
Damien George | 51dfcb4 | 2015-01-01 20:27:54 +0000 | [diff] [blame] | 29 | #include <stdint.h> |
| 30 | |
| 31 | #include "py/mpconfig.h" |
| 32 | #include "py/qstr.h" |
| 33 | |
/* lexer.h -- simple tokeniser for Micro Python
 *
 * Uses (byte) length instead of null termination.
 * Tokens are the same - UTF-8 with (byte) length.
 */

// Kinds of token reported by the lexer (this is the type of the tok_kind
// member of mp_lexer_t).
//
// No enumerator has an explicit value, so each one is numbered one higher
// than the previous, starting from 0.  The numeric comments are reference
// markers derived from that ordering: inserting or reordering an enumerator
// shifts every value after it, and the markers must then be updated.
typedef enum _mp_token_kind_t {
    MP_TOKEN_END,                   // 0

    MP_TOKEN_INVALID,
    MP_TOKEN_DEDENT_MISMATCH,
    MP_TOKEN_LONELY_STRING_OPEN,
    MP_TOKEN_BAD_LINE_CONTINUATION,

    MP_TOKEN_NEWLINE,               // 5
    MP_TOKEN_INDENT,                // 6
    MP_TOKEN_DEDENT,                // 7

    MP_TOKEN_NAME,                  // 8
    MP_TOKEN_INTEGER,
    MP_TOKEN_FLOAT_OR_IMAG,
    MP_TOKEN_STRING,
    MP_TOKEN_BYTES,

    MP_TOKEN_ELLIPSIS,

    // MP_TOKEN_KW_*: one enumerator per keyword
    MP_TOKEN_KW_FALSE,              // 14
    MP_TOKEN_KW_NONE,
    MP_TOKEN_KW_TRUE,
    MP_TOKEN_KW_AND,
    MP_TOKEN_KW_AS,
    MP_TOKEN_KW_ASSERT,
    MP_TOKEN_KW_BREAK,
    MP_TOKEN_KW_CLASS,
    MP_TOKEN_KW_CONTINUE,
    MP_TOKEN_KW_DEF,                // 23
    MP_TOKEN_KW_DEL,
    MP_TOKEN_KW_ELIF,
    MP_TOKEN_KW_ELSE,
    MP_TOKEN_KW_EXCEPT,
    MP_TOKEN_KW_FINALLY,
    MP_TOKEN_KW_FOR,
    MP_TOKEN_KW_FROM,
    MP_TOKEN_KW_GLOBAL,
    MP_TOKEN_KW_IF,
    MP_TOKEN_KW_IMPORT,             // 33
    MP_TOKEN_KW_IN,
    MP_TOKEN_KW_IS,
    MP_TOKEN_KW_LAMBDA,
    MP_TOKEN_KW_NONLOCAL,
    MP_TOKEN_KW_NOT,
    MP_TOKEN_KW_OR,
    MP_TOKEN_KW_PASS,
    MP_TOKEN_KW_RAISE,
    MP_TOKEN_KW_RETURN,
    MP_TOKEN_KW_TRY,                // 43
    MP_TOKEN_KW_WHILE,
    MP_TOKEN_KW_WITH,
    MP_TOKEN_KW_YIELD,

    // MP_TOKEN_OP_*: operators
    MP_TOKEN_OP_PLUS,               // 47
    MP_TOKEN_OP_MINUS,
    MP_TOKEN_OP_STAR,
    MP_TOKEN_OP_DBL_STAR,
    MP_TOKEN_OP_SLASH,
    MP_TOKEN_OP_DBL_SLASH,
    MP_TOKEN_OP_PERCENT,
    MP_TOKEN_OP_LESS,
    MP_TOKEN_OP_DBL_LESS,
    MP_TOKEN_OP_MORE,
    MP_TOKEN_OP_DBL_MORE,           // 57
    MP_TOKEN_OP_AMPERSAND,
    MP_TOKEN_OP_PIPE,
    MP_TOKEN_OP_CARET,
    MP_TOKEN_OP_TILDE,
    MP_TOKEN_OP_LESS_EQUAL,
    MP_TOKEN_OP_MORE_EQUAL,
    MP_TOKEN_OP_DBL_EQUAL,
    MP_TOKEN_OP_NOT_EQUAL,

    // MP_TOKEN_DEL_*: delimiters, including the assignment forms
    MP_TOKEN_DEL_PAREN_OPEN,        // 66
    MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN,
    MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN,
    MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA,
    MP_TOKEN_DEL_COLON,
    MP_TOKEN_DEL_PERIOD,
    MP_TOKEN_DEL_SEMICOLON,
    MP_TOKEN_DEL_AT,                // 76
    MP_TOKEN_DEL_EQUAL,
    MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_DEL_MINUS_EQUAL,
    MP_TOKEN_DEL_STAR_EQUAL,
    MP_TOKEN_DEL_SLASH_EQUAL,
    MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_DEL_CARET_EQUAL,       // 86
    MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_DEL_DBL_STAR_EQUAL,
    MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t;

Damien George | 94fbe97 | 2014-07-30 11:46:05 +0100 | [diff] [blame] | 141 | // the next-byte function must return the next byte in the stream |
| 142 | // it must return MP_LEXER_EOF if end of stream |
| 143 | // it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF |
Damien George | 2e2e404 | 2015-03-19 00:21:29 +0000 | [diff] [blame] | 144 | #define MP_LEXER_EOF ((unichar)(-1)) |
| 145 | |
Damien George | 94fbe97 | 2014-07-30 11:46:05 +0100 | [diff] [blame] | 146 | typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*); |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 147 | typedef void (*mp_lexer_stream_close_t)(void*); |
Damien | a5185f4 | 2013-10-20 14:41:27 +0100 | [diff] [blame] | 148 | |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 149 | // this data structure is exposed for efficiency |
| 150 | // public members are: source_name, tok_line, tok_column, tok_kind, vstr |
| 151 | typedef struct _mp_lexer_t { |
| 152 | qstr source_name; // name of source |
| 153 | void *stream_data; // data for stream |
| 154 | mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte |
| 155 | mp_lexer_stream_close_t stream_close; // stream callback to free |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 156 | |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 157 | unichar chr0, chr1, chr2; // current cached characters from source |
| 158 | |
| 159 | mp_uint_t line; // current source line |
| 160 | mp_uint_t column; // current source column |
| 161 | |
| 162 | mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit |
| 163 | mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines |
| 164 | |
| 165 | mp_uint_t alloc_indent_level; |
| 166 | mp_uint_t num_indent_level; |
| 167 | uint16_t *indent_level; |
| 168 | |
| 169 | mp_uint_t tok_line; // token source line |
| 170 | mp_uint_t tok_column; // token source column |
| 171 | mp_token_kind_t tok_kind; // token kind |
| 172 | vstr_t vstr; // token data |
| 173 | } mp_lexer_t; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 174 | |
Damien George | 94fbe97 | 2014-07-30 11:46:05 +0100 | [diff] [blame] | 175 | mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close); |
Damien George | 54eb4e7 | 2014-07-03 13:47:47 +0100 | [diff] [blame] | 176 | mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len); |
Damien George | 9193f89 | 2014-01-08 15:28:26 +0000 | [diff] [blame] | 177 | |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 178 | void mp_lexer_free(mp_lexer_t *lex); |
| 179 | void mp_lexer_to_next(mp_lexer_t *lex); |
Damien George | a4c52c5 | 2014-12-05 19:35:18 +0000 | [diff] [blame] | 180 | void mp_lexer_show_token(const mp_lexer_t *lex); |
Damien George | 66028ab | 2014-01-03 14:03:48 +0000 | [diff] [blame] | 181 | |
/******************************************************************/
// platform-specific import functions; these must be implemented by each port
// TODO tidy up, rename, or put elsewhere

//mp_lexer_t *mp_import_open_file(qstr mod_name);

// Return type of mp_import_stat().  Enumerators are implicitly numbered
// 0, 1, 2 in declaration order.
// NOTE(review): by their names these presumably mean "path does not exist",
// "path is a directory" and "path is a file" -- confirm against a port's
// mp_import_stat implementation.
typedef enum {
    MP_IMPORT_STAT_NO_EXIST,
    MP_IMPORT_STAT_DIR,
    MP_IMPORT_STAT_FILE,
} mp_import_stat_t;

| 194 | mp_import_stat_t mp_import_stat(const char *path); |
| 195 | mp_lexer_t *mp_lexer_new_from_file(const char *filename); |
Paul Sokolovsky | d3439d0 | 2014-06-02 19:37:55 +0300 | [diff] [blame] | 196 | |
Damien George | 031278f | 2015-06-04 23:42:45 +0100 | [diff] [blame] | 197 | #if MICROPY_HELPER_LEXER_UNIX |
| 198 | mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd); |
| 199 | #endif |
| 200 | |
Paul Sokolovsky | 8ab6f90 | 2014-12-25 23:29:19 +0200 | [diff] [blame] | 201 | #endif // __MICROPY_INCLUDED_PY_LEXER_H__ |