blob: 36d1e99d23f23776115b0f4fa9f70fa826912fdc [file] [log] [blame]
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
Paul Sokolovsky8ab6f902014-12-25 23:29:19 +020026#ifndef __MICROPY_INCLUDED_PY_LEXER_H__
27#define __MICROPY_INCLUDED_PY_LEXER_H__
Damien George04b91472014-05-03 23:27:38 +010028
Damien George51dfcb42015-01-01 20:27:54 +000029#include <stdint.h>
30
31#include "py/mpconfig.h"
32#include "py/qstr.h"
33
/* lexer.h -- simple tokeniser for Micro Python
 *
 * Uses (byte) length instead of null termination.
 * Tokens are the same - UTF-8 with (byte) length.
 */
39
// Kinds of token that the lexer can produce.
// Enumerators take consecutive values starting at 0; the numeric comments
// record the resulting value of the enumerator they are attached to.
typedef enum _mp_token_kind_t {
    MP_TOKEN_END,                   // 0

    // NOTE(review): by their names these look like lexing-error tokens -- confirm in the lexer
    MP_TOKEN_INVALID,
    MP_TOKEN_DEDENT_MISMATCH,
    MP_TOKEN_LONELY_STRING_OPEN,
    MP_TOKEN_BAD_LINE_CONTINUATION,

    MP_TOKEN_NEWLINE,               // 5
    MP_TOKEN_INDENT,                // 6
    MP_TOKEN_DEDENT,                // 7

    MP_TOKEN_NAME,                  // 8
    MP_TOKEN_INTEGER,
    MP_TOKEN_FLOAT_OR_IMAG,
    MP_TOKEN_STRING,
    MP_TOKEN_BYTES,

    MP_TOKEN_ELLIPSIS,

    // keyword tokens (MP_TOKEN_KW_*)
    MP_TOKEN_KW_FALSE,              // 14
    MP_TOKEN_KW_NONE,
    MP_TOKEN_KW_TRUE,
    MP_TOKEN_KW_AND,
    MP_TOKEN_KW_AS,
    MP_TOKEN_KW_ASSERT,
    MP_TOKEN_KW_BREAK,
    MP_TOKEN_KW_CLASS,
    MP_TOKEN_KW_CONTINUE,
    MP_TOKEN_KW_DEF,                // 23
    MP_TOKEN_KW_DEL,
    MP_TOKEN_KW_ELIF,
    MP_TOKEN_KW_ELSE,
    MP_TOKEN_KW_EXCEPT,
    MP_TOKEN_KW_FINALLY,
    MP_TOKEN_KW_FOR,
    MP_TOKEN_KW_FROM,
    MP_TOKEN_KW_GLOBAL,
    MP_TOKEN_KW_IF,
    MP_TOKEN_KW_IMPORT,             // 33
    MP_TOKEN_KW_IN,
    MP_TOKEN_KW_IS,
    MP_TOKEN_KW_LAMBDA,
    MP_TOKEN_KW_NONLOCAL,
    MP_TOKEN_KW_NOT,
    MP_TOKEN_KW_OR,
    MP_TOKEN_KW_PASS,
    MP_TOKEN_KW_RAISE,
    MP_TOKEN_KW_RETURN,
    MP_TOKEN_KW_TRY,                // 43
    MP_TOKEN_KW_WHILE,
    MP_TOKEN_KW_WITH,
    MP_TOKEN_KW_YIELD,

    // operator tokens (MP_TOKEN_OP_*)
    MP_TOKEN_OP_PLUS,               // 47
    MP_TOKEN_OP_MINUS,
    MP_TOKEN_OP_STAR,
    MP_TOKEN_OP_DBL_STAR,
    MP_TOKEN_OP_SLASH,
    MP_TOKEN_OP_DBL_SLASH,
    MP_TOKEN_OP_PERCENT,
    MP_TOKEN_OP_LESS,
    MP_TOKEN_OP_DBL_LESS,
    MP_TOKEN_OP_MORE,
    MP_TOKEN_OP_DBL_MORE,           // 57
    MP_TOKEN_OP_AMPERSAND,
    MP_TOKEN_OP_PIPE,
    MP_TOKEN_OP_CARET,
    MP_TOKEN_OP_TILDE,
    MP_TOKEN_OP_LESS_EQUAL,
    MP_TOKEN_OP_MORE_EQUAL,
    MP_TOKEN_OP_DBL_EQUAL,
    MP_TOKEN_OP_NOT_EQUAL,

    // delimiter tokens (MP_TOKEN_DEL_*)
    MP_TOKEN_DEL_PAREN_OPEN,        // 66
    MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN,
    MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN,
    MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA,
    MP_TOKEN_DEL_COLON,
    MP_TOKEN_DEL_PERIOD,
    MP_TOKEN_DEL_SEMICOLON,
    MP_TOKEN_DEL_AT,                // 76
    MP_TOKEN_DEL_EQUAL,
    MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_DEL_MINUS_EQUAL,
    MP_TOKEN_DEL_STAR_EQUAL,
    MP_TOKEN_DEL_SLASH_EQUAL,
    MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_DEL_CARET_EQUAL,       // 86
    MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_DEL_DBL_STAR_EQUAL,
    MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t;
Damien429d7192013-10-04 19:53:11 +0100140
Damien George94fbe972014-07-30 11:46:05 +0100141// the next-byte function must return the next byte in the stream
142// it must return MP_LEXER_EOF if end of stream
143// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
Damien George2e2e4042015-03-19 00:21:29 +0000144#define MP_LEXER_EOF ((unichar)(-1))
145
Damien George94fbe972014-07-30 11:46:05 +0100146typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*);
Damiend99b0522013-12-21 18:17:45 +0000147typedef void (*mp_lexer_stream_close_t)(void*);
Damiena5185f42013-10-20 14:41:27 +0100148
Damien Georgea4c52c52014-12-05 19:35:18 +0000149// this data structure is exposed for efficiency
150// public members are: source_name, tok_line, tok_column, tok_kind, vstr
151typedef struct _mp_lexer_t {
152 qstr source_name; // name of source
153 void *stream_data; // data for stream
154 mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte
155 mp_lexer_stream_close_t stream_close; // stream callback to free
Damien429d7192013-10-04 19:53:11 +0100156
Damien Georgea4c52c52014-12-05 19:35:18 +0000157 unichar chr0, chr1, chr2; // current cached characters from source
158
159 mp_uint_t line; // current source line
160 mp_uint_t column; // current source column
161
162 mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
163 mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines
164
165 mp_uint_t alloc_indent_level;
166 mp_uint_t num_indent_level;
167 uint16_t *indent_level;
168
169 mp_uint_t tok_line; // token source line
170 mp_uint_t tok_column; // token source column
171 mp_token_kind_t tok_kind; // token kind
172 vstr_t vstr; // token data
173} mp_lexer_t;
Damien429d7192013-10-04 19:53:11 +0100174
Damien George94fbe972014-07-30 11:46:05 +0100175mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close);
Damien George54eb4e72014-07-03 13:47:47 +0100176mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
Damien George9193f892014-01-08 15:28:26 +0000177
Damiend99b0522013-12-21 18:17:45 +0000178void mp_lexer_free(mp_lexer_t *lex);
179void mp_lexer_to_next(mp_lexer_t *lex);
Damien Georgea4c52c52014-12-05 19:35:18 +0000180void mp_lexer_show_token(const mp_lexer_t *lex);
Damien George66028ab2014-01-03 14:03:48 +0000181
/******************************************************************/
// platform specific import function; must be implemented for a specific port
// TODO tidy up, rename, or put elsewhere

//mp_lexer_t *mp_import_open_file(qstr mod_name);
187
// result type of mp_import_stat(); enumerators take the values 0, 1, 2 in order
typedef enum {
    MP_IMPORT_STAT_NO_EXIST,
    MP_IMPORT_STAT_DIR,
    MP_IMPORT_STAT_FILE,
} mp_import_stat_t;
193
194mp_import_stat_t mp_import_stat(const char *path);
195mp_lexer_t *mp_lexer_new_from_file(const char *filename);
Paul Sokolovskyd3439d02014-06-02 19:37:55 +0300196
// only declared when MICROPY_HELPER_LEXER_UNIX is enabled
// NOTE(review): close_fd presumably selects whether the lexer closes fd when done -- confirm
#if MICROPY_HELPER_LEXER_UNIX
mp_lexer_t *mp_lexer_new_from_fd(qstr filename, int fd, bool close_fd);
#endif
200
Paul Sokolovsky8ab6f902014-12-25 23:29:19 +0200201#endif // __MICROPY_INCLUDED_PY_LEXER_H__