blob: 3118df62c89c65c76478123160ae411ad7a556a0 [file] [log] [blame]
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
Paul Sokolovsky8ab6f902014-12-25 23:29:19 +020026#ifndef __MICROPY_INCLUDED_PY_LEXER_H__
27#define __MICROPY_INCLUDED_PY_LEXER_H__
Damien George04b91472014-05-03 23:27:38 +010028
Damien George51dfcb42015-01-01 20:27:54 +000029#include <stdint.h>
30
31#include "py/mpconfig.h"
32#include "py/qstr.h"
33
/* lexer.h -- simple tokeniser for Micro Python
 *
 * Uses (byte) length instead of null termination.
 * Tokens are the same - UTF-8 with (byte) length.
 */
39
// Kinds of token produced by the lexer.
// Enumerators take consecutive values starting at 0; the numeric comments
// mark the running value at intervals.  The order is part of the interface:
// do not insert or reorder entries without checking every user of the values.
typedef enum _mp_token_kind_t {
    MP_TOKEN_END,                   // 0

    // kinds whose names denote malformed input
    MP_TOKEN_INVALID,
    MP_TOKEN_DEDENT_MISMATCH,
    MP_TOKEN_LONELY_STRING_OPEN,
    MP_TOKEN_BAD_LINE_CONTINUATION,

    // line structure
    MP_TOKEN_NEWLINE,               // 5
    MP_TOKEN_INDENT,                // 6
    MP_TOKEN_DEDENT,                // 7

    // names and literals
    MP_TOKEN_NAME,                  // 8
    MP_TOKEN_INTEGER,
    MP_TOKEN_FLOAT_OR_IMAG,
    MP_TOKEN_STRING,
    MP_TOKEN_BYTES,

    MP_TOKEN_ELLIPSIS,

    // keywords (KW_)
    MP_TOKEN_KW_FALSE,              // 14
    MP_TOKEN_KW_NONE,
    MP_TOKEN_KW_TRUE,
    MP_TOKEN_KW_AND,
    MP_TOKEN_KW_AS,
    MP_TOKEN_KW_ASSERT,
    MP_TOKEN_KW_BREAK,
    MP_TOKEN_KW_CLASS,
    MP_TOKEN_KW_CONTINUE,
    MP_TOKEN_KW_DEF,                // 23
    MP_TOKEN_KW_DEL,
    MP_TOKEN_KW_ELIF,
    MP_TOKEN_KW_ELSE,
    MP_TOKEN_KW_EXCEPT,
    MP_TOKEN_KW_FINALLY,
    MP_TOKEN_KW_FOR,
    MP_TOKEN_KW_FROM,
    MP_TOKEN_KW_GLOBAL,
    MP_TOKEN_KW_IF,
    MP_TOKEN_KW_IMPORT,             // 33
    MP_TOKEN_KW_IN,
    MP_TOKEN_KW_IS,
    MP_TOKEN_KW_LAMBDA,
    MP_TOKEN_KW_NONLOCAL,
    MP_TOKEN_KW_NOT,
    MP_TOKEN_KW_OR,
    MP_TOKEN_KW_PASS,
    MP_TOKEN_KW_RAISE,
    MP_TOKEN_KW_RETURN,
    MP_TOKEN_KW_TRY,                // 43
    MP_TOKEN_KW_WHILE,
    MP_TOKEN_KW_WITH,
    MP_TOKEN_KW_YIELD,

    // operators (OP_)
    MP_TOKEN_OP_PLUS,               // 47
    MP_TOKEN_OP_MINUS,
    MP_TOKEN_OP_STAR,
    MP_TOKEN_OP_DBL_STAR,
    MP_TOKEN_OP_SLASH,
    MP_TOKEN_OP_DBL_SLASH,
    MP_TOKEN_OP_PERCENT,
    MP_TOKEN_OP_LESS,
    MP_TOKEN_OP_DBL_LESS,
    MP_TOKEN_OP_MORE,
    MP_TOKEN_OP_DBL_MORE,           // 57
    MP_TOKEN_OP_AMPERSAND,
    MP_TOKEN_OP_PIPE,
    MP_TOKEN_OP_CARET,
    MP_TOKEN_OP_TILDE,
    MP_TOKEN_OP_LESS_EQUAL,
    MP_TOKEN_OP_MORE_EQUAL,
    MP_TOKEN_OP_DBL_EQUAL,
    MP_TOKEN_OP_NOT_EQUAL,

    // delimiters (DEL_)
    MP_TOKEN_DEL_PAREN_OPEN,        // 66
    MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN,
    MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN,
    MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA,
    MP_TOKEN_DEL_COLON,
    MP_TOKEN_DEL_PERIOD,
    MP_TOKEN_DEL_SEMICOLON,
    MP_TOKEN_DEL_AT,                // 76
    MP_TOKEN_DEL_EQUAL,
    MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_DEL_MINUS_EQUAL,
    MP_TOKEN_DEL_STAR_EQUAL,
    MP_TOKEN_DEL_SLASH_EQUAL,
    MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_DEL_CARET_EQUAL,       // 86
    MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_DEL_DBL_STAR_EQUAL,
    MP_TOKEN_DEL_MINUS_MORE,        // 90
} mp_token_kind_t;
Damien429d7192013-10-04 19:53:11 +0100140
// the next-byte function must return the next byte in the stream
// it must return MP_LEXER_EOF if end of stream
// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
// NOTE(review): the sentinel is a plain signed (-1); keep it untyped so that
// comparisons against unsigned values of any width still convert consistently
#define MP_LEXER_EOF (-1)
145typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*);
// stream-close callback type: takes an untyped (void*) argument and returns nothing
typedef void (*mp_lexer_stream_close_t)(void*);
Damiena5185f42013-10-20 14:41:27 +0100147
Damien Georgea4c52c52014-12-05 19:35:18 +0000148// this data structure is exposed for efficiency
149// public members are: source_name, tok_line, tok_column, tok_kind, vstr
150typedef struct _mp_lexer_t {
151 qstr source_name; // name of source
152 void *stream_data; // data for stream
153 mp_lexer_stream_next_byte_t stream_next_byte; // stream callback to get next byte
154 mp_lexer_stream_close_t stream_close; // stream callback to free
Damien429d7192013-10-04 19:53:11 +0100155
Damien Georgea4c52c52014-12-05 19:35:18 +0000156 unichar chr0, chr1, chr2; // current cached characters from source
157
158 mp_uint_t line; // current source line
159 mp_uint_t column; // current source column
160
161 mp_int_t emit_dent; // non-zero when there are INDENT/DEDENT tokens to emit
162 mp_int_t nested_bracket_level; // >0 when there are nested brackets over multiple lines
163
164 mp_uint_t alloc_indent_level;
165 mp_uint_t num_indent_level;
166 uint16_t *indent_level;
167
168 mp_uint_t tok_line; // token source line
169 mp_uint_t tok_column; // token source column
170 mp_token_kind_t tok_kind; // token kind
171 vstr_t vstr; // token data
172} mp_lexer_t;
Damien429d7192013-10-04 19:53:11 +0100173
Damien George94fbe972014-07-30 11:46:05 +0100174mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close);
Damien George54eb4e72014-07-03 13:47:47 +0100175mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
Damien George9193f892014-01-08 15:28:26 +0000176
Damiend99b0522013-12-21 18:17:45 +0000177void mp_lexer_free(mp_lexer_t *lex);
178void mp_lexer_to_next(mp_lexer_t *lex);
Damien Georgea4c52c52014-12-05 19:35:18 +0000179void mp_lexer_show_token(const mp_lexer_t *lex);
Damien George66028ab2014-01-03 14:03:48 +0000180
/******************************************************************/
// platform specific import function; must be implemented for a specific port
// TODO tidy up, rename, or put elsewhere

//mp_lexer_t *mp_import_open_file(qstr mod_name);
186
// result type of mp_import_stat(); the enumerator names distinguish a path
// that does not exist, a directory, and a file
typedef enum {
    MP_IMPORT_STAT_NO_EXIST,    // 0
    MP_IMPORT_STAT_DIR,         // 1
    MP_IMPORT_STAT_FILE,        // 2
} mp_import_stat_t;
192
193mp_import_stat_t mp_import_stat(const char *path);
194mp_lexer_t *mp_lexer_new_from_file(const char *filename);
Paul Sokolovskyd3439d02014-06-02 19:37:55 +0300195
Paul Sokolovsky8ab6f902014-12-25 23:29:19 +0200196#endif // __MICROPY_INCLUDED_PY_LEXER_H__