blob: d70735f6d7a722c61b972465a83fc1100623d0e0 [file] [log] [blame]
/*
 * This file is part of the Micro Python project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* lexer.h -- simple tokeniser for Micro Python
 *
 * Uses (byte) length instead of null termination.
 * Tokens are the same - UTF-8 with (byte) length.
 */

/*
 * Kinds of token the lexer can report.
 *
 * No enumerator has an explicit initialiser, so the values run consecutively
 * from 0 (MP_TOKEN_END) to 89 (MP_TOKEN_DEL_MINUS_MORE).  The trailing numeric
 * comments record the value of selected entries for quick reference and must
 * be kept in step if the list changes.
 *
 * NOTE(review): code outside this header may depend on this exact numbering
 * or ordering -- confirm before inserting, removing or reordering entries.
 */
typedef enum _mp_token_kind_t {
    MP_TOKEN_END,                   // 0

    MP_TOKEN_INVALID,
    MP_TOKEN_DEDENT_MISMATCH,
    MP_TOKEN_LONELY_STRING_OPEN,
    MP_TOKEN_BAD_LINE_CONTINUATION,

    MP_TOKEN_NEWLINE,               // 5
    MP_TOKEN_INDENT,                // 6
    MP_TOKEN_DEDENT,                // 7

    MP_TOKEN_NAME,                  // 8
    MP_TOKEN_NUMBER,
    MP_TOKEN_STRING,
    MP_TOKEN_BYTES,

    MP_TOKEN_ELLIPSIS,

    // keywords (MP_TOKEN_KW_*)
    MP_TOKEN_KW_FALSE,              // 13
    MP_TOKEN_KW_NONE,
    MP_TOKEN_KW_TRUE,
    MP_TOKEN_KW_AND,
    MP_TOKEN_KW_AS,
    MP_TOKEN_KW_ASSERT,
    MP_TOKEN_KW_BREAK,
    MP_TOKEN_KW_CLASS,
    MP_TOKEN_KW_CONTINUE,
    MP_TOKEN_KW_DEF,                // 22
    MP_TOKEN_KW_DEL,
    MP_TOKEN_KW_ELIF,
    MP_TOKEN_KW_ELSE,
    MP_TOKEN_KW_EXCEPT,
    MP_TOKEN_KW_FINALLY,
    MP_TOKEN_KW_FOR,
    MP_TOKEN_KW_FROM,
    MP_TOKEN_KW_GLOBAL,
    MP_TOKEN_KW_IF,
    MP_TOKEN_KW_IMPORT,             // 32
    MP_TOKEN_KW_IN,
    MP_TOKEN_KW_IS,
    MP_TOKEN_KW_LAMBDA,
    MP_TOKEN_KW_NONLOCAL,
    MP_TOKEN_KW_NOT,
    MP_TOKEN_KW_OR,
    MP_TOKEN_KW_PASS,
    MP_TOKEN_KW_RAISE,
    MP_TOKEN_KW_RETURN,
    MP_TOKEN_KW_TRY,                // 42
    MP_TOKEN_KW_WHILE,
    MP_TOKEN_KW_WITH,
    MP_TOKEN_KW_YIELD,

    // operators (MP_TOKEN_OP_*)
    MP_TOKEN_OP_PLUS,               // 46
    MP_TOKEN_OP_MINUS,
    MP_TOKEN_OP_STAR,
    MP_TOKEN_OP_DBL_STAR,
    MP_TOKEN_OP_SLASH,
    MP_TOKEN_OP_DBL_SLASH,
    MP_TOKEN_OP_PERCENT,
    MP_TOKEN_OP_LESS,
    MP_TOKEN_OP_DBL_LESS,
    MP_TOKEN_OP_MORE,
    MP_TOKEN_OP_DBL_MORE,           // 56
    MP_TOKEN_OP_AMPERSAND,
    MP_TOKEN_OP_PIPE,
    MP_TOKEN_OP_CARET,
    MP_TOKEN_OP_TILDE,
    MP_TOKEN_OP_LESS_EQUAL,
    MP_TOKEN_OP_MORE_EQUAL,
    MP_TOKEN_OP_DBL_EQUAL,
    MP_TOKEN_OP_NOT_EQUAL,

    // delimiters (MP_TOKEN_DEL_*)
    MP_TOKEN_DEL_PAREN_OPEN,        // 65
    MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN,
    MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN,
    MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA,
    MP_TOKEN_DEL_COLON,
    MP_TOKEN_DEL_PERIOD,
    MP_TOKEN_DEL_SEMICOLON,
    MP_TOKEN_DEL_AT,                // 75
    MP_TOKEN_DEL_EQUAL,
    MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_DEL_MINUS_EQUAL,
    MP_TOKEN_DEL_STAR_EQUAL,
    MP_TOKEN_DEL_SLASH_EQUAL,
    MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_DEL_CARET_EQUAL,       // 85
    MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_DEL_DBL_STAR_EQUAL,
    MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t;

Damiend99b0522013-12-21 18:17:45 +0000133typedef struct _mp_token_t {
Damien George54eb4e72014-07-03 13:47:47 +0100134 mp_uint_t src_line; // source line
135 mp_uint_t src_column; // source column
Damien429d7192013-10-04 19:53:11 +0100136
Damiend99b0522013-12-21 18:17:45 +0000137 mp_token_kind_t kind; // kind of token
Damiena5185f42013-10-20 14:41:27 +0100138 const char *str; // string of token (valid only while this token is current token)
Damien George54eb4e72014-07-03 13:47:47 +0100139 mp_uint_t len; // (byte) length of string of token
Damiend99b0522013-12-21 18:17:45 +0000140} mp_token_t;
Damien429d7192013-10-04 19:53:11 +0100141
Damien George94fbe972014-07-30 11:46:05 +0100142// the next-byte function must return the next byte in the stream
143// it must return MP_LEXER_EOF if end of stream
144// it can be called again after returning MP_LEXER_EOF, and in that case must return MP_LEXER_EOF
145#define MP_LEXER_EOF (-1)
146typedef mp_uint_t (*mp_lexer_stream_next_byte_t)(void*);
Damiend99b0522013-12-21 18:17:45 +0000147typedef void (*mp_lexer_stream_close_t)(void*);
Damiena5185f42013-10-20 14:41:27 +0100148
Damiend99b0522013-12-21 18:17:45 +0000149typedef struct _mp_lexer_t mp_lexer_t;
Damien429d7192013-10-04 19:53:11 +0100150
Damiend99b0522013-12-21 18:17:45 +0000151void mp_token_show(const mp_token_t *tok);
Damien429d7192013-10-04 19:53:11 +0100152
Damien George94fbe972014-07-30 11:46:05 +0100153mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_byte_t stream_next_byte, mp_lexer_stream_close_t stream_close);
Damien George54eb4e72014-07-03 13:47:47 +0100154mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, mp_uint_t len, mp_uint_t free_len);
Damien George9193f892014-01-08 15:28:26 +0000155
Damiend99b0522013-12-21 18:17:45 +0000156void mp_lexer_free(mp_lexer_t *lex);
Damien George08335002014-01-18 23:24:36 +0000157qstr mp_lexer_source_name(mp_lexer_t *lex);
Damiend99b0522013-12-21 18:17:45 +0000158void mp_lexer_to_next(mp_lexer_t *lex);
159const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex);
160bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind);
Damien George9528cd62014-01-15 21:23:31 +0000161
162bool mp_lexer_show_error_pythonic_prefix(mp_lexer_t *lex);
Damiend99b0522013-12-21 18:17:45 +0000163bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg);
Damien George66028ab2014-01-03 14:03:48 +0000164
/******************************************************************/
// platform specific import function; must be implemented for a specific port
// TODO tidy up, rename, or put elsewhere

//mp_lexer_t *mp_import_open_file(qstr mod_name);

// Result type of mp_import_stat().  Going by the names, it classifies a path
// as not existing, being a directory, or being a file.
// Values are implicit: NO_EXIST is 0, DIR is 1, FILE is 2.
typedef enum {
    MP_IMPORT_STAT_NO_EXIST,
    MP_IMPORT_STAT_DIR,
    MP_IMPORT_STAT_FILE,
} mp_import_stat_t;

177mp_import_stat_t mp_import_stat(const char *path);
178mp_lexer_t *mp_lexer_new_from_file(const char *filename);
Paul Sokolovskyd3439d02014-06-02 19:37:55 +0300179
Damien George54eb4e72014-07-03 13:47:47 +0100180extern mp_uint_t mp_optimise_value;