blob: f41c962e7e50412a39ccad561a2c6560835cfdfa [file] [log] [blame]
/* lexer.h -- simple tokeniser for Micro Python
 *
 * Uses (byte) length instead of null termination.
 * Tokens are the same - UTF-8 with (byte) length.
 */

// Kinds of token produced by the lexer.
//
// Enumerators carry no explicit initialisers, so they are numbered
// consecutively from 0 in declaration order.  The trailing "// N" comments
// record the resulting value of selected enumerators as a reading aid; they
// must be kept in sync if an enumerator is added, removed or reordered.
typedef enum _mp_token_kind_t {
    MP_TOKEN_END,                   // 0

    // NOTE(review): judging by their names these four look like lexing-error
    // kinds -- confirm against the lexer implementation
    MP_TOKEN_INVALID,
    MP_TOKEN_DEDENT_MISMATCH,
    MP_TOKEN_LONELY_STRING_OPEN,
    MP_TOKEN_BAD_LINE_CONTINUATION,

    // line structure
    MP_TOKEN_NEWLINE,               // 5
    MP_TOKEN_INDENT,                // 6
    MP_TOKEN_DEDENT,                // 7

    // names and literals
    MP_TOKEN_NAME,                  // 8
    MP_TOKEN_NUMBER,
    MP_TOKEN_STRING,
    MP_TOKEN_BYTES,

    MP_TOKEN_ELLIPSIS,

    // keywords (MP_TOKEN_KW_*)
    MP_TOKEN_KW_FALSE,              // 13
    MP_TOKEN_KW_NONE,
    MP_TOKEN_KW_TRUE,
    MP_TOKEN_KW_AND,
    MP_TOKEN_KW_AS,
    MP_TOKEN_KW_ASSERT,
    MP_TOKEN_KW_BREAK,
    MP_TOKEN_KW_CLASS,
    MP_TOKEN_KW_CONTINUE,
    MP_TOKEN_KW_DEF,                // 22
    MP_TOKEN_KW_DEL,
    MP_TOKEN_KW_ELIF,
    MP_TOKEN_KW_ELSE,
    MP_TOKEN_KW_EXCEPT,
    MP_TOKEN_KW_FINALLY,
    MP_TOKEN_KW_FOR,
    MP_TOKEN_KW_FROM,
    MP_TOKEN_KW_GLOBAL,
    MP_TOKEN_KW_IF,
    MP_TOKEN_KW_IMPORT,             // 32
    MP_TOKEN_KW_IN,
    MP_TOKEN_KW_IS,
    MP_TOKEN_KW_LAMBDA,
    MP_TOKEN_KW_NONLOCAL,
    MP_TOKEN_KW_NOT,
    MP_TOKEN_KW_OR,
    MP_TOKEN_KW_PASS,
    MP_TOKEN_KW_RAISE,
    MP_TOKEN_KW_RETURN,
    MP_TOKEN_KW_TRY,                // 42
    MP_TOKEN_KW_WHILE,
    MP_TOKEN_KW_WITH,
    MP_TOKEN_KW_YIELD,

    // operators (MP_TOKEN_OP_*)
    MP_TOKEN_OP_PLUS,               // 46
    MP_TOKEN_OP_MINUS,
    MP_TOKEN_OP_STAR,
    MP_TOKEN_OP_DBL_STAR,
    MP_TOKEN_OP_SLASH,
    MP_TOKEN_OP_DBL_SLASH,
    MP_TOKEN_OP_PERCENT,
    MP_TOKEN_OP_LESS,
    MP_TOKEN_OP_DBL_LESS,
    MP_TOKEN_OP_MORE,
    MP_TOKEN_OP_DBL_MORE,           // 56
    MP_TOKEN_OP_AMPERSAND,
    MP_TOKEN_OP_PIPE,
    MP_TOKEN_OP_CARET,
    MP_TOKEN_OP_TILDE,
    MP_TOKEN_OP_LESS_EQUAL,
    MP_TOKEN_OP_MORE_EQUAL,
    MP_TOKEN_OP_DBL_EQUAL,
    MP_TOKEN_OP_NOT_EQUAL,

    // delimiters (MP_TOKEN_DEL_*)
    MP_TOKEN_DEL_PAREN_OPEN,        // 65
    MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN,
    MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN,
    MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA,
    MP_TOKEN_DEL_COLON,
    MP_TOKEN_DEL_PERIOD,
    MP_TOKEN_DEL_SEMICOLON,
    MP_TOKEN_DEL_AT,                // 75
    MP_TOKEN_DEL_EQUAL,
    MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_DEL_MINUS_EQUAL,
    MP_TOKEN_DEL_STAR_EQUAL,
    MP_TOKEN_DEL_SLASH_EQUAL,
    MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_DEL_CARET_EQUAL,       // 85
    MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_DEL_DBL_STAR_EQUAL,
    MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t;

Damiend99b0522013-12-21 18:17:45 +0000107typedef struct _mp_token_t {
Damiena5185f42013-10-20 14:41:27 +0100108 uint src_line; // source line
109 uint src_column; // source column
Damien429d7192013-10-04 19:53:11 +0100110
Damiend99b0522013-12-21 18:17:45 +0000111 mp_token_kind_t kind; // kind of token
Damiena5185f42013-10-20 14:41:27 +0100112 const char *str; // string of token (valid only while this token is current token)
Damien429d7192013-10-04 19:53:11 +0100113 uint len; // (byte) length of string of token
Damiend99b0522013-12-21 18:17:45 +0000114} mp_token_t;
Damien429d7192013-10-04 19:53:11 +0100115
// the next-char function must return the next character in the stream
// it must return MP_LEXER_CHAR_EOF if end of stream
// it can be called again after returning MP_LEXER_CHAR_EOF, and in that case must return MP_LEXER_CHAR_EOF
#define MP_LEXER_CHAR_EOF (-1)
120typedef unichar (*mp_lexer_stream_next_char_t)(void*);
121typedef void (*mp_lexer_stream_close_t)(void*);
Damiena5185f42013-10-20 14:41:27 +0100122
Damiend99b0522013-12-21 18:17:45 +0000123typedef struct _mp_lexer_t mp_lexer_t;
Damien429d7192013-10-04 19:53:11 +0100124
Damiend99b0522013-12-21 18:17:45 +0000125void mp_token_show(const mp_token_t *tok);
Damien429d7192013-10-04 19:53:11 +0100126
Damien Georgeb829b5c2014-01-25 13:51:19 +0000127mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close);
128mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, uint len, uint free_len);
Damien George9193f892014-01-08 15:28:26 +0000129
Damiend99b0522013-12-21 18:17:45 +0000130void mp_lexer_free(mp_lexer_t *lex);
Damien George08335002014-01-18 23:24:36 +0000131qstr mp_lexer_source_name(mp_lexer_t *lex);
Damiend99b0522013-12-21 18:17:45 +0000132void mp_lexer_to_next(mp_lexer_t *lex);
133const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex);
134bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind);
Damien George9528cd62014-01-15 21:23:31 +0000135
136bool mp_lexer_show_error_pythonic_prefix(mp_lexer_t *lex);
Damiend99b0522013-12-21 18:17:45 +0000137bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg);
Damien George66028ab2014-01-03 14:03:48 +0000138
/******************************************************************/
// platform specific import function; must be implemented for a specific port
// TODO tidy up, rename, or put elsewhere

//mp_lexer_t *mp_import_open_file(qstr mod_name);

// Return type of mp_import_stat.  Enumerators are implicitly numbered from 0.
// NOTE(review): names suggest "path does not exist / is a directory / is a
// file" -- confirm against a port's mp_import_stat implementation.
typedef enum {
    MP_IMPORT_STAT_NO_EXIST,
    MP_IMPORT_STAT_DIR,
    MP_IMPORT_STAT_FILE,
} mp_import_stat_t;

151mp_import_stat_t mp_import_stat(const char *path);
152mp_lexer_t *mp_lexer_new_from_file(const char *filename);