/* lexer.h -- simple tokeniser for Micro Python
 *
 * Uses (byte) length instead of null termination.
 * Tokens are the same - UTF-8 with (byte) length.
 */

// Kinds of token the lexer can report.
//
// Enumerators take their implicit values (starting at 0, increasing by 1), so
// the trailing "// N" comments are only a reading aid: inserting or removing
// an enumerator shifts every value after it, and those comments must be kept
// in step by hand.
typedef enum _mp_token_kind_t {
    MP_TOKEN_END,                   // 0

    // NOTE(review): judging by their names these three report lexing
    // problems rather than real tokens -- confirm against the lexer source
    MP_TOKEN_INVALID,
    MP_TOKEN_DEDENT_MISMATCH,
    MP_TOKEN_LONELY_STRING_OPEN,

    MP_TOKEN_NEWLINE,               // 4
    MP_TOKEN_INDENT,                // 5
    MP_TOKEN_DEDENT,                // 6

    MP_TOKEN_NAME,                  // 7
    MP_TOKEN_NUMBER,
    MP_TOKEN_STRING,
    MP_TOKEN_BYTES,

    MP_TOKEN_ELLIPSES,

    // MP_TOKEN_KW_*: keywords
    MP_TOKEN_KW_FALSE,              // 12
    MP_TOKEN_KW_NONE,
    MP_TOKEN_KW_TRUE,
    MP_TOKEN_KW_AND,
    MP_TOKEN_KW_AS,
    MP_TOKEN_KW_ASSERT,
    MP_TOKEN_KW_BREAK,
    MP_TOKEN_KW_CLASS,
    MP_TOKEN_KW_CONTINUE,
    MP_TOKEN_KW_DEF,                // 21
    MP_TOKEN_KW_DEL,
    MP_TOKEN_KW_ELIF,
    MP_TOKEN_KW_ELSE,
    MP_TOKEN_KW_EXCEPT,
    MP_TOKEN_KW_FINALLY,
    MP_TOKEN_KW_FOR,
    MP_TOKEN_KW_FROM,
    MP_TOKEN_KW_GLOBAL,
    MP_TOKEN_KW_IF,
    MP_TOKEN_KW_IMPORT,             // 31
    MP_TOKEN_KW_IN,
    MP_TOKEN_KW_IS,
    MP_TOKEN_KW_LAMBDA,
    MP_TOKEN_KW_NONLOCAL,
    MP_TOKEN_KW_NOT,
    MP_TOKEN_KW_OR,
    MP_TOKEN_KW_PASS,
    MP_TOKEN_KW_RAISE,
    MP_TOKEN_KW_RETURN,
    MP_TOKEN_KW_TRY,                // 41
    MP_TOKEN_KW_WHILE,
    MP_TOKEN_KW_WITH,
    MP_TOKEN_KW_YIELD,

    // MP_TOKEN_OP_*: operators
    MP_TOKEN_OP_PLUS,               // 45
    MP_TOKEN_OP_MINUS,
    MP_TOKEN_OP_STAR,
    MP_TOKEN_OP_DBL_STAR,
    MP_TOKEN_OP_SLASH,
    MP_TOKEN_OP_DBL_SLASH,
    MP_TOKEN_OP_PERCENT,
    MP_TOKEN_OP_LESS,
    MP_TOKEN_OP_DBL_LESS,
    MP_TOKEN_OP_MORE,
    MP_TOKEN_OP_DBL_MORE,           // 55
    MP_TOKEN_OP_AMPERSAND,
    MP_TOKEN_OP_PIPE,
    MP_TOKEN_OP_CARET,
    MP_TOKEN_OP_TILDE,
    MP_TOKEN_OP_LESS_EQUAL,
    MP_TOKEN_OP_MORE_EQUAL,
    MP_TOKEN_OP_DBL_EQUAL,
    MP_TOKEN_OP_NOT_EQUAL,

    // MP_TOKEN_DEL_*: delimiters (includes assignment forms and "->")
    MP_TOKEN_DEL_PAREN_OPEN,        // 64
    MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN,
    MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN,
    MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA,
    MP_TOKEN_DEL_COLON,
    MP_TOKEN_DEL_PERIOD,
    MP_TOKEN_DEL_SEMICOLON,
    MP_TOKEN_DEL_AT,                // 74
    MP_TOKEN_DEL_EQUAL,
    MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_DEL_MINUS_EQUAL,
    MP_TOKEN_DEL_STAR_EQUAL,
    MP_TOKEN_DEL_SLASH_EQUAL,
    MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_DEL_CARET_EQUAL,       // 84
    MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_DEL_DBL_STAR_EQUAL,
    MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t;
Damiend99b0522013-12-21 18:17:45 +0000106typedef struct _mp_token_t {
Damiena5185f42013-10-20 14:41:27 +0100107 const char *src_name; // name of source
108 uint src_line; // source line
109 uint src_column; // source column
Damien429d7192013-10-04 19:53:11 +0100110
Damiend99b0522013-12-21 18:17:45 +0000111 mp_token_kind_t kind; // kind of token
Damiena5185f42013-10-20 14:41:27 +0100112 const char *str; // string of token (valid only while this token is current token)
Damien429d7192013-10-04 19:53:11 +0100113 uint len; // (byte) length of string of token
Damiend99b0522013-12-21 18:17:45 +0000114} mp_token_t;
Damiena5185f42013-10-20 14:41:27 +0100116// the next-char function must return the next character in the stream
Damiend99b0522013-12-21 18:17:45 +0000117// it must return MP_LEXER_CHAR_EOF if end of stream
118// it can be called again after returning MP_LEXER_CHAR_EOF, and in that case must return MP_LEXER_CHAR_EOF
119#define MP_LEXER_CHAR_EOF (-1)
120typedef unichar (*mp_lexer_stream_next_char_t)(void*);
121typedef void (*mp_lexer_stream_close_t)(void*);
Damiena5185f42013-10-20 14:41:27 +0100122
Damiend99b0522013-12-21 18:17:45 +0000123typedef struct _mp_lexer_t mp_lexer_t;
Damiend99b0522013-12-21 18:17:45 +0000125void mp_token_show(const mp_token_t *tok);
126void mp_token_show_error_prefix(const mp_token_t *tok);
127bool mp_token_show_error(const mp_token_t *tok, const char *msg);
Damien429d7192013-10-04 19:53:11 +0100128
Damiend99b0522013-12-21 18:17:45 +0000129mp_lexer_t *mp_lexer_new(const char *src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close);
130void mp_lexer_free(mp_lexer_t *lex);
131void mp_lexer_to_next(mp_lexer_t *lex);
132const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex);
133bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind);
Damien429d7192013-10-04 19:53:11 +0100134/* unused
Damiend99b0522013-12-21 18:17:45 +0000135bool mp_lexer_is_str(mp_lexer_t *lex, const char *str);
136bool mp_lexer_opt_kind(mp_lexer_t *lex, mp_token_kind_t kind);
137bool mp_lexer_opt_str(mp_lexer_t *lex, const char *str);
Damien429d7192013-10-04 19:53:11 +0100138*/
Damiend99b0522013-12-21 18:17:45 +0000139bool mp_lexer_show_error(mp_lexer_t *lex, const char *msg);
140bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg);