/* lexer.h -- simple tokeniser for Micro Python
 *
 * Uses (byte) length instead of null termination.
 * Tokens are the same - UTF-8 with (byte) length.
 */

// Kinds of token produced by the lexer.
//
// Enumerators take consecutive implicit values starting at 0; the numeric
// comments on selected lines record the resulting value so a raw integer
// can be matched to a name.  Inserting or reordering entries shifts every
// later value, so those comments must be updated with any such change.
typedef enum _mp_token_kind_t {
    MP_TOKEN_END,                   // 0

    // error kinds (judging by their names; reported in place of a normal token)
    MP_TOKEN_INVALID,
    MP_TOKEN_DEDENT_MISMATCH,
    MP_TOKEN_LONELY_STRING_OPEN,
    MP_TOKEN_BAD_LINE_CONTINUATION,

    // line structure
    MP_TOKEN_NEWLINE,               // 5
    MP_TOKEN_INDENT,                // 6
    MP_TOKEN_DEDENT,                // 7

    // identifiers and literals
    MP_TOKEN_NAME,                  // 8
    MP_TOKEN_NUMBER,
    MP_TOKEN_STRING,
    MP_TOKEN_BYTES,

    MP_TOKEN_ELLIPSIS,

    // keywords (KW)
    MP_TOKEN_KW_FALSE,              // 13
    MP_TOKEN_KW_NONE,
    MP_TOKEN_KW_TRUE,
    MP_TOKEN_KW_AND,
    MP_TOKEN_KW_AS,
    MP_TOKEN_KW_ASSERT,
    MP_TOKEN_KW_BREAK,
    MP_TOKEN_KW_CLASS,
    MP_TOKEN_KW_CONTINUE,
    MP_TOKEN_KW_DEF,                // 22
    MP_TOKEN_KW_DEL,
    MP_TOKEN_KW_ELIF,
    MP_TOKEN_KW_ELSE,
    MP_TOKEN_KW_EXCEPT,
    MP_TOKEN_KW_FINALLY,
    MP_TOKEN_KW_FOR,
    MP_TOKEN_KW_FROM,
    MP_TOKEN_KW_GLOBAL,
    MP_TOKEN_KW_IF,
    MP_TOKEN_KW_IMPORT,             // 32
    MP_TOKEN_KW_IN,
    MP_TOKEN_KW_IS,
    MP_TOKEN_KW_LAMBDA,
    MP_TOKEN_KW_NONLOCAL,
    MP_TOKEN_KW_NOT,
    MP_TOKEN_KW_OR,
    MP_TOKEN_KW_PASS,
    MP_TOKEN_KW_RAISE,
    MP_TOKEN_KW_RETURN,
    MP_TOKEN_KW_TRY,                // 42
    MP_TOKEN_KW_WHILE,
    MP_TOKEN_KW_WITH,
    MP_TOKEN_KW_YIELD,

    // operators (OP)
    MP_TOKEN_OP_PLUS,               // 46
    MP_TOKEN_OP_MINUS,
    MP_TOKEN_OP_STAR,
    MP_TOKEN_OP_DBL_STAR,
    MP_TOKEN_OP_SLASH,
    MP_TOKEN_OP_DBL_SLASH,
    MP_TOKEN_OP_PERCENT,
    MP_TOKEN_OP_LESS,
    MP_TOKEN_OP_DBL_LESS,
    MP_TOKEN_OP_MORE,
    MP_TOKEN_OP_DBL_MORE,           // 56
    MP_TOKEN_OP_AMPERSAND,
    MP_TOKEN_OP_PIPE,
    MP_TOKEN_OP_CARET,
    MP_TOKEN_OP_TILDE,
    MP_TOKEN_OP_LESS_EQUAL,
    MP_TOKEN_OP_MORE_EQUAL,
    MP_TOKEN_OP_DBL_EQUAL,
    MP_TOKEN_OP_NOT_EQUAL,

    // delimiters (DEL), including the augmented-assignment forms
    MP_TOKEN_DEL_PAREN_OPEN,        // 65
    MP_TOKEN_DEL_PAREN_CLOSE,
    MP_TOKEN_DEL_BRACKET_OPEN,
    MP_TOKEN_DEL_BRACKET_CLOSE,
    MP_TOKEN_DEL_BRACE_OPEN,
    MP_TOKEN_DEL_BRACE_CLOSE,
    MP_TOKEN_DEL_COMMA,
    MP_TOKEN_DEL_COLON,
    MP_TOKEN_DEL_PERIOD,
    MP_TOKEN_DEL_SEMICOLON,
    MP_TOKEN_DEL_AT,                // 75
    MP_TOKEN_DEL_EQUAL,
    MP_TOKEN_DEL_PLUS_EQUAL,
    MP_TOKEN_DEL_MINUS_EQUAL,
    MP_TOKEN_DEL_STAR_EQUAL,
    MP_TOKEN_DEL_SLASH_EQUAL,
    MP_TOKEN_DEL_DBL_SLASH_EQUAL,
    MP_TOKEN_DEL_PERCENT_EQUAL,
    MP_TOKEN_DEL_AMPERSAND_EQUAL,
    MP_TOKEN_DEL_PIPE_EQUAL,
    MP_TOKEN_DEL_CARET_EQUAL,       // 85
    MP_TOKEN_DEL_DBL_MORE_EQUAL,
    MP_TOKEN_DEL_DBL_LESS_EQUAL,
    MP_TOKEN_DEL_DBL_STAR_EQUAL,
    MP_TOKEN_DEL_MINUS_MORE,
} mp_token_kind_t;

Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 107 | typedef struct _mp_token_t { |
Damien | a5185f4 | 2013-10-20 14:41:27 +0100 | [diff] [blame] | 108 | uint src_line; // source line |
| 109 | uint src_column; // source column |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 110 | |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 111 | mp_token_kind_t kind; // kind of token |
Damien | a5185f4 | 2013-10-20 14:41:27 +0100 | [diff] [blame] | 112 | const char *str; // string of token (valid only while this token is current token) |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 113 | uint len; // (byte) length of string of token |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 114 | } mp_token_t; |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 115 | |
Damien | a5185f4 | 2013-10-20 14:41:27 +0100 | [diff] [blame] | 116 | // the next-char function must return the next character in the stream |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 117 | // it must return MP_LEXER_CHAR_EOF if end of stream |
| 118 | // it can be called again after returning MP_LEXER_CHAR_EOF, and in that case must return MP_LEXER_CHAR_EOF |
| 119 | #define MP_LEXER_CHAR_EOF (-1) |
| 120 | typedef unichar (*mp_lexer_stream_next_char_t)(void*); |
| 121 | typedef void (*mp_lexer_stream_close_t)(void*); |
Damien | a5185f4 | 2013-10-20 14:41:27 +0100 | [diff] [blame] | 122 | |
// Opaque lexer state: only the type name is declared here, so code using
// this header can hold and pass mp_lexer_t pointers but not access fields.
typedef struct _mp_lexer_t mp_lexer_t;

Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 125 | void mp_token_show(const mp_token_t *tok); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 126 | |
Damien George | b829b5c | 2014-01-25 13:51:19 +0000 | [diff] [blame] | 127 | mp_lexer_t *mp_lexer_new(qstr src_name, void *stream_data, mp_lexer_stream_next_char_t stream_next_char, mp_lexer_stream_close_t stream_close); |
| 128 | mp_lexer_t *mp_lexer_new_from_str_len(qstr src_name, const char *str, uint len, uint free_len); |
Damien George | 9193f89 | 2014-01-08 15:28:26 +0000 | [diff] [blame] | 129 | |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 130 | void mp_lexer_free(mp_lexer_t *lex); |
Damien George | 0833500 | 2014-01-18 23:24:36 +0000 | [diff] [blame] | 131 | qstr mp_lexer_source_name(mp_lexer_t *lex); |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 132 | void mp_lexer_to_next(mp_lexer_t *lex); |
| 133 | const mp_token_t *mp_lexer_cur(const mp_lexer_t *lex); |
| 134 | bool mp_lexer_is_kind(mp_lexer_t *lex, mp_token_kind_t kind); |
Damien George | 9528cd6 | 2014-01-15 21:23:31 +0000 | [diff] [blame] | 135 | |
| 136 | bool mp_lexer_show_error_pythonic_prefix(mp_lexer_t *lex); |
Damien | d99b052 | 2013-12-21 18:17:45 +0000 | [diff] [blame] | 137 | bool mp_lexer_show_error_pythonic(mp_lexer_t *lex, const char *msg); |
Damien George | 66028ab | 2014-01-03 14:03:48 +0000 | [diff] [blame] | 138 | |
/******************************************************************/
// platform specific import function; must be implemented for a specific port
// TODO tidy up, rename, or put elsewhere

//mp_lexer_t *mp_import_open_file(qstr mod_name);

// Result of probing an import path: nothing there, a directory, or a file.
// Values are implicit and consecutive, so MP_IMPORT_STAT_NO_EXIST is 0.
typedef enum {
    MP_IMPORT_STAT_NO_EXIST,
    MP_IMPORT_STAT_DIR,
    MP_IMPORT_STAT_FILE,
} mp_import_stat_t;

| 151 | mp_import_stat_t mp_import_stat(const char *path); |
| 152 | mp_lexer_t *mp_lexer_new_from_file(const char *filename); |