/* lexer.h -- simple tokeniser for Python implementation
 */
| 3 | |
| 4 | #ifndef INCLUDED_LEXER_H |
| 5 | #define INCLUDED_LEXER_H |
| 6 | |
/* uses an explicit (byte) length instead of null termination
 * tokens are the same - UTF-8 with an explicit (byte) length
 */
| 10 | |
/* Kinds of token produced by the lexer.
 *
 * Enumerators are numbered consecutively from 0 (PY_TOKEN_END) up to
 * 88 (PY_TOKEN_DEL_MINUS_MORE).  The trailing "// N" comments give the
 * numeric value of selected enumerators as a reading aid; inserting or
 * reordering entries shifts every later value, so update them together.
 *
 * Naming groups: PY_TOKEN_KW_* (keywords), PY_TOKEN_OP_* (operators),
 * PY_TOKEN_DEL_* (delimiters, including the augmented-assignment forms).
 */
typedef enum _py_token_kind_t {
    PY_TOKEN_END,                   // 0

    PY_TOKEN_INVALID,
    PY_TOKEN_DEDENT_MISMATCH,
    PY_TOKEN_LONELY_STRING_OPEN,

    PY_TOKEN_NEWLINE,               // 4
    PY_TOKEN_INDENT,                // 5
    PY_TOKEN_DEDENT,                // 6

    PY_TOKEN_NAME,                  // 7
    PY_TOKEN_NUMBER,
    PY_TOKEN_STRING,
    PY_TOKEN_BYTES,

    PY_TOKEN_ELLIPSES,

    PY_TOKEN_KW_FALSE,              // 12
    PY_TOKEN_KW_NONE,
    PY_TOKEN_KW_TRUE,
    PY_TOKEN_KW_AND,
    PY_TOKEN_KW_AS,
    PY_TOKEN_KW_ASSERT,
    PY_TOKEN_KW_BREAK,
    PY_TOKEN_KW_CLASS,
    PY_TOKEN_KW_CONTINUE,
    PY_TOKEN_KW_DEF,                // 21
    PY_TOKEN_KW_DEL,
    PY_TOKEN_KW_ELIF,
    PY_TOKEN_KW_ELSE,
    PY_TOKEN_KW_EXCEPT,
    PY_TOKEN_KW_FINALLY,
    PY_TOKEN_KW_FOR,
    PY_TOKEN_KW_FROM,
    PY_TOKEN_KW_GLOBAL,
    PY_TOKEN_KW_IF,
    PY_TOKEN_KW_IMPORT,             // 31
    PY_TOKEN_KW_IN,
    PY_TOKEN_KW_IS,
    PY_TOKEN_KW_LAMBDA,
    PY_TOKEN_KW_NONLOCAL,
    PY_TOKEN_KW_NOT,
    PY_TOKEN_KW_OR,
    PY_TOKEN_KW_PASS,
    PY_TOKEN_KW_RAISE,
    PY_TOKEN_KW_RETURN,
    PY_TOKEN_KW_TRY,                // 41
    PY_TOKEN_KW_WHILE,
    PY_TOKEN_KW_WITH,
    PY_TOKEN_KW_YIELD,

    PY_TOKEN_OP_PLUS,               // 45
    PY_TOKEN_OP_MINUS,
    PY_TOKEN_OP_STAR,
    PY_TOKEN_OP_DBL_STAR,
    PY_TOKEN_OP_SLASH,
    PY_TOKEN_OP_DBL_SLASH,
    PY_TOKEN_OP_PERCENT,
    PY_TOKEN_OP_LESS,
    PY_TOKEN_OP_DBL_LESS,
    PY_TOKEN_OP_MORE,
    PY_TOKEN_OP_DBL_MORE,           // 55
    PY_TOKEN_OP_AMPERSAND,
    PY_TOKEN_OP_PIPE,
    PY_TOKEN_OP_CARET,
    PY_TOKEN_OP_TILDE,
    PY_TOKEN_OP_LESS_EQUAL,
    PY_TOKEN_OP_MORE_EQUAL,
    PY_TOKEN_OP_DBL_EQUAL,
    PY_TOKEN_OP_NOT_EQUAL,

    PY_TOKEN_DEL_PAREN_OPEN,        // 64
    PY_TOKEN_DEL_PAREN_CLOSE,
    PY_TOKEN_DEL_BRACKET_OPEN,
    PY_TOKEN_DEL_BRACKET_CLOSE,
    PY_TOKEN_DEL_BRACE_OPEN,
    PY_TOKEN_DEL_BRACE_CLOSE,
    PY_TOKEN_DEL_COMMA,
    PY_TOKEN_DEL_COLON,
    PY_TOKEN_DEL_PERIOD,
    PY_TOKEN_DEL_SEMICOLON,
    PY_TOKEN_DEL_AT,                // 74
    PY_TOKEN_DEL_EQUAL,
    PY_TOKEN_DEL_PLUS_EQUAL,
    PY_TOKEN_DEL_MINUS_EQUAL,
    PY_TOKEN_DEL_STAR_EQUAL,
    PY_TOKEN_DEL_SLASH_EQUAL,
    PY_TOKEN_DEL_DBL_SLASH_EQUAL,
    PY_TOKEN_DEL_PERCENT_EQUAL,
    PY_TOKEN_DEL_AMPERSAND_EQUAL,
    PY_TOKEN_DEL_PIPE_EQUAL,
    PY_TOKEN_DEL_CARET_EQUAL,       // 84
    PY_TOKEN_DEL_DBL_MORE_EQUAL,
    PY_TOKEN_DEL_DBL_LESS_EQUAL,
    PY_TOKEN_DEL_DBL_STAR_EQUAL,
    PY_TOKEN_DEL_MINUS_MORE,        // 88
} py_token_kind_t;
| 109 | |
| 110 | typedef struct _py_token_t { |
Damien | a5185f4 | 2013-10-20 14:41:27 +0100 | [diff] [blame] | 111 | const char *src_name; // name of source |
| 112 | uint src_line; // source line |
| 113 | uint src_column; // source column |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 114 | |
| 115 | py_token_kind_t kind; // kind of token |
Damien | a5185f4 | 2013-10-20 14:41:27 +0100 | [diff] [blame] | 116 | const char *str; // string of token (valid only while this token is current token) |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 117 | uint len; // (byte) length of string of token |
| 118 | } py_token_t; |
| 119 | |
Damien | a5185f4 | 2013-10-20 14:41:27 +0100 | [diff] [blame] | 120 | // the next-char function must return the next character in the stream |
| 121 | // it must return PY_LEXER_CHAR_EOF if end of stream |
| 122 | // it can be called again after returning PY_LEXER_CHAR_EOF, and in that case must return PY_LEXER_CHAR_EOF |
| 123 | #define PY_LEXER_CHAR_EOF (-1) |
| 124 | typedef unichar (*py_lexer_stream_next_char_t)(void*); |
Damien | fa2162b | 2013-10-20 17:42:00 +0100 | [diff] [blame] | 125 | typedef void (*py_lexer_stream_close_t)(void*); |
Damien | a5185f4 | 2013-10-20 14:41:27 +0100 | [diff] [blame] | 126 | |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 127 | typedef struct _py_lexer_t py_lexer_t; |
| 128 | |
| 129 | void py_token_show(const py_token_t *tok); |
| 130 | void py_token_show_error_prefix(const py_token_t *tok); |
| 131 | bool py_token_show_error(const py_token_t *tok, const char *msg); |
| 132 | |
Damien | fa2162b | 2013-10-20 17:42:00 +0100 | [diff] [blame] | 133 | py_lexer_t *py_lexer_new(const char *src_name, void *stream_data, py_lexer_stream_next_char_t stream_next_char, py_lexer_stream_close_t stream_close); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 134 | void py_lexer_free(py_lexer_t *lex); |
| 135 | void py_lexer_to_next(py_lexer_t *lex); |
| 136 | const py_token_t *py_lexer_cur(const py_lexer_t *lex); |
| 137 | bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind); |
| 138 | /* unused |
| 139 | bool py_lexer_is_str(py_lexer_t *lex, const char *str); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 140 | bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind); |
| 141 | bool py_lexer_opt_str(py_lexer_t *lex, const char *str); |
| 142 | */ |
| 143 | bool py_lexer_show_error(py_lexer_t *lex, const char *msg); |
Damien | 91d387d | 2013-10-09 15:09:52 +0100 | [diff] [blame] | 144 | bool py_lexer_show_error_pythonic(py_lexer_t *lex, const char *msg); |
Damien | 429d719 | 2013-10-04 19:53:11 +0100 | [diff] [blame] | 145 | |
| 146 | #endif /* INCLUDED_LEXER_H */ |