/* lexer.h -- simple tokeniser for Python implementation
 */

#ifndef INCLUDED_LEXER_H
#define INCLUDED_LEXER_H

/* Strings are referenced by (byte) length instead of null termination.
 * Token strings are the same: UTF-8 encoded, with a (byte) length.
 */

typedef enum _py_token_kind_t {
    PY_TOKEN_END,                   // 0

    PY_TOKEN_INVALID,
    PY_TOKEN_DEDENT_MISMATCH,
    PY_TOKEN_LONELY_STRING_OPEN,

    PY_TOKEN_NEWLINE,               // 4
    PY_TOKEN_INDENT,                // 5
    PY_TOKEN_DEDENT,                // 6

    PY_TOKEN_NAME,                  // 7
    PY_TOKEN_NUMBER,
    PY_TOKEN_STRING,
    PY_TOKEN_BYTES,

    PY_TOKEN_ELLIPSES,

    PY_TOKEN_KW_FALSE,              // 12
    PY_TOKEN_KW_NONE,
    PY_TOKEN_KW_TRUE,
    PY_TOKEN_KW_AND,
    PY_TOKEN_KW_AS,
    PY_TOKEN_KW_ASSERT,
    PY_TOKEN_KW_BREAK,
    PY_TOKEN_KW_CLASS,
    PY_TOKEN_KW_CONTINUE,
    PY_TOKEN_KW_DEF,                // 21
    PY_TOKEN_KW_DEL,
    PY_TOKEN_KW_ELIF,
    PY_TOKEN_KW_ELSE,
    PY_TOKEN_KW_EXCEPT,
    PY_TOKEN_KW_FINALLY,
    PY_TOKEN_KW_FOR,
    PY_TOKEN_KW_FROM,
    PY_TOKEN_KW_GLOBAL,
    PY_TOKEN_KW_IF,
    PY_TOKEN_KW_IMPORT,             // 31
    PY_TOKEN_KW_IN,
    PY_TOKEN_KW_IS,
    PY_TOKEN_KW_LAMBDA,
    PY_TOKEN_KW_NONLOCAL,
    PY_TOKEN_KW_NOT,
    PY_TOKEN_KW_OR,
    PY_TOKEN_KW_PASS,
    PY_TOKEN_KW_RAISE,
    PY_TOKEN_KW_RETURN,
    PY_TOKEN_KW_TRY,                // 41
    PY_TOKEN_KW_WHILE,
    PY_TOKEN_KW_WITH,
    PY_TOKEN_KW_YIELD,

    PY_TOKEN_OP_PLUS,               // 45
    PY_TOKEN_OP_MINUS,
    PY_TOKEN_OP_STAR,
    PY_TOKEN_OP_DBL_STAR,
    PY_TOKEN_OP_SLASH,
    PY_TOKEN_OP_DBL_SLASH,
    PY_TOKEN_OP_PERCENT,
    PY_TOKEN_OP_LESS,
    PY_TOKEN_OP_DBL_LESS,
    PY_TOKEN_OP_MORE,
    PY_TOKEN_OP_DBL_MORE,           // 55
    PY_TOKEN_OP_AMPERSAND,
    PY_TOKEN_OP_PIPE,
    PY_TOKEN_OP_CARET,
    PY_TOKEN_OP_TILDE,
    PY_TOKEN_OP_LESS_EQUAL,
    PY_TOKEN_OP_MORE_EQUAL,
    PY_TOKEN_OP_DBL_EQUAL,
    PY_TOKEN_OP_NOT_EQUAL,

    PY_TOKEN_DEL_PAREN_OPEN,        // 64
    PY_TOKEN_DEL_PAREN_CLOSE,
    PY_TOKEN_DEL_BRACKET_OPEN,
    PY_TOKEN_DEL_BRACKET_CLOSE,
    PY_TOKEN_DEL_BRACE_OPEN,
    PY_TOKEN_DEL_BRACE_CLOSE,
    PY_TOKEN_DEL_COMMA,
    PY_TOKEN_DEL_COLON,
    PY_TOKEN_DEL_PERIOD,
    PY_TOKEN_DEL_SEMICOLON,
    PY_TOKEN_DEL_AT,                // 74
    PY_TOKEN_DEL_EQUAL,
    PY_TOKEN_DEL_PLUS_EQUAL,
    PY_TOKEN_DEL_MINUS_EQUAL,
    PY_TOKEN_DEL_STAR_EQUAL,
    PY_TOKEN_DEL_SLASH_EQUAL,
    PY_TOKEN_DEL_DBL_SLASH_EQUAL,
    PY_TOKEN_DEL_PERCENT_EQUAL,
    PY_TOKEN_DEL_AMPERSAND_EQUAL,
    PY_TOKEN_DEL_PIPE_EQUAL,
    PY_TOKEN_DEL_CARET_EQUAL,       // 84
    PY_TOKEN_DEL_DBL_MORE_EQUAL,
    PY_TOKEN_DEL_DBL_LESS_EQUAL,
    PY_TOKEN_DEL_DBL_STAR_EQUAL,
    PY_TOKEN_DEL_MINUS_MORE,
} py_token_kind_t;

typedef struct _py_token_t {
    const char *src_name;           // name of source
    uint src_line;                  // source line
    uint src_column;                // source column

    py_token_kind_t kind;           // kind of token
    const char *str;                // string of token (valid only while this token is current token)
    uint len;                       // (byte) length of string of token
} py_token_t;
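
// Note: tok->str is not null-terminated, so it must always be used together
// with tok->len.  A minimal sketch (illustrative only, not part of this
// header) of printing a token's text and comparing it against a C string;
// tok_str_equals is a hypothetical helper:
//
//     printf("%.*s\n", (int)tok->len, tok->str);
//
//     bool tok_str_equals(const py_token_t *tok, const char *s) {
//         return strlen(s) == tok->len && memcmp(tok->str, s, tok->len) == 0;
//     }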

// The next-char function must return the next character in the stream.
// It must return PY_LEXER_CHAR_EOF at the end of the stream.
// It may be called again after returning PY_LEXER_CHAR_EOF; in that case it
// must continue to return PY_LEXER_CHAR_EOF.
#define PY_LEXER_CHAR_EOF (-1)
typedef unichar (*py_lexer_stream_next_char_t)(void*);
typedef void (*py_lexer_stream_close_t)(void*);

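// Example stream: a minimal string-backed implementation of the two callbacks
// above (an illustrative sketch, not part of this header; str_stream_t and the
// function names are hypothetical, and only single-byte characters are handled):
//
//     typedef struct { const char *str; uint pos; uint len; } str_stream_t;
//
//     unichar str_stream_next_char(void *data) {
//         str_stream_t *ss = data;
//         if (ss->pos >= ss->len) {
//             return PY_LEXER_CHAR_EOF;  // keep returning EOF once exhausted
//         }
//         return (unsigned char)ss->str[ss->pos++];
//     }
//
//     void str_stream_close(void *data) {
//         (void)data;  // nothing to release for a static string
//     }
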
typedef struct _py_lexer_t py_lexer_t;

void py_token_show(const py_token_t *tok);
void py_token_show_error_prefix(const py_token_t *tok);
bool py_token_show_error(const py_token_t *tok, const char *msg);

py_lexer_t *py_lexer_new(const char *src_name, void *stream_data, py_lexer_stream_next_char_t stream_next_char, py_lexer_stream_close_t stream_close);
void py_lexer_free(py_lexer_t *lex);
void py_lexer_to_next(py_lexer_t *lex);
const py_token_t *py_lexer_cur(const py_lexer_t *lex);
bool py_lexer_is_kind(py_lexer_t *lex, py_token_kind_t kind);
/* unused
bool py_lexer_is_str(py_lexer_t *lex, const char *str);
bool py_lexer_opt_kind(py_lexer_t *lex, py_token_kind_t kind);
bool py_lexer_opt_str(py_lexer_t *lex, const char *str);
*/
bool py_lexer_show_error(py_lexer_t *lex, const char *msg);
bool py_lexer_show_error_pythonic(py_lexer_t *lex, const char *msg);
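
// Typical usage (an illustrative sketch, not part of this header): create a
// lexer over a character stream, then walk the token stream until PY_TOKEN_END.
// str_stream_t, str_stream_next_char and str_stream_close refer to the
// hypothetical stream example above.
//
//     str_stream_t ss = { "print(1 + 2)\n", 0, 13 };
//     py_lexer_t *lex = py_lexer_new("<stdin>", &ss, str_stream_next_char, str_stream_close);
//     while (!py_lexer_is_kind(lex, PY_TOKEN_END)) {
//         py_token_show(py_lexer_cur(lex));
//         py_lexer_to_next(lex);
//     }
//     py_lexer_free(lex);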

#endif /* INCLUDED_LEXER_H */