blob: 5b22627c461bd85ae2c29df021c59465afd97996 [file] [log] [blame]
Zack Weinberg45b966d2000-03-13 22:01:08 +00001/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
Zack Weinbergc5a04732000-04-25 19:32:36 +00007 Single-pass line tokenization by Neil Booth, April 2000
Zack Weinberg45b966d2000-03-13 22:01:08 +00008
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
Zack Weinberg041c3192000-07-04 01:58:21 +000023/*
24
25Cleanups to do:-
26
27o Fix ISTABLE to flag the parts we want for IS_HSPACE and IS_NEWLINE.
28o Get use of digraphs in sync with the standard reqd on the command line.
29o -dM and with _cpp_dump_list: too many \n output.
30o Put a printer object in cpp_reader?
31o Check line numbers assigned to all errors.
32o Replace strncmp with memcmp almost everywhere.
33o lex_line's use of cur_token, flags and list->token_used is a bit opaque.
34o Get rid of cpp_get_directive_token.
35o Convert do_ functions to return void. Kaveh thinks it's OK; and said he'll
36 give it a run when we've got some code.
37o _cpp_parse_expr updated to new lexer.
38o Distinguish integers, floats, and 'other' pp-numbers.
39o Store ints and char constants as binary values.
40o New command-line assertion syntax.
41o Merge hash table text pointer and token list text pointer for identifiers.
42o Have _cpp_parse_expr use all the information the new lexer provides.
43o Work towards functions in cpperror.c taking a message level parameter.
44 If we do this, merge the common code of do_warning and do_error.
45o Comment all functions, and describe macro expansion algorithm.
46o Move as much out of header files as possible.
47o Remove single quote pairs `', and some '', from diagnostics.
48o Correct pastability test for CPP_NAME and CPP_NUMBER.
49
50*/
51
Zack Weinberg45b966d2000-03-13 22:01:08 +000052#include "config.h"
53#include "system.h"
54#include "intl.h"
55#include "cpplib.h"
56#include "cpphash.h"
Zack Weinberg041c3192000-07-04 01:58:21 +000057#include "symcat.h"
Zack Weinberg45b966d2000-03-13 22:01:08 +000058
/* Grow LIST's name buffer by half of its current capacity plus one
   byte (so that a zero-capacity buffer still grows).  */
#define auto_expand_name_space(list) \
    _cpp_expand_name_space ((list), 1 + (list)->name_cap / 2)
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +000061static void safe_fwrite PARAMS ((cpp_reader *, const U_CHAR *,
62 size_t, FILE *));
Zack Weinberg041c3192000-07-04 01:58:21 +000063static void dump_param_spelling PARAMS ((cpp_reader *, const cpp_toklist *,
Zack Weinberg1368ee72000-04-20 19:33:11 +000064 unsigned int));
Zack Weinberg041c3192000-07-04 01:58:21 +000065static void output_line_command PARAMS ((cpp_reader *, cpp_printer *,
Zack Weinberg9e62c812000-04-21 17:18:50 +000066 unsigned int));
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +000067
Zack Weinberg041c3192000-07-04 01:58:21 +000068static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
69static unsigned char *trigraph_replace PARAMS ((cpp_reader *, unsigned char *,
Neil Boothb8f41012000-05-07 08:49:08 +000070 unsigned char *));
71static const unsigned char *backslash_start PARAMS ((cpp_reader *,
72 const unsigned char *));
Zack Weinberg041c3192000-07-04 01:58:21 +000073static int skip_block_comment PARAMS ((cpp_reader *));
74static int skip_line_comment PARAMS ((cpp_reader *));
Neil Boothb8f41012000-05-07 08:49:08 +000075static void skip_whitespace PARAMS ((cpp_reader *, int));
Zack Weinbergbfb9dc72000-07-08 19:00:39 +000076static const U_CHAR *parse_name PARAMS ((cpp_reader *, cpp_token *,
77 const U_CHAR *, const U_CHAR *));
78static void parse_number PARAMS ((cpp_reader *, cpp_toklist *, cpp_string *));
Zack Weinberg041c3192000-07-04 01:58:21 +000079static void parse_string PARAMS ((cpp_reader *, cpp_toklist *, cpp_token *,
80 unsigned int));
Neil Boothb8f41012000-05-07 08:49:08 +000081static int trigraph_ok PARAMS ((cpp_reader *, const unsigned char *));
Neil Boothf624ffa2000-05-28 01:03:16 +000082static void save_comment PARAMS ((cpp_toklist *, cpp_token *,
83 const unsigned char *,
Neil Boothad265aa2000-05-27 23:27:36 +000084 unsigned int, unsigned int));
Zack Weinberg041c3192000-07-04 01:58:21 +000085static void lex_line PARAMS ((cpp_reader *, cpp_toklist *));
86static int lex_next PARAMS ((cpp_reader *, int));
87static int is_macro_disabled PARAMS ((cpp_reader *, const cpp_toklist *,
88 const cpp_token *));
Neil Boothb8f41012000-05-07 08:49:08 +000089
Zack Weinberg041c3192000-07-04 01:58:21 +000090static cpp_token *stringify_arg PARAMS ((cpp_reader *, const cpp_token *));
91static void expand_context_stack PARAMS ((cpp_reader *));
Neil Boothd1d9a6b2000-05-27 23:19:56 +000092static unsigned char * spell_token PARAMS ((cpp_reader *, const cpp_token *,
Zack Weinberg041c3192000-07-04 01:58:21 +000093 unsigned char *));
94static void output_token PARAMS ((cpp_reader *, const cpp_token *,
95 const cpp_token *));
Neil Boothb8f41012000-05-07 08:49:08 +000096typedef unsigned int (* speller) PARAMS ((unsigned char *, cpp_toklist *,
97 cpp_token *));
Zack Weinberg041c3192000-07-04 01:58:21 +000098static cpp_token *make_string_token PARAMS ((cpp_token *, const U_CHAR *,
99 unsigned int));
100static cpp_token *alloc_number_token PARAMS ((cpp_reader *, int number));
101static const cpp_token *special_symbol PARAMS ((cpp_reader *, cpp_hashnode *,
102 const cpp_token *));
103static cpp_token *duplicate_token PARAMS ((cpp_reader *, const cpp_token *));
104static const cpp_token *maybe_paste_with_next PARAMS ((cpp_reader *,
105 const cpp_token *));
106static enum cpp_ttype can_paste PARAMS ((cpp_reader *, const cpp_token *,
107 const cpp_token *, int *));
108static unsigned int prevent_macro_expansion PARAMS ((cpp_reader *));
109static void restore_macro_expansion PARAMS ((cpp_reader *, unsigned int));
110static cpp_token *get_temp_token PARAMS ((cpp_reader *));
111static void release_temp_tokens PARAMS ((cpp_reader *));
112static U_CHAR * quote_string PARAMS ((U_CHAR *, const U_CHAR *, unsigned int));
113static void process_directive PARAMS ((cpp_reader *, const cpp_token *));
Neil Boothb8f41012000-05-07 08:49:08 +0000114
/* Give TOKEN an empty string value: zero length, with its text
   pointing at the first unused byte of LIST's name buffer.  */
#define INIT_TOKEN_STR(list, token) \
  do \
    { \
      (token)->val.str.len = 0; \
      (token)->val.str.text = (list)->namebuf + (list)->name_used; \
    } \
  while (0)

/* Nonzero if C is '+' or '-' and the preceding character PREVC is
   'e' or 'E', or is 'p' or 'P' while the c89 option is off.
   Requires a `pfile' to be in scope where it is used.  */
#define VALID_SIGN(c, prevc) \
  (((c) == '+' || (c) == '-') \
   && ((prevc) == 'e' || (prevc) == 'E' \
       || (((prevc) == 'p' || (prevc) == 'P') \
           && !CPP_OPTION (pfile, c89))))

/* Character classes; maybe put these in the ISTABLE eventually.  */

/* Nonzero if C is a space or a horizontal tab.  */
#define IS_HSPACE(c) ((c) == ' ' || (c) == '\t')

/* Nonzero if C is a line feed or a carriage return.  */
#define IS_NEWLINE(c) ((c) == '\n' || (c) == '\r')

128
/* Handle LF, CR, CR-LF and LF-CR style newlines.  C is the newline
   character that was just read and CUR points at the character after
   it.  If CUR is below LIMIT and points at the *other* newline
   character ('\r' + '\n' - C), that character is consumed as well.
   Then the current buffer's line number is bumped, its line base is
   set to CUR, and the column adjustment is reset.

   Assumes next character, if any, is in buffer.  Requires a `pfile'
   to be in scope.  CUR must be a modifiable lvalue; C is fully
   parenthesized so that any expression may be passed for it.  */

#define handle_newline(cur, limit, c) \
  do { \
    if ((cur) < (limit) && *(cur) == '\r' + '\n' - (c)) \
      (cur)++; \
    pfile->buffer->lineno++; \
    pfile->buffer->line_base = (cur); \
    pfile->col_adjust = 0; \
  } while (0)
Neil Boothb8f41012000-05-07 08:49:08 +0000140
/* Token-stack helpers.  All of them operate on a `cur_token' pointer
   that must be in scope where they are used.  Expansions and
   arguments are fully parenthesized so the macros behave as single
   expressions wherever they appear.  */

/* Nonzero if the current token does not have the PREV_WHITE flag.  */
#define IMMED_TOKEN() (!(cur_token->flags & PREV_WHITE))
/* The type of the token just before the current one.  */
#define PREV_TOKEN_TYPE (cur_token[-1].type)

/* Set the current token's type to TTYPE, then advance cur_token.  */
#define PUSH_TOKEN(ttype) (cur_token++->type = (ttype))
/* Change the type of the previous token to TTYPE.  */
#define REVISE_TOKEN(ttype) (cur_token[-1].type = (ttype))
/* Step cur_token back one token and set that token's type to TTYPE.  */
#define BACKUP_TOKEN(ttype) ((--cur_token)->type = (ttype))
/* As BACKUP_TOKEN, and also set the DIGRAPH flag on that token.  */
#define BACKUP_DIGRAPH(ttype) \
  do { BACKUP_TOKEN (ttype); cur_token->flags |= DIGRAPH; } while (0)

149
/* The spelling class that token_spellings records for TOKEN's type.  */
#define TOKEN_SPELL(token) token_spellings[(token)->type].type

/* An upper bound on the number of bytes needed to spell a token,
   including preceding whitespace: five bytes, plus the string length
   for SPELL_STRING tokens or the identifier length for SPELL_IDENT
   tokens.  */
#define TOKEN_LEN(token) \
  (5 + (TOKEN_SPELL (token) == SPELL_STRING \
        ? (token)->val.str.len \
        : (TOKEN_SPELL (token) == SPELL_IDENT \
           ? (token)->val.node->length \
           : 0)))
Neil Boothb8f41012000-05-07 08:49:08 +0000158
Neil Boothf617b8e2000-05-14 22:42:58 +0000159#define T(e, s) {SPELL_OPERATOR, (const U_CHAR *) s},
160#define I(e, s) {SPELL_IDENT, s},
161#define S(e, s) {SPELL_STRING, s},
162#define C(e, s) {SPELL_CHAR, s},
163#define N(e, s) {SPELL_NONE, s},
164
Zack Weinberg041c3192000-07-04 01:58:21 +0000165const struct token_spelling
166token_spellings [N_TTYPES + 1] = {TTYPE_TABLE {0, 0} };
Neil Boothf617b8e2000-05-14 22:42:58 +0000167
168#undef T
169#undef I
170#undef S
171#undef C
172#undef N
173
Zack Weinberg041c3192000-07-04 01:58:21 +0000174/* For debugging: the internal names of the tokens. */
Zack Weinbergcf00a882000-07-08 02:33:00 +0000175#define T(e, s) U STRINGX(e) + 4,
176#define I(e, s) U STRINGX(e) + 4,
177#define S(e, s) U STRINGX(e) + 4,
178#define C(e, s) U STRINGX(e) + 4,
179#define N(e, s) U STRINGX(e) + 4,
Zack Weinberg041c3192000-07-04 01:58:21 +0000180
Zack Weinbergcf00a882000-07-08 02:33:00 +0000181const U_CHAR *const token_names[N_TTYPES] = { TTYPE_TABLE };
Zack Weinberg041c3192000-07-04 01:58:21 +0000182
183#undef T
184#undef I
185#undef S
186#undef C
187#undef N
188
189/* The following table is used by trigraph_ok/trigraph_replace. If we
190 have designated initializers, it can be constant data; otherwise,
191 it is set up at runtime by _cpp_init_input_buffer. */
192
193#if (GCC_VERSION >= 2007)
194#define init_trigraph_map() /* nothing */
195#define TRIGRAPH_MAP \
196__extension__ static const U_CHAR trigraph_map[UCHAR_MAX + 1] = {
197#define END };
198#define s(p, v) [p] = v,
199#else
200#define TRIGRAPH_MAP static U_CHAR trigraph_map[UCHAR_MAX + 1] = { 0 }; \
201 static void init_trigraph_map PARAMS ((void)) { \
202 unsigned char *x = trigraph_map;
203#define END }
204#define s(p, v) x[p] = v;
205#endif
206
207TRIGRAPH_MAP
208 s('=', '#') s(')', ']') s('!', '|')
209 s('(', '[') s('\'', '^') s('>', '}')
210 s('/', '\\') s('<', '{') s('-', '~')
211END
212
213#undef TRIGRAPH_MAP
214#undef END
215#undef s
216
Zack Weinberg45b966d2000-03-13 22:01:08 +0000217/* Re-allocates PFILE->token_buffer so it will hold at least N more chars. */
218
219void
220_cpp_grow_token_buffer (pfile, n)
221 cpp_reader *pfile;
222 long n;
223{
224 long old_written = CPP_WRITTEN (pfile);
225 pfile->token_buffer_size = n + 2 * pfile->token_buffer_size;
226 pfile->token_buffer = (U_CHAR *)
227 xrealloc(pfile->token_buffer, pfile->token_buffer_size);
228 CPP_SET_WRITTEN (pfile, old_written);
229}
230
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000231/* Deal with the annoying semantics of fwrite. */
232static void
233safe_fwrite (pfile, buf, len, fp)
Zack Weinberg45b966d2000-03-13 22:01:08 +0000234 cpp_reader *pfile;
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000235 const U_CHAR *buf;
236 size_t len;
237 FILE *fp;
Zack Weinberg45b966d2000-03-13 22:01:08 +0000238{
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000239 size_t count;
240
241 while (len)
Zack Weinberg45b966d2000-03-13 22:01:08 +0000242 {
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000243 count = fwrite (buf, 1, len, fp);
244 if (count == 0)
245 goto error;
246 len -= count;
247 buf += count;
Zack Weinberg45b966d2000-03-13 22:01:08 +0000248 }
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000249 return;
250
251 error:
252 cpp_notice_from_errno (pfile, CPP_OPTION (pfile, out_fname));
Zack Weinberg45b966d2000-03-13 22:01:08 +0000253}
254
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000255/* Notify the compiler proper that the current line number has jumped,
256 or the current file name has changed. */
257
258static void
Zack Weinberg1368ee72000-04-20 19:33:11 +0000259output_line_command (pfile, print, line)
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000260 cpp_reader *pfile;
261 cpp_printer *print;
Zack Weinberg1368ee72000-04-20 19:33:11 +0000262 unsigned int line;
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000263{
Zack Weinberg041c3192000-07-04 01:58:21 +0000264 cpp_buffer *ip = CPP_BUFFER (pfile);
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000265 enum { same = 0, enter, leave, rname } change;
266 static const char * const codes[] = { "", " 1", " 2", "" };
267
Zack Weinberg041c3192000-07-04 01:58:21 +0000268 if (line == 0)
269 return;
270
271 /* End the previous line of text. */
272 if (pfile->need_newline)
273 putc ('\n', print->outf);
274 pfile->need_newline = 0;
275
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000276 if (CPP_OPTION (pfile, no_line_commands))
277 return;
278
Zack Weinberg041c3192000-07-04 01:58:21 +0000279 /* If ip is null, we've been called from cpp_finish, and they just
280 needed the final flush and trailing newline. */
281 if (!ip)
282 return;
283
Jakub Jelinekfb753f82000-07-02 10:56:57 +0200284 if (pfile->include_depth == print->last_id)
285 {
286 /* Determine whether the current filename has changed, and if so,
287 how. 'nominal_fname' values are unique, so they can be compared
288 by comparing pointers. */
289 if (ip->nominal_fname == print->last_fname)
290 change = same;
291 else
292 change = rname;
293 }
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000294 else
295 {
Jakub Jelinekfb753f82000-07-02 10:56:57 +0200296 if (pfile->include_depth > print->last_id)
297 change = enter;
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000298 else
Jakub Jelinekfb753f82000-07-02 10:56:57 +0200299 change = leave;
300 print->last_id = pfile->include_depth;
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000301 }
Jakub Jelinekfb753f82000-07-02 10:56:57 +0200302 print->last_fname = ip->nominal_fname;
303
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000304 /* If the current file has not changed, we can output a few newlines
305 instead if we want to increase the line number by a small amount.
306 We cannot do this if print->lineno is zero, because that means we
307 haven't output any line commands yet. (The very first line
308 command output is a `same_file' command.) */
Zack Weinberg041c3192000-07-04 01:58:21 +0000309 if (change == same && print->lineno > 0
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000310 && line >= print->lineno && line < print->lineno + 8)
311 {
312 while (line > print->lineno)
313 {
314 putc ('\n', print->outf);
315 print->lineno++;
316 }
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000317 return;
318 }
319
320#ifndef NO_IMPLICIT_EXTERN_C
321 if (CPP_OPTION (pfile, cplusplus))
322 fprintf (print->outf, "# %u \"%s\"%s%s%s\n", line, ip->nominal_fname,
323 codes[change],
Zack Weinbergc31a6502000-06-21 18:33:51 +0000324 ip->inc->sysp ? " 3" : "",
325 (ip->inc->sysp == 2) ? " 4" : "");
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000326 else
327#endif
328 fprintf (print->outf, "# %u \"%s\"%s%s\n", line, ip->nominal_fname,
329 codes[change],
Zack Weinbergc31a6502000-06-21 18:33:51 +0000330 ip->inc->sysp ? " 3" : "");
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000331 print->lineno = line;
332}
333
334/* Write the contents of the token_buffer to the output stream, and
335 clear the token_buffer. Also handles generating line commands and
336 keeping track of file transitions. */
Zack Weinberg45b966d2000-03-13 22:01:08 +0000337
338void
Zack Weinberg041c3192000-07-04 01:58:21 +0000339cpp_output_tokens (pfile, print, line)
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000340 cpp_reader *pfile;
341 cpp_printer *print;
Zack Weinberg041c3192000-07-04 01:58:21 +0000342 unsigned int line;
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000343{
Zack Weinbergf6fab912000-04-16 02:18:52 +0000344 if (CPP_WRITTEN (pfile) - print->written)
345 {
Zack Weinbergf6fab912000-04-16 02:18:52 +0000346 safe_fwrite (pfile, pfile->token_buffer,
347 CPP_WRITTEN (pfile) - print->written, print->outf);
Zack Weinberg041c3192000-07-04 01:58:21 +0000348 pfile->need_newline = 1;
349 if (print->lineno)
350 print->lineno++;
351
352 CPP_SET_WRITTEN (pfile, print->written);
Zack Weinbergf6fab912000-04-16 02:18:52 +0000353 }
Zack Weinberg041c3192000-07-04 01:58:21 +0000354 output_line_command (pfile, print, line);
Zack Weinberg45b966d2000-03-13 22:01:08 +0000355}
356
Zack Weinbergc56c2072000-05-02 16:09:12 +0000357/* Scan until CPP_BUFFER (PFILE) is exhausted, discarding output. */
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000358
359void
360cpp_scan_buffer_nooutput (pfile)
361 cpp_reader *pfile;
362{
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000363 unsigned int old_written = CPP_WRITTEN (pfile);
Zack Weinberg041c3192000-07-04 01:58:21 +0000364 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
365
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000366 for (;;)
367 {
Zack Weinberg041c3192000-07-04 01:58:21 +0000368 /* In no-output mode, we can ignore everything but directives. */
369 const cpp_token *token = cpp_get_token (pfile);
370 if (token->type == CPP_EOF)
371 {
372 cpp_pop_buffer (pfile);
373 if (CPP_BUFFER (pfile) == stop)
374 break;
375 }
376 _cpp_skip_rest_of_line (pfile);
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000377 }
378 CPP_SET_WRITTEN (pfile, old_written);
379}
380
Zack Weinbergc56c2072000-05-02 16:09:12 +0000381/* Scan until CPP_BUFFER (pfile) is exhausted, writing output to PRINT. */
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000382
383void
384cpp_scan_buffer (pfile, print)
385 cpp_reader *pfile;
386 cpp_printer *print;
387{
Zack Weinbergc56c2072000-05-02 16:09:12 +0000388 cpp_buffer *stop = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
Zack Weinberg041c3192000-07-04 01:58:21 +0000389 const cpp_token *token, *prev = 0;
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000390
391 for (;;)
392 {
393 token = cpp_get_token (pfile);
Zack Weinberg041c3192000-07-04 01:58:21 +0000394 if (token->type == CPP_EOF)
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000395 {
Zack Weinberg041c3192000-07-04 01:58:21 +0000396 cpp_pop_buffer (pfile);
397 if (CPP_BUFFER (pfile) == stop)
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000398 return;
Zack Weinberg041c3192000-07-04 01:58:21 +0000399 cpp_output_tokens (pfile, print, CPP_BUF_LINE (CPP_BUFFER (pfile)));
400 prev = 0;
401 continue;
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000402 }
Zack Weinberg041c3192000-07-04 01:58:21 +0000403
404 if (token->flags & BOL)
405 {
406 cpp_output_tokens (pfile, print, pfile->token_list.line);
407 prev = 0;
408 }
409
410 output_token (pfile, token, prev);
411 prev = token;
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +0000412 }
413}
414
Zack Weinberg041c3192000-07-04 01:58:21 +0000415/* Helper routine used by parse_include, which can't see spell_token.
416 Reinterpret the current line as an h-char-sequence (< ... >); we are
417 looking at the first token after the <. */
418const cpp_token *
419_cpp_glue_header_name (pfile)
Zack Weinberg45b966d2000-03-13 22:01:08 +0000420 cpp_reader *pfile;
421{
Zack Weinberg041c3192000-07-04 01:58:21 +0000422 unsigned int written = CPP_WRITTEN (pfile);
423 const cpp_token *t;
424 cpp_token *hdr;
425 U_CHAR *buf;
426 size_t len;
Zack Weinberg45b966d2000-03-13 22:01:08 +0000427
Zack Weinberg041c3192000-07-04 01:58:21 +0000428 for (;;)
429 {
430 t = cpp_get_token (pfile);
431 if (t->type == CPP_GREATER || t->type == CPP_EOF)
432 break;
433
434 CPP_RESERVE (pfile, TOKEN_LEN (t));
435 if (t->flags & PREV_WHITE)
436 CPP_PUTC_Q (pfile, ' ');
437 pfile->limit = spell_token (pfile, t, pfile->limit);
438 }
439
440 if (t->type == CPP_EOF)
441 cpp_error (pfile, "missing terminating > character");
442
443 len = CPP_WRITTEN (pfile) - written;
444 buf = xmalloc (len);
445 memcpy (buf, pfile->token_buffer + written, len);
446 CPP_SET_WRITTEN (pfile, written);
447
448 hdr = get_temp_token (pfile);
449 hdr->type = CPP_HEADER_NAME;
450 hdr->flags = 0;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000451 hdr->val.str.text = buf;
452 hdr->val.str.len = len;
Zack Weinberg041c3192000-07-04 01:58:21 +0000453 return hdr;
Zack Weinberg45b966d2000-03-13 22:01:08 +0000454}
455
Zack Weinberg1368ee72000-04-20 19:33:11 +0000456/* Token-buffer helper functions. */
457
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000458/* Expand a token list's string space. It is *vital* that
459 list->tokens_used is correct, to get pointer fix-up right. */
Zack Weinberg041c3192000-07-04 01:58:21 +0000460void
461_cpp_expand_name_space (list, len)
Zack Weinberg1368ee72000-04-20 19:33:11 +0000462 cpp_toklist *list;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000463 unsigned int len;
464{
Neil Boothf617b8e2000-05-14 22:42:58 +0000465 const U_CHAR *old_namebuf;
Neil Boothf617b8e2000-05-14 22:42:58 +0000466
467 old_namebuf = list->namebuf;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000468 list->name_cap += len;
469 list->namebuf = (unsigned char *) xrealloc (list->namebuf, list->name_cap);
Neil Boothf617b8e2000-05-14 22:42:58 +0000470
471 /* Fix up token text pointers. */
Neil Booth79f50f22000-05-17 19:06:20 +0000472 if (list->namebuf != old_namebuf)
Neil Boothf617b8e2000-05-14 22:42:58 +0000473 {
474 unsigned int i;
475
476 for (i = 0; i < list->tokens_used; i++)
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000477 if (token_spellings[list->tokens[i].type].type == SPELL_STRING)
478 list->tokens[i].val.str.text += (list->namebuf - old_namebuf);
Neil Boothf617b8e2000-05-14 22:42:58 +0000479 }
Zack Weinberg1368ee72000-04-20 19:33:11 +0000480}
481
Zack Weinberg041c3192000-07-04 01:58:21 +0000482/* If there is not enough room for LEN more characters, expand the
483 list by just enough to have room for LEN characters. */
484void
485_cpp_reserve_name_space (list, len)
486 cpp_toklist *list;
487 unsigned int len;
488{
489 unsigned int room = list->name_cap - list->name_used;
490
491 if (room < len)
492 _cpp_expand_name_space (list, len - room);
493}
494
Zack Weinberg1368ee72000-04-20 19:33:11 +0000495/* Expand the number of tokens in a list. */
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000496void
497_cpp_expand_token_space (list, count)
Zack Weinberg1368ee72000-04-20 19:33:11 +0000498 cpp_toklist *list;
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000499 unsigned int count;
Zack Weinberg1368ee72000-04-20 19:33:11 +0000500{
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000501 unsigned int n;
502
503 list->tokens_cap += count;
504 n = list->tokens_cap;
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000505 if (list->flags & LIST_OFFSET)
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000506 list->tokens--, n++;
Zack Weinberg1368ee72000-04-20 19:33:11 +0000507 list->tokens = (cpp_token *)
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000508 xrealloc (list->tokens, n * sizeof (cpp_token));
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000509 if (list->flags & LIST_OFFSET)
510 list->tokens++; /* Skip the dummy. */
Zack Weinberg1368ee72000-04-20 19:33:11 +0000511}
512
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000513/* Initialize a token list. If flags is DUMMY_TOKEN, we allocate
514 an extra token in front of the token list, as this allows the lexer
515 to always peek at the previous token without worrying about
516 underflowing the list, and some initial space. Otherwise, no
517 token- or name-space is allocated, and there is no dummy token. */
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000518void
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000519_cpp_init_toklist (list, flags)
Zack Weinberg1368ee72000-04-20 19:33:11 +0000520 cpp_toklist *list;
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000521 int flags;
Zack Weinberg1368ee72000-04-20 19:33:11 +0000522{
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000523 if (flags == NO_DUMMY_TOKEN)
524 {
525 list->tokens_cap = 0;
Zack Weinberg041c3192000-07-04 01:58:21 +0000526 list->tokens = 0;
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000527 list->name_cap = 0;
Zack Weinberg041c3192000-07-04 01:58:21 +0000528 list->namebuf = 0;
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000529 list->flags = 0;
530 }
531 else
532 {
533 /* Initialize token space. Put a dummy token before the start
534 that will fail matches. */
535 list->tokens_cap = 256; /* 4K's worth. */
536 list->tokens = (cpp_token *)
537 xmalloc ((list->tokens_cap + 1) * sizeof (cpp_token));
538 list->tokens[0].type = CPP_EOF;
539 list->tokens++;
Zack Weinberg1368ee72000-04-20 19:33:11 +0000540
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000541 /* Initialize name space. */
542 list->name_cap = 1024;
Zack Weinberg041c3192000-07-04 01:58:21 +0000543 list->namebuf = (unsigned char *) xmalloc (list->name_cap);
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000544 list->flags = LIST_OFFSET;
545 }
546
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000547 _cpp_clear_toklist (list);
548}
549
550/* Clear a token list. */
551void
552_cpp_clear_toklist (list)
553 cpp_toklist *list;
554{
Zack Weinbergc5a04732000-04-25 19:32:36 +0000555 list->tokens_used = 0;
556 list->name_used = 0;
Zack Weinberg041c3192000-07-04 01:58:21 +0000557 list->directive = 0;
558 list->paramc = 0;
559 list->params_len = 0;
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000560 list->flags &= LIST_OFFSET; /* clear all but that one */
Zack Weinberg1368ee72000-04-20 19:33:11 +0000561}
562
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000563/* Free a token list. Does not free the list itself, which may be
564 embedded in a larger structure. */
Zack Weinberg1368ee72000-04-20 19:33:11 +0000565void
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000566_cpp_free_toklist (list)
Zack Weinberg041c3192000-07-04 01:58:21 +0000567 const cpp_toklist *list;
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000568{
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000569 if (list->flags & LIST_OFFSET)
570 free (list->tokens - 1); /* Backup over dummy token. */
571 else
572 free (list->tokens);
573 free (list->namebuf);
574}
575
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000576/* Compare two tokens. */
577int
578_cpp_equiv_tokens (a, b)
579 const cpp_token *a, *b;
580{
Zack Weinberg041c3192000-07-04 01:58:21 +0000581 if (a->type == b->type && a->flags == b->flags)
582 switch (token_spellings[a->type].type)
583 {
584 default: /* Keep compiler happy. */
585 case SPELL_OPERATOR:
586 return 1;
587 case SPELL_CHAR:
588 case SPELL_NONE:
589 return a->val.aux == b->val.aux; /* arg_no or character. */
590 case SPELL_IDENT:
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000591 return a->val.node == b->val.node;
Zack Weinberg041c3192000-07-04 01:58:21 +0000592 case SPELL_STRING:
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000593 return (a->val.str.len == b->val.str.len
594 && !memcmp (a->val.str.text, b->val.str.text,
595 a->val.str.len));
Zack Weinberg041c3192000-07-04 01:58:21 +0000596 }
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000597
Zack Weinberg041c3192000-07-04 01:58:21 +0000598 return 0;
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000599}
600
601/* Compare two token lists. */
602int
603_cpp_equiv_toklists (a, b)
604 const cpp_toklist *a, *b;
605{
606 unsigned int i;
607
Zack Weinberg041c3192000-07-04 01:58:21 +0000608 if (a->tokens_used != b->tokens_used
609 || a->flags != b->flags
610 || a->paramc != b->paramc)
Zack Weinberg15dad1d2000-05-18 15:55:46 +0000611 return 0;
612
613 for (i = 0; i < a->tokens_used; i++)
614 if (! _cpp_equiv_tokens (&a->tokens[i], &b->tokens[i]))
615 return 0;
616 return 1;
617}
618
Zack Weinberg6d2c2042000-04-30 17:30:25 +0000619/* Utility routine:
Zack Weinberg6d2c2042000-04-30 17:30:25 +0000620
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000621 Compares, the token TOKEN to the NUL-terminated string STRING.
622 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
Zack Weinberg6d2c2042000-04-30 17:30:25 +0000623
624int
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000625cpp_ideq (token, string)
626 const cpp_token *token;
Zack Weinberg6d2c2042000-04-30 17:30:25 +0000627 const char *string;
628{
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000629 if (token->type != CPP_NAME)
Zack Weinberg6d2c2042000-04-30 17:30:25 +0000630 return 0;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000631
632 return !ustrcmp (token->val.node->name, (const U_CHAR *)string);
Zack Weinberg6d2c2042000-04-30 17:30:25 +0000633}
634
Neil Boothd6d5f792000-04-26 10:17:32 +0000635/* Lexing algorithm.
636
637 The original lexer in cpplib was made up of two passes: a first pass
638 that replaced trigraphs and deleted escaped newlines, and a second
639 pass that tokenized the result of the first pass. Tokenisation was
640 performed by peeking at the next character in the input stream. For
Neil Booth6777db62000-04-27 05:49:33 +0000641 example, if the input stream contained "!=", the handler for the !
Neil Boothd6d5f792000-04-26 10:17:32 +0000642 character would peek at the next character, and if it were a '='
Neil Booth6777db62000-04-27 05:49:33 +0000643 would skip over it, and return a "!=" token, otherwise it would
644 return just the "!" token.
Neil Boothd6d5f792000-04-26 10:17:32 +0000645
646 To implement a single-pass lexer, this peeking ahead is unworkable.
647 An arbitrary number of escaped newlines, and trigraphs (in particular
Neil Booth6777db62000-04-27 05:49:33 +0000648 ??/ which translates to the escape \), could separate the '!' and '='
649 in the input stream, yet the next token is still a "!=".
Neil Boothd6d5f792000-04-26 10:17:32 +0000650
651 Suppose instead that we lex by one logical line at a time, producing
Neil Booth6777db62000-04-27 05:49:33 +0000652 a token list or stack for each logical line, and when seeing the '!'
653 push a CPP_NOT token on the list. Then if the '!' is part of a
654 longer token ("!=") we know we must see the remainder of the token by
655 the time we reach the end of the logical line. Thus we can have the
656 '=' handler look at the previous token (at the end of the list / top
657 of the stack) and see if it is a "!" token, and if so, instead of
658 pushing a "=" token revise the existing token to be a "!=" token.
Neil Boothd6d5f792000-04-26 10:17:32 +0000659
660 This works in the presence of escaped newlines, because the '\' would
661 have been pushed on the top of the stack as a CPP_BACKSLASH. The
662 newline ('\n' or '\r') handler looks at the token at the top of the
663 stack to see if it is a CPP_BACKSLASH, and if so discards both.
664 Otherwise it pushes the newline (CPP_VSPACE) token as normal. Hence
665 the '=' handler would never see any intervening escaped newlines.
666
667 To make trigraphs work in this context, as in precedence trigraphs
668 are highest and converted before anything else, the '?' handler does
669 lookahead to see if it is a trigraph, and if so skips the trigraph
670 and pushes the token it represents onto the top of the stack. This
671 also works in the particular case of a CPP_BACKSLASH trigraph.
672
673 To the preprocessor, whitespace is only significant to the point of
674 knowing whether whitespace precedes a particular token. For example,
675 the '=' handler needs to know whether there was whitespace between it
Neil Booth6777db62000-04-27 05:49:33 +0000676 and a "!" token on the top of the stack, to make the token conversion
Zack Weinberg041c3192000-07-04 01:58:21 +0000677 decision correctly. So each token has a PREV_WHITE flag to
Neil Boothd6d5f792000-04-26 10:17:32 +0000678 indicate this - the standard permits consecutive whitespace to be
679 regarded as a single space. The compiler front ends are not
680 interested in whitespace at all; they just require a token stream.
681 Another place where whitespace is significant to the preprocessor is
682 a #define statement - if there is whitespace between the macro name
683 and an initial "(" token the macro is "object-like", otherwise it is
684 a function-like macro that takes arguments.
685
686 However, all is not rosy. Parsing of identifiers, numbers, comments
687 and strings becomes trickier because of the possibility of raw
688 trigraphs and escaped newlines in the input stream.
689
690 The trigraphs are three consecutive characters beginning with two
Neil Boothc2e25d52000-04-26 10:28:36 +0000691 question marks. A question mark is not valid as part of a number or
692 identifier, so parsing of a number or identifier terminates normally
693 upon reaching it, returning to the mainloop which handles the
694 trigraph just like it would in any other position. Similarly for the
695 backslash of a backslash-newline combination. So we just need the
696 escaped-newline dropper in the mainloop to check if the token on the
697 top of the stack after dropping the escaped newline is a number or
698 identifier, and if so to continue processing it as if nothing had
699 happened.
Neil Boothd6d5f792000-04-26 10:17:32 +0000700
701 For strings, we replace trigraphs whenever we reach a quote or
702 newline, because there might be a backslash trigraph escaping them.
703 We need to be careful that we start trigraph replacing from where we
704 left off previously, because it is possible for a first scan to leave
705 "fake" trigraphs that a second scan would pick up as real (e.g. the
Neil Boothc2e25d52000-04-26 10:28:36 +0000706 sequence "????/\n=" would find a fake ??= trigraph after removing the
Neil Boothd6d5f792000-04-26 10:17:32 +0000707 escaped newline.)
708
709 For line comments, on reaching a newline we scan the previous
710 character(s) to see if it is escaped, and continue if it is. Block
711 comments ignore everything and just focus on finding the comment
712 termination mark. The only difficult thing, and it is surprisingly
713 tricky, is checking if an asterisk precedes the final slash since
714 they could be separated by escaped newlines. If the preprocessor is
715 invoked with the output comments option, we don't bother removing
716 escaped newlines and replacing trigraphs for output.
717
718 Finally, numbers can begin with a period, which is pushed initially
719 as a CPP_DOT token in its own right. The digit handler checks if the
720 previous token was a CPP_DOT not separated by whitespace, and if so
721 pops it off the stack and pushes a period into the number's buffer
722 before calling the number parser.
723
724*/
725
/* Spellings of the digraph tokens, one string per entry.
   NOTE(review): the entry order presumably matches the way users of
   this table index it - confirm before reordering.  */
static const unsigned char *digraph_spellings [] =
{
  U"%:",
  U"%:%:",
  U"<:",
  U":>",
  U"<%",
  U"%>"
};
Zack Weinbergc5a04732000-04-25 19:32:36 +0000728
729/* Call when a trigraph is encountered. It warns if necessary, and
730 returns true if the trigraph should be honoured. END is the third
731 character of a trigraph in the input stream. */
732static int
733trigraph_ok (pfile, end)
734 cpp_reader *pfile;
735 const unsigned char *end;
736{
737 int accept = CPP_OPTION (pfile, trigraphs);
738
739 if (CPP_OPTION (pfile, warn_trigraphs))
740 {
741 unsigned int col = end - 1 - pfile->buffer->line_base;
742 if (accept)
743 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
744 "trigraph ??%c converted to %c",
745 (int) *end, (int) trigraph_map[*end]);
746 else
747 cpp_warning_with_line (pfile, pfile->buffer->lineno, col,
748 "trigraph ??%c ignored", (int) *end);
749 }
750 return accept;
751}
752
753/* Scan a string for trigraphs, warning or replacing them inline as
754 appropriate. When parsing a string, we must call this routine
755 before processing a newline character (if trigraphs are enabled),
756 since the newline might be escaped by a preceding backslash
757 trigraph sequence. Returns a pointer to the end of the name after
758 replacement. */
759
Zack Weinberg041c3192000-07-04 01:58:21 +0000760static unsigned char *
Zack Weinbergc5a04732000-04-25 19:32:36 +0000761trigraph_replace (pfile, src, limit)
762 cpp_reader *pfile;
763 unsigned char *src;
Zack Weinberg041c3192000-07-04 01:58:21 +0000764 unsigned char *limit;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000765{
766 unsigned char *dest;
767
768 /* Starting with src[1], find two consecutive '?'. The case of no
769 trigraphs is streamlined. */
770
Neil Booth043afb22000-07-04 22:26:16 +0000771 for (src++; src + 1 < limit; src += 2)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000772 {
773 if (src[0] != '?')
774 continue;
775
776 /* Make src point to the 1st (NOT 2nd) of two consecutive '?'s. */
777 if (src[-1] == '?')
778 src--;
779 else if (src + 2 == limit || src[1] != '?')
780 continue;
781
782 /* Check if it really is a trigraph. */
783 if (trigraph_map[src[2]] == 0)
784 continue;
785
786 dest = src;
787 goto trigraph_found;
788 }
789 return limit;
790
791 /* Now we have a trigraph, we need to scan the remaining buffer, and
792 copy-shifting its contents left if replacement is enabled. */
793 for (; src + 2 < limit; dest++, src++)
794 if ((*dest = *src) == '?' && src[1] == '?' && trigraph_map[src[2]])
795 {
796 trigraph_found:
797 src += 2;
798 if (trigraph_ok (pfile, pfile->buffer->cur - (limit - src)))
799 *dest = trigraph_map[*src];
800 }
801
802 /* Copy remaining (at most 2) characters. */
803 while (src < limit)
804 *dest++ = *src++;
805 return dest;
806}
807
808/* If CUR is a backslash or the end of a trigraphed backslash, return
809 a pointer to its beginning, otherwise NULL. We don't read beyond
810 the buffer start, because there is the start of the comment in the
811 buffer. */
812static const unsigned char *
813backslash_start (pfile, cur)
814 cpp_reader *pfile;
815 const unsigned char *cur;
816{
817 if (cur[0] == '\\')
818 return cur;
819 if (cur[0] == '/' && cur[-1] == '?' && cur[-2] == '?'
820 && trigraph_ok (pfile, cur))
821 return cur - 2;
822 return 0;
823}
824
825/* Skip a C-style block comment. This is probably the trickiest
826 handler. We find the end of the comment by seeing if an asterisk
827 is before every '/' we encounter. The nasty complication is that a
828 previous asterisk may be separated by one or more escaped newlines.
829 Returns non-zero if comment terminated by EOF, zero otherwise. */
830static int
Zack Weinberg041c3192000-07-04 01:58:21 +0000831skip_block_comment (pfile)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000832 cpp_reader *pfile;
833{
834 cpp_buffer *buffer = pfile->buffer;
835 const unsigned char *char_after_star = 0;
836 register const unsigned char *cur = buffer->cur;
837 int seen_eof = 0;
838
839 /* Inner loop would think the comment has ended if the first comment
840 character is a '/'. Avoid this and keep the inner loop clean by
841 skipping such a character. */
842 if (cur < buffer->rlimit && cur[0] == '/')
843 cur++;
844
845 for (; cur < buffer->rlimit; )
846 {
847 unsigned char c = *cur++;
848
849 /* People like decorating comments with '*', so check for
850 '/' instead for efficiency. */
851 if (c == '/')
852 {
853 if (cur[-2] == '*' || cur - 1 == char_after_star)
854 goto out;
855
856 /* Warn about potential nested comments, but not when
857 the final character inside the comment is a '/'.
858 Don't bother to get it right across escaped newlines. */
859 if (CPP_OPTION (pfile, warn_comments) && cur + 1 < buffer->rlimit
860 && cur[0] == '*' && cur[1] != '/')
861 {
862 buffer->cur = cur;
863 cpp_warning (pfile, "'/*' within comment");
864 }
865 }
866 else if (IS_NEWLINE(c))
867 {
868 const unsigned char* bslash = backslash_start (pfile, cur - 2);
869
870 handle_newline (cur, buffer->rlimit, c);
871 /* Work correctly if there is an asterisk before an
872 arbirtrarily long sequence of escaped newlines. */
873 if (bslash && (bslash[-1] == '*' || bslash == char_after_star))
874 char_after_star = cur;
875 else
876 char_after_star = 0;
877 }
878 }
879 seen_eof = 1;
880
881 out:
882 buffer->cur = cur;
883 return seen_eof;
884}
885
886/* Skip a C++ or Chill line comment. Handles escaped newlines.
887 Returns non-zero if a multiline comment. */
888static int
Zack Weinberg041c3192000-07-04 01:58:21 +0000889skip_line_comment (pfile)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000890 cpp_reader *pfile;
891{
892 cpp_buffer *buffer = pfile->buffer;
893 register const unsigned char *cur = buffer->cur;
894 int multiline = 0;
895
896 for (; cur < buffer->rlimit; )
897 {
898 unsigned char c = *cur++;
899
900 if (IS_NEWLINE (c))
901 {
902 /* Check for a (trigaph?) backslash escaping the newline. */
903 if (!backslash_start (pfile, cur - 2))
904 goto out;
905 multiline = 1;
906 handle_newline (cur, buffer->rlimit, c);
907 }
908 }
909 cur++;
910
911 out:
912 buffer->cur = cur - 1; /* Leave newline for caller. */
913 return multiline;
914}
915
Neil Booth6ab3e7d2000-05-18 11:09:27 +0000916/* Skips whitespace, stopping at next non-whitespace character.
917 Adjusts pfile->col_adjust to account for tabs. This enables tokens
918 to be assigned the correct column. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000919static void
920skip_whitespace (pfile, in_directive)
921 cpp_reader *pfile;
922 int in_directive;
923{
924 cpp_buffer *buffer = pfile->buffer;
925 register const unsigned char *cur = buffer->cur;
926 unsigned short null_count = 0;
927
928 for (; cur < buffer->rlimit; )
929 {
930 unsigned char c = *cur++;
931
Neil Booth6ab3e7d2000-05-18 11:09:27 +0000932 if (c == '\t')
933 {
934 unsigned int col = CPP_BUF_COLUMN (buffer, cur - 1);
935 pfile->col_adjust += (CPP_OPTION (pfile, tabstop) - 1
936 - col % CPP_OPTION(pfile, tabstop));
937 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000938 if (IS_HSPACE(c)) /* FIXME: Fix ISTABLE. */
939 continue;
940 if (!is_space(c) || IS_NEWLINE (c)) /* Main loop handles newlines. */
941 goto out;
942 if (c == '\0')
943 null_count++;
944 /* Mut be '\f' or '\v' */
945 else if (in_directive && CPP_PEDANTIC (pfile))
946 cpp_pedwarn (pfile, "%s in preprocessing directive",
947 c == '\f' ? "formfeed" : "vertical tab");
948 }
949 cur++;
950
951 out:
952 buffer->cur = cur - 1;
953 if (null_count)
954 cpp_warning (pfile, null_count > 1 ? "embedded null characters ignored"
955 : "embedded null character ignored");
956}
957
958/* Parse (append) an identifier. */
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000959static inline const U_CHAR *
960parse_name (pfile, tok, cur, rlimit)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000961 cpp_reader *pfile;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000962 cpp_token *tok;
963 const U_CHAR *cur, *rlimit;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000964{
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000965 const U_CHAR *name = cur;
966 unsigned int len;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000967
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000968 while (cur < rlimit)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000969 {
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000970 if (! is_idchar (*cur))
971 break;
Zack Weinberg041c3192000-07-04 01:58:21 +0000972 /* $ is not a legal identifier character in the standard, but is
973 commonly accepted as an extension. Don't warn about it in
974 skipped conditional blocks. */
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000975 if (*cur == '$' && CPP_PEDANTIC (pfile) && ! pfile->skipping)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000976 {
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000977 CPP_BUFFER (pfile)->cur = cur;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000978 cpp_pedwarn (pfile, "'$' character in identifier");
979 }
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000980 cur++;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000981 }
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000982 len = cur - name;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000983
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000984 if (tok->val.node)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000985 {
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000986 unsigned int oldlen = tok->val.node->length;
987 U_CHAR *newname = alloca (oldlen + len);
988 memcpy (newname, tok->val.node->name, oldlen);
989 memcpy (newname + oldlen, name, len);
990 len += oldlen;
991 name = newname;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000992 }
993
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000994 tok->val.node = cpp_lookup (pfile, name, len);
995 return cur;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000996}
997
998/* Parse (append) a number. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000999static void
1000parse_number (pfile, list, name)
1001 cpp_reader *pfile;
1002 cpp_toklist *list;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001003 cpp_string *name;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001004{
1005 const unsigned char *name_limit;
1006 unsigned char *namebuf;
1007 cpp_buffer *buffer = pfile->buffer;
1008 register const unsigned char *cur = buffer->cur;
1009
1010 expanded:
1011 name_limit = list->namebuf + list->name_cap;
1012 namebuf = list->namebuf + list->name_used;
1013
1014 for (; cur < buffer->rlimit && namebuf < name_limit; )
1015 {
1016 unsigned char c = *namebuf = *cur; /* Copy a single char. */
1017
1018 /* Perhaps we should accept '$' here if we accept it for
1019 identifiers. We know namebuf[-1] is safe, because for c to
1020 be a sign we must have pushed at least one character. */
1021 if (!is_numchar (c) && c != '.' && ! VALID_SIGN (c, namebuf[-1]))
1022 goto out;
1023
1024 namebuf++;
1025 cur++;
1026 }
1027
1028 /* Run out of name space? */
1029 if (cur < buffer->rlimit)
1030 {
1031 list->name_used = namebuf - list->namebuf;
1032 auto_expand_name_space (list);
1033 goto expanded;
1034 }
1035
1036 out:
1037 buffer->cur = cur;
Neil Boothf617b8e2000-05-14 22:42:58 +00001038 name->len = namebuf - name->text;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001039 list->name_used = namebuf - list->namebuf;
1040}
1041
1042/* Places a string terminated by an unescaped TERMINATOR into a
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001043 cpp_string, which should be expandable and thus at the top of the
Zack Weinbergc5a04732000-04-25 19:32:36 +00001044 list's stack. Handles embedded trigraphs, if necessary, and
1045 escaped newlines.
1046
1047 Can be used for character constants (terminator = '\''), string
Neil Booth41e8b1d2000-05-15 22:44:22 +00001048 constants ('"') and angled headers ('>'). Multi-line strings are
1049 allowed, except for within directives. */
Zack Weinbergc5a04732000-04-25 19:32:36 +00001050
1051static void
Zack Weinberg041c3192000-07-04 01:58:21 +00001052parse_string (pfile, list, token, terminator)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001053 cpp_reader *pfile;
1054 cpp_toklist *list;
Zack Weinberg041c3192000-07-04 01:58:21 +00001055 cpp_token *token;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001056 unsigned int terminator;
1057{
1058 cpp_buffer *buffer = pfile->buffer;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001059 cpp_string *name = &token->val.str;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001060 register const unsigned char *cur = buffer->cur;
1061 const unsigned char *name_limit;
1062 unsigned char *namebuf;
1063 unsigned int null_count = 0;
Zack Weinberg041c3192000-07-04 01:58:21 +00001064 unsigned int trigraphed = list->name_used;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001065
1066 expanded:
1067 name_limit = list->namebuf + list->name_cap;
1068 namebuf = list->namebuf + list->name_used;
1069
1070 for (; cur < buffer->rlimit && namebuf < name_limit; )
1071 {
1072 unsigned int c = *namebuf++ = *cur++; /* Copy a single char. */
1073
1074 if (c == '\0')
1075 null_count++;
1076 else if (c == terminator || IS_NEWLINE (c))
1077 {
Zack Weinbergc5a04732000-04-25 19:32:36 +00001078 /* Needed for trigraph_replace and multiline string warning. */
1079 buffer->cur = cur;
1080
1081 /* Scan for trigraphs before checking if backslash-escaped. */
Zack Weinberg041c3192000-07-04 01:58:21 +00001082 if ((CPP_OPTION (pfile, trigraphs)
1083 || CPP_OPTION (pfile, warn_trigraphs))
1084 && namebuf - (list->namebuf + trigraphed) >= 3)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001085 {
Zack Weinberg041c3192000-07-04 01:58:21 +00001086 namebuf = trigraph_replace (pfile, list->namebuf + trigraphed,
1087 namebuf);
1088 /* The test above guarantees trigraphed will be positive. */
1089 trigraphed = namebuf - list->namebuf - 2;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001090 }
1091
1092 namebuf--; /* Drop the newline / terminator from the name. */
1093 if (IS_NEWLINE (c))
1094 {
1095 /* Drop a backslash newline, and continue. */
1096 if (namebuf[-1] == '\\')
1097 {
1098 handle_newline (cur, buffer->rlimit, c);
1099 namebuf--;
1100 continue;
1101 }
1102
1103 cur--;
1104
1105 /* In Fortran and assembly language, silently terminate
1106 strings of either variety at end of line. This is a
1107 kludge around not knowing where comments are in these
1108 languages. */
1109 if (CPP_OPTION (pfile, lang_fortran)
1110 || CPP_OPTION (pfile, lang_asm))
1111 goto out;
1112
1113 /* Character constants, headers and asserts may not
1114 extend over multiple lines. In Standard C, neither
1115 may strings. We accept multiline strings as an
Zack Weinberg041c3192000-07-04 01:58:21 +00001116 extension. (Even in directives - otherwise, glibc's
1117 longlong.h breaks.) */
1118 if (terminator != '"')
Zack Weinbergc5a04732000-04-25 19:32:36 +00001119 goto unterminated;
1120
1121 cur++; /* Move forwards again. */
1122
1123 if (pfile->multiline_string_line == 0)
1124 {
Zack Weinberg041c3192000-07-04 01:58:21 +00001125 pfile->multiline_string_line = token->line;
1126 pfile->multiline_string_column = token->col;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001127 if (CPP_PEDANTIC (pfile))
1128 cpp_pedwarn (pfile, "multi-line string constant");
1129 }
1130
1131 *namebuf++ = '\n';
1132 handle_newline (cur, buffer->rlimit, c);
1133 }
1134 else
1135 {
1136 unsigned char *temp;
1137
1138 /* An odd number of consecutive backslashes represents
1139 an escaped terminator. */
1140 temp = namebuf - 1;
Neil Boothf617b8e2000-05-14 22:42:58 +00001141 while (temp >= name->text && *temp == '\\')
Zack Weinbergc5a04732000-04-25 19:32:36 +00001142 temp--;
1143
1144 if ((namebuf - temp) & 1)
1145 goto out;
1146 namebuf++;
1147 }
1148 }
1149 }
1150
1151 /* Run out of name space? */
1152 if (cur < buffer->rlimit)
1153 {
1154 list->name_used = namebuf - list->namebuf;
1155 auto_expand_name_space (list);
1156 goto expanded;
1157 }
1158
1159 /* We may not have trigraph-replaced the input for this code path,
1160 but as the input is in error by being unterminated we don't
1161 bother. Prevent warnings about no newlines at EOF. */
1162 if (IS_NEWLINE(cur[-1]))
1163 cur--;
1164
1165 unterminated:
1166 cpp_error (pfile, "missing terminating %c character", (int) terminator);
1167
1168 if (terminator == '\"' && pfile->multiline_string_line != list->line
1169 && pfile->multiline_string_line != 0)
1170 {
Zack Weinberg041c3192000-07-04 01:58:21 +00001171 cpp_error_with_line (pfile, pfile->multiline_string_line,
1172 pfile->multiline_string_column,
Zack Weinbergc5a04732000-04-25 19:32:36 +00001173 "possible start of unterminated string literal");
1174 pfile->multiline_string_line = 0;
1175 }
1176
1177 out:
1178 buffer->cur = cur;
Neil Boothf617b8e2000-05-14 22:42:58 +00001179 name->len = namebuf - name->text;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001180 list->name_used = namebuf - list->namebuf;
1181
1182 if (null_count > 0)
1183 cpp_warning (pfile, (null_count > 1 ? "null characters preserved"
1184 : "null character preserved"));
1185}
1186
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001187/* The character TYPE helps us distinguish comment types: '*' = C
1188 style, '-' = Chill-style and '/' = C++ style. For code simplicity,
1189 the stored comment includes the comment start and any terminator. */
1190
1191#define COMMENT_START_LEN 2
Zack Weinbergc5a04732000-04-25 19:32:36 +00001192static void
Neil Boothad265aa2000-05-27 23:27:36 +00001193save_comment (list, token, from, len, type)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001194 cpp_toklist *list;
Neil Boothad265aa2000-05-27 23:27:36 +00001195 cpp_token *token;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001196 const unsigned char *from;
1197 unsigned int len;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001198 unsigned int type;
1199{
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001200 unsigned char *buffer;
1201
1202 len += COMMENT_START_LEN;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001203
Zack Weinbergc5a04732000-04-25 19:32:36 +00001204 if (list->name_used + len > list->name_cap)
Zack Weinberg041c3192000-07-04 01:58:21 +00001205 _cpp_expand_name_space (list, len);
Zack Weinbergc5a04732000-04-25 19:32:36 +00001206
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001207 INIT_TOKEN_STR (list, token);
Neil Boothad265aa2000-05-27 23:27:36 +00001208 token->type = CPP_COMMENT;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001209 token->val.str.len = len;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001210
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001211 buffer = list->namebuf + list->name_used;
1212 list->name_used += len;
1213
1214 /* Copy the comment. */
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001215 if (type == '*')
1216 {
1217 *buffer++ = '/';
1218 *buffer++ = '*';
1219 }
1220 else
1221 {
1222 *buffer++ = type;
1223 *buffer++ = type;
1224 }
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001225 memcpy (buffer, from, len - COMMENT_START_LEN);
Zack Weinbergc5a04732000-04-25 19:32:36 +00001226}
1227
1228/*
1229 * The tokenizer's main loop. Returns a token list, representing a
Neil Boothf624ffa2000-05-28 01:03:16 +00001230 * logical line in the input file. On EOF after some tokens have
1231 * been processed, we return immediately. Then in next call, or if
1232 * EOF occurred at the beginning of a logical line, a single CPP_EOF
1233 * token is placed in the list.
Zack Weinbergc5a04732000-04-25 19:32:36 +00001234 *
1235 * Implementation relies almost entirely on lookback, rather than
1236 * looking forwards. This means that tokenization requires just
1237 * a single pass of the file, even in the presence of trigraphs and
1238 * escaped newlines, providing significant performance benefits.
1239 * Trigraph overhead is negligible if they are disabled, and low
1240 * even when enabled.
1241 */
1242
/* Non-zero once the line being lexed has been identified as a
   directive.  Expects a variable `list' in scope.  */
#define IS_DIRECTIVE() (list->directive != 0)

/* Non-zero if the current token is the second token lexed by this
   call and the token before it is a '#'.  Expects `cur_token', `list'
   and `first_token' in scope.  */
#define MIGHT_BE_DIRECTIVE() \
  (cur_token == list->tokens + (first_token + 1) \
   && cur_token[-1].type == CPP_HASH)
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001246
Zack Weinberg041c3192000-07-04 01:58:21 +00001247static void
1248lex_line (pfile, list)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001249 cpp_reader *pfile;
1250 cpp_toklist *list;
1251{
Zack Weinberg041c3192000-07-04 01:58:21 +00001252 cpp_token *cur_token, *token_limit, *first;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001253 cpp_buffer *buffer = pfile->buffer;
Zack Weinberg041c3192000-07-04 01:58:21 +00001254 const unsigned char *cur = buffer->cur;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001255 unsigned char flags = 0;
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001256 unsigned int first_token = list->tokens_used;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001257
Zack Weinberg041c3192000-07-04 01:58:21 +00001258 if (!(list->flags & LIST_OFFSET))
1259 (abort) ();
1260
1261 list->file = buffer->nominal_fname;
Neil Boothf624ffa2000-05-28 01:03:16 +00001262 list->line = CPP_BUF_LINE (buffer);
Neil Booth6ab3e7d2000-05-18 11:09:27 +00001263 pfile->col_adjust = 0;
Zack Weinberg041c3192000-07-04 01:58:21 +00001264 pfile->in_lex_line = 1;
1265 if (cur == buffer->buf)
1266 list->flags |= BEG_OF_FILE;
1267
Zack Weinbergc5a04732000-04-25 19:32:36 +00001268 expanded:
1269 token_limit = list->tokens + list->tokens_cap;
1270 cur_token = list->tokens + list->tokens_used;
1271
1272 for (; cur < buffer->rlimit && cur_token < token_limit;)
1273 {
Zack Weinberg041c3192000-07-04 01:58:21 +00001274 unsigned char c;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001275
Neil Booth6ab3e7d2000-05-18 11:09:27 +00001276 /* Optimize whitespace skipping, as most tokens are probably
1277 separated by whitespace. (' ' '\t' '\v' '\f' '\0'). */
Zack Weinberg041c3192000-07-04 01:58:21 +00001278 c = *cur++;
1279 if (is_hspace (c))
Zack Weinbergc5a04732000-04-25 19:32:36 +00001280 {
Neil Booth6ab3e7d2000-05-18 11:09:27 +00001281 /* Step back to get the null warning and tab correction. */
1282 buffer->cur = cur - 1;
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001283 skip_whitespace (pfile, IS_DIRECTIVE ());
Neil Booth6ab3e7d2000-05-18 11:09:27 +00001284 cur = buffer->cur;
1285
Zack Weinberg041c3192000-07-04 01:58:21 +00001286 flags = PREV_WHITE;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001287 if (cur == buffer->rlimit)
1288 break;
1289 c = *cur++;
1290 }
1291
Zack Weinberg041c3192000-07-04 01:58:21 +00001292 /* Initialize current token. CPP_EOF will not be fixed up by
1293 expand_name_space. */
1294 list->tokens_used = cur_token - list->tokens + 1;
1295 cur_token->type = CPP_EOF;
Neil Booth6ab3e7d2000-05-18 11:09:27 +00001296 cur_token->col = CPP_BUF_COLUMN (buffer, cur);
Zack Weinberg041c3192000-07-04 01:58:21 +00001297 cur_token->line = CPP_BUF_LINE (buffer);
Zack Weinbergc5a04732000-04-25 19:32:36 +00001298 cur_token->flags = flags;
1299 flags = 0;
1300
1301 switch (c)
1302 {
1303 case '0': case '1': case '2': case '3': case '4':
1304 case '5': case '6': case '7': case '8': case '9':
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001305 {
1306 int prev_dot;
1307
1308 cur--; /* Backup character. */
1309 prev_dot = PREV_TOKEN_TYPE == CPP_DOT && IMMED_TOKEN ();
1310 if (prev_dot)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001311 cur_token--;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001312 INIT_TOKEN_STR (list, cur_token);
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001313 /* Prepend an immediately previous CPP_DOT token. */
1314 if (prev_dot)
1315 {
1316 if (list->name_cap == list->name_used)
1317 auto_expand_name_space (list);
Zack Weinbergc5a04732000-04-25 19:32:36 +00001318
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001319 cur_token->val.str.len = 1;
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001320 list->namebuf[list->name_used++] = '.';
1321 }
Zack Weinbergc5a04732000-04-25 19:32:36 +00001322
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001323 continue_number:
1324 cur_token->type = CPP_NUMBER; /* Before parse_number. */
1325 buffer->cur = cur;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001326 parse_number (pfile, list, &cur_token->val.str);
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001327 cur = buffer->cur;
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001328 }
Zack Weinberg041c3192000-07-04 01:58:21 +00001329 /* Check for # 123 form of #line. */
1330 if (MIGHT_BE_DIRECTIVE ())
1331 list->directive = _cpp_check_linemarker (pfile, cur_token,
1332 !(cur_token[-1].flags
1333 & PREV_WHITE));
1334 cur_token++;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001335 break;
1336
1337 letter:
1338 case '_':
1339 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1340 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1341 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1342 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1343 case 'y': case 'z':
1344 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1345 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1346 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1347 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1348 case 'Y': case 'Z':
Zack Weinbergc5a04732000-04-25 19:32:36 +00001349 cur--; /* Backup character. */
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001350 cur_token->val.node = 0;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001351 cur_token->type = CPP_NAME; /* Identifier, macro etc. */
1352
1353 continue_name:
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001354 cur = parse_name (pfile, cur_token, cur, buffer->rlimit);
Zack Weinbergc5a04732000-04-25 19:32:36 +00001355
Zack Weinberg041c3192000-07-04 01:58:21 +00001356 if (MIGHT_BE_DIRECTIVE ())
1357 list->directive = _cpp_check_directive (pfile, cur_token,
1358 !(list->tokens[0].flags
1359 & PREV_WHITE));
Zack Weinbergc5a04732000-04-25 19:32:36 +00001360 cur_token++;
1361 break;
1362
1363 case '\'':
Zack Weinberg041c3192000-07-04 01:58:21 +00001364 /* Character constants are not recognized when processing Fortran,
1365 or if -traditional. */
1366 if (CPP_OPTION (pfile, lang_fortran) || CPP_TRADITIONAL (pfile))
1367 goto other;
1368
Zack Weinbergc5a04732000-04-25 19:32:36 +00001369 /* Fall through. */
1370 case '\"':
Zack Weinberg041c3192000-07-04 01:58:21 +00001371 /* Traditionally, escaped strings are not strings. */
1372 if (CPP_TRADITIONAL (pfile) && IMMED_TOKEN ()
1373 && PREV_TOKEN_TYPE == CPP_BACKSLASH)
1374 goto other;
1375
Zack Weinbergc5a04732000-04-25 19:32:36 +00001376 cur_token->type = c == '\'' ? CPP_CHAR : CPP_STRING;
1377 /* Do we have a wide string? */
1378 if (cur_token[-1].type == CPP_NAME && IMMED_TOKEN ()
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001379 && cur_token[-1].val.node == pfile->spec_nodes->n_L
Zack Weinbergc5a04732000-04-25 19:32:36 +00001380 && !CPP_TRADITIONAL (pfile))
1381 {
Zack Weinbergc5a04732000-04-25 19:32:36 +00001382 (--cur_token)->type = (c == '\'' ? CPP_WCHAR : CPP_WSTRING);
1383 }
1384
1385 do_parse_string:
Neil Booth6ab3e7d2000-05-18 11:09:27 +00001386 /* Here c is one of ' " or >. */
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001387 INIT_TOKEN_STR (list, cur_token);
Zack Weinbergc5a04732000-04-25 19:32:36 +00001388 buffer->cur = cur;
Zack Weinberg041c3192000-07-04 01:58:21 +00001389 parse_string (pfile, list, cur_token, c);
Zack Weinbergc5a04732000-04-25 19:32:36 +00001390 cur = buffer->cur;
1391 cur_token++;
1392 break;
1393
1394 case '/':
1395 cur_token->type = CPP_DIV;
1396 if (IMMED_TOKEN ())
1397 {
1398 if (PREV_TOKEN_TYPE == CPP_DIV)
1399 {
1400 /* We silently allow C++ comments in system headers,
1401 irrespective of conformance mode, because lots of
1402 broken systems do that and trying to clean it up
1403 in fixincludes is a nightmare. */
Zack Weinbergc31a6502000-06-21 18:33:51 +00001404 if (CPP_IN_SYSTEM_HEADER (pfile))
Zack Weinbergc5a04732000-04-25 19:32:36 +00001405 goto do_line_comment;
1406 else if (CPP_OPTION (pfile, cplusplus_comments))
1407 {
1408 if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
1409 && ! buffer->warned_cplusplus_comments)
1410 {
1411 buffer->cur = cur;
1412 cpp_pedwarn (pfile,
1413 "C++ style comments are not allowed in ISO C89");
1414 cpp_pedwarn (pfile,
1415 "(this will be reported only once per input file)");
1416 buffer->warned_cplusplus_comments = 1;
1417 }
1418 do_line_comment:
1419 buffer->cur = cur;
Zack Weinberg041c3192000-07-04 01:58:21 +00001420#if 0 /* Leave until new lexer in place. */
Zack Weinbergc5a04732000-04-25 19:32:36 +00001421 if (cur[-2] != c)
1422 cpp_warning (pfile,
1423 "comment start split across lines");
Zack Weinberg041c3192000-07-04 01:58:21 +00001424#endif
1425 if (skip_line_comment (pfile))
1426 cpp_warning (pfile, "multi-line comment");
Zack Weinbergc5a04732000-04-25 19:32:36 +00001427
1428 /* Back-up to first '-' or '/'. */
Neil Boothad265aa2000-05-27 23:27:36 +00001429 cur_token--;
1430 if (!CPP_OPTION (pfile, discard_comments)
Zack Weinberg041c3192000-07-04 01:58:21 +00001431 && (!IS_DIRECTIVE()
1432 || (list->directive->flags & COMMENTS)))
Neil Boothad265aa2000-05-27 23:27:36 +00001433 save_comment (list, cur_token++, cur,
1434 buffer->cur - cur, c);
Zack Weinberg041c3192000-07-04 01:58:21 +00001435 else if (!CPP_OPTION (pfile, traditional))
1436 flags = PREV_WHITE;
Neil Boothad265aa2000-05-27 23:27:36 +00001437
Zack Weinberg041c3192000-07-04 01:58:21 +00001438 cur = buffer->cur;
Neil Boothad265aa2000-05-27 23:27:36 +00001439 break;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001440 }
1441 }
1442 }
1443 cur_token++;
1444 break;
1445
1446 case '*':
1447 cur_token->type = CPP_MULT;
1448 if (IMMED_TOKEN ())
1449 {
1450 if (PREV_TOKEN_TYPE == CPP_DIV)
1451 {
1452 buffer->cur = cur;
Zack Weinberg041c3192000-07-04 01:58:21 +00001453#if 0 /* Leave until new lexer in place. */
Zack Weinbergc5a04732000-04-25 19:32:36 +00001454 if (cur[-2] != '/')
1455 cpp_warning (pfile,
1456 "comment start '/*' split across lines");
Zack Weinberg041c3192000-07-04 01:58:21 +00001457#endif
1458 if (skip_block_comment (pfile))
Zack Weinbergc5a04732000-04-25 19:32:36 +00001459 cpp_error_with_line (pfile, list->line, cur_token[-1].col,
1460 "unterminated comment");
Zack Weinberg041c3192000-07-04 01:58:21 +00001461#if 0 /* Leave until new lexer in place. */
Zack Weinbergc5a04732000-04-25 19:32:36 +00001462 else if (buffer->cur[-2] != '*')
1463 cpp_warning (pfile,
1464 "comment end '*/' split across lines");
Zack Weinberg041c3192000-07-04 01:58:21 +00001465#endif
Neil Boothad265aa2000-05-27 23:27:36 +00001466 /* Back up to opening '/'. */
1467 cur_token--;
1468 if (!CPP_OPTION (pfile, discard_comments)
Zack Weinberg041c3192000-07-04 01:58:21 +00001469 && (!IS_DIRECTIVE()
1470 || (list->directive->flags & COMMENTS)))
Neil Boothad265aa2000-05-27 23:27:36 +00001471 save_comment (list, cur_token++, cur,
1472 buffer->cur - cur, c);
Zack Weinberg041c3192000-07-04 01:58:21 +00001473 else if (!CPP_OPTION (pfile, traditional))
1474 flags = PREV_WHITE;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001475
Zack Weinberg041c3192000-07-04 01:58:21 +00001476 cur = buffer->cur;
Neil Boothf617b8e2000-05-14 22:42:58 +00001477 break;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001478 }
1479 else if (CPP_OPTION (pfile, cplusplus))
1480 {
1481 /* In C++, there are .* and ->* operators. */
1482 if (PREV_TOKEN_TYPE == CPP_DEREF)
1483 BACKUP_TOKEN (CPP_DEREF_STAR);
1484 else if (PREV_TOKEN_TYPE == CPP_DOT)
1485 BACKUP_TOKEN (CPP_DOT_STAR);
1486 }
1487 }
1488 cur_token++;
1489 break;
1490
1491 case '\n':
1492 case '\r':
1493 handle_newline (cur, buffer->rlimit, c);
Neil Boothfb4527c2000-05-10 09:11:06 +00001494 if (PREV_TOKEN_TYPE == CPP_BACKSLASH)
1495 {
Zack Weinberg041c3192000-07-04 01:58:21 +00001496 if (IMMED_TOKEN ())
1497 {
1498 /* Remove the escaped newline. Then continue to process
1499 any interrupted name or number. */
1500 cur_token--;
1501 /* Backslash-newline may not be immediately followed by
1502 EOF (C99 5.1.1.2). */
1503 if (cur >= buffer->rlimit)
1504 {
1505 cpp_pedwarn (pfile, "backslash-newline at end of file");
1506 break;
1507 }
1508 if (IMMED_TOKEN ())
1509 {
1510 cur_token--;
1511 if (cur_token->type == CPP_NAME)
1512 goto continue_name;
1513 else if (cur_token->type == CPP_NUMBER)
1514 goto continue_number;
1515 cur_token++;
1516 }
1517 /* Remember whitespace setting. */
1518 flags = cur_token->flags;
1519 break;
1520 }
1521 else
1522 {
1523 buffer->cur = cur;
1524 cpp_warning (pfile,
1525 "backslash and newline separated by space");
1526 }
Neil Boothfb4527c2000-05-10 09:11:06 +00001527 }
Zack Weinberg041c3192000-07-04 01:58:21 +00001528 else if (MIGHT_BE_DIRECTIVE ())
1529 {
1530 /* "Null directive." C99 6.10.7: A preprocessing
1531 directive of the form # <new-line> has no effect.
1532
1533 But it is still a directive, and therefore disappears
1534 from the output. */
1535 cur_token--;
1536 if (cur_token->flags & PREV_WHITE)
1537 {
1538 if (CPP_WTRADITIONAL (pfile))
1539 cpp_warning (pfile,
1540 "K+R C ignores #\\n with the # indented");
1541 if (CPP_TRADITIONAL (pfile))
1542 cur_token++;
1543 }
1544 }
1545
Neil Boothf624ffa2000-05-28 01:03:16 +00001546 /* Skip vertical space until we have at least one token to
1547 return. */
1548 if (cur_token != &list->tokens[first_token])
1549 goto out;
1550 list->line = CPP_BUF_LINE (buffer);
1551 break;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001552
1553 case '-':
1554 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_MINUS)
1555 {
1556 if (CPP_OPTION (pfile, chill))
1557 goto do_line_comment;
1558 REVISE_TOKEN (CPP_MINUS_MINUS);
1559 }
1560 else
1561 PUSH_TOKEN (CPP_MINUS);
1562 break;
1563
Zack Weinbergc5a04732000-04-25 19:32:36 +00001564 make_hash:
1565 case '#':
Zack Weinberg041c3192000-07-04 01:58:21 +00001566 /* The digraph flag checking ensures that ## and %:%:
1567 are interpreted as CPP_PASTE, but #%: and %:# are not. */
Zack Weinbergc5a04732000-04-25 19:32:36 +00001568 if (PREV_TOKEN_TYPE == CPP_HASH && IMMED_TOKEN ()
1569 && ((cur_token->flags ^ cur_token[-1].flags) & DIGRAPH) == 0)
1570 REVISE_TOKEN (CPP_PASTE);
1571 else
1572 PUSH_TOKEN (CPP_HASH);
1573 break;
1574
1575 case ':':
1576 cur_token->type = CPP_COLON;
1577 if (IMMED_TOKEN ())
1578 {
1579 if (PREV_TOKEN_TYPE == CPP_COLON
1580 && CPP_OPTION (pfile, cplusplus))
1581 BACKUP_TOKEN (CPP_SCOPE);
1582 /* Digraph: "<:" is a '[' */
1583 else if (PREV_TOKEN_TYPE == CPP_LESS)
1584 BACKUP_DIGRAPH (CPP_OPEN_SQUARE);
1585 /* Digraph: "%:" is a '#' */
1586 else if (PREV_TOKEN_TYPE == CPP_MOD)
1587 {
1588 (--cur_token)->flags |= DIGRAPH;
1589 goto make_hash;
1590 }
1591 }
1592 cur_token++;
1593 break;
1594
1595 case '&':
1596 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_AND)
1597 REVISE_TOKEN (CPP_AND_AND);
1598 else
1599 PUSH_TOKEN (CPP_AND);
1600 break;
1601
1602 make_or:
1603 case '|':
1604 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_OR)
1605 REVISE_TOKEN (CPP_OR_OR);
1606 else
1607 PUSH_TOKEN (CPP_OR);
1608 break;
1609
1610 case '+':
1611 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_PLUS)
1612 REVISE_TOKEN (CPP_PLUS_PLUS);
1613 else
1614 PUSH_TOKEN (CPP_PLUS);
1615 break;
1616
1617 case '=':
1618 /* This relies on equidistance of "?=" and "?" tokens. */
1619 if (IMMED_TOKEN () && PREV_TOKEN_TYPE <= CPP_LAST_EQ)
1620 REVISE_TOKEN (PREV_TOKEN_TYPE + (CPP_EQ_EQ - CPP_EQ));
1621 else
1622 PUSH_TOKEN (CPP_EQ);
1623 break;
1624
1625 case '>':
1626 cur_token->type = CPP_GREATER;
1627 if (IMMED_TOKEN ())
1628 {
1629 if (PREV_TOKEN_TYPE == CPP_GREATER)
1630 BACKUP_TOKEN (CPP_RSHIFT);
1631 else if (PREV_TOKEN_TYPE == CPP_MINUS)
1632 BACKUP_TOKEN (CPP_DEREF);
1633 /* Digraph: ":>" is a ']' */
1634 else if (PREV_TOKEN_TYPE == CPP_COLON)
1635 BACKUP_DIGRAPH (CPP_CLOSE_SQUARE);
1636 /* Digraph: "%>" is a '}' */
1637 else if (PREV_TOKEN_TYPE == CPP_MOD)
1638 BACKUP_DIGRAPH (CPP_CLOSE_BRACE);
1639 }
1640 cur_token++;
1641 break;
1642
1643 case '<':
1644 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1645 {
1646 REVISE_TOKEN (CPP_LSHIFT);
1647 break;
1648 }
1649 /* Is this the beginning of a header name? */
Zack Weinberg041c3192000-07-04 01:58:21 +00001650 if (IS_DIRECTIVE () && (list->directive->flags & INCL))
Zack Weinbergc5a04732000-04-25 19:32:36 +00001651 {
1652 c = '>'; /* Terminator. */
1653 cur_token->type = CPP_HEADER_NAME;
1654 goto do_parse_string;
1655 }
1656 PUSH_TOKEN (CPP_LESS);
1657 break;
1658
1659 case '%':
1660 /* Digraph: "<%" is a '{' */
1661 cur_token->type = CPP_MOD;
1662 if (IMMED_TOKEN () && PREV_TOKEN_TYPE == CPP_LESS)
1663 BACKUP_DIGRAPH (CPP_OPEN_BRACE);
1664 cur_token++;
1665 break;
1666
Zack Weinbergc5a04732000-04-25 19:32:36 +00001667 case '?':
1668 if (cur + 1 < buffer->rlimit && *cur == '?'
1669 && trigraph_map[cur[1]] && trigraph_ok (pfile, cur + 1))
1670 {
1671 /* Handle trigraph. */
1672 cur++;
1673 switch (*cur++)
1674 {
1675 case '(': goto make_open_square;
1676 case ')': goto make_close_square;
1677 case '<': goto make_open_brace;
1678 case '>': goto make_close_brace;
1679 case '=': goto make_hash;
1680 case '!': goto make_or;
1681 case '-': goto make_complement;
1682 case '/': goto make_backslash;
1683 case '\'': goto make_xor;
1684 }
1685 }
1686 if (IMMED_TOKEN () && CPP_OPTION (pfile, cplusplus))
1687 {
1688 /* GNU C++ defines <? and >? operators. */
1689 if (PREV_TOKEN_TYPE == CPP_LESS)
1690 {
1691 REVISE_TOKEN (CPP_MIN);
1692 break;
1693 }
1694 else if (PREV_TOKEN_TYPE == CPP_GREATER)
1695 {
1696 REVISE_TOKEN (CPP_MAX);
1697 break;
1698 }
1699 }
1700 PUSH_TOKEN (CPP_QUERY);
1701 break;
1702
1703 case '.':
1704 if (PREV_TOKEN_TYPE == CPP_DOT && cur_token[-2].type == CPP_DOT
1705 && IMMED_TOKEN ()
Zack Weinberg041c3192000-07-04 01:58:21 +00001706 && !(cur_token[-1].flags & PREV_WHITE))
Zack Weinbergc5a04732000-04-25 19:32:36 +00001707 {
1708 cur_token -= 2;
1709 PUSH_TOKEN (CPP_ELLIPSIS);
1710 }
1711 else
1712 PUSH_TOKEN (CPP_DOT);
1713 break;
1714
Neil Boothcfd5b8b82000-04-27 00:58:50 +00001715 make_complement:
1716 case '~': PUSH_TOKEN (CPP_COMPL); break;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001717 make_xor:
1718 case '^': PUSH_TOKEN (CPP_XOR); break;
1719 make_open_brace:
1720 case '{': PUSH_TOKEN (CPP_OPEN_BRACE); break;
1721 make_close_brace:
1722 case '}': PUSH_TOKEN (CPP_CLOSE_BRACE); break;
1723 make_open_square:
1724 case '[': PUSH_TOKEN (CPP_OPEN_SQUARE); break;
1725 make_close_square:
1726 case ']': PUSH_TOKEN (CPP_CLOSE_SQUARE); break;
1727 make_backslash:
1728 case '\\': PUSH_TOKEN (CPP_BACKSLASH); break;
1729 case '!': PUSH_TOKEN (CPP_NOT); break;
1730 case ',': PUSH_TOKEN (CPP_COMMA); break;
1731 case ';': PUSH_TOKEN (CPP_SEMICOLON); break;
Neil Booth41e8b1d2000-05-15 22:44:22 +00001732 case '(': PUSH_TOKEN (CPP_OPEN_PAREN); break;
Neil Boothcfd5b8b82000-04-27 00:58:50 +00001733 case ')': PUSH_TOKEN (CPP_CLOSE_PAREN); break;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001734
1735 case '$':
1736 if (CPP_OPTION (pfile, dollars_in_ident))
1737 goto letter;
1738 /* Fall through */
Zack Weinberg041c3192000-07-04 01:58:21 +00001739 other:
Zack Weinbergc5a04732000-04-25 19:32:36 +00001740 default:
Zack Weinberg041c3192000-07-04 01:58:21 +00001741 cur_token->val.aux = c;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001742 PUSH_TOKEN (CPP_OTHER);
1743 break;
1744 }
1745 }
1746
1747 /* Run out of token space? */
1748 if (cur_token == token_limit)
1749 {
1750 list->tokens_used = cur_token - list->tokens;
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001751 _cpp_expand_token_space (list, 256);
Zack Weinbergc5a04732000-04-25 19:32:36 +00001752 goto expanded;
1753 }
1754
Zack Weinbergc5a04732000-04-25 19:32:36 +00001755 cur_token->flags = flags;
Zack Weinberg041c3192000-07-04 01:58:21 +00001756 if (cur_token == &list->tokens[first_token] && pfile->done_initializing)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001757 {
Neil Boothf624ffa2000-05-28 01:03:16 +00001758 if (cur > buffer->buf && !IS_NEWLINE (cur[-1]))
Zack Weinberg041c3192000-07-04 01:58:21 +00001759 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
1760 CPP_BUF_COLUMN (buffer, cur),
1761 "no newline at end of file");
Neil Boothf624ffa2000-05-28 01:03:16 +00001762 cur_token++->type = CPP_EOF;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001763 }
1764
1765 out:
Zack Weinberg041c3192000-07-04 01:58:21 +00001766 /* All tokens are allocated, so the memory location is fixed. */
1767 first = &list->tokens[first_token];
1768
1769 /* Don't complain about the null directive, nor directives in
1770 assembly source: we don't know where the comments are, and # may
1771 introduce assembler pseudo-ops. Don't complain about invalid
1772 directives in skipped conditional groups (6.10 p4). */
1773 if (first->type == CPP_HASH && list->directive == 0 && !pfile->skipping
1774 && cur_token > first + 1 && !CPP_OPTION (pfile, lang_asm))
1775 {
1776 if (first[1].type == CPP_NAME)
1777 cpp_error (pfile, "invalid preprocessing directive #%.*s",
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001778 (int) first[1].val.node->length, first[1].val.node->name);
Zack Weinberg041c3192000-07-04 01:58:21 +00001779 else
1780 cpp_error (pfile, "invalid preprocessing directive");
1781 }
1782
1783 /* Put EOF at end of directives. This covers "directives do not
1784 extend beyond the end of the line (description 6.10 part 2)". */
1785 if (IS_DIRECTIVE () || !pfile->done_initializing)
1786 {
1787 pfile->first_directive_token = first;
1788 cur_token++->type = CPP_EOF;
1789 }
1790
1791 if (first_token == 0 || IS_DIRECTIVE ())
1792 /* Set beginning of line flag. */
1793 first->flags |= BOL;
1794 else
1795 /* 6.10.3.10: Within the sequence of preprocessing tokens making
1796 up the invocation of a function-like macro, new line is
1797 considered a normal white-space character. */
1798 first->flags |= PREV_WHITE;
1799
Zack Weinbergc5a04732000-04-25 19:32:36 +00001800 buffer->cur = cur;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001801 list->tokens_used = cur_token - list->tokens;
Zack Weinberg041c3192000-07-04 01:58:21 +00001802 pfile->in_lex_line = 0;
1803}
1804
1805/* Write the spelling of a token TOKEN, with any appropriate
1806 whitespace before it, to the token_buffer. PREV is the previous
1807 token, which is used to determine if we need to shove in an extra
1808 space in order to avoid accidental token paste. */
1809static void
1810output_token (pfile, token, prev)
1811 cpp_reader *pfile;
1812 const cpp_token *token, *prev;
1813{
1814 int dummy;
1815
1816 if (token->col && (token->flags & BOL))
1817 {
1818 /* Supply enough whitespace to put this token in its original
1819 column. Don't bother trying to reconstruct tabs; we can't
1820 get it right in general, and nothing ought to care. (Yes,
1821 some things do care; the fault lies with them.) */
1822 unsigned char *buffer;
1823 unsigned int spaces = token->col - 1;
1824
1825 CPP_RESERVE (pfile, token->col);
1826 buffer = pfile->limit;
1827
1828 while (spaces--)
1829 *buffer++ = ' ';
1830 pfile->limit = buffer;
1831 }
1832 else if (token->flags & PREV_WHITE)
1833 CPP_PUTC (pfile, ' ');
1834 /* Check for and prevent accidental token pasting, in ANSI mode. */
1835
1836 else if (!CPP_TRADITIONAL (pfile) && prev)
1837 {
1838 if (can_paste (pfile, prev, token, &dummy) != CPP_EOF)
1839 CPP_PUTC (pfile, ' ');
1840 /* can_paste catches most of the accidental paste cases, but not all.
1841 Consider a + ++b - if there is not a space between the + and ++, it
1842 will be misparsed as a++ + b. */
1843 else if ((prev->type == CPP_PLUS && token->type == CPP_PLUS_PLUS)
1844 || (prev->type == CPP_MINUS && token->type == CPP_MINUS_MINUS))
1845 CPP_PUTC (pfile, ' ');
1846 }
1847
1848 CPP_RESERVE (pfile, TOKEN_LEN (token));
1849 pfile->limit = spell_token (pfile, token, pfile->limit);
Zack Weinbergc5a04732000-04-25 19:32:36 +00001850}
1851
Neil Booth3fef5b22000-05-08 22:22:49 +00001852/* Write the spelling of a token TOKEN to BUFFER. The buffer must
Zack Weinbergcf00a882000-07-08 02:33:00 +00001853 already contain the enough space to hold the token's spelling.
1854 Returns a pointer to the character after the last character
1855 written. */
Neil Booth3fef5b22000-05-08 22:22:49 +00001856
1857static unsigned char *
Zack Weinberg041c3192000-07-04 01:58:21 +00001858spell_token (pfile, token, buffer)
Neil Booth3fef5b22000-05-08 22:22:49 +00001859 cpp_reader *pfile; /* Would be nice to be rid of this... */
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001860 const cpp_token *token;
Neil Booth3fef5b22000-05-08 22:22:49 +00001861 unsigned char *buffer;
Neil Booth3fef5b22000-05-08 22:22:49 +00001862{
Neil Booth3fef5b22000-05-08 22:22:49 +00001863 switch (token_spellings[token->type].type)
1864 {
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001865 case SPELL_OPERATOR:
Neil Booth3fef5b22000-05-08 22:22:49 +00001866 {
1867 const unsigned char *spelling;
1868 unsigned char c;
1869
1870 if (token->flags & DIGRAPH)
1871 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1872 else
Neil Boothf617b8e2000-05-14 22:42:58 +00001873 spelling = token_spellings[token->type].spelling;
Neil Booth3fef5b22000-05-08 22:22:49 +00001874
1875 while ((c = *spelling++) != '\0')
1876 *buffer++ = c;
1877 }
1878 break;
1879
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001880 case SPELL_IDENT:
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001881 memcpy (buffer, token->val.node->name, token->val.node->length);
1882 buffer += token->val.node->length;
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001883 break;
Neil Booth3fef5b22000-05-08 22:22:49 +00001884
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001885 case SPELL_STRING:
1886 {
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001887 if (token->type == CPP_WSTRING || token->type == CPP_WCHAR)
1888 *buffer++ = 'L';
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001889
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001890 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001891 *buffer++ = '"';
1892 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1893 *buffer++ = '\'';
1894
1895 memcpy (buffer, token->val.str.text, token->val.str.len);
1896 buffer += token->val.str.len;
1897
1898 if (token->type == CPP_STRING || token->type == CPP_WSTRING)
1899 *buffer++ = '"';
1900 if (token->type == CPP_CHAR || token->type == CPP_WCHAR)
1901 *buffer++ = '\'';
Neil Booth3fef5b22000-05-08 22:22:49 +00001902 }
1903 break;
1904
1905 case SPELL_CHAR:
Zack Weinberg041c3192000-07-04 01:58:21 +00001906 *buffer++ = token->val.aux;
Neil Booth3fef5b22000-05-08 22:22:49 +00001907 break;
1908
1909 case SPELL_NONE:
Zack Weinberg041c3192000-07-04 01:58:21 +00001910 cpp_ice (pfile, "Unspellable token %s", token_names[token->type]);
Neil Booth3fef5b22000-05-08 22:22:49 +00001911 break;
1912 }
1913
1914 return buffer;
1915}
1916
Zack Weinbergcf00a882000-07-08 02:33:00 +00001917/* Return the spelling of a token known to be an operator.
1918 Does not distinguish digraphs from their counterparts. */
1919const unsigned char *
1920_cpp_spell_operator (type)
1921 enum cpp_ttype type;
1922{
1923 if (token_spellings[type].type == SPELL_OPERATOR)
1924 return token_spellings[type].spelling;
1925 else
1926 return token_names[type];
1927}
1928
1929
Zack Weinberg041c3192000-07-04 01:58:21 +00001930/* Macro expansion algorithm. TODO. */
1931
Kaveh R. Ghazi7de9cc32000-07-07 14:29:03 +00001932static const cpp_token placemarker_token = {0, 0, CPP_PLACEMARKER, 0 UNION_INIT_ZERO};
1933static const cpp_token eof_token = {0, 0, CPP_EOF, 0 UNION_INIT_ZERO};
Zack Weinberg041c3192000-07-04 01:58:21 +00001934
1935#define IS_ARG_CONTEXT(c) ((c)->flags & CONTEXT_ARG)
1936#define CURRENT_CONTEXT(pfile) ((pfile)->contexts + (pfile)->cur_context)
1937
1938/* Flags for cpp_context. */
1939#define CONTEXT_PASTEL (1 << 0) /* An argument context on LHS of ##. */
1940#define CONTEXT_PASTER (1 << 1) /* An argument context on RHS of ##. */
1941#define CONTEXT_RAW (1 << 2) /* If argument tokens already expanded. */
1942#define CONTEXT_ARG (1 << 3) /* If an argument context. */
1943
1944#define ASSIGN_FLAGS_AND_POS(d, s) \
1945 do {(d)->flags = (s)->flags & (PREV_WHITE | BOL | PASTE_LEFT); \
1946 if ((d)->flags & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1947 } while (0)
1948
1949/* f is flags, just consisting of PREV_WHITE | BOL. */
1950#define MODIFY_FLAGS_AND_POS(d, s, f) \
1951 do {(d)->flags &= ~(PREV_WHITE | BOL); (d)->flags |= (f); \
1952 if ((f) & BOL) {(d)->col = (s)->col; (d)->line = (s)->line;} \
1953 } while (0)
1954
1955typedef struct cpp_context cpp_context;
1956struct cpp_context
Zack Weinbergc5a04732000-04-25 19:32:36 +00001957{
Zack Weinberg041c3192000-07-04 01:58:21 +00001958 union
1959 {
1960 const cpp_toklist *list; /* Used for macro contexts only. */
1961 const cpp_token **arg; /* Used for arg contexts only. */
1962 } u;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001963
Zack Weinberg041c3192000-07-04 01:58:21 +00001964 /* Pushed token to be returned by next call to cpp_get_token. */
1965 const cpp_token *pushed_token;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001966
Zack Weinberg041c3192000-07-04 01:58:21 +00001967 struct macro_args *args; /* 0 for arguments and object-like macros. */
1968 unsigned short posn; /* Current posn, index into u. */
1969 unsigned short count; /* No. of tokens in u. */
1970 unsigned short level;
1971 unsigned char flags;
1972};
1973
1974typedef struct macro_args macro_args;
1975struct macro_args
1976{
1977 unsigned int *ends;
1978 const cpp_token **tokens;
1979 unsigned int capacity;
1980 unsigned int used;
1981 unsigned short level;
1982};
1983
1984static const cpp_token *get_raw_token PARAMS ((cpp_reader *));
1985static const cpp_token *parse_arg PARAMS ((cpp_reader *, int, unsigned int,
1986 macro_args *, unsigned int *));
1987static int parse_args PARAMS ((cpp_reader *, cpp_hashnode *, macro_args *));
1988static void save_token PARAMS ((macro_args *, const cpp_token *));
1989static const cpp_token *push_arg_context PARAMS ((cpp_reader *,
1990 const cpp_token *));
1991static int do_pop_context PARAMS ((cpp_reader *));
1992static const cpp_token *pop_context PARAMS ((cpp_reader *));
1993static const cpp_token *push_macro_context PARAMS ((cpp_reader *,
1994 cpp_hashnode *,
1995 const cpp_token *));
1996static void free_macro_args PARAMS ((macro_args *));
1997
1998/* Free the storage allocated for macro arguments. */
1999static void
2000free_macro_args (args)
2001 macro_args *args;
2002{
2003 if (args->tokens)
2004 free (args->tokens);
2005 free (args->ends);
2006 free (args);
2007}
2008
2009/* Determines if a macro has been already used (and is therefore
2010 disabled). */
2011static int
2012is_macro_disabled (pfile, expansion, token)
2013 cpp_reader *pfile;
2014 const cpp_toklist *expansion;
2015 const cpp_token *token;
2016{
2017 cpp_context *context = CURRENT_CONTEXT (pfile);
2018
Zack Weinbergcf00a882000-07-08 02:33:00 +00002019 /* Don't expand anything if this file has already been preprocessed. */
2020 if (CPP_OPTION (pfile, preprocessed))
2021 return 1;
2022
Zack Weinberg041c3192000-07-04 01:58:21 +00002023 /* Arguments on either side of ## are inserted in place without
2024 macro expansion (6.10.3.3.2). Conceptually, any macro expansion
2025 occurs during a later rescan pass. The effect is that we expand
2026 iff we would as part of the macro's expansion list, so we should
2027 drop to the macro's context. */
2028 if (IS_ARG_CONTEXT (context))
2029 {
2030 if (token->flags & PASTED)
2031 context--;
2032 else if (!(context->flags & CONTEXT_RAW))
2033 return 1;
2034 else if (context->flags & (CONTEXT_PASTEL | CONTEXT_PASTER))
2035 context--;
2036 }
2037
2038 /* Have we already used this macro? */
2039 while (context->level > 0)
2040 {
2041 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2042 return 1;
2043 /* Raw argument tokens are judged based on the token list they
2044 came from. */
2045 if (context->flags & CONTEXT_RAW)
2046 context = pfile->contexts + context->level;
2047 else
2048 context--;
2049 }
2050
2051 /* Function-like macros may be disabled if the '(' is not in the
2052 current context. We check this without disrupting the context
2053 stack. */
2054 if (expansion->paramc >= 0)
2055 {
2056 const cpp_token *next;
2057 unsigned int prev_nme;
2058
2059 context = CURRENT_CONTEXT (pfile);
2060 /* Drop down any contexts we're at the end of: the '(' may
2061 appear in lower macro expansions, or in the rest of the file. */
2062 while (context->posn == context->count && context > pfile->contexts)
2063 {
2064 context--;
2065 /* If we matched, we are disabled, as we appear in the
2066 expansion of each macro we meet. */
2067 if (!IS_ARG_CONTEXT (context) && context->u.list == expansion)
2068 return 1;
2069 }
2070
2071 prev_nme = pfile->no_expand_level;
2072 pfile->no_expand_level = context - pfile->contexts;
2073 next = cpp_get_token (pfile);
2074 restore_macro_expansion (pfile, prev_nme);
2075 if (next->type != CPP_OPEN_PAREN)
2076 {
2077 _cpp_push_token (pfile, next);
2078 if (CPP_OPTION (pfile, warn_traditional))
2079 cpp_warning (pfile,
2080 "function macro %.*s must be used with arguments in traditional C",
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002081 (int) token->val.node->length, token->val.node->name);
Zack Weinberg041c3192000-07-04 01:58:21 +00002082 return 1;
2083 }
2084 }
2085
2086 return 0;
2087}
2088
2089/* Add a token to the set of tokens forming the arguments to the macro
2090 being parsed in parse_args. */
2091static void
2092save_token (args, token)
2093 macro_args *args;
2094 const cpp_token *token;
2095{
2096 if (args->used == args->capacity)
2097 {
2098 args->capacity += args->capacity + 100;
2099 args->tokens = (const cpp_token **)
2100 xrealloc (args->tokens, args->capacity * sizeof (const cpp_token *));
2101 }
2102 args->tokens[args->used++] = token;
2103}
2104
2105/* Take and save raw tokens until we finish one argument. Empty
2106 arguments are saved as a single CPP_PLACEMARKER token. */
2107static const cpp_token *
2108parse_arg (pfile, var_args, paren_context, args, pcount)
2109 cpp_reader *pfile;
2110 int var_args;
2111 unsigned int paren_context;
2112 macro_args *args;
2113 unsigned int *pcount;
2114{
2115 const cpp_token *token;
2116 unsigned int paren = 0, count = 0;
2117 int raw, was_raw = 1;
2118
2119 for (count = 0;; count++)
2120 {
2121 token = cpp_get_token (pfile);
2122
2123 switch (token->type)
2124 {
2125 default:
2126 break;
2127
2128 case CPP_OPEN_PAREN:
2129 paren++;
2130 break;
2131
2132 case CPP_CLOSE_PAREN:
2133 if (paren-- != 0)
2134 break;
2135 goto out;
2136
2137 case CPP_COMMA:
2138 /* Commas are not terminators within parantheses or var_args. */
2139 if (paren || var_args)
2140 break;
2141 goto out;
2142
2143 case CPP_EOF: /* Error reported by caller. */
2144 goto out;
2145 }
2146
2147 raw = pfile->cur_context <= paren_context;
2148 if (raw != was_raw)
2149 {
2150 was_raw = raw;
2151 save_token (args, 0);
2152 count++;
2153 }
2154 save_token (args, token);
2155 }
2156
2157 out:
2158 if (count == 0)
2159 {
2160 /* Duplicate the placemarker. Then we can set its flags and
2161 position and safely be using more than one. */
2162 save_token (args, duplicate_token (pfile, &placemarker_token));
2163 count++;
2164 }
2165
2166 *pcount = count;
2167 return token;
2168}
2169
/* True if the argument starting at offset O of arglist A is empty:
   either a single PLACEMARKER token, or a null pointer followed by a
   PLACEMARKER.  A null first entry is skipped by indexing one entry
   further.  Both A and O are evaluated more than once.  */

#define empty_argument(A, O) \
  ((A)->tokens[(O) + ((A)->tokens[O] == 0)]->type == CPP_PLACEMARKER)
2177
2178/* Parse the arguments making up a macro invocation. Nested arguments
2179 are automatically macro expanded, but immediate macros are not
2180 expanded; this enables e.g. operator # to work correctly. Returns
2181 non-zero on error. */
2182static int
2183parse_args (pfile, hp, args)
2184 cpp_reader *pfile;
2185 cpp_hashnode *hp;
2186 macro_args *args;
2187{
2188 const cpp_token *token;
2189 const cpp_toklist *macro;
2190 unsigned int total = 0;
2191 unsigned int paren_context = pfile->cur_context;
2192 int argc = 0;
2193
2194 macro = hp->value.expansion;
2195 do
2196 {
2197 unsigned int count;
2198
2199 token = parse_arg (pfile, (argc + 1 == macro->paramc
2200 && (macro->flags & VAR_ARGS)),
2201 paren_context, args, &count);
2202 if (argc < macro->paramc)
2203 {
2204 total += count;
2205 args->ends[argc] = total;
2206 }
2207 argc++;
2208 }
2209 while (token->type != CPP_CLOSE_PAREN && token->type != CPP_EOF);
2210
2211 if (token->type == CPP_EOF)
2212 {
2213 cpp_error (pfile, "unterminated invocation of macro \"%.*s\"",
2214 hp->length, hp->name);
2215 return 1;
2216 }
2217 else if (argc < macro->paramc)
2218 {
2219 /* A rest argument is allowed to not appear in the invocation at all.
2220 e.g. #define debug(format, args...) ...
2221 debug("string");
2222 This is exactly the same as if the rest argument had received no
Neil Booth563dd082000-07-08 02:18:25 +00002223 tokens - debug("string",); This extension is deprecated. */
Zack Weinberg041c3192000-07-04 01:58:21 +00002224
Neil Booth563dd082000-07-08 02:18:25 +00002225 if (argc + 1 == macro->paramc && (macro->flags & GNU_REST_ARGS))
Zack Weinberg041c3192000-07-04 01:58:21 +00002226 {
2227 /* Duplicate the placemarker. Then we can set its flags and
2228 position and safely be using more than one. */
2229 save_token (args, duplicate_token (pfile, &placemarker_token));
2230 args->ends[argc] = total + 1;
2231 return 0;
2232 }
2233 else
2234 {
2235 cpp_error (pfile,
2236 "insufficient arguments in invocation of macro \"%.*s\"",
2237 hp->length, hp->name);
2238 return 1;
2239 }
2240 }
2241 /* An empty argument to an empty function-like macro is fine. */
2242 else if (argc > macro->paramc
2243 && !(macro->paramc == 0 && argc == 1 && empty_argument (args, 0)))
2244 {
2245 cpp_error (pfile,
2246 "too many arguments in invocation of macro \"%.*s\"",
2247 hp->length, hp->name);
2248 return 1;
2249 }
2250
2251 return 0;
2252}
2253
2254/* Adds backslashes before all backslashes and double quotes appearing
2255 in strings. Non-printable characters are converted to octal. */
2256static U_CHAR *
2257quote_string (dest, src, len)
2258 U_CHAR *dest;
2259 const U_CHAR *src;
2260 unsigned int len;
2261{
2262 while (len--)
2263 {
2264 U_CHAR c = *src++;
2265
2266 if (c == '\\' || c == '"')
2267 {
2268 *dest++ = '\\';
2269 *dest++ = c;
2270 }
2271 else
2272 {
2273 if (ISPRINT (c))
2274 *dest++ = c;
2275 else
2276 {
2277 sprintf ((char *) dest, "\\%03o", c);
2278 dest += 4;
2279 }
2280 }
2281 }
2282
2283 return dest;
2284}
2285
2286/* Allocates a buffer to hold a token's TEXT, and converts TOKEN to a
2287 CPP_STRING token containing TEXT in quoted form. */
2288static cpp_token *
2289make_string_token (token, text, len)
2290 cpp_token *token;
2291 const U_CHAR *text;
2292 unsigned int len;
2293{
2294 U_CHAR *buf;
2295
2296 buf = (U_CHAR *) xmalloc (len * 4);
2297 token->type = CPP_STRING;
2298 token->flags = 0;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002299 token->val.str.text = buf;
2300 token->val.str.len = quote_string (buf, text, len) - buf;
Zack Weinberg041c3192000-07-04 01:58:21 +00002301 return token;
2302}
2303
2304/* Allocates and converts a temporary token to a CPP_NUMBER token,
2305 evaluating to NUMBER. */
2306static cpp_token *
2307alloc_number_token (pfile, number)
2308 cpp_reader *pfile;
2309 int number;
2310{
2311 cpp_token *result;
2312 char *buf;
2313
2314 result = get_temp_token (pfile);
2315 buf = xmalloc (20);
2316 sprintf (buf, "%d", number);
2317
2318 result->type = CPP_NUMBER;
2319 result->flags = 0;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002320 result->val.str.text = (U_CHAR *) buf;
2321 result->val.str.len = strlen (buf);
Zack Weinberg041c3192000-07-04 01:58:21 +00002322 return result;
2323}
2324
2325/* Returns a temporary token from the temporary token store of PFILE. */
2326static cpp_token *
2327get_temp_token (pfile)
2328 cpp_reader *pfile;
2329{
2330 if (pfile->temp_used == pfile->temp_alloced)
2331 {
2332 if (pfile->temp_used == pfile->temp_cap)
2333 {
2334 pfile->temp_cap += pfile->temp_cap + 20;
2335 pfile->temp_tokens = (cpp_token **) xrealloc
2336 (pfile->temp_tokens, pfile->temp_cap * sizeof (cpp_token *));
2337 }
2338 pfile->temp_tokens[pfile->temp_alloced++] = (cpp_token *) xmalloc
2339 (sizeof (cpp_token));
2340 }
2341
2342 return pfile->temp_tokens[pfile->temp_used++];
2343}
2344
2345/* Release (not free) for re-use the temporary tokens of PFILE. */
2346static void
2347release_temp_tokens (pfile)
2348 cpp_reader *pfile;
2349{
2350 while (pfile->temp_used)
2351 {
2352 cpp_token *token = pfile->temp_tokens[--pfile->temp_used];
2353
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002354 if (token_spellings[token->type].type == SPELL_STRING)
Zack Weinberg041c3192000-07-04 01:58:21 +00002355 {
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002356 free ((char *) token->val.str.text);
2357 token->val.str.text = 0;
Zack Weinberg041c3192000-07-04 01:58:21 +00002358 }
2359 }
2360}
2361
2362/* Free all of PFILE's dynamically-allocated temporary tokens. */
2363void
2364_cpp_free_temp_tokens (pfile)
2365 cpp_reader *pfile;
2366{
2367 if (pfile->temp_tokens)
2368 {
2369 /* It is possible, though unlikely (looking for '(' of a funlike
2370 macro into EOF), that we haven't released the tokens yet. */
2371 release_temp_tokens (pfile);
2372 while (pfile->temp_alloced)
2373 free (pfile->temp_tokens[--pfile->temp_alloced]);
2374 free (pfile->temp_tokens);
2375 }
2376
2377 if (pfile->date)
2378 {
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002379 free ((char *) pfile->date->val.str.text);
Zack Weinberg041c3192000-07-04 01:58:21 +00002380 free (pfile->date);
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002381 free ((char *) pfile->time->val.str.text);
Zack Weinberg041c3192000-07-04 01:58:21 +00002382 free (pfile->time);
2383 }
2384}
2385
2386/* Copy TOKEN into a temporary token from PFILE's store. */
2387static cpp_token *
2388duplicate_token (pfile, token)
2389 cpp_reader *pfile;
2390 const cpp_token *token;
2391{
2392 cpp_token *result = get_temp_token (pfile);
2393
2394 *result = *token;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002395 if (token_spellings[token->type].type == SPELL_STRING)
Zack Weinberg041c3192000-07-04 01:58:21 +00002396 {
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002397 U_CHAR *buff = (U_CHAR *) xmalloc (token->val.str.len);
2398 memcpy (buff, token->val.str.text, token->val.str.len);
2399 result->val.str.text = buff;
Zack Weinberg041c3192000-07-04 01:58:21 +00002400 }
2401 return result;
2402}
2403
2404/* Determine whether two tokens can be pasted together, and if so,
2405 what the resulting token is. Returns CPP_EOF if the tokens cannot
2406 be pasted, or the appropriate type for the merged token if they
2407 can. */
2408static enum cpp_ttype
2409can_paste (pfile, token1, token2, digraph)
2410 cpp_reader * pfile;
2411 const cpp_token *token1, *token2;
2412 int* digraph;
2413{
2414 enum cpp_ttype a = token1->type, b = token2->type;
2415 int cxx = CPP_OPTION (pfile, cplusplus);
2416
2417 if (a <= CPP_LAST_EQ && b == CPP_EQ)
2418 return a + (CPP_EQ_EQ - CPP_EQ);
2419
2420 switch (a)
2421 {
2422 case CPP_GREATER:
2423 if (b == a) return CPP_RSHIFT;
2424 if (b == CPP_QUERY && cxx) return CPP_MAX;
2425 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
2426 break;
2427 case CPP_LESS:
2428 if (b == a) return CPP_LSHIFT;
2429 if (b == CPP_QUERY && cxx) return CPP_MIN;
2430 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
2431 if (b == CPP_COLON)
2432 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
2433 if (b == CPP_MOD)
2434 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
2435 break;
2436
2437 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
2438 case CPP_AND: if (b == a) return CPP_AND_AND; break;
2439 case CPP_OR: if (b == a) return CPP_OR_OR; break;
2440
2441 case CPP_MINUS:
2442 if (b == a) return CPP_MINUS_MINUS;
2443 if (b == CPP_GREATER) return CPP_DEREF;
2444 break;
2445 case CPP_COLON:
2446 if (b == a && cxx) return CPP_SCOPE;
2447 if (b == CPP_GREATER)
2448 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
2449 break;
2450
2451 case CPP_MOD:
2452 if (b == CPP_GREATER)
2453 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
2454 if (b == CPP_COLON)
2455 {*digraph = 1; return CPP_HASH;} /* %: digraph */
2456 break;
2457 case CPP_DEREF:
2458 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
2459 break;
2460 case CPP_DOT:
2461 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
2462 if (b == CPP_NUMBER) return CPP_NUMBER;
2463 break;
2464
2465 case CPP_HASH:
2466 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
2467 /* %:%: digraph */
2468 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
2469 break;
2470
2471 case CPP_NAME:
2472 if (b == CPP_NAME) return CPP_NAME;
2473 if (b == CPP_NUMBER
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002474 && is_numstart(token2->val.str.text[0])) return CPP_NAME;
Zack Weinberg041c3192000-07-04 01:58:21 +00002475 if (b == CPP_CHAR
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002476 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WCHAR;
Zack Weinberg041c3192000-07-04 01:58:21 +00002477 if (b == CPP_STRING
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002478 && token1->val.node == pfile->spec_nodes->n_L) return CPP_WSTRING;
Zack Weinberg041c3192000-07-04 01:58:21 +00002479 break;
2480
2481 case CPP_NUMBER:
2482 if (b == CPP_NUMBER) return CPP_NUMBER;
2483 if (b == CPP_NAME) return CPP_NUMBER;
2484 if (b == CPP_DOT) return CPP_NUMBER;
2485 /* Numbers cannot have length zero, so this is safe. */
2486 if ((b == CPP_PLUS || b == CPP_MINUS)
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002487 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
Zack Weinberg041c3192000-07-04 01:58:21 +00002488 return CPP_NUMBER;
2489 break;
2490
2491 default:
2492 break;
2493 }
2494
2495 return CPP_EOF;
2496}
2497
2498/* Check if TOKEN is to be ##-pasted with the token after it. */
2499static const cpp_token *
2500maybe_paste_with_next (pfile, token)
2501 cpp_reader *pfile;
2502 const cpp_token *token;
2503{
2504 cpp_token *pasted;
2505 const cpp_token *second;
2506 cpp_context *context = CURRENT_CONTEXT (pfile);
2507
2508 /* Is this token on the LHS of ## ? */
2509 if (!((context->flags & CONTEXT_PASTEL) && context->posn == context->count)
2510 && !(token->flags & PASTE_LEFT))
2511 return token;
2512
2513 /* Prevent recursion, and possibly pushing back more than one token. */
2514 if (pfile->paste_level)
2515 return token;
2516
2517 /* Suppress macro expansion for next token, but don't conflict with
2518 the other method of suppression. If it is an argument, macro
2519 expansion within the argument will still occur. */
2520 pfile->paste_level = pfile->cur_context;
2521 second = cpp_get_token (pfile);
2522 pfile->paste_level = 0;
2523
Neil Booth563dd082000-07-08 02:18:25 +00002524 /* Ignore placemarker argument tokens (cannot be from an empty macro
2525 since macros are not expanded). */
Zack Weinberg041c3192000-07-04 01:58:21 +00002526 if (token->type == CPP_PLACEMARKER)
2527 pasted = duplicate_token (pfile, second);
2528 else if (second->type == CPP_PLACEMARKER)
2529 {
Neil Booth563dd082000-07-08 02:18:25 +00002530 cpp_context *mac_context = CURRENT_CONTEXT (pfile) - 1;
Zack Weinberg041c3192000-07-04 01:58:21 +00002531 /* GCC has special extended semantics for a ## b where b is a
2532 varargs parameter: a disappears if b consists of no tokens.
2533 This extension is deprecated. */
Neil Booth563dd082000-07-08 02:18:25 +00002534 if ((mac_context->u.list->flags & GNU_REST_ARGS)
2535 && (mac_context->u.list->tokens[mac_context->posn - 1].val.aux + 1
2536 == (unsigned) mac_context->u.list->paramc))
Zack Weinberg041c3192000-07-04 01:58:21 +00002537 {
2538 cpp_warning (pfile, "deprecated GNU ## extension used");
2539 pasted = duplicate_token (pfile, second);
2540 }
2541 else
2542 pasted = duplicate_token (pfile, token);
2543 }
2544 else
2545 {
2546 int digraph = 0;
2547 enum cpp_ttype type = can_paste (pfile, token, second, &digraph);
2548
2549 if (type == CPP_EOF)
2550 {
2551 if (CPP_OPTION (pfile, warn_paste))
2552 cpp_warning (pfile,
2553 "pasting would not give a valid preprocessing token");
2554 _cpp_push_token (pfile, second);
2555 return token;
2556 }
2557
2558 if (type == CPP_NAME || type == CPP_NUMBER)
2559 {
2560 /* Join spellings. */
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002561 U_CHAR *buf, *end;
Zack Weinberg041c3192000-07-04 01:58:21 +00002562
2563 pasted = get_temp_token (pfile);
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002564 buf = (U_CHAR *) alloca (TOKEN_LEN (token) + TOKEN_LEN (second));
2565 end = spell_token (pfile, token, buf);
2566 end = spell_token (pfile, second, end);
2567 *end = '\0';
Zack Weinberg041c3192000-07-04 01:58:21 +00002568
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002569 if (type == CPP_NAME)
2570 pasted->val.node = cpp_lookup (pfile, buf, end - buf);
2571 else
2572 {
2573 pasted->val.str.text = uxstrdup (buf);
2574 pasted->val.str.len = end - buf;
2575 }
Zack Weinberg041c3192000-07-04 01:58:21 +00002576 }
2577 else if (type == CPP_WCHAR || type == CPP_WSTRING)
2578 pasted = duplicate_token (pfile, second);
2579 else
2580 {
2581 pasted = get_temp_token (pfile);
2582 pasted->val.integer = 0;
2583 }
2584
2585 pasted->type = type;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002586 pasted->flags = digraph ? DIGRAPH : 0;
Zack Weinberg041c3192000-07-04 01:58:21 +00002587 }
2588
2589 /* The pasted token gets the whitespace flags and position of the
2590 first token, the PASTE_LEFT flag of the second token, plus the
2591 PASTED flag to indicate it is the result of a paste. However, we
2592 want to preserve the DIGRAPH flag. */
2593 pasted->flags &= ~(PREV_WHITE | BOL | PASTE_LEFT);
2594 pasted->flags |= ((token->flags & (PREV_WHITE | BOL))
2595 | (second->flags & PASTE_LEFT) | PASTED);
2596 pasted->col = token->col;
2597 pasted->line = token->line;
2598
2599 return maybe_paste_with_next (pfile, pasted);
2600}
2601
2602/* Convert a token sequence to a single string token according to the
2603 rules of the ISO C #-operator. */
2604#define INIT_SIZE 200
2605static cpp_token *
2606stringify_arg (pfile, token)
2607 cpp_reader *pfile;
2608 const cpp_token *token;
2609{
2610 cpp_token *result;
2611 unsigned char *main_buf;
2612 unsigned int prev_value, backslash_count = 0;
2613 unsigned int buf_used = 0, whitespace = 0, buf_cap = INIT_SIZE;
2614
2615 prev_value = prevent_macro_expansion (pfile);
2616 main_buf = (unsigned char *) xmalloc (buf_cap);
2617
2618 result = get_temp_token (pfile);
2619 ASSIGN_FLAGS_AND_POS (result, token);
2620
2621 for (; (token = cpp_get_token (pfile))->type != CPP_EOF; )
2622 {
2623 int escape;
2624 unsigned char *buf;
2625 unsigned int len = TOKEN_LEN (token);
2626
2627 escape = (token->type == CPP_STRING || token->type == CPP_WSTRING
2628 || token->type == CPP_CHAR || token->type == CPP_WCHAR);
2629 if (escape)
2630 len *= 4 + 1;
2631
2632 if (buf_used + len > buf_cap)
2633 {
2634 buf_cap = buf_used + len + INIT_SIZE;
2635 main_buf = xrealloc (main_buf, buf_cap);
2636 }
2637
2638 if (whitespace && (token->flags & PREV_WHITE))
2639 main_buf[buf_used++] = ' ';
2640
2641 if (escape)
2642 buf = (unsigned char *) xmalloc (len);
2643 else
2644 buf = main_buf + buf_used;
2645
2646 len = spell_token (pfile, token, buf) - buf;
2647 if (escape)
2648 {
2649 buf_used = quote_string (&main_buf[buf_used], buf, len) - main_buf;
2650 free (buf);
2651 }
2652 else
2653 buf_used += len;
2654
2655 whitespace = 1;
2656 if (token->type == CPP_BACKSLASH)
2657 backslash_count++;
2658 else
2659 backslash_count = 0;
2660 }
2661
2662 /* Ignore the final \ of invalid string literals. */
2663 if (backslash_count & 1)
2664 {
2665 cpp_warning (pfile, "invalid string literal, ignoring final '\\'");
2666 buf_used--;
2667 }
2668
2669 result->type = CPP_STRING;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002670 result->val.str.text = main_buf;
2671 result->val.str.len = buf_used;
Zack Weinberg041c3192000-07-04 01:58:21 +00002672 restore_macro_expansion (pfile, prev_value);
2673 return result;
2674}
2675
2676/* Allocate more room on the context stack of PFILE. */
2677static void
2678expand_context_stack (pfile)
2679 cpp_reader *pfile;
2680{
2681 pfile->context_cap += pfile->context_cap + 20;
2682 pfile->contexts = (cpp_context *)
2683 xrealloc (pfile->contexts, pfile->context_cap * sizeof (cpp_context));
2684}
2685
2686/* Push the context of macro NODE onto the context stack. TOKEN is
2687 the CPP_NAME token invoking the macro. */
2688static const cpp_token *
2689push_macro_context (pfile, node, token)
2690 cpp_reader *pfile;
2691 cpp_hashnode *node;
2692 const cpp_token *token;
2693{
2694 unsigned char orig_flags;
2695 macro_args *args;
2696 cpp_context *context;
2697
2698 if (pfile->cur_context > CPP_STACK_MAX)
2699 {
2700 cpp_error (pfile, "infinite macro recursion invoking '%s'", node->name);
2701 return token;
2702 }
2703
2704 /* Token's flags may change when parsing args containing a nested
2705 invocation of this macro. */
2706 orig_flags = token->flags & (PREV_WHITE | BOL);
2707 args = 0;
2708 if (node->value.expansion->paramc >= 0)
2709 {
2710 unsigned int error, prev_nme;
2711
2712 /* Allocate room for the argument contexts, and parse them. */
2713 args = (macro_args *) xmalloc (sizeof (macro_args));
2714 args->ends = (unsigned int *)
2715 xmalloc (node->value.expansion->paramc * sizeof (unsigned int));
2716 args->tokens = 0;
2717 args->capacity = 0;
2718 args->used = 0;
2719 args->level = pfile->cur_context;
2720
2721 prev_nme = prevent_macro_expansion (pfile);
2722 pfile->args = args;
2723 error = parse_args (pfile, node, args);
2724 pfile->args = 0;
2725 restore_macro_expansion (pfile, prev_nme);
2726 if (error)
2727 {
2728 free_macro_args (args);
2729 return token;
2730 }
2731 }
2732
2733 /* Now push its context. */
2734 pfile->cur_context++;
2735 if (pfile->cur_context == pfile->context_cap)
2736 expand_context_stack (pfile);
2737
2738 context = CURRENT_CONTEXT (pfile);
2739 context->u.list = node->value.expansion;
2740 context->args = args;
2741 context->posn = 0;
2742 context->count = context->u.list->tokens_used;
2743 context->level = pfile->cur_context;
2744 context->flags = 0;
2745 context->pushed_token = 0;
2746
2747 /* Set the flags of the first token. We know there must
2748 be one, empty macros are a single placemarker token. */
2749 MODIFY_FLAGS_AND_POS (&context->u.list->tokens[0], token, orig_flags);
2750
2751 return cpp_get_token (pfile);
2752}
2753
2754/* Push an argument to the current macro onto the context stack.
2755 TOKEN is the MACRO_ARG token representing the argument expansion. */
2756static const cpp_token *
2757push_arg_context (pfile, token)
2758 cpp_reader *pfile;
2759 const cpp_token *token;
2760{
2761 cpp_context *context;
2762 macro_args *args;
2763
2764 pfile->cur_context++;
2765 if (pfile->cur_context == pfile->context_cap)
2766 expand_context_stack (pfile);
2767
2768 context = CURRENT_CONTEXT (pfile);
2769 args = context[-1].args;
2770
2771 context->count = token->val.aux ? args->ends[token->val.aux - 1]: 0;
2772 context->u.arg = args->tokens + context->count;
2773 context->count = args->ends[token->val.aux] - context->count;
2774 context->args = 0;
2775 context->posn = 0;
2776 context->level = args->level;
2777 context->flags = CONTEXT_ARG | CONTEXT_RAW;
2778 context->pushed_token = 0;
2779
2780 /* Set the flags of the first token. There is one. */
2781 {
2782 const cpp_token *first = context->u.arg[0];
2783 if (!first)
2784 first = context->u.arg[1];
2785
2786 MODIFY_FLAGS_AND_POS ((cpp_token *) first, token,
2787 token->flags & (PREV_WHITE | BOL));
2788 }
2789
2790 if (token->flags & STRINGIFY_ARG)
2791 return stringify_arg (pfile, token);
2792
2793 if (token->flags & PASTE_LEFT)
2794 context->flags |= CONTEXT_PASTEL;
2795 if (pfile->paste_level)
2796 context->flags |= CONTEXT_PASTER;
2797
2798 return get_raw_token (pfile);
2799}
2800
2801/* "Unget" a token. It is effectively inserted in the token queue and
2802 will be returned by the next call to get_raw_token. */
2803void
2804_cpp_push_token (pfile, token)
2805 cpp_reader *pfile;
2806 const cpp_token *token;
2807{
2808 cpp_context *context = CURRENT_CONTEXT (pfile);
2809 if (context->pushed_token)
2810 cpp_ice (pfile, "two tokens pushed in a row");
2811 if (token->type != CPP_EOF)
2812 context->pushed_token = token;
2813 /* Don't push back a directive's CPP_EOF, step back instead. */
2814 else if (pfile->cur_context == 0)
2815 pfile->contexts[0].posn--;
2816}
2817
2818/* Handle a preprocessing directive. TOKEN is the CPP_HASH token
2819 introducing the directive. */
2820static void
2821process_directive (pfile, token)
2822 cpp_reader *pfile;
2823 const cpp_token *token;
2824{
2825 const struct directive *d = pfile->token_list.directive;
2826 int prev_nme = 0;
2827
2828 /* Skip over the directive name. */
2829 if (token[1].type == CPP_NAME)
2830 _cpp_get_raw_token (pfile);
2831 else if (token[1].type != CPP_NUMBER)
2832 cpp_ice (pfile, "directive begins with %s?!",
2833 token_names[token[1].type]);
2834
2835 /* Flush pending tokens at this point, in case the directive produces
2836 output. XXX Directive output won't be visible to a direct caller of
2837 cpp_get_token. */
2838 if (pfile->printer && CPP_WRITTEN (pfile) - pfile->printer->written)
2839 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
2840
2841 if (! (d->flags & EXPAND))
2842 prev_nme = prevent_macro_expansion (pfile);
2843 (void) (*d->handler) (pfile);
2844 if (! (d->flags & EXPAND))
2845 restore_macro_expansion (pfile, prev_nme);
2846 _cpp_skip_rest_of_line (pfile);
2847}
2848
2849/* The external interface to return the next token. All macro
2850 expansion and directive processing is handled internally, the
2851 caller only ever sees the output after preprocessing. */
2852const cpp_token *
2853cpp_get_token (pfile)
2854 cpp_reader *pfile;
2855{
2856 const cpp_token *token;
2857 cpp_hashnode *node;
2858
2859 /* Loop till we hit a non-directive, non-skipped, non-placemarker token. */
Zack Weinberg15dad1d2000-05-18 15:55:46 +00002860 for (;;)
Zack Weinbergc5a04732000-04-25 19:32:36 +00002861 {
Zack Weinberg041c3192000-07-04 01:58:21 +00002862 token = get_raw_token (pfile);
2863 if (token->flags & BOL && token->type == CPP_HASH
2864 && pfile->token_list.directive)
2865 {
2866 process_directive (pfile, token);
2867 continue;
2868 }
Zack Weinbergc5a04732000-04-25 19:32:36 +00002869
Zack Weinberg041c3192000-07-04 01:58:21 +00002870 /* Short circuit EOF. */
2871 if (token->type == CPP_EOF)
2872 return token;
2873
2874 if (pfile->skipping && ! pfile->token_list.directive)
2875 {
2876 _cpp_skip_rest_of_line (pfile);
2877 continue;
2878 }
2879 break;
Zack Weinbergc5a04732000-04-25 19:32:36 +00002880 }
Zack Weinbergc5a04732000-04-25 19:32:36 +00002881
Zack Weinberg041c3192000-07-04 01:58:21 +00002882 /* If there's a potential control macro and we get here, then that
2883 #ifndef didn't cover the entire file and its argument shouldn't
2884 be taken as a control macro. */
2885 pfile->potential_control_macro = 0;
Zack Weinbergc5a04732000-04-25 19:32:36 +00002886
Zack Weinberg041c3192000-07-04 01:58:21 +00002887 token = maybe_paste_with_next (pfile, token);
2888
2889 if (token->type != CPP_NAME)
2890 return token;
2891
2892 /* Is macro expansion disabled in general? */
2893 if (pfile->no_expand_level == pfile->cur_context || pfile->paste_level)
2894 return token;
2895
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00002896 node = token->val.node;
Zack Weinberg041c3192000-07-04 01:58:21 +00002897 if (node->type == T_VOID)
2898 return token;
2899
2900 if (node->type == T_MACRO)
Zack Weinbergc5a04732000-04-25 19:32:36 +00002901 {
Zack Weinberg041c3192000-07-04 01:58:21 +00002902 if (is_macro_disabled (pfile, node->value.expansion, token))
2903 return token;
2904
2905 return push_macro_context (pfile, node, token);
Zack Weinbergc5a04732000-04-25 19:32:36 +00002906 }
Zack Weinberg041c3192000-07-04 01:58:21 +00002907 else
2908 return special_symbol (pfile, node, token);
Zack Weinbergc5a04732000-04-25 19:32:36 +00002909}
2910
Zack Weinberg041c3192000-07-04 01:58:21 +00002911/* Returns the next raw token, i.e. without performing macro
2912 expansion. Argument contexts are automatically entered. */
2913static const cpp_token *
2914get_raw_token (pfile)
2915 cpp_reader *pfile;
2916{
2917 const cpp_token *result;
2918 cpp_context *context = CURRENT_CONTEXT (pfile);
2919
2920 if (context->pushed_token)
2921 {
2922 result = context->pushed_token;
2923 context->pushed_token = 0;
2924 }
2925 else if (context->posn == context->count)
2926 result = pop_context (pfile);
2927 else
2928 {
2929 if (IS_ARG_CONTEXT (context))
2930 {
2931 result = context->u.arg[context->posn++];
2932 if (result == 0)
2933 {
2934 context->flags ^= CONTEXT_RAW;
2935 result = context->u.arg[context->posn++];
2936 }
2937 return result; /* Cannot be a CPP_MACRO_ARG */
2938 }
2939 result = &context->u.list->tokens[context->posn++];
2940 }
2941
2942 if (result->type == CPP_MACRO_ARG)
2943 result = push_arg_context (pfile, result);
2944 return result;
2945}
2946
2947/* Internal interface to get the token without macro expanding. */
2948const cpp_token *
2949_cpp_get_raw_token (pfile)
2950 cpp_reader *pfile;
2951{
2952 int prev_nme = prevent_macro_expansion (pfile);
2953 const cpp_token *result = cpp_get_token (pfile);
2954 restore_macro_expansion (pfile, prev_nme);
2955 return result;
2956}
2957
2958/* A thin wrapper to lex_line. CLEAR is non-zero if the current token
2959 list should be overwritten, or zero if we need to append
2960 (typically, if we are within the arguments to a macro, or looking
2961 for the '(' to start a function-like macro invocation). */
2962static int
2963lex_next (pfile, clear)
2964 cpp_reader *pfile;
2965 int clear;
2966{
2967 cpp_toklist *list = &pfile->token_list;
2968 const cpp_token *old_list = list->tokens;
2969 unsigned int old_used = list->tokens_used;
2970
2971 if (clear)
2972 {
2973 /* Release all temporary tokens. */
2974 _cpp_clear_toklist (list);
2975 pfile->contexts[0].posn = 0;
2976 if (pfile->temp_used)
2977 release_temp_tokens (pfile);
2978 }
2979 else
2980 {
2981 /* If we are currently processing a directive, do not advance.
2982 (6.10 paragraph 2: A new-line character ends the directive
2983 even if it occurs within what would otherwise be an
2984 invocation of a function-like macro.) */
2985 if (list->directive)
2986 return 1;
2987 }
2988
2989 lex_line (pfile, list);
2990 pfile->contexts[0].count = list->tokens_used;
2991
2992 if (!clear && pfile->args)
2993 {
2994 /* Fix up argument token pointers. */
2995 if (old_list != list->tokens)
2996 {
2997 unsigned int i;
2998
2999 for (i = 0; i < pfile->args->used; i++)
3000 {
3001 const cpp_token *token = pfile->args->tokens[i];
3002 if (token >= old_list && token < old_list + old_used)
3003 pfile->args->tokens[i] = (const cpp_token *)
3004 ((char *) token + ((char *) list->tokens - (char *) old_list));
3005 }
3006 }
3007
3008 /* 6.10.3 paragraph 11: If there are sequences of preprocessing
3009 tokens within the list of arguments that would otherwise act as
3010 preprocessing directives, the behavior is undefined.
3011
3012 This implementation will report a hard error and treat the
3013 'sequence of preprocessing tokens' as part of the macro argument,
3014 not a directive.
3015
3016 Note if pfile->args == 0, we're OK since we're only inside a
3017 macro argument after a '('. */
3018 if (list->directive)
3019 {
3020 cpp_error_with_line (pfile, list->tokens[old_used].line,
3021 list->tokens[old_used].col,
3022 "#%s may not be used inside a macro argument",
3023 list->directive->name);
3024 /* Don't treat as a directive: clear list->directive,
3025 prune the final EOF from the list. */
3026 list->directive = 0;
3027 list->tokens_used--;
3028 pfile->contexts[0].count--;
3029 }
3030 }
3031
3032 return 0;
3033}
3034
3035/* Pops a context of the context stack. If we're at the bottom, lexes
3036 the next logical line. Returns 1 if we're at the end of the
3037 argument list to the # operator, or if it is illegal to "overflow"
3038 into the rest of the file (e.g. 6.10.3.1.1). */
3039static int
3040do_pop_context (pfile)
3041 cpp_reader *pfile;
3042{
3043 cpp_context *context;
3044
3045 if (pfile->cur_context == 0)
3046 return lex_next (pfile, pfile->no_expand_level == UINT_MAX);
3047
3048 /* Argument contexts, when parsing args or handling # operator
3049 return CPP_EOF at the end. */
3050 context = CURRENT_CONTEXT (pfile);
3051 if (IS_ARG_CONTEXT (context) && pfile->cur_context == pfile->no_expand_level)
3052 return 1;
3053
3054 /* Free resources when leaving macro contexts. */
3055 if (context->args)
3056 free_macro_args (context->args);
3057
3058 if (pfile->cur_context == pfile->no_expand_level)
3059 pfile->no_expand_level--;
3060 pfile->cur_context--;
3061
3062 return 0;
3063}
3064
3065/* Move down the context stack, and return the next raw token. */
3066static const cpp_token *
3067pop_context (pfile)
3068 cpp_reader *pfile;
3069{
3070 if (do_pop_context (pfile))
3071 return &eof_token;
3072 return get_raw_token (pfile);
3073}
3074
3075/* Turn off macro expansion at the current context level. */
3076static unsigned int
3077prevent_macro_expansion (pfile)
3078 cpp_reader *pfile;
3079{
3080 unsigned int prev_value = pfile->no_expand_level;
3081 pfile->no_expand_level = pfile->cur_context;
3082 return prev_value;
3083}
3084
3085/* Restore macro expansion to its previous state. */
3086static void
3087restore_macro_expansion (pfile, prev_value)
3088 cpp_reader *pfile;
3089 unsigned int prev_value;
3090{
3091 pfile->no_expand_level = prev_value;
3092}
3093
3094/* Used by cpperror.c to obtain the correct line and column to report
3095 in a diagnostic. */
3096unsigned int
3097_cpp_get_line (pfile, pcol)
3098 cpp_reader *pfile;
3099 unsigned int *pcol;
3100{
3101 unsigned int index;
3102 const cpp_token *cur_token;
3103
3104 if (pfile->in_lex_line)
3105 index = pfile->token_list.tokens_used;
3106 else
3107 index = pfile->contexts[0].posn;
3108
3109 cur_token = &pfile->token_list.tokens[index - 1];
3110 if (pcol)
3111 *pcol = cur_token->col;
3112 return cur_token->line;
3113}
3114
3115#define DSC(str) (const U_CHAR *)str, sizeof str - 1
3116static const char * const monthnames[] =
3117{
3118 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3119 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
3120};
3121
3122/* Handle builtin macros like __FILE__. */
3123static const cpp_token *
3124special_symbol (pfile, node, token)
3125 cpp_reader *pfile;
3126 cpp_hashnode *node;
3127 const cpp_token *token;
3128{
3129 cpp_token *result;
3130 cpp_buffer *ip;
3131
3132 switch (node->type)
3133 {
3134 case T_FILE:
3135 case T_BASE_FILE:
3136 {
3137 const char *file;
3138
3139 ip = CPP_BUFFER (pfile);
3140 if (ip == 0)
3141 file = "";
3142 else
3143 {
3144 if (node->type == T_BASE_FILE)
3145 while (CPP_PREV_BUFFER (ip) != NULL)
3146 ip = CPP_PREV_BUFFER (ip);
3147
3148 file = ip->nominal_fname;
3149 }
3150 result = make_string_token (get_temp_token (pfile), (U_CHAR *) file,
3151 strlen (file));
3152 }
3153 break;
3154
3155 case T_INCLUDE_LEVEL:
3156 {
3157 int true_indepth = 0;
3158
3159 /* Do not count the primary source file in the include level. */
3160 ip = CPP_PREV_BUFFER (CPP_BUFFER (pfile));
3161 while (ip)
3162 {
3163 true_indepth++;
3164 ip = CPP_PREV_BUFFER (ip);
3165 }
3166 result = alloc_number_token (pfile, true_indepth);
3167 }
3168 break;
3169
3170 case T_SPECLINE:
3171 /* If __LINE__ is embedded in a macro, it must expand to the
3172 line of the macro's invocation, not its definition.
3173 Otherwise things like assert() will not work properly. */
3174 result = alloc_number_token (pfile, _cpp_get_line (pfile, NULL));
3175 break;
3176
3177 case T_STDC:
3178 {
3179 int stdc = 1;
3180
3181#ifdef STDC_0_IN_SYSTEM_HEADERS
3182 if (CPP_IN_SYSTEM_HEADER (pfile)
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00003183 && pfile->spec_nodes->n__STRICT_ANSI__->type == T_VOID)
Zack Weinberg041c3192000-07-04 01:58:21 +00003184 stdc = 0;
Zack Weinbergc5a04732000-04-25 19:32:36 +00003185#endif
Zack Weinberg041c3192000-07-04 01:58:21 +00003186 result = alloc_number_token (pfile, stdc);
3187 }
3188 break;
3189
3190 case T_DATE:
3191 case T_TIME:
3192 if (pfile->date == 0)
3193 {
3194 /* Allocate __DATE__ and __TIME__ from permanent storage,
3195 and save them in pfile so we don't have to do this again.
3196 We don't generate these strings at init time because
3197 time() and localtime() are very slow on some systems. */
3198 time_t tt = time (NULL);
3199 struct tm *tb = localtime (&tt);
3200
3201 pfile->date = make_string_token
3202 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("Oct 11 1347"));
3203 pfile->time = make_string_token
3204 ((cpp_token *) xmalloc (sizeof (cpp_token)), DSC("12:34:56"));
3205
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00003206 sprintf ((char *) pfile->date->val.str.text, "%s %2d %4d",
Zack Weinberg041c3192000-07-04 01:58:21 +00003207 monthnames[tb->tm_mon], tb->tm_mday, tb->tm_year + 1900);
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00003208 sprintf ((char *) pfile->time->val.str.text, "%02d:%02d:%02d",
Zack Weinberg041c3192000-07-04 01:58:21 +00003209 tb->tm_hour, tb->tm_min, tb->tm_sec);
3210 }
3211 result = node->type == T_DATE ? pfile->date: pfile->time;
3212 break;
3213
3214 case T_POISON:
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00003215 cpp_error (pfile, "attempt to use poisoned \"%s\"", node->name);
Zack Weinberg041c3192000-07-04 01:58:21 +00003216 return token;
3217
3218 default:
3219 cpp_ice (pfile, "invalid special hash type");
3220 return token;
3221 }
3222
3223 ASSIGN_FLAGS_AND_POS (result, token);
3224 return result;
3225}
3226#undef DSC
3227
3228/* Dump the original user's spelling of argument index ARG_NO to the
3229 macro whose expansion is LIST. */
3230static void
3231dump_param_spelling (pfile, list, arg_no)
3232 cpp_reader *pfile;
3233 const cpp_toklist *list;
3234 unsigned int arg_no;
3235{
3236 const U_CHAR *param = list->namebuf;
3237
3238 while (arg_no--)
3239 param += ustrlen (param) + 1;
3240 CPP_PUTS (pfile, param, ustrlen (param));
3241}
3242
3243/* Dump a token list to the output. */
3244void
3245_cpp_dump_list (pfile, list, token, flush)
3246 cpp_reader *pfile;
3247 const cpp_toklist *list;
3248 const cpp_token *token;
3249 int flush;
3250{
3251 const cpp_token *limit = list->tokens + list->tokens_used;
3252 const cpp_token *prev = 0;
3253
3254 /* Avoid the CPP_EOF. */
3255 if (list->directive)
3256 limit--;
3257
3258 while (token < limit)
3259 {
3260 if (token->type == CPP_MACRO_ARG)
3261 {
3262 if (token->flags & PREV_WHITE)
3263 CPP_PUTC (pfile, ' ');
3264 if (token->flags & STRINGIFY_ARG)
3265 CPP_PUTC (pfile, '#');
3266 dump_param_spelling (pfile, list, token->val.aux);
3267 }
3268 else
3269 output_token (pfile, token, prev);
3270 if (token->flags & PASTE_LEFT)
3271 CPP_PUTS (pfile, " ##", 3);
3272 prev = token;
3273 token++;
3274 }
3275
3276 if (flush && pfile->printer)
3277 cpp_output_tokens (pfile, pfile->printer, pfile->token_list.line);
3278}
3279
Zack Weinberg041c3192000-07-04 01:58:21 +00003280/* Allocate pfile->input_buffer, and initialize trigraph_map[]
3281 if it hasn't happened already. */
3282
3283void
3284_cpp_init_input_buffer (pfile)
3285 cpp_reader *pfile;
3286{
3287 init_trigraph_map ();
3288 pfile->context_cap = 20;
3289 pfile->contexts = (cpp_context *)
3290 xmalloc (pfile->context_cap * sizeof (cpp_context));
3291 pfile->cur_context = 0;
3292 pfile->contexts[0].u.list = &pfile->token_list;
3293
3294 pfile->contexts[0].posn = 0;
3295 pfile->contexts[0].count = 0;
3296 pfile->no_expand_level = UINT_MAX;
3297
3298 _cpp_init_toklist (&pfile->token_list, DUMMY_TOKEN);
3299}
3300
3301/* Moves to the end of the directive line, popping contexts as
3302 necessary. */
3303void
3304_cpp_skip_rest_of_line (pfile)
3305 cpp_reader *pfile;
3306{
3307 /* Get to base context. Clear parsing args and each contexts flags,
3308 since these can cause pop_context to return without popping. */
3309 pfile->no_expand_level = UINT_MAX;
3310 while (pfile->cur_context != 0)
3311 {
3312 pfile->contexts[pfile->cur_context].flags = 0;
3313 do_pop_context (pfile);
3314 }
3315
3316 pfile->contexts[pfile->cur_context].count = 0;
3317 pfile->contexts[pfile->cur_context].posn = 0;
3318 pfile->token_list.directive = 0;
3319}
3320
3321/* Directive handler wrapper used by the command line option
3322 processor. */
3323void
3324_cpp_run_directive (pfile, dir, buf, count)
3325 cpp_reader *pfile;
3326 const struct directive *dir;
3327 const char *buf;
3328 size_t count;
3329{
3330 if (cpp_push_buffer (pfile, (const U_CHAR *)buf, count) != NULL)
3331 {
3332 unsigned int prev_lvl = 0;
3333 /* scan the line now, else prevent_macro_expansion won't work */
3334 do_pop_context (pfile);
3335 if (! (dir->flags & EXPAND))
3336 prev_lvl = prevent_macro_expansion (pfile);
3337
3338 (void) (*dir->handler) (pfile);
3339
3340 if (! (dir->flags & EXPAND))
3341 restore_macro_expansion (pfile, prev_lvl);
3342
3343 _cpp_skip_rest_of_line (pfile);
3344 cpp_pop_buffer (pfile);
3345 }
3346}