blob: ac8c3c48406f0f6f5c641214537f92a93b39069d [file] [log] [blame]
Zack Weinberg45b966d2000-03-13 22:01:08 +00001/* CPP Library - lexical analysis.
2 Copyright (C) 2000 Free Software Foundation, Inc.
3 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
Zack Weinbergc5a04732000-04-25 19:32:36 +00007 Single-pass line tokenization by Neil Booth, April 2000
Zack Weinberg45b966d2000-03-13 22:01:08 +00008
9This program is free software; you can redistribute it and/or modify it
10under the terms of the GNU General Public License as published by the
11Free Software Foundation; either version 2, or (at your option) any
12later version.
13
14This program is distributed in the hope that it will be useful,
15but WITHOUT ANY WARRANTY; without even the implied warranty of
16MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17GNU General Public License for more details.
18
19You should have received a copy of the GNU General Public License
20along with this program; if not, write to the Free Software
21Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
22
/* This lexer works with a single pass of the file.  Recently I
   re-wrote it to minimize the places where we step backwards in the
   input stream, to make future changes to support multi-byte
   character sets fairly straight-forward.

   There is now only one routine where we do step backwards:
   skip_escaped_newlines.  This routine could probably also be changed
   so that it doesn't need to step back.  One possibility is to use a
   trick similar to that used in lex_dot and lex_percent.  Two
   extra characters might be needed, but skip_escaped_newlines itself
   would probably be the only place that needs to be aware of that,
   and changes to the remaining routines would probably only be needed
   if they process a backslash.  */
Zack Weinberg041c3192000-07-04 01:58:21 +000036
Zack Weinberg45b966d2000-03-13 22:01:08 +000037#include "config.h"
38#include "system.h"
Zack Weinberg45b966d2000-03-13 22:01:08 +000039#include "cpplib.h"
40#include "cpphash.h"
Zack Weinberg041c3192000-07-04 01:58:21 +000041#include "symcat.h"
Zack Weinberg45b966d2000-03-13 22:01:08 +000042
/* Tokens with SPELL_STRING store their spelling in the token list,
   and its length in the token->val.name.len.  */
/* Spelling categories.  Every entry of the token_spellings table is
   tagged with one of these; entries generated by the OP macro always
   use SPELL_OPERATOR.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_CHAR,
  SPELL_IDENT,
  SPELL_STRING,
  SPELL_NONE
};
53
Neil Booth93c803682000-10-28 17:59:06 +000054struct token_spelling
Zack Weinbergf9a0e962000-07-13 02:32:41 +000055{
Neil Booth93c803682000-10-28 17:59:06 +000056 enum spell_type category;
57 const unsigned char *name;
Zack Weinbergf9a0e962000-07-13 02:32:41 +000058};
59
Neil Booth93c803682000-10-28 17:59:06 +000060const unsigned char *digraph_spellings [] = {U"%:", U"%:%:", U"<:",
61 U":>", U"<%", U"%>"};
62
63#define OP(e, s) { SPELL_OPERATOR, U s },
64#define TK(e, s) { s, U STRINGX (e) },
65const struct token_spelling token_spellings [N_TTYPES] = {TTYPE_TABLE };
66#undef OP
67#undef TK
68
69#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
70#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +000071
Neil Booth0d9f2342000-09-18 18:43:05 +000072static cppchar_t handle_newline PARAMS ((cpp_buffer *, cppchar_t));
73static cppchar_t skip_escaped_newlines PARAMS ((cpp_buffer *, cppchar_t));
74static cppchar_t get_effective_char PARAMS ((cpp_buffer *));
75
Zack Weinberg041c3192000-07-04 01:58:21 +000076static int skip_block_comment PARAMS ((cpp_reader *));
Neil Boothcbcff6d2000-09-23 21:41:41 +000077static int skip_line_comment PARAMS ((cpp_reader *));
Neil Booth0d9f2342000-09-18 18:43:05 +000078static void adjust_column PARAMS ((cpp_reader *));
79static void skip_whitespace PARAMS ((cpp_reader *, cppchar_t));
80static cpp_hashnode *parse_identifier PARAMS ((cpp_reader *, cppchar_t));
Neil Booth93c803682000-10-28 17:59:06 +000081static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
82static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
Neil Booth0d9f2342000-09-18 18:43:05 +000083static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
Neil Booth93c803682000-10-28 17:59:06 +000084static void unterminated PARAMS ((cpp_reader *, int));
Neil Booth0d9f2342000-09-18 18:43:05 +000085static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
86static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
Neil Boothcbcff6d2000-09-23 21:41:41 +000087static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
88static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
Neil Booth93c803682000-10-28 17:59:06 +000089static int name_p PARAMS ((cpp_reader *, const cpp_string *));
Neil Booth0d9f2342000-09-18 18:43:05 +000090
Neil Booth93c803682000-10-28 17:59:06 +000091static cpp_chunk *new_chunk PARAMS ((unsigned int));
92static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
Zack Weinberg15dad1d2000-05-18 15:55:46 +000093
Zack Weinberg6d2c2042000-04-30 17:30:25 +000094/* Utility routine:
Zack Weinberg6d2c2042000-04-30 17:30:25 +000095
Zack Weinbergbfb9dc72000-07-08 19:00:39 +000096 Compares, the token TOKEN to the NUL-terminated string STRING.
97 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
Zack Weinberg6d2c2042000-04-30 17:30:25 +000098
99int
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000100cpp_ideq (token, string)
101 const cpp_token *token;
Zack Weinberg6d2c2042000-04-30 17:30:25 +0000102 const char *string;
103{
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000104 if (token->type != CPP_NAME)
Zack Weinberg6d2c2042000-04-30 17:30:25 +0000105 return 0;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000106
Neil Booth93c803682000-10-28 17:59:06 +0000107 return !ustrcmp (token->val.node->name, (const U_CHAR *) string);
Zack Weinberg6d2c2042000-04-30 17:30:25 +0000108}
109
Neil Booth0d9f2342000-09-18 18:43:05 +0000110/* Call when meeting a newline. Returns the character after the newline
111 (or carriage-return newline combination), or EOF. */
112static cppchar_t
113handle_newline (buffer, newline_char)
114 cpp_buffer *buffer;
115 cppchar_t newline_char;
116{
117 cppchar_t next = EOF;
118
119 buffer->col_adjust = 0;
120 buffer->lineno++;
121 buffer->line_base = buffer->cur;
122
123 /* Handle CR-LF and LF-CR combinations, get the next character. */
124 if (buffer->cur < buffer->rlimit)
125 {
126 next = *buffer->cur++;
127 if (next + newline_char == '\r' + '\n')
128 {
129 buffer->line_base = buffer->cur;
130 if (buffer->cur < buffer->rlimit)
131 next = *buffer->cur++;
132 else
133 next = EOF;
134 }
135 }
136
137 buffer->read_ahead = next;
138 return next;
139}
140
141/* Subroutine of skip_escaped_newlines; called when a trigraph is
142 encountered. It warns if necessary, and returns true if the
143 trigraph should be honoured. FROM_CHAR is the third character of a
144 trigraph, and presumed to be the previous character for position
145 reporting. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000146static int
Neil Booth0d9f2342000-09-18 18:43:05 +0000147trigraph_ok (pfile, from_char)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000148 cpp_reader *pfile;
Neil Booth0d9f2342000-09-18 18:43:05 +0000149 cppchar_t from_char;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000150{
151 int accept = CPP_OPTION (pfile, trigraphs);
152
Neil Boothcbcff6d2000-09-23 21:41:41 +0000153 /* Don't warn about trigraphs in comments. */
154 if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000155 {
Neil Booth0d9f2342000-09-18 18:43:05 +0000156 cpp_buffer *buffer = pfile->buffer;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000157 if (accept)
Neil Booth0d9f2342000-09-18 18:43:05 +0000158 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
Zack Weinbergc5a04732000-04-25 19:32:36 +0000159 "trigraph ??%c converted to %c",
Neil Booth0d9f2342000-09-18 18:43:05 +0000160 (int) from_char,
161 (int) _cpp_trigraph_map[from_char]);
Zack Weinbergc5a04732000-04-25 19:32:36 +0000162 else
Neil Booth0d9f2342000-09-18 18:43:05 +0000163 cpp_warning_with_line (pfile, buffer->lineno, CPP_BUF_COL (buffer) - 2,
164 "trigraph ??%c ignored", (int) from_char);
Zack Weinbergc5a04732000-04-25 19:32:36 +0000165 }
Neil Booth0d9f2342000-09-18 18:43:05 +0000166
Zack Weinbergc5a04732000-04-25 19:32:36 +0000167 return accept;
168}
169
/* Give the token being built type T and discard the pending
   read-ahead character.  Relies on local variables named `buffer'
   and `result' at the point of use.  */
#define ACCEPT_CHAR(t)                  \
  do                                    \
    {                                   \
      result->type = (t);               \
      buffer->read_ahead = EOF;         \
    }                                   \
  while (0)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000173
/* Remember / rewind to a position in the input.  Both rely on local
   variables named `buffer' and `saved_cur'.

   Once multibyte character sets are supported, these must also save
   and restore the state of the multibyte conversion library, most
   likely a small "cookie" (the original note estimates an 8-byte
   structure for glibc).  That is cheap, and in any case off the fast
   path.  */
#define SAVE_STATE()                    \
  do                                    \
    {                                   \
      saved_cur = buffer->cur;          \
    }                                   \
  while (0)
#define RESTORE_STATE()                 \
  do                                    \
    {                                   \
      buffer->cur = saved_cur;          \
    }                                   \
  while (0)
181
182/* Skips any escaped newlines introduced by NEXT, which is either a
183 '?' or a '\\'. Returns the next character, which will also have
184 been placed in buffer->read_ahead. */
185static cppchar_t
186skip_escaped_newlines (buffer, next)
187 cpp_buffer *buffer;
188 cppchar_t next;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000189{
Neil Booth0d9f2342000-09-18 18:43:05 +0000190 cppchar_t next1;
191 const unsigned char *saved_cur;
192 int space;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000193
Neil Booth0d9f2342000-09-18 18:43:05 +0000194 do
Zack Weinbergc5a04732000-04-25 19:32:36 +0000195 {
Neil Booth0d9f2342000-09-18 18:43:05 +0000196 if (buffer->cur == buffer->rlimit)
197 break;
198
199 SAVE_STATE ();
200 if (next == '?')
201 {
202 next1 = *buffer->cur++;
203 if (next1 != '?' || buffer->cur == buffer->rlimit)
204 {
205 RESTORE_STATE ();
206 break;
207 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000208
Neil Booth0d9f2342000-09-18 18:43:05 +0000209 next1 = *buffer->cur++;
210 if (!_cpp_trigraph_map[next1] || !trigraph_ok (buffer->pfile, next1))
211 {
212 RESTORE_STATE ();
213 break;
214 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000215
Neil Booth0d9f2342000-09-18 18:43:05 +0000216 /* We have a full trigraph here. */
217 next = _cpp_trigraph_map[next1];
218 if (next != '\\' || buffer->cur == buffer->rlimit)
219 break;
220 SAVE_STATE ();
221 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000222
Neil Booth0d9f2342000-09-18 18:43:05 +0000223 /* We have a backslash, and room for at least one more character. */
224 space = 0;
225 do
226 {
227 next1 = *buffer->cur++;
228 if (!is_nvspace (next1))
229 break;
230 space = 1;
231 }
232 while (buffer->cur < buffer->rlimit);
233
234 if (!is_vspace (next1))
235 {
236 RESTORE_STATE ();
237 break;
238 }
239
240 if (space)
241 cpp_warning (buffer->pfile,
242 "backslash and newline separated by space");
243
244 next = handle_newline (buffer, next1);
245 if (next == EOF)
246 cpp_pedwarn (buffer->pfile, "backslash-newline at end of file");
Zack Weinbergc5a04732000-04-25 19:32:36 +0000247 }
Neil Booth0d9f2342000-09-18 18:43:05 +0000248 while (next == '\\' || next == '?');
Zack Weinbergc5a04732000-04-25 19:32:36 +0000249
Neil Booth0d9f2342000-09-18 18:43:05 +0000250 buffer->read_ahead = next;
251 return next;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000252}
253
Neil Booth0d9f2342000-09-18 18:43:05 +0000254/* Obtain the next character, after trigraph conversion and skipping
255 an arbitrary string of escaped newlines. The common case of no
256 trigraphs or escaped newlines falls through quickly. */
257static cppchar_t
258get_effective_char (buffer)
259 cpp_buffer *buffer;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000260{
Neil Booth0d9f2342000-09-18 18:43:05 +0000261 cppchar_t next = EOF;
262
263 if (buffer->cur < buffer->rlimit)
264 {
265 next = *buffer->cur++;
266
267 /* '?' can introduce trigraphs (and therefore backslash); '\\'
268 can introduce escaped newlines, which we want to skip, or
269 UCNs, which, depending upon lexer state, we will handle in
270 the future. */
271 if (next == '?' || next == '\\')
272 next = skip_escaped_newlines (buffer, next);
273 }
274
275 buffer->read_ahead = next;
276 return next;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000277}
278
Neil Booth0d9f2342000-09-18 18:43:05 +0000279/* Skip a C-style block comment. We find the end of the comment by
280 seeing if an asterisk is before every '/' we encounter. Returns
281 non-zero if comment terminated by EOF, zero otherwise. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000282static int
Zack Weinberg041c3192000-07-04 01:58:21 +0000283skip_block_comment (pfile)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000284 cpp_reader *pfile;
285{
286 cpp_buffer *buffer = pfile->buffer;
Neil Boothd8090682000-09-28 05:55:22 +0000287 cppchar_t c = EOF, prevc = EOF;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000288
Neil Boothcbcff6d2000-09-23 21:41:41 +0000289 pfile->state.lexing_comment = 1;
Neil Booth0d9f2342000-09-18 18:43:05 +0000290 while (buffer->cur != buffer->rlimit)
291 {
292 prevc = c, c = *buffer->cur++;
293
294 next_char:
295 /* FIXME: For speed, create a new character class of characters
Neil Booth93c803682000-10-28 17:59:06 +0000296 of interest inside block comments. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000297 if (c == '?' || c == '\\')
298 c = skip_escaped_newlines (buffer, c);
299
300 /* People like decorating comments with '*', so check for '/'
301 instead for efficiency. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000302 if (c == '/')
303 {
Neil Booth0d9f2342000-09-18 18:43:05 +0000304 if (prevc == '*')
305 break;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000306
Neil Booth0d9f2342000-09-18 18:43:05 +0000307 /* Warn about potential nested comments, but not if the '/'
308 comes immediately before the true comment delimeter.
Zack Weinbergc5a04732000-04-25 19:32:36 +0000309 Don't bother to get it right across escaped newlines. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000310 if (CPP_OPTION (pfile, warn_comments)
311 && buffer->cur != buffer->rlimit)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000312 {
Neil Booth0d9f2342000-09-18 18:43:05 +0000313 prevc = c, c = *buffer->cur++;
314 if (c == '*' && buffer->cur != buffer->rlimit)
315 {
316 prevc = c, c = *buffer->cur++;
317 if (c != '/')
318 cpp_warning_with_line (pfile, CPP_BUF_LINE (buffer),
319 CPP_BUF_COL (buffer),
320 "\"/*\" within comment");
321 }
322 goto next_char;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000323 }
324 }
Neil Booth91fcd152000-07-09 09:19:44 +0000325 else if (is_vspace (c))
Zack Weinbergc5a04732000-04-25 19:32:36 +0000326 {
Neil Booth0d9f2342000-09-18 18:43:05 +0000327 prevc = c, c = handle_newline (buffer, c);
328 goto next_char;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000329 }
Neil Booth52fadca2000-07-13 22:55:04 +0000330 else if (c == '\t')
Neil Booth0d9f2342000-09-18 18:43:05 +0000331 adjust_column (pfile);
Zack Weinbergc5a04732000-04-25 19:32:36 +0000332 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000333
Neil Boothcbcff6d2000-09-23 21:41:41 +0000334 pfile->state.lexing_comment = 0;
Neil Booth0d9f2342000-09-18 18:43:05 +0000335 buffer->read_ahead = EOF;
336 return c != '/' || prevc != '*';
Zack Weinbergc5a04732000-04-25 19:32:36 +0000337}
338
Zack Weinbergf9a0e962000-07-13 02:32:41 +0000339/* Skip a C++ line comment. Handles escaped newlines. Returns
Neil Booth0d9f2342000-09-18 18:43:05 +0000340 non-zero if a multiline comment. The following new line, if any,
341 is left in buffer->read_ahead. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000342static int
Neil Boothcbcff6d2000-09-23 21:41:41 +0000343skip_line_comment (pfile)
344 cpp_reader *pfile;
Neil Booth0d9f2342000-09-18 18:43:05 +0000345{
Neil Boothcbcff6d2000-09-23 21:41:41 +0000346 cpp_buffer *buffer = pfile->buffer;
Neil Booth0d9f2342000-09-18 18:43:05 +0000347 unsigned int orig_lineno = buffer->lineno;
348 cppchar_t c;
349
Neil Boothcbcff6d2000-09-23 21:41:41 +0000350 pfile->state.lexing_comment = 1;
Neil Booth0d9f2342000-09-18 18:43:05 +0000351 do
352 {
353 c = EOF;
354 if (buffer->cur == buffer->rlimit)
355 break;
356
357 c = *buffer->cur++;
358 if (c == '?' || c == '\\')
359 c = skip_escaped_newlines (buffer, c);
360 }
361 while (!is_vspace (c));
362
Neil Boothcbcff6d2000-09-23 21:41:41 +0000363 pfile->state.lexing_comment = 0;
Neil Booth0d9f2342000-09-18 18:43:05 +0000364 buffer->read_ahead = c; /* Leave any newline for caller. */
365 return orig_lineno != buffer->lineno;
366}
367
368/* pfile->buffer->cur is one beyond the \t character. Update
369 col_adjust so we track the column correctly. */
370static void
371adjust_column (pfile)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000372 cpp_reader *pfile;
373{
374 cpp_buffer *buffer = pfile->buffer;
Neil Booth0d9f2342000-09-18 18:43:05 +0000375 unsigned int col = CPP_BUF_COL (buffer) - 1; /* Zero-based column. */
Neil Booth52fadca2000-07-13 22:55:04 +0000376
377 /* Round it up to multiple of the tabstop, but subtract 1 since the
378 tab itself occupies a character position. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000379 buffer->col_adjust += (CPP_OPTION (pfile, tabstop)
380 - col % CPP_OPTION (pfile, tabstop)) - 1;
Neil Booth52fadca2000-07-13 22:55:04 +0000381}
382
Neil Booth0d9f2342000-09-18 18:43:05 +0000383/* Skips whitespace, saving the next non-whitespace character.
384 Adjusts pfile->col_adjust to account for tabs. Without this,
385 tokens might be assigned an incorrect column. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000386static void
Neil Booth0d9f2342000-09-18 18:43:05 +0000387skip_whitespace (pfile, c)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000388 cpp_reader *pfile;
Neil Booth0d9f2342000-09-18 18:43:05 +0000389 cppchar_t c;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000390{
391 cpp_buffer *buffer = pfile->buffer;
Neil Booth0d9f2342000-09-18 18:43:05 +0000392 unsigned int warned = 0;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000393
Neil Booth0d9f2342000-09-18 18:43:05 +0000394 do
Zack Weinbergc5a04732000-04-25 19:32:36 +0000395 {
Neil Booth91fcd152000-07-09 09:19:44 +0000396 /* Horizontal space always OK. */
397 if (c == ' ')
Neil Booth0d9f2342000-09-18 18:43:05 +0000398 ;
Neil Booth91fcd152000-07-09 09:19:44 +0000399 else if (c == '\t')
Neil Booth0d9f2342000-09-18 18:43:05 +0000400 adjust_column (pfile);
401 /* Just \f \v or \0 left. */
Neil Booth91fcd152000-07-09 09:19:44 +0000402 else if (c == '\0')
403 {
404 if (!warned)
Neil Booth0d9f2342000-09-18 18:43:05 +0000405 {
406 cpp_warning (pfile, "null character(s) ignored");
407 warned = 1;
408 }
Neil Booth91fcd152000-07-09 09:19:44 +0000409 }
Neil Booth93c803682000-10-28 17:59:06 +0000410 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
Neil Booth91fcd152000-07-09 09:19:44 +0000411 cpp_pedwarn_with_line (pfile, CPP_BUF_LINE (buffer),
412 CPP_BUF_COL (buffer),
413 "%s in preprocessing directive",
414 c == '\f' ? "form feed" : "vertical tab");
Zack Weinbergc5a04732000-04-25 19:32:36 +0000415
Neil Booth0d9f2342000-09-18 18:43:05 +0000416 c = EOF;
417 if (buffer->cur == buffer->rlimit)
Zack Weinbergbfb9dc72000-07-08 19:00:39 +0000418 break;
Neil Booth0d9f2342000-09-18 18:43:05 +0000419 c = *buffer->cur++;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000420 }
Neil Booth0d9f2342000-09-18 18:43:05 +0000421 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
422 while (is_nvspace (c));
Zack Weinbergc5a04732000-04-25 19:32:36 +0000423
Neil Booth0d9f2342000-09-18 18:43:05 +0000424 /* Remember the next character. */
425 buffer->read_ahead = c;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000426}
427
Neil Booth93c803682000-10-28 17:59:06 +0000428/* See if the characters of a number token are valid in a name (no
429 '.', '+' or '-'). */
430static int
431name_p (pfile, string)
432 cpp_reader *pfile;
433 const cpp_string *string;
434{
435 unsigned int i;
436
437 for (i = 0; i < string->len; i++)
438 if (!is_idchar (string->text[i]))
439 return 0;
440
441 return 1;
442}
443
Neil Booth0d9f2342000-09-18 18:43:05 +0000444/* Parse an identifier, skipping embedded backslash-newlines.
445 Calculate the hash value of the token while parsing, for improved
446 performance. The hashing algorithm *must* match cpp_lookup(). */
447
448static cpp_hashnode *
449parse_identifier (pfile, c)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000450 cpp_reader *pfile;
Neil Booth0d9f2342000-09-18 18:43:05 +0000451 cppchar_t c;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000452{
Neil Booth93c803682000-10-28 17:59:06 +0000453 cpp_hashnode *result;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000454 cpp_buffer *buffer = pfile->buffer;
Neil Booth93c803682000-10-28 17:59:06 +0000455 unsigned char *dest, *limit;
Neil Booth0d9f2342000-09-18 18:43:05 +0000456 unsigned int r = 0, saw_dollar = 0;
Neil Booth93c803682000-10-28 17:59:06 +0000457
458 dest = POOL_FRONT (&pfile->ident_pool);
459 limit = POOL_LIMIT (&pfile->ident_pool);
Zack Weinbergc5a04732000-04-25 19:32:36 +0000460
Neil Booth0d9f2342000-09-18 18:43:05 +0000461 do
Zack Weinbergc5a04732000-04-25 19:32:36 +0000462 {
Neil Booth0d9f2342000-09-18 18:43:05 +0000463 do
464 {
Neil Booth93c803682000-10-28 17:59:06 +0000465 /* Need room for terminating null. */
466 if (dest + 1 >= limit)
467 limit = _cpp_next_chunk (&pfile->ident_pool, 0, &dest);
468
469 *dest++ = c;
Neil Booth0d9f2342000-09-18 18:43:05 +0000470 r = HASHSTEP (r, c);
Zack Weinbergc5a04732000-04-25 19:32:36 +0000471
Neil Booth0d9f2342000-09-18 18:43:05 +0000472 if (c == '$')
473 saw_dollar++;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000474
Neil Booth0d9f2342000-09-18 18:43:05 +0000475 c = EOF;
476 if (buffer->cur == buffer->rlimit)
477 break;
478
479 c = *buffer->cur++;
480 }
481 while (is_idchar (c));
482
483 /* Potential escaped newline? */
484 if (c != '?' && c != '\\')
485 break;
486 c = skip_escaped_newlines (buffer, c);
Zack Weinbergc5a04732000-04-25 19:32:36 +0000487 }
Neil Booth0d9f2342000-09-18 18:43:05 +0000488 while (is_idchar (c));
Zack Weinbergc5a04732000-04-25 19:32:36 +0000489
Neil Booth93c803682000-10-28 17:59:06 +0000490 /* Remember the next character. */
491 buffer->read_ahead = c;
492
Neil Booth0d9f2342000-09-18 18:43:05 +0000493 /* $ is not a identifier character in the standard, but is commonly
494 accepted as an extension. Don't warn about it in skipped
495 conditional blocks. */
496 if (saw_dollar && CPP_PEDANTIC (pfile) && ! pfile->skipping)
497 cpp_pedwarn (pfile, "'$' character(s) in identifier");
498
Neil Booth93c803682000-10-28 17:59:06 +0000499 /* Identifiers are null-terminated. */
500 *dest = '\0';
501
502 /* This routine commits the memory if necessary. */
503 result = _cpp_lookup_with_hash (pfile,
504 dest - POOL_FRONT (&pfile->ident_pool), r);
505
506 /* Some identifiers require diagnostics when lexed. */
507 if (result->flags & NODE_DIAGNOSTIC && !pfile->skipping)
508 {
509 /* It is allowed to poison the same identifier twice. */
510 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
511 cpp_error (pfile, "attempt to use poisoned \"%s\"", result->name);
512
513 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
514 replacement list of a variable-arguments macro. */
515 if (result == pfile->spec_nodes.n__VA_ARGS__
516 && !pfile->state.va_args_ok)
517 cpp_pedwarn (pfile, "__VA_ARGS__ can only appear in the expansion of a C99 variable-argument macro");
518 }
519
520 return result;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000521}
522
Neil Booth0d9f2342000-09-18 18:43:05 +0000523/* Parse a number, skipping embedded backslash-newlines. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000524static void
Neil Booth93c803682000-10-28 17:59:06 +0000525parse_number (pfile, number, c, leading_period)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000526 cpp_reader *pfile;
Neil Booth0d9f2342000-09-18 18:43:05 +0000527 cpp_string *number;
528 cppchar_t c;
Neil Booth93c803682000-10-28 17:59:06 +0000529 int leading_period;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000530{
531 cpp_buffer *buffer = pfile->buffer;
Neil Booth93c803682000-10-28 17:59:06 +0000532 cpp_pool *pool = pfile->string_pool;
533 unsigned char *dest, *limit;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000534
Neil Booth93c803682000-10-28 17:59:06 +0000535 dest = POOL_FRONT (pool);
536 limit = POOL_LIMIT (pool);
Neil Boothcbcff6d2000-09-23 21:41:41 +0000537
Neil Booth93c803682000-10-28 17:59:06 +0000538 /* Place a leading period. */
539 if (leading_period)
540 {
541 if (dest >= limit)
542 limit = _cpp_next_chunk (pool, 0, &dest);
543 *dest++ = '.';
544 }
545
Neil Booth0d9f2342000-09-18 18:43:05 +0000546 do
Zack Weinbergc5a04732000-04-25 19:32:36 +0000547 {
Neil Booth0d9f2342000-09-18 18:43:05 +0000548 do
Zack Weinbergc5a04732000-04-25 19:32:36 +0000549 {
Neil Booth93c803682000-10-28 17:59:06 +0000550 /* Need room for terminating null. */
551 if (dest + 1 >= limit)
552 limit = _cpp_next_chunk (pool, 0, &dest);
553 *dest++ = c;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000554
Neil Booth0d9f2342000-09-18 18:43:05 +0000555 c = EOF;
556 if (buffer->cur == buffer->rlimit)
557 break;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000558
Neil Booth0d9f2342000-09-18 18:43:05 +0000559 c = *buffer->cur++;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000560 }
Neil Booth93c803682000-10-28 17:59:06 +0000561 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
Neil Booth0d9f2342000-09-18 18:43:05 +0000562
563 /* Potential escaped newline? */
564 if (c != '?' && c != '\\')
565 break;
566 c = skip_escaped_newlines (buffer, c);
Zack Weinbergc5a04732000-04-25 19:32:36 +0000567 }
Neil Booth93c803682000-10-28 17:59:06 +0000568 while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
Neil Boothcbcff6d2000-09-23 21:41:41 +0000569
Neil Booth0d9f2342000-09-18 18:43:05 +0000570 /* Remember the next character. */
571 buffer->read_ahead = c;
572
Neil Booth93c803682000-10-28 17:59:06 +0000573 /* Null-terminate the number. */
574 *dest = '\0';
575
576 number->text = POOL_FRONT (pool);
577 number->len = dest - number->text;
578 POOL_COMMIT (pool, number->len + 1);
Neil Booth0d9f2342000-09-18 18:43:05 +0000579}
580
581/* Subroutine of parse_string. Emits error for unterminated strings. */
582static void
Neil Booth93c803682000-10-28 17:59:06 +0000583unterminated (pfile, term)
Neil Booth0d9f2342000-09-18 18:43:05 +0000584 cpp_reader *pfile;
Neil Booth0d9f2342000-09-18 18:43:05 +0000585 int term;
586{
587 cpp_error (pfile, "missing terminating %c character", term);
588
Neil Booth93c803682000-10-28 17:59:06 +0000589 if (term == '\"' && pfile->mlstring_pos.line
590 && pfile->mlstring_pos.line != pfile->lexer_pos.line)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000591 {
Neil Booth93c803682000-10-28 17:59:06 +0000592 cpp_error_with_line (pfile, pfile->mlstring_pos.line,
593 pfile->mlstring_pos.col,
Zack Weinbergc5a04732000-04-25 19:32:36 +0000594 "possible start of unterminated string literal");
Neil Booth93c803682000-10-28 17:59:06 +0000595 pfile->mlstring_pos.line = 0;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000596 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000597}
598
Neil Booth93c803682000-10-28 17:59:06 +0000599/* Subroutine of parse_string. */
600static int
601unescaped_terminator_p (pfile, dest)
602 cpp_reader *pfile;
603 const unsigned char *dest;
604{
605 const unsigned char *start, *temp;
606
607 /* In #include-style directives, terminators are not escapeable. */
608 if (pfile->state.angled_headers)
609 return 1;
610
611 start = POOL_FRONT (pfile->string_pool);
612
613 /* An odd number of consecutive backslashes represents an escaped
614 terminator. */
615 for (temp = dest; temp > start && temp[-1] == '\\'; temp--)
616 ;
617
618 return ((dest - temp) & 1) == 0;
619}
620
Neil Booth0d9f2342000-09-18 18:43:05 +0000621/* Parses a string, character constant, or angle-bracketed header file
622 name. Handles embedded trigraphs and escaped newlines.
Neil Booth5d7ee2f2000-05-10 09:39:18 +0000623
Neil Booth0d9f2342000-09-18 18:43:05 +0000624 Multi-line strings are allowed, but they are deprecated within
625 directives. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000626static void
Neil Booth0d9f2342000-09-18 18:43:05 +0000627parse_string (pfile, token, terminator)
628 cpp_reader *pfile;
629 cpp_token *token;
630 cppchar_t terminator;
631{
632 cpp_buffer *buffer = pfile->buffer;
Neil Booth93c803682000-10-28 17:59:06 +0000633 cpp_pool *pool = pfile->string_pool;
634 unsigned char *dest, *limit;
Neil Booth0d9f2342000-09-18 18:43:05 +0000635 cppchar_t c;
636 unsigned int nulls = 0;
637
Neil Booth93c803682000-10-28 17:59:06 +0000638 dest = POOL_FRONT (pool);
639 limit = POOL_LIMIT (pool);
640
Neil Booth0d9f2342000-09-18 18:43:05 +0000641 for (;;)
642 {
643 if (buffer->cur == buffer->rlimit)
644 {
645 c = EOF;
Neil Booth93c803682000-10-28 17:59:06 +0000646 unterminated (pfile, terminator);
Neil Booth0d9f2342000-09-18 18:43:05 +0000647 break;
648 }
649 c = *buffer->cur++;
650
651 have_char:
652 /* Handle trigraphs, escaped newlines etc. */
653 if (c == '?' || c == '\\')
654 c = skip_escaped_newlines (buffer, c);
655
Neil Booth93c803682000-10-28 17:59:06 +0000656 if (c == terminator && unescaped_terminator_p (pfile, dest))
Neil Booth0d9f2342000-09-18 18:43:05 +0000657 {
Neil Booth93c803682000-10-28 17:59:06 +0000658 c = EOF;
659 break;
Neil Booth0d9f2342000-09-18 18:43:05 +0000660 }
661 else if (is_vspace (c))
662 {
663 /* In assembly language, silently terminate string and
664 character literals at end of line. This is a kludge
665 around not knowing where comments are. */
666 if (CPP_OPTION (pfile, lang_asm) && terminator != '>')
667 break;
668
669 /* Character constants and header names may not extend over
670 multiple lines. In Standard C, neither may strings.
671 Unfortunately, we accept multiline strings as an
Neil Booth93c803682000-10-28 17:59:06 +0000672 extension. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000673 if (terminator != '"')
674 {
Neil Booth93c803682000-10-28 17:59:06 +0000675 unterminated (pfile, terminator);
Neil Booth0d9f2342000-09-18 18:43:05 +0000676 break;
677 }
678
Neil Booth93c803682000-10-28 17:59:06 +0000679 if (pfile->mlstring_pos.line == 0)
Neil Booth0d9f2342000-09-18 18:43:05 +0000680 {
Neil Booth93c803682000-10-28 17:59:06 +0000681 pfile->mlstring_pos = pfile->lexer_pos;
Neil Booth0d9f2342000-09-18 18:43:05 +0000682 if (CPP_PEDANTIC (pfile))
683 cpp_pedwarn (pfile, "multi-line string constant");
684 }
685
686 handle_newline (buffer, c); /* Stores to read_ahead. */
687 c = '\n';
688 }
689 else if (c == '\0')
690 {
691 if (nulls++ == 0)
692 cpp_warning (pfile, "null character(s) preserved in literal");
693 }
694
Neil Booth93c803682000-10-28 17:59:06 +0000695 /* No terminating null for strings - they could contain nulls. */
696 if (dest >= limit)
697 limit = _cpp_next_chunk (pool, 0, &dest);
698 *dest++ = c;
Neil Booth0d9f2342000-09-18 18:43:05 +0000699
Neil Booth0d9f2342000-09-18 18:43:05 +0000700 /* If we had a new line, the next character is in read_ahead. */
701 if (c != '\n')
702 continue;
703 c = buffer->read_ahead;
704 if (c != EOF)
705 goto have_char;
706 }
707
Neil Booth93c803682000-10-28 17:59:06 +0000708 /* Remember the next character. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000709 buffer->read_ahead = c;
710
Neil Booth93c803682000-10-28 17:59:06 +0000711 token->val.str.text = POOL_FRONT (pool);
712 token->val.str.len = dest - token->val.str.text;
713 POOL_COMMIT (pool, token->val.str.len);
Neil Booth0d9f2342000-09-18 18:43:05 +0000714}
715
Neil Booth93c803682000-10-28 17:59:06 +0000716/* The stored comment includes the comment start and any terminator. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000717static void
718save_comment (pfile, token, from)
719 cpp_reader *pfile;
Neil Boothad265aa2000-05-27 23:27:36 +0000720 cpp_token *token;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000721 const unsigned char *from;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000722{
Neil Booth5d7ee2f2000-05-10 09:39:18 +0000723 unsigned char *buffer;
Neil Booth0d9f2342000-09-18 18:43:05 +0000724 unsigned int len;
Neil Booth5d7ee2f2000-05-10 09:39:18 +0000725
Neil Booth1c6d33e2000-09-25 22:39:51 +0000726 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
Neil Booth35422032000-10-29 09:56:00 +0000727 /* C++ comments probably (not definitely) have moved past a new
728 line, which we don't want to save in the comment. */
729 if (pfile->buffer->read_ahead != EOF)
730 len--;
Neil Booth93c803682000-10-28 17:59:06 +0000731 buffer = _cpp_pool_alloc (pfile->string_pool, len);
Neil Booth0d9f2342000-09-18 18:43:05 +0000732
733 token->type = CPP_COMMENT;
734 token->val.str.len = len;
735 token->val.str.text = buffer;
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000736
Neil Booth1c6d33e2000-09-25 22:39:51 +0000737 buffer[0] = '/';
738 memcpy (buffer + 1, from, len - 1);
Neil Booth0d9f2342000-09-18 18:43:05 +0000739}
740
Neil Boothcbcff6d2000-09-23 21:41:41 +0000741/* Subroutine of lex_token to handle '%'. A little tricky, since we
742 want to avoid stepping back when lexing %:%X. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000743static void
Neil Boothcbcff6d2000-09-23 21:41:41 +0000744lex_percent (buffer, result)
Neil Booth0d9f2342000-09-18 18:43:05 +0000745 cpp_buffer *buffer;
746 cpp_token *result;
Neil Booth0d9f2342000-09-18 18:43:05 +0000747{
Neil Boothcbcff6d2000-09-23 21:41:41 +0000748 cppchar_t c;
Neil Booth0d9f2342000-09-18 18:43:05 +0000749
Neil Boothcbcff6d2000-09-23 21:41:41 +0000750 result->type = CPP_MOD;
751 /* Parsing %:%X could leave an extra character. */
752 if (buffer->extra_char == EOF)
753 c = get_effective_char (buffer);
Neil Booth5d7ee2f2000-05-10 09:39:18 +0000754 else
755 {
Neil Boothcbcff6d2000-09-23 21:41:41 +0000756 c = buffer->read_ahead = buffer->extra_char;
757 buffer->extra_char = EOF;
758 }
759
760 if (c == '=')
761 ACCEPT_CHAR (CPP_MOD_EQ);
762 else if (CPP_OPTION (buffer->pfile, digraphs))
763 {
764 if (c == ':')
765 {
766 result->flags |= DIGRAPH;
767 ACCEPT_CHAR (CPP_HASH);
768 if (get_effective_char (buffer) == '%')
769 {
770 buffer->extra_char = get_effective_char (buffer);
771 if (buffer->extra_char == ':')
772 {
773 buffer->extra_char = EOF;
774 ACCEPT_CHAR (CPP_PASTE);
775 }
776 else
777 /* We'll catch the extra_char when we're called back. */
778 buffer->read_ahead = '%';
779 }
780 }
781 else if (c == '>')
782 {
783 result->flags |= DIGRAPH;
784 ACCEPT_CHAR (CPP_CLOSE_BRACE);
785 }
786 }
787}
788
789/* Subroutine of lex_token to handle '.'. This is tricky, since we
790 want to avoid stepping back when lexing '...' or '.123'. In the
791 latter case we should also set a flag for parse_number. */
792static void
793lex_dot (pfile, result)
794 cpp_reader *pfile;
795 cpp_token *result;
796{
797 cpp_buffer *buffer = pfile->buffer;
798 cppchar_t c;
799
800 /* Parsing ..X could leave an extra character. */
801 if (buffer->extra_char == EOF)
802 c = get_effective_char (buffer);
803 else
804 {
805 c = buffer->read_ahead = buffer->extra_char;
806 buffer->extra_char = EOF;
807 }
808
809 /* All known character sets have 0...9 contiguous. */
810 if (c >= '0' && c <= '9')
811 {
812 result->type = CPP_NUMBER;
Neil Booth93c803682000-10-28 17:59:06 +0000813 parse_number (pfile, &result->val.str, c, 1);
Neil Boothcbcff6d2000-09-23 21:41:41 +0000814 }
815 else
816 {
817 result->type = CPP_DOT;
818 if (c == '.')
819 {
820 buffer->extra_char = get_effective_char (buffer);
821 if (buffer->extra_char == '.')
822 {
823 buffer->extra_char = EOF;
824 ACCEPT_CHAR (CPP_ELLIPSIS);
825 }
826 else
827 /* We'll catch the extra_char when we're called back. */
828 buffer->read_ahead = '.';
829 }
830 else if (c == '*' && CPP_OPTION (pfile, cplusplus))
831 ACCEPT_CHAR (CPP_DOT_STAR);
Neil Booth5d7ee2f2000-05-10 09:39:18 +0000832 }
Neil Booth0d9f2342000-09-18 18:43:05 +0000833}
834
/* Lex the next token from PFILE's current buffer into RESULT.

   The character to examine is taken from buffer->read_ahead if that
   is not EOF, otherwise from the buffer itself.  Horizontal
   whitespace and skipped comments set PREV_WHITE on RESULT and
   lexing continues.  A newline produces a CPP_EOF token and sets
   pfile->state.skip_newlines for the next call; inside a directive
   the newline is left in read_ahead rather than consumed.  Newlines
   are skipped entirely while parsing macro arguments, or when
   skip_newlines was set on entry.  A '#' (or %: digraph) seen when
   skip_newlines was set on entry, or after a newline skipped inside
   macro arguments, is returned as CPP_DHASH.  */
void
_cpp_lex_token (pfile, result)
     cpp_reader *pfile;
     cpp_token *result;
{
  cppchar_t c;
  cpp_buffer *buffer = pfile->buffer;
  const unsigned char *comment_start;
  /* Remember the state on entry; it is cleared just below.  */
  unsigned char was_skip_newlines = pfile->state.skip_newlines;
  /* Set once a newline has been skipped inside macro arguments.  */
  unsigned char newline_in_args = 0;

  pfile->state.skip_newlines = 0;
  result->flags = 0;
  /* Re-entered after anything that may have changed the line number.  */
 next_char:
  pfile->lexer_pos.line = buffer->lineno;
  /* Re-entered after skipping whitespace on the same line.  */
 next_char2:
  pfile->lexer_pos.col = CPP_BUF_COLUMN (buffer, buffer->cur);

  c = buffer->read_ahead;
  if (c == EOF && buffer->cur < buffer->rlimit)
    {
      c = *buffer->cur++;
      pfile->lexer_pos.col++;
    }

  /* Re-entered with C already set, e.g. after a trigraph.  */
 do_switch:
  buffer->read_ahead = EOF;
  switch (c)
    {
    case EOF:
      /* Non-empty files should end in a newline.  Ignore for command
         line - we get e.g. -A options with no trailing \n.  */
      if (pfile->lexer_pos.col != 0 && pfile->done_initializing)
        cpp_pedwarn (pfile, "no newline at end of file");
      pfile->state.skip_newlines = 1;
      result->type = CPP_EOF;
      break;

    case ' ': case '\t': case '\f': case '\v': case '\0':
      skip_whitespace (pfile, c);
      result->flags |= PREV_WHITE;
      goto next_char2;

    case '\n': case '\r':
      /* Don't let directives spill over to the next line.  */
      if (pfile->state.in_directive)
        buffer->read_ahead = c;
      else
        {
          handle_newline (buffer, c);

          pfile->lexer_pos.output_line = buffer->lineno;

          /* Skip newlines in macro arguments (except in directives).  */
          if (pfile->state.parsing_args)
            {
              /* Set the whitespace flag.  */
              newline_in_args = 1;
              result->flags |= PREV_WHITE;
              goto next_char;
            }

          if (was_skip_newlines)
            {
              /* Clear any whitespace flag.  */
              result->flags &= ~PREV_WHITE;
              goto next_char;
            }
        }

      /* Next we're at BOL, so skip new lines.  */
      pfile->state.skip_newlines = 1;
      result->type = CPP_EOF;
      break;

    case '?':
    case '\\':
      /* These could start an escaped newline, or '?' a trigraph.  Let
         skip_escaped_newlines do all the work.  */
      {
        unsigned int lineno = buffer->lineno;

        c = skip_escaped_newlines (buffer, c);
        if (lineno != buffer->lineno)
          /* We had at least one escaped newline of some sort, and the
             next character is in buffer->read_ahead.  Update the
             token's line and column.  */
          goto next_char;

        /* We are either the original '?' or '\\', or a trigraph.  */
        result->type = CPP_QUERY;
        buffer->read_ahead = EOF;
        if (c == '\\')
          result->type = CPP_BACKSLASH;
        else if (c != '?')
          /* A trigraph was replaced; lex the character it stands for.  */
          goto do_switch;
      }
      break;

    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      result->type = CPP_NUMBER;
      parse_number (pfile, &result->val.str, c, 0);
      break;

    case '$':
      if (!CPP_OPTION (pfile, dollars_in_ident))
        goto random_char;
      /* Fall through... */

    case '_':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
    case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
    case 's': case 't': case 'u': case 'v': case 'w': case 'x':
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
    case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    case 'Y': case 'Z':
      result->type = CPP_NAME;
      result->val.node = parse_identifier (pfile, c);

      /* 'L' may introduce wide characters or strings.  */
      if (result->val.node == pfile->spec_nodes.n_L)
        {
          c = buffer->read_ahead; /* For make_string.  */
          if (c == '\'' || c == '"')
            {
              ACCEPT_CHAR (c == '"' ? CPP_WSTRING: CPP_WCHAR);
              goto make_string;
            }
        }
      /* Convert named operators to their proper types.  */
      else if (result->val.node->flags & NODE_OPERATOR)
        {
          result->flags |= NAMED_OP;
          result->type = result->val.node->value.operator;
        }
      break;

    case '\'':
    case '"':
      result->type = c == '"' ? CPP_STRING: CPP_CHAR;
      /* Entered with C holding the terminator to look for.  */
    make_string:
      parse_string (pfile, result, c);
      break;

    case '/':
      /* A potential block or line comment.  */
      comment_start = buffer->cur;
      result->type = CPP_DIV;
      c = get_effective_char (buffer);
      if (c == '=')
        ACCEPT_CHAR (CPP_DIV_EQ);
      if (c != '/' && c != '*')
        break;

      if (c == '*')
        {
          if (skip_block_comment (pfile))
            cpp_error_with_line (pfile, pfile->lexer_pos.line,
                                 pfile->lexer_pos.col,
                                 "unterminated comment");
        }
      else
        {
          /* Without C++ comment support, leave this as CPP_DIV.  */
          if (!CPP_OPTION (pfile, cplusplus_comments)
              && !CPP_IN_SYSTEM_HEADER (pfile))
            break;

          /* We silently allow C++ comments in system headers,
             irrespective of conformance mode, because lots of
             broken systems do that and trying to clean it up in
             fixincludes is a nightmare.  */
          if (CPP_OPTION (pfile, c89) && CPP_PEDANTIC (pfile)
              && ! buffer->warned_cplusplus_comments)
            {
              cpp_pedwarn (pfile,
                           "C++ style comments are not allowed in ISO C89");
              cpp_pedwarn (pfile,
                           "(this will be reported only once per input file)");
              buffer->warned_cplusplus_comments = 1;
            }

          /* Skip_line_comment updates buffer->read_ahead.  */
          if (skip_line_comment (pfile))
            cpp_warning_with_line (pfile, pfile->lexer_pos.line,
                                   pfile->lexer_pos.col,
                                   "multi-line comment");
        }

      /* Skipping the comment has updated buffer->read_ahead.  */
      if (!pfile->state.save_comments)
        {
          /* A discarded comment counts as whitespace.  */
          result->flags |= PREV_WHITE;
          goto next_char;
        }

      /* Save the comment as a token in its own right.  */
      save_comment (pfile, result, comment_start);
      break;

    case '<':
      /* When angled headers are expected, '<' opens a header name.  */
      if (pfile->state.angled_headers)
        {
          result->type = CPP_HEADER_NAME;
          c = '>';              /* terminator.  */
          goto make_string;
        }

      result->type = CPP_LESS;
      c = get_effective_char (buffer);
      if (c == '=')
        ACCEPT_CHAR (CPP_LESS_EQ);
      else if (c == '<')
        {
          ACCEPT_CHAR (CPP_LSHIFT);
          if (get_effective_char (buffer) == '=')
            ACCEPT_CHAR (CPP_LSHIFT_EQ);
        }
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
        {
          ACCEPT_CHAR (CPP_MIN);
          if (get_effective_char (buffer) == '=')
            ACCEPT_CHAR (CPP_MIN_EQ);
        }
      else if (c == ':' && CPP_OPTION (pfile, digraphs))
        {
          ACCEPT_CHAR (CPP_OPEN_SQUARE);
          result->flags |= DIGRAPH;
        }
      else if (c == '%' && CPP_OPTION (pfile, digraphs))
        {
          ACCEPT_CHAR (CPP_OPEN_BRACE);
          result->flags |= DIGRAPH;
        }
      break;

    case '>':
      result->type = CPP_GREATER;
      c = get_effective_char (buffer);
      if (c == '=')
        ACCEPT_CHAR (CPP_GREATER_EQ);
      else if (c == '>')
        {
          ACCEPT_CHAR (CPP_RSHIFT);
          if (get_effective_char (buffer) == '=')
            ACCEPT_CHAR (CPP_RSHIFT_EQ);
        }
      else if (c == '?' && CPP_OPTION (pfile, cplusplus))
        {
          ACCEPT_CHAR (CPP_MAX);
          if (get_effective_char (buffer) == '=')
            ACCEPT_CHAR (CPP_MAX_EQ);
        }
      break;

    case '%':
      lex_percent (buffer, result);
      /* A %: digraph needs the same CPP_DHASH treatment as '#'.  */
      if (result->type == CPP_HASH)
        goto do_hash;
      break;

    case '.':
      lex_dot (pfile, result);
      break;

    case '+':
      result->type = CPP_PLUS;
      c = get_effective_char (buffer);
      if (c == '=')
        ACCEPT_CHAR (CPP_PLUS_EQ);
      else if (c == '+')
        ACCEPT_CHAR (CPP_PLUS_PLUS);
      break;

    case '-':
      result->type = CPP_MINUS;
      c = get_effective_char (buffer);
      if (c == '>')
        {
          ACCEPT_CHAR (CPP_DEREF);
          if (CPP_OPTION (pfile, cplusplus)
              && get_effective_char (buffer) == '*')
            ACCEPT_CHAR (CPP_DEREF_STAR);
        }
      else if (c == '=')
        ACCEPT_CHAR (CPP_MINUS_EQ);
      else if (c == '-')
        ACCEPT_CHAR (CPP_MINUS_MINUS);
      break;

    case '*':
      result->type = CPP_MULT;
      if (get_effective_char (buffer) == '=')
        ACCEPT_CHAR (CPP_MULT_EQ);
      break;

    case '=':
      result->type = CPP_EQ;
      if (get_effective_char (buffer) == '=')
        ACCEPT_CHAR (CPP_EQ_EQ);
      break;

    case '!':
      result->type = CPP_NOT;
      if (get_effective_char (buffer) == '=')
        ACCEPT_CHAR (CPP_NOT_EQ);
      break;

    case '&':
      result->type = CPP_AND;
      c = get_effective_char (buffer);
      if (c == '=')
        ACCEPT_CHAR (CPP_AND_EQ);
      else if (c == '&')
        ACCEPT_CHAR (CPP_AND_AND);
      break;

    case '#':
      if (get_effective_char (buffer) == '#')
        ACCEPT_CHAR (CPP_PASTE);
      else
        {
          result->type = CPP_HASH;
        do_hash:
          /* CPP_DHASH is the hash introducing a directive.  */
          if (was_skip_newlines || newline_in_args)
            {
              result->type = CPP_DHASH;
              /* Get whitespace right - newline_in_args sets it.  */
              if (pfile->lexer_pos.col == 1)
                result->flags &= ~PREV_WHITE;
            }
        }
      break;

    case '|':
      result->type = CPP_OR;
      c = get_effective_char (buffer);
      if (c == '=')
        ACCEPT_CHAR (CPP_OR_EQ);
      else if (c == '|')
        ACCEPT_CHAR (CPP_OR_OR);
      break;

    case '^':
      result->type = CPP_XOR;
      if (get_effective_char (buffer) == '=')
        ACCEPT_CHAR (CPP_XOR_EQ);
      break;

    case ':':
      result->type = CPP_COLON;
      c = get_effective_char (buffer);
      if (c == ':' && CPP_OPTION (pfile, cplusplus))
        ACCEPT_CHAR (CPP_SCOPE);
      else if (c == '>' && CPP_OPTION (pfile, digraphs))
        {
          result->flags |= DIGRAPH;
          ACCEPT_CHAR (CPP_CLOSE_SQUARE);
        }
      break;

    case '~': result->type = CPP_COMPL; break;
    case ',': result->type = CPP_COMMA; break;
    case '(': result->type = CPP_OPEN_PAREN; break;
    case ')': result->type = CPP_CLOSE_PAREN; break;
    case '[': result->type = CPP_OPEN_SQUARE; break;
    case ']': result->type = CPP_CLOSE_SQUARE; break;
    case '{': result->type = CPP_OPEN_BRACE; break;
    case '}': result->type = CPP_CLOSE_BRACE; break;
    case ';': result->type = CPP_SEMICOLON; break;

    case '@':
      if (CPP_OPTION (pfile, objc))
        {
          /* In Objective C, '@' may begin keywords or strings, like
             @keyword or @"string".  It would be nice to call
             get_effective_char here and test the result.  However, we
             would then need to pass 2 characters to parse_identifier,
             making it ugly and slowing down its main loop.  Instead,
             we assume we have an identifier, and recover if not.  */
          result->type = CPP_NAME;
          result->val.node = parse_identifier (pfile, c);
          if (result->val.node->length != 1)
            break;

          /* OK, so it wasn't an identifier.  Maybe a string?  */
          if (buffer->read_ahead == '"')
            {
              c = '"';
              ACCEPT_CHAR (CPP_OSTRING);
              goto make_string;
            }
        }
      goto random_char;

      /* Anything else is returned as CPP_OTHER, with the character
         itself stored in val.aux.  */
    random_char:
    default:
      result->type = CPP_OTHER;
      result->val.aux = c;
      break;
    }
}
1242
Neil Booth93c803682000-10-28 17:59:06 +00001243/* An upper bound on the number of bytes needed to spell a token,
1244 including preceding whitespace. */
1245unsigned int
1246cpp_token_len (token)
1247 const cpp_token *token;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001248{
Neil Booth93c803682000-10-28 17:59:06 +00001249 unsigned int len;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001250
Neil Booth93c803682000-10-28 17:59:06 +00001251 switch (TOKEN_SPELL (token))
Zack Weinbergc5a04732000-04-25 19:32:36 +00001252 {
Neil Booth93c803682000-10-28 17:59:06 +00001253 default: len = 0; break;
1254 case SPELL_STRING: len = token->val.str.len; break;
1255 case SPELL_IDENT: len = token->val.node->length; break;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001256 }
Neil Booth93c803682000-10-28 17:59:06 +00001257 /* 1 for whitespace, 4 for comment delimeters. */
1258 return len + 5;
Zack Weinberg041c3192000-07-04 01:58:21 +00001259}
1260
Neil Booth3fef5b22000-05-08 22:22:49 +00001261/* Write the spelling of a token TOKEN to BUFFER. The buffer must
Zack Weinbergcf00a882000-07-08 02:33:00 +00001262 already contain the enough space to hold the token's spelling.
1263 Returns a pointer to the character after the last character
1264 written. */
Neil Booth93c803682000-10-28 17:59:06 +00001265unsigned char *
1266cpp_spell_token (pfile, token, buffer)
Neil Booth3fef5b22000-05-08 22:22:49 +00001267 cpp_reader *pfile; /* Would be nice to be rid of this... */
Neil Boothd1d9a6b2000-05-27 23:19:56 +00001268 const cpp_token *token;
Neil Booth3fef5b22000-05-08 22:22:49 +00001269 unsigned char *buffer;
Neil Booth3fef5b22000-05-08 22:22:49 +00001270{
Zack Weinberg96be6992000-07-18 23:25:06 +00001271 switch (TOKEN_SPELL (token))
Neil Booth3fef5b22000-05-08 22:22:49 +00001272 {
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001273 case SPELL_OPERATOR:
Neil Booth3fef5b22000-05-08 22:22:49 +00001274 {
1275 const unsigned char *spelling;
1276 unsigned char c;
1277
1278 if (token->flags & DIGRAPH)
Neil Booth93c803682000-10-28 17:59:06 +00001279 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
Zack Weinberg92936ec2000-07-19 20:18:08 +00001280 else if (token->flags & NAMED_OP)
1281 goto spell_ident;
Neil Booth3fef5b22000-05-08 22:22:49 +00001282 else
Zack Weinberg96be6992000-07-18 23:25:06 +00001283 spelling = TOKEN_NAME (token);
Neil Booth3fef5b22000-05-08 22:22:49 +00001284
1285 while ((c = *spelling++) != '\0')
1286 *buffer++ = c;
1287 }
1288 break;
1289
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001290 case SPELL_IDENT:
Zack Weinberg92936ec2000-07-19 20:18:08 +00001291 spell_ident:
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001292 memcpy (buffer, token->val.node->name, token->val.node->length);
1293 buffer += token->val.node->length;
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001294 break;
Neil Booth3fef5b22000-05-08 22:22:49 +00001295
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001296 case SPELL_STRING:
1297 {
Zack Weinbergba89d662000-08-04 01:30:06 +00001298 int left, right, tag;
1299 switch (token->type)
1300 {
1301 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1302 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1303 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1304 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1305 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1306 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1307 default: left = '\0'; right = '\0'; tag = '\0'; break;
1308 }
1309 if (tag) *buffer++ = tag;
1310 if (left) *buffer++ = left;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001311 memcpy (buffer, token->val.str.text, token->val.str.len);
1312 buffer += token->val.str.len;
Zack Weinbergba89d662000-08-04 01:30:06 +00001313 if (right) *buffer++ = right;
Neil Booth3fef5b22000-05-08 22:22:49 +00001314 }
1315 break;
1316
1317 case SPELL_CHAR:
Zack Weinberg041c3192000-07-04 01:58:21 +00001318 *buffer++ = token->val.aux;
Neil Booth3fef5b22000-05-08 22:22:49 +00001319 break;
1320
1321 case SPELL_NONE:
Zack Weinberg96be6992000-07-18 23:25:06 +00001322 cpp_ice (pfile, "Unspellable token %s", TOKEN_NAME (token));
Neil Booth3fef5b22000-05-08 22:22:49 +00001323 break;
1324 }
1325
1326 return buffer;
1327}
1328
Neil Booth93c803682000-10-28 17:59:06 +00001329/* Returns a token as a null-terminated string. The string is
1330 temporary, and automatically freed later. Useful for diagnostics. */
1331unsigned char *
1332cpp_token_as_text (pfile, token)
Zack Weinberg041c3192000-07-04 01:58:21 +00001333 cpp_reader *pfile;
Zack Weinberg041c3192000-07-04 01:58:21 +00001334 const cpp_token *token;
1335{
Neil Booth93c803682000-10-28 17:59:06 +00001336 unsigned int len = cpp_token_len (token);
1337 unsigned char *start = _cpp_pool_alloc (&pfile->temp_string_pool, len), *end;
Zack Weinberg041c3192000-07-04 01:58:21 +00001338
Neil Booth93c803682000-10-28 17:59:06 +00001339 end = cpp_spell_token (pfile, token, start);
1340 end[0] = '\0';
Zack Weinberg041c3192000-07-04 01:58:21 +00001341
Neil Booth93c803682000-10-28 17:59:06 +00001342 return start;
Zack Weinberg041c3192000-07-04 01:58:21 +00001343}
1344
Neil Booth93c803682000-10-28 17:59:06 +00001345/* Used by C front ends. Should really move to using cpp_token_as_text. */
1346const char *
1347cpp_type2name (type)
1348 enum cpp_ttype type;
Zack Weinberg041c3192000-07-04 01:58:21 +00001349{
Neil Booth93c803682000-10-28 17:59:06 +00001350 return (const char *) token_spellings[type].name;
Zack Weinberg041c3192000-07-04 01:58:21 +00001351}
1352
Neil Booth93c803682000-10-28 17:59:06 +00001353/* Writes the spelling of token to FP. Separate from cpp_spell_token
1354 for efficiency - to avoid double-buffering. Also, outputs a space
1355 if PREV_WHITE is flagged. */
Zack Weinberg041c3192000-07-04 01:58:21 +00001356void
Neil Booth93c803682000-10-28 17:59:06 +00001357cpp_output_token (token, fp)
Zack Weinberg041c3192000-07-04 01:58:21 +00001358 const cpp_token *token;
Neil Booth93c803682000-10-28 17:59:06 +00001359 FILE *fp;
Zack Weinberg041c3192000-07-04 01:58:21 +00001360{
Neil Booth93c803682000-10-28 17:59:06 +00001361 if (token->flags & PREV_WHITE)
1362 putc (' ', fp);
Zack Weinberg041c3192000-07-04 01:58:21 +00001363
Neil Booth93c803682000-10-28 17:59:06 +00001364 switch (TOKEN_SPELL (token))
Zack Weinberg041c3192000-07-04 01:58:21 +00001365 {
Neil Booth93c803682000-10-28 17:59:06 +00001366 case SPELL_OPERATOR:
1367 {
1368 const unsigned char *spelling;
1369
1370 if (token->flags & DIGRAPH)
1371 spelling = digraph_spellings[token->type - CPP_FIRST_DIGRAPH];
1372 else if (token->flags & NAMED_OP)
1373 goto spell_ident;
1374 else
1375 spelling = TOKEN_NAME (token);
1376
1377 ufputs (spelling, fp);
1378 }
1379 break;
1380
1381 spell_ident:
1382 case SPELL_IDENT:
1383 ufputs (token->val.node->name, fp);
1384 break;
1385
1386 case SPELL_STRING:
1387 {
1388 int left, right, tag;
1389 switch (token->type)
1390 {
1391 case CPP_STRING: left = '"'; right = '"'; tag = '\0'; break;
1392 case CPP_WSTRING: left = '"'; right = '"'; tag = 'L'; break;
1393 case CPP_OSTRING: left = '"'; right = '"'; tag = '@'; break;
1394 case CPP_CHAR: left = '\''; right = '\''; tag = '\0'; break;
1395 case CPP_WCHAR: left = '\''; right = '\''; tag = 'L'; break;
1396 case CPP_HEADER_NAME: left = '<'; right = '>'; tag = '\0'; break;
1397 default: left = '\0'; right = '\0'; tag = '\0'; break;
1398 }
1399 if (tag) putc (tag, fp);
1400 if (left) putc (left, fp);
1401 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1402 if (right) putc (right, fp);
1403 }
1404 break;
1405
1406 case SPELL_CHAR:
1407 putc (token->val.aux, fp);
1408 break;
1409
1410 case SPELL_NONE:
1411 /* An error, most probably. */
1412 break;
Zack Weinberg041c3192000-07-04 01:58:21 +00001413 }
Zack Weinberg041c3192000-07-04 01:58:21 +00001414}
1415
Neil Booth93c803682000-10-28 17:59:06 +00001416/* Compare two tokens. */
1417int
1418_cpp_equiv_tokens (a, b)
1419 const cpp_token *a, *b;
1420{
1421 if (a->type == b->type && a->flags == b->flags)
1422 switch (TOKEN_SPELL (a))
1423 {
1424 default: /* Keep compiler happy. */
1425 case SPELL_OPERATOR:
1426 return 1;
1427 case SPELL_CHAR:
1428 return a->val.aux == b->val.aux; /* Character. */
1429 case SPELL_NONE:
1430 return (a->type != CPP_MACRO_ARG || a->val.aux == b->val.aux);
1431 case SPELL_IDENT:
1432 return a->val.node == b->val.node;
1433 case SPELL_STRING:
1434 return (a->val.str.len == b->val.str.len
1435 && !memcmp (a->val.str.text, b->val.str.text,
1436 a->val.str.len));
1437 }
1438
1439 return 0;
1440}
1441
#if 0
/* Compare two token lists.  Returns 1 if A and B hold the same
   number of tokens (limit - first) and every corresponding pair of
   tokens satisfies _cpp_equiv_tokens; returns 0 otherwise.
   Currently compiled out.  */
int
_cpp_equiv_toklists (a, b)
     const struct toklist *a, *b;
{
  unsigned int i, count;

  /* Lists of different lengths can never be equivalent.  */
  count = a->limit - a->first;
  if (count != (b->limit - b->first))
    return 0;

  for (i = 0; i < count; i++)
    if (! _cpp_equiv_tokens (&a->first[i], &b->first[i]))
      return 0;

  return 1;
}
#endif
1461
Zack Weinberg041c3192000-07-04 01:58:21 +00001462/* Determine whether two tokens can be pasted together, and if so,
1463 what the resulting token is. Returns CPP_EOF if the tokens cannot
1464 be pasted, or the appropriate type for the merged token if they
1465 can. */
Neil Booth7de4d002000-09-25 22:34:53 +00001466enum cpp_ttype
Neil Booth93c803682000-10-28 17:59:06 +00001467cpp_can_paste (pfile, token1, token2, digraph)
Zack Weinberg041c3192000-07-04 01:58:21 +00001468 cpp_reader * pfile;
1469 const cpp_token *token1, *token2;
1470 int* digraph;
1471{
1472 enum cpp_ttype a = token1->type, b = token2->type;
1473 int cxx = CPP_OPTION (pfile, cplusplus);
1474
Zack Weinberg92936ec2000-07-19 20:18:08 +00001475 /* Treat named operators as if they were ordinary NAMEs. */
1476 if (token1->flags & NAMED_OP)
1477 a = CPP_NAME;
1478 if (token2->flags & NAMED_OP)
1479 b = CPP_NAME;
1480
Zack Weinberg041c3192000-07-04 01:58:21 +00001481 if (a <= CPP_LAST_EQ && b == CPP_EQ)
1482 return a + (CPP_EQ_EQ - CPP_EQ);
1483
1484 switch (a)
1485 {
1486 case CPP_GREATER:
1487 if (b == a) return CPP_RSHIFT;
1488 if (b == CPP_QUERY && cxx) return CPP_MAX;
1489 if (b == CPP_GREATER_EQ) return CPP_RSHIFT_EQ;
1490 break;
1491 case CPP_LESS:
1492 if (b == a) return CPP_LSHIFT;
1493 if (b == CPP_QUERY && cxx) return CPP_MIN;
1494 if (b == CPP_LESS_EQ) return CPP_LSHIFT_EQ;
Neil Booth9b55f292000-07-11 08:58:28 +00001495 if (CPP_OPTION (pfile, digraphs))
1496 {
1497 if (b == CPP_COLON)
1498 {*digraph = 1; return CPP_OPEN_SQUARE;} /* <: digraph */
1499 if (b == CPP_MOD)
1500 {*digraph = 1; return CPP_OPEN_BRACE;} /* <% digraph */
1501 }
Zack Weinberg041c3192000-07-04 01:58:21 +00001502 break;
1503
1504 case CPP_PLUS: if (b == a) return CPP_PLUS_PLUS; break;
1505 case CPP_AND: if (b == a) return CPP_AND_AND; break;
1506 case CPP_OR: if (b == a) return CPP_OR_OR; break;
1507
1508 case CPP_MINUS:
1509 if (b == a) return CPP_MINUS_MINUS;
1510 if (b == CPP_GREATER) return CPP_DEREF;
1511 break;
1512 case CPP_COLON:
1513 if (b == a && cxx) return CPP_SCOPE;
Neil Booth9b55f292000-07-11 08:58:28 +00001514 if (b == CPP_GREATER && CPP_OPTION (pfile, digraphs))
Zack Weinberg041c3192000-07-04 01:58:21 +00001515 {*digraph = 1; return CPP_CLOSE_SQUARE;} /* :> digraph */
1516 break;
1517
1518 case CPP_MOD:
Neil Booth9b55f292000-07-11 08:58:28 +00001519 if (CPP_OPTION (pfile, digraphs))
1520 {
1521 if (b == CPP_GREATER)
1522 {*digraph = 1; return CPP_CLOSE_BRACE;} /* %> digraph */
1523 if (b == CPP_COLON)
1524 {*digraph = 1; return CPP_HASH;} /* %: digraph */
1525 }
Zack Weinberg041c3192000-07-04 01:58:21 +00001526 break;
1527 case CPP_DEREF:
1528 if (b == CPP_MULT && cxx) return CPP_DEREF_STAR;
1529 break;
1530 case CPP_DOT:
1531 if (b == CPP_MULT && cxx) return CPP_DOT_STAR;
1532 if (b == CPP_NUMBER) return CPP_NUMBER;
1533 break;
1534
1535 case CPP_HASH:
1536 if (b == a && (token1->flags & DIGRAPH) == (token2->flags & DIGRAPH))
1537 /* %:%: digraph */
1538 {*digraph = (token1->flags & DIGRAPH); return CPP_PASTE;}
1539 break;
1540
1541 case CPP_NAME:
1542 if (b == CPP_NAME) return CPP_NAME;
1543 if (b == CPP_NUMBER
Neil Booth93c803682000-10-28 17:59:06 +00001544 && name_p (pfile, &token2->val.str)) return CPP_NAME;
Zack Weinberg041c3192000-07-04 01:58:21 +00001545 if (b == CPP_CHAR
Neil Booth93c803682000-10-28 17:59:06 +00001546 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WCHAR;
Zack Weinberg041c3192000-07-04 01:58:21 +00001547 if (b == CPP_STRING
Neil Booth93c803682000-10-28 17:59:06 +00001548 && token1->val.node == pfile->spec_nodes.n_L) return CPP_WSTRING;
Zack Weinberg041c3192000-07-04 01:58:21 +00001549 break;
1550
1551 case CPP_NUMBER:
1552 if (b == CPP_NUMBER) return CPP_NUMBER;
1553 if (b == CPP_NAME) return CPP_NUMBER;
1554 if (b == CPP_DOT) return CPP_NUMBER;
1555 /* Numbers cannot have length zero, so this is safe. */
1556 if ((b == CPP_PLUS || b == CPP_MINUS)
Zack Weinbergbfb9dc72000-07-08 19:00:39 +00001557 && VALID_SIGN ('+', token1->val.str.text[token1->val.str.len - 1]))
Zack Weinberg041c3192000-07-04 01:58:21 +00001558 return CPP_NUMBER;
1559 break;
1560
Zack Weinbergba89d662000-08-04 01:30:06 +00001561 case CPP_OTHER:
1562 if (CPP_OPTION (pfile, objc) && token1->val.aux == '@')
1563 {
1564 if (b == CPP_NAME) return CPP_NAME;
1565 if (b == CPP_STRING) return CPP_OSTRING;
1566 }
1567
Zack Weinberg041c3192000-07-04 01:58:21 +00001568 default:
1569 break;
1570 }
1571
1572 return CPP_EOF;
1573}
1574
Neil Booth93c803682000-10-28 17:59:06 +00001575/* Returns nonzero if a space should be inserted to avoid an
1576 accidental token paste for output. For simplicity, it is
1577 conservative, and occasionally advises a space where one is not
1578 needed, e.g. "." and ".2". */
1579
1580int
1581cpp_avoid_paste (pfile, token1, token2)
Zack Weinberg041c3192000-07-04 01:58:21 +00001582 cpp_reader *pfile;
Neil Booth93c803682000-10-28 17:59:06 +00001583 const cpp_token *token1, *token2;
Zack Weinberg041c3192000-07-04 01:58:21 +00001584{
Neil Booth93c803682000-10-28 17:59:06 +00001585 enum cpp_ttype a = token1->type, b = token2->type;
1586 cppchar_t c;
Zack Weinberg041c3192000-07-04 01:58:21 +00001587
Neil Booth93c803682000-10-28 17:59:06 +00001588 if (token1->flags & NAMED_OP)
1589 a = CPP_NAME;
1590 if (token2->flags & NAMED_OP)
1591 b = CPP_NAME;
Zack Weinberg041c3192000-07-04 01:58:21 +00001592
Neil Booth93c803682000-10-28 17:59:06 +00001593 c = EOF;
1594 if (token2->flags & DIGRAPH)
1595 c = digraph_spellings[b - CPP_FIRST_DIGRAPH][0];
1596 else if (token_spellings[b].category == SPELL_OPERATOR)
1597 c = token_spellings[b].name[0];
Zack Weinberg417f3e32000-07-11 23:20:53 +00001598
Neil Booth93c803682000-10-28 17:59:06 +00001599 /* Quickly get everything that can paste with an '='. */
1600 if (a <= CPP_LAST_EQ && c == '=')
Zack Weinberg041c3192000-07-04 01:58:21 +00001601 return 1;
1602
Neil Booth93c803682000-10-28 17:59:06 +00001603 switch (a)
1604 {
1605 case CPP_GREATER: return c == '>' || c == '?';
1606 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1607 case CPP_PLUS: return c == '+';
1608 case CPP_MINUS: return c == '-' || c == '>';
1609 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1610 case CPP_MOD: return c == ':' || c == '>';
1611 case CPP_AND: return c == '&';
1612 case CPP_OR: return c == '|';
1613 case CPP_COLON: return c == ':' || c == '>';
1614 case CPP_DEREF: return c == '*';
1615 case CPP_DOT: return c == '.' || c == '%';
1616 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1617 case CPP_NAME: return ((b == CPP_NUMBER
1618 && name_p (pfile, &token2->val.str))
1619 || b == CPP_NAME
1620 || b == CPP_CHAR || b == CPP_STRING); /* L */
1621 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1622 || c == '.' || c == '+' || c == '-');
1623 case CPP_OTHER: return (CPP_OPTION (pfile, objc)
1624 && token1->val.aux == '@'
1625 && (b == CPP_NAME || b == CPP_STRING));
1626 default: break;
1627 }
Zack Weinberg041c3192000-07-04 01:58:21 +00001628
1629 return 0;
1630}
1631
Neil Booth93c803682000-10-28 17:59:06 +00001632/* Output all the remaining tokens on the current line, and a newline
1633 character, to FP. Leading whitespace is removed. */
1634void
1635cpp_output_line (pfile, fp)
Zack Weinberg041c3192000-07-04 01:58:21 +00001636 cpp_reader *pfile;
Neil Booth93c803682000-10-28 17:59:06 +00001637 FILE *fp;
Zack Weinberg041c3192000-07-04 01:58:21 +00001638{
Neil Booth93c803682000-10-28 17:59:06 +00001639 cpp_token token;
Zack Weinberg041c3192000-07-04 01:58:21 +00001640
Neil Booth93c803682000-10-28 17:59:06 +00001641 _cpp_get_token (pfile, &token);
1642 token.flags &= ~PREV_WHITE;
1643 while (token.type != CPP_EOF)
Zack Weinberg6ead1e92000-07-31 23:47:19 +00001644 {
Neil Booth93c803682000-10-28 17:59:06 +00001645 cpp_output_token (&token, fp);
1646 _cpp_get_token (pfile, &token);
Zack Weinberg6ead1e92000-07-31 23:47:19 +00001647 }
1648
Neil Booth93c803682000-10-28 17:59:06 +00001649 putc ('\n', fp);
Zack Weinberg041c3192000-07-04 01:58:21 +00001650}
1651
Neil Booth93c803682000-10-28 17:59:06 +00001652/* Memory pools. */
1653
/* Layout probe: a single char followed by a union of a double and a
   pointer.  The offset of the union within the struct is what
   DEFAULT_ALIGNMENT evaluates to.  */
struct dummy
{
  char c;
  union
  {
    int *p;
    double d;
  } u;
};

/* Alignment used when a pool is created without an explicit one.  */
#define DEFAULT_ALIGNMENT (offsetof (struct dummy, u))
1665
1666static int
1667chunk_suitable (pool, chunk, size)
1668 cpp_pool *pool;
1669 cpp_chunk *chunk;
1670 unsigned int size;
Zack Weinberg041c3192000-07-04 01:58:21 +00001671{
Neil Booth93c803682000-10-28 17:59:06 +00001672 /* Being at least twice SIZE means we can use memcpy in
1673 _cpp_next_chunk rather than memmove. Besides, it's a good idea
1674 anyway. */
1675 return (chunk && pool->locked != chunk
1676 && (unsigned int) (chunk->limit - chunk->base) >= size * 2);
1677}
Zack Weinberg041c3192000-07-04 01:58:21 +00001678
Neil Booth93c803682000-10-28 17:59:06 +00001679/* Returns the end of the new pool. PTR points to a char in the old
1680 pool, and is updated to point to the same char in the new pool. */
1681unsigned char *
1682_cpp_next_chunk (pool, len, ptr)
1683 cpp_pool *pool;
1684 unsigned int len;
1685 unsigned char **ptr;
1686{
1687 cpp_chunk *chunk = pool->cur->next;
1688
1689 /* LEN is the minimum size we want in the new pool. */
1690 len += POOL_ROOM (pool);
1691 if (! chunk_suitable (pool, chunk, len))
Zack Weinberg041c3192000-07-04 01:58:21 +00001692 {
Neil Booth93c803682000-10-28 17:59:06 +00001693 chunk = new_chunk (POOL_SIZE (pool) * 2 + len);
Zack Weinberg041c3192000-07-04 01:58:21 +00001694
Neil Booth93c803682000-10-28 17:59:06 +00001695 chunk->next = pool->cur->next;
1696 pool->cur->next = chunk;
Zack Weinberg041c3192000-07-04 01:58:21 +00001697 }
1698
Neil Booth93c803682000-10-28 17:59:06 +00001699 /* Update the pointer before changing chunk's front. */
1700 if (ptr)
1701 *ptr += chunk->base - POOL_FRONT (pool);
1702
1703 memcpy (chunk->base, POOL_FRONT (pool), POOL_ROOM (pool));
1704 chunk->front = chunk->base;
1705
1706 pool->cur = chunk;
1707 return POOL_LIMIT (pool);
1708}
1709
1710static cpp_chunk *
1711new_chunk (size)
1712 unsigned int size;
1713{
1714 unsigned char *base;
1715 cpp_chunk *result;
1716
1717 size = ALIGN (size, DEFAULT_ALIGNMENT);
1718 base = (unsigned char *) xmalloc (size + sizeof (cpp_chunk));
1719 /* Put the chunk descriptor at the end. Then chunk overruns will
1720 cause obvious chaos. */
1721 result = (cpp_chunk *) (base + size);
1722 result->base = base;
1723 result->front = base;
1724 result->limit = base + size;
1725 result->next = 0;
1726
Zack Weinberg041c3192000-07-04 01:58:21 +00001727 return result;
1728}
Zack Weinberg041c3192000-07-04 01:58:21 +00001729
1730void
Neil Booth93c803682000-10-28 17:59:06 +00001731_cpp_init_pool (pool, size, align, temp)
1732 cpp_pool *pool;
1733 unsigned int size, align, temp;
Zack Weinberg041c3192000-07-04 01:58:21 +00001734{
Neil Booth93c803682000-10-28 17:59:06 +00001735 if (align == 0)
1736 align = DEFAULT_ALIGNMENT;
1737 if (align & (align - 1))
1738 abort ();
1739 pool->align = align;
1740 pool->cur = new_chunk (size);
1741 pool->locked = 0;
1742 pool->locks = 0;
1743 if (temp)
1744 pool->cur->next = pool->cur;
Zack Weinberg041c3192000-07-04 01:58:21 +00001745}
1746
Zack Weinberg041c3192000-07-04 01:58:21 +00001747void
Neil Booth93c803682000-10-28 17:59:06 +00001748_cpp_lock_pool (pool)
1749 cpp_pool *pool;
Zack Weinberg041c3192000-07-04 01:58:21 +00001750{
Neil Booth93c803682000-10-28 17:59:06 +00001751 if (pool->locks++ == 0)
1752 pool->locked = pool->cur;
Zack Weinberg041c3192000-07-04 01:58:21 +00001753}
1754
Zack Weinberg041c3192000-07-04 01:58:21 +00001755void
Neil Booth93c803682000-10-28 17:59:06 +00001756_cpp_unlock_pool (pool)
1757 cpp_pool *pool;
Zack Weinberg041c3192000-07-04 01:58:21 +00001758{
Neil Booth93c803682000-10-28 17:59:06 +00001759 if (--pool->locks == 0)
1760 pool->locked = 0;
1761}
1762
1763void
1764_cpp_free_pool (pool)
1765 cpp_pool *pool;
1766{
1767 cpp_chunk *chunk = pool->cur, *next;
1768
1769 do
Zack Weinberg041c3192000-07-04 01:58:21 +00001770 {
Neil Booth93c803682000-10-28 17:59:06 +00001771 next = chunk->next;
1772 free (chunk->base);
1773 chunk = next;
Zack Weinberg041c3192000-07-04 01:58:21 +00001774 }
Neil Booth93c803682000-10-28 17:59:06 +00001775 while (chunk && chunk != pool->cur);
1776}
1777
1778/* Reserve LEN bytes from a memory pool. */
1779unsigned char *
1780_cpp_pool_reserve (pool, len)
1781 cpp_pool *pool;
1782 unsigned int len;
1783{
1784 len = ALIGN (len, pool->align);
1785 if (len > (unsigned int) POOL_ROOM (pool))
1786 _cpp_next_chunk (pool, len, 0);
1787
1788 return POOL_FRONT (pool);
1789}
1790
1791/* Allocate LEN bytes from a memory pool. */
1792unsigned char *
1793_cpp_pool_alloc (pool, len)
1794 cpp_pool *pool;
1795 unsigned int len;
1796{
1797 unsigned char *result = _cpp_pool_reserve (pool, len);
1798
1799 POOL_COMMIT (pool, len);
1800 return result;
Zack Weinberg041c3192000-07-04 01:58:21 +00001801}