blob: b38047e087974965ebbd510a0c4a886401cdd0a3 [file] [log] [blame]
Zack Weinberg45b966d2000-03-13 22:01:08 +00001/* CPP Library - lexical analysis.
Andreas Jaegercbada202005-03-28 08:55:47 +02002 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
Zack Weinberg45b966d2000-03-13 22:01:08 +00003 Contributed by Per Bothner, 1994-95.
4 Based on CCCP program by Paul Rubin, June 1986
5 Adapted to ANSI C, Richard Stallman, Jan 1987
6 Broken out to separate file, Zack Weinberg, Mar 2000
7
8This program is free software; you can redistribute it and/or modify it
9under the terms of the GNU General Public License as published by the
10Free Software Foundation; either version 2, or (at your option) any
11later version.
12
13This program is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with this program; if not, write to the Free Software
20Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
21
22#include "config.h"
23#include "system.h"
Zack Weinberg45b966d2000-03-13 22:01:08 +000024#include "cpplib.h"
Paolo Bonzini4f4e53dd2004-05-24 10:50:45 +000025#include "internal.h"
Zack Weinberg45b966d2000-03-13 22:01:08 +000026
/* How the spelling of a token type is obtained.  The value chosen
   for each token type is recorded in the token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR = 0,	/* Used by OP() entries: a fixed spelling string.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};

/* One row of the token spelling table: the spelling category of a
   token type together with its name or fixed spelling.  */
struct token_spelling
{
  enum spell_type category;
  const unsigned char *name;
};
40
Zack Weinberg8206c792001-10-11 21:21:57 +000041static const unsigned char *const digraph_spellings[] =
42{ U"%:", U"%:%:", U"<:", U":>", U"<%", U"%>" };
Neil Booth93c803682000-10-28 17:59:06 +000043
Zack Weinberg21b11492004-09-09 19:16:56 +000044#define OP(e, s) { SPELL_OPERATOR, U s },
45#define TK(e, s) { SPELL_ ## s, U #e },
Zack Weinberg8206c792001-10-11 21:21:57 +000046static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
Neil Booth93c803682000-10-28 17:59:06 +000047#undef OP
48#undef TK
49
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +000052
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000053static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000056static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
58static void create_literal (cpp_reader *, cpp_token *, const uchar *,
59 unsigned int, enum cpp_ttype);
60static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
61static int name_p (cpp_reader *, const cpp_string *);
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000062static tokenrun *next_tokenrun (tokenrun *);
Neil Booth0d9f2342000-09-18 18:43:05 +000063
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000064static _cpp_buff *new_buff (size_t);
Zack Weinberg15dad1d2000-05-18 15:55:46 +000065
Neil Booth9d10c9a2003-03-06 23:12:30 +000066
Zack Weinberg6d2c2042000-04-30 17:30:25 +000067/* Utility routine:
Zack Weinberg6d2c2042000-04-30 17:30:25 +000068
Zack Weinbergbfb9dc72000-07-08 19:00:39 +000069 Compares, the token TOKEN to the NUL-terminated string STRING.
70 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
Zack Weinberg6d2c2042000-04-30 17:30:25 +000071int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000072cpp_ideq (const cpp_token *token, const char *string)
Zack Weinberg6d2c2042000-04-30 17:30:25 +000073{
Zack Weinbergbfb9dc72000-07-08 19:00:39 +000074 if (token->type != CPP_NAME)
Zack Weinberg6d2c2042000-04-30 17:30:25 +000075 return 0;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +000076
Neil Booth562a5c22002-04-21 18:46:42 +000077 return !ustrcmp (NODE_NAME (token->val.node), (const uchar *) string);
Zack Weinberg6d2c2042000-04-30 17:30:25 +000078}
79
Neil Booth26aea072003-04-19 00:22:51 +000080/* Record a note TYPE at byte POS into the current cleaned logical
81 line. */
Neil Booth87062812001-10-20 09:00:53 +000082static void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000083add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
Neil Booth0d9f2342000-09-18 18:43:05 +000084{
Neil Booth26aea072003-04-19 00:22:51 +000085 if (buffer->notes_used == buffer->notes_cap)
Zack Weinbergc5a04732000-04-25 19:32:36 +000086 {
Neil Booth26aea072003-04-19 00:22:51 +000087 buffer->notes_cap = buffer->notes_cap * 2 + 200;
Gabriel Dos Reisc3f829c2005-05-28 15:52:48 +000088 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
89 buffer->notes_cap);
Zack Weinbergc5a04732000-04-25 19:32:36 +000090 }
Neil Booth0d9f2342000-09-18 18:43:05 +000091
Neil Booth26aea072003-04-19 00:22:51 +000092 buffer->notes[buffer->notes_used].pos = pos;
93 buffer->notes[buffer->notes_used].type = type;
94 buffer->notes_used++;
Zack Weinbergc5a04732000-04-25 19:32:36 +000095}
96
Neil Booth26aea072003-04-19 00:22:51 +000097/* Returns with a logical line that contains no escaped newlines or
98 trigraphs. This is a time-critical inner loop. */
99void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000100_cpp_clean_line (cpp_reader *pfile)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000101{
Neil Booth26aea072003-04-19 00:22:51 +0000102 cpp_buffer *buffer;
103 const uchar *s;
104 uchar c, *d, *p;
Neil Booth29401c32001-08-22 20:37:20 +0000105
Neil Booth26aea072003-04-19 00:22:51 +0000106 buffer = pfile->buffer;
107 buffer->cur_note = buffer->notes_used = 0;
108 buffer->cur = buffer->line_base = buffer->next_line;
109 buffer->need_line = false;
110 s = buffer->next_line - 1;
111
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000112 if (!buffer->from_stage3)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000113 {
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000114 /* Short circuit for the common case of an un-escaped line with
115 no trigraphs. The primary win here is by not writing any
116 data back to memory until we have to. */
117 for (;;)
118 {
119 c = *++s;
120 if (c == '\n' || c == '\r')
121 {
122 d = (uchar *) s;
123
124 if (s == buffer->rlimit)
125 goto done;
126
127 /* DOS line ending? */
128 if (c == '\r' && s[1] == '\n')
129 s++;
130
131 if (s == buffer->rlimit)
132 goto done;
133
134 /* check for escaped newline */
135 p = d;
136 while (p != buffer->next_line && is_nvspace (p[-1]))
137 p--;
138 if (p == buffer->next_line || p[-1] != '\\')
139 goto done;
140
141 /* Have an escaped newline; process it and proceed to
142 the slow path. */
143 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
144 d = p - 2;
145 buffer->next_line = p - 1;
146 break;
147 }
148 if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
149 {
150 /* Have a trigraph. We may or may not have to convert
151 it. Add a line note regardless, for -Wtrigraphs. */
152 add_line_note (buffer, s, s[2]);
153 if (CPP_OPTION (pfile, trigraphs))
154 {
155 /* We do, and that means we have to switch to the
156 slow path. */
157 d = (uchar *) s;
158 *d = _cpp_trigraph_map[s[2]];
159 s += 2;
160 break;
161 }
162 }
163 }
164
Zack Weinbergc5a04732000-04-25 19:32:36 +0000165
Neil Booth26aea072003-04-19 00:22:51 +0000166 for (;;)
Neil Booth0d9f2342000-09-18 18:43:05 +0000167 {
Neil Booth26aea072003-04-19 00:22:51 +0000168 c = *++s;
169 *++d = c;
170
171 if (c == '\n' || c == '\r')
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000172 {
Neil Booth26aea072003-04-19 00:22:51 +0000173 /* Handle DOS line endings. */
174 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
175 s++;
176 if (s == buffer->rlimit)
Neil Booth87062812001-10-20 09:00:53 +0000177 break;
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000178
Neil Booth26aea072003-04-19 00:22:51 +0000179 /* Escaped? */
180 p = d;
181 while (p != buffer->next_line && is_nvspace (p[-1]))
182 p--;
183 if (p == buffer->next_line || p[-1] != '\\')
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000184 break;
Neil Booth26aea072003-04-19 00:22:51 +0000185
Neil Booth41c32c92003-04-20 19:02:53 +0000186 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
Neil Booth26aea072003-04-19 00:22:51 +0000187 d = p - 2;
188 buffer->next_line = p - 1;
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000189 }
Neil Booth26aea072003-04-19 00:22:51 +0000190 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000191 {
Neil Booth26aea072003-04-19 00:22:51 +0000192 /* Add a note regardless, for the benefit of -Wtrigraphs. */
Neil Booth41c32c92003-04-20 19:02:53 +0000193 add_line_note (buffer, d, s[2]);
Neil Booth26aea072003-04-19 00:22:51 +0000194 if (CPP_OPTION (pfile, trigraphs))
195 {
196 *d = _cpp_trigraph_map[s[2]];
197 s += 2;
198 }
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000199 }
Neil Booth0d9f2342000-09-18 18:43:05 +0000200 }
Neil Booth26aea072003-04-19 00:22:51 +0000201 }
202 else
203 {
204 do
205 s++;
206 while (*s != '\n' && *s != '\r');
207 d = (uchar *) s;
208
209 /* Handle DOS line endings. */
210 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
211 s++;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000212 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000213
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000214 done:
Neil Booth26aea072003-04-19 00:22:51 +0000215 *d = '\n';
Neil Booth41c32c92003-04-20 19:02:53 +0000216 /* A sentinel note that should never be processed. */
217 add_line_note (buffer, d + 1, '\n');
Neil Booth26aea072003-04-19 00:22:51 +0000218 buffer->next_line = s + 1;
219}
220
Neil Bootha8eb6042003-05-04 20:03:55 +0000221/* Return true if the trigraph indicated by NOTE should be warned
222 about in a comment. */
223static bool
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000224warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
Neil Bootha8eb6042003-05-04 20:03:55 +0000225{
226 const uchar *p;
227
228 /* Within comments we don't warn about trigraphs, unless the
229 trigraph forms an escaped newline, as that may change
Kazu Hirata6356f892003-06-12 19:01:08 +0000230 behavior. */
Neil Bootha8eb6042003-05-04 20:03:55 +0000231 if (note->type != '/')
232 return false;
233
234 /* If -trigraphs, then this was an escaped newline iff the next note
235 is coincident. */
236 if (CPP_OPTION (pfile, trigraphs))
237 return note[1].pos == note->pos;
238
239 /* Otherwise, see if this forms an escaped newline. */
240 p = note->pos + 3;
241 while (is_nvspace (*p))
242 p++;
243
244 /* There might have been escaped newlines between the trigraph and the
245 newline we found. Hence the position test. */
246 return (*p == '\n' && p < note[1].pos);
247}
248
Neil Booth26aea072003-04-19 00:22:51 +0000249/* Process the notes created by add_line_note as far as the current
250 location. */
251void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000252_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
Neil Booth26aea072003-04-19 00:22:51 +0000253{
254 cpp_buffer *buffer = pfile->buffer;
255
256 for (;;)
257 {
258 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
259 unsigned int col;
260
261 if (note->pos > buffer->cur)
262 break;
263
264 buffer->cur_note++;
265 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
266
Neil Booth41c32c92003-04-20 19:02:53 +0000267 if (note->type == '\\' || note->type == ' ')
Neil Booth26aea072003-04-19 00:22:51 +0000268 {
Neil Booth41c32c92003-04-20 19:02:53 +0000269 if (note->type == ' ' && !in_comment)
Per Bothner500bee02004-04-22 19:22:27 -0700270 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
Neil Booth26aea072003-04-19 00:22:51 +0000271 "backslash and newline separated by space");
Neil Booth41c32c92003-04-20 19:02:53 +0000272
Neil Booth26aea072003-04-19 00:22:51 +0000273 if (buffer->next_line > buffer->rlimit)
274 {
Per Bothner500bee02004-04-22 19:22:27 -0700275 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
Neil Booth26aea072003-04-19 00:22:51 +0000276 "backslash-newline at end of file");
277 /* Prevent "no newline at end of file" warning. */
278 buffer->next_line = buffer->rlimit;
279 }
280
281 buffer->line_base = note->pos;
Per Bothner12f9df42004-02-11 07:29:30 -0800282 CPP_INCREMENT_LINE (pfile, 0);
Neil Booth26aea072003-04-19 00:22:51 +0000283 }
Neil Booth41c32c92003-04-20 19:02:53 +0000284 else if (_cpp_trigraph_map[note->type])
285 {
Neil Bootha8eb6042003-05-04 20:03:55 +0000286 if (CPP_OPTION (pfile, warn_trigraphs)
287 && (!in_comment || warn_in_comment (pfile, note)))
Neil Booth41c32c92003-04-20 19:02:53 +0000288 {
289 if (CPP_OPTION (pfile, trigraphs))
Per Bothner500bee02004-04-22 19:22:27 -0700290 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
Neil Booth41c32c92003-04-20 19:02:53 +0000291 "trigraph ??%c converted to %c",
292 note->type,
293 (int) _cpp_trigraph_map[note->type]);
294 else
Geoffrey Keating905bd7b2003-07-22 02:21:16 +0000295 {
296 cpp_error_with_line
Per Bothner500bee02004-04-22 19:22:27 -0700297 (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
Geoffrey Keating905bd7b2003-07-22 02:21:16 +0000298 "trigraph ??%c ignored, use -trigraphs to enable",
299 note->type);
300 }
Neil Booth41c32c92003-04-20 19:02:53 +0000301 }
302 }
303 else
304 abort ();
Neil Booth26aea072003-04-19 00:22:51 +0000305 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000306}
307
Neil Booth0d9f2342000-09-18 18:43:05 +0000308/* Skip a C-style block comment. We find the end of the comment by
309 seeing if an asterisk is before every '/' we encounter. Returns
Neil Booth6f572ac2003-04-19 16:34:33 +0000310 nonzero if comment terminated by EOF, zero otherwise.
311
312 Buffer->cur points to the initial asterisk of the comment. */
Neil Booth26aea072003-04-19 00:22:51 +0000313bool
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000314_cpp_skip_block_comment (cpp_reader *pfile)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000315{
316 cpp_buffer *buffer = pfile->buffer;
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000317 const uchar *cur = buffer->cur;
318 uchar c;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000319
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000320 cur++;
321 if (*cur == '/')
322 cur++;
Neil Booth26aea072003-04-19 00:22:51 +0000323
324 for (;;)
Neil Booth0d9f2342000-09-18 18:43:05 +0000325 {
Neil Booth0d9f2342000-09-18 18:43:05 +0000326 /* People like decorating comments with '*', so check for '/'
327 instead for efficiency. */
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000328 c = *cur++;
329
Zack Weinbergc5a04732000-04-25 19:32:36 +0000330 if (c == '/')
331 {
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000332 if (cur[-2] == '*')
Neil Booth0d9f2342000-09-18 18:43:05 +0000333 break;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000334
Neil Booth0d9f2342000-09-18 18:43:05 +0000335 /* Warn about potential nested comments, but not if the '/'
Joseph Myersa1f300c2001-11-23 02:05:19 +0000336 comes immediately before the true comment delimiter.
Zack Weinbergc5a04732000-04-25 19:32:36 +0000337 Don't bother to get it right across escaped newlines. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000338 if (CPP_OPTION (pfile, warn_comments)
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000339 && cur[0] == '*' && cur[1] != '/')
340 {
341 buffer->cur = cur;
John David Anglin0527bc42003-11-01 22:56:54 +0000342 cpp_error_with_line (pfile, CPP_DL_WARNING,
Per Bothner500bee02004-04-22 19:22:27 -0700343 pfile->line_table->highest_line, CPP_BUF_COL (buffer),
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000344 "\"/*\" within comment");
345 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000346 }
Neil Booth26aea072003-04-19 00:22:51 +0000347 else if (c == '\n')
348 {
Per Bothner12f9df42004-02-11 07:29:30 -0800349 unsigned int cols;
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000350 buffer->cur = cur - 1;
Neil Booth26aea072003-04-19 00:22:51 +0000351 _cpp_process_line_notes (pfile, true);
352 if (buffer->next_line >= buffer->rlimit)
353 return true;
354 _cpp_clean_line (pfile);
Per Bothner12f9df42004-02-11 07:29:30 -0800355
356 cols = buffer->next_line - buffer->line_base;
357 CPP_INCREMENT_LINE (pfile, cols);
358
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000359 cur = buffer->cur;
Neil Booth26aea072003-04-19 00:22:51 +0000360 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000361 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000362
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000363 buffer->cur = cur;
Neil Bootha8eb6042003-05-04 20:03:55 +0000364 _cpp_process_line_notes (pfile, true);
Neil Booth26aea072003-04-19 00:22:51 +0000365 return false;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000366}
367
Neil Booth480709c2001-10-21 14:04:42 +0000368/* Skip a C++ line comment, leaving buffer->cur pointing to the
Kazu Hiratada7d8302002-09-22 02:03:17 +0000369 terminating newline. Handles escaped newlines. Returns nonzero
Neil Booth480709c2001-10-21 14:04:42 +0000370 if a multiline comment. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000371static int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000372skip_line_comment (cpp_reader *pfile)
Neil Booth0d9f2342000-09-18 18:43:05 +0000373{
Neil Boothcbcff6d2000-09-23 21:41:41 +0000374 cpp_buffer *buffer = pfile->buffer;
Per Bothner500bee02004-04-22 19:22:27 -0700375 unsigned int orig_line = pfile->line_table->highest_line;
Neil Booth0d9f2342000-09-18 18:43:05 +0000376
Neil Booth26aea072003-04-19 00:22:51 +0000377 while (*buffer->cur != '\n')
378 buffer->cur++;
Neil Booth0d9f2342000-09-18 18:43:05 +0000379
Neil Booth26aea072003-04-19 00:22:51 +0000380 _cpp_process_line_notes (pfile, true);
Per Bothner500bee02004-04-22 19:22:27 -0700381 return orig_line != pfile->line_table->highest_line;
Neil Booth0d9f2342000-09-18 18:43:05 +0000382}
383
Neil Booth26aea072003-04-19 00:22:51 +0000384/* Skips whitespace, saving the next non-whitespace character. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000385static void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000386skip_whitespace (cpp_reader *pfile, cppchar_t c)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000387{
388 cpp_buffer *buffer = pfile->buffer;
Neil Boothf7d151f2003-04-19 07:41:15 +0000389 bool saw_NUL = false;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000390
Neil Booth0d9f2342000-09-18 18:43:05 +0000391 do
Zack Weinbergc5a04732000-04-25 19:32:36 +0000392 {
Neil Booth91fcd152000-07-09 09:19:44 +0000393 /* Horizontal space always OK. */
Neil Booth26aea072003-04-19 00:22:51 +0000394 if (c == ' ' || c == '\t')
Neil Booth0d9f2342000-09-18 18:43:05 +0000395 ;
Neil Booth0d9f2342000-09-18 18:43:05 +0000396 /* Just \f \v or \0 left. */
Neil Booth91fcd152000-07-09 09:19:44 +0000397 else if (c == '\0')
Neil Boothf7d151f2003-04-19 07:41:15 +0000398 saw_NUL = true;
Neil Booth93c803682000-10-28 17:59:06 +0000399 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
Per Bothner500bee02004-04-22 19:22:27 -0700400 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
Neil Boothebef4e82002-04-14 18:42:47 +0000401 CPP_BUF_COL (buffer),
402 "%s in preprocessing directive",
403 c == '\f' ? "form feed" : "vertical tab");
Zack Weinbergc5a04732000-04-25 19:32:36 +0000404
Neil Booth0d9f2342000-09-18 18:43:05 +0000405 c = *buffer->cur++;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000406 }
Kazu Hirataec5c56d2001-08-01 17:57:27 +0000407 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000408 while (is_nvspace (c));
Zack Weinbergc5a04732000-04-25 19:32:36 +0000409
Neil Boothf7d151f2003-04-19 07:41:15 +0000410 if (saw_NUL)
John David Anglin0527bc42003-11-01 22:56:54 +0000411 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
Neil Boothf7d151f2003-04-19 07:41:15 +0000412
Neil Booth480709c2001-10-21 14:04:42 +0000413 buffer->cur--;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000414}
415
Neil Booth93c803682000-10-28 17:59:06 +0000416/* See if the characters of a number token are valid in a name (no
417 '.', '+' or '-'). */
418static int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000419name_p (cpp_reader *pfile, const cpp_string *string)
Neil Booth93c803682000-10-28 17:59:06 +0000420{
421 unsigned int i;
422
423 for (i = 0; i < string->len; i++)
424 if (!is_idchar (string->text[i]))
425 return 0;
426
Kazu Hiratadf383482002-05-22 22:02:16 +0000427 return 1;
Neil Booth93c803682000-10-28 17:59:06 +0000428}
429
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000430/* After parsing an identifier or other sequence, produce a warning about
431 sequences not in NFC/NFKC. */
432static void
433warn_about_normalization (cpp_reader *pfile,
434 const cpp_token *token,
435 const struct normalize_state *s)
436{
437 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
438 && !pfile->state.skipping)
439 {
440 /* Make sure that the token is printed using UCNs, even
441 if we'd otherwise happily print UTF-8. */
Gabriel Dos Reisc3f829c2005-05-28 15:52:48 +0000442 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000443 size_t sz;
444
445 sz = cpp_spell_token (pfile, token, buf, false) - buf;
446 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
447 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
Andreas Jaegercbada202005-03-28 08:55:47 +0200448 "`%.*s' is not in NFKC", (int) sz, buf);
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000449 else
450 cpp_error_with_line (pfile, CPP_DL_WARNING, token->src_loc, 0,
Andreas Jaegercbada202005-03-28 08:55:47 +0200451 "`%.*s' is not in NFC", (int) sz, buf);
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000452 }
453}
454
Neil Boothbced6ed2003-04-19 11:59:44 +0000455/* Returns TRUE if the sequence starting at buffer->cur is invalid in
Neil Booth1613e522003-04-20 07:29:23 +0000456 an identifier. FIRST is TRUE if this starts an identifier. */
Neil Boothbced6ed2003-04-19 11:59:44 +0000457static bool
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000458forms_identifier_p (cpp_reader *pfile, int first,
459 struct normalize_state *state)
Neil Boothbced6ed2003-04-19 11:59:44 +0000460{
Neil Booth1613e522003-04-20 07:29:23 +0000461 cpp_buffer *buffer = pfile->buffer;
Neil Boothbced6ed2003-04-19 11:59:44 +0000462
Neil Booth1613e522003-04-20 07:29:23 +0000463 if (*buffer->cur == '$')
Neil Boothbced6ed2003-04-19 11:59:44 +0000464 {
Neil Booth1613e522003-04-20 07:29:23 +0000465 if (!CPP_OPTION (pfile, dollars_in_ident))
466 return false;
Neil Boothbced6ed2003-04-19 11:59:44 +0000467
Neil Booth1613e522003-04-20 07:29:23 +0000468 buffer->cur++;
Hans-Peter Nilsson78b88112003-06-12 06:09:15 +0000469 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
Neil Booth1613e522003-04-20 07:29:23 +0000470 {
Hans-Peter Nilsson78b88112003-06-12 06:09:15 +0000471 CPP_OPTION (pfile, warn_dollars) = 0;
John David Anglin0527bc42003-11-01 22:56:54 +0000472 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
Neil Booth1613e522003-04-20 07:29:23 +0000473 }
474
475 return true;
476 }
477
478 /* Is this a syntactically valid UCN? */
Geoffrey Keating6baba9b2005-03-15 09:55:41 +0000479 if ((CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
480 && *buffer->cur == '\\'
Neil Booth1613e522003-04-20 07:29:23 +0000481 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
482 {
483 buffer->cur += 2;
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000484 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
485 state))
Neil Booth1613e522003-04-20 07:29:23 +0000486 return true;
487 buffer->cur -= 2;
488 }
489
490 return false;
Neil Boothbced6ed2003-04-19 11:59:44 +0000491}
492
493/* Lex an identifier starting at BUFFER->CUR - 1. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000494static cpp_hashnode *
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000495lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
496 struct normalize_state *nst)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000497{
Neil Booth93c803682000-10-28 17:59:06 +0000498 cpp_hashnode *result;
Geoffrey Keating47e20492005-03-12 10:44:06 +0000499 const uchar *cur;
Zack Weinbergc6e83802004-06-05 20:58:06 +0000500 unsigned int len;
501 unsigned int hash = HT_HASHSTEP (0, *base);
Zack Weinbergc5a04732000-04-25 19:32:36 +0000502
Zack Weinbergc6e83802004-06-05 20:58:06 +0000503 cur = pfile->buffer->cur;
Geoffrey Keating47e20492005-03-12 10:44:06 +0000504 if (! starts_ucn)
505 while (ISIDNUM (*cur))
506 {
507 hash = HT_HASHSTEP (hash, *cur);
508 cur++;
509 }
510 pfile->buffer->cur = cur;
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000511 if (starts_ucn || forms_identifier_p (pfile, false, nst))
Neil Booth10cf9bd2002-03-22 07:23:21 +0000512 {
Geoffrey Keating47e20492005-03-12 10:44:06 +0000513 /* Slower version for identifiers containing UCNs (or $). */
514 do {
515 while (ISIDNUM (*pfile->buffer->cur))
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000516 {
517 pfile->buffer->cur++;
518 NORMALIZE_STATE_UPDATE_IDNUM (nst);
519 }
520 } while (forms_identifier_p (pfile, false, nst));
Geoffrey Keating47e20492005-03-12 10:44:06 +0000521 result = _cpp_interpret_identifier (pfile, base,
522 pfile->buffer->cur - base);
Zack Weinberg2c3fcba2001-09-10 22:34:03 +0000523 }
Geoffrey Keating47e20492005-03-12 10:44:06 +0000524 else
525 {
526 len = cur - base;
527 hash = HT_HASHFINISH (hash, len);
Zack Weinberg2c3fcba2001-09-10 22:34:03 +0000528
Geoffrey Keating47e20492005-03-12 10:44:06 +0000529 result = (cpp_hashnode *)
530 ht_lookup_with_hash (pfile->hash_table, base, len, hash, HT_ALLOC);
531 }
Neil Boothbced6ed2003-04-19 11:59:44 +0000532
533 /* Rarely, identifiers require diagnostics when lexed. */
Zack Weinberg2c3fcba2001-09-10 22:34:03 +0000534 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
535 && !pfile->state.skipping, 0))
536 {
537 /* It is allowed to poison the same identifier twice. */
538 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
John David Anglin0527bc42003-11-01 22:56:54 +0000539 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
Zack Weinberg2c3fcba2001-09-10 22:34:03 +0000540 NODE_NAME (result));
541
542 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
543 replacement list of a variadic macro. */
544 if (result == pfile->spec_nodes.n__VA_ARGS__
545 && !pfile->state.va_args_ok)
John David Anglin0527bc42003-11-01 22:56:54 +0000546 cpp_error (pfile, CPP_DL_PEDWARN,
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000547 "__VA_ARGS__ can only appear in the expansion"
548 " of a C99 variadic macro");
Zack Weinberg2c3fcba2001-09-10 22:34:03 +0000549 }
550
551 return result;
552}
553
Neil Boothbced6ed2003-04-19 11:59:44 +0000554/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000555static void
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000556lex_number (cpp_reader *pfile, cpp_string *number,
557 struct normalize_state *nst)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000558{
Neil Booth562a5c22002-04-21 18:46:42 +0000559 const uchar *cur;
Neil Boothbced6ed2003-04-19 11:59:44 +0000560 const uchar *base;
561 uchar *dest;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000562
Neil Boothbced6ed2003-04-19 11:59:44 +0000563 base = pfile->buffer->cur - 1;
564 do
Neil Booth93c803682000-10-28 17:59:06 +0000565 {
Neil Boothbced6ed2003-04-19 11:59:44 +0000566 cur = pfile->buffer->cur;
Neil Booth10cf9bd2002-03-22 07:23:21 +0000567
Neil Boothbced6ed2003-04-19 11:59:44 +0000568 /* N.B. ISIDNUM does not include $. */
569 while (ISIDNUM (*cur) || *cur == '.' || VALID_SIGN (*cur, cur[-1]))
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000570 {
571 cur++;
572 NORMALIZE_STATE_UPDATE_IDNUM (nst);
573 }
Neil Booth10cf9bd2002-03-22 07:23:21 +0000574
Neil Booth10cf9bd2002-03-22 07:23:21 +0000575 pfile->buffer->cur = cur;
Neil Booth93c803682000-10-28 17:59:06 +0000576 }
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000577 while (forms_identifier_p (pfile, false, nst));
Neil Boothbced6ed2003-04-19 11:59:44 +0000578
579 number->len = cur - base;
580 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
581 memcpy (dest, base, number->len);
582 dest[number->len] = '\0';
583 number->text = dest;
Neil Booth0d9f2342000-09-18 18:43:05 +0000584}
585
Neil Booth6338b352003-04-23 22:44:06 +0000586/* Create a token of type TYPE with a literal spelling. */
Zack Weinbergc5a04732000-04-25 19:32:36 +0000587static void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000588create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
589 unsigned int len, enum cpp_ttype type)
Neil Booth0d9f2342000-09-18 18:43:05 +0000590{
Neil Booth6338b352003-04-23 22:44:06 +0000591 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
Neil Booth0d9f2342000-09-18 18:43:05 +0000592
Neil Booth6338b352003-04-23 22:44:06 +0000593 memcpy (dest, base, len);
594 dest[len] = '\0';
595 token->type = type;
596 token->val.str.len = len;
597 token->val.str.text = dest;
598}
599
600/* Lexes a string, character constant, or angle-bracketed header file
601 name. The stored string contains the spelling, including opening
602 quote and leading any leading 'L'. It returns the type of the
603 literal, or CPP_OTHER if it was not properly terminated.
604
605 The spelling is NUL-terminated, but it is not guaranteed that this
606 is the first NUL since embedded NULs are preserved. */
607static void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000608lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
Neil Booth6338b352003-04-23 22:44:06 +0000609{
610 bool saw_NUL = false;
611 const uchar *cur;
612 cppchar_t terminator;
613 enum cpp_ttype type;
614
615 cur = base;
616 terminator = *cur++;
617 if (terminator == 'L')
618 terminator = *cur++;
619 if (terminator == '\"')
620 type = *base == 'L' ? CPP_WSTRING: CPP_STRING;
621 else if (terminator == '\'')
622 type = *base == 'L' ? CPP_WCHAR: CPP_CHAR;
623 else
624 terminator = '>', type = CPP_HEADER_NAME;
Neil Booth93c803682000-10-28 17:59:06 +0000625
Neil Booth0d9f2342000-09-18 18:43:05 +0000626 for (;;)
627 {
Neil Booth6338b352003-04-23 22:44:06 +0000628 cppchar_t c = *cur++;
Neil Booth7868b4a2001-03-04 12:02:02 +0000629
Neil Booth6f572ac2003-04-19 16:34:33 +0000630 /* In #include-style directives, terminators are not escapable. */
Neil Booth6338b352003-04-23 22:44:06 +0000631 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
632 cur++;
633 else if (c == terminator)
Neil Boothbced6ed2003-04-19 11:59:44 +0000634 break;
Neil Booth6338b352003-04-23 22:44:06 +0000635 else if (c == '\n')
Neil Booth0d9f2342000-09-18 18:43:05 +0000636 {
Neil Booth6338b352003-04-23 22:44:06 +0000637 cur--;
638 type = CPP_OTHER;
639 break;
Neil Booth0d9f2342000-09-18 18:43:05 +0000640 }
Neil Booth6338b352003-04-23 22:44:06 +0000641 else if (c == '\0')
642 saw_NUL = true;
Neil Booth0d9f2342000-09-18 18:43:05 +0000643 }
644
Neil Booth6338b352003-04-23 22:44:06 +0000645 if (saw_NUL && !pfile->state.skipping)
John David Anglin0527bc42003-11-01 22:56:54 +0000646 cpp_error (pfile, CPP_DL_WARNING,
647 "null character(s) preserved in literal");
Neil Booth0d9f2342000-09-18 18:43:05 +0000648
Neil Booth6338b352003-04-23 22:44:06 +0000649 pfile->buffer->cur = cur;
650 create_literal (pfile, token, base, cur - base, type);
Neil Booth0d9f2342000-09-18 18:43:05 +0000651}
652
Neil Booth93c803682000-10-28 17:59:06 +0000653/* The stored comment includes the comment start and any terminator. */
Neil Booth0d9f2342000-09-18 18:43:05 +0000654static void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000655save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
656 cppchar_t type)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000657{
Neil Booth5d7ee2f2000-05-10 09:39:18 +0000658 unsigned char *buffer;
Jason Thorpe477cdac2002-04-07 03:12:23 +0000659 unsigned int len, clen;
Kazu Hiratadf383482002-05-22 22:02:16 +0000660
Neil Booth1c6d33e2000-09-25 22:39:51 +0000661 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
Neil Booth480709c2001-10-21 14:04:42 +0000662
Neil Booth35422032000-10-29 09:56:00 +0000663 /* C++ comments probably (not definitely) have moved past a new
664 line, which we don't want to save in the comment. */
Neil Booth480709c2001-10-21 14:04:42 +0000665 if (is_vspace (pfile->buffer->cur[-1]))
Neil Booth35422032000-10-29 09:56:00 +0000666 len--;
Jason Thorpe477cdac2002-04-07 03:12:23 +0000667
668 /* If we are currently in a directive, then we need to store all
669 C++ comments as C comments internally, and so we need to
670 allocate a little extra space in that case.
671
672 Note that the only time we encounter a directive here is
673 when we are saving comments in a "#define". */
674 clen = (pfile->state.in_directive && type == '/') ? len + 2 : len;
675
676 buffer = _cpp_unaligned_alloc (pfile, clen);
Kazu Hiratadf383482002-05-22 22:02:16 +0000677
Neil Booth0d9f2342000-09-18 18:43:05 +0000678 token->type = CPP_COMMENT;
Jason Thorpe477cdac2002-04-07 03:12:23 +0000679 token->val.str.len = clen;
Neil Booth0d9f2342000-09-18 18:43:05 +0000680 token->val.str.text = buffer;
Neil Boothd1d9a6b2000-05-27 23:19:56 +0000681
Neil Booth1c6d33e2000-09-25 22:39:51 +0000682 buffer[0] = '/';
683 memcpy (buffer + 1, from, len - 1);
Jason Thorpe477cdac2002-04-07 03:12:23 +0000684
Kazu Hirata1eeeb6a2002-04-30 20:48:55 +0000685 /* Finish conversion to a C comment, if necessary. */
Jason Thorpe477cdac2002-04-07 03:12:23 +0000686 if (pfile->state.in_directive && type == '/')
687 {
688 buffer[1] = '*';
689 buffer[clen - 2] = '*';
690 buffer[clen - 1] = '/';
691 }
Neil Booth0d9f2342000-09-18 18:43:05 +0000692}
693
Neil Booth5fddcff2001-09-11 07:00:12 +0000694/* Allocate COUNT tokens for RUN. */
695void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000696_cpp_init_tokenrun (tokenrun *run, unsigned int count)
Neil Booth5fddcff2001-09-11 07:00:12 +0000697{
Bernardo Innocenti72bb2c32004-07-24 20:04:42 +0200698 run->base = XNEWVEC (cpp_token, count);
Neil Booth5fddcff2001-09-11 07:00:12 +0000699 run->limit = run->base + count;
700 run->next = NULL;
701}
702
703/* Returns the next tokenrun, or creates one if there is none. */
704static tokenrun *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000705next_tokenrun (tokenrun *run)
Neil Booth5fddcff2001-09-11 07:00:12 +0000706{
707 if (run->next == NULL)
708 {
Bernardo Innocenti72bb2c32004-07-24 20:04:42 +0200709 run->next = XNEW (tokenrun);
Neil Boothbdcbe492001-09-13 20:05:17 +0000710 run->next->prev = run;
Neil Booth5fddcff2001-09-11 07:00:12 +0000711 _cpp_init_tokenrun (run->next, 250);
712 }
713
714 return run->next;
715}
716
Neil Booth4ed5bcf2001-09-24 22:53:12 +0000717/* Allocate a single token that is invalidated at the same time as the
718 rest of the tokens on the line. Has its line and col set to the
719 same as the last lexed token, so that diagnostics appear in the
720 right place. */
721cpp_token *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000722_cpp_temp_token (cpp_reader *pfile)
Neil Booth4ed5bcf2001-09-24 22:53:12 +0000723{
724 cpp_token *old, *result;
725
726 old = pfile->cur_token - 1;
727 if (pfile->cur_token == pfile->cur_run->limit)
728 {
729 pfile->cur_run = next_tokenrun (pfile->cur_run);
730 pfile->cur_token = pfile->cur_run->base;
731 }
732
733 result = pfile->cur_token++;
Per Bothner12f9df42004-02-11 07:29:30 -0800734 result->src_loc = old->src_loc;
Neil Booth4ed5bcf2001-09-24 22:53:12 +0000735 return result;
736}
737
Neil Booth14baae02001-09-17 18:26:12 +0000738/* Lex a token into RESULT (external interface). Takes care of issues
739 like directive handling, token lookahead, multiple include
Joseph Myersa1f300c2001-11-23 02:05:19 +0000740 optimization and skipping. */
Neil Booth345894b2001-09-16 13:44:29 +0000741const cpp_token *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000742_cpp_lex_token (cpp_reader *pfile)
Neil Booth0d9f2342000-09-18 18:43:05 +0000743{
Neil Boothbdcbe492001-09-13 20:05:17 +0000744 cpp_token *result;
Neil Booth5fddcff2001-09-11 07:00:12 +0000745
Neil Boothbdcbe492001-09-13 20:05:17 +0000746 for (;;)
Neil Booth5fddcff2001-09-11 07:00:12 +0000747 {
Neil Boothbdcbe492001-09-13 20:05:17 +0000748 if (pfile->cur_token == pfile->cur_run->limit)
Neil Booth5fddcff2001-09-11 07:00:12 +0000749 {
Neil Boothbdcbe492001-09-13 20:05:17 +0000750 pfile->cur_run = next_tokenrun (pfile->cur_run);
751 pfile->cur_token = pfile->cur_run->base;
752 }
Neil Boothbdcbe492001-09-13 20:05:17 +0000753
754 if (pfile->lookaheads)
Neil Booth14baae02001-09-17 18:26:12 +0000755 {
756 pfile->lookaheads--;
757 result = pfile->cur_token++;
758 }
Neil Boothbdcbe492001-09-13 20:05:17 +0000759 else
Neil Booth14baae02001-09-17 18:26:12 +0000760 result = _cpp_lex_direct (pfile);
Neil Boothbdcbe492001-09-13 20:05:17 +0000761
762 if (result->flags & BOL)
763 {
Neil Boothbdcbe492001-09-13 20:05:17 +0000764 /* Is this a directive. If _cpp_handle_directive returns
765 false, it is an assembler #. */
766 if (result->type == CPP_HASH
Neil Boothe808ec92002-02-27 07:24:53 +0000767 /* 6.10.3 p 11: Directives in a list of macro arguments
768 gives undefined behavior. This implementation
769 handles the directive as normal. */
770 && pfile->state.parsing_args != 1
Neil Boothbdcbe492001-09-13 20:05:17 +0000771 && _cpp_handle_directive (pfile, result->flags & PREV_WHITE))
Zack Weinberg21b11492004-09-09 19:16:56 +0000772 {
773 if (pfile->directive_result.type == CPP_PADDING)
774 continue;
775 else
776 {
777 result = &pfile->directive_result;
778 break;
779 }
780 }
781
Neil Booth97293892001-09-14 22:04:46 +0000782 if (pfile->cb.line_change && !pfile->state.skipping)
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000783 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
Neil Booth5fddcff2001-09-11 07:00:12 +0000784 }
785
Neil Boothbdcbe492001-09-13 20:05:17 +0000786 /* We don't skip tokens in directives. */
787 if (pfile->state.in_directive)
788 break;
Neil Booth5fddcff2001-09-11 07:00:12 +0000789
Neil Boothbdcbe492001-09-13 20:05:17 +0000790 /* Outside a directive, invalidate controlling macros. At file
Neil Booth14baae02001-09-17 18:26:12 +0000791 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
Kazu Hirata6356f892003-06-12 19:01:08 +0000792 get here and MI optimization works. */
Neil Booth5fddcff2001-09-11 07:00:12 +0000793 pfile->mi_valid = false;
Neil Boothbdcbe492001-09-13 20:05:17 +0000794
795 if (!pfile->state.skipping || result->type == CPP_EOF)
796 break;
Neil Booth5fddcff2001-09-11 07:00:12 +0000797 }
798
Neil Booth345894b2001-09-16 13:44:29 +0000799 return result;
Neil Booth5fddcff2001-09-11 07:00:12 +0000800}
801
Neil Booth26aea072003-04-19 00:22:51 +0000802/* Returns true if a fresh line has been loaded. */
803bool
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000804_cpp_get_fresh_line (cpp_reader *pfile)
Neil Booth004cb262002-05-17 20:16:48 +0000805{
Per Bothner22234f52004-02-18 14:02:39 -0800806 int return_at_eof;
807
Neil Booth26aea072003-04-19 00:22:51 +0000808 /* We can't get a new line until we leave the current directive. */
809 if (pfile->state.in_directive)
810 return false;
Kazu Hiratadf383482002-05-22 22:02:16 +0000811
Neil Booth26aea072003-04-19 00:22:51 +0000812 for (;;)
Neil Booth1a769162002-06-11 05:36:17 +0000813 {
Neil Booth26aea072003-04-19 00:22:51 +0000814 cpp_buffer *buffer = pfile->buffer;
815
816 if (!buffer->need_line)
817 return true;
818
819 if (buffer->next_line < buffer->rlimit)
820 {
821 _cpp_clean_line (pfile);
822 return true;
823 }
824
825 /* First, get out of parsing arguments state. */
826 if (pfile->state.parsing_args)
Neil Booth1a769162002-06-11 05:36:17 +0000827 return false;
828
Neil Booth26aea072003-04-19 00:22:51 +0000829 /* End of buffer. Non-empty files should end in a newline. */
830 if (buffer->buf != buffer->rlimit
831 && buffer->next_line > buffer->rlimit
832 && !buffer->from_stage3)
Neil Booth004cb262002-05-17 20:16:48 +0000833 {
Neil Booth26aea072003-04-19 00:22:51 +0000834 /* Only warn once. */
835 buffer->next_line = buffer->rlimit;
Per Bothner500bee02004-04-22 19:22:27 -0700836 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
Neil Booth26aea072003-04-19 00:22:51 +0000837 CPP_BUF_COLUMN (buffer, buffer->cur),
838 "no newline at end of file");
Neil Booth004cb262002-05-17 20:16:48 +0000839 }
Per Bothner22234f52004-02-18 14:02:39 -0800840
841 return_at_eof = buffer->return_at_eof;
Neil Booth26aea072003-04-19 00:22:51 +0000842 _cpp_pop_buffer (pfile);
Per Bothner22234f52004-02-18 14:02:39 -0800843 if (pfile->buffer == NULL || return_at_eof)
Per Bothnera506c552003-10-02 07:20:38 +0000844 return false;
Neil Booth26aea072003-04-19 00:22:51 +0000845 }
Neil Booth004cb262002-05-17 20:16:48 +0000846}
847
/* If the next character in BUFFER is CHAR, consume it and give RESULT
   the type THEN_TYPE; otherwise leave the character alone and give
   RESULT the type ELSE_TYPE.  Relies on variables named `buffer' and
   `result' being in scope at the point of use.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == (CHAR))               \
        {                                       \
          buffer->cur++;                        \
          result->type = THEN_TYPE;             \
        }                                       \
      else                                      \
        result->type = ELSE_TYPE;               \
    }                                           \
  while (0)
Neil Booth480709c2001-10-21 14:04:42 +0000856
Neil Booth14baae02001-09-17 18:26:12 +0000857/* Lex a token into pfile->cur_token, which is also incremented, to
858 get diagnostics pointing to the correct location.
859
860 Does not handle issues such as token lookahead, multiple-include
Kazu Hirataf1ba6652003-06-28 19:43:01 +0000861 optimization, directives, skipping etc. This function is only
Neil Booth14baae02001-09-17 18:26:12 +0000862 suitable for use by _cpp_lex_token, and in special cases like
863 lex_expansion_token which doesn't care for any of these issues.
864
865 When meeting a newline, returns CPP_EOF if parsing a directive,
866 otherwise returns to the start of the token buffer if permissible.
867 Returns the location of the lexed token. */
868cpp_token *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000869_cpp_lex_direct (cpp_reader *pfile)
Neil Booth5fddcff2001-09-11 07:00:12 +0000870{
Neil Booth0d9f2342000-09-18 18:43:05 +0000871 cppchar_t c;
Neil Boothadb84b42000-11-08 23:08:07 +0000872 cpp_buffer *buffer;
Neil Booth0d9f2342000-09-18 18:43:05 +0000873 const unsigned char *comment_start;
Neil Booth14baae02001-09-17 18:26:12 +0000874 cpp_token *result = pfile->cur_token++;
Neil Booth0d9f2342000-09-18 18:43:05 +0000875
Neil Booth5fddcff2001-09-11 07:00:12 +0000876 fresh_line:
Neil Booth26aea072003-04-19 00:22:51 +0000877 result->flags = 0;
Per Bothner2be570f2003-08-28 18:07:42 -0700878 buffer = pfile->buffer;
Per Bothnera506c552003-10-02 07:20:38 +0000879 if (buffer->need_line)
Neil Booth26aea072003-04-19 00:22:51 +0000880 {
881 if (!_cpp_get_fresh_line (pfile))
882 {
883 result->type = CPP_EOF;
Neil Booth9ff78682003-04-26 21:03:51 +0000884 if (!pfile->state.in_directive)
885 {
886 /* Tell the compiler the line number of the EOF token. */
Per Bothner500bee02004-04-22 19:22:27 -0700887 result->src_loc = pfile->line_table->highest_line;
Neil Booth9ff78682003-04-26 21:03:51 +0000888 result->flags = BOL;
889 }
Neil Booth26aea072003-04-19 00:22:51 +0000890 return result;
891 }
892 if (!pfile->keep_tokens)
893 {
894 pfile->cur_run = &pfile->base_run;
895 result = pfile->base_run.base;
896 pfile->cur_token = result + 1;
897 }
898 result->flags = BOL;
899 if (pfile->state.parsing_args == 2)
900 result->flags |= PREV_WHITE;
901 }
Per Bothnera506c552003-10-02 07:20:38 +0000902 buffer = pfile->buffer;
Neil Booth5fddcff2001-09-11 07:00:12 +0000903 update_tokens_line:
Per Bothner500bee02004-04-22 19:22:27 -0700904 result->src_loc = pfile->line_table->highest_line;
Neil Booth0d9f2342000-09-18 18:43:05 +0000905
Neil Booth5fddcff2001-09-11 07:00:12 +0000906 skipped_white:
Neil Booth26aea072003-04-19 00:22:51 +0000907 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
908 && !pfile->overlaid_buffer)
909 {
910 _cpp_process_line_notes (pfile, false);
Per Bothner500bee02004-04-22 19:22:27 -0700911 result->src_loc = pfile->line_table->highest_line;
Neil Booth26aea072003-04-19 00:22:51 +0000912 }
Neil Booth480709c2001-10-21 14:04:42 +0000913 c = *buffer->cur++;
Per Bothner12f9df42004-02-11 07:29:30 -0800914
Per Bothner500bee02004-04-22 19:22:27 -0700915 LINEMAP_POSITION_FOR_COLUMN (result->src_loc, pfile->line_table,
916 CPP_BUF_COLUMN (buffer, buffer->cur));
Neil Booth5fddcff2001-09-11 07:00:12 +0000917
Neil Booth0d9f2342000-09-18 18:43:05 +0000918 switch (c)
919 {
Neil Booth4d6baaf2001-11-26 23:44:54 +0000920 case ' ': case '\t': case '\f': case '\v': case '\0':
921 result->flags |= PREV_WHITE;
Neil Booth26aea072003-04-19 00:22:51 +0000922 skip_whitespace (pfile, c);
923 goto skipped_white;
Neil Booth4d6baaf2001-11-26 23:44:54 +0000924
Neil Booth26aea072003-04-19 00:22:51 +0000925 case '\n':
Per Bothner12f9df42004-02-11 07:29:30 -0800926 if (buffer->cur < buffer->rlimit)
927 CPP_INCREMENT_LINE (pfile, 0);
Neil Booth26aea072003-04-19 00:22:51 +0000928 buffer->need_line = true;
929 goto fresh_line;
Neil Booth0d9f2342000-09-18 18:43:05 +0000930
Neil Booth0d9f2342000-09-18 18:43:05 +0000931 case '0': case '1': case '2': case '3': case '4':
932 case '5': case '6': case '7': case '8': case '9':
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000933 {
934 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
935 result->type = CPP_NUMBER;
936 lex_number (pfile, &result->val.str, &nst);
937 warn_about_normalization (pfile, result, &nst);
938 break;
939 }
Neil Booth0d9f2342000-09-18 18:43:05 +0000940
Neil Booth0abc6a62001-11-27 22:31:34 +0000941 case 'L':
942 /* 'L' may introduce wide characters or strings. */
Neil Boothbced6ed2003-04-19 11:59:44 +0000943 if (*buffer->cur == '\'' || *buffer->cur == '"')
944 {
Neil Booth6338b352003-04-23 22:44:06 +0000945 lex_string (pfile, result, buffer->cur - 1);
Neil Boothbced6ed2003-04-19 11:59:44 +0000946 break;
947 }
Kazu Hiratadf383482002-05-22 22:02:16 +0000948 /* Fall through. */
Neil Booth0abc6a62001-11-27 22:31:34 +0000949
Neil Booth0d9f2342000-09-18 18:43:05 +0000950 case '_':
951 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
952 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
953 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
954 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
955 case 'y': case 'z':
956 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
Neil Booth0abc6a62001-11-27 22:31:34 +0000957 case 'G': case 'H': case 'I': case 'J': case 'K':
Neil Booth0d9f2342000-09-18 18:43:05 +0000958 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
959 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
960 case 'Y': case 'Z':
961 result->type = CPP_NAME;
Geoffrey Keating50668cf2005-03-15 00:36:33 +0000962 {
963 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
964 result->val.node = lex_identifier (pfile, buffer->cur - 1, false,
965 &nst);
966 warn_about_normalization (pfile, result, &nst);
967 }
Neil Booth0d9f2342000-09-18 18:43:05 +0000968
Neil Booth0d9f2342000-09-18 18:43:05 +0000969 /* Convert named operators to their proper types. */
Neil Booth0abc6a62001-11-27 22:31:34 +0000970 if (result->val.node->flags & NODE_OPERATOR)
Neil Booth0d9f2342000-09-18 18:43:05 +0000971 {
972 result->flags |= NAMED_OP;
Gabriel Dos Reisc3f829c2005-05-28 15:52:48 +0000973 result->type = (enum cpp_ttype) result->val.node->directive_index;
Neil Booth0d9f2342000-09-18 18:43:05 +0000974 }
975 break;
976
977 case '\'':
978 case '"':
Neil Booth6338b352003-04-23 22:44:06 +0000979 lex_string (pfile, result, buffer->cur - 1);
Neil Booth0d9f2342000-09-18 18:43:05 +0000980 break;
981
982 case '/':
Neil Booth1c6d33e2000-09-25 22:39:51 +0000983 /* A potential block or line comment. */
984 comment_start = buffer->cur;
Neil Booth6f572ac2003-04-19 16:34:33 +0000985 c = *buffer->cur;
986
Neil Booth1c6d33e2000-09-25 22:39:51 +0000987 if (c == '*')
988 {
Neil Booth26aea072003-04-19 00:22:51 +0000989 if (_cpp_skip_block_comment (pfile))
John David Anglin0527bc42003-11-01 22:56:54 +0000990 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
Neil Booth0d9f2342000-09-18 18:43:05 +0000991 }
Neil Booth480709c2001-10-21 14:04:42 +0000992 else if (c == '/' && (CPP_OPTION (pfile, cplusplus_comments)
Per Bothner12f9df42004-02-11 07:29:30 -0800993 || cpp_in_system_header (pfile)))
Neil Booth0d9f2342000-09-18 18:43:05 +0000994 {
Neil Boothbdb05a72000-11-26 17:31:13 +0000995 /* Warn about comments only if pedantically GNUC89, and not
996 in system headers. */
997 if (CPP_OPTION (pfile, lang) == CLK_GNUC89 && CPP_PEDANTIC (pfile)
Neil Bootha94c1192000-09-25 23:35:10 +0000998 && ! buffer->warned_cplusplus_comments)
Neil Booth0d9f2342000-09-18 18:43:05 +0000999 {
John David Anglin0527bc42003-11-01 22:56:54 +00001000 cpp_error (pfile, CPP_DL_PEDWARN,
Gabriel Dos Reis56508302002-07-21 21:35:17 +00001001 "C++ style comments are not allowed in ISO C90");
John David Anglin0527bc42003-11-01 22:56:54 +00001002 cpp_error (pfile, CPP_DL_PEDWARN,
Neil Boothebef4e82002-04-14 18:42:47 +00001003 "(this will be reported only once per input file)");
Neil Booth1c6d33e2000-09-25 22:39:51 +00001004 buffer->warned_cplusplus_comments = 1;
Neil Booth0d9f2342000-09-18 18:43:05 +00001005 }
Neil Booth1c6d33e2000-09-25 22:39:51 +00001006
Jakub Jelinek01ef6562001-04-11 11:43:10 +02001007 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
John David Anglin0527bc42003-11-01 22:56:54 +00001008 cpp_error (pfile, CPP_DL_WARNING, "multi-line comment");
Neil Booth0d9f2342000-09-18 18:43:05 +00001009 }
Neil Booth480709c2001-10-21 14:04:42 +00001010 else if (c == '=')
1011 {
Neil Booth6f572ac2003-04-19 16:34:33 +00001012 buffer->cur++;
Neil Booth480709c2001-10-21 14:04:42 +00001013 result->type = CPP_DIV_EQ;
1014 break;
1015 }
1016 else
1017 {
Neil Booth480709c2001-10-21 14:04:42 +00001018 result->type = CPP_DIV;
1019 break;
1020 }
Neil Booth1c6d33e2000-09-25 22:39:51 +00001021
Neil Booth1c6d33e2000-09-25 22:39:51 +00001022 if (!pfile->state.save_comments)
1023 {
1024 result->flags |= PREV_WHITE;
Neil Booth5fddcff2001-09-11 07:00:12 +00001025 goto update_tokens_line;
Neil Booth1c6d33e2000-09-25 22:39:51 +00001026 }
1027
1028 /* Save the comment as a token in its own right. */
Jason Thorpe477cdac2002-04-07 03:12:23 +00001029 save_comment (pfile, result, comment_start, c);
Neil Boothbdcbe492001-09-13 20:05:17 +00001030 break;
Neil Booth0d9f2342000-09-18 18:43:05 +00001031
1032 case '<':
1033 if (pfile->state.angled_headers)
1034 {
Neil Booth6338b352003-04-23 22:44:06 +00001035 lex_string (pfile, result, buffer->cur - 1);
Neil Booth480709c2001-10-21 14:04:42 +00001036 break;
Neil Booth0d9f2342000-09-18 18:43:05 +00001037 }
1038
Neil Booth6f572ac2003-04-19 16:34:33 +00001039 result->type = CPP_LESS;
1040 if (*buffer->cur == '=')
1041 buffer->cur++, result->type = CPP_LESS_EQ;
1042 else if (*buffer->cur == '<')
Neil Booth0d9f2342000-09-18 18:43:05 +00001043 {
Neil Booth6f572ac2003-04-19 16:34:33 +00001044 buffer->cur++;
1045 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
Neil Booth0d9f2342000-09-18 18:43:05 +00001046 }
Neil Booth6f572ac2003-04-19 16:34:33 +00001047 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
Neil Booth0d9f2342000-09-18 18:43:05 +00001048 {
Neil Booth6f572ac2003-04-19 16:34:33 +00001049 buffer->cur++;
1050 IF_NEXT_IS ('=', CPP_MIN_EQ, CPP_MIN);
Neil Booth0d9f2342000-09-18 18:43:05 +00001051 }
Neil Booth6f572ac2003-04-19 16:34:33 +00001052 else if (CPP_OPTION (pfile, digraphs))
Neil Booth480709c2001-10-21 14:04:42 +00001053 {
Neil Booth6f572ac2003-04-19 16:34:33 +00001054 if (*buffer->cur == ':')
1055 {
1056 buffer->cur++;
1057 result->flags |= DIGRAPH;
1058 result->type = CPP_OPEN_SQUARE;
1059 }
1060 else if (*buffer->cur == '%')
1061 {
1062 buffer->cur++;
1063 result->flags |= DIGRAPH;
1064 result->type = CPP_OPEN_BRACE;
1065 }
Neil Booth480709c2001-10-21 14:04:42 +00001066 }
Neil Booth0d9f2342000-09-18 18:43:05 +00001067 break;
1068
1069 case '>':
Neil Booth6f572ac2003-04-19 16:34:33 +00001070 result->type = CPP_GREATER;
1071 if (*buffer->cur == '=')
1072 buffer->cur++, result->type = CPP_GREATER_EQ;
1073 else if (*buffer->cur == '>')
Neil Booth0d9f2342000-09-18 18:43:05 +00001074 {
Neil Booth6f572ac2003-04-19 16:34:33 +00001075 buffer->cur++;
1076 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1077 }
1078 else if (*buffer->cur == '?' && CPP_OPTION (pfile, cplusplus))
1079 {
1080 buffer->cur++;
1081 IF_NEXT_IS ('=', CPP_MAX_EQ, CPP_MAX);
Neil Booth0d9f2342000-09-18 18:43:05 +00001082 }
1083 break;
1084
Neil Boothcbcff6d2000-09-23 21:41:41 +00001085 case '%':
Neil Booth6f572ac2003-04-19 16:34:33 +00001086 result->type = CPP_MOD;
1087 if (*buffer->cur == '=')
1088 buffer->cur++, result->type = CPP_MOD_EQ;
1089 else if (CPP_OPTION (pfile, digraphs))
Neil Booth480709c2001-10-21 14:04:42 +00001090 {
Neil Booth6f572ac2003-04-19 16:34:33 +00001091 if (*buffer->cur == ':')
Neil Booth480709c2001-10-21 14:04:42 +00001092 {
Neil Booth6f572ac2003-04-19 16:34:33 +00001093 buffer->cur++;
1094 result->flags |= DIGRAPH;
1095 result->type = CPP_HASH;
1096 if (*buffer->cur == '%' && buffer->cur[1] == ':')
1097 buffer->cur += 2, result->type = CPP_PASTE;
Neil Booth480709c2001-10-21 14:04:42 +00001098 }
Neil Booth6f572ac2003-04-19 16:34:33 +00001099 else if (*buffer->cur == '>')
1100 {
1101 buffer->cur++;
1102 result->flags |= DIGRAPH;
1103 result->type = CPP_CLOSE_BRACE;
1104 }
Neil Booth480709c2001-10-21 14:04:42 +00001105 }
Neil Booth0d9f2342000-09-18 18:43:05 +00001106 break;
1107
Neil Boothcbcff6d2000-09-23 21:41:41 +00001108 case '.':
Neil Booth480709c2001-10-21 14:04:42 +00001109 result->type = CPP_DOT;
Neil Booth6f572ac2003-04-19 16:34:33 +00001110 if (ISDIGIT (*buffer->cur))
Neil Booth480709c2001-10-21 14:04:42 +00001111 {
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001112 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
Neil Booth480709c2001-10-21 14:04:42 +00001113 result->type = CPP_NUMBER;
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001114 lex_number (pfile, &result->val.str, &nst);
1115 warn_about_normalization (pfile, result, &nst);
Neil Booth480709c2001-10-21 14:04:42 +00001116 }
Neil Booth6f572ac2003-04-19 16:34:33 +00001117 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1118 buffer->cur += 2, result->type = CPP_ELLIPSIS;
1119 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1120 buffer->cur++, result->type = CPP_DOT_STAR;
Neil Booth0d9f2342000-09-18 18:43:05 +00001121 break;
1122
1123 case '+':
Neil Booth6f572ac2003-04-19 16:34:33 +00001124 result->type = CPP_PLUS;
1125 if (*buffer->cur == '+')
1126 buffer->cur++, result->type = CPP_PLUS_PLUS;
1127 else if (*buffer->cur == '=')
1128 buffer->cur++, result->type = CPP_PLUS_EQ;
Neil Booth0d9f2342000-09-18 18:43:05 +00001129 break;
1130
1131 case '-':
Neil Booth6f572ac2003-04-19 16:34:33 +00001132 result->type = CPP_MINUS;
1133 if (*buffer->cur == '>')
Neil Booth0d9f2342000-09-18 18:43:05 +00001134 {
Neil Booth6f572ac2003-04-19 16:34:33 +00001135 buffer->cur++;
Neil Booth480709c2001-10-21 14:04:42 +00001136 result->type = CPP_DEREF;
Neil Booth6f572ac2003-04-19 16:34:33 +00001137 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1138 buffer->cur++, result->type = CPP_DEREF_STAR;
Neil Booth0d9f2342000-09-18 18:43:05 +00001139 }
Neil Booth6f572ac2003-04-19 16:34:33 +00001140 else if (*buffer->cur == '-')
1141 buffer->cur++, result->type = CPP_MINUS_MINUS;
1142 else if (*buffer->cur == '=')
1143 buffer->cur++, result->type = CPP_MINUS_EQ;
Neil Booth0d9f2342000-09-18 18:43:05 +00001144 break;
1145
1146 case '&':
Neil Booth6f572ac2003-04-19 16:34:33 +00001147 result->type = CPP_AND;
1148 if (*buffer->cur == '&')
1149 buffer->cur++, result->type = CPP_AND_AND;
1150 else if (*buffer->cur == '=')
1151 buffer->cur++, result->type = CPP_AND_EQ;
Neil Booth0d9f2342000-09-18 18:43:05 +00001152 break;
Kazu Hiratadf383482002-05-22 22:02:16 +00001153
Neil Booth0d9f2342000-09-18 18:43:05 +00001154 case '|':
Neil Booth6f572ac2003-04-19 16:34:33 +00001155 result->type = CPP_OR;
1156 if (*buffer->cur == '|')
1157 buffer->cur++, result->type = CPP_OR_OR;
1158 else if (*buffer->cur == '=')
1159 buffer->cur++, result->type = CPP_OR_EQ;
Neil Booth0d9f2342000-09-18 18:43:05 +00001160 break;
1161
1162 case ':':
Neil Booth6f572ac2003-04-19 16:34:33 +00001163 result->type = CPP_COLON;
1164 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
1165 buffer->cur++, result->type = CPP_SCOPE;
1166 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
Neil Booth0d9f2342000-09-18 18:43:05 +00001167 {
Neil Booth6f572ac2003-04-19 16:34:33 +00001168 buffer->cur++;
Neil Booth0d9f2342000-09-18 18:43:05 +00001169 result->flags |= DIGRAPH;
Neil Booth480709c2001-10-21 14:04:42 +00001170 result->type = CPP_CLOSE_SQUARE;
1171 }
Neil Booth0d9f2342000-09-18 18:43:05 +00001172 break;
1173
Neil Booth480709c2001-10-21 14:04:42 +00001174 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1175 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1176 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1177 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1178 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); break;
1179
Neil Booth26aea072003-04-19 00:22:51 +00001180 case '?': result->type = CPP_QUERY; break;
Neil Booth0d9f2342000-09-18 18:43:05 +00001181 case '~': result->type = CPP_COMPL; break;
1182 case ',': result->type = CPP_COMMA; break;
1183 case '(': result->type = CPP_OPEN_PAREN; break;
1184 case ')': result->type = CPP_CLOSE_PAREN; break;
1185 case '[': result->type = CPP_OPEN_SQUARE; break;
1186 case ']': result->type = CPP_CLOSE_SQUARE; break;
1187 case '{': result->type = CPP_OPEN_BRACE; break;
1188 case '}': result->type = CPP_CLOSE_BRACE; break;
1189 case ';': result->type = CPP_SEMICOLON; break;
1190
Kazu Hirata40f03652002-09-26 22:25:14 +00001191 /* @ is a punctuator in Objective-C. */
Zack Weinbergcc937582001-03-07 01:32:01 +00001192 case '@': result->type = CPP_ATSIGN; break;
Neil Booth0d9f2342000-09-18 18:43:05 +00001193
Neil Booth0abc6a62001-11-27 22:31:34 +00001194 case '$':
Neil Booth1613e522003-04-20 07:29:23 +00001195 case '\\':
1196 {
1197 const uchar *base = --buffer->cur;
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001198 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
Neil Booth0abc6a62001-11-27 22:31:34 +00001199
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001200 if (forms_identifier_p (pfile, true, &nst))
Neil Booth1613e522003-04-20 07:29:23 +00001201 {
1202 result->type = CPP_NAME;
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001203 result->val.node = lex_identifier (pfile, base, true, &nst);
1204 warn_about_normalization (pfile, result, &nst);
Neil Booth1613e522003-04-20 07:29:23 +00001205 break;
1206 }
1207 buffer->cur++;
Neil Booth10676942003-04-22 19:28:00 +00001208 }
Neil Booth1613e522003-04-20 07:29:23 +00001209
Neil Booth10676942003-04-22 19:28:00 +00001210 default:
Neil Booth6338b352003-04-23 22:44:06 +00001211 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
1212 break;
Neil Booth0d9f2342000-09-18 18:43:05 +00001213 }
Neil Boothbdcbe492001-09-13 20:05:17 +00001214
1215 return result;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001216}
1217
Neil Booth59325652003-04-24 20:03:57 +00001218/* An upper bound on the number of bytes needed to spell TOKEN.
1219 Does not include preceding whitespace. */
Neil Booth93c803682000-10-28 17:59:06 +00001220unsigned int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001221cpp_token_len (const cpp_token *token)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001222{
Neil Booth93c803682000-10-28 17:59:06 +00001223 unsigned int len;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001224
Neil Booth93c803682000-10-28 17:59:06 +00001225 switch (TOKEN_SPELL (token))
Zack Weinbergc5a04732000-04-25 19:32:36 +00001226 {
Neil Booth59325652003-04-24 20:03:57 +00001227 default: len = 4; break;
Neil Booth6338b352003-04-23 22:44:06 +00001228 case SPELL_LITERAL: len = token->val.str.len; break;
Geoffrey Keating47e20492005-03-12 10:44:06 +00001229 case SPELL_IDENT: len = NODE_LEN (token->val.node) * 10; break;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001230 }
Neil Booth59325652003-04-24 20:03:57 +00001231
1232 return len;
Zack Weinberg041c3192000-07-04 01:58:21 +00001233}
1234
/* Parse one UTF-8 sequence starting at NAME and place the matching
   \U escape in BUFFER.  Return the number of bytes read out of NAME.
   (There are always 10 bytes written to BUFFER: a backslash, 'U' and
   eight lower-case hexadecimal digits.)  Aborts if a continuation
   byte of the sequence is ill-formed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  const unsigned char *src = name;
  unsigned char *out = buffer;
  unsigned int lead = *src;
  unsigned long code_point;
  int seq_len = 0;
  int remaining;
  int shift;

  /* The count of leading one bits in the first byte gives the length
     of the UTF-8 sequence.  */
  while (lead & 0x80)
    {
      seq_len++;
      lead <<= 1;
    }

  /* The bits of the first byte below the length marker start the
     value; every following byte contributes six more.  */
  code_point = *src & (0x7F >> seq_len);
  for (remaining = seq_len - 1; remaining > 0; remaining--)
    {
      unsigned char cont = *++src;

      code_point = (code_point << 6) | (cont & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((cont & 0xC0) != 0x80)
        abort ();
    }

  *out++ = '\\';
  *out++ = 'U';
  for (shift = 28; shift >= 0; shift -= 4)
    *out++ = hex_digits[(code_point >> shift) & 0xF];

  return seq_len;
}
1268
1269
Neil Booth3fef5b22000-05-08 22:22:49 +00001270/* Write the spelling of a token TOKEN to BUFFER. The buffer must
Zack Weinbergcf00a882000-07-08 02:33:00 +00001271 already contain the enough space to hold the token's spelling.
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001272 Returns a pointer to the character after the last character written.
Geoffrey Keating47e20492005-03-12 10:44:06 +00001273 FORSTRING is true if this is to be the spelling after translation
1274 phase 1 (this is different for UCNs).
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001275 FIXME: Would be nice if we didn't need the PFILE argument. */
Neil Booth93c803682000-10-28 17:59:06 +00001276unsigned char *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001277cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
Geoffrey Keating47e20492005-03-12 10:44:06 +00001278 unsigned char *buffer, bool forstring)
Neil Booth3fef5b22000-05-08 22:22:49 +00001279{
Zack Weinberg96be6992000-07-18 23:25:06 +00001280 switch (TOKEN_SPELL (token))
Neil Booth3fef5b22000-05-08 22:22:49 +00001281 {
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001282 case SPELL_OPERATOR:
Neil Booth3fef5b22000-05-08 22:22:49 +00001283 {
1284 const unsigned char *spelling;
1285 unsigned char c;
1286
1287 if (token->flags & DIGRAPH)
John David Anglin37b85242001-03-02 01:11:50 +00001288 spelling
1289 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
Zack Weinberg92936ec2000-07-19 20:18:08 +00001290 else if (token->flags & NAMED_OP)
1291 goto spell_ident;
Neil Booth3fef5b22000-05-08 22:22:49 +00001292 else
Zack Weinberg96be6992000-07-18 23:25:06 +00001293 spelling = TOKEN_NAME (token);
Kazu Hiratadf383482002-05-22 22:02:16 +00001294
Neil Booth3fef5b22000-05-08 22:22:49 +00001295 while ((c = *spelling++) != '\0')
1296 *buffer++ = c;
1297 }
1298 break;
1299
Zack Weinberg47ad4132001-10-06 23:11:27 +00001300 spell_ident:
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001301 case SPELL_IDENT:
Geoffrey Keating47e20492005-03-12 10:44:06 +00001302 if (forstring)
1303 {
1304 memcpy (buffer, NODE_NAME (token->val.node),
1305 NODE_LEN (token->val.node));
1306 buffer += NODE_LEN (token->val.node);
1307 }
1308 else
1309 {
1310 size_t i;
1311 const unsigned char * name = NODE_NAME (token->val.node);
1312
1313 for (i = 0; i < NODE_LEN (token->val.node); i++)
1314 if (name[i] & ~0x7F)
1315 {
1316 i += utf8_to_ucn (buffer, name + i) - 1;
1317 buffer += 10;
1318 }
1319 else
1320 *buffer++ = NODE_NAME (token->val.node)[i];
1321 }
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001322 break;
Neil Booth3fef5b22000-05-08 22:22:49 +00001323
Neil Booth6338b352003-04-23 22:44:06 +00001324 case SPELL_LITERAL:
Zack Weinberg47ad4132001-10-06 23:11:27 +00001325 memcpy (buffer, token->val.str.text, token->val.str.len);
1326 buffer += token->val.str.len;
1327 break;
1328
Neil Booth3fef5b22000-05-08 22:22:49 +00001329 case SPELL_NONE:
John David Anglin0527bc42003-11-01 22:56:54 +00001330 cpp_error (pfile, CPP_DL_ICE,
1331 "unspellable token %s", TOKEN_NAME (token));
Neil Booth3fef5b22000-05-08 22:22:49 +00001332 break;
1333 }
1334
1335 return buffer;
1336}
1337
Neil Booth5d8ebbd2002-01-03 21:43:09 +00001338/* Returns TOKEN spelt as a null-terminated string. The string is
1339 freed when the reader is destroyed. Useful for diagnostics. */
Neil Booth93c803682000-10-28 17:59:06 +00001340unsigned char *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001341cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
Neil Booth59325652003-04-24 20:03:57 +00001342{
1343 unsigned int len = cpp_token_len (token) + 1;
Neil Boothece54d52001-09-28 09:40:22 +00001344 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
Zack Weinberg041c3192000-07-04 01:58:21 +00001345
Geoffrey Keating47e20492005-03-12 10:44:06 +00001346 end = cpp_spell_token (pfile, token, start, false);
Neil Booth93c803682000-10-28 17:59:06 +00001347 end[0] = '\0';
Zack Weinberg041c3192000-07-04 01:58:21 +00001348
Neil Booth93c803682000-10-28 17:59:06 +00001349 return start;
Zack Weinberg041c3192000-07-04 01:58:21 +00001350}
1351
Neil Booth5d8ebbd2002-01-03 21:43:09 +00001352/* Used by C front ends, which really should move to using
1353 cpp_token_as_text. */
Neil Booth93c803682000-10-28 17:59:06 +00001354const char *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001355cpp_type2name (enum cpp_ttype type)
Zack Weinberg041c3192000-07-04 01:58:21 +00001356{
Neil Booth93c803682000-10-28 17:59:06 +00001357 return (const char *) token_spellings[type].name;
Zack Weinberg041c3192000-07-04 01:58:21 +00001358}
1359
Neil Booth4ed5bcf2001-09-24 22:53:12 +00001360/* Writes the spelling of token to FP, without any preceding space.
1361 Separated from cpp_spell_token for efficiency - to avoid stdio
1362 double-buffering. */
Zack Weinberg041c3192000-07-04 01:58:21 +00001363void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001364cpp_output_token (const cpp_token *token, FILE *fp)
Zack Weinberg041c3192000-07-04 01:58:21 +00001365{
Neil Booth93c803682000-10-28 17:59:06 +00001366 switch (TOKEN_SPELL (token))
Zack Weinberg041c3192000-07-04 01:58:21 +00001367 {
Neil Booth93c803682000-10-28 17:59:06 +00001368 case SPELL_OPERATOR:
1369 {
1370 const unsigned char *spelling;
Zack Weinberg3b681e92001-09-28 07:00:27 +00001371 int c;
Neil Booth93c803682000-10-28 17:59:06 +00001372
1373 if (token->flags & DIGRAPH)
John David Anglin37b85242001-03-02 01:11:50 +00001374 spelling
1375 = digraph_spellings[(int) token->type - (int) CPP_FIRST_DIGRAPH];
Neil Booth93c803682000-10-28 17:59:06 +00001376 else if (token->flags & NAMED_OP)
1377 goto spell_ident;
1378 else
1379 spelling = TOKEN_NAME (token);
1380
Zack Weinberg3b681e92001-09-28 07:00:27 +00001381 c = *spelling;
1382 do
1383 putc (c, fp);
1384 while ((c = *++spelling) != '\0');
Neil Booth93c803682000-10-28 17:59:06 +00001385 }
1386 break;
1387
1388 spell_ident:
1389 case SPELL_IDENT:
Geoffrey Keating47e20492005-03-12 10:44:06 +00001390 {
1391 size_t i;
1392 const unsigned char * name = NODE_NAME (token->val.node);
1393
1394 for (i = 0; i < NODE_LEN (token->val.node); i++)
1395 if (name[i] & ~0x7F)
1396 {
1397 unsigned char buffer[10];
1398 i += utf8_to_ucn (buffer, name + i) - 1;
1399 fwrite (buffer, 1, 10, fp);
1400 }
1401 else
1402 fputc (NODE_NAME (token->val.node)[i], fp);
1403 }
1404 break;
Neil Booth93c803682000-10-28 17:59:06 +00001405
Neil Booth6338b352003-04-23 22:44:06 +00001406 case SPELL_LITERAL:
Zack Weinberg47ad4132001-10-06 23:11:27 +00001407 fwrite (token->val.str.text, 1, token->val.str.len, fp);
1408 break;
1409
Neil Booth93c803682000-10-28 17:59:06 +00001410 case SPELL_NONE:
1411 /* An error, most probably. */
1412 break;
Zack Weinberg041c3192000-07-04 01:58:21 +00001413 }
Zack Weinberg041c3192000-07-04 01:58:21 +00001414}
1415
Neil Booth93c803682000-10-28 17:59:06 +00001416/* Compare two tokens. */
1417int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001418_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
Neil Booth93c803682000-10-28 17:59:06 +00001419{
1420 if (a->type == b->type && a->flags == b->flags)
1421 switch (TOKEN_SPELL (a))
1422 {
1423 default: /* Keep compiler happy. */
1424 case SPELL_OPERATOR:
1425 return 1;
Neil Booth93c803682000-10-28 17:59:06 +00001426 case SPELL_NONE:
Neil Booth56051c02000-11-06 18:47:21 +00001427 return (a->type != CPP_MACRO_ARG || a->val.arg_no == b->val.arg_no);
Neil Booth93c803682000-10-28 17:59:06 +00001428 case SPELL_IDENT:
1429 return a->val.node == b->val.node;
Neil Booth6338b352003-04-23 22:44:06 +00001430 case SPELL_LITERAL:
Neil Booth93c803682000-10-28 17:59:06 +00001431 return (a->val.str.len == b->val.str.len
1432 && !memcmp (a->val.str.text, b->val.str.text,
1433 a->val.str.len));
1434 }
1435
1436 return 0;
1437}
1438
Neil Booth93c803682000-10-28 17:59:06 +00001439/* Returns nonzero if a space should be inserted to avoid an
1440 accidental token paste for output. For simplicity, it is
1441 conservative, and occasionally advises a space where one is not
1442 needed, e.g. "." and ".2". */
Neil Booth93c803682000-10-28 17:59:06 +00001443int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001444cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1445 const cpp_token *token2)
Zack Weinberg041c3192000-07-04 01:58:21 +00001446{
Neil Booth93c803682000-10-28 17:59:06 +00001447 enum cpp_ttype a = token1->type, b = token2->type;
1448 cppchar_t c;
Zack Weinberg041c3192000-07-04 01:58:21 +00001449
Neil Booth93c803682000-10-28 17:59:06 +00001450 if (token1->flags & NAMED_OP)
1451 a = CPP_NAME;
1452 if (token2->flags & NAMED_OP)
1453 b = CPP_NAME;
Zack Weinberg041c3192000-07-04 01:58:21 +00001454
Neil Booth93c803682000-10-28 17:59:06 +00001455 c = EOF;
1456 if (token2->flags & DIGRAPH)
John David Anglin37b85242001-03-02 01:11:50 +00001457 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
Neil Booth93c803682000-10-28 17:59:06 +00001458 else if (token_spellings[b].category == SPELL_OPERATOR)
1459 c = token_spellings[b].name[0];
Zack Weinberg417f3e32000-07-11 23:20:53 +00001460
Neil Booth93c803682000-10-28 17:59:06 +00001461 /* Quickly get everything that can paste with an '='. */
John David Anglin37b85242001-03-02 01:11:50 +00001462 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
Zack Weinberg041c3192000-07-04 01:58:21 +00001463 return 1;
1464
Neil Booth93c803682000-10-28 17:59:06 +00001465 switch (a)
1466 {
1467 case CPP_GREATER: return c == '>' || c == '?';
1468 case CPP_LESS: return c == '<' || c == '?' || c == '%' || c == ':';
1469 case CPP_PLUS: return c == '+';
1470 case CPP_MINUS: return c == '-' || c == '>';
1471 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
1472 case CPP_MOD: return c == ':' || c == '>';
1473 case CPP_AND: return c == '&';
1474 case CPP_OR: return c == '|';
1475 case CPP_COLON: return c == ':' || c == '>';
1476 case CPP_DEREF: return c == '*';
Neil Booth26ec42e2001-01-28 11:22:23 +00001477 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
Neil Booth93c803682000-10-28 17:59:06 +00001478 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
1479 case CPP_NAME: return ((b == CPP_NUMBER
1480 && name_p (pfile, &token2->val.str))
1481 || b == CPP_NAME
1482 || b == CPP_CHAR || b == CPP_STRING); /* L */
1483 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
1484 || c == '.' || c == '+' || c == '-');
Neil Booth1613e522003-04-20 07:29:23 +00001485 /* UCNs */
Neil Booth10676942003-04-22 19:28:00 +00001486 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
1487 && b == CPP_NAME)
Neil Booth1613e522003-04-20 07:29:23 +00001488 || (CPP_OPTION (pfile, objc)
Neil Booth10676942003-04-22 19:28:00 +00001489 && token1->val.str.text[0] == '@'
Neil Booth1613e522003-04-20 07:29:23 +00001490 && (b == CPP_NAME || b == CPP_STRING)));
Neil Booth93c803682000-10-28 17:59:06 +00001491 default: break;
1492 }
Zack Weinberg041c3192000-07-04 01:58:21 +00001493
1494 return 0;
1495}
1496
Neil Booth93c803682000-10-28 17:59:06 +00001497/* Output all the remaining tokens on the current line, and a newline
Neil Booth4ed5bcf2001-09-24 22:53:12 +00001498 character, to FP. Leading whitespace is removed. If there are
1499 macros, special token padding is not performed. */
Neil Booth93c803682000-10-28 17:59:06 +00001500void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001501cpp_output_line (cpp_reader *pfile, FILE *fp)
Zack Weinberg041c3192000-07-04 01:58:21 +00001502{
Neil Booth4ed5bcf2001-09-24 22:53:12 +00001503 const cpp_token *token;
Zack Weinberg041c3192000-07-04 01:58:21 +00001504
Neil Booth4ed5bcf2001-09-24 22:53:12 +00001505 token = cpp_get_token (pfile);
1506 while (token->type != CPP_EOF)
Zack Weinberg6ead1e92000-07-31 23:47:19 +00001507 {
Neil Booth4ed5bcf2001-09-24 22:53:12 +00001508 cpp_output_token (token, fp);
1509 token = cpp_get_token (pfile);
1510 if (token->flags & PREV_WHITE)
1511 putc (' ', fp);
Zack Weinberg6ead1e92000-07-31 23:47:19 +00001512 }
1513
Neil Booth93c803682000-10-28 17:59:06 +00001514 putc ('\n', fp);
Zack Weinberg041c3192000-07-04 01:58:21 +00001515}
1516
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when BUFF
   is extended: MIN_EXTRA plus twice the room left in BUFF.  Every
   macro parameter is parenthesized so that an argument containing a
   low-precedence operator cannot re-associate with the expansion.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
1531
Neil Boothc9e7a602001-09-27 12:59:38 +00001532/* Create a new allocation buffer. Place the control block at the end
1533 of the buffer, so that buffer overflows will cause immediate chaos. */
Neil Boothb8af0ca2001-09-26 17:52:50 +00001534static _cpp_buff *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001535new_buff (size_t len)
Neil Boothb8af0ca2001-09-26 17:52:50 +00001536{
1537 _cpp_buff *result;
Neil Boothece54d52001-09-28 09:40:22 +00001538 unsigned char *base;
Neil Boothb8af0ca2001-09-26 17:52:50 +00001539
Neil Booth1e013d22001-09-26 21:44:35 +00001540 if (len < MIN_BUFF_SIZE)
1541 len = MIN_BUFF_SIZE;
Neil Boothc70f6ed2002-06-07 06:26:32 +00001542 len = CPP_ALIGN (len);
Neil Boothb8af0ca2001-09-26 17:52:50 +00001543
Gabriel Dos Reisc3f829c2005-05-28 15:52:48 +00001544 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
Neil Boothb8af0ca2001-09-26 17:52:50 +00001545 result = (_cpp_buff *) (base + len);
1546 result->base = base;
1547 result->cur = base;
1548 result->limit = base + len;
1549 result->next = NULL;
1550 return result;
1551}
1552
1553/* Place a chain of unwanted allocation buffers on the free list. */
1554void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001555_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
Neil Boothb8af0ca2001-09-26 17:52:50 +00001556{
1557 _cpp_buff *end = buff;
1558
1559 while (end->next)
1560 end = end->next;
1561 end->next = pfile->free_buffs;
1562 pfile->free_buffs = buff;
1563}
1564
1565/* Return a free buffer of size at least MIN_SIZE. */
1566_cpp_buff *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001567_cpp_get_buff (cpp_reader *pfile, size_t min_size)
Neil Boothb8af0ca2001-09-26 17:52:50 +00001568{
1569 _cpp_buff *result, **p;
1570
1571 for (p = &pfile->free_buffs;; p = &(*p)->next)
1572 {
Neil Booth61420882001-09-28 13:25:38 +00001573 size_t size;
Neil Booth1e013d22001-09-26 21:44:35 +00001574
1575 if (*p == NULL)
Neil Boothb8af0ca2001-09-26 17:52:50 +00001576 return new_buff (min_size);
Neil Booth1e013d22001-09-26 21:44:35 +00001577 result = *p;
1578 size = result->limit - result->base;
1579 /* Return a buffer that's big enough, but don't waste one that's
1580 way too big. */
Richard Earnshaw34f52712001-10-17 16:20:04 +00001581 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
Neil Boothb8af0ca2001-09-26 17:52:50 +00001582 break;
1583 }
1584
1585 *p = result->next;
1586 result->next = NULL;
1587 result->cur = result->base;
1588 return result;
1589}
1590
Kazu Hirata4fe9b912001-10-09 06:03:16 +00001591/* Creates a new buffer with enough space to hold the uncommitted
Neil Booth8c3b2692001-09-30 10:03:11 +00001592 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
1593 the excess bytes to the new buffer. Chains the new buffer after
1594 BUFF, and returns the new buffer. */
Neil Boothb8af0ca2001-09-26 17:52:50 +00001595_cpp_buff *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001596_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
Neil Boothb8af0ca2001-09-26 17:52:50 +00001597{
Neil Booth61420882001-09-28 13:25:38 +00001598 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
Neil Booth8c3b2692001-09-30 10:03:11 +00001599 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
Neil Boothb8af0ca2001-09-26 17:52:50 +00001600
Neil Booth8c3b2692001-09-30 10:03:11 +00001601 buff->next = new_buff;
1602 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
1603 return new_buff;
1604}
1605
Kazu Hirata4fe9b912001-10-09 06:03:16 +00001606/* Creates a new buffer with enough space to hold the uncommitted
Neil Booth8c3b2692001-09-30 10:03:11 +00001607 remaining bytes of the buffer pointed to by BUFF, and at least
1608 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
1609 Chains the new buffer before the buffer pointed to by BUFF, and
1610 updates the pointer to point to the new buffer. */
1611void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001612_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
Neil Booth8c3b2692001-09-30 10:03:11 +00001613{
1614 _cpp_buff *new_buff, *old_buff = *pbuff;
1615 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
1616
1617 new_buff = _cpp_get_buff (pfile, size);
1618 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
1619 new_buff->next = old_buff;
1620 *pbuff = new_buff;
Neil Boothb8af0ca2001-09-26 17:52:50 +00001621}
1622
1623/* Free a chain of buffers starting at BUFF. */
1624void
Andreas Jaeger5671bf22003-07-07 21:11:59 +02001625_cpp_free_buff (_cpp_buff *buff)
Neil Boothb8af0ca2001-09-26 17:52:50 +00001626{
1627 _cpp_buff *next;
1628
1629 for (; buff; buff = next)
1630 {
1631 next = buff->next;
1632 free (buff->base);
1633 }
1634}
Neil Booth93c803682000-10-28 17:59:06 +00001635
Neil Boothece54d52001-09-28 09:40:22 +00001636/* Allocate permanent, unaligned storage of length LEN. */
1637unsigned char *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001638_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
Neil Boothece54d52001-09-28 09:40:22 +00001639{
1640 _cpp_buff *buff = pfile->u_buff;
1641 unsigned char *result = buff->cur;
1642
1643 if (len > (size_t) (buff->limit - result))
1644 {
1645 buff = _cpp_get_buff (pfile, len);
1646 buff->next = pfile->u_buff;
1647 pfile->u_buff = buff;
1648 result = buff->cur;
1649 }
1650
1651 buff->cur = result + len;
1652 return result;
1653}
1654
Neil Booth87062812001-10-20 09:00:53 +00001655/* Allocate permanent, unaligned storage of length LEN from a_buff.
1656 That buffer is used for growing allocations when saving macro
1657 replacement lists in a #define, and when parsing an answer to an
1658 assertion in #assert, #unassert or #if (and therefore possibly
1659 whilst expanding macros). It therefore must not be used by any
1660 code that they might call: specifically the lexer and the guts of
1661 the macro expander.
1662
1663 All existing other uses clearly fit this restriction: storing
1664 registered pragmas during initialization. */
Neil Booth93c803682000-10-28 17:59:06 +00001665unsigned char *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001666_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
Neil Booth93c803682000-10-28 17:59:06 +00001667{
Neil Booth8c3b2692001-09-30 10:03:11 +00001668 _cpp_buff *buff = pfile->a_buff;
1669 unsigned char *result = buff->cur;
Neil Booth93c803682000-10-28 17:59:06 +00001670
Neil Booth8c3b2692001-09-30 10:03:11 +00001671 if (len > (size_t) (buff->limit - result))
Zack Weinberg041c3192000-07-04 01:58:21 +00001672 {
Neil Booth8c3b2692001-09-30 10:03:11 +00001673 buff = _cpp_get_buff (pfile, len);
1674 buff->next = pfile->a_buff;
1675 pfile->a_buff = buff;
1676 result = buff->cur;
Zack Weinberg041c3192000-07-04 01:58:21 +00001677 }
1678
Neil Booth8c3b2692001-09-30 10:03:11 +00001679 buff->cur = result + len;
Neil Booth93c803682000-10-28 17:59:06 +00001680 return result;
Zack Weinberg041c3192000-07-04 01:58:21 +00001681}
Geoffrey Keatingd8044162004-06-09 20:10:13 +00001682
1683/* Say which field of TOK is in use. */
1684
1685enum cpp_token_fld_kind
1686cpp_token_val_index (cpp_token *tok)
1687{
1688 switch (TOKEN_SPELL (tok))
1689 {
1690 case SPELL_IDENT:
1691 return CPP_TOKEN_FLD_NODE;
1692 case SPELL_LITERAL:
1693 return CPP_TOKEN_FLD_STR;
1694 case SPELL_NONE:
1695 if (tok->type == CPP_MACRO_ARG)
1696 return CPP_TOKEN_FLD_ARG_NO;
1697 else if (tok->type == CPP_PADDING)
1698 return CPP_TOKEN_FLD_SOURCE;
Zack Weinberg21b11492004-09-09 19:16:56 +00001699 else if (tok->type == CPP_PRAGMA)
1700 return CPP_TOKEN_FLD_STR;
Geoffrey Keatingd8044162004-06-09 20:10:13 +00001701 /* else fall through */
1702 default:
1703 return CPP_TOKEN_FLD_NONE;
1704 }
1705}