blob: 5758e580c2ba09729a0e5874c838691c44ee6102 [file] [log] [blame]
/* CPP Library - lexical analysis.
   Copyright (C) 2000-2015 Free Software Foundation, Inc.
   Contributed by Per Bothner, 1994-95.
   Based on CCCP program by Paul Rubin, June 1986
   Adapted to ANSI C, Richard Stallman, Jan 1987
   Broken out to separate file, Zack Weinberg, Mar 2000

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
Zack Weinberg45b966d2000-03-13 22:01:08 +000021
22#include "config.h"
23#include "system.h"
Zack Weinberg45b966d2000-03-13 22:01:08 +000024#include "cpplib.h"
Paolo Bonzini4f4e53dd2004-05-24 10:50:45 +000025#include "internal.h"
Zack Weinberg45b966d2000-03-13 22:01:08 +000026
/* Spelling category recorded for each token type in token_spellings.
   SPELL_OPERATOR is the category given to every OP() entry of
   TTYPE_TABLE; the category of a TK() entry is named by that entry
   itself.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
34
Neil Booth93c803682000-10-28 17:59:06 +000035struct token_spelling
Zack Weinbergf9a0e962000-07-13 02:32:41 +000036{
Neil Booth93c803682000-10-28 17:59:06 +000037 enum spell_type category;
38 const unsigned char *name;
Zack Weinbergf9a0e962000-07-13 02:32:41 +000039};
40
Zack Weinberg8206c792001-10-11 21:21:57 +000041static const unsigned char *const digraph_spellings[] =
Kris Van Heesb6baa672008-04-18 13:58:08 +000042{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
Neil Booth93c803682000-10-28 17:59:06 +000043
Kris Van Heesb6baa672008-04-18 13:58:08 +000044#define OP(e, s) { SPELL_OPERATOR, UC s },
45#define TK(e, s) { SPELL_ ## s, UC #e },
Zack Weinberg8206c792001-10-11 21:21:57 +000046static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
Neil Booth93c803682000-10-28 17:59:06 +000047#undef OP
48#undef TK
49
50#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
51#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
Zack Weinbergf2d5f0c2000-04-14 23:29:45 +000052
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000053static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
54static int skip_line_comment (cpp_reader *);
55static void skip_whitespace (cpp_reader *, cppchar_t);
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000056static void lex_string (cpp_reader *, cpp_token *, const uchar *);
57static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
Matthew Gingell631d0d32008-10-05 12:35:36 +000058static void store_comment (cpp_reader *, cpp_token *);
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000059static void create_literal (cpp_reader *, cpp_token *, const uchar *,
60 unsigned int, enum cpp_ttype);
61static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
62static int name_p (cpp_reader *, const cpp_string *);
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000063static tokenrun *next_tokenrun (tokenrun *);
Neil Booth0d9f2342000-09-18 18:43:05 +000064
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000065static _cpp_buff *new_buff (size_t);
Zack Weinberg15dad1d2000-05-18 15:55:46 +000066
Neil Booth9d10c9a2003-03-06 23:12:30 +000067
Zack Weinberg6d2c2042000-04-30 17:30:25 +000068/* Utility routine:
Zack Weinberg6d2c2042000-04-30 17:30:25 +000069
Zack Weinbergbfb9dc72000-07-08 19:00:39 +000070 Compares, the token TOKEN to the NUL-terminated string STRING.
71 TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
Zack Weinberg6d2c2042000-04-30 17:30:25 +000072int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000073cpp_ideq (const cpp_token *token, const char *string)
Zack Weinberg6d2c2042000-04-30 17:30:25 +000074{
Zack Weinbergbfb9dc72000-07-08 19:00:39 +000075 if (token->type != CPP_NAME)
Zack Weinberg6d2c2042000-04-30 17:30:25 +000076 return 0;
Zack Weinbergbfb9dc72000-07-08 19:00:39 +000077
Joseph Myers9a0c6182009-05-10 15:27:32 +010078 return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
Zack Weinberg6d2c2042000-04-30 17:30:25 +000079}
80
Neil Booth26aea072003-04-19 00:22:51 +000081/* Record a note TYPE at byte POS into the current cleaned logical
82 line. */
Neil Booth87062812001-10-20 09:00:53 +000083static void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +000084add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
Neil Booth0d9f2342000-09-18 18:43:05 +000085{
Neil Booth26aea072003-04-19 00:22:51 +000086 if (buffer->notes_used == buffer->notes_cap)
Zack Weinbergc5a04732000-04-25 19:32:36 +000087 {
Neil Booth26aea072003-04-19 00:22:51 +000088 buffer->notes_cap = buffer->notes_cap * 2 + 200;
Gabriel Dos Reisc3f829c2005-05-28 15:52:48 +000089 buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
90 buffer->notes_cap);
Zack Weinbergc5a04732000-04-25 19:32:36 +000091 }
Neil Booth0d9f2342000-09-18 18:43:05 +000092
Neil Booth26aea072003-04-19 00:22:51 +000093 buffer->notes[buffer->notes_used].pos = pos;
94 buffer->notes[buffer->notes_used].type = type;
95 buffer->notes_used++;
Zack Weinbergc5a04732000-04-25 19:32:36 +000096}
97
Richard Henderson246a2fc2010-08-21 12:05:40 -070098
/* Fast path to find line special characters using optimized character
   scanning algorithms.  Anything complicated falls back to the slow
   path below.  Since this loop is very hot it's worth doing these kinds
   of optimizations.

   One of the paths through the ifdefs should provide

     const uchar *search_line_fast (const uchar *s, const uchar *end);

   Between S and END, search for \n, \r, \\, ?.  Return a pointer to
   the found character.

   Note that the last character of the buffer is *always* a newline,
   as forced by _cpp_convert_input.  This fact can be used to avoid
   explicitly looking for the end of the buffer.  */
114
/* Configure gives us an ifdef test.  Default to little-endian when it
   did not define the macro.  */
#ifndef WORDS_BIGENDIAN
#define WORDS_BIGENDIAN 0
#endif

/* We'd like the largest integer that fits into a register.  There's nothing
   in <stdint.h> that gives us that.  For most hosts this is unsigned long,
   but MS decided on an LLP64 model.  Thankfully when building with GCC we
   can get the "real" word size.  */
#ifdef __GNUC__
typedef unsigned int word_type __attribute__((__mode__(__word__)));
#else
typedef unsigned long word_type;
#endif

/* The code below is only expecting sizes 4 or 8.
   Die at compile-time if this expectation is violated: the array bound
   evaluates to -1 for any other size.  */
typedef char check_word_type_size
  [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
134
135/* Return X with the first N bytes forced to values that won't match one
136 of the interesting characters. Note that NUL is not interesting. */
137
138static inline word_type
139acc_char_mask_misalign (word_type val, unsigned int n)
140{
141 word_type mask = -1;
142 if (WORDS_BIGENDIAN)
143 mask >>= n * 8;
144 else
145 mask <<= n * 8;
146 return val & mask;
147}
148
149/* Return X replicated to all byte positions within WORD_TYPE. */
150
151static inline word_type
152acc_char_replicate (uchar x)
153{
154 word_type ret;
155
156 ret = (x << 24) | (x << 16) | (x << 8) | x;
157 if (sizeof(word_type) == 8)
158 ret = (ret << 16 << 16) | ret;
159 return ret;
160}
161
162/* Return non-zero if some byte of VAL is (probably) C. */
163
164static inline word_type
165acc_char_cmp (word_type val, word_type c)
166{
167#if defined(__GNUC__) && defined(__alpha__)
168 /* We can get exact results using a compare-bytes instruction.
169 Get (val == c) via (0 >= (val ^ c)). */
170 return __builtin_alpha_cmpbge (0, val ^ c);
171#else
172 word_type magic = 0x7efefefeU;
173 if (sizeof(word_type) == 8)
174 magic = (magic << 16 << 16) | 0xfefefefeU;
175 magic |= 1;
176
177 val ^= c;
178 return ((val + magic) ^ ~val) & ~magic;
179#endif
180}
181
182/* Given the result of acc_char_cmp is non-zero, return the index of
183 the found character. If this was a false positive, return -1. */
184
185static inline int
186acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
187 word_type val ATTRIBUTE_UNUSED)
188{
189#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
190 /* The cmpbge instruction sets *bits* of the result corresponding to
191 matches in the bytes with no false positives. */
192 return __builtin_ctzl (cmp);
193#else
194 unsigned int i;
195
196 /* ??? It would be nice to force unrolling here,
197 and have all of these constants folded. */
198 for (i = 0; i < sizeof(word_type); ++i)
199 {
200 uchar c;
201 if (WORDS_BIGENDIAN)
202 c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
203 else
204 c = (val >> i * 8) & 0xff;
205
206 if (c == '\n' || c == '\r' || c == '\\' || c == '?')
207 return i;
208 }
209
210 return -1;
211#endif
212}
213
214/* A version of the fast scanner using bit fiddling techniques.
215
216 For 32-bit words, one would normally perform 16 comparisons and
217 16 branches. With this algorithm one performs 24 arithmetic
218 operations and one branch. Whether this is faster with a 32-bit
219 word size is going to be somewhat system dependent.
220
221 For 64-bit words, we eliminate twice the number of comparisons
222 and branches without increasing the number of arithmetic operations.
223 It's almost certainly going to be a win with 64-bit word size. */
224
225static const uchar * search_line_acc_char (const uchar *, const uchar *)
226 ATTRIBUTE_UNUSED;
227
228static const uchar *
229search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
230{
231 const word_type repl_nl = acc_char_replicate ('\n');
232 const word_type repl_cr = acc_char_replicate ('\r');
233 const word_type repl_bs = acc_char_replicate ('\\');
234 const word_type repl_qm = acc_char_replicate ('?');
235
236 unsigned int misalign;
237 const word_type *p;
238 word_type val, t;
239
240 /* Align the buffer. Mask out any bytes from before the beginning. */
241 p = (word_type *)((uintptr_t)s & -sizeof(word_type));
242 val = *p;
243 misalign = (uintptr_t)s & (sizeof(word_type) - 1);
244 if (misalign)
245 val = acc_char_mask_misalign (val, misalign);
246
247 /* Main loop. */
248 while (1)
249 {
250 t = acc_char_cmp (val, repl_nl);
251 t |= acc_char_cmp (val, repl_cr);
252 t |= acc_char_cmp (val, repl_bs);
253 t |= acc_char_cmp (val, repl_qm);
254
255 if (__builtin_expect (t != 0, 0))
256 {
257 int i = acc_char_index (t, val);
258 if (i >= 0)
259 return (const uchar *)p + i;
260 }
261
262 val = *++p;
263 }
264}
265
/* Disable on Solaris 2/x86 until the following problem can be properly
   autoconfed:

   The Solaris 10+ assembler tags objects with the instruction set
   extensions used, so SSE4.2 executables cannot run on machines that
   don't support that extension.  */
272
Uros Bizjak1b6b13f2014-11-20 13:10:12 +0100273#if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
Richard Henderson246a2fc2010-08-21 12:05:40 -0700274
/* Replicated character data to be shared between implementations.
   Recall that outside of a context with vector support we can't
   define compatible vector types, therefore these are all defined
   in terms of raw characters.

   Row 0 is '\n', row 1 is '\r', row 2 is '\\' and row 3 is '?'; the
   MMX and SSE2 scanners load the rows by these indices.  */
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
  { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
  { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
  { '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?' },
};
289
290/* A version of the fast scanner using MMX vectorized byte compare insns.
291
292 This uses the PMOVMSKB instruction which was introduced with "MMX2",
Uros Bizjakef230b32011-05-22 20:53:32 +0200293 which was packaged into SSE1; it is also present in the AMD MMX
Richard Henderson246a2fc2010-08-21 12:05:40 -0700294 extension. Mark the function as using "sse" so that we emit a real
295 "emms" instruction, rather than the 3dNOW "femms" instruction. */
296
297static const uchar *
298#ifndef __SSE__
299__attribute__((__target__("sse")))
300#endif
301search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
302{
303 typedef char v8qi __attribute__ ((__vector_size__ (8)));
304 typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
305
306 const v8qi repl_nl = *(const v8qi *)repl_chars[0];
307 const v8qi repl_cr = *(const v8qi *)repl_chars[1];
308 const v8qi repl_bs = *(const v8qi *)repl_chars[2];
309 const v8qi repl_qm = *(const v8qi *)repl_chars[3];
310
311 unsigned int misalign, found, mask;
312 const v8qi *p;
313 v8qi data, t, c;
314
315 /* Align the source pointer. While MMX doesn't generate unaligned data
316 faults, this allows us to safely scan to the end of the buffer without
317 reading beyond the end of the last page. */
318 misalign = (uintptr_t)s & 7;
319 p = (const v8qi *)((uintptr_t)s & -8);
320 data = *p;
321
322 /* Create a mask for the bytes that are valid within the first
323 16-byte block. The Idea here is that the AND with the mask
324 within the loop is "free", since we need some AND or TEST
325 insn in order to set the flags for the branch anyway. */
326 mask = -1u << misalign;
327
328 /* Main loop processing 8 bytes at a time. */
329 goto start;
330 do
331 {
332 data = *++p;
333 mask = -1;
334
335 start:
336 t = __builtin_ia32_pcmpeqb(data, repl_nl);
337 c = __builtin_ia32_pcmpeqb(data, repl_cr);
338 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
339 c = __builtin_ia32_pcmpeqb(data, repl_bs);
340 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
341 c = __builtin_ia32_pcmpeqb(data, repl_qm);
342 t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
343 found = __builtin_ia32_pmovmskb (t);
344 found &= mask;
345 }
346 while (!found);
347
348 __builtin_ia32_emms ();
349
350 /* FOUND contains 1 in bits for which we matched a relevant
351 character. Conversion to the byte index is trivial. */
352 found = __builtin_ctz(found);
353 return (const uchar *)p + found;
354}
355
356/* A version of the fast scanner using SSE2 vectorized byte compare insns. */
357
358static const uchar *
359#ifndef __SSE2__
360__attribute__((__target__("sse2")))
361#endif
362search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
363{
364 typedef char v16qi __attribute__ ((__vector_size__ (16)));
365
366 const v16qi repl_nl = *(const v16qi *)repl_chars[0];
367 const v16qi repl_cr = *(const v16qi *)repl_chars[1];
368 const v16qi repl_bs = *(const v16qi *)repl_chars[2];
369 const v16qi repl_qm = *(const v16qi *)repl_chars[3];
370
371 unsigned int misalign, found, mask;
372 const v16qi *p;
373 v16qi data, t;
374
375 /* Align the source pointer. */
376 misalign = (uintptr_t)s & 15;
377 p = (const v16qi *)((uintptr_t)s & -16);
378 data = *p;
379
380 /* Create a mask for the bytes that are valid within the first
381 16-byte block. The Idea here is that the AND with the mask
382 within the loop is "free", since we need some AND or TEST
383 insn in order to set the flags for the branch anyway. */
384 mask = -1u << misalign;
385
386 /* Main loop processing 16 bytes at a time. */
387 goto start;
388 do
389 {
390 data = *++p;
391 mask = -1;
392
393 start:
394 t = __builtin_ia32_pcmpeqb128(data, repl_nl);
395 t |= __builtin_ia32_pcmpeqb128(data, repl_cr);
396 t |= __builtin_ia32_pcmpeqb128(data, repl_bs);
397 t |= __builtin_ia32_pcmpeqb128(data, repl_qm);
398 found = __builtin_ia32_pmovmskb128 (t);
399 found &= mask;
400 }
401 while (!found);
402
403 /* FOUND contains 1 in bits for which we matched a relevant
404 character. Conversion to the byte index is trivial. */
405 found = __builtin_ctz(found);
406 return (const uchar *)p + found;
407}
408
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.

   Returns a pointer to the first \n, \r, ? or \\ at or after S.  END is
   only used to decide whether an unaligned 16-byte read at S is safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  /* The four characters searched for; the explicit length 4 passed to
     PCMPESTRI below limits the set to these.  */
  static const v16qi search = { '\n', '\r', '?', '\\' };

  uintptr_t si = (uintptr_t)s;
  uintptr_t index;

  /* Check for unaligned input.  */
  if (si & 15)
    {
      v16qi sv;

      if (__builtin_expect (end - s < 16, 0)
          && __builtin_expect ((si & 0xfff) > 0xff0, 0))
        {
          /* There are less than 16 bytes left in the buffer, and less
             than 16 bytes left on the page.  Reading 16 bytes at this
             point might generate a spurious page fault.  Defer to the
             SSE2 implementation, which already handles alignment.  */
          return search_line_sse2 (s, end);
        }

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
         memory need not be aligned.  */
      sv = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);

      if (__builtin_expect (index < 16, 0))
        goto found;

      /* Advance the pointer to an aligned address.  We will re-scan a
         few bytes, but we no longer need care for reading past the
         end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((si + 16) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
  while (1)
    {
      char f;

      /* By using inline assembly instead of the builtin,
         we can use the result, as well as the flags set.  */
      __asm ("%vpcmpestri\t$0, %2, %3"
             : "=c"(index), "=@ccc"(f)
             : "m"(*s), "x"(search), "a"(4), "d"(16));
      if (f)
        break;

      s += 16;
    }
#else
  /* The asm loop adds 16 before its first compare.  */
  s -= 16;
  /* By doing the whole loop in inline assembly,
     we can make proper use of the flags set.  */
  __asm ( ".balign 16\n"
        "0: add $16, %1\n"
        " %vpcmpestri\t$0, (%1), %2\n"
        " jnc 0b"
        : "=&c"(index), "+r"(s)
        : "x"(search), "a"(4), "d"(16));
#endif

 found:
  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
489
Richard Henderson246a2fc2010-08-21 12:05:40 -0700490/* Check the CPU capabilities. */
491
492#include "../gcc/config/i386/cpuid.h"
493
494typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
495static search_line_fast_type search_line_fast;
496
Jakub Jelinekb0c084b2011-12-07 23:05:59 +0100497#define HAVE_init_vectorized_lexer 1
498static inline void
Richard Henderson246a2fc2010-08-21 12:05:40 -0700499init_vectorized_lexer (void)
500{
501 unsigned dummy, ecx = 0, edx = 0;
502 search_line_fast_type impl = search_line_acc_char;
503 int minimum = 0;
504
505#if defined(__SSE4_2__)
506 minimum = 3;
507#elif defined(__SSE2__)
508 minimum = 2;
Uros Bizjakef230b32011-05-22 20:53:32 +0200509#elif defined(__SSE__)
Richard Henderson246a2fc2010-08-21 12:05:40 -0700510 minimum = 1;
511#endif
512
513 if (minimum == 3)
514 impl = search_line_sse42;
515 else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
516 {
517 if (minimum == 3 || (ecx & bit_SSE4_2))
518 impl = search_line_sse42;
519 else if (minimum == 2 || (edx & bit_SSE2))
520 impl = search_line_sse2;
521 else if (minimum == 1 || (edx & bit_SSE))
522 impl = search_line_mmx;
523 }
524 else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
525 {
Uros Bizjak5e70c0b2011-05-22 21:04:54 +0200526 if (minimum == 1
527 || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
Richard Henderson246a2fc2010-08-21 12:05:40 -0700528 impl = search_line_mmx;
529 }
530
531 search_line_fast = impl;
532}
533
Bill Schmidt0ccaaab2014-10-03 20:06:38 +0000534#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
Richard Henderson246a2fc2010-08-21 12:05:40 -0700535
Bill Schmidt0ccaaab2014-10-03 20:06:38 +0000536/* A vection of the fast scanner using AltiVec vectorized byte compares
537 and VSX unaligned loads (when VSX is available). This is otherwise
538 the same as the pre-GCC 5 version. */
539
Markus Trippelsdorf44d95242015-03-04 17:28:56 +0000540ATTRIBUTE_NO_SANITIZE_UNDEFINED
Bill Schmidt0ccaaab2014-10-03 20:06:38 +0000541static const uchar *
542search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
543{
544 typedef __attribute__((altivec(vector))) unsigned char vc;
545
546 const vc repl_nl = {
547 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
548 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
549 };
550 const vc repl_cr = {
551 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
552 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
553 };
554 const vc repl_bs = {
555 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
556 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
557 };
558 const vc repl_qm = {
559 '?', '?', '?', '?', '?', '?', '?', '?',
560 '?', '?', '?', '?', '?', '?', '?', '?',
561 };
562 const vc zero = { 0 };
563
564 vc data, t;
565
566 /* Main loop processing 16 bytes at a time. */
567 do
568 {
569 vc m_nl, m_cr, m_bs, m_qm;
570
571 data = *((const vc *)s);
572 s += 16;
573
574 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
575 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
576 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
577 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
578 t = (m_nl | m_cr) | (m_bs | m_qm);
579
580 /* T now contains 0xff in bytes for which we matched one of the relevant
581 characters. We want to exit the loop if any byte in T is non-zero.
582 Below is the expansion of vec_any_ne(t, zero). */
583 }
584 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
585
586 /* Restore s to to point to the 16 bytes we just processed. */
587 s -= 16;
588
589 {
590#define N (sizeof(vc) / sizeof(long))
591
592 union {
593 vc v;
594 /* Statically assert that N is 2 or 4. */
595 unsigned long l[(N == 2 || N == 4) ? N : -1];
596 } u;
597 unsigned long l, i = 0;
598
599 u.v = t;
600
601 /* Find the first word of T that is non-zero. */
602 switch (N)
603 {
604 case 4:
605 l = u.l[i++];
606 if (l != 0)
607 break;
608 s += sizeof(unsigned long);
609 l = u.l[i++];
610 if (l != 0)
611 break;
612 s += sizeof(unsigned long);
613 case 2:
614 l = u.l[i++];
615 if (l != 0)
616 break;
617 s += sizeof(unsigned long);
618 l = u.l[i];
619 }
620
621 /* L now contains 0xff in bytes for which we matched one of the
622 relevant characters. We can find the byte index by finding
623 its bit index and dividing by 8. */
624#ifdef __BIG_ENDIAN__
625 l = __builtin_clzl(l) >> 3;
626#else
627 l = __builtin_ctzl(l) >> 3;
628#endif
629 return s + l;
630
631#undef N
632 }
633}
634
635#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
636
637/* A vection of the fast scanner using AltiVec vectorized byte compares.
638 This cannot be used for little endian because vec_lvsl/lvsr are
639 deprecated for little endian and the code won't work properly. */
Richard Henderson246a2fc2010-08-21 12:05:40 -0700640/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
641 so we can't compile this function without -maltivec on the command line
642 (or implied by some other switch). */
643
644static const uchar *
645search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
646{
647 typedef __attribute__((altivec(vector))) unsigned char vc;
648
649 const vc repl_nl = {
650 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
651 '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
652 };
653 const vc repl_cr = {
654 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
655 '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
656 };
657 const vc repl_bs = {
658 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
659 '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
660 };
661 const vc repl_qm = {
662 '?', '?', '?', '?', '?', '?', '?', '?',
663 '?', '?', '?', '?', '?', '?', '?', '?',
664 };
665 const vc ones = {
666 -1, -1, -1, -1, -1, -1, -1, -1,
667 -1, -1, -1, -1, -1, -1, -1, -1,
668 };
669 const vc zero = { 0 };
670
671 vc data, mask, t;
672
673 /* Altivec loads automatically mask addresses with -16. This lets us
674 issue the first load as early as possible. */
675 data = __builtin_vec_ld(0, (const vc *)s);
676
677 /* Discard bytes before the beginning of the buffer. Do this by
678 beginning with all ones and shifting in zeros according to the
679 mis-alignment. The LVSR instruction pulls the exact shift we
680 want from the address. */
681 mask = __builtin_vec_lvsr(0, s);
682 mask = __builtin_vec_perm(zero, ones, mask);
683 data &= mask;
684
685 /* While altivec loads mask addresses, we still need to align S so
686 that the offset we compute at the end is correct. */
687 s = (const uchar *)((uintptr_t)s & -16);
688
689 /* Main loop processing 16 bytes at a time. */
690 goto start;
691 do
692 {
693 vc m_nl, m_cr, m_bs, m_qm;
694
695 s += 16;
696 data = __builtin_vec_ld(0, (const vc *)s);
697
698 start:
699 m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
700 m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
701 m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
702 m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
703 t = (m_nl | m_cr) | (m_bs | m_qm);
704
705 /* T now contains 0xff in bytes for which we matched one of the relevant
706 characters. We want to exit the loop if any byte in T is non-zero.
707 Below is the expansion of vec_any_ne(t, zero). */
708 }
709 while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
710
711 {
712#define N (sizeof(vc) / sizeof(long))
713
Richard Henderson246a2fc2010-08-21 12:05:40 -0700714 union {
715 vc v;
Dodji Seketeli53a103d2012-05-29 09:42:39 +0000716 /* Statically assert that N is 2 or 4. */
717 unsigned long l[(N == 2 || N == 4) ? N : -1];
Richard Henderson246a2fc2010-08-21 12:05:40 -0700718 } u;
719 unsigned long l, i = 0;
720
721 u.v = t;
722
723 /* Find the first word of T that is non-zero. */
724 switch (N)
725 {
726 case 4:
727 l = u.l[i++];
728 if (l != 0)
729 break;
730 s += sizeof(unsigned long);
731 l = u.l[i++];
732 if (l != 0)
733 break;
734 s += sizeof(unsigned long);
735 case 2:
736 l = u.l[i++];
737 if (l != 0)
738 break;
739 s += sizeof(unsigned long);
740 l = u.l[i];
741 }
742
743 /* L now contains 0xff in bytes for which we matched one of the
744 relevant characters. We can find the byte index by finding
745 its bit index and dividing by 8. */
746 l = __builtin_clzl(l) >> 3;
747 return s + l;
748
749#undef N
750 }
751}
752
Szabolcs Nagy95d06102015-01-30 14:07:00 +0000753#elif defined (__ARM_NEON)
Richard Earnshawe75b54a2012-03-22 17:54:55 +0000754#include "arm_neon.h"
755
756static const uchar *
757search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
758{
759 const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
760 const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
761 const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
762 const uint8x16_t repl_qm = vdupq_n_u8 ('?');
763 const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
764
765 unsigned int misalign, found, mask;
766 const uint8_t *p;
767 uint8x16_t data;
768
769 /* Align the source pointer. */
770 misalign = (uintptr_t)s & 15;
771 p = (const uint8_t *)((uintptr_t)s & -16);
772 data = vld1q_u8 (p);
773
774 /* Create a mask for the bytes that are valid within the first
775 16-byte block. The Idea here is that the AND with the mask
776 within the loop is "free", since we need some AND or TEST
777 insn in order to set the flags for the branch anyway. */
778 mask = (-1u << misalign) & 0xffff;
779
780 /* Main loop, processing 16 bytes at a time. */
781 goto start;
782
783 do
784 {
785 uint8x8_t l;
786 uint16x4_t m;
787 uint32x2_t n;
788 uint8x16_t t, u, v, w;
789
790 p += 16;
791 data = vld1q_u8 (p);
792 mask = 0xffff;
793
794 start:
795 t = vceqq_u8 (data, repl_nl);
796 u = vceqq_u8 (data, repl_cr);
797 v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
798 w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
799 t = vandq_u8 (vorrq_u8 (v, w), xmask);
800 l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
801 m = vpaddl_u8 (l);
802 n = vpaddl_u16 (m);
803
804 found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
805 vshr_n_u64 ((uint64x1_t) n, 24)), 0);
806 found &= mask;
807 }
808 while (!found);
809
810 /* FOUND contains 1 in bits for which we matched a relevant
811 character. Conversion to the byte index is trivial. */
812 found = __builtin_ctz (found);
813 return (const uchar *)p + found;
814}
815
Richard Henderson246a2fc2010-08-21 12:05:40 -0700816#else
817
/* We only have one accelerated alternative.  Use a direct call so that
   we encourage inlining.  */

#define search_line_fast search_line_acc_char
822
823#endif
824
/* Initialize the lexer if needed.  When a vectorized line scanner with
   run-time selection was compiled in (HAVE_init_vectorized_lexer), this
   chooses the implementation; otherwise it does nothing.  */

void
_cpp_init_lexer (void)
{
#ifdef HAVE_init_vectorized_lexer
  init_vectorized_lexer ();
#endif
}
834
Neil Booth26aea072003-04-19 00:22:51 +0000835/* Returns with a logical line that contains no escaped newlines or
836 trigraphs. This is a time-critical inner loop. */
837void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000838_cpp_clean_line (cpp_reader *pfile)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000839{
Neil Booth26aea072003-04-19 00:22:51 +0000840 cpp_buffer *buffer;
841 const uchar *s;
842 uchar c, *d, *p;
Neil Booth29401c32001-08-22 20:37:20 +0000843
Neil Booth26aea072003-04-19 00:22:51 +0000844 buffer = pfile->buffer;
845 buffer->cur_note = buffer->notes_used = 0;
846 buffer->cur = buffer->line_base = buffer->next_line;
847 buffer->need_line = false;
Richard Henderson246a2fc2010-08-21 12:05:40 -0700848 s = buffer->next_line;
Neil Booth26aea072003-04-19 00:22:51 +0000849
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000850 if (!buffer->from_stage3)
Zack Weinbergc5a04732000-04-25 19:32:36 +0000851 {
Ian Lance Taylor7af45bd2006-12-29 15:43:55 +0000852 const uchar *pbackslash = NULL;
853
Richard Henderson246a2fc2010-08-21 12:05:40 -0700854 /* Fast path. This is the common case of an un-escaped line with
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000855 no trigraphs. The primary win here is by not writing any
856 data back to memory until we have to. */
Richard Henderson246a2fc2010-08-21 12:05:40 -0700857 while (1)
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000858 {
Richard Henderson246a2fc2010-08-21 12:05:40 -0700859 /* Perform an optimized search for \n, \r, \\, ?. */
860 s = search_line_fast (s, buffer->rlimit);
861
862 c = *s;
863 if (c == '\\')
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000864 {
Richard Henderson246a2fc2010-08-21 12:05:40 -0700865 /* Record the location of the backslash and continue. */
866 pbackslash = s++;
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000867 }
Richard Henderson246a2fc2010-08-21 12:05:40 -0700868 else if (__builtin_expect (c == '?', 0))
869 {
870 if (__builtin_expect (s[1] == '?', false)
Ian Lance Taylor7af45bd2006-12-29 15:43:55 +0000871 && _cpp_trigraph_map[s[2]])
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000872 {
Richard Henderson246a2fc2010-08-21 12:05:40 -0700873 /* Have a trigraph. We may or may not have to convert
874 it. Add a line note regardless, for -Wtrigraphs. */
875 add_line_note (buffer, s, s[2]);
876 if (CPP_OPTION (pfile, trigraphs))
877 {
878 /* We do, and that means we have to switch to the
879 slow path. */
880 d = (uchar *) s;
881 *d = _cpp_trigraph_map[s[2]];
882 s += 2;
883 goto slow_path;
884 }
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000885 }
Richard Henderson246a2fc2010-08-21 12:05:40 -0700886 /* Not a trigraph. Continue on fast-path. */
887 s++;
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000888 }
Richard Henderson246a2fc2010-08-21 12:05:40 -0700889 else
890 break;
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000891 }
892
Richard Henderson246a2fc2010-08-21 12:05:40 -0700893 /* This must be \r or \n. We're either done, or we'll be forced
894 to write back to the buffer and continue on the slow path. */
895 d = (uchar *) s;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000896
Richard Henderson246a2fc2010-08-21 12:05:40 -0700897 if (__builtin_expect (s == buffer->rlimit, false))
898 goto done;
899
900 /* DOS line ending? */
901 if (__builtin_expect (c == '\r', false) && s[1] == '\n')
902 {
903 s++;
904 if (s == buffer->rlimit)
905 goto done;
906 }
907
908 if (__builtin_expect (pbackslash == NULL, true))
909 goto done;
910
911 /* Check for escaped newline. */
912 p = d;
913 while (is_nvspace (p[-1]))
914 p--;
915 if (p - 1 != pbackslash)
916 goto done;
917
918 /* Have an escaped newline; process it and proceed to
919 the slow path. */
920 add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
921 d = p - 2;
922 buffer->next_line = p - 1;
923
924 slow_path:
925 while (1)
Neil Booth0d9f2342000-09-18 18:43:05 +0000926 {
Neil Booth26aea072003-04-19 00:22:51 +0000927 c = *++s;
928 *++d = c;
929
930 if (c == '\n' || c == '\r')
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000931 {
Richard Henderson246a2fc2010-08-21 12:05:40 -0700932 /* Handle DOS line endings. */
Neil Booth26aea072003-04-19 00:22:51 +0000933 if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
934 s++;
935 if (s == buffer->rlimit)
Neil Booth87062812001-10-20 09:00:53 +0000936 break;
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000937
Neil Booth26aea072003-04-19 00:22:51 +0000938 /* Escaped? */
939 p = d;
940 while (p != buffer->next_line && is_nvspace (p[-1]))
941 p--;
942 if (p == buffer->next_line || p[-1] != '\\')
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000943 break;
Neil Booth26aea072003-04-19 00:22:51 +0000944
Neil Booth41c32c92003-04-20 19:02:53 +0000945 add_line_note (buffer, p - 1, p != d ? ' ': '\\');
Neil Booth26aea072003-04-19 00:22:51 +0000946 d = p - 2;
947 buffer->next_line = p - 1;
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000948 }
Neil Booth26aea072003-04-19 00:22:51 +0000949 else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000950 {
Neil Booth26aea072003-04-19 00:22:51 +0000951 /* Add a note regardless, for the benefit of -Wtrigraphs. */
Neil Booth41c32c92003-04-20 19:02:53 +0000952 add_line_note (buffer, d, s[2]);
Neil Booth26aea072003-04-19 00:22:51 +0000953 if (CPP_OPTION (pfile, trigraphs))
954 {
955 *d = _cpp_trigraph_map[s[2]];
956 s += 2;
957 }
Neil Bootha5c3ccc2000-10-30 22:29:00 +0000958 }
Neil Booth0d9f2342000-09-18 18:43:05 +0000959 }
Neil Booth26aea072003-04-19 00:22:51 +0000960 }
961 else
962 {
Richard Henderson246a2fc2010-08-21 12:05:40 -0700963 while (*s != '\n' && *s != '\r')
Neil Booth26aea072003-04-19 00:22:51 +0000964 s++;
Neil Booth26aea072003-04-19 00:22:51 +0000965 d = (uchar *) s;
966
967 /* Handle DOS line endings. */
968 if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
969 s++;
Zack Weinbergc5a04732000-04-25 19:32:36 +0000970 }
Zack Weinbergc5a04732000-04-25 19:32:36 +0000971
Zack Weinbergd08dcf82003-10-13 18:53:28 +0000972 done:
Neil Booth26aea072003-04-19 00:22:51 +0000973 *d = '\n';
Neil Booth41c32c92003-04-20 19:02:53 +0000974 /* A sentinel note that should never be processed. */
975 add_line_note (buffer, d + 1, '\n');
Neil Booth26aea072003-04-19 00:22:51 +0000976 buffer->next_line = s + 1;
977}
978
Neil Bootha8eb6042003-05-04 20:03:55 +0000979/* Return true if the trigraph indicated by NOTE should be warned
980 about in a comment. */
981static bool
Zack Weinberg6cf87ca2003-06-17 06:17:44 +0000982warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
Neil Bootha8eb6042003-05-04 20:03:55 +0000983{
984 const uchar *p;
985
986 /* Within comments we don't warn about trigraphs, unless the
987 trigraph forms an escaped newline, as that may change
Kazu Hirata6356f892003-06-12 19:01:08 +0000988 behavior. */
Neil Bootha8eb6042003-05-04 20:03:55 +0000989 if (note->type != '/')
990 return false;
991
992 /* If -trigraphs, then this was an escaped newline iff the next note
993 is coincident. */
994 if (CPP_OPTION (pfile, trigraphs))
995 return note[1].pos == note->pos;
996
997 /* Otherwise, see if this forms an escaped newline. */
998 p = note->pos + 3;
999 while (is_nvspace (*p))
1000 p++;
1001
1002 /* There might have been escaped newlines between the trigraph and the
1003 newline we found. Hence the position test. */
1004 return (*p == '\n' && p < note[1].pos);
1005}
1006
Neil Booth26aea072003-04-19 00:22:51 +00001007/* Process the notes created by add_line_note as far as the current
1008 location. */
1009void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001010_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
Neil Booth26aea072003-04-19 00:22:51 +00001011{
1012 cpp_buffer *buffer = pfile->buffer;
1013
1014 for (;;)
1015 {
1016 _cpp_line_note *note = &buffer->notes[buffer->cur_note];
1017 unsigned int col;
1018
1019 if (note->pos > buffer->cur)
1020 break;
1021
1022 buffer->cur_note++;
1023 col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1024
Neil Booth41c32c92003-04-20 19:02:53 +00001025 if (note->type == '\\' || note->type == ' ')
Neil Booth26aea072003-04-19 00:22:51 +00001026 {
Neil Booth41c32c92003-04-20 19:02:53 +00001027 if (note->type == ' ' && !in_comment)
Per Bothner500bee02004-04-22 19:22:27 -07001028 cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
Neil Booth26aea072003-04-19 00:22:51 +00001029 "backslash and newline separated by space");
Neil Booth41c32c92003-04-20 19:02:53 +00001030
Neil Booth26aea072003-04-19 00:22:51 +00001031 if (buffer->next_line > buffer->rlimit)
1032 {
Per Bothner500bee02004-04-22 19:22:27 -07001033 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
Neil Booth26aea072003-04-19 00:22:51 +00001034 "backslash-newline at end of file");
1035 /* Prevent "no newline at end of file" warning. */
1036 buffer->next_line = buffer->rlimit;
1037 }
1038
1039 buffer->line_base = note->pos;
Per Bothner12f9df42004-02-11 07:29:30 -08001040 CPP_INCREMENT_LINE (pfile, 0);
Neil Booth26aea072003-04-19 00:22:51 +00001041 }
Neil Booth41c32c92003-04-20 19:02:53 +00001042 else if (_cpp_trigraph_map[note->type])
1043 {
Neil Bootha8eb6042003-05-04 20:03:55 +00001044 if (CPP_OPTION (pfile, warn_trigraphs)
1045 && (!in_comment || warn_in_comment (pfile, note)))
Neil Booth41c32c92003-04-20 19:02:53 +00001046 {
1047 if (CPP_OPTION (pfile, trigraphs))
Simon Baldwin87cf0652010-04-07 17:18:10 +00001048 cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1049 pfile->line_table->highest_line, col,
1050 "trigraph ??%c converted to %c",
1051 note->type,
1052 (int) _cpp_trigraph_map[note->type]);
Neil Booth41c32c92003-04-20 19:02:53 +00001053 else
Geoffrey Keating905bd7b2003-07-22 02:21:16 +00001054 {
Simon Baldwin87cf0652010-04-07 17:18:10 +00001055 cpp_warning_with_line
1056 (pfile, CPP_W_TRIGRAPHS,
1057 pfile->line_table->highest_line, col,
Geoffrey Keating905bd7b2003-07-22 02:21:16 +00001058 "trigraph ??%c ignored, use -trigraphs to enable",
1059 note->type);
1060 }
Neil Booth41c32c92003-04-20 19:02:53 +00001061 }
1062 }
Jason Merrill00a81b82010-03-29 16:07:29 -04001063 else if (note->type == 0)
1064 /* Already processed in lex_raw_string. */;
Neil Booth41c32c92003-04-20 19:02:53 +00001065 else
1066 abort ();
Neil Booth26aea072003-04-19 00:22:51 +00001067 }
Zack Weinbergc5a04732000-04-25 19:32:36 +00001068}
1069
Neil Booth0d9f2342000-09-18 18:43:05 +00001070/* Skip a C-style block comment. We find the end of the comment by
1071 seeing if an asterisk is before every '/' we encounter. Returns
Neil Booth6f572ac2003-04-19 16:34:33 +00001072 nonzero if comment terminated by EOF, zero otherwise.
1073
1074 Buffer->cur points to the initial asterisk of the comment. */
Neil Booth26aea072003-04-19 00:22:51 +00001075bool
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001076_cpp_skip_block_comment (cpp_reader *pfile)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001077{
1078 cpp_buffer *buffer = pfile->buffer;
Zack Weinbergd08dcf82003-10-13 18:53:28 +00001079 const uchar *cur = buffer->cur;
1080 uchar c;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001081
Zack Weinbergd08dcf82003-10-13 18:53:28 +00001082 cur++;
1083 if (*cur == '/')
1084 cur++;
Neil Booth26aea072003-04-19 00:22:51 +00001085
1086 for (;;)
Neil Booth0d9f2342000-09-18 18:43:05 +00001087 {
Neil Booth0d9f2342000-09-18 18:43:05 +00001088 /* People like decorating comments with '*', so check for '/'
1089 instead for efficiency. */
Zack Weinbergd08dcf82003-10-13 18:53:28 +00001090 c = *cur++;
1091
Zack Weinbergc5a04732000-04-25 19:32:36 +00001092 if (c == '/')
1093 {
Zack Weinbergd08dcf82003-10-13 18:53:28 +00001094 if (cur[-2] == '*')
Neil Booth0d9f2342000-09-18 18:43:05 +00001095 break;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001096
Neil Booth0d9f2342000-09-18 18:43:05 +00001097 /* Warn about potential nested comments, but not if the '/'
Joseph Myersa1f300c2001-11-23 02:05:19 +00001098 comes immediately before the true comment delimiter.
Zack Weinbergc5a04732000-04-25 19:32:36 +00001099 Don't bother to get it right across escaped newlines. */
Neil Booth0d9f2342000-09-18 18:43:05 +00001100 if (CPP_OPTION (pfile, warn_comments)
Zack Weinbergd08dcf82003-10-13 18:53:28 +00001101 && cur[0] == '*' && cur[1] != '/')
1102 {
1103 buffer->cur = cur;
Simon Baldwin87cf0652010-04-07 17:18:10 +00001104 cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1105 pfile->line_table->highest_line,
1106 CPP_BUF_COL (buffer),
1107 "\"/*\" within comment");
Zack Weinbergd08dcf82003-10-13 18:53:28 +00001108 }
Zack Weinbergc5a04732000-04-25 19:32:36 +00001109 }
Neil Booth26aea072003-04-19 00:22:51 +00001110 else if (c == '\n')
1111 {
Per Bothner12f9df42004-02-11 07:29:30 -08001112 unsigned int cols;
Zack Weinbergd08dcf82003-10-13 18:53:28 +00001113 buffer->cur = cur - 1;
Neil Booth26aea072003-04-19 00:22:51 +00001114 _cpp_process_line_notes (pfile, true);
1115 if (buffer->next_line >= buffer->rlimit)
1116 return true;
1117 _cpp_clean_line (pfile);
Per Bothner12f9df42004-02-11 07:29:30 -08001118
1119 cols = buffer->next_line - buffer->line_base;
1120 CPP_INCREMENT_LINE (pfile, cols);
1121
Zack Weinbergd08dcf82003-10-13 18:53:28 +00001122 cur = buffer->cur;
Neil Booth26aea072003-04-19 00:22:51 +00001123 }
Zack Weinbergc5a04732000-04-25 19:32:36 +00001124 }
Zack Weinbergc5a04732000-04-25 19:32:36 +00001125
Zack Weinbergd08dcf82003-10-13 18:53:28 +00001126 buffer->cur = cur;
Neil Bootha8eb6042003-05-04 20:03:55 +00001127 _cpp_process_line_notes (pfile, true);
Neil Booth26aea072003-04-19 00:22:51 +00001128 return false;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001129}
1130
Neil Booth480709c2001-10-21 14:04:42 +00001131/* Skip a C++ line comment, leaving buffer->cur pointing to the
Kazu Hiratada7d8302002-09-22 02:03:17 +00001132 terminating newline. Handles escaped newlines. Returns nonzero
Neil Booth480709c2001-10-21 14:04:42 +00001133 if a multiline comment. */
Zack Weinbergc5a04732000-04-25 19:32:36 +00001134static int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001135skip_line_comment (cpp_reader *pfile)
Neil Booth0d9f2342000-09-18 18:43:05 +00001136{
Neil Boothcbcff6d2000-09-23 21:41:41 +00001137 cpp_buffer *buffer = pfile->buffer;
Manuel López-Ibáñez1bb64662008-07-21 09:33:38 +00001138 source_location orig_line = pfile->line_table->highest_line;
Neil Booth0d9f2342000-09-18 18:43:05 +00001139
Neil Booth26aea072003-04-19 00:22:51 +00001140 while (*buffer->cur != '\n')
1141 buffer->cur++;
Neil Booth0d9f2342000-09-18 18:43:05 +00001142
Neil Booth26aea072003-04-19 00:22:51 +00001143 _cpp_process_line_notes (pfile, true);
Per Bothner500bee02004-04-22 19:22:27 -07001144 return orig_line != pfile->line_table->highest_line;
Neil Booth0d9f2342000-09-18 18:43:05 +00001145}
1146
Neil Booth26aea072003-04-19 00:22:51 +00001147/* Skips whitespace, saving the next non-whitespace character. */
Neil Booth0d9f2342000-09-18 18:43:05 +00001148static void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001149skip_whitespace (cpp_reader *pfile, cppchar_t c)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001150{
1151 cpp_buffer *buffer = pfile->buffer;
Neil Boothf7d151f2003-04-19 07:41:15 +00001152 bool saw_NUL = false;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001153
Neil Booth0d9f2342000-09-18 18:43:05 +00001154 do
Zack Weinbergc5a04732000-04-25 19:32:36 +00001155 {
Neil Booth91fcd152000-07-09 09:19:44 +00001156 /* Horizontal space always OK. */
Neil Booth26aea072003-04-19 00:22:51 +00001157 if (c == ' ' || c == '\t')
Neil Booth0d9f2342000-09-18 18:43:05 +00001158 ;
Neil Booth0d9f2342000-09-18 18:43:05 +00001159 /* Just \f \v or \0 left. */
Neil Booth91fcd152000-07-09 09:19:44 +00001160 else if (c == '\0')
Neil Boothf7d151f2003-04-19 07:41:15 +00001161 saw_NUL = true;
Neil Booth93c803682000-10-28 17:59:06 +00001162 else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
Per Bothner500bee02004-04-22 19:22:27 -07001163 cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
Neil Boothebef4e82002-04-14 18:42:47 +00001164 CPP_BUF_COL (buffer),
1165 "%s in preprocessing directive",
1166 c == '\f' ? "form feed" : "vertical tab");
Zack Weinbergc5a04732000-04-25 19:32:36 +00001167
Neil Booth0d9f2342000-09-18 18:43:05 +00001168 c = *buffer->cur++;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001169 }
Kazu Hirataec5c56d2001-08-01 17:57:27 +00001170 /* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
Neil Booth0d9f2342000-09-18 18:43:05 +00001171 while (is_nvspace (c));
Zack Weinbergc5a04732000-04-25 19:32:36 +00001172
Neil Boothf7d151f2003-04-19 07:41:15 +00001173 if (saw_NUL)
John David Anglin0527bc42003-11-01 22:56:54 +00001174 cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
Neil Boothf7d151f2003-04-19 07:41:15 +00001175
Neil Booth480709c2001-10-21 14:04:42 +00001176 buffer->cur--;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001177}
1178
Neil Booth93c803682000-10-28 17:59:06 +00001179/* See if the characters of a number token are valid in a name (no
1180 '.', '+' or '-'). */
1181static int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001182name_p (cpp_reader *pfile, const cpp_string *string)
Neil Booth93c803682000-10-28 17:59:06 +00001183{
1184 unsigned int i;
1185
1186 for (i = 0; i < string->len; i++)
1187 if (!is_idchar (string->text[i]))
1188 return 0;
1189
Kazu Hiratadf383482002-05-22 22:02:16 +00001190 return 1;
Neil Booth93c803682000-10-28 17:59:06 +00001191}
1192
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001193/* After parsing an identifier or other sequence, produce a warning about
1194 sequences not in NFC/NFKC. */
1195static void
1196warn_about_normalization (cpp_reader *pfile,
1197 const cpp_token *token,
1198 const struct normalize_state *s)
1199{
1200 if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
1201 && !pfile->state.skipping)
1202 {
1203 /* Make sure that the token is printed using UCNs, even
1204 if we'd otherwise happily print UTF-8. */
Gabriel Dos Reisc3f829c2005-05-28 15:52:48 +00001205 unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001206 size_t sz;
1207
1208 sz = cpp_spell_token (pfile, token, buf, false) - buf;
1209 if (NORMALIZE_STATE_RESULT (s) == normalized_C)
Simon Baldwin87cf0652010-04-07 17:18:10 +00001210 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1211 "`%.*s' is not in NFKC", (int) sz, buf);
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001212 else
Simon Baldwin87cf0652010-04-07 17:18:10 +00001213 cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
1214 "`%.*s' is not in NFC", (int) sz, buf);
Tobias Burnus55e7f902012-10-15 22:08:57 +02001215 free (buf);
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001216 }
1217}
1218
Neil Boothbced6ed2003-04-19 11:59:44 +00001219/* Returns TRUE if the sequence starting at buffer->cur is invalid in
Neil Booth1613e522003-04-20 07:29:23 +00001220 an identifier. FIRST is TRUE if this starts an identifier. */
Neil Boothbced6ed2003-04-19 11:59:44 +00001221static bool
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001222forms_identifier_p (cpp_reader *pfile, int first,
1223 struct normalize_state *state)
Neil Boothbced6ed2003-04-19 11:59:44 +00001224{
Neil Booth1613e522003-04-20 07:29:23 +00001225 cpp_buffer *buffer = pfile->buffer;
Neil Boothbced6ed2003-04-19 11:59:44 +00001226
Neil Booth1613e522003-04-20 07:29:23 +00001227 if (*buffer->cur == '$')
Neil Boothbced6ed2003-04-19 11:59:44 +00001228 {
Neil Booth1613e522003-04-20 07:29:23 +00001229 if (!CPP_OPTION (pfile, dollars_in_ident))
1230 return false;
Neil Boothbced6ed2003-04-19 11:59:44 +00001231
Neil Booth1613e522003-04-20 07:29:23 +00001232 buffer->cur++;
Hans-Peter Nilsson78b88112003-06-12 06:09:15 +00001233 if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
Neil Booth1613e522003-04-20 07:29:23 +00001234 {
Hans-Peter Nilsson78b88112003-06-12 06:09:15 +00001235 CPP_OPTION (pfile, warn_dollars) = 0;
John David Anglin0527bc42003-11-01 22:56:54 +00001236 cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
Neil Booth1613e522003-04-20 07:29:23 +00001237 }
1238
1239 return true;
1240 }
1241
1242 /* Is this a syntactically valid UCN? */
Joseph Myersaf15a2f2005-09-20 21:31:37 +01001243 if (CPP_OPTION (pfile, extended_identifiers)
Geoffrey Keating6baba9b2005-03-15 09:55:41 +00001244 && *buffer->cur == '\\'
Neil Booth1613e522003-04-20 07:29:23 +00001245 && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
1246 {
1247 buffer->cur += 2;
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001248 if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
1249 state))
Neil Booth1613e522003-04-20 07:29:23 +00001250 return true;
1251 buffer->cur -= 2;
1252 }
1253
1254 return false;
Neil Boothbced6ed2003-04-19 11:59:44 +00001255}
1256
Kai Tietz17e7cb82009-11-11 18:37:19 +00001257/* Helper function to get the cpp_hashnode of the identifier BASE. */
1258static cpp_hashnode *
1259lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1260{
1261 cpp_hashnode *result;
1262 const uchar *cur;
1263 unsigned int len;
1264 unsigned int hash = HT_HASHSTEP (0, *base);
1265
1266 cur = base + 1;
1267 while (ISIDNUM (*cur))
1268 {
1269 hash = HT_HASHSTEP (hash, *cur);
1270 cur++;
1271 }
1272 len = cur - base;
1273 hash = HT_HASHFINISH (hash, len);
1274 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1275 base, len, hash, HT_ALLOC));
1276
1277 /* Rarely, identifiers require diagnostics when lexed. */
1278 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1279 && !pfile->state.skipping, 0))
1280 {
1281 /* It is allowed to poison the same identifier twice. */
1282 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1283 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1284 NODE_NAME (result));
1285
1286 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1287 replacement list of a variadic macro. */
1288 if (result == pfile->spec_nodes.n__VA_ARGS__
1289 && !pfile->state.va_args_ok)
Edward Smith-Rowland39767962014-07-10 22:26:50 +00001290 {
1291 if (CPP_OPTION (pfile, cplusplus))
1292 cpp_error (pfile, CPP_DL_PEDWARN,
1293 "__VA_ARGS__ can only appear in the expansion"
1294 " of a C++11 variadic macro");
1295 else
1296 cpp_error (pfile, CPP_DL_PEDWARN,
1297 "__VA_ARGS__ can only appear in the expansion"
1298 " of a C99 variadic macro");
1299 }
Kai Tietz17e7cb82009-11-11 18:37:19 +00001300
1301 /* For -Wc++-compat, warn about use of C++ named operators. */
1302 if (result->flags & NODE_WARN_OPERATOR)
Simon Baldwin87cf0652010-04-07 17:18:10 +00001303 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1304 "identifier \"%s\" is a special operator name in C++",
1305 NODE_NAME (result));
Kai Tietz17e7cb82009-11-11 18:37:19 +00001306 }
1307
1308 return result;
1309}
1310
1311/* Get the cpp_hashnode of an identifier specified by NAME in
1312 the current cpp_reader object. If none is found, NULL is returned. */
1313cpp_hashnode *
1314_cpp_lex_identifier (cpp_reader *pfile, const char *name)
1315{
1316 cpp_hashnode *result;
1317 result = lex_identifier_intern (pfile, (uchar *) name);
1318 return result;
1319}
1320
Neil Boothbced6ed2003-04-19 11:59:44 +00001321/* Lex an identifier starting at BUFFER->CUR - 1. */
Neil Booth0d9f2342000-09-18 18:43:05 +00001322static cpp_hashnode *
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001323lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
Joseph Myersbe5ffc52014-11-06 21:08:52 +00001324 struct normalize_state *nst, cpp_hashnode **spelling)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001325{
Neil Booth93c803682000-10-28 17:59:06 +00001326 cpp_hashnode *result;
Geoffrey Keating47e20492005-03-12 10:44:06 +00001327 const uchar *cur;
Zack Weinbergc6e83802004-06-05 20:58:06 +00001328 unsigned int len;
1329 unsigned int hash = HT_HASHSTEP (0, *base);
Zack Weinbergc5a04732000-04-25 19:32:36 +00001330
Zack Weinbergc6e83802004-06-05 20:58:06 +00001331 cur = pfile->buffer->cur;
Geoffrey Keating47e20492005-03-12 10:44:06 +00001332 if (! starts_ucn)
Joseph Myersd3f4ff82013-11-16 00:05:08 +00001333 {
1334 while (ISIDNUM (*cur))
1335 {
1336 hash = HT_HASHSTEP (hash, *cur);
1337 cur++;
1338 }
1339 NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
1340 }
Geoffrey Keating47e20492005-03-12 10:44:06 +00001341 pfile->buffer->cur = cur;
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001342 if (starts_ucn || forms_identifier_p (pfile, false, nst))
Neil Booth10cf9bd2002-03-22 07:23:21 +00001343 {
Geoffrey Keating47e20492005-03-12 10:44:06 +00001344 /* Slower version for identifiers containing UCNs (or $). */
1345 do {
1346 while (ISIDNUM (*pfile->buffer->cur))
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001347 {
Joseph Myersd3f4ff82013-11-16 00:05:08 +00001348 NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001349 pfile->buffer->cur++;
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001350 }
1351 } while (forms_identifier_p (pfile, false, nst));
Geoffrey Keating47e20492005-03-12 10:44:06 +00001352 result = _cpp_interpret_identifier (pfile, base,
1353 pfile->buffer->cur - base);
Joseph Myersbe5ffc52014-11-06 21:08:52 +00001354 *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
Zack Weinberg2c3fcba2001-09-10 22:34:03 +00001355 }
Geoffrey Keating47e20492005-03-12 10:44:06 +00001356 else
1357 {
1358 len = cur - base;
1359 hash = HT_HASHFINISH (hash, len);
Zack Weinberg2c3fcba2001-09-10 22:34:03 +00001360
Tom Tromey2bf41bf2008-02-20 02:16:43 +00001361 result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1362 base, len, hash, HT_ALLOC));
Joseph Myersbe5ffc52014-11-06 21:08:52 +00001363 *spelling = result;
Geoffrey Keating47e20492005-03-12 10:44:06 +00001364 }
Neil Boothbced6ed2003-04-19 11:59:44 +00001365
1366 /* Rarely, identifiers require diagnostics when lexed. */
Zack Weinberg2c3fcba2001-09-10 22:34:03 +00001367 if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1368 && !pfile->state.skipping, 0))
1369 {
1370 /* It is allowed to poison the same identifier twice. */
1371 if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
John David Anglin0527bc42003-11-01 22:56:54 +00001372 cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
Zack Weinberg2c3fcba2001-09-10 22:34:03 +00001373 NODE_NAME (result));
1374
1375 /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1376 replacement list of a variadic macro. */
1377 if (result == pfile->spec_nodes.n__VA_ARGS__
1378 && !pfile->state.va_args_ok)
Edward Smith-Rowland39767962014-07-10 22:26:50 +00001379 {
1380 if (CPP_OPTION (pfile, cplusplus))
1381 cpp_error (pfile, CPP_DL_PEDWARN,
1382 "__VA_ARGS__ can only appear in the expansion"
1383 " of a C++11 variadic macro");
1384 else
1385 cpp_error (pfile, CPP_DL_PEDWARN,
1386 "__VA_ARGS__ can only appear in the expansion"
1387 " of a C99 variadic macro");
1388 }
Ian Lance Taylor3d8b2a92009-06-12 19:43:25 +00001389
1390 /* For -Wc++-compat, warn about use of C++ named operators. */
1391 if (result->flags & NODE_WARN_OPERATOR)
Simon Baldwin87cf0652010-04-07 17:18:10 +00001392 cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1393 "identifier \"%s\" is a special operator name in C++",
1394 NODE_NAME (result));
Zack Weinberg2c3fcba2001-09-10 22:34:03 +00001395 }
1396
1397 return result;
1398}
1399
Neil Boothbced6ed2003-04-19 11:59:44 +00001400/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
Zack Weinbergc5a04732000-04-25 19:32:36 +00001401static void
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001402lex_number (cpp_reader *pfile, cpp_string *number,
1403 struct normalize_state *nst)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001404{
Neil Booth562a5c22002-04-21 18:46:42 +00001405 const uchar *cur;
Neil Boothbced6ed2003-04-19 11:59:44 +00001406 const uchar *base;
1407 uchar *dest;
Zack Weinbergc5a04732000-04-25 19:32:36 +00001408
Neil Boothbced6ed2003-04-19 11:59:44 +00001409 base = pfile->buffer->cur - 1;
1410 do
Neil Booth93c803682000-10-28 17:59:06 +00001411 {
Neil Boothbced6ed2003-04-19 11:59:44 +00001412 cur = pfile->buffer->cur;
Neil Booth10cf9bd2002-03-22 07:23:21 +00001413
Neil Boothbced6ed2003-04-19 11:59:44 +00001414 /* N.B. ISIDNUM does not include $. */
Edward Smith-Rowland7057e642013-10-31 14:01:23 +00001415 while (ISIDNUM (*cur) || *cur == '.' || DIGIT_SEP (*cur)
1416 || VALID_SIGN (*cur, cur[-1]))
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001417 {
Joseph Myersd3f4ff82013-11-16 00:05:08 +00001418 NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001419 cur++;
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001420 }
Edward Smith-Rowlanda5858a32015-03-17 00:50:55 +00001421 /* A number can't end with a digit separator. */
1422 while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
1423 --cur;
Neil Booth10cf9bd2002-03-22 07:23:21 +00001424
Neil Booth10cf9bd2002-03-22 07:23:21 +00001425 pfile->buffer->cur = cur;
Neil Booth93c803682000-10-28 17:59:06 +00001426 }
Geoffrey Keating50668cf2005-03-15 00:36:33 +00001427 while (forms_identifier_p (pfile, false, nst));
Neil Boothbced6ed2003-04-19 11:59:44 +00001428
1429 number->len = cur - base;
1430 dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1431 memcpy (dest, base, number->len);
1432 dest[number->len] = '\0';
1433 number->text = dest;
Neil Booth0d9f2342000-09-18 18:43:05 +00001434}
1435
Neil Booth6338b352003-04-23 22:44:06 +00001436/* Create a token of type TYPE with a literal spelling. */
Zack Weinbergc5a04732000-04-25 19:32:36 +00001437static void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001438create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1439 unsigned int len, enum cpp_ttype type)
Neil Booth0d9f2342000-09-18 18:43:05 +00001440{
Neil Booth6338b352003-04-23 22:44:06 +00001441 uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
Neil Booth0d9f2342000-09-18 18:43:05 +00001442
Neil Booth6338b352003-04-23 22:44:06 +00001443 memcpy (dest, base, len);
1444 dest[len] = '\0';
1445 token->type = type;
1446 token->val.str.len = len;
1447 token->val.str.text = dest;
1448}
1449
Jason Merrill00a81b82010-03-29 16:07:29 -04001450/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
1451 sequence from *FIRST_BUFF_P to LAST_BUFF_P. */
1452
1453static void
1454bufring_append (cpp_reader *pfile, const uchar *base, size_t len,
1455 _cpp_buff **first_buff_p, _cpp_buff **last_buff_p)
1456{
1457 _cpp_buff *first_buff = *first_buff_p;
1458 _cpp_buff *last_buff = *last_buff_p;
1459
1460 if (first_buff == NULL)
1461 first_buff = last_buff = _cpp_get_buff (pfile, len);
1462 else if (len > BUFF_ROOM (last_buff))
1463 {
1464 size_t room = BUFF_ROOM (last_buff);
1465 memcpy (BUFF_FRONT (last_buff), base, room);
1466 BUFF_FRONT (last_buff) += room;
1467 base += room;
1468 len -= room;
1469 last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
1470 }
1471
1472 memcpy (BUFF_FRONT (last_buff), base, len);
1473 BUFF_FRONT (last_buff) += len;
1474
1475 *first_buff_p = first_buff;
1476 *last_buff_p = last_buff;
1477}
1478
Ed Smith-Rowlandc865f922013-06-29 03:41:58 +00001479
1480/* Returns true if a macro has been defined.
1481 This might not work if compile with -save-temps,
1482 or preprocess separately from compilation. */
1483
1484static bool
1485is_macro(cpp_reader *pfile, const uchar *base)
1486{
1487 const uchar *cur = base;
1488 if (! ISIDST (*cur))
1489 return false;
1490 unsigned int hash = HT_HASHSTEP (0, *cur);
1491 ++cur;
1492 while (ISIDNUM (*cur))
1493 {
1494 hash = HT_HASHSTEP (hash, *cur);
1495 ++cur;
1496 }
1497 hash = HT_HASHFINISH (hash, cur - base);
1498
1499 cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1500 base, cur - base, hash, HT_NO_INSERT));
1501
1502 return !result ? false : (result->type == NT_MACRO);
1503}
1504
1505
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001506/* Lexes a raw string. The stored string contains the spelling, including
Jason Merrill00a81b82010-03-29 16:07:29 -04001507 double quotes, delimiter string, '(' and ')', any leading
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001508 'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
1509 literal, or CPP_OTHER if it was not properly terminated.
1510
1511 The spelling is NUL-terminated, but it is not guaranteed that this
1512 is the first NUL since embedded NULs are preserved. */
1513
1514static void
1515lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base,
1516 const uchar *cur)
1517{
Jakub Jelinek8cf88732013-07-21 04:28:03 +02001518 uchar raw_prefix[17];
1519 uchar temp_buffer[18];
1520 const uchar *orig_base;
1521 unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
1522 enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
1523 raw_str_phase phase = RAW_STR_PREFIX;
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001524 enum cpp_ttype type;
1525 size_t total_len = 0;
Jakub Jelinek8cf88732013-07-21 04:28:03 +02001526 /* Index into temp_buffer during phases other than RAW_STR,
1527 during RAW_STR phase 17 to tell BUF_APPEND that nothing should
1528 be appended to temp_buffer. */
1529 size_t temp_buffer_len = 0;
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001530 _cpp_buff *first_buff = NULL, *last_buff = NULL;
Jakub Jelinek8cf88732013-07-21 04:28:03 +02001531 size_t raw_prefix_start;
Jason Merrill00a81b82010-03-29 16:07:29 -04001532 _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001533
1534 type = (*base == 'L' ? CPP_WSTRING :
1535 *base == 'U' ? CPP_STRING32 :
1536 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1537 : CPP_STRING);
1538
Jason Merrill00a81b82010-03-29 16:07:29 -04001539#define BUF_APPEND(STR,LEN) \
1540 do { \
1541 bufring_append (pfile, (const uchar *)(STR), (LEN), \
1542 &first_buff, &last_buff); \
1543 total_len += (LEN); \
Jakub Jelinek8cf88732013-07-21 04:28:03 +02001544 if (__builtin_expect (temp_buffer_len < 17, 0) \
1545 && (const uchar *)(STR) != base \
1546 && (LEN) <= 2) \
1547 { \
1548 memcpy (temp_buffer + temp_buffer_len, \
1549 (const uchar *)(STR), (LEN)); \
1550 temp_buffer_len += (LEN); \
1551 } \
Jason Merrill00a81b82010-03-29 16:07:29 -04001552 } while (0);
1553
Jakub Jelinek8cf88732013-07-21 04:28:03 +02001554 orig_base = base;
1555 ++cur;
1556 raw_prefix_start = cur - base;
1557 for (;;)
1558 {
Jason Merrill00a81b82010-03-29 16:07:29 -04001559 cppchar_t c;
1560
1561 /* If we previously performed any trigraph or line splicing
Jakub Jelinek8cf88732013-07-21 04:28:03 +02001562 transformations, undo them in between the opening and closing
1563 double quote. */
Jason Merrill00a81b82010-03-29 16:07:29 -04001564 while (note->pos < cur)
1565 ++note;
1566 for (; note->pos == cur; ++note)
1567 {
1568 switch (note->type)
1569 {
1570 case '\\':
1571 case ' ':
1572 /* Restore backslash followed by newline. */
1573 BUF_APPEND (base, cur - base);
1574 base = cur;
1575 BUF_APPEND ("\\", 1);
1576 after_backslash:
1577 if (note->type == ' ')
1578 {
1579 /* GNU backslash whitespace newline extension. FIXME
1580 could be any sequence of non-vertical space. When we
1581 can properly restore any such sequence, we should mark
1582 this note as handled so _cpp_process_line_notes
1583 doesn't warn. */
1584 BUF_APPEND (" ", 1);
1585 }
1586
1587 BUF_APPEND ("\n", 1);
1588 break;
1589
1590 case 0:
1591 /* Already handled. */
1592 break;
1593
1594 default:
1595 if (_cpp_trigraph_map[note->type])
1596 {
1597 /* Don't warn about this trigraph in
1598 _cpp_process_line_notes, since trigraphs show up as
1599 trigraphs in raw strings. */
Jakub Jelinekd947ada2010-04-06 09:02:40 +02001600 uchar type = note->type;
Jason Merrill00a81b82010-03-29 16:07:29 -04001601 note->type = 0;
1602
1603 if (!CPP_OPTION (pfile, trigraphs))
1604 /* If we didn't convert the trigraph in the first
1605 place, don't do anything now either. */
1606 break;
1607
1608 BUF_APPEND (base, cur - base);
1609 base = cur;
1610 BUF_APPEND ("??", 2);
1611
1612 /* ??/ followed by newline gets two line notes, one for
1613 the trigraph and one for the backslash/newline. */
1614 if (type == '/' && note[1].pos == cur)
1615 {
1616 if (note[1].type != '\\'
1617 && note[1].type != ' ')
1618 abort ();
1619 BUF_APPEND ("/", 1);
1620 ++note;
1621 goto after_backslash;
1622 }
Jason Merrill00a81b82010-03-29 16:07:29 -04001623 else
1624 {
1625 /* Skip the replacement character. */
1626 base = ++cur;
1627 BUF_APPEND (&type, 1);
Jakub Jelinek8cf88732013-07-21 04:28:03 +02001628 c = type;
1629 goto check_c;
Jason Merrill00a81b82010-03-29 16:07:29 -04001630 }
1631 }
1632 else
1633 abort ();
1634 break;
1635 }
1636 }
1637 c = *cur++;
Jakub Jelinek8cf88732013-07-21 04:28:03 +02001638 if (__builtin_expect (temp_buffer_len < 17, 0))
1639 temp_buffer[temp_buffer_len++] = c;
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001640
Jakub Jelinek8cf88732013-07-21 04:28:03 +02001641 check_c:
1642 if (phase == RAW_STR_PREFIX)
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001643 {
Jakub Jelinek8cf88732013-07-21 04:28:03 +02001644 while (raw_prefix_len < temp_buffer_len)
1645 {
1646 raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
1647 switch (raw_prefix[raw_prefix_len])
1648 {
1649 case ' ': case '(': case ')': case '\\': case '\t':
1650 case '\v': case '\f': case '\n': default:
1651 break;
1652 /* Basic source charset except the above chars. */
1653 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1654 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1655 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1656 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1657 case 'y': case 'z':
1658 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1659 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1660 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1661 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1662 case 'Y': case 'Z':
1663 case '0': case '1': case '2': case '3': case '4': case '5':
1664 case '6': case '7': case '8': case '9':
1665 case '_': case '{': case '}': case '#': case '[': case ']':
1666 case '<': case '>': case '%': case ':': case ';': case '.':
1667 case '?': case '*': case '+': case '-': case '/': case '^':
1668 case '&': case '|': case '~': case '!': case '=': case ',':
1669 case '"': case '\'':
1670 if (raw_prefix_len < 16)
1671 {
1672 raw_prefix_len++;
1673 continue;
1674 }
1675 break;
1676 }
1677
1678 if (raw_prefix[raw_prefix_len] != '(')
1679 {
1680 int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
1681 if (raw_prefix_len == 16)
1682 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1683 col, "raw string delimiter longer "
1684 "than 16 characters");
1685 else if (raw_prefix[raw_prefix_len] == '\n')
1686 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1687 col, "invalid new-line in raw "
1688 "string delimiter");
1689 else
1690 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1691 col, "invalid character '%c' in "
1692 "raw string delimiter",
1693 (int) raw_prefix[raw_prefix_len]);
1694 pfile->buffer->cur = orig_base + raw_prefix_start - 1;
1695 create_literal (pfile, token, orig_base,
1696 raw_prefix_start - 1, CPP_OTHER);
1697 if (first_buff)
1698 _cpp_release_buff (pfile, first_buff);
1699 return;
1700 }
1701 raw_prefix[raw_prefix_len] = '"';
1702 phase = RAW_STR;
1703 /* Nothing should be appended to temp_buffer during
1704 RAW_STR phase. */
1705 temp_buffer_len = 17;
1706 break;
1707 }
1708 continue;
1709 }
1710 else if (phase == RAW_STR_SUFFIX)
1711 {
1712 while (raw_suffix_len <= raw_prefix_len
1713 && raw_suffix_len < temp_buffer_len
1714 && temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
1715 raw_suffix_len++;
1716 if (raw_suffix_len > raw_prefix_len)
1717 break;
1718 if (raw_suffix_len == temp_buffer_len)
1719 continue;
1720 phase = RAW_STR;
1721 /* Nothing should be appended to temp_buffer during
1722 RAW_STR phase. */
1723 temp_buffer_len = 17;
1724 }
1725 if (c == ')')
1726 {
1727 phase = RAW_STR_SUFFIX;
1728 raw_suffix_len = 0;
1729 temp_buffer_len = 0;
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001730 }
1731 else if (c == '\n')
1732 {
1733 if (pfile->state.in_directive
Jakub Jelinekd5e48352013-07-10 18:52:19 +02001734 || (pfile->state.parsing_args
1735 && pfile->buffer->next_line >= pfile->buffer->rlimit))
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001736 {
1737 cur--;
1738 type = CPP_OTHER;
1739 cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1740 "unterminated raw string");
1741 break;
1742 }
1743
Jason Merrill00a81b82010-03-29 16:07:29 -04001744 BUF_APPEND (base, cur - base);
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001745
1746 if (pfile->buffer->cur < pfile->buffer->rlimit)
1747 CPP_INCREMENT_LINE (pfile, 0);
1748 pfile->buffer->need_line = true;
1749
Jason Merrill00a81b82010-03-29 16:07:29 -04001750 pfile->buffer->cur = cur-1;
1751 _cpp_process_line_notes (pfile, false);
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001752 if (!_cpp_get_fresh_line (pfile))
1753 {
1754 source_location src_loc = token->src_loc;
1755 token->type = CPP_EOF;
1756 /* Tell the compiler the line number of the EOF token. */
1757 token->src_loc = pfile->line_table->highest_line;
1758 token->flags = BOL;
1759 if (first_buff != NULL)
1760 _cpp_release_buff (pfile, first_buff);
1761 cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1762 "unterminated raw string");
1763 return;
1764 }
1765
1766 cur = base = pfile->buffer->cur;
Jason Merrill00a81b82010-03-29 16:07:29 -04001767 note = &pfile->buffer->notes[pfile->buffer->cur_note];
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001768 }
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001769 }
1770
Ed Smith-Rowland3ce4f9e2011-10-26 19:30:59 +00001771 if (CPP_OPTION (pfile, user_literals))
1772 {
Ed Smith-Rowlandc865f922013-06-29 03:41:58 +00001773 /* If a string format macro, say from inttypes.h, is placed touching
1774 a string literal it could be parsed as a C++11 user-defined string
1775 literal thus breaking the program.
1776 Try to identify macros with is_macro. A warning is issued. */
1777 if (is_macro (pfile, cur))
Ollie Wild7f5f5f92012-04-27 14:29:32 +00001778 {
Dodji Seketeli112448b2012-04-29 16:27:08 +00001779 /* Raise a warning, but do not consume subsequent tokens. */
Edward Smith-Rowland7aee8642014-07-09 13:33:58 +00001780 if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
Ollie Wild7f5f5f92012-04-27 14:29:32 +00001781 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1782 token->src_loc, 0,
1783 "invalid suffix on literal; C++11 requires "
Ed Smith-Rowlandc865f922013-06-29 03:41:58 +00001784 "a space between literal and string macro");
Ollie Wild7f5f5f92012-04-27 14:29:32 +00001785 }
Ed Smith-Rowland3ce4f9e2011-10-26 19:30:59 +00001786 /* Grab user defined literal suffix. */
Ed Smith-Rowland561f7fc2013-02-14 02:55:42 +00001787 else if (ISIDST (*cur))
Ed Smith-Rowland3ce4f9e2011-10-26 19:30:59 +00001788 {
1789 type = cpp_userdef_string_add_type (type);
1790 ++cur;
Ollie Wild7f5f5f92012-04-27 14:29:32 +00001791
1792 while (ISIDNUM (*cur))
1793 ++cur;
Ed Smith-Rowland3ce4f9e2011-10-26 19:30:59 +00001794 }
Ed Smith-Rowland3ce4f9e2011-10-26 19:30:59 +00001795 }
1796
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001797 pfile->buffer->cur = cur;
1798 if (first_buff == NULL)
1799 create_literal (pfile, token, base, cur - base, type);
1800 else
1801 {
1802 uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
1803
1804 token->type = type;
1805 token->val.str.len = total_len + (cur - base);
1806 token->val.str.text = dest;
1807 last_buff = first_buff;
1808 while (last_buff != NULL)
1809 {
1810 memcpy (dest, last_buff->base,
1811 BUFF_FRONT (last_buff) - last_buff->base);
1812 dest += BUFF_FRONT (last_buff) - last_buff->base;
1813 last_buff = last_buff->next;
1814 }
1815 _cpp_release_buff (pfile, first_buff);
1816 memcpy (dest, base, cur - base);
1817 dest[cur - base] = '\0';
1818 }
1819}
1820
Neil Booth6338b352003-04-23 22:44:06 +00001821/* Lexes a string, character constant, or angle-bracketed header file
1822 name. The stored string contains the spelling, including opening
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001823 quote and any leading 'L', 'u', 'U' or 'u8' and optional
1824 'R' modifier. It returns the type of the literal, or CPP_OTHER
1825 if it was not properly terminated, or CPP_LESS for an unterminated
1826 header name which must be relexed as normal tokens.
Neil Booth6338b352003-04-23 22:44:06 +00001827
1828 The spelling is NUL-terminated, but it is not guaranteed that this
1829 is the first NUL since embedded NULs are preserved. */
1830static void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001831lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
Neil Booth6338b352003-04-23 22:44:06 +00001832{
1833 bool saw_NUL = false;
1834 const uchar *cur;
1835 cppchar_t terminator;
1836 enum cpp_ttype type;
1837
1838 cur = base;
1839 terminator = *cur++;
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001840 if (terminator == 'L' || terminator == 'U')
Neil Booth6338b352003-04-23 22:44:06 +00001841 terminator = *cur++;
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001842 else if (terminator == 'u')
1843 {
1844 terminator = *cur++;
1845 if (terminator == '8')
1846 terminator = *cur++;
1847 }
1848 if (terminator == 'R')
1849 {
1850 lex_raw_string (pfile, token, base, cur);
1851 return;
1852 }
1853 if (terminator == '"')
Kris Van Heesb6baa672008-04-18 13:58:08 +00001854 type = (*base == 'L' ? CPP_WSTRING :
1855 *base == 'U' ? CPP_STRING32 :
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02001856 *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1857 : CPP_STRING);
Neil Booth6338b352003-04-23 22:44:06 +00001858 else if (terminator == '\'')
Kris Van Heesb6baa672008-04-18 13:58:08 +00001859 type = (*base == 'L' ? CPP_WCHAR :
1860 *base == 'U' ? CPP_CHAR32 :
1861 *base == 'u' ? CPP_CHAR16 : CPP_CHAR);
Neil Booth6338b352003-04-23 22:44:06 +00001862 else
1863 terminator = '>', type = CPP_HEADER_NAME;
Neil Booth93c803682000-10-28 17:59:06 +00001864
Neil Booth0d9f2342000-09-18 18:43:05 +00001865 for (;;)
1866 {
Neil Booth6338b352003-04-23 22:44:06 +00001867 cppchar_t c = *cur++;
Neil Booth7868b4a2001-03-04 12:02:02 +00001868
Neil Booth6f572ac2003-04-19 16:34:33 +00001869 /* In #include-style directives, terminators are not escapable. */
Neil Booth6338b352003-04-23 22:44:06 +00001870 if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1871 cur++;
1872 else if (c == terminator)
Neil Boothbced6ed2003-04-19 11:59:44 +00001873 break;
Neil Booth6338b352003-04-23 22:44:06 +00001874 else if (c == '\n')
Neil Booth0d9f2342000-09-18 18:43:05 +00001875 {
Neil Booth6338b352003-04-23 22:44:06 +00001876 cur--;
Joseph Myers4bb09c22009-02-21 21:25:39 +00001877 /* Unmatched quotes always yield undefined behavior, but
1878 greedy lexing means that what appears to be an unterminated
1879 header name may actually be a legitimate sequence of tokens. */
1880 if (terminator == '>')
1881 {
1882 token->type = CPP_LESS;
1883 return;
1884 }
Neil Booth6338b352003-04-23 22:44:06 +00001885 type = CPP_OTHER;
1886 break;
Neil Booth0d9f2342000-09-18 18:43:05 +00001887 }
Neil Booth6338b352003-04-23 22:44:06 +00001888 else if (c == '\0')
1889 saw_NUL = true;
Neil Booth0d9f2342000-09-18 18:43:05 +00001890 }
1891
Neil Booth6338b352003-04-23 22:44:06 +00001892 if (saw_NUL && !pfile->state.skipping)
John David Anglin0527bc42003-11-01 22:56:54 +00001893 cpp_error (pfile, CPP_DL_WARNING,
1894 "null character(s) preserved in literal");
Neil Booth0d9f2342000-09-18 18:43:05 +00001895
Joseph Myersc663e302006-09-13 02:04:18 +01001896 if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1897 cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1898 (int) terminator);
1899
Ed Smith-Rowland3ce4f9e2011-10-26 19:30:59 +00001900 if (CPP_OPTION (pfile, user_literals))
1901 {
Ed Smith-Rowlandc865f922013-06-29 03:41:58 +00001902 /* If a string format macro, say from inttypes.h, is placed touching
1903 a string literal it could be parsed as a C++11 user-defined string
1904 literal thus breaking the program.
1905 Try to identify macros with is_macro. A warning is issued. */
1906 if (is_macro (pfile, cur))
Ollie Wild7f5f5f92012-04-27 14:29:32 +00001907 {
Dodji Seketeli112448b2012-04-29 16:27:08 +00001908 /* Raise a warning, but do not consume subsequent tokens. */
Edward Smith-Rowland7aee8642014-07-09 13:33:58 +00001909 if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
Ollie Wild7f5f5f92012-04-27 14:29:32 +00001910 cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1911 token->src_loc, 0,
1912 "invalid suffix on literal; C++11 requires "
Ed Smith-Rowlandc865f922013-06-29 03:41:58 +00001913 "a space between literal and string macro");
Ollie Wild7f5f5f92012-04-27 14:29:32 +00001914 }
Ed Smith-Rowland3ce4f9e2011-10-26 19:30:59 +00001915 /* Grab user defined literal suffix. */
Ed Smith-Rowland561f7fc2013-02-14 02:55:42 +00001916 else if (ISIDST (*cur))
Ed Smith-Rowland3ce4f9e2011-10-26 19:30:59 +00001917 {
1918 type = cpp_userdef_char_add_type (type);
1919 type = cpp_userdef_string_add_type (type);
1920 ++cur;
Ollie Wild7f5f5f92012-04-27 14:29:32 +00001921
1922 while (ISIDNUM (*cur))
1923 ++cur;
Ed Smith-Rowland3ce4f9e2011-10-26 19:30:59 +00001924 }
Ed Smith-Rowland3ce4f9e2011-10-26 19:30:59 +00001925 }
Jason Merrillfe191302015-05-09 00:50:10 -04001926 else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
1927 && is_macro (pfile, cur)
1928 && !pfile->state.skipping)
1929 cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
1930 token->src_loc, 0, "C++11 requires a space "
1931 "between string literal and macro");
Ed Smith-Rowland3ce4f9e2011-10-26 19:30:59 +00001932
Neil Booth6338b352003-04-23 22:44:06 +00001933 pfile->buffer->cur = cur;
1934 create_literal (pfile, token, base, cur - base, type);
Neil Booth0d9f2342000-09-18 18:43:05 +00001935}
1936
Matthew Gingell631d0d32008-10-05 12:35:36 +00001937/* Return the comment table. The client may not make any assumption
1938 about the ordering of the table. */
1939cpp_comment_table *
1940cpp_get_comments (cpp_reader *pfile)
1941{
1942 return &pfile->comments;
1943}
1944
1945/* Append a comment to the end of the comment table. */
1946static void
1947store_comment (cpp_reader *pfile, cpp_token *token)
1948{
1949 int len;
1950
1951 if (pfile->comments.allocated == 0)
1952 {
1953 pfile->comments.allocated = 256;
1954 pfile->comments.entries = (cpp_comment *) xmalloc
1955 (pfile->comments.allocated * sizeof (cpp_comment));
1956 }
1957
1958 if (pfile->comments.count == pfile->comments.allocated)
1959 {
1960 pfile->comments.allocated *= 2;
1961 pfile->comments.entries = (cpp_comment *) xrealloc
1962 (pfile->comments.entries,
1963 pfile->comments.allocated * sizeof (cpp_comment));
1964 }
1965
1966 len = token->val.str.len;
1967
1968 /* Copy comment. Note, token may not be NULL terminated. */
1969 pfile->comments.entries[pfile->comments.count].comment =
1970 (char *) xmalloc (sizeof (char) * (len + 1));
1971 memcpy (pfile->comments.entries[pfile->comments.count].comment,
1972 token->val.str.text, len);
1973 pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
1974
1975 /* Set source location. */
1976 pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
1977
1978 /* Increment the count of entries in the comment table. */
1979 pfile->comments.count++;
1980}
1981
Neil Booth93c803682000-10-28 17:59:06 +00001982/* The stored comment includes the comment start and any terminator. */
Neil Booth0d9f2342000-09-18 18:43:05 +00001983static void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00001984save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1985 cppchar_t type)
Zack Weinbergc5a04732000-04-25 19:32:36 +00001986{
Neil Booth5d7ee2f2000-05-10 09:39:18 +00001987 unsigned char *buffer;
Kai Tietz651a20b2010-11-16 19:50:17 +00001988 unsigned int len, clen, i;
Kazu Hiratadf383482002-05-22 22:02:16 +00001989
Neil Booth1c6d33e2000-09-25 22:39:51 +00001990 len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
Neil Booth480709c2001-10-21 14:04:42 +00001991
Neil Booth35422032000-10-29 09:56:00 +00001992 /* C++ comments probably (not definitely) have moved past a new
1993 line, which we don't want to save in the comment. */
Neil Booth480709c2001-10-21 14:04:42 +00001994 if (is_vspace (pfile->buffer->cur[-1]))
Neil Booth35422032000-10-29 09:56:00 +00001995 len--;
Jason Thorpe477cdac2002-04-07 03:12:23 +00001996
Kai Tietz651a20b2010-11-16 19:50:17 +00001997 /* If we are currently in a directive or in argument parsing, then
1998 we need to store all C++ comments as C comments internally, and
1999 so we need to allocate a little extra space in that case.
Jason Thorpe477cdac2002-04-07 03:12:23 +00002000
2001 Note that the only time we encounter a directive here is
2002 when we are saving comments in a "#define". */
Kai Tietz651a20b2010-11-16 19:50:17 +00002003 clen = ((pfile->state.in_directive || pfile->state.parsing_args)
2004 && type == '/') ? len + 2 : len;
Jason Thorpe477cdac2002-04-07 03:12:23 +00002005
2006 buffer = _cpp_unaligned_alloc (pfile, clen);
Kazu Hiratadf383482002-05-22 22:02:16 +00002007
Neil Booth0d9f2342000-09-18 18:43:05 +00002008 token->type = CPP_COMMENT;
Jason Thorpe477cdac2002-04-07 03:12:23 +00002009 token->val.str.len = clen;
Neil Booth0d9f2342000-09-18 18:43:05 +00002010 token->val.str.text = buffer;
Neil Boothd1d9a6b2000-05-27 23:19:56 +00002011
Neil Booth1c6d33e2000-09-25 22:39:51 +00002012 buffer[0] = '/';
2013 memcpy (buffer + 1, from, len - 1);
Jason Thorpe477cdac2002-04-07 03:12:23 +00002014
Kazu Hirata1eeeb6a2002-04-30 20:48:55 +00002015 /* Finish conversion to a C comment, if necessary. */
Kai Tietz651a20b2010-11-16 19:50:17 +00002016 if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
Jason Thorpe477cdac2002-04-07 03:12:23 +00002017 {
2018 buffer[1] = '*';
2019 buffer[clen - 2] = '*';
2020 buffer[clen - 1] = '/';
Kai Tietz651a20b2010-11-16 19:50:17 +00002021 /* As there can be in a C++ comments illegal sequences for C comments
2022 we need to filter them out. */
2023 for (i = 2; i < (clen - 2); i++)
2024 if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
2025 buffer[i] = '|';
Jason Thorpe477cdac2002-04-07 03:12:23 +00002026 }
Matthew Gingell631d0d32008-10-05 12:35:36 +00002027
2028 /* Finally store this comment for use by clients of libcpp. */
2029 store_comment (pfile, token);
Neil Booth0d9f2342000-09-18 18:43:05 +00002030}
2031
Neil Booth5fddcff2001-09-11 07:00:12 +00002032/* Allocate COUNT tokens for RUN. */
2033void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002034_cpp_init_tokenrun (tokenrun *run, unsigned int count)
Neil Booth5fddcff2001-09-11 07:00:12 +00002035{
Bernardo Innocenti72bb2c32004-07-24 20:04:42 +02002036 run->base = XNEWVEC (cpp_token, count);
Neil Booth5fddcff2001-09-11 07:00:12 +00002037 run->limit = run->base + count;
2038 run->next = NULL;
2039}
2040
2041/* Returns the next tokenrun, or creates one if there is none. */
2042static tokenrun *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002043next_tokenrun (tokenrun *run)
Neil Booth5fddcff2001-09-11 07:00:12 +00002044{
2045 if (run->next == NULL)
2046 {
Bernardo Innocenti72bb2c32004-07-24 20:04:42 +02002047 run->next = XNEW (tokenrun);
Neil Boothbdcbe492001-09-13 20:05:17 +00002048 run->next->prev = run;
Neil Booth5fddcff2001-09-11 07:00:12 +00002049 _cpp_init_tokenrun (run->next, 250);
2050 }
2051
2052 return run->next;
2053}
2054
Dodji Seketeliad2305a2011-10-22 17:49:18 +00002055/* Return the number of not yet processed token in a given
Tom Tromey92582b72011-10-17 09:59:12 +00002056 context. */
2057int
Dodji Seketeliad2305a2011-10-22 17:49:18 +00002058_cpp_remaining_tokens_num_in_context (cpp_context *context)
Tom Tromey92582b72011-10-17 09:59:12 +00002059{
Tom Tromey92582b72011-10-17 09:59:12 +00002060 if (context->tokens_kind == TOKENS_KIND_DIRECT)
Dodji Seketelicbbcf652011-10-20 08:49:29 +00002061 return (LAST (context).token - FIRST (context).token);
Tom Tromey92582b72011-10-17 09:59:12 +00002062 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2063 || context->tokens_kind == TOKENS_KIND_EXTENDED)
Dodji Seketelicbbcf652011-10-20 08:49:29 +00002064 return (LAST (context).ptoken - FIRST (context).ptoken);
Tom Tromey92582b72011-10-17 09:59:12 +00002065 else
2066 abort ();
2067}
2068
Dodji Seketeliad2305a2011-10-22 17:49:18 +00002069/* Returns the token present at index INDEX in a given context. If
2070 INDEX is zero, the next token to be processed is returned. */
Tom Tromey92582b72011-10-17 09:59:12 +00002071static const cpp_token*
Dodji Seketeliad2305a2011-10-22 17:49:18 +00002072_cpp_token_from_context_at (cpp_context *context, int index)
Tom Tromey92582b72011-10-17 09:59:12 +00002073{
Tom Tromey92582b72011-10-17 09:59:12 +00002074 if (context->tokens_kind == TOKENS_KIND_DIRECT)
2075 return &(FIRST (context).token[index]);
2076 else if (context->tokens_kind == TOKENS_KIND_INDIRECT
2077 || context->tokens_kind == TOKENS_KIND_EXTENDED)
2078 return FIRST (context).ptoken[index];
2079 else
2080 abort ();
2081}
2082
Ben Elliston5950c3c2008-07-14 05:09:48 +00002083/* Look ahead in the input stream. */
2084const cpp_token *
2085cpp_peek_token (cpp_reader *pfile, int index)
2086{
2087 cpp_context *context = pfile->context;
2088 const cpp_token *peektok;
2089 int count;
2090
2091 /* First, scan through any pending cpp_context objects. */
2092 while (context->prev)
2093 {
Dodji Seketeliad2305a2011-10-22 17:49:18 +00002094 ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
Ben Elliston5950c3c2008-07-14 05:09:48 +00002095
2096 if (index < (int) sz)
Dodji Seketeliad2305a2011-10-22 17:49:18 +00002097 return _cpp_token_from_context_at (context, index);
Ben Elliston5950c3c2008-07-14 05:09:48 +00002098 index -= (int) sz;
2099 context = context->prev;
2100 }
2101
2102 /* We will have to read some new tokens after all (and do so
2103 without invalidating preceding tokens). */
2104 count = index;
2105 pfile->keep_tokens++;
2106
Jakub Jelinekb8cd77f2015-04-02 13:57:02 +02002107 /* For peeked tokens temporarily disable line_change reporting,
2108 until the tokens are parsed for real. */
2109 void (*line_change) (cpp_reader *, const cpp_token *, int)
2110 = pfile->cb.line_change;
2111 pfile->cb.line_change = NULL;
2112
Ben Elliston5950c3c2008-07-14 05:09:48 +00002113 do
2114 {
2115 peektok = _cpp_lex_token (pfile);
2116 if (peektok->type == CPP_EOF)
Jakub Jelineke4b33ee2015-04-06 19:01:50 +02002117 {
2118 index--;
2119 break;
2120 }
Ben Elliston5950c3c2008-07-14 05:09:48 +00002121 }
2122 while (index--);
2123
Jakub Jelineke4b33ee2015-04-06 19:01:50 +02002124 _cpp_backup_tokens_direct (pfile, count - index);
Ben Elliston5950c3c2008-07-14 05:09:48 +00002125 pfile->keep_tokens--;
Jakub Jelinekb8cd77f2015-04-02 13:57:02 +02002126 pfile->cb.line_change = line_change;
Ben Elliston5950c3c2008-07-14 05:09:48 +00002127
2128 return peektok;
2129}
2130
Neil Booth4ed5bcf2001-09-24 22:53:12 +00002131/* Allocate a single token that is invalidated at the same time as the
2132 rest of the tokens on the line. Has its line and col set to the
2133 same as the last lexed token, so that diagnostics appear in the
2134 right place. */
2135cpp_token *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002136_cpp_temp_token (cpp_reader *pfile)
Neil Booth4ed5bcf2001-09-24 22:53:12 +00002137{
2138 cpp_token *old, *result;
Ben Elliston5950c3c2008-07-14 05:09:48 +00002139 ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
2140 ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
Neil Booth4ed5bcf2001-09-24 22:53:12 +00002141
2142 old = pfile->cur_token - 1;
Ben Elliston5950c3c2008-07-14 05:09:48 +00002143 /* Any pre-existing lookaheads must not be clobbered. */
2144 if (la)
2145 {
2146 if (sz <= la)
2147 {
2148 tokenrun *next = next_tokenrun (pfile->cur_run);
2149
2150 if (sz < la)
2151 memmove (next->base + 1, next->base,
2152 (la - sz) * sizeof (cpp_token));
2153
2154 next->base[0] = pfile->cur_run->limit[-1];
2155 }
2156
2157 if (sz > 1)
2158 memmove (pfile->cur_token + 1, pfile->cur_token,
2159 MIN (la, sz - 1) * sizeof (cpp_token));
2160 }
2161
2162 if (!sz && pfile->cur_token == pfile->cur_run->limit)
Neil Booth4ed5bcf2001-09-24 22:53:12 +00002163 {
2164 pfile->cur_run = next_tokenrun (pfile->cur_run);
2165 pfile->cur_token = pfile->cur_run->base;
2166 }
2167
2168 result = pfile->cur_token++;
Per Bothner12f9df42004-02-11 07:29:30 -08002169 result->src_loc = old->src_loc;
Neil Booth4ed5bcf2001-09-24 22:53:12 +00002170 return result;
2171}
2172
Neil Booth14baae02001-09-17 18:26:12 +00002173/* Lex a token into RESULT (external interface). Takes care of issues
2174 like directive handling, token lookahead, multiple include
Joseph Myersa1f300c2001-11-23 02:05:19 +00002175 optimization and skipping. */
Neil Booth345894b2001-09-16 13:44:29 +00002176const cpp_token *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002177_cpp_lex_token (cpp_reader *pfile)
Neil Booth0d9f2342000-09-18 18:43:05 +00002178{
Neil Boothbdcbe492001-09-13 20:05:17 +00002179 cpp_token *result;
Neil Booth5fddcff2001-09-11 07:00:12 +00002180
Neil Boothbdcbe492001-09-13 20:05:17 +00002181 for (;;)
Neil Booth5fddcff2001-09-11 07:00:12 +00002182 {
Neil Boothbdcbe492001-09-13 20:05:17 +00002183 if (pfile->cur_token == pfile->cur_run->limit)
Neil Booth5fddcff2001-09-11 07:00:12 +00002184 {
Neil Boothbdcbe492001-09-13 20:05:17 +00002185 pfile->cur_run = next_tokenrun (pfile->cur_run);
2186 pfile->cur_token = pfile->cur_run->base;
2187 }
Tom Tromeyee380362007-01-30 15:46:01 +00002188 /* We assume that the current token is somewhere in the current
2189 run. */
2190 if (pfile->cur_token < pfile->cur_run->base
2191 || pfile->cur_token >= pfile->cur_run->limit)
2192 abort ();
Neil Boothbdcbe492001-09-13 20:05:17 +00002193
2194 if (pfile->lookaheads)
Neil Booth14baae02001-09-17 18:26:12 +00002195 {
2196 pfile->lookaheads--;
2197 result = pfile->cur_token++;
2198 }
Neil Boothbdcbe492001-09-13 20:05:17 +00002199 else
Neil Booth14baae02001-09-17 18:26:12 +00002200 result = _cpp_lex_direct (pfile);
Neil Boothbdcbe492001-09-13 20:05:17 +00002201
2202 if (result->flags & BOL)
2203 {
Neil Boothbdcbe492001-09-13 20:05:17 +00002204 /* Is this a directive. If _cpp_handle_directive returns
2205 false, it is an assembler #. */
2206 if (result->type == CPP_HASH
Neil Boothe808ec92002-02-27 07:24:53 +00002207 /* 6.10.3 p 11: Directives in a list of macro arguments
2208 gives undefined behavior. This implementation
2209 handles the directive as normal. */
Richard Hendersonbc4071d2006-01-04 08:33:38 -08002210 && pfile->state.parsing_args != 1)
Zack Weinberg21b11492004-09-09 19:16:56 +00002211 {
Richard Hendersonbc4071d2006-01-04 08:33:38 -08002212 if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
Zack Weinberg21b11492004-09-09 19:16:56 +00002213 {
Richard Hendersonbc4071d2006-01-04 08:33:38 -08002214 if (pfile->directive_result.type == CPP_PADDING)
2215 continue;
Zack Weinberg21b11492004-09-09 19:16:56 +00002216 result = &pfile->directive_result;
Zack Weinberg21b11492004-09-09 19:16:56 +00002217 }
2218 }
Richard Hendersonbc4071d2006-01-04 08:33:38 -08002219 else if (pfile->state.in_deferred_pragma)
2220 result = &pfile->directive_result;
Zack Weinberg21b11492004-09-09 19:16:56 +00002221
Neil Booth97293892001-09-14 22:04:46 +00002222 if (pfile->cb.line_change && !pfile->state.skipping)
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002223 pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
Neil Booth5fddcff2001-09-11 07:00:12 +00002224 }
2225
Neil Boothbdcbe492001-09-13 20:05:17 +00002226 /* We don't skip tokens in directives. */
Richard Hendersonbc4071d2006-01-04 08:33:38 -08002227 if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
Neil Boothbdcbe492001-09-13 20:05:17 +00002228 break;
Neil Booth5fddcff2001-09-11 07:00:12 +00002229
Neil Boothbdcbe492001-09-13 20:05:17 +00002230 /* Outside a directive, invalidate controlling macros. At file
Neil Booth14baae02001-09-17 18:26:12 +00002231 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
Kazu Hirata6356f892003-06-12 19:01:08 +00002232 get here and MI optimization works. */
Neil Booth5fddcff2001-09-11 07:00:12 +00002233 pfile->mi_valid = false;
Neil Boothbdcbe492001-09-13 20:05:17 +00002234
2235 if (!pfile->state.skipping || result->type == CPP_EOF)
2236 break;
Neil Booth5fddcff2001-09-11 07:00:12 +00002237 }
2238
Neil Booth345894b2001-09-16 13:44:29 +00002239 return result;
Neil Booth5fddcff2001-09-11 07:00:12 +00002240}
2241
Neil Booth26aea072003-04-19 00:22:51 +00002242/* Returns true if a fresh line has been loaded. */
2243bool
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002244_cpp_get_fresh_line (cpp_reader *pfile)
Neil Booth004cb262002-05-17 20:16:48 +00002245{
Per Bothner22234f52004-02-18 14:02:39 -08002246 int return_at_eof;
2247
Neil Booth26aea072003-04-19 00:22:51 +00002248 /* We can't get a new line until we leave the current directive. */
2249 if (pfile->state.in_directive)
2250 return false;
Kazu Hiratadf383482002-05-22 22:02:16 +00002251
Neil Booth26aea072003-04-19 00:22:51 +00002252 for (;;)
Neil Booth1a769162002-06-11 05:36:17 +00002253 {
Neil Booth26aea072003-04-19 00:22:51 +00002254 cpp_buffer *buffer = pfile->buffer;
2255
2256 if (!buffer->need_line)
2257 return true;
2258
2259 if (buffer->next_line < buffer->rlimit)
2260 {
2261 _cpp_clean_line (pfile);
2262 return true;
2263 }
2264
2265 /* First, get out of parsing arguments state. */
2266 if (pfile->state.parsing_args)
Neil Booth1a769162002-06-11 05:36:17 +00002267 return false;
2268
Neil Booth26aea072003-04-19 00:22:51 +00002269 /* End of buffer. Non-empty files should end in a newline. */
2270 if (buffer->buf != buffer->rlimit
2271 && buffer->next_line > buffer->rlimit
2272 && !buffer->from_stage3)
Neil Booth004cb262002-05-17 20:16:48 +00002273 {
Dave Korned0e74e2007-05-31 02:06:48 +00002274 /* Clip to buffer size. */
Neil Booth26aea072003-04-19 00:22:51 +00002275 buffer->next_line = buffer->rlimit;
Neil Booth004cb262002-05-17 20:16:48 +00002276 }
Per Bothner22234f52004-02-18 14:02:39 -08002277
2278 return_at_eof = buffer->return_at_eof;
Neil Booth26aea072003-04-19 00:22:51 +00002279 _cpp_pop_buffer (pfile);
Per Bothner22234f52004-02-18 14:02:39 -08002280 if (pfile->buffer == NULL || return_at_eof)
Per Bothnera506c552003-10-02 07:20:38 +00002281 return false;
Neil Booth26aea072003-04-19 00:22:51 +00002282 }
Neil Booth004cb262002-05-17 20:16:48 +00002283}
2284
/* Helper for two-character operators.  If the next unread character
   is CHAR, consume it and give the token type THEN_TYPE; otherwise
   leave the input alone and give it type ELSE_TYPE.  Relies on locals
   named `buffer' and `result' being in scope where it is expanded.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      if (*buffer->cur == CHAR)                 \
        {                                       \
          buffer->cur++;                        \
          result->type = THEN_TYPE;             \
        }                                       \
      else                                      \
        result->type = ELSE_TYPE;               \
    }                                           \
  while (0)
Neil Booth480709c2001-10-21 14:04:42 +00002293
Neil Booth14baae02001-09-17 18:26:12 +00002294/* Lex a token into pfile->cur_token, which is also incremented, to
2295 get diagnostics pointing to the correct location.
2296
2297 Does not handle issues such as token lookahead, multiple-include
Kazu Hirataf1ba6652003-06-28 19:43:01 +00002298 optimization, directives, skipping etc. This function is only
Neil Booth14baae02001-09-17 18:26:12 +00002299 suitable for use by _cpp_lex_token, and in special cases like
2300 lex_expansion_token which doesn't care for any of these issues.
2301
2302 When meeting a newline, returns CPP_EOF if parsing a directive,
2303 otherwise returns to the start of the token buffer if permissible.
2304 Returns the location of the lexed token. */
2305cpp_token *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002306_cpp_lex_direct (cpp_reader *pfile)
Neil Booth5fddcff2001-09-11 07:00:12 +00002307{
Neil Booth0d9f2342000-09-18 18:43:05 +00002308 cppchar_t c;
Neil Boothadb84b42000-11-08 23:08:07 +00002309 cpp_buffer *buffer;
Neil Booth0d9f2342000-09-18 18:43:05 +00002310 const unsigned char *comment_start;
Neil Booth14baae02001-09-17 18:26:12 +00002311 cpp_token *result = pfile->cur_token++;
Neil Booth0d9f2342000-09-18 18:43:05 +00002312
Neil Booth5fddcff2001-09-11 07:00:12 +00002313 fresh_line:
Neil Booth26aea072003-04-19 00:22:51 +00002314 result->flags = 0;
Per Bothner2be570f2003-08-28 18:07:42 -07002315 buffer = pfile->buffer;
Per Bothnera506c552003-10-02 07:20:38 +00002316 if (buffer->need_line)
Neil Booth26aea072003-04-19 00:22:51 +00002317 {
Richard Hendersonbc4071d2006-01-04 08:33:38 -08002318 if (pfile->state.in_deferred_pragma)
2319 {
2320 result->type = CPP_PRAGMA_EOL;
2321 pfile->state.in_deferred_pragma = false;
2322 if (!pfile->state.pragma_allow_expansion)
2323 pfile->state.prevent_expansion--;
2324 return result;
2325 }
Neil Booth26aea072003-04-19 00:22:51 +00002326 if (!_cpp_get_fresh_line (pfile))
2327 {
2328 result->type = CPP_EOF;
Neil Booth9ff78682003-04-26 21:03:51 +00002329 if (!pfile->state.in_directive)
2330 {
2331 /* Tell the compiler the line number of the EOF token. */
Per Bothner500bee02004-04-22 19:22:27 -07002332 result->src_loc = pfile->line_table->highest_line;
Neil Booth9ff78682003-04-26 21:03:51 +00002333 result->flags = BOL;
2334 }
Neil Booth26aea072003-04-19 00:22:51 +00002335 return result;
2336 }
2337 if (!pfile->keep_tokens)
2338 {
2339 pfile->cur_run = &pfile->base_run;
2340 result = pfile->base_run.base;
2341 pfile->cur_token = result + 1;
2342 }
2343 result->flags = BOL;
2344 if (pfile->state.parsing_args == 2)
2345 result->flags |= PREV_WHITE;
2346 }
Per Bothnera506c552003-10-02 07:20:38 +00002347 buffer = pfile->buffer;
Neil Booth5fddcff2001-09-11 07:00:12 +00002348 update_tokens_line:
Per Bothner500bee02004-04-22 19:22:27 -07002349 result->src_loc = pfile->line_table->highest_line;
Neil Booth0d9f2342000-09-18 18:43:05 +00002350
Neil Booth5fddcff2001-09-11 07:00:12 +00002351 skipped_white:
Neil Booth26aea072003-04-19 00:22:51 +00002352 if (buffer->cur >= buffer->notes[buffer->cur_note].pos
2353 && !pfile->overlaid_buffer)
2354 {
2355 _cpp_process_line_notes (pfile, false);
Per Bothner500bee02004-04-22 19:22:27 -07002356 result->src_loc = pfile->line_table->highest_line;
Neil Booth26aea072003-04-19 00:22:51 +00002357 }
Neil Booth480709c2001-10-21 14:04:42 +00002358 c = *buffer->cur++;
Per Bothner12f9df42004-02-11 07:29:30 -08002359
Gabriel Charettee3dfef42011-08-22 20:41:07 +00002360 if (pfile->forced_token_location_p)
2361 result->src_loc = *pfile->forced_token_location_p;
2362 else
2363 result->src_loc = linemap_position_for_column (pfile->line_table,
2364 CPP_BUF_COLUMN (buffer, buffer->cur));
Neil Booth5fddcff2001-09-11 07:00:12 +00002365
Neil Booth0d9f2342000-09-18 18:43:05 +00002366 switch (c)
2367 {
Neil Booth4d6baaf2001-11-26 23:44:54 +00002368 case ' ': case '\t': case '\f': case '\v': case '\0':
2369 result->flags |= PREV_WHITE;
Neil Booth26aea072003-04-19 00:22:51 +00002370 skip_whitespace (pfile, c);
2371 goto skipped_white;
Neil Booth4d6baaf2001-11-26 23:44:54 +00002372
Neil Booth26aea072003-04-19 00:22:51 +00002373 case '\n':
Per Bothner12f9df42004-02-11 07:29:30 -08002374 if (buffer->cur < buffer->rlimit)
2375 CPP_INCREMENT_LINE (pfile, 0);
Neil Booth26aea072003-04-19 00:22:51 +00002376 buffer->need_line = true;
2377 goto fresh_line;
Neil Booth0d9f2342000-09-18 18:43:05 +00002378
Neil Booth0d9f2342000-09-18 18:43:05 +00002379 case '0': case '1': case '2': case '3': case '4':
2380 case '5': case '6': case '7': case '8': case '9':
Geoffrey Keating50668cf2005-03-15 00:36:33 +00002381 {
2382 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
2383 result->type = CPP_NUMBER;
2384 lex_number (pfile, &result->val.str, &nst);
2385 warn_about_normalization (pfile, result, &nst);
2386 break;
2387 }
Neil Booth0d9f2342000-09-18 18:43:05 +00002388
Neil Booth0abc6a62001-11-27 22:31:34 +00002389 case 'L':
Kris Van Heesb6baa672008-04-18 13:58:08 +00002390 case 'u':
2391 case 'U':
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02002392 case 'R':
2393 /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
2394 wide strings or raw strings. */
Joseph Myersa48e3dd2011-08-18 16:13:49 +01002395 if (c == 'L' || CPP_OPTION (pfile, rliterals)
2396 || (c != 'R' && CPP_OPTION (pfile, uliterals)))
Neil Boothbced6ed2003-04-19 11:59:44 +00002397 {
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02002398 if ((*buffer->cur == '\'' && c != 'R')
2399 || *buffer->cur == '"'
2400 || (*buffer->cur == 'R'
2401 && c != 'R'
2402 && buffer->cur[1] == '"'
Joseph Myersa48e3dd2011-08-18 16:13:49 +01002403 && CPP_OPTION (pfile, rliterals))
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02002404 || (*buffer->cur == '8'
2405 && c == 'u'
2406 && (buffer->cur[1] == '"'
Joseph Myersa48e3dd2011-08-18 16:13:49 +01002407 || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
2408 && CPP_OPTION (pfile, rliterals)))))
Kris Van Heesb6baa672008-04-18 13:58:08 +00002409 {
2410 lex_string (pfile, result, buffer->cur - 1);
2411 break;
2412 }
Neil Boothbced6ed2003-04-19 11:59:44 +00002413 }
Kazu Hiratadf383482002-05-22 22:02:16 +00002414 /* Fall through. */
Neil Booth0abc6a62001-11-27 22:31:34 +00002415
Neil Booth0d9f2342000-09-18 18:43:05 +00002416 case '_':
2417 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
2418 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2419 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
Kris Van Heesb6baa672008-04-18 13:58:08 +00002420 case 's': case 't': case 'v': case 'w': case 'x':
Neil Booth0d9f2342000-09-18 18:43:05 +00002421 case 'y': case 'z':
2422 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
Neil Booth0abc6a62001-11-27 22:31:34 +00002423 case 'G': case 'H': case 'I': case 'J': case 'K':
Jakub Jelinek2c6e3f52009-10-19 23:41:15 +02002424 case 'M': case 'N': case 'O': case 'P': case 'Q':
Kris Van Heesb6baa672008-04-18 13:58:08 +00002425 case 'S': case 'T': case 'V': case 'W': case 'X':
Neil Booth0d9f2342000-09-18 18:43:05 +00002426 case 'Y': case 'Z':
2427 result->type = CPP_NAME;
Geoffrey Keating50668cf2005-03-15 00:36:33 +00002428 {
2429 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
Joseph Myers9a0c6182009-05-10 15:27:32 +01002430 result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
Joseph Myersbe5ffc52014-11-06 21:08:52 +00002431 &nst,
2432 &result->val.node.spelling);
Geoffrey Keating50668cf2005-03-15 00:36:33 +00002433 warn_about_normalization (pfile, result, &nst);
2434 }
Neil Booth0d9f2342000-09-18 18:43:05 +00002435
Neil Booth0d9f2342000-09-18 18:43:05 +00002436 /* Convert named operators to their proper types. */
Joseph Myers9a0c6182009-05-10 15:27:32 +01002437 if (result->val.node.node->flags & NODE_OPERATOR)
Neil Booth0d9f2342000-09-18 18:43:05 +00002438 {
2439 result->flags |= NAMED_OP;
Joseph Myers9a0c6182009-05-10 15:27:32 +01002440 result->type = (enum cpp_ttype) result->val.node.node->directive_index;
Neil Booth0d9f2342000-09-18 18:43:05 +00002441 }
2442 break;
2443
2444 case '\'':
2445 case '"':
Neil Booth6338b352003-04-23 22:44:06 +00002446 lex_string (pfile, result, buffer->cur - 1);
Neil Booth0d9f2342000-09-18 18:43:05 +00002447 break;
2448
2449 case '/':
Neil Booth1c6d33e2000-09-25 22:39:51 +00002450 /* A potential block or line comment. */
2451 comment_start = buffer->cur;
Neil Booth6f572ac2003-04-19 16:34:33 +00002452 c = *buffer->cur;
2453
Neil Booth1c6d33e2000-09-25 22:39:51 +00002454 if (c == '*')
2455 {
Neil Booth26aea072003-04-19 00:22:51 +00002456 if (_cpp_skip_block_comment (pfile))
John David Anglin0527bc42003-11-01 22:56:54 +00002457 cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
Neil Booth0d9f2342000-09-18 18:43:05 +00002458 }
Marek Polacek909eb892014-09-17 21:49:46 +00002459 else if (c == '/' && ! CPP_OPTION (pfile, traditional))
Neil Booth0d9f2342000-09-18 18:43:05 +00002460 {
Marek Polacek909eb892014-09-17 21:49:46 +00002461 /* Don't warn for system headers. */
2462 if (cpp_in_system_header (pfile))
2463 ;
Marek Polacekf3bede72014-08-10 06:10:49 +00002464 /* Warn about comments if pedantically GNUC89, and not
Neil Boothbdb05a72000-11-26 17:31:13 +00002465 in system headers. */
Marek Polacek909eb892014-09-17 21:49:46 +00002466 else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
2467 && CPP_PEDANTIC (pfile)
2468 && ! buffer->warned_cplusplus_comments)
Neil Booth0d9f2342000-09-18 18:43:05 +00002469 {
John David Anglin0527bc42003-11-01 22:56:54 +00002470 cpp_error (pfile, CPP_DL_PEDWARN,
Gabriel Dos Reis56508302002-07-21 21:35:17 +00002471 "C++ style comments are not allowed in ISO C90");
John David Anglin0527bc42003-11-01 22:56:54 +00002472 cpp_error (pfile, CPP_DL_PEDWARN,
Neil Boothebef4e82002-04-14 18:42:47 +00002473 "(this will be reported only once per input file)");
Neil Booth1c6d33e2000-09-25 22:39:51 +00002474 buffer->warned_cplusplus_comments = 1;
Neil Booth0d9f2342000-09-18 18:43:05 +00002475 }
Marek Polacekf3bede72014-08-10 06:10:49 +00002476 /* Or if specifically desired via -Wc90-c99-compat. */
Marek Polacek177cce42014-08-19 05:34:31 +00002477 else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
Marek Polacekdd3ff072014-08-20 04:12:58 +00002478 && ! CPP_OPTION (pfile, cplusplus)
Marek Polacekf3bede72014-08-10 06:10:49 +00002479 && ! buffer->warned_cplusplus_comments)
2480 {
2481 cpp_error (pfile, CPP_DL_WARNING,
Marek Polacek3f4f5c92014-08-19 15:52:02 +00002482 "C++ style comments are incompatible with C90");
Marek Polacekf3bede72014-08-10 06:10:49 +00002483 cpp_error (pfile, CPP_DL_WARNING,
2484 "(this will be reported only once per input file)");
2485 buffer->warned_cplusplus_comments = 1;
2486 }
Marek Polacek909eb892014-09-17 21:49:46 +00002487 /* In C89/C94, C++ style comments are forbidden. */
2488 else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
2489 || CPP_OPTION (pfile, lang) == CLK_STDC94))
2490 {
2491 /* But don't be confused about valid code such as
2492 - // immediately followed by *,
2493 - // in a preprocessing directive,
2494 - // in an #if 0 block. */
2495 if (buffer->cur[1] == '*'
2496 || pfile->state.in_directive
2497 || pfile->state.skipping)
2498 {
2499 result->type = CPP_DIV;
2500 break;
2501 }
2502 else if (! buffer->warned_cplusplus_comments)
2503 {
2504 cpp_error (pfile, CPP_DL_ERROR,
2505 "C++ style comments are not allowed in ISO C90");
2506 cpp_error (pfile, CPP_DL_ERROR,
2507 "(this will be reported only once per input "
2508 "file)");
2509 buffer->warned_cplusplus_comments = 1;
2510 }
2511 }
Jakub Jelinek01ef6562001-04-11 11:43:10 +02002512 if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
Simon Baldwin87cf0652010-04-07 17:18:10 +00002513 cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
Neil Booth0d9f2342000-09-18 18:43:05 +00002514 }
Neil Booth480709c2001-10-21 14:04:42 +00002515 else if (c == '=')
2516 {
Neil Booth6f572ac2003-04-19 16:34:33 +00002517 buffer->cur++;
Neil Booth480709c2001-10-21 14:04:42 +00002518 result->type = CPP_DIV_EQ;
2519 break;
2520 }
2521 else
2522 {
Neil Booth480709c2001-10-21 14:04:42 +00002523 result->type = CPP_DIV;
2524 break;
2525 }
Neil Booth1c6d33e2000-09-25 22:39:51 +00002526
Neil Booth1c6d33e2000-09-25 22:39:51 +00002527 if (!pfile->state.save_comments)
2528 {
2529 result->flags |= PREV_WHITE;
Neil Booth5fddcff2001-09-11 07:00:12 +00002530 goto update_tokens_line;
Neil Booth1c6d33e2000-09-25 22:39:51 +00002531 }
2532
2533 /* Save the comment as a token in its own right. */
Jason Thorpe477cdac2002-04-07 03:12:23 +00002534 save_comment (pfile, result, comment_start, c);
Neil Boothbdcbe492001-09-13 20:05:17 +00002535 break;
Neil Booth0d9f2342000-09-18 18:43:05 +00002536
2537 case '<':
2538 if (pfile->state.angled_headers)
2539 {
Neil Booth6338b352003-04-23 22:44:06 +00002540 lex_string (pfile, result, buffer->cur - 1);
Joseph Myers4bb09c22009-02-21 21:25:39 +00002541 if (result->type != CPP_LESS)
2542 break;
Neil Booth0d9f2342000-09-18 18:43:05 +00002543 }
2544
Neil Booth6f572ac2003-04-19 16:34:33 +00002545 result->type = CPP_LESS;
2546 if (*buffer->cur == '=')
2547 buffer->cur++, result->type = CPP_LESS_EQ;
2548 else if (*buffer->cur == '<')
Neil Booth0d9f2342000-09-18 18:43:05 +00002549 {
Neil Booth6f572ac2003-04-19 16:34:33 +00002550 buffer->cur++;
2551 IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
Neil Booth0d9f2342000-09-18 18:43:05 +00002552 }
Neil Booth6f572ac2003-04-19 16:34:33 +00002553 else if (CPP_OPTION (pfile, digraphs))
Neil Booth480709c2001-10-21 14:04:42 +00002554 {
Neil Booth6f572ac2003-04-19 16:34:33 +00002555 if (*buffer->cur == ':')
2556 {
Paolo Carlini1582c672013-01-04 15:30:24 +00002557 /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
2558 three characters are <:: and the subsequent character
2559 is neither : nor >, the < is treated as a preprocessor
2560 token by itself". */
2561 if (CPP_OPTION (pfile, cplusplus)
Paolo Carlini61949152013-04-24 19:33:54 +00002562 && CPP_OPTION (pfile, lang) != CLK_CXX98
2563 && CPP_OPTION (pfile, lang) != CLK_GNUCXX
Paolo Carlini1582c672013-01-04 15:30:24 +00002564 && buffer->cur[1] == ':'
2565 && buffer->cur[2] != ':' && buffer->cur[2] != '>')
2566 break;
2567
Neil Booth6f572ac2003-04-19 16:34:33 +00002568 buffer->cur++;
2569 result->flags |= DIGRAPH;
2570 result->type = CPP_OPEN_SQUARE;
2571 }
2572 else if (*buffer->cur == '%')
2573 {
2574 buffer->cur++;
2575 result->flags |= DIGRAPH;
2576 result->type = CPP_OPEN_BRACE;
2577 }
Neil Booth480709c2001-10-21 14:04:42 +00002578 }
Neil Booth0d9f2342000-09-18 18:43:05 +00002579 break;
2580
2581 case '>':
Neil Booth6f572ac2003-04-19 16:34:33 +00002582 result->type = CPP_GREATER;
2583 if (*buffer->cur == '=')
2584 buffer->cur++, result->type = CPP_GREATER_EQ;
2585 else if (*buffer->cur == '>')
Neil Booth0d9f2342000-09-18 18:43:05 +00002586 {
Neil Booth6f572ac2003-04-19 16:34:33 +00002587 buffer->cur++;
2588 IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
2589 }
Neil Booth0d9f2342000-09-18 18:43:05 +00002590 break;
2591
Neil Boothcbcff6d2000-09-23 21:41:41 +00002592 case '%':
Neil Booth6f572ac2003-04-19 16:34:33 +00002593 result->type = CPP_MOD;
2594 if (*buffer->cur == '=')
2595 buffer->cur++, result->type = CPP_MOD_EQ;
2596 else if (CPP_OPTION (pfile, digraphs))
Neil Booth480709c2001-10-21 14:04:42 +00002597 {
Neil Booth6f572ac2003-04-19 16:34:33 +00002598 if (*buffer->cur == ':')
Neil Booth480709c2001-10-21 14:04:42 +00002599 {
Neil Booth6f572ac2003-04-19 16:34:33 +00002600 buffer->cur++;
2601 result->flags |= DIGRAPH;
2602 result->type = CPP_HASH;
2603 if (*buffer->cur == '%' && buffer->cur[1] == ':')
Joseph Myers9a0c6182009-05-10 15:27:32 +01002604 buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
Neil Booth480709c2001-10-21 14:04:42 +00002605 }
Neil Booth6f572ac2003-04-19 16:34:33 +00002606 else if (*buffer->cur == '>')
2607 {
2608 buffer->cur++;
2609 result->flags |= DIGRAPH;
2610 result->type = CPP_CLOSE_BRACE;
2611 }
Neil Booth480709c2001-10-21 14:04:42 +00002612 }
Neil Booth0d9f2342000-09-18 18:43:05 +00002613 break;
2614
Neil Boothcbcff6d2000-09-23 21:41:41 +00002615 case '.':
Neil Booth480709c2001-10-21 14:04:42 +00002616 result->type = CPP_DOT;
Neil Booth6f572ac2003-04-19 16:34:33 +00002617 if (ISDIGIT (*buffer->cur))
Neil Booth480709c2001-10-21 14:04:42 +00002618 {
Geoffrey Keating50668cf2005-03-15 00:36:33 +00002619 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
Neil Booth480709c2001-10-21 14:04:42 +00002620 result->type = CPP_NUMBER;
Geoffrey Keating50668cf2005-03-15 00:36:33 +00002621 lex_number (pfile, &result->val.str, &nst);
2622 warn_about_normalization (pfile, result, &nst);
Neil Booth480709c2001-10-21 14:04:42 +00002623 }
Neil Booth6f572ac2003-04-19 16:34:33 +00002624 else if (*buffer->cur == '.' && buffer->cur[1] == '.')
2625 buffer->cur += 2, result->type = CPP_ELLIPSIS;
2626 else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2627 buffer->cur++, result->type = CPP_DOT_STAR;
Neil Booth0d9f2342000-09-18 18:43:05 +00002628 break;
2629
2630 case '+':
Neil Booth6f572ac2003-04-19 16:34:33 +00002631 result->type = CPP_PLUS;
2632 if (*buffer->cur == '+')
2633 buffer->cur++, result->type = CPP_PLUS_PLUS;
2634 else if (*buffer->cur == '=')
2635 buffer->cur++, result->type = CPP_PLUS_EQ;
Neil Booth0d9f2342000-09-18 18:43:05 +00002636 break;
2637
2638 case '-':
Neil Booth6f572ac2003-04-19 16:34:33 +00002639 result->type = CPP_MINUS;
2640 if (*buffer->cur == '>')
Neil Booth0d9f2342000-09-18 18:43:05 +00002641 {
Neil Booth6f572ac2003-04-19 16:34:33 +00002642 buffer->cur++;
Neil Booth480709c2001-10-21 14:04:42 +00002643 result->type = CPP_DEREF;
Neil Booth6f572ac2003-04-19 16:34:33 +00002644 if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
2645 buffer->cur++, result->type = CPP_DEREF_STAR;
Neil Booth0d9f2342000-09-18 18:43:05 +00002646 }
Neil Booth6f572ac2003-04-19 16:34:33 +00002647 else if (*buffer->cur == '-')
2648 buffer->cur++, result->type = CPP_MINUS_MINUS;
2649 else if (*buffer->cur == '=')
2650 buffer->cur++, result->type = CPP_MINUS_EQ;
Neil Booth0d9f2342000-09-18 18:43:05 +00002651 break;
2652
2653 case '&':
Neil Booth6f572ac2003-04-19 16:34:33 +00002654 result->type = CPP_AND;
2655 if (*buffer->cur == '&')
2656 buffer->cur++, result->type = CPP_AND_AND;
2657 else if (*buffer->cur == '=')
2658 buffer->cur++, result->type = CPP_AND_EQ;
Neil Booth0d9f2342000-09-18 18:43:05 +00002659 break;
Kazu Hiratadf383482002-05-22 22:02:16 +00002660
Neil Booth0d9f2342000-09-18 18:43:05 +00002661 case '|':
Neil Booth6f572ac2003-04-19 16:34:33 +00002662 result->type = CPP_OR;
2663 if (*buffer->cur == '|')
2664 buffer->cur++, result->type = CPP_OR_OR;
2665 else if (*buffer->cur == '=')
2666 buffer->cur++, result->type = CPP_OR_EQ;
Neil Booth0d9f2342000-09-18 18:43:05 +00002667 break;
2668
2669 case ':':
Neil Booth6f572ac2003-04-19 16:34:33 +00002670 result->type = CPP_COLON;
2671 if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
2672 buffer->cur++, result->type = CPP_SCOPE;
2673 else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
Neil Booth0d9f2342000-09-18 18:43:05 +00002674 {
Neil Booth6f572ac2003-04-19 16:34:33 +00002675 buffer->cur++;
Neil Booth0d9f2342000-09-18 18:43:05 +00002676 result->flags |= DIGRAPH;
Neil Booth480709c2001-10-21 14:04:42 +00002677 result->type = CPP_CLOSE_SQUARE;
2678 }
Neil Booth0d9f2342000-09-18 18:43:05 +00002679 break;
2680
Neil Booth480709c2001-10-21 14:04:42 +00002681 case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
2682 case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
2683 case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
2684 case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
Joseph Myers9a0c6182009-05-10 15:27:32 +01002685 case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
Neil Booth480709c2001-10-21 14:04:42 +00002686
Neil Booth26aea072003-04-19 00:22:51 +00002687 case '?': result->type = CPP_QUERY; break;
Neil Booth0d9f2342000-09-18 18:43:05 +00002688 case '~': result->type = CPP_COMPL; break;
2689 case ',': result->type = CPP_COMMA; break;
2690 case '(': result->type = CPP_OPEN_PAREN; break;
2691 case ')': result->type = CPP_CLOSE_PAREN; break;
2692 case '[': result->type = CPP_OPEN_SQUARE; break;
2693 case ']': result->type = CPP_CLOSE_SQUARE; break;
2694 case '{': result->type = CPP_OPEN_BRACE; break;
2695 case '}': result->type = CPP_CLOSE_BRACE; break;
2696 case ';': result->type = CPP_SEMICOLON; break;
2697
Kazu Hirata40f03652002-09-26 22:25:14 +00002698 /* @ is a punctuator in Objective-C. */
Zack Weinbergcc937582001-03-07 01:32:01 +00002699 case '@': result->type = CPP_ATSIGN; break;
Neil Booth0d9f2342000-09-18 18:43:05 +00002700
Neil Booth0abc6a62001-11-27 22:31:34 +00002701 case '$':
Neil Booth1613e522003-04-20 07:29:23 +00002702 case '\\':
2703 {
2704 const uchar *base = --buffer->cur;
Geoffrey Keating50668cf2005-03-15 00:36:33 +00002705 struct normalize_state nst = INITIAL_NORMALIZE_STATE;
Neil Booth0abc6a62001-11-27 22:31:34 +00002706
Geoffrey Keating50668cf2005-03-15 00:36:33 +00002707 if (forms_identifier_p (pfile, true, &nst))
Neil Booth1613e522003-04-20 07:29:23 +00002708 {
2709 result->type = CPP_NAME;
Joseph Myersbe5ffc52014-11-06 21:08:52 +00002710 result->val.node.node = lex_identifier (pfile, base, true, &nst,
2711 &result->val.node.spelling);
Geoffrey Keating50668cf2005-03-15 00:36:33 +00002712 warn_about_normalization (pfile, result, &nst);
Neil Booth1613e522003-04-20 07:29:23 +00002713 break;
2714 }
2715 buffer->cur++;
Neil Booth10676942003-04-22 19:28:00 +00002716 }
Neil Booth1613e522003-04-20 07:29:23 +00002717
Neil Booth10676942003-04-22 19:28:00 +00002718 default:
Neil Booth6338b352003-04-23 22:44:06 +00002719 create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
2720 break;
Neil Booth0d9f2342000-09-18 18:43:05 +00002721 }
Neil Boothbdcbe492001-09-13 20:05:17 +00002722
2723 return result;
Zack Weinbergc5a04732000-04-25 19:32:36 +00002724}
2725
Neil Booth59325652003-04-24 20:03:57 +00002726/* An upper bound on the number of bytes needed to spell TOKEN.
2727 Does not include preceding whitespace. */
Neil Booth93c803682000-10-28 17:59:06 +00002728unsigned int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002729cpp_token_len (const cpp_token *token)
Zack Weinbergc5a04732000-04-25 19:32:36 +00002730{
Neil Booth93c803682000-10-28 17:59:06 +00002731 unsigned int len;
Zack Weinbergc5a04732000-04-25 19:32:36 +00002732
Neil Booth93c803682000-10-28 17:59:06 +00002733 switch (TOKEN_SPELL (token))
Zack Weinbergc5a04732000-04-25 19:32:36 +00002734 {
Joseph Myerscc955282008-11-29 12:21:10 +00002735 default: len = 6; break;
Neil Booth6338b352003-04-23 22:44:06 +00002736 case SPELL_LITERAL: len = token->val.str.len; break;
Joseph Myers9a0c6182009-05-10 15:27:32 +01002737 case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
Zack Weinbergc5a04732000-04-25 19:32:36 +00002738 }
Neil Booth59325652003-04-24 20:03:57 +00002739
2740 return len;
Zack Weinberg041c3192000-07-04 01:58:21 +00002741}
2742
/* Decode the UTF-8 sequence starting at NAME and store the matching
   \U escape (backslash, 'U', eight lower-case hex digits) in BUFFER.
   Exactly 10 bytes are written to BUFFER, with no terminating NUL.
   Returns the number of bytes consumed from NAME.  Calls abort if a
   continuation byte is ill-formed.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  static const char hex_digits[] = "0123456789abcdef";
  unsigned long code_point;
  unsigned lead;
  int seq_len = 0;
  int k;

  /* The number of leading 1 bits in the first byte is the length of
     the whole sequence.  */
  for (lead = *name; lead & 0x80; lead <<= 1)
    seq_len++;

  /* The bits after that length prefix are the top bits of the value.  */
  code_point = *name & (0x7F >> seq_len);

  /* Each continuation byte supplies six more bits and must have the
     form 10xxxxxx.  */
  for (k = 1; k < seq_len; k++)
    {
      name++;
      code_point = (code_point << 6) | (*name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & 0xC0) != 0x80)
        abort ();
    }

  buffer[0] = '\\';
  buffer[1] = 'U';
  for (k = 0; k < 8; k++)
    buffer[2 + k] = hex_digits[(code_point >> (4 * (7 - k))) & 0xF];

  return seq_len;
}
2776
Manuel López-Ibáñezcfc93532009-04-22 15:32:18 +00002777/* Given a token TYPE corresponding to a digraph, return a pointer to
2778 the spelling of the digraph. */
2779static const unsigned char *
2780cpp_digraph2name (enum cpp_ttype type)
2781{
2782 return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
2783}
Geoffrey Keating47e20492005-03-12 10:44:06 +00002784
Joseph Myersbe5ffc52014-11-06 21:08:52 +00002785/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
2786 The buffer must already contain the enough space to hold the
2787 token's spelling. Returns a pointer to the character after the
2788 last character written. */
2789unsigned char *
2790_cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
2791{
2792 size_t i;
2793 const unsigned char *name = NODE_NAME (ident);
2794
2795 for (i = 0; i < NODE_LEN (ident); i++)
2796 if (name[i] & ~0x7F)
2797 {
2798 i += utf8_to_ucn (buffer, name + i) - 1;
2799 buffer += 10;
2800 }
2801 else
2802 *buffer++ = name[i];
2803
2804 return buffer;
2805}
2806
Neil Booth3fef5b22000-05-08 22:22:49 +00002807/* Write the spelling of a token TOKEN to BUFFER. The buffer must
Zack Weinbergcf00a882000-07-08 02:33:00 +00002808 already contain the enough space to hold the token's spelling.
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002809 Returns a pointer to the character after the last character written.
Geoffrey Keating47e20492005-03-12 10:44:06 +00002810 FORSTRING is true if this is to be the spelling after translation
Joseph Myersbe5ffc52014-11-06 21:08:52 +00002811 phase 1 (with the original spelling of extended identifiers), false
2812 if extended identifiers should always be written using UCNs (there is
2813 no option for always writing them in the internal UTF-8 form).
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002814 FIXME: Would be nice if we didn't need the PFILE argument. */
Neil Booth93c803682000-10-28 17:59:06 +00002815unsigned char *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002816cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
Geoffrey Keating47e20492005-03-12 10:44:06 +00002817 unsigned char *buffer, bool forstring)
Neil Booth3fef5b22000-05-08 22:22:49 +00002818{
Zack Weinberg96be6992000-07-18 23:25:06 +00002819 switch (TOKEN_SPELL (token))
Neil Booth3fef5b22000-05-08 22:22:49 +00002820 {
Neil Booth5d7ee2f2000-05-10 09:39:18 +00002821 case SPELL_OPERATOR:
Neil Booth3fef5b22000-05-08 22:22:49 +00002822 {
2823 const unsigned char *spelling;
2824 unsigned char c;
2825
2826 if (token->flags & DIGRAPH)
Manuel López-Ibáñezcfc93532009-04-22 15:32:18 +00002827 spelling = cpp_digraph2name (token->type);
Zack Weinberg92936ec2000-07-19 20:18:08 +00002828 else if (token->flags & NAMED_OP)
2829 goto spell_ident;
Neil Booth3fef5b22000-05-08 22:22:49 +00002830 else
Zack Weinberg96be6992000-07-18 23:25:06 +00002831 spelling = TOKEN_NAME (token);
Kazu Hiratadf383482002-05-22 22:02:16 +00002832
Neil Booth3fef5b22000-05-08 22:22:49 +00002833 while ((c = *spelling++) != '\0')
2834 *buffer++ = c;
2835 }
2836 break;
2837
Zack Weinberg47ad4132001-10-06 23:11:27 +00002838 spell_ident:
Neil Booth5d7ee2f2000-05-10 09:39:18 +00002839 case SPELL_IDENT:
Geoffrey Keating47e20492005-03-12 10:44:06 +00002840 if (forstring)
2841 {
Joseph Myersbe5ffc52014-11-06 21:08:52 +00002842 memcpy (buffer, NODE_NAME (token->val.node.spelling),
2843 NODE_LEN (token->val.node.spelling));
2844 buffer += NODE_LEN (token->val.node.spelling);
Geoffrey Keating47e20492005-03-12 10:44:06 +00002845 }
2846 else
Joseph Myersbe5ffc52014-11-06 21:08:52 +00002847 buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
Neil Booth5d7ee2f2000-05-10 09:39:18 +00002848 break;
Neil Booth3fef5b22000-05-08 22:22:49 +00002849
Neil Booth6338b352003-04-23 22:44:06 +00002850 case SPELL_LITERAL:
Zack Weinberg47ad4132001-10-06 23:11:27 +00002851 memcpy (buffer, token->val.str.text, token->val.str.len);
2852 buffer += token->val.str.len;
2853 break;
2854
Neil Booth3fef5b22000-05-08 22:22:49 +00002855 case SPELL_NONE:
John David Anglin0527bc42003-11-01 22:56:54 +00002856 cpp_error (pfile, CPP_DL_ICE,
2857 "unspellable token %s", TOKEN_NAME (token));
Neil Booth3fef5b22000-05-08 22:22:49 +00002858 break;
2859 }
2860
2861 return buffer;
2862}
2863
Neil Booth5d8ebbd2002-01-03 21:43:09 +00002864/* Returns TOKEN spelt as a null-terminated string. The string is
2865 freed when the reader is destroyed. Useful for diagnostics. */
Neil Booth93c803682000-10-28 17:59:06 +00002866unsigned char *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002867cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
Neil Booth59325652003-04-24 20:03:57 +00002868{
2869 unsigned int len = cpp_token_len (token) + 1;
Neil Boothece54d52001-09-28 09:40:22 +00002870 unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
Zack Weinberg041c3192000-07-04 01:58:21 +00002871
Geoffrey Keating47e20492005-03-12 10:44:06 +00002872 end = cpp_spell_token (pfile, token, start, false);
Neil Booth93c803682000-10-28 17:59:06 +00002873 end[0] = '\0';
Zack Weinberg041c3192000-07-04 01:58:21 +00002874
Neil Booth93c803682000-10-28 17:59:06 +00002875 return start;
Zack Weinberg041c3192000-07-04 01:58:21 +00002876}
2877
Manuel López-Ibáñezcfc93532009-04-22 15:32:18 +00002878/* Returns a pointer to a string which spells the token defined by
2879 TYPE and FLAGS. Used by C front ends, which really should move to
2880 using cpp_token_as_text. */
Neil Booth93c803682000-10-28 17:59:06 +00002881const char *
Manuel López-Ibáñezcfc93532009-04-22 15:32:18 +00002882cpp_type2name (enum cpp_ttype type, unsigned char flags)
Zack Weinberg041c3192000-07-04 01:58:21 +00002883{
Manuel López-Ibáñezcfc93532009-04-22 15:32:18 +00002884 if (flags & DIGRAPH)
2885 return (const char *) cpp_digraph2name (type);
2886 else if (flags & NAMED_OP)
2887 return cpp_named_operator2name (type);
2888
Neil Booth93c803682000-10-28 17:59:06 +00002889 return (const char *) token_spellings[type].name;
Zack Weinberg041c3192000-07-04 01:58:21 +00002890}
2891
Neil Booth4ed5bcf2001-09-24 22:53:12 +00002892/* Writes the spelling of token to FP, without any preceding space.
2893 Separated from cpp_spell_token for efficiency - to avoid stdio
2894 double-buffering. */
Zack Weinberg041c3192000-07-04 01:58:21 +00002895void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002896cpp_output_token (const cpp_token *token, FILE *fp)
Zack Weinberg041c3192000-07-04 01:58:21 +00002897{
Neil Booth93c803682000-10-28 17:59:06 +00002898 switch (TOKEN_SPELL (token))
Zack Weinberg041c3192000-07-04 01:58:21 +00002899 {
Neil Booth93c803682000-10-28 17:59:06 +00002900 case SPELL_OPERATOR:
2901 {
2902 const unsigned char *spelling;
Zack Weinberg3b681e92001-09-28 07:00:27 +00002903 int c;
Neil Booth93c803682000-10-28 17:59:06 +00002904
2905 if (token->flags & DIGRAPH)
Manuel López-Ibáñezcfc93532009-04-22 15:32:18 +00002906 spelling = cpp_digraph2name (token->type);
Neil Booth93c803682000-10-28 17:59:06 +00002907 else if (token->flags & NAMED_OP)
2908 goto spell_ident;
2909 else
2910 spelling = TOKEN_NAME (token);
2911
Zack Weinberg3b681e92001-09-28 07:00:27 +00002912 c = *spelling;
2913 do
2914 putc (c, fp);
2915 while ((c = *++spelling) != '\0');
Neil Booth93c803682000-10-28 17:59:06 +00002916 }
2917 break;
2918
2919 spell_ident:
2920 case SPELL_IDENT:
Geoffrey Keating47e20492005-03-12 10:44:06 +00002921 {
2922 size_t i;
Joseph Myers9a0c6182009-05-10 15:27:32 +01002923 const unsigned char * name = NODE_NAME (token->val.node.node);
Geoffrey Keating47e20492005-03-12 10:44:06 +00002924
Joseph Myers9a0c6182009-05-10 15:27:32 +01002925 for (i = 0; i < NODE_LEN (token->val.node.node); i++)
Geoffrey Keating47e20492005-03-12 10:44:06 +00002926 if (name[i] & ~0x7F)
2927 {
2928 unsigned char buffer[10];
2929 i += utf8_to_ucn (buffer, name + i) - 1;
2930 fwrite (buffer, 1, 10, fp);
2931 }
2932 else
Joseph Myers9a0c6182009-05-10 15:27:32 +01002933 fputc (NODE_NAME (token->val.node.node)[i], fp);
Geoffrey Keating47e20492005-03-12 10:44:06 +00002934 }
2935 break;
Neil Booth93c803682000-10-28 17:59:06 +00002936
Neil Booth6338b352003-04-23 22:44:06 +00002937 case SPELL_LITERAL:
Zack Weinberg47ad4132001-10-06 23:11:27 +00002938 fwrite (token->val.str.text, 1, token->val.str.len, fp);
2939 break;
2940
Neil Booth93c803682000-10-28 17:59:06 +00002941 case SPELL_NONE:
2942 /* An error, most probably. */
2943 break;
Zack Weinberg041c3192000-07-04 01:58:21 +00002944 }
Zack Weinberg041c3192000-07-04 01:58:21 +00002945}
2946
Neil Booth93c803682000-10-28 17:59:06 +00002947/* Compare two tokens. */
2948int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002949_cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
Neil Booth93c803682000-10-28 17:59:06 +00002950{
2951 if (a->type == b->type && a->flags == b->flags)
2952 switch (TOKEN_SPELL (a))
2953 {
2954 default: /* Keep compiler happy. */
2955 case SPELL_OPERATOR:
Joseph Myers9a0c6182009-05-10 15:27:32 +01002956 /* token_no is used to track where multiple consecutive ##
Joseph Myersaa508502009-04-19 18:10:56 +01002957 tokens were originally located. */
Joseph Myers9a0c6182009-05-10 15:27:32 +01002958 return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
Neil Booth93c803682000-10-28 17:59:06 +00002959 case SPELL_NONE:
Joseph Myers9a0c6182009-05-10 15:27:32 +01002960 return (a->type != CPP_MACRO_ARG
Joseph Myersbe5ffc52014-11-06 21:08:52 +00002961 || (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
2962 && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
Neil Booth93c803682000-10-28 17:59:06 +00002963 case SPELL_IDENT:
Joseph Myersbe5ffc52014-11-06 21:08:52 +00002964 return (a->val.node.node == b->val.node.node
2965 && a->val.node.spelling == b->val.node.spelling);
Neil Booth6338b352003-04-23 22:44:06 +00002966 case SPELL_LITERAL:
Neil Booth93c803682000-10-28 17:59:06 +00002967 return (a->val.str.len == b->val.str.len
2968 && !memcmp (a->val.str.text, b->val.str.text,
2969 a->val.str.len));
2970 }
2971
2972 return 0;
2973}
2974
Neil Booth93c803682000-10-28 17:59:06 +00002975/* Returns nonzero if a space should be inserted to avoid an
2976 accidental token paste for output. For simplicity, it is
2977 conservative, and occasionally advises a space where one is not
2978 needed, e.g. "." and ".2". */
Neil Booth93c803682000-10-28 17:59:06 +00002979int
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00002980cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
2981 const cpp_token *token2)
Zack Weinberg041c3192000-07-04 01:58:21 +00002982{
Neil Booth93c803682000-10-28 17:59:06 +00002983 enum cpp_ttype a = token1->type, b = token2->type;
2984 cppchar_t c;
Zack Weinberg041c3192000-07-04 01:58:21 +00002985
Neil Booth93c803682000-10-28 17:59:06 +00002986 if (token1->flags & NAMED_OP)
2987 a = CPP_NAME;
2988 if (token2->flags & NAMED_OP)
2989 b = CPP_NAME;
Zack Weinberg041c3192000-07-04 01:58:21 +00002990
Neil Booth93c803682000-10-28 17:59:06 +00002991 c = EOF;
2992 if (token2->flags & DIGRAPH)
John David Anglin37b85242001-03-02 01:11:50 +00002993 c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
Neil Booth93c803682000-10-28 17:59:06 +00002994 else if (token_spellings[b].category == SPELL_OPERATOR)
2995 c = token_spellings[b].name[0];
Zack Weinberg417f3e32000-07-11 23:20:53 +00002996
Neil Booth93c803682000-10-28 17:59:06 +00002997 /* Quickly get everything that can paste with an '='. */
John David Anglin37b85242001-03-02 01:11:50 +00002998 if ((int) a <= (int) CPP_LAST_EQ && c == '=')
Zack Weinberg041c3192000-07-04 01:58:21 +00002999 return 1;
3000
Neil Booth93c803682000-10-28 17:59:06 +00003001 switch (a)
3002 {
Steve Ellceyb52dbbf2006-08-14 23:13:54 +00003003 case CPP_GREATER: return c == '>';
3004 case CPP_LESS: return c == '<' || c == '%' || c == ':';
Neil Booth93c803682000-10-28 17:59:06 +00003005 case CPP_PLUS: return c == '+';
3006 case CPP_MINUS: return c == '-' || c == '>';
3007 case CPP_DIV: return c == '/' || c == '*'; /* Comments. */
3008 case CPP_MOD: return c == ':' || c == '>';
3009 case CPP_AND: return c == '&';
3010 case CPP_OR: return c == '|';
3011 case CPP_COLON: return c == ':' || c == '>';
3012 case CPP_DEREF: return c == '*';
Neil Booth26ec42e2001-01-28 11:22:23 +00003013 case CPP_DOT: return c == '.' || c == '%' || b == CPP_NUMBER;
Neil Booth93c803682000-10-28 17:59:06 +00003014 case CPP_HASH: return c == '#' || c == '%'; /* Digraph form. */
3015 case CPP_NAME: return ((b == CPP_NUMBER
3016 && name_p (pfile, &token2->val.str))
3017 || b == CPP_NAME
3018 || b == CPP_CHAR || b == CPP_STRING); /* L */
3019 case CPP_NUMBER: return (b == CPP_NUMBER || b == CPP_NAME
3020 || c == '.' || c == '+' || c == '-');
Neil Booth1613e522003-04-20 07:29:23 +00003021 /* UCNs */
Neil Booth10676942003-04-22 19:28:00 +00003022 case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
3023 && b == CPP_NAME)
Neil Booth1613e522003-04-20 07:29:23 +00003024 || (CPP_OPTION (pfile, objc)
Neil Booth10676942003-04-22 19:28:00 +00003025 && token1->val.str.text[0] == '@'
Neil Booth1613e522003-04-20 07:29:23 +00003026 && (b == CPP_NAME || b == CPP_STRING)));
Jakub Jelinek87e356b2013-07-10 18:40:49 +02003027 case CPP_STRING:
3028 case CPP_WSTRING:
3029 case CPP_UTF8STRING:
3030 case CPP_STRING16:
3031 case CPP_STRING32: return (CPP_OPTION (pfile, user_literals)
3032 && (b == CPP_NAME
3033 || (TOKEN_SPELL (token2) == SPELL_LITERAL
3034 && ISIDST (token2->val.str.text[0]))));
3035
Neil Booth93c803682000-10-28 17:59:06 +00003036 default: break;
3037 }
Zack Weinberg041c3192000-07-04 01:58:21 +00003038
3039 return 0;
3040}
3041
Neil Booth93c803682000-10-28 17:59:06 +00003042/* Output all the remaining tokens on the current line, and a newline
Neil Booth4ed5bcf2001-09-24 22:53:12 +00003043 character, to FP. Leading whitespace is removed. If there are
3044 macros, special token padding is not performed. */
Neil Booth93c803682000-10-28 17:59:06 +00003045void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00003046cpp_output_line (cpp_reader *pfile, FILE *fp)
Zack Weinberg041c3192000-07-04 01:58:21 +00003047{
Neil Booth4ed5bcf2001-09-24 22:53:12 +00003048 const cpp_token *token;
Zack Weinberg041c3192000-07-04 01:58:21 +00003049
Neil Booth4ed5bcf2001-09-24 22:53:12 +00003050 token = cpp_get_token (pfile);
3051 while (token->type != CPP_EOF)
Zack Weinberg6ead1e92000-07-31 23:47:19 +00003052 {
Neil Booth4ed5bcf2001-09-24 22:53:12 +00003053 cpp_output_token (token, fp);
3054 token = cpp_get_token (pfile);
3055 if (token->flags & PREV_WHITE)
3056 putc (' ', fp);
Zack Weinberg6ead1e92000-07-31 23:47:19 +00003057 }
3058
Neil Booth93c803682000-10-28 17:59:06 +00003059 putc ('\n', fp);
Zack Weinberg041c3192000-07-04 01:58:21 +00003060}
3061
Tom Tromey5d6342e2008-05-21 21:52:57 +00003062/* Return a string representation of all the remaining tokens on the
3063 current line. The result is allocated using xmalloc and must be
3064 freed by the caller. */
3065unsigned char *
3066cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
3067{
3068 const cpp_token *token;
3069 unsigned int out = dir_name ? ustrlen (dir_name) : 0;
3070 unsigned int alloced = 120 + out;
3071 unsigned char *result = (unsigned char *) xmalloc (alloced);
3072
3073 /* If DIR_NAME is empty, there are no initial contents. */
3074 if (dir_name)
3075 {
3076 sprintf ((char *) result, "#%s ", dir_name);
3077 out += 2;
3078 }
3079
3080 token = cpp_get_token (pfile);
3081 while (token->type != CPP_EOF)
3082 {
3083 unsigned char *last;
3084 /* Include room for a possible space and the terminating nul. */
3085 unsigned int len = cpp_token_len (token) + 2;
3086
3087 if (out + len > alloced)
3088 {
3089 alloced *= 2;
3090 if (out + len > alloced)
3091 alloced = out + len;
3092 result = (unsigned char *) xrealloc (result, alloced);
3093 }
3094
3095 last = cpp_spell_token (pfile, token, &result[out], 0);
3096 out = last - result;
3097
3098 token = cpp_get_token (pfile);
3099 if (token->flags & PREV_WHITE)
3100 result[out++] = ' ';
3101 }
3102
3103 result[out] = '\0';
3104 return result;
3105}
3106
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   _cpp_get_buff will hand out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice the distance from BUFF->cur
   to BUFF->limit.  Every macro argument is parenthesized so that an
   argument expression containing a lower-precedence operator is not
   regrouped by the expansion.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
 #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
3121
Neil Boothc9e7a602001-09-27 12:59:38 +00003122/* Create a new allocation buffer. Place the control block at the end
3123 of the buffer, so that buffer overflows will cause immediate chaos. */
Neil Boothb8af0ca2001-09-26 17:52:50 +00003124static _cpp_buff *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00003125new_buff (size_t len)
Neil Boothb8af0ca2001-09-26 17:52:50 +00003126{
3127 _cpp_buff *result;
Neil Boothece54d52001-09-28 09:40:22 +00003128 unsigned char *base;
Neil Boothb8af0ca2001-09-26 17:52:50 +00003129
Neil Booth1e013d22001-09-26 21:44:35 +00003130 if (len < MIN_BUFF_SIZE)
3131 len = MIN_BUFF_SIZE;
Neil Boothc70f6ed2002-06-07 06:26:32 +00003132 len = CPP_ALIGN (len);
Neil Boothb8af0ca2001-09-26 17:52:50 +00003133
Jakub Jelinek1a80db92013-02-28 10:58:47 +01003134#ifdef ENABLE_VALGRIND_CHECKING
3135 /* Valgrind warns about uses of interior pointers, so put _cpp_buff
3136 struct first. */
3137 size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
3138 base = XNEWVEC (unsigned char, len + slen);
3139 result = (_cpp_buff *) base;
3140 base += slen;
3141#else
Gabriel Dos Reisc3f829c2005-05-28 15:52:48 +00003142 base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
Neil Boothb8af0ca2001-09-26 17:52:50 +00003143 result = (_cpp_buff *) (base + len);
Jakub Jelinek1a80db92013-02-28 10:58:47 +01003144#endif
Neil Boothb8af0ca2001-09-26 17:52:50 +00003145 result->base = base;
3146 result->cur = base;
3147 result->limit = base + len;
3148 result->next = NULL;
3149 return result;
3150}
3151
3152/* Place a chain of unwanted allocation buffers on the free list. */
3153void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00003154_cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
Neil Boothb8af0ca2001-09-26 17:52:50 +00003155{
3156 _cpp_buff *end = buff;
3157
3158 while (end->next)
3159 end = end->next;
3160 end->next = pfile->free_buffs;
3161 pfile->free_buffs = buff;
3162}
3163
3164/* Return a free buffer of size at least MIN_SIZE. */
3165_cpp_buff *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00003166_cpp_get_buff (cpp_reader *pfile, size_t min_size)
Neil Boothb8af0ca2001-09-26 17:52:50 +00003167{
3168 _cpp_buff *result, **p;
3169
3170 for (p = &pfile->free_buffs;; p = &(*p)->next)
3171 {
Neil Booth61420882001-09-28 13:25:38 +00003172 size_t size;
Neil Booth1e013d22001-09-26 21:44:35 +00003173
3174 if (*p == NULL)
Neil Boothb8af0ca2001-09-26 17:52:50 +00003175 return new_buff (min_size);
Neil Booth1e013d22001-09-26 21:44:35 +00003176 result = *p;
3177 size = result->limit - result->base;
3178 /* Return a buffer that's big enough, but don't waste one that's
3179 way too big. */
Richard Earnshaw34f52712001-10-17 16:20:04 +00003180 if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
Neil Boothb8af0ca2001-09-26 17:52:50 +00003181 break;
3182 }
3183
3184 *p = result->next;
3185 result->next = NULL;
3186 result->cur = result->base;
3187 return result;
3188}
3189
Kazu Hirata4fe9b912001-10-09 06:03:16 +00003190/* Creates a new buffer with enough space to hold the uncommitted
Neil Booth8c3b2692001-09-30 10:03:11 +00003191 remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
3192 the excess bytes to the new buffer. Chains the new buffer after
3193 BUFF, and returns the new buffer. */
Neil Boothb8af0ca2001-09-26 17:52:50 +00003194_cpp_buff *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00003195_cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
Neil Boothb8af0ca2001-09-26 17:52:50 +00003196{
Neil Booth61420882001-09-28 13:25:38 +00003197 size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
Neil Booth8c3b2692001-09-30 10:03:11 +00003198 _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
Neil Boothb8af0ca2001-09-26 17:52:50 +00003199
Neil Booth8c3b2692001-09-30 10:03:11 +00003200 buff->next = new_buff;
3201 memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
3202 return new_buff;
3203}
3204
Kazu Hirata4fe9b912001-10-09 06:03:16 +00003205/* Creates a new buffer with enough space to hold the uncommitted
Neil Booth8c3b2692001-09-30 10:03:11 +00003206 remaining bytes of the buffer pointed to by BUFF, and at least
3207 MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
3208 Chains the new buffer before the buffer pointed to by BUFF, and
3209 updates the pointer to point to the new buffer. */
3210void
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00003211_cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
Neil Booth8c3b2692001-09-30 10:03:11 +00003212{
3213 _cpp_buff *new_buff, *old_buff = *pbuff;
3214 size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
3215
3216 new_buff = _cpp_get_buff (pfile, size);
3217 memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
3218 new_buff->next = old_buff;
3219 *pbuff = new_buff;
Neil Boothb8af0ca2001-09-26 17:52:50 +00003220}
3221
3222/* Free a chain of buffers starting at BUFF. */
3223void
Andreas Jaeger5671bf22003-07-07 21:11:59 +02003224_cpp_free_buff (_cpp_buff *buff)
Neil Boothb8af0ca2001-09-26 17:52:50 +00003225{
3226 _cpp_buff *next;
3227
3228 for (; buff; buff = next)
3229 {
3230 next = buff->next;
Jakub Jelinek1a80db92013-02-28 10:58:47 +01003231#ifdef ENABLE_VALGRIND_CHECKING
3232 free (buff);
3233#else
Neil Boothb8af0ca2001-09-26 17:52:50 +00003234 free (buff->base);
Jakub Jelinek1a80db92013-02-28 10:58:47 +01003235#endif
Neil Boothb8af0ca2001-09-26 17:52:50 +00003236 }
3237}
Neil Booth93c803682000-10-28 17:59:06 +00003238
Neil Boothece54d52001-09-28 09:40:22 +00003239/* Allocate permanent, unaligned storage of length LEN. */
3240unsigned char *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00003241_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
Neil Boothece54d52001-09-28 09:40:22 +00003242{
3243 _cpp_buff *buff = pfile->u_buff;
3244 unsigned char *result = buff->cur;
3245
3246 if (len > (size_t) (buff->limit - result))
3247 {
3248 buff = _cpp_get_buff (pfile, len);
3249 buff->next = pfile->u_buff;
3250 pfile->u_buff = buff;
3251 result = buff->cur;
3252 }
3253
3254 buff->cur = result + len;
3255 return result;
3256}
3257
Neil Booth87062812001-10-20 09:00:53 +00003258/* Allocate permanent, unaligned storage of length LEN from a_buff.
3259 That buffer is used for growing allocations when saving macro
3260 replacement lists in a #define, and when parsing an answer to an
3261 assertion in #assert, #unassert or #if (and therefore possibly
3262 whilst expanding macros). It therefore must not be used by any
3263 code that they might call: specifically the lexer and the guts of
3264 the macro expander.
3265
3266 All existing other uses clearly fit this restriction: storing
3267 registered pragmas during initialization. */
Neil Booth93c803682000-10-28 17:59:06 +00003268unsigned char *
Zack Weinberg6cf87ca2003-06-17 06:17:44 +00003269_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
Neil Booth93c803682000-10-28 17:59:06 +00003270{
Neil Booth8c3b2692001-09-30 10:03:11 +00003271 _cpp_buff *buff = pfile->a_buff;
3272 unsigned char *result = buff->cur;
Neil Booth93c803682000-10-28 17:59:06 +00003273
Neil Booth8c3b2692001-09-30 10:03:11 +00003274 if (len > (size_t) (buff->limit - result))
Zack Weinberg041c3192000-07-04 01:58:21 +00003275 {
Neil Booth8c3b2692001-09-30 10:03:11 +00003276 buff = _cpp_get_buff (pfile, len);
3277 buff->next = pfile->a_buff;
3278 pfile->a_buff = buff;
3279 result = buff->cur;
Zack Weinberg041c3192000-07-04 01:58:21 +00003280 }
3281
Neil Booth8c3b2692001-09-30 10:03:11 +00003282 buff->cur = result + len;
Neil Booth93c803682000-10-28 17:59:06 +00003283 return result;
Zack Weinberg041c3192000-07-04 01:58:21 +00003284}
Geoffrey Keatingd8044162004-06-09 20:10:13 +00003285
3286/* Say which field of TOK is in use. */
3287
3288enum cpp_token_fld_kind
Jakub Jelinekc26302d2013-07-10 18:49:24 +02003289cpp_token_val_index (const cpp_token *tok)
Geoffrey Keatingd8044162004-06-09 20:10:13 +00003290{
3291 switch (TOKEN_SPELL (tok))
3292 {
3293 case SPELL_IDENT:
3294 return CPP_TOKEN_FLD_NODE;
3295 case SPELL_LITERAL:
3296 return CPP_TOKEN_FLD_STR;
Joseph Myersaa508502009-04-19 18:10:56 +01003297 case SPELL_OPERATOR:
3298 if (tok->type == CPP_PASTE)
Joseph Myers9a0c6182009-05-10 15:27:32 +01003299 return CPP_TOKEN_FLD_TOKEN_NO;
Joseph Myersaa508502009-04-19 18:10:56 +01003300 else
3301 return CPP_TOKEN_FLD_NONE;
Geoffrey Keatingd8044162004-06-09 20:10:13 +00003302 case SPELL_NONE:
3303 if (tok->type == CPP_MACRO_ARG)
3304 return CPP_TOKEN_FLD_ARG_NO;
3305 else if (tok->type == CPP_PADDING)
3306 return CPP_TOKEN_FLD_SOURCE;
Zack Weinberg21b11492004-09-09 19:16:56 +00003307 else if (tok->type == CPP_PRAGMA)
Richard Hendersonbc4071d2006-01-04 08:33:38 -08003308 return CPP_TOKEN_FLD_PRAGMA;
Geoffrey Keatingd8044162004-06-09 20:10:13 +00003309 /* else fall through */
3310 default:
3311 return CPP_TOKEN_FLD_NONE;
3312 }
3313}
Gabriel Charettee3dfef42011-08-22 20:41:07 +00003314
3315/* All tokens lexed in R after calling this function will be forced to have
3316 their source_location the same as the location referenced by P, until
3317 cpp_stop_forcing_token_locations is called for R. */
3318
3319void
3320cpp_force_token_locations (cpp_reader *r, source_location *p)
3321{
3322 r->forced_token_location_p = p;
3323}
3324
3325/* Go back to assigning locations naturally for lexed tokens. */
3326
3327void
3328cpp_stop_forcing_token_locations (cpp_reader *r)
3329{
3330 r->forced_token_location_p = NULL;
3331}