Blame - libcpp/lex.c - toolchain/gcc

blob: 8af09e50295535048b2f9f7105bd24ea2ab968e1 [file] [log] [blame]

Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	1	/* CPP Library - lexical analysis.
Jakub Jelinek	cbe34bb	2017-01-01 13:07:43 +0100	[diff] [blame]	2	Copyright (C) 2000-2017 Free Software Foundation, Inc.
Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	3	Contributed by Per Bothner, 1994-95.
				4	Based on CCCP program by Paul Rubin, June 1986
				5	Adapted to ANSI C, Richard Stallman, Jan 1987
				6	Broken out to separate file, Zack Weinberg, Mar 2000
				7
				8	This program is free software; you can redistribute it and/or modify it
				9	under the terms of the GNU General Public License as published by the
Jakub Jelinek	748086b	2009-04-09 17:00:19 +0200	[diff] [blame]	10	Free Software Foundation; either version 3, or (at your option) any
Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	11	later version.
				12
				13	This program is distributed in the hope that it will be useful,
				14	but WITHOUT ANY WARRANTY; without even the implied warranty of
				15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				16	GNU General Public License for more details.
				17
				18	You should have received a copy of the GNU General Public License
Jakub Jelinek	748086b	2009-04-09 17:00:19 +0200	[diff] [blame]	19	along with this program; see the file COPYING3. If not see
				20	<http://www.gnu.org/licenses/>. */
Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	21
				22	#include "config.h"
				23	#include "system.h"
Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	24	#include "cpplib.h"
Paolo Bonzini	4f4e53dd	2004-05-24 10:50:45 +0000	[diff] [blame]	25	#include "internal.h"
Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	26
/* Spelling category of a token type, as recorded per token type in the
   token_spellings table.  */
enum spell_type
{
  SPELL_OPERATOR = 0,	/* Category given to every OP () table entry.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
				34
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	35	struct token_spelling
Zack Weinberg	f9a0e96	2000-07-13 02:32:41 +0000	[diff] [blame]	36	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	37	enum spell_type category;
				38	const unsigned char *name;
Zack Weinberg	f9a0e96	2000-07-13 02:32:41 +0000	[diff] [blame]	39	};
				40
Zack Weinberg	8206c79	2001-10-11 21:21:57 +0000	[diff] [blame]	41	static const unsigned char *const digraph_spellings[] =
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	42	{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	43
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	44	#define OP(e, s) { SPELL_OPERATOR, UC s },
				45	#define TK(e, s) { SPELL_ ## s, UC #e },
Zack Weinberg	8206c79	2001-10-11 21:21:57 +0000	[diff] [blame]	46	static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	47	#undef OP
				48	#undef TK
				49
				50	#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
				51	#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
Zack Weinberg	f2d5f0c	2000-04-14 23:29:45 +0000	[diff] [blame]	52
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	53	static void add_line_note (cpp_buffer , const uchar , unsigned int);
				54	static int skip_line_comment (cpp_reader *);
				55	static void skip_whitespace (cpp_reader *, cppchar_t);
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	56	static void lex_string (cpp_reader , cpp_token , const uchar *);
				57	static void save_comment (cpp_reader , cpp_token , const uchar *, cppchar_t);
Matthew Gingell	631d0d3	2008-10-05 12:35:36 +0000	[diff] [blame]	58	static void store_comment (cpp_reader , cpp_token );
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	59	static void create_literal (cpp_reader , cpp_token , const uchar *,
				60	unsigned int, enum cpp_ttype);
				61	static bool warn_in_comment (cpp_reader , _cpp_line_note );
				62	static int name_p (cpp_reader , const cpp_string );
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	63	static tokenrun next_tokenrun (tokenrun );
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	64
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	65	static _cpp_buff *new_buff (size_t);
Zack Weinberg	15dad1d	2000-05-18 15:55:46 +0000	[diff] [blame]	66
Neil Booth	9d10c9a	2003-03-06 23:12:30 +0000	[diff] [blame]	67
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	68	/* Utility routine:
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	69
Zack Weinberg	bfb9dc7	2000-07-08 19:00:39 +0000	[diff] [blame]	70	Compares, the token TOKEN to the NUL-terminated string STRING.
				71	TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	72	int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	73	cpp_ideq (const cpp_token token, const char string)
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	74	{
Zack Weinberg	bfb9dc7	2000-07-08 19:00:39 +0000	[diff] [blame]	75	if (token->type != CPP_NAME)
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	76	return 0;
Zack Weinberg	bfb9dc7	2000-07-08 19:00:39 +0000	[diff] [blame]	77
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	78	return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	79	}
				80
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	81	/* Record a note TYPE at byte POS into the current cleaned logical
				82	line. */
Neil Booth	8706281	2001-10-20 09:00:53 +0000	[diff] [blame]	83	static void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	84	add_line_note (cpp_buffer buffer, const uchar pos, unsigned int type)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	85	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	86	if (buffer->notes_used == buffer->notes_cap)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	87	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	88	buffer->notes_cap = buffer->notes_cap * 2 + 200;
Gabriel Dos Reis	c3f829c	2005-05-28 15:52:48 +0000	[diff] [blame]	89	buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
				90	buffer->notes_cap);
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	91	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	92
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	93	buffer->notes[buffer->notes_used].pos = pos;
				94	buffer->notes[buffer->notes_used].type = type;
				95	buffer->notes_used++;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	96	}
				97
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	98
				99	/* Fast path to find line special characters using optimized character
				100	scanning algorithms. Anything complicated falls back to the slow
				101	path below. Since this loop is very hot it's worth doing these kinds
				102	of optimizations.
				103
				104	One of the paths through the ifdefs should provide
				105
				106	const uchar *search_line_fast (const uchar *s, const uchar *end);
				107
				108	Between S and END, search for \n, \r, \\, ?. Return a pointer to
				109	the found character.
				110
				111	Note that the last character of the buffer is always a newline,
				112	as forced by _cpp_convert_input. This fact can be used to avoid
				113	explicitly looking for the end of the buffer. */
				114
				115	/* Configure gives us an ifdef test. */
				116	#ifndef WORDS_BIGENDIAN
				117	#define WORDS_BIGENDIAN 0
				118	#endif
				119
				120	/* We'd like the largest integer that fits into a register. There's nothing
				121	in <stdint.h> that gives us that. For most hosts this is unsigned long,
				122	but MS decided on an LLP64 model. Thankfully when building with GCC we
				123	can get the "real" word size. */
				124	#ifdef __GNUC__
				125	typedef unsigned int word_type __attribute__((__mode__(__word__)));
				126	#else
				127	typedef unsigned long word_type;
				128	#endif
				129
				130	/* The code below is only expecting sizes 4 or 8.
				131	Die at compile-time if this expectation is violated. */
				132	typedef char check_word_type_size
				133	[(sizeof(word_type) == 8 \|\| sizeof(word_type) == 4) * 2 - 1];
				134
				135	/* Return X with the first N bytes forced to values that won't match one
				136	of the interesting characters. Note that NUL is not interesting. */
				137
				138	static inline word_type
				139	acc_char_mask_misalign (word_type val, unsigned int n)
				140	{
				141	word_type mask = -1;
				142	if (WORDS_BIGENDIAN)
				143	mask >>= n * 8;
				144	else
				145	mask <<= n * 8;
				146	return val & mask;
				147	}
				148
				149	/* Return X replicated to all byte positions within WORD_TYPE. */
				150
				151	static inline word_type
				152	acc_char_replicate (uchar x)
				153	{
				154	word_type ret;
				155
				156	ret = (x << 24) \| (x << 16) \| (x << 8) \| x;
				157	if (sizeof(word_type) == 8)
				158	ret = (ret << 16 << 16) \| ret;
				159	return ret;
				160	}
				161
				162	/* Return non-zero if some byte of VAL is (probably) C. */
				163
				164	static inline word_type
				165	acc_char_cmp (word_type val, word_type c)
				166	{
				167	#if defined(__GNUC__) && defined(__alpha__)
				168	/* We can get exact results using a compare-bytes instruction.
				169	Get (val == c) via (0 >= (val ^ c)). */
				170	return __builtin_alpha_cmpbge (0, val ^ c);
				171	#else
				172	word_type magic = 0x7efefefeU;
				173	if (sizeof(word_type) == 8)
				174	magic = (magic << 16 << 16) \| 0xfefefefeU;
				175	magic \|= 1;
				176
				177	val ^= c;
				178	return ((val + magic) ^ ~val) & ~magic;
				179	#endif
				180	}
				181
				182	/* Given the result of acc_char_cmp is non-zero, return the index of
				183	the found character. If this was a false positive, return -1. */
				184
				185	static inline int
				186	acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
				187	word_type val ATTRIBUTE_UNUSED)
				188	{
				189	#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
				190	/* The cmpbge instruction sets bits of the result corresponding to
				191	matches in the bytes with no false positives. */
				192	return __builtin_ctzl (cmp);
				193	#else
				194	unsigned int i;
				195
				196	/* ??? It would be nice to force unrolling here,
				197	and have all of these constants folded. */
				198	for (i = 0; i < sizeof(word_type); ++i)
				199	{
				200	uchar c;
				201	if (WORDS_BIGENDIAN)
				202	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
				203	else
				204	c = (val >> i * 8) & 0xff;
				205
				206	if (c == '\n' \|\| c == '\r' \|\| c == '\\' \|\| c == '?')
				207	return i;
				208	}
				209
				210	return -1;
				211	#endif
				212	}
				213
				214	/* A version of the fast scanner using bit fiddling techniques.
				215
				216	For 32-bit words, one would normally perform 16 comparisons and
				217	16 branches. With this algorithm one performs 24 arithmetic
				218	operations and one branch. Whether this is faster with a 32-bit
				219	word size is going to be somewhat system dependent.
				220
				221	For 64-bit words, we eliminate twice the number of comparisons
				222	and branches without increasing the number of arithmetic operations.
				223	It's almost certainly going to be a win with 64-bit word size. */
				224
				225	static const uchar * search_line_acc_char (const uchar , const uchar )
				226	ATTRIBUTE_UNUSED;
				227
				228	static const uchar *
				229	search_line_acc_char (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				230	{
				231	const word_type repl_nl = acc_char_replicate ('\n');
				232	const word_type repl_cr = acc_char_replicate ('\r');
				233	const word_type repl_bs = acc_char_replicate ('\\');
				234	const word_type repl_qm = acc_char_replicate ('?');
				235
				236	unsigned int misalign;
				237	const word_type *p;
				238	word_type val, t;
				239
				240	/* Align the buffer. Mask out any bytes from before the beginning. */
				241	p = (word_type *)((uintptr_t)s & -sizeof(word_type));
				242	val = *p;
				243	misalign = (uintptr_t)s & (sizeof(word_type) - 1);
				244	if (misalign)
				245	val = acc_char_mask_misalign (val, misalign);
				246
				247	/* Main loop. */
				248	while (1)
				249	{
				250	t = acc_char_cmp (val, repl_nl);
				251	t \|= acc_char_cmp (val, repl_cr);
				252	t \|= acc_char_cmp (val, repl_bs);
				253	t \|= acc_char_cmp (val, repl_qm);
				254
				255	if (__builtin_expect (t != 0, 0))
				256	{
				257	int i = acc_char_index (t, val);
				258	if (i >= 0)
				259	return (const uchar *)p + i;
				260	}
				261
				262	val = *++p;
				263	}
				264	}
				265
Rainer Orth	d9f069a	2014-04-22 12:30:59 +0000	[diff] [blame]	266	/* Disable on Solaris 2/x86 until the following problem can be properly
Rainer Orth	789d73c	2010-08-24 17:23:35 +0000	[diff] [blame]	267	autoconfed:
				268
Rainer Orth	789d73c	2010-08-24 17:23:35 +0000	[diff] [blame]	269	The Solaris 10+ assembler tags objects with the instruction set
				270	extensions used, so SSE4.2 executables cannot run on machines that
				271	don't support that extension. */
				272
Uros Bizjak	1b6b13f	2014-11-20 13:10:12 +0100	[diff] [blame]	273	#if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 \|\| !defined(__PIC__)) && (defined(__i386__) \|\| defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	274
				275	/* Replicated character data to be shared between implementations.
				276	Recall that outside of a context with vector support we can't
				277	define compatible vector types, therefore these are all defined
				278	in terms of raw characters. */
				279	static const char repl_chars[4][16] __attribute__((aligned(16))) = {
				280	{ '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
				281	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
				282	{ '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
				283	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
				284	{ '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
				285	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
				286	{ '?', '?', '?', '?', '?', '?', '?', '?',
				287	'?', '?', '?', '?', '?', '?', '?', '?' },
				288	};
				289
				290	/* A version of the fast scanner using MMX vectorized byte compare insns.
				291
				292	This uses the PMOVMSKB instruction which was introduced with "MMX2",
Uros Bizjak	ef230b3	2011-05-22 20:53:32 +0200	[diff] [blame]	293	which was packaged into SSE1; it is also present in the AMD MMX
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	294	extension. Mark the function as using "sse" so that we emit a real
				295	"emms" instruction, rather than the 3dNOW "femms" instruction. */
				296
				297	static const uchar *
				298	#ifndef __SSE__
				299	__attribute__((__target__("sse")))
				300	#endif
				301	search_line_mmx (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				302	{
				303	typedef char v8qi __attribute__ ((__vector_size__ (8)));
				304	typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
				305
				306	const v8qi repl_nl = (const v8qi )repl_chars[0];
				307	const v8qi repl_cr = (const v8qi )repl_chars[1];
				308	const v8qi repl_bs = (const v8qi )repl_chars[2];
				309	const v8qi repl_qm = (const v8qi )repl_chars[3];
				310
				311	unsigned int misalign, found, mask;
				312	const v8qi *p;
				313	v8qi data, t, c;
				314
				315	/* Align the source pointer. While MMX doesn't generate unaligned data
				316	faults, this allows us to safely scan to the end of the buffer without
				317	reading beyond the end of the last page. */
				318	misalign = (uintptr_t)s & 7;
				319	p = (const v8qi *)((uintptr_t)s & -8);
				320	data = *p;
				321
				322	/* Create a mask for the bytes that are valid within the first
				323	16-byte block. The Idea here is that the AND with the mask
				324	within the loop is "free", since we need some AND or TEST
				325	insn in order to set the flags for the branch anyway. */
				326	mask = -1u << misalign;
				327
				328	/* Main loop processing 8 bytes at a time. */
				329	goto start;
				330	do
				331	{
				332	data = *++p;
				333	mask = -1;
				334
				335	start:
				336	t = __builtin_ia32_pcmpeqb(data, repl_nl);
				337	c = __builtin_ia32_pcmpeqb(data, repl_cr);
				338	t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
				339	c = __builtin_ia32_pcmpeqb(data, repl_bs);
				340	t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
				341	c = __builtin_ia32_pcmpeqb(data, repl_qm);
				342	t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
				343	found = __builtin_ia32_pmovmskb (t);
				344	found &= mask;
				345	}
				346	while (!found);
				347
				348	__builtin_ia32_emms ();
				349
				350	/* FOUND contains 1 in bits for which we matched a relevant
				351	character. Conversion to the byte index is trivial. */
				352	found = __builtin_ctz(found);
				353	return (const uchar *)p + found;
				354	}
				355
				356	/* A version of the fast scanner using SSE2 vectorized byte compare insns. */
				357
				358	static const uchar *
				359	#ifndef __SSE2__
				360	__attribute__((__target__("sse2")))
				361	#endif
				362	search_line_sse2 (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				363	{
				364	typedef char v16qi __attribute__ ((__vector_size__ (16)));
				365
				366	const v16qi repl_nl = (const v16qi )repl_chars[0];
				367	const v16qi repl_cr = (const v16qi )repl_chars[1];
				368	const v16qi repl_bs = (const v16qi )repl_chars[2];
				369	const v16qi repl_qm = (const v16qi )repl_chars[3];
				370
				371	unsigned int misalign, found, mask;
				372	const v16qi *p;
				373	v16qi data, t;
				374
				375	/* Align the source pointer. */
				376	misalign = (uintptr_t)s & 15;
				377	p = (const v16qi *)((uintptr_t)s & -16);
				378	data = *p;
				379
				380	/* Create a mask for the bytes that are valid within the first
				381	16-byte block. The Idea here is that the AND with the mask
				382	within the loop is "free", since we need some AND or TEST
				383	insn in order to set the flags for the branch anyway. */
				384	mask = -1u << misalign;
				385
				386	/* Main loop processing 16 bytes at a time. */
				387	goto start;
				388	do
				389	{
				390	data = *++p;
				391	mask = -1;
				392
				393	start:
				394	t = __builtin_ia32_pcmpeqb128(data, repl_nl);
				395	t \|= __builtin_ia32_pcmpeqb128(data, repl_cr);
				396	t \|= __builtin_ia32_pcmpeqb128(data, repl_bs);
				397	t \|= __builtin_ia32_pcmpeqb128(data, repl_qm);
				398	found = __builtin_ia32_pmovmskb128 (t);
				399	found &= mask;
				400	}
				401	while (!found);
				402
				403	/* FOUND contains 1 in bits for which we matched a relevant
				404	character. Conversion to the byte index is trivial. */
				405	found = __builtin_ctz(found);
				406	return (const uchar *)p + found;
				407	}
				408
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.

   Between S and END, return a pointer to the first \n, \r, \\ or ?.
   END is only used to decide whether an unaligned 16-byte read at S is
   safe; the main loop relies on one of those characters being present.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  static const v16qi search = { '\n', '\r', '?', '\\' };

  uintptr_t si = (uintptr_t)s;
  uintptr_t index;

  /* Check for unaligned input.  */
  if (si & 15)
    {
      v16qi sv;

      if (__builtin_expect (end - s < 16, 0)
	  && __builtin_expect ((si & 0xfff) > 0xff0, 0))
	{
	  /* There are less than 16 bytes left in the buffer, and less
	     than 16 bytes left on the page.  Reading 16 bytes at this
	     point might generate a spurious page fault.  Defer to the
	     SSE2 implementation, which already handles alignment.  */
	  return search_line_sse2 (s, end);
	}

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
	 memory need not be aligned.  */
      sv = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);

      if (__builtin_expect (index < 16, 0))
	goto found;

      /* Advance the pointer to an aligned address.  We will re-scan a
	 few bytes, but we no longer need care for reading past the
	 end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((si + 15) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
  while (1)
    {
      char f;

      /* By using inline assembly instead of the builtin,
	 we can use the result, as well as the flags set.  */
      __asm ("%vpcmpestri\t$0, %2, %3"
	     : "=c"(index), "=@ccc"(f)
	     : "m"(*s), "x"(search), "a"(4), "d"(16));
      if (f)
	break;

      s += 16;
    }
#else
  /* The assembly loop advances S before each compare, so step back
     one block first.  */
  s -= 16;
  /* By doing the whole loop in inline assembly,
     we can make proper use of the flags set.  */
  __asm (      ".balign 16\n"
	"0: add $16, %1\n"
	" %vpcmpestri\t$0, (%1), %2\n"
	" jnc 0b"
	: "=&c"(index), "+r"(s)
	: "x"(search), "a"(4), "d"(16));
#endif

 found:
  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
				489
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	490	/* Check the CPU capabilities. */
				491
				492	#include "../gcc/config/i386/cpuid.h"
				493
				494	typedef const uchar * (search_line_fast_type) (const uchar , const uchar *);
				495	static search_line_fast_type search_line_fast;
				496
Jakub Jelinek	b0c084b	2011-12-07 23:05:59 +0100	[diff] [blame]	497	#define HAVE_init_vectorized_lexer 1
				498	static inline void
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	499	init_vectorized_lexer (void)
				500	{
				501	unsigned dummy, ecx = 0, edx = 0;
				502	search_line_fast_type impl = search_line_acc_char;
				503	int minimum = 0;
				504
				505	#if defined(__SSE4_2__)
				506	minimum = 3;
				507	#elif defined(__SSE2__)
				508	minimum = 2;
Uros Bizjak	ef230b3	2011-05-22 20:53:32 +0200	[diff] [blame]	509	#elif defined(__SSE__)
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	510	minimum = 1;
				511	#endif
				512
				513	if (minimum == 3)
				514	impl = search_line_sse42;
				515	else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) \|\| minimum == 2)
				516	{
				517	if (minimum == 3 \|\| (ecx & bit_SSE4_2))
				518	impl = search_line_sse42;
				519	else if (minimum == 2 \|\| (edx & bit_SSE2))
				520	impl = search_line_sse2;
				521	else if (minimum == 1 \|\| (edx & bit_SSE))
				522	impl = search_line_mmx;
				523	}
				524	else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
				525	{
Uros Bizjak	5e70c0b	2011-05-22 21:04:54 +0200	[diff] [blame]	526	if (minimum == 1
				527	\|\| (edx & (bit_MMXEXT \| bit_CMOV)) == (bit_MMXEXT \| bit_CMOV))
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	528	impl = search_line_mmx;
				529	}
				530
				531	search_line_fast = impl;
				532	}
				533
Bill Schmidt	0ccaaab	2014-10-03 20:06:38 +0000	[diff] [blame]	534	#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	535
Bill Schmidt	0ccaaab	2014-10-03 20:06:38 +0000	[diff] [blame]	536	/* A vection of the fast scanner using AltiVec vectorized byte compares
				537	and VSX unaligned loads (when VSX is available). This is otherwise
				538	the same as the pre-GCC 5 version. */
				539
Markus Trippelsdorf	44d9524	2015-03-04 17:28:56 +0000	[diff] [blame]	540	ATTRIBUTE_NO_SANITIZE_UNDEFINED
Bill Schmidt	0ccaaab	2014-10-03 20:06:38 +0000	[diff] [blame]	541	static const uchar *
				542	search_line_fast (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				543	{
				544	typedef __attribute__((altivec(vector))) unsigned char vc;
				545
				546	const vc repl_nl = {
				547	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
				548	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
				549	};
				550	const vc repl_cr = {
				551	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
				552	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
				553	};
				554	const vc repl_bs = {
				555	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
				556	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
				557	};
				558	const vc repl_qm = {
				559	'?', '?', '?', '?', '?', '?', '?', '?',
				560	'?', '?', '?', '?', '?', '?', '?', '?',
				561	};
				562	const vc zero = { 0 };
				563
				564	vc data, t;
				565
				566	/* Main loop processing 16 bytes at a time. */
				567	do
				568	{
				569	vc m_nl, m_cr, m_bs, m_qm;
				570
				571	data = ((const vc )s);
				572	s += 16;
				573
				574	m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
				575	m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
				576	m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
				577	m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
				578	t = (m_nl \| m_cr) \| (m_bs \| m_qm);
				579
				580	/* T now contains 0xff in bytes for which we matched one of the relevant
				581	characters. We want to exit the loop if any byte in T is non-zero.
				582	Below is the expansion of vec_any_ne(t, zero). */
				583	}
				584	while (!__builtin_vec_vcmpeq_p(/__CR6_LT_REV/3, t, zero));
				585
				586	/* Restore s to to point to the 16 bytes we just processed. */
				587	s -= 16;
				588
				589	{
				590	#define N (sizeof(vc) / sizeof(long))
				591
				592	union {
				593	vc v;
				594	/* Statically assert that N is 2 or 4. */
				595	unsigned long l[(N == 2 \|\| N == 4) ? N : -1];
				596	} u;
				597	unsigned long l, i = 0;
				598
				599	u.v = t;
				600
				601	/* Find the first word of T that is non-zero. */
				602	switch (N)
				603	{
				604	case 4:
				605	l = u.l[i++];
				606	if (l != 0)
				607	break;
				608	s += sizeof(unsigned long);
				609	l = u.l[i++];
				610	if (l != 0)
				611	break;
				612	s += sizeof(unsigned long);
Marek Polacek	191816a	2016-08-12 10:30:47 +0000	[diff] [blame]	613	/* FALLTHRU */
Bill Schmidt	0ccaaab	2014-10-03 20:06:38 +0000	[diff] [blame]	614	case 2:
				615	l = u.l[i++];
				616	if (l != 0)
				617	break;
				618	s += sizeof(unsigned long);
				619	l = u.l[i];
				620	}
				621
				622	/* L now contains 0xff in bytes for which we matched one of the
				623	relevant characters. We can find the byte index by finding
				624	its bit index and dividing by 8. */
				625	#ifdef __BIG_ENDIAN__
				626	l = __builtin_clzl(l) >> 3;
				627	#else
				628	l = __builtin_ctzl(l) >> 3;
				629	#endif
				630	return s + l;
				631
				632	#undef N
				633	}
				634	}
				635
				636	#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
				637
				638	/* A vection of the fast scanner using AltiVec vectorized byte compares.
				639	This cannot be used for little endian because vec_lvsl/lvsr are
				640	deprecated for little endian and the code won't work properly. */
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	641	/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
				642	so we can't compile this function without -maltivec on the command line
				643	(or implied by some other switch). */
				644
				645	static const uchar *
				646	search_line_fast (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				647	{
				648	typedef __attribute__((altivec(vector))) unsigned char vc;
				649
				650	const vc repl_nl = {
				651	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
				652	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
				653	};
				654	const vc repl_cr = {
				655	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
				656	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
				657	};
				658	const vc repl_bs = {
				659	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
				660	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
				661	};
				662	const vc repl_qm = {
				663	'?', '?', '?', '?', '?', '?', '?', '?',
				664	'?', '?', '?', '?', '?', '?', '?', '?',
				665	};
				666	const vc ones = {
				667	-1, -1, -1, -1, -1, -1, -1, -1,
				668	-1, -1, -1, -1, -1, -1, -1, -1,
				669	};
				670	const vc zero = { 0 };
				671
				672	vc data, mask, t;
				673
				674	/* Altivec loads automatically mask addresses with -16. This lets us
				675	issue the first load as early as possible. */
				676	data = __builtin_vec_ld(0, (const vc *)s);
				677
				678	/* Discard bytes before the beginning of the buffer. Do this by
				679	beginning with all ones and shifting in zeros according to the
				680	mis-alignment. The LVSR instruction pulls the exact shift we
				681	want from the address. */
				682	mask = __builtin_vec_lvsr(0, s);
				683	mask = __builtin_vec_perm(zero, ones, mask);
				684	data &= mask;
				685
				686	/* While altivec loads mask addresses, we still need to align S so
				687	that the offset we compute at the end is correct. */
				688	s = (const uchar *)((uintptr_t)s & -16);
				689
				690	/* Main loop processing 16 bytes at a time. */
				691	goto start;
				692	do
				693	{
				694	vc m_nl, m_cr, m_bs, m_qm;
				695
				696	s += 16;
				697	data = __builtin_vec_ld(0, (const vc *)s);
				698
				699	start:
				700	m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
				701	m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
				702	m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
				703	m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
				704	t = (m_nl \| m_cr) \| (m_bs \| m_qm);
				705
				706	/* T now contains 0xff in bytes for which we matched one of the relevant
				707	characters. We want to exit the loop if any byte in T is non-zero.
				708	Below is the expansion of vec_any_ne(t, zero). */
				709	}
				710	while (!__builtin_vec_vcmpeq_p(/__CR6_LT_REV/3, t, zero));
				711
				712	{
				713	#define N (sizeof(vc) / sizeof(long))
				714
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	715	union {
				716	vc v;
Dodji Seketeli	53a103d	2012-05-29 09:42:39 +0000	[diff] [blame]	717	/* Statically assert that N is 2 or 4. */
				718	unsigned long l[(N == 2 \|\| N == 4) ? N : -1];
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	719	} u;
				720	unsigned long l, i = 0;
				721
				722	u.v = t;
				723
				724	/* Find the first word of T that is non-zero. */
				725	switch (N)
				726	{
				727	case 4:
				728	l = u.l[i++];
				729	if (l != 0)
				730	break;
				731	s += sizeof(unsigned long);
				732	l = u.l[i++];
				733	if (l != 0)
				734	break;
				735	s += sizeof(unsigned long);
Kelvin Nilsen	67ef83c	2016-10-05 12:36:48 +0000	[diff] [blame]	736	/* FALLTHROUGH */
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	737	case 2:
				738	l = u.l[i++];
				739	if (l != 0)
				740	break;
				741	s += sizeof(unsigned long);
				742	l = u.l[i];
				743	}
				744
				745	/* L now contains 0xff in bytes for which we matched one of the
				746	relevant characters. We can find the byte index by finding
				747	its bit index and dividing by 8. */
				748	l = __builtin_clzl(l) >> 3;
				749	return s + l;
				750
				751	#undef N
				752	}
				753	}
				754
Richard Earnshaw	a6ac871	2016-11-08 13:29:32 +0000	[diff] [blame]	755	#elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
				756	#include "arm_neon.h"
				757
				758	/* This doesn't have to be the exact page size, but no system may use
				759	a size smaller than this. ARMv8 requires a minimum page size of
				760	4k. The impact of being conservative here is a small number of
				761	cases will take the slightly slower entry path into the main
				762	loop. */
				763
				764	#define AARCH64_MIN_PAGE_SIZE 4096
				765
				766	static const uchar *
				767	search_line_fast (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				768	{
				769	const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
				770	const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
				771	const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
				772	const uint8x16_t repl_qm = vdupq_n_u8 ('?');
				773	const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
				774
				775	#ifdef __AARCH64EB
				776	const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
				777	#else
				778	const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
				779	#endif
				780
				781	unsigned int found;
				782	const uint8_t *p;
				783	uint8x16_t data;
				784	uint8x16_t t;
				785	uint16x8_t m;
				786	uint8x16_t u, v, w;
				787
				788	/* Align the source pointer. */
				789	p = (const uint8_t *)((uintptr_t)s & -16);
				790
				791	/* Assuming random string start positions, with a 4k page size we'll take
				792	the slow path about 0.37% of the time. */
				793	if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
				794	- (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
				795	< 16, 0))
				796	{
				797	/* Slow path: the string starts near a possible page boundary. */
				798	uint32_t misalign, mask;
				799
				800	misalign = (uintptr_t)s & 15;
				801	mask = (-1u << misalign) & 0xffff;
				802	data = vld1q_u8 (p);
				803	t = vceqq_u8 (data, repl_nl);
				804	u = vceqq_u8 (data, repl_cr);
				805	v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
				806	w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
				807	t = vorrq_u8 (v, w);
				808	t = vandq_u8 (t, xmask);
				809	m = vpaddlq_u8 (t);
				810	m = vshlq_u16 (m, shift);
				811	found = vaddvq_u16 (m);
				812	found &= mask;
				813	if (found)
				814	return (const uchar*)p + __builtin_ctz (found);
				815	}
				816	else
				817	{
				818	data = vld1q_u8 ((const uint8_t *) s);
				819	t = vceqq_u8 (data, repl_nl);
				820	u = vceqq_u8 (data, repl_cr);
				821	v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
				822	w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
				823	t = vorrq_u8 (v, w);
Andreas Schwab	8c00ae2	2017-03-21 11:10:17 +0000	[diff] [blame]	824	if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != 0, 0))
Richard Earnshaw	a6ac871	2016-11-08 13:29:32 +0000	[diff] [blame]	825	goto done;
				826	}
				827
				828	do
				829	{
				830	p += 16;
				831	data = vld1q_u8 (p);
				832	t = vceqq_u8 (data, repl_nl);
				833	u = vceqq_u8 (data, repl_cr);
				834	v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
				835	w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
				836	t = vorrq_u8 (v, w);
				837	} while (!vpaddd_u64 ((uint64x2_t)t));
				838
				839	done:
				840	/* Now that we've found the terminating substring, work out precisely where
				841	we need to stop. */
				842	t = vandq_u8 (t, xmask);
				843	m = vpaddlq_u8 (t);
				844	m = vshlq_u16 (m, shift);
				845	found = vaddvq_u16 (m);
				846	return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
				847	+ __builtin_ctz (found));
				848	}
				849
Szabolcs Nagy	95d0610	2015-01-30 14:07:00 +0000	[diff] [blame]	850	#elif defined (__ARM_NEON)
Richard Earnshaw	e75b54a	2012-03-22 17:54:55 +0000	[diff] [blame]	851	#include "arm_neon.h"
				852
				853	static const uchar *
				854	search_line_fast (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				855	{
				856	const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
				857	const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
				858	const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
				859	const uint8x16_t repl_qm = vdupq_n_u8 ('?');
				860	const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
				861
				862	unsigned int misalign, found, mask;
				863	const uint8_t *p;
				864	uint8x16_t data;
				865
				866	/* Align the source pointer. */
				867	misalign = (uintptr_t)s & 15;
				868	p = (const uint8_t *)((uintptr_t)s & -16);
				869	data = vld1q_u8 (p);
				870
				871	/* Create a mask for the bytes that are valid within the first
				872	16-byte block. The Idea here is that the AND with the mask
				873	within the loop is "free", since we need some AND or TEST
				874	insn in order to set the flags for the branch anyway. */
				875	mask = (-1u << misalign) & 0xffff;
				876
				877	/* Main loop, processing 16 bytes at a time. */
				878	goto start;
				879
				880	do
				881	{
				882	uint8x8_t l;
				883	uint16x4_t m;
				884	uint32x2_t n;
				885	uint8x16_t t, u, v, w;
				886
				887	p += 16;
				888	data = vld1q_u8 (p);
				889	mask = 0xffff;
				890
				891	start:
				892	t = vceqq_u8 (data, repl_nl);
				893	u = vceqq_u8 (data, repl_cr);
				894	v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
				895	w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
				896	t = vandq_u8 (vorrq_u8 (v, w), xmask);
				897	l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
				898	m = vpaddl_u8 (l);
				899	n = vpaddl_u16 (m);
				900
				901	found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
				902	vshr_n_u64 ((uint64x1_t) n, 24)), 0);
				903	found &= mask;
				904	}
				905	while (!found);
				906
				907	/* FOUND contains 1 in bits for which we matched a relevant
				908	character. Conversion to the byte index is trivial. */
				909	found = __builtin_ctz (found);
				910	return (const uchar *)p + found;
				911	}
				912
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	913	#else
				914
Jonathan Wakely	5764ee3	2017-04-03 23:30:56 +0100	[diff] [blame]	915	/* We only have one accelerated alternative. Use a direct call so that
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	916	we encourage inlining. */
				917
				918	#define search_line_fast search_line_acc_char
				919
				920	#endif
				921
/* Perform lexer start-up work.  When HAVE_init_vectorized_lexer is
   defined this calls init_vectorized_lexer; otherwise it is a no-op.  */

void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
				931
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	932	/* Returns with a logical line that contains no escaped newlines or
				933	trigraphs. This is a time-critical inner loop. */
				934	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	935	_cpp_clean_line (cpp_reader *pfile)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	936	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	937	cpp_buffer *buffer;
				938	const uchar *s;
				939	uchar c, d, p;
Neil Booth	29401c3	2001-08-22 20:37:20 +0000	[diff] [blame]	940
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	941	buffer = pfile->buffer;
				942	buffer->cur_note = buffer->notes_used = 0;
				943	buffer->cur = buffer->line_base = buffer->next_line;
				944	buffer->need_line = false;
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	945	s = buffer->next_line;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	946
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	947	if (!buffer->from_stage3)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	948	{
Ian Lance Taylor	7af45bd	2006-12-29 15:43:55 +0000	[diff] [blame]	949	const uchar *pbackslash = NULL;
				950
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	951	/* Fast path. This is the common case of an un-escaped line with
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	952	no trigraphs. The primary win here is by not writing any
				953	data back to memory until we have to. */
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	954	while (1)
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	955	{
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	956	/* Perform an optimized search for \n, \r, \\, ?. */
				957	s = search_line_fast (s, buffer->rlimit);
				958
				959	c = *s;
				960	if (c == '\\')
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	961	{
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	962	/* Record the location of the backslash and continue. */
				963	pbackslash = s++;
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	964	}
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	965	else if (__builtin_expect (c == '?', 0))
				966	{
				967	if (__builtin_expect (s[1] == '?', false)
Ian Lance Taylor	7af45bd	2006-12-29 15:43:55 +0000	[diff] [blame]	968	&& _cpp_trigraph_map[s[2]])
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	969	{
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	970	/* Have a trigraph. We may or may not have to convert
				971	it. Add a line note regardless, for -Wtrigraphs. */
				972	add_line_note (buffer, s, s[2]);
				973	if (CPP_OPTION (pfile, trigraphs))
				974	{
				975	/* We do, and that means we have to switch to the
				976	slow path. */
				977	d = (uchar *) s;
				978	*d = _cpp_trigraph_map[s[2]];
				979	s += 2;
				980	goto slow_path;
				981	}
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	982	}
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	983	/* Not a trigraph. Continue on fast-path. */
				984	s++;
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	985	}
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	986	else
				987	break;
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	988	}
				989
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	990	/* This must be \r or \n. We're either done, or we'll be forced
				991	to write back to the buffer and continue on the slow path. */
				992	d = (uchar *) s;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	993
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	994	if (__builtin_expect (s == buffer->rlimit, false))
				995	goto done;
				996
				997	/* DOS line ending? */
				998	if (__builtin_expect (c == '\r', false) && s[1] == '\n')
				999	{
				1000	s++;
				1001	if (s == buffer->rlimit)
				1002	goto done;
				1003	}
				1004
				1005	if (__builtin_expect (pbackslash == NULL, true))
				1006	goto done;
				1007
				1008	/* Check for escaped newline. */
				1009	p = d;
				1010	while (is_nvspace (p[-1]))
				1011	p--;
				1012	if (p - 1 != pbackslash)
				1013	goto done;
				1014
				1015	/* Have an escaped newline; process it and proceed to
				1016	the slow path. */
				1017	add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
				1018	d = p - 2;
				1019	buffer->next_line = p - 1;
				1020
				1021	slow_path:
				1022	while (1)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1023	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1024	c = *++s;
				1025	*++d = c;
				1026
				1027	if (c == '\n' \|\| c == '\r')
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	1028	{
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	1029	/* Handle DOS line endings. */
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1030	if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
				1031	s++;
				1032	if (s == buffer->rlimit)
Neil Booth	8706281	2001-10-20 09:00:53 +0000	[diff] [blame]	1033	break;
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	1034
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1035	/* Escaped? */
				1036	p = d;
				1037	while (p != buffer->next_line && is_nvspace (p[-1]))
				1038	p--;
				1039	if (p == buffer->next_line \|\| p[-1] != '\\')
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	1040	break;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1041
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1042	add_line_note (buffer, p - 1, p != d ? ' ': '\\');
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1043	d = p - 2;
				1044	buffer->next_line = p - 1;
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	1045	}
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1046	else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	1047	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1048	/* Add a note regardless, for the benefit of -Wtrigraphs. */
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1049	add_line_note (buffer, d, s[2]);
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1050	if (CPP_OPTION (pfile, trigraphs))
				1051	{
				1052	*d = _cpp_trigraph_map[s[2]];
				1053	s += 2;
				1054	}
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	1055	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1056	}
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1057	}
				1058	else
				1059	{
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	1060	while (s != '\n' && s != '\r')
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1061	s++;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1062	d = (uchar *) s;
				1063
				1064	/* Handle DOS line endings. */
				1065	if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
				1066	s++;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1067	}
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1068
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1069	done:
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1070	*d = '\n';
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1071	/* A sentinel note that should never be processed. */
				1072	add_line_note (buffer, d + 1, '\n');
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1073	buffer->next_line = s + 1;
				1074	}
				1075
Neil Booth	a8eb604	2003-05-04 20:03:55 +0000	[diff] [blame]	1076	/* Return true if the trigraph indicated by NOTE should be warned
				1077	about in a comment. */
				1078	static bool
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1079	warn_in_comment (cpp_reader pfile, _cpp_line_note note)
Neil Booth	a8eb604	2003-05-04 20:03:55 +0000	[diff] [blame]	1080	{
				1081	const uchar *p;
				1082
				1083	/* Within comments we don't warn about trigraphs, unless the
				1084	trigraph forms an escaped newline, as that may change
Kazu Hirata	6356f89	2003-06-12 19:01:08 +0000	[diff] [blame]	1085	behavior. */
Neil Booth	a8eb604	2003-05-04 20:03:55 +0000	[diff] [blame]	1086	if (note->type != '/')
				1087	return false;
				1088
				1089	/* If -trigraphs, then this was an escaped newline iff the next note
				1090	is coincident. */
				1091	if (CPP_OPTION (pfile, trigraphs))
				1092	return note[1].pos == note->pos;
				1093
				1094	/* Otherwise, see if this forms an escaped newline. */
				1095	p = note->pos + 3;
				1096	while (is_nvspace (*p))
				1097	p++;
				1098
				1099	/* There might have been escaped newlines between the trigraph and the
				1100	newline we found. Hence the position test. */
				1101	return (*p == '\n' && p < note[1].pos);
				1102	}
				1103
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1104	/* Process the notes created by add_line_note as far as the current
				1105	location. */
				1106	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1107	_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1108	{
				1109	cpp_buffer *buffer = pfile->buffer;
				1110
				1111	for (;;)
				1112	{
				1113	_cpp_line_note *note = &buffer->notes[buffer->cur_note];
				1114	unsigned int col;
				1115
				1116	if (note->pos > buffer->cur)
				1117	break;
				1118
				1119	buffer->cur_note++;
				1120	col = CPP_BUF_COLUMN (buffer, note->pos + 1);
				1121
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1122	if (note->type == '\\' \|\| note->type == ' ')
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1123	{
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1124	if (note->type == ' ' && !in_comment)
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	1125	cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1126	"backslash and newline separated by space");
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1127
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1128	if (buffer->next_line > buffer->rlimit)
				1129	{
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	1130	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1131	"backslash-newline at end of file");
				1132	/* Prevent "no newline at end of file" warning. */
				1133	buffer->next_line = buffer->rlimit;
				1134	}
				1135
				1136	buffer->line_base = note->pos;
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	1137	CPP_INCREMENT_LINE (pfile, 0);
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1138	}
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1139	else if (_cpp_trigraph_map[note->type])
				1140	{
Neil Booth	a8eb604	2003-05-04 20:03:55 +0000	[diff] [blame]	1141	if (CPP_OPTION (pfile, warn_trigraphs)
				1142	&& (!in_comment \|\| warn_in_comment (pfile, note)))
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1143	{
				1144	if (CPP_OPTION (pfile, trigraphs))
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1145	cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
				1146	pfile->line_table->highest_line, col,
				1147	"trigraph ??%c converted to %c",
				1148	note->type,
				1149	(int) _cpp_trigraph_map[note->type]);
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1150	else
Geoffrey Keating	905bd7b	2003-07-22 02:21:16 +0000	[diff] [blame]	1151	{
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1152	cpp_warning_with_line
				1153	(pfile, CPP_W_TRIGRAPHS,
				1154	pfile->line_table->highest_line, col,
Geoffrey Keating	905bd7b	2003-07-22 02:21:16 +0000	[diff] [blame]	1155	"trigraph ??%c ignored, use -trigraphs to enable",
				1156	note->type);
				1157	}
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1158	}
				1159	}
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1160	else if (note->type == 0)
				1161	/* Already processed in lex_raw_string. */;
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1162	else
				1163	abort ();
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1164	}
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1165	}
				1166
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1167	/* Skip a C-style block comment. We find the end of the comment by
				1168	seeing if an asterisk is before every '/' we encounter. Returns
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	1169	nonzero if comment terminated by EOF, zero otherwise.
				1170
				1171	Buffer->cur points to the initial asterisk of the comment. */
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1172	bool
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1173	_cpp_skip_block_comment (cpp_reader *pfile)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1174	{
				1175	cpp_buffer *buffer = pfile->buffer;
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1176	const uchar *cur = buffer->cur;
				1177	uchar c;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1178
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1179	cur++;
				1180	if (*cur == '/')
				1181	cur++;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1182
				1183	for (;;)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1184	{
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1185	/* People like decorating comments with '*', so check for '/'
				1186	instead for efficiency. */
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1187	c = *cur++;
				1188
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1189	if (c == '/')
				1190	{
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1191	if (cur[-2] == '*')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1192	break;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1193
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1194	/* Warn about potential nested comments, but not if the '/'
Joseph Myers	a1f300c	2001-11-23 02:05:19 +0000	[diff] [blame]	1195	comes immediately before the true comment delimiter.
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1196	Don't bother to get it right across escaped newlines. */
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1197	if (CPP_OPTION (pfile, warn_comments)
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1198	&& cur[0] == '*' && cur[1] != '/')
				1199	{
				1200	buffer->cur = cur;
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1201	cpp_warning_with_line (pfile, CPP_W_COMMENTS,
				1202	pfile->line_table->highest_line,
				1203	CPP_BUF_COL (buffer),
				1204	"\"/*\" within comment");
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1205	}
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1206	}
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1207	else if (c == '\n')
				1208	{
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	1209	unsigned int cols;
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1210	buffer->cur = cur - 1;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1211	_cpp_process_line_notes (pfile, true);
				1212	if (buffer->next_line >= buffer->rlimit)
				1213	return true;
				1214	_cpp_clean_line (pfile);
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	1215
				1216	cols = buffer->next_line - buffer->line_base;
				1217	CPP_INCREMENT_LINE (pfile, cols);
				1218
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1219	cur = buffer->cur;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1220	}
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1221	}
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1222
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1223	buffer->cur = cur;
Neil Booth	a8eb604	2003-05-04 20:03:55 +0000	[diff] [blame]	1224	_cpp_process_line_notes (pfile, true);
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1225	return false;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1226	}
				1227
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	1228	/* Skip a C++ line comment, leaving buffer->cur pointing to the
Kazu Hirata	da7d830	2002-09-22 02:03:17 +0000	[diff] [blame]	1229	terminating newline. Handles escaped newlines. Returns nonzero
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	1230	if a multiline comment. */
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1231	static int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1232	skip_line_comment (cpp_reader *pfile)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1233	{
Neil Booth	cbcff6d	2000-09-23 21:41:41 +0000	[diff] [blame]	1234	cpp_buffer *buffer = pfile->buffer;
Manuel López-Ibáñez	1bb6466	2008-07-21 09:33:38 +0000	[diff] [blame]	1235	source_location orig_line = pfile->line_table->highest_line;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1236
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1237	while (*buffer->cur != '\n')
				1238	buffer->cur++;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1239
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1240	_cpp_process_line_notes (pfile, true);
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	1241	return orig_line != pfile->line_table->highest_line;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1242	}
				1243
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1244	/* Skips whitespace, saving the next non-whitespace character. */
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1245	static void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1246	skip_whitespace (cpp_reader *pfile, cppchar_t c)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1247	{
				1248	cpp_buffer *buffer = pfile->buffer;
Neil Booth	f7d151f	2003-04-19 07:41:15 +0000	[diff] [blame]	1249	bool saw_NUL = false;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1250
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1251	do
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1252	{
Neil Booth	91fcd15	2000-07-09 09:19:44 +0000	[diff] [blame]	1253	/* Horizontal space always OK. */
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1254	if (c == ' ' \|\| c == '\t')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1255	;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1256	/* Just \f \v or \0 left. */
Neil Booth	91fcd15	2000-07-09 09:19:44 +0000	[diff] [blame]	1257	else if (c == '\0')
Neil Booth	f7d151f	2003-04-19 07:41:15 +0000	[diff] [blame]	1258	saw_NUL = true;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1259	else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	1260	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
Neil Booth	ebef4e8	2002-04-14 18:42:47 +0000	[diff] [blame]	1261	CPP_BUF_COL (buffer),
				1262	"%s in preprocessing directive",
				1263	c == '\f' ? "form feed" : "vertical tab");
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1264
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1265	c = *buffer->cur++;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1266	}
Kazu Hirata	ec5c56d	2001-08-01 17:57:27 +0000	[diff] [blame]	1267	/* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1268	while (is_nvspace (c));
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1269
Neil Booth	f7d151f	2003-04-19 07:41:15 +0000	[diff] [blame]	1270	if (saw_NUL)
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	1271	cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
Neil Booth	f7d151f	2003-04-19 07:41:15 +0000	[diff] [blame]	1272
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	1273	buffer->cur--;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1274	}
				1275
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1276	/* See if the characters of a number token are valid in a name (no
				1277	'.', '+' or '-'). */
				1278	static int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1279	name_p (cpp_reader pfile, const cpp_string string)
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1280	{
				1281	unsigned int i;
				1282
				1283	for (i = 0; i < string->len; i++)
				1284	if (!is_idchar (string->text[i]))
				1285	return 0;
				1286
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	1287	return 1;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1288	}
				1289
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1290	/* After parsing an identifier or other sequence, produce a warning about
				1291	sequences not in NFC/NFKC. */
				1292	static void
				1293	warn_about_normalization (cpp_reader *pfile,
				1294	const cpp_token *token,
				1295	const struct normalize_state *s)
				1296	{
				1297	if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
				1298	&& !pfile->state.skipping)
				1299	{
				1300	/* Make sure that the token is printed using UCNs, even
				1301	if we'd otherwise happily print UTF-8. */
Gabriel Dos Reis	c3f829c	2005-05-28 15:52:48 +0000	[diff] [blame]	1302	unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1303	size_t sz;
				1304
				1305	sz = cpp_spell_token (pfile, token, buf, false) - buf;
				1306	if (NORMALIZE_STATE_RESULT (s) == normalized_C)
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1307	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
				1308	"`%.*s' is not in NFKC", (int) sz, buf);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1309	else
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1310	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
				1311	"`%.*s' is not in NFC", (int) sz, buf);
Tobias Burnus	55e7f90	2012-10-15 22:08:57 +0200	[diff] [blame]	1312	free (buf);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1313	}
				1314	}
				1315
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1316	/* Returns TRUE if the sequence starting at buffer->cur is invalid in
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1317	an identifier. FIRST is TRUE if this starts an identifier. */
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1318	static bool
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1319	forms_identifier_p (cpp_reader *pfile, int first,
				1320	struct normalize_state *state)
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1321	{
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1322	cpp_buffer *buffer = pfile->buffer;
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1323
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1324	if (*buffer->cur == '$')
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1325	{
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1326	if (!CPP_OPTION (pfile, dollars_in_ident))
				1327	return false;
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1328
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1329	buffer->cur++;
Hans-Peter Nilsson	78b8811	2003-06-12 06:09:15 +0000	[diff] [blame]	1330	if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1331	{
Hans-Peter Nilsson	78b8811	2003-06-12 06:09:15 +0000	[diff] [blame]	1332	CPP_OPTION (pfile, warn_dollars) = 0;
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	1333	cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1334	}
				1335
				1336	return true;
				1337	}
				1338
				1339	/* Is this a syntactically valid UCN? */
Joseph Myers	af15a2f	2005-09-20 21:31:37 +0100	[diff] [blame]	1340	if (CPP_OPTION (pfile, extended_identifiers)
Geoffrey Keating	6baba9b	2005-03-15 09:55:41 +0000	[diff] [blame]	1341	&& *buffer->cur == '\\'
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1342	&& (buffer->cur[1] == 'u' \|\| buffer->cur[1] == 'U'))
				1343	{
Paolo Carlini	fbb2291	2015-07-02 18:54:41 +0000	[diff] [blame]	1344	cppchar_t s;
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1345	buffer->cur += 2;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1346	if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
David Malcolm	88fa555	2016-08-05 18:08:33 +0000	[diff] [blame]	1347	state, &s, NULL, NULL))
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1348	return true;
				1349	buffer->cur -= 2;
				1350	}
				1351
				1352	return false;
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1353	}
				1354
Kai Tietz	17e7cb8	2009-11-11 18:37:19 +0000	[diff] [blame]	1355	/* Helper function to get the cpp_hashnode of the identifier BASE. */
				1356	static cpp_hashnode *
				1357	lex_identifier_intern (cpp_reader pfile, const uchar base)
				1358	{
				1359	cpp_hashnode *result;
				1360	const uchar *cur;
				1361	unsigned int len;
				1362	unsigned int hash = HT_HASHSTEP (0, *base);
				1363
				1364	cur = base + 1;
				1365	while (ISIDNUM (*cur))
				1366	{
				1367	hash = HT_HASHSTEP (hash, *cur);
				1368	cur++;
				1369	}
				1370	len = cur - base;
				1371	hash = HT_HASHFINISH (hash, len);
				1372	result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
				1373	base, len, hash, HT_ALLOC));
				1374
				1375	/* Rarely, identifiers require diagnostics when lexed. */
				1376	if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
				1377	&& !pfile->state.skipping, 0))
				1378	{
				1379	/* It is allowed to poison the same identifier twice. */
				1380	if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
				1381	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
				1382	NODE_NAME (result));
				1383
				1384	/* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
				1385	replacement list of a variadic macro. */
				1386	if (result == pfile->spec_nodes.n__VA_ARGS__
				1387	&& !pfile->state.va_args_ok)
Edward Smith-Rowland	3976796	2014-07-10 22:26:50 +0000	[diff] [blame]	1388	{
				1389	if (CPP_OPTION (pfile, cplusplus))
				1390	cpp_error (pfile, CPP_DL_PEDWARN,
				1391	"__VA_ARGS__ can only appear in the expansion"
				1392	" of a C++11 variadic macro");
				1393	else
				1394	cpp_error (pfile, CPP_DL_PEDWARN,
				1395	"__VA_ARGS__ can only appear in the expansion"
				1396	" of a C99 variadic macro");
				1397	}
Kai Tietz	17e7cb8	2009-11-11 18:37:19 +0000	[diff] [blame]	1398
				1399	/* For -Wc++-compat, warn about use of C++ named operators. */
				1400	if (result->flags & NODE_WARN_OPERATOR)
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1401	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
				1402	"identifier \"%s\" is a special operator name in C++",
				1403	NODE_NAME (result));
Kai Tietz	17e7cb8	2009-11-11 18:37:19 +0000	[diff] [blame]	1404	}
				1405
				1406	return result;
				1407	}
				1408
				1409	/* Get the cpp_hashnode of an identifier specified by NAME in
				1410	the current cpp_reader object. If none is found, NULL is returned. */
				1411	cpp_hashnode *
				1412	_cpp_lex_identifier (cpp_reader pfile, const char name)
				1413	{
				1414	cpp_hashnode *result;
				1415	result = lex_identifier_intern (pfile, (uchar *) name);
				1416	return result;
				1417	}
				1418
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1419	/* Lex an identifier starting at BUFFER->CUR - 1. */
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1420	static cpp_hashnode *
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1421	lex_identifier (cpp_reader pfile, const uchar base, bool starts_ucn,
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	1422	struct normalize_state nst, cpp_hashnode *spelling)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1423	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1424	cpp_hashnode *result;
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1425	const uchar *cur;
Zack Weinberg	c6e8380	2004-06-05 20:58:06 +0000	[diff] [blame]	1426	unsigned int len;
				1427	unsigned int hash = HT_HASHSTEP (0, *base);
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1428
Zack Weinberg	c6e8380	2004-06-05 20:58:06 +0000	[diff] [blame]	1429	cur = pfile->buffer->cur;
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1430	if (! starts_ucn)
Joseph Myers	d3f4ff8	2013-11-16 00:05:08 +0000	[diff] [blame]	1431	{
				1432	while (ISIDNUM (*cur))
				1433	{
				1434	hash = HT_HASHSTEP (hash, *cur);
				1435	cur++;
				1436	}
				1437	NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
				1438	}
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1439	pfile->buffer->cur = cur;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1440	if (starts_ucn \|\| forms_identifier_p (pfile, false, nst))
Neil Booth	10cf9bd	2002-03-22 07:23:21 +0000	[diff] [blame]	1441	{
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1442	/* Slower version for identifiers containing UCNs (or $). */
				1443	do {
				1444	while (ISIDNUM (*pfile->buffer->cur))
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1445	{
Joseph Myers	d3f4ff8	2013-11-16 00:05:08 +0000	[diff] [blame]	1446	NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1447	pfile->buffer->cur++;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1448	}
				1449	} while (forms_identifier_p (pfile, false, nst));
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1450	result = _cpp_interpret_identifier (pfile, base,
				1451	pfile->buffer->cur - base);
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	1452	*spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
Zack Weinberg	2c3fcba	2001-09-10 22:34:03 +0000	[diff] [blame]	1453	}
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1454	else
				1455	{
				1456	len = cur - base;
				1457	hash = HT_HASHFINISH (hash, len);
Zack Weinberg	2c3fcba	2001-09-10 22:34:03 +0000	[diff] [blame]	1458
Tom Tromey	2bf41bf	2008-02-20 02:16:43 +0000	[diff] [blame]	1459	result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
				1460	base, len, hash, HT_ALLOC));
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	1461	*spelling = result;
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1462	}
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1463
				1464	/* Rarely, identifiers require diagnostics when lexed. */
Zack Weinberg	2c3fcba	2001-09-10 22:34:03 +0000	[diff] [blame]	1465	if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
				1466	&& !pfile->state.skipping, 0))
				1467	{
				1468	/* It is allowed to poison the same identifier twice. */
				1469	if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	1470	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
Zack Weinberg	2c3fcba	2001-09-10 22:34:03 +0000	[diff] [blame]	1471	NODE_NAME (result));
				1472
				1473	/* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
				1474	replacement list of a variadic macro. */
				1475	if (result == pfile->spec_nodes.n__VA_ARGS__
				1476	&& !pfile->state.va_args_ok)
Edward Smith-Rowland	3976796	2014-07-10 22:26:50 +0000	[diff] [blame]	1477	{
				1478	if (CPP_OPTION (pfile, cplusplus))
				1479	cpp_error (pfile, CPP_DL_PEDWARN,
				1480	"__VA_ARGS__ can only appear in the expansion"
				1481	" of a C++11 variadic macro");
				1482	else
				1483	cpp_error (pfile, CPP_DL_PEDWARN,
				1484	"__VA_ARGS__ can only appear in the expansion"
				1485	" of a C99 variadic macro");
				1486	}
Ian Lance Taylor	3d8b2a9	2009-06-12 19:43:25 +0000	[diff] [blame]	1487
				1488	/* For -Wc++-compat, warn about use of C++ named operators. */
				1489	if (result->flags & NODE_WARN_OPERATOR)
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1490	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
				1491	"identifier \"%s\" is a special operator name in C++",
				1492	NODE_NAME (result));
Zack Weinberg	2c3fcba	2001-09-10 22:34:03 +0000	[diff] [blame]	1493	}
				1494
				1495	return result;
				1496	}
				1497
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1498	/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1499	static void
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1500	lex_number (cpp_reader pfile, cpp_string number,
				1501	struct normalize_state *nst)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1502	{
Neil Booth	562a5c2	2002-04-21 18:46:42 +0000	[diff] [blame]	1503	const uchar *cur;
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1504	const uchar *base;
				1505	uchar *dest;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1506
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1507	base = pfile->buffer->cur - 1;
				1508	do
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1509	{
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1510	cur = pfile->buffer->cur;
Neil Booth	10cf9bd	2002-03-22 07:23:21 +0000	[diff] [blame]	1511
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1512	/* N.B. ISIDNUM does not include $. */
Edward Smith-Rowland	7057e64	2013-10-31 14:01:23 +0000	[diff] [blame]	1513	while (ISIDNUM (cur) \|\| cur == '.' \|\| DIGIT_SEP (*cur)
				1514	\|\| VALID_SIGN (*cur, cur[-1]))
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1515	{
Joseph Myers	d3f4ff8	2013-11-16 00:05:08 +0000	[diff] [blame]	1516	NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1517	cur++;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1518	}
Edward Smith-Rowland	a5858a3	2015-03-17 00:50:55 +0000	[diff] [blame]	1519	/* A number can't end with a digit separator. */
				1520	while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
				1521	--cur;
Neil Booth	10cf9bd	2002-03-22 07:23:21 +0000	[diff] [blame]	1522
Neil Booth	10cf9bd	2002-03-22 07:23:21 +0000	[diff] [blame]	1523	pfile->buffer->cur = cur;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1524	}
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1525	while (forms_identifier_p (pfile, false, nst));
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1526
				1527	number->len = cur - base;
				1528	dest = _cpp_unaligned_alloc (pfile, number->len + 1);
				1529	memcpy (dest, base, number->len);
				1530	dest[number->len] = '\0';
				1531	number->text = dest;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1532	}
				1533
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1534	/* Create a token of type TYPE with a literal spelling. */
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1535	static void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1536	create_literal (cpp_reader pfile, cpp_token token, const uchar *base,
				1537	unsigned int len, enum cpp_ttype type)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1538	{
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1539	uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1540
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1541	memcpy (dest, base, len);
				1542	dest[len] = '\0';
				1543	token->type = type;
				1544	token->val.str.len = len;
				1545	token->val.str.text = dest;
				1546	}
				1547
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1548	/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
				1549	sequence from FIRST_BUFF_P to LAST_BUFF_P. /
				1550
				1551	static void
				1552	bufring_append (cpp_reader pfile, const uchar base, size_t len,
				1553	_cpp_buff first_buff_p, _cpp_buff last_buff_p)
				1554	{
				1555	_cpp_buff first_buff = first_buff_p;
				1556	_cpp_buff last_buff = last_buff_p;
				1557
				1558	if (first_buff == NULL)
				1559	first_buff = last_buff = _cpp_get_buff (pfile, len);
				1560	else if (len > BUFF_ROOM (last_buff))
				1561	{
				1562	size_t room = BUFF_ROOM (last_buff);
				1563	memcpy (BUFF_FRONT (last_buff), base, room);
				1564	BUFF_FRONT (last_buff) += room;
				1565	base += room;
				1566	len -= room;
				1567	last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
				1568	}
				1569
				1570	memcpy (BUFF_FRONT (last_buff), base, len);
				1571	BUFF_FRONT (last_buff) += len;
				1572
				1573	*first_buff_p = first_buff;
				1574	*last_buff_p = last_buff;
				1575	}
				1576
Ed Smith-Rowland	c865f92	2013-06-29 03:41:58 +0000	[diff] [blame]	1577
				1578	/* Returns true if a macro has been defined.
				1579	This might not work if compile with -save-temps,
				1580	or preprocess separately from compilation. */
				1581
				1582	static bool
				1583	is_macro(cpp_reader pfile, const uchar base)
				1584	{
				1585	const uchar *cur = base;
				1586	if (! ISIDST (*cur))
				1587	return false;
				1588	unsigned int hash = HT_HASHSTEP (0, *cur);
				1589	++cur;
				1590	while (ISIDNUM (*cur))
				1591	{
				1592	hash = HT_HASHSTEP (hash, *cur);
				1593	++cur;
				1594	}
				1595	hash = HT_HASHFINISH (hash, cur - base);
				1596
				1597	cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
				1598	base, cur - base, hash, HT_NO_INSERT));
				1599
				1600	return !result ? false : (result->type == NT_MACRO);
				1601	}
				1602
				1603
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1604	/* Lexes a raw string. The stored string contains the spelling, including
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1605	double quotes, delimiter string, '(' and ')', any leading
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1606	'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
				1607	literal, or CPP_OTHER if it was not properly terminated.
				1608
				1609	The spelling is NUL-terminated, but it is not guaranteed that this
				1610	is the first NUL since embedded NULs are preserved. */
				1611
				1612	static void
				1613	lex_raw_string (cpp_reader pfile, cpp_token token, const uchar *base,
				1614	const uchar *cur)
				1615	{
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1616	uchar raw_prefix[17];
				1617	uchar temp_buffer[18];
				1618	const uchar *orig_base;
				1619	unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
				1620	enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
				1621	raw_str_phase phase = RAW_STR_PREFIX;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1622	enum cpp_ttype type;
				1623	size_t total_len = 0;
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1624	/* Index into temp_buffer during phases other than RAW_STR,
				1625	during RAW_STR phase 17 to tell BUF_APPEND that nothing should
				1626	be appended to temp_buffer. */
				1627	size_t temp_buffer_len = 0;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1628	_cpp_buff first_buff = NULL, last_buff = NULL;
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1629	size_t raw_prefix_start;
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1630	_cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1631
				1632	type = (*base == 'L' ? CPP_WSTRING :
				1633	*base == 'U' ? CPP_STRING32 :
				1634	*base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
				1635	: CPP_STRING);
				1636
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1637	#define BUF_APPEND(STR,LEN) \
				1638	do { \
				1639	bufring_append (pfile, (const uchar *)(STR), (LEN), \
				1640	&first_buff, &last_buff); \
				1641	total_len += (LEN); \
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1642	if (__builtin_expect (temp_buffer_len < 17, 0) \
				1643	&& (const uchar *)(STR) != base \
				1644	&& (LEN) <= 2) \
				1645	{ \
				1646	memcpy (temp_buffer + temp_buffer_len, \
				1647	(const uchar *)(STR), (LEN)); \
				1648	temp_buffer_len += (LEN); \
				1649	} \
Tom de Vries	c830c7d5	2017-11-05 09:58:16 +0000	[diff] [blame]	1650	} while (0)
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1651
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1652	orig_base = base;
				1653	++cur;
				1654	raw_prefix_start = cur - base;
				1655	for (;;)
				1656	{
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1657	cppchar_t c;
				1658
				1659	/* If we previously performed any trigraph or line splicing
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1660	transformations, undo them in between the opening and closing
				1661	double quote. */
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1662	while (note->pos < cur)
				1663	++note;
				1664	for (; note->pos == cur; ++note)
				1665	{
				1666	switch (note->type)
				1667	{
				1668	case '\\':
				1669	case ' ':
				1670	/* Restore backslash followed by newline. */
				1671	BUF_APPEND (base, cur - base);
				1672	base = cur;
				1673	BUF_APPEND ("\\", 1);
				1674	after_backslash:
				1675	if (note->type == ' ')
				1676	{
				1677	/* GNU backslash whitespace newline extension. FIXME
				1678	could be any sequence of non-vertical space. When we
				1679	can properly restore any such sequence, we should mark
				1680	this note as handled so _cpp_process_line_notes
				1681	doesn't warn. */
				1682	BUF_APPEND (" ", 1);
				1683	}
				1684
				1685	BUF_APPEND ("\n", 1);
				1686	break;
				1687
				1688	case 0:
				1689	/* Already handled. */
				1690	break;
				1691
				1692	default:
				1693	if (_cpp_trigraph_map[note->type])
				1694	{
				1695	/* Don't warn about this trigraph in
				1696	_cpp_process_line_notes, since trigraphs show up as
				1697	trigraphs in raw strings. */
Jakub Jelinek	d947ada	2010-04-06 09:02:40 +0200	[diff] [blame]	1698	uchar type = note->type;
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1699	note->type = 0;
				1700
				1701	if (!CPP_OPTION (pfile, trigraphs))
				1702	/* If we didn't convert the trigraph in the first
				1703	place, don't do anything now either. */
				1704	break;
				1705
				1706	BUF_APPEND (base, cur - base);
				1707	base = cur;
				1708	BUF_APPEND ("??", 2);
				1709
				1710	/* ??/ followed by newline gets two line notes, one for
				1711	the trigraph and one for the backslash/newline. */
				1712	if (type == '/' && note[1].pos == cur)
				1713	{
				1714	if (note[1].type != '\\'
				1715	&& note[1].type != ' ')
				1716	abort ();
				1717	BUF_APPEND ("/", 1);
				1718	++note;
				1719	goto after_backslash;
				1720	}
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1721	else
				1722	{
				1723	/* Skip the replacement character. */
				1724	base = ++cur;
				1725	BUF_APPEND (&type, 1);
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1726	c = type;
				1727	goto check_c;
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1728	}
				1729	}
				1730	else
				1731	abort ();
				1732	break;
				1733	}
				1734	}
				1735	c = *cur++;
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1736	if (__builtin_expect (temp_buffer_len < 17, 0))
				1737	temp_buffer[temp_buffer_len++] = c;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1738
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1739	check_c:
				1740	if (phase == RAW_STR_PREFIX)
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1741	{
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1742	while (raw_prefix_len < temp_buffer_len)
				1743	{
				1744	raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
				1745	switch (raw_prefix[raw_prefix_len])
				1746	{
				1747	case ' ': case '(': case ')': case '\\': case '\t':
				1748	case '\v': case '\f': case '\n': default:
				1749	break;
				1750	/* Basic source charset except the above chars. */
				1751	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
				1752	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
				1753	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
				1754	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
				1755	case 'y': case 'z':
				1756	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
				1757	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
				1758	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
				1759	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
				1760	case 'Y': case 'Z':
				1761	case '0': case '1': case '2': case '3': case '4': case '5':
				1762	case '6': case '7': case '8': case '9':
				1763	case '_': case '{': case '}': case '#': case '[': case ']':
				1764	case '<': case '>': case '%': case ':': case ';': case '.':
				1765	case '?': case '*': case '+': case '-': case '/': case '^':
				1766	case '&': case '\|': case '~': case '!': case '=': case ',':
				1767	case '"': case '\'':
				1768	if (raw_prefix_len < 16)
				1769	{
				1770	raw_prefix_len++;
				1771	continue;
				1772	}
				1773	break;
				1774	}
				1775
				1776	if (raw_prefix[raw_prefix_len] != '(')
				1777	{
				1778	int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
				1779	if (raw_prefix_len == 16)
				1780	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
				1781	col, "raw string delimiter longer "
				1782	"than 16 characters");
				1783	else if (raw_prefix[raw_prefix_len] == '\n')
				1784	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
				1785	col, "invalid new-line in raw "
				1786	"string delimiter");
				1787	else
				1788	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
				1789	col, "invalid character '%c' in "
				1790	"raw string delimiter",
				1791	(int) raw_prefix[raw_prefix_len]);
				1792	pfile->buffer->cur = orig_base + raw_prefix_start - 1;
				1793	create_literal (pfile, token, orig_base,
				1794	raw_prefix_start - 1, CPP_OTHER);
				1795	if (first_buff)
				1796	_cpp_release_buff (pfile, first_buff);
				1797	return;
				1798	}
				1799	raw_prefix[raw_prefix_len] = '"';
				1800	phase = RAW_STR;
				1801	/* Nothing should be appended to temp_buffer during
				1802	RAW_STR phase. */
				1803	temp_buffer_len = 17;
				1804	break;
				1805	}
				1806	continue;
				1807	}
				1808	else if (phase == RAW_STR_SUFFIX)
				1809	{
				1810	while (raw_suffix_len <= raw_prefix_len
				1811	&& raw_suffix_len < temp_buffer_len
				1812	&& temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
				1813	raw_suffix_len++;
				1814	if (raw_suffix_len > raw_prefix_len)
				1815	break;
				1816	if (raw_suffix_len == temp_buffer_len)
				1817	continue;
				1818	phase = RAW_STR;
				1819	/* Nothing should be appended to temp_buffer during
				1820	RAW_STR phase. */
				1821	temp_buffer_len = 17;
				1822	}
				1823	if (c == ')')
				1824	{
				1825	phase = RAW_STR_SUFFIX;
				1826	raw_suffix_len = 0;
				1827	temp_buffer_len = 0;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1828	}
				1829	else if (c == '\n')
				1830	{
				1831	if (pfile->state.in_directive
Jakub Jelinek	d5e4835	2013-07-10 18:52:19 +0200	[diff] [blame]	1832	\|\| (pfile->state.parsing_args
				1833	&& pfile->buffer->next_line >= pfile->buffer->rlimit))
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1834	{
				1835	cur--;
				1836	type = CPP_OTHER;
				1837	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
				1838	"unterminated raw string");
				1839	break;
				1840	}
				1841
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1842	BUF_APPEND (base, cur - base);
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1843
				1844	if (pfile->buffer->cur < pfile->buffer->rlimit)
				1845	CPP_INCREMENT_LINE (pfile, 0);
				1846	pfile->buffer->need_line = true;
				1847
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1848	pfile->buffer->cur = cur-1;
				1849	_cpp_process_line_notes (pfile, false);
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1850	if (!_cpp_get_fresh_line (pfile))
				1851	{
				1852	source_location src_loc = token->src_loc;
				1853	token->type = CPP_EOF;
				1854	/* Tell the compiler the line number of the EOF token. */
				1855	token->src_loc = pfile->line_table->highest_line;
				1856	token->flags = BOL;
				1857	if (first_buff != NULL)
				1858	_cpp_release_buff (pfile, first_buff);
				1859	cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
				1860	"unterminated raw string");
				1861	return;
				1862	}
				1863
				1864	cur = base = pfile->buffer->cur;
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1865	note = &pfile->buffer->notes[pfile->buffer->cur_note];
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1866	}
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1867	}
				1868
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1869	if (CPP_OPTION (pfile, user_literals))
				1870	{
Ed Smith-Rowland	c865f92	2013-06-29 03:41:58 +0000	[diff] [blame]	1871	/* If a string format macro, say from inttypes.h, is placed touching
				1872	a string literal it could be parsed as a C++11 user-defined string
				1873	literal thus breaking the program.
Mukesh Kapoor	7d19c46	2017-11-06 10:33:41 +0000	[diff] [blame]	1874	Try to identify macros with is_macro. A warning is issued.
				1875	The macro name should not start with '_' for this warning. */
				1876	if ((*cur != '_') && is_macro (pfile, cur))
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1877	{
Dodji Seketeli	112448b	2012-04-29 16:27:08 +0000	[diff] [blame]	1878	/* Raise a warning, but do not consume subsequent tokens. */
Edward Smith-Rowland	7aee864	2014-07-09 13:33:58 +0000	[diff] [blame]	1879	if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1880	cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
				1881	token->src_loc, 0,
				1882	"invalid suffix on literal; C++11 requires "
Ed Smith-Rowland	c865f92	2013-06-29 03:41:58 +0000	[diff] [blame]	1883	"a space between literal and string macro");
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1884	}
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1885	/* Grab user defined literal suffix. */
Ed Smith-Rowland	561f7fc	2013-02-14 02:55:42 +0000	[diff] [blame]	1886	else if (ISIDST (*cur))
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1887	{
				1888	type = cpp_userdef_string_add_type (type);
				1889	++cur;
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1890
				1891	while (ISIDNUM (*cur))
				1892	++cur;
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1893	}
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1894	}
				1895
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1896	pfile->buffer->cur = cur;
				1897	if (first_buff == NULL)
				1898	create_literal (pfile, token, base, cur - base, type);
				1899	else
				1900	{
				1901	uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
				1902
				1903	token->type = type;
				1904	token->val.str.len = total_len + (cur - base);
				1905	token->val.str.text = dest;
				1906	last_buff = first_buff;
				1907	while (last_buff != NULL)
				1908	{
				1909	memcpy (dest, last_buff->base,
				1910	BUFF_FRONT (last_buff) - last_buff->base);
				1911	dest += BUFF_FRONT (last_buff) - last_buff->base;
				1912	last_buff = last_buff->next;
				1913	}
				1914	_cpp_release_buff (pfile, first_buff);
				1915	memcpy (dest, base, cur - base);
				1916	dest[cur - base] = '\0';
				1917	}
				1918	}
				1919
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1920	/* Lexes a string, character constant, or angle-bracketed header file
				1921	name. The stored string contains the spelling, including opening
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1922	quote and any leading 'L', 'u', 'U' or 'u8' and optional
				1923	'R' modifier. It returns the type of the literal, or CPP_OTHER
				1924	if it was not properly terminated, or CPP_LESS for an unterminated
				1925	header name which must be relexed as normal tokens.
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1926
				1927	The spelling is NUL-terminated, but it is not guaranteed that this
				1928	is the first NUL since embedded NULs are preserved. */
				1929	static void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1930	lex_string (cpp_reader pfile, cpp_token token, const uchar *base)
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1931	{
				1932	bool saw_NUL = false;
				1933	const uchar *cur;
				1934	cppchar_t terminator;
				1935	enum cpp_ttype type;
				1936
				1937	cur = base;
				1938	terminator = *cur++;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1939	if (terminator == 'L' \|\| terminator == 'U')
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1940	terminator = *cur++;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1941	else if (terminator == 'u')
				1942	{
				1943	terminator = *cur++;
				1944	if (terminator == '8')
				1945	terminator = *cur++;
				1946	}
				1947	if (terminator == 'R')
				1948	{
				1949	lex_raw_string (pfile, token, base, cur);
				1950	return;
				1951	}
				1952	if (terminator == '"')
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	1953	type = (*base == 'L' ? CPP_WSTRING :
				1954	*base == 'U' ? CPP_STRING32 :
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1955	*base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
				1956	: CPP_STRING);
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1957	else if (terminator == '\'')
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	1958	type = (*base == 'L' ? CPP_WCHAR :
				1959	*base == 'U' ? CPP_CHAR32 :
Edward Smith-Rowland	fe95b03	2015-06-30 12:58:48 +0000	[diff] [blame]	1960	*base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
				1961	: CPP_CHAR);
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1962	else
				1963	terminator = '>', type = CPP_HEADER_NAME;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1964
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1965	for (;;)
				1966	{
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1967	cppchar_t c = *cur++;
Neil Booth	7868b4a	2001-03-04 12:02:02 +0000	[diff] [blame]	1968
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	1969	/* In #include-style directives, terminators are not escapable. */
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1970	if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
				1971	cur++;
				1972	else if (c == terminator)
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1973	break;
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1974	else if (c == '\n')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1975	{
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1976	cur--;
Joseph Myers	4bb09c2	2009-02-21 21:25:39 +0000	[diff] [blame]	1977	/* Unmatched quotes always yield undefined behavior, but
				1978	greedy lexing means that what appears to be an unterminated
				1979	header name may actually be a legitimate sequence of tokens. */
				1980	if (terminator == '>')
				1981	{
				1982	token->type = CPP_LESS;
				1983	return;
				1984	}
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1985	type = CPP_OTHER;
				1986	break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1987	}
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1988	else if (c == '\0')
				1989	saw_NUL = true;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1990	}
				1991
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1992	if (saw_NUL && !pfile->state.skipping)
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	1993	cpp_error (pfile, CPP_DL_WARNING,
				1994	"null character(s) preserved in literal");
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1995
Joseph Myers	c663e30	2006-09-13 02:04:18 +0100	[diff] [blame]	1996	if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
				1997	cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
				1998	(int) terminator);
				1999
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	2000	if (CPP_OPTION (pfile, user_literals))
				2001	{
Ed Smith-Rowland	c865f92	2013-06-29 03:41:58 +0000	[diff] [blame]	2002	/* If a string format macro, say from inttypes.h, is placed touching
				2003	a string literal it could be parsed as a C++11 user-defined string
				2004	literal thus breaking the program.
Mukesh Kapoor	7d19c46	2017-11-06 10:33:41 +0000	[diff] [blame]	2005	Try to identify macros with is_macro. A warning is issued.
				2006	The macro name should not start with '_' for this warning. */
				2007	if ((*cur != '_') && is_macro (pfile, cur))
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	2008	{
Dodji Seketeli	112448b	2012-04-29 16:27:08 +0000	[diff] [blame]	2009	/* Raise a warning, but do not consume subsequent tokens. */
Edward Smith-Rowland	7aee864	2014-07-09 13:33:58 +0000	[diff] [blame]	2010	if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	2011	cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
				2012	token->src_loc, 0,
				2013	"invalid suffix on literal; C++11 requires "
Ed Smith-Rowland	c865f92	2013-06-29 03:41:58 +0000	[diff] [blame]	2014	"a space between literal and string macro");
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	2015	}
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	2016	/* Grab user defined literal suffix. */
Ed Smith-Rowland	561f7fc	2013-02-14 02:55:42 +0000	[diff] [blame]	2017	else if (ISIDST (*cur))
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	2018	{
				2019	type = cpp_userdef_char_add_type (type);
				2020	type = cpp_userdef_string_add_type (type);
				2021	++cur;
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	2022
				2023	while (ISIDNUM (*cur))
				2024	++cur;
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	2025	}
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	2026	}
Jason Merrill	fe19130	2015-05-09 00:50:10 -0400	[diff] [blame]	2027	else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
				2028	&& is_macro (pfile, cur)
				2029	&& !pfile->state.skipping)
				2030	cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
				2031	token->src_loc, 0, "C++11 requires a space "
				2032	"between string literal and macro");
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	2033
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	2034	pfile->buffer->cur = cur;
				2035	create_literal (pfile, token, base, cur - base, type);
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2036	}
				2037
Matthew Gingell	631d0d3	2008-10-05 12:35:36 +0000	[diff] [blame]	2038	/* Return the comment table. The client may not make any assumption
				2039	about the ordering of the table. */
				2040	cpp_comment_table *
				2041	cpp_get_comments (cpp_reader *pfile)
				2042	{
				2043	return &pfile->comments;
				2044	}
				2045
				2046	/* Append a comment to the end of the comment table. */
				2047	static void
				2048	store_comment (cpp_reader pfile, cpp_token token)
				2049	{
				2050	int len;
				2051
				2052	if (pfile->comments.allocated == 0)
				2053	{
				2054	pfile->comments.allocated = 256;
				2055	pfile->comments.entries = (cpp_comment *) xmalloc
				2056	(pfile->comments.allocated * sizeof (cpp_comment));
				2057	}
				2058
				2059	if (pfile->comments.count == pfile->comments.allocated)
				2060	{
				2061	pfile->comments.allocated *= 2;
				2062	pfile->comments.entries = (cpp_comment *) xrealloc
				2063	(pfile->comments.entries,
				2064	pfile->comments.allocated * sizeof (cpp_comment));
				2065	}
				2066
				2067	len = token->val.str.len;
				2068
				2069	/* Copy comment. Note, token may not be NULL terminated. */
				2070	pfile->comments.entries[pfile->comments.count].comment =
				2071	(char ) xmalloc (sizeof (char) (len + 1));
				2072	memcpy (pfile->comments.entries[pfile->comments.count].comment,
				2073	token->val.str.text, len);
				2074	pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
				2075
				2076	/* Set source location. */
				2077	pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
				2078
				2079	/* Increment the count of entries in the comment table. */
				2080	pfile->comments.count++;
				2081	}
				2082
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2083	/* The stored comment includes the comment start and any terminator. */
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2084	static void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2085	save_comment (cpp_reader pfile, cpp_token token, const unsigned char *from,
				2086	cppchar_t type)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	2087	{
Neil Booth	5d7ee2f	2000-05-10 09:39:18 +0000	[diff] [blame]	2088	unsigned char *buffer;
Kai Tietz	651a20b	2010-11-16 19:50:17 +0000	[diff] [blame]	2089	unsigned int len, clen, i;
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	2090
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2091	len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2092
Neil Booth	3542203	2000-10-29 09:56:00 +0000	[diff] [blame]	2093	/* C++ comments probably (not definitely) have moved past a new
				2094	line, which we don't want to save in the comment. */
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2095	if (is_vspace (pfile->buffer->cur[-1]))
Neil Booth	3542203	2000-10-29 09:56:00 +0000	[diff] [blame]	2096	len--;
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2097
Kai Tietz	651a20b	2010-11-16 19:50:17 +0000	[diff] [blame]	2098	/* If we are currently in a directive or in argument parsing, then
				2099	we need to store all C++ comments as C comments internally, and
				2100	so we need to allocate a little extra space in that case.
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2101
				2102	Note that the only time we encounter a directive here is
				2103	when we are saving comments in a "#define". */
Kai Tietz	651a20b	2010-11-16 19:50:17 +0000	[diff] [blame]	2104	clen = ((pfile->state.in_directive \|\| pfile->state.parsing_args)
				2105	&& type == '/') ? len + 2 : len;
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2106
				2107	buffer = _cpp_unaligned_alloc (pfile, clen);
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	2108
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2109	token->type = CPP_COMMENT;
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2110	token->val.str.len = clen;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2111	token->val.str.text = buffer;
Neil Booth	d1d9a6b	2000-05-27 23:19:56 +0000	[diff] [blame]	2112
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2113	buffer[0] = '/';
				2114	memcpy (buffer + 1, from, len - 1);
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2115
Kazu Hirata	1eeeb6a	2002-04-30 20:48:55 +0000	[diff] [blame]	2116	/* Finish conversion to a C comment, if necessary. */
Kai Tietz	651a20b	2010-11-16 19:50:17 +0000	[diff] [blame]	2117	if ((pfile->state.in_directive \|\| pfile->state.parsing_args) && type == '/')
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2118	{
				2119	buffer[1] = '*';
				2120	buffer[clen - 2] = '*';
				2121	buffer[clen - 1] = '/';
Kai Tietz	651a20b	2010-11-16 19:50:17 +0000	[diff] [blame]	2122	/* As there can be in a C++ comments illegal sequences for C comments
				2123	we need to filter them out. */
				2124	for (i = 2; i < (clen - 2); i++)
				2125	if (buffer[i] == '/' && (buffer[i - 1] == '' \|\| buffer[i + 1] == ''))
				2126	buffer[i] = '\|';
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2127	}
Matthew Gingell	631d0d3	2008-10-05 12:35:36 +0000	[diff] [blame]	2128
				2129	/* Finally store this comment for use by clients of libcpp. */
				2130	store_comment (pfile, token);
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2131	}
				2132
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2133	/* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
				2134	comment. */
				2135
				2136	static bool
				2137	fallthrough_comment_p (cpp_reader pfile, const unsigned char comment_start)
				2138	{
				2139	const unsigned char *from = comment_start + 1;
Jakub Jelinek	70f6d5e	2016-10-12 01:19:06 +0200	[diff] [blame]	2140
				2141	switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough))
				2142	{
				2143	/* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
				2144	don't recognize any comments. The latter only checks attributes,
				2145	the former doesn't warn. */
				2146	case 0:
				2147	default:
				2148	return false;
				2149	/* -Wimplicit-fallthrough=1 considers any comment, no matter what
				2150	content it has. */
				2151	case 1:
				2152	return true;
				2153	case 2:
				2154	/* -Wimplicit-fallthrough=2 looks for (case insensitive)
				2155	.falls?[ \t-]thr(u\|ough).* regex. */
				2156	for (; (size_t) (pfile->buffer->cur - from) >= sizeof "fallthru" - 1;
				2157	from++)
				2158	{
				2159	/* Is there anything like strpbrk with upper boundary, or
				2160	memchr looking for 2 characters rather than just one? */
				2161	if (from[0] != 'f' && from[0] != 'F')
				2162	continue;
				2163	if (from[1] != 'a' && from[1] != 'A')
				2164	continue;
				2165	if (from[2] != 'l' && from[2] != 'L')
				2166	continue;
				2167	if (from[3] != 'l' && from[3] != 'L')
				2168	continue;
				2169	from += sizeof "fall" - 1;
				2170	if (from[0] == 's' \|\| from[0] == 'S')
				2171	from++;
				2172	while (from == ' ' \|\| from == '\t' \|\| *from == '-')
				2173	from++;
				2174	if (from[0] != 't' && from[0] != 'T')
				2175	continue;
				2176	if (from[1] != 'h' && from[1] != 'H')
				2177	continue;
				2178	if (from[2] != 'r' && from[2] != 'R')
				2179	continue;
				2180	if (from[3] == 'u' \|\| from[3] == 'U')
				2181	return true;
				2182	if (from[3] != 'o' && from[3] != 'O')
				2183	continue;
				2184	if (from[4] != 'u' && from[4] != 'U')
				2185	continue;
				2186	if (from[5] != 'g' && from[5] != 'G')
				2187	continue;
				2188	if (from[6] != 'h' && from[6] != 'H')
				2189	continue;
				2190	return true;
				2191	}
				2192	return false;
				2193	case 3:
				2194	case 4:
				2195	break;
				2196	}
				2197
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2198	/* Whole comment contents:
				2199	-fallthrough
				2200	@fallthrough@
				2201	*/
				2202	if (from == '-' \|\| from == '@')
				2203	{
				2204	size_t len = sizeof "fallthrough" - 1;
				2205	if ((size_t) (pfile->buffer->cur - from - 1) < len)
				2206	return false;
				2207	if (memcmp (from + 1, "fallthrough", len))
				2208	return false;
				2209	if (*from == '@')
				2210	{
				2211	if (from[len + 1] != '@')
				2212	return false;
				2213	len++;
				2214	}
				2215	from += 1 + len;
				2216	}
				2217	/* Whole comment contents (regex):
Jakub Jelinek	70f6d5e	2016-10-12 01:19:06 +0200	[diff] [blame]	2218	lint -fallthrough[ \t]*
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2219	*/
				2220	else if (*from == 'l')
				2221	{
				2222	size_t len = sizeof "int -fallthrough" - 1;
				2223	if ((size_t) (pfile->buffer->cur - from - 1) < len)
				2224	return false;
				2225	if (memcmp (from + 1, "int -fallthrough", len))
Jakub Jelinek	70f6d5e	2016-10-12 01:19:06 +0200	[diff] [blame]	2226	return false;
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2227	from += 1 + len;
Jakub Jelinek	70f6d5e	2016-10-12 01:19:06 +0200	[diff] [blame]	2228	while (from == ' ' \|\| from == '\t')
				2229	from++;
				2230	}
				2231	/* Whole comment contents (regex):
				2232	[ \t]FALLTHR(U\|OUGH)[ \t]
				2233	*/
				2234	else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough) == 4)
				2235	{
				2236	while (from == ' ' \|\| from == '\t')
				2237	from++;
				2238	if ((size_t) (pfile->buffer->cur - from) < sizeof "FALLTHRU" - 1)
				2239	return false;
				2240	if (memcmp (from, "FALLTHR", sizeof "FALLTHR" - 1))
				2241	return false;
				2242	from += sizeof "FALLTHR" - 1;
				2243	if (*from == 'U')
				2244	from++;
				2245	else if ((size_t) (pfile->buffer->cur - from) < sizeof "OUGH" - 1)
				2246	return false;
				2247	else if (memcmp (from, "OUGH", sizeof "OUGH" - 1))
				2248	return false;
				2249	else
				2250	from += sizeof "OUGH" - 1;
				2251	while (from == ' ' \|\| from == '\t')
				2252	from++;
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2253	}
				2254	/* Whole comment contents (regex):
Jakub Jelinek	ee19ef4	2016-10-08 12:54:27 +0200	[diff] [blame]	2255	[ \t.!](ELSE,? \|INTENTIONAL(LY)? )?FALL(S \| \|-)?THR(OUGH\|U)[ \t.!](-[^\n\r]*)?
				2256	[ \t.!](Else,? \|Intentional(ly)? )?Fall((s \| \|-)[Tt]\|t)hr(ough\|u)[ \t.!](-[^\n\r]*)?
				2257	[ \t.!]([Ee]lse,? \|[Ii]ntentional(ly)? )?fall(s \| \|-)?thr(ough\|u)[ \t.!](-[^\n\r]*)?
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2258	*/
				2259	else
				2260	{
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2261	while (from == ' ' \|\| from == '\t' \|\| from == '.' \|\| from == '!')
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2262	from++;
				2263	unsigned char f = *from;
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2264	bool all_upper = false;
				2265	if (f == 'E' \|\| f == 'e')
Jakub Jelinek	70f6d5e	2016-10-12 01:19:06 +0200	[diff] [blame]	2266	{
				2267	if ((size_t) (pfile->buffer->cur - from)
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2268	< sizeof "else fallthru" - 1)
				2269	return false;
Jakub Jelinek	ee19ef4	2016-10-08 12:54:27 +0200	[diff] [blame]	2270	if (f == 'E' && memcmp (from + 1, "LSE", sizeof "LSE" - 1) == 0)
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2271	all_upper = true;
Jakub Jelinek	ee19ef4	2016-10-08 12:54:27 +0200	[diff] [blame]	2272	else if (memcmp (from + 1, "lse", sizeof "lse" - 1))
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2273	return false;
Jakub Jelinek	ee19ef4	2016-10-08 12:54:27 +0200	[diff] [blame]	2274	from += sizeof "else" - 1;
				2275	if (*from == ',')
				2276	from++;
Jakub Jelinek	70f6d5e	2016-10-12 01:19:06 +0200	[diff] [blame]	2277	if (*from != ' ')
Jakub Jelinek	ee19ef4	2016-10-08 12:54:27 +0200	[diff] [blame]	2278	return false;
				2279	from++;
				2280	if (all_upper && *from == 'f')
				2281	return false;
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2282	if (f == 'e' && *from == 'F')
				2283	return false;
				2284	f = *from;
Jakub Jelinek	70f6d5e	2016-10-12 01:19:06 +0200	[diff] [blame]	2285	}
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2286	else if (f == 'I' \|\| f == 'i')
Jakub Jelinek	70f6d5e	2016-10-12 01:19:06 +0200	[diff] [blame]	2287	{
				2288	if ((size_t) (pfile->buffer->cur - from)
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2289	< sizeof "intentional fallthru" - 1)
				2290	return false;
				2291	if (f == 'I' && memcmp (from + 1, "NTENTIONAL",
				2292	sizeof "NTENTIONAL" - 1) == 0)
				2293	all_upper = true;
				2294	else if (memcmp (from + 1, "ntentional",
				2295	sizeof "ntentional" - 1))
				2296	return false;
				2297	from += sizeof "intentional" - 1;
				2298	if (*from == ' ')
				2299	{
				2300	from++;
				2301	if (all_upper && *from == 'f')
				2302	return false;
				2303	}
				2304	else if (all_upper)
				2305	{
				2306	if (memcmp (from, "LY F", sizeof "LY F" - 1))
				2307	return false;
				2308	from += sizeof "LY " - 1;
				2309	}
				2310	else
				2311	{
				2312	if (memcmp (from, "ly ", sizeof "ly " - 1))
				2313	return false;
				2314	from += sizeof "ly " - 1;
				2315	}
				2316	if (f == 'i' && *from == 'F')
				2317	return false;
				2318	f = *from;
Jakub Jelinek	70f6d5e	2016-10-12 01:19:06 +0200	[diff] [blame]	2319	}
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2320	if (f != 'F' && f != 'f')
				2321	return false;
Jakub Jelinek	7bad794	2016-10-08 12:48:54 +0200	[diff] [blame]	2322	if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - 1)
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2323	return false;
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2324	if (f == 'F' && memcmp (from + 1, "ALL", sizeof "ALL" - 1) == 0)
				2325	all_upper = true;
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2326	else if (all_upper)
				2327	return false;
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2328	else if (memcmp (from + 1, "all", sizeof "all" - 1))
				2329	return false;
Jakub Jelinek	7bad794	2016-10-08 12:48:54 +0200	[diff] [blame]	2330	from += sizeof "fall" - 1;
				2331	if (*from == (all_upper ? 'S' : 's') && from[1] == ' ')
				2332	from += 2;
				2333	else if (from == ' ' \|\| from == '-')
				2334	from++;
				2335	else if (*from != (all_upper ? 'T' : 't'))
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2336	return false;
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2337	if ((f == 'f' \|\| from != 'T') && (all_upper \|\| from != 't'))
				2338	return false;
Jakub Jelinek	7bad794	2016-10-08 12:48:54 +0200	[diff] [blame]	2339	if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - 1)
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2340	return false;
				2341	if (memcmp (from + 1, all_upper ? "HRU" : "hru", sizeof "hru" - 1))
				2342	{
Jakub Jelinek	7bad794	2016-10-08 12:48:54 +0200	[diff] [blame]	2343	if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - 1)
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2344	return false;
				2345	if (memcmp (from + 1, all_upper ? "HROUGH" : "hrough",
				2346	sizeof "hrough" - 1))
				2347	return false;
				2348	from += sizeof "through" - 1;
				2349	}
				2350	else
				2351	from += sizeof "thru" - 1;
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2352	while (from == ' ' \|\| from == '\t' \|\| from == '.' \|\| from == '!')
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2353	from++;
Jakub Jelinek	81b0290	2016-10-08 12:53:05 +0200	[diff] [blame]	2354	if (*from == '-')
				2355	{
				2356	from++;
				2357	if (comment_start == '')
				2358	{
				2359	do
				2360	{
				2361	while (from && from != '*'
				2362	&& from != '\n' && from != '\r')
				2363	from++;
				2364	if (from != '' \|\| from[1] == '/')
				2365	break;
				2366	from++;
				2367	}
				2368	while (1);
				2369	}
				2370	else
				2371	while (from && from != '\n' && *from != '\r')
				2372	from++;
				2373	}
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2374	}
				2375	/* C block comment. */
				2376	if (comment_start == '')
				2377	{
				2378	if (from != '' \|\| from[1] != '/')
				2379	return false;
				2380	}
				2381	/* C++ line comment. */
				2382	else if (*from != '\n')
				2383	return false;
				2384
				2385	return true;
				2386	}
				2387
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2388	/* Allocate COUNT tokens for RUN. */
				2389	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2390	_cpp_init_tokenrun (tokenrun *run, unsigned int count)
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2391	{
Bernardo Innocenti	72bb2c3	2004-07-24 20:04:42 +0200	[diff] [blame]	2392	run->base = XNEWVEC (cpp_token, count);
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2393	run->limit = run->base + count;
				2394	run->next = NULL;
				2395	}
				2396
				2397	/* Returns the next tokenrun, or creates one if there is none. */
				2398	static tokenrun *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2399	next_tokenrun (tokenrun *run)
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2400	{
				2401	if (run->next == NULL)
				2402	{
Bernardo Innocenti	72bb2c3	2004-07-24 20:04:42 +0200	[diff] [blame]	2403	run->next = XNEW (tokenrun);
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2404	run->next->prev = run;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2405	_cpp_init_tokenrun (run->next, 250);
				2406	}
				2407
				2408	return run->next;
				2409	}
				2410
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2411	/* Return the number of not yet processed token in a given
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2412	context. */
				2413	int
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2414	_cpp_remaining_tokens_num_in_context (cpp_context *context)
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2415	{
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2416	if (context->tokens_kind == TOKENS_KIND_DIRECT)
Dodji Seketeli	cbbcf65	2011-10-20 08:49:29 +0000	[diff] [blame]	2417	return (LAST (context).token - FIRST (context).token);
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2418	else if (context->tokens_kind == TOKENS_KIND_INDIRECT
				2419	\|\| context->tokens_kind == TOKENS_KIND_EXTENDED)
Dodji Seketeli	cbbcf65	2011-10-20 08:49:29 +0000	[diff] [blame]	2420	return (LAST (context).ptoken - FIRST (context).ptoken);
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2421	else
				2422	abort ();
				2423	}
				2424
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2425	/* Returns the token present at index INDEX in a given context. If
				2426	INDEX is zero, the next token to be processed is returned. */
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2427	static const cpp_token*
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2428	_cpp_token_from_context_at (cpp_context *context, int index)
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2429	{
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2430	if (context->tokens_kind == TOKENS_KIND_DIRECT)
				2431	return &(FIRST (context).token[index]);
				2432	else if (context->tokens_kind == TOKENS_KIND_INDIRECT
				2433	\|\| context->tokens_kind == TOKENS_KIND_EXTENDED)
				2434	return FIRST (context).ptoken[index];
				2435	else
				2436	abort ();
				2437	}
				2438
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2439	/* Look ahead in the input stream. */
				2440	const cpp_token *
				2441	cpp_peek_token (cpp_reader *pfile, int index)
				2442	{
				2443	cpp_context *context = pfile->context;
				2444	const cpp_token *peektok;
				2445	int count;
				2446
				2447	/* First, scan through any pending cpp_context objects. */
				2448	while (context->prev)
				2449	{
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2450	ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2451
				2452	if (index < (int) sz)
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2453	return _cpp_token_from_context_at (context, index);
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2454	index -= (int) sz;
				2455	context = context->prev;
				2456	}
				2457
				2458	/* We will have to read some new tokens after all (and do so
				2459	without invalidating preceding tokens). */
				2460	count = index;
				2461	pfile->keep_tokens++;
				2462
Jakub Jelinek	b8cd77f	2015-04-02 13:57:02 +0200	[diff] [blame]	2463	/* For peeked tokens temporarily disable line_change reporting,
				2464	until the tokens are parsed for real. */
				2465	void (line_change) (cpp_reader , const cpp_token *, int)
				2466	= pfile->cb.line_change;
				2467	pfile->cb.line_change = NULL;
				2468
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2469	do
				2470	{
				2471	peektok = _cpp_lex_token (pfile);
				2472	if (peektok->type == CPP_EOF)
Jakub Jelinek	e4b33ee	2015-04-06 19:01:50 +0200	[diff] [blame]	2473	{
				2474	index--;
				2475	break;
				2476	}
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2477	}
				2478	while (index--);
				2479
Jakub Jelinek	e4b33ee	2015-04-06 19:01:50 +0200	[diff] [blame]	2480	_cpp_backup_tokens_direct (pfile, count - index);
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2481	pfile->keep_tokens--;
Jakub Jelinek	b8cd77f	2015-04-02 13:57:02 +0200	[diff] [blame]	2482	pfile->cb.line_change = line_change;
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2483
				2484	return peektok;
				2485	}
				2486
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	2487	/* Allocate a single token that is invalidated at the same time as the
				2488	rest of the tokens on the line. Has its line and col set to the
				2489	same as the last lexed token, so that diagnostics appear in the
				2490	right place. */
				2491	cpp_token *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2492	_cpp_temp_token (cpp_reader *pfile)
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	2493	{
				2494	cpp_token old, result;
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2495	ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
				2496	ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	2497
				2498	old = pfile->cur_token - 1;
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2499	/* Any pre-existing lookaheads must not be clobbered. */
				2500	if (la)
				2501	{
				2502	if (sz <= la)
				2503	{
				2504	tokenrun *next = next_tokenrun (pfile->cur_run);
				2505
				2506	if (sz < la)
				2507	memmove (next->base + 1, next->base,
				2508	(la - sz) * sizeof (cpp_token));
				2509
				2510	next->base[0] = pfile->cur_run->limit[-1];
				2511	}
				2512
				2513	if (sz > 1)
				2514	memmove (pfile->cur_token + 1, pfile->cur_token,
				2515	MIN (la, sz - 1) * sizeof (cpp_token));
				2516	}
				2517
				2518	if (!sz && pfile->cur_token == pfile->cur_run->limit)
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	2519	{
				2520	pfile->cur_run = next_tokenrun (pfile->cur_run);
				2521	pfile->cur_token = pfile->cur_run->base;
				2522	}
				2523
				2524	result = pfile->cur_token++;
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	2525	result->src_loc = old->src_loc;
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	2526	return result;
				2527	}
				2528
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2529	/* Lex a token into RESULT (external interface). Takes care of issues
				2530	like directive handling, token lookahead, multiple include
Joseph Myers	a1f300c	2001-11-23 02:05:19 +0000	[diff] [blame]	2531	optimization and skipping. */
Neil Booth	345894b	2001-09-16 13:44:29 +0000	[diff] [blame]	2532	const cpp_token *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2533	_cpp_lex_token (cpp_reader *pfile)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2534	{
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2535	cpp_token *result;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2536
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2537	for (;;)
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2538	{
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2539	if (pfile->cur_token == pfile->cur_run->limit)
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2540	{
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2541	pfile->cur_run = next_tokenrun (pfile->cur_run);
				2542	pfile->cur_token = pfile->cur_run->base;
				2543	}
Tom Tromey	ee38036	2007-01-30 15:46:01 +0000	[diff] [blame]	2544	/* We assume that the current token is somewhere in the current
				2545	run. */
				2546	if (pfile->cur_token < pfile->cur_run->base
				2547	\|\| pfile->cur_token >= pfile->cur_run->limit)
				2548	abort ();
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2549
				2550	if (pfile->lookaheads)
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2551	{
				2552	pfile->lookaheads--;
				2553	result = pfile->cur_token++;
				2554	}
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2555	else
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2556	result = _cpp_lex_direct (pfile);
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2557
				2558	if (result->flags & BOL)
				2559	{
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2560	/* Is this a directive. If _cpp_handle_directive returns
				2561	false, it is an assembler #. */
				2562	if (result->type == CPP_HASH
Neil Booth	e808ec9	2002-02-27 07:24:53 +0000	[diff] [blame]	2563	/* 6.10.3 p 11: Directives in a list of macro arguments
				2564	gives undefined behavior. This implementation
				2565	handles the directive as normal. */
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2566	&& pfile->state.parsing_args != 1)
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	2567	{
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2568	if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	2569	{
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2570	if (pfile->directive_result.type == CPP_PADDING)
				2571	continue;
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	2572	result = &pfile->directive_result;
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	2573	}
				2574	}
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2575	else if (pfile->state.in_deferred_pragma)
				2576	result = &pfile->directive_result;
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	2577
Neil Booth	9729389	2001-09-14 22:04:46 +0000	[diff] [blame]	2578	if (pfile->cb.line_change && !pfile->state.skipping)
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2579	pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2580	}
				2581
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2582	/* We don't skip tokens in directives. */
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2583	if (pfile->state.in_directive \|\| pfile->state.in_deferred_pragma)
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2584	break;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2585
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2586	/* Outside a directive, invalidate controlling macros. At file
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2587	EOF, _cpp_lex_direct takes care of popping the buffer, so we never
Kazu Hirata	6356f89	2003-06-12 19:01:08 +0000	[diff] [blame]	2588	get here and MI optimization works. */
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2589	pfile->mi_valid = false;
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2590
				2591	if (!pfile->state.skipping \|\| result->type == CPP_EOF)
				2592	break;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2593	}
				2594
Neil Booth	345894b	2001-09-16 13:44:29 +0000	[diff] [blame]	2595	return result;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2596	}
				2597
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2598	/* Returns true if a fresh line has been loaded. */
				2599	bool
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2600	_cpp_get_fresh_line (cpp_reader *pfile)
Neil Booth	004cb26	2002-05-17 20:16:48 +0000	[diff] [blame]	2601	{
Per Bothner	22234f5	2004-02-18 14:02:39 -0800	[diff] [blame]	2602	int return_at_eof;
				2603
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2604	/* We can't get a new line until we leave the current directive. */
				2605	if (pfile->state.in_directive)
				2606	return false;
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	2607
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2608	for (;;)
Neil Booth	1a76916	2002-06-11 05:36:17 +0000	[diff] [blame]	2609	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2610	cpp_buffer *buffer = pfile->buffer;
				2611
				2612	if (!buffer->need_line)
				2613	return true;
				2614
				2615	if (buffer->next_line < buffer->rlimit)
				2616	{
				2617	_cpp_clean_line (pfile);
				2618	return true;
				2619	}
				2620
				2621	/* First, get out of parsing arguments state. */
				2622	if (pfile->state.parsing_args)
Neil Booth	1a76916	2002-06-11 05:36:17 +0000	[diff] [blame]	2623	return false;
				2624
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2625	/* End of buffer. Non-empty files should end in a newline. */
				2626	if (buffer->buf != buffer->rlimit
				2627	&& buffer->next_line > buffer->rlimit
				2628	&& !buffer->from_stage3)
Neil Booth	004cb26	2002-05-17 20:16:48 +0000	[diff] [blame]	2629	{
Dave Korn	ed0e74e	2007-05-31 02:06:48 +0000	[diff] [blame]	2630	/* Clip to buffer size. */
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2631	buffer->next_line = buffer->rlimit;
Neil Booth	004cb26	2002-05-17 20:16:48 +0000	[diff] [blame]	2632	}
Per Bothner	22234f5	2004-02-18 14:02:39 -0800	[diff] [blame]	2633
				2634	return_at_eof = buffer->return_at_eof;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2635	_cpp_pop_buffer (pfile);
Per Bothner	22234f5	2004-02-18 14:02:39 -0800	[diff] [blame]	2636	if (pfile->buffer == NULL \|\| return_at_eof)
Per Bothner	a506c55	2003-10-02 07:20:38 +0000	[diff] [blame]	2637	return false;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2638	}
Neil Booth	004cb26	2002-05-17 20:16:48 +0000	[diff] [blame]	2639	}
				2640
/* Set result->type to ELSE_TYPE; then, if the next unread character
   of the buffer equals CHAR, consume that character and set
   result->type to THEN_TYPE instead.

   The expansion refers to variables named `buffer' and `result',
   which must be in scope where the macro is used.  Each argument is
   parenthesized so that expression arguments are evaluated as a
   unit.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)  \
  do                                            \
    {                                           \
      result->type = (ELSE_TYPE);               \
      if (*buffer->cur == (CHAR))               \
        {                                       \
          buffer->cur++;                        \
          result->type = (THEN_TYPE);           \
        }                                       \
    }                                           \
  while (0)
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2649
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2650	/* Lex a token into pfile->cur_token, which is also incremented, to
				2651	get diagnostics pointing to the correct location.
				2652
				2653	Does not handle issues such as token lookahead, multiple-include
Kazu Hirata	f1ba665	2003-06-28 19:43:01 +0000	[diff] [blame]	2654	optimization, directives, skipping etc. This function is only
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2655	suitable for use by _cpp_lex_token, and in special cases like
				2656	lex_expansion_token which doesn't care for any of these issues.
				2657
				2658	When meeting a newline, returns CPP_EOF if parsing a directive,
				2659	otherwise returns to the start of the token buffer if permissible.
				2660	Returns the location of the lexed token. */
				2661	cpp_token *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2662	_cpp_lex_direct (cpp_reader *pfile)
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2663	{
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2664	cppchar_t c;
Neil Booth	adb84b4	2000-11-08 23:08:07 +0000	[diff] [blame]	2665	cpp_buffer *buffer;
Jakub Jelinek	7bad794	2016-10-08 12:48:54 +0200	[diff] [blame]	2666	const unsigned char *comment_start;
				2667	bool fallthrough_comment = false;
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2668	cpp_token *result = pfile->cur_token++;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2669
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2670	fresh_line:
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2671	result->flags = 0;
Per Bothner	2be570f	2003-08-28 18:07:42 -0700	[diff] [blame]	2672	buffer = pfile->buffer;
Per Bothner	a506c55	2003-10-02 07:20:38 +0000	[diff] [blame]	2673	if (buffer->need_line)
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2674	{
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2675	if (pfile->state.in_deferred_pragma)
				2676	{
				2677	result->type = CPP_PRAGMA_EOL;
				2678	pfile->state.in_deferred_pragma = false;
				2679	if (!pfile->state.pragma_allow_expansion)
				2680	pfile->state.prevent_expansion--;
				2681	return result;
				2682	}
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2683	if (!_cpp_get_fresh_line (pfile))
				2684	{
				2685	result->type = CPP_EOF;
Neil Booth	9ff7868	2003-04-26 21:03:51 +0000	[diff] [blame]	2686	if (!pfile->state.in_directive)
				2687	{
				2688	/* Tell the compiler the line number of the EOF token. */
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	2689	result->src_loc = pfile->line_table->highest_line;
Neil Booth	9ff7868	2003-04-26 21:03:51 +0000	[diff] [blame]	2690	result->flags = BOL;
				2691	}
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2692	return result;
				2693	}
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2694	if (buffer != pfile->buffer)
Jakub Jelinek	7bad794	2016-10-08 12:48:54 +0200	[diff] [blame]	2695	fallthrough_comment = false;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2696	if (!pfile->keep_tokens)
				2697	{
				2698	pfile->cur_run = &pfile->base_run;
				2699	result = pfile->base_run.base;
				2700	pfile->cur_token = result + 1;
				2701	}
				2702	result->flags = BOL;
				2703	if (pfile->state.parsing_args == 2)
				2704	result->flags \|= PREV_WHITE;
				2705	}
Per Bothner	a506c55	2003-10-02 07:20:38 +0000	[diff] [blame]	2706	buffer = pfile->buffer;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2707	update_tokens_line:
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	2708	result->src_loc = pfile->line_table->highest_line;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2709
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2710	skipped_white:
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2711	if (buffer->cur >= buffer->notes[buffer->cur_note].pos
				2712	&& !pfile->overlaid_buffer)
				2713	{
				2714	_cpp_process_line_notes (pfile, false);
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	2715	result->src_loc = pfile->line_table->highest_line;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2716	}
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2717	c = *buffer->cur++;
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	2718
Gabriel Charette	e3dfef4	2011-08-22 20:41:07 +0000	[diff] [blame]	2719	if (pfile->forced_token_location_p)
				2720	result->src_loc = *pfile->forced_token_location_p;
				2721	else
				2722	result->src_loc = linemap_position_for_column (pfile->line_table,
				2723	CPP_BUF_COLUMN (buffer, buffer->cur));
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2724
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2725	switch (c)
				2726	{
Neil Booth	4d6baaf	2001-11-26 23:44:54 +0000	[diff] [blame]	2727	case ' ': case '\t': case '\f': case '\v': case '\0':
				2728	result->flags \|= PREV_WHITE;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2729	skip_whitespace (pfile, c);
				2730	goto skipped_white;
Neil Booth	4d6baaf	2001-11-26 23:44:54 +0000	[diff] [blame]	2731
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2732	case '\n':
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	2733	if (buffer->cur < buffer->rlimit)
				2734	CPP_INCREMENT_LINE (pfile, 0);
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2735	buffer->need_line = true;
				2736	goto fresh_line;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2737
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2738	case '0': case '1': case '2': case '3': case '4':
				2739	case '5': case '6': case '7': case '8': case '9':
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2740	{
				2741	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
				2742	result->type = CPP_NUMBER;
				2743	lex_number (pfile, &result->val.str, &nst);
				2744	warn_about_normalization (pfile, result, &nst);
				2745	break;
				2746	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2747
Neil Booth	0abc6a6	2001-11-27 22:31:34 +0000	[diff] [blame]	2748	case 'L':
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	2749	case 'u':
				2750	case 'U':
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	2751	case 'R':
				2752	/* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
				2753	wide strings or raw strings. */
Joseph Myers	a48e3dd	2011-08-18 16:13:49 +0100	[diff] [blame]	2754	if (c == 'L' \|\| CPP_OPTION (pfile, rliterals)
				2755	\|\| (c != 'R' && CPP_OPTION (pfile, uliterals)))
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	2756	{
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	2757	if ((*buffer->cur == '\'' && c != 'R')
				2758	\|\| *buffer->cur == '"'
				2759	\|\| (*buffer->cur == 'R'
				2760	&& c != 'R'
				2761	&& buffer->cur[1] == '"'
Joseph Myers	a48e3dd	2011-08-18 16:13:49 +0100	[diff] [blame]	2762	&& CPP_OPTION (pfile, rliterals))
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	2763	\|\| (*buffer->cur == '8'
				2764	&& c == 'u'
Edward Smith-Rowland	fe95b03	2015-06-30 12:58:48 +0000	[diff] [blame]	2765	&& ((buffer->cur[1] == '"' \|\| (buffer->cur[1] == '\''
				2766	&& CPP_OPTION (pfile, utf8_char_literals)))
Joseph Myers	a48e3dd	2011-08-18 16:13:49 +0100	[diff] [blame]	2767	\|\| (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
				2768	&& CPP_OPTION (pfile, rliterals)))))
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	2769	{
				2770	lex_string (pfile, result, buffer->cur - 1);
				2771	break;
				2772	}
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	2773	}
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	2774	/* Fall through. */
Neil Booth	0abc6a6	2001-11-27 22:31:34 +0000	[diff] [blame]	2775
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2776	case '_':
				2777	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
				2778	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
				2779	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	2780	case 's': case 't': case 'v': case 'w': case 'x':
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2781	case 'y': case 'z':
				2782	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
Neil Booth	0abc6a6	2001-11-27 22:31:34 +0000	[diff] [blame]	2783	case 'G': case 'H': case 'I': case 'J': case 'K':
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	2784	case 'M': case 'N': case 'O': case 'P': case 'Q':
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	2785	case 'S': case 'T': case 'V': case 'W': case 'X':
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2786	case 'Y': case 'Z':
				2787	result->type = CPP_NAME;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2788	{
				2789	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2790	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	2791	&nst,
				2792	&result->val.node.spelling);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2793	warn_about_normalization (pfile, result, &nst);
				2794	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2795
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2796	/* Convert named operators to their proper types. */
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2797	if (result->val.node.node->flags & NODE_OPERATOR)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2798	{
				2799	result->flags \|= NAMED_OP;
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2800	result->type = (enum cpp_ttype) result->val.node.node->directive_index;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2801	}
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2802
				2803	/* Signal FALLTHROUGH comment followed by another token. */
Jakub Jelinek	7bad794	2016-10-08 12:48:54 +0200	[diff] [blame]	2804	if (fallthrough_comment)
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2805	result->flags \|= PREV_FALLTHROUGH;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2806	break;
				2807
				2808	case '\'':
				2809	case '"':
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	2810	lex_string (pfile, result, buffer->cur - 1);
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2811	break;
				2812
				2813	case '/':
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2814	/* A potential block or line comment. */
				2815	comment_start = buffer->cur;
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2816	c = *buffer->cur;
				2817
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2818	if (c == '*')
				2819	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2820	if (_cpp_skip_block_comment (pfile))
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	2821	cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2822	}
Marek Polacek	909eb89	2014-09-17 21:49:46 +0000	[diff] [blame]	2823	else if (c == '/' && ! CPP_OPTION (pfile, traditional))
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2824	{
Marek Polacek	909eb89	2014-09-17 21:49:46 +0000	[diff] [blame]	2825	/* Don't warn for system headers. */
				2826	if (cpp_in_system_header (pfile))
				2827	;
Marek Polacek	f3bede7	2014-08-10 06:10:49 +0000	[diff] [blame]	2828	/* Warn about comments if pedantically GNUC89, and not
Neil Booth	bdb05a7	2000-11-26 17:31:13 +0000	[diff] [blame]	2829	in system headers. */
Marek Polacek	909eb89	2014-09-17 21:49:46 +0000	[diff] [blame]	2830	else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
				2831	&& CPP_PEDANTIC (pfile)
				2832	&& ! buffer->warned_cplusplus_comments)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2833	{
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	2834	cpp_error (pfile, CPP_DL_PEDWARN,
Gabriel Dos Reis	5650830	2002-07-21 21:35:17 +0000	[diff] [blame]	2835	"C++ style comments are not allowed in ISO C90");
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	2836	cpp_error (pfile, CPP_DL_PEDWARN,
Neil Booth	ebef4e8	2002-04-14 18:42:47 +0000	[diff] [blame]	2837	"(this will be reported only once per input file)");
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2838	buffer->warned_cplusplus_comments = 1;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2839	}
Marek Polacek	f3bede7	2014-08-10 06:10:49 +0000	[diff] [blame]	2840	/* Or if specifically desired via -Wc90-c99-compat. */
Marek Polacek	177cce4	2014-08-19 05:34:31 +0000	[diff] [blame]	2841	else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
Marek Polacek	dd3ff07	2014-08-20 04:12:58 +0000	[diff] [blame]	2842	&& ! CPP_OPTION (pfile, cplusplus)
Marek Polacek	f3bede7	2014-08-10 06:10:49 +0000	[diff] [blame]	2843	&& ! buffer->warned_cplusplus_comments)
				2844	{
				2845	cpp_error (pfile, CPP_DL_WARNING,
Marek Polacek	3f4f5c9	2014-08-19 15:52:02 +0000	[diff] [blame]	2846	"C++ style comments are incompatible with C90");
Marek Polacek	f3bede7	2014-08-10 06:10:49 +0000	[diff] [blame]	2847	cpp_error (pfile, CPP_DL_WARNING,
				2848	"(this will be reported only once per input file)");
				2849	buffer->warned_cplusplus_comments = 1;
				2850	}
Marek Polacek	909eb89	2014-09-17 21:49:46 +0000	[diff] [blame]	2851	/* In C89/C94, C++ style comments are forbidden. */
				2852	else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
				2853	\|\| CPP_OPTION (pfile, lang) == CLK_STDC94))
				2854	{
				2855	/* But don't be confused about valid code such as
				2856	- // immediately followed by *,
				2857	- // in a preprocessing directive,
				2858	- // in an #if 0 block. */
				2859	if (buffer->cur[1] == '*'
				2860	\|\| pfile->state.in_directive
				2861	\|\| pfile->state.skipping)
				2862	{
				2863	result->type = CPP_DIV;
				2864	break;
				2865	}
				2866	else if (! buffer->warned_cplusplus_comments)
				2867	{
				2868	cpp_error (pfile, CPP_DL_ERROR,
				2869	"C++ style comments are not allowed in ISO C90");
				2870	cpp_error (pfile, CPP_DL_ERROR,
				2871	"(this will be reported only once per input "
				2872	"file)");
				2873	buffer->warned_cplusplus_comments = 1;
				2874	}
				2875	}
Jakub Jelinek	01ef656	2001-04-11 11:43:10 +0200	[diff] [blame]	2876	if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	2877	cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2878	}
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2879	else if (c == '=')
				2880	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2881	buffer->cur++;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2882	result->type = CPP_DIV_EQ;
				2883	break;
				2884	}
				2885	else
				2886	{
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2887	result->type = CPP_DIV;
				2888	break;
				2889	}
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2890
Jakub Jelinek	7bad794	2016-10-08 12:48:54 +0200	[diff] [blame]	2891	if (fallthrough_comment_p (pfile, comment_start))
				2892	fallthrough_comment = true;
				2893
David Malcolm	05945a1	2017-06-05 20:53:06 +0000	[diff] [blame]	2894	if (pfile->cb.comment)
				2895	{
				2896	size_t len = pfile->buffer->cur - comment_start;
				2897	pfile->cb.comment (pfile, result->src_loc, comment_start - 1,
				2898	len + 1);
				2899	}
				2900
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2901	if (!pfile->state.save_comments)
				2902	{
				2903	result->flags \|= PREV_WHITE;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2904	goto update_tokens_line;
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2905	}
				2906
Jakub Jelinek	7bad794	2016-10-08 12:48:54 +0200	[diff] [blame]	2907	if (fallthrough_comment)
Marek Polacek	81fea42	2016-09-26 09:42:50 +0000	[diff] [blame]	2908	result->flags \|= PREV_FALLTHROUGH;
				2909
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2910	/* Save the comment as a token in its own right. */
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2911	save_comment (pfile, result, comment_start, c);
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2912	break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2913
				2914	case '<':
				2915	if (pfile->state.angled_headers)
				2916	{
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	2917	lex_string (pfile, result, buffer->cur - 1);
Joseph Myers	4bb09c2	2009-02-21 21:25:39 +0000	[diff] [blame]	2918	if (result->type != CPP_LESS)
				2919	break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2920	}
				2921
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2922	result->type = CPP_LESS;
				2923	if (*buffer->cur == '=')
				2924	buffer->cur++, result->type = CPP_LESS_EQ;
				2925	else if (*buffer->cur == '<')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2926	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2927	buffer->cur++;
				2928	IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2929	}
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2930	else if (CPP_OPTION (pfile, digraphs))
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2931	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2932	if (*buffer->cur == ':')
				2933	{
Paolo Carlini	1582c67	2013-01-04 15:30:24 +0000	[diff] [blame]	2934	/* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
				2935	three characters are <:: and the subsequent character
				2936	is neither : nor >, the < is treated as a preprocessor
				2937	token by itself". */
				2938	if (CPP_OPTION (pfile, cplusplus)
Paolo Carlini	6194915	2013-04-24 19:33:54 +0000	[diff] [blame]	2939	&& CPP_OPTION (pfile, lang) != CLK_CXX98
				2940	&& CPP_OPTION (pfile, lang) != CLK_GNUCXX
Paolo Carlini	1582c67	2013-01-04 15:30:24 +0000	[diff] [blame]	2941	&& buffer->cur[1] == ':'
				2942	&& buffer->cur[2] != ':' && buffer->cur[2] != '>')
				2943	break;
				2944
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2945	buffer->cur++;
				2946	result->flags \|= DIGRAPH;
				2947	result->type = CPP_OPEN_SQUARE;
				2948	}
				2949	else if (*buffer->cur == '%')
				2950	{
				2951	buffer->cur++;
				2952	result->flags \|= DIGRAPH;
				2953	result->type = CPP_OPEN_BRACE;
				2954	}
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2955	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2956	break;
				2957
				2958	case '>':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2959	result->type = CPP_GREATER;
				2960	if (*buffer->cur == '=')
				2961	buffer->cur++, result->type = CPP_GREATER_EQ;
				2962	else if (*buffer->cur == '>')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2963	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2964	buffer->cur++;
				2965	IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
				2966	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2967	break;
				2968
Neil Booth	cbcff6d	2000-09-23 21:41:41 +0000	[diff] [blame]	2969	case '%':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2970	result->type = CPP_MOD;
				2971	if (*buffer->cur == '=')
				2972	buffer->cur++, result->type = CPP_MOD_EQ;
				2973	else if (CPP_OPTION (pfile, digraphs))
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2974	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2975	if (*buffer->cur == ':')
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2976	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2977	buffer->cur++;
				2978	result->flags \|= DIGRAPH;
				2979	result->type = CPP_HASH;
				2980	if (*buffer->cur == '%' && buffer->cur[1] == ':')
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2981	buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2982	}
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2983	else if (*buffer->cur == '>')
				2984	{
				2985	buffer->cur++;
				2986	result->flags \|= DIGRAPH;
				2987	result->type = CPP_CLOSE_BRACE;
				2988	}
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2989	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2990	break;
				2991
Neil Booth	cbcff6d	2000-09-23 21:41:41 +0000	[diff] [blame]	2992	case '.':
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2993	result->type = CPP_DOT;
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2994	if (ISDIGIT (*buffer->cur))
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2995	{
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2996	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2997	result->type = CPP_NUMBER;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2998	lex_number (pfile, &result->val.str, &nst);
				2999	warn_about_normalization (pfile, result, &nst);
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	3000	}
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	3001	else if (*buffer->cur == '.' && buffer->cur[1] == '.')
				3002	buffer->cur += 2, result->type = CPP_ELLIPSIS;
				3003	else if (buffer->cur == '' && CPP_OPTION (pfile, cplusplus))
				3004	buffer->cur++, result->type = CPP_DOT_STAR;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3005	break;
				3006
				3007	case '+':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	3008	result->type = CPP_PLUS;
				3009	if (*buffer->cur == '+')
				3010	buffer->cur++, result->type = CPP_PLUS_PLUS;
				3011	else if (*buffer->cur == '=')
				3012	buffer->cur++, result->type = CPP_PLUS_EQ;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3013	break;
				3014
				3015	case '-':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	3016	result->type = CPP_MINUS;
				3017	if (*buffer->cur == '>')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3018	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	3019	buffer->cur++;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	3020	result->type = CPP_DEREF;
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	3021	if (buffer->cur == '' && CPP_OPTION (pfile, cplusplus))
				3022	buffer->cur++, result->type = CPP_DEREF_STAR;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3023	}
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	3024	else if (*buffer->cur == '-')
				3025	buffer->cur++, result->type = CPP_MINUS_MINUS;
				3026	else if (*buffer->cur == '=')
				3027	buffer->cur++, result->type = CPP_MINUS_EQ;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3028	break;
				3029
				3030	case '&':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	3031	result->type = CPP_AND;
				3032	if (*buffer->cur == '&')
				3033	buffer->cur++, result->type = CPP_AND_AND;
				3034	else if (*buffer->cur == '=')
				3035	buffer->cur++, result->type = CPP_AND_EQ;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3036	break;
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	3037
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3038	case '\|':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	3039	result->type = CPP_OR;
				3040	if (*buffer->cur == '\|')
				3041	buffer->cur++, result->type = CPP_OR_OR;
				3042	else if (*buffer->cur == '=')
				3043	buffer->cur++, result->type = CPP_OR_EQ;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3044	break;
				3045
				3046	case ':':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	3047	result->type = CPP_COLON;
				3048	if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
				3049	buffer->cur++, result->type = CPP_SCOPE;
				3050	else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3051	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	3052	buffer->cur++;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3053	result->flags \|= DIGRAPH;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	3054	result->type = CPP_CLOSE_SQUARE;
				3055	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3056	break;
				3057
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	3058	case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
				3059	case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
				3060	case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
				3061	case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	3062	case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	3063
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	3064	case '?': result->type = CPP_QUERY; break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3065	case '~': result->type = CPP_COMPL; break;
				3066	case ',': result->type = CPP_COMMA; break;
				3067	case '(': result->type = CPP_OPEN_PAREN; break;
				3068	case ')': result->type = CPP_CLOSE_PAREN; break;
				3069	case '[': result->type = CPP_OPEN_SQUARE; break;
				3070	case ']': result->type = CPP_CLOSE_SQUARE; break;
				3071	case '{': result->type = CPP_OPEN_BRACE; break;
				3072	case '}': result->type = CPP_CLOSE_BRACE; break;
				3073	case ';': result->type = CPP_SEMICOLON; break;
				3074
Kazu Hirata	40f0365	2002-09-26 22:25:14 +0000	[diff] [blame]	3075	/* @ is a punctuator in Objective-C. */
Zack Weinberg	cc93758	2001-03-07 01:32:01 +0000	[diff] [blame]	3076	case '@': result->type = CPP_ATSIGN; break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3077
Neil Booth	0abc6a6	2001-11-27 22:31:34 +0000	[diff] [blame]	3078	case '$':
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	3079	case '\\':
				3080	{
				3081	const uchar *base = --buffer->cur;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	3082	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
Neil Booth	0abc6a6	2001-11-27 22:31:34 +0000	[diff] [blame]	3083
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	3084	if (forms_identifier_p (pfile, true, &nst))
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	3085	{
				3086	result->type = CPP_NAME;
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	3087	result->val.node.node = lex_identifier (pfile, base, true, &nst,
				3088	&result->val.node.spelling);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	3089	warn_about_normalization (pfile, result, &nst);
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	3090	break;
				3091	}
				3092	buffer->cur++;
Neil Booth	1067694	2003-04-22 19:28:00 +0000	[diff] [blame]	3093	}
Marek Polacek	191816a	2016-08-12 10:30:47 +0000	[diff] [blame]	3094	/* FALLTHRU */
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	3095
Neil Booth	1067694	2003-04-22 19:28:00 +0000	[diff] [blame]	3096	default:
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	3097	create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
				3098	break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	3099	}
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	3100
David Malcolm	a3998c2	2016-12-15 18:05:05 +0000	[diff] [blame]	3101	/* Potentially convert the location of the token to a range. */
				3102	if (result->src_loc >= RESERVED_LOCATION_COUNT
				3103	&& result->type != CPP_EOF)
				3104	{
				3105	/* Ensure that any line notes are processed, so that we have the
				3106	correct physical line/column for the end-point of the token even
				3107	when a logical line is split via one or more backslashes. */
				3108	if (buffer->cur >= buffer->notes[buffer->cur_note].pos
				3109	&& !pfile->overlaid_buffer)
				3110	_cpp_process_line_notes (pfile, false);
David Malcolm	470a60b	2016-12-12 17:37:48 +0000	[diff] [blame]	3111
David Malcolm	a3998c2	2016-12-15 18:05:05 +0000	[diff] [blame]	3112	source_range tok_range;
				3113	tok_range.m_start = result->src_loc;
				3114	tok_range.m_finish
				3115	= linemap_position_for_column (pfile->line_table,
				3116	CPP_BUF_COLUMN (buffer, buffer->cur));
David Malcolm	ebedc9a	2015-11-13 16:29:59 +0000	[diff] [blame]	3117
David Malcolm	a3998c2	2016-12-15 18:05:05 +0000	[diff] [blame]	3118	result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
				3119	result->src_loc,
				3120	tok_range, NULL);
				3121	}
David Malcolm	ebedc9a	2015-11-13 16:29:59 +0000	[diff] [blame]	3122
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	3123	return result;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	3124	}
				3125
Neil Booth	5932565	2003-04-24 20:03:57 +0000	[diff] [blame]	3126	/* An upper bound on the number of bytes needed to spell TOKEN.
				3127	Does not include preceding whitespace. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3128	unsigned int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3129	cpp_token_len (const cpp_token *token)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	3130	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3131	unsigned int len;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	3132
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3133	switch (TOKEN_SPELL (token))
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	3134	{
Joseph Myers	cc95528	2008-11-29 12:21:10 +0000	[diff] [blame]	3135	default: len = 6; break;
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	3136	case SPELL_LITERAL: len = token->val.str.len; break;
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	3137	case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	3138	}
Neil Booth	5932565	2003-04-24 20:03:57 +0000	[diff] [blame]	3139
				3140	return len;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3141	}
				3142
/* Parse one UTF-8 sequence starting at NAME and store the equivalent
   \U escape in BUFFER: a backslash, 'U', then the code point as eight
   lowercase hexadecimal digits.  Return the number of bytes read out
   of NAME.  There are always exactly 10 bytes written to BUFFER, and
   no terminating NUL is stored.  Calls abort () if a continuation
   byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* Compute the length of the UTF-8 sequence: it equals the number of
     leading one bits in the first byte.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* The payload bits of the lead byte are those below the length
     marker.  */
  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      /* Each continuation byte contributes its low six bits.  */
      utf32 = (utf32 << 6) | (*++name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  /* Emit the eight nibbles, most significant first.  */
  for (j = 7; j >= 0; j--)
    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
  return ucn_len;
}
				3176
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	3177	/* Given a token TYPE corresponding to a digraph, return a pointer to
				3178	the spelling of the digraph. */
				3179	static const unsigned char *
				3180	cpp_digraph2name (enum cpp_ttype type)
				3181	{
				3182	return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
				3183	}
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	3184
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	3185	/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
				3186	The buffer must already contain the enough space to hold the
				3187	token's spelling. Returns a pointer to the character after the
				3188	last character written. */
				3189	unsigned char *
				3190	_cpp_spell_ident_ucns (unsigned char buffer, cpp_hashnode ident)
				3191	{
				3192	size_t i;
				3193	const unsigned char *name = NODE_NAME (ident);
				3194
				3195	for (i = 0; i < NODE_LEN (ident); i++)
				3196	if (name[i] & ~0x7F)
				3197	{
				3198	i += utf8_to_ucn (buffer, name + i) - 1;
				3199	buffer += 10;
				3200	}
				3201	else
				3202	*buffer++ = name[i];
				3203
				3204	return buffer;
				3205	}
				3206
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	3207	/* Write the spelling of a token TOKEN to BUFFER. The buffer must
Zack Weinberg	cf00a88	2000-07-08 02:33:00 +0000	[diff] [blame]	3208	already contain the enough space to hold the token's spelling.
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3209	Returns a pointer to the character after the last character written.
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	3210	FORSTRING is true if this is to be the spelling after translation
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	3211	phase 1 (with the original spelling of extended identifiers), false
				3212	if extended identifiers should always be written using UCNs (there is
				3213	no option for always writing them in the internal UTF-8 form).
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3214	FIXME: Would be nice if we didn't need the PFILE argument. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3215	unsigned char *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3216	cpp_spell_token (cpp_reader pfile, const cpp_token token,
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	3217	unsigned char *buffer, bool forstring)
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	3218	{
Zack Weinberg	96be699	2000-07-18 23:25:06 +0000	[diff] [blame]	3219	switch (TOKEN_SPELL (token))
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	3220	{
Neil Booth	5d7ee2f	2000-05-10 09:39:18 +0000	[diff] [blame]	3221	case SPELL_OPERATOR:
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	3222	{
				3223	const unsigned char *spelling;
				3224	unsigned char c;
				3225
				3226	if (token->flags & DIGRAPH)
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	3227	spelling = cpp_digraph2name (token->type);
Zack Weinberg	92936ec	2000-07-19 20:18:08 +0000	[diff] [blame]	3228	else if (token->flags & NAMED_OP)
				3229	goto spell_ident;
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	3230	else
Zack Weinberg	96be699	2000-07-18 23:25:06 +0000	[diff] [blame]	3231	spelling = TOKEN_NAME (token);
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	3232
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	3233	while ((c = *spelling++) != '\0')
				3234	*buffer++ = c;
				3235	}
				3236	break;
				3237
Zack Weinberg	47ad413	2001-10-06 23:11:27 +0000	[diff] [blame]	3238	spell_ident:
Neil Booth	5d7ee2f	2000-05-10 09:39:18 +0000	[diff] [blame]	3239	case SPELL_IDENT:
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	3240	if (forstring)
				3241	{
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	3242	memcpy (buffer, NODE_NAME (token->val.node.spelling),
				3243	NODE_LEN (token->val.node.spelling));
				3244	buffer += NODE_LEN (token->val.node.spelling);
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	3245	}
				3246	else
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	3247	buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
Neil Booth	5d7ee2f	2000-05-10 09:39:18 +0000	[diff] [blame]	3248	break;
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	3249
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	3250	case SPELL_LITERAL:
Zack Weinberg	47ad413	2001-10-06 23:11:27 +0000	[diff] [blame]	3251	memcpy (buffer, token->val.str.text, token->val.str.len);
				3252	buffer += token->val.str.len;
				3253	break;
				3254
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	3255	case SPELL_NONE:
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	3256	cpp_error (pfile, CPP_DL_ICE,
				3257	"unspellable token %s", TOKEN_NAME (token));
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	3258	break;
				3259	}
				3260
				3261	return buffer;
				3262	}
				3263
Neil Booth	5d8ebbd	2002-01-03 21:43:09 +0000	[diff] [blame]	3264	/* Returns TOKEN spelt as a null-terminated string. The string is
				3265	freed when the reader is destroyed. Useful for diagnostics. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3266	unsigned char *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3267	cpp_token_as_text (cpp_reader pfile, const cpp_token token)
Neil Booth	5932565	2003-04-24 20:03:57 +0000	[diff] [blame]	3268	{
				3269	unsigned int len = cpp_token_len (token) + 1;
Neil Booth	ece54d5	2001-09-28 09:40:22 +0000	[diff] [blame]	3270	unsigned char start = _cpp_unaligned_alloc (pfile, len), end;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3271
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	3272	end = cpp_spell_token (pfile, token, start, false);
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3273	end[0] = '\0';
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3274
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3275	return start;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3276	}
				3277
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	3278	/* Returns a pointer to a string which spells the token defined by
				3279	TYPE and FLAGS. Used by C front ends, which really should move to
				3280	using cpp_token_as_text. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3281	const char *
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	3282	cpp_type2name (enum cpp_ttype type, unsigned char flags)
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3283	{
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	3284	if (flags & DIGRAPH)
				3285	return (const char *) cpp_digraph2name (type);
				3286	else if (flags & NAMED_OP)
				3287	return cpp_named_operator2name (type);
				3288
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3289	return (const char *) token_spellings[type].name;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3290	}
				3291
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	3292	/* Writes the spelling of token to FP, without any preceding space.
				3293	Separated from cpp_spell_token for efficiency - to avoid stdio
				3294	double-buffering. */
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3295	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3296	cpp_output_token (const cpp_token token, FILE fp)
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3297	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3298	switch (TOKEN_SPELL (token))
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3299	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3300	case SPELL_OPERATOR:
				3301	{
				3302	const unsigned char *spelling;
Zack Weinberg	3b681e9	2001-09-28 07:00:27 +0000	[diff] [blame]	3303	int c;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3304
				3305	if (token->flags & DIGRAPH)
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	3306	spelling = cpp_digraph2name (token->type);
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3307	else if (token->flags & NAMED_OP)
				3308	goto spell_ident;
				3309	else
				3310	spelling = TOKEN_NAME (token);
				3311
Zack Weinberg	3b681e9	2001-09-28 07:00:27 +0000	[diff] [blame]	3312	c = *spelling;
				3313	do
				3314	putc (c, fp);
				3315	while ((c = *++spelling) != '\0');
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3316	}
				3317	break;
				3318
				3319	spell_ident:
				3320	case SPELL_IDENT:
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	3321	{
				3322	size_t i;
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	3323	const unsigned char * name = NODE_NAME (token->val.node.node);
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	3324
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	3325	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	3326	if (name[i] & ~0x7F)
				3327	{
				3328	unsigned char buffer[10];
				3329	i += utf8_to_ucn (buffer, name + i) - 1;
				3330	fwrite (buffer, 1, 10, fp);
				3331	}
				3332	else
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	3333	fputc (NODE_NAME (token->val.node.node)[i], fp);
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	3334	}
				3335	break;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3336
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	3337	case SPELL_LITERAL:
Zack Weinberg	47ad413	2001-10-06 23:11:27 +0000	[diff] [blame]	3338	fwrite (token->val.str.text, 1, token->val.str.len, fp);
				3339	break;
				3340
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3341	case SPELL_NONE:
				3342	/* An error, most probably. */
				3343	break;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3344	}
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3345	}
				3346
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3347	/* Compare two tokens. */
				3348	int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3349	_cpp_equiv_tokens (const cpp_token a, const cpp_token b)
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3350	{
				3351	if (a->type == b->type && a->flags == b->flags)
				3352	switch (TOKEN_SPELL (a))
				3353	{
				3354	default: /* Keep compiler happy. */
				3355	case SPELL_OPERATOR:
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	3356	/* token_no is used to track where multiple consecutive ##
Joseph Myers	aa50850	2009-04-19 18:10:56 +0100	[diff] [blame]	3357	tokens were originally located. */
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	3358	return (a->type != CPP_PASTE \|\| a->val.token_no == b->val.token_no);
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3359	case SPELL_NONE:
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	3360	return (a->type != CPP_MACRO_ARG
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	3361	\|\| (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
				3362	&& a->val.macro_arg.spelling == b->val.macro_arg.spelling));
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3363	case SPELL_IDENT:
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	3364	return (a->val.node.node == b->val.node.node
				3365	&& a->val.node.spelling == b->val.node.spelling);
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	3366	case SPELL_LITERAL:
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3367	return (a->val.str.len == b->val.str.len
				3368	&& !memcmp (a->val.str.text, b->val.str.text,
				3369	a->val.str.len));
				3370	}
				3371
				3372	return 0;
				3373	}
				3374
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3375	/* Returns nonzero if a space should be inserted to avoid an
				3376	accidental token paste for output. For simplicity, it is
				3377	conservative, and occasionally advises a space where one is not
				3378	needed, e.g. "." and ".2". */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3379	int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3380	cpp_avoid_paste (cpp_reader pfile, const cpp_token token1,
				3381	const cpp_token *token2)
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3382	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3383	enum cpp_ttype a = token1->type, b = token2->type;
				3384	cppchar_t c;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3385
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3386	if (token1->flags & NAMED_OP)
				3387	a = CPP_NAME;
				3388	if (token2->flags & NAMED_OP)
				3389	b = CPP_NAME;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3390
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3391	c = EOF;
				3392	if (token2->flags & DIGRAPH)
John David Anglin	37b8524	2001-03-02 01:11:50 +0000	[diff] [blame]	3393	c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3394	else if (token_spellings[b].category == SPELL_OPERATOR)
				3395	c = token_spellings[b].name[0];
Zack Weinberg	417f3e3	2000-07-11 23:20:53 +0000	[diff] [blame]	3396
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3397	/* Quickly get everything that can paste with an '='. */
John David Anglin	37b8524	2001-03-02 01:11:50 +0000	[diff] [blame]	3398	if ((int) a <= (int) CPP_LAST_EQ && c == '=')
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3399	return 1;
				3400
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3401	switch (a)
				3402	{
Steve Ellcey	b52dbbf	2006-08-14 23:13:54 +0000	[diff] [blame]	3403	case CPP_GREATER: return c == '>';
				3404	case CPP_LESS: return c == '<' \|\| c == '%' \|\| c == ':';
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3405	case CPP_PLUS: return c == '+';
				3406	case CPP_MINUS: return c == '-' \|\| c == '>';
				3407	case CPP_DIV: return c == '/' \|\| c == ''; / Comments. */
				3408	case CPP_MOD: return c == ':' \|\| c == '>';
				3409	case CPP_AND: return c == '&';
				3410	case CPP_OR: return c == '\|';
				3411	case CPP_COLON: return c == ':' \|\| c == '>';
				3412	case CPP_DEREF: return c == '*';
Neil Booth	26ec42e	2001-01-28 11:22:23 +0000	[diff] [blame]	3413	case CPP_DOT: return c == '.' \|\| c == '%' \|\| b == CPP_NUMBER;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3414	case CPP_HASH: return c == '#' \|\| c == '%'; /* Digraph form. */
				3415	case CPP_NAME: return ((b == CPP_NUMBER
				3416	&& name_p (pfile, &token2->val.str))
				3417	\|\| b == CPP_NAME
				3418	\|\| b == CPP_CHAR \|\| b == CPP_STRING); /* L */
				3419	case CPP_NUMBER: return (b == CPP_NUMBER \|\| b == CPP_NAME
				3420	\|\| c == '.' \|\| c == '+' \|\| c == '-');
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	3421	/* UCNs */
Neil Booth	1067694	2003-04-22 19:28:00 +0000	[diff] [blame]	3422	case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
				3423	&& b == CPP_NAME)
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	3424	\|\| (CPP_OPTION (pfile, objc)
Neil Booth	1067694	2003-04-22 19:28:00 +0000	[diff] [blame]	3425	&& token1->val.str.text[0] == '@'
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	3426	&& (b == CPP_NAME \|\| b == CPP_STRING)));
Jakub Jelinek	87e356b	2013-07-10 18:40:49 +0200	[diff] [blame]	3427	case CPP_STRING:
				3428	case CPP_WSTRING:
				3429	case CPP_UTF8STRING:
				3430	case CPP_STRING16:
				3431	case CPP_STRING32: return (CPP_OPTION (pfile, user_literals)
				3432	&& (b == CPP_NAME
				3433	\|\| (TOKEN_SPELL (token2) == SPELL_LITERAL
				3434	&& ISIDST (token2->val.str.text[0]))));
				3435
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3436	default: break;
				3437	}
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3438
				3439	return 0;
				3440	}
				3441
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3442	/* Output all the remaining tokens on the current line, and a newline
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	3443	character, to FP. Leading whitespace is removed. If there are
				3444	macros, special token padding is not performed. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3445	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3446	cpp_output_line (cpp_reader pfile, FILE fp)
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3447	{
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	3448	const cpp_token *token;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3449
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	3450	token = cpp_get_token (pfile);
				3451	while (token->type != CPP_EOF)
Zack Weinberg	6ead1e9	2000-07-31 23:47:19 +0000	[diff] [blame]	3452	{
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	3453	cpp_output_token (token, fp);
				3454	token = cpp_get_token (pfile);
				3455	if (token->flags & PREV_WHITE)
				3456	putc (' ', fp);
Zack Weinberg	6ead1e9	2000-07-31 23:47:19 +0000	[diff] [blame]	3457	}
				3458
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3459	putc ('\n', fp);
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3460	}
				3461
Tom Tromey	5d6342e	2008-05-21 21:52:57 +0000	[diff] [blame]	3462	/* Return a string representation of all the remaining tokens on the
				3463	current line. The result is allocated using xmalloc and must be
				3464	freed by the caller. */
				3465	unsigned char *
				3466	cpp_output_line_to_string (cpp_reader pfile, const unsigned char dir_name)
				3467	{
				3468	const cpp_token *token;
				3469	unsigned int out = dir_name ? ustrlen (dir_name) : 0;
				3470	unsigned int alloced = 120 + out;
				3471	unsigned char result = (unsigned char ) xmalloc (alloced);
				3472
				3473	/* If DIR_NAME is empty, there are no initial contents. */
				3474	if (dir_name)
				3475	{
				3476	sprintf ((char *) result, "#%s ", dir_name);
				3477	out += 2;
				3478	}
				3479
				3480	token = cpp_get_token (pfile);
				3481	while (token->type != CPP_EOF)
				3482	{
				3483	unsigned char *last;
				3484	/* Include room for a possible space and the terminating nul. */
				3485	unsigned int len = cpp_token_len (token) + 2;
				3486
				3487	if (out + len > alloced)
				3488	{
				3489	alloced *= 2;
				3490	if (out + len > alloced)
				3491	alloced = out + len;
				3492	result = (unsigned char *) xrealloc (result, alloced);
				3493	}
				3494
				3495	last = cpp_spell_token (pfile, token, &result[out], 0);
				3496	out = last - result;
				3497
				3498	token = cpp_get_token (pfile);
				3499	if (token->flags & PREV_WHITE)
				3500	result[out++] = ' ';
				3501	}
				3502
				3503	result[out] = '\0';
				3504	return result;
				3505	}
				3506
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest buffer new_buff will create.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will be reused for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   extending BUFF: MIN_EXTRA plus twice the distance from BUFF's cur
   to its limit.  Every macro argument is parenthesized so that any
   expression may be passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
				3521
Neil Booth	c9e7a60	2001-09-27 12:59:38 +0000	[diff] [blame]	3522	/* Create a new allocation buffer. Place the control block at the end
				3523	of the buffer, so that buffer overflows will cause immediate chaos. */
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3524	static _cpp_buff *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3525	new_buff (size_t len)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3526	{
				3527	_cpp_buff *result;
Neil Booth	ece54d5	2001-09-28 09:40:22 +0000	[diff] [blame]	3528	unsigned char *base;
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3529
Neil Booth	1e013d2	2001-09-26 21:44:35 +0000	[diff] [blame]	3530	if (len < MIN_BUFF_SIZE)
				3531	len = MIN_BUFF_SIZE;
Neil Booth	c70f6ed	2002-06-07 06:26:32 +0000	[diff] [blame]	3532	len = CPP_ALIGN (len);
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3533
Martin Liska	ceb1792	2016-05-20 10:03:25 +0200	[diff] [blame]	3534	#ifdef ENABLE_VALGRIND_ANNOTATIONS
Jakub Jelinek	1a80db9	2013-02-28 10:58:47 +0100	[diff] [blame]	3535	/* Valgrind warns about uses of interior pointers, so put _cpp_buff
				3536	struct first. */
				3537	size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
				3538	base = XNEWVEC (unsigned char, len + slen);
				3539	result = (_cpp_buff *) base;
				3540	base += slen;
				3541	#else
Gabriel Dos Reis	c3f829c	2005-05-28 15:52:48 +0000	[diff] [blame]	3542	base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3543	result = (_cpp_buff *) (base + len);
Jakub Jelinek	1a80db9	2013-02-28 10:58:47 +0100	[diff] [blame]	3544	#endif
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3545	result->base = base;
				3546	result->cur = base;
				3547	result->limit = base + len;
				3548	result->next = NULL;
				3549	return result;
				3550	}
				3551
				3552	/* Place a chain of unwanted allocation buffers on the free list. */
				3553	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3554	_cpp_release_buff (cpp_reader pfile, _cpp_buff buff)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3555	{
				3556	_cpp_buff *end = buff;
				3557
				3558	while (end->next)
				3559	end = end->next;
				3560	end->next = pfile->free_buffs;
				3561	pfile->free_buffs = buff;
				3562	}
				3563
				3564	/* Return a free buffer of size at least MIN_SIZE. */
				3565	_cpp_buff *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3566	_cpp_get_buff (cpp_reader *pfile, size_t min_size)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3567	{
				3568	_cpp_buff result, *p;
				3569
				3570	for (p = &pfile->free_buffs;; p = &(*p)->next)
				3571	{
Neil Booth	6142088	2001-09-28 13:25:38 +0000	[diff] [blame]	3572	size_t size;
Neil Booth	1e013d2	2001-09-26 21:44:35 +0000	[diff] [blame]	3573
				3574	if (*p == NULL)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3575	return new_buff (min_size);
Neil Booth	1e013d2	2001-09-26 21:44:35 +0000	[diff] [blame]	3576	result = *p;
				3577	size = result->limit - result->base;
				3578	/* Return a buffer that's big enough, but don't waste one that's
				3579	way too big. */
Richard Earnshaw	34f5271	2001-10-17 16:20:04 +0000	[diff] [blame]	3580	if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3581	break;
				3582	}
				3583
				3584	*p = result->next;
				3585	result->next = NULL;
				3586	result->cur = result->base;
				3587	return result;
				3588	}
				3589
Kazu Hirata	4fe9b91	2001-10-09 06:03:16 +0000	[diff] [blame]	3590	/* Creates a new buffer with enough space to hold the uncommitted
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3591	remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
				3592	the excess bytes to the new buffer. Chains the new buffer after
				3593	BUFF, and returns the new buffer. */
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3594	_cpp_buff *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3595	_cpp_append_extend_buff (cpp_reader pfile, _cpp_buff buff, size_t min_extra)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3596	{
Neil Booth	6142088	2001-09-28 13:25:38 +0000	[diff] [blame]	3597	size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3598	_cpp_buff *new_buff = _cpp_get_buff (pfile, size);
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3599
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3600	buff->next = new_buff;
				3601	memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
				3602	return new_buff;
				3603	}
				3604
Kazu Hirata	4fe9b91	2001-10-09 06:03:16 +0000	[diff] [blame]	3605	/* Creates a new buffer with enough space to hold the uncommitted
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3606	remaining bytes of the buffer pointed to by BUFF, and at least
				3607	MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
				3608	Chains the new buffer before the buffer pointed to by BUFF, and
				3609	updates the pointer to point to the new buffer. */
				3610	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3611	_cpp_extend_buff (cpp_reader pfile, _cpp_buff *pbuff, size_t min_extra)
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3612	{
				3613	_cpp_buff new_buff, old_buff = *pbuff;
				3614	size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
				3615
				3616	new_buff = _cpp_get_buff (pfile, size);
				3617	memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
				3618	new_buff->next = old_buff;
				3619	*pbuff = new_buff;
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3620	}
				3621
				3622	/* Free a chain of buffers starting at BUFF. */
				3623	void
Andreas Jaeger	5671bf2	2003-07-07 21:11:59 +0200	[diff] [blame]	3624	_cpp_free_buff (_cpp_buff *buff)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3625	{
				3626	_cpp_buff *next;
				3627
				3628	for (; buff; buff = next)
				3629	{
				3630	next = buff->next;
Martin Liska	ceb1792	2016-05-20 10:03:25 +0200	[diff] [blame]	3631	#ifdef ENABLE_VALGRIND_ANNOTATIONS
Jakub Jelinek	1a80db9	2013-02-28 10:58:47 +0100	[diff] [blame]	3632	free (buff);
				3633	#else
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3634	free (buff->base);
Jakub Jelinek	1a80db9	2013-02-28 10:58:47 +0100	[diff] [blame]	3635	#endif
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3636	}
				3637	}
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3638
Neil Booth	ece54d5	2001-09-28 09:40:22 +0000	[diff] [blame]	3639	/* Allocate permanent, unaligned storage of length LEN. */
				3640	unsigned char *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3641	_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
Neil Booth	ece54d5	2001-09-28 09:40:22 +0000	[diff] [blame]	3642	{
				3643	_cpp_buff *buff = pfile->u_buff;
				3644	unsigned char *result = buff->cur;
				3645
				3646	if (len > (size_t) (buff->limit - result))
				3647	{
				3648	buff = _cpp_get_buff (pfile, len);
				3649	buff->next = pfile->u_buff;
				3650	pfile->u_buff = buff;
				3651	result = buff->cur;
				3652	}
				3653
				3654	buff->cur = result + len;
				3655	return result;
				3656	}
				3657
Neil Booth	8706281	2001-10-20 09:00:53 +0000	[diff] [blame]	3658	/* Allocate permanent, unaligned storage of length LEN from a_buff.
				3659	That buffer is used for growing allocations when saving macro
				3660	replacement lists in a #define, and when parsing an answer to an
				3661	assertion in #assert, #unassert or #if (and therefore possibly
				3662	whilst expanding macros). It therefore must not be used by any
				3663	code that they might call: specifically the lexer and the guts of
				3664	the macro expander.
				3665
				3666	All existing other uses clearly fit this restriction: storing
				3667	registered pragmas during initialization. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3668	unsigned char *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3669	_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3670	{
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3671	_cpp_buff *buff = pfile->a_buff;
				3672	unsigned char *result = buff->cur;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3673
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3674	if (len > (size_t) (buff->limit - result))
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3675	{
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3676	buff = _cpp_get_buff (pfile, len);
				3677	buff->next = pfile->a_buff;
				3678	pfile->a_buff = buff;
				3679	result = buff->cur;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3680	}
				3681
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3682	buff->cur = result + len;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3683	return result;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3684	}
Geoffrey Keating	d804416	2004-06-09 20:10:13 +0000	[diff] [blame]	3685
				3686	/* Say which field of TOK is in use. */
				3687
				3688	enum cpp_token_fld_kind
Jakub Jelinek	c26302d	2013-07-10 18:49:24 +0200	[diff] [blame]	3689	cpp_token_val_index (const cpp_token *tok)
Geoffrey Keating	d804416	2004-06-09 20:10:13 +0000	[diff] [blame]	3690	{
				3691	switch (TOKEN_SPELL (tok))
				3692	{
				3693	case SPELL_IDENT:
				3694	return CPP_TOKEN_FLD_NODE;
				3695	case SPELL_LITERAL:
				3696	return CPP_TOKEN_FLD_STR;
Joseph Myers	aa50850	2009-04-19 18:10:56 +0100	[diff] [blame]	3697	case SPELL_OPERATOR:
				3698	if (tok->type == CPP_PASTE)
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	3699	return CPP_TOKEN_FLD_TOKEN_NO;
Joseph Myers	aa50850	2009-04-19 18:10:56 +0100	[diff] [blame]	3700	else
				3701	return CPP_TOKEN_FLD_NONE;
Geoffrey Keating	d804416	2004-06-09 20:10:13 +0000	[diff] [blame]	3702	case SPELL_NONE:
				3703	if (tok->type == CPP_MACRO_ARG)
				3704	return CPP_TOKEN_FLD_ARG_NO;
				3705	else if (tok->type == CPP_PADDING)
				3706	return CPP_TOKEN_FLD_SOURCE;
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	3707	else if (tok->type == CPP_PRAGMA)
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	3708	return CPP_TOKEN_FLD_PRAGMA;
Marek Polacek	191816a	2016-08-12 10:30:47 +0000	[diff] [blame]	3709	/* fall through */
Geoffrey Keating	d804416	2004-06-09 20:10:13 +0000	[diff] [blame]	3710	default:
				3711	return CPP_TOKEN_FLD_NONE;
				3712	}
				3713	}
Gabriel Charette	e3dfef4	2011-08-22 20:41:07 +0000	[diff] [blame]	3714
				3715	/* All tokens lexed in R after calling this function will be forced to have
				3716	their source_location the same as the location referenced by P, until
				3717	cpp_stop_forcing_token_locations is called for R. */
				3718
				3719	void
				3720	cpp_force_token_locations (cpp_reader r, source_location p)
				3721	{
				3722	r->forced_token_location_p = p;
				3723	}
				3724
				3725	/* Go back to assigning locations naturally for lexed tokens. */
				3726
				3727	void
				3728	cpp_stop_forcing_token_locations (cpp_reader *r)
				3729	{
				3730	r->forced_token_location_p = NULL;
				3731	}