Blame - libcpp/lex.c - toolchain/gcc

blob: 5758e580c2ba09729a0e5874c838691c44ee6102 [file] [log] [blame]

Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	1	/* CPP Library - lexical analysis.
Jakub Jelinek	5624e56	2015-01-05 13:33:28 +0100	[diff] [blame]	2	Copyright (C) 2000-2015 Free Software Foundation, Inc.
Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	3	Contributed by Per Bothner, 1994-95.
				4	Based on CCCP program by Paul Rubin, June 1986
				5	Adapted to ANSI C, Richard Stallman, Jan 1987
				6	Broken out to separate file, Zack Weinberg, Mar 2000
				7
				8	This program is free software; you can redistribute it and/or modify it
				9	under the terms of the GNU General Public License as published by the
Jakub Jelinek	748086b	2009-04-09 17:00:19 +0200	[diff] [blame]	10	Free Software Foundation; either version 3, or (at your option) any
Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	11	later version.
				12
				13	This program is distributed in the hope that it will be useful,
				14	but WITHOUT ANY WARRANTY; without even the implied warranty of
				15	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				16	GNU General Public License for more details.
				17
				18	You should have received a copy of the GNU General Public License
Jakub Jelinek	748086b	2009-04-09 17:00:19 +0200	[diff] [blame]	19	along with this program; see the file COPYING3. If not see
				20	<http://www.gnu.org/licenses/>. */
Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	21
				22	#include "config.h"
				23	#include "system.h"
Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	24	#include "cpplib.h"
Paolo Bonzini	4f4e53dd	2004-05-24 10:50:45 +0000	[diff] [blame]	25	#include "internal.h"
Zack Weinberg	45b966d	2000-03-13 22:01:08 +0000	[diff] [blame]	26
/* Spelling category of a token type.  The TK() table macro below pastes
   its second argument onto SPELL_ to select one of these; OP() always
   selects SPELL_OPERATOR.  */
enum spell_type
{
  SPELL_OPERATOR = 0,
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
				34
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	35	struct token_spelling
Zack Weinberg	f9a0e96	2000-07-13 02:32:41 +0000	[diff] [blame]	36	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	37	enum spell_type category;
				38	const unsigned char *name;
Zack Weinberg	f9a0e96	2000-07-13 02:32:41 +0000	[diff] [blame]	39	};
				40
Zack Weinberg	8206c79	2001-10-11 21:21:57 +0000	[diff] [blame]	41	static const unsigned char *const digraph_spellings[] =
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	42	{ UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	43
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	44	#define OP(e, s) { SPELL_OPERATOR, UC s },
				45	#define TK(e, s) { SPELL_ ## s, UC #e },
Zack Weinberg	8206c79	2001-10-11 21:21:57 +0000	[diff] [blame]	46	static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	47	#undef OP
				48	#undef TK
				49
				50	#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
				51	#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
Zack Weinberg	f2d5f0c	2000-04-14 23:29:45 +0000	[diff] [blame]	52
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	53	static void add_line_note (cpp_buffer , const uchar , unsigned int);
				54	static int skip_line_comment (cpp_reader *);
				55	static void skip_whitespace (cpp_reader *, cppchar_t);
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	56	static void lex_string (cpp_reader , cpp_token , const uchar *);
				57	static void save_comment (cpp_reader , cpp_token , const uchar *, cppchar_t);
Matthew Gingell	631d0d3	2008-10-05 12:35:36 +0000	[diff] [blame]	58	static void store_comment (cpp_reader , cpp_token );
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	59	static void create_literal (cpp_reader , cpp_token , const uchar *,
				60	unsigned int, enum cpp_ttype);
				61	static bool warn_in_comment (cpp_reader , _cpp_line_note );
				62	static int name_p (cpp_reader , const cpp_string );
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	63	static tokenrun next_tokenrun (tokenrun );
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	64
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	65	static _cpp_buff *new_buff (size_t);
Zack Weinberg	15dad1d	2000-05-18 15:55:46 +0000	[diff] [blame]	66
Neil Booth	9d10c9a	2003-03-06 23:12:30 +0000	[diff] [blame]	67
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	68	/* Utility routine:
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	69
Zack Weinberg	bfb9dc7	2000-07-08 19:00:39 +0000	[diff] [blame]	70	Compares, the token TOKEN to the NUL-terminated string STRING.
				71	TOKEN must be a CPP_NAME. Returns 1 for equal, 0 for unequal. */
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	72	int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	73	cpp_ideq (const cpp_token token, const char string)
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	74	{
Zack Weinberg	bfb9dc7	2000-07-08 19:00:39 +0000	[diff] [blame]	75	if (token->type != CPP_NAME)
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	76	return 0;
Zack Weinberg	bfb9dc7	2000-07-08 19:00:39 +0000	[diff] [blame]	77
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	78	return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
Zack Weinberg	6d2c204	2000-04-30 17:30:25 +0000	[diff] [blame]	79	}
				80
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	81	/* Record a note TYPE at byte POS into the current cleaned logical
				82	line. */
Neil Booth	8706281	2001-10-20 09:00:53 +0000	[diff] [blame]	83	static void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	84	add_line_note (cpp_buffer buffer, const uchar pos, unsigned int type)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	85	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	86	if (buffer->notes_used == buffer->notes_cap)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	87	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	88	buffer->notes_cap = buffer->notes_cap * 2 + 200;
Gabriel Dos Reis	c3f829c	2005-05-28 15:52:48 +0000	[diff] [blame]	89	buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
				90	buffer->notes_cap);
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	91	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	92
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	93	buffer->notes[buffer->notes_used].pos = pos;
				94	buffer->notes[buffer->notes_used].type = type;
				95	buffer->notes_used++;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	96	}
				97
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	98
/* Fast path to find line special characters using optimized character
   scanning algorithms.  Anything complicated falls back to the slow
   path below.  Since this loop is very hot it's worth doing these kinds
   of optimizations.

   One of the paths through the ifdefs should provide

     const uchar *search_line_fast (const uchar *s, const uchar *end);

   Between S and END, search for \n, \r, \\, ?.  Return a pointer to
   the found character.

   Note that the last character of the buffer is always a newline,
   as forced by _cpp_convert_input.  This fact can be used to avoid
   explicitly looking for the end of the buffer.  */
				114
				115	/* Configure gives us an ifdef test. */
				116	#ifndef WORDS_BIGENDIAN
				117	#define WORDS_BIGENDIAN 0
				118	#endif
				119
				120	/* We'd like the largest integer that fits into a register. There's nothing
				121	in <stdint.h> that gives us that. For most hosts this is unsigned long,
				122	but MS decided on an LLP64 model. Thankfully when building with GCC we
				123	can get the "real" word size. */
				124	#ifdef __GNUC__
				125	typedef unsigned int word_type __attribute__((__mode__(__word__)));
				126	#else
				127	typedef unsigned long word_type;
				128	#endif
				129
				130	/* The code below is only expecting sizes 4 or 8.
				131	Die at compile-time if this expectation is violated. */
				132	typedef char check_word_type_size
				133	[(sizeof(word_type) == 8 \|\| sizeof(word_type) == 4) * 2 - 1];
				134
				135	/* Return X with the first N bytes forced to values that won't match one
				136	of the interesting characters. Note that NUL is not interesting. */
				137
				138	static inline word_type
				139	acc_char_mask_misalign (word_type val, unsigned int n)
				140	{
				141	word_type mask = -1;
				142	if (WORDS_BIGENDIAN)
				143	mask >>= n * 8;
				144	else
				145	mask <<= n * 8;
				146	return val & mask;
				147	}
				148
				149	/* Return X replicated to all byte positions within WORD_TYPE. */
				150
				151	static inline word_type
				152	acc_char_replicate (uchar x)
				153	{
				154	word_type ret;
				155
				156	ret = (x << 24) \| (x << 16) \| (x << 8) \| x;
				157	if (sizeof(word_type) == 8)
				158	ret = (ret << 16 << 16) \| ret;
				159	return ret;
				160	}
				161
				162	/* Return non-zero if some byte of VAL is (probably) C. */
				163
				164	static inline word_type
				165	acc_char_cmp (word_type val, word_type c)
				166	{
				167	#if defined(__GNUC__) && defined(__alpha__)
				168	/* We can get exact results using a compare-bytes instruction.
				169	Get (val == c) via (0 >= (val ^ c)). */
				170	return __builtin_alpha_cmpbge (0, val ^ c);
				171	#else
				172	word_type magic = 0x7efefefeU;
				173	if (sizeof(word_type) == 8)
				174	magic = (magic << 16 << 16) \| 0xfefefefeU;
				175	magic \|= 1;
				176
				177	val ^= c;
				178	return ((val + magic) ^ ~val) & ~magic;
				179	#endif
				180	}
				181
				182	/* Given the result of acc_char_cmp is non-zero, return the index of
				183	the found character. If this was a false positive, return -1. */
				184
				185	static inline int
				186	acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
				187	word_type val ATTRIBUTE_UNUSED)
				188	{
				189	#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
				190	/* The cmpbge instruction sets bits of the result corresponding to
				191	matches in the bytes with no false positives. */
				192	return __builtin_ctzl (cmp);
				193	#else
				194	unsigned int i;
				195
				196	/* ??? It would be nice to force unrolling here,
				197	and have all of these constants folded. */
				198	for (i = 0; i < sizeof(word_type); ++i)
				199	{
				200	uchar c;
				201	if (WORDS_BIGENDIAN)
				202	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
				203	else
				204	c = (val >> i * 8) & 0xff;
				205
				206	if (c == '\n' \|\| c == '\r' \|\| c == '\\' \|\| c == '?')
				207	return i;
				208	}
				209
				210	return -1;
				211	#endif
				212	}
				213
				214	/* A version of the fast scanner using bit fiddling techniques.
				215
				216	For 32-bit words, one would normally perform 16 comparisons and
				217	16 branches. With this algorithm one performs 24 arithmetic
				218	operations and one branch. Whether this is faster with a 32-bit
				219	word size is going to be somewhat system dependent.
				220
				221	For 64-bit words, we eliminate twice the number of comparisons
				222	and branches without increasing the number of arithmetic operations.
				223	It's almost certainly going to be a win with 64-bit word size. */
				224
				225	static const uchar * search_line_acc_char (const uchar , const uchar )
				226	ATTRIBUTE_UNUSED;
				227
				228	static const uchar *
				229	search_line_acc_char (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				230	{
				231	const word_type repl_nl = acc_char_replicate ('\n');
				232	const word_type repl_cr = acc_char_replicate ('\r');
				233	const word_type repl_bs = acc_char_replicate ('\\');
				234	const word_type repl_qm = acc_char_replicate ('?');
				235
				236	unsigned int misalign;
				237	const word_type *p;
				238	word_type val, t;
				239
				240	/* Align the buffer. Mask out any bytes from before the beginning. */
				241	p = (word_type *)((uintptr_t)s & -sizeof(word_type));
				242	val = *p;
				243	misalign = (uintptr_t)s & (sizeof(word_type) - 1);
				244	if (misalign)
				245	val = acc_char_mask_misalign (val, misalign);
				246
				247	/* Main loop. */
				248	while (1)
				249	{
				250	t = acc_char_cmp (val, repl_nl);
				251	t \|= acc_char_cmp (val, repl_cr);
				252	t \|= acc_char_cmp (val, repl_bs);
				253	t \|= acc_char_cmp (val, repl_qm);
				254
				255	if (__builtin_expect (t != 0, 0))
				256	{
				257	int i = acc_char_index (t, val);
				258	if (i >= 0)
				259	return (const uchar *)p + i;
				260	}
				261
				262	val = *++p;
				263	}
				264	}
				265
Rainer Orth	d9f069a	2014-04-22 12:30:59 +0000	[diff] [blame]	266	/* Disable on Solaris 2/x86 until the following problem can be properly
Rainer Orth	789d73c	2010-08-24 17:23:35 +0000	[diff] [blame]	267	autoconfed:
				268
Rainer Orth	789d73c	2010-08-24 17:23:35 +0000	[diff] [blame]	269	The Solaris 10+ assembler tags objects with the instruction set
				270	extensions used, so SSE4.2 executables cannot run on machines that
				271	don't support that extension. */
				272
Uros Bizjak	1b6b13f	2014-11-20 13:10:12 +0100	[diff] [blame]	273	#if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 \|\| !defined(__PIC__)) && (defined(__i386__) \|\| defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	274
				275	/* Replicated character data to be shared between implementations.
				276	Recall that outside of a context with vector support we can't
				277	define compatible vector types, therefore these are all defined
				278	in terms of raw characters. */
				279	static const char repl_chars[4][16] __attribute__((aligned(16))) = {
				280	{ '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
				281	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
				282	{ '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
				283	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
				284	{ '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
				285	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
				286	{ '?', '?', '?', '?', '?', '?', '?', '?',
				287	'?', '?', '?', '?', '?', '?', '?', '?' },
				288	};
				289
				290	/* A version of the fast scanner using MMX vectorized byte compare insns.
				291
				292	This uses the PMOVMSKB instruction which was introduced with "MMX2",
Uros Bizjak	ef230b3	2011-05-22 20:53:32 +0200	[diff] [blame]	293	which was packaged into SSE1; it is also present in the AMD MMX
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	294	extension. Mark the function as using "sse" so that we emit a real
				295	"emms" instruction, rather than the 3dNOW "femms" instruction. */
				296
				297	static const uchar *
				298	#ifndef __SSE__
				299	__attribute__((__target__("sse")))
				300	#endif
				301	search_line_mmx (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				302	{
				303	typedef char v8qi __attribute__ ((__vector_size__ (8)));
				304	typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
				305
				306	const v8qi repl_nl = (const v8qi )repl_chars[0];
				307	const v8qi repl_cr = (const v8qi )repl_chars[1];
				308	const v8qi repl_bs = (const v8qi )repl_chars[2];
				309	const v8qi repl_qm = (const v8qi )repl_chars[3];
				310
				311	unsigned int misalign, found, mask;
				312	const v8qi *p;
				313	v8qi data, t, c;
				314
				315	/* Align the source pointer. While MMX doesn't generate unaligned data
				316	faults, this allows us to safely scan to the end of the buffer without
				317	reading beyond the end of the last page. */
				318	misalign = (uintptr_t)s & 7;
				319	p = (const v8qi *)((uintptr_t)s & -8);
				320	data = *p;
				321
				322	/* Create a mask for the bytes that are valid within the first
				323	16-byte block. The Idea here is that the AND with the mask
				324	within the loop is "free", since we need some AND or TEST
				325	insn in order to set the flags for the branch anyway. */
				326	mask = -1u << misalign;
				327
				328	/* Main loop processing 8 bytes at a time. */
				329	goto start;
				330	do
				331	{
				332	data = *++p;
				333	mask = -1;
				334
				335	start:
				336	t = __builtin_ia32_pcmpeqb(data, repl_nl);
				337	c = __builtin_ia32_pcmpeqb(data, repl_cr);
				338	t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
				339	c = __builtin_ia32_pcmpeqb(data, repl_bs);
				340	t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
				341	c = __builtin_ia32_pcmpeqb(data, repl_qm);
				342	t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
				343	found = __builtin_ia32_pmovmskb (t);
				344	found &= mask;
				345	}
				346	while (!found);
				347
				348	__builtin_ia32_emms ();
				349
				350	/* FOUND contains 1 in bits for which we matched a relevant
				351	character. Conversion to the byte index is trivial. */
				352	found = __builtin_ctz(found);
				353	return (const uchar *)p + found;
				354	}
				355
				356	/* A version of the fast scanner using SSE2 vectorized byte compare insns. */
				357
				358	static const uchar *
				359	#ifndef __SSE2__
				360	__attribute__((__target__("sse2")))
				361	#endif
				362	search_line_sse2 (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				363	{
				364	typedef char v16qi __attribute__ ((__vector_size__ (16)));
				365
				366	const v16qi repl_nl = (const v16qi )repl_chars[0];
				367	const v16qi repl_cr = (const v16qi )repl_chars[1];
				368	const v16qi repl_bs = (const v16qi )repl_chars[2];
				369	const v16qi repl_qm = (const v16qi )repl_chars[3];
				370
				371	unsigned int misalign, found, mask;
				372	const v16qi *p;
				373	v16qi data, t;
				374
				375	/* Align the source pointer. */
				376	misalign = (uintptr_t)s & 15;
				377	p = (const v16qi *)((uintptr_t)s & -16);
				378	data = *p;
				379
				380	/* Create a mask for the bytes that are valid within the first
				381	16-byte block. The Idea here is that the AND with the mask
				382	within the loop is "free", since we need some AND or TEST
				383	insn in order to set the flags for the branch anyway. */
				384	mask = -1u << misalign;
				385
				386	/* Main loop processing 16 bytes at a time. */
				387	goto start;
				388	do
				389	{
				390	data = *++p;
				391	mask = -1;
				392
				393	start:
				394	t = __builtin_ia32_pcmpeqb128(data, repl_nl);
				395	t \|= __builtin_ia32_pcmpeqb128(data, repl_cr);
				396	t \|= __builtin_ia32_pcmpeqb128(data, repl_bs);
				397	t \|= __builtin_ia32_pcmpeqb128(data, repl_qm);
				398	found = __builtin_ia32_pmovmskb128 (t);
				399	found &= mask;
				400	}
				401	while (!found);
				402
				403	/* FOUND contains 1 in bits for which we matched a relevant
				404	character. Conversion to the byte index is trivial. */
				405	found = __builtin_ctz(found);
				406	return (const uchar *)p + found;
				407	}
				408
#ifdef HAVE_SSE4
/* A version of the fast scanner using SSE 4.2 vectorized string insns.

   Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
   is consulted only to decide whether an unaligned 16-byte read at S
   is safe.  */

static const uchar *
#ifndef __SSE4_2__
__attribute__((__target__("sse4.2")))
#endif
search_line_sse42 (const uchar *s, const uchar *end)
{
  typedef char v16qi __attribute__ ((__vector_size__ (16)));
  static const v16qi search = { '\n', '\r', '?', '\\' };

  uintptr_t si = (uintptr_t)s;
  uintptr_t index;

  /* Check for unaligned input.  */
  if (si & 15)
    {
      v16qi sv;

      if (__builtin_expect (end - s < 16, 0)
          && __builtin_expect ((si & 0xfff) > 0xff0, 0))
        {
          /* There are less than 16 bytes left in the buffer, and less
             than 16 bytes left on the page.  Reading 16 bytes at this
             point might generate a spurious page fault.  Defer to the
             SSE2 implementation, which already handles alignment.  */
          return search_line_sse2 (s, end);
        }

      /* ??? The builtin doesn't understand that the PCMPESTRI read from
         memory need not be aligned.  */
      sv = __builtin_ia32_loaddqu ((const char *) s);
      index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);

      if (__builtin_expect (index < 16, 0))
        goto found;

      /* Advance the pointer to an aligned address.  We will re-scan a
         few bytes, but we no longer need care for reading past the
         end of a page, since we're guaranteed a match.  */
      s = (const uchar *)((si + 16) & -16);
    }

  /* Main loop, processing 16 bytes at a time.  */
#ifdef __GCC_ASM_FLAG_OUTPUTS__
  while (1)
    {
      char f;

      /* By using inline assembly instead of the builtin,
         we can use the result, as well as the flags set.  */
      __asm ("%vpcmpestri\t$0, %2, %3"
             : "=c"(index), "=@ccc"(f)
             : "m"(*s), "x"(search), "a"(4), "d"(16));
      if (f)
        break;

      s += 16;
    }
#else
  /* The asm loop below adds 16 before its first compare.  */
  s -= 16;
  /* By doing the whole loop in inline assembly,
     we can make proper use of the flags set.  */
  __asm (      ".balign 16\n"
        "0: add $16, %1\n"
        " %vpcmpestri\t$0, (%1), %2\n"
        " jnc 0b"
        : "=&c"(index), "+r"(s)
        : "x"(search), "a"(4), "d"(16));
#endif

 found:
  return s + index;
}

#else
/* Work around out-dated assemblers without sse4 support.  */
#define search_line_sse42 search_line_sse2
#endif
				489
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	490	/* Check the CPU capabilities. */
				491
				492	#include "../gcc/config/i386/cpuid.h"
				493
				494	typedef const uchar * (search_line_fast_type) (const uchar , const uchar *);
				495	static search_line_fast_type search_line_fast;
				496
Jakub Jelinek	b0c084b	2011-12-07 23:05:59 +0100	[diff] [blame]	497	#define HAVE_init_vectorized_lexer 1
				498	static inline void
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	499	init_vectorized_lexer (void)
				500	{
				501	unsigned dummy, ecx = 0, edx = 0;
				502	search_line_fast_type impl = search_line_acc_char;
				503	int minimum = 0;
				504
				505	#if defined(__SSE4_2__)
				506	minimum = 3;
				507	#elif defined(__SSE2__)
				508	minimum = 2;
Uros Bizjak	ef230b3	2011-05-22 20:53:32 +0200	[diff] [blame]	509	#elif defined(__SSE__)
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	510	minimum = 1;
				511	#endif
				512
				513	if (minimum == 3)
				514	impl = search_line_sse42;
				515	else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) \|\| minimum == 2)
				516	{
				517	if (minimum == 3 \|\| (ecx & bit_SSE4_2))
				518	impl = search_line_sse42;
				519	else if (minimum == 2 \|\| (edx & bit_SSE2))
				520	impl = search_line_sse2;
				521	else if (minimum == 1 \|\| (edx & bit_SSE))
				522	impl = search_line_mmx;
				523	}
				524	else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
				525	{
Uros Bizjak	5e70c0b	2011-05-22 21:04:54 +0200	[diff] [blame]	526	if (minimum == 1
				527	\|\| (edx & (bit_MMXEXT \| bit_CMOV)) == (bit_MMXEXT \| bit_CMOV))
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	528	impl = search_line_mmx;
				529	}
				530
				531	search_line_fast = impl;
				532	}
				533
Bill Schmidt	0ccaaab	2014-10-03 20:06:38 +0000	[diff] [blame]	534	#elif defined(_ARCH_PWR8) && defined(__ALTIVEC__)
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	535
Bill Schmidt	0ccaaab	2014-10-03 20:06:38 +0000	[diff] [blame]	536	/* A vection of the fast scanner using AltiVec vectorized byte compares
				537	and VSX unaligned loads (when VSX is available). This is otherwise
				538	the same as the pre-GCC 5 version. */
				539
Markus Trippelsdorf	44d9524	2015-03-04 17:28:56 +0000	[diff] [blame]	540	ATTRIBUTE_NO_SANITIZE_UNDEFINED
Bill Schmidt	0ccaaab	2014-10-03 20:06:38 +0000	[diff] [blame]	541	static const uchar *
				542	search_line_fast (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				543	{
				544	typedef __attribute__((altivec(vector))) unsigned char vc;
				545
				546	const vc repl_nl = {
				547	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
				548	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
				549	};
				550	const vc repl_cr = {
				551	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
				552	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
				553	};
				554	const vc repl_bs = {
				555	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
				556	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
				557	};
				558	const vc repl_qm = {
				559	'?', '?', '?', '?', '?', '?', '?', '?',
				560	'?', '?', '?', '?', '?', '?', '?', '?',
				561	};
				562	const vc zero = { 0 };
				563
				564	vc data, t;
				565
				566	/* Main loop processing 16 bytes at a time. */
				567	do
				568	{
				569	vc m_nl, m_cr, m_bs, m_qm;
				570
				571	data = ((const vc )s);
				572	s += 16;
				573
				574	m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
				575	m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
				576	m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
				577	m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
				578	t = (m_nl \| m_cr) \| (m_bs \| m_qm);
				579
				580	/* T now contains 0xff in bytes for which we matched one of the relevant
				581	characters. We want to exit the loop if any byte in T is non-zero.
				582	Below is the expansion of vec_any_ne(t, zero). */
				583	}
				584	while (!__builtin_vec_vcmpeq_p(/__CR6_LT_REV/3, t, zero));
				585
				586	/* Restore s to to point to the 16 bytes we just processed. */
				587	s -= 16;
				588
				589	{
				590	#define N (sizeof(vc) / sizeof(long))
				591
				592	union {
				593	vc v;
				594	/* Statically assert that N is 2 or 4. */
				595	unsigned long l[(N == 2 \|\| N == 4) ? N : -1];
				596	} u;
				597	unsigned long l, i = 0;
				598
				599	u.v = t;
				600
				601	/* Find the first word of T that is non-zero. */
				602	switch (N)
				603	{
				604	case 4:
				605	l = u.l[i++];
				606	if (l != 0)
				607	break;
				608	s += sizeof(unsigned long);
				609	l = u.l[i++];
				610	if (l != 0)
				611	break;
				612	s += sizeof(unsigned long);
				613	case 2:
				614	l = u.l[i++];
				615	if (l != 0)
				616	break;
				617	s += sizeof(unsigned long);
				618	l = u.l[i];
				619	}
				620
				621	/* L now contains 0xff in bytes for which we matched one of the
				622	relevant characters. We can find the byte index by finding
				623	its bit index and dividing by 8. */
				624	#ifdef __BIG_ENDIAN__
				625	l = __builtin_clzl(l) >> 3;
				626	#else
				627	l = __builtin_ctzl(l) >> 3;
				628	#endif
				629	return s + l;
				630
				631	#undef N
				632	}
				633	}
				634
				635	#elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
				636
				637	/* A vection of the fast scanner using AltiVec vectorized byte compares.
				638	This cannot be used for little endian because vec_lvsl/lvsr are
				639	deprecated for little endian and the code won't work properly. */
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	640	/* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
				641	so we can't compile this function without -maltivec on the command line
				642	(or implied by some other switch). */
				643
				644	static const uchar *
				645	search_line_fast (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				646	{
				647	typedef __attribute__((altivec(vector))) unsigned char vc;
				648
				649	const vc repl_nl = {
				650	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
				651	'\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
				652	};
				653	const vc repl_cr = {
				654	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
				655	'\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
				656	};
				657	const vc repl_bs = {
				658	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
				659	'\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
				660	};
				661	const vc repl_qm = {
				662	'?', '?', '?', '?', '?', '?', '?', '?',
				663	'?', '?', '?', '?', '?', '?', '?', '?',
				664	};
				665	const vc ones = {
				666	-1, -1, -1, -1, -1, -1, -1, -1,
				667	-1, -1, -1, -1, -1, -1, -1, -1,
				668	};
				669	const vc zero = { 0 };
				670
				671	vc data, mask, t;
				672
				673	/* Altivec loads automatically mask addresses with -16. This lets us
				674	issue the first load as early as possible. */
				675	data = __builtin_vec_ld(0, (const vc *)s);
				676
				677	/* Discard bytes before the beginning of the buffer. Do this by
				678	beginning with all ones and shifting in zeros according to the
				679	mis-alignment. The LVSR instruction pulls the exact shift we
				680	want from the address. */
				681	mask = __builtin_vec_lvsr(0, s);
				682	mask = __builtin_vec_perm(zero, ones, mask);
				683	data &= mask;
				684
				685	/* While altivec loads mask addresses, we still need to align S so
				686	that the offset we compute at the end is correct. */
				687	s = (const uchar *)((uintptr_t)s & -16);
				688
				689	/* Main loop processing 16 bytes at a time. */
				690	goto start;
				691	do
				692	{
				693	vc m_nl, m_cr, m_bs, m_qm;
				694
				695	s += 16;
				696	data = __builtin_vec_ld(0, (const vc *)s);
				697
				698	start:
				699	m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
				700	m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
				701	m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
				702	m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
				703	t = (m_nl \| m_cr) \| (m_bs \| m_qm);
				704
				705	/* T now contains 0xff in bytes for which we matched one of the relevant
				706	characters. We want to exit the loop if any byte in T is non-zero.
				707	Below is the expansion of vec_any_ne(t, zero). */
				708	}
				709	while (!__builtin_vec_vcmpeq_p(/__CR6_LT_REV/3, t, zero));
				710
				711	{
				712	#define N (sizeof(vc) / sizeof(long))
				713
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	714	union {
				715	vc v;
Dodji Seketeli	53a103d	2012-05-29 09:42:39 +0000	[diff] [blame]	716	/* Statically assert that N is 2 or 4. */
				717	unsigned long l[(N == 2 \|\| N == 4) ? N : -1];
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	718	} u;
				719	unsigned long l, i = 0;
				720
				721	u.v = t;
				722
				723	/* Find the first word of T that is non-zero. */
				724	switch (N)
				725	{
				726	case 4:
				727	l = u.l[i++];
				728	if (l != 0)
				729	break;
				730	s += sizeof(unsigned long);
				731	l = u.l[i++];
				732	if (l != 0)
				733	break;
				734	s += sizeof(unsigned long);
				735	case 2:
				736	l = u.l[i++];
				737	if (l != 0)
				738	break;
				739	s += sizeof(unsigned long);
				740	l = u.l[i];
				741	}
				742
				743	/* L now contains 0xff in bytes for which we matched one of the
				744	relevant characters. We can find the byte index by finding
				745	its bit index and dividing by 8. */
				746	l = __builtin_clzl(l) >> 3;
				747	return s + l;
				748
				749	#undef N
				750	}
				751	}
				752
Szabolcs Nagy	95d0610	2015-01-30 14:07:00 +0000	[diff] [blame]	753	#elif defined (__ARM_NEON)
Richard Earnshaw	e75b54a	2012-03-22 17:54:55 +0000	[diff] [blame]	754	#include "arm_neon.h"
				755
				756	static const uchar *
				757	search_line_fast (const uchar s, const uchar end ATTRIBUTE_UNUSED)
				758	{
				759	const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
				760	const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
				761	const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
				762	const uint8x16_t repl_qm = vdupq_n_u8 ('?');
				763	const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
				764
				765	unsigned int misalign, found, mask;
				766	const uint8_t *p;
				767	uint8x16_t data;
				768
				769	/* Align the source pointer. */
				770	misalign = (uintptr_t)s & 15;
				771	p = (const uint8_t *)((uintptr_t)s & -16);
				772	data = vld1q_u8 (p);
				773
				774	/* Create a mask for the bytes that are valid within the first
				775	16-byte block. The Idea here is that the AND with the mask
				776	within the loop is "free", since we need some AND or TEST
				777	insn in order to set the flags for the branch anyway. */
				778	mask = (-1u << misalign) & 0xffff;
				779
				780	/* Main loop, processing 16 bytes at a time. */
				781	goto start;
				782
				783	do
				784	{
				785	uint8x8_t l;
				786	uint16x4_t m;
				787	uint32x2_t n;
				788	uint8x16_t t, u, v, w;
				789
				790	p += 16;
				791	data = vld1q_u8 (p);
				792	mask = 0xffff;
				793
				794	start:
				795	t = vceqq_u8 (data, repl_nl);
				796	u = vceqq_u8 (data, repl_cr);
				797	v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
				798	w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
				799	t = vandq_u8 (vorrq_u8 (v, w), xmask);
				800	l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
				801	m = vpaddl_u8 (l);
				802	n = vpaddl_u16 (m);
				803
				804	found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
				805	vshr_n_u64 ((uint64x1_t) n, 24)), 0);
				806	found &= mask;
				807	}
				808	while (!found);
				809
				810	/* FOUND contains 1 in bits for which we matched a relevant
				811	character. Conversion to the byte index is trivial. */
				812	found = __builtin_ctz (found);
				813	return (const uchar *)p + found;
				814	}
				815
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	816	#else
				817
				818	/* We only have one accelerated alternative. Use a direct call so that
				819	we encourage inlining. */
				820
				821	#define search_line_fast search_line_acc_char
				822
				823	#endif
				824
/* One-time lexer set-up.  When the build provides a vectorized lexer
   initializer (HAVE_init_vectorized_lexer is defined), run it;
   otherwise this function does nothing.  */

void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
				834
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	835	/* Returns with a logical line that contains no escaped newlines or
				836	trigraphs. This is a time-critical inner loop. */
				837	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	838	_cpp_clean_line (cpp_reader *pfile)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	839	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	840	cpp_buffer *buffer;
				841	const uchar *s;
				842	uchar c, d, p;
Neil Booth	29401c3	2001-08-22 20:37:20 +0000	[diff] [blame]	843
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	844	buffer = pfile->buffer;
				845	buffer->cur_note = buffer->notes_used = 0;
				846	buffer->cur = buffer->line_base = buffer->next_line;
				847	buffer->need_line = false;
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	848	s = buffer->next_line;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	849
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	850	if (!buffer->from_stage3)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	851	{
Ian Lance Taylor	7af45bd	2006-12-29 15:43:55 +0000	[diff] [blame]	852	const uchar *pbackslash = NULL;
				853
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	854	/* Fast path. This is the common case of an un-escaped line with
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	855	no trigraphs. The primary win here is by not writing any
				856	data back to memory until we have to. */
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	857	while (1)
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	858	{
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	859	/* Perform an optimized search for \n, \r, \\, ?. */
				860	s = search_line_fast (s, buffer->rlimit);
				861
				862	c = *s;
				863	if (c == '\\')
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	864	{
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	865	/* Record the location of the backslash and continue. */
				866	pbackslash = s++;
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	867	}
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	868	else if (__builtin_expect (c == '?', 0))
				869	{
				870	if (__builtin_expect (s[1] == '?', false)
Ian Lance Taylor	7af45bd	2006-12-29 15:43:55 +0000	[diff] [blame]	871	&& _cpp_trigraph_map[s[2]])
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	872	{
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	873	/* Have a trigraph. We may or may not have to convert
				874	it. Add a line note regardless, for -Wtrigraphs. */
				875	add_line_note (buffer, s, s[2]);
				876	if (CPP_OPTION (pfile, trigraphs))
				877	{
				878	/* We do, and that means we have to switch to the
				879	slow path. */
				880	d = (uchar *) s;
				881	*d = _cpp_trigraph_map[s[2]];
				882	s += 2;
				883	goto slow_path;
				884	}
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	885	}
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	886	/* Not a trigraph. Continue on fast-path. */
				887	s++;
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	888	}
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	889	else
				890	break;
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	891	}
				892
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	893	/* This must be \r or \n. We're either done, or we'll be forced
				894	to write back to the buffer and continue on the slow path. */
				895	d = (uchar *) s;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	896
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	897	if (__builtin_expect (s == buffer->rlimit, false))
				898	goto done;
				899
				900	/* DOS line ending? */
				901	if (__builtin_expect (c == '\r', false) && s[1] == '\n')
				902	{
				903	s++;
				904	if (s == buffer->rlimit)
				905	goto done;
				906	}
				907
				908	if (__builtin_expect (pbackslash == NULL, true))
				909	goto done;
				910
				911	/* Check for escaped newline. */
				912	p = d;
				913	while (is_nvspace (p[-1]))
				914	p--;
				915	if (p - 1 != pbackslash)
				916	goto done;
				917
				918	/* Have an escaped newline; process it and proceed to
				919	the slow path. */
				920	add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
				921	d = p - 2;
				922	buffer->next_line = p - 1;
				923
				924	slow_path:
				925	while (1)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	926	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	927	c = *++s;
				928	*++d = c;
				929
				930	if (c == '\n' \|\| c == '\r')
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	931	{
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	932	/* Handle DOS line endings. */
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	933	if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
				934	s++;
				935	if (s == buffer->rlimit)
Neil Booth	8706281	2001-10-20 09:00:53 +0000	[diff] [blame]	936	break;
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	937
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	938	/* Escaped? */
				939	p = d;
				940	while (p != buffer->next_line && is_nvspace (p[-1]))
				941	p--;
				942	if (p == buffer->next_line \|\| p[-1] != '\\')
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	943	break;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	944
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	945	add_line_note (buffer, p - 1, p != d ? ' ': '\\');
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	946	d = p - 2;
				947	buffer->next_line = p - 1;
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	948	}
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	949	else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	950	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	951	/* Add a note regardless, for the benefit of -Wtrigraphs. */
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	952	add_line_note (buffer, d, s[2]);
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	953	if (CPP_OPTION (pfile, trigraphs))
				954	{
				955	*d = _cpp_trigraph_map[s[2]];
				956	s += 2;
				957	}
Neil Booth	a5c3ccc	2000-10-30 22:29:00 +0000	[diff] [blame]	958	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	959	}
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	960	}
				961	else
				962	{
Richard Henderson	246a2fc	2010-08-21 12:05:40 -0700	[diff] [blame]	963	while (s != '\n' && s != '\r')
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	964	s++;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	965	d = (uchar *) s;
				966
				967	/* Handle DOS line endings. */
				968	if (*s == '\r' && s != buffer->rlimit && s[1] == '\n')
				969	s++;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	970	}
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	971
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	972	done:
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	973	*d = '\n';
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	974	/* A sentinel note that should never be processed. */
				975	add_line_note (buffer, d + 1, '\n');
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	976	buffer->next_line = s + 1;
				977	}
				978
Neil Booth	a8eb604	2003-05-04 20:03:55 +0000	[diff] [blame]	979	/* Return true if the trigraph indicated by NOTE should be warned
				980	about in a comment. */
				981	static bool
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	982	warn_in_comment (cpp_reader pfile, _cpp_line_note note)
Neil Booth	a8eb604	2003-05-04 20:03:55 +0000	[diff] [blame]	983	{
				984	const uchar *p;
				985
				986	/* Within comments we don't warn about trigraphs, unless the
				987	trigraph forms an escaped newline, as that may change
Kazu Hirata	6356f89	2003-06-12 19:01:08 +0000	[diff] [blame]	988	behavior. */
Neil Booth	a8eb604	2003-05-04 20:03:55 +0000	[diff] [blame]	989	if (note->type != '/')
				990	return false;
				991
				992	/* If -trigraphs, then this was an escaped newline iff the next note
				993	is coincident. */
				994	if (CPP_OPTION (pfile, trigraphs))
				995	return note[1].pos == note->pos;
				996
				997	/* Otherwise, see if this forms an escaped newline. */
				998	p = note->pos + 3;
				999	while (is_nvspace (*p))
				1000	p++;
				1001
				1002	/* There might have been escaped newlines between the trigraph and the
				1003	newline we found. Hence the position test. */
				1004	return (*p == '\n' && p < note[1].pos);
				1005	}
				1006
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1007	/* Process the notes created by add_line_note as far as the current
				1008	location. */
				1009	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1010	_cpp_process_line_notes (cpp_reader *pfile, int in_comment)
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1011	{
				1012	cpp_buffer *buffer = pfile->buffer;
				1013
				1014	for (;;)
				1015	{
				1016	_cpp_line_note *note = &buffer->notes[buffer->cur_note];
				1017	unsigned int col;
				1018
				1019	if (note->pos > buffer->cur)
				1020	break;
				1021
				1022	buffer->cur_note++;
				1023	col = CPP_BUF_COLUMN (buffer, note->pos + 1);
				1024
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1025	if (note->type == '\\' \|\| note->type == ' ')
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1026	{
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1027	if (note->type == ' ' && !in_comment)
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	1028	cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1029	"backslash and newline separated by space");
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1030
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1031	if (buffer->next_line > buffer->rlimit)
				1032	{
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	1033	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1034	"backslash-newline at end of file");
				1035	/* Prevent "no newline at end of file" warning. */
				1036	buffer->next_line = buffer->rlimit;
				1037	}
				1038
				1039	buffer->line_base = note->pos;
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	1040	CPP_INCREMENT_LINE (pfile, 0);
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1041	}
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1042	else if (_cpp_trigraph_map[note->type])
				1043	{
Neil Booth	a8eb604	2003-05-04 20:03:55 +0000	[diff] [blame]	1044	if (CPP_OPTION (pfile, warn_trigraphs)
				1045	&& (!in_comment \|\| warn_in_comment (pfile, note)))
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1046	{
				1047	if (CPP_OPTION (pfile, trigraphs))
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1048	cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
				1049	pfile->line_table->highest_line, col,
				1050	"trigraph ??%c converted to %c",
				1051	note->type,
				1052	(int) _cpp_trigraph_map[note->type]);
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1053	else
Geoffrey Keating	905bd7b	2003-07-22 02:21:16 +0000	[diff] [blame]	1054	{
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1055	cpp_warning_with_line
				1056	(pfile, CPP_W_TRIGRAPHS,
				1057	pfile->line_table->highest_line, col,
Geoffrey Keating	905bd7b	2003-07-22 02:21:16 +0000	[diff] [blame]	1058	"trigraph ??%c ignored, use -trigraphs to enable",
				1059	note->type);
				1060	}
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1061	}
				1062	}
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1063	else if (note->type == 0)
				1064	/* Already processed in lex_raw_string. */;
Neil Booth	41c32c9	2003-04-20 19:02:53 +0000	[diff] [blame]	1065	else
				1066	abort ();
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1067	}
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1068	}
				1069
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1070	/* Skip a C-style block comment. We find the end of the comment by
				1071	seeing if an asterisk is before every '/' we encounter. Returns
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	1072	nonzero if comment terminated by EOF, zero otherwise.
				1073
				1074	Buffer->cur points to the initial asterisk of the comment. */
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1075	bool
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1076	_cpp_skip_block_comment (cpp_reader *pfile)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1077	{
				1078	cpp_buffer *buffer = pfile->buffer;
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1079	const uchar *cur = buffer->cur;
				1080	uchar c;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1081
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1082	cur++;
				1083	if (*cur == '/')
				1084	cur++;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1085
				1086	for (;;)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1087	{
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1088	/* People like decorating comments with '*', so check for '/'
				1089	instead for efficiency. */
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1090	c = *cur++;
				1091
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1092	if (c == '/')
				1093	{
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1094	if (cur[-2] == '*')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1095	break;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1096
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1097	/* Warn about potential nested comments, but not if the '/'
Joseph Myers	a1f300c	2001-11-23 02:05:19 +0000	[diff] [blame]	1098	comes immediately before the true comment delimiter.
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1099	Don't bother to get it right across escaped newlines. */
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1100	if (CPP_OPTION (pfile, warn_comments)
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1101	&& cur[0] == '*' && cur[1] != '/')
				1102	{
				1103	buffer->cur = cur;
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1104	cpp_warning_with_line (pfile, CPP_W_COMMENTS,
				1105	pfile->line_table->highest_line,
				1106	CPP_BUF_COL (buffer),
				1107	"\"/*\" within comment");
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1108	}
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1109	}
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1110	else if (c == '\n')
				1111	{
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	1112	unsigned int cols;
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1113	buffer->cur = cur - 1;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1114	_cpp_process_line_notes (pfile, true);
				1115	if (buffer->next_line >= buffer->rlimit)
				1116	return true;
				1117	_cpp_clean_line (pfile);
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	1118
				1119	cols = buffer->next_line - buffer->line_base;
				1120	CPP_INCREMENT_LINE (pfile, cols);
				1121
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1122	cur = buffer->cur;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1123	}
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1124	}
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1125
Zack Weinberg	d08dcf8	2003-10-13 18:53:28 +0000	[diff] [blame]	1126	buffer->cur = cur;
Neil Booth	a8eb604	2003-05-04 20:03:55 +0000	[diff] [blame]	1127	_cpp_process_line_notes (pfile, true);
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1128	return false;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1129	}
				1130
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	1131	/* Skip a C++ line comment, leaving buffer->cur pointing to the
Kazu Hirata	da7d830	2002-09-22 02:03:17 +0000	[diff] [blame]	1132	terminating newline. Handles escaped newlines. Returns nonzero
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	1133	if a multiline comment. */
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1134	static int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1135	skip_line_comment (cpp_reader *pfile)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1136	{
Neil Booth	cbcff6d	2000-09-23 21:41:41 +0000	[diff] [blame]	1137	cpp_buffer *buffer = pfile->buffer;
Manuel López-Ibáñez	1bb6466	2008-07-21 09:33:38 +0000	[diff] [blame]	1138	source_location orig_line = pfile->line_table->highest_line;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1139
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1140	while (*buffer->cur != '\n')
				1141	buffer->cur++;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1142
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1143	_cpp_process_line_notes (pfile, true);
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	1144	return orig_line != pfile->line_table->highest_line;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1145	}
				1146
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1147	/* Skips whitespace, saving the next non-whitespace character. */
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1148	static void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1149	skip_whitespace (cpp_reader *pfile, cppchar_t c)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1150	{
				1151	cpp_buffer *buffer = pfile->buffer;
Neil Booth	f7d151f	2003-04-19 07:41:15 +0000	[diff] [blame]	1152	bool saw_NUL = false;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1153
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1154	do
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1155	{
Neil Booth	91fcd15	2000-07-09 09:19:44 +0000	[diff] [blame]	1156	/* Horizontal space always OK. */
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	1157	if (c == ' ' \|\| c == '\t')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1158	;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1159	/* Just \f \v or \0 left. */
Neil Booth	91fcd15	2000-07-09 09:19:44 +0000	[diff] [blame]	1160	else if (c == '\0')
Neil Booth	f7d151f	2003-04-19 07:41:15 +0000	[diff] [blame]	1161	saw_NUL = true;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1162	else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	1163	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
Neil Booth	ebef4e8	2002-04-14 18:42:47 +0000	[diff] [blame]	1164	CPP_BUF_COL (buffer),
				1165	"%s in preprocessing directive",
				1166	c == '\f' ? "form feed" : "vertical tab");
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1167
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1168	c = *buffer->cur++;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1169	}
Kazu Hirata	ec5c56d	2001-08-01 17:57:27 +0000	[diff] [blame]	1170	/* We only want non-vertical space, i.e. ' ' \t \f \v \0. */
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1171	while (is_nvspace (c));
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1172
Neil Booth	f7d151f	2003-04-19 07:41:15 +0000	[diff] [blame]	1173	if (saw_NUL)
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	1174	cpp_error (pfile, CPP_DL_WARNING, "null character(s) ignored");
Neil Booth	f7d151f	2003-04-19 07:41:15 +0000	[diff] [blame]	1175
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	1176	buffer->cur--;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1177	}
				1178
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1179	/* See if the characters of a number token are valid in a name (no
				1180	'.', '+' or '-'). */
				1181	static int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1182	name_p (cpp_reader pfile, const cpp_string string)
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1183	{
				1184	unsigned int i;
				1185
				1186	for (i = 0; i < string->len; i++)
				1187	if (!is_idchar (string->text[i]))
				1188	return 0;
				1189
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	1190	return 1;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1191	}
				1192
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1193	/* After parsing an identifier or other sequence, produce a warning about
				1194	sequences not in NFC/NFKC. */
				1195	static void
				1196	warn_about_normalization (cpp_reader *pfile,
				1197	const cpp_token *token,
				1198	const struct normalize_state *s)
				1199	{
				1200	if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
				1201	&& !pfile->state.skipping)
				1202	{
				1203	/* Make sure that the token is printed using UCNs, even
				1204	if we'd otherwise happily print UTF-8. */
Gabriel Dos Reis	c3f829c	2005-05-28 15:52:48 +0000	[diff] [blame]	1205	unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1206	size_t sz;
				1207
				1208	sz = cpp_spell_token (pfile, token, buf, false) - buf;
				1209	if (NORMALIZE_STATE_RESULT (s) == normalized_C)
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1210	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
				1211	"`%.*s' is not in NFKC", (int) sz, buf);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1212	else
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1213	cpp_warning_with_line (pfile, CPP_W_NORMALIZE, token->src_loc, 0,
				1214	"`%.*s' is not in NFC", (int) sz, buf);
Tobias Burnus	55e7f90	2012-10-15 22:08:57 +0200	[diff] [blame]	1215	free (buf);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1216	}
				1217	}
				1218
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1219	/* Returns TRUE if the sequence starting at buffer->cur is invalid in
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1220	an identifier. FIRST is TRUE if this starts an identifier. */
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1221	static bool
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1222	forms_identifier_p (cpp_reader *pfile, int first,
				1223	struct normalize_state *state)
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1224	{
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1225	cpp_buffer *buffer = pfile->buffer;
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1226
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1227	if (*buffer->cur == '$')
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1228	{
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1229	if (!CPP_OPTION (pfile, dollars_in_ident))
				1230	return false;
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1231
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1232	buffer->cur++;
Hans-Peter Nilsson	78b8811	2003-06-12 06:09:15 +0000	[diff] [blame]	1233	if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1234	{
Hans-Peter Nilsson	78b8811	2003-06-12 06:09:15 +0000	[diff] [blame]	1235	CPP_OPTION (pfile, warn_dollars) = 0;
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	1236	cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1237	}
				1238
				1239	return true;
				1240	}
				1241
				1242	/* Is this a syntactically valid UCN? */
Joseph Myers	af15a2f	2005-09-20 21:31:37 +0100	[diff] [blame]	1243	if (CPP_OPTION (pfile, extended_identifiers)
Geoffrey Keating	6baba9b	2005-03-15 09:55:41 +0000	[diff] [blame]	1244	&& *buffer->cur == '\\'
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1245	&& (buffer->cur[1] == 'u' \|\| buffer->cur[1] == 'U'))
				1246	{
				1247	buffer->cur += 2;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1248	if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
				1249	state))
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	1250	return true;
				1251	buffer->cur -= 2;
				1252	}
				1253
				1254	return false;
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1255	}
				1256
Kai Tietz	17e7cb8	2009-11-11 18:37:19 +0000	[diff] [blame]	1257	/* Helper function to get the cpp_hashnode of the identifier BASE. */
				1258	static cpp_hashnode *
				1259	lex_identifier_intern (cpp_reader pfile, const uchar base)
				1260	{
				1261	cpp_hashnode *result;
				1262	const uchar *cur;
				1263	unsigned int len;
				1264	unsigned int hash = HT_HASHSTEP (0, *base);
				1265
				1266	cur = base + 1;
				1267	while (ISIDNUM (*cur))
				1268	{
				1269	hash = HT_HASHSTEP (hash, *cur);
				1270	cur++;
				1271	}
				1272	len = cur - base;
				1273	hash = HT_HASHFINISH (hash, len);
				1274	result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
				1275	base, len, hash, HT_ALLOC));
				1276
				1277	/* Rarely, identifiers require diagnostics when lexed. */
				1278	if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
				1279	&& !pfile->state.skipping, 0))
				1280	{
				1281	/* It is allowed to poison the same identifier twice. */
				1282	if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
				1283	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
				1284	NODE_NAME (result));
				1285
				1286	/* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
				1287	replacement list of a variadic macro. */
				1288	if (result == pfile->spec_nodes.n__VA_ARGS__
				1289	&& !pfile->state.va_args_ok)
Edward Smith-Rowland	3976796	2014-07-10 22:26:50 +0000	[diff] [blame]	1290	{
				1291	if (CPP_OPTION (pfile, cplusplus))
				1292	cpp_error (pfile, CPP_DL_PEDWARN,
				1293	"__VA_ARGS__ can only appear in the expansion"
				1294	" of a C++11 variadic macro");
				1295	else
				1296	cpp_error (pfile, CPP_DL_PEDWARN,
				1297	"__VA_ARGS__ can only appear in the expansion"
				1298	" of a C99 variadic macro");
				1299	}
Kai Tietz	17e7cb8	2009-11-11 18:37:19 +0000	[diff] [blame]	1300
				1301	/* For -Wc++-compat, warn about use of C++ named operators. */
				1302	if (result->flags & NODE_WARN_OPERATOR)
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1303	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
				1304	"identifier \"%s\" is a special operator name in C++",
				1305	NODE_NAME (result));
Kai Tietz	17e7cb8	2009-11-11 18:37:19 +0000	[diff] [blame]	1306	}
				1307
				1308	return result;
				1309	}
				1310
				1311	/* Get the cpp_hashnode of an identifier specified by NAME in
				1312	the current cpp_reader object. If none is found, NULL is returned. */
				1313	cpp_hashnode *
				1314	_cpp_lex_identifier (cpp_reader pfile, const char name)
				1315	{
				1316	cpp_hashnode *result;
				1317	result = lex_identifier_intern (pfile, (uchar *) name);
				1318	return result;
				1319	}
				1320
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1321	/* Lex an identifier starting at BUFFER->CUR - 1. */
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1322	static cpp_hashnode *
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1323	lex_identifier (cpp_reader pfile, const uchar base, bool starts_ucn,
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	1324	struct normalize_state nst, cpp_hashnode *spelling)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1325	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1326	cpp_hashnode *result;
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1327	const uchar *cur;
Zack Weinberg	c6e8380	2004-06-05 20:58:06 +0000	[diff] [blame]	1328	unsigned int len;
				1329	unsigned int hash = HT_HASHSTEP (0, *base);
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1330
Zack Weinberg	c6e8380	2004-06-05 20:58:06 +0000	[diff] [blame]	1331	cur = pfile->buffer->cur;
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1332	if (! starts_ucn)
Joseph Myers	d3f4ff8	2013-11-16 00:05:08 +0000	[diff] [blame]	1333	{
				1334	while (ISIDNUM (*cur))
				1335	{
				1336	hash = HT_HASHSTEP (hash, *cur);
				1337	cur++;
				1338	}
				1339	NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
				1340	}
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1341	pfile->buffer->cur = cur;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1342	if (starts_ucn \|\| forms_identifier_p (pfile, false, nst))
Neil Booth	10cf9bd	2002-03-22 07:23:21 +0000	[diff] [blame]	1343	{
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1344	/* Slower version for identifiers containing UCNs (or $). */
				1345	do {
				1346	while (ISIDNUM (*pfile->buffer->cur))
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1347	{
Joseph Myers	d3f4ff8	2013-11-16 00:05:08 +0000	[diff] [blame]	1348	NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1349	pfile->buffer->cur++;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1350	}
				1351	} while (forms_identifier_p (pfile, false, nst));
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1352	result = _cpp_interpret_identifier (pfile, base,
				1353	pfile->buffer->cur - base);
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	1354	*spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
Zack Weinberg	2c3fcba	2001-09-10 22:34:03 +0000	[diff] [blame]	1355	}
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1356	else
				1357	{
				1358	len = cur - base;
				1359	hash = HT_HASHFINISH (hash, len);
Zack Weinberg	2c3fcba	2001-09-10 22:34:03 +0000	[diff] [blame]	1360
Tom Tromey	2bf41bf	2008-02-20 02:16:43 +0000	[diff] [blame]	1361	result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
				1362	base, len, hash, HT_ALLOC));
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	1363	*spelling = result;
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	1364	}
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1365
				1366	/* Rarely, identifiers require diagnostics when lexed. */
Zack Weinberg	2c3fcba	2001-09-10 22:34:03 +0000	[diff] [blame]	1367	if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
				1368	&& !pfile->state.skipping, 0))
				1369	{
				1370	/* It is allowed to poison the same identifier twice. */
				1371	if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	1372	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
Zack Weinberg	2c3fcba	2001-09-10 22:34:03 +0000	[diff] [blame]	1373	NODE_NAME (result));
				1374
				1375	/* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
				1376	replacement list of a variadic macro. */
				1377	if (result == pfile->spec_nodes.n__VA_ARGS__
				1378	&& !pfile->state.va_args_ok)
Edward Smith-Rowland	3976796	2014-07-10 22:26:50 +0000	[diff] [blame]	1379	{
				1380	if (CPP_OPTION (pfile, cplusplus))
				1381	cpp_error (pfile, CPP_DL_PEDWARN,
				1382	"__VA_ARGS__ can only appear in the expansion"
				1383	" of a C++11 variadic macro");
				1384	else
				1385	cpp_error (pfile, CPP_DL_PEDWARN,
				1386	"__VA_ARGS__ can only appear in the expansion"
				1387	" of a C99 variadic macro");
				1388	}
Ian Lance Taylor	3d8b2a9	2009-06-12 19:43:25 +0000	[diff] [blame]	1389
				1390	/* For -Wc++-compat, warn about use of C++ named operators. */
				1391	if (result->flags & NODE_WARN_OPERATOR)
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	1392	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
				1393	"identifier \"%s\" is a special operator name in C++",
				1394	NODE_NAME (result));
Zack Weinberg	2c3fcba	2001-09-10 22:34:03 +0000	[diff] [blame]	1395	}
				1396
				1397	return result;
				1398	}
				1399
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1400	/* Lex a number to NUMBER starting at BUFFER->CUR - 1. */
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1401	static void
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1402	lex_number (cpp_reader pfile, cpp_string number,
				1403	struct normalize_state *nst)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1404	{
Neil Booth	562a5c2	2002-04-21 18:46:42 +0000	[diff] [blame]	1405	const uchar *cur;
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1406	const uchar *base;
				1407	uchar *dest;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1408
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1409	base = pfile->buffer->cur - 1;
				1410	do
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1411	{
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1412	cur = pfile->buffer->cur;
Neil Booth	10cf9bd	2002-03-22 07:23:21 +0000	[diff] [blame]	1413
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1414	/* N.B. ISIDNUM does not include $. */
Edward Smith-Rowland	7057e64	2013-10-31 14:01:23 +0000	[diff] [blame]	1415	while (ISIDNUM (cur) \|\| cur == '.' \|\| DIGIT_SEP (*cur)
				1416	\|\| VALID_SIGN (*cur, cur[-1]))
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1417	{
Joseph Myers	d3f4ff8	2013-11-16 00:05:08 +0000	[diff] [blame]	1418	NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1419	cur++;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1420	}
Edward Smith-Rowland	a5858a3	2015-03-17 00:50:55 +0000	[diff] [blame]	1421	/* A number can't end with a digit separator. */
				1422	while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
				1423	--cur;
Neil Booth	10cf9bd	2002-03-22 07:23:21 +0000	[diff] [blame]	1424
Neil Booth	10cf9bd	2002-03-22 07:23:21 +0000	[diff] [blame]	1425	pfile->buffer->cur = cur;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1426	}
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	1427	while (forms_identifier_p (pfile, false, nst));
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1428
				1429	number->len = cur - base;
				1430	dest = _cpp_unaligned_alloc (pfile, number->len + 1);
				1431	memcpy (dest, base, number->len);
				1432	dest[number->len] = '\0';
				1433	number->text = dest;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1434	}
				1435
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1436	/* Create a token of type TYPE with a literal spelling. */
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1437	static void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1438	create_literal (cpp_reader pfile, cpp_token token, const uchar *base,
				1439	unsigned int len, enum cpp_ttype type)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1440	{
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1441	uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1442
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1443	memcpy (dest, base, len);
				1444	dest[len] = '\0';
				1445	token->type = type;
				1446	token->val.str.len = len;
				1447	token->val.str.text = dest;
				1448	}
				1449
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1450	/* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
				1451	sequence from FIRST_BUFF_P to LAST_BUFF_P. /
				1452
				1453	static void
				1454	bufring_append (cpp_reader pfile, const uchar base, size_t len,
				1455	_cpp_buff first_buff_p, _cpp_buff last_buff_p)
				1456	{
				1457	_cpp_buff first_buff = first_buff_p;
				1458	_cpp_buff last_buff = last_buff_p;
				1459
				1460	if (first_buff == NULL)
				1461	first_buff = last_buff = _cpp_get_buff (pfile, len);
				1462	else if (len > BUFF_ROOM (last_buff))
				1463	{
				1464	size_t room = BUFF_ROOM (last_buff);
				1465	memcpy (BUFF_FRONT (last_buff), base, room);
				1466	BUFF_FRONT (last_buff) += room;
				1467	base += room;
				1468	len -= room;
				1469	last_buff = _cpp_append_extend_buff (pfile, last_buff, len);
				1470	}
				1471
				1472	memcpy (BUFF_FRONT (last_buff), base, len);
				1473	BUFF_FRONT (last_buff) += len;
				1474
				1475	*first_buff_p = first_buff;
				1476	*last_buff_p = last_buff;
				1477	}
				1478
Ed Smith-Rowland	c865f92	2013-06-29 03:41:58 +0000	[diff] [blame]	1479
				1480	/* Returns true if a macro has been defined.
				1481	This might not work if compile with -save-temps,
				1482	or preprocess separately from compilation. */
				1483
				1484	static bool
				1485	is_macro(cpp_reader pfile, const uchar base)
				1486	{
				1487	const uchar *cur = base;
				1488	if (! ISIDST (*cur))
				1489	return false;
				1490	unsigned int hash = HT_HASHSTEP (0, *cur);
				1491	++cur;
				1492	while (ISIDNUM (*cur))
				1493	{
				1494	hash = HT_HASHSTEP (hash, *cur);
				1495	++cur;
				1496	}
				1497	hash = HT_HASHFINISH (hash, cur - base);
				1498
				1499	cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
				1500	base, cur - base, hash, HT_NO_INSERT));
				1501
				1502	return !result ? false : (result->type == NT_MACRO);
				1503	}
				1504
				1505
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1506	/* Lexes a raw string. The stored string contains the spelling, including
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1507	double quotes, delimiter string, '(' and ')', any leading
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1508	'L', 'u', 'U' or 'u8' and 'R' modifier. It returns the type of the
				1509	literal, or CPP_OTHER if it was not properly terminated.
				1510
				1511	The spelling is NUL-terminated, but it is not guaranteed that this
				1512	is the first NUL since embedded NULs are preserved. */
				1513
				1514	static void
				1515	lex_raw_string (cpp_reader pfile, cpp_token token, const uchar *base,
				1516	const uchar *cur)
				1517	{
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1518	uchar raw_prefix[17];
				1519	uchar temp_buffer[18];
				1520	const uchar *orig_base;
				1521	unsigned int raw_prefix_len = 0, raw_suffix_len = 0;
				1522	enum raw_str_phase { RAW_STR_PREFIX, RAW_STR, RAW_STR_SUFFIX };
				1523	raw_str_phase phase = RAW_STR_PREFIX;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1524	enum cpp_ttype type;
				1525	size_t total_len = 0;
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1526	/* Index into temp_buffer during phases other than RAW_STR,
				1527	during RAW_STR phase 17 to tell BUF_APPEND that nothing should
				1528	be appended to temp_buffer. */
				1529	size_t temp_buffer_len = 0;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1530	_cpp_buff first_buff = NULL, last_buff = NULL;
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1531	size_t raw_prefix_start;
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1532	_cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1533
				1534	type = (*base == 'L' ? CPP_WSTRING :
				1535	*base == 'U' ? CPP_STRING32 :
				1536	*base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
				1537	: CPP_STRING);
				1538
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1539	#define BUF_APPEND(STR,LEN) \
				1540	do { \
				1541	bufring_append (pfile, (const uchar *)(STR), (LEN), \
				1542	&first_buff, &last_buff); \
				1543	total_len += (LEN); \
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1544	if (__builtin_expect (temp_buffer_len < 17, 0) \
				1545	&& (const uchar *)(STR) != base \
				1546	&& (LEN) <= 2) \
				1547	{ \
				1548	memcpy (temp_buffer + temp_buffer_len, \
				1549	(const uchar *)(STR), (LEN)); \
				1550	temp_buffer_len += (LEN); \
				1551	} \
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1552	} while (0);
				1553
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1554	orig_base = base;
				1555	++cur;
				1556	raw_prefix_start = cur - base;
				1557	for (;;)
				1558	{
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1559	cppchar_t c;
				1560
				1561	/* If we previously performed any trigraph or line splicing
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1562	transformations, undo them in between the opening and closing
				1563	double quote. */
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1564	while (note->pos < cur)
				1565	++note;
				1566	for (; note->pos == cur; ++note)
				1567	{
				1568	switch (note->type)
				1569	{
				1570	case '\\':
				1571	case ' ':
				1572	/* Restore backslash followed by newline. */
				1573	BUF_APPEND (base, cur - base);
				1574	base = cur;
				1575	BUF_APPEND ("\\", 1);
				1576	after_backslash:
				1577	if (note->type == ' ')
				1578	{
				1579	/* GNU backslash whitespace newline extension. FIXME
				1580	could be any sequence of non-vertical space. When we
				1581	can properly restore any such sequence, we should mark
				1582	this note as handled so _cpp_process_line_notes
				1583	doesn't warn. */
				1584	BUF_APPEND (" ", 1);
				1585	}
				1586
				1587	BUF_APPEND ("\n", 1);
				1588	break;
				1589
				1590	case 0:
				1591	/* Already handled. */
				1592	break;
				1593
				1594	default:
				1595	if (_cpp_trigraph_map[note->type])
				1596	{
				1597	/* Don't warn about this trigraph in
				1598	_cpp_process_line_notes, since trigraphs show up as
				1599	trigraphs in raw strings. */
Jakub Jelinek	d947ada	2010-04-06 09:02:40 +0200	[diff] [blame]	1600	uchar type = note->type;
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1601	note->type = 0;
				1602
				1603	if (!CPP_OPTION (pfile, trigraphs))
				1604	/* If we didn't convert the trigraph in the first
				1605	place, don't do anything now either. */
				1606	break;
				1607
				1608	BUF_APPEND (base, cur - base);
				1609	base = cur;
				1610	BUF_APPEND ("??", 2);
				1611
				1612	/* ??/ followed by newline gets two line notes, one for
				1613	the trigraph and one for the backslash/newline. */
				1614	if (type == '/' && note[1].pos == cur)
				1615	{
				1616	if (note[1].type != '\\'
				1617	&& note[1].type != ' ')
				1618	abort ();
				1619	BUF_APPEND ("/", 1);
				1620	++note;
				1621	goto after_backslash;
				1622	}
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1623	else
				1624	{
				1625	/* Skip the replacement character. */
				1626	base = ++cur;
				1627	BUF_APPEND (&type, 1);
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1628	c = type;
				1629	goto check_c;
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1630	}
				1631	}
				1632	else
				1633	abort ();
				1634	break;
				1635	}
				1636	}
				1637	c = *cur++;
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1638	if (__builtin_expect (temp_buffer_len < 17, 0))
				1639	temp_buffer[temp_buffer_len++] = c;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1640
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1641	check_c:
				1642	if (phase == RAW_STR_PREFIX)
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1643	{
Jakub Jelinek	8cf8873	2013-07-21 04:28:03 +0200	[diff] [blame]	1644	while (raw_prefix_len < temp_buffer_len)
				1645	{
				1646	raw_prefix[raw_prefix_len] = temp_buffer[raw_prefix_len];
				1647	switch (raw_prefix[raw_prefix_len])
				1648	{
				1649	case ' ': case '(': case ')': case '\\': case '\t':
				1650	case '\v': case '\f': case '\n': default:
				1651	break;
				1652	/* Basic source charset except the above chars. */
				1653	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
				1654	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
				1655	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
				1656	case 's': case 't': case 'u': case 'v': case 'w': case 'x':
				1657	case 'y': case 'z':
				1658	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
				1659	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
				1660	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
				1661	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
				1662	case 'Y': case 'Z':
				1663	case '0': case '1': case '2': case '3': case '4': case '5':
				1664	case '6': case '7': case '8': case '9':
				1665	case '_': case '{': case '}': case '#': case '[': case ']':
				1666	case '<': case '>': case '%': case ':': case ';': case '.':
				1667	case '?': case '*': case '+': case '-': case '/': case '^':
				1668	case '&': case '\|': case '~': case '!': case '=': case ',':
				1669	case '"': case '\'':
				1670	if (raw_prefix_len < 16)
				1671	{
				1672	raw_prefix_len++;
				1673	continue;
				1674	}
				1675	break;
				1676	}
				1677
				1678	if (raw_prefix[raw_prefix_len] != '(')
				1679	{
				1680	int col = CPP_BUF_COLUMN (pfile->buffer, cur) + 1;
				1681	if (raw_prefix_len == 16)
				1682	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
				1683	col, "raw string delimiter longer "
				1684	"than 16 characters");
				1685	else if (raw_prefix[raw_prefix_len] == '\n')
				1686	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
				1687	col, "invalid new-line in raw "
				1688	"string delimiter");
				1689	else
				1690	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
				1691	col, "invalid character '%c' in "
				1692	"raw string delimiter",
				1693	(int) raw_prefix[raw_prefix_len]);
				1694	pfile->buffer->cur = orig_base + raw_prefix_start - 1;
				1695	create_literal (pfile, token, orig_base,
				1696	raw_prefix_start - 1, CPP_OTHER);
				1697	if (first_buff)
				1698	_cpp_release_buff (pfile, first_buff);
				1699	return;
				1700	}
				1701	raw_prefix[raw_prefix_len] = '"';
				1702	phase = RAW_STR;
				1703	/* Nothing should be appended to temp_buffer during
				1704	RAW_STR phase. */
				1705	temp_buffer_len = 17;
				1706	break;
				1707	}
				1708	continue;
				1709	}
				1710	else if (phase == RAW_STR_SUFFIX)
				1711	{
				1712	while (raw_suffix_len <= raw_prefix_len
				1713	&& raw_suffix_len < temp_buffer_len
				1714	&& temp_buffer[raw_suffix_len] == raw_prefix[raw_suffix_len])
				1715	raw_suffix_len++;
				1716	if (raw_suffix_len > raw_prefix_len)
				1717	break;
				1718	if (raw_suffix_len == temp_buffer_len)
				1719	continue;
				1720	phase = RAW_STR;
				1721	/* Nothing should be appended to temp_buffer during
				1722	RAW_STR phase. */
				1723	temp_buffer_len = 17;
				1724	}
				1725	if (c == ')')
				1726	{
				1727	phase = RAW_STR_SUFFIX;
				1728	raw_suffix_len = 0;
				1729	temp_buffer_len = 0;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1730	}
				1731	else if (c == '\n')
				1732	{
				1733	if (pfile->state.in_directive
Jakub Jelinek	d5e4835	2013-07-10 18:52:19 +0200	[diff] [blame]	1734	\|\| (pfile->state.parsing_args
				1735	&& pfile->buffer->next_line >= pfile->buffer->rlimit))
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1736	{
				1737	cur--;
				1738	type = CPP_OTHER;
				1739	cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
				1740	"unterminated raw string");
				1741	break;
				1742	}
				1743
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1744	BUF_APPEND (base, cur - base);
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1745
				1746	if (pfile->buffer->cur < pfile->buffer->rlimit)
				1747	CPP_INCREMENT_LINE (pfile, 0);
				1748	pfile->buffer->need_line = true;
				1749
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1750	pfile->buffer->cur = cur-1;
				1751	_cpp_process_line_notes (pfile, false);
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1752	if (!_cpp_get_fresh_line (pfile))
				1753	{
				1754	source_location src_loc = token->src_loc;
				1755	token->type = CPP_EOF;
				1756	/* Tell the compiler the line number of the EOF token. */
				1757	token->src_loc = pfile->line_table->highest_line;
				1758	token->flags = BOL;
				1759	if (first_buff != NULL)
				1760	_cpp_release_buff (pfile, first_buff);
				1761	cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
				1762	"unterminated raw string");
				1763	return;
				1764	}
				1765
				1766	cur = base = pfile->buffer->cur;
Jason Merrill	00a81b8	2010-03-29 16:07:29 -0400	[diff] [blame]	1767	note = &pfile->buffer->notes[pfile->buffer->cur_note];
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1768	}
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1769	}
				1770
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1771	if (CPP_OPTION (pfile, user_literals))
				1772	{
Ed Smith-Rowland	c865f92	2013-06-29 03:41:58 +0000	[diff] [blame]	1773	/* If a string format macro, say from inttypes.h, is placed touching
				1774	a string literal it could be parsed as a C++11 user-defined string
				1775	literal thus breaking the program.
				1776	Try to identify macros with is_macro. A warning is issued. */
				1777	if (is_macro (pfile, cur))
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1778	{
Dodji Seketeli	112448b	2012-04-29 16:27:08 +0000	[diff] [blame]	1779	/* Raise a warning, but do not consume subsequent tokens. */
Edward Smith-Rowland	7aee864	2014-07-09 13:33:58 +0000	[diff] [blame]	1780	if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1781	cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
				1782	token->src_loc, 0,
				1783	"invalid suffix on literal; C++11 requires "
Ed Smith-Rowland	c865f92	2013-06-29 03:41:58 +0000	[diff] [blame]	1784	"a space between literal and string macro");
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1785	}
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1786	/* Grab user defined literal suffix. */
Ed Smith-Rowland	561f7fc	2013-02-14 02:55:42 +0000	[diff] [blame]	1787	else if (ISIDST (*cur))
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1788	{
				1789	type = cpp_userdef_string_add_type (type);
				1790	++cur;
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1791
				1792	while (ISIDNUM (*cur))
				1793	++cur;
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1794	}
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1795	}
				1796
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1797	pfile->buffer->cur = cur;
				1798	if (first_buff == NULL)
				1799	create_literal (pfile, token, base, cur - base, type);
				1800	else
				1801	{
				1802	uchar *dest = _cpp_unaligned_alloc (pfile, total_len + (cur - base) + 1);
				1803
				1804	token->type = type;
				1805	token->val.str.len = total_len + (cur - base);
				1806	token->val.str.text = dest;
				1807	last_buff = first_buff;
				1808	while (last_buff != NULL)
				1809	{
				1810	memcpy (dest, last_buff->base,
				1811	BUFF_FRONT (last_buff) - last_buff->base);
				1812	dest += BUFF_FRONT (last_buff) - last_buff->base;
				1813	last_buff = last_buff->next;
				1814	}
				1815	_cpp_release_buff (pfile, first_buff);
				1816	memcpy (dest, base, cur - base);
				1817	dest[cur - base] = '\0';
				1818	}
				1819	}
				1820
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1821	/* Lexes a string, character constant, or angle-bracketed header file
				1822	name. The stored string contains the spelling, including opening
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1823	quote and any leading 'L', 'u', 'U' or 'u8' and optional
				1824	'R' modifier. It returns the type of the literal, or CPP_OTHER
				1825	if it was not properly terminated, or CPP_LESS for an unterminated
				1826	header name which must be relexed as normal tokens.
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1827
				1828	The spelling is NUL-terminated, but it is not guaranteed that this
				1829	is the first NUL since embedded NULs are preserved. */
				1830	static void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1831	lex_string (cpp_reader pfile, cpp_token token, const uchar *base)
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1832	{
				1833	bool saw_NUL = false;
				1834	const uchar *cur;
				1835	cppchar_t terminator;
				1836	enum cpp_ttype type;
				1837
				1838	cur = base;
				1839	terminator = *cur++;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1840	if (terminator == 'L' \|\| terminator == 'U')
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1841	terminator = *cur++;
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1842	else if (terminator == 'u')
				1843	{
				1844	terminator = *cur++;
				1845	if (terminator == '8')
				1846	terminator = *cur++;
				1847	}
				1848	if (terminator == 'R')
				1849	{
				1850	lex_raw_string (pfile, token, base, cur);
				1851	return;
				1852	}
				1853	if (terminator == '"')
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	1854	type = (*base == 'L' ? CPP_WSTRING :
				1855	*base == 'U' ? CPP_STRING32 :
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	1856	*base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
				1857	: CPP_STRING);
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1858	else if (terminator == '\'')
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	1859	type = (*base == 'L' ? CPP_WCHAR :
				1860	*base == 'U' ? CPP_CHAR32 :
				1861	*base == 'u' ? CPP_CHAR16 : CPP_CHAR);
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1862	else
				1863	terminator = '>', type = CPP_HEADER_NAME;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1864
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1865	for (;;)
				1866	{
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1867	cppchar_t c = *cur++;
Neil Booth	7868b4a	2001-03-04 12:02:02 +0000	[diff] [blame]	1868
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	1869	/* In #include-style directives, terminators are not escapable. */
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1870	if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
				1871	cur++;
				1872	else if (c == terminator)
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	1873	break;
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1874	else if (c == '\n')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1875	{
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1876	cur--;
Joseph Myers	4bb09c2	2009-02-21 21:25:39 +0000	[diff] [blame]	1877	/* Unmatched quotes always yield undefined behavior, but
				1878	greedy lexing means that what appears to be an unterminated
				1879	header name may actually be a legitimate sequence of tokens. */
				1880	if (terminator == '>')
				1881	{
				1882	token->type = CPP_LESS;
				1883	return;
				1884	}
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1885	type = CPP_OTHER;
				1886	break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1887	}
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1888	else if (c == '\0')
				1889	saw_NUL = true;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1890	}
				1891
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1892	if (saw_NUL && !pfile->state.skipping)
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	1893	cpp_error (pfile, CPP_DL_WARNING,
				1894	"null character(s) preserved in literal");
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1895
Joseph Myers	c663e30	2006-09-13 02:04:18 +0100	[diff] [blame]	1896	if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
				1897	cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
				1898	(int) terminator);
				1899
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1900	if (CPP_OPTION (pfile, user_literals))
				1901	{
Ed Smith-Rowland	c865f92	2013-06-29 03:41:58 +0000	[diff] [blame]	1902	/* If a string format macro, say from inttypes.h, is placed touching
				1903	a string literal it could be parsed as a C++11 user-defined string
				1904	literal thus breaking the program.
				1905	Try to identify macros with is_macro. A warning is issued. */
				1906	if (is_macro (pfile, cur))
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1907	{
Dodji Seketeli	112448b	2012-04-29 16:27:08 +0000	[diff] [blame]	1908	/* Raise a warning, but do not consume subsequent tokens. */
Edward Smith-Rowland	7aee864	2014-07-09 13:33:58 +0000	[diff] [blame]	1909	if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1910	cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
				1911	token->src_loc, 0,
				1912	"invalid suffix on literal; C++11 requires "
Ed Smith-Rowland	c865f92	2013-06-29 03:41:58 +0000	[diff] [blame]	1913	"a space between literal and string macro");
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1914	}
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1915	/* Grab user defined literal suffix. */
Ed Smith-Rowland	561f7fc	2013-02-14 02:55:42 +0000	[diff] [blame]	1916	else if (ISIDST (*cur))
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1917	{
				1918	type = cpp_userdef_char_add_type (type);
				1919	type = cpp_userdef_string_add_type (type);
				1920	++cur;
Ollie Wild	7f5f5f9	2012-04-27 14:29:32 +0000	[diff] [blame]	1921
				1922	while (ISIDNUM (*cur))
				1923	++cur;
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1924	}
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1925	}
Jason Merrill	fe19130	2015-05-09 00:50:10 -0400	[diff] [blame]	1926	else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
				1927	&& is_macro (pfile, cur)
				1928	&& !pfile->state.skipping)
				1929	cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
				1930	token->src_loc, 0, "C++11 requires a space "
				1931	"between string literal and macro");
Ed Smith-Rowland	3ce4f9e	2011-10-26 19:30:59 +0000	[diff] [blame]	1932
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	1933	pfile->buffer->cur = cur;
				1934	create_literal (pfile, token, base, cur - base, type);
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1935	}
				1936
Matthew Gingell	631d0d3	2008-10-05 12:35:36 +0000	[diff] [blame]	1937	/* Return the comment table. The client may not make any assumption
				1938	about the ordering of the table. */
				1939	cpp_comment_table *
				1940	cpp_get_comments (cpp_reader *pfile)
				1941	{
				1942	return &pfile->comments;
				1943	}
				1944
				1945	/* Append a comment to the end of the comment table. */
				1946	static void
				1947	store_comment (cpp_reader pfile, cpp_token token)
				1948	{
				1949	int len;
				1950
				1951	if (pfile->comments.allocated == 0)
				1952	{
				1953	pfile->comments.allocated = 256;
				1954	pfile->comments.entries = (cpp_comment *) xmalloc
				1955	(pfile->comments.allocated * sizeof (cpp_comment));
				1956	}
				1957
				1958	if (pfile->comments.count == pfile->comments.allocated)
				1959	{
				1960	pfile->comments.allocated *= 2;
				1961	pfile->comments.entries = (cpp_comment *) xrealloc
				1962	(pfile->comments.entries,
				1963	pfile->comments.allocated * sizeof (cpp_comment));
				1964	}
				1965
				1966	len = token->val.str.len;
				1967
				1968	/* Copy comment. Note, token may not be NULL terminated. */
				1969	pfile->comments.entries[pfile->comments.count].comment =
				1970	(char ) xmalloc (sizeof (char) (len + 1));
				1971	memcpy (pfile->comments.entries[pfile->comments.count].comment,
				1972	token->val.str.text, len);
				1973	pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
				1974
				1975	/* Set source location. */
				1976	pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
				1977
				1978	/* Increment the count of entries in the comment table. */
				1979	pfile->comments.count++;
				1980	}
				1981
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	1982	/* The stored comment includes the comment start and any terminator. */
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	1983	static void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	1984	save_comment (cpp_reader pfile, cpp_token token, const unsigned char *from,
				1985	cppchar_t type)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	1986	{
Neil Booth	5d7ee2f	2000-05-10 09:39:18 +0000	[diff] [blame]	1987	unsigned char *buffer;
Kai Tietz	651a20b	2010-11-16 19:50:17 +0000	[diff] [blame]	1988	unsigned int len, clen, i;
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	1989
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	1990	len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'. */
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	1991
Neil Booth	3542203	2000-10-29 09:56:00 +0000	[diff] [blame]	1992	/* C++ comments probably (not definitely) have moved past a new
				1993	line, which we don't want to save in the comment. */
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	1994	if (is_vspace (pfile->buffer->cur[-1]))
Neil Booth	3542203	2000-10-29 09:56:00 +0000	[diff] [blame]	1995	len--;
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	1996
Kai Tietz	651a20b	2010-11-16 19:50:17 +0000	[diff] [blame]	1997	/* If we are currently in a directive or in argument parsing, then
				1998	we need to store all C++ comments as C comments internally, and
				1999	so we need to allocate a little extra space in that case.
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2000
				2001	Note that the only time we encounter a directive here is
				2002	when we are saving comments in a "#define". */
Kai Tietz	651a20b	2010-11-16 19:50:17 +0000	[diff] [blame]	2003	clen = ((pfile->state.in_directive \|\| pfile->state.parsing_args)
				2004	&& type == '/') ? len + 2 : len;
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2005
				2006	buffer = _cpp_unaligned_alloc (pfile, clen);
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	2007
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2008	token->type = CPP_COMMENT;
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2009	token->val.str.len = clen;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2010	token->val.str.text = buffer;
Neil Booth	d1d9a6b	2000-05-27 23:19:56 +0000	[diff] [blame]	2011
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2012	buffer[0] = '/';
				2013	memcpy (buffer + 1, from, len - 1);
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2014
Kazu Hirata	1eeeb6a	2002-04-30 20:48:55 +0000	[diff] [blame]	2015	/* Finish conversion to a C comment, if necessary. */
Kai Tietz	651a20b	2010-11-16 19:50:17 +0000	[diff] [blame]	2016	if ((pfile->state.in_directive \|\| pfile->state.parsing_args) && type == '/')
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2017	{
				2018	buffer[1] = '*';
				2019	buffer[clen - 2] = '*';
				2020	buffer[clen - 1] = '/';
Kai Tietz	651a20b	2010-11-16 19:50:17 +0000	[diff] [blame]	2021	/* As there can be in a C++ comments illegal sequences for C comments
				2022	we need to filter them out. */
				2023	for (i = 2; i < (clen - 2); i++)
				2024	if (buffer[i] == '/' && (buffer[i - 1] == '' \|\| buffer[i + 1] == ''))
				2025	buffer[i] = '\|';
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2026	}
Matthew Gingell	631d0d3	2008-10-05 12:35:36 +0000	[diff] [blame]	2027
				2028	/* Finally store this comment for use by clients of libcpp. */
				2029	store_comment (pfile, token);
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2030	}
				2031
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2032	/* Allocate COUNT tokens for RUN. */
				2033	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2034	_cpp_init_tokenrun (tokenrun *run, unsigned int count)
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2035	{
Bernardo Innocenti	72bb2c3	2004-07-24 20:04:42 +0200	[diff] [blame]	2036	run->base = XNEWVEC (cpp_token, count);
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2037	run->limit = run->base + count;
				2038	run->next = NULL;
				2039	}
				2040
				2041	/* Returns the next tokenrun, or creates one if there is none. */
				2042	static tokenrun *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2043	next_tokenrun (tokenrun *run)
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2044	{
				2045	if (run->next == NULL)
				2046	{
Bernardo Innocenti	72bb2c3	2004-07-24 20:04:42 +0200	[diff] [blame]	2047	run->next = XNEW (tokenrun);
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2048	run->next->prev = run;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2049	_cpp_init_tokenrun (run->next, 250);
				2050	}
				2051
				2052	return run->next;
				2053	}
				2054
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2055	/* Return the number of not yet processed token in a given
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2056	context. */
				2057	int
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2058	_cpp_remaining_tokens_num_in_context (cpp_context *context)
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2059	{
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2060	if (context->tokens_kind == TOKENS_KIND_DIRECT)
Dodji Seketeli	cbbcf65	2011-10-20 08:49:29 +0000	[diff] [blame]	2061	return (LAST (context).token - FIRST (context).token);
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2062	else if (context->tokens_kind == TOKENS_KIND_INDIRECT
				2063	\|\| context->tokens_kind == TOKENS_KIND_EXTENDED)
Dodji Seketeli	cbbcf65	2011-10-20 08:49:29 +0000	[diff] [blame]	2064	return (LAST (context).ptoken - FIRST (context).ptoken);
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2065	else
				2066	abort ();
				2067	}
				2068
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2069	/* Returns the token present at index INDEX in a given context. If
				2070	INDEX is zero, the next token to be processed is returned. */
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2071	static const cpp_token*
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2072	_cpp_token_from_context_at (cpp_context *context, int index)
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2073	{
Tom Tromey	92582b7	2011-10-17 09:59:12 +0000	[diff] [blame]	2074	if (context->tokens_kind == TOKENS_KIND_DIRECT)
				2075	return &(FIRST (context).token[index]);
				2076	else if (context->tokens_kind == TOKENS_KIND_INDIRECT
				2077	\|\| context->tokens_kind == TOKENS_KIND_EXTENDED)
				2078	return FIRST (context).ptoken[index];
				2079	else
				2080	abort ();
				2081	}
				2082
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2083	/* Look ahead in the input stream. */
				2084	const cpp_token *
				2085	cpp_peek_token (cpp_reader *pfile, int index)
				2086	{
				2087	cpp_context *context = pfile->context;
				2088	const cpp_token *peektok;
				2089	int count;
				2090
				2091	/* First, scan through any pending cpp_context objects. */
				2092	while (context->prev)
				2093	{
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2094	ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2095
				2096	if (index < (int) sz)
Dodji Seketeli	ad2305a	2011-10-22 17:49:18 +0000	[diff] [blame]	2097	return _cpp_token_from_context_at (context, index);
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2098	index -= (int) sz;
				2099	context = context->prev;
				2100	}
				2101
				2102	/* We will have to read some new tokens after all (and do so
				2103	without invalidating preceding tokens). */
				2104	count = index;
				2105	pfile->keep_tokens++;
				2106
Jakub Jelinek	b8cd77f	2015-04-02 13:57:02 +0200	[diff] [blame]	2107	/* For peeked tokens temporarily disable line_change reporting,
				2108	until the tokens are parsed for real. */
				2109	void (line_change) (cpp_reader , const cpp_token *, int)
				2110	= pfile->cb.line_change;
				2111	pfile->cb.line_change = NULL;
				2112
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2113	do
				2114	{
				2115	peektok = _cpp_lex_token (pfile);
				2116	if (peektok->type == CPP_EOF)
Jakub Jelinek	e4b33ee	2015-04-06 19:01:50 +0200	[diff] [blame]	2117	{
				2118	index--;
				2119	break;
				2120	}
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2121	}
				2122	while (index--);
				2123
Jakub Jelinek	e4b33ee	2015-04-06 19:01:50 +0200	[diff] [blame]	2124	_cpp_backup_tokens_direct (pfile, count - index);
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2125	pfile->keep_tokens--;
Jakub Jelinek	b8cd77f	2015-04-02 13:57:02 +0200	[diff] [blame]	2126	pfile->cb.line_change = line_change;
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2127
				2128	return peektok;
				2129	}
				2130
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	2131	/* Allocate a single token that is invalidated at the same time as the
				2132	rest of the tokens on the line. Has its line and col set to the
				2133	same as the last lexed token, so that diagnostics appear in the
				2134	right place. */
				2135	cpp_token *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2136	_cpp_temp_token (cpp_reader *pfile)
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	2137	{
				2138	cpp_token old, result;
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2139	ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
				2140	ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	2141
				2142	old = pfile->cur_token - 1;
Ben Elliston	5950c3c	2008-07-14 05:09:48 +0000	[diff] [blame]	2143	/* Any pre-existing lookaheads must not be clobbered. */
				2144	if (la)
				2145	{
				2146	if (sz <= la)
				2147	{
				2148	tokenrun *next = next_tokenrun (pfile->cur_run);
				2149
				2150	if (sz < la)
				2151	memmove (next->base + 1, next->base,
				2152	(la - sz) * sizeof (cpp_token));
				2153
				2154	next->base[0] = pfile->cur_run->limit[-1];
				2155	}
				2156
				2157	if (sz > 1)
				2158	memmove (pfile->cur_token + 1, pfile->cur_token,
				2159	MIN (la, sz - 1) * sizeof (cpp_token));
				2160	}
				2161
				2162	if (!sz && pfile->cur_token == pfile->cur_run->limit)
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	2163	{
				2164	pfile->cur_run = next_tokenrun (pfile->cur_run);
				2165	pfile->cur_token = pfile->cur_run->base;
				2166	}
				2167
				2168	result = pfile->cur_token++;
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	2169	result->src_loc = old->src_loc;
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	2170	return result;
				2171	}
				2172
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2173	/* Lex a token into RESULT (external interface). Takes care of issues
				2174	like directive handling, token lookahead, multiple include
Joseph Myers	a1f300c	2001-11-23 02:05:19 +0000	[diff] [blame]	2175	optimization and skipping. */
Neil Booth	345894b	2001-09-16 13:44:29 +0000	[diff] [blame]	2176	const cpp_token *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2177	_cpp_lex_token (cpp_reader *pfile)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2178	{
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2179	cpp_token *result;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2180
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2181	for (;;)
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2182	{
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2183	if (pfile->cur_token == pfile->cur_run->limit)
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2184	{
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2185	pfile->cur_run = next_tokenrun (pfile->cur_run);
				2186	pfile->cur_token = pfile->cur_run->base;
				2187	}
Tom Tromey	ee38036	2007-01-30 15:46:01 +0000	[diff] [blame]	2188	/* We assume that the current token is somewhere in the current
				2189	run. */
				2190	if (pfile->cur_token < pfile->cur_run->base
				2191	\|\| pfile->cur_token >= pfile->cur_run->limit)
				2192	abort ();
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2193
				2194	if (pfile->lookaheads)
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2195	{
				2196	pfile->lookaheads--;
				2197	result = pfile->cur_token++;
				2198	}
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2199	else
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2200	result = _cpp_lex_direct (pfile);
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2201
				2202	if (result->flags & BOL)
				2203	{
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2204	/* Is this a directive. If _cpp_handle_directive returns
				2205	false, it is an assembler #. */
				2206	if (result->type == CPP_HASH
Neil Booth	e808ec9	2002-02-27 07:24:53 +0000	[diff] [blame]	2207	/* 6.10.3 p 11: Directives in a list of macro arguments
				2208	gives undefined behavior. This implementation
				2209	handles the directive as normal. */
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2210	&& pfile->state.parsing_args != 1)
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	2211	{
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2212	if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	2213	{
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2214	if (pfile->directive_result.type == CPP_PADDING)
				2215	continue;
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	2216	result = &pfile->directive_result;
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	2217	}
				2218	}
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2219	else if (pfile->state.in_deferred_pragma)
				2220	result = &pfile->directive_result;
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	2221
Neil Booth	9729389	2001-09-14 22:04:46 +0000	[diff] [blame]	2222	if (pfile->cb.line_change && !pfile->state.skipping)
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2223	pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2224	}
				2225
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2226	/* We don't skip tokens in directives. */
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2227	if (pfile->state.in_directive \|\| pfile->state.in_deferred_pragma)
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2228	break;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2229
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2230	/* Outside a directive, invalidate controlling macros. At file
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2231	EOF, _cpp_lex_direct takes care of popping the buffer, so we never
Kazu Hirata	6356f89	2003-06-12 19:01:08 +0000	[diff] [blame]	2232	get here and MI optimization works. */
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2233	pfile->mi_valid = false;
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2234
				2235	if (!pfile->state.skipping \|\| result->type == CPP_EOF)
				2236	break;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2237	}
				2238
Neil Booth	345894b	2001-09-16 13:44:29 +0000	[diff] [blame]	2239	return result;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2240	}
				2241
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2242	/* Returns true if a fresh line has been loaded. */
				2243	bool
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2244	_cpp_get_fresh_line (cpp_reader *pfile)
Neil Booth	004cb26	2002-05-17 20:16:48 +0000	[diff] [blame]	2245	{
Per Bothner	22234f5	2004-02-18 14:02:39 -0800	[diff] [blame]	2246	int return_at_eof;
				2247
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2248	/* We can't get a new line until we leave the current directive. */
				2249	if (pfile->state.in_directive)
				2250	return false;
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	2251
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2252	for (;;)
Neil Booth	1a76916	2002-06-11 05:36:17 +0000	[diff] [blame]	2253	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2254	cpp_buffer *buffer = pfile->buffer;
				2255
				2256	if (!buffer->need_line)
				2257	return true;
				2258
				2259	if (buffer->next_line < buffer->rlimit)
				2260	{
				2261	_cpp_clean_line (pfile);
				2262	return true;
				2263	}
				2264
				2265	/* First, get out of parsing arguments state. */
				2266	if (pfile->state.parsing_args)
Neil Booth	1a76916	2002-06-11 05:36:17 +0000	[diff] [blame]	2267	return false;
				2268
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2269	/* End of buffer. Non-empty files should end in a newline. */
				2270	if (buffer->buf != buffer->rlimit
				2271	&& buffer->next_line > buffer->rlimit
				2272	&& !buffer->from_stage3)
Neil Booth	004cb26	2002-05-17 20:16:48 +0000	[diff] [blame]	2273	{
Dave Korn	ed0e74e	2007-05-31 02:06:48 +0000	[diff] [blame]	2274	/* Clip to buffer size. */
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2275	buffer->next_line = buffer->rlimit;
Neil Booth	004cb26	2002-05-17 20:16:48 +0000	[diff] [blame]	2276	}
Per Bothner	22234f5	2004-02-18 14:02:39 -0800	[diff] [blame]	2277
				2278	return_at_eof = buffer->return_at_eof;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2279	_cpp_pop_buffer (pfile);
Per Bothner	22234f5	2004-02-18 14:02:39 -0800	[diff] [blame]	2280	if (pfile->buffer == NULL \|\| return_at_eof)
Per Bothner	a506c55	2003-10-02 07:20:38 +0000	[diff] [blame]	2281	return false;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2282	}
Neil Booth	004cb26	2002-05-17 20:16:48 +0000	[diff] [blame]	2283	}
				2284
/* Set result->type to THEN_TYPE and step buffer->cur past the next
   character if that character is CHAR; otherwise set result->type to
   ELSE_TYPE and leave buffer->cur alone.  Uses the variables `result'
   and `buffer' of the enclosing scope.  Arguments are parenthesized so
   that any expression may be passed.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)          \
  do                                                    \
    {                                                   \
      result->type = (ELSE_TYPE);                       \
      if (*buffer->cur == (CHAR))                       \
        buffer->cur++, result->type = (THEN_TYPE);      \
    }                                                   \
  while (0)
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2293
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2294	/* Lex a token into pfile->cur_token, which is also incremented, to
				2295	get diagnostics pointing to the correct location.
				2296
				2297	Does not handle issues such as token lookahead, multiple-include
Kazu Hirata	f1ba665	2003-06-28 19:43:01 +0000	[diff] [blame]	2298	optimization, directives, skipping etc. This function is only
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2299	suitable for use by _cpp_lex_token, and in special cases like
				2300	lex_expansion_token which doesn't care for any of these issues.
				2301
				2302	When meeting a newline, returns CPP_EOF if parsing a directive,
				2303	otherwise returns to the start of the token buffer if permissible.
				2304	Returns the location of the lexed token. */
				2305	cpp_token *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2306	_cpp_lex_direct (cpp_reader *pfile)
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2307	{
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2308	cppchar_t c;
Neil Booth	adb84b4	2000-11-08 23:08:07 +0000	[diff] [blame]	2309	cpp_buffer *buffer;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2310	const unsigned char *comment_start;
Neil Booth	14baae0	2001-09-17 18:26:12 +0000	[diff] [blame]	2311	cpp_token *result = pfile->cur_token++;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2312
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2313	fresh_line:
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2314	result->flags = 0;
Per Bothner	2be570f	2003-08-28 18:07:42 -0700	[diff] [blame]	2315	buffer = pfile->buffer;
Per Bothner	a506c55	2003-10-02 07:20:38 +0000	[diff] [blame]	2316	if (buffer->need_line)
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2317	{
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	2318	if (pfile->state.in_deferred_pragma)
				2319	{
				2320	result->type = CPP_PRAGMA_EOL;
				2321	pfile->state.in_deferred_pragma = false;
				2322	if (!pfile->state.pragma_allow_expansion)
				2323	pfile->state.prevent_expansion--;
				2324	return result;
				2325	}
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2326	if (!_cpp_get_fresh_line (pfile))
				2327	{
				2328	result->type = CPP_EOF;
Neil Booth	9ff7868	2003-04-26 21:03:51 +0000	[diff] [blame]	2329	if (!pfile->state.in_directive)
				2330	{
				2331	/* Tell the compiler the line number of the EOF token. */
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	2332	result->src_loc = pfile->line_table->highest_line;
Neil Booth	9ff7868	2003-04-26 21:03:51 +0000	[diff] [blame]	2333	result->flags = BOL;
				2334	}
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2335	return result;
				2336	}
				2337	if (!pfile->keep_tokens)
				2338	{
				2339	pfile->cur_run = &pfile->base_run;
				2340	result = pfile->base_run.base;
				2341	pfile->cur_token = result + 1;
				2342	}
				2343	result->flags = BOL;
				2344	if (pfile->state.parsing_args == 2)
				2345	result->flags \|= PREV_WHITE;
				2346	}
Per Bothner	a506c55	2003-10-02 07:20:38 +0000	[diff] [blame]	2347	buffer = pfile->buffer;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2348	update_tokens_line:
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	2349	result->src_loc = pfile->line_table->highest_line;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2350
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2351	skipped_white:
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2352	if (buffer->cur >= buffer->notes[buffer->cur_note].pos
				2353	&& !pfile->overlaid_buffer)
				2354	{
				2355	_cpp_process_line_notes (pfile, false);
Per Bothner	500bee0	2004-04-22 19:22:27 -0700	[diff] [blame]	2356	result->src_loc = pfile->line_table->highest_line;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2357	}
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2358	c = *buffer->cur++;
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	2359
Gabriel Charette	e3dfef4	2011-08-22 20:41:07 +0000	[diff] [blame]	2360	if (pfile->forced_token_location_p)
				2361	result->src_loc = *pfile->forced_token_location_p;
				2362	else
				2363	result->src_loc = linemap_position_for_column (pfile->line_table,
				2364	CPP_BUF_COLUMN (buffer, buffer->cur));
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2365
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2366	switch (c)
				2367	{
Neil Booth	4d6baaf	2001-11-26 23:44:54 +0000	[diff] [blame]	2368	case ' ': case '\t': case '\f': case '\v': case '\0':
				2369	result->flags \|= PREV_WHITE;
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2370	skip_whitespace (pfile, c);
				2371	goto skipped_white;
Neil Booth	4d6baaf	2001-11-26 23:44:54 +0000	[diff] [blame]	2372
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2373	case '\n':
Per Bothner	12f9df4	2004-02-11 07:29:30 -0800	[diff] [blame]	2374	if (buffer->cur < buffer->rlimit)
				2375	CPP_INCREMENT_LINE (pfile, 0);
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2376	buffer->need_line = true;
				2377	goto fresh_line;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2378
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2379	case '0': case '1': case '2': case '3': case '4':
				2380	case '5': case '6': case '7': case '8': case '9':
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2381	{
				2382	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
				2383	result->type = CPP_NUMBER;
				2384	lex_number (pfile, &result->val.str, &nst);
				2385	warn_about_normalization (pfile, result, &nst);
				2386	break;
				2387	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2388
Neil Booth	0abc6a6	2001-11-27 22:31:34 +0000	[diff] [blame]	2389	case 'L':
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	2390	case 'u':
				2391	case 'U':
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	2392	case 'R':
				2393	/* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
				2394	wide strings or raw strings. */
Joseph Myers	a48e3dd	2011-08-18 16:13:49 +0100	[diff] [blame]	2395	if (c == 'L' \|\| CPP_OPTION (pfile, rliterals)
				2396	\|\| (c != 'R' && CPP_OPTION (pfile, uliterals)))
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	2397	{
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	2398	if ((*buffer->cur == '\'' && c != 'R')
				2399	\|\| *buffer->cur == '"'
				2400	\|\| (*buffer->cur == 'R'
				2401	&& c != 'R'
				2402	&& buffer->cur[1] == '"'
Joseph Myers	a48e3dd	2011-08-18 16:13:49 +0100	[diff] [blame]	2403	&& CPP_OPTION (pfile, rliterals))
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	2404	\|\| (*buffer->cur == '8'
				2405	&& c == 'u'
				2406	&& (buffer->cur[1] == '"'
Joseph Myers	a48e3dd	2011-08-18 16:13:49 +0100	[diff] [blame]	2407	\|\| (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
				2408	&& CPP_OPTION (pfile, rliterals)))))
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	2409	{
				2410	lex_string (pfile, result, buffer->cur - 1);
				2411	break;
				2412	}
Neil Booth	bced6ed	2003-04-19 11:59:44 +0000	[diff] [blame]	2413	}
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	2414	/* Fall through. */
Neil Booth	0abc6a6	2001-11-27 22:31:34 +0000	[diff] [blame]	2415
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2416	case '_':
				2417	case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
				2418	case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
				2419	case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	2420	case 's': case 't': case 'v': case 'w': case 'x':
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2421	case 'y': case 'z':
				2422	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
Neil Booth	0abc6a6	2001-11-27 22:31:34 +0000	[diff] [blame]	2423	case 'G': case 'H': case 'I': case 'J': case 'K':
Jakub Jelinek	2c6e3f5	2009-10-19 23:41:15 +0200	[diff] [blame]	2424	case 'M': case 'N': case 'O': case 'P': case 'Q':
Kris Van Hees	b6baa67	2008-04-18 13:58:08 +0000	[diff] [blame]	2425	case 'S': case 'T': case 'V': case 'W': case 'X':
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2426	case 'Y': case 'Z':
				2427	result->type = CPP_NAME;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2428	{
				2429	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2430	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	2431	&nst,
				2432	&result->val.node.spelling);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2433	warn_about_normalization (pfile, result, &nst);
				2434	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2435
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2436	/* Convert named operators to their proper types. */
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2437	if (result->val.node.node->flags & NODE_OPERATOR)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2438	{
				2439	result->flags \|= NAMED_OP;
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2440	result->type = (enum cpp_ttype) result->val.node.node->directive_index;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2441	}
				2442	break;
				2443
				2444	case '\'':
				2445	case '"':
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	2446	lex_string (pfile, result, buffer->cur - 1);
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2447	break;
				2448
				2449	case '/':
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2450	/* A potential block or line comment. */
				2451	comment_start = buffer->cur;
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2452	c = *buffer->cur;
				2453
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2454	if (c == '*')
				2455	{
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2456	if (_cpp_skip_block_comment (pfile))
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	2457	cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2458	}
Marek Polacek	909eb89	2014-09-17 21:49:46 +0000	[diff] [blame]	2459	else if (c == '/' && ! CPP_OPTION (pfile, traditional))
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2460	{
Marek Polacek	909eb89	2014-09-17 21:49:46 +0000	[diff] [blame]	2461	/* Don't warn for system headers. */
				2462	if (cpp_in_system_header (pfile))
				2463	;
Marek Polacek	f3bede7	2014-08-10 06:10:49 +0000	[diff] [blame]	2464	/* Warn about comments if pedantically GNUC89, and not
Neil Booth	bdb05a7	2000-11-26 17:31:13 +0000	[diff] [blame]	2465	in system headers. */
Marek Polacek	909eb89	2014-09-17 21:49:46 +0000	[diff] [blame]	2466	else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
				2467	&& CPP_PEDANTIC (pfile)
				2468	&& ! buffer->warned_cplusplus_comments)
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2469	{
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	2470	cpp_error (pfile, CPP_DL_PEDWARN,
Gabriel Dos Reis	5650830	2002-07-21 21:35:17 +0000	[diff] [blame]	2471	"C++ style comments are not allowed in ISO C90");
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	2472	cpp_error (pfile, CPP_DL_PEDWARN,
Neil Booth	ebef4e8	2002-04-14 18:42:47 +0000	[diff] [blame]	2473	"(this will be reported only once per input file)");
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2474	buffer->warned_cplusplus_comments = 1;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2475	}
Marek Polacek	f3bede7	2014-08-10 06:10:49 +0000	[diff] [blame]	2476	/* Or if specifically desired via -Wc90-c99-compat. */
Marek Polacek	177cce4	2014-08-19 05:34:31 +0000	[diff] [blame]	2477	else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
Marek Polacek	dd3ff07	2014-08-20 04:12:58 +0000	[diff] [blame]	2478	&& ! CPP_OPTION (pfile, cplusplus)
Marek Polacek	f3bede7	2014-08-10 06:10:49 +0000	[diff] [blame]	2479	&& ! buffer->warned_cplusplus_comments)
				2480	{
				2481	cpp_error (pfile, CPP_DL_WARNING,
Marek Polacek	3f4f5c9	2014-08-19 15:52:02 +0000	[diff] [blame]	2482	"C++ style comments are incompatible with C90");
Marek Polacek	f3bede7	2014-08-10 06:10:49 +0000	[diff] [blame]	2483	cpp_error (pfile, CPP_DL_WARNING,
				2484	"(this will be reported only once per input file)");
				2485	buffer->warned_cplusplus_comments = 1;
				2486	}
Marek Polacek	909eb89	2014-09-17 21:49:46 +0000	[diff] [blame]	2487	/* In C89/C94, C++ style comments are forbidden. */
				2488	else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
				2489	\|\| CPP_OPTION (pfile, lang) == CLK_STDC94))
				2490	{
				2491	/* But don't be confused about valid code such as
				2492	- // immediately followed by *,
				2493	- // in a preprocessing directive,
				2494	- // in an #if 0 block. */
				2495	if (buffer->cur[1] == '*'
				2496	\|\| pfile->state.in_directive
				2497	\|\| pfile->state.skipping)
				2498	{
				2499	result->type = CPP_DIV;
				2500	break;
				2501	}
				2502	else if (! buffer->warned_cplusplus_comments)
				2503	{
				2504	cpp_error (pfile, CPP_DL_ERROR,
				2505	"C++ style comments are not allowed in ISO C90");
				2506	cpp_error (pfile, CPP_DL_ERROR,
				2507	"(this will be reported only once per input "
				2508	"file)");
				2509	buffer->warned_cplusplus_comments = 1;
				2510	}
				2511	}
Jakub Jelinek	01ef656	2001-04-11 11:43:10 +0200	[diff] [blame]	2512	if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
Simon Baldwin	87cf065	2010-04-07 17:18:10 +0000	[diff] [blame]	2513	cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2514	}
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2515	else if (c == '=')
				2516	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2517	buffer->cur++;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2518	result->type = CPP_DIV_EQ;
				2519	break;
				2520	}
				2521	else
				2522	{
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2523	result->type = CPP_DIV;
				2524	break;
				2525	}
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2526
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2527	if (!pfile->state.save_comments)
				2528	{
				2529	result->flags \|= PREV_WHITE;
Neil Booth	5fddcff	2001-09-11 07:00:12 +0000	[diff] [blame]	2530	goto update_tokens_line;
Neil Booth	1c6d33e	2000-09-25 22:39:51 +0000	[diff] [blame]	2531	}
				2532
				2533	/* Save the comment as a token in its own right. */
Jason Thorpe	477cdac	2002-04-07 03:12:23 +0000	[diff] [blame]	2534	save_comment (pfile, result, comment_start, c);
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2535	break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2536
				2537	case '<':
				2538	if (pfile->state.angled_headers)
				2539	{
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	2540	lex_string (pfile, result, buffer->cur - 1);
Joseph Myers	4bb09c2	2009-02-21 21:25:39 +0000	[diff] [blame]	2541	if (result->type != CPP_LESS)
				2542	break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2543	}
				2544
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2545	result->type = CPP_LESS;
				2546	if (*buffer->cur == '=')
				2547	buffer->cur++, result->type = CPP_LESS_EQ;
				2548	else if (*buffer->cur == '<')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2549	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2550	buffer->cur++;
				2551	IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2552	}
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2553	else if (CPP_OPTION (pfile, digraphs))
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2554	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2555	if (*buffer->cur == ':')
				2556	{
Paolo Carlini	1582c67	2013-01-04 15:30:24 +0000	[diff] [blame]	2557	/* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
				2558	three characters are <:: and the subsequent character
				2559	is neither : nor >, the < is treated as a preprocessor
				2560	token by itself". */
				2561	if (CPP_OPTION (pfile, cplusplus)
Paolo Carlini	6194915	2013-04-24 19:33:54 +0000	[diff] [blame]	2562	&& CPP_OPTION (pfile, lang) != CLK_CXX98
				2563	&& CPP_OPTION (pfile, lang) != CLK_GNUCXX
Paolo Carlini	1582c67	2013-01-04 15:30:24 +0000	[diff] [blame]	2564	&& buffer->cur[1] == ':'
				2565	&& buffer->cur[2] != ':' && buffer->cur[2] != '>')
				2566	break;
				2567
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2568	buffer->cur++;
				2569	result->flags \|= DIGRAPH;
				2570	result->type = CPP_OPEN_SQUARE;
				2571	}
				2572	else if (*buffer->cur == '%')
				2573	{
				2574	buffer->cur++;
				2575	result->flags \|= DIGRAPH;
				2576	result->type = CPP_OPEN_BRACE;
				2577	}
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2578	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2579	break;
				2580
				2581	case '>':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2582	result->type = CPP_GREATER;
				2583	if (*buffer->cur == '=')
				2584	buffer->cur++, result->type = CPP_GREATER_EQ;
				2585	else if (*buffer->cur == '>')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2586	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2587	buffer->cur++;
				2588	IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
				2589	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2590	break;
				2591
Neil Booth	cbcff6d	2000-09-23 21:41:41 +0000	[diff] [blame]	2592	case '%':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2593	result->type = CPP_MOD;
				2594	if (*buffer->cur == '=')
				2595	buffer->cur++, result->type = CPP_MOD_EQ;
				2596	else if (CPP_OPTION (pfile, digraphs))
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2597	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2598	if (*buffer->cur == ':')
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2599	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2600	buffer->cur++;
				2601	result->flags \|= DIGRAPH;
				2602	result->type = CPP_HASH;
				2603	if (*buffer->cur == '%' && buffer->cur[1] == ':')
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2604	buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2605	}
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2606	else if (*buffer->cur == '>')
				2607	{
				2608	buffer->cur++;
				2609	result->flags \|= DIGRAPH;
				2610	result->type = CPP_CLOSE_BRACE;
				2611	}
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2612	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2613	break;
				2614
Neil Booth	cbcff6d	2000-09-23 21:41:41 +0000	[diff] [blame]	2615	case '.':
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2616	result->type = CPP_DOT;
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2617	if (ISDIGIT (*buffer->cur))
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2618	{
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2619	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2620	result->type = CPP_NUMBER;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2621	lex_number (pfile, &result->val.str, &nst);
				2622	warn_about_normalization (pfile, result, &nst);
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2623	}
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2624	else if (*buffer->cur == '.' && buffer->cur[1] == '.')
				2625	buffer->cur += 2, result->type = CPP_ELLIPSIS;
				2626	else if (buffer->cur == '' && CPP_OPTION (pfile, cplusplus))
				2627	buffer->cur++, result->type = CPP_DOT_STAR;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2628	break;
				2629
				2630	case '+':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2631	result->type = CPP_PLUS;
				2632	if (*buffer->cur == '+')
				2633	buffer->cur++, result->type = CPP_PLUS_PLUS;
				2634	else if (*buffer->cur == '=')
				2635	buffer->cur++, result->type = CPP_PLUS_EQ;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2636	break;
				2637
				2638	case '-':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2639	result->type = CPP_MINUS;
				2640	if (*buffer->cur == '>')
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2641	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2642	buffer->cur++;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2643	result->type = CPP_DEREF;
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2644	if (buffer->cur == '' && CPP_OPTION (pfile, cplusplus))
				2645	buffer->cur++, result->type = CPP_DEREF_STAR;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2646	}
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2647	else if (*buffer->cur == '-')
				2648	buffer->cur++, result->type = CPP_MINUS_MINUS;
				2649	else if (*buffer->cur == '=')
				2650	buffer->cur++, result->type = CPP_MINUS_EQ;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2651	break;
				2652
				2653	case '&':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2654	result->type = CPP_AND;
				2655	if (*buffer->cur == '&')
				2656	buffer->cur++, result->type = CPP_AND_AND;
				2657	else if (*buffer->cur == '=')
				2658	buffer->cur++, result->type = CPP_AND_EQ;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2659	break;
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	2660
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2661	case '\|':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2662	result->type = CPP_OR;
				2663	if (*buffer->cur == '\|')
				2664	buffer->cur++, result->type = CPP_OR_OR;
				2665	else if (*buffer->cur == '=')
				2666	buffer->cur++, result->type = CPP_OR_EQ;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2667	break;
				2668
				2669	case ':':
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2670	result->type = CPP_COLON;
				2671	if (*buffer->cur == ':' && CPP_OPTION (pfile, cplusplus))
				2672	buffer->cur++, result->type = CPP_SCOPE;
				2673	else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2674	{
Neil Booth	6f572ac	2003-04-19 16:34:33 +0000	[diff] [blame]	2675	buffer->cur++;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2676	result->flags \|= DIGRAPH;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2677	result->type = CPP_CLOSE_SQUARE;
				2678	}
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2679	break;
				2680
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2681	case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
				2682	case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
				2683	case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
				2684	case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2685	case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
Neil Booth	480709c	2001-10-21 14:04:42 +0000	[diff] [blame]	2686
Neil Booth	26aea07	2003-04-19 00:22:51 +0000	[diff] [blame]	2687	case '?': result->type = CPP_QUERY; break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2688	case '~': result->type = CPP_COMPL; break;
				2689	case ',': result->type = CPP_COMMA; break;
				2690	case '(': result->type = CPP_OPEN_PAREN; break;
				2691	case ')': result->type = CPP_CLOSE_PAREN; break;
				2692	case '[': result->type = CPP_OPEN_SQUARE; break;
				2693	case ']': result->type = CPP_CLOSE_SQUARE; break;
				2694	case '{': result->type = CPP_OPEN_BRACE; break;
				2695	case '}': result->type = CPP_CLOSE_BRACE; break;
				2696	case ';': result->type = CPP_SEMICOLON; break;
				2697
Kazu Hirata	40f0365	2002-09-26 22:25:14 +0000	[diff] [blame]	2698	/* @ is a punctuator in Objective-C. */
Zack Weinberg	cc93758	2001-03-07 01:32:01 +0000	[diff] [blame]	2699	case '@': result->type = CPP_ATSIGN; break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2700
Neil Booth	0abc6a6	2001-11-27 22:31:34 +0000	[diff] [blame]	2701	case '$':
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	2702	case '\\':
				2703	{
				2704	const uchar *base = --buffer->cur;
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2705	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
Neil Booth	0abc6a6	2001-11-27 22:31:34 +0000	[diff] [blame]	2706
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2707	if (forms_identifier_p (pfile, true, &nst))
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	2708	{
				2709	result->type = CPP_NAME;
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	2710	result->val.node.node = lex_identifier (pfile, base, true, &nst,
				2711	&result->val.node.spelling);
Geoffrey Keating	50668cf	2005-03-15 00:36:33 +0000	[diff] [blame]	2712	warn_about_normalization (pfile, result, &nst);
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	2713	break;
				2714	}
				2715	buffer->cur++;
Neil Booth	1067694	2003-04-22 19:28:00 +0000	[diff] [blame]	2716	}
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	2717
Neil Booth	1067694	2003-04-22 19:28:00 +0000	[diff] [blame]	2718	default:
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	2719	create_literal (pfile, result, buffer->cur - 1, 1, CPP_OTHER);
				2720	break;
Neil Booth	0d9f234	2000-09-18 18:43:05 +0000	[diff] [blame]	2721	}
Neil Booth	bdcbe49	2001-09-13 20:05:17 +0000	[diff] [blame]	2722
				2723	return result;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	2724	}
				2725
Neil Booth	5932565	2003-04-24 20:03:57 +0000	[diff] [blame]	2726	/* An upper bound on the number of bytes needed to spell TOKEN.
				2727	Does not include preceding whitespace. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2728	unsigned int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2729	cpp_token_len (const cpp_token *token)
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	2730	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2731	unsigned int len;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	2732
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2733	switch (TOKEN_SPELL (token))
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	2734	{
Joseph Myers	cc95528	2008-11-29 12:21:10 +0000	[diff] [blame]	2735	default: len = 6; break;
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	2736	case SPELL_LITERAL: len = token->val.str.len; break;
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2737	case SPELL_IDENT: len = NODE_LEN (token->val.node.node) * 10; break;
Zack Weinberg	c5a0473	2000-04-25 19:32:36 +0000	[diff] [blame]	2738	}
Neil Booth	5932565	2003-04-24 20:03:57 +0000	[diff] [blame]	2739
				2740	return len;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2741	}
				2742
/* Parse one UTF-8 sequence starting at NAME and store the equivalent
   \U escape in BUFFER.  Return the number of bytes read from NAME.
   Exactly 10 bytes are always written to BUFFER: a backslash, 'U' and
   eight lowercase hexadecimal digits.  No terminating NUL is written.
   Calls abort () if a continuation byte is not of the form 10xxxxxx.  */

static size_t
utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
{
  int j;
  int ucn_len = 0;
  int ucn_len_c;
  unsigned t;
  unsigned long utf32;

  /* Compute the length of the UTF-8 sequence: it is the number of
     consecutive set bits at the top of the first byte.  */
  for (t = *name; t & 0x80; t <<= 1)
    ucn_len++;

  /* The remaining low bits of the first byte are payload.  */
  utf32 = *name & (0x7F >> ucn_len);
  for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
    {
      /* Each continuation byte contributes six payload bits.  */
      utf32 = (utf32 << 6) | (*++name & 0x3F);

      /* Ill-formed UTF-8.  */
      if ((*name & ~0x3F) != 0x80)
        abort ();
    }

  *buffer++ = '\\';
  *buffer++ = 'U';
  /* Emit the eight hex digits, most significant nibble first.  */
  for (j = 7; j >= 0; j--)
    *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
  return ucn_len;
}
				2776
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	2777	/* Given a token TYPE corresponding to a digraph, return a pointer to
				2778	the spelling of the digraph. */
				2779	static const unsigned char *
				2780	cpp_digraph2name (enum cpp_ttype type)
				2781	{
				2782	return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
				2783	}
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	2784
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	2785	/* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
				2786	The buffer must already contain the enough space to hold the
				2787	token's spelling. Returns a pointer to the character after the
				2788	last character written. */
				2789	unsigned char *
				2790	_cpp_spell_ident_ucns (unsigned char buffer, cpp_hashnode ident)
				2791	{
				2792	size_t i;
				2793	const unsigned char *name = NODE_NAME (ident);
				2794
				2795	for (i = 0; i < NODE_LEN (ident); i++)
				2796	if (name[i] & ~0x7F)
				2797	{
				2798	i += utf8_to_ucn (buffer, name + i) - 1;
				2799	buffer += 10;
				2800	}
				2801	else
				2802	*buffer++ = name[i];
				2803
				2804	return buffer;
				2805	}
				2806
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	2807	/* Write the spelling of a token TOKEN to BUFFER. The buffer must
Zack Weinberg	cf00a88	2000-07-08 02:33:00 +0000	[diff] [blame]	2808	already contain the enough space to hold the token's spelling.
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2809	Returns a pointer to the character after the last character written.
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	2810	FORSTRING is true if this is to be the spelling after translation
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	2811	phase 1 (with the original spelling of extended identifiers), false
				2812	if extended identifiers should always be written using UCNs (there is
				2813	no option for always writing them in the internal UTF-8 form).
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2814	FIXME: Would be nice if we didn't need the PFILE argument. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2815	unsigned char *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2816	cpp_spell_token (cpp_reader pfile, const cpp_token token,
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	2817	unsigned char *buffer, bool forstring)
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	2818	{
Zack Weinberg	96be699	2000-07-18 23:25:06 +0000	[diff] [blame]	2819	switch (TOKEN_SPELL (token))
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	2820	{
Neil Booth	5d7ee2f	2000-05-10 09:39:18 +0000	[diff] [blame]	2821	case SPELL_OPERATOR:
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	2822	{
				2823	const unsigned char *spelling;
				2824	unsigned char c;
				2825
				2826	if (token->flags & DIGRAPH)
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	2827	spelling = cpp_digraph2name (token->type);
Zack Weinberg	92936ec	2000-07-19 20:18:08 +0000	[diff] [blame]	2828	else if (token->flags & NAMED_OP)
				2829	goto spell_ident;
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	2830	else
Zack Weinberg	96be699	2000-07-18 23:25:06 +0000	[diff] [blame]	2831	spelling = TOKEN_NAME (token);
Kazu Hirata	df38348	2002-05-22 22:02:16 +0000	[diff] [blame]	2832
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	2833	while ((c = *spelling++) != '\0')
				2834	*buffer++ = c;
				2835	}
				2836	break;
				2837
Zack Weinberg	47ad413	2001-10-06 23:11:27 +0000	[diff] [blame]	2838	spell_ident:
Neil Booth	5d7ee2f	2000-05-10 09:39:18 +0000	[diff] [blame]	2839	case SPELL_IDENT:
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	2840	if (forstring)
				2841	{
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	2842	memcpy (buffer, NODE_NAME (token->val.node.spelling),
				2843	NODE_LEN (token->val.node.spelling));
				2844	buffer += NODE_LEN (token->val.node.spelling);
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	2845	}
				2846	else
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	2847	buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
Neil Booth	5d7ee2f	2000-05-10 09:39:18 +0000	[diff] [blame]	2848	break;
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	2849
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	2850	case SPELL_LITERAL:
Zack Weinberg	47ad413	2001-10-06 23:11:27 +0000	[diff] [blame]	2851	memcpy (buffer, token->val.str.text, token->val.str.len);
				2852	buffer += token->val.str.len;
				2853	break;
				2854
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	2855	case SPELL_NONE:
John David Anglin	0527bc4	2003-11-01 22:56:54 +0000	[diff] [blame]	2856	cpp_error (pfile, CPP_DL_ICE,
				2857	"unspellable token %s", TOKEN_NAME (token));
Neil Booth	3fef5b2	2000-05-08 22:22:49 +0000	[diff] [blame]	2858	break;
				2859	}
				2860
				2861	return buffer;
				2862	}
				2863
Neil Booth	5d8ebbd	2002-01-03 21:43:09 +0000	[diff] [blame]	2864	/* Returns TOKEN spelt as a null-terminated string. The string is
				2865	freed when the reader is destroyed. Useful for diagnostics. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2866	unsigned char *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2867	cpp_token_as_text (cpp_reader pfile, const cpp_token token)
Neil Booth	5932565	2003-04-24 20:03:57 +0000	[diff] [blame]	2868	{
				2869	unsigned int len = cpp_token_len (token) + 1;
Neil Booth	ece54d5	2001-09-28 09:40:22 +0000	[diff] [blame]	2870	unsigned char start = _cpp_unaligned_alloc (pfile, len), end;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2871
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	2872	end = cpp_spell_token (pfile, token, start, false);
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2873	end[0] = '\0';
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2874
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2875	return start;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2876	}
				2877
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	2878	/* Returns a pointer to a string which spells the token defined by
				2879	TYPE and FLAGS. Used by C front ends, which really should move to
				2880	using cpp_token_as_text. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2881	const char *
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	2882	cpp_type2name (enum cpp_ttype type, unsigned char flags)
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2883	{
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	2884	if (flags & DIGRAPH)
				2885	return (const char *) cpp_digraph2name (type);
				2886	else if (flags & NAMED_OP)
				2887	return cpp_named_operator2name (type);
				2888
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2889	return (const char *) token_spellings[type].name;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2890	}
				2891
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	2892	/* Writes the spelling of token to FP, without any preceding space.
				2893	Separated from cpp_spell_token for efficiency - to avoid stdio
				2894	double-buffering. */
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2895	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2896	cpp_output_token (const cpp_token token, FILE fp)
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2897	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2898	switch (TOKEN_SPELL (token))
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2899	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2900	case SPELL_OPERATOR:
				2901	{
				2902	const unsigned char *spelling;
Zack Weinberg	3b681e9	2001-09-28 07:00:27 +0000	[diff] [blame]	2903	int c;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2904
				2905	if (token->flags & DIGRAPH)
Manuel López-Ibáñez	cfc9353	2009-04-22 15:32:18 +0000	[diff] [blame]	2906	spelling = cpp_digraph2name (token->type);
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2907	else if (token->flags & NAMED_OP)
				2908	goto spell_ident;
				2909	else
				2910	spelling = TOKEN_NAME (token);
				2911
Zack Weinberg	3b681e9	2001-09-28 07:00:27 +0000	[diff] [blame]	2912	c = *spelling;
				2913	do
				2914	putc (c, fp);
				2915	while ((c = *++spelling) != '\0');
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2916	}
				2917	break;
				2918
				2919	spell_ident:
				2920	case SPELL_IDENT:
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	2921	{
				2922	size_t i;
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2923	const unsigned char * name = NODE_NAME (token->val.node.node);
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	2924
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2925	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	2926	if (name[i] & ~0x7F)
				2927	{
				2928	unsigned char buffer[10];
				2929	i += utf8_to_ucn (buffer, name + i) - 1;
				2930	fwrite (buffer, 1, 10, fp);
				2931	}
				2932	else
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2933	fputc (NODE_NAME (token->val.node.node)[i], fp);
Geoffrey Keating	47e2049	2005-03-12 10:44:06 +0000	[diff] [blame]	2934	}
				2935	break;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2936
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	2937	case SPELL_LITERAL:
Zack Weinberg	47ad413	2001-10-06 23:11:27 +0000	[diff] [blame]	2938	fwrite (token->val.str.text, 1, token->val.str.len, fp);
				2939	break;
				2940
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2941	case SPELL_NONE:
				2942	/* An error, most probably. */
				2943	break;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2944	}
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2945	}
				2946
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2947	/* Compare two tokens. */
				2948	int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2949	_cpp_equiv_tokens (const cpp_token a, const cpp_token b)
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2950	{
				2951	if (a->type == b->type && a->flags == b->flags)
				2952	switch (TOKEN_SPELL (a))
				2953	{
				2954	default: /* Keep compiler happy. */
				2955	case SPELL_OPERATOR:
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2956	/* token_no is used to track where multiple consecutive ##
Joseph Myers	aa50850	2009-04-19 18:10:56 +0100	[diff] [blame]	2957	tokens were originally located. */
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2958	return (a->type != CPP_PASTE \|\| a->val.token_no == b->val.token_no);
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2959	case SPELL_NONE:
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	2960	return (a->type != CPP_MACRO_ARG
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	2961	\|\| (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
				2962	&& a->val.macro_arg.spelling == b->val.macro_arg.spelling));
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2963	case SPELL_IDENT:
Joseph Myers	be5ffc5	2014-11-06 21:08:52 +0000	[diff] [blame]	2964	return (a->val.node.node == b->val.node.node
				2965	&& a->val.node.spelling == b->val.node.spelling);
Neil Booth	6338b35	2003-04-23 22:44:06 +0000	[diff] [blame]	2966	case SPELL_LITERAL:
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2967	return (a->val.str.len == b->val.str.len
				2968	&& !memcmp (a->val.str.text, b->val.str.text,
				2969	a->val.str.len));
				2970	}
				2971
				2972	return 0;
				2973	}
				2974
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2975	/* Returns nonzero if a space should be inserted to avoid an
				2976	accidental token paste for output. For simplicity, it is
				2977	conservative, and occasionally advises a space where one is not
				2978	needed, e.g. "." and ".2". */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2979	int
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	2980	cpp_avoid_paste (cpp_reader pfile, const cpp_token token1,
				2981	const cpp_token *token2)
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2982	{
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2983	enum cpp_ttype a = token1->type, b = token2->type;
				2984	cppchar_t c;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2985
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2986	if (token1->flags & NAMED_OP)
				2987	a = CPP_NAME;
				2988	if (token2->flags & NAMED_OP)
				2989	b = CPP_NAME;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2990
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2991	c = EOF;
				2992	if (token2->flags & DIGRAPH)
John David Anglin	37b8524	2001-03-02 01:11:50 +0000	[diff] [blame]	2993	c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2994	else if (token_spellings[b].category == SPELL_OPERATOR)
				2995	c = token_spellings[b].name[0];
Zack Weinberg	417f3e3	2000-07-11 23:20:53 +0000	[diff] [blame]	2996
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	2997	/* Quickly get everything that can paste with an '='. */
John David Anglin	37b8524	2001-03-02 01:11:50 +0000	[diff] [blame]	2998	if ((int) a <= (int) CPP_LAST_EQ && c == '=')
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	2999	return 1;
				3000
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3001	switch (a)
				3002	{
Steve Ellcey	b52dbbf	2006-08-14 23:13:54 +0000	[diff] [blame]	3003	case CPP_GREATER: return c == '>';
				3004	case CPP_LESS: return c == '<' \|\| c == '%' \|\| c == ':';
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3005	case CPP_PLUS: return c == '+';
				3006	case CPP_MINUS: return c == '-' \|\| c == '>';
				3007	case CPP_DIV: return c == '/' \|\| c == ''; / Comments. */
				3008	case CPP_MOD: return c == ':' \|\| c == '>';
				3009	case CPP_AND: return c == '&';
				3010	case CPP_OR: return c == '\|';
				3011	case CPP_COLON: return c == ':' \|\| c == '>';
				3012	case CPP_DEREF: return c == '*';
Neil Booth	26ec42e	2001-01-28 11:22:23 +0000	[diff] [blame]	3013	case CPP_DOT: return c == '.' \|\| c == '%' \|\| b == CPP_NUMBER;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3014	case CPP_HASH: return c == '#' \|\| c == '%'; /* Digraph form. */
				3015	case CPP_NAME: return ((b == CPP_NUMBER
				3016	&& name_p (pfile, &token2->val.str))
				3017	\|\| b == CPP_NAME
				3018	\|\| b == CPP_CHAR \|\| b == CPP_STRING); /* L */
				3019	case CPP_NUMBER: return (b == CPP_NUMBER \|\| b == CPP_NAME
				3020	\|\| c == '.' \|\| c == '+' \|\| c == '-');
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	3021	/* UCNs */
Neil Booth	1067694	2003-04-22 19:28:00 +0000	[diff] [blame]	3022	case CPP_OTHER: return ((token1->val.str.text[0] == '\\'
				3023	&& b == CPP_NAME)
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	3024	\|\| (CPP_OPTION (pfile, objc)
Neil Booth	1067694	2003-04-22 19:28:00 +0000	[diff] [blame]	3025	&& token1->val.str.text[0] == '@'
Neil Booth	1613e52	2003-04-20 07:29:23 +0000	[diff] [blame]	3026	&& (b == CPP_NAME \|\| b == CPP_STRING)));
Jakub Jelinek	87e356b	2013-07-10 18:40:49 +0200	[diff] [blame]	3027	case CPP_STRING:
				3028	case CPP_WSTRING:
				3029	case CPP_UTF8STRING:
				3030	case CPP_STRING16:
				3031	case CPP_STRING32: return (CPP_OPTION (pfile, user_literals)
				3032	&& (b == CPP_NAME
				3033	\|\| (TOKEN_SPELL (token2) == SPELL_LITERAL
				3034	&& ISIDST (token2->val.str.text[0]))));
				3035
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3036	default: break;
				3037	}
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3038
				3039	return 0;
				3040	}
				3041
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3042	/* Output all the remaining tokens on the current line, and a newline
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	3043	character, to FP. Leading whitespace is removed. If there are
				3044	macros, special token padding is not performed. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3045	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3046	cpp_output_line (cpp_reader pfile, FILE fp)
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3047	{
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	3048	const cpp_token *token;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3049
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	3050	token = cpp_get_token (pfile);
				3051	while (token->type != CPP_EOF)
Zack Weinberg	6ead1e9	2000-07-31 23:47:19 +0000	[diff] [blame]	3052	{
Neil Booth	4ed5bcf	2001-09-24 22:53:12 +0000	[diff] [blame]	3053	cpp_output_token (token, fp);
				3054	token = cpp_get_token (pfile);
				3055	if (token->flags & PREV_WHITE)
				3056	putc (' ', fp);
Zack Weinberg	6ead1e9	2000-07-31 23:47:19 +0000	[diff] [blame]	3057	}
				3058
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3059	putc ('\n', fp);
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3060	}
				3061
Tom Tromey	5d6342e	2008-05-21 21:52:57 +0000	[diff] [blame]	3062	/* Return a string representation of all the remaining tokens on the
				3063	current line. The result is allocated using xmalloc and must be
				3064	freed by the caller. */
				3065	unsigned char *
				3066	cpp_output_line_to_string (cpp_reader pfile, const unsigned char dir_name)
				3067	{
				3068	const cpp_token *token;
				3069	unsigned int out = dir_name ? ustrlen (dir_name) : 0;
				3070	unsigned int alloced = 120 + out;
				3071	unsigned char result = (unsigned char ) xmalloc (alloced);
				3072
				3073	/* If DIR_NAME is empty, there are no initial contents. */
				3074	if (dir_name)
				3075	{
				3076	sprintf ((char *) result, "#%s ", dir_name);
				3077	out += 2;
				3078	}
				3079
				3080	token = cpp_get_token (pfile);
				3081	while (token->type != CPP_EOF)
				3082	{
				3083	unsigned char *last;
				3084	/* Include room for a possible space and the terminating nul. */
				3085	unsigned int len = cpp_token_len (token) + 2;
				3086
				3087	if (out + len > alloced)
				3088	{
				3089	alloced *= 2;
				3090	if (out + len > alloced)
				3091	alloced = out + len;
				3092	result = (unsigned char *) xrealloc (result, alloced);
				3093	}
				3094
				3095	last = cpp_spell_token (pfile, token, &result[out], 0);
				3096	out = last - result;
				3097
				3098	token = cpp_get_token (pfile);
				3099	if (token->flags & PREV_WHITE)
				3100	result[out++] = ' ';
				3101	}
				3102
				3103	result[out] = '\0';
				3104	return result;
				3105	}
				3106
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is a size in bytes.  BUFF_SIZE_UPPER_BOUND (MIN_SIZE)
   evaluates to MIN_BUFF_SIZE plus one and a half times MIN_SIZE.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) evaluates to MIN_EXTRA plus
   twice the number of bytes between BUFF->cur and BUFF->limit.  Every
   macro argument is parenthesized so that any expression may be
   passed.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
  ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

/* Compile-time sanity check on the two size constants above.  */
#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
				3121
Neil Booth	c9e7a60	2001-09-27 12:59:38 +0000	[diff] [blame]	3122	/* Create a new allocation buffer. Place the control block at the end
				3123	of the buffer, so that buffer overflows will cause immediate chaos. */
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3124	static _cpp_buff *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3125	new_buff (size_t len)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3126	{
				3127	_cpp_buff *result;
Neil Booth	ece54d5	2001-09-28 09:40:22 +0000	[diff] [blame]	3128	unsigned char *base;
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3129
Neil Booth	1e013d2	2001-09-26 21:44:35 +0000	[diff] [blame]	3130	if (len < MIN_BUFF_SIZE)
				3131	len = MIN_BUFF_SIZE;
Neil Booth	c70f6ed	2002-06-07 06:26:32 +0000	[diff] [blame]	3132	len = CPP_ALIGN (len);
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3133
Jakub Jelinek	1a80db9	2013-02-28 10:58:47 +0100	[diff] [blame]	3134	#ifdef ENABLE_VALGRIND_CHECKING
				3135	/* Valgrind warns about uses of interior pointers, so put _cpp_buff
				3136	struct first. */
				3137	size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
				3138	base = XNEWVEC (unsigned char, len + slen);
				3139	result = (_cpp_buff *) base;
				3140	base += slen;
				3141	#else
Gabriel Dos Reis	c3f829c	2005-05-28 15:52:48 +0000	[diff] [blame]	3142	base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3143	result = (_cpp_buff *) (base + len);
Jakub Jelinek	1a80db9	2013-02-28 10:58:47 +0100	[diff] [blame]	3144	#endif
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3145	result->base = base;
				3146	result->cur = base;
				3147	result->limit = base + len;
				3148	result->next = NULL;
				3149	return result;
				3150	}
				3151
				3152	/* Place a chain of unwanted allocation buffers on the free list. */
				3153	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3154	_cpp_release_buff (cpp_reader pfile, _cpp_buff buff)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3155	{
				3156	_cpp_buff *end = buff;
				3157
				3158	while (end->next)
				3159	end = end->next;
				3160	end->next = pfile->free_buffs;
				3161	pfile->free_buffs = buff;
				3162	}
				3163
				3164	/* Return a free buffer of size at least MIN_SIZE. */
				3165	_cpp_buff *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3166	_cpp_get_buff (cpp_reader *pfile, size_t min_size)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3167	{
				3168	_cpp_buff result, *p;
				3169
				3170	for (p = &pfile->free_buffs;; p = &(*p)->next)
				3171	{
Neil Booth	6142088	2001-09-28 13:25:38 +0000	[diff] [blame]	3172	size_t size;
Neil Booth	1e013d2	2001-09-26 21:44:35 +0000	[diff] [blame]	3173
				3174	if (*p == NULL)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3175	return new_buff (min_size);
Neil Booth	1e013d2	2001-09-26 21:44:35 +0000	[diff] [blame]	3176	result = *p;
				3177	size = result->limit - result->base;
				3178	/* Return a buffer that's big enough, but don't waste one that's
				3179	way too big. */
Richard Earnshaw	34f5271	2001-10-17 16:20:04 +0000	[diff] [blame]	3180	if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3181	break;
				3182	}
				3183
				3184	*p = result->next;
				3185	result->next = NULL;
				3186	result->cur = result->base;
				3187	return result;
				3188	}
				3189
Kazu Hirata	4fe9b91	2001-10-09 06:03:16 +0000	[diff] [blame]	3190	/* Creates a new buffer with enough space to hold the uncommitted
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3191	remaining bytes of BUFF, and at least MIN_EXTRA more bytes. Copies
				3192	the excess bytes to the new buffer. Chains the new buffer after
				3193	BUFF, and returns the new buffer. */
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3194	_cpp_buff *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3195	_cpp_append_extend_buff (cpp_reader pfile, _cpp_buff buff, size_t min_extra)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3196	{
Neil Booth	6142088	2001-09-28 13:25:38 +0000	[diff] [blame]	3197	size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3198	_cpp_buff *new_buff = _cpp_get_buff (pfile, size);
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3199
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3200	buff->next = new_buff;
				3201	memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
				3202	return new_buff;
				3203	}
				3204
Kazu Hirata	4fe9b91	2001-10-09 06:03:16 +0000	[diff] [blame]	3205	/* Creates a new buffer with enough space to hold the uncommitted
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3206	remaining bytes of the buffer pointed to by BUFF, and at least
				3207	MIN_EXTRA more bytes. Copies the excess bytes to the new buffer.
				3208	Chains the new buffer before the buffer pointed to by BUFF, and
				3209	updates the pointer to point to the new buffer. */
				3210	void
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3211	_cpp_extend_buff (cpp_reader pfile, _cpp_buff *pbuff, size_t min_extra)
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3212	{
				3213	_cpp_buff new_buff, old_buff = *pbuff;
				3214	size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
				3215
				3216	new_buff = _cpp_get_buff (pfile, size);
				3217	memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
				3218	new_buff->next = old_buff;
				3219	*pbuff = new_buff;
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3220	}
				3221
				3222	/* Free a chain of buffers starting at BUFF. */
				3223	void
Andreas Jaeger	5671bf2	2003-07-07 21:11:59 +0200	[diff] [blame]	3224	_cpp_free_buff (_cpp_buff *buff)
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3225	{
				3226	_cpp_buff *next;
				3227
				3228	for (; buff; buff = next)
				3229	{
				3230	next = buff->next;
Jakub Jelinek	1a80db9	2013-02-28 10:58:47 +0100	[diff] [blame]	3231	#ifdef ENABLE_VALGRIND_CHECKING
				3232	free (buff);
				3233	#else
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3234	free (buff->base);
Jakub Jelinek	1a80db9	2013-02-28 10:58:47 +0100	[diff] [blame]	3235	#endif
Neil Booth	b8af0ca	2001-09-26 17:52:50 +0000	[diff] [blame]	3236	}
				3237	}
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3238
Neil Booth	ece54d5	2001-09-28 09:40:22 +0000	[diff] [blame]	3239	/* Allocate permanent, unaligned storage of length LEN. */
				3240	unsigned char *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3241	_cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
Neil Booth	ece54d5	2001-09-28 09:40:22 +0000	[diff] [blame]	3242	{
				3243	_cpp_buff *buff = pfile->u_buff;
				3244	unsigned char *result = buff->cur;
				3245
				3246	if (len > (size_t) (buff->limit - result))
				3247	{
				3248	buff = _cpp_get_buff (pfile, len);
				3249	buff->next = pfile->u_buff;
				3250	pfile->u_buff = buff;
				3251	result = buff->cur;
				3252	}
				3253
				3254	buff->cur = result + len;
				3255	return result;
				3256	}
				3257
Neil Booth	8706281	2001-10-20 09:00:53 +0000	[diff] [blame]	3258	/* Allocate permanent, unaligned storage of length LEN from a_buff.
				3259	That buffer is used for growing allocations when saving macro
				3260	replacement lists in a #define, and when parsing an answer to an
				3261	assertion in #assert, #unassert or #if (and therefore possibly
				3262	whilst expanding macros). It therefore must not be used by any
				3263	code that they might call: specifically the lexer and the guts of
				3264	the macro expander.
				3265
				3266	All existing other uses clearly fit this restriction: storing
				3267	registered pragmas during initialization. */
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3268	unsigned char *
Zack Weinberg	6cf87ca	2003-06-17 06:17:44 +0000	[diff] [blame]	3269	_cpp_aligned_alloc (cpp_reader *pfile, size_t len)
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3270	{
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3271	_cpp_buff *buff = pfile->a_buff;
				3272	unsigned char *result = buff->cur;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3273
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3274	if (len > (size_t) (buff->limit - result))
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3275	{
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3276	buff = _cpp_get_buff (pfile, len);
				3277	buff->next = pfile->a_buff;
				3278	pfile->a_buff = buff;
				3279	result = buff->cur;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3280	}
				3281
Neil Booth	8c3b269	2001-09-30 10:03:11 +0000	[diff] [blame]	3282	buff->cur = result + len;
Neil Booth	93c80368	2000-10-28 17:59:06 +0000	[diff] [blame]	3283	return result;
Zack Weinberg	041c319	2000-07-04 01:58:21 +0000	[diff] [blame]	3284	}
Geoffrey Keating	d804416	2004-06-09 20:10:13 +0000	[diff] [blame]	3285
				3286	/* Say which field of TOK is in use. */
				3287
				3288	enum cpp_token_fld_kind
Jakub Jelinek	c26302d	2013-07-10 18:49:24 +0200	[diff] [blame]	3289	cpp_token_val_index (const cpp_token *tok)
Geoffrey Keating	d804416	2004-06-09 20:10:13 +0000	[diff] [blame]	3290	{
				3291	switch (TOKEN_SPELL (tok))
				3292	{
				3293	case SPELL_IDENT:
				3294	return CPP_TOKEN_FLD_NODE;
				3295	case SPELL_LITERAL:
				3296	return CPP_TOKEN_FLD_STR;
Joseph Myers	aa50850	2009-04-19 18:10:56 +0100	[diff] [blame]	3297	case SPELL_OPERATOR:
				3298	if (tok->type == CPP_PASTE)
Joseph Myers	9a0c618	2009-05-10 15:27:32 +0100	[diff] [blame]	3299	return CPP_TOKEN_FLD_TOKEN_NO;
Joseph Myers	aa50850	2009-04-19 18:10:56 +0100	[diff] [blame]	3300	else
				3301	return CPP_TOKEN_FLD_NONE;
Geoffrey Keating	d804416	2004-06-09 20:10:13 +0000	[diff] [blame]	3302	case SPELL_NONE:
				3303	if (tok->type == CPP_MACRO_ARG)
				3304	return CPP_TOKEN_FLD_ARG_NO;
				3305	else if (tok->type == CPP_PADDING)
				3306	return CPP_TOKEN_FLD_SOURCE;
Zack Weinberg	21b1149	2004-09-09 19:16:56 +0000	[diff] [blame]	3307	else if (tok->type == CPP_PRAGMA)
Richard Henderson	bc4071d	2006-01-04 08:33:38 -0800	[diff] [blame]	3308	return CPP_TOKEN_FLD_PRAGMA;
Geoffrey Keating	d804416	2004-06-09 20:10:13 +0000	[diff] [blame]	3309	/* else fall through */
				3310	default:
				3311	return CPP_TOKEN_FLD_NONE;
				3312	}
				3313	}
Gabriel Charette	e3dfef4	2011-08-22 20:41:07 +0000	[diff] [blame]	3314
				3315	/* All tokens lexed in R after calling this function will be forced to have
				3316	their source_location the same as the location referenced by P, until
				3317	cpp_stop_forcing_token_locations is called for R. */
				3318
				3319	void
				3320	cpp_force_token_locations (cpp_reader r, source_location p)
				3321	{
				3322	r->forced_token_location_p = p;
				3323	}
				3324
				3325	/* Go back to assigning locations naturally for lexed tokens. */
				3326
				3327	void
				3328	cpp_stop_forcing_token_locations (cpp_reader *r)
				3329	{
				3330	r->forced_token_location_p = NULL;
				3331	}