Index: libcpp/ChangeLog
2005-03-12 Geoffrey Keating <geoffk@apple.com>
* directives.c (glue_header_name): Update call to cpp_spell_token.
* internal.h (_cpp_interpret_identifier): New.
* charset.c (_cpp_interpret_identifier): New.
(_cpp_valid_ucn): Allow UCN version of '$'.
* lex.c (lex_identifier): Add extra parameter to indicate if initial
character was '$' or '\'. Support identifiers with UCNs.
(forms_identifier_p): Allow UCNs.
(_cpp_lex_direct): Pass extra parameter to lex_identifier.
(utf8_to_ucn): New.
(cpp_spell_token): Add FORSTRING parameter. Use it.
(cpp_token_as_text): Update call to cpp_spell_token.
(cpp_output_token): Write UCNs back out.
(stringify_arg): Update call to cpp_spell_token.
(paste_tokens): Likewise.
(cpp_macro_definition): Likewise.
* macro.c (stringify_arg): Likewise.
(paste_tokens): Likewise.
(cpp_macro_definition): Likewise.
* include/cpplib.h: Add parameter to cpp_spell_token.
Index: gcc/ChangeLog
2005-03-12 Geoffrey Keating <geoffk@apple.com>
* c-lex.c (c_lex_with_flags): Add parameter to call to
cpp_spell_token.
Index: gcc/testsuite/ChangeLog
2005-03-12 Geoffrey Keating <geoffk@apple.com>
* gcc.dg/ucnid-1.c: New.
* gcc.dg/ucnid-2.c: New.
* gcc.dg/ucnid-3.c: New.
* gcc.dg/ucnid-4.c: New.
* gcc.dg/ucnid-5.c: New.
* gcc.dg/ucnid-6.c: New.
* gcc.dg/cpp/ucnid-1.c: New.
* gcc.dg/cpp/ucnid-2.c: New.
* gcc.dg/cpp/ucnid-3.c: New.
* g++.dg/other/ucnid-1.C: New.
From-SVN: r96333
diff --git a/libcpp/charset.c b/libcpp/charset.c
index 6b6c360..cd25f10 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -907,6 +907,15 @@
(int) (str - base), base);
result = 1;
}
+ else if (identifier_pos && result == 0x24
+ && CPP_OPTION (pfile, dollars_in_ident))
+ {
+ if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
+ {
+ CPP_OPTION (pfile, warn_dollars) = 0;
+ cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
+ }
+ }
else if (identifier_pos)
{
int validity = ucn_valid_in_identifier (pfile, result);
@@ -1414,7 +1423,60 @@
return result;
}
+
+/* Convert an identifier denoted by ID and LEN, which might contain
+ UCN escapes, to the source character set, either UTF-8 or
+ UTF-EBCDIC. Assumes that the identifier is actually a valid identifier.
+ PFILE supplies the hash table used for the final lookup and is the
+ reader against which a conversion error is reported. ID points to
+ the spelling as written and LEN is the number of bytes in it.
+ Every byte other than a backslash is copied through unchanged. A
+ backslash is taken to introduce a UCN: when the byte after it is 'u'
+ at most four hex digits are accumulated, otherwise at most eight.
+ The byte after the backslash is read without being checked against
+ LEN, so a spelling ending in a lone backslash is covered only by the
+ validity assumption above. Once the digits are consumed the index is
+ stepped back by one so that the loop's own increment lands on the
+ byte following the escape.
+ Returns the hash node found by looking up the converted spelling in
+ PFILE's hash table with HT_ALLOC. If a UCN cannot be converted, an
+ error is reported through cpp_errno, conversion stops, and the lookup
+ is done on whatever was converted before the failure. */
+cpp_hashnode *
+_cpp_interpret_identifier (cpp_reader *pfile, const uchar *id, size_t len)
+{
+ /* It turns out that a UCN escape always turns into fewer characters
+ than the escape itself, so we can allocate a temporary in advance.
+ The temporary is LEN + 1 bytes obtained from alloca; the space
+ remaining that is handed to the converter below is measured
+ against LEN. */
+ uchar * buf = alloca (len + 1);
+ uchar * bufp = buf;
+ size_t idp;
+
+ for (idp = 0; idp < len; idp++)
+ if (id[idp] != '\\')
+ *bufp++ = id[idp];
+ else
+ {
+ unsigned length = id[idp+1] == 'u' ? 4 : 8;
+ cppchar_t value = 0;
+ size_t bufleft = len - (bufp - buf);
+ int rval;
+ idp += 2;
+ while (length && idp < len && ISXDIGIT (id[idp]))
+ {
+ value = (value << 4) + hex_value (id[idp]);
+ idp++;
+ length--;
+ }
+ idp--;
+
+ /* Special case for EBCDIC: if the identifier contains
+ a '$' specified using a UCN, translate it to EBCDIC.
+ The host character constant '$' is stored directly, so the
+ value 0x24 never reaches one_cppchar_to_utf8. For any other
+ value that helper is given the write pointer and the space
+ left (presumably advancing both -- confirm against its
+ definition), and a nonzero return is treated as an errno
+ value describing the failure. */
+ if (value == 0x24)
+ {
+ *bufp++ = '$';
+ continue;
+ }
+
+ rval = one_cppchar_to_utf8 (value, &bufp, &bufleft);
+ if (rval)
+ {
+ errno = rval;
+ cpp_errno (pfile, CPP_DL_ERROR,
+ "converting UCN to source character set");
+ break;
+ }
+ }
+
+ return CPP_HASHNODE (ht_lookup (pfile->hash_table,
+ buf, bufp - buf, HT_ALLOC));
+}
+
/* Convert an input buffer (containing the complete contents of one
source file) from INPUT_CHARSET to the source character set. INPUT
points to the input buffer, SIZE is its allocated size, and LEN is