libcpp: Support C11/C++11 extended identifier characters (ucnid-2011-1.c: New test).

gcc/testsuite:
	* c-c++-common/cpp/ucnid-2011-1.c: New test.

libcpp:
	* ucnid.tab: Add C11 and C11NOSTART data.
	* makeucnid.c (digit): Rename enum value to N99.
	(C11, N11, all_languages): New enum values.
	(NUM_CODE_POINTS, MAX_CODE_POINT): New macros.
	(flags, decomp, combining_value): Use NUM_CODE_POINTS as array
	size.
	(decomp): Use unsigned int as element type.
	(all_decomp): New array.
	(read_ucnid): Handle C11 and C11NOSTART.  Use MAX_CODE_POINT.
	(read_table): Use MAX_CODE_POINT.  Store all decompositions in
	all_decomp.
	(read_derived): Use MAX_CODE_POINT.
	(write_table): Use NUM_CODE_POINTS.  Print N99, C11 and N11
	flags.  Print whole array variable declaration rather than just
	array contents.
	(char_id_valid, write_context_switch): New functions.
	(main): Call write_context_switch.
	* ucnid.h: Regenerate.
	* include/cpplib.h (struct cpp_options): Add c11_identifiers.
	* init.c (struct lang_flags): Add c11_identifiers.
	(cpp_set_lang): Set c11_identifiers option from selected language.
	* internal.h (struct normalize_state): Document "previous" as
	previous starter character.
	(NORMALIZE_STATE_UPDATE_IDNUM): Take character as argument.
	* charset.c (DIG): Rename enum value to N99.
	(C11, N11): New enum values.
	(struct ucnrange): Give name to struct.  Use short for flags and
	unsigned int for end of range.  Include ucnid.h for whole variable
	declaration.
	(ucn_valid_in_identifier): Allow for characters up to 0x10FFFF.
	Allow for C11 in determining valid characters and valid start
	characters.  Use check_nfc for non-Hangul context-dependent
	checks.  Only store starter characters in nst->previous.
	(_cpp_valid_ucn): Pass new argument to
	NORMALIZE_STATE_UPDATE_IDNUM.
	* lex.c (lex_identifier): Pass new argument to
	NORMALIZE_STATE_UPDATE_IDNUM.  Call NORMALIZE_STATE_UPDATE_IDNUM
	after initial non-UCN part of identifier.
	(lex_number): Pass new argument to NORMALIZE_STATE_UPDATE_IDNUM.

From-SVN: r204886
diff --git a/libcpp/charset.c b/libcpp/charset.c
index ae56c5a..c48e64a 100644
--- a/libcpp/charset.c
+++ b/libcpp/charset.c
@@ -828,29 +828,32 @@
   /* Valid in a C99 identifier?  */
   C99 = 1,
   /* Valid in a C99 identifier, but not as the first character?  */
-  DIG = 2,
+  N99 = 2,
   /* Valid in a C++ identifier?  */
   CXX = 4,
+  /* Valid in a C11/C++11 identifier?  */
+  C11 = 8,
+  /* Valid in a C11/C++11 identifier, but not as the first character?  */
+  N11 = 16,
   /* NFC representation is not valid in an identifier?  */
-  CID = 8,
+  CID = 32,
   /* Might be valid NFC form?  */
-  NFC = 16,
+  NFC = 64,
   /* Might be valid NFKC form?  */
-  NKC = 32,
+  NKC = 128,
   /* Certain preceding characters might make it not valid NFC/NKFC form?  */
-  CTX = 64
+  CTX = 256
 };
 
-static const struct {
+struct ucnrange {
   /* Bitmap of flags above.  */
-  unsigned char flags;
+  unsigned short flags;
   /* Combining class of the character.  */
   unsigned char combine;
   /* Last character in the range described by this entry.  */
-  unsigned short end;
-} ucnranges[] = {
-#include "ucnid.h"
+  unsigned int end;
 };
+#include "ucnid.h"
 
 /* Returns 1 if C is valid in an identifier, 2 if C is valid except at
    the start of an identifier, and 0 if C is not valid in an
@@ -864,8 +867,9 @@
 			 struct normalize_state *nst)
 {
   int mn, mx, md;
+  unsigned short valid_flags, invalid_start_flags;
 
-  if (c > 0xFFFF)
+  if (c > 0x10FFFF)
     return 0;
 
   mn = 0;
@@ -881,15 +885,25 @@
 
   /* When -pedantic, we require the character to have been listed by
      the standard for the current language.  Otherwise, we accept the
-     union of the acceptable sets for C++98 and C99.  */
-  if (! (ucnranges[mn].flags & (C99 | CXX)))
+     union of the acceptable sets for all supported language versions.  */
+  valid_flags = C99 | CXX | C11;
+  if (CPP_PEDANTIC (pfile))
+    {
+      if (CPP_OPTION (pfile, c11_identifiers))
+	valid_flags = C11;
+      else if (CPP_OPTION (pfile, c99))
+	valid_flags = C99;
+      else if (CPP_OPTION (pfile, cplusplus))
+	valid_flags = CXX;
+    }
+  if (! (ucnranges[mn].flags & valid_flags))
       return 0;
-
-  if (CPP_PEDANTIC (pfile)
-      && ((CPP_OPTION (pfile, c99) && !(ucnranges[mn].flags & C99))
-	  || (CPP_OPTION (pfile, cplusplus)
-	      && !(ucnranges[mn].flags & CXX))))
-    return 0;
+  if (CPP_OPTION (pfile, c11_identifiers))
+    invalid_start_flags = N11;
+  else if (CPP_OPTION (pfile, c99))
+    invalid_start_flags = N99;
+  else
+    invalid_start_flags = 0;
 
   /* Update NST.  */
   if (ucnranges[mn].combine != 0 && ucnranges[mn].combine < nst->prev_class)
@@ -899,17 +913,6 @@
       bool safe;
       cppchar_t p = nst->previous;
 
-      /* Easy cases from Bengali, Oriya, Tamil, Jannada, and Malayalam.  */
-      if (c == 0x09BE)
-	safe = p != 0x09C7;  /* Use 09CB instead of 09C7 09BE.  */
-      else if (c == 0x0B3E)
-	safe = p != 0x0B47;  /* Use 0B4B instead of 0B47 0B3E.  */
-      else if (c == 0x0BBE)
-	safe = p != 0x0BC6 && p != 0x0BC7;  /* Use 0BCA/0BCB instead.  */
-      else if (c == 0x0CC2)
-	safe = p != 0x0CC6;  /* Use 0CCA instead of 0CC6 0CC2.  */
-      else if (c == 0x0D3E)
-	safe = p != 0x0D46 && p != 0x0D47;  /* Use 0D4A/0D4B instead.  */
       /* For Hangul, characters in the range AC00-D7A3 are NFC/NFKC,
 	 and are combined algorithmically from a sequence of the form
 	 1100-1112 1161-1175 11A8-11C2
@@ -917,20 +920,19 @@
 	 really a valid character).
 	 Unfortunately, C99 allows (only) the NFC form, but C++ allows
 	 only the combining characters.  */
-      else if (c >= 0x1161 && c <= 0x1175)
+      if (c >= 0x1161 && c <= 0x1175)
 	safe = p < 0x1100 || p > 0x1112;
       else if (c >= 0x11A8 && c <= 0x11C2)
 	safe = (p < 0xAC00 || p > 0xD7A3 || (p - 0xAC00) % 28 != 0);
       else
+	safe = check_nfc (pfile, c, p);
+      if (!safe)
 	{
-	  /* Uh-oh, someone updated ucnid.h without updating this code.  */
-	  cpp_error (pfile, CPP_DL_ICE, "Character %x might not be NFKC", c);
-	  safe = true;
+	  if ((c >= 0x1161 && c <= 0x1175) || (c >= 0x11A8 && c <= 0x11C2))
+	    nst->level = MAX (nst->level, normalized_identifier_C);
+	  else
+	    nst->level = normalized_none;
 	}
-      if (!safe && c < 0x1161)
-	nst->level = normalized_none;
-      else if (!safe)
-	nst->level = MAX (nst->level, normalized_identifier_C);
     }
   else if (ucnranges[mn].flags & NKC)
     ;
@@ -940,11 +942,13 @@
     nst->level = MAX (nst->level, normalized_identifier_C);
   else
     nst->level = normalized_none;
-  nst->previous = c;
+  if (ucnranges[mn].combine == 0)
+    nst->previous = c;
   nst->prev_class = ucnranges[mn].combine;
 
-  /* In C99, UCN digits may not begin identifiers.  */
-  if (CPP_OPTION (pfile, c99) && (ucnranges[mn].flags & DIG))
+  /* In C99, UCN digits may not begin identifiers.  In C11 and C++11,
+     UCN combining characters may not begin identifiers.  */
+  if (ucnranges[mn].flags & invalid_start_flags)
     return 2;
 
   return 1;
@@ -1054,7 +1058,7 @@
 	  CPP_OPTION (pfile, warn_dollars) = 0;
 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
 	}
-      NORMALIZE_STATE_UPDATE_IDNUM (nst);
+      NORMALIZE_STATE_UPDATE_IDNUM (nst, result);
     }
   else if (identifier_pos)
     {