/* CPP Library - charsets
   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
   Free Software Foundation, Inc.

   Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges.

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
25#include "cpplib.h"
26#include "cpphash.h"
27
28static int ucn_valid_in_identifier PARAMS ((cpp_reader *, cppchar_t));
29
30/* [lex.charset]: The character designated by the universal character
31 name \UNNNNNNNN is that character whose character short name in
32 ISO/IEC 10646 is NNNNNNNN; the character designated by the
33 universal character name \uNNNN is that character whose character
34 short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
35 for a universal character name is less than 0x20 or in the range
36 0x7F-0x9F (inclusive), or if the universal character name
37 designates a character in the basic source character set, then the
38 program is ill-formed.
39
40 *PSTR must be preceded by "\u" or "\U"; it is assumed that the
41 buffer end is delimited by a non-hex digit. Returns zero if UCNs
42 are not part of the relevant standard, or if the string beginning
43 at *PSTR doesn't syntactically match the form 'NNNN' or 'NNNNNNNN'.
44
Kazu Hirata6356f892003-06-12 19:01:08 +000045 Otherwise the nonzero value of the UCN, whether valid or invalid,
Neil Booth1613e522003-04-20 07:29:23 +000046 is returned. Diagnostics are emitted for invalid values. PSTR
47 is updated to point one beyond the UCN, or to the syntactically
48 invalid character.
49
50 IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
51 an identifier, or 2 otherwise.
52*/
53
54cppchar_t
55_cpp_valid_ucn (pfile, pstr, identifier_pos)
56 cpp_reader *pfile;
57 const uchar **pstr;
58 int identifier_pos;
59{
60 cppchar_t result, c;
61 unsigned int length;
62 const uchar *str = *pstr;
63 const uchar *base = str - 2;
64
65 /* Only attempt to interpret a UCS for C++ and C99. */
66 if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
67 return 0;
68
69 /* We don't accept UCNs for an EBCDIC target. */
70 if (CPP_OPTION (pfile, EBCDIC))
71 return 0;
72
73 if (str[-1] == 'u')
74 length = 4;
75 else if (str[-1] == 'U')
76 length = 8;
77 else
78 abort();
79
80 result = 0;
81 do
82 {
83 c = *str;
84 if (!ISXDIGIT (c))
85 break;
86 str++;
87 result = (result << 4) + hex_value (c);
88 }
89 while (--length);
90
91 *pstr = str;
92 if (length)
93 /* We'll error when we try it out as the start of an identifier. */
94 cpp_error (pfile, DL_ERROR, "incomplete universal character name %.*s",
Andreas Jaeger0e7a8c42003-04-21 14:06:12 +020095 (int) (str - base), base);
Neil Booth1613e522003-04-20 07:29:23 +000096 /* The standard permits $, @ and ` to be specified as UCNs. We use
97 hex escapes so that this also works with EBCDIC hosts. */
98 else if ((result < 0xa0
99 && (result != 0x24 && result != 0x40 && result != 0x60))
100 || (result & 0x80000000)
101 || (result >= 0xD800 && result <= 0xDFFF))
102 {
103 cpp_error (pfile, DL_ERROR, "%.*s is not a valid universal character",
Andreas Jaeger0e7a8c42003-04-21 14:06:12 +0200104 (int) (str - base), base);
Neil Booth1613e522003-04-20 07:29:23 +0000105 }
106 else if (identifier_pos)
107 {
108 int validity = ucn_valid_in_identifier (pfile, result);
109
110 if (validity == 0)
111 cpp_error (pfile, DL_ERROR,
112 "universal character %.*s is not valid in an identifier",
Andreas Jaeger0e7a8c42003-04-21 14:06:12 +0200113 (int) (str - base), base);
Neil Booth1613e522003-04-20 07:29:23 +0000114 else if (validity == 2 && identifier_pos == 1)
115 cpp_error (pfile, DL_ERROR,
116 "universal character %.*s is not valid at the start of an identifier",
Andreas Jaeger0e7a8c42003-04-21 14:06:12 +0200117 (int) (str - base), base);
Neil Booth1613e522003-04-20 07:29:23 +0000118 }
119
120 if (result == 0)
121 result = 1;
122
123 return result;
124}
125
126/* Returns 1 if C is valid in an identifier, 2 if C is valid except at
127 the start of an identifier, and 0 if C is not valid in an
128 identifier. We assume C has already gone through the checks of
129 _cpp_valid_ucn. */
130static int
131ucn_valid_in_identifier (pfile, c)
132 cpp_reader *pfile;
133 cppchar_t c;
134{
135 /* None of the valid chars are outside the Basic Multilingual Plane (the
136 low 16 bits). */
137 if (c > 0xffff)
138 return 0;
139
140 if (CPP_OPTION (pfile, c99) || !CPP_PEDANTIC (pfile))
141 {
142 /* Latin. */
143 if (c == 0x0aa || c == 0x00ba || c == 0x207f || c == 0x1e9b)
144 return 1;
145
146 /* Greek. */
147 if (c == 0x0386)
148 return 1;
149
150 /* Cyrillic. */
151 if (c == 0x040c)
152 return 1;
153
154 /* Hebrew. */
155 if ((c >= 0x05b0 && c <= 0x05b9)
156 || (c >= 0x05bb && c <= 0x005bd)
157 || c == 0x05bf
158 || (c >= 0x05c1 && c <= 0x05c2))
159 return 1;
160
161 /* Arabic. */
162 if ((c >= 0x06d0 && c <= 0x06dc)
163 || c == 0x06e8
164 || (c >= 0x06ea && c <= 0x06ed))
165 return 1;
166
167 /* Devanagari */
168 if ((c >= 0x0901 && c <= 0x0903)
169 || (c >= 0x093e && c <= 0x094d)
170 || (c >= 0x0950 && c <= 0x0952)
171 || c == 0x0963)
172 return 1;
173
174 /* Bengali */
175 if ((c >= 0x0981 && c <= 0x0983)
176 || (c >= 0x09be && c <= 0x09c4)
177 || (c >= 0x09c7 && c <= 0x09c8)
178 || (c >= 0x09cb && c <= 0x09cd)
179 || (c >= 0x09e2 && c <= 0x09e3))
180 return 1;
181
182 /* Gurmukhi */
183 if (c == 0x0a02
184 || (c >= 0x0a3e && c <= 0x0a42)
185 || (c >= 0x0a47 && c <= 0x0a48)
186 || (c >= 0x0a4b && c <= 0x0a4d)
187 || (c == 0x0a74))
188 return 1;
189
190 /* Gujarati */
191 if ((c >= 0x0a81 && c <= 0x0a83)
192 || (c >= 0x0abd && c <= 0x0ac5)
193 || (c >= 0x0ac7 && c <= 0x0ac9)
194 || (c >= 0x0acb && c <= 0x0acd)
195 || (c == 0x0ad0))
196 return 1;
197
198 /* Oriya */
199 if ((c >= 0x0b01 && c <= 0x0b03)
200 || (c >= 0x0b3e && c <= 0x0b43)
201 || (c >= 0x0b47 && c <= 0x0b48)
202 || (c >= 0x0b4b && c <= 0x0b4d))
203 return 1;
204
205 /* Tamil */
206 if ((c >= 0x0b82 && c <= 0x0b83)
207 || (c >= 0x0bbe && c <= 0x0bc2)
208 || (c >= 0x0bc6 && c <= 0x0bc8)
209 || (c >= 0x0bc8 && c <= 0x0bcd))
210 return 1;
211
212 /* Telugu */
213 if ((c >= 0x0c01 && c <= 0x0c03)
214 || (c >= 0x0c3e && c <= 0x0c44)
215 || (c >= 0x0c46 && c <= 0x0c48)
216 || (c >= 0x0c4a && c <= 0x0c4d))
217 return 1;
218
219 /* Kannada */
220 if ((c >= 0x0c82 && c <= 0x0c83)
221 || (c >= 0x0cbe && c <= 0x0cc4)
222 || (c >= 0x0cc6 && c <= 0x0cc8)
223 || (c >= 0x0cca && c <= 0x0ccd)
224 || c == 0x0cde)
225 return 1;
226
227 /* Malayalam */
228 if ((c >= 0x0d02 && c <= 0x0d03)
229 || (c >= 0x0d3e && c <= 0x0d43)
230 || (c >= 0x0d46 && c <= 0x0d48)
231 || (c >= 0x0d4a && c <= 0x0d4d))
232 return 1;
233
234 /* Thai */
235 if ((c >= 0x0e01 && c <= 0x0e3a)
236 || (c >= 0x0e40 && c <= 0x0e5b))
237 return 1;
238
239 /* Lao */
240 if ((c >= 0x0ead && c <= 0x0eae)
241 || (c >= 0x0eb0 && c <= 0x0eb9)
242 || (c >= 0x0ebb && c <= 0x0ebd)
243 || (c >= 0x0ec0 && c <= 0x0ec4)
244 || c == 0x0ec6
245 || (c >= 0x0ec8 && c <= 0x0ecd)
246 || (c >= 0x0edc && c <= 0x0ed))
247 return 1;
248
249 /* Tibetan. */
250 if (c == 0x0f00
251 || (c >= 0x0f18 && c <= 0x0f19)
252 || c == 0x0f35
253 || c == 0x0f37
254 || c == 0x0f39
255 || (c >= 0x0f3e && c <= 0x0f47)
256 || (c >= 0x0f49 && c <= 0x0f69)
257 || (c >= 0x0f71 && c <= 0x0f84)
258 || (c >= 0x0f86 && c <= 0x0f8b)
259 || (c >= 0x0f90 && c <= 0x0f95)
260 || c == 0x0f97
261 || (c >= 0x0f99 && c <= 0x0fad)
262 || (c >= 0x0fb1 && c <= 0x0fb7)
263 || c == 0x0fb9)
264 return 1;
265
266 /* Katakana */
267 if ((c >= 0x30a1 && c <= 0x30f6)
268 || (c >= 0x30fb && c <= 0x30fc))
269 return 1;
270
271 /* CJK Unified Ideographs. */
272 if (c >= 0x4e00 && c <= 0x9fa5)
273 return 1;
274
275 /* Hangul. */
276 if (c >= 0xac00 && c <= 0xd7a3)
277 return 1;
278
279 /* Digits. */
280 if ((c >= 0x0660 && c <= 0x0669)
281 || (c >= 0x06f0 && c <= 0x06f9)
282 || (c >= 0x0966 && c <= 0x096f)
283 || (c >= 0x09e6 && c <= 0x09ef)
284 || (c >= 0x0a66 && c <= 0x0a6f)
285 || (c >= 0x0ae6 && c <= 0x0aef)
286 || (c >= 0x0b66 && c <= 0x0b6f)
287 || (c >= 0x0be7 && c <= 0x0bef)
288 || (c >= 0x0c66 && c <= 0x0c6f)
289 || (c >= 0x0ce6 && c <= 0x0cef)
290 || (c >= 0x0d66 && c <= 0x0d6f)
291 || (c >= 0x0e50 && c <= 0x0e59)
292 || (c >= 0x0ed0 && c <= 0x0ed9)
293 || (c >= 0x0f20 && c <= 0x0f33))
294 return 2;
295
296 /* Special characters. */
297 if (c == 0x00b5
298 || c == 0x00b7
299 || (c >= 0x02b0 && c <= 0x02b8)
300 || c == 0x02bb
301 || (c >= 0x02bd && c <= 0x02c1)
302 || (c >= 0x02d0 && c <= 0x02d1)
303 || (c >= 0x02e0 && c <= 0x02e4)
304 || c == 0x037a
305 || c == 0x0559
306 || c == 0x093d
307 || c == 0x0b3d
308 || c == 0x1fbe
309 || (c >= 0x203f && c <= 0x2040)
310 || c == 0x2102
311 || c == 0x2107
312 || (c >= 0x210a && c <= 0x2113)
313 || c == 0x2115
314 || (c >= 0x2118 && c <= 0x211d)
315 || c == 0x2124
316 || c == 0x2126
317 || c == 0x2128
318 || (c >= 0x212a && c <= 0x2131)
319 || (c >= 0x2133 && c <= 0x2138)
320 || (c >= 0x2160 && c <= 0x2182)
321 || (c >= 0x3005 && c <= 0x3007)
322 || (c >= 0x3021 && c <= 0x3029))
323 return 1;
324 }
325
326 if (CPP_OPTION (pfile, cplusplus) || !CPP_PEDANTIC (pfile))
327 {
328 /* Greek. */
329 if (c == 0x0384)
330 return 1;
331
332 /* Cyrillic. */
333 if (c == 0x040d)
334 return 1;
335
336 /* Hebrew. */
337 if (c >= 0x05f3 && c <= 0x05f4)
338 return 1;
339
340 /* Lao. */
341 if ((c >= 0x0ead && c <= 0x0eb0)
342 || (c == 0x0eb2)
343 || (c == 0x0eb3)
344 || (c == 0x0ebd)
345 || (c >= 0x0ec0 && c <= 0x0ec4)
346 || (c == 0x0ec6))
347 return 1;
348
349 /* Hiragana */
350 if (c == 0x3094
351 || (c >= 0x309d && c <= 0x309e))
352 return 1;
353
354 /* Katakana */
355 if ((c >= 0x30a1 && c <= 0x30fe))
356 return 1;
357
358 /* Hangul */
359 if ((c >= 0x1100 && c <= 0x1159)
360 || (c >= 0x1161 && c <= 0x11a2)
361 || (c >= 0x11a8 && c <= 0x11f9))
362 return 1;
363
364 /* CJK Unified Ideographs */
365 if ((c >= 0xf900 && c <= 0xfa2d)
366 || (c >= 0xfb1f && c <= 0xfb36)
367 || (c >= 0xfb38 && c <= 0xfb3c)
368 || (c == 0xfb3e)
369 || (c >= 0xfb40 && c <= 0xfb41)
370 || (c >= 0xfb42 && c <= 0xfb44)
371 || (c >= 0xfb46 && c <= 0xfbb1)
372 || (c >= 0xfbd3 && c <= 0xfd3f)
373 || (c >= 0xfd50 && c <= 0xfd8f)
374 || (c >= 0xfd92 && c <= 0xfdc7)
375 || (c >= 0xfdf0 && c <= 0xfdfb)
376 || (c >= 0xfe70 && c <= 0xfe72)
377 || (c == 0xfe74)
378 || (c >= 0xfe76 && c <= 0xfefc)
379 || (c >= 0xff21 && c <= 0xff3a)
380 || (c >= 0xff41 && c <= 0xff5a)
381 || (c >= 0xff66 && c <= 0xffbe)
382 || (c >= 0xffc2 && c <= 0xffc7)
383 || (c >= 0xffca && c <= 0xffcf)
384 || (c >= 0xffd2 && c <= 0xffd7)
385 || (c >= 0xffda && c <= 0xffdc)
386 || (c >= 0x4e00 && c <= 0x9fa5))
387 return 1;
388 }
389
390 /* Latin */
391 if ((c >= 0x00c0 && c <= 0x00d6)
392 || (c >= 0x00d8 && c <= 0x00f6)
393 || (c >= 0x00f8 && c <= 0x01f5)
394 || (c >= 0x01fa && c <= 0x0217)
395 || (c >= 0x0250 && c <= 0x02a8)
396 || (c >= 0x1e00 && c <= 0x1e9a)
397 || (c >= 0x1ea0 && c <= 0x1ef9))
398 return 1;
399
400 /* Greek */
401 if ((c >= 0x0388 && c <= 0x038a)
402 || (c == 0x038c)
403 || (c >= 0x038e && c <= 0x03a1)
404 || (c >= 0x03a3 && c <= 0x03ce)
405 || (c >= 0x03d0 && c <= 0x03d6)
406 || (c == 0x03da)
407 || (c == 0x03dc)
408 || (c == 0x03de)
409 || (c == 0x03e0)
410 || (c >= 0x03e2 && c <= 0x03f3)
411 || (c >= 0x1f00 && c <= 0x1f15)
412 || (c >= 0x1f18 && c <= 0x1f1d)
413 || (c >= 0x1f20 && c <= 0x1f45)
414 || (c >= 0x1f48 && c <= 0x1f4d)
415 || (c >= 0x1f50 && c <= 0x1f57)
416 || (c == 0x1f59)
417 || (c == 0x1f5b)
418 || (c == 0x1f5d)
419 || (c >= 0x1f5f && c <= 0x1f7d)
420 || (c >= 0x1f80 && c <= 0x1fb4)
421 || (c >= 0x1fb6 && c <= 0x1fbc)
422 || (c >= 0x1fc2 && c <= 0x1fc4)
423 || (c >= 0x1fc6 && c <= 0x1fcc)
424 || (c >= 0x1fd0 && c <= 0x1fd3)
425 || (c >= 0x1fd6 && c <= 0x1fdb)
426 || (c >= 0x1fe0 && c <= 0x1fec)
427 || (c >= 0x1ff2 && c <= 0x1ff4)
428 || (c >= 0x1ff6 && c <= 0x1ffc))
429 return 1;
430
431 /* Cyrillic */
432 if ((c >= 0x0401 && c <= 0x040c)
433 || (c >= 0x040f && c <= 0x044f)
434 || (c >= 0x0451 && c <= 0x045c)
435 || (c >= 0x045e && c <= 0x0481)
436 || (c >= 0x0490 && c <= 0x04c4)
437 || (c >= 0x04c7 && c <= 0x04c8)
438 || (c >= 0x04cb && c <= 0x04cc)
439 || (c >= 0x04d0 && c <= 0x04eb)
440 || (c >= 0x04ee && c <= 0x04f5)
441 || (c >= 0x04f8 && c <= 0x04f9))
442 return 1;
443
444 /* Armenian */
445 if ((c >= 0x0531 && c <= 0x0556)
446 || (c >= 0x0561 && c <= 0x0587))
447 return 1;
448
449 /* Hebrew */
450 if ((c >= 0x05d0 && c <= 0x05ea)
451 || (c >= 0x05f0 && c <= 0x05f2))
452 return 1;
453
454 /* Arabic */
455 if ((c >= 0x0621 && c <= 0x063a)
456 || (c >= 0x0640 && c <= 0x0652)
457 || (c >= 0x0670 && c <= 0x06b7)
458 || (c >= 0x06ba && c <= 0x06be)
459 || (c >= 0x06c0 && c <= 0x06ce)
460 || (c >= 0x06e5 && c <= 0x06e7))
461 return 1;
462
463 /* Devanagari */
464 if ((c >= 0x0905 && c <= 0x0939)
465 || (c >= 0x0958 && c <= 0x0962))
466 return 1;
467
468 /* Bengali */
469 if ((c >= 0x0985 && c <= 0x098c)
470 || (c >= 0x098f && c <= 0x0990)
471 || (c >= 0x0993 && c <= 0x09a8)
472 || (c >= 0x09aa && c <= 0x09b0)
473 || (c == 0x09b2)
474 || (c >= 0x09b6 && c <= 0x09b9)
475 || (c >= 0x09dc && c <= 0x09dd)
476 || (c >= 0x09df && c <= 0x09e1)
477 || (c >= 0x09f0 && c <= 0x09f1))
478 return 1;
479
480 /* Gurmukhi */
481 if ((c >= 0x0a05 && c <= 0x0a0a)
482 || (c >= 0x0a0f && c <= 0x0a10)
483 || (c >= 0x0a13 && c <= 0x0a28)
484 || (c >= 0x0a2a && c <= 0x0a30)
485 || (c >= 0x0a32 && c <= 0x0a33)
486 || (c >= 0x0a35 && c <= 0x0a36)
487 || (c >= 0x0a38 && c <= 0x0a39)
488 || (c >= 0x0a59 && c <= 0x0a5c)
489 || (c == 0x0a5e))
490 return 1;
491
492 /* Gujarati */
493 if ((c >= 0x0a85 && c <= 0x0a8b)
494 || (c == 0x0a8d)
495 || (c >= 0x0a8f && c <= 0x0a91)
496 || (c >= 0x0a93 && c <= 0x0aa8)
497 || (c >= 0x0aaa && c <= 0x0ab0)
498 || (c >= 0x0ab2 && c <= 0x0ab3)
499 || (c >= 0x0ab5 && c <= 0x0ab9)
500 || (c == 0x0ae0))
501 return 1;
502
503 /* Oriya */
504 if ((c >= 0x0b05 && c <= 0x0b0c)
505 || (c >= 0x0b0f && c <= 0x0b10)
506 || (c >= 0x0b13 && c <= 0x0b28)
507 || (c >= 0x0b2a && c <= 0x0b30)
508 || (c >= 0x0b32 && c <= 0x0b33)
509 || (c >= 0x0b36 && c <= 0x0b39)
510 || (c >= 0x0b5c && c <= 0x0b5d)
511 || (c >= 0x0b5f && c <= 0x0b61))
512 return 1;
513
514 /* Tamil */
515 if ((c >= 0x0b85 && c <= 0x0b8a)
516 || (c >= 0x0b8e && c <= 0x0b90)
517 || (c >= 0x0b92 && c <= 0x0b95)
518 || (c >= 0x0b99 && c <= 0x0b9a)
519 || (c == 0x0b9c)
520 || (c >= 0x0b9e && c <= 0x0b9f)
521 || (c >= 0x0ba3 && c <= 0x0ba4)
522 || (c >= 0x0ba8 && c <= 0x0baa)
523 || (c >= 0x0bae && c <= 0x0bb5)
524 || (c >= 0x0bb7 && c <= 0x0bb9))
525 return 1;
526
527 /* Telugu */
528 if ((c >= 0x0c05 && c <= 0x0c0c)
529 || (c >= 0x0c0e && c <= 0x0c10)
530 || (c >= 0x0c12 && c <= 0x0c28)
531 || (c >= 0x0c2a && c <= 0x0c33)
532 || (c >= 0x0c35 && c <= 0x0c39)
533 || (c >= 0x0c60 && c <= 0x0c61))
534 return 1;
535
536 /* Kannada */
537 if ((c >= 0x0c85 && c <= 0x0c8c)
538 || (c >= 0x0c8e && c <= 0x0c90)
539 || (c >= 0x0c92 && c <= 0x0ca8)
540 || (c >= 0x0caa && c <= 0x0cb3)
541 || (c >= 0x0cb5 && c <= 0x0cb9)
542 || (c >= 0x0ce0 && c <= 0x0ce1))
543 return 1;
544
545 /* Malayalam */
546 if ((c >= 0x0d05 && c <= 0x0d0c)
547 || (c >= 0x0d0e && c <= 0x0d10)
548 || (c >= 0x0d12 && c <= 0x0d28)
549 || (c >= 0x0d2a && c <= 0x0d39)
550 || (c >= 0x0d60 && c <= 0x0d61))
551 return 1;
552
553 /* Thai */
554 if ((c >= 0x0e01 && c <= 0x0e30)
555 || (c >= 0x0e32 && c <= 0x0e33)
556 || (c >= 0x0e40 && c <= 0x0e46)
557 || (c >= 0x0e4f && c <= 0x0e5b))
558 return 1;
559
560 /* Lao */
561 if ((c >= 0x0e81 && c <= 0x0e82)
562 || (c == 0x0e84)
563 || (c == 0x0e87)
564 || (c == 0x0e88)
565 || (c == 0x0e8a)
566 || (c == 0x0e8d)
567 || (c >= 0x0e94 && c <= 0x0e97)
568 || (c >= 0x0e99 && c <= 0x0e9f)
569 || (c >= 0x0ea1 && c <= 0x0ea3)
570 || (c == 0x0ea5)
571 || (c == 0x0ea7)
572 || (c == 0x0eaa)
573 || (c == 0x0eab))
574 return 1;
575
576 /* Georgian */
577 if ((c >= 0x10a0 && c <= 0x10c5)
578 || (c >= 0x10d0 && c <= 0x10f6))
579 return 1;
580
581 /* Hiragana */
582 if ((c >= 0x3041 && c <= 0x3093)
583 || (c >= 0x309b && c <= 0x309c))
584 return 1;
585
586 /* Bopmofo */
587 if ((c >= 0x3105 && c <= 0x312c))
588 return 1;
589
590 return 0;
591}