Neil Booth | 1613e52 | 2003-04-20 07:29:23 +0000 | [diff] [blame] | 1 | /* CPP Library - charsets |
| 2 | Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 |
| 3 | Free Software Foundation, Inc. |
| 4 | |
| 5 | Broken out of c-lex.c Apr 2003, adding valid C99 UCN ranges. |
| 6 | |
| 7 | This program is free software; you can redistribute it and/or modify it |
| 8 | under the terms of the GNU General Public License as published by the |
| 9 | Free Software Foundation; either version 2, or (at your option) any |
| 10 | later version. |
| 11 | |
| 12 | This program is distributed in the hope that it will be useful, |
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 15 | GNU General Public License for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU General Public License |
| 18 | along with this program; if not, write to the Free Software |
| 19 | Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ |
| 20 | |
| 21 | #include "config.h" |
| 22 | #include "system.h" |
| 23 | #include "coretypes.h" |
| 24 | #include "tm.h" |
| 25 | #include "cpplib.h" |
| 26 | #include "cpphash.h" |
| 27 | |
| 28 | static int ucn_valid_in_identifier PARAMS ((cpp_reader *, cppchar_t)); |
| 29 | |
| 30 | /* [lex.charset]: The character designated by the universal character |
| 31 | name \UNNNNNNNN is that character whose character short name in |
| 32 | ISO/IEC 10646 is NNNNNNNN; the character designated by the |
| 33 | universal character name \uNNNN is that character whose character |
| 34 | short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value |
| 35 | for a universal character name is less than 0x20 or in the range |
| 36 | 0x7F-0x9F (inclusive), or if the universal character name |
| 37 | designates a character in the basic source character set, then the |
| 38 | program is ill-formed. |
| 39 | |
| 40 | *PSTR must be preceded by "\u" or "\U"; it is assumed that the |
| 41 | buffer end is delimited by a non-hex digit. Returns zero if UCNs |
| 42 | are not part of the relevant standard, or if the string beginning |
| 43 | at *PSTR doesn't syntactically match the form 'NNNN' or 'NNNNNNNN'. |
| 44 | |
Kazu Hirata | 6356f89 | 2003-06-12 19:01:08 +0000 | [diff] [blame^] | 45 | Otherwise the nonzero value of the UCN, whether valid or invalid, |
Neil Booth | 1613e52 | 2003-04-20 07:29:23 +0000 | [diff] [blame] | 46 | is returned. Diagnostics are emitted for invalid values. PSTR |
| 47 | is updated to point one beyond the UCN, or to the syntactically |
| 48 | invalid character. |
| 49 | |
| 50 | IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of |
| 51 | an identifier, or 2 otherwise. |
| 52 | */ |
| 53 | |
| 54 | cppchar_t |
| 55 | _cpp_valid_ucn (pfile, pstr, identifier_pos) |
| 56 | cpp_reader *pfile; |
| 57 | const uchar **pstr; |
| 58 | int identifier_pos; |
| 59 | { |
| 60 | cppchar_t result, c; |
| 61 | unsigned int length; |
| 62 | const uchar *str = *pstr; |
| 63 | const uchar *base = str - 2; |
| 64 | |
| 65 | /* Only attempt to interpret a UCS for C++ and C99. */ |
| 66 | if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99)) |
| 67 | return 0; |
| 68 | |
| 69 | /* We don't accept UCNs for an EBCDIC target. */ |
| 70 | if (CPP_OPTION (pfile, EBCDIC)) |
| 71 | return 0; |
| 72 | |
| 73 | if (str[-1] == 'u') |
| 74 | length = 4; |
| 75 | else if (str[-1] == 'U') |
| 76 | length = 8; |
| 77 | else |
| 78 | abort(); |
| 79 | |
| 80 | result = 0; |
| 81 | do |
| 82 | { |
| 83 | c = *str; |
| 84 | if (!ISXDIGIT (c)) |
| 85 | break; |
| 86 | str++; |
| 87 | result = (result << 4) + hex_value (c); |
| 88 | } |
| 89 | while (--length); |
| 90 | |
| 91 | *pstr = str; |
| 92 | if (length) |
| 93 | /* We'll error when we try it out as the start of an identifier. */ |
| 94 | cpp_error (pfile, DL_ERROR, "incomplete universal character name %.*s", |
Andreas Jaeger | 0e7a8c4 | 2003-04-21 14:06:12 +0200 | [diff] [blame] | 95 | (int) (str - base), base); |
Neil Booth | 1613e52 | 2003-04-20 07:29:23 +0000 | [diff] [blame] | 96 | /* The standard permits $, @ and ` to be specified as UCNs. We use |
| 97 | hex escapes so that this also works with EBCDIC hosts. */ |
| 98 | else if ((result < 0xa0 |
| 99 | && (result != 0x24 && result != 0x40 && result != 0x60)) |
| 100 | || (result & 0x80000000) |
| 101 | || (result >= 0xD800 && result <= 0xDFFF)) |
| 102 | { |
| 103 | cpp_error (pfile, DL_ERROR, "%.*s is not a valid universal character", |
Andreas Jaeger | 0e7a8c4 | 2003-04-21 14:06:12 +0200 | [diff] [blame] | 104 | (int) (str - base), base); |
Neil Booth | 1613e52 | 2003-04-20 07:29:23 +0000 | [diff] [blame] | 105 | } |
| 106 | else if (identifier_pos) |
| 107 | { |
| 108 | int validity = ucn_valid_in_identifier (pfile, result); |
| 109 | |
| 110 | if (validity == 0) |
| 111 | cpp_error (pfile, DL_ERROR, |
| 112 | "universal character %.*s is not valid in an identifier", |
Andreas Jaeger | 0e7a8c4 | 2003-04-21 14:06:12 +0200 | [diff] [blame] | 113 | (int) (str - base), base); |
Neil Booth | 1613e52 | 2003-04-20 07:29:23 +0000 | [diff] [blame] | 114 | else if (validity == 2 && identifier_pos == 1) |
| 115 | cpp_error (pfile, DL_ERROR, |
| 116 | "universal character %.*s is not valid at the start of an identifier", |
Andreas Jaeger | 0e7a8c4 | 2003-04-21 14:06:12 +0200 | [diff] [blame] | 117 | (int) (str - base), base); |
Neil Booth | 1613e52 | 2003-04-20 07:29:23 +0000 | [diff] [blame] | 118 | } |
| 119 | |
| 120 | if (result == 0) |
| 121 | result = 1; |
| 122 | |
| 123 | return result; |
| 124 | } |
| 125 | |
| 126 | /* Returns 1 if C is valid in an identifier, 2 if C is valid except at |
| 127 | the start of an identifier, and 0 if C is not valid in an |
| 128 | identifier. We assume C has already gone through the checks of |
| 129 | _cpp_valid_ucn. */ |
| 130 | static int |
| 131 | ucn_valid_in_identifier (pfile, c) |
| 132 | cpp_reader *pfile; |
| 133 | cppchar_t c; |
| 134 | { |
| 135 | /* None of the valid chars are outside the Basic Multilingual Plane (the |
| 136 | low 16 bits). */ |
| 137 | if (c > 0xffff) |
| 138 | return 0; |
| 139 | |
| 140 | if (CPP_OPTION (pfile, c99) || !CPP_PEDANTIC (pfile)) |
| 141 | { |
| 142 | /* Latin. */ |
| 143 | if (c == 0x0aa || c == 0x00ba || c == 0x207f || c == 0x1e9b) |
| 144 | return 1; |
| 145 | |
| 146 | /* Greek. */ |
| 147 | if (c == 0x0386) |
| 148 | return 1; |
| 149 | |
| 150 | /* Cyrillic. */ |
| 151 | if (c == 0x040c) |
| 152 | return 1; |
| 153 | |
| 154 | /* Hebrew. */ |
| 155 | if ((c >= 0x05b0 && c <= 0x05b9) |
| 156 | || (c >= 0x05bb && c <= 0x005bd) |
| 157 | || c == 0x05bf |
| 158 | || (c >= 0x05c1 && c <= 0x05c2)) |
| 159 | return 1; |
| 160 | |
| 161 | /* Arabic. */ |
| 162 | if ((c >= 0x06d0 && c <= 0x06dc) |
| 163 | || c == 0x06e8 |
| 164 | || (c >= 0x06ea && c <= 0x06ed)) |
| 165 | return 1; |
| 166 | |
| 167 | /* Devanagari */ |
| 168 | if ((c >= 0x0901 && c <= 0x0903) |
| 169 | || (c >= 0x093e && c <= 0x094d) |
| 170 | || (c >= 0x0950 && c <= 0x0952) |
| 171 | || c == 0x0963) |
| 172 | return 1; |
| 173 | |
| 174 | /* Bengali */ |
| 175 | if ((c >= 0x0981 && c <= 0x0983) |
| 176 | || (c >= 0x09be && c <= 0x09c4) |
| 177 | || (c >= 0x09c7 && c <= 0x09c8) |
| 178 | || (c >= 0x09cb && c <= 0x09cd) |
| 179 | || (c >= 0x09e2 && c <= 0x09e3)) |
| 180 | return 1; |
| 181 | |
| 182 | /* Gurmukhi */ |
| 183 | if (c == 0x0a02 |
| 184 | || (c >= 0x0a3e && c <= 0x0a42) |
| 185 | || (c >= 0x0a47 && c <= 0x0a48) |
| 186 | || (c >= 0x0a4b && c <= 0x0a4d) |
| 187 | || (c == 0x0a74)) |
| 188 | return 1; |
| 189 | |
| 190 | /* Gujarati */ |
| 191 | if ((c >= 0x0a81 && c <= 0x0a83) |
| 192 | || (c >= 0x0abd && c <= 0x0ac5) |
| 193 | || (c >= 0x0ac7 && c <= 0x0ac9) |
| 194 | || (c >= 0x0acb && c <= 0x0acd) |
| 195 | || (c == 0x0ad0)) |
| 196 | return 1; |
| 197 | |
| 198 | /* Oriya */ |
| 199 | if ((c >= 0x0b01 && c <= 0x0b03) |
| 200 | || (c >= 0x0b3e && c <= 0x0b43) |
| 201 | || (c >= 0x0b47 && c <= 0x0b48) |
| 202 | || (c >= 0x0b4b && c <= 0x0b4d)) |
| 203 | return 1; |
| 204 | |
| 205 | /* Tamil */ |
| 206 | if ((c >= 0x0b82 && c <= 0x0b83) |
| 207 | || (c >= 0x0bbe && c <= 0x0bc2) |
| 208 | || (c >= 0x0bc6 && c <= 0x0bc8) |
| 209 | || (c >= 0x0bc8 && c <= 0x0bcd)) |
| 210 | return 1; |
| 211 | |
| 212 | /* Telugu */ |
| 213 | if ((c >= 0x0c01 && c <= 0x0c03) |
| 214 | || (c >= 0x0c3e && c <= 0x0c44) |
| 215 | || (c >= 0x0c46 && c <= 0x0c48) |
| 216 | || (c >= 0x0c4a && c <= 0x0c4d)) |
| 217 | return 1; |
| 218 | |
| 219 | /* Kannada */ |
| 220 | if ((c >= 0x0c82 && c <= 0x0c83) |
| 221 | || (c >= 0x0cbe && c <= 0x0cc4) |
| 222 | || (c >= 0x0cc6 && c <= 0x0cc8) |
| 223 | || (c >= 0x0cca && c <= 0x0ccd) |
| 224 | || c == 0x0cde) |
| 225 | return 1; |
| 226 | |
| 227 | /* Malayalam */ |
| 228 | if ((c >= 0x0d02 && c <= 0x0d03) |
| 229 | || (c >= 0x0d3e && c <= 0x0d43) |
| 230 | || (c >= 0x0d46 && c <= 0x0d48) |
| 231 | || (c >= 0x0d4a && c <= 0x0d4d)) |
| 232 | return 1; |
| 233 | |
| 234 | /* Thai */ |
| 235 | if ((c >= 0x0e01 && c <= 0x0e3a) |
| 236 | || (c >= 0x0e40 && c <= 0x0e5b)) |
| 237 | return 1; |
| 238 | |
| 239 | /* Lao */ |
| 240 | if ((c >= 0x0ead && c <= 0x0eae) |
| 241 | || (c >= 0x0eb0 && c <= 0x0eb9) |
| 242 | || (c >= 0x0ebb && c <= 0x0ebd) |
| 243 | || (c >= 0x0ec0 && c <= 0x0ec4) |
| 244 | || c == 0x0ec6 |
| 245 | || (c >= 0x0ec8 && c <= 0x0ecd) |
| 246 | || (c >= 0x0edc && c <= 0x0ed)) |
| 247 | return 1; |
| 248 | |
| 249 | /* Tibetan. */ |
| 250 | if (c == 0x0f00 |
| 251 | || (c >= 0x0f18 && c <= 0x0f19) |
| 252 | || c == 0x0f35 |
| 253 | || c == 0x0f37 |
| 254 | || c == 0x0f39 |
| 255 | || (c >= 0x0f3e && c <= 0x0f47) |
| 256 | || (c >= 0x0f49 && c <= 0x0f69) |
| 257 | || (c >= 0x0f71 && c <= 0x0f84) |
| 258 | || (c >= 0x0f86 && c <= 0x0f8b) |
| 259 | || (c >= 0x0f90 && c <= 0x0f95) |
| 260 | || c == 0x0f97 |
| 261 | || (c >= 0x0f99 && c <= 0x0fad) |
| 262 | || (c >= 0x0fb1 && c <= 0x0fb7) |
| 263 | || c == 0x0fb9) |
| 264 | return 1; |
| 265 | |
| 266 | /* Katakana */ |
| 267 | if ((c >= 0x30a1 && c <= 0x30f6) |
| 268 | || (c >= 0x30fb && c <= 0x30fc)) |
| 269 | return 1; |
| 270 | |
| 271 | /* CJK Unified Ideographs. */ |
| 272 | if (c >= 0x4e00 && c <= 0x9fa5) |
| 273 | return 1; |
| 274 | |
| 275 | /* Hangul. */ |
| 276 | if (c >= 0xac00 && c <= 0xd7a3) |
| 277 | return 1; |
| 278 | |
| 279 | /* Digits. */ |
| 280 | if ((c >= 0x0660 && c <= 0x0669) |
| 281 | || (c >= 0x06f0 && c <= 0x06f9) |
| 282 | || (c >= 0x0966 && c <= 0x096f) |
| 283 | || (c >= 0x09e6 && c <= 0x09ef) |
| 284 | || (c >= 0x0a66 && c <= 0x0a6f) |
| 285 | || (c >= 0x0ae6 && c <= 0x0aef) |
| 286 | || (c >= 0x0b66 && c <= 0x0b6f) |
| 287 | || (c >= 0x0be7 && c <= 0x0bef) |
| 288 | || (c >= 0x0c66 && c <= 0x0c6f) |
| 289 | || (c >= 0x0ce6 && c <= 0x0cef) |
| 290 | || (c >= 0x0d66 && c <= 0x0d6f) |
| 291 | || (c >= 0x0e50 && c <= 0x0e59) |
| 292 | || (c >= 0x0ed0 && c <= 0x0ed9) |
| 293 | || (c >= 0x0f20 && c <= 0x0f33)) |
| 294 | return 2; |
| 295 | |
| 296 | /* Special characters. */ |
| 297 | if (c == 0x00b5 |
| 298 | || c == 0x00b7 |
| 299 | || (c >= 0x02b0 && c <= 0x02b8) |
| 300 | || c == 0x02bb |
| 301 | || (c >= 0x02bd && c <= 0x02c1) |
| 302 | || (c >= 0x02d0 && c <= 0x02d1) |
| 303 | || (c >= 0x02e0 && c <= 0x02e4) |
| 304 | || c == 0x037a |
| 305 | || c == 0x0559 |
| 306 | || c == 0x093d |
| 307 | || c == 0x0b3d |
| 308 | || c == 0x1fbe |
| 309 | || (c >= 0x203f && c <= 0x2040) |
| 310 | || c == 0x2102 |
| 311 | || c == 0x2107 |
| 312 | || (c >= 0x210a && c <= 0x2113) |
| 313 | || c == 0x2115 |
| 314 | || (c >= 0x2118 && c <= 0x211d) |
| 315 | || c == 0x2124 |
| 316 | || c == 0x2126 |
| 317 | || c == 0x2128 |
| 318 | || (c >= 0x212a && c <= 0x2131) |
| 319 | || (c >= 0x2133 && c <= 0x2138) |
| 320 | || (c >= 0x2160 && c <= 0x2182) |
| 321 | || (c >= 0x3005 && c <= 0x3007) |
| 322 | || (c >= 0x3021 && c <= 0x3029)) |
| 323 | return 1; |
| 324 | } |
| 325 | |
| 326 | if (CPP_OPTION (pfile, cplusplus) || !CPP_PEDANTIC (pfile)) |
| 327 | { |
| 328 | /* Greek. */ |
| 329 | if (c == 0x0384) |
| 330 | return 1; |
| 331 | |
| 332 | /* Cyrillic. */ |
| 333 | if (c == 0x040d) |
| 334 | return 1; |
| 335 | |
| 336 | /* Hebrew. */ |
| 337 | if (c >= 0x05f3 && c <= 0x05f4) |
| 338 | return 1; |
| 339 | |
| 340 | /* Lao. */ |
| 341 | if ((c >= 0x0ead && c <= 0x0eb0) |
| 342 | || (c == 0x0eb2) |
| 343 | || (c == 0x0eb3) |
| 344 | || (c == 0x0ebd) |
| 345 | || (c >= 0x0ec0 && c <= 0x0ec4) |
| 346 | || (c == 0x0ec6)) |
| 347 | return 1; |
| 348 | |
| 349 | /* Hiragana */ |
| 350 | if (c == 0x3094 |
| 351 | || (c >= 0x309d && c <= 0x309e)) |
| 352 | return 1; |
| 353 | |
| 354 | /* Katakana */ |
| 355 | if ((c >= 0x30a1 && c <= 0x30fe)) |
| 356 | return 1; |
| 357 | |
| 358 | /* Hangul */ |
| 359 | if ((c >= 0x1100 && c <= 0x1159) |
| 360 | || (c >= 0x1161 && c <= 0x11a2) |
| 361 | || (c >= 0x11a8 && c <= 0x11f9)) |
| 362 | return 1; |
| 363 | |
| 364 | /* CJK Unified Ideographs */ |
| 365 | if ((c >= 0xf900 && c <= 0xfa2d) |
| 366 | || (c >= 0xfb1f && c <= 0xfb36) |
| 367 | || (c >= 0xfb38 && c <= 0xfb3c) |
| 368 | || (c == 0xfb3e) |
| 369 | || (c >= 0xfb40 && c <= 0xfb41) |
| 370 | || (c >= 0xfb42 && c <= 0xfb44) |
| 371 | || (c >= 0xfb46 && c <= 0xfbb1) |
| 372 | || (c >= 0xfbd3 && c <= 0xfd3f) |
| 373 | || (c >= 0xfd50 && c <= 0xfd8f) |
| 374 | || (c >= 0xfd92 && c <= 0xfdc7) |
| 375 | || (c >= 0xfdf0 && c <= 0xfdfb) |
| 376 | || (c >= 0xfe70 && c <= 0xfe72) |
| 377 | || (c == 0xfe74) |
| 378 | || (c >= 0xfe76 && c <= 0xfefc) |
| 379 | || (c >= 0xff21 && c <= 0xff3a) |
| 380 | || (c >= 0xff41 && c <= 0xff5a) |
| 381 | || (c >= 0xff66 && c <= 0xffbe) |
| 382 | || (c >= 0xffc2 && c <= 0xffc7) |
| 383 | || (c >= 0xffca && c <= 0xffcf) |
| 384 | || (c >= 0xffd2 && c <= 0xffd7) |
| 385 | || (c >= 0xffda && c <= 0xffdc) |
| 386 | || (c >= 0x4e00 && c <= 0x9fa5)) |
| 387 | return 1; |
| 388 | } |
| 389 | |
| 390 | /* Latin */ |
| 391 | if ((c >= 0x00c0 && c <= 0x00d6) |
| 392 | || (c >= 0x00d8 && c <= 0x00f6) |
| 393 | || (c >= 0x00f8 && c <= 0x01f5) |
| 394 | || (c >= 0x01fa && c <= 0x0217) |
| 395 | || (c >= 0x0250 && c <= 0x02a8) |
| 396 | || (c >= 0x1e00 && c <= 0x1e9a) |
| 397 | || (c >= 0x1ea0 && c <= 0x1ef9)) |
| 398 | return 1; |
| 399 | |
| 400 | /* Greek */ |
| 401 | if ((c >= 0x0388 && c <= 0x038a) |
| 402 | || (c == 0x038c) |
| 403 | || (c >= 0x038e && c <= 0x03a1) |
| 404 | || (c >= 0x03a3 && c <= 0x03ce) |
| 405 | || (c >= 0x03d0 && c <= 0x03d6) |
| 406 | || (c == 0x03da) |
| 407 | || (c == 0x03dc) |
| 408 | || (c == 0x03de) |
| 409 | || (c == 0x03e0) |
| 410 | || (c >= 0x03e2 && c <= 0x03f3) |
| 411 | || (c >= 0x1f00 && c <= 0x1f15) |
| 412 | || (c >= 0x1f18 && c <= 0x1f1d) |
| 413 | || (c >= 0x1f20 && c <= 0x1f45) |
| 414 | || (c >= 0x1f48 && c <= 0x1f4d) |
| 415 | || (c >= 0x1f50 && c <= 0x1f57) |
| 416 | || (c == 0x1f59) |
| 417 | || (c == 0x1f5b) |
| 418 | || (c == 0x1f5d) |
| 419 | || (c >= 0x1f5f && c <= 0x1f7d) |
| 420 | || (c >= 0x1f80 && c <= 0x1fb4) |
| 421 | || (c >= 0x1fb6 && c <= 0x1fbc) |
| 422 | || (c >= 0x1fc2 && c <= 0x1fc4) |
| 423 | || (c >= 0x1fc6 && c <= 0x1fcc) |
| 424 | || (c >= 0x1fd0 && c <= 0x1fd3) |
| 425 | || (c >= 0x1fd6 && c <= 0x1fdb) |
| 426 | || (c >= 0x1fe0 && c <= 0x1fec) |
| 427 | || (c >= 0x1ff2 && c <= 0x1ff4) |
| 428 | || (c >= 0x1ff6 && c <= 0x1ffc)) |
| 429 | return 1; |
| 430 | |
| 431 | /* Cyrillic */ |
| 432 | if ((c >= 0x0401 && c <= 0x040c) |
| 433 | || (c >= 0x040f && c <= 0x044f) |
| 434 | || (c >= 0x0451 && c <= 0x045c) |
| 435 | || (c >= 0x045e && c <= 0x0481) |
| 436 | || (c >= 0x0490 && c <= 0x04c4) |
| 437 | || (c >= 0x04c7 && c <= 0x04c8) |
| 438 | || (c >= 0x04cb && c <= 0x04cc) |
| 439 | || (c >= 0x04d0 && c <= 0x04eb) |
| 440 | || (c >= 0x04ee && c <= 0x04f5) |
| 441 | || (c >= 0x04f8 && c <= 0x04f9)) |
| 442 | return 1; |
| 443 | |
| 444 | /* Armenian */ |
| 445 | if ((c >= 0x0531 && c <= 0x0556) |
| 446 | || (c >= 0x0561 && c <= 0x0587)) |
| 447 | return 1; |
| 448 | |
| 449 | /* Hebrew */ |
| 450 | if ((c >= 0x05d0 && c <= 0x05ea) |
| 451 | || (c >= 0x05f0 && c <= 0x05f2)) |
| 452 | return 1; |
| 453 | |
| 454 | /* Arabic */ |
| 455 | if ((c >= 0x0621 && c <= 0x063a) |
| 456 | || (c >= 0x0640 && c <= 0x0652) |
| 457 | || (c >= 0x0670 && c <= 0x06b7) |
| 458 | || (c >= 0x06ba && c <= 0x06be) |
| 459 | || (c >= 0x06c0 && c <= 0x06ce) |
| 460 | || (c >= 0x06e5 && c <= 0x06e7)) |
| 461 | return 1; |
| 462 | |
| 463 | /* Devanagari */ |
| 464 | if ((c >= 0x0905 && c <= 0x0939) |
| 465 | || (c >= 0x0958 && c <= 0x0962)) |
| 466 | return 1; |
| 467 | |
| 468 | /* Bengali */ |
| 469 | if ((c >= 0x0985 && c <= 0x098c) |
| 470 | || (c >= 0x098f && c <= 0x0990) |
| 471 | || (c >= 0x0993 && c <= 0x09a8) |
| 472 | || (c >= 0x09aa && c <= 0x09b0) |
| 473 | || (c == 0x09b2) |
| 474 | || (c >= 0x09b6 && c <= 0x09b9) |
| 475 | || (c >= 0x09dc && c <= 0x09dd) |
| 476 | || (c >= 0x09df && c <= 0x09e1) |
| 477 | || (c >= 0x09f0 && c <= 0x09f1)) |
| 478 | return 1; |
| 479 | |
| 480 | /* Gurmukhi */ |
| 481 | if ((c >= 0x0a05 && c <= 0x0a0a) |
| 482 | || (c >= 0x0a0f && c <= 0x0a10) |
| 483 | || (c >= 0x0a13 && c <= 0x0a28) |
| 484 | || (c >= 0x0a2a && c <= 0x0a30) |
| 485 | || (c >= 0x0a32 && c <= 0x0a33) |
| 486 | || (c >= 0x0a35 && c <= 0x0a36) |
| 487 | || (c >= 0x0a38 && c <= 0x0a39) |
| 488 | || (c >= 0x0a59 && c <= 0x0a5c) |
| 489 | || (c == 0x0a5e)) |
| 490 | return 1; |
| 491 | |
| 492 | /* Gujarati */ |
| 493 | if ((c >= 0x0a85 && c <= 0x0a8b) |
| 494 | || (c == 0x0a8d) |
| 495 | || (c >= 0x0a8f && c <= 0x0a91) |
| 496 | || (c >= 0x0a93 && c <= 0x0aa8) |
| 497 | || (c >= 0x0aaa && c <= 0x0ab0) |
| 498 | || (c >= 0x0ab2 && c <= 0x0ab3) |
| 499 | || (c >= 0x0ab5 && c <= 0x0ab9) |
| 500 | || (c == 0x0ae0)) |
| 501 | return 1; |
| 502 | |
| 503 | /* Oriya */ |
| 504 | if ((c >= 0x0b05 && c <= 0x0b0c) |
| 505 | || (c >= 0x0b0f && c <= 0x0b10) |
| 506 | || (c >= 0x0b13 && c <= 0x0b28) |
| 507 | || (c >= 0x0b2a && c <= 0x0b30) |
| 508 | || (c >= 0x0b32 && c <= 0x0b33) |
| 509 | || (c >= 0x0b36 && c <= 0x0b39) |
| 510 | || (c >= 0x0b5c && c <= 0x0b5d) |
| 511 | || (c >= 0x0b5f && c <= 0x0b61)) |
| 512 | return 1; |
| 513 | |
| 514 | /* Tamil */ |
| 515 | if ((c >= 0x0b85 && c <= 0x0b8a) |
| 516 | || (c >= 0x0b8e && c <= 0x0b90) |
| 517 | || (c >= 0x0b92 && c <= 0x0b95) |
| 518 | || (c >= 0x0b99 && c <= 0x0b9a) |
| 519 | || (c == 0x0b9c) |
| 520 | || (c >= 0x0b9e && c <= 0x0b9f) |
| 521 | || (c >= 0x0ba3 && c <= 0x0ba4) |
| 522 | || (c >= 0x0ba8 && c <= 0x0baa) |
| 523 | || (c >= 0x0bae && c <= 0x0bb5) |
| 524 | || (c >= 0x0bb7 && c <= 0x0bb9)) |
| 525 | return 1; |
| 526 | |
| 527 | /* Telugu */ |
| 528 | if ((c >= 0x0c05 && c <= 0x0c0c) |
| 529 | || (c >= 0x0c0e && c <= 0x0c10) |
| 530 | || (c >= 0x0c12 && c <= 0x0c28) |
| 531 | || (c >= 0x0c2a && c <= 0x0c33) |
| 532 | || (c >= 0x0c35 && c <= 0x0c39) |
| 533 | || (c >= 0x0c60 && c <= 0x0c61)) |
| 534 | return 1; |
| 535 | |
| 536 | /* Kannada */ |
| 537 | if ((c >= 0x0c85 && c <= 0x0c8c) |
| 538 | || (c >= 0x0c8e && c <= 0x0c90) |
| 539 | || (c >= 0x0c92 && c <= 0x0ca8) |
| 540 | || (c >= 0x0caa && c <= 0x0cb3) |
| 541 | || (c >= 0x0cb5 && c <= 0x0cb9) |
| 542 | || (c >= 0x0ce0 && c <= 0x0ce1)) |
| 543 | return 1; |
| 544 | |
| 545 | /* Malayalam */ |
| 546 | if ((c >= 0x0d05 && c <= 0x0d0c) |
| 547 | || (c >= 0x0d0e && c <= 0x0d10) |
| 548 | || (c >= 0x0d12 && c <= 0x0d28) |
| 549 | || (c >= 0x0d2a && c <= 0x0d39) |
| 550 | || (c >= 0x0d60 && c <= 0x0d61)) |
| 551 | return 1; |
| 552 | |
| 553 | /* Thai */ |
| 554 | if ((c >= 0x0e01 && c <= 0x0e30) |
| 555 | || (c >= 0x0e32 && c <= 0x0e33) |
| 556 | || (c >= 0x0e40 && c <= 0x0e46) |
| 557 | || (c >= 0x0e4f && c <= 0x0e5b)) |
| 558 | return 1; |
| 559 | |
| 560 | /* Lao */ |
| 561 | if ((c >= 0x0e81 && c <= 0x0e82) |
| 562 | || (c == 0x0e84) |
| 563 | || (c == 0x0e87) |
| 564 | || (c == 0x0e88) |
| 565 | || (c == 0x0e8a) |
| 566 | || (c == 0x0e8d) |
| 567 | || (c >= 0x0e94 && c <= 0x0e97) |
| 568 | || (c >= 0x0e99 && c <= 0x0e9f) |
| 569 | || (c >= 0x0ea1 && c <= 0x0ea3) |
| 570 | || (c == 0x0ea5) |
| 571 | || (c == 0x0ea7) |
| 572 | || (c == 0x0eaa) |
| 573 | || (c == 0x0eab)) |
| 574 | return 1; |
| 575 | |
| 576 | /* Georgian */ |
| 577 | if ((c >= 0x10a0 && c <= 0x10c5) |
| 578 | || (c >= 0x10d0 && c <= 0x10f6)) |
| 579 | return 1; |
| 580 | |
| 581 | /* Hiragana */ |
| 582 | if ((c >= 0x3041 && c <= 0x3093) |
| 583 | || (c >= 0x309b && c <= 0x309c)) |
| 584 | return 1; |
| 585 | |
| 586 | /* Bopmofo */ |
| 587 | if ((c >= 0x3105 && c <= 0x312c)) |
| 588 | return 1; |
| 589 | |
| 590 | return 0; |
| 591 | } |