143 #ifndef __UTF_OLD_H__ 144 #define __UTF_OLD_H__ 146 #ifndef U_HIDE_DEPRECATED_API 154 #ifdef U_USE_UTF_DEPRECATES 162 typedef int32_t UTextOffset;
194 #define UTF8_ERROR_VALUE_1 0x15 201 #define UTF8_ERROR_VALUE_2 0x9f 209 #define UTF_ERROR_VALUE 0xffff 217 #define UTF_IS_ERROR(c) \ 218 (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2) 225 #define UTF_IS_VALID(c) \ 226 (UTF_IS_UNICODE_CHAR(c) && \ 227 (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2) 233 #define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800) 240 #define UTF_IS_UNICODE_NONCHAR(c) \ 242 ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ 243 (uint32_t)(c)<=0x10ffff) 260 #define UTF_IS_UNICODE_CHAR(c) \ 261 ((uint32_t)(c)<0xd800 || \ 262 ((uint32_t)(c)>0xdfff && \ 263 (uint32_t)(c)<=0x10ffff && \ 264 !UTF_IS_UNICODE_NONCHAR(c))) 272 #define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte]) 278 #define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) 281 #define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0) 283 #define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e) 285 #define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80) 288 #define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f) 304 # define UTF8_CHAR_LENGTH(c) \ 305 ((uint32_t)(c)<=0x7f ? 1 : \ 306 ((uint32_t)(c)<=0x7ff ? 2 : \ 307 ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \ 311 # define UTF8_CHAR_LENGTH(c) \ 312 ((uint32_t)(c)<=0x7f ? 1 : \ 313 ((uint32_t)(c)<=0x7ff ? 2 : \ 314 ((uint32_t)(c)<=0xffff ? 3 : \ 315 ((uint32_t)(c)<=0x10ffff ? 4 : \ 316 ((uint32_t)(c)<=0x3ffffff ? 5 : \ 317 ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \ 326 #define UTF8_MAX_CHAR_LENGTH 4 329 #define UTF8_ARRAY_SIZE(size) ((5*(size))/2) 332 #define UTF8_GET_CHAR_UNSAFE(s, i, c) { \ 333 int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \ 334 UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \ 335 UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \ 339 #define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 340 int32_t _utf8_get_char_safe_index=(int32_t)(i); \ 341 UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \ 342 UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \ 346 #define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \ 348 if((uint8_t)((c)-0xc0)<0x35) { \ 349 uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \ 350 UTF8_MASK_LEAD_BYTE(c, __count); \ 354 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 356 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 358 (c)=((c)<<6)|((s)[(i)++]&0x3f); \ 366 #define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \ 367 if((uint32_t)(c)<=0x7f) { \ 368 (s)[(i)++]=(uint8_t)(c); \ 370 if((uint32_t)(c)<=0x7ff) { \ 371 (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \ 373 if((uint32_t)(c)<=0xffff) { \ 374 (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \ 376 (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \ 377 (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \ 379 (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \ 381 (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \ 386 #define UTF8_FWD_1_UNSAFE(s, i) { \ 387 (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \ 391 #define UTF8_FWD_N_UNSAFE(s, i, n) { \ 394 UTF8_FWD_1_UNSAFE(s, i); \ 400 #define UTF8_SET_CHAR_START_UNSAFE(s, i) { \ 401 while(UTF8_IS_TRAIL((s)[i])) { --(i); } \ 405 #define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 408 if(UTF8_IS_LEAD(c)) { \ 409 (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \ 411 (c)=UTF8_ERROR_VALUE_1; \ 417 #define UTF8_APPEND_CHAR_SAFE(s, i, length, c) { \ 418 if((uint32_t)(c)<=0x7f) { \ 419 (s)[(i)++]=(uint8_t)(c); \ 421 (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \ 426 #define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length) 429 #define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n) 432 #define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i) 435 #define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \ 437 if(UTF8_IS_TRAIL(c)) { \ 438 uint8_t __b, __count=1, __shift=6; \ 445 UTF8_MASK_LEAD_BYTE(__b, __count); \ 446 (c)|=(UChar32)__b<<__shift; \ 449 (c)|=(UChar32)(__b&0x3f)<<__shift; \ 458 #define UTF8_BACK_1_UNSAFE(s, i) { \ 459 while(UTF8_IS_TRAIL((s)[--(i)])) {} \ 463 #define UTF8_BACK_N_UNSAFE(s, i, n) { \ 466 UTF8_BACK_1_UNSAFE(s, i); \ 472 #define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 473 UTF8_BACK_1_UNSAFE(s, i); \ 474 UTF8_FWD_1_UNSAFE(s, i); \ 478 #define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 482 (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \ 484 (c)=UTF8_ERROR_VALUE_1; \ 490 #define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i) 493 #define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n) 496 #define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length) 501 #define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800) 504 #define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00) 507 #define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0) 510 #define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 513 #define UTF16_GET_PAIR_VALUE(first, second) \ 514 (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET) 517 #define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) 520 #define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) 523 #define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary) 526 #define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary) 529 #define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar) 532 #define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar) 535 #define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar) 538 #define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff) 541 #define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) 544 #define UTF16_MAX_CHAR_LENGTH 2 547 #define UTF16_ARRAY_SIZE(size) (size) 560 #define UTF16_GET_CHAR_UNSAFE(s, i, c) { \ 562 if(UTF_IS_SURROGATE(c)) { \ 563 if(UTF_IS_SURROGATE_FIRST(c)) { \ 564 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \ 566 (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \ 572 #define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 574 if(UTF_IS_SURROGATE(c)) { \ 576 if(UTF_IS_SURROGATE_FIRST(c)) { \ 577 if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \ 578 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 582 (c)=UTF_ERROR_VALUE; \ 585 if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 586 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ 590 (c)=UTF_ERROR_VALUE; \ 593 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 594 (c)=UTF_ERROR_VALUE; \ 599 #define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \ 601 if(UTF_IS_FIRST_SURROGATE(c)) { \ 602 (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \ 607 #define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \ 608 if((uint32_t)(c)<=0xffff) { \ 609 (s)[(i)++]=(uint16_t)(c); \ 611 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 612 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 617 #define UTF16_FWD_1_UNSAFE(s, i) { \ 618 if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \ 624 #define UTF16_FWD_N_UNSAFE(s, i, n) { \ 627 UTF16_FWD_1_UNSAFE(s, i); \ 633 #define UTF16_SET_CHAR_START_UNSAFE(s, i) { \ 634 if(UTF_IS_SECOND_SURROGATE((s)[i])) { \ 640 #define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 642 if(UTF_IS_FIRST_SURROGATE(c)) { \ 644 if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \ 646 (c)=UTF16_GET_PAIR_VALUE((c), __c2); \ 650 (c)=UTF_ERROR_VALUE; \ 652 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 654 (c)=UTF_ERROR_VALUE; \ 659 #define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \ 660 if((uint32_t)(c)<=0xffff) { \ 661 (s)[(i)++]=(uint16_t)(c); \ 662 } else if((uint32_t)(c)<=0x10ffff) { \ 663 if((i)+1<(length)) { \ 664 (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 665 (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 667 (s)[(i)++]=UTF_ERROR_VALUE; \ 670 (s)[(i)++]=UTF_ERROR_VALUE; \ 675 #define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length) 678 #define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n) 681 #define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i) 684 #define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \ 686 if(UTF_IS_SECOND_SURROGATE(c)) { \ 687 (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \ 692 #define UTF16_BACK_1_UNSAFE(s, i) { \ 693 if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \ 699 #define UTF16_BACK_N_UNSAFE(s, i, n) { \ 702 UTF16_BACK_1_UNSAFE(s, i); \ 708 #define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 709 if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \ 715 #define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 717 if(UTF_IS_SECOND_SURROGATE(c)) { \ 719 if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \ 721 (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \ 725 (c)=UTF_ERROR_VALUE; \ 727 } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \ 729 (c)=UTF_ERROR_VALUE; \ 734 #define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i) 737 #define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n) 740 #define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) 760 #define UTF32_IS_SAFE(c, strict) \ 762 (uint32_t)(c)<=0x10ffff : \ 763 UTF_IS_UNICODE_CHAR(c)) 776 #define UTF32_IS_SINGLE(uchar) 1 778 #define UTF32_IS_LEAD(uchar) 0 780 #define UTF32_IS_TRAIL(uchar) 0 785 #define UTF32_NEED_MULTIPLE_UCHAR(c) 0 787 #define UTF32_CHAR_LENGTH(c) 1 789 #define UTF32_MAX_CHAR_LENGTH 1 794 #define UTF32_ARRAY_SIZE(size) (size) 797 #define UTF32_GET_CHAR_UNSAFE(s, i, c) { \ 802 #define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \ 804 if(!UTF32_IS_SAFE(c, strict)) { \ 805 (c)=UTF_ERROR_VALUE; \ 812 #define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \ 817 #define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \ 822 #define UTF32_FWD_1_UNSAFE(s, i) { \ 827 #define UTF32_FWD_N_UNSAFE(s, i, n) { \ 832 #define UTF32_SET_CHAR_START_UNSAFE(s, i) { \ 836 #define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \ 838 if(!UTF32_IS_SAFE(c, strict)) { \ 839 (c)=UTF_ERROR_VALUE; \ 844 #define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \ 845 if((uint32_t)(c)<=0x10ffff) { \ 853 #define UTF32_FWD_1_SAFE(s, i, length) { \ 858 #define UTF32_FWD_N_SAFE(s, i, length, n) { \ 859 if(((i)+=(n))>(length)) { \ 865 #define UTF32_SET_CHAR_START_SAFE(s, start, i) { \ 871 #define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \ 876 #define UTF32_BACK_1_UNSAFE(s, i) { \ 881 #define UTF32_BACK_N_UNSAFE(s, i, n) { \ 886 #define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \ 890 #define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \ 892 if(!UTF32_IS_SAFE(c, strict)) { \ 893 (c)=UTF_ERROR_VALUE; \ 898 #define UTF32_BACK_1_SAFE(s, start, i) { \ 903 #define UTF32_BACK_N_SAFE(s, start, i, n) { \ 911 #define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \ 921 #define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size) 924 #define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c) 927 #define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) 931 #define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c) 934 #define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) 938 #define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c) 941 #define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 945 #define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i) 948 #define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length) 952 #define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n) 955 #define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n) 959 #define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i) 962 #define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i) 966 #define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c) 969 #define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) 973 #define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i) 976 #define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i) 980 #define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n) 983 #define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n) 987 #define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) 990 #define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) 999 #define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar) 1006 #define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar) 1013 #define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar) 1020 #define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c) 1027 #define UTF_CHAR_LENGTH(c) U16_LENGTH(c) 1034 #define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH 1045 #define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c) 1058 #define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c) 1071 #define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c) 1082 #define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length) 1093 #define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n) 1109 #define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i) 1122 #define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c) 1135 #define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i) 1148 #define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n) 1164 #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) C API: 8-bit Unicode handling macros.
C API: Code point macros.
C API: 16-bit Unicode handling macros.