[FEAT MERGE]charset revise

This commit is contained in:
akaError 2023-11-27 08:52:01 +00:00 committed by ob-robot
parent 654e89bbe1
commit e4f7452b2d
50 changed files with 2188803 additions and 6328 deletions

View File

@ -82,8 +82,6 @@ if(OB_BUILD_CLOSE_MODULES)
ob_define(OB_BUILD_TDE_SECURITY ON)
ob_define(OB_BUILD_AUDIT_SECURITY ON)
ob_define(OB_BUILD_LABEL_SECURITY ON)
#
ob_define(OB_BUILD_FULL_CHARSET ON)
# SPM
ob_define(OB_BUILD_SPM ON)
@ -121,10 +119,6 @@ if(OB_BUILD_LABEL_SECURITY)
add_definitions(-DOB_BUILD_LABEL_SECURITY)
endif()
if(OB_BUILD_FULL_CHARSET)
add_definitions(-DOB_BUILD_FULL_CHARSET)
endif()
if(OB_BUILD_SPM)
add_definitions(-DOB_BUILD_SPM)
endif()

View File

@ -85,14 +85,6 @@ if(OB_BUILD_AUDIT_SECURITY)
)
endif()
if(OB_BUILD_ORACLE_XML)
target_include_directories(
oblib_base_base_base INTERFACE
${CMAKE_SOURCE_DIR}/close_modules/charset
${CMAKE_SOURCE_DIR}/close_modules/charset/deps/oblib/src/
)
endif()
if(OB_USE_BABASSL)
target_include_directories(
oblib_base_base_base INTERFACE

View File

@ -10,16 +10,21 @@ ob_set_subtarget(oblib_lib ALONE
)
ob_set_subtarget(oblib_lib charset
charset/ob_ctype_bin_os.cc
charset/ob_ctype_gb18030_os.cc
charset/ob_ctype_gbk_os.cc
charset/ob_ctype_latin1_os.cc
charset/ob_ctype_mb_os.cc
charset/ob_ctype_simple_os.cc
charset/ob_ctype_os.cc
charset/ob_ctype_utf16_os.cc
charset/ob_ctype_utf8_os.cc
charset/ob_dtoa_os.cc
charset/ob_ctype_bin.cc
charset/ob_ctype.cc
charset/ob_ctype_gb18030.cc
charset/ob_ctype_gbk.cc
charset/ob_ctype_latin1.cc
charset/ob_ctype_mb.cc
charset/ob_ctype_simple.cc
charset/ob_ctype_uca.cc
charset/ob_ctype_utf8.cc
charset/ob_ctype_utf16.cc
charset/ob_dtoa.cc
charset/uca900_ja_tbls.cc
charset/uca900_zh_tbls.cc
charset/uca900_zh2_tbls.cc
charset/uca900_zh3_tbls.cc
charset/ob_charset.cpp
)

171
deps/oblib/src/lib/charset/mb_wc.h vendored Normal file
View File

@ -0,0 +1,171 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef MB_WC_INCLUDED
#define MB_WC_INCLUDED
/**
@file mb_wc.h
Definitions of mb_wc (multibyte to wide character, i.e., effectively
parsing a UTF-8 character) functions for UTF-8 (both three- and four-byte).
These are available both as inline functions, as C-style thunks so that they
can fit into MY_CHARSET_HANDLER, and as functors.
The functors exist so that you can specialize a class on them and get them
inlined instead of having to call them through the function pointer in
MY_CHARSET_HANDLER; mb_wc is in itself so cheap (the most common case is
just a single byte load and a predictable compare) that the call overhead
in a tight loop is significant, and these routines tend to take up a lot
of CPU time when sorting. Typically, at the outermost level, you'd simply
compare cs->cset->mb_wc with my_mb_wc_{utf8,utf8mb4}_thunk, and if so,
instantiate your function with the given class. If it doesn't match,
you can use Mb_wc_through_function_pointer, which calls through the
function pointer as usual. (It will cache the function pointer for you,
which is typically faster than looking it up all the time -- the compiler
cannot always figure out on its own that it doesn't change.)
The Mb_wc_* classes should be sent by _value_, not by reference, since
they are never larger than two pointers (and usually simply zero).
*/
#include "lib/charset/ob_ctype.h"
#define ALWAYS_INLINE __attribute__((always_inline)) inline
/*
  Forward declaration of the UTF-8 decoder template. The functors declared
  below call it, while its definition appears later in this header.
  RANGE_CHECK selects whether reads are bounds-checked against e;
  SUPPORT_MB4 selects whether four-byte sequences are accepted.
*/
template <bool RANGE_CHECK, bool SUPPORT_MB4>
static int ob_mb_wc_utf8_prototype(ob_wc_t *pwc, const unsigned char *s,
const unsigned char *e);
/**
  Stateless functor decoding a single UTF-8 character of at most three
  bytes into a wide character. Reads are bounds-checked against e; a
  four-byte lead byte is reported as an illegal sequence.
*/
struct Mb_wc_utf8 {
  Mb_wc_utf8() {}

  ALWAYS_INLINE
  int operator()(ob_wc_t *pwc, const unsigned char *s, const unsigned char *e) const {
    // Template switches spelled out as named constants for readability.
    const bool range_check = true;
    const bool support_mb4 = false;
    return ob_mb_wc_utf8_prototype<range_check, support_mb4>(pwc, s, e);
  }
};
/**
  Stateless functor decoding a single UTF-8 character of at most four
  bytes into a wide character. Reads are bounds-checked against e.
*/
struct Mb_wc_utf8mb4 {
  Mb_wc_utf8mb4() {}

  ALWAYS_INLINE
  int operator()(ob_wc_t *pwc, const unsigned char *s, const unsigned char *e) const {
    // Template switches spelled out as named constants for readability.
    const bool range_check = true;
    const bool support_mb4 = true;
    return ob_mb_wc_utf8_prototype<range_check, support_mb4>(pwc, s, e);
  }
};
/**
  Functor that decodes a multibyte sequence by delegating to the mb_wc
  routine of a given charset. The routine's address is read once, at
  construction time, and reused for every call.
*/
class Mb_wc_through_function_pointer {
  // Signature of the charset's multibyte-to-wide-character routine.
  typedef int (*mbwc_func_t)(const ObCharsetInfo *, ob_wc_t *, const unsigned char *,
                             const unsigned char *);

 public:
  explicit Mb_wc_through_function_pointer(const ObCharsetInfo *cs)
      : m_funcptr(cs->cset->mb_wc), m_cs(cs) {}

  int operator()(ob_wc_t *pwc, const unsigned char *s, const unsigned char *e) const {
    return (*m_funcptr)(m_cs, pwc, s, e);
  }

 private:
  const mbwc_func_t m_funcptr;      // cached cs->cset->mb_wc
  const ObCharsetInfo *const m_cs;  // charset handed back to the routine
};
/**
  Decodes the single UTF-8 encoded character that starts at s and stores
  its code point in *pwc.

  @tparam RANGE_CHECK  when true, each read is first checked against e and
                       an OB_CS_TOOSMALL* code is returned if the sequence
                       would run past it; when false, e is never consulted.
  @tparam SUPPORT_MB4  when true, four-byte sequences are decoded; when
                       false, any lead byte >= 0xf0 yields OB_CS_ILSEQ.
  @param[out] pwc  receives the decoded code point. In the three- and
                   four-byte paths it is written before the final validity
                   checks, so it may hold a rejected value when
                   OB_CS_ILSEQ is returned.
  @param s  first byte of the sequence.
  @param e  one past the last readable byte (used only if RANGE_CHECK).
  @return the number of bytes consumed (1..4) on success, OB_CS_ILSEQ for
          a malformed sequence, or OB_CS_TOOSMALL / OB_CS_TOOSMALL2..4 when
          RANGE_CHECK is set and the input is too short.
*/
template <bool RANGE_CHECK, bool SUPPORT_MB4>
static ALWAYS_INLINE int ob_mb_wc_utf8_prototype(ob_wc_t *pwc, const unsigned char *s,
const unsigned char *e) {
if (RANGE_CHECK && s >= e) return OB_CS_TOOSMALL;
unsigned char c = s[0];
// Single-byte (ASCII) fast path.
if (c < 0x80) {
*pwc = c;
return 1;
}
// Two-byte sequence: lead byte 0xc2..0xdf.
if (c < 0xe0) {
// 0x80..0xbf cannot start a sequence; 0xc0/0xc1 would encode a code
// point below 0x80 (overlong form).
if (c < 0xc2) // Resulting code point would be less than 0x80.
return OB_CS_ILSEQ;
if (RANGE_CHECK && s + 2 > e) return OB_CS_TOOSMALL2;
if ((s[1] & 0xc0) != 0x80) // Next byte must be a continuation byte.
return OB_CS_ILSEQ;
*pwc = ((ob_wc_t)(c & 0x1f) << 6) + (ob_wc_t)(s[1] & 0x3f);
return 2;
}
// Three-byte sequence: lead byte 0xe0..0xef.
if (c < 0xf0) {
if (RANGE_CHECK && s + 3 > e) return OB_CS_TOOSMALL3;
// Next two bytes must be continuation bytes.
// Both are tested at once: they are copied into one 16-bit value and the
// same mask is applied to each byte.
uint16 two_bytes;
memcpy(&two_bytes, s + 1, sizeof(two_bytes));
if ((two_bytes & 0xc0c0) != 0x8080) // Endianness does not matter.
return OB_CS_ILSEQ;
*pwc = ((ob_wc_t)(c & 0x0f) << 12) + ((ob_wc_t)(s[1] & 0x3f) << 6) +
(ob_wc_t)(s[2] & 0x3f);
// Reject overlong encodings of code points that fit in two bytes.
if (*pwc < 0x800) return OB_CS_ILSEQ;
/*
According to RFC 3629, UTF-8 should prohibit characters between
U+D800 and U+DFFF, which are reserved for surrogate pairs and do
not directly represent characters.
*/
if (*pwc >= 0xd800 && *pwc <= 0xdfff) return OB_CS_ILSEQ;
return 3;
}
// Four-byte sequence; only decoded when the template enables it.
if (SUPPORT_MB4) {
if (RANGE_CHECK && s + 4 > e) /* We need 4 bytes */
return OB_CS_TOOSMALL4;
/*
This byte must be of the form 11110xxx, and the next three bytes
must be continuation bytes.
*/
// All four bytes are checked with one 32-bit compare; the lead byte uses
// a different mask than the continuation bytes, so the constants depend
// on the host byte order.
uint32 four_bytes;
memcpy(&four_bytes, s, sizeof(four_bytes));
#ifdef WORDS_BIGENDIAN
if ((four_bytes & 0xf8c0c0c0) != 0xf0808080)
#else
if ((four_bytes & 0xc0c0c0f8) != 0x808080f0)
#endif
return OB_CS_ILSEQ;
*pwc = ((ob_wc_t)(c & 0x07) << 18) + ((ob_wc_t)(s[1] & 0x3f) << 12) +
((ob_wc_t)(s[2] & 0x3f) << 6) + (ob_wc_t)(s[3] & 0x3f);
// Reject overlong encodings and values above the last code point U+10FFFF.
if (*pwc < 0x10000 || *pwc > 0x10ffff) return OB_CS_ILSEQ;
return 4;
}
// Lead byte >= 0xf0 while four-byte support is disabled.
return OB_CS_ILSEQ;
}
/*
  C-linkage entry point with the (cs, pwc, s, e) signature used for charset
  mb_wc function pointers. Only declared here; the definition is not part of
  this header.
*/
extern "C" int ob_mb_wc_utf8mb4_thunk(const ObCharsetInfo *cs, ob_wc_t *pwc,
const unsigned char *s, const unsigned char *e);
#endif // MB_WC_INCLUDED

View File

@ -0,0 +1,74 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OB_BYTEORDER_H
#define OB_BYTEORDER_H
#include <stdint.h>
#include "lib/charset/ob_template_helper.h"
#include <netinet/in.h>
/*
Functions for big-endian loads and stores. These are safe to use
no matter what the compiler, CPU or alignment, and also with -fstrict-aliasing.
The stores return a pointer just past the value that was written.
*/
// Reads a 16-bit big-endian value from the two bytes at ptr. The bytes are
// combined one at a time, so no alignment is required and the result is the
// same on any host byte order.
inline uint16_t load16be(const char *ptr) {
  const unsigned char *bytes = reinterpret_cast<const unsigned char *>(ptr);
  const unsigned int high = bytes[0];
  const unsigned int low = bytes[1];
  return static_cast<uint16_t>((high << 8) | low);
}
// Reads a 32-bit big-endian value from the four bytes at ptr. The bytes are
// combined one at a time, so no alignment is required and the result is the
// same on any host byte order.
inline uint32_t load32be(const char *ptr) {
  const unsigned char *bytes = reinterpret_cast<const unsigned char *>(ptr);
  uint32_t value = 0;
  for (int i = 0; i < 4; ++i) {
    value = (value << 8) | bytes[i];
  }
  return value;
}
// Writes val at ptr as two bytes, most significant first, and returns the
// address just past them. Writing the bytes explicitly yields the same
// layout on every compiler and host byte order, without a swap helper.
__attribute__((always_inline)) inline char *store16be(char *ptr, uint16_t val) {
  unsigned char *out = reinterpret_cast<unsigned char *>(ptr);
  out[0] = static_cast<unsigned char>(val >> 8);
  out[1] = static_cast<unsigned char>(val & 0xff);
  return ptr + sizeof(val);
}
// Writes val at ptr as four bytes, most significant first, and returns the
// address just past them.
inline char *store32be(char *ptr, uint32_t val) {
  unsigned char *out = reinterpret_cast<unsigned char *>(ptr);
  out[0] = static_cast<unsigned char>((val >> 24) & 0xff);
  out[1] = static_cast<unsigned char>((val >> 16) & 0xff);
  out[2] = static_cast<unsigned char>((val >> 8) & 0xff);
  out[3] = static_cast<unsigned char>(val & 0xff);
  return ptr + sizeof(val);
}
// Adapters for using unsigned char * instead of char *.
inline uint16_t load16be(const unsigned char *ptr) {
return load16be(pointer_cast<const char *>(ptr));
}
inline uint32_t load32be(const unsigned char *ptr) {
return load32be(pointer_cast<const char *>(ptr));
}
__attribute__((always_inline)) inline unsigned char *store16be(unsigned char *ptr, uint16_t val) {
return pointer_cast<unsigned char *>(store16be(pointer_cast<char *>(ptr), val));
}
inline unsigned char *store32be(unsigned char *ptr, uint32_t val) {
return pointer_cast<unsigned char *>(store32be(pointer_cast<char *>(ptr), val));
}
#endif // OB_BYTEORDER_H

View File

@ -293,14 +293,9 @@ const ObCollationWrapper ObCharset::collation_wrap_arr_[ObCharset::VALID_COLLATI
{CS_TYPE_GBK_BIN, CHARSET_GBK, CS_TYPE_GBK_BIN, false, true, 1},
{CS_TYPE_UTF16_GENERAL_CI, CHARSET_UTF16, CS_TYPE_UTF16_GENERAL_CI, true, true, 1},
{CS_TYPE_UTF16_BIN, CHARSET_UTF16, CS_TYPE_UTF16_BIN, false, true, 1},
#ifndef OB_BUILD_FULL_CHARSET
{CS_TYPE_INVALID, CHARSET_INVALID, CS_TYPE_INVALID, false, false, 1},
{CS_TYPE_INVALID, CHARSET_INVALID, CS_TYPE_INVALID, false, false, 1},
#else
//{CS_TYPE_UTF8MB4_ZH_0900_AS_CS, CHARSET_UTF8MB4, CS_TYPE_UTF8MB4_ZH_0900_AS_CS, false, true, 0},
{CS_TYPE_UTF8MB4_UNICODE_CI, CHARSET_UTF8MB4, CS_TYPE_UTF8MB4_UNICODE_CI, false, true, 1},
{CS_TYPE_UTF16_UNICODE_CI, CHARSET_UTF16, CS_TYPE_UTF16_UNICODE_CI, false, true, 1},
#endif
{CS_TYPE_GB18030_CHINESE_CI, CHARSET_GB18030, CS_TYPE_GB18030_CHINESE_CI, true, true, 1},
{CS_TYPE_GB18030_BIN, CHARSET_GB18030, CS_TYPE_GB18030_BIN, false, true, 1},
{CS_TYPE_LATIN1_SWEDISH_CI, CHARSET_LATIN1, CS_TYPE_LATIN1_SWEDISH_CI,true, true, 1},
@ -336,11 +331,7 @@ ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
&ob_charset_gbk_bin, // 87
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 88
NULL, NULL, NULL, NULL, NULL, // 96
#ifdef OB_BUILD_FULL_CHARSET
&ob_charset_utf16_unicode_ci, // 101
#else
NULL,
#endif
NULL, NULL, // 102
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 104
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 112
@ -360,11 +351,7 @@ ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
&ob_charset_gb18030_2022_pinyin_cs, &ob_charset_gb18030_2022_radical_ci,// 218
&ob_charset_gb18030_2022_radical_cs, &ob_charset_gb18030_2022_stroke_ci, // 220
&ob_charset_gb18030_2022_stroke_cs, NULL, // 222
#ifdef OB_BUILD_FULL_CHARSET
&ob_charset_utf8mb4_unicode_ci, // 224
#else
NULL,
#endif
NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 225
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 232
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 240
@ -508,33 +495,6 @@ uint64_t ObCharset::strntoullrnd(const char *str,
return result;
}
#ifdef OB_BUILD_FULL_CHARSET
/*
Convert integer to its string representation in given scale of notation.
SYNOPSIS
int2str()
val - value to convert
dst - points to buffer where string representation should be stored
radix - radix of scale of notation
upcase - set to 1 if we should use upper-case digits
DESCRIPTION
Converts the (long) integer value to its character form and moves it to
the destination buffer followed by a terminating NUL.
If radix is -2..-36, val is taken to be SIGNED, if radix is 2..36, val is
taken to be UNSIGNED. That is, val is signed if and only if radix is.
All other radixes treated as bad and nothing will be changed in this case.
For conversion to decimal representation (radix is -10 or 10) one can use
optimized int10_to_str() function.
RETURN VALUE
Pointer to ending NUL character or NullS if radix is bad.
*/
#endif
//=============================================================
char* ObCharset::lltostr(int64_t val, char *dst, int radix, int upcase)
{
int ret = OB_SUCCESS;
@ -583,7 +543,7 @@ char* ObCharset::lltostr(int64_t val, char *dst, int radix, int upcase)
p = &buffer[sizeof(buffer)-1];
*p = '\0';
new_val= uval / (uint64_t) radix;
*--p = dig_vec[(uchar) (uval- (uint64_t) new_val*(uint64_t) radix)];
*--p = dig_vec[(unsigned char) (uval- (uint64_t) new_val*(uint64_t) radix)];
val = new_val;
ldiv_t res;
while (val != 0)
@ -621,8 +581,8 @@ uint32_t ObCharset::instr(ObCollationType collation_type,
if (is_argument_valid(collation_type, str1, str1_len, str2, str2_len)) {
ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
ob_match_t m_match_t[2];
uint nmatch = 1;
uint m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
unsigned int nmatch = 1;
unsigned int m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
if (0 == m_ret ) {
result = 0;
} else {
@ -642,8 +602,8 @@ int64_t ObCharset::instrb(ObCollationType collation_type,
if (is_argument_valid(collation_type, str1, str1_len, str2, str2_len)) {
ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
ob_match_t m_match_t[2];
uint nmatch = 1;
uint m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
unsigned int nmatch = 1;
unsigned int m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
if (0 != m_ret) {
result = m_match_t[0].end - m_match_t[0].beg;
}
@ -700,9 +660,9 @@ int ObCharset::strcmp(ObCollationType collation_type,
ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
const bool t_is_prefix = false;
result = cs->coll->strnncoll(cs,
reinterpret_cast<const uchar *>(str1),
reinterpret_cast<const unsigned char *>(str1),
str1_len,
reinterpret_cast<const uchar *>(str2),
reinterpret_cast<const unsigned char *>(str2),
str2_len, t_is_prefix);
}
return result;
@ -719,9 +679,9 @@ int ObCharset::strcmpsp(ObCollationType collation_type,
if (is_argument_valid(collation_type, str1, str1_len, str2, str2_len)) {
ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
result = cs->coll->strnncollsp(cs,
reinterpret_cast<const uchar *>(str1),
reinterpret_cast<const unsigned char *>(str1),
str1_len,
reinterpret_cast<const uchar *>(str2),
reinterpret_cast<const unsigned char *>(str2),
str2_len,
cmp_endspace);
}
@ -860,10 +820,10 @@ size_t ObCharset::sortkey(ObCollationType collation_type,
//
// 对于有非法字符的unicode字符串,采用原生的不转换sortkey的方式进行比较。
result = cs->coll->strnxfrm(cs,
reinterpret_cast<uchar *>(key),
reinterpret_cast<unsigned char *>(key),
key_len,
OB_MAX_WEIGHT,
reinterpret_cast<const uchar *>(str),
reinterpret_cast<const unsigned char *>(str),
str_len,
0,
&is_valid_unicode_tmp);
@ -890,10 +850,10 @@ size_t ObCharset::sortkey_var_len(ObCollationType collation_type,
result = -1;
} else {
result = cs->coll->strnxfrm_varlen(cs,
reinterpret_cast<uchar *>(key),
reinterpret_cast<unsigned char *>(key),
key_len,
OB_MAX_WEIGHT,
reinterpret_cast<const uchar *>(str),
reinterpret_cast<const unsigned char *>(str),
str_len,
is_space_cmp,
&is_valid_unicode_tmp);
@ -921,7 +881,7 @@ uint64_t ObCharset::hash(ObCollationType collation_type,
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->coll), K(lbt()));
} else {
seed = 0xc6a4a7935bd1e995;
cs->coll->hash_sort(cs, reinterpret_cast<const uchar *>(str), str_len,
cs->coll->hash_sort(cs, reinterpret_cast<const unsigned char *>(str), str_len,
&ret, &seed, calc_end_space, hash_algo);
}
}
@ -1102,10 +1062,6 @@ int ObCharset::well_formed_len(ObCollationType collation_type, const char *str,
return ret;
}
#ifdef OB_BUILD_FULL_CHARSET
// Be careful with this function. The return value may be out of range.
// Refer to
#endif
size_t ObCharset::charpos(const ObCollationType collation_type,
const char *str,
const int64_t str_len,
@ -1204,8 +1160,8 @@ int ObCharset::mb_wc(ObCollationType collation_type,
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
} else {
int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const uchar*>(mb.ptr()),
reinterpret_cast<const uchar*>(mb.ptr()+mb.length()));
int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const unsigned char*>(mb.ptr()),
reinterpret_cast<const unsigned char*>(mb.ptr()+mb.length()));
if (tmp <= 0) {
ret = OB_ERR_INCORRECT_STRING_VALUE;
} else {
@ -1237,8 +1193,8 @@ int ObCharset::mb_wc(ObCollationType collation_type,
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
} else {
int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const uchar*>(mb),
reinterpret_cast<const uchar*>(mb + mb_size));
int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const unsigned char*>(mb),
reinterpret_cast<const unsigned char*>(mb + mb_size));
if (tmp <= 0) {
ret = OB_ERR_INCORRECT_STRING_VALUE;
} else {
@ -1268,7 +1224,7 @@ int ObCharset::display_len(ObCollationType collation_type,
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
} else {
const uchar *buf = reinterpret_cast<const uchar*>(mb.ptr());
const unsigned char *buf = reinterpret_cast<const unsigned char*>(mb.ptr());
int64_t buf_size = mb.length();
int64_t char_pos = 0;
bool found = false;
@ -1320,7 +1276,7 @@ int ObCharset::max_display_width_charpos(ObCollationType collation_type, const c
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
} else {
char_pos = 0;
const uchar *buf = reinterpret_cast<const uchar*>(mb);
const unsigned char *buf = reinterpret_cast<const unsigned char*>(mb);
bool found = false;
int64_t total_width = 0;
@ -1372,8 +1328,8 @@ int ObCharset::wc_mb(ObCollationType collation_type, int32_t wc, char *buff, int
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(ret));
} else {
int tmp = cs->cset->wc_mb(cs, wc, reinterpret_cast<uchar*>(buff),
reinterpret_cast<uchar*>(buff + buff_len));
int tmp = cs->cset->wc_mb(cs, wc, reinterpret_cast<unsigned char*>(buff),
reinterpret_cast<unsigned char*>(buff + buff_len));
if (tmp <= 0) {
ret = OB_ERR_INCORRECT_STRING_VALUE;
} else {
@ -1589,14 +1545,12 @@ ObCollationType ObCharset::collation_type(const ObString &cs_name)
collation_type = CS_TYPE_UTF16_GENERAL_CI;
} else if (0 == cs_name.case_compare(ob_charset_utf16_bin.name)) {
collation_type = CS_TYPE_UTF16_BIN;
#ifdef OB_BUILD_FULL_CHARSET
} else if (0 == cs_name.case_compare("utf8_unicode_ci")) {
collation_type = CS_TYPE_UTF8MB4_UNICODE_CI;
} else if (0 == cs_name.case_compare(ob_charset_utf16_unicode_ci.name)) {
collation_type = CS_TYPE_UTF16_UNICODE_CI;
} else if (0 == cs_name.case_compare(ob_charset_utf8mb4_unicode_ci.name)) {
collation_type = CS_TYPE_UTF8MB4_UNICODE_CI;
#endif
} else if (0 == cs_name.case_compare(ob_charset_gb18030_bin.name)) {
collation_type = CS_TYPE_GB18030_BIN;
} else if (0 == cs_name.case_compare(ob_charset_gb18030_chinese_ci.name)) {
@ -1639,9 +1593,7 @@ bool ObCharset::is_valid_collation(ObCharsetType charset_type, ObCollationType c
if (CHARSET_UTF8MB4 == charset_type) {
if (CS_TYPE_UTF8MB4_BIN == collation_type
|| CS_TYPE_UTF8MB4_GENERAL_CI == collation_type
#ifdef OB_BUILD_FULL_CHARSET
|| CS_TYPE_UTF8MB4_UNICODE_CI == collation_type
#endif
) {
ret = true;
}
@ -1655,9 +1607,7 @@ bool ObCharset::is_valid_collation(ObCharsetType charset_type, ObCollationType c
} else if (CHARSET_UTF16 == charset_type) {
if (CS_TYPE_UTF16_GENERAL_CI == collation_type
|| CS_TYPE_UTF16_BIN == collation_type
#ifdef OB_BUILD_FULL_CHARSET
|| CS_TYPE_UTF16_UNICODE_CI == collation_type
#endif
) {
ret = true;
}
@ -1750,11 +1700,9 @@ bool ObCharset::is_valid_collation(int64_t collation_type_int)
|| CS_TYPE_LATIN1_SWEDISH_CI == collation_type
|| CS_TYPE_LATIN1_BIN == collation_type
|| is_gb18030_2022(collation_type)
#ifdef OB_BUILD_FULL_CHARSET
|| CS_TYPE_UTF8MB4_UNICODE_CI == collation_type
|| CS_TYPE_UTF16_UNICODE_CI == collation_type
|| (CS_TYPE_EXTENDED_MARK < collation_type && collation_type < CS_TYPE_MAX)
#endif
;
}
@ -1975,57 +1923,6 @@ int ObCharset::result_collation(
return ret;
}
#ifdef OB_BUILD_FULL_CHARSET
/** note from mysql:
Aggregate two collations together taking
into account their coercibility (aka derivation):.
0 == DERIVATION_EXPLICIT - an explicitly written COLLATE clause @n
1 == DERIVATION_NONE - a mix of two different collations @n
2 == DERIVATION_IMPLICIT - a column @n
3 == DERIVATION_COERCIBLE - a string constant.
The most important rules are:
-# If collations are the same:
chose this collation, and the strongest derivation.
-# If collations are different:
- Character sets may differ, but only if conversion without
data loss is possible. The caller provides flags whether
character set conversion attempts should be done. If no
flags are substituted, then the character sets must be the same.
Currently processed flags are:
MY_COLL_ALLOW_SUPERSET_CONV - allow conversion to a superset
MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
- two EXPLICIT collations produce an error, e.g. this is wrong:
CONCAT(expr1 collate latin1_swedish_ci, expr2 collate latin1_german_ci)
- the side with smaller derivation value wins,
i.e. a column is stronger than a string constant,
an explicit COLLATE clause is stronger than a column.
- if derivations are the same, we have DERIVATION_NONE,
we'll wait for an explicit COLLATE clause which possibly can
come from another argument later: for example, this is valid,
but we don't know yet when collecting the first two arguments:
@code
CONCAT(latin1_swedish_ci_column,
latin1_german1_ci_column,
expr COLLATE latin1_german2_ci)
@endcode
*/
/** this function is to determine use which charset when compare
* We consider only three charsets(binary, gbk and utf8mb4), so the rule is simpler. Especially,
* res_level can not be CS_LEVEL_NONE.
*
* MySQL uses coercibility values with the following rules to resolve ambiguities:
* 1. Use the collation with the lowest coercibility value.
* 2. If both sides have the same coercibility, then:
* 2.a If both sides are Unicode, or both sides are not Unicode, it is an error.
* 2.b If one of the sides has a Unicode character set, and another side has a non-Unicode character set, the side with Unicode character set wins,
* and automatic character set conversion is applied to the non-Unicode side.
* 2.c For an operation with operands from the same character set but that mix a _bin collation and a _ci or _cs collation, the _bin collation is used.
* This is similar to how operations that mix nonbinary and binary strings evaluate the operands as binary strings, except that it is for collations rather than data types.
*/
#endif
int ObCharset::aggregate_collation(
const ObCollationLevel collation_level1,
const ObCollationType collation_type1,
@ -2947,7 +2844,7 @@ int ObCharset::charset_convert(const ObCollationType from_type,
ret = OB_ERR_UNEXPECTED;
LOG_WARN("unexpected collation type", K(ret), K(from_type), K(to_type));
} else {
uint errors = 0;
unsigned int errors = 0;
result_len = ob_convert(to_str, static_cast<uint32_t>(to_len), to_cs, from_str, from_len, from_cs,
trim_incomplete_tail, replaced_char, &errors);
if (OB_UNLIKELY(errors != 0 && report_error)) {
@ -3209,20 +3106,8 @@ int ObCharset::get_nls_charset_id_by_charset_type(ObCharsetType charset_type)
return static_cast<int>(ret_id);
}
#ifndef OB_BUILD_FULL_CHARSET
int ObCharset::init_charset()
{
int ret = OB_SUCCESS;
if (OB_FAIL(init_gb18030_2022())) {
LOG_WARN("failed to init gb18030 2022", K(ret));
}
return ret;
}
#else
static void ob_charset_error_reporter(enum loglevel level, uint ecode, ...) {
static void ob_charset_error_reporter(enum loglevel level, unsigned int ecode, ...) {
//UNUSED(level);
UNUSED(ecode);
switch (level) {
@ -3411,7 +3296,6 @@ int ObCharset::init_charset()
return ret;
}
#endif
ObString ObCharsetUtils::const_str_for_ascii_[CHARSET_MAX][INT8_MAX + 1];
@ -3420,9 +3304,9 @@ int ObCharsetUtils::remove_char_endspace(ObString &str,
int ret = OB_SUCCESS;
const char *end = str.ptr() + str.length();
if ((CHARSET_UTF16 == charset_type)) {
end= (const char *) skip_trailing_space((const uchar *)str.ptr(), str.length(), 1);
end= (const char *) skip_trailing_space((const unsigned char *)str.ptr(), str.length(), 1);
} else {
end= (const char *) skip_trailing_space((const uchar *)str.ptr(), str.length(), 0);
end= (const char *) skip_trailing_space((const unsigned char *)str.ptr(), str.length(), 0);
}
if (end >= str.ptr()) {
str.assign_ptr(str.ptr(), end - str.ptr());

View File

@ -8,14 +8,7 @@
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
/*
* (C) 2017-2020 Alibaba Group Holding Limited.
*
* Authors:
*/
#ifndef OB_BUILD_FULL_CHARSET
#include "lib/charset/ob_ctype.h"
@ -25,7 +18,7 @@ ob_convert_internal(char *to, uint32 to_length,
const char *from, uint32 from_length,
const ObCharsetInfo *from_cs,
bool trim_incomplete_tail,
const ob_wc_t replaced_char, uint *errors)
const ob_wc_t replaced_char, unsigned int *errors)
{
unsigned int error_num= 0;
int cnvres;
@ -49,7 +42,7 @@ ob_convert_internal(char *to, uint32 to_length,
error_num++;
} else {
// Not enough characters
if (!trim_incomplete_tail && (const uchar*) from < from_end) {
if (!trim_incomplete_tail && (const unsigned char*) from < from_end) {
error_num++;
from++;
wc= replaced_char;
@ -62,7 +55,7 @@ ob_convert_internal(char *to, uint32 to_length,
while (go) {
go = FALSE;
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
to+= cnvres;
to+= cnvres;
else if (cnvres == OB_CS_ILUNI && wc != replaced_char) {
error_num++;
wc= replaced_char;
@ -122,4 +115,26 @@ ob_convert(char *to, uint32 to_length, const ObCharsetInfo *to_cs,
return 0;
}
#endif
/*
 * Copies at most 'length' characters from src to dst, stopping early at the
 * first NUL in src, and always NUL-terminates dst. The destination therefore
 * needs room for length + 1 characters.
 *
 * Returns a pointer to the terminating NUL written into dst.
 */
char *strmake(char *dst, const char *src, size_t length) {
#ifdef EXTRA_DEBUG
  /*
   * 'length' is the maximum length of the string; the buffer needs to be
   * one character larger to accommodate the terminating '\0'. That is easy
   * to get wrong, so the whole buffer is written to in order to expose
   * incorrect buffer sizes. Only the "unused" part is filled here: a) for
   * efficiency, and b) because dst == src is allowed, so filling the entire
   * buffer would overwrite the source string. A visible character is used
   * rather than '\0' because it is easier to spot in the results.
   */
  unsigned int n = 0;
  while (n < length) {
    if (src[n++] == '\0') {
      break;
    }
  }
  memset(dst + n, (int)'Z', length - n + 1);
#endif
  size_t copied = 0;
  for (; copied < length; ++copied) {
    dst[copied] = src[copied];
    if (dst[copied] == '\0') {
      // Source ended before the limit: its NUL is already in place.
      return dst + copied;
    }
  }
  // Limit reached without seeing a NUL: terminate explicitly.
  dst[copied] = '\0';
  return dst + copied;
}

View File

@ -116,9 +116,9 @@
#define _MY_B 0100
#define _MY_X 0200
#define ob_toupper(s, c) (uchar)((s)->to_upper[(uchar)(c)])
#define ob_tolower(s, c) (uchar)((s)->to_lower[(uchar)(c)])
#define ob_sort_order(s,c) (uchar)((s)->sort_order[(uchar)(c)])
#define ob_toupper(s, c) (unsigned char)((s)->to_upper[(unsigned char)(c)])
#define ob_tolower(s, c) (unsigned char)((s)->to_lower[(unsigned char)(c)])
#define ob_sort_order(s,c) (unsigned char)((s)->sort_order[(unsigned char)(c)])
#define is_valid_ascii(e) (0x00<=(uchar)(e) && (uchar)(e)<=0x7F)
@ -127,13 +127,13 @@ struct ObUCAInfo;
struct ObContractions;
typedef struct ObCharsetLoader {
uint errcode;
unsigned int errcode;
char errarg[192];
void *(*once_alloc)(size_t);
void *(*mem_malloc)(size_t);
void *(*mem_realloc)(void *, size_t);
void (*mem_free)(void *);
void (*reporter)(enum loglevel, uint errcode, ...);
void (*reporter)(enum loglevel, unsigned int errcode, ...);
int (*add_collation)(ObCharsetInfo *cs);
} ObCharsetLoader;
@ -157,25 +157,25 @@ typedef char ob_bool; /* Small bool */
/* Some typedef to make it easy for C++ to make function pointers */
typedef int (*ob_charset_conv_mb_wc)(const struct ObCharsetInfo *,
ob_wc_t *, const uchar *, const uchar *);
ob_wc_t *, const unsigned char *, const unsigned char *);
typedef int (*ob_charset_conv_wc_mb)(const struct ObCharsetInfo *, ob_wc_t,
uchar *, uchar *);
unsigned char *, unsigned char *);
typedef size_t (*ob_charset_conv_case)(const struct ObCharsetInfo *,
char *, size_t, char *, size_t);
int init_gb18030_2022();
extern ObUCAInfo ob_uca_v400;
extern uchar ob_uca520_length[4352];
extern unsigned char ob_uca520_length[4352];
extern uint16 *ob_uca520_weight[4352];
extern uchar ob_uca_length[256];
extern unsigned char ob_uca_length[256];
extern uint16 *ob_uca_weight[256];
typedef struct
{
uint beg;
uint end;
uint mb_len;
unsigned int beg;
unsigned int end;
unsigned int mb_len;
} ob_match_t;
typedef struct ObUnicaseInfoChar
@ -192,86 +192,13 @@ typedef struct ObUnicaseInfo
const ObUnicaseInfoChar **page;
} ObUnicaseInfo;
#ifdef OB_BUILD_FULL_CHARSET
// OB_CHARSET_HANDLER
// ==================
// OB_CHARSET_HANDLER is a collection of character-set
// related routines. Defined in m_ctype.h. Have the
// following set of functions:
// Multi-byte routines
// ------------------
// ismbchar() - detects whether the given string is a multi-byte sequence
// mbcharlen() - returns length of multi-byte sequence starting with
// the given character
// numchars() - returns number of characters in the given string, e.g.
// in SQL function CHAR_LENGTH().
// charpos() - calculates the offset of the given position in the string.
// Used in SQL functions LEFT(), RIGHT(), SUBSTRING(),
// INSERT()
// well_formed_len()
// - returns length of a given multi-byte string in bytes
// Used in INSERTs to shorten the given string so it
// a) is "well formed" according to the given character set
// b) can fit into the given data type
// lengthsp() - returns the length of the given string without trailing spaces.
// Unicode conversion routines
// ---------------------------
// mb_wc - converts the left multi-byte sequence into its Unicode code.
// mc_mb - converts the given Unicode code into multi-byte sequence.
// Case and sort conversion
// ------------------------
// caseup_str - converts the given 0-terminated string to uppercase
// casedn_str - converts the given 0-terminated string to lowercase
// caseup - converts the given string to lowercase using length
// casedn - converts the given string to lowercase using length
// Number-to-string conversion routines
// ------------------------------------
// snprintf()
// long10_to_str()
// longlong10_to_str()
// The names are pretty self-describing.
// String padding routines
// -----------------------
// fill() - writes the given Unicode value into the given string
// with the given length. Used to pad the string, usually
// with space character, according to the given charset.
// String-to-number conversion routines
// ------------------------------------
// strntol()
// strntoul()
// strntoll()
// strntoull()
// strntod()
// These functions are almost the same as their STDLIB counterparts,
// but also:
// - accept length instead of 0-terminator
// - are character set dependent
// Simple scanner routines
// -----------------------
// scan() - to skip leading spaces in the given string.
// Used when a string value is inserted into a numeric field.
#endif
typedef struct ObCharsetHandler
{
//my_bool (*init)(struct ObCharsetInfo *, MY_CHARSET_LOADER *loader);
/* Multibyte routines */
uint (*ismbchar)(const struct ObCharsetInfo *, const char *,
unsigned int (*ismbchar)(const struct ObCharsetInfo *, const char *,
const char *);
uint (*mbcharlen)(const struct ObCharsetInfo *, uint c);
unsigned int (*mbcharlen)(const struct ObCharsetInfo *, unsigned int c);
size_t (*numchars)(const struct ObCharsetInfo *, const char *b,
const char *e);
size_t (*charpos)(const struct ObCharsetInfo *, const char *b,
@ -292,7 +219,7 @@ typedef struct ObCharsetHandler
/* CTYPE scanner */
int (*ctype)(const struct ObCharsetInfo *cs, int *ctype,
const uchar *s, const uchar *e);
const unsigned char *s, const unsigned char *e);
/* Functions for case and sort conversion */
/*size_t (*caseup_str)(const struct ObCharsetInfo *, char *);
@ -332,22 +259,7 @@ typedef struct ObCharsetHandler
size_t (*scan)(const struct ObCharsetInfo *, const char *b,
const char *e, int sq);
} ObCharsetHandler;
#ifdef OB_BUILD_FULL_CHARSET
// OB_COLLATION_HANDLER
// ====================
// strnncoll() - compares two strings according to the given collation
// strnncollsp() - like the above but ignores trailing spaces for PAD SPACE
// collations. For NO PAD collations, identical to strnncoll.
// strnxfrm() - makes a sort key suitable for memcmp() corresponding
// to the given string
// like_range() - creates a LIKE range, for optimizer
// wildcmp() - wildcard comparison, for LIKE
// strcasecmp() - 0-terminated string comparison
// instr() - finds the first substring appearance in the string
// hash_sort() - calculates hash value taking into account
// the collation rules, e.g. case-insensitivity,
// accent sensitivity, etc.
#endif
static const int HASH_BUFFER_LENGTH = 128;
typedef uint64_t (*hash_algo)(const void* input, uint64_t length, uint64_t seed);
@ -359,21 +271,21 @@ typedef struct ObCollationHandler
/* Collation routines */
// 进行字符串比较的函数
int (*strnncoll)(const struct ObCharsetInfo *,
const uchar *, size_t, const uchar *, size_t, bool);
const unsigned char *, size_t, const unsigned char *, size_t, bool);
// 字符串比较时忽略尾部空格
int (*strnncollsp)(const struct ObCharsetInfo *,
const uchar *, size_t, const uchar *, size_t,
const unsigned char *, size_t, const unsigned char *, size_t,
bool diff_if_only_endspace_difference);
// makes a sort key suitable for memcmp() corresponding to the given string
size_t (*strnxfrm)(const struct ObCharsetInfo *,
uchar *dst, size_t dstlen, uint nweights,
const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
unsigned char *dst, size_t dstlen, unsigned int nweights,
const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);
// 获取weight_string结果的长度
size_t (*strnxfrmlen)(const struct ObCharsetInfo *, size_t);
// makes a sortkey suitable for memcmp() corresponding to the given variable length string
size_t (*strnxfrm_varlen)(const struct ObCharsetInfo*,
uchar* dst, size_t dst_len, uint nweights,
const uchar *src, size_t srclen,
unsigned char* dst, size_t dst_len, unsigned int nweights,
const unsigned char *src, size_t srclen,
bool is_memcmp, bool *is_valid_unicode);
//size_t (*strnxfrmlen)(const struct ObCharsetInfo *, size_t);
@ -394,63 +306,54 @@ typedef struct ObCollationHandler
const char *);
// finds the first substring appearance in the string
uint (*instr)(const struct ObCharsetInfo *,
unsigned int (*instr)(const struct ObCharsetInfo *,
const char *b, size_t b_length,
const char *s, size_t s_length,
ob_match_t *match, uint nmatch);
ob_match_t *match, unsigned int nmatch);
/* Hash calculation */
// calculates hash value taking into account the collation rules, e.g. case-insensitivity
void (*hash_sort)(const struct ObCharsetInfo *cs, const uchar *key, size_t len, ulong *nr1,
void (*hash_sort)(const struct ObCharsetInfo *cs, const unsigned char *key, size_t len, ulong *nr1,
ulong *nr2, const bool calc_end_space, hash_algo hash_algo);
bool (*propagate)(const struct ObCharsetInfo *cs, const uchar *str,
bool (*propagate)(const struct ObCharsetInfo *cs, const unsigned char *str,
size_t len);
} ObCollationHandler;
struct ObCharsetInfo
{
uint number;
uint primary_number;
uint binary_number;
uint state;
unsigned int number;
unsigned int primary_number;
unsigned int binary_number;
unsigned int state;
const char *csname;
const char *name;
const char *comment;
const char *tailoring;
struct Coll_param *coll_param;
uchar *ctype;
uchar *to_lower;
uchar *to_upper;
uchar *sort_order;
unsigned char *ctype;
unsigned char *to_lower;
unsigned char *to_upper;
unsigned char *sort_order;
ObUCAInfo *uca;
//uint16 *tab_to_uni;
//MY_UNI_IDX *tab_from_uni;
ObUnicaseInfo *caseinfo;
uchar *state_map;
uchar *ident_map;
uint strxfrm_multiply;
uchar caseup_multiply;
uchar casedn_multiply;
uint mbminlen;
uint mbmaxlen;
unsigned char *state_map;
unsigned char *ident_map;
unsigned int strxfrm_multiply;
unsigned char caseup_multiply;
unsigned char casedn_multiply;
unsigned int mbminlen;
unsigned int mbmaxlen;
ob_wc_t min_sort_char;
ob_wc_t max_sort_char; /* For LIKE optimization */
uchar pad_char;
unsigned char pad_char;
bool escape_with_backslash_is_dangerous;
uchar levels_for_compare;
uchar levels_for_order;
unsigned char levels_for_compare;
unsigned char levels_for_order;
ObCharsetHandler *cset;
ObCollationHandler *coll;
#ifdef OB_BUILD_FULL_CHARSET
/**
If this collation is PAD_SPACE, it collates as if all inputs were
padded with a given number of spaces at the end (see the "num_codepoints"
flag to strnxfrm). NO_PAD simply compares unextended strings.
Note that this is fundamentally about the behavior of coll->strnxfrm.
*/
#endif
enum ObCharsetPadAttr pad_attribute;
};
@ -459,17 +362,17 @@ struct ObCharsetInfo
#define ob_toascii(c) ((c) & 0177)
#define ob_tocntrl(c) ((c) & 31)
#define ob_toprint(c) ((c) | 64)
#define ob_isalpha(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L) : 0)
#define ob_isupper(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_U : 0)
#define ob_islower(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_L : 0)
#define ob_isdigit(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_NMR : 0)
#define ob_isxdigit(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_X : 0)
#define ob_isalnum(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L | _MY_NMR) : 0)
#define ob_isspace(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_SPC : 0)
#define ob_ispunct(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_PNT : 0)
#define ob_isprint(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B) : 0)
#define ob_isgraph(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR) : 0)
#define ob_iscntrl(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_CTR : 0)
#define ob_isalpha(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_U | _MY_L) : 0)
#define ob_isupper(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_U : 0)
#define ob_islower(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_L : 0)
#define ob_isdigit(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_NMR : 0)
#define ob_isxdigit(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_X : 0)
#define ob_isalnum(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_U | _MY_L | _MY_NMR) : 0)
#define ob_isspace(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_SPC : 0)
#define ob_ispunct(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_PNT : 0)
#define ob_isprint(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B) : 0)
#define ob_isgraph(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR) : 0)
#define ob_iscntrl(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_CTR : 0)
/* Some macros that should be cleaned up a little */
/* True when c may appear in an identifier: alphanumeric according to the
   charset's ctype table (via ob_isalnum), or an underscore.
   Note: c is evaluated more than once, so pass a side-effect-free expression. */
#define ob_isvar(s,c) (ob_isalnum(s,c) || (c) == '_')
@ -488,13 +391,13 @@ struct ObCharsetInfo
#define use_mb(s) ((s)->cset->ismbchar != NULL)
static inline uint ob_ismbchar(const ObCharsetInfo *cs, const char *str,
static inline unsigned int ob_ismbchar(const ObCharsetInfo *cs, const char *str,
const char *strend) {
return cs->cset->ismbchar(cs, str, strend);
}
static inline uint ob_ismbchar(const ObCharsetInfo *cs, const uchar *str,
const uchar *strend) {
static inline unsigned int ob_ismbchar(const ObCharsetInfo *cs, const unsigned char *str,
const unsigned char *strend) {
return cs->cset->ismbchar(cs, (const char *)(str), (const char *)(strend));
}
#define ob_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a)))
@ -503,8 +406,8 @@ static inline uint ob_ismbchar(const ObCharsetInfo *cs, const uchar *str,
typedef struct ob_uni_ctype
{
uchar pctype;
uchar *ctype;
unsigned char pctype;
unsigned char *ctype;
} ObUniCtype;
extern ObUniCtype ob_uni_ctype[256];
@ -534,7 +437,6 @@ extern ObCharsetInfo ob_charset_gb18030_2022_radical_cs;
extern ObCharsetInfo ob_charset_gb18030_2022_stroke_ci;
extern ObCharsetInfo ob_charset_gb18030_2022_stroke_cs;
extern ObCharsetInfo ob_charset_gb18030_2022_bin;
#ifdef OB_BUILD_FULL_CHARSET
extern ObCharsetInfo ob_charset_utf8mb4_unicode_ci;
extern ObCharsetInfo ob_charset_utf16_unicode_ci;
extern ObCharsetInfo ob_charset_utf8mb4_zh_0900_as_cs;
@ -543,7 +445,6 @@ extern ObCharsetInfo ob_charset_utf8mb4_zh3_0900_as_cs;
extern ObCharsetInfo ob_charset_utf8mb4_0900_bin;
extern ObCharsetInfo ob_charset_latin1;
extern ObCharsetInfo ob_charset_latin1_bin;
#endif
extern ObCollationHandler ob_collation_mb_bin_handler;
extern ObCharsetHandler ob_charset_utf8mb4_handler;
@ -590,17 +491,17 @@ bool ob_like_range_simple(const ObCharsetInfo *cs,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
bool ob_propagate_simple(const ObCharsetInfo *cs, const uchar *str,
bool ob_propagate_simple(const ObCharsetInfo *cs, const unsigned char *str,
size_t len);
bool ob_propagate_complex(const ObCharsetInfo *cs, const uchar *str,
bool ob_propagate_complex(const ObCharsetInfo *cs, const unsigned char *str,
size_t len);
void ob_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
uint flags, uint level);
void ob_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *strend,
unsigned int flags, unsigned int level);
size_t ob_strxfrm_pad_desc_and_reverse(const ObCharsetInfo *cs,
uchar *str, uchar *frmend, uchar *strend,
uint nweights, uint flags, uint level);
unsigned char *str, unsigned char *frmend, unsigned char *strend,
unsigned int nweights, unsigned int flags, unsigned int level);
extern "C" int64_t ob_strntoll(const char *ptr, size_t len, int base, char **end, int *err);
extern "C" int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err);
@ -621,17 +522,17 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many, int recurse_level);
uint ob_instr_mb(const ObCharsetInfo *cs,
unsigned int ob_instr_mb(const ObCharsetInfo *cs,
const char *b, size_t b_length,
const char *s, size_t s_length,
ob_match_t *match, uint nmatch);
ob_match_t *match, unsigned int nmatch);
void ob_hash_sort_simple(const ObCharsetInfo *cs,
const uchar *key, size_t len,
const unsigned char *key, size_t len,
ulong *nr1, ulong *nr2,
const bool calc_end_space, hash_algo hash_algo);
const uchar *skip_trailing_space(const uchar *ptr,size_t len, bool is_utf16);
const unsigned char *skip_trailing_space(const unsigned char *ptr,size_t len, bool is_utf16);
size_t ob_numchars_mb(const ObCharsetInfo *cs __attribute__((unused)), const char *pos, const char *end);
@ -640,7 +541,7 @@ size_t ob_charpos_mb(const ObCharsetInfo *cs __attribute__((unused)), const char
size_t ob_max_bytes_charpos_mb(const ObCharsetInfo *cs __attribute__((unused)), const char *pos, const char *end, size_t max_bytes, size_t *char_len);
int ob_mb_ctype_mb(const ObCharsetInfo *cs __attribute__((unused)), int *ctype,
const uchar *s, const uchar *e);
const unsigned char *s, const unsigned char *e);
size_t ob_caseup_mb(const ObCharsetInfo *, char *src, size_t srclen,
char *dst, size_t dstlen);
@ -661,18 +562,18 @@ size_t ob_lengthsp_8bit(const ObCharsetInfo *cs __attribute__((unused)),
const char *ptr, size_t length);
int ob_strnncoll_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
const unsigned char *s, size_t slen,
const unsigned char *t, size_t tlen,
bool t_is_prefix);
int ob_strnncollsp_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
const unsigned char *a, size_t a_length,
const unsigned char *b, size_t b_length,
bool diff_if_only_endspace_difference);
size_t ob_strnxfrm_mb(const ObCharsetInfo *,
uchar *dst, size_t dstlen, uint nweights,
const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
unsigned char *dst, size_t dstlen, unsigned int nweights,
const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);
int ob_wildcmp_mb_bin(const ObCharsetInfo *cs,
const char *str,const char *str_end,
@ -680,22 +581,22 @@ int ob_wildcmp_mb_bin(const ObCharsetInfo *cs,
int escape, int w_one, int w_many);
void ob_hash_sort_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
const uchar *key, size_t len, ulong *nr1, ulong *nr2,
const unsigned char *key, size_t len, ulong *nr1, ulong *nr2,
const bool calc_end_space, hash_algo hash_algo);
uint32 ob_convert(char *to, uint32 to_length, const ObCharsetInfo *to_cs,
const char *from, uint32 from_length,
const ObCharsetInfo *from_cs,
bool trim_incomplete_tail,
const ob_wc_t replaced_char, uint *errors);
const ob_wc_t replaced_char, unsigned int *errors);
size_t ob_strnxfrm_unicode_full_bin(const ObCharsetInfo *cs,
uchar *dst, size_t dstlen, uint nweights,
const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
unsigned char *dst, size_t dstlen, unsigned int nweights,
const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);
size_t ob_strnxfrm_unicode_full_bin_varlen(const struct ObCharsetInfo* cs,
uchar* dst, size_t dst_len, uint nweights,
const uchar *src, size_t srclen,
unsigned char* dst, size_t dst_len, unsigned int nweights,
const unsigned char *src, size_t srclen,
bool is_memcmp, bool *is_valid_unicode);
bool ob_like_range_generic(const ObCharsetInfo *cs, const char *ptr,
@ -705,12 +606,12 @@ bool ob_like_range_generic(const ObCharsetInfo *cs, const char *ptr,
size_t *max_length);
size_t ob_strnxfrm_unicode(const ObCharsetInfo *cs,
uchar *dst, size_t dstlen, uint nweights,
const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
unsigned char *dst, size_t dstlen, unsigned int nweights,
const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);
size_t ob_strnxfrm_unicode_varlen(const struct ObCharsetInfo* cs,
uchar* dst, size_t dst_len, uint nweights,
const uchar *src, size_t srclen,
unsigned char* dst, size_t dst_len, unsigned int nweights,
const unsigned char *src, size_t srclen,
bool is_memcmp, bool *is_valid_unicode);
int ob_wildcmp_unicode(const ObCharsetInfo *cs,
@ -719,8 +620,8 @@ int ob_wildcmp_unicode(const ObCharsetInfo *cs,
int escape, int w_one, int w_many,
ObUnicaseInfo *weights);
size_t ob_strxfrm_pad(const ObCharsetInfo *cs, uchar *str, uchar *frmend,
uchar *strend, uint nweights, uint flags);
size_t ob_strxfrm_pad(const ObCharsetInfo *cs, unsigned char *str, unsigned char *frmend,
unsigned char *strend, unsigned int nweights, unsigned int flags);
size_t ob_strnxfrmlen_simple(const struct ObCharsetInfo *, size_t);
@ -728,8 +629,8 @@ size_t ob_strnxfrmlen_unicode_full_bin(const struct ObCharsetInfo *, size_t);
size_t ob_strnxfrmlen_utf8mb4(const struct ObCharsetInfo *, size_t);
uint ob_mbcharlen_8bit(const ObCharsetInfo *cs __attribute__((unused)),
uint c __attribute__((unused)));
unsigned int ob_mbcharlen_8bit(const ObCharsetInfo *cs __attribute__((unused)),
unsigned int c __attribute__((unused)));
size_t ob_numchars_8bit(const ObCharsetInfo *cs __attribute__((unused)),
const char *b, const char *e);
@ -750,7 +651,7 @@ size_t ob_lengthsp_binary(const ObCharsetInfo *cs __attribute__((unused)),
size_t length);
int ob_mb_ctype_8bit(const ObCharsetInfo *cs, int *ctype,
const uchar *s, const uchar *e);
const unsigned char *s, const unsigned char *e);
size_t ob_well_formed_len_8bit(const ObCharsetInfo *cs __attribute__((unused)),
const char *start, const char *end,

View File

@ -18,60 +18,18 @@
* - initial release
*
*/
#ifndef OB_BUILD_FULL_CHARSET
#include "lib/charset/ob_ctype.h"
static unsigned char ctype_bin[]=
{
0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
static unsigned char bin_char_array[] =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};
#include "lib/charset/ob_ctype_bin_tab.h"
unsigned int ob_mbcharlen_8bit(const ObCharsetInfo *cs __attribute__((unused)),
unsigned int c __attribute__((unused)))
unsigned int c __attribute__((unused)))
{
return 1;
}
size_t ob_numchars_8bit(const ObCharsetInfo *cs __attribute__((unused)),
const char *begin, const char *end)
const char *begin, const char *end)
{
return (size_t) (end - begin);
}
@ -111,9 +69,9 @@ size_t ob_lengthsp_binary(const ObCharsetInfo *cs __attribute__((unused)),
}
static int ob_mb_wc_bin(const ObCharsetInfo *cs __attribute__((unused)),
ob_wc_t *wc,
const unsigned char *str,
const unsigned char *end __attribute__((unused)))
ob_wc_t *wc,
const unsigned char *str,
const unsigned char *end __attribute__((unused)))
{
if (str >= end) {
return OB_CS_TOOSMALL;
@ -125,9 +83,9 @@ static int ob_mb_wc_bin(const ObCharsetInfo *cs __attribute__((unused)),
static int ob_wc_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
ob_wc_t wc,
unsigned char *str,
unsigned char *end __attribute__((unused)))
ob_wc_t wc,
unsigned char *str,
unsigned char *end __attribute__((unused)))
{
if (str >= end) {
return OB_CS_TOOSMALL;
@ -158,7 +116,7 @@ static size_t ob_case_bin(const ObCharsetInfo *cs __attribute__((unused)),
return srclen;
}
static int ob_strnncoll_8bit_bin(const ObCharsetInfo *cs __attribute__((unused)),
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
@ -247,19 +205,18 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
const char *wild_str,const char *wild_end,
int escape_char, int w_one, int w_many, int recurse_level)
{
int result= -1;
int result= -1;
while (wild_str != wild_end) {
while ((*wild_str == escape_char) || (*wild_str != w_many && *wild_str != w_one)) {
if (*wild_str == escape_char && wild_str+1 != wild_end) {
wild_str++;
wild_str++;
}
if (str == str_end || likeconv(cs,*wild_str++) != likeconv(cs,*str++)) {
return(1);
} else if (wild_str == wild_end) {
return(str != str_end);
return(str != str_end);
} else {
result=1;
result=1;
}
}
if (*wild_str == w_one) {
@ -272,7 +229,7 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
} while (++wild_str < wild_end && *wild_str == w_one);
if (wild_str == wild_end) break;
}
if (*wild_str == w_many) {
if (*wild_str == w_many) {
unsigned char cmp;
wild_str++;
for (; wild_str != wild_end ; wild_str++) {
@ -286,14 +243,14 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
continue;
}
}
break;
break;
}
if (wild_str == wild_end) {
return(0);
return(0);
} else if (str == str_end) {
return(-1);
return(-1);
} else if ((cmp= *wild_str) == escape_char && wild_str+1 != wild_end) {
cmp= *++wild_str;
cmp= *++wild_str;
}
INC_PTR(cs,wild_str,wild_end);
@ -303,12 +260,12 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
str++;
}
if (str++ == str_end) {
return(-1);
return(-1);
}
{
int tmp=ob_wildcmp_bin_impl(cs,str,str_end,
wild_str,wild_end,escape_char,
w_one, w_many, recurse_level + 1);
wild_str,wild_end,escape_char,
w_one, w_many, recurse_level + 1);
if (tmp <= 0) {
return(tmp);
} else if (str == str_end) {
@ -336,9 +293,9 @@ int ob_wildcmp_bin(const ObCharsetInfo *cs,
static
unsigned int ob_instr_bin(const ObCharsetInfo *cs __attribute__((unused)),
const char *begin, size_t b_length,
const char *s, size_t s_length,
ob_match_t *match, unsigned int nmatch)
const char *begin, size_t b_length,
const char *s, size_t s_length,
ob_match_t *match, unsigned int nmatch)
{
const unsigned char *str, *search, *end, *search_end;
@ -349,7 +306,7 @@ unsigned int ob_instr_bin(const ObCharsetInfo *cs __attribute__((unused)),
match->end= 0;
match->mb_len= 0;
}
return 1;
return 1;
}
str= (const unsigned char*) begin;
@ -369,7 +326,6 @@ loop:
goto loop;
}
}
if (nmatch > 0) {
match[0].beg= 0;
match[0].end= (size_t) (str- (const unsigned char*)begin-1);
@ -381,7 +337,7 @@ loop:
match[1].mb_len= match[1].end-match[1].beg;
}
}
return 2;
return 2;
}
}
}
@ -409,9 +365,10 @@ void ob_hash_sort_8bit_bin(const ObCharsetInfo *cs __attribute__((unused)),
}
}
void ob_hash_sort_bin(const ObCharsetInfo *cs __attribute__((unused)),
const unsigned char *key, size_t len, unsigned long int *nr1, unsigned long int *nr2,
const bool calc_end_space,
hash_algo hash_algo)
const unsigned char *key, size_t len,
unsigned long int *nr1, unsigned long int *nr2,
const bool calc_end_space,
hash_algo hash_algo)
{
const unsigned char *pos = key;
key+= len;
@ -431,8 +388,8 @@ void ob_hash_sort_bin(const ObCharsetInfo *cs __attribute__((unused)),
static ObCharsetHandler ob_charset_handler=
{
NULL,
ob_mbcharlen_8bit,
NULL,
ob_mbcharlen_8bit,
ob_numchars_8bit,
ob_charpos_8bit,
ob_max_bytes_charpos_8bit,
@ -490,39 +447,37 @@ ObCollationHandler ob_collation_binary_handler =
ObCharsetInfo ob_charset_bin =
{
63,0,0,
OB_CS_COMPILED|OB_CS_BINSORT|OB_CS_PRIMARY,
"binary",
"binary",
"",
NULL,
NULL,
ctype_bin,
bin_char_array,
bin_char_array,
NULL,
NULL,
&ob_unicase_default,
NULL,
NULL,
1,
1,
1,
1,
1,
0,
255,
0,
0,
1,
1,
&ob_charset_handler,
&ob_collation_binary_handler,
PAD_SPACE
63,0,0,
OB_CS_COMPILED|OB_CS_BINSORT|OB_CS_PRIMARY,
"binary",
"binary",
"",
NULL,
NULL,
ctype_bin,
bin_char_array,
bin_char_array,
NULL,
NULL,
&ob_unicase_default,
NULL,
NULL,
1,
1,
1,
1,
1,
0,
255,
0,
0,
1,
1,
&ob_charset_handler,
&ob_collation_binary_handler,
PAD_SPACE
};
#undef likeconv
#undef INC_PTR
#endif

View File

@ -0,0 +1,51 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
/*
 * Character-class flag table for the "binary" charset.
 *
 * Layout: one leading entry (0) followed by 256 entries, one per byte value,
 * i.e. 257 entries in total. The flags for byte b therefore live at index
 * b + 1, which is how the ob_is*() classification macros read a ctype table
 * ((ctype + 1)[(unsigned char) c]).
 *
 * Only the 7-bit range carries flags; every byte in 0x80..0xFF is 0.
 * NOTE(review): the individual bit values presumably correspond to the
 * _MY_U/_MY_L/_MY_NMR/_MY_SPC/_MY_PNT/_MY_CTR/_MY_B/_MY_X masks - confirm
 * against their definitions before relying on a specific bit.
 */
static unsigned char ctype_bin[]=
{
0,
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
/*
 * Identity byte map: entry i holds the value i for every byte 0..255
 * (256 entries, 16 per row).
 * NOTE(review): this appears to be what the binary charset plugs into its
 * case-conversion slots, so that case mapping leaves bytes unchanged -
 * confirm against the ObCharsetInfo initializer that uses it.
 */
static unsigned char bin_char_array[] =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
};

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,461 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "lib/charset/ob_mysql_global.h"
#include "lib/charset/ob_ctype.h"
#include "lib/charset/ob_ctype_gbk_tab.h"
/*
 * GBK byte classification and packing helpers.
 *   isgbkhead(c)   - c is a valid lead byte (0x81..0xFE)
 *   isgbktail(c)   - c is a valid trail byte (0x40..0x7E or 0x80..0xFE)
 *   isgbkcode(c,d) - (c,d) form a valid two-byte sequence
 *   gbkcode(c,d)   - packs lead/trail bytes into one 16-bit code (lead in the high byte)
 *   gbkhead(e)     - high byte of a packed code
 *   gbktail(e)     - low byte of a packed code
 * Every argument is parenthesized in the expansion so compound expressions
 * such as gbkhead(a | b) expand as intended. Arguments may be evaluated more
 * than once; do not pass expressions with side effects.
 */
#define isgbkhead(c) (0x81<=(unsigned char)(c) && (unsigned char)(c)<=0xfe)
#define isgbktail(c) ((0x40<=(unsigned char)(c) && (unsigned char)(c)<=0x7e) || \
                      (0x80<=(unsigned char)(c) && (unsigned char)(c)<=0xfe))
#define isgbkcode(c,d) (isgbkhead(c) && isgbktail(d))
#define gbkcode(c,d) ((((unsigned int) (unsigned char) (c)) <<8) | (unsigned char)(d))
#define gbkhead(e) ((unsigned char)((e)>>8))
#define gbktail(e) ((unsigned char)((e)&0xff))
/*
 * Maps a packed two-byte GBK code to its 16-bit collation weight.
 * The gbk_order table is addressed as rows of 0xbe entries: one row per lead
 * byte (starting at 0x81) and one column per valid trail byte, where the
 * unused trail value 0x7f is skipped. The looked-up value is offset by 0x8100.
 */
static uint16 gbksortorder(uint16 i)
{
  const uint trail = gbktail(i);
  /* Trail bytes above 0x7f shift down by one extra slot because 0x7f is not a valid trail byte. */
  const uint column = (trail > 0x7f) ? (trail - 0x41) : (trail - 0x40);
  const uint row = gbkhead(i) - 0x81;
  return 0x8100 + gbk_order[row * 0xbe + column];
}
/*
 * Compares the first `length` bytes of two GBK strings by collation weight.
 *
 * a_res / b_res: in/out cursors. They are advanced past the compared prefix
 *                only when the prefixes compare equal (return value 0).
 * length:        number of bytes to compare; both inputs must hold at least
 *                that many bytes.
 * Returns 0 when the prefixes are equal, otherwise the signed weight
 * difference of the first differing character.
 */
int ob_strnncoll_gbk_internal(const unsigned char **a_res, const unsigned char **b_res,
                              size_t length)
{
  const unsigned char *lhs = *a_res;
  const unsigned char *rhs = *b_res;
  size_t remaining = length;
  while (remaining > 0) {
    remaining--;
    /* A double-byte comparison needs one more byte on both sides and a valid
       lead/trail pair in both strings; otherwise compare byte by byte. */
    if (remaining > 0 && isgbkcode(lhs[0], lhs[1]) && isgbkcode(rhs[0], rhs[1])) {
      const unsigned int lhs_code = gbkcode(lhs[0], lhs[1]);
      const unsigned int rhs_code = gbkcode(rhs[0], rhs[1]);
      if (lhs_code != rhs_code) {
        return (int) gbksortorder((uint16_t) lhs_code) -
               (int) gbksortorder((uint16_t) rhs_code);
      }
      lhs += 2;
      rhs += 2;
      remaining--;
    } else {
      const int lhs_weight = (int) sort_order_gbk[*lhs++];
      const int rhs_weight = (int) sort_order_gbk[*rhs++];
      if (lhs_weight != rhs_weight) {
        return lhs_weight - rhs_weight;
      }
    }
  }
  *a_res = lhs;
  *b_res = rhs;
  return 0;
}
/*
 * Collation-aware comparison of two GBK strings.
 * The common prefix is compared by weight; if it is equal, the result is the
 * length difference. With b_is_prefix set, a is treated as if it were only as
 * long as the common prefix, so a string that starts with b compares equal.
 * Returns <0, 0 or >0.
 */
int ob_strnncoll_gbk(const ObCharsetInfo *cs __attribute__((unused)),
                     const unsigned char *a, size_t a_length,
                     const unsigned char *b, size_t b_length,
                     bool b_is_prefix)
{
  const size_t common_len = OB_MIN(a_length, b_length);
  const int diff = ob_strnncoll_gbk_internal(&a, &b, common_len);
  if (diff != 0) {
    return diff;
  }
  const size_t effective_a_len = b_is_prefix ? common_len : a_length;
  return (int) (effective_a_len - b_length);
}
/*
 * Collation-aware comparison of two GBK strings that treats trailing spaces
 * as insignificant.
 * If the common prefix is equal and the lengths differ:
 *   - with diff_if_only_endspace_difference set, the shorter string sorts first;
 *   - otherwise the extra tail of the longer string is compared against spaces:
 *     the first non-space byte decides the order, and an all-space tail
 *     compares equal.
 * Returns <0, 0 or >0.
 */
static int ob_strnncollsp_gbk(const ObCharsetInfo * cs __attribute__((unused)),
                              const unsigned char *a, size_t a_length,
                              const unsigned char *b, size_t b_length,
                              bool diff_if_only_endspace_difference)
{
  const size_t common_len = OB_MIN(a_length, b_length);
  const int diff = ob_strnncoll_gbk_internal(&a, &b, common_len);
  if (diff != 0 || a_length == b_length) {
    return diff;
  }
  if (diff_if_only_endspace_difference) {
    return (a_length < b_length) ? -1 : 1;
  }
  /* After an equal prefix both cursors point at the first uncompared byte;
     only the longer string has a tail left to inspect. */
  const bool b_is_longer = a_length < b_length;
  const unsigned char *tail = b_is_longer ? b : a;
  const unsigned char *tail_end = tail + ((b_is_longer ? b_length : a_length) - common_len);
  /* The sign flips when the tail belongs to b. */
  const int sign = b_is_longer ? -1 : 1;
  for (; tail < tail_end; tail++) {
    if (*tail != ' ') {
      return (*tail < ' ') ? -sign : sign;
    }
  }
  return 0;
}
/*
 * Builds a memcmp()-comparable sort key for a GBK string.
 *
 * cs:               charset descriptor; supplies the single-byte sort_order
 *                   table (may be NULL) and the ismbchar() callback.
 * dst, dstlen:      output buffer and its capacity in bytes.
 * nweights:         maximum number of characters to transform.
 * src, srclen:      input string.
 * flags:            passed through to ob_strxfrm_pad_desc_and_reverse().
 * is_valid_unicode: out; set to 1 up front and cleared when any single-byte
 *                   character fails is_valid_ascii(). The flag is sticky: a
 *                   later valid byte does not reset it, so it describes the
 *                   whole input rather than only the last single byte seen.
 * Returns the value produced by ob_strxfrm_pad_desc_and_reverse().
 */
static size_t
ob_strnxfrm_gbk(const ObCharsetInfo *cs,
                unsigned char *dst, size_t dstlen, unsigned int nweights,
                const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode)
{
  unsigned char *d0= dst;
  unsigned char *de= dst + dstlen;
  const unsigned char *se= src + srclen;
  const unsigned char *sort_order= cs->sort_order;
  *is_valid_unicode = 1;
  for (; dst < de && src < se && nweights; nweights--) {
    if (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) {
      /* Double-byte character: emit its 16-bit weight, high byte first.
         The low byte is dropped if the buffer is already full. */
      uint16_t e= gbksortorder((uint16_t) gbkcode(*src, *(src + 1)));
      *dst++= gbkhead(e);
      if (dst < de) {
        *dst++= gbktail(e);
      }
      src+= 2;
    } else {
      /* Only ever clear the flag here; overwriting it on every byte would let
         a trailing valid character mask an earlier invalid one. */
      if (!is_valid_ascii(*src)) {
        *is_valid_unicode = 0;
      }
      *dst++= sort_order ? sort_order[*src++] : *src++;
    }
  }
  return ob_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
}
size_t ob_varlen_encoding_gbk_for_memcmp(const struct ObCharsetInfo* cs,
unsigned char* dst, size_t dst_len, unsigned int nweights,
const unsigned char *src, size_t src_len,
bool *is_valid_unicode)
{
unsigned char *d0= dst;
unsigned char *de= dst + dst_len;
const unsigned char *se= src + src_len;
const unsigned char *sort_order= cs->sort_order;
*is_valid_unicode = 1;
for (; *is_valid_unicode && dst < de && src < se && nweights; nweights--)
{
if (isgbkhead(*(src)) && (se)-(src)>1 && isgbktail(*((src)+1)))
{
/*
Note, it is safe not to check (src < se)
in the code below, because ismbchar() would
not return TRUE if src was too short
*/
uint16_t e= gbksortorder((uint16_t) gbkcode(*src, *(src + 1)));
*dst++= gbkhead(e);
if (dst < de)
*dst++= gbktail(e);
src+= 2;
if (e == 0) {
*dst++ = 0x00;
*dst++ = 0x01;
}
} else {
*is_valid_unicode = is_valid_ascii(*src);
uint16_t e = sort_order ? sort_order[*src++] : *src++;
*dst++ = gbkhead(e);
*dst++ = gbktail(e);
if (e == 0) {
*dst++ = 0x00;
*dst++ = 0x01;
}
}
}
*dst++ = 0x00;
*dst++ = 0x00;
*dst++ = 0x00;
*dst++ = 0x00;
return dst - d0;
}
size_t ob_varlen_encoding_gbk_for_spacecmp(const struct ObCharsetInfo* cs,
unsigned char* dst, size_t dst_len, unsigned int nweights,
const unsigned char *src, size_t src_len,
bool *is_valid_unicode)
{
unsigned char *d0= dst;
unsigned char *de= dst + dst_len;
const unsigned char *se= src + src_len;
const unsigned char *sort_order= cs->sort_order;
*is_valid_unicode = 1;
// trim
while (*(se-1) == 0x20 && se>src) se--;
for (;*is_valid_unicode && dst < de && src < se && nweights; nweights--)
{
int16_t space_cnt = 0;
uint16_t e = 0;
while (*src == 0x20)
{
space_cnt++;
src++;
}
if (isgbkhead(*(src)) && (se)-(src)>1 && isgbktail(*((src)+1)))
{
/*
Note, it is safe not to check (src < se)
in the code below, because ismbchar() would
not return TRUE if src was too short
*/
e = gbksortorder((uint16) gbkcode(*src, *(src + 1)));
src+= 2;
} else {
*is_valid_unicode = is_valid_ascii(*src);
e = sort_order ? sort_order[*src++] : *src++;
}
if (space_cnt != 0) {
*dst++ = 0x00;
*dst++ = 0x20;
if (e > 0x20) {
*dst++ = 0x00;
*dst++ = 0x21;
space_cnt = -space_cnt;
} else {
*dst++ = 0x00;
*dst++ = 0x19;
}
*dst++ = ((unsigned char)(space_cnt >> 8));
*dst++ = ((unsigned char)(space_cnt & 0xff));
}
*dst++ = gbkhead(e);
*dst++ = gbktail(e);
}
*dst++ = 0x00;
*dst++ = 0x20;
*dst++ = 0x00;
*dst++ = 0x20;
return dst - d0;
}
/*
 * Variable-length sort-key entry point for GBK.
 * Delegates to the memcmp-style encoder when is_memcmp is true and to the
 * space-insensitive encoder otherwise; all other arguments and the return
 * value are passed through unchanged.
 */
size_t ob_strnxfrm_gbk_varlen(const struct ObCharsetInfo* cs,
                              unsigned char* dst, size_t dst_len, unsigned int nweights,
                              const unsigned char *src, size_t srclen,
                              bool is_memcmp, bool *is_valid_unicode)
{
  if (!is_memcmp) {
    return ob_varlen_encoding_gbk_for_spacecmp(cs, dst, dst_len, nweights,
                                               src, srclen, is_valid_unicode);
  }
  return ob_varlen_encoding_gbk_for_memcmp(cs, dst, dst_len, nweights,
                                           src, srclen, is_valid_unicode);
}
/*
 * Reports whether a two-byte GBK character starts at p.
 * Returns 2 when *p is a GBK head byte, at least two bytes remain before e,
 * and p[1] is a GBK tail byte; returns 0 otherwise.
 */
static unsigned int ismbchar_gbk(const ObCharsetInfo *cs __attribute__((unused)),
                   const char* p, const char *e)
{
  if (!isgbkhead(*(p))) {
    return 0;
  }
  /* Need room for the tail byte before looking at it. */
  if ((e)-(p) <= 1) {
    return 0;
  }
  if (isgbktail(*((p)+1))) {
    return 2;
  }
  return 0;
}
/*
 * Byte length of the character introduced by c:
 * 2 when c is a GBK head byte, 1 otherwise.
 */
static unsigned int mbcharlen_gbk(const ObCharsetInfo *cs __attribute__((unused)),
                    unsigned int c)
{
  if (isgbkhead(c)) {
    return 2;
  }
  return 1;
}
/*
 * Looks up the GBK code for a Unicode code point.
 * Each supported code-point range has its own table, indexed by the offset
 * from the start of that range. Returns 0 when the code point lies outside
 * every range.
 */
static int func_uni_gbk_onechar(int code){
  if (code >= 0x00A4 && code <= 0x0451) {
    return tab_uni_gbk0[code - 0x00A4];
  }
  if (code >= 0x2010 && code <= 0x2312) {
    return tab_uni_gbk1[code - 0x2010];
  }
  if (code >= 0x2460 && code <= 0x2642) {
    return tab_uni_gbk2[code - 0x2460];
  }
  if (code >= 0x3000 && code <= 0x3129) {
    return tab_uni_gbk3[code - 0x3000];
  }
  if (code >= 0x3220 && code <= 0x32A3) {
    return tab_uni_gbk4[code - 0x3220];
  }
  if (code >= 0x338E && code <= 0x33D5) {
    return tab_uni_gbk5[code - 0x338E];
  }
  if (code >= 0x4E00 && code <= 0x9FA5) {
    return tab_uni_gbk6[code - 0x4E00];
  }
  if (code >= 0xE000 && code <= 0xE864) {
    return tab_uni_gbk_pua[code - 0xE000];
  }
  if (code >= 0xF92C && code <= 0xFA29) {
    return tab_uni_gbk7[code - 0xF92C];
  }
  if (code >= 0xFE30 && code <= 0xFFE5) {
    return tab_uni_gbk8[code - 0xFE30];
  }
  return 0;
}
/*
 * Encodes one wide character into GBK at s (buffer ends at e).
 * Returns 1 for values below 0x80 (copied as a single byte), 2 for a
 * two-byte GBK code, OB_CS_TOOSMALL / OB_CS_TOOSMALL2 when the buffer cannot
 * hold the result, and OB_CS_ILUNI when the character has no GBK mapping.
 */
static int
ob_wc_mb_gbk(const ObCharsetInfo *cs __attribute__((unused)),
             ob_wc_t wc, unsigned char *s, unsigned char *e)
{
  int code;
  if (s >= e) {
    return OB_CS_TOOSMALL;
  }
  if ((unsigned int) wc < 0x80) {
    /* Values below 0x80 map to themselves. */
    s[0]= (unsigned char) wc;
    return 1;
  }
  code= func_uni_gbk_onechar(wc);
  if (!code) {
    return OB_CS_ILUNI;
  }
  if (s+2>e) {
    return OB_CS_TOOSMALL2;
  }
  /* Head byte first, tail byte second. */
  s[0] = code >> 8;
  s[1] = code & 0xFF;
  return 2;
}
/*
 * Decodes one GBK character starting at s (input ends at e) into pwc[0].
 * Returns 1 for a byte below 0x80, 2 for a mapped two-byte sequence,
 * OB_CS_TOOSMALL / OB_CS_TOOSMALL2 when too few bytes are available, and -2
 * when the two-byte sequence has no mapping (pwc[0] is then 0).
 */
static int ob_mb_wc_gbk(const ObCharsetInfo *cs __attribute__((unused)),
                        ob_wc_t *pwc, const unsigned char *s, const unsigned char *e)
{
  int hi;
  if (s >= e) {
    return OB_CS_TOOSMALL;
  }
  hi = s[0];
  if (hi < 0x80) {
    pwc[0]=hi;
    return 1;
  }
  if (s+2>e) {
    return OB_CS_TOOSMALL2;
  }
  /* Combine head and tail bytes into the 16-bit GBK code and look it up. */
  pwc[0]=func_gbk_uni_onechar( (hi<<8) + s[1]);
  if (!pwc[0]) {
    return -2;
  }
  return 2;
}
/*
 * Measures the well-formed GBK prefix of [b, e), scanning at most pos
 * characters. Bytes below 128 count as one-byte characters; a valid
 * head/tail pair counts as one two-byte character. On the first byte that
 * fits neither case *error is set to 1 and scanning stops; otherwise *error
 * is 0. Returns the length in bytes of the accepted prefix.
 */
static size_t ob_well_formed_len_gbk(const ObCharsetInfo *cs __attribute__((unused)),
                              const char *b, const char *e,
                              size_t pos, int *error)
{
  const char *cur= b;
  const char *last= e - 1;   /* last position at which a pair cannot start */
  *error= 0;
  for (; pos != 0 && cur < e; pos--) {
    if ((unsigned char) cur[0] < 128) {
      cur++;
      continue;
    }
    /* A pair needs a second byte inside the buffer and a valid GBK code. */
    if (!(cur < last) || !isgbkcode((unsigned char)*cur, (unsigned char)cur[1])) {
      *error= 1;
      break;
    }
    cur+= 2;
  }
  return (size_t) (cur - b);
}
/*
 * Collation handler used by gbk_chinese_ci.
 * Combines the GBK-specific comparison and sort-key routines with the shared
 * multi-byte (_mb) and simple (_simple) helpers. The initializer is
 * positional, so entries must stay in the order of the ObCollationHandler
 * members.
 */
static ObCollationHandler ob_collation_gbk_ci_handler =
{
NULL, /* init */
NULL,
ob_strnncoll_gbk,
ob_strnncollsp_gbk,
ob_strnxfrm_gbk,
ob_strnxfrmlen_simple,
ob_strnxfrm_gbk_varlen,
ob_like_range_mb,
ob_wildcmp_mb,
NULL,
ob_instr_mb,
ob_hash_sort_simple,
ob_propagate_simple
};
/*
 * Character-set handler shared by both GBK collations defined in this file.
 * GBK-specific routines cover character detection, length, validation and
 * wide-character conversion; the rest reuse the generic multi-byte (_mb) and
 * 8-bit (_8bit) helpers. The initializer is positional, so entries must stay
 * in the order of the ObCharsetHandler members. Entries inside comments are
 * disabled slots.
 */
static ObCharsetHandler ob_charset_gbk_handler=
{
ismbchar_gbk, /* ismbchar */
mbcharlen_gbk, /* mbcharlen */
ob_numchars_mb,
ob_charpos_mb,
ob_max_bytes_charpos_mb,
ob_well_formed_len_gbk,
ob_lengthsp_8bit,
/* ob_numcells_8bit, */
ob_mb_wc_gbk,
ob_wc_mb_gbk,
ob_mb_ctype_mb,
/* ob_caseup_str_mb, */
/* ob_casedn_str_mb, */
ob_caseup_mb,
ob_casedn_mb,
ob_fill_8bit,
ob_strntol_8bit,
ob_strntoul_8bit,
ob_strntoll_8bit,
ob_strntoull_8bit,
ob_strntod_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit
};
/*
 * Descriptor for the "gbk" character set with the "gbk_chinese_ci" collation.
 * Flagged OB_CS_PRIMARY; uses the GBK sort-order table and the GBK collation
 * handler.
 */
ObCharsetInfo ob_charset_gbk_chinese_ci=
{
28,0,0, /* number */
OB_CS_COMPILED|OB_CS_PRIMARY|OB_CS_STRNXFRM, /* state */
"gbk", /* cs name */
"gbk_chinese_ci", /* name */
"", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_gbk,
to_lower_gbk,
to_upper_gbk,
sort_order_gbk,
NULL, /* uca */
&ob_caseinfo_gbk, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0, /* min_sort_char */
0xA967, /* max_sort_char */
' ', /* pad char */
1, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_gbk_handler,
&ob_collation_gbk_ci_handler,
PAD_SPACE};
/*
 * Descriptor for the "gbk" character set with the "gbk_bin" collation.
 * Flagged OB_CS_BINSORT; has no sort-order table and uses the shared
 * multi-byte binary collation handler.
 */
ObCharsetInfo ob_charset_gbk_bin=
{
87,0,0, /* number */
OB_CS_COMPILED|OB_CS_BINSORT, /* state */
"gbk", /* cs name */
"gbk_bin", /* name */
"", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_gbk,
to_lower_gbk,
to_upper_gbk,
NULL, /* sort_order */
NULL, /* uca */
&ob_caseinfo_gbk, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
2, /* mbmaxlen */
0, /* min_sort_char */
0xFEFE, /* max_sort_char */
' ', /* pad char */
1, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_gbk_handler,
&ob_collation_mb_bin_handler,
PAD_SPACE
};

View File

@ -0,0 +1,137 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#include "lib/charset/ob_mysql_global.h"
#include "lib/charset/ob_ctype.h"
#include "lib/utility/ob_macro_utils.h"
#include "lib/charset/ob_ctype_latin1_tab.h"
/*
 * Converts the single byte at str to a wide character through the cs_to_uni
 * table. Returns OB_CS_TOOSMALL when no input is available, -1 when a
 * non-zero byte maps to 0 (no mapping), and 1 otherwise.
 */
static int ob_mb_wc_latin1(const ObCharsetInfo *cs __attribute__((unused)),
                           ob_wc_t *pwc, const unsigned char *str,
                           const unsigned char *end) {
  if (str >= end) {
    return OB_CS_TOOSMALL;
  }
  pwc[0] = cs_to_uni[str[0]];
  /* Only byte 0 may legitimately map to code point 0. */
  if (!pwc[0] && str[0]) {
    return -1;
  }
  return 1;
}
/*
 * Converts a wide character to its single-byte form through the two-level
 * uni_to_cs table (indexed by high byte, then low byte) and stores it in
 * str[0]. Returns OB_CS_TOOSMALL when the output is full, OB_CS_ILUNI when
 * wc is above 0xFFFF or has no mapping, and 1 on success.
 */
static int ob_wc_mb_latin1(const ObCharsetInfo *cs __attribute__((unused)),
                           ob_wc_t wc, unsigned char *str, unsigned char *end) {
  const unsigned char *page;
  if (str >= end) {
    return OB_CS_TOOSMALL;
  }
  if (wc > 0xFFFF) {
    return OB_CS_ILUNI;
  }
  page = uni_to_cs[wc >> 8];
  if (page) {
    str[0] = page[wc & 0xFF];
  } else {
    /* No table for this high byte. */
    str[0] = '\0';
  }
  /* A zero byte is only a valid result for wc == 0. */
  if (!str[0] && wc) {
    return OB_CS_ILUNI;
  }
  return 1;
}
/*
 * Character-set handler shared by both latin1 collations defined in this file.
 * Only the wide-character conversions are latin1-specific; everything else
 * reuses the generic 8-bit helpers. The initializer is positional, so entries
 * must stay in the order of the ObCharsetHandler members. Lines starting
 * with // are disabled slots.
 */
static ObCharsetHandler ob_charset_latin1_handler=
{
//NULL,
NULL, /* ismbchar */
ob_mbcharlen_8bit, /* mbcharlen */
ob_numchars_8bit,
ob_charpos_8bit,
ob_max_bytes_charpos_8bit,
ob_well_formed_len_8bit,
ob_lengthsp_binary,
//ob_numcells_8bit,
ob_mb_wc_latin1,
ob_wc_mb_latin1,
ob_mb_ctype_8bit,
//ob_case_str_bin,
//ob_case_str_bin,
ob_caseup_8bit,
ob_casedn_8bit,
//ob_snprintf_8bit,
//ob_long10_to_str_8bit,
//ob_longlong10_to_str_8bit,
ob_fill_8bit,
ob_strntol_8bit,
ob_strntoul_8bit,
ob_strntoll_8bit,
ob_strntoull_8bit,
ob_strntod_8bit,
//ob_strtoll10_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit
};
/*
 * Descriptor for the latin1 character set with the OB_LATIN1_SWEDISH_CI
 * collation. Flagged OB_CS_PRIMARY; uses the latin1 sort-order table and the
 * simple 8-bit case-insensitive collation handler.
 */
ObCharsetInfo ob_charset_latin1 = {
8,0,0, /* number */
OB_CS_COMPILED | OB_CS_PRIMARY, /* state */
OB_LATIN1, /* cs name */
OB_LATIN1_SWEDISH_CI, /* name */
"cp1252 West European", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_latin1,
to_lower_latin1,
to_upper_latin1,
sort_order_latin1,
NULL, /* uca */
//NULL,
//NULL,
&ob_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0, /* min_sort_char */
0xFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_latin1_handler,
&ob_collation_8bit_simple_ci_handler,
PAD_SPACE};
/*
 * Descriptor for the latin1 character set with the OB_LATIN1_BIN collation.
 * Flagged OB_CS_BINSORT; has no sort-order table and uses the 8-bit binary
 * collation handler.
 */
ObCharsetInfo ob_charset_latin1_bin = {
47,0,0, /* number */
OB_CS_COMPILED | OB_CS_BINSORT, /* state */
OB_LATIN1, /* cs name */
OB_LATIN1_BIN, /* name */
"cp1252 West European", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_latin1,
to_lower_latin1,
to_upper_latin1,
NULL, /* sort_order */
NULL, /* uca */
//NULL,
//NULL,
&ob_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0, /* min_sort_char */
0xFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_latin1_handler,
&ob_collation_8bit_bin_handler,
PAD_SPACE};

View File

@ -1,28 +1,14 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
/*
*
* Version: $Id
*
* Authors:
* - initial release
*
*/
#ifndef OB_BUILD_FULL_CHARSET
#include "lib/charset/ob_mysql_global.h"
#include "lib/charset/ob_ctype.h"
#include "lib/utility/ob_macro_utils.h"
*/
static unsigned char ctype_latin1[] = {
0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
@ -281,133 +267,3 @@ static unsigned char *uni_to_cs[] = {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
/*
 * Converts the single byte at str to a wide character through the cs_to_uni
 * table. Returns OB_CS_TOOSMALL when no input is available, -1 when a
 * non-zero byte maps to 0 (no mapping), and 1 otherwise.
 */
static int ob_mb_wc_latin1(const ObCharsetInfo *cs __attribute__((unused)),
                           ob_wc_t *pwc, const unsigned char *str, const unsigned char *end) {
  if (str >= end) {
    return OB_CS_TOOSMALL;
  }
  pwc[0] = cs_to_uni[str[0]];
  /* Only byte 0 may legitimately map to code point 0. */
  if (!pwc[0] && str[0]) {
    return -1;
  }
  return 1;
}
/*
 * Converts a wide character to its single-byte form through the two-level
 * uni_to_cs table (indexed by high byte, then low byte) and stores it in
 * str[0]. Returns OB_CS_TOOSMALL when the output is full, OB_CS_ILUNI when
 * wc is above 0xFFFF or has no mapping, and 1 on success.
 */
static int ob_wc_mb_latin1(const ObCharsetInfo *cs __attribute__((unused)),
                           ob_wc_t wc, unsigned char *str, unsigned char *end) {
  const unsigned char *page;
  if (str >= end) {
    return OB_CS_TOOSMALL;
  }
  if (wc > 0xFFFF) {
    return OB_CS_ILUNI;
  }
  page = uni_to_cs[wc >> 8];
  if (page) {
    str[0] = page[wc & 0xFF];
  } else {
    /* No table for this high byte. */
    str[0] = '\0';
  }
  /* A zero byte is only a valid result for wc == 0. */
  if (!str[0] && wc) {
    return OB_CS_ILUNI;
  }
  return 1;
}
/*
 * Character-set handler for latin1: latin1-specific wide-character
 * conversions plus the generic 8-bit helpers. The initializer is positional,
 * so entries must stay in the order of the ObCharsetHandler members. Lines
 * starting with // are disabled slots.
 */
static ObCharsetHandler ob_charset_latin1_handler=
{
//NULL, /* init */
NULL, /* ismbchar */
ob_mbcharlen_8bit, /* mbcharlen */
ob_numchars_8bit,
ob_charpos_8bit,
ob_max_bytes_charpos_8bit,
ob_well_formed_len_8bit,
ob_lengthsp_binary,
//ob_numcells_8bit,
ob_mb_wc_latin1,
ob_wc_mb_latin1,
ob_mb_ctype_8bit,
//ob_case_str_bin,
//ob_case_str_bin,
ob_caseup_8bit,
ob_casedn_8bit,
//ob_snprintf_8bit,
//ob_long10_to_str_8bit,
//ob_longlong10_to_str_8bit,
ob_fill_8bit,
ob_strntol_8bit,
ob_strntoul_8bit,
ob_strntoll_8bit,
ob_strntoull_8bit,
ob_strntod_8bit,
//ob_strtoll10_8bit,
ob_strntoull10rnd_8bit,
ob_scan_8bit
};
/*
 * Descriptor for the latin1 character set with the OB_LATIN1_SWEDISH_CI
 * collation. Flagged OB_CS_PRIMARY; uses the latin1 sort-order table and the
 * simple 8-bit case-insensitive collation handler.
 */
ObCharsetInfo ob_charset_latin1 = {
8,0,0, /* number */
OB_CS_COMPILED | OB_CS_PRIMARY, /* state */
OB_LATIN1, /* cs name */
OB_LATIN1_SWEDISH_CI, /* name */
"cp1252 West European", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_latin1,
to_lower_latin1,
to_upper_latin1,
sort_order_latin1,
NULL, /* uca */
//NULL, /* tab_to_uni */
//NULL, /* tab_from_uni */
&ob_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0, /* min_sort_char */
0xFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_latin1_handler,
&ob_collation_8bit_simple_ci_handler,
PAD_SPACE};
/*
 * Descriptor for the latin1 character set with the OB_LATIN1_BIN collation.
 * Flagged OB_CS_BINSORT; has no sort-order table and uses the 8-bit binary
 * collation handler.
 */
ObCharsetInfo ob_charset_latin1_bin = {
47,0,0, /* number */
OB_CS_COMPILED | OB_CS_BINSORT, /* state */
OB_LATIN1, /* cs name */
OB_LATIN1_BIN, /* name */
"cp1252 West European", /* comment */
NULL, /* tailoring */
NULL, /* coll_param */
ctype_latin1,
to_lower_latin1,
to_upper_latin1,
NULL, /* sort_order */
NULL, /* uca */
//NULL, /* tab_to_uni */
//NULL, /* tab_from_uni */
&ob_unicase_default, /* caseinfo */
NULL, /* state_map */
NULL, /* ident_map */
1, /* strxfrm_multiply */
1, /* caseup_multiply */
1, /* casedn_multiply */
1, /* mbminlen */
1, /* mbmaxlen */
0, /* min_sort_char */
0xFF, /* max_sort_char */
' ', /* pad char */
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_compare */
1, /* levels_for_order */
&ob_charset_latin1_handler,
&ob_collation_8bit_bin_handler,
PAD_SPACE};
#endif

View File

@ -10,14 +10,8 @@
* See the Mulan PubL v2 for more details.
*/
/*
* (C) 2017-2020 Alibaba Group Holding Limited.
*
* Authors:
*/
#ifndef OB_BUILD_FULL_CHARSET
#include "lib/charset/ob_ctype.h"
#include "lib/charset/str_uca_type.h"
static void __attribute__ ((noinline)) pad_max_char_help(char *str, char *end, char *buf, char buf_len)
{
@ -78,11 +72,11 @@ bool ob_like_range_mb_help(const ObCharsetInfo *cs,
}
bool ob_like_range_mb(const ObCharsetInfo *cs,
const char *ptr,size_t ptr_length,
pbool escape_char, pbool w_one, pbool w_many,
size_t res_length,
char *min_str,char *max_str,
size_t *min_length,size_t *max_length)
const char *ptr,size_t ptr_length,
pbool escape_char, pbool w_one, pbool w_many,
size_t res_length,
char *min_str,char *max_str,
size_t *min_length,size_t *max_length)
{
unsigned int mb_len;
const char *end= ptr + ptr_length;
@ -95,8 +89,7 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
for (; ptr != end && min_str != min_end && max_char_len ; max_char_len--) {
if (*ptr == escape_char && ptr+1 != end) {
ptr++;
} else if (*ptr == w_one ||
*ptr == w_many) {
} else if (*ptr == w_one || *ptr == w_many) {
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
}
mb_len= ob_ismbchar(cs, ptr, end);
@ -114,7 +107,7 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
if (ptr[1] == w_one || ptr[1] == w_many) {
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
} else if (ob_uca_can_be_contraction_tail(contractions, (unsigned char) ptr[1]) &&
ob_uca_contraction2_weight(contractions, (unsigned char) ptr[0], ptr[1])) {
ob_uca_contraction2_weight(contractions, (unsigned char) ptr[0], ptr[1])) {
if (max_char_len == 1 || min_str + 1 >= min_end) {
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
}
@ -150,7 +143,7 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
const char *wild_str,const char *wild_end,
int escape_char, int w_one, int w_many, int recurse_level)
{
int result= -1;
int result= -1;
while (wild_str != wild_end) {
while ((*wild_str == escape_char) || (*wild_str != w_many && *wild_str != w_one)) {
int l;
@ -158,59 +151,56 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
wild_str++;
}
if ((l = ob_ismbchar(cs, wild_str, wild_end))) {
if (str+l > str_end || memcmp(str, wild_str, l) != 0)
return 1;
str += l;
wild_str += l;
if (str+l > str_end || memcmp(str, wild_str, l) != 0)
return 1;
str += l;
wild_str += l;
} else if (str == str_end || likeconv(cs,*wild_str++) != likeconv(cs,*str++)) {
return(1);
return(1);
}
if (wild_str == wild_end) {
return (str != str_end);
}
result=1;
return (str != str_end);
}
result=1;
}
if (*wild_str == w_one) {
do {
if (str == str_end) {
return (result);
}
INC_PTR(cs,str,str_end);
INC_PTR(cs,str,str_end);
} while (++wild_str < wild_end && *wild_str == w_one);
if (wild_end == wild_str)
break;
break;
}
if (*wild_str == w_many) {
if (*wild_str == w_many) {
unsigned char cmp;
const char* mb = wild_str;
int mb_len=0;
wild_str++;
for (; wild_str != wild_end ; wild_str++)
{
if (*wild_str == w_many)
continue;
if (*wild_str == w_one)
{
if (*wild_str == w_one) {
if (str == str_end)
return (-1);
INC_PTR(cs,str,str_end);
continue;
}
break;
break;
}
if (wild_str == wild_end) {
return(0);
return(0);
} else if (str == str_end) {
return -1;
return -1;
} else if ((cmp= *wild_str) == escape_char && wild_str+1 != wild_end) {
cmp= *++wild_str;
cmp= *++wild_str;
}
mb=wild_str;
mb_len= ob_ismbchar(cs, wild_str, wild_end);
INC_PTR(cs,wild_str,wild_end);
INC_PTR(cs,wild_str,wild_end);
cmp=likeconv(cs,cmp);
while (true) {
while (TRUE) {
@ -232,7 +222,7 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
{
int tmp=ob_wildcmp_mb_impl(cs,str,str_end,
wild_str,wild_end,escape_char,w_one,
w_many, recurse_level + 1);
w_many, recurse_level + 1);
if (tmp <= 0)
return (tmp);
}
@ -240,7 +230,7 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
return -1;
} else if (wild_str != wild_end && wild_str[0] == w_many) {
return -1;
}
}
}
return(-1);
}
@ -256,7 +246,7 @@ unsigned int __attribute__ ((noinline)) ob_instr_mb_help(size_t s_length, ob_mat
match->end= 0;
match->mb_len= 0;
}
return 1;
return 1;
}
return 0;
}
@ -277,8 +267,8 @@ unsigned int ob_instr_mb(const ObCharsetInfo *cs,
end= b+b_length-s_length+1;
while (b < end) {
int mb_len;
if (!cs->coll->strnncoll(cs, (unsigned char*) b, s_length,
(unsigned char*) s, s_length, 0)) {
if (!cs->coll->strnncoll(cs, (unsigned char*) b, s_length,
(unsigned char*) s, s_length, 0)) {
if (nmatch) {
match[0].beg= 0;
match[0].end= (size_t) (b-b0);
@ -286,7 +276,7 @@ unsigned int ob_instr_mb(const ObCharsetInfo *cs,
if (nmatch > 1) {
match[1].beg= match[0].end;
match[1].end= match[0].end+s_length;
match[1].mb_len= 0;
match[1].mb_len= 0;
}
}
return 2;
@ -349,7 +339,7 @@ size_t ob_max_bytes_charpos_mb(const ObCharsetInfo *cs __attribute__((unused)),
}
int ob_mb_ctype_mb(const ObCharsetInfo *cs __attribute__((unused)), int *ctype,
const unsigned char *s, const unsigned char *e)
const unsigned char *s, const unsigned char *e)
{
ob_wc_t wc;
int res = cs->cset->mb_wc(cs, &wc, s, e);
@ -439,7 +429,7 @@ size_t ob_lengthsp_8bit(const ObCharsetInfo *cs __attribute__((unused)),
const char *ptr, size_t length)
{
const char *end;
end= (const char *) skip_trailing_space((const uchar *)ptr, length, 0);
end= (const char *) skip_trailing_space((const unsigned char *)ptr, length, 0);
return (size_t) (end-ptr);
}
@ -470,18 +460,18 @@ int __attribute__ ((noinline)) ob_strnncollsp_mb_bin_help(
res= 1;
}
if (a_length < b_length) {
a_length= b_length;
a= b;
a_length= b_length;
a= b;
swap= -1;
res= -res;
res= -res;
}
for (end= a + a_length-length; a < end ; a++) {
if (*a != ' ') {
*has_returned = 1;
break;
}
}
}
if (*a != ' ') {
*has_returned = 1;
break;
}
}
}
*a_ = a;
*b_ = b;
*end_ = end;
@ -510,10 +500,10 @@ int ob_strnncollsp_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
res= 0;
int has_returned = 0;
int tmp = ob_strnncollsp_mb_bin_help(
&a, a_length,
&b, b_length,
&end,
diff_if_only_endspace_difference, &has_returned, &res, length);
&a, a_length,
&b, b_length,
&end,
diff_if_only_endspace_difference, &has_returned, &res, length);
return has_returned == 1 ? tmp : res;
}
@ -585,7 +575,7 @@ size_t ob_strnxfrm_mb(const ObCharsetInfo *cs,
pad:
return ob_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
}
#define INC_PTR(cs,A,B) A+=(ob_ismbchar(cs,A,B) ? ob_ismbchar(cs,A,B) : 1)
@ -636,7 +626,7 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
const char *str_end, const char *wild_str, const char *wild_end, int escape_char,
int w_one, int w_many, int recurse_level)
{
int result = -1;
int result = -1;
while (wild_str != wild_end) {
int has_returned = 0;
int tmp = ob_wildcmp_mb_bin_impl_help(cs, &str,&str_end, &wild_str, &wild_end, escape_char,w_one,w_many, &result, &has_returned);
@ -644,7 +634,7 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
return tmp;
} else if (*wild_str == w_one) {
do {
if (str == str_end) {
if (str == str_end) {
return (result);
} else {
INC_PTR(cs, str, str_end);
@ -654,13 +644,11 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
break;
}
}
if (*wild_str == w_many) {
if (*wild_str == w_many) {
unsigned char cmp;
const char* mb = wild_str;
int mb_len = 0;
wild_str++;
for (; wild_str != wild_end; wild_str++) {
if (*wild_str == w_many) {
continue;
@ -673,7 +661,7 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
}
} else {
break;
}
}
}
if (wild_str == wild_end) {
return (0);
@ -749,8 +737,6 @@ void ob_hash_sort_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
}
}
ObCollationHandler ob_collation_mb_bin_handler = {
NULL,
NULL,
@ -770,5 +756,3 @@ ObCollationHandler ob_collation_mb_bin_handler = {
#undef INC_PTR
#undef likeconv
#endif

View File

@ -1,3 +1,4 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
@ -9,8 +10,7 @@
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
/*
/*
*
* Version: $Id
*
@ -18,7 +18,6 @@
* - initial release
*
*/
#ifndef OB_BUILD_FULL_CHARSET
#include "lib/charset/ob_ctype.h"
#include "lib/charset/ob_dtoa.h"
@ -53,14 +52,14 @@ static ulonglong d10[DIGITS_IN_ULONGLONG]=
};
long ob_strntol_8bit(const ObCharsetInfo *cs,
const char *nptr, size_t l, int base,
char **end_ptr, int *err)
const char *nptr, size_t l, int base,
char **end_ptr, int *err)
{
const char *save, *s = nptr, *e = nptr+l;
unsigned char c;
unsigned int cut_lim;
*err= 0;
*err= 0;
uint32 cut_off;
while (s<e && ob_isspace(cs, *s)) {
s++;
@ -115,7 +114,7 @@ long ob_strntol_8bit(const ObCharsetInfo *cs,
if (neg) {
if (i > (uint32) INT_MIN32) {
overflow = 1;
}
}
} else if (i > INT_MAX32) {
overflow = 1;
}
@ -137,8 +136,8 @@ NO_CONV:
ulong ob_strntoul_8bit(const ObCharsetInfo *cs,
const char *nptr, size_t l, int base,
char **end_ptr, int *err)
const char *nptr, size_t l, int base,
char **end_ptr, int *err)
{
int neg;
unsigned char c;
@ -146,7 +145,7 @@ ulong ob_strntoul_8bit(const ObCharsetInfo *cs,
uint32 cut_off;
unsigned int cut_lim;
*err= 0;
*err= 0;
while (s<e && ob_isspace(cs, *s)) {
s++;
@ -217,13 +216,13 @@ NO_CONV:
longlong ob_strntoll_8bit(const ObCharsetInfo *cs __attribute__((unused)),
const char *nptr, size_t l, int base,
char **end_ptr,int *err)
const char *nptr, size_t l, int base,
char **end_ptr,int *err)
{
ulonglong cut_off;
unsigned int cut_lim;
const char *s = nptr, *e = nptr+l, *save;
*err= 0;
*err= 0;
while (s<e && ob_isspace(cs,*s)) {
s++;
@ -302,14 +301,14 @@ NO_CONV:
ulonglong ob_strntoull_8bit(const ObCharsetInfo *cs,
const char *nptr, size_t l, int base,
char **end_ptr, int *err)
const char *nptr, size_t l, int base,
char **end_ptr, int *err)
{
ulonglong cut_off;
unsigned int cut_lim;
const char *s = nptr, *e = nptr + l, *save;
*err= 0;
*err= 0;
while (s<e && ob_isspace(cs,*s)) {
s++;
@ -386,8 +385,8 @@ NO_CONV:
double ob_strntod_8bit(const ObCharsetInfo *cs __attribute__((unused)),
char *str, size_t len,
char **end, int *err)
char *str, size_t len,
char **end, int *err)
{
if (len == INT_MAX32) {
len= 65535;
@ -421,7 +420,7 @@ ob_strntoull10rnd_8bit(const ObCharsetInfo *cs __attribute__((unused)),
beg= str;
end9= (str + 9) > end ? end : (str + 9);
for (ul= 0 ; str < end9 && (ch= (unsigned char) (*str - '0')) < 10; str++) {
ul= ul * 10 + ch;
}
@ -583,7 +582,6 @@ RET_SIGN:
}
}
if (neg && ull) {
*err= OB_ERRNO_ERANGE;
return 0;
@ -611,7 +609,7 @@ RET_TOO_LARGE:
}
void ob_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *str_end,
unsigned int flags, unsigned int level)
unsigned int flags, unsigned int level)
{
if (flags & (OB_STRXFRM_DESC_LEVEL1 << level)) {
if (flags & (OB_STRXFRM_REVERSE_LEVEL1 << level)) {
@ -660,8 +658,6 @@ size_t ob_scan_8bit(const ObCharsetInfo *cs, const char *str, const char *end,
}
}
size_t ob_strxfrm_pad_desc_and_reverse(const ObCharsetInfo *cs,
unsigned char *str, unsigned char *frm_end, unsigned char *str_end,
unsigned int nweights, unsigned int flags, unsigned int level)
@ -686,11 +682,11 @@ size_t ob_strnxfrmlen_simple(const ObCharsetInfo *cs, size_t len)
}
bool ob_like_range_simple(const ObCharsetInfo *cs,
const char *ptr, size_t ptr_len,
pbool escape_char, pbool w_one, pbool w_many,
size_t res_len,
char *min_str,char *max_str,
size_t *min_len, size_t *max_len)
const char *ptr, size_t ptr_len,
pbool escape_char, pbool w_one, pbool w_many,
size_t res_len,
char *min_str,char *max_str,
size_t *min_len, size_t *max_len)
{
const char *end= ptr + ptr_len;
char *min_org=min_str;
@ -699,11 +695,11 @@ bool ob_like_range_simple(const ObCharsetInfo *cs,
for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--) {
if (*ptr == escape_char && ptr+1 != end) {
ptr++;
ptr++;
*min_str++= *max_str++ = *ptr;
continue;
} else if (*ptr == w_one) {
*min_str++='\0';
*min_str++='\0';
*max_str++= (char) cs->max_sort_char;
continue;
} else if (*ptr == w_many) {
@ -742,7 +738,7 @@ bool ob_propagate_complex(const ObCharsetInfo *cs __attribute__((unused)),
}
void ob_fill_8bit(const ObCharsetInfo *cs __attribute__((unused)),
char *s, size_t l, int fill)
char *s, size_t l, int fill)
{
memset(s, fill, l);
}
@ -758,9 +754,9 @@ int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err
}
void ob_hash_sort_simple(const ObCharsetInfo *cs,
const unsigned char *key, size_t len,
unsigned long int *nr1, unsigned long int *nr2,
const bool calc_end_space, hash_algo hash_algo)
const unsigned char *key, size_t len,
unsigned long int *nr1, unsigned long int *nr2,
const bool calc_end_space, hash_algo hash_algo)
{
unsigned char *sort_order=cs->sort_order;
const unsigned char *end;
@ -788,7 +784,7 @@ void ob_hash_sort_simple(const ObCharsetInfo *cs,
#define SPACE_INT 0x20202020
const uchar *skip_trailing_space(const uchar *ptr,size_t len, bool is_utf16 /*false*/)
const unsigned char *skip_trailing_space(const unsigned char *ptr,size_t len, bool is_utf16 /*false*/)
{
const unsigned char *end= ptr + len;
if (len > 20 && !is_utf16) {
@ -853,9 +849,9 @@ size_t ob_casedn_8bit(const ObCharsetInfo *cs __attribute__((unused)),
}
int ob_strnncoll_simple(const ObCharsetInfo *cs __attribute__((unused)),
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
bool is_prefix)
const unsigned char *s, size_t slen,
const unsigned char *t, size_t tlen,
bool is_prefix)
{
size_t len = (slen > tlen) ? tlen : slen;
if (is_prefix && slen > tlen) slen = tlen;
@ -871,18 +867,18 @@ int ob_strnncoll_simple(const ObCharsetInfo *cs __attribute__((unused)),
static int ob_strnncollsp_simple(const ObCharsetInfo *cs
__attribute__((unused)),
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
const unsigned char *s, size_t slen,
const unsigned char *t, size_t tlen,
bool diff_if_only_endspace_difference
__attribute__((unused)))
{
size_t len = (slen > tlen) ? tlen : slen;
for (size_t i = 0; i < len; i++){
if(ob_sort_order(cs,*s)!=ob_sort_order(cs,*t)) {
return (int)ob_sort_order(cs,*s) - (int)ob_sort_order(cs,*t);
}
s++;
t++;
if(ob_sort_order(cs,*s)!=ob_sort_order(cs,*t)) {
return (int)ob_sort_order(cs,*s) - (int)ob_sort_order(cs,*t);
}
s++;
t++;
}
int res = 0;
if (slen != tlen) {
@ -896,7 +892,6 @@ static int ob_strnncollsp_simple(const ObCharsetInfo *cs
*/
if (slen < tlen) {
slen = tlen;
s = t;
swap = -1;
res = -res;
@ -914,14 +909,12 @@ static int ob_strnncollsp_simple(const ObCharsetInfo *cs
return res;
}
static size_t ob_strnxfrm_simple(const ObCharsetInfo* cs __attribute__((unused)), unsigned char* dst, size_t dstlen,
uint nweights, const unsigned char* src, size_t srclen, unsigned int flags, bool* is_valid_unicode)
unsigned int nweights, const unsigned char* src, size_t srclen, unsigned int flags, bool* is_valid_unicode)
{
uchar *dst0 = dst;
const uchar *end;
const uchar *remainder;
unsigned char *dst0 = dst;
const unsigned char *end;
const unsigned char *remainder;
size_t frmlen;
frmlen = dstlen > nweights ? nweights : dstlen;
frmlen = frmlen > srclen ? srclen : frmlen;
@ -1030,9 +1023,8 @@ int ob_wildcmp_8bit(const ObCharsetInfo* cs, const char* str, const char* str_en
return ob_wildcmp_8bit_impl(cs, str, str_end, wildstr, wildend, escape, w_one, w_many, 1);
}
uint32_t ob_instr_simple(const ObCharsetInfo* cs , const char* b, size_t b_length,
const char* s, size_t s_length, ob_match_t* match, uint nmatch)
const char* s, size_t s_length, ob_match_t* match, unsigned int nmatch)
{
register const unsigned char *str, *search, *end, *search_end;
@ -1081,8 +1073,6 @@ uint32_t ob_instr_simple(const ObCharsetInfo* cs , const char* b, size_t b_lengt
return 0;
}
ObCollationHandler ob_collation_8bit_simple_ci_handler = {
NULL, /* init */
NULL,
@ -1099,6 +1089,4 @@ ObCollationHandler ob_collation_8bit_simple_ci_handler = {
ob_propagate_simple};
#undef likeconv
#undef INC_PTR
#endif
#undef INC_PTR

3153
deps/oblib/src/lib/charset/ob_ctype_uca.cc vendored Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,4 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
@ -10,21 +11,16 @@
* See the Mulan PubL v2 for more details.
*/
/*
* (C) 2017-2020 Alibaba Group Holding Limited.
*
* Authors:
*/
#ifndef OB_BUILD_FULL_CHARSET
#include "lib/charset/ob_ctype.h"
#include "lib/charset/str_uca_type.h"
#include "lib/charset/ob_dtoa.h"
#include "lib/charset/ob_template_helper.h"
#define OB_UTF16_HIGH_HEAD(x) ((((unsigned char) (x)) & 0xFC) == 0xD8)
#define OB_UTF16_LOW_HEAD(x) ((((unsigned char) (x)) & 0xFC) == 0xDC)
#define OB_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8)
#define OB_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC)
#define OB_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800)
#define OB_UTF16_WC2(a, begin) ((a << 8) + begin)
#define OB_UTF16_WC2(a, b) ((a << 8) + b)
static inline int
ob_bincmp(const unsigned char *str, const unsigned char *se,
@ -140,7 +136,7 @@ ob_utf16_uni(const ObCharsetInfo *cs __attribute__((unused)),
} else {
*pwc= OB_UTF16_WC4(str[0], str[1], str[2], str[3]);
return 4;
}
}
} else if (OB_UTF16_LOW_HEAD(*str)) {
return OB_CS_ILSEQ;
} else {
@ -162,7 +158,7 @@ ob_uni_utf16(const ObCharsetInfo *cs __attribute__((unused)),
*str++= (unsigned char) (wc >> 8);
*str= (unsigned char) (wc & 0xFF);
return 2;
}
}
} else if (wc <= 0x10FFFF) {
if (4 > end - str) {
return OB_CS_TOOSMALL4;
@ -295,7 +291,7 @@ ob_strntol_mb2_or_mb4(const ObCharsetInfo *cs,
//do nothing
} else {
break;
}
}
} else {
if (end_ptr != NULL) *end_ptr= (char*) str;
err[0]= (cnv==OB_CS_ILSEQ) ? EILSEQ : EDOM;
@ -393,7 +389,7 @@ ob_strntoul_mb2_or_mb4(const ObCharsetInfo *cs,
//do nothing
} else {
break;
}
}
} else {
if (NULL != end_ptr) {
*end_ptr= (char*)str;
@ -456,7 +452,7 @@ ob_strntoul_mb2_or_mb4(const ObCharsetInfo *cs,
return (negative ? -((long) res) : (long) res);
}
static longlong
static longlong
ob_strntoll_mb2_or_mb4(const ObCharsetInfo *cs,
const char *nptr, size_t l, int base,
char **end_ptr, int *err)
@ -546,7 +542,7 @@ ob_strntoll_mb2_or_mb4(const ObCharsetInfo *cs,
if (negative) {
if (res > (uint64_t) LONGLONG_MIN) {
overflow = 1;
}
}
} else if (res > (uint64_t) LONGLONG_MAX) {
overflow = 1;
}
@ -904,8 +900,8 @@ ob_strnncollsp_utf16(const ObCharsetInfo *cs,
if (s_res <= 0 || t_res <= 0) {
return ob_bincmp(str, se, t, te);
} else {
ob_tosort_utf16(uni_plane, &s_wc);
ob_tosort_utf16(uni_plane, &t_wc);
ob_tosort_utf16(uni_plane, &s_wc);
ob_tosort_utf16(uni_plane, &t_wc);
}
if (s_wc != t_wc) {
return s_wc > t_wc ? 1 : -1;
@ -1097,9 +1093,9 @@ ob_like_range_generic(const ObCharsetInfo *cs,
} else {
max_str+= res;
wc= wc2;
}
}
}
}
}
res= cs->cset->wc_mb(cs, wc, (unsigned char*) min_str, (unsigned char*) min_end);
if (res <= 0) {
goto PAD_SET_LEN;
@ -1123,7 +1119,7 @@ PAD_MIN_MAX:
res_length_diff= res_length % cs->mbminlen;
cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff, cs->min_sort_char);
cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff, cs->max_sort_char);
if (res_length_diff != 0) {
memset(min_end - res_length_diff, 0, res_length_diff);
memset(max_end - res_length_diff, 0, res_length_diff);
@ -1254,5 +1250,3 @@ ObCharsetInfo ob_charset_utf16_general_ci=
&ob_collation_utf16_general_ci_handler,
PAD_SPACE
};
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,4 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
@ -8,19 +9,8 @@
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
/*
*
* Version: $Id
*
* Authors:
* - initial release
*
*/
#ifndef OB_BUILD_FULL_CHARSET
#include "lib/charset/ob_dtoa.h"
#include "lib/charset/ob_mysql_global.h"
@ -48,51 +38,6 @@ size_t ob_fcvt_overflow(char *to, bool *error)
return 1;
}
size_t ob_fcvt(double x, int precision, int width, char *to, bool *error)
{
int decpt, sign;
char *res, *end, *dst= to, *dend= to + width;
char buf[DTOA_BUF_MAX_SIZE];
if (!(precision >= 0 && precision < 31 && to != NULL)) {
return 0;
}
res = dtoa(x, 5, precision, &decpt, &sign, &end, buf, sizeof(buf));
if (decpt == DTOA_OVERFLOW) {
dtoa_free(res, buf, sizeof(buf));
return ob_fcvt_overflow(to, error);
}
ob_fcvt_help(&end, &dst, &dend, sign, decpt, &precision, &res);
*dst= '\0';
if (error != NULL) {
*error= FALSE;
}
dtoa_free(res, buf, sizeof(buf));
return dst - to;
}
size_t ob_fcvt_opt(double x, int precision, int width, char *to, bool *error, bool add_padding_zero)
{
int decpt, sign;
char *res, *end, *dst= to, *dend= to + width;
char buf[DTOA_BUF_MAX_SIZE];
if (!(precision >= 0 && precision < 31 && to != NULL)) {
return 0;
}
res = dtoa(x, 5, precision, &decpt, &sign, &end, buf, sizeof(buf));
if (decpt == DTOA_OVERFLOW) {
dtoa_free(res, buf, sizeof(buf));
return ob_fcvt_overflow(to, error);
}
ob_fcvt_help_opt(&end, &dst, &dend, sign, decpt, &precision, &res, add_padding_zero);
*dst= '\0';
if (error != NULL)
*error= FALSE;
dtoa_free(res, buf, sizeof(buf));
return dst - to;
}
void ob_fcvt_help(char **end, char **dst, char **dend, int sign, int decpt,
int *precision, char **res)
{
@ -133,46 +78,29 @@ void ob_fcvt_help(char **end, char **dst, char **dend, int sign, int decpt,
}
}
void ob_fcvt_help_opt(char **end, char **dst, char **dend, int sign, int decpt,
int *precision, char **res, bool add_padding_zero)
{
const int len = (*end) - (*res);
const char *dend_ptr = *dend;
char *dst_ptr = *dst;
char *src = (*res);
int i = 0;
if (dst_ptr < dend_ptr) {
if (sign)
*dst_ptr++= '-';
if (decpt <= 0)
{
if ((dst_ptr + 1) < dend_ptr) {
*dst_ptr++= '0';
*dst_ptr++= '.';
}
for (i= decpt; i < 0 && dst_ptr < dend_ptr; i++)
*dst_ptr++= '0';
}
for (i= 1; i <= len && dst_ptr < dend_ptr; i++)
{
*dst_ptr++= *src++;
if (i == decpt && i < len && dst_ptr < dend_ptr)
*dst_ptr++= '.';
}
while (i++ <= decpt && dst_ptr < dend_ptr)
*dst_ptr++= '0';
if (*precision > 0 && add_padding_zero)
{
if (len <= decpt && dst_ptr < dend_ptr)
*dst_ptr++= '.';
for (i= *precision - OB_MAX(0, (len - decpt)); i > 0 && dst_ptr < dend_ptr; i--)
*dst_ptr++= '0';
}
*dst = dst_ptr;
}
size_t ob_fcvt(double x, int precision, int width, char *to, bool *error)
{
int decpt, sign;
char *res, *end, *dst= to, *dend= to + width;
char buf[DTOA_BUF_MAX_SIZE];
if (!(precision >= 0 && precision < 31 && to != NULL)) {
return 0;
}
res = dtoa(x, 5, precision, &decpt, &sign, &end, buf, sizeof(buf));
if (decpt == DTOA_OVERFLOW) {
dtoa_free(res, buf, sizeof(buf));
return ob_fcvt_overflow(to, error);
}
ob_fcvt_help(&end, &dst, &dend, sign, decpt, &precision, &res);
*dst= '\0';
if (error != NULL)
*error= FALSE;
dtoa_free(res, buf, sizeof(buf));
return dst - to;
}
//=================================================================================
size_t ob_gcvt_overflow(char *to, bool *error)
{
@ -270,10 +198,12 @@ void ob_gcvt_help2(int *width, int *len, char **dend, char **src,
const int need_check_buf = (*dend - *dst) < MAX_DOUBLE_SIZE;
if (need_check_buf) {
if (sign && dst_ptr < dend_ptr)
*dst_ptr++= '-';
if (dst_ptr < dend_ptr)
*dst_ptr++= *src_ptr++;
//zero
const int is_zero = (dst_ptr < dend_ptr && use_oracle_mode && (*(src_ptr - 1) == '0') && ((*len) == 1));
if (is_zero) {
if (sign) {
@ -482,11 +412,11 @@ typedef union { double d; ULong L[2]; } U;
#if defined(WORDS_BIGENDIAN) || (defined(__FLOAT_WORD_ORDER) && \
(__FLOAT_WORD_ORDER == __BIG_ENDIAN))
COPY_BIGINT WORD0(x) (x)->L[0]
#define WORD1(x) (x)->L[1]
#define word0(x) (x)->L[0]
#define word1(x) (x)->L[1]
#else
#define WORD0(x) (x)->L[1]
#define WORD1(x) (x)->L[0]
#define word0(x) (x)->L[1]
#define word1(x) (x)->L[0]
#endif
#define dval(x) (x)->d
@ -520,7 +450,7 @@ COPY_BIGINT WORD0(x) (x)->L[0]
#else
#define Flt_Rounds 1
#endif
#endif /*Flt_Rounds*/
#endif
#ifdef Honor_FLT_ROUNDS
#define Rounding rounding
@ -540,7 +470,7 @@ COPY_BIGINT WORD0(x) (x)->L[0]
#define Kmax 15
#define COPY_BIGINT(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \
#define copy_bigint(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \
2*sizeof(int) + y->wds*sizeof(ULong))
@ -594,13 +524,15 @@ static Bigint *alloc_bigint(int k, ObStackAllocator *alloc)
static void free_bigint(Bigint *v, ObStackAllocator *alloc)
{
if (v != NULL) {
char *g_ptr= (char*) v;
if (g_ptr < alloc->begin || g_ptr >= alloc->end) {
free(g_ptr);
char *gptr= (char*) v;
if (gptr < alloc->begin || gptr >= alloc->end) {
free(gptr);
} else if (v->k <= Kmax) {
v->p.next= alloc->freelist[v->k];
alloc->freelist[v->k]= v;
}
@ -608,6 +540,8 @@ static void free_bigint(Bigint *v, ObStackAllocator *alloc)
}
static char *dtoa_alloc(int i, ObStackAllocator *alloc)
{
char *rv;
@ -616,19 +550,26 @@ static char *dtoa_alloc(int i, ObStackAllocator *alloc)
rv = alloc->free;
alloc->free += aligned_size;
} else {
rv = (char*)malloc(i);
rv = static_cast<char*>(malloc(i));
}
return rv;
}
static void dtoa_free(char *g_ptr, char *buf, size_t buf_size)
static void dtoa_free(char *gptr, char *buf, size_t buf_size)
{
if (g_ptr < buf || g_ptr >= buf + buf_size) {
free(g_ptr);
if (gptr < buf || gptr >= buf + buf_size) {
free(gptr);
}
}
static Bigint *mult_and_add(Bigint *b, int m, int a, ObStackAllocator *alloc)
{
int i, wds;
@ -652,7 +593,7 @@ static Bigint *mult_and_add(Bigint *b, int m, int a, ObStackAllocator *alloc)
if (wds >= b->maxwds)
{
b1= alloc_bigint(b->k+1, alloc);
COPY_BIGINT(b1, b);
copy_bigint(b1, b);
free_bigint(b, alloc);
b= b1;
}
@ -1038,9 +979,9 @@ static double ulp(U *x)
register Long L;
U u;
L= (WORD0(x) & Exp_mask) - (P - 1)*Exp_msk1;
WORD0(&u) = L;
WORD1(&u) = 0;
L= (word0(x) & Exp_mask) - (P - 1)*Exp_msk1;
word0(&u) = L;
word1(&u) = 0;
return dval(&u);
}
@ -1050,8 +991,8 @@ static double b2d(Bigint *a, int *e)
ULong *xa, *xa0, w, y, z;
int k;
U d;
#define d0 WORD0(&d)
#define d1 WORD1(&d)
#define d0 word0(&d)
#define d1 word1(&d)
xa0= a->p.x;
xa= xa0 + a->wds;
@ -1090,8 +1031,8 @@ static Bigint *d2b(U *d, int *e, int *bits, ObStackAllocator *alloc)
int de, k;
ULong *x, y, z;
int i;
#define d0 WORD0(d)
#define d1 WORD1(d)
#define d0 word0(d)
#define d1 word1(d)
b= alloc_bigint(1, alloc);
x= b->p.x;
@ -1146,11 +1087,11 @@ static double ratio(Bigint *a, Bigint *b)
dval(&db)= b2d(b, &kb);
k= ka - kb + 32*(a->wds - b->wds);
if (k > 0)
WORD0(&da)+= k*Exp_msk1;
word0(&da)+= k*Exp_msk1;
else
{
k= -k;
WORD0(&db)+= k*Exp_msk1;
word0(&db)+= k*Exp_msk1;
}
return dval(&da) / dval(&db);
}
@ -1438,16 +1379,16 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
{
case 0:
case 3:
WORD0(&rv)= Big0;
WORD1(&rv)= Big1;
word0(&rv)= Big0;
word1(&rv)= Big1;
break;
default:
WORD0(&rv)= Exp_mask;
WORD1(&rv)= 0;
word0(&rv)= Exp_mask;
word1(&rv)= 0;
}
#else
WORD0(&rv)= Exp_mask;
WORD1(&rv)= 0;
word0(&rv)= Exp_mask;
word1(&rv)= 0;
#endif
#ifdef SET_INEXACT
dval(&rv0)= 1e300;
@ -1461,17 +1402,17 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
for(j= 0; e1 > 1; j++, e1>>= 1)
if (e1 & 1)
dval(&rv)*= bigtens[j];
WORD0(&rv)-= P*Exp_msk1;
word0(&rv)-= P*Exp_msk1;
dval(&rv)*= bigtens[j];
if ((z= WORD0(&rv) & Exp_mask) > Exp_msk1 * (DBL_MAX_EXP + Bias - P))
if ((z= word0(&rv) & Exp_mask) > Exp_msk1 * (DBL_MAX_EXP + Bias - P))
goto ovfl;
if (z > Exp_msk1 * (DBL_MAX_EXP + Bias - 1 - P))
{
WORD0(&rv)= Big0;
WORD1(&rv)= Big1;
word0(&rv)= Big0;
word1(&rv)= Big1;
}
else
WORD0(&rv)+= P*Exp_msk1;
word0(&rv)+= P*Exp_msk1;
}
}
else if (e1 < 0)
@ -1488,18 +1429,18 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
for(j= 0; e1 > 0; j++, e1>>= 1)
if (e1 & 1)
dval(&rv)*= tinytens[j];
if (scale && (j = 2 * P + 1 - ((WORD0(&rv) & Exp_mask) >> Exp_shift)) > 0)
if (scale && (j = 2 * P + 1 - ((word0(&rv) & Exp_mask) >> Exp_shift)) > 0)
{
if (j >= 32)
{
WORD1(&rv)= 0;
word1(&rv)= 0;
if (j >= 53)
WORD0(&rv)= (P + 2) * Exp_msk1;
word0(&rv)= (P + 2) * Exp_msk1;
else
WORD0(&rv)&= 0xffffffff << (j - 32);
word0(&rv)&= 0xffffffff << (j - 32);
}
else
WORD1(&rv)&= 0xffffffff << j;
word1(&rv)&= 0xffffffff << j;
}
if (!dval(&rv))
{
@ -1517,8 +1458,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
for(;;)
{
bd= alloc_bigint(bd0->k, &alloc);
COPY_BIGINT(bd, bd0);
bb= d2b(&rv, &bbe, &bbbits, &alloc);
copy_bigint(bd, bd0);
bb= d2b(&rv, &bbe, &bbbits, &alloc);
bs= integer2bigint(1, &alloc);
if (e >= 0)
@ -1541,7 +1482,7 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
bs2++;
#endif
j= bbe - scale;
i= j + bbbits - 1;
i= j + bbbits - 1;
if (i < Emin)
j+= P - Emin;
else
@ -1600,9 +1541,9 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
else if (!dsign)
{
adj.d= -1.;
if (!WORD1(&rv) && !(WORD0(&rv) & Frac_mask))
if (!word1(&rv) && !(word0(&rv) & Frac_mask))
{
y= WORD0(&rv) & Exp_mask;
y= word0(&rv) & Exp_mask;
if (!scale || y > 2*P*Exp_msk1)
{
delta= left_shift(delta, Log2P, &alloc);
@ -1611,8 +1552,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
}
}
apply_adj:
if (scale && (y= WORD0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
WORD0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
if (scale && (y= word0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
word0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
dval(&rv)+= adj.d * ulp(&rv);
}
break;
@ -1622,6 +1563,7 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
adj.d= 1.;
if (adj.d <= 0x7ffffffe)
{
y= adj.d;
if (y != adj.d)
{
@ -1630,8 +1572,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
adj.d= y;
}
}
if (scale && (y= WORD0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
WORD0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
if (scale && (y= word0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
word0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
adj.d*= ulp(&rv);
if (dsign)
dval(&rv)+= adj.d;
@ -1643,8 +1585,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
if (i < 0)
{
if (dsign || WORD1(&rv) || WORD0(&rv) & Bndry_mask ||
(WORD0(&rv) & Exp_mask) <= (2 * P + 1) * Exp_msk1)
if (dsign || word1(&rv) || word0(&rv) & Bndry_mask ||
(word0(&rv) & Exp_mask) <= (2 * P + 1) * Exp_msk1)
{
#ifdef SET_INEXACT
if (!delta->x[0] && delta->wds <= 1)
@ -1668,25 +1610,24 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
{
if (dsign)
{
if ((WORD0(&rv) & Bndry_mask1) == Bndry_mask1 &&
WORD1(&rv) ==
((scale && (y = WORD0(&rv) & Exp_mask) <= 2 * P * Exp_msk1) ?
if ((word0(&rv) & Bndry_mask1) == Bndry_mask1 &&
word1(&rv) ==
((scale && (y = word0(&rv) & Exp_mask) <= 2 * P * Exp_msk1) ?
(0xffffffff & (0xffffffff << (2*P+1-(y>>Exp_shift)))) :
0xffffffff))
{
WORD0(&rv)= (WORD0(&rv) & Exp_mask) + Exp_msk1;
WORD1(&rv) = 0;
word0(&rv)= (word0(&rv) & Exp_mask) + Exp_msk1;
word1(&rv) = 0;
dsign = 0;
break;
}
}
else if (!(WORD0(&rv) & Bndry_mask) && !WORD1(&rv))
else if (!(word0(&rv) & Bndry_mask) && !word1(&rv))
{
drop_down:
if (scale)
{
L= WORD0(&rv) & Exp_mask;
L= word0(&rv) & Exp_mask;
if (L <= (2 *P + 1) * Exp_msk1)
{
if (L > (P + 2) * Exp_msk1)
@ -1694,12 +1635,12 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
goto undfl;
}
}
L= (WORD0(&rv) & Exp_mask) - Exp_msk1;
WORD0(&rv)= L | Bndry_mask1;
WORD1(&rv)= 0xffffffff;
L= (word0(&rv) & Exp_mask) - Exp_msk1;
word0(&rv)= L | Bndry_mask1;
word1(&rv)= 0xffffffff;
break;
}
if (!(WORD1(&rv) & LSB))
if (!(word1(&rv) & LSB))
break;
if (dsign)
dval(&rv)+= ulp(&rv);
@ -1716,9 +1657,9 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
{
if (dsign)
aadj= aadj1= 1.;
else if (WORD1(&rv) || WORD0(&rv) & Bndry_mask)
else if (word1(&rv) || word0(&rv) & Bndry_mask)
{
if (WORD1(&rv) == Tiny1 && !WORD0(&rv))
if (word1(&rv) == Tiny1 && !word0(&rv))
goto undfl;
aadj= 1.;
aadj1= -1.;
@ -1751,24 +1692,24 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
aadj1+= 0.5;
#endif
}
y= WORD0(&rv) & Exp_mask;
y= word0(&rv) & Exp_mask;
if (y == Exp_msk1 * (DBL_MAX_EXP + Bias - 1))
{
dval(&rv0)= dval(&rv);
WORD0(&rv)-= P * Exp_msk1;
word0(&rv)-= P * Exp_msk1;
adj.d= aadj1 * ulp(&rv);
dval(&rv)+= adj.d;
if ((WORD0(&rv) & Exp_mask) >= Exp_msk1 * (DBL_MAX_EXP + Bias - P))
if ((word0(&rv) & Exp_mask) >= Exp_msk1 * (DBL_MAX_EXP + Bias - P))
{
if (WORD0(&rv0) == Big0 && WORD1(&rv0) == Big1)
if (word0(&rv0) == Big0 && word1(&rv0) == Big1)
goto ovfl;
WORD0(&rv)= Big0;
WORD1(&rv)= Big1;
word0(&rv)= Big0;
word1(&rv)= Big1;
goto cont;
}
else
WORD0(&rv)+= P * Exp_msk1;
word0(&rv)+= P * Exp_msk1;
}
else
{
@ -1782,7 +1723,7 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
aadj1= dsign ? aadj : -aadj;
}
dval(&aadj2) = aadj1;
WORD0(&aadj2)+= (2 * P + 1) * Exp_msk1 - y;
word0(&aadj2)+= (2 * P + 1) * Exp_msk1 - y;
aadj1= dval(&aadj2);
adj.d= aadj1 * ulp(&rv);
dval(&rv)+= adj.d;
@ -1795,14 +1736,14 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
dval(&rv)+= adj.d;
}
}
z= WORD0(&rv) & Exp_mask;
z= word0(&rv) & Exp_mask;
#ifndef SET_INEXACT
if (!scale)
if (y == z)
{
L= (Long)aadj;
aadj-= L;
if (dsign || WORD1(&rv) || WORD0(&rv) & Bndry_mask)
if (dsign || word1(&rv) || word0(&rv) & Bndry_mask)
{
if (aadj < .4999999 || aadj > .5000001)
break;
@ -1822,8 +1763,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
{
if (!oldinexact)
{
WORD0(&rv0)= Exp_1 + (70 << Exp_shift);
WORD1(&rv0)= 0;
word0(&rv0)= Exp_1 + (70 << Exp_shift);
word1(&rv0)= 0;
dval(&rv0)+= 1.;
}
}
@ -1832,13 +1773,14 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
#endif
if (scale)
{
WORD0(&rv0)= Exp_1 - 2 * P * Exp_msk1;
WORD1(&rv0)= 0;
word0(&rv0)= Exp_1 - 2 * P * Exp_msk1;
word1(&rv0)= 0;
dval(&rv)*= dval(&rv0);
}
#ifdef SET_INEXACT
if (inexact && !(WORD0(&rv) & Exp_mask))
if (inexact && !(word0(&rv) & Exp_mask))
{
dval(&rv0)= 1e-300;
dval(&rv0)*= dval(&rv0);
}
@ -1868,7 +1810,7 @@ static int quorem(Bigint *b, Bigint *S)
sxe= sx + --n;
bx= b->p.x;
bxe= bx + n;
q= *bxe / (*sxe + 1);
q= *bxe / (*sxe + 1);
if (q)
{
borrow= 0;
@ -1941,16 +1883,15 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
memset(alloc.freelist, 0, sizeof(alloc.freelist));
u.d= dd;
if (WORD0(&u) & Sign_bit)
if (word0(&u) & Sign_bit)
{
*sign= 1;
WORD0(&u) &= ~Sign_bit;
word0(&u) &= ~Sign_bit;
}
else
*sign= 0;
if (((WORD0(&u) & Exp_mask) == Exp_mask && (*decpt= DTOA_OVERFLOW)) ||
if (((word0(&u) & Exp_mask) == Exp_mask && (*decpt= DTOA_OVERFLOW)) ||
(!dval(&u) && (*decpt= 1)))
{
char *res= (char*) dtoa_alloc(2, &alloc);
@ -1973,11 +1914,11 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
#endif
b= d2b(&u, &be, &bbits, &alloc);
if ((i= (int)(WORD0(&u) >> Exp_shift1 & (Exp_mask>>Exp_shift1))))
if ((i= (int)(word0(&u) >> Exp_shift1 & (Exp_mask>>Exp_shift1))))
{
dval(&d2)= dval(&u);
WORD0(&d2) &= Frac_mask1;
WORD0(&d2) |= Exp_11;
word0(&d2) &= Frac_mask1;
word0(&d2) |= Exp_11;
i-= Bias;
@ -1987,17 +1928,17 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
{
i= bbits + be + (Bias + (P-1) - 1);
x= i > 32 ? WORD0(&u) << (64 - i) | WORD1(&u) >> (i - 32)
: WORD1(&u) << (32 - i);
x= i > 32 ? word0(&u) << (64 - i) | word1(&u) >> (i - 32)
: word1(&u) << (32 - i);
dval(&d2)= x;
WORD0(&d2)-= 31*Exp_msk1;
word0(&d2)-= 31*Exp_msk1;
i-= (Bias + (P-1) - 1) + 1;
denorm= 1;
}
ds= (dval(&d2)-1.5)*0.289529654602168 + 0.1760912590558 + i*0.301029995663981;
k= (int)ds;
if (ds < 0. && ds != k)
k--;
k--;
k_check= 1;
if (k >= 0 && k <= Ten_pmax)
{
@ -2079,7 +2020,7 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
dval(&d2)= dval(&u);
k0= k;
ilim0= ilim;
ieps= 2;
ieps= 2;
if (k > 0)
{
ds= tens[k&0xf];
@ -2122,7 +2063,7 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
ieps++;
}
dval(&eps)= ieps*dval(&u) + 7.;
WORD0(&eps)-= (P-1)*Exp_msk1;
word0(&eps)-= (P-1)*Exp_msk1;
if (ilim == 0)
{
S= mhi= 0;
@ -2284,8 +2225,8 @@ bump_up:
#endif
)
{
if (!WORD1(&u) && !(WORD0(&u) & Bndry_mask) &&
(WORD0(&u) & (Exp_mask & (~Exp_msk1)))
if (!word1(&u) && !(word0(&u) & Bndry_mask) &&
(word0(&u) & (Exp_mask & (~Exp_msk1)))
)
{
b2+= Log2P;
@ -2347,7 +2288,7 @@ one_digit:
if (spec_case)
{
mhi= alloc_bigint(mhi->k, &alloc);
COPY_BIGINT(mhi, mlo);
copy_bigint(mhi, mlo);
mhi= left_shift(mhi, Log2P, &alloc);
}
@ -2358,7 +2299,7 @@ one_digit:
delta= bigint_diff(S, mhi, &alloc);
j1= delta->sign ? 1 : bigint_cmp(b, delta);
free_bigint(delta, &alloc);
if (j1 == 0 && mode != 1 && !(WORD1(&u) & 1)
if (j1 == 0 && mode != 1 && !(word1(&u) & 1)
#ifdef Honor_FLT_ROUNDS
&& rounding >= 1
#endif
@ -2371,7 +2312,7 @@ one_digit:
*s++= dig;
goto ret;
}
if (j < 0 || (j == 0 && mode != 1 && !(WORD1(&u) & 1)))
if (j < 0 || (j == 0 && mode != 1 && !(word1(&u) & 1)))
{
if (!b->p.x[0] && b->wds <= 1)
{
@ -2487,5 +2428,4 @@ ret1:
}
#undef P
#endif
#undef Rounding

View File

@ -46,38 +46,38 @@
these arrays plus CHINESE_WEIGHT_BASE.
*/
static const uint PINYIN_2_BYTE_START_2022 = 0x8140;
static const uint PINYIN_2_BYTE_END_2022 = 0xFE9F;
static const unsigned int PINYIN_2_BYTE_START_2022 = 0x8140;
static const unsigned int PINYIN_2_BYTE_END_2022 = 0xFE9F;
static const uint PINYIN_4_BYTE_1_START_2022 = 0x8138FD38;
static const uint PINYIN_4_1_DIFF_2022 = 11328;
static const uint PINYIN_4_BYTE_1_END_2022 = 0x82359737;
static const unsigned int PINYIN_4_BYTE_1_START_2022 = 0x8138FD38;
static const unsigned int PINYIN_4_1_DIFF_2022 = 11328;
static const unsigned int PINYIN_4_BYTE_1_END_2022 = 0x82359737;
static const uint PINYIN_4_BYTE_2_START_2022 = 0x95328236;
static const uint PINYIN_4_2_DIFF_2022 = 254536;
static const uint PINYIN_4_BYTE_2_END_2022 = 0x9A37F738;
static const unsigned int PINYIN_4_BYTE_2_START_2022 = 0x95328236;
static const unsigned int PINYIN_4_2_DIFF_2022 = 254536;
static const unsigned int PINYIN_4_BYTE_2_END_2022 = 0x9A37F738;
static const uint STROKE_2_BYTE_START_2022 = 0x8140;
static const uint STROKE_2_BYTE_END_2022 = 0xFE9F;
static const unsigned int STROKE_2_BYTE_START_2022 = 0x8140;
static const unsigned int STROKE_2_BYTE_END_2022 = 0xFE9F;
static const uint STROKE_4_BYTE_1_START_2022 = 0x8138FD38;
static const uint STROKE_4_1_DIFF_2022 = 11328;
static const uint STROKE_4_BYTE_1_END_2022 = 0x82359832;
static const unsigned int STROKE_4_BYTE_1_START_2022 = 0x8138FD38;
static const unsigned int STROKE_4_1_DIFF_2022 = 11328;
static const unsigned int STROKE_4_BYTE_1_END_2022 = 0x82359832;
static const uint STROKE_4_BYTE_2_START_2022 = 0x95328236;
static const uint STROKE_4_2_DIFF_2022 = 254536;
static const uint STROKE_4_BYTE_2_END_2022 = 0x9B31A337;
static const unsigned int STROKE_4_BYTE_2_START_2022 = 0x95328236;
static const unsigned int STROKE_4_2_DIFF_2022 = 254536;
static const unsigned int STROKE_4_BYTE_2_END_2022 = 0x9B31A337;
static const uint RADICAL_2_BYTE_START_2022 = 0x8140;
static const uint RADICAL_2_BYTE_END_2022 = 0xFEA0;
static const unsigned int RADICAL_2_BYTE_START_2022 = 0x8140;
static const unsigned int RADICAL_2_BYTE_END_2022 = 0xFEA0;
static const uint RADICAL_4_BYTE_1_START_2022 = 0x8139EE39;
static const uint RADICAL_4_1_DIFF_2022 = 12439;
static const uint RADICAL_4_BYTE_1_END_2022 = 0x8430AE33;
static const unsigned int RADICAL_4_BYTE_1_START_2022 = 0x8139EE39;
static const unsigned int RADICAL_4_1_DIFF_2022 = 12439;
static const unsigned int RADICAL_4_BYTE_1_END_2022 = 0x8430AE33;
static const uint RADICAL_4_BYTE_2_START_2022 = 0x95328236;
static const uint RADICAL_4_2_DIFF_2022 = 254536;
static const uint RADICAL_4_BYTE_2_END_2022 = 0x9B31A337;
static const unsigned int RADICAL_4_BYTE_2_START_2022 = 0x95328236;
static const unsigned int RADICAL_4_2_DIFF_2022 = 254536;
static const unsigned int RADICAL_4_BYTE_2_END_2022 = 0x9B31A337;
static const uint16 gb18030_2022_2_pinyin_weight_py[] = {
16323, 28217, 34164, 10708, 21648, 4104, 28850, 6524, 26203, 18824, 39282, 1913, 15200, 13279, 14725, 10029, /*[GB+8140, GB+814F]*/

View File

@ -0,0 +1,235 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef STR_UCA_TYPE_H
#define STR_UCA_TYPE_H
#include <vector>
/*
So far only the Croatian collation needs to reorder the Latin and
Cyrillic groups of characters. More may be added in the future.
*/
#define UCA_MAX_CHAR_GRP 4
/* UCA data versions a collation can be built on, in ascending order. */
enum enum_uca_ver {
  UCA_V400 = 0,
  UCA_V520 = 1,
  UCA_V900 = 2
};
/* Character groups that a collation's reorder rule can refer to. */
enum enum_char_grp {
  CHARGRP_NONE = 0,
  CHARGRP_CORE = 1,
  CHARGRP_LATIN = 2,
  CHARGRP_CYRILLIC = 3,
  CHARGRP_ARAB = 4,
  CHARGRP_KANA = 5,
  CHARGRP_OTHERS = 6
};
// A pair of 16-bit weights delimiting a weight range.
// NOTE(review): whether `end` is inclusive is not visible in this header -- confirm.
struct Weight_boundary {
uint16 begin; // start of the range
uint16 end; // end of the range
};
// One reordering record made of two weight ranges.
// NOTE(review): presumably the range before reordering (old_wt_bdy) and the
// range it is moved to (new_wt_bdy) -- confirm against the code that fills it.
struct Reorder_wt_rec {
struct Weight_boundary old_wt_bdy;
struct Weight_boundary new_wt_bdy;
};
// Reordering parameters of a collation.
struct Reorder_param {
// Character groups taking part in the reordering; room for UCA_MAX_CHAR_GRP entries.
enum enum_char_grp reorder_grp[UCA_MAX_CHAR_GRP];
// Weight-range records; room for two records per character group.
struct Reorder_wt_rec wt_rec[2 * UCA_MAX_CHAR_GRP];
// presumably the number of wt_rec entries in use -- TODO confirm
int wt_rec_num;
// NOTE(review): the exact meaning of max_weight is not visible in this header -- confirm
uint16 max_weight;
};
/* Case-first setting of a collation: off, upper case first or lower case first. */
enum enum_case_first {
  CASE_FIRST_OFF = 0,
  CASE_FIRST_UPPER = 1,
  CASE_FIRST_LOWER = 2
};
// Per-collation tailoring parameters: reordering, normalization and case-first.
struct Coll_param {
// Reordering parameters.
// NOTE(review): presumably NULL when the collation does no reordering -- confirm.
struct Reorder_param *reorder_param;
bool norm_enabled; // false = normalization off, default;
// true = on
// Case-first setting, one of the enum_case_first values.
enum enum_case_first case_first;
};
/*
NOTE: If you change OB_UCA_MAX_CONTRACTION, be sure to update the comment on
OB_UCA_CNT_MID1 (defined further down in this header), as it might cause us
to run out of bits in a byte flag.
*/
// Maximum number of code points one contraction may consist of.
#define OB_UCA_MAX_CONTRACTION 6
// Number of uint16 slots in a weight string such as ObContraction::weight.
#define OB_UCA_MAX_WEIGHT_SIZE 25
// NOTE(review): presumably the number of weight levels stored per character -- confirm.
#define OB_UCA_WEIGHT_LEVELS 1
/*
We store all the contractions in a trie, indexed on the codepoints they
consist of. The trie is organized as:
1. Each node stores one code point (ch) of contraction, and a list of nodes
(child_nodes) store all possible following code points.
2. The vector in ObUCAInfo stores a list of nodes which store the first
code points of all contractions.
3. Each node has a boolean value (is_contraction_tail) which shows
whether the code point stored in the node is the end of a contraction.
This is necessary because even if one code point is the end of a
contraction, there might be longer contraction contains all the
code points in the path (e.g., for Hungarian, both 'DZ' and 'DZS' are
contractions).
4. A contraction is formed by all the code points in the path until the
end of the contraction.
5. If it is the end of a contraction (is_contraction_tail == true), the
weight of this contraction is stored in array weight.
6. A contraction is either a common contraction or a previous context
contraction. NOTE(review): this item used to describe a with_context
flag, but ObContraction declares no such member; the two kinds appear to
be kept apart as child_nodes and child_nodes_context -- confirm.
7. If it is the end of a contraction (is_contraction_tail == true),
contraction_len shows how many code points this contraction consists of.
*/
// One node of the contraction trie: a single code point plus the nodes that
// may follow it.
struct ObContraction {
// Code point stored in this node.
ob_wc_t ch;
// Lists of following nodes.
// NOTE(review): child_nodes_context presumably holds the followers of
// previous-context contractions -- confirm.
std::vector<ObContraction> child_nodes;
std::vector<ObContraction> child_nodes_context;
// weight and contraction_len are only useful when is_contraction_tail is true.
// NOTE(review): the earlier wording of this comment named a with_context
// member, which this struct does not declare.
uint16 weight[OB_UCA_MAX_WEIGHT_SIZE]; /* Its weight string, 0-terminated */
// True when ch is the end of a contraction.
bool is_contraction_tail;
// Number of code points the contraction consists of.
size_t contraction_len;
};
// UCA data of one collation: version, weight tables, contraction data,
// logical positions and the base weights of the extra CE.
struct ObUCAInfo {
// UCA version this data belongs to.
enum enum_uca_ver version;
// Collation weights.
// NOTE(review): the layout of lengths/weights (presumably one entry per page
// of code points up to maxchar) is not visible in this header -- confirm.
ob_wc_t maxchar;
uchar *lengths;
uint16 **weights;
bool have_contractions;
// Nodes holding the first code points of all contractions (roots of the
// contraction trie).
std::vector<ObContraction> *contraction_nodes;
/*
contraction_flags is only used when a collation has contraction rule.
UCA collation supports at least 65535 characters, but only a few of
them can be part of contraction, so it would be a huge waste of time to
find out whether one character is in contraction list for every character.
contraction_flags points to memory which is allocated when a collation
has contraction rule. For a character in contraction, its corresponding
byte (contraction_flags[ch & OB_UCA_CNT_FLAG_MASK]) will be set to a
certain value according to the position (head, tail or middle) of this
character in contraction. This byte will be used to quick check whether
one character can be part of contraction.
*/
char *contraction_flags;
/* Logical positions */
ob_wc_t first_non_ignorable;
ob_wc_t last_non_ignorable;
ob_wc_t first_primary_ignorable;
ob_wc_t last_primary_ignorable;
ob_wc_t first_secondary_ignorable;
ob_wc_t last_secondary_ignorable;
ob_wc_t first_tertiary_ignorable;
ob_wc_t last_tertiary_ignorable;
ob_wc_t first_trailing;
ob_wc_t last_trailing;
ob_wc_t first_variable;
ob_wc_t last_variable;
/*
extra_ce_pri_base, extra_ce_sec_base and extra_ce_ter_base are only used for
the UCA collations whose UCA version is not smaller than UCA_V900. For why
we need this extra CE, please see the comment in my_char_weight_put_900()
and apply_primary_shift_900().
The value of these three variables is set by the definition of my_uca_v900.
The value of extra_ce_pri_base is usually 0x54A4 (which is the maximum
regular weight value plus one, 0x54A3 + 1 = 0x54A4). But for the Chinese
collation, the extra_ce_pri_base needs to change. This is because 0x54A4 has
been occupied to do reordering. There might be weight conflict if we still
use 0x54A4. Please also see the comment on modify_all_zh_pages().
*/
uint16 extra_ce_pri_base; // Primary weight of extra CE
uint16 extra_ce_sec_base; // Secondary weight of extra CE
uint16 extra_ce_ter_base; // Tertiary weight of extra CE
};
// Contraction flag table geometry. A code point is reduced to a table index
// with `wc & OB_UCA_CNT_FLAG_MASK`; the mask is OB_UCA_CNT_FLAG_SIZE - 1.
// NOTE(review): presumably contraction_flags is allocated with
// OB_UCA_CNT_FLAG_SIZE bytes -- confirm at the allocation site.
#define OB_UCA_CNT_FLAG_SIZE 4096
#define OB_UCA_CNT_FLAG_MASK 4095
/** Whether the given character can be the first in any contraction. */
#define OB_UCA_CNT_HEAD 1
/** Whether the given character can be the last in any contraction. */
#define OB_UCA_CNT_TAIL 2
/**
Whether the given character can be the second in any contraction.
Also defined implicitly through shifting OB_UCA_CNT_MID1:
\#define OB_UCA_CNT_MID2 8
\#define OB_UCA_CNT_MID3 16
\#define OB_UCA_CNT_MID4 32
There's no need for OB_UCA_CNT_MID5 (which would cause us to run out of
bits) since OB_UCA_MAX_CONTRACTION is 6 (so head, four in the middle,
and then tail).
*/
#define OB_UCA_CNT_MID1 4
/**
Whether the given character is the first part of a context-sensitive
contraction. Context-sensitive contractions are like normal contractions,
except that for performance reasons, they trigger on the _last_ character
instead of the first. The case given in Unicode TR35 is that in some
scripts (such as katakana in Japanese), "a-" should sort as "aa"
(except on the tertiary level), "e-" should sort as "ee" and so on.
However, adding regular contractions on "a" and "e" would cause undue
performance loss, so instead, we add a special "context-sensitive"
contraction on "-" that then looks at the _previous_ character.
We don't support context-sensitive contractions longer than two characters
at the moment, since none exist in CLDR. Thus, there is no
OB_UCA_PREVIOUS_CONTEXT_MID1 and so on.
*/
#define OB_UCA_PREVIOUS_CONTEXT_HEAD 64
/** Similar to OB_UCA_PREVIOUS_CONTEXT_HEAD, just for the tail. */
#define OB_UCA_PREVIOUS_CONTEXT_TAIL 128
#define OB_UCA_PSHIFT 8
/**
  Tell whether a code point may start a contraction.

  @param flags  UCA contraction flag data; read at index
                wc & OB_UCA_CNT_FLAG_MASK
  @param wc     Code point to look up
  @return true when the OB_UCA_CNT_HEAD bit is set in the flag byte,
          false otherwise
*/
inline bool ob_uca_can_be_contraction_head(const char *flags, ob_wc_t wc) {
  const char flag_byte = flags[wc & OB_UCA_CNT_FLAG_MASK];
  return (flag_byte & OB_UCA_CNT_HEAD) != 0;
}
/**
  Tell whether a code point may end a contraction.

  @param flags  UCA contraction flag data; read at index
                wc & OB_UCA_CNT_FLAG_MASK
  @param wc     Code point to look up
  @return true when the OB_UCA_CNT_TAIL bit is set in the flag byte,
          false otherwise
*/
inline bool ob_uca_can_be_contraction_tail(const char *flags, ob_wc_t wc) {
  const char flag_byte = flags[wc & OB_UCA_CNT_FLAG_MASK];
  return (flag_byte & OB_UCA_CNT_TAIL) != 0;
}
/**
  Look up weight data for the pair of code points (wc1, wc2) in a list of
  contraction nodes. Declared here; the definition is not in this header.

  @param cont_nodes  Contraction nodes to search
  @param wc1         First code point
  @param wc2         Second code point
  @return pointer to uint16 weight data.
          NOTE(review): presumably the 0-terminated weight string of the
          two-character contraction, and NULL when no such contraction
          exists -- confirm against the definition.
*/
const uint16 *ob_uca_contraction2_weight(
const std::vector<ObContraction> *cont_nodes, ob_wc_t wc1, ob_wc_t wc2);
#endif

270961
deps/oblib/src/lib/charset/uca900_data.h vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,571 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef UCA900_JA_DATA_H
#define UCA900_JA_DATA_H
// Quaternary weight of katakana (numerically larger than the hiragana weight).
static constexpr int JA_KATA_QUAT_WEIGHT= 0x08;
// Quaternary weight of hiragana (numerically smaller than the katakana weight).
static constexpr int JA_HIRA_QUAT_WEIGHT= 0x02;
static const char ja_cldr_30[]=
"&\\u309D <<<< \\u30FD"
"&[before 3]\\u3041 <<<\\u3041|\\u30FC=\\u3042|\\u30FC=\\u304B|\\u30FC"
"=\\u3095|\\u30FC=\\u304C|\\u30FC=\\u3055|\\u30FC"
"=\\u3056|\\u30FC=\\u305F|\\u30FC=\\u3060|\\u30FC"
"=\\u306A|\\u30FC=\\u306F|\\u30FC=\\u3070|\\u30FC"
"=\\u3071|\\u30FC=\\u307E|\\u30FC=\\u3083|\\u30FC"
"=\\u3084|\\u30FC=\\u3089|\\u30FC=\\u308E|\\u30FC"
"=\\u308F|\\u30FC"
"<<<<\\u30A1|\\u30FC=\\uFF67|\\u30FC=\\u30A2|\\u30FC"
"=\\uFF71|\\u30FC=\\u30AB|\\u30FC=\\uFF76|\\u30FC"
"=\\u30AC|\\u30FC=\\u30B5|\\u30FC=\\uFF7B|\\u30FC"
"=\\u30B6|\\u30FC=\\u30BF|\\u30FC=\\uFF80|\\u30FC"
"=\\u30C0|\\u30FC=\\u30CA|\\u30FC=\\uFF85|\\u30FC"
"=\\u30CF|\\u30FC=\\uFF8A|\\u30FC=\\u31F5|\\u30FC"
"=\\u30D0|\\u30FC=\\u30D1|\\u30FC=\\u30DE|\\u30FC"
"=\\uFF8F|\\u30FC=\\u30E3|\\u30FC=\\uFF6C|\\u30FC"
"=\\u30E4|\\u30FC=\\uFF94|\\u30FC=\\u30E9|\\u30FC"
"=\\uFF97|\\u30FC=\\u31FB|\\u30FC=\\u30EE|\\u30FC"
"=\\u30EF|\\u30FC=\\uFF9C|\\u30FC=\\u30F5|\\u30FC"
"=\\u30F7|\\u30FC"
"&[before 3]\\u3043 <<<\\u3043|\\u30FC=\\u3044|\\u30FC=\\u304D|\\u30FC"
"=\\u304E|\\u30FC=\\u3057|\\u30FC=\\u3058|\\u30FC"
"=\\u3061|\\u30FC=\\u3062|\\u30FC=\\u306B|\\u30FC"
"=\\u3072|\\u30FC=\\u3073|\\u30FC=\\u3074|\\u30FC"
"=\\u307F|\\u30FC=\\u308A|\\u30FC=\\u3090|\\u30FC"
"<<<<\\u30A3|\\u30FC=\\uFF68|\\u30FC=\\u30A4|\\u30FC"
"=\\uFF72|\\u30FC=\\u30AD|\\u30FC=\\uFF77|\\u30FC"
"=\\u30AE|\\u30FC=\\u30B7|\\u30FC=\\uFF7C|\\u30FC"
"=\\u31F1|\\u30FC=\\u30B8|\\u30FC=\\u30C1|\\u30FC"
"=\\uFF81|\\u30FC=\\u30C2|\\u30FC=\\u30CB|\\u30FC"
"=\\uFF86|\\u30FC=\\u30D2|\\u30FC=\\uFF8B|\\u30FC"
"=\\u31F6|\\u30FC=\\u30D3|\\u30FC=\\u30D4|\\u30FC"
"=\\u30DF|\\u30FC=\\uFF90|\\u30FC=\\u30EA|\\u30FC"
"=\\uFF98|\\u30FC=\\u31FC|\\u30FC=\\u30F0|\\u30FC"
"=\\u30F8|\\u30FC"
"&[before 3]\\u3045 <<<\\u3045|\\u30FC=\\u3046|\\u30FC=\\u304F|\\u30FC"
"=\\u3050|\\u30FC=\\u3059|\\u30FC=\\u305A|\\u30FC"
"=\\u3063|\\u30FC=\\u3064|\\u30FC=\\u3065|\\u30FC"
"=\\u306C|\\u30FC=\\u3075|\\u30FC=\\u3076|\\u30FC"
"=\\u3077|\\u30FC=\\u3080|\\u30FC=\\u3085|\\u30FC"
"=\\u3086|\\u30FC=\\u308B|\\u30FC=\\u3094|\\u30FC"
"<<<<\\u30A5|\\u30FC=\\uFF69|\\u30FC=\\u30A6|\\u30FC"
"=\\uFF73|\\u30FC=\\u30AF|\\u30FC=\\uFF78|\\u30FC"
"=\\u31F0|\\u30FC=\\u30B0|\\u30FC=\\u30B9|\\u30FC"
"=\\uFF7D|\\u30FC=\\u31F2|\\u30FC=\\u30BA|\\u30FC"
"=\\u30C3|\\u30FC=\\uFF6F|\\u30FC=\\u30C4|\\u30FC"
"=\\uFF82|\\u30FC=\\u30C5|\\u30FC=\\u30CC|\\u30FC"
"=\\uFF87|\\u30FC=\\u31F4|\\u30FC=\\u30D5|\\u30FC"
"=\\uFF8C|\\u30FC=\\u31F7|\\u30FC=\\u30D6|\\u30FC"
"=\\u30D7|\\u30FC=\\u30E0|\\u30FC=\\uFF91|\\u30FC"
"=\\u31FA|\\u30FC=\\u30E5|\\u30FC=\\uFF6D|\\u30FC"
"=\\u30E6|\\u30FC=\\uFF95|\\u30FC=\\u30EB|\\u30FC"
"=\\uFF99|\\u30FC=\\u31FD|\\u30FC=\\u30F4|\\u30FC"
"&[before 3]\\u3047 <<<\\u3047|\\u30FC=\\u3048|\\u30FC=\\u3051|\\u30FC"
"=\\u3096|\\u30FC=\\u3052|\\u30FC=\\u305B|\\u30FC"
"=\\u305C|\\u30FC=\\u3066|\\u30FC=\\u3067|\\u30FC"
"=\\u306D|\\u30FC=\\u3078|\\u30FC=\\u3079|\\u30FC"
"=\\u307A|\\u30FC=\\u3081|\\u30FC=\\u308C|\\u30FC"
"=\\u3091|\\u30FC"
"<<<<\\u30A7|\\u30FC=\\uFF6A|\\u30FC=\\u30A8|\\u30FC"
"=\\uFF74|\\u30FC=\\u30B1|\\u30FC=\\uFF79|\\u30FC"
"=\\u30B2|\\u30FC=\\u30BB|\\u30FC=\\uFF7E|\\u30FC"
"=\\u30BC|\\u30FC=\\u30C6|\\u30FC=\\uFF83|\\u30FC"
"=\\u30C7|\\u30FC=\\u30CD|\\u30FC=\\uFF88|\\u30FC"
"=\\u30D8|\\u30FC=\\uFF8D|\\u30FC=\\u31F8|\\u30FC"
"=\\u30D9|\\u30FC=\\u30DA|\\u30FC=\\u30E1|\\u30FC"
"=\\uFF92|\\u30FC=\\u30EC|\\u30FC=\\uFF9A|\\u30FC"
"=\\u31FE|\\u30FC=\\u30F1|\\u30FC=\\u30F6|\\u30FC"
"=\\u30F9|\\u30FC"
"&[before 3]\\u3049 <<<\\u3049|\\u30FC=\\u304A|\\u30FC=\\u3053|\\u30FC"
"=\\u3054|\\u30FC=\\u305D|\\u30FC=\\u305E|\\u30FC"
"=\\u3068|\\u30FC=\\u3069|\\u30FC=\\u306E|\\u30FC"
"=\\u307B|\\u30FC=\\u307C|\\u30FC=\\u307D|\\u30FC"
"=\\u3082|\\u30FC=\\u3087|\\u30FC=\\u3088|\\u30FC"
"=\\u308D|\\u30FC=\\u3092|\\u30FC"
"<<<<\\u30A9|\\u30FC=\\uFF6B|\\u30FC=\\u30AA|\\u30FC"
"=\\uFF75|\\u30FC=\\u30B3|\\u30FC=\\uFF7A|\\u30FC"
"=\\u30B4|\\u30FC=\\u30BD|\\u30FC=\\uFF7F|\\u30FC"
"=\\u30BE|\\u30FC=\\u30C8|\\u30FC=\\uFF84|\\u30FC"
"=\\u31F3|\\u30FC=\\u30C9|\\u30FC=\\u30CE|\\u30FC"
"=\\uFF89|\\u30FC=\\u30DB|\\u30FC=\\uFF8E|\\u30FC"
"=\\u31F9|\\u30FC=\\u30DC|\\u30FC=\\u30DD|\\u30FC"
"=\\u30E2|\\u30FC=\\uFF93|\\u30FC=\\u30E7|\\u30FC"
"=\\uFF6E|\\u30FC=\\u30E8|\\u30FC=\\uFF96|\\u30FC"
"=\\u30ED|\\u30FC=\\uFF9B|\\u30FC=\\u31FF|\\u30FC"
"=\\u30F2|\\u30FC=\\uFF66|\\u30FC=\\u30FA|\\u30FC"
"&[before 3]\\u3042 <<<\\u3042|\\u309D=\\u3041|\\u309D"
"<<<<\\u30A2|\\u30FD=\\uFF71|\\u30FD=\\u30A1|\\u30FD"
"=\\uFF67|\\u30FD"
"&[before 3]\\u3044 <<<\\u3044|\\u309D=\\u3043|\\u309D"
"<<<<\\u30A4|\\u30FD=\\uFF72|\\u30FD=\\u30A3|\\u30FD"
"=\\uFF68|\\u30FD"
"&[before 3]\\u3046 <<<\\u3046|\\u309D=\\u3045|\\u309D=\\u3094|\\u309D"
"=\\u3046|\\u309E/\\u3099"
"=\\u3045|\\u309E/\\u3099"
"=\\u3094|\\u309E/\\u3099"
"<<<<\\u30A6|\\u30FD=\\uFF73|\\u30FD=\\u30A5|\\u30FD"
"=\\uFF69|\\u30FD=\\u30F4|\\u30FD"
"=\\u30A6|\\u30FE/\\u3099"
"=\\uFF73|\\u30FE/\\u3099"
"=\\u30A5|\\u30FE/\\u3099"
"=\\uFF69|\\u30FE/\\u3099"
"=\\u30F4|\\u30FE/\\u3099"
"&[before 3]\\u3048 <<<\\u3048|\\u309D=\\u3047|\\u309D"
"<<<<\\u30A8|\\u30FD=\\uFF74|\\u30FD=\\u30A7|\\u30FD"
"=\\uFF6A|\\u30FD"
"&[before 3]\\u304A <<<\\u304A|\\u309D=\\u3049|\\u309D"
"<<<<\\u30AA|\\u30FD=\\uFF75|\\u30FD=\\u30A9|\\u30FD"
"=\\uFF6B|\\u30FD"
"&[before 3]\\u304B <<<\\u304B|\\u309D=\\u3095|\\u309D"
"<<<<\\u30AB|\\u30FD=\\uFF76|\\u30FD=\\u30F5|\\u30FD"
"&[before 3]\\u304C <<<\\u304C|\\u309D <<<<\\u30AC|\\u30FD"
"&[before 3]\\u304D <<<\\u304D|\\u309D=\\u304E|\\u309D"
"=\\u304D|\\u309E/\\u3099"
"=\\u304E|\\u309E/\\u3099"
"<<<<\\u30AD|\\u30FD=\\uFF77|\\u30FD=\\u30AE|\\u30FD"
"=\\u30AD|\\u30FE/\\u3099"
"=\\uFF77|\\u30FE/\\u3099"
"=\\u30AE|\\u30FE/\\u3099"
"&[before 3]\\u304F <<<\\u304F|\\u309D=\\u3050|\\u309D"
"=\\u304F|\\u309E/\\u3099"
"=\\u3050|\\u309E/\\u3099"
"<<<<\\u30AF|\\u30FD=\\uFF78|\\u30FD=\\u31F0|\\u30FD"
"=\\u30B0|\\u30FD=\\u30AF|\\u30FE/\\u3099"
"=\\uFF78|\\u30FE/\\u3099"
"=\\u31F0|\\u30FE/\\u3099"
"=\\u30B0|\\u30FE/\\u3099"
"&[before 3]\\u3051 <<<\\u3051|\\u309D=\\u3096|\\u309D"
"<<<<\\u30B1|\\u30FD=\\uFF79|\\u30FD=\\u30F6|\\u30FD"
"&[before 3]\\u3052 <<<\\u3052|\\u309D <<<<\\u30B2|\\u30FD"
"&[before 3]\\u3053 <<<\\u3053|\\u309D=\\u3054|\\u309D"
"=\\u3053|\\u309E/\\u3099"
"=\\u3054|\\u309E/\\u3099"
"<<<<\\u30B3|\\u30FD=\\uFF7A|\\u30FD=\\u30B4|\\u30FD"
"=\\u30B3|\\u30FE/\\u3099"
"=\\uFF7A|\\u30FE/\\u3099"
"=\\u30B4|\\u30FE/\\u3099"
"&[before 3]\\u3055 <<<\\u3055|\\u309D=\\u3056|\\u309D"
"=\\u3055|\\u309E/\\u3099"
"=\\u3056|\\u309E/\\u3099"
"<<<<\\u30B5|\\u30FD=\\uFF7B|\\u30FD=\\u30B6|\\u30FD"
"=\\u30B5|\\u30FE/\\u3099"
"=\\uFF7B|\\u30FE/\\u3099"
"=\\u30B6|\\u30FE/\\u3099"
"&[before 3]\\u3057 <<<\\u3057|\\u309D=\\u3058|\\u309D"
"=\\u3057|\\u309E/\\u3099"
"=\\u3058|\\u309E/\\u3099"
"<<<<\\u30B7|\\u30FD=\\uFF7C|\\u30FD=\\u31F1|\\u30FD"
"=\\u30B8|\\u30FD=\\u30B7|\\u30FE/\\u3099"
"=\\uFF7C|\\u30FE/\\u3099"
"=\\u31F1|\\u30FE/\\u3099"
"=\\u30B8|\\u30FE/\\u3099"
"&[before 3]\\u3059 <<<\\u3059|\\u309D=\\u305A|\\u309D"
"=\\u3059|\\u309E/\\u3099"
"=\\u305A|\\u309E/\\u3099"
"<<<<\\u30B9|\\u30FD=\\uFF7D|\\u30FD=\\u31F2|\\u30FD"
"=\\u30BA|\\u30FD=\\u30B9|\\u30FE/\\u3099"
"=\\uFF7D|\\u30FE/\\u3099"
"=\\u31F2|\\u30FE/\\u3099"
"=\\u30BA|\\u30FE/\\u3099"
"&[before 3]\\u305B <<<\\u305B|\\u309D=\\u305C|\\u309D"
"=\\u305B|\\u309E/\\u3099"
"=\\u305C|\\u309E/\\u3099"
"<<<<\\u30BB|\\u30FD=\\uFF7E|\\u30FD=\\u30BC|\\u30FD"
"=\\u30BB|\\u30FE/\\u3099"
"=\\uFF7E|\\u30FE/\\u3099"
"=\\u30BC|\\u30FE/\\u3099"
"&[before 3]\\u305D <<<\\u305D|\\u309D=\\u305E|\\u309D"
"=\\u305D|\\u309E/\\u3099"
"=\\u305E|\\u309E/\\u3099"
"<<<<\\u30BD|\\u30FD=\\uFF7F|\\u30FD=\\u30BE|\\u30FD"
"=\\u30BD|\\u30FE/\\u3099"
"=\\uFF7F|\\u30FE/\\u3099"
"=\\u30BE|\\u30FE/\\u3099"
"&[before 3]\\u305F <<<\\u305F|\\u309D=\\u3060|\\u309D"
"=\\u305F|\\u309E/\\u3099"
"=\\u3060|\\u309E/\\u3099"
"<<<<\\u30BF|\\u30FD=\\uFF80|\\u30FD=\\u30C0|\\u30FD"
"=\\u30BF|\\u30FE/\\u3099"
"=\\uFF80|\\u30FE/\\u3099"
"=\\u30C0|\\u30FE/\\u3099"
"&[before 3]\\u3061 <<<\\u3061|\\u309D=\\u3062|\\u309D"
"=\\u3061|\\u309E/\\u3099"
"=\\u3062|\\u309E/\\u3099"
"<<<<\\u30C1|\\u30FD=\\uFF81|\\u30FD=\\u30C2|\\u30FD"
"=\\u30C1|\\u30FE/\\u3099"
"=\\uFF81|\\u30FE/\\u3099"
"=\\u30C2|\\u30FE/\\u3099"
"&[before 3]\\u3064 <<<\\u3064|\\u309D=\\u3063|\\u309D=\\u3065|\\u309D"
"=\\u3064|\\u309E/\\u3099"
"=\\u3065|\\u309E/\\u3099"
"=\\u3064|\\u309D=\\u3063|\\u309E/\\u3099"
"=\\u3064|\\u309E/\\u3099"
"<<<<\\u30C4|\\u30FD=\\uFF82|\\u30FD=\\u30C3|\\u30FD"
"=\\uFF6F|\\u30FD=\\u30C5|\\u30FD"
"=\\u30C4|\\u30FE/\\u3099"
"=\\uFF82|\\u30FE/\\u3099"
"=\\u30C5|\\u30FE/\\u3099=\\u30C4|\\u30FD"
"=\\uFF82|\\u30FD=\\u30C3|\\u30FE/\\u3099"
"=\\uFF6F|\\u30FE/\\u3099"
"=\\u30C4|\\u30FE/\\u3099"
"=\\uFF82|\\u30FE/\\u3099"
"&[before 3]\\u3066 <<<\\u3066|\\u309D=\\u3067|\\u309D"
"=\\u3066|\\u309E/\\u3099"
"=\\u3067|\\u309E/\\u3099"
"<<<<\\u30C6|\\u30FD=\\uFF83|\\u30FD=\\u30C7|\\u30FD"
"=\\u30C6|\\u30FE/\\u3099"
"=\\uFF83|\\u30FE/\\u3099"
"=\\u30C7|\\u30FE/\\u3099"
"&[before 3]\\u3068 <<<\\u3068|\\u309D=\\u3069|\\u309D"
"=\\u3068|\\u309E/\\u3099"
"=\\u3069|\\u309E/\\u3099"
"<<<<\\u30C8|\\u30FD=\\uFF84|\\u30FD=\\u31F3|\\u30FD"
"=\\u30C9|\\u30FD=\\u30C8|\\u30FE/\\u3099"
"=\\uFF84|\\u30FE/\\u3099"
"=\\u31F3|\\u30FE/\\u3099"
"=\\u30C9|\\u30FE/\\u3099"
"&[before 3]\\u306A <<<\\u306A|\\u309D <<<<\\u30CA|\\u30FD=\\uFF85|\\u30FD"
"&[before 3]\\u306B <<<\\u306B|\\u309D <<<<\\u30CB|\\u30FD=\\uFF86|\\u30FD"
"&[before 3]\\u306C <<<\\u306C|\\u309D <<<<\\u30CC|\\u30FD=\\uFF87|\\u30FD"
"=\\u31F4|\\u30FD"
"&[before 3]\\u306D <<<\\u306D|\\u309D <<<<\\u30CD|\\u30FD=\\uFF88|\\u30FD"
"&[before 3]\\u306E <<<\\u306E|\\u309D <<<<\\u30CE|\\u30FD=\\uFF89|\\u30FD"
"&[before 3]\\u306F <<<\\u306F|\\u309D=\\u3070|\\u309D"
"=\\u306F|\\u309E/\\u3099"
"=\\u3070|\\u309E/\\u3099"
"=\\u3071|\\u309D=\\u3071|\\u309E/\\u3099"
"<<<<\\u30CF|\\u30FD=\\uFF8A|\\u30FD=\\u31F5|\\u30FD"
"=\\u30D0|\\u30FD=\\u30CF|\\u30FE/\\u3099"
"=\\uFF8A|\\u30FE/\\u3099"
"=\\u31F5|\\u30FE/\\u3099"
"=\\u30D0|\\u30FE/\\u3099=\\u30D1|\\u30FD"
"=\\u30D1|\\u30FE/\\u3099"
"&[before 3]\\u3072 <<<\\u3072|\\u309D=\\u3073|\\u309D"
"=\\u3072|\\u309E/\\u3099"
"=\\u3073|\\u309E/\\u3099"
"=\\u3074|\\u309D=\\u3074|\\u309E/\\u3099"
"<<<<\\u30D2|\\u30FD=\\uFF8B|\\u30FD=\\u31F6|\\u30FD"
"=\\u30D3|\\u30FD=\\u30D2|\\u30FE/\\u3099"
"=\\uFF8B|\\u30FE/\\u3099"
"=\\u31F6|\\u30FE/\\u3099"
"=\\u30D3|\\u30FE/\\u3099=\\u30D4|\\u30FD"
"=\\u30D4|\\u30FE/\\u3099"
"&[before 3]\\u3075 <<<\\u3075|\\u309D=\\u3076|\\u309D"
"=\\u3075|\\u309E/\\u3099"
"=\\u3076|\\u309E/\\u3099"
"=\\u3077|\\u309D=\\u3077|\\u309E/\\u3099"
"<<<<\\u30D5|\\u30FD=\\uFF8C|\\u30FD=\\u31F7|\\u30FD"
"=\\u30D6|\\u30FD=\\u30D5|\\u30FE/\\u3099"
"=\\uFF8C|\\u30FE/\\u3099"
"=\\u31F7|\\u30FE/\\u3099"
"=\\u30D6|\\u30FE/\\u3099=\\u30D7|\\u30FD"
"=\\u30D7|\\u30FE/\\u3099"
"&[before 3]\\u3078 <<<\\u3078|\\u309D=\\u3079|\\u309D"
"=\\u3078|\\u309E/\\u3099"
"=\\u3079|\\u309E/\\u3099"
"=\\u307A|\\u309D=\\u307A|\\u309E/\\u3099"
"<<<<\\u30D8|\\u30FD=\\uFF8D|\\u30FD=\\u31F8|\\u30FD"
"=\\u30D9|\\u30FD=\\u30D8|\\u30FE/\\u3099"
"=\\uFF8D|\\u30FE/\\u3099"
"=\\u31F8|\\u30FE/\\u3099"
"=\\u30D9|\\u30FE/\\u3099=\\u30DA|\\u30FD"
"=\\u30DA|\\u30FE/\\u3099"
"&[before 3]\\u307B <<<\\u307B|\\u309D=\\u307C|\\u309D"
"=\\u307B|\\u309E/\\u3099"
"=\\u307C|\\u309E/\\u3099"
"=\\u307D|\\u309D=\\u307D|\\u309E/\\u3099"
"<<<<\\u30DB|\\u30FD=\\uFF8E|\\u30FD=\\u31F9|\\u30FD"
"=\\u30DC|\\u30FD=\\u30DB|\\u30FE/\\u3099"
"=\\uFF8E|\\u30FE/\\u3099"
"=\\u31F9|\\u30FE/\\u3099"
"=\\u30DC|\\u30FE/\\u3099=\\u30DD|\\u30FD"
"=\\u30DD|\\u30FE/\\u3099"
"&[before 3]\\u307E <<<\\u307E|\\u309D <<<<\\u30DE|\\u30FD=\\uFF8F|\\u30FD"
"&[before 3]\\u307F <<<\\u307F|\\u309D <<<<\\u30DF|\\u30FD=\\uFF90|\\u30FD"
"&[before 3]\\u3080 <<<\\u3080|\\u309D <<<<\\u30E0|\\u30FD=\\uFF91|\\u30FD"
"=\\u31FA|\\u30FD"
"&[before 3]\\u3081 <<<\\u3081|\\u309D <<<<\\u30E1|\\u30FD=\\uFF92|\\u30FD"
"&[before 3]\\u3082 <<<\\u3082|\\u309D <<<<\\u30E2|\\u30FD=\\uFF93|\\u30FD"
"&[before 3]\\u3084 <<<\\u3084|\\u309D=\\u3083|\\u309D <<<<\\u30E4|\\u30FD"
"=\\uFF94|\\u30FD=\\u30E3|\\u30FD=\\uFF6C|\\u30FD"
"&[before 3]\\u3086 <<<\\u3086|\\u309D=\\u3085|\\u309D <<<<\\u30E6|\\u30FD"
"=\\uFF95|\\u30FD=\\u30E5|\\u30FD=\\uFF6D|\\u30FD"
"&[before 3]\\u3088 <<<\\u3088|\\u309D=\\u3087|\\u309D <<<<\\u30E8|\\u30FD"
"=\\uFF96|\\u30FD=\\u30E7|\\u30FD=\\uFF6E|\\u30FD"
"&[before 3]\\u3089 <<<\\u3089|\\u309D <<<<\\u30E9|\\u30FD=\\uFF97|\\u30FD"
"=\\u31FB|\\u30FD"
"&[before 3]\\u308A <<<\\u308A|\\u309D <<<<\\u30EA|\\u30FD=\\uFF98|\\u30FD"
"=\\u31FC|\\u30FD"
"&[before 3]\\u308B <<<\\u308B|\\u309D <<<<\\u30EB|\\u30FD=\\uFF99|\\u30FD"
"=\\u31FD|\\u30FD"
"&[before 3]\\u308C <<<\\u308C|\\u309D <<<<\\u30EC|\\u30FD=\\uFF9A|\\u30FD"
"=\\u31FE|\\u30FD"
"&[before 3]\\u308D <<<\\u308D|\\u309D <<<<\\u30ED|\\u30FD=\\uFF9B|\\u30FD"
"=\\u31FF|\\u30FD"
"&[before 3]\\u308F <<<\\u308F|\\u309D=\\u308E|\\u309D"
"=\\u308F|\\u309E/\\u3099"
"=\\u308E|\\u309E/\\u3099"
"<<<<\\u30EF|\\u30FD=\\uFF9C|\\u30FD=\\u30EE|\\u30FD"
"=\\u30F7|\\u30FD=\\u30EF|\\u30FE/\\u3099"
"=\\uFF9C|\\u30FE/\\u3099"
"=\\u30F7|\\u30FE/\\u3099"
"=\\u30EE|\\u30FE/\\u3099"
"&[before 3]\\u3090 <<<\\u3090|\\u309D=\\u3090|\\u309E/\\u3099"
"<<<<\\u30F0|\\u30FD=\\u30F8|\\u30FD"
"=\\u30F0|\\u30FE/\\u3099"
"=\\u30F8|\\u30FE/\\u3099"
"&[before 3]\\u3091 <<<\\u3091|\\u309D=\\u3091|\\u309E/\\u3099"
"<<<<\\u30F1|\\u30FD=\\u30F9|\\u30FD"
"=\\u30F1|\\u30FE/\\u3099"
"=\\u30F9|\\u30FE/\\u3099"
"&[before 3]\\u3092 <<<\\u3092|\\u309D=\\u3092|\\u309E/\\u3099"
"<<<<\\u30F2|\\u30FD=\\uFF66|\\u30FD=\\u30FA|\\u30FD"
"=\\u30F2|\\u30FE/\\u3099"
"=\\uFF66|\\u30FE/\\u3099"
"=\\u30FA|\\u30FE/\\u3099"
"&[before 3]\\u3093 <<<\\u3093|\\u309D <<<<\\u30F3|\\u30FD=\\uFF9D|\\u30FD"
"&\\u3041 <<<<\\u30A1=\\uFF67"
"&\\u3042 <<<<\\u30A2=\\uFF71"
"&\\u3043 <<<<\\u30A3=\\uFF68"
"&\\u3044 <<<<\\u30A4=\\uFF72"
"&\\u3045 <<<<\\u30A5=\\uFF69"
"&\\u3046 <<<<\\u30A6=\\uFF73"
"&\\u3047 <<<<\\u30A7=\\uFF6A"
"&\\u3048 <<<<\\u30A8=\\uFF74"
"&\\u3049 <<<<\\u30A9=\\uFF6B"
"&\\u304A <<<<\\u30AA=\\uFF75"
"&\\u304B <<<<\\u30AB=\\uFF76"
"&\\u304D <<<<\\u30AD=\\uFF77"
"&\\u304F <<<<\\u30AF=\\uFF78"
"&\\u3051 <<<<\\u30B1=\\uFF79"
"&\\u3053 <<<<\\u30B3=\\uFF7A"
"&\\u3055 <<<<\\u30B5=\\uFF7B"
"&\\u3057 <<<<\\u30B7=\\uFF7C"
"&\\u3059 <<<<\\u30B9=\\uFF7D"
"&\\u305B <<<<\\u30BB=\\uFF7E"
"&\\u305D <<<<\\u30BD=\\uFF7F"
"&\\u305F <<<<\\u30BF=\\uFF80"
"&\\u3061 <<<<\\u30C1=\\uFF81"
"&\\u3063 <<<<\\u30C3=\\uFF6F"
"&\\u3064 <<<<\\u30C4=\\uFF82"
"&\\u3066 <<<<\\u30C6=\\uFF83"
"&\\u3068 <<<<\\u30C8=\\uFF84"
"&\\u306A <<<<\\u30CA=\\uFF85"
"&\\u306B <<<<\\u30CB=\\uFF86"
"&\\u306C <<<<\\u30CC=\\uFF87"
"&\\u306D <<<<\\u30CD=\\uFF88"
"&\\u306E <<<<\\u30CE=\\uFF89"
"&\\u306F <<<<\\u30CF=\\uFF8A"
"&\\u3072 <<<<\\u30D2=\\uFF8B"
"&\\u3075 <<<<\\u30D5=\\uFF8C"
"&\\u3078 <<<<\\u30D8=\\uFF8D"
"&\\u307B <<<<\\u30DB=\\uFF8E"
"&\\u307E <<<<\\u30DE=\\uFF8F"
"&\\u307F <<<<\\u30DF=\\uFF90"
"&\\u3080 <<<<\\u30E0=\\uFF91"
"&\\u3081 <<<<\\u30E1=\\uFF92"
"&\\u3082 <<<<\\u30E2=\\uFF93"
"&\\u3083 <<<<\\u30E3=\\uFF6C"
"&\\u3084 <<<<\\u30E4=\\uFF94"
"&\\u3085 <<<<\\u30E5=\\uFF6D"
"&\\u3086 <<<<\\u30E6=\\uFF95"
"&\\u3087 <<<<\\u30E7=\\uFF6E"
"&\\u3088 <<<<\\u30E8=\\uFF96"
"&\\u3089 <<<<\\u30E9=\\uFF97"
"&\\u308A <<<<\\u30EA=\\uFF98"
"&\\u308B <<<<\\u30EB=\\uFF99"
"&\\u308C <<<<\\u30EC=\\uFF9A"
"&\\u308D <<<<\\u30ED=\\uFF9B"
"&\\u308E <<<<\\u30EE"
"&\\u308F <<<<\\u30EF=\\uFF9C"
"&\\u3090 <<<<\\u30F0"
"&\\u3091 <<<<\\u30F1"
"&\\u3092 <<<<\\u30F2=\\uFF66"
"&\\u3093 <<<<\\u30F3=\\uFF9D"
"&\\u3095 <<<<\\u30F5"
"&\\u3096 <<<<\\u30F6"
"&\\u3088\\u308A <<\\u309F"
"&\\u30B3\\u30C8 <<\\u30FF"
"&\\u0020=\\u3000=\\uFFE3"
"&\\u0021=\\uFF01"
"&\\u0022=\\uFF02"
"&\\u0023=\\uFF03"
"&\\u0024=\\uFF04"
"&\\u0025=\\uFF05"
"&\\u0026=\\uFF06"
"&\\u0027=\\uFF07"
"&\\u0028=\\uFF08"
"&\\u0029=\\uFF09"
"&\\u002A=\\uFF0A"
"&\\u002B=\\uFF0B"
"&\\u002C=\\uFF0C"
"&\\u002D=\\uFF0D"
"&\\u002E=\\uFF0E"
"&\\u002F=\\uFF0F"
"&0=\\uFF10"
"&1=\\uFF11"
"&2=\\uFF12"
"&3=\\uFF13"
"&4=\\uFF14"
"&5=\\uFF15"
"&6=\\uFF16"
"&7=\\uFF17"
"&8=\\uFF18"
"&9=\\uFF19"
"&\\u003A=\\uFF1A"
"&\\u003B=\\uFF1B"
"&\\u003C=\\uFF1C"
"&\\u003D=\\uFF1D"
"&\\u003E=\\uFF1E"
"&\\u003F=\\uFF1F"
"&\\u0040=\\uFF20"
"&A=\\uFF21"
"&B=\\uFF22"
"&C=\\uFF23"
"&D=\\uFF24"
"&E=\\uFF25"
"&F=\\uFF26"
"&G=\\uFF27"
"&H=\\uFF28"
"&I=\\uFF29"
"&J=\\uFF2A"
"&K=\\uFF2B"
"&L=\\uFF2C"
"&M=\\uFF2D"
"&N=\\uFF2E"
"&O=\\uFF2F"
"&P=\\uFF30"
"&Q=\\uFF31"
"&R=\\uFF32"
"&S=\\uFF33"
"&T=\\uFF34"
"&U=\\uFF35"
"&V=\\uFF36"
"&W=\\uFF37"
"&X=\\uFF38"
"&Y=\\uFF39"
"&Z=\\uFF3A"
"&\\u005B=\\uFF3B"
"&\\u005C=\\uFF3C "
"&\\u005D=\\uFF3D"
"&\\u005E=\\uFF3E"
"&\\u005F=\\uFF3F"
"&\\u0060=\\uFF40"
"&a=\\uFF41"
"&b=\\uFF42"
"&c=\\uFF43"
"&d=\\uFF44"
"&e=\\uFF45"
"&f=\\uFF46"
"&g=\\uFF47"
"&h=\\uFF48"
"&i=\\uFF49"
"&j=\\uFF4A"
"&k=\\uFF4B"
"&l=\\uFF4C"
"&m=\\uFF4D"
"&n=\\uFF4E"
"&o=\\uFF4F"
"&p=\\uFF50"
"&q=\\uFF51"
"&r=\\uFF52"
"&s=\\uFF53"
"&t=\\uFF54"
"&u=\\uFF55"
"&v=\\uFF56"
"&w=\\uFF57"
"&x=\\uFF58"
"&y=\\uFF59"
"&z=\\uFF5A"
"&\\u007B=\\uFF5B"
"&\\u007C=\\uFF5C"
"&\\u007D=\\uFF5D"
"&\\u007E=\\uFF5E"
"&\\u00A2=\\uFFE0"
"&\\u00A3=\\uFFE1"
"&\\u00A5=\\uFFE5"
"&\\u00A6=\\uFFE4"
"&\\u00AC=\\uFFE2"
"&\\u1100=\\uFFA1=\\u3131"
"&\\u1101=\\uFFA2=\\u3132"
"&\\u1102=\\uFFA4=\\u3134"
"&\\u1103=\\uFFA7=\\u3137"
"&\\u1104=\\uFFA8=\\u3138"
"&\\u1105=\\uFFA9=\\u3139"
"&\\u1106=\\uFFB1=\\u3141"
"&\\u1107=\\uFFB2=\\u3142"
"&\\u1108=\\uFFB3=\\u3143"
"&\\u1109=\\uFFB5=\\u3145"
"&\\u110A=\\uFFB6=\\u3146"
"&\\u110B=\\uFFB7=\\u3147"
"&\\u110C=\\uFFB8=\\u3148"
"&\\u110D=\\uFFB9=\\u3149"
"&\\u110E=\\uFFBA=\\u314A"
"&\\u110F=\\uFFBB=\\u314B"
"&\\u1110=\\uFFBC=\\u314C"
"&\\u1111=\\uFFBD=\\u314D"
"&\\u1112=\\uFFBE=\\u314E"
"&\\u111A=\\uFFB0=\\u3140"
"&\\u1121=\\uFFB4=\\u3144"
"&\\u1160=\\uFFA0=\\u3164"
"&\\u1161=\\uFFC2=\\u314F"
"&\\u1162=\\uFFC3=\\u3150"
"&\\u1163=\\uFFC4=\\u3151"
"&\\u1164=\\uFFC5=\\u3152"
"&\\u1165=\\uFFC6=\\u3153"
"&\\u1166=\\uFFC7=\\u3154"
"&\\u1167=\\uFFCA=\\u3155"
"&\\u1168=\\uFFCB=\\u3156"
"&\\u1169=\\uFFCC=\\u3157"
"&\\u116A=\\uFFCD=\\u3158"
"&\\u116B=\\uFFCE=\\u3159"
"&\\u116C=\\uFFCF=\\u315A"
"&\\u116D=\\uFFD2=\\u315B"
"&\\u116E=\\uFFD3=\\u315C"
"&\\u116F=\\uFFD4=\\u315D"
"&\\u1170=\\uFFD5=\\u315E"
"&\\u1171=\\uFFD6=\\u315F"
"&\\u1172=\\uFFD7=\\u3160"
"&\\u1173=\\uFFDA=\\u3161"
"&\\u1174=\\uFFDB=\\u3162"
"&\\u1175=\\uFFDC=\\u3163"
"&\\u11AA=\\uFFA3=\\u3133"
"&\\u11AC=\\uFFA5=\\u3135"
"&\\u11AD=\\uFFA6=\\u3136"
"&\\u11B0=\\uFFAA=\\u313A"
"&\\u11B1=\\uFFAB=\\u313B"
"&\\u11B2=\\uFFAC=\\u313C"
"&\\u11B3=\\uFFAD=\\u313D"
"&\\u11B4=\\uFFAE=\\u313E"
"&\\u11B5=\\uFFAF=\\u313F"
"&\\u20A9=\\uFFE6"
"&\\u2190=\\uFFE9"
"&\\u2191=\\uFFEA"
"&\\u2192=\\uFFEB"
"&\\u2193=\\uFFEC"
"&\\u2502=\\uFFE8"
"&\\u25A0=\\uFFED"
"&\\u25CB=\\uFFEE"
"&\\u3001=\\uFF64"
"&\\u3002=\\uFF61"
"&\\u300C=\\uFF62"
"&\\u300D=\\uFF63";
/*
  The variables below are only declared here; they are defined in a separate
  .cc file produced by uca9dump for the Japanese collations.
  NOTE(review): the original wording says that file is generated at build
  time. In this tree the table source (presumably uca900_ja_tbls.cc) appears
  to be checked in -- confirm.
  NOTE(review): judging by the names, ja_han_pages presumably holds the
  per-page weight tables for Han characters, and MIN_JA_HAN_PAGE /
  MAX_JA_HAN_PAGE bound the covered page range -- confirm against the
  definitions.
*/
extern uint16 *ja_han_pages[];
extern const int MIN_JA_HAN_PAGE;
extern const int MAX_JA_HAN_PAGE;
#endif

128611
deps/oblib/src/lib/charset/uca900_ja_tbls.cc vendored Normal file

File diff suppressed because it is too large Load Diff

639181
deps/oblib/src/lib/charset/uca900_zh2_tbls.cc vendored Normal file

File diff suppressed because it is too large Load Diff

669300
deps/oblib/src/lib/charset/uca900_zh3_tbls.cc vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,918 @@
/**
* Copyright (c) 2021 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef UCA900_ZH_DATA_H
#define UCA900_ZH_DATA_H
/*
  For a collation whose tailoring rules change a character's primary weight,
  the character is given an extra collation element (CE); see the comments in
  my_char_weight_put_900(). Usually the primary weight of this extra CE
  starts from 0x54A4, which is the biggest primary weight of all regular
  characters (non-CJK and non-ignorable) in DUCET. Chinese is special: to
  keep assigning a single primary weight to each character group such as
  Latin, Cyrillic, etc., all weight values in [0x1C47, 0xF643] are already
  used, so the primary weight of the extra CE starts from 0xF644 to avoid
  overlapping weights.

  NOTE(review): my_char_weight_put_900() does not follow the ob_ prefix used
  elsewhere in this charset library; the name may be a leftover from the code
  this was ported from -- confirm the local function name.
  NOTE(review): the comment above only explains ZH_EXTRA_CE_PRI.
  ZH2_EXTRA_CE_PRI and ZH3_EXTRA_CE_PRI presumably play the same role for the
  zh2 and zh3 tailorings with their own starting weights -- confirm how those
  values were chosen.
*/
constexpr int ZH_EXTRA_CE_PRI = 0xF644;
constexpr int ZH2_EXTRA_CE_PRI = 0x94AF;
constexpr int ZH3_EXTRA_CE_PRI = 0x550D;
/*
  zh_cldr_30 (defined next) holds the tailoring rule text for the Chinese
  collation as one concatenated string literal. Code points are spelled as a
  backslash followed by u and hex digits (four digits, or six for code points
  above U+FFFF); the backslash is doubled in the source, so the string
  contains the escape text itself rather than a compiler-decoded character.
  NOTE(review): the rule operators look like CLDR/ICU collation rule syntax
  and the _30 suffix presumably refers to CLDR version 30 -- confirm.
*/
static const char zh_cldr_30[] =
"&[before 2]a<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD"
"<<\\u00E0<<<\\u00C0"
"&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A"
"<<\\u00E8<<<\\u00C8"
"&e<<e\\u0302\\u0304<<<E\\u0302\\u0304<<e\\u0302\\u0301<<<E\\u0302\\u0301"
"<<e\\u0302\\u030C<<<E\\u0302\\u030C<<e\\u0302\\u0300<<<E\\u0302\\u0300"
"&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF"
"<<\\u00EC<<<\\u00CC"
"&[before 2]m<<m\\u0304<<<M\\u0304<<\\u1E3F<<<\\u1E3E<<m\\u030C"
"<<<M\\u030C<<m\\u0300<<<M\\u0300"
"&[before 2]n<<n\\u0304<<<N\\u0304<<\\u0144<<<\\u0143<<\\u0148<<<\\u0147"
"<<\\u01F9<<<\\u01F8"
"&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1"
"<<\\u00F2<<<\\u00D2"
"&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3"
"<<\\u00F9<<<\\u00D9"
"&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC"
"<<<\\u01DB<<\\u00FC<<<\\u00DC"
"&(\\u4E00)<<<\\u3220"
"&(\\u4E03)<<<\\u3226"
"&(\\u4E09)<<<\\u3222"
"&(\\u4E5D)<<<\\u3228"
"&(\\u4E8C)<<<\\u3221"
"&(\\u4E94)<<<\\u3224"
"&(\\u4EE3)<<<\\u3239"
"&(\\u4F01)<<<\\u323D"
"&(\\u4F11)<<<\\u3241"
"&(\\u516B)<<<\\u3227"
"&(\\u516D)<<<\\u3225"
"&(\\u52B4)<<<\\u3238"
"&(\\u5341)<<<\\u3229"
"&(\\u5354)<<<\\u323F"
"&(\\u540D)<<<\\u3234"
"&(\\u547C)<<<\\u323A"
"&(\\u56DB)<<<\\u3223"
"&(\\u571F)<<<\\u322F"
"&(\\u5B66)<<<\\u323B"
"&(\\u65E5)<<<\\u3230"
"&(\\u6708)<<<\\u322A"
"&(\\u6709)<<<\\u3232"
"&(\\u6728)<<<\\u322D"
"&(\\u682A)<<<\\u3231"
"&(\\u6C34)<<<\\u322C"
"&(\\u706B)<<<\\u322B"
"&(\\u7279)<<<\\u3235"
"&(\\u76E3)<<<\\u323C"
"&(\\u793E)<<<\\u3233"
"&(\\u795D)<<<\\u3237"
"&(\\u796D)<<<\\u3240"
"&(\\u81EA)<<<\\u3242"
"&(\\u81F3)<<<\\u3243"
"&(\\u8CA1)<<<\\u3236"
"&(\\u8CC7)<<<\\u323E"
"&(\\u91D1)<<<\\u322E"
"&0\\u70B9<<<\\u3358"
"&10\\u65E5<<<\\u33E9"
"&10\\u6708<<<\\u32C9"
"&10\\u70B9<<<\\u3362"
"&11\\u65E5<<<\\u33EA"
"&11\\u6708<<<\\u32CA"
"&11\\u70B9<<<\\u3363"
"&12\\u65E5<<<\\u33EB"
"&12\\u6708<<<\\u32CB"
"&12\\u70B9<<<\\u3364"
"&13\\u65E5<<<\\u33EC"
"&13\\u70B9<<<\\u3365"
"&14\\u65E5<<<\\u33ED"
"&14\\u70B9<<<\\u3366"
"&15\\u65E5<<<\\u33EE"
"&15\\u70B9<<<\\u3367"
"&16\\u65E5<<<\\u33EF"
"&16\\u70B9<<<\\u3368"
"&17\\u65E5<<<\\u33F0"
"&17\\u70B9<<<\\u3369"
"&18\\u65E5<<<\\u33F1"
"&18\\u70B9<<<\\u336A"
"&19\\u65E5<<<\\u33F2"
"&19\\u70B9<<<\\u336B"
"&1\\u65E5<<<\\u33E0"
"&1\\u6708<<<\\u32C0"
"&1\\u70B9<<<\\u3359"
"&20\\u65E5<<<\\u33F3"
"&20\\u70B9<<<\\u336C"
"&21\\u65E5<<<\\u33F4"
"&21\\u70B9<<<\\u336D"
"&22\\u65E5<<<\\u33F5"
"&22\\u70B9<<<\\u336E"
"&23\\u65E5<<<\\u33F6"
"&23\\u70B9<<<\\u336F"
"&24\\u65E5<<<\\u33F7"
"&24\\u70B9<<<\\u3370"
"&25\\u65E5<<<\\u33F8"
"&26\\u65E5<<<\\u33F9"
"&27\\u65E5<<<\\u33FA"
"&28\\u65E5<<<\\u33FB"
"&29\\u65E5<<<\\u33FC"
"&2\\u65E5<<<\\u33E1"
"&2\\u6708<<<\\u32C1"
"&2\\u70B9<<<\\u335A"
"&30\\u65E5<<<\\u33FD"
"&31\\u65E5<<<\\u33FE"
"&3\\u65E5<<<\\u33E2"
"&3\\u6708<<<\\u32C2"
"&3\\u70B9<<<\\u335B"
"&4\\u65E5<<<\\u33E3"
"&4\\u6708<<<\\u32C3"
"&4\\u70B9<<<\\u335C"
"&5\\u65E5<<<\\u33E4"
"&5\\u6708<<<\\u32C4"
"&5\\u70B9<<<\\u335D"
"&6\\u65E5<<<\\u33E5"
"&6\\u6708<<<\\u32C5"
"&6\\u70B9<<<\\u335E"
"&7\\u65E5<<<\\u33E6"
"&7\\u6708<<<\\u32C6"
"&7\\u70B9<<<\\u335F"
"&8\\u65E5<<<\\u33E7"
"&8\\u6708<<<\\u32C7"
"&8\\u70B9<<<\\u3360"
"&9\\u65E5<<<\\u33E8"
"&9\\u6708<<<\\u32C8"
"&9\\u70B9<<<\\u3361"
"&\\u3014\\u4E09\\u3015<<<\\u01F241"
"&\\u3014\\u4E8C\\u3015<<<\\u01F242"
"&\\u3014\\u52DD\\u3015<<<\\u01F247"
"&\\u3014\\u5B89\\u3015<<<\\u01F243"
"&\\u3014\\u6253\\u3015<<<\\u01F245"
"&\\u3014\\u6557\\u3015<<<\\u01F248"
"&\\u3014\\u672C\\u3015<<<\\u01F240"
"&\\u3014\\u70B9\\u3015<<<\\u01F244"
"&\\u3014\\u76D7\\u3015<<<\\u01F246"
"&\\u4E00<<<\\u2F00<<<\\u3192<<<\\u3280<<<\\u01F229"
"&\\u4E01<<<\\u319C"
"&\\u4E03<<<\\u3286"
"&\\u4E09<<<\\u3194<<<\\u3282<<<\\u01F22A"
"&\\u4E0A<<<\\u3196<<<\\u32A4"
"&\\u4E0B<<<\\u3198<<<\\u32A6"
"&\\u4E19<<<\\u319B"
"&\\u4E28<<<\\u2F01"
"&\\u4E2D<<<\\u3197<<<\\u32A5<<<\\u01F22D"
"&\\u4E36<<<\\u2F02"
"&\\u4E3F<<<\\u2F03"
"&\\u4E59<<<\\u2F04<<<\\u319A"
"&\\u4E5D<<<\\u3288"
"&\\u4E85<<<\\u2F05"
"&\\u4E8C<<<\\u2F06<<<\\u3193<<<\\u3281<<<\\u01F214"
"&\\u4E94<<<\\u3284"
"&\\u4EA0<<<\\u2F07"
"&\\u4EA4<<<\\u01F218"
"&\\u4EBA<<<\\u2F08<<<\\u319F"
"&\\u4F01<<<\\u32AD"
"&\\u4F11<<<\\u32A1"
"&\\u512A<<<\\u329D"
"&\\u513F<<<\\u2F09"
"&\\u5165<<<\\u2F0A"
"&\\u516B<<<\\u2F0B<<<\\u3287"
"&\\u516D<<<\\u3285"
"&\\u5182<<<\\u2F0C"
"&\\u518D<<<\\u01F21E"
"&\\u5196<<<\\u2F0D"
"&\\u5199<<<\\u32A2"
"&\\u51AB<<<\\u2F0E"
"&\\u51E0<<<\\u2F0F"
"&\\u51F5<<<\\u2F10"
"&\\u5200<<<\\u2F11"
"&\\u521D<<<\\u01F220"
"&\\u524D<<<\\u01F21C"
"&\\u5272<<<\\u01F239"
"&\\u529B<<<\\u2F12"
"&\\u52B4<<<\\u3298"
"&\\u52F9<<<\\u2F13"
"&\\u5315<<<\\u2F14"
"&\\u531A<<<\\u2F15"
"&\\u5338<<<\\u2F16<<<\\u32A9"
"&\\u5341<<<\\u2F17<<<\\u3038<<<\\u3289"
"&\\u5344<<<\\u3039"
"&\\u5345<<<\\u303A"
"&\\u5354<<<\\u32AF"
"&\\u535C<<<\\u2F18"
"&\\u5369<<<\\u2F19"
"&\\u5370<<<\\u329E"
"&\\u5382<<<\\u2F1A"
"&\\u53B6<<<\\u2F1B"
"&\\u53C8<<<\\u2F1C"
"&\\u53CC<<<\\u01F212"
"&\\u53E3<<<\\u2F1D"
"&\\u53EF<<<\\u01F251"
"&\\u53F3<<<\\u32A8<<<\\u01F22E"
"&\\u5408<<<\\u01F234"
"&\\u540D<<<\\u3294"
"&\\u5439<<<\\u01F225"
"&\\u554F<<<\\u3244"
"&\\u55B6<<<\\u01F23A"
"&\\u56D7<<<\\u2F1E"
"&\\u56DB<<<\\u3195<<<\\u3283"
"&\\u571F<<<\\u2F1F<<<\\u328F"
"&\\u5730<<<\\u319E"
"&\\u58EB<<<\\u2F20"
"&\\u58F0<<<\\u01F224"
"&\\u5902<<<\\u2F21"
"&\\u590A<<<\\u2F22"
"&\\u5915<<<\\u2F23"
"&\\u591A<<<\\u01F215"
"&\\u591C<<<\\u32B0"
"&\\u5927<<<\\u2F24"
"&\\u5927\\u6B63<<<\\u337D"
"&\\u5929<<<\\u319D<<<\\u01F217"
"&\\u5973<<<\\u2F25<<<\\u329B"
"&\\u5B50<<<\\u2F26"
"&\\u5B57<<<\\u01F211"
"&\\u5B66<<<\\u32AB"
"&\\u5B80<<<\\u2F27"
"&\\u5B97<<<\\u32AA"
"&\\u5BF8<<<\\u2F28"
"&\\u5C0F<<<\\u2F29"
"&\\u5C22<<<\\u2F2A"
"&\\u5C38<<<\\u2F2B"
"&\\u5C6E<<<\\u2F2C"
"&\\u5C71<<<\\u2F2D"
"&\\u5DDB<<<\\u2F2E"
"&\\u5DE5<<<\\u2F2F"
"&\\u5DE6<<<\\u32A7<<<\\u01F22C"
"&\\u5DF1<<<\\u2F30"
"&\\u5DFE<<<\\u2F31"
"&\\u5E72<<<\\u2F32"
"&\\u5E73\\u6210<<<\\u337B"
"&\\u5E7A<<<\\u2F33"
"&\\u5E7C<<<\\u3245"
"&\\u5E7F<<<\\u2F34"
"&\\u5EF4<<<\\u2F35"
"&\\u5EFE<<<\\u2F36"
"&\\u5F0B<<<\\u2F37"
"&\\u5F13<<<\\u2F38"
"&\\u5F50<<<\\u2F39"
"&\\u5F61<<<\\u2F3A"
"&\\u5F73<<<\\u2F3B"
"&\\u5F8C<<<\\u01F21D"
"&\\u5F97<<<\\u01F250"
"&\\u5FC3<<<\\u2F3C"
"&\\u6208<<<\\u2F3D"
"&\\u6236<<<\\u2F3E"
"&\\u624B<<<\\u2F3F<<<\\u01F210"
"&\\u6253<<<\\u01F231"
"&\\u6295<<<\\u01F227"
"&\\u6307<<<\\u01F22F"
"&\\u6355<<<\\u01F228"
"&\\u652F<<<\\u2F40"
"&\\u6534<<<\\u2F41"
"&\\u6587<<<\\u2F42<<<\\u3246"
"&\\u6597<<<\\u2F43"
"&\\u6599<<<\\u01F21B"
"&\\u65A4<<<\\u2F44"
"&\\u65B0<<<\\u01F21F"
"&\\u65B9<<<\\u2F45"
"&\\u65E0<<<\\u2F46"
"&\\u65E5<<<\\u2F47<<<\\u3290"
"&\\u660E\\u6CBB<<<\\u337E"
"&\\u6620<<<\\u01F219"
"&\\u662D\\u548C<<<\\u337C"
"&\\u66F0<<<\\u2F48"
"&\\u6708<<<\\u2F49<<<\\u328A<<<\\u01F237"
"&\\u6709<<<\\u3292<<<\\u01F236"
"&\\u6728<<<\\u2F4A<<<\\u328D"
"&\\u682A<<<\\u3291"
"&\\u682A\\u5F0F\\u4F1A\\u793E<<<\\u337F"
"&\\u6B20<<<\\u2F4B"
"&\\u6B62<<<\\u2F4C"
"&\\u6B63<<<\\u32A3"
"&\\u6B79<<<\\u2F4D"
"&\\u6BB3<<<\\u2F4E"
"&\\u6BCB<<<\\u2F4F"
"&\\u6BCD<<<\\u2E9F"
"&\\u6BD4<<<\\u2F50"
"&\\u6BDB<<<\\u2F51"
"&\\u6C0F<<<\\u2F52"
"&\\u6C14<<<\\u2F53"
"&\\u6C34<<<\\u2F54<<<\\u328C"
"&\\u6CE8<<<\\u329F"
"&\\u6E80<<<\\u01F235"
"&\\u6F14<<<\\u01F226"
"&\\u706B<<<\\u2F55<<<\\u328B"
"&\\u7121<<<\\u01F21A"
"&\\u722A<<<\\u2F56"
"&\\u7236<<<\\u2F57"
"&\\u723B<<<\\u2F58"
"&\\u723F<<<\\u2F59"
"&\\u7247<<<\\u2F5A"
"&\\u7259<<<\\u2F5B"
"&\\u725B<<<\\u2F5C"
"&\\u7279<<<\\u3295"
"&\\u72AC<<<\\u2F5D"
"&\\u7384<<<\\u2F5E"
"&\\u7389<<<\\u2F5F"
"&\\u74DC<<<\\u2F60"
"&\\u74E6<<<\\u2F61"
"&\\u7518<<<\\u2F62"
"&\\u751F<<<\\u2F63<<<\\u01F222"
"&\\u7528<<<\\u2F64"
"&\\u7530<<<\\u2F65"
"&\\u7532<<<\\u3199"
"&\\u7533<<<\\u01F238"
"&\\u7537<<<\\u329A"
"&\\u758B<<<\\u2F66"
"&\\u7592<<<\\u2F67"
"&\\u7676<<<\\u2F68"
"&\\u767D<<<\\u2F69"
"&\\u76AE<<<\\u2F6A"
"&\\u76BF<<<\\u2F6B"
"&\\u76E3<<<\\u32AC"
"&\\u76EE<<<\\u2F6C"
"&\\u77DB<<<\\u2F6D"
"&\\u77E2<<<\\u2F6E"
"&\\u77F3<<<\\u2F6F"
"&\\u793A<<<\\u2F70"
"&\\u793E<<<\\u3293"
"&\\u795D<<<\\u3297"
"&\\u7981<<<\\u01F232"
"&\\u79B8<<<\\u2F71"
"&\\u79BE<<<\\u2F72"
"&\\u79D8<<<\\u3299"
"&\\u7A74<<<\\u2F73"
"&\\u7A7A<<<\\u01F233"
"&\\u7ACB<<<\\u2F74"
"&\\u7AF9<<<\\u2F75"
"&\\u7B8F<<<\\u3247"
"&\\u7C73<<<\\u2F76"
"&\\u7CF8<<<\\u2F77"
"&\\u7D42<<<\\u01F221"
"&\\u7F36<<<\\u2F78"
"&\\u7F51<<<\\u2F79"
"&\\u7F8A<<<\\u2F7A"
"&\\u7FBD<<<\\u2F7B"
"&\\u8001<<<\\u2F7C"
"&\\u800C<<<\\u2F7D"
"&\\u8012<<<\\u2F7E"
"&\\u8033<<<\\u2F7F"
"&\\u807F<<<\\u2F80"
"&\\u8089<<<\\u2F81"
"&\\u81E3<<<\\u2F82"
"&\\u81EA<<<\\u2F83"
"&\\u81F3<<<\\u2F84"
"&\\u81FC<<<\\u2F85"
"&\\u820C<<<\\u2F86"
"&\\u821B<<<\\u2F87"
"&\\u821F<<<\\u2F88"
"&\\u826E<<<\\u2F89"
"&\\u8272<<<\\u2F8A"
"&\\u8278<<<\\u2F8B"
"&\\u864D<<<\\u2F8C"
"&\\u866B<<<\\u2F8D"
"&\\u8840<<<\\u2F8E"
"&\\u884C<<<\\u2F8F"
"&\\u8863<<<\\u2F90"
"&\\u897E<<<\\u2F91"
"&\\u898B<<<\\u2F92"
"&\\u89D2<<<\\u2F93"
"&\\u89E3<<<\\u01F216"
"&\\u8A00<<<\\u2F94"
"&\\u8C37<<<\\u2F95"
"&\\u8C46<<<\\u2F96"
"&\\u8C55<<<\\u2F97"
"&\\u8C78<<<\\u2F98"
"&\\u8C9D<<<\\u2F99"
"&\\u8CA1<<<\\u3296"
"&\\u8CA9<<<\\u01F223"
"&\\u8CC7<<<\\u32AE"
"&\\u8D64<<<\\u2F9A"
"&\\u8D70<<<\\u2F9B<<<\\u01F230"
"&\\u8DB3<<<\\u2F9C"
"&\\u8EAB<<<\\u2F9D"
"&\\u8ECA<<<\\u2F9E"
"&\\u8F9B<<<\\u2F9F"
"&\\u8FB0<<<\\u2FA0"
"&\\u8FB5<<<\\u2FA1"
"&\\u904A<<<\\u01F22B"
"&\\u9069<<<\\u329C"
"&\\u9091<<<\\u2FA2"
"&\\u9149<<<\\u2FA3"
"&\\u914D<<<\\u01F23B"
"&\\u91C6<<<\\u2FA4"
"&\\u91CC<<<\\u2FA5"
"&\\u91D1<<<\\u2FA6<<<\\u328E"
"&\\u9577<<<\\u2FA7"
"&\\u9580<<<\\u2FA8"
"&\\u961C<<<\\u2FA9"
"&\\u96B6<<<\\u2FAA"
"&\\u96B9<<<\\u2FAB"
"&\\u96E8<<<\\u2FAC"
"&\\u9751<<<\\u2FAD"
"&\\u975E<<<\\u2FAE"
"&\\u9762<<<\\u2FAF"
"&\\u9769<<<\\u2FB0"
"&\\u97CB<<<\\u2FB1"
"&\\u97ED<<<\\u2FB2"
"&\\u97F3<<<\\u2FB3"
"&\\u9801<<<\\u2FB4"
"&\\u9805<<<\\u32A0"
"&\\u98A8<<<\\u2FB5"
"&\\u98DB<<<\\u2FB6"
"&\\u98DF<<<\\u2FB7"
"&\\u9996<<<\\u2FB8"
"&\\u9999<<<\\u2FB9"
"&\\u99AC<<<\\u2FBA"
"&\\u9AA8<<<\\u2FBB"
"&\\u9AD8<<<\\u2FBC"
"&\\u9ADF<<<\\u2FBD"
"&\\u9B25<<<\\u2FBE"
"&\\u9B2F<<<\\u2FBF"
"&\\u9B32<<<\\u2FC0"
"&\\u9B3C<<<\\u2FC1"
"&\\u9B5A<<<\\u2FC2"
"&\\u9CE5<<<\\u2FC3"
"&\\u9E75<<<\\u2FC4"
"&\\u9E7F<<<\\u2FC5"
"&\\u9EA5<<<\\u2FC6"
"&\\u9EBB<<<\\u2FC7"
"&\\u9EC3<<<\\u2FC8"
"&\\u9ECD<<<\\u2FC9"
"&\\u9ED1<<<\\u2FCA"
"&\\u9EF9<<<\\u2FCB"
"&\\u9EFD<<<\\u2FCC"
"&\\u9F0E<<<\\u2FCD"
"&\\u9F13<<<\\u2FCE"
"&\\u9F20<<<\\u2FCF"
"&\\u9F3B<<<\\u2FD0"
"&\\u9F4A<<<\\u2FD1"
"&\\u9F52<<<\\u2FD2"
"&\\u9F8D<<<\\u2FD3"
"&\\u9F9C<<<\\u2FD4"
"&\\u9F9F<<<\\u2EF3"
"&\\u9FA0<<<\\u2FD5"
"&\\u02342F<\\u91CD\\u5E86/\\u5E86"
"&\\u5F1E<\\u6C88\\u9633/\\u9633"
"&\\u92BA<\\u85CF\\u6587/\\u6587";
static const char zh2_cldr_30[] =
"&[before 2]a<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD"
"<<\\u00E0<<<\\u00C0"
"&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A"
"<<\\u00E8<<<\\u00C8"
"&e<<e\\u0302\\u0304<<<E\\u0302\\u0304<<e\\u0302\\u0301<<<E\\u0302\\u0301"
"<<e\\u0302\\u030C<<<E\\u0302\\u030C<<e\\u0302\\u0300<<<E\\u0302\\u0300"
"&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF"
"<<\\u00EC<<<\\u00CC"
"&[before 2]m<<m\\u0304<<<M\\u0304<<\\u1E3F<<<\\u1E3E<<m\\u030C"
"<<<M\\u030C<<m\\u0300<<<M\\u0300"
"&[before 2]n<<n\\u0304<<<N\\u0304<<\\u0144<<<\\u0143<<\\u0148<<<\\u0147"
"<<\\u01F9<<<\\u01F8"
"&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1"
"<<\\u00F2<<<\\u00D2"
"&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3"
"<<\\u00F9<<<\\u00D9"
"&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC"
"<<<\\u01DB<<\\u00FC<<<\\u00DC"
"&(\\u4E00)<<<\\u3220"
"&(\\u4E03)<<<\\u3226"
"&(\\u4E09)<<<\\u3222"
"&(\\u4E5D)<<<\\u3228"
"&(\\u4E8C)<<<\\u3221"
"&(\\u4E94)<<<\\u3224"
"&(\\u4EE3)<<<\\u3239"
"&(\\u4F01)<<<\\u323D"
"&(\\u4F11)<<<\\u3241"
"&(\\u516B)<<<\\u3227"
"&(\\u516D)<<<\\u3225"
"&(\\u52B4)<<<\\u3238"
"&(\\u5341)<<<\\u3229"
"&(\\u5354)<<<\\u323F"
"&(\\u540D)<<<\\u3234"
"&(\\u547C)<<<\\u323A"
"&(\\u56DB)<<<\\u3223"
"&(\\u571F)<<<\\u322F"
"&(\\u5B66)<<<\\u323B"
"&(\\u65E5)<<<\\u3230"
"&(\\u6708)<<<\\u322A"
"&(\\u6709)<<<\\u3232"
"&(\\u6728)<<<\\u322D"
"&(\\u682A)<<<\\u3231"
"&(\\u6C34)<<<\\u322C"
"&(\\u706B)<<<\\u322B"
"&(\\u7279)<<<\\u3235"
"&(\\u76E3)<<<\\u323C"
"&(\\u793E)<<<\\u3233"
"&(\\u795D)<<<\\u3237"
"&(\\u796D)<<<\\u3240"
"&(\\u81EA)<<<\\u3242"
"&(\\u81F3)<<<\\u3243"
"&(\\u8CA1)<<<\\u3236"
"&(\\u8CC7)<<<\\u323E"
"&(\\u91D1)<<<\\u322E"
"&0\\u70B9<<<\\u3358"
"&10\\u65E5<<<\\u33E9"
"&10\\u6708<<<\\u32C9"
"&10\\u70B9<<<\\u3362"
"&11\\u65E5<<<\\u33EA"
"&11\\u6708<<<\\u32CA"
"&11\\u70B9<<<\\u3363"
"&12\\u65E5<<<\\u33EB"
"&12\\u6708<<<\\u32CB"
"&12\\u70B9<<<\\u3364"
"&13\\u65E5<<<\\u33EC"
"&13\\u70B9<<<\\u3365"
"&14\\u65E5<<<\\u33ED"
"&14\\u70B9<<<\\u3366"
"&15\\u65E5<<<\\u33EE"
"&15\\u70B9<<<\\u3367"
"&16\\u65E5<<<\\u33EF"
"&16\\u70B9<<<\\u3368"
"&17\\u65E5<<<\\u33F0"
"&17\\u70B9<<<\\u3369"
"&18\\u65E5<<<\\u33F1"
"&18\\u70B9<<<\\u336A"
"&19\\u65E5<<<\\u33F2"
"&19\\u70B9<<<\\u336B"
"&1\\u65E5<<<\\u33E0"
"&1\\u6708<<<\\u32C0"
"&1\\u70B9<<<\\u3359"
"&20\\u65E5<<<\\u33F3"
"&20\\u70B9<<<\\u336C"
"&21\\u65E5<<<\\u33F4"
"&21\\u70B9<<<\\u336D"
"&22\\u65E5<<<\\u33F5"
"&22\\u70B9<<<\\u336E"
"&23\\u65E5<<<\\u33F6"
"&23\\u70B9<<<\\u336F"
"&24\\u65E5<<<\\u33F7"
"&24\\u70B9<<<\\u3370"
"&25\\u65E5<<<\\u33F8"
"&26\\u65E5<<<\\u33F9"
"&27\\u65E5<<<\\u33FA"
"&28\\u65E5<<<\\u33FB"
"&29\\u65E5<<<\\u33FC"
"&2\\u65E5<<<\\u33E1"
"&2\\u6708<<<\\u32C1"
"&2\\u70B9<<<\\u335A"
"&30\\u65E5<<<\\u33FD"
"&31\\u65E5<<<\\u33FE"
"&3\\u65E5<<<\\u33E2"
"&3\\u6708<<<\\u32C2"
"&3\\u70B9<<<\\u335B"
"&4\\u65E5<<<\\u33E3"
"&4\\u6708<<<\\u32C3"
"&4\\u70B9<<<\\u335C"
"&5\\u65E5<<<\\u33E4"
"&5\\u6708<<<\\u32C4"
"&5\\u70B9<<<\\u335D"
"&6\\u65E5<<<\\u33E5"
"&6\\u6708<<<\\u32C5"
"&6\\u70B9<<<\\u335E"
"&7\\u65E5<<<\\u33E6"
"&7\\u6708<<<\\u32C6"
"&7\\u70B9<<<\\u335F"
"&8\\u65E5<<<\\u33E7"
"&8\\u6708<<<\\u32C7"
"&8\\u70B9<<<\\u3360"
"&9\\u65E5<<<\\u33E8"
"&9\\u6708<<<\\u32C8"
"&9\\u70B9<<<\\u3361"
"&\\u3014\\u4E09\\u3015<<<\\u01F241"
"&\\u3014\\u4E8C\\u3015<<<\\u01F242"
"&\\u3014\\u52DD\\u3015<<<\\u01F247"
"&\\u3014\\u5B89\\u3015<<<\\u01F243"
"&\\u3014\\u6253\\u3015<<<\\u01F245"
"&\\u3014\\u6557\\u3015<<<\\u01F248"
"&\\u3014\\u672C\\u3015<<<\\u01F240"
"&\\u3014\\u70B9\\u3015<<<\\u01F244"
"&\\u3014\\u76D7\\u3015<<<\\u01F246"
"&\\u4E00<<<\\u2F00<<<\\u3192<<<\\u3280<<<\\u01F229"
"&\\u4E01<<<\\u319C"
"&\\u4E03<<<\\u3286"
"&\\u4E09<<<\\u3194<<<\\u3282<<<\\u01F22A"
"&\\u4E0A<<<\\u3196<<<\\u32A4"
"&\\u4E0B<<<\\u3198<<<\\u32A6"
"&\\u4E19<<<\\u319B"
"&\\u4E28<<<\\u2F01"
"&\\u4E2D<<<\\u3197<<<\\u32A5<<<\\u01F22D"
"&\\u4E36<<<\\u2F02"
"&\\u4E3F<<<\\u2F03"
"&\\u4E59<<<\\u2F04<<<\\u319A"
"&\\u4E5D<<<\\u3288"
"&\\u4E85<<<\\u2F05"
"&\\u4E8C<<<\\u2F06<<<\\u3193<<<\\u3281<<<\\u01F214"
"&\\u4E94<<<\\u3284"
"&\\u4EA0<<<\\u2F07"
"&\\u4EA4<<<\\u01F218"
"&\\u4EBA<<<\\u2F08<<<\\u319F"
"&\\u4F01<<<\\u32AD"
"&\\u4F11<<<\\u32A1"
"&\\u512A<<<\\u329D"
"&\\u513F<<<\\u2F09"
"&\\u5165<<<\\u2F0A"
"&\\u516B<<<\\u2F0B<<<\\u3287"
"&\\u516D<<<\\u3285"
"&\\u5182<<<\\u2F0C"
"&\\u518D<<<\\u01F21E"
"&\\u5196<<<\\u2F0D"
"&\\u5199<<<\\u32A2"
"&\\u51AB<<<\\u2F0E"
"&\\u51E0<<<\\u2F0F"
"&\\u51F5<<<\\u2F10"
"&\\u5200<<<\\u2F11"
"&\\u521D<<<\\u01F220"
"&\\u524D<<<\\u01F21C"
"&\\u5272<<<\\u01F239"
"&\\u529B<<<\\u2F12"
"&\\u52B4<<<\\u3298"
"&\\u52F9<<<\\u2F13"
"&\\u5315<<<\\u2F14"
"&\\u531A<<<\\u2F15"
"&\\u5338<<<\\u2F16<<<\\u32A9"
"&\\u5341<<<\\u2F17<<<\\u3038<<<\\u3289"
"&\\u5344<<<\\u3039"
"&\\u5345<<<\\u303A"
"&\\u5354<<<\\u32AF"
"&\\u535C<<<\\u2F18"
"&\\u5369<<<\\u2F19"
"&\\u5370<<<\\u329E"
"&\\u5382<<<\\u2F1A"
"&\\u53B6<<<\\u2F1B"
"&\\u53C8<<<\\u2F1C"
"&\\u53CC<<<\\u01F212"
"&\\u53E3<<<\\u2F1D"
"&\\u53EF<<<\\u01F251"
"&\\u53F3<<<\\u32A8<<<\\u01F22E"
"&\\u5408<<<\\u01F234"
"&\\u540D<<<\\u3294"
"&\\u5439<<<\\u01F225"
"&\\u554F<<<\\u3244"
"&\\u55B6<<<\\u01F23A"
"&\\u56D7<<<\\u2F1E"
"&\\u56DB<<<\\u3195<<<\\u3283"
"&\\u571F<<<\\u2F1F<<<\\u328F"
"&\\u5730<<<\\u319E"
"&\\u58EB<<<\\u2F20"
"&\\u58F0<<<\\u01F224"
"&\\u5902<<<\\u2F21"
"&\\u590A<<<\\u2F22"
"&\\u5915<<<\\u2F23"
"&\\u591A<<<\\u01F215"
"&\\u591C<<<\\u32B0"
"&\\u5927<<<\\u2F24"
"&\\u5927\\u6B63<<<\\u337D"
"&\\u5929<<<\\u319D<<<\\u01F217"
"&\\u5973<<<\\u2F25<<<\\u329B"
"&\\u5B50<<<\\u2F26"
"&\\u5B57<<<\\u01F211"
"&\\u5B66<<<\\u32AB"
"&\\u5B80<<<\\u2F27"
"&\\u5B97<<<\\u32AA"
"&\\u5BF8<<<\\u2F28"
"&\\u5C0F<<<\\u2F29"
"&\\u5C22<<<\\u2F2A"
"&\\u5C38<<<\\u2F2B"
"&\\u5C6E<<<\\u2F2C"
"&\\u5C71<<<\\u2F2D"
"&\\u5DDB<<<\\u2F2E"
"&\\u5DE5<<<\\u2F2F"
"&\\u5DE6<<<\\u32A7<<<\\u01F22C"
"&\\u5DF1<<<\\u2F30"
"&\\u5DFE<<<\\u2F31"
"&\\u5E72<<<\\u2F32"
"&\\u5E73\\u6210<<<\\u337B"
"&\\u5E7A<<<\\u2F33"
"&\\u5E7C<<<\\u3245"
"&\\u5E7F<<<\\u2F34"
"&\\u5EF4<<<\\u2F35"
"&\\u5EFE<<<\\u2F36"
"&\\u5F0B<<<\\u2F37"
"&\\u5F13<<<\\u2F38"
"&\\u5F50<<<\\u2F39"
"&\\u5F61<<<\\u2F3A"
"&\\u5F73<<<\\u2F3B"
"&\\u5F8C<<<\\u01F21D"
"&\\u5F97<<<\\u01F250"
"&\\u5FC3<<<\\u2F3C"
"&\\u6208<<<\\u2F3D"
"&\\u6236<<<\\u2F3E"
"&\\u624B<<<\\u2F3F<<<\\u01F210"
"&\\u6253<<<\\u01F231"
"&\\u6295<<<\\u01F227"
"&\\u6307<<<\\u01F22F"
"&\\u6355<<<\\u01F228"
"&\\u652F<<<\\u2F40"
"&\\u6534<<<\\u2F41"
"&\\u6587<<<\\u2F42<<<\\u3246"
"&\\u6597<<<\\u2F43"
"&\\u6599<<<\\u01F21B"
"&\\u65A4<<<\\u2F44"
"&\\u65B0<<<\\u01F21F"
"&\\u65B9<<<\\u2F45"
"&\\u65E0<<<\\u2F46"
"&\\u65E5<<<\\u2F47<<<\\u3290"
"&\\u660E\\u6CBB<<<\\u337E"
"&\\u6620<<<\\u01F219"
"&\\u662D\\u548C<<<\\u337C"
"&\\u66F0<<<\\u2F48"
"&\\u6708<<<\\u2F49<<<\\u328A<<<\\u01F237"
"&\\u6709<<<\\u3292<<<\\u01F236"
"&\\u6728<<<\\u2F4A<<<\\u328D"
"&\\u682A<<<\\u3291"
"&\\u682A\\u5F0F\\u4F1A\\u793E<<<\\u337F"
"&\\u6B20<<<\\u2F4B"
"&\\u6B62<<<\\u2F4C"
"&\\u6B63<<<\\u32A3"
"&\\u6B79<<<\\u2F4D"
"&\\u6BB3<<<\\u2F4E"
"&\\u6BCB<<<\\u2F4F"
"&\\u6BCD<<<\\u2E9F"
"&\\u6BD4<<<\\u2F50"
"&\\u6BDB<<<\\u2F51"
"&\\u6C0F<<<\\u2F52"
"&\\u6C14<<<\\u2F53"
"&\\u6C34<<<\\u2F54<<<\\u328C"
"&\\u6CE8<<<\\u329F"
"&\\u6E80<<<\\u01F235"
"&\\u6F14<<<\\u01F226"
"&\\u706B<<<\\u2F55<<<\\u328B"
"&\\u7121<<<\\u01F21A"
"&\\u722A<<<\\u2F56"
"&\\u7236<<<\\u2F57"
"&\\u723B<<<\\u2F58"
"&\\u723F<<<\\u2F59"
"&\\u7247<<<\\u2F5A"
"&\\u7259<<<\\u2F5B"
"&\\u725B<<<\\u2F5C"
"&\\u7279<<<\\u3295"
"&\\u72AC<<<\\u2F5D"
"&\\u7384<<<\\u2F5E"
"&\\u7389<<<\\u2F5F"
"&\\u74DC<<<\\u2F60"
"&\\u74E6<<<\\u2F61"
"&\\u7518<<<\\u2F62"
"&\\u751F<<<\\u2F63<<<\\u01F222"
"&\\u7528<<<\\u2F64"
"&\\u7530<<<\\u2F65"
"&\\u7532<<<\\u3199"
"&\\u7533<<<\\u01F238"
"&\\u7537<<<\\u329A"
"&\\u758B<<<\\u2F66"
"&\\u7592<<<\\u2F67"
"&\\u7676<<<\\u2F68"
"&\\u767D<<<\\u2F69"
"&\\u76AE<<<\\u2F6A"
"&\\u76BF<<<\\u2F6B"
"&\\u76E3<<<\\u32AC"
"&\\u76EE<<<\\u2F6C"
"&\\u77DB<<<\\u2F6D"
"&\\u77E2<<<\\u2F6E"
"&\\u77F3<<<\\u2F6F"
"&\\u793A<<<\\u2F70"
"&\\u793E<<<\\u3293"
"&\\u795D<<<\\u3297"
"&\\u7981<<<\\u01F232"
"&\\u79B8<<<\\u2F71"
"&\\u79BE<<<\\u2F72"
"&\\u79D8<<<\\u3299"
"&\\u7A74<<<\\u2F73"
"&\\u7A7A<<<\\u01F233"
"&\\u7ACB<<<\\u2F74"
"&\\u7AF9<<<\\u2F75"
"&\\u7B8F<<<\\u3247"
"&\\u7C73<<<\\u2F76"
"&\\u7CF8<<<\\u2F77"
"&\\u7D42<<<\\u01F221"
"&\\u7F36<<<\\u2F78"
"&\\u7F51<<<\\u2F79"
"&\\u7F8A<<<\\u2F7A"
"&\\u7FBD<<<\\u2F7B"
"&\\u8001<<<\\u2F7C"
"&\\u800C<<<\\u2F7D"
"&\\u8012<<<\\u2F7E"
"&\\u8033<<<\\u2F7F"
"&\\u807F<<<\\u2F80"
"&\\u8089<<<\\u2F81"
"&\\u81E3<<<\\u2F82"
"&\\u81EA<<<\\u2F83"
"&\\u81F3<<<\\u2F84"
"&\\u81FC<<<\\u2F85"
"&\\u820C<<<\\u2F86"
"&\\u821B<<<\\u2F87"
"&\\u821F<<<\\u2F88"
"&\\u826E<<<\\u2F89"
"&\\u8272<<<\\u2F8A"
"&\\u8278<<<\\u2F8B"
"&\\u864D<<<\\u2F8C"
"&\\u866B<<<\\u2F8D"
"&\\u8840<<<\\u2F8E"
"&\\u884C<<<\\u2F8F"
"&\\u8863<<<\\u2F90"
"&\\u897E<<<\\u2F91"
"&\\u898B<<<\\u2F92"
"&\\u89D2<<<\\u2F93"
"&\\u89E3<<<\\u01F216"
"&\\u8A00<<<\\u2F94"
"&\\u8C37<<<\\u2F95"
"&\\u8C46<<<\\u2F96"
"&\\u8C55<<<\\u2F97"
"&\\u8C78<<<\\u2F98"
"&\\u8C9D<<<\\u2F99"
"&\\u8CA1<<<\\u3296"
"&\\u8CA9<<<\\u01F223"
"&\\u8CC7<<<\\u32AE"
"&\\u8D64<<<\\u2F9A"
"&\\u8D70<<<\\u2F9B<<<\\u01F230"
"&\\u8DB3<<<\\u2F9C"
"&\\u8EAB<<<\\u2F9D"
"&\\u8ECA<<<\\u2F9E"
"&\\u8F9B<<<\\u2F9F"
"&\\u8FB0<<<\\u2FA0"
"&\\u8FB5<<<\\u2FA1"
"&\\u904A<<<\\u01F22B"
"&\\u9069<<<\\u329C"
"&\\u9091<<<\\u2FA2"
"&\\u9149<<<\\u2FA3"
"&\\u914D<<<\\u01F23B"
"&\\u91C6<<<\\u2FA4"
"&\\u91CC<<<\\u2FA5"
"&\\u91D1<<<\\u2FA6<<<\\u328E"
"&\\u9577<<<\\u2FA7"
"&\\u9580<<<\\u2FA8"
"&\\u961C<<<\\u2FA9"
"&\\u96B6<<<\\u2FAA"
"&\\u96B9<<<\\u2FAB"
"&\\u96E8<<<\\u2FAC"
"&\\u9751<<<\\u2FAD"
"&\\u975E<<<\\u2FAE"
"&\\u9762<<<\\u2FAF"
"&\\u9769<<<\\u2FB0"
"&\\u97CB<<<\\u2FB1"
"&\\u97ED<<<\\u2FB2"
"&\\u97F3<<<\\u2FB3"
"&\\u9801<<<\\u2FB4"
"&\\u9805<<<\\u32A0"
"&\\u98A8<<<\\u2FB5"
"&\\u98DB<<<\\u2FB6"
"&\\u98DF<<<\\u2FB7"
"&\\u9996<<<\\u2FB8"
"&\\u9999<<<\\u2FB9"
"&\\u99AC<<<\\u2FBA"
"&\\u9AA8<<<\\u2FBB"
"&\\u9AD8<<<\\u2FBC"
"&\\u9ADF<<<\\u2FBD"
"&\\u9B25<<<\\u2FBE"
"&\\u9B2F<<<\\u2FBF"
"&\\u9B32<<<\\u2FC0"
"&\\u9B3C<<<\\u2FC1"
"&\\u9B5A<<<\\u2FC2"
"&\\u9CE5<<<\\u2FC3"
"&\\u9E75<<<\\u2FC4"
"&\\u9E7F<<<\\u2FC5"
"&\\u9EA5<<<\\u2FC6"
"&\\u9EBB<<<\\u2FC7"
"&\\u9EC3<<<\\u2FC8"
"&\\u9ECD<<<\\u2FC9"
"&\\u9ED1<<<\\u2FCA"
"&\\u9EF9<<<\\u2FCB"
"&\\u9EFD<<<\\u2FCC"
"&\\u9F0E<<<\\u2FCD"
"&\\u9F13<<<\\u2FCE"
"&\\u9F20<<<\\u2FCF"
"&\\u9F3B<<<\\u2FD0"
"&\\u9F4A<<<\\u2FD1"
"&\\u9F52<<<\\u2FD2"
"&\\u9F8D<<<\\u2FD3"
"&\\u9F9C<<<\\u2FD4"
"&\\u9F9F<<<\\u2EF3"
"&\\u9FA0<<<\\u2FD5";
/*
Below variables are defined in separate .cc file, generated by uca9dump at
build-time for the Chinese collation.

Three parallel families are declared, one per Chinese collation variant
(zh, zh2, zh3). Each family consists of:
  *_han_pages             array of pointers to uint16 tables
  MIN_*_HAN_PAGE          lower page bound
  MAX_*_HAN_PAGE          upper page bound
  *_han_to_single_weight  int array
  *_HAN_WEIGHT_PAIRS      int constant
Presumably the page bounds index *_han_pages and *_HAN_WEIGHT_PAIRS is the
number of (character, weight) pairs in *_han_to_single_weight; confirm
against the defining .cc files.

NOTE(review): in this tree uca900_zh_tbls.cc, uca900_zh2_tbls.cc and
uca900_zh3_tbls.cc are listed as ordinary sources of the charset target, so
the tables look pre-generated and checked in rather than produced at build
time; confirm and adjust the sentence above if so.
*/
/* Tables for the "zh" variant. */
extern uint16 *zh_han_pages[];
extern const int MIN_ZH_HAN_PAGE;
extern const int MAX_ZH_HAN_PAGE;
extern int zh_han_to_single_weight[];
extern const int ZH_HAN_WEIGHT_PAIRS;
/* Tables for the "zh2" variant. */
extern uint16 *zh2_han_pages[];
extern const int MIN_ZH2_HAN_PAGE;
extern const int MAX_ZH2_HAN_PAGE;
extern int zh2_han_to_single_weight[];
extern const int ZH2_HAN_WEIGHT_PAIRS;
/* Tables for the "zh3" variant. */
extern uint16 *zh3_han_pages[];
extern const int MIN_ZH3_HAN_PAGE;
extern const int MAX_ZH3_HAN_PAGE;
extern int zh3_han_to_single_weight[];
extern const int ZH3_HAN_WEIGHT_PAIRS;
#endif

446779
deps/oblib/src/lib/charset/uca900_zh_tbls.cc vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -47,7 +47,10 @@ int ObDatabaseSqlService::insert_database(const ObDatabaseSchema &database_schem
LOG_WARN("database schema is invalid", K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(database_schema.get_charset_type(),
exec_tenant_id))) {
LOG_WARN("failed to check charset data version valid", K(ret));
LOG_WARN("failed to check charset data version valid", K(database_schema.get_charset_type()), K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(database_schema.get_collation_type(),
exec_tenant_id))) {
LOG_WARN("failed to check collation data version valid", K(database_schema.get_collation_type()), K(ret));
} else {
int64_t affected_rows = 0;
ObDMLSqlSplicer dml;
@ -125,7 +128,10 @@ int ObDatabaseSqlService::update_database(const ObDatabaseSchema &database_schem
LOG_WARN("database scheam is invalid", K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(database_schema.get_charset_type(),
exec_tenant_id))) {
LOG_WARN("failed to check charset data version valid", K(ret));
LOG_WARN("failed to check charset data version valid", K(database_schema.get_charset_type()), K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(database_schema.get_collation_type(),
exec_tenant_id))) {
// This branch rejects the collation, so log the collation (not the charset,
// which the preceding branch already validated); matches insert_database().
LOG_WARN("failed to check collation data version valid", K(database_schema.get_collation_type()), K(ret));
} else {
int64_t affected_rows = 0;
ObDMLSqlSplicer dml;

View File

@ -2658,7 +2658,10 @@ int ObTableSqlService::gen_table_dml(
LOG_WARN("check ddl allowd failed", K(ret), K(table));
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(table.get_charset_type(),
exec_tenant_id))) {
LOG_WARN("failed to check charset data version valid", K(ret));
LOG_WARN("failed to check charset data version valid", K(table.get_charset_type()), K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(table.get_collation_type(),
exec_tenant_id))) {
LOG_WARN("failed to check collation data version valid", K(table.get_collation_type()), K(ret));
} else if (OB_FAIL(GET_MIN_DATA_VERSION(table.get_tenant_id(), data_version))) {
LOG_WARN("failed to get data version", K(ret));
} else if (data_version < DATA_VERSION_4_1_0_0
@ -3897,7 +3900,10 @@ int ObTableSqlService::gen_column_dml(
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2, skip index");
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(column.get_charset_type(),
exec_tenant_id))) {
LOG_WARN("failed to check charset data version valid", K(ret));
LOG_WARN("failed to check charset data version valid", K(column.get_charset_type()), K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(column.get_collation_type(),
exec_tenant_id))) {
LOG_WARN("failed to check collation data version valid", K(column.get_collation_type()), K(ret));
} else if (column.is_generated_column() ||
column.is_identity_column() ||
ob_is_string_type(column.get_data_type()) ||

View File

@ -78,7 +78,10 @@ int ObTenantSqlService::alter_tenant(
LOG_WARN("invalid tenant schema", K(tenant_schema), K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(tenant_schema.get_charset_type(),
tenant_schema.get_tenant_id()))) {
LOG_WARN("failed to check charset data version valid", K(ret));
LOG_WARN("failed to check charset data version valid", K(tenant_schema.get_charset_type()), K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(tenant_schema.get_collation_type(),
tenant_schema.get_tenant_id()))) {
// The failed check is the collation gate; the message must say so, otherwise
// it is indistinguishable from the charset failure logged by the branch above.
LOG_WARN("failed to check collation data version valid", K(tenant_schema.get_collation_type()), K(ret));
} else if (OB_FAIL(replace_tenant(tenant_schema, op, sql_client, ddl_stmt_str))) {
LOG_WARN("replace_tenant failed", K(tenant_schema), K(op), K(ret));
}

View File

@ -81,7 +81,7 @@ int ObSetNamesExecutor::execute(ObExecContext &ctx, ObSetNamesStmt &stmt)
if (CS_TYPE_INVALID == cs_coll_type || CS_TYPE_INVALID == coll_type) {
ret = OB_ERR_UNEXPECTED;
SQL_ENG_LOG(ERROR, "cs coll type or coll type is invalid", K(ret), K(cs_coll_type), K(coll_type));
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(ObCharset::charset_type(charset),
session->get_effective_tenant_id()))) {
SQL_EXE_LOG(WARN, "failed to check charset data version valid", K(ret));
} else if (OB_FAIL(session->update_sys_variable(SYS_VAR_CHARACTER_SET_CLIENT,
@ -102,7 +102,7 @@ int ObSetNamesExecutor::execute(ObExecContext &ctx, ObSetNamesStmt &stmt)
ObObj database_charset;
ObObj database_collation;
ObCollationType cs_coll_type = ObCharset::get_default_collation(ObCharset::charset_type(charset));
if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(cs_coll_type),
if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(ObCharset::charset_type(charset),
session->get_effective_tenant_id()))) {
SQL_EXE_LOG(WARN, "failed to check charset data version valid", K(ret));
} else if (OB_FAIL(session->get_sys_variable(SYS_VAR_CHARACTER_SET_DATABASE,

View File

@ -566,6 +566,9 @@ int ObVariableSetExecutor::update_global_variables(ObExecContext &ctx,
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(static_cast<ObCollationType>(coll_int64)),
session->get_effective_tenant_id()))) {
LOG_WARN("failed to check charset data version valid", K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(static_cast<ObCollationType>(coll_int64),
session->get_effective_tenant_id()))) {
LOG_WARN("failed to check collation data version valid", K(ret));
} else if (FALSE_IT(coll_str = ObString::make_string(ObCharset::collation_name(static_cast<ObCollationType>(coll_int64))))) {
//do nothing
} else if (OB_FAIL(ObBasicSysVar::get_charset_var_and_val_by_collation(
@ -591,6 +594,9 @@ int ObVariableSetExecutor::update_global_variables(ObExecContext &ctx,
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(static_cast<ObCollationType>(coll_int64)),
session->get_effective_tenant_id()))) {
LOG_WARN("failed to check charset data version valid", K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(static_cast<ObCollationType>(coll_int64),
session->get_effective_tenant_id()))) {
LOG_WARN("failed to check collation data version valid", K(ret));
} else if (FALSE_IT(cs_str = ObString::make_string(ObCharset::charset_name(
ObCharset::charset_type_by_coll(static_cast<ObCollationType>(coll_int64)))))) {
//do nothing

View File

@ -600,6 +600,24 @@ int ObSQLUtils::is_charset_data_version_valid(ObCharsetType charset_type, const
return ret;
}
/**
 * Checks whether `collation_type` may be used by tenant `tenant_id` given the
 * tenant's minimum data version.
 *
 * When OB_BUILD_CLOSE_MODULES is defined the check is compiled out and the
 * function always returns OB_SUCCESS. Otherwise:
 *   - a failure to read the minimum data version is logged and returned as is;
 *   - utf8mb4_unicode_ci / utf16_unicode_ci on a tenant whose data version is
 *     below 4.2.2.0 yields OB_NOT_SUPPORTED (logged and reported to the user);
 *   - everything else yields OB_SUCCESS.
 */
int ObSQLUtils::is_collation_data_version_valid(ObCollationType collation_type, const int64_t tenant_id)
{
  int ret = OB_SUCCESS;
#ifndef OB_BUILD_CLOSE_MODULES
  uint64_t min_data_version = 0;
  // Only the two Unicode (UCA) case-insensitive collations are version-gated.
  const bool is_unicode_ci = (CS_TYPE_UTF16_UNICODE_CI == collation_type)
                             || (CS_TYPE_UTF8MB4_UNICODE_CI == collation_type);
  if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, min_data_version))) {
    SQL_LOG(WARN, "failed to GET_MIN_DATA_VERSION", K(ret));
  } else if (is_unicode_ci && min_data_version < DATA_VERSION_4_2_2_0) {
    ret = OB_NOT_SUPPORTED;
    SQL_LOG(WARN, "Unicode collation not supported when data_version < 4_2_2_0", K(collation_type), K(ret));
    LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2.2, unicode collation is");
  }
#endif
  return ret;
}
// If the function addr_to_partition_id appears in the raw_expr parameter,
// the resulting partition_id cannot later be mapped back to the corresponding addr.
int ObSQLUtils::calc_calculable_expr(ObSQLSessionInfo *session,

View File

@ -275,6 +275,7 @@ public:
}
}
static int is_charset_data_version_valid(ObCharsetType charset_type, const int64_t tenant_id);
static int is_collation_data_version_valid(ObCollationType collation_type, const int64_t tenant_id);
static int calc_calculable_expr(ObSQLSessionInfo *session,
const ObRawExpr *expr,
common::ObObj &result,

View File

@ -1,26 +1,9 @@
# charset objects used for proxy_parser
if (OB_BUILD_OPENSOURCE)
set(ob_sql_parser_charset_object_list
ob_ctype_bin_os.cc
ob_ctype_gb18030_os.cc
ob_ctype_gbk_os.cc
ob_ctype_latin1_os.cc
ob_ctype_mb_os.cc
ob_ctype_simple_os.cc
ob_ctype_os.cc
ob_ctype_utf16_os.cc
ob_ctype_utf8_os.cc
ob_dtoa_os.cc
)
endif()
list(TRANSFORM ob_sql_parser_charset_object_list
PREPEND ${PROJECT_SOURCE_DIR}/deps/oblib/src/lib/charset/)
if(OB_BUILD_FULL_CHARSET)
set(ob_sql_parser_full_charset_object_list
ob_ctype_bin.cc
ob_ctype.cc
ob_ctype_gbk.cc
@ -36,12 +19,10 @@ if(OB_BUILD_FULL_CHARSET)
uca900_zh_tbls.cc
uca900_zh2_tbls.cc
uca900_zh3_tbls.cc
)
endif()
list(TRANSFORM ob_sql_parser_full_charset_object_list
PREPEND ${PROJECT_SOURCE_DIR}/close_modules/charset/deps/oblib/src/lib/charset/)
)
list(TRANSFORM ob_sql_parser_charset_object_list
PREPEND ${PROJECT_SOURCE_DIR}/deps/oblib/src/lib/charset/)
# hash objects used for proxy parser
set(ob_sql_parser_hash_object_list
murmur_hash.h
@ -124,19 +105,12 @@ set(ob_extra_sql_parser_object_list
)
# ob_sql_proxy_parser_objects is the static library for proxy, it does not link against observer
if (OB_BUILD_FULL_CHARSET)
add_library(ob_sql_proxy_parser_objects OBJECT
${ob_inner_sql_parser_object_list}
${ob_sql_parser_hash_object_list}
${ob_sql_parser_full_charset_object_list}
)
else()
add_library(ob_sql_proxy_parser_objects OBJECT
${ob_inner_sql_parser_object_list}
${ob_sql_parser_hash_object_list}
${ob_sql_parser_charset_object_list}
)
endif()
add_library(ob_sql_proxy_parser_objects OBJECT
${ob_inner_sql_parser_object_list}
${ob_sql_parser_hash_object_list}
${ob_sql_parser_charset_object_list}
)
# ob_sql_server_parser_object is the static library for observer
add_library(ob_sql_server_parser_objects OBJECT

View File

@ -4454,6 +4454,9 @@ int ObAlterTableResolver::resolve_convert_to_character(const ParseNode &node)
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
session_info_->get_effective_tenant_id()))) {
LOG_WARN("failed to check charset data version valid", K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
session_info_->get_effective_tenant_id()))) {
LOG_WARN("failed to check collation data version valid", K(ret));
} else {
collation_type_ = collation_type;
}

View File

@ -160,6 +160,9 @@ int ObDatabaseResolver<T>::resolve_database_option(T *stmt, ParseNode *node, ObS
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(charset_type,
session_info->get_effective_tenant_id()))) {
OB_LOG(WARN, "failed to check charset data version valid", K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
session_info->get_effective_tenant_id()))) {
OB_LOG(WARN, "failed to check collation data version valid", K(ret));
} else if (OB_UNLIKELY(collation_already_set_
&& stmt->get_charset_type() != charset_type)) {
// MySQL reports an error when executing the SQL below; to stay consistent with MySQL, the collation/charset mismatch is checked at resolve time.
@ -181,6 +184,12 @@ int ObDatabaseResolver<T>::resolve_database_option(T *stmt, ParseNode *node, ObS
ret = common::OB_ERR_UNEXPECTED;
SQL_RESV_LOG(WARN, "all valid collation types should have corresponding charset type",
K(ret), K(charset_type), K(collation_type));
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(charset_type,
session_info->get_effective_tenant_id()))) {
OB_LOG(WARN, "failed to check charset data version valid", K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
session_info->get_effective_tenant_id()))) {
OB_LOG(WARN, "failed to check collation data version valid", K(ret));
} else if (OB_UNLIKELY(collation_already_set_
&& stmt->get_charset_type() != charset_type)) {
ret = OB_ERR_COLLATION_MISMATCH;

View File

@ -1470,6 +1470,9 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
session_info_->get_effective_tenant_id()))) {
SQL_RESV_LOG(WARN, "failed to check charset data version valid", K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
session_info_->get_effective_tenant_id()))) {
SQL_RESV_LOG(WARN, "failed to check collation data version valid", K(ret));
} else {
collation_type_ = collation_type;
if (stmt::T_ALTER_TABLE == stmt_->get_stmt_type()) {

View File

@ -153,6 +153,9 @@ int ObTenantResolver<T>::resolve_tenant_option(T *stmt, ParseNode *node,
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
session_info->get_effective_tenant_id()))) {
LOG_WARN("failed to check charset data version valid", K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
session_info->get_effective_tenant_id()))) {
LOG_WARN("failed to check collation data version valid", K(ret));
} else {
collation_type_ = collation_type;
if (stmt->get_stmt_type() == stmt::T_MODIFY_TENANT) {

View File

@ -4877,6 +4877,9 @@ int ObRawExprResolverImpl::process_collation_node(const ParseNode *node, ObRawEx
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
ctx_.session_info_->get_effective_tenant_id()))) {
LOG_WARN("failed to check charset data version valid", K(ret));
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
ctx_.session_info_->get_effective_tenant_id()))) {
LOG_WARN("failed to check collation data version valid", K(ret));
} else if (OB_FAIL(ctx_.expr_factory_.create_raw_expr(T_INT, c_expr))) {
LOG_WARN("fail to create raw expr", K(ret));
} else if (OB_ISNULL(c_expr)) {

View File

@ -13,6 +13,8 @@ gbk_chinese_ci gbk 28 Yes Yes 1
gbk_bin gbk 87 Yes 1
utf16_general_ci utf16 54 Yes Yes 1
utf16_bin utf16 55 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 1
utf16_unicode_ci utf16 101 Yes 1
gb18030_chinese_ci gb18030 248 Yes Yes 1
gb18030_bin gb18030 249 Yes 1
latin1_swedish_ci latin1 8 Yes Yes 1

View File

@ -7,6 +7,8 @@ gbk_chinese_ci gbk 28 Yes Yes 1
gbk_bin gbk 87 Yes 1
utf16_general_ci utf16 54 Yes Yes 1
utf16_bin utf16 55 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 1
utf16_unicode_ci utf16 101 Yes 1
gb18030_chinese_ci gb18030 248 Yes Yes 1
gb18030_bin gb18030 249 Yes 1
latin1_swedish_ci latin1 8 Yes Yes 1
@ -27,6 +29,8 @@ gbk_chinese_ci gbk 28 Yes Yes 1
gbk_bin gbk 87 Yes 1
utf16_general_ci utf16 54 Yes Yes 1
utf16_bin utf16 55 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 1
utf16_unicode_ci utf16 101 Yes 1
gb18030_chinese_ci gb18030 248 Yes Yes 1
gb18030_bin gb18030 249 Yes 1
latin1_swedish_ci latin1 8 Yes Yes 1

View File

@ -8,6 +8,8 @@ gbk_chinese_ci gbk 28 Yes Yes 1
gbk_bin gbk 87 Yes 1
utf16_general_ci utf16 54 Yes Yes 1
utf16_bin utf16 55 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 1
utf16_unicode_ci utf16 101 Yes 1
gb18030_chinese_ci gb18030 248 Yes Yes 1
gb18030_bin gb18030 249 Yes 1
latin1_swedish_ci latin1 8 Yes Yes 1
@ -28,6 +30,8 @@ gbk_chinese_ci gbk 28 Yes Yes 1
gbk_bin gbk 87 Yes 1
utf16_general_ci utf16 54 Yes Yes 1
utf16_bin utf16 55 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 1
utf16_unicode_ci utf16 101 Yes 1
gb18030_chinese_ci gb18030 248 Yes Yes 1
gb18030_bin gb18030 249 Yes 1
latin1_swedish_ci latin1 8 Yes Yes 1
@ -43,6 +47,7 @@ select * from collations where collation_name like '%utf8%';
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
utf8mb4_general_ci utf8mb4 45 Yes Yes 1
utf8mb4_bin utf8mb4 46 Yes 1
utf8mb4_unicode_ci utf8mb4 224 Yes 1
show create table collations;
View Create View character_set_client collation_connection
COLLATIONS CREATE VIEW `COLLATIONS` AS select collation as COLLATION_NAME, charset as CHARACTER_SET_NAME, id as ID, `is_default` as IS_DEFAULT, is_compiled as IS_COMPILED, sortlen as SORTLEN from oceanbase.__tenant_virtual_collation utf8mb4 utf8mb4_general_ci