[FEAT MERGE]charset revise
This commit is contained in:
parent
654e89bbe1
commit
e4f7452b2d
@ -82,8 +82,6 @@ if(OB_BUILD_CLOSE_MODULES)
|
||||
ob_define(OB_BUILD_TDE_SECURITY ON)
|
||||
ob_define(OB_BUILD_AUDIT_SECURITY ON)
|
||||
ob_define(OB_BUILD_LABEL_SECURITY ON)
|
||||
# 字符集
|
||||
ob_define(OB_BUILD_FULL_CHARSET ON)
|
||||
# SPM功能
|
||||
ob_define(OB_BUILD_SPM ON)
|
||||
|
||||
@ -121,10 +119,6 @@ if(OB_BUILD_LABEL_SECURITY)
|
||||
add_definitions(-DOB_BUILD_LABEL_SECURITY)
|
||||
endif()
|
||||
|
||||
if(OB_BUILD_FULL_CHARSET)
|
||||
add_definitions(-DOB_BUILD_FULL_CHARSET)
|
||||
endif()
|
||||
|
||||
if(OB_BUILD_SPM)
|
||||
add_definitions(-DOB_BUILD_SPM)
|
||||
endif()
|
||||
|
8
deps/oblib/src/CMakeLists.txt
vendored
8
deps/oblib/src/CMakeLists.txt
vendored
@ -85,14 +85,6 @@ if(OB_BUILD_AUDIT_SECURITY)
|
||||
)
|
||||
endif()
|
||||
|
||||
if(OB_BUILD_ORACLE_XML)
|
||||
target_include_directories(
|
||||
oblib_base_base_base INTERFACE
|
||||
${CMAKE_SOURCE_DIR}/close_modules/charset
|
||||
${CMAKE_SOURCE_DIR}/close_modules/charset/deps/oblib/src/
|
||||
)
|
||||
endif()
|
||||
|
||||
if(OB_USE_BABASSL)
|
||||
target_include_directories(
|
||||
oblib_base_base_base INTERFACE
|
||||
|
25
deps/oblib/src/lib/CMakeLists.txt
vendored
25
deps/oblib/src/lib/CMakeLists.txt
vendored
@ -10,16 +10,21 @@ ob_set_subtarget(oblib_lib ALONE
|
||||
)
|
||||
|
||||
ob_set_subtarget(oblib_lib charset
|
||||
charset/ob_ctype_bin_os.cc
|
||||
charset/ob_ctype_gb18030_os.cc
|
||||
charset/ob_ctype_gbk_os.cc
|
||||
charset/ob_ctype_latin1_os.cc
|
||||
charset/ob_ctype_mb_os.cc
|
||||
charset/ob_ctype_simple_os.cc
|
||||
charset/ob_ctype_os.cc
|
||||
charset/ob_ctype_utf16_os.cc
|
||||
charset/ob_ctype_utf8_os.cc
|
||||
charset/ob_dtoa_os.cc
|
||||
charset/ob_ctype_bin.cc
|
||||
charset/ob_ctype.cc
|
||||
charset/ob_ctype_gb18030.cc
|
||||
charset/ob_ctype_gbk.cc
|
||||
charset/ob_ctype_latin1.cc
|
||||
charset/ob_ctype_mb.cc
|
||||
charset/ob_ctype_simple.cc
|
||||
charset/ob_ctype_uca.cc
|
||||
charset/ob_ctype_utf8.cc
|
||||
charset/ob_ctype_utf16.cc
|
||||
charset/ob_dtoa.cc
|
||||
charset/uca900_ja_tbls.cc
|
||||
charset/uca900_zh_tbls.cc
|
||||
charset/uca900_zh2_tbls.cc
|
||||
charset/uca900_zh3_tbls.cc
|
||||
charset/ob_charset.cpp
|
||||
)
|
||||
|
||||
|
171
deps/oblib/src/lib/charset/mb_wc.h
vendored
Normal file
171
deps/oblib/src/lib/charset/mb_wc.h
vendored
Normal file
@ -0,0 +1,171 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#ifndef MB_WC_INCLUDED
|
||||
#define MB_WC_INCLUDED
|
||||
|
||||
/**
|
||||
@file mb_wc.h
|
||||
|
||||
Definitions of mb_wc (multibyte to wide character, i.e., effectively
|
||||
“parse a UTF-8 character”) functions for UTF-8 (both three- and four-byte).
|
||||
These are available as inline functions, as C-style thunks so that they
|
||||
can fit into MY_CHARSET_HANDLER, and as functors.
|
||||
|
||||
The functors exist so that you can specialize a class on them and get them
|
||||
inlined instead of having to call them through the function pointer in
|
||||
MY_CHARSET_HANDLER; mb_wc is in itself so cheap (the most common case is
|
||||
just a single byte load and a predictable compare) that the call overhead
|
||||
in a tight loop is significant, and these routines tend to take up a lot
|
||||
of CPU time when sorting. Typically, at the outermost level, you'd simply
|
||||
compare cs->cset->mb_wc with my_mb_wc_{utf8,utf8mb4}_thunk, and if so,
|
||||
instantiate your function with the given class. If it doesn't match,
|
||||
you can use Mb_wc_through_function_pointer, which calls through the
|
||||
function pointer as usual. (It will cache the function pointer for you,
|
||||
which is typically faster than looking it up all the time -- the compiler
|
||||
cannot always figure out on its own that it doesn't change.)
|
||||
|
||||
The Mb_wc_* classes should be sent by _value_, not by reference, since
|
||||
they are never larger than two pointers (and usually simply zero).
|
||||
*/
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
|
||||
#define ALWAYS_INLINE __attribute__((always_inline)) inline
|
||||
|
||||
template <bool RANGE_CHECK, bool SUPPORT_MB4>
|
||||
static int ob_mb_wc_utf8_prototype(ob_wc_t *pwc, const unsigned char *s,
|
||||
const unsigned char *e);
|
||||
|
||||
/**
|
||||
Functor that converts a UTF-8 multibyte sequence (up to three bytes)
|
||||
to a wide character.
|
||||
*/
|
||||
struct Mb_wc_utf8 {
|
||||
Mb_wc_utf8() {}
|
||||
|
||||
ALWAYS_INLINE
|
||||
int operator()(ob_wc_t *pwc, const unsigned char *s, const unsigned char *e) const {
|
||||
return ob_mb_wc_utf8_prototype</*RANGE_CHECK=*/true, /*SUPPORT_MB4=*/false>(
|
||||
pwc, s, e);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
Functor that converts a UTF-8 multibyte sequence (up to four bytes)
|
||||
to a wide character.
|
||||
*/
|
||||
struct Mb_wc_utf8mb4 {
|
||||
Mb_wc_utf8mb4() {}
|
||||
|
||||
ALWAYS_INLINE
|
||||
int operator()(ob_wc_t *pwc, const unsigned char *s, const unsigned char *e) const {
|
||||
return ob_mb_wc_utf8_prototype</*RANGE_CHECK=*/true, /*SUPPORT_MB4=*/true>(
|
||||
pwc, s, e);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
Functor that uses a function pointer to convert a multibyte sequence
|
||||
to a wide character.
|
||||
*/
|
||||
class Mb_wc_through_function_pointer {
|
||||
public:
|
||||
explicit Mb_wc_through_function_pointer(const ObCharsetInfo *cs)
|
||||
: m_funcptr(cs->cset->mb_wc), m_cs(cs) {}
|
||||
|
||||
int operator()(ob_wc_t *pwc, const unsigned char *s, const unsigned char *e) const {
|
||||
return m_funcptr(m_cs, pwc, s, e);
|
||||
}
|
||||
|
||||
private:
|
||||
typedef int (*mbwc_func_t)(const ObCharsetInfo *, ob_wc_t *, const unsigned char *,
|
||||
const unsigned char *);
|
||||
|
||||
const mbwc_func_t m_funcptr;
|
||||
const ObCharsetInfo *const m_cs;
|
||||
};
|
||||
|
||||
template <bool RANGE_CHECK, bool SUPPORT_MB4>
|
||||
static ALWAYS_INLINE int ob_mb_wc_utf8_prototype(ob_wc_t *pwc, const unsigned char *s,
|
||||
const unsigned char *e) {
|
||||
if (RANGE_CHECK && s >= e) return OB_CS_TOOSMALL;
|
||||
|
||||
unsigned char c = s[0];
|
||||
if (c < 0x80) {
|
||||
*pwc = c;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (c < 0xe0) {
|
||||
if (c < 0xc2) // Resulting code point would be less than 0x80.
|
||||
return OB_CS_ILSEQ;
|
||||
|
||||
if (RANGE_CHECK && s + 2 > e) return OB_CS_TOOSMALL2;
|
||||
|
||||
if ((s[1] & 0xc0) != 0x80) // Next byte must be a continuation byte.
|
||||
return OB_CS_ILSEQ;
|
||||
|
||||
*pwc = ((ob_wc_t)(c & 0x1f) << 6) + (ob_wc_t)(s[1] & 0x3f);
|
||||
return 2;
|
||||
}
|
||||
|
||||
if (c < 0xf0) {
|
||||
if (RANGE_CHECK && s + 3 > e) return OB_CS_TOOSMALL3;
|
||||
|
||||
// Next two bytes must be continuation bytes.
|
||||
uint16 two_bytes;
|
||||
memcpy(&two_bytes, s + 1, sizeof(two_bytes));
|
||||
if ((two_bytes & 0xc0c0) != 0x8080) // Endianness does not matter.
|
||||
return OB_CS_ILSEQ;
|
||||
|
||||
*pwc = ((ob_wc_t)(c & 0x0f) << 12) + ((ob_wc_t)(s[1] & 0x3f) << 6) +
|
||||
(ob_wc_t)(s[2] & 0x3f);
|
||||
if (*pwc < 0x800) return OB_CS_ILSEQ;
|
||||
/*
|
||||
According to RFC 3629, UTF-8 should prohibit characters between
|
||||
U+D800 and U+DFFF, which are reserved for surrogate pairs and do
|
||||
not directly represent characters.
|
||||
*/
|
||||
if (*pwc >= 0xd800 && *pwc <= 0xdfff) return OB_CS_ILSEQ;
|
||||
return 3;
|
||||
}
|
||||
|
||||
if (SUPPORT_MB4) {
|
||||
if (RANGE_CHECK && s + 4 > e) /* We need 4 characters */
|
||||
return OB_CS_TOOSMALL4;
|
||||
|
||||
/*
|
||||
This byte must be of the form 11110xxx, and the next three bytes
|
||||
must be continuation bytes.
|
||||
*/
|
||||
uint32 four_bytes;
|
||||
memcpy(&four_bytes, s, sizeof(four_bytes));
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
if ((four_bytes & 0xf8c0c0c0) != 0xf0808080)
|
||||
#else
|
||||
if ((four_bytes & 0xc0c0c0f8) != 0x808080f0)
|
||||
#endif
|
||||
return OB_CS_ILSEQ;
|
||||
|
||||
*pwc = ((ob_wc_t)(c & 0x07) << 18) + ((ob_wc_t)(s[1] & 0x3f) << 12) +
|
||||
((ob_wc_t)(s[2] & 0x3f) << 6) + (ob_wc_t)(s[3] & 0x3f);
|
||||
if (*pwc < 0x10000 || *pwc > 0x10ffff) return OB_CS_ILSEQ;
|
||||
return 4;
|
||||
}
|
||||
|
||||
return OB_CS_ILSEQ;
|
||||
}
|
||||
|
||||
extern "C" int ob_mb_wc_utf8mb4_thunk(const ObCharsetInfo *cs, ob_wc_t *pwc,
|
||||
const unsigned char *s, const unsigned char *e);
|
||||
|
||||
#endif // MB_WC_INCLUDED
|
74
deps/oblib/src/lib/charset/ob_byteorder.h
vendored
Normal file
74
deps/oblib/src/lib/charset/ob_byteorder.h
vendored
Normal file
@ -0,0 +1,74 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#ifndef OB_BYTEORDER_H
|
||||
#define OB_BYTEORDER_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "lib/charset/ob_template_helper.h"
|
||||
#include <netinet/in.h>
|
||||
|
||||
/*
|
||||
Functions for big-endian loads and stores. These are safe to use
|
||||
no matter what the compiler, CPU or alignment, and also with -fstrict-aliasing.
|
||||
|
||||
The stores return a pointer just past the value that was written.
|
||||
*/
|
||||
|
||||
// Reads two bytes at ptr as a big-endian 16-bit value and returns it in host
// byte order. The bytes are copied out with memcpy, so ptr need not be aligned.
inline uint16_t load16be(const char *ptr) {
  uint16_t be_bits;
  memcpy(&be_bits, ptr, sizeof(be_bits));
  const uint16_t host_bits = ntohs(be_bits);
  return host_bits;
}
|
||||
|
||||
// Reads four bytes at ptr as a big-endian 32-bit value and returns it in host
// byte order. The bytes are copied out with memcpy, so ptr need not be aligned.
inline uint32_t load32be(const char *ptr) {
  uint32_t be_bits;
  memcpy(&be_bits, ptr, sizeof(be_bits));
  const uint32_t host_bits = ntohl(be_bits);
  return host_bits;
}
|
||||
|
||||
// Writes val at ptr as two big-endian bytes and returns the address just past
// the bytes written. The value is copied in with memcpy, so ptr need not be
// aligned.
__attribute__((always_inline)) inline char *store16be(char *ptr, uint16_t val) {
#if defined(_MSC_VER)
  // _byteswap_ushort is an intrinsic on MSVC, but htons is not.
  const uint16_t be_bits = _byteswap_ushort(val);
#else
  const uint16_t be_bits = htons(val);
#endif
  memcpy(ptr, &be_bits, sizeof(be_bits));
  return ptr + sizeof(be_bits);
}
|
||||
|
||||
// Writes val at ptr as four big-endian bytes and returns the address just past
// the bytes written. The value is copied in with memcpy, so ptr need not be
// aligned.
inline char *store32be(char *ptr, uint32_t val) {
  const uint32_t be_bits = htonl(val);
  memcpy(ptr, &be_bits, sizeof(be_bits));
  return ptr + sizeof(be_bits);
}
|
||||
|
||||
// Adapters for using unsigned char * instead of char *.
|
||||
|
||||
inline uint16_t load16be(const unsigned char *ptr) {
|
||||
return load16be(pointer_cast<const char *>(ptr));
|
||||
}
|
||||
|
||||
inline uint32_t load32be(const unsigned char *ptr) {
|
||||
return load32be(pointer_cast<const char *>(ptr));
|
||||
}
|
||||
|
||||
__attribute__((always_inline)) inline unsigned char *store16be(unsigned char *ptr, uint16_t val) {
|
||||
return pointer_cast<unsigned char *>(store16be(pointer_cast<char *>(ptr), val));
|
||||
}
|
||||
|
||||
inline unsigned char *store32be(unsigned char *ptr, uint32_t val) {
|
||||
return pointer_cast<unsigned char *>(store32be(pointer_cast<char *>(ptr), val));
|
||||
}
|
||||
|
||||
#endif // OB_BYTEORDER_H
|
168
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
168
deps/oblib/src/lib/charset/ob_charset.cpp
vendored
@ -293,14 +293,9 @@ const ObCollationWrapper ObCharset::collation_wrap_arr_[ObCharset::VALID_COLLATI
|
||||
{CS_TYPE_GBK_BIN, CHARSET_GBK, CS_TYPE_GBK_BIN, false, true, 1},
|
||||
{CS_TYPE_UTF16_GENERAL_CI, CHARSET_UTF16, CS_TYPE_UTF16_GENERAL_CI, true, true, 1},
|
||||
{CS_TYPE_UTF16_BIN, CHARSET_UTF16, CS_TYPE_UTF16_BIN, false, true, 1},
|
||||
#ifndef OB_BUILD_FULL_CHARSET
|
||||
{CS_TYPE_INVALID, CHARSET_INVALID, CS_TYPE_INVALID, false, false, 1},
|
||||
{CS_TYPE_INVALID, CHARSET_INVALID, CS_TYPE_INVALID, false, false, 1},
|
||||
#else
|
||||
//{CS_TYPE_UTF8MB4_ZH_0900_AS_CS, CHARSET_UTF8MB4, CS_TYPE_UTF8MB4_ZH_0900_AS_CS, false, true, 0},
|
||||
{CS_TYPE_UTF8MB4_UNICODE_CI, CHARSET_UTF8MB4, CS_TYPE_UTF8MB4_UNICODE_CI, false, true, 1},
|
||||
{CS_TYPE_UTF16_UNICODE_CI, CHARSET_UTF16, CS_TYPE_UTF16_UNICODE_CI, false, true, 1},
|
||||
#endif
|
||||
{CS_TYPE_GB18030_CHINESE_CI, CHARSET_GB18030, CS_TYPE_GB18030_CHINESE_CI, true, true, 1},
|
||||
{CS_TYPE_GB18030_BIN, CHARSET_GB18030, CS_TYPE_GB18030_BIN, false, true, 1},
|
||||
{CS_TYPE_LATIN1_SWEDISH_CI, CHARSET_LATIN1, CS_TYPE_LATIN1_SWEDISH_CI,true, true, 1},
|
||||
@ -336,11 +331,7 @@ ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
|
||||
&ob_charset_gbk_bin, // 87
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 88
|
||||
NULL, NULL, NULL, NULL, NULL, // 96
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
&ob_charset_utf16_unicode_ci, // 101
|
||||
#else
|
||||
NULL,
|
||||
#endif
|
||||
NULL, NULL, // 102
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 104
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 112
|
||||
@ -360,11 +351,7 @@ ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
|
||||
&ob_charset_gb18030_2022_pinyin_cs, &ob_charset_gb18030_2022_radical_ci,// 218
|
||||
&ob_charset_gb18030_2022_radical_cs, &ob_charset_gb18030_2022_stroke_ci, // 220
|
||||
&ob_charset_gb18030_2022_stroke_cs, NULL, // 222
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
&ob_charset_utf8mb4_unicode_ci, // 224
|
||||
#else
|
||||
NULL,
|
||||
#endif
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 225
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 232
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // 240
|
||||
@ -508,33 +495,6 @@ uint64_t ObCharset::strntoullrnd(const char *str,
|
||||
return result;
|
||||
}
|
||||
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
/*
|
||||
Convert integer to its string representation in given scale of notation.
|
||||
|
||||
SYNOPSIS
|
||||
int2str()
|
||||
val - value to convert
|
||||
dst - points to buffer where string representation should be stored
|
||||
radix - radix of scale of notation
|
||||
upcase - set to 1 if we should use upper-case digits
|
||||
|
||||
DESCRIPTION
|
||||
Converts the (long) integer value to its character form and moves it to
|
||||
the destination buffer followed by a terminating NUL.
|
||||
If radix is -2..-36, val is taken to be SIGNED, if radix is 2..36, val is
|
||||
taken to be UNSIGNED. That is, val is signed if and only if radix is.
|
||||
All other radixes treated as bad and nothing will be changed in this case.
|
||||
|
||||
For conversion to decimal representation (radix is -10 or 10) one can use
|
||||
optimized int10_to_str() function.
|
||||
|
||||
RETURN VALUE
|
||||
Pointer to ending NUL character or NullS if radix is bad.
|
||||
*/
|
||||
#endif
|
||||
|
||||
//=============================================================
|
||||
char* ObCharset::lltostr(int64_t val, char *dst, int radix, int upcase)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
@ -583,7 +543,7 @@ char* ObCharset::lltostr(int64_t val, char *dst, int radix, int upcase)
|
||||
p = &buffer[sizeof(buffer)-1];
|
||||
*p = '\0';
|
||||
new_val= uval / (uint64_t) radix;
|
||||
*--p = dig_vec[(uchar) (uval- (uint64_t) new_val*(uint64_t) radix)];
|
||||
*--p = dig_vec[(unsigned char) (uval- (uint64_t) new_val*(uint64_t) radix)];
|
||||
val = new_val;
|
||||
ldiv_t res;
|
||||
while (val != 0)
|
||||
@ -621,8 +581,8 @@ uint32_t ObCharset::instr(ObCollationType collation_type,
|
||||
if (is_argument_valid(collation_type, str1, str1_len, str2, str2_len)) {
|
||||
ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
|
||||
ob_match_t m_match_t[2];
|
||||
uint nmatch = 1;
|
||||
uint m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
|
||||
unsigned int nmatch = 1;
|
||||
unsigned int m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
|
||||
if (0 == m_ret ) {
|
||||
result = 0;
|
||||
} else {
|
||||
@ -642,8 +602,8 @@ int64_t ObCharset::instrb(ObCollationType collation_type,
|
||||
if (is_argument_valid(collation_type, str1, str1_len, str2, str2_len)) {
|
||||
ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
|
||||
ob_match_t m_match_t[2];
|
||||
uint nmatch = 1;
|
||||
uint m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
|
||||
unsigned int nmatch = 1;
|
||||
unsigned int m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
|
||||
if (0 != m_ret) {
|
||||
result = m_match_t[0].end - m_match_t[0].beg;
|
||||
}
|
||||
@ -700,9 +660,9 @@ int ObCharset::strcmp(ObCollationType collation_type,
|
||||
ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
|
||||
const bool t_is_prefix = false;
|
||||
result = cs->coll->strnncoll(cs,
|
||||
reinterpret_cast<const uchar *>(str1),
|
||||
reinterpret_cast<const unsigned char *>(str1),
|
||||
str1_len,
|
||||
reinterpret_cast<const uchar *>(str2),
|
||||
reinterpret_cast<const unsigned char *>(str2),
|
||||
str2_len, t_is_prefix);
|
||||
}
|
||||
return result;
|
||||
@ -719,9 +679,9 @@ int ObCharset::strcmpsp(ObCollationType collation_type,
|
||||
if (is_argument_valid(collation_type, str1, str1_len, str2, str2_len)) {
|
||||
ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
|
||||
result = cs->coll->strnncollsp(cs,
|
||||
reinterpret_cast<const uchar *>(str1),
|
||||
reinterpret_cast<const unsigned char *>(str1),
|
||||
str1_len,
|
||||
reinterpret_cast<const uchar *>(str2),
|
||||
reinterpret_cast<const unsigned char *>(str2),
|
||||
str2_len,
|
||||
cmp_endspace);
|
||||
}
|
||||
@ -860,10 +820,10 @@ size_t ObCharset::sortkey(ObCollationType collation_type,
|
||||
//
|
||||
// 对于有非法字符的unicode字符串,采用原生的不转换sortkey的方式进行比较。
|
||||
result = cs->coll->strnxfrm(cs,
|
||||
reinterpret_cast<uchar *>(key),
|
||||
reinterpret_cast<unsigned char *>(key),
|
||||
key_len,
|
||||
OB_MAX_WEIGHT,
|
||||
reinterpret_cast<const uchar *>(str),
|
||||
reinterpret_cast<const unsigned char *>(str),
|
||||
str_len,
|
||||
0,
|
||||
&is_valid_unicode_tmp);
|
||||
@ -890,10 +850,10 @@ size_t ObCharset::sortkey_var_len(ObCollationType collation_type,
|
||||
result = -1;
|
||||
} else {
|
||||
result = cs->coll->strnxfrm_varlen(cs,
|
||||
reinterpret_cast<uchar *>(key),
|
||||
reinterpret_cast<unsigned char *>(key),
|
||||
key_len,
|
||||
OB_MAX_WEIGHT,
|
||||
reinterpret_cast<const uchar *>(str),
|
||||
reinterpret_cast<const unsigned char *>(str),
|
||||
str_len,
|
||||
is_space_cmp,
|
||||
&is_valid_unicode_tmp);
|
||||
@ -921,7 +881,7 @@ uint64_t ObCharset::hash(ObCollationType collation_type,
|
||||
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->coll), K(lbt()));
|
||||
} else {
|
||||
seed = 0xc6a4a7935bd1e995;
|
||||
cs->coll->hash_sort(cs, reinterpret_cast<const uchar *>(str), str_len,
|
||||
cs->coll->hash_sort(cs, reinterpret_cast<const unsigned char *>(str), str_len,
|
||||
&ret, &seed, calc_end_space, hash_algo);
|
||||
}
|
||||
}
|
||||
@ -1102,10 +1062,6 @@ int ObCharset::well_formed_len(ObCollationType collation_type, const char *str,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
// Be careful with this function. The return value may be out of range.
|
||||
// Refer to
|
||||
#endif
|
||||
size_t ObCharset::charpos(const ObCollationType collation_type,
|
||||
const char *str,
|
||||
const int64_t str_len,
|
||||
@ -1204,8 +1160,8 @@ int ObCharset::mb_wc(ObCollationType collation_type,
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
|
||||
} else {
|
||||
int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const uchar*>(mb.ptr()),
|
||||
reinterpret_cast<const uchar*>(mb.ptr()+mb.length()));
|
||||
int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const unsigned char*>(mb.ptr()),
|
||||
reinterpret_cast<const unsigned char*>(mb.ptr()+mb.length()));
|
||||
if (tmp <= 0) {
|
||||
ret = OB_ERR_INCORRECT_STRING_VALUE;
|
||||
} else {
|
||||
@ -1237,8 +1193,8 @@ int ObCharset::mb_wc(ObCollationType collation_type,
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
|
||||
} else {
|
||||
int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const uchar*>(mb),
|
||||
reinterpret_cast<const uchar*>(mb + mb_size));
|
||||
int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const unsigned char*>(mb),
|
||||
reinterpret_cast<const unsigned char*>(mb + mb_size));
|
||||
if (tmp <= 0) {
|
||||
ret = OB_ERR_INCORRECT_STRING_VALUE;
|
||||
} else {
|
||||
@ -1268,7 +1224,7 @@ int ObCharset::display_len(ObCollationType collation_type,
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
|
||||
} else {
|
||||
const uchar *buf = reinterpret_cast<const uchar*>(mb.ptr());
|
||||
const unsigned char *buf = reinterpret_cast<const unsigned char*>(mb.ptr());
|
||||
int64_t buf_size = mb.length();
|
||||
int64_t char_pos = 0;
|
||||
bool found = false;
|
||||
@ -1320,7 +1276,7 @@ int ObCharset::max_display_width_charpos(ObCollationType collation_type, const c
|
||||
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
|
||||
} else {
|
||||
char_pos = 0;
|
||||
const uchar *buf = reinterpret_cast<const uchar*>(mb);
|
||||
const unsigned char *buf = reinterpret_cast<const unsigned char*>(mb);
|
||||
bool found = false;
|
||||
int64_t total_width = 0;
|
||||
|
||||
@ -1372,8 +1328,8 @@ int ObCharset::wc_mb(ObCollationType collation_type, int32_t wc, char *buff, int
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(ret));
|
||||
} else {
|
||||
int tmp = cs->cset->wc_mb(cs, wc, reinterpret_cast<uchar*>(buff),
|
||||
reinterpret_cast<uchar*>(buff + buff_len));
|
||||
int tmp = cs->cset->wc_mb(cs, wc, reinterpret_cast<unsigned char*>(buff),
|
||||
reinterpret_cast<unsigned char*>(buff + buff_len));
|
||||
if (tmp <= 0) {
|
||||
ret = OB_ERR_INCORRECT_STRING_VALUE;
|
||||
} else {
|
||||
@ -1589,14 +1545,12 @@ ObCollationType ObCharset::collation_type(const ObString &cs_name)
|
||||
collation_type = CS_TYPE_UTF16_GENERAL_CI;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_utf16_bin.name)) {
|
||||
collation_type = CS_TYPE_UTF16_BIN;
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
} else if (0 == cs_name.case_compare("utf8_unicode_ci")) {
|
||||
collation_type = CS_TYPE_UTF8MB4_UNICODE_CI;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_utf16_unicode_ci.name)) {
|
||||
collation_type = CS_TYPE_UTF16_UNICODE_CI;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_utf8mb4_unicode_ci.name)) {
|
||||
collation_type = CS_TYPE_UTF8MB4_UNICODE_CI;
|
||||
#endif
|
||||
} else if (0 == cs_name.case_compare(ob_charset_gb18030_bin.name)) {
|
||||
collation_type = CS_TYPE_GB18030_BIN;
|
||||
} else if (0 == cs_name.case_compare(ob_charset_gb18030_chinese_ci.name)) {
|
||||
@ -1639,9 +1593,7 @@ bool ObCharset::is_valid_collation(ObCharsetType charset_type, ObCollationType c
|
||||
if (CHARSET_UTF8MB4 == charset_type) {
|
||||
if (CS_TYPE_UTF8MB4_BIN == collation_type
|
||||
|| CS_TYPE_UTF8MB4_GENERAL_CI == collation_type
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
|| CS_TYPE_UTF8MB4_UNICODE_CI == collation_type
|
||||
#endif
|
||||
) {
|
||||
ret = true;
|
||||
}
|
||||
@ -1655,9 +1607,7 @@ bool ObCharset::is_valid_collation(ObCharsetType charset_type, ObCollationType c
|
||||
} else if (CHARSET_UTF16 == charset_type) {
|
||||
if (CS_TYPE_UTF16_GENERAL_CI == collation_type
|
||||
|| CS_TYPE_UTF16_BIN == collation_type
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
|| CS_TYPE_UTF16_UNICODE_CI == collation_type
|
||||
#endif
|
||||
) {
|
||||
ret = true;
|
||||
}
|
||||
@ -1750,11 +1700,9 @@ bool ObCharset::is_valid_collation(int64_t collation_type_int)
|
||||
|| CS_TYPE_LATIN1_SWEDISH_CI == collation_type
|
||||
|| CS_TYPE_LATIN1_BIN == collation_type
|
||||
|| is_gb18030_2022(collation_type)
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
|| CS_TYPE_UTF8MB4_UNICODE_CI == collation_type
|
||||
|| CS_TYPE_UTF16_UNICODE_CI == collation_type
|
||||
|| (CS_TYPE_EXTENDED_MARK < collation_type && collation_type < CS_TYPE_MAX)
|
||||
#endif
|
||||
;
|
||||
}
|
||||
|
||||
@ -1975,57 +1923,6 @@ int ObCharset::result_collation(
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
/** note from mysql:
|
||||
Aggregate two collations together taking
|
||||
into account their coercibility (aka derivation):.
|
||||
|
||||
0 == DERIVATION_EXPLICIT - an explicitly written COLLATE clause @n
|
||||
1 == DERIVATION_NONE - a mix of two different collations @n
|
||||
2 == DERIVATION_IMPLICIT - a column @n
|
||||
3 == DERIVATION_COERCIBLE - a string constant.
|
||||
|
||||
The most important rules are:
|
||||
-# If collations are the same:
|
||||
chose this collation, and the strongest derivation.
|
||||
-# If collations are different:
|
||||
- Character sets may differ, but only if conversion without
|
||||
data loss is possible. The caller provides flags whether
|
||||
character set conversion attempts should be done. If no
|
||||
flags are substituted, then the character sets must be the same.
|
||||
Currently processed flags are:
|
||||
MY_COLL_ALLOW_SUPERSET_CONV - allow conversion to a superset
|
||||
MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
|
||||
- two EXPLICIT collations produce an error, e.g. this is wrong:
|
||||
CONCAT(expr1 collate latin1_swedish_ci, expr2 collate latin1_german_ci)
|
||||
- the side with smaller derivation value wins,
|
||||
i.e. a column is stronger than a string constant,
|
||||
an explicit COLLATE clause is stronger than a column.
|
||||
- if derivations are the same, we have DERIVATION_NONE,
|
||||
we'll wait for an explicit COLLATE clause which possibly can
|
||||
come from another argument later: for example, this is valid,
|
||||
but we don't know yet when collecting the first two arguments:
|
||||
@code
|
||||
CONCAT(latin1_swedish_ci_column,
|
||||
latin1_german1_ci_column,
|
||||
expr COLLATE latin1_german2_ci)
|
||||
@endcode
|
||||
*/
|
||||
|
||||
/** this function is to determine use which charset when compare
|
||||
* We consider only three charsets(binary, gbk and utf8mb4), so the rule is simpler. Especially,
|
||||
* res_level can not be CS_LEVEL_NONE.
|
||||
*
|
||||
* MySQL uses coercibility values with the following rules to resolve ambiguities:
|
||||
* 1. Use the collation with the lowest coercibility value.
|
||||
* 2. If both sides have the same coercibility, then:
|
||||
* 2.a If both sides are Unicode, or both sides are not Unicode, it is an error.
|
||||
* 2.b If one of the sides has a Unicode character set, and another side has a non-Unicode character set, the side with Unicode character set wins,
|
||||
* and automatic character set conversion is applied to the non-Unicode side.
|
||||
* 2.c For an operation with operands from the same character set but that mix a _bin collation and a _ci or _cs collation, the _bin collation is used.
|
||||
* This is similar to how operations that mix nonbinary and binary strings evaluate the operands as binary strings, except that it is for collations rather than data types.
|
||||
*/
|
||||
#endif
|
||||
int ObCharset::aggregate_collation(
|
||||
const ObCollationLevel collation_level1,
|
||||
const ObCollationType collation_type1,
|
||||
@ -2947,7 +2844,7 @@ int ObCharset::charset_convert(const ObCollationType from_type,
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
LOG_WARN("unexpected collation type", K(ret), K(from_type), K(to_type));
|
||||
} else {
|
||||
uint errors = 0;
|
||||
unsigned int errors = 0;
|
||||
result_len = ob_convert(to_str, static_cast<uint32_t>(to_len), to_cs, from_str, from_len, from_cs,
|
||||
trim_incomplete_tail, replaced_char, &errors);
|
||||
if (OB_UNLIKELY(errors != 0 && report_error)) {
|
||||
@ -3209,20 +3106,8 @@ int ObCharset::get_nls_charset_id_by_charset_type(ObCharsetType charset_type)
|
||||
return static_cast<int>(ret_id);
|
||||
}
|
||||
|
||||
#ifndef OB_BUILD_FULL_CHARSET
|
||||
|
||||
int ObCharset::init_charset()
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (OB_FAIL(init_gb18030_2022())) {
|
||||
LOG_WARN("failed to init gb18030 2022", K(ret));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void ob_charset_error_reporter(enum loglevel level, uint ecode, ...) {
|
||||
static void ob_charset_error_reporter(enum loglevel level, unsigned int ecode, ...) {
|
||||
//UNUSED(level);
|
||||
UNUSED(ecode);
|
||||
switch (level) {
|
||||
@ -3411,7 +3296,6 @@ int ObCharset::init_charset()
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
ObString ObCharsetUtils::const_str_for_ascii_[CHARSET_MAX][INT8_MAX + 1];
|
||||
|
||||
@ -3420,9 +3304,9 @@ int ObCharsetUtils::remove_char_endspace(ObString &str,
|
||||
int ret = OB_SUCCESS;
|
||||
const char *end = str.ptr() + str.length();
|
||||
if ((CHARSET_UTF16 == charset_type)) {
|
||||
end= (const char *) skip_trailing_space((const uchar *)str.ptr(), str.length(), 1);
|
||||
end= (const char *) skip_trailing_space((const unsigned char *)str.ptr(), str.length(), 1);
|
||||
} else {
|
||||
end= (const char *) skip_trailing_space((const uchar *)str.ptr(), str.length(), 0);
|
||||
end= (const char *) skip_trailing_space((const unsigned char *)str.ptr(), str.length(), 0);
|
||||
}
|
||||
if (end >= str.ptr()) {
|
||||
str.assign_ptr(str.ptr(), end - str.ptr());
|
||||
|
@ -8,14 +8,7 @@
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
/*
|
||||
* (C) 2017-2020 Alibaba Group Holding Limited.
|
||||
*
|
||||
* Authors:
|
||||
*/
|
||||
#ifndef OB_BUILD_FULL_CHARSET
|
||||
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
|
||||
@ -25,7 +18,7 @@ ob_convert_internal(char *to, uint32 to_length,
|
||||
const char *from, uint32 from_length,
|
||||
const ObCharsetInfo *from_cs,
|
||||
bool trim_incomplete_tail,
|
||||
const ob_wc_t replaced_char, uint *errors)
|
||||
const ob_wc_t replaced_char, unsigned int *errors)
|
||||
{
|
||||
unsigned int error_num= 0;
|
||||
int cnvres;
|
||||
@ -49,7 +42,7 @@ ob_convert_internal(char *to, uint32 to_length,
|
||||
error_num++;
|
||||
} else {
|
||||
// Not enough characters
|
||||
if (!trim_incomplete_tail && (const uchar*) from < from_end) {
|
||||
if (!trim_incomplete_tail && (const unsigned char*) from < from_end) {
|
||||
error_num++;
|
||||
from++;
|
||||
wc= replaced_char;
|
||||
@ -62,7 +55,7 @@ ob_convert_internal(char *to, uint32 to_length,
|
||||
while (go) {
|
||||
go = FALSE;
|
||||
if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
|
||||
to+= cnvres;
|
||||
to+= cnvres;
|
||||
else if (cnvres == OB_CS_ILUNI && wc != replaced_char) {
|
||||
error_num++;
|
||||
wc= replaced_char;
|
||||
@ -122,4 +115,26 @@ ob_convert(char *to, uint32 to_length, const ObCharsetInfo *to_cs,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
char *strmake(char *dst, const char *src, size_t length) {
|
||||
#ifdef EXTRA_DEBUG
|
||||
/*
|
||||
* 'length' is the maximum length of the string; the buffer needs
|
||||
* to be one character larger to accommodate the terminating '\0'.
|
||||
* This is easy to get wrong, so we make sure we write to the
|
||||
* entire length of the buffer to identify incorrect buffer-sizes.
|
||||
* We only initialise the "unused" part of the buffer here, a) for
|
||||
* efficiency, and b) because dst==src is allowed, so initialising
|
||||
* the entire buffer would overwrite the source-string. Also, we
|
||||
* write a character rather than '\0' as this makes spotting these
|
||||
* problems in the results easier.
|
||||
* */
|
||||
unsigned int n = 0;
|
||||
while (n < length && src[n++])
|
||||
;
|
||||
memset(dst + n, (int)'Z', length - n + 1);
|
||||
#endif
|
||||
while (length--)
|
||||
if (!(*dst++ = *src++)) return dst - 1;
|
||||
*dst = 0;
|
||||
return dst;
|
||||
}
|
283
deps/oblib/src/lib/charset/ob_ctype.h
vendored
283
deps/oblib/src/lib/charset/ob_ctype.h
vendored
@ -116,9 +116,9 @@
|
||||
#define _MY_B 0100
|
||||
#define _MY_X 0200
|
||||
|
||||
#define ob_toupper(s, c) (uchar)((s)->to_upper[(uchar)(c)])
|
||||
#define ob_tolower(s, c) (uchar)((s)->to_lower[(uchar)(c)])
|
||||
#define ob_sort_order(s,c) (uchar)((s)->sort_order[(uchar)(c)])
|
||||
#define ob_toupper(s, c) (unsigned char)((s)->to_upper[(unsigned char)(c)])
|
||||
#define ob_tolower(s, c) (unsigned char)((s)->to_lower[(unsigned char)(c)])
|
||||
#define ob_sort_order(s,c) (unsigned char)((s)->sort_order[(unsigned char)(c)])
|
||||
|
||||
#define is_valid_ascii(e) (0x00<=(uchar)(e) && (uchar)(e)<=0x7F)
|
||||
|
||||
@ -127,13 +127,13 @@ struct ObUCAInfo;
|
||||
struct ObContractions;
|
||||
|
||||
typedef struct ObCharsetLoader {
|
||||
uint errcode;
|
||||
unsigned int errcode;
|
||||
char errarg[192];
|
||||
void *(*once_alloc)(size_t);
|
||||
void *(*mem_malloc)(size_t);
|
||||
void *(*mem_realloc)(void *, size_t);
|
||||
void (*mem_free)(void *);
|
||||
void (*reporter)(enum loglevel, uint errcode, ...);
|
||||
void (*reporter)(enum loglevel, unsigned int errcode, ...);
|
||||
int (*add_collation)(ObCharsetInfo *cs);
|
||||
} ObCharsetLoader;
|
||||
|
||||
@ -157,25 +157,25 @@ typedef char ob_bool; /* Small bool */
|
||||
|
||||
/* Some typedef to make it easy for C++ to make function pointers */
|
||||
typedef int (*ob_charset_conv_mb_wc)(const struct ObCharsetInfo *,
|
||||
ob_wc_t *, const uchar *, const uchar *);
|
||||
ob_wc_t *, const unsigned char *, const unsigned char *);
|
||||
typedef int (*ob_charset_conv_wc_mb)(const struct ObCharsetInfo *, ob_wc_t,
|
||||
uchar *, uchar *);
|
||||
unsigned char *, unsigned char *);
|
||||
typedef size_t (*ob_charset_conv_case)(const struct ObCharsetInfo *,
|
||||
char *, size_t, char *, size_t);
|
||||
|
||||
int init_gb18030_2022();
|
||||
|
||||
extern ObUCAInfo ob_uca_v400;
|
||||
extern uchar ob_uca520_length[4352];
|
||||
extern unsigned char ob_uca520_length[4352];
|
||||
extern uint16 *ob_uca520_weight[4352];
|
||||
extern uchar ob_uca_length[256];
|
||||
extern unsigned char ob_uca_length[256];
|
||||
extern uint16 *ob_uca_weight[256];
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint beg;
|
||||
uint end;
|
||||
uint mb_len;
|
||||
unsigned int beg;
|
||||
unsigned int end;
|
||||
unsigned int mb_len;
|
||||
} ob_match_t;
|
||||
|
||||
typedef struct ObUnicaseInfoChar
|
||||
@ -192,86 +192,13 @@ typedef struct ObUnicaseInfo
|
||||
const ObUnicaseInfoChar **page;
|
||||
} ObUnicaseInfo;
|
||||
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
// OB_CHARSET_HANDLER
|
||||
// ==================
|
||||
|
||||
// OB_CHARSET_HANDLER is a collection of character-set
|
||||
// related routines. Defined in m_ctype.h. Have the
|
||||
// following set of functions:
|
||||
|
||||
// Multi-byte routines
|
||||
// ------------------
|
||||
// ismbchar() - detects whether the given string is a multi-byte sequence
|
||||
// mbcharlen() - returns length of multi-byte sequence starting with
|
||||
// the given character
|
||||
// numchars() - returns number of characters in the given string, e.g.
|
||||
// in SQL function CHAR_LENGTH().
|
||||
// charpos() - calculates the offset of the given position in the string.
|
||||
// Used in SQL functions LEFT(), RIGHT(), SUBSTRING(),
|
||||
// INSERT()
|
||||
|
||||
// well_formed_len()
|
||||
// - returns length of a given multi-byte string in bytes
|
||||
// Used in INSERTs to shorten the given string so it
|
||||
// a) is "well formed" according to the given character set
|
||||
// b) can fit into the given data type
|
||||
|
||||
// lengthsp() - returns the length of the given string without trailing spaces.
|
||||
|
||||
|
||||
// Unicode conversion routines
|
||||
// ---------------------------
|
||||
// mb_wc - converts the left multi-byte sequence into its Unicode code.
|
||||
// wc_mb - converts the given Unicode code into multi-byte sequence.
|
||||
|
||||
|
||||
// Case and sort conversion
|
||||
// ------------------------
|
||||
// caseup_str - converts the given 0-terminated string to uppercase
|
||||
// casedn_str - converts the given 0-terminated string to lowercase
|
||||
// caseup - converts the given string to uppercase using length
|
||||
// casedn - converts the given string to lowercase using length
|
||||
|
||||
// Number-to-string conversion routines
|
||||
// ------------------------------------
|
||||
// snprintf()
|
||||
// long10_to_str()
|
||||
// longlong10_to_str()
|
||||
|
||||
// The names are pretty self-describing.
|
||||
|
||||
// String padding routines
|
||||
// -----------------------
|
||||
// fill() - writes the given Unicode value into the given string
|
||||
// with the given length. Used to pad the string, usually
|
||||
// with space character, according to the given charset.
|
||||
|
||||
// String-to-number conversion routines
|
||||
// ------------------------------------
|
||||
// strntol()
|
||||
// strntoul()
|
||||
// strntoll()
|
||||
// strntoull()
|
||||
// strntod()
|
||||
|
||||
// These functions are almost the same as their STDLIB counterparts,
|
||||
// but also:
|
||||
// - accept length instead of 0-terminator
|
||||
// - are character set dependent
|
||||
|
||||
// Simple scanner routines
|
||||
// -----------------------
|
||||
// scan() - to skip leading spaces in the given string.
|
||||
// Used when a string value is inserted into a numeric field.
|
||||
#endif
|
||||
typedef struct ObCharsetHandler
|
||||
{
|
||||
//my_bool (*init)(struct ObCharsetInfo *, MY_CHARSET_LOADER *loader);
|
||||
/* Multibyte routines */
|
||||
uint (*ismbchar)(const struct ObCharsetInfo *, const char *,
|
||||
unsigned int (*ismbchar)(const struct ObCharsetInfo *, const char *,
|
||||
const char *);
|
||||
uint (*mbcharlen)(const struct ObCharsetInfo *, uint c);
|
||||
unsigned int (*mbcharlen)(const struct ObCharsetInfo *, unsigned int c);
|
||||
size_t (*numchars)(const struct ObCharsetInfo *, const char *b,
|
||||
const char *e);
|
||||
size_t (*charpos)(const struct ObCharsetInfo *, const char *b,
|
||||
@ -292,7 +219,7 @@ typedef struct ObCharsetHandler
|
||||
|
||||
/* CTYPE scanner */
|
||||
int (*ctype)(const struct ObCharsetInfo *cs, int *ctype,
|
||||
const uchar *s, const uchar *e);
|
||||
const unsigned char *s, const unsigned char *e);
|
||||
|
||||
/* Functions for case and sort conversion */
|
||||
/*size_t (*caseup_str)(const struct ObCharsetInfo *, char *);
|
||||
@ -332,22 +259,7 @@ typedef struct ObCharsetHandler
|
||||
size_t (*scan)(const struct ObCharsetInfo *, const char *b,
|
||||
const char *e, int sq);
|
||||
} ObCharsetHandler;
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
// OB_COLLATION_HANDLER
|
||||
// ====================
|
||||
// strnncoll() - compares two strings according to the given collation
|
||||
// strnncollsp() - like the above but ignores trailing spaces for PAD SPACE
|
||||
// collations. For NO PAD collations, identical to strnncoll.
|
||||
// strnxfrm() - makes a sort key suitable for memcmp() corresponding
|
||||
// to the given string
|
||||
// like_range() - creates a LIKE range, for optimizer
|
||||
// wildcmp() - wildcard comparison, for LIKE
|
||||
// strcasecmp() - 0-terminated string comparison
|
||||
// instr() - finds the first substring appearance in the string
|
||||
// hash_sort() - calculates hash value taking into account
|
||||
// the collation rules, e.g. case-insensitivity,
|
||||
// accent sensitivity, etc.
|
||||
#endif
|
||||
|
||||
static const int HASH_BUFFER_LENGTH = 128;
|
||||
|
||||
typedef uint64_t (*hash_algo)(const void* input, uint64_t length, uint64_t seed);
|
||||
@ -359,21 +271,21 @@ typedef struct ObCollationHandler
|
||||
/* Collation routines */
|
||||
// Compares two strings according to the collation
|
||||
int (*strnncoll)(const struct ObCharsetInfo *,
|
||||
const uchar *, size_t, const uchar *, size_t, bool);
|
||||
const unsigned char *, size_t, const unsigned char *, size_t, bool);
|
||||
// Compares two strings, ignoring trailing spaces
|
||||
int (*strnncollsp)(const struct ObCharsetInfo *,
|
||||
const uchar *, size_t, const uchar *, size_t,
|
||||
const unsigned char *, size_t, const unsigned char *, size_t,
|
||||
bool diff_if_only_endspace_difference);
|
||||
// makes a sort key suitable for memcmp() corresponding to the given string
|
||||
size_t (*strnxfrm)(const struct ObCharsetInfo *,
|
||||
uchar *dst, size_t dstlen, uint nweights,
|
||||
const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
|
||||
unsigned char *dst, size_t dstlen, unsigned int nweights,
|
||||
const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);
|
||||
// Returns the length of the weight_string result
|
||||
size_t (*strnxfrmlen)(const struct ObCharsetInfo *, size_t);
|
||||
// makes a sortkey suitable for memcmp() corresponding to the given variable length string
|
||||
size_t (*strnxfrm_varlen)(const struct ObCharsetInfo*,
|
||||
uchar* dst, size_t dst_len, uint nweights,
|
||||
const uchar *src, size_t srclen,
|
||||
unsigned char* dst, size_t dst_len, unsigned int nweights,
|
||||
const unsigned char *src, size_t srclen,
|
||||
bool is_memcmp, bool *is_valid_unicode);
|
||||
//size_t (*strnxfrmlen)(const struct ObCharsetInfo *, size_t);
|
||||
|
||||
@ -394,63 +306,54 @@ typedef struct ObCollationHandler
|
||||
const char *);
|
||||
|
||||
// finds the first substring appearance in the string
|
||||
uint (*instr)(const struct ObCharsetInfo *,
|
||||
unsigned int (*instr)(const struct ObCharsetInfo *,
|
||||
const char *b, size_t b_length,
|
||||
const char *s, size_t s_length,
|
||||
ob_match_t *match, uint nmatch);
|
||||
ob_match_t *match, unsigned int nmatch);
|
||||
|
||||
/* Hash calculation */
|
||||
// calculates hash value taking into account the collation rules, e.g. case-insensitivity
|
||||
void (*hash_sort)(const struct ObCharsetInfo *cs, const uchar *key, size_t len, ulong *nr1,
|
||||
void (*hash_sort)(const struct ObCharsetInfo *cs, const unsigned char *key, size_t len, ulong *nr1,
|
||||
ulong *nr2, const bool calc_end_space, hash_algo hash_algo);
|
||||
bool (*propagate)(const struct ObCharsetInfo *cs, const uchar *str,
|
||||
bool (*propagate)(const struct ObCharsetInfo *cs, const unsigned char *str,
|
||||
size_t len);
|
||||
} ObCollationHandler;
|
||||
|
||||
struct ObCharsetInfo
|
||||
{
|
||||
uint number;
|
||||
uint primary_number;
|
||||
uint binary_number;
|
||||
uint state;
|
||||
unsigned int number;
|
||||
unsigned int primary_number;
|
||||
unsigned int binary_number;
|
||||
unsigned int state;
|
||||
const char *csname;
|
||||
const char *name;
|
||||
const char *comment;
|
||||
const char *tailoring;
|
||||
struct Coll_param *coll_param;
|
||||
uchar *ctype;
|
||||
uchar *to_lower;
|
||||
uchar *to_upper;
|
||||
uchar *sort_order;
|
||||
unsigned char *ctype;
|
||||
unsigned char *to_lower;
|
||||
unsigned char *to_upper;
|
||||
unsigned char *sort_order;
|
||||
ObUCAInfo *uca;
|
||||
//uint16 *tab_to_uni;
|
||||
//MY_UNI_IDX *tab_from_uni;
|
||||
ObUnicaseInfo *caseinfo;
|
||||
uchar *state_map;
|
||||
uchar *ident_map;
|
||||
uint strxfrm_multiply;
|
||||
uchar caseup_multiply;
|
||||
uchar casedn_multiply;
|
||||
uint mbminlen;
|
||||
uint mbmaxlen;
|
||||
unsigned char *state_map;
|
||||
unsigned char *ident_map;
|
||||
unsigned int strxfrm_multiply;
|
||||
unsigned char caseup_multiply;
|
||||
unsigned char casedn_multiply;
|
||||
unsigned int mbminlen;
|
||||
unsigned int mbmaxlen;
|
||||
ob_wc_t min_sort_char;
|
||||
ob_wc_t max_sort_char; /* For LIKE optimization */
|
||||
uchar pad_char;
|
||||
unsigned char pad_char;
|
||||
bool escape_with_backslash_is_dangerous;
|
||||
uchar levels_for_compare;
|
||||
uchar levels_for_order;
|
||||
unsigned char levels_for_compare;
|
||||
unsigned char levels_for_order;
|
||||
|
||||
ObCharsetHandler *cset;
|
||||
ObCollationHandler *coll;
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
/**
|
||||
If this collation is PAD_SPACE, it collates as if all inputs were
|
||||
padded with a given number of spaces at the end (see the "num_codepoints"
|
||||
flag to strnxfrm). NO_PAD simply compares unextended strings.
|
||||
|
||||
Note that this is fundamentally about the behavior of coll->strnxfrm.
|
||||
*/
|
||||
#endif
|
||||
enum ObCharsetPadAttr pad_attribute;
|
||||
};
|
||||
|
||||
@ -459,17 +362,17 @@ struct ObCharsetInfo
|
||||
#define ob_toascii(c) ((c) & 0177)
|
||||
#define ob_tocntrl(c) ((c) & 31)
|
||||
#define ob_toprint(c) ((c) | 64)
|
||||
#define ob_isalpha(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L) : 0)
|
||||
#define ob_isupper(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_U : 0)
|
||||
#define ob_islower(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_L : 0)
|
||||
#define ob_isdigit(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_NMR : 0)
|
||||
#define ob_isxdigit(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_X : 0)
|
||||
#define ob_isalnum(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L | _MY_NMR) : 0)
|
||||
#define ob_isspace(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_SPC : 0)
|
||||
#define ob_ispunct(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_PNT : 0)
|
||||
#define ob_isprint(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B) : 0)
|
||||
#define ob_isgraph(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR) : 0)
|
||||
#define ob_iscntrl(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_CTR : 0)
|
||||
#define ob_isalpha(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_U | _MY_L) : 0)
|
||||
#define ob_isupper(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_U : 0)
|
||||
#define ob_islower(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_L : 0)
|
||||
#define ob_isdigit(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_NMR : 0)
|
||||
#define ob_isxdigit(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_X : 0)
|
||||
#define ob_isalnum(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_U | _MY_L | _MY_NMR) : 0)
|
||||
#define ob_isspace(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_SPC : 0)
|
||||
#define ob_ispunct(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_PNT : 0)
|
||||
#define ob_isprint(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B) : 0)
|
||||
#define ob_isgraph(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR) : 0)
|
||||
#define ob_iscntrl(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_CTR : 0)
|
||||
|
||||
/* Some macros that should be cleaned up a little */
|
||||
/* Non-zero when c is alphanumeric per the charset's ctype table (see ob_isalnum) or is '_'. */
#define ob_isvar(s,c) (ob_isalnum(s,c) || (c) == '_')
|
||||
@ -488,13 +391,13 @@ struct ObCharsetInfo
|
||||
|
||||
|
||||
#define use_mb(s) ((s)->cset->ismbchar != NULL)
|
||||
static inline uint ob_ismbchar(const ObCharsetInfo *cs, const char *str,
|
||||
static inline unsigned int ob_ismbchar(const ObCharsetInfo *cs, const char *str,
|
||||
const char *strend) {
|
||||
return cs->cset->ismbchar(cs, str, strend);
|
||||
}
|
||||
|
||||
static inline uint ob_ismbchar(const ObCharsetInfo *cs, const uchar *str,
|
||||
const uchar *strend) {
|
||||
static inline unsigned int ob_ismbchar(const ObCharsetInfo *cs, const unsigned char *str,
|
||||
const unsigned char *strend) {
|
||||
return cs->cset->ismbchar(cs, (const char *)(str), (const char *)(strend));
|
||||
}
|
||||
#define ob_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a)))
|
||||
@ -503,8 +406,8 @@ static inline uint ob_ismbchar(const ObCharsetInfo *cs, const uchar *str,
|
||||
|
||||
typedef struct ob_uni_ctype
|
||||
{
|
||||
uchar pctype;
|
||||
uchar *ctype;
|
||||
unsigned char pctype;
|
||||
unsigned char *ctype;
|
||||
} ObUniCtype;
|
||||
|
||||
extern ObUniCtype ob_uni_ctype[256];
|
||||
@ -534,7 +437,6 @@ extern ObCharsetInfo ob_charset_gb18030_2022_radical_cs;
|
||||
extern ObCharsetInfo ob_charset_gb18030_2022_stroke_ci;
|
||||
extern ObCharsetInfo ob_charset_gb18030_2022_stroke_cs;
|
||||
extern ObCharsetInfo ob_charset_gb18030_2022_bin;
|
||||
#ifdef OB_BUILD_FULL_CHARSET
|
||||
extern ObCharsetInfo ob_charset_utf8mb4_unicode_ci;
|
||||
extern ObCharsetInfo ob_charset_utf16_unicode_ci;
|
||||
extern ObCharsetInfo ob_charset_utf8mb4_zh_0900_as_cs;
|
||||
@ -543,7 +445,6 @@ extern ObCharsetInfo ob_charset_utf8mb4_zh3_0900_as_cs;
|
||||
extern ObCharsetInfo ob_charset_utf8mb4_0900_bin;
|
||||
extern ObCharsetInfo ob_charset_latin1;
|
||||
extern ObCharsetInfo ob_charset_latin1_bin;
|
||||
#endif
|
||||
|
||||
extern ObCollationHandler ob_collation_mb_bin_handler;
|
||||
extern ObCharsetHandler ob_charset_utf8mb4_handler;
|
||||
@ -590,17 +491,17 @@ bool ob_like_range_simple(const ObCharsetInfo *cs,
|
||||
char *min_str, char *max_str,
|
||||
size_t *min_length, size_t *max_length);
|
||||
|
||||
bool ob_propagate_simple(const ObCharsetInfo *cs, const uchar *str,
|
||||
bool ob_propagate_simple(const ObCharsetInfo *cs, const unsigned char *str,
|
||||
size_t len);
|
||||
bool ob_propagate_complex(const ObCharsetInfo *cs, const uchar *str,
|
||||
bool ob_propagate_complex(const ObCharsetInfo *cs, const unsigned char *str,
|
||||
size_t len);
|
||||
|
||||
void ob_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
|
||||
uint flags, uint level);
|
||||
void ob_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *strend,
|
||||
unsigned int flags, unsigned int level);
|
||||
|
||||
size_t ob_strxfrm_pad_desc_and_reverse(const ObCharsetInfo *cs,
|
||||
uchar *str, uchar *frmend, uchar *strend,
|
||||
uint nweights, uint flags, uint level);
|
||||
unsigned char *str, unsigned char *frmend, unsigned char *strend,
|
||||
unsigned int nweights, unsigned int flags, unsigned int level);
|
||||
extern "C" int64_t ob_strntoll(const char *ptr, size_t len, int base, char **end, int *err);
|
||||
extern "C" int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err);
|
||||
|
||||
@ -621,17 +522,17 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
|
||||
const char *wildstr,const char *wildend,
|
||||
int escape, int w_one, int w_many, int recurse_level);
|
||||
|
||||
uint ob_instr_mb(const ObCharsetInfo *cs,
|
||||
unsigned int ob_instr_mb(const ObCharsetInfo *cs,
|
||||
const char *b, size_t b_length,
|
||||
const char *s, size_t s_length,
|
||||
ob_match_t *match, uint nmatch);
|
||||
ob_match_t *match, unsigned int nmatch);
|
||||
|
||||
void ob_hash_sort_simple(const ObCharsetInfo *cs,
|
||||
const uchar *key, size_t len,
|
||||
const unsigned char *key, size_t len,
|
||||
ulong *nr1, ulong *nr2,
|
||||
const bool calc_end_space, hash_algo hash_algo);
|
||||
|
||||
const uchar *skip_trailing_space(const uchar *ptr,size_t len, bool is_utf16);
|
||||
const unsigned char *skip_trailing_space(const unsigned char *ptr,size_t len, bool is_utf16);
|
||||
|
||||
size_t ob_numchars_mb(const ObCharsetInfo *cs __attribute__((unused)), const char *pos, const char *end);
|
||||
|
||||
@ -640,7 +541,7 @@ size_t ob_charpos_mb(const ObCharsetInfo *cs __attribute__((unused)), const char
|
||||
size_t ob_max_bytes_charpos_mb(const ObCharsetInfo *cs __attribute__((unused)), const char *pos, const char *end, size_t max_bytes, size_t *char_len);
|
||||
|
||||
int ob_mb_ctype_mb(const ObCharsetInfo *cs __attribute__((unused)), int *ctype,
|
||||
const uchar *s, const uchar *e);
|
||||
const unsigned char *s, const unsigned char *e);
|
||||
|
||||
size_t ob_caseup_mb(const ObCharsetInfo *, char *src, size_t srclen,
|
||||
char *dst, size_t dstlen);
|
||||
@ -661,18 +562,18 @@ size_t ob_lengthsp_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const char *ptr, size_t length);
|
||||
|
||||
int ob_strnncoll_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const uchar *s, size_t slen,
|
||||
const uchar *t, size_t tlen,
|
||||
const unsigned char *s, size_t slen,
|
||||
const unsigned char *t, size_t tlen,
|
||||
bool t_is_prefix);
|
||||
|
||||
int ob_strnncollsp_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const uchar *a, size_t a_length,
|
||||
const uchar *b, size_t b_length,
|
||||
const unsigned char *a, size_t a_length,
|
||||
const unsigned char *b, size_t b_length,
|
||||
bool diff_if_only_endspace_difference);
|
||||
|
||||
size_t ob_strnxfrm_mb(const ObCharsetInfo *,
|
||||
uchar *dst, size_t dstlen, uint nweights,
|
||||
const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
|
||||
unsigned char *dst, size_t dstlen, unsigned int nweights,
|
||||
const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);
|
||||
|
||||
int ob_wildcmp_mb_bin(const ObCharsetInfo *cs,
|
||||
const char *str,const char *str_end,
|
||||
@ -680,22 +581,22 @@ int ob_wildcmp_mb_bin(const ObCharsetInfo *cs,
|
||||
int escape, int w_one, int w_many);
|
||||
|
||||
void ob_hash_sort_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const uchar *key, size_t len, ulong *nr1, ulong *nr2,
|
||||
const unsigned char *key, size_t len, ulong *nr1, ulong *nr2,
|
||||
const bool calc_end_space, hash_algo hash_algo);
|
||||
|
||||
uint32 ob_convert(char *to, uint32 to_length, const ObCharsetInfo *to_cs,
|
||||
const char *from, uint32 from_length,
|
||||
const ObCharsetInfo *from_cs,
|
||||
bool trim_incomplete_tail,
|
||||
const ob_wc_t replaced_char, uint *errors);
|
||||
const ob_wc_t replaced_char, unsigned int *errors);
|
||||
|
||||
size_t ob_strnxfrm_unicode_full_bin(const ObCharsetInfo *cs,
|
||||
uchar *dst, size_t dstlen, uint nweights,
|
||||
const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
|
||||
unsigned char *dst, size_t dstlen, unsigned int nweights,
|
||||
const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);
|
||||
|
||||
size_t ob_strnxfrm_unicode_full_bin_varlen(const struct ObCharsetInfo* cs,
|
||||
uchar* dst, size_t dst_len, uint nweights,
|
||||
const uchar *src, size_t srclen,
|
||||
unsigned char* dst, size_t dst_len, unsigned int nweights,
|
||||
const unsigned char *src, size_t srclen,
|
||||
bool is_memcmp, bool *is_valid_unicode);
|
||||
|
||||
bool ob_like_range_generic(const ObCharsetInfo *cs, const char *ptr,
|
||||
@ -705,12 +606,12 @@ bool ob_like_range_generic(const ObCharsetInfo *cs, const char *ptr,
|
||||
size_t *max_length);
|
||||
|
||||
size_t ob_strnxfrm_unicode(const ObCharsetInfo *cs,
|
||||
uchar *dst, size_t dstlen, uint nweights,
|
||||
const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
|
||||
unsigned char *dst, size_t dstlen, unsigned int nweights,
|
||||
const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);
|
||||
|
||||
size_t ob_strnxfrm_unicode_varlen(const struct ObCharsetInfo* cs,
|
||||
uchar* dst, size_t dst_len, uint nweights,
|
||||
const uchar *src, size_t srclen,
|
||||
unsigned char* dst, size_t dst_len, unsigned int nweights,
|
||||
const unsigned char *src, size_t srclen,
|
||||
bool is_memcmp, bool *is_valid_unicode);
|
||||
|
||||
int ob_wildcmp_unicode(const ObCharsetInfo *cs,
|
||||
@ -719,8 +620,8 @@ int ob_wildcmp_unicode(const ObCharsetInfo *cs,
|
||||
int escape, int w_one, int w_many,
|
||||
ObUnicaseInfo *weights);
|
||||
|
||||
size_t ob_strxfrm_pad(const ObCharsetInfo *cs, uchar *str, uchar *frmend,
|
||||
uchar *strend, uint nweights, uint flags);
|
||||
size_t ob_strxfrm_pad(const ObCharsetInfo *cs, unsigned char *str, unsigned char *frmend,
|
||||
unsigned char *strend, unsigned int nweights, unsigned int flags);
|
||||
|
||||
size_t ob_strnxfrmlen_simple(const struct ObCharsetInfo *, size_t);
|
||||
|
||||
@ -728,8 +629,8 @@ size_t ob_strnxfrmlen_unicode_full_bin(const struct ObCharsetInfo *, size_t);
|
||||
|
||||
size_t ob_strnxfrmlen_utf8mb4(const struct ObCharsetInfo *, size_t);
|
||||
|
||||
uint ob_mbcharlen_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
uint c __attribute__((unused)));
|
||||
unsigned int ob_mbcharlen_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
unsigned int c __attribute__((unused)));
|
||||
|
||||
size_t ob_numchars_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const char *b, const char *e);
|
||||
@ -750,7 +651,7 @@ size_t ob_lengthsp_binary(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
size_t length);
|
||||
|
||||
int ob_mb_ctype_8bit(const ObCharsetInfo *cs, int *ctype,
|
||||
const uchar *s, const uchar *e);
|
||||
const unsigned char *s, const unsigned char *e);
|
||||
|
||||
size_t ob_well_formed_len_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const char *start, const char *end,
|
||||
|
@ -18,60 +18,18 @@
|
||||
* - initial release
|
||||
*
|
||||
*/
|
||||
#ifndef OB_BUILD_FULL_CHARSET
|
||||
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
|
||||
static unsigned char ctype_bin[]=
|
||||
{
|
||||
0,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||
132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
|
||||
16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
|
||||
16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
|
||||
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
};
|
||||
|
||||
static unsigned char bin_char_array[] =
|
||||
{
|
||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
|
||||
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
|
||||
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
|
||||
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
|
||||
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
|
||||
96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
|
||||
112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
|
||||
128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
|
||||
144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
|
||||
160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
|
||||
176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
|
||||
192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
|
||||
208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
|
||||
224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
|
||||
240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
|
||||
};
|
||||
|
||||
#include "lib/charset/ob_ctype_bin_tab.h"
|
||||
|
||||
unsigned int ob_mbcharlen_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
unsigned int c __attribute__((unused)))
|
||||
unsigned int c __attribute__((unused)))
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
size_t ob_numchars_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const char *begin, const char *end)
|
||||
const char *begin, const char *end)
|
||||
{
|
||||
return (size_t) (end - begin);
|
||||
}
|
||||
@ -111,9 +69,9 @@ size_t ob_lengthsp_binary(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
static int ob_mb_wc_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
ob_wc_t *wc,
|
||||
const unsigned char *str,
|
||||
const unsigned char *end __attribute__((unused)))
|
||||
ob_wc_t *wc,
|
||||
const unsigned char *str,
|
||||
const unsigned char *end __attribute__((unused)))
|
||||
{
|
||||
if (str >= end) {
|
||||
return OB_CS_TOOSMALL;
|
||||
@ -125,9 +83,9 @@ static int ob_mb_wc_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
|
||||
|
||||
static int ob_wc_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
ob_wc_t wc,
|
||||
unsigned char *str,
|
||||
unsigned char *end __attribute__((unused)))
|
||||
ob_wc_t wc,
|
||||
unsigned char *str,
|
||||
unsigned char *end __attribute__((unused)))
|
||||
{
|
||||
if (str >= end) {
|
||||
return OB_CS_TOOSMALL;
|
||||
@ -158,7 +116,7 @@ static size_t ob_case_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
return srclen;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int ob_strnncoll_8bit_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const uchar *s, size_t slen,
|
||||
const uchar *t, size_t tlen,
|
||||
@ -247,19 +205,18 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
|
||||
const char *wild_str,const char *wild_end,
|
||||
int escape_char, int w_one, int w_many, int recurse_level)
|
||||
{
|
||||
int result= -1;
|
||||
|
||||
int result= -1;
|
||||
while (wild_str != wild_end) {
|
||||
while ((*wild_str == escape_char) || (*wild_str != w_many && *wild_str != w_one)) {
|
||||
if (*wild_str == escape_char && wild_str+1 != wild_end) {
|
||||
wild_str++;
|
||||
wild_str++;
|
||||
}
|
||||
if (str == str_end || likeconv(cs,*wild_str++) != likeconv(cs,*str++)) {
|
||||
return(1);
|
||||
} else if (wild_str == wild_end) {
|
||||
return(str != str_end);
|
||||
return(str != str_end);
|
||||
} else {
|
||||
result=1;
|
||||
result=1;
|
||||
}
|
||||
}
|
||||
if (*wild_str == w_one) {
|
||||
@ -272,7 +229,7 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
|
||||
} while (++wild_str < wild_end && *wild_str == w_one);
|
||||
if (wild_str == wild_end) break;
|
||||
}
|
||||
if (*wild_str == w_many) {
|
||||
if (*wild_str == w_many) {
|
||||
unsigned char cmp;
|
||||
wild_str++;
|
||||
for (; wild_str != wild_end ; wild_str++) {
|
||||
@ -286,14 +243,14 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
|
||||
continue;
|
||||
}
|
||||
}
|
||||
break;
|
||||
break;
|
||||
}
|
||||
if (wild_str == wild_end) {
|
||||
return(0);
|
||||
return(0);
|
||||
} else if (str == str_end) {
|
||||
return(-1);
|
||||
return(-1);
|
||||
} else if ((cmp= *wild_str) == escape_char && wild_str+1 != wild_end) {
|
||||
cmp= *++wild_str;
|
||||
cmp= *++wild_str;
|
||||
}
|
||||
|
||||
INC_PTR(cs,wild_str,wild_end);
|
||||
@ -303,12 +260,12 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
|
||||
str++;
|
||||
}
|
||||
if (str++ == str_end) {
|
||||
return(-1);
|
||||
return(-1);
|
||||
}
|
||||
{
|
||||
int tmp=ob_wildcmp_bin_impl(cs,str,str_end,
|
||||
wild_str,wild_end,escape_char,
|
||||
w_one, w_many, recurse_level + 1);
|
||||
wild_str,wild_end,escape_char,
|
||||
w_one, w_many, recurse_level + 1);
|
||||
if (tmp <= 0) {
|
||||
return(tmp);
|
||||
} else if (str == str_end) {
|
||||
@ -336,9 +293,9 @@ int ob_wildcmp_bin(const ObCharsetInfo *cs,
|
||||
|
||||
static
|
||||
unsigned int ob_instr_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const char *begin, size_t b_length,
|
||||
const char *s, size_t s_length,
|
||||
ob_match_t *match, unsigned int nmatch)
|
||||
const char *begin, size_t b_length,
|
||||
const char *s, size_t s_length,
|
||||
ob_match_t *match, unsigned int nmatch)
|
||||
{
|
||||
const unsigned char *str, *search, *end, *search_end;
|
||||
|
||||
@ -349,7 +306,7 @@ unsigned int ob_instr_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
match->end= 0;
|
||||
match->mb_len= 0;
|
||||
}
|
||||
return 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
str= (const unsigned char*) begin;
|
||||
@ -369,7 +326,6 @@ loop:
|
||||
goto loop;
|
||||
}
|
||||
}
|
||||
|
||||
if (nmatch > 0) {
|
||||
match[0].beg= 0;
|
||||
match[0].end= (size_t) (str- (const unsigned char*)begin-1);
|
||||
@ -381,7 +337,7 @@ loop:
|
||||
match[1].mb_len= match[1].end-match[1].beg;
|
||||
}
|
||||
}
|
||||
return 2;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -409,9 +365,10 @@ void ob_hash_sort_8bit_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
}
|
||||
}
|
||||
void ob_hash_sort_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const unsigned char *key, size_t len, unsigned long int *nr1, unsigned long int *nr2,
|
||||
const bool calc_end_space,
|
||||
hash_algo hash_algo)
|
||||
const unsigned char *key, size_t len,
|
||||
unsigned long int *nr1, unsigned long int *nr2,
|
||||
const bool calc_end_space,
|
||||
hash_algo hash_algo)
|
||||
{
|
||||
const unsigned char *pos = key;
|
||||
key+= len;
|
||||
@ -431,8 +388,8 @@ void ob_hash_sort_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
|
||||
static ObCharsetHandler ob_charset_handler=
|
||||
{
|
||||
NULL,
|
||||
ob_mbcharlen_8bit,
|
||||
NULL,
|
||||
ob_mbcharlen_8bit,
|
||||
ob_numchars_8bit,
|
||||
ob_charpos_8bit,
|
||||
ob_max_bytes_charpos_8bit,
|
||||
@ -490,39 +447,37 @@ ObCollationHandler ob_collation_binary_handler =
|
||||
|
||||
ObCharsetInfo ob_charset_bin =
|
||||
{
|
||||
63,0,0,
|
||||
OB_CS_COMPILED|OB_CS_BINSORT|OB_CS_PRIMARY,
|
||||
"binary",
|
||||
"binary",
|
||||
"",
|
||||
NULL,
|
||||
NULL,
|
||||
ctype_bin,
|
||||
bin_char_array,
|
||||
bin_char_array,
|
||||
NULL,
|
||||
NULL,
|
||||
&ob_unicase_default,
|
||||
NULL,
|
||||
NULL,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
255,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
1,
|
||||
&ob_charset_handler,
|
||||
&ob_collation_binary_handler,
|
||||
PAD_SPACE
|
||||
63,0,0,
|
||||
OB_CS_COMPILED|OB_CS_BINSORT|OB_CS_PRIMARY,
|
||||
"binary",
|
||||
"binary",
|
||||
"",
|
||||
NULL,
|
||||
NULL,
|
||||
ctype_bin,
|
||||
bin_char_array,
|
||||
bin_char_array,
|
||||
NULL,
|
||||
NULL,
|
||||
&ob_unicase_default,
|
||||
NULL,
|
||||
NULL,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
0,
|
||||
255,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
1,
|
||||
&ob_charset_handler,
|
||||
&ob_collation_binary_handler,
|
||||
PAD_SPACE
|
||||
};
|
||||
|
||||
|
||||
#undef likeconv
|
||||
#undef INC_PTR
|
||||
|
||||
#endif
|
51
deps/oblib/src/lib/charset/ob_ctype_bin_tab.h
vendored
Normal file
51
deps/oblib/src/lib/charset/ob_ctype_bin_tab.h
vendored
Normal file
@ -0,0 +1,51 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
/*
 * Character-class flag table used by the binary charset (ob_charset_bin).
 * It holds 257 entries: a leading 0 followed by 256 flag bytes.
 * NOTE(review): presumably entry [b + 1] describes byte value b and the
 * leading 0 is a sentinel slot; confirm against the users of `ctype`.
 */
static unsigned char ctype_bin[]=
{
  /* [0] */
  0,
  /* [1..16] */
  32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
  /* [17..32] */
  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  /* [33..48] */
  72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  /* [49..64] */
  132, 132, 132, 132, 132, 132, 132, 132, 132, 132, 16, 16, 16, 16, 16, 16,
  /* [65..80] */
  16, 129, 129, 129, 129, 129, 129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* [81..96] */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
  /* [97..112] */
  16, 130, 130, 130, 130, 130, 130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* [113..128] */
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
  /* [129..256]: no flags set */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
|
||||
|
||||
/*
 * Identity byte map: bin_char_array[b] == b for every b in 0..255.
 * The binary charset passes it in two consecutive table slots of
 * ob_charset_bin, so those mappings leave every byte unchanged.
 */
static unsigned char bin_char_array[] =
{
  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
  0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
  0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
  0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
  0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
  0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F,
  0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F,
  0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF,
  0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF,
  0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF,
  0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF,
  0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF,
  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF
};
|
1868
deps/oblib/src/lib/charset/ob_ctype_gb18030.cc
vendored
Normal file
1868
deps/oblib/src/lib/charset/ob_ctype_gb18030.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
461
deps/oblib/src/lib/charset/ob_ctype_gbk.cc
vendored
Normal file
461
deps/oblib/src/lib/charset/ob_ctype_gbk.cc
vendored
Normal file
@ -0,0 +1,461 @@
|
||||
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "lib/charset/ob_mysql_global.h"
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
#include "lib/charset/ob_ctype_gbk_tab.h"
|
||||
|
||||
/*
 * GBK byte helpers.
 *   isgbkhead(c)   - c is a valid lead byte   (0x81..0xFE)
 *   isgbktail(c)   - c is a valid trail byte  (0x40..0x7E or 0x80..0xFE)
 *   isgbkcode(c,d) - (c, d) forms a valid two-byte GBK sequence
 *   gbkcode(c,d)   - packs lead/trail bytes into one 16-bit code (lead in the high byte)
 *   gbkhead(e)     - high byte of a packed code
 *   gbktail(e)     - low byte of a packed code
 * Every macro argument is parenthesized so that compound expressions
 * (for example `a | b`) expand with the intended precedence.
 */
#define isgbkhead(c) (0x81<=(unsigned char)(c) && (unsigned char)(c)<=0xfe)
#define isgbktail(c) ((0x40<=(unsigned char)(c) && (unsigned char)(c)<=0x7e) || \
                      (0x80<=(unsigned char)(c) && (unsigned char)(c)<=0xfe))

#define isgbkcode(c,d) (isgbkhead(c) && isgbktail(d))
#define gbkcode(c,d) ((((unsigned int) (unsigned char) (c)) <<8) | (unsigned char)(d))
#define gbkhead(e) ((unsigned char)((e)>>8))
#define gbktail(e) ((unsigned char)((e)&0xff))
|
||||
|
||||
/*
 * Map a packed two-byte GBK code to its collation weight.
 *
 * The trail byte is turned into a column index (0x40..0x7E -> 0..0x3E,
 * 0x80..0xFE -> 0x3F..0xBD), the lead byte selects a row of 0xBE columns
 * starting at lead 0x81, and the resulting slot is looked up in gbk_order
 * (defined outside this block). The weight is returned offset by 0x8100.
 */
static uint16 gbksortorder(uint16 i)
{
  const int lead_row = gbkhead(i) - 0x81;
  const uint trail = gbktail(i);
  /* trail bytes above 0x7f skip the unused 0x7f position */
  uint slot = (trail > 0x7f) ? (trail - 0x41) : (trail - 0x40);
  slot += lead_row * 0xbe;
  return 0x8100 + gbk_order[slot];
}
|
||||
|
||||
|
||||
int ob_strnncoll_gbk_internal(const unsigned char **a_res, const unsigned char **b_res,
|
||||
size_t length)
|
||||
{
|
||||
const unsigned char *a= *a_res, *b= *b_res;
|
||||
unsigned int a_char,b_char;
|
||||
|
||||
while (length--)
|
||||
{
|
||||
if ((length > 0) && isgbkcode(*a,*(a+1)) && isgbkcode(*b, *(b+1)))
|
||||
{
|
||||
a_char= gbkcode(*a,*(a+1));
|
||||
b_char= gbkcode(*b,*(b+1));
|
||||
if (a_char != b_char)
|
||||
return ((int) gbksortorder((uint16_t) a_char) -
|
||||
(int) gbksortorder((uint16_t) b_char));
|
||||
a+= 2;
|
||||
b+= 2;
|
||||
length--;
|
||||
}
|
||||
else if (sort_order_gbk[*a++] != sort_order_gbk[*b++])
|
||||
return ((int) sort_order_gbk[a[-1]] -
|
||||
(int) sort_order_gbk[b[-1]]);
|
||||
}
|
||||
*a_res= a;
|
||||
*b_res= b;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int ob_strnncoll_gbk(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const unsigned char *a, size_t a_length,
|
||||
const unsigned char *b, size_t b_length,
|
||||
bool b_is_prefix)
|
||||
{
|
||||
size_t length = OB_MIN(a_length, b_length);
|
||||
int res= ob_strnncoll_gbk_internal(&a, &b, length);
|
||||
return res ? res : (int) ((b_is_prefix ? length : a_length) - b_length);
|
||||
}
|
||||
|
||||
|
||||
static int ob_strnncollsp_gbk(const ObCharsetInfo * cs __attribute__((unused)),
|
||||
const unsigned char *a, size_t a_length,
|
||||
const unsigned char *b, size_t b_length,
|
||||
bool diff_if_only_endspace_difference)
|
||||
{
|
||||
size_t length = OB_MIN(a_length, b_length);
|
||||
int res = ob_strnncoll_gbk_internal(&a, &b, length);
|
||||
|
||||
if (!res && a_length != b_length) {
|
||||
const unsigned char *end;
|
||||
int swap= 1;
|
||||
if (diff_if_only_endspace_difference) {
|
||||
return a_length < b_length ? -1 : 1;
|
||||
} else if (a_length < b_length) {
|
||||
a_length = b_length;
|
||||
a = b;
|
||||
swap= -1;
|
||||
res= -res;
|
||||
}
|
||||
for (end= a + a_length-length; a < end ; a++) {
|
||||
if (*a != ' ') {
|
||||
return (*a < ' ') ? -swap : swap;
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
static size_t
|
||||
ob_strnxfrm_gbk(const ObCharsetInfo *cs,
|
||||
unsigned char *dst, size_t dstlen, unsigned int nweights,
|
||||
const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode)
|
||||
{
|
||||
unsigned char *d0= dst;
|
||||
unsigned char *de= dst + dstlen;
|
||||
const unsigned char *se= src + srclen;
|
||||
const unsigned char *sort_order= cs->sort_order;
|
||||
*is_valid_unicode = 1;
|
||||
|
||||
for (; dst < de && src < se && nweights; nweights--) {
|
||||
if (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) {
|
||||
uint16_t e= gbksortorder((uint16_t) gbkcode(*src, *(src + 1)));
|
||||
*dst++= gbkhead(e);
|
||||
if (dst < de) {
|
||||
*dst++= gbktail(e);
|
||||
}
|
||||
src+= 2;
|
||||
} else {
|
||||
*is_valid_unicode = is_valid_ascii(*src);
|
||||
*dst++= sort_order ? sort_order[*src++] : *src++;
|
||||
}
|
||||
}
|
||||
return ob_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
|
||||
}
|
||||
|
||||
|
||||
size_t ob_varlen_encoding_gbk_for_memcmp(const struct ObCharsetInfo* cs,
|
||||
unsigned char* dst, size_t dst_len, unsigned int nweights,
|
||||
const unsigned char *src, size_t src_len,
|
||||
bool *is_valid_unicode)
|
||||
{
|
||||
unsigned char *d0= dst;
|
||||
unsigned char *de= dst + dst_len;
|
||||
const unsigned char *se= src + src_len;
|
||||
const unsigned char *sort_order= cs->sort_order;
|
||||
*is_valid_unicode = 1;
|
||||
|
||||
for (; *is_valid_unicode && dst < de && src < se && nweights; nweights--)
|
||||
{
|
||||
if (isgbkhead(*(src)) && (se)-(src)>1 && isgbktail(*((src)+1)))
|
||||
{
|
||||
/*
|
||||
Note, it is safe not to check (src < se)
|
||||
in the code below, because ismbchar() would
|
||||
not return TRUE if src was too short
|
||||
*/
|
||||
uint16_t e= gbksortorder((uint16_t) gbkcode(*src, *(src + 1)));
|
||||
*dst++= gbkhead(e);
|
||||
if (dst < de)
|
||||
*dst++= gbktail(e);
|
||||
src+= 2;
|
||||
if (e == 0) {
|
||||
*dst++ = 0x00;
|
||||
*dst++ = 0x01;
|
||||
}
|
||||
} else {
|
||||
*is_valid_unicode = is_valid_ascii(*src);
|
||||
uint16_t e = sort_order ? sort_order[*src++] : *src++;
|
||||
*dst++ = gbkhead(e);
|
||||
*dst++ = gbktail(e);
|
||||
if (e == 0) {
|
||||
*dst++ = 0x00;
|
||||
*dst++ = 0x01;
|
||||
}
|
||||
}
|
||||
}
|
||||
*dst++ = 0x00;
|
||||
*dst++ = 0x00;
|
||||
*dst++ = 0x00;
|
||||
*dst++ = 0x00;
|
||||
return dst - d0;
|
||||
}
|
||||
|
||||
size_t ob_varlen_encoding_gbk_for_spacecmp(const struct ObCharsetInfo* cs,
|
||||
unsigned char* dst, size_t dst_len, unsigned int nweights,
|
||||
const unsigned char *src, size_t src_len,
|
||||
bool *is_valid_unicode)
|
||||
{
|
||||
unsigned char *d0= dst;
|
||||
unsigned char *de= dst + dst_len;
|
||||
const unsigned char *se= src + src_len;
|
||||
const unsigned char *sort_order= cs->sort_order;
|
||||
*is_valid_unicode = 1;
|
||||
|
||||
// trim
|
||||
while (*(se-1) == 0x20 && se>src) se--;
|
||||
for (;*is_valid_unicode && dst < de && src < se && nweights; nweights--)
|
||||
{
|
||||
int16_t space_cnt = 0;
|
||||
uint16_t e = 0;
|
||||
while (*src == 0x20)
|
||||
{
|
||||
space_cnt++;
|
||||
src++;
|
||||
}
|
||||
if (isgbkhead(*(src)) && (se)-(src)>1 && isgbktail(*((src)+1)))
|
||||
{
|
||||
/*
|
||||
Note, it is safe not to check (src < se)
|
||||
in the code below, because ismbchar() would
|
||||
not return TRUE if src was too short
|
||||
*/
|
||||
e = gbksortorder((uint16) gbkcode(*src, *(src + 1)));
|
||||
src+= 2;
|
||||
} else {
|
||||
*is_valid_unicode = is_valid_ascii(*src);
|
||||
e = sort_order ? sort_order[*src++] : *src++;
|
||||
}
|
||||
if (space_cnt != 0) {
|
||||
*dst++ = 0x00;
|
||||
*dst++ = 0x20;
|
||||
if (e > 0x20) {
|
||||
*dst++ = 0x00;
|
||||
*dst++ = 0x21;
|
||||
space_cnt = -space_cnt;
|
||||
} else {
|
||||
*dst++ = 0x00;
|
||||
*dst++ = 0x19;
|
||||
}
|
||||
*dst++ = ((unsigned char)(space_cnt >> 8));
|
||||
*dst++ = ((unsigned char)(space_cnt & 0xff));
|
||||
}
|
||||
*dst++ = gbkhead(e);
|
||||
*dst++ = gbktail(e);
|
||||
}
|
||||
*dst++ = 0x00;
|
||||
*dst++ = 0x20;
|
||||
*dst++ = 0x00;
|
||||
*dst++ = 0x20;
|
||||
|
||||
return dst - d0;
|
||||
}
|
||||
size_t ob_strnxfrm_gbk_varlen(const struct ObCharsetInfo* cs,
|
||||
unsigned char* dst, size_t dst_len, unsigned int nweights,
|
||||
const unsigned char *src, size_t srclen,
|
||||
bool is_memcmp, bool *is_valid_unicode)
|
||||
{
|
||||
if (is_memcmp) {
|
||||
return ob_varlen_encoding_gbk_for_memcmp(cs, dst, dst_len, nweights,
|
||||
src, srclen, is_valid_unicode);
|
||||
} else {
|
||||
return ob_varlen_encoding_gbk_for_spacecmp(cs, dst, dst_len, nweights,
|
||||
src, srclen, is_valid_unicode);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static unsigned int ismbchar_gbk(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const char* p, const char *e)
|
||||
{
|
||||
return (isgbkhead(*(p)) && (e)-(p)>1 && isgbktail(*((p)+1))? 2: 0);
|
||||
}
|
||||
|
||||
static unsigned int mbcharlen_gbk(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
unsigned int c)
|
||||
{
|
||||
return (isgbkhead(c)? 2 : 1);
|
||||
}
|
||||
static int func_uni_gbk_onechar(int code){
|
||||
if ((code>=0x00A4)&&(code<=0x0451)) {
|
||||
return(tab_uni_gbk0[code-0x00A4]);
|
||||
} else if ((code>=0x2010)&&(code<=0x2312)) {
|
||||
return(tab_uni_gbk1[code-0x2010]);
|
||||
} else if ((code>=0x2460)&&(code<=0x2642)) {
|
||||
return(tab_uni_gbk2[code-0x2460]);
|
||||
} else if ((code>=0x3000)&&(code<=0x3129)) {
|
||||
return(tab_uni_gbk3[code-0x3000]);
|
||||
} else if ((code>=0x3220)&&(code<=0x32A3)) {
|
||||
return(tab_uni_gbk4[code-0x3220]);
|
||||
} else if ((code>=0x338E)&&(code<=0x33D5)) {
|
||||
return(tab_uni_gbk5[code-0x338E]);
|
||||
} else if ((code>=0x4E00)&&(code<=0x9FA5)) {
|
||||
return(tab_uni_gbk6[code-0x4E00]);
|
||||
} else if ((code>=0xE000)&&(code<=0xE864)) {
|
||||
return(tab_uni_gbk_pua[code-0xE000]);
|
||||
} else if ((code>=0xF92C)&&(code<=0xFA29)) {
|
||||
return(tab_uni_gbk7[code-0xF92C]);
|
||||
} else if ((code>=0xFE30)&&(code<=0xFFE5)) {
|
||||
return(tab_uni_gbk8[code-0xFE30]);
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
static int
|
||||
ob_wc_mb_gbk(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
ob_wc_t wc, unsigned char *s, unsigned char *e)
|
||||
{
|
||||
int code;
|
||||
|
||||
if (s >= e) {
|
||||
return OB_CS_TOOSMALL;
|
||||
} else if ((unsigned int) wc < 0x80) {
|
||||
s[0]= (unsigned char) wc;
|
||||
return 1;
|
||||
} else if (!(code=func_uni_gbk_onechar(wc))) {
|
||||
return OB_CS_ILUNI;
|
||||
} else if (s+2>e) {
|
||||
return OB_CS_TOOSMALL2;
|
||||
}
|
||||
s[0] = code >> 8;
|
||||
s[1] = code & 0xFF;
|
||||
return 2;
|
||||
}
|
||||
|
||||
static int ob_mb_wc_gbk(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
ob_wc_t *pwc, const unsigned char *s, const unsigned char *e)
|
||||
{
|
||||
int hi;
|
||||
if (s >= e) {
|
||||
return OB_CS_TOOSMALL;
|
||||
} else if ((hi = s[0]) < 0x80) {
|
||||
pwc[0]=hi;
|
||||
return 1;
|
||||
} else if (s+2>e) {
|
||||
return OB_CS_TOOSMALL2;
|
||||
} else if (!(pwc[0]=func_gbk_uni_onechar( (hi<<8) + s[1]))) {
|
||||
return -2;
|
||||
}
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
|
||||
static size_t ob_well_formed_len_gbk(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
size_t pos, int *error)
|
||||
{
|
||||
const char *b0= b;
|
||||
const char *emb= e - 1;
|
||||
*error= 0;
|
||||
|
||||
while (pos-- && b < e) {
|
||||
if ((unsigned char) b[0] < 128) {
|
||||
b++;
|
||||
} else if ((b < emb) && isgbkcode((unsigned char)*b, (unsigned char)b[1])) {
|
||||
b+= 2;
|
||||
} else {
|
||||
*error= 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (size_t) (b - b0);
|
||||
}
|
||||
|
||||
/*
 * Collation callbacks for gbk_chinese_ci: the GBK-specific comparison and
 * sort-key routines from this file, combined with the generic multi-byte
 * (`*_mb`) and simple (`*_simple`) helpers. The order of the entries must
 * match the member order of ObCollationHandler.
 */
static ObCollationHandler ob_collation_gbk_ci_handler =
{
  NULL,
  NULL,
  /* comparison */
  ob_strnncoll_gbk,
  ob_strnncollsp_gbk,
  /* sort keys */
  ob_strnxfrm_gbk,
  ob_strnxfrmlen_simple,
  ob_strnxfrm_gbk_varlen,
  /* pattern matching */
  ob_like_range_mb,
  ob_wildcmp_mb,
  NULL,
  ob_instr_mb,
  /* hashing and propagation */
  ob_hash_sort_simple,
  ob_propagate_simple
};
|
||||
|
||||
/*
 * Charset callbacks shared by both GBK collations. GBK-specific routines come
 * from this file; everything else reuses the generic multi-byte (`*_mb`) and
 * 8-bit (`*_8bit`) helpers. The order of the entries must match the member
 * order of ObCharsetHandler; slots that are commented out are kept as
 * placeholders for callbacks this handler does not carry.
 */
static ObCharsetHandler ob_charset_gbk_handler=
{
  ismbchar_gbk,             /* ismbchar */
  mbcharlen_gbk,            /* mbcharlen */
  ob_numchars_mb,
  ob_charpos_mb,
  ob_max_bytes_charpos_mb,
  ob_well_formed_len_gbk,
  ob_lengthsp_8bit,
  /* ob_numcells_8bit, */
  ob_mb_wc_gbk,
  ob_wc_mb_gbk,
  ob_mb_ctype_mb,
  /* ob_caseup_str_mb, */
  /* ob_casedn_str_mb, */
  ob_caseup_mb,
  ob_casedn_mb,
  ob_fill_8bit,
  ob_strntol_8bit,
  ob_strntoul_8bit,
  ob_strntoll_8bit,
  ob_strntoull_8bit,
  ob_strntod_8bit,
  ob_strntoull10rnd_8bit,
  ob_scan_8bit
};
|
||||
|
||||
|
||||
/*
 * Descriptor for the gbk_chinese_ci collation (id 28), the primary collation
 * of the "gbk" character set. Entries are positional and must follow the
 * member order of ObCharsetInfo.
 */
ObCharsetInfo ob_charset_gbk_chinese_ci=
{
  28,0,0,                                        /* number */
  OB_CS_COMPILED|OB_CS_PRIMARY|OB_CS_STRNXFRM,   /* state */
  "gbk",                                         /* cs name */
  "gbk_chinese_ci",                              /* name */
  "",                                            /* comment */
  NULL,                                          /* tailoring */
  NULL,                                          /* coll_param */
  ctype_gbk,
  to_lower_gbk,
  to_upper_gbk,
  sort_order_gbk,
  NULL,                                          /* uca */
  &ob_caseinfo_gbk,                              /* caseinfo */
  NULL,                                          /* state_map */
  NULL,                                          /* ident_map */
  1,                                             /* strxfrm_multiply */
  1,                                             /* caseup_multiply */
  1,                                             /* casedn_multiply */
  1,                                             /* mbminlen */
  2,                                             /* mbmaxlen */
  0,                                             /* min_sort_char */
  0xA967,                                        /* max_sort_char */
  ' ',                                           /* pad char */
  1,                                             /* escape_with_backslash_is_dangerous */
  1,                                             /* levels_for_compare */
  1,                                             /* levels_for_order */
  &ob_charset_gbk_handler,
  &ob_collation_gbk_ci_handler,
  PAD_SPACE
};
|
||||
|
||||
/*
 * Descriptor for the gbk_bin collation (id 87): the "gbk" character set with
 * binary ordering, so no sort_order table is supplied and the generic
 * multi-byte binary collation handler is used. Entries are positional and
 * must follow the member order of ObCharsetInfo.
 */
ObCharsetInfo ob_charset_gbk_bin=
{
  87,0,0,                          /* number */
  OB_CS_COMPILED|OB_CS_BINSORT,    /* state */
  "gbk",                           /* cs name */
  "gbk_bin",                       /* name */
  "",                              /* comment */
  NULL,                            /* tailoring */
  NULL,                            /* coll_param */
  ctype_gbk,
  to_lower_gbk,
  to_upper_gbk,
  NULL,                            /* sort_order */
  NULL,                            /* uca */
  &ob_caseinfo_gbk,                /* caseinfo */
  NULL,                            /* state_map */
  NULL,                            /* ident_map */
  1,                               /* strxfrm_multiply */
  1,                               /* caseup_multiply */
  1,                               /* casedn_multiply */
  1,                               /* mbminlen */
  2,                               /* mbmaxlen */
  0,                               /* min_sort_char */
  0xFEFE,                          /* max_sort_char */
  ' ',                             /* pad char */
  1,                               /* escape_with_backslash_is_dangerous */
  1,                               /* levels_for_compare */
  1,                               /* levels_for_order */
  &ob_charset_gbk_handler,
  &ob_collation_mb_bin_handler,
  PAD_SPACE
};
|
File diff suppressed because it is too large
Load Diff
137
deps/oblib/src/lib/charset/ob_ctype_latin1.cc
vendored
Normal file
137
deps/oblib/src/lib/charset/ob_ctype_latin1.cc
vendored
Normal file
@ -0,0 +1,137 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#include "lib/charset/ob_mysql_global.h"
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
#include "lib/utility/ob_macro_utils.h"
|
||||
#include "lib/charset/ob_ctype_latin1_tab.h"
|
||||
|
||||
static int ob_mb_wc_latin1(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
ob_wc_t *pwc, const unsigned char *str,
|
||||
const unsigned char *end) {
|
||||
|
||||
if (str >= end) return OB_CS_TOOSMALL;
|
||||
*pwc = cs_to_uni[*str];
|
||||
return (!pwc[0] && str[0]) ? -1 : 1;
|
||||
|
||||
}
|
||||
|
||||
static int ob_wc_mb_latin1(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
ob_wc_t wc, unsigned char *str, unsigned char *end) {
|
||||
const unsigned char *pl;
|
||||
|
||||
if (str >= end) return OB_CS_TOOSMALL;
|
||||
|
||||
if (wc > 0xFFFF) return OB_CS_ILUNI;
|
||||
|
||||
pl = uni_to_cs[wc >> 8];
|
||||
str[0] = pl ? pl[wc & 0xFF] : '\0';
|
||||
return (!str[0] && wc) ? OB_CS_ILUNI : 1;
|
||||
}
|
||||
|
||||
/*
 * Charset callbacks shared by both latin1 collations. Only the Unicode
 * conversions are latin1-specific; everything else reuses the generic 8-bit
 * helpers. The order of the entries must match the member order of
 * ObCharsetHandler; commented-out names are kept as placeholders for
 * callbacks this handler does not carry.
 */
static ObCharsetHandler ob_charset_latin1_handler=
{
  //NULL,                    /* init */
  NULL,                      /* ismbchar */
  ob_mbcharlen_8bit,         /* mbcharlen */
  ob_numchars_8bit,
  ob_charpos_8bit,
  ob_max_bytes_charpos_8bit,
  ob_well_formed_len_8bit,
  ob_lengthsp_binary,
  //ob_numcells_8bit,
  ob_mb_wc_latin1,
  ob_wc_mb_latin1,
  ob_mb_ctype_8bit,
  //ob_case_str_bin,
  //ob_case_str_bin,
  ob_caseup_8bit,
  ob_casedn_8bit,
  //ob_snprintf_8bit,
  //ob_long10_to_str_8bit,
  //ob_longlong10_to_str_8bit,
  ob_fill_8bit,
  ob_strntol_8bit,
  ob_strntoul_8bit,
  ob_strntoll_8bit,
  ob_strntoull_8bit,
  ob_strntod_8bit,
  //ob_strtoll10_8bit,
  ob_strntoull10rnd_8bit,
  ob_scan_8bit
};
|
||||
|
||||
/*
 * Descriptor for the primary latin1 collation (id 8), named by
 * OB_LATIN1_SWEDISH_CI. Entries are positional and must follow the member
 * order of ObCharsetInfo.
 */
ObCharsetInfo ob_charset_latin1 = {
  8,0,0,                              /* number */
  OB_CS_COMPILED | OB_CS_PRIMARY,     /* state */
  OB_LATIN1,                          /* cs name */
  OB_LATIN1_SWEDISH_CI,               /* name */
  "cp1252 West European",             /* comment */
  NULL,                               /* tailoring */
  NULL,                               /* coll_param */
  ctype_latin1,
  to_lower_latin1,
  to_upper_latin1,
  sort_order_latin1,
  NULL,                               /* uca */
  //NULL,                             /* tab_to_uni */
  //NULL,                             /* tab_from_uni */
  &ob_unicase_default,                /* caseinfo */
  NULL,                               /* state_map */
  NULL,                               /* ident_map */
  1,                                  /* strxfrm_multiply */
  1,                                  /* caseup_multiply */
  1,                                  /* casedn_multiply */
  1,                                  /* mbminlen */
  1,                                  /* mbmaxlen */
  0,                                  /* min_sort_char */
  0xFF,                               /* max_sort_char */
  ' ',                                /* pad char */
  0,                                  /* escape_with_backslash_is_dangerous */
  1,                                  /* levels_for_compare */
  1,                                  /* levels_for_order */
  &ob_charset_latin1_handler,
  &ob_collation_8bit_simple_ci_handler,
  PAD_SPACE
};
|
||||
|
||||
/*
 * Descriptor for the binary latin1 collation (id 47), named by OB_LATIN1_BIN.
 * It supplies no sort_order table and uses the generic 8-bit binary collation
 * handler. Entries are positional and must follow the member order of
 * ObCharsetInfo.
 */
ObCharsetInfo ob_charset_latin1_bin = {
  47,0,0,                             /* number */
  OB_CS_COMPILED | OB_CS_BINSORT,     /* state */
  OB_LATIN1,                          /* cs name */
  OB_LATIN1_BIN,                      /* name */
  "cp1252 West European",             /* comment */
  NULL,                               /* tailoring */
  NULL,                               /* coll_param */
  ctype_latin1,
  to_lower_latin1,
  to_upper_latin1,
  NULL,                               /* sort_order */
  NULL,                               /* uca */
  //NULL,                             /* tab_to_uni */
  //NULL,                             /* tab_from_uni */
  &ob_unicase_default,                /* caseinfo */
  NULL,                               /* state_map */
  NULL,                               /* ident_map */
  1,                                  /* strxfrm_multiply */
  1,                                  /* caseup_multiply */
  1,                                  /* casedn_multiply */
  1,                                  /* mbminlen */
  1,                                  /* mbmaxlen */
  0,                                  /* min_sort_char */
  0xFF,                               /* max_sort_char */
  ' ',                                /* pad char */
  0,                                  /* escape_with_backslash_is_dangerous */
  1,                                  /* levels_for_compare */
  1,                                  /* levels_for_order */
  &ob_charset_latin1_handler,
  &ob_collation_8bit_bin_handler,
  PAD_SPACE
};
|
@ -1,28 +1,14 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Version: $Id
|
||||
*
|
||||
* Authors:
|
||||
* - initial release
|
||||
*
|
||||
*/
|
||||
#ifndef OB_BUILD_FULL_CHARSET
|
||||
|
||||
#include "lib/charset/ob_mysql_global.h"
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
#include "lib/utility/ob_macro_utils.h"
|
||||
*/
|
||||
static unsigned char ctype_latin1[] = {
|
||||
0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32,
|
||||
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
|
||||
@ -281,133 +267,3 @@ static unsigned char *uni_to_cs[] = {
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
|
||||
|
||||
|
||||
static int ob_mb_wc_latin1(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
ob_wc_t *pwc, const unsigned char *str, const unsigned char *end) {
|
||||
|
||||
if (str >= end) return OB_CS_TOOSMALL;
|
||||
*pwc = cs_to_uni[*str];
|
||||
return (!pwc[0] && str[0]) ? -1 : 1;
|
||||
|
||||
}
|
||||
|
||||
static int ob_wc_mb_latin1(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
ob_wc_t wc, unsigned char *str, unsigned char *end) {
|
||||
const unsigned char *pl;
|
||||
|
||||
if (str >= end) return OB_CS_TOOSMALL;
|
||||
|
||||
if (wc > 0xFFFF) return OB_CS_ILUNI;
|
||||
|
||||
pl = uni_to_cs[wc >> 8];
|
||||
str[0] = pl ? pl[wc & 0xFF] : '\0';
|
||||
return (!str[0] && wc) ? OB_CS_ILUNI : 1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static ObCharsetHandler ob_charset_latin1_handler=
|
||||
{
|
||||
//NULL, /* init */
|
||||
NULL, /* ismbchar */
|
||||
ob_mbcharlen_8bit, /* mbcharlen */
|
||||
ob_numchars_8bit,
|
||||
ob_charpos_8bit,
|
||||
ob_max_bytes_charpos_8bit,
|
||||
ob_well_formed_len_8bit,
|
||||
ob_lengthsp_binary,
|
||||
//ob_numcells_8bit,
|
||||
ob_mb_wc_latin1,
|
||||
ob_wc_mb_latin1,
|
||||
ob_mb_ctype_8bit,
|
||||
//ob_case_str_bin,
|
||||
//ob_case_str_bin,
|
||||
ob_caseup_8bit,
|
||||
ob_casedn_8bit,
|
||||
//ob_snprintf_8bit,
|
||||
//ob_long10_to_str_8bit,
|
||||
//ob_longlong10_to_str_8bit,
|
||||
ob_fill_8bit,
|
||||
ob_strntol_8bit,
|
||||
ob_strntoul_8bit,
|
||||
ob_strntoll_8bit,
|
||||
ob_strntoull_8bit,
|
||||
ob_strntod_8bit,
|
||||
//ob_strtoll10_8bit,
|
||||
ob_strntoull10rnd_8bit,
|
||||
ob_scan_8bit
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
ObCharsetInfo ob_charset_latin1 = {
|
||||
8,0,0, /* number */
|
||||
OB_CS_COMPILED | OB_CS_PRIMARY, /* state */
|
||||
OB_LATIN1, /* cs name */
|
||||
OB_LATIN1_SWEDISH_CI, /* name */
|
||||
"cp1252 West European", /* comment */
|
||||
NULL, /* tailoring */
|
||||
NULL, /* coll_param */
|
||||
ctype_latin1,
|
||||
to_lower_latin1,
|
||||
to_upper_latin1,
|
||||
sort_order_latin1,
|
||||
NULL, /* uca */
|
||||
//NULL, /* tab_to_uni */
|
||||
//NULL, /* tab_from_uni */
|
||||
&ob_unicase_default, /* caseinfo */
|
||||
NULL, /* state_map */
|
||||
NULL, /* ident_map */
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* caseup_multiply */
|
||||
1, /* casedn_multiply */
|
||||
1, /* mbminlen */
|
||||
1, /* mbmaxlen */
|
||||
0, /* min_sort_char */
|
||||
0xFF, /* max_sort_char */
|
||||
' ', /* pad char */
|
||||
0, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_compare */
|
||||
1, /* levels_for_order */
|
||||
&ob_charset_latin1_handler,
|
||||
&ob_collation_8bit_simple_ci_handler,
|
||||
PAD_SPACE};
|
||||
|
||||
ObCharsetInfo ob_charset_latin1_bin = {
|
||||
47,0,0, /* number */
|
||||
OB_CS_COMPILED | OB_CS_BINSORT, /* state */
|
||||
OB_LATIN1, /* cs name */
|
||||
OB_LATIN1_BIN, /* name */
|
||||
"cp1252 West European", /* comment */
|
||||
NULL, /* tailoring */
|
||||
NULL, /* coll_param */
|
||||
ctype_latin1,
|
||||
to_lower_latin1,
|
||||
to_upper_latin1,
|
||||
NULL, /* sort_order */
|
||||
NULL, /* uca */
|
||||
//NULL, /* tab_to_uni */
|
||||
//NULL, /* tab_from_uni */
|
||||
&ob_unicase_default, /* caseinfo */
|
||||
NULL, /* state_map */
|
||||
NULL, /* ident_map */
|
||||
1, /* strxfrm_multiply */
|
||||
1, /* caseup_multiply */
|
||||
1, /* casedn_multiply */
|
||||
1, /* mbminlen */
|
||||
1, /* mbmaxlen */
|
||||
0, /* min_sort_char */
|
||||
0xFF, /* max_sort_char */
|
||||
' ', /* pad char */
|
||||
0, /* escape_with_backslash_is_dangerous */
|
||||
1, /* levels_for_compare */
|
||||
1, /* levels_for_order */
|
||||
&ob_charset_latin1_handler,
|
||||
&ob_collation_8bit_bin_handler,
|
||||
PAD_SPACE};
|
||||
|
||||
|
||||
#endif
|
@ -10,14 +10,8 @@
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
/*
|
||||
* (C) 2017-2020 Alibaba Group Holding Limited.
|
||||
*
|
||||
* Authors:
|
||||
*/
|
||||
#ifndef OB_BUILD_FULL_CHARSET
|
||||
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
#include "lib/charset/str_uca_type.h"
|
||||
|
||||
static void __attribute__ ((noinline)) pad_max_char_help(char *str, char *end, char *buf, char buf_len)
|
||||
{
|
||||
@ -78,11 +72,11 @@ bool ob_like_range_mb_help(const ObCharsetInfo *cs,
|
||||
}
|
||||
|
||||
bool ob_like_range_mb(const ObCharsetInfo *cs,
|
||||
const char *ptr,size_t ptr_length,
|
||||
pbool escape_char, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
const char *ptr,size_t ptr_length,
|
||||
pbool escape_char, pbool w_one, pbool w_many,
|
||||
size_t res_length,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_length,size_t *max_length)
|
||||
{
|
||||
unsigned int mb_len;
|
||||
const char *end= ptr + ptr_length;
|
||||
@ -95,8 +89,7 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
|
||||
for (; ptr != end && min_str != min_end && max_char_len ; max_char_len--) {
|
||||
if (*ptr == escape_char && ptr+1 != end) {
|
||||
ptr++;
|
||||
} else if (*ptr == w_one ||
|
||||
*ptr == w_many) {
|
||||
} else if (*ptr == w_one || *ptr == w_many) {
|
||||
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
|
||||
}
|
||||
mb_len= ob_ismbchar(cs, ptr, end);
|
||||
@ -114,7 +107,7 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
|
||||
if (ptr[1] == w_one || ptr[1] == w_many) {
|
||||
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
|
||||
} else if (ob_uca_can_be_contraction_tail(contractions, (unsigned char) ptr[1]) &&
|
||||
ob_uca_contraction2_weight(contractions, (unsigned char) ptr[0], ptr[1])) {
|
||||
ob_uca_contraction2_weight(contractions, (unsigned char) ptr[0], ptr[1])) {
|
||||
if (max_char_len == 1 || min_str + 1 >= min_end) {
|
||||
return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
|
||||
}
|
||||
@ -150,7 +143,7 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
|
||||
const char *wild_str,const char *wild_end,
|
||||
int escape_char, int w_one, int w_many, int recurse_level)
|
||||
{
|
||||
int result= -1;
|
||||
int result= -1;
|
||||
while (wild_str != wild_end) {
|
||||
while ((*wild_str == escape_char) || (*wild_str != w_many && *wild_str != w_one)) {
|
||||
int l;
|
||||
@ -158,59 +151,56 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
|
||||
wild_str++;
|
||||
}
|
||||
if ((l = ob_ismbchar(cs, wild_str, wild_end))) {
|
||||
if (str+l > str_end || memcmp(str, wild_str, l) != 0)
|
||||
return 1;
|
||||
str += l;
|
||||
wild_str += l;
|
||||
if (str+l > str_end || memcmp(str, wild_str, l) != 0)
|
||||
return 1;
|
||||
str += l;
|
||||
wild_str += l;
|
||||
} else if (str == str_end || likeconv(cs,*wild_str++) != likeconv(cs,*str++)) {
|
||||
return(1);
|
||||
return(1);
|
||||
}
|
||||
if (wild_str == wild_end) {
|
||||
return (str != str_end);
|
||||
}
|
||||
result=1;
|
||||
return (str != str_end);
|
||||
}
|
||||
result=1;
|
||||
}
|
||||
if (*wild_str == w_one) {
|
||||
do {
|
||||
if (str == str_end) {
|
||||
return (result);
|
||||
}
|
||||
INC_PTR(cs,str,str_end);
|
||||
INC_PTR(cs,str,str_end);
|
||||
} while (++wild_str < wild_end && *wild_str == w_one);
|
||||
if (wild_end == wild_str)
|
||||
break;
|
||||
break;
|
||||
}
|
||||
if (*wild_str == w_many) {
|
||||
if (*wild_str == w_many) {
|
||||
unsigned char cmp;
|
||||
const char* mb = wild_str;
|
||||
int mb_len=0;
|
||||
|
||||
wild_str++;
|
||||
|
||||
for (; wild_str != wild_end ; wild_str++)
|
||||
{
|
||||
if (*wild_str == w_many)
|
||||
continue;
|
||||
if (*wild_str == w_one)
|
||||
{
|
||||
if (*wild_str == w_one) {
|
||||
if (str == str_end)
|
||||
return (-1);
|
||||
INC_PTR(cs,str,str_end);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
break;
|
||||
}
|
||||
if (wild_str == wild_end) {
|
||||
return(0);
|
||||
return(0);
|
||||
} else if (str == str_end) {
|
||||
return -1;
|
||||
return -1;
|
||||
} else if ((cmp= *wild_str) == escape_char && wild_str+1 != wild_end) {
|
||||
cmp= *++wild_str;
|
||||
cmp= *++wild_str;
|
||||
}
|
||||
|
||||
mb=wild_str;
|
||||
mb_len= ob_ismbchar(cs, wild_str, wild_end);
|
||||
INC_PTR(cs,wild_str,wild_end);
|
||||
INC_PTR(cs,wild_str,wild_end);
|
||||
cmp=likeconv(cs,cmp);
|
||||
while (true) {
|
||||
while (TRUE) {
|
||||
@ -232,7 +222,7 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
|
||||
{
|
||||
int tmp=ob_wildcmp_mb_impl(cs,str,str_end,
|
||||
wild_str,wild_end,escape_char,w_one,
|
||||
w_many, recurse_level + 1);
|
||||
w_many, recurse_level + 1);
|
||||
if (tmp <= 0)
|
||||
return (tmp);
|
||||
}
|
||||
@ -240,7 +230,7 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
|
||||
return -1;
|
||||
} else if (wild_str != wild_end && wild_str[0] == w_many) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return(-1);
|
||||
}
|
||||
@ -256,7 +246,7 @@ unsigned int __attribute__ ((noinline)) ob_instr_mb_help(size_t s_length, ob_mat
|
||||
match->end= 0;
|
||||
match->mb_len= 0;
|
||||
}
|
||||
return 1;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -277,8 +267,8 @@ unsigned int ob_instr_mb(const ObCharsetInfo *cs,
|
||||
end= b+b_length-s_length+1;
|
||||
while (b < end) {
|
||||
int mb_len;
|
||||
if (!cs->coll->strnncoll(cs, (unsigned char*) b, s_length,
|
||||
(unsigned char*) s, s_length, 0)) {
|
||||
if (!cs->coll->strnncoll(cs, (unsigned char*) b, s_length,
|
||||
(unsigned char*) s, s_length, 0)) {
|
||||
if (nmatch) {
|
||||
match[0].beg= 0;
|
||||
match[0].end= (size_t) (b-b0);
|
||||
@ -286,7 +276,7 @@ unsigned int ob_instr_mb(const ObCharsetInfo *cs,
|
||||
if (nmatch > 1) {
|
||||
match[1].beg= match[0].end;
|
||||
match[1].end= match[0].end+s_length;
|
||||
match[1].mb_len= 0;
|
||||
match[1].mb_len= 0;
|
||||
}
|
||||
}
|
||||
return 2;
|
||||
@ -349,7 +339,7 @@ size_t ob_max_bytes_charpos_mb(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
int ob_mb_ctype_mb(const ObCharsetInfo *cs __attribute__((unused)), int *ctype,
|
||||
const unsigned char *s, const unsigned char *e)
|
||||
const unsigned char *s, const unsigned char *e)
|
||||
{
|
||||
ob_wc_t wc;
|
||||
int res = cs->cset->mb_wc(cs, &wc, s, e);
|
||||
@ -439,7 +429,7 @@ size_t ob_lengthsp_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const char *ptr, size_t length)
|
||||
{
|
||||
const char *end;
|
||||
end= (const char *) skip_trailing_space((const uchar *)ptr, length, 0);
|
||||
end= (const char *) skip_trailing_space((const unsigned char *)ptr, length, 0);
|
||||
return (size_t) (end-ptr);
|
||||
}
|
||||
|
||||
@ -470,18 +460,18 @@ int __attribute__ ((noinline)) ob_strnncollsp_mb_bin_help(
|
||||
res= 1;
|
||||
}
|
||||
if (a_length < b_length) {
|
||||
a_length= b_length;
|
||||
a= b;
|
||||
a_length= b_length;
|
||||
a= b;
|
||||
swap= -1;
|
||||
res= -res;
|
||||
res= -res;
|
||||
}
|
||||
for (end= a + a_length-length; a < end ; a++) {
|
||||
if (*a != ' ') {
|
||||
*has_returned = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (*a != ' ') {
|
||||
*has_returned = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
*a_ = a;
|
||||
*b_ = b;
|
||||
*end_ = end;
|
||||
@ -510,10 +500,10 @@ int ob_strnncollsp_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
res= 0;
|
||||
int has_returned = 0;
|
||||
int tmp = ob_strnncollsp_mb_bin_help(
|
||||
&a, a_length,
|
||||
&b, b_length,
|
||||
&end,
|
||||
diff_if_only_endspace_difference, &has_returned, &res, length);
|
||||
&a, a_length,
|
||||
&b, b_length,
|
||||
&end,
|
||||
diff_if_only_endspace_difference, &has_returned, &res, length);
|
||||
return has_returned == 1 ? tmp : res;
|
||||
}
|
||||
|
||||
@ -585,7 +575,7 @@ size_t ob_strnxfrm_mb(const ObCharsetInfo *cs,
|
||||
pad:
|
||||
return ob_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
#define INC_PTR(cs,A,B) A+=(ob_ismbchar(cs,A,B) ? ob_ismbchar(cs,A,B) : 1)
|
||||
|
||||
@ -636,7 +626,7 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
|
||||
const char *str_end, const char *wild_str, const char *wild_end, int escape_char,
|
||||
int w_one, int w_many, int recurse_level)
|
||||
{
|
||||
int result = -1;
|
||||
int result = -1;
|
||||
while (wild_str != wild_end) {
|
||||
int has_returned = 0;
|
||||
int tmp = ob_wildcmp_mb_bin_impl_help(cs, &str,&str_end, &wild_str, &wild_end, escape_char,w_one,w_many, &result, &has_returned);
|
||||
@ -644,7 +634,7 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
|
||||
return tmp;
|
||||
} else if (*wild_str == w_one) {
|
||||
do {
|
||||
if (str == str_end) {
|
||||
if (str == str_end) {
|
||||
return (result);
|
||||
} else {
|
||||
INC_PTR(cs, str, str_end);
|
||||
@ -654,13 +644,11 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (*wild_str == w_many) {
|
||||
if (*wild_str == w_many) {
|
||||
unsigned char cmp;
|
||||
const char* mb = wild_str;
|
||||
int mb_len = 0;
|
||||
|
||||
wild_str++;
|
||||
|
||||
for (; wild_str != wild_end; wild_str++) {
|
||||
if (*wild_str == w_many) {
|
||||
continue;
|
||||
@ -673,7 +661,7 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (wild_str == wild_end) {
|
||||
return (0);
|
||||
@ -749,8 +737,6 @@ void ob_hash_sort_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
ObCollationHandler ob_collation_mb_bin_handler = {
|
||||
NULL,
|
||||
NULL,
|
||||
@ -770,5 +756,3 @@ ObCollationHandler ob_collation_mb_bin_handler = {
|
||||
|
||||
#undef INC_PTR
|
||||
#undef likeconv
|
||||
|
||||
#endif
|
@ -1,3 +1,4 @@
|
||||
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
@ -9,8 +10,7 @@
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
/*
|
||||
/*
|
||||
*
|
||||
* Version: $Id
|
||||
*
|
||||
@ -18,7 +18,6 @@
|
||||
* - initial release
|
||||
*
|
||||
*/
|
||||
#ifndef OB_BUILD_FULL_CHARSET
|
||||
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
#include "lib/charset/ob_dtoa.h"
|
||||
@ -53,14 +52,14 @@ static ulonglong d10[DIGITS_IN_ULONGLONG]=
|
||||
};
|
||||
|
||||
long ob_strntol_8bit(const ObCharsetInfo *cs,
|
||||
const char *nptr, size_t l, int base,
|
||||
char **end_ptr, int *err)
|
||||
const char *nptr, size_t l, int base,
|
||||
char **end_ptr, int *err)
|
||||
{
|
||||
|
||||
const char *save, *s = nptr, *e = nptr+l;
|
||||
unsigned char c;
|
||||
unsigned int cut_lim;
|
||||
*err= 0;
|
||||
*err= 0;
|
||||
uint32 cut_off;
|
||||
while (s<e && ob_isspace(cs, *s)) {
|
||||
s++;
|
||||
@ -115,7 +114,7 @@ long ob_strntol_8bit(const ObCharsetInfo *cs,
|
||||
if (neg) {
|
||||
if (i > (uint32) INT_MIN32) {
|
||||
overflow = 1;
|
||||
}
|
||||
}
|
||||
} else if (i > INT_MAX32) {
|
||||
overflow = 1;
|
||||
}
|
||||
@ -137,8 +136,8 @@ NO_CONV:
|
||||
|
||||
|
||||
ulong ob_strntoul_8bit(const ObCharsetInfo *cs,
|
||||
const char *nptr, size_t l, int base,
|
||||
char **end_ptr, int *err)
|
||||
const char *nptr, size_t l, int base,
|
||||
char **end_ptr, int *err)
|
||||
{
|
||||
int neg;
|
||||
unsigned char c;
|
||||
@ -146,7 +145,7 @@ ulong ob_strntoul_8bit(const ObCharsetInfo *cs,
|
||||
uint32 cut_off;
|
||||
unsigned int cut_lim;
|
||||
|
||||
*err= 0;
|
||||
*err= 0;
|
||||
|
||||
while (s<e && ob_isspace(cs, *s)) {
|
||||
s++;
|
||||
@ -217,13 +216,13 @@ NO_CONV:
|
||||
|
||||
|
||||
longlong ob_strntoll_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const char *nptr, size_t l, int base,
|
||||
char **end_ptr,int *err)
|
||||
const char *nptr, size_t l, int base,
|
||||
char **end_ptr,int *err)
|
||||
{
|
||||
ulonglong cut_off;
|
||||
unsigned int cut_lim;
|
||||
const char *s = nptr, *e = nptr+l, *save;
|
||||
*err= 0;
|
||||
*err= 0;
|
||||
|
||||
while (s<e && ob_isspace(cs,*s)) {
|
||||
s++;
|
||||
@ -302,14 +301,14 @@ NO_CONV:
|
||||
|
||||
|
||||
ulonglong ob_strntoull_8bit(const ObCharsetInfo *cs,
|
||||
const char *nptr, size_t l, int base,
|
||||
char **end_ptr, int *err)
|
||||
const char *nptr, size_t l, int base,
|
||||
char **end_ptr, int *err)
|
||||
{
|
||||
|
||||
ulonglong cut_off;
|
||||
unsigned int cut_lim;
|
||||
const char *s = nptr, *e = nptr + l, *save;
|
||||
*err= 0;
|
||||
*err= 0;
|
||||
|
||||
while (s<e && ob_isspace(cs,*s)) {
|
||||
s++;
|
||||
@ -386,8 +385,8 @@ NO_CONV:
|
||||
|
||||
|
||||
double ob_strntod_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
char *str, size_t len,
|
||||
char **end, int *err)
|
||||
char *str, size_t len,
|
||||
char **end, int *err)
|
||||
{
|
||||
if (len == INT_MAX32) {
|
||||
len= 65535;
|
||||
@ -421,7 +420,7 @@ ob_strntoull10rnd_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
|
||||
beg= str;
|
||||
end9= (str + 9) > end ? end : (str + 9);
|
||||
|
||||
|
||||
for (ul= 0 ; str < end9 && (ch= (unsigned char) (*str - '0')) < 10; str++) {
|
||||
ul= ul * 10 + ch;
|
||||
}
|
||||
@ -583,7 +582,6 @@ RET_SIGN:
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (neg && ull) {
|
||||
*err= OB_ERRNO_ERANGE;
|
||||
return 0;
|
||||
@ -611,7 +609,7 @@ RET_TOO_LARGE:
|
||||
}
|
||||
|
||||
void ob_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *str_end,
|
||||
unsigned int flags, unsigned int level)
|
||||
unsigned int flags, unsigned int level)
|
||||
{
|
||||
if (flags & (OB_STRXFRM_DESC_LEVEL1 << level)) {
|
||||
if (flags & (OB_STRXFRM_REVERSE_LEVEL1 << level)) {
|
||||
@ -660,8 +658,6 @@ size_t ob_scan_8bit(const ObCharsetInfo *cs, const char *str, const char *end,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
size_t ob_strxfrm_pad_desc_and_reverse(const ObCharsetInfo *cs,
|
||||
unsigned char *str, unsigned char *frm_end, unsigned char *str_end,
|
||||
unsigned int nweights, unsigned int flags, unsigned int level)
|
||||
@ -686,11 +682,11 @@ size_t ob_strnxfrmlen_simple(const ObCharsetInfo *cs, size_t len)
|
||||
}
|
||||
|
||||
bool ob_like_range_simple(const ObCharsetInfo *cs,
|
||||
const char *ptr, size_t ptr_len,
|
||||
pbool escape_char, pbool w_one, pbool w_many,
|
||||
size_t res_len,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_len, size_t *max_len)
|
||||
const char *ptr, size_t ptr_len,
|
||||
pbool escape_char, pbool w_one, pbool w_many,
|
||||
size_t res_len,
|
||||
char *min_str,char *max_str,
|
||||
size_t *min_len, size_t *max_len)
|
||||
{
|
||||
const char *end= ptr + ptr_len;
|
||||
char *min_org=min_str;
|
||||
@ -699,11 +695,11 @@ bool ob_like_range_simple(const ObCharsetInfo *cs,
|
||||
|
||||
for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--) {
|
||||
if (*ptr == escape_char && ptr+1 != end) {
|
||||
ptr++;
|
||||
ptr++;
|
||||
*min_str++= *max_str++ = *ptr;
|
||||
continue;
|
||||
} else if (*ptr == w_one) {
|
||||
*min_str++='\0';
|
||||
*min_str++='\0';
|
||||
*max_str++= (char) cs->max_sort_char;
|
||||
continue;
|
||||
} else if (*ptr == w_many) {
|
||||
@ -742,7 +738,7 @@ bool ob_propagate_complex(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
void ob_fill_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
char *s, size_t l, int fill)
|
||||
char *s, size_t l, int fill)
|
||||
{
|
||||
memset(s, fill, l);
|
||||
}
|
||||
@ -758,9 +754,9 @@ int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err
|
||||
}
|
||||
|
||||
void ob_hash_sort_simple(const ObCharsetInfo *cs,
|
||||
const unsigned char *key, size_t len,
|
||||
unsigned long int *nr1, unsigned long int *nr2,
|
||||
const bool calc_end_space, hash_algo hash_algo)
|
||||
const unsigned char *key, size_t len,
|
||||
unsigned long int *nr1, unsigned long int *nr2,
|
||||
const bool calc_end_space, hash_algo hash_algo)
|
||||
{
|
||||
unsigned char *sort_order=cs->sort_order;
|
||||
const unsigned char *end;
|
||||
@ -788,7 +784,7 @@ void ob_hash_sort_simple(const ObCharsetInfo *cs,
|
||||
|
||||
#define SPACE_INT 0x20202020
|
||||
|
||||
const uchar *skip_trailing_space(const uchar *ptr,size_t len, bool is_utf16 /*false*/)
|
||||
const unsigned char *skip_trailing_space(const unsigned char *ptr,size_t len, bool is_utf16 /*false*/)
|
||||
{
|
||||
const unsigned char *end= ptr + len;
|
||||
if (len > 20 && !is_utf16) {
|
||||
@ -853,9 +849,9 @@ size_t ob_casedn_8bit(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
}
|
||||
|
||||
int ob_strnncoll_simple(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
const uchar *s, size_t slen,
|
||||
const uchar *t, size_t tlen,
|
||||
bool is_prefix)
|
||||
const unsigned char *s, size_t slen,
|
||||
const unsigned char *t, size_t tlen,
|
||||
bool is_prefix)
|
||||
{
|
||||
size_t len = (slen > tlen) ? tlen : slen;
|
||||
if (is_prefix && slen > tlen) slen = tlen;
|
||||
@ -871,18 +867,18 @@ int ob_strnncoll_simple(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
|
||||
static int ob_strnncollsp_simple(const ObCharsetInfo *cs
|
||||
__attribute__((unused)),
|
||||
const uchar *s, size_t slen,
|
||||
const uchar *t, size_t tlen,
|
||||
const unsigned char *s, size_t slen,
|
||||
const unsigned char *t, size_t tlen,
|
||||
bool diff_if_only_endspace_difference
|
||||
__attribute__((unused)))
|
||||
{
|
||||
size_t len = (slen > tlen) ? tlen : slen;
|
||||
for (size_t i = 0; i < len; i++){
|
||||
if(ob_sort_order(cs,*s)!=ob_sort_order(cs,*t)) {
|
||||
return (int)ob_sort_order(cs,*s) - (int)ob_sort_order(cs,*t);
|
||||
}
|
||||
s++;
|
||||
t++;
|
||||
if(ob_sort_order(cs,*s)!=ob_sort_order(cs,*t)) {
|
||||
return (int)ob_sort_order(cs,*s) - (int)ob_sort_order(cs,*t);
|
||||
}
|
||||
s++;
|
||||
t++;
|
||||
}
|
||||
int res = 0;
|
||||
if (slen != tlen) {
|
||||
@ -896,7 +892,6 @@ static int ob_strnncollsp_simple(const ObCharsetInfo *cs
|
||||
*/
|
||||
if (slen < tlen) {
|
||||
slen = tlen;
|
||||
|
||||
s = t;
|
||||
swap = -1;
|
||||
res = -res;
|
||||
@ -914,14 +909,12 @@ static int ob_strnncollsp_simple(const ObCharsetInfo *cs
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static size_t ob_strnxfrm_simple(const ObCharsetInfo* cs __attribute__((unused)), unsigned char* dst, size_t dstlen,
|
||||
uint nweights, const unsigned char* src, size_t srclen, unsigned int flags, bool* is_valid_unicode)
|
||||
unsigned int nweights, const unsigned char* src, size_t srclen, unsigned int flags, bool* is_valid_unicode)
|
||||
{
|
||||
uchar *dst0 = dst;
|
||||
const uchar *end;
|
||||
const uchar *remainder;
|
||||
unsigned char *dst0 = dst;
|
||||
const unsigned char *end;
|
||||
const unsigned char *remainder;
|
||||
size_t frmlen;
|
||||
frmlen = dstlen > nweights ? nweights : dstlen;
|
||||
frmlen = frmlen > srclen ? srclen : frmlen;
|
||||
@ -1030,9 +1023,8 @@ int ob_wildcmp_8bit(const ObCharsetInfo* cs, const char* str, const char* str_en
|
||||
return ob_wildcmp_8bit_impl(cs, str, str_end, wildstr, wildend, escape, w_one, w_many, 1);
|
||||
}
|
||||
|
||||
|
||||
uint32_t ob_instr_simple(const ObCharsetInfo* cs , const char* b, size_t b_length,
|
||||
const char* s, size_t s_length, ob_match_t* match, uint nmatch)
|
||||
const char* s, size_t s_length, ob_match_t* match, unsigned int nmatch)
|
||||
{
|
||||
register const unsigned char *str, *search, *end, *search_end;
|
||||
|
||||
@ -1081,8 +1073,6 @@ uint32_t ob_instr_simple(const ObCharsetInfo* cs , const char* b, size_t b_lengt
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
ObCollationHandler ob_collation_8bit_simple_ci_handler = {
|
||||
NULL, /* init */
|
||||
NULL,
|
||||
@ -1099,6 +1089,4 @@ ObCollationHandler ob_collation_8bit_simple_ci_handler = {
|
||||
ob_propagate_simple};
|
||||
|
||||
#undef likeconv
|
||||
#undef INC_PTR
|
||||
|
||||
#endif
|
||||
#undef INC_PTR
|
3153
deps/oblib/src/lib/charset/ob_ctype_uca.cc
vendored
Normal file
3153
deps/oblib/src/lib/charset/ob_ctype_uca.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
19527
deps/oblib/src/lib/charset/ob_ctype_uca_tab.h
vendored
Normal file
19527
deps/oblib/src/lib/charset/ob_ctype_uca_tab.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,4 @@
|
||||
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
@ -10,21 +11,16 @@
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
/*
|
||||
* (C) 2017-2020 Alibaba Group Holding Limited.
|
||||
*
|
||||
* Authors:
|
||||
*/
|
||||
#ifndef OB_BUILD_FULL_CHARSET
|
||||
|
||||
#include "lib/charset/ob_ctype.h"
|
||||
#include "lib/charset/str_uca_type.h"
|
||||
#include "lib/charset/ob_dtoa.h"
|
||||
#include "lib/charset/ob_template_helper.h"
|
||||
|
||||
#define OB_UTF16_HIGH_HEAD(x) ((((unsigned char) (x)) & 0xFC) == 0xD8)
|
||||
#define OB_UTF16_LOW_HEAD(x) ((((unsigned char) (x)) & 0xFC) == 0xDC)
|
||||
#define OB_UTF16_HIGH_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xD8)
|
||||
#define OB_UTF16_LOW_HEAD(x) ((((uchar) (x)) & 0xFC) == 0xDC)
|
||||
#define OB_UTF16_SURROGATE(x) (((x) & 0xF800) == 0xD800)
|
||||
|
||||
#define OB_UTF16_WC2(a, begin) ((a << 8) + begin)
|
||||
#define OB_UTF16_WC2(a, b) ((a << 8) + b)
|
||||
|
||||
static inline int
|
||||
ob_bincmp(const unsigned char *str, const unsigned char *se,
|
||||
@ -140,7 +136,7 @@ ob_utf16_uni(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
} else {
|
||||
*pwc= OB_UTF16_WC4(str[0], str[1], str[2], str[3]);
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
} else if (OB_UTF16_LOW_HEAD(*str)) {
|
||||
return OB_CS_ILSEQ;
|
||||
} else {
|
||||
@ -162,7 +158,7 @@ ob_uni_utf16(const ObCharsetInfo *cs __attribute__((unused)),
|
||||
*str++= (unsigned char) (wc >> 8);
|
||||
*str= (unsigned char) (wc & 0xFF);
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
} else if (wc <= 0x10FFFF) {
|
||||
if (4 > end - str) {
|
||||
return OB_CS_TOOSMALL4;
|
||||
@ -295,7 +291,7 @@ ob_strntol_mb2_or_mb4(const ObCharsetInfo *cs,
|
||||
//do nothing
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (end_ptr != NULL) *end_ptr= (char*) str;
|
||||
err[0]= (cnv==OB_CS_ILSEQ) ? EILSEQ : EDOM;
|
||||
@ -393,7 +389,7 @@ ob_strntoul_mb2_or_mb4(const ObCharsetInfo *cs,
|
||||
//do nothing
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (NULL != end_ptr) {
|
||||
*end_ptr= (char*)str;
|
||||
@ -456,7 +452,7 @@ ob_strntoul_mb2_or_mb4(const ObCharsetInfo *cs,
|
||||
return (negative ? -((long) res) : (long) res);
|
||||
}
|
||||
|
||||
static longlong
|
||||
static longlong
|
||||
ob_strntoll_mb2_or_mb4(const ObCharsetInfo *cs,
|
||||
const char *nptr, size_t l, int base,
|
||||
char **end_ptr, int *err)
|
||||
@ -546,7 +542,7 @@ ob_strntoll_mb2_or_mb4(const ObCharsetInfo *cs,
|
||||
if (negative) {
|
||||
if (res > (uint64_t) LONGLONG_MIN) {
|
||||
overflow = 1;
|
||||
}
|
||||
}
|
||||
} else if (res > (uint64_t) LONGLONG_MAX) {
|
||||
overflow = 1;
|
||||
}
|
||||
@ -904,8 +900,8 @@ ob_strnncollsp_utf16(const ObCharsetInfo *cs,
|
||||
if (s_res <= 0 || t_res <= 0) {
|
||||
return ob_bincmp(str, se, t, te);
|
||||
} else {
|
||||
ob_tosort_utf16(uni_plane, &s_wc);
|
||||
ob_tosort_utf16(uni_plane, &t_wc);
|
||||
ob_tosort_utf16(uni_plane, &s_wc);
|
||||
ob_tosort_utf16(uni_plane, &t_wc);
|
||||
}
|
||||
if (s_wc != t_wc) {
|
||||
return s_wc > t_wc ? 1 : -1;
|
||||
@ -1097,9 +1093,9 @@ ob_like_range_generic(const ObCharsetInfo *cs,
|
||||
} else {
|
||||
max_str+= res;
|
||||
wc= wc2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
res= cs->cset->wc_mb(cs, wc, (unsigned char*) min_str, (unsigned char*) min_end);
|
||||
if (res <= 0) {
|
||||
goto PAD_SET_LEN;
|
||||
@ -1123,7 +1119,7 @@ PAD_MIN_MAX:
|
||||
res_length_diff= res_length % cs->mbminlen;
|
||||
cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff, cs->min_sort_char);
|
||||
cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff, cs->max_sort_char);
|
||||
|
||||
|
||||
if (res_length_diff != 0) {
|
||||
memset(min_end - res_length_diff, 0, res_length_diff);
|
||||
memset(max_end - res_length_diff, 0, res_length_diff);
|
||||
@ -1254,5 +1250,3 @@ ObCharsetInfo ob_charset_utf16_general_ci=
|
||||
&ob_collation_utf16_general_ci_handler,
|
||||
PAD_SPACE
|
||||
};
|
||||
|
||||
#endif
|
1069
deps/oblib/src/lib/charset/ob_ctype_utf8.cc
vendored
Normal file
1069
deps/oblib/src/lib/charset/ob_ctype_utf8.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
2584
deps/oblib/src/lib/charset/ob_ctype_utf8_os.cc
vendored
2584
deps/oblib/src/lib/charset/ob_ctype_utf8_os.cc
vendored
File diff suppressed because it is too large
Load Diff
4463
deps/oblib/src/lib/charset/ob_ctype_utf8_tab.h
vendored
Normal file
4463
deps/oblib/src/lib/charset/ob_ctype_utf8_tab.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,4 @@
|
||||
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
@ -8,19 +9,8 @@
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
/*
|
||||
*
|
||||
* Version: $Id
|
||||
*
|
||||
* Authors:
|
||||
* - initial release
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef OB_BUILD_FULL_CHARSET
|
||||
|
||||
#include "lib/charset/ob_dtoa.h"
|
||||
#include "lib/charset/ob_mysql_global.h"
|
||||
|
||||
@ -48,51 +38,6 @@ size_t ob_fcvt_overflow(char *to, bool *error)
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
size_t ob_fcvt(double x, int precision, int width, char *to, bool *error)
|
||||
{
|
||||
int decpt, sign;
|
||||
char *res, *end, *dst= to, *dend= to + width;
|
||||
char buf[DTOA_BUF_MAX_SIZE];
|
||||
if (!(precision >= 0 && precision < 31 && to != NULL)) {
|
||||
return 0;
|
||||
}
|
||||
res = dtoa(x, 5, precision, &decpt, &sign, &end, buf, sizeof(buf));
|
||||
if (decpt == DTOA_OVERFLOW) {
|
||||
dtoa_free(res, buf, sizeof(buf));
|
||||
return ob_fcvt_overflow(to, error);
|
||||
}
|
||||
ob_fcvt_help(&end, &dst, &dend, sign, decpt, &precision, &res);
|
||||
*dst= '\0';
|
||||
if (error != NULL) {
|
||||
*error= FALSE;
|
||||
}
|
||||
dtoa_free(res, buf, sizeof(buf));
|
||||
return dst - to;
|
||||
}
|
||||
|
||||
size_t ob_fcvt_opt(double x, int precision, int width, char *to, bool *error, bool add_padding_zero)
|
||||
{
|
||||
int decpt, sign;
|
||||
char *res, *end, *dst= to, *dend= to + width;
|
||||
char buf[DTOA_BUF_MAX_SIZE];
|
||||
if (!(precision >= 0 && precision < 31 && to != NULL)) {
|
||||
return 0;
|
||||
}
|
||||
res = dtoa(x, 5, precision, &decpt, &sign, &end, buf, sizeof(buf));
|
||||
if (decpt == DTOA_OVERFLOW) {
|
||||
dtoa_free(res, buf, sizeof(buf));
|
||||
return ob_fcvt_overflow(to, error);
|
||||
}
|
||||
ob_fcvt_help_opt(&end, &dst, &dend, sign, decpt, &precision, &res, add_padding_zero);
|
||||
*dst= '\0';
|
||||
if (error != NULL)
|
||||
*error= FALSE;
|
||||
dtoa_free(res, buf, sizeof(buf));
|
||||
return dst - to;
|
||||
}
|
||||
|
||||
|
||||
void ob_fcvt_help(char **end, char **dst, char **dend, int sign, int decpt,
|
||||
int *precision, char **res)
|
||||
{
|
||||
@ -133,46 +78,29 @@ void ob_fcvt_help(char **end, char **dst, char **dend, int sign, int decpt,
|
||||
}
|
||||
}
|
||||
|
||||
void ob_fcvt_help_opt(char **end, char **dst, char **dend, int sign, int decpt,
|
||||
int *precision, char **res, bool add_padding_zero)
|
||||
{
|
||||
const int len = (*end) - (*res);
|
||||
const char *dend_ptr = *dend;
|
||||
char *dst_ptr = *dst;
|
||||
char *src = (*res);
|
||||
int i = 0;
|
||||
|
||||
if (dst_ptr < dend_ptr) {
|
||||
if (sign)
|
||||
*dst_ptr++= '-';
|
||||
if (decpt <= 0)
|
||||
{
|
||||
if ((dst_ptr + 1) < dend_ptr) {
|
||||
*dst_ptr++= '0';
|
||||
*dst_ptr++= '.';
|
||||
}
|
||||
for (i= decpt; i < 0 && dst_ptr < dend_ptr; i++)
|
||||
*dst_ptr++= '0';
|
||||
}
|
||||
for (i= 1; i <= len && dst_ptr < dend_ptr; i++)
|
||||
{
|
||||
*dst_ptr++= *src++;
|
||||
if (i == decpt && i < len && dst_ptr < dend_ptr)
|
||||
*dst_ptr++= '.';
|
||||
}
|
||||
while (i++ <= decpt && dst_ptr < dend_ptr)
|
||||
*dst_ptr++= '0';
|
||||
if (*precision > 0 && add_padding_zero)
|
||||
{
|
||||
if (len <= decpt && dst_ptr < dend_ptr)
|
||||
*dst_ptr++= '.';
|
||||
for (i= *precision - OB_MAX(0, (len - decpt)); i > 0 && dst_ptr < dend_ptr; i--)
|
||||
*dst_ptr++= '0';
|
||||
}
|
||||
*dst = dst_ptr;
|
||||
}
|
||||
size_t ob_fcvt(double x, int precision, int width, char *to, bool *error)
|
||||
{
|
||||
int decpt, sign;
|
||||
char *res, *end, *dst= to, *dend= to + width;
|
||||
char buf[DTOA_BUF_MAX_SIZE];
|
||||
if (!(precision >= 0 && precision < 31 && to != NULL)) {
|
||||
return 0;
|
||||
}
|
||||
res = dtoa(x, 5, precision, &decpt, &sign, &end, buf, sizeof(buf));
|
||||
if (decpt == DTOA_OVERFLOW) {
|
||||
dtoa_free(res, buf, sizeof(buf));
|
||||
return ob_fcvt_overflow(to, error);
|
||||
}
|
||||
ob_fcvt_help(&end, &dst, &dend, sign, decpt, &precision, &res);
|
||||
*dst= '\0';
|
||||
if (error != NULL)
|
||||
*error= FALSE;
|
||||
dtoa_free(res, buf, sizeof(buf));
|
||||
return dst - to;
|
||||
}
|
||||
|
||||
//=================================================================================
|
||||
|
||||
size_t ob_gcvt_overflow(char *to, bool *error)
|
||||
{
|
||||
@ -270,10 +198,12 @@ void ob_gcvt_help2(int *width, int *len, char **dend, char **src,
|
||||
|
||||
const int need_check_buf = (*dend - *dst) < MAX_DOUBLE_SIZE;
|
||||
if (need_check_buf) {
|
||||
|
||||
if (sign && dst_ptr < dend_ptr)
|
||||
*dst_ptr++= '-';
|
||||
if (dst_ptr < dend_ptr)
|
||||
*dst_ptr++= *src_ptr++;
|
||||
//zero
|
||||
const int is_zero = (dst_ptr < dend_ptr && use_oracle_mode && (*(src_ptr - 1) == '0') && ((*len) == 1));
|
||||
if (is_zero) {
|
||||
if (sign) {
|
||||
@ -482,11 +412,11 @@ typedef union { double d; ULong L[2]; } U;
|
||||
|
||||
#if defined(WORDS_BIGENDIAN) || (defined(__FLOAT_WORD_ORDER) && \
|
||||
(__FLOAT_WORD_ORDER == __BIG_ENDIAN))
|
||||
COPY_BIGINT WORD0(x) (x)->L[0]
|
||||
#define WORD1(x) (x)->L[1]
|
||||
#define word0(x) (x)->L[0]
|
||||
#define word1(x) (x)->L[1]
|
||||
#else
|
||||
#define WORD0(x) (x)->L[1]
|
||||
#define WORD1(x) (x)->L[0]
|
||||
#define word0(x) (x)->L[1]
|
||||
#define word1(x) (x)->L[0]
|
||||
#endif
|
||||
|
||||
#define dval(x) (x)->d
|
||||
@ -520,7 +450,7 @@ COPY_BIGINT WORD0(x) (x)->L[0]
|
||||
#else
|
||||
#define Flt_Rounds 1
|
||||
#endif
|
||||
#endif /*Flt_Rounds*/
|
||||
#endif
|
||||
|
||||
#ifdef Honor_FLT_ROUNDS
|
||||
#define Rounding rounding
|
||||
@ -540,7 +470,7 @@ COPY_BIGINT WORD0(x) (x)->L[0]
|
||||
|
||||
#define Kmax 15
|
||||
|
||||
#define COPY_BIGINT(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \
|
||||
#define copy_bigint(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \
|
||||
2*sizeof(int) + y->wds*sizeof(ULong))
|
||||
|
||||
|
||||
@ -594,13 +524,15 @@ static Bigint *alloc_bigint(int k, ObStackAllocator *alloc)
|
||||
|
||||
|
||||
|
||||
|
||||
static void free_bigint(Bigint *v, ObStackAllocator *alloc)
|
||||
{
|
||||
if (v != NULL) {
|
||||
char *g_ptr= (char*) v;
|
||||
if (g_ptr < alloc->begin || g_ptr >= alloc->end) {
|
||||
free(g_ptr);
|
||||
char *gptr= (char*) v;
|
||||
if (gptr < alloc->begin || gptr >= alloc->end) {
|
||||
free(gptr);
|
||||
} else if (v->k <= Kmax) {
|
||||
|
||||
v->p.next= alloc->freelist[v->k];
|
||||
alloc->freelist[v->k]= v;
|
||||
}
|
||||
@ -608,6 +540,8 @@ static void free_bigint(Bigint *v, ObStackAllocator *alloc)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static char *dtoa_alloc(int i, ObStackAllocator *alloc)
|
||||
{
|
||||
char *rv;
|
||||
@ -616,19 +550,26 @@ static char *dtoa_alloc(int i, ObStackAllocator *alloc)
|
||||
rv = alloc->free;
|
||||
alloc->free += aligned_size;
|
||||
} else {
|
||||
rv = (char*)malloc(i);
|
||||
rv = static_cast<char*>(malloc(i));
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
static void dtoa_free(char *g_ptr, char *buf, size_t buf_size)
|
||||
|
||||
|
||||
|
||||
static void dtoa_free(char *gptr, char *buf, size_t buf_size)
|
||||
{
|
||||
if (g_ptr < buf || g_ptr >= buf + buf_size) {
|
||||
free(g_ptr);
|
||||
if (gptr < buf || gptr >= buf + buf_size) {
|
||||
free(gptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static Bigint *mult_and_add(Bigint *b, int m, int a, ObStackAllocator *alloc)
|
||||
{
|
||||
int i, wds;
|
||||
@ -652,7 +593,7 @@ static Bigint *mult_and_add(Bigint *b, int m, int a, ObStackAllocator *alloc)
|
||||
if (wds >= b->maxwds)
|
||||
{
|
||||
b1= alloc_bigint(b->k+1, alloc);
|
||||
COPY_BIGINT(b1, b);
|
||||
copy_bigint(b1, b);
|
||||
free_bigint(b, alloc);
|
||||
b= b1;
|
||||
}
|
||||
@ -1038,9 +979,9 @@ static double ulp(U *x)
|
||||
register Long L;
|
||||
U u;
|
||||
|
||||
L= (WORD0(x) & Exp_mask) - (P - 1)*Exp_msk1;
|
||||
WORD0(&u) = L;
|
||||
WORD1(&u) = 0;
|
||||
L= (word0(x) & Exp_mask) - (P - 1)*Exp_msk1;
|
||||
word0(&u) = L;
|
||||
word1(&u) = 0;
|
||||
return dval(&u);
|
||||
}
|
||||
|
||||
@ -1050,8 +991,8 @@ static double b2d(Bigint *a, int *e)
|
||||
ULong *xa, *xa0, w, y, z;
|
||||
int k;
|
||||
U d;
|
||||
#define d0 WORD0(&d)
|
||||
#define d1 WORD1(&d)
|
||||
#define d0 word0(&d)
|
||||
#define d1 word1(&d)
|
||||
|
||||
xa0= a->p.x;
|
||||
xa= xa0 + a->wds;
|
||||
@ -1090,8 +1031,8 @@ static Bigint *d2b(U *d, int *e, int *bits, ObStackAllocator *alloc)
|
||||
int de, k;
|
||||
ULong *x, y, z;
|
||||
int i;
|
||||
#define d0 WORD0(d)
|
||||
#define d1 WORD1(d)
|
||||
#define d0 word0(d)
|
||||
#define d1 word1(d)
|
||||
|
||||
b= alloc_bigint(1, alloc);
|
||||
x= b->p.x;
|
||||
@ -1146,11 +1087,11 @@ static double ratio(Bigint *a, Bigint *b)
|
||||
dval(&db)= b2d(b, &kb);
|
||||
k= ka - kb + 32*(a->wds - b->wds);
|
||||
if (k > 0)
|
||||
WORD0(&da)+= k*Exp_msk1;
|
||||
word0(&da)+= k*Exp_msk1;
|
||||
else
|
||||
{
|
||||
k= -k;
|
||||
WORD0(&db)+= k*Exp_msk1;
|
||||
word0(&db)+= k*Exp_msk1;
|
||||
}
|
||||
return dval(&da) / dval(&db);
|
||||
}
|
||||
@ -1438,16 +1379,16 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
{
|
||||
case 0:
|
||||
case 3:
|
||||
WORD0(&rv)= Big0;
|
||||
WORD1(&rv)= Big1;
|
||||
word0(&rv)= Big0;
|
||||
word1(&rv)= Big1;
|
||||
break;
|
||||
default:
|
||||
WORD0(&rv)= Exp_mask;
|
||||
WORD1(&rv)= 0;
|
||||
word0(&rv)= Exp_mask;
|
||||
word1(&rv)= 0;
|
||||
}
|
||||
#else
|
||||
WORD0(&rv)= Exp_mask;
|
||||
WORD1(&rv)= 0;
|
||||
word0(&rv)= Exp_mask;
|
||||
word1(&rv)= 0;
|
||||
#endif
|
||||
#ifdef SET_INEXACT
|
||||
dval(&rv0)= 1e300;
|
||||
@ -1461,17 +1402,17 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
for(j= 0; e1 > 1; j++, e1>>= 1)
|
||||
if (e1 & 1)
|
||||
dval(&rv)*= bigtens[j];
|
||||
WORD0(&rv)-= P*Exp_msk1;
|
||||
word0(&rv)-= P*Exp_msk1;
|
||||
dval(&rv)*= bigtens[j];
|
||||
if ((z= WORD0(&rv) & Exp_mask) > Exp_msk1 * (DBL_MAX_EXP + Bias - P))
|
||||
if ((z= word0(&rv) & Exp_mask) > Exp_msk1 * (DBL_MAX_EXP + Bias - P))
|
||||
goto ovfl;
|
||||
if (z > Exp_msk1 * (DBL_MAX_EXP + Bias - 1 - P))
|
||||
{
|
||||
WORD0(&rv)= Big0;
|
||||
WORD1(&rv)= Big1;
|
||||
word0(&rv)= Big0;
|
||||
word1(&rv)= Big1;
|
||||
}
|
||||
else
|
||||
WORD0(&rv)+= P*Exp_msk1;
|
||||
word0(&rv)+= P*Exp_msk1;
|
||||
}
|
||||
}
|
||||
else if (e1 < 0)
|
||||
@ -1488,18 +1429,18 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
for(j= 0; e1 > 0; j++, e1>>= 1)
|
||||
if (e1 & 1)
|
||||
dval(&rv)*= tinytens[j];
|
||||
if (scale && (j = 2 * P + 1 - ((WORD0(&rv) & Exp_mask) >> Exp_shift)) > 0)
|
||||
if (scale && (j = 2 * P + 1 - ((word0(&rv) & Exp_mask) >> Exp_shift)) > 0)
|
||||
{
|
||||
if (j >= 32)
|
||||
{
|
||||
WORD1(&rv)= 0;
|
||||
word1(&rv)= 0;
|
||||
if (j >= 53)
|
||||
WORD0(&rv)= (P + 2) * Exp_msk1;
|
||||
word0(&rv)= (P + 2) * Exp_msk1;
|
||||
else
|
||||
WORD0(&rv)&= 0xffffffff << (j - 32);
|
||||
word0(&rv)&= 0xffffffff << (j - 32);
|
||||
}
|
||||
else
|
||||
WORD1(&rv)&= 0xffffffff << j;
|
||||
word1(&rv)&= 0xffffffff << j;
|
||||
}
|
||||
if (!dval(&rv))
|
||||
{
|
||||
@ -1517,8 +1458,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
for(;;)
|
||||
{
|
||||
bd= alloc_bigint(bd0->k, &alloc);
|
||||
COPY_BIGINT(bd, bd0);
|
||||
bb= d2b(&rv, &bbe, &bbbits, &alloc);
|
||||
copy_bigint(bd, bd0);
|
||||
bb= d2b(&rv, &bbe, &bbbits, &alloc);
|
||||
bs= integer2bigint(1, &alloc);
|
||||
|
||||
if (e >= 0)
|
||||
@ -1541,7 +1482,7 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
bs2++;
|
||||
#endif
|
||||
j= bbe - scale;
|
||||
i= j + bbbits - 1;
|
||||
i= j + bbbits - 1;
|
||||
if (i < Emin)
|
||||
j+= P - Emin;
|
||||
else
|
||||
@ -1600,9 +1541,9 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
else if (!dsign)
|
||||
{
|
||||
adj.d= -1.;
|
||||
if (!WORD1(&rv) && !(WORD0(&rv) & Frac_mask))
|
||||
if (!word1(&rv) && !(word0(&rv) & Frac_mask))
|
||||
{
|
||||
y= WORD0(&rv) & Exp_mask;
|
||||
y= word0(&rv) & Exp_mask;
|
||||
if (!scale || y > 2*P*Exp_msk1)
|
||||
{
|
||||
delta= left_shift(delta, Log2P, &alloc);
|
||||
@ -1611,8 +1552,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
}
|
||||
}
|
||||
apply_adj:
|
||||
if (scale && (y= WORD0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
|
||||
WORD0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
|
||||
if (scale && (y= word0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
|
||||
word0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
|
||||
dval(&rv)+= adj.d * ulp(&rv);
|
||||
}
|
||||
break;
|
||||
@ -1622,6 +1563,7 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
adj.d= 1.;
|
||||
if (adj.d <= 0x7ffffffe)
|
||||
{
|
||||
|
||||
y= adj.d;
|
||||
if (y != adj.d)
|
||||
{
|
||||
@ -1630,8 +1572,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
adj.d= y;
|
||||
}
|
||||
}
|
||||
if (scale && (y= WORD0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
|
||||
WORD0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
|
||||
if (scale && (y= word0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
|
||||
word0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
|
||||
adj.d*= ulp(&rv);
|
||||
if (dsign)
|
||||
dval(&rv)+= adj.d;
|
||||
@ -1643,8 +1585,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
|
||||
if (i < 0)
|
||||
{
|
||||
if (dsign || WORD1(&rv) || WORD0(&rv) & Bndry_mask ||
|
||||
(WORD0(&rv) & Exp_mask) <= (2 * P + 1) * Exp_msk1)
|
||||
if (dsign || word1(&rv) || word0(&rv) & Bndry_mask ||
|
||||
(word0(&rv) & Exp_mask) <= (2 * P + 1) * Exp_msk1)
|
||||
{
|
||||
#ifdef SET_INEXACT
|
||||
if (!delta->x[0] && delta->wds <= 1)
|
||||
@ -1668,25 +1610,24 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
{
|
||||
if (dsign)
|
||||
{
|
||||
if ((WORD0(&rv) & Bndry_mask1) == Bndry_mask1 &&
|
||||
WORD1(&rv) ==
|
||||
((scale && (y = WORD0(&rv) & Exp_mask) <= 2 * P * Exp_msk1) ?
|
||||
if ((word0(&rv) & Bndry_mask1) == Bndry_mask1 &&
|
||||
word1(&rv) ==
|
||||
((scale && (y = word0(&rv) & Exp_mask) <= 2 * P * Exp_msk1) ?
|
||||
(0xffffffff & (0xffffffff << (2*P+1-(y>>Exp_shift)))) :
|
||||
0xffffffff))
|
||||
{
|
||||
WORD0(&rv)= (WORD0(&rv) & Exp_mask) + Exp_msk1;
|
||||
WORD1(&rv) = 0;
|
||||
word0(&rv)= (word0(&rv) & Exp_mask) + Exp_msk1;
|
||||
word1(&rv) = 0;
|
||||
dsign = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (!(WORD0(&rv) & Bndry_mask) && !WORD1(&rv))
|
||||
else if (!(word0(&rv) & Bndry_mask) && !word1(&rv))
|
||||
{
|
||||
drop_down:
|
||||
|
||||
if (scale)
|
||||
{
|
||||
L= WORD0(&rv) & Exp_mask;
|
||||
L= word0(&rv) & Exp_mask;
|
||||
if (L <= (2 *P + 1) * Exp_msk1)
|
||||
{
|
||||
if (L > (P + 2) * Exp_msk1)
|
||||
@ -1694,12 +1635,12 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
goto undfl;
|
||||
}
|
||||
}
|
||||
L= (WORD0(&rv) & Exp_mask) - Exp_msk1;
|
||||
WORD0(&rv)= L | Bndry_mask1;
|
||||
WORD1(&rv)= 0xffffffff;
|
||||
L= (word0(&rv) & Exp_mask) - Exp_msk1;
|
||||
word0(&rv)= L | Bndry_mask1;
|
||||
word1(&rv)= 0xffffffff;
|
||||
break;
|
||||
}
|
||||
if (!(WORD1(&rv) & LSB))
|
||||
if (!(word1(&rv) & LSB))
|
||||
break;
|
||||
if (dsign)
|
||||
dval(&rv)+= ulp(&rv);
|
||||
@ -1716,9 +1657,9 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
{
|
||||
if (dsign)
|
||||
aadj= aadj1= 1.;
|
||||
else if (WORD1(&rv) || WORD0(&rv) & Bndry_mask)
|
||||
else if (word1(&rv) || word0(&rv) & Bndry_mask)
|
||||
{
|
||||
if (WORD1(&rv) == Tiny1 && !WORD0(&rv))
|
||||
if (word1(&rv) == Tiny1 && !word0(&rv))
|
||||
goto undfl;
|
||||
aadj= 1.;
|
||||
aadj1= -1.;
|
||||
@ -1751,24 +1692,24 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
aadj1+= 0.5;
|
||||
#endif
|
||||
}
|
||||
y= WORD0(&rv) & Exp_mask;
|
||||
y= word0(&rv) & Exp_mask;
|
||||
|
||||
if (y == Exp_msk1 * (DBL_MAX_EXP + Bias - 1))
|
||||
{
|
||||
dval(&rv0)= dval(&rv);
|
||||
WORD0(&rv)-= P * Exp_msk1;
|
||||
word0(&rv)-= P * Exp_msk1;
|
||||
adj.d= aadj1 * ulp(&rv);
|
||||
dval(&rv)+= adj.d;
|
||||
if ((WORD0(&rv) & Exp_mask) >= Exp_msk1 * (DBL_MAX_EXP + Bias - P))
|
||||
if ((word0(&rv) & Exp_mask) >= Exp_msk1 * (DBL_MAX_EXP + Bias - P))
|
||||
{
|
||||
if (WORD0(&rv0) == Big0 && WORD1(&rv0) == Big1)
|
||||
if (word0(&rv0) == Big0 && word1(&rv0) == Big1)
|
||||
goto ovfl;
|
||||
WORD0(&rv)= Big0;
|
||||
WORD1(&rv)= Big1;
|
||||
word0(&rv)= Big0;
|
||||
word1(&rv)= Big1;
|
||||
goto cont;
|
||||
}
|
||||
else
|
||||
WORD0(&rv)+= P * Exp_msk1;
|
||||
word0(&rv)+= P * Exp_msk1;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1782,7 +1723,7 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
aadj1= dsign ? aadj : -aadj;
|
||||
}
|
||||
dval(&aadj2) = aadj1;
|
||||
WORD0(&aadj2)+= (2 * P + 1) * Exp_msk1 - y;
|
||||
word0(&aadj2)+= (2 * P + 1) * Exp_msk1 - y;
|
||||
aadj1= dval(&aadj2);
|
||||
adj.d= aadj1 * ulp(&rv);
|
||||
dval(&rv)+= adj.d;
|
||||
@ -1795,14 +1736,14 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
dval(&rv)+= adj.d;
|
||||
}
|
||||
}
|
||||
z= WORD0(&rv) & Exp_mask;
|
||||
z= word0(&rv) & Exp_mask;
|
||||
#ifndef SET_INEXACT
|
||||
if (!scale)
|
||||
if (y == z)
|
||||
{
|
||||
L= (Long)aadj;
|
||||
aadj-= L;
|
||||
if (dsign || WORD1(&rv) || WORD0(&rv) & Bndry_mask)
|
||||
if (dsign || word1(&rv) || word0(&rv) & Bndry_mask)
|
||||
{
|
||||
if (aadj < .4999999 || aadj > .5000001)
|
||||
break;
|
||||
@ -1822,8 +1763,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
{
|
||||
if (!oldinexact)
|
||||
{
|
||||
WORD0(&rv0)= Exp_1 + (70 << Exp_shift);
|
||||
WORD1(&rv0)= 0;
|
||||
word0(&rv0)= Exp_1 + (70 << Exp_shift);
|
||||
word1(&rv0)= 0;
|
||||
dval(&rv0)+= 1.;
|
||||
}
|
||||
}
|
||||
@ -1832,13 +1773,14 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
|
||||
#endif
|
||||
if (scale)
|
||||
{
|
||||
WORD0(&rv0)= Exp_1 - 2 * P * Exp_msk1;
|
||||
WORD1(&rv0)= 0;
|
||||
word0(&rv0)= Exp_1 - 2 * P * Exp_msk1;
|
||||
word1(&rv0)= 0;
|
||||
dval(&rv)*= dval(&rv0);
|
||||
}
|
||||
#ifdef SET_INEXACT
|
||||
if (inexact && !(WORD0(&rv) & Exp_mask))
|
||||
if (inexact && !(word0(&rv) & Exp_mask))
|
||||
{
|
||||
|
||||
dval(&rv0)= 1e-300;
|
||||
dval(&rv0)*= dval(&rv0);
|
||||
}
|
||||
@ -1868,7 +1810,7 @@ static int quorem(Bigint *b, Bigint *S)
|
||||
sxe= sx + --n;
|
||||
bx= b->p.x;
|
||||
bxe= bx + n;
|
||||
q= *bxe / (*sxe + 1);
|
||||
q= *bxe / (*sxe + 1);
|
||||
if (q)
|
||||
{
|
||||
borrow= 0;
|
||||
@ -1941,16 +1883,15 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
|
||||
memset(alloc.freelist, 0, sizeof(alloc.freelist));
|
||||
|
||||
u.d= dd;
|
||||
if (WORD0(&u) & Sign_bit)
|
||||
if (word0(&u) & Sign_bit)
|
||||
{
|
||||
*sign= 1;
|
||||
WORD0(&u) &= ~Sign_bit;
|
||||
word0(&u) &= ~Sign_bit;
|
||||
}
|
||||
else
|
||||
*sign= 0;
|
||||
|
||||
|
||||
if (((WORD0(&u) & Exp_mask) == Exp_mask && (*decpt= DTOA_OVERFLOW)) ||
|
||||
if (((word0(&u) & Exp_mask) == Exp_mask && (*decpt= DTOA_OVERFLOW)) ||
|
||||
(!dval(&u) && (*decpt= 1)))
|
||||
{
|
||||
char *res= (char*) dtoa_alloc(2, &alloc);
|
||||
@ -1973,11 +1914,11 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
|
||||
#endif
|
||||
|
||||
b= d2b(&u, &be, &bbits, &alloc);
|
||||
if ((i= (int)(WORD0(&u) >> Exp_shift1 & (Exp_mask>>Exp_shift1))))
|
||||
if ((i= (int)(word0(&u) >> Exp_shift1 & (Exp_mask>>Exp_shift1))))
|
||||
{
|
||||
dval(&d2)= dval(&u);
|
||||
WORD0(&d2) &= Frac_mask1;
|
||||
WORD0(&d2) |= Exp_11;
|
||||
word0(&d2) &= Frac_mask1;
|
||||
word0(&d2) |= Exp_11;
|
||||
|
||||
|
||||
i-= Bias;
|
||||
@ -1987,17 +1928,17 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
|
||||
{
|
||||
|
||||
i= bbits + be + (Bias + (P-1) - 1);
|
||||
x= i > 32 ? WORD0(&u) << (64 - i) | WORD1(&u) >> (i - 32)
|
||||
: WORD1(&u) << (32 - i);
|
||||
x= i > 32 ? word0(&u) << (64 - i) | word1(&u) >> (i - 32)
|
||||
: word1(&u) << (32 - i);
|
||||
dval(&d2)= x;
|
||||
WORD0(&d2)-= 31*Exp_msk1;
|
||||
word0(&d2)-= 31*Exp_msk1;
|
||||
i-= (Bias + (P-1) - 1) + 1;
|
||||
denorm= 1;
|
||||
}
|
||||
ds= (dval(&d2)-1.5)*0.289529654602168 + 0.1760912590558 + i*0.301029995663981;
|
||||
k= (int)ds;
|
||||
if (ds < 0. && ds != k)
|
||||
k--;
|
||||
k--;
|
||||
k_check= 1;
|
||||
if (k >= 0 && k <= Ten_pmax)
|
||||
{
|
||||
@ -2079,7 +2020,7 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
|
||||
dval(&d2)= dval(&u);
|
||||
k0= k;
|
||||
ilim0= ilim;
|
||||
ieps= 2;
|
||||
ieps= 2;
|
||||
if (k > 0)
|
||||
{
|
||||
ds= tens[k&0xf];
|
||||
@ -2122,7 +2063,7 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
|
||||
ieps++;
|
||||
}
|
||||
dval(&eps)= ieps*dval(&u) + 7.;
|
||||
WORD0(&eps)-= (P-1)*Exp_msk1;
|
||||
word0(&eps)-= (P-1)*Exp_msk1;
|
||||
if (ilim == 0)
|
||||
{
|
||||
S= mhi= 0;
|
||||
@ -2284,8 +2225,8 @@ bump_up:
|
||||
#endif
|
||||
)
|
||||
{
|
||||
if (!WORD1(&u) && !(WORD0(&u) & Bndry_mask) &&
|
||||
(WORD0(&u) & (Exp_mask & (~Exp_msk1)))
|
||||
if (!word1(&u) && !(word0(&u) & Bndry_mask) &&
|
||||
(word0(&u) & (Exp_mask & (~Exp_msk1)))
|
||||
)
|
||||
{
|
||||
b2+= Log2P;
|
||||
@ -2347,7 +2288,7 @@ one_digit:
|
||||
if (spec_case)
|
||||
{
|
||||
mhi= alloc_bigint(mhi->k, &alloc);
|
||||
COPY_BIGINT(mhi, mlo);
|
||||
copy_bigint(mhi, mlo);
|
||||
mhi= left_shift(mhi, Log2P, &alloc);
|
||||
}
|
||||
|
||||
@ -2358,7 +2299,7 @@ one_digit:
|
||||
delta= bigint_diff(S, mhi, &alloc);
|
||||
j1= delta->sign ? 1 : bigint_cmp(b, delta);
|
||||
free_bigint(delta, &alloc);
|
||||
if (j1 == 0 && mode != 1 && !(WORD1(&u) & 1)
|
||||
if (j1 == 0 && mode != 1 && !(word1(&u) & 1)
|
||||
#ifdef Honor_FLT_ROUNDS
|
||||
&& rounding >= 1
|
||||
#endif
|
||||
@ -2371,7 +2312,7 @@ one_digit:
|
||||
*s++= dig;
|
||||
goto ret;
|
||||
}
|
||||
if (j < 0 || (j == 0 && mode != 1 && !(WORD1(&u) & 1)))
|
||||
if (j < 0 || (j == 0 && mode != 1 && !(word1(&u) & 1)))
|
||||
{
|
||||
if (!b->p.x[0] && b->wds <= 1)
|
||||
{
|
||||
@ -2487,5 +2428,4 @@ ret1:
|
||||
}
|
||||
|
||||
#undef P
|
||||
|
||||
#endif
|
||||
#undef Rounding
|
48
deps/oblib/src/lib/charset/ob_gb18030_2022_tab.h
vendored
48
deps/oblib/src/lib/charset/ob_gb18030_2022_tab.h
vendored
@ -46,38 +46,38 @@
|
||||
these arrays plus CHINESE_WEIGHT_BASE.
|
||||
*/
|
||||
|
||||
static const uint PINYIN_2_BYTE_START_2022 = 0x8140;
|
||||
static const uint PINYIN_2_BYTE_END_2022 = 0xFE9F;
|
||||
static const unsigned int PINYIN_2_BYTE_START_2022 = 0x8140;
|
||||
static const unsigned int PINYIN_2_BYTE_END_2022 = 0xFE9F;
|
||||
|
||||
static const uint PINYIN_4_BYTE_1_START_2022 = 0x8138FD38;
|
||||
static const uint PINYIN_4_1_DIFF_2022 = 11328;
|
||||
static const uint PINYIN_4_BYTE_1_END_2022 = 0x82359737;
|
||||
static const unsigned int PINYIN_4_BYTE_1_START_2022 = 0x8138FD38;
|
||||
static const unsigned int PINYIN_4_1_DIFF_2022 = 11328;
|
||||
static const unsigned int PINYIN_4_BYTE_1_END_2022 = 0x82359737;
|
||||
|
||||
static const uint PINYIN_4_BYTE_2_START_2022 = 0x95328236;
|
||||
static const uint PINYIN_4_2_DIFF_2022 = 254536;
|
||||
static const uint PINYIN_4_BYTE_2_END_2022 = 0x9A37F738;
|
||||
static const unsigned int PINYIN_4_BYTE_2_START_2022 = 0x95328236;
|
||||
static const unsigned int PINYIN_4_2_DIFF_2022 = 254536;
|
||||
static const unsigned int PINYIN_4_BYTE_2_END_2022 = 0x9A37F738;
|
||||
|
||||
static const uint STROKE_2_BYTE_START_2022 = 0x8140;
|
||||
static const uint STROKE_2_BYTE_END_2022 = 0xFE9F;
|
||||
static const unsigned int STROKE_2_BYTE_START_2022 = 0x8140;
|
||||
static const unsigned int STROKE_2_BYTE_END_2022 = 0xFE9F;
|
||||
|
||||
static const uint STROKE_4_BYTE_1_START_2022 = 0x8138FD38;
|
||||
static const uint STROKE_4_1_DIFF_2022 = 11328;
|
||||
static const uint STROKE_4_BYTE_1_END_2022 = 0x82359832;
|
||||
static const unsigned int STROKE_4_BYTE_1_START_2022 = 0x8138FD38;
|
||||
static const unsigned int STROKE_4_1_DIFF_2022 = 11328;
|
||||
static const unsigned int STROKE_4_BYTE_1_END_2022 = 0x82359832;
|
||||
|
||||
static const uint STROKE_4_BYTE_2_START_2022 = 0x95328236;
|
||||
static const uint STROKE_4_2_DIFF_2022 = 254536;
|
||||
static const uint STROKE_4_BYTE_2_END_2022 = 0x9B31A337;
|
||||
static const unsigned int STROKE_4_BYTE_2_START_2022 = 0x95328236;
|
||||
static const unsigned int STROKE_4_2_DIFF_2022 = 254536;
|
||||
static const unsigned int STROKE_4_BYTE_2_END_2022 = 0x9B31A337;
|
||||
|
||||
static const uint RADICAL_2_BYTE_START_2022 = 0x8140;
|
||||
static const uint RADICAL_2_BYTE_END_2022 = 0xFEA0;
|
||||
static const unsigned int RADICAL_2_BYTE_START_2022 = 0x8140;
|
||||
static const unsigned int RADICAL_2_BYTE_END_2022 = 0xFEA0;
|
||||
|
||||
static const uint RADICAL_4_BYTE_1_START_2022 = 0x8139EE39;
|
||||
static const uint RADICAL_4_1_DIFF_2022 = 12439;
|
||||
static const uint RADICAL_4_BYTE_1_END_2022 = 0x8430AE33;
|
||||
static const unsigned int RADICAL_4_BYTE_1_START_2022 = 0x8139EE39;
|
||||
static const unsigned int RADICAL_4_1_DIFF_2022 = 12439;
|
||||
static const unsigned int RADICAL_4_BYTE_1_END_2022 = 0x8430AE33;
|
||||
|
||||
static const uint RADICAL_4_BYTE_2_START_2022 = 0x95328236;
|
||||
static const uint RADICAL_4_2_DIFF_2022 = 254536;
|
||||
static const uint RADICAL_4_BYTE_2_END_2022 = 0x9B31A337;
|
||||
static const unsigned int RADICAL_4_BYTE_2_START_2022 = 0x95328236;
|
||||
static const unsigned int RADICAL_4_2_DIFF_2022 = 254536;
|
||||
static const unsigned int RADICAL_4_BYTE_2_END_2022 = 0x9B31A337;
|
||||
|
||||
static const uint16 gb18030_2022_2_pinyin_weight_py[] = {
|
||||
16323, 28217, 34164, 10708, 21648, 4104, 28850, 6524, 26203, 18824, 39282, 1913, 15200, 13279, 14725, 10029, /*[GB+8140, GB+814F]*/
|
||||
|
235
deps/oblib/src/lib/charset/str_uca_type.h
vendored
Normal file
235
deps/oblib/src/lib/charset/str_uca_type.h
vendored
Normal file
@ -0,0 +1,235 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
|
||||
#ifndef STR_UCA_TYPE_H
|
||||
#define STR_UCA_TYPE_H
|
||||
|
||||
#include <vector>
|
||||
|
||||
/*
|
||||
So far, only the Croatian collation needs to reorder the Latin and
|
||||
Cyrillic groups of characters. More may be added in the future.
|
||||
*/
|
||||
#define UCA_MAX_CHAR_GRP 4
|
||||
enum enum_uca_ver { UCA_V400, UCA_V520, UCA_V900 };
|
||||
|
||||
enum enum_char_grp {
|
||||
CHARGRP_NONE,
|
||||
CHARGRP_CORE,
|
||||
CHARGRP_LATIN,
|
||||
CHARGRP_CYRILLIC,
|
||||
CHARGRP_ARAB,
|
||||
CHARGRP_KANA,
|
||||
CHARGRP_OTHERS
|
||||
};
|
||||
|
||||
struct Weight_boundary {
|
||||
uint16 begin;
|
||||
uint16 end;
|
||||
};
|
||||
|
||||
struct Reorder_wt_rec {
|
||||
struct Weight_boundary old_wt_bdy;
|
||||
struct Weight_boundary new_wt_bdy;
|
||||
};
|
||||
|
||||
struct Reorder_param {
|
||||
enum enum_char_grp reorder_grp[UCA_MAX_CHAR_GRP];
|
||||
struct Reorder_wt_rec wt_rec[2 * UCA_MAX_CHAR_GRP];
|
||||
int wt_rec_num;
|
||||
uint16 max_weight;
|
||||
};
|
||||
|
||||
enum enum_case_first { CASE_FIRST_OFF, CASE_FIRST_UPPER, CASE_FIRST_LOWER };
|
||||
|
||||
struct Coll_param {
|
||||
struct Reorder_param *reorder_param;
|
||||
bool norm_enabled; // false = normalization off, default;
|
||||
// true = on
|
||||
enum enum_case_first case_first;
|
||||
};
|
||||
|
||||
/*
|
||||
NOTE: If you change OB_UCA_MAX_CONTRACTION, be sure to update the comment on
|
||||
OB_UCA_CNT_MID1 further down in this file, as it might cause us to run out of
|
||||
bits in a byte flag.
|
||||
*/
|
||||
#define OB_UCA_MAX_CONTRACTION 6
|
||||
#define OB_UCA_MAX_WEIGHT_SIZE 25
|
||||
#define OB_UCA_WEIGHT_LEVELS 1
|
||||
|
||||
/*
|
||||
We store all the contractions in a trie, indexed on the codepoints they
|
||||
consist of. The trie is organized as:
|
||||
1. Each node stores one code point (ch) of contraction, and a list of nodes
|
||||
(child_nodes) store all possible following code points.
|
||||
2. The vector in ObUCAInfo stores a list of nodes which store the first
|
||||
code points of all contractions.
|
||||
3. Each node has a boolean value (is_contraction_tail) which shows
|
||||
whether the code point stored in the node is the end of a contraction.
|
||||
This is necessary because even if one code point is the end of a
|
||||
contraction, there might be a longer contraction that contains all the
|
||||
code points in the path (e.g., for Hungarian, both 'DZ' and 'DZS' are
|
||||
contractions).
|
||||
4. A contraction is formed by all the code points in the path until the
|
||||
end of the contraction.
|
||||
5. If it is the end of a contraction (is_contraction_tail == true), the
|
||||
weight of this contraction is stored in array weight.
|
||||
6. If it is the end of a contraction (is_contraction_tail == true),
|
||||
with_context shows whether it is common contraction (with_context ==
|
||||
false), or previous context contraction (with_context == true).
|
||||
7. If it is the end of a contraction (is_contraction_tail == true),
|
||||
contraction_len shows how many code points this contraction consists of.
|
||||
*/
|
||||
struct ObContraction {
|
||||
ob_wc_t ch;
|
||||
// Lists of following nodes.
|
||||
std::vector<ObContraction> child_nodes;
|
||||
std::vector<ObContraction> child_nodes_context;
|
||||
|
||||
// weight and contraction_len are only useful when is_contraction_tail is true.
|
||||
uint16 weight[OB_UCA_MAX_WEIGHT_SIZE]; /* Its weight string, 0-terminated */
|
||||
bool is_contraction_tail;
|
||||
size_t contraction_len;
|
||||
};
|
||||
|
||||
struct ObUCAInfo {
|
||||
enum enum_uca_ver version;
|
||||
|
||||
// Collation weights.
|
||||
ob_wc_t maxchar;
|
||||
uchar *lengths;
|
||||
uint16 **weights;
|
||||
bool have_contractions;
|
||||
std::vector<ObContraction> *contraction_nodes;
|
||||
/*
|
||||
contraction_flags is only used when a collation has contraction rule.
|
||||
UCA collation supports at least 65535 characters, but only a few of
|
||||
them can be part of a contraction, so it is a huge waste of time to find out
|
||||
whether one character is in contraction list for every character.
|
||||
contraction_flags points to memory which is allocated when a collation
|
||||
has contraction rule. For a character in contraction, its corresponding
|
||||
byte (contraction_flags[ch & OB_UCA_CNT_FLAG_MASK]) will be set to a certain value
|
||||
according to the position (head, tail or middle) of this character in
|
||||
contraction. This byte will be used to quickly check whether one character
|
||||
can be part of contraction.
|
||||
*/
|
||||
char *contraction_flags;
|
||||
|
||||
/* Logical positions */
|
||||
ob_wc_t first_non_ignorable;
|
||||
ob_wc_t last_non_ignorable;
|
||||
ob_wc_t first_primary_ignorable;
|
||||
ob_wc_t last_primary_ignorable;
|
||||
ob_wc_t first_secondary_ignorable;
|
||||
ob_wc_t last_secondary_ignorable;
|
||||
ob_wc_t first_tertiary_ignorable;
|
||||
ob_wc_t last_tertiary_ignorable;
|
||||
ob_wc_t first_trailing;
|
||||
ob_wc_t last_trailing;
|
||||
ob_wc_t first_variable;
|
||||
ob_wc_t last_variable;
|
||||
/*
|
||||
extra_ce_pri_base, extra_ce_sec_base and extra_ce_ter_base are only used for
|
||||
the UCA collations whose UCA version is not smaller than UCA_V900. For why
|
||||
we need this extra CE, please see the comment in my_char_weight_put_900()
|
||||
and apply_primary_shift_900().
|
||||
|
||||
The value of these three variables is set by the definition of my_uca_v900.
|
||||
The value of extra_ce_pri_base is usually 0x54A4 (which is the maximum
|
||||
regular weight value plus one, 0x54A3 + 1 = 0x54A4). But for the Chinese
|
||||
collation, the extra_ce_pri_base needs to change. This is because 0x54A4 has
|
||||
been occupied to do reordering. There might be weight conflict if we still
|
||||
use 0x54A4. Please also see the comment on modify_all_zh_pages().
|
||||
*/
|
||||
uint16 extra_ce_pri_base; // Primary weight of extra CE
|
||||
uint16 extra_ce_sec_base; // Secondary weight of extra CE
|
||||
uint16 extra_ce_ter_base; // Tertiary weight of extra CE
|
||||
};
|
||||
|
||||
#define OB_UCA_CNT_FLAG_SIZE 4096
|
||||
#define OB_UCA_CNT_FLAG_MASK 4095
|
||||
|
||||
/** Whether the given character can be the first in any contraction. */
|
||||
#define OB_UCA_CNT_HEAD 1
|
||||
|
||||
/** Whether the given character can be the last in any contraction. */
|
||||
#define OB_UCA_CNT_TAIL 2
|
||||
|
||||
/**
|
||||
Whether the given character can be the second in any contraction.
|
||||
|
||||
Also defined implicitly through shifting OB_UCA_CNT_MID1:
|
||||
|
||||
\#define OB_UCA_CNT_MID2 8
|
||||
\#define OB_UCA_CNT_MID3 16
|
||||
\#define OB_UCA_CNT_MID4 32
|
||||
|
||||
There's no need for OB_UCA_CNT_MID5 (which would cause us to run out of
|
||||
bits) since OB_UCA_MAX_CONTRACTION is 6 (so head, four in the middle,
|
||||
and then tail).
|
||||
*/
|
||||
#define OB_UCA_CNT_MID1 4
|
||||
|
||||
/**
|
||||
Whether the given character is the first part of a context-sensitive
|
||||
contraction. Context-sensitive contractions are like normal contractions,
|
||||
except that for performance reasons, they trigger on the _last_ character
|
||||
instead of the first. The case given in Unicode TR35 is that in some
|
||||
scripts (such as katakana in Japanese), "a-" should sort as "aa"
|
||||
(except on the tertiary level), "e-" should sort as "ee" and so on.
|
||||
However, adding regular contractions on "a" and "e" would cause undue
|
||||
performance loss, so instead, we add a special "context-sensitive"
|
||||
contraction on "-" that then looks at the _previous_ character.
|
||||
|
||||
We don't support context-sensitive contractions longer than two characters
|
||||
at the moment, since none exist in CLDR. Thus, there is no
|
||||
OB_UCA_PREVIOUS_CONTEXT_MID1 and so on.
|
||||
*/
|
||||
#define OB_UCA_PREVIOUS_CONTEXT_HEAD 64
|
||||
|
||||
/** Similar to OB_UCA_PREVIOUS_CONTEXT_HEAD, just for the tail. */
|
||||
#define OB_UCA_PREVIOUS_CONTEXT_TAIL 128
|
||||
|
||||
#define OB_UCA_PSHIFT 8
|
||||
|
||||
/**
|
||||
Check if a code point can be contraction head
|
||||
|
||||
@param flags Pointer to UCA contraction flag data
|
||||
@param wc Code point
|
||||
|
||||
@retval 0 - cannot be contraction head
|
||||
@retval 1 - can be contraction head
|
||||
*/
|
||||
|
||||
inline bool ob_uca_can_be_contraction_head(const char *flags, ob_wc_t wc) {
|
||||
return flags[wc & OB_UCA_CNT_FLAG_MASK] & OB_UCA_CNT_HEAD;
|
||||
}
|
||||
|
||||
/**
|
||||
Check if a code point can be contraction tail
|
||||
|
||||
@param flags Pointer to UCA contraction flag data
|
||||
@param wc Code point
|
||||
|
||||
@retval 0 - cannot be contraction tail
|
||||
@retval 1 - can be contraction tail
|
||||
*/
|
||||
|
||||
inline bool ob_uca_can_be_contraction_tail(const char *flags, ob_wc_t wc) {
|
||||
return flags[wc & OB_UCA_CNT_FLAG_MASK] & OB_UCA_CNT_TAIL;
|
||||
}
|
||||
|
||||
const uint16 *ob_uca_contraction2_weight(
|
||||
const std::vector<ObContraction> *cont_nodes, ob_wc_t wc1, ob_wc_t wc2);
|
||||
#endif
|
270961
deps/oblib/src/lib/charset/uca900_data.h
vendored
Normal file
270961
deps/oblib/src/lib/charset/uca900_data.h
vendored
Normal file
File diff suppressed because it is too large
Load Diff
571
deps/oblib/src/lib/charset/uca900_ja_data.h
vendored
Normal file
571
deps/oblib/src/lib/charset/uca900_ja_data.h
vendored
Normal file
@ -0,0 +1,571 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
#ifndef UCA900_JA_DATA_H
|
||||
#define UCA900_JA_DATA_H
|
||||
|
||||
// Quaternary weight of katakana.
|
||||
static constexpr int JA_KATA_QUAT_WEIGHT= 0x08;
|
||||
// Quaternary weight of hiragana.
|
||||
static constexpr int JA_HIRA_QUAT_WEIGHT= 0x02;
|
||||
static const char ja_cldr_30[]=
|
||||
"&\\u309D <<<< \\u30FD"
|
||||
"&[before 3]\\u3041 <<<\\u3041|\\u30FC=\\u3042|\\u30FC=\\u304B|\\u30FC"
|
||||
"=\\u3095|\\u30FC=\\u304C|\\u30FC=\\u3055|\\u30FC"
|
||||
"=\\u3056|\\u30FC=\\u305F|\\u30FC=\\u3060|\\u30FC"
|
||||
"=\\u306A|\\u30FC=\\u306F|\\u30FC=\\u3070|\\u30FC"
|
||||
"=\\u3071|\\u30FC=\\u307E|\\u30FC=\\u3083|\\u30FC"
|
||||
"=\\u3084|\\u30FC=\\u3089|\\u30FC=\\u308E|\\u30FC"
|
||||
"=\\u308F|\\u30FC"
|
||||
"<<<<\\u30A1|\\u30FC=\\uFF67|\\u30FC=\\u30A2|\\u30FC"
|
||||
"=\\uFF71|\\u30FC=\\u30AB|\\u30FC=\\uFF76|\\u30FC"
|
||||
"=\\u30AC|\\u30FC=\\u30B5|\\u30FC=\\uFF7B|\\u30FC"
|
||||
"=\\u30B6|\\u30FC=\\u30BF|\\u30FC=\\uFF80|\\u30FC"
|
||||
"=\\u30C0|\\u30FC=\\u30CA|\\u30FC=\\uFF85|\\u30FC"
|
||||
"=\\u30CF|\\u30FC=\\uFF8A|\\u30FC=\\u31F5|\\u30FC"
|
||||
"=\\u30D0|\\u30FC=\\u30D1|\\u30FC=\\u30DE|\\u30FC"
|
||||
"=\\uFF8F|\\u30FC=\\u30E3|\\u30FC=\\uFF6C|\\u30FC"
|
||||
"=\\u30E4|\\u30FC=\\uFF94|\\u30FC=\\u30E9|\\u30FC"
|
||||
"=\\uFF97|\\u30FC=\\u31FB|\\u30FC=\\u30EE|\\u30FC"
|
||||
"=\\u30EF|\\u30FC=\\uFF9C|\\u30FC=\\u30F5|\\u30FC"
|
||||
"=\\u30F7|\\u30FC"
|
||||
"&[before 3]\\u3043 <<<\\u3043|\\u30FC=\\u3044|\\u30FC=\\u304D|\\u30FC"
|
||||
"=\\u304E|\\u30FC=\\u3057|\\u30FC=\\u3058|\\u30FC"
|
||||
"=\\u3061|\\u30FC=\\u3062|\\u30FC=\\u306B|\\u30FC"
|
||||
"=\\u3072|\\u30FC=\\u3073|\\u30FC=\\u3074|\\u30FC"
|
||||
"=\\u307F|\\u30FC=\\u308A|\\u30FC=\\u3090|\\u30FC"
|
||||
"<<<<\\u30A3|\\u30FC=\\uFF68|\\u30FC=\\u30A4|\\u30FC"
|
||||
"=\\uFF72|\\u30FC=\\u30AD|\\u30FC=\\uFF77|\\u30FC"
|
||||
"=\\u30AE|\\u30FC=\\u30B7|\\u30FC=\\uFF7C|\\u30FC"
|
||||
"=\\u31F1|\\u30FC=\\u30B8|\\u30FC=\\u30C1|\\u30FC"
|
||||
"=\\uFF81|\\u30FC=\\u30C2|\\u30FC=\\u30CB|\\u30FC"
|
||||
"=\\uFF86|\\u30FC=\\u30D2|\\u30FC=\\uFF8B|\\u30FC"
|
||||
"=\\u31F6|\\u30FC=\\u30D3|\\u30FC=\\u30D4|\\u30FC"
|
||||
"=\\u30DF|\\u30FC=\\uFF90|\\u30FC=\\u30EA|\\u30FC"
|
||||
"=\\uFF98|\\u30FC=\\u31FC|\\u30FC=\\u30F0|\\u30FC"
|
||||
"=\\u30F8|\\u30FC"
|
||||
"&[before 3]\\u3045 <<<\\u3045|\\u30FC=\\u3046|\\u30FC=\\u304F|\\u30FC"
|
||||
"=\\u3050|\\u30FC=\\u3059|\\u30FC=\\u305A|\\u30FC"
|
||||
"=\\u3063|\\u30FC=\\u3064|\\u30FC=\\u3065|\\u30FC"
|
||||
"=\\u306C|\\u30FC=\\u3075|\\u30FC=\\u3076|\\u30FC"
|
||||
"=\\u3077|\\u30FC=\\u3080|\\u30FC=\\u3085|\\u30FC"
|
||||
"=\\u3086|\\u30FC=\\u308B|\\u30FC=\\u3094|\\u30FC"
|
||||
"<<<<\\u30A5|\\u30FC=\\uFF69|\\u30FC=\\u30A6|\\u30FC"
|
||||
"=\\uFF73|\\u30FC=\\u30AF|\\u30FC=\\uFF78|\\u30FC"
|
||||
"=\\u31F0|\\u30FC=\\u30B0|\\u30FC=\\u30B9|\\u30FC"
|
||||
"=\\uFF7D|\\u30FC=\\u31F2|\\u30FC=\\u30BA|\\u30FC"
|
||||
"=\\u30C3|\\u30FC=\\uFF6F|\\u30FC=\\u30C4|\\u30FC"
|
||||
"=\\uFF82|\\u30FC=\\u30C5|\\u30FC=\\u30CC|\\u30FC"
|
||||
"=\\uFF87|\\u30FC=\\u31F4|\\u30FC=\\u30D5|\\u30FC"
|
||||
"=\\uFF8C|\\u30FC=\\u31F7|\\u30FC=\\u30D6|\\u30FC"
|
||||
"=\\u30D7|\\u30FC=\\u30E0|\\u30FC=\\uFF91|\\u30FC"
|
||||
"=\\u31FA|\\u30FC=\\u30E5|\\u30FC=\\uFF6D|\\u30FC"
|
||||
"=\\u30E6|\\u30FC=\\uFF95|\\u30FC=\\u30EB|\\u30FC"
|
||||
"=\\uFF99|\\u30FC=\\u31FD|\\u30FC=\\u30F4|\\u30FC"
|
||||
"&[before 3]\\u3047 <<<\\u3047|\\u30FC=\\u3048|\\u30FC=\\u3051|\\u30FC"
|
||||
"=\\u3096|\\u30FC=\\u3052|\\u30FC=\\u305B|\\u30FC"
|
||||
"=\\u305C|\\u30FC=\\u3066|\\u30FC=\\u3067|\\u30FC"
|
||||
"=\\u306D|\\u30FC=\\u3078|\\u30FC=\\u3079|\\u30FC"
|
||||
"=\\u307A|\\u30FC=\\u3081|\\u30FC=\\u308C|\\u30FC"
|
||||
"=\\u3091|\\u30FC"
|
||||
"<<<<\\u30A7|\\u30FC=\\uFF6A|\\u30FC=\\u30A8|\\u30FC"
|
||||
"=\\uFF74|\\u30FC=\\u30B1|\\u30FC=\\uFF79|\\u30FC"
|
||||
"=\\u30B2|\\u30FC=\\u30BB|\\u30FC=\\uFF7E|\\u30FC"
|
||||
"=\\u30BC|\\u30FC=\\u30C6|\\u30FC=\\uFF83|\\u30FC"
|
||||
"=\\u30C7|\\u30FC=\\u30CD|\\u30FC=\\uFF88|\\u30FC"
|
||||
"=\\u30D8|\\u30FC=\\uFF8D|\\u30FC=\\u31F8|\\u30FC"
|
||||
"=\\u30D9|\\u30FC=\\u30DA|\\u30FC=\\u30E1|\\u30FC"
|
||||
"=\\uFF92|\\u30FC=\\u30EC|\\u30FC=\\uFF9A|\\u30FC"
|
||||
"=\\u31FE|\\u30FC=\\u30F1|\\u30FC=\\u30F6|\\u30FC"
|
||||
"=\\u30F9|\\u30FC"
|
||||
"&[before 3]\\u3049 <<<\\u3049|\\u30FC=\\u304A|\\u30FC=\\u3053|\\u30FC"
|
||||
"=\\u3054|\\u30FC=\\u305D|\\u30FC=\\u305E|\\u30FC"
|
||||
"=\\u3068|\\u30FC=\\u3069|\\u30FC=\\u306E|\\u30FC"
|
||||
"=\\u307B|\\u30FC=\\u307C|\\u30FC=\\u307D|\\u30FC"
|
||||
"=\\u3082|\\u30FC=\\u3087|\\u30FC=\\u3088|\\u30FC"
|
||||
"=\\u308D|\\u30FC=\\u3092|\\u30FC"
|
||||
"<<<<\\u30A9|\\u30FC=\\uFF6B|\\u30FC=\\u30AA|\\u30FC"
|
||||
"=\\uFF75|\\u30FC=\\u30B3|\\u30FC=\\uFF7A|\\u30FC"
|
||||
"=\\u30B4|\\u30FC=\\u30BD|\\u30FC=\\uFF7F|\\u30FC"
|
||||
"=\\u30BE|\\u30FC=\\u30C8|\\u30FC=\\uFF84|\\u30FC"
|
||||
"=\\u31F3|\\u30FC=\\u30C9|\\u30FC=\\u30CE|\\u30FC"
|
||||
"=\\uFF89|\\u30FC=\\u30DB|\\u30FC=\\uFF8E|\\u30FC"
|
||||
"=\\u31F9|\\u30FC=\\u30DC|\\u30FC=\\u30DD|\\u30FC"
|
||||
"=\\u30E2|\\u30FC=\\uFF93|\\u30FC=\\u30E7|\\u30FC"
|
||||
"=\\uFF6E|\\u30FC=\\u30E8|\\u30FC=\\uFF96|\\u30FC"
|
||||
"=\\u30ED|\\u30FC=\\uFF9B|\\u30FC=\\u31FF|\\u30FC"
|
||||
"=\\u30F2|\\u30FC=\\uFF66|\\u30FC=\\u30FA|\\u30FC"
|
||||
"&[before 3]\\u3042 <<<\\u3042|\\u309D=\\u3041|\\u309D"
|
||||
"<<<<\\u30A2|\\u30FD=\\uFF71|\\u30FD=\\u30A1|\\u30FD"
|
||||
"=\\uFF67|\\u30FD"
|
||||
"&[before 3]\\u3044 <<<\\u3044|\\u309D=\\u3043|\\u309D"
|
||||
"<<<<\\u30A4|\\u30FD=\\uFF72|\\u30FD=\\u30A3|\\u30FD"
|
||||
"=\\uFF68|\\u30FD"
|
||||
"&[before 3]\\u3046 <<<\\u3046|\\u309D=\\u3045|\\u309D=\\u3094|\\u309D"
|
||||
"=\\u3046|\\u309E/\\u3099"
|
||||
"=\\u3045|\\u309E/\\u3099"
|
||||
"=\\u3094|\\u309E/\\u3099"
|
||||
"<<<<\\u30A6|\\u30FD=\\uFF73|\\u30FD=\\u30A5|\\u30FD"
|
||||
"=\\uFF69|\\u30FD=\\u30F4|\\u30FD"
|
||||
"=\\u30A6|\\u30FE/\\u3099"
|
||||
"=\\uFF73|\\u30FE/\\u3099"
|
||||
"=\\u30A5|\\u30FE/\\u3099"
|
||||
"=\\uFF69|\\u30FE/\\u3099"
|
||||
"=\\u30F4|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3048 <<<\\u3048|\\u309D=\\u3047|\\u309D"
|
||||
"<<<<\\u30A8|\\u30FD=\\uFF74|\\u30FD=\\u30A7|\\u30FD"
|
||||
"=\\uFF6A|\\u30FD"
|
||||
"&[before 3]\\u304A <<<\\u304A|\\u309D=\\u3049|\\u309D"
|
||||
"<<<<\\u30AA|\\u30FD=\\uFF75|\\u30FD=\\u30A9|\\u30FD"
|
||||
"=\\uFF6B|\\u30FD"
|
||||
"&[before 3]\\u304B <<<\\u304B|\\u309D=\\u3095|\\u309D"
|
||||
"<<<<\\u30AB|\\u30FD=\\uFF76|\\u30FD=\\u30F5|\\u30FD"
|
||||
"&[before 3]\\u304C <<<\\u304C|\\u309D <<<<\\u30AC|\\u30FD"
|
||||
"&[before 3]\\u304D <<<\\u304D|\\u309D=\\u304E|\\u309D"
|
||||
"=\\u304D|\\u309E/\\u3099"
|
||||
"=\\u304E|\\u309E/\\u3099"
|
||||
"<<<<\\u30AD|\\u30FD=\\uFF77|\\u30FD=\\u30AE|\\u30FD"
|
||||
"=\\u30AD|\\u30FE/\\u3099"
|
||||
"=\\uFF77|\\u30FE/\\u3099"
|
||||
"=\\u30AE|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u304F <<<\\u304F|\\u309D=\\u3050|\\u309D"
|
||||
"=\\u304F|\\u309E/\\u3099"
|
||||
"=\\u3050|\\u309E/\\u3099"
|
||||
"<<<<\\u30AF|\\u30FD=\\uFF78|\\u30FD=\\u31F0|\\u30FD"
|
||||
"=\\u30B0|\\u30FD=\\u30AF|\\u30FE/\\u3099"
|
||||
"=\\uFF78|\\u30FE/\\u3099"
|
||||
"=\\u31F0|\\u30FE/\\u3099"
|
||||
"=\\u30B0|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3051 <<<\\u3051|\\u309D=\\u3096|\\u309D"
|
||||
"<<<<\\u30B1|\\u30FD=\\uFF79|\\u30FD=\\u30F6|\\u30FD"
|
||||
"&[before 3]\\u3052 <<<\\u3052|\\u309D <<<<\\u30B2|\\u30FD"
|
||||
"&[before 3]\\u3053 <<<\\u3053|\\u309D=\\u3054|\\u309D"
|
||||
"=\\u3053|\\u309E/\\u3099"
|
||||
"=\\u3054|\\u309E/\\u3099"
|
||||
"<<<<\\u30B3|\\u30FD=\\uFF7A|\\u30FD=\\u30B4|\\u30FD"
|
||||
"=\\u30B3|\\u30FE/\\u3099"
|
||||
"=\\uFF7A|\\u30FE/\\u3099"
|
||||
"=\\u30B4|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3055 <<<\\u3055|\\u309D=\\u3056|\\u309D"
|
||||
"=\\u3055|\\u309E/\\u3099"
|
||||
"=\\u3056|\\u309E/\\u3099"
|
||||
"<<<<\\u30B5|\\u30FD=\\uFF7B|\\u30FD=\\u30B6|\\u30FD"
|
||||
"=\\u30B5|\\u30FE/\\u3099"
|
||||
"=\\uFF7B|\\u30FE/\\u3099"
|
||||
"=\\u30B6|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3057 <<<\\u3057|\\u309D=\\u3058|\\u309D"
|
||||
"=\\u3057|\\u309E/\\u3099"
|
||||
"=\\u3058|\\u309E/\\u3099"
|
||||
"<<<<\\u30B7|\\u30FD=\\uFF7C|\\u30FD=\\u31F1|\\u30FD"
|
||||
"=\\u30B8|\\u30FD=\\u30B7|\\u30FE/\\u3099"
|
||||
"=\\uFF7C|\\u30FE/\\u3099"
|
||||
"=\\u31F1|\\u30FE/\\u3099"
|
||||
"=\\u30B8|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3059 <<<\\u3059|\\u309D=\\u305A|\\u309D"
|
||||
"=\\u3059|\\u309E/\\u3099"
|
||||
"=\\u305A|\\u309E/\\u3099"
|
||||
"<<<<\\u30B9|\\u30FD=\\uFF7D|\\u30FD=\\u31F2|\\u30FD"
|
||||
"=\\u30BA|\\u30FD=\\u30B9|\\u30FE/\\u3099"
|
||||
"=\\uFF7D|\\u30FE/\\u3099"
|
||||
"=\\u31F2|\\u30FE/\\u3099"
|
||||
"=\\u30BA|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u305B <<<\\u305B|\\u309D=\\u305C|\\u309D"
|
||||
"=\\u305B|\\u309E/\\u3099"
|
||||
"=\\u305C|\\u309E/\\u3099"
|
||||
"<<<<\\u30BB|\\u30FD=\\uFF7E|\\u30FD=\\u30BC|\\u30FD"
|
||||
"=\\u30BB|\\u30FE/\\u3099"
|
||||
"=\\uFF7E|\\u30FE/\\u3099"
|
||||
"=\\u30BC|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u305D <<<\\u305D|\\u309D=\\u305E|\\u309D"
|
||||
"=\\u305D|\\u309E/\\u3099"
|
||||
"=\\u305E|\\u309E/\\u3099"
|
||||
"<<<<\\u30BD|\\u30FD=\\uFF7F|\\u30FD=\\u30BE|\\u30FD"
|
||||
"=\\u30BD|\\u30FE/\\u3099"
|
||||
"=\\uFF7F|\\u30FE/\\u3099"
|
||||
"=\\u30BE|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u305F <<<\\u305F|\\u309D=\\u3060|\\u309D"
|
||||
"=\\u305F|\\u309E/\\u3099"
|
||||
"=\\u3060|\\u309E/\\u3099"
|
||||
"<<<<\\u30BF|\\u30FD=\\uFF80|\\u30FD=\\u30C0|\\u30FD"
|
||||
"=\\u30BF|\\u30FE/\\u3099"
|
||||
"=\\uFF80|\\u30FE/\\u3099"
|
||||
"=\\u30C0|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3061 <<<\\u3061|\\u309D=\\u3062|\\u309D"
|
||||
"=\\u3061|\\u309E/\\u3099"
|
||||
"=\\u3062|\\u309E/\\u3099"
|
||||
"<<<<\\u30C1|\\u30FD=\\uFF81|\\u30FD=\\u30C2|\\u30FD"
|
||||
"=\\u30C1|\\u30FE/\\u3099"
|
||||
"=\\uFF81|\\u30FE/\\u3099"
|
||||
"=\\u30C2|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3064 <<<\\u3064|\\u309D=\\u3063|\\u309D=\\u3065|\\u309D"
|
||||
"=\\u3064|\\u309E/\\u3099"
|
||||
"=\\u3065|\\u309E/\\u3099"
|
||||
"=\\u3064|\\u309D=\\u3063|\\u309E/\\u3099"
|
||||
"=\\u3064|\\u309E/\\u3099"
|
||||
"<<<<\\u30C4|\\u30FD=\\uFF82|\\u30FD=\\u30C3|\\u30FD"
|
||||
"=\\uFF6F|\\u30FD=\\u30C5|\\u30FD"
|
||||
"=\\u30C4|\\u30FE/\\u3099"
|
||||
"=\\uFF82|\\u30FE/\\u3099"
|
||||
"=\\u30C5|\\u30FE/\\u3099=\\u30C4|\\u30FD"
|
||||
"=\\uFF82|\\u30FD=\\u30C3|\\u30FE/\\u3099"
|
||||
"=\\uFF6F|\\u30FE/\\u3099"
|
||||
"=\\u30C4|\\u30FE/\\u3099"
|
||||
"=\\uFF82|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3066 <<<\\u3066|\\u309D=\\u3067|\\u309D"
|
||||
"=\\u3066|\\u309E/\\u3099"
|
||||
"=\\u3067|\\u309E/\\u3099"
|
||||
"<<<<\\u30C6|\\u30FD=\\uFF83|\\u30FD=\\u30C7|\\u30FD"
|
||||
"=\\u30C6|\\u30FE/\\u3099"
|
||||
"=\\uFF83|\\u30FE/\\u3099"
|
||||
"=\\u30C7|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3068 <<<\\u3068|\\u309D=\\u3069|\\u309D"
|
||||
"=\\u3068|\\u309E/\\u3099"
|
||||
"=\\u3069|\\u309E/\\u3099"
|
||||
"<<<<\\u30C8|\\u30FD=\\uFF84|\\u30FD=\\u31F3|\\u30FD"
|
||||
"=\\u30C9|\\u30FD=\\u30C8|\\u30FE/\\u3099"
|
||||
"=\\uFF84|\\u30FE/\\u3099"
|
||||
"=\\u31F3|\\u30FE/\\u3099"
|
||||
"=\\u30C9|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u306A <<<\\u306A|\\u309D <<<<\\u30CA|\\u30FD=\\uFF85|\\u30FD"
|
||||
"&[before 3]\\u306B <<<\\u306B|\\u309D <<<<\\u30CB|\\u30FD=\\uFF86|\\u30FD"
|
||||
"&[before 3]\\u306C <<<\\u306C|\\u309D <<<<\\u30CC|\\u30FD=\\uFF87|\\u30FD"
|
||||
"=\\u31F4|\\u30FD"
|
||||
"&[before 3]\\u306D <<<\\u306D|\\u309D <<<<\\u30CD|\\u30FD=\\uFF88|\\u30FD"
|
||||
"&[before 3]\\u306E <<<\\u306E|\\u309D <<<<\\u30CE|\\u30FD=\\uFF89|\\u30FD"
|
||||
"&[before 3]\\u306F <<<\\u306F|\\u309D=\\u3070|\\u309D"
|
||||
"=\\u306F|\\u309E/\\u3099"
|
||||
"=\\u3070|\\u309E/\\u3099"
|
||||
"=\\u3071|\\u309D=\\u3071|\\u309E/\\u3099"
|
||||
"<<<<\\u30CF|\\u30FD=\\uFF8A|\\u30FD=\\u31F5|\\u30FD"
|
||||
"=\\u30D0|\\u30FD=\\u30CF|\\u30FE/\\u3099"
|
||||
"=\\uFF8A|\\u30FE/\\u3099"
|
||||
"=\\u31F5|\\u30FE/\\u3099"
|
||||
"=\\u30D0|\\u30FE/\\u3099=\\u30D1|\\u30FD"
|
||||
"=\\u30D1|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3072 <<<\\u3072|\\u309D=\\u3073|\\u309D"
|
||||
"=\\u3072|\\u309E/\\u3099"
|
||||
"=\\u3073|\\u309E/\\u3099"
|
||||
"=\\u3074|\\u309D=\\u3074|\\u309E/\\u3099"
|
||||
"<<<<\\u30D2|\\u30FD=\\uFF8B|\\u30FD=\\u31F6|\\u30FD"
|
||||
"=\\u30D3|\\u30FD=\\u30D2|\\u30FE/\\u3099"
|
||||
"=\\uFF8B|\\u30FE/\\u3099"
|
||||
"=\\u31F6|\\u30FE/\\u3099"
|
||||
"=\\u30D3|\\u30FE/\\u3099=\\u30D4|\\u30FD"
|
||||
"=\\u30D4|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3075 <<<\\u3075|\\u309D=\\u3076|\\u309D"
|
||||
"=\\u3075|\\u309E/\\u3099"
|
||||
"=\\u3076|\\u309E/\\u3099"
|
||||
"=\\u3077|\\u309D=\\u3077|\\u309E/\\u3099"
|
||||
"<<<<\\u30D5|\\u30FD=\\uFF8C|\\u30FD=\\u31F7|\\u30FD"
|
||||
"=\\u30D6|\\u30FD=\\u30D5|\\u30FE/\\u3099"
|
||||
"=\\uFF8C|\\u30FE/\\u3099"
|
||||
"=\\u31F7|\\u30FE/\\u3099"
|
||||
"=\\u30D6|\\u30FE/\\u3099=\\u30D7|\\u30FD"
|
||||
"=\\u30D7|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3078 <<<\\u3078|\\u309D=\\u3079|\\u309D"
|
||||
"=\\u3078|\\u309E/\\u3099"
|
||||
"=\\u3079|\\u309E/\\u3099"
|
||||
"=\\u307A|\\u309D=\\u307A|\\u309E/\\u3099"
|
||||
"<<<<\\u30D8|\\u30FD=\\uFF8D|\\u30FD=\\u31F8|\\u30FD"
|
||||
"=\\u30D9|\\u30FD=\\u30D8|\\u30FE/\\u3099"
|
||||
"=\\uFF8D|\\u30FE/\\u3099"
|
||||
"=\\u31F8|\\u30FE/\\u3099"
|
||||
"=\\u30D9|\\u30FE/\\u3099=\\u30DA|\\u30FD"
|
||||
"=\\u30DA|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u307B <<<\\u307B|\\u309D=\\u307C|\\u309D"
|
||||
"=\\u307B|\\u309E/\\u3099"
|
||||
"=\\u307C|\\u309E/\\u3099"
|
||||
"=\\u307D|\\u309D=\\u307D|\\u309E/\\u3099"
|
||||
"<<<<\\u30DB|\\u30FD=\\uFF8E|\\u30FD=\\u31F9|\\u30FD"
|
||||
"=\\u30DC|\\u30FD=\\u30DB|\\u30FE/\\u3099"
|
||||
"=\\uFF8E|\\u30FE/\\u3099"
|
||||
"=\\u31F9|\\u30FE/\\u3099"
|
||||
"=\\u30DC|\\u30FE/\\u3099=\\u30DD|\\u30FD"
|
||||
"=\\u30DD|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u307E <<<\\u307E|\\u309D <<<<\\u30DE|\\u30FD=\\uFF8F|\\u30FD"
|
||||
"&[before 3]\\u307F <<<\\u307F|\\u309D <<<<\\u30DF|\\u30FD=\\uFF90|\\u30FD"
|
||||
"&[before 3]\\u3080 <<<\\u3080|\\u309D <<<<\\u30E0|\\u30FD=\\uFF91|\\u30FD"
|
||||
"=\\u31FA|\\u30FD"
|
||||
"&[before 3]\\u3081 <<<\\u3081|\\u309D <<<<\\u30E1|\\u30FD=\\uFF92|\\u30FD"
|
||||
"&[before 3]\\u3082 <<<\\u3082|\\u309D <<<<\\u30E2|\\u30FD=\\uFF93|\\u30FD"
|
||||
"&[before 3]\\u3084 <<<\\u3084|\\u309D=\\u3083|\\u309D <<<<\\u30E4|\\u30FD"
|
||||
"=\\uFF94|\\u30FD=\\u30E3|\\u30FD=\\uFF6C|\\u30FD"
|
||||
"&[before 3]\\u3086 <<<\\u3086|\\u309D=\\u3085|\\u309D <<<<\\u30E6|\\u30FD"
|
||||
"=\\uFF95|\\u30FD=\\u30E5|\\u30FD=\\uFF6D|\\u30FD"
|
||||
"&[before 3]\\u3088 <<<\\u3088|\\u309D=\\u3087|\\u309D <<<<\\u30E8|\\u30FD"
|
||||
"=\\uFF96|\\u30FD=\\u30E7|\\u30FD=\\uFF6E|\\u30FD"
|
||||
"&[before 3]\\u3089 <<<\\u3089|\\u309D <<<<\\u30E9|\\u30FD=\\uFF97|\\u30FD"
|
||||
"=\\u31FB|\\u30FD"
|
||||
"&[before 3]\\u308A <<<\\u308A|\\u309D <<<<\\u30EA|\\u30FD=\\uFF98|\\u30FD"
|
||||
"=\\u31FC|\\u30FD"
|
||||
"&[before 3]\\u308B <<<\\u308B|\\u309D <<<<\\u30EB|\\u30FD=\\uFF99|\\u30FD"
|
||||
"=\\u31FD|\\u30FD"
|
||||
"&[before 3]\\u308C <<<\\u308C|\\u309D <<<<\\u30EC|\\u30FD=\\uFF9A|\\u30FD"
|
||||
"=\\u31FE|\\u30FD"
|
||||
"&[before 3]\\u308D <<<\\u308D|\\u309D <<<<\\u30ED|\\u30FD=\\uFF9B|\\u30FD"
|
||||
"=\\u31FF|\\u30FD"
|
||||
"&[before 3]\\u308F <<<\\u308F|\\u309D=\\u308E|\\u309D"
|
||||
"=\\u308F|\\u309E/\\u3099"
|
||||
"=\\u308E|\\u309E/\\u3099"
|
||||
"<<<<\\u30EF|\\u30FD=\\uFF9C|\\u30FD=\\u30EE|\\u30FD"
|
||||
"=\\u30F7|\\u30FD=\\u30EF|\\u30FE/\\u3099"
|
||||
"=\\uFF9C|\\u30FE/\\u3099"
|
||||
"=\\u30F7|\\u30FE/\\u3099"
|
||||
"=\\u30EE|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3090 <<<\\u3090|\\u309D=\\u3090|\\u309E/\\u3099"
|
||||
"<<<<\\u30F0|\\u30FD=\\u30F8|\\u30FD"
|
||||
"=\\u30F0|\\u30FE/\\u3099"
|
||||
"=\\u30F8|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3091 <<<\\u3091|\\u309D=\\u3091|\\u309E/\\u3099"
|
||||
"<<<<\\u30F1|\\u30FD=\\u30F9|\\u30FD"
|
||||
"=\\u30F1|\\u30FE/\\u3099"
|
||||
"=\\u30F9|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3092 <<<\\u3092|\\u309D=\\u3092|\\u309E/\\u3099"
|
||||
"<<<<\\u30F2|\\u30FD=\\uFF66|\\u30FD=\\u30FA|\\u30FD"
|
||||
"=\\u30F2|\\u30FE/\\u3099"
|
||||
"=\\uFF66|\\u30FE/\\u3099"
|
||||
"=\\u30FA|\\u30FE/\\u3099"
|
||||
"&[before 3]\\u3093 <<<\\u3093|\\u309D <<<<\\u30F3|\\u30FD=\\uFF9D|\\u30FD"
|
||||
"&\\u3041 <<<<\\u30A1=\\uFF67"
|
||||
"&\\u3042 <<<<\\u30A2=\\uFF71"
|
||||
"&\\u3043 <<<<\\u30A3=\\uFF68"
|
||||
"&\\u3044 <<<<\\u30A4=\\uFF72"
|
||||
"&\\u3045 <<<<\\u30A5=\\uFF69"
|
||||
"&\\u3046 <<<<\\u30A6=\\uFF73"
|
||||
"&\\u3047 <<<<\\u30A7=\\uFF6A"
|
||||
"&\\u3048 <<<<\\u30A8=\\uFF74"
|
||||
"&\\u3049 <<<<\\u30A9=\\uFF6B"
|
||||
"&\\u304A <<<<\\u30AA=\\uFF75"
|
||||
"&\\u304B <<<<\\u30AB=\\uFF76"
|
||||
"&\\u304D <<<<\\u30AD=\\uFF77"
|
||||
"&\\u304F <<<<\\u30AF=\\uFF78"
|
||||
"&\\u3051 <<<<\\u30B1=\\uFF79"
|
||||
"&\\u3053 <<<<\\u30B3=\\uFF7A"
|
||||
"&\\u3055 <<<<\\u30B5=\\uFF7B"
|
||||
"&\\u3057 <<<<\\u30B7=\\uFF7C"
|
||||
"&\\u3059 <<<<\\u30B9=\\uFF7D"
|
||||
"&\\u305B <<<<\\u30BB=\\uFF7E"
|
||||
"&\\u305D <<<<\\u30BD=\\uFF7F"
|
||||
"&\\u305F <<<<\\u30BF=\\uFF80"
|
||||
"&\\u3061 <<<<\\u30C1=\\uFF81"
|
||||
"&\\u3063 <<<<\\u30C3=\\uFF6F"
|
||||
"&\\u3064 <<<<\\u30C4=\\uFF82"
|
||||
"&\\u3066 <<<<\\u30C6=\\uFF83"
|
||||
"&\\u3068 <<<<\\u30C8=\\uFF84"
|
||||
"&\\u306A <<<<\\u30CA=\\uFF85"
|
||||
"&\\u306B <<<<\\u30CB=\\uFF86"
|
||||
"&\\u306C <<<<\\u30CC=\\uFF87"
|
||||
"&\\u306D <<<<\\u30CD=\\uFF88"
|
||||
"&\\u306E <<<<\\u30CE=\\uFF89"
|
||||
"&\\u306F <<<<\\u30CF=\\uFF8A"
|
||||
"&\\u3072 <<<<\\u30D2=\\uFF8B"
|
||||
"&\\u3075 <<<<\\u30D5=\\uFF8C"
|
||||
"&\\u3078 <<<<\\u30D8=\\uFF8D"
|
||||
"&\\u307B <<<<\\u30DB=\\uFF8E"
|
||||
"&\\u307E <<<<\\u30DE=\\uFF8F"
|
||||
"&\\u307F <<<<\\u30DF=\\uFF90"
|
||||
"&\\u3080 <<<<\\u30E0=\\uFF91"
|
||||
"&\\u3081 <<<<\\u30E1=\\uFF92"
|
||||
"&\\u3082 <<<<\\u30E2=\\uFF93"
|
||||
"&\\u3083 <<<<\\u30E3=\\uFF6C"
|
||||
"&\\u3084 <<<<\\u30E4=\\uFF94"
|
||||
"&\\u3085 <<<<\\u30E5=\\uFF6D"
|
||||
"&\\u3086 <<<<\\u30E6=\\uFF95"
|
||||
"&\\u3087 <<<<\\u30E7=\\uFF6E"
|
||||
"&\\u3088 <<<<\\u30E8=\\uFF96"
|
||||
"&\\u3089 <<<<\\u30E9=\\uFF97"
|
||||
"&\\u308A <<<<\\u30EA=\\uFF98"
|
||||
"&\\u308B <<<<\\u30EB=\\uFF99"
|
||||
"&\\u308C <<<<\\u30EC=\\uFF9A"
|
||||
"&\\u308D <<<<\\u30ED=\\uFF9B"
|
||||
"&\\u308E <<<<\\u30EE"
|
||||
"&\\u308F <<<<\\u30EF=\\uFF9C"
|
||||
"&\\u3090 <<<<\\u30F0"
|
||||
"&\\u3091 <<<<\\u30F1"
|
||||
"&\\u3092 <<<<\\u30F2=\\uFF66"
|
||||
"&\\u3093 <<<<\\u30F3=\\uFF9D"
|
||||
"&\\u3095 <<<<\\u30F5"
|
||||
"&\\u3096 <<<<\\u30F6"
|
||||
"&\\u3088\\u308A <<\\u309F"
|
||||
"&\\u30B3\\u30C8 <<\\u30FF"
|
||||
"&\\u0020=\\u3000=\\uFFE3"
|
||||
"&\\u0021=\\uFF01"
|
||||
"&\\u0022=\\uFF02"
|
||||
"&\\u0023=\\uFF03"
|
||||
"&\\u0024=\\uFF04"
|
||||
"&\\u0025=\\uFF05"
|
||||
"&\\u0026=\\uFF06"
|
||||
"&\\u0027=\\uFF07"
|
||||
"&\\u0028=\\uFF08"
|
||||
"&\\u0029=\\uFF09"
|
||||
"&\\u002A=\\uFF0A"
|
||||
"&\\u002B=\\uFF0B"
|
||||
"&\\u002C=\\uFF0C"
|
||||
"&\\u002D=\\uFF0D"
|
||||
"&\\u002E=\\uFF0E"
|
||||
"&\\u002F=\\uFF0F"
|
||||
"&0=\\uFF10"
|
||||
"&1=\\uFF11"
|
||||
"&2=\\uFF12"
|
||||
"&3=\\uFF13"
|
||||
"&4=\\uFF14"
|
||||
"&5=\\uFF15"
|
||||
"&6=\\uFF16"
|
||||
"&7=\\uFF17"
|
||||
"&8=\\uFF18"
|
||||
"&9=\\uFF19"
|
||||
"&\\u003A=\\uFF1A"
|
||||
"&\\u003B=\\uFF1B"
|
||||
"&\\u003C=\\uFF1C"
|
||||
"&\\u003D=\\uFF1D"
|
||||
"&\\u003E=\\uFF1E"
|
||||
"&\\u003F=\\uFF1F"
|
||||
"&\\u0040=\\uFF20"
|
||||
"&A=\\uFF21"
|
||||
"&B=\\uFF22"
|
||||
"&C=\\uFF23"
|
||||
"&D=\\uFF24"
|
||||
"&E=\\uFF25"
|
||||
"&F=\\uFF26"
|
||||
"&G=\\uFF27"
|
||||
"&H=\\uFF28"
|
||||
"&I=\\uFF29"
|
||||
"&J=\\uFF2A"
|
||||
"&K=\\uFF2B"
|
||||
"&L=\\uFF2C"
|
||||
"&M=\\uFF2D"
|
||||
"&N=\\uFF2E"
|
||||
"&O=\\uFF2F"
|
||||
"&P=\\uFF30"
|
||||
"&Q=\\uFF31"
|
||||
"&R=\\uFF32"
|
||||
"&S=\\uFF33"
|
||||
"&T=\\uFF34"
|
||||
"&U=\\uFF35"
|
||||
"&V=\\uFF36"
|
||||
"&W=\\uFF37"
|
||||
"&X=\\uFF38"
|
||||
"&Y=\\uFF39"
|
||||
"&Z=\\uFF3A"
|
||||
"&\\u005B=\\uFF3B"
|
||||
"&\\u005C=\\uFF3C "
|
||||
"&\\u005D=\\uFF3D"
|
||||
"&\\u005E=\\uFF3E"
|
||||
"&\\u005F=\\uFF3F"
|
||||
"&\\u0060=\\uFF40"
|
||||
"&a=\\uFF41"
|
||||
"&b=\\uFF42"
|
||||
"&c=\\uFF43"
|
||||
"&d=\\uFF44"
|
||||
"&e=\\uFF45"
|
||||
"&f=\\uFF46"
|
||||
"&g=\\uFF47"
|
||||
"&h=\\uFF48"
|
||||
"&i=\\uFF49"
|
||||
"&j=\\uFF4A"
|
||||
"&k=\\uFF4B"
|
||||
"&l=\\uFF4C"
|
||||
"&m=\\uFF4D"
|
||||
"&n=\\uFF4E"
|
||||
"&o=\\uFF4F"
|
||||
"&p=\\uFF50"
|
||||
"&q=\\uFF51"
|
||||
"&r=\\uFF52"
|
||||
"&s=\\uFF53"
|
||||
"&t=\\uFF54"
|
||||
"&u=\\uFF55"
|
||||
"&v=\\uFF56"
|
||||
"&w=\\uFF57"
|
||||
"&x=\\uFF58"
|
||||
"&y=\\uFF59"
|
||||
"&z=\\uFF5A"
|
||||
"&\\u007B=\\uFF5B"
|
||||
"&\\u007C=\\uFF5C"
|
||||
"&\\u007D=\\uFF5D"
|
||||
"&\\u007E=\\uFF5E"
|
||||
"&\\u00A2=\\uFFE0"
|
||||
"&\\u00A3=\\uFFE1"
|
||||
"&\\u00A5=\\uFFE5"
|
||||
"&\\u00A6=\\uFFE4"
|
||||
"&\\u00AC=\\uFFE2"
|
||||
"&\\u1100=\\uFFA1=\\u3131"
|
||||
"&\\u1101=\\uFFA2=\\u3132"
|
||||
"&\\u1102=\\uFFA4=\\u3134"
|
||||
"&\\u1103=\\uFFA7=\\u3137"
|
||||
"&\\u1104=\\uFFA8=\\u3138"
|
||||
"&\\u1105=\\uFFA9=\\u3139"
|
||||
"&\\u1106=\\uFFB1=\\u3141"
|
||||
"&\\u1107=\\uFFB2=\\u3142"
|
||||
"&\\u1108=\\uFFB3=\\u3143"
|
||||
"&\\u1109=\\uFFB5=\\u3145"
|
||||
"&\\u110A=\\uFFB6=\\u3146"
|
||||
"&\\u110B=\\uFFB7=\\u3147"
|
||||
"&\\u110C=\\uFFB8=\\u3148"
|
||||
"&\\u110D=\\uFFB9=\\u3149"
|
||||
"&\\u110E=\\uFFBA=\\u314A"
|
||||
"&\\u110F=\\uFFBB=\\u314B"
|
||||
"&\\u1110=\\uFFBC=\\u314C"
|
||||
"&\\u1111=\\uFFBD=\\u314D"
|
||||
"&\\u1112=\\uFFBE=\\u314E"
|
||||
"&\\u111A=\\uFFB0=\\u3140"
|
||||
"&\\u1121=\\uFFB4=\\u3144"
|
||||
"&\\u1160=\\uFFA0=\\u3164"
|
||||
"&\\u1161=\\uFFC2=\\u314F"
|
||||
"&\\u1162=\\uFFC3=\\u3150"
|
||||
"&\\u1163=\\uFFC4=\\u3151"
|
||||
"&\\u1164=\\uFFC5=\\u3152"
|
||||
"&\\u1165=\\uFFC6=\\u3153"
|
||||
"&\\u1166=\\uFFC7=\\u3154"
|
||||
"&\\u1167=\\uFFCA=\\u3155"
|
||||
"&\\u1168=\\uFFCB=\\u3156"
|
||||
"&\\u1169=\\uFFCC=\\u3157"
|
||||
"&\\u116A=\\uFFCD=\\u3158"
|
||||
"&\\u116B=\\uFFCE=\\u3159"
|
||||
"&\\u116C=\\uFFCF=\\u315A"
|
||||
"&\\u116D=\\uFFD2=\\u315B"
|
||||
"&\\u116E=\\uFFD3=\\u315C"
|
||||
"&\\u116F=\\uFFD4=\\u315D"
|
||||
"&\\u1170=\\uFFD5=\\u315E"
|
||||
"&\\u1171=\\uFFD6=\\u315F"
|
||||
"&\\u1172=\\uFFD7=\\u3160"
|
||||
"&\\u1173=\\uFFDA=\\u3161"
|
||||
"&\\u1174=\\uFFDB=\\u3162"
|
||||
"&\\u1175=\\uFFDC=\\u3163"
|
||||
"&\\u11AA=\\uFFA3=\\u3133"
|
||||
"&\\u11AC=\\uFFA5=\\u3135"
|
||||
"&\\u11AD=\\uFFA6=\\u3136"
|
||||
"&\\u11B0=\\uFFAA=\\u313A"
|
||||
"&\\u11B1=\\uFFAB=\\u313B"
|
||||
"&\\u11B2=\\uFFAC=\\u313C"
|
||||
"&\\u11B3=\\uFFAD=\\u313D"
|
||||
"&\\u11B4=\\uFFAE=\\u313E"
|
||||
"&\\u11B5=\\uFFAF=\\u313F"
|
||||
"&\\u20A9=\\uFFE6"
|
||||
"&\\u2190=\\uFFE9"
|
||||
"&\\u2191=\\uFFEA"
|
||||
"&\\u2192=\\uFFEB"
|
||||
"&\\u2193=\\uFFEC"
|
||||
"&\\u2502=\\uFFE8"
|
||||
"&\\u25A0=\\uFFED"
|
||||
"&\\u25CB=\\uFFEE"
|
||||
"&\\u3001=\\uFF64"
|
||||
"&\\u3002=\\uFF61"
|
||||
"&\\u300C=\\uFF62"
|
||||
"&\\u300D=\\uFF63";
|
||||
|
||||
/*
|
||||
Below variables are defined in separate .cc file, generated by uca9dump at
|
||||
build-time for the Japanese collations.
|
||||
*/
|
||||
extern uint16 *ja_han_pages[];
|
||||
extern const int MIN_JA_HAN_PAGE;
|
||||
extern const int MAX_JA_HAN_PAGE;
|
||||
#endif
|
128611
deps/oblib/src/lib/charset/uca900_ja_tbls.cc
vendored
Normal file
128611
deps/oblib/src/lib/charset/uca900_ja_tbls.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
639181
deps/oblib/src/lib/charset/uca900_zh2_tbls.cc
vendored
Normal file
639181
deps/oblib/src/lib/charset/uca900_zh2_tbls.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
669300
deps/oblib/src/lib/charset/uca900_zh3_tbls.cc
vendored
Normal file
669300
deps/oblib/src/lib/charset/uca900_zh3_tbls.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
918
deps/oblib/src/lib/charset/uca900_zh_data.h
vendored
Normal file
918
deps/oblib/src/lib/charset/uca900_zh_data.h
vendored
Normal file
@ -0,0 +1,918 @@
|
||||
/**
|
||||
* Copyright (c) 2021 OceanBase
|
||||
* OceanBase CE is licensed under Mulan PubL v2.
|
||||
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
||||
* You may obtain a copy of Mulan PubL v2 at:
|
||||
* http://license.coscl.org.cn/MulanPubL-2.0
|
||||
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
||||
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
||||
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
||||
* See the Mulan PubL v2 for more details.
|
||||
*/
|
||||
#ifndef UCA900_ZH_DATA_H
|
||||
#define UCA900_ZH_DATA_H
|
||||
|
||||
/*
  For a collation which changes a character's primary weight according to its
  tailoring rule, we give this character an extra collation element (see
  comments in my_char_weight_put_900()). Usually the primary weight of this
  extra CE starts from 0x54A4, which is the biggest primary weight of all
  regular characters (non-CJK and non-ignorable) in DUCET. But Chinese is
  special, because to keep assigning a single primary weight to character
  groups like Latin, Cyrillic, etc., we used all weight values in [0x1C47,
  0xF643], so we give the primary weight of the extra CE starting from 0xF644
  to avoid weight overlapping.
*/
constexpr int ZH_EXTRA_CE_PRI = 0xF644;
// NOTE(review): presumably the equivalent extra-CE starting weights for the
// zh2 and zh3 weight tables -- confirm against their users.
constexpr int ZH2_EXTRA_CE_PRI = 0x94AF;
constexpr int ZH3_EXTRA_CE_PRI = 0x550D;
|
||||
|
||||
static const char zh_cldr_30[] =
|
||||
"&[before 2]a<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD"
|
||||
"<<\\u00E0<<<\\u00C0"
|
||||
"&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A"
|
||||
"<<\\u00E8<<<\\u00C8"
|
||||
"&e<<e\\u0302\\u0304<<<E\\u0302\\u0304<<e\\u0302\\u0301<<<E\\u0302\\u0301"
|
||||
"<<e\\u0302\\u030C<<<E\\u0302\\u030C<<e\\u0302\\u0300<<<E\\u0302\\u0300"
|
||||
"&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF"
|
||||
"<<\\u00EC<<<\\u00CC"
|
||||
"&[before 2]m<<m\\u0304<<<M\\u0304<<\\u1E3F<<<\\u1E3E<<m\\u030C"
|
||||
"<<<M\\u030C<<m\\u0300<<<M\\u0300"
|
||||
"&[before 2]n<<n\\u0304<<<N\\u0304<<\\u0144<<<\\u0143<<\\u0148<<<\\u0147"
|
||||
"<<\\u01F9<<<\\u01F8"
|
||||
"&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1"
|
||||
"<<\\u00F2<<<\\u00D2"
|
||||
"&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3"
|
||||
"<<\\u00F9<<<\\u00D9"
|
||||
"&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC"
|
||||
"<<<\\u01DB<<\\u00FC<<<\\u00DC"
|
||||
"&(\\u4E00)<<<\\u3220"
|
||||
"&(\\u4E03)<<<\\u3226"
|
||||
"&(\\u4E09)<<<\\u3222"
|
||||
"&(\\u4E5D)<<<\\u3228"
|
||||
"&(\\u4E8C)<<<\\u3221"
|
||||
"&(\\u4E94)<<<\\u3224"
|
||||
"&(\\u4EE3)<<<\\u3239"
|
||||
"&(\\u4F01)<<<\\u323D"
|
||||
"&(\\u4F11)<<<\\u3241"
|
||||
"&(\\u516B)<<<\\u3227"
|
||||
"&(\\u516D)<<<\\u3225"
|
||||
"&(\\u52B4)<<<\\u3238"
|
||||
"&(\\u5341)<<<\\u3229"
|
||||
"&(\\u5354)<<<\\u323F"
|
||||
"&(\\u540D)<<<\\u3234"
|
||||
"&(\\u547C)<<<\\u323A"
|
||||
"&(\\u56DB)<<<\\u3223"
|
||||
"&(\\u571F)<<<\\u322F"
|
||||
"&(\\u5B66)<<<\\u323B"
|
||||
"&(\\u65E5)<<<\\u3230"
|
||||
"&(\\u6708)<<<\\u322A"
|
||||
"&(\\u6709)<<<\\u3232"
|
||||
"&(\\u6728)<<<\\u322D"
|
||||
"&(\\u682A)<<<\\u3231"
|
||||
"&(\\u6C34)<<<\\u322C"
|
||||
"&(\\u706B)<<<\\u322B"
|
||||
"&(\\u7279)<<<\\u3235"
|
||||
"&(\\u76E3)<<<\\u323C"
|
||||
"&(\\u793E)<<<\\u3233"
|
||||
"&(\\u795D)<<<\\u3237"
|
||||
"&(\\u796D)<<<\\u3240"
|
||||
"&(\\u81EA)<<<\\u3242"
|
||||
"&(\\u81F3)<<<\\u3243"
|
||||
"&(\\u8CA1)<<<\\u3236"
|
||||
"&(\\u8CC7)<<<\\u323E"
|
||||
"&(\\u91D1)<<<\\u322E"
|
||||
"&0\\u70B9<<<\\u3358"
|
||||
"&10\\u65E5<<<\\u33E9"
|
||||
"&10\\u6708<<<\\u32C9"
|
||||
"&10\\u70B9<<<\\u3362"
|
||||
"&11\\u65E5<<<\\u33EA"
|
||||
"&11\\u6708<<<\\u32CA"
|
||||
"&11\\u70B9<<<\\u3363"
|
||||
"&12\\u65E5<<<\\u33EB"
|
||||
"&12\\u6708<<<\\u32CB"
|
||||
"&12\\u70B9<<<\\u3364"
|
||||
"&13\\u65E5<<<\\u33EC"
|
||||
"&13\\u70B9<<<\\u3365"
|
||||
"&14\\u65E5<<<\\u33ED"
|
||||
"&14\\u70B9<<<\\u3366"
|
||||
"&15\\u65E5<<<\\u33EE"
|
||||
"&15\\u70B9<<<\\u3367"
|
||||
"&16\\u65E5<<<\\u33EF"
|
||||
"&16\\u70B9<<<\\u3368"
|
||||
"&17\\u65E5<<<\\u33F0"
|
||||
"&17\\u70B9<<<\\u3369"
|
||||
"&18\\u65E5<<<\\u33F1"
|
||||
"&18\\u70B9<<<\\u336A"
|
||||
"&19\\u65E5<<<\\u33F2"
|
||||
"&19\\u70B9<<<\\u336B"
|
||||
"&1\\u65E5<<<\\u33E0"
|
||||
"&1\\u6708<<<\\u32C0"
|
||||
"&1\\u70B9<<<\\u3359"
|
||||
"&20\\u65E5<<<\\u33F3"
|
||||
"&20\\u70B9<<<\\u336C"
|
||||
"&21\\u65E5<<<\\u33F4"
|
||||
"&21\\u70B9<<<\\u336D"
|
||||
"&22\\u65E5<<<\\u33F5"
|
||||
"&22\\u70B9<<<\\u336E"
|
||||
"&23\\u65E5<<<\\u33F6"
|
||||
"&23\\u70B9<<<\\u336F"
|
||||
"&24\\u65E5<<<\\u33F7"
|
||||
"&24\\u70B9<<<\\u3370"
|
||||
"&25\\u65E5<<<\\u33F8"
|
||||
"&26\\u65E5<<<\\u33F9"
|
||||
"&27\\u65E5<<<\\u33FA"
|
||||
"&28\\u65E5<<<\\u33FB"
|
||||
"&29\\u65E5<<<\\u33FC"
|
||||
"&2\\u65E5<<<\\u33E1"
|
||||
"&2\\u6708<<<\\u32C1"
|
||||
"&2\\u70B9<<<\\u335A"
|
||||
"&30\\u65E5<<<\\u33FD"
|
||||
"&31\\u65E5<<<\\u33FE"
|
||||
"&3\\u65E5<<<\\u33E2"
|
||||
"&3\\u6708<<<\\u32C2"
|
||||
"&3\\u70B9<<<\\u335B"
|
||||
"&4\\u65E5<<<\\u33E3"
|
||||
"&4\\u6708<<<\\u32C3"
|
||||
"&4\\u70B9<<<\\u335C"
|
||||
"&5\\u65E5<<<\\u33E4"
|
||||
"&5\\u6708<<<\\u32C4"
|
||||
"&5\\u70B9<<<\\u335D"
|
||||
"&6\\u65E5<<<\\u33E5"
|
||||
"&6\\u6708<<<\\u32C5"
|
||||
"&6\\u70B9<<<\\u335E"
|
||||
"&7\\u65E5<<<\\u33E6"
|
||||
"&7\\u6708<<<\\u32C6"
|
||||
"&7\\u70B9<<<\\u335F"
|
||||
"&8\\u65E5<<<\\u33E7"
|
||||
"&8\\u6708<<<\\u32C7"
|
||||
"&8\\u70B9<<<\\u3360"
|
||||
"&9\\u65E5<<<\\u33E8"
|
||||
"&9\\u6708<<<\\u32C8"
|
||||
"&9\\u70B9<<<\\u3361"
|
||||
"&\\u3014\\u4E09\\u3015<<<\\u01F241"
|
||||
"&\\u3014\\u4E8C\\u3015<<<\\u01F242"
|
||||
"&\\u3014\\u52DD\\u3015<<<\\u01F247"
|
||||
"&\\u3014\\u5B89\\u3015<<<\\u01F243"
|
||||
"&\\u3014\\u6253\\u3015<<<\\u01F245"
|
||||
"&\\u3014\\u6557\\u3015<<<\\u01F248"
|
||||
"&\\u3014\\u672C\\u3015<<<\\u01F240"
|
||||
"&\\u3014\\u70B9\\u3015<<<\\u01F244"
|
||||
"&\\u3014\\u76D7\\u3015<<<\\u01F246"
|
||||
"&\\u4E00<<<\\u2F00<<<\\u3192<<<\\u3280<<<\\u01F229"
|
||||
"&\\u4E01<<<\\u319C"
|
||||
"&\\u4E03<<<\\u3286"
|
||||
"&\\u4E09<<<\\u3194<<<\\u3282<<<\\u01F22A"
|
||||
"&\\u4E0A<<<\\u3196<<<\\u32A4"
|
||||
"&\\u4E0B<<<\\u3198<<<\\u32A6"
|
||||
"&\\u4E19<<<\\u319B"
|
||||
"&\\u4E28<<<\\u2F01"
|
||||
"&\\u4E2D<<<\\u3197<<<\\u32A5<<<\\u01F22D"
|
||||
"&\\u4E36<<<\\u2F02"
|
||||
"&\\u4E3F<<<\\u2F03"
|
||||
"&\\u4E59<<<\\u2F04<<<\\u319A"
|
||||
"&\\u4E5D<<<\\u3288"
|
||||
"&\\u4E85<<<\\u2F05"
|
||||
"&\\u4E8C<<<\\u2F06<<<\\u3193<<<\\u3281<<<\\u01F214"
|
||||
"&\\u4E94<<<\\u3284"
|
||||
"&\\u4EA0<<<\\u2F07"
|
||||
"&\\u4EA4<<<\\u01F218"
|
||||
"&\\u4EBA<<<\\u2F08<<<\\u319F"
|
||||
"&\\u4F01<<<\\u32AD"
|
||||
"&\\u4F11<<<\\u32A1"
|
||||
"&\\u512A<<<\\u329D"
|
||||
"&\\u513F<<<\\u2F09"
|
||||
"&\\u5165<<<\\u2F0A"
|
||||
"&\\u516B<<<\\u2F0B<<<\\u3287"
|
||||
"&\\u516D<<<\\u3285"
|
||||
"&\\u5182<<<\\u2F0C"
|
||||
"&\\u518D<<<\\u01F21E"
|
||||
"&\\u5196<<<\\u2F0D"
|
||||
"&\\u5199<<<\\u32A2"
|
||||
"&\\u51AB<<<\\u2F0E"
|
||||
"&\\u51E0<<<\\u2F0F"
|
||||
"&\\u51F5<<<\\u2F10"
|
||||
"&\\u5200<<<\\u2F11"
|
||||
"&\\u521D<<<\\u01F220"
|
||||
"&\\u524D<<<\\u01F21C"
|
||||
"&\\u5272<<<\\u01F239"
|
||||
"&\\u529B<<<\\u2F12"
|
||||
"&\\u52B4<<<\\u3298"
|
||||
"&\\u52F9<<<\\u2F13"
|
||||
"&\\u5315<<<\\u2F14"
|
||||
"&\\u531A<<<\\u2F15"
|
||||
"&\\u5338<<<\\u2F16<<<\\u32A9"
|
||||
"&\\u5341<<<\\u2F17<<<\\u3038<<<\\u3289"
|
||||
"&\\u5344<<<\\u3039"
|
||||
"&\\u5345<<<\\u303A"
|
||||
"&\\u5354<<<\\u32AF"
|
||||
"&\\u535C<<<\\u2F18"
|
||||
"&\\u5369<<<\\u2F19"
|
||||
"&\\u5370<<<\\u329E"
|
||||
"&\\u5382<<<\\u2F1A"
|
||||
"&\\u53B6<<<\\u2F1B"
|
||||
"&\\u53C8<<<\\u2F1C"
|
||||
"&\\u53CC<<<\\u01F212"
|
||||
"&\\u53E3<<<\\u2F1D"
|
||||
"&\\u53EF<<<\\u01F251"
|
||||
"&\\u53F3<<<\\u32A8<<<\\u01F22E"
|
||||
"&\\u5408<<<\\u01F234"
|
||||
"&\\u540D<<<\\u3294"
|
||||
"&\\u5439<<<\\u01F225"
|
||||
"&\\u554F<<<\\u3244"
|
||||
"&\\u55B6<<<\\u01F23A"
|
||||
"&\\u56D7<<<\\u2F1E"
|
||||
"&\\u56DB<<<\\u3195<<<\\u3283"
|
||||
"&\\u571F<<<\\u2F1F<<<\\u328F"
|
||||
"&\\u5730<<<\\u319E"
|
||||
"&\\u58EB<<<\\u2F20"
|
||||
"&\\u58F0<<<\\u01F224"
|
||||
"&\\u5902<<<\\u2F21"
|
||||
"&\\u590A<<<\\u2F22"
|
||||
"&\\u5915<<<\\u2F23"
|
||||
"&\\u591A<<<\\u01F215"
|
||||
"&\\u591C<<<\\u32B0"
|
||||
"&\\u5927<<<\\u2F24"
|
||||
"&\\u5927\\u6B63<<<\\u337D"
|
||||
"&\\u5929<<<\\u319D<<<\\u01F217"
|
||||
"&\\u5973<<<\\u2F25<<<\\u329B"
|
||||
"&\\u5B50<<<\\u2F26"
|
||||
"&\\u5B57<<<\\u01F211"
|
||||
"&\\u5B66<<<\\u32AB"
|
||||
"&\\u5B80<<<\\u2F27"
|
||||
"&\\u5B97<<<\\u32AA"
|
||||
"&\\u5BF8<<<\\u2F28"
|
||||
"&\\u5C0F<<<\\u2F29"
|
||||
"&\\u5C22<<<\\u2F2A"
|
||||
"&\\u5C38<<<\\u2F2B"
|
||||
"&\\u5C6E<<<\\u2F2C"
|
||||
"&\\u5C71<<<\\u2F2D"
|
||||
"&\\u5DDB<<<\\u2F2E"
|
||||
"&\\u5DE5<<<\\u2F2F"
|
||||
"&\\u5DE6<<<\\u32A7<<<\\u01F22C"
|
||||
"&\\u5DF1<<<\\u2F30"
|
||||
"&\\u5DFE<<<\\u2F31"
|
||||
"&\\u5E72<<<\\u2F32"
|
||||
"&\\u5E73\\u6210<<<\\u337B"
|
||||
"&\\u5E7A<<<\\u2F33"
|
||||
"&\\u5E7C<<<\\u3245"
|
||||
"&\\u5E7F<<<\\u2F34"
|
||||
"&\\u5EF4<<<\\u2F35"
|
||||
"&\\u5EFE<<<\\u2F36"
|
||||
"&\\u5F0B<<<\\u2F37"
|
||||
"&\\u5F13<<<\\u2F38"
|
||||
"&\\u5F50<<<\\u2F39"
|
||||
"&\\u5F61<<<\\u2F3A"
|
||||
"&\\u5F73<<<\\u2F3B"
|
||||
"&\\u5F8C<<<\\u01F21D"
|
||||
"&\\u5F97<<<\\u01F250"
|
||||
"&\\u5FC3<<<\\u2F3C"
|
||||
"&\\u6208<<<\\u2F3D"
|
||||
"&\\u6236<<<\\u2F3E"
|
||||
"&\\u624B<<<\\u2F3F<<<\\u01F210"
|
||||
"&\\u6253<<<\\u01F231"
|
||||
"&\\u6295<<<\\u01F227"
|
||||
"&\\u6307<<<\\u01F22F"
|
||||
"&\\u6355<<<\\u01F228"
|
||||
"&\\u652F<<<\\u2F40"
|
||||
"&\\u6534<<<\\u2F41"
|
||||
"&\\u6587<<<\\u2F42<<<\\u3246"
|
||||
"&\\u6597<<<\\u2F43"
|
||||
"&\\u6599<<<\\u01F21B"
|
||||
"&\\u65A4<<<\\u2F44"
|
||||
"&\\u65B0<<<\\u01F21F"
|
||||
"&\\u65B9<<<\\u2F45"
|
||||
"&\\u65E0<<<\\u2F46"
|
||||
"&\\u65E5<<<\\u2F47<<<\\u3290"
|
||||
"&\\u660E\\u6CBB<<<\\u337E"
|
||||
"&\\u6620<<<\\u01F219"
|
||||
"&\\u662D\\u548C<<<\\u337C"
|
||||
"&\\u66F0<<<\\u2F48"
|
||||
"&\\u6708<<<\\u2F49<<<\\u328A<<<\\u01F237"
|
||||
"&\\u6709<<<\\u3292<<<\\u01F236"
|
||||
"&\\u6728<<<\\u2F4A<<<\\u328D"
|
||||
"&\\u682A<<<\\u3291"
|
||||
"&\\u682A\\u5F0F\\u4F1A\\u793E<<<\\u337F"
|
||||
"&\\u6B20<<<\\u2F4B"
|
||||
"&\\u6B62<<<\\u2F4C"
|
||||
"&\\u6B63<<<\\u32A3"
|
||||
"&\\u6B79<<<\\u2F4D"
|
||||
"&\\u6BB3<<<\\u2F4E"
|
||||
"&\\u6BCB<<<\\u2F4F"
|
||||
"&\\u6BCD<<<\\u2E9F"
|
||||
"&\\u6BD4<<<\\u2F50"
|
||||
"&\\u6BDB<<<\\u2F51"
|
||||
"&\\u6C0F<<<\\u2F52"
|
||||
"&\\u6C14<<<\\u2F53"
|
||||
"&\\u6C34<<<\\u2F54<<<\\u328C"
|
||||
"&\\u6CE8<<<\\u329F"
|
||||
"&\\u6E80<<<\\u01F235"
|
||||
"&\\u6F14<<<\\u01F226"
|
||||
"&\\u706B<<<\\u2F55<<<\\u328B"
|
||||
"&\\u7121<<<\\u01F21A"
|
||||
"&\\u722A<<<\\u2F56"
|
||||
"&\\u7236<<<\\u2F57"
|
||||
"&\\u723B<<<\\u2F58"
|
||||
"&\\u723F<<<\\u2F59"
|
||||
"&\\u7247<<<\\u2F5A"
|
||||
"&\\u7259<<<\\u2F5B"
|
||||
"&\\u725B<<<\\u2F5C"
|
||||
"&\\u7279<<<\\u3295"
|
||||
"&\\u72AC<<<\\u2F5D"
|
||||
"&\\u7384<<<\\u2F5E"
|
||||
"&\\u7389<<<\\u2F5F"
|
||||
"&\\u74DC<<<\\u2F60"
|
||||
"&\\u74E6<<<\\u2F61"
|
||||
"&\\u7518<<<\\u2F62"
|
||||
"&\\u751F<<<\\u2F63<<<\\u01F222"
|
||||
"&\\u7528<<<\\u2F64"
|
||||
"&\\u7530<<<\\u2F65"
|
||||
"&\\u7532<<<\\u3199"
|
||||
"&\\u7533<<<\\u01F238"
|
||||
"&\\u7537<<<\\u329A"
|
||||
"&\\u758B<<<\\u2F66"
|
||||
"&\\u7592<<<\\u2F67"
|
||||
"&\\u7676<<<\\u2F68"
|
||||
"&\\u767D<<<\\u2F69"
|
||||
"&\\u76AE<<<\\u2F6A"
|
||||
"&\\u76BF<<<\\u2F6B"
|
||||
"&\\u76E3<<<\\u32AC"
|
||||
"&\\u76EE<<<\\u2F6C"
|
||||
"&\\u77DB<<<\\u2F6D"
|
||||
"&\\u77E2<<<\\u2F6E"
|
||||
"&\\u77F3<<<\\u2F6F"
|
||||
"&\\u793A<<<\\u2F70"
|
||||
"&\\u793E<<<\\u3293"
|
||||
"&\\u795D<<<\\u3297"
|
||||
"&\\u7981<<<\\u01F232"
|
||||
"&\\u79B8<<<\\u2F71"
|
||||
"&\\u79BE<<<\\u2F72"
|
||||
"&\\u79D8<<<\\u3299"
|
||||
"&\\u7A74<<<\\u2F73"
|
||||
"&\\u7A7A<<<\\u01F233"
|
||||
"&\\u7ACB<<<\\u2F74"
|
||||
"&\\u7AF9<<<\\u2F75"
|
||||
"&\\u7B8F<<<\\u3247"
|
||||
"&\\u7C73<<<\\u2F76"
|
||||
"&\\u7CF8<<<\\u2F77"
|
||||
"&\\u7D42<<<\\u01F221"
|
||||
"&\\u7F36<<<\\u2F78"
|
||||
"&\\u7F51<<<\\u2F79"
|
||||
"&\\u7F8A<<<\\u2F7A"
|
||||
"&\\u7FBD<<<\\u2F7B"
|
||||
"&\\u8001<<<\\u2F7C"
|
||||
"&\\u800C<<<\\u2F7D"
|
||||
"&\\u8012<<<\\u2F7E"
|
||||
"&\\u8033<<<\\u2F7F"
|
||||
"&\\u807F<<<\\u2F80"
|
||||
"&\\u8089<<<\\u2F81"
|
||||
"&\\u81E3<<<\\u2F82"
|
||||
"&\\u81EA<<<\\u2F83"
|
||||
"&\\u81F3<<<\\u2F84"
|
||||
"&\\u81FC<<<\\u2F85"
|
||||
"&\\u820C<<<\\u2F86"
|
||||
"&\\u821B<<<\\u2F87"
|
||||
"&\\u821F<<<\\u2F88"
|
||||
"&\\u826E<<<\\u2F89"
|
||||
"&\\u8272<<<\\u2F8A"
|
||||
"&\\u8278<<<\\u2F8B"
|
||||
"&\\u864D<<<\\u2F8C"
|
||||
"&\\u866B<<<\\u2F8D"
|
||||
"&\\u8840<<<\\u2F8E"
|
||||
"&\\u884C<<<\\u2F8F"
|
||||
"&\\u8863<<<\\u2F90"
|
||||
"&\\u897E<<<\\u2F91"
|
||||
"&\\u898B<<<\\u2F92"
|
||||
"&\\u89D2<<<\\u2F93"
|
||||
"&\\u89E3<<<\\u01F216"
|
||||
"&\\u8A00<<<\\u2F94"
|
||||
"&\\u8C37<<<\\u2F95"
|
||||
"&\\u8C46<<<\\u2F96"
|
||||
"&\\u8C55<<<\\u2F97"
|
||||
"&\\u8C78<<<\\u2F98"
|
||||
"&\\u8C9D<<<\\u2F99"
|
||||
"&\\u8CA1<<<\\u3296"
|
||||
"&\\u8CA9<<<\\u01F223"
|
||||
"&\\u8CC7<<<\\u32AE"
|
||||
"&\\u8D64<<<\\u2F9A"
|
||||
"&\\u8D70<<<\\u2F9B<<<\\u01F230"
|
||||
"&\\u8DB3<<<\\u2F9C"
|
||||
"&\\u8EAB<<<\\u2F9D"
|
||||
"&\\u8ECA<<<\\u2F9E"
|
||||
"&\\u8F9B<<<\\u2F9F"
|
||||
"&\\u8FB0<<<\\u2FA0"
|
||||
"&\\u8FB5<<<\\u2FA1"
|
||||
"&\\u904A<<<\\u01F22B"
|
||||
"&\\u9069<<<\\u329C"
|
||||
"&\\u9091<<<\\u2FA2"
|
||||
"&\\u9149<<<\\u2FA3"
|
||||
"&\\u914D<<<\\u01F23B"
|
||||
"&\\u91C6<<<\\u2FA4"
|
||||
"&\\u91CC<<<\\u2FA5"
|
||||
"&\\u91D1<<<\\u2FA6<<<\\u328E"
|
||||
"&\\u9577<<<\\u2FA7"
|
||||
"&\\u9580<<<\\u2FA8"
|
||||
"&\\u961C<<<\\u2FA9"
|
||||
"&\\u96B6<<<\\u2FAA"
|
||||
"&\\u96B9<<<\\u2FAB"
|
||||
"&\\u96E8<<<\\u2FAC"
|
||||
"&\\u9751<<<\\u2FAD"
|
||||
"&\\u975E<<<\\u2FAE"
|
||||
"&\\u9762<<<\\u2FAF"
|
||||
"&\\u9769<<<\\u2FB0"
|
||||
"&\\u97CB<<<\\u2FB1"
|
||||
"&\\u97ED<<<\\u2FB2"
|
||||
"&\\u97F3<<<\\u2FB3"
|
||||
"&\\u9801<<<\\u2FB4"
|
||||
"&\\u9805<<<\\u32A0"
|
||||
"&\\u98A8<<<\\u2FB5"
|
||||
"&\\u98DB<<<\\u2FB6"
|
||||
"&\\u98DF<<<\\u2FB7"
|
||||
"&\\u9996<<<\\u2FB8"
|
||||
"&\\u9999<<<\\u2FB9"
|
||||
"&\\u99AC<<<\\u2FBA"
|
||||
"&\\u9AA8<<<\\u2FBB"
|
||||
"&\\u9AD8<<<\\u2FBC"
|
||||
"&\\u9ADF<<<\\u2FBD"
|
||||
"&\\u9B25<<<\\u2FBE"
|
||||
"&\\u9B2F<<<\\u2FBF"
|
||||
"&\\u9B32<<<\\u2FC0"
|
||||
"&\\u9B3C<<<\\u2FC1"
|
||||
"&\\u9B5A<<<\\u2FC2"
|
||||
"&\\u9CE5<<<\\u2FC3"
|
||||
"&\\u9E75<<<\\u2FC4"
|
||||
"&\\u9E7F<<<\\u2FC5"
|
||||
"&\\u9EA5<<<\\u2FC6"
|
||||
"&\\u9EBB<<<\\u2FC7"
|
||||
"&\\u9EC3<<<\\u2FC8"
|
||||
"&\\u9ECD<<<\\u2FC9"
|
||||
"&\\u9ED1<<<\\u2FCA"
|
||||
"&\\u9EF9<<<\\u2FCB"
|
||||
"&\\u9EFD<<<\\u2FCC"
|
||||
"&\\u9F0E<<<\\u2FCD"
|
||||
"&\\u9F13<<<\\u2FCE"
|
||||
"&\\u9F20<<<\\u2FCF"
|
||||
"&\\u9F3B<<<\\u2FD0"
|
||||
"&\\u9F4A<<<\\u2FD1"
|
||||
"&\\u9F52<<<\\u2FD2"
|
||||
"&\\u9F8D<<<\\u2FD3"
|
||||
"&\\u9F9C<<<\\u2FD4"
|
||||
"&\\u9F9F<<<\\u2EF3"
|
||||
"&\\u9FA0<<<\\u2FD5"
|
||||
"&\\u02342F<\\u91CD\\u5E86/\\u5E86"
|
||||
"&\\u5F1E<\\u6C88\\u9633/\\u9633"
|
||||
"&\\u92BA<\\u85CF\\u6587/\\u6587";
|
||||
|
||||
/*
  Tailoring rule text for the zh2 table set, written as adjacent string
  literals that the compiler concatenates into one string.
  Each rule starts with '&' and an anchor (optionally prefixed by
  "[before 2]"), followed by a chain of "<<" / "<<<" relations that attach
  further characters to that anchor. "\\uXXXX" and "\\uXXXXXX" spell code
  points in hexadecimal.
  NOTE(review): the syntax looks like LDML/CLDR collation tailoring and the
  name suggests CLDR 30 -- confirm against the rule parser that consumes it.
*/
static const char zh2_cldr_30[] =
|
||||
"&[before 2]a<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD"
|
||||
"<<\\u00E0<<<\\u00C0"
|
||||
"&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A"
|
||||
"<<\\u00E8<<<\\u00C8"
|
||||
"&e<<e\\u0302\\u0304<<<E\\u0302\\u0304<<e\\u0302\\u0301<<<E\\u0302\\u0301"
|
||||
"<<e\\u0302\\u030C<<<E\\u0302\\u030C<<e\\u0302\\u0300<<<E\\u0302\\u0300"
|
||||
"&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF"
|
||||
"<<\\u00EC<<<\\u00CC"
|
||||
"&[before 2]m<<m\\u0304<<<M\\u0304<<\\u1E3F<<<\\u1E3E<<m\\u030C"
|
||||
"<<<M\\u030C<<m\\u0300<<<M\\u0300"
|
||||
"&[before 2]n<<n\\u0304<<<N\\u0304<<\\u0144<<<\\u0143<<\\u0148<<<\\u0147"
|
||||
"<<\\u01F9<<<\\u01F8"
|
||||
"&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1"
|
||||
"<<\\u00F2<<<\\u00D2"
|
||||
"&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3"
|
||||
"<<\\u00F9<<<\\u00D9"
|
||||
"&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC"
|
||||
"<<<\\u01DB<<\\u00FC<<<\\u00DC"
|
||||
"&(\\u4E00)<<<\\u3220"
|
||||
"&(\\u4E03)<<<\\u3226"
|
||||
"&(\\u4E09)<<<\\u3222"
|
||||
"&(\\u4E5D)<<<\\u3228"
|
||||
"&(\\u4E8C)<<<\\u3221"
|
||||
"&(\\u4E94)<<<\\u3224"
|
||||
"&(\\u4EE3)<<<\\u3239"
|
||||
"&(\\u4F01)<<<\\u323D"
|
||||
"&(\\u4F11)<<<\\u3241"
|
||||
"&(\\u516B)<<<\\u3227"
|
||||
"&(\\u516D)<<<\\u3225"
|
||||
"&(\\u52B4)<<<\\u3238"
|
||||
"&(\\u5341)<<<\\u3229"
|
||||
"&(\\u5354)<<<\\u323F"
|
||||
"&(\\u540D)<<<\\u3234"
|
||||
"&(\\u547C)<<<\\u323A"
|
||||
"&(\\u56DB)<<<\\u3223"
|
||||
"&(\\u571F)<<<\\u322F"
|
||||
"&(\\u5B66)<<<\\u323B"
|
||||
"&(\\u65E5)<<<\\u3230"
|
||||
"&(\\u6708)<<<\\u322A"
|
||||
"&(\\u6709)<<<\\u3232"
|
||||
"&(\\u6728)<<<\\u322D"
|
||||
"&(\\u682A)<<<\\u3231"
|
||||
"&(\\u6C34)<<<\\u322C"
|
||||
"&(\\u706B)<<<\\u322B"
|
||||
"&(\\u7279)<<<\\u3235"
|
||||
"&(\\u76E3)<<<\\u323C"
|
||||
"&(\\u793E)<<<\\u3233"
|
||||
"&(\\u795D)<<<\\u3237"
|
||||
"&(\\u796D)<<<\\u3240"
|
||||
"&(\\u81EA)<<<\\u3242"
|
||||
"&(\\u81F3)<<<\\u3243"
|
||||
"&(\\u8CA1)<<<\\u3236"
|
||||
"&(\\u8CC7)<<<\\u323E"
|
||||
"&(\\u91D1)<<<\\u322E"
|
||||
"&0\\u70B9<<<\\u3358"
|
||||
"&10\\u65E5<<<\\u33E9"
|
||||
"&10\\u6708<<<\\u32C9"
|
||||
"&10\\u70B9<<<\\u3362"
|
||||
"&11\\u65E5<<<\\u33EA"
|
||||
"&11\\u6708<<<\\u32CA"
|
||||
"&11\\u70B9<<<\\u3363"
|
||||
"&12\\u65E5<<<\\u33EB"
|
||||
"&12\\u6708<<<\\u32CB"
|
||||
"&12\\u70B9<<<\\u3364"
|
||||
"&13\\u65E5<<<\\u33EC"
|
||||
"&13\\u70B9<<<\\u3365"
|
||||
"&14\\u65E5<<<\\u33ED"
|
||||
"&14\\u70B9<<<\\u3366"
|
||||
"&15\\u65E5<<<\\u33EE"
|
||||
"&15\\u70B9<<<\\u3367"
|
||||
"&16\\u65E5<<<\\u33EF"
|
||||
"&16\\u70B9<<<\\u3368"
|
||||
"&17\\u65E5<<<\\u33F0"
|
||||
"&17\\u70B9<<<\\u3369"
|
||||
"&18\\u65E5<<<\\u33F1"
|
||||
"&18\\u70B9<<<\\u336A"
|
||||
"&19\\u65E5<<<\\u33F2"
|
||||
"&19\\u70B9<<<\\u336B"
|
||||
"&1\\u65E5<<<\\u33E0"
|
||||
"&1\\u6708<<<\\u32C0"
|
||||
"&1\\u70B9<<<\\u3359"
|
||||
"&20\\u65E5<<<\\u33F3"
|
||||
"&20\\u70B9<<<\\u336C"
|
||||
"&21\\u65E5<<<\\u33F4"
|
||||
"&21\\u70B9<<<\\u336D"
|
||||
"&22\\u65E5<<<\\u33F5"
|
||||
"&22\\u70B9<<<\\u336E"
|
||||
"&23\\u65E5<<<\\u33F6"
|
||||
"&23\\u70B9<<<\\u336F"
|
||||
"&24\\u65E5<<<\\u33F7"
|
||||
"&24\\u70B9<<<\\u3370"
|
||||
"&25\\u65E5<<<\\u33F8"
|
||||
"&26\\u65E5<<<\\u33F9"
|
||||
"&27\\u65E5<<<\\u33FA"
|
||||
"&28\\u65E5<<<\\u33FB"
|
||||
"&29\\u65E5<<<\\u33FC"
|
||||
"&2\\u65E5<<<\\u33E1"
|
||||
"&2\\u6708<<<\\u32C1"
|
||||
"&2\\u70B9<<<\\u335A"
|
||||
"&30\\u65E5<<<\\u33FD"
|
||||
"&31\\u65E5<<<\\u33FE"
|
||||
"&3\\u65E5<<<\\u33E2"
|
||||
"&3\\u6708<<<\\u32C2"
|
||||
"&3\\u70B9<<<\\u335B"
|
||||
"&4\\u65E5<<<\\u33E3"
|
||||
"&4\\u6708<<<\\u32C3"
|
||||
"&4\\u70B9<<<\\u335C"
|
||||
"&5\\u65E5<<<\\u33E4"
|
||||
"&5\\u6708<<<\\u32C4"
|
||||
"&5\\u70B9<<<\\u335D"
|
||||
"&6\\u65E5<<<\\u33E5"
|
||||
"&6\\u6708<<<\\u32C5"
|
||||
"&6\\u70B9<<<\\u335E"
|
||||
"&7\\u65E5<<<\\u33E6"
|
||||
"&7\\u6708<<<\\u32C6"
|
||||
"&7\\u70B9<<<\\u335F"
|
||||
"&8\\u65E5<<<\\u33E7"
|
||||
"&8\\u6708<<<\\u32C7"
|
||||
"&8\\u70B9<<<\\u3360"
|
||||
"&9\\u65E5<<<\\u33E8"
|
||||
"&9\\u6708<<<\\u32C8"
|
||||
"&9\\u70B9<<<\\u3361"
|
||||
"&\\u3014\\u4E09\\u3015<<<\\u01F241"
|
||||
"&\\u3014\\u4E8C\\u3015<<<\\u01F242"
|
||||
"&\\u3014\\u52DD\\u3015<<<\\u01F247"
|
||||
"&\\u3014\\u5B89\\u3015<<<\\u01F243"
|
||||
"&\\u3014\\u6253\\u3015<<<\\u01F245"
|
||||
"&\\u3014\\u6557\\u3015<<<\\u01F248"
|
||||
"&\\u3014\\u672C\\u3015<<<\\u01F240"
|
||||
"&\\u3014\\u70B9\\u3015<<<\\u01F244"
|
||||
"&\\u3014\\u76D7\\u3015<<<\\u01F246"
|
||||
"&\\u4E00<<<\\u2F00<<<\\u3192<<<\\u3280<<<\\u01F229"
|
||||
"&\\u4E01<<<\\u319C"
|
||||
"&\\u4E03<<<\\u3286"
|
||||
"&\\u4E09<<<\\u3194<<<\\u3282<<<\\u01F22A"
|
||||
"&\\u4E0A<<<\\u3196<<<\\u32A4"
|
||||
"&\\u4E0B<<<\\u3198<<<\\u32A6"
|
||||
"&\\u4E19<<<\\u319B"
|
||||
"&\\u4E28<<<\\u2F01"
|
||||
"&\\u4E2D<<<\\u3197<<<\\u32A5<<<\\u01F22D"
|
||||
"&\\u4E36<<<\\u2F02"
|
||||
"&\\u4E3F<<<\\u2F03"
|
||||
"&\\u4E59<<<\\u2F04<<<\\u319A"
|
||||
"&\\u4E5D<<<\\u3288"
|
||||
"&\\u4E85<<<\\u2F05"
|
||||
"&\\u4E8C<<<\\u2F06<<<\\u3193<<<\\u3281<<<\\u01F214"
|
||||
"&\\u4E94<<<\\u3284"
|
||||
"&\\u4EA0<<<\\u2F07"
|
||||
"&\\u4EA4<<<\\u01F218"
|
||||
"&\\u4EBA<<<\\u2F08<<<\\u319F"
|
||||
"&\\u4F01<<<\\u32AD"
|
||||
"&\\u4F11<<<\\u32A1"
|
||||
"&\\u512A<<<\\u329D"
|
||||
"&\\u513F<<<\\u2F09"
|
||||
"&\\u5165<<<\\u2F0A"
|
||||
"&\\u516B<<<\\u2F0B<<<\\u3287"
|
||||
"&\\u516D<<<\\u3285"
|
||||
"&\\u5182<<<\\u2F0C"
|
||||
"&\\u518D<<<\\u01F21E"
|
||||
"&\\u5196<<<\\u2F0D"
|
||||
"&\\u5199<<<\\u32A2"
|
||||
"&\\u51AB<<<\\u2F0E"
|
||||
"&\\u51E0<<<\\u2F0F"
|
||||
"&\\u51F5<<<\\u2F10"
|
||||
"&\\u5200<<<\\u2F11"
|
||||
"&\\u521D<<<\\u01F220"
|
||||
"&\\u524D<<<\\u01F21C"
|
||||
"&\\u5272<<<\\u01F239"
|
||||
"&\\u529B<<<\\u2F12"
|
||||
"&\\u52B4<<<\\u3298"
|
||||
"&\\u52F9<<<\\u2F13"
|
||||
"&\\u5315<<<\\u2F14"
|
||||
"&\\u531A<<<\\u2F15"
|
||||
"&\\u5338<<<\\u2F16<<<\\u32A9"
|
||||
"&\\u5341<<<\\u2F17<<<\\u3038<<<\\u3289"
|
||||
"&\\u5344<<<\\u3039"
|
||||
"&\\u5345<<<\\u303A"
|
||||
"&\\u5354<<<\\u32AF"
|
||||
"&\\u535C<<<\\u2F18"
|
||||
"&\\u5369<<<\\u2F19"
|
||||
"&\\u5370<<<\\u329E"
|
||||
"&\\u5382<<<\\u2F1A"
|
||||
"&\\u53B6<<<\\u2F1B"
|
||||
"&\\u53C8<<<\\u2F1C"
|
||||
"&\\u53CC<<<\\u01F212"
|
||||
"&\\u53E3<<<\\u2F1D"
|
||||
"&\\u53EF<<<\\u01F251"
|
||||
"&\\u53F3<<<\\u32A8<<<\\u01F22E"
|
||||
"&\\u5408<<<\\u01F234"
|
||||
"&\\u540D<<<\\u3294"
|
||||
"&\\u5439<<<\\u01F225"
|
||||
"&\\u554F<<<\\u3244"
|
||||
"&\\u55B6<<<\\u01F23A"
|
||||
"&\\u56D7<<<\\u2F1E"
|
||||
"&\\u56DB<<<\\u3195<<<\\u3283"
|
||||
"&\\u571F<<<\\u2F1F<<<\\u328F"
|
||||
"&\\u5730<<<\\u319E"
|
||||
"&\\u58EB<<<\\u2F20"
|
||||
"&\\u58F0<<<\\u01F224"
|
||||
"&\\u5902<<<\\u2F21"
|
||||
"&\\u590A<<<\\u2F22"
|
||||
"&\\u5915<<<\\u2F23"
|
||||
"&\\u591A<<<\\u01F215"
|
||||
"&\\u591C<<<\\u32B0"
|
||||
"&\\u5927<<<\\u2F24"
|
||||
"&\\u5927\\u6B63<<<\\u337D"
|
||||
"&\\u5929<<<\\u319D<<<\\u01F217"
|
||||
"&\\u5973<<<\\u2F25<<<\\u329B"
|
||||
"&\\u5B50<<<\\u2F26"
|
||||
"&\\u5B57<<<\\u01F211"
|
||||
"&\\u5B66<<<\\u32AB"
|
||||
"&\\u5B80<<<\\u2F27"
|
||||
"&\\u5B97<<<\\u32AA"
|
||||
"&\\u5BF8<<<\\u2F28"
|
||||
"&\\u5C0F<<<\\u2F29"
|
||||
"&\\u5C22<<<\\u2F2A"
|
||||
"&\\u5C38<<<\\u2F2B"
|
||||
"&\\u5C6E<<<\\u2F2C"
|
||||
"&\\u5C71<<<\\u2F2D"
|
||||
"&\\u5DDB<<<\\u2F2E"
|
||||
"&\\u5DE5<<<\\u2F2F"
|
||||
"&\\u5DE6<<<\\u32A7<<<\\u01F22C"
|
||||
"&\\u5DF1<<<\\u2F30"
|
||||
"&\\u5DFE<<<\\u2F31"
|
||||
"&\\u5E72<<<\\u2F32"
|
||||
"&\\u5E73\\u6210<<<\\u337B"
|
||||
"&\\u5E7A<<<\\u2F33"
|
||||
"&\\u5E7C<<<\\u3245"
|
||||
"&\\u5E7F<<<\\u2F34"
|
||||
"&\\u5EF4<<<\\u2F35"
|
||||
"&\\u5EFE<<<\\u2F36"
|
||||
"&\\u5F0B<<<\\u2F37"
|
||||
"&\\u5F13<<<\\u2F38"
|
||||
"&\\u5F50<<<\\u2F39"
|
||||
"&\\u5F61<<<\\u2F3A"
|
||||
"&\\u5F73<<<\\u2F3B"
|
||||
"&\\u5F8C<<<\\u01F21D"
|
||||
"&\\u5F97<<<\\u01F250"
|
||||
"&\\u5FC3<<<\\u2F3C"
|
||||
"&\\u6208<<<\\u2F3D"
|
||||
"&\\u6236<<<\\u2F3E"
|
||||
"&\\u624B<<<\\u2F3F<<<\\u01F210"
|
||||
"&\\u6253<<<\\u01F231"
|
||||
"&\\u6295<<<\\u01F227"
|
||||
"&\\u6307<<<\\u01F22F"
|
||||
"&\\u6355<<<\\u01F228"
|
||||
"&\\u652F<<<\\u2F40"
|
||||
"&\\u6534<<<\\u2F41"
|
||||
"&\\u6587<<<\\u2F42<<<\\u3246"
|
||||
"&\\u6597<<<\\u2F43"
|
||||
"&\\u6599<<<\\u01F21B"
|
||||
"&\\u65A4<<<\\u2F44"
|
||||
"&\\u65B0<<<\\u01F21F"
|
||||
"&\\u65B9<<<\\u2F45"
|
||||
"&\\u65E0<<<\\u2F46"
|
||||
"&\\u65E5<<<\\u2F47<<<\\u3290"
|
||||
"&\\u660E\\u6CBB<<<\\u337E"
|
||||
"&\\u6620<<<\\u01F219"
|
||||
"&\\u662D\\u548C<<<\\u337C"
|
||||
"&\\u66F0<<<\\u2F48"
|
||||
"&\\u6708<<<\\u2F49<<<\\u328A<<<\\u01F237"
|
||||
"&\\u6709<<<\\u3292<<<\\u01F236"
|
||||
"&\\u6728<<<\\u2F4A<<<\\u328D"
|
||||
"&\\u682A<<<\\u3291"
|
||||
"&\\u682A\\u5F0F\\u4F1A\\u793E<<<\\u337F"
|
||||
"&\\u6B20<<<\\u2F4B"
|
||||
"&\\u6B62<<<\\u2F4C"
|
||||
"&\\u6B63<<<\\u32A3"
|
||||
"&\\u6B79<<<\\u2F4D"
|
||||
"&\\u6BB3<<<\\u2F4E"
|
||||
"&\\u6BCB<<<\\u2F4F"
|
||||
"&\\u6BCD<<<\\u2E9F"
|
||||
"&\\u6BD4<<<\\u2F50"
|
||||
"&\\u6BDB<<<\\u2F51"
|
||||
"&\\u6C0F<<<\\u2F52"
|
||||
"&\\u6C14<<<\\u2F53"
|
||||
"&\\u6C34<<<\\u2F54<<<\\u328C"
|
||||
"&\\u6CE8<<<\\u329F"
|
||||
"&\\u6E80<<<\\u01F235"
|
||||
"&\\u6F14<<<\\u01F226"
|
||||
"&\\u706B<<<\\u2F55<<<\\u328B"
|
||||
"&\\u7121<<<\\u01F21A"
|
||||
"&\\u722A<<<\\u2F56"
|
||||
"&\\u7236<<<\\u2F57"
|
||||
"&\\u723B<<<\\u2F58"
|
||||
"&\\u723F<<<\\u2F59"
|
||||
"&\\u7247<<<\\u2F5A"
|
||||
"&\\u7259<<<\\u2F5B"
|
||||
"&\\u725B<<<\\u2F5C"
|
||||
"&\\u7279<<<\\u3295"
|
||||
"&\\u72AC<<<\\u2F5D"
|
||||
"&\\u7384<<<\\u2F5E"
|
||||
"&\\u7389<<<\\u2F5F"
|
||||
"&\\u74DC<<<\\u2F60"
|
||||
"&\\u74E6<<<\\u2F61"
|
||||
"&\\u7518<<<\\u2F62"
|
||||
"&\\u751F<<<\\u2F63<<<\\u01F222"
|
||||
"&\\u7528<<<\\u2F64"
|
||||
"&\\u7530<<<\\u2F65"
|
||||
"&\\u7532<<<\\u3199"
|
||||
"&\\u7533<<<\\u01F238"
|
||||
"&\\u7537<<<\\u329A"
|
||||
"&\\u758B<<<\\u2F66"
|
||||
"&\\u7592<<<\\u2F67"
|
||||
"&\\u7676<<<\\u2F68"
|
||||
"&\\u767D<<<\\u2F69"
|
||||
"&\\u76AE<<<\\u2F6A"
|
||||
"&\\u76BF<<<\\u2F6B"
|
||||
"&\\u76E3<<<\\u32AC"
|
||||
"&\\u76EE<<<\\u2F6C"
|
||||
"&\\u77DB<<<\\u2F6D"
|
||||
"&\\u77E2<<<\\u2F6E"
|
||||
"&\\u77F3<<<\\u2F6F"
|
||||
"&\\u793A<<<\\u2F70"
|
||||
"&\\u793E<<<\\u3293"
|
||||
"&\\u795D<<<\\u3297"
|
||||
"&\\u7981<<<\\u01F232"
|
||||
"&\\u79B8<<<\\u2F71"
|
||||
"&\\u79BE<<<\\u2F72"
|
||||
"&\\u79D8<<<\\u3299"
|
||||
"&\\u7A74<<<\\u2F73"
|
||||
"&\\u7A7A<<<\\u01F233"
|
||||
"&\\u7ACB<<<\\u2F74"
|
||||
"&\\u7AF9<<<\\u2F75"
|
||||
"&\\u7B8F<<<\\u3247"
|
||||
"&\\u7C73<<<\\u2F76"
|
||||
"&\\u7CF8<<<\\u2F77"
|
||||
"&\\u7D42<<<\\u01F221"
|
||||
"&\\u7F36<<<\\u2F78"
|
||||
"&\\u7F51<<<\\u2F79"
|
||||
"&\\u7F8A<<<\\u2F7A"
|
||||
"&\\u7FBD<<<\\u2F7B"
|
||||
"&\\u8001<<<\\u2F7C"
|
||||
"&\\u800C<<<\\u2F7D"
|
||||
"&\\u8012<<<\\u2F7E"
|
||||
"&\\u8033<<<\\u2F7F"
|
||||
"&\\u807F<<<\\u2F80"
|
||||
"&\\u8089<<<\\u2F81"
|
||||
"&\\u81E3<<<\\u2F82"
|
||||
"&\\u81EA<<<\\u2F83"
|
||||
"&\\u81F3<<<\\u2F84"
|
||||
"&\\u81FC<<<\\u2F85"
|
||||
"&\\u820C<<<\\u2F86"
|
||||
"&\\u821B<<<\\u2F87"
|
||||
"&\\u821F<<<\\u2F88"
|
||||
"&\\u826E<<<\\u2F89"
|
||||
"&\\u8272<<<\\u2F8A"
|
||||
"&\\u8278<<<\\u2F8B"
|
||||
"&\\u864D<<<\\u2F8C"
|
||||
"&\\u866B<<<\\u2F8D"
|
||||
"&\\u8840<<<\\u2F8E"
|
||||
"&\\u884C<<<\\u2F8F"
|
||||
"&\\u8863<<<\\u2F90"
|
||||
"&\\u897E<<<\\u2F91"
|
||||
"&\\u898B<<<\\u2F92"
|
||||
"&\\u89D2<<<\\u2F93"
|
||||
"&\\u89E3<<<\\u01F216"
|
||||
"&\\u8A00<<<\\u2F94"
|
||||
"&\\u8C37<<<\\u2F95"
|
||||
"&\\u8C46<<<\\u2F96"
|
||||
"&\\u8C55<<<\\u2F97"
|
||||
"&\\u8C78<<<\\u2F98"
|
||||
"&\\u8C9D<<<\\u2F99"
|
||||
"&\\u8CA1<<<\\u3296"
|
||||
"&\\u8CA9<<<\\u01F223"
|
||||
"&\\u8CC7<<<\\u32AE"
|
||||
"&\\u8D64<<<\\u2F9A"
|
||||
"&\\u8D70<<<\\u2F9B<<<\\u01F230"
|
||||
"&\\u8DB3<<<\\u2F9C"
|
||||
"&\\u8EAB<<<\\u2F9D"
|
||||
"&\\u8ECA<<<\\u2F9E"
|
||||
"&\\u8F9B<<<\\u2F9F"
|
||||
"&\\u8FB0<<<\\u2FA0"
|
||||
"&\\u8FB5<<<\\u2FA1"
|
||||
"&\\u904A<<<\\u01F22B"
|
||||
"&\\u9069<<<\\u329C"
|
||||
"&\\u9091<<<\\u2FA2"
|
||||
"&\\u9149<<<\\u2FA3"
|
||||
"&\\u914D<<<\\u01F23B"
|
||||
"&\\u91C6<<<\\u2FA4"
|
||||
"&\\u91CC<<<\\u2FA5"
|
||||
"&\\u91D1<<<\\u2FA6<<<\\u328E"
|
||||
"&\\u9577<<<\\u2FA7"
|
||||
"&\\u9580<<<\\u2FA8"
|
||||
"&\\u961C<<<\\u2FA9"
|
||||
"&\\u96B6<<<\\u2FAA"
|
||||
"&\\u96B9<<<\\u2FAB"
|
||||
"&\\u96E8<<<\\u2FAC"
|
||||
"&\\u9751<<<\\u2FAD"
|
||||
"&\\u975E<<<\\u2FAE"
|
||||
"&\\u9762<<<\\u2FAF"
|
||||
"&\\u9769<<<\\u2FB0"
|
||||
"&\\u97CB<<<\\u2FB1"
|
||||
"&\\u97ED<<<\\u2FB2"
|
||||
"&\\u97F3<<<\\u2FB3"
|
||||
"&\\u9801<<<\\u2FB4"
|
||||
"&\\u9805<<<\\u32A0"
|
||||
"&\\u98A8<<<\\u2FB5"
|
||||
"&\\u98DB<<<\\u2FB6"
|
||||
"&\\u98DF<<<\\u2FB7"
|
||||
"&\\u9996<<<\\u2FB8"
|
||||
"&\\u9999<<<\\u2FB9"
|
||||
"&\\u99AC<<<\\u2FBA"
|
||||
"&\\u9AA8<<<\\u2FBB"
|
||||
"&\\u9AD8<<<\\u2FBC"
|
||||
"&\\u9ADF<<<\\u2FBD"
|
||||
"&\\u9B25<<<\\u2FBE"
|
||||
"&\\u9B2F<<<\\u2FBF"
|
||||
"&\\u9B32<<<\\u2FC0"
|
||||
"&\\u9B3C<<<\\u2FC1"
|
||||
"&\\u9B5A<<<\\u2FC2"
|
||||
"&\\u9CE5<<<\\u2FC3"
|
||||
"&\\u9E75<<<\\u2FC4"
|
||||
"&\\u9E7F<<<\\u2FC5"
|
||||
"&\\u9EA5<<<\\u2FC6"
|
||||
"&\\u9EBB<<<\\u2FC7"
|
||||
"&\\u9EC3<<<\\u2FC8"
|
||||
"&\\u9ECD<<<\\u2FC9"
|
||||
"&\\u9ED1<<<\\u2FCA"
|
||||
"&\\u9EF9<<<\\u2FCB"
|
||||
"&\\u9EFD<<<\\u2FCC"
|
||||
"&\\u9F0E<<<\\u2FCD"
|
||||
"&\\u9F13<<<\\u2FCE"
|
||||
"&\\u9F20<<<\\u2FCF"
|
||||
"&\\u9F3B<<<\\u2FD0"
|
||||
"&\\u9F4A<<<\\u2FD1"
|
||||
"&\\u9F52<<<\\u2FD2"
|
||||
"&\\u9F8D<<<\\u2FD3"
|
||||
"&\\u9F9C<<<\\u2FD4"
|
||||
"&\\u9F9F<<<\\u2EF3"
|
||||
"&\\u9FA0<<<\\u2FD5";
|
||||
|
||||
/*
|
||||
The variables below are defined in separate .cc files, generated by uca9dump at
|
||||
build-time for the Chinese collation.
One set is declared per table family (zh, zh2, zh3): an array of uint16 page
pointers, its MIN/MAX page bounds, a "han to single weight" int array and a
*_HAN_WEIGHT_PAIRS count.
NOTE(review): the exact layout of these tables is not visible here; confirm
against the uca900_zh*_tbls.cc files that define them.
|
||||
*/
|
||||
extern uint16 *zh_han_pages[];
|
||||
extern const int MIN_ZH_HAN_PAGE;
|
||||
extern const int MAX_ZH_HAN_PAGE;
|
||||
extern int zh_han_to_single_weight[];
|
||||
extern const int ZH_HAN_WEIGHT_PAIRS;
|
||||
extern uint16 *zh2_han_pages[];
|
||||
extern const int MIN_ZH2_HAN_PAGE;
|
||||
extern const int MAX_ZH2_HAN_PAGE;
|
||||
extern int zh2_han_to_single_weight[];
|
||||
extern const int ZH2_HAN_WEIGHT_PAIRS;
|
||||
extern uint16 *zh3_han_pages[];
|
||||
extern const int MIN_ZH3_HAN_PAGE;
|
||||
extern const int MAX_ZH3_HAN_PAGE;
|
||||
extern int zh3_han_to_single_weight[];
|
||||
extern const int ZH3_HAN_WEIGHT_PAIRS;
|
||||
#endif
|
446779
deps/oblib/src/lib/charset/uca900_zh_tbls.cc
vendored
Normal file
446779
deps/oblib/src/lib/charset/uca900_zh_tbls.cc
vendored
Normal file
File diff suppressed because it is too large
Load Diff
@ -47,7 +47,10 @@ int ObDatabaseSqlService::insert_database(const ObDatabaseSchema &database_schem
|
||||
LOG_WARN("database schema is invalid", K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(database_schema.get_charset_type(),
|
||||
exec_tenant_id))) {
|
||||
LOG_WARN("failed to check charset data version valid", K(ret));
|
||||
LOG_WARN("failed to check charset data version valid", K(database_schema.get_charset_type()), K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(database_schema.get_collation_type(),
|
||||
exec_tenant_id))) {
|
||||
LOG_WARN("failed to check collation data version valid", K(database_schema.get_collation_type()), K(ret));
|
||||
} else {
|
||||
int64_t affected_rows = 0;
|
||||
ObDMLSqlSplicer dml;
|
||||
@ -125,7 +128,10 @@ int ObDatabaseSqlService::update_database(const ObDatabaseSchema &database_schem
|
||||
LOG_WARN("database scheam is invalid", K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(database_schema.get_charset_type(),
|
||||
exec_tenant_id))) {
|
||||
LOG_WARN("failed to check charset data version valid", K(ret));
|
||||
LOG_WARN("failed to check charset data version valid", K(database_schema.get_charset_type()), K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(database_schema.get_collation_type(),
|
||||
exec_tenant_id))) {
|
||||
// Log the collation type, i.e. the value that the failed check actually validated.
LOG_WARN("failed to check collation data version valid", K(database_schema.get_collation_type()), K(ret));
|
||||
} else {
|
||||
int64_t affected_rows = 0;
|
||||
ObDMLSqlSplicer dml;
|
||||
|
@ -2658,7 +2658,10 @@ int ObTableSqlService::gen_table_dml(
|
||||
LOG_WARN("check ddl allowd failed", K(ret), K(table));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(table.get_charset_type(),
|
||||
exec_tenant_id))) {
|
||||
LOG_WARN("failed to check charset data version valid", K(ret));
|
||||
LOG_WARN("failed to check charset data version valid", K(table.get_charset_type()), K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(table.get_collation_type(),
|
||||
exec_tenant_id))) {
|
||||
LOG_WARN("failed to check collation data version valid", K(table.get_collation_type()), K(ret));
|
||||
} else if (OB_FAIL(GET_MIN_DATA_VERSION(table.get_tenant_id(), data_version))) {
|
||||
LOG_WARN("failed to get data version", K(ret));
|
||||
} else if (data_version < DATA_VERSION_4_1_0_0
|
||||
@ -3897,7 +3900,10 @@ int ObTableSqlService::gen_column_dml(
|
||||
LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2, skip index");
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(column.get_charset_type(),
|
||||
exec_tenant_id))) {
|
||||
LOG_WARN("failed to check charset data version valid", K(ret));
|
||||
LOG_WARN("failed to check charset data version valid", K(column.get_charset_type()), K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(column.get_collation_type(),
|
||||
exec_tenant_id))) {
|
||||
LOG_WARN("failed to check collation data version valid", K(column.get_collation_type()), K(ret));
|
||||
} else if (column.is_generated_column() ||
|
||||
column.is_identity_column() ||
|
||||
ob_is_string_type(column.get_data_type()) ||
|
||||
|
@ -78,7 +78,10 @@ int ObTenantSqlService::alter_tenant(
|
||||
LOG_WARN("invalid tenant schema", K(tenant_schema), K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(tenant_schema.get_charset_type(),
|
||||
tenant_schema.get_tenant_id()))) {
|
||||
LOG_WARN("failed to check charset data version valid", K(ret));
|
||||
LOG_WARN("failed to check charset data version valid", K(tenant_schema.get_charset_type()), K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(tenant_schema.get_collation_type(),
|
||||
tenant_schema.get_tenant_id()))) {
|
||||
// This branch validates the collation, so report it as a collation failure.
LOG_WARN("failed to check collation data version valid", K(tenant_schema.get_collation_type()), K(ret));
|
||||
} else if (OB_FAIL(replace_tenant(tenant_schema, op, sql_client, ddl_stmt_str))) {
|
||||
LOG_WARN("replace_tenant failed", K(tenant_schema), K(op), K(ret));
|
||||
}
|
||||
|
@ -81,7 +81,7 @@ int ObSetNamesExecutor::execute(ObExecContext &ctx, ObSetNamesStmt &stmt)
|
||||
if (CS_TYPE_INVALID == cs_coll_type || CS_TYPE_INVALID == coll_type) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
SQL_ENG_LOG(ERROR, "cs coll type or coll type is invalid", K(ret), K(cs_coll_type), K(coll_type));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(ObCharset::charset_type(charset),
|
||||
session->get_effective_tenant_id()))) {
|
||||
SQL_EXE_LOG(WARN, "failed to check charset data version valid", K(ret));
|
||||
} else if (OB_FAIL(session->update_sys_variable(SYS_VAR_CHARACTER_SET_CLIENT,
|
||||
@ -102,7 +102,7 @@ int ObSetNamesExecutor::execute(ObExecContext &ctx, ObSetNamesStmt &stmt)
|
||||
ObObj database_charset;
|
||||
ObObj database_collation;
|
||||
ObCollationType cs_coll_type = ObCharset::get_default_collation(ObCharset::charset_type(charset));
|
||||
if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(cs_coll_type),
|
||||
if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(ObCharset::charset_type(charset),
|
||||
session->get_effective_tenant_id()))) {
|
||||
SQL_EXE_LOG(WARN, "failed to check charset data version valid", K(ret));
|
||||
} else if (OB_FAIL(session->get_sys_variable(SYS_VAR_CHARACTER_SET_DATABASE,
|
||||
|
@ -566,6 +566,9 @@ int ObVariableSetExecutor::update_global_variables(ObExecContext &ctx,
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(static_cast<ObCollationType>(coll_int64)),
|
||||
session->get_effective_tenant_id()))) {
|
||||
LOG_WARN("failed to check charset data version valid", K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(static_cast<ObCollationType>(coll_int64),
|
||||
session->get_effective_tenant_id()))) {
|
||||
LOG_WARN("failed to check collation data version valid", K(ret));
|
||||
} else if (FALSE_IT(coll_str = ObString::make_string(ObCharset::collation_name(static_cast<ObCollationType>(coll_int64))))) {
|
||||
//do nothing
|
||||
} else if (OB_FAIL(ObBasicSysVar::get_charset_var_and_val_by_collation(
|
||||
@ -591,6 +594,9 @@ int ObVariableSetExecutor::update_global_variables(ObExecContext &ctx,
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(static_cast<ObCollationType>(coll_int64)),
|
||||
session->get_effective_tenant_id()))) {
|
||||
LOG_WARN("failed to check charset data version valid", K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(static_cast<ObCollationType>(coll_int64),
|
||||
session->get_effective_tenant_id()))) {
|
||||
LOG_WARN("failed to check collation data version valid", K(ret));
|
||||
} else if (FALSE_IT(cs_str = ObString::make_string(ObCharset::charset_name(
|
||||
ObCharset::charset_type_by_coll(static_cast<ObCollationType>(coll_int64)))))) {
|
||||
//do nothing
|
||||
|
@ -600,6 +600,24 @@ int ObSQLUtils::is_charset_data_version_valid(ObCharsetType charset_type, const
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Checks whether the tenant's minimum data version permits the given collation.
//
// When OB_BUILD_CLOSE_MODULES is not defined, CS_TYPE_UTF16_UNICODE_CI and
// CS_TYPE_UTF8MB4_UNICODE_CI are rejected with OB_NOT_SUPPORTED while the
// tenant's minimum data version is below DATA_VERSION_4_2_2_0. When the macro
// is defined the check is compiled out and OB_SUCCESS is returned.
//
// @param collation_type  collation being validated
// @param tenant_id       tenant whose minimum data version is looked up
// @return OB_SUCCESS, OB_NOT_SUPPORTED, or the error from GET_MIN_DATA_VERSION
int ObSQLUtils::is_collation_data_version_valid(ObCollationType collation_type, const int64_t tenant_id)
{
  int ret = OB_SUCCESS;
#ifndef OB_BUILD_CLOSE_MODULES
  uint64_t min_data_version = 0;
  if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, min_data_version))) {
    SQL_LOG(WARN, "failed to GET_MIN_DATA_VERSION", K(ret));
  } else {
    // Only the two unicode_ci collations are gated on the data version.
    const bool is_unicode_ci = (CS_TYPE_UTF16_UNICODE_CI == collation_type)
                               || (CS_TYPE_UTF8MB4_UNICODE_CI == collation_type);
    if (is_unicode_ci && min_data_version < DATA_VERSION_4_2_2_0) {
      ret = OB_NOT_SUPPORTED;
      SQL_LOG(WARN, "Unicode collation not supported when data_version < 4_2_2_0", K(collation_type), K(ret));
      // The user-facing text is a prefix; it reads as a full sentence only if
      // the OB_NOT_SUPPORTED message template appends the rest (TODO confirm).
      LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2.2, unicode collation is");
    }
  }
#endif
  return ret;
}
|
||||
|
||||
// If the function addr_to_partition_id appears in the raw_expr argument,
|
||||
// the resulting partition_id cannot later be mapped back to the corresponding addr.
|
||||
int ObSQLUtils::calc_calculable_expr(ObSQLSessionInfo *session,
|
||||
|
@ -275,6 +275,7 @@ public:
|
||||
}
|
||||
}
|
||||
static int is_charset_data_version_valid(ObCharsetType charset_type, const int64_t tenant_id);
|
||||
// Validates collation_type against the minimum data version of tenant_id.
// When OB_BUILD_CLOSE_MODULES is not defined, returns OB_NOT_SUPPORTED for
// CS_TYPE_UTF16_UNICODE_CI / CS_TYPE_UTF8MB4_UNICODE_CI if that version is
// below DATA_VERSION_4_2_2_0; otherwise returns OB_SUCCESS or the error from
// the data-version lookup.
static int is_collation_data_version_valid(ObCollationType collation_type, const int64_t tenant_id);
|
||||
static int calc_calculable_expr(ObSQLSessionInfo *session,
|
||||
const ObRawExpr *expr,
|
||||
common::ObObj &result,
|
||||
|
@ -1,26 +1,9 @@
|
||||
|
||||
# charset objects used for proxy_parser
|
||||
|
||||
if (OB_BUILD_OPENSOURCE)
|
||||
|
||||
|
||||
set(ob_sql_parser_charset_object_list
|
||||
ob_ctype_bin_os.cc
|
||||
ob_ctype_gb18030_os.cc
|
||||
ob_ctype_gbk_os.cc
|
||||
ob_ctype_latin1_os.cc
|
||||
ob_ctype_mb_os.cc
|
||||
ob_ctype_simple_os.cc
|
||||
ob_ctype_os.cc
|
||||
ob_ctype_utf16_os.cc
|
||||
ob_ctype_utf8_os.cc
|
||||
ob_dtoa_os.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
list(TRANSFORM ob_sql_parser_charset_object_list
|
||||
PREPEND ${PROJECT_SOURCE_DIR}/deps/oblib/src/lib/charset/)
|
||||
|
||||
if(OB_BUILD_FULL_CHARSET)
|
||||
set(ob_sql_parser_full_charset_object_list
|
||||
ob_ctype_bin.cc
|
||||
ob_ctype.cc
|
||||
ob_ctype_gbk.cc
|
||||
@ -36,12 +19,10 @@ if(OB_BUILD_FULL_CHARSET)
|
||||
uca900_zh_tbls.cc
|
||||
uca900_zh2_tbls.cc
|
||||
uca900_zh3_tbls.cc
|
||||
)
|
||||
endif()
|
||||
|
||||
list(TRANSFORM ob_sql_parser_full_charset_object_list
|
||||
PREPEND ${PROJECT_SOURCE_DIR}/close_modules/charset/deps/oblib/src/lib/charset/)
|
||||
)
|
||||
|
||||
list(TRANSFORM ob_sql_parser_charset_object_list
|
||||
PREPEND ${PROJECT_SOURCE_DIR}/deps/oblib/src/lib/charset/)
|
||||
# hash objects used for proxy parser
|
||||
set(ob_sql_parser_hash_object_list
|
||||
murmur_hash.h
|
||||
@ -124,19 +105,12 @@ set(ob_extra_sql_parser_object_list
|
||||
)
|
||||
|
||||
# ob_sql_proxy_parser_objects is the static library for proxy, it does not link against observer
|
||||
if (OB_BUILD_FULL_CHARSET)
|
||||
add_library(ob_sql_proxy_parser_objects OBJECT
|
||||
${ob_inner_sql_parser_object_list}
|
||||
${ob_sql_parser_hash_object_list}
|
||||
${ob_sql_parser_full_charset_object_list}
|
||||
)
|
||||
else()
|
||||
add_library(ob_sql_proxy_parser_objects OBJECT
|
||||
${ob_inner_sql_parser_object_list}
|
||||
${ob_sql_parser_hash_object_list}
|
||||
${ob_sql_parser_charset_object_list}
|
||||
)
|
||||
endif()
|
||||
add_library(ob_sql_proxy_parser_objects OBJECT
|
||||
${ob_inner_sql_parser_object_list}
|
||||
${ob_sql_parser_hash_object_list}
|
||||
${ob_sql_parser_charset_object_list}
|
||||
)
|
||||
|
||||
|
||||
# ob_sql_server_parser_object is the static library for observer
|
||||
add_library(ob_sql_server_parser_objects OBJECT
|
||||
|
@ -4454,6 +4454,9 @@ int ObAlterTableResolver::resolve_convert_to_character(const ParseNode &node)
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
|
||||
session_info_->get_effective_tenant_id()))) {
|
||||
LOG_WARN("failed to check charset data version valid", K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
|
||||
session_info_->get_effective_tenant_id()))) {
|
||||
LOG_WARN("failed to check collation data version valid", K(ret));
|
||||
} else {
|
||||
collation_type_ = collation_type;
|
||||
}
|
||||
|
@ -160,6 +160,9 @@ int ObDatabaseResolver<T>::resolve_database_option(T *stmt, ParseNode *node, ObS
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(charset_type,
|
||||
session_info->get_effective_tenant_id()))) {
|
||||
OB_LOG(WARN, "failed to check charset data version valid", K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
|
||||
session_info->get_effective_tenant_id()))) {
|
||||
OB_LOG(WARN, "failed to check collation data version valid", K(ret));
|
||||
} else if (OB_UNLIKELY(collation_already_set_
|
||||
&& stmt->get_charset_type() != charset_type)) {
|
||||
// mysql执行下面这条sql时会报错,为了行为与mysql一致,resolve时即检查collation/charset不一致的问题
|
||||
@ -181,6 +184,12 @@ int ObDatabaseResolver<T>::resolve_database_option(T *stmt, ParseNode *node, ObS
|
||||
ret = common::OB_ERR_UNEXPECTED;
|
||||
SQL_RESV_LOG(WARN, "all valid collation types should have corresponding charset type",
|
||||
K(ret), K(charset_type), K(collation_type));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(charset_type,
|
||||
session_info->get_effective_tenant_id()))) {
|
||||
OB_LOG(WARN, "failed to check charset data version valid", K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
|
||||
session_info->get_effective_tenant_id()))) {
|
||||
OB_LOG(WARN, "failed to check collation data version valid", K(ret));
|
||||
} else if (OB_UNLIKELY(collation_already_set_
|
||||
&& stmt->get_charset_type() != charset_type)) {
|
||||
ret = OB_ERR_COLLATION_MISMATCH;
|
||||
|
@ -1470,6 +1470,9 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
|
||||
session_info_->get_effective_tenant_id()))) {
|
||||
SQL_RESV_LOG(WARN, "failed to check charset data version valid", K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
|
||||
session_info_->get_effective_tenant_id()))) {
|
||||
SQL_RESV_LOG(WARN, "failed to check collation data version valid", K(ret));
|
||||
} else {
|
||||
collation_type_ = collation_type;
|
||||
if (stmt::T_ALTER_TABLE == stmt_->get_stmt_type()) {
|
||||
|
@ -153,6 +153,9 @@ int ObTenantResolver<T>::resolve_tenant_option(T *stmt, ParseNode *node,
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
|
||||
session_info->get_effective_tenant_id()))) {
|
||||
LOG_WARN("failed to check charset data version valid", K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
|
||||
session_info->get_effective_tenant_id()))) {
|
||||
LOG_WARN("failed to check collation data version valid", K(ret));
|
||||
} else {
|
||||
collation_type_ = collation_type;
|
||||
if (stmt->get_stmt_type() == stmt::T_MODIFY_TENANT) {
|
||||
|
@ -4877,6 +4877,9 @@ int ObRawExprResolverImpl::process_collation_node(const ParseNode *node, ObRawEx
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
|
||||
ctx_.session_info_->get_effective_tenant_id()))) {
|
||||
LOG_WARN("failed to check charset data version valid", K(ret));
|
||||
} else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
|
||||
ctx_.session_info_->get_effective_tenant_id()))) {
|
||||
LOG_WARN("failed to check collation data version valid", K(ret));
|
||||
} else if (OB_FAIL(ctx_.expr_factory_.create_raw_expr(T_INT, c_expr))) {
|
||||
LOG_WARN("fail to create raw expr", K(ret));
|
||||
} else if (OB_ISNULL(c_expr)) {
|
||||
|
@ -13,6 +13,8 @@ gbk_chinese_ci gbk 28 Yes Yes 1
|
||||
gbk_bin gbk 87 Yes 1
|
||||
utf16_general_ci utf16 54 Yes Yes 1
|
||||
utf16_bin utf16 55 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 1
|
||||
utf16_unicode_ci utf16 101 Yes 1
|
||||
gb18030_chinese_ci gb18030 248 Yes Yes 1
|
||||
gb18030_bin gb18030 249 Yes 1
|
||||
latin1_swedish_ci latin1 8 Yes Yes 1
|
||||
|
@ -7,6 +7,8 @@ gbk_chinese_ci gbk 28 Yes Yes 1
|
||||
gbk_bin gbk 87 Yes 1
|
||||
utf16_general_ci utf16 54 Yes Yes 1
|
||||
utf16_bin utf16 55 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 1
|
||||
utf16_unicode_ci utf16 101 Yes 1
|
||||
gb18030_chinese_ci gb18030 248 Yes Yes 1
|
||||
gb18030_bin gb18030 249 Yes 1
|
||||
latin1_swedish_ci latin1 8 Yes Yes 1
|
||||
@ -27,6 +29,8 @@ gbk_chinese_ci gbk 28 Yes Yes 1
|
||||
gbk_bin gbk 87 Yes 1
|
||||
utf16_general_ci utf16 54 Yes Yes 1
|
||||
utf16_bin utf16 55 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 1
|
||||
utf16_unicode_ci utf16 101 Yes 1
|
||||
gb18030_chinese_ci gb18030 248 Yes Yes 1
|
||||
gb18030_bin gb18030 249 Yes 1
|
||||
latin1_swedish_ci latin1 8 Yes Yes 1
|
||||
|
@ -8,6 +8,8 @@ gbk_chinese_ci gbk 28 Yes Yes 1
|
||||
gbk_bin gbk 87 Yes 1
|
||||
utf16_general_ci utf16 54 Yes Yes 1
|
||||
utf16_bin utf16 55 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 1
|
||||
utf16_unicode_ci utf16 101 Yes 1
|
||||
gb18030_chinese_ci gb18030 248 Yes Yes 1
|
||||
gb18030_bin gb18030 249 Yes 1
|
||||
latin1_swedish_ci latin1 8 Yes Yes 1
|
||||
@ -28,6 +30,8 @@ gbk_chinese_ci gbk 28 Yes Yes 1
|
||||
gbk_bin gbk 87 Yes 1
|
||||
utf16_general_ci utf16 54 Yes Yes 1
|
||||
utf16_bin utf16 55 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 1
|
||||
utf16_unicode_ci utf16 101 Yes 1
|
||||
gb18030_chinese_ci gb18030 248 Yes Yes 1
|
||||
gb18030_bin gb18030 249 Yes 1
|
||||
latin1_swedish_ci latin1 8 Yes Yes 1
|
||||
@ -43,6 +47,7 @@ select * from collations where collation_name like '%utf8%';
|
||||
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
|
||||
utf8mb4_general_ci utf8mb4 45 Yes Yes 1
|
||||
utf8mb4_bin utf8mb4 46 Yes 1
|
||||
utf8mb4_unicode_ci utf8mb4 224 Yes 1
|
||||
show create table collations;
|
||||
View Create View character_set_client collation_connection
|
||||
COLLATIONS CREATE VIEW `COLLATIONS` AS select collation as COLLATION_NAME, charset as CHARACTER_SET_NAME, id as ID, `is_default` as IS_DEFAULT, is_compiled as IS_COMPILED, sortlen as SORTLEN from oceanbase.__tenant_virtual_collation utf8mb4 utf8mb4_general_ci
|
||||
|
Loading…
x
Reference in New Issue
Block a user