[FEAT MERGE]charset revise

2023-11-27 08:52:01 +00:00 · 2023-11-27 08:52:01 +00:00 · e4f7452b2d
commit e4f7452b2d
parent 654e89bbe1
50 changed files with 2188803 additions and 6328 deletions
--- a/cmake/Env.cmake
+++ b/cmake/Env.cmake
@ -82,8 +82,6 @@ if(OB_BUILD_CLOSE_MODULES)
  ob_define(OB_BUILD_TDE_SECURITY ON)
  ob_define(OB_BUILD_AUDIT_SECURITY ON)
  ob_define(OB_BUILD_LABEL_SECURITY ON)
-  # 字符集
-  ob_define(OB_BUILD_FULL_CHARSET ON)
  # SPM功能
  ob_define(OB_BUILD_SPM ON)

@ -121,10 +119,6 @@ if(OB_BUILD_LABEL_SECURITY)
  add_definitions(-DOB_BUILD_LABEL_SECURITY)
 endif()

-if(OB_BUILD_FULL_CHARSET)
-  add_definitions(-DOB_BUILD_FULL_CHARSET)
-endif()
-
 if(OB_BUILD_SPM)
  add_definitions(-DOB_BUILD_SPM)
 endif()
--- a/deps/oblib/src/CMakeLists.txt
+++ b/deps/oblib/src/CMakeLists.txt
@ -85,14 +85,6 @@ if(OB_BUILD_AUDIT_SECURITY)
  )
 endif()

-if(OB_BUILD_ORACLE_XML)
-  target_include_directories(
-    oblib_base_base_base INTERFACE
-    ${CMAKE_SOURCE_DIR}/close_modules/charset
-    ${CMAKE_SOURCE_DIR}/close_modules/charset/deps/oblib/src/
-  )
-endif()
-
 if(OB_USE_BABASSL)
  target_include_directories(
    oblib_base_base_base INTERFACE
--- a/deps/oblib/src/lib/CMakeLists.txt
+++ b/deps/oblib/src/lib/CMakeLists.txt
@ -10,16 +10,21 @@ ob_set_subtarget(oblib_lib ALONE
 )

 ob_set_subtarget(oblib_lib charset
-  charset/ob_ctype_bin_os.cc
-  charset/ob_ctype_gb18030_os.cc
-  charset/ob_ctype_gbk_os.cc
-  charset/ob_ctype_latin1_os.cc
-  charset/ob_ctype_mb_os.cc
-  charset/ob_ctype_simple_os.cc
-  charset/ob_ctype_os.cc
-  charset/ob_ctype_utf16_os.cc
-  charset/ob_ctype_utf8_os.cc
-  charset/ob_dtoa_os.cc
+  charset/ob_ctype_bin.cc
+  charset/ob_ctype.cc
+  charset/ob_ctype_gb18030.cc
+  charset/ob_ctype_gbk.cc
+  charset/ob_ctype_latin1.cc
+  charset/ob_ctype_mb.cc
+  charset/ob_ctype_simple.cc
+  charset/ob_ctype_uca.cc
+  charset/ob_ctype_utf8.cc
+  charset/ob_ctype_utf16.cc
+  charset/ob_dtoa.cc
+  charset/uca900_ja_tbls.cc
+  charset/uca900_zh_tbls.cc
+  charset/uca900_zh2_tbls.cc
+  charset/uca900_zh3_tbls.cc
  charset/ob_charset.cpp
 )

--- a/deps/oblib/src/lib/charset/mb_wc.h
+++ b/deps/oblib/src/lib/charset/mb_wc.h
@ -0,0 +1,171 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef MB_WC_INCLUDED
+#define MB_WC_INCLUDED
+
+/**
+  @file mb_wc.h
+
+  Definitions of mb_wc (multibyte to wide character, i.e., effectively
+  "parse a UTF-8 character") functions for UTF-8 (both three- and four-byte).
+  These are available as inline functions, as C-style thunks so that they
+  can fit into the charset handler's mb_wc slot (cs->cset->mb_wc), and as functors.
+
+  The functors exist so that you can specialize a class on them and get them
+  inlined instead of having to call them through the function pointer in
+  the charset handler (cs->cset->mb_wc); mb_wc is in itself so cheap (the most
+  common case is just a single byte load and a predictable compare) that the
+  call overhead in a tight loop is significant, and these routines tend to take
+  up a lot of CPU time when sorting. Typically, at the outermost level, you'd
+  simply compare cs->cset->mb_wc with the matching thunk (for example
+  ob_mb_wc_utf8mb4_thunk, declared at the end of this header), and if it matches,
+  instantiate your function with the given class. If it doesn't match,
+  you can use Mb_wc_through_function_pointer, which calls through the
+  function pointer as usual. (It will cache the function pointer for you,
+  which is typically faster than looking it up all the time -- the compiler
+  cannot always figure out on its own that it doesn't change.)
+
+  The Mb_wc_* classes should be sent by _value_, not by reference, since
+  they are never larger than two pointers (and usually simply zero).
+*/
+#include "lib/charset/ob_ctype.h"
+
+#define ALWAYS_INLINE __attribute__((always_inline)) inline
+
+template <bool RANGE_CHECK, bool SUPPORT_MB4>
+static int ob_mb_wc_utf8_prototype(ob_wc_t *pwc, const unsigned char *s,
+                                   const unsigned char *e);
+
+/**
+  Functor that converts a UTF-8 multibyte sequence (up to three bytes)
+  to a wide character.
+
+  operator() forwards to ob_mb_wc_utf8_prototype with RANGE_CHECK enabled and
+  SUPPORT_MB4 disabled and returns that function's result unchanged. The
+  arguments are passed through as given: pwc receives the decoded value, and
+  s / e are the start of the sequence and the end bound used for the range
+  check.
+*/
+struct Mb_wc_utf8 {
+  Mb_wc_utf8() {}
+
+  ALWAYS_INLINE
+  int operator()(ob_wc_t *pwc, const unsigned char *s, const unsigned char *e) const {
+    return ob_mb_wc_utf8_prototype</*RANGE_CHECK=*/true, /*SUPPORT_MB4=*/false>(
+          pwc, s, e);
+  }
+};
+
+/**
+  Functor that converts a UTF-8 multibyte sequence (up to four bytes)
+  to a wide character.
+
+  operator() forwards to ob_mb_wc_utf8_prototype with both RANGE_CHECK and
+  SUPPORT_MB4 enabled and returns that function's result unchanged. The
+  arguments are passed through as given: pwc receives the decoded value, and
+  s / e are the start of the sequence and the end bound used for the range
+  check.
+*/
+struct Mb_wc_utf8mb4 {
+  Mb_wc_utf8mb4() {}
+
+  ALWAYS_INLINE
+  int operator()(ob_wc_t *pwc, const unsigned char *s, const unsigned char *e) const {
+    return ob_mb_wc_utf8_prototype</*RANGE_CHECK=*/true, /*SUPPORT_MB4=*/true>(
+          pwc, s, e);
+  }
+};
+
+/**
+  Functor that uses a function pointer to convert a multibyte sequence
+  to a wide character.
+
+  The constructor reads cs->cset->mb_wc once and keeps it together with cs,
+  so operator() is a single indirect call m_funcptr(m_cs, pwc, s, e) whose
+  result is returned unchanged. cs is dereferenced in the constructor, so it
+  must be non-null with a valid cset. Only the pointer is stored (no copy is
+  made), so the ObCharsetInfo has to stay alive while the functor is in use.
+*/
+class Mb_wc_through_function_pointer {
+ public:
+  explicit Mb_wc_through_function_pointer(const ObCharsetInfo *cs)
+      : m_funcptr(cs->cset->mb_wc), m_cs(cs) {}  // look the handler up once
+
+  int operator()(ob_wc_t *pwc, const unsigned char *s, const unsigned char *e) const {
+    return m_funcptr(m_cs, pwc, s, e);
+  }
+
+ private:
+  typedef int (*mbwc_func_t)(const ObCharsetInfo *, ob_wc_t *, const unsigned char *,
+                             const unsigned char *);
+
+  const mbwc_func_t m_funcptr;      // cached copy of cs->cset->mb_wc
+  const ObCharsetInfo *const m_cs;  // charset passed as first argument on every call
+};
+/**
+  Decodes one UTF-8 sequence starting at s and stores the code point in *pwc.
+
+  Template parameters:
+    RANGE_CHECK  when true, each read is first checked against e and one of
+                 the OB_CS_TOOSMALL codes is returned if the sequence would
+                 run past e; when false, e is never consulted.
+    SUPPORT_MB4  when true, four-byte sequences are decoded; when false, any
+                 lead byte of 0xf0 or above yields OB_CS_ILSEQ.
+
+  Returns the length of the sequence in bytes (1 to 4) on success,
+  OB_CS_ILSEQ for a malformed, overlong, surrogate or out-of-range sequence,
+  or OB_CS_TOOSMALL, OB_CS_TOOSMALL2, OB_CS_TOOSMALL3, OB_CS_TOOSMALL4 when
+  RANGE_CHECK is set and fewer bytes than needed remain before e.
+  Note that on the three- and four-byte paths *pwc has already been written
+  when the overlong / surrogate / range checks return OB_CS_ILSEQ.
+*/
+template <bool RANGE_CHECK, bool SUPPORT_MB4>
+static ALWAYS_INLINE int ob_mb_wc_utf8_prototype(ob_wc_t *pwc, const unsigned char *s,
+                                                 const unsigned char *e) {
+  if (RANGE_CHECK && s >= e) return OB_CS_TOOSMALL;  // no input bytes at all
+
+  unsigned char c = s[0];
+  if (c < 0x80) {  // single-byte (ASCII) fast path
+    *pwc = c;
+    return 1;
+  }
+
+  if (c < 0xe0) {  // two-byte sequence; this range also holds stray continuation bytes (0x80..0xbf)
+    if (c < 0xc2)  // Continuation byte or overlong lead: resulting code point would be less than 0x80.
+      return OB_CS_ILSEQ;
+
+    if (RANGE_CHECK && s + 2 > e) return OB_CS_TOOSMALL2;
+
+    if ((s[1] & 0xc0) != 0x80)  // Next byte must be a continuation byte.
+      return OB_CS_ILSEQ;
+
+    *pwc = ((ob_wc_t)(c & 0x1f) << 6) + (ob_wc_t)(s[1] & 0x3f);
+    return 2;
+  }
+
+  if (c < 0xf0) {  // three-byte sequence
+    if (RANGE_CHECK && s + 3 > e) return OB_CS_TOOSMALL3;
+
+    // Next two bytes must be continuation bytes.
+    uint16 two_bytes;
+    memcpy(&two_bytes, s + 1, sizeof(two_bytes));  // memcpy: s + 1 may be unaligned
+    if ((two_bytes & 0xc0c0) != 0x8080)  // Endianness does not matter: both bytes get the same mask.
+      return OB_CS_ILSEQ;
+
+    *pwc = ((ob_wc_t)(c & 0x0f) << 12) + ((ob_wc_t)(s[1] & 0x3f) << 6) +
+           (ob_wc_t)(s[2] & 0x3f);
+    if (*pwc < 0x800) return OB_CS_ILSEQ;  // overlong: fits in two bytes
+    /*
+      According to RFC 3629, UTF-8 should prohibit characters between
+      U+D800 and U+DFFF, which are reserved for surrogate pairs and do
+      not directly represent characters.
+    */
+    if (*pwc >= 0xd800 && *pwc <= 0xdfff) return OB_CS_ILSEQ;
+    return 3;
+  }
+
+  if (SUPPORT_MB4) {
+    if (RANGE_CHECK && s + 4 > e) /* We need 4 bytes */
+      return OB_CS_TOOSMALL4;
+
+    /*
+      This byte must be of the form 11110xxx, and the next three bytes
+      must be continuation bytes.
+
+      Unlike the two-byte check above, the mask here is not symmetric, so
+      the constant depends on byte order.
+      NOTE(review): this relies on WORDS_BIGENDIAN being defined by the build
+      on big-endian hosts; otherwise the little-endian constants are used
+      there and valid input would be rejected -- confirm the build sets it.
+    */
+    uint32 four_bytes;
+    memcpy(&four_bytes, s, sizeof(four_bytes));
+#ifdef WORDS_BIGENDIAN
+    if ((four_bytes & 0xf8c0c0c0) != 0xf0808080)
+#else
+    if ((four_bytes & 0xc0c0c0f8) != 0x808080f0)
+#endif
+      return OB_CS_ILSEQ;
+
+    *pwc = ((ob_wc_t)(c & 0x07) << 18) + ((ob_wc_t)(s[1] & 0x3f) << 12) +
+           ((ob_wc_t)(s[2] & 0x3f) << 6) + (ob_wc_t)(s[3] & 0x3f);
+    if (*pwc < 0x10000 || *pwc > 0x10ffff) return OB_CS_ILSEQ;  // overlong, or beyond U+10FFFF
+    return 4;
+  }
+
+  return OB_CS_ILSEQ;  // lead byte >= 0xf0 while four-byte support is disabled
+}
+
+extern "C" int ob_mb_wc_utf8mb4_thunk(const ObCharsetInfo *cs, ob_wc_t *pwc,
+                                      const unsigned char *s, const unsigned char *e);
+
+#endif  // MB_WC_INCLUDED
--- a/deps/oblib/src/lib/charset/ob_byteorder.h
+++ b/deps/oblib/src/lib/charset/ob_byteorder.h
@ -0,0 +1,74 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef OB_BYTEORDER_H
+#define OB_BYTEORDER_H
+
+#include <stdint.h>
+#include "lib/charset/ob_template_helper.h"
+#include <netinet/in.h>
+
+/*
+ Functions for big-endian loads and stores. These are safe to use
+ no matter what the compiler, CPU or alignment, and also with -fstrict-aliasing.
+
+ The stores return a pointer just past the value that was written.
+
+ Every function moves the bytes with memcpy into or out of a local integer
+ and converts with ntohs / ntohl / htons / htonl, so no pointer to the
+ integer type is ever formed on the caller's buffer.
+
+ NOTE(review): memcpy is used throughout this header, but <string.h> is not
+ included here directly; presumably it arrives through
+ lib/charset/ob_template_helper.h -- confirm.
+*/
+
+inline uint16_t load16be(const char *ptr) {  // reads 2 bytes at ptr, returns them as a host-order value
+  uint16_t val;
+  memcpy(&val, ptr, sizeof(val));
+  return ntohs(val);
+}
+
+inline uint32_t load32be(const char *ptr) {  // reads 4 bytes at ptr, returns them as a host-order value
+  uint32_t val;
+  memcpy(&val, ptr, sizeof(val));  // memcpy: ptr need not be 4-byte aligned
+  return ntohl(val);               // network (big-endian) order -> host order
+}
+
+__attribute__((always_inline)) inline char *store16be(char *ptr, uint16_t val) {  // writes val big-endian at ptr; returns ptr + 2
+#if defined(_MSC_VER)
+  // _byteswap_ushort is an intrinsic on MSVC, but htons is not.
+  // NOTE(review): this swaps unconditionally, i.e. it assumes a little-endian host -- confirm.
+  val = _byteswap_ushort(val);
+#else
+  val = htons(val);
+#endif
+  memcpy(ptr, &val, sizeof(val));
+  return ptr + sizeof(val);
+}
+
+inline char *store32be(char *ptr, uint32_t val) {  // writes val big-endian at ptr; returns ptr + 4
+  val = htonl(val);                // host order -> network (big-endian) order
+  memcpy(ptr, &val, sizeof(val));  // memcpy: ptr need not be 4-byte aligned
+  return ptr + sizeof(val);
+}
+
+// Adapters for using unsigned char * instead of char *; each one forwards to the char * overload above through pointer_cast.
+
+inline uint16_t load16be(const unsigned char *ptr) {
+  return load16be(pointer_cast<const char *>(ptr));
+}
+
+inline uint32_t load32be(const unsigned char *ptr) {
+  return load32be(pointer_cast<const char *>(ptr));
+}
+
+__attribute__((always_inline)) inline unsigned char *store16be(unsigned char *ptr, uint16_t val) {
+  return pointer_cast<unsigned char *>(store16be(pointer_cast<char *>(ptr), val));  // returned pointer is ptr + 2
+}
+
+inline unsigned char *store32be(unsigned char *ptr, uint32_t val) {
+  return pointer_cast<unsigned char *>(store32be(pointer_cast<char *>(ptr), val));  // returned pointer is ptr + 4
+}
+
+#endif // OB_BYTEORDER_H
--- a/deps/oblib/src/lib/charset/ob_charset.cpp
+++ b/deps/oblib/src/lib/charset/ob_charset.cpp
@ -293,14 +293,9 @@ const ObCollationWrapper ObCharset::collation_wrap_arr_[ObCharset::VALID_COLLATI
  {CS_TYPE_GBK_BIN, CHARSET_GBK, CS_TYPE_GBK_BIN, false, true, 1},
  {CS_TYPE_UTF16_GENERAL_CI, CHARSET_UTF16, CS_TYPE_UTF16_GENERAL_CI, true, true, 1},
  {CS_TYPE_UTF16_BIN, CHARSET_UTF16, CS_TYPE_UTF16_BIN, false, true, 1},
-#ifndef OB_BUILD_FULL_CHARSET
-  {CS_TYPE_INVALID, CHARSET_INVALID, CS_TYPE_INVALID, false, false, 1},
-  {CS_TYPE_INVALID, CHARSET_INVALID, CS_TYPE_INVALID, false, false, 1},
-#else
  //{CS_TYPE_UTF8MB4_ZH_0900_AS_CS, CHARSET_UTF8MB4, CS_TYPE_UTF8MB4_ZH_0900_AS_CS, false, true, 0},
  {CS_TYPE_UTF8MB4_UNICODE_CI, CHARSET_UTF8MB4, CS_TYPE_UTF8MB4_UNICODE_CI, false, true, 1},
  {CS_TYPE_UTF16_UNICODE_CI, CHARSET_UTF16, CS_TYPE_UTF16_UNICODE_CI, false, true, 1},
-#endif
  {CS_TYPE_GB18030_CHINESE_CI, CHARSET_GB18030, CS_TYPE_GB18030_CHINESE_CI, true, true, 1},
  {CS_TYPE_GB18030_BIN, CHARSET_GB18030, CS_TYPE_GB18030_BIN, false, true, 1},
  {CS_TYPE_LATIN1_SWEDISH_CI, CHARSET_LATIN1, CS_TYPE_LATIN1_SWEDISH_CI,true, true, 1},
@ -336,11 +331,7 @@ ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
                                           &ob_charset_gbk_bin,   // 87
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,                 // 88
  NULL, NULL, NULL, NULL, NULL,                                   // 96
-#ifdef OB_BUILD_FULL_CHARSET
                                &ob_charset_utf16_unicode_ci,     // 101
-#else
-                                NULL,
-#endif
                                      NULL, NULL,                 // 102
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,                 // 104
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,                 // 112
@ -360,11 +351,7 @@ ObCharsetInfo *ObCharset::charset_arr[CS_TYPE_MAX] = {
  &ob_charset_gb18030_2022_pinyin_cs,  &ob_charset_gb18030_2022_radical_ci,// 218
  &ob_charset_gb18030_2022_radical_cs, &ob_charset_gb18030_2022_stroke_ci, // 220
  &ob_charset_gb18030_2022_stroke_cs, NULL,                       // 222
-#ifdef OB_BUILD_FULL_CHARSET
  &ob_charset_utf8mb4_unicode_ci,                                 // 224
-#else
-  NULL,
-#endif
        NULL, NULL, NULL, NULL, NULL, NULL, NULL,                 // 225
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,                 // 232
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,                 // 240
@ -508,33 +495,6 @@ uint64_t ObCharset::strntoullrnd(const char *str,
  return result;
 }

-#ifdef OB_BUILD_FULL_CHARSET
-/*
-  Convert integer to its string representation in given scale of notation.
-
-  SYNOPSIS
-    int2str()
-      val     - value to convert
-      dst     - points to buffer where string representation should be stored
-      radix   - radix of scale of notation
-      upcase  - set to 1 if we should use upper-case digits
-
-  DESCRIPTION
-    Converts the (long) integer value to its character form and moves it to
-    the destination buffer followed by a terminating NUL.
-    If radix is -2..-36, val is taken to be SIGNED, if radix is  2..36, val is
-    taken to be UNSIGNED. That is, val is signed if and only if radix is.
-    All other radixes treated as bad and nothing will be changed in this case.
-
-    For conversion to decimal representation (radix is -10 or 10) one can use
-    optimized int10_to_str() function.
-
-  RETURN VALUE
-    Pointer to ending NUL character or NullS if radix is bad.
-*/
-#endif
-
-//=============================================================
 char* ObCharset::lltostr(int64_t val, char *dst, int radix, int upcase)
 {
  int ret = OB_SUCCESS;
@ -583,7 +543,7 @@ char* ObCharset::lltostr(int64_t val, char *dst, int radix, int upcase)
    p = &buffer[sizeof(buffer)-1];
    *p = '\0';
    new_val= uval / (uint64_t) radix;
-    *--p = dig_vec[(uchar) (uval- (uint64_t) new_val*(uint64_t) radix)];
+    *--p = dig_vec[(unsigned char) (uval- (uint64_t) new_val*(uint64_t) radix)];
    val = new_val;
    ldiv_t res;
    while (val != 0)
@ -621,8 +581,8 @@ uint32_t ObCharset::instr(ObCollationType collation_type,
  if (is_argument_valid(collation_type, str1, str1_len, str2, str2_len)) {
    ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
    ob_match_t m_match_t[2];
-    uint nmatch = 1;
-    uint m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
+    unsigned int nmatch = 1;
+    unsigned int m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
    if (0 == m_ret ) {
      result = 0;
    } else {
@ -642,8 +602,8 @@ int64_t ObCharset::instrb(ObCollationType collation_type,
  if (is_argument_valid(collation_type, str1, str1_len, str2, str2_len)) {
    ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
    ob_match_t m_match_t[2];
-    uint nmatch = 1;
-    uint m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
+    unsigned int nmatch = 1;
+    unsigned int m_ret = cs->coll->instr(cs, str1, str1_len, str2, str2_len, m_match_t, nmatch);
    if (0 != m_ret) {
      result =  m_match_t[0].end - m_match_t[0].beg;
    }
@ -700,9 +660,9 @@ int ObCharset::strcmp(ObCollationType collation_type,
    ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
    const bool t_is_prefix = false;
    result = cs->coll->strnncoll(cs,
-                              reinterpret_cast<const uchar *>(str1),
+                              reinterpret_cast<const unsigned char *>(str1),
                              str1_len,
-                              reinterpret_cast<const uchar *>(str2),
+                              reinterpret_cast<const unsigned char *>(str2),
                              str2_len, t_is_prefix);
  }
  return result;
@ -719,9 +679,9 @@ int ObCharset::strcmpsp(ObCollationType collation_type,
  if (is_argument_valid(collation_type, str1, str1_len, str2, str2_len)) {
    ObCharsetInfo *cs = static_cast<ObCharsetInfo *>(ObCharset::charset_arr[collation_type]);
    result = cs->coll->strnncollsp(cs,
-                                reinterpret_cast<const uchar *>(str1),
+                                reinterpret_cast<const unsigned char *>(str1),
                                str1_len,
-                                reinterpret_cast<const uchar *>(str2),
+                                reinterpret_cast<const unsigned char *>(str2),
                                str2_len,
                                cmp_endspace);
  }
@ -860,10 +820,10 @@ size_t ObCharset::sortkey(ObCollationType collation_type,
    //
    // 对于有非法字符的unicode字符串，采用原生的不转换sortkey的方式进行比较。
    result = cs->coll->strnxfrm(cs,
-                             reinterpret_cast<uchar *>(key),
+                             reinterpret_cast<unsigned char *>(key),
                             key_len,
                             OB_MAX_WEIGHT,
-                             reinterpret_cast<const uchar *>(str),
+                             reinterpret_cast<const unsigned char *>(str),
                             str_len,
                             0,
                             &is_valid_unicode_tmp);
@ -890,10 +850,10 @@ size_t ObCharset::sortkey_var_len(ObCollationType collation_type,
      result = -1;
    } else {
      result = cs->coll->strnxfrm_varlen(cs,
-                                       reinterpret_cast<uchar *>(key),
+                                       reinterpret_cast<unsigned char *>(key),
                                       key_len,
                                       OB_MAX_WEIGHT,
-                                       reinterpret_cast<const uchar *>(str),
+                                       reinterpret_cast<const unsigned char *>(str),
                                       str_len,
                                       is_space_cmp,
                                       &is_valid_unicode_tmp);
@ -921,7 +881,7 @@ uint64_t ObCharset::hash(ObCollationType collation_type,
      LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->coll), K(lbt()));
    } else {
      seed = 0xc6a4a7935bd1e995;
-      cs->coll->hash_sort(cs, reinterpret_cast<const uchar *>(str), str_len,
+      cs->coll->hash_sort(cs, reinterpret_cast<const unsigned char *>(str), str_len,
                          &ret, &seed, calc_end_space, hash_algo);
    }
  }
@ -1102,10 +1062,6 @@ int ObCharset::well_formed_len(ObCollationType collation_type, const char *str,
  return ret;
 }

-#ifdef OB_BUILD_FULL_CHARSET
-// Be careful with this function. The return value may be out of range.
-// Refer to
-#endif
 size_t ObCharset::charpos(const ObCollationType collation_type,
                              const char *str,
                              const int64_t str_len,
@ -1204,8 +1160,8 @@ int ObCharset::mb_wc(ObCollationType collation_type,
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
    } else {
-      int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const uchar*>(mb.ptr()),
-                            reinterpret_cast<const uchar*>(mb.ptr()+mb.length()));
+      int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const unsigned char*>(mb.ptr()),
+                            reinterpret_cast<const unsigned char*>(mb.ptr()+mb.length()));
      if (tmp <= 0) {
        ret = OB_ERR_INCORRECT_STRING_VALUE;
      } else {
@ -1237,8 +1193,8 @@ int ObCharset::mb_wc(ObCollationType collation_type,
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
    } else {
-      int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const uchar*>(mb),
-                                reinterpret_cast<const uchar*>(mb + mb_size));
+      int tmp = cs->cset->mb_wc(cs, &my_wc, reinterpret_cast<const unsigned char*>(mb),
+                                reinterpret_cast<const unsigned char*>(mb + mb_size));
      if (tmp <= 0) {
        ret = OB_ERR_INCORRECT_STRING_VALUE;
      } else {
@ -1268,7 +1224,7 @@ int ObCharset::display_len(ObCollationType collation_type,
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
    } else {
-      const uchar *buf = reinterpret_cast<const uchar*>(mb.ptr());
+      const unsigned char *buf = reinterpret_cast<const unsigned char*>(mb.ptr());
      int64_t buf_size = mb.length();
      int64_t char_pos = 0;
      bool found = false;
@ -1320,7 +1276,7 @@ int ObCharset::max_display_width_charpos(ObCollationType collation_type, const c
      LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(cs->cset));
    } else {
      char_pos = 0;
-      const uchar *buf = reinterpret_cast<const uchar*>(mb);
+      const unsigned char *buf = reinterpret_cast<const unsigned char*>(mb);
      bool found = false;
      int64_t total_width = 0;

@ -1372,8 +1328,8 @@ int ObCharset::wc_mb(ObCollationType collation_type, int32_t wc, char *buff, int
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected error. invalid argument(s)", K(cs), K(ret));
    } else {
-      int tmp = cs->cset->wc_mb(cs, wc, reinterpret_cast<uchar*>(buff),
-                                reinterpret_cast<uchar*>(buff + buff_len));
+      int tmp = cs->cset->wc_mb(cs, wc, reinterpret_cast<unsigned char*>(buff),
+                                reinterpret_cast<unsigned char*>(buff + buff_len));
      if (tmp <= 0) {
        ret = OB_ERR_INCORRECT_STRING_VALUE;
      } else {
@ -1589,14 +1545,12 @@ ObCollationType ObCharset::collation_type(const ObString &cs_name)
    collation_type = CS_TYPE_UTF16_GENERAL_CI;
  } else if (0 == cs_name.case_compare(ob_charset_utf16_bin.name)) {
    collation_type = CS_TYPE_UTF16_BIN;
-#ifdef OB_BUILD_FULL_CHARSET
  } else if (0 == cs_name.case_compare("utf8_unicode_ci")) {
    collation_type = CS_TYPE_UTF8MB4_UNICODE_CI;
  } else if (0 == cs_name.case_compare(ob_charset_utf16_unicode_ci.name)) {
    collation_type = CS_TYPE_UTF16_UNICODE_CI;
  } else if (0 == cs_name.case_compare(ob_charset_utf8mb4_unicode_ci.name)) {
    collation_type = CS_TYPE_UTF8MB4_UNICODE_CI;
-#endif
  } else if (0 == cs_name.case_compare(ob_charset_gb18030_bin.name)) {
    collation_type = CS_TYPE_GB18030_BIN;
  } else if (0 == cs_name.case_compare(ob_charset_gb18030_chinese_ci.name)) {
@ -1639,9 +1593,7 @@ bool ObCharset::is_valid_collation(ObCharsetType charset_type, ObCollationType c
  if (CHARSET_UTF8MB4 == charset_type) {
    if (CS_TYPE_UTF8MB4_BIN == collation_type
        || CS_TYPE_UTF8MB4_GENERAL_CI == collation_type
-#ifdef OB_BUILD_FULL_CHARSET
        || CS_TYPE_UTF8MB4_UNICODE_CI == collation_type
-#endif
        ) {
      ret = true;
    }
@ -1655,9 +1607,7 @@ bool ObCharset::is_valid_collation(ObCharsetType charset_type, ObCollationType c
  } else if (CHARSET_UTF16 == charset_type) {
    if (CS_TYPE_UTF16_GENERAL_CI == collation_type
        || CS_TYPE_UTF16_BIN == collation_type
-#ifdef OB_BUILD_FULL_CHARSET
        || CS_TYPE_UTF16_UNICODE_CI == collation_type
-#endif
        ) {
      ret = true;
    }
@ -1750,11 +1700,9 @@ bool ObCharset::is_valid_collation(int64_t collation_type_int)
    || CS_TYPE_LATIN1_SWEDISH_CI == collation_type
    || CS_TYPE_LATIN1_BIN == collation_type
    || is_gb18030_2022(collation_type)
-#ifdef OB_BUILD_FULL_CHARSET
    || CS_TYPE_UTF8MB4_UNICODE_CI == collation_type
    || CS_TYPE_UTF16_UNICODE_CI == collation_type
    || (CS_TYPE_EXTENDED_MARK < collation_type && collation_type < CS_TYPE_MAX)
-#endif
    ;
 }

@ -1975,57 +1923,6 @@ int ObCharset::result_collation(
  return ret;
 }

-#ifdef OB_BUILD_FULL_CHARSET
-/** note from mysql:
-  Aggregate two collations together taking
-  into account their coercibility (aka derivation):.
-
-  0 == DERIVATION_EXPLICIT  - an explicitly written COLLATE clause @n
-  1 == DERIVATION_NONE      - a mix of two different collations @n
-  2 == DERIVATION_IMPLICIT  - a column @n
-  3 == DERIVATION_COERCIBLE - a string constant.
-
-  The most important rules are:
-  -# If collations are the same:
-  chose this collation, and the strongest derivation.
-  -# If collations are different:
-  - Character sets may differ, but only if conversion without
-  data loss is possible. The caller provides flags whether
-  character set conversion attempts should be done. If no
-  flags are substituted, then the character sets must be the same.
-  Currently processed flags are:
-  MY_COLL_ALLOW_SUPERSET_CONV  - allow conversion to a superset
-  MY_COLL_ALLOW_COERCIBLE_CONV - allow conversion of a coercible value
-  - two EXPLICIT collations produce an error, e.g. this is wrong:
-  CONCAT(expr1 collate latin1_swedish_ci, expr2 collate latin1_german_ci)
-  - the side with smaller derivation value wins,
-  i.e. a column is stronger than a string constant,
-  an explicit COLLATE clause is stronger than a column.
-  - if derivations are the same, we have DERIVATION_NONE,
-  we'll wait for an explicit COLLATE clause which possibly can
-  come from another argument later: for example, this is valid,
-  but we don't know yet when collecting the first two arguments:
-     @code
-       CONCAT(latin1_swedish_ci_column,
-              latin1_german1_ci_column,
-              expr COLLATE latin1_german2_ci)
-  @endcode
-*/
-
-/** this function is to determine use which charset when compare
- * We consider only three charsets(binary, gbk and utf8mb4), so the rule is simpler. Especially,
- * res_level can not be CS_LEVEL_NONE.
- *
- * MySQL uses coercibility values with the following rules to resolve ambiguities:
- * 1. Use the collation with the lowest coercibility value.
- * 2. If both sides have the same coercibility, then:
- *  2.a If both sides are Unicode, or both sides are not Unicode, it is an error.
- *  2.b If one of the sides has a Unicode character set, and another side has a non-Unicode character set, the side with Unicode character set wins,
- *      and automatic character set conversion is applied to the non-Unicode side.
- *  2.c For an operation with operands from the same character set but that mix a _bin collation and a _ci or _cs collation, the _bin collation is used.
- *  This is similar to how operations that mix nonbinary and binary strings evaluate the operands as binary strings, except that it is for collations rather than data types.
-*/
-#endif
 int ObCharset::aggregate_collation(
    const ObCollationLevel collation_level1,
    const ObCollationType collation_type1,
@ -2947,7 +2844,7 @@ int ObCharset::charset_convert(const ObCollationType from_type,
      ret = OB_ERR_UNEXPECTED;
      LOG_WARN("unexpected collation type", K(ret), K(from_type), K(to_type));
    } else {
-      uint errors = 0;
+      unsigned int errors = 0;
      result_len = ob_convert(to_str, static_cast<uint32_t>(to_len), to_cs, from_str, from_len, from_cs,
                              trim_incomplete_tail, replaced_char, &errors);
      if (OB_UNLIKELY(errors != 0 && report_error)) {
@ -3209,20 +3106,8 @@ int ObCharset::get_nls_charset_id_by_charset_type(ObCharsetType charset_type)
  return static_cast<int>(ret_id);
 }

-#ifndef OB_BUILD_FULL_CHARSET

-int ObCharset::init_charset()
-{
-  int ret = OB_SUCCESS;
-  if (OB_FAIL(init_gb18030_2022())) {
-    LOG_WARN("failed to init gb18030 2022", K(ret));
-  }
-  return ret;
-}
-
-#else
-
-static void ob_charset_error_reporter(enum loglevel level, uint ecode, ...) {
+static void ob_charset_error_reporter(enum loglevel level, unsigned int ecode, ...) {
  //UNUSED(level);
  UNUSED(ecode);
  switch (level) {
@ -3411,7 +3296,6 @@ int ObCharset::init_charset()
  return ret;
 }

-#endif

 ObString ObCharsetUtils::const_str_for_ascii_[CHARSET_MAX][INT8_MAX + 1];

@ -3420,9 +3304,9 @@ int ObCharsetUtils::remove_char_endspace(ObString &str,
  int ret = OB_SUCCESS;
  const char *end = str.ptr() + str.length();
  if ((CHARSET_UTF16 == charset_type)) {
-    end= (const char *) skip_trailing_space((const uchar *)str.ptr(), str.length(), 1);
+    end= (const char *) skip_trailing_space((const unsigned char *)str.ptr(), str.length(), 1);
  } else {
-    end= (const char *) skip_trailing_space((const uchar *)str.ptr(), str.length(), 0);
+    end= (const char *) skip_trailing_space((const unsigned char *)str.ptr(), str.length(), 0);
  }
  if (end >= str.ptr()) {
    str.assign_ptr(str.ptr(), end - str.ptr());
--- a/deps/oblib/src/lib/charset/ob_ctype_os.cc
+++ b/deps/oblib/src/lib/charset/ob_ctype_os.cc
@ -8,14 +8,7 @@
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
 * See the Mulan PubL v2 for more details.
- */ 
-
-/*
- * (C) 2017-2020 Alibaba Group Holding Limited.
- *
- *  Authors:
 */
-#ifndef OB_BUILD_FULL_CHARSET

 #include "lib/charset/ob_ctype.h"

@ -25,7 +18,7 @@ ob_convert_internal(char *to, uint32 to_length,
                    const char *from, uint32 from_length,
                    const ObCharsetInfo *from_cs,
                    bool trim_incomplete_tail,
-                    const ob_wc_t replaced_char, uint *errors)
+                    const ob_wc_t replaced_char, unsigned int *errors)
 {
  unsigned int error_num= 0;
  int cnvres;
@ -49,7 +42,7 @@ ob_convert_internal(char *to, uint32 to_length,
      error_num++;
    } else {
      // Not enough characters
-      if (!trim_incomplete_tail && (const uchar*) from < from_end) {
+      if (!trim_incomplete_tail && (const unsigned char*) from < from_end) {
        error_num++;
        from++;
        wc= replaced_char;
@ -62,7 +55,7 @@ ob_convert_internal(char *to, uint32 to_length,
    while (go) {
      go = FALSE;
      if ((cnvres= (*wc_mb)(to_cs, wc, (unsigned char*) to, to_end)) > 0)
-        to+= cnvres;
+      to+= cnvres;
      else if (cnvres == OB_CS_ILUNI && wc != replaced_char) {
        error_num++;
        wc= replaced_char;
@ -122,4 +115,26 @@ ob_convert(char *to, uint32 to_length, const ObCharsetInfo *to_cs,
  return 0;          
 }

-#endif
+char *strmake(char *dst, const char *src, size_t length) {  // copy at most 'length' chars of src and NUL-terminate; returns pointer to the NUL written in dst
+#ifdef EXTRA_DEBUG
+  /*
+ * 'length' is the maximum length of the string; the buffer needs
+ * to be one character larger to accommodate the terminating '\0'.
+ * This is easy to get wrong, so we make sure we write to the
+ * entire length of the buffer to identify incorrect buffer-sizes.
+ * We only initialise the "unused" part of the buffer here, a) for
+ * efficiency, and b) because dst==src is allowed, so initialising
+ * the entire buffer would overwrite the source-string. Also, we
+ * write a character rather than '\0' as this makes spotting these
+ * problems in the results easier.
+ */
+  unsigned int n = 0;
+  while (n < length && src[n++])  // n ends just past src's NUL, or at 'length' if none was found
+    ;
+  memset(dst + n, (int)'Z', length - n + 1);  // fill the tail up to and including dst[length]
+#endif
+  while (length--)
+    if (!(*dst++ = *src++)) return dst - 1;  // NUL copied from src: return its position in dst
+  *dst = 0;  // src was at least 'length' long: terminate at dst[length]
+  return dst;
+}
--- a/deps/oblib/src/lib/charset/ob_ctype.h
+++ b/deps/oblib/src/lib/charset/ob_ctype.h
@ -116,9 +116,9 @@
 #define	_MY_B	0100	
 #define	_MY_X	0200	

-#define ob_toupper(s, c) (uchar)((s)->to_upper[(uchar)(c)])
-#define ob_tolower(s, c) (uchar)((s)->to_lower[(uchar)(c)])
-#define ob_sort_order(s,c) (uchar)((s)->sort_order[(uchar)(c)])
+#define ob_toupper(s, c) (unsigned char)((s)->to_upper[(unsigned char)(c)])
+#define ob_tolower(s, c) (unsigned char)((s)->to_lower[(unsigned char)(c)])
+#define ob_sort_order(s,c) (unsigned char)((s)->sort_order[(unsigned char)(c)])

 /* True when e, converted to unsigned char, is 7-bit ASCII (0x00..0x7F).
    Spelled with 'unsigned char' like the neighbouring ob_toupper/ob_tolower
    macros; the lower-bound test was always true for an unsigned value and is
    dropped, which also makes the macro evaluate e only once. */
 #define is_valid_ascii(e) ((unsigned char)(e) <= 0x7F)

@ -127,13 +127,13 @@ struct ObUCAInfo;
 struct ObContractions;

 typedef struct ObCharsetLoader {
-  uint errcode;
+  unsigned int errcode;
  char errarg[192];
  void *(*once_alloc)(size_t);
  void *(*mem_malloc)(size_t);
  void *(*mem_realloc)(void *, size_t);
  void (*mem_free)(void *);
-  void (*reporter)(enum loglevel, uint errcode, ...);
+  void (*reporter)(enum loglevel, unsigned int errcode, ...);
  int (*add_collation)(ObCharsetInfo *cs);
 } ObCharsetLoader;

@ -157,25 +157,25 @@ typedef char        ob_bool; /* Small bool */

 /* Some typedef to make it easy for C++ to make function pointers */
 typedef int (*ob_charset_conv_mb_wc)(const struct ObCharsetInfo *,
-                                     ob_wc_t *, const uchar *, const uchar *);
+                                     ob_wc_t *, const unsigned char *, const unsigned char *);
 typedef int (*ob_charset_conv_wc_mb)(const struct ObCharsetInfo *, ob_wc_t,
-                                     uchar *, uchar *);
+                                     unsigned char *, unsigned char *);
 typedef size_t (*ob_charset_conv_case)(const struct ObCharsetInfo *,
                                       char *, size_t, char *, size_t);

 int init_gb18030_2022();

 extern ObUCAInfo ob_uca_v400;
-extern uchar ob_uca520_length[4352];
+extern unsigned char ob_uca520_length[4352];
 extern uint16 *ob_uca520_weight[4352];
-extern uchar ob_uca_length[256];
+extern unsigned char ob_uca_length[256];
 extern uint16 *ob_uca_weight[256];

 typedef struct
 {
-  uint beg;
-  uint end;
-  uint mb_len;
+  unsigned int beg;
+  unsigned int end;
+  unsigned int mb_len;
 } ob_match_t;

 typedef struct ObUnicaseInfoChar
@ -192,86 +192,13 @@ typedef struct ObUnicaseInfo
  const ObUnicaseInfoChar **page;
 } ObUnicaseInfo;

-#ifdef OB_BUILD_FULL_CHARSET
-// OB_CHARSET_HANDLER
-// ==================
-
-// OB_CHARSET_HANDLER is a collection of character-set
-// related routines. Defined in m_ctype.h. Have the
-// following set of functions:
-
-// Multi-byte routines
-// ------------------
-// ismbchar()  - detects whether the given string is a multi-byte sequence
-// mbcharlen() - returns length of multi-byte sequence starting with
-//               the given character
-// numchars()  - returns number of characters in the given string, e.g.
-//               in SQL function CHAR_LENGTH().
-// charpos()   - calculates the offset of the given position in the string.
-//               Used in SQL functions LEFT(), RIGHT(), SUBSTRING(),
-//               INSERT()
-
-// well_formed_len()
-//             - returns length of a given multi-byte string in bytes
-//               Used in INSERTs to shorten the given string so it
-//               a) is "well formed" according to the given character set
-//               b) can fit into the given data type
-
-// lengthsp()  - returns the length of the given string without trailing spaces.
-
-
-// Unicode conversion routines
-// ---------------------------
-// mb_wc       - converts the left multi-byte sequence into its Unicode code.
-// mc_mb       - converts the given Unicode code into multi-byte sequence.
-
-
-// Case and sort conversion
-// ------------------------
-// caseup_str  - converts the given 0-terminated string to uppercase
-// casedn_str  - converts the given 0-terminated string to lowercase
-// caseup      - converts the given string to lowercase using length
-// casedn      - converts the given string to lowercase using length
-
-// Number-to-string conversion routines
-// ------------------------------------
-// snprintf()
-// long10_to_str()
-// longlong10_to_str()
-
-// The names are pretty self-describing.
-
-// String padding routines
-// -----------------------
-// fill()     - writes the given Unicode value into the given string
-//              with the given length. Used to pad the string, usually
-//              with space character, according to the given charset.
-
-// String-to-number conversion routines
-// ------------------------------------
-// strntol()
-// strntoul()
-// strntoll()
-// strntoull()
-// strntod()
-
-// These functions are almost the same as their STDLIB counterparts,
-// but also:
-//   - accept length instead of 0-terminator
-//   - are character set dependent
-
-// Simple scanner routines
-// -----------------------
-// scan()    - to skip leading spaces in the given string.
-//             Used when a string value is inserted into a numeric field.
-#endif
 typedef struct ObCharsetHandler
 {
  //my_bool (*init)(struct ObCharsetInfo *, MY_CHARSET_LOADER *loader);
  /* Multibyte routines */
-  uint    (*ismbchar)(const struct ObCharsetInfo *, const char *,
+  unsigned int    (*ismbchar)(const struct ObCharsetInfo *, const char *,
                      const char *);
-  uint    (*mbcharlen)(const struct ObCharsetInfo *, uint c);
+  unsigned int    (*mbcharlen)(const struct ObCharsetInfo *, unsigned int c);
  size_t  (*numchars)(const struct ObCharsetInfo *, const char *b,
                      const char *e);
  size_t  (*charpos)(const struct ObCharsetInfo *, const char *b,
@ -292,7 +219,7 @@ typedef struct ObCharsetHandler

  /* CTYPE scanner */
  int (*ctype)(const struct ObCharsetInfo *cs, int *ctype,
-               const uchar *s, const uchar *e);
+               const unsigned char *s, const unsigned char *e);

  /* Functions for case and sort conversion */
  /*size_t  (*caseup_str)(const struct ObCharsetInfo *, char *);
@ -332,22 +259,7 @@ typedef struct ObCharsetHandler
  size_t        (*scan)(const struct ObCharsetInfo *, const char *b,
                        const char *e, int sq);
 } ObCharsetHandler;
-#ifdef OB_BUILD_FULL_CHARSET
-// OB_COLLATION_HANDLER
-// ====================
-// strnncoll()   - compares two strings according to the given collation
-// strnncollsp() - like the above but ignores trailing spaces for PAD SPACE
-//                 collations. For NO PAD collations, identical to strnncoll.
-// strnxfrm()    - makes a sort key suitable for memcmp() corresponding
-//                 to the given string
-// like_range()  - creates a LIKE range, for optimizer
-// wildcmp()     - wildcard comparison, for LIKE
-// strcasecmp()  - 0-terminated string comparison
-// instr()       - finds the first substring appearance in the string
-// hash_sort()   - calculates hash value taking into account
-//                 the collation rules, e.g. case-insensitivity,
-//                 accent sensitivity, etc.
-#endif
+
 static const int HASH_BUFFER_LENGTH = 128;

 typedef uint64_t (*hash_algo)(const void* input, uint64_t length, uint64_t seed);
@ -359,21 +271,21 @@ typedef struct ObCollationHandler
  /* Collation routines */
  // 进行字符串比较的函数
  int     (*strnncoll)(const struct ObCharsetInfo *,
-               const uchar *, size_t, const uchar *, size_t, bool);
+               const unsigned char *, size_t, const unsigned char *, size_t, bool);
  // 字符串比较时忽略尾部空格
  int     (*strnncollsp)(const struct ObCharsetInfo *,
-                         const uchar *, size_t, const uchar *, size_t,
+                         const unsigned char *, size_t, const unsigned char *, size_t,
                         bool diff_if_only_endspace_difference);
  // makes a sort key suitable for memcmp() corresponding to the given string
  size_t  (*strnxfrm)(const struct ObCharsetInfo *,
-                      uchar *dst, size_t dstlen, uint nweights,
-                      const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
+                      unsigned char *dst, size_t dstlen, unsigned int nweights,
+                      const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);
  // 获取weight_string结果的长度
  size_t (*strnxfrmlen)(const struct ObCharsetInfo *, size_t);
  // makes a sortkey suitable for memcmp() corresponding to the given variable length string
  size_t  (*strnxfrm_varlen)(const struct ObCharsetInfo*,
-                             uchar* dst, size_t dst_len, uint nweights,
-                             const uchar *src, size_t srclen,
+                             unsigned char* dst, size_t dst_len, unsigned int nweights,
+                             const unsigned char *src, size_t srclen,
                             bool is_memcmp, bool *is_valid_unicode);
  //size_t    (*strnxfrmlen)(const struct ObCharsetInfo *, size_t);

@ -394,63 +306,54 @@ typedef struct ObCollationHandler
                     const char *);

  // finds the first substring appearance in the string
-  uint (*instr)(const struct ObCharsetInfo *,
+  unsigned int (*instr)(const struct ObCharsetInfo *,
                const char *b, size_t b_length,
                const char *s, size_t s_length,
-                ob_match_t *match, uint nmatch);
+                ob_match_t *match, unsigned int nmatch);

  /* Hash calculation */
  // calculates hash value taking into account the collation rules, e.g. case-insensitivity
-  void (*hash_sort)(const struct ObCharsetInfo *cs, const uchar *key, size_t len, ulong *nr1,
+  void (*hash_sort)(const struct ObCharsetInfo *cs, const unsigned char *key, size_t len, ulong *nr1,
                    ulong *nr2, const bool calc_end_space, hash_algo hash_algo);
-  bool (*propagate)(const struct ObCharsetInfo *cs, const uchar *str,
+  bool (*propagate)(const struct ObCharsetInfo *cs, const unsigned char *str,
                       size_t len);
 } ObCollationHandler;

 struct ObCharsetInfo
 {
-  uint      number;
-  uint      primary_number;
-  uint      binary_number;
-  uint      state;
+  unsigned int      number;
+  unsigned int      primary_number;
+  unsigned int      binary_number;
+  unsigned int      state;
  const char *csname;
  const char *name;
  const char *comment;
  const char *tailoring;
  struct Coll_param *coll_param;
-  uchar    *ctype;
-  uchar    *to_lower;
-  uchar    *to_upper;
-  uchar    *sort_order;
+  unsigned char    *ctype;
+  unsigned char    *to_lower;
+  unsigned char    *to_upper;
+  unsigned char    *sort_order;
  ObUCAInfo *uca;
  //uint16      *tab_to_uni;
  //MY_UNI_IDX  *tab_from_uni;
  ObUnicaseInfo *caseinfo;
-  uchar     *state_map;
-  uchar     *ident_map;
-  uint      strxfrm_multiply;
-  uchar     caseup_multiply;
-  uchar     casedn_multiply;
-  uint      mbminlen;
-  uint      mbmaxlen;
+  unsigned char     *state_map;
+  unsigned char     *ident_map;
+  unsigned int      strxfrm_multiply;
+  unsigned char     caseup_multiply;
+  unsigned char     casedn_multiply;
+  unsigned int      mbminlen;
+  unsigned int      mbmaxlen;
  ob_wc_t   min_sort_char;
  ob_wc_t   max_sort_char; /* For LIKE optimization */
-  uchar     pad_char;
+  unsigned char     pad_char;
  bool   escape_with_backslash_is_dangerous;
-  uchar     levels_for_compare;
-  uchar     levels_for_order;
+  unsigned char     levels_for_compare;
+  unsigned char     levels_for_order;

  ObCharsetHandler *cset;
  ObCollationHandler *coll;
-#ifdef OB_BUILD_FULL_CHARSET
-  /**
-    If this collation is PAD_SPACE, it collates as if all inputs were
-    padded with a given number of spaces at the end (see the "num_codepoints"
-    flag to strnxfrm). NO_PAD simply compares unextended strings.
-
-    Note that this is fundamentally about the behavior of coll->strnxfrm.
-  */
-#endif
  enum ObCharsetPadAttr pad_attribute;
 };

@ -459,17 +362,17 @@ struct ObCharsetInfo
 #define	ob_toascii(c)	((c) & 0177)
 #define ob_tocntrl(c)	((c) & 31)
 #define ob_toprint(c)	((c) | 64)
-#define	ob_isalpha(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L) : 0)
-#define	ob_isupper(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_U : 0)
-#define	ob_islower(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_L : 0)
-#define	ob_isdigit(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_NMR : 0)
-#define	ob_isxdigit(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_X : 0)
-#define	ob_isalnum(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L | _MY_NMR) : 0)
-#define	ob_isspace(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_SPC : 0)
-#define	ob_ispunct(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_PNT : 0)
-#define	ob_isprint(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B) : 0)
-#define	ob_isgraph(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR) : 0)
-#define	ob_iscntrl(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(uchar) (c)] & _MY_CTR : 0)
+#define	ob_isalpha(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_U | _MY_L) : 0)
+#define	ob_isupper(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_U : 0)
+#define	ob_islower(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_L : 0)
+#define	ob_isdigit(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_NMR : 0)
+#define	ob_isxdigit(s, c) ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_X : 0)
+#define	ob_isalnum(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_U | _MY_L | _MY_NMR) : 0)
+#define	ob_isspace(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_SPC : 0)
+#define	ob_ispunct(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_PNT : 0)
+#define	ob_isprint(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B) : 0)
+#define	ob_isgraph(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR) : 0)
+#define	ob_iscntrl(s, c)  ((s)->ctype != NULL ? ((s)->ctype+1)[(unsigned char) (c)] & _MY_CTR : 0)

 /* Some macros that should be cleaned up a little */
 #define ob_isvar(s,c)                 (my_isalnum(s,c) || (c) == '_')
@ -488,13 +391,13 @@ struct ObCharsetInfo


 #define use_mb(s)                     ((s)->cset->ismbchar != NULL)
-static inline uint ob_ismbchar(const ObCharsetInfo *cs, const char *str,
+static inline unsigned int ob_ismbchar(const ObCharsetInfo *cs, const char *str,
                               const char *strend) {
  return cs->cset->ismbchar(cs, str, strend);
 }

-static inline uint ob_ismbchar(const ObCharsetInfo *cs, const uchar *str,
-                               const uchar *strend) {
+static inline unsigned int ob_ismbchar(const ObCharsetInfo *cs, const unsigned char *str,
+                               const unsigned char *strend) {
  return cs->cset->ismbchar(cs, (const char *)(str), (const char *)(strend));
 }
 #define ob_mbcharlen(s, a)            ((s)->cset->mbcharlen((s),(a)))
@ -503,8 +406,8 @@ static inline uint ob_ismbchar(const ObCharsetInfo *cs, const uchar *str,

 typedef struct ob_uni_ctype
 {
-  uchar  pctype;
-  uchar  *ctype;
+  unsigned char  pctype;
+  unsigned char  *ctype;
 } ObUniCtype;

 extern ObUniCtype ob_uni_ctype[256];
@ -534,7 +437,6 @@ extern ObCharsetInfo ob_charset_gb18030_2022_radical_cs;
 extern ObCharsetInfo ob_charset_gb18030_2022_stroke_ci;
 extern ObCharsetInfo ob_charset_gb18030_2022_stroke_cs;
 extern ObCharsetInfo ob_charset_gb18030_2022_bin;
-#ifdef OB_BUILD_FULL_CHARSET
 extern ObCharsetInfo ob_charset_utf8mb4_unicode_ci;
 extern ObCharsetInfo ob_charset_utf16_unicode_ci;
 extern ObCharsetInfo ob_charset_utf8mb4_zh_0900_as_cs;
@ -543,7 +445,6 @@ extern ObCharsetInfo ob_charset_utf8mb4_zh3_0900_as_cs;
 extern ObCharsetInfo ob_charset_utf8mb4_0900_bin;
 extern ObCharsetInfo ob_charset_latin1;
 extern ObCharsetInfo ob_charset_latin1_bin;
-#endif

 extern ObCollationHandler ob_collation_mb_bin_handler;
 extern ObCharsetHandler ob_charset_utf8mb4_handler;
@ -590,17 +491,17 @@ bool  ob_like_range_simple(const ObCharsetInfo *cs,
 			      char *min_str, char *max_str,
 			      size_t *min_length, size_t *max_length);

-bool ob_propagate_simple(const ObCharsetInfo *cs, const uchar *str,
+bool ob_propagate_simple(const ObCharsetInfo *cs, const unsigned char *str,
                            size_t len);
-bool ob_propagate_complex(const ObCharsetInfo *cs, const uchar *str,
+bool ob_propagate_complex(const ObCharsetInfo *cs, const unsigned char *str,
                             size_t len);

-void ob_strxfrm_desc_and_reverse(uchar *str, uchar *strend,
-                                 uint flags, uint level);
+void ob_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *strend,
+                                 unsigned int flags, unsigned int level);

 size_t ob_strxfrm_pad_desc_and_reverse(const ObCharsetInfo *cs,
-                                       uchar *str, uchar *frmend, uchar *strend,
-                                       uint nweights, uint flags, uint level);
+                                       unsigned char *str, unsigned char *frmend, unsigned char *strend,
+                                       unsigned int nweights, unsigned int flags, unsigned int level);
 extern "C" int64_t ob_strntoll(const char *ptr, size_t len, int base, char **end, int *err);
 extern "C" int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err);

@ -621,17 +522,17 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
                       const char *wildstr,const char *wildend,
                       int escape, int w_one, int w_many, int recurse_level);

-uint ob_instr_mb(const ObCharsetInfo *cs,
+unsigned int ob_instr_mb(const ObCharsetInfo *cs,
                 const char *b, size_t b_length,
                 const char *s, size_t s_length,
-                 ob_match_t *match, uint nmatch);
+                 ob_match_t *match, unsigned int nmatch);

 void ob_hash_sort_simple(const ObCharsetInfo *cs,
-				const uchar *key, size_t len,
+				const unsigned char *key, size_t len,
                ulong *nr1, ulong *nr2,
        const bool calc_end_space, hash_algo hash_algo);

-const uchar *skip_trailing_space(const uchar *ptr,size_t len, bool is_utf16);
+const unsigned char *skip_trailing_space(const unsigned char *ptr,size_t len, bool is_utf16);

 size_t ob_numchars_mb(const ObCharsetInfo *cs __attribute__((unused)), const char *pos, const char *end);

@ -640,7 +541,7 @@ size_t ob_charpos_mb(const ObCharsetInfo *cs __attribute__((unused)), const char
 size_t ob_max_bytes_charpos_mb(const ObCharsetInfo *cs __attribute__((unused)), const char *pos, const char *end, size_t max_bytes, size_t *char_len);

 int ob_mb_ctype_mb(const ObCharsetInfo *cs __attribute__((unused)), int *ctype,
-                   const uchar *s, const uchar *e);
+                   const unsigned char *s, const unsigned char *e);

 size_t ob_caseup_mb(const ObCharsetInfo *, char *src, size_t srclen,
                                         char *dst, size_t dstlen);
@ -661,18 +562,18 @@ size_t ob_lengthsp_8bit(const ObCharsetInfo *cs __attribute__((unused)),
                        const char *ptr, size_t length);

 int ob_strnncoll_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
-                    const uchar *s, size_t slen,
-                    const uchar *t, size_t tlen,
+                    const unsigned char *s, size_t slen,
+                    const unsigned char *t, size_t tlen,
                        bool t_is_prefix);

 int ob_strnncollsp_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
-                      const uchar *a, size_t a_length,
-                      const uchar *b, size_t b_length,
+                      const unsigned char *a, size_t a_length,
+                      const unsigned char *b, size_t b_length,
                          bool diff_if_only_endspace_difference);

 size_t ob_strnxfrm_mb(const ObCharsetInfo *,
-                      uchar *dst, size_t dstlen, uint nweights,
-                      const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
+                      unsigned char *dst, size_t dstlen, unsigned int nweights,
+                      const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);

 int ob_wildcmp_mb_bin(const ObCharsetInfo *cs,
                  const char *str,const char *str_end,
@ -680,22 +581,22 @@ int ob_wildcmp_mb_bin(const ObCharsetInfo *cs,
                      int escape, int w_one, int w_many);

 void ob_hash_sort_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
-                         const uchar *key, size_t len, ulong *nr1, ulong *nr2,
+                         const unsigned char *key, size_t len, ulong *nr1, ulong *nr2,
                         const bool calc_end_space, hash_algo hash_algo);

 uint32 ob_convert(char *to, uint32 to_length, const ObCharsetInfo *to_cs,
                  const char *from, uint32 from_length,
                  const ObCharsetInfo *from_cs,
                  bool trim_incomplete_tail,
-                  const ob_wc_t replaced_char, uint *errors);
+                  const ob_wc_t replaced_char, unsigned int *errors);

 size_t ob_strnxfrm_unicode_full_bin(const ObCharsetInfo *cs,
-                             uchar *dst, size_t dstlen, uint nweights,
-                             const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
+                             unsigned char *dst, size_t dstlen, unsigned int nweights,
+                             const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);

 size_t ob_strnxfrm_unicode_full_bin_varlen(const struct ObCharsetInfo* cs,
-                             uchar* dst, size_t dst_len, uint nweights,
-                             const uchar *src, size_t srclen,
+                             unsigned char* dst, size_t dst_len, unsigned int nweights,
+                             const unsigned char *src, size_t srclen,
                             bool is_memcmp, bool *is_valid_unicode);

 bool ob_like_range_generic(const ObCharsetInfo *cs, const char *ptr,
@ -705,12 +606,12 @@ bool ob_like_range_generic(const ObCharsetInfo *cs, const char *ptr,
                              size_t *max_length);

 size_t ob_strnxfrm_unicode(const ObCharsetInfo *cs,
-                    uchar *dst, size_t dstlen, uint nweights,
-                    const uchar *src, size_t srclen, uint flags, bool *is_valid_unicode);
+                    unsigned char *dst, size_t dstlen, unsigned int nweights,
+                    const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode);

 size_t ob_strnxfrm_unicode_varlen(const struct ObCharsetInfo* cs,
-                             uchar* dst, size_t dst_len, uint nweights,
-                             const uchar *src, size_t srclen,
+                             unsigned char* dst, size_t dst_len, unsigned int nweights,
+                             const unsigned char *src, size_t srclen,
                             bool is_memcmp, bool *is_valid_unicode);

 int ob_wildcmp_unicode(const ObCharsetInfo *cs,
@ -719,8 +620,8 @@ int ob_wildcmp_unicode(const ObCharsetInfo *cs,
                   int escape, int w_one, int w_many,
                   ObUnicaseInfo *weights);

-size_t ob_strxfrm_pad(const ObCharsetInfo *cs, uchar *str, uchar *frmend,
-                      uchar *strend, uint nweights, uint flags);
+size_t ob_strxfrm_pad(const ObCharsetInfo *cs, unsigned char *str, unsigned char *frmend,
+                      unsigned char *strend, unsigned int nweights, unsigned int flags);

 size_t ob_strnxfrmlen_simple(const struct ObCharsetInfo *, size_t);

@ -728,8 +629,8 @@ size_t ob_strnxfrmlen_unicode_full_bin(const struct ObCharsetInfo *, size_t);

 size_t ob_strnxfrmlen_utf8mb4(const struct ObCharsetInfo *, size_t);

-uint ob_mbcharlen_8bit(const ObCharsetInfo *cs __attribute__((unused)),
-                      uint c __attribute__((unused)));
+unsigned int ob_mbcharlen_8bit(const ObCharsetInfo *cs __attribute__((unused)),
+                      unsigned int c __attribute__((unused)));

 size_t ob_numchars_8bit(const ObCharsetInfo *cs __attribute__((unused)),
 		      const char *b, const char *e);
@ -750,7 +651,7 @@ size_t ob_lengthsp_binary(const ObCharsetInfo *cs __attribute__((unused)),
                          size_t length);

 int ob_mb_ctype_8bit(const ObCharsetInfo *cs, int *ctype,
-                   const uchar *s, const uchar *e);
+                   const unsigned char *s, const unsigned char *e);

 size_t ob_well_formed_len_8bit(const ObCharsetInfo *cs __attribute__((unused)),
                               const char *start, const char *end,
--- a/deps/oblib/src/lib/charset/ob_ctype_bin_os.cc
+++ b/deps/oblib/src/lib/charset/ob_ctype_bin_os.cc
@ -18,60 +18,18 @@
 *      - initial release
 *
 */
-#ifndef OB_BUILD_FULL_CHARSET

 #include "lib/charset/ob_ctype.h"
-
-static unsigned char ctype_bin[]=
-{
-  0,
-  32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
-  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
-  72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
-  16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,
-  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,
-  16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-};
-
-static unsigned char bin_char_array[] =
-{
-    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
-   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
-   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
-   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
-   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
-   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
-   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
-  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
-  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
-  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
-  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
-  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
-  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
-  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
-  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
-  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
-};
-
+#include "lib/charset/ob_ctype_bin_tab.h"

 unsigned int ob_mbcharlen_8bit(const ObCharsetInfo *cs __attribute__((unused)),
-                      unsigned int c __attribute__((unused)))
+                               unsigned int c __attribute__((unused)))
 {
  return 1;
 }

 size_t ob_numchars_8bit(const ObCharsetInfo *cs __attribute__((unused)),
-		      const char *begin, const char *end)
+                        const char *begin, const char *end)
 {
  return (size_t) (end - begin);
 }
@ -111,9 +69,9 @@ size_t ob_lengthsp_binary(const ObCharsetInfo *cs __attribute__((unused)),
 }

 static int ob_mb_wc_bin(const ObCharsetInfo *cs __attribute__((unused)),
-			ob_wc_t *wc,
-			const unsigned char *str,
-			const unsigned char *end __attribute__((unused)))
+                        ob_wc_t *wc,
+                        const unsigned char *str,
+                        const unsigned char *end __attribute__((unused)))
 {
  if (str >= end) {
    return OB_CS_TOOSMALL;
@ -125,9 +83,9 @@ static int ob_mb_wc_bin(const ObCharsetInfo *cs __attribute__((unused)),


 static int ob_wc_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
-			ob_wc_t wc,
-			unsigned char *str,
-			unsigned char *end __attribute__((unused)))
+                        ob_wc_t wc,
+                        unsigned char *str,
+                        unsigned char *end __attribute__((unused)))
 {
  if (str >= end) {
    return OB_CS_TOOSMALL;
@ -158,7 +116,7 @@ static size_t ob_case_bin(const ObCharsetInfo *cs __attribute__((unused)),
  return srclen;
 }

- 
+
 static int ob_strnncoll_8bit_bin(const ObCharsetInfo *cs __attribute__((unused)),
                               const uchar *s, size_t slen,
                               const uchar *t, size_t tlen,
@ -247,19 +205,18 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
                        const char *wild_str,const char *wild_end,
                        int escape_char, int w_one, int w_many, int recurse_level)
 {
-  int result= -1;			 
-
+  int result= -1;
  while (wild_str != wild_end) {
    while ((*wild_str == escape_char) ||  (*wild_str != w_many && *wild_str != w_one)) {
      if (*wild_str == escape_char && wild_str+1 != wild_end) {
-	      wild_str++;
+        wild_str++;
      }
      if (str == str_end || likeconv(cs,*wild_str++) != likeconv(cs,*str++)) {
        return(1);			 
      } else if (wild_str == wild_end) {
-	      return(str != str_end);		 
+        return(str != str_end);
      } else {
-        result=1;				 
+        result=1;
      }
    }
    if (*wild_str == w_one) {
@ -272,7 +229,7 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
      } while (++wild_str < wild_end && *wild_str == w_one);
      if (wild_str == wild_end) break;
    }
-    if (*wild_str == w_many) {					 
+    if (*wild_str == w_many) {
      unsigned char cmp;
      wild_str++;  
      for (; wild_str != wild_end ; wild_str++) {
@ -286,14 +243,14 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
            continue;
          }
        }
-        break;				 
+        break;
      }
      if (wild_str == wild_end) {
-	      return(0);			 
+        return(0);
      } else if (str == str_end) {
-	      return(-1);
+        return(-1);
      } else if ((cmp= *wild_str) == escape_char && wild_str+1 != wild_end) {
-	      cmp= *++wild_str;
+        cmp= *++wild_str;
      }

      INC_PTR(cs,wild_str,wild_end);	 
@ -303,12 +260,12 @@ int ob_wildcmp_bin_impl(const ObCharsetInfo *cs,
          str++;
        }
        if (str++ == str_end) {
-	        return(-1);
+          return(-1);
        }
        {
          int tmp=ob_wildcmp_bin_impl(cs,str,str_end,
-                                            wild_str,wild_end,escape_char,
-                                            w_one, w_many, recurse_level + 1);
+                                      wild_str,wild_end,escape_char,
+                                      w_one, w_many, recurse_level + 1);
          if (tmp <= 0) {
            return(tmp);
          } else if (str == str_end) {
@ -336,9 +293,9 @@ int ob_wildcmp_bin(const ObCharsetInfo *cs,

 static
 unsigned int ob_instr_bin(const ObCharsetInfo *cs __attribute__((unused)),
-		  const char *begin, size_t b_length,
-		  const char *s, size_t s_length,
-		  ob_match_t *match, unsigned int nmatch)
+                          const char *begin, size_t b_length,
+                          const char *s, size_t s_length,
+                          ob_match_t *match, unsigned int nmatch)
 {
  const unsigned char *str, *search, *end, *search_end;

@ -349,7 +306,7 @@ unsigned int ob_instr_bin(const ObCharsetInfo *cs __attribute__((unused)),
        match->end= 0;
        match->mb_len= 0;
      }
-      return 1;		 
+      return 1;
    }

    str= (const unsigned char*) begin;
@ -369,7 +326,6 @@ loop:
            goto loop;
          }
        }
-
        if (nmatch > 0) {
          match[0].beg= 0;
          match[0].end= (size_t) (str- (const unsigned char*)begin-1);
@ -381,7 +337,7 @@ loop:
            match[1].mb_len= match[1].end-match[1].beg;
          }
        }
-	      return 2;
+        return 2;
      }
    }
  }
@ -409,9 +365,10 @@ void ob_hash_sort_8bit_bin(const ObCharsetInfo *cs __attribute__((unused)),
  }
 }
 void ob_hash_sort_bin(const ObCharsetInfo *cs __attribute__((unused)),
-		      const unsigned char *key, size_t len, unsigned long int *nr1, unsigned long int *nr2, 
-          const bool calc_end_space,
-          hash_algo hash_algo)
+                      const unsigned char *key, size_t len,
+                      unsigned long int *nr1, unsigned long int *nr2,
+                      const bool calc_end_space,
+                      hash_algo hash_algo)
 {
  const unsigned char *pos = key;
  key+= len;
@ -431,8 +388,8 @@ void ob_hash_sort_bin(const ObCharsetInfo *cs __attribute__((unused)),

 static ObCharsetHandler ob_charset_handler=
 {
-  NULL,			 
-  ob_mbcharlen_8bit,	 
+  NULL,
+  ob_mbcharlen_8bit,
  ob_numchars_8bit,
  ob_charpos_8bit,
  ob_max_bytes_charpos_8bit,
@ -490,39 +447,37 @@ ObCollationHandler ob_collation_binary_handler =

 ObCharsetInfo ob_charset_bin =
 {
-    63,0,0,			 
-    OB_CS_COMPILED|OB_CS_BINSORT|OB_CS_PRIMARY, 
-    "binary",			 
-    "binary",			 
-    "",				 
-    NULL,			 
-    NULL,	
-    ctype_bin,			 
-    bin_char_array,		 
-    bin_char_array,		 
-    NULL,			 
-    NULL,			 
-    &ob_unicase_default,         
-    NULL,			 
-    NULL,			 
-    1,				 
-    1,                           
-    1,                           
-    1,				 
-    1,				 
-    0,				 
-    255,			 
-    0,                           
-    0,                           
-    1,                           
-    1,                           
-    &ob_charset_handler,
-    &ob_collation_binary_handler,
-    PAD_SPACE
+  63,0,0,
+  OB_CS_COMPILED|OB_CS_BINSORT|OB_CS_PRIMARY,
+  "binary",
+  "binary",
+  "",
+  NULL,
+  NULL,
+  ctype_bin,
+  bin_char_array,
+  bin_char_array,
+  NULL,
+  NULL,
+  &ob_unicase_default,
+  NULL,
+  NULL,
+  1,
+  1,
+  1,
+  1,
+  1,
+  0,
+  255,
+  0,
+  0,
+  1,
+  1,
+  &ob_charset_handler,
+  &ob_collation_binary_handler,
+  PAD_SPACE
 };


 #undef likeconv
 #undef INC_PTR
-
-#endif
--- a/deps/oblib/src/lib/charset/ob_ctype_bin_tab.h
+++ b/deps/oblib/src/lib/charset/ob_ctype_bin_tab.h
@ -0,0 +1,51 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+/*
+ * Character-class flag table for the binary charset.
+ * Layout: one leading 0 entry followed by 256 flag bytes, one per byte value
+ * 0x00..0xFF (257 entries in total). Every byte value >= 0x80 has no flags.
+ * NOTE(review): the individual bit meanings are not defined in this header;
+ * they look like the usual MySQL-style ctype bits -- confirm against ob_ctype.h.
+ */
+static unsigned char ctype_bin[]=
+{
+  0,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
+  32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+  72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
+  16,129,129,129,129,129,129,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, 16, 16, 16, 16, 16,
+  16,130,130,130,130,130,130,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, 16, 16, 16, 16, 32,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+};
+
+/*
+ * Identity byte map: entry i holds the value i for every byte 0..255.
+ * NOTE(review): presumably used where the binary charset needs a case-mapping
+ * table that leaves bytes unchanged -- confirm against the ob_charset_bin
+ * initializer.
+ */
+static unsigned char bin_char_array[] =
+{
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
+   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
+   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
+  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+  128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+  144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+  160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+  176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+  192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+  208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+  224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+  240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
+};
--- a/deps/oblib/src/lib/charset/ob_ctype_gb18030.cc
+++ b/deps/oblib/src/lib/charset/ob_ctype_gb18030.cc
--- a/deps/oblib/src/lib/charset/ob_ctype_gbk.cc
+++ b/deps/oblib/src/lib/charset/ob_ctype_gbk.cc
@ -0,0 +1,461 @@
+
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#include "lib/charset/ob_mysql_global.h"
+#include "lib/charset/ob_ctype.h"
+#include "lib/charset/ob_ctype_gbk_tab.h"
+
+#define isgbkhead(c) (0x81<=(unsigned char)(c) && (unsigned char)(c)<=0xfe)
+#define isgbktail(c) ((0x40<=(unsigned char)(c) && (unsigned char)(c)<=0x7e) || \
+                          (0x80<=(unsigned char)(c) && (unsigned char)(c)<=0xfe))
+
+#define isgbkcode(c,d) (isgbkhead(c) && isgbktail(d))
+#define gbkcode(c,d)   ((((unsigned int) (unsigned char) (c)) <<8) | (unsigned char)(d))
+#define gbkhead(e)     ((unsigned char)(e>>8))
+#define gbktail(e)     ((unsigned char)(e&0xff))
+
+/*
+ * Translate a two-byte GBK code into its collation weight.
+ * The code is turned into a dense index into gbk_order (0xbe slots per lead
+ * byte, lead bytes starting at 0x81) and the table value is offset by 0x8100.
+ */
+static uint16 gbksortorder(uint16 i)
+{
+  const uint tail= gbktail(i);
+  const uint head= gbkhead(i);
+  /* Tails above 0x7f are shifted down by one extra slot (0x41 instead of 0x40). */
+  uint slot= tail - ((tail > 0x7f) ? 0x41 : 0x40);
+  slot+= (head - 0x81) * 0xbe;
+  return 0x8100 + gbk_order[slot];
+}
+
+
+/*
+ * Compare the first `length` bytes of *a_res and *b_res using GBK collation.
+ *
+ * When both strings hold a valid two-byte GBK code at the current position
+ * (and at least two bytes remain), the codes are compared through
+ * gbksortorder(); otherwise one byte from each side is compared through
+ * sort_order_gbk.
+ *
+ * Returns the weight difference at the first mismatch (in that case *a_res and
+ * *b_res are left untouched). Returns 0 when the whole prefix is equal, after
+ * advancing *a_res and *b_res past the compared bytes.
+ */
+int ob_strnncoll_gbk_internal(const unsigned char **a_res, const unsigned char **b_res,
+			      size_t length)
+{
+  const unsigned char *a= *a_res, *b= *b_res;
+  unsigned int a_char,b_char;
+
+  while (length--)
+  {
+    /* length was already decremented, so length > 0 means a second byte is available */
+    if ((length > 0) && isgbkcode(*a,*(a+1)) && isgbkcode(*b, *(b+1)))
+    {
+      a_char= gbkcode(*a,*(a+1));
+      b_char= gbkcode(*b,*(b+1));
+      if (a_char != b_char)
+        return ((int) gbksortorder((uint16_t) a_char) -
+		(int) gbksortorder((uint16_t) b_char));
+      a+= 2;
+      b+= 2;
+      /* account for the second byte of the pair */
+      length--;
+    }
+    /* both pointers were post-incremented in the test, hence the [-1] below */
+    else if (sort_order_gbk[*a++] != sort_order_gbk[*b++])
+      return ((int) sort_order_gbk[a[-1]] -
+	      (int) sort_order_gbk[b[-1]]);
+  }
+  *a_res= a;
+  *b_res= b;
+  return 0;
+}
+
+
+
+/*
+ * GBK collation compare of a and b.
+ * The common prefix is compared first; if it is equal the result is decided
+ * by the lengths. With b_is_prefix set, a is treated as if it were only as
+ * long as the common prefix.
+ */
+int ob_strnncoll_gbk(const ObCharsetInfo *cs __attribute__((unused)),
+                     const unsigned char *a, size_t a_length,
+                     const unsigned char *b, size_t b_length,
+                     bool b_is_prefix)
+{
+  const size_t common_len= OB_MIN(a_length, b_length);
+  const int prefix_cmp= ob_strnncoll_gbk_internal(&a, &b, common_len);
+  if (prefix_cmp != 0) {
+    return prefix_cmp;
+  }
+  const size_t effective_a_len= b_is_prefix ? common_len : a_length;
+  return (int) (effective_a_len - b_length);
+}
+
+
+/*
+ * GBK collation compare that ignores trailing spaces.
+ *
+ * The common prefix is compared with ob_strnncoll_gbk_internal(). If it is
+ * equal and the lengths differ:
+ *   - with diff_if_only_endspace_difference set, the shorter string sorts first;
+ *   - otherwise the remainder of the longer string is scanned, and the first
+ *     byte that is not a space decides the result (bytes below ' ' make the
+ *     longer string sort first, bytes above make it sort last).
+ * Returns 0 when the strings differ only by trailing spaces.
+ */
+static int ob_strnncollsp_gbk(const ObCharsetInfo * cs __attribute__((unused)),
+			      const unsigned char *a, size_t a_length,
+			      const unsigned char *b, size_t b_length,
+                              bool diff_if_only_endspace_difference)
+{
+  size_t length = OB_MIN(a_length, b_length);
+  int res = ob_strnncoll_gbk_internal(&a, &b, length);
+
+  if (!res && a_length != b_length) {
+    const unsigned char *end;
+    /* sign applied to the result; flipped when b is the longer string */
+    int swap= 1;
+    if (diff_if_only_endspace_difference) {
+      return a_length < b_length ? -1 : 1;
+    } else if (a_length < b_length) {
+      /* make `a` refer to the longer string's remainder */
+      a_length = b_length;
+      a = b;
+      swap= -1;
+      /* res is 0 on this path, so the negation has no effect */
+      res= -res;
+    }
+    /* a already points past the common prefix, so this walks only the tail */
+    for (end= a + a_length-length; a < end ; a++) {
+      if (*a != ' ') {
+        return (*a < ' ') ? -swap : swap;
+      }
+    }
+  }
+  return res;
+}
+
+
+/*
+ * Build a fixed-format sort key for src into dst.
+ *
+ * Each multi-byte character (as reported by cs->cset->ismbchar) contributes the
+ * two bytes of its gbksortorder() weight; each single byte contributes its
+ * cs->sort_order entry (or the raw byte when sort_order is NULL). At most
+ * nweights characters are consumed. The tail is finished by
+ * ob_strxfrm_pad_desc_and_reverse(), whose return value is returned.
+ *
+ * NOTE(review): *is_valid_unicode is overwritten on every single-byte
+ * character, so only the last such byte determines the final value -- confirm
+ * this is intended.
+ */
+static size_t
+ob_strnxfrm_gbk(const ObCharsetInfo *cs,
+                unsigned char *dst, size_t dstlen, unsigned int nweights,
+                const unsigned char *src, size_t srclen, unsigned int flags, bool *is_valid_unicode)
+{
+  unsigned char *d0= dst;
+  unsigned char *de= dst + dstlen;
+  const unsigned char *se= src + srclen;
+  const unsigned char *sort_order= cs->sort_order;
+  *is_valid_unicode = 1;
+
+  for (; dst < de && src < se && nweights; nweights--) {
+    if (cs->cset->ismbchar(cs, (const char*) src, (const char*) se)) {
+      /* presumably ismbchar only succeeds when src+1 is readable -- verify for non-GBK handlers */
+      uint16_t e= gbksortorder((uint16_t) gbkcode(*src, *(src + 1)));
+      *dst++= gbkhead(e);
+      /* the low weight byte is dropped if the buffer is full */
+      if (dst < de) {
+        *dst++= gbktail(e);
+      }
+      src+= 2;
+    } else {
+      *is_valid_unicode = is_valid_ascii(*src);
+      *dst++= sort_order ? sort_order[*src++] : *src++;
+    }
+  }
+  return ob_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
+}
+
+
+/*
+ * Encode src as a variable-length sort key intended for byte-wise comparison.
+ *
+ * Every character becomes a two-byte weight (gbksortorder() for GBK pairs,
+ * cs->sort_order or the raw byte for single bytes). A weight of 0 is followed
+ * by the bytes 00 01, and the key is terminated with four 00 bytes.
+ * Encoding stops early once a single byte fails is_valid_ascii().
+ * Returns the number of bytes written.
+ *
+ * NOTE(review): apart from the loop condition and the check before the low
+ * byte of a GBK weight, the stores into dst are not guarded by dst_len; the
+ * caller presumably reserves enough room -- confirm.
+ */
+size_t ob_varlen_encoding_gbk_for_memcmp(const struct ObCharsetInfo* cs,
+                              unsigned char* dst, size_t dst_len, unsigned int nweights,
+                              const unsigned char *src, size_t src_len,
+                              bool *is_valid_unicode)
+{
+  unsigned char *d0= dst;
+  unsigned char *de= dst + dst_len;
+  const unsigned char *se= src + src_len;
+  const unsigned char *sort_order= cs->sort_order;
+  *is_valid_unicode = 1;
+
+  for (; *is_valid_unicode && dst < de && src < se && nweights; nweights--)
+  {
+    if (isgbkhead(*(src)) && (se)-(src)>1 && isgbktail(*((src)+1)))
+    {
+      /*
+        Reading src[1] below is safe: the condition above only holds
+        when at least two bytes remain before se.
+      */
+      uint16_t e= gbksortorder((uint16_t) gbkcode(*src, *(src + 1)));
+      *dst++= gbkhead(e);
+      if (dst < de)
+        *dst++= gbktail(e);
+      src+= 2;
+      /* escape a zero weight so it cannot be confused with the terminator */
+      if (e == 0) {
+        *dst++ = 0x00;
+        *dst++ = 0x01;
+      }
+    } else {
+      *is_valid_unicode = is_valid_ascii(*src);
+      uint16_t e = sort_order ? sort_order[*src++] : *src++;
+      *dst++ = gbkhead(e);
+      *dst++ = gbktail(e);
+      /* escape a zero weight so it cannot be confused with the terminator */
+      if (e == 0) {
+        *dst++ = 0x00;
+        *dst++ = 0x01;
+      }
+    }
+  }
+  /* key terminator */
+  *dst++ = 0x00;
+  *dst++ = 0x00;
+  *dst++ = 0x00;
+  *dst++ = 0x00;
+  return dst - d0;
+}
+
+/*
+ * Encode src as a variable-length sort key for comparisons that ignore
+ * trailing spaces.
+ *
+ * Trailing spaces are trimmed first. Every remaining character becomes a
+ * two-byte weight (gbksortorder() for GBK pairs, cs->sort_order or the raw
+ * byte for single bytes). A run of spaces in front of a character is encoded
+ * as 00 20, then 00 21 (count negated) when the following weight is above
+ * 0x20 or 00 19 otherwise, then the 16-bit run length, high byte first.
+ * The key is terminated with 00 20 00 20. Encoding stops early once a single
+ * byte fails is_valid_ascii(). Returns the number of bytes written.
+ *
+ * NOTE(review): apart from the loop condition, the stores into dst are not
+ * guarded by dst_len; the caller presumably reserves enough room -- confirm.
+ */
+size_t ob_varlen_encoding_gbk_for_spacecmp(const struct ObCharsetInfo* cs,
+                              unsigned char* dst, size_t dst_len, unsigned int nweights,
+                              const unsigned char *src, size_t src_len,
+                              bool *is_valid_unicode)
+{
+  unsigned char *d0= dst;
+  unsigned char *de= dst + dst_len;
+  const unsigned char *se= src + src_len;
+  const unsigned char *sort_order= cs->sort_order;
+  *is_valid_unicode = 1;
+
+  // trim trailing spaces; test the bound first so an empty input never reads src[-1]
+  while (se > src && *(se-1) == 0x20) se--;
+  for (;*is_valid_unicode && dst < de && src < se && nweights; nweights--)
+  {
+    int16_t space_cnt = 0;
+    uint16_t e = 0;
+    // After the trim the byte at se-1 is not a space, so this scan stops before se.
+    while (*src == 0x20)
+    {
+      space_cnt++;
+      src++;
+    }
+    if (isgbkhead(*(src)) && (se)-(src)>1 && isgbktail(*((src)+1)))
+    {
+      /*
+        Reading src[1] below is safe: the condition above only holds
+        when at least two bytes remain before se.
+      */
+      e = gbksortorder((uint16) gbkcode(*src, *(src + 1)));
+      src+= 2;
+    } else {
+      *is_valid_unicode = is_valid_ascii(*src);
+      e = sort_order ? sort_order[*src++] : *src++;
+    }
+    if (space_cnt != 0) {
+      // space-run marker
+      *dst++ = 0x00;
+      *dst++ = 0x20;
+      if (e > 0x20) {
+        *dst++ = 0x00;
+        *dst++ = 0x21;
+        space_cnt = -space_cnt;
+      } else {
+        *dst++ = 0x00;
+        *dst++ = 0x19;
+      }
+      // run length, high byte first
+      *dst++ = ((unsigned char)(space_cnt >> 8));
+      *dst++ = ((unsigned char)(space_cnt & 0xff));
+    }
+    *dst++ = gbkhead(e);
+    *dst++ = gbktail(e);
+  }
+  // key terminator
+  *dst++ = 0x00;
+  *dst++ = 0x20;
+  *dst++ = 0x00;
+  *dst++ = 0x20;
+
+  return dst - d0;
+}
+/*
+ * Dispatch to the variable-length GBK key encoder selected by is_memcmp:
+ * the byte-wise (memcmp) form when set, the space-insensitive form otherwise.
+ */
+size_t ob_strnxfrm_gbk_varlen(const struct ObCharsetInfo* cs,
+                             unsigned char* dst, size_t dst_len, unsigned int nweights,
+                             const unsigned char *src, size_t srclen,
+                             bool is_memcmp, bool *is_valid_unicode)
+{
+  return is_memcmp
+      ? ob_varlen_encoding_gbk_for_memcmp(cs, dst, dst_len, nweights,
+                                          src, srclen, is_valid_unicode)
+      : ob_varlen_encoding_gbk_for_spacecmp(cs, dst, dst_len, nweights,
+                                            src, srclen, is_valid_unicode);
+}
+
+
+/*
+ * Returns 2 when [p, e) starts with a complete GBK two-byte character
+ * (valid lead byte, at least two bytes available, valid tail byte), else 0.
+ */
+static unsigned int ismbchar_gbk(const ObCharsetInfo *cs __attribute__((unused)),
+                                 const char* p, const char *e)
+{
+  if (!isgbkhead(*p)) {
+    return 0;
+  }
+  if (e - p <= 1) {
+    return 0;
+  }
+  return isgbktail(p[1]) ? 2 : 0;
+}
+
+/* Length in bytes of a character starting with byte c: 2 for a GBK lead byte, otherwise 1. */
+static unsigned int mbcharlen_gbk(const ObCharsetInfo *cs __attribute__((unused)),
+                                  unsigned int c)
+{
+  if (isgbkhead(c)) {
+    return 2;
+  }
+  return 1;
+}
+/*
+ * Look up the GBK code for a Unicode code point.
+ * Each supported Unicode range has its own table indexed from the range start;
+ * code points outside every range yield 0.
+ */
+static int func_uni_gbk_onechar(int code)
+{
+  if (code >= 0x00A4 && code <= 0x0451) return tab_uni_gbk0[code - 0x00A4];
+  if (code >= 0x2010 && code <= 0x2312) return tab_uni_gbk1[code - 0x2010];
+  if (code >= 0x2460 && code <= 0x2642) return tab_uni_gbk2[code - 0x2460];
+  if (code >= 0x3000 && code <= 0x3129) return tab_uni_gbk3[code - 0x3000];
+  if (code >= 0x3220 && code <= 0x32A3) return tab_uni_gbk4[code - 0x3220];
+  if (code >= 0x338E && code <= 0x33D5) return tab_uni_gbk5[code - 0x338E];
+  if (code >= 0x4E00 && code <= 0x9FA5) return tab_uni_gbk6[code - 0x4E00];
+  if (code >= 0xE000 && code <= 0xE864) return tab_uni_gbk_pua[code - 0xE000];
+  if (code >= 0xF92C && code <= 0xFA29) return tab_uni_gbk7[code - 0xF92C];
+  if (code >= 0xFE30 && code <= 0xFFE5) return tab_uni_gbk8[code - 0xFE30];
+  return 0;
+}
+
+/*
+ * Encode the code point wc as GBK into [s, e).
+ * Returns the number of bytes written (1 or 2), OB_CS_TOOSMALL /
+ * OB_CS_TOOSMALL2 when the buffer is too short, or OB_CS_ILUNI when wc has no
+ * GBK mapping.
+ */
+static int
+ob_wc_mb_gbk(const ObCharsetInfo *cs  __attribute__((unused)),
+             ob_wc_t wc, unsigned char *s, unsigned char *e)
+{
+  if (s >= e) {
+    return OB_CS_TOOSMALL;
+  }
+  if ((unsigned int) wc < 0x80) {
+    /* values below 0x80 are stored as a single byte */
+    s[0]= (unsigned char) wc;
+    return 1;
+  }
+  const int gbk_code= func_uni_gbk_onechar(wc);
+  if (!gbk_code) {
+    return OB_CS_ILUNI;
+  }
+  if (s+2 > e) {
+    return OB_CS_TOOSMALL2;
+  }
+  s[0]= gbk_code >> 8;
+  s[1]= gbk_code & 0xFF;
+  return 2;
+}
+
+/*
+ * Decode one GBK character from [s, e) into *pwc.
+ * Returns the number of bytes consumed (1 or 2), OB_CS_TOOSMALL /
+ * OB_CS_TOOSMALL2 when too few bytes are available, or -2 when the two-byte
+ * code has no Unicode mapping.
+ */
+static int ob_mb_wc_gbk(const ObCharsetInfo *cs __attribute__((unused)),
+                        ob_wc_t *pwc, const unsigned char *s, const unsigned char *e)
+{
+  if (s >= e) {
+    return OB_CS_TOOSMALL;
+  }
+  const int lead= s[0];
+  if (lead < 0x80) {
+    pwc[0]= lead;
+    return 1;
+  }
+  if (s+2 > e) {
+    return OB_CS_TOOSMALL2;
+  }
+  pwc[0]= func_gbk_uni_onechar((lead << 8) + s[1]);
+  return pwc[0] ? 2 : -2;
+}
+
+
+/*
+ * Length in bytes of the longest well-formed GBK prefix of [b, e) that holds
+ * at most pos characters. *error is set to 1 when scanning stopped at a
+ * malformed or truncated sequence, 0 otherwise.
+ */
+static size_t ob_well_formed_len_gbk(const ObCharsetInfo *cs __attribute__((unused)),
+                              const char *b, const char *e,
+                              size_t pos, int *error)
+{
+  const char *cur= b;
+  *error= 0;
+
+  for (; pos != 0 && cur < e; pos--) {
+    if ((unsigned char) cur[0] < 128) {
+      cur+= 1;
+      continue;
+    }
+    /* a two-byte character needs a second byte before e */
+    if ((e - cur) > 1 && isgbkcode((unsigned char) cur[0], (unsigned char) cur[1])) {
+      cur+= 2;
+      continue;
+    }
+    *error= 1;
+    break;
+  }
+  return (size_t) (cur - b);
+}
+
+/*
+ * Collation callbacks for gbk_chinese_ci.
+ * The comparison and sort-key slots use the GBK routines defined in this file;
+ * the remaining slots use the shared *_mb / *_simple implementations.
+ * NOTE(review): slot names are not visible here -- see ObCollationHandler in
+ * ob_ctype.h for what each position means.
+ */
+static ObCollationHandler ob_collation_gbk_ci_handler =
+{
+  NULL,
+  NULL,
+  ob_strnncoll_gbk,
+  ob_strnncollsp_gbk,
+  ob_strnxfrm_gbk,
+  ob_strnxfrmlen_simple,
+  ob_strnxfrm_gbk_varlen,
+  ob_like_range_mb,
+  ob_wildcmp_mb,
+  NULL,
+  ob_instr_mb,
+  ob_hash_sort_simple,
+  ob_propagate_simple
+};
+
+/*
+ * Character-set callbacks shared by both GBK collations: GBK-specific
+ * character detection and conversion, generic multi-byte helpers for counting
+ * and case mapping, and the 8-bit helpers for numeric parsing.
+ */
+static ObCharsetHandler ob_charset_gbk_handler=
+{
+  ismbchar_gbk,            /* ismbchar  */
+  mbcharlen_gbk,           /* mbcharlen */
+  ob_numchars_mb,
+  ob_charpos_mb,
+  ob_max_bytes_charpos_mb,
+  ob_well_formed_len_gbk,
+  ob_lengthsp_8bit,
+  /* ob_numcells_8bit, */
+  ob_mb_wc_gbk,
+  ob_wc_mb_gbk,
+  ob_mb_ctype_mb,
+  /* ob_caseup_str_mb, */
+  /* ob_casedn_str_mb, */
+  ob_caseup_mb,
+  ob_casedn_mb,
+  ob_fill_8bit,
+  ob_strntol_8bit,
+  ob_strntoul_8bit,
+  ob_strntoll_8bit,
+  ob_strntoull_8bit,
+  ob_strntod_8bit,
+  ob_strntoull10rnd_8bit,
+  ob_scan_8bit
+};
+
+
+/* Descriptor for the primary GBK collation, gbk_chinese_ci. */
+ObCharsetInfo ob_charset_gbk_chinese_ci=
+{
+    28,0,0,                 /* number    */
+    OB_CS_COMPILED|OB_CS_PRIMARY|OB_CS_STRNXFRM, /* state */
+    "gbk",                  /* cs name   */
+    "gbk_chinese_ci",       /* name      */
+    "",                     /* comment   */
+    NULL,                   /* tailoring */
+    NULL,                   /* coll_param */
+    ctype_gbk,
+    to_lower_gbk,
+    to_upper_gbk,
+    sort_order_gbk,         /* sort_order */
+    NULL,                   /* uca       */
+    &ob_caseinfo_gbk,       /* caseinfo  */
+    NULL,                   /* state_map */
+    NULL,                   /* ident_map */
+    1,                      /* strxfrm_multiply */
+    1,                      /* caseup_multiply  */
+    1,                      /* casedn_multiply  */
+    1,                      /* mbminlen  */
+    2,                      /* mbmaxlen  */
+    0,                      /* min_sort_char */
+    0xA967,                 /* max_sort_char */
+    ' ',                    /* pad char  */
+    1,                      /* escape_with_backslash_is_dangerous */
+    1,                      /* levels_for_compare */
+    1,                      /* levels_for_order */
+    &ob_charset_gbk_handler,
+    &ob_collation_gbk_ci_handler,
+    PAD_SPACE};
+
+/* Descriptor for the binary GBK collation, gbk_bin (no sort_order table). */
+ObCharsetInfo ob_charset_gbk_bin=
+{
+    87,0,0,                 /* number    */
+    OB_CS_COMPILED|OB_CS_BINSORT, /* state */
+    "gbk",                  /* cs name   */
+    "gbk_bin",              /* name      */
+    "",                     /* comment   */
+    NULL,                   /* tailoring */
+    NULL,                   /* coll_param */
+    ctype_gbk,
+    to_lower_gbk,
+    to_upper_gbk,
+    NULL,                   /* sort_order */
+    NULL,                   /* uca       */
+    &ob_caseinfo_gbk,       /* caseinfo  */
+    NULL,                   /* state_map */
+    NULL,                   /* ident_map */
+    1,                      /* strxfrm_multiply */
+    1,                      /* caseup_multiply  */
+    1,                      /* casedn_multiply  */
+    1,                      /* mbminlen  */
+    2,                      /* mbmaxlen  */
+    0,                      /* min_sort_char */
+    0xFEFE,                 /* max_sort_char */
+    ' ',                    /* pad char  */
+    1,                      /* escape_with_backslash_is_dangerous */
+    1,                      /* levels_for_compare */
+    1,                      /* levels_for_order */
+    &ob_charset_gbk_handler,
+    &ob_collation_mb_bin_handler,
+    PAD_SPACE
+};
--- a/deps/oblib/src/lib/charset/ob_ctype_gbk_tab.h
+++ b/deps/oblib/src/lib/charset/ob_ctype_gbk_tab.h
--- a/deps/oblib/src/lib/charset/ob_ctype_latin1.cc
+++ b/deps/oblib/src/lib/charset/ob_ctype_latin1.cc
@ -0,0 +1,137 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#include "lib/charset/ob_mysql_global.h"
+#include "lib/charset/ob_ctype.h"
+#include "lib/utility/ob_macro_utils.h"
+#include "lib/charset/ob_ctype_latin1_tab.h"
+
+/*
+ * Decode one latin1 byte from [str, end) into *pwc via cs_to_uni.
+ * Returns 1 on success, OB_CS_TOOSMALL when no byte is available, and -1 when
+ * a non-zero byte maps to code point 0 (i.e. it has no mapping).
+ */
+static int ob_mb_wc_latin1(const ObCharsetInfo *cs __attribute__((unused)),
+                           ob_wc_t *pwc, const unsigned char *str,
+                           const unsigned char *end)
+{
+  if (str >= end) {
+    return OB_CS_TOOSMALL;
+  }
+  const unsigned char byte= str[0];
+  pwc[0]= cs_to_uni[byte];
+  if (!pwc[0] && byte) {
+    return -1;
+  }
+  return 1;
+}
+
+/*
+ * Encode the code point wc as a single latin1 byte into [str, end).
+ * uni_to_cs is indexed by the high byte of wc and yields a 256-entry page (or
+ * NULL). Returns 1 on success, OB_CS_TOOSMALL when the buffer is empty, and
+ * OB_CS_ILUNI when wc is above 0xFFFF or has no latin1 mapping.
+ */
+static int ob_wc_mb_latin1(const ObCharsetInfo *cs  __attribute__((unused)),
+                           ob_wc_t wc, unsigned char *str, unsigned char *end)
+{
+  if (str >= end) {
+    return OB_CS_TOOSMALL;
+  }
+  if (wc > 0xFFFF) {
+    return OB_CS_ILUNI;
+  }
+
+  const unsigned char *page= uni_to_cs[wc >> 8];
+  str[0]= page ? page[wc & 0xFF] : '\0';
+  /* a zero byte is only a valid result for code point 0 */
+  if (!str[0] && wc) {
+    return OB_CS_ILUNI;
+  }
+  return 1;
+}
+
+/*
+ * Character-set callbacks shared by both latin1 collations: the latin1
+ * conversion routines above plus the generic 8-bit helpers. The `//` lines
+ * mark slots that are commented out in this initializer.
+ */
+static ObCharsetHandler ob_charset_latin1_handler=
+{
+  //NULL,                  /* init */
+  NULL,                    /* ismbchar  */
+  ob_mbcharlen_8bit,       /* mbcharlen */
+  ob_numchars_8bit,
+  ob_charpos_8bit,
+  ob_max_bytes_charpos_8bit,
+  ob_well_formed_len_8bit,
+  ob_lengthsp_binary,
+  //ob_numcells_8bit,
+  ob_mb_wc_latin1,
+  ob_wc_mb_latin1,
+  ob_mb_ctype_8bit,
+  //ob_case_str_bin,
+  //ob_case_str_bin,
+  ob_caseup_8bit,
+  ob_casedn_8bit,
+  //ob_snprintf_8bit,
+  //ob_long10_to_str_8bit,
+  //ob_longlong10_to_str_8bit,
+  ob_fill_8bit,
+  ob_strntol_8bit,
+  ob_strntoul_8bit,
+  ob_strntoll_8bit,
+  ob_strntoull_8bit,
+  ob_strntod_8bit,
+  //ob_strtoll10_8bit,
+  ob_strntoull10rnd_8bit,
+  ob_scan_8bit
+};
+
+/* Descriptor for the primary latin1 collation (OB_LATIN1_SWEDISH_CI). */
+ObCharsetInfo ob_charset_latin1 = {
+    8,0,0,                          /* number    */
+    OB_CS_COMPILED | OB_CS_PRIMARY, /* state     */
+    OB_LATIN1,                      /* cs name   */
+    OB_LATIN1_SWEDISH_CI,           /* name      */
+    "cp1252 West European",         /* comment   */
+    NULL,                           /* tailoring */
+    NULL,                           /* coll_param */
+    ctype_latin1,
+    to_lower_latin1,
+    to_upper_latin1,
+    sort_order_latin1,              /* sort_order */
+    NULL,                           /* uca       */
+    //NULL,                         /* tab_to_uni   */
+    //NULL,                         /* tab_from_uni */
+    &ob_unicase_default,            /* caseinfo  */
+    NULL,                           /* state_map */
+    NULL,                           /* ident_map */
+    1,                              /* strxfrm_multiply */
+    1,                              /* caseup_multiply  */
+    1,                              /* casedn_multiply  */
+    1,                              /* mbminlen  */
+    1,                              /* mbmaxlen  */
+    0,                              /* min_sort_char */
+    0xFF,                           /* max_sort_char */
+    ' ',                            /* pad char  */
+    0,                              /* escape_with_backslash_is_dangerous */
+    1,                              /* levels_for_compare */
+    1,                              /* levels_for_order */
+    &ob_charset_latin1_handler,
+    &ob_collation_8bit_simple_ci_handler,
+    PAD_SPACE};
+
+/* Descriptor for the binary latin1 collation (OB_LATIN1_BIN); no sort_order table. */
+ObCharsetInfo ob_charset_latin1_bin = {
+    47,0,0,                         /* number    */
+    OB_CS_COMPILED | OB_CS_BINSORT, /* state     */
+     OB_LATIN1,                     /* cs name   */
+    OB_LATIN1_BIN,                  /* name      */
+    "cp1252 West European",         /* comment   */
+    NULL,                           /* tailoring */
+    NULL,                           /* coll_param */
+    ctype_latin1,
+    to_lower_latin1,
+    to_upper_latin1,
+    NULL,                           /* sort_order */
+    NULL,                           /* uca       */
+    //NULL,                         /* tab_to_uni   */
+    //NULL,                         /* tab_from_uni */
+    &ob_unicase_default,            /* caseinfo  */
+    NULL,                           /* state_map */
+    NULL,                           /* ident_map */
+    1,                              /* strxfrm_multiply */
+    1,                              /* caseup_multiply  */
+    1,                              /* casedn_multiply  */
+    1,                              /* mbminlen  */
+    1,                              /* mbmaxlen  */
+    0,                              /* min_sort_char */
+    0xFF,                           /* max_sort_char */
+    ' ',                            /* pad char  */
+    0,                              /* escape_with_backslash_is_dangerous */
+    1,                              /* levels_for_compare */
+    1,                              /* levels_for_order */
+    &ob_charset_latin1_handler,
+    &ob_collation_8bit_bin_handler,
+    PAD_SPACE};
--- a/deps/oblib/src/lib/charset/ob_ctype_latin1_tab.h
+++ b/deps/oblib/src/lib/charset/ob_ctype_latin1_tab.h
@ -1,28 +1,14 @@
-/** 
- * Copyright (c) 2021 OceanBase 
- * OceanBase CE is licensed under Mulan PubL v2. 
- * You can use this software according to the terms and conditions of the Mulan PubL v2. 
- * You may obtain a copy of Mulan PubL v2 at: 
- *          http://license.coscl.org.cn/MulanPubL-2.0 
- * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, 
- * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 
- * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PubL v2 for more details.
- */ 
-
-/*
-*
-* Version: $Id
-*
-* Authors:
-*      - initial release
-*
-*/
-#ifndef OB_BUILD_FULL_CHARSET
-
-#include "lib/charset/ob_mysql_global.h"
-#include "lib/charset/ob_ctype.h"
-#include "lib/utility/ob_macro_utils.h"
+ */
 static unsigned char ctype_latin1[] = {
    0,  32,  32,  32,  32,  32,  32,  32,  32,  32,  40,  40, 40, 40, 40, 32,
    32, 32,  32,  32,  32,  32,  32,  32,  32,  32,  32,  32, 32, 32, 32, 32,
@ -281,133 +267,3 @@ static unsigned char *uni_to_cs[] = {
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
-
-
-static int ob_mb_wc_latin1(const ObCharsetInfo *cs __attribute__((unused)),
-	      ob_wc_t *pwc, const unsigned char *str, const unsigned char *end) {
-
-  if (str >= end) return OB_CS_TOOSMALL;
-  *pwc = cs_to_uni[*str];
-  return (!pwc[0] && str[0]) ? -1 : 1;
-
-}
-
-static int ob_wc_mb_latin1(const ObCharsetInfo *cs  __attribute__((unused)),
-	      ob_wc_t wc, unsigned char *str, unsigned char *end) {
-  const unsigned char *pl;
-
-  if (str >= end) return OB_CS_TOOSMALL;
-
-  if (wc > 0xFFFF) return OB_CS_ILUNI;
-
-  pl = uni_to_cs[wc >> 8];
-  str[0] = pl ? pl[wc & 0xFF] : '\0';
-  return (!str[0] && wc) ? OB_CS_ILUNI : 1;
-}
-
-
-
-static ObCharsetHandler ob_charset_latin1_handler=
-{
-  //NULL,			/* init */
-  NULL,			/* ismbchar      */
-  ob_mbcharlen_8bit,	/* mbcharlen     */
-  ob_numchars_8bit,
-  ob_charpos_8bit,
-  ob_max_bytes_charpos_8bit,
-  ob_well_formed_len_8bit,
-  ob_lengthsp_binary,
-  //ob_numcells_8bit,
-  ob_mb_wc_latin1,
-  ob_wc_mb_latin1,
-  ob_mb_ctype_8bit,
-  //ob_case_str_bin,
-  //ob_case_str_bin,
-  ob_caseup_8bit,
-  ob_casedn_8bit,
-  //ob_snprintf_8bit,
-  //ob_long10_to_str_8bit,
-  //ob_longlong10_to_str_8bit,
-  ob_fill_8bit,
-  ob_strntol_8bit,
-  ob_strntoul_8bit,
-  ob_strntoll_8bit,
-  ob_strntoull_8bit,
-  ob_strntod_8bit,
-  //ob_strtoll10_8bit,
-  ob_strntoull10rnd_8bit,
-  ob_scan_8bit
-};
-
-
-
-
-
-ObCharsetInfo ob_charset_latin1 = {
-    8,0,0,                              /* number    */
-    OB_CS_COMPILED | OB_CS_PRIMARY, /* state */
-    OB_LATIN1,                       /* cs name    */
-    OB_LATIN1_SWEDISH_CI,            /* name      */
-    "cp1252 West European",         /* comment   */
-    NULL,                        /* tailoring */
-    NULL,                        /* coll_param */
-    ctype_latin1,
-    to_lower_latin1,
-    to_upper_latin1,
-    sort_order_latin1,
-    NULL,             /* uca          */
-    //NULL,           /* tab_to_uni   */
-    //NULL,             /* tab_from_uni */
-    &ob_unicase_default, /* caseinfo     */
-    NULL,             /* state_map    */
-    NULL,             /* ident_map    */
-    1,                   /* strxfrm_multiply */
-    1,                   /* caseup_multiply  */
-    1,                   /* casedn_multiply  */
-    1,                   /* mbminlen   */
-    1,                   /* mbmaxlen   */
-    0,                   /* min_sort_char */
-    0xFF,                 /* max_sort_char */
-    ' ',                 /* pad char      */
-    0,               /* escape_with_backslash_is_dangerous */
-    1,                   /* levels_for_compare */
-    1,                   /* levels_for_order */
-    &ob_charset_latin1_handler,
-    &ob_collation_8bit_simple_ci_handler,
-    PAD_SPACE};
-
-ObCharsetInfo ob_charset_latin1_bin = {
-    47,0,0,                              /* number    */
-    OB_CS_COMPILED | OB_CS_BINSORT, /* state     */
-     OB_LATIN1,                       /* cs name    */
-    OB_LATIN1_BIN,            /* name      */
-    "cp1252 West European",         /* comment   */
-    NULL,                        /* tailoring */
-    NULL,                        /* coll_param */
-    ctype_latin1,
-    to_lower_latin1,
-    to_upper_latin1,
-    NULL,             /* sort_order   */
-    NULL,             /* uca          */
-    //NULL,           /* tab_to_uni   */
-    //NULL,             /* tab_from_uni */
-    &ob_unicase_default, /* caseinfo     */
-    NULL,             /* state_map    */
-    NULL,             /* ident_map    */
-    1,                   /* strxfrm_multiply */
-    1,                   /* caseup_multiply  */
-    1,                   /* casedn_multiply  */
-    1,                   /* mbminlen   */
-    1,                   /* mbmaxlen   */
-    0,                   /* min_sort_char */
-    0xFF,                /* max_sort_char */
-    ' ',                 /* pad char      */
-    0,               /* escape_with_backslash_is_dangerous */
-    1,                   /* levels_for_compare */
-    1,                   /* levels_for_order */
-    &ob_charset_latin1_handler,
-    &ob_collation_8bit_bin_handler,
-    PAD_SPACE};
-
-
-#endif
--- a/deps/oblib/src/lib/charset/ob_ctype_mb_os.cc
+++ b/deps/oblib/src/lib/charset/ob_ctype_mb_os.cc
@ -10,14 +10,8 @@
 * See the Mulan PubL v2 for more details.
 */ 

-/*
- * (C) 2017-2020 Alibaba Group Holding Limited.
- *
- *  Authors:
- */
-#ifndef OB_BUILD_FULL_CHARSET
-
 #include "lib/charset/ob_ctype.h"
+#include "lib/charset/str_uca_type.h"

 static void __attribute__ ((noinline)) pad_max_char_help(char *str, char *end, char *buf, char buf_len)
 {
@ -78,11 +72,11 @@ bool ob_like_range_mb_help(const ObCharsetInfo *cs,
 }

 bool ob_like_range_mb(const ObCharsetInfo *cs,
-			 const char *ptr,size_t ptr_length,
-			 pbool escape_char, pbool w_one, pbool w_many,
-			 size_t res_length,
-			 char *min_str,char *max_str,
-			 size_t *min_length,size_t *max_length)
+                      const char *ptr,size_t ptr_length,
+                      pbool escape_char, pbool w_one, pbool w_many,
+                      size_t res_length,
+                      char *min_str,char *max_str,
+                      size_t *min_length,size_t *max_length)
 {
  unsigned int mb_len;
  const char *end= ptr + ptr_length;
@ -95,8 +89,7 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
  for (; ptr != end && min_str != min_end && max_char_len ; max_char_len--) {
    if (*ptr == escape_char && ptr+1 != end) {
      ptr++;                                      
-    } else if (*ptr == w_one || 
-               *ptr == w_many) {
+    } else if (*ptr == w_one ||  *ptr == w_many) {
      return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
    }
    mb_len= ob_ismbchar(cs, ptr, end);
@ -114,7 +107,7 @@ bool ob_like_range_mb(const ObCharsetInfo *cs,
        if (ptr[1] == w_one || ptr[1] == w_many) {
          return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
        } else if (ob_uca_can_be_contraction_tail(contractions, (unsigned char) ptr[1]) &&
-            ob_uca_contraction2_weight(contractions, (unsigned char) ptr[0], ptr[1])) {
+                   ob_uca_contraction2_weight(contractions, (unsigned char) ptr[0], ptr[1])) {
          if (max_char_len == 1 || min_str + 1 >= min_end) {
            return ob_like_range_mb_help(cs,res_length, &min_str,&max_str, &min_org, &min_end, min_length, max_length, &max_end);
          }
@ -150,7 +143,7 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
                       const char *wild_str,const char *wild_end,
                       int escape_char, int w_one, int w_many, int recurse_level)
 {
-  int result= -1;				  
+  int result= -1;
  while (wild_str != wild_end) {
    while ((*wild_str == escape_char) || (*wild_str != w_many && *wild_str != w_one)) {
      int l;
@ -158,59 +151,56 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
        wild_str++;
      }
      if ((l = ob_ismbchar(cs, wild_str, wild_end))) {
-	      if (str+l > str_end || memcmp(str, wild_str, l) != 0)
-	        return 1;
-	      str += l;
-	      wild_str += l;
+        if (str+l > str_end || memcmp(str, wild_str, l) != 0)
+          return 1;
+        str += l;
+        wild_str += l;
      } else if (str == str_end || likeconv(cs,*wild_str++) != likeconv(cs,*str++)) {
-      	return(1);				  
+       return(1);
      }
      if (wild_str == wild_end) {
-	      return (str != str_end);
-      }		  
-      result=1;					  
+       return (str != str_end);
+      }
+      result=1;
    }
    if (*wild_str == w_one) {
      do {
        if (str == str_end) {
          return (result);
        }
-	      INC_PTR(cs,str,str_end);
+        INC_PTR(cs,str,str_end);
      } while (++wild_str < wild_end && *wild_str == w_one);
      if (wild_end == wild_str)
-	      break;
+        break;
    }
-    if (*wild_str == w_many) {						  
+    if (*wild_str == w_many) {
      unsigned char cmp;
      const char* mb = wild_str;
      int mb_len=0;
-
      wild_str++;
-        
      for (; wild_str != wild_end ; wild_str++)
      {
        if (*wild_str == w_many)
          continue;
-        if (*wild_str == w_one)
-        {
+        if (*wild_str == w_one) {
          if (str == str_end)
            return (-1);
          INC_PTR(cs,str,str_end);
          continue;
        }
-        break;					  
+        break;
      }
      if (wild_str == wild_end) {
-	      return(0);				  
+        return(0);
      } else if (str == str_end) {
-	      return -1;
+        return -1;
      } else if ((cmp= *wild_str) == escape_char && wild_str+1 != wild_end) {
-	      cmp= *++wild_str;
+       cmp= *++wild_str;
      }

      mb=wild_str;
      mb_len= ob_ismbchar(cs, wild_str, wild_end);
-      INC_PTR(cs,wild_str,wild_end);		  
+      INC_PTR(cs,wild_str,wild_end);
      cmp=likeconv(cs,cmp);
      while (true) {
        while (TRUE) {
@ -232,7 +222,7 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
        {
          int tmp=ob_wildcmp_mb_impl(cs,str,str_end,
                                          wild_str,wild_end,escape_char,w_one,
-                                          w_many, recurse_level + 1);
+                                            w_many, recurse_level + 1);
          if (tmp <= 0)
            return (tmp);
        }
@ -240,7 +230,7 @@ int ob_wildcmp_mb_impl(const ObCharsetInfo *cs,
          return -1;
        } else if (wild_str != wild_end && wild_str[0] == w_many) {
          return -1;
-        }
+      }
      }
      return(-1);
    }
@ -256,7 +246,7 @@ unsigned int __attribute__ ((noinline)) ob_instr_mb_help(size_t s_length, ob_mat
      match->end= 0;
      match->mb_len= 0;
    }
-    return 1;     
+    return 1;
  }
  return 0;
 }
@ -277,8 +267,8 @@ unsigned int ob_instr_mb(const ObCharsetInfo *cs,
    end= b+b_length-s_length+1;
    while (b < end) {
      int mb_len;
-      if (!cs->coll->strnncoll(cs, (unsigned char*) b,   s_length,
-      				   (unsigned char*) s, s_length, 0)) {
+      if (!cs->coll->strnncoll(cs, (unsigned char*) b, s_length,
+                               (unsigned char*) s, s_length, 0)) {
        if (nmatch) {
          match[0].beg= 0;
          match[0].end= (size_t) (b-b0);
@ -286,7 +276,7 @@ unsigned int ob_instr_mb(const ObCharsetInfo *cs,
          if (nmatch > 1) {
            match[1].beg= match[0].end;
            match[1].end= match[0].end+s_length;
-            match[1].mb_len= 0;	  
+            match[1].mb_len= 0;
          }
        }
        return 2;
@ -349,7 +339,7 @@ size_t ob_max_bytes_charpos_mb(const ObCharsetInfo *cs __attribute__((unused)),
 }

 int ob_mb_ctype_mb(const ObCharsetInfo *cs __attribute__((unused)), int *ctype,
-                          const unsigned char *s, const unsigned char *e)
+                   const unsigned char *s, const unsigned char *e)
 {
  ob_wc_t wc;
  int res = cs->cset->mb_wc(cs, &wc, s, e);
@ -439,7 +429,7 @@ size_t ob_lengthsp_8bit(const ObCharsetInfo *cs __attribute__((unused)),
                        const char *ptr, size_t length)
 {
  const char *end;
-  end= (const char *) skip_trailing_space((const uchar *)ptr, length, 0);
+  end= (const char *) skip_trailing_space((const unsigned char *)ptr, length, 0);
  return (size_t) (end-ptr);
 }

@ -470,18 +460,18 @@ int __attribute__ ((noinline))  ob_strnncollsp_mb_bin_help(
      res= 1;                              
    }
    if (a_length < b_length) {
-      a_length= b_length;
-      a= b;
+       a_length= b_length;
+       a= b;
      swap= -1;           
-      res= -res;
+       res= -res;
    }
    for (end= a + a_length-length; a < end ; a++) {
-      if (*a != ' ') {
-        *has_returned = 1;
-        break;
-      }
-    }
-  }  
+       if (*a != ' ') {
+         *has_returned = 1;
+         break;
+       }
+     }
+  }
  *a_ = a;
  *b_ = b;
  *end_ = end;
@ -510,10 +500,10 @@ int ob_strnncollsp_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
  res= 0;
  int has_returned = 0;
  int tmp = ob_strnncollsp_mb_bin_help(
-          &a, a_length,
-          &b, b_length,
-          &end,
-          diff_if_only_endspace_difference, &has_returned, &res, length);
+      &a, a_length,
+      &b, b_length,
+      &end,
+      diff_if_only_endspace_difference, &has_returned, &res, length);
  return has_returned == 1 ? tmp : res;
 }

@ -585,7 +575,7 @@ size_t ob_strnxfrm_mb(const ObCharsetInfo *cs,
 pad:
  return ob_strxfrm_pad_desc_and_reverse(cs, d0, dst, de, nweights, flags, 0);
 }
-  
+

 #define INC_PTR(cs,A,B) A+=(ob_ismbchar(cs,A,B) ? ob_ismbchar(cs,A,B) : 1)

@ -636,7 +626,7 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
    const char *str_end, const char *wild_str, const char *wild_end, int escape_char,
    int w_one, int w_many, int recurse_level)
 {
-  int result = -1;   
+  int result = -1;
  while (wild_str != wild_end) {
    int has_returned = 0;
    int tmp = ob_wildcmp_mb_bin_impl_help(cs, &str,&str_end, &wild_str, &wild_end, escape_char,w_one,w_many, &result, &has_returned);
@ -644,7 +634,7 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
      return tmp;
    } else if (*wild_str == w_one) {
      do {
-        if (str == str_end) {  
+        if (str == str_end) {
          return (result);
        } else {
          INC_PTR(cs, str, str_end);
@ -654,13 +644,11 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
        break;
      }
    }
-    if (*wild_str == w_many) {   
+    if (*wild_str == w_many) {
      unsigned char cmp;
      const char* mb = wild_str;
      int mb_len = 0;
-
      wild_str++;
-        
      for (; wild_str != wild_end; wild_str++) {
        if (*wild_str == w_many) {
          continue;
@ -673,7 +661,7 @@ static int ob_wildcmp_mb_bin_impl(const ObCharsetInfo *cs, const char *str,
          }
        } else {
          break;  
-        } 
+        }
      }
      if (wild_str == wild_end) {
        return (0);   
@ -749,8 +737,6 @@ void ob_hash_sort_mb_bin(const ObCharsetInfo *cs __attribute__((unused)),
  }
 }

-  
-
 ObCollationHandler ob_collation_mb_bin_handler = {
  NULL,
  NULL,
@ -770,5 +756,3 @@ ObCollationHandler ob_collation_mb_bin_handler = {

 #undef INC_PTR
 #undef likeconv
-
-#endif
--- a/deps/oblib/src/lib/charset/ob_ctype_simple_os.cc
+++ b/deps/oblib/src/lib/charset/ob_ctype_simple_os.cc
@ -1,3 +1,4 @@
+
 /** 
 * Copyright (c) 2021 OceanBase 
 * OceanBase CE is licensed under Mulan PubL v2. 
@ -9,8 +10,7 @@
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
 * See the Mulan PubL v2 for more details.
 */ 
-
-/*
+ /*
 *
 * Version: $Id
 *
@ -18,7 +18,6 @@
 *      - initial release
 *
 */
-#ifndef OB_BUILD_FULL_CHARSET

 #include "lib/charset/ob_ctype.h"
 #include "lib/charset/ob_dtoa.h"
@ -53,14 +52,14 @@ static ulonglong d10[DIGITS_IN_ULONGLONG]=
 };

 long ob_strntol_8bit(const ObCharsetInfo *cs,
-		     const char *nptr, size_t l, int base,
-		     char **end_ptr, int *err)
+                     const char *nptr, size_t l, int base,
+                     char **end_ptr, int *err)
 {

  const char *save, *s = nptr, *e = nptr+l;
  unsigned char c;
  unsigned int cut_lim;
-  *err= 0;				
+  *err= 0;
  uint32 cut_off;
  while (s<e && ob_isspace(cs, *s)) {
    s++;
@ -115,7 +114,7 @@ long ob_strntol_8bit(const ObCharsetInfo *cs,
  if (neg) { 
    if (i  > (uint32) INT_MIN32) {
      overflow = 1;
-    }
+  }
  } else if (i > INT_MAX32) {
    overflow = 1;
  }
@ -137,8 +136,8 @@ NO_CONV:


 ulong ob_strntoul_8bit(const ObCharsetInfo *cs,
-		       const char *nptr, size_t l, int base,
-		       char **end_ptr, int *err)
+                       const char *nptr, size_t l, int base,
+                       char **end_ptr, int *err)
 {
  int neg;
  unsigned char c;
@ -146,7 +145,7 @@ ulong ob_strntoul_8bit(const ObCharsetInfo *cs,
  uint32 cut_off;
  unsigned int cut_lim;

-  *err= 0;				
+  *err= 0;

  while (s<e && ob_isspace(cs, *s)) {
    s++;
@ -217,13 +216,13 @@ NO_CONV:


 longlong ob_strntoll_8bit(const ObCharsetInfo *cs __attribute__((unused)),
-			  const char *nptr, size_t l, int base,
-			  char **end_ptr,int *err)
+                          const char *nptr, size_t l, int base,
+                          char **end_ptr,int *err)
 {
  ulonglong cut_off;
  unsigned int cut_lim;
  const char *s = nptr, *e = nptr+l, *save;
-  *err= 0;		
+  *err= 0;

  while (s<e && ob_isspace(cs,*s)) {
    s++;
@ -302,14 +301,14 @@ NO_CONV:


 ulonglong ob_strntoull_8bit(const ObCharsetInfo *cs,
-			   const char *nptr, size_t l, int base,
-			   char **end_ptr, int *err)
+                            const char *nptr, size_t l, int base,
+                            char **end_ptr, int *err)
 {

  ulonglong cut_off;
  unsigned int cut_lim;
  const char *s = nptr, *e = nptr + l, *save;
-  *err= 0;			
+  *err= 0;

  while (s<e && ob_isspace(cs,*s)) {
    s++;
@ -386,8 +385,8 @@ NO_CONV:


 double ob_strntod_8bit(const ObCharsetInfo *cs __attribute__((unused)),
-		       char *str, size_t len,
-		       char **end, int *err)
+                       char *str, size_t len,
+                       char **end, int *err)
 {
  if (len == INT_MAX32) {
    len= 65535;          
@ -421,7 +420,7 @@ ob_strntoull10rnd_8bit(const ObCharsetInfo *cs __attribute__((unused)),

  beg= str;
  end9= (str + 9) > end ? end : (str + 9);
-    
+
  for (ul= 0 ; str < end9 && (ch= (unsigned char) (*str - '0')) < 10; str++) {
    ul= ul * 10 + ch;
  }
@ -583,7 +582,6 @@ RET_SIGN:
    }
  }

-    
  if (neg && ull) {
    *err= OB_ERRNO_ERANGE;
    return 0;
@ -611,7 +609,7 @@ RET_TOO_LARGE:
 }

 void ob_strxfrm_desc_and_reverse(unsigned char *str, unsigned char *str_end,
-                            unsigned int flags, unsigned int level)
+                                 unsigned int flags, unsigned int level)
 {
  if (flags & (OB_STRXFRM_DESC_LEVEL1 << level)) {
    if (flags & (OB_STRXFRM_REVERSE_LEVEL1 << level)) {
@ -660,8 +658,6 @@ size_t ob_scan_8bit(const ObCharsetInfo *cs, const char *str, const char *end,
  }
 }

-
-
 size_t ob_strxfrm_pad_desc_and_reverse(const ObCharsetInfo *cs,
                                unsigned char *str, unsigned char *frm_end, unsigned char *str_end,
                                unsigned int nweights, unsigned int flags, unsigned int level)
@ -686,11 +682,11 @@ size_t ob_strnxfrmlen_simple(const ObCharsetInfo *cs, size_t len)
 }

 bool ob_like_range_simple(const ObCharsetInfo *cs,
-			     const char *ptr, size_t ptr_len,
-			     pbool escape_char, pbool w_one, pbool w_many,
-			     size_t res_len,
-			     char *min_str,char *max_str,
-			     size_t *min_len, size_t *max_len)
+                          const char *ptr, size_t ptr_len,
+                          pbool escape_char, pbool w_one, pbool w_many,
+                          size_t res_len,
+                          char *min_str,char *max_str,
+                          size_t *min_len, size_t *max_len)
 {
  const char *end= ptr + ptr_len;
  char *min_org=min_str;
@ -699,11 +695,11 @@ bool ob_like_range_simple(const ObCharsetInfo *cs,

  for (; ptr != end && min_str != min_end && charlen > 0 ; ptr++, charlen--) {
    if (*ptr == escape_char && ptr+1 != end) {
-      ptr++;					  
+      ptr++;
      *min_str++= *max_str++ = *ptr;
      continue;
    } else if (*ptr == w_one) {
-      *min_str++='\0';				  
+      *min_str++='\0';
      *max_str++= (char) cs->max_sort_char;
      continue;
    } else if (*ptr == w_many) {
@ -742,7 +738,7 @@ bool ob_propagate_complex(const ObCharsetInfo *cs __attribute__((unused)),
 }

 void ob_fill_8bit(const ObCharsetInfo *cs __attribute__((unused)),
-		   char *s, size_t l, int fill)
+     char *s, size_t l, int fill)
 {
  memset(s, fill, l);
 }
@ -758,9 +754,9 @@ int64_t ob_strntoull(const char *ptr, size_t len, int base, char **end, int *err
 }

 void ob_hash_sort_simple(const ObCharsetInfo *cs,
-			 const unsigned char *key, size_t len,
-			 unsigned long int *nr1, unsigned long int *nr2,
-       const bool calc_end_space, hash_algo hash_algo)
+                         const unsigned char *key, size_t len,
+                         unsigned long int *nr1, unsigned long int *nr2,
+                         const bool calc_end_space, hash_algo hash_algo)
 {
  unsigned char *sort_order=cs->sort_order;
  const unsigned char *end;
@ -788,7 +784,7 @@ void ob_hash_sort_simple(const ObCharsetInfo *cs,

 #define SPACE_INT 0x20202020

-const uchar *skip_trailing_space(const uchar *ptr,size_t len, bool is_utf16 /*false*/)
+const unsigned char *skip_trailing_space(const unsigned char *ptr,size_t len, bool is_utf16 /*false*/)
 {
  const unsigned char *end= ptr + len;
  if (len > 20 && !is_utf16) {
@ -853,9 +849,9 @@ size_t ob_casedn_8bit(const ObCharsetInfo *cs __attribute__((unused)),
 }

 int ob_strnncoll_simple(const ObCharsetInfo *cs __attribute__((unused)),
-                               const uchar *s, size_t slen,
-                               const uchar *t, size_t tlen,
-                               bool is_prefix)
+                        const unsigned char *s, size_t slen,
+                        const unsigned char *t, size_t tlen,
+                        bool is_prefix)
 {
  size_t len = (slen > tlen) ? tlen : slen;
  if (is_prefix && slen > tlen) slen = tlen;
@ -871,18 +867,18 @@ int ob_strnncoll_simple(const ObCharsetInfo *cs __attribute__((unused)),

 static int ob_strnncollsp_simple(const ObCharsetInfo *cs
                          __attribute__((unused)),
-                          const uchar *s, size_t slen,
-                          const uchar *t, size_t tlen,
+                          const unsigned char *s, size_t slen,
+                          const unsigned char *t, size_t tlen,
                          bool diff_if_only_endspace_difference
                          __attribute__((unused)))
 {
  size_t len = (slen > tlen) ? tlen : slen;
  for (size_t i = 0; i < len; i++){
-     if(ob_sort_order(cs,*s)!=ob_sort_order(cs,*t)) {
-        return (int)ob_sort_order(cs,*s) - (int)ob_sort_order(cs,*t);
-     }
-      s++;
-      t++;
+    if(ob_sort_order(cs,*s)!=ob_sort_order(cs,*t)) {
+      return (int)ob_sort_order(cs,*s) - (int)ob_sort_order(cs,*t);
+    }
+    s++;
+    t++;
  }
  int res = 0;
  if (slen != tlen) {
@ -896,7 +892,6 @@ static int ob_strnncollsp_simple(const ObCharsetInfo *cs
    */
    if (slen < tlen) {
      slen = tlen;
-
      s = t;
      swap = -1;
      res = -res;
@ -914,14 +909,12 @@ static int ob_strnncollsp_simple(const ObCharsetInfo *cs
  return res;
 }

-
-
 static size_t ob_strnxfrm_simple(const ObCharsetInfo* cs __attribute__((unused)), unsigned char* dst, size_t dstlen,
-    uint nweights, const unsigned char* src, size_t srclen, unsigned int flags, bool* is_valid_unicode)
+    unsigned int nweights, const unsigned char* src, size_t srclen, unsigned int flags, bool* is_valid_unicode)
 {
-  uchar *dst0 = dst;
-  const uchar *end;
-  const uchar *remainder;
+  unsigned char *dst0 = dst;
+  const unsigned char *end;
+  const unsigned char *remainder;
  size_t frmlen;
  frmlen = dstlen > nweights ? nweights : dstlen;
  frmlen = frmlen > srclen ? srclen : frmlen;
@ -1030,9 +1023,8 @@ int ob_wildcmp_8bit(const ObCharsetInfo* cs, const char* str, const char* str_en
  return ob_wildcmp_8bit_impl(cs, str, str_end, wildstr, wildend, escape, w_one, w_many, 1);
 }

-
 uint32_t ob_instr_simple(const ObCharsetInfo* cs , const char* b, size_t b_length,
-    const char* s, size_t s_length, ob_match_t* match, uint nmatch)
+    const char* s, size_t s_length, ob_match_t* match, unsigned int nmatch)
 {
  register const unsigned char *str, *search, *end, *search_end;

@ -1081,8 +1073,6 @@ uint32_t ob_instr_simple(const ObCharsetInfo* cs , const char* b, size_t b_lengt
  return 0;
 }

-
-
 ObCollationHandler ob_collation_8bit_simple_ci_handler = {
    NULL, /* init */
    NULL,
@ -1099,6 +1089,4 @@ ObCollationHandler ob_collation_8bit_simple_ci_handler = {
    ob_propagate_simple};

 #undef likeconv
-#undef INC_PTR
-
-#endif
+#undef INC_PTR
--- a/deps/oblib/src/lib/charset/ob_ctype_uca.cc
+++ b/deps/oblib/src/lib/charset/ob_ctype_uca.cc
--- a/deps/oblib/src/lib/charset/ob_ctype_uca_tab.h
+++ b/deps/oblib/src/lib/charset/ob_ctype_uca_tab.h
--- a/deps/oblib/src/lib/charset/ob_ctype_utf16_os.cc
+++ b/deps/oblib/src/lib/charset/ob_ctype_utf16_os.cc
@ -1,3 +1,4 @@
+
 /** 
 * Copyright (c) 2021 OceanBase 
 * OceanBase CE is licensed under Mulan PubL v2. 
@ -10,21 +11,16 @@
 * See the Mulan PubL v2 for more details.
 */ 

-/*
- * (C) 2017-2020 Alibaba Group Holding Limited.
- *
- *  Authors:
- */
-#ifndef OB_BUILD_FULL_CHARSET
-
 #include "lib/charset/ob_ctype.h"
+#include "lib/charset/str_uca_type.h"
 #include "lib/charset/ob_dtoa.h"
+#include "lib/charset/ob_template_helper.h"

-#define OB_UTF16_HIGH_HEAD(x)  ((((unsigned char) (x)) & 0xFC) == 0xD8)
-#define OB_UTF16_LOW_HEAD(x)   ((((unsigned char) (x)) & 0xFC) == 0xDC)
+#define OB_UTF16_HIGH_HEAD(x)  ((((uchar) (x)) & 0xFC) == 0xD8)
+#define OB_UTF16_LOW_HEAD(x)   ((((uchar) (x)) & 0xFC) == 0xDC)
 #define OB_UTF16_SURROGATE(x)  (((x) & 0xF800) == 0xD800)

-#define OB_UTF16_WC2(a, begin)       ((a << 8) + begin)
+/* Combines two bytes into one value: a becomes the high byte, b the low byte.
+ * Both parameters are parenthesized so that expression arguments (e.g. x | y,
+ * c ? p : q) keep their own grouping inside the expansion. */
+#define OB_UTF16_WC2(a, b)       (((a) << 8) + (b))

 static inline int
 ob_bincmp(const unsigned char *str, const unsigned char *se,
@ -140,7 +136,7 @@ ob_utf16_uni(const ObCharsetInfo *cs __attribute__((unused)),
    } else {
      *pwc= OB_UTF16_WC4(str[0], str[1], str[2], str[3]);
      return 4;
-    }
+  }
  } else if (OB_UTF16_LOW_HEAD(*str)) {
    return OB_CS_ILSEQ;
  } else {
@ -162,7 +158,7 @@ ob_uni_utf16(const ObCharsetInfo *cs __attribute__((unused)),
      *str++= (unsigned char) (wc >> 8);
      *str= (unsigned char) (wc & 0xFF);
      return 2;
-    }
+  }
  } else if (wc <= 0x10FFFF) {
    if (4 > end - str) {
      return OB_CS_TOOSMALL4;
@ -295,7 +291,7 @@ ob_strntol_mb2_or_mb4(const ObCharsetInfo *cs,
        //do nothing
      } else {
        break;
-      } 
+      }
    } else  {
      if (end_ptr != NULL) *end_ptr= (char*) str;
      err[0]= (cnv==OB_CS_ILSEQ) ? EILSEQ : EDOM;
@ -393,7 +389,7 @@ ob_strntoul_mb2_or_mb4(const ObCharsetInfo *cs,
        //do nothing
      } else {
        break;
-      } 
+      }
    } else  {
      if (NULL != end_ptr) {
        *end_ptr= (char*)str;
@ -456,7 +452,7 @@ ob_strntoul_mb2_or_mb4(const ObCharsetInfo *cs,
  return (negative ? -((long) res) : (long) res);
 }

-static longlong 
+static longlong
 ob_strntoll_mb2_or_mb4(const ObCharsetInfo *cs,
                       const char *nptr, size_t l, int base,
                       char **end_ptr, int *err)
@ -546,7 +542,7 @@ ob_strntoll_mb2_or_mb4(const ObCharsetInfo *cs,
  if (negative) {
    if (res  > (uint64_t) LONGLONG_MIN) {
      overflow = 1;
-    }
+  }
  } else if (res > (uint64_t) LONGLONG_MAX) {
    overflow = 1;
  }
@ -904,8 +900,8 @@ ob_strnncollsp_utf16(const ObCharsetInfo *cs,
    if (s_res <= 0 || t_res <= 0) {
      return ob_bincmp(str, se, t, te);
    } else {
-      ob_tosort_utf16(uni_plane, &s_wc);
-      ob_tosort_utf16(uni_plane, &t_wc);
+    ob_tosort_utf16(uni_plane, &s_wc);
+    ob_tosort_utf16(uni_plane, &t_wc);
    }
    if (s_wc != t_wc) {
      return s_wc > t_wc ? 1 : -1;
@ -1097,9 +1093,9 @@ ob_like_range_generic(const ObCharsetInfo *cs,
        } else {
          max_str+= res;
          wc= wc2;
-        } 
+        }
      }
-    }    
+    }
    res= cs->cset->wc_mb(cs, wc, (unsigned char*) min_str, (unsigned char*) min_end);
    if (res <= 0) {
      goto PAD_SET_LEN;
@ -1123,7 +1119,7 @@ PAD_MIN_MAX:
  res_length_diff= res_length % cs->mbminlen;
  cs->cset->fill(cs, min_str, min_end - min_str - res_length_diff, cs->min_sort_char);
  cs->cset->fill(cs, max_str, max_end - max_str - res_length_diff, cs->max_sort_char);
-  
+
  if (res_length_diff != 0) {
    memset(min_end - res_length_diff, 0, res_length_diff);
    memset(max_end - res_length_diff, 0, res_length_diff);
@ -1254,5 +1250,3 @@ ObCharsetInfo ob_charset_utf16_general_ci=
  &ob_collation_utf16_general_ci_handler,
  PAD_SPACE
 };
-
-#endif
--- a/deps/oblib/src/lib/charset/ob_ctype_utf8.cc
+++ b/deps/oblib/src/lib/charset/ob_ctype_utf8.cc
--- a/deps/oblib/src/lib/charset/ob_ctype_utf8_os.cc
+++ b/deps/oblib/src/lib/charset/ob_ctype_utf8_os.cc
--- a/deps/oblib/src/lib/charset/ob_ctype_utf8_tab.h
+++ b/deps/oblib/src/lib/charset/ob_ctype_utf8_tab.h
--- a/deps/oblib/src/lib/charset/ob_cypte_gb18030_tab.h
+++ b/deps/oblib/src/lib/charset/ob_cypte_gb18030_tab.h
--- a/deps/oblib/src/lib/charset/ob_dtoa_os.cc
+++ b/deps/oblib/src/lib/charset/ob_dtoa_os.cc
@ -1,3 +1,4 @@
+
 /** 
 * Copyright (c) 2021 OceanBase 
 * OceanBase CE is licensed under Mulan PubL v2. 
@ -8,19 +9,8 @@
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 
 * See the Mulan PubL v2 for more details.
- */ 
-
-/*
-*
-* Version: $Id
-*
-* Authors:
-*      - initial release
-*
 */

-#ifndef OB_BUILD_FULL_CHARSET
-
 #include "lib/charset/ob_dtoa.h"
 #include "lib/charset/ob_mysql_global.h"

@ -48,51 +38,6 @@ size_t ob_fcvt_overflow(char *to, bool *error)
  return 1;
 }

-
-size_t ob_fcvt(double x, int precision, int width, char *to, bool *error)
-{
-  int decpt, sign;
-  char *res, *end, *dst= to, *dend= to + width;
-  char buf[DTOA_BUF_MAX_SIZE];
-  if (!(precision >= 0 && precision < 31 && to != NULL)) {
-    return 0;
-  }
-  res = dtoa(x, 5, precision, &decpt, &sign, &end, buf, sizeof(buf));
-  if (decpt == DTOA_OVERFLOW) {
-    dtoa_free(res, buf, sizeof(buf));
-    return ob_fcvt_overflow(to, error);
-  }
-  ob_fcvt_help(&end, &dst, &dend, sign, decpt, &precision, &res);
-  *dst= '\0';
-  if (error != NULL) {
-    *error= FALSE;
-  }
-  dtoa_free(res, buf, sizeof(buf));
-  return dst - to;
-}
-
-size_t ob_fcvt_opt(double x, int precision, int width, char *to, bool *error, bool add_padding_zero)
-{
-  int decpt, sign;
-  char *res, *end, *dst= to, *dend= to + width;
-  char buf[DTOA_BUF_MAX_SIZE];
-  if (!(precision >= 0 && precision < 31 && to != NULL)) {
-    return 0;
-  }
-  res = dtoa(x, 5, precision, &decpt, &sign, &end, buf, sizeof(buf));
-  if (decpt == DTOA_OVERFLOW) {
-    dtoa_free(res, buf, sizeof(buf));
-    return ob_fcvt_overflow(to, error);
-  }
-  ob_fcvt_help_opt(&end, &dst, &dend, sign, decpt, &precision, &res, add_padding_zero);
-  *dst= '\0';
-  if (error != NULL)
-    *error= FALSE;
-  dtoa_free(res, buf, sizeof(buf));
-  return dst - to;
-}
-
-
 void ob_fcvt_help(char **end, char **dst, char **dend, int sign, int decpt,
    int *precision, char **res)
 {
@ -133,46 +78,29 @@ void ob_fcvt_help(char **end, char **dst, char **dend, int sign, int decpt,
   }
 }

-void ob_fcvt_help_opt(char **end, char **dst, char **dend, int sign, int decpt,
-    int *precision, char **res, bool add_padding_zero)
-{
-   const int len = (*end) - (*res);
-   const char *dend_ptr = *dend;
-   char *dst_ptr = *dst;
-   char *src = (*res);
-   int i = 0;

-   if (dst_ptr < dend_ptr) {
-     if (sign)
-       *dst_ptr++= '-';
-     if (decpt <= 0)
-     {
-       if ((dst_ptr + 1) < dend_ptr) {
-         *dst_ptr++= '0';
-         *dst_ptr++= '.';
-       }
-       for (i= decpt; i < 0 && dst_ptr < dend_ptr; i++)
-         *dst_ptr++= '0';
-     }
-     for (i= 1; i <= len && dst_ptr < dend_ptr; i++)
-     {
-       *dst_ptr++= *src++;
-       if (i == decpt && i < len && dst_ptr < dend_ptr)
-         *dst_ptr++= '.';
-     }
-     while (i++ <= decpt && dst_ptr < dend_ptr)
-       *dst_ptr++= '0';
-     if (*precision > 0 && add_padding_zero)
-     {
-       if (len <= decpt && dst_ptr < dend_ptr)
-         *dst_ptr++= '.';
-       for (i= *precision - OB_MAX(0, (len - decpt)); i > 0 && dst_ptr < dend_ptr; i--)
-         *dst_ptr++= '0';
-     }
-     *dst = dst_ptr;
-   }
+size_t ob_fcvt(double x, int precision, int width, char *to, bool *error)  // Formats x into `to` using dtoa(); returns the number of chars written before the terminating NUL.
+{
+  int decpt, sign;
+  char *res, *end, *dst= to, *dend= to + width;  // dst: write cursor into `to`; dend: to + width, handed to ob_fcvt_help as the output limit
+  char buf[DTOA_BUF_MAX_SIZE];  // scratch buffer passed to dtoa(); dtoa_free() only free()s a result that lies outside it
+  if (!(precision >= 0 && precision < 31 && to != NULL)) {  // reject a NULL output buffer or a precision outside [0, 30]
+    return 0;  // NOTE(review): *error is not written on this path -- confirm callers initialize it
+  }
+  res = dtoa(x, 5, precision, &decpt, &sign, &end, buf, sizeof(buf));  // mode 5 with `precision` digits; presumably "digits after the decimal point" as in Gay's dtoa -- TODO confirm
+  if (decpt == DTOA_OVERFLOW) {  // dtoa signals overflow through decpt
+    dtoa_free(res, buf, sizeof(buf));
+    return ob_fcvt_overflow(to, error);  // the overflow helper produces the return value for this case
+  }
+  ob_fcvt_help(&end, &dst, &dend, sign, decpt, &precision, &res);  // presumably emits sign/digits/'.' and advances dst -- helper body not fully visible here
+  *dst= '\0';
+  if (error != NULL)  // error is an optional out-parameter
+    *error= FALSE;
+  dtoa_free(res, buf, sizeof(buf));
+  return dst - to;
+}

+//=================================================================================

 size_t ob_gcvt_overflow(char *to, bool *error)
 {
@ -270,10 +198,12 @@ void ob_gcvt_help2(int *width, int *len, char **dend, char **src,

  const int need_check_buf = (*dend - *dst) < MAX_DOUBLE_SIZE;
  if (need_check_buf) {
+
    if (sign && dst_ptr < dend_ptr)
      *dst_ptr++= '-';
    if (dst_ptr < dend_ptr)
      *dst_ptr++= *src_ptr++;
+    //zero
    const int is_zero = (dst_ptr < dend_ptr && use_oracle_mode && (*(src_ptr - 1) == '0') && ((*len) == 1));
    if (is_zero) {
      if (sign) {
@ -482,11 +412,11 @@ typedef union { double d; ULong L[2]; } U;

 #if defined(WORDS_BIGENDIAN) || (defined(__FLOAT_WORD_ORDER) &&        \
                                 (__FLOAT_WORD_ORDER == __BIG_ENDIAN))
-COPY_BIGINT WORD0(x) (x)->L[0]
-#define WORD1(x) (x)->L[1]
+#define word0(x) (x)->L[0]
+#define word1(x) (x)->L[1]
 #else
-#define WORD0(x) (x)->L[1]
-#define WORD1(x) (x)->L[0]
+#define word0(x) (x)->L[1]
+#define word1(x) (x)->L[0]
 #endif

 #define dval(x) (x)->d
@ -520,7 +450,7 @@ COPY_BIGINT WORD0(x) (x)->L[0]
 #else
 #define Flt_Rounds 1
 #endif
-#endif /*Flt_Rounds*/
+#endif

 #ifdef Honor_FLT_ROUNDS
 #define Rounding rounding
@ -540,7 +470,7 @@ COPY_BIGINT WORD0(x) (x)->L[0]

 #define Kmax 15

-#define COPY_BIGINT(x,y) memcpy((char *)&x->sign, (char *)&y->sign,   \
+#define copy_bigint(x,y) memcpy((char *)&x->sign, (char *)&y->sign,   \
                          2*sizeof(int) + y->wds*sizeof(ULong))


@ -594,13 +524,15 @@ static Bigint *alloc_bigint(int k, ObStackAllocator *alloc)



+
 static void free_bigint(Bigint *v, ObStackAllocator *alloc)
 {
  if (v != NULL) {
-    char *g_ptr= (char*) v;                     
-    if (g_ptr < alloc->begin || g_ptr >= alloc->end) {
-      free(g_ptr);
+    char *gptr= (char*) v;
+    if (gptr < alloc->begin || gptr >= alloc->end) {
+      free(gptr);
    } else if (v->k <= Kmax) {
+
      v->p.next= alloc->freelist[v->k];
      alloc->freelist[v->k]= v;
    }
@ -608,6 +540,8 @@ static void free_bigint(Bigint *v, ObStackAllocator *alloc)
 }


+
+
 static char *dtoa_alloc(int i, ObStackAllocator *alloc)
 {
  char *rv;
@ -616,19 +550,26 @@ static char *dtoa_alloc(int i, ObStackAllocator *alloc)
    rv = alloc->free;
    alloc->free += aligned_size;
  } else {
-    rv = (char*)malloc(i);
+    rv = static_cast<char*>(malloc(i));
  }
  return rv;
 }

-static void dtoa_free(char *g_ptr, char *buf, size_t buf_size)
+
+
+
+static void dtoa_free(char *gptr, char *buf, size_t buf_size)  // Releases gptr unless it points inside the caller's buffer [buf, buf + buf_size).
 {
-  if (g_ptr < buf || g_ptr >= buf + buf_size) {
-    free(g_ptr);
+  if (gptr < buf || gptr >= buf + buf_size) {  // outside the buffer: presumably obtained from the malloc() fallback in dtoa_alloc
+    free(gptr);  // pointers inside buf need no release here
   }
 }


+
+
+
+
 static Bigint *mult_and_add(Bigint *b, int m, int a, ObStackAllocator *alloc)
 {
  int i, wds;
@ -652,7 +593,7 @@ static Bigint *mult_and_add(Bigint *b, int m, int a, ObStackAllocator *alloc)
    if (wds >= b->maxwds)
    {
      b1= alloc_bigint(b->k+1, alloc);
-      COPY_BIGINT(b1, b);
+      copy_bigint(b1, b);
      free_bigint(b, alloc);
      b= b1;
    }
@ -1038,9 +979,9 @@ static double ulp(U *x)
  register Long L;
  U u;

-  L= (WORD0(x) & Exp_mask) - (P - 1)*Exp_msk1;
-  WORD0(&u) = L;
-  WORD1(&u) = 0;
+  L= (word0(x) & Exp_mask) - (P - 1)*Exp_msk1;
+  word0(&u) = L;
+  word1(&u) = 0;
  return dval(&u);
 }

@ -1050,8 +991,8 @@ static double b2d(Bigint *a, int *e)
  ULong *xa, *xa0, w, y, z;
  int k;
  U d;
-#define d0 WORD0(&d)
-#define d1 WORD1(&d)
+#define d0 word0(&d)
+#define d1 word1(&d)

  xa0= a->p.x;
  xa= xa0 + a->wds;
@ -1090,8 +1031,8 @@ static Bigint *d2b(U *d, int *e, int *bits, ObStackAllocator *alloc)
  int de, k;
  ULong *x, y, z;
  int i;
-#define d0 WORD0(d)
-#define d1 WORD1(d)
+#define d0 word0(d)
+#define d1 word1(d)

  b= alloc_bigint(1, alloc);
  x= b->p.x;
@ -1146,11 +1087,11 @@ static double ratio(Bigint *a, Bigint *b)
  dval(&db)= b2d(b, &kb);
  k= ka - kb + 32*(a->wds - b->wds);
  if (k > 0)
-    WORD0(&da)+= k*Exp_msk1;
+    word0(&da)+= k*Exp_msk1;
  else
  {
    k= -k;
-    WORD0(&db)+= k*Exp_msk1;
+    word0(&db)+= k*Exp_msk1;
  }
  return dval(&da) / dval(&db);
 }
@ -1438,16 +1379,16 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
        {
        case 0:
        case 3:
-          WORD0(&rv)= Big0;
-          WORD1(&rv)= Big1;
+          word0(&rv)= Big0;
+          word1(&rv)= Big1;
          break;
        default:
-          WORD0(&rv)= Exp_mask;
-          WORD1(&rv)= 0;
+          word0(&rv)= Exp_mask;
+          word1(&rv)= 0;
        }
 #else 
-        WORD0(&rv)= Exp_mask;
-        WORD1(&rv)= 0;
+        word0(&rv)= Exp_mask;
+        word1(&rv)= 0;
 #endif 
 #ifdef SET_INEXACT
        dval(&rv0)= 1e300;
@ -1461,17 +1402,17 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
      for(j= 0; e1 > 1; j++, e1>>= 1)
        if (e1 & 1)
          dval(&rv)*= bigtens[j];
-      WORD0(&rv)-= P*Exp_msk1;
+      word0(&rv)-= P*Exp_msk1;
      dval(&rv)*= bigtens[j];
-      if ((z= WORD0(&rv) & Exp_mask) > Exp_msk1 * (DBL_MAX_EXP + Bias - P))
+      if ((z= word0(&rv) & Exp_mask) > Exp_msk1 * (DBL_MAX_EXP + Bias - P))
        goto ovfl;
      if (z > Exp_msk1 * (DBL_MAX_EXP + Bias - 1 - P))
      {
-        WORD0(&rv)= Big0;
-        WORD1(&rv)= Big1;
+        word0(&rv)= Big0;
+        word1(&rv)= Big1;
      }
      else
-        WORD0(&rv)+= P*Exp_msk1;
+        word0(&rv)+= P*Exp_msk1;
    }
  }
  else if (e1 < 0)
@ -1488,18 +1429,18 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
      for(j= 0; e1 > 0; j++, e1>>= 1)
        if (e1 & 1)
          dval(&rv)*= tinytens[j];
-      if (scale && (j = 2 * P + 1 - ((WORD0(&rv) & Exp_mask) >> Exp_shift)) > 0)
+      if (scale && (j = 2 * P + 1 - ((word0(&rv) & Exp_mask) >> Exp_shift)) > 0)
      {
        if (j >= 32)
        {
-          WORD1(&rv)= 0;
+          word1(&rv)= 0;
          if (j >= 53)
-            WORD0(&rv)= (P + 2) * Exp_msk1;
+            word0(&rv)= (P + 2) * Exp_msk1;
          else
-            WORD0(&rv)&= 0xffffffff << (j - 32);
+            word0(&rv)&= 0xffffffff << (j - 32);
        }
        else
-          WORD1(&rv)&= 0xffffffff << j;
+          word1(&rv)&= 0xffffffff << j;
      }
      if (!dval(&rv))
      {
@ -1517,8 +1458,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
  for(;;)
  {
    bd= alloc_bigint(bd0->k, &alloc);
-    COPY_BIGINT(bd, bd0);
-    bb= d2b(&rv, &bbe, &bbbits, &alloc); 
+    copy_bigint(bd, bd0);
+    bb= d2b(&rv, &bbe, &bbbits, &alloc);
    bs= integer2bigint(1, &alloc);

    if (e >= 0)
@ -1541,7 +1482,7 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
      bs2++;
 #endif
    j= bbe - scale;
-    i= j + bbbits - 1; 
+    i= j + bbbits - 1;
    if (i < Emin)  
      j+= P - Emin;
    else
@ -1600,9 +1541,9 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
        else if (!dsign)
        {
          adj.d= -1.;
-          if (!WORD1(&rv) && !(WORD0(&rv) & Frac_mask))
+          if (!word1(&rv) && !(word0(&rv) & Frac_mask))
          {
-            y= WORD0(&rv) & Exp_mask;
+            y= word0(&rv) & Exp_mask;
            if (!scale || y > 2*P*Exp_msk1)
            {
              delta= left_shift(delta, Log2P, &alloc);
@ -1611,8 +1552,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
            }
          }
 apply_adj:
-          if (scale && (y= WORD0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
-            WORD0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
+          if (scale && (y= word0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
+            word0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
          dval(&rv)+= adj.d * ulp(&rv);
        }
        break;
@ -1622,6 +1563,7 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
        adj.d= 1.;
      if (adj.d <= 0x7ffffffe)
      {
+
        y= adj.d;
        if (y != adj.d)
        {
@ -1630,8 +1572,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
          adj.d= y;
        }
      }
-      if (scale && (y= WORD0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
-        WORD0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
+      if (scale && (y= word0(&rv) & Exp_mask) <= 2 * P * Exp_msk1)
+        word0(&adj)+= (2 * P + 1) * Exp_msk1 - y;
      adj.d*= ulp(&rv);
      if (dsign)
        dval(&rv)+= adj.d;
@ -1643,8 +1585,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s

    if (i < 0)
    {
-      if (dsign || WORD1(&rv) || WORD0(&rv) & Bndry_mask ||
-          (WORD0(&rv) & Exp_mask) <= (2 * P + 1) * Exp_msk1)
+      if (dsign || word1(&rv) || word0(&rv) & Bndry_mask ||
+          (word0(&rv) & Exp_mask) <= (2 * P + 1) * Exp_msk1)
      {
 #ifdef SET_INEXACT
        if (!delta->x[0] && delta->wds <= 1)
@ -1668,25 +1610,24 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
    {
      if (dsign)
      {
-        if ((WORD0(&rv) & Bndry_mask1) == Bndry_mask1 &&
-            WORD1(&rv) ==
-            ((scale && (y = WORD0(&rv) & Exp_mask) <= 2 * P * Exp_msk1) ?
+        if ((word0(&rv) & Bndry_mask1) == Bndry_mask1 &&
+            word1(&rv) ==
+            ((scale && (y = word0(&rv) & Exp_mask) <= 2 * P * Exp_msk1) ?
             (0xffffffff & (0xffffffff << (2*P+1-(y>>Exp_shift)))) :
             0xffffffff))
        {
-          WORD0(&rv)= (WORD0(&rv) & Exp_mask) + Exp_msk1;
-          WORD1(&rv) = 0;
+          word0(&rv)= (word0(&rv) & Exp_mask) + Exp_msk1;
+          word1(&rv) = 0;
          dsign = 0;
          break;
        }
      }
-      else if (!(WORD0(&rv) & Bndry_mask) && !WORD1(&rv))
+      else if (!(word0(&rv) & Bndry_mask) && !word1(&rv))
      {
 drop_down:
-
        if (scale)
        {
-          L= WORD0(&rv) & Exp_mask;
+          L= word0(&rv) & Exp_mask;
          if (L <= (2 *P + 1) * Exp_msk1)
          {
            if (L > (P + 2) * Exp_msk1)
@ -1694,12 +1635,12 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
            goto undfl;
          }
        }
-        L= (WORD0(&rv) & Exp_mask) - Exp_msk1;
-        WORD0(&rv)= L | Bndry_mask1;
-        WORD1(&rv)= 0xffffffff;
+        L= (word0(&rv) & Exp_mask) - Exp_msk1;
+        word0(&rv)= L | Bndry_mask1;
+        word1(&rv)= 0xffffffff;
        break;
      }
-      if (!(WORD1(&rv) & LSB))
+      if (!(word1(&rv) & LSB))
        break;
      if (dsign)
        dval(&rv)+= ulp(&rv);
@ -1716,9 +1657,9 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
    {
      if (dsign)
        aadj= aadj1= 1.;
-      else if (WORD1(&rv) || WORD0(&rv) & Bndry_mask)
+      else if (word1(&rv) || word0(&rv) & Bndry_mask)
      {
-        if (WORD1(&rv) == Tiny1 && !WORD0(&rv))
+        if (word1(&rv) == Tiny1 && !word0(&rv))
          goto undfl;
        aadj= 1.;
        aadj1= -1.;
@ -1751,24 +1692,24 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
        aadj1+= 0.5;
 #endif
    }
-    y= WORD0(&rv) & Exp_mask;
+    y= word0(&rv) & Exp_mask;

    if (y == Exp_msk1 * (DBL_MAX_EXP + Bias - 1))
    {
      dval(&rv0)= dval(&rv);
-      WORD0(&rv)-= P * Exp_msk1;
+      word0(&rv)-= P * Exp_msk1;
      adj.d= aadj1 * ulp(&rv);
      dval(&rv)+= adj.d;
-      if ((WORD0(&rv) & Exp_mask) >= Exp_msk1 * (DBL_MAX_EXP + Bias - P))
+      if ((word0(&rv) & Exp_mask) >= Exp_msk1 * (DBL_MAX_EXP + Bias - P))
      {
-        if (WORD0(&rv0) == Big0 && WORD1(&rv0) == Big1)
+        if (word0(&rv0) == Big0 && word1(&rv0) == Big1)
          goto ovfl;
-        WORD0(&rv)= Big0;
-        WORD1(&rv)= Big1;
+        word0(&rv)= Big0;
+        word1(&rv)= Big1;
        goto cont;
      }
      else
-        WORD0(&rv)+= P * Exp_msk1;
+        word0(&rv)+= P * Exp_msk1;
    }
    else
    {
@ -1782,7 +1723,7 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
          aadj1= dsign ? aadj : -aadj;
        }
        dval(&aadj2) = aadj1;
-        WORD0(&aadj2)+= (2 * P + 1) * Exp_msk1 - y;
+        word0(&aadj2)+= (2 * P + 1) * Exp_msk1 - y;
        aadj1= dval(&aadj2);
        adj.d= aadj1 * ulp(&rv);
        dval(&rv)+= adj.d;
@ -1795,14 +1736,14 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
        dval(&rv)+= adj.d;
      }
    }
-    z= WORD0(&rv) & Exp_mask;
+    z= word0(&rv) & Exp_mask;
 #ifndef SET_INEXACT
    if (!scale)
      if (y == z)
      {
        L= (Long)aadj;
        aadj-= L;
-        if (dsign || WORD1(&rv) || WORD0(&rv) & Bndry_mask)
+        if (dsign || word1(&rv) || word0(&rv) & Bndry_mask)
        {
          if (aadj < .4999999 || aadj > .5000001)
            break;
@ -1822,8 +1763,8 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
  {
    if (!oldinexact)
    {
-      WORD0(&rv0)= Exp_1 + (70 << Exp_shift);
-      WORD1(&rv0)= 0;
+      word0(&rv0)= Exp_1 + (70 << Exp_shift);
+      word1(&rv0)= 0;
      dval(&rv0)+= 1.;
    }
  }
@ -1832,13 +1773,14 @@ static double ob_strtod_int(const char *s00, char **se, int *error, char *buf, s
 #endif
  if (scale)
  {
-    WORD0(&rv0)= Exp_1 - 2 * P * Exp_msk1;
-    WORD1(&rv0)= 0;
+    word0(&rv0)= Exp_1 - 2 * P * Exp_msk1;
+    word1(&rv0)= 0;
    dval(&rv)*= dval(&rv0);
  }
 #ifdef SET_INEXACT
-  if (inexact && !(WORD0(&rv) & Exp_mask))
+  if (inexact && !(word0(&rv) & Exp_mask))
  {
+
    dval(&rv0)= 1e-300;
    dval(&rv0)*= dval(&rv0);
  }
@ -1868,7 +1810,7 @@ static int quorem(Bigint *b, Bigint *S)
  sxe= sx + --n;
  bx= b->p.x;
  bxe= bx + n;
-  q= *bxe / (*sxe + 1); 
+  q= *bxe / (*sxe + 1);
  if (q)
  {
    borrow= 0;
@ -1941,16 +1883,15 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
  memset(alloc.freelist, 0, sizeof(alloc.freelist));

  u.d= dd;
-  if (WORD0(&u) & Sign_bit)
+  if (word0(&u) & Sign_bit)
  {
    *sign= 1;
-    WORD0(&u) &= ~Sign_bit; 
+    word0(&u) &= ~Sign_bit;
  }
  else
    *sign= 0;

-
-  if (((WORD0(&u) & Exp_mask) == Exp_mask && (*decpt= DTOA_OVERFLOW)) ||
+  if (((word0(&u) & Exp_mask) == Exp_mask && (*decpt= DTOA_OVERFLOW)) ||
      (!dval(&u) && (*decpt= 1)))
  {
    char *res= (char*) dtoa_alloc(2, &alloc);
@ -1973,11 +1914,11 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
 #endif

  b= d2b(&u, &be, &bbits, &alloc);
-  if ((i= (int)(WORD0(&u) >> Exp_shift1 & (Exp_mask>>Exp_shift1))))
+  if ((i= (int)(word0(&u) >> Exp_shift1 & (Exp_mask>>Exp_shift1))))
  {
    dval(&d2)= dval(&u);
-    WORD0(&d2) &= Frac_mask1;
-    WORD0(&d2) |= Exp_11;
+    word0(&d2) &= Frac_mask1;
+    word0(&d2) |= Exp_11;


    i-= Bias;
@ -1987,17 +1928,17 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
  {

    i= bbits + be + (Bias + (P-1) - 1);
-    x= i > 32  ? WORD0(&u) << (64 - i) | WORD1(&u) >> (i - 32)
-      : WORD1(&u) << (32 - i);
+    x= i > 32  ? word0(&u) << (64 - i) | word1(&u) >> (i - 32)
+      : word1(&u) << (32 - i);
    dval(&d2)= x;
-    WORD0(&d2)-= 31*Exp_msk1; 
+    word0(&d2)-= 31*Exp_msk1;
    i-= (Bias + (P-1) - 1) + 1;
    denorm= 1;
  }
  ds= (dval(&d2)-1.5)*0.289529654602168 + 0.1760912590558 + i*0.301029995663981;
  k= (int)ds;
  if (ds < 0. && ds != k)
-    k--;    
+    k--;
  k_check= 1;
  if (k >= 0 && k <= Ten_pmax)
  {
@ -2079,7 +2020,7 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
    dval(&d2)= dval(&u);
    k0= k;
    ilim0= ilim;
-    ieps= 2; 
+    ieps= 2;
    if (k > 0)
    {
      ds= tens[k&0xf];
@ -2122,7 +2063,7 @@ static char *dtoa(double dd, int mode, int ndigits, int *decpt, int *sign,
      ieps++;
    }
    dval(&eps)= ieps*dval(&u) + 7.;
-    WORD0(&eps)-= (P-1)*Exp_msk1;
+    word0(&eps)-= (P-1)*Exp_msk1;
    if (ilim == 0)
    {
      S= mhi= 0;
@ -2284,8 +2225,8 @@ bump_up:
 #endif
     )
  {
-    if (!WORD1(&u) && !(WORD0(&u) & Bndry_mask) &&
-        (WORD0(&u) & (Exp_mask & (~Exp_msk1)))
+    if (!word1(&u) && !(word0(&u) & Bndry_mask) &&
+        (word0(&u) & (Exp_mask & (~Exp_msk1)))
       )
    {
      b2+= Log2P;
@ -2347,7 +2288,7 @@ one_digit:
    if (spec_case)
    {
      mhi= alloc_bigint(mhi->k, &alloc);
-      COPY_BIGINT(mhi, mlo);
+      copy_bigint(mhi, mlo);
      mhi= left_shift(mhi, Log2P, &alloc);
    }

@ -2358,7 +2299,7 @@ one_digit:
      delta= bigint_diff(S, mhi, &alloc);
      j1= delta->sign ? 1 : bigint_cmp(b, delta);
      free_bigint(delta, &alloc);
-      if (j1 == 0 && mode != 1 && !(WORD1(&u) & 1)
+      if (j1 == 0 && mode != 1 && !(word1(&u) & 1)
 #ifdef Honor_FLT_ROUNDS
          && rounding >= 1
 #endif
@ -2371,7 +2312,7 @@ one_digit:
        *s++= dig;
        goto ret;
      }
-      if (j < 0 || (j == 0 && mode != 1 && !(WORD1(&u) & 1)))
+      if (j < 0 || (j == 0 && mode != 1 && !(word1(&u) & 1)))
      {
        if (!b->p.x[0] && b->wds <= 1)
        {
@ -2487,5 +2428,4 @@ ret1:
 }

 #undef P
-
-#endif
+#undef Rounding
--- a/deps/oblib/src/lib/charset/ob_gb18030_2022_tab.h
+++ b/deps/oblib/src/lib/charset/ob_gb18030_2022_tab.h
@ -46,38 +46,38 @@
  these arrays plus CHINESE_WEIGHT_BASE.
 */

-static const uint PINYIN_2_BYTE_START_2022 = 0x8140;
-static const uint PINYIN_2_BYTE_END_2022 = 0xFE9F;
+static const unsigned int PINYIN_2_BYTE_START_2022 = 0x8140;
+static const unsigned int PINYIN_2_BYTE_END_2022 = 0xFE9F;

-static const uint PINYIN_4_BYTE_1_START_2022 = 0x8138FD38;
-static const uint PINYIN_4_1_DIFF_2022 = 11328;
-static const uint PINYIN_4_BYTE_1_END_2022 = 0x82359737;
+static const unsigned int PINYIN_4_BYTE_1_START_2022 = 0x8138FD38;
+static const unsigned int PINYIN_4_1_DIFF_2022 = 11328;
+static const unsigned int PINYIN_4_BYTE_1_END_2022 = 0x82359737;

-static const uint PINYIN_4_BYTE_2_START_2022 = 0x95328236;
-static const uint PINYIN_4_2_DIFF_2022 = 254536;
-static const uint PINYIN_4_BYTE_2_END_2022 = 0x9A37F738;
+static const unsigned int PINYIN_4_BYTE_2_START_2022 = 0x95328236;
+static const unsigned int PINYIN_4_2_DIFF_2022 = 254536;
+static const unsigned int PINYIN_4_BYTE_2_END_2022 = 0x9A37F738;

-static const uint STROKE_2_BYTE_START_2022 = 0x8140;
-static const uint STROKE_2_BYTE_END_2022 = 0xFE9F;
+static const unsigned int STROKE_2_BYTE_START_2022 = 0x8140;
+static const unsigned int STROKE_2_BYTE_END_2022 = 0xFE9F;

-static const uint STROKE_4_BYTE_1_START_2022 = 0x8138FD38;
-static const uint STROKE_4_1_DIFF_2022 = 11328;
-static const uint STROKE_4_BYTE_1_END_2022 = 0x82359832;
+static const unsigned int STROKE_4_BYTE_1_START_2022 = 0x8138FD38;
+static const unsigned int STROKE_4_1_DIFF_2022 = 11328;
+static const unsigned int STROKE_4_BYTE_1_END_2022 = 0x82359832;

-static const uint STROKE_4_BYTE_2_START_2022 = 0x95328236;
-static const uint STROKE_4_2_DIFF_2022 = 254536;
-static const uint STROKE_4_BYTE_2_END_2022 = 0x9B31A337;
+static const unsigned int STROKE_4_BYTE_2_START_2022 = 0x95328236;
+static const unsigned int STROKE_4_2_DIFF_2022 = 254536;
+static const unsigned int STROKE_4_BYTE_2_END_2022 = 0x9B31A337;

-static const uint RADICAL_2_BYTE_START_2022 = 0x8140;
-static const uint RADICAL_2_BYTE_END_2022 = 0xFEA0;
+static const unsigned int RADICAL_2_BYTE_START_2022 = 0x8140;
+static const unsigned int RADICAL_2_BYTE_END_2022 = 0xFEA0;

-static const uint RADICAL_4_BYTE_1_START_2022 = 0x8139EE39;
-static const uint RADICAL_4_1_DIFF_2022 = 12439;
-static const uint RADICAL_4_BYTE_1_END_2022 = 0x8430AE33;
+static const unsigned int RADICAL_4_BYTE_1_START_2022 = 0x8139EE39;
+static const unsigned int RADICAL_4_1_DIFF_2022 = 12439;
+static const unsigned int RADICAL_4_BYTE_1_END_2022 = 0x8430AE33;

-static const uint RADICAL_4_BYTE_2_START_2022 = 0x95328236;
-static const uint RADICAL_4_2_DIFF_2022 = 254536;
-static const uint RADICAL_4_BYTE_2_END_2022 = 0x9B31A337;
+static const unsigned int RADICAL_4_BYTE_2_START_2022 = 0x95328236;
+static const unsigned int RADICAL_4_2_DIFF_2022 = 254536;
+static const unsigned int RADICAL_4_BYTE_2_END_2022 = 0x9B31A337;

 static const uint16 gb18030_2022_2_pinyin_weight_py[] = {
  16323, 28217, 34164, 10708, 21648, 4104,  28850, 6524,  26203, 18824, 39282, 1913,  15200, 13279, 14725, 10029,   /*[GB+8140, GB+814F]*/
--- a/deps/oblib/src/lib/charset/str_uca_type.h
+++ b/deps/oblib/src/lib/charset/str_uca_type.h
@ -0,0 +1,235 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+
+#ifndef STR_UCA_TYPE_H
+#define STR_UCA_TYPE_H
+
+#include <vector>
+
+/*
+  Capacity of Reorder_param::reorder_grp. So far only the Croatian collation
+  needs to reorder character groups (Latin and Cyrillic); more may be added.
+*/
+#define UCA_MAX_CHAR_GRP 4
+enum enum_uca_ver { UCA_V400, UCA_V520, UCA_V900 };  // UCA version of a collation (see ObUCAInfo::version); enumerators are declared in ascending version order
+
+enum enum_char_grp {  // Character groups that can be listed in Reorder_param::reorder_grp
+  CHARGRP_NONE,       // presumably "no group" / unused slot - confirm
+  CHARGRP_CORE,
+  CHARGRP_LATIN,      // Latin characters
+  CHARGRP_CYRILLIC,   // Cyrillic characters
+  CHARGRP_ARAB,       // presumably Arabic - confirm
+  CHARGRP_KANA,       // presumably Japanese kana - confirm
+  CHARGRP_OTHERS
+};
+
+struct Weight_boundary {  // Pair of 16-bit weights delimiting a weight range
+  uint16 begin;  // start of the range
+  uint16 end;    // end of the range (NOTE(review): inclusive vs. exclusive is not visible here - confirm)
+};
+
+struct Reorder_wt_rec {  // One reordering record: an "old" and a "new" weight range
+  struct Weight_boundary old_wt_bdy;  // presumably the range before reordering - confirm
+  struct Weight_boundary new_wt_bdy;  // presumably the range the old one is moved to - confirm
+};
+
+struct Reorder_param {  // Group-reordering settings (pointed to by Coll_param::reorder_param)
+  enum enum_char_grp reorder_grp[UCA_MAX_CHAR_GRP];  // character groups to reorder; presumably unused slots hold CHARGRP_NONE - confirm
+  struct Reorder_wt_rec wt_rec[2 * UCA_MAX_CHAR_GRP];  // weight-range records; capacity is two per possible group
+  int wt_rec_num;  // presumably the number of wt_rec entries in use - confirm
+  uint16 max_weight;  // NOTE(review): meaning not visible in this header - confirm against the reordering code
+};
+
+enum enum_case_first { CASE_FIRST_OFF, CASE_FIRST_UPPER, CASE_FIRST_LOWER };  // Case-first option stored in Coll_param::case_first; presumably off / upper case first / lower case first - confirm
+
+struct Coll_param {  // Options of one collation: reordering, normalization and case-first
+  struct Reorder_param *reorder_param;  // reordering settings; presumably null when nothing is reordered - confirm
+  bool norm_enabled;  // false = normalization off (the default);
+                      // true = normalization on
+  enum enum_case_first case_first;  // case-first option of this collation
+};
+
+/*
+  NOTE: If you change OB_UCA_MAX_CONTRACTION, be sure to update the comment on
+  OB_UCA_CNT_MID1 (further down in this header; this note used to point at
+  strings/uca_data.h), as it might cause us to run out of bits in a byte flag.
+*/
+#define OB_UCA_MAX_CONTRACTION 6
+#define OB_UCA_MAX_WEIGHT_SIZE 25  // number of uint16 slots in ObContraction::weight
+#define OB_UCA_WEIGHT_LEVELS 1  // NOTE(review): not used in this header; presumably the number of weight levels kept - confirm
+
+/*
+  We store all the contractions in a trie, indexed on the code points they
+  consist of. The trie is organized as:
+  1. Each node stores one code point (ch) of a contraction, and a list of
+     nodes (child_nodes) storing all possible following code points.
+  2. The vector ObUCAInfo::contraction_nodes stores the nodes holding the
+     first code points of all contractions.
+  3. Each node has a boolean value (is_contraction_tail) which shows
+     whether the code point stored in the node is the end of a contraction.
+     This is necessary because even if one code point is the end of a
+     contraction, there might be a longer contraction that contains all the
+     code points in the path (e.g., for Hungarian, both 'DZ' and 'DZS' are
+     contractions).
+  4. A contraction is formed by all the code points in the path until the
+     end of the contraction.
+  5. If it is the end of a contraction (is_contraction_tail == true), the
+     weight of this contraction is stored in array weight.
+  6. NOTE(review): a with_context flag (common vs. previous-context
+     contraction) was documented here, but the struct has no such member;
+     presumably child_nodes_context holds previous-context nodes - confirm.
+  7. If it is the end of a contraction (is_contraction_tail == true),
+     contraction_len shows how many code points this contraction consists of.
+*/
+struct ObContraction {
+  ob_wc_t ch;  // code point stored in this trie node
+  // Lists of following nodes.
+  std::vector<ObContraction> child_nodes;
+  std::vector<ObContraction> child_nodes_context;
+
+  // weight and contraction_len are only useful when is_contraction_tail is true.
+  uint16 weight[OB_UCA_MAX_WEIGHT_SIZE]; /* Its weight string, 0-terminated */
+  bool is_contraction_tail;  // true when a contraction ends at this node
+  size_t contraction_len;  // number of code points in the contraction ending here
+};
+
+struct ObUCAInfo {
+  enum enum_uca_ver version;  // UCA version this data belongs to
+
+  // Collation weights.
+  ob_wc_t maxchar;
+  uchar *lengths;
+  uint16 **weights;
+  bool have_contractions;
+  std::vector<ObContraction> *contraction_nodes;  // trie nodes holding the first code points of all contractions
+  /*
+    contraction_flags is only used when a collation has contraction rules.
+    A UCA collation supports at least 65535 characters, but only a few of
+    them can be part of a contraction, so it would be a huge waste of time to
+    search the contraction list for every character.
+    contraction_flags points to memory which is allocated when a collation
+    has contraction rules. For a character in a contraction, its corresponding
+    byte (contraction_flags[ch & OB_UCA_CNT_FLAG_MASK]) will be set to a value
+    according to the position (head, tail or middle) of this character in the
+    contraction. This byte is used to quickly check whether one character
+    can be part of a contraction.
+  */
+  char *contraction_flags;
+
+  /* Logical positions */
+  ob_wc_t first_non_ignorable;
+  ob_wc_t last_non_ignorable;
+  ob_wc_t first_primary_ignorable;
+  ob_wc_t last_primary_ignorable;
+  ob_wc_t first_secondary_ignorable;
+  ob_wc_t last_secondary_ignorable;
+  ob_wc_t first_tertiary_ignorable;
+  ob_wc_t last_tertiary_ignorable;
+  ob_wc_t first_trailing;
+  ob_wc_t last_trailing;
+  ob_wc_t first_variable;
+  ob_wc_t last_variable;
+  /*
+    extra_ce_pri_base, extra_ce_sec_base and extra_ce_ter_base are only used for
+    the UCA collations whose UCA version is not smaller than UCA_V900. For why
+    we need this extra CE, please see the comment in my_char_weight_put_900()
+    and apply_primary_shift_900().
+
+    The value of these three variables is set by the definition of my_uca_v900.
+    The value of extra_ce_pri_base is usually 0x54A4 (which is the maximum
+    regular weight value plus one, 0x54A3 + 1 = 0x54A4). But for the Chinese
+    collation, the extra_ce_pri_base needs to change. This is because 0x54A4 has
+    been occupied to do reordering. There might be weight conflict if we still
+    use 0x54A4. Please also see the comment on modify_all_zh_pages().
+
+    NOTE(review): the functions and the variable cited in this comment carry
+    a my_ prefix or no prefix, unlike the ob_/Ob names declared in this header;
+    verify that they exist under these names.
+   */
+  uint16 extra_ce_pri_base;  // Primary weight of extra CE
+  uint16 extra_ce_sec_base;  // Secondary weight of extra CE
+  uint16 extra_ce_ter_base;  // Tertiary weight of extra CE
+};
+
+#define OB_UCA_CNT_FLAG_SIZE 4096  // presumably the byte size of the contraction_flags table (OB_UCA_CNT_FLAG_MASK + 1) - confirm at the allocation site
+#define OB_UCA_CNT_FLAG_MASK 4095  // mask applied to a code point to index the flag table (see ob_uca_can_be_contraction_head/tail)
+
+/** Whether the given character can be the first in any contraction. */
+#define OB_UCA_CNT_HEAD 1
+
+/** Whether the given character can be the last in any contraction. */
+#define OB_UCA_CNT_TAIL 2
+
+/**
+ Whether the given character can be the second in any contraction.
+
+ Also defined implicitly through shifting OB_UCA_CNT_MID1 left by one,
+ two and three bits:
+
+ \#define OB_UCA_CNT_MID2  8
+ \#define OB_UCA_CNT_MID3  16
+ \#define OB_UCA_CNT_MID4  32
+
+ There's no need for OB_UCA_CNT_MID5 (which would cause us to run out of
+ bits) since OB_UCA_MAX_CONTRACTION is 6 (so head, four in the middle,
+ and then tail).
+*/
+#define OB_UCA_CNT_MID1 4
+
+/**
+ Whether the given character is the first part of a context-sensitive
+ contraction. Context-sensitive contractions are like normal contractions,
+ except that for performance reasons, they trigger on the _last_ character
+ instead of the first. The case given in Unicode TR35 is that in some
+ scripts (such as katakana in Japanese), "a-" should sort as "aa"
+ (except on the tertiary level), "e-" should sort as "ee" and so on.
+ However, adding regular contractions on "a" and "e" would cause undue
+ performance loss, so instead, we add a special "context-sensitive"
+ contraction on "-" that then looks at the _previous_ character.
+
+ We don't support context-sensitive contractions longer than two characters
+ at the moment, since none exist in CLDR. Thus, there is no
+ OB_UCA_PREVIOUS_CONTEXT_MID1 and so on.
+*/
+#define OB_UCA_PREVIOUS_CONTEXT_HEAD 64
+
+/** Similar to OB_UCA_PREVIOUS_CONTEXT_HEAD, just for the tail. */
+#define OB_UCA_PREVIOUS_CONTEXT_TAIL 128
+
+#define OB_UCA_PSHIFT 8  // NOTE(review): not used in this header; presumably the shift that splits a code point into a page number and an in-page offset - confirm
+
+/**
+  Check if a code point can be contraction head
+
+  The flag byte is selected by the low bits of the code point only
+  (wc & OB_UCA_CNT_FLAG_MASK), so code points sharing those bits share one
+  byte. flags is dereferenced without a null check.
+
+  @param flags    Pointer to UCA contraction flag data
+  @param wc       Code point
+
+  @retval   false - cannot be contraction head
+  @retval   true  - can be contraction head
+*/
+
+inline bool ob_uca_can_be_contraction_head(const char *flags, ob_wc_t wc) {
+  return flags[wc & OB_UCA_CNT_FLAG_MASK] & OB_UCA_CNT_HEAD;
+}
+
+/**
+  Check if a code point can be contraction tail
+
+  The flag byte is selected by the low bits of the code point only
+  (wc & OB_UCA_CNT_FLAG_MASK), so code points sharing those bits share one
+  byte. flags is dereferenced without a null check.
+
+  @param flags    Pointer to UCA contraction flag data
+  @param wc       Code point
+
+  @retval   false - cannot be contraction tail
+  @retval   true  - can be contraction tail
+*/
+
+inline bool ob_uca_can_be_contraction_tail(const char *flags, ob_wc_t wc) {
+  return flags[wc & OB_UCA_CNT_FLAG_MASK] & OB_UCA_CNT_TAIL;
+}
+
+const uint16 *ob_uca_contraction2_weight(
+    const std::vector<ObContraction> *cont_nodes, ob_wc_t wc1, ob_wc_t wc2);  // Declaration only; presumably returns the weight string of the two-code-point contraction (wc1, wc2) found in cont_nodes, or a null pointer when there is none - confirm against the definition
+#endif
--- a/deps/oblib/src/lib/charset/uca900_data.h
+++ b/deps/oblib/src/lib/charset/uca900_data.h
--- a/deps/oblib/src/lib/charset/uca900_ja_data.h
+++ b/deps/oblib/src/lib/charset/uca900_ja_data.h
@ -0,0 +1,571 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+#ifndef UCA900_JA_DATA_H
+#define UCA900_JA_DATA_H
+
+// Quaternary weight of katakana (larger than the hiragana weight; the ja_cldr_30 rules in this header order hiragana before katakana with "<<<<").
+static constexpr int JA_KATA_QUAT_WEIGHT= 0x08;
+// Quaternary weight of hiragana.
+static constexpr int JA_HIRA_QUAT_WEIGHT= 0x02;
+static const char ja_cldr_30[]=
+  "&\\u309D <<<< \\u30FD"
+  "&[before 3]\\u3041 <<<\\u3041|\\u30FC=\\u3042|\\u30FC=\\u304B|\\u30FC"
+                       "=\\u3095|\\u30FC=\\u304C|\\u30FC=\\u3055|\\u30FC"
+                       "=\\u3056|\\u30FC=\\u305F|\\u30FC=\\u3060|\\u30FC"
+                       "=\\u306A|\\u30FC=\\u306F|\\u30FC=\\u3070|\\u30FC"
+                       "=\\u3071|\\u30FC=\\u307E|\\u30FC=\\u3083|\\u30FC"
+                       "=\\u3084|\\u30FC=\\u3089|\\u30FC=\\u308E|\\u30FC"
+                       "=\\u308F|\\u30FC"
+                    "<<<<\\u30A1|\\u30FC=\\uFF67|\\u30FC=\\u30A2|\\u30FC"
+                       "=\\uFF71|\\u30FC=\\u30AB|\\u30FC=\\uFF76|\\u30FC"
+                       "=\\u30AC|\\u30FC=\\u30B5|\\u30FC=\\uFF7B|\\u30FC"
+                       "=\\u30B6|\\u30FC=\\u30BF|\\u30FC=\\uFF80|\\u30FC"
+                       "=\\u30C0|\\u30FC=\\u30CA|\\u30FC=\\uFF85|\\u30FC"
+                       "=\\u30CF|\\u30FC=\\uFF8A|\\u30FC=\\u31F5|\\u30FC"
+                       "=\\u30D0|\\u30FC=\\u30D1|\\u30FC=\\u30DE|\\u30FC"
+                       "=\\uFF8F|\\u30FC=\\u30E3|\\u30FC=\\uFF6C|\\u30FC"
+                       "=\\u30E4|\\u30FC=\\uFF94|\\u30FC=\\u30E9|\\u30FC"
+                       "=\\uFF97|\\u30FC=\\u31FB|\\u30FC=\\u30EE|\\u30FC"
+                       "=\\u30EF|\\u30FC=\\uFF9C|\\u30FC=\\u30F5|\\u30FC"
+                       "=\\u30F7|\\u30FC"
+  "&[before 3]\\u3043 <<<\\u3043|\\u30FC=\\u3044|\\u30FC=\\u304D|\\u30FC"
+                       "=\\u304E|\\u30FC=\\u3057|\\u30FC=\\u3058|\\u30FC"
+                       "=\\u3061|\\u30FC=\\u3062|\\u30FC=\\u306B|\\u30FC"
+                       "=\\u3072|\\u30FC=\\u3073|\\u30FC=\\u3074|\\u30FC"
+                       "=\\u307F|\\u30FC=\\u308A|\\u30FC=\\u3090|\\u30FC"
+                    "<<<<\\u30A3|\\u30FC=\\uFF68|\\u30FC=\\u30A4|\\u30FC"
+                       "=\\uFF72|\\u30FC=\\u30AD|\\u30FC=\\uFF77|\\u30FC"
+                       "=\\u30AE|\\u30FC=\\u30B7|\\u30FC=\\uFF7C|\\u30FC"
+                       "=\\u31F1|\\u30FC=\\u30B8|\\u30FC=\\u30C1|\\u30FC"
+                       "=\\uFF81|\\u30FC=\\u30C2|\\u30FC=\\u30CB|\\u30FC"
+                       "=\\uFF86|\\u30FC=\\u30D2|\\u30FC=\\uFF8B|\\u30FC"
+                       "=\\u31F6|\\u30FC=\\u30D3|\\u30FC=\\u30D4|\\u30FC"
+                       "=\\u30DF|\\u30FC=\\uFF90|\\u30FC=\\u30EA|\\u30FC"
+                       "=\\uFF98|\\u30FC=\\u31FC|\\u30FC=\\u30F0|\\u30FC"
+                       "=\\u30F8|\\u30FC"
+  "&[before 3]\\u3045 <<<\\u3045|\\u30FC=\\u3046|\\u30FC=\\u304F|\\u30FC"
+                       "=\\u3050|\\u30FC=\\u3059|\\u30FC=\\u305A|\\u30FC"
+                       "=\\u3063|\\u30FC=\\u3064|\\u30FC=\\u3065|\\u30FC"
+                       "=\\u306C|\\u30FC=\\u3075|\\u30FC=\\u3076|\\u30FC"
+                       "=\\u3077|\\u30FC=\\u3080|\\u30FC=\\u3085|\\u30FC"
+                       "=\\u3086|\\u30FC=\\u308B|\\u30FC=\\u3094|\\u30FC"
+                    "<<<<\\u30A5|\\u30FC=\\uFF69|\\u30FC=\\u30A6|\\u30FC"
+                       "=\\uFF73|\\u30FC=\\u30AF|\\u30FC=\\uFF78|\\u30FC"
+                       "=\\u31F0|\\u30FC=\\u30B0|\\u30FC=\\u30B9|\\u30FC"
+                       "=\\uFF7D|\\u30FC=\\u31F2|\\u30FC=\\u30BA|\\u30FC"
+                       "=\\u30C3|\\u30FC=\\uFF6F|\\u30FC=\\u30C4|\\u30FC"
+                       "=\\uFF82|\\u30FC=\\u30C5|\\u30FC=\\u30CC|\\u30FC"
+                       "=\\uFF87|\\u30FC=\\u31F4|\\u30FC=\\u30D5|\\u30FC"
+                       "=\\uFF8C|\\u30FC=\\u31F7|\\u30FC=\\u30D6|\\u30FC"
+                       "=\\u30D7|\\u30FC=\\u30E0|\\u30FC=\\uFF91|\\u30FC"
+                       "=\\u31FA|\\u30FC=\\u30E5|\\u30FC=\\uFF6D|\\u30FC"
+                       "=\\u30E6|\\u30FC=\\uFF95|\\u30FC=\\u30EB|\\u30FC"
+                       "=\\uFF99|\\u30FC=\\u31FD|\\u30FC=\\u30F4|\\u30FC"
+  "&[before 3]\\u3047 <<<\\u3047|\\u30FC=\\u3048|\\u30FC=\\u3051|\\u30FC"
+                       "=\\u3096|\\u30FC=\\u3052|\\u30FC=\\u305B|\\u30FC"
+                       "=\\u305C|\\u30FC=\\u3066|\\u30FC=\\u3067|\\u30FC"
+                       "=\\u306D|\\u30FC=\\u3078|\\u30FC=\\u3079|\\u30FC"
+                       "=\\u307A|\\u30FC=\\u3081|\\u30FC=\\u308C|\\u30FC"
+                       "=\\u3091|\\u30FC"
+                    "<<<<\\u30A7|\\u30FC=\\uFF6A|\\u30FC=\\u30A8|\\u30FC"
+                       "=\\uFF74|\\u30FC=\\u30B1|\\u30FC=\\uFF79|\\u30FC"
+                       "=\\u30B2|\\u30FC=\\u30BB|\\u30FC=\\uFF7E|\\u30FC"
+                       "=\\u30BC|\\u30FC=\\u30C6|\\u30FC=\\uFF83|\\u30FC"
+                       "=\\u30C7|\\u30FC=\\u30CD|\\u30FC=\\uFF88|\\u30FC"
+                       "=\\u30D8|\\u30FC=\\uFF8D|\\u30FC=\\u31F8|\\u30FC"
+                       "=\\u30D9|\\u30FC=\\u30DA|\\u30FC=\\u30E1|\\u30FC"
+                       "=\\uFF92|\\u30FC=\\u30EC|\\u30FC=\\uFF9A|\\u30FC"
+                       "=\\u31FE|\\u30FC=\\u30F1|\\u30FC=\\u30F6|\\u30FC"
+                       "=\\u30F9|\\u30FC"
+  "&[before 3]\\u3049 <<<\\u3049|\\u30FC=\\u304A|\\u30FC=\\u3053|\\u30FC"
+                       "=\\u3054|\\u30FC=\\u305D|\\u30FC=\\u305E|\\u30FC"
+                       "=\\u3068|\\u30FC=\\u3069|\\u30FC=\\u306E|\\u30FC"
+                       "=\\u307B|\\u30FC=\\u307C|\\u30FC=\\u307D|\\u30FC"
+                       "=\\u3082|\\u30FC=\\u3087|\\u30FC=\\u3088|\\u30FC"
+                       "=\\u308D|\\u30FC=\\u3092|\\u30FC"
+                    "<<<<\\u30A9|\\u30FC=\\uFF6B|\\u30FC=\\u30AA|\\u30FC"
+                       "=\\uFF75|\\u30FC=\\u30B3|\\u30FC=\\uFF7A|\\u30FC"
+                       "=\\u30B4|\\u30FC=\\u30BD|\\u30FC=\\uFF7F|\\u30FC"
+                       "=\\u30BE|\\u30FC=\\u30C8|\\u30FC=\\uFF84|\\u30FC"
+                       "=\\u31F3|\\u30FC=\\u30C9|\\u30FC=\\u30CE|\\u30FC"
+                       "=\\uFF89|\\u30FC=\\u30DB|\\u30FC=\\uFF8E|\\u30FC"
+                       "=\\u31F9|\\u30FC=\\u30DC|\\u30FC=\\u30DD|\\u30FC"
+                       "=\\u30E2|\\u30FC=\\uFF93|\\u30FC=\\u30E7|\\u30FC"
+                       "=\\uFF6E|\\u30FC=\\u30E8|\\u30FC=\\uFF96|\\u30FC"
+                       "=\\u30ED|\\u30FC=\\uFF9B|\\u30FC=\\u31FF|\\u30FC"
+                       "=\\u30F2|\\u30FC=\\uFF66|\\u30FC=\\u30FA|\\u30FC"
+  "&[before 3]\\u3042 <<<\\u3042|\\u309D=\\u3041|\\u309D"
+                    "<<<<\\u30A2|\\u30FD=\\uFF71|\\u30FD=\\u30A1|\\u30FD"
+                       "=\\uFF67|\\u30FD"
+  "&[before 3]\\u3044 <<<\\u3044|\\u309D=\\u3043|\\u309D"
+                    "<<<<\\u30A4|\\u30FD=\\uFF72|\\u30FD=\\u30A3|\\u30FD"
+                       "=\\uFF68|\\u30FD"
+  "&[before 3]\\u3046 <<<\\u3046|\\u309D=\\u3045|\\u309D=\\u3094|\\u309D"
+                       "=\\u3046|\\u309E/\\u3099"
+                       "=\\u3045|\\u309E/\\u3099"
+                       "=\\u3094|\\u309E/\\u3099"
+                    "<<<<\\u30A6|\\u30FD=\\uFF73|\\u30FD=\\u30A5|\\u30FD"
+                       "=\\uFF69|\\u30FD=\\u30F4|\\u30FD"
+                       "=\\u30A6|\\u30FE/\\u3099"
+                       "=\\uFF73|\\u30FE/\\u3099"
+                       "=\\u30A5|\\u30FE/\\u3099"
+                       "=\\uFF69|\\u30FE/\\u3099"
+                       "=\\u30F4|\\u30FE/\\u3099"
+  "&[before 3]\\u3048 <<<\\u3048|\\u309D=\\u3047|\\u309D"
+                    "<<<<\\u30A8|\\u30FD=\\uFF74|\\u30FD=\\u30A7|\\u30FD"
+                       "=\\uFF6A|\\u30FD"
+  "&[before 3]\\u304A <<<\\u304A|\\u309D=\\u3049|\\u309D"
+                    "<<<<\\u30AA|\\u30FD=\\uFF75|\\u30FD=\\u30A9|\\u30FD"
+                       "=\\uFF6B|\\u30FD"
+  "&[before 3]\\u304B <<<\\u304B|\\u309D=\\u3095|\\u309D"
+                    "<<<<\\u30AB|\\u30FD=\\uFF76|\\u30FD=\\u30F5|\\u30FD"
+  "&[before 3]\\u304C <<<\\u304C|\\u309D <<<<\\u30AC|\\u30FD"
+  "&[before 3]\\u304D <<<\\u304D|\\u309D=\\u304E|\\u309D"
+                       "=\\u304D|\\u309E/\\u3099"
+                       "=\\u304E|\\u309E/\\u3099"
+                    "<<<<\\u30AD|\\u30FD=\\uFF77|\\u30FD=\\u30AE|\\u30FD"
+                       "=\\u30AD|\\u30FE/\\u3099"
+                       "=\\uFF77|\\u30FE/\\u3099"
+                       "=\\u30AE|\\u30FE/\\u3099"
+  "&[before 3]\\u304F <<<\\u304F|\\u309D=\\u3050|\\u309D"
+                       "=\\u304F|\\u309E/\\u3099"
+                       "=\\u3050|\\u309E/\\u3099"
+                    "<<<<\\u30AF|\\u30FD=\\uFF78|\\u30FD=\\u31F0|\\u30FD"
+                       "=\\u30B0|\\u30FD=\\u30AF|\\u30FE/\\u3099"
+                       "=\\uFF78|\\u30FE/\\u3099"
+                       "=\\u31F0|\\u30FE/\\u3099"
+                       "=\\u30B0|\\u30FE/\\u3099"
+  "&[before 3]\\u3051 <<<\\u3051|\\u309D=\\u3096|\\u309D"
+                    "<<<<\\u30B1|\\u30FD=\\uFF79|\\u30FD=\\u30F6|\\u30FD"
+  "&[before 3]\\u3052 <<<\\u3052|\\u309D <<<<\\u30B2|\\u30FD"
+  "&[before 3]\\u3053 <<<\\u3053|\\u309D=\\u3054|\\u309D"
+                       "=\\u3053|\\u309E/\\u3099"
+                       "=\\u3054|\\u309E/\\u3099"
+                    "<<<<\\u30B3|\\u30FD=\\uFF7A|\\u30FD=\\u30B4|\\u30FD"
+                       "=\\u30B3|\\u30FE/\\u3099"
+                       "=\\uFF7A|\\u30FE/\\u3099"
+                       "=\\u30B4|\\u30FE/\\u3099"
+  "&[before 3]\\u3055 <<<\\u3055|\\u309D=\\u3056|\\u309D"
+                       "=\\u3055|\\u309E/\\u3099"
+                       "=\\u3056|\\u309E/\\u3099"
+                    "<<<<\\u30B5|\\u30FD=\\uFF7B|\\u30FD=\\u30B6|\\u30FD"
+                       "=\\u30B5|\\u30FE/\\u3099"
+                       "=\\uFF7B|\\u30FE/\\u3099"
+                       "=\\u30B6|\\u30FE/\\u3099"
+  "&[before 3]\\u3057 <<<\\u3057|\\u309D=\\u3058|\\u309D"
+                       "=\\u3057|\\u309E/\\u3099"
+                       "=\\u3058|\\u309E/\\u3099"
+                    "<<<<\\u30B7|\\u30FD=\\uFF7C|\\u30FD=\\u31F1|\\u30FD"
+                       "=\\u30B8|\\u30FD=\\u30B7|\\u30FE/\\u3099"
+                       "=\\uFF7C|\\u30FE/\\u3099"
+                       "=\\u31F1|\\u30FE/\\u3099"
+                       "=\\u30B8|\\u30FE/\\u3099"
+  "&[before 3]\\u3059 <<<\\u3059|\\u309D=\\u305A|\\u309D"
+                       "=\\u3059|\\u309E/\\u3099"
+                       "=\\u305A|\\u309E/\\u3099"
+                    "<<<<\\u30B9|\\u30FD=\\uFF7D|\\u30FD=\\u31F2|\\u30FD"
+                       "=\\u30BA|\\u30FD=\\u30B9|\\u30FE/\\u3099"
+                       "=\\uFF7D|\\u30FE/\\u3099"
+                       "=\\u31F2|\\u30FE/\\u3099"
+                       "=\\u30BA|\\u30FE/\\u3099"
+  "&[before 3]\\u305B <<<\\u305B|\\u309D=\\u305C|\\u309D"
+                       "=\\u305B|\\u309E/\\u3099"
+                       "=\\u305C|\\u309E/\\u3099"
+                    "<<<<\\u30BB|\\u30FD=\\uFF7E|\\u30FD=\\u30BC|\\u30FD"
+                       "=\\u30BB|\\u30FE/\\u3099"
+                       "=\\uFF7E|\\u30FE/\\u3099"
+                       "=\\u30BC|\\u30FE/\\u3099"
+  "&[before 3]\\u305D <<<\\u305D|\\u309D=\\u305E|\\u309D"
+                       "=\\u305D|\\u309E/\\u3099"
+                       "=\\u305E|\\u309E/\\u3099"
+                    "<<<<\\u30BD|\\u30FD=\\uFF7F|\\u30FD=\\u30BE|\\u30FD"
+                       "=\\u30BD|\\u30FE/\\u3099"
+                       "=\\uFF7F|\\u30FE/\\u3099"
+                       "=\\u30BE|\\u30FE/\\u3099"
+  "&[before 3]\\u305F <<<\\u305F|\\u309D=\\u3060|\\u309D"
+                       "=\\u305F|\\u309E/\\u3099"
+                       "=\\u3060|\\u309E/\\u3099"
+                    "<<<<\\u30BF|\\u30FD=\\uFF80|\\u30FD=\\u30C0|\\u30FD"
+                       "=\\u30BF|\\u30FE/\\u3099"
+                       "=\\uFF80|\\u30FE/\\u3099"
+                       "=\\u30C0|\\u30FE/\\u3099"
+  "&[before 3]\\u3061 <<<\\u3061|\\u309D=\\u3062|\\u309D"
+                       "=\\u3061|\\u309E/\\u3099"
+                       "=\\u3062|\\u309E/\\u3099"
+                    "<<<<\\u30C1|\\u30FD=\\uFF81|\\u30FD=\\u30C2|\\u30FD"
+                       "=\\u30C1|\\u30FE/\\u3099"
+                       "=\\uFF81|\\u30FE/\\u3099"
+                       "=\\u30C2|\\u30FE/\\u3099"
+  "&[before 3]\\u3064 <<<\\u3064|\\u309D=\\u3063|\\u309D=\\u3065|\\u309D"
+                       "=\\u3064|\\u309E/\\u3099"
+                       "=\\u3065|\\u309E/\\u3099"
+                       "=\\u3064|\\u309D=\\u3063|\\u309E/\\u3099"
+                       "=\\u3064|\\u309E/\\u3099"
+                    "<<<<\\u30C4|\\u30FD=\\uFF82|\\u30FD=\\u30C3|\\u30FD"
+                       "=\\uFF6F|\\u30FD=\\u30C5|\\u30FD"
+                       "=\\u30C4|\\u30FE/\\u3099"
+                       "=\\uFF82|\\u30FE/\\u3099"
+                       "=\\u30C5|\\u30FE/\\u3099=\\u30C4|\\u30FD"
+                       "=\\uFF82|\\u30FD=\\u30C3|\\u30FE/\\u3099"
+                       "=\\uFF6F|\\u30FE/\\u3099"
+                       "=\\u30C4|\\u30FE/\\u3099"
+                       "=\\uFF82|\\u30FE/\\u3099"
+  "&[before 3]\\u3066 <<<\\u3066|\\u309D=\\u3067|\\u309D"
+                       "=\\u3066|\\u309E/\\u3099"
+                       "=\\u3067|\\u309E/\\u3099"
+                    "<<<<\\u30C6|\\u30FD=\\uFF83|\\u30FD=\\u30C7|\\u30FD"
+                       "=\\u30C6|\\u30FE/\\u3099"
+                       "=\\uFF83|\\u30FE/\\u3099"
+                       "=\\u30C7|\\u30FE/\\u3099"
+  "&[before 3]\\u3068 <<<\\u3068|\\u309D=\\u3069|\\u309D"
+                       "=\\u3068|\\u309E/\\u3099"
+                       "=\\u3069|\\u309E/\\u3099"
+                    "<<<<\\u30C8|\\u30FD=\\uFF84|\\u30FD=\\u31F3|\\u30FD"
+                       "=\\u30C9|\\u30FD=\\u30C8|\\u30FE/\\u3099"
+                       "=\\uFF84|\\u30FE/\\u3099"
+                       "=\\u31F3|\\u30FE/\\u3099"
+                       "=\\u30C9|\\u30FE/\\u3099"
+  "&[before 3]\\u306A <<<\\u306A|\\u309D <<<<\\u30CA|\\u30FD=\\uFF85|\\u30FD"
+  "&[before 3]\\u306B <<<\\u306B|\\u309D <<<<\\u30CB|\\u30FD=\\uFF86|\\u30FD"
+  "&[before 3]\\u306C <<<\\u306C|\\u309D <<<<\\u30CC|\\u30FD=\\uFF87|\\u30FD"
+                       "=\\u31F4|\\u30FD"
+  "&[before 3]\\u306D <<<\\u306D|\\u309D <<<<\\u30CD|\\u30FD=\\uFF88|\\u30FD"
+  "&[before 3]\\u306E <<<\\u306E|\\u309D <<<<\\u30CE|\\u30FD=\\uFF89|\\u30FD"
+  "&[before 3]\\u306F <<<\\u306F|\\u309D=\\u3070|\\u309D"
+                       "=\\u306F|\\u309E/\\u3099"
+                       "=\\u3070|\\u309E/\\u3099"
+                       "=\\u3071|\\u309D=\\u3071|\\u309E/\\u3099"
+                    "<<<<\\u30CF|\\u30FD=\\uFF8A|\\u30FD=\\u31F5|\\u30FD"
+                       "=\\u30D0|\\u30FD=\\u30CF|\\u30FE/\\u3099"
+                       "=\\uFF8A|\\u30FE/\\u3099"
+                       "=\\u31F5|\\u30FE/\\u3099"
+                       "=\\u30D0|\\u30FE/\\u3099=\\u30D1|\\u30FD"
+                       "=\\u30D1|\\u30FE/\\u3099"
+  "&[before 3]\\u3072 <<<\\u3072|\\u309D=\\u3073|\\u309D"
+                       "=\\u3072|\\u309E/\\u3099"
+                       "=\\u3073|\\u309E/\\u3099"
+                       "=\\u3074|\\u309D=\\u3074|\\u309E/\\u3099"
+                    "<<<<\\u30D2|\\u30FD=\\uFF8B|\\u30FD=\\u31F6|\\u30FD"
+                       "=\\u30D3|\\u30FD=\\u30D2|\\u30FE/\\u3099"
+                       "=\\uFF8B|\\u30FE/\\u3099"
+                       "=\\u31F6|\\u30FE/\\u3099"
+                       "=\\u30D3|\\u30FE/\\u3099=\\u30D4|\\u30FD"
+                       "=\\u30D4|\\u30FE/\\u3099"
+  "&[before 3]\\u3075 <<<\\u3075|\\u309D=\\u3076|\\u309D"
+                       "=\\u3075|\\u309E/\\u3099"
+                       "=\\u3076|\\u309E/\\u3099"
+                       "=\\u3077|\\u309D=\\u3077|\\u309E/\\u3099"
+                    "<<<<\\u30D5|\\u30FD=\\uFF8C|\\u30FD=\\u31F7|\\u30FD"
+                       "=\\u30D6|\\u30FD=\\u30D5|\\u30FE/\\u3099"
+                       "=\\uFF8C|\\u30FE/\\u3099"
+                       "=\\u31F7|\\u30FE/\\u3099"
+                       "=\\u30D6|\\u30FE/\\u3099=\\u30D7|\\u30FD"
+                       "=\\u30D7|\\u30FE/\\u3099"
+  "&[before 3]\\u3078 <<<\\u3078|\\u309D=\\u3079|\\u309D"
+                       "=\\u3078|\\u309E/\\u3099"
+                       "=\\u3079|\\u309E/\\u3099"
+                       "=\\u307A|\\u309D=\\u307A|\\u309E/\\u3099"
+                    "<<<<\\u30D8|\\u30FD=\\uFF8D|\\u30FD=\\u31F8|\\u30FD"
+                       "=\\u30D9|\\u30FD=\\u30D8|\\u30FE/\\u3099"
+                       "=\\uFF8D|\\u30FE/\\u3099"
+                       "=\\u31F8|\\u30FE/\\u3099"
+                       "=\\u30D9|\\u30FE/\\u3099=\\u30DA|\\u30FD"
+                       "=\\u30DA|\\u30FE/\\u3099"
+  "&[before 3]\\u307B <<<\\u307B|\\u309D=\\u307C|\\u309D"
+                       "=\\u307B|\\u309E/\\u3099"
+                       "=\\u307C|\\u309E/\\u3099"
+                       "=\\u307D|\\u309D=\\u307D|\\u309E/\\u3099"
+                    "<<<<\\u30DB|\\u30FD=\\uFF8E|\\u30FD=\\u31F9|\\u30FD"
+                       "=\\u30DC|\\u30FD=\\u30DB|\\u30FE/\\u3099"
+                       "=\\uFF8E|\\u30FE/\\u3099"
+                       "=\\u31F9|\\u30FE/\\u3099"
+                       "=\\u30DC|\\u30FE/\\u3099=\\u30DD|\\u30FD"
+                       "=\\u30DD|\\u30FE/\\u3099"
+  "&[before 3]\\u307E <<<\\u307E|\\u309D <<<<\\u30DE|\\u30FD=\\uFF8F|\\u30FD"
+  "&[before 3]\\u307F <<<\\u307F|\\u309D <<<<\\u30DF|\\u30FD=\\uFF90|\\u30FD"
+  "&[before 3]\\u3080 <<<\\u3080|\\u309D <<<<\\u30E0|\\u30FD=\\uFF91|\\u30FD"
+                       "=\\u31FA|\\u30FD"
+  "&[before 3]\\u3081 <<<\\u3081|\\u309D <<<<\\u30E1|\\u30FD=\\uFF92|\\u30FD"
+  "&[before 3]\\u3082 <<<\\u3082|\\u309D <<<<\\u30E2|\\u30FD=\\uFF93|\\u30FD"
+  "&[before 3]\\u3084 <<<\\u3084|\\u309D=\\u3083|\\u309D <<<<\\u30E4|\\u30FD"
+                       "=\\uFF94|\\u30FD=\\u30E3|\\u30FD=\\uFF6C|\\u30FD"
+  "&[before 3]\\u3086 <<<\\u3086|\\u309D=\\u3085|\\u309D <<<<\\u30E6|\\u30FD"
+                       "=\\uFF95|\\u30FD=\\u30E5|\\u30FD=\\uFF6D|\\u30FD"
+  "&[before 3]\\u3088 <<<\\u3088|\\u309D=\\u3087|\\u309D <<<<\\u30E8|\\u30FD"
+                       "=\\uFF96|\\u30FD=\\u30E7|\\u30FD=\\uFF6E|\\u30FD"
+  "&[before 3]\\u3089 <<<\\u3089|\\u309D <<<<\\u30E9|\\u30FD=\\uFF97|\\u30FD"
+                       "=\\u31FB|\\u30FD"
+  "&[before 3]\\u308A <<<\\u308A|\\u309D <<<<\\u30EA|\\u30FD=\\uFF98|\\u30FD"
+                       "=\\u31FC|\\u30FD"
+  "&[before 3]\\u308B <<<\\u308B|\\u309D <<<<\\u30EB|\\u30FD=\\uFF99|\\u30FD"
+                       "=\\u31FD|\\u30FD"
+  "&[before 3]\\u308C <<<\\u308C|\\u309D <<<<\\u30EC|\\u30FD=\\uFF9A|\\u30FD"
+                       "=\\u31FE|\\u30FD"
+  "&[before 3]\\u308D <<<\\u308D|\\u309D <<<<\\u30ED|\\u30FD=\\uFF9B|\\u30FD"
+                       "=\\u31FF|\\u30FD"
+  "&[before 3]\\u308F <<<\\u308F|\\u309D=\\u308E|\\u309D"
+                       "=\\u308F|\\u309E/\\u3099"
+                       "=\\u308E|\\u309E/\\u3099"
+                    "<<<<\\u30EF|\\u30FD=\\uFF9C|\\u30FD=\\u30EE|\\u30FD"
+                       "=\\u30F7|\\u30FD=\\u30EF|\\u30FE/\\u3099"
+                       "=\\uFF9C|\\u30FE/\\u3099"
+                       "=\\u30F7|\\u30FE/\\u3099"
+                       "=\\u30EE|\\u30FE/\\u3099"
+  "&[before 3]\\u3090 <<<\\u3090|\\u309D=\\u3090|\\u309E/\\u3099"
+                    "<<<<\\u30F0|\\u30FD=\\u30F8|\\u30FD"
+                       "=\\u30F0|\\u30FE/\\u3099"
+                       "=\\u30F8|\\u30FE/\\u3099"
+  "&[before 3]\\u3091 <<<\\u3091|\\u309D=\\u3091|\\u309E/\\u3099"
+                    "<<<<\\u30F1|\\u30FD=\\u30F9|\\u30FD"
+                       "=\\u30F1|\\u30FE/\\u3099"
+                       "=\\u30F9|\\u30FE/\\u3099"
+  "&[before 3]\\u3092 <<<\\u3092|\\u309D=\\u3092|\\u309E/\\u3099"
+                    "<<<<\\u30F2|\\u30FD=\\uFF66|\\u30FD=\\u30FA|\\u30FD"
+                       "=\\u30F2|\\u30FE/\\u3099"
+                       "=\\uFF66|\\u30FE/\\u3099"
+                       "=\\u30FA|\\u30FE/\\u3099"
+  "&[before 3]\\u3093 <<<\\u3093|\\u309D <<<<\\u30F3|\\u30FD=\\uFF9D|\\u30FD"
+  "&\\u3041 <<<<\\u30A1=\\uFF67"
+  "&\\u3042 <<<<\\u30A2=\\uFF71"
+  "&\\u3043 <<<<\\u30A3=\\uFF68"
+  "&\\u3044 <<<<\\u30A4=\\uFF72"
+  "&\\u3045 <<<<\\u30A5=\\uFF69"
+  "&\\u3046 <<<<\\u30A6=\\uFF73"
+  "&\\u3047 <<<<\\u30A7=\\uFF6A"
+  "&\\u3048 <<<<\\u30A8=\\uFF74"
+  "&\\u3049 <<<<\\u30A9=\\uFF6B"
+  "&\\u304A <<<<\\u30AA=\\uFF75"
+  "&\\u304B <<<<\\u30AB=\\uFF76"
+  "&\\u304D <<<<\\u30AD=\\uFF77"
+  "&\\u304F <<<<\\u30AF=\\uFF78"
+  "&\\u3051 <<<<\\u30B1=\\uFF79"
+  "&\\u3053 <<<<\\u30B3=\\uFF7A"
+  "&\\u3055 <<<<\\u30B5=\\uFF7B"
+  "&\\u3057 <<<<\\u30B7=\\uFF7C"
+  "&\\u3059 <<<<\\u30B9=\\uFF7D"
+  "&\\u305B <<<<\\u30BB=\\uFF7E"
+  "&\\u305D <<<<\\u30BD=\\uFF7F"
+  "&\\u305F <<<<\\u30BF=\\uFF80"
+  "&\\u3061 <<<<\\u30C1=\\uFF81"
+  "&\\u3063 <<<<\\u30C3=\\uFF6F"
+  "&\\u3064 <<<<\\u30C4=\\uFF82"
+  "&\\u3066 <<<<\\u30C6=\\uFF83"
+  "&\\u3068 <<<<\\u30C8=\\uFF84"
+  "&\\u306A <<<<\\u30CA=\\uFF85"
+  "&\\u306B <<<<\\u30CB=\\uFF86"
+  "&\\u306C <<<<\\u30CC=\\uFF87"
+  "&\\u306D <<<<\\u30CD=\\uFF88"
+  "&\\u306E <<<<\\u30CE=\\uFF89"
+  "&\\u306F <<<<\\u30CF=\\uFF8A"
+  "&\\u3072 <<<<\\u30D2=\\uFF8B"
+  "&\\u3075 <<<<\\u30D5=\\uFF8C"
+  "&\\u3078 <<<<\\u30D8=\\uFF8D"
+  "&\\u307B <<<<\\u30DB=\\uFF8E"
+  "&\\u307E <<<<\\u30DE=\\uFF8F"
+  "&\\u307F <<<<\\u30DF=\\uFF90"
+  "&\\u3080 <<<<\\u30E0=\\uFF91"
+  "&\\u3081 <<<<\\u30E1=\\uFF92"
+  "&\\u3082 <<<<\\u30E2=\\uFF93"
+  "&\\u3083 <<<<\\u30E3=\\uFF6C"
+  "&\\u3084 <<<<\\u30E4=\\uFF94"
+  "&\\u3085 <<<<\\u30E5=\\uFF6D"
+  "&\\u3086 <<<<\\u30E6=\\uFF95"
+  "&\\u3087 <<<<\\u30E7=\\uFF6E"
+  "&\\u3088 <<<<\\u30E8=\\uFF96"
+  "&\\u3089 <<<<\\u30E9=\\uFF97"
+  "&\\u308A <<<<\\u30EA=\\uFF98"
+  "&\\u308B <<<<\\u30EB=\\uFF99"
+  "&\\u308C <<<<\\u30EC=\\uFF9A"
+  "&\\u308D <<<<\\u30ED=\\uFF9B"
+  "&\\u308E <<<<\\u30EE"
+  "&\\u308F <<<<\\u30EF=\\uFF9C"
+  "&\\u3090 <<<<\\u30F0"
+  "&\\u3091 <<<<\\u30F1"
+  "&\\u3092 <<<<\\u30F2=\\uFF66"
+  "&\\u3093 <<<<\\u30F3=\\uFF9D"
+  "&\\u3095 <<<<\\u30F5"
+  "&\\u3096 <<<<\\u30F6"
+  "&\\u3088\\u308A <<\\u309F"
+  "&\\u30B3\\u30C8 <<\\u30FF"
+  "&\\u0020=\\u3000=\\uFFE3"
+  "&\\u0021=\\uFF01"
+  "&\\u0022=\\uFF02"
+  "&\\u0023=\\uFF03"
+  "&\\u0024=\\uFF04"
+  "&\\u0025=\\uFF05"
+  "&\\u0026=\\uFF06"
+  "&\\u0027=\\uFF07"
+  "&\\u0028=\\uFF08"
+  "&\\u0029=\\uFF09"
+  "&\\u002A=\\uFF0A"
+  "&\\u002B=\\uFF0B"
+  "&\\u002C=\\uFF0C"
+  "&\\u002D=\\uFF0D"
+  "&\\u002E=\\uFF0E"
+  "&\\u002F=\\uFF0F"
+  "&0=\\uFF10"
+  "&1=\\uFF11"
+  "&2=\\uFF12"
+  "&3=\\uFF13"
+  "&4=\\uFF14"
+  "&5=\\uFF15"
+  "&6=\\uFF16"
+  "&7=\\uFF17"
+  "&8=\\uFF18"
+  "&9=\\uFF19"
+  "&\\u003A=\\uFF1A"
+  "&\\u003B=\\uFF1B"
+  "&\\u003C=\\uFF1C"
+  "&\\u003D=\\uFF1D"
+  "&\\u003E=\\uFF1E"
+  "&\\u003F=\\uFF1F"
+  "&\\u0040=\\uFF20"
+  "&A=\\uFF21"
+  "&B=\\uFF22"
+  "&C=\\uFF23"
+  "&D=\\uFF24"
+  "&E=\\uFF25"
+  "&F=\\uFF26"
+  "&G=\\uFF27"
+  "&H=\\uFF28"
+  "&I=\\uFF29"
+  "&J=\\uFF2A"
+  "&K=\\uFF2B"
+  "&L=\\uFF2C"
+  "&M=\\uFF2D"
+  "&N=\\uFF2E"
+  "&O=\\uFF2F"
+  "&P=\\uFF30"
+  "&Q=\\uFF31"
+  "&R=\\uFF32"
+  "&S=\\uFF33"
+  "&T=\\uFF34"
+  "&U=\\uFF35"
+  "&V=\\uFF36"
+  "&W=\\uFF37"
+  "&X=\\uFF38"
+  "&Y=\\uFF39"
+  "&Z=\\uFF3A"
+  "&\\u005B=\\uFF3B"
+  "&\\u005C=\\uFF3C "
+  "&\\u005D=\\uFF3D"
+  "&\\u005E=\\uFF3E"
+  "&\\u005F=\\uFF3F"
+  "&\\u0060=\\uFF40"
+  "&a=\\uFF41"
+  "&b=\\uFF42"
+  "&c=\\uFF43"
+  "&d=\\uFF44"
+  "&e=\\uFF45"
+  "&f=\\uFF46"
+  "&g=\\uFF47"
+  "&h=\\uFF48"
+  "&i=\\uFF49"
+  "&j=\\uFF4A"
+  "&k=\\uFF4B"
+  "&l=\\uFF4C"
+  "&m=\\uFF4D"
+  "&n=\\uFF4E"
+  "&o=\\uFF4F"
+  "&p=\\uFF50"
+  "&q=\\uFF51"
+  "&r=\\uFF52"
+  "&s=\\uFF53"
+  "&t=\\uFF54"
+  "&u=\\uFF55"
+  "&v=\\uFF56"
+  "&w=\\uFF57"
+  "&x=\\uFF58"
+  "&y=\\uFF59"
+  "&z=\\uFF5A"
+  "&\\u007B=\\uFF5B"
+  "&\\u007C=\\uFF5C"
+  "&\\u007D=\\uFF5D"
+  "&\\u007E=\\uFF5E"
+  "&\\u00A2=\\uFFE0"
+  "&\\u00A3=\\uFFE1"
+  "&\\u00A5=\\uFFE5"
+  "&\\u00A6=\\uFFE4"
+  "&\\u00AC=\\uFFE2"
+  "&\\u1100=\\uFFA1=\\u3131"
+  "&\\u1101=\\uFFA2=\\u3132"
+  "&\\u1102=\\uFFA4=\\u3134"
+  "&\\u1103=\\uFFA7=\\u3137"
+  "&\\u1104=\\uFFA8=\\u3138"
+  "&\\u1105=\\uFFA9=\\u3139"
+  "&\\u1106=\\uFFB1=\\u3141"
+  "&\\u1107=\\uFFB2=\\u3142"
+  "&\\u1108=\\uFFB3=\\u3143"
+  "&\\u1109=\\uFFB5=\\u3145"
+  "&\\u110A=\\uFFB6=\\u3146"
+  "&\\u110B=\\uFFB7=\\u3147"
+  "&\\u110C=\\uFFB8=\\u3148"
+  "&\\u110D=\\uFFB9=\\u3149"
+  "&\\u110E=\\uFFBA=\\u314A"
+  "&\\u110F=\\uFFBB=\\u314B"
+  "&\\u1110=\\uFFBC=\\u314C"
+  "&\\u1111=\\uFFBD=\\u314D"
+  "&\\u1112=\\uFFBE=\\u314E"
+  "&\\u111A=\\uFFB0=\\u3140"
+  "&\\u1121=\\uFFB4=\\u3144"
+  "&\\u1160=\\uFFA0=\\u3164"
+  "&\\u1161=\\uFFC2=\\u314F"
+  "&\\u1162=\\uFFC3=\\u3150"
+  "&\\u1163=\\uFFC4=\\u3151"
+  "&\\u1164=\\uFFC5=\\u3152"
+  "&\\u1165=\\uFFC6=\\u3153"
+  "&\\u1166=\\uFFC7=\\u3154"
+  "&\\u1167=\\uFFCA=\\u3155"
+  "&\\u1168=\\uFFCB=\\u3156"
+  "&\\u1169=\\uFFCC=\\u3157"
+  "&\\u116A=\\uFFCD=\\u3158"
+  "&\\u116B=\\uFFCE=\\u3159"
+  "&\\u116C=\\uFFCF=\\u315A"
+  "&\\u116D=\\uFFD2=\\u315B"
+  "&\\u116E=\\uFFD3=\\u315C"
+  "&\\u116F=\\uFFD4=\\u315D"
+  "&\\u1170=\\uFFD5=\\u315E"
+  "&\\u1171=\\uFFD6=\\u315F"
+  "&\\u1172=\\uFFD7=\\u3160"
+  "&\\u1173=\\uFFDA=\\u3161"
+  "&\\u1174=\\uFFDB=\\u3162"
+  "&\\u1175=\\uFFDC=\\u3163"
+  "&\\u11AA=\\uFFA3=\\u3133"
+  "&\\u11AC=\\uFFA5=\\u3135"
+  "&\\u11AD=\\uFFA6=\\u3136"
+  "&\\u11B0=\\uFFAA=\\u313A"
+  "&\\u11B1=\\uFFAB=\\u313B"
+  "&\\u11B2=\\uFFAC=\\u313C"
+  "&\\u11B3=\\uFFAD=\\u313D"
+  "&\\u11B4=\\uFFAE=\\u313E"
+  "&\\u11B5=\\uFFAF=\\u313F"
+  "&\\u20A9=\\uFFE6"
+  "&\\u2190=\\uFFE9"
+  "&\\u2191=\\uFFEA"
+  "&\\u2192=\\uFFEB"
+  "&\\u2193=\\uFFEC"
+  "&\\u2502=\\uFFE8"
+  "&\\u25A0=\\uFFED"
+  "&\\u25CB=\\uFFEE"
+  "&\\u3001=\\uFF64"
+  "&\\u3002=\\uFF61"
+  "&\\u300C=\\uFF62"
+  "&\\u300D=\\uFF63";
+
+/*
+  The variables below are only declared here. They are defined in a separate
+  .cc file, generated by uca9dump at build time for the Japanese collations.
+
+  NOTE(review): the same change adds charset/uca900_ja_tbls.cc to the source
+  list in deps/oblib/src/lib/CMakeLists.txt, so in this tree the tables may
+  be checked in rather than produced during the build - confirm.
+ */
+// Array of pointers to uint16 data; presumably one entry per Han weight page
+// - verify against uca900_ja_tbls.cc.
+extern uint16 *ja_han_pages[];
+// Presumably the lowest and highest page numbers covered by ja_han_pages
+// - verify against the defining file.
+extern const int MIN_JA_HAN_PAGE;
+extern const int MAX_JA_HAN_PAGE;
+#endif
--- a/deps/oblib/src/lib/charset/uca900_ja_tbls.cc
+++ b/deps/oblib/src/lib/charset/uca900_ja_tbls.cc
--- a/deps/oblib/src/lib/charset/uca900_zh2_tbls.cc
+++ b/deps/oblib/src/lib/charset/uca900_zh2_tbls.cc
--- a/deps/oblib/src/lib/charset/uca900_zh3_tbls.cc
+++ b/deps/oblib/src/lib/charset/uca900_zh3_tbls.cc
--- a/deps/oblib/src/lib/charset/uca900_zh_data.h
+++ b/deps/oblib/src/lib/charset/uca900_zh_data.h
@ -0,0 +1,918 @@
+/**
+ * Copyright (c) 2021 OceanBase
+ * OceanBase CE is licensed under Mulan PubL v2.
+ * You can use this software according to the terms and conditions of the Mulan PubL v2.
+ * You may obtain a copy of Mulan PubL v2 at:
+ *          http://license.coscl.org.cn/MulanPubL-2.0
+ * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
+ * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
+ * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
+ * See the Mulan PubL v2 for more details.
+ */
+#ifndef UCA900_ZH_DATA_H
+#define UCA900_ZH_DATA_H
+
+/*
+  For a collation whose tailoring rules change a character's primary weight,
+  we give that character an extra collation element (see the comments in
+  my_char_weight_put_900()). Usually the primary weight of this extra CE
+  starts from 0x54A4, which is the biggest primary weight of all regular
+  characters (non-CJK and non-ignorable) in DUCET. But Chinese is special:
+  to keep assigning a single primary weight to character groups like Latin,
+  Cyrillic, etc., we used all weight values in [0x1C47, 0xF643], so the
+  primary weight of the extra CE starts from 0xF644 to avoid overlapping
+  weights.
+
+  NOTE(review): the paragraph above only explains 0xF644 (ZH_EXTRA_CE_PRI).
+  ZH2_EXTRA_CE_PRI and ZH3_EXTRA_CE_PRI are presumably the corresponding
+  starting weights for the zh2 / zh3 rule sets; the reasoning behind their
+  values is not given here - confirm.
+ */
+constexpr int ZH_EXTRA_CE_PRI = 0xF644;
+constexpr int ZH2_EXTRA_CE_PRI = 0x94AF;
+constexpr int ZH3_EXTRA_CE_PRI = 0x550D;
+
+/*
+  Tailoring rule text for the "zh" rule set (the identifier suggests CLDR 30
+  as the origin - TODO confirm). The value is a single C string assembled
+  from adjacent literals. Code points are written as backslash-u escapes with
+  a doubled backslash, so the compiler stores them as plain text and leaves
+  the decoding to whatever consumes this string; some escapes carry six hex
+  digits (for example the U+1F2xx targets).
+
+  The operators look like LDML collation tailoring syntax ("&" reset point,
+  "<" / "<<" / "<<<" primary / secondary / tertiary difference, "/"
+  expansion, "[before N]" insert before the reset point) - confirm against
+  the rule parser.
+
+  Rule groups, in the order they appear:
+    - Latin letters a, e, i, m, n, o, u and U followed by accented forms,
+      chained with "<<" and "<<<".
+    - "(" ideograph ")" sequences, each followed by "<<<" and one code point
+      in U+3220..U+3243.
+    - ASCII digits followed by U+65E5, U+6708 or U+70B9, each followed by
+      "<<<" and one code point in U+33E0..U+33FE, U+32C0..U+32CB or
+      U+3358..U+3370 respectively.
+    - U+3014 ideograph U+3015 sequences, each followed by "<<<" and one code
+      point in U+1F240..U+1F248.
+    - Single ideographs (plus a few ideograph sequences), each followed by
+      one or more "<<<" targets.
+    - Three closing rules that use "<" together with "/".
+ */
+static const char zh_cldr_30[] =
+    "&[before 2]a<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD"
+    "<<\\u00E0<<<\\u00C0"
+    "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A"
+    "<<\\u00E8<<<\\u00C8"
+    "&e<<e\\u0302\\u0304<<<E\\u0302\\u0304<<e\\u0302\\u0301<<<E\\u0302\\u0301"
+    "<<e\\u0302\\u030C<<<E\\u0302\\u030C<<e\\u0302\\u0300<<<E\\u0302\\u0300"
+    "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF"
+    "<<\\u00EC<<<\\u00CC"
+    "&[before 2]m<<m\\u0304<<<M\\u0304<<\\u1E3F<<<\\u1E3E<<m\\u030C"
+    "<<<M\\u030C<<m\\u0300<<<M\\u0300"
+    "&[before 2]n<<n\\u0304<<<N\\u0304<<\\u0144<<<\\u0143<<\\u0148<<<\\u0147"
+    "<<\\u01F9<<<\\u01F8"
+    "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1"
+    "<<\\u00F2<<<\\u00D2"
+    "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3"
+    "<<\\u00F9<<<\\u00D9"
+    "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC"
+    "<<<\\u01DB<<\\u00FC<<<\\u00DC"
+    "&(\\u4E00)<<<\\u3220"
+    "&(\\u4E03)<<<\\u3226"
+    "&(\\u4E09)<<<\\u3222"
+    "&(\\u4E5D)<<<\\u3228"
+    "&(\\u4E8C)<<<\\u3221"
+    "&(\\u4E94)<<<\\u3224"
+    "&(\\u4EE3)<<<\\u3239"
+    "&(\\u4F01)<<<\\u323D"
+    "&(\\u4F11)<<<\\u3241"
+    "&(\\u516B)<<<\\u3227"
+    "&(\\u516D)<<<\\u3225"
+    "&(\\u52B4)<<<\\u3238"
+    "&(\\u5341)<<<\\u3229"
+    "&(\\u5354)<<<\\u323F"
+    "&(\\u540D)<<<\\u3234"
+    "&(\\u547C)<<<\\u323A"
+    "&(\\u56DB)<<<\\u3223"
+    "&(\\u571F)<<<\\u322F"
+    "&(\\u5B66)<<<\\u323B"
+    "&(\\u65E5)<<<\\u3230"
+    "&(\\u6708)<<<\\u322A"
+    "&(\\u6709)<<<\\u3232"
+    "&(\\u6728)<<<\\u322D"
+    "&(\\u682A)<<<\\u3231"
+    "&(\\u6C34)<<<\\u322C"
+    "&(\\u706B)<<<\\u322B"
+    "&(\\u7279)<<<\\u3235"
+    "&(\\u76E3)<<<\\u323C"
+    "&(\\u793E)<<<\\u3233"
+    "&(\\u795D)<<<\\u3237"
+    "&(\\u796D)<<<\\u3240"
+    "&(\\u81EA)<<<\\u3242"
+    "&(\\u81F3)<<<\\u3243"
+    "&(\\u8CA1)<<<\\u3236"
+    "&(\\u8CC7)<<<\\u323E"
+    "&(\\u91D1)<<<\\u322E"
+    "&0\\u70B9<<<\\u3358"
+    "&10\\u65E5<<<\\u33E9"
+    "&10\\u6708<<<\\u32C9"
+    "&10\\u70B9<<<\\u3362"
+    "&11\\u65E5<<<\\u33EA"
+    "&11\\u6708<<<\\u32CA"
+    "&11\\u70B9<<<\\u3363"
+    "&12\\u65E5<<<\\u33EB"
+    "&12\\u6708<<<\\u32CB"
+    "&12\\u70B9<<<\\u3364"
+    "&13\\u65E5<<<\\u33EC"
+    "&13\\u70B9<<<\\u3365"
+    "&14\\u65E5<<<\\u33ED"
+    "&14\\u70B9<<<\\u3366"
+    "&15\\u65E5<<<\\u33EE"
+    "&15\\u70B9<<<\\u3367"
+    "&16\\u65E5<<<\\u33EF"
+    "&16\\u70B9<<<\\u3368"
+    "&17\\u65E5<<<\\u33F0"
+    "&17\\u70B9<<<\\u3369"
+    "&18\\u65E5<<<\\u33F1"
+    "&18\\u70B9<<<\\u336A"
+    "&19\\u65E5<<<\\u33F2"
+    "&19\\u70B9<<<\\u336B"
+    "&1\\u65E5<<<\\u33E0"
+    "&1\\u6708<<<\\u32C0"
+    "&1\\u70B9<<<\\u3359"
+    "&20\\u65E5<<<\\u33F3"
+    "&20\\u70B9<<<\\u336C"
+    "&21\\u65E5<<<\\u33F4"
+    "&21\\u70B9<<<\\u336D"
+    "&22\\u65E5<<<\\u33F5"
+    "&22\\u70B9<<<\\u336E"
+    "&23\\u65E5<<<\\u33F6"
+    "&23\\u70B9<<<\\u336F"
+    "&24\\u65E5<<<\\u33F7"
+    "&24\\u70B9<<<\\u3370"
+    "&25\\u65E5<<<\\u33F8"
+    "&26\\u65E5<<<\\u33F9"
+    "&27\\u65E5<<<\\u33FA"
+    "&28\\u65E5<<<\\u33FB"
+    "&29\\u65E5<<<\\u33FC"
+    "&2\\u65E5<<<\\u33E1"
+    "&2\\u6708<<<\\u32C1"
+    "&2\\u70B9<<<\\u335A"
+    "&30\\u65E5<<<\\u33FD"
+    "&31\\u65E5<<<\\u33FE"
+    "&3\\u65E5<<<\\u33E2"
+    "&3\\u6708<<<\\u32C2"
+    "&3\\u70B9<<<\\u335B"
+    "&4\\u65E5<<<\\u33E3"
+    "&4\\u6708<<<\\u32C3"
+    "&4\\u70B9<<<\\u335C"
+    "&5\\u65E5<<<\\u33E4"
+    "&5\\u6708<<<\\u32C4"
+    "&5\\u70B9<<<\\u335D"
+    "&6\\u65E5<<<\\u33E5"
+    "&6\\u6708<<<\\u32C5"
+    "&6\\u70B9<<<\\u335E"
+    "&7\\u65E5<<<\\u33E6"
+    "&7\\u6708<<<\\u32C6"
+    "&7\\u70B9<<<\\u335F"
+    "&8\\u65E5<<<\\u33E7"
+    "&8\\u6708<<<\\u32C7"
+    "&8\\u70B9<<<\\u3360"
+    "&9\\u65E5<<<\\u33E8"
+    "&9\\u6708<<<\\u32C8"
+    "&9\\u70B9<<<\\u3361"
+    "&\\u3014\\u4E09\\u3015<<<\\u01F241"
+    "&\\u3014\\u4E8C\\u3015<<<\\u01F242"
+    "&\\u3014\\u52DD\\u3015<<<\\u01F247"
+    "&\\u3014\\u5B89\\u3015<<<\\u01F243"
+    "&\\u3014\\u6253\\u3015<<<\\u01F245"
+    "&\\u3014\\u6557\\u3015<<<\\u01F248"
+    "&\\u3014\\u672C\\u3015<<<\\u01F240"
+    "&\\u3014\\u70B9\\u3015<<<\\u01F244"
+    "&\\u3014\\u76D7\\u3015<<<\\u01F246"
+    "&\\u4E00<<<\\u2F00<<<\\u3192<<<\\u3280<<<\\u01F229"
+    "&\\u4E01<<<\\u319C"
+    "&\\u4E03<<<\\u3286"
+    "&\\u4E09<<<\\u3194<<<\\u3282<<<\\u01F22A"
+    "&\\u4E0A<<<\\u3196<<<\\u32A4"
+    "&\\u4E0B<<<\\u3198<<<\\u32A6"
+    "&\\u4E19<<<\\u319B"
+    "&\\u4E28<<<\\u2F01"
+    "&\\u4E2D<<<\\u3197<<<\\u32A5<<<\\u01F22D"
+    "&\\u4E36<<<\\u2F02"
+    "&\\u4E3F<<<\\u2F03"
+    "&\\u4E59<<<\\u2F04<<<\\u319A"
+    "&\\u4E5D<<<\\u3288"
+    "&\\u4E85<<<\\u2F05"
+    "&\\u4E8C<<<\\u2F06<<<\\u3193<<<\\u3281<<<\\u01F214"
+    "&\\u4E94<<<\\u3284"
+    "&\\u4EA0<<<\\u2F07"
+    "&\\u4EA4<<<\\u01F218"
+    "&\\u4EBA<<<\\u2F08<<<\\u319F"
+    "&\\u4F01<<<\\u32AD"
+    "&\\u4F11<<<\\u32A1"
+    "&\\u512A<<<\\u329D"
+    "&\\u513F<<<\\u2F09"
+    "&\\u5165<<<\\u2F0A"
+    "&\\u516B<<<\\u2F0B<<<\\u3287"
+    "&\\u516D<<<\\u3285"
+    "&\\u5182<<<\\u2F0C"
+    "&\\u518D<<<\\u01F21E"
+    "&\\u5196<<<\\u2F0D"
+    "&\\u5199<<<\\u32A2"
+    "&\\u51AB<<<\\u2F0E"
+    "&\\u51E0<<<\\u2F0F"
+    "&\\u51F5<<<\\u2F10"
+    "&\\u5200<<<\\u2F11"
+    "&\\u521D<<<\\u01F220"
+    "&\\u524D<<<\\u01F21C"
+    "&\\u5272<<<\\u01F239"
+    "&\\u529B<<<\\u2F12"
+    "&\\u52B4<<<\\u3298"
+    "&\\u52F9<<<\\u2F13"
+    "&\\u5315<<<\\u2F14"
+    "&\\u531A<<<\\u2F15"
+    "&\\u5338<<<\\u2F16<<<\\u32A9"
+    "&\\u5341<<<\\u2F17<<<\\u3038<<<\\u3289"
+    "&\\u5344<<<\\u3039"
+    "&\\u5345<<<\\u303A"
+    "&\\u5354<<<\\u32AF"
+    "&\\u535C<<<\\u2F18"
+    "&\\u5369<<<\\u2F19"
+    "&\\u5370<<<\\u329E"
+    "&\\u5382<<<\\u2F1A"
+    "&\\u53B6<<<\\u2F1B"
+    "&\\u53C8<<<\\u2F1C"
+    "&\\u53CC<<<\\u01F212"
+    "&\\u53E3<<<\\u2F1D"
+    "&\\u53EF<<<\\u01F251"
+    "&\\u53F3<<<\\u32A8<<<\\u01F22E"
+    "&\\u5408<<<\\u01F234"
+    "&\\u540D<<<\\u3294"
+    "&\\u5439<<<\\u01F225"
+    "&\\u554F<<<\\u3244"
+    "&\\u55B6<<<\\u01F23A"
+    "&\\u56D7<<<\\u2F1E"
+    "&\\u56DB<<<\\u3195<<<\\u3283"
+    "&\\u571F<<<\\u2F1F<<<\\u328F"
+    "&\\u5730<<<\\u319E"
+    "&\\u58EB<<<\\u2F20"
+    "&\\u58F0<<<\\u01F224"
+    "&\\u5902<<<\\u2F21"
+    "&\\u590A<<<\\u2F22"
+    "&\\u5915<<<\\u2F23"
+    "&\\u591A<<<\\u01F215"
+    "&\\u591C<<<\\u32B0"
+    "&\\u5927<<<\\u2F24"
+    "&\\u5927\\u6B63<<<\\u337D"
+    "&\\u5929<<<\\u319D<<<\\u01F217"
+    "&\\u5973<<<\\u2F25<<<\\u329B"
+    "&\\u5B50<<<\\u2F26"
+    "&\\u5B57<<<\\u01F211"
+    "&\\u5B66<<<\\u32AB"
+    "&\\u5B80<<<\\u2F27"
+    "&\\u5B97<<<\\u32AA"
+    "&\\u5BF8<<<\\u2F28"
+    "&\\u5C0F<<<\\u2F29"
+    "&\\u5C22<<<\\u2F2A"
+    "&\\u5C38<<<\\u2F2B"
+    "&\\u5C6E<<<\\u2F2C"
+    "&\\u5C71<<<\\u2F2D"
+    "&\\u5DDB<<<\\u2F2E"
+    "&\\u5DE5<<<\\u2F2F"
+    "&\\u5DE6<<<\\u32A7<<<\\u01F22C"
+    "&\\u5DF1<<<\\u2F30"
+    "&\\u5DFE<<<\\u2F31"
+    "&\\u5E72<<<\\u2F32"
+    "&\\u5E73\\u6210<<<\\u337B"
+    "&\\u5E7A<<<\\u2F33"
+    "&\\u5E7C<<<\\u3245"
+    "&\\u5E7F<<<\\u2F34"
+    "&\\u5EF4<<<\\u2F35"
+    "&\\u5EFE<<<\\u2F36"
+    "&\\u5F0B<<<\\u2F37"
+    "&\\u5F13<<<\\u2F38"
+    "&\\u5F50<<<\\u2F39"
+    "&\\u5F61<<<\\u2F3A"
+    "&\\u5F73<<<\\u2F3B"
+    "&\\u5F8C<<<\\u01F21D"
+    "&\\u5F97<<<\\u01F250"
+    "&\\u5FC3<<<\\u2F3C"
+    "&\\u6208<<<\\u2F3D"
+    "&\\u6236<<<\\u2F3E"
+    "&\\u624B<<<\\u2F3F<<<\\u01F210"
+    "&\\u6253<<<\\u01F231"
+    "&\\u6295<<<\\u01F227"
+    "&\\u6307<<<\\u01F22F"
+    "&\\u6355<<<\\u01F228"
+    "&\\u652F<<<\\u2F40"
+    "&\\u6534<<<\\u2F41"
+    "&\\u6587<<<\\u2F42<<<\\u3246"
+    "&\\u6597<<<\\u2F43"
+    "&\\u6599<<<\\u01F21B"
+    "&\\u65A4<<<\\u2F44"
+    "&\\u65B0<<<\\u01F21F"
+    "&\\u65B9<<<\\u2F45"
+    "&\\u65E0<<<\\u2F46"
+    "&\\u65E5<<<\\u2F47<<<\\u3290"
+    "&\\u660E\\u6CBB<<<\\u337E"
+    "&\\u6620<<<\\u01F219"
+    "&\\u662D\\u548C<<<\\u337C"
+    "&\\u66F0<<<\\u2F48"
+    "&\\u6708<<<\\u2F49<<<\\u328A<<<\\u01F237"
+    "&\\u6709<<<\\u3292<<<\\u01F236"
+    "&\\u6728<<<\\u2F4A<<<\\u328D"
+    "&\\u682A<<<\\u3291"
+    "&\\u682A\\u5F0F\\u4F1A\\u793E<<<\\u337F"
+    "&\\u6B20<<<\\u2F4B"
+    "&\\u6B62<<<\\u2F4C"
+    "&\\u6B63<<<\\u32A3"
+    "&\\u6B79<<<\\u2F4D"
+    "&\\u6BB3<<<\\u2F4E"
+    "&\\u6BCB<<<\\u2F4F"
+    "&\\u6BCD<<<\\u2E9F"
+    "&\\u6BD4<<<\\u2F50"
+    "&\\u6BDB<<<\\u2F51"
+    "&\\u6C0F<<<\\u2F52"
+    "&\\u6C14<<<\\u2F53"
+    "&\\u6C34<<<\\u2F54<<<\\u328C"
+    "&\\u6CE8<<<\\u329F"
+    "&\\u6E80<<<\\u01F235"
+    "&\\u6F14<<<\\u01F226"
+    "&\\u706B<<<\\u2F55<<<\\u328B"
+    "&\\u7121<<<\\u01F21A"
+    "&\\u722A<<<\\u2F56"
+    "&\\u7236<<<\\u2F57"
+    "&\\u723B<<<\\u2F58"
+    "&\\u723F<<<\\u2F59"
+    "&\\u7247<<<\\u2F5A"
+    "&\\u7259<<<\\u2F5B"
+    "&\\u725B<<<\\u2F5C"
+    "&\\u7279<<<\\u3295"
+    "&\\u72AC<<<\\u2F5D"
+    "&\\u7384<<<\\u2F5E"
+    "&\\u7389<<<\\u2F5F"
+    "&\\u74DC<<<\\u2F60"
+    "&\\u74E6<<<\\u2F61"
+    "&\\u7518<<<\\u2F62"
+    "&\\u751F<<<\\u2F63<<<\\u01F222"
+    "&\\u7528<<<\\u2F64"
+    "&\\u7530<<<\\u2F65"
+    "&\\u7532<<<\\u3199"
+    "&\\u7533<<<\\u01F238"
+    "&\\u7537<<<\\u329A"
+    "&\\u758B<<<\\u2F66"
+    "&\\u7592<<<\\u2F67"
+    "&\\u7676<<<\\u2F68"
+    "&\\u767D<<<\\u2F69"
+    "&\\u76AE<<<\\u2F6A"
+    "&\\u76BF<<<\\u2F6B"
+    "&\\u76E3<<<\\u32AC"
+    "&\\u76EE<<<\\u2F6C"
+    "&\\u77DB<<<\\u2F6D"
+    "&\\u77E2<<<\\u2F6E"
+    "&\\u77F3<<<\\u2F6F"
+    "&\\u793A<<<\\u2F70"
+    "&\\u793E<<<\\u3293"
+    "&\\u795D<<<\\u3297"
+    "&\\u7981<<<\\u01F232"
+    "&\\u79B8<<<\\u2F71"
+    "&\\u79BE<<<\\u2F72"
+    "&\\u79D8<<<\\u3299"
+    "&\\u7A74<<<\\u2F73"
+    "&\\u7A7A<<<\\u01F233"
+    "&\\u7ACB<<<\\u2F74"
+    "&\\u7AF9<<<\\u2F75"
+    "&\\u7B8F<<<\\u3247"
+    "&\\u7C73<<<\\u2F76"
+    "&\\u7CF8<<<\\u2F77"
+    "&\\u7D42<<<\\u01F221"
+    "&\\u7F36<<<\\u2F78"
+    "&\\u7F51<<<\\u2F79"
+    "&\\u7F8A<<<\\u2F7A"
+    "&\\u7FBD<<<\\u2F7B"
+    "&\\u8001<<<\\u2F7C"
+    "&\\u800C<<<\\u2F7D"
+    "&\\u8012<<<\\u2F7E"
+    "&\\u8033<<<\\u2F7F"
+    "&\\u807F<<<\\u2F80"
+    "&\\u8089<<<\\u2F81"
+    "&\\u81E3<<<\\u2F82"
+    "&\\u81EA<<<\\u2F83"
+    "&\\u81F3<<<\\u2F84"
+    "&\\u81FC<<<\\u2F85"
+    "&\\u820C<<<\\u2F86"
+    "&\\u821B<<<\\u2F87"
+    "&\\u821F<<<\\u2F88"
+    "&\\u826E<<<\\u2F89"
+    "&\\u8272<<<\\u2F8A"
+    "&\\u8278<<<\\u2F8B"
+    "&\\u864D<<<\\u2F8C"
+    "&\\u866B<<<\\u2F8D"
+    "&\\u8840<<<\\u2F8E"
+    "&\\u884C<<<\\u2F8F"
+    "&\\u8863<<<\\u2F90"
+    "&\\u897E<<<\\u2F91"
+    "&\\u898B<<<\\u2F92"
+    "&\\u89D2<<<\\u2F93"
+    "&\\u89E3<<<\\u01F216"
+    "&\\u8A00<<<\\u2F94"
+    "&\\u8C37<<<\\u2F95"
+    "&\\u8C46<<<\\u2F96"
+    "&\\u8C55<<<\\u2F97"
+    "&\\u8C78<<<\\u2F98"
+    "&\\u8C9D<<<\\u2F99"
+    "&\\u8CA1<<<\\u3296"
+    "&\\u8CA9<<<\\u01F223"
+    "&\\u8CC7<<<\\u32AE"
+    "&\\u8D64<<<\\u2F9A"
+    "&\\u8D70<<<\\u2F9B<<<\\u01F230"
+    "&\\u8DB3<<<\\u2F9C"
+    "&\\u8EAB<<<\\u2F9D"
+    "&\\u8ECA<<<\\u2F9E"
+    "&\\u8F9B<<<\\u2F9F"
+    "&\\u8FB0<<<\\u2FA0"
+    "&\\u8FB5<<<\\u2FA1"
+    "&\\u904A<<<\\u01F22B"
+    "&\\u9069<<<\\u329C"
+    "&\\u9091<<<\\u2FA2"
+    "&\\u9149<<<\\u2FA3"
+    "&\\u914D<<<\\u01F23B"
+    "&\\u91C6<<<\\u2FA4"
+    "&\\u91CC<<<\\u2FA5"
+    "&\\u91D1<<<\\u2FA6<<<\\u328E"
+    "&\\u9577<<<\\u2FA7"
+    "&\\u9580<<<\\u2FA8"
+    "&\\u961C<<<\\u2FA9"
+    "&\\u96B6<<<\\u2FAA"
+    "&\\u96B9<<<\\u2FAB"
+    "&\\u96E8<<<\\u2FAC"
+    "&\\u9751<<<\\u2FAD"
+    "&\\u975E<<<\\u2FAE"
+    "&\\u9762<<<\\u2FAF"
+    "&\\u9769<<<\\u2FB0"
+    "&\\u97CB<<<\\u2FB1"
+    "&\\u97ED<<<\\u2FB2"
+    "&\\u97F3<<<\\u2FB3"
+    "&\\u9801<<<\\u2FB4"
+    "&\\u9805<<<\\u32A0"
+    "&\\u98A8<<<\\u2FB5"
+    "&\\u98DB<<<\\u2FB6"
+    "&\\u98DF<<<\\u2FB7"
+    "&\\u9996<<<\\u2FB8"
+    "&\\u9999<<<\\u2FB9"
+    "&\\u99AC<<<\\u2FBA"
+    "&\\u9AA8<<<\\u2FBB"
+    "&\\u9AD8<<<\\u2FBC"
+    "&\\u9ADF<<<\\u2FBD"
+    "&\\u9B25<<<\\u2FBE"
+    "&\\u9B2F<<<\\u2FBF"
+    "&\\u9B32<<<\\u2FC0"
+    "&\\u9B3C<<<\\u2FC1"
+    "&\\u9B5A<<<\\u2FC2"
+    "&\\u9CE5<<<\\u2FC3"
+    "&\\u9E75<<<\\u2FC4"
+    "&\\u9E7F<<<\\u2FC5"
+    "&\\u9EA5<<<\\u2FC6"
+    "&\\u9EBB<<<\\u2FC7"
+    "&\\u9EC3<<<\\u2FC8"
+    "&\\u9ECD<<<\\u2FC9"
+    "&\\u9ED1<<<\\u2FCA"
+    "&\\u9EF9<<<\\u2FCB"
+    "&\\u9EFD<<<\\u2FCC"
+    "&\\u9F0E<<<\\u2FCD"
+    "&\\u9F13<<<\\u2FCE"
+    "&\\u9F20<<<\\u2FCF"
+    "&\\u9F3B<<<\\u2FD0"
+    "&\\u9F4A<<<\\u2FD1"
+    "&\\u9F52<<<\\u2FD2"
+    "&\\u9F8D<<<\\u2FD3"
+    "&\\u9F9C<<<\\u2FD4"
+    "&\\u9F9F<<<\\u2EF3"
+    "&\\u9FA0<<<\\u2FD5"
+    "&\\u02342F<\\u91CD\\u5E86/\\u5E86"
+    "&\\u5F1E<\\u6C88\\u9633/\\u9633"
+    "&\\u92BA<\\u85CF\\u6587/\\u6587";
+
+static const char zh2_cldr_30[] =
+    "&[before 2]a<<\\u0101<<<\\u0100<<\\u00E1<<<\\u00C1<<\\u01CE<<<\\u01CD"
+    "<<\\u00E0<<<\\u00C0"
+    "&[before 2]e<<\\u0113<<<\\u0112<<\\u00E9<<<\\u00C9<<\\u011B<<<\\u011A"
+    "<<\\u00E8<<<\\u00C8"
+    "&e<<e\\u0302\\u0304<<<E\\u0302\\u0304<<e\\u0302\\u0301<<<E\\u0302\\u0301"
+    "<<e\\u0302\\u030C<<<E\\u0302\\u030C<<e\\u0302\\u0300<<<E\\u0302\\u0300"
+    "&[before 2]i<<\\u012B<<<\\u012A<<\\u00ED<<<\\u00CD<<\\u01D0<<<\\u01CF"
+    "<<\\u00EC<<<\\u00CC"
+    "&[before 2]m<<m\\u0304<<<M\\u0304<<\\u1E3F<<<\\u1E3E<<m\\u030C"
+    "<<<M\\u030C<<m\\u0300<<<M\\u0300"
+    "&[before 2]n<<n\\u0304<<<N\\u0304<<\\u0144<<<\\u0143<<\\u0148<<<\\u0147"
+    "<<\\u01F9<<<\\u01F8"
+    "&[before 2]o<<\\u014D<<<\\u014C<<\\u00F3<<<\\u00D3<<\\u01D2<<<\\u01D1"
+    "<<\\u00F2<<<\\u00D2"
+    "&[before 2]u<<\\u016B<<<\\u016A<<\\u00FA<<<\\u00DA<<\\u01D4<<<\\u01D3"
+    "<<\\u00F9<<<\\u00D9"
+    "&U<<\\u01D6<<<\\u01D5<<\\u01D8<<<\\u01D7<<\\u01DA<<<\\u01D9<<\\u01DC"
+    "<<<\\u01DB<<\\u00FC<<<\\u00DC"
+    "&(\\u4E00)<<<\\u3220"
+    "&(\\u4E03)<<<\\u3226"
+    "&(\\u4E09)<<<\\u3222"
+    "&(\\u4E5D)<<<\\u3228"
+    "&(\\u4E8C)<<<\\u3221"
+    "&(\\u4E94)<<<\\u3224"
+    "&(\\u4EE3)<<<\\u3239"
+    "&(\\u4F01)<<<\\u323D"
+    "&(\\u4F11)<<<\\u3241"
+    "&(\\u516B)<<<\\u3227"
+    "&(\\u516D)<<<\\u3225"
+    "&(\\u52B4)<<<\\u3238"
+    "&(\\u5341)<<<\\u3229"
+    "&(\\u5354)<<<\\u323F"
+    "&(\\u540D)<<<\\u3234"
+    "&(\\u547C)<<<\\u323A"
+    "&(\\u56DB)<<<\\u3223"
+    "&(\\u571F)<<<\\u322F"
+    "&(\\u5B66)<<<\\u323B"
+    "&(\\u65E5)<<<\\u3230"
+    "&(\\u6708)<<<\\u322A"
+    "&(\\u6709)<<<\\u3232"
+    "&(\\u6728)<<<\\u322D"
+    "&(\\u682A)<<<\\u3231"
+    "&(\\u6C34)<<<\\u322C"
+    "&(\\u706B)<<<\\u322B"
+    "&(\\u7279)<<<\\u3235"
+    "&(\\u76E3)<<<\\u323C"
+    "&(\\u793E)<<<\\u3233"
+    "&(\\u795D)<<<\\u3237"
+    "&(\\u796D)<<<\\u3240"
+    "&(\\u81EA)<<<\\u3242"
+    "&(\\u81F3)<<<\\u3243"
+    "&(\\u8CA1)<<<\\u3236"
+    "&(\\u8CC7)<<<\\u323E"
+    "&(\\u91D1)<<<\\u322E"
+    "&0\\u70B9<<<\\u3358"
+    "&10\\u65E5<<<\\u33E9"
+    "&10\\u6708<<<\\u32C9"
+    "&10\\u70B9<<<\\u3362"
+    "&11\\u65E5<<<\\u33EA"
+    "&11\\u6708<<<\\u32CA"
+    "&11\\u70B9<<<\\u3363"
+    "&12\\u65E5<<<\\u33EB"
+    "&12\\u6708<<<\\u32CB"
+    "&12\\u70B9<<<\\u3364"
+    "&13\\u65E5<<<\\u33EC"
+    "&13\\u70B9<<<\\u3365"
+    "&14\\u65E5<<<\\u33ED"
+    "&14\\u70B9<<<\\u3366"
+    "&15\\u65E5<<<\\u33EE"
+    "&15\\u70B9<<<\\u3367"
+    "&16\\u65E5<<<\\u33EF"
+    "&16\\u70B9<<<\\u3368"
+    "&17\\u65E5<<<\\u33F0"
+    "&17\\u70B9<<<\\u3369"
+    "&18\\u65E5<<<\\u33F1"
+    "&18\\u70B9<<<\\u336A"
+    "&19\\u65E5<<<\\u33F2"
+    "&19\\u70B9<<<\\u336B"
+    "&1\\u65E5<<<\\u33E0"
+    "&1\\u6708<<<\\u32C0"
+    "&1\\u70B9<<<\\u3359"
+    "&20\\u65E5<<<\\u33F3"
+    "&20\\u70B9<<<\\u336C"
+    "&21\\u65E5<<<\\u33F4"
+    "&21\\u70B9<<<\\u336D"
+    "&22\\u65E5<<<\\u33F5"
+    "&22\\u70B9<<<\\u336E"
+    "&23\\u65E5<<<\\u33F6"
+    "&23\\u70B9<<<\\u336F"
+    "&24\\u65E5<<<\\u33F7"
+    "&24\\u70B9<<<\\u3370"
+    "&25\\u65E5<<<\\u33F8"
+    "&26\\u65E5<<<\\u33F9"
+    "&27\\u65E5<<<\\u33FA"
+    "&28\\u65E5<<<\\u33FB"
+    "&29\\u65E5<<<\\u33FC"
+    "&2\\u65E5<<<\\u33E1"
+    "&2\\u6708<<<\\u32C1"
+    "&2\\u70B9<<<\\u335A"
+    "&30\\u65E5<<<\\u33FD"
+    "&31\\u65E5<<<\\u33FE"
+    "&3\\u65E5<<<\\u33E2"
+    "&3\\u6708<<<\\u32C2"
+    "&3\\u70B9<<<\\u335B"
+    "&4\\u65E5<<<\\u33E3"
+    "&4\\u6708<<<\\u32C3"
+    "&4\\u70B9<<<\\u335C"
+    "&5\\u65E5<<<\\u33E4"
+    "&5\\u6708<<<\\u32C4"
+    "&5\\u70B9<<<\\u335D"
+    "&6\\u65E5<<<\\u33E5"
+    "&6\\u6708<<<\\u32C5"
+    "&6\\u70B9<<<\\u335E"
+    "&7\\u65E5<<<\\u33E6"
+    "&7\\u6708<<<\\u32C6"
+    "&7\\u70B9<<<\\u335F"
+    "&8\\u65E5<<<\\u33E7"
+    "&8\\u6708<<<\\u32C7"
+    "&8\\u70B9<<<\\u3360"
+    "&9\\u65E5<<<\\u33E8"
+    "&9\\u6708<<<\\u32C8"
+    "&9\\u70B9<<<\\u3361"
+    "&\\u3014\\u4E09\\u3015<<<\\u01F241"
+    "&\\u3014\\u4E8C\\u3015<<<\\u01F242"
+    "&\\u3014\\u52DD\\u3015<<<\\u01F247"
+    "&\\u3014\\u5B89\\u3015<<<\\u01F243"
+    "&\\u3014\\u6253\\u3015<<<\\u01F245"
+    "&\\u3014\\u6557\\u3015<<<\\u01F248"
+    "&\\u3014\\u672C\\u3015<<<\\u01F240"
+    "&\\u3014\\u70B9\\u3015<<<\\u01F244"
+    "&\\u3014\\u76D7\\u3015<<<\\u01F246"
+    "&\\u4E00<<<\\u2F00<<<\\u3192<<<\\u3280<<<\\u01F229"
+    "&\\u4E01<<<\\u319C"
+    "&\\u4E03<<<\\u3286"
+    "&\\u4E09<<<\\u3194<<<\\u3282<<<\\u01F22A"
+    "&\\u4E0A<<<\\u3196<<<\\u32A4"
+    "&\\u4E0B<<<\\u3198<<<\\u32A6"
+    "&\\u4E19<<<\\u319B"
+    "&\\u4E28<<<\\u2F01"
+    "&\\u4E2D<<<\\u3197<<<\\u32A5<<<\\u01F22D"
+    "&\\u4E36<<<\\u2F02"
+    "&\\u4E3F<<<\\u2F03"
+    "&\\u4E59<<<\\u2F04<<<\\u319A"
+    "&\\u4E5D<<<\\u3288"
+    "&\\u4E85<<<\\u2F05"
+    "&\\u4E8C<<<\\u2F06<<<\\u3193<<<\\u3281<<<\\u01F214"
+    "&\\u4E94<<<\\u3284"
+    "&\\u4EA0<<<\\u2F07"
+    "&\\u4EA4<<<\\u01F218"
+    "&\\u4EBA<<<\\u2F08<<<\\u319F"
+    "&\\u4F01<<<\\u32AD"
+    "&\\u4F11<<<\\u32A1"
+    "&\\u512A<<<\\u329D"
+    "&\\u513F<<<\\u2F09"
+    "&\\u5165<<<\\u2F0A"
+    "&\\u516B<<<\\u2F0B<<<\\u3287"
+    "&\\u516D<<<\\u3285"
+    "&\\u5182<<<\\u2F0C"
+    "&\\u518D<<<\\u01F21E"
+    "&\\u5196<<<\\u2F0D"
+    "&\\u5199<<<\\u32A2"
+    "&\\u51AB<<<\\u2F0E"
+    "&\\u51E0<<<\\u2F0F"
+    "&\\u51F5<<<\\u2F10"
+    "&\\u5200<<<\\u2F11"
+    "&\\u521D<<<\\u01F220"
+    "&\\u524D<<<\\u01F21C"
+    "&\\u5272<<<\\u01F239"
+    "&\\u529B<<<\\u2F12"
+    "&\\u52B4<<<\\u3298"
+    "&\\u52F9<<<\\u2F13"
+    "&\\u5315<<<\\u2F14"
+    "&\\u531A<<<\\u2F15"
+    "&\\u5338<<<\\u2F16<<<\\u32A9"
+    "&\\u5341<<<\\u2F17<<<\\u3038<<<\\u3289"
+    "&\\u5344<<<\\u3039"
+    "&\\u5345<<<\\u303A"
+    "&\\u5354<<<\\u32AF"
+    "&\\u535C<<<\\u2F18"
+    "&\\u5369<<<\\u2F19"
+    "&\\u5370<<<\\u329E"
+    "&\\u5382<<<\\u2F1A"
+    "&\\u53B6<<<\\u2F1B"
+    "&\\u53C8<<<\\u2F1C"
+    "&\\u53CC<<<\\u01F212"
+    "&\\u53E3<<<\\u2F1D"
+    "&\\u53EF<<<\\u01F251"
+    "&\\u53F3<<<\\u32A8<<<\\u01F22E"
+    "&\\u5408<<<\\u01F234"
+    "&\\u540D<<<\\u3294"
+    "&\\u5439<<<\\u01F225"
+    "&\\u554F<<<\\u3244"
+    "&\\u55B6<<<\\u01F23A"
+    "&\\u56D7<<<\\u2F1E"
+    "&\\u56DB<<<\\u3195<<<\\u3283"
+    "&\\u571F<<<\\u2F1F<<<\\u328F"
+    "&\\u5730<<<\\u319E"
+    "&\\u58EB<<<\\u2F20"
+    "&\\u58F0<<<\\u01F224"
+    "&\\u5902<<<\\u2F21"
+    "&\\u590A<<<\\u2F22"
+    "&\\u5915<<<\\u2F23"
+    "&\\u591A<<<\\u01F215"
+    "&\\u591C<<<\\u32B0"
+    "&\\u5927<<<\\u2F24"
+    "&\\u5927\\u6B63<<<\\u337D"
+    "&\\u5929<<<\\u319D<<<\\u01F217"
+    "&\\u5973<<<\\u2F25<<<\\u329B"
+    "&\\u5B50<<<\\u2F26"
+    "&\\u5B57<<<\\u01F211"
+    "&\\u5B66<<<\\u32AB"
+    "&\\u5B80<<<\\u2F27"
+    "&\\u5B97<<<\\u32AA"
+    "&\\u5BF8<<<\\u2F28"
+    "&\\u5C0F<<<\\u2F29"
+    "&\\u5C22<<<\\u2F2A"
+    "&\\u5C38<<<\\u2F2B"
+    "&\\u5C6E<<<\\u2F2C"
+    "&\\u5C71<<<\\u2F2D"
+    "&\\u5DDB<<<\\u2F2E"
+    "&\\u5DE5<<<\\u2F2F"
+    "&\\u5DE6<<<\\u32A7<<<\\u01F22C"
+    "&\\u5DF1<<<\\u2F30"
+    "&\\u5DFE<<<\\u2F31"
+    "&\\u5E72<<<\\u2F32"
+    "&\\u5E73\\u6210<<<\\u337B"
+    "&\\u5E7A<<<\\u2F33"
+    "&\\u5E7C<<<\\u3245"
+    "&\\u5E7F<<<\\u2F34"
+    "&\\u5EF4<<<\\u2F35"
+    "&\\u5EFE<<<\\u2F36"
+    "&\\u5F0B<<<\\u2F37"
+    "&\\u5F13<<<\\u2F38"
+    "&\\u5F50<<<\\u2F39"
+    "&\\u5F61<<<\\u2F3A"
+    "&\\u5F73<<<\\u2F3B"
+    "&\\u5F8C<<<\\u01F21D"
+    "&\\u5F97<<<\\u01F250"
+    "&\\u5FC3<<<\\u2F3C"
+    "&\\u6208<<<\\u2F3D"
+    "&\\u6236<<<\\u2F3E"
+    "&\\u624B<<<\\u2F3F<<<\\u01F210"
+    "&\\u6253<<<\\u01F231"
+    "&\\u6295<<<\\u01F227"
+    "&\\u6307<<<\\u01F22F"
+    "&\\u6355<<<\\u01F228"
+    "&\\u652F<<<\\u2F40"
+    "&\\u6534<<<\\u2F41"
+    "&\\u6587<<<\\u2F42<<<\\u3246"
+    "&\\u6597<<<\\u2F43"
+    "&\\u6599<<<\\u01F21B"
+    "&\\u65A4<<<\\u2F44"
+    "&\\u65B0<<<\\u01F21F"
+    "&\\u65B9<<<\\u2F45"
+    "&\\u65E0<<<\\u2F46"
+    "&\\u65E5<<<\\u2F47<<<\\u3290"
+    "&\\u660E\\u6CBB<<<\\u337E"
+    "&\\u6620<<<\\u01F219"
+    "&\\u662D\\u548C<<<\\u337C"
+    "&\\u66F0<<<\\u2F48"
+    "&\\u6708<<<\\u2F49<<<\\u328A<<<\\u01F237"
+    "&\\u6709<<<\\u3292<<<\\u01F236"
+    "&\\u6728<<<\\u2F4A<<<\\u328D"
+    "&\\u682A<<<\\u3291"
+    "&\\u682A\\u5F0F\\u4F1A\\u793E<<<\\u337F"
+    "&\\u6B20<<<\\u2F4B"
+    "&\\u6B62<<<\\u2F4C"
+    "&\\u6B63<<<\\u32A3"
+    "&\\u6B79<<<\\u2F4D"
+    "&\\u6BB3<<<\\u2F4E"
+    "&\\u6BCB<<<\\u2F4F"
+    "&\\u6BCD<<<\\u2E9F"
+    "&\\u6BD4<<<\\u2F50"
+    "&\\u6BDB<<<\\u2F51"
+    "&\\u6C0F<<<\\u2F52"
+    "&\\u6C14<<<\\u2F53"
+    "&\\u6C34<<<\\u2F54<<<\\u328C"
+    "&\\u6CE8<<<\\u329F"
+    "&\\u6E80<<<\\u01F235"
+    "&\\u6F14<<<\\u01F226"
+    "&\\u706B<<<\\u2F55<<<\\u328B"
+    "&\\u7121<<<\\u01F21A"
+    "&\\u722A<<<\\u2F56"
+    "&\\u7236<<<\\u2F57"
+    "&\\u723B<<<\\u2F58"
+    "&\\u723F<<<\\u2F59"
+    "&\\u7247<<<\\u2F5A"
+    "&\\u7259<<<\\u2F5B"
+    "&\\u725B<<<\\u2F5C"
+    "&\\u7279<<<\\u3295"
+    "&\\u72AC<<<\\u2F5D"
+    "&\\u7384<<<\\u2F5E"
+    "&\\u7389<<<\\u2F5F"
+    "&\\u74DC<<<\\u2F60"
+    "&\\u74E6<<<\\u2F61"
+    "&\\u7518<<<\\u2F62"
+    "&\\u751F<<<\\u2F63<<<\\u01F222"
+    "&\\u7528<<<\\u2F64"
+    "&\\u7530<<<\\u2F65"
+    "&\\u7532<<<\\u3199"
+    "&\\u7533<<<\\u01F238"
+    "&\\u7537<<<\\u329A"
+    "&\\u758B<<<\\u2F66"
+    "&\\u7592<<<\\u2F67"
+    "&\\u7676<<<\\u2F68"
+    "&\\u767D<<<\\u2F69"
+    "&\\u76AE<<<\\u2F6A"
+    "&\\u76BF<<<\\u2F6B"
+    "&\\u76E3<<<\\u32AC"
+    "&\\u76EE<<<\\u2F6C"
+    "&\\u77DB<<<\\u2F6D"
+    "&\\u77E2<<<\\u2F6E"
+    "&\\u77F3<<<\\u2F6F"
+    "&\\u793A<<<\\u2F70"
+    "&\\u793E<<<\\u3293"
+    "&\\u795D<<<\\u3297"
+    "&\\u7981<<<\\u01F232"
+    "&\\u79B8<<<\\u2F71"
+    "&\\u79BE<<<\\u2F72"
+    "&\\u79D8<<<\\u3299"
+    "&\\u7A74<<<\\u2F73"
+    "&\\u7A7A<<<\\u01F233"
+    "&\\u7ACB<<<\\u2F74"
+    "&\\u7AF9<<<\\u2F75"
+    "&\\u7B8F<<<\\u3247"
+    "&\\u7C73<<<\\u2F76"
+    "&\\u7CF8<<<\\u2F77"
+    "&\\u7D42<<<\\u01F221"
+    "&\\u7F36<<<\\u2F78"
+    "&\\u7F51<<<\\u2F79"
+    "&\\u7F8A<<<\\u2F7A"
+    "&\\u7FBD<<<\\u2F7B"
+    "&\\u8001<<<\\u2F7C"
+    "&\\u800C<<<\\u2F7D"
+    "&\\u8012<<<\\u2F7E"
+    "&\\u8033<<<\\u2F7F"
+    "&\\u807F<<<\\u2F80"
+    "&\\u8089<<<\\u2F81"
+    "&\\u81E3<<<\\u2F82"
+    "&\\u81EA<<<\\u2F83"
+    "&\\u81F3<<<\\u2F84"
+    "&\\u81FC<<<\\u2F85"
+    "&\\u820C<<<\\u2F86"
+    "&\\u821B<<<\\u2F87"
+    "&\\u821F<<<\\u2F88"
+    "&\\u826E<<<\\u2F89"
+    "&\\u8272<<<\\u2F8A"
+    "&\\u8278<<<\\u2F8B"
+    "&\\u864D<<<\\u2F8C"
+    "&\\u866B<<<\\u2F8D"
+    "&\\u8840<<<\\u2F8E"
+    "&\\u884C<<<\\u2F8F"
+    "&\\u8863<<<\\u2F90"
+    "&\\u897E<<<\\u2F91"
+    "&\\u898B<<<\\u2F92"
+    "&\\u89D2<<<\\u2F93"
+    "&\\u89E3<<<\\u01F216"
+    "&\\u8A00<<<\\u2F94"
+    "&\\u8C37<<<\\u2F95"
+    "&\\u8C46<<<\\u2F96"
+    "&\\u8C55<<<\\u2F97"
+    "&\\u8C78<<<\\u2F98"
+    "&\\u8C9D<<<\\u2F99"
+    "&\\u8CA1<<<\\u3296"
+    "&\\u8CA9<<<\\u01F223"
+    "&\\u8CC7<<<\\u32AE"
+    "&\\u8D64<<<\\u2F9A"
+    "&\\u8D70<<<\\u2F9B<<<\\u01F230"
+    "&\\u8DB3<<<\\u2F9C"
+    "&\\u8EAB<<<\\u2F9D"
+    "&\\u8ECA<<<\\u2F9E"
+    "&\\u8F9B<<<\\u2F9F"
+    "&\\u8FB0<<<\\u2FA0"
+    "&\\u8FB5<<<\\u2FA1"
+    "&\\u904A<<<\\u01F22B"
+    "&\\u9069<<<\\u329C"
+    "&\\u9091<<<\\u2FA2"
+    "&\\u9149<<<\\u2FA3"
+    "&\\u914D<<<\\u01F23B"
+    "&\\u91C6<<<\\u2FA4"
+    "&\\u91CC<<<\\u2FA5"
+    "&\\u91D1<<<\\u2FA6<<<\\u328E"
+    "&\\u9577<<<\\u2FA7"
+    "&\\u9580<<<\\u2FA8"
+    "&\\u961C<<<\\u2FA9"
+    "&\\u96B6<<<\\u2FAA"
+    "&\\u96B9<<<\\u2FAB"
+    "&\\u96E8<<<\\u2FAC"
+    "&\\u9751<<<\\u2FAD"
+    "&\\u975E<<<\\u2FAE"
+    "&\\u9762<<<\\u2FAF"
+    "&\\u9769<<<\\u2FB0"
+    "&\\u97CB<<<\\u2FB1"
+    "&\\u97ED<<<\\u2FB2"
+    "&\\u97F3<<<\\u2FB3"
+    "&\\u9801<<<\\u2FB4"
+    "&\\u9805<<<\\u32A0"
+    "&\\u98A8<<<\\u2FB5"
+    "&\\u98DB<<<\\u2FB6"
+    "&\\u98DF<<<\\u2FB7"
+    "&\\u9996<<<\\u2FB8"
+    "&\\u9999<<<\\u2FB9"
+    "&\\u99AC<<<\\u2FBA"
+    "&\\u9AA8<<<\\u2FBB"
+    "&\\u9AD8<<<\\u2FBC"
+    "&\\u9ADF<<<\\u2FBD"
+    "&\\u9B25<<<\\u2FBE"
+    "&\\u9B2F<<<\\u2FBF"
+    "&\\u9B32<<<\\u2FC0"
+    "&\\u9B3C<<<\\u2FC1"
+    "&\\u9B5A<<<\\u2FC2"
+    "&\\u9CE5<<<\\u2FC3"
+    "&\\u9E75<<<\\u2FC4"
+    "&\\u9E7F<<<\\u2FC5"
+    "&\\u9EA5<<<\\u2FC6"
+    "&\\u9EBB<<<\\u2FC7"
+    "&\\u9EC3<<<\\u2FC8"
+    "&\\u9ECD<<<\\u2FC9"
+    "&\\u9ED1<<<\\u2FCA"
+    "&\\u9EF9<<<\\u2FCB"
+    "&\\u9EFD<<<\\u2FCC"
+    "&\\u9F0E<<<\\u2FCD"
+    "&\\u9F13<<<\\u2FCE"
+    "&\\u9F20<<<\\u2FCF"
+    "&\\u9F3B<<<\\u2FD0"
+    "&\\u9F4A<<<\\u2FD1"
+    "&\\u9F52<<<\\u2FD2"
+    "&\\u9F8D<<<\\u2FD3"
+    "&\\u9F9C<<<\\u2FD4"
+    "&\\u9F9F<<<\\u2EF3"
+    "&\\u9FA0<<<\\u2FD5";
+
+/*
+  The variables below (three parallel sets: zh, zh2, zh3) are defined in a separate
+  .cc file, generated by uca9dump at build-time for the Chinese collation.
+ */
+extern uint16 *zh_han_pages[];
+extern const int MIN_ZH_HAN_PAGE;
+extern const int MAX_ZH_HAN_PAGE;
+extern int zh_han_to_single_weight[];
+extern const int ZH_HAN_WEIGHT_PAIRS;
+extern uint16 *zh2_han_pages[];
+extern const int MIN_ZH2_HAN_PAGE;
+extern const int MAX_ZH2_HAN_PAGE;
+extern int zh2_han_to_single_weight[];
+extern const int ZH2_HAN_WEIGHT_PAIRS;
+extern uint16 *zh3_han_pages[];
+extern const int MIN_ZH3_HAN_PAGE;
+extern const int MAX_ZH3_HAN_PAGE;
+extern int zh3_han_to_single_weight[];
+extern const int ZH3_HAN_WEIGHT_PAIRS;
+#endif
--- a/deps/oblib/src/lib/charset/uca900_zh_tbls.cc
+++ b/deps/oblib/src/lib/charset/uca900_zh_tbls.cc
--- a/src/share/schema/ob_database_sql_service.cpp
+++ b/src/share/schema/ob_database_sql_service.cpp
@ -47,7 +47,10 @@ int ObDatabaseSqlService::insert_database(const ObDatabaseSchema &database_schem
    LOG_WARN("database schema is invalid", K(ret));
  } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(database_schema.get_charset_type(),
                                                                    exec_tenant_id))) {
-    LOG_WARN("failed to check charset data version valid", K(ret));
+    LOG_WARN("failed to check charset data version valid", K(database_schema.get_charset_type()), K(ret));
+  } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(database_schema.get_collation_type(),
+                                                                      exec_tenant_id))) {
+    LOG_WARN("failed to check collation data version valid", K(database_schema.get_collation_type()), K(ret));
  } else {
    int64_t affected_rows = 0;
    ObDMLSqlSplicer dml;
@ -125,7 +128,10 @@ int ObDatabaseSqlService::update_database(const ObDatabaseSchema &database_schem
    LOG_WARN("database scheam is invalid", K(ret));
  } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(database_schema.get_charset_type(),
                                                                    exec_tenant_id))) {
-    LOG_WARN("failed to check charset data version valid", K(ret));
+    LOG_WARN("failed to check charset data version valid", K(database_schema.get_charset_type()), K(ret));
+  } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(database_schema.get_collation_type(),
+                                                                      exec_tenant_id))) {
+    LOG_WARN("failed to check collation data version valid", K(database_schema.get_collation_type()), K(ret));
  } else {
    int64_t affected_rows = 0;
    ObDMLSqlSplicer dml;
--- a/src/share/schema/ob_table_sql_service.cpp
+++ b/src/share/schema/ob_table_sql_service.cpp
@ -2658,7 +2658,10 @@ int ObTableSqlService::gen_table_dml(
    LOG_WARN("check ddl allowd failed", K(ret), K(table));
  } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(table.get_charset_type(),
                                                                    exec_tenant_id))) {
-    LOG_WARN("failed to check charset data version valid", K(ret));
+    LOG_WARN("failed to check charset data version valid", K(table.get_charset_type()), K(ret));
+  } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(table.get_collation_type(),
+                                                                      exec_tenant_id))) {
+    LOG_WARN("failed to check collation data version valid", K(table.get_collation_type()), K(ret));
  } else if (OB_FAIL(GET_MIN_DATA_VERSION(table.get_tenant_id(), data_version))) {
    LOG_WARN("failed to get data version", K(ret));
  } else if (data_version < DATA_VERSION_4_1_0_0
@ -3897,7 +3900,10 @@ int ObTableSqlService::gen_column_dml(
    LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2, skip index");
  } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(column.get_charset_type(),
                                                                    exec_tenant_id))) {
-    LOG_WARN("failed to check charset data version valid", K(ret));
+    LOG_WARN("failed to check charset data version valid",  K(column.get_charset_type()), K(ret));
+  } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(column.get_collation_type(),
+                                                                      exec_tenant_id))) {
+    LOG_WARN("failed to check collation data version valid",  K(column.get_collation_type()), K(ret));
  } else if (column.is_generated_column() ||
      column.is_identity_column() ||
      ob_is_string_type(column.get_data_type()) ||
--- a/src/share/schema/ob_tenant_sql_service.cpp
+++ b/src/share/schema/ob_tenant_sql_service.cpp
@ -78,7 +78,10 @@ int ObTenantSqlService::alter_tenant(
    LOG_WARN("invalid tenant schema", K(tenant_schema), K(ret));
  } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(tenant_schema.get_charset_type(),
                                                                    tenant_schema.get_tenant_id()))) {
-    LOG_WARN("failed to check charset data version valid", K(ret));
+    LOG_WARN("failed to check charset data version valid", K(tenant_schema.get_charset_type()), K(ret));
+  } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(tenant_schema.get_collation_type(),
+                                                                      tenant_schema.get_tenant_id()))) {
+    LOG_WARN("failed to check collation data version valid", K(tenant_schema.get_collation_type()), K(ret));
  } else if (OB_FAIL(replace_tenant(tenant_schema, op, sql_client, ddl_stmt_str))) {
    LOG_WARN("replace_tenant failed", K(tenant_schema), K(op), K(ret));
  }
--- a/src/sql/engine/cmd/ob_set_names_executor.cpp
+++ b/src/sql/engine/cmd/ob_set_names_executor.cpp
@ -81,7 +81,7 @@ int ObSetNamesExecutor::execute(ObExecContext &ctx, ObSetNamesStmt &stmt)
          if (CS_TYPE_INVALID == cs_coll_type || CS_TYPE_INVALID == coll_type) {
            ret = OB_ERR_UNEXPECTED;
            SQL_ENG_LOG(ERROR, "cs coll type or coll type is invalid", K(ret), K(cs_coll_type), K(coll_type));
-          } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
+          } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(ObCharset::charset_type(charset),
                                                                            session->get_effective_tenant_id()))) {
            SQL_EXE_LOG(WARN, "failed to check charset data version valid", K(ret));
          } else if (OB_FAIL(session->update_sys_variable(SYS_VAR_CHARACTER_SET_CLIENT,
@ -102,7 +102,7 @@ int ObSetNamesExecutor::execute(ObExecContext &ctx, ObSetNamesStmt &stmt)
          ObObj database_charset;
          ObObj database_collation;
          ObCollationType cs_coll_type = ObCharset::get_default_collation(ObCharset::charset_type(charset));
-          if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(cs_coll_type),
+          if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(ObCharset::charset_type(charset),
                                                                     session->get_effective_tenant_id()))) {
            SQL_EXE_LOG(WARN, "failed to check charset data version valid", K(ret));
          } else if (OB_FAIL(session->get_sys_variable(SYS_VAR_CHARACTER_SET_DATABASE,
--- a/src/sql/engine/cmd/ob_variable_set_executor.cpp
+++ b/src/sql/engine/cmd/ob_variable_set_executor.cpp
@ -566,6 +566,9 @@ int ObVariableSetExecutor::update_global_variables(ObExecContext &ctx,
      } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(static_cast<ObCollationType>(coll_int64)),
                                                                        session->get_effective_tenant_id()))) {
        LOG_WARN("failed to check charset data version valid", K(ret));
+      } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(static_cast<ObCollationType>(coll_int64),
+                                                                          session->get_effective_tenant_id()))) {
+        LOG_WARN("failed to check collation data version valid", K(ret));
      } else if (FALSE_IT(coll_str = ObString::make_string(ObCharset::collation_name(static_cast<ObCollationType>(coll_int64))))) {
        //do nothing
      } else if (OB_FAIL(ObBasicSysVar::get_charset_var_and_val_by_collation(
@ -591,6 +594,9 @@ int ObVariableSetExecutor::update_global_variables(ObExecContext &ctx,
      } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(static_cast<ObCollationType>(coll_int64)),
                                                                        session->get_effective_tenant_id()))) {
        LOG_WARN("failed to check charset data version valid", K(ret));
+      } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(static_cast<ObCollationType>(coll_int64),
+                                                                          session->get_effective_tenant_id()))) {
+        LOG_WARN("failed to check collation data version valid", K(ret));
      } else if (FALSE_IT(cs_str = ObString::make_string(ObCharset::charset_name(
                                   ObCharset::charset_type_by_coll(static_cast<ObCollationType>(coll_int64)))))) {
        //do nothing
--- a/src/sql/ob_sql_utils.cpp
+++ b/src/sql/ob_sql_utils.cpp
@ -600,6 +600,24 @@ int ObSQLUtils::is_charset_data_version_valid(ObCharsetType charset_type, const
  return ret;
 }

+// Reject the utf16/utf8mb4 unicode_ci collations while the tenant's min data version is below 4.2.2.0.
+int ObSQLUtils::is_collation_data_version_valid(ObCollationType collation_type, const int64_t tenant_id)
+{
+  int ret = OB_SUCCESS;
+#ifndef OB_BUILD_CLOSE_MODULES  // gate is compiled only when OB_BUILD_CLOSE_MODULES is not defined
+  uint64_t data_version = 0;
+  if (OB_FAIL(GET_MIN_DATA_VERSION(tenant_id, data_version))) {
+    SQL_LOG(WARN, "failed to GET_MIN_DATA_VERSION", K(tenant_id), K(ret));
+  } else if (data_version < DATA_VERSION_4_2_2_0 &&
+             (CS_TYPE_UTF16_UNICODE_CI == collation_type || CS_TYPE_UTF8MB4_UNICODE_CI == collation_type)) {
+    ret = OB_NOT_SUPPORTED;
+    SQL_LOG(WARN, "Unicode collation not supported when data_version < 4_2_2_0", K(collation_type), K(ret));
+    LOG_USER_ERROR(OB_NOT_SUPPORTED, "tenant data version is less than 4.2.2, unicode collation is");
+  }
+#endif  // OB_BUILD_CLOSE_MODULES
+  return ret;
+}
+
 // 参数raw_expr中如果出现函数addr_to_partition_id，
 // 那么得到的partition_id结果在后面无法映射到相应的addr
 int ObSQLUtils::calc_calculable_expr(ObSQLSessionInfo *session,
--- a/src/sql/ob_sql_utils.h
+++ b/src/sql/ob_sql_utils.h
@ -275,6 +275,7 @@ public:
    }
  }
  static int is_charset_data_version_valid(ObCharsetType charset_type, const int64_t tenant_id);
+  static int is_collation_data_version_valid(ObCollationType collation_type, const int64_t tenant_id);
  static int calc_calculable_expr(ObSQLSessionInfo *session,
                                  const ObRawExpr *expr,
                                  common::ObObj &result,
--- a/src/sql/parser/CMakeLists.txt
+++ b/src/sql/parser/CMakeLists.txt
@ -1,26 +1,9 @@

 # charset objects used for proxy_parser

-if (OB_BUILD_OPENSOURCE)
+
+
 set(ob_sql_parser_charset_object_list
-  ob_ctype_bin_os.cc
-  ob_ctype_gb18030_os.cc
-  ob_ctype_gbk_os.cc
-  ob_ctype_latin1_os.cc
-  ob_ctype_mb_os.cc
-  ob_ctype_simple_os.cc
-  ob_ctype_os.cc
-  ob_ctype_utf16_os.cc
-  ob_ctype_utf8_os.cc
-  ob_dtoa_os.cc
-)
-endif()
-
-list(TRANSFORM ob_sql_parser_charset_object_list
-  PREPEND ${PROJECT_SOURCE_DIR}/deps/oblib/src/lib/charset/)
-
-if(OB_BUILD_FULL_CHARSET)
-  set(ob_sql_parser_full_charset_object_list
    ob_ctype_bin.cc
    ob_ctype.cc
    ob_ctype_gbk.cc
@ -36,12 +19,10 @@ if(OB_BUILD_FULL_CHARSET)
    uca900_zh_tbls.cc
    uca900_zh2_tbls.cc
    uca900_zh3_tbls.cc
-  )
-endif()
-
-list(TRANSFORM ob_sql_parser_full_charset_object_list
-  PREPEND ${PROJECT_SOURCE_DIR}/close_modules/charset/deps/oblib/src/lib/charset/)
+    )

+list(TRANSFORM ob_sql_parser_charset_object_list
+     PREPEND ${PROJECT_SOURCE_DIR}/deps/oblib/src/lib/charset/)
 # hash objects used for proxy parser
 set(ob_sql_parser_hash_object_list
  murmur_hash.h
@ -124,19 +105,12 @@ set(ob_extra_sql_parser_object_list
 )

 # ob_sql_proxy_parser_objects is the static library for proxy, it does not link against observer
-if (OB_BUILD_FULL_CHARSET)
-  add_library(ob_sql_proxy_parser_objects OBJECT
-              ${ob_inner_sql_parser_object_list}
-              ${ob_sql_parser_hash_object_list}
-              ${ob_sql_parser_full_charset_object_list}
-              )
-else()
-  add_library(ob_sql_proxy_parser_objects OBJECT
-              ${ob_inner_sql_parser_object_list}
-              ${ob_sql_parser_hash_object_list}
-              ${ob_sql_parser_charset_object_list}
-              )
-endif()
+add_library(ob_sql_proxy_parser_objects OBJECT
+            ${ob_inner_sql_parser_object_list}
+            ${ob_sql_parser_hash_object_list}
+            ${ob_sql_parser_charset_object_list}
+            )
+

 # ob_sql_server_parser_object is the static library for observer
 add_library(ob_sql_server_parser_objects OBJECT
--- a/src/sql/resolver/ddl/ob_alter_table_resolver.cpp
+++ b/src/sql/resolver/ddl/ob_alter_table_resolver.cpp
@ -4454,6 +4454,9 @@ int ObAlterTableResolver::resolve_convert_to_character(const ParseNode &node)
    } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
                                                                      session_info_->get_effective_tenant_id()))) {
      LOG_WARN("failed to check charset data version valid", K(ret));
+    } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
+                                                                        session_info_->get_effective_tenant_id()))) {
+      LOG_WARN("failed to check collation data version valid", K(ret));
    } else {
      collation_type_ = collation_type;
    }
--- a/src/sql/resolver/ddl/ob_database_resolver.h
+++ b/src/sql/resolver/ddl/ob_database_resolver.h
@ -160,6 +160,9 @@ int ObDatabaseResolver<T>::resolve_database_option(T *stmt, ParseNode *node, ObS
          } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(charset_type,
                                                                            session_info->get_effective_tenant_id()))) {
            OB_LOG(WARN, "failed to check charset data version valid", K(ret));
+          } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
+                                                                              session_info->get_effective_tenant_id()))) {
+            OB_LOG(WARN, "failed to check collation data version valid", K(ret));
          } else if (OB_UNLIKELY(collation_already_set_
                              && stmt->get_charset_type() != charset_type)) {
            // mysql执行下面这条sql时会报错，为了行为与mysql一致，resolve时即检查collation/charset不一致的问题
@ -181,6 +184,12 @@ int ObDatabaseResolver<T>::resolve_database_option(T *stmt, ParseNode *node, ObS
            ret = common::OB_ERR_UNEXPECTED;
            SQL_RESV_LOG(WARN, "all valid collation types should have corresponding charset type",
                            K(ret), K(charset_type), K(collation_type));
+          } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(charset_type,
+                                                                            session_info->get_effective_tenant_id()))) {
+            OB_LOG(WARN, "failed to check charset data version valid", K(ret));
+          } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
+                                                                              session_info->get_effective_tenant_id()))) {
+            OB_LOG(WARN, "failed to check collation data version valid", K(ret));
          } else if (OB_UNLIKELY(collation_already_set_
                              && stmt->get_charset_type() != charset_type)) {
            ret = OB_ERR_COLLATION_MISMATCH;
--- a/src/sql/resolver/ddl/ob_ddl_resolver.cpp
+++ b/src/sql/resolver/ddl/ob_ddl_resolver.cpp
@ -1470,6 +1470,9 @@ int ObDDLResolver::resolve_table_option(const ParseNode *option_node, const bool
            } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
                                                                              session_info_->get_effective_tenant_id()))) {
              SQL_RESV_LOG(WARN, "failed to check charset data version valid", K(ret));
+            } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
+                                                                                session_info_->get_effective_tenant_id()))) {
+              SQL_RESV_LOG(WARN, "failed to check collation data version valid", K(ret));
            } else {
              collation_type_ = collation_type;
              if (stmt::T_ALTER_TABLE == stmt_->get_stmt_type()) {
--- a/src/sql/resolver/ddl/ob_tenant_resolver.h
+++ b/src/sql/resolver/ddl/ob_tenant_resolver.h
@ -153,6 +153,9 @@ int ObTenantResolver<T>::resolve_tenant_option(T *stmt, ParseNode *node,
        } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
                                                                          session_info->get_effective_tenant_id()))) {
          LOG_WARN("failed to check charset data version valid", K(ret));
+        } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
+                                                                            session_info->get_effective_tenant_id()))) {
+          LOG_WARN("failed to check collation data version valid", K(ret));
        } else {
          collation_type_ = collation_type;
          if (stmt->get_stmt_type() == stmt::T_MODIFY_TENANT) {
--- a/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp
+++ b/src/sql/resolver/expr/ob_raw_expr_resolver_impl.cpp
@ -4877,6 +4877,9 @@ int ObRawExprResolverImpl::process_collation_node(const ParseNode *node, ObRawEx
    } else if (OB_FAIL(sql::ObSQLUtils::is_charset_data_version_valid(common::ObCharset::charset_type_by_coll(collation_type),
                                                                      ctx_.session_info_->get_effective_tenant_id()))) {
      LOG_WARN("failed to check charset data version valid", K(ret));
+    } else if (OB_FAIL(sql::ObSQLUtils::is_collation_data_version_valid(collation_type,
+                                                                        ctx_.session_info_->get_effective_tenant_id()))) {
+      LOG_WARN("failed to check collation data version valid", K(ret));
    } else if (OB_FAIL(ctx_.expr_factory_.create_raw_expr(T_INT, c_expr))) {
      LOG_WARN("fail to create raw expr", K(ret));
    } else if (OB_ISNULL(c_expr)) {
--- a/tools/deploy/mysql_test/r/mysql/special_hook.result
+++ b/tools/deploy/mysql_test/r/mysql/special_hook.result
@ -13,6 +13,8 @@ gbk_chinese_ci	gbk	28	Yes	Yes	1
 gbk_bin	gbk	87		Yes	1
 utf16_general_ci	utf16	54	Yes	Yes	1
 utf16_bin	utf16	55		Yes	1
+utf8mb4_unicode_ci	utf8mb4	224		Yes	1
+utf16_unicode_ci	utf16	101		Yes	1
 gb18030_chinese_ci	gb18030	248	Yes	Yes	1
 gb18030_bin	gb18030	249		Yes	1
 latin1_swedish_ci	latin1	8	Yes	Yes	1
--- a/tools/deploy/mysql_test/r/mysql/special_stmt.result
+++ b/tools/deploy/mysql_test/r/mysql/special_stmt.result
@ -7,6 +7,8 @@ gbk_chinese_ci	gbk	28	Yes	Yes	1
 gbk_bin	gbk	87		Yes	1
 utf16_general_ci	utf16	54	Yes	Yes	1
 utf16_bin	utf16	55		Yes	1
+utf8mb4_unicode_ci	utf8mb4	224		Yes	1
+utf16_unicode_ci	utf16	101		Yes	1
 gb18030_chinese_ci	gb18030	248	Yes	Yes	1
 gb18030_bin	gb18030	249		Yes	1
 latin1_swedish_ci	latin1	8	Yes	Yes	1
@ -27,6 +29,8 @@ gbk_chinese_ci	gbk	28	Yes	Yes	1
 gbk_bin	gbk	87		Yes	1
 utf16_general_ci	utf16	54	Yes	Yes	1
 utf16_bin	utf16	55		Yes	1
+utf8mb4_unicode_ci	utf8mb4	224		Yes	1
+utf16_unicode_ci	utf16	101		Yes	1
 gb18030_chinese_ci	gb18030	248	Yes	Yes	1
 gb18030_bin	gb18030	249		Yes	1
 latin1_swedish_ci	latin1	8	Yes	Yes	1
--- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/collations.result
+++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/collations.result
@ -8,6 +8,8 @@ gbk_chinese_ci	gbk	28	Yes	Yes	1
 gbk_bin	gbk	87		Yes	1
 utf16_general_ci	utf16	54	Yes	Yes	1
 utf16_bin	utf16	55		Yes	1
+utf8mb4_unicode_ci	utf8mb4	224		Yes	1
+utf16_unicode_ci	utf16	101		Yes	1
 gb18030_chinese_ci	gb18030	248	Yes	Yes	1
 gb18030_bin	gb18030	249		Yes	1
 latin1_swedish_ci	latin1	8	Yes	Yes	1
@ -28,6 +30,8 @@ gbk_chinese_ci	gbk	28	Yes	Yes	1
 gbk_bin	gbk	87		Yes	1
 utf16_general_ci	utf16	54	Yes	Yes	1
 utf16_bin	utf16	55		Yes	1
+utf8mb4_unicode_ci	utf8mb4	224		Yes	1
+utf16_unicode_ci	utf16	101		Yes	1
 gb18030_chinese_ci	gb18030	248	Yes	Yes	1
 gb18030_bin	gb18030	249		Yes	1
 latin1_swedish_ci	latin1	8	Yes	Yes	1
@ -43,6 +47,7 @@ select * from collations where collation_name like '%utf8%';
 COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
 utf8mb4_general_ci	utf8mb4	45	Yes	Yes	1
 utf8mb4_bin	utf8mb4	46		Yes	1
+utf8mb4_unicode_ci	utf8mb4	224		Yes	1
 show create table collations;
 View	Create View	character_set_client	collation_connection
 COLLATIONS	CREATE VIEW `COLLATIONS` AS select collation as COLLATION_NAME, charset as CHARACTER_SET_NAME, id as ID, `is_default` as IS_DEFAULT, is_compiled as IS_COMPILED, sortlen as SORTLEN from oceanbase.__tenant_virtual_collation 	utf8mb4	utf8mb4_general_ci