patch 4.0

2022-10-24 10:34:53 +08:00
parent 4ad6e00ec3
commit 93a1074b0c
10533 changed files with 2588271 additions and 2299373 deletions
--- a/deps/oblib/unittest/lib/charset/test_charset.cpp
+++ b/deps/oblib/unittest/lib/charset/test_charset.cpp
@ -19,53 +19,53 @@
 #include "lib/string/ob_string.h"
 #include "lib/utility/ob_print_utils.h"
 #include "gtest/gtest.h"
-#include <iostream>
-#include <fstream>
+#include "unicode_map.h"
+#include "common/data_buffer.h"
+#include "lib/oblog/ob_log_module.h"
+#define USING_LOG_PREFIX SQL

 using namespace oceanbase::common;

-#define CUR_RESULT_FILE_SUFFIX ".record"
-#define STD_RESULT_FILE_SUFFIX ".result"
-
-class TestCharset : public ::testing::Test {
+class TestCharset: public ::testing::Test
+{
 public:
  TestCharset();
  virtual ~TestCharset();
  virtual void SetUp();
  virtual void TearDown();
-  template <typename func>
-  void for_each_utf8(func handle);
-
 protected:
-  void gen_random_unicode_string(const int len, char* res, int& real_len);
+  void gen_random_unicode_string(const int len, char *res, int &real_len);
  int random_range(const int low, const int high);
 };

 TestCharset::TestCharset()
-{}
+{
+}

 TestCharset::~TestCharset()
-{}
+{
+}

 void TestCharset::SetUp()
 {
-  srand((unsigned)time(NULL));
+  srand((unsigned)time(NULL ));
 }

 void TestCharset::TearDown()
-{}
+{
+}

 int TestCharset::random_range(const int low, const int high)
 {
  return std::rand() % (high - low) + low;
 }

-void TestCharset::gen_random_unicode_string(const int len, char* res, int& real_len)
+void TestCharset::gen_random_unicode_string(const int len, char *res, int &real_len)
 {
  int i = 0;
  int unicode_point = 0;
  std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
-  for (i = 0; i < len;) {
+  for (i = 0; i < len; ) {
    const int bytes = random_range(1, 7);
    if (bytes < 4) {
      unicode_point = random_range(0, 127);
@ -75,8 +75,8 @@ void TestCharset::gen_random_unicode_string(const int len, char* res, int& real_
      unicode_point = random_range(0XFFFF, 0X10FFFF);
    }
    std::string utf_str = converter.to_bytes(unicode_point);
-    // fprintf(stdout, "code_point=%d\n", unicode_point);
-    // fprintf(stdout, "utf8_str=%s\n", utf_str.c_str());
+    //fprintf(stdout, "code_point=%d\n", unicode_point);
+    //fprintf(stdout, "utf8_str=%s\n", utf_str.c_str());
    for (int j = 0; j < utf_str.size(); ++j) {
      res[i++] = utf_str[j];
    }
@ -157,8 +157,8 @@ TEST_F(TestCharset, sortkey)
  ASSERT_FALSE(is_valid_unicode);

  // The parameter of sortkey cannot be NULL
-  // char *p = NULL;
-  // size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, true, p, 0, aa1, 10);
+  //char *p = NULL;
+  //size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, true, p, 0, aa1, 10);
 }

 TEST_F(TestCharset, casedn)
@ -175,7 +175,7 @@ TEST_F(TestCharset, casedn)
  y1.assign_ptr(a1, 14);
  y2.assign_ptr(a2, 14);
  y3.assign_ptr(a3, 14);
-  fprintf(stdout, "ret:%p, %d\n", y1.ptr(), y1.length());
+  fprintf(stdout, "ret:%p, %d\n", y1.ptr(), y1.length() );
  size_t size1 = ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, y1);
  EXPECT_TRUE(y1 == y3);
  size_t size2 = ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, y2);
@ -189,10 +189,10 @@ TEST_F(TestCharset, casedn)

 TEST_F(TestCharset, case_insensitive_equal)
 {
-  ObString y1 = "Variable_name";
-  ObString y2 = "variable_name";
-  ObString y3 = "variable_name1";
-  ObString y4 = "variable_name1";
+  ObString y1= "Variable_name";
+  ObString y2= "variable_name";
+  ObString y3= "variable_name1";
+  ObString y4= "variable_name1";
  bool yy = ObCharset::case_insensitive_equal(y1, y2, CS_TYPE_UTF8MB4_GENERAL_CI);
  ASSERT_TRUE(yy);
  yy = ObCharset::case_insensitive_equal(y2, y3, CS_TYPE_UTF8MB4_GENERAL_CI);
@ -205,10 +205,10 @@ TEST_F(TestCharset, hash_sort)
 {
  ObString s;
  uint64_t ret = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, s.ptr(), s.length(), 0);
-  const char* a = "abd";
-  const char* b = "aBD";
-  uint64_t ret1 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, a, 3, 0, NULL);
-  uint64_t ret2 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, b, 3, 0, NULL);
+  const char *a = "abd";
+  const char *b = "aBD";
+  uint64_t ret1 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, a, 3, 0);
+  uint64_t ret2 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, b, 3, 0);
  fprintf(stdout, "ret:%lu, ret1:%lu, ret2:%lu\n", ret, ret1, ret2);
  uint64_t ret3 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, ObString::make_string(b));
  ASSERT_EQ(ret2, ret3);
@ -216,10 +216,10 @@ TEST_F(TestCharset, hash_sort)

 TEST_F(TestCharset, case_mode_equal)
 {
-  ObString y1 = "Variable_name";
-  ObString y2 = "variable_name";
-  ObString y3 = "variable_name1";
-  ObString y4 = "variable_name1";
+  ObString y1= "Variable_name";
+  ObString y2= "variable_name";
+  ObString y3= "variable_name1";
+  ObString y4= "variable_name1";
  bool is_equal = false;
  is_equal = ObCharset::case_mode_equal(OB_ORIGIN_AND_SENSITIVE, y1, y2);
  ASSERT_FALSE(is_equal);
@ -250,8 +250,8 @@ TEST_F(TestCharset, case_mode_equal)
 TEST_F(TestCharset, well_formed_length)
 {
  int ret = OB_SUCCESS;
-  const char* str = "\0123";
-  ObCollationType cs_type = CS_TYPE_UTF8MB4_GENERAL_CI;
+  const char *str = "\0123";
+  ObCollationType cs_type =  CS_TYPE_UTF8MB4_GENERAL_CI;
  int64_t well_formed_length = 0;
  int64_t str_len = 1;

@ -280,7 +280,7 @@ TEST_F(TestCharset, test_max_byte_char_pos)
    std::cout << "real_len" << real_len << std::endl;
    int64_t left_bytes = real_len;
    const int64_t block_size = 16000;
-    char* pos = buf;
+    char *pos = buf;
    while (left_bytes > 0) {
      int64_t well_formed_len = 0;
      int32_t well_formed_error = 0;
@ -312,7 +312,7 @@ TEST_F(TestCharset, test_ascii_list_for_all_charset)

  ASSERT_EQ(OB_SUCCESS, ObCharsetUtils::init(allocator));

-  std::cout << "ascii";
+  std::cout<< "ascii";
  for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
    auto charset_type = static_cast<ObCharsetType>(cs_i);
    if (!ObCharset::is_valid_charset(charset_type))
@ -324,7 +324,7 @@ TEST_F(TestCharset, test_ascii_list_for_all_charset)
  std::cout << std::endl;

  for (int ascii_wc = 0; ascii_wc <= INT8_MAX; ascii_wc++) {
-    std::cout << ascii_wc;
+    std::cout<< ascii_wc;
    for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
      auto charset_type = static_cast<ObCharsetType>(cs_i);
      if (!ObCharset::is_valid_charset(charset_type))
@ -333,479 +333,174 @@ TEST_F(TestCharset, test_ascii_list_for_all_charset)
      ASSERT_TRUE(ObCharset::is_valid_collation(cs_type));
      int64_t result_len = 0;
      ObString str = ObCharsetUtils::get_const_str(cs_type, ascii_wc);
-      ASSERT_EQ(OB_SUCCESS, hex_print(str.ptr(), str.length(), buf, buf_len, result_len));
+      ASSERT_EQ (OB_SUCCESS, hex_print(str.ptr(), str.length(), buf, buf_len, result_len));
      buf[result_len] = '\0';
-      std::cout << "\t" << buf;
+      std::cout <<"\t" << buf;
    }

    std::cout << std::endl;
  }
+
 }

-int unicode_to_utf8(ob_wc_t c, unsigned char* utf8string)
+TEST_F(TestCharset, test_find_gb18030_case_prob)
 {
-  if (c <= 0x7F) {
-    utf8string[0] = c;
-    return 1;
-  } else if (c <= 0x7FF) {
-    utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
-    utf8string[1] = 0x80 | (c & 0x3F);
-    return 2;
-  } else if (c <= 0xFFFF) {
-    utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
-    utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
-    utf8string[2] = 0x80 | (c & 0x3F);
-    return 3;
-  } else {
-    utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
-    utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
-    utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
-    utf8string[3] = 0x80 | (c & 0x3F);
-    return 4;
-  }
-
-  return 0;
-}
-
-template <typename func>
-void TestCharset::for_each_utf8(func handle)
-{
-  char buf[4];
-  ObString str(4, 0, buf);
-
-  for (ob_wc_t wchar = 0; wchar < 0x110000; wchar++) {
-    int len = unicode_to_utf8(wchar, (unsigned char*)buf);
-    ASSERT_TRUE(0 != len);
-    str.set_length(len);
-    handle(str, wchar);
-  }
-}
-
-/*
-template<typename func>
-void TestCharset::for_each_binary(func handle) {
-  char buf[3];
-  ObString str(3, 0, buf);
-
-  //one byte
-  for (unsigned char c = 0; c < 0xFF; c++) {
-    str.set_length(0);
-    str.write((char*)(&c), 1);
-    handle(str);
-  }
-  //two bytes
-  for (unsigned char c1 = 0; c1 < 0xFF; c1++) {
-    for (unsigned char c2 = 0; c2 < 0xFF; c2++) {
-      str.set_length(0);
-      str.write((char*)(&c1), 1);
-      str.write((char*)(&c2), 1);
-      handle(str);
-    }
-  }
-  //three bytes
-  for (unsigned char c1 = 0; c1 < 0xFF; c1++) {
-    for (unsigned char c2 = 0; c2 < 0xFF; c2++) {
-      for (unsigned char c3 = 0; c3 < 0xFF; c3++) {
-        str.set_length(0);
-        str.write((char*)(&c1), 1);
-        str.write((char*)(&c2), 1);
-        str.write((char*)(&c3), 1);
-        handle(str);
+  const int buf_len = 20;
+  char buf1[buf_len];
+  char buf2[buf_len];
+  char hex_buf1[buf_len];
+  char hex_buf2[buf_len];
+  int length1 = 0, length2 = 0;
+  ObCollationType cs_type = CS_TYPE_GB18030_BIN;
+  for (int i = 0; i < 256; i++) {
+    const ObUnicaseInfoChar *info = ObCharset::get_charset(cs_type)->caseinfo->page[i];
+    if (NULL != info) {
+      for (int j = 0; j < 256; j++) {
+        ASSERT_TRUE(OB_SUCCESS == ObCharset::wc_mb(cs_type, info[j].tolower, buf1, buf_len, length1));
+        ASSERT_TRUE(OB_SUCCESS == ObCharset::wc_mb(cs_type, info[j].toupper, buf2, buf_len, length2));
+        buf1[length1] = '\0';
+        buf2[length2] = '\0';
+        if (length1 != length2) {
+          ASSERT_TRUE(OB_SUCCESS == to_hex_cstr(buf1, length1, hex_buf1, buf_len));
+          ASSERT_TRUE(OB_SUCCESS == to_hex_cstr(buf2, length2, hex_buf2, buf_len));
+          std::cout<< info[j].tolower <<"," << info[j].toupper << "," << hex_buf1 << "," << hex_buf2 << std::endl;
+        }
      }
    }
  }
 }
+
+/*
+TEST_F(TestCharset, test_gbk_pua)
+{
+  
+  int64_t size = sizeof(gbk_uni_map) / sizeof(UniCodeMap);
+  ASSERT_EQ(size, 23940);
+  for (int i = 0; i < size; i++) {
+    ASSERT_TRUE(func_gbk_uni_onechar(gbk_uni_map[i].encoding) == gbk_uni_map[i].unicode) << "i=" << i;
+    ASSERT_TRUE(func_uni_gbk_onechar(gbk_uni_map[i].unicode) == gbk_uni_map[i].encoding) << "i=" << i;
+  }
+}
 */

-struct TestReusltFileGuard {
-  TestReusltFileGuard(const char* test_name) : fp_(nullptr)
-  {
-    std::string file_path;
-    file_path.append("./");
-    file_path.append(test_name);
-    file_path.append(CUR_RESULT_FILE_SUFFIX);
-    fp_ = fopen(file_path.c_str(), "w");
-  }
-  ~TestReusltFileGuard()
-  {
-    if (nullptr != fp_) {
-      fclose(fp_);
-      fp_ = nullptr;
-    }
-  }
-  FILE* get_fp()
-  {
-    return fp_;
-  }
-  FILE* fp_;
-};
-
-void compare_result(const char* test_name)
+TEST_F(TestCharset, test_zh_0900_as_cs)
 {
-  std::string cur_res_file_path, std_res_file_path;
-  cur_res_file_path.append("./");
-  cur_res_file_path.append(test_name);
-  cur_res_file_path.append(CUR_RESULT_FILE_SUFFIX);
-  std_res_file_path.append("./");
-  std_res_file_path.append(test_name);
-  std_res_file_path.append(STD_RESULT_FILE_SUFFIX);

-  std::ifstream cur_res(cur_res_file_path, std::ios::binary);
-  ASSERT_TRUE(cur_res.is_open());
-  std::ifstream std_res(std_res_file_path, std::ios::binary);
-  ASSERT_TRUE(std_res.is_open());

-  std::string cur_line;
-  std::string std_line;
-  int line_no = 0;
-  while (std::getline(std_res, std_line)) {
-    line_no++;
-    ASSERT_TRUE(std::getline(cur_res, cur_line));
-    if (0 != std_line.compare(cur_line)) {
-      fprintf(stdout,
-          "not consistent result detected at line %d:\n"
-          "cur_line:%s\n"
-          "std_line:%s\n",
-          line_no,
-          cur_line.c_str(),
-          std_line.c_str());
-      ASSERT_TRUE(0);
-    }
+  ObString str;
+  char sort_key[2048];
+  bool is_valid = false;
+
+  auto print_sort_key = [&](ObCollationType coll_type) -> void {
+      auto size = ObCharset::sortkey(coll_type, str.ptr(), str.length(), sort_key,
+                                     sizeof(sort_key), is_valid);
+      fprintf(stdout, "src=");
+      for (int i = 0; i < str.length(); i++) {
+        fprintf(stdout, "%02X", (unsigned char)str[i]);
+      }
+      fprintf(stdout, "\n");
+      fprintf(stdout, "sort_key=");
+      for (int i = 0; i < size; i++) {
+        fprintf(stdout, "%02X", (unsigned char)sort_key[i]);
+      }
+      fprintf(stdout, "\n");
+  };
+
+  char buffer[2048];
+  ObDataBuffer data_buffer(buffer, sizeof(buffer));
+
+  // Converts a UTF8MB4 string literal into the charset of dest_type, handing data_buffer to
+  // charset_convert as its allocator argument.
+  // The status returned by charset_convert is checked with a non-fatal EXPECT (ASSERT_* cannot
+  // be used in a lambda that returns a value), so a failed conversion is reported instead of
+  // silently feeding an empty string into the ordering checks below.
+  auto convert_string = [&data_buffer](const char* input, ObCollationType dest_type) -> ObString {
+    ObString output;
+    const int convert_ret = ObCharset::charset_convert(data_buffer, ObString(input), CS_TYPE_UTF8MB4_BIN, dest_type, output);
+    EXPECT_EQ(OB_SUCCESS, convert_ret) << "charset_convert failed, dest_type=" << static_cast<int>(dest_type);
+    return output;
+  };
+
+  ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH_0900_AS_CS, CS_TYPE_GBK_ZH_0900_AS_CS,
+                                 CS_TYPE_GB18030_ZH_0900_AS_CS, CS_TYPE_UTF16_ZH_0900_AS_CS};
+
+  for (int i = 0; i < array_elements(coll_types); i++) {
+    ObCollationType coll_type = coll_types[i];
+    fprintf(stdout, "## TEST_COLL=%d\n", coll_type);
+
+    ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("坝", coll_type), convert_string("弝", coll_type)) < 0);
+    ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("弝", coll_type), convert_string("爸", coll_type)) < 0);
+    ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("爸", coll_type), convert_string("跁", coll_type)) < 0);
+    ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("韩", coll_type), convert_string("美", coll_type)) < 0);
+    ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("美", coll_type), convert_string("日", coll_type)) < 0);
+
+    str = convert_string("我们今天", coll_type);
+    print_sort_key(coll_types[i]);
+    str = "\xFF\xFF";
+    print_sort_key(coll_types[i]);
+    str = "\xef\xbf\xbd\xef\xbf\xbd";
+    print_sort_key(coll_types[i]);
+    str = convert_string("中", coll_type);
+    print_sort_key(coll_types[i]);
  }
 }

-TEST_F(TestCharset, test_ismbchar_utf8)
+TEST_F(TestCharset, test_zh2_0900_as_cs)
 {
-  const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
-  ObString test_name_pure(test_name);
-  test_name_pure.split_on('_');
-  do {
-    TestReusltFileGuard file_guard(test_name);
-    ASSERT_TRUE(NULL != file_guard.get_fp());

-    auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
-      fprintf(file_guard.get_fp(),
-          "U+%04lX\t"
-          "%.*s\t"
-          "%d\t"
-          "%d\n",
-          wchar,
-          str.length(),
-          str.ptr(),
-          ObCharset::is_mbchar(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.ptr() + str.length()),
-          ObCharset::is_mbchar(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.ptr() + str.length()));
-    };
-    fprintf(file_guard.get_fp(),
-        "wchar\t"
-        "str\t"
-        "%.*s(UTF8MB4_BIN)\t"
-        "%.*s(UTF8MB4_GENERAL_CI)\n",
-        test_name_pure.length(),
-        test_name_pure.ptr(),
-        test_name_pure.length(),
-        test_name_pure.ptr());
-    TestCharset::for_each_utf8(handle);
-  } while (0);

-  compare_result(test_name);
+  ObString str;
+  char sort_key[2048];
+  bool is_valid = false;
+
+  auto print_sort_key = [&](ObCollationType coll_type) -> void {
+      auto size = ObCharset::sortkey(coll_type, str.ptr(), str.length(), sort_key,
+                                     sizeof(sort_key), is_valid);
+      fprintf(stdout, "src=");
+      for (int i = 0; i < str.length(); i++) {
+        fprintf(stdout, "%02X", (unsigned char)str[i]);
+      }
+      fprintf(stdout, "\n");
+      fprintf(stdout, "sort_key=");
+      for (int i = 0; i < size; i++) {
+        fprintf(stdout, "%02X", (unsigned char)sort_key[i]);
+      }
+      fprintf(stdout, "\n");
+  };
+
+  char buffer[2048];
+  ObDataBuffer data_buffer(buffer, sizeof(buffer));
+
+  // Converts a UTF8MB4 string literal into the charset of dest_type, handing data_buffer to
+  // charset_convert as its allocator argument.
+  // The status returned by charset_convert is checked with a non-fatal EXPECT (ASSERT_* cannot
+  // be used in a lambda that returns a value), so a failed conversion is reported instead of
+  // silently feeding an empty string into the ordering check and sort-key dump below.
+  auto convert_string = [&data_buffer](const char* input, ObCollationType dest_type) -> ObString {
+    ObString output;
+    const int convert_ret = ObCharset::charset_convert(data_buffer, ObString(input), CS_TYPE_UTF8MB4_BIN, dest_type, output);
+    EXPECT_EQ(OB_SUCCESS, convert_ret) << "charset_convert failed, dest_type=" << static_cast<int>(dest_type);
+    return output;
+  };
+
+  ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH2_0900_AS_CS};
+
+  for (int i = 0; i < array_elements(coll_types); i++) {
+    ObCollationType coll_type = coll_types[i];
+    fprintf(stdout, "## TEST_COLL=%d\n", coll_type);
+
+    ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("一", coll_type), convert_string("二", coll_type)) < 0);
+
+    str = convert_string("一丁丂七丄丅丆", coll_type);
+    print_sort_key(coll_types[i]);
+
+
+    /*
+    str = convert_string("我们今天", coll_type);
+    print_sort_key(coll_types[i]);
+    str = "\xFF\xFF";
+    print_sort_key(coll_types[i]);
+    str = "\xef\xbf\xbd\xef\xbf\xbd";
+    print_sort_key(coll_types[i]);
+    str = convert_string("中", coll_type);
+    print_sort_key(coll_types[i]);
+    */
+  }
 }

-TEST_F(TestCharset, test_strlen_char_utf8)
+int main(int argc, char **argv)
 {
-  const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
-  ObString test_name_pure(test_name);
-  test_name_pure.split_on('_');
-  do {
-    TestReusltFileGuard file_guard(test_name);
-    ASSERT_TRUE(NULL != file_guard.get_fp());
-
-    auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
-      fprintf(file_guard.get_fp(),
-          "U+%04lX\t"
-          "%.*s\t"
-          "%lu\t"
-          "%lu\n",
-          wchar,
-          str.length(),
-          str.ptr(),
-
-          ObCharset::strlen_char(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.length()),
-          ObCharset::strlen_char(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.length()));
-    };
-    fprintf(file_guard.get_fp(),
-        "wchar\t"
-        "str\t"
-        "%.*s(UTF8MB4_BIN)\t"
-        "%.*s(UTF8MB4_GENERAL_CI)\n",
-        test_name_pure.length(),
-        test_name_pure.ptr(),
-        test_name_pure.length(),
-        test_name_pure.ptr());
-    TestCharset::for_each_utf8(handle);
-  } while (0);
-
-  compare_result(test_name);
-}
-
-TEST_F(TestCharset, test_mb_wc_utf8)
-{
-  const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
-  ObString test_name_pure(test_name);
-  test_name_pure.split_on('_');
-  do {
-    TestReusltFileGuard file_guard(test_name);
-    ASSERT_TRUE(NULL != file_guard.get_fp());
-
-    auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
-      int32_t cur_wchar1, cur_wchar2;
-      int32_t length1, length2;
-
-      ASSERT_EQ(0, ObCharset::mb_wc(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.length(), length1, cur_wchar1));
-      ASSERT_EQ(0, ObCharset::mb_wc(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.length(), length2, cur_wchar2));
-      fprintf(file_guard.get_fp(),
-          "U+%04lX\t"
-          "%.*s\t"
-          "%04x\t"
-          "%04x\n",
-          wchar,
-          str.length(),
-          str.ptr(),
-          cur_wchar1,
-          cur_wchar2);
-      ASSERT_TRUE(cur_wchar1 == wchar);
-      ASSERT_TRUE(cur_wchar2 == wchar);
-    };
-    fprintf(file_guard.get_fp(),
-        "wchar\t"
-        "str\t"
-        "%.*s(UTF8MB4_BIN)\t"
-        "%.*s(UTF8MB4_GENERAL_CI)\n",
-        test_name_pure.length(),
-        test_name_pure.ptr(),
-        test_name_pure.length(),
-        test_name_pure.ptr());
-    TestCharset::for_each_utf8(handle);
-  } while (0);
-
-  compare_result(test_name);
-}
-
-TEST_F(TestCharset, test_wc_mb_utf8)
-{
-  const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
-  ObString test_name_pure(test_name);
-  test_name_pure.split_on('_');
-  do {
-
-    auto handle = [](const ObString& str, ob_wc_t wchar) -> void {
-      char buf[4];
-      int32_t length;
-      ObString res(4, 0, buf);
-
-      ASSERT_EQ(0, ObCharset::wc_mb(CS_TYPE_UTF8MB4_BIN, wchar, buf, 4, length));
-      res.set_length(length);
-      ASSERT_TRUE(0 == str.compare(res));
-
-      ASSERT_EQ(0, ObCharset::wc_mb(CS_TYPE_UTF8MB4_GENERAL_CI, wchar, buf, 4, length));
-      res.set_length(length);
-      ASSERT_TRUE(0 == str.compare(res));
-    };
-    TestCharset::for_each_utf8(handle);
-  } while (0);
-}
-
-TEST_F(TestCharset, test_caseup_utf8)
-{
-  const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
-  ObString test_name_pure(test_name);
-  test_name_pure.split_on('_');
-  do {
-    TestReusltFileGuard file_guard(test_name);
-    ASSERT_TRUE(NULL != file_guard.get_fp());
-
-    auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
-      char buf1[4];
-      char buf2[4];
-      int length1, length2;
-
-      ASSERT_TRUE(
-          0 < (length1 = ObCharset::caseup(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), buf1, 4)));
-      ASSERT_TRUE(0 < (length2 = ObCharset::caseup(
-                           CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), buf2, 4)));
-
-      fprintf(file_guard.get_fp(),
-          "U+%04lX\t"
-          "%.*s\t"
-          "%.*s\t"
-          "%.*s\n",
-          wchar,
-          str.length(),
-          str.ptr(),
-          length1,
-          buf1,
-          length2,
-          buf2);
-    };
-    fprintf(file_guard.get_fp(),
-        "wchar\t"
-        "str\t"
-        "%.*s(UTF8MB4_BIN)\t"
-        "%.*s(UTF8MB4_GENERAL_CI)\n",
-        test_name_pure.length(),
-        test_name_pure.ptr(),
-        test_name_pure.length(),
-        test_name_pure.ptr());
-    TestCharset::for_each_utf8(handle);
-  } while (0);
-
-  compare_result(test_name);
-}
-
-TEST_F(TestCharset, test_casedn_utf8)
-{
-  const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
-  ObString test_name_pure(test_name);
-  test_name_pure.split_on('_');
-  do {
-    TestReusltFileGuard file_guard(test_name);
-    ASSERT_TRUE(NULL != file_guard.get_fp());
-
-    auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
-      char buf1[4];
-      char buf2[4];
-      int length1, length2;
-
-      ASSERT_TRUE(
-          0 < (length1 = ObCharset::casedn(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), buf1, 4)));
-      ASSERT_TRUE(0 < (length2 = ObCharset::casedn(
-                           CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), buf2, 4)));
-
-      fprintf(file_guard.get_fp(),
-          "U+%04lX\t"
-          "%.*s\t"
-          "%.*s\t"
-          "%.*s\n",
-          wchar,
-          str.length(),
-          str.ptr(),
-          length1,
-          buf1,
-          length2,
-          buf2);
-    };
-    fprintf(file_guard.get_fp(),
-        "wchar\t"
-        "str\t"
-        "%.*s(UTF8MB4_BIN)\t"
-        "%.*s(UTF8MB4_GENERAL_CI)\n",
-        test_name_pure.length(),
-        test_name_pure.ptr(),
-        test_name_pure.length(),
-        test_name_pure.ptr());
-    TestCharset::for_each_utf8(handle);
-  } while (0);
-
-  compare_result(test_name);
-}
-
-TEST_F(TestCharset, test_sortkey_utf8)
-{
-  const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
-  ObString test_name_pure(test_name);
-  test_name_pure.split_on('_');
-  do {
-    TestReusltFileGuard file_guard(test_name);
-    ASSERT_TRUE(NULL != file_guard.get_fp());
-
-    auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
-      char buf1[4];
-      char buf2[4];
-      int length1, length2;
-      bool is_uni1, is_uni2;
-
-      ASSERT_TRUE(0 < (length1 = ObCharset::sortkey(
-                           CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), buf1, 4, is_uni1)));
-      ASSERT_TRUE(is_uni1);
-      ASSERT_TRUE(0 < (length2 = ObCharset::sortkey(
-                           CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), buf2, 4, is_uni2)));
-      ASSERT_TRUE(is_uni2);
-
-      fprintf(file_guard.get_fp(),
-          "U+%04lX\t"
-          "%.*s\t"
-          "%.*s\t"
-          "%.*s\n",
-          wchar,
-          str.length(),
-          str.ptr(),
-          length1,
-          buf1,
-          length2,
-          buf2);
-    };
-    fprintf(file_guard.get_fp(),
-        "wchar\t"
-        "str\t"
-        "%.*s(UTF8MB4_BIN)\t"
-        "%.*s(UTF8MB4_GENERAL_CI)\n",
-        test_name_pure.length(),
-        test_name_pure.ptr(),
-        test_name_pure.length(),
-        test_name_pure.ptr());
-    TestCharset::for_each_utf8(handle);
-  } while (0);
-
-  compare_result(test_name);
-}
-
-TEST_F(TestCharset, test_hash_sort_utf8)
-{
-  const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
-  ObString test_name_pure(test_name);
-  test_name_pure.split_on('_');
-  do {
-    TestReusltFileGuard file_guard(test_name);
-    ASSERT_TRUE(NULL != file_guard.get_fp());
-
-    auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
-      fprintf(file_guard.get_fp(),
-          "U+%04lX\t"
-          "%.*s\t"
-          "%lu\t"
-          "%lu\t"
-          "%lu\t"
-          "%lu\n",
-          wchar,
-          str.length(),
-          str.ptr(),
-          ObCharset::hash(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), 0, 0, NULL),
-          ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), 0, 0, NULL),
-          ObCharset::hash(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), 0, 1, NULL),
-          ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), 0, 1, NULL));
-    };
-    fprintf(file_guard.get_fp(),
-        "wchar\t"
-        "str\t"
-        "%.*s(UTF8MB4_BIN)\t"
-        "%.*s(UTF8MB4_GENERAL_CI)\t"
-        "%.*s(UTF8MB4_BIN oracle)\t"
-        "%.*s(UTF8MB4_GENERAL_CI oracle)\n",
-        test_name_pure.length(),
-        test_name_pure.ptr(),
-        test_name_pure.length(),
-        test_name_pure.ptr(),
-        test_name_pure.length(),
-        test_name_pure.ptr(),
-        test_name_pure.length(),
-        test_name_pure.ptr());
-    TestCharset::for_each_utf8(handle);
-  } while (0);
-
-  compare_result(test_name);
-}
-
-int main(int argc, char** argv)
-{
-  testing::InitGoogleTest(&argc, argv);
+  OB_LOGGER.set_log_level("INFO");
+  testing::InitGoogleTest(&argc,argv);
+  int ret = ObCharset::init_charset();
+  fprintf(stdout, "ret=%d\n", ret);
  return RUN_ALL_TESTS();
 }