patch 4.0

This commit is contained in:
wangzelin.wzl
2022-10-24 10:34:53 +08:00
parent 4ad6e00ec3
commit 93a1074b0c
10533 changed files with 2588271 additions and 2299373 deletions

View File

@ -19,53 +19,53 @@
#include "lib/string/ob_string.h"
#include "lib/utility/ob_print_utils.h"
#include "gtest/gtest.h"
#include <iostream>
#include <fstream>
#include "unicode_map.h"
#include "common/data_buffer.h"
#include "lib/oblog/ob_log_module.h"
#define USING_LOG_PREFIX SQL
using namespace oceanbase::common;
#define CUR_RESULT_FILE_SUFFIX ".record"
#define STD_RESULT_FILE_SUFFIX ".result"
class TestCharset : public ::testing::Test {
class TestCharset: public ::testing::Test
{
public:
TestCharset();
virtual ~TestCharset();
virtual void SetUp();
virtual void TearDown();
template <typename func>
void for_each_utf8(func handle);
protected:
void gen_random_unicode_string(const int len, char* res, int& real_len);
void gen_random_unicode_string(const int len, char *res, int &real_len);
int random_range(const int low, const int high);
};
TestCharset::TestCharset()
{}
{
}
TestCharset::~TestCharset()
{}
{
}
void TestCharset::SetUp()
{
srand((unsigned)time(NULL));
srand((unsigned)time(NULL ));
}
void TestCharset::TearDown()
{}
{
}
// Returns a pseudo-random integer in the half-open range [low, high), taken
// from std::rand().
// An empty or inverted range (high <= low) returns `low` rather than
// evaluating a modulo by zero or by a negative span.
int TestCharset::random_range(const int low, const int high)
{
  const int span = high - low;
  if (span <= 0) {
    return low;
  }
  return std::rand() % span + low;
}
void TestCharset::gen_random_unicode_string(const int len, char* res, int& real_len)
void TestCharset::gen_random_unicode_string(const int len, char *res, int &real_len)
{
int i = 0;
int unicode_point = 0;
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
for (i = 0; i < len;) {
for (i = 0; i < len; ) {
const int bytes = random_range(1, 7);
if (bytes < 4) {
unicode_point = random_range(0, 127);
@ -75,8 +75,8 @@ void TestCharset::gen_random_unicode_string(const int len, char* res, int& real_
unicode_point = random_range(0XFFFF, 0X10FFFF);
}
std::string utf_str = converter.to_bytes(unicode_point);
// fprintf(stdout, "code_point=%d\n", unicode_point);
// fprintf(stdout, "utf8_str=%s\n", utf_str.c_str());
//fprintf(stdout, "code_point=%d\n", unicode_point);
//fprintf(stdout, "utf8_str=%s\n", utf_str.c_str());
for (int j = 0; j < utf_str.size(); ++j) {
res[i++] = utf_str[j];
}
@ -157,8 +157,8 @@ TEST_F(TestCharset, sortkey)
ASSERT_FALSE(is_valid_unicode);
// The parameter of sortkey cannot be NULL
// char *p = NULL;
// size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, true, p, 0, aa1, 10);
//char *p = NULL;
//size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, true, p, 0, aa1, 10);
}
TEST_F(TestCharset, casedn)
@ -175,7 +175,7 @@ TEST_F(TestCharset, casedn)
y1.assign_ptr(a1, 14);
y2.assign_ptr(a2, 14);
y3.assign_ptr(a3, 14);
fprintf(stdout, "ret:%p, %d\n", y1.ptr(), y1.length());
fprintf(stdout, "ret:%p, %d\n", y1.ptr(), y1.length() );
size_t size1 = ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, y1);
EXPECT_TRUE(y1 == y3);
size_t size2 = ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, y2);
@ -189,10 +189,10 @@ TEST_F(TestCharset, casedn)
TEST_F(TestCharset, case_insensitive_equal)
{
ObString y1 = "Variable_name";
ObString y2 = "variable_name";
ObString y3 = "variable_name1";
ObString y4 = "variable_name1";
ObString y1= "Variable_name";
ObString y2= "variable_name";
ObString y3= "variable_name1";
ObString y4= "variable_name1";
bool yy = ObCharset::case_insensitive_equal(y1, y2, CS_TYPE_UTF8MB4_GENERAL_CI);
ASSERT_TRUE(yy);
yy = ObCharset::case_insensitive_equal(y2, y3, CS_TYPE_UTF8MB4_GENERAL_CI);
@ -205,10 +205,10 @@ TEST_F(TestCharset, hash_sort)
{
ObString s;
uint64_t ret = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, s.ptr(), s.length(), 0);
const char* a = "abd";
const char* b = "aBD";
uint64_t ret1 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, a, 3, 0, NULL);
uint64_t ret2 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, b, 3, 0, NULL);
const char *a = "abd";
const char *b = "aBD";
uint64_t ret1 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, a, 3, 0);
uint64_t ret2 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, b, 3, 0);
fprintf(stdout, "ret:%lu, ret1:%lu, ret2:%lu\n", ret, ret1, ret2);
uint64_t ret3 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, ObString::make_string(b));
ASSERT_EQ(ret2, ret3);
@ -216,10 +216,10 @@ TEST_F(TestCharset, hash_sort)
TEST_F(TestCharset, case_mode_equal)
{
ObString y1 = "Variable_name";
ObString y2 = "variable_name";
ObString y3 = "variable_name1";
ObString y4 = "variable_name1";
ObString y1= "Variable_name";
ObString y2= "variable_name";
ObString y3= "variable_name1";
ObString y4= "variable_name1";
bool is_equal = false;
is_equal = ObCharset::case_mode_equal(OB_ORIGIN_AND_SENSITIVE, y1, y2);
ASSERT_FALSE(is_equal);
@ -250,8 +250,8 @@ TEST_F(TestCharset, case_mode_equal)
TEST_F(TestCharset, well_formed_length)
{
int ret = OB_SUCCESS;
const char* str = "\0123";
ObCollationType cs_type = CS_TYPE_UTF8MB4_GENERAL_CI;
const char *str = "\0123";
ObCollationType cs_type = CS_TYPE_UTF8MB4_GENERAL_CI;
int64_t well_formed_length = 0;
int64_t str_len = 1;
@ -280,7 +280,7 @@ TEST_F(TestCharset, test_max_byte_char_pos)
std::cout << "real_len" << real_len << std::endl;
int64_t left_bytes = real_len;
const int64_t block_size = 16000;
char* pos = buf;
char *pos = buf;
while (left_bytes > 0) {
int64_t well_formed_len = 0;
int32_t well_formed_error = 0;
@ -312,7 +312,7 @@ TEST_F(TestCharset, test_ascii_list_for_all_charset)
ASSERT_EQ(OB_SUCCESS, ObCharsetUtils::init(allocator));
std::cout << "ascii";
std::cout<< "ascii";
for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
auto charset_type = static_cast<ObCharsetType>(cs_i);
if (!ObCharset::is_valid_charset(charset_type))
@ -324,7 +324,7 @@ TEST_F(TestCharset, test_ascii_list_for_all_charset)
std::cout << std::endl;
for (int ascii_wc = 0; ascii_wc <= INT8_MAX; ascii_wc++) {
std::cout << ascii_wc;
std::cout<< ascii_wc;
for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
auto charset_type = static_cast<ObCharsetType>(cs_i);
if (!ObCharset::is_valid_charset(charset_type))
@ -333,479 +333,174 @@ TEST_F(TestCharset, test_ascii_list_for_all_charset)
ASSERT_TRUE(ObCharset::is_valid_collation(cs_type));
int64_t result_len = 0;
ObString str = ObCharsetUtils::get_const_str(cs_type, ascii_wc);
ASSERT_EQ(OB_SUCCESS, hex_print(str.ptr(), str.length(), buf, buf_len, result_len));
ASSERT_EQ (OB_SUCCESS, hex_print(str.ptr(), str.length(), buf, buf_len, result_len));
buf[result_len] = '\0';
std::cout << "\t" << buf;
std::cout <<"\t" << buf;
}
std::cout << std::endl;
}
}
int unicode_to_utf8(ob_wc_t c, unsigned char* utf8string)
TEST_F(TestCharset, test_find_gb18030_case_prob)
{
if (c <= 0x7F) {
utf8string[0] = c;
return 1;
} else if (c <= 0x7FF) {
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
utf8string[1] = 0x80 | (c & 0x3F);
return 2;
} else if (c <= 0xFFFF) {
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
utf8string[2] = 0x80 | (c & 0x3F);
return 3;
} else {
utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
utf8string[3] = 0x80 | (c & 0x3F);
return 4;
}
return 0;
}
template <typename func>
void TestCharset::for_each_utf8(func handle)
{
char buf[4];
ObString str(4, 0, buf);
for (ob_wc_t wchar = 0; wchar < 0x110000; wchar++) {
int len = unicode_to_utf8(wchar, (unsigned char*)buf);
ASSERT_TRUE(0 != len);
str.set_length(len);
handle(str, wchar);
}
}
/*
template<typename func>
void TestCharset::for_each_binary(func handle) {
char buf[3];
ObString str(3, 0, buf);
//one byte
for (unsigned char c = 0; c < 0xFF; c++) {
str.set_length(0);
str.write((char*)(&c), 1);
handle(str);
}
//two bytes
for (unsigned char c1 = 0; c1 < 0xFF; c1++) {
for (unsigned char c2 = 0; c2 < 0xFF; c2++) {
str.set_length(0);
str.write((char*)(&c1), 1);
str.write((char*)(&c2), 1);
handle(str);
}
}
//three bytes
for (unsigned char c1 = 0; c1 < 0xFF; c1++) {
for (unsigned char c2 = 0; c2 < 0xFF; c2++) {
for (unsigned char c3 = 0; c3 < 0xFF; c3++) {
str.set_length(0);
str.write((char*)(&c1), 1);
str.write((char*)(&c2), 1);
str.write((char*)(&c3), 1);
handle(str);
const int buf_len = 20;
char buf1[buf_len];
char buf2[buf_len];
char hex_buf1[buf_len];
char hex_buf2[buf_len];
int length1 = 0, length2 = 0;
ObCollationType cs_type = CS_TYPE_GB18030_BIN;
for (int i = 0; i < 256; i++) {
const ObUnicaseInfoChar *info = ObCharset::get_charset(cs_type)->caseinfo->page[i];
if (NULL != info) {
for (int j = 0; j < 256; j++) {
ASSERT_TRUE(OB_SUCCESS == ObCharset::wc_mb(cs_type, info[j].tolower, buf1, buf_len, length1));
ASSERT_TRUE(OB_SUCCESS == ObCharset::wc_mb(cs_type, info[j].toupper, buf2, buf_len, length2));
buf1[length1] = '\0';
buf2[length2] = '\0';
if (length1 != length2) {
ASSERT_TRUE(OB_SUCCESS == to_hex_cstr(buf1, length1, hex_buf1, buf_len));
ASSERT_TRUE(OB_SUCCESS == to_hex_cstr(buf2, length2, hex_buf2, buf_len));
std::cout<< info[j].tolower <<"," << info[j].toupper << "," << hex_buf1 << "," << hex_buf2 << std::endl;
}
}
}
}
}
/*
TEST_F(TestCharset, test_gbk_pua)
{
int64_t size = sizeof(gbk_uni_map) / sizeof(UniCodeMap);
ASSERT_EQ(size, 23940);
for (int i = 0; i < size; i++) {
ASSERT_TRUE(func_gbk_uni_onechar(gbk_uni_map[i].encoding) == gbk_uni_map[i].unicode) << "i=" << i;
ASSERT_TRUE(func_uni_gbk_onechar(gbk_uni_map[i].unicode) == gbk_uni_map[i].encoding) << "i=" << i;
}
}
*/
// Scope guard around the FILE* of "./<test_name>" + CUR_RESULT_FILE_SUFFIX.
// The constructor opens the file with mode "w"; the destructor closes it.
// get_fp() returns nullptr when fopen() failed, so callers must check it.
struct TestReusltFileGuard {
  TestReusltFileGuard(const char* test_name) : fp_(nullptr)
  {
    std::string file_path("./");
    file_path.append(test_name);
    file_path.append(CUR_RESULT_FILE_SUFFIX);
    fp_ = fopen(file_path.c_str(), "w");
  }
  // The guard is the sole owner of fp_: a copy would make two destructors
  // fclose() the same stream, so copying is disabled.
  TestReusltFileGuard(const TestReusltFileGuard&) = delete;
  TestReusltFileGuard& operator=(const TestReusltFileGuard&) = delete;
  ~TestReusltFileGuard()
  {
    if (nullptr != fp_) {
      fclose(fp_);
      fp_ = nullptr;
    }
  }
  // Borrowed handle; remains owned by the guard.
  FILE* get_fp()
  {
    return fp_;
  }
  FILE* fp_;
};
void compare_result(const char* test_name)
TEST_F(TestCharset, test_zh_0900_as_cs)
{
std::string cur_res_file_path, std_res_file_path;
cur_res_file_path.append("./");
cur_res_file_path.append(test_name);
cur_res_file_path.append(CUR_RESULT_FILE_SUFFIX);
std_res_file_path.append("./");
std_res_file_path.append(test_name);
std_res_file_path.append(STD_RESULT_FILE_SUFFIX);
std::ifstream cur_res(cur_res_file_path, std::ios::binary);
ASSERT_TRUE(cur_res.is_open());
std::ifstream std_res(std_res_file_path, std::ios::binary);
ASSERT_TRUE(std_res.is_open());
std::string cur_line;
std::string std_line;
int line_no = 0;
while (std::getline(std_res, std_line)) {
line_no++;
ASSERT_TRUE(std::getline(cur_res, cur_line));
if (0 != std_line.compare(cur_line)) {
fprintf(stdout,
"not consistent result detected at line %d:\n"
"cur_line:%s\n"
"std_line:%s\n",
line_no,
cur_line.c_str(),
std_line.c_str());
ASSERT_TRUE(0);
}
ObString str;
char sort_key[2048];
bool is_valid = false;
auto print_sort_key = [&](ObCollationType coll_type) -> void {
auto size = ObCharset::sortkey(coll_type, str.ptr(), str.length(), sort_key,
sizeof(sort_key), is_valid);
fprintf(stdout, "src=");
for (int i = 0; i < str.length(); i++) {
fprintf(stdout, "%02X", (unsigned char)str[i]);
}
fprintf(stdout, "\n");
fprintf(stdout, "sort_key=");
for (int i = 0; i < size; i++) {
fprintf(stdout, "%02X", (unsigned char)sort_key[i]);
}
fprintf(stdout, "\n");
};
char buffer[2048];
ObDataBuffer data_buffer(buffer, sizeof(buffer));
auto convert_string = [&data_buffer](const char* input, ObCollationType dest_type) -> ObString {
ObString output;
ObCharset::charset_convert(data_buffer, ObString(input), CS_TYPE_UTF8MB4_BIN, dest_type, output);
return output;
};
ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH_0900_AS_CS, CS_TYPE_GBK_ZH_0900_AS_CS,
CS_TYPE_GB18030_ZH_0900_AS_CS, CS_TYPE_UTF16_ZH_0900_AS_CS};
for (int i = 0; i < array_elements(coll_types); i++) {
ObCollationType coll_type = coll_types[i];
fprintf(stdout, "## TEST_COLL=%d\n", coll_type);
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("", coll_type), convert_string("", coll_type)) < 0);
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("", coll_type), convert_string("", coll_type)) < 0);
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("", coll_type), convert_string("", coll_type)) < 0);
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("", coll_type), convert_string("", coll_type)) < 0);
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("", coll_type), convert_string("", coll_type)) < 0);
str = convert_string("我们今天", coll_type);
print_sort_key(coll_types[i]);
str = "\xFF\xFF";
print_sort_key(coll_types[i]);
str = "\xef\xbf\xbd\xef\xbf\xbd";
print_sort_key(coll_types[i]);
str = convert_string("", coll_type);
print_sort_key(coll_types[i]);
}
}
TEST_F(TestCharset, test_ismbchar_utf8)
TEST_F(TestCharset, test_zh2_0900_as_cs)
{
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
ObString test_name_pure(test_name);
test_name_pure.split_on('_');
do {
TestReusltFileGuard file_guard(test_name);
ASSERT_TRUE(NULL != file_guard.get_fp());
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
fprintf(file_guard.get_fp(),
"U+%04lX\t"
"%.*s\t"
"%d\t"
"%d\n",
wchar,
str.length(),
str.ptr(),
ObCharset::is_mbchar(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.ptr() + str.length()),
ObCharset::is_mbchar(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.ptr() + str.length()));
};
fprintf(file_guard.get_fp(),
"wchar\t"
"str\t"
"%.*s(UTF8MB4_BIN)\t"
"%.*s(UTF8MB4_GENERAL_CI)\n",
test_name_pure.length(),
test_name_pure.ptr(),
test_name_pure.length(),
test_name_pure.ptr());
TestCharset::for_each_utf8(handle);
} while (0);
compare_result(test_name);
ObString str;
char sort_key[2048];
bool is_valid = false;
auto print_sort_key = [&](ObCollationType coll_type) -> void {
auto size = ObCharset::sortkey(coll_type, str.ptr(), str.length(), sort_key,
sizeof(sort_key), is_valid);
fprintf(stdout, "src=");
for (int i = 0; i < str.length(); i++) {
fprintf(stdout, "%02X", (unsigned char)str[i]);
}
fprintf(stdout, "\n");
fprintf(stdout, "sort_key=");
for (int i = 0; i < size; i++) {
fprintf(stdout, "%02X", (unsigned char)sort_key[i]);
}
fprintf(stdout, "\n");
};
char buffer[2048];
ObDataBuffer data_buffer(buffer, sizeof(buffer));
auto convert_string = [&data_buffer](const char* input, ObCollationType dest_type) -> ObString {
ObString output;
ObCharset::charset_convert(data_buffer, ObString(input), CS_TYPE_UTF8MB4_BIN, dest_type, output);
return output;
};
ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH2_0900_AS_CS};
for (int i = 0; i < array_elements(coll_types); i++) {
ObCollationType coll_type = coll_types[i];
fprintf(stdout, "## TEST_COLL=%d\n", coll_type);
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("", coll_type), convert_string("", coll_type)) < 0);
str = convert_string("一丁丂七丄丅丆", coll_type);
print_sort_key(coll_types[i]);
/*
str = convert_string("我们今天", coll_type);
print_sort_key(coll_types[i]);
str = "\xFF\xFF";
print_sort_key(coll_types[i]);
str = "\xef\xbf\xbd\xef\xbf\xbd";
print_sort_key(coll_types[i]);
str = convert_string("中", coll_type);
print_sort_key(coll_types[i]);
*/
}
}
TEST_F(TestCharset, test_strlen_char_utf8)
int main(int argc, char **argv)
{
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
ObString test_name_pure(test_name);
test_name_pure.split_on('_');
do {
TestReusltFileGuard file_guard(test_name);
ASSERT_TRUE(NULL != file_guard.get_fp());
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
fprintf(file_guard.get_fp(),
"U+%04lX\t"
"%.*s\t"
"%lu\t"
"%lu\n",
wchar,
str.length(),
str.ptr(),
ObCharset::strlen_char(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.length()),
ObCharset::strlen_char(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.length()));
};
fprintf(file_guard.get_fp(),
"wchar\t"
"str\t"
"%.*s(UTF8MB4_BIN)\t"
"%.*s(UTF8MB4_GENERAL_CI)\n",
test_name_pure.length(),
test_name_pure.ptr(),
test_name_pure.length(),
test_name_pure.ptr());
TestCharset::for_each_utf8(handle);
} while (0);
compare_result(test_name);
}
// For every code point visited by for_each_utf8(), decodes its byte sequence
// with ObCharset::mb_wc under UTF8MB4_BIN and UTF8MB4_GENERAL_CI, writes one
// row per code point to "./<test name>" + CUR_RESULT_FILE_SUFFIX, asserts that
// both decoded values equal the originating code point, and finally hands the
// test name to compare_result().
TEST_F(TestCharset, test_mb_wc_utf8)
{
  const char* case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
  ObString column_title(case_name);
  // NOTE(review): presumably strips the leading "test_" so the header reads
  // "mb_wc_utf8" — confirm against ObString::split_on.
  column_title.split_on('_');
  {
    // Inner scope so the guard's destructor closes the record file before
    // compare_result() is called.
    TestReusltFileGuard record_file(case_name);
    ASSERT_TRUE(NULL != record_file.get_fp());
    FILE* const out = record_file.get_fp();
    auto decode_and_record = [out](const ObString& str, ob_wc_t wchar) -> void {
      int32_t bin_wchar;
      int32_t ci_wchar;
      int32_t bin_len;
      int32_t ci_len;
      ASSERT_EQ(0, ObCharset::mb_wc(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.length(), bin_len, bin_wchar));
      ASSERT_EQ(0, ObCharset::mb_wc(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.length(), ci_len, ci_wchar));
      // The row is written before the equality checks, so a mismatch still
      // shows up in the record file.
      fprintf(out, "U+%04lX\t%.*s\t%04x\t%04x\n", wchar, str.length(), str.ptr(), bin_wchar, ci_wchar);
      ASSERT_TRUE(bin_wchar == wchar);
      ASSERT_TRUE(ci_wchar == wchar);
    };
    // Header row.
    fprintf(out,
        "wchar\tstr\t%.*s(UTF8MB4_BIN)\t%.*s(UTF8MB4_GENERAL_CI)\n",
        column_title.length(),
        column_title.ptr(),
        column_title.length(),
        column_title.ptr());
    TestCharset::for_each_utf8(decode_and_record);
  }
  compare_result(case_name);
}
// For every code point visited by for_each_utf8(), encodes it with
// ObCharset::wc_mb under UTF8MB4_BIN and UTF8MB4_GENERAL_CI and asserts that
// the produced bytes compare equal to the byte sequence for_each_utf8()
// supplied. Nothing is written to a record file, so no test-name bookkeeping
// is needed here.
TEST_F(TestCharset, test_wc_mb_utf8)
{
  auto handle = [](const ObString& str, ob_wc_t wchar) -> void {
    char buf[4];
    int32_t length = 0;
    // `res` views `buf`; only its length changes between the two encodings.
    ObString res(4, 0, buf);
    ASSERT_EQ(0, ObCharset::wc_mb(CS_TYPE_UTF8MB4_BIN, wchar, buf, 4, length));
    res.set_length(length);
    ASSERT_TRUE(0 == str.compare(res));
    ASSERT_EQ(0, ObCharset::wc_mb(CS_TYPE_UTF8MB4_GENERAL_CI, wchar, buf, 4, length));
    res.set_length(length);
    ASSERT_TRUE(0 == str.compare(res));
  };
  TestCharset::for_each_utf8(handle);
}
// For every code point visited by for_each_utf8(), runs ObCharset::caseup
// under UTF8MB4_BIN and UTF8MB4_GENERAL_CI into 4-byte buffers, requires a
// positive result length from each, and writes one row per code point to
// "./<test name>" + CUR_RESULT_FILE_SUFFIX. The test name is then handed to
// compare_result().
TEST_F(TestCharset, test_caseup_utf8)
{
  const char* case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
  ObString column_title(case_name);
  // NOTE(review): presumably strips the leading "test_" from the header
  // title — confirm against ObString::split_on.
  column_title.split_on('_');
  {
    // Inner scope so the guard's destructor closes the record file before
    // compare_result() is called.
    TestReusltFileGuard record_file(case_name);
    ASSERT_TRUE(NULL != record_file.get_fp());
    FILE* const out = record_file.get_fp();
    auto upcase_and_record = [out](const ObString& str, ob_wc_t wchar) -> void {
      char bin_upper[4];
      char ci_upper[4];
      const int bin_len =
          ObCharset::caseup(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), bin_upper, 4);
      ASSERT_TRUE(0 < bin_len);
      const int ci_len =
          ObCharset::caseup(CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), ci_upper, 4);
      ASSERT_TRUE(0 < ci_len);
      fprintf(out,
          "U+%04lX\t%.*s\t%.*s\t%.*s\n",
          wchar,
          str.length(),
          str.ptr(),
          bin_len,
          bin_upper,
          ci_len,
          ci_upper);
    };
    // Header row.
    fprintf(out,
        "wchar\tstr\t%.*s(UTF8MB4_BIN)\t%.*s(UTF8MB4_GENERAL_CI)\n",
        column_title.length(),
        column_title.ptr(),
        column_title.length(),
        column_title.ptr());
    TestCharset::for_each_utf8(upcase_and_record);
  }
  compare_result(case_name);
}
// For every code point visited by for_each_utf8(), runs ObCharset::casedn
// under UTF8MB4_BIN and UTF8MB4_GENERAL_CI into 4-byte buffers, requires a
// positive result length from each, and writes one row per code point to
// "./<test name>" + CUR_RESULT_FILE_SUFFIX. The test name is then handed to
// compare_result().
TEST_F(TestCharset, test_casedn_utf8)
{
  const char* case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
  ObString column_title(case_name);
  // NOTE(review): presumably strips the leading "test_" from the header
  // title — confirm against ObString::split_on.
  column_title.split_on('_');
  {
    // Inner scope so the guard's destructor closes the record file before
    // compare_result() is called.
    TestReusltFileGuard record_file(case_name);
    ASSERT_TRUE(NULL != record_file.get_fp());
    FILE* const out = record_file.get_fp();
    auto downcase_and_record = [out](const ObString& str, ob_wc_t wchar) -> void {
      char bin_lower[4];
      char ci_lower[4];
      const int bin_len =
          ObCharset::casedn(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), bin_lower, 4);
      ASSERT_TRUE(0 < bin_len);
      const int ci_len =
          ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), ci_lower, 4);
      ASSERT_TRUE(0 < ci_len);
      fprintf(out,
          "U+%04lX\t%.*s\t%.*s\t%.*s\n",
          wchar,
          str.length(),
          str.ptr(),
          bin_len,
          bin_lower,
          ci_len,
          ci_lower);
    };
    // Header row.
    fprintf(out,
        "wchar\tstr\t%.*s(UTF8MB4_BIN)\t%.*s(UTF8MB4_GENERAL_CI)\n",
        column_title.length(),
        column_title.ptr(),
        column_title.length(),
        column_title.ptr());
    TestCharset::for_each_utf8(downcase_and_record);
  }
  compare_result(case_name);
}
// For every code point visited by for_each_utf8(), computes
// ObCharset::sortkey under UTF8MB4_BIN and UTF8MB4_GENERAL_CI into 4-byte
// buffers. Each call must return a positive length and set the bool it
// receives as its last argument to true. One row per code point is written to
// "./<test name>" + CUR_RESULT_FILE_SUFFIX, and the test name is then handed
// to compare_result().
TEST_F(TestCharset, test_sortkey_utf8)
{
  const char* case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
  ObString column_title(case_name);
  // NOTE(review): presumably strips the leading "test_" from the header
  // title — confirm against ObString::split_on.
  column_title.split_on('_');
  {
    // Inner scope so the guard's destructor closes the record file before
    // compare_result() is called.
    TestReusltFileGuard record_file(case_name);
    ASSERT_TRUE(NULL != record_file.get_fp());
    FILE* const out = record_file.get_fp();
    auto sortkey_and_record = [out](const ObString& str, ob_wc_t wchar) -> void {
      char bin_key[4];
      char ci_key[4];
      bool bin_flag;
      bool ci_flag;
      const int bin_len = ObCharset::sortkey(
          CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), bin_key, 4, bin_flag);
      ASSERT_TRUE(0 < bin_len);
      ASSERT_TRUE(bin_flag);
      const int ci_len = ObCharset::sortkey(
          CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), ci_key, 4, ci_flag);
      ASSERT_TRUE(0 < ci_len);
      ASSERT_TRUE(ci_flag);
      fprintf(out,
          "U+%04lX\t%.*s\t%.*s\t%.*s\n",
          wchar,
          str.length(),
          str.ptr(),
          bin_len,
          bin_key,
          ci_len,
          ci_key);
    };
    // Header row.
    fprintf(out,
        "wchar\tstr\t%.*s(UTF8MB4_BIN)\t%.*s(UTF8MB4_GENERAL_CI)\n",
        column_title.length(),
        column_title.ptr(),
        column_title.length(),
        column_title.ptr());
    TestCharset::for_each_utf8(sortkey_and_record);
  }
  compare_result(case_name);
}
// For every code point visited by for_each_utf8(), computes ObCharset::hash
// four times: UTF8MB4_BIN and UTF8MB4_GENERAL_CI, each with the fifth
// argument set to 0 and then to 1 (the header row labels the latter pair
// "oracle"). One row per code point is written to
// "./<test name>" + CUR_RESULT_FILE_SUFFIX, and the test name is then handed
// to compare_result().
TEST_F(TestCharset, test_hash_sort_utf8)
{
  const char* case_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
  ObString column_title(case_name);
  // NOTE(review): presumably strips the leading "test_" from the header
  // title — confirm against ObString::split_on.
  column_title.split_on('_');
  {
    // Inner scope so the guard's destructor closes the record file before
    // compare_result() is called.
    TestReusltFileGuard record_file(case_name);
    ASSERT_TRUE(NULL != record_file.get_fp());
    FILE* const out = record_file.get_fp();
    auto hash_and_record = [out](const ObString& str, ob_wc_t wchar) -> void {
      // `auto` keeps the exact return type that is passed on to the "%lu"
      // conversions below.
      const auto bin_hash =
          ObCharset::hash(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), 0, 0, NULL);
      const auto ci_hash =
          ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), 0, 0, NULL);
      const auto bin_hash_oracle =
          ObCharset::hash(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), 0, 1, NULL);
      const auto ci_hash_oracle =
          ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), 0, 1, NULL);
      fprintf(out,
          "U+%04lX\t%.*s\t%lu\t%lu\t%lu\t%lu\n",
          wchar,
          str.length(),
          str.ptr(),
          bin_hash,
          ci_hash,
          bin_hash_oracle,
          ci_hash_oracle);
    };
    // Header row.
    fprintf(out,
        "wchar\tstr\t%.*s(UTF8MB4_BIN)\t%.*s(UTF8MB4_GENERAL_CI)\t%.*s(UTF8MB4_BIN oracle)\t%.*s(UTF8MB4_GENERAL_CI oracle)\n",
        column_title.length(),
        column_title.ptr(),
        column_title.length(),
        column_title.ptr(),
        column_title.length(),
        column_title.ptr(),
        column_title.length(),
        column_title.ptr());
    TestCharset::for_each_utf8(hash_and_record);
  }
  compare_result(case_name);
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
OB_LOGGER.set_log_level("INFO");
testing::InitGoogleTest(&argc,argv);
int ret = ObCharset::init_charset();
fprintf(stdout, "ret=%d\n", ret);
return RUN_ALL_TESTS();
}