812 lines
24 KiB
C++
812 lines
24 KiB
C++
/**
|
|
* Copyright (c) 2021 OceanBase
|
|
* OceanBase CE is licensed under Mulan PubL v2.
|
|
* You can use this software according to the terms and conditions of the Mulan PubL v2.
|
|
* You may obtain a copy of Mulan PubL v2 at:
|
|
* http://license.coscl.org.cn/MulanPubL-2.0
|
|
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
|
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
|
|
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
|
|
* See the Mulan PubL v2 for more details.
|
|
*/
|
|
|
|
#include <pthread.h>
|
|
#include <stdio.h>
|
|
#include <time.h>
|
|
#include <sys/time.h>
|
|
#include <codecvt>
|
|
#include "lib/charset/ob_charset.h"
|
|
#include "lib/string/ob_string.h"
|
|
#include "lib/utility/ob_print_utils.h"
|
|
#include "gtest/gtest.h"
|
|
#include <iostream>
|
|
#include <fstream>
|
|
|
|
using namespace oceanbase::common;
|
|
|
|
#define CUR_RESULT_FILE_SUFFIX ".record"
|
|
#define STD_RESULT_FILE_SUFFIX ".result"
|
|
|
|
class TestCharset : public ::testing::Test {
|
|
public:
|
|
TestCharset();
|
|
virtual ~TestCharset();
|
|
virtual void SetUp();
|
|
virtual void TearDown();
|
|
template <typename func>
|
|
void for_each_utf8(func handle);
|
|
|
|
protected:
|
|
void gen_random_unicode_string(const int len, char* res, int& real_len);
|
|
int random_range(const int low, const int high);
|
|
};
|
|
|
|
TestCharset::TestCharset()
|
|
{}
|
|
|
|
TestCharset::~TestCharset()
|
|
{}
|
|
|
|
void TestCharset::SetUp()
|
|
{
|
|
srand((unsigned)time(NULL));
|
|
}
|
|
|
|
void TestCharset::TearDown()
|
|
{}
|
|
|
|
int TestCharset::random_range(const int low, const int high)
|
|
{
|
|
return std::rand() % (high - low) + low;
|
|
}
|
|
|
|
void TestCharset::gen_random_unicode_string(const int len, char* res, int& real_len)
|
|
{
|
|
int i = 0;
|
|
int unicode_point = 0;
|
|
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
|
|
for (i = 0; i < len;) {
|
|
const int bytes = random_range(1, 7);
|
|
if (bytes < 4) {
|
|
unicode_point = random_range(0, 127);
|
|
} else if (bytes < 6) {
|
|
unicode_point = random_range(0xFF, 0xFFFF);
|
|
} else if (bytes < 7) {
|
|
unicode_point = random_range(0XFFFF, 0X10FFFF);
|
|
}
|
|
std::string utf_str = converter.to_bytes(unicode_point);
|
|
// fprintf(stdout, "code_point=%d\n", unicode_point);
|
|
// fprintf(stdout, "utf8_str=%s\n", utf_str.c_str());
|
|
for (int j = 0; j < utf_str.size(); ++j) {
|
|
res[i++] = utf_str[j];
|
|
}
|
|
}
|
|
real_len = i;
|
|
}
|
|
|
|
TEST_F(TestCharset, strcmp)
|
|
{
|
|
ObString a;
|
|
ObString b;
|
|
int ret = ObCharset::strcmp(CS_TYPE_UTF8MB4_GENERAL_CI, a.ptr(), a.length(), b.ptr(), b.length());
|
|
fprintf(stdout, "ret:%d\n", ret);
|
|
ASSERT_EQ(0, ret);
|
|
char aa[10] = "abd";
|
|
char bb[10] = "aBd ";
|
|
char cc[10] = " aBd";
|
|
ret = ObCharset::strcmp(CS_TYPE_UTF8MB4_GENERAL_CI, aa, 3, bb, 4);
|
|
fprintf(stdout, "ret:%d\n", ret);
|
|
ASSERT_EQ(-1, ret);
|
|
ret = ObCharset::strcmp(CS_TYPE_UTF8MB4_GENERAL_CI, aa, 3, cc, 4);
|
|
fprintf(stdout, "ret:%d\n", ret);
|
|
ASSERT_EQ(1, ret);
|
|
ret = ObCharset::strcmp(CS_TYPE_UTF8MB4_BIN, aa, 3, bb, 4);
|
|
fprintf(stdout, "ret:%d\n", ret);
|
|
ASSERT_TRUE(ret > 0);
|
|
ObString c(aa);
|
|
ObString d(bb);
|
|
fprintf(stdout, "c:%.*s\n", c.length(), c.ptr());
|
|
fprintf(stdout, "d:%.*s\n", d.length(), d.ptr());
|
|
ret = ObCharset::strcmp(CS_TYPE_UTF8MB4_GENERAL_CI, c, d);
|
|
fprintf(stdout, "ret:%d\n", ret);
|
|
ASSERT_EQ(-1, ret);
|
|
fprintf(stdout, "ret:%d\n", ret);
|
|
ret = ObCharset::strcmp(CS_TYPE_UTF8MB4_BIN, c, d);
|
|
fprintf(stdout, "ret:%d\n", ret);
|
|
ASSERT_TRUE(ret > 0);
|
|
ObString empty;
|
|
ret = ObCharset::strcmp(CS_TYPE_UTF8MB4_GENERAL_CI, empty, d);
|
|
ASSERT_EQ(-1, ret);
|
|
ret = ObCharset::strcmp(CS_TYPE_UTF8MB4_GENERAL_CI, d, empty);
|
|
ASSERT_EQ(1, ret);
|
|
ObString empty1;
|
|
ret = ObCharset::strcmp(CS_TYPE_UTF8MB4_GENERAL_CI, empty1, empty);
|
|
ASSERT_EQ(0, ret);
|
|
ret = ObCharset::strcmp(CS_TYPE_UTF8MB4_BIN, empty1, empty);
|
|
ASSERT_EQ(0, ret);
|
|
}
|
|
|
|
TEST_F(TestCharset, sortkey)
|
|
{
|
|
char aa[10] = "abc";
|
|
char aa1[10];
|
|
char bb[10] = "abc ";
|
|
char bb1[10];
|
|
bool is_valid_unicode = false;
|
|
size_t size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, aa, strlen(aa), aa1, 10, is_valid_unicode);
|
|
size_t size2 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, bb, strlen(bb), bb1, 10, is_valid_unicode);
|
|
ASSERT_NE(size1, size2);
|
|
ASSERT_TRUE(is_valid_unicode);
|
|
|
|
char space[10] = " ";
|
|
size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, space, strlen(space), aa1, 10, is_valid_unicode);
|
|
ASSERT_EQ(size1, 2);
|
|
ASSERT_TRUE(is_valid_unicode);
|
|
|
|
char empty[10] = "";
|
|
size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, empty, strlen(empty), aa1, 10, is_valid_unicode);
|
|
ASSERT_EQ(size1, 0);
|
|
ASSERT_TRUE(is_valid_unicode);
|
|
|
|
char invalid[10];
|
|
invalid[0] = char(0x10);
|
|
invalid[1] = char(0x80);
|
|
invalid[2] = '\0';
|
|
size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, invalid, strlen(invalid), aa1, 10, is_valid_unicode);
|
|
ASSERT_EQ(size1, 1);
|
|
ASSERT_FALSE(is_valid_unicode);
|
|
|
|
// The parameter of sortkey cannot be NULL
|
|
// char *p = NULL;
|
|
// size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, true, p, 0, aa1, 10);
|
|
}
|
|
|
|
TEST_F(TestCharset, casedn)
|
|
{
|
|
char a1[14] = "Variable_name";
|
|
char a2[14] = "Variable_NAME";
|
|
char a3[14] = "variable_name";
|
|
ObString y1;
|
|
ObString y2;
|
|
ObString y3;
|
|
a1[13] = '1';
|
|
a2[13] = '1';
|
|
a3[13] = '1';
|
|
y1.assign_ptr(a1, 14);
|
|
y2.assign_ptr(a2, 14);
|
|
y3.assign_ptr(a3, 14);
|
|
fprintf(stdout, "ret:%p, %d\n", y1.ptr(), y1.length());
|
|
size_t size1 = ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, y1);
|
|
EXPECT_TRUE(y1 == y3);
|
|
size_t size2 = ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, y2);
|
|
fprintf(stdout, "y1:%.*s, y2:%.*s, y3:%.*s\n", y1.length(), y1.ptr(), y2.length(), y2.ptr(), y3.length(), y3.ptr());
|
|
EXPECT_TRUE(y2 == y3);
|
|
ASSERT_EQ(y1.length(), 14);
|
|
ASSERT_EQ(y2.length(), 14);
|
|
ASSERT_EQ(size1, 14);
|
|
ASSERT_EQ(size2, 14);
|
|
}
|
|
|
|
TEST_F(TestCharset, case_insensitive_equal)
|
|
{
|
|
ObString y1 = "Variable_name";
|
|
ObString y2 = "variable_name";
|
|
ObString y3 = "variable_name1";
|
|
ObString y4 = "variable_name1";
|
|
bool yy = ObCharset::case_insensitive_equal(y1, y2, CS_TYPE_UTF8MB4_GENERAL_CI);
|
|
ASSERT_TRUE(yy);
|
|
yy = ObCharset::case_insensitive_equal(y2, y3, CS_TYPE_UTF8MB4_GENERAL_CI);
|
|
ASSERT_FALSE(yy);
|
|
yy = ObCharset::case_insensitive_equal(y3, y4, CS_TYPE_UTF8MB4_GENERAL_CI);
|
|
ASSERT_TRUE(yy);
|
|
}
|
|
|
|
TEST_F(TestCharset, hash_sort)
|
|
{
|
|
ObString s;
|
|
uint64_t ret = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, s.ptr(), s.length(), 0);
|
|
const char* a = "abd";
|
|
const char* b = "aBD";
|
|
uint64_t ret1 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, a, 3, 0, NULL);
|
|
uint64_t ret2 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, b, 3, 0, NULL);
|
|
fprintf(stdout, "ret:%lu, ret1:%lu, ret2:%lu\n", ret, ret1, ret2);
|
|
uint64_t ret3 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, ObString::make_string(b));
|
|
ASSERT_EQ(ret2, ret3);
|
|
}
|
|
|
|
TEST_F(TestCharset, case_mode_equal)
|
|
{
|
|
ObString y1 = "Variable_name";
|
|
ObString y2 = "variable_name";
|
|
ObString y3 = "variable_name1";
|
|
ObString y4 = "variable_name1";
|
|
bool is_equal = false;
|
|
is_equal = ObCharset::case_mode_equal(OB_ORIGIN_AND_SENSITIVE, y1, y2);
|
|
ASSERT_FALSE(is_equal);
|
|
is_equal = ObCharset::case_mode_equal(OB_ORIGIN_AND_SENSITIVE, y1, y1);
|
|
ASSERT_TRUE(is_equal);
|
|
is_equal = ObCharset::case_mode_equal(OB_ORIGIN_AND_SENSITIVE, y3, y4);
|
|
ASSERT_TRUE(is_equal);
|
|
is_equal = ObCharset::case_mode_equal(OB_ORIGIN_AND_SENSITIVE, y1, y3);
|
|
ASSERT_FALSE(is_equal);
|
|
is_equal = ObCharset::case_mode_equal(OB_ORIGIN_AND_INSENSITIVE, y1, y2);
|
|
ASSERT_TRUE(is_equal);
|
|
is_equal = ObCharset::case_mode_equal(OB_ORIGIN_AND_INSENSITIVE, y1, y1);
|
|
ASSERT_TRUE(is_equal);
|
|
is_equal = ObCharset::case_mode_equal(OB_ORIGIN_AND_INSENSITIVE, y3, y4);
|
|
ASSERT_TRUE(is_equal);
|
|
is_equal = ObCharset::case_mode_equal(OB_ORIGIN_AND_INSENSITIVE, y1, y3);
|
|
ASSERT_FALSE(is_equal);
|
|
is_equal = ObCharset::case_mode_equal(OB_LOWERCASE_AND_INSENSITIVE, y1, y2);
|
|
ASSERT_TRUE(is_equal);
|
|
is_equal = ObCharset::case_mode_equal(OB_LOWERCASE_AND_INSENSITIVE, y1, y1);
|
|
ASSERT_TRUE(is_equal);
|
|
is_equal = ObCharset::case_mode_equal(OB_LOWERCASE_AND_INSENSITIVE, y3, y4);
|
|
ASSERT_TRUE(is_equal);
|
|
is_equal = ObCharset::case_mode_equal(OB_LOWERCASE_AND_INSENSITIVE, y1, y3);
|
|
ASSERT_FALSE(is_equal);
|
|
}
|
|
|
|
TEST_F(TestCharset, well_formed_length)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
const char* str = "\0123";
|
|
ObCollationType cs_type = CS_TYPE_UTF8MB4_GENERAL_CI;
|
|
int64_t well_formed_length = 0;
|
|
int64_t str_len = 1;
|
|
|
|
ret = ObCharset::well_formed_len(cs_type, str, str_len, well_formed_length);
|
|
ASSERT_TRUE(OB_SUCC(ret));
|
|
ASSERT_TRUE(1 == well_formed_length);
|
|
ret = ObCharset::well_formed_len(cs_type, str, 0, well_formed_length);
|
|
ASSERT_TRUE(OB_SUCC(ret));
|
|
ASSERT_TRUE(0 == well_formed_length);
|
|
ret = ObCharset::well_formed_len(cs_type, NULL, 0, well_formed_length);
|
|
ASSERT_TRUE(OB_SUCC(ret));
|
|
ASSERT_TRUE(0 == well_formed_length);
|
|
ret = ObCharset::well_formed_len(cs_type, NULL, str_len, well_formed_length);
|
|
ASSERT_TRUE(OB_INVALID_ARGUMENT == ret);
|
|
}
|
|
|
|
TEST_F(TestCharset, test_max_byte_char_pos)
|
|
{
|
|
int ret = OB_SUCCESS;
|
|
const ObCollationType types[] = {CS_TYPE_BINARY, CS_TYPE_UTF8MB4_GENERAL_CI, CS_TYPE_UTF8MB4_BIN};
|
|
for (int64_t i = 0; OB_SUCC(ret) && i < sizeof(types) / sizeof(ObCollationType); ++i) {
|
|
int real_len = 0;
|
|
int64_t char_len = 0;
|
|
char buf[25600];
|
|
gen_random_unicode_string(25500, buf, real_len);
|
|
std::cout << "real_len" << real_len << std::endl;
|
|
int64_t left_bytes = real_len;
|
|
const int64_t block_size = 16000;
|
|
char* pos = buf;
|
|
while (left_bytes > 0) {
|
|
int64_t well_formed_len = 0;
|
|
int32_t well_formed_error = 0;
|
|
int64_t calc_char_len = 0;
|
|
const int64_t write_bytes = std::min(left_bytes, block_size);
|
|
const int64_t real_bytes = ObCharset::max_bytes_charpos(types[i], pos, left_bytes, write_bytes, char_len);
|
|
std::cout << "real_bytes" << real_bytes << std::endl;
|
|
ASSERT_TRUE(real_bytes <= 16000);
|
|
ret = ObCharset::well_formed_len(types[i], pos, real_bytes, well_formed_len, well_formed_error);
|
|
ASSERT_EQ(OB_SUCCESS, ret);
|
|
ASSERT_EQ(real_bytes, well_formed_len);
|
|
ASSERT_EQ(0, well_formed_error);
|
|
calc_char_len = ObCharset::strlen_char(types[i], pos, real_bytes);
|
|
ASSERT_EQ(calc_char_len, char_len);
|
|
left_bytes -= real_bytes;
|
|
pos += real_bytes;
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(TestCharset, test_ascii_list_for_all_charset)
|
|
{
|
|
const int64_t buf_len = 100;
|
|
char buf[buf_len] = {0};
|
|
|
|
const int64_t chunk_size = 8192;
|
|
char chunk[chunk_size] = {0};
|
|
ObDataBuffer allocator(chunk, chunk_size);
|
|
|
|
ASSERT_EQ(OB_SUCCESS, ObCharsetUtils::init(allocator));
|
|
|
|
std::cout << "ascii";
|
|
for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
|
|
auto charset_type = static_cast<ObCharsetType>(cs_i);
|
|
if (!ObCharset::is_valid_charset(charset_type))
|
|
continue;
|
|
ObCollationType cs_type = ObCharset::get_default_collation(charset_type);
|
|
ASSERT_TRUE(ObCharset::is_valid_collation(cs_type));
|
|
std::cout << "\t" << ObCharset::charset_name(cs_type);
|
|
}
|
|
std::cout << std::endl;
|
|
|
|
for (int ascii_wc = 0; ascii_wc <= INT8_MAX; ascii_wc++) {
|
|
std::cout << ascii_wc;
|
|
for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
|
|
auto charset_type = static_cast<ObCharsetType>(cs_i);
|
|
if (!ObCharset::is_valid_charset(charset_type))
|
|
continue;
|
|
ObCollationType cs_type = ObCharset::get_default_collation(charset_type);
|
|
ASSERT_TRUE(ObCharset::is_valid_collation(cs_type));
|
|
int64_t result_len = 0;
|
|
ObString str = ObCharsetUtils::get_const_str(cs_type, ascii_wc);
|
|
ASSERT_EQ(OB_SUCCESS, hex_print(str.ptr(), str.length(), buf, buf_len, result_len));
|
|
buf[result_len] = '\0';
|
|
std::cout << "\t" << buf;
|
|
}
|
|
|
|
std::cout << std::endl;
|
|
}
|
|
}
|
|
|
|
int unicode_to_utf8(ob_wc_t c, unsigned char* utf8string)
|
|
{
|
|
if (c <= 0x7F) {
|
|
utf8string[0] = c;
|
|
return 1;
|
|
} else if (c <= 0x7FF) {
|
|
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
|
|
utf8string[1] = 0x80 | (c & 0x3F);
|
|
return 2;
|
|
} else if (c <= 0xFFFF) {
|
|
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
|
|
utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
|
|
utf8string[2] = 0x80 | (c & 0x3F);
|
|
return 3;
|
|
} else {
|
|
utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
|
|
utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
|
|
utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
|
|
utf8string[3] = 0x80 | (c & 0x3F);
|
|
return 4;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
template <typename func>
|
|
void TestCharset::for_each_utf8(func handle)
|
|
{
|
|
char buf[4];
|
|
ObString str(4, 0, buf);
|
|
|
|
for (ob_wc_t wchar = 0; wchar < 0x110000; wchar++) {
|
|
int len = unicode_to_utf8(wchar, (unsigned char*)buf);
|
|
ASSERT_TRUE(0 != len);
|
|
str.set_length(len);
|
|
handle(str, wchar);
|
|
}
|
|
}
|
|
|
|
/*
|
|
template<typename func>
|
|
void TestCharset::for_each_binary(func handle) {
|
|
char buf[3];
|
|
ObString str(3, 0, buf);
|
|
|
|
//one byte
|
|
for (unsigned char c = 0; c < 0xFF; c++) {
|
|
str.set_length(0);
|
|
str.write((char*)(&c), 1);
|
|
handle(str);
|
|
}
|
|
//two bytes
|
|
for (unsigned char c1 = 0; c1 < 0xFF; c1++) {
|
|
for (unsigned char c2 = 0; c2 < 0xFF; c2++) {
|
|
str.set_length(0);
|
|
str.write((char*)(&c1), 1);
|
|
str.write((char*)(&c2), 1);
|
|
handle(str);
|
|
}
|
|
}
|
|
//three bytes
|
|
for (unsigned char c1 = 0; c1 < 0xFF; c1++) {
|
|
for (unsigned char c2 = 0; c2 < 0xFF; c2++) {
|
|
for (unsigned char c3 = 0; c3 < 0xFF; c3++) {
|
|
str.set_length(0);
|
|
str.write((char*)(&c1), 1);
|
|
str.write((char*)(&c2), 1);
|
|
str.write((char*)(&c3), 1);
|
|
handle(str);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
struct TestReusltFileGuard {
|
|
TestReusltFileGuard(const char* test_name) : fp_(nullptr)
|
|
{
|
|
std::string file_path;
|
|
file_path.append("./");
|
|
file_path.append(test_name);
|
|
file_path.append(CUR_RESULT_FILE_SUFFIX);
|
|
fp_ = fopen(file_path.c_str(), "w");
|
|
}
|
|
~TestReusltFileGuard()
|
|
{
|
|
if (nullptr != fp_) {
|
|
fclose(fp_);
|
|
fp_ = nullptr;
|
|
}
|
|
}
|
|
FILE* get_fp()
|
|
{
|
|
return fp_;
|
|
}
|
|
FILE* fp_;
|
|
};
|
|
|
|
void compare_result(const char* test_name)
|
|
{
|
|
std::string cur_res_file_path, std_res_file_path;
|
|
cur_res_file_path.append("./");
|
|
cur_res_file_path.append(test_name);
|
|
cur_res_file_path.append(CUR_RESULT_FILE_SUFFIX);
|
|
std_res_file_path.append("./");
|
|
std_res_file_path.append(test_name);
|
|
std_res_file_path.append(STD_RESULT_FILE_SUFFIX);
|
|
|
|
std::ifstream cur_res(cur_res_file_path, std::ios::binary);
|
|
ASSERT_TRUE(cur_res.is_open());
|
|
std::ifstream std_res(std_res_file_path, std::ios::binary);
|
|
ASSERT_TRUE(std_res.is_open());
|
|
|
|
std::string cur_line;
|
|
std::string std_line;
|
|
int line_no = 0;
|
|
while (std::getline(std_res, std_line)) {
|
|
line_no++;
|
|
ASSERT_TRUE(std::getline(cur_res, cur_line));
|
|
if (0 != std_line.compare(cur_line)) {
|
|
fprintf(stdout,
|
|
"not consistent result detected at line %d:\n"
|
|
"cur_line:%s\n"
|
|
"std_line:%s\n",
|
|
line_no,
|
|
cur_line.c_str(),
|
|
std_line.c_str());
|
|
ASSERT_TRUE(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
TEST_F(TestCharset, test_ismbchar_utf8)
|
|
{
|
|
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
|
ObString test_name_pure(test_name);
|
|
test_name_pure.split_on('_');
|
|
do {
|
|
TestReusltFileGuard file_guard(test_name);
|
|
ASSERT_TRUE(NULL != file_guard.get_fp());
|
|
|
|
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
|
fprintf(file_guard.get_fp(),
|
|
"U+%04lX\t"
|
|
"%.*s\t"
|
|
"%d\t"
|
|
"%d\n",
|
|
wchar,
|
|
str.length(),
|
|
str.ptr(),
|
|
ObCharset::is_mbchar(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.ptr() + str.length()),
|
|
ObCharset::is_mbchar(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.ptr() + str.length()));
|
|
};
|
|
fprintf(file_guard.get_fp(),
|
|
"wchar\t"
|
|
"str\t"
|
|
"%.*s(UTF8MB4_BIN)\t"
|
|
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr(),
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr());
|
|
TestCharset::for_each_utf8(handle);
|
|
} while (0);
|
|
|
|
compare_result(test_name);
|
|
}
|
|
|
|
TEST_F(TestCharset, test_strlen_char_utf8)
|
|
{
|
|
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
|
ObString test_name_pure(test_name);
|
|
test_name_pure.split_on('_');
|
|
do {
|
|
TestReusltFileGuard file_guard(test_name);
|
|
ASSERT_TRUE(NULL != file_guard.get_fp());
|
|
|
|
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
|
fprintf(file_guard.get_fp(),
|
|
"U+%04lX\t"
|
|
"%.*s\t"
|
|
"%lu\t"
|
|
"%lu\n",
|
|
wchar,
|
|
str.length(),
|
|
str.ptr(),
|
|
|
|
ObCharset::strlen_char(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.length()),
|
|
ObCharset::strlen_char(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.length()));
|
|
};
|
|
fprintf(file_guard.get_fp(),
|
|
"wchar\t"
|
|
"str\t"
|
|
"%.*s(UTF8MB4_BIN)\t"
|
|
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr(),
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr());
|
|
TestCharset::for_each_utf8(handle);
|
|
} while (0);
|
|
|
|
compare_result(test_name);
|
|
}
|
|
|
|
TEST_F(TestCharset, test_mb_wc_utf8)
|
|
{
|
|
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
|
ObString test_name_pure(test_name);
|
|
test_name_pure.split_on('_');
|
|
do {
|
|
TestReusltFileGuard file_guard(test_name);
|
|
ASSERT_TRUE(NULL != file_guard.get_fp());
|
|
|
|
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
|
int32_t cur_wchar1, cur_wchar2;
|
|
int32_t length1, length2;
|
|
|
|
ASSERT_EQ(0, ObCharset::mb_wc(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.length(), length1, cur_wchar1));
|
|
ASSERT_EQ(0, ObCharset::mb_wc(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.length(), length2, cur_wchar2));
|
|
fprintf(file_guard.get_fp(),
|
|
"U+%04lX\t"
|
|
"%.*s\t"
|
|
"%04x\t"
|
|
"%04x\n",
|
|
wchar,
|
|
str.length(),
|
|
str.ptr(),
|
|
cur_wchar1,
|
|
cur_wchar2);
|
|
ASSERT_TRUE(cur_wchar1 == wchar);
|
|
ASSERT_TRUE(cur_wchar2 == wchar);
|
|
};
|
|
fprintf(file_guard.get_fp(),
|
|
"wchar\t"
|
|
"str\t"
|
|
"%.*s(UTF8MB4_BIN)\t"
|
|
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr(),
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr());
|
|
TestCharset::for_each_utf8(handle);
|
|
} while (0);
|
|
|
|
compare_result(test_name);
|
|
}
|
|
|
|
TEST_F(TestCharset, test_wc_mb_utf8)
|
|
{
|
|
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
|
ObString test_name_pure(test_name);
|
|
test_name_pure.split_on('_');
|
|
do {
|
|
|
|
auto handle = [](const ObString& str, ob_wc_t wchar) -> void {
|
|
char buf[4];
|
|
int32_t length;
|
|
ObString res(4, 0, buf);
|
|
|
|
ASSERT_EQ(0, ObCharset::wc_mb(CS_TYPE_UTF8MB4_BIN, wchar, buf, 4, length));
|
|
res.set_length(length);
|
|
ASSERT_TRUE(0 == str.compare(res));
|
|
|
|
ASSERT_EQ(0, ObCharset::wc_mb(CS_TYPE_UTF8MB4_GENERAL_CI, wchar, buf, 4, length));
|
|
res.set_length(length);
|
|
ASSERT_TRUE(0 == str.compare(res));
|
|
};
|
|
TestCharset::for_each_utf8(handle);
|
|
} while (0);
|
|
}
|
|
|
|
TEST_F(TestCharset, test_caseup_utf8)
|
|
{
|
|
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
|
ObString test_name_pure(test_name);
|
|
test_name_pure.split_on('_');
|
|
do {
|
|
TestReusltFileGuard file_guard(test_name);
|
|
ASSERT_TRUE(NULL != file_guard.get_fp());
|
|
|
|
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
|
char buf1[4];
|
|
char buf2[4];
|
|
int length1, length2;
|
|
|
|
ASSERT_TRUE(
|
|
0 < (length1 = ObCharset::caseup(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), buf1, 4)));
|
|
ASSERT_TRUE(0 < (length2 = ObCharset::caseup(
|
|
CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), buf2, 4)));
|
|
|
|
fprintf(file_guard.get_fp(),
|
|
"U+%04lX\t"
|
|
"%.*s\t"
|
|
"%.*s\t"
|
|
"%.*s\n",
|
|
wchar,
|
|
str.length(),
|
|
str.ptr(),
|
|
length1,
|
|
buf1,
|
|
length2,
|
|
buf2);
|
|
};
|
|
fprintf(file_guard.get_fp(),
|
|
"wchar\t"
|
|
"str\t"
|
|
"%.*s(UTF8MB4_BIN)\t"
|
|
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr(),
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr());
|
|
TestCharset::for_each_utf8(handle);
|
|
} while (0);
|
|
|
|
compare_result(test_name);
|
|
}
|
|
|
|
TEST_F(TestCharset, test_casedn_utf8)
|
|
{
|
|
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
|
ObString test_name_pure(test_name);
|
|
test_name_pure.split_on('_');
|
|
do {
|
|
TestReusltFileGuard file_guard(test_name);
|
|
ASSERT_TRUE(NULL != file_guard.get_fp());
|
|
|
|
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
|
char buf1[4];
|
|
char buf2[4];
|
|
int length1, length2;
|
|
|
|
ASSERT_TRUE(
|
|
0 < (length1 = ObCharset::casedn(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), buf1, 4)));
|
|
ASSERT_TRUE(0 < (length2 = ObCharset::casedn(
|
|
CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), buf2, 4)));
|
|
|
|
fprintf(file_guard.get_fp(),
|
|
"U+%04lX\t"
|
|
"%.*s\t"
|
|
"%.*s\t"
|
|
"%.*s\n",
|
|
wchar,
|
|
str.length(),
|
|
str.ptr(),
|
|
length1,
|
|
buf1,
|
|
length2,
|
|
buf2);
|
|
};
|
|
fprintf(file_guard.get_fp(),
|
|
"wchar\t"
|
|
"str\t"
|
|
"%.*s(UTF8MB4_BIN)\t"
|
|
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr(),
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr());
|
|
TestCharset::for_each_utf8(handle);
|
|
} while (0);
|
|
|
|
compare_result(test_name);
|
|
}
|
|
|
|
TEST_F(TestCharset, test_sortkey_utf8)
|
|
{
|
|
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
|
ObString test_name_pure(test_name);
|
|
test_name_pure.split_on('_');
|
|
do {
|
|
TestReusltFileGuard file_guard(test_name);
|
|
ASSERT_TRUE(NULL != file_guard.get_fp());
|
|
|
|
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
|
char buf1[4];
|
|
char buf2[4];
|
|
int length1, length2;
|
|
bool is_uni1, is_uni2;
|
|
|
|
ASSERT_TRUE(0 < (length1 = ObCharset::sortkey(
|
|
CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), buf1, 4, is_uni1)));
|
|
ASSERT_TRUE(is_uni1);
|
|
ASSERT_TRUE(0 < (length2 = ObCharset::sortkey(
|
|
CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), buf2, 4, is_uni2)));
|
|
ASSERT_TRUE(is_uni2);
|
|
|
|
fprintf(file_guard.get_fp(),
|
|
"U+%04lX\t"
|
|
"%.*s\t"
|
|
"%.*s\t"
|
|
"%.*s\n",
|
|
wchar,
|
|
str.length(),
|
|
str.ptr(),
|
|
length1,
|
|
buf1,
|
|
length2,
|
|
buf2);
|
|
};
|
|
fprintf(file_guard.get_fp(),
|
|
"wchar\t"
|
|
"str\t"
|
|
"%.*s(UTF8MB4_BIN)\t"
|
|
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr(),
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr());
|
|
TestCharset::for_each_utf8(handle);
|
|
} while (0);
|
|
|
|
compare_result(test_name);
|
|
}
|
|
|
|
TEST_F(TestCharset, test_hash_sort_utf8)
|
|
{
|
|
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
|
ObString test_name_pure(test_name);
|
|
test_name_pure.split_on('_');
|
|
do {
|
|
TestReusltFileGuard file_guard(test_name);
|
|
ASSERT_TRUE(NULL != file_guard.get_fp());
|
|
|
|
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
|
fprintf(file_guard.get_fp(),
|
|
"U+%04lX\t"
|
|
"%.*s\t"
|
|
"%lu\t"
|
|
"%lu\t"
|
|
"%lu\t"
|
|
"%lu\n",
|
|
wchar,
|
|
str.length(),
|
|
str.ptr(),
|
|
ObCharset::hash(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), 0, 0, NULL),
|
|
ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), 0, 0, NULL),
|
|
ObCharset::hash(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), 0, 1, NULL),
|
|
ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), 0, 1, NULL));
|
|
};
|
|
fprintf(file_guard.get_fp(),
|
|
"wchar\t"
|
|
"str\t"
|
|
"%.*s(UTF8MB4_BIN)\t"
|
|
"%.*s(UTF8MB4_GENERAL_CI)\t"
|
|
"%.*s(UTF8MB4_BIN oracle)\t"
|
|
"%.*s(UTF8MB4_GENERAL_CI oracle)\n",
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr(),
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr(),
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr(),
|
|
test_name_pure.length(),
|
|
test_name_pure.ptr());
|
|
TestCharset::for_each_utf8(handle);
|
|
} while (0);
|
|
|
|
compare_result(test_name);
|
|
}
|
|
|
|
int main(int argc, char** argv)
|
|
{
|
|
testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
}
|