patch 4.0
This commit is contained in:
667
deps/oblib/unittest/lib/charset/test_charset.cpp
vendored
667
deps/oblib/unittest/lib/charset/test_charset.cpp
vendored
@ -19,53 +19,53 @@
|
||||
#include "lib/string/ob_string.h"
|
||||
#include "lib/utility/ob_print_utils.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include "unicode_map.h"
|
||||
#include "common/data_buffer.h"
|
||||
#include "lib/oblog/ob_log_module.h"
|
||||
#define USING_LOG_PREFIX SQL
|
||||
|
||||
using namespace oceanbase::common;
|
||||
|
||||
#define CUR_RESULT_FILE_SUFFIX ".record"
|
||||
#define STD_RESULT_FILE_SUFFIX ".result"
|
||||
|
||||
class TestCharset : public ::testing::Test {
|
||||
class TestCharset: public ::testing::Test
|
||||
{
|
||||
public:
|
||||
TestCharset();
|
||||
virtual ~TestCharset();
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
template <typename func>
|
||||
void for_each_utf8(func handle);
|
||||
|
||||
protected:
|
||||
void gen_random_unicode_string(const int len, char* res, int& real_len);
|
||||
void gen_random_unicode_string(const int len, char *res, int &real_len);
|
||||
int random_range(const int low, const int high);
|
||||
};
|
||||
|
||||
TestCharset::TestCharset()
|
||||
{}
|
||||
{
|
||||
}
|
||||
|
||||
TestCharset::~TestCharset()
|
||||
{}
|
||||
{
|
||||
}
|
||||
|
||||
void TestCharset::SetUp()
|
||||
{
|
||||
srand((unsigned)time(NULL));
|
||||
srand((unsigned)time(NULL ));
|
||||
}
|
||||
|
||||
void TestCharset::TearDown()
|
||||
{}
|
||||
{
|
||||
}
|
||||
|
||||
int TestCharset::random_range(const int low, const int high)
|
||||
{
|
||||
return std::rand() % (high - low) + low;
|
||||
}
|
||||
|
||||
void TestCharset::gen_random_unicode_string(const int len, char* res, int& real_len)
|
||||
void TestCharset::gen_random_unicode_string(const int len, char *res, int &real_len)
|
||||
{
|
||||
int i = 0;
|
||||
int unicode_point = 0;
|
||||
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
|
||||
for (i = 0; i < len;) {
|
||||
for (i = 0; i < len; ) {
|
||||
const int bytes = random_range(1, 7);
|
||||
if (bytes < 4) {
|
||||
unicode_point = random_range(0, 127);
|
||||
@ -75,8 +75,8 @@ void TestCharset::gen_random_unicode_string(const int len, char* res, int& real_
|
||||
unicode_point = random_range(0XFFFF, 0X10FFFF);
|
||||
}
|
||||
std::string utf_str = converter.to_bytes(unicode_point);
|
||||
// fprintf(stdout, "code_point=%d\n", unicode_point);
|
||||
// fprintf(stdout, "utf8_str=%s\n", utf_str.c_str());
|
||||
//fprintf(stdout, "code_point=%d\n", unicode_point);
|
||||
//fprintf(stdout, "utf8_str=%s\n", utf_str.c_str());
|
||||
for (int j = 0; j < utf_str.size(); ++j) {
|
||||
res[i++] = utf_str[j];
|
||||
}
|
||||
@ -157,8 +157,8 @@ TEST_F(TestCharset, sortkey)
|
||||
ASSERT_FALSE(is_valid_unicode);
|
||||
|
||||
// The parameter of sortkey cannot be NULL
|
||||
// char *p = NULL;
|
||||
// size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, true, p, 0, aa1, 10);
|
||||
//char *p = NULL;
|
||||
//size1 = ObCharset::sortkey(CS_TYPE_UTF8MB4_GENERAL_CI, true, p, 0, aa1, 10);
|
||||
}
|
||||
|
||||
TEST_F(TestCharset, casedn)
|
||||
@ -175,7 +175,7 @@ TEST_F(TestCharset, casedn)
|
||||
y1.assign_ptr(a1, 14);
|
||||
y2.assign_ptr(a2, 14);
|
||||
y3.assign_ptr(a3, 14);
|
||||
fprintf(stdout, "ret:%p, %d\n", y1.ptr(), y1.length());
|
||||
fprintf(stdout, "ret:%p, %d\n", y1.ptr(), y1.length() );
|
||||
size_t size1 = ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, y1);
|
||||
EXPECT_TRUE(y1 == y3);
|
||||
size_t size2 = ObCharset::casedn(CS_TYPE_UTF8MB4_GENERAL_CI, y2);
|
||||
@ -189,10 +189,10 @@ TEST_F(TestCharset, casedn)
|
||||
|
||||
TEST_F(TestCharset, case_insensitive_equal)
|
||||
{
|
||||
ObString y1 = "Variable_name";
|
||||
ObString y2 = "variable_name";
|
||||
ObString y3 = "variable_name1";
|
||||
ObString y4 = "variable_name1";
|
||||
ObString y1= "Variable_name";
|
||||
ObString y2= "variable_name";
|
||||
ObString y3= "variable_name1";
|
||||
ObString y4= "variable_name1";
|
||||
bool yy = ObCharset::case_insensitive_equal(y1, y2, CS_TYPE_UTF8MB4_GENERAL_CI);
|
||||
ASSERT_TRUE(yy);
|
||||
yy = ObCharset::case_insensitive_equal(y2, y3, CS_TYPE_UTF8MB4_GENERAL_CI);
|
||||
@ -205,10 +205,10 @@ TEST_F(TestCharset, hash_sort)
|
||||
{
|
||||
ObString s;
|
||||
uint64_t ret = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, s.ptr(), s.length(), 0);
|
||||
const char* a = "abd";
|
||||
const char* b = "aBD";
|
||||
uint64_t ret1 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, a, 3, 0, NULL);
|
||||
uint64_t ret2 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, b, 3, 0, NULL);
|
||||
const char *a = "abd";
|
||||
const char *b = "aBD";
|
||||
uint64_t ret1 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, a, 3, 0);
|
||||
uint64_t ret2 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, b, 3, 0);
|
||||
fprintf(stdout, "ret:%lu, ret1:%lu, ret2:%lu\n", ret, ret1, ret2);
|
||||
uint64_t ret3 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, ObString::make_string(b));
|
||||
ASSERT_EQ(ret2, ret3);
|
||||
@ -216,10 +216,10 @@ TEST_F(TestCharset, hash_sort)
|
||||
|
||||
TEST_F(TestCharset, case_mode_equal)
|
||||
{
|
||||
ObString y1 = "Variable_name";
|
||||
ObString y2 = "variable_name";
|
||||
ObString y3 = "variable_name1";
|
||||
ObString y4 = "variable_name1";
|
||||
ObString y1= "Variable_name";
|
||||
ObString y2= "variable_name";
|
||||
ObString y3= "variable_name1";
|
||||
ObString y4= "variable_name1";
|
||||
bool is_equal = false;
|
||||
is_equal = ObCharset::case_mode_equal(OB_ORIGIN_AND_SENSITIVE, y1, y2);
|
||||
ASSERT_FALSE(is_equal);
|
||||
@ -250,8 +250,8 @@ TEST_F(TestCharset, case_mode_equal)
|
||||
TEST_F(TestCharset, well_formed_length)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
const char* str = "\0123";
|
||||
ObCollationType cs_type = CS_TYPE_UTF8MB4_GENERAL_CI;
|
||||
const char *str = "\0123";
|
||||
ObCollationType cs_type = CS_TYPE_UTF8MB4_GENERAL_CI;
|
||||
int64_t well_formed_length = 0;
|
||||
int64_t str_len = 1;
|
||||
|
||||
@ -280,7 +280,7 @@ TEST_F(TestCharset, test_max_byte_char_pos)
|
||||
std::cout << "real_len" << real_len << std::endl;
|
||||
int64_t left_bytes = real_len;
|
||||
const int64_t block_size = 16000;
|
||||
char* pos = buf;
|
||||
char *pos = buf;
|
||||
while (left_bytes > 0) {
|
||||
int64_t well_formed_len = 0;
|
||||
int32_t well_formed_error = 0;
|
||||
@ -312,7 +312,7 @@ TEST_F(TestCharset, test_ascii_list_for_all_charset)
|
||||
|
||||
ASSERT_EQ(OB_SUCCESS, ObCharsetUtils::init(allocator));
|
||||
|
||||
std::cout << "ascii";
|
||||
std::cout<< "ascii";
|
||||
for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
|
||||
auto charset_type = static_cast<ObCharsetType>(cs_i);
|
||||
if (!ObCharset::is_valid_charset(charset_type))
|
||||
@ -324,7 +324,7 @@ TEST_F(TestCharset, test_ascii_list_for_all_charset)
|
||||
std::cout << std::endl;
|
||||
|
||||
for (int ascii_wc = 0; ascii_wc <= INT8_MAX; ascii_wc++) {
|
||||
std::cout << ascii_wc;
|
||||
std::cout<< ascii_wc;
|
||||
for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
|
||||
auto charset_type = static_cast<ObCharsetType>(cs_i);
|
||||
if (!ObCharset::is_valid_charset(charset_type))
|
||||
@ -333,479 +333,174 @@ TEST_F(TestCharset, test_ascii_list_for_all_charset)
|
||||
ASSERT_TRUE(ObCharset::is_valid_collation(cs_type));
|
||||
int64_t result_len = 0;
|
||||
ObString str = ObCharsetUtils::get_const_str(cs_type, ascii_wc);
|
||||
ASSERT_EQ(OB_SUCCESS, hex_print(str.ptr(), str.length(), buf, buf_len, result_len));
|
||||
ASSERT_EQ (OB_SUCCESS, hex_print(str.ptr(), str.length(), buf, buf_len, result_len));
|
||||
buf[result_len] = '\0';
|
||||
std::cout << "\t" << buf;
|
||||
std::cout <<"\t" << buf;
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
int unicode_to_utf8(ob_wc_t c, unsigned char* utf8string)
|
||||
TEST_F(TestCharset, test_find_gb18030_case_prob)
|
||||
{
|
||||
if (c <= 0x7F) {
|
||||
utf8string[0] = c;
|
||||
return 1;
|
||||
} else if (c <= 0x7FF) {
|
||||
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
|
||||
utf8string[1] = 0x80 | (c & 0x3F);
|
||||
return 2;
|
||||
} else if (c <= 0xFFFF) {
|
||||
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
|
||||
utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
|
||||
utf8string[2] = 0x80 | (c & 0x3F);
|
||||
return 3;
|
||||
} else {
|
||||
utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
|
||||
utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
|
||||
utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
|
||||
utf8string[3] = 0x80 | (c & 0x3F);
|
||||
return 4;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename func>
|
||||
void TestCharset::for_each_utf8(func handle)
|
||||
{
|
||||
char buf[4];
|
||||
ObString str(4, 0, buf);
|
||||
|
||||
for (ob_wc_t wchar = 0; wchar < 0x110000; wchar++) {
|
||||
int len = unicode_to_utf8(wchar, (unsigned char*)buf);
|
||||
ASSERT_TRUE(0 != len);
|
||||
str.set_length(len);
|
||||
handle(str, wchar);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
template<typename func>
|
||||
void TestCharset::for_each_binary(func handle) {
|
||||
char buf[3];
|
||||
ObString str(3, 0, buf);
|
||||
|
||||
//one byte
|
||||
for (unsigned char c = 0; c < 0xFF; c++) {
|
||||
str.set_length(0);
|
||||
str.write((char*)(&c), 1);
|
||||
handle(str);
|
||||
}
|
||||
//two bytes
|
||||
for (unsigned char c1 = 0; c1 < 0xFF; c1++) {
|
||||
for (unsigned char c2 = 0; c2 < 0xFF; c2++) {
|
||||
str.set_length(0);
|
||||
str.write((char*)(&c1), 1);
|
||||
str.write((char*)(&c2), 1);
|
||||
handle(str);
|
||||
}
|
||||
}
|
||||
//three bytes
|
||||
for (unsigned char c1 = 0; c1 < 0xFF; c1++) {
|
||||
for (unsigned char c2 = 0; c2 < 0xFF; c2++) {
|
||||
for (unsigned char c3 = 0; c3 < 0xFF; c3++) {
|
||||
str.set_length(0);
|
||||
str.write((char*)(&c1), 1);
|
||||
str.write((char*)(&c2), 1);
|
||||
str.write((char*)(&c3), 1);
|
||||
handle(str);
|
||||
const int buf_len = 20;
|
||||
char buf1[buf_len];
|
||||
char buf2[buf_len];
|
||||
char hex_buf1[buf_len];
|
||||
char hex_buf2[buf_len];
|
||||
int length1 = 0, length2 = 0;
|
||||
ObCollationType cs_type = CS_TYPE_GB18030_BIN;
|
||||
for (int i = 0; i < 256; i++) {
|
||||
const ObUnicaseInfoChar *info = ObCharset::get_charset(cs_type)->caseinfo->page[i];
|
||||
if (NULL != info) {
|
||||
for (int j = 0; j < 256; j++) {
|
||||
ASSERT_TRUE(OB_SUCCESS == ObCharset::wc_mb(cs_type, info[j].tolower, buf1, buf_len, length1));
|
||||
ASSERT_TRUE(OB_SUCCESS == ObCharset::wc_mb(cs_type, info[j].toupper, buf2, buf_len, length2));
|
||||
buf1[length1] = '\0';
|
||||
buf2[length2] = '\0';
|
||||
if (length1 != length2) {
|
||||
ASSERT_TRUE(OB_SUCCESS == to_hex_cstr(buf1, length1, hex_buf1, buf_len));
|
||||
ASSERT_TRUE(OB_SUCCESS == to_hex_cstr(buf2, length2, hex_buf2, buf_len));
|
||||
std::cout<< info[j].tolower <<"," << info[j].toupper << "," << hex_buf1 << "," << hex_buf2 << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
TEST_F(TestCharset, test_gbk_pua)
|
||||
{
|
||||
|
||||
int64_t size = sizeof(gbk_uni_map) / sizeof(UniCodeMap);
|
||||
ASSERT_EQ(size, 23940);
|
||||
for (int i = 0; i < size; i++) {
|
||||
ASSERT_TRUE(func_gbk_uni_onechar(gbk_uni_map[i].encoding) == gbk_uni_map[i].unicode) << "i=" << i;
|
||||
ASSERT_TRUE(func_uni_gbk_onechar(gbk_uni_map[i].unicode) == gbk_uni_map[i].encoding) << "i=" << i;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
struct TestReusltFileGuard {
|
||||
TestReusltFileGuard(const char* test_name) : fp_(nullptr)
|
||||
{
|
||||
std::string file_path;
|
||||
file_path.append("./");
|
||||
file_path.append(test_name);
|
||||
file_path.append(CUR_RESULT_FILE_SUFFIX);
|
||||
fp_ = fopen(file_path.c_str(), "w");
|
||||
}
|
||||
~TestReusltFileGuard()
|
||||
{
|
||||
if (nullptr != fp_) {
|
||||
fclose(fp_);
|
||||
fp_ = nullptr;
|
||||
}
|
||||
}
|
||||
FILE* get_fp()
|
||||
{
|
||||
return fp_;
|
||||
}
|
||||
FILE* fp_;
|
||||
};
|
||||
|
||||
void compare_result(const char* test_name)
|
||||
TEST_F(TestCharset, test_zh_0900_as_cs)
|
||||
{
|
||||
std::string cur_res_file_path, std_res_file_path;
|
||||
cur_res_file_path.append("./");
|
||||
cur_res_file_path.append(test_name);
|
||||
cur_res_file_path.append(CUR_RESULT_FILE_SUFFIX);
|
||||
std_res_file_path.append("./");
|
||||
std_res_file_path.append(test_name);
|
||||
std_res_file_path.append(STD_RESULT_FILE_SUFFIX);
|
||||
|
||||
std::ifstream cur_res(cur_res_file_path, std::ios::binary);
|
||||
ASSERT_TRUE(cur_res.is_open());
|
||||
std::ifstream std_res(std_res_file_path, std::ios::binary);
|
||||
ASSERT_TRUE(std_res.is_open());
|
||||
|
||||
std::string cur_line;
|
||||
std::string std_line;
|
||||
int line_no = 0;
|
||||
while (std::getline(std_res, std_line)) {
|
||||
line_no++;
|
||||
ASSERT_TRUE(std::getline(cur_res, cur_line));
|
||||
if (0 != std_line.compare(cur_line)) {
|
||||
fprintf(stdout,
|
||||
"not consistent result detected at line %d:\n"
|
||||
"cur_line:%s\n"
|
||||
"std_line:%s\n",
|
||||
line_no,
|
||||
cur_line.c_str(),
|
||||
std_line.c_str());
|
||||
ASSERT_TRUE(0);
|
||||
}
|
||||
ObString str;
|
||||
char sort_key[2048];
|
||||
bool is_valid = false;
|
||||
|
||||
auto print_sort_key = [&](ObCollationType coll_type) -> void {
|
||||
auto size = ObCharset::sortkey(coll_type, str.ptr(), str.length(), sort_key,
|
||||
sizeof(sort_key), is_valid);
|
||||
fprintf(stdout, "src=");
|
||||
for (int i = 0; i < str.length(); i++) {
|
||||
fprintf(stdout, "%02X", (unsigned char)str[i]);
|
||||
}
|
||||
fprintf(stdout, "\n");
|
||||
fprintf(stdout, "sort_key=");
|
||||
for (int i = 0; i < size; i++) {
|
||||
fprintf(stdout, "%02X", (unsigned char)sort_key[i]);
|
||||
}
|
||||
fprintf(stdout, "\n");
|
||||
};
|
||||
|
||||
char buffer[2048];
|
||||
ObDataBuffer data_buffer(buffer, sizeof(buffer));
|
||||
|
||||
auto convert_string = [&data_buffer](const char* input, ObCollationType dest_type) -> ObString {
|
||||
ObString output;
|
||||
ObCharset::charset_convert(data_buffer, ObString(input), CS_TYPE_UTF8MB4_BIN, dest_type, output);
|
||||
return output;
|
||||
};
|
||||
|
||||
ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH_0900_AS_CS, CS_TYPE_GBK_ZH_0900_AS_CS,
|
||||
CS_TYPE_GB18030_ZH_0900_AS_CS, CS_TYPE_UTF16_ZH_0900_AS_CS};
|
||||
|
||||
for (int i = 0; i < array_elements(coll_types); i++) {
|
||||
ObCollationType coll_type = coll_types[i];
|
||||
fprintf(stdout, "## TEST_COLL=%d\n", coll_type);
|
||||
|
||||
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("坝", coll_type), convert_string("弝", coll_type)) < 0);
|
||||
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("弝", coll_type), convert_string("爸", coll_type)) < 0);
|
||||
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("爸", coll_type), convert_string("跁", coll_type)) < 0);
|
||||
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("韩", coll_type), convert_string("美", coll_type)) < 0);
|
||||
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("美", coll_type), convert_string("日", coll_type)) < 0);
|
||||
|
||||
str = convert_string("我们今天", coll_type);
|
||||
print_sort_key(coll_types[i]);
|
||||
str = "\xFF\xFF";
|
||||
print_sort_key(coll_types[i]);
|
||||
str = "\xef\xbf\xbd\xef\xbf\xbd";
|
||||
print_sort_key(coll_types[i]);
|
||||
str = convert_string("中", coll_type);
|
||||
print_sort_key(coll_types[i]);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TestCharset, test_ismbchar_utf8)
|
||||
TEST_F(TestCharset, test_zh2_0900_as_cs)
|
||||
{
|
||||
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
||||
ObString test_name_pure(test_name);
|
||||
test_name_pure.split_on('_');
|
||||
do {
|
||||
TestReusltFileGuard file_guard(test_name);
|
||||
ASSERT_TRUE(NULL != file_guard.get_fp());
|
||||
|
||||
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
||||
fprintf(file_guard.get_fp(),
|
||||
"U+%04lX\t"
|
||||
"%.*s\t"
|
||||
"%d\t"
|
||||
"%d\n",
|
||||
wchar,
|
||||
str.length(),
|
||||
str.ptr(),
|
||||
ObCharset::is_mbchar(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.ptr() + str.length()),
|
||||
ObCharset::is_mbchar(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.ptr() + str.length()));
|
||||
};
|
||||
fprintf(file_guard.get_fp(),
|
||||
"wchar\t"
|
||||
"str\t"
|
||||
"%.*s(UTF8MB4_BIN)\t"
|
||||
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr(),
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr());
|
||||
TestCharset::for_each_utf8(handle);
|
||||
} while (0);
|
||||
|
||||
compare_result(test_name);
|
||||
ObString str;
|
||||
char sort_key[2048];
|
||||
bool is_valid = false;
|
||||
|
||||
auto print_sort_key = [&](ObCollationType coll_type) -> void {
|
||||
auto size = ObCharset::sortkey(coll_type, str.ptr(), str.length(), sort_key,
|
||||
sizeof(sort_key), is_valid);
|
||||
fprintf(stdout, "src=");
|
||||
for (int i = 0; i < str.length(); i++) {
|
||||
fprintf(stdout, "%02X", (unsigned char)str[i]);
|
||||
}
|
||||
fprintf(stdout, "\n");
|
||||
fprintf(stdout, "sort_key=");
|
||||
for (int i = 0; i < size; i++) {
|
||||
fprintf(stdout, "%02X", (unsigned char)sort_key[i]);
|
||||
}
|
||||
fprintf(stdout, "\n");
|
||||
};
|
||||
|
||||
char buffer[2048];
|
||||
ObDataBuffer data_buffer(buffer, sizeof(buffer));
|
||||
|
||||
auto convert_string = [&data_buffer](const char* input, ObCollationType dest_type) -> ObString {
|
||||
ObString output;
|
||||
ObCharset::charset_convert(data_buffer, ObString(input), CS_TYPE_UTF8MB4_BIN, dest_type, output);
|
||||
return output;
|
||||
};
|
||||
|
||||
ObCollationType coll_types[] = {CS_TYPE_UTF8MB4_ZH2_0900_AS_CS};
|
||||
|
||||
for (int i = 0; i < array_elements(coll_types); i++) {
|
||||
ObCollationType coll_type = coll_types[i];
|
||||
fprintf(stdout, "## TEST_COLL=%d\n", coll_type);
|
||||
|
||||
ASSERT_TRUE(ObCharset::strcmp(coll_type, convert_string("一", coll_type), convert_string("二", coll_type)) < 0);
|
||||
|
||||
str = convert_string("一丁丂七丄丅丆", coll_type);
|
||||
print_sort_key(coll_types[i]);
|
||||
|
||||
|
||||
/*
|
||||
str = convert_string("我们今天", coll_type);
|
||||
print_sort_key(coll_types[i]);
|
||||
str = "\xFF\xFF";
|
||||
print_sort_key(coll_types[i]);
|
||||
str = "\xef\xbf\xbd\xef\xbf\xbd";
|
||||
print_sort_key(coll_types[i]);
|
||||
str = convert_string("中", coll_type);
|
||||
print_sort_key(coll_types[i]);
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(TestCharset, test_strlen_char_utf8)
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
||||
ObString test_name_pure(test_name);
|
||||
test_name_pure.split_on('_');
|
||||
do {
|
||||
TestReusltFileGuard file_guard(test_name);
|
||||
ASSERT_TRUE(NULL != file_guard.get_fp());
|
||||
|
||||
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
||||
fprintf(file_guard.get_fp(),
|
||||
"U+%04lX\t"
|
||||
"%.*s\t"
|
||||
"%lu\t"
|
||||
"%lu\n",
|
||||
wchar,
|
||||
str.length(),
|
||||
str.ptr(),
|
||||
|
||||
ObCharset::strlen_char(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.length()),
|
||||
ObCharset::strlen_char(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.length()));
|
||||
};
|
||||
fprintf(file_guard.get_fp(),
|
||||
"wchar\t"
|
||||
"str\t"
|
||||
"%.*s(UTF8MB4_BIN)\t"
|
||||
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr(),
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr());
|
||||
TestCharset::for_each_utf8(handle);
|
||||
} while (0);
|
||||
|
||||
compare_result(test_name);
|
||||
}
|
||||
|
||||
TEST_F(TestCharset, test_mb_wc_utf8)
|
||||
{
|
||||
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
||||
ObString test_name_pure(test_name);
|
||||
test_name_pure.split_on('_');
|
||||
do {
|
||||
TestReusltFileGuard file_guard(test_name);
|
||||
ASSERT_TRUE(NULL != file_guard.get_fp());
|
||||
|
||||
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
||||
int32_t cur_wchar1, cur_wchar2;
|
||||
int32_t length1, length2;
|
||||
|
||||
ASSERT_EQ(0, ObCharset::mb_wc(CS_TYPE_UTF8MB4_BIN, str.ptr(), str.length(), length1, cur_wchar1));
|
||||
ASSERT_EQ(0, ObCharset::mb_wc(CS_TYPE_UTF8MB4_GENERAL_CI, str.ptr(), str.length(), length2, cur_wchar2));
|
||||
fprintf(file_guard.get_fp(),
|
||||
"U+%04lX\t"
|
||||
"%.*s\t"
|
||||
"%04x\t"
|
||||
"%04x\n",
|
||||
wchar,
|
||||
str.length(),
|
||||
str.ptr(),
|
||||
cur_wchar1,
|
||||
cur_wchar2);
|
||||
ASSERT_TRUE(cur_wchar1 == wchar);
|
||||
ASSERT_TRUE(cur_wchar2 == wchar);
|
||||
};
|
||||
fprintf(file_guard.get_fp(),
|
||||
"wchar\t"
|
||||
"str\t"
|
||||
"%.*s(UTF8MB4_BIN)\t"
|
||||
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr(),
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr());
|
||||
TestCharset::for_each_utf8(handle);
|
||||
} while (0);
|
||||
|
||||
compare_result(test_name);
|
||||
}
|
||||
|
||||
TEST_F(TestCharset, test_wc_mb_utf8)
|
||||
{
|
||||
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
||||
ObString test_name_pure(test_name);
|
||||
test_name_pure.split_on('_');
|
||||
do {
|
||||
|
||||
auto handle = [](const ObString& str, ob_wc_t wchar) -> void {
|
||||
char buf[4];
|
||||
int32_t length;
|
||||
ObString res(4, 0, buf);
|
||||
|
||||
ASSERT_EQ(0, ObCharset::wc_mb(CS_TYPE_UTF8MB4_BIN, wchar, buf, 4, length));
|
||||
res.set_length(length);
|
||||
ASSERT_TRUE(0 == str.compare(res));
|
||||
|
||||
ASSERT_EQ(0, ObCharset::wc_mb(CS_TYPE_UTF8MB4_GENERAL_CI, wchar, buf, 4, length));
|
||||
res.set_length(length);
|
||||
ASSERT_TRUE(0 == str.compare(res));
|
||||
};
|
||||
TestCharset::for_each_utf8(handle);
|
||||
} while (0);
|
||||
}
|
||||
|
||||
TEST_F(TestCharset, test_caseup_utf8)
|
||||
{
|
||||
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
||||
ObString test_name_pure(test_name);
|
||||
test_name_pure.split_on('_');
|
||||
do {
|
||||
TestReusltFileGuard file_guard(test_name);
|
||||
ASSERT_TRUE(NULL != file_guard.get_fp());
|
||||
|
||||
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
||||
char buf1[4];
|
||||
char buf2[4];
|
||||
int length1, length2;
|
||||
|
||||
ASSERT_TRUE(
|
||||
0 < (length1 = ObCharset::caseup(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), buf1, 4)));
|
||||
ASSERT_TRUE(0 < (length2 = ObCharset::caseup(
|
||||
CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), buf2, 4)));
|
||||
|
||||
fprintf(file_guard.get_fp(),
|
||||
"U+%04lX\t"
|
||||
"%.*s\t"
|
||||
"%.*s\t"
|
||||
"%.*s\n",
|
||||
wchar,
|
||||
str.length(),
|
||||
str.ptr(),
|
||||
length1,
|
||||
buf1,
|
||||
length2,
|
||||
buf2);
|
||||
};
|
||||
fprintf(file_guard.get_fp(),
|
||||
"wchar\t"
|
||||
"str\t"
|
||||
"%.*s(UTF8MB4_BIN)\t"
|
||||
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr(),
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr());
|
||||
TestCharset::for_each_utf8(handle);
|
||||
} while (0);
|
||||
|
||||
compare_result(test_name);
|
||||
}
|
||||
|
||||
TEST_F(TestCharset, test_casedn_utf8)
|
||||
{
|
||||
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
||||
ObString test_name_pure(test_name);
|
||||
test_name_pure.split_on('_');
|
||||
do {
|
||||
TestReusltFileGuard file_guard(test_name);
|
||||
ASSERT_TRUE(NULL != file_guard.get_fp());
|
||||
|
||||
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
||||
char buf1[4];
|
||||
char buf2[4];
|
||||
int length1, length2;
|
||||
|
||||
ASSERT_TRUE(
|
||||
0 < (length1 = ObCharset::casedn(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), buf1, 4)));
|
||||
ASSERT_TRUE(0 < (length2 = ObCharset::casedn(
|
||||
CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), buf2, 4)));
|
||||
|
||||
fprintf(file_guard.get_fp(),
|
||||
"U+%04lX\t"
|
||||
"%.*s\t"
|
||||
"%.*s\t"
|
||||
"%.*s\n",
|
||||
wchar,
|
||||
str.length(),
|
||||
str.ptr(),
|
||||
length1,
|
||||
buf1,
|
||||
length2,
|
||||
buf2);
|
||||
};
|
||||
fprintf(file_guard.get_fp(),
|
||||
"wchar\t"
|
||||
"str\t"
|
||||
"%.*s(UTF8MB4_BIN)\t"
|
||||
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr(),
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr());
|
||||
TestCharset::for_each_utf8(handle);
|
||||
} while (0);
|
||||
|
||||
compare_result(test_name);
|
||||
}
|
||||
|
||||
TEST_F(TestCharset, test_sortkey_utf8)
|
||||
{
|
||||
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
||||
ObString test_name_pure(test_name);
|
||||
test_name_pure.split_on('_');
|
||||
do {
|
||||
TestReusltFileGuard file_guard(test_name);
|
||||
ASSERT_TRUE(NULL != file_guard.get_fp());
|
||||
|
||||
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
||||
char buf1[4];
|
||||
char buf2[4];
|
||||
int length1, length2;
|
||||
bool is_uni1, is_uni2;
|
||||
|
||||
ASSERT_TRUE(0 < (length1 = ObCharset::sortkey(
|
||||
CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), buf1, 4, is_uni1)));
|
||||
ASSERT_TRUE(is_uni1);
|
||||
ASSERT_TRUE(0 < (length2 = ObCharset::sortkey(
|
||||
CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), buf2, 4, is_uni2)));
|
||||
ASSERT_TRUE(is_uni2);
|
||||
|
||||
fprintf(file_guard.get_fp(),
|
||||
"U+%04lX\t"
|
||||
"%.*s\t"
|
||||
"%.*s\t"
|
||||
"%.*s\n",
|
||||
wchar,
|
||||
str.length(),
|
||||
str.ptr(),
|
||||
length1,
|
||||
buf1,
|
||||
length2,
|
||||
buf2);
|
||||
};
|
||||
fprintf(file_guard.get_fp(),
|
||||
"wchar\t"
|
||||
"str\t"
|
||||
"%.*s(UTF8MB4_BIN)\t"
|
||||
"%.*s(UTF8MB4_GENERAL_CI)\n",
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr(),
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr());
|
||||
TestCharset::for_each_utf8(handle);
|
||||
} while (0);
|
||||
|
||||
compare_result(test_name);
|
||||
}
|
||||
|
||||
TEST_F(TestCharset, test_hash_sort_utf8)
|
||||
{
|
||||
const char* test_name = ::testing::UnitTest::GetInstance()->current_test_info()->name();
|
||||
ObString test_name_pure(test_name);
|
||||
test_name_pure.split_on('_');
|
||||
do {
|
||||
TestReusltFileGuard file_guard(test_name);
|
||||
ASSERT_TRUE(NULL != file_guard.get_fp());
|
||||
|
||||
auto handle = [&file_guard](const ObString& str, ob_wc_t wchar) -> void {
|
||||
fprintf(file_guard.get_fp(),
|
||||
"U+%04lX\t"
|
||||
"%.*s\t"
|
||||
"%lu\t"
|
||||
"%lu\t"
|
||||
"%lu\t"
|
||||
"%lu\n",
|
||||
wchar,
|
||||
str.length(),
|
||||
str.ptr(),
|
||||
ObCharset::hash(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), 0, 0, NULL),
|
||||
ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), 0, 0, NULL),
|
||||
ObCharset::hash(CS_TYPE_UTF8MB4_BIN, const_cast<char*>(str.ptr()), str.length(), 0, 1, NULL),
|
||||
ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, const_cast<char*>(str.ptr()), str.length(), 0, 1, NULL));
|
||||
};
|
||||
fprintf(file_guard.get_fp(),
|
||||
"wchar\t"
|
||||
"str\t"
|
||||
"%.*s(UTF8MB4_BIN)\t"
|
||||
"%.*s(UTF8MB4_GENERAL_CI)\t"
|
||||
"%.*s(UTF8MB4_BIN oracle)\t"
|
||||
"%.*s(UTF8MB4_GENERAL_CI oracle)\n",
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr(),
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr(),
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr(),
|
||||
test_name_pure.length(),
|
||||
test_name_pure.ptr());
|
||||
TestCharset::for_each_utf8(handle);
|
||||
} while (0);
|
||||
|
||||
compare_result(test_name);
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
OB_LOGGER.set_log_level("INFO");
|
||||
testing::InitGoogleTest(&argc,argv);
|
||||
int ret = ObCharset::init_charset();
|
||||
fprintf(stdout, "ret=%d\n", ret);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user