diff --git a/deps/oblib/src/lib/charset/ob_charset.cpp b/deps/oblib/src/lib/charset/ob_charset.cpp
index 79414b183c..a42f63ac14 100644
--- a/deps/oblib/src/lib/charset/ob_charset.cpp
+++ b/deps/oblib/src/lib/charset/ob_charset.cpp
@@ -2336,6 +2336,94 @@ size_t ObCharset::caseup(const ObCollationType collation_type, ObString &src)
   return size;
 }
 
+// Writes the upper-cased form of src into dst, using memory from allocator.
+// src is taken by const reference; the result always lives in a new buffer.
+// Returns OB_INVALID_ARGUMENT when no charset is found for collation_type and
+// OB_ALLOCATE_MEMORY_FAILED when the result buffer cannot be allocated.
+int ObCharset::toupper(const ObCollationType collation_type,
+                       const ObString &src, ObString &dst,
+                       ObIAllocator &allocator)
+{
+  int ret = OB_SUCCESS;
+  const ObCharsetInfo *cs_info = NULL;
+  if (OB_ISNULL(cs_info = get_charset(collation_type))) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid collation type", K(ret), K(collation_type));
+  } else if (src.empty()) {
+    // Nothing to convert. Handling this up front keeps the branch below from
+    // requesting a zero-byte buffer, which an allocator may answer with NULL.
+    dst.reset();
+  } else {
+    int casemulti = cs_info->caseup_multiply;
+    if (1 == casemulti) {
+      // Copy src into dst first, then convert the copy in place.
+      if (OB_FAIL(ob_write_string(allocator, src, dst))) {
+        LOG_WARN("fail to copy string", K(ret), K(src));
+      } else {
+        size_t size = cs_info->cset->caseup(cs_info, dst.ptr(), dst.length(), dst.ptr(), dst.length());
+        dst.assign_ptr(dst.ptr(), static_cast<int32_t>(size));
+      }
+    } else {
+      // Reserve src.length() * casemulti bytes; widen before multiplying.
+      char *buf = NULL;
+      int64_t buf_len = static_cast<int64_t>(src.length()) * casemulti;
+      if (OB_ISNULL(buf = static_cast<char *>(allocator.alloc(buf_len)))) {
+        ret = OB_ALLOCATE_MEMORY_FAILED;
+        LOG_WARN("fail to alloc memory", K(ret));
+      } else {
+        size_t size = cs_info->cset->caseup(cs_info, const_cast<char *>(src.ptr()), src.length(), buf, buf_len);
+        dst.assign_ptr(buf, static_cast<int32_t>(size));
+      }
+    }
+  }
+  return ret;
+}
+
+
+// Writes the lower-cased form of src into dst, using memory from allocator.
+// src is taken by const reference; the result always lives in a new buffer.
+// Returns OB_INVALID_ARGUMENT when no charset is found for collation_type and
+// OB_ALLOCATE_MEMORY_FAILED when the result buffer cannot be allocated.
+int ObCharset::tolower(const ObCollationType collation_type,
+                       const ObString &src, ObString &dst,
+                       ObIAllocator &allocator)
+{
+  int ret = OB_SUCCESS;
+  const ObCharsetInfo *cs_info = NULL;
+  if (OB_ISNULL(cs_info = get_charset(collation_type))) {
+    ret = OB_INVALID_ARGUMENT;
+    LOG_WARN("invalid collation type", K(ret), K(collation_type));
+  } else if (src.empty()) {
+    // Nothing to convert. Handling this up front keeps the branch below from
+    // requesting a zero-byte buffer, which an allocator may answer with NULL.
+    dst.reset();
+  } else {
+    int casemulti = cs_info->casedn_multiply;
+    if (1 == casemulti) {
+      // Copy src into dst first, then convert the copy in place.
+      if (OB_FAIL(ob_write_string(allocator, src, dst))) {
+        LOG_WARN("fail to copy string", K(ret), K(src));
+      } else {
+        size_t size = cs_info->cset->casedn(cs_info, dst.ptr(), dst.length(), dst.ptr(), dst.length());
+        dst.assign_ptr(dst.ptr(), static_cast<int32_t>(size));
+      }
+    } else {
+      // Reserve src.length() * casemulti bytes; widen before multiplying.
+      char *buf = NULL;
+      int64_t buf_len = static_cast<int64_t>(src.length()) * casemulti;
+      if (OB_ISNULL(buf = static_cast<char *>(allocator.alloc(buf_len)))) {
+        ret = OB_ALLOCATE_MEMORY_FAILED;
+        LOG_WARN("fail to alloc memory", K(ret));
+      } else {
+        size_t size = cs_info->cset->casedn(cs_info, const_cast<char *>(src.ptr()), src.length(), buf, buf_len);
+        dst.assign_ptr(buf, static_cast<int32_t>(size));
+      }
+    }
+  }
+  return ret;
+}
+
+
 bool ObCharset::case_insensitive_equal(const ObString &one,
                                        const ObString &another,
                                        const ObCollationType &collation_type) {
diff --git a/deps/oblib/src/lib/charset/ob_charset.h b/deps/oblib/src/lib/charset/ob_charset.h
index cd0fcbdba9..6f6c73361a 100644
--- a/deps/oblib/src/lib/charset/ob_charset.h
+++ b/deps/oblib/src/lib/charset/ob_charset.h
@@ -405,9 +405,22 @@ public:
   static int strcmp(const ObCollationType collation_type,
                     const ObString &l_str,
                     const ObString &r_str);
+  //these interfaces are not safe:
   //when invoke this, if ObString a = "134"; this func will core; so avoid passing src as a style
+  //if collation type is gb18030, this func will die
+  //**Please** use toupper and tolower instead of casedn and caseup
   static size_t casedn(const ObCollationType collation_type, ObString &src);
   static size_t caseup(const ObCollationType collation_type, ObString &src);
+
+  //toupper/tolower take src by const reference and write the converted text into dst,
+  //using memory from allocator; they return OB_SUCCESS or an error code
+  static int toupper(const ObCollationType collation_type,
+                     const ObString &src, ObString &dst,
+                     ObIAllocator &allocator);
+  static int tolower(const ObCollationType collation_type,
+                     const ObString &src, ObString &dst,
+                     ObIAllocator &allocator);
+
   static bool case_insensitive_equal(const ObString &one,
                                      const ObString &another,
                                      const ObCollationType &collation_type = CS_TYPE_UTF8MB4_GENERAL_CI);
diff --git a/deps/oblib/unittest/lib/CMakeLists.txt b/deps/oblib/unittest/lib/CMakeLists.txt
index 5dd5654ce9..bb652b4d8b 100644
--- a/deps/oblib/unittest/lib/CMakeLists.txt
+++ b/deps/oblib/unittest/lib/CMakeLists.txt
@@ -22,7 +22,7 @@ oblib_addtest(allocator/test_page_arena.cpp)
 oblib_addtest(allocator/test_slice_alloc.cpp)
 oblib_addtest(allocator/test_sql_arena_allocator.cpp)
 oblib_addtest(atomic/test_atomic_reference.cpp)
-#oblib_addtest(charset/test_charset.cpp)
+oblib_addtest(charset/test_charset.cpp)
 oblib_addtest(checksum/test_crc64.cpp)
 oblib_addtest(container/ob_2d_array_test.cpp)
 oblib_addtest(container/ob_array_test.cpp)
diff --git a/deps/oblib/unittest/lib/charset/test_charset.cpp b/deps/oblib/unittest/lib/charset/test_charset.cpp
index d274c4d047..c2c6aedecc 100644
--- a/deps/oblib/unittest/lib/charset/test_charset.cpp
+++ b/deps/oblib/unittest/lib/charset/test_charset.cpp
@@ -210,8 +210,8 @@ TEST_F(TestCharset, hash_sort)
   uint64_t ret1 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, a, 3, 0);
   uint64_t ret2 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, b, 3, 0);
   fprintf(stdout, "ret:%lu, ret1:%lu, ret2:%lu\n", ret, ret1, ret2);
-  uint64_t ret3 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, ObString::make_string(b));
-  ASSERT_EQ(ret2, ret3);
+  //uint64_t ret3 = ObCharset::hash(CS_TYPE_UTF8MB4_GENERAL_CI, ObString::make_string(b));
+  ASSERT_EQ(ret1, ret2);
 }
 
 TEST_F(TestCharset, case_mode_equal)
@@ -496,6 +496,86 @@ TEST_F(TestCharset, test_zh2_0900_as_cs)
   }
 }
 
+
+TEST_F(TestCharset, tolower)
+{
+  ObArenaAllocator allocator;
+  char a1[] = "Variable_name";
+  char a2[] = "Variable_NAME";
+  char a3[] = "variable_name";
+  ObString y1;
+  ObString y2;
+  ObString y3;
+  y1.assign_ptr(a1, strlen(a1));
+  y2.assign_ptr(a2, strlen(a2));
+  y3.assign_ptr(a3, strlen(a3));
+  fprintf(stdout, "ret:%p, %d\n", static_cast<const void *>(y1.ptr()), y1.length() );
+  for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
+    auto charset_type = static_cast<ObCharsetType>(cs_i);
+    if (!ObCharset::is_valid_charset(charset_type) || CHARSET_UTF16 == charset_type || CHARSET_BINARY == charset_type)
+      continue;
+    ObCollationType cs_type = ObCharset::get_default_collation(charset_type);
+    ASSERT_TRUE(ObCharset::is_valid_collation(cs_type));
+    const char *cs_name = ObCharset::charset_name(cs_type);
+
+    ObString y1_res;
+    ASSERT_TRUE(OB_SUCCESS == ObCharset::tolower(cs_type, y1, y1_res, allocator));
+    fprintf(stdout, "charset=%s, src:%.*s, src_lower:%.*s, dst:%.*s\n", cs_name,
+            y1.length(), y1.ptr(), y1_res.length(), y1_res.ptr(), y3.length(), y3.ptr());
+    EXPECT_TRUE(y1_res == y3);
+    ObString y2_res;
+    ASSERT_TRUE(OB_SUCCESS == ObCharset::tolower(cs_type, y2, y2_res, allocator));
+    fprintf(stdout, "charset=%s, src:%.*s, src_lower:%.*s, dst:%.*s\n", cs_name,
+            y2.length(), y2.ptr(), y2_res.length(), y2_res.ptr(), y3.length(), y3.ptr());
+    EXPECT_TRUE(y2_res == y3);
+    // An empty source must convert to an empty result for every charset.
+    ObString empty_src;
+    ObString empty_res;
+    ASSERT_TRUE(OB_SUCCESS == ObCharset::tolower(cs_type, empty_src, empty_res, allocator));
+    EXPECT_TRUE(empty_res.empty());
+  }
+}
+
+
+TEST_F(TestCharset, toupper)
+{
+  ObArenaAllocator allocator;
+  char a1[] = "Variable_name";
+  char a2[] = "Variable_NAME";
+  char a3[] = "VARIABLE_NAME";
+  ObString y1;
+  ObString y2;
+  ObString y3;
+  y1.assign_ptr(a1, strlen(a1));
+  y2.assign_ptr(a2, strlen(a2));
+  y3.assign_ptr(a3, strlen(a3));
+  fprintf(stdout, "ret:%p, %d\n", static_cast<const void *>(y1.ptr()), y1.length() );
+  for (int cs_i = CHARSET_INVALID; cs_i < CHARSET_MAX; ++cs_i) {
+    auto charset_type = static_cast<ObCharsetType>(cs_i);
+    if (!ObCharset::is_valid_charset(charset_type) || CHARSET_UTF16 == charset_type || CHARSET_BINARY == charset_type)
+      continue;
+    ObCollationType cs_type = ObCharset::get_default_collation(charset_type);
+    ASSERT_TRUE(ObCharset::is_valid_collation(cs_type));
+    const char *cs_name = ObCharset::charset_name(cs_type);
+
+    ObString y1_res;
+    ASSERT_TRUE(OB_SUCCESS == ObCharset::toupper(cs_type, y1, y1_res, allocator));
+    fprintf(stdout, "charset=%s, src:%.*s, src_upper:%.*s, dst:%.*s\n", cs_name,
+            y1.length(), y1.ptr(), y1_res.length(), y1_res.ptr(), y3.length(), y3.ptr());
+    EXPECT_TRUE(y1_res == y3);
+    ObString y2_res;
+    ASSERT_TRUE(OB_SUCCESS == ObCharset::toupper(cs_type, y2, y2_res, allocator));
+    fprintf(stdout, "charset=%s, src:%.*s, src_upper:%.*s, dst:%.*s\n", cs_name,
+            y2.length(), y2.ptr(), y2_res.length(), y2_res.ptr(), y3.length(), y3.ptr());
+    EXPECT_TRUE(y2_res == y3);
+    // An empty source must convert to an empty result for every charset.
+    ObString empty_src;
+    ObString empty_res;
+    ASSERT_TRUE(OB_SUCCESS == ObCharset::toupper(cs_type, empty_src, empty_res, allocator));
+    EXPECT_TRUE(empty_res.empty());
+  }
+}
+
 int main(int argc, char **argv)
 {
   OB_LOGGER.set_log_level("INFO");