From 94b3a2bd501aa6c6e1f29f3c3ee5dafcf1256e3c Mon Sep 17 00:00:00 2001 From: yangzhg <780531911@qq.com> Date: Fri, 8 May 2020 12:52:46 +0800 Subject: [PATCH] [Bug] Fix string functions not support multibyte string (#3345) Let string functions support utf8 encoding --- be/src/exprs/string_functions.cpp | 106 ++++++++++++++---- be/src/exprs/string_functions.h | 2 + be/test/exprs/string_functions_test.cpp | 80 +++++++++++++ .../string-functions/char_length.md | 55 +++++++++ .../sql-functions/string-functions/left.md | 2 +- .../sql-functions/string-functions/length.md | 2 +- .../sql-functions/string-functions/reverse.md | 56 +++++++++ .../sql-functions/string-functions/right.md | 2 +- .../sql-functions/string-functions/strleft.md | 2 +- .../string-functions/strright.md | 2 +- .../string-functions/char_length.md | 54 +++++++++ .../sql-functions/string-functions/left.md | 2 +- .../sql-functions/string-functions/length.md | 2 +- .../sql-functions/string-functions/reverse.md | 56 +++++++++ .../sql-functions/string-functions/right.md | 2 +- .../sql-functions/string-functions/strleft.md | 2 +- .../string-functions/strright.md | 2 +- gensrc/script/doris_builtins_functions.py | 4 +- 18 files changed, 400 insertions(+), 33 deletions(-) create mode 100644 docs/en/sql-reference/sql-functions/string-functions/char_length.md create mode 100644 docs/en/sql-reference/sql-functions/string-functions/reverse.md create mode 100644 docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md create mode 100644 docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md diff --git a/be/src/exprs/string_functions.cpp b/be/src/exprs/string_functions.cpp index e64b745f00..06895ce29f 100644 --- a/be/src/exprs/string_functions.cpp +++ b/be/src/exprs/string_functions.cpp @@ -19,12 +19,12 @@ #include -#include "exprs/expr.h" #include "exprs/anyval_util.h" +#include "exprs/expr.h" +#include "math_functions.h" #include "runtime/string_value.hpp" #include "runtime/tuple_row.h" 
#include "util/url_parser.h" -#include "math_functions.h" // NOTE: be careful not to use string::append. It is not performant. namespace doris { @@ -32,24 +32,68 @@ namespace doris { void StringFunctions::init() { } +size_t get_utf8_byte_length(unsigned char byte) { + size_t char_size = 0; + if (byte >= 0xFC) { + char_size = 6; + } else if (byte >= 0xF8) { + char_size = 5; + } else if (byte >= 0xF0) { + char_size = 4; + } else if (byte >= 0xE0) { + char_size = 3; + } else if (byte >= 0xC0) { + char_size = 2; + } else { + char_size = 1; + } + return char_size; +} + // This behaves identically to the mysql implementation, namely: // - 1-indexed positions // - supported negative positions (count from the end of the string) // - [optional] len. No len indicates longest substr possible StringVal StringFunctions::substring( - FunctionContext* context, const StringVal& str, + FunctionContext* context, const StringVal& str, const IntVal& pos, const IntVal& len) { - if (str.is_null || pos.is_null || len.is_null) { + if (str.is_null || pos.is_null || len.is_null || pos.val > str.len) { return StringVal::null(); } + if (len.val <= 0 || str.len == 0) { + return StringVal(); + } + + // create index indicate every char start byte + // e.g. "hello word 你好" => [0,1,2,3,4,5,6,7,8,9,10,11,14] 你 and 好 are 3 bytes + // why use a vector as index? 
It is unnecessary if there is no negative pos val, + // but if has pos is negative it is not easy to determin where to start, so need a + // index save every character's length + size_t byte_pos = 0; + std::vector index; + for (size_t i = 0, char_size = 0; i < str.len; i += char_size) { + char_size = get_utf8_byte_length((unsigned)(str.ptr)[i]); + index.push_back(byte_pos); + byte_pos += char_size; + if (pos.val > 0 && index.size() > pos.val + len.val) { + break; + } + } + int fixed_pos = pos.val; if (fixed_pos < 0) { - fixed_pos = str.len + fixed_pos + 1; + fixed_pos = index.size() + fixed_pos + 1; } - int max_len = str.len - fixed_pos + 1; - int fixed_len = std::min(static_cast(len.val), max_len); - if (fixed_pos > 0 && fixed_pos <= str.len && fixed_len > 0) { - return StringVal(str.ptr + fixed_pos - 1, fixed_len); + if (fixed_pos > index.size()) { + return StringVal::null(); + } + byte_pos = index[fixed_pos - 1]; + int fixed_len = str.len - byte_pos; + if (fixed_pos + len.val <= index.size()) { + fixed_len = index[fixed_pos + len.val - 1] - byte_pos; + } + if (byte_pos <= str.len && fixed_len > 0) { + return StringVal(str.ptr + byte_pos, fixed_len); } else { return StringVal(); } @@ -118,7 +162,7 @@ StringVal StringFunctions::space(FunctionContext* context, const IntVal& len) { int32_t space_size = std::min(len.val, 65535); // TODO pengyubing // StringVal result = StringVal::create_temp_string_val(context, space_size); - StringVal result(context, space_size); + StringVal result(context, space_size); memset(result.ptr, ' ', space_size); return result; } @@ -147,7 +191,7 @@ StringVal StringFunctions::repeat( } StringVal StringFunctions::lpad( - FunctionContext* context, const StringVal& str, + FunctionContext* context, const StringVal& str, const IntVal& len, const StringVal& pad) { if (str.is_null || len.is_null || pad.is_null || len.val < 0) { return StringVal::null(); @@ -223,6 +267,23 @@ IntVal StringFunctions::length(FunctionContext* context, const StringVal& 
str) { return IntVal(str.len); } +// Implementation of CHAR_LENGTH +// int char_utf8_length(string input) +// Returns the length of characters of input. If input == NULL, returns +// NULL per MySQL +IntVal StringFunctions::char_utf8_length(FunctionContext* context, const StringVal& str) { + if (str.is_null) { + return IntVal::null(); + } + size_t char_len = 0; + std::vector index; + for (size_t i = 0, char_size = 0; i < str.len; i += char_size) { + char_size = get_utf8_byte_length((unsigned)(str.ptr)[i]); + ++char_len; + } + return IntVal(char_len); +} + StringVal StringFunctions::lower(FunctionContext* context, const StringVal& str) { if (str.is_null) { return StringVal::null(); @@ -260,13 +321,16 @@ StringVal StringFunctions::reverse(FunctionContext* context, const StringVal& st return StringVal::null(); } - // TODO pengyubing - // StringVal result = StringVal::create_temp_string_val(context, str.len); StringVal result(context, str.len); if (UNLIKELY(result.is_null)) { return result; } - std::reverse_copy(str.ptr, str.ptr + str.len, result.ptr); + + for (size_t i = 0, char_size = 0; i < str.len; i += char_size) { + char_size = get_utf8_byte_length((unsigned)(str.ptr)[i]); + std::copy(str.ptr + i, str.ptr + i + char_size, result.ptr + result.len - i - char_size); + } + return result; } @@ -370,7 +434,7 @@ IntVal StringFunctions::locate_pos( // This function sets options in the RE2 library before pattern matching. bool StringFunctions::set_re2_options( const StringVal& match_parameter, - std::string* error_str, + std::string* error_str, re2::RE2::Options* opts) { for (int i = 0; i < match_parameter.len; i++) { char match = match_parameter.ptr[i]; @@ -401,7 +465,7 @@ bool StringFunctions::set_re2_options( // The caller owns the returned regex. Returns NULL if the pattern could not be compiled. 
static re2::RE2* compile_regex( - const StringVal& pattern, + const StringVal& pattern, std::string* error_str, const StringVal& match_parameter) { re2::StringPiece pattern_sp(reinterpret_cast(pattern.ptr), pattern.len); @@ -418,7 +482,7 @@ static re2::RE2* compile_regex( re2::RE2* re = new re2::RE2(pattern_sp, options); if (!re->ok()) { std::stringstream ss; - ss << "Could not compile regexp pattern: " << AnyValUtil::to_string(pattern) + ss << "Could not compile regexp pattern: " << AnyValUtil::to_string(pattern) << std::endl << "Error: " << re->error(); *error_str = ss.str(); delete re; @@ -558,7 +622,7 @@ StringVal StringFunctions::concat( } StringVal StringFunctions::concat_ws( - FunctionContext* context, const StringVal& sep, + FunctionContext* context, const StringVal& sep, int num_children, const StringVal* strs) { DCHECK_GE(num_children, 1); if (sep.is_null) { @@ -633,9 +697,9 @@ IntVal StringFunctions::find_in_set( } void StringFunctions::parse_url_prepare( - FunctionContext* ctx, + FunctionContext* ctx, FunctionContext::FunctionStateScope scope) { - if (scope != FunctionContext::FRAGMENT_LOCAL) { + if (scope != FunctionContext::FRAGMENT_LOCAL) { return; } if (!ctx->is_arg_constant(1)) { @@ -818,7 +882,6 @@ static int index_of(const uint8_t* source, int source_offset, int source_count, return -1; } - StringVal StringFunctions::split_part(FunctionContext* context, const StringVal& content, const StringVal& delimiter, const IntVal& field) { if (content.is_null || delimiter.is_null || field.is_null || field.val <= 0) { @@ -847,5 +910,4 @@ StringVal StringFunctions::split_part(FunctionContext* context, const StringVal& int len = (find[field.val - 1] == -1 ? 
content.len : find[field.val - 1]) - start_pos; return StringVal(content.ptr + start_pos, len); } - } diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h index 31139d3e3c..3c395c9b9e 100644 --- a/be/src/exprs/string_functions.h +++ b/be/src/exprs/string_functions.h @@ -70,6 +70,8 @@ public: const doris_udf::IntVal& len, const doris_udf::StringVal& pad); static doris_udf::IntVal length( doris_udf::FunctionContext* context, const doris_udf::StringVal& str); + static doris_udf::IntVal char_utf8_length( + doris_udf::FunctionContext* context, const doris_udf::StringVal& str); static doris_udf::StringVal lower( doris_udf::FunctionContext* context, const doris_udf::StringVal& str); static doris_udf::StringVal upper( diff --git a/be/test/exprs/string_functions_test.cpp b/be/test/exprs/string_functions_test.cpp index a9b76e6b95..3b257b9910 100644 --- a/be/test/exprs/string_functions_test.cpp +++ b/be/test/exprs/string_functions_test.cpp @@ -17,6 +17,7 @@ #include "exprs/string_functions.h" #include "util/logging.h" +#include "testutil/function_utils.h" #include "exprs/anyval_util.h" #include #include @@ -252,9 +253,88 @@ TEST_F(StringFunctionsTest, null_or_empty) { delete context; } +TEST_F(StringFunctionsTest, substring) { + doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("hello")), + StringFunctions::substring(context, StringVal("hello word"), 1, 5)); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("word")), + StringFunctions::substring(context, StringVal("hello word"), 7, 4)); + + ASSERT_EQ(StringVal::null(), + StringFunctions::substring(context, StringVal::null(), 1, 0)); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("")), + StringFunctions::substring(context, StringVal("hello word"), 1, 0)); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string(" word")), + StringFunctions::substring(context, StringVal("hello word"), 
-5, 5)); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("hello word 你")), + StringFunctions::substring(context, StringVal("hello word 你好"), 1, 12)); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("好")), + StringFunctions::substring(context, StringVal("hello word 你好"), 13, 1)); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("")), + StringFunctions::substring(context, StringVal("hello word 你好"), 1, 0)); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("rd 你好")), + StringFunctions::substring(context, StringVal("hello word 你好"), -5, 5)); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("h")), + StringFunctions::substring(context, StringVal("hello word 你好"), 1, 1)); +} + +TEST_F(StringFunctionsTest, reverse) { + FunctionUtils fu; + doris_udf::FunctionContext* context = fu.get_fn_ctx(); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("olleh")), + StringFunctions::reverse(context, StringVal("hello"))); + ASSERT_EQ(StringVal::null(), + StringFunctions::reverse(context, StringVal::null())); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("")), + StringFunctions::reverse(context, StringVal(""))); + + ASSERT_EQ(AnyValUtil::from_string_temp(context,std::string("好你olleh")), + StringFunctions::reverse(context, StringVal("hello你好"))); +} + +TEST_F(StringFunctionsTest, length) { + doris_udf::FunctionContext* context = new doris_udf::FunctionContext(); + + ASSERT_EQ(IntVal(5), + StringFunctions::length(context, StringVal("hello"))); + ASSERT_EQ(IntVal(5), + StringFunctions::char_utf8_length(context, StringVal("hello"))); + ASSERT_EQ(IntVal::null(), + StringFunctions::length(context, StringVal::null())); + ASSERT_EQ(IntVal::null(), + StringFunctions::char_utf8_length(context, StringVal::null())); + + ASSERT_EQ(IntVal(0), + StringFunctions::length(context, StringVal(""))); + ASSERT_EQ(IntVal(0), + StringFunctions::char_utf8_length(context, StringVal(""))); + + 
 ASSERT_EQ(IntVal(11), + StringFunctions::length(context, StringVal("hello你好"))); + + ASSERT_EQ(IntVal(7), + StringFunctions::char_utf8_length(context, StringVal("hello你好"))); +} + } int main(int argc, char** argv) { + std::string conffile = std::string(getenv("DORIS_HOME")) + "/conf/be.conf"; + if (!doris::config::init(conffile.c_str(), false)) { + fprintf(stderr, "error read config file. \n"); + return -1; + } ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } diff --git a/docs/en/sql-reference/sql-functions/string-functions/char_length.md b/docs/en/sql-reference/sql-functions/string-functions/char_length.md new file mode 100644 index 0000000000..e7478893c9 --- /dev/null +++ b/docs/en/sql-reference/sql-functions/string-functions/char_length.md @@ -0,0 +1,55 @@ +--- +{ + "title": "CHAR_LENGTH", + "language": "en" +} +--- + + + +# char_length +## Description +### Syntax + +'INT char_length (VARCHAR str)' + + +Returns the length of the string in characters: a multi-byte character counts as one character. For example, a string of five two-byte characters has a length of 5. Only UTF-8 encoding is supported in the current version. + +## example + + +``` +mysql> select char_length("abc"); ++--------------------+ +| char_length('abc') | ++--------------------+ +| 3 | ++--------------------+ + +mysql> select char_length("中国"); ++-----------------------+ +| char_length('中国') | ++-----------------------+ +| 2 | ++-----------------------+ +``` +## keyword +CHAR_LENGTH diff --git a/docs/en/sql-reference/sql-functions/string-functions/left.md b/docs/en/sql-reference/sql-functions/string-functions/left.md index 074bc349be..b570a5bc8c 100644 --- a/docs/en/sql-reference/sql-functions/string-functions/left.md +++ b/docs/en/sql-reference/sql-functions/string-functions/left.md @@ -31,7 +31,7 @@ under the License. 
'VARCHAR left (VARCHAR str)' -It returns the left part of a string of specified length +It returns the left part of a string of the specified length; the length is counted in characters, not bytes. ## example diff --git a/docs/en/sql-reference/sql-functions/string-functions/length.md b/docs/en/sql-reference/sql-functions/string-functions/length.md index ff2c961d4b..6f345de401 100644 --- a/docs/en/sql-reference/sql-functions/string-functions/length.md +++ b/docs/en/sql-reference/sql-functions/string-functions/length.md @@ -31,7 +31,7 @@ under the License. 'INT length (VARCHAR str)' -Returns the length of the string and the number of characters returned for multi-byte characters. For example, five two-byte width words return a length of 10. +Returns the length of the string in bytes. ## example diff --git a/docs/en/sql-reference/sql-functions/string-functions/reverse.md b/docs/en/sql-reference/sql-functions/string-functions/reverse.md new file mode 100644 index 0000000000..7118e748eb --- /dev/null +++ b/docs/en/sql-reference/sql-functions/string-functions/reverse.md @@ -0,0 +1,56 @@ +--- +{ + "title": "REVERSE", + "language": "en" +} +--- + + + +# reverse +## description +### Syntax + +`VARCHAR reverse(VARCHAR str)` + + +The REVERSE() function reverses a string and returns the result. + +## example + +``` +mysql> SELECT REVERSE('hello'); ++------------------+ +| REVERSE('hello') | ++------------------+ +| olleh | ++------------------+ +1 row in set (0.00 sec) + +mysql> SELECT REVERSE('你好'); ++------------------+ +| REVERSE('你好') | ++------------------+ +| 好你 | ++------------------+ +1 row in set (0.00 sec) +``` +## keyword +REVERSE diff --git a/docs/en/sql-reference/sql-functions/string-functions/right.md b/docs/en/sql-reference/sql-functions/string-functions/right.md index d8e356a966..e67a5acea3 100644 --- a/docs/en/sql-reference/sql-functions/string-functions/right.md +++ b/docs/en/sql-reference/sql-functions/string-functions/right.md @@ -31,7 +31,7 @@ under the License. 
'VARCHAR RIGHT (VARCHAR STR)' -It returns the right part of a string of specified length +It returns the right part of a string of the specified length; the length is counted in characters, not bytes. ## example diff --git a/docs/en/sql-reference/sql-functions/string-functions/strleft.md b/docs/en/sql-reference/sql-functions/string-functions/strleft.md index 81d6ee8023..d1b1c30d75 100644 --- a/docs/en/sql-reference/sql-functions/string-functions/strleft.md +++ b/docs/en/sql-reference/sql-functions/string-functions/strleft.md @@ -31,7 +31,7 @@ under the License. 'VARCHAR STRAIGHT (VARCHAR STR)' -It returns the left part of a string of specified length +It returns the left part of a string of the specified length; the length is counted in characters, not bytes. ## example diff --git a/docs/en/sql-reference/sql-functions/string-functions/strright.md b/docs/en/sql-reference/sql-functions/string-functions/strright.md index 25707cdbc0..42a43ec7ef 100644 --- a/docs/en/sql-reference/sql-functions/string-functions/strright.md +++ b/docs/en/sql-reference/sql-functions/string-functions/strright.md @@ -32,7 +32,7 @@ under the License. 'VARCHAR strright (VARCHAR str)' -It returns the right part of a string of specified length +It returns the right part of a string of the specified length; the length is counted in characters, not bytes. 
## example diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md b/docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md new file mode 100644 index 0000000000..9bd3c1f56b --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/char_length.md @@ -0,0 +1,54 @@ +--- +{ + "title": "CHAR_LENGTH", + "language": "zh-CN" +} +--- + + + +# char_length +## description +### Syntax + +`INT char_length(VARCHAR str)` + + +返回字符串的长度,对于多字节字符,返回字符数。目前仅支持 utf8 编码。 + +## example + +``` +mysql> select char_length("abc"); ++--------------------+ +| char_length('abc') | ++--------------------+ +| 3 | ++--------------------+ + +mysql> select char_length("中国"); ++-----------------------+ +| char_length('中国') | ++-----------------------+ +| 2 | ++-----------------------+ +``` +## keyword +CHAR_LENGTH diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md b/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md index 99a9b83e33..2c73243c1f 100644 --- a/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md +++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/left.md @@ -31,7 +31,7 @@ under the License. `VARCHAR left(VARCHAR str)` -它返回具有指定长度的字符串的左边部分 +它返回具有指定长度的字符串的左边部分, 长度的单位为utf8字符 ## example diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md b/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md index f86e3ee02b..53fa1f9a72 100644 --- a/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md +++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/length.md @@ -31,7 +31,7 @@ under the License. 
`INT length(VARCHAR str)` -返回字符串的长度,对于多字节字符,返回的字符数。比如5个两字节宽度字,返回的长度是10。 +返回字符串的字节数。 ## example diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md b/docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md new file mode 100644 index 0000000000..1daf7802e3 --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/reverse.md @@ -0,0 +1,56 @@ +--- +{ + "title": "REVERSE", + "language": "zh-CN" +} +--- + + + +# reverse +## description +### Syntax + +`VARCHAR reverse(VARCHAR str)` + + +将字符串反转,返回的字符串的顺序和源字符串的顺序相反。 + +## example + +``` +mysql> SELECT REVERSE('hello'); ++------------------+ +| REVERSE('hello') | ++------------------+ +| olleh | ++------------------+ +1 row in set (0.00 sec) + +mysql> SELECT REVERSE('你好'); ++------------------+ +| REVERSE('你好') | ++------------------+ +| 好你 | ++------------------+ +1 row in set (0.00 sec) +``` +## keyword +REVERSE diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md b/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md index 133c92b86c..0e0839f9da 100644 --- a/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md +++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/right.md @@ -31,7 +31,7 @@ under the License. `VARCHAR right(VARCHAR str)` -它返回具有指定长度的字符串的右边部分 +它返回具有指定长度的字符串的右边部分, 长度的单位为utf8字符 ## example diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md b/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md index c4b5d32227..a2eae3a277 100644 --- a/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md +++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/strleft.md @@ -31,7 +31,7 @@ under the License. 
`VARCHAR strleft(VARCHAR str)` -它返回具有指定长度的字符串的左边部分 +它返回具有指定长度的字符串的左边部分,长度的单位为utf8字符 ## example diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md b/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md index b6e8657f8b..f3510fdaa4 100644 --- a/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md +++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/strright.md @@ -31,7 +31,7 @@ under the License. `VARCHAR strright(VARCHAR str)` -它返回具有指定长度的字符串的右边部分 +它返回具有指定长度的字符串的右边部分, 长度的单位为utf8字符 ## example diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 2dfb76bdab..a2d972725a 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -533,7 +533,9 @@ visible_functions = [ '15FunctionContextERKNS1_9StringValERKNS1_6IntValES6_'], [['length'], 'INT', ['VARCHAR'], '_ZN5doris15StringFunctions6lengthEPN9doris_udf15FunctionContextERKNS1_9StringValE'], - [['lower', 'lcase'], 'VARCHAR', ['VARCHAR'], + [['char_length', 'character_length'], 'INT', ['VARCHAR'], + '_ZN5doris15StringFunctions16char_utf8_lengthEPN9doris_udf15FunctionContextERKNS1_9StringValE'], + [['lower', 'lcase'], 'VARCHAR', ['VARCHAR'], '_ZN5doris15StringFunctions5lowerEPN9doris_udf15FunctionContextERKNS1_9StringValE'], [['upper', 'ucase'], 'VARCHAR', ['VARCHAR'], '_ZN5doris15StringFunctions5upperEPN9doris_udf15FunctionContextERKNS1_9StringValE'],