diff --git a/be/src/exprs/math_functions.cpp b/be/src/exprs/math_functions.cpp
index 40bb47a5a8..50712d5e1d 100644
--- a/be/src/exprs/math_functions.cpp
+++ b/be/src/exprs/math_functions.cpp
@@ -323,10 +323,24 @@ StringVal MathFunctions::hex_int(FunctionContext* ctx, const BigIntVal& v) {
     if (v.is_null) {
         return StringVal::null();
     }
-    // TODO: this is probably unreasonably slow
-    std::stringstream ss;
-    ss << std::hex << std::uppercase << v.val;
-    return AnyValUtil::from_string_temp(ctx, ss.str());
+
+    // Reinterpret as unsigned so that negative inputs are rendered as their
+    // 64-bit two's-complement bit pattern (e.g. -1 -> FFFFFFFFFFFFFFFF).
+    uint64_t num = v.val;
+    static const char kHexDigits[] = "0123456789ABCDEF";
+    // A uint64_t needs at most 16 hex digits, plus 1 byte for the trailing '\0'.
+    const int kMaxHexDigits = 16;
+    char buf[kMaxHexDigits + 1];
+    int pos = kMaxHexDigits;
+    buf[pos] = '\0';
+    // Fill the buffer from the back so no reversal pass is needed. do/while
+    // always emits at least one digit, so 0 is rendered as "0" (as the old
+    // stringstream code did) instead of an empty string.
+    do {
+        buf[--pos] = kHexDigits[num & 0xF];
+        num >>= 4;
+    } while (num != 0);
+    return AnyValUtil::from_string_temp(ctx, buf + pos);
 }
 
 StringVal MathFunctions::hex_string(FunctionContext* ctx, const StringVal& s) {
@@ -347,8 +361,8 @@ StringVal MathFunctions::unhex(FunctionContext* ctx, const StringVal& s) {
     if (s.is_null) {
         return StringVal::null();
     }
-    // For uneven number of chars return empty string like Hive does.
-    if (s.len % 2 != 0) {
+    // For odd number of chars return empty string like Hive does.
+    if (s.len & 1) {
         return StringVal();
     }
 
diff --git a/be/test/exprs/math_functions_test.cpp b/be/test/exprs/math_functions_test.cpp
index 0e41318f64..8c397c8ffb 100644
--- a/be/test/exprs/math_functions_test.cpp
+++ b/be/test/exprs/math_functions_test.cpp
@@ -24,6 +24,7 @@
 
 #include "exprs/anyval_util.h"
 #include "exprs/expr_context.h"
+#include "test_util/test_util.h"
 #include "testutil/function_utils.h"
 #include "util/logging.h"
 
@@ -168,6 +169,99 @@ TEST_F(MathFunctionsTest, rand) {
     ASSERT_NE(dv3.val, dv4.val);
 }
 
+TEST_F(MathFunctionsTest, hex_int) {
+    doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+
+    ASSERT_EQ(StringVal::null(),
+              MathFunctions::hex_int(context, BigIntVal::null())); // NULL in, NULL out
+
+    ASSERT_EQ(StringVal("7FFFFFFFFFFFFFFF"),
+              MathFunctions::hex_int(context, BigIntVal(9223372036854775807))); //BigIntVal max_value
+
+    ASSERT_EQ(StringVal("FFE5853AB393E6C0"),
+              MathFunctions::hex_int(context, BigIntVal(-7453337203775808)));
+
+    ASSERT_EQ(StringVal("0"),
+              MathFunctions::hex_int(context, BigIntVal(0))); // zero renders as one digit, not ""
+
+    ASSERT_EQ(StringVal("C"),
+              MathFunctions::hex_int(context, BigIntVal(12)));
+
+    ASSERT_EQ(StringVal("90"),
+              MathFunctions::hex_int(context, BigIntVal(144)));
+
+    ASSERT_EQ(StringVal("FFFFFFFFFFFFFFFF"),
+              MathFunctions::hex_int(context, BigIntVal(-1)));
+
+    ASSERT_EQ(StringVal("FFFFFFFFFFFFFFFE"),
+              MathFunctions::hex_int(context, BigIntVal(-2)));
+
+    ASSERT_EQ(StringVal("24EC1"),
+              MathFunctions::hex_int(context, BigIntVal(151233)));
+
+    delete context;
+}
+
+TEST_F(MathFunctionsTest, hex_string) {
+    doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+
+    ASSERT_EQ(StringVal::null(),
+              MathFunctions::hex_string(context, StringVal::null()));
+
+    ASSERT_EQ(StringVal("30"),
+              MathFunctions::hex_string(context, StringVal("0")));
+
+    ASSERT_EQ(StringVal("31"),
+              MathFunctions::hex_string(context, StringVal("1")));
+
+    ASSERT_EQ(StringVal("313233"),
+              MathFunctions::hex_string(context, StringVal("123")));
+
+
ASSERT_EQ(StringVal("41"),
+              MathFunctions::hex_string(context, StringVal("A")));
+
+    ASSERT_EQ(StringVal("61"),
+              MathFunctions::hex_string(context, StringVal("a")));
+
+    ASSERT_EQ(StringVal("E68891"),
+              MathFunctions::hex_string(context, StringVal("我")));
+
+    ASSERT_EQ(StringVal("3F"),
+              MathFunctions::hex_string(context, StringVal("?")));
+
+    delete context;
+}
+
+TEST_F(MathFunctionsTest, unhex) {
+    doris_udf::FunctionContext* context = new doris_udf::FunctionContext();
+
+    ASSERT_EQ(StringVal::null(),
+              MathFunctions::unhex(context, StringVal::null()));
+
+    ASSERT_EQ(StringVal("123"),
+              MathFunctions::unhex(context, StringVal("313233")));
+
+    ASSERT_EQ(StringVal(""),
+              MathFunctions::unhex(context, StringVal("@!#")));
+
+    ASSERT_EQ(StringVal(""),
+              MathFunctions::unhex(context, StringVal("@@")));
+
+    ASSERT_EQ(StringVal("a"),
+              MathFunctions::unhex(context, StringVal("61")));
+
+    ASSERT_EQ(StringVal("123"),
+              MathFunctions::unhex(context, StringVal("313233")));
+
+    ASSERT_EQ(StringVal(""),
+              MathFunctions::unhex(context, StringVal("我")));
+
+    ASSERT_EQ(StringVal("\xEF\xBC\x9F"),
+              MathFunctions::unhex(context, StringVal("EFBC9F"))); // bytes EF BC 9F = U+FF1F, not ASCII '?'
+
+    delete context;
+}
+
 } // namespace doris
 
 int main(int argc, char** argv) {
diff --git a/docs/.vuepress/sidebar/en.js b/docs/.vuepress/sidebar/en.js
index ad9adc2192..4f7cdbb3fc 100644
--- a/docs/.vuepress/sidebar/en.js
+++ b/docs/.vuepress/sidebar/en.js
@@ -338,6 +338,7 @@ module.exports = [
               "get_json_int",
               "get_json_string",
               "group_concat",
+              "hex",
               "instr",
               "lcase",
               "left",
@@ -357,6 +358,7 @@ module.exports = [
               "starts_with",
               "strleft",
               "strright",
+              "unhex",
               {
                 title: "fuzzy match",
                 directoryPath: "like/",
diff --git a/docs/.vuepress/sidebar/zh-CN.js b/docs/.vuepress/sidebar/zh-CN.js
index 05a9eeeaa4..c9d297d6f2 100644
--- a/docs/.vuepress/sidebar/zh-CN.js
+++ b/docs/.vuepress/sidebar/zh-CN.js
@@ -342,6 +342,7 @@ module.exports = [
               "get_json_int",
               "get_json_string",
               "group_concat",
+              "hex",
               "instr",
               "lcase",
               "left",
@@ -361,6 +362,7
@@ module.exports = [ "starts_with", "strleft", "strright", + "unhex", { title: "模糊匹配", directoryPath: "like/", diff --git a/docs/en/sql-reference/sql-functions/string-functions/hex.md b/docs/en/sql-reference/sql-functions/string-functions/hex.md new file mode 100644 index 0000000000..cb58c0d2fa --- /dev/null +++ b/docs/en/sql-reference/sql-functions/string-functions/hex.md @@ -0,0 +1,85 @@ +--- +{ + "title": "hex", + "language": "en" +} +--- + + + +# hex +## description +### Syntax + +`VARCHAR hex(VARCHAR str)` + +`VARCHAR hex(BIGINT num)` + +If the input parameter is a number, the string representation of the hexadecimal value is returned; + +If the input parameter is a string, each byte of the string is converted into two hexadecimal characters, and the converted characters are concatenated into a single output string. + + +## example + +``` +input string + +mysql> select hex('1'); ++----------+ +| hex('1') | ++----------+ +| 31 | ++----------+ + +mysql> select hex('@'); ++----------+ +| hex('@') | ++----------+ +| 40 | ++----------+ + +mysql> select hex('12'); ++-----------+ +| hex('12') | ++-----------+ +| 3132 | ++-----------+ +``` + +``` +input num + +mysql> select hex(12); ++---------+ +| hex(12) | ++---------+ +| C | ++---------+ + +mysql> select hex(-1); ++------------------+ +| hex(-1) | ++------------------+ +| FFFFFFFFFFFFFFFF | ++------------------+ +``` +## keyword +HEX diff --git a/docs/en/sql-reference/sql-functions/string-functions/unhex.md b/docs/en/sql-reference/sql-functions/string-functions/unhex.md new file mode 100644 index 0000000000..eba746e1ba --- /dev/null +++ b/docs/en/sql-reference/sql-functions/string-functions/unhex.md @@ -0,0 +1,63 @@ +--- +{ + "title": "unhex", + "language": "en" +} +--- + + + +# unhex +## description +### Syntax + +`VARCHAR unhex(VARCHAR str)` + +Takes a string as input. If the length of the string is 0 or an odd number, an empty string is returned; +If the string contains characters other than `[0-9], [a-f], 
[A-F]`, an empty string is returned; +In other cases, every two characters are interpreted as one hexadecimal value and converted into the corresponding byte, and the results are concatenated into a string for output. + + +## example + +``` +mysql> select unhex('@'); ++------------+ +| unhex('@') | ++------------+ +| | ++------------+ + +mysql> select unhex('41'); ++-------------+ +| unhex('41') | ++-------------+ +| A | ++-------------+ + +mysql> select unhex('4142'); ++---------------+ +| unhex('4142') | ++---------------+ +| AB | ++---------------+ +``` +## keyword +UNHEX diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/hex.md b/docs/zh-CN/sql-reference/sql-functions/string-functions/hex.md new file mode 100644 index 0000000000..51d861f986 --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/hex.md @@ -0,0 +1,85 @@ +--- +{ + "title": "hex", + "language": "zh-CN" +} +--- + + + +# hex +## description +### Syntax + +`VARCHAR hex(VARCHAR str)` + +`VARCHAR hex(BIGINT num)` + +如果输入参数是数字,返回十六进制值的字符串表示形式; + +如果输入参数是字符串,则将每个字符转化为两个十六进制的字符,将转化后的所有字符拼接为字符串输出 + + +## example + +``` +输入字符串 + +mysql> select hex('1'); ++----------+ +| hex('1') | ++----------+ +| 31 | ++----------+ + +mysql> select hex('@'); ++----------+ +| hex('@') | ++----------+ +| 40 | ++----------+ + +mysql> select hex('12'); ++-----------+ +| hex('12') | ++-----------+ +| 3132 | ++-----------+ +``` + +``` +输入数字 + +mysql> select hex(12); ++---------+ +| hex(12) | ++---------+ +| C | ++---------+ + +mysql> select hex(-1); ++------------------+ +| hex(-1) | ++------------------+ +| FFFFFFFFFFFFFFFF | ++------------------+ +``` +## keyword +HEX diff --git a/docs/zh-CN/sql-reference/sql-functions/string-functions/unhex.md b/docs/zh-CN/sql-reference/sql-functions/string-functions/unhex.md new file mode 100644 index 0000000000..f33e75d73e --- /dev/null +++ b/docs/zh-CN/sql-reference/sql-functions/string-functions/unhex.md @@ -0,0 +1,63 @@ +--- +{ + "title": "unhex", + "language": "zh-CN" +} +--- + + + +# unhex +## 
description +### Syntax + +`VARCHAR unhex(VARCHAR str)` + +输入字符串,如果字符串长度为0或者为奇数,返回空串; +如果字符串中包含`[0-9]、[a-f]、[A-F]`之外的字符,返回空串; +其他情况每两个字符为一组转化为16进制后的字符,然后拼接成字符串输出 + + +## example + +``` +mysql> select unhex('@'); ++------------+ +| unhex('@') | ++------------+ +| | ++------------+ + +mysql> select unhex('41'); ++-------------+ +| unhex('41') | ++-------------+ +| A | ++-------------+ + +mysql> select unhex('4142'); ++---------------+ +| unhex('4142') | ++---------------+ +| AB | ++---------------+ +``` +## keyword +UNHEX