From ab2a6864bc143eca8f8dbaf8758fe680671f31da Mon Sep 17 00:00:00 2001 From: Mellorsssss <45841569+Mellorsssss@users.noreply.github.com> Date: Mon, 24 Apr 2023 10:33:29 +0800 Subject: [PATCH] [function](json) Json unquote (#18037) --- be/src/vec/functions/function_json.cpp | 70 ++++++++++++++++ .../json-functions/json_unquote.md | 83 +++++++++++++++++++ docs/sidebars.json | 1 + .../json-functions/json_unquote.md | 83 +++++++++++++++++++ gensrc/script/doris_builtins_functions.py | 1 + .../json_functions/test_json_function.out | 30 +++++++ .../json_functions/test_json_function.groovy | 11 +++ 7 files changed, 279 insertions(+) create mode 100644 docs/en/docs/sql-manual/sql-functions/json-functions/json_unquote.md create mode 100644 docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_unquote.md diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index 9e40b56aa3..ccaad26a9e 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -933,11 +933,81 @@ public: } }; +class FunctionJsonUnquote : public IFunction { +public: + static constexpr auto name = "json_unquote"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 1; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return make_nullable(std::make_shared()); + } + + bool use_default_implementation_for_nulls() const override { return false; } + + bool use_default_implementation_for_constants() const override { return true; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + const IColumn& col_from = *(block.get_by_position(arguments[0]).column); + + auto null_map = ColumnUInt8::create(input_rows_count, 0); + + const ColumnString* col_from_string = check_and_get_column(col_from); + if (auto* nullable = check_and_get_column(col_from)) { + col_from_string = + check_and_get_column(*nullable->get_nested_column_ptr()); + } + + if (!col_from_string) { + return Status::RuntimeError("Illegal column {} should be ColumnString", + col_from.get_name()); + } + + auto col_to = ColumnString::create(); + col_to->reserve(input_rows_count); + + // parser can be reused for performance + rapidjson::Document document; + for (size_t i = 0; i < input_rows_count; ++i) { + if (col_from.is_null_at(i)) { + null_map->get_data()[i] = 1; + col_to->insert_data(nullptr, 0); + continue; + } + + const auto& json_str = col_from_string->get_data_at(i); + if (json_str.size < 2 || json_str.data[0] != '"' || + json_str.data[json_str.size - 1] != '"') { + // non-quoted string + col_to->insert_data(json_str.data, json_str.size); + } else { + document.Parse(json_str.data, json_str.size); + if (document.HasParseError() || !document.IsString()) { + return Status::RuntimeError( + fmt::format("Invalid JSON text in argument 1 to function {}: {}", name, + std::string_view(json_str.data, json_str.size))); + } + col_to->insert_data(document.GetString(), document.GetStringLength()); + } + } + + block.replace_by_position(result, + ColumnNullable::create(std::move(col_to), std::move(null_map))); + + return Status::OK(); + } +}; + void register_function_json(SimpleFunctionFactory& factory) { factory.register_function(); factory.register_function(); factory.register_function(); factory.register_function(); + factory.register_function(); factory.register_function>(); factory.register_function>(); diff --git a/docs/en/docs/sql-manual/sql-functions/json-functions/json_unquote.md b/docs/en/docs/sql-manual/sql-functions/json-functions/json_unquote.md new file mode 100644 index 0000000000..0b0c7f08ac --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/json-functions/json_unquote.md @@ -0,0 +1,83 @@ +--- +{ + "title": "json_unquote", + "language": "en" +} +--- + + + +## json_unquote +### Description +#### Syntax + +`VARCHAR json_ununquote(VARCHAR)` + +This function unquotes a JSON value and returns the result as a utf8mb4 string. If the argument is NULL, it will return NULL. + +Escape sequences within a string as shown in the following table will be recognized. Backslashes will be ignored for all other escape sequences. + +| Escape Sequence | Character Represented by Sequence | +|-----------------|------------------------------------| +| \" | A double quote (") character | +| \b | A backspace character | +| \f | A formfeed character | +| \n | A newline (linefeed) character | +| \r | A carriage return character | +| \t | A tab character | +| \\ | A backslash (\) character | +| \uxxxx | UTF-8 bytes for Unicode value XXXX | + + + +### example + +``` +mysql> SELECT json_unquote('"doris"'); ++-------------------------+ +| json_unquote('"doris"') | ++-------------------------+ +| doris | ++-------------------------+ + +mysql> SELECT json_unquote('[1, 2, 3]'); ++---------------------------+ +| json_unquote('[1, 2, 3]') | ++---------------------------+ +| [1, 2, 3] | ++---------------------------+ + + +mysql> SELECT json_unquote(null); ++--------------------+ +| json_unquote(NULL) | ++--------------------+ +| NULL | ++--------------------+ + +mysql> SELECT json_unquote('"\\ttest"'); ++--------------------------+ +| json_unquote('"\ttest"') | ++--------------------------+ +| test | ++--------------------------+ +``` +### keywords +json,unquote,json_unquote diff --git a/docs/sidebars.json b/docs/sidebars.json index 2e13a75225..091f04d151 100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -598,6 +598,7 @@ "sql-manual/sql-functions/json-functions/json_array", "sql-manual/sql-functions/json-functions/json_object", "sql-manual/sql-functions/json-functions/json_quote", + "sql-manual/sql-functions/json-functions/json_unquote", "sql-manual/sql-functions/json-functions/json_valid", "sql-manual/sql-functions/json-functions/json_extract" ] diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_unquote.md b/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_unquote.md new file mode 100644 index 0000000000..2c4eb8061d --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_unquote.md @@ -0,0 +1,83 @@ +--- +{ + "title": "json_unquote", + "language": "zh-CN" +} +--- + + + +## json_unquote +### Description +#### Syntax + +`VARCHAR json_ununquote(VARCHAR)` + +这个函数将去掉JSON值中的引号,并将结果作为utf8mb4字符串返回。如果参数为NULL,则返回NULL。 + +在字符串中显示的如下转义序列将被识别,对于所有其他转义序列,反斜杠将被忽略。 + +| 转义序列 | 序列表示的字符 | +|----------|-------------------------------| +| \" | 双引号 " | +| \b | 退格字符 | +| \f | 换页符 | +| \n | 换行符 | +| \r | 回车符 | +| \t | 制表符 | +| \\ | 反斜杠 \ | +| \uxxxx | Unicode 值 XXXX 的 UTF-8 字节 | + + + +### example + +``` +mysql> SELECT json_unquote('"doris"'); ++-------------------------+ +| json_unquote('"doris"') | ++-------------------------+ +| doris | ++-------------------------+ + +mysql> SELECT json_unquote('[1, 2, 3]'); ++---------------------------+ +| json_unquote('[1, 2, 3]') | ++---------------------------+ +| [1, 2, 3] | ++---------------------------+ + + +mysql> SELECT json_unquote(null); ++--------------------+ +| json_unquote(NULL) | ++--------------------+ +| NULL | ++--------------------+ + +mysql> SELECT json_unquote('"\\ttest"'); ++--------------------------+ +| json_unquote('"\ttest"') | ++--------------------------+ +| test | ++--------------------------+ +``` +### keywords +json,unquote,json_unquote diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index b641356504..0413bcd3b6 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1655,6 +1655,7 @@ visible_functions = [ [['json_object'], 'VARCHAR', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'], [['json_quote'], 'VARCHAR', ['VARCHAR'], ''], [['json_valid'], 'INT', ['VARCHAR'], 'ALWAYS_NULLABLE'], + [['json_unquote'], 'VARCHAR', ['VARCHAR'], 'ALWAYS_NULLABLE'], [['json_extract'], 'VARCHAR', ['VARCHAR', 'VARCHAR', '...'], ''], #hll function diff --git a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out index d94ec4f022..0057eb6d34 100644 --- a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out +++ b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out @@ -77,6 +77,36 @@ v1 -- !sql -- "\\n\\b\\r\\t" +-- !sql -- +"" + +-- !sql -- + + +-- !sql -- +doris + +-- !sql -- +doris + +-- !sql -- +open-quoted" + +-- !sql -- +"open-quoted + +-- !sql -- +\N + +-- !sql -- +Dorris\ ishere\n + +-- !sql -- +Dorris\ ishere\n + +-- !sql -- +DORIS + -- !sql -- 2 diff --git a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy index bc07a41c5c..a47fe73999 100644 --- a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy @@ -48,6 +48,17 @@ suite("test_json_function") { qt_sql "SELECT json_quote('[1, 2, 3, 1678708107000]');" qt_sql "SELECT json_quote(null);" qt_sql "SELECT json_quote(\"\\n\\b\\r\\t\");" + qt_sql "SELECT json_quote('')" + + qt_sql "SELECT json_unquote('')" + qt_sql "SELECT json_unquote('doris')" + qt_sql "SELECT json_unquote('\"doris\"');" + qt_sql "SELECT json_unquote('open-quoted\"');" + qt_sql "SELECT json_unquote('\"open-quoted');" + qt_sql "SELECT json_unquote(null);" + qt_sql "SELECT json_unquote('Dorr\bis\tishere\n');" + qt_sql "SELECT json_unquote('\"Dorr\\\\bis\\\\tishere\\\\n\"');" + qt_sql "SELECT json_unquote('\"\\\\u0044\\\\u004F\\\\u0052\\\\u0049\\\\u0053\"');" qt_sql "SELECT json_extract('[1, 2, 3]', '\$.[1]');" qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', '\$.id', '\$.name');"