diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index 1a51559dcd..8417724d9a 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -200,6 +200,10 @@ rapidjson::Value* get_json_object(std::string_view json_string, std::string_view #endif std::vector paths(tok.begin(), tok.end()); get_parsed_paths(paths, &tmp_parsed_paths); + if (tmp_parsed_paths.empty()) { + return document; + } + + parsed_paths = &tmp_parsed_paths; if (!(*parsed_paths)[0].is_valid) { @@ -648,6 +652,69 @@ struct FunctionJsonQuoteImpl { } };
+// Implements the `json_extract` SQL function: serializes the JSON value(s) selected
+// by one or more path arguments from a JSON text column.
+struct FunctionJsonExtractImpl {
+    static constexpr auto name = "json_extract";
+
+    // Looks up the path in row `row` of `path_col` inside the JSON text in row `row`
+    // of `json_col`. The match is deep-copied with `allocator` because the parsed
+    // document is local to this call. A JSON null value is returned when
+    // get_json_object() yields nullptr.
+    static rapidjson::Value parse_json(const ColumnString* json_col, const ColumnString* path_col,
+                                       rapidjson::Document::AllocatorType& allocator,
+                                       const size_t row) {
+        rapidjson::Value value;
+        rapidjson::Document document;
+
+        const auto obj = json_col->get_data_at(row);
+        std::string_view json_string(obj.data, obj.size);
+        const auto path = path_col->get_data_at(row);
+        std::string_view path_string(path.data, path.size);
+
+        auto root = get_json_object(json_string, path_string, &document);
+        if (root != nullptr) {
+            value.CopyFrom(*root, allocator);
+        }
+        return value;
+    }
+
+    // Appends one serialized JSON string per input row to `result_column`.
+    // `data_columns[0]` holds the JSON text and every further column holds a path.
+    // With a single path the extracted value is written as is; with several paths
+    // the extracted values are collected, in argument order, into a JSON array.
+    static void execute(const std::vector<const ColumnString*>& data_columns,
+                        ColumnString& result_column, size_t input_rows_count) {
+        // NOTE(review): this allocator backs the copies made for every row and is never
+        // cleared inside the loop - confirm whether a per-row allocator.Clear() is wanted.
+        rapidjson::Document document;
+        rapidjson::Document::AllocatorType& allocator = document.GetAllocator();
+        rapidjson::StringBuffer buf;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(buf);
+
+        const auto json_col = data_columns[0];
+        for (size_t row = 0; row < input_rows_count; row++) {
+            rapidjson::Value value;
+            if (data_columns.size() == 2) {
+                value = parse_json(json_col, data_columns[1], allocator, row);
+            } else {
+                value.SetArray();
+                value.Reserve(data_columns.size() - 1, allocator);
+                for (size_t col = 1; col < data_columns.size(); ++col) {
+                    value.PushBack(parse_json(json_col, data_columns[col], allocator, row),
+                                   allocator);
+                }
+            }
+
+            // Reuse the buffer and writer across rows; write the value as a string.
+            buf.Clear();
+            writer.Reset(buf);
+            value.Accept(writer);
+            result_column.insert_data(buf.GetString(), buf.GetSize());
+        }
+    }
+};
+
 template class FunctionJson : public IFunction { public: @@ -763,6 +830,7 @@ void register_function_json(SimpleFunctionFactory& factory) { factory.register_function>(); factory.register_function>(); factory.register_function>(); + factory.register_function<FunctionJson<FunctionJsonExtractImpl>>(); factory.register_function(); } diff --git a/docs/en/docs/sql-manual/sql-functions/json-functions/json_extract.md b/docs/en/docs/sql-manual/sql-functions/json-functions/json_extract.md new file mode 100644 index 0000000000..da9b4db633 --- /dev/null +++ b/docs/en/docs/sql-manual/sql-functions/json-functions/json_extract.md @@ -0,0 +1,71 @@ +--- +{ + "title": "json_extract", + "language": "en" +} +--- + + + +## json_extract +### description + +#### Syntax + +`VARCHAR json_extract(VARCHAR json_str, VARCHAR path[, VARCHAR path] ...)` + +`json_extract` function returns data from a JSON document, selected from the parts of the document matched by the `path` arguments. Returns NULL if any argument is NULL or if the `json_str` argument is not a valid JSON document. 
If a `path` argument does not match anything in the JSON document, the corresponding item in the returned array is NULL (see the last example below). + +### example + +``` +mysql> SELECT json_extract('{"id": 123, "name": "doris"}', '$.id'); ++------------------------------------------------------+ +| json_extract('{"id": 123, "name": "doris"}', '$.id') | ++------------------------------------------------------+ +| 123 | ++------------------------------------------------------+ +1 row in set (0.01 sec) + +mysql> SELECT json_extract('[1, 2, 3]', '$.[1]'); ++------------------------------------+ +| json_extract('[1, 2, 3]', '$.[1]') | ++------------------------------------+ +| 2 | ++------------------------------------+ +1 row in set (0.01 sec) + +mysql> SELECT json_extract('{"k1": "v1", "k2": { "k21": 6.6, "k22": [1, 2] } }', '$.k1', '$.k2.k21', '$.k2.k22', '$.k2.k22[1]'); ++-------------------------------------------------------------------------------------------------------------------+ +| json_extract('{"k1": "v1", "k2": { "k21": 6.6, "k22": [1, 2] } }', '$.k1', '$.k2.k21', '$.k2.k22', '$.k2.k22[1]') | ++-------------------------------------------------------------------------------------------------------------------+ +| ["v1",6.6,[1,2],2] | ++-------------------------------------------------------------------------------------------------------------------+ +1 row in set (0.01 sec) + +mysql> SELECT json_extract('{"id": 123, "name": "doris"}', '$.aaa', '$.name'); ++-----------------------------------------------------------------+ +| json_extract('{"id": 123, "name": "doris"}', '$.aaa', '$.name') | ++-----------------------------------------------------------------+ +| [null,"doris"] | ++-----------------------------------------------------------------+ +1 row in set (0.01 sec) +``` + +### keywords +JSON, EXTRACT, JSON_EXTRACT \ No newline at end of file diff --git a/docs/sidebars.json b/docs/sidebars.json index 59ac5cb994..39d8d0f813 
100644 --- a/docs/sidebars.json +++ b/docs/sidebars.json @@ -592,7 +592,8 @@ "sql-manual/sql-functions/json-functions/json_array", "sql-manual/sql-functions/json-functions/json_object", "sql-manual/sql-functions/json-functions/json_quote", - "sql-manual/sql-functions/json-functions/json_valid" + "sql-manual/sql-functions/json-functions/json_valid", + "sql-manual/sql-functions/json-functions/json_extract" ] }, { diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_extract.md b/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_extract.md new file mode 100644 index 0000000000..fc294a64b4 --- /dev/null +++ b/docs/zh-CN/docs/sql-manual/sql-functions/json-functions/json_extract.md @@ -0,0 +1,71 @@ +--- +{ + "title": "json_extract", + "language": "zh-CN" +} +--- + + + +## json_extract +### description + +#### Syntax + +`VARCHAR json_extract(VARCHAR json_str, VARCHAR path[, VARCHAR path] ...)` + +json_extract函数从 JSON 文档中返回数据,这些数据是从与 `path` 参数所匹配的文档部分中选择的。如果任何参数为 NULL 或者 `json_str` 参数不是有效的 JSON 文档,则返回 NULL。如果 `path` 参数不是一个有效的路径(即这个路径没有出现在JSON文档中),则返回的数组中对应的项为 NULL(见下面例子)。 + +### example + +``` +mysql> SELECT json_extract('{"id": 123, "name": "doris"}', '$.id'); ++------------------------------------------------------+ +| json_extract('{"id": 123, "name": "doris"}', '$.id') | ++------------------------------------------------------+ +| 123 | ++------------------------------------------------------+ +1 row in set (0.01 sec) + +mysql> SELECT json_extract('[1, 2, 3]', '$.[1]'); ++------------------------------------+ +| json_extract('[1, 2, 3]', '$.[1]') | ++------------------------------------+ +| 2 | ++------------------------------------+ +1 row in set (0.01 sec) + +mysql> SELECT json_extract('{"k1": "v1", "k2": { "k21": 6.6, "k22": [1, 2] } }', '$.k1', '$.k2.k21', '$.k2.k22', '$.k2.k22[1]'); ++-------------------------------------------------------------------------------------------------------------------+ +| json_extract('{"k1": "v1", 
"k2": { "k21": 6.6, "k22": [1, 2] } }', '$.k1', '$.k2.k21', '$.k2.k22', '$.k2.k22[1]') | ++-------------------------------------------------------------------------------------------------------------------+ +| ["v1",6.6,[1,2],2] | ++-------------------------------------------------------------------------------------------------------------------+ +1 row in set (0.01 sec) + +mysql> SELECT json_extract('{"id": 123, "name": "doris"}', '$.aaa', '$.name'); ++-----------------------------------------------------------------+ +| json_extract('{"id": 123, "name": "doris"}', '$.aaa', '$.name') | ++-----------------------------------------------------------------+ +| [null,"doris"] | ++-----------------------------------------------------------------+ +1 row in set (0.01 sec) +``` + +### keywords +JSON, EXTRACT, JSON_EXTRACT \ No newline at end of file diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 3078b96ca0..0a1ca8b557 100644 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -1561,6 +1561,7 @@ visible_functions = [ [['json_object'], 'VARCHAR', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'], [['json_quote'], 'VARCHAR', ['VARCHAR'], ''], [['json_valid'], 'INT', ['VARCHAR'], 'ALWAYS_NULLABLE'], + [['json_extract'], 'VARCHAR', ['VARCHAR', 'VARCHAR', '...'], ''], #hll function [['hll_cardinality'], 'BIGINT', ['HLL'], 'ALWAYS_NOT_NULLABLE'], diff --git a/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out b/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out index 8172b0f011..e6617ad6c3 100644 --- a/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out +++ b/regression-test/data/jsonb_p0/test_jsonb_load_and_function.out @@ -3975,3 +3975,113 @@ false -- !select -- \N +-- !select -- +1 \N \N +2 null null +3 true null +4 false null +5 100 null +6 10000 null +7 1000000000 null +8 1152921504606846976 null +9 6.18 null +10 "abcd" null +11 {} null +12 
{"k1":"v31","k2":300} "v31" +13 [] null +14 [123,456] null +15 ["abc","def"] null +16 [null,true,false,100,6.18,"abc"] null +17 [{"k1":"v41","k2":400},1,"a",3.14] ["v41"] +18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} "v31" +26 \N \N +27 {"k1":"v1","k2":200} "v1" + +-- !select -- +1 \N \N +2 null [null,null] +3 true [null,null] +4 false [null,null] +5 100 [null,null] +6 10000 [null,null] +7 1000000000 [null,null] +8 1152921504606846976 [null,null] +9 6.18 [null,null] +10 "abcd" [null,null] +11 {} [null,null] +12 {"k1":"v31","k2":300} [300,null] +13 [] [null,null] +14 [123,456] [null,456] +15 ["abc","def"] [null,"def"] +16 [null,true,false,100,6.18,"abc"] [null,true] +17 [{"k1":"v41","k2":400},1,"a",3.14] [[400],1] +18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} [300,null] +26 \N \N +27 {"k1":"v1","k2":200} [200,null] + +-- !select -- +1 \N \N +2 null [null,null] +3 true [null,null] +4 false [null,null] +5 100 [null,null] +6 10000 [null,null] +7 1000000000 [null,null] +8 1152921504606846976 [null,null] +9 6.18 [null,null] +10 "abcd" [null,null] +11 {} [null,null] +12 {"k1":"v31","k2":300} [300,null] +13 [] [null,null] +14 [123,456] [null,null] +15 ["abc","def"] [null,null] +16 [null,true,false,100,6.18,"abc"] [null,null] +17 [{"k1":"v41","k2":400},1,"a",3.14] [[400],null] +18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} [300,null] +26 \N \N +27 {"k1":"v1","k2":200} [200,null] + +-- !select -- +1 \N \N +2 null \N +3 true \N +4 false \N +5 100 \N +6 10000 \N +7 1000000000 \N +8 1152921504606846976 \N +9 6.18 \N +10 "abcd" \N +11 {} \N +12 {"k1":"v31","k2":300} \N +13 [] \N +14 [123,456] \N +15 ["abc","def"] \N +16 [null,true,false,100,6.18,"abc"] \N +17 [{"k1":"v41","k2":400},1,"a",3.14] \N +18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} \N +26 \N \N +27 {"k1":"v1","k2":200} \N + +-- !select -- +1 \N \N +2 null [null,null,null] +3 true [null,null,null] +4 false [null,null,null] +5 100 
[null,null,null] +6 10000 [null,null,null] +7 1000000000 [null,null,null] +8 1152921504606846976 [null,null,null] +9 6.18 [null,null,null] +10 "abcd" [null,null,null] +11 {} [null,null,null] +12 {"k1":"v31","k2":300} [null,null,null] +13 [] [null,null,null] +14 [123,456] [null,null,null] +15 ["abc","def"] [null,null,null] +16 [null,true,false,100,6.18,"abc"] [null,null,null] +17 [{"k1":"v41","k2":400},1,"a",3.14] [null,null,null] +18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} ["v41",400,"a"] +26 \N \N +27 {"k1":"v1","k2":200} [null,null,null] + diff --git a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out index e9a65fcc35..d94ec4f022 100644 --- a/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out +++ b/regression-test/data/query_p0/sql_functions/json_functions/test_json_function.out @@ -77,3 +77,21 @@ v1 -- !sql -- "\\n\\b\\r\\t" +-- !sql -- +2 + +-- !sql -- +[123,"doris"] + +-- !sql -- +\N + +-- !sql -- +\N + +-- !sql -- +["v1",{"k21":6.6,"k22":[1,2,3]}] + +-- !sql -- +[6.6,[1,2,3],2] + diff --git a/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy b/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy index 707c225ce1..85e6330f57 100644 --- a/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy +++ b/regression-test/suites/jsonb_p0/test_jsonb_load_and_function.groovy @@ -420,4 +420,10 @@ suite("test_jsonb_load_and_function", "p0") { qt_select """SELECT JSON_VALID('{"k1":"v31","k2":300}')""" qt_select """SELECT JSON_VALID('invalid json')""" qt_select """SELECT JSON_VALID(NULL)""" + + qt_select """SELECT id, j, JSON_EXTRACT(j, '\$.k1') FROM ${testTable} ORDER BY id""" + qt_select """SELECT id, j, JSON_EXTRACT(j, '\$.k2', '\$.[1]') FROM ${testTable} ORDER BY id""" + qt_select """SELECT id, j, JSON_EXTRACT(j, '\$.k2', '\$.x.y') FROM ${testTable} ORDER BY 
id""" + qt_select """SELECT id, j, JSON_EXTRACT(j, '\$.k2', null) FROM ${testTable} ORDER BY id""" + qt_select """SELECT id, j, JSON_EXTRACT(j, '\$.a1[0].k1', '\$.a1[0].k2', '\$.a1[2]') FROM ${testTable} ORDER BY id""" } diff --git a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy index 7a403b549e..bc07a41c5c 100644 --- a/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/json_functions/test_json_function.groovy @@ -49,4 +49,10 @@ suite("test_json_function") { qt_sql "SELECT json_quote(null);" qt_sql "SELECT json_quote(\"\\n\\b\\r\\t\");" + qt_sql "SELECT json_extract('[1, 2, 3]', '\$.[1]');" + qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', '\$.id', '\$.name');" + qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', null, '\$.id');" + qt_sql "SELECT json_extract(null, '\$.id');" + qt_sql "SELECT json_extract('{\"k1\": \"v1\", \"k2\": { \"k21\": 6.6, \"k22\": [1, 2, 3] } }', '\$.k1', '\$.k2');" + qt_sql "SELECT json_extract('{\"k1\": \"v1\", \"k2\": { \"k21\": 6.6, \"k22\": [1, 2, 3] } }', '\$.k2.k21', '\$.k2.k22', '\$.k2.k22[1]');" }