[Feature] (json)add json_contains function (#20824)

This commit is contained in:
yuxuan-luo
2023-06-16 15:10:12 +08:00
committed by GitHub
parent ccfd6f1d23
commit 97135a1cbb
6 changed files with 296 additions and 0 deletions

View File

@ -18,6 +18,7 @@
#include <rapidjson/allocators.h>
#include <rapidjson/document.h>
#include <rapidjson/encodings.h>
#include <rapidjson/pointer.h>
#include <rapidjson/rapidjson.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
@ -935,6 +936,131 @@ public:
}
};
class FunctionJsonContains : public IFunction {
public:
static constexpr auto name = "json_contains";
static FunctionPtr create() { return std::make_shared<FunctionJsonContains>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 3; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return make_nullable(std::make_shared<DataTypeInt32>());
}
bool use_default_implementation_for_nulls() const override { return false; }
bool json_contains_object(const rapidjson::Value& target,
const rapidjson::Value& search_value) {
if (!target.IsObject() || !search_value.IsObject()) {
return false;
}
for (auto itr = search_value.MemberBegin(); itr != search_value.MemberEnd(); ++itr) {
if (!target.HasMember(itr->name) || !json_contains(target[itr->name], itr->value)) {
return false;
}
}
return true;
}
bool json_contains_array(const rapidjson::Value& target, const rapidjson::Value& search_value) {
if (!target.IsArray() || !search_value.IsArray()) {
return false;
}
for (auto itr = search_value.Begin(); itr != search_value.End(); ++itr) {
bool found = false;
for (auto target_itr = target.Begin(); target_itr != target.End(); ++target_itr) {
if (json_contains(*target_itr, *itr)) {
found = true;
break;
}
}
if (!found) {
return false;
}
}
return true;
}
bool json_contains(const rapidjson::Value& target, const rapidjson::Value& search_value) {
if (target == search_value) {
return true;
}
if (target.IsObject() && search_value.IsObject()) {
return json_contains_object(target, search_value);
}
if (target.IsArray() && search_value.IsArray()) {
return json_contains_array(target, search_value);
}
return false;
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
const IColumn& col_json = *(block.get_by_position(arguments[0]).column);
const IColumn& col_search = *(block.get_by_position(arguments[1]).column);
const IColumn& col_path = *(block.get_by_position(arguments[2]).column);
auto null_map = ColumnUInt8::create(input_rows_count, 0);
const ColumnString* col_json_string = check_and_get_column<ColumnString>(col_json);
const ColumnString* col_search_string = check_and_get_column<ColumnString>(col_search);
const ColumnString* col_path_string = check_and_get_column<ColumnString>(col_path);
if (!col_json_string || !col_search_string || !col_path_string) {
return Status::RuntimeError("Illegal column should be ColumnString");
}
auto col_to = ColumnVector<vectorized::Int32>::create();
auto& vec_to = col_to->get_data();
size_t size = col_json.size();
vec_to.resize(size);
for (size_t i = 0; i < input_rows_count; ++i) {
if (col_json.is_null_at(i) || col_search.is_null_at(i) || col_path.is_null_at(i)) {
null_map->get_data()[i] = 1;
vec_to[i] = 0;
continue;
}
const auto& json_val = col_json_string->get_data_at(i);
const auto& search_val = col_search_string->get_data_at(i);
const auto& path_val = col_path_string->get_data_at(i);
std::string_view json_string(json_val.data, json_val.size);
std::string_view search_string(search_val.data, search_val.size);
std::string_view path_string(path_val.data, path_val.size);
rapidjson::Document document;
auto target_val = get_json_object<JSON_FUN_STRING>(json_string, path_string, &document);
if (target_val == nullptr || target_val->IsNull()) {
vec_to[i] = 0;
} else {
rapidjson::Document search_doc;
search_doc.Parse(search_string.data(), search_string.size());
if (json_contains(*target_val, search_doc)) {
vec_to[i] = 1;
} else {
vec_to[i] = 0;
}
}
}
block.replace_by_position(result,
ColumnNullable::create(std::move(col_to), std::move(null_map)));
return Status::OK();
}
};
class FunctionJsonUnquote : public IFunction {
public:
static constexpr auto name = "json_unquote";
@ -1015,6 +1141,7 @@ void register_function_json(SimpleFunctionFactory& factory) {
factory.register_function<FunctionJson<FunctionJsonExtractImpl>>();
factory.register_function<FunctionJsonValid>();
factory.register_function<FunctionJsonContains>();
}
} // namespace doris::vectorized

View File

@ -0,0 +1,69 @@
---
{
"title": "json_contains",
"language": "en"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## json_contains
### description
#### Syntax
`INT json_contains(VARCHAR json_str, VARCHAR candidate, VARCHAR json_path)`
Indicates by returning 1 or 0 whether a given candidate JSON document is contained at a specific path within the json_str JSON document
### example
```
mysql> SET @j = '{"a": 1, "b": 2, "c": {"d": 4}}';
mysql> SET @j2 = '1';
mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
+-------------------------------+
| JSON_CONTAINS(@j, @j2, '$.a') |
+-------------------------------+
| 1 |
+-------------------------------+
mysql> SELECT JSON_CONTAINS(@j, @j2, '$.b');
+-------------------------------+
| JSON_CONTAINS(@j, @j2, '$.b') |
+-------------------------------+
| 0 |
+-------------------------------+
mysql> SET @j2 = '{"d": 4}';
mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
+-------------------------------+
| JSON_CONTAINS(@j, @j2, '$.a') |
+-------------------------------+
| 0 |
+-------------------------------+
mysql> SELECT JSON_CONTAINS(@j, @j2, '$.c');
+-------------------------------+
| JSON_CONTAINS(@j, @j2, '$.c') |
+-------------------------------+
| 1 |
+-------------------------------+
```
### keywords
json,json_contains

View File

@ -0,0 +1,69 @@
---
{
"title": "json_contains",
"language": "zh-CN"
}
---
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## json_contains
### description
#### Syntax
`INT json_contains(VARCHAR json_str, VARCHAR candidate, VARCHAR json_path)`
通过返回 1 或 0 来指示给定的 candidate JSON 文档是否包含在 json_str JSON json_path 路径下的文档中
### example
```
mysql> SET @j = '{"a": 1, "b": 2, "c": {"d": 4}}';
mysql> SET @j2 = '1';
mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
+-------------------------------+
| JSON_CONTAINS(@j, @j2, '$.a') |
+-------------------------------+
| 1 |
+-------------------------------+
mysql> SELECT JSON_CONTAINS(@j, @j2, '$.b');
+-------------------------------+
| JSON_CONTAINS(@j, @j2, '$.b') |
+-------------------------------+
| 0 |
+-------------------------------+
mysql> SET @j2 = '{"d": 4}';
mysql> SELECT JSON_CONTAINS(@j, @j2, '$.a');
+-------------------------------+
| JSON_CONTAINS(@j, @j2, '$.a') |
+-------------------------------+
| 0 |
+-------------------------------+
mysql> SELECT JSON_CONTAINS(@j, @j2, '$.c');
+-------------------------------+
| JSON_CONTAINS(@j, @j2, '$.c') |
+-------------------------------+
| 1 |
+-------------------------------+
```
### keywords
json,json_contains

View File

@ -1753,6 +1753,7 @@ visible_functions = {
[['json_object'], 'VARCHAR', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'],
[['json_quote'], 'VARCHAR', ['VARCHAR'], ''],
[['json_valid'], 'INT', ['VARCHAR'], 'ALWAYS_NULLABLE'],
[['json_contains'], 'INT', ['VARCHAR', 'VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['json_unquote'], 'VARCHAR', ['VARCHAR'], 'ALWAYS_NULLABLE'],
[['json_extract'], 'VARCHAR', ['VARCHAR', 'VARCHAR', '...'], '']
],

View File

@ -146,3 +146,24 @@ doris
-- !sql --
123
-- !sql --
1
-- !sql --
0
-- !sql --
0
-- !sql --
1
-- !sql --
0
-- !sql --
1
-- !sql --
1

View File

@ -74,4 +74,13 @@ suite("test_json_function") {
qt_sql "SELECT '{\"k1\": \"v1\", \"k2\": { \"k21\": 6.6, \"k22\": [1, 2, 3] } }'->'\$.k2'->'\$.k22'"
qt_sql "SELECT json_unquote('{\"id\": 123, \"name\": \"doris\"}'->'\$.name');"
qt_sql "SELECT json_extract('{\"id\": 123, \"name\": \"doris\"}', '\$.id', '\$.name')->'\$.[0]';"
qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','1','\$.a');"
qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','1','\$.b');"
qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','{\"d\": 4}','\$.a');"
qt_sql "SELECT JSON_CONTAINS('{\"a\": 1, \"b\": 2, \"c\": {\"d\": 4}}','{\"d\": 4}','\$.c');"
qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"city\": \"New York\", \"hobbies\": [\"reading\", \"travelling\"]}', '{\"age\": 31, \"hobbies\": [\"reading\"]}', '\$.');"
qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"projects\": [{\"name\": \"Project A\", \"year\": 2020}, {\"name\": \"Project B\", \"year\": 2021}]}', '{\"projects\": [{\"name\": \"Project A\"}]}', '\$.');"
qt_sql "SELECT JSON_CONTAINS('{\"name\": \"John\", \"age\": 30, \"address\": {\"city\": \"New York\", \"country\": \"USA\"}}', '{\"address\": {\"city\": \"New York\"}}', '\$.');"
}