[Improve](jsonb_extract) support jsonb_extract multi parse path (#21555)
Support passing multiple parse paths to jsonb_extract in a single call.
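For a quick picture of the user-facing change, here is a hedged sketch of the new call form, put together from the regression test and expected output added later in this diff; `example_tbl` is a placeholder standing in for the test's `${testTable}`, and the shown document is row 12 of the expected output:

-- One JSONB document, several paths in one call; the result is a JSON array with one
-- element per path (null for paths that do not match anything).
SELECT id, j, jsonb_extract(j, '$', '$.*', '$.k1', '$[0]') FROM example_tbl ORDER BY id;
-- For j = {"k1":"v31","k2":300} the expected output below lists:
-- [{"k1":"v31","k2":300},{"k1":"v31","k2":300},"v31",{"k1":"v31","k2":300}]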
@@ -339,7 +339,7 @@ using FunctionJsonbParseNotnullErrorValue =
using FunctionJsonbParseNotnullErrorInvalid =
        FunctionJsonbParseBase<NullalbeMode::NOT_NULL, JsonbParseErrorMode::RETURN_INVALID>;

// func(json,string) -> nullable(type)
// func(jsonb, [varchar, varchar, ...]) -> nullable(type)
template <typename Impl>
class FunctionJsonbExtract : public IFunction {
public:

@@ -347,7 +347,8 @@ public:
    static constexpr auto alias = Impl::alias;
    static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
    String get_name() const override { return name; }
    size_t get_number_of_arguments() const override { return 2; }
    bool is_variadic() const override { return true; }
    size_t get_number_of_arguments() const override { return 0; }
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return make_nullable(std::make_shared<typename Impl::ReturnType>());
    }

@@ -355,26 +356,32 @@ public:
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) override {
        auto null_map = ColumnUInt8::create(input_rows_count, 0);
        DCHECK_EQ(arguments.size(), 2);
        ColumnPtr argument_columns[2];
        bool col_const[2];
        for (int i = 0; i < 2; ++i) {
            std::tie(argument_columns[i], col_const[i]) =
                    unpack_if_const(block.get_by_position(arguments[i]).column);
            check_set_nullable(argument_columns[i], null_map, col_const[i]);
        DCHECK_GE(arguments.size(), 2);

        ColumnPtr jsonb_data_column;
        bool jsonb_data_const = false;
        // prepare jsonb data column
        std::tie(jsonb_data_column, jsonb_data_const) =
                unpack_if_const(block.get_by_position(arguments[0]).column);
        check_set_nullable(jsonb_data_column, null_map, jsonb_data_const);
        auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
        auto& loffsets = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();

        // prepare parse path column prepare
        std::vector<const ColumnString*> jsonb_path_columns;
        std::vector<bool> path_const(arguments.size() - 1);
        for (int i = 0; i < arguments.size() - 1; ++i) {
            ColumnPtr path_column;
            bool is_const = false;
            std::tie(path_column, is_const) =
                    unpack_if_const(block.get_by_position(arguments[i + 1]).column);
            path_const[i] = is_const;
            check_set_nullable(path_column, null_map, path_const[i]);
            jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
        }

        auto res = Impl::ColumnType::create();

        auto jsonb_data_column = assert_cast<const ColumnString*>(argument_columns[0].get());
        auto jsonb_path_column = assert_cast<const ColumnString*>(argument_columns[1].get());

        auto& ldata = jsonb_data_column->get_chars();
        auto& loffsets = jsonb_data_column->get_offsets();

        auto& rdata = jsonb_path_column->get_chars();
        auto& roffsets = jsonb_path_column->get_offsets();

        bool is_invalid_json_path = false;

        // execute Impl

@@ -382,36 +389,34 @@ public:
                      std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
            auto& res_data = res->get_chars();
            auto& res_offsets = res->get_offsets();
            if (col_const[0]) {
                Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), rdata, roffsets,
                                    res_data, res_offsets, null_map->get_data(),
                                    is_invalid_json_path);
            } else if (col_const[1]) {
                Impl::vector_scalar(context, ldata, loffsets, jsonb_path_column->get_data_at(0),
                                    res_data, res_offsets, null_map->get_data(),
                                    is_invalid_json_path);
            } else {
                Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res_data,
                                    res_offsets, null_map->get_data(), is_invalid_json_path);
            Status st = Impl::vector_vector_v2(
                    context, ldata, loffsets, jsonb_data_const, jsonb_path_columns, path_const,
                    res_data, res_offsets, null_map->get_data(), is_invalid_json_path);
            if (st != Status::OK()) {
                return st;
            }
        } else {
            if (col_const[0]) {
            // not support other extract type for now (e.g. int, double, ...)
            DCHECK_EQ(jsonb_path_columns.size(), 1);
            auto& rdata = jsonb_path_columns[0]->get_chars();
            auto& roffsets = jsonb_path_columns[0]->get_offsets();
            if (jsonb_data_const) {
                Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), rdata, roffsets,
                                    res->get_data(), null_map->get_data(), is_invalid_json_path);
            } else if (col_const[1]) {
                Impl::vector_scalar(context, ldata, loffsets, jsonb_path_column->get_data_at(0),
            } else if (path_const[0]) {
                Impl::vector_scalar(context, ldata, loffsets, jsonb_path_columns[0]->get_data_at(0),
                                    res->get_data(), null_map->get_data(), is_invalid_json_path);
            } else {
                Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(),
                                    null_map->get_data(), is_invalid_json_path);
            }
        }

        if (is_invalid_json_path) {
            return Status::InvalidArgument(
                    "Json path error: {} for value: {}",
                    JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
                    std::string_view(reinterpret_cast<const char*>(rdata.data()), rdata.size()));
            if (is_invalid_json_path) {
                return Status::InvalidArgument(
                        "Json path error: {} for value: {}",
                        JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
                        std::string_view(reinterpret_cast<const char*>(rdata.data()),
                                         rdata.size()));
            }
        }

        block.get_by_position(result).column =

@@ -503,6 +508,77 @@ private:

public:
    // for jsonb_extract_string
    static Status vector_vector_v2(
            FunctionContext* context, const ColumnString::Chars& ldata,
            const ColumnString::Offsets& loffsets, const bool& json_data_const,
            const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths
            const std::vector<bool>& path_const, ColumnString::Chars& res_data,
            ColumnString::Offsets& res_offsets, NullMap& null_map, bool& is_invalid_json_path) {
        size_t input_rows_count = json_data_const ? rdata_columns.size() : loffsets.size();
        res_offsets.resize(input_rows_count);

        auto writer = std::make_unique<JsonbWriter>();
        std::unique_ptr<JsonbToJson> formater;

        for (size_t i = 0; i < input_rows_count; ++i) {
            if (null_map[i]) {
                StringOP::push_null_string(i, res_data, res_offsets, null_map);
                continue;
            }
            size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
            size_t l_size = loffsets[index_check_const(i, json_data_const)] - l_off;
            const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
            if (rdata_columns.size() == 1) { // just return origin value
                const ColumnString* path_col = rdata_columns[0];
                const ColumnString::Chars& rdata = path_col->get_chars();
                const ColumnString::Offsets& roffsets = path_col->get_offsets();
                size_t r_off = roffsets[index_check_const(i, path_const[0]) - 1];
                size_t r_size = roffsets[index_check_const(i, path_const[0])] - r_off;
                const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
                inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size,
                                r_raw, r_size, is_invalid_json_path);
            } else { // will make array string to user
                writer->reset();
                writer->writeStartArray();
                for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
                    const ColumnString* path_col = rdata_columns[pi];
                    const ColumnString::Chars& rdata = path_col->get_chars();
                    const ColumnString::Offsets& roffsets = path_col->get_offsets();
                    size_t r_off = roffsets[index_check_const(i, path_const[pi]) - 1];
                    size_t r_size = roffsets[index_check_const(i, path_const[pi])] - r_off;
                    const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
                    // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
                    JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
                    if (UNLIKELY(!doc || !doc->getValue())) {
                        writer->writeNull();
                        continue;
                    }
                    // value is NOT necessary to be deleted since JsonbValue will not allocate memory
                    JsonbValue* value =
                            doc->getValue()->findPath(r_raw, r_size, is_invalid_json_path, nullptr);
                    // if not valid json path , should return error message to user
                    if (is_invalid_json_path) {
                        return Status::InvalidArgument(
                                "Json path error: {} for value: {}",
                                JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
                                std::string_view(reinterpret_cast<const char*>(rdata.data()),
                                                 rdata.size()));
                    }
                    if (UNLIKELY(!value)) {
                        writer->writeNull();
                    } else {
                        writer->writeValue(value);
                    }
                }
                writer->writeEndArray();
                StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
                                                             writer->getOutput()->getSize()),
                                            i, res_data, res_offsets);
            }
        } //for
        return Status::OK();
    }

    static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
                              const ColumnString::Offsets& loffsets,
                              const ColumnString::Chars& rdata,

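As a usage note on the vector_vector_v2 branches above (a single path flows through inner_loop_impl and comes back as the extracted value itself; two or more paths are collected into one JSON array via writeStartArray/writeEndArray, with null written for any path that finds nothing), here is a hedged SQL sketch. The table name is a placeholder, and the two-path combination is inferred from rows 12 and 14 of the expected output further down rather than copied from a test:

-- Single path: behavior is unchanged, the extracted value is returned as-is.
SELECT jsonb_extract(j, '$.k1') FROM example_tbl;        -- "v31" when j = {"k1":"v31","k2":300}
-- Multiple paths: one JSON array, one element per path, null where a path matches nothing.
SELECT jsonb_extract(j, '$[0]', '$.k1') FROM example_tbl;  -- [123,null] when j = [123,456]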
@@ -1690,11 +1690,11 @@ visible_functions = {

    [['jsonb_exists_path'], 'BOOLEAN', ['JSONB', 'VARCHAR'], ''],
    [['jsonb_exists_path'], 'BOOLEAN', ['JSONB', 'STRING'], ''],
    [['jsonb_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
    [['jsonb_type'], 'STRING', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
    [['jsonb_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],

    [['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
    [['jsonb_extract'], 'JSONB', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
    [['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR', '...'], 'ALWAYS_NULLABLE'],
    [['jsonb_extract'], 'JSONB', ['JSONB', 'STRING', '...'], 'ALWAYS_NULLABLE'],
    [['jsonb_extract_isnull'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
    [['jsonb_extract_isnull'], 'BOOLEAN', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
    [['jsonb_extract_bool'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],

@@ -1725,8 +1725,6 @@ visible_functions = {
    [['json_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
    [['json_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],

    [['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
    [['jsonb_extract'], 'JSONB', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
    [['json_extract_isnull'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
    [['json_extract_isnull'], 'BOOLEAN', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
    [['json_extract_bool'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],

@@ -709,6 +709,29 @@
27 {"k1":"v1","k2":200} \N
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} \N

-- !jsonb_extract_multipath --
1 \N \N
2 null [null,null,null,null]
3 true [true,null,null,null]
4 false [false,null,null,null]
5 100 [100,null,null,null]
6 10000 [10000,null,null,null]
7 1000000000 [1000000000,null,null,null]
8 1152921504606846976 [1152921504606846976,null,null,null]
9 6.18 [6.18,null,null,null]
10 "abcd" ["abcd",null,null,null]
11 {} [{},{},null,{}]
12 {"k1":"v31","k2":300} [{"k1":"v31","k2":300},{"k1":"v31","k2":300},"v31",{"k1":"v31","k2":300}]
13 [] [[],null,null,null]
14 [123,456] [[123,456],null,null,123]
15 ["abc","def"] [["abc","def"],null,null,"abc"]
16 [null,true,false,100,6.18,"abc"] [[null,true,false,100,6.18,"abc"],null,null,null]
17 [{"k1":"v41","k2":400},1,"a",3.14] [[{"k1":"v41","k2":400},1,"a",3.14],null,null,{"k1":"v41","k2":400}]
18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} [{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},"v31",{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}]
26 \N \N
27 {"k1":"v1","k2":200} [{"k1":"v1","k2":200},{"k1":"v1","k2":200},"v1",{"k1":"v1","k2":200}]
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} [{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},null,{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}]

-- !jsonb_extract_string_select --
1 \N \N
2 null null

@@ -709,6 +709,29 @@
27 {"k1":"v1","k2":200} \N
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} \N

-- !jsonb_extract_multipath --
1 \N \N
2 null [null,null,null,null]
3 true [true,null,null,null]
4 false [false,null,null,null]
5 100 [100,null,null,null]
6 10000 [10000,null,null,null]
7 1000000000 [1000000000,null,null,null]
8 1152921504606846976 [1152921504606846976,null,null,null]
9 6.18 [6.18,null,null,null]
10 "abcd" ["abcd",null,null,null]
11 {} [{},{},null,{}]
12 {"k1":"v31","k2":300} [{"k1":"v31","k2":300},{"k1":"v31","k2":300},"v31",{"k1":"v31","k2":300}]
13 [] [[],null,null,null]
14 [123,456] [[123,456],null,null,123]
15 ["abc","def"] [["abc","def"],null,null,"abc"]
16 [null,true,false,100,6.18,"abc"] [[null,true,false,100,6.18,"abc"],null,null,null]
17 [{"k1":"v41","k2":400},1,"a",3.14] [[{"k1":"v41","k2":400},1,"a",3.14],null,null,{"k1":"v41","k2":400}]
18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} [{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},"v31",{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}]
26 \N \N
27 {"k1":"v1","k2":200} [{"k1":"v1","k2":200},{"k1":"v1","k2":200},"v1",{"k1":"v1","k2":200}]
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} [{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},null,{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}]

-- !jsonb_extract_string_select --
1 \N \N
2 null null

@@ -178,6 +178,9 @@ suite("test_jsonb_load_and_function", "p0") {
    qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-1]') FROM ${testTable} ORDER BY id"
    qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-10]') FROM ${testTable} ORDER BY id"

    // jsonb_extract_multipath
    qt_jsonb_extract_multipath "SELECT id, j, jsonb_extract(j, '\$', '\$.*', '\$.k1', '\$[0]') FROM ${testTable} ORDER BY id"

    // jsonb_extract_string
    qt_jsonb_extract_string_select "SELECT id, j, jsonb_extract_string(j, '\$') FROM ${testTable} ORDER BY id"

@@ -171,6 +171,10 @@ suite("test_jsonb_unique_load_and_function", "p0") {
    qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-1]') FROM ${testTable} ORDER BY id"
    qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-10]') FROM ${testTable} ORDER BY id"


    // jsonb_extract_multipath
    qt_jsonb_extract_multipath "SELECT id, j, jsonb_extract(j, '\$', '\$.*', '\$.k1', '\$[0]') FROM ${testTable} ORDER BY id"

    // jsonb_extract_string
    qt_jsonb_extract_string_select "SELECT id, j, jsonb_extract_string(j, '\$') FROM ${testTable} ORDER BY id"