[Improve](jsonb_extract) support jsonb_extract multi parse path (#21555)

support jsonb_extract multi parse path
This commit is contained in:
amory
2023-07-12 21:37:36 +08:00
committed by GitHub
parent da67d08bca
commit be55cb8dfc
6 changed files with 171 additions and 44 deletions

View File

@ -339,7 +339,7 @@ using FunctionJsonbParseNotnullErrorValue =
using FunctionJsonbParseNotnullErrorInvalid =
FunctionJsonbParseBase<NullalbeMode::NOT_NULL, JsonbParseErrorMode::RETURN_INVALID>;
// func(json,string) -> nullable(type)
// func(jsonb, [varchar, varchar, ...]) -> nullable(type)
template <typename Impl>
class FunctionJsonbExtract : public IFunction {
public:
@ -347,7 +347,8 @@ public:
static constexpr auto alias = Impl::alias;
static FunctionPtr create() { return std::make_shared<FunctionJsonbExtract>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 2; }
bool is_variadic() const override { return true; }
size_t get_number_of_arguments() const override { return 0; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return make_nullable(std::make_shared<typename Impl::ReturnType>());
}
@ -355,26 +356,32 @@ public:
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {
auto null_map = ColumnUInt8::create(input_rows_count, 0);
DCHECK_EQ(arguments.size(), 2);
ColumnPtr argument_columns[2];
bool col_const[2];
for (int i = 0; i < 2; ++i) {
std::tie(argument_columns[i], col_const[i]) =
unpack_if_const(block.get_by_position(arguments[i]).column);
check_set_nullable(argument_columns[i], null_map, col_const[i]);
DCHECK_GE(arguments.size(), 2);
ColumnPtr jsonb_data_column;
bool jsonb_data_const = false;
// prepare jsonb data column
std::tie(jsonb_data_column, jsonb_data_const) =
unpack_if_const(block.get_by_position(arguments[0]).column);
check_set_nullable(jsonb_data_column, null_map, jsonb_data_const);
auto& ldata = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_chars();
auto& loffsets = assert_cast<const ColumnString*>(jsonb_data_column.get())->get_offsets();
// prepare parse path column prepare
std::vector<const ColumnString*> jsonb_path_columns;
std::vector<bool> path_const(arguments.size() - 1);
for (int i = 0; i < arguments.size() - 1; ++i) {
ColumnPtr path_column;
bool is_const = false;
std::tie(path_column, is_const) =
unpack_if_const(block.get_by_position(arguments[i + 1]).column);
path_const[i] = is_const;
check_set_nullable(path_column, null_map, path_const[i]);
jsonb_path_columns.push_back(assert_cast<const ColumnString*>(path_column.get()));
}
auto res = Impl::ColumnType::create();
auto jsonb_data_column = assert_cast<const ColumnString*>(argument_columns[0].get());
auto jsonb_path_column = assert_cast<const ColumnString*>(argument_columns[1].get());
auto& ldata = jsonb_data_column->get_chars();
auto& loffsets = jsonb_data_column->get_offsets();
auto& rdata = jsonb_path_column->get_chars();
auto& roffsets = jsonb_path_column->get_offsets();
bool is_invalid_json_path = false;
// execute Impl
@ -382,36 +389,34 @@ public:
std::is_same_v<typename Impl::ReturnType, DataTypeJsonb>) {
auto& res_data = res->get_chars();
auto& res_offsets = res->get_offsets();
if (col_const[0]) {
Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), rdata, roffsets,
res_data, res_offsets, null_map->get_data(),
is_invalid_json_path);
} else if (col_const[1]) {
Impl::vector_scalar(context, ldata, loffsets, jsonb_path_column->get_data_at(0),
res_data, res_offsets, null_map->get_data(),
is_invalid_json_path);
} else {
Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res_data,
res_offsets, null_map->get_data(), is_invalid_json_path);
Status st = Impl::vector_vector_v2(
context, ldata, loffsets, jsonb_data_const, jsonb_path_columns, path_const,
res_data, res_offsets, null_map->get_data(), is_invalid_json_path);
if (st != Status::OK()) {
return st;
}
} else {
if (col_const[0]) {
// not support other extract type for now (e.g. int, double, ...)
DCHECK_EQ(jsonb_path_columns.size(), 1);
auto& rdata = jsonb_path_columns[0]->get_chars();
auto& roffsets = jsonb_path_columns[0]->get_offsets();
if (jsonb_data_const) {
Impl::scalar_vector(context, jsonb_data_column->get_data_at(0), rdata, roffsets,
res->get_data(), null_map->get_data(), is_invalid_json_path);
} else if (col_const[1]) {
Impl::vector_scalar(context, ldata, loffsets, jsonb_path_column->get_data_at(0),
} else if (path_const[0]) {
Impl::vector_scalar(context, ldata, loffsets, jsonb_path_columns[0]->get_data_at(0),
res->get_data(), null_map->get_data(), is_invalid_json_path);
} else {
Impl::vector_vector(context, ldata, loffsets, rdata, roffsets, res->get_data(),
null_map->get_data(), is_invalid_json_path);
}
}
if (is_invalid_json_path) {
return Status::InvalidArgument(
"Json path error: {} for value: {}",
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
std::string_view(reinterpret_cast<const char*>(rdata.data()), rdata.size()));
if (is_invalid_json_path) {
return Status::InvalidArgument(
"Json path error: {} for value: {}",
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
std::string_view(reinterpret_cast<const char*>(rdata.data()),
rdata.size()));
}
}
block.get_by_position(result).column =
@ -503,6 +508,77 @@ private:
public:
// for jsonb_extract_string
static Status vector_vector_v2(
FunctionContext* context, const ColumnString::Chars& ldata,
const ColumnString::Offsets& loffsets, const bool& json_data_const,
const std::vector<const ColumnString*>& rdata_columns, // here we can support more paths
const std::vector<bool>& path_const, ColumnString::Chars& res_data,
ColumnString::Offsets& res_offsets, NullMap& null_map, bool& is_invalid_json_path) {
size_t input_rows_count = json_data_const ? rdata_columns.size() : loffsets.size();
res_offsets.resize(input_rows_count);
auto writer = std::make_unique<JsonbWriter>();
std::unique_ptr<JsonbToJson> formater;
for (size_t i = 0; i < input_rows_count; ++i) {
if (null_map[i]) {
StringOP::push_null_string(i, res_data, res_offsets, null_map);
continue;
}
size_t l_off = loffsets[index_check_const(i, json_data_const) - 1];
size_t l_size = loffsets[index_check_const(i, json_data_const)] - l_off;
const char* l_raw = reinterpret_cast<const char*>(&ldata[l_off]);
if (rdata_columns.size() == 1) { // just return origin value
const ColumnString* path_col = rdata_columns[0];
const ColumnString::Chars& rdata = path_col->get_chars();
const ColumnString::Offsets& roffsets = path_col->get_offsets();
size_t r_off = roffsets[index_check_const(i, path_const[0]) - 1];
size_t r_size = roffsets[index_check_const(i, path_const[0])] - r_off;
const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
inner_loop_impl(i, res_data, res_offsets, null_map, writer, formater, l_raw, l_size,
r_raw, r_size, is_invalid_json_path);
} else { // will make array string to user
writer->reset();
writer->writeStartArray();
for (size_t pi = 0; pi < rdata_columns.size(); ++pi) {
const ColumnString* path_col = rdata_columns[pi];
const ColumnString::Chars& rdata = path_col->get_chars();
const ColumnString::Offsets& roffsets = path_col->get_offsets();
size_t r_off = roffsets[index_check_const(i, path_const[pi]) - 1];
size_t r_size = roffsets[index_check_const(i, path_const[pi])] - r_off;
const char* r_raw = reinterpret_cast<const char*>(&rdata[r_off]);
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
JsonbDocument* doc = JsonbDocument::createDocument(l_raw, l_size);
if (UNLIKELY(!doc || !doc->getValue())) {
writer->writeNull();
continue;
}
// value is NOT necessary to be deleted since JsonbValue will not allocate memory
JsonbValue* value =
doc->getValue()->findPath(r_raw, r_size, is_invalid_json_path, nullptr);
// if not valid json path , should return error message to user
if (is_invalid_json_path) {
return Status::InvalidArgument(
"Json path error: {} for value: {}",
JsonbErrMsg::getErrMsg(JsonbErrType::E_INVALID_JSON_PATH),
std::string_view(reinterpret_cast<const char*>(rdata.data()),
rdata.size()));
}
if (UNLIKELY(!value)) {
writer->writeNull();
} else {
writer->writeValue(value);
}
}
writer->writeEndArray();
StringOP::push_value_string(std::string_view(writer->getOutput()->getBuffer(),
writer->getOutput()->getSize()),
i, res_data, res_offsets);
}
} //for
return Status::OK();
}
static void vector_vector(FunctionContext* context, const ColumnString::Chars& ldata,
const ColumnString::Offsets& loffsets,
const ColumnString::Chars& rdata,

View File

@ -1690,11 +1690,11 @@ visible_functions = {
[['jsonb_exists_path'], 'BOOLEAN', ['JSONB', 'VARCHAR'], ''],
[['jsonb_exists_path'], 'BOOLEAN', ['JSONB', 'STRING'], ''],
[['jsonb_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['jsonb_type'], 'STRING', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['jsonb_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['jsonb_extract'], 'JSONB', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR', '...'], 'ALWAYS_NULLABLE'],
[['jsonb_extract'], 'JSONB', ['JSONB', 'STRING', '...'], 'ALWAYS_NULLABLE'],
[['jsonb_extract_isnull'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['jsonb_extract_isnull'], 'BOOLEAN', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['jsonb_extract_bool'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
@ -1725,8 +1725,6 @@ visible_functions = {
[['json_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['json_type'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['jsonb_extract'], 'JSONB', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['jsonb_extract'], 'JSONB', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['json_extract_isnull'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['json_extract_isnull'], 'BOOLEAN', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['json_extract_bool'], 'BOOLEAN', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],

View File

@ -709,6 +709,29 @@
27 {"k1":"v1","k2":200} \N
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} \N
-- !jsonb_extract_multipath --
1 \N \N
2 null [null,null,null,null]
3 true [true,null,null,null]
4 false [false,null,null,null]
5 100 [100,null,null,null]
6 10000 [10000,null,null,null]
7 1000000000 [1000000000,null,null,null]
8 1152921504606846976 [1152921504606846976,null,null,null]
9 6.18 [6.18,null,null,null]
10 "abcd" ["abcd",null,null,null]
11 {} [{},{},null,{}]
12 {"k1":"v31","k2":300} [{"k1":"v31","k2":300},{"k1":"v31","k2":300},"v31",{"k1":"v31","k2":300}]
13 [] [[],null,null,null]
14 [123,456] [[123,456],null,null,123]
15 ["abc","def"] [["abc","def"],null,null,"abc"]
16 [null,true,false,100,6.18,"abc"] [[null,true,false,100,6.18,"abc"],null,null,null]
17 [{"k1":"v41","k2":400},1,"a",3.14] [[{"k1":"v41","k2":400},1,"a",3.14],null,null,{"k1":"v41","k2":400}]
18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} [{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},"v31",{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}]
26 \N \N
27 {"k1":"v1","k2":200} [{"k1":"v1","k2":200},{"k1":"v1","k2":200},"v1",{"k1":"v1","k2":200}]
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} [{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},null,{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}]
-- !jsonb_extract_string_select --
1 \N \N
2 null null

View File

@ -709,6 +709,29 @@
27 {"k1":"v1","k2":200} \N
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} \N
-- !jsonb_extract_multipath --
1 \N \N
2 null [null,null,null,null]
3 true [true,null,null,null]
4 false [false,null,null,null]
5 100 [100,null,null,null]
6 10000 [10000,null,null,null]
7 1000000000 [1000000000,null,null,null]
8 1152921504606846976 [1152921504606846976,null,null,null]
9 6.18 [6.18,null,null,null]
10 "abcd" ["abcd",null,null,null]
11 {} [{},{},null,{}]
12 {"k1":"v31","k2":300} [{"k1":"v31","k2":300},{"k1":"v31","k2":300},"v31",{"k1":"v31","k2":300}]
13 [] [[],null,null,null]
14 [123,456] [[123,456],null,null,123]
15 ["abc","def"] [["abc","def"],null,null,"abc"]
16 [null,true,false,100,6.18,"abc"] [[null,true,false,100,6.18,"abc"],null,null,null]
17 [{"k1":"v41","k2":400},1,"a",3.14] [[{"k1":"v41","k2":400},1,"a",3.14],null,null,{"k1":"v41","k2":400}]
18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} [{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]},"v31",{"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]}]
26 \N \N
27 {"k1":"v1","k2":200} [{"k1":"v1","k2":200},{"k1":"v1","k2":200},"v1",{"k1":"v1","k2":200}]
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} [{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"},null,{"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"}]
-- !jsonb_extract_string_select --
1 \N \N
2 null null

View File

@ -178,6 +178,9 @@ suite("test_jsonb_load_and_function", "p0") {
qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-1]') FROM ${testTable} ORDER BY id"
qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-10]') FROM ${testTable} ORDER BY id"
// jsonb_extract_multipath
qt_jsonb_extract_multipath "SELECT id, j, jsonb_extract(j, '\$', '\$.*', '\$.k1', '\$[0]') FROM ${testTable} ORDER BY id"
// jsonb_extract_string
qt_jsonb_extract_string_select "SELECT id, j, jsonb_extract_string(j, '\$') FROM ${testTable} ORDER BY id"

View File

@ -171,6 +171,10 @@ suite("test_jsonb_unique_load_and_function", "p0") {
qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-1]') FROM ${testTable} ORDER BY id"
qt_select "SELECT id, j, jsonb_extract(j, '\$.a1[-10]') FROM ${testTable} ORDER BY id"
// jsonb_extract_multipath
qt_jsonb_extract_multipath "SELECT id, j, jsonb_extract(j, '\$', '\$.*', '\$.k1', '\$[0]') FROM ${testTable} ORDER BY id"
// jsonb_extract_string
qt_jsonb_extract_string_select "SELECT id, j, jsonb_extract_string(j, '\$') FROM ${testTable} ORDER BY id"