[fix](json function) Fix the slow performance of get_json_path when processing JSONB (#24631)

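When the first argument is JSONB, calls to get_json_string / get_json_int / get_json_bigint / get_json_double are automatically routed to the corresponding jsonb_extract_* implementation (e.g. jsonb_extract_string).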
This commit is contained in:
Chenyang Sun
2023-09-27 21:17:39 +08:00
committed by GitHub
parent 732f821c15
commit 68087f6c82
7 changed files with 628 additions and 2 deletions

View File

@ -65,6 +65,11 @@ struct NullPresence {
bool has_null_constant = false;
};
// Satisfied by any implementation type on which
// `get_variadic_argument_types_impl()` can be called and yields exactly
// `DataTypes`. Function wrappers use it to decide whether to forward the
// implementation's variadic argument types or fall back to an empty list.
template <typename Impl>
concept HasGetVariadicArgumentTypesImpl = requires(Impl impl) {
    { impl.get_variadic_argument_types_impl() } -> std::same_as<DataTypes>;
};
// Declarations only; the definitions are not part of this view.
// NOTE(review): judging by the name and the NullPresence return type, both
// overloads presumably report whether the given arguments contain
// nullable / null-constant columns — confirm against the definitions.
NullPresence get_null_presence(const Block& block, const ColumnNumbers& args);
[[maybe_unused]] NullPresence get_null_presence(const ColumnsWithTypeAndName& args);

View File

@ -273,6 +273,10 @@ struct GetJsonNumberType {
using ColumnType = typename NumberType::ColumnType;
using Container = typename ColumnType::Container;
// Argument signature of this implementation: two String arguments.
static DataTypes get_variadic_argument_types_impl() {
    DataTypes argument_types {std::make_shared<DataTypeString>(),
                              std::make_shared<DataTypeString>()};
    return argument_types;
}
static void get_json_impl(rapidjson::Value*& root, const std::string_view& json_string,
const std::string_view& path_string, rapidjson::Document& document,
typename NumberType::T& res, UInt8& null_map) {
@ -526,6 +530,9 @@ struct GetJsonString {
res_offsets);
}
}
// Argument signature of this implementation: two String arguments.
static DataTypes get_variadic_argument_types_impl() {
    // Each argument gets its own type instance, as in a plain brace-list.
    const auto make_string_type = [] { return std::make_shared<DataTypeString>(); };
    return {make_string_type(), make_string_type()};
}
};
template <int flag>

View File

@ -25,6 +25,7 @@
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>
// IWYU pragma: no_include <opentelemetry/common/threadlocal.h>
@ -352,6 +353,13 @@ public:
// The result type is always the nullable wrapper of Impl::ReturnType;
// the argument types are not consulted.
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
    DataTypePtr nested_type = std::make_shared<typename Impl::ReturnType>();
    return make_nullable(nested_type);
}
// Forwards the implementation's variadic argument types when Impl declares
// them; implementations without such a declaration yield an empty list.
DataTypes get_variadic_argument_types_impl() const override {
    if constexpr (!vectorized::HasGetVariadicArgumentTypesImpl<Impl>) {
        return {};
    } else {
        return Impl::get_variadic_argument_types_impl();
    }
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
@ -958,11 +966,19 @@ struct JsonbExtractBool : public JsonbExtractImpl<JsonbTypeBool> {
// JSONB extractor built on JsonbExtractImpl<JsonbTypeInt>.
// `name` / `alias` are the json(b)_extract names; `name2` is the extra
// get_json_* name under which the same implementation is registered.
struct JsonbExtractInt : JsonbExtractImpl<JsonbTypeInt> {
    static constexpr const char* name = "json_extract_int";
    static constexpr const char* alias = "jsonb_extract_int";
    static constexpr const char* name2 = "get_json_int";

    // Argument signature: (JSONB, String).
    static DataTypes get_variadic_argument_types_impl() {
        DataTypes argument_types {std::make_shared<DataTypeJsonb>(),
                                  std::make_shared<DataTypeString>()};
        return argument_types;
    }
};
// JSONB extractor built on JsonbExtractImpl<JsonbTypeInt64>.
// `name` / `alias` are the json(b)_extract names; `name2` is the extra
// get_json_* name under which the same implementation is registered.
struct JsonbExtractBigInt : JsonbExtractImpl<JsonbTypeInt64> {
    static constexpr const char* name = "json_extract_bigint";
    static constexpr const char* alias = "jsonb_extract_bigint";
    static constexpr const char* name2 = "get_json_bigint";

    // Argument signature: (JSONB, String).
    static DataTypes get_variadic_argument_types_impl() {
        DataTypes argument_types {std::make_shared<DataTypeJsonb>(),
                                  std::make_shared<DataTypeString>()};
        return argument_types;
    }
};
struct JsonbExtractLargeInt : public JsonbExtractImpl<JsonbTypeInt128> {
@ -973,11 +989,19 @@ struct JsonbExtractLargeInt : public JsonbExtractImpl<JsonbTypeInt128> {
// JSONB extractor built on JsonbExtractImpl<JsonbTypeDouble>.
// `name` / `alias` are the json(b)_extract names; `name2` is the extra
// get_json_* name under which the same implementation is registered.
struct JsonbExtractDouble : JsonbExtractImpl<JsonbTypeDouble> {
    static constexpr const char* name = "json_extract_double";
    static constexpr const char* alias = "jsonb_extract_double";
    static constexpr const char* name2 = "get_json_double";

    // Argument signature: (JSONB, String).
    static DataTypes get_variadic_argument_types_impl() {
        DataTypes argument_types {std::make_shared<DataTypeJsonb>(),
                                  std::make_shared<DataTypeString>()};
        return argument_types;
    }
};
// JSONB extractor built on JsonbExtractStringImpl<JsonbTypeString>.
// `name` / `alias` are the json(b)_extract names; `name2` is the extra
// get_json_* name under which the same implementation is registered.
struct JsonbExtractString : JsonbExtractStringImpl<JsonbTypeString> {
    static constexpr const char* name = "json_extract_string";
    static constexpr const char* alias = "jsonb_extract_string";
    static constexpr const char* name2 = "get_json_string";

    // Argument signature: (JSONB, String).
    static DataTypes get_variadic_argument_types_impl() {
        DataTypes argument_types {std::make_shared<DataTypeJsonb>(),
                                  std::make_shared<DataTypeString>()};
        return argument_types;
    }
};
struct JsonbExtractJsonb : public JsonbExtractStringImpl<JsonbTypeJson> {
@ -1310,14 +1334,18 @@ void register_function_jsonb(SimpleFunctionFactory& factory) {
factory.register_alias(FunctionJsonbExtractBool::name, FunctionJsonbExtractBool::alias);
factory.register_function<FunctionJsonbExtractInt>();
factory.register_alias(FunctionJsonbExtractInt::name, FunctionJsonbExtractInt::alias);
factory.register_function<FunctionJsonbExtractInt>(JsonbExtractInt::name2);
factory.register_function<FunctionJsonbExtractBigInt>();
factory.register_alias(FunctionJsonbExtractBigInt::name, FunctionJsonbExtractBigInt::alias);
factory.register_function<FunctionJsonbExtractBigInt>(JsonbExtractBigInt::name2);
factory.register_function<FunctionJsonbExtractLargeInt>();
factory.register_alias(FunctionJsonbExtractLargeInt::name, FunctionJsonbExtractLargeInt::alias);
factory.register_function<FunctionJsonbExtractDouble>();
factory.register_alias(FunctionJsonbExtractDouble::name, FunctionJsonbExtractDouble::alias);
factory.register_function<FunctionJsonbExtractDouble>(JsonbExtractDouble::name2);
factory.register_function<FunctionJsonbExtractString>();
factory.register_alias(FunctionJsonbExtractString::name, FunctionJsonbExtractString::alias);
factory.register_function<FunctionJsonbExtractString>(JsonbExtractString::name2);
factory.register_function<FunctionJsonbExtractJsonb>();
// factory.register_alias(FunctionJsonbExtractJsonb::name, FunctionJsonbExtractJsonb::alias);

View File

@ -25,6 +25,7 @@
#include "vec/columns/column_vector.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_bitmap.h"
#include "vec/data_types/data_type_jsonb.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
@ -385,6 +386,14 @@ public:
// The result type is always the nullable wrapper of Impl::ReturnType;
// the argument types are not consulted.
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
    DataTypePtr nested_type = std::make_shared<typename Impl::ReturnType>();
    return make_nullable(nested_type);
}
// Forwards the implementation's variadic argument types when Impl declares
// them; implementations without such a declaration yield an empty list.
DataTypes get_variadic_argument_types_impl() const override {
    if constexpr (!vectorized::HasGetVariadicArgumentTypesImpl<Impl>) {
        return {};
    } else {
        return Impl::get_variadic_argument_types_impl();
    }
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
auto null_map = ColumnUInt8::create(input_rows_count, 0);

View File

@ -116,7 +116,6 @@ public:
if (!types.empty()) {
function_variadic_set.insert(name);
}
std::string key_str = name;
if (!types.empty()) {
for (const auto& type : types) {
@ -137,7 +136,7 @@ public:
/// Registers `Function` under an explicitly supplied `name` (used when one
/// implementation is exposed under an additional name).
///
/// @tparam Function  function class instantiated via createDefaultFunction.
/// @param  name      name under which the function becomes reachable.
template <class Function>
void register_function(std::string name) {
    // Always go through the (name, creator) overload instead of writing to
    // function_creators directly: a direct write stores the creator under the
    // bare name and skips the variadic-signature handling, so two
    // implementations sharing one name but differing in argument types could
    // not be told apart.
    register_function(name, &createDefaultFunction<Function>);
}
void register_alias(const std::string& name, const std::string& alias) {

View File

@ -1487,4 +1487,574 @@ TEST(FunctionJsonbTEST, JsonbCastFromOtherTest) {
"CAST", {Notnull {TypeIndex::String}, ConstedNotnull {TypeIndex::JSONB}},
{{{STRING(R"("abcd")"), Null()}, STRING(R"("abcd")")}});
}
// Exercises "get_json_string" with (JSONB, String) arguments.
// Every DataSet row has the shape {{json document, json path}, expected result};
// Null() as the expected value means the function is expected to return NULL.
// NOTE(review): the `true` template argument of check_function presumably marks
// the result as nullable — confirm against the test utility.
TEST(FunctionJsonbTEST, GetJSONSTRINGTest) {
std::string func_name = "get_json_string";
InputTypeSet input_types = {TypeIndex::JSONB, TypeIndex::String};
// get json from root
// Rows below expect the whole document back: containers are serialized
// without whitespace and a string document is returned without quotes.
DataSet data_set = {
{{Null(), STRING("$")}, Null()},
{{STRING("null"), STRING("$")}, STRING("null")},
{{STRING("true"), STRING("$")}, STRING("true")},
{{STRING("false"), STRING("$")}, STRING("false")},
{{STRING("100"), STRING("$")}, STRING("100")}, //int8
{{STRING("10000"), STRING("$")}, STRING("10000")}, // int16
{{STRING("1000000000"), STRING("$")}, STRING("1000000000")}, // int32
{{STRING("1152921504606846976"), STRING("$")}, STRING("1152921504606846976")}, // int64
{{STRING("6.18"), STRING("$")}, STRING("6.18")}, // double
{{STRING(R"("abcd")"), STRING("$")}, STRING("abcd")}, // string
{{STRING("{}"), STRING("$")}, STRING("{}")}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$")},
STRING(R"({"k1":"v31","k2":300})")}, // object
{{STRING("[]"), STRING("$")}, STRING("[]")}, // empty array
{{STRING("[123, 456]"), STRING("$")}, STRING("[123,456]")}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$")},
STRING(R"(["abc","def"])")}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$")},
STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$")},
STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array
};
check_function<DataTypeString, true>(func_name, input_types, data_set);
// get json from object
// Only the object that actually contains key "k1" is expected to produce a value.
data_set = {
{{Null(), STRING("$.k1")}, Null()},
{{STRING("null"), STRING("$.k1")}, Null()},
{{STRING("true"), STRING("$.k1")}, Null()},
{{STRING("false"), STRING("$.k1")}, Null()},
{{STRING("100"), STRING("$.k1")}, Null()}, //int8
{{STRING("10000"), STRING("$.k1")}, Null()}, // int16
{{STRING("1000000000"), STRING("$.k1")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$.k1")}, Null()}, // int64
{{STRING("6.18"), STRING("$.k1")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$.k1")}, Null()}, // string
{{STRING("{}"), STRING("$.k1")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$.k1")}, STRING("v31")}, // object
{{STRING("[]"), STRING("$.k1")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$.k1")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$.k1")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$.k1")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$.k1")},
Null()}, // complex array
};
check_function<DataTypeString, true>(func_name, input_types, data_set);
// get json from array
// Note the rows for object documents: "$[0]" on an object is expected to
// return the object itself, while "$[0]" on a scalar is expected to be NULL.
// Out-of-range indexes are expected to be NULL.
data_set = {
{{Null(), STRING("$[0]")}, Null()},
{{STRING("null"), STRING("$[0]")}, Null()},
{{STRING("true"), STRING("$[0]")}, Null()},
{{STRING("false"), STRING("$[0]")}, Null()},
{{STRING("100"), STRING("$[0]")}, Null()}, //int8
{{STRING("10000"), STRING("$[0]")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[0]")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[0]")}, Null()}, // int64
{{STRING("6.18"), STRING("$[0]")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[0]")}, Null()}, // string
{{STRING("{}"), STRING("$[0]")}, STRING("{}")}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0]")},
STRING(R"({"k1":"v31","k2":300})")}, // object
{{STRING("[]"), STRING("$[0]")}, Null()}, // empty array
{{STRING("null"), STRING("$[1]")}, Null()},
{{STRING("true"), STRING("$[1]")}, Null()},
{{STRING("false"), STRING("$[1]")}, Null()},
{{STRING("100"), STRING("$[1]")}, Null()}, //int8
{{STRING("10000"), STRING("$[1]")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[1]")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[1]")}, Null()}, // int64
{{STRING("6.18"), STRING("$[1]")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[1]")}, Null()}, // string
{{STRING("{}"), STRING("$[1]")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[1]")}, Null()}, // object
{{STRING("[]"), STRING("$[1]")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$[0]")}, STRING("123")}, // int array
{{STRING("[123, 456]"), STRING("$[1]")}, STRING("456")}, // int array
{{STRING("[123, 456]"), STRING("$[2]")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$[0]")}, STRING("abc")}, // string array
{{STRING(R"(["abc", "def"])"), STRING("$[1]")}, STRING("def")}, // string array
{{STRING(R"(["abc", "def"])"), STRING("$[2]")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0]")},
STRING("null")}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[1]")},
STRING("true")}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[2]")},
STRING("false")}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[3]")},
STRING("100")}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[4]")},
STRING("6.18")}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[5]")},
STRING("abc")}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[6]")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0]")},
STRING(R"({"k1":"v41","k2":400})")}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[1]")},
STRING("1")}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[2]")},
STRING("a")}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[3]")},
STRING("3.14")}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[4]")},
Null()}, // complex array
};
check_function<DataTypeString, true>(func_name, input_types, data_set);
// get json with path $[0].k1
// Combined index + key path; a plain object document is expected to resolve
// "$[0].k1" the same way as "$.k1" (see the "object" row).
data_set = {
{{Null(), STRING("$[0].k1")}, Null()},
{{STRING("null"), STRING("$[0].k1")}, Null()},
{{STRING("true"), STRING("$[0].k1")}, Null()},
{{STRING("false"), STRING("$[0].k1")}, Null()},
{{STRING("100"), STRING("$[0].k1")}, Null()}, //int8
{{STRING("10000"), STRING("$[0].k1")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[0].k1")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[0].k1")}, Null()}, // int64
{{STRING("6.18"), STRING("$[0].k1")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[0].k1")}, Null()}, // string
{{STRING("{}"), STRING("$[0].k1")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0].k1")}, STRING(R"(v31)")}, // object
{{STRING("[]"), STRING("$[0].k1")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$[0].k1")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$[0].k1")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0].k1")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k1")},
STRING("v41")}, // complex array
};
check_function<DataTypeString, true>(func_name, input_types, data_set);
}
// Exercises "get_json_int" with (JSONB, String) arguments and an Int32 result.
// Every DataSet row has the shape {{json document, json path}, expected result};
// Null() as the expected value means the function is expected to return NULL.
// In these rows only integer values up to the int32 sample produce a value;
// the int64 sample, doubles, strings, booleans and containers expect NULL.
TEST(FunctionJsonbTEST, GetJsonIntTest) {
std::string func_name = "get_json_int";
InputTypeSet input_types = {TypeIndex::JSONB, TypeIndex::String};
// get json from root
DataSet data_set = {
{{Null(), STRING("$")}, Null()},
{{STRING("null"), STRING("$")}, Null()},
{{STRING("true"), STRING("$")}, Null()},
{{STRING("false"), STRING("$")}, Null()},
{{STRING("100"), STRING("$")}, INT(100)}, //int8
{{STRING("10000"), STRING("$")}, INT(10000)}, // int16
{{STRING("1000000000"), STRING("$")}, INT(1000000000)}, // int32
{{STRING("1152921504606846976"), STRING("$")}, Null()}, // int64
{{STRING("6.18"), STRING("$")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$")}, Null()}, // string
{{STRING("{}"), STRING("$")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$")}, Null()}, // object
{{STRING("[]"), STRING("$")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$")},
Null()}, // complex array
};
check_function<DataTypeInt32, true>(func_name, input_types, data_set);
// get json from object
// "k1" holds a string in every sample object, so every row expects NULL.
data_set = {
{{Null(), STRING("$.k1")}, Null()},
{{STRING("null"), STRING("$.k1")}, Null()},
{{STRING("true"), STRING("$.k1")}, Null()},
{{STRING("false"), STRING("$.k1")}, Null()},
{{STRING("100"), STRING("$.k1")}, Null()}, //int8
{{STRING("10000"), STRING("$.k1")}, Null()}, // int16
{{STRING("1000000000"), STRING("$.k1")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$.k1")}, Null()}, // int64
{{STRING("6.18"), STRING("$.k1")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$.k1")}, Null()}, // string
{{STRING("{}"), STRING("$.k1")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$.k1")}, Null()}, // object
{{STRING("[]"), STRING("$.k1")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$.k1")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$.k1")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$.k1")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$.k1")},
Null()}, // complex array
};
check_function<DataTypeInt32, true>(func_name, input_types, data_set);
// get json from array
// Only array elements that are integers are expected to produce a value.
data_set = {
{{Null(), STRING("$[0]")}, Null()},
{{STRING("null"), STRING("$[0]")}, Null()},
{{STRING("true"), STRING("$[0]")}, Null()},
{{STRING("false"), STRING("$[0]")}, Null()},
{{STRING("100"), STRING("$[0]")}, Null()}, //int8
{{STRING("10000"), STRING("$[0]")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[0]")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[0]")}, Null()}, // int64
{{STRING("6.18"), STRING("$[0]")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[0]")}, Null()}, // string
{{STRING("{}"), STRING("$[0]")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0]")}, Null()}, // object
{{STRING("[]"), STRING("$[0]")}, Null()}, // empty array
{{STRING("null"), STRING("$[1]")}, Null()},
{{STRING("true"), STRING("$[1]")}, Null()},
{{STRING("false"), STRING("$[1]")}, Null()},
{{STRING("100"), STRING("$[1]")}, Null()}, //int8
{{STRING("10000"), STRING("$[1]")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[1]")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[1]")}, Null()}, // int64
{{STRING("6.18"), STRING("$[1]")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[1]")}, Null()}, // string
{{STRING("{}"), STRING("$[1]")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[1]")}, Null()}, // object
{{STRING("[]"), STRING("$[1]")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$[0]")}, INT(123)}, // int array
{{STRING("[123, 456]"), STRING("$[1]")}, INT(456)}, // int array
{{STRING("[123, 456]"), STRING("$[2]")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$[0]")}, Null()}, // string array
{{STRING(R"(["abc", "def"])"), STRING("$[1]")}, Null()}, // string array
{{STRING(R"(["abc", "def"])"), STRING("$[2]")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[1]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[2]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[3]")},
INT(100)}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[4]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[5]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[6]")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0]")},
Null()}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[1]")},
INT(1)}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[2]")},
Null()}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[3]")},
Null()}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[4]")},
Null()}, // complex array
};
check_function<DataTypeInt32, true>(func_name, input_types, data_set);
// get json with path $[0].k1
// The final row uses "$[0].k2" instead, the only nested integer in the samples.
data_set = {
{{Null(), STRING("$[0].k1")}, Null()},
{{STRING("null"), STRING("$[0].k1")}, Null()},
{{STRING("true"), STRING("$[0].k1")}, Null()},
{{STRING("false"), STRING("$[0].k1")}, Null()},
{{STRING("100"), STRING("$[0].k1")}, Null()}, //int8
{{STRING("10000"), STRING("$[0].k1")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[0].k1")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[0].k1")}, Null()}, // int64
{{STRING("6.18"), STRING("$[0].k1")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[0].k1")}, Null()}, // string
{{STRING("{}"), STRING("$[0].k1")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0].k1")}, Null()}, // object
{{STRING("[]"), STRING("$[0].k1")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$[0].k1")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$[0].k1")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0].k1")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k1")},
Null()}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k2")},
INT(400)}, // complex array
};
check_function<DataTypeInt32, true>(func_name, input_types, data_set);
}
// Exercises "get_json_bigint" with (JSONB, String) arguments and an Int64 result.
// Every DataSet row has the shape {{json document, json path}, expected result};
// Null() as the expected value means the function is expected to return NULL.
// In these rows every integer sample (including the int64 one) produces a
// value; doubles, strings, booleans and containers expect NULL.
TEST(FunctionJsonbTEST, GetJsonBigIntTest) {
std::string func_name = "get_json_bigint";
InputTypeSet input_types = {TypeIndex::JSONB, TypeIndex::String};
// get json from root
DataSet data_set = {
{{Null(), STRING("$")}, Null()},
{{STRING("null"), STRING("$")}, Null()},
{{STRING("true"), STRING("$")}, Null()},
{{STRING("false"), STRING("$")}, Null()},
{{STRING("100"), STRING("$")}, BIGINT(100)}, //int8
{{STRING("10000"), STRING("$")}, BIGINT(10000)}, // int16
{{STRING("1000000000"), STRING("$")}, BIGINT(1000000000)}, // int32
{{STRING("1152921504606846976"), STRING("$")}, BIGINT(1152921504606846976)}, // int64
{{STRING("6.18"), STRING("$")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$")}, Null()}, // string
{{STRING("{}"), STRING("$")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$")}, Null()}, // object
{{STRING("[]"), STRING("$")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$")},
Null()}, // complex array
};
check_function<DataTypeInt64, true>(func_name, input_types, data_set);
// get json from object
// "k1" holds a string in every sample object, so every row expects NULL.
data_set = {
{{Null(), STRING("$.k1")}, Null()},
{{STRING("null"), STRING("$.k1")}, Null()},
{{STRING("true"), STRING("$.k1")}, Null()},
{{STRING("false"), STRING("$.k1")}, Null()},
{{STRING("100"), STRING("$.k1")}, Null()}, //int8
{{STRING("10000"), STRING("$.k1")}, Null()}, // int16
{{STRING("1000000000"), STRING("$.k1")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$.k1")}, Null()}, // int64
{{STRING("6.18"), STRING("$.k1")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$.k1")}, Null()}, // string
{{STRING("{}"), STRING("$.k1")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$.k1")}, Null()}, // object
{{STRING("[]"), STRING("$.k1")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$.k1")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$.k1")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$.k1")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$.k1")},
Null()}, // complex array
};
check_function<DataTypeInt64, true>(func_name, input_types, data_set);
// get json from array
// Only array elements that are integers are expected to produce a value.
data_set = {
{{Null(), STRING("$[0]")}, Null()},
{{STRING("null"), STRING("$[0]")}, Null()},
{{STRING("true"), STRING("$[0]")}, Null()},
{{STRING("false"), STRING("$[0]")}, Null()},
{{STRING("100"), STRING("$[0]")}, Null()}, //int8
{{STRING("10000"), STRING("$[0]")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[0]")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[0]")}, Null()}, // int64
{{STRING("6.18"), STRING("$[0]")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[0]")}, Null()}, // string
{{STRING("{}"), STRING("$[0]")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0]")}, Null()}, // object
{{STRING("[]"), STRING("$[0]")}, Null()}, // empty array
{{STRING("null"), STRING("$[1]")}, Null()},
{{STRING("true"), STRING("$[1]")}, Null()},
{{STRING("false"), STRING("$[1]")}, Null()},
{{STRING("100"), STRING("$[1]")}, Null()}, //int8
{{STRING("10000"), STRING("$[1]")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[1]")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[1]")}, Null()}, // int64
{{STRING("6.18"), STRING("$[1]")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[1]")}, Null()}, // string
{{STRING("{}"), STRING("$[1]")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[1]")}, Null()}, // object
{{STRING("[]"), STRING("$[1]")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$[0]")}, BIGINT(123)}, // int array
{{STRING("[123, 456]"), STRING("$[1]")}, BIGINT(456)}, // int array
{{STRING("[123, 456]"), STRING("$[2]")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$[0]")}, Null()}, // string array
{{STRING(R"(["abc", "def"])"), STRING("$[1]")}, Null()}, // string array
{{STRING(R"(["abc", "def"])"), STRING("$[2]")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[1]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[2]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[3]")},
BIGINT(100)}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[4]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[5]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[6]")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0]")},
Null()}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[1]")},
BIGINT(1)}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[2]")},
Null()}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[3]")},
Null()}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[4]")},
Null()}, // complex array
};
check_function<DataTypeInt64, true>(func_name, input_types, data_set);
// get json with path $[0].k1
// The final row uses "$[0].k2" instead, the only nested integer in the samples.
data_set = {
{{Null(), STRING("$[0].k1")}, Null()},
{{STRING("null"), STRING("$[0].k1")}, Null()},
{{STRING("true"), STRING("$[0].k1")}, Null()},
{{STRING("false"), STRING("$[0].k1")}, Null()},
{{STRING("100"), STRING("$[0].k1")}, Null()}, //int8
{{STRING("10000"), STRING("$[0].k1")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[0].k1")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[0].k1")}, Null()}, // int64
{{STRING("6.18"), STRING("$[0].k1")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[0].k1")}, Null()}, // string
{{STRING("{}"), STRING("$[0].k1")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0].k1")}, Null()}, // object
{{STRING("[]"), STRING("$[0].k1")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$[0].k1")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$[0].k1")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0].k1")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k1")},
Null()}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k2")},
BIGINT(400)}, // complex array
};
check_function<DataTypeInt64, true>(func_name, input_types, data_set);
}
// Exercises "get_json_double" with (JSONB, String) arguments and a Float64 result.
// Every DataSet row has the shape {{json document, json path}, expected result};
// Null() as the expected value means the function is expected to return NULL.
// In these rows both integer and double samples produce a value (integers are
// expected as their double equivalent); strings, booleans and containers expect NULL.
// Unlike the sibling tests, only the root data set contains a row with a NULL document.
TEST(FunctionJsonbTEST, GetJsonDoubleTest) {
std::string func_name = "get_json_double";
InputTypeSet input_types = {TypeIndex::JSONB, TypeIndex::String};
// get json from root
DataSet data_set = {
{{Null(), STRING("$")}, Null()},
{{STRING("null"), STRING("$")}, Null()},
{{STRING("true"), STRING("$")}, Null()},
{{STRING("false"), STRING("$")}, Null()},
{{STRING("100"), STRING("$")}, DOUBLE(100)}, //int8
{{STRING("10000"), STRING("$")}, DOUBLE(10000)}, // int16
{{STRING("1000000000"), STRING("$")}, DOUBLE(1000000000)}, // int32
{{STRING("1152921504606846976"), STRING("$")}, DOUBLE(1152921504606846976)}, // int64
{{STRING("6.18"), STRING("$")}, DOUBLE(6.18)}, // double
{{STRING(R"("abcd")"), STRING("$")}, Null()}, // string
{{STRING("{}"), STRING("$")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$")}, Null()}, // object
{{STRING("[]"), STRING("$")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$")},
Null()}, // complex array
};
check_function<DataTypeFloat64, true>(func_name, input_types, data_set);
// get json from object
// "k1" holds a string in every sample object, so every row expects NULL.
data_set = {
{{STRING("null"), STRING("$.k1")}, Null()},
{{STRING("true"), STRING("$.k1")}, Null()},
{{STRING("false"), STRING("$.k1")}, Null()},
{{STRING("100"), STRING("$.k1")}, Null()}, //int8
{{STRING("10000"), STRING("$.k1")}, Null()}, // int16
{{STRING("1000000000"), STRING("$.k1")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$.k1")}, Null()}, // int64
{{STRING("6.18"), STRING("$.k1")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$.k1")}, Null()}, // string
{{STRING("{}"), STRING("$.k1")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$.k1")}, Null()}, // object
{{STRING("[]"), STRING("$.k1")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$.k1")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$.k1")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$.k1")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$.k1")},
Null()}, // complex array
};
check_function<DataTypeFloat64, true>(func_name, input_types, data_set);
// get json from array
// Only array elements that are numbers are expected to produce a value.
data_set = {
{{STRING("null"), STRING("$[0]")}, Null()},
{{STRING("true"), STRING("$[0]")}, Null()},
{{STRING("false"), STRING("$[0]")}, Null()},
{{STRING("100"), STRING("$[0]")}, Null()}, //int8
{{STRING("10000"), STRING("$[0]")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[0]")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[0]")}, Null()}, // int64
{{STRING("6.18"), STRING("$[0]")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[0]")}, Null()}, // string
{{STRING("{}"), STRING("$[0]")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0]")}, Null()}, // object
{{STRING("[]"), STRING("$[0]")}, Null()}, // empty array
{{STRING("null"), STRING("$[1]")}, Null()},
{{STRING("true"), STRING("$[1]")}, Null()},
{{STRING("false"), STRING("$[1]")}, Null()},
{{STRING("100"), STRING("$[1]")}, Null()}, //int8
{{STRING("10000"), STRING("$[1]")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[1]")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[1]")}, Null()}, // int64
{{STRING("6.18"), STRING("$[1]")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[1]")}, Null()}, // string
{{STRING("{}"), STRING("$[1]")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[1]")}, Null()}, // object
{{STRING("[]"), STRING("$[1]")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$[0]")}, DOUBLE(123)}, // int array
{{STRING("[123, 456]"), STRING("$[1]")}, DOUBLE(456)}, // int array
{{STRING("[123, 456]"), STRING("$[2]")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$[0]")}, Null()}, // string array
{{STRING(R"(["abc", "def"])"), STRING("$[1]")}, Null()}, // string array
{{STRING(R"(["abc", "def"])"), STRING("$[2]")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[1]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[2]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[3]")},
DOUBLE(100)}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[4]")},
DOUBLE(6.18)}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[5]")},
Null()}, // multi type array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[6]")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0]")},
Null()}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[1]")},
DOUBLE(1)}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[2]")},
Null()}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[3]")},
DOUBLE(3.14)}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[4]")},
Null()}, // complex array
};
check_function<DataTypeFloat64, true>(func_name, input_types, data_set);
// get json with path $[0].k1
// The final row uses "$[0].k2" instead, the only nested number in the samples.
data_set = {
{{STRING("null"), STRING("$[0].k1")}, Null()},
{{STRING("true"), STRING("$[0].k1")}, Null()},
{{STRING("false"), STRING("$[0].k1")}, Null()},
{{STRING("100"), STRING("$[0].k1")}, Null()}, //int8
{{STRING("10000"), STRING("$[0].k1")}, Null()}, // int16
{{STRING("1000000000"), STRING("$[0].k1")}, Null()}, // int32
{{STRING("1152921504606846976"), STRING("$[0].k1")}, Null()}, // int64
{{STRING("6.18"), STRING("$[0].k1")}, Null()}, // double
{{STRING(R"("abcd")"), STRING("$[0].k1")}, Null()}, // string
{{STRING("{}"), STRING("$[0].k1")}, Null()}, // empty object
{{STRING(R"({"k1":"v31", "k2": 300})"), STRING("$[0].k1")}, Null()}, // object
{{STRING("[]"), STRING("$[0].k1")}, Null()}, // empty array
{{STRING("[123, 456]"), STRING("$[0].k1")}, Null()}, // int array
{{STRING(R"(["abc", "def"])"), STRING("$[0].k1")}, Null()}, // string array
{{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("$[0].k1")},
Null()}, // multi type array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k1")},
Null()}, // complex array
{{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("$[0].k2")},
DOUBLE(400)}, // complex array
};
check_function<DataTypeFloat64, true>(func_name, input_types, data_set);
}
} // namespace doris::vectorized

View File

@ -1781,6 +1781,14 @@ visible_functions = {
[['get_json_string'], 'STRING', ['STRING', 'STRING'], 'ALWAYS_NULLABLE'],
[['get_json_bigint'], 'BIGINT', ['VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['get_json_bigint'], 'BIGINT', ['STRING', 'STRING'], 'ALWAYS_NULLABLE'],
[['get_json_string'], 'STRING', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['get_json_string'], 'STRING', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['get_json_int'], 'INT', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['get_json_int'], 'INT', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['get_json_double'], 'DOUBLE', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['get_json_double'], 'DOUBLE', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['get_json_bigint'], 'BIGINT', ['JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['get_json_bigint'], 'BIGINT', ['JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['json_array'], 'VARCHAR', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'],
[['json_object'], 'VARCHAR', ['VARCHAR', '...'], 'ALWAYS_NOT_NULLABLE'],