From 1bd14f1d82a25fc0e48fa0cd19a4622ba48f0013 Mon Sep 17 00:00:00 2001 From: Kang Date: Wed, 12 Oct 2022 13:56:37 +0800 Subject: [PATCH] [feature-wip](jsonb) jsonb parse function and load (#13129) add function to parse json string to jsonb format and use it to support stream load. --- be/src/exprs/anyval_util.cpp | 4 + be/src/util/jsonb_parser.h | 30 +- be/src/util/jsonb_writer.h | 44 +- be/src/vec/CMakeLists.txt | 1 + be/src/vec/functions/function_jsonb.cpp | 314 +++++++++++ .../vec/functions/simple_function_factory.h | 2 + be/src/vec/sink/vtablet_sink.cpp | 17 + be/test/CMakeLists.txt | 1 + be/test/vec/function/function_jsonb_test.cpp | 512 ++++++++++++++++++ be/test/vec/function/function_test_util.h | 62 ++- .../apache/doris/planner/LoadScanNode.java | 18 +- gensrc/script/doris_builtins_functions.py | 35 ++ 12 files changed, 1007 insertions(+), 33 deletions(-) create mode 100644 be/src/vec/functions/function_jsonb.cpp create mode 100644 be/test/vec/function/function_jsonb_test.cpp diff --git a/be/src/exprs/anyval_util.cpp b/be/src/exprs/anyval_util.cpp index 1c3765578a..346da87317 100644 --- a/be/src/exprs/anyval_util.cpp +++ b/be/src/exprs/anyval_util.cpp @@ -216,6 +216,10 @@ FunctionContext::TypeDesc AnyValUtil::column_type_to_type_desc(const TypeDescrip out.type = FunctionContext::TYPE_STRING; out.len = type.len; break; + case TYPE_JSONB: + out.type = FunctionContext::TYPE_JSONB; + out.len = type.len; + break; default: DCHECK(false) << "Unknown type: " << type; } diff --git a/be/src/util/jsonb_parser.h b/be/src/util/jsonb_parser.h index e72e9c5d7c..d261a0c532 100644 --- a/be/src/util/jsonb_parser.h +++ b/be/src/util/jsonb_parser.h @@ -122,7 +122,8 @@ public: skipChar(in); res = parseArray(in, handler); } else { - err_ = handle_parse_failure(in); + res = parsePrimitive(in, handler); + if (!res) err_ = handle_parse_failure(in); } trim(in); @@ -211,6 +212,33 @@ private: return error; } + // parse primitive + bool parsePrimitive(std::istream& in, hDictInsert 
handler) { + bool res = false; + switch (in.peek()) { + case 'n': + skipChar(in); + res = parseNull(in); + break; + case 't': + skipChar(in); + res = parseTrue(in); + break; + case 'f': + skipChar(in); + res = parseFalse(in); + break; + case '"': + skipChar(in); + res = parseString(in); + break; + default: + res = parseNumber(in); + } + + return res; + } + // parse a JSON object (comma-separated list of key-value pairs) bool parseObject(std::istream& in, hDictInsert handler) { if (!writer_.writeStartObject()) { diff --git a/be/src/util/jsonb_writer.h b/be/src/util/jsonb_writer.h index 5073d34648..ccce7d98e0 100644 --- a/be/src/util/jsonb_writer.h +++ b/be/src/util/jsonb_writer.h @@ -66,6 +66,7 @@ public: os_->seekp(0); hasHdr_ = false; kvState_ = WS_Value; + first_ = true; for (; !stack_.empty(); stack_.pop()) ; } @@ -125,8 +126,24 @@ public: return 0; } + bool writeFirstHeader() { + if (first_ && stack_.empty()) { + first_ = false; + // if this is a new JSONB, write the header + if (!hasHdr_) { + writeHeader(); + return true; + } else { + return false; + } + } else { + return true; + } + } + uint32_t writeNull() { - if (!stack_.empty() && verifyValueState()) { + if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) { + if (!writeFirstHeader()) return 0; os_->put((JsonbTypeUnder)JsonbType::T_Null); kvState_ = WS_Value; return sizeof(JsonbValue); @@ -136,7 +153,8 @@ public: } uint32_t writeBool(bool b) { - if (!stack_.empty() && verifyValueState()) { + if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) { + if (!writeFirstHeader()) return 0; if (b) { os_->put((JsonbTypeUnder)JsonbType::T_True); } else { @@ -168,7 +186,8 @@ public: } uint32_t writeInt8(int8_t v) { - if (!stack_.empty() && verifyValueState()) { + if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) { + if (!writeFirstHeader()) return 0; os_->put((JsonbTypeUnder)JsonbType::T_Int8); os_->put(v); kvState_ = WS_Value; @@ -179,7 +198,8 @@ 
public: } uint32_t writeInt16(int16_t v) { - if (!stack_.empty() && verifyValueState()) { + if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) { + if (!writeFirstHeader()) return 0; os_->put((JsonbTypeUnder)JsonbType::T_Int16); os_->write((char*)&v, sizeof(int16_t)); kvState_ = WS_Value; @@ -190,7 +210,8 @@ public: } uint32_t writeInt32(int32_t v) { - if (!stack_.empty() && verifyValueState()) { + if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) { + if (!writeFirstHeader()) return 0; os_->put((JsonbTypeUnder)JsonbType::T_Int32); os_->write((char*)&v, sizeof(int32_t)); kvState_ = WS_Value; @@ -201,7 +222,8 @@ public: } uint32_t writeInt64(int64_t v) { - if (!stack_.empty() && verifyValueState()) { + if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) { + if (!writeFirstHeader()) return 0; os_->put((JsonbTypeUnder)JsonbType::T_Int64); os_->write((char*)&v, sizeof(int64_t)); kvState_ = WS_Value; @@ -212,7 +234,8 @@ public: } uint32_t writeDouble(double v) { - if (!stack_.empty() && verifyValueState()) { + if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) { + if (!writeFirstHeader()) return 0; os_->put((JsonbTypeUnder)JsonbType::T_Double); os_->write((char*)&v, sizeof(double)); kvState_ = WS_Value; @@ -224,7 +247,8 @@ public: // must call writeStartString before writing a string val bool writeStartString() { - if (!stack_.empty() && verifyValueState()) { + if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) { + if (!writeFirstHeader()) return 0; os_->put((JsonbTypeUnder)JsonbType::T_String); str_pos_ = os_->tellp(); @@ -280,7 +304,8 @@ public: // must call writeStartBinary before writing a binary val bool writeStartBinary() { - if (!stack_.empty() && verifyValueState()) { + if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) { + if (!writeFirstHeader()) return 0; os_->put((JsonbTypeUnder)JsonbType::T_Binary); str_pos_ = 
os_->tellp(); @@ -497,6 +522,7 @@ private: WriteState kvState_; // key or value state std::streampos str_pos_; std::stack stack_; + bool first_ = true; }; typedef JsonbWriterT JsonbWriter; diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index d91eec109f..d35b664f3c 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -173,6 +173,7 @@ set(VEC_FILES functions/function_utility.cpp functions/comparison_equal_for_null.cpp functions/function_json.cpp + functions/function_jsonb.cpp functions/function_datetime_floor_ceil.cpp functions/functions_geo.cpp functions/hll_cardinality.cpp diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp new file mode 100644 index 0000000000..39cf25de81 --- /dev/null +++ b/be/src/vec/functions/function_jsonb.cpp @@ -0,0 +1,314 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include + +#include "exprs/json_functions.h" +#include "util/string_parser.hpp" +#include "util/string_util.h" +#include "vec/columns/column.h" +#include "vec/columns/column_jsonb.h" +#include "vec/columns/column_nullable.h" +#include "vec/columns/column_string.h" +#include "vec/columns/column_vector.h" +#include "vec/common/string_ref.h" +#include "vec/data_types/data_type_jsonb.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" +#include "vec/functions/function_string.h" +#include "vec/functions/function_totype.h" +#include "vec/functions/simple_function_factory.h" +#include "vec/utils/template_helpers.hpp" + +namespace doris::vectorized { + +enum class NullalbeMode { + NULLABLE = 0, + NOT_NULL, + FOLLOW_INPUT, +}; + +enum class JsonbParseErrorMode { FAIL = 0, RETURN_NULL, RETURN_VALUE, RETURN_INVALID }; + +// func(string,string) -> json +template +class FunctionJsonbParseBase : public IFunction { +private: + JsonbParser default_value_parser; + bool has_const_default_value = false; + +public: + static constexpr auto name = "jsonb_parse"; + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { + String nullable; + switch (nullable_mode) { + case NullalbeMode::NULLABLE: + nullable = "_nullable"; + break; + case NullalbeMode::NOT_NULL: + nullable = "_notnull"; + break; + case NullalbeMode::FOLLOW_INPUT: + nullable = ""; + break; + } + + String error_mode; + switch (parse_error_handle_mode) { + case JsonbParseErrorMode::FAIL: + break; + case JsonbParseErrorMode::RETURN_NULL: + error_mode = "_error_to_null"; + break; + case JsonbParseErrorMode::RETURN_VALUE: + error_mode = "_error_to_value"; + break; + case JsonbParseErrorMode::RETURN_INVALID: + error_mode = "_error_to_invalid"; + break; + } + + return name + nullable + error_mode; + } + + size_t get_number_of_arguments() const override { + switch (parse_error_handle_mode) { + case JsonbParseErrorMode::FAIL: + return 
1; + case JsonbParseErrorMode::RETURN_NULL: + return 1; + case JsonbParseErrorMode::RETURN_VALUE: + return 2; + case JsonbParseErrorMode::RETURN_INVALID: + return 1; + } + } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + bool is_nullable = true; + switch (nullable_mode) { + case NullalbeMode::NULLABLE: + is_nullable = true; + break; + case NullalbeMode::NOT_NULL: + is_nullable = false; + break; + case NullalbeMode::FOLLOW_INPUT: + is_nullable = arguments[0]->is_nullable(); + break; + } + + return is_nullable ? make_nullable(std::make_shared()) + : std::make_shared(); + } + + bool use_default_implementation_for_nulls() const override { return false; } + + bool use_default_implementation_for_constants() const override { return true; } + + Status prepare(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if constexpr (parse_error_handle_mode == JsonbParseErrorMode::RETURN_VALUE) { + if (context->is_col_constant(1)) { + const auto default_value_col = context->get_constant_col(1)->column_ptr; + const auto& default_value = default_value_col->get_data_at(0); + + JsonbErrType error = JsonbErrType::E_NONE; + if (!default_value_parser.parse(default_value.data, default_value.size)) { + error = default_value_parser.getErrorCode(); + return Status::InvalidArgument( + "invalid default json value: {} , error: {}", + std::string_view(default_value.data, default_value.size), + JsonbErrMsg::getErrMsg(error)); + } + has_const_default_value = true; + } + } + return Status::OK(); + } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + const IColumn& col_from = *(block.get_by_position(arguments[0]).column); + + auto null_map = ColumnUInt8::create(0, 0); + bool is_nullable = false; + switch (nullable_mode) { + case NullalbeMode::NULLABLE: { + is_nullable = true; + null_map = ColumnUInt8::create(input_rows_count, 0); + 
break; + } + case NullalbeMode::NOT_NULL: + is_nullable = false; + break; + case NullalbeMode::FOLLOW_INPUT: { + auto argument_column = col_from.convert_to_full_column_if_const(); + if (auto* nullable = check_and_get_column(*argument_column)) { + is_nullable = true; + null_map = ColumnUInt8::create(input_rows_count, 0); + // Danger: Here must dispose the null map data first! Because + // argument_columns[i]=nullable->get_nested_column_ptr(); will release the mem + // of column nullable mem of null map + VectorizedUtils::update_null_map(null_map->get_data(), + nullable->get_null_map_data()); + argument_column = nullable->get_nested_column_ptr(); + } + break; + } + } + + // const auto& col_with_type_and_name = block.get_by_position(arguments[0]); + + // const IColumn& col_from = *col_with_type_and_name.column; + + const ColumnString* col_from_string = check_and_get_column(col_from); + if (auto* nullable = check_and_get_column(col_from)) { + col_from_string = + check_and_get_column(*nullable->get_nested_column_ptr()); + } + + if (!col_from_string) { + return Status::RuntimeError("Illegal column {} should be ColumnString", + col_from.get_name()); + } + + auto col_to = ColumnJsonb::create(); + + //IColumn & col_to = *res; + size_t size = col_from.size(); + col_to->reserve(size); + + for (size_t i = 0; i < input_rows_count; ++i) { + if (col_from.is_null_at(i)) { + null_map->get_data()[i] = 1; + col_to->insert_data("", 0); + continue; + } + + const auto& val = col_from_string->get_data_at(i); + JsonbParser parser; + JsonbErrType error = JsonbErrType::E_NONE; + if (parser.parse(val.data, val.size)) { + // insert jsonb format data + col_to->insert_data(parser.getWriter().getOutput()->getBuffer(), + (size_t)parser.getWriter().getOutput()->getSize()); + } else { + error = parser.getErrorCode(); + LOG(WARNING) << "json parse error: " << JsonbErrMsg::getErrMsg(error) + << " for value: " << std::string_view(val.data, val.size); + + switch (parse_error_handle_mode) { + case 
JsonbParseErrorMode::FAIL: + return Status::InvalidArgument("json parse error: {} for value: {}", + JsonbErrMsg::getErrMsg(error), + std::string_view(val.data, val.size)); + case JsonbParseErrorMode::RETURN_NULL: { + if (is_nullable) null_map->get_data()[i] = 1; + col_to->insert_data("", 0); + continue; + } + case JsonbParseErrorMode::RETURN_VALUE: { + if (has_const_default_value) { + col_to->insert_data( + default_value_parser.getWriter().getOutput()->getBuffer(), + (size_t)default_value_parser.getWriter().getOutput()->getSize()); + } else { + auto val = block.get_by_position(arguments[1]).column->get_data_at(i); + if (parser.parse(val.data, val.size)) { + // insert jsonb format data + col_to->insert_data(parser.getWriter().getOutput()->getBuffer(), + (size_t)parser.getWriter().getOutput()->getSize()); + } else { + return Status::InvalidArgument( + "json parse error: {} for default value: {}", + JsonbErrMsg::getErrMsg(error), + std::string_view(val.data, val.size)); + } + } + continue; + } + case JsonbParseErrorMode::RETURN_INVALID: + col_to->insert_data("", 0); + continue; + } + } + } + + if (is_nullable) { + block.replace_by_position( + result, ColumnNullable::create(std::move(col_to), std::move(null_map))); + } else { + block.replace_by_position(result, std::move(col_to)); + } + + return Status::OK(); + } +}; + +// jsonb_parse return type nullable as input +using FunctionJsonbParse = + FunctionJsonbParseBase; +using FunctionJsonbParseErrorNull = + FunctionJsonbParseBase; +using FunctionJsonbParseErrorValue = + FunctionJsonbParseBase; +using FunctionJsonbParseErrorInvalid = + FunctionJsonbParseBase; + +// jsonb_parse return type is nullable +using FunctionJsonbParseNullable = + FunctionJsonbParseBase; +using FunctionJsonbParseNullableErrorNull = + FunctionJsonbParseBase; +using FunctionJsonbParseNullableErrorValue = + FunctionJsonbParseBase; +using FunctionJsonbParseNullableErrorInvalid = + FunctionJsonbParseBase; + +// jsonb_parse return type is not nullable 
+using FunctionJsonbParseNotnull = + FunctionJsonbParseBase; +using FunctionJsonbParseNotnullErrorValue = + FunctionJsonbParseBase; +using FunctionJsonbParseNotnullErrorInvalid = + FunctionJsonbParseBase; + +void register_function_jsonb(SimpleFunctionFactory& factory) { + factory.register_function("jsonb_parse"); + factory.register_function("jsonb_parse_error_to_null"); + factory.register_function("jsonb_parse_error_to_value"); + factory.register_function("jsonb_parse_error_to_invalid"); + + factory.register_function("jsonb_parse_nullable"); + factory.register_function( + "jsonb_parse_nullable_error_to_null"); + factory.register_function( + "jsonb_parse_nullable_error_to_value"); + factory.register_function( + "jsonb_parse_nullable_error_to_invalid"); + + factory.register_function("jsonb_parse_notnull"); + factory.register_function( + "jsonb_parse_notnull_error_to_value"); + factory.register_function( + "jsonb_parse_notnull_error_to_invalid"); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index a30fe275b0..aef1029449 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -63,6 +63,7 @@ void register_function_date_time_computation(SimpleFunctionFactory& factory); void register_function_timestamp(SimpleFunctionFactory& factory); void register_function_utility(SimpleFunctionFactory& factory); void register_function_json(SimpleFunctionFactory& factory); +void register_function_jsonb(SimpleFunctionFactory& factory); void register_function_hash(SimpleFunctionFactory& factory); void register_function_ifnull(SimpleFunctionFactory& factory); void register_function_like(SimpleFunctionFactory& factory); @@ -196,6 +197,7 @@ public: register_function_date_time_to_string(instance); register_function_date_time_string_to_string(instance); register_function_json(instance); + register_function_jsonb(instance); 
register_function_hash(instance); register_function_ifnull(instance); register_function_comparison_eq_for_null(instance); diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp index 22ffc8580f..bf4bae6dc1 100644 --- a/be/src/vec/sink/vtablet_sink.cpp +++ b/be/src/vec/sink/vtablet_sink.cpp @@ -24,6 +24,7 @@ #include "util/proto_util.h" #include "util/time.h" #include "vec/columns/column_array.h" +#include "vec/columns/column_jsonb.h" #include "vec/core/block.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" @@ -644,6 +645,22 @@ Status VOlapTableSink::_validate_column(RuntimeState* state, const TypeDescripto } break; } + case TYPE_JSONB: { + const auto column_jsonb = + assert_cast(real_column_ptr.get()); + for (size_t j = 0; j < column->size(); ++j) { + if (!filter_bitmap->Get(j)) { + auto str_val = column_jsonb->get_data_at(j); + bool invalid = str_val.size == 0; + if (invalid) { + error_msg.clear(); + fmt::format_to(error_msg, "{}", "jsonb with size 0 is invalid"); + RETURN_IF_ERROR(set_invalid_and_append_error_msg(j)); + } + } + } + break; + } case TYPE_DECIMALV2: { auto column_decimal = const_cast*>( assert_cast*>( diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index 88ddab1715..a5971dbaf1 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -355,6 +355,7 @@ set(VEC_TEST_FILES vec/function/function_like_test.cpp vec/function/function_arithmetic_test.cpp vec/function/function_json_test.cpp + vec/function/function_jsonb_test.cpp vec/function/function_geo_test.cpp vec/function/function_test_util.cpp vec/function/table_function_test.cpp diff --git a/be/test/vec/function/function_jsonb_test.cpp b/be/test/vec/function/function_jsonb_test.cpp new file mode 100644 index 0000000000..68f58e9bc4 --- /dev/null +++ b/be/test/vec/function/function_jsonb_test.cpp @@ -0,0 +1,512 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "function_test_util.h" +#include "vec/data_types/data_type_jsonb.h" +#include "vec/data_types/data_type_number.h" +#include "vec/data_types/data_type_string.h" + +namespace doris::vectorized { +using namespace ut_type; + +TEST(FunctionJsonbTEST, JsonbParseTest) { + std::string func_name = "jsonb_parse"; + InputTypeSet input_types = {TypeIndex::String}; + + DataSet data_set_valid = { + {{STRING("null")}, STRING("null")}, + {{STRING("true")}, STRING("true")}, + {{STRING("false")}, STRING("false")}, + {{STRING("100")}, STRING("100")}, //int8 + {{STRING("10000")}, STRING("10000")}, // int16 + {{STRING("1073741820")}, STRING("1073741820")}, // int32 + {{STRING("1152921504606846976")}, STRING("1152921504606846976")}, // int64 + {{STRING("6.18")}, STRING("6.18")}, // double + {{STRING(R"("abcd")")}, STRING(R"("abcd")")}, // string + {{STRING("{}")}, STRING("{}")}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})")}, STRING(R"({"k1":"v31","k2":300})")}, // object + {{STRING("[]")}, STRING("[]")}, // empty array + {{STRING("[123, 456]")}, STRING("[123,456]")}, // int array + {{STRING(R"(["abc", "def"])")}, STRING(R"(["abc","def"])")}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])")}, + 
STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])")}, + STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array + }; + + auto st = check_function(func_name, input_types, data_set_valid); + EXPECT_EQ(Status::OK(), st); + + DataSet data_set_invalid = { + {{STRING("abc")}, Null()}, // invalid string + }; + st = check_function(func_name, input_types, data_set_invalid); + EXPECT_NE(Status::OK(), st); + + data_set_invalid = { + {{STRING("'abc'")}, Null()}, // invalid string + }; + st = check_function(func_name, input_types, data_set_invalid); + EXPECT_NE(Status::OK(), st); + + data_set_invalid = { + {{STRING("100x")}, Null()}, // invalid int + }; + st = check_function(func_name, input_types, data_set_invalid); + EXPECT_NE(Status::OK(), st); + + data_set_invalid = { + {{STRING("6.a8")}, Null()}, // invalid double + }; + st = check_function(func_name, input_types, data_set_invalid); + EXPECT_NE(Status::OK(), st); + + data_set_invalid = { + {{STRING("{x")}, Null()}, // invalid object + }; + st = check_function(func_name, input_types, data_set_invalid); + EXPECT_NE(Status::OK(), st); + + data_set_invalid = { + {{STRING("[123, abc]")}, Null()} // invalid array + }; + st = check_function(func_name, input_types, data_set_invalid); + EXPECT_NE(Status::OK(), st); +} + +TEST(FunctionJsonbTEST, JsonbParseErrorToNullTest) { + std::string func_name = "jsonb_parse_error_to_null"; + InputTypeSet input_types = {TypeIndex::String}; + + DataSet data_set = { + {{STRING("null")}, STRING("null")}, + {{STRING("true")}, STRING("true")}, + {{STRING("false")}, STRING("false")}, + {{STRING("100")}, STRING("100")}, //int8 + {{STRING("10000")}, STRING("10000")}, // int16 + {{STRING("1073741820")}, STRING("1073741820")}, // int32 + {{STRING("1152921504606846976")}, STRING("1152921504606846976")}, // int64 + {{STRING("6.18")}, STRING("6.18")}, // double + {{STRING(R"("abcd")")}, STRING(R"("abcd")")}, // string + 
{{STRING("{}")}, STRING("{}")}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})")}, STRING(R"({"k1":"v31","k2":300})")}, // object + {{STRING("[]")}, STRING("[]")}, // empty array + {{STRING("[123, 456]")}, STRING("[123,456]")}, // int array + {{STRING(R"(["abc", "def"])")}, STRING(R"(["abc","def"])")}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])")}, + STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])")}, + STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array + {{STRING("abc")}, Null()}, // invalid string + {{STRING("'abc'")}, Null()}, // invalid string + {{STRING("100x")}, Null()}, // invalid int + {{STRING("6.a8")}, Null()}, // invalid double + {{STRING("{x")}, Null()}, // invalid object + {{STRING("[123, abc]")}, Null()} // invalid array + }; + + auto st = check_function(func_name, input_types, data_set); + EXPECT_EQ(Status::OK(), st); +} + +TEST(FunctionJsonbTEST, JsonbParseErrorToValueTest) { + std::string func_name = "jsonb_parse_error_to_value"; + InputTypeSet input_types = {TypeIndex::String, TypeIndex::String}; + + DataSet data_set = { + {{STRING("null"), STRING("{}")}, STRING("null")}, + {{STRING("true"), STRING("{}")}, STRING("true")}, + {{STRING("false"), STRING("{}")}, STRING("false")}, + {{STRING("100"), STRING("{}")}, STRING("100")}, //int8 + {{STRING("10000"), STRING("{}")}, STRING("10000")}, // int16 + {{STRING("1073741820"), STRING("{}")}, STRING("1073741820")}, // int32 + {{STRING("1152921504606846976"), STRING("{}")}, STRING("1152921504606846976")}, // int64 + {{STRING("6.18"), STRING("{}")}, STRING("6.18")}, // double + {{STRING(R"("abcd")"), STRING("{}")}, STRING(R"("abcd")")}, // string + {{STRING("{}"), STRING("{}")}, STRING("{}")}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("{}")}, + STRING(R"({"k1":"v31","k2":300})")}, // object + {{STRING("[]"), STRING("{}")}, STRING("[]")}, // empty array + 
{{STRING("[123, 456]"), STRING("{}")}, STRING("[123,456]")}, // int array + {{STRING(R"(["abc", "def"])"), STRING("{}")}, + STRING(R"(["abc","def"])")}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("{}")}, + STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("{}")}, + STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array + {{STRING("abc"), STRING(R"("abc")")}, STRING(R"("abc")")}, // invalid string + {{STRING("'abc'"), STRING(R"("abc")")}, STRING(R"("abc")")}, // invalid string + {{STRING("100x"), STRING("100")}, STRING("100")}, // invalid int + {{STRING("6.a8"), STRING("6.18")}, STRING("6.18")}, // invalid double + {{STRING("{x"), STRING("{}")}, STRING("{}")}, // invalid object + {{STRING("[123, abc]"), STRING(R"([123,"abc"])")}, + STRING(R"([123,"abc"])")} // invalid array + }; + + auto st = check_function(func_name, input_types, data_set); + EXPECT_EQ(Status::OK(), st); +} + +TEST(FunctionJsonbTEST, JsonbParseErrorToInvalidTest) { + std::string func_name = "jsonb_parse_error_to_invalid"; + InputTypeSet input_types = {TypeIndex::String}; + + DataSet data_set = { + {{STRING("null")}, STRING("null")}, + {{STRING("true")}, STRING("true")}, + {{STRING("false")}, STRING("false")}, + {{STRING("100")}, STRING("100")}, //int8 + {{STRING("10000")}, STRING("10000")}, // int16 + {{STRING("1073741820")}, STRING("1073741820")}, // int32 + {{STRING("1152921504606846976")}, STRING("1152921504606846976")}, // int64 + {{STRING("6.18")}, STRING("6.18")}, // double + {{STRING(R"("abcd")")}, STRING(R"("abcd")")}, // string + {{STRING("{}")}, STRING("{}")}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})")}, STRING(R"({"k1":"v31","k2":300})")}, // object + {{STRING("[]")}, STRING("[]")}, // empty array + {{STRING("[123, 456]")}, STRING("[123,456]")}, // int array + {{STRING(R"(["abc", "def"])")}, STRING(R"(["abc","def"])")}, // string array + 
{{STRING(R"([null, true, false, 100, 6.18, "abc"])")}, + STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])")}, + STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array + {{STRING("abc")}, STRING("")}, // invalid string + {{STRING("'abc'")}, STRING("")}, // invalid string + {{STRING("100x")}, STRING("")}, // invalid int + {{STRING("6.a8")}, STRING("")}, // invalid double + {{STRING("{x")}, STRING("")}, // invalid object + {{STRING("[123, abc]")}, STRING("")} // invalid array + }; + + auto st = check_function(func_name, input_types, data_set); + EXPECT_EQ(Status::OK(), st); +} + +TEST(FunctionJsonbTEST, JsonbParseNullableTest) { + std::string func_name = "jsonb_parse_nullable"; + InputTypeSet input_types = {TypeIndex::String}; + + DataSet data_set_valid = { + {{STRING("null")}, STRING("null")}, + {{STRING("true")}, STRING("true")}, + {{STRING("false")}, STRING("false")}, + {{STRING("100")}, STRING("100")}, //int8 + {{STRING("10000")}, STRING("10000")}, // int16 + {{STRING("1073741820")}, STRING("1073741820")}, // int32 + {{STRING("1152921504606846976")}, STRING("1152921504606846976")}, // int64 + {{STRING("6.18")}, STRING("6.18")}, // double + {{STRING(R"("abcd")")}, STRING(R"("abcd")")}, // string + {{STRING("{}")}, STRING("{}")}, // empty object + {{STRING(R"({"k1":"v31", "k2": 300})")}, STRING(R"({"k1":"v31","k2":300})")}, // object + {{STRING("[]")}, STRING("[]")}, // empty array + {{STRING("[123, 456]")}, STRING("[123,456]")}, // int array + {{STRING(R"(["abc", "def"])")}, STRING(R"(["abc","def"])")}, // string array + {{STRING(R"([null, true, false, 100, 6.18, "abc"])")}, + STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array + {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])")}, + STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array + }; + + auto st = check_function(func_name, input_types, data_set_valid); + EXPECT_EQ(Status::OK(), st); + + 
 DataSet data_set_invalid = {
+            {{STRING("abc")}, Null()}, // invalid string (no quotes)
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+
+    data_set_invalid = {
+            {{STRING("'abc'")}, Null()}, // invalid string (single quotes)
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+
+    data_set_invalid = {
+            {{STRING("100x")}, Null()}, // invalid int
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+
+    data_set_invalid = {
+            {{STRING("6.a8")}, Null()}, // invalid double
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+
+    data_set_invalid = {
+            {{STRING("{x")}, Null()}, // invalid object
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+
+    data_set_invalid = {
+            {{STRING("[123, abc]")}, Null()} // invalid array
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+}
+
+TEST(FunctionJsonbTEST, JsonbParseNullableErrorToNullTest) { // malformed rows are expected to come back as NULL with an OK status
+    std::string func_name = "jsonb_parse_nullable_error_to_null";
+    InputTypeSet input_types = {TypeIndex::String};
+
+    DataSet data_set = {
+            {{STRING("null")}, STRING("null")},
+            {{STRING("true")}, STRING("true")},
+            {{STRING("false")}, STRING("false")},
+            {{STRING("100")}, STRING("100")}, // int8
+            {{STRING("10000")}, STRING("10000")}, // int16
+            {{STRING("1073741820")}, STRING("1073741820")}, // int32
+            {{STRING("1152921504606846976")}, STRING("1152921504606846976")}, // int64
+            {{STRING("6.18")}, STRING("6.18")}, // double
+            {{STRING(R"("abcd")")}, STRING(R"("abcd")")}, // string
+            {{STRING("{}")}, STRING("{}")}, // empty object
+            {{STRING(R"({"k1":"v31", "k2": 300})")}, STRING(R"({"k1":"v31","k2":300})")}, // object
+            {{STRING("[]")}, STRING("[]")}, // empty array
+            {{STRING("[123, 456]")}, STRING("[123,456]")}, // int array
+            {{STRING(R"(["abc", "def"])")}, STRING(R"(["abc","def"])")}, // string array
+            {{STRING(R"([null, true, false, 100, 6.18, "abc"])")},
+             STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array
+            {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])")},
+             STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array
+            {{STRING("abc")}, Null()}, // invalid string (no quotes)
+            {{STRING("'abc'")}, Null()}, // invalid string (single quotes)
+            {{STRING("100x")}, Null()}, // invalid int
+            {{STRING("6.a8")}, Null()}, // invalid double
+            {{STRING("{x")}, Null()}, // invalid object
+            {{STRING("[123, abc]")}, Null()} // invalid array
+    };
+
+    auto st = check_function(func_name, input_types, data_set);
+    EXPECT_EQ(Status::OK(), st);
+}
+
+TEST(FunctionJsonbTEST, JsonbParseNullableErrorToValueTest) { // malformed rows are expected to come back as the JSON passed in the 2nd argument
+    std::string func_name = "jsonb_parse_nullable_error_to_value";
+    InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
+
+    DataSet data_set = {
+            {{STRING("null"), STRING("{}")}, STRING("null")},
+            {{STRING("true"), STRING("{}")}, STRING("true")},
+            {{STRING("false"), STRING("{}")}, STRING("false")},
+            {{STRING("100"), STRING("{}")}, STRING("100")}, // int8
+            {{STRING("10000"), STRING("{}")}, STRING("10000")}, // int16
+            {{STRING("1073741820"), STRING("{}")}, STRING("1073741820")}, // int32
+            {{STRING("1152921504606846976"), STRING("{}")}, STRING("1152921504606846976")}, // int64
+            {{STRING("6.18"), STRING("{}")}, STRING("6.18")}, // double
+            {{STRING(R"("abcd")"), STRING("{}")}, STRING(R"("abcd")")}, // string
+            {{STRING("{}"), STRING("{}")}, STRING("{}")}, // empty object
+            {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("{}")},
+             STRING(R"({"k1":"v31","k2":300})")}, // object
+            {{STRING("[]"), STRING("{}")}, STRING("[]")}, // empty array
+            {{STRING("[123, 456]"), STRING("{}")}, STRING("[123,456]")}, // int array
+            {{STRING(R"(["abc", "def"])"), STRING("{}")},
+             STRING(R"(["abc","def"])")}, // string array
+            {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("{}")},
+             STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array
+            {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("{}")},
+             STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array
+            {{STRING("abc"), STRING(R"("abc")")}, STRING(R"("abc")")}, // invalid string (no quotes)
+            {{STRING("'abc'"), STRING(R"("abc")")}, STRING(R"("abc")")}, // invalid string (single quotes)
+            {{STRING("100x"), STRING("100")}, STRING("100")}, // invalid int
+            {{STRING("6.a8"), STRING("6.18")}, STRING("6.18")}, // invalid double
+            {{STRING("{x"), STRING("{}")}, STRING("{}")}, // invalid object
+            {{STRING("[123, abc]"), STRING(R"([123,"abc"])")},
+             STRING(R"([123,"abc"])")} // invalid array
+    };
+
+    auto st = check_function(func_name, input_types, data_set);
+    EXPECT_EQ(Status::OK(), st);
+}
+
+TEST(FunctionJsonbTEST, JsonbParseNullableErrorToInvalidTest) { // malformed rows: the expected result is the empty string
+    std::string func_name = "jsonb_parse_nullable_error_to_invalid";
+    InputTypeSet input_types = {TypeIndex::String};
+
+    DataSet data_set = {
+            {{STRING("null")}, STRING("null")},
+            {{STRING("true")}, STRING("true")},
+            {{STRING("false")}, STRING("false")},
+            {{STRING("100")}, STRING("100")}, // int8
+            {{STRING("10000")}, STRING("10000")}, // int16
+            {{STRING("1073741820")}, STRING("1073741820")}, // int32
+            {{STRING("1152921504606846976")}, STRING("1152921504606846976")}, // int64
+            {{STRING("6.18")}, STRING("6.18")}, // double
+            {{STRING(R"("abcd")")}, STRING(R"("abcd")")}, // string
+            {{STRING("{}")}, STRING("{}")}, // empty object
+            {{STRING(R"({"k1":"v31", "k2": 300})")}, STRING(R"({"k1":"v31","k2":300})")}, // object
+            {{STRING("[]")}, STRING("[]")}, // empty array
+            {{STRING("[123, 456]")}, STRING("[123,456]")}, // int array
+            {{STRING(R"(["abc", "def"])")}, STRING(R"(["abc","def"])")}, // string array
+            {{STRING(R"([null, true, false, 100, 6.18, "abc"])")},
+             STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array
+            {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])")},
+             STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array
+            {{STRING("abc")}, STRING("")}, // invalid string (no quotes)
+            {{STRING("'abc'")}, STRING("")}, // invalid string (single quotes)
+            {{STRING("100x")}, STRING("")}, // invalid int
+            {{STRING("6.a8")}, STRING("")}, // invalid double
+            {{STRING("{x")}, STRING("")}, // invalid object
+            {{STRING("[123, abc]")}, STRING("")} // invalid array
+    };
+
+    auto st = check_function(func_name, input_types, data_set);
+    EXPECT_EQ(Status::OK(), st);
+}
+
+TEST(FunctionJsonbTEST, JsonbParseNotnullTest) { // valid rows print back as compact JSON; each malformed input alone must yield a non-OK status
+    std::string func_name = "jsonb_parse_notnull";
+    InputTypeSet input_types = {TypeIndex::String};
+
+    DataSet data_set_valid = {
+            {{STRING("null")}, STRING("null")},
+            {{STRING("true")}, STRING("true")},
+            {{STRING("false")}, STRING("false")},
+            {{STRING("100")}, STRING("100")}, // int8
+            {{STRING("10000")}, STRING("10000")}, // int16
+            {{STRING("1073741820")}, STRING("1073741820")}, // int32
+            {{STRING("1152921504606846976")}, STRING("1152921504606846976")}, // int64
+            {{STRING("6.18")}, STRING("6.18")}, // double
+            {{STRING(R"("abcd")")}, STRING(R"("abcd")")}, // string
+            {{STRING("{}")}, STRING("{}")}, // empty object
+            {{STRING(R"({"k1":"v31", "k2": 300})")}, STRING(R"({"k1":"v31","k2":300})")}, // object
+            {{STRING("[]")}, STRING("[]")}, // empty array
+            {{STRING("[123, 456]")}, STRING("[123,456]")}, // int array
+            {{STRING(R"(["abc", "def"])")}, STRING(R"(["abc","def"])")}, // string array
+            {{STRING(R"([null, true, false, 100, 6.18, "abc"])")},
+             STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array
+            {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])")},
+             STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array
+    };
+
+    auto st = check_function(func_name, input_types, data_set_valid);
+    EXPECT_EQ(Status::OK(), st);
+
+    DataSet data_set_invalid = {
+            {{STRING("abc")}, Null()}, // invalid string (no quotes)
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+
+    data_set_invalid = {
+            {{STRING("'abc'")}, Null()}, // invalid string (single quotes)
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+
+    data_set_invalid = {
+            {{STRING("100x")}, Null()}, // invalid int
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+
+    data_set_invalid = {
+            {{STRING("6.a8")}, Null()}, // invalid double
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+
+    data_set_invalid = {
+            {{STRING("{x")}, Null()}, // invalid object
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+
+    data_set_invalid = {
+            {{STRING("[123, abc]")}, Null()} // invalid array
+    };
+    st = check_function(func_name, input_types, data_set_invalid);
+    EXPECT_NE(Status::OK(), st);
+}
+
+TEST(FunctionJsonbTEST, JsonbParseNotnullErrorToValueTest) { // malformed rows are expected to come back as the JSON passed in the 2nd argument
+    std::string func_name = "jsonb_parse_notnull_error_to_value";
+    InputTypeSet input_types = {TypeIndex::String, TypeIndex::String};
+
+    DataSet data_set = {
+            {{STRING("null"), STRING("{}")}, STRING("null")},
+            {{STRING("true"), STRING("{}")}, STRING("true")},
+            {{STRING("false"), STRING("{}")}, STRING("false")},
+            {{STRING("100"), STRING("{}")}, STRING("100")}, // int8
+            {{STRING("10000"), STRING("{}")}, STRING("10000")}, // int16
+            {{STRING("1073741820"), STRING("{}")}, STRING("1073741820")}, // int32
+            {{STRING("1152921504606846976"), STRING("{}")}, STRING("1152921504606846976")}, // int64
+            {{STRING("6.18"), STRING("{}")}, STRING("6.18")}, // double
+            {{STRING(R"("abcd")"), STRING("{}")}, STRING(R"("abcd")")}, // string
+            {{STRING("{}"), STRING("{}")}, STRING("{}")}, // empty object
+            {{STRING(R"({"k1":"v31", "k2": 300})"), STRING("{}")},
+             STRING(R"({"k1":"v31","k2":300})")}, // object
+            {{STRING("[]"), STRING("{}")}, STRING("[]")}, // empty array
+            {{STRING("[123, 456]"), STRING("{}")}, STRING("[123,456]")}, // int array
+            {{STRING(R"(["abc", "def"])"), STRING("{}")},
+             STRING(R"(["abc","def"])")}, // string array
+            {{STRING(R"([null, true, false, 100, 6.18, "abc"])"), STRING("{}")},
+             STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array
+            {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])"), STRING("{}")},
+             STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array
+            {{STRING("abc"), STRING(R"("abc")")}, STRING(R"("abc")")}, // invalid string (no quotes)
+            {{STRING("'abc'"), STRING(R"("abc")")}, STRING(R"("abc")")}, // invalid string (single quotes)
+            {{STRING("100x"), STRING("100")}, STRING("100")}, // invalid int
+            {{STRING("6.a8"), STRING("6.18")}, STRING("6.18")}, // invalid double
+            {{STRING("{x"), STRING("{}")}, STRING("{}")}, // invalid object
+            {{STRING("[123, abc]"), STRING(R"([123,"abc"])")},
+             STRING(R"([123,"abc"])")} // invalid array
+    };
+
+    auto st = check_function(func_name, input_types, data_set);
+    EXPECT_EQ(Status::OK(), st);
+}
+
+TEST(FunctionJsonbTEST, JsonbParseNotnullErrorToInvalidTest) { // malformed rows: the expected result is the empty string
+    std::string func_name = "jsonb_parse_notnull_error_to_invalid";
+    InputTypeSet input_types = {TypeIndex::String};
+
+    DataSet data_set = {
+            {{STRING("null")}, STRING("null")},
+            {{STRING("true")}, STRING("true")},
+            {{STRING("false")}, STRING("false")},
+            {{STRING("100")}, STRING("100")}, // int8
+            {{STRING("10000")}, STRING("10000")}, // int16
+            {{STRING("1073741820")}, STRING("1073741820")}, // int32
+            {{STRING("1152921504606846976")}, STRING("1152921504606846976")}, // int64
+            {{STRING("6.18")}, STRING("6.18")}, // double
+            {{STRING(R"("abcd")")}, STRING(R"("abcd")")}, // string
+            {{STRING("{}")}, STRING("{}")}, // empty object
+            {{STRING(R"({"k1":"v31", "k2": 300})")}, STRING(R"({"k1":"v31","k2":300})")}, // object
+            {{STRING("[]")}, STRING("[]")}, // empty array
+            {{STRING("[123, 456]")}, STRING("[123,456]")}, // int array
+            {{STRING(R"(["abc", "def"])")}, STRING(R"(["abc","def"])")}, // string array
+            {{STRING(R"([null, true, false, 100, 6.18, "abc"])")},
+             STRING(R"([null,true,false,100,6.18,"abc"])")}, // multi type array
+            {{STRING(R"([{"k1":"v41", "k2": 400}, 1, "a", 3.14])")},
+             STRING(R"([{"k1":"v41","k2":400},1,"a",3.14])")}, // complex array
+            {{STRING("abc")}, STRING("")}, // invalid string (no quotes)
+            {{STRING("'abc'")}, STRING("")}, // invalid string (single quotes)
+            {{STRING("100x")}, STRING("")}, // invalid int
+            {{STRING("6.a8")}, STRING("")}, // invalid double
+            {{STRING("{x")}, STRING("")}, // invalid object
+            {{STRING("[123, abc]")}, STRING("")} // invalid array
+    };
+
+    auto st = check_function(func_name, input_types, data_set);
+    EXPECT_EQ(Status::OK(), st);
+}
+
+} // namespace doris::vectorized
diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h
index 4e0685ec91..1d4d56f029 100644
--- a/be/test/vec/function/function_test_util.h
+++ b/be/test/vec/function/function_test_util.h
@@ -31,6 +31,7 @@
 #include "vec/core/columns_with_type_and_name.h"
 #include "vec/data_types/data_type_date_time.h"
 #include "vec/data_types/data_type_decimal.h"
+#include "vec/data_types/data_type_jsonb.h"
 #include "vec/data_types/data_type_number.h"
 #include "vec/data_types/data_type_string.h"
 #include "vec/functions/simple_function_factory.h"
@@ -161,8 +162,8 @@ void check_vec_table_function(TableFunction* fn, const InputTypeSet& input_types
 // The type of the constant column is represented as follows: Consted {TypeIndex::String}
 // A DataSet with a constant column can only have one row of data
 template
-void check_function(const std::string& func_name, const InputTypeSet& input_types,
-                    const DataSet& data_set) {
+Status check_function(const std::string& func_name, const InputTypeSet& input_types,
+                      const DataSet& data_set) { // prepare/execute/close go through RETURN_IF_ERROR; Status::OK() is returned at the end
     // 1.0 create data type
     ut_type::UTDataTypeDescs descs;
     EXPECT_TRUE(parse_ut_data_type(input_types, descs));
@@ -233,16 +234,16 @@ void check_function(const std::string& func_name, const InputTypeSet& input_type
     FunctionUtils fn_utils(fn_ctx_return, arg_types, 0);
     auto* fn_ctx = fn_utils.get_fn_ctx();
     fn_ctx->impl()->set_constant_cols(constant_cols);
-    func->prepare(fn_ctx, FunctionContext::FRAGMENT_LOCAL);
-    func->prepare(fn_ctx, FunctionContext::THREAD_LOCAL);
+    RETURN_IF_ERROR(func->prepare(fn_ctx, FunctionContext::FRAGMENT_LOCAL));
+    RETURN_IF_ERROR(func->prepare(fn_ctx, FunctionContext::THREAD_LOCAL));
 
     block.insert({nullptr, return_type, "result"});
 
     auto result = block.columns() - 1;
-    func->execute(fn_ctx, block, arguments, result, row_size);
+    RETURN_IF_ERROR(func->execute(fn_ctx, block, arguments, result, row_size));
 
-    func->close(fn_ctx, FunctionContext::THREAD_LOCAL);
-    func->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL);
+    RETURN_IF_ERROR(func->close(fn_ctx, FunctionContext::THREAD_LOCAL));
+    RETURN_IF_ERROR(func->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL));
 
     // 3. check the result of function
     ColumnPtr column = block.get_columns()[result];
@@ -250,32 +251,49 @@ void check_function(const std::string& func_name, const InputTypeSet& input_type
 
     for (int i = 0; i < row_size; ++i) {
         auto check_column_data = [&]() {
-            Field field;
-            column->get(i, field);
-
-            const auto& expect_data =
-                    std::any_cast(data_set[i].second);
-
-            if constexpr (std::is_same_v>) {
-                const auto& column_data = field.get>().get_value();
-                EXPECT_EQ(column_data.value, expect_data.value);
-            } else if constexpr (std::is_same_v) {
-                const auto& column_data = field.get();
-                EXPECT_EQ(column_data, expect_data);
+            if constexpr (std::is_same_v) {
+                const auto& expect_data = std::any_cast(data_set[i].second);
+                auto s = column->get_data_at(i);
+                if (expect_data.size() == 0) {
+                    // an empty expected string marks an "invalid" row: the result must be zero-sized too
+                    EXPECT_EQ(0, s.size) << " invalid result size should be 0 for row " << i;
+                } else {
+                    // convert jsonb binary value to json string to compare with expected json text; NOTE(review): the createDocument() result is dereferenced without a null check - confirm it cannot be null here
+                    JsonbToJson to_json;
+                    doris::JsonbValue* val =
+                            doris::JsonbDocument::createDocument(s.data, s.size)->getValue();
+                    EXPECT_EQ(to_json.jsonb_to_string(val), expect_data) << " for row " << i;
+                }
             } else {
-                const auto& column_data = field.get();
-                EXPECT_EQ(column_data, expect_data);
+                Field field;
+                column->get(i, field);
+
+                const auto& expect_data =
+                        std::any_cast(data_set[i].second);
+
+                if constexpr (std::is_same_v>) {
+                    const auto& column_data = field.get>().get_value();
+                    EXPECT_EQ(expect_data.value, column_data.value);
+                } else if constexpr (std::is_same_v) {
+                    const auto& column_data = field.get();
+                    EXPECT_EQ(expect_data, column_data);
+                } else {
+                    const auto& column_data = field.get();
+                    EXPECT_EQ(expect_data, column_data);
+                }
             }
         };
 
         if constexpr (nullable) {
             bool is_null = data_set[i].second.type() == typeid(Null);
-            EXPECT_EQ(column->is_null_at(i), is_null);
+            EXPECT_EQ(is_null, column->is_null_at(i));
             if (!is_null) check_column_data();
         } else {
             check_column_data();
         }
     }
+
+    return Status::OK();
 }
 
 } // namespace doris::vectorized
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java
index 360858486c..0f8eb77ffb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/LoadScanNode.java
@@ -206,7 +206,23 @@ public abstract class LoadScanNode extends ScanNode {
                 expr = new ArithmeticExpr(ArithmeticExpr.Operator.MULTIPLY, expr, new IntLiteral(-1));
                 expr.analyze(analyzer);
             }
-            expr = castToSlot(destSlotDesc, expr);
+
+            PrimitiveType dstType = destSlotDesc.getType().getPrimitiveType(); // VARCHAR/STRING sources going into a JSONB slot use jsonb_parse_* below instead of castToSlot
+            PrimitiveType srcType = expr.getType().getPrimitiveType();
+            if (dstType == PrimitiveType.JSONB
+                    && (srcType == PrimitiveType.VARCHAR || srcType == PrimitiveType.STRING)) {
+                List args = Lists.newArrayList();
+                args.add(expr);
+                String nullable = "notnull"; // becomes the middle part of the function name built below
+                if (destSlotDesc.getIsNullable() || expr.isNullable()) {
+                    nullable = "nullable";
+                }
+                String name = "jsonb_parse_" + nullable + "_error_to_invalid";
+                expr = new FunctionCallExpr(name, args);
+                expr.analyze(analyzer);
+            } else {
+                expr = castToSlot(destSlotDesc, expr);
+            }
             params.putToExprOfDestSlot(destSlotDesc.getId().asInt(), expr.treeToThrift());
         }
         params.setDestSidToSrcSidWithoutTrans(destSidToSrcSidWithoutTrans);
diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py
index 2070a546ff..f75c1ff680 100755
--- a/gensrc/script/doris_builtins_functions.py
+++ b/gensrc/script/doris_builtins_functions.py
@@ -2238,6 +2238,41 @@ visible_functions = [
         '_ZN5doris16UtilityFunctions7versionEPN9doris_udf15FunctionContextE',
         '', '', 'vec', 'ALWAYS_NOT_NULLABLE'],
 
+    # Jsonb functions (NOTE(review): 'fake_symble_for_no_vec' looks like a placeholder symbol - confirm)
+    [['jsonb_parse'], 'JSONB', ['VARCHAR'],
+        'fake_symble_for_no_vec', '', '',
+        'vec', ''],
+    [['jsonb_parse_error_to_null'], 'JSONB', ['VARCHAR'],
+        'fake_symble_for_no_vec', '', '',
+        'vec', 'ALWAYS_NULLABLE'],
+    [['jsonb_parse_error_to_value'], 'JSONB', ['VARCHAR', 'VARCHAR'],
+        'fake_symble_for_no_vec', '', '',
+        'vec', ''],
+    [['jsonb_parse_error_to_invalid'], 'JSONB', ['VARCHAR'],
+        'fake_symble_for_no_vec', '', '',
+        'vec', ''],
+    [['jsonb_parse_nullable'], 'JSONB', ['VARCHAR'],
+        'fake_symble_for_no_vec', '', '',
+        'vec', 'ALWAYS_NULLABLE'],
+    [['jsonb_parse_nullable_error_to_null'], 'JSONB', ['VARCHAR'],
+        'fake_symble_for_no_vec', '', '',
+        'vec', 'ALWAYS_NULLABLE'],
+    [['jsonb_parse_nullable_error_to_value'], 'JSONB', ['VARCHAR', 'VARCHAR'],
+        'fake_symble_for_no_vec', '', '',
+        'vec', 'ALWAYS_NULLABLE'],
+    [['jsonb_parse_nullable_error_to_invalid'], 'JSONB', ['VARCHAR'],
+        'fake_symble_for_no_vec', '', '',
+        'vec', 'ALWAYS_NULLABLE'],
+    [['jsonb_parse_notnull'], 'JSONB', ['VARCHAR'],
+        'fake_symble_for_no_vec', '', '',
+        'vec', ''],
+    [['jsonb_parse_notnull_error_to_value'], 'JSONB', ['VARCHAR', 'VARCHAR'],
+        'fake_symble_for_no_vec', '', '',
+        'vec', ''],
+    [['jsonb_parse_notnull_error_to_invalid'], 'JSONB', ['VARCHAR'],
+        'fake_symble_for_no_vec', '', '',
+        'vec', ''],
+
     # Json functions
     [['get_json_int'], 'INT', ['VARCHAR', 'VARCHAR'],
         '_ZN5doris13JsonFunctions12get_json_intEPN9doris_udf15FunctionContextERKNS1_9StringValES6_',