diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp
index 7b770258fb..7764acc358 100644
--- a/be/src/exec/base_scanner.cpp
+++ b/be/src/exec/base_scanner.cpp
@@ -145,6 +145,15 @@ bool BaseScanner::fill_dest_tuple(const Slice& line, Tuple* dest_tuple, MemPool*
         ExprContext* ctx = _dest_expr_ctx[dest_index];
         void* value = ctx->get_value(_src_tuple_row);
         if (value == nullptr) {
+            // Only when the expr return value is null, we will check the error message.
+            std::string expr_error = ctx->get_error_msg();
+            if (!expr_error.empty()) {
+                _state->append_error_msg_to_file(_src_tuple_row->to_string(*(_row_desc.get())), expr_error);
+                _counter->num_rows_filtered++;
+                // The ctx is reused, so must clear the error state and message.
+                ctx->clear_error_msg();
+                return false;
+            }
             SlotDescriptor* slot_descriptor = _src_slot_descs_order_by_dest[dest_index];
             if (_strict_mode && (slot_descriptor != nullptr)&& !_src_tuple->is_null(slot_descriptor->null_indicator_offset())) {
                 //Type of the slot is must be Varchar in _src_tuple.
diff --git a/be/src/exprs/bitmap_function.cpp b/be/src/exprs/bitmap_function.cpp
index 55ea1e2ca1..5a42e798fb 100644
--- a/be/src/exprs/bitmap_function.cpp
+++ b/be/src/exprs/bitmap_function.cpp
@@ -59,12 +59,39 @@ BigIntVal BitmapFunctions::bitmap_count(FunctionContext* ctx, const StringVal& s
     return result;
 }
 
-// we assume the input src is a valid integer string
+// Builds a bitmap holding the single value parsed from src. When src is not a plain
+// unsigned 32-bit integer string, sets an error on ctx and returns a null StringVal.
 StringVal BitmapFunctions::to_bitmap(doris_udf::FunctionContext* ctx, const doris_udf::StringVal& src) {
     std::unique_ptr<RoaringBitmap> bitmap {new RoaringBitmap()};
     if (!src.is_null) {
         std::string tmp_str = std::string(reinterpret_cast<char*>(src.ptr), src.len) ;
-        bitmap->update(std::stoi(tmp_str));
+        unsigned long uint32_value = 0;
+        try {
+            // std::stoul negates a leading '-' (wrapping around) and stops at the first
+            // non-digit character, so reject signed or partially numeric input explicitly.
+            size_t parsed_len = 0;
+            uint32_value = std::stoul(tmp_str, &parsed_len);
+            if (UNLIKELY(parsed_len != tmp_str.size() || tmp_str.find('-') != std::string::npos)) {
+                throw std::invalid_argument("");
+            }
+            // std::stoul returns unsigned long, which can be wider than uint32_t, so the
+            // range has to be checked explicitly.
+            if (UNLIKELY(uint32_value > std::numeric_limits<uint32_t>::max())) {
+                throw std::out_of_range("");
+            }
+        } catch (const std::invalid_argument&) {
+            std::stringstream error_msg;
+            error_msg << "The to_bitmap function argument: " << tmp_str << " type isn't integer family";
+            ctx->set_error(error_msg.str().c_str());
+            return StringVal::null();
+        } catch (const std::out_of_range&) {
+            std::stringstream error_msg;
+            error_msg << "The to_bitmap function argument: " << tmp_str << " exceed unsigned integer max value "
+                      << std::numeric_limits<uint32_t>::max();
+            ctx->set_error(error_msg.str().c_str());
+            return StringVal::null();
+        }
+        bitmap->update(static_cast<uint32_t>(uint32_value));
     }
     std::string buf;
     buf.resize(bitmap->size());
diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp
index 3a84db6911..2c5118322b 100644
--- a/be/src/exprs/expr_context.cpp
+++ b/be/src/exprs/expr_context.cpp
@@ -514,4 +514,20 @@ Status ExprContext::get_error(int start_idx, int end_idx) const {
     }
     return Status::OK();
 }
+
+std::string ExprContext::get_error_msg() const {
+    for (auto fn_ctx: _fn_contexts) {
+        if (fn_ctx->has_error()) {
+            return std::string(fn_ctx->error_msg());
+        }
+    }
+    return "";
+}
+
+void ExprContext::clear_error_msg() {
+    for (auto fn_ctx: _fn_contexts) {
+        fn_ctx->clear_error_msg();
+    }
+}
+
 }
diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h
index de57638857..36c300c899 100644
--- a/be/src/exprs/expr_context.h
+++ b/be/src/exprs/expr_context.h
@@ -170,6 +170,11 @@ public:
     /// The default parameters correspond to the entire expr 'root_'.
     Status get_error(int start_idx, int end_idx) const;
 
+    // Returns the error message of the first FunctionContext that has an error, or "" if none has.
+    std::string get_error_msg() const;
+
+    // Clears the error message of every FunctionContext; call it before reusing this expr context.
+    void clear_error_msg();
 private:
     friend class Expr;
     friend class ScalarFnCall;
diff --git a/be/src/udf/udf.cpp b/be/src/udf/udf.cpp
index 58ba7b92c9..4b49c3cade 100755
--- a/be/src/udf/udf.cpp
+++ b/be/src/udf/udf.cpp
@@ -264,7 +264,7 @@ const char* FunctionContext::error_msg() const {
         return _impl->_error_msg.c_str();
     }
 
-    return NULL;
+    return nullptr;
 }
 
 uint8_t* FunctionContext::allocate(int byte_size) {
@@ -338,12 +338,16 @@ void FunctionContext::set_error(const char* error_msg) {
         std::stringstream ss;
         ss << "UDF ERROR: " << error_msg;
 
-        if (_impl->_state != NULL) {
+        if (_impl->_state != nullptr) {
             _impl->_state->set_process_status(ss.str());
         }
     }
 }
 
+void FunctionContext::clear_error_msg() {
+    _impl->_error_msg.clear();
+}
+
 bool FunctionContext::add_warning(const char* warning_msg) {
     if (_impl->_num_warnings++ >= MAX_WARNINGS) {
         return false;
diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h
index 3e9a42fdcb..227f4bdd73 100755
--- a/be/src/udf/udf.h
+++ b/be/src/udf/udf.h
@@ -126,8 +126,13 @@ public:
 
     // Sets an error for this UDF. If this is called, this will trigger the
     // query to fail.
+    // Note: when you set an error in a UDF used by Data Load, you should
+    // ensure the function's return value is null.
     void set_error(const char* error_msg);
 
+    // Clears the stored error message; call it before reusing this FunctionContext.
+    void clear_error_msg();
+
     // Adds a warning that is returned to the user. This can include things like
     // overflow or other recoverable error conditions.
     // Warnings are capped at a maximum number. Returns true if the warning was
diff --git a/be/src/util/bitmap.h b/be/src/util/bitmap.h
index 985dc2c13b..ae2e89ca9b 100644
--- a/be/src/util/bitmap.h
+++ b/be/src/util/bitmap.h
@@ -257,7 +257,7 @@ class RoaringBitmap {
 public:
     RoaringBitmap() : _type(EMPTY) {}
 
-    explicit RoaringBitmap(int32_t value): _int_value(value), _type(SINGLE){}
+    explicit RoaringBitmap(uint32_t value): _int_value(value), _type(SINGLE){}
 
     // the src is the serialized bitmap data, the type could be EMPTY, SINGLE or BITMAP
     explicit RoaringBitmap(const char* src) {
@@ -273,7 +273,7 @@ public:
         }
     }
 
-    void update(const int32_t value) {
+    void update(const uint32_t value) {
         switch (_type) {
             case EMPTY:
                 _int_value = value;
@@ -337,7 +337,7 @@ public:
             case EMPTY:
                 return 1;
             case SINGLE:
-                return sizeof(int32_t) + 1;
+                return sizeof(uint32_t) + 1;
             case BITMAP:
                 _roaring.runOptimize();
                 return _roaring.getSizeInBytes() + 1;
@@ -379,7 +379,7 @@ private:
     };
 
     Roaring _roaring;
-    int32_t _int_value;
+    uint32_t _int_value;
     BitmapDataType _type;
 };
 
diff --git a/be/test/exprs/bitmap_function_test.cpp b/be/test/exprs/bitmap_function_test.cpp
index b535549994..0c0a45adcc 100644
--- a/be/test/exprs/bitmap_function_test.cpp
+++ b/be/test/exprs/bitmap_function_test.cpp
@@ -72,6 +72,54 @@ TEST_F(BitmapFunctionsTest, to_bitmap_null) {
     ASSERT_EQ(expected, result);
 }
 
+TEST_F(BitmapFunctionsTest, to_bitmap_invalid_argument) {
+    StringVal input = AnyValUtil::from_string_temp(ctx, std::string("xxxxxx"));
+    StringVal result = BitmapFunctions::to_bitmap(ctx, input);
+
+    StringVal expected = StringVal::null();
+
+    ASSERT_EQ(expected, result);
+    ASSERT_TRUE(ctx->has_error());
+
+    std::string error_msg("The to_bitmap function argument: xxxxxx type isn't integer family");
+    ASSERT_EQ(error_msg, ctx->error_msg());
+}
+
+TEST_F(BitmapFunctionsTest, to_bitmap_out_of_range) {
+    StringVal input = AnyValUtil::from_string_temp(ctx, std::string("4294967296"));
+    StringVal result = BitmapFunctions::to_bitmap(ctx, input);
+
+    StringVal expected = StringVal::null();
+    ASSERT_EQ(expected, result);
+
+    ASSERT_TRUE(ctx->has_error());
+
+    std::string error_msg("The to_bitmap function argument: 4294967296 exceed unsigned integer max value 4294967295");
+    ASSERT_EQ(error_msg, ctx->error_msg());
+}
+
+TEST_F(BitmapFunctionsTest, to_bitmap_negative_argument) {
+    StringVal input = AnyValUtil::from_string_temp(ctx, std::string("-1"));
+    StringVal result = BitmapFunctions::to_bitmap(ctx, input);
+
+    ASSERT_EQ(StringVal::null(), result);
+    ASSERT_TRUE(ctx->has_error());
+
+    std::string error_msg("The to_bitmap function argument: -1 type isn't integer family");
+    ASSERT_EQ(error_msg, ctx->error_msg());
+}
+
+TEST_F(BitmapFunctionsTest, to_bitmap_trailing_characters) {
+    StringVal input = AnyValUtil::from_string_temp(ctx, std::string("12abc"));
+    StringVal result = BitmapFunctions::to_bitmap(ctx, input);
+
+    ASSERT_EQ(StringVal::null(), result);
+    ASSERT_TRUE(ctx->has_error());
+
+    std::string error_msg("The to_bitmap function argument: 12abc type isn't integer family");
+    ASSERT_EQ(error_msg, ctx->error_msg());
+}
+
 TEST_F(BitmapFunctionsTest, bitmap_union_int) {
     StringVal dst;
     BitmapFunctions::bitmap_init(ctx, &dst);