From 9477c48ef87479e5d18fc32c039c05fdf11c0691 Mon Sep 17 00:00:00 2001 From: yiguolei <676222867@qq.com> Date: Mon, 6 Mar 2023 16:07:09 +0800 Subject: [PATCH] [refactor](functioncontext) remove duplicate type definition in function context (#17421) remove duplicate type definition in function context remove unused method in function context not need stale state in vexpr context because vexpr is stateless and function context saves state and they are cloned. remove useless slot_size in all tuple or slot descriptor. remove doris_udf namespace, it is useless. remove some unused macro definitions. init v_conjuncts in vscanner, not need write the same code in every scanner. using unique ptr to manage function context since it could only belong to a single expr context. Issue Number: close #xxx --------- Co-authored-by: yiguolei --- be/src/exprs/function_filter.h | 8 +- be/src/exprs/json_functions.h | 2 - be/src/exprs/math_functions.h | 4 +- be/src/exprs/string_functions.h | 2 +- be/src/olap/like_column_predicate.cpp | 11 +- be/src/olap/like_column_predicate.h | 4 +- be/src/olap/olap_define.h | 25 -- be/src/olap/rowset/beta_rowset_writer.cpp | 2 +- be/src/runtime/buffer_control_block.cpp | 2 + be/src/runtime/collection_value.h | 6 +- be/src/runtime/datetime_value.h | 4 +- be/src/runtime/decimalv2_value.h | 2 - be/src/runtime/descriptor_helper.h | 9 +- be/src/runtime/descriptors.cpp | 50 +-- be/src/runtime/descriptors.h | 23 -- be/src/runtime/primitive_type.cpp | 127 ------- be/src/runtime/primitive_type.h | 5 - be/src/runtime/types.cpp | 1 + be/src/runtime/types.h | 10 +- be/src/udf/CMakeLists.txt | 2 +- be/src/udf/uda_test_harness.h | 270 --------------- be/src/udf/udf.cpp | 37 +- be/src/udf/udf.h | 92 +---- be/src/udf/udf_debug.h | 50 --- be/src/udf/udf_internal.h | 20 +- be/src/util/bitmap_value.h | 16 +- be/src/util/counts.h | 6 +- be/src/vec/common/string_ref.h | 1 - .../vec/exec/format/parquet/schema_desc.cpp | 1 + be/src/vec/exec/scan/new_es_scanner.cpp | 5 +- be/src/vec/exec/scan/new_olap_scan_node.cpp | 4 +- be/src/vec/exec/scan/new_olap_scan_node.h | 2 +- be/src/vec/exec/scan/new_olap_scanner.cpp | 5 +- be/src/vec/exec/scan/vfile_scanner.cpp | 6 +- be/src/vec/exec/scan/vmeta_scanner.cpp | 5 +- be/src/vec/exec/scan/vscan_node.cpp | 4 +- be/src/vec/exec/scan/vscan_node.h | 2 +- be/src/vec/exec/scan/vscanner.cpp | 8 + be/src/vec/exec/scan/vscanner.h | 4 +- be/src/vec/exprs/vexpr.cpp | 127 +------ be/src/vec/exprs/vexpr.h | 1 - be/src/vec/exprs/vexpr_context.cpp | 14 +- be/src/vec/exprs/vexpr_context.h | 17 +- be/src/vec/functions/function_timestamp.cpp | 2 +- be/src/vec/functions/in.h | 11 +- be/src/vec/runtime/vdatetime_value.h | 7 +- be/test/CMakeLists.txt | 6 +- be/test/runtime/datetime_value_test.cpp | 4 +- be/test/testutil/desc_tbl_builder.cc | 14 +- be/test/testutil/function_utils.cpp | 9 +- be/test/testutil/function_utils.h | 9 +- be/test/udf/uda_test.cpp | 315 ------------------ be/test/udf/udf_test.cpp | 193 ----------- be/test/util/counts_test.cpp | 4 +- be/test/vec/function/function_test_util.cpp | 36 +- be/test/vec/function/function_test_util.h | 20 +- gensrc/proto/descriptors.proto | 2 +- 57 files changed, 176 insertions(+), 1452 deletions(-) delete mode 100644 be/src/udf/uda_test_harness.h delete mode 100644 be/src/udf/udf_debug.h delete mode 100644 be/test/udf/uda_test.cpp delete mode 100644 be/test/udf/udf_test.cpp diff --git a/be/src/exprs/function_filter.h b/be/src/exprs/function_filter.h index 84758e793c..ba240498a5 100644 --- a/be/src/exprs/function_filter.h +++ b/be/src/exprs/function_filter.h @@ -25,8 +25,8 @@ namespace doris { class FunctionFilter { public: - FunctionFilter(bool opposite, const std::string& col_name, doris_udf::FunctionContext* fn_ctx, - doris_udf::StringVal string_param) + FunctionFilter(bool opposite, const std::string& col_name, doris::FunctionContext* fn_ctx, + doris::StringVal string_param) : _opposite(opposite), _col_name(col_name), _fn_ctx(fn_ctx), @@ -35,8 +35,8 @@ public: bool _opposite; std::string _col_name; // these pointer's life time controlled by scan node - doris_udf::FunctionContext* _fn_ctx; - doris_udf::StringVal + doris::FunctionContext* _fn_ctx; + doris::StringVal _string_param; // only one param from conjunct, because now only support like predicate }; diff --git a/be/src/exprs/json_functions.h b/be/src/exprs/json_functions.h index a070b136b5..32a2f93814 100644 --- a/be/src/exprs/json_functions.h +++ b/be/src/exprs/json_functions.h @@ -70,8 +70,6 @@ struct JsonPath { } }; -using namespace doris_udf; - class JsonFunctions { public: /** diff --git a/be/src/exprs/math_functions.h b/be/src/exprs/math_functions.h index bfddcf43fd..8d1563a17e 100644 --- a/be/src/exprs/math_functions.h +++ b/be/src/exprs/math_functions.h @@ -32,8 +32,8 @@ public: // Converts src_num in decimal to dest_base, // and fills expr_val.string_val with the result. - static doris_udf::StringVal decimal_to_base(doris_udf::FunctionContext* ctx, int64_t src_num, - int8_t dest_base); + static doris::StringVal decimal_to_base(doris::FunctionContext* ctx, int64_t src_num, + int8_t dest_base); // Converts src_num representing a number in src_base but encoded in decimal // into its actual decimal number. diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h index 373686dbb6..14515b8dc5 100644 --- a/be/src/exprs/string_functions.h +++ b/be/src/exprs/string_functions.h @@ -34,7 +34,7 @@ namespace doris { class StringFunctions { public: - static bool set_re2_options(const doris_udf::StringVal& match_parameter, std::string* error_str, + static bool set_re2_options(const doris::StringVal& match_parameter, std::string* error_str, re2::RE2::Options* opts); // The caller owns the returned regex. Returns nullptr if the pattern could not be compiled. diff --git a/be/src/olap/like_column_predicate.cpp b/be/src/olap/like_column_predicate.cpp index cb5a127f82..d2057534c9 100644 --- a/be/src/olap/like_column_predicate.cpp +++ b/be/src/olap/like_column_predicate.cpp @@ -25,21 +25,20 @@ namespace doris { template <> LikeColumnPredicate::LikeColumnPredicate(bool opposite, uint32_t column_id, - doris_udf::FunctionContext* fn_ctx, - doris_udf::StringVal val) + doris::FunctionContext* fn_ctx, doris::StringVal val) : ColumnPredicate(column_id, opposite), pattern(reinterpret_cast(val.ptr), val.len) { _state = reinterpret_cast( - fn_ctx->get_function_state(doris_udf::FunctionContext::THREAD_LOCAL)); + fn_ctx->get_function_state(doris::FunctionContext::THREAD_LOCAL)); _state->search_state.clone(_like_state); } template <> LikeColumnPredicate::LikeColumnPredicate(bool opposite, uint32_t column_id, - doris_udf::FunctionContext* fn_ctx, - doris_udf::StringVal val) + doris::FunctionContext* fn_ctx, + doris::StringVal val) : ColumnPredicate(column_id, opposite), pattern(val) { _state = reinterpret_cast( - fn_ctx->get_function_state(doris_udf::FunctionContext::THREAD_LOCAL)); + fn_ctx->get_function_state(doris::FunctionContext::THREAD_LOCAL)); } template diff --git a/be/src/olap/like_column_predicate.h b/be/src/olap/like_column_predicate.h index a38f9228fb..941c00723d 100644 --- a/be/src/olap/like_column_predicate.h +++ b/be/src/olap/like_column_predicate.h @@ -28,8 +28,8 @@ namespace doris { template class LikeColumnPredicate : public ColumnPredicate { public: - LikeColumnPredicate(bool opposite, uint32_t column_id, doris_udf::FunctionContext* fn_ctx, - doris_udf::StringVal val); + LikeColumnPredicate(bool opposite, uint32_t column_id, doris::FunctionContext* fn_ctx, + doris::StringVal val); ~LikeColumnPredicate() override = default; PredicateType type() const override { return PredicateType::EQ; } diff --git a/be/src/olap/olap_define.h b/be/src/olap/olap_define.h index fbc4f24557..ecfceda960 100644 --- a/be/src/olap/olap_define.h +++ b/be/src/olap/olap_define.h @@ -125,8 +125,6 @@ static const uint64_t GB_EXCHANGE_BYTE = 1024 * 1024 * 1024; // bloom filter fpp static const double BLOOM_FILTER_DEFAULT_FPP = 0.05; -#define OLAP_GOTO(label) goto label - enum ColumnFamilyIndex { DEFAULT_COLUMN_FAMILY_INDEX = 0, DORIS_COLUMN_FAMILY_INDEX, @@ -186,29 +184,6 @@ const std::string REMOTE_TABLET_GC_PREFIX = "tgc_"; type_t(const type_t&); #endif -// 没有使用的变量不报warning -#define OLAP_UNUSED_ARG(a) (void)(a) - -// thread-safe(gcc only) method for obtaining singleton -#define DECLARE_SINGLETON(classname) \ -public: \ - static classname* instance() { \ - classname* p_instance = nullptr; \ - try { \ - static classname s_instance; \ - p_instance = &s_instance; \ - } catch (...) { \ - p_instance = nullptr; \ - } \ - return p_instance; \ - } \ - \ -protected: \ - classname(); \ - \ -private: \ - ~classname(); - #define SAFE_DELETE(ptr) \ do { \ if (nullptr != ptr) { \ diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 684d42a1f8..02e4d31841 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -66,7 +66,7 @@ BetaRowsetWriter::~BetaRowsetWriter() { * when the job is cancelled. Although it is meaningless to continue segcompaction when the job * is cancelled, the objects involved in the job should be preserved during segcompaction to * avoid crashs for memory issues. */ - OLAP_UNUSED_ARG(_wait_flying_segcompaction()); + _wait_flying_segcompaction(); // TODO(lingbin): Should wrapper exception logic, no need to know file ops directly. if (!_already_built) { // abnormal exit, remove all files generated diff --git a/be/src/runtime/buffer_control_block.cpp b/be/src/runtime/buffer_control_block.cpp index 25c268aaf2..7fa0c50923 100644 --- a/be/src/runtime/buffer_control_block.cpp +++ b/be/src/runtime/buffer_control_block.cpp @@ -19,7 +19,9 @@ #include "gen_cpp/PaloInternalService_types.h" #include "gen_cpp/internal_service.pb.h" +#include "runtime/exec_env.h" #include "runtime/raw_value.h" +#include "runtime/thread_context.h" #include "service/brpc.h" #include "util/thrift_util.h" diff --git a/be/src/runtime/collection_value.h b/be/src/runtime/collection_value.h index 15e29b88f8..6b59066f97 100644 --- a/be/src/runtime/collection_value.h +++ b/be/src/runtime/collection_value.h @@ -21,13 +21,13 @@ #include "runtime/primitive_type.h" -namespace doris_udf { +namespace doris { class FunctionContext; -} // namespace doris_udf +} // namespace doris namespace doris { -using doris_udf::FunctionContext; +using doris::FunctionContext; using MemFootprint = std::pair; using GenMemFootprintFunc = std::function; diff --git a/be/src/runtime/datetime_value.h b/be/src/runtime/datetime_value.h index 1e3ee94c59..057bba982b 100644 --- a/be/src/runtime/datetime_value.h +++ b/be/src/runtime/datetime_value.h @@ -527,12 +527,12 @@ public: DateTimeValue& operator--() { return *this += -1; } - void to_datetime_val(doris_udf::DateTimeVal* tv) const { + void to_datetime_val(doris::DateTimeVal* tv) const { tv->packed_time = to_int64_datetime_packed(); tv->type = _type; } - static DateTimeValue from_datetime_val(const doris_udf::DateTimeVal& tv) { + static DateTimeValue from_datetime_val(const doris::DateTimeVal& tv) { DateTimeValue value; value.from_packed_time(tv.packed_time); if (tv.type == TIME_DATE) { diff --git a/be/src/runtime/decimalv2_value.h b/be/src/runtime/decimalv2_value.h index 9d670b26ea..44d93c7e11 100644 --- a/be/src/runtime/decimalv2_value.h +++ b/be/src/runtime/decimalv2_value.h @@ -47,8 +47,6 @@ enum DecimalError { enum DecimalRoundMode { HALF_UP = 1, HALF_EVEN = 2, CEILING = 3, FLOOR = 4, TRUNCATE = 5 }; -using namespace doris_udf; - class DecimalV2Value { public: friend DecimalV2Value operator+(const DecimalV2Value& v1, const DecimalV2Value& v2); diff --git a/be/src/runtime/descriptor_helper.h b/be/src/runtime/descriptor_helper.h index 13892d154a..ba3f17c57a 100644 --- a/be/src/runtime/descriptor_helper.h +++ b/be/src/runtime/descriptor_helper.h @@ -117,17 +117,11 @@ public: } } int null_bytes = (num_nullables + 7) / 8; - int offset = null_bytes; int null_offset = 0; for (int i = 0; i < _slot_descs.size(); ++i) { auto& slot_desc = _slot_descs[i]; - int size = get_slot_size(thrift_to_type(slot_desc.slotType.types[0].scalar_type.type)); - int align = (size > 16) ? 16 : size; - offset = ((offset + align - 1) / align) * align; slot_desc.id = tb->next_slot_id(); slot_desc.parent = _tuple_id; - slot_desc.byteOffset = offset; - offset += size; if (slot_desc.nullIndicatorByte >= 0) { slot_desc.nullIndicatorBit = null_offset % 8; slot_desc.nullIndicatorByte = null_offset / 8; @@ -140,7 +134,8 @@ public: } _tuple_desc.id = _tuple_id; - _tuple_desc.byteSize = offset; + // Useless not set it. + _tuple_desc.byteSize = 0; _tuple_desc.numNullBytes = null_bytes; _tuple_desc.numNullSlots = _slot_descs.size(); diff --git a/be/src/runtime/descriptors.cpp b/be/src/runtime/descriptors.cpp index 01ed59eaac..1658a3bf6c 100644 --- a/be/src/runtime/descriptors.cpp +++ b/be/src/runtime/descriptors.cpp @@ -55,13 +55,11 @@ SlotDescriptor::SlotDescriptor(const TSlotDescriptor& tdesc) _type(TypeDescriptor::from_thrift(tdesc.slotType)), _parent(tdesc.parent), _col_pos(tdesc.columnPos), - _tuple_offset(tdesc.byteOffset), _null_indicator_offset(tdesc.nullIndicatorByte, tdesc.nullIndicatorBit), _col_name(tdesc.colName), _col_name_lower_case(to_lower(tdesc.colName)), _col_unique_id(tdesc.col_unique_id), _slot_idx(tdesc.slotIdx), - _slot_size(_type.get_slot_size()), _field_idx(-1), _is_materialized(tdesc.isMaterialized), _is_key(tdesc.is_key), @@ -72,13 +70,11 @@ SlotDescriptor::SlotDescriptor(const PSlotDescriptor& pdesc) _type(TypeDescriptor::from_protobuf(pdesc.slot_type())), _parent(pdesc.parent()), _col_pos(pdesc.column_pos()), - _tuple_offset(pdesc.byte_offset()), _null_indicator_offset(pdesc.null_indicator_byte(), pdesc.null_indicator_bit()), _col_name(pdesc.col_name()), _col_name_lower_case(to_lower(pdesc.col_name())), _col_unique_id(pdesc.col_unique_id()), _slot_idx(pdesc.slot_idx()), - _slot_size(_type.get_slot_size()), _field_idx(-1), _is_materialized(pdesc.is_materialized()), _is_key(pdesc.is_key()), @@ -89,7 +85,7 @@ void SlotDescriptor::to_protobuf(PSlotDescriptor* pslot) const { pslot->set_parent(_parent); _type.to_protobuf(pslot->mutable_slot_type()); pslot->set_column_pos(_col_pos); - pslot->set_byte_offset(_tuple_offset); + pslot->set_byte_offset(0); pslot->set_null_indicator_byte(_null_indicator_offset.byte_offset); pslot->set_null_indicator_bit(_null_indicator_offset.bit_offset); DCHECK_LE(_null_indicator_offset.bit_offset, 8); @@ -115,8 +111,7 @@ vectorized::DataTypePtr SlotDescriptor::get_data_type_ptr() const { std::string SlotDescriptor::debug_string() const { std::stringstream out; out << "Slot(id=" << _id << " type=" << _type << " col=" << _col_pos - << ", colname=" << _col_name << " offset=" << _tuple_offset - << " null=" << _null_indicator_offset.debug_string() << ")"; + << ", colname=" << _col_name << " null=" << _null_indicator_offset.debug_string() << ")"; return out.str(); } @@ -256,7 +251,6 @@ std::string JdbcTableDescriptor::debug_string() const { TupleDescriptor::TupleDescriptor(const TTupleDescriptor& tdesc, bool own_slots) : _id(tdesc.id), _table_desc(nullptr), - _byte_size(tdesc.byteSize), _num_null_bytes(tdesc.numNullBytes), _num_materialized_slots(0), _slots(), @@ -273,7 +267,6 @@ TupleDescriptor::TupleDescriptor(const TTupleDescriptor& tdesc, bool own_slots) TupleDescriptor::TupleDescriptor(const PTupleDescriptor& pdesc, bool own_slots) : _id(pdesc.id()), _table_desc(nullptr), - _byte_size(pdesc.byte_size()), _num_null_bytes(pdesc.num_null_bytes()), _num_materialized_slots(0), _slots(), @@ -313,22 +306,11 @@ std::vector TupleDescriptor::slots_ordered_by_idx() const { return sorted_slots; } -bool TupleDescriptor::layout_equals(const TupleDescriptor& other_desc) const { - if (byte_size() != other_desc.byte_size()) return false; - if (slots().size() != other_desc.slots().size()) return false; - - std::vector slots = slots_ordered_by_idx(); - std::vector other_slots = other_desc.slots_ordered_by_idx(); - for (int i = 0; i < slots.size(); ++i) { - if (!slots[i]->layout_equals(*other_slots[i])) return false; - } - return true; -} - void TupleDescriptor::to_protobuf(PTupleDescriptor* ptuple) const { ptuple->Clear(); ptuple->set_id(_id); - ptuple->set_byte_size(_byte_size); + // Useless not set + ptuple->set_byte_size(0); ptuple->set_num_null_bytes(_num_null_bytes); ptuple->set_table_id(-1); ptuple->set_num_null_slots(_num_null_slots); @@ -336,7 +318,7 @@ void TupleDescriptor::to_protobuf(PTupleDescriptor* ptuple) const { std::string TupleDescriptor::debug_string() const { std::stringstream out; - out << "Tuple(id=" << _id << " size=" << _byte_size; + out << "Tuple(id=" << _id; if (_table_desc != nullptr) { //out << " " << _table_desc->debug_string(); } @@ -501,19 +483,6 @@ bool RowDescriptor::equals(const RowDescriptor& other_desc) const { return true; } -bool RowDescriptor::layout_is_prefix_of(const RowDescriptor& other_desc) const { - if (_tuple_desc_map.size() > other_desc._tuple_desc_map.size()) return false; - for (int i = 0; i < _tuple_desc_map.size(); ++i) { - if (!_tuple_desc_map[i]->layout_equals(*other_desc._tuple_desc_map[i])) return false; - } - return true; -} - -bool RowDescriptor::layout_equals(const RowDescriptor& other_desc) const { - if (_tuple_desc_map.size() != other_desc._tuple_desc_map.size()) return false; - return layout_is_prefix_of(other_desc); -} - std::string RowDescriptor::debug_string() const { std::stringstream ss; @@ -674,15 +643,6 @@ SlotDescriptor* DescriptorTbl::get_slot_descriptor(SlotId id) const { } } -bool SlotDescriptor::layout_equals(const SlotDescriptor& other_desc) const { - if (type().type != other_desc.type().type) return false; - if (is_nullable() != other_desc.is_nullable()) return false; - if (slot_size() != other_desc.slot_size()) return false; - if (tuple_offset() != other_desc.tuple_offset()) return false; - if (!null_indicator_offset().equals(other_desc.null_indicator_offset())) return false; - return true; -} - std::string DescriptorTbl::debug_string() const { std::stringstream out; out << "tuples:\n"; diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index fd75a5ad03..e03aaac7f4 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -93,20 +93,13 @@ public: int col_pos() const { return _col_pos; } // Returns the field index in the generated llvm struct for this slot's tuple int field_idx() const { return _field_idx; } - int tuple_offset() const { return _tuple_offset; } const NullIndicatorOffset& null_indicator_offset() const { return _null_indicator_offset; } bool is_materialized() const { return _is_materialized; } bool is_nullable() const { return _null_indicator_offset.bit_mask != 0; } - int slot_size() const { return _slot_size; } - const std::string& col_name() const { return _col_name; } const std::string& col_name_lower_case() const { return _col_name_lower_case; } - /// Return true if the physical layout of this descriptor matches the physical layout - /// of other_desc, but not necessarily ids. - bool layout_equals(const SlotDescriptor& other_desc) const; - void to_protobuf(PSlotDescriptor* pslot) const; std::string debug_string() const; @@ -133,7 +126,6 @@ private: const TypeDescriptor _type; const TupleId _parent; const int _col_pos; - const int _tuple_offset; const NullIndicatorOffset _null_indicator_offset; const std::string _col_name; const std::string _col_name_lower_case; @@ -144,9 +136,6 @@ private: // this is provided by the FE const int _slot_idx; - // the byte size of this slot. - const int _slot_size; - // the idx of the slot in the llvm codegen'd tuple struct // this is set by TupleDescriptor during codegen and takes into account // leading null bytes. @@ -348,10 +337,6 @@ public: TupleId id() const { return _id; } - /// Return true if the physical layout of this descriptor matches that of other_desc, - /// but not necessarily the id. - bool layout_equals(const TupleDescriptor& other_desc) const; - std::string debug_string() const; void to_protobuf(PTupleDescriptor* ptuple) const; @@ -510,14 +495,6 @@ public: // Return true if the tuple ids of this descriptor match tuple ids of other desc. bool equals(const RowDescriptor& other_desc) const; - /// Return true if the physical layout of this descriptor matches the physical layout - /// of other_desc, but not necessarily the ids. - bool layout_equals(const RowDescriptor& other_desc) const; - - /// Return true if the tuples of this descriptor are a prefix of the tuples of - /// other_desc. Tuples are compared by their physical layout and not by ids. - bool layout_is_prefix_of(const RowDescriptor& other_desc) const; - std::string debug_string() const; int get_column_id(int slot_id) const; diff --git a/be/src/runtime/primitive_type.cpp b/be/src/runtime/primitive_type.cpp index 7a90ffbcf2..7085dce8e7 100644 --- a/be/src/runtime/primitive_type.cpp +++ b/be/src/runtime/primitive_type.cpp @@ -27,71 +27,6 @@ namespace doris { -PrimitiveType convert_type_to_primitive(FunctionContext::Type type) { - switch (type) { - case FunctionContext::Type::INVALID_TYPE: - return PrimitiveType::INVALID_TYPE; - case FunctionContext::Type::TYPE_DOUBLE: - return PrimitiveType::TYPE_DOUBLE; - case FunctionContext::Type::TYPE_NULL: - return PrimitiveType::TYPE_NULL; - case FunctionContext::Type::TYPE_CHAR: - return PrimitiveType::TYPE_CHAR; - case FunctionContext::Type::TYPE_VARCHAR: - return PrimitiveType::TYPE_VARCHAR; - case FunctionContext::Type::TYPE_STRING: - return PrimitiveType::TYPE_STRING; - case FunctionContext::Type::TYPE_DATETIME: - return PrimitiveType::TYPE_DATETIME; - case FunctionContext::Type::TYPE_DECIMALV2: - return PrimitiveType::TYPE_DECIMALV2; - case FunctionContext::Type::TYPE_DECIMAL32: - return PrimitiveType::TYPE_DECIMAL32; - case FunctionContext::Type::TYPE_DECIMAL64: - return PrimitiveType::TYPE_DECIMAL64; - case FunctionContext::Type::TYPE_DECIMAL128I: - return PrimitiveType::TYPE_DECIMAL128I; - case FunctionContext::Type::TYPE_BOOLEAN: - return PrimitiveType::TYPE_BOOLEAN; - case FunctionContext::Type::TYPE_ARRAY: - return PrimitiveType::TYPE_ARRAY; - case FunctionContext::Type::TYPE_MAP: - return PrimitiveType::TYPE_MAP; - case FunctionContext::Type::TYPE_STRUCT: - return PrimitiveType::TYPE_STRUCT; - case FunctionContext::Type::TYPE_OBJECT: - return PrimitiveType::TYPE_OBJECT; - case FunctionContext::Type::TYPE_HLL: - return PrimitiveType::TYPE_HLL; - case FunctionContext::Type::TYPE_QUANTILE_STATE: - return PrimitiveType::TYPE_QUANTILE_STATE; - case FunctionContext::Type::TYPE_TINYINT: - return PrimitiveType::TYPE_TINYINT; - case FunctionContext::Type::TYPE_SMALLINT: - return PrimitiveType::TYPE_SMALLINT; - case FunctionContext::Type::TYPE_INT: - return PrimitiveType::TYPE_INT; - case FunctionContext::Type::TYPE_BIGINT: - return PrimitiveType::TYPE_BIGINT; - case FunctionContext::Type::TYPE_LARGEINT: - return PrimitiveType::TYPE_LARGEINT; - case FunctionContext::Type::TYPE_DATE: - return PrimitiveType::TYPE_DATE; - case FunctionContext::Type::TYPE_DATEV2: - return PrimitiveType::TYPE_DATEV2; - case FunctionContext::Type::TYPE_DATETIMEV2: - return PrimitiveType::TYPE_DATETIMEV2; - case FunctionContext::Type::TYPE_TIMEV2: - return PrimitiveType::TYPE_TIMEV2; - case FunctionContext::Type::TYPE_JSONB: - return PrimitiveType::TYPE_JSONB; - default: - DCHECK(false); - } - - return PrimitiveType::INVALID_TYPE; -} - bool is_type_compatible(PrimitiveType lhs, PrimitiveType rhs) { if (lhs == TYPE_VARCHAR) { return rhs == TYPE_CHAR || rhs == TYPE_VARCHAR || rhs == TYPE_HLL || rhs == TYPE_OBJECT || @@ -591,66 +526,4 @@ PrimitiveType get_primitive_type(vectorized::TypeIndex v_type) { } } -int get_slot_size(PrimitiveType type) { - switch (type) { - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_STRING: - case TYPE_OBJECT: - case TYPE_HLL: - case TYPE_QUANTILE_STATE: - return sizeof(StringRef); - case TYPE_JSONB: - return sizeof(JsonBinaryValue); - case TYPE_VARIANT: - return sizeof(StringRef); - case TYPE_ARRAY: - return sizeof(CollectionValue); - case TYPE_MAP: - return sizeof(MapValue); - case TYPE_STRUCT: - return sizeof(StructValue); - - case TYPE_NULL: - case TYPE_BOOLEAN: - case TYPE_TINYINT: - return 1; - - case TYPE_SMALLINT: - return 2; - - case TYPE_INT: - case TYPE_DATEV2: - case TYPE_FLOAT: - case TYPE_DECIMAL32: - return 4; - - case TYPE_BIGINT: - case TYPE_DOUBLE: - case TYPE_TIME: - case TYPE_DECIMAL64: - case TYPE_DATETIMEV2: - case TYPE_TIMEV2: - return 8; - - case TYPE_LARGEINT: - return sizeof(__int128); - - case TYPE_DATE: - case TYPE_DATETIME: - // This is the size of the slot, the actual size of the data is 12. - return sizeof(DateTimeValue); - - case TYPE_DECIMALV2: - case TYPE_DECIMAL128I: - return 16; - - case INVALID_TYPE: - default: - DCHECK(false); - } - - return 0; -} - } // namespace doris diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index f6dacd1858..a1bab5e662 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -35,8 +35,6 @@ class DecimalV2Value; struct StringRef; struct JsonBinaryValue; -PrimitiveType convert_type_to_primitive(FunctionContext::Type type); - constexpr bool is_enumeration_type(PrimitiveType type) { switch (type) { case TYPE_FLOAT: @@ -98,9 +96,6 @@ constexpr bool has_variable_type(PrimitiveType type) { type == TYPE_QUANTILE_STATE || type == TYPE_STRING; } -// Returns the byte size of type when in a tuple -int get_slot_size(PrimitiveType type); - bool is_type_compatible(PrimitiveType lhs, PrimitiveType rhs); PrimitiveType get_primitive_type(vectorized::TypeIndex v_type); diff --git a/be/src/runtime/types.cpp b/be/src/runtime/types.cpp index 99f5f2debb..9ec81e9441 100644 --- a/be/src/runtime/types.cpp +++ b/be/src/runtime/types.cpp @@ -22,6 +22,7 @@ #include +#include "runtime/primitive_type.h" namespace doris { TypeDescriptor::TypeDescriptor(const std::vector& types, int* idx) diff --git a/be/src/runtime/types.h b/be/src/runtime/types.h index eaa9eef112..9cbd8d70d6 100644 --- a/be/src/runtime/types.h +++ b/be/src/runtime/types.h @@ -24,14 +24,16 @@ #include #include "common/config.h" -#include "runtime/primitive_type.h" +#include "gen_cpp/Types_types.h" +#include "gen_cpp/types.pb.h" +#include "olap/olap_define.h" +#include "runtime/define_primitive_type.h" namespace doris { extern const int HLL_COLUMN_DEFAULT_LEN; struct TPrimitiveType; -class PTypeDesc; // Describes a type. Includes the enum, children types, and any type-specific metadata // (e.g. precision and scale for decimals). @@ -40,7 +42,7 @@ struct TypeDescriptor { PrimitiveType type; /// Only set if type == TYPE_CHAR or type == TYPE_VARCHAR int len; - static constexpr int MAX_VARCHAR_LENGTH = OLAP_VARCHAR_MAX_LENGTH; + static constexpr int MAX_VARCHAR_LENGTH = 65535; static constexpr int MAX_CHAR_LENGTH = 255; static constexpr int MAX_CHAR_INLINE_LENGTH = 128; @@ -205,8 +207,6 @@ struct TypeDescriptor { bool is_variant_type() const { return type == TYPE_VARIANT; } - int get_slot_size() const { return ::doris::get_slot_size(type); } - static inline int get_decimal_byte_size(int precision) { DCHECK_GT(precision, 0); if (precision <= MAX_DECIMAL4_PRECISION) { diff --git a/be/src/udf/CMakeLists.txt b/be/src/udf/CMakeLists.txt index a0a012c629..c66e5a21dc 100755 --- a/be/src/udf/CMakeLists.txt +++ b/be/src/udf/CMakeLists.txt @@ -47,7 +47,7 @@ set (UDF_TEST_LINK_LIBS -lboost_date_time gtest) -set_target_properties(DorisUdf PROPERTIES PUBLIC_HEADER "udf.h;uda_test_harness.h") +set_target_properties(DorisUdf PROPERTIES PUBLIC_HEADER "udf.h") INSTALL(TARGETS DorisUdf ARCHIVE DESTINATION ${OUTPUT_DIR}/udf LIBRARY DESTINATION ${OUTPUT_DIR}/udf/lib diff --git a/be/src/udf/uda_test_harness.h b/be/src/udf/uda_test_harness.h deleted file mode 100644 index 26b3ec6f89..0000000000 --- a/be/src/udf/uda_test_harness.h +++ /dev/null @@ -1,270 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/udf/uda-test-harness.h -// and modified by Doris - -#pragma once - -#include -#include -#include - -#include "udf/udf.h" -#include "udf/udf_debug.h" - -namespace doris_udf { - -enum UdaExecutionMode { - ALL = 0, - SINGLE_NODE = 1, - ONE_LEVEL = 2, - TWO_LEVEL = 3, -}; - -template -class UdaTestHarnessBase { -public: - typedef void (*InitFn)(FunctionContext* context, INTERMEDIATE* result); - - typedef void (*MergeFn)(FunctionContext* context, const INTERMEDIATE& src, INTERMEDIATE* dst); - - typedef const INTERMEDIATE (*SerializeFn)(FunctionContext* context, const INTERMEDIATE& type); - - typedef RESULT (*FinalizeFn)(FunctionContext* context, const INTERMEDIATE& value); - - // UDA test harness allows for custom comparator to validate results. UDAs - // can specify a custom comparator to, for example, tolerate numerical imprecision. - // Returns true if x and y should be treated as equal. - typedef bool (*ResultComparator)(const RESULT& x, const RESULT& y); - - void set_result_comparator(ResultComparator fn) { _result_comparator_fn = fn; } - - // This must be called if the INTERMEDIATE is TYPE_FIXED_BUFFER - void set_intermediate_size(int byte_size) { _fixed_buffer_byte_size = byte_size; } - - // Returns the failure string if any. - const std::string& get_error_msg() const { return _error_msg; } - -protected: - UdaTestHarnessBase(InitFn init_fn, MergeFn merge_fn, SerializeFn serialize_fn, - FinalizeFn finalize_fn) - : _init_fn(init_fn), - _merge_fn(merge_fn), - _serialize_fn(serialize_fn), - _finalize_fn(finalize_fn), - _result_comparator_fn(nullptr), - _num_input_values(0) {} - - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const RESULT& expected, UdaExecutionMode mode); - - // Returns false if there is an error set in the context. - bool check_context(FunctionContext* context); - - // Verifies x == y, using the custom comparator if set. - bool check_result(const RESULT& x, const RESULT& y); - - // Runs the UDA on a single node. The entire execution happens in 1 context. - // The UDA does a update on all the input values and then a finalize. - RESULT execute_single_node(); - - // Runs the UDA, simulating a single level aggregation. The values are processed - // on num_nodes + 1 contexts. There are num_nodes that do update and serialize. - // There is a final context that does merge and finalize. - RESULT execute_one_level(int num_nodes); - - // Runs the UDA, simulating a two level aggregation with num1 in the first level and - // num2 in the second. The values are processed in num1 + num2 contexts. - RESULT execute_two_level(int num1, int num2); - - virtual void update(int idx, FunctionContext* context, INTERMEDIATE* dst) = 0; - -private: - // UDA functions - InitFn _init_fn; - MergeFn _merge_fn; - SerializeFn _serialize_fn; - FinalizeFn _finalize_fn; - - // Customer comparator, nullptr if default == should be used. - ResultComparator _result_comparator_fn; - - // Set during execute() by subclass - int _num_input_values; - - // Buffer len for intermediate results if the type is TYPE_FIXED_BUFFER - int _fixed_buffer_byte_size; - - // Error message if anything went wrong during the execution. - std::string _error_msg; -}; - -template -class UdaTestHarness : public UdaTestHarnessBase { -public: - typedef void (*UpdateFn)(FunctionContext* context, const INPUT& input, INTERMEDIATE* result); - - typedef UdaTestHarnessBase BaseClass; - - UdaTestHarness(typename BaseClass::InitFn init_fn, UpdateFn update_fn, - typename BaseClass::MergeFn merge_fn, - typename BaseClass::SerializeFn serialize_fn, - typename BaseClass::FinalizeFn finalize_fn) - : BaseClass(init_fn, merge_fn, serialize_fn, finalize_fn), _update_fn(update_fn) {} - - bool execute(const std::vector& values, const RESULT& expected) { - return execute(values, expected, ALL); - } - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values, const RESULT& expected, UdaExecutionMode mode); - - // Runs the UDA in all the modes, validating the result is 'expected' each time. - // T needs to be compatible (i.e. castable to) with INPUT - template - bool execute(const std::vector& values, const RESULT& expected) { - return execute(values, expected, ALL); - } - template - bool execute(const std::vector& values, const RESULT& expected, UdaExecutionMode mode) { - _input.resize(values.size()); - BaseClass::_num_input_values = _input.size(); - - for (int i = 0; i < values.size(); ++i) { - _input[i] = &values[i]; - } - - return BaseClass::execute(expected, mode); - } - -protected: - virtual void update(int idx, FunctionContext* context, INTERMEDIATE* dst); - -private: - UpdateFn _update_fn; - // Set during execute() - std::vector _input; -}; - -template -class UdaTestHarness2 : public UdaTestHarnessBase { -public: - typedef void (*UpdateFn)(FunctionContext* context, const INPUT1& input1, const INPUT2& input2, - INTERMEDIATE* result); - - typedef UdaTestHarnessBase BaseClass; - - ~UdaTestHarness2() {} - UdaTestHarness2(typename BaseClass::InitFn init_fn, UpdateFn update_fn, - typename BaseClass::MergeFn merge_fn, - typename BaseClass::SerializeFn serialize_fn, - typename BaseClass::FinalizeFn finalize_fn) - : BaseClass(init_fn, merge_fn, serialize_fn, finalize_fn), _update_fn(update_fn) {} - - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values1, const std::vector& values2, - const RESULT& expected, UdaExecutionMode mode); - - bool execute(const std::vector& values1, const std::vector& values2, - const RESULT& expected) { - return execute(values1, values2, expected, ALL); - } - -protected: - virtual void update(int idx, FunctionContext* context, INTERMEDIATE* dst); - -private: - UpdateFn _update_fn; - const std::vector* _input1; - const std::vector* _input2; -}; - -template -class UdaTestHarness3 : public UdaTestHarnessBase { -public: - typedef void (*UpdateFn)(FunctionContext* context, const INPUT1& input1, const INPUT2& input2, - const INPUT3& input3, INTERMEDIATE* result); - - typedef UdaTestHarnessBase BaseClass; - - ~UdaTestHarness3() {} - UdaTestHarness3(typename BaseClass::InitFn init_fn, UpdateFn update_fn, - typename BaseClass::MergeFn merge_fn, - typename BaseClass::SerializeFn serialize_fn, - typename BaseClass::FinalizeFn finalize_fn) - : BaseClass(init_fn, merge_fn, serialize_fn, finalize_fn), _update_fn(update_fn) {} - - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values1, const std::vector& values2, - const std::vector& values3, const RESULT& expected) { - return execute(values1, values2, values3, expected, ALL); - } - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values1, const std::vector& values2, - const std::vector& values3, const RESULT& expected, UdaExecutionMode mode); - -protected: - virtual void update(int idx, FunctionContext* context, INTERMEDIATE* dst); - -private: - UpdateFn _update_fn; - const std::vector* _input1; - const std::vector* _input2; - const std::vector* _input3; -}; - -template -class UdaTestHarness4 : public UdaTestHarnessBase { -public: - typedef void (*UpdateFn)(FunctionContext* context, const INPUT1& input1, const INPUT2& input2, - const INPUT3& input3, const INPUT4& input4, INTERMEDIATE* result); - - typedef UdaTestHarnessBase BaseClass; - - ~UdaTestHarness4() {} - UdaTestHarness4(typename BaseClass::InitFn init_fn, UpdateFn update_fn, - typename BaseClass::MergeFn merge_fn, - typename BaseClass::SerializeFn serialize_fn, - typename BaseClass::FinalizeFn finalize_fn) - : BaseClass(init_fn, merge_fn, serialize_fn, finalize_fn), _update_fn(update_fn) {} - - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values1, const std::vector& values2, - const std::vector& values3, const std::vector& values4, - const RESULT& expected) { - return execute(values1, values2, values3, values4, expected, ALL); - } - // Runs the UDA in all the modes, validating the result is 'expected' each time. - bool execute(const std::vector& values1, const std::vector& values2, - const std::vector& values3, const std::vector& values4, - const RESULT& expected, UdaExecutionMode mode); - -protected: - virtual void update(int idx, FunctionContext* context, INTERMEDIATE* dst); - -private: - UpdateFn _update_fn; - const std::vector* _input1; - const std::vector* _input2; - const std::vector* _input3; - const std::vector* _input4; -}; - -} // namespace doris_udf - -#include "udf/uda_test_harness_impl.hpp" diff --git a/be/src/udf/udf.cpp b/be/src/udf/udf.cpp index f7b3c6f552..d547e60432 100644 --- a/be/src/udf/udf.cpp +++ b/be/src/udf/udf.cpp @@ -34,6 +34,7 @@ // binary. For example, it would be unfortunate if they had a random dependency // on libhdfs. #include "runtime/runtime_state.h" +#include "runtime/types.h" #include "udf/udf_internal.h" #include "util/debug_util.h" @@ -41,7 +42,6 @@ namespace doris { FunctionContextImpl::FunctionContextImpl() : _state(nullptr), - _version(doris_udf::FunctionContext::V2_0), _num_warnings(0), _thread_local_fn_state(nullptr), _fragment_local_fn_state(nullptr) {} @@ -51,43 +51,36 @@ void FunctionContextImpl::set_constant_cols( _constant_cols = constant_cols; } -doris_udf::FunctionContext* FunctionContextImpl::create_context( - RuntimeState* state, const doris_udf::FunctionContext::TypeDesc& return_type, - const std::vector& arg_types) { - auto* ctx = new doris_udf::FunctionContext(); +std::unique_ptr FunctionContextImpl::create_context( + RuntimeState* state, const doris::TypeDescriptor& return_type, + const std::vector& arg_types) { + auto ctx = std::unique_ptr(new doris::FunctionContext()); ctx->_impl->_state = state; ctx->_impl->_return_type = return_type; ctx->_impl->_arg_types = arg_types; return ctx; } -FunctionContext* FunctionContextImpl::clone() { - doris_udf::FunctionContext* new_context = create_context(_state, _return_type, _arg_types); +std::unique_ptr FunctionContextImpl::clone() { + auto new_context = create_context(_state, _return_type, _arg_types); new_context->_impl->_constant_cols = _constant_cols; new_context->_impl->_fragment_local_fn_state = _fragment_local_fn_state; return new_context; } +const doris::TypeDescriptor& FunctionContextImpl::get_return_type() const { + return _return_type; +} + } // namespace doris -namespace doris_udf { +namespace doris { static const int MAX_WARNINGS = 1000; FunctionContext::FunctionContext() { _impl = std::make_unique(); } -FunctionContext::DorisVersion FunctionContext::version() const { - return _impl->_version; -} - -FunctionContext::UniqueId FunctionContext::query_id() const { - UniqueId id; - id.hi = _impl->_state->query_id().hi; - id.lo = _impl->_state->query_id().lo; - return id; -} - void FunctionContext::set_function_state(FunctionStateScope scope, std::shared_ptr ptr) { switch (scope) { case THREAD_LOCAL: @@ -131,7 +124,7 @@ bool FunctionContext::add_warning(const char* warning_msg) { } } -const FunctionContext::TypeDesc* FunctionContext::get_arg_type(int arg_idx) const { +const doris::TypeDescriptor* FunctionContext::get_arg_type(int arg_idx) const { if (arg_idx < 0 || arg_idx >= _impl->_arg_types.size()) { return nullptr; } @@ -156,7 +149,7 @@ int FunctionContext::get_num_args() const { return _impl->_arg_types.size(); } -const FunctionContext::TypeDesc& FunctionContext::get_return_type() const { +const doris::TypeDescriptor& FunctionContext::get_return_type() const { return _impl->_return_type; } @@ -180,4 +173,4 @@ StringVal FunctionContext::create_temp_string_val(int64_t len) { std::ostream& operator<<(std::ostream& os, const StringVal& string_val) { return os << string_val.to_string(); } -} // namespace doris_udf +} // namespace doris diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index d92ac31dae..093048372c 100644 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -27,11 +27,8 @@ #include #include #include - -// This is the only Doris header required to develop UDFs and UDAs. This header -// contains the types that need to be used and the FunctionContext object. The context -// object serves as the interface object between the UDF/UDA and the doris process. namespace doris { + class FunctionContextImpl; struct ColumnPtrWrapper; struct StringRef; @@ -39,10 +36,7 @@ class BitmapValue; class DecimalV2Value; class DateTimeValue; class CollectionValue; -} // namespace doris - -namespace doris_udf { - +struct TypeDescriptor; // All input and output values will be one of the structs below. The struct is a simple // object containing a boolean to store if the value is nullptr and the value itself. The // value is unspecified if the nullptr boolean is set. @@ -56,64 +50,6 @@ struct DecimalV2Val; // and manage memory. class FunctionContext { public: - enum DorisVersion { - V2_0, - }; - - enum Type { - INVALID_TYPE = 0, - TYPE_NULL, - TYPE_BOOLEAN, - TYPE_TINYINT, - TYPE_SMALLINT, - TYPE_INT, - TYPE_BIGINT, - TYPE_LARGEINT, - TYPE_FLOAT, - TYPE_DOUBLE, - TYPE_DECIMAL [[deprecated]], - TYPE_DATE, - TYPE_DATETIME, - TYPE_CHAR, - TYPE_VARCHAR, - TYPE_HLL, - TYPE_STRING, - TYPE_FIXED_BUFFER, - TYPE_DECIMALV2, - TYPE_OBJECT, - TYPE_ARRAY, - TYPE_MAP, - TYPE_STRUCT, - TYPE_QUANTILE_STATE, - TYPE_DATEV2, - TYPE_DATETIMEV2, - TYPE_TIMEV2, - TYPE_DECIMAL32, - TYPE_DECIMAL64, - TYPE_DECIMAL128I, - TYPE_JSONB, - TYPE_VARIANT - }; - - struct TypeDesc { - Type type; - - /// Only valid if type == TYPE_DECIMAL - int precision; - int scale; - - /// Only valid if type == TYPE_FIXED_BUFFER || type == TYPE_VARCHAR - int len; - - // only valid if type == TYPE_ARRAY - std::vector children; - }; - - struct UniqueId { - int64_t hi; - int64_t lo; - }; - enum FunctionStateScope { /// Indicates that the function state for this FunctionContext's UDF is shared across /// the plan fragment (a query is divided into multiple plan fragments, each of which @@ -136,12 +72,6 @@ public: THREAD_LOCAL, }; - // Returns the version of Doris that's currently running. - DorisVersion version() const; - - // Returns the query_id for the current query. - UniqueId query_id() const; - // Sets an error for this UDF. If this is called, this will trigger the // query to fail. // Note: when you set error for the UDFs used in Data Load, you should @@ -177,7 +107,7 @@ public: // Returns the return type information of this function. For UDAs, this is the final // return type of the UDA (e.g., the type returned by the finalize function). - const TypeDesc& get_return_type() const; + const doris::TypeDescriptor& get_return_type() const; // Returns the number of arguments to this function (not including the FunctionContext* // argument). @@ -185,7 +115,7 @@ public: // Returns the type information for the arg_idx-th argument (0-indexed, not including // the FunctionContext* argument). Returns nullptr if arg_idx is invalid. - const TypeDesc* get_arg_type(int arg_idx) const; + const doris::TypeDescriptor* get_arg_type(int arg_idx) const; // Returns true if the arg_idx-th input argument (0 indexed, not including the // FunctionContext* argument) is a constant (e.g. 5, "string", 1 + 1). @@ -430,10 +360,10 @@ struct DecimalV2Val : public AnyVal { bool operator!=(const DecimalV2Val& other) const { return !(*this == other); } }; -using doris_udf::BigIntVal; -using doris_udf::DoubleVal; -using doris_udf::StringVal; -using doris_udf::DecimalV2Val; -using doris_udf::DateTimeVal; -using doris_udf::FunctionContext; -} // namespace doris_udf +using doris::BigIntVal; +using doris::DoubleVal; +using doris::StringVal; +using doris::DecimalV2Val; +using doris::DateTimeVal; +using doris::FunctionContext; +} // namespace doris diff --git a/be/src/udf/udf_debug.h b/be/src/udf/udf_debug.h deleted file mode 100644 index 3cdbd3aecc..0000000000 --- a/be/src/udf/udf_debug.h +++ /dev/null @@ -1,50 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/udf/udf-debug.h -// and modified by Doris - -#pragma once - -#include -#include - -#include "udf/udf.h" - -namespace doris_udf { - -template -std::string debug_string(const T& val) { - if (val.is_null) { - return "NULL"; - } - - std::stringstream ss; - ss << val.val; - return ss.str(); -} - -template <> -std::string debug_string(const StringVal& val) { - if (val.is_null) { - return "NULL"; - } - - return std::string(reinterpret_cast(val.ptr), val.len); -} - -} // namespace doris_udf diff --git a/be/src/udf/udf_internal.h b/be/src/udf/udf_internal.h index afc2e3ca03..5cdae856b5 100644 --- a/be/src/udf/udf_internal.h +++ b/be/src/udf/udf_internal.h @@ -27,12 +27,14 @@ #include #include +#include "runtime/types.h" #include "udf/udf.h" namespace doris { class RuntimeState; struct ColumnPtrWrapper; +struct TypeDescriptor; // This class actually implements the interface of FunctionContext. This is split to // hide the details from the external header. @@ -41,9 +43,9 @@ class FunctionContextImpl { public: /// Create a FunctionContext for a UDA. Identical to the UDF version except for the /// intermediate type. Caller is responsible for deleting it. - static doris_udf::FunctionContext* create_context( - RuntimeState* state, const doris_udf::FunctionContext::TypeDesc& return_type, - const std::vector& arg_types); + static std::unique_ptr create_context( + RuntimeState* state, const doris::TypeDescriptor& return_type, + const std::vector& arg_types); ~FunctionContextImpl() {} @@ -52,7 +54,7 @@ public: /// Returns a new FunctionContext with the same constant args, fragment-local state, and /// debug flag as this FunctionContext. The caller is responsible for calling delete on /// it. - doris_udf::FunctionContext* clone(); + std::unique_ptr clone(); void set_constant_cols(const std::vector>& cols); @@ -60,7 +62,7 @@ public: std::string& string_result() { return _string_result; } - const doris_udf::FunctionContext::TypeDesc& get_return_type() const { return _return_type; } + const doris::TypeDescriptor& get_return_type() const; bool check_overflow_for_decimal() const { return _check_overflow_for_decimal; } @@ -69,14 +71,12 @@ public: } private: - friend class doris_udf::FunctionContext; + friend class doris::FunctionContext; // We use the query's runtime state to report errors and warnings. nullptr for test // contexts. RuntimeState* _state; - doris_udf::FunctionContext::DorisVersion _version; - // Empty if there's no error std::string _error_msg; @@ -88,10 +88,10 @@ private: std::shared_ptr _fragment_local_fn_state; // Type descriptor for the return type of the function. - doris_udf::FunctionContext::TypeDesc _return_type; + doris::TypeDescriptor _return_type; // Type descriptors for each argument of the function. - std::vector _arg_types; + std::vector _arg_types; std::vector> _constant_cols; diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index 1e263d594e..d95ff00f1d 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -1628,14 +1628,14 @@ public: return true; } - doris_udf::BigIntVal minimum() const { + doris::BigIntVal minimum() const { switch (_type) { case SINGLE: - return doris_udf::BigIntVal(_sv); + return doris::BigIntVal(_sv); case BITMAP: - return doris_udf::BigIntVal(_bitmap.minimum()); + return doris::BigIntVal(_bitmap.minimum()); default: - return doris_udf::BigIntVal::null(); + return doris::BigIntVal::null(); } } @@ -1673,14 +1673,14 @@ public: return ss.str(); } - doris_udf::BigIntVal maximum() const { + doris::BigIntVal maximum() const { switch (_type) { case SINGLE: - return doris_udf::BigIntVal(_sv); + return doris::BigIntVal(_sv); case BITMAP: - return doris_udf::BigIntVal(_bitmap.maximum()); + return doris::BigIntVal(_bitmap.maximum()); default: - return doris_udf::BigIntVal::null(); + return doris::BigIntVal::null(); } } diff --git a/be/src/util/counts.h b/be/src/util/counts.h index 5f4e9fe30e..0818dbeed3 100644 --- a/be/src/util/counts.h +++ b/be/src/util/counts.h @@ -107,9 +107,9 @@ public: return (higher - position) * lower_key + (position - lower) * higher_key; } - doris_udf::DoubleVal terminate(double quantile) const { + doris::DoubleVal terminate(double quantile) const { if (_counts.empty()) { - return doris_udf::DoubleVal::null(); + return doris::DoubleVal::null(); } std::vector> elems(_counts.begin(), _counts.end()); @@ -126,7 +126,7 @@ public: long max_position = total - 1; double position = max_position * quantile; - return doris_udf::DoubleVal(get_percentile(elems, position)); + return doris::DoubleVal(get_percentile(elems, position)); } private: diff --git a/be/src/vec/common/string_ref.h b/be/src/vec/common/string_ref.h index 61cdcd8ea0..158d7ed5de 100644 --- a/be/src/vec/common/string_ref.h +++ b/be/src/vec/common/string_ref.h @@ -193,7 +193,6 @@ inline int string_compare(const char* s1, int64_t n1, const char* s2, int64_t n2 } // unnamed namespace -using namespace doris_udf; /// The thing to avoid creating strings to find substrings in the hash table. /// User should make sure data source is const. /// maybe considering rewrite it with std::span / std::basic_string_view is meaningful. diff --git a/be/src/vec/exec/format/parquet/schema_desc.cpp b/be/src/vec/exec/format/parquet/schema_desc.cpp index 3f1cb04160..c7ff2353d4 100644 --- a/be/src/vec/exec/format/parquet/schema_desc.cpp +++ b/be/src/vec/exec/format/parquet/schema_desc.cpp @@ -18,6 +18,7 @@ #include "schema_desc.h" #include "common/logging.h" +#include "util/slice.h" namespace doris::vectorized { diff --git a/be/src/vec/exec/scan/new_es_scanner.cpp b/be/src/vec/exec/scan/new_es_scanner.cpp index 6e5a9a266b..18233dde6c 100644 --- a/be/src/vec/exec/scan/new_es_scanner.cpp +++ b/be/src/vec/exec/scan/new_es_scanner.cpp @@ -42,10 +42,7 @@ NewEsScanner::NewEsScanner(RuntimeState* state, NewEsScanNode* parent, int64_t l Status NewEsScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { VLOG_CRITICAL << NEW_SCANNER_TYPE << "::prepare"; - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(_state, &_vconjunct_ctx)); - } + RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); if (_is_init) { return Status::OK(); diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index ae91bca981..4ad652f088 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -294,7 +294,7 @@ Status NewOlapScanNode::_build_key_ranges_and_filters() { Status NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringVal* constant_str, - doris_udf::FunctionContext** fn_ctx, + doris::FunctionContext** fn_ctx, VScanNode::PushDownType& pdt) { // Now only `like` function filters is supported to push down if (fn_call->fn().name.function_name != "like") { @@ -303,7 +303,7 @@ Status NewOlapScanNode::_should_push_down_function_filter(VectorizedFnCall* fn_c } const auto& children = fn_call->children(); - doris_udf::FunctionContext* func_cxt = expr_ctx->fn_context(fn_call->fn_context_index()); + doris::FunctionContext* func_cxt = expr_ctx->fn_context(fn_call->fn_context_index()); DCHECK(func_cxt != nullptr); DCHECK(children.size() == 2); for (size_t i = 0; i < children.size(); i++) { diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h index b179f31060..9ec95e6d0a 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.h +++ b/be/src/vec/exec/scan/new_olap_scan_node.h @@ -46,7 +46,7 @@ protected: Status _should_push_down_function_filter(VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringVal* constant_str, - doris_udf::FunctionContext** fn_ctx, + doris::FunctionContext** fn_ctx, PushDownType& pdt) override; PushDownType _should_push_down_bloom_filter() override { return PushDownType::ACCEPTABLE; } diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 23d448ad0a..80af93eff2 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -52,10 +52,7 @@ Status NewOlapScanner::prepare(const TPaloScanRange& scan_range, const std::vector& filters, const FilterPredicates& filter_predicates, const std::vector& function_filters) { - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(_state, &_vconjunct_ctx)); - } + RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); // set limit to reduce end of rowset and segment mem use _tablet_reader = std::make_unique(); diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 066760c5fe..72522119f3 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -60,6 +60,7 @@ VFileScanner::VFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t Status VFileScanner::prepare( VExprContext** vconjunct_ctx_ptr, std::unordered_map* colname_to_value_range) { + RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); _colname_to_value_range = colname_to_value_range; _get_block_timer = ADD_TIMER(_parent->_scanner_profile, "FileScannerGetBlockTime"); @@ -79,11 +80,6 @@ Status VFileScanner::prepare( _io_ctx->query_id = &_state->query_id(); _io_ctx->enable_file_cache = _state->query_options().enable_file_cache; - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(_state, &_vconjunct_ctx)); - } - if (_is_load) { _src_block_mem_reuse = true; _src_row_desc.reset(new RowDescriptor(_state->desc_tbl(), diff --git a/be/src/vec/exec/scan/vmeta_scanner.cpp b/be/src/vec/exec/scan/vmeta_scanner.cpp index 89bd558ae6..86e60edf11 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.cpp +++ b/be/src/vec/exec/scan/vmeta_scanner.cpp @@ -44,10 +44,7 @@ Status VMetaScanner::open(RuntimeState* state) { Status VMetaScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { VLOG_CRITICAL << "VMetaScanner::prepare"; - if (vconjunct_ctx_ptr != nullptr) { - // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. - RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(_state, &_vconjunct_ctx)); - } + RETURN_IF_ERROR(VScanner::prepare(_state, vconjunct_ctx_ptr)); _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); if (_scan_range.meta_scan_range.__isset.iceberg_params) { RETURN_IF_ERROR(_fetch_iceberg_metadata_batch()); diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index d10cf98987..27edacfb4c 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -355,7 +355,6 @@ Status VScanNode::_append_rf_into_conjuncts(std::vector& vexprs) { RETURN_IF_ERROR(new_vconjunct_ctx_ptr->prepare(_state, _row_descriptor)); RETURN_IF_ERROR(new_vconjunct_ctx_ptr->open(_state)); if (_vconjunct_ctx_ptr) { - (*_vconjunct_ctx_ptr)->mark_as_stale(); _stale_vexpr_ctxs.push_back(std::move(_vconjunct_ctx_ptr)); } _vconjunct_ctx_ptr.reset(new doris::vectorized::VExprContext*); @@ -459,7 +458,6 @@ Status VScanNode::_normalize_conjuncts() { if (new_root) { (*_vconjunct_ctx_ptr)->set_root(new_root); } else { - (*_vconjunct_ctx_ptr)->mark_as_stale(); _stale_vexpr_ctxs.push_back(std::move(_vconjunct_ctx_ptr)); _vconjunct_ctx_ptr.reset(nullptr); } @@ -640,7 +638,7 @@ Status VScanNode::_normalize_function_filters(VExpr* expr, VExprContext* expr_ct } if (TExprNodeType::FUNCTION_CALL == fn_expr->node_type()) { - doris_udf::FunctionContext* fn_ctx = nullptr; + doris::FunctionContext* fn_ctx = nullptr; StringVal val; PushDownType temp_pdt; RETURN_IF_ERROR(_should_push_down_function_filter( diff --git a/be/src/vec/exec/scan/vscan_node.h b/be/src/vec/exec/scan/vscan_node.h index 476c7c37da..4fb023a5dc 100644 --- a/be/src/vec/exec/scan/vscan_node.h +++ b/be/src/vec/exec/scan/vscan_node.h @@ -158,7 +158,7 @@ protected: virtual Status _should_push_down_function_filter(VectorizedFnCall* fn_call, VExprContext* expr_ctx, StringVal* constant_str, - doris_udf::FunctionContext** fn_ctx, + doris::FunctionContext** fn_ctx, PushDownType& pdt) { pdt = PushDownType::UNACCEPTABLE; return Status::OK(); diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp index 9af349caf8..68dbbbfe9e 100644 --- a/be/src/vec/exec/scan/vscanner.cpp +++ b/be/src/vec/exec/scan/vscanner.cpp @@ -33,6 +33,14 @@ VScanner::VScanner(RuntimeState* state, VScanNode* parent, int64_t limit, Runtim _is_load = (_input_tuple_desc != nullptr); } +Status VScanner::prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr) { + if (vconjunct_ctx_ptr != nullptr) { + // Copy vconjunct_ctx_ptr from scan node to this scanner's _vconjunct_ctx. + RETURN_IF_ERROR((*vconjunct_ctx_ptr)->clone(_state, &_vconjunct_ctx)); + } + return Status::OK(); +} + Status VScanner::get_block(RuntimeState* state, Block* block, bool* eof) { // only empty block should be here DCHECK(block->rows() == 0); diff --git a/be/src/vec/exec/scan/vscanner.h b/be/src/vec/exec/scan/vscanner.h index 86c9cd60e5..e36968e6f9 100644 --- a/be/src/vec/exec/scan/vscanner.h +++ b/be/src/vec/exec/scan/vscanner.h @@ -57,6 +57,9 @@ protected: // Filter the output block finally. Status _filter_output_block(Block* block); + // Not virtual, all child will call this method explictly + Status prepare(RuntimeState* state, VExprContext** vconjunct_ctx_ptr); + public: VScanNode* get_parent() { return _parent; } @@ -117,7 +120,6 @@ public: protected: void _discard_conjuncts() { if (_vconjunct_ctx) { - _vconjunct_ctx->mark_as_stale(); _stale_vexpr_ctxs.push_back(_vconjunct_ctx); _vconjunct_ctx = nullptr; } diff --git a/be/src/vec/exprs/vexpr.cpp b/be/src/vec/exprs/vexpr.cpp index 786bb9375e..be6900bc1c 100644 --- a/be/src/vec/exprs/vexpr.cpp +++ b/be/src/vec/exprs/vexpr.cpp @@ -292,124 +292,6 @@ Status VExpr::clone_if_not_exists(const std::vector& ctxs, Runtim return Status::OK(); } -FunctionContext::TypeDesc VExpr::column_type_to_type_desc(const TypeDescriptor& type) { - FunctionContext::TypeDesc out; - switch (type.type) { - case TYPE_BOOLEAN: - out.type = FunctionContext::TYPE_BOOLEAN; - break; - case TYPE_TINYINT: - out.type = FunctionContext::TYPE_TINYINT; - break; - case TYPE_SMALLINT: - out.type = FunctionContext::TYPE_SMALLINT; - break; - case TYPE_INT: - out.type = FunctionContext::TYPE_INT; - break; - case TYPE_BIGINT: - out.type = FunctionContext::TYPE_BIGINT; - break; - case TYPE_LARGEINT: - out.type = FunctionContext::TYPE_LARGEINT; - break; - case TYPE_FLOAT: - out.type = FunctionContext::TYPE_FLOAT; - break; - case TYPE_TIME: - case TYPE_TIMEV2: - case TYPE_DOUBLE: - out.type = FunctionContext::TYPE_DOUBLE; - break; - case TYPE_DATE: - out.type = FunctionContext::TYPE_DATE; - break; - case TYPE_DATETIME: - out.type = FunctionContext::TYPE_DATETIME; - break; - case TYPE_DATEV2: - out.type = FunctionContext::TYPE_DATEV2; - break; - case TYPE_DATETIMEV2: - out.type = FunctionContext::TYPE_DATETIMEV2; - break; - case TYPE_DECIMAL32: - out.type = FunctionContext::TYPE_DECIMAL32; - out.precision = type.precision; - out.scale = type.scale; - break; - case TYPE_DECIMAL64: - out.type = FunctionContext::TYPE_DECIMAL64; - out.precision = type.precision; - out.scale = type.scale; - break; - case TYPE_DECIMAL128I: - out.type = FunctionContext::TYPE_DECIMAL128I; - out.precision = type.precision; - out.scale = type.scale; - break; - case TYPE_VARCHAR: - out.type = FunctionContext::TYPE_VARCHAR; - out.len = type.len; - break; - case TYPE_HLL: - out.type = FunctionContext::TYPE_HLL; - out.len = type.len; - break; - case TYPE_OBJECT: - out.type = FunctionContext::TYPE_OBJECT; - break; - case TYPE_QUANTILE_STATE: - out.type = FunctionContext::TYPE_QUANTILE_STATE; - break; - case TYPE_CHAR: - out.type = FunctionContext::TYPE_CHAR; - out.len = type.len; - break; - case TYPE_DECIMALV2: - out.type = FunctionContext::TYPE_DECIMALV2; - // out.precision = type.precision; - // out.scale = type.scale; - break; - case TYPE_NULL: - out.type = FunctionContext::TYPE_NULL; - break; - case TYPE_ARRAY: - out.type = FunctionContext::TYPE_ARRAY; - for (const auto& t : type.children) { - out.children.push_back(VExpr::column_type_to_type_desc(t)); - } - break; - case TYPE_MAP: - CHECK(type.children.size() == 2); - // only support map key is scalar - CHECK(!type.children[0].is_complex_type()); - out.type = FunctionContext::TYPE_MAP; - for (const auto& t : type.children) { - out.children.push_back(VExpr::column_type_to_type_desc(t)); - } - break; - case TYPE_STRUCT: - CHECK(type.children.size() >= 1); - out.type = FunctionContext::TYPE_STRUCT; - for (const auto& t : type.children) { - out.children.push_back(VExpr::column_type_to_type_desc(t)); - } - break; - case TYPE_STRING: - out.type = FunctionContext::TYPE_STRING; - out.len = type.len; - break; - case TYPE_JSONB: - out.type = FunctionContext::TYPE_JSONB; - out.len = type.len; - break; - default: - DCHECK(false) << "Unknown type: " << type; - } - return out; -} - std::string VExpr::debug_string() const { // TODO: implement partial debug string for member vars std::stringstream out; @@ -479,13 +361,12 @@ Status VExpr::get_const_col(VExprContext* context, } void VExpr::register_function_context(doris::RuntimeState* state, VExprContext* context) { - FunctionContext::TypeDesc return_type = VExpr::column_type_to_type_desc(_type); - std::vector arg_types; + std::vector arg_types; for (int i = 0; i < _children.size(); ++i) { - arg_types.push_back(VExpr::column_type_to_type_desc(_children[i]->type())); + arg_types.push_back(_children[i]->type()); } - _fn_context_index = context->register_func(state, return_type, arg_types); + _fn_context_index = context->register_function_context(state, _type, arg_types); } Status VExpr::init_function_context(VExprContext* context, @@ -511,7 +392,7 @@ Status VExpr::init_function_context(VExprContext* context, void VExpr::close_function_context(VExprContext* context, FunctionContext::FunctionStateScope scope, const FunctionBasePtr& function) const { - if (_fn_context_index != -1 && !context->_stale) { + if (_fn_context_index != -1) { FunctionContext* fn_ctx = context->fn_context(_fn_context_index); function->close(fn_ctx, FunctionContext::THREAD_LOCAL); if (scope == FunctionContext::FRAGMENT_LOCAL) { diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 1eb527c600..b5e08d8499 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -180,7 +180,6 @@ public: } protected: - static FunctionContext::TypeDesc column_type_to_type_desc(const TypeDescriptor& type); /// Simple debug string that provides no expr subclass-specific information std::string debug_string(const std::string& expr_name) const { std::stringstream out; diff --git a/be/src/vec/exprs/vexpr_context.cpp b/be/src/vec/exprs/vexpr_context.cpp index 2103e63391..f8cab77906 100644 --- a/be/src/vec/exprs/vexpr_context.cpp +++ b/be/src/vec/exprs/vexpr_context.cpp @@ -28,15 +28,12 @@ VExprContext::VExprContext(VExpr* expr) _prepared(false), _opened(false), _closed(false), - _last_result_column_id(-1), - _stale(false) {} + _last_result_column_id(-1) {} VExprContext::~VExprContext() { + // Do not delete this code, this code here is used to check if forget to close the opened context + // Or there will be memory leak DCHECK(!_prepared || _closed) << get_stack_trace(); - - for (int i = 0; i < _fn_contexts.size(); ++i) { - delete _fn_contexts[i]; - } } doris::Status VExprContext::execute(doris::vectorized::Block* block, int* result_column_id) { @@ -95,8 +92,9 @@ void VExprContext::clone_fn_contexts(VExprContext* other) { } } -int VExprContext::register_func(RuntimeState* state, const FunctionContext::TypeDesc& return_type, - const std::vector& arg_types) { +int VExprContext::register_function_context(RuntimeState* state, + const doris::TypeDescriptor& return_type, + const std::vector& arg_types) { _fn_contexts.push_back(FunctionContextImpl::create_context(state, return_type, arg_types)); _fn_contexts.back()->impl()->set_check_overflow_for_decimal( state->check_overflow_for_decimal()); diff --git a/be/src/vec/exprs/vexpr_context.h b/be/src/vec/exprs/vexpr_context.h index 4c7d8e039f..e1b9bf69d7 100644 --- a/be/src/vec/exprs/vexpr_context.h +++ b/be/src/vec/exprs/vexpr_context.h @@ -41,15 +41,15 @@ public: /// retrieve the created context. Exprs that need a FunctionContext should call this in /// Prepare() and save the returned index. 'varargs_buffer_size', if specified, is the /// size of the varargs buffer in the created FunctionContext (see udf-internal.h). - int register_func(RuntimeState* state, const FunctionContext::TypeDesc& return_type, - const std::vector& arg_types); + int register_function_context(RuntimeState* state, const doris::TypeDescriptor& return_type, + const std::vector& arg_types); /// Retrieves a registered FunctionContext. 'i' is the index returned by the call to - /// register_func(). This should only be called by VExprs. + /// register_function_context(). This should only be called by VExprs. FunctionContext* fn_context(int i) { DCHECK_GE(i, 0); DCHECK_LT(i, _fn_contexts.size()); - return _fn_contexts[i]; + return _fn_contexts[i].get(); } [[nodiscard]] static Status filter_block(VExprContext* vexpr_ctx, Block* block, @@ -71,11 +71,6 @@ public: void clone_fn_contexts(VExprContext* other); - void mark_as_stale() { - DCHECK(!_stale); - _stale = true; - } - private: friend class VExpr; @@ -92,13 +87,11 @@ private: /// FunctionContexts for each registered expression. The FunctionContexts are created /// and owned by this VExprContext. - std::vector _fn_contexts; + std::vector> _fn_contexts; int _last_result_column_id; /// The depth of expression-tree. int _depth_num = 0; - - bool _stale; }; } // namespace doris::vectorized diff --git a/be/src/vec/functions/function_timestamp.cpp b/be/src/vec/functions/function_timestamp.cpp index 704fc8f3ff..4ec1b30cc1 100644 --- a/be/src/vec/functions/function_timestamp.cpp +++ b/be/src/vec/functions/function_timestamp.cpp @@ -118,7 +118,7 @@ struct StrToDate { } if constexpr (std::is_same_v) { if (context->impl()->get_return_type().type == - doris_udf::FunctionContext::Type::TYPE_DATETIME) { + doris::PrimitiveType::TYPE_DATETIME) { ts_val.to_datetime(); } else { ts_val.cast_to_date(); diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h index 9a8b096513..02b2a7cb62 100644 --- a/be/src/vec/functions/in.h +++ b/be/src/vec/functions/in.h @@ -68,14 +68,13 @@ public: } std::shared_ptr state = std::make_shared(); context->set_function_state(scope, state); - if (context->get_arg_type(0)->type == FunctionContext::Type::TYPE_CHAR || - context->get_arg_type(0)->type == FunctionContext::Type::TYPE_VARCHAR || - context->get_arg_type(0)->type == FunctionContext::Type::TYPE_STRING) { + if (context->get_arg_type(0)->type == doris::PrimitiveType::TYPE_CHAR || + context->get_arg_type(0)->type == doris::PrimitiveType::TYPE_VARCHAR || + context->get_arg_type(0)->type == doris::PrimitiveType::TYPE_STRING) { // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly state->hybrid_set.reset(new StringValueSet()); } else { - state->hybrid_set.reset( - create_set(convert_type_to_primitive(context->get_arg_type(0)->type))); + state->hybrid_set.reset(create_set(context->get_arg_type(0)->type)); } DCHECK(context->get_num_args() >= 1); @@ -197,7 +196,7 @@ public: } std::unique_ptr hybrid_set( - create_set(convert_type_to_primitive(context->get_arg_type(0)->type))); + create_set(context->get_arg_type(0)->type)); bool null_in_set = false; for (const auto& set_column : set_columns) { diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 8cd7a66460..4060bae8cb 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -567,7 +567,7 @@ public: VecDateTimeValue& operator--() { return *this += -1; } - void to_datetime_val(doris_udf::DateTimeVal* tv) const { + void to_datetime_val(doris::DateTimeVal* tv) const { tv->packed_time = to_int64_datetime_packed(); tv->type = _type; } @@ -584,7 +584,7 @@ public: ((uint64_t)minute() << 26) | ((uint64_t)second() << 20)); } - static VecDateTimeValue from_datetime_val(const doris_udf::DateTimeVal& tv) { + static VecDateTimeValue from_datetime_val(const doris::DateTimeVal& tv) { VecDateTimeValue value; value.from_packed_time(tv.packed_time); if (tv.type == TIME_DATE) { @@ -1095,8 +1095,7 @@ public: bool get_date_from_daynr(uint64_t); - static DateV2Value from_datetimev2_val( - const doris_udf::DateTimeV2Val& tv) { + static DateV2Value from_datetimev2_val(const doris::DateTimeV2Val& tv) { DCHECK(is_datetime); DateV2Value value; value.from_datetime(tv.datetimev2_value); diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index 4e4de45271..054cb5f74b 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -163,10 +163,7 @@ set(TESTUTIL_TEST_FILES testutil/function_utils.cpp testutil/run_all_tests.cpp ) -set(UDF_TEST_FILES - # udf/udf_test.cpp - # udf/uda_test.cpp -) + set(UTIL_TEST_FILES util/bit_util_test.cpp util/brpc_client_cache_test.cpp @@ -286,7 +283,6 @@ add_executable(doris_be_test ${OLAP_TEST_FILES} ${RUNTIME_TEST_FILES} ${TESTUTIL_TEST_FILES} - ${UDF_TEST_FILES} ${UTIL_TEST_FILES} ${VEC_TEST_FILES} ) diff --git a/be/test/runtime/datetime_value_test.cpp b/be/test/runtime/datetime_value_test.cpp index d71923e8f8..f2e5163a72 100644 --- a/be/test/runtime/datetime_value_test.cpp +++ b/be/test/runtime/datetime_value_test.cpp @@ -1451,14 +1451,14 @@ TEST_F(DateTimeValueTest, packed_time) { } { - doris_udf::DateTimeVal tv; + doris::DateTimeVal tv; tv.packed_time = 1830650338932162560L; tv.type = TIME_DATETIME; DateTimeValue v1 = DateTimeValue::from_datetime_val(tv); v1.to_string(buf); EXPECT_STREQ("2001-02-03 12:34:56", buf); - doris_udf::DateTimeVal tv2; + doris::DateTimeVal tv2; v1.to_datetime_val(&tv2); EXPECT_TRUE(tv == tv2); diff --git a/be/test/testutil/desc_tbl_builder.cc b/be/test/testutil/desc_tbl_builder.cc index 5a7fcb46e7..86602cac6b 100644 --- a/be/test/testutil/desc_tbl_builder.cc +++ b/be/test/testutil/desc_tbl_builder.cc @@ -36,7 +36,7 @@ TupleDescBuilder& DescriptorTblBuilder::declare_tuple() { // item_id of -1 indicates no itemTupleId static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDescriptor& type, - int slot_idx, int byte_offset, int item_id) { + int slot_idx, int item_id) { int null_byte = slot_idx / 8; int null_bit = slot_idx % 8; TSlotDescriptor slot_desc; @@ -45,7 +45,7 @@ static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDes slot_desc.__set_slotType(type.to_thrift()); // For now no tests depend on the materialized path being populated correctly. // slot_desc.__set_materializedPath(vector()); - slot_desc.__set_byteOffset(byte_offset); + slot_desc.__set_byteOffset(0); slot_desc.__set_nullIndicatorByte(null_byte); slot_desc.__set_nullIndicatorBit(null_bit); slot_desc.__set_slotIdx(slot_idx); @@ -56,10 +56,10 @@ static TSlotDescriptor make_slot_descriptor(int id, int parent_id, const TypeDes return slot_desc; } -static TTupleDescriptor make_tuple_descriptor(int id, int byte_size, int num_null_bytes) { +static TTupleDescriptor make_tuple_descriptor(int id, int num_null_bytes) { TTupleDescriptor tuple_desc; tuple_desc.__set_id(id); - tuple_desc.__set_byteSize(byte_size); + tuple_desc.__set_byteSize(0); tuple_desc.__set_numNullBytes(num_null_bytes); return tuple_desc; } @@ -91,7 +91,6 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector& } int num_null_bytes = BitUtil::ceil(slot_types.size(), 8); - int byte_offset = num_null_bytes; int tuple_id = *next_tuple_id; ++(*next_tuple_id); @@ -105,13 +104,12 @@ TTupleDescriptor DescriptorTblBuilder::build_tuple(const vector& // } thrift_desc_tbl->slotDescriptors.push_back( - make_slot_descriptor(*slot_id, tuple_id, slot_types[i], i, byte_offset, item_id)); + make_slot_descriptor(*slot_id, tuple_id, slot_types[i], i, item_id)); thrift_desc_tbl->__isset.slotDescriptors = true; - byte_offset += slot_types[i].get_slot_size(); ++(*slot_id); } - TTupleDescriptor result = make_tuple_descriptor(tuple_id, byte_offset, num_null_bytes); + TTupleDescriptor result = make_tuple_descriptor(tuple_id, num_null_bytes); thrift_desc_tbl->tupleDescriptors.push_back(result); return result; } diff --git a/be/test/testutil/function_utils.cpp b/be/test/testutil/function_utils.cpp index 87ff68ad70..7953614e97 100644 --- a/be/test/testutil/function_utils.cpp +++ b/be/test/testutil/function_utils.cpp @@ -31,13 +31,13 @@ FunctionUtils::FunctionUtils() { globals.__set_timestamp_ms(1565026737805); globals.__set_time_zone("Asia/Shanghai"); _state = new RuntimeState(globals); - doris_udf::FunctionContext::TypeDesc return_type; - std::vector arg_types; + doris::TypeDescriptor return_type; + std::vector arg_types; _fn_ctx = FunctionContextImpl::create_context(_state, return_type, arg_types); } -FunctionUtils::FunctionUtils(const doris_udf::FunctionContext::TypeDesc& return_type, - const std::vector& arg_types, +FunctionUtils::FunctionUtils(const doris::TypeDescriptor& return_type, + const std::vector& arg_types, int varargs_buffer_size) { TQueryGlobals globals; globals.__set_now_string("2019-08-06 01:38:57"); @@ -48,7 +48,6 @@ FunctionUtils::FunctionUtils(const doris_udf::FunctionContext::TypeDesc& return_ } FunctionUtils::~FunctionUtils() { - delete _fn_ctx; if (_state) { delete _state; } diff --git a/be/test/testutil/function_utils.h b/be/test/testutil/function_utils.h index d22489a7e6..710ef11123 100644 --- a/be/test/testutil/function_utils.h +++ b/be/test/testutil/function_utils.h @@ -29,16 +29,15 @@ class RuntimeState; class FunctionUtils { public: FunctionUtils(); - FunctionUtils(const doris_udf::FunctionContext::TypeDesc& return_type, - const std::vector& arg_types, - int varargs_buffer_size); + FunctionUtils(const doris::TypeDescriptor& return_type, + const std::vector& arg_types, int varargs_buffer_size); ~FunctionUtils(); - doris_udf::FunctionContext* get_fn_ctx() { return _fn_ctx; } + doris::FunctionContext* get_fn_ctx() { return _fn_ctx.get(); } private: RuntimeState* _state = nullptr; - doris_udf::FunctionContext* _fn_ctx = nullptr; + std::unique_ptr _fn_ctx; }; } // namespace doris diff --git a/be/test/udf/uda_test.cpp b/be/test/udf/uda_test.cpp deleted file mode 100644 index fe8e83618d..0000000000 --- a/be/test/udf/uda_test.cpp +++ /dev/null @@ -1,315 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include - -#include "common/logging.h" -#include "udf/uda_test_harness.h" - -namespace doris_udf { - -//-------------------------------- Count ------------------------------------ -// Example of implementing Count(int_col). -// The input type is: int -// The intermediate type is bigint -// the return type is bigint -void CountInit(FunctionContext* context, BigIntVal* val) { - val->is_null = false; - val->val = 0; -} - -void CountUpdate(FunctionContext* context, const IntVal& input, BigIntVal* val) { - // BigIntVal is the same ptr as what was passed to CountInit - if (input.is_null) { - return; - } - - ++val->val; -} - -void CountMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst) { - dst->val += src.val; -} - -BigIntVal CountFinalize(FunctionContext* context, const BigIntVal& val) { - return val; -} - -//-------------------------------- Count(...) ------------------------------------ -// Example of implementing Count(...) -// The input type is: multiple ints -// The intermediate type is bigint -// the return type is bigint -void Count2Update(FunctionContext* context, const IntVal& input1, const IntVal& input2, - BigIntVal* val) { - val->val += (!input1.is_null + !input2.is_null); -} -void Count3Update(FunctionContext* context, const IntVal& input1, const IntVal& input2, - const IntVal& input3, BigIntVal* val) { - val->val += (!input1.is_null + !input2.is_null + !input3.is_null); -} -void Count4Update(FunctionContext* context, const IntVal& input1, const IntVal& input2, - const IntVal& input3, const IntVal& input4, BigIntVal* val) { - val->val += (!input1.is_null + !input2.is_null + !input3.is_null + !input4.is_null); -} - -//-------------------------------- Min(String) ------------------------------------ -// Example of implementing MIN for strings. -// The input type is: STRING -// The intermediate type is BufferVal -// the return type is STRING -// This is a little more sophisticated since the result buffers are reused (it grows -// to the longest result string). -struct MinState { - uint8_t* value; - int len; - int buffer_len; - - void set(FunctionContext* context, const StringVal& val) { - if (buffer_len < val.len) { - context->free(value); - value = context->allocate(val.len); - buffer_len = val.len; - } - - memcpy(value, val.ptr, val.len); - len = val.len; - } -}; - -// Initialize the MinState scratch space -void MinInit(FunctionContext* context, BufferVal* val) { - MinState* state = reinterpret_cast(*val); - state->value = nullptr; - state->buffer_len = 0; -} - -// Update the min value, comparing with the current value in MinState -void MinUpdate(FunctionContext* context, const StringVal& input, BufferVal* val) { - if (input.is_null) { - return; - } - - MinState* state = reinterpret_cast(*val); - - if (state->value == nullptr) { - state->set(context, input); - return; - } - - int cmp = memcmp(input.ptr, state->value, std::min(input.len, state->len)); - - if (cmp < 0 || (cmp == 0 && input.len < state->len)) { - state->set(context, input); - } -} - -// Serialize the state into the min string -const BufferVal MinSerialize(FunctionContext* context, const BufferVal& intermediate) { - return intermediate; -} - -// Merge is the same as Update since the serialized format is the raw input format -void MinMerge(FunctionContext* context, const BufferVal& src, BufferVal* dst) { - const MinState* src_state = reinterpret_cast(src); - - if (src_state->value == nullptr) { - return; - } - - MinUpdate(context, StringVal(src_state->value, src_state->len), dst); -} - -// Finalize also just returns the string so is the same as MinSerialize. -StringVal MinFinalize(FunctionContext* context, const BufferVal& val) { - const MinState* state = reinterpret_cast(val); - - if (state->value == nullptr) { - return StringVal::null(); - } - - StringVal result = StringVal(context, state->len); - memcpy(result.ptr, state->value, state->len); - return result; -} - -//----------------------------- Bits after Xor ------------------------------------ -// Example of a UDA that xors all the input bits and then returns the number of -// resulting bits that are set. This illustrates where the result and intermediate -// are the same type, but a transformation is still needed in Finalize() -// The input type is: double -// The intermediate type is bigint -// the return type is bigint -void XorInit(FunctionContext* context, BigIntVal* val) { - val->is_null = false; - val->val = 0; -} - -void XorUpdate(FunctionContext* context, const double* input, BigIntVal* val) { - // BigIntVal is the same ptr as what was passed to CountInit - if (input == nullptr) { - return; - } - - val->val |= *reinterpret_cast(input); -} - -void XorMerge(FunctionContext* context, const BigIntVal& src, BigIntVal* dst) { - dst->val |= src.val; -} - -BigIntVal XorFinalize(FunctionContext* context, const BigIntVal& val) { - int64_t set_bits = 0; - // Do popcnt on val - // set_bits = popcnt(val.val); - return BigIntVal(set_bits); -} - -//--------------------------- HLL(Distinct Estimate) --------------------------------- -// Example of implementing distinct estimate. As an example, we will compress the -// intermediate buffer. -// Note: this is not the actual algorithm but a sketch of how it would be implemented -// with the UDA interface. -// The input type is: bigint -// The intermediate type is string (fixed at 256 bytes) -// the return type is bigint -void DistinctEstimateInit(FunctionContext* context, StringVal* val) { - // Since this is known, this will be allocated to 256 bytes. - EXPECT_EQ(val->len, 256); - memset(val->ptr, 0, 256); -} - -void DistinctEstimatUpdate(FunctionContext* context, const int64_t* input, StringVal* val) { - if (input == nullptr) { - return; - } - - for (int i = 0; i < 256; ++i) { - int hash = 0; - // Hash(input) with the ith hash function - // hash = Hash(*input, i); - val->ptr[i] = hash; - } -} - -StringVal DistinctEstimatSerialize(FunctionContext* context, const StringVal& intermediate) { - int compressed_size = 0; - uint8_t* result = nullptr; // SnappyCompress(intermediate.ptr, intermediate.len); - return StringVal(result, compressed_size); -} - -void DistinctEstimateMerge(FunctionContext* context, const StringVal& src, StringVal* dst) { - uint8_t* src_uncompressed = nullptr; // SnappyUncompress(src.ptr, src.len); - - for (int i = 0; i < 256; ++i) { - dst->ptr[i] ^= src_uncompressed[i]; - } -} - -BigIntVal DistinctEstimateFinalize(FunctionContext* context, const StringVal& val) { - int64_t set_bits = 0; - // Do popcnt on val - // set_bits = popcnt(val.val); - return BigIntVal(set_bits); -} - -TEST(CountTest, Basic) { - UdaTestHarness test(CountInit, CountUpdate, CountMerge, nullptr, - CountFinalize); - std::vector no_nulls; - no_nulls.resize(1000); - - EXPECT_TRUE(test.execute(no_nulls, BigIntVal(no_nulls.size()))) << test; - EXPECT_FALSE(test.execute(no_nulls, BigIntVal(100))) << test; -} - -TEST(CountMultiArgTest, Basic) { - int num = 1000; - std::vector no_nulls; - no_nulls.resize(num); - - UdaTestHarness2 test2(CountInit, Count2Update, CountMerge, - nullptr, CountFinalize); - EXPECT_TRUE(test2.execute(no_nulls, no_nulls, BigIntVal(2 * num))); - EXPECT_FALSE(test2.execute(no_nulls, no_nulls, BigIntVal(100))); - - UdaTestHarness3 test3( - CountInit, Count3Update, CountMerge, nullptr, CountFinalize); - EXPECT_TRUE(test3.execute(no_nulls, no_nulls, no_nulls, BigIntVal(3 * num))); - - UdaTestHarness4 test4( - CountInit, Count4Update, CountMerge, nullptr, CountFinalize); - EXPECT_TRUE(test4.execute(no_nulls, no_nulls, no_nulls, no_nulls, BigIntVal(4 * num))); -} - -bool FuzzyCompare(const BigIntVal& r1, const BigIntVal& r2) { - if (r1.is_null && r2.is_null) { - return true; - } - - if (r1.is_null || r2.is_null) { - return false; - } - - return abs(r1.val - r2.val) <= 1; -} - -TEST(CountTest, FuzzyEquals) { - UdaTestHarness test(CountInit, CountUpdate, CountMerge, nullptr, - CountFinalize); - std::vector no_nulls; - no_nulls.resize(1000); - - EXPECT_TRUE(test.execute(no_nulls, BigIntVal(1000))) << test; - EXPECT_FALSE(test.execute(no_nulls, BigIntVal(999))) << test; - - test.set_result_comparator(FuzzyCompare); - EXPECT_TRUE(test.execute(no_nulls, BigIntVal(1000))) << test; - EXPECT_TRUE(test.execute(no_nulls, BigIntVal(999))) << test; - EXPECT_FALSE(test.execute(no_nulls, BigIntVal(998))) << test; -} - -TEST(MinTest, Basic) { - UdaTestHarness test(MinInit, MinUpdate, MinMerge, MinSerialize, - MinFinalize); - test.set_intermediate_size(sizeof(MinState)); - - std::vector values; - values.push_back(StringVal("BBB")); - EXPECT_TRUE(test.execute(values, StringVal("BBB"))) << test; - - values.push_back(StringVal("AA")); - EXPECT_TRUE(test.execute(values, StringVal("AA"))) << test; - - values.push_back(StringVal("CCC")); - EXPECT_TRUE(test.execute(values, StringVal("AA"))) << test; - - values.push_back(StringVal("ABCDEF")); - values.push_back(StringVal("AABCDEF")); - values.push_back(StringVal("A")); - EXPECT_TRUE(test.execute(values, StringVal("A"))) << test; - - values.clear(); - values.push_back(StringVal::null()); - EXPECT_TRUE(test.execute(values, StringVal::null())) << test; - - values.push_back(StringVal("ZZZ")); - EXPECT_TRUE(test.execute(values, StringVal("ZZZ"))) << test; -} -} // namespace doris_udf diff --git a/be/test/udf/udf_test.cpp b/be/test/udf/udf_test.cpp deleted file mode 100644 index bf5d7e1fbc..0000000000 --- a/be/test/udf/udf_test.cpp +++ /dev/null @@ -1,193 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include - -#include "common/logging.h" -#include "udf/udf_test_harness.hpp" - -namespace doris_udf { - -DoubleVal zero_udf(FunctionContext* context) { - return DoubleVal(0); -} - -StringVal log_udf(FunctionContext* context, const StringVal& arg1) { - std::cerr << (arg1.is_null ? "nullptr" : std::string((char*)arg1.ptr, arg1.len)) << std::endl; - return arg1; -} - -StringVal upper_udf(FunctionContext* context, const StringVal& input) { - if (input.is_null) { - return StringVal::null(); - } - - // Create a new StringVal object that's the same length as the input - StringVal result = StringVal(context, input.len); - - for (int i = 0; i < input.len; ++i) { - result.ptr[i] = toupper(input.ptr[i]); - } - - return result; -} - -FloatVal min3(FunctionContext* context, const FloatVal& f1, const FloatVal& f2, - const FloatVal& f3) { - bool is_null = true; - float v = 0.0; - - if (!f1.is_null) { - if (is_null) { - v = f1.val; - is_null = false; - } else { - v = std::min(v, f1.val); - } - } - - if (!f2.is_null) { - if (is_null) { - v = f2.val; - is_null = false; - } else { - v = std::min(v, f2.val); - } - } - - if (!f3.is_null) { - if (is_null) { - v = f3.val; - is_null = false; - } else { - v = std::min(v, f3.val); - } - } - - return is_null ? FloatVal::null() : FloatVal(v); -} - -StringVal concat(FunctionContext* context, int n, const StringVal* args) { - int size = 0; - bool all_null = true; - - for (int i = 0; i < n; ++i) { - if (args[i].is_null) { - continue; - } - - size += args[i].len; - all_null = false; - } - - if (all_null) { - return StringVal::null(); - } - - int offset = 0; - StringVal result(context, size); - - for (int i = 0; i < n; ++i) { - if (args[i].is_null) { - continue; - } - - memcpy(result.ptr + offset, args[i].ptr, args[i].len); - offset += args[i].len; - } - - return result; -} - -IntVal num_var_args(FunctionContext*, const BigIntVal& dummy, int n, const IntVal* args) { - return IntVal(n); -} - -IntVal validat_udf(FunctionContext* context) { - EXPECT_EQ(context->version(), FunctionContext::V2_0); - return IntVal::null(); -} - -IntVal validate_fail(FunctionContext* context) { - context->set_error("Fail"); - return IntVal::null(); -} - -IntVal validate_mem(FunctionContext* context) { - EXPECT_TRUE(context->allocate(0) == nullptr); - uint8_t* buffer = context->allocate(10); - EXPECT_TRUE(buffer != nullptr); - memset(buffer, 0, 10); - context->free(buffer); - return IntVal::null(); -} - -TEST(UdfTest, TestFunctionContext) { - EXPECT_TRUE(UdfTestHarness::validat_udf(validat_udf, IntVal::null())); - EXPECT_FALSE(UdfTestHarness::validat_udf(validate_fail, IntVal::null())); - EXPECT_TRUE(UdfTestHarness::validat_udf(validate_mem, IntVal::null())); -} - -TEST(UdfTest, TestValidate) { - EXPECT_TRUE(UdfTestHarness::validat_udf(zero_udf, DoubleVal(0))); - EXPECT_FALSE(UdfTestHarness::validat_udf(zero_udf, DoubleVal(10))); - - EXPECT_TRUE((UdfTestHarness::validat_udf(log_udf, StringVal("abcd"), - StringVal("abcd")))); - - EXPECT_TRUE((UdfTestHarness::validat_udf(upper_udf, StringVal("abcd"), - StringVal("ABCD")))); - - EXPECT_TRUE((UdfTestHarness::validat_udf( - min3, FloatVal(1), FloatVal(2), FloatVal(3), FloatVal(1)))); - EXPECT_TRUE((UdfTestHarness::validat_udf( - min3, FloatVal(1), FloatVal::null(), FloatVal(3), FloatVal(1)))); - EXPECT_TRUE((UdfTestHarness::validat_udf( - min3, FloatVal::null(), FloatVal::null(), FloatVal::null(), FloatVal::null()))); -} - -// TEST(UdfTest, TestTimestampVal) { -// boost::gregorian::date d(2003, 3, 15); -// TimestampVal t1(*(int32_t*)&d); -// EXPECT_TRUE((UdfTestHarness::validat_udf(time_to_string, t1, -// "2003-03-15 00:00:00"))); - -// TimestampVal t2(*(int32_t*)&d, 1000L * 1000L * 5000L); -// EXPECT_TRUE((UdfTestHarness::validat_udf(time_to_string, t2, -// "2003-03-15 00:00:05"))); -// } - -TEST(UdfTest, TestVarArgs) { - std::vector input; - input.push_back(StringVal("Hello")); - input.push_back(StringVal("World")); - - EXPECT_TRUE((UdfTestHarness::validat_udf(concat, input, - StringVal("HelloWorld")))); - - input.push_back(StringVal("More")); - EXPECT_TRUE((UdfTestHarness::validat_udf(concat, input, - StringVal("HelloWorldMore")))); - - std::vector args; - args.resize(10); - EXPECT_TRUE((UdfTestHarness::validat_udf( - num_var_args, BigIntVal(0), args, IntVal(args.size())))); -} -} // namespace doris_udf diff --git a/be/test/util/counts_test.cpp b/be/test/util/counts_test.cpp index 4f9d8d8235..d4d9e5895a 100644 --- a/be/test/util/counts_test.cpp +++ b/be/test/util/counts_test.cpp @@ -39,7 +39,7 @@ TEST_F(TCountsTest, TotalTest) { counts.increment(19, 1); counts.increment(7, 2); - doris_udf::DoubleVal result = counts.terminate(0.2); + doris::DoubleVal result = counts.terminate(0.2); EXPECT_EQ(1, result.val); uint8_t* writer = new uint8_t[counts.serialized_size()]; uint8_t* type_reader = writer; @@ -47,7 +47,7 @@ TEST_F(TCountsTest, TotalTest) { Counts other; other.unserialize(type_reader); - doris_udf::DoubleVal result1 = other.terminate(0.2); + doris::DoubleVal result1 = other.terminate(0.2); EXPECT_EQ(result.val, result1.val); Counts other1; diff --git a/be/test/vec/function/function_test_util.cpp b/be/test/vec/function/function_test_util.cpp index ae35124e66..e1551fe77f 100644 --- a/be/test/vec/function/function_test_util.cpp +++ b/be/test/vec/function/function_test_util.cpp @@ -50,7 +50,7 @@ uint64_t str_to_datetime_v2(std::string datetime_str, std::string datetime_forma size_t type_index_to_data_type(const std::vector& input_types, size_t index, ut_type::UTDataTypeDesc& ut_desc, DataTypePtr& type) { - doris_udf::FunctionContext::TypeDesc& desc = ut_desc.type_desc; + doris::TypeDescriptor& desc = ut_desc.type_desc; if (index >= input_types.size()) { return -1; } @@ -73,71 +73,71 @@ size_t type_index_to_data_type(const std::vector& input_types, size_t i switch (tp) { case TypeIndex::String: - desc.type = doris_udf::FunctionContext::TYPE_STRING; + desc.type = doris::PrimitiveType::TYPE_STRING; type = std::make_shared(); return 1; case TypeIndex::JSONB: - desc.type = doris_udf::FunctionContext::TYPE_JSONB; + desc.type = doris::PrimitiveType::TYPE_JSONB; type = std::make_shared(); return 1; case TypeIndex::BitMap: - desc.type = doris_udf::FunctionContext::TYPE_OBJECT; + desc.type = doris::PrimitiveType::TYPE_OBJECT; type = std::make_shared(); return 1; case TypeIndex::UInt8: - desc.type = doris_udf::FunctionContext::TYPE_BOOLEAN; + desc.type = doris::PrimitiveType::TYPE_BOOLEAN; type = std::make_shared(); return 1; case TypeIndex::Int8: - desc.type = doris_udf::FunctionContext::TYPE_TINYINT; + desc.type = doris::PrimitiveType::TYPE_TINYINT; type = std::make_shared(); return 1; case TypeIndex::Int16: - desc.type = doris_udf::FunctionContext::TYPE_SMALLINT; + desc.type = doris::PrimitiveType::TYPE_SMALLINT; type = std::make_shared(); return 1; case TypeIndex::Int32: - desc.type = doris_udf::FunctionContext::TYPE_INT; + desc.type = doris::PrimitiveType::TYPE_INT; type = std::make_shared(); return 1; case TypeIndex::Int64: - desc.type = doris_udf::FunctionContext::TYPE_BIGINT; + desc.type = doris::PrimitiveType::TYPE_BIGINT; type = std::make_shared(); return 1; case TypeIndex::Int128: - desc.type = doris_udf::FunctionContext::TYPE_LARGEINT; + desc.type = doris::PrimitiveType::TYPE_LARGEINT; type = std::make_shared(); return 1; case TypeIndex::Float32: - desc.type = doris_udf::FunctionContext::TYPE_FLOAT; + desc.type = doris::PrimitiveType::TYPE_FLOAT; type = std::make_shared(); return 1; case TypeIndex::Float64: - desc.type = doris_udf::FunctionContext::TYPE_DOUBLE; + desc.type = doris::PrimitiveType::TYPE_DOUBLE; type = std::make_shared(); return 1; case TypeIndex::Decimal128: - desc.type = doris_udf::FunctionContext::TYPE_DECIMALV2; + desc.type = doris::PrimitiveType::TYPE_DECIMALV2; type = std::make_shared>(); return 1; case TypeIndex::DateTime: - desc.type = doris_udf::FunctionContext::TYPE_DATETIME; + desc.type = doris::PrimitiveType::TYPE_DATETIME; type = std::make_shared(); return 1; case TypeIndex::Date: - desc.type = doris_udf::FunctionContext::TYPE_DATE; + desc.type = doris::PrimitiveType::TYPE_DATE; type = std::make_shared(); return 1; case TypeIndex::DateV2: - desc.type = doris_udf::FunctionContext::TYPE_DATEV2; + desc.type = doris::PrimitiveType::TYPE_DATEV2; type = std::make_shared(); return 1; case TypeIndex::DateTimeV2: - desc.type = doris_udf::FunctionContext::TYPE_DATETIMEV2; + desc.type = doris::PrimitiveType::TYPE_DATETIMEV2; type = std::make_shared(); return 1; case TypeIndex::Array: { - desc.type = doris_udf::FunctionContext::TYPE_ARRAY; + desc.type = doris::PrimitiveType::TYPE_ARRAY; ut_type::UTDataTypeDesc sub_desc; DataTypePtr sub_type = nullptr; ++index; diff --git a/be/test/vec/function/function_test_util.h b/be/test/vec/function/function_test_util.h index 9cb7a2c5bd..66178dd130 100644 --- a/be/test/vec/function/function_test_util.h +++ b/be/test/vec/function/function_test_util.h @@ -150,7 +150,7 @@ constexpr TypeIndex get_type_index() { struct UTDataTypeDesc { DataTypePtr data_type; - doris_udf::FunctionContext::TypeDesc type_desc; + doris::TypeDescriptor type_desc; std::string col_name; bool is_const = false; bool is_nullable = true; @@ -206,7 +206,7 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty // 1.2 prepare args for function call ColumnNumbers arguments; - std::vector arg_types; + std::vector arg_types; std::vector> constant_col_ptrs; std::vector> constant_cols; for (size_t i = 0; i < descs.size(); ++i) { @@ -229,22 +229,22 @@ Status check_function(const std::string& func_name, const InputTypeSet& input_ty func_name, block.get_columns_with_type_and_name(), return_type); EXPECT_TRUE(func != nullptr); - doris_udf::FunctionContext::TypeDesc fn_ctx_return; + doris::TypeDescriptor fn_ctx_return; if constexpr (std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_BOOLEAN; + fn_ctx_return.type = doris::PrimitiveType::TYPE_BOOLEAN; } else if constexpr (std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_INT; + fn_ctx_return.type = doris::PrimitiveType::TYPE_INT; } else if constexpr (std::is_same_v || std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_DOUBLE; + fn_ctx_return.type = doris::PrimitiveType::TYPE_DOUBLE; } else if constexpr (std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_DATETIME; + fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIME; } else if (std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_DATEV2; + fn_ctx_return.type = doris::PrimitiveType::TYPE_DATEV2; } else if (std::is_same_v) { - fn_ctx_return.type = doris_udf::FunctionContext::TYPE_DATETIMEV2; + fn_ctx_return.type = doris::PrimitiveType::TYPE_DATETIMEV2; } else { - fn_ctx_return.type = doris_udf::FunctionContext::INVALID_TYPE; + fn_ctx_return.type = doris::PrimitiveType::INVALID_TYPE; } FunctionUtils fn_utils(fn_ctx_return, arg_types, 0); diff --git a/gensrc/proto/descriptors.proto b/gensrc/proto/descriptors.proto index 74234cb442..9660a6707d 100644 --- a/gensrc/proto/descriptors.proto +++ b/gensrc/proto/descriptors.proto @@ -28,7 +28,7 @@ message PSlotDescriptor { required int32 parent = 2; // tuple id which this slot is belong to required PTypeDesc slot_type = 3; required int32 column_pos = 4; // in originating table - required int32 byte_offset = 5; // into tuple + required int32 byte_offset = 5; // into tuple, not used any more required int32 null_indicator_byte = 6; required int32 null_indicator_bit = 7; required string col_name = 8;