From 79ad74637d59d1fe03cc54b162c5c774614df337 Mon Sep 17 00:00:00 2001 From: yiguolei <676222867@qq.com> Date: Tue, 24 Jan 2023 10:45:35 +0800 Subject: [PATCH] [refactor](remove expr) remove non vectorized Expr and ExprContext related codes (#16136) --- be/src/common/daemon.cpp | 20 - be/src/exec/CMakeLists.txt | 3 - be/src/exec/arrow/arrow_reader.h | 1 - be/src/exec/base_scanner.cpp | 135 --- be/src/exec/base_scanner.h | 25 +- be/src/exec/es/es_predicate.cpp | 437 -------- be/src/exec/es/es_predicate.h | 164 --- be/src/exec/es/es_query_builder.cpp | 454 -------- be/src/exec/es/es_query_builder.h | 138 --- be/src/exec/es/es_scroll_query.cpp | 1 - be/src/exec/es/es_scroll_query.h | 3 +- be/src/exec/exec_node.cpp | 59 +- be/src/exec/exec_node.h | 15 - be/src/exec/hash_table.cpp | 452 -------- be/src/exec/hash_table.h | 480 --------- be/src/exec/odbc_connector.h | 3 - be/src/exec/scan_node.h | 1 - be/src/exec/table_connector.cpp | 1 - be/src/exec/table_connector.h | 1 - be/src/exprs/CMakeLists.txt | 30 - be/src/exprs/agg_fn.cpp | 221 ---- be/src/exprs/agg_fn.h | 187 ---- be/src/exprs/agg_fn_evaluator.cpp | 948 ----------------- be/src/exprs/agg_fn_evaluator.h | 345 ------ be/src/exprs/anyval_util.h | 18 +- be/src/exprs/arithmetic_expr.cpp | 226 ---- be/src/exprs/arithmetic_expr.h | 181 ---- be/src/exprs/array_functions.cpp | 48 - be/src/exprs/array_functions.h | 37 - be/src/exprs/binary_predicate.cpp | 509 --------- be/src/exprs/binary_predicate.h | 99 -- ...filter_predicate.h => bloom_filter_func.h} | 58 +- be/src/exprs/bloomfilter_predicate.cpp | 96 -- be/src/exprs/case_expr.cpp | 233 ---- be/src/exprs/case_expr.h | 81 -- be/src/exprs/cast_expr.cpp | 156 --- be/src/exprs/cast_expr.h | 62 -- be/src/exprs/cast_functions.h | 14 + be/src/exprs/compound_predicate.cpp | 84 -- be/src/exprs/compound_predicate.h | 115 -- be/src/exprs/conditional_functions.cpp | 132 --- be/src/exprs/conditional_functions.h | 139 --- be/src/exprs/create_predicate_function.h | 1 - be/src/exprs/decimalv2_operators.cpp | 214 ---- be/src/exprs/decimalv2_operators.h | 83 -- be/src/exprs/encryption_functions.h | 3 - be/src/exprs/es_functions.cpp | 29 - be/src/exprs/es_functions.h | 38 - be/src/exprs/expr.cpp | 995 ------------------ be/src/exprs/expr.h | 599 ----------- be/src/exprs/expr_context.cpp | 455 -------- be/src/exprs/expr_context.h | 197 ---- be/src/exprs/expr_value.h | 245 ----- be/src/exprs/grouping_sets_functions.cpp | 34 - be/src/exprs/grouping_sets_functions.h | 34 - be/src/exprs/hll_function.cpp | 131 --- be/src/exprs/hll_function.h | 50 - be/src/exprs/hll_hash_function.cpp | 54 - be/src/exprs/hll_hash_function.h | 35 - be/src/exprs/in_predicate.cpp | 142 --- be/src/exprs/in_predicate.h | 70 -- be/src/exprs/info_func.cpp | 46 - be/src/exprs/info_func.h | 51 - be/src/exprs/is_null_predicate.cpp | 67 -- be/src/exprs/is_null_predicate.h | 40 - be/src/exprs/json_functions.h | 2 - be/src/exprs/literal.cpp | 266 ----- be/src/exprs/literal.h | 63 -- be/src/exprs/match_predicate.h | 20 - be/src/exprs/math_functions.h | 4 - be/src/exprs/new_agg_fn_evaluator.cc | 643 ----------- be/src/exprs/new_agg_fn_evaluator.h | 311 ------ be/src/exprs/new_in_predicate.cpp | 184 ---- be/src/exprs/new_in_predicate.h | 345 ------ be/src/exprs/null_literal.cpp | 75 -- be/src/exprs/null_literal.h | 57 - be/src/exprs/operators.cpp | 138 --- be/src/exprs/operators.h | 251 ----- be/src/exprs/predicate.h | 36 - be/src/exprs/rpc_fn.cpp | 277 ----- be/src/exprs/rpc_fn.h | 137 --- be/src/exprs/rpc_fn_call.cpp | 112 -- be/src/exprs/rpc_fn_call.h | 60 -- be/src/exprs/runtime_filter.cpp | 23 - be/src/exprs/runtime_filter.h | 148 ++- be/src/exprs/runtime_filter_slots.h | 14 - be/src/exprs/runtime_filter_slots_cross.h | 2 +- be/src/exprs/scalar_fn_call.cpp | 547 ---------- be/src/exprs/scalar_fn_call.h | 140 --- be/src/exprs/slot_ref.cpp | 308 ------ be/src/exprs/slot_ref.h | 121 --- be/src/exprs/string_functions.h | 2 - be/src/exprs/time_operators.h | 3 - be/src/exprs/timestamp_functions.h | 2 - be/src/exprs/tuple_is_null_predicate.cpp | 72 -- be/src/exprs/tuple_is_null_predicate.h | 54 - be/src/exprs/utility_functions.h | 2 - be/src/olap/bloom_filter_predicate.h | 2 +- be/src/olap/delta_writer.cpp | 1 - be/src/olap/delta_writer.h | 1 - be/src/olap/predicate_creator.h | 1 - be/src/olap/reader.h | 1 - be/src/runtime/CMakeLists.txt | 1 - be/src/runtime/cache/result_cache.h | 1 - be/src/runtime/cache/result_node.h | 1 - be/src/runtime/descriptor_helper.h | 2 + be/src/runtime/descriptors.h | 1 - be/src/runtime/dpp_sink_internal.cpp | 36 - be/src/runtime/dpp_sink_internal.h | 33 - be/src/runtime/fold_constant_executor.cpp | 55 - be/src/runtime/fold_constant_executor.h | 8 +- be/src/runtime/mem_pool.cpp | 18 - be/src/runtime/mem_pool.h | 6 - be/src/runtime/runtime_filter_mgr.cpp | 2 +- be/src/runtime/runtime_state.h | 1 - be/src/runtime/tuple.cpp | 67 -- be/src/runtime/tuple.h | 13 - be/src/runtime/tuple_row.cpp | 41 - be/src/runtime/tuple_row.h | 117 -- be/src/service/internal_service.cpp | 2 - be/src/udf/udf_internal.h | 1 - be/src/util/arrow/block_convertor.cpp | 1 - be/src/util/arrow/row_batch.cpp | 1 - be/src/util/tuple_row_zorder_compare.h | 3 - .../aggregate_function_hll_union_agg.h | 1 - be/src/vec/core/block.cpp | 1 - .../exec/data_gen_functions/vnumbers_tvf.cpp | 1 - be/src/vec/exec/join/vhash_join_node.cpp | 1 + .../vec/exec/join/vnested_loop_join_node.cpp | 2 +- be/src/vec/exec/scan/new_es_scan_node.cpp | 1 - be/src/vec/exec/scan/new_es_scan_node.h | 1 - be/src/vec/exec/scan/vfile_scanner.cpp | 1 - be/src/vec/exec/scan/vfile_scanner.h | 1 - be/src/vec/exec/scan/vscan_node.cpp | 7 +- be/src/vec/exec/scan/vscanner.h | 1 - be/src/vec/exec/varrow_scanner.cpp | 1 - be/src/vec/exec/vdata_gen_scan_node.cpp | 1 - be/src/vec/exec/vmysql_scan_node.cpp | 1 - be/src/vec/exec/vschema_scan_node.cpp | 1 - be/src/vec/exec/vtable_function_node.cpp | 2 - be/src/vec/exec/vtable_function_node.h | 2 - be/src/vec/exprs/vbloom_predicate.cpp | 1 + be/src/vec/exprs/vbloom_predicate.h | 1 - be/src/vec/exprs/vcompound_pred.h | 2 +- be/src/vec/exprs/vectorized_fn_call.cpp | 1 - be/src/vec/exprs/vexpr.h | 3 +- be/src/vec/functions/hll_cardinality.cpp | 1 - be/src/vec/functions/hll_empty.cpp | 1 - be/src/vec/functions/least_greast.cpp | 1 + be/src/vec/runtime/vfile_result_writer.cpp | 1 - be/src/vec/sink/vdata_stream_sender.h | 6 - be/src/vec/sink/vmemory_scratch_sink.cpp | 1 - be/src/vec/sink/vmysql_table_writer.cpp | 1 - be/src/vec/sink/vresult_sink.h | 1 - be/src/vec/sink/vtablet_sink.cpp | 5 - be/src/vec/sink/vtablet_sink.h | 3 - be/src/vec/utils/util.hpp | 1 - be/test/CMakeLists.txt | 2 - be/test/exprs/array_functions_test.cpp | 77 -- be/test/exprs/binary_predicate_test.cpp | 158 --- be/test/exprs/bloom_filter_predicate_test.cpp | 1 - be/test/exprs/hll_function_test.cpp | 109 -- be/test/exprs/in_op_test.cpp | 149 --- be/test/exprs/in_predicate_test.cpp | 129 --- be/test/exprs/runtime_filter_test.cpp | 119 +++ be/test/exprs/topn_function_test.cpp | 1 - be/test/runtime/data_spliter_test.cpp | 1 - be/test/runtime/mem_pool_test.cpp | 15 - be/test/vec/core/block_test.cpp | 1 - be/test/vec/exec/vtablet_sink_test.cpp | 1 - be/test/vec/exprs/vexpr_test.cpp | 1 - .../vec/function/function_arithmetic_test.cpp | 1 - .../function/function_array_index_test.cpp | 1 - .../vec/function/function_array_size_test.cpp | 1 - .../function/function_arrays_overlap_test.cpp | 1 - be/test/vec/function/function_hash_test.cpp | 1 - 176 files changed, 319 insertions(+), 15367 deletions(-) delete mode 100644 be/src/exec/es/es_predicate.cpp delete mode 100644 be/src/exec/es/es_predicate.h delete mode 100644 be/src/exec/es/es_query_builder.cpp delete mode 100644 be/src/exec/es/es_query_builder.h delete mode 100644 be/src/exec/hash_table.cpp delete mode 100644 be/src/exec/hash_table.h delete mode 100644 be/src/exprs/agg_fn.cpp delete mode 100644 be/src/exprs/agg_fn.h delete mode 100644 be/src/exprs/agg_fn_evaluator.cpp delete mode 100644 be/src/exprs/agg_fn_evaluator.h delete mode 100644 be/src/exprs/arithmetic_expr.cpp delete mode 100644 be/src/exprs/arithmetic_expr.h delete mode 100644 be/src/exprs/array_functions.cpp delete mode 100644 be/src/exprs/array_functions.h delete mode 100644 be/src/exprs/binary_predicate.cpp delete mode 100644 be/src/exprs/binary_predicate.h rename be/src/exprs/{bloomfilter_predicate.h => bloom_filter_func.h} (90%) delete mode 100644 be/src/exprs/bloomfilter_predicate.cpp delete mode 100644 be/src/exprs/case_expr.cpp delete mode 100644 be/src/exprs/case_expr.h delete mode 100644 be/src/exprs/cast_expr.cpp delete mode 100644 be/src/exprs/cast_expr.h delete mode 100644 be/src/exprs/compound_predicate.cpp delete mode 100644 be/src/exprs/compound_predicate.h delete mode 100644 be/src/exprs/conditional_functions.cpp delete mode 100644 be/src/exprs/conditional_functions.h delete mode 100644 be/src/exprs/decimalv2_operators.cpp delete mode 100644 be/src/exprs/decimalv2_operators.h delete mode 100644 be/src/exprs/es_functions.cpp delete mode 100644 be/src/exprs/es_functions.h delete mode 100644 be/src/exprs/expr.cpp delete mode 100644 be/src/exprs/expr.h delete mode 100644 be/src/exprs/expr_context.cpp delete mode 100644 be/src/exprs/expr_context.h delete mode 100644 be/src/exprs/expr_value.h delete mode 100644 be/src/exprs/grouping_sets_functions.cpp delete mode 100644 be/src/exprs/grouping_sets_functions.h delete mode 100644 be/src/exprs/hll_function.cpp delete mode 100644 be/src/exprs/hll_function.h delete mode 100644 be/src/exprs/hll_hash_function.cpp delete mode 100644 be/src/exprs/hll_hash_function.h delete mode 100644 be/src/exprs/in_predicate.cpp delete mode 100644 be/src/exprs/in_predicate.h delete mode 100644 be/src/exprs/info_func.cpp delete mode 100644 be/src/exprs/info_func.h delete mode 100644 be/src/exprs/is_null_predicate.cpp delete mode 100644 be/src/exprs/is_null_predicate.h delete mode 100644 be/src/exprs/literal.cpp delete mode 100644 be/src/exprs/literal.h delete mode 100644 be/src/exprs/new_agg_fn_evaluator.cc delete mode 100644 be/src/exprs/new_agg_fn_evaluator.h delete mode 100644 be/src/exprs/new_in_predicate.cpp delete mode 100644 be/src/exprs/new_in_predicate.h delete mode 100644 be/src/exprs/null_literal.cpp delete mode 100644 be/src/exprs/null_literal.h delete mode 100644 be/src/exprs/operators.cpp delete mode 100644 be/src/exprs/operators.h delete mode 100644 be/src/exprs/predicate.h delete mode 100644 be/src/exprs/rpc_fn.cpp delete mode 100644 be/src/exprs/rpc_fn.h delete mode 100644 be/src/exprs/rpc_fn_call.cpp delete mode 100644 be/src/exprs/rpc_fn_call.h delete mode 100644 be/src/exprs/scalar_fn_call.cpp delete mode 100644 be/src/exprs/scalar_fn_call.h delete mode 100644 be/src/exprs/slot_ref.cpp delete mode 100644 be/src/exprs/slot_ref.h delete mode 100644 be/src/exprs/tuple_is_null_predicate.cpp delete mode 100644 be/src/exprs/tuple_is_null_predicate.h delete mode 100644 be/src/runtime/tuple_row.cpp delete mode 100644 be/src/runtime/tuple_row.h delete mode 100644 be/test/exprs/array_functions_test.cpp delete mode 100644 be/test/exprs/binary_predicate_test.cpp delete mode 100644 be/test/exprs/hll_function_test.cpp delete mode 100644 be/test/exprs/in_op_test.cpp delete mode 100644 be/test/exprs/in_predicate_test.cpp create mode 100644 be/test/exprs/runtime_filter_test.cpp diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 83b1cca007..a425501749 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -23,24 +23,14 @@ #include "common/config.h" #include "common/logging.h" -#include "exprs/array_functions.h" #include "exprs/bitmap_function.h" #include "exprs/cast_functions.h" -#include "exprs/compound_predicate.h" -#include "exprs/decimalv2_operators.h" #include "exprs/encryption_functions.h" -#include "exprs/es_functions.h" -#include "exprs/grouping_sets_functions.h" #include "exprs/hash_functions.h" -#include "exprs/hll_function.h" -#include "exprs/hll_hash_function.h" -#include "exprs/is_null_predicate.h" #include "exprs/json_functions.h" #include "exprs/like_predicate.h" #include "exprs/match_predicate.h" #include "exprs/math_functions.h" -#include "exprs/new_in_predicate.h" -#include "exprs/operators.h" #include "exprs/quantile_function.h" #include "exprs/string_functions.h" #include "exprs/time_operators.h" @@ -374,27 +364,17 @@ void Daemon::init(int argc, char** argv, const std::vector& paths) { DiskInfo::init(); MemInfo::init(); UserFunctionCache::instance()->init(config::user_function_dir); - Operators::init(); - IsNullPredicate::init(); LikePredicate::init(); StringFunctions::init(); - ArrayFunctions::init(); CastFunctions::init(); - InPredicate::init(); MathFunctions::init(); EncryptionFunctions::init(); TimestampFunctions::init(); - DecimalV2Operators::init(); TimeOperators::init(); UtilityFunctions::init(); - CompoundPredicate::init(); JsonFunctions::init(); - HllHashFunctions::init(); - ESFunctions::init(); GeoFunctions::init(); - GroupingSetsFunctions::init(); BitmapFunctions::init(); - HllFunctions::init(); QuantileStateFunctions::init(); HashFunctions::init(); TopNFunctions::init(); diff --git a/be/src/exec/CMakeLists.txt b/be/src/exec/CMakeLists.txt index c71c5ae9ae..848fe5a8f2 100644 --- a/be/src/exec/CMakeLists.txt +++ b/be/src/exec/CMakeLists.txt @@ -29,17 +29,14 @@ set(EXEC_FILES data_sink.cpp decompressor.cpp exec_node.cpp - hash_table.cpp text_converter.cpp olap_common.cpp tablet_info.cpp plain_binary_line_reader.cpp plain_text_line_reader.cpp - es/es_predicate.cpp es/es_scan_reader.cpp es/es_scroll_query.cpp es/es_scroll_parser.cpp - es/es_query_builder.cpp schema_scanner.cpp schema_scanner/schema_tables_scanner.cpp schema_scanner/schema_dummy_scanner.cpp diff --git a/be/src/exec/arrow/arrow_reader.h b/be/src/exec/arrow/arrow_reader.h index 3e05a7527a..7fce8f0925 100644 --- a/be/src/exec/arrow/arrow_reader.h +++ b/be/src/exec/arrow/arrow_reader.h @@ -33,7 +33,6 @@ #include #include "common/status.h" -#include "exprs/expr_context.h" #include "gen_cpp/PaloBrokerService_types.h" #include "gen_cpp/PlanNodes_types.h" #include "gen_cpp/Types_types.h" diff --git a/be/src/exec/base_scanner.cpp b/be/src/exec/base_scanner.cpp index 193860f286..7794663087 100644 --- a/be/src/exec/base_scanner.cpp +++ b/be/src/exec/base_scanner.cpp @@ -21,7 +21,6 @@ #include "common/utils.h" #include "exec/exec_node.h" -#include "exprs/expr_context.h" #include "runtime/descriptors.h" #include "runtime/raw_value.h" #include "runtime/runtime_state.h" @@ -41,9 +40,6 @@ BaseScanner::BaseScanner(RuntimeState* state, RuntimeProfile* profile, _broker_addresses(broker_addresses), _next_range(0), _counter(counter), - _src_tuple(nullptr), - _src_tuple_row(nullptr), - _mem_pool(std::make_unique()), _dest_tuple_desc(nullptr), _pre_filter_texprs(pre_filter_texprs), _strict_mode(false), @@ -108,10 +104,6 @@ Status BaseScanner::init_expr_ctxes() { } _src_slot_descs.emplace_back(it->second); } - // Construct source tuple and tuple row - _src_tuple = (Tuple*)_mem_pool->allocate(src_tuple_desc->byte_size()); - _src_tuple_row = (TupleRow*)_mem_pool->allocate(sizeof(Tuple*)); - _src_tuple_row->set_tuple(0, _src_tuple); _row_desc.reset(new RowDescriptor(_state->desc_tbl(), std::vector({_params.src_tuple_id}), std::vector({false}))); @@ -168,109 +160,6 @@ Status BaseScanner::init_expr_ctxes() { return Status::OK(); } -Status BaseScanner::fill_dest_tuple(Tuple* dest_tuple, MemPool* mem_pool, bool* fill_tuple) { - RETURN_IF_ERROR(_fill_dest_tuple(dest_tuple, mem_pool)); - if (_success) { - free_expr_local_allocations(); - *fill_tuple = true; - } else { - *fill_tuple = false; - } - return Status::OK(); -} - -Status BaseScanner::_fill_dest_tuple(Tuple* dest_tuple, MemPool* mem_pool) { - // filter src tuple by preceding filter first - if (!ExecNode::eval_conjuncts(&_pre_filter_ctxs[0], _pre_filter_ctxs.size(), _src_tuple_row)) { - _counter->num_rows_unselected++; - _success = false; - return Status::OK(); - } - - // convert and fill dest tuple - int ctx_idx = 0; - for (auto slot_desc : _dest_tuple_desc->slots()) { - if (!slot_desc->is_materialized()) { - continue; - } - - int dest_index = ctx_idx++; - ExprContext* ctx = _dest_expr_ctx[dest_index]; - void* value = ctx->get_value(_src_tuple_row); - if (value == nullptr) { - // Only when the expr return value is null, we will check the error message. - std::string expr_error = ctx->get_error_msg(); - if (!expr_error.empty()) { - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { - return _src_tuple_row->to_string(*(_row_desc.get())); - }, - [&]() -> std::string { return expr_error; }, &_scanner_eof)); - _counter->num_rows_filtered++; - // The ctx is reused, so must clear the error state and message. - ctx->clear_error_msg(); - _success = false; - return Status::OK(); - } - // If _strict_mode is false, _src_slot_descs_order_by_dest size could be zero - if (_strict_mode && (_src_slot_descs_order_by_dest[dest_index] != nullptr) && - !_src_tuple->is_null( - _src_slot_descs_order_by_dest[dest_index]->null_indicator_offset())) { - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { - return _src_tuple_row->to_string(*(_row_desc.get())); - }, - [&]() -> std::string { - // Type of the slot is must be Varchar in _src_tuple. - StringRef* raw_value = _src_tuple->get_string_slot( - _src_slot_descs_order_by_dest[dest_index]->tuple_offset()); - std::string raw_string; - if (raw_value != nullptr) { //is not null then get raw value - raw_string = raw_value->to_string(); - } - fmt::memory_buffer error_msg; - fmt::format_to(error_msg, - "column({}) value is incorrect while strict mode is {}, " - "src value is {}", - slot_desc->col_name(), _strict_mode, raw_string); - return fmt::to_string(error_msg); - }, - &_scanner_eof)); - _counter->num_rows_filtered++; - _success = false; - return Status::OK(); - } - if (!slot_desc->is_nullable()) { - RETURN_IF_ERROR(_state->append_error_msg_to_file( - [&]() -> std::string { - return _src_tuple_row->to_string(*(_row_desc.get())); - }, - [&]() -> std::string { - fmt::memory_buffer error_msg; - fmt::format_to( - error_msg, - "column({}) values is null while columns is not nullable", - slot_desc->col_name()); - return fmt::to_string(error_msg); - }, - &_scanner_eof)); - _counter->num_rows_filtered++; - _success = false; - return Status::OK(); - } - dest_tuple->set_null(slot_desc->null_indicator_offset()); - continue; - } - if (slot_desc->is_nullable()) { - dest_tuple->set_not_null(slot_desc->null_indicator_offset()); - } - void* slot = dest_tuple->get_slot(slot_desc->tuple_offset()); - RawValue::write(value, slot, slot_desc->type(), mem_pool); - } - _success = true; - return Status::OK(); -} - Status BaseScanner::_filter_src_block() { auto origin_column_num = _src_block.columns(); // filter block @@ -411,31 +300,7 @@ Status BaseScanner::_fill_dest_block(vectorized::Block* dest_block, bool* eof) { return Status::OK(); } -void BaseScanner::fill_slots_of_columns_from_path( - int start, const std::vector& columns_from_path) { - // values of columns from path can not be null - for (int i = 0; i < columns_from_path.size(); ++i) { - auto slot_desc = _src_slot_descs.at(i + start); - _src_tuple->set_not_null(slot_desc->null_indicator_offset()); - void* slot = _src_tuple->get_slot(slot_desc->tuple_offset()); - auto* str_slot = reinterpret_cast(slot); - const std::string& column_from_path = columns_from_path[i]; - str_slot->data = column_from_path.c_str(); - str_slot->size = column_from_path.size(); - } -} - -void BaseScanner::free_expr_local_allocations() { - if (++_line_counter % RELEASE_CONTEXT_COUNTER == 0) { - ExprContext::free_local_allocations(_dest_expr_ctx); - } -} - void BaseScanner::close() { - if (!_pre_filter_ctxs.empty()) { - Expr::close(_pre_filter_ctxs, _state); - } - if (_vpre_filter_ctx_ptr) { (*_vpre_filter_ctx_ptr)->close(_state); } diff --git a/be/src/exec/base_scanner.h b/be/src/exec/base_scanner.h index b0fc816f1b..0be92f9437 100644 --- a/be/src/exec/base_scanner.h +++ b/be/src/exec/base_scanner.h @@ -18,7 +18,6 @@ #pragma once #include "common/status.h" -#include "exprs/expr.h" #include "runtime/tuple.h" #include "util/runtime_profile.h" #include "vec/exprs/vexpr.h" @@ -28,10 +27,8 @@ namespace doris { class Tuple; class TupleDescriptor; -class TupleRow; class RowDescriptor; class RuntimeState; -class ExprContext; namespace vectorized { class VExprContext; @@ -56,10 +53,7 @@ public: const std::vector& broker_addresses, const std::vector& pre_filter_texprs, ScannerCounter* counter); - virtual ~BaseScanner() { - Expr::close(_dest_expr_ctx, _state); - vectorized::VExpr::close(_dest_vexpr_ctx, _state); - } + virtual ~BaseScanner() { vectorized::VExpr::close(_dest_vexpr_ctx, _state); } virtual Status init_expr_ctxes(); // Open this scanner, will initialize information need to @@ -77,12 +71,6 @@ public: // Close this scanner virtual void close() = 0; - Status fill_dest_tuple(Tuple* dest_tuple, MemPool* mem_pool, bool* fill_tuple); - - void fill_slots_of_columns_from_path(int start, - const std::vector& columns_from_path); - - void free_expr_local_allocations(); protected: Status _fill_dest_block(vectorized::Block* dest_block, bool* eof); @@ -106,15 +94,9 @@ protected: // slots for value read from broker file std::vector _src_slot_descs; std::unique_ptr _row_desc; - Tuple* _src_tuple; - TupleRow* _src_tuple_row; - - // Mem pool used to allocate _src_tuple and _src_tuple_row - std::unique_ptr _mem_pool; // Dest tuple descriptor and dest expr context const TupleDescriptor* _dest_tuple_desc; - std::vector _dest_expr_ctx; // the map values of dest slot id to src slot desc // if there is not key of dest slot id in dest_sid_to_src_sid_without_trans, it will be set to nullptr std::vector _src_slot_descs_order_by_dest; @@ -123,10 +105,8 @@ protected: std::unordered_map _dest_slot_to_src_slot_index; // to filter src tuple directly - // the `_pre_filter_texprs` is the origin thrift exprs passed from scan node, - // and will be converted to `_pre_filter_ctxs` when scanner is open. + // the `_pre_filter_texprs` is the origin thrift exprs passed from scan node. const std::vector _pre_filter_texprs; - std::vector _pre_filter_ctxs; bool _strict_mode; @@ -155,7 +135,6 @@ private: Status _filter_src_block(); void _fill_columns_from_path(); Status _materialize_dest_block(vectorized::Block* output_block); - Status _fill_dest_tuple(Tuple* dest_tuple, MemPool* mem_pool); }; } /* namespace doris */ diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp deleted file mode 100644 index f3080ea86a..0000000000 --- a/be/src/exec/es/es_predicate.cpp +++ /dev/null @@ -1,437 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/es/es_predicate.h" - -#include -#include - -#include -#include -#include - -#include "common/status.h" -#include "exec/es/es_query_builder.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "exprs/in_predicate.h" -#include "runtime/datetime_value.h" -#include "runtime/large_int_value.h" -#include "runtime/tuple_row.h" -#include "vec/common/string_ref.h" - -namespace doris { - -#define RETURN_ERROR_IF_EXPR_IS_NOT_SLOTREF(expr) \ - do { \ - const Expr* expr_without_cast = Expr::expr_without_cast(expr); \ - if (expr_without_cast->node_type() != TExprNodeType::SLOT_REF) { \ - return Status::InternalError("build disjuncts failed: child is not slot ref"); \ - } \ - } while (false) - -std::string ExtLiteral::value_to_string() { - std::stringstream ss; - switch (_type) { - case TYPE_TINYINT: - ss << std::to_string(get_byte()); - break; - case TYPE_SMALLINT: - ss << std::to_string(get_short()); - break; - case TYPE_INT: - ss << std::to_string(get_int()); - break; - case TYPE_BIGINT: - ss << std::to_string(get_long()); - break; - case TYPE_FLOAT: - ss << std::to_string(get_float()); - break; - case TYPE_DOUBLE: - ss << std::to_string(get_double()); - break; - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_STRING: - ss << get_string(); - break; - case TYPE_DATE: - case TYPE_DATETIME: - ss << get_date_string(); - break; - case TYPE_BOOLEAN: - ss << std::to_string(get_bool()); - break; - case TYPE_DECIMALV2: - ss << get_decimalv2_string(); - break; - case TYPE_LARGEINT: - ss << get_largeint_string(); - break; - default: - DCHECK(false); - break; - } - return ss.str(); -} - -ExtLiteral::~ExtLiteral() {} - -int8_t ExtLiteral::get_byte() { - DCHECK(_type == TYPE_TINYINT); - return *(reinterpret_cast(_value)); -} - -int16_t ExtLiteral::get_short() { - DCHECK(_type == TYPE_SMALLINT); - return *(reinterpret_cast(_value)); -} - -int32_t ExtLiteral::get_int() { - DCHECK(_type == TYPE_INT); - return *(reinterpret_cast(_value)); -} - -int64_t ExtLiteral::get_long() { - DCHECK(_type == TYPE_BIGINT); - return *(reinterpret_cast(_value)); -} - -float ExtLiteral::get_float() { - DCHECK(_type == TYPE_FLOAT); - return *(reinterpret_cast(_value)); -} - -double ExtLiteral::get_double() { - DCHECK(_type == TYPE_DOUBLE); - return *(reinterpret_cast(_value)); -} - -std::string ExtLiteral::get_string() { - DCHECK(_type == TYPE_VARCHAR || _type == TYPE_CHAR || _type == TYPE_STRING); - return (reinterpret_cast(_value))->to_string(); -} - -std::string ExtLiteral::get_date_string() { - DCHECK(_type == TYPE_DATE || _type == TYPE_DATETIME); - DateTimeValue date_value = *reinterpret_cast(_value); - if (_type == TYPE_DATE) { - date_value.cast_to_date(); - } - - char str[MAX_DTVALUE_STR_LEN]; - date_value.to_string(str); - return std::string(str, strlen(str)); -} - -bool ExtLiteral::get_bool() { - DCHECK(_type == TYPE_BOOLEAN); - return *(reinterpret_cast(_value)); -} - -std::string ExtLiteral::get_decimalv2_string() { - DCHECK(_type == TYPE_DECIMALV2); - return reinterpret_cast(_value)->to_string(); -} - -std::string ExtLiteral::get_largeint_string() { - DCHECK(_type == TYPE_LARGEINT); - return LargeIntValue::to_string(*reinterpret_cast<__int128*>(_value)); -} - -EsPredicate::EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc, ObjectPool* pool) - : _context(context), _tuple_desc(tuple_desc), _es_query_status(Status::OK()), _pool(pool) {} - -EsPredicate::~EsPredicate() { - for (int i = 0; i < _disjuncts.size(); i++) { - delete _disjuncts[i]; - } - _disjuncts.clear(); -} - -Status EsPredicate::build_disjuncts_list() { - return build_disjuncts_list(_context->root()); -} - -// make sure to build by build_disjuncts_list -const std::vector& EsPredicate::get_predicate_list() const { - return _disjuncts; -} - -static bool ignore_cast(const SlotDescriptor* slot, const Expr* expr) { - if (slot->type().is_date_type() && expr->type().is_date_type()) { - return true; - } - if (slot->type().is_string_type() && expr->type().is_string_type()) { - return true; - } - return false; -} - -static bool is_literal_node(const Expr* expr) { - switch (expr->node_type()) { - case TExprNodeType::BOOL_LITERAL: - case TExprNodeType::INT_LITERAL: - case TExprNodeType::LARGE_INT_LITERAL: - case TExprNodeType::FLOAT_LITERAL: - case TExprNodeType::DECIMAL_LITERAL: - case TExprNodeType::STRING_LITERAL: - case TExprNodeType::DATE_LITERAL: - return true; - default: - return false; - } -} - -Status EsPredicate::build_disjuncts_list(const Expr* conjunct) { - // process binary predicate - if (TExprNodeType::BINARY_PRED == conjunct->node_type()) { - if (conjunct->children().size() != 2) { - return Status::InternalError("build disjuncts failed: number of children is not 2"); - } - SlotRef* slot_ref = nullptr; - TExprOpcode::type op; - Expr* expr = nullptr; - // k1 = 2 k1 is float (marked for processing later), - // doris on es should ignore this doris native cast transformation, we push down this `cast` to elasticsearch - // conjunct->get_child(0)->node_type() return CAST_EXPR - // conjunct->get_child(1)->node_type()return FLOAT_LITERAL - // the left child is literal and right child is SlotRef maybe not happened, but here we just process - // this situation regardless of the rewrite logic from the FE's Query Engine - if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type() || - TExprNodeType::CAST_EXPR == conjunct->get_child(0)->node_type()) { - expr = conjunct->get_child(1); - // process such as sub-query: select * from (select split_part(k, "_", 1) as new_field from table) t where t.new_field > 1; - RETURN_ERROR_IF_EXPR_IS_NOT_SLOTREF(conjunct->get_child(0)); - // process cast expr, such as: - // k (float) > 2.0, k(int) > 3.2 - slot_ref = (SlotRef*)Expr::expr_without_cast(conjunct->get_child(0)); - op = conjunct->op(); - } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type() || - TExprNodeType::CAST_EXPR == conjunct->get_child(1)->node_type()) { - expr = conjunct->get_child(0); - RETURN_ERROR_IF_EXPR_IS_NOT_SLOTREF(conjunct->get_child(1)); - slot_ref = (SlotRef*)Expr::expr_without_cast(conjunct->get_child(1)); - op = conjunct->op(); - } else { - return Status::InternalError("build disjuncts failed: no SLOT_REF child"); - } - - const SlotDescriptor* slot_desc = get_slot_desc(slot_ref); - if (slot_desc == nullptr) { - return Status::InternalError("build disjuncts failed: slot_desc is null"); - } - - if (!is_literal_node(expr)) { - return Status::InternalError("build disjuncts failed: expr is not literal type"); - } - - ExtLiteral literal(expr->type().type, _context->get_value(expr, nullptr)); - std::string col = slot_desc->col_name(); - if (_field_context.find(col) != _field_context.end()) { - col = _field_context[col]; - } - ExtPredicate* predicate = new ExtBinaryPredicate(TExprNodeType::BINARY_PRED, col, - slot_desc->type(), op, literal); - - _disjuncts.push_back(predicate); - return Status::OK(); - } - // process function call predicate: esquery, is_null_pred, is_not_null_pred - if (TExprNodeType::FUNCTION_CALL == conjunct->node_type()) { - std::string fname = conjunct->fn().name.function_name; - if (fname == "esquery") { - if (conjunct->children().size() != 2) { - return Status::InternalError("build disjuncts failed: number of children is not 2"); - } - Expr* expr = conjunct->get_child(1); - ExtLiteral literal(expr->type().type, _context->get_value(expr, nullptr)); - std::vector query_conditions; - query_conditions.emplace_back(literal); - std::vector cols; - ExtPredicate* predicate = new ExtFunction(TExprNodeType::FUNCTION_CALL, "esquery", cols, - query_conditions); - if (_es_query_status.ok()) { - _es_query_status = BooleanQueryBuilder::check_es_query(*(ExtFunction*)predicate); - if (!_es_query_status.ok()) { - delete predicate; - return _es_query_status; - } - } - _disjuncts.push_back(predicate); - } else if (fname == "is_null_pred" || fname == "is_not_null_pred") { - if (conjunct->children().size() != 1) { - return Status::InternalError("build disjuncts failed: number of children is not 1"); - } - // such as sub-query: select * from (select split_part(k, "_", 1) as new_field from table) t where t.new_field > 1; - // conjunct->get_child(0)->node_type() == TExprNodeType::FUNCTION_CALL, at present doris on es can not support push down function - RETURN_ERROR_IF_EXPR_IS_NOT_SLOTREF(conjunct->get_child(0)); - SlotRef* slot_ref = (SlotRef*)(conjunct->get_child(0)); - const SlotDescriptor* slot_desc = get_slot_desc(slot_ref); - if (slot_desc == nullptr) { - return Status::InternalError("build disjuncts failed: no SLOT_REF child"); - } - bool is_not_null = fname == "is_not_null_pred" ? true : false; - std::string col = slot_desc->col_name(); - if (_field_context.find(col) != _field_context.end()) { - col = _field_context[col]; - } - // use TExprNodeType::IS_NULL_PRED for BooleanQueryBuilder translate - ExtIsNullPredicate* predicate = new ExtIsNullPredicate(TExprNodeType::IS_NULL_PRED, col, - slot_desc->type(), is_not_null); - _disjuncts.push_back(predicate); - } else if (fname == "like") { - if (conjunct->children().size() != 2) { - return Status::InternalError("build disjuncts failed: number of children is not 2"); - } - SlotRef* slot_ref = nullptr; - Expr* expr = nullptr; - if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type()) { - expr = conjunct->get_child(1); - slot_ref = (SlotRef*)(conjunct->get_child(0)); - } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type()) { - expr = conjunct->get_child(0); - slot_ref = (SlotRef*)(conjunct->get_child(1)); - } else { - return Status::InternalError("build disjuncts failed: no SLOT_REF child"); - } - const SlotDescriptor* slot_desc = get_slot_desc(slot_ref); - if (slot_desc == nullptr) { - return Status::InternalError("build disjuncts failed: slot_desc is null"); - } - - PrimitiveType type = expr->type().type; - if (type != TYPE_VARCHAR && type != TYPE_CHAR && type != TYPE_STRING) { - return Status::InternalError("build disjuncts failed: like value is not a string"); - } - std::string col = slot_desc->col_name(); - if (_field_context.find(col) != _field_context.end()) { - col = _field_context[col]; - } - ExtLiteral literal(type, _context->get_value(expr, nullptr)); - ExtPredicate* predicate = - new ExtLikePredicate(TExprNodeType::LIKE_PRED, col, slot_desc->type(), literal); - - _disjuncts.push_back(predicate); - } else { - return Status::InternalError("can not process function predicate[ {} ]", fname); - } - return Status::OK(); - } - - if (TExprNodeType::IN_PRED == conjunct->node_type()) { - // the op code maybe FILTER_NEW_IN, it means there is function in list - // like col_a in (abs(1)) - if (TExprOpcode::FILTER_IN != conjunct->op() && - TExprOpcode::FILTER_NOT_IN != conjunct->op()) { - return Status::InternalError( - "build disjuncts failed: " - "opcode in IN_PRED is neither FILTER_IN nor FILTER_NOT_IN"); - } - - std::vector in_pred_values; - const InPredicate* pred = static_cast(conjunct); - const Expr* expr = Expr::expr_without_cast(pred->get_child(0)); - if (expr->node_type() != TExprNodeType::SLOT_REF) { - return Status::InternalError("build disjuncts failed: node type is not slot ref"); - } - - const SlotDescriptor* slot_desc = get_slot_desc((const SlotRef*)expr); - if (slot_desc == nullptr) { - return Status::InternalError("build disjuncts failed: slot_desc is null"); - } - - if (pred->get_child(0)->type().type != slot_desc->type().type) { - if (!ignore_cast(slot_desc, pred->get_child(0))) { - return Status::InternalError("build disjuncts failed"); - } - } - - HybridSetBase::IteratorBase* iter = pred->hybrid_set()->begin(); - while (iter->has_next()) { - if (nullptr == iter->get_value()) { - return Status::InternalError("build disjuncts failed: hybrid set has a null value"); - } - - ExtLiteral literal(slot_desc->type().type, const_cast(iter->get_value())); - in_pred_values.emplace_back(literal); - iter->next(); - } - std::string col = slot_desc->col_name(); - if (_field_context.find(col) != _field_context.end()) { - col = _field_context[col]; - } - ExtPredicate* predicate = new ExtInPredicate(TExprNodeType::IN_PRED, pred->is_not_in(), col, - slot_desc->type(), in_pred_values); - _disjuncts.push_back(predicate); - - return Status::OK(); - } - if (TExprNodeType::COMPOUND_PRED == conjunct->node_type()) { - // process COMPOUND_AND, such as: - // k = 1 or (k1 = 7 and (k2 in (6,7) or k3 = 12)) - // k1 = 7 and (k2 in (6,7) or k3 = 12) is compound pred, we should rebuild this sub tree - if (conjunct->op() == TExprOpcode::COMPOUND_AND) { - std::vector conjuncts; - for (int i = 0; i < conjunct->get_num_children(); ++i) { - EsPredicate* predicate = _pool->add(new EsPredicate(_context, _tuple_desc, _pool)); - predicate->set_field_context(_field_context); - Status status = predicate->build_disjuncts_list(conjunct->children()[i]); - if (status.ok()) { - conjuncts.push_back(predicate); - } else { - return Status::InternalError("build COMPOUND_AND conjuncts failed"); - } - } - ExtCompPredicates* compound_predicate = - new ExtCompPredicates(TExprOpcode::COMPOUND_AND, conjuncts); - _disjuncts.push_back(compound_predicate); - return Status::OK(); - } else if (conjunct->op() == TExprOpcode::COMPOUND_NOT) { - // reserved for processing COMPOUND_NOT - return Status::InternalError("currently do not support COMPOUND_NOT push-down"); - } - DCHECK(conjunct->op() == TExprOpcode::COMPOUND_OR); - Status status = build_disjuncts_list(conjunct->get_child(0)); - if (!status.ok()) { - return status; - } - status = build_disjuncts_list(conjunct->get_child(1)); - if (!status.ok()) { - return status; - } - return Status::OK(); - } - - // if go to here, report error - return Status::InternalError("build disjuncts failed: node type {} is not supported", - conjunct->node_type()); -} - -const SlotDescriptor* EsPredicate::get_slot_desc(const SlotRef* slotRef) { - const SlotDescriptor* slot_desc = nullptr; - for (SlotDescriptor* slot : _tuple_desc->slots()) { - if (slot->id() == slotRef->slot_id()) { - slot_desc = slot; - break; - } - } - return slot_desc; -} - -} // namespace doris diff --git a/be/src/exec/es/es_predicate.h b/be/src/exec/es/es_predicate.h deleted file mode 100644 index 5d7c7570bd..0000000000 --- a/be/src/exec/es/es_predicate.h +++ /dev/null @@ -1,164 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "exprs/slot_ref.h" -#include "gen_cpp/Exprs_types.h" -#include "gen_cpp/Opcodes_types.h" -#include "runtime/descriptors.h" -#include "runtime/primitive_type.h" - -namespace doris { - -class Status; -class ExprContext; -struct ExtBinaryPredicate; -class EsPredicate; - -class ExtLiteral { -public: - ExtLiteral(PrimitiveType type, void* value) : _type(type), _value(value) { - _str = value_to_string(); - } - ~ExtLiteral(); - const std::string& to_string() const { return _str; } - -private: - int8_t get_byte(); - int16_t get_short(); - int32_t get_int(); - int64_t get_long(); - float get_float(); - double get_double(); - std::string get_string(); - std::string get_date_string(); - bool get_bool(); - std::string get_decimal_string(); - std::string get_decimalv2_string(); - std::string get_largeint_string(); - - std::string value_to_string(); - - PrimitiveType _type; - void* _value; - std::string _str; -}; - -struct ExtColumnDesc { - ExtColumnDesc(const std::string& name, const TypeDescriptor& type) : name(name), type(type) {} - - std::string name; - TypeDescriptor type; -}; - -struct ExtPredicate { - ExtPredicate(TExprNodeType::type node_type) : node_type(node_type) {} - virtual ~ExtPredicate() {} - - TExprNodeType::type node_type; -}; - -// this used for placeholder for compound_predicate -// reserved for compound_not -struct ExtCompPredicates : public ExtPredicate { - ExtCompPredicates(TExprOpcode::type expr_op, const std::vector& es_predicates) - : ExtPredicate(TExprNodeType::COMPOUND_PRED), op(expr_op), conjuncts(es_predicates) {} - - TExprOpcode::type op; - std::vector conjuncts; -}; - -struct ExtBinaryPredicate : public ExtPredicate { - ExtBinaryPredicate(TExprNodeType::type node_type, const std::string& name, - const TypeDescriptor& type, TExprOpcode::type op, const ExtLiteral& value) - : ExtPredicate(node_type), col(name, type), op(op), value(value) {} - - ExtColumnDesc col; - TExprOpcode::type op; - ExtLiteral value; -}; - -struct ExtInPredicate : public ExtPredicate { - ExtInPredicate(TExprNodeType::type node_type, bool is_not_in, const std::string& name, - const TypeDescriptor& type, const std::vector& values) - : ExtPredicate(node_type), is_not_in(is_not_in), col(name, type), values(values) {} - - bool is_not_in; - ExtColumnDesc col; - std::vector values; -}; - -struct ExtLikePredicate : public ExtPredicate { - ExtLikePredicate(TExprNodeType::type node_type, const std::string& name, - const TypeDescriptor& type, ExtLiteral value) - : ExtPredicate(node_type), col(name, type), value(value) {} - - ExtColumnDesc col; - ExtLiteral value; -}; - -struct ExtIsNullPredicate : public ExtPredicate { - ExtIsNullPredicate(TExprNodeType::type node_type, const std::string& name, - const TypeDescriptor& type, bool is_not_null) - : ExtPredicate(node_type), col(name, type), is_not_null(is_not_null) {} - - ExtColumnDesc col; - bool is_not_null; -}; - -struct ExtFunction : public ExtPredicate { - ExtFunction(TExprNodeType::type node_type, const std::string& func_name, - std::vector cols, std::vector values) - : ExtPredicate(node_type), func_name(func_name), cols(cols), values(values) {} - - const std::string func_name; - std::vector cols; - const std::vector values; -}; - -class EsPredicate { -public: - EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc, ObjectPool* pool); - ~EsPredicate(); - const std::vector& get_predicate_list() const; - Status build_disjuncts_list(); - // public for tests - EsPredicate(const std::vector& all_predicates) { _disjuncts = all_predicates; } - - Status get_es_query_status() { return _es_query_status; } - - void set_field_context(const std::map& field_context) { - _field_context = field_context; - } - -private: - Status build_disjuncts_list(const Expr* conjunct); - const SlotDescriptor* get_slot_desc(const SlotRef* slotRef); - - ExprContext* _context; - const TupleDescriptor* _tuple_desc; - std::vector _disjuncts; - Status _es_query_status; - ObjectPool* _pool; - std::map _field_context; -}; - -} // namespace doris diff --git a/be/src/exec/es/es_query_builder.cpp b/be/src/exec/es/es_query_builder.cpp deleted file mode 100644 index 31a5e5a205..0000000000 --- a/be/src/exec/es/es_query_builder.cpp +++ /dev/null @@ -1,454 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exec/es/es_query_builder.h" - -#include - -#include "rapidjson/rapidjson.h" - -namespace doris { - -ESQueryBuilder::ESQueryBuilder(const std::string& es_query_str) : _es_query_str(es_query_str) {} -ESQueryBuilder::ESQueryBuilder(const ExtFunction& es_query) { - auto first = es_query.values.front(); - _es_query_str = first.to_string(); -} - -// note: call this function must invoke BooleanQueryBuilder::check_es_query to check validation -void ESQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { - rapidjson::Document scratch_document; - scratch_document.Parse(_es_query_str.c_str(), _es_query_str.length()); - rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); - rapidjson::Value query_key; - rapidjson::Value query_value; - //{ "term": { "dv": "2" } } - rapidjson::Value::ConstMemberIterator first = scratch_document.MemberBegin(); - // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue - query_key.CopyFrom(first->name, allocator); - // if we found one key, then end loop as QueryDSL only support one `query` root - query_value.CopyFrom(first->value, allocator); - // Move Semantics, reference http://rapidjson.org/md_doc_tutorial.html#MoveSemantics - query->AddMember(query_key, query_value, allocator); -} - -TermQueryBuilder::TermQueryBuilder(const std::string& field, const std::string& term) - : _field(field), _term(term), _match_none(false) {} - -TermQueryBuilder::TermQueryBuilder(const ExtBinaryPredicate& binary_predicate) - : _field(binary_predicate.col.name), _match_none(false) { - if (binary_predicate.col.type.type == PrimitiveType::TYPE_BOOLEAN) { - int val = atoi(binary_predicate.value.to_string().c_str()); - if (val == 1) { - _term = std::string("true"); - } else if (val == 0) { - _term = std::string("false"); - } else { - // keep semantic consistent with mysql - _match_none = true; - } - } else { - _term = binary_predicate.value.to_string(); - } -} - -void TermQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { - rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); - rapidjson::Value term_node(rapidjson::kObjectType); - term_node.SetObject(); - if (!_match_none) { - rapidjson::Value field_value(_field.c_str(), allocator); - rapidjson::Value term_value(_term.c_str(), allocator); - term_node.AddMember(field_value, term_value, allocator); - query->AddMember("term", term_node, allocator); - } else { - // this would only appear `bool` column's predicate (a = 2) - query->AddMember("match_none", term_node, allocator); - } -} - -RangeQueryBuilder::RangeQueryBuilder(const ExtBinaryPredicate& range_predicate) - : _field(range_predicate.col.name), - _value(range_predicate.value.to_string()), - _op(range_predicate.op) {} - -void RangeQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { - rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); - rapidjson::Value field_value(_field.c_str(), allocator); - rapidjson::Value value(_value.c_str(), allocator); - rapidjson::Value op_node(rapidjson::kObjectType); - op_node.SetObject(); - switch (_op) { - case TExprOpcode::LT: - op_node.AddMember("lt", value, allocator); - break; - case TExprOpcode::LE: - op_node.AddMember("lte", value, allocator); - break; - case TExprOpcode::GT: - op_node.AddMember("gt", value, allocator); - break; - case TExprOpcode::GE: - op_node.AddMember("gte", value, allocator); - break; - default: - break; - } - rapidjson::Value field_node(rapidjson::kObjectType); - field_node.SetObject(); - field_node.AddMember(field_value, op_node, allocator); - query->AddMember("range", field_node, allocator); -} - -void WildCardQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { - rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); - rapidjson::Value term_node(rapidjson::kObjectType); - term_node.SetObject(); - rapidjson::Value field_value(_field.c_str(), allocator); - rapidjson::Value term_value(_like_value.c_str(), allocator); - term_node.AddMember(field_value, term_value, allocator); - query->AddMember("wildcard", term_node, allocator); -} -WildCardQueryBuilder::WildCardQueryBuilder(const ExtLikePredicate& like_predicate) - : _field(like_predicate.col.name) { - _like_value = like_predicate.value.to_string(); - // example of translation : - // abc_123 ===> abc?123 - // abc%ykz ===> abc*123 - // %abc123 ===> *abc123 - // _abc123 ===> ?abc123 - // \\_abc1 ===> \\_abc1 - // abc\\_123 ===> abc\\_123 - // abc\\%123 ===> abc\\%123 - // NOTE. user must input sql like 'abc\\_123' or 'abc\\%ykz' - for (int i = 0; i < _like_value.size(); i++) { - if (_like_value[i] == '_' || _like_value[i] == '%') { - if (i == 0) { - _like_value[i] = (_like_value[i] == '_') ? '?' : '*'; - } else if (_like_value[i - 1] != '\\') { - _like_value[i] = (_like_value[i] == '_') ? '?' : '*'; - } - } - } -} - -void TermsInSetQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { - rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); - rapidjson::Value terms_node(rapidjson::kObjectType); - rapidjson::Value values_node(rapidjson::kArrayType); - for (auto& value : _values) { - rapidjson::Value value_value(value.c_str(), allocator); - values_node.PushBack(value_value, allocator); - } - rapidjson::Value field_value(_field.c_str(), allocator); - terms_node.AddMember(field_value, values_node, allocator); - query->AddMember("terms", terms_node, allocator); -} - -TermsInSetQueryBuilder::TermsInSetQueryBuilder(const ExtInPredicate& in_predicate) - : _field(in_predicate.col.name) { - for (auto& value : in_predicate.values) { - _values.push_back(value.to_string()); - } -} - -void MatchAllQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { - rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); - rapidjson::Value match_all_node(rapidjson::kObjectType); - match_all_node.SetObject(); - query->AddMember("match_all", match_all_node, allocator); -} - -ExistsQueryBuilder::ExistsQueryBuilder(const ExtIsNullPredicate& is_null_predicate) - : _field(is_null_predicate.col.name) {} - -void ExistsQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { - rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); - rapidjson::Value term_node(rapidjson::kObjectType); - term_node.SetObject(); - rapidjson::Value field_name(_field.c_str(), allocator); - term_node.AddMember("field", field_name, allocator); - query->AddMember("exists", term_node, allocator); -} - -BooleanQueryBuilder::BooleanQueryBuilder() {} -BooleanQueryBuilder::~BooleanQueryBuilder() { - for (auto clause : _must_clauses) { - delete clause; - clause = nullptr; - } - for (auto clause : _must_not_clauses) { - delete clause; - clause = nullptr; - } - for (auto clause : _filter_clauses) { - delete clause; - clause = nullptr; - } - for (auto clause : _should_clauses) { - delete clause; - clause = nullptr; - } -} - -BooleanQueryBuilder::BooleanQueryBuilder(const std::vector& predicates) { - for (auto predicate : predicates) { - switch (predicate->node_type) { - case TExprNodeType::BINARY_PRED: { - ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; - switch (binary_predicate->op) { - case TExprOpcode::EQ: { - TermQueryBuilder* term_query = new TermQueryBuilder(*binary_predicate); - _should_clauses.push_back(term_query); - break; - } - case TExprOpcode::NE: { // process NE - TermQueryBuilder* term_query = new TermQueryBuilder(*binary_predicate); - BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); - bool_query->must_not(term_query); - _should_clauses.push_back(bool_query); - break; - } - case TExprOpcode::LT: - case TExprOpcode::LE: - case TExprOpcode::GT: - case TExprOpcode::GE: { - RangeQueryBuilder* range_query = new RangeQueryBuilder(*binary_predicate); - _should_clauses.push_back(range_query); - break; - } - default: - break; - } - break; - } - case TExprNodeType::IN_PRED: { - ExtInPredicate* in_predicate = (ExtInPredicate*)predicate; - bool is_not_in = in_predicate->is_not_in; - if (is_not_in) { // process not in predicate - TermsInSetQueryBuilder* terms_predicate = new TermsInSetQueryBuilder(*in_predicate); - BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); - bool_query->must_not(terms_predicate); - _should_clauses.push_back(bool_query); - } else { // process in predicate - TermsInSetQueryBuilder* terms_query = new TermsInSetQueryBuilder(*in_predicate); - _should_clauses.push_back(terms_query); - } - break; - } - case TExprNodeType::LIKE_PRED: { - ExtLikePredicate* like_predicate = (ExtLikePredicate*)predicate; - WildCardQueryBuilder* wild_card_query = new WildCardQueryBuilder(*like_predicate); - _should_clauses.push_back(wild_card_query); - break; - } - case TExprNodeType::IS_NULL_PRED: { - ExtIsNullPredicate* is_null_predicate = (ExtIsNullPredicate*)predicate; - ExistsQueryBuilder* exists_query = new ExistsQueryBuilder(*is_null_predicate); - if (is_null_predicate->is_not_null) { - _should_clauses.push_back(exists_query); - } else { - BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); - bool_query->must_not(exists_query); - _should_clauses.push_back(bool_query); - } - break; - } - case TExprNodeType::FUNCTION_CALL: { - ExtFunction* function_predicate = (ExtFunction*)predicate; - if ("esquery" == function_predicate->func_name) { - ESQueryBuilder* es_query = new ESQueryBuilder(*function_predicate); - _should_clauses.push_back(es_query); - }; - break; - } - case TExprNodeType::COMPOUND_PRED: { - ExtCompPredicates* compound_predicates = (ExtCompPredicates*)predicate; - // reserved for compound_not - if (compound_predicates->op == TExprOpcode::COMPOUND_AND) { - BooleanQueryBuilder* bool_query = new BooleanQueryBuilder(); - for (auto es_predicate : compound_predicates->conjuncts) { - std::vector or_predicates = es_predicate->get_predicate_list(); - BooleanQueryBuilder* inner_bool_query = new BooleanQueryBuilder(or_predicates); - bool_query->must(inner_bool_query); - } - _should_clauses.push_back(bool_query); - } - break; - } - default: - break; - } - } -} - -void BooleanQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { - rapidjson::Document::AllocatorType& allocator = document->GetAllocator(); - rapidjson::Value root_node_object(rapidjson::kObjectType); - if (_filter_clauses.size() > 0) { - rapidjson::Value filter_node(rapidjson::kArrayType); - for (auto must_clause : _filter_clauses) { - rapidjson::Value must_clause_query(rapidjson::kObjectType); - must_clause_query.SetObject(); - must_clause->to_json(document, &must_clause_query); - filter_node.PushBack(must_clause_query, allocator); - } - root_node_object.AddMember("filter", filter_node, allocator); - } - - if (_should_clauses.size() > 0) { - rapidjson::Value should_node(rapidjson::kArrayType); - for (auto should_clause : _should_clauses) { - rapidjson::Value should_clause_query(rapidjson::kObjectType); - should_clause_query.SetObject(); - should_clause->to_json(document, &should_clause_query); - should_node.PushBack(should_clause_query, allocator); - } - root_node_object.AddMember("should", should_node, allocator); - } - - if (_must_not_clauses.size() > 0) { - rapidjson::Value must_not_node(rapidjson::kArrayType); - for (auto must_not_clause : _must_not_clauses) { - rapidjson::Value must_not_clause_query(rapidjson::kObjectType); - must_not_clause_query.SetObject(); - must_not_clause->to_json(document, &must_not_clause_query); - must_not_node.PushBack(must_not_clause_query, allocator); - } - root_node_object.AddMember("must_not", must_not_node, allocator); - } - query->AddMember("bool", root_node_object, allocator); -} - -void BooleanQueryBuilder::should(QueryBuilder* filter) { - _should_clauses.push_back(filter); -} -void BooleanQueryBuilder::filter(QueryBuilder* filter) { - _filter_clauses.push_back(filter); -} -void BooleanQueryBuilder::must(QueryBuilder* filter) { - _filter_clauses.push_back(filter); -} -void BooleanQueryBuilder::must_not(QueryBuilder* filter) { - _must_not_clauses.push_back(filter); -} - -Status BooleanQueryBuilder::check_es_query(const ExtFunction& extFunction) { - const std::string& esquery_str = extFunction.values.front().to_string(); - rapidjson::Document scratch_document; - scratch_document.Parse(esquery_str.c_str(), esquery_str.length()); - rapidjson::Document::AllocatorType& allocator = scratch_document.GetAllocator(); - rapidjson::Value query_key; - // { "term": { "dv": "2" } } - if (!scratch_document.HasParseError()) { - if (!scratch_document.IsObject()) { - return Status::InvalidArgument("esquery must be a object"); - } - rapidjson::SizeType object_count = scratch_document.MemberCount(); - if (object_count != 1) { - return Status::InvalidArgument("esquery must only one root"); - } - // deep copy, reference http://rapidjson.org/md_doc_tutorial.html#DeepCopyValue - rapidjson::Value::ConstMemberIterator first = scratch_document.MemberBegin(); - query_key.CopyFrom(first->name, allocator); - if (!query_key.IsString()) { - // if we found one key, then end loop as QueryDSL only support one `query` root - return Status::InvalidArgument("esquery root key must be string"); - } - } else { - return Status::InvalidArgument("malformed esquery json"); - } - return Status::OK(); -} - -void BooleanQueryBuilder::validate(const std::vector& espredicates, - std::vector* result) { - for (auto espredicate : espredicates) { - result->push_back(validate(espredicate)); - } -} - -bool BooleanQueryBuilder::validate(const EsPredicate* espredicate) { - for (auto predicate : espredicate->get_predicate_list()) { - switch (predicate->node_type) { - case TExprNodeType::BINARY_PRED: { - ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; - TExprOpcode::type op = binary_predicate->op; - if (op != TExprOpcode::EQ && op != TExprOpcode::NE && op != TExprOpcode::LT && - op != TExprOpcode::LE && op != TExprOpcode::GT && op != TExprOpcode::GE) { - return false; - } - break; - } - case TExprNodeType::COMPOUND_PRED: { - ExtCompPredicates* compound_predicates = (ExtCompPredicates*)predicate; - if (compound_predicates->op != TExprOpcode::COMPOUND_AND) { - // reserved for compound_not - return false; - } - std::vector list; - validate(compound_predicates->conjuncts, &list); - for (int i = list.size() - 1; i >= 0; i--) { - if (!list[i]) { - return false; - } - } - break; - } - case TExprNodeType::LIKE_PRED: - case TExprNodeType::IS_NULL_PRED: - case TExprNodeType::IN_PRED: { - break; - } - case TExprNodeType::FUNCTION_CALL: { - ExtFunction* function_predicate = (ExtFunction*)predicate; - if ("esquery" != function_predicate->func_name) { - return false; - } - Status st = check_es_query(*function_predicate); - if (!st.ok()) { - return false; - } - break; - } - default: { - return false; - break; - } - } - } - - return true; -} - -void BooleanQueryBuilder::to_query(const std::vector& predicates, - rapidjson::Document* root, rapidjson::Value* query) { - if (predicates.size() == 0) { - MatchAllQueryBuilder match_all_query; - match_all_query.to_json(root, query); - return; - } - root->SetObject(); - BooleanQueryBuilder bool_query; - for (auto es_predicate : predicates) { - std::vector or_predicates = es_predicate->get_predicate_list(); - BooleanQueryBuilder* inner_bool_query = new BooleanQueryBuilder(or_predicates); - bool_query.must(inner_bool_query); - } - bool_query.to_json(root, query); -} -} // namespace doris diff --git a/be/src/exec/es/es_query_builder.h b/be/src/exec/es/es_query_builder.h deleted file mode 100644 index 6a5d16d85e..0000000000 --- a/be/src/exec/es/es_query_builder.h +++ /dev/null @@ -1,138 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "common/status.h" -#include "exec/es/es_predicate.h" -#include "rapidjson/document.h" - -namespace doris { - -class QueryBuilder { -public: - virtual void to_json(rapidjson::Document* document, rapidjson::Value* query) = 0; - virtual ~QueryBuilder() {} -}; - -// process esquery(fieldA, json dsl) function -class ESQueryBuilder : public QueryBuilder { -public: - ESQueryBuilder(const std::string& es_query_str); - ESQueryBuilder(const ExtFunction& es_query); - void to_json(rapidjson::Document* document, rapidjson::Value* query) override; - -private: - std::string _es_query_str; -}; - -// process field = value -class TermQueryBuilder : public QueryBuilder { -public: - TermQueryBuilder(const std::string& field, const std::string& term); - TermQueryBuilder(const ExtBinaryPredicate& binary_predicate); - void to_json(rapidjson::Document* document, rapidjson::Value* query) override; - -private: - std::string _field; - std::string _term; - bool _match_none; -}; - -// process range predicate field >= value or field < value etc. -class RangeQueryBuilder : public QueryBuilder { -public: - RangeQueryBuilder(const ExtBinaryPredicate& range_predicate); - void to_json(rapidjson::Document* document, rapidjson::Value* query) override; - -private: - std::string _field; - std::string _value; - TExprOpcode::type _op; -}; - -// process in predicate : field in [value1, value2] -class TermsInSetQueryBuilder : public QueryBuilder { -public: - TermsInSetQueryBuilder(const ExtInPredicate& in_predicate); - void to_json(rapidjson::Document* document, rapidjson::Value* query) override; - -private: - std::string _field; - std::vector _values; -}; - -// process like predicate : field like "a%b%c_" -class WildCardQueryBuilder : public QueryBuilder { -public: - WildCardQueryBuilder(const ExtLikePredicate& like_predicate); - void to_json(rapidjson::Document* document, rapidjson::Value* query) override; - -private: - std::string _like_value; - std::string _field; -}; - -// no predicates: all document match -class MatchAllQueryBuilder : public QueryBuilder { -public: - void to_json(rapidjson::Document* document, rapidjson::Value* query) override; -}; - -// process like predicate : k1 is null or k1 is not null" -class ExistsQueryBuilder : public QueryBuilder { -public: - ExistsQueryBuilder(const ExtIsNullPredicate& like_predicate); - void to_json(rapidjson::Document* document, rapidjson::Value* query) override; - -private: - std::string _field; -}; - -// process bool compound query, and play the role of a bridge for transferring predicates to es native query -class BooleanQueryBuilder : public QueryBuilder { -public: - BooleanQueryBuilder(const std::vector& predicates); - BooleanQueryBuilder(); - virtual ~BooleanQueryBuilder(); - // class method for transfer predicate to es query value, invoker should enclose this value with `query` - static void to_query(const std::vector& predicates, rapidjson::Document* root, - rapidjson::Value* query); - // validate esquery syntax - static Status check_es_query(const ExtFunction& extFunction); - // decide which predicate can process - static void validate(const std::vector& espredicates, std::vector* result); - static bool validate(const EsPredicate* espredicate); - -private: - // add child query - void should(QueryBuilder* filter); - void filter(QueryBuilder* filter); - void must(QueryBuilder* filter); - void must_not(QueryBuilder* filter); - void to_json(rapidjson::Document* document, rapidjson::Value* query) override; - - std::vector _must_clauses; - std::vector _must_not_clauses; - std::vector _filter_clauses; - std::vector _should_clauses; -}; - -} // namespace doris diff --git a/be/src/exec/es/es_scroll_query.cpp b/be/src/exec/es/es_scroll_query.cpp index a9e3c5e0e0..b68e8624c3 100644 --- a/be/src/exec/es/es_scroll_query.cpp +++ b/be/src/exec/es/es_scroll_query.cpp @@ -19,7 +19,6 @@ #include -#include "exec/es/es_query_builder.h" #include "exec/es/es_scan_reader.h" #include "rapidjson/document.h" #include "rapidjson/stringbuffer.h" diff --git a/be/src/exec/es/es_scroll_query.h b/be/src/exec/es/es_scroll_query.h index 3b98c61c0c..af3bbbd9cc 100644 --- a/be/src/exec/es/es_scroll_query.h +++ b/be/src/exec/es/es_scroll_query.h @@ -17,11 +17,10 @@ #pragma once +#include #include #include -#include "exec/es/es_predicate.h" - namespace doris { class ESScrollQueryBuilder { diff --git a/be/src/exec/exec_node.cpp b/be/src/exec/exec_node.cpp index 62dc3a2acb..08d7986c52 100644 --- a/be/src/exec/exec_node.cpp +++ b/be/src/exec/exec_node.cpp @@ -27,7 +27,6 @@ #include "common/object_pool.h" #include "common/status.h" -#include "exprs/expr_context.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/memory/mem_tracker.h" @@ -84,31 +83,6 @@ ExecNode::ExecNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl ExecNode::~ExecNode() = default; -void ExecNode::push_down_predicate(RuntimeState* state, std::list* expr_ctxs) { - if (_type != TPlanNodeType::AGGREGATION_NODE) { - for (int i = 0; i < _children.size(); ++i) { - _children[i]->push_down_predicate(state, expr_ctxs); - if (expr_ctxs->size() == 0) { - return; - } - } - } - - std::list::iterator iter = expr_ctxs->begin(); - while (iter != expr_ctxs->end()) { - if ((*iter)->root()->is_bound(&_tuple_ids)) { - // LOG(INFO) << "push down success expr is " << (*iter)->debug_string() - // << " and node is " << debug_string(); - (*iter)->prepare(state, row_desc()); - (*iter)->open(state); - _conjunct_ctxs.push_back(*iter); - iter = expr_ctxs->erase(iter); - } else { - ++iter; - } - } -} - Status ExecNode::init(const TPlanNode& tnode, RuntimeState* state) { init_runtime_profile(get_name()); @@ -117,10 +91,6 @@ Status ExecNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_tree(_pool, tnode.vconjunct, _vconjunct_ctx_ptr.get())); } - if (typeid(*this) != typeid(doris::vectorized::NewOlapScanNode) && - typeid(*this) != typeid(doris::vectorized::NewFileScanNode)) { - RETURN_IF_ERROR(Expr::create_expr_trees(_pool, tnode.conjuncts, &_conjunct_ctxs)); - } // create the projections expr if (tnode.__isset.projections) { @@ -148,13 +118,6 @@ Status ExecNode::prepare(RuntimeState* state) { RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->prepare(state, intermediate_row_desc())); } - // For vectorized olap scan node, the conjuncts is prepared in _vconjunct_ctx_ptr. - // And _conjunct_ctxs is useless. - // TODO: Should be removed when non-vec engine is removed. - if (typeid(*this) != typeid(doris::vectorized::NewOlapScanNode) && - typeid(*this) != typeid(doris::vectorized::NewFileScanNode)) { - RETURN_IF_ERROR(Expr::prepare(_conjunct_ctxs, state, _row_descriptor)); - } RETURN_IF_ERROR(vectorized::VExpr::prepare(_projections, state, intermediate_row_desc())); for (int i = 0; i < _children.size(); ++i) { @@ -169,12 +132,7 @@ Status ExecNode::alloc_resource(doris::RuntimeState* state) { RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->open(state)); } RETURN_IF_ERROR(vectorized::VExpr::open(_projections, state)); - if (typeid(*this) != typeid(doris::vectorized::NewOlapScanNode) && - typeid(*this) != typeid(doris::vectorized::NewFileScanNode)) { - return Expr::open(_conjunct_ctxs, state); - } else { - return Status::OK(); - } + return Status::OK(); } Status ExecNode::open(RuntimeState* state) { @@ -206,10 +164,6 @@ void ExecNode::release_resource(doris::RuntimeState* state) { if (_vconjunct_ctx_ptr) { (*_vconjunct_ctx_ptr)->close(state); } - if (typeid(*this) != typeid(doris::vectorized::NewOlapScanNode) && - typeid(*this) != typeid(doris::vectorized::NewFileScanNode)) { - Expr::close(_conjunct_ctxs, state); - } vectorized::VExpr::close(_projections, state); runtime_profile()->add_to_span(); @@ -486,7 +440,6 @@ std::string ExecNode::debug_string() const { } void ExecNode::debug_string(int indentation_level, std::stringstream* out) const { - *out << " conjuncts=" << Expr::debug_string(_conjuncts); *out << " id=" << _id; *out << " type=" << print_plan_node_type(_type); *out << " tuple_ids=["; @@ -501,16 +454,6 @@ void ExecNode::debug_string(int indentation_level, std::stringstream* out) const } } -bool ExecNode::eval_conjuncts(ExprContext* const* ctxs, int num_ctxs, TupleRow* row) { - for (int i = 0; i < num_ctxs; ++i) { - BooleanVal v = ctxs[i]->get_boolean_val(row); - if (v.is_null || !v.val) { - return false; - } - } - return true; -} - void ExecNode::collect_nodes(TPlanNodeType::type node_type, std::vector* nodes) { if (_type == node_type) { nodes->push_back(this); diff --git a/be/src/exec/exec_node.h b/be/src/exec/exec_node.h index 76ab99fd42..5d6f1f18b2 100644 --- a/be/src/exec/exec_node.h +++ b/be/src/exec/exec_node.h @@ -35,13 +35,10 @@ #include "vec/exprs/vexpr_context.h" namespace doris { -class Expr; -class ExprContext; class ObjectPool; class Counters; class RuntimeState; class TPlan; -class TupleRow; class MemTracker; namespace vectorized { @@ -191,17 +188,9 @@ public: // This improve is cautious, we ensure the correctness firstly. void try_do_aggregate_serde_improve(); - using EvalConjunctsFn = bool (*)(ExprContext* const*, int, TupleRow*); - // Evaluate exprs over row. Returns true if all exprs return true. - // TODO: This doesn't use the vector signature because I haven't figured - // out how to deal with declaring a templated std:vector type in IR - static bool eval_conjuncts(ExprContext* const* ctxs, int num_ctxs, TupleRow* row); - // Returns a string representation in DFS order of the plan rooted at this. std::string debug_string() const; - virtual void push_down_predicate(RuntimeState* state, std::list* expr_ctxs); - // recursive helper method for generating a string for Debug_string(). // implementations should call debug_string(int, std::stringstream) on their children. // Input parameters: @@ -210,8 +199,6 @@ public: // out: Stream to accumulate debug string. virtual void debug_string(int indentation_level, std::stringstream* out) const; - const std::vector& conjunct_ctxs() const { return _conjunct_ctxs; } - int id() const { return _id; } TPlanNodeType::type type() const { return _type; } virtual const RowDescriptor& row_desc() const { @@ -256,8 +243,6 @@ protected: int _id; // unique w/in single plan tree TPlanNodeType::type _type; ObjectPool* _pool; - std::vector _conjuncts; - std::vector _conjunct_ctxs; std::vector _tuple_ids; std::unique_ptr _vconjunct_ctx_ptr; diff --git a/be/src/exec/hash_table.cpp b/be/src/exec/hash_table.cpp deleted file mode 100644 index 6db56ba2ff..0000000000 --- a/be/src/exec/hash_table.cpp +++ /dev/null @@ -1,452 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/hash-table.cc -// and modified by Doris - -#include "exec/hash_table.h" - -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/raw_value.h" - -namespace doris { - -HashTable::HashTable(const std::vector& build_expr_ctxs, - const std::vector& probe_expr_ctxs, int num_build_tuples, - bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, - int64_t num_buckets) - : _build_expr_ctxs(build_expr_ctxs), - _probe_expr_ctxs(probe_expr_ctxs), - _num_build_tuples(num_build_tuples), - _stores_nulls(stores_nulls), - _finds_nulls(finds_nulls), - _initial_seed(initial_seed), - _node_byte_size(sizeof(Node) + sizeof(Tuple*) * _num_build_tuples), - _num_filled_buckets(0), - _current_nodes(nullptr), - _num_nodes(0), - _current_capacity(num_buckets), - _current_used(0), - _total_capacity(num_buckets) { - DCHECK_EQ(_build_expr_ctxs.size(), _probe_expr_ctxs.size()); - - DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) << "num_buckets must be a power of 2"; - _mem_tracker = std::make_unique("HashTable"); - _buckets.resize(num_buckets); - _num_buckets = num_buckets; - _num_buckets_till_resize = MAX_BUCKET_OCCUPANCY_FRACTION * _num_buckets; - _mem_tracker->consume(_buckets.capacity() * sizeof(Bucket)); - - // Compute the layout and buffer size to store the evaluated expr results - _results_buffer_size = Expr::compute_results_layout( - _build_expr_ctxs, &_expr_values_buffer_offsets, &_var_result_begin); - _expr_values_buffer = new uint8_t[_results_buffer_size]; - memset(_expr_values_buffer, 0, sizeof(uint8_t) * _results_buffer_size); - _expr_value_null_bits = new uint8_t[_build_expr_ctxs.size()]; - - _alloc_list.reserve(10); - _end_list.reserve(10); - _current_nodes = reinterpret_cast(malloc(_current_capacity * _node_byte_size)); - // TODO: remove memset later - memset(_current_nodes, 0, _current_capacity * _node_byte_size); - _alloc_list.push_back(_current_nodes); - _end_list.push_back(_current_nodes + _current_capacity * _node_byte_size); - - _mem_tracker->consume(_current_capacity * _node_byte_size); -} - -HashTable::~HashTable() {} - -void HashTable::close() { - // TODO: use tr1::array? - delete[] _expr_values_buffer; - delete[] _expr_value_null_bits; - for (auto ptr : _alloc_list) { - free(ptr); - } - _mem_tracker->release(_total_capacity * _node_byte_size); - _mem_tracker->release(_buckets.size() * sizeof(Bucket)); -} - -bool HashTable::eval_row(TupleRow* row, const std::vector& ctxs) { - // Put a non-zero constant in the result location for nullptr. - // We don't want(nullptr, 1) to hash to the same as (0, 1). - // This needs to be as big as the biggest primitive type since the bytes - // get copied directly. - - // the 10 is experience value which need bigger than sizeof(Decimal)/sizeof(int64). - // for if slot is null, we need copy the null value to all type. - static int64_t null_value[10] = {HashUtil::FNV_SEED, HashUtil::FNV_SEED, 0}; - bool has_null = false; - - for (int i = 0; i < ctxs.size(); ++i) { - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - void* val = ctxs[i]->get_value(row); - - if (val == nullptr) { - // If the table doesn't store nulls, no reason to keep evaluating - if (!_stores_nulls) { - return true; - } - - _expr_value_null_bits[i] = true; - val = &null_value; - has_null = true; - } else { - _expr_value_null_bits[i] = false; - } - - RawValue::write(val, loc, _build_expr_ctxs[i]->root()->type(), nullptr); - } - - return has_null; -} - -uint32_t HashTable::hash_variable_len_row() { - uint32_t hash = _initial_seed; - // Hash the non-var length portions (if there are any) - if (_var_result_begin != 0) { - hash = HashUtil::hash(_expr_values_buffer, _var_result_begin, hash); - } - - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - // non-string and null slots are already part of expr_values_buffer - if (_build_expr_ctxs[i]->root()->type().is_string_type()) { - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - - if (_expr_value_null_bits[i]) { - // Hash the null random seed values at 'loc' - hash = HashUtil::hash(loc, sizeof(StringRef), hash); - } else { - // Hash the string - StringRef* str = reinterpret_cast(loc); - hash = HashUtil::hash(str->data, str->size, hash); - } - } - } - - return hash; -} - -bool HashTable::equals(TupleRow* build_row) { - for (int i = 0; i < _build_expr_ctxs.size(); ++i) { - void* val = _build_expr_ctxs[i]->get_value(build_row); - - if (val == nullptr) { - if (!(_stores_nulls && _finds_nulls[i])) { - return false; - } - - if (!_expr_value_null_bits[i]) { - return false; - } - - continue; - } - - void* loc = _expr_values_buffer + _expr_values_buffer_offsets[i]; - - if (!RawValue::eq(loc, val, _build_expr_ctxs[i]->root()->type())) { - return false; - } - } - - return true; -} - -Status HashTable::resize_buckets(int64_t num_buckets) { - DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) << "num_buckets must be a power of 2"; - - int64_t old_num_buckets = _num_buckets; - int64_t delta_bytes = (num_buckets - old_num_buckets) * sizeof(Bucket); - Status st = thread_context()->thread_mem_tracker()->check_limit(delta_bytes); - if (!st) { - LOG_EVERY_N(WARNING, 100) << "resize bucket failed: " << st; - return st; - } - _mem_tracker->consume(delta_bytes); - - _buckets.resize(std::max(num_buckets, _num_buckets)); - - // If we're doubling the number of buckets, all nodes in a particular bucket - // either remain there, or move down to an analogous bucket in the other half. - // In order to efficiently check which of the two buckets a node belongs in, the number - // of buckets must be a power of 2. - bool doubled_buckets = (num_buckets == old_num_buckets * 2); - - for (int i = 0; i < _num_buckets; ++i) { - Bucket* bucket = &_buckets[i]; - Bucket* sister_bucket = &_buckets[i + old_num_buckets]; - Node* last_node = nullptr; - Node* node = bucket->_node; - - while (node != nullptr) { - Node* next_node = node->_next; - uint32_t hash = node->_hash; - - bool node_must_move = true; - Bucket* move_to = nullptr; - - if (doubled_buckets) { - node_must_move = ((hash & old_num_buckets) != 0); - move_to = sister_bucket; - } else { - int64_t bucket_idx = hash & (num_buckets - 1); - node_must_move = (bucket_idx != i); - move_to = &_buckets[bucket_idx]; - } - - if (node_must_move) { - move_node(bucket, move_to, node, last_node); - } else { - last_node = node; - } - - node = next_node; - } - } - - _buckets.resize(num_buckets); - _num_buckets = num_buckets; - _num_buckets_till_resize = MAX_BUCKET_OCCUPANCY_FRACTION * _num_buckets; - return Status::OK(); -} - -void HashTable::grow_node_array() { - _current_capacity = _total_capacity / 2; - _total_capacity += _current_capacity; - int64_t alloc_size = _current_capacity * _node_byte_size; - _current_nodes = reinterpret_cast(malloc(alloc_size)); - _current_used = 0; - // TODO: remove memset later - memset(_current_nodes, 0, alloc_size); - // add _current_nodes to alloc pool - _alloc_list.push_back(_current_nodes); - _end_list.push_back(_current_nodes + alloc_size); - - _mem_tracker->consume(alloc_size); -} - -std::string HashTable::debug_string(bool skip_empty, const RowDescriptor* desc) { - std::stringstream ss; - ss << std::endl; - - for (int i = 0; i < _buckets.size(); ++i) { - Node* node = _buckets[i]._node; - bool first = true; - - if (skip_empty && node == nullptr) { - continue; - } - - ss << i << ": "; - - while (node != nullptr) { - if (!first) { - ss << ","; - } - - if (desc == nullptr) { - ss << node->_hash << "(" << (void*)node->data() << ")"; - } else { - ss << (void*)node->data() << " " << node->data()->to_string(*desc); - } - - node = node->_next; - first = false; - } - - ss << std::endl; - } - - return ss.str(); -} - -bool HashTable::emplace_key(TupleRow* row, TupleRow** dest_addr) { - if (_num_filled_buckets > _num_buckets_till_resize) { - if (!resize_buckets(_num_buckets * 2).ok()) { - return false; - } - } - if (_current_used == _current_capacity) { - grow_node_array(); - } - - bool has_nulls = eval_build_row(row); - - if (!_stores_nulls && has_nulls) { - return false; - } - - uint32_t hash = hash_current_row(); - int64_t bucket_idx = hash & (_num_buckets - 1); - - Bucket* bucket = &_buckets[bucket_idx]; - Node* node = bucket->_node; - - bool will_insert = true; - - if (node == nullptr) { - will_insert = true; - } else { - Node* last_node = node; - while (node != nullptr) { - if (node->_hash == hash && equals(node->data())) { - will_insert = false; - break; - } - last_node = node; - node = node->_next; - } - node = last_node; - } - if (will_insert) { - Node* alloc_node = - reinterpret_cast(_current_nodes + _node_byte_size * _current_used++); - ++_num_nodes; - TupleRow* data = alloc_node->data(); - *dest_addr = data; - alloc_node->_hash = hash; - if (node == nullptr) { - add_to_bucket(&_buckets[bucket_idx], alloc_node); - } else { - node->_next = alloc_node; - } - } - return will_insert; -} - -HashTable::Iterator HashTable::find(TupleRow* probe_row, bool probe) { - bool has_nulls = probe ? eval_probe_row(probe_row) : eval_build_row(probe_row); - - if (!_stores_nulls && has_nulls) { - return end(); - } - - uint32_t hash = hash_current_row(); - int64_t bucket_idx = hash & (_num_buckets - 1); - - Bucket* bucket = &_buckets[bucket_idx]; - Node* node = bucket->_node; - - while (node != nullptr) { - if (node->_hash == hash && equals(node->data())) { - return Iterator(this, bucket_idx, node, hash); - } - - node = node->_next; - } - - return end(); -} - -HashTable::Iterator HashTable::begin() { - int64_t bucket_idx = -1; - Bucket* bucket = next_bucket(&bucket_idx); - - if (bucket != nullptr) { - return Iterator(this, bucket_idx, bucket->_node, 0); - } - - return end(); -} - -HashTable::Bucket* HashTable::next_bucket(int64_t* bucket_idx) { - ++*bucket_idx; - - for (; *bucket_idx < _num_buckets; ++*bucket_idx) { - if (_buckets[*bucket_idx]._node != nullptr) { - return &_buckets[*bucket_idx]; - } - } - - *bucket_idx = -1; - return nullptr; -} - -void HashTable::insert_impl(TupleRow* row) { - bool has_null = eval_build_row(row); - - if (!_stores_nulls && has_null) { - return; - } - - uint32_t hash = hash_current_row(); - int64_t bucket_idx = hash & (_num_buckets - 1); - - if (_current_used == _current_capacity) { - grow_node_array(); - } - // get a node from memory pool - Node* node = reinterpret_cast(_current_nodes + _node_byte_size * _current_used++); - - TupleRow* data = node->data(); - node->_hash = hash; - memcpy(data, row, sizeof(Tuple*) * _num_build_tuples); - add_to_bucket(&_buckets[bucket_idx], node); - ++_num_nodes; -} - -void HashTable::add_to_bucket(Bucket* bucket, Node* node) { - if (bucket->_node == nullptr) { - ++_num_filled_buckets; - } - - node->_next = bucket->_node; - bucket->_node = node; - bucket->_size++; -} - -void HashTable::move_node(Bucket* from_bucket, Bucket* to_bucket, Node* node, Node* previous_node) { - Node* next_node = node->_next; - from_bucket->_size--; - - if (previous_node != nullptr) { - previous_node->_next = next_node; - } else { - // Update bucket directly - from_bucket->_node = next_node; - - if (next_node == nullptr) { - --_num_filled_buckets; - } - } - - add_to_bucket(to_bucket, node); -} - -std::pair HashTable::minmax_node() { - bool has_value = false; - int64_t min_size = std::numeric_limits::max(); - int64_t max_size = std::numeric_limits::min(); - for (const auto bucket : _buckets) { - int64_t counter = bucket._size; - if (counter > 0) { - has_value = true; - min_size = std::min(counter, min_size); - max_size = std::max(counter, max_size); - } - } - if (!has_value) { - return std::make_pair(0, 0); - } - return std::make_pair(min_size, max_size); -} - -} // namespace doris diff --git a/be/src/exec/hash_table.h b/be/src/exec/hash_table.h deleted file mode 100644 index c97db360fe..0000000000 --- a/be/src/exec/hash_table.h +++ /dev/null @@ -1,480 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exec/hash-table.h -// and modified by Doris - -#pragma once - -#include - -#include "common/status.h" -#include "util/hash_util.hpp" - -namespace doris { - -class Expr; -class ExprContext; -class RowDescriptor; -class Tuple; -class TupleRow; -class MemTracker; -class RuntimeState; - -// Hash table implementation designed for hash aggregation and hash joins. This is not -// templatized and is tailored to the usage pattern for aggregation and joins. The -// hash table store TupleRows and allows for different exprs for insertions and finds. -// This is the pattern we use for joins and aggregation where the input/build tuple -// row descriptor is different from the find/probe descriptor. -// The table is optimized for the query engine's use case as much as possible and is not -// intended to be a generic hash table implementation. The API loosely mimics the -// std::hashset API. -// -// The hash table stores evaluated expr results for the current row being processed -// when possible into a contiguous memory buffer. This allows for very efficient -// computation for hashing. The implementation is also designed to allow codegen -// for some paths. -// -// The hash table does not support removes. The hash table is not thread safe. -// -// The implementation is based on the boost multiset. The hashtable is implemented by -// two data structures: a vector of buckets and a vector of nodes. Inserted values -// are stored as nodes (in the order they are inserted). The buckets (indexed by the -// mod of the hash) contain pointers to the node vector. Nodes that fall in the same -// bucket are linked together (the bucket pointer gets you the head of that linked list). -// When growing the hash table, the number of buckets is doubled, and nodes from a -// particular bucket either stay in place or move to an analogous bucket in the second -// half of buckets. This behavior allows us to avoid moving about half the nodes each -// time, and maintains good cache properties by only accessing 2 buckets at a time. -// The node vector is modified in place. -// Due to the doubling nature of the buckets, we require that the number of buckets is a -// power of 2. This allows us to determine if a node needs to move by simply checking a -// single bit, and further allows us to initially hash nodes using a bitmask. -// -// TODO: this is not a fancy hash table in terms of memory access patterns (cuckoo-hashing -// or something that spills to disk). We will likely want to invest more time into this. -// TODO: hash-join and aggregation have very different access patterns. Joins insert -// all the rows and then calls scan to find them. Aggregation interleaves find() and -// inserts(). We can want to optimize joins more heavily for inserts() (in particular -// growing). - -class HashTable { -private: - struct Node; - -public: - class Iterator; - - // Create a hash table. - // - build_exprs are the exprs that should be used to evaluate rows during insert(). - // - probe_exprs are used during find() - // - num_build_tuples: number of Tuples in the build tuple row - // - stores_nulls: if false, TupleRows with nulls are ignored during Insert - // - num_buckets: number of buckets that the hash table should be initialized to - // - mem_limits: if non-empty, all memory allocation for nodes and for buckets is - // tracked against those limits; the limits must be valid until the d'tor is called - // - initial_seed: Initial seed value to use when computing hashes for rows - HashTable(const std::vector& build_exprs, - const std::vector& probe_exprs, int num_build_tuples, bool stores_nulls, - const std::vector& finds_nulls, int32_t initial_seed, int64_t num_buckets); - - ~HashTable(); - - // Call to cleanup any resources. Must be called once. - void close(); - - // Insert row into the hash table. Row will be evaluated over _build_expr_ctxs - // This will grow the hash table if necessary - Status insert(TupleRow* row) { - if (_num_filled_buckets > _num_buckets_till_resize) { - RETURN_IF_ERROR(resize_buckets(_num_buckets * 2)); - } - - insert_impl(row); - return Status::OK(); - } - - void insert_without_check(TupleRow* row) { insert_impl(row); } - - // Insert row into the hash table. if the row is already exist will not insert - Status insert_unique(TupleRow* row) { - if (find(row, false) == end()) { - return insert(row); - } - return Status::OK(); - } - - void insert_unique_without_check(TupleRow* row) { - if (find(row, false) == end()) { - insert_without_check(row); - } - } - - Status resize_buckets_ahead(int64_t estimate_buckets) { - if (_num_filled_buckets + estimate_buckets > _num_buckets_till_resize) { - int64_t new_bucket_size = _num_buckets * 2; - while (new_bucket_size <= _num_filled_buckets + estimate_buckets) { - new_bucket_size = new_bucket_size * 2; - } - return resize_buckets(new_bucket_size); - } - return Status::OK(); - } - - bool emplace_key(TupleRow* row, TupleRow** key_addr); - - // Returns the start iterator for all rows that match 'probe_row'. 'probe_row' is - // evaluated with _probe_expr_ctxs. The iterator can be iterated until HashTable::end() - // to find all the matching rows. - // Only one scan be in progress at any time (i.e. it is not legal to call - // find(), begin iterating through all the matches, call another find(), - // and continuing iterator from the first scan iterator). - // Advancing the returned iterator will go to the next matching row. The matching - // rows are evaluated lazily (i.e. computed as the Iterator is moved). - // Returns HashTable::end() if there is no match. - Iterator find(TupleRow* probe_row, bool probe = true); - - // Returns number of elements in the hash table - int64_t size() const { return _num_nodes; } - - // Returns the number of buckets - int64_t num_buckets() const { return _buckets.size(); } - - // Returns the number of filled buckets - int64_t num_filled_buckets() const { return _num_filled_buckets; } - - // Check the hash table should be shrink - bool should_be_shrink(int64_t valid_row) { - return valid_row < MAX_BUCKET_OCCUPANCY_FRACTION * (_buckets.size() / 2.0); - } - - // Returns the load factor (the number of non-empty buckets) - float load_factor() { return _num_filled_buckets / static_cast(_buckets.size()); } - - // Returns the number of bytes allocated to the hash table - int64_t byte_size() const { - return _node_byte_size * _total_capacity + sizeof(Bucket) * _buckets.size(); - } - - // Returns the results of the exprs at 'expr_idx' evaluated over the last row - // processed by the HashTable. - // This value is invalid if the expr evaluated to nullptr. - // TODO: this is an awkward abstraction but aggregation node can take advantage of - // it and save some expr evaluation calls. - void* last_expr_value(int expr_idx) const { - return _expr_values_buffer + _expr_values_buffer_offsets[expr_idx]; - } - - // Returns if the expr at 'expr_idx' evaluated to nullptr for the last row. - bool last_expr_value_null(int expr_idx) const { return _expr_value_null_bits[expr_idx]; } - - // Return beginning of hash table. Advancing this iterator will traverse all - // elements. - Iterator begin(); - - // Returns end marker - Iterator end() { return Iterator(); } - - // Dump out the entire hash table to string. If skip_empty, empty buckets are - // skipped. If build_desc is non-null, the build rows will be output. Otherwise - // just the build row addresses. - std::string debug_string(bool skip_empty, const RowDescriptor* build_desc); - - std::pair minmax_node(); - - MemTracker* mem_tracker() { return _mem_tracker.get(); } - - // Load factor that will trigger growing the hash table on insert. This is - // defined as the number of non-empty buckets / total_buckets - static constexpr float MAX_BUCKET_OCCUPANCY_FRACTION = 0.75f; - - // stl-like iterator interface. - class Iterator { - public: - Iterator() : _table(nullptr), _bucket_idx(-1), _node(nullptr) {} - - // Iterates to the next element. In the case where the iterator was - // from a Find, this will lazily evaluate that bucket, only returning - // TupleRows that match the current scan row. - template - void next() { - if (_bucket_idx == -1) { - return; - } - - // TODO: this should prefetch the next tuplerow - Node* node = _node; - - // Iterator is not from a full table scan, evaluate equality now. Only the current - // bucket needs to be scanned. '_expr_values_buffer' contains the results - // for the current probe row. - if (check_match) { - // TODO: this should prefetch the next node - Node* next_node = node->_next; - - while (next_node != nullptr) { - node = next_node; - - if (node->_hash == _scan_hash && _table->equals(node->data())) { - _node = next_node; - return; - } - - next_node = node->_next; - } - - *this = _table->end(); - } else { - // Move onto the next chained node - if (node->_next != nullptr) { - _node = node->_next; - return; - } - - // Move onto the next bucket - Bucket* bucket = _table->next_bucket(&_bucket_idx); - - if (bucket == nullptr) { - _bucket_idx = -1; - _node = nullptr; - } else { - _node = bucket->_node; - } - } - } - - // Returns the current row or nullptr if at end. - TupleRow* get_row() { - if (_node == nullptr) { - return nullptr; - } - return _node->data(); - } - - // Returns Hash - uint32_t get_hash() { return _node->_hash; } - - // Returns if the iterator is at the end - bool has_next() { return _node != nullptr; } - - // Returns true if this iterator is at the end, i.e. get_row() cannot be called. - bool at_end() { return _node == nullptr; } - - // Sets as matched the node currently pointed by the iterator. The iterator - // cannot be AtEnd(). - void set_matched() { - DCHECK(!at_end()); - _node->matched = true; - } - - bool matched() { - DCHECK(!at_end()); - return _node->matched; - } - - bool operator==(const Iterator& rhs) const { - return _bucket_idx == rhs._bucket_idx && _node == rhs._node; - } - - bool operator!=(const Iterator& rhs) const { - return _bucket_idx != rhs._bucket_idx || _node != rhs._node; - } - - private: - friend class HashTable; - - Iterator(HashTable* table, int bucket_idx, Node* node, uint32_t hash) - : _table(table), _bucket_idx(bucket_idx), _node(node), _scan_hash(hash) {} - - HashTable* _table; - // Current bucket idx - int64_t _bucket_idx; - // Current node (within current bucket) - Node* _node; - // cached hash value for the row passed to find()() - uint32_t _scan_hash; - }; - - template - void for_each_row(Func&& func) { - size_t sz = _alloc_list.size(); - DCHECK_GT(sz, 0); - for (size_t i = 0; i < sz - 1; ++i) { - uint8_t* start = _alloc_list[i]; - uint8_t* end = _end_list[i]; - while (start < end) { - auto node = reinterpret_cast(start); - func(node->data()); - start += _node_byte_size; - } - } - uint8_t* last_st = _alloc_list[sz - 1]; - for (size_t i = 0; i < _current_used; ++i) { - auto node = reinterpret_cast(last_st); - func(node->data()); - last_st += _node_byte_size; - } - } - -private: - friend class Iterator; - friend class HashTableTest; - - // Header portion of a Node. The node data (TupleRow) is right after the - // node memory to maximize cache hits. - struct Node { - Node* _next; // chain to next node for collisions - uint32_t _hash; // Cache of the hash for _data - bool matched; - - Node() : _next(nullptr), _hash(-1), matched(false) {} - - TupleRow* data() { - uint8_t* mem = reinterpret_cast(this); - DCHECK_EQ(reinterpret_cast(mem) % 8, 0); - return reinterpret_cast(mem + sizeof(Node)); - } - }; - - struct Bucket { - Bucket() : _node(nullptr), _size(0) {} - Node* _node; - uint64_t _size; - }; - - // Returns the next non-empty bucket and updates idx to be the index of that bucket. - // If there are no more buckets, returns nullptr and sets idx to -1 - Bucket* next_bucket(int64_t* bucket_idx); - - // Resize the hash table to 'num_buckets' - Status resize_buckets(int64_t num_buckets); - - // Insert row into the hash table - void insert_impl(TupleRow* row); - - // Chains the node at 'node_idx' to 'bucket'. Nodes in a bucket are chained - // as a linked list; this places the new node at the beginning of the list. - void add_to_bucket(Bucket* bucket, Node* node); - - // Moves a node from one bucket to another. 'previous_node' refers to the - // node (if any) that's chained before this node in from_bucket's linked list. - void move_node(Bucket* from_bucket, Bucket* to_bucket, Node* node, Node* previous_node); - - // Evaluate the exprs over row and cache the results in '_expr_values_buffer'. - // Returns whether any expr evaluated to nullptr - // This will be replaced by codegen - bool eval_row(TupleRow* row, const std::vector& exprs); - - // Evaluate 'row' over _build_expr_ctxs caching the results in '_expr_values_buffer' - // This will be replaced by codegen. We do not want this function inlined when - // cross compiled because we need to be able to differentiate between EvalBuildRow - // and EvalProbeRow by name and the _build_expr_ctxs/_probe_expr_ctxs are baked into - // the codegen'd function. - bool eval_build_row(TupleRow* row) { return eval_row(row, _build_expr_ctxs); } - - // Evaluate 'row' over _probe_expr_ctxs caching the results in '_expr_values_buffer' - // This will be replaced by codegen. - bool eval_probe_row(TupleRow* row) { return eval_row(row, _probe_expr_ctxs); } - - // Compute the hash of the values in _expr_values_buffer. - // This will be replaced by codegen. We don't want this inlined for replacing - // with codegen'd functions so the function name does not change. - uint32_t hash_current_row() { - if (_var_result_begin == -1) { - // This handles NULLs implicitly since a constant seed value was put - // into results buffer for nulls. - return HashUtil::hash(_expr_values_buffer, _results_buffer_size, _initial_seed); - } else { - return hash_variable_len_row(); - } - } - - // Compute the hash of the values in _expr_values_buffer for rows with variable length - // fields (e.g. strings) - uint32_t hash_variable_len_row(); - - // Returns true if the values of build_exprs evaluated over 'build_row' equal - // the values cached in _expr_values_buffer - // This will be replaced by codegen. - bool equals(TupleRow* build_row); - - // Grow the node array. - void grow_node_array(); - - const std::vector& _build_expr_ctxs; - const std::vector& _probe_expr_ctxs; - - // Number of Tuple* in the build tuple row - const int _num_build_tuples; - // outer join || has null equal join should be true - const bool _stores_nulls; - // true: the null-safe equal '<=>' is true. The row with null should be judged. - // false: the equal '=' is false. The row with null should be filtered. - const std::vector _finds_nulls; - - const int32_t _initial_seed; - - // Size of hash table nodes. This includes a fixed size header and the Tuple*'s that - // follow. - const int _node_byte_size; - // Number of non-empty buckets. Used to determine when to grow and rehash - int64_t _num_filled_buckets; - // Buffer to store node data. - uint8_t* _current_nodes; - // number of nodes stored (i.e. size of hash table) - int64_t _num_nodes; - // current nodes buffer capacity - int64_t _current_capacity; - // current used - int64_t _current_used; - // total capacity - int64_t _total_capacity; - - std::unique_ptr _mem_tracker; - - std::vector _buckets; - - // equal to _buckets.size() but more efficient than the size function - int64_t _num_buckets; - - // The number of filled buckets to trigger a resize. This is cached for efficiency - int64_t _num_buckets_till_resize; - - // Cache of exprs values for the current row being evaluated. This can either - // be a build row (during insert()) or probe row (during find()). - std::vector _expr_values_buffer_offsets; - - // byte offset into _expr_values_buffer that begins the variable length results - int _var_result_begin; - - // byte size of '_expr_values_buffer' - int _results_buffer_size; - - // buffer to store evaluated expr results. This address must not change once - // allocated since the address is baked into the codegen - uint8_t* _expr_values_buffer; - - // Use bytes instead of bools to be compatible with llvm. This address must - // not change once allocated. - uint8_t* _expr_value_null_bits; - // node buffer list - std::vector _alloc_list; - // node buffer end pointer - std::vector _end_list; -}; - -} // namespace doris diff --git a/be/src/exec/odbc_connector.h b/be/src/exec/odbc_connector.h index 5a62ca3d3b..f7765a9fce 100644 --- a/be/src/exec/odbc_connector.h +++ b/be/src/exec/odbc_connector.h @@ -28,9 +28,6 @@ struct ODBCConnectorParam { // only use in query std::string query_string; const TupleDescriptor* tuple_desc; - - // only use in write - std::vector output_expr_ctxs; }; // Because the DataBinding have the mem alloc, so diff --git a/be/src/exec/scan_node.h b/be/src/exec/scan_node.h index 22589399de..c83bf9884b 100644 --- a/be/src/exec/scan_node.h +++ b/be/src/exec/scan_node.h @@ -99,7 +99,6 @@ protected: const std::function& checker); // remove pushed expr from conjunct tree RuntimeProfile::Counter* _bytes_read_counter; // # bytes read from the scanner - // # rows/tuples read from the scanner (including those discarded by eval_conjuncts()) RuntimeProfile::Counter* _rows_read_counter; // Wall based aggregate read throughput [bytes/sec] RuntimeProfile::Counter* _total_throughput_counter; diff --git a/be/src/exec/table_connector.cpp b/be/src/exec/table_connector.cpp index 920fad3ccc..12dc3acdc2 100644 --- a/be/src/exec/table_connector.cpp +++ b/be/src/exec/table_connector.cpp @@ -21,7 +21,6 @@ #include -#include "exprs/expr.h" #include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" #include "util/mysql_global.h" diff --git a/be/src/exec/table_connector.h b/be/src/exec/table_connector.h index 88f396d086..6a60170603 100644 --- a/be/src/exec/table_connector.h +++ b/be/src/exec/table_connector.h @@ -26,7 +26,6 @@ #include #include "common/status.h" -#include "exprs/expr_context.h" #include "runtime/descriptors.h" #include "vec/exprs/vexpr_context.h" diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt index 98104ccf35..4cc2480854 100644 --- a/be/src/exprs/CMakeLists.txt +++ b/be/src/exprs/CMakeLists.txt @@ -24,54 +24,24 @@ set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/exprs") add_library(Exprs encryption_functions.cpp aggregate_functions.cpp - agg_fn_evaluator.cpp anyval_util.cpp - arithmetic_expr.cpp - binary_predicate.cpp - case_expr.cpp - cast_expr.cpp cast_functions.cpp - compound_predicate.cpp - conditional_functions.cpp - decimalv2_operators.cpp time_operators.cpp - es_functions.cpp hash_functions.cpp - literal.cpp - expr.cpp - expr_context.cpp - in_predicate.cpp - new_in_predicate.cpp - bloomfilter_predicate.cpp block_bloom_filter_avx_impl.cc block_bloom_filter_impl.cc runtime_filter.cpp runtime_filter_rpc.cpp - is_null_predicate.cpp like_predicate.cpp match_predicate.cpp math_functions.cpp - null_literal.cpp - scalar_fn_call.cpp - rpc_fn.cpp rpc_fn_comm.cpp - rpc_fn_call.cpp - slot_ref.cpp string_functions.cpp - array_functions.cpp timestamp_functions.cpp - tuple_is_null_predicate.cpp udf_builtins.cpp utility_functions.cpp - info_func.cpp json_functions.cpp - operators.cpp - hll_hash_function.cpp - agg_fn.cpp - new_agg_fn_evaluator.cc bitmap_function.cpp - hll_function.cpp quantile_function.cpp - grouping_sets_functions.cpp topn_function.cpp ) diff --git a/be/src/exprs/agg_fn.cpp b/be/src/exprs/agg_fn.cpp deleted file mode 100644 index 9e69ac8c6c..0000000000 --- a/be/src/exprs/agg_fn.cpp +++ /dev/null @@ -1,221 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.10.0/be/src/exprs/agg-fn.h -// and modified by Doris - -#include "exprs/agg_fn.h" - -#include "exprs/anyval_util.h" -#include "exprs/rpc_fn.h" -#include "runtime/descriptors.h" -#include "runtime/runtime_state.h" -#include "runtime/user_function_cache.h" - -using namespace doris_udf; - -namespace doris { - -AggFn::AggFn(const TExprNode& tnode, const SlotDescriptor& intermediate_slot_desc, - const SlotDescriptor& output_slot_desc) - : Expr(tnode), - is_merge_(tnode.agg_expr.is_merge_agg), - intermediate_slot_desc_(intermediate_slot_desc), - output_slot_desc_(output_slot_desc), - _vararg_start_idx(tnode.__isset.vararg_start_idx ? tnode.vararg_start_idx : -1) { - // TODO(pengyubing) arg_type_descs_ is used for codegen - // arg_type_descs_(AnyValUtil::column_type_to_type_desc( - // TypeDescriptor::from_thrift(tnode.agg_expr.arg_types))) { - DCHECK(tnode.__isset.fn); - DCHECK(tnode.fn.__isset.aggregate_fn); - // TODO chenhao - DCHECK_EQ(tnode.node_type, TExprNodeType::AGG_EXPR); - DCHECK_EQ(TypeDescriptor::from_thrift(tnode.type).type, - TypeDescriptor::from_thrift(_fn.ret_type).type); - const std::string& fn_name = _fn.name.function_name; - if (fn_name == "count") { - agg_op_ = COUNT; - } else if (fn_name == "min") { - agg_op_ = MIN; - } else if (fn_name == "max") { - agg_op_ = MAX; - } else if (fn_name == "sum" || fn_name == "sum_init_zero") { - agg_op_ = SUM; - } else if (fn_name == "avg") { - agg_op_ = AVG; - } else if (fn_name == "ndv" || fn_name == "ndv_no_finalize") { - agg_op_ = NDV; - } else if (fn_name == "multi_distinct_count") { - agg_op_ = COUNT_DISTINCT; - } else if (fn_name == "multi_distinct_sum") { - agg_op_ = SUM_DISTINCT; - } else { - agg_op_ = OTHER; - } -} - -Status AggFn::init(const RowDescriptor& row_desc, RuntimeState* state) { - // TODO chenhao , calling expr's prepare in NewAggFnEvaluator create - // Initialize all children (i.e. input exprs to this aggregate expr). - //for (Expr* input_expr : children()) { - // RETURN_IF_ERROR(input_expr->prepare(row_desc, state)); - //} - - // Initialize the aggregate expressions' internals. - const TAggregateFunction& aggregate_fn = _fn.aggregate_fn; - DCHECK_EQ(intermediate_slot_desc_.type().type, - TypeDescriptor::from_thrift(aggregate_fn.intermediate_type).type); - DCHECK_EQ(output_slot_desc_.type().type, TypeDescriptor::from_thrift(_fn.ret_type).type); - - // Load the function pointers. Must have init() and update(). - if (aggregate_fn.init_fn_symbol.empty() || aggregate_fn.update_fn_symbol.empty() || - (aggregate_fn.merge_fn_symbol.empty() && !aggregate_fn.is_analytic_only_fn)) { - // This path is only for partially implemented builtins. - DCHECK_EQ(_fn.binary_type, TFunctionBinaryType::BUILTIN); - return Status::InternalError("Function {} is not implemented.", _fn.name.function_name); - } - if (_fn.binary_type == TFunctionBinaryType::NATIVE || - _fn.binary_type == TFunctionBinaryType::BUILTIN || - _fn.binary_type == TFunctionBinaryType::HIVE) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.init_fn_symbol, _fn.hdfs_location, _fn.checksum, &_init_fn, - &_cache_entry)); - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.update_fn_symbol, _fn.hdfs_location, _fn.checksum, &_update_fn, - &_cache_entry)); - - // Merge() is not defined for purely analytic function. - if (!aggregate_fn.is_analytic_only_fn) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.merge_fn_symbol, _fn.hdfs_location, _fn.checksum, - &_merge_fn, &_cache_entry)); - } - // Serialize(), GetValue(), Remove() and Finalize() are optional - if (!aggregate_fn.serialize_fn_symbol.empty()) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.serialize_fn_symbol, _fn.hdfs_location, _fn.checksum, - &_serialize_fn, &_cache_entry)); - } - if (!aggregate_fn.get_value_fn_symbol.empty()) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.get_value_fn_symbol, _fn.hdfs_location, _fn.checksum, - &_get_value_fn, &_cache_entry)); - } - if (!aggregate_fn.remove_fn_symbol.empty()) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, aggregate_fn.remove_fn_symbol, _fn.hdfs_location, _fn.checksum, - &_remove_fn, &_cache_entry)); - } - if (!aggregate_fn.finalize_fn_symbol.empty()) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.finalize_fn_symbol, _fn.hdfs_location, _fn.checksum, - &_finalize_fn, &_cache_entry)); - } - } else if (_fn.binary_type == TFunctionBinaryType::RPC) { - _rpc_init = std::make_unique(state, _fn, RPCFn::AggregationStep::INIT, true); - _rpc_update = std::make_unique(state, _fn, RPCFn::AggregationStep::UPDATE, true); - - // Merge() is not defined for purely analytic function. - if (!aggregate_fn.is_analytic_only_fn) { - _rpc_merge = std::make_unique(state, _fn, RPCFn::AggregationStep::MERGE, true); - } - // Serialize(), GetValue(), Remove() and Finalize() are optional - if (!aggregate_fn.serialize_fn_symbol.empty()) { - _rpc_serialize = - std::make_unique(state, _fn, RPCFn::AggregationStep::SERIALIZE, true); - } - if (!aggregate_fn.get_value_fn_symbol.empty()) { - _rpc_get_value = - std::make_unique(state, _fn, RPCFn::AggregationStep::GET_VALUE, true); - } - if (!aggregate_fn.remove_fn_symbol.empty()) { - _rpc_remove = std::make_unique(state, _fn, RPCFn::AggregationStep::REMOVE, true); - } - if (!aggregate_fn.finalize_fn_symbol.empty()) { - _rpc_finalize = - std::make_unique(state, _fn, RPCFn::AggregationStep::FINALIZE, true); - } - } else { - return Status::NotSupported("Not supported BinaryType: {}", _fn.binary_type); - } - return Status::OK(); -} - -Status AggFn::create(const TExpr& texpr, const RowDescriptor& row_desc, - const SlotDescriptor& intermediate_slot_desc, - const SlotDescriptor& output_slot_desc, RuntimeState* state, AggFn** agg_fn) { - *agg_fn = nullptr; - ObjectPool* pool = state->obj_pool(); - const TExprNode& texpr_node = texpr.nodes[0]; - //TODO chenhao - DCHECK_EQ(texpr_node.node_type, TExprNodeType::AGG_EXPR); - if (!texpr_node.__isset.fn) { - return Status::InternalError("Function not set in thrift AGGREGATE_EXPR node"); - } - AggFn* new_agg_fn = pool->add(new AggFn(texpr_node, intermediate_slot_desc, output_slot_desc)); - RETURN_IF_ERROR(Expr::create_tree(texpr, pool, new_agg_fn)); - Status status = new_agg_fn->init(row_desc, state); - if (UNLIKELY(!status.ok())) { - new_agg_fn->close(); - return status; - } - for (Expr* input_expr : new_agg_fn->children()) { - int fn_ctx_idx = 0; - input_expr->assign_fn_ctx_idx(&fn_ctx_idx); - } - *agg_fn = new_agg_fn; - return Status::OK(); -} - -FunctionContext::TypeDesc AggFn::get_intermediate_type_desc() const { - return AnyValUtil::column_type_to_type_desc(intermediate_slot_desc_.type()); -} - -FunctionContext::TypeDesc AggFn::get_output_type_desc() const { - return AnyValUtil::column_type_to_type_desc(output_slot_desc_.type()); -} - -void AggFn::close() { - // This also closes all the input expressions. - Expr::close(); -} - -void AggFn::close(const std::vector& exprs) { - for (AggFn* expr : exprs) expr->close(); -} - -std::string AggFn::debug_string() const { - std::stringstream out; - out << "AggFn(op=" << agg_op_; - for (Expr* input_expr : children()) { - out << " " << input_expr->debug_string() << ")"; - } - out << ")"; - return out.str(); -} - -std::string AggFn::debug_string(const std::vector& agg_fns) { - std::stringstream out; - out << "["; - for (int i = 0; i < agg_fns.size(); ++i) { - out << (i == 0 ? "" : " ") << agg_fns[i]->debug_string(); - } - out << "]"; - return out.str(); -} - -} // namespace doris diff --git a/be/src/exprs/agg_fn.h b/be/src/exprs/agg_fn.h deleted file mode 100644 index 96a3ef128c..0000000000 --- a/be/src/exprs/agg_fn.h +++ /dev/null @@ -1,187 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.10.0/be/src/exprs/agg-fn.h -// and modified by Doris - -#pragma once - -#include "exprs/expr.h" -#include "runtime/descriptors.h" -#include "udf/udf.h" - -namespace doris { - -using doris_udf::FunctionContext; - -class MemPool; -class ObjectPool; -class RuntimeState; -class Tuple; -class TupleRow; -class TExprNode; -class RPCFn; - -/// --- AggFn overview -/// -/// An aggregate function generates an output over a set of tuple rows. -/// An example would be AVG() which computes the average of all input rows. -/// The built-in aggregate functions such as min, max, sum, avg, ndv etc are -/// in this category. -/// -/// --- Implementation -/// -/// AggFn contains the aggregation operations, pointers to the UDAF interface functions -/// implementing various states of aggregation and the descriptors for the intermediate -/// and output values. Please see udf/udf.h for details of the UDAF interfaces. -/// -/// AggFnEvaluator is the interface for evaluating aggregate functions against input -/// tuple rows. It invokes the following functions at different phases of the aggregation: -/// -/// _init_fn : An initialization function that initializes the aggregate value. -/// -/// _update_fn : An update function that processes the arguments for each row in the -/// query result set and accumulates an intermediate result. For example, -/// this function might increment a counter, append to a string buffer or -/// add the input to a cumulative sum. -/// -/// _merge_fn : A merge function that combines multiple intermediate results into a -/// single value. -/// -/// _serialize_fn: A serialization function that flattens any intermediate values -/// containing pointers, and frees any memory allocated during the init, -/// update and merge phases. -/// -/// _finalize_fn : A finalize function that either passes through the combined result -/// unchanged, or does one final transformation. Also frees the resources -/// allocated during init, update and merge phases. -/// -/// _get_value_fn: Used by AnalyticEval node to obtain the current intermediate value. -/// -/// _remove_fn : Used by AnalyticEval node to undo the update to the intermediate value -/// by an input row as it falls out of a sliding window. -/// -class AggFn : public Expr { -public: - /// Override the base class' implementation. - virtual bool is_agg_fn() const { return true; } - - /// Enum for some built-in aggregation ops. - enum AggregationOp { - COUNT, - MIN, - MAX, - SUM, - AVG, - NDV, - SUM_DISTINCT, - COUNT_DISTINCT, - HLL_UNION_AGG, - OTHER, - }; - - /// Creates and initializes an aggregate function from 'texpr' and returns it in - /// 'agg_fn'. The returned AggFn lives in the ObjectPool of 'state'. 'row_desc' is - /// the row descriptor of the input tuple row; 'intermediate_slot_desc' is the slot - /// descriptor of the intermediate value; 'output_slot_desc' is the slot descriptor - /// of the output value. On failure, returns error status and sets 'agg_fn' to nullptr. - static Status create(const TExpr& texpr, const RowDescriptor& row_desc, - const SlotDescriptor& intermediate_slot_desc, - const SlotDescriptor& output_slot_desc, RuntimeState* state, - AggFn** agg_fn) WARN_UNUSED_RESULT; - - bool is_merge() const { return is_merge_; } - AggregationOp agg_op() const { return agg_op_; } - bool is_count_star() const { return agg_op_ == COUNT && _children.empty(); } - bool is_count_distinct() const { return agg_op_ == COUNT_DISTINCT; } - bool is_sum_distinct() const { return agg_op_ == SUM_DISTINCT; } - bool is_builtin() const { return _fn.binary_type == TFunctionBinaryType::BUILTIN; } - const std::string& fn_name() const { return _fn.name.function_name; } - const TypeDescriptor& intermediate_type() const { return intermediate_slot_desc_.type(); } - const SlotDescriptor& intermediate_slot_desc() const { return intermediate_slot_desc_; } - // Output type is the same as Expr::type(). - const SlotDescriptor& output_slot_desc() const { return output_slot_desc_; } - void* remove_fn() const { return _remove_fn; } - void* merge_or_update_fn() const { return is_merge_ ? _merge_fn : _update_fn; } - void* serialize_fn() const { return _serialize_fn; } - void* get_value_fn() const { return _get_value_fn; } - void* finalize_fn() const { return _finalize_fn; } - bool supports_remove() const { return _remove_fn != nullptr; } - bool supports_serialize() const { return _serialize_fn != nullptr; } - FunctionContext::TypeDesc get_intermediate_type_desc() const; - FunctionContext::TypeDesc get_output_type_desc() const; - const std::vector& arg_type_descs() const { return arg_type_descs_; } - - /// Releases all cache entries to libCache for all nodes in the expr tree. - virtual void close(); - static void close(const std::vector& exprs); - - Expr* clone(ObjectPool* pool) const { return nullptr; } - - virtual std::string debug_string() const; - static std::string debug_string(const std::vector& exprs); - - const int get_vararg_start_idx() const { return _vararg_start_idx; } - -private: - friend class Expr; - friend class NewAggFnEvaluator; - - /// True if this is a merging aggregation. - const bool is_merge_; - - /// Slot into which Update()/Merge()/Serialize() write their result. Not owned. - const SlotDescriptor& intermediate_slot_desc_; - - /// Slot into which Finalize() results are written. Not owned. Identical to - /// intermediate_slot_desc_ if this agg fn has the same intermediate and result type. - const SlotDescriptor& output_slot_desc_; - - /// The types of the arguments to the aggregate function. - const std::vector arg_type_descs_; - - /// The aggregation operation. - AggregationOp agg_op_; - - /// Function pointers for the different phases of the aggregate function. - void* _init_fn = nullptr; - void* _update_fn = nullptr; - void* _remove_fn = nullptr; - void* _merge_fn = nullptr; - void* _serialize_fn = nullptr; - void* _get_value_fn = nullptr; - void* _finalize_fn = nullptr; - - int _vararg_start_idx; - - std::unique_ptr _rpc_init; - std::unique_ptr _rpc_update; - std::unique_ptr _rpc_remove; - std::unique_ptr _rpc_merge; - std::unique_ptr _rpc_serialize; - std::unique_ptr _rpc_get_value; - std::unique_ptr _rpc_finalize; - - AggFn(const TExprNode& node, const SlotDescriptor& intermediate_slot_desc, - const SlotDescriptor& output_slot_desc); - - /// Initializes the AggFn and its input expressions. May load the UDAF from LibCache - /// if necessary. - virtual Status init(const RowDescriptor& desc, RuntimeState* state) WARN_UNUSED_RESULT; -}; - -} // namespace doris diff --git a/be/src/exprs/agg_fn_evaluator.cpp b/be/src/exprs/agg_fn_evaluator.cpp deleted file mode 100644 index e3a4b876da..0000000000 --- a/be/src/exprs/agg_fn_evaluator.cpp +++ /dev/null @@ -1,948 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/agg-fn-evaluator.cc -// and modified by Doris - -#include "exprs/agg_fn_evaluator.h" - -#include - -#include - -#include "exprs/anyval_util.h" -#include "runtime/datetime_value.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/raw_value.h" -#include "runtime/user_function_cache.h" -#include "udf/udf_internal.h" - -namespace doris { -using doris_udf::FunctionContext; -using doris_udf::BooleanVal; -using doris_udf::TinyIntVal; -using doris_udf::SmallIntVal; -using doris_udf::IntVal; -using doris_udf::BigIntVal; -using doris_udf::LargeIntVal; -using doris_udf::FloatVal; -using doris_udf::DoubleVal; -using doris_udf::DecimalV2Val; -using doris_udf::DateTimeVal; -using doris_udf::StringVal; -using doris_udf::AnyVal; - -// typedef for builtin aggregate functions. Unfortunately, these type defs don't -// really work since the actual builtin is implemented not in terms of the base -// AnyVal* type. Due to this, there are lots of casts when we use these typedefs. -// TODO: these typedefs exists as wrappers to go from (TupleRow, Tuple) to the -// types the aggregation functions need. This needs to be done with codegen instead. -typedef void (*InitFn)(FunctionContext*, AnyVal*); -typedef void (*UpdateFn0)(FunctionContext*, AnyVal*); -typedef void (*UpdateFn1)(FunctionContext*, const AnyVal&, AnyVal*); -typedef void (*UpdateFn2)(FunctionContext*, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn3)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn4)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, AnyVal*); -typedef void (*UpdateFn5)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn6)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn7)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn8)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, - AnyVal*); -typedef StringVal (*SerializeFn)(FunctionContext*, const StringVal&); -typedef AnyVal (*GetValueFn)(FunctionContext*, const AnyVal&); -typedef AnyVal (*FinalizeFn)(FunctionContext*, const AnyVal&); - -Status AggFnEvaluator::create(ObjectPool* pool, const TExpr& desc, AggFnEvaluator** result) { - return create(pool, desc, false, result); -} - -Status AggFnEvaluator::create(ObjectPool* pool, const TExpr& desc, bool is_analytic_fn, - AggFnEvaluator** result) { - *result = pool->add(new AggFnEvaluator(desc.nodes[0], is_analytic_fn)); - int node_idx = 0; - for (int i = 0; i < desc.nodes[0].num_children; ++i) { - ++node_idx; - Expr* expr = nullptr; - ExprContext* ctx = nullptr; - RETURN_IF_ERROR( - Expr::create_tree_from_thrift(pool, desc.nodes, nullptr, &node_idx, &expr, &ctx)); - (*result)->_input_exprs_ctxs.push_back(ctx); - } - return Status::OK(); -} - -AggFnEvaluator::AggFnEvaluator(const TExprNode& desc, bool is_analytic_fn) - : _fn(desc.fn), - _is_merge(desc.agg_expr.is_merge_agg), - _is_analytic_fn(is_analytic_fn), - _return_type(TypeDescriptor::from_thrift(desc.fn.ret_type)), - _intermediate_type(TypeDescriptor::from_thrift(desc.fn.aggregate_fn.intermediate_type)), - _function_type(desc.fn.binary_type), - _total_mem_consumption(0), - _accumulated_mem_consumption(0), - _intermediate_slot_desc(nullptr), - _output_slot_desc(nullptr), - _init_fn(nullptr), - _update_fn(nullptr), - _remove_fn(nullptr), - _merge_fn(nullptr), - _serialize_fn(nullptr), - _get_value_fn(nullptr), - _finalize_fn(nullptr) { - if (_fn.name.function_name == "count") { - _agg_op = COUNT; - } else if (_fn.name.function_name == "min") { - _agg_op = MIN; - } else if (_fn.name.function_name == "max") { - _agg_op = MAX; - } else if (_fn.name.function_name == "sum") { - _agg_op = SUM; - } else if (_fn.name.function_name == "avg") { - _agg_op = AVG; - } else if (_fn.name.function_name == "ndv" || _fn.name.function_name == "ndv_no_finalize") { - _agg_op = NDV; - } else if (_fn.name.function_name == "count_distinct" || - _fn.name.function_name == "count_distinct") { - _agg_op = COUNT_DISTINCT; - } else if (_fn.name.function_name == "sum_distinct" || - _fn.name.function_name == "sum_distinct") { - _agg_op = SUM_DISTINCT; - } else if (_fn.name.function_name == "hll_union_agg") { - _agg_op = HLL_UNION_AGG; - } else { - _agg_op = OTHER; - } -} - -Status AggFnEvaluator::prepare(RuntimeState* state, const RowDescriptor& desc, MemPool* pool, - const SlotDescriptor* intermediate_slot_desc, - const SlotDescriptor* output_slot_desc, - FunctionContext** agg_fn_ctx) { - DCHECK(pool != nullptr); - DCHECK(intermediate_slot_desc != nullptr); - DCHECK(_intermediate_slot_desc == nullptr); - _output_slot_desc = output_slot_desc; - //DCHECK(_intermediate_slot_desc == nullptr); - _intermediate_slot_desc = intermediate_slot_desc; - - _string_buffer_len = 0; - _mem_tracker = std::make_unique("AggFnEvaluator"); - - Status status = Expr::prepare(_input_exprs_ctxs, state, desc); - RETURN_IF_ERROR(status); - - ObjectPool* obj_pool = state->obj_pool(); - - for (int i = 0; i < _input_exprs_ctxs.size(); ++i) { - _staging_input_vals.push_back( - create_any_val(obj_pool, input_expr_ctxs()[i]->root()->type())); - } - - // window has intermediate_slot_type - if (_intermediate_slot_desc != nullptr) { - _staging_intermediate_val = create_any_val(obj_pool, _intermediate_slot_desc->type()); - _staging_merge_input_val = create_any_val(obj_pool, _intermediate_slot_desc->type()); - } - - //_staging_output_val = create_any_val(obj_pool, _output_slot_desc->type()); - _is_multi_distinct = false; - - if (_agg_op == AggregationOp::COUNT_DISTINCT) { - _hybrid_map.reset(new HybridMap(TYPE_VARCHAR)); - _is_multi_distinct = true; - _string_buffer.reset(new char[1024]); - _string_buffer_len = 1024; - } else if (_agg_op == AggregationOp::SUM_DISTINCT) { - _hybrid_map.reset(new HybridMap(input_expr_ctxs()[0]->root()->type().type)); - _is_multi_distinct = true; - } - // TODO: this should be made identical for the builtin and UDA case by - // putting all this logic in an improved opcode registry. - - // Load the function pointers. Merge is not required if this is evaluating an - // analytic function. - if (_fn.aggregate_fn.init_fn_symbol.empty() || _fn.aggregate_fn.update_fn_symbol.empty() || - (!_is_analytic_fn && _fn.aggregate_fn.merge_fn_symbol.empty())) { - // This path is only for partially implemented builtins. - DCHECK_EQ(_fn.binary_type, TFunctionBinaryType::BUILTIN); - return Status::InternalError("Function {} is not implemented.", _fn.name.function_name); - } - - // Load the function pointers. - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.init_fn_symbol, _fn.hdfs_location, _fn.checksum, &_init_fn, - nullptr)); - - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.update_fn_symbol, _fn.hdfs_location, _fn.checksum, &_update_fn, - nullptr)); - - // Merge() is not loaded if evaluating the agg fn as an analytic function. - if (!_is_analytic_fn) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.merge_fn_symbol, _fn.hdfs_location, _fn.checksum, - &_merge_fn, nullptr)); - } - - // Serialize and Finalize are optional - if (!_fn.aggregate_fn.serialize_fn_symbol.empty()) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.serialize_fn_symbol, _fn.hdfs_location, _fn.checksum, - &_serialize_fn, nullptr)); - } - if (!_fn.aggregate_fn.finalize_fn_symbol.empty()) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.finalize_fn_symbol, _fn.hdfs_location, _fn.checksum, - &_finalize_fn, nullptr)); - } - - if (!_fn.aggregate_fn.get_value_fn_symbol.empty()) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.get_value_fn_symbol, _fn.hdfs_location, _fn.checksum, - &_get_value_fn, nullptr)); - } - if (!_fn.aggregate_fn.remove_fn_symbol.empty()) { - RETURN_IF_ERROR(UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.aggregate_fn.remove_fn_symbol, _fn.hdfs_location, _fn.checksum, - &_remove_fn, nullptr)); - } - - std::vector arg_types; - for (int j = 0; j < _input_exprs_ctxs.size(); ++j) { - arg_types.push_back( - AnyValUtil::column_type_to_type_desc(_input_exprs_ctxs[j]->root()->type())); - } - - FunctionContext::TypeDesc intermediate_type = - AnyValUtil::column_type_to_type_desc(_intermediate_type); - FunctionContext::TypeDesc output_type = - AnyValUtil::column_type_to_type_desc(_output_slot_desc->type()); - - *agg_fn_ctx = FunctionContextImpl::create_context(state, pool, intermediate_type, output_type, - arg_types, 0, false); - return Status::OK(); -} - -Status AggFnEvaluator::open(RuntimeState* state, FunctionContext* agg_fn_ctx) { - RETURN_IF_ERROR(Expr::open(_input_exprs_ctxs, state)); - // Now that we have opened all our input exprs, it is safe to evaluate any constant - // values for the UDA's FunctionContext (we cannot evaluate exprs before calling Open() - // on them). - std::vector constant_args(_input_exprs_ctxs.size()); - for (int i = 0; i < _input_exprs_ctxs.size(); ++i) { - constant_args[i] = _input_exprs_ctxs[i]->root()->get_const_val(_input_exprs_ctxs[i]); - } - agg_fn_ctx->impl()->set_constant_args(constant_args); - return Status::OK(); -} - -void AggFnEvaluator::close(RuntimeState* state) { - Expr::close(_input_exprs_ctxs, state); - if (UNLIKELY(_total_mem_consumption > 0)) { - _mem_tracker->release(_total_mem_consumption); - } -} - -// Utility to put val into an AnyVal struct -inline void AggFnEvaluator::set_any_val(const void* slot, const TypeDescriptor& type, AnyVal* dst) { - if (slot == nullptr) { - dst->is_null = true; - return; - } - - dst->is_null = false; - - switch (type.type) { - case TYPE_NULL: - return; - - case TYPE_BOOLEAN: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_TINYINT: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_SMALLINT: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_INT: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_BIGINT: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_FLOAT: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_DOUBLE: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_STRING: - case TYPE_QUANTILE_STATE: - reinterpret_cast(slot)->to_string_val(reinterpret_cast(dst)); - return; - - case TYPE_DATE: - case TYPE_DATETIME: - reinterpret_cast(slot)->to_datetime_val( - reinterpret_cast(dst)); - return; - - case TYPE_DECIMALV2: - reinterpret_cast(dst)->val = - reinterpret_cast(slot)->value; - return; - - case TYPE_LARGEINT: - memcpy(&reinterpret_cast(dst)->val, slot, sizeof(__int128)); - return; - - default: - DCHECK(false) << "NYI"; - } -} - -inline void AggFnEvaluator::set_output_slot(const AnyVal* src, const SlotDescriptor* dst_slot_desc, - Tuple* dst) { - if (src->is_null && dst_slot_desc->is_nullable()) { - dst->set_null(dst_slot_desc->null_indicator_offset()); - return; - } - - dst->set_not_null(dst_slot_desc->null_indicator_offset()); - void* slot = dst->get_slot(dst_slot_desc->tuple_offset()); - - switch (dst_slot_desc->type().type) { - case TYPE_NULL: - return; - - case TYPE_BOOLEAN: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - - case TYPE_TINYINT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - - case TYPE_SMALLINT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - - case TYPE_INT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - - case TYPE_BIGINT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - - case TYPE_FLOAT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - - case TYPE_DOUBLE: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: - *reinterpret_cast(slot) = StringRef(*reinterpret_cast(src)); - return; - - case TYPE_DATE: - case TYPE_DATETIME: - *reinterpret_cast(slot) = - DateTimeValue::from_datetime_val(*reinterpret_cast(src)); - return; - - case TYPE_DECIMALV2: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - - case TYPE_LARGEINT: { - memcpy(slot, &reinterpret_cast(src)->val, sizeof(__int128)); - return; - } - - default: - DCHECK(false) << "NYI"; - } -} - -bool AggFnEvaluator::is_in_hybridmap(void* input_val, Tuple* dst, bool* is_add_buckets) { - bool is_in_hashset = false; - HybridSetBase* _set_ptr = nullptr; - _set_ptr = _hybrid_map->find_or_insert_set(reinterpret_cast(dst), is_add_buckets); - is_in_hashset = _set_ptr->find(input_val); - - if (!is_in_hashset) { - _set_ptr->insert(input_val); - } - - return is_in_hashset; -} - -// This function would be replaced in codegen. -void AggFnEvaluator::init(FunctionContext* agg_fn_ctx, Tuple* dst) { - DCHECK(_init_fn != nullptr); - reinterpret_cast(_init_fn)(agg_fn_ctx, _staging_intermediate_val); - set_output_slot(_staging_intermediate_val, _intermediate_slot_desc, dst); - agg_fn_ctx->impl()->set_num_updates(0); - agg_fn_ctx->impl()->set_num_removes(0); -} - -void AggFnEvaluator::update_mem_limlits(int len) { - _accumulated_mem_consumption += len; - // per 16M , update mem_tracker one time - if (UNLIKELY(_accumulated_mem_consumption > 16777216)) { - _mem_tracker->consume(_accumulated_mem_consumption); - _total_mem_consumption += _accumulated_mem_consumption; - _accumulated_mem_consumption = 0; - } -} - -AggFnEvaluator::~AggFnEvaluator() {} - -inline void AggFnEvaluator::update_mem_trackers(bool is_filter, bool is_add_buckets, int len) { - if (!is_filter) { - int total_len = len; - - if (is_add_buckets) { - total_len += BIGINT_SIZE; //map's key size - } - - update_mem_limlits(total_len); - } -} - -bool AggFnEvaluator::count_distinct_data_filter(TupleRow* row, Tuple* dst) { - std::vector vec_string_len; - int total_len = 0; - - // 1. calculate the total_len of all input parameters - for (int i = 0; i < input_expr_ctxs().size(); ++i) { - void* src_slot = input_expr_ctxs()[i]->get_value(row); - set_any_val(src_slot, input_expr_ctxs()[i]->root()->type(), _staging_input_vals[i]); - - if (_staging_input_vals[i]->is_null) { - // even though only one parameter is null, the row will be abandon - return true; - } - - if (input_expr_ctxs()[i]->root()->type().is_string_type()) { - const int string_len = reinterpret_cast(_staging_input_vals[i])->len; - vec_string_len.push_back(string_len); - total_len += string_len; - } - - total_len += get_byte_size(input_expr_ctxs()[i]->root()->type().type); - } - - int32_t vec_size = vec_string_len.size(); - int32_t int_size = INT_SIZE; - total_len += vec_size * int_size; - - // 2. merge multi parameter into one parameter(StringVal) - if (_string_buffer_len < total_len) { - _string_buffer_len = ((total_len << 10) + 1) >> 10; // (len/1024+1)*1024 - _string_buffer.reset(new char[_string_buffer_len]); - } - - StringRef string_ref(_string_buffer.get(), total_len); - // the content of StringVal: - // header: the STRING_VALUE's len - // body: all input parameters' content - char* begin = const_cast(string_ref.data); - - for (int i = 0; i < vec_size; i++) { - memcpy(begin, &vec_string_len[0], int_size); - begin += int_size; - } - - for (int i = 0; i < input_expr_ctxs().size(); ++i) { - switch (input_expr_ctxs()[i]->root()->type().type) { - case TYPE_NULL: - return true; - - case TYPE_BOOLEAN: { - *begin = (uint8_t) reinterpret_cast(_staging_input_vals[i])->val; - begin += TINYINT_SIZE; - break; - } - - case TYPE_TINYINT: { - memcpy(begin, &reinterpret_cast(_staging_input_vals[i])->val, - TINYINT_SIZE); - begin += TINYINT_SIZE; - break; - } - - case TYPE_SMALLINT: { - memcpy(begin, &reinterpret_cast(_staging_input_vals[i])->val, - SMALLINT_SIZE); - begin += SMALLINT_SIZE; - break; - } - - case TYPE_INT: { - memcpy(begin, &reinterpret_cast(_staging_input_vals[i])->val, INT_SIZE); - begin += INT_SIZE; - break; - } - - case TYPE_BIGINT: { - memcpy(begin, &reinterpret_cast(_staging_input_vals[i])->val, BIGINT_SIZE); - begin += BIGINT_SIZE; - break; - } - - case TYPE_LARGEINT: { - LargeIntVal* value = reinterpret_cast(_staging_input_vals[i]); - memcpy(begin, &value->val, LARGEINT_SIZE); - begin += LARGEINT_SIZE; - break; - } - - case TYPE_FLOAT: { - memcpy(begin, &reinterpret_cast(_staging_input_vals[i])->val, FLOAT_SIZE); - begin += FLOAT_SIZE; - break; - } - - case TYPE_DOUBLE: { - memcpy(begin, &reinterpret_cast(_staging_input_vals[i])->val, DOUBLE_SIZE); - begin += DOUBLE_SIZE; - break; - } - - case TYPE_DECIMALV2: { - DecimalV2Val* value = reinterpret_cast(_staging_input_vals[i]); - memcpy(begin, value, sizeof(DecimalV2Val)); - begin += sizeof(DecimalV2Val); - break; - } - - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: { - StringVal* value = reinterpret_cast(_staging_input_vals[i]); - memcpy(begin, value->ptr, value->len); - begin += value->len; - break; - } - - case TYPE_DATE: - case TYPE_DATETIME: { - DateTimeVal* value = reinterpret_cast(_staging_input_vals[i]); - memcpy(begin, &value->packed_time, DATETIME_SIZE); - begin += DATETIME_SIZE; - break; - } - - default: { - DCHECK(0) << "FYI" << input_expr_ctxs()[i]->root()->type(); - } - } - } - - DCHECK(begin == string_ref.data + string_ref.size) - << "COUNT_DISTINCT: StringVal's len doesn't match"; - bool is_add_buckets = false; - bool is_filter = is_in_hybridmap(&string_ref, dst, &is_add_buckets); - update_mem_trackers(is_filter, is_add_buckets, string_ref.size); - return is_filter; -} - -bool AggFnEvaluator::sum_distinct_data_filter(TupleRow* row, Tuple* dst) { - DCHECK(input_expr_ctxs().size() == 1); - void* src_slot = input_expr_ctxs()[0]->get_value(row); - set_any_val(src_slot, input_expr_ctxs()[0]->root()->type(), _staging_input_vals[0]); - - if (_staging_input_vals[0]->is_null) { - // if the parameter is null, the row will be abandon - return true; - } - - bool is_filter = false; - bool is_add_buckets = false; - - switch (input_expr_ctxs()[0]->root()->type().type) { - case TYPE_NULL: { - return true; - } - - case TYPE_BIGINT: { - const BigIntVal* value = reinterpret_cast(_staging_input_vals[0]); - is_filter = is_in_hybridmap((void*)&(value->val), dst, &is_add_buckets); - update_mem_trackers(is_filter, is_add_buckets, BIGINT_SIZE); - return is_filter; - } - - case TYPE_FLOAT: { - const FloatVal* value = reinterpret_cast(_staging_input_vals[0]); - is_filter = is_in_hybridmap((void*)&(value->val), dst, &is_add_buckets); - update_mem_trackers(is_filter, is_add_buckets, FLOAT_SIZE); - return is_filter; - } - - case TYPE_DOUBLE: { - const DoubleVal* value = reinterpret_cast(_staging_input_vals[0]); - is_filter = is_in_hybridmap((void*)&(value->val), dst, &is_add_buckets); - update_mem_trackers(is_filter, is_add_buckets, DOUBLE_SIZE); - return is_filter; - } - - case TYPE_DECIMALV2: { - const DecimalV2Val* value = reinterpret_cast(_staging_input_vals[0]); - DecimalV2Value temp_value = DecimalV2Value::from_decimal_val(*value); - is_filter = is_in_hybridmap((void*)&(temp_value), dst, &is_add_buckets); - update_mem_trackers(is_filter, is_add_buckets, DECIMALV2_SIZE); - return is_filter; - } - - case TYPE_LARGEINT: { - const LargeIntVal* value = reinterpret_cast(_staging_input_vals[0]); - is_filter = is_in_hybridmap((void*)&(value->val), dst, &is_add_buckets); - update_mem_trackers(is_filter, is_add_buckets, LARGEINT_SIZE); - return is_filter; - } - - default: { - DCHECK(0) << "FYI"; - } - } - - return false; -} - -void AggFnEvaluator::update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst, - void* fn) { - if (fn == nullptr) { - return; - } - - bool dst_null = dst->is_null(_intermediate_slot_desc->null_indicator_offset()); - void* dst_slot = nullptr; - - if (!dst_null) { - dst_slot = dst->get_slot(_intermediate_slot_desc->tuple_offset()); - } - - set_any_val(dst_slot, _intermediate_slot_desc->type(), _staging_intermediate_val); - - if (_is_multi_distinct) { - if (_agg_op == COUNT_DISTINCT) { - bool is_need_filter = count_distinct_data_filter(row, dst); - - if (is_need_filter) { - _staging_input_vals[0]->is_null = true; - } - } else if (_agg_op == SUM_DISTINCT) { - bool is_need_filter = sum_distinct_data_filter(row, dst); - - if (is_need_filter) { - _staging_input_vals[0]->is_null = true; - } - } else { - DCHECK(0) << "we only support count_distinct and sum_distinct"; - } - } else { - for (int i = 0; i < input_expr_ctxs().size(); ++i) { - void* src_slot = input_expr_ctxs()[i]->get_value(row); - set_any_val(src_slot, input_expr_ctxs()[i]->root()->type(), _staging_input_vals[i]); - } - } - - // TODO: this part is not so good and not scalable. It can be replaced with - // codegen but we can also consider leaving it for the first few cases for - // debugging. - - // if _agg_op is TAggregationOp::COUNT_DISTINCT, it has only one - // input parameter, we consider the first parameter as the only input parameter - if (_is_multi_distinct && _agg_op == AggregationOp::COUNT_DISTINCT) { - reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], - _staging_intermediate_val); - } else { - switch (input_expr_ctxs().size()) { - case 0: - reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); - break; - - case 1: - reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], - _staging_intermediate_val); - break; - - case 2: - reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], - *_staging_input_vals[1], _staging_intermediate_val); - break; - - case 3: - reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], - *_staging_input_vals[1], *_staging_input_vals[2], - _staging_intermediate_val); - break; - - case 4: - reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], - *_staging_input_vals[1], *_staging_input_vals[2], - *_staging_input_vals[3], _staging_intermediate_val); - break; - - case 5: - reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], - *_staging_input_vals[1], *_staging_input_vals[2], - *_staging_input_vals[3], *_staging_input_vals[4], - _staging_intermediate_val); - break; - - case 6: - reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], - *_staging_input_vals[1], *_staging_input_vals[2], - *_staging_input_vals[3], *_staging_input_vals[4], - *_staging_input_vals[5], _staging_intermediate_val); - break; - - case 7: - reinterpret_cast(fn)( - agg_fn_ctx, *_staging_input_vals[0], *_staging_input_vals[1], - *_staging_input_vals[2], *_staging_input_vals[3], *_staging_input_vals[4], - *_staging_input_vals[5], *_staging_input_vals[6], _staging_intermediate_val); - break; - - case 8: - reinterpret_cast(fn)(agg_fn_ctx, *_staging_input_vals[0], - *_staging_input_vals[1], *_staging_input_vals[2], - *_staging_input_vals[3], *_staging_input_vals[4], - *_staging_input_vals[5], *_staging_input_vals[6], - *_staging_input_vals[7], _staging_intermediate_val); - break; - - default: - DCHECK(false) << "NYI"; - } - } - - set_output_slot(_staging_intermediate_val, _intermediate_slot_desc, dst); -} - -void AggFnEvaluator::update(FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst, void* fn, - MemPool* pool) { - return update_or_merge(agg_fn_ctx, row, dst, fn); -} - -void AggFnEvaluator::merge(FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst, MemPool* pool) { - return update_or_merge(agg_fn_ctx, row, dst, _merge_fn); -} - -static void set_any_val2(const SlotDescriptor* desc, Tuple* tuple, AnyVal* dst) { - bool is_null = tuple->is_null(desc->null_indicator_offset()); - void* slot = nullptr; - if (!is_null) { - slot = tuple->get_slot(desc->tuple_offset()); - } - AnyValUtil::set_any_val(slot, desc->type(), dst); -} - -void AggFnEvaluator::merge(FunctionContext* agg_fn_ctx, Tuple* src, Tuple* dst) { - DCHECK(_merge_fn != nullptr); - - set_any_val2(_intermediate_slot_desc, dst, _staging_intermediate_val); - set_any_val2(_intermediate_slot_desc, src, _staging_merge_input_val); - - // The merge fn always takes one input argument. - reinterpret_cast(_merge_fn)(agg_fn_ctx, *_staging_merge_input_val, - _staging_intermediate_val); - - set_output_slot(_staging_intermediate_val, _intermediate_slot_desc, dst); -} - -void AggFnEvaluator::choose_update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, - Tuple* dst) { - if (_is_merge) { - return update_or_merge(agg_fn_ctx, row, dst, _merge_fn); - } else { - return update_or_merge(agg_fn_ctx, row, dst, _update_fn); - } -} - -void AggFnEvaluator::serialize_or_finalize(FunctionContext* agg_fn_ctx, Tuple* src, - const SlotDescriptor* dst_slot_desc, Tuple* dst, - void* fn, bool add_null) { - // DCHECK_EQ(dst_slot_desc->type().type, _return_type.type); - if (src == nullptr) { - src = dst; - } - if (fn == nullptr && src == dst) { - return; - } - - // same - bool src_slot_null = add_null || src->is_null(_intermediate_slot_desc->null_indicator_offset()); - void* src_slot = nullptr; - - if (!src_slot_null) { - src_slot = src->get_slot(_intermediate_slot_desc->tuple_offset()); - } - - // not same - // if (_is_analytic_fn) { - // No fn was given but the src and dst tuples are different (doing a finalize()). - // Just copy the src slot into the dst tuple. - if (fn == nullptr) { - DCHECK_EQ(_intermediate_slot_desc->type(), dst_slot_desc->type()); - RawValue::write(src_slot, dst, dst_slot_desc, nullptr); - return; - } - // } - set_any_val(src_slot, _intermediate_slot_desc->type(), _staging_intermediate_val); - - switch (dst_slot_desc->type().type) { - case TYPE_BOOLEAN: { - typedef BooleanVal (*Fn)(FunctionContext*, AnyVal*); - BooleanVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); - set_output_slot(&v, dst_slot_desc, dst); - break; - } - - case TYPE_TINYINT: { - typedef TinyIntVal (*Fn)(FunctionContext*, AnyVal*); - TinyIntVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); - set_output_slot(&v, dst_slot_desc, dst); - break; - } - - case TYPE_SMALLINT: { - typedef SmallIntVal (*Fn)(FunctionContext*, AnyVal*); - SmallIntVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); - set_output_slot(&v, dst_slot_desc, dst); - break; - } - - case TYPE_INT: { - typedef IntVal (*Fn)(FunctionContext*, AnyVal*); - IntVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); - set_output_slot(&v, dst_slot_desc, dst); - break; - } - - case TYPE_BIGINT: { - typedef BigIntVal (*Fn)(FunctionContext*, AnyVal*); - BigIntVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); - set_output_slot(&v, dst_slot_desc, dst); - break; - } - - case TYPE_FLOAT: { - typedef FloatVal (*Fn)(FunctionContext*, AnyVal*); - FloatVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); - set_output_slot(&v, dst_slot_desc, dst); - break; - } - - case TYPE_DOUBLE: { - typedef DoubleVal (*Fn)(FunctionContext*, AnyVal*); - DoubleVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); - set_output_slot(&v, dst_slot_desc, dst); - break; - } - - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: { - typedef StringVal (*Fn)(FunctionContext*, AnyVal*); - StringVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); - set_output_slot(&v, dst_slot_desc, dst); - break; - } - - case TYPE_DATE: - case TYPE_DATETIME: { - typedef DateTimeVal (*Fn)(FunctionContext*, AnyVal*); - DateTimeVal v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); - set_output_slot(&v, dst_slot_desc, dst); - break; - } - - case TYPE_DECIMALV2: { - typedef DecimalV2Val (*Fn)(FunctionContext*, AnyVal*); - DecimalV2Val v = reinterpret_cast(fn)(agg_fn_ctx, _staging_intermediate_val); - set_output_slot(&v, dst_slot_desc, dst); - break; - } - - default: - DCHECK(false) << "NYI"; - } -} - -void AggFnEvaluator::serialize(FunctionContext* agg_fn_ctx, Tuple* tuple) { - serialize_or_finalize(agg_fn_ctx, nullptr, _intermediate_slot_desc, tuple, _serialize_fn); -} - -//void AggFnEvaluator::finalize(FunctionContext* agg_fn_ctx, Tuple* tuple) { -// serialize_or_finalize(agg_fn_ctx, nullptr, _output_slot_desc, tuple, _finalize_fn); -//} - -std::string AggFnEvaluator::debug_string(const std::vector& exprs) { - std::stringstream out; - out << "["; - - for (int i = 0; i < exprs.size(); ++i) { - out << (i == 0 ? "" : " ") << exprs[i]->debug_string(); - } - - out << "]"; - return out.str(); -} - -std::string AggFnEvaluator::debug_string() const { - std::stringstream out; - out << "AggFnEvaluator(op=" << _agg_op; - - out << ")"; - return out.str(); -} - -} // namespace doris diff --git a/be/src/exprs/agg_fn_evaluator.h b/be/src/exprs/agg_fn_evaluator.h deleted file mode 100644 index aedcb9f01c..0000000000 --- a/be/src/exprs/agg_fn_evaluator.h +++ /dev/null @@ -1,345 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/agg-fn-evaluator.h -// and modified by Doris - -#pragma once - -#include -#include - -#include "exprs/expr_context.h" -#include "exprs/hybrid_map.h" -#include "gen_cpp/Exprs_types.h" -#include "runtime/descriptors.h" -#include "runtime/runtime_state.h" -#include "runtime/tuple.h" -#include "udf/udf.h" -#include "udf/udf_internal.h" - -namespace doris { - -class AggregationNode; -class TExprNode; - -// This class evaluates aggregate functions. Aggregate functions can either be -// builtins or external UDAs. For both of types, they can either use codegen -// or not. -// This class provides an interface that's 1:1 with the UDA interface and serves -// as glue code between the TupleRow/Tuple signature used by the AggregationNode -// and the AnyVal signature of the UDA interface. It handles evaluating input -// slots from TupleRows and aggregating the result to the result tuple. -class AggFnEvaluator { -public: - /// TODO: The aggregation node has custom codegen paths for a few of the builtins. - /// That logic needs to be removed. For now, add some enums for those builtins. - enum AggregationOp { - COUNT, - MIN, - MAX, - SUM, - AVG, - NDV, - SUM_DISTINCT, - COUNT_DISTINCT, - HLL_UNION_AGG, - OTHER, - }; - - ~AggFnEvaluator(); - // Creates an AggFnEvaluator object from desc. The object is added to 'pool' - // and returned in *result. This constructs the input Expr trees for - // this aggregate function as specified in desc. The result is returned in - // *result. - static Status create(ObjectPool* pool, const TExpr& desc, AggFnEvaluator** result); - - static Status create(ObjectPool* pool, const TExpr& desc, bool is_analytic_fn, - AggFnEvaluator** result); - - // Initializes the agg expr. 'desc' must be the row descriptor for the input TupleRow. - // It is used to get the input values in the Update() and Merge() functions. - // 'output_slot_desc' is the slot that this aggregator should write to. - // The underlying aggregate function allocates memory from the 'pool'. This is - // either string data for intermediate results or whatever memory the UDA might - // need. - // TODO: should we give them their own pool? - Status prepare(RuntimeState* state, const RowDescriptor& desc, MemPool* pool, - const SlotDescriptor* intermediate_slot_desc, - const SlotDescriptor* output_slot_desc, FunctionContext** agg_fn_ctx); - - Status open(RuntimeState* state, FunctionContext* agg_fn_ctx); - - void close(RuntimeState* state); - - const TypeDescriptor& intermediate_type() const { return _intermediate_slot_desc->type(); } - - //PrimitiveType type() const { return _type.type; } - AggregationOp agg_op() const { return _agg_op; } - const std::vector& input_expr_ctxs() const { return _input_exprs_ctxs; } - bool is_merge() const { return _is_merge; } - bool is_count_star() const { - return _agg_op == AggregationOp::COUNT && _input_exprs_ctxs.empty(); - } - bool is_builtin() const { return _function_type == TFunctionBinaryType::BUILTIN; } - bool supports_serialize() const { return _serialize_fn != nullptr; } - - static std::string debug_string(const std::vector& exprs); - std::string debug_string() const; - - // Updates the intermediate state dst based on adding the input src row. This can be - // called either to drive the UDA's update() or merge() function depending on - // is_merge_. That is, from the caller, it doesn't mater. - void add(doris_udf::FunctionContext* agg_fn_ctx, TupleRow* src, Tuple* dst); - - // Updates the intermediate state dst to remove the input src row, i.e. undoes - // add(src, dst). Only used internally for analytic fn builtins. - void remove(doris_udf::FunctionContext* agg_fn_ctx, TupleRow* src, Tuple* dst); - // Puts the finalized value from Tuple* src in Tuple* dst just as finalize() does. - // However, unlike finalize(), get_value() does not clean up state in src. get_value() - // can be called repeatedly with the same src. Only used internally for analytic fn - // builtins. - void get_value(doris_udf::FunctionContext* agg_fn_ctx, Tuple* src, Tuple* dst); - - // Functions for different phases of the aggregation. - void init(FunctionContext* agg_fn_ctx, Tuple* dst); - void update(FunctionContext* agg_fn_ctx, TupleRow* src, Tuple* dst, void* fn, MemPool* pool); - void merge(FunctionContext* agg_fn_ctx, TupleRow* src, Tuple* dst, MemPool* pool); - // Explicitly does a merge, even if this evaluator is not marked as merging. - // This is used by the partitioned agg node when it needs to merge spill results. - // In the non-spilling case, this node would normally not merge. - void merge(FunctionContext* agg_fn_ctx, Tuple* src, Tuple* dst); - void serialize(FunctionContext* agg_fn_ctx, Tuple* dst); - void finalize(FunctionContext* agg_fn_ctx, Tuple* src, Tuple* dst, bool add_null = false); - - // TODO: implement codegen path. These functions would return IR functions with - // the same signature as the interpreted ones above. - // Function* GetIrInitFn(); - // Function* GetIrUpdateFn(); - // Function* GetIrMergeFn(); - // Function* GetIrSerializeFn(); - // Function* GetIrFinalizeFn(); - static const size_t TINYINT_SIZE = sizeof(int8_t); - static const size_t SMALLINT_SIZE = sizeof(int16_t); - static const size_t INT_SIZE = sizeof(int32_t); - static const size_t BIGINT_SIZE = sizeof(int64_t); - static const size_t FLOAT_SIZE = sizeof(float); - static const size_t DOUBLE_SIZE = sizeof(double); - static const size_t DECIMALV2_SIZE = sizeof(DecimalV2Value); - static const size_t LARGEINT_SIZE = sizeof(__int128); - // DATETIME VAL has two part: packet_time is 8 byte, and type is 4 byte - // MySQL packet time : int64_t packed_time; - // Indicate which type of this value : int type; - static const size_t DATETIME_SIZE = 16; - - void update_mem_limlits(int len); - void update_mem_trackers(bool is_filter, bool is_add_buckets, int len); - bool count_distinct_data_filter(TupleRow* row, Tuple* dst); - bool sum_distinct_data_filter(TupleRow* row, Tuple* dst); - bool is_multi_distinct() { return _is_multi_distinct; } - bool is_in_hybridmap(void* input_val, Tuple* dst, bool* is_add_buckets); - - void choose_update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst); - static void add(const std::vector& evaluators, - const std::vector& fn_ctxs, TupleRow* src, - Tuple* dst); - static void remove(const std::vector& evaluators, - const std::vector& fn_ctxs, TupleRow* src, - Tuple* dst); - static void get_value(const std::vector& evaluators, - const std::vector& fn_ctxs, Tuple* src, - Tuple* dst); - static void finalize(const std::vector& evaluators, - const std::vector& fn_ctxs, Tuple* src, - Tuple* dst, bool add_null = false); - static void init(const std::vector& evaluators, - const std::vector& fn_ctxs, Tuple* dst); - static void serialize(const std::vector& evaluators, - const std::vector& fn_ctxs, Tuple* dst); - - const std::string& fn_name() const { return _fn.name.function_name; } - - const SlotDescriptor* output_slot_desc() const { return _output_slot_desc; } - -private: - const TFunction _fn; - - /// Indicates whether to Update() or Merge() - const bool _is_merge; - /// Indicates which functions must be loaded. - const bool _is_analytic_fn; - std::unique_ptr _hybrid_map; - bool _is_multi_distinct; - std::vector _input_exprs_ctxs; - std::unique_ptr _string_buffer; //for count distinct - int _string_buffer_len; //for count distinct - std::unique_ptr _mem_tracker; // saved c'tor param - - const TypeDescriptor _return_type; - const TypeDescriptor _intermediate_type; - // Native (.so), IR (.ll) or builtin - TFunctionBinaryType::type _function_type; - - // If it's a builtin, the opcode. - AggregationOp _agg_op; - - uint64_t _total_mem_consumption; - uint64_t _accumulated_mem_consumption; - - // Slot into which update()/merge()/serialize() write their result. Not owned. - const SlotDescriptor* _intermediate_slot_desc; - // Unowned - const SlotDescriptor* _output_slot_desc; - - // Context to run the aggregate functions. - // TODO: this and _pool make this not thread safe but they are easy to duplicate - // per thread. - // std::unique_ptr _ctx; - - // Created to a subclass of AnyVal for type(). We use this to convert values - // from the UDA interface to the Expr interface. - // These objects are allocated in the runtime state's object pool. - // TODO: this is awful, remove this when exprs are updated. - std::vector _staging_input_vals; - doris_udf::AnyVal* _staging_intermediate_val; - doris_udf::AnyVal* _staging_merge_input_val; - // doris_udf::AnyVal* _staging_output_val; - - // Function ptrs to the aggregate function. This is either populated from the - // opcode registry for builtins or from the external binary for native UDAs. - // OpcodeRegistry::AggFnDescriptor _fn_ptrs; - - void* _init_fn; - void* _update_fn; - void* _remove_fn; - void* _merge_fn; - void* _serialize_fn; - void* _get_value_fn; - void* _finalize_fn; - - // Use create() instead. - AggFnEvaluator(const TExprNode& desc); - AggFnEvaluator(const TExprNode& desc, bool is_analytic_fn); - - std::string to_string(TAggregationOp::type index) { - std::map::const_iterator it = - _TAggregationOp_VALUES_TO_NAMES.find(_agg_op); - - if (it == _TAggregationOp_VALUES_TO_NAMES.end()) { - return "NULL"; - } else { - return it->second; - } - } - - // TODO: these functions below are not extensible and we need to use codegen to - // generate the calls into the UDA functions (like for UDFs). - // Remove these functions when this is supported. - - // Sets up the arguments to call fn. This converts from the agg-expr signature, - // taking TupleRow to the UDA signature taking AnvVals. - void update_or_merge(FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst, void* fn); - - // Sets up the arguments to call fn. This converts from the agg-expr signature, - // taking TupleRow to the UDA signature taking AnvVals. - // void serialize_or_finalize(FunctionContext* agg_fn_ctx, const SlotDescriptor* dst_slot_desc, Tuple* dst, void* fn); - void serialize_or_finalize(FunctionContext* agg_fn_ctx, Tuple* src, - const SlotDescriptor* dst_slot_desc, Tuple* dst, void* fn, - bool add_null = false); - - // Writes the result in src into dst pointed to by _output_slot_desc - void set_output_slot(const doris_udf::AnyVal* src, const SlotDescriptor* dst_slot_desc, - Tuple* dst); - // Sets 'dst' to the value from 'slot'. - void set_any_val(const void* slot, const TypeDescriptor& type, doris_udf::AnyVal* dst); -}; - -inline void AggFnEvaluator::add(doris_udf::FunctionContext* agg_fn_ctx, TupleRow* row, Tuple* dst) { - agg_fn_ctx->impl()->increment_num_updates(); - update(agg_fn_ctx, row, dst, _is_merge ? _merge_fn : _update_fn, nullptr); -} -inline void AggFnEvaluator::remove(doris_udf::FunctionContext* agg_fn_ctx, TupleRow* row, - Tuple* dst) { - agg_fn_ctx->impl()->increment_num_removes(); - update(agg_fn_ctx, row, dst, _remove_fn, nullptr); -} - -inline void AggFnEvaluator::finalize(doris_udf::FunctionContext* agg_fn_ctx, Tuple* src, Tuple* dst, - bool add_null) { - serialize_or_finalize(agg_fn_ctx, src, _output_slot_desc, dst, _finalize_fn, add_null); -} -inline void AggFnEvaluator::get_value(doris_udf::FunctionContext* agg_fn_ctx, Tuple* src, - Tuple* dst) { - serialize_or_finalize(agg_fn_ctx, src, _output_slot_desc, dst, _get_value_fn); -} - -inline void AggFnEvaluator::init(const std::vector& evaluators, - const std::vector& fn_ctxs, - Tuple* dst) { - DCHECK_EQ(evaluators.size(), fn_ctxs.size()); - - for (int i = 0; i < evaluators.size(); ++i) { - evaluators[i]->init(fn_ctxs[i], dst); - } -} -inline void AggFnEvaluator::add(const std::vector& evaluators, - const std::vector& fn_ctxs, - TupleRow* src, Tuple* dst) { - DCHECK_EQ(evaluators.size(), fn_ctxs.size()); - - for (int i = 0; i < evaluators.size(); ++i) { - evaluators[i]->add(fn_ctxs[i], src, dst); - } -} -inline void AggFnEvaluator::remove(const std::vector& evaluators, - const std::vector& fn_ctxs, - TupleRow* src, Tuple* dst) { - DCHECK_EQ(evaluators.size(), fn_ctxs.size()); - - for (int i = 0; i < evaluators.size(); ++i) { - evaluators[i]->remove(fn_ctxs[i], src, dst); - } -} -inline void AggFnEvaluator::serialize(const std::vector& evaluators, - const std::vector& fn_ctxs, - Tuple* dst) { - DCHECK_EQ(evaluators.size(), fn_ctxs.size()); - - for (int i = 0; i < evaluators.size(); ++i) { - evaluators[i]->serialize(fn_ctxs[i], dst); - } -} -inline void AggFnEvaluator::get_value(const std::vector& evaluators, - const std::vector& fn_ctxs, - Tuple* src, Tuple* dst) { - DCHECK_EQ(evaluators.size(), fn_ctxs.size()); - - for (int i = 0; i < evaluators.size(); ++i) { - evaluators[i]->get_value(fn_ctxs[i], src, dst); - } -} -inline void AggFnEvaluator::finalize(const std::vector& evaluators, - const std::vector& fn_ctxs, - Tuple* src, Tuple* dst, bool add_null) { - DCHECK_EQ(evaluators.size(), fn_ctxs.size()); - - for (int i = 0; i < evaluators.size(); ++i) { - evaluators[i]->finalize(fn_ctxs[i], src, dst, add_null); - } -} - -} // namespace doris diff --git a/be/src/exprs/anyval_util.h b/be/src/exprs/anyval_util.h index 69144524c5..46cd49e0d9 100644 --- a/be/src/exprs/anyval_util.h +++ b/be/src/exprs/anyval_util.h @@ -21,16 +21,32 @@ #pragma once #include "common/status.h" -#include "exprs/expr.h" #include "runtime/collection_value.h" #include "runtime/primitive_type.h" #include "runtime/type_limit.h" +#include "runtime/types.h" #include "udf/udf.h" #include "util/hash_util.hpp" #include "util/types.h" namespace doris { +using doris_udf::FunctionContext; +using doris_udf::BooleanVal; +using doris_udf::TinyIntVal; +using doris_udf::SmallIntVal; +using doris_udf::IntVal; +using doris_udf::BigIntVal; +using doris_udf::LargeIntVal; +using doris_udf::FloatVal; +using doris_udf::DoubleVal; +using doris_udf::DecimalV2Val; +using doris_udf::DateTimeVal; +using doris_udf::DateTimeV2Val; +using doris_udf::DateV2Val; +using doris_udf::StringVal; +using doris_udf::AnyVal; + class MemPool; // Utilities for AnyVals diff --git a/be/src/exprs/arithmetic_expr.cpp b/be/src/exprs/arithmetic_expr.cpp deleted file mode 100644 index cc39140ce7..0000000000 --- a/be/src/exprs/arithmetic_expr.cpp +++ /dev/null @@ -1,226 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/arithmetic_expr.h" - -namespace doris { - -std::set ArithmeticExpr::_s_valid_fn_names = { - "add", "subtract", "multiply", "divide", "int_divide", - "mod", "bitand", "bitor", "bitxor", "bitnot"}; - -Expr* ArithmeticExpr::from_thrift(const TExprNode& node) { - switch (node.opcode) { - case TExprOpcode::ADD: - return new AddExpr(node); - case TExprOpcode::SUBTRACT: - return new SubExpr(node); - case TExprOpcode::MULTIPLY: - return new MulExpr(node); - case TExprOpcode::DIVIDE: - case TExprOpcode::INT_DIVIDE: - return new DivExpr(node); - case TExprOpcode::MOD: - return new ModExpr(node); - case TExprOpcode::BITAND: - return new BitAndExpr(node); - case TExprOpcode::BITOR: - return new BitOrExpr(node); - case TExprOpcode::BITXOR: - return new BitXorExpr(node); - case TExprOpcode::BITNOT: - return new BitNotExpr(node); - default: - return nullptr; - } - return nullptr; -} - -Expr* ArithmeticExpr::from_fn_name(const TExprNode& node) { - std::string fn_name = node.fn.name.function_name; - if (fn_name == "add") { - return new AddExpr(node); - } else if (fn_name == "subtract") { - return new SubExpr(node); - } else if (fn_name == "multiply") { - return new MulExpr(node); - } else if (fn_name == "divide" || fn_name == "int_divide") { - return new DivExpr(node); - } else if (fn_name == "mod") { - return new ModExpr(node); - } else if (fn_name == "bitand") { - return new BitAndExpr(node); - } else if (fn_name == "bitor") { - return new BitOrExpr(node); - } else if (fn_name == "bitxor") { - return new BitXorExpr(node); - } else if (fn_name == "bitnot") { - return new BitNotExpr(node); - } - - return nullptr; -} - -#define BINARY_OP_CHECK_ZERO_FN(TYPE, CLASS, FN, OP) \ - TYPE CLASS::FN(ExprContext* context, TupleRow* row) { \ - TYPE v1 = _children[0]->FN(context, row); \ - if (v1.is_null) { \ - return TYPE::null(); \ - } \ - TYPE v2 = _children[1]->FN(context, row); \ - if (v2.is_null || v2.val == 0) { \ - return TYPE::null(); \ - } \ - return TYPE(v1.val OP v2.val); \ - } - -#define BINARY_OP_FN(TYPE, CLASS, FN, OP) \ - TYPE CLASS::FN(ExprContext* context, TupleRow* row) { \ - TYPE v1 = _children[0]->FN(context, row); \ - if (v1.is_null) { \ - return TYPE::null(); \ - } \ - TYPE v2 = _children[1]->FN(context, row); \ - if (v2.is_null) { \ - return TYPE::null(); \ - } \ - return TYPE(v1.val OP v2.val); \ - } - -#define BINARY_ARITH_FNS(CLASS, OP) \ - BINARY_OP_FN(TinyIntVal, CLASS, get_tiny_int_val, OP) \ - BINARY_OP_FN(SmallIntVal, CLASS, get_small_int_val, OP) \ - BINARY_OP_FN(IntVal, CLASS, get_int_val, OP) \ - BINARY_OP_FN(BigIntVal, CLASS, get_big_int_val, OP) \ - BINARY_OP_FN(LargeIntVal, CLASS, get_large_int_val, OP) \ - BINARY_OP_FN(FloatVal, CLASS, get_float_val, OP) \ - BINARY_OP_FN(DoubleVal, CLASS, get_double_val, OP) - -BINARY_ARITH_FNS(AddExpr, +) -BINARY_ARITH_FNS(SubExpr, -) -BINARY_ARITH_FNS(MulExpr, *) - -#define BINARY_DIV_FNS() \ - BINARY_OP_CHECK_ZERO_FN(TinyIntVal, DivExpr, get_tiny_int_val, /) \ - BINARY_OP_CHECK_ZERO_FN(SmallIntVal, DivExpr, get_small_int_val, /) \ - BINARY_OP_CHECK_ZERO_FN(IntVal, DivExpr, get_int_val, /) \ - BINARY_OP_CHECK_ZERO_FN(BigIntVal, DivExpr, get_big_int_val, /) \ - BINARY_OP_CHECK_ZERO_FN(LargeIntVal, DivExpr, get_large_int_val, /) \ - BINARY_OP_CHECK_ZERO_FN(FloatVal, DivExpr, get_float_val, /) \ - BINARY_OP_CHECK_ZERO_FN(DoubleVal, DivExpr, get_double_val, /) - -BINARY_DIV_FNS() - -#define BINARY_MOD_FNS() \ - BINARY_OP_CHECK_ZERO_FN(TinyIntVal, ModExpr, get_tiny_int_val, %) \ - BINARY_OP_CHECK_ZERO_FN(SmallIntVal, ModExpr, get_small_int_val, %) \ - BINARY_OP_CHECK_ZERO_FN(IntVal, ModExpr, get_int_val, %) \ - BINARY_OP_CHECK_ZERO_FN(BigIntVal, ModExpr, get_big_int_val, %) \ - BINARY_OP_CHECK_ZERO_FN(LargeIntVal, ModExpr, get_large_int_val, %) - -BINARY_MOD_FNS() - -FloatVal ModExpr::get_float_val(ExprContext* context, TupleRow* row) { - FloatVal v1 = _children[0]->get_float_val(context, row); - if (v1.is_null) { - return FloatVal::null(); - } - FloatVal v2 = _children[1]->get_float_val(context, row); - if (v2.is_null || v2.val == 0) { - return FloatVal::null(); - } - return FloatVal(fmod(v1.val, v2.val)); -} - -DoubleVal ModExpr::get_double_val(ExprContext* context, TupleRow* row) { - DoubleVal v1 = _children[0]->get_double_val(context, row); - if (v1.is_null) { - return DoubleVal::null(); - } - DoubleVal v2 = _children[1]->get_double_val(context, row); - if (v2.is_null || v2.val == 0) { - return DoubleVal::null(); - } - return DoubleVal(fmod(v1.val, v2.val)); -} - -#define BINARY_BIT_FNS(CLASS, OP) \ - BINARY_OP_FN(TinyIntVal, CLASS, get_tiny_int_val, OP) \ - BINARY_OP_FN(SmallIntVal, CLASS, get_small_int_val, OP) \ - BINARY_OP_FN(IntVal, CLASS, get_int_val, OP) \ - BINARY_OP_FN(BigIntVal, CLASS, get_big_int_val, OP) \ - BINARY_OP_FN(LargeIntVal, CLASS, get_large_int_val, OP) - -BINARY_BIT_FNS(BitAndExpr, &) -BINARY_BIT_FNS(BitOrExpr, |) -BINARY_BIT_FNS(BitXorExpr, ^) - -#define BITNOT_OP_FN(TYPE, FN) \ - TYPE BitNotExpr::FN(ExprContext* context, TupleRow* row) { \ - TYPE v = _children[0]->FN(context, row); \ - if (v.is_null) { \ - return TYPE::null(); \ - } \ - return TYPE(~v.val); \ - } - -#define BITNOT_FNS() \ - BITNOT_OP_FN(TinyIntVal, get_tiny_int_val) \ - BITNOT_OP_FN(SmallIntVal, get_small_int_val) \ - BITNOT_OP_FN(IntVal, get_int_val) \ - BITNOT_OP_FN(BigIntVal, get_big_int_val) \ - BITNOT_OP_FN(LargeIntVal, get_large_int_val) - -BITNOT_FNS() - -#define DECIMAL_ARITHMETIC_OP(EXPR_NAME, OP) \ - DecimalV2Val EXPR_NAME::get_decimalv2_val(ExprContext* context, TupleRow* row) { \ - DecimalV2Val v1 = _children[0]->get_decimalv2_val(context, row); \ - DecimalV2Val v2 = _children[1]->get_decimalv2_val(context, row); \ - if (v1.is_null || v2.is_null) { \ - return DecimalV2Val::null(); \ - } \ - DecimalV2Value iv1 = DecimalV2Value::from_decimal_val(v1); \ - DecimalV2Value iv2 = DecimalV2Value::from_decimal_val(v2); \ - DecimalV2Value ir = iv1 OP iv2; \ - DecimalV2Val result; \ - ir.to_decimal_val(&result); \ - return result; \ - } - -#define DECIMAL_ARITHMETIC_OP_DIVIDE(EXPR_NAME, OP) \ - DecimalV2Val EXPR_NAME::get_decimalv2_val(ExprContext* context, TupleRow* row) { \ - DecimalV2Val v1 = _children[0]->get_decimalv2_val(context, row); \ - DecimalV2Val v2 = _children[1]->get_decimalv2_val(context, row); \ - if (v1.is_null || v2.is_null || v2.value() == 0) { \ - return DecimalV2Val::null(); \ - } \ - DecimalV2Value iv1 = DecimalV2Value::from_decimal_val(v1); \ - DecimalV2Value iv2 = DecimalV2Value::from_decimal_val(v2); \ - DecimalV2Value ir = iv1 OP iv2; \ - DecimalV2Val result; \ - ir.to_decimal_val(&result); \ - return result; \ - } - -DECIMAL_ARITHMETIC_OP(AddExpr, +); -DECIMAL_ARITHMETIC_OP(SubExpr, -); -DECIMAL_ARITHMETIC_OP(MulExpr, *); -DECIMAL_ARITHMETIC_OP_DIVIDE(DivExpr, /); -DECIMAL_ARITHMETIC_OP_DIVIDE(ModExpr, %); - -} // namespace doris diff --git a/be/src/exprs/arithmetic_expr.h b/be/src/exprs/arithmetic_expr.h deleted file mode 100644 index 3f7c8640cc..0000000000 --- a/be/src/exprs/arithmetic_expr.h +++ /dev/null @@ -1,181 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include "common/object_pool.h" -#include "exprs/expr.h" - -namespace doris { - -class ArithmeticExpr : public Expr { -public: - static bool is_valid(std::string fn_name) { return _s_valid_fn_names.count(fn_name); } - static Expr* from_thrift(const TExprNode& node); - static Expr* from_fn_name(const TExprNode& node); - -protected: - enum BinaryOpType { - ADD, - SUB, - MUL, - DIV, - MOD, - BIT_AND, - BIT_OR, - BIT_XOR, - BIT_NOT, - }; - - ArithmeticExpr(const TExprNode& node) : Expr(node) {} - virtual ~ArithmeticExpr() {} - - static std::set _s_valid_fn_names; -}; - -class AddExpr : public ArithmeticExpr { -public: - AddExpr(const TExprNode& node) : ArithmeticExpr(node) {} - virtual ~AddExpr() {} - virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new AddExpr(*this)); } - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; - virtual FloatVal get_float_val(ExprContext* context, TupleRow*) override; - virtual DoubleVal get_double_val(ExprContext* context, TupleRow*) override; - virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*) override; -}; - -class SubExpr : public ArithmeticExpr { -public: - SubExpr(const TExprNode& node) : ArithmeticExpr(node) {} - virtual ~SubExpr() {} - virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new SubExpr(*this)); } - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; - virtual FloatVal get_float_val(ExprContext* context, TupleRow*) override; - virtual DoubleVal get_double_val(ExprContext* context, TupleRow*) override; - virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*) override; -}; - -class MulExpr : public ArithmeticExpr { -public: - MulExpr(const TExprNode& node) : ArithmeticExpr(node) {} - virtual ~MulExpr() {} - virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new MulExpr(*this)); } - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; - virtual FloatVal get_float_val(ExprContext* context, TupleRow*) override; - virtual DoubleVal get_double_val(ExprContext* context, TupleRow*) override; - virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*) override; -}; - -class DivExpr : public ArithmeticExpr { -public: - DivExpr(const TExprNode& node) : ArithmeticExpr(node) {} - virtual ~DivExpr() {} - virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new DivExpr(*this)); } - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; - virtual FloatVal get_float_val(ExprContext* context, TupleRow*) override; - virtual DoubleVal get_double_val(ExprContext* context, TupleRow*) override; - virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*) override; -}; - -class ModExpr : public ArithmeticExpr { -public: - ModExpr(const TExprNode& node) : ArithmeticExpr(node) {} - virtual ~ModExpr() {} - virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new ModExpr(*this)); } - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; - virtual FloatVal get_float_val(ExprContext* context, TupleRow*) override; - virtual DoubleVal get_double_val(ExprContext* context, TupleRow*) override; - virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*) override; -}; - -class BitAndExpr : public ArithmeticExpr { -public: - BitAndExpr(const TExprNode& node) : ArithmeticExpr(node) {} - virtual ~BitAndExpr() {} - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new BitAndExpr(*this)); - } - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; -}; - -class BitOrExpr : public ArithmeticExpr { -public: - BitOrExpr(const TExprNode& node) : ArithmeticExpr(node) {} - virtual ~BitOrExpr() {} - virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new BitOrExpr(*this)); } - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; -}; - -class BitXorExpr : public ArithmeticExpr { -public: - BitXorExpr(const TExprNode& node) : ArithmeticExpr(node) {} - virtual ~BitXorExpr() {} - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new BitXorExpr(*this)); - } - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; -}; - -class BitNotExpr : public ArithmeticExpr { -public: - BitNotExpr(const TExprNode& node) : ArithmeticExpr(node) {} - virtual ~BitNotExpr() {} - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new BitNotExpr(*this)); - } - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; -}; - -} // namespace doris diff --git a/be/src/exprs/array_functions.cpp b/be/src/exprs/array_functions.cpp deleted file mode 100644 index 1bb2a59042..0000000000 --- a/be/src/exprs/array_functions.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/array_functions.h" - -#include "runtime/collection_value.h" - -namespace doris { - -void ArrayFunctions::init() {} - -#define ARRAY_FUNCTION(TYPE, PRIMARY_TYPE) \ - CollectionVal ArrayFunctions::array(FunctionContext* context, int num_children, \ - const TYPE* values) { \ - DCHECK_EQ(context->get_return_type().children.size(), 1); \ - CollectionValue v; \ - CollectionValue::init_collection(context, num_children, PRIMARY_TYPE, &v); \ - auto iterator = v.iterator(PRIMARY_TYPE); \ - for (int i = 0; i < num_children; ++i, iterator.next()) { \ - iterator.set(values + i); \ - } \ - CollectionVal ret; \ - v.to_collection_val(&ret); \ - return ret; \ - } - -ARRAY_FUNCTION(IntVal, TYPE_INT); -ARRAY_FUNCTION(StringVal, TYPE_VARCHAR); - -doris_udf::AnyVal array_fake_function(FunctionContext* context) { - return doris_udf::AnyVal(true); -} - -} // namespace doris diff --git a/be/src/exprs/array_functions.h b/be/src/exprs/array_functions.h deleted file mode 100644 index 292e984934..0000000000 --- a/be/src/exprs/array_functions.h +++ /dev/null @@ -1,37 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "udf/udf.h" - -namespace doris { - -class ArrayFunctions { -public: - static void init(); - - /** - * array construct functions, create array with the children values - */ - static CollectionVal array(FunctionContext* context, int num_children, const IntVal* values); - - static CollectionVal array(FunctionContext* context, int num_children, const StringVal* values); -}; - -doris_udf::AnyVal array_fake_function(FunctionContext* context); -} // namespace doris diff --git a/be/src/exprs/binary_predicate.cpp b/be/src/exprs/binary_predicate.cpp deleted file mode 100644 index 7408c3307a..0000000000 --- a/be/src/exprs/binary_predicate.cpp +++ /dev/null @@ -1,509 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/binary_predicate.h" - -#include - -#include "gen_cpp/Exprs_types.h" -#include "runtime/decimalv2_value.h" -#include "vec/common/string_ref.h" - -namespace doris { - -Expr* BinaryPredicate::from_thrift(const TExprNode& node) { - switch (node.opcode) { - case TExprOpcode::EQ: { - switch (node.child_type) { - case TPrimitiveType::BOOLEAN: - return new EqBooleanValPred(node); - case TPrimitiveType::TINYINT: - return new EqTinyIntValPred(node); - case TPrimitiveType::SMALLINT: - return new EqSmallIntValPred(node); - case TPrimitiveType::INT: - case TPrimitiveType::DECIMAL32: - case TPrimitiveType::DATEV2: - return new EqIntValPred(node); - case TPrimitiveType::BIGINT: - case TPrimitiveType::DECIMAL64: - case TPrimitiveType::DATETIMEV2: - return new EqBigIntValPred(node); - case TPrimitiveType::LARGEINT: - case TPrimitiveType::DECIMAL128I: - return new EqLargeIntValPred(node); - case TPrimitiveType::FLOAT: - return new EqFloatValPred(node); - case TPrimitiveType::DOUBLE: - return new EqDoubleValPred(node); - case TPrimitiveType::CHAR: - case TPrimitiveType::VARCHAR: - case TPrimitiveType::STRING: - return new EqStringValPred(node); - case TPrimitiveType::DATE: - case TPrimitiveType::DATETIME: - return new EqDateTimeValPred(node); - case TPrimitiveType::DECIMALV2: - return new EqDecimalV2ValPred(node); - default: - DCHECK(false) << "Invalid type: " << node.child_type; - return nullptr; - } - } - case TExprOpcode::NE: { - switch (node.child_type) { - case TPrimitiveType::BOOLEAN: - return new NeBooleanValPred(node); - case TPrimitiveType::TINYINT: - return new NeTinyIntValPred(node); - case TPrimitiveType::SMALLINT: - return new NeSmallIntValPred(node); - case TPrimitiveType::INT: - case TPrimitiveType::DECIMAL32: - case TPrimitiveType::DATEV2: - return new NeIntValPred(node); - case TPrimitiveType::BIGINT: - case TPrimitiveType::DECIMAL64: - case TPrimitiveType::DATETIMEV2: - return new NeBigIntValPred(node); - case TPrimitiveType::LARGEINT: - case TPrimitiveType::DECIMAL128I: - return new NeLargeIntValPred(node); - case TPrimitiveType::FLOAT: - return new NeFloatValPred(node); - case TPrimitiveType::DOUBLE: - return new NeDoubleValPred(node); - case TPrimitiveType::CHAR: - case TPrimitiveType::VARCHAR: - case TPrimitiveType::STRING: - return new NeStringValPred(node); - case TPrimitiveType::DATE: - case TPrimitiveType::DATETIME: - return new NeDateTimeValPred(node); - case TPrimitiveType::DECIMALV2: - return new NeDecimalV2ValPred(node); - default: - DCHECK(false) << "Invalid type!"; - return nullptr; - } - } - case TExprOpcode::LT: { - switch (node.child_type) { - case TPrimitiveType::BOOLEAN: - return new LtBooleanValPred(node); - case TPrimitiveType::TINYINT: - return new LtTinyIntValPred(node); - case TPrimitiveType::SMALLINT: - return new LtSmallIntValPred(node); - case TPrimitiveType::INT: - case TPrimitiveType::DECIMAL32: - case TPrimitiveType::DATEV2: - return new LtIntValPred(node); - case TPrimitiveType::BIGINT: - case TPrimitiveType::DECIMAL64: - case TPrimitiveType::DATETIMEV2: - return new LtBigIntValPred(node); - case TPrimitiveType::LARGEINT: - case TPrimitiveType::DECIMAL128I: - return new LtLargeIntValPred(node); - case TPrimitiveType::FLOAT: - return new LtFloatValPred(node); - case TPrimitiveType::DOUBLE: - return new LtDoubleValPred(node); - case TPrimitiveType::CHAR: - case TPrimitiveType::VARCHAR: - case TPrimitiveType::STRING: - return new LtStringValPred(node); - case TPrimitiveType::DATE: - case TPrimitiveType::DATETIME: - return new LtDateTimeValPred(node); - case TPrimitiveType::DECIMALV2: - return new LtDecimalV2ValPred(node); - default: - DCHECK(false) << "Invalid type!"; - return nullptr; - } - } - case TExprOpcode::LE: { - switch (node.child_type) { - case TPrimitiveType::BOOLEAN: - return new LeBooleanValPred(node); - case TPrimitiveType::TINYINT: - return new LeTinyIntValPred(node); - case TPrimitiveType::SMALLINT: - return new LeSmallIntValPred(node); - case TPrimitiveType::INT: - case TPrimitiveType::DECIMAL32: - case TPrimitiveType::DATEV2: - return new LeIntValPred(node); - case TPrimitiveType::BIGINT: - case TPrimitiveType::DECIMAL64: - case TPrimitiveType::DATETIMEV2: - return new LeBigIntValPred(node); - case TPrimitiveType::LARGEINT: - case TPrimitiveType::DECIMAL128I: - return new LeLargeIntValPred(node); - case TPrimitiveType::FLOAT: - return new LeFloatValPred(node); - case TPrimitiveType::DOUBLE: - return new LeDoubleValPred(node); - case TPrimitiveType::CHAR: - case TPrimitiveType::VARCHAR: - case TPrimitiveType::STRING: - return new LeStringValPred(node); - case TPrimitiveType::DATE: - case TPrimitiveType::DATETIME: - return new LeDateTimeValPred(node); - case TPrimitiveType::DECIMALV2: - return new LeDecimalV2ValPred(node); - default: - DCHECK(false) << "Invalid type!"; - return nullptr; - } - } - case TExprOpcode::GT: { - switch (node.child_type) { - case TPrimitiveType::BOOLEAN: - return new GtBooleanValPred(node); - case TPrimitiveType::TINYINT: - return new GtTinyIntValPred(node); - case TPrimitiveType::SMALLINT: - return new GtSmallIntValPred(node); - case TPrimitiveType::INT: - case TPrimitiveType::DECIMAL32: - case TPrimitiveType::DATEV2: - return new GtIntValPred(node); - case TPrimitiveType::BIGINT: - case TPrimitiveType::DECIMAL64: - case TPrimitiveType::DATETIMEV2: - return new GtBigIntValPred(node); - case TPrimitiveType::LARGEINT: - case TPrimitiveType::DECIMAL128I: - return new GtLargeIntValPred(node); - case TPrimitiveType::FLOAT: - return new GtFloatValPred(node); - case TPrimitiveType::DOUBLE: - return new GtDoubleValPred(node); - case TPrimitiveType::CHAR: - case TPrimitiveType::VARCHAR: - case TPrimitiveType::STRING: - return new GtStringValPred(node); - case TPrimitiveType::DATE: - case TPrimitiveType::DATETIME: - return new GtDateTimeValPred(node); - case TPrimitiveType::DECIMALV2: - return new GtDecimalV2ValPred(node); - default: - DCHECK(false) << "Invalid type!"; - return nullptr; - } - } - case TExprOpcode::GE: { - switch (node.child_type) { - case TPrimitiveType::BOOLEAN: - return new GeBooleanValPred(node); - case TPrimitiveType::TINYINT: - return new GeTinyIntValPred(node); - case TPrimitiveType::SMALLINT: - return new GeSmallIntValPred(node); - case TPrimitiveType::INT: - case TPrimitiveType::DECIMAL32: - case TPrimitiveType::DATEV2: - return new GeIntValPred(node); - case TPrimitiveType::BIGINT: - case TPrimitiveType::DECIMAL64: - case TPrimitiveType::DATETIMEV2: - return new GeBigIntValPred(node); - case TPrimitiveType::LARGEINT: - case TPrimitiveType::DECIMAL128I: - return new GeLargeIntValPred(node); - case TPrimitiveType::FLOAT: - return new GeFloatValPred(node); - case TPrimitiveType::DOUBLE: - return new GeDoubleValPred(node); - case TPrimitiveType::CHAR: - case TPrimitiveType::VARCHAR: - case TPrimitiveType::STRING: - return new GeStringValPred(node); - case TPrimitiveType::DATE: - case TPrimitiveType::DATETIME: - return new GeDateTimeValPred(node); - case TPrimitiveType::DECIMALV2: - return new GeDecimalV2ValPred(node); - default: - DCHECK(false) << "Invalid type!"; - return nullptr; - } - } - case TExprOpcode::EQ_FOR_NULL: { - switch (node.child_type) { - case TPrimitiveType::BOOLEAN: - return new EqForNullBooleanValPred(node); - case TPrimitiveType::TINYINT: - return new EqForNullTinyIntValPred(node); - case TPrimitiveType::SMALLINT: - return new EqForNullSmallIntValPred(node); - case TPrimitiveType::INT: - case TPrimitiveType::DECIMAL32: - case TPrimitiveType::DATEV2: - return new EqForNullIntValPred(node); - case TPrimitiveType::BIGINT: - case TPrimitiveType::DECIMAL64: - case TPrimitiveType::DATETIMEV2: - return new EqForNullBigIntValPred(node); - case TPrimitiveType::LARGEINT: - case TPrimitiveType::DECIMAL128I: - return new EqForNullLargeIntValPred(node); - case TPrimitiveType::FLOAT: - return new EqForNullFloatValPred(node); - case TPrimitiveType::DOUBLE: - return new EqForNullDoubleValPred(node); - case TPrimitiveType::CHAR: - case TPrimitiveType::VARCHAR: - case TPrimitiveType::STRING: - return new EqForNullStringValPred(node); - case TPrimitiveType::DATE: - case TPrimitiveType::DATETIME: - return new EqForNullDateTimeValPred(node); - case TPrimitiveType::DECIMALV2: - return new EqForNullDecimalV2ValPred(node); - default: - DCHECK(false) << "Invalid type!"; - return nullptr; - } - } - default: - return nullptr; - } - return nullptr; -} - -std::string BinaryPredicate::debug_string() const { - std::stringstream out; - out << "BinaryPredicate(" << Expr::debug_string() << ")"; - return out.str(); -} - -#define BINARY_PRED_FN(CLASS, TYPE, FN, OP, LLVM_PRED) \ - BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - TYPE v1 = _children[0]->FN(ctx, row); \ - if (v1.is_null) { \ - return BooleanVal::null(); \ - } \ - TYPE v2 = _children[1]->FN(ctx, row); \ - if (v2.is_null) { \ - return BooleanVal::null(); \ - } \ - return BooleanVal(v1.val OP v2.val); \ - } - -// add '/**/' to pass code style check of cooder -#define BINARY_PRED_INT_FNS(TYPE, FN) \ - BINARY_PRED_FN(Eq##TYPE##Pred, TYPE, FN, /**/ == /**/, CmpInst::ICMP_EQ) \ - BINARY_PRED_FN(Ne##TYPE##Pred, TYPE, FN, /**/ != /**/, CmpInst::ICMP_NE) \ - BINARY_PRED_FN(Lt##TYPE##Pred, TYPE, FN, /**/ < /**/, CmpInst::ICMP_SLT) \ - BINARY_PRED_FN(Le##TYPE##Pred, TYPE, FN, /**/ <= /**/, CmpInst::ICMP_SLE) \ - BINARY_PRED_FN(Gt##TYPE##Pred, TYPE, FN, /**/ > /**/, CmpInst::ICMP_SGT) \ - BINARY_PRED_FN(Ge##TYPE##Pred, TYPE, FN, /**/ >= /**/, CmpInst::ICMP_SGE) - -BINARY_PRED_INT_FNS(BooleanVal, get_boolean_val); -BINARY_PRED_INT_FNS(TinyIntVal, get_tiny_int_val); -BINARY_PRED_INT_FNS(SmallIntVal, get_small_int_val); -BINARY_PRED_INT_FNS(IntVal, get_int_val); -BINARY_PRED_INT_FNS(BigIntVal, get_big_int_val); -BINARY_PRED_INT_FNS(LargeIntVal, get_large_int_val); - -#define BINARY_PRED_FLOAT_FNS(TYPE, FN) \ - BINARY_PRED_FN(Eq##TYPE##Pred, TYPE, FN, ==, CmpInst::FCMP_OEQ) \ - BINARY_PRED_FN(Ne##TYPE##Pred, TYPE, FN, !=, CmpInst::FCMP_UNE) \ - BINARY_PRED_FN(Lt##TYPE##Pred, TYPE, FN, <, CmpInst::FCMP_OLT) \ - BINARY_PRED_FN(Le##TYPE##Pred, TYPE, FN, <=, CmpInst::FCMP_OLE) \ - BINARY_PRED_FN(Gt##TYPE##Pred, TYPE, FN, >, CmpInst::FCMP_OGT) \ - BINARY_PRED_FN(Ge##TYPE##Pred, TYPE, FN, >=, CmpInst::FCMP_OGE) - -BINARY_PRED_FLOAT_FNS(FloatVal, get_float_val); -BINARY_PRED_FLOAT_FNS(DoubleVal, get_double_val); - -#define COMPLICATE_BINARY_PRED_FN(CLASS, TYPE, FN, DORIS_TYPE, FROM_FUNC, OP) \ - BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - TYPE v1 = _children[0]->FN(ctx, row); \ - if (v1.is_null) { \ - return BooleanVal::null(); \ - } \ - TYPE v2 = _children[1]->FN(ctx, row); \ - if (v2.is_null) { \ - return BooleanVal::null(); \ - } \ - DORIS_TYPE pv1 = DORIS_TYPE::FROM_FUNC(v1); \ - DORIS_TYPE pv2 = DORIS_TYPE::FROM_FUNC(v2); \ - return BooleanVal(pv1 OP pv2); \ - } - -#define COMPLICATE_BINARY_PRED_FNS(TYPE, FN, DORIS_TYPE, FROM_FUNC) \ - COMPLICATE_BINARY_PRED_FN(Eq##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, ==) \ - COMPLICATE_BINARY_PRED_FN(Ne##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, !=) \ - COMPLICATE_BINARY_PRED_FN(Lt##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, <) \ - COMPLICATE_BINARY_PRED_FN(Le##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, <=) \ - COMPLICATE_BINARY_PRED_FN(Gt##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, >) \ - COMPLICATE_BINARY_PRED_FN(Ge##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, >=) - -COMPLICATE_BINARY_PRED_FNS(DecimalV2Val, get_decimalv2_val, DecimalV2Value, from_decimal_val) - -#define DATETIME_BINARY_PRED_FN(CLASS, OP, LLVM_PRED) \ - BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - DateTimeVal v1 = _children[0]->get_datetime_val(ctx, row); \ - if (v1.is_null) { \ - return BooleanVal::null(); \ - } \ - DateTimeVal v2 = _children[1]->get_datetime_val(ctx, row); \ - if (v2.is_null) { \ - return BooleanVal::null(); \ - } \ - return BooleanVal(v1.packed_time OP v2.packed_time); \ - } - -#define DATETIME_BINARY_PRED_FNS() \ - DATETIME_BINARY_PRED_FN(Eq##DateTimeVal##Pred, ==, CmpInst::ICMP_EQ) \ - DATETIME_BINARY_PRED_FN(Ne##DateTimeVal##Pred, !=, CmpInst::ICMP_NE) \ - DATETIME_BINARY_PRED_FN(Lt##DateTimeVal##Pred, <, CmpInst::ICMP_SLT) \ - DATETIME_BINARY_PRED_FN(Le##DateTimeVal##Pred, <=, CmpInst::ICMP_SLE) \ - DATETIME_BINARY_PRED_FN(Gt##DateTimeVal##Pred, >, CmpInst::ICMP_SGT) \ - DATETIME_BINARY_PRED_FN(Ge##DateTimeVal##Pred, >=, CmpInst::ICMP_SGE) - -DATETIME_BINARY_PRED_FNS() - -#define STRING_BINARY_PRED_FN(CLASS, OP) \ - BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - StringVal v1 = _children[0]->get_string_val(ctx, row); \ - if (v1.is_null) { \ - return BooleanVal::null(); \ - } \ - StringVal v2 = _children[1]->get_string_val(ctx, row); \ - if (v2.is_null) { \ - return BooleanVal::null(); \ - } \ - StringRef pv1 = StringRef(v1); \ - StringRef pv2 = StringRef(v2); \ - return BooleanVal(pv1 OP pv2); \ - } - -#define STRING_BINARY_PRED_FNS() \ - STRING_BINARY_PRED_FN(Ne##StringVal##Pred, !=) \ - STRING_BINARY_PRED_FN(Lt##StringVal##Pred, <) \ - STRING_BINARY_PRED_FN(Le##StringVal##Pred, <=) \ - STRING_BINARY_PRED_FN(Gt##StringVal##Pred, >) \ - STRING_BINARY_PRED_FN(Ge##StringVal##Pred, >=) - -STRING_BINARY_PRED_FNS() - -BooleanVal EqStringValPred::get_boolean_val(ExprContext* ctx, TupleRow* row) { - StringVal v1 = _children[0]->get_string_val(ctx, row); - if (v1.is_null) { - return BooleanVal::null(); - } - StringVal v2 = _children[1]->get_string_val(ctx, row); - if (v2.is_null) { - return BooleanVal::null(); - } - if (v1.len != v2.len) { - return BooleanVal(false); - } - return BooleanVal(string_compare((char*)v1.ptr, v1.len, (char*)v2.ptr, v2.len, v1.len) == 0); -} - -#define BINARY_PRED_FOR_NULL_FN(CLASS, TYPE, FN, OP, LLVM_PRED) \ - BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - TYPE v1 = _children[0]->FN(ctx, row); \ - TYPE v2 = _children[1]->FN(ctx, row); \ - if (v1.is_null && v2.is_null) { \ - return BooleanVal(true); \ - } else if (v1.is_null || v2.is_null) { \ - return BooleanVal(false); \ - } \ - return BooleanVal(v1.val OP v2.val); \ - } - -// add '/**/' to pass code style check of cooder -#define BINARY_PRED_FOR_NULL_INT_FNS(TYPE, FN) \ - BINARY_PRED_FOR_NULL_FN(EqForNull##TYPE##Pred, TYPE, FN, /**/ == /**/, CmpInst::ICMP_EQ) - -BINARY_PRED_FOR_NULL_INT_FNS(BooleanVal, get_boolean_val); -BINARY_PRED_FOR_NULL_INT_FNS(TinyIntVal, get_tiny_int_val); -BINARY_PRED_FOR_NULL_INT_FNS(SmallIntVal, get_small_int_val); -BINARY_PRED_FOR_NULL_INT_FNS(IntVal, get_int_val); -BINARY_PRED_FOR_NULL_INT_FNS(BigIntVal, get_big_int_val); -BINARY_PRED_FOR_NULL_INT_FNS(LargeIntVal, get_large_int_val); - -#define BINARY_PRED_FOR_NULL_FLOAT_FNS(TYPE, FN) \ - BINARY_PRED_FOR_NULL_FN(EqForNull##TYPE##Pred, TYPE, FN, ==, CmpInst::FCMP_OEQ) - -BINARY_PRED_FOR_NULL_FLOAT_FNS(FloatVal, get_float_val); -BINARY_PRED_FOR_NULL_FLOAT_FNS(DoubleVal, get_double_val); - -#define COMPLICATE_BINARY_FOR_NULL_PRED_FN(CLASS, TYPE, FN, DORIS_TYPE, FROM_FUNC, OP) \ - BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - TYPE v1 = _children[0]->FN(ctx, row); \ - TYPE v2 = _children[1]->FN(ctx, row); \ - if (v1.is_null && v2.is_null) { \ - return BooleanVal(true); \ - } else if (v1.is_null || v2.is_null) { \ - return BooleanVal(false); \ - } \ - DORIS_TYPE pv1 = DORIS_TYPE::FROM_FUNC(v1); \ - DORIS_TYPE pv2 = DORIS_TYPE::FROM_FUNC(v2); \ - return BooleanVal(pv1 OP pv2); \ - } - -#define COMPLICATE_BINARY_FOR_NULL_PRED_FNS(TYPE, FN, DORIS_TYPE, FROM_FUNC) \ - COMPLICATE_BINARY_FOR_NULL_PRED_FN(EqForNull##TYPE##Pred, TYPE, FN, DORIS_TYPE, FROM_FUNC, ==) - -COMPLICATE_BINARY_FOR_NULL_PRED_FNS(DecimalV2Val, get_decimalv2_val, DecimalV2Value, - from_decimal_val) - -#define DATETIME_BINARY_FOR_NULL_PRED_FN(CLASS, OP, LLVM_PRED) \ - BooleanVal CLASS::get_boolean_val(ExprContext* ctx, TupleRow* row) { \ - DateTimeVal v1 = _children[0]->get_datetime_val(ctx, row); \ - DateTimeVal v2 = _children[1]->get_datetime_val(ctx, row); \ - if (v1.is_null && v2.is_null) { \ - return BooleanVal(true); \ - } else if (v1.is_null || v2.is_null) { \ - return BooleanVal(false); \ - } \ - return BooleanVal(v1.packed_time OP v2.packed_time); \ - } - -#define DATETIME_BINARY_FOR_NULL_PRED_FNS() \ - DATETIME_BINARY_FOR_NULL_PRED_FN(EqForNull##DateTimeVal##Pred, ==, CmpInst::ICMP_EQ) - -DATETIME_BINARY_FOR_NULL_PRED_FNS() - -BooleanVal EqForNullStringValPred::get_boolean_val(ExprContext* ctx, TupleRow* row) { - StringVal v1 = _children[0]->get_string_val(ctx, row); - StringVal v2 = _children[1]->get_string_val(ctx, row); - if (v1.is_null && v2.is_null) { - return BooleanVal(true); - } else if (v1.is_null || v2.is_null) { - return BooleanVal(false); - } - - if (v1.len != v2.len) { - return BooleanVal(false); - } - return BooleanVal(string_compare((char*)v1.ptr, v1.len, (char*)v2.ptr, v2.len, v1.len) == 0); -} - -} // namespace doris diff --git a/be/src/exprs/binary_predicate.h b/be/src/exprs/binary_predicate.h deleted file mode 100644 index 0378e15d72..0000000000 --- a/be/src/exprs/binary_predicate.h +++ /dev/null @@ -1,99 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "common/object_pool.h" -#include "exprs/predicate.h" -#include "gen_cpp/Exprs_types.h" - -namespace doris { - -class BinaryPredicate : public Predicate { -public: - static Expr* from_thrift(const TExprNode& node); - BinaryPredicate(const TExprNode& node) : Predicate(node) {} - virtual ~BinaryPredicate() {} - -protected: - friend class Expr; - - // virtual Status prepare(RuntimeState* state, const RowDescriptor& desc); - virtual std::string debug_string() const; -}; - -#define BIN_PRED_CLASS_DEFINE(CLASS) \ - class CLASS : public BinaryPredicate { \ - public: \ - CLASS(const TExprNode& node) : BinaryPredicate(node) {} \ - virtual ~CLASS() {} \ - virtual Expr* clone(ObjectPool* pool) const override { \ - return pool->add(new CLASS(*this)); \ - } \ - \ - virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row) override; \ - }; - -#define BIN_PRED_CLASSES_DEFINE(TYPE) \ - BIN_PRED_CLASS_DEFINE(Eq##TYPE##Pred) \ - BIN_PRED_CLASS_DEFINE(Ne##TYPE##Pred) \ - BIN_PRED_CLASS_DEFINE(Lt##TYPE##Pred) \ - BIN_PRED_CLASS_DEFINE(Le##TYPE##Pred) \ - BIN_PRED_CLASS_DEFINE(Gt##TYPE##Pred) \ - BIN_PRED_CLASS_DEFINE(Ge##TYPE##Pred) - -BIN_PRED_CLASSES_DEFINE(BooleanVal) -BIN_PRED_CLASSES_DEFINE(TinyIntVal) -BIN_PRED_CLASSES_DEFINE(SmallIntVal) -BIN_PRED_CLASSES_DEFINE(IntVal) -BIN_PRED_CLASSES_DEFINE(BigIntVal) -BIN_PRED_CLASSES_DEFINE(LargeIntVal) -BIN_PRED_CLASSES_DEFINE(FloatVal) -BIN_PRED_CLASSES_DEFINE(DoubleVal) -BIN_PRED_CLASSES_DEFINE(StringVal) -BIN_PRED_CLASSES_DEFINE(DateTimeVal) -BIN_PRED_CLASSES_DEFINE(DecimalV2Val) - -#define BIN_PRED_FOR_NULL_CLASS_DEFINE(CLASS) \ - class CLASS : public BinaryPredicate { \ - public: \ - CLASS(const TExprNode& node) : BinaryPredicate(node) {} \ - virtual ~CLASS() {} \ - virtual Expr* clone(ObjectPool* pool) const override { \ - return pool->add(new CLASS(*this)); \ - } \ - \ - virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row) override; \ - }; - -#define BIN_PRED_FOR_NULL_CLASSES_DEFINE(TYPE) BIN_PRED_FOR_NULL_CLASS_DEFINE(EqForNull##TYPE##Pred) - -BIN_PRED_FOR_NULL_CLASSES_DEFINE(BooleanVal) -BIN_PRED_FOR_NULL_CLASSES_DEFINE(TinyIntVal) -BIN_PRED_FOR_NULL_CLASSES_DEFINE(SmallIntVal) -BIN_PRED_FOR_NULL_CLASSES_DEFINE(IntVal) -BIN_PRED_FOR_NULL_CLASSES_DEFINE(BigIntVal) -BIN_PRED_FOR_NULL_CLASSES_DEFINE(LargeIntVal) -BIN_PRED_FOR_NULL_CLASSES_DEFINE(FloatVal) -BIN_PRED_FOR_NULL_CLASSES_DEFINE(DoubleVal) -BIN_PRED_FOR_NULL_CLASSES_DEFINE(StringVal) -BIN_PRED_FOR_NULL_CLASSES_DEFINE(DateTimeVal) -BIN_PRED_FOR_NULL_CLASSES_DEFINE(DecimalV2Val) -} // namespace doris diff --git a/be/src/exprs/bloomfilter_predicate.h b/be/src/exprs/bloom_filter_func.h similarity index 90% rename from be/src/exprs/bloomfilter_predicate.h rename to be/src/exprs/bloom_filter_func.h index bf77a8fb3d..84a61ef2e2 100644 --- a/be/src/exprs/bloomfilter_predicate.h +++ b/be/src/exprs/bloom_filter_func.h @@ -17,27 +17,11 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include - -#include "common/object_pool.h" #include "exprs/block_bloom_filter.hpp" -#include "exprs/predicate.h" -#include "olap/decimal12.h" -#include "olap/rowset/segment_v2/bloom_filter.h" -#include "olap/uint24.h" -#include "util/hash_util.hpp" - -namespace butil { -class IOBufAsZeroCopyInputStream; -} +#include "exprs/runtime_filter.h" namespace doris { + class BloomFilterAdaptor { public: BloomFilterAdaptor() { _bloom_filter = std::make_shared(); } @@ -452,40 +436,4 @@ private: typename BloomFilterTypeTraits::FindOp dummy; }; -// BloomFilterPredicate only used in runtime filter -class BloomFilterPredicate : public Predicate { -public: - ~BloomFilterPredicate() override; - BloomFilterPredicate(const TExprNode& node); - BloomFilterPredicate(const BloomFilterPredicate& other); - Expr* clone(ObjectPool* pool) const override { - return pool->add(new BloomFilterPredicate(*this)); - } - using Predicate::prepare; - Status prepare(RuntimeState* state, std::shared_ptr bloomfilterfunc); - - std::shared_ptr get_bloom_filter_func() { return _filter; } - - BooleanVal get_boolean_val(ExprContext* context, TupleRow* row) override; - - Status open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) override; - -protected: - friend class Expr; - std::string debug_string() const override; - -private: - bool _is_prepare; - // if we set always = true, we will skip bloom filter - bool _always_true; - /// TODO: statistic filter rate in the profile - std::atomic _filtered_rows; - std::atomic _scan_rows; - - std::shared_ptr _filter; - bool _has_calculate_filter = false; - // if filter rate less than this, bloom filter will set always true - constexpr static double _expect_filter_rate = 0.4; -}; -} // namespace doris +} // namespace doris \ No newline at end of file diff --git a/be/src/exprs/bloomfilter_predicate.cpp b/be/src/exprs/bloomfilter_predicate.cpp deleted file mode 100644 index 1b514cd56b..0000000000 --- a/be/src/exprs/bloomfilter_predicate.cpp +++ /dev/null @@ -1,96 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/bloomfilter_predicate.h" - -#include - -#include "exprs/expr_context.h" -#include "runtime/runtime_state.h" - -namespace doris { - -BloomFilterPredicate::BloomFilterPredicate(const TExprNode& node) - : Predicate(node), - _is_prepare(false), - _always_true(false), - _filtered_rows(0), - _scan_rows(0) {} - -BloomFilterPredicate::~BloomFilterPredicate() { - VLOG_NOTICE << "bloom filter rows:" << _filtered_rows << ",scan_rows:" << _scan_rows - << ",rate:" << (double)_filtered_rows / _scan_rows; -} - -BloomFilterPredicate::BloomFilterPredicate(const BloomFilterPredicate& other) - : Predicate(other), - _is_prepare(other._is_prepare), - _always_true(other._always_true), - _filtered_rows(), - _scan_rows() {} - -Status BloomFilterPredicate::prepare(RuntimeState* state, - std::shared_ptr filter) { - // DCHECK(filter != nullptr); - if (_is_prepare) { - return Status::OK(); - } - _filter = filter; - if (nullptr == _filter) { - return Status::InternalError("Unknown column type."); - } - _is_prepare = true; - return Status::OK(); -} - -std::string BloomFilterPredicate::debug_string() const { - std::stringstream out; - out << "BloomFilterPredicate()"; - return out.str(); -} - -BooleanVal BloomFilterPredicate::get_boolean_val(ExprContext* ctx, TupleRow* row) { - if (_always_true) { - return BooleanVal(true); - } - const void* lhs_slot = ctx->get_value(_children[0], row); - if (lhs_slot == nullptr) { - return BooleanVal::null(); - } - _scan_rows++; - if (_filter->find(lhs_slot)) { - return BooleanVal(true); - } - _filtered_rows++; - - if (!_has_calculate_filter && _scan_rows >= config::bloom_filter_predicate_check_row_num) { - double rate = (double)_filtered_rows / _scan_rows; - if (rate < _expect_filter_rate) { - _always_true = true; - } - _has_calculate_filter = true; - } - return BooleanVal(false); -} - -Status BloomFilterPredicate::open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) { - Expr::open(state, context, scope); - return Status::OK(); -} - -} // namespace doris diff --git a/be/src/exprs/case_expr.cpp b/be/src/exprs/case_expr.cpp deleted file mode 100644 index b451179fe5..0000000000 --- a/be/src/exprs/case_expr.cpp +++ /dev/null @@ -1,233 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/case-expr.cpp -// and modified by Doris - -#include "exprs/case_expr.h" - -#include "exprs/anyval_util.h" -#include "exprs/expr_context.h" -#include "gen_cpp/Exprs_types.h" -#include "runtime/runtime_state.h" - -namespace doris { - -struct CaseExprState { - // Space to store the values being compared in the interpreted path. This makes it - // easier to pass around AnyVal subclasses. Allocated from the runtime state's object - // pool in Prepare(). - AnyVal* case_val; - AnyVal* when_val; -}; - -CaseExpr::CaseExpr(const TExprNode& node) - : Expr(node), - _has_case_expr(node.case_expr.has_case_expr), - _has_else_expr(node.case_expr.has_else_expr) {} - -CaseExpr::~CaseExpr() {} - -Status CaseExpr::prepare(RuntimeState* state, const RowDescriptor& desc, ExprContext* ctx) { - RETURN_IF_ERROR(Expr::prepare(state, desc, ctx)); - register_function_context(ctx, state, 0); - return Status::OK(); -} - -Status CaseExpr::open(RuntimeState* state, ExprContext* ctx, - FunctionContext::FunctionStateScope scope) { - RETURN_IF_ERROR(Expr::open(state, ctx, scope)); - FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); - CaseExprState* case_state = - reinterpret_cast(fn_ctx->allocate(sizeof(CaseExprState))); - fn_ctx->set_function_state(FunctionContext::THREAD_LOCAL, case_state); - if (_has_case_expr) { - case_state->case_val = create_any_val(state->obj_pool(), _children[0]->type()); - case_state->when_val = create_any_val(state->obj_pool(), _children[1]->type()); - } else { - case_state->case_val = create_any_val(state->obj_pool(), TypeDescriptor(TYPE_BOOLEAN)); - case_state->when_val = create_any_val(state->obj_pool(), _children[0]->type()); - } - return Status::OK(); -} - -void CaseExpr::close(RuntimeState* state, ExprContext* ctx, - FunctionContext::FunctionStateScope scope) { - if (_fn_context_index != -1) { - FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); - void* case_state = fn_ctx->get_function_state(FunctionContext::THREAD_LOCAL); - fn_ctx->free(reinterpret_cast(case_state)); - } - Expr::close(state, ctx, scope); -} - -std::string CaseExpr::debug_string() const { - std::stringstream out; - out << "CaseExpr(has_case_expr=" << _has_case_expr << " has_else_expr=" << _has_else_expr << " " - << Expr::debug_string() << ")"; - return out.str(); -} - -void CaseExpr::get_child_val(int child_idx, ExprContext* ctx, TupleRow* row, AnyVal* dst) { - switch (_children[child_idx]->type().type) { - case TYPE_BOOLEAN: - *reinterpret_cast(dst) = _children[child_idx]->get_boolean_val(ctx, row); - break; - case TYPE_TINYINT: - *reinterpret_cast(dst) = _children[child_idx]->get_tiny_int_val(ctx, row); - break; - case TYPE_SMALLINT: - *reinterpret_cast(dst) = _children[child_idx]->get_small_int_val(ctx, row); - break; - case TYPE_INT: - *reinterpret_cast(dst) = _children[child_idx]->get_int_val(ctx, row); - break; - case TYPE_BIGINT: - *reinterpret_cast(dst) = _children[child_idx]->get_big_int_val(ctx, row); - break; - case TYPE_FLOAT: - *reinterpret_cast(dst) = _children[child_idx]->get_float_val(ctx, row); - break; - case TYPE_DOUBLE: - *reinterpret_cast(dst) = _children[child_idx]->get_double_val(ctx, row); - break; - case TYPE_DATE: - case TYPE_DATETIME: - *reinterpret_cast(dst) = _children[child_idx]->get_datetime_val(ctx, row); - break; - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: - *reinterpret_cast(dst) = _children[child_idx]->get_string_val(ctx, row); - break; - case TYPE_DECIMALV2: - *reinterpret_cast(dst) = _children[child_idx]->get_decimalv2_val(ctx, row); - break; - case TYPE_LARGEINT: - *reinterpret_cast(dst) = _children[child_idx]->get_large_int_val(ctx, row); - break; - default: - DCHECK(false) << _children[child_idx]->type(); - } -} - -bool CaseExpr::any_val_eq(const TypeDescriptor& type, const AnyVal* v1, const AnyVal* v2) { - switch (type.type) { - case TYPE_BOOLEAN: - return AnyValUtil::equals(type, *reinterpret_cast(v1), - *reinterpret_cast(v2)); - case TYPE_TINYINT: - return AnyValUtil::equals(type, *reinterpret_cast(v1), - *reinterpret_cast(v2)); - case TYPE_SMALLINT: - return AnyValUtil::equals(type, *reinterpret_cast(v1), - *reinterpret_cast(v2)); - case TYPE_INT: - return AnyValUtil::equals(type, *reinterpret_cast(v1), - *reinterpret_cast(v2)); - case TYPE_BIGINT: - return AnyValUtil::equals(type, *reinterpret_cast(v1), - *reinterpret_cast(v2)); - case TYPE_FLOAT: - return AnyValUtil::equals(type, *reinterpret_cast(v1), - *reinterpret_cast(v2)); - case TYPE_DOUBLE: - return AnyValUtil::equals(type, *reinterpret_cast(v1), - *reinterpret_cast(v2)); - case TYPE_DATE: - case TYPE_DATETIME: - return AnyValUtil::equals(type, *reinterpret_cast(v1), - *reinterpret_cast(v2)); - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: - return AnyValUtil::equals(type, *reinterpret_cast(v1), - *reinterpret_cast(v2)); - case TYPE_DECIMALV2: - return AnyValUtil::equals(type, *reinterpret_cast(v1), - *reinterpret_cast(v2)); - case TYPE_LARGEINT: - return AnyValUtil::equals(type, *reinterpret_cast(v1), - *reinterpret_cast(v2)); - default: - DCHECK(false) << type; - return false; - } -} - -#define CASE_COMPUTE_FN(THEN_TYPE, TYPE_NAME) \ - THEN_TYPE CaseExpr::get_##TYPE_NAME(ExprContext* ctx, TupleRow* row) { \ - FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); \ - CaseExprState* state = reinterpret_cast( \ - fn_ctx->get_function_state(FunctionContext::THREAD_LOCAL)); \ - DCHECK(state->case_val != nullptr); \ - DCHECK(state->when_val != nullptr); \ - int num_children = _children.size(); \ - if (has_case_expr()) { \ - /* All case and when exprs return the same type */ \ - /* (we guaranteed that during analysis). */ \ - get_child_val(0, ctx, row, state->case_val); \ - } else { \ - /* If there's no case expression, compare the when values to "true". */ \ - *reinterpret_cast(state->case_val) = BooleanVal(true); \ - } \ - if (state->case_val->is_null) { \ - if (has_else_expr()) { \ - /* Return else value. */ \ - return _children[num_children - 1]->get_##TYPE_NAME(ctx, row); \ - } else { \ - return THEN_TYPE::null(); \ - } \ - } \ - int loop_start = has_case_expr() ? 1 : 0; \ - int loop_end = (has_else_expr()) ? num_children - 1 : num_children; \ - for (int i = loop_start; i < loop_end; i += 2) { \ - get_child_val(i, ctx, row, state->when_val); \ - if (state->when_val->is_null) continue; \ - if (any_val_eq(_children[0]->type(), state->case_val, state->when_val)) { \ - /* Return then value. */ \ - return _children[i + 1]->get_##TYPE_NAME(ctx, row); \ - } \ - } \ - if (has_else_expr()) { \ - /* Return else value. */ \ - return _children[num_children - 1]->get_##TYPE_NAME(ctx, row); \ - } \ - return THEN_TYPE::null(); \ - } - -#define CASE_COMPUTE_FN_WRAPPER(TYPE, TYPE_NAME) CASE_COMPUTE_FN(TYPE, TYPE_NAME) - -CASE_COMPUTE_FN_WRAPPER(BooleanVal, boolean_val) -CASE_COMPUTE_FN_WRAPPER(TinyIntVal, tiny_int_val) -CASE_COMPUTE_FN_WRAPPER(SmallIntVal, small_int_val) -CASE_COMPUTE_FN_WRAPPER(IntVal, int_val) -CASE_COMPUTE_FN_WRAPPER(BigIntVal, big_int_val) -CASE_COMPUTE_FN_WRAPPER(LargeIntVal, large_int_val) -CASE_COMPUTE_FN_WRAPPER(FloatVal, float_val) -CASE_COMPUTE_FN_WRAPPER(DoubleVal, double_val) -CASE_COMPUTE_FN_WRAPPER(StringVal, string_val) -CASE_COMPUTE_FN_WRAPPER(DateTimeVal, datetime_val) -CASE_COMPUTE_FN_WRAPPER(DecimalV2Val, decimalv2_val) - -} // namespace doris diff --git a/be/src/exprs/case_expr.h b/be/src/exprs/case_expr.h deleted file mode 100644 index 4f83163f07..0000000000 --- a/be/src/exprs/case_expr.h +++ /dev/null @@ -1,81 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/case-expr.h -// and modified by Doris - -#pragma once - -#include - -#include "common/object_pool.h" -#include "expr.h" - -namespace doris { - -class TExprNode; - -class CaseExpr : public Expr { -public: - virtual ~CaseExpr(); - virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new CaseExpr(*this)); } - virtual BooleanVal get_boolean_val(ExprContext* ctx, TupleRow* row) override; - virtual TinyIntVal get_tiny_int_val(ExprContext* ctx, TupleRow* row) override; - virtual SmallIntVal get_small_int_val(ExprContext* ctx, TupleRow* row) override; - virtual IntVal get_int_val(ExprContext* ctx, TupleRow* row) override; - virtual BigIntVal get_big_int_val(ExprContext* ctx, TupleRow* row) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; - virtual FloatVal get_float_val(ExprContext* ctx, TupleRow* row) override; - virtual DoubleVal get_double_val(ExprContext* ctx, TupleRow* row) override; - virtual StringVal get_string_val(ExprContext* ctx, TupleRow* row) override; - virtual DateTimeVal get_datetime_val(ExprContext* ctx, TupleRow* row) override; - virtual DecimalV2Val get_decimalv2_val(ExprContext* ctx, TupleRow* row) override; - -protected: - friend class Expr; - friend class ComputeFunctions; - friend class ConditionalFunctions; - friend class DecimalOperators; - friend class DecimalV2Operators; - - CaseExpr(const TExprNode& node); - virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* context) override; - virtual Status open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) override; - virtual void close(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) override; - - virtual std::string debug_string() const override; - - bool has_case_expr() { return _has_case_expr; } - - bool has_else_expr() { return _has_else_expr; } - -private: - const bool _has_case_expr; - const bool _has_else_expr; - - /// Populates 'dst' with the result of calling the appropriate Get*Val() function on the - /// specified child expr. - void get_child_val(int child_idx, ExprContext* ctx, TupleRow* row, AnyVal* dst); - - /// Return true iff *v1 == *v2. v1 and v2 should both be of the specified type. - bool any_val_eq(const TypeDescriptor& type, const AnyVal* v1, const AnyVal* v2); -}; - -} // namespace doris diff --git a/be/src/exprs/cast_expr.cpp b/be/src/exprs/cast_expr.cpp deleted file mode 100644 index eaed69faed..0000000000 --- a/be/src/exprs/cast_expr.cpp +++ /dev/null @@ -1,156 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/cast-expr.cpp -// and modified by Doris - -#include "exprs/cast_expr.h" - -namespace doris { - -Expr* CastExpr::from_thrift(const TExprNode& node) { - switch (node.child_type) { - case TPrimitiveType::BOOLEAN: - return new CastBooleanExpr(node); - case TPrimitiveType::TINYINT: - return new CastTinyIntExpr(node); - case TPrimitiveType::SMALLINT: - return new CastSmallIntExpr(node); - case TPrimitiveType::INT: - return new CastIntExpr(node); - case TPrimitiveType::BIGINT: - return new CastBigIntExpr(node); - case TPrimitiveType::LARGEINT: - return new CastLargeIntExpr(node); - case TPrimitiveType::FLOAT: - return new CastFloatExpr(node); - case TPrimitiveType::DOUBLE: - return new CastDoubleExpr(node); - default: - return nullptr; - } - return nullptr; -} - -#define CAST_SAME(CLASS, TYPE, FN) \ - TYPE CLASS::FN(ExprContext* context, TupleRow* row) { return _children[0]->FN(context, row); } - -#define CAST_FUNCTION(CLASS, TO_TYPE, TO_FN, FROM_TYPE, FROM_FN) \ - TO_TYPE CLASS::TO_FN(ExprContext* context, TupleRow* row) { \ - FROM_TYPE v = _children[0]->FROM_FN(context, row); \ - if (v.is_null) { \ - return TO_TYPE::null(); \ - } \ - return TO_TYPE(v.val); \ - } - -#define CAST_FROM_BOOLEAN(TO_TYPE, TO_FN) \ - CAST_FUNCTION(CastBooleanExpr, TO_TYPE, TO_FN, BooleanVal, get_boolean_val) - -CAST_SAME(CastBooleanExpr, BooleanVal, get_boolean_val) -CAST_FROM_BOOLEAN(TinyIntVal, get_tiny_int_val) -CAST_FROM_BOOLEAN(SmallIntVal, get_small_int_val) -CAST_FROM_BOOLEAN(IntVal, get_int_val) -CAST_FROM_BOOLEAN(BigIntVal, get_big_int_val) -CAST_FROM_BOOLEAN(LargeIntVal, get_large_int_val) -CAST_FROM_BOOLEAN(FloatVal, get_float_val) -CAST_FROM_BOOLEAN(DoubleVal, get_double_val) - -#define CAST_FROM_TINYINT(TO_TYPE, TO_FN) \ - CAST_FUNCTION(CastTinyIntExpr, TO_TYPE, TO_FN, TinyIntVal, get_tiny_int_val) - -CAST_SAME(CastTinyIntExpr, TinyIntVal, get_tiny_int_val) -CAST_FROM_TINYINT(BooleanVal, get_boolean_val) -CAST_FROM_TINYINT(SmallIntVal, get_small_int_val) -CAST_FROM_TINYINT(IntVal, get_int_val) -CAST_FROM_TINYINT(BigIntVal, get_big_int_val) -CAST_FROM_TINYINT(LargeIntVal, get_large_int_val) -CAST_FROM_TINYINT(FloatVal, get_float_val) -CAST_FROM_TINYINT(DoubleVal, get_double_val) - -#define CAST_FROM_SMALLINT(TO_TYPE, TO_FN) \ - CAST_FUNCTION(CastSmallIntExpr, TO_TYPE, TO_FN, SmallIntVal, get_small_int_val) - -CAST_SAME(CastSmallIntExpr, SmallIntVal, get_small_int_val) -CAST_FROM_SMALLINT(BooleanVal, get_boolean_val) -CAST_FROM_SMALLINT(TinyIntVal, get_tiny_int_val) -CAST_FROM_SMALLINT(IntVal, get_int_val) -CAST_FROM_SMALLINT(BigIntVal, get_big_int_val) -CAST_FROM_SMALLINT(LargeIntVal, get_large_int_val) -CAST_FROM_SMALLINT(FloatVal, get_float_val) -CAST_FROM_SMALLINT(DoubleVal, get_double_val) - -#define CAST_FROM_INT(TO_TYPE, TO_FN) \ - CAST_FUNCTION(CastIntExpr, TO_TYPE, TO_FN, IntVal, get_int_val) - -CAST_SAME(CastIntExpr, IntVal, get_int_val) -CAST_FROM_INT(BooleanVal, get_boolean_val) -CAST_FROM_INT(TinyIntVal, get_tiny_int_val) -CAST_FROM_INT(SmallIntVal, get_small_int_val) -CAST_FROM_INT(BigIntVal, get_big_int_val) -CAST_FROM_INT(LargeIntVal, get_large_int_val) -CAST_FROM_INT(FloatVal, get_float_val) -CAST_FROM_INT(DoubleVal, get_double_val) - -#define CAST_FROM_BIGINT(TO_TYPE, TO_FN) \ - CAST_FUNCTION(CastBigIntExpr, TO_TYPE, TO_FN, BigIntVal, get_big_int_val) - -CAST_SAME(CastBigIntExpr, BigIntVal, get_big_int_val) -CAST_FROM_BIGINT(BooleanVal, get_boolean_val) -CAST_FROM_BIGINT(TinyIntVal, get_tiny_int_val) -CAST_FROM_BIGINT(SmallIntVal, get_small_int_val) -CAST_FROM_BIGINT(IntVal, get_int_val) -CAST_FROM_BIGINT(LargeIntVal, get_large_int_val) -CAST_FROM_BIGINT(FloatVal, get_float_val) -CAST_FROM_BIGINT(DoubleVal, get_double_val) - -#define CAST_FROM_LARGEINT(TO_TYPE, TO_FN) \ - CAST_FUNCTION(CastLargeIntExpr, TO_TYPE, TO_FN, LargeIntVal, get_large_int_val) - -CAST_SAME(CastLargeIntExpr, LargeIntVal, get_large_int_val) -CAST_FROM_LARGEINT(BooleanVal, get_boolean_val) -CAST_FROM_LARGEINT(TinyIntVal, get_tiny_int_val) -CAST_FROM_LARGEINT(SmallIntVal, get_small_int_val) -CAST_FROM_LARGEINT(IntVal, get_int_val) -CAST_FROM_LARGEINT(BigIntVal, get_big_int_val) -CAST_FROM_LARGEINT(FloatVal, get_float_val) -CAST_FROM_LARGEINT(DoubleVal, get_double_val) - -#define CAST_FROM_FLOAT(TO_TYPE, TO_FN) \ - CAST_FUNCTION(CastFloatExpr, TO_TYPE, TO_FN, FloatVal, get_float_val) - -CAST_SAME(CastFloatExpr, FloatVal, get_float_val) -CAST_FROM_FLOAT(BooleanVal, get_boolean_val) -CAST_FROM_FLOAT(TinyIntVal, get_tiny_int_val) -CAST_FROM_FLOAT(SmallIntVal, get_small_int_val) -CAST_FROM_FLOAT(IntVal, get_int_val) -CAST_FROM_FLOAT(BigIntVal, get_big_int_val) -CAST_FROM_FLOAT(LargeIntVal, get_large_int_val) -CAST_FROM_FLOAT(DoubleVal, get_double_val) - -#define CAST_FROM_DOUBLE(TO_TYPE, TO_FN) \ - CAST_FUNCTION(CastDoubleExpr, TO_TYPE, TO_FN, DoubleVal, get_double_val) - -CAST_SAME(CastDoubleExpr, DoubleVal, get_double_val) -CAST_FROM_DOUBLE(BooleanVal, get_boolean_val) -CAST_FROM_DOUBLE(TinyIntVal, get_tiny_int_val) -CAST_FROM_DOUBLE(SmallIntVal, get_small_int_val) -CAST_FROM_DOUBLE(IntVal, get_int_val) -CAST_FROM_DOUBLE(BigIntVal, get_big_int_val) -CAST_FROM_DOUBLE(LargeIntVal, get_large_int_val) -CAST_FROM_DOUBLE(FloatVal, get_float_val) -} // namespace doris diff --git a/be/src/exprs/cast_expr.h b/be/src/exprs/cast_expr.h deleted file mode 100644 index 099423f619..0000000000 --- a/be/src/exprs/cast_expr.h +++ /dev/null @@ -1,62 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/cast-expr.h -// and modified by Doris - -#pragma once - -#include "common/object_pool.h" -#include "exprs/expr.h" - -namespace doris { - -class CastExpr : public Expr { -public: - CastExpr(const TExprNode& node) : Expr(node) {} - virtual ~CastExpr() {} - static Expr* from_thrift(const TExprNode& node); -}; - -#define CAST_EXPR_DEFINE(CLASS) \ - class CLASS : public CastExpr { \ - public: \ - CLASS(const TExprNode& node) : CastExpr(node) {} \ - virtual ~CLASS() {} \ - virtual Expr* clone(ObjectPool* pool) const override { \ - return pool->add(new CLASS(*this)); \ - } \ - virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow*) override; \ - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; \ - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; \ - virtual IntVal get_int_val(ExprContext* context, TupleRow*) override; \ - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; \ - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; \ - virtual FloatVal get_float_val(ExprContext* context, TupleRow*) override; \ - virtual DoubleVal get_double_val(ExprContext* context, TupleRow*) override; \ - }; - -CAST_EXPR_DEFINE(CastBooleanExpr); -CAST_EXPR_DEFINE(CastTinyIntExpr); -CAST_EXPR_DEFINE(CastSmallIntExpr); -CAST_EXPR_DEFINE(CastIntExpr); -CAST_EXPR_DEFINE(CastBigIntExpr); -CAST_EXPR_DEFINE(CastLargeIntExpr); -CAST_EXPR_DEFINE(CastFloatExpr); -CAST_EXPR_DEFINE(CastDoubleExpr); - -} // namespace doris diff --git a/be/src/exprs/cast_functions.h b/be/src/exprs/cast_functions.h index df370fdba2..e9f8bee2b6 100644 --- a/be/src/exprs/cast_functions.h +++ b/be/src/exprs/cast_functions.h @@ -24,6 +24,20 @@ namespace doris { +using doris_udf::FunctionContext; +using doris_udf::BooleanVal; +using doris_udf::TinyIntVal; +using doris_udf::SmallIntVal; +using doris_udf::IntVal; +using doris_udf::BigIntVal; +using doris_udf::LargeIntVal; +using doris_udf::FloatVal; +using doris_udf::DoubleVal; +using doris_udf::DecimalV2Val; +using doris_udf::DateTimeVal; +using doris_udf::StringVal; +using doris_udf::AnyVal; + class CastFunctions { public: static void init(); diff --git a/be/src/exprs/compound_predicate.cpp b/be/src/exprs/compound_predicate.cpp deleted file mode 100644 index e4d5cbb213..0000000000 --- a/be/src/exprs/compound_predicate.cpp +++ /dev/null @@ -1,84 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/compound-predicate.cc -// and modified by Doris - -#include "exprs/compound_predicate.h" - -#include - -namespace doris { - -CompoundPredicate::CompoundPredicate(const TExprNode& node) : Predicate(node) {} - -void CompoundPredicate::init() {} - -BooleanVal CompoundPredicate::compound_not(FunctionContext* context, const BooleanVal& v) { - if (v.is_null) { - return BooleanVal::null(); - } - return BooleanVal(!v.val); -} - -BooleanVal AndPredicate::get_boolean_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_children.size(), 2); - BooleanVal val1 = _children[0]->get_boolean_val(context, row); - if (!val1.is_null && !val1.val) { - return BooleanVal(false); - } - BooleanVal val2 = _children[1]->get_boolean_val(context, row); - if (!val2.is_null && !val2.val) { - return BooleanVal(false); - } - if (val1.is_null || val2.is_null) { - return BooleanVal::null(); - } - return BooleanVal(true); -} - -BooleanVal OrPredicate::get_boolean_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_children.size(), 2); - BooleanVal val1 = _children[0]->get_boolean_val(context, row); - if (!val1.is_null && val1.val) { - return BooleanVal(true); - } - BooleanVal val2 = _children[1]->get_boolean_val(context, row); - if (!val2.is_null && val2.val) { - return BooleanVal(true); - } - if (val1.is_null || val2.is_null) { - return BooleanVal::null(); - } - return BooleanVal(false); -} - -BooleanVal NotPredicate::get_boolean_val(ExprContext* context, TupleRow* row) { - BooleanVal val = _children[0]->get_boolean_val(context, row); - if (val.is_null) { - return BooleanVal::null(); - } - return BooleanVal(!val.val); -} - -std::string CompoundPredicate::debug_string() const { - std::stringstream out; - out << "CompoundPredicate(" << Expr::debug_string() << ")"; - return out.str(); -} - -} // namespace doris diff --git a/be/src/exprs/compound_predicate.h b/be/src/exprs/compound_predicate.h deleted file mode 100644 index 28aadb37ef..0000000000 --- a/be/src/exprs/compound_predicate.h +++ /dev/null @@ -1,115 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/compound-predicate.h -// and modified by Doris - -#pragma once - -#include - -#include "common/object_pool.h" -#include "exprs/predicate.h" -#include "gen_cpp/Exprs_types.h" - -namespace doris { - -class CompoundPredicate : public Predicate { -public: - static void init(); - static BooleanVal compound_not(FunctionContext* context, const BooleanVal&); - -protected: - friend class Expr; - - CompoundPredicate(const TExprNode& node); - - // virtual Status prepare(RuntimeState* state, const RowDescriptor& desc); - virtual std::string debug_string() const; - - virtual bool is_vectorized() const { return false; } - -private: - friend class OpcodeRegistry; -}; - -/// Expr for evaluating and (&&) operators -class AndPredicate : public CompoundPredicate { -public: - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new AndPredicate(*this)); - } - virtual doris_udf::BooleanVal get_boolean_val(ExprContext* context, TupleRow*) override; - -protected: - friend class Expr; - AndPredicate(const TExprNode& node) : CompoundPredicate(node) {} - - virtual std::string debug_string() const override { - std::stringstream out; - out << "AndPredicate(" << Expr::debug_string() << ")"; - return out.str(); - } - -private: - friend class OpcodeRegistry; -}; - -/// Expr for evaluating or (||) operators -class OrPredicate : public CompoundPredicate { -public: - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new OrPredicate(*this)); - } - virtual doris_udf::BooleanVal get_boolean_val(ExprContext* context, TupleRow*) override; - -protected: - friend class Expr; - OrPredicate(const TExprNode& node) : CompoundPredicate(node) {} - - virtual std::string debug_string() const override { - std::stringstream out; - out << "OrPredicate(" << Expr::debug_string() << ")"; - return out.str(); - } - -private: - friend class OpcodeRegistry; -}; - -/// Expr for evaluating or (||) operators -class NotPredicate : public CompoundPredicate { -public: - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new NotPredicate(*this)); - } - virtual doris_udf::BooleanVal get_boolean_val(ExprContext* context, TupleRow*) override; - -protected: - friend class Expr; - NotPredicate(const TExprNode& node) : CompoundPredicate(node) {} - - virtual std::string debug_string() const override { - std::stringstream out; - out << "NotPredicate(" << Expr::debug_string() << ")"; - return out.str(); - } - -private: - friend class OpcodeRegistry; -}; -} // namespace doris diff --git a/be/src/exprs/conditional_functions.cpp b/be/src/exprs/conditional_functions.cpp deleted file mode 100644 index 7c76c20525..0000000000 --- a/be/src/exprs/conditional_functions.cpp +++ /dev/null @@ -1,132 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/conditional-functions.cc -// and modified by Doris - -#include "exprs/conditional_functions.h" - -#include "exprs/anyval_util.h" -#include "exprs/expr.h" -#include "runtime/tuple_row.h" -#include "udf/udf.h" - -namespace doris { - -#define CTOR_DCTOR_FUN(expr_class) \ - expr_class::expr_class(const TExprNode& node) : Expr(node) {}; \ - \ - expr_class::~expr_class() {}; - -CTOR_DCTOR_FUN(IfNullExpr); -CTOR_DCTOR_FUN(NullIfExpr); -CTOR_DCTOR_FUN(IfExpr); -CTOR_DCTOR_FUN(CoalesceExpr); - -#define IF_NULL_COMPUTE_FUNCTION(type, type_name) \ - type IfNullExpr::get_##type_name(ExprContext* context, TupleRow* row) { \ - DCHECK_EQ(_children.size(), 2); \ - type val = _children[0]->get_##type_name(context, row); \ - if (!val.is_null) return val; /* short-circuit */ \ - return _children[1]->get_##type_name(context, row); \ - } - -IF_NULL_COMPUTE_FUNCTION(BooleanVal, boolean_val); -IF_NULL_COMPUTE_FUNCTION(TinyIntVal, tiny_int_val); -IF_NULL_COMPUTE_FUNCTION(SmallIntVal, small_int_val); -IF_NULL_COMPUTE_FUNCTION(IntVal, int_val); -IF_NULL_COMPUTE_FUNCTION(BigIntVal, big_int_val); -IF_NULL_COMPUTE_FUNCTION(FloatVal, float_val); -IF_NULL_COMPUTE_FUNCTION(DoubleVal, double_val); -IF_NULL_COMPUTE_FUNCTION(StringVal, string_val); -IF_NULL_COMPUTE_FUNCTION(DateTimeVal, datetime_val); -IF_NULL_COMPUTE_FUNCTION(DecimalV2Val, decimalv2_val); -IF_NULL_COMPUTE_FUNCTION(LargeIntVal, large_int_val); - -#define NULL_IF_COMPUTE_FUNCTION(TYPE, type_name) \ - TYPE NullIfExpr::get_##type_name(ExprContext* ctx, TupleRow* row) { \ - DCHECK_EQ(_children.size(), 2); \ - TYPE lhs_val = _children[0]->get_##type_name(ctx, row); \ - /* Short-circuit in case lhs_val is nullptr. Can never be equal to RHS. */ \ - if (lhs_val.is_null) return TYPE::null(); \ - /* Get rhs and return nullptr if lhs == rhs, lhs otherwise */ \ - TYPE rhs_val = _children[1]->get_##type_name(ctx, row); \ - if (!rhs_val.is_null && AnyValUtil::equals(_children[0]->type(), lhs_val, rhs_val)) { \ - return TYPE::null(); \ - } \ - return lhs_val; \ - } - -// Just for code check..... -#define NULL_IF_COMPUTE_FUNCTION_WRAPPER(TYPE, type_name) NULL_IF_COMPUTE_FUNCTION(TYPE, type_name) - -NULL_IF_COMPUTE_FUNCTION_WRAPPER(BooleanVal, boolean_val); -NULL_IF_COMPUTE_FUNCTION_WRAPPER(TinyIntVal, tiny_int_val); -NULL_IF_COMPUTE_FUNCTION_WRAPPER(SmallIntVal, small_int_val); -NULL_IF_COMPUTE_FUNCTION_WRAPPER(IntVal, int_val); -NULL_IF_COMPUTE_FUNCTION_WRAPPER(BigIntVal, big_int_val); -NULL_IF_COMPUTE_FUNCTION_WRAPPER(FloatVal, float_val); -NULL_IF_COMPUTE_FUNCTION_WRAPPER(DoubleVal, double_val); -NULL_IF_COMPUTE_FUNCTION_WRAPPER(StringVal, string_val); -NULL_IF_COMPUTE_FUNCTION_WRAPPER(DateTimeVal, datetime_val); -NULL_IF_COMPUTE_FUNCTION_WRAPPER(DecimalV2Val, decimalv2_val); -NULL_IF_COMPUTE_FUNCTION_WRAPPER(LargeIntVal, large_int_val); - -#define IF_COMPUTE_FUNCTION(type, type_name) \ - type IfExpr::get_##type_name(ExprContext* context, TupleRow* row) { \ - DCHECK_EQ(_children.size(), 3); \ - BooleanVal cond = _children[0]->get_boolean_val(context, row); \ - if (cond.is_null || !cond.val) { \ - return _children[2]->get_##type_name(context, row); \ - } \ - return _children[1]->get_##type_name(context, row); \ - } - -IF_COMPUTE_FUNCTION(BooleanVal, boolean_val); -IF_COMPUTE_FUNCTION(TinyIntVal, tiny_int_val); -IF_COMPUTE_FUNCTION(SmallIntVal, small_int_val); -IF_COMPUTE_FUNCTION(IntVal, int_val); -IF_COMPUTE_FUNCTION(BigIntVal, big_int_val); -IF_COMPUTE_FUNCTION(FloatVal, float_val); -IF_COMPUTE_FUNCTION(DoubleVal, double_val); -IF_COMPUTE_FUNCTION(StringVal, string_val); -IF_COMPUTE_FUNCTION(DateTimeVal, datetime_val); -IF_COMPUTE_FUNCTION(DecimalV2Val, decimalv2_val); -IF_COMPUTE_FUNCTION(LargeIntVal, large_int_val); - -#define COALESCE_COMPUTE_FUNCTION(type, type_name) \ - type CoalesceExpr::get_##type_name(ExprContext* context, TupleRow* row) { \ - DCHECK_GE(_children.size(), 1); \ - for (int i = 0; i < _children.size(); ++i) { \ - type val = _children[i]->get_##type_name(context, row); \ - if (!val.is_null) return val; \ - } \ - return type::null(); \ - } - -COALESCE_COMPUTE_FUNCTION(BooleanVal, boolean_val); -COALESCE_COMPUTE_FUNCTION(TinyIntVal, tiny_int_val); -COALESCE_COMPUTE_FUNCTION(SmallIntVal, small_int_val); -COALESCE_COMPUTE_FUNCTION(IntVal, int_val); -COALESCE_COMPUTE_FUNCTION(BigIntVal, big_int_val); -COALESCE_COMPUTE_FUNCTION(FloatVal, float_val); -COALESCE_COMPUTE_FUNCTION(DoubleVal, double_val); -COALESCE_COMPUTE_FUNCTION(StringVal, string_val); -COALESCE_COMPUTE_FUNCTION(DateTimeVal, datetime_val); -COALESCE_COMPUTE_FUNCTION(DecimalV2Val, decimalv2_val); -COALESCE_COMPUTE_FUNCTION(LargeIntVal, large_int_val); -} // namespace doris diff --git a/be/src/exprs/conditional_functions.h b/be/src/exprs/conditional_functions.h deleted file mode 100644 index cf88e1f134..0000000000 --- a/be/src/exprs/conditional_functions.h +++ /dev/null @@ -1,139 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/conditional-functions.h -// and modified by Doris - -#pragma once - -#include - -#include "common/object_pool.h" -#include "exprs/expr.h" -#include "udf/udf.h" - -namespace doris { - -class TupleRow; - -class ConditionalFunctions { -public: -}; - -/// The following conditional functions require separate Expr classes to take advantage of -/// short circuiting - -class IfNullExpr : public Expr { -public: - virtual ~IfNullExpr(); - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new IfNullExpr(*this)); - } - virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row) override; - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow* row) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow* row) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow* row) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow* row) override; - virtual FloatVal get_float_val(ExprContext* context, TupleRow* row) override; - virtual DoubleVal get_double_val(ExprContext* context, TupleRow* row) override; - virtual StringVal get_string_val(ExprContext* context, TupleRow* row) override; - virtual DateTimeVal get_datetime_val(ExprContext* context, TupleRow* row) override; - virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow* row) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow* row) override; - - virtual std::string debug_string() const override { return Expr::debug_string("IfNullExpr"); } - -protected: - friend class Expr; - IfNullExpr(const TExprNode& node); -}; - -class NullIfExpr : public Expr { -public: - virtual ~NullIfExpr(); - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new NullIfExpr(*this)); - } - virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row) override; - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow* row) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow* row) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow* row) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow* row) override; - virtual FloatVal get_float_val(ExprContext* context, TupleRow* row) override; - virtual DoubleVal get_double_val(ExprContext* context, TupleRow* row) override; - virtual StringVal get_string_val(ExprContext* context, TupleRow* row) override; - virtual DateTimeVal get_datetime_val(ExprContext* context, TupleRow* row) override; - virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow* row) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow* row) override; - - virtual std::string debug_string() const override { return Expr::debug_string("NullIfExpr"); } - -protected: - friend class Expr; - NullIfExpr(const TExprNode& node); -}; - -class IfExpr : public Expr { -public: - virtual ~IfExpr(); - virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new IfExpr(*this)); } - virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row) override; - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow* row) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow* row) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow* row) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow* row) override; - virtual FloatVal get_float_val(ExprContext* context, TupleRow* row) override; - virtual DoubleVal get_double_val(ExprContext* context, TupleRow* row) override; - virtual StringVal get_string_val(ExprContext* context, TupleRow* row) override; - virtual DateTimeVal get_datetime_val(ExprContext* context, TupleRow* row) override; - virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow* row) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow* row) override; - - virtual std::string debug_string() const override { return Expr::debug_string("IfExpr"); } - -protected: - friend class Expr; - IfExpr(const TExprNode& node); -}; - -// Returns the first non-nullptr value in the list, or nullptr if there are no non-nullptr values. -class CoalesceExpr : public Expr { -public: - virtual ~CoalesceExpr(); - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new CoalesceExpr(*this)); - } - virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow* row) override; - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow* row) override; - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow* row) override; - virtual IntVal get_int_val(ExprContext* context, TupleRow* row) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow* row) override; - virtual FloatVal get_float_val(ExprContext* context, TupleRow* row) override; - virtual DoubleVal get_double_val(ExprContext* context, TupleRow* row) override; - virtual StringVal get_string_val(ExprContext* context, TupleRow* row) override; - virtual DateTimeVal get_datetime_val(ExprContext* context, TupleRow* row) override; - virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow* row) override; - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow* row) override; - - virtual std::string debug_string() const override { return Expr::debug_string("CoalesceExpr"); } - -protected: - friend class Expr; - CoalesceExpr(const TExprNode& node); -}; - -} // namespace doris diff --git a/be/src/exprs/create_predicate_function.h b/be/src/exprs/create_predicate_function.h index 3178b7a970..d02b163980 100644 --- a/be/src/exprs/create_predicate_function.h +++ b/be/src/exprs/create_predicate_function.h @@ -17,7 +17,6 @@ #pragma once -#include "exprs/bloomfilter_predicate.h" #include "exprs/hybrid_set.h" #include "exprs/minmax_predicate.h" #include "olap/bitmap_filter_predicate.h" diff --git a/be/src/exprs/decimalv2_operators.cpp b/be/src/exprs/decimalv2_operators.cpp deleted file mode 100644 index f56d130d30..0000000000 --- a/be/src/exprs/decimalv2_operators.cpp +++ /dev/null @@ -1,214 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/decimalv2_operators.h" - -#include - -#include -#include - -#include "exprs/anyval_util.h" - -namespace doris { - -void DecimalV2Operators::init() {} - -#define CAST_INT_TO_DECIMAL(from_type) \ - DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, \ - const from_type& val) { \ - if (val.is_null) return DecimalV2Val::null(); \ - DecimalV2Value dv(val.val, 0); \ - DecimalV2Val result; \ - dv.to_decimal_val(&result); \ - return result; \ - } - -#define CAST_INT_TO_DECIMALS() \ - CAST_INT_TO_DECIMAL(TinyIntVal); \ - CAST_INT_TO_DECIMAL(SmallIntVal); \ - CAST_INT_TO_DECIMAL(IntVal); \ - CAST_INT_TO_DECIMAL(BigIntVal); \ - CAST_INT_TO_DECIMAL(LargeIntVal); - -CAST_INT_TO_DECIMALS(); - -DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, - const FloatVal& val) { - if (val.is_null) { - return DecimalV2Val::null(); - } - DecimalV2Value dv(0); - dv.assign_from_float(val.val); - DecimalV2Val result; - dv.to_decimal_val(&result); - return result; -} - -DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, - const DoubleVal& val) { - if (val.is_null) { - return DecimalV2Val::null(); - } - DecimalV2Value dv(0); - dv.assign_from_double(val.val); - DecimalV2Val result; - dv.to_decimal_val(&result); - return result; -} - -DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, - const DateTimeVal& val) { - if (val.is_null) { - return DecimalV2Val::null(); - } - - DateTimeValue dt_value = DateTimeValue::from_datetime_val(val); - DecimalV2Value dv(dt_value.to_int64(), 0); - DecimalV2Val result; - dv.to_decimal_val(&result); - return result; -} - -DecimalV2Val DecimalV2Operators::cast_to_decimalv2_val(FunctionContext* context, - const StringVal& val) { - if (val.is_null) { - return DecimalV2Val::null(); - } - DecimalV2Value dv(0); - if (dv.parse_from_str((const char*)val.ptr, val.len)) { - return DecimalV2Val::null(); - } - DecimalV2Val result; - dv.to_decimal_val(&result); - return result; -} - -#define CAST_DECIMAL_TO_INT(to_type, type_name) \ - to_type DecimalV2Operators::cast_to_##type_name(FunctionContext* context, \ - const DecimalV2Val& val) { \ - if (val.is_null) return to_type::null(); \ - DecimalV2Value dv = DecimalV2Value::from_decimal_val(val); \ - return to_type(dv); \ - } - -#define CAST_FROM_DECIMAL() \ - CAST_DECIMAL_TO_INT(BooleanVal, boolean_val); \ - CAST_DECIMAL_TO_INT(TinyIntVal, tiny_int_val); \ - CAST_DECIMAL_TO_INT(SmallIntVal, small_int_val); \ - CAST_DECIMAL_TO_INT(IntVal, int_val); \ - CAST_DECIMAL_TO_INT(BigIntVal, big_int_val); \ - CAST_DECIMAL_TO_INT(LargeIntVal, large_int_val); \ - CAST_DECIMAL_TO_INT(FloatVal, float_val); \ - CAST_DECIMAL_TO_INT(DoubleVal, double_val); - -CAST_FROM_DECIMAL(); - -StringVal DecimalV2Operators::cast_to_string_val(FunctionContext* ctx, const DecimalV2Val& val) { - if (val.is_null) { - return StringVal::null(); - } - const DecimalV2Value& dv = DecimalV2Value::from_decimal_val(val); - return AnyValUtil::from_string_temp(ctx, dv.to_string()); -} - -DateTimeVal DecimalV2Operators::cast_to_datetime_val(FunctionContext* context, - const DecimalV2Val& val) { - if (val.is_null) { - return DateTimeVal::null(); - } - const DecimalV2Value& dv = DecimalV2Value::from_decimal_val(val); - DateTimeValue dt; - if (!dt.from_date_int64(dv)) { - return DateTimeVal::null(); - } - DateTimeVal result; - dt.to_datetime_val(&result); - return result; -} - -DateTimeVal DecimalV2Operators::cast_to_date_val(FunctionContext* context, - const DecimalV2Val& val) { - if (val.is_null) { - return DateTimeVal::null(); - } - - // convert from DecimalV2Val to DecimalV2Value for calculation - const DecimalV2Value& dv = DecimalV2Value::from_decimal_val(val); - DateTimeValue dt; - if (!dt.from_date_int64(dv)) { - return DateTimeVal::null(); - } - dt.cast_to_date(); - DateTimeVal result; - dt.to_datetime_val(&result); - return result; -} - -#define DECIMAL_ARITHMETIC_OP(FN_NAME, OP) \ - DecimalV2Val DecimalV2Operators::FN_NAME##_decimalv2_val_decimalv2_val( \ - FunctionContext* context, const DecimalV2Val& v1, const DecimalV2Val& v2) { \ - if (v1.is_null || v2.is_null) return DecimalV2Val::null(); \ - DecimalV2Value iv1 = DecimalV2Value::from_decimal_val(v1); \ - DecimalV2Value iv2 = DecimalV2Value::from_decimal_val(v2); \ - DecimalV2Value ir = iv1 OP iv2; \ - DecimalV2Val result; \ - ir.to_decimal_val(&result); \ - return result; \ - } - -#define DECIMAL_ARITHMETIC_OP_DIVIDE(FN_NAME, OP) \ - DecimalV2Val DecimalV2Operators::FN_NAME##_decimalv2_val_decimalv2_val( \ - FunctionContext* context, const DecimalV2Val& v1, const DecimalV2Val& v2) { \ - if (v1.is_null || v2.is_null || v2.value() == 0) return DecimalV2Val::null(); \ - DecimalV2Value iv1 = DecimalV2Value::from_decimal_val(v1); \ - DecimalV2Value iv2 = DecimalV2Value::from_decimal_val(v2); \ - DecimalV2Value ir = iv1 OP iv2; \ - DecimalV2Val result; \ - ir.to_decimal_val(&result); \ - return result; \ - } - -#define DECIMAL_ARITHMETIC_OPS() \ - DECIMAL_ARITHMETIC_OP(add, +); \ - DECIMAL_ARITHMETIC_OP(subtract, -); \ - DECIMAL_ARITHMETIC_OP(multiply, *); \ - DECIMAL_ARITHMETIC_OP_DIVIDE(divide, /); \ - DECIMAL_ARITHMETIC_OP_DIVIDE(mod, %); - -DECIMAL_ARITHMETIC_OPS(); - -#define DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(NAME, OP) \ - BooleanVal DecimalV2Operators::NAME##_decimalv2_val_decimalv2_val( \ - FunctionContext* c, const DecimalV2Val& v1, const DecimalV2Val& v2) { \ - if (v1.is_null || v2.is_null) return BooleanVal::null(); \ - DecimalV2Value iv1 = DecimalV2Value::from_decimal_val(v1); \ - DecimalV2Value iv2 = DecimalV2Value::from_decimal_val(v2); \ - return BooleanVal(iv1 OP iv2); \ - } - -#define BINARY_PREDICATE_NONNUMERIC_FNS() \ - DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(eq, ==); \ - DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(ne, !=); \ - DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(gt, >); \ - DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(lt, <); \ - DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(ge, >=); \ - DECIMAL_BINARY_PREDICATE_NONNUMERIC_FN(le, <=); - -BINARY_PREDICATE_NONNUMERIC_FNS(); - -} // namespace doris diff --git a/be/src/exprs/decimalv2_operators.h b/be/src/exprs/decimalv2_operators.h deleted file mode 100644 index a1f6c73950..0000000000 --- a/be/src/exprs/decimalv2_operators.h +++ /dev/null @@ -1,83 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include "udf/udf.h" - -namespace doris { - -class Expr; -struct ExprValue; -class TupleRow; - -/// Implementation of the decimal operators. These include the cast, -/// arithmetic and binary operators. -class DecimalV2Operators { -public: - static void init(); - - static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const TinyIntVal&); - static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const SmallIntVal&); - static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const IntVal&); - static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const BigIntVal&); - static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const LargeIntVal&); - static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const FloatVal&); - static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const DoubleVal&); - static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const DateTimeVal&); - static DecimalV2Val cast_to_decimalv2_val(FunctionContext*, const StringVal&); - - static BooleanVal cast_to_boolean_val(FunctionContext*, const DecimalV2Val&); - static TinyIntVal cast_to_tiny_int_val(FunctionContext*, const DecimalV2Val&); - static SmallIntVal cast_to_small_int_val(FunctionContext*, const DecimalV2Val&); - static IntVal cast_to_int_val(FunctionContext*, const DecimalV2Val&); - static BigIntVal cast_to_big_int_val(FunctionContext*, const DecimalV2Val&); - static LargeIntVal cast_to_large_int_val(FunctionContext*, const DecimalV2Val&); - static FloatVal cast_to_float_val(FunctionContext*, const DecimalV2Val&); - static DoubleVal cast_to_double_val(FunctionContext*, const DecimalV2Val&); - static StringVal cast_to_string_val(FunctionContext*, const DecimalV2Val&); - static DateTimeVal cast_to_datetime_val(FunctionContext*, const DecimalV2Val&); - static DateTimeVal cast_to_date_val(FunctionContext*, const DecimalV2Val&); - - static DecimalV2Val add_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, - const DecimalV2Val&); - static DecimalV2Val subtract_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, - const DecimalV2Val&); - static DecimalV2Val multiply_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, - const DecimalV2Val&); - static DecimalV2Val divide_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, - const DecimalV2Val&); - static DecimalV2Val mod_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, - const DecimalV2Val&); - - static BooleanVal eq_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, - const DecimalV2Val&); - static BooleanVal ne_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, - const DecimalV2Val&); - static BooleanVal gt_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, - const DecimalV2Val&); - static BooleanVal lt_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, - const DecimalV2Val&); - static BooleanVal ge_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, - const DecimalV2Val&); - static BooleanVal le_decimalv2_val_decimalv2_val(FunctionContext*, const DecimalV2Val&, - const DecimalV2Val&); -}; - -} // namespace doris diff --git a/be/src/exprs/encryption_functions.h b/be/src/exprs/encryption_functions.h index 1f72b96eb2..832f16f13f 100644 --- a/be/src/exprs/encryption_functions.h +++ b/be/src/exprs/encryption_functions.h @@ -25,9 +25,6 @@ namespace doris { -class Expr; -struct ExprValue; -class TupleRow; inline StringCaseUnorderedMap aes_mode_map { {"AES_128_ECB", EncryptionMode::AES_128_ECB}, {"AES_192_ECB", EncryptionMode::AES_192_ECB}, diff --git a/be/src/exprs/es_functions.cpp b/be/src/exprs/es_functions.cpp deleted file mode 100644 index 4da1e19c0c..0000000000 --- a/be/src/exprs/es_functions.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/es_functions.h" - -namespace doris { - -void ESFunctions::init() {} - -BooleanVal ESFunctions::match(FunctionContext* ctx, const StringVal& col, - const StringVal& condition) { - return BooleanVal(true); -} - -} // namespace doris diff --git a/be/src/exprs/es_functions.h b/be/src/exprs/es_functions.h deleted file mode 100644 index 0a6498f062..0000000000 --- a/be/src/exprs/es_functions.h +++ /dev/null @@ -1,38 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "udf/udf.h" - -namespace doris { - -class Expr; -class OpcodeRegistry; -class TupleRow; - -class ESFunctions { -public: - static void init(); - - // used to push down query conditions to es. - static doris_udf::BooleanVal match(doris_udf::FunctionContext* ctx, - const doris_udf::StringVal& col, - const doris_udf::StringVal& condition); -}; - -} // namespace doris diff --git a/be/src/exprs/expr.cpp b/be/src/exprs/expr.cpp deleted file mode 100644 index 1fee250153..0000000000 --- a/be/src/exprs/expr.cpp +++ /dev/null @@ -1,995 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/expr.cpp -// and modified by Doris - -#include "exprs/expr.h" - -#include - -#include -#include - -#include "common/object_pool.h" -#include "common/status.h" -#include "exprs/aggregate_functions.h" -#include "exprs/anyval_util.h" -#include "exprs/arithmetic_expr.h" -#include "exprs/binary_predicate.h" -#include "exprs/case_expr.h" -#include "exprs/cast_expr.h" -#include "exprs/compound_predicate.h" -#include "exprs/conditional_functions.h" -#include "exprs/expr_context.h" -#include "exprs/in_predicate.h" -#include "exprs/info_func.h" -#include "exprs/literal.h" -#include "exprs/match_predicate.h" -#include "exprs/null_literal.h" -#include "exprs/rpc_fn_call.h" -#include "exprs/scalar_fn_call.h" -#include "exprs/slot_ref.h" -#include "exprs/tuple_is_null_predicate.h" -#include "gen_cpp/Exprs_types.h" -#include "runtime/primitive_type.h" -#include "runtime/runtime_state.h" -#include "runtime/user_function_cache.h" - -using std::vector; -namespace doris { - -const char* Expr::_s_get_constant_symbol_prefix = "_ZN4doris4Expr12get_constant"; - -template -bool parse_string(const std::string& str, T* val) { - std::stringstream stream(str); - stream >> *val; - return !stream.fail(); -} - -void init_builtins_dummy() { - // Call one function from each of the classes to pull all the symbols - // from that class in. - // TODO: is there a better way to do this? - AggregateFunctions::init_null(nullptr, nullptr); -} - -FunctionContext* Expr::register_function_context(ExprContext* ctx, RuntimeState* state, - int varargs_buffer_size) { - FunctionContext::TypeDesc return_type = AnyValUtil::column_type_to_type_desc(_type); - std::vector arg_types; - for (int i = 0; i < _children.size(); ++i) { - arg_types.push_back(AnyValUtil::column_type_to_type_desc(_children[i]->_type)); - } - _fn_context_index = ctx->register_func(state, return_type, arg_types, varargs_buffer_size); - return ctx->fn_context(_fn_context_index); -} - -// No children here -Expr::Expr(const Expr& expr) - : _cache_entry(expr._cache_entry), - _node_type(expr._node_type), - _opcode(expr._opcode), - _is_slotref(expr._is_slotref), - _type(expr._type), - _output_scale(expr._output_scale), - _output_column(expr._output_column), - _fn(expr._fn), - _fn_context_index(expr._fn_context_index), - _constant_val(expr._constant_val) {} - -Expr::Expr(const TypeDescriptor& type) - : _opcode(TExprOpcode::INVALID_OPCODE), - // _vector_opcode(TExprOpcode::INVALID_OPCODE), - _is_slotref(false), - _type(type), - _output_scale(-1), - _output_column(-1), - _fn_context_index(-1) { - switch (_type.type) { - case TYPE_BOOLEAN: - _node_type = (TExprNodeType::BOOL_LITERAL); - break; - - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - _node_type = (TExprNodeType::INT_LITERAL); - break; - - case TYPE_LARGEINT: - _node_type = (TExprNodeType::LARGE_INT_LITERAL); - break; - - case TYPE_NULL: - _node_type = (TExprNodeType::NULL_LITERAL); - break; - - case TYPE_FLOAT: - case TYPE_DOUBLE: - case TYPE_TIME: - case TYPE_TIMEV2: - _node_type = (TExprNodeType::FLOAT_LITERAL); - break; - - case TYPE_DECIMALV2: - _node_type = (TExprNodeType::DECIMAL_LITERAL); - break; - - case TYPE_DATE: - case TYPE_DATETIME: - case TYPE_DATEV2: - case TYPE_DATETIMEV2: - _node_type = (TExprNodeType::DATE_LITERAL); - break; - - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_STRING: - case TYPE_QUANTILE_STATE: - _node_type = (TExprNodeType::STRING_LITERAL); - break; - - default: - DCHECK(false) << "Invalid type."; - } -} - -Expr::Expr(const TypeDescriptor& type, bool is_slotref) - : _opcode(TExprOpcode::INVALID_OPCODE), - // _vector_opcode(TExprOpcode::INVALID_OPCODE), - _is_slotref(is_slotref), - _type(type), - _output_scale(-1), - _output_column(-1), - _fn_context_index(-1) { - if (is_slotref) { - _node_type = (TExprNodeType::SLOT_REF); - } else { - switch (_type.type) { - case TYPE_BOOLEAN: - _node_type = (TExprNodeType::BOOL_LITERAL); - break; - - case TYPE_TINYINT: - case TYPE_SMALLINT: - case TYPE_INT: - case TYPE_BIGINT: - _node_type = (TExprNodeType::INT_LITERAL); - break; - - case TYPE_LARGEINT: - _node_type = (TExprNodeType::LARGE_INT_LITERAL); - break; - - case TYPE_NULL: - _node_type = (TExprNodeType::NULL_LITERAL); - break; - - case TYPE_FLOAT: - case TYPE_DOUBLE: - case TYPE_TIME: - case TYPE_TIMEV2: - _node_type = (TExprNodeType::FLOAT_LITERAL); - break; - - case TYPE_DECIMALV2: - _node_type = (TExprNodeType::DECIMAL_LITERAL); - break; - - case TYPE_DATETIME: - case TYPE_DATEV2: - case TYPE_DATETIMEV2: - _node_type = (TExprNodeType::DATE_LITERAL); - break; - - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: - _node_type = (TExprNodeType::STRING_LITERAL); - break; - - default: - DCHECK(false) << "Invalid type."; - } - } -} - -Expr::Expr(const TExprNode& node) - : _node_type(node.node_type), - _opcode(node.__isset.opcode ? node.opcode : TExprOpcode::INVALID_OPCODE), - // _vector_opcode( - // node.__isset.vector_opcode ? node.vector_opcode : TExprOpcode::INVALID_OPCODE), - _is_slotref(false), - _type(TypeDescriptor::from_thrift(node.type)), - _output_scale(node.output_scale), - _output_column(node.__isset.output_column ? node.output_column : -1), - _fn_context_index(-1) { - if (node.__isset.fn) { - _fn = node.fn; - } -} - -Expr::Expr(const TExprNode& node, bool is_slotref) - : _node_type(node.node_type), - _opcode(node.__isset.opcode ? node.opcode : TExprOpcode::INVALID_OPCODE), - // _vector_opcode( - // node.__isset.vector_opcode ? node.vector_opcode : TExprOpcode::INVALID_OPCODE), - _is_slotref(is_slotref), - _type(TypeDescriptor::from_thrift(node.type)), - _output_scale(node.output_scale), - _output_column(node.__isset.output_column ? node.output_column : -1), - _fn_context_index(-1) { - if (node.__isset.fn) { - _fn = node.fn; - } -} - -Expr::~Expr() {} - -Status Expr::create_expr_tree(ObjectPool* pool, const TExpr& texpr, ExprContext** ctx) { - // input is empty - if (texpr.nodes.size() == 0) { - *ctx = nullptr; - return Status::OK(); - } - int node_idx = 0; - Expr* e = nullptr; - Status status = create_tree_from_thrift(pool, texpr.nodes, nullptr, &node_idx, &e, ctx); - if (status.ok() && node_idx + 1 != texpr.nodes.size()) { - status = Status::InternalError( - "Expression tree only partially reconstructed. Not all thrift nodes were used."); - } - if (!status.ok()) { - LOG(ERROR) << "Could not construct expr tree.\n" - << status << "\n" - << apache::thrift::ThriftDebugString(texpr); - } - return status; -} - -Status Expr::create_expr_trees(ObjectPool* pool, const std::vector& texprs, - std::vector* ctxs) { - ctxs->clear(); - for (int i = 0; i < texprs.size(); ++i) { - ExprContext* ctx = nullptr; - RETURN_IF_ERROR(create_expr_tree(pool, texprs[i], &ctx)); - ctxs->push_back(ctx); - } - return Status::OK(); -} - -Status Expr::create_tree_from_thrift(ObjectPool* pool, const std::vector& nodes, - Expr* parent, int* node_idx, Expr** root_expr, - ExprContext** ctx) { - // propagate error case - if (*node_idx >= nodes.size()) { - return Status::InternalError("Failed to reconstruct expression tree from thrift."); - } - int num_children = nodes[*node_idx].num_children; - Expr* expr = nullptr; - RETURN_IF_ERROR(create_expr(pool, nodes[*node_idx], &expr)); - DCHECK(expr != nullptr); - if (parent != nullptr) { - parent->add_child(expr); - } else { - DCHECK(root_expr != nullptr); - DCHECK(ctx != nullptr); - *root_expr = expr; - *ctx = pool->add(new ExprContext(expr)); - } - for (int i = 0; i < num_children; i++) { - *node_idx += 1; - RETURN_IF_ERROR(create_tree_from_thrift(pool, nodes, expr, node_idx, nullptr, nullptr)); - // we are expecting a child, but have used all nodes - // this means we have been given a bad tree and must fail - if (*node_idx >= nodes.size()) { - return Status::InternalError("Failed to reconstruct expression tree from thrift."); - } - } - return Status::OK(); -} - -Status Expr::create_expr(ObjectPool* pool, const TExprNode& texpr_node, Expr** expr) { - switch (texpr_node.node_type) { - case TExprNodeType::BOOL_LITERAL: - case TExprNodeType::INT_LITERAL: - case TExprNodeType::LARGE_INT_LITERAL: - case TExprNodeType::FLOAT_LITERAL: - case TExprNodeType::DECIMAL_LITERAL: - case TExprNodeType::DATE_LITERAL: - case TExprNodeType::STRING_LITERAL: - *expr = pool->add(new Literal(texpr_node)); - return Status::OK(); - case TExprNodeType::ARRAY_LITERAL: - *expr = pool->add(new Literal(texpr_node)); - return Status::OK(); - case TExprNodeType::COMPOUND_PRED: - switch (texpr_node.opcode) { - case TExprOpcode::COMPOUND_AND: - *expr = pool->add(new AndPredicate(texpr_node)); - break; - case TExprOpcode::COMPOUND_OR: - *expr = pool->add(new OrPredicate(texpr_node)); - break; - default: - *expr = pool->add(new NotPredicate(texpr_node)); - break; - } - return Status::OK(); - case TExprNodeType::BINARY_PRED: - *expr = pool->add(BinaryPredicate::from_thrift(texpr_node)); - return Status::OK(); - case TExprNodeType::NULL_LITERAL: - *expr = pool->add(new NullLiteral(texpr_node)); - return Status::OK(); - case TExprNodeType::ARITHMETIC_EXPR: - if (texpr_node.opcode != TExprOpcode::INVALID_OPCODE) { - *expr = pool->add(ArithmeticExpr::from_thrift(texpr_node)); - return Status::OK(); - } - case TExprNodeType::CAST_EXPR: - if (texpr_node.__isset.child_type) { - *expr = pool->add(CastExpr::from_thrift(texpr_node)); - return Status::OK(); - } - case TExprNodeType::COMPUTE_FUNCTION_CALL: - case TExprNodeType::FUNCTION_CALL: - if (!texpr_node.__isset.fn) { - // return error to prevent crash - return Status::InternalError("function is not set in thrift node"); - } - if (texpr_node.fn.name.function_name == "if") { - *expr = pool->add(new IfExpr(texpr_node)); - } else if (texpr_node.fn.name.function_name == "nullif") { - *expr = pool->add(new NullIfExpr(texpr_node)); - } else if (texpr_node.fn.name.function_name == "ifnull" || - texpr_node.fn.name.function_name == "nvl") { - *expr = pool->add(new IfNullExpr(texpr_node)); - } else if (texpr_node.fn.name.function_name == "coalesce") { - *expr = pool->add(new CoalesceExpr(texpr_node)); - } else if (texpr_node.fn.binary_type == TFunctionBinaryType::RPC) { - *expr = pool->add(new RPCFnCall(texpr_node)); - } else if (ArithmeticExpr::is_valid(texpr_node.fn.name.function_name)) { - *expr = pool->add(ArithmeticExpr::from_fn_name(texpr_node)); - } else { - *expr = pool->add(new ScalarFnCall(texpr_node)); - } - return Status::OK(); - - case TExprNodeType::CASE_EXPR: { - if (!texpr_node.__isset.case_expr) { - return Status::InternalError("Case expression not set in thrift node"); - } - - *expr = pool->add(new CaseExpr(texpr_node)); - return Status::OK(); - } - - case TExprNodeType::MATCH_PRED: { - DCHECK(texpr_node.__isset.fn); - if (MatchPredicateExpr::is_valid(texpr_node.fn.name.function_name)) { - *expr = pool->add(new MatchPredicateExpr(texpr_node)); - } - return Status::OK(); - } - - case TExprNodeType::IN_PRED: { - switch (texpr_node.opcode) { - case TExprOpcode::FILTER_IN: - case TExprOpcode::FILTER_NOT_IN: - *expr = pool->add(new InPredicate(texpr_node)); - break; - default: - *expr = pool->add(new ScalarFnCall(texpr_node)); - break; - } - return Status::OK(); - } - - case TExprNodeType::SLOT_REF: { - if (!texpr_node.__isset.slot_ref) { - return Status::InternalError("Slot reference not set in thrift node"); - } - - *expr = pool->add(new SlotRef(texpr_node)); - return Status::OK(); - } - case TExprNodeType::TUPLE_IS_NULL_PRED: { - *expr = pool->add(new TupleIsNullPredicate(texpr_node)); - return Status::OK(); - } - - case TExprNodeType::INFO_FUNC: { - *expr = pool->add(new InfoFunc(texpr_node)); - return Status::OK(); - } - - default: - return Status::InternalError("Unknown expr node type: {}", texpr_node.node_type); - } -} - -struct MemLayoutData { - int expr_idx; - int byte_size; - bool variable_length; - - // TODO: sort by type as well? Any reason to do this? - bool operator<(const MemLayoutData& rhs) const { - // variable_len go at end - if (this->variable_length && !rhs.variable_length) { - return false; - } - - if (!this->variable_length && rhs.variable_length) { - return true; - } - - return this->byte_size < rhs.byte_size; - } -}; - -int Expr::compute_results_layout(const std::vector& exprs, std::vector* offsets, - int* var_result_begin) { - if (exprs.size() == 0) { - *var_result_begin = -1; - return 0; - } - - std::vector data; - data.resize(exprs.size()); - - // Collect all the byte sizes and sort them - for (int i = 0; i < exprs.size(); ++i) { - data[i].expr_idx = i; - - if (exprs[i]->type().type == TYPE_CHAR || exprs[i]->type().type == TYPE_VARCHAR || - exprs[i]->type().type == TYPE_STRING) { - data[i].byte_size = 16; - data[i].variable_length = true; - } else { - data[i].byte_size = get_byte_size(exprs[i]->type().type); - data[i].variable_length = false; - } - - DCHECK_NE(data[i].byte_size, 0); - } - - sort(data.begin(), data.end()); - - // Walk the types and store in a packed aligned layout - int max_alignment = sizeof(int64_t); - int current_alignment = data[0].byte_size; - int byte_offset = 0; - - offsets->clear(); - offsets->resize(exprs.size()); - *var_result_begin = -1; - - for (int i = 0; i < data.size(); ++i) { - DCHECK_GE(data[i].byte_size, current_alignment); - - // Don't align more than word (8-byte) size. This is consistent with what compilers - // do. - if (data[i].byte_size != current_alignment && current_alignment != max_alignment) { - byte_offset += data[i].byte_size - current_alignment; - current_alignment = std::min(data[i].byte_size, max_alignment); - // TODO(zc): fixed decimal align - if (data[i].byte_size == 40) { - current_alignment = 4; - } - } - - (*offsets)[data[i].expr_idx] = byte_offset; - - if (data[i].variable_length && *var_result_begin == -1) { - *var_result_begin = byte_offset; - } - - byte_offset += data[i].byte_size; - } - - return byte_offset; -} - -int Expr::compute_results_layout(const std::vector& ctxs, std::vector* offsets, - int* var_result_begin) { - std::vector exprs; - for (int i = 0; i < ctxs.size(); ++i) { - exprs.push_back(ctxs[i]->root()); - } - return compute_results_layout(exprs, offsets, var_result_begin); -} - -Status Expr::prepare(const std::vector& ctxs, RuntimeState* state, - const RowDescriptor& row_desc) { - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->prepare(state, row_desc)); - } - return Status::OK(); -} - -Status Expr::prepare(RuntimeState* state, const RowDescriptor& row_desc, ExprContext* context) { - DCHECK(_type.type != INVALID_TYPE); - for (int i = 0; i < _children.size(); ++i) { - RETURN_IF_ERROR(_children[i]->prepare(state, row_desc, context)); - } - return Status::OK(); -} - -Status Expr::open(const std::vector& ctxs, RuntimeState* state) { - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->open(state)); - } - return Status::OK(); -} - -Status Expr::open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) { - DCHECK(_type.type != INVALID_TYPE); - for (int i = 0; i < _children.size(); ++i) { - RETURN_IF_ERROR(_children[i]->open(state, context, scope)); - } - return Status::OK(); -} - -void Expr::close(const std::vector& ctxs, RuntimeState* state) { - for (int i = 0; i < ctxs.size(); ++i) { - ctxs[i]->close(state); - } -} - -void Expr::close(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) { - for (int i = 0; i < _children.size(); ++i) { - _children[i]->close(state, context, scope); - } -} - -Status Expr::clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, - std::vector* new_ctxs) { - DCHECK(new_ctxs != nullptr); - if (!new_ctxs->empty()) { - // 'ctxs' was already cloned into '*new_ctxs', nothing to do. - DCHECK_EQ(new_ctxs->size(), ctxs.size()); - for (int i = 0; i < new_ctxs->size(); ++i) { - DCHECK((*new_ctxs)[i]->_is_clone); - } - return Status::OK(); - } - new_ctxs->resize(ctxs.size()); - for (int i = 0; i < ctxs.size(); ++i) { - RETURN_IF_ERROR(ctxs[i]->clone(state, &(*new_ctxs)[i])); - } - return Status::OK(); -} - -std::string Expr::debug_string() const { - // TODO: implement partial debug string for member vars - std::stringstream out; - out << " type=" << _type.debug_string(); - - if (_opcode != TExprOpcode::INVALID_OPCODE) { - out << " opcode=" << _opcode; - } - - out << " codegen=" - << "false"; - - if (!_children.empty()) { - out << " children=" << debug_string(_children); - } - - return out.str(); -} - -std::string Expr::debug_string(const std::vector& exprs) { - std::stringstream out; - out << "["; - - for (int i = 0; i < exprs.size(); ++i) { - out << (i == 0 ? "" : " ") << exprs[i]->debug_string(); - } - - out << "]"; - return out.str(); -} - -std::string Expr::debug_string(const std::vector& ctxs) { - std::vector exprs; - for (int i = 0; i < ctxs.size(); ++i) { - exprs.push_back(ctxs[i]->root()); - } - return debug_string(exprs); -} - -bool Expr::is_constant() const { - for (int i = 0; i < _children.size(); ++i) { - if (!_children[i]->is_constant()) { - return false; - } - } - - return true; -} - -bool Expr::is_vectorized() const { - for (int i = 0; i < _children.size(); ++i) { - if (!_children[i]->is_vectorized()) { - return false; - } - } - - return true; -} - -TExprNodeType::type Expr::type_without_cast(const Expr* expr) { - if (expr->_opcode == TExprOpcode::CAST) { - return type_without_cast(expr->_children[0]); - } - return expr->_node_type; -} - -const Expr* Expr::expr_without_cast(const Expr* expr) { - if (expr->_opcode == TExprOpcode::CAST) { - return expr_without_cast(expr->_children[0]); - } - return expr; -} - -doris_udf::AnyVal* Expr::get_const_val(ExprContext* context) { - if (!is_constant()) { - return nullptr; - } - if (_constant_val.get() != nullptr) { - return _constant_val.get(); - } - switch (_type.type) { - case TYPE_BOOLEAN: { - _constant_val.reset(new BooleanVal(get_boolean_val(context, nullptr))); - break; - } - case TYPE_TINYINT: { - _constant_val.reset(new TinyIntVal(get_tiny_int_val(context, nullptr))); - break; - } - case TYPE_SMALLINT: { - _constant_val.reset(new SmallIntVal(get_small_int_val(context, nullptr))); - break; - } - case TYPE_INT: { - _constant_val.reset(new IntVal(get_int_val(context, nullptr))); - break; - } - case TYPE_BIGINT: { - _constant_val.reset(new BigIntVal(get_big_int_val(context, nullptr))); - break; - } - case TYPE_LARGEINT: { - _constant_val.reset(new LargeIntVal(get_large_int_val(context, nullptr))); - break; - } - case TYPE_FLOAT: { - _constant_val.reset(new FloatVal(get_float_val(context, nullptr))); - break; - } - case TYPE_DOUBLE: - case TYPE_TIME: - case TYPE_TIMEV2: { - _constant_val.reset(new DoubleVal(get_double_val(context, nullptr))); - break; - } - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: { - _constant_val.reset(new StringVal(get_string_val(context, nullptr))); - break; - } - case TYPE_DATE: - case TYPE_DATETIME: { - _constant_val.reset(new DateTimeVal(get_datetime_val(context, nullptr))); - break; - } - - case TYPE_DATEV2: { - _constant_val.reset(new DateV2Val(get_datev2_val(context, nullptr))); - break; - } - - case TYPE_DATETIMEV2: { - _constant_val.reset(new DateTimeV2Val(get_datetimev2_val(context, nullptr))); - break; - } - - case TYPE_DECIMALV2: { - _constant_val.reset(new DecimalV2Val(get_decimalv2_val(context, nullptr))); - break; - } - case TYPE_DECIMAL32: { - _constant_val.reset(new Decimal32Val(get_decimal32_val(context, nullptr))); - break; - } - case TYPE_DECIMAL64: { - _constant_val.reset(new Decimal64Val(get_decimal64_val(context, nullptr))); - break; - } - case TYPE_DECIMAL128I: { - _constant_val.reset(new Decimal128Val(get_decimal128_val(context, nullptr))); - break; - } - case TYPE_NULL: { - _constant_val.reset(new AnyVal(true)); - break; - } - case TYPE_ARRAY: { - _constant_val.reset(new CollectionVal(get_array_val(context, nullptr))); - break; - } - default: - DCHECK(false) << "Type not implemented: " << type(); - } - DCHECK(_constant_val.get() != nullptr); - return _constant_val.get(); -} - -bool Expr::is_bound(std::vector* tuple_ids) const { - for (int i = 0; i < _children.size(); ++i) { - if (!_children[i]->is_bound(tuple_ids)) { - return false; - } - } - - return true; -} - -int Expr::get_slot_ids(std::vector* slot_ids) const { - int n = 0; - - for (int i = 0; i < _children.size(); ++i) { - n += _children[i]->get_slot_ids(slot_ids); - } - - return n; -} - -BooleanVal Expr::get_boolean_val(ExprContext* context, TupleRow* row) { - return BooleanVal::null(); // (*(bool*)get_value(row)); -} - -TinyIntVal Expr::get_tiny_int_val(ExprContext* context, TupleRow* row) { - return TinyIntVal::null(); // (*(int8_t*)get_value(row)); -} - -SmallIntVal Expr::get_small_int_val(ExprContext* context, TupleRow* row) { - return SmallIntVal::null(); // (*(int16_t*)get_value(row)); -} - -IntVal Expr::get_int_val(ExprContext* context, TupleRow* row) { - return IntVal::null(); // (*(int32_t*)get_value(row)); -} - -BigIntVal Expr::get_big_int_val(ExprContext* context, TupleRow* row) { - return BigIntVal::null(); // (*(int64_t*)get_value(row)); -} - -LargeIntVal Expr::get_large_int_val(ExprContext* context, TupleRow* row) { - return LargeIntVal::null(); // (*(int64_t*)get_value(row)); -} - -Decimal32Val Expr::get_decimal32_val(ExprContext* context, TupleRow* row) { - return Decimal32Val::null(); // (*(int32_t*)get_value(row)); -} - -Decimal64Val Expr::get_decimal64_val(ExprContext* context, TupleRow* row) { - return Decimal64Val::null(); -} - -Decimal128Val Expr::get_decimal128_val(ExprContext* context, TupleRow* row) { - return Decimal128Val::null(); -} - -FloatVal Expr::get_float_val(ExprContext* context, TupleRow* row) { - return FloatVal::null(); // (*(float*)get_value(row)); -} - -DoubleVal Expr::get_double_val(ExprContext* context, TupleRow* row) { - return DoubleVal::null(); // (*(double*)get_value(row)); -} - -StringVal Expr::get_string_val(ExprContext* context, TupleRow* row) { - StringVal val; - // ((StringValue*)get_value(row))->to_string_val(&val); - return val; -} - -// TODO(zc) -// virtual ArrayVal Expr::GetArrayVal(ExprContext* context, TupleRow*); -DateTimeVal Expr::get_datetime_val(ExprContext* context, TupleRow* row) { - DateTimeVal val; - // ((DateTimeValue*)get_value(row))->to_datetime_val(&val); - return val; -} - -DateV2Val Expr::get_datev2_val(ExprContext* context, TupleRow* row) { - DateV2Val val; - return val; -} - -DateTimeV2Val Expr::get_datetimev2_val(ExprContext* context, TupleRow* row) { - DateTimeV2Val val; - return val; -} - -DecimalV2Val Expr::get_decimalv2_val(ExprContext* context, TupleRow* row) { - DecimalV2Val val; - return val; -} - -CollectionVal Expr::get_array_val(ExprContext* context, TupleRow* row) { - CollectionVal val; - return val; -} - -Status Expr::get_fn_context_error(ExprContext* ctx) { - if (_fn_context_index != -1) { - FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); - if (fn_ctx->has_error()) { - return Status::InternalError(fn_ctx->error_msg()); - } - } - return Status::OK(); -} - -Expr* Expr::copy(ObjectPool* pool, Expr* old_expr) { - auto new_expr = old_expr->clone(pool); - for (auto child : old_expr->_children) { - auto new_child = copy(pool, child); - new_expr->_children.push_back(new_child); - } - return new_expr; -} - -void Expr::assign_fn_ctx_idx(int* next_fn_ctx_idx) { - _fn_ctx_idx_start = *next_fn_ctx_idx; - if (has_fn_ctx()) { - _fn_ctx_idx = *next_fn_ctx_idx; - ++(*next_fn_ctx_idx); - } - for (Expr* child : children()) child->assign_fn_ctx_idx(next_fn_ctx_idx); - _fn_ctx_idx_end = *next_fn_ctx_idx; -} - -Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, - ObjectPool* pool, Expr** scalar_expr) { - *scalar_expr = nullptr; - Expr* root = nullptr; - RETURN_IF_ERROR(create_expr(pool, texpr.nodes[0], &root)); - RETURN_IF_ERROR(create_tree(texpr, pool, root)); - // TODO pengyubing replace by Init() - ExprContext* ctx = pool->add(new ExprContext(root)); - // TODO chenhao check node type in ScalarExpr Init() - Status status = Status::OK(); - if (texpr.nodes[0].node_type != TExprNodeType::CASE_EXPR) { - status = root->prepare(state, row_desc, ctx); - } - if (UNLIKELY(!status.ok())) { - root->close(); - return status; - } - int fn_ctx_idx = 0; - root->assign_fn_ctx_idx(&fn_ctx_idx); - *scalar_expr = root; - return Status::OK(); -} - -Status Expr::create(const std::vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, ObjectPool* pool, std::vector* exprs) { - exprs->clear(); - for (const TExpr& texpr : texprs) { - Expr* expr = nullptr; - RETURN_IF_ERROR(create(texpr, row_desc, state, pool, &expr)); - DCHECK(expr != nullptr); - exprs->push_back(expr); - } - return Status::OK(); -} - -Status Expr::create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, - Expr** scalar_expr) { - return Expr::create(texpr, row_desc, state, state->obj_pool(), scalar_expr); -} - -Status Expr::create(const std::vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, std::vector* exprs) { - return Expr::create(texprs, row_desc, state, state->obj_pool(), exprs); -} - -Status Expr::create_tree(const TExpr& texpr, ObjectPool* pool, Expr* root) { - DCHECK(!texpr.nodes.empty()); - DCHECK(root != nullptr); - // The root of the tree at nodes[0] is already created and stored in 'root'. - int child_node_idx = 0; - int num_children = texpr.nodes[0].num_children; - for (int i = 0; i < num_children; ++i) { - ++child_node_idx; - Status status = create_tree_internal(texpr.nodes, pool, root, &child_node_idx); - if (UNLIKELY(!status.ok())) { - LOG(ERROR) << "Could not construct expr tree.\n" - << status << "\n" - << apache::thrift::ThriftDebugString(texpr); - return status; - } - } - if (UNLIKELY(child_node_idx + 1 != texpr.nodes.size())) { - return Status::InternalError( - "Expression tree only partially reconstructed. Not all thrift " - "nodes were used."); - } - return Status::OK(); -} - -Status Expr::create_tree_internal(const std::vector& nodes, ObjectPool* pool, Expr* root, - int* child_node_idx) { - // propagate error case - if (*child_node_idx >= nodes.size()) { - return Status::InternalError("Failed to reconstruct expression tree from thrift."); - } - - const TExprNode& texpr_node = nodes[*child_node_idx]; - DCHECK_NE(texpr_node.node_type, TExprNodeType::AGG_EXPR); - Expr* child_expr; - RETURN_IF_ERROR(create_expr(pool, texpr_node, &child_expr)); - root->_children.push_back(child_expr); - - int num_children = nodes[*child_node_idx].num_children; - for (int i = 0; i < num_children; ++i) { - *child_node_idx += 1; - RETURN_IF_ERROR(create_tree_internal(nodes, pool, child_expr, child_node_idx)); - DCHECK(child_expr->get_child(i) != nullptr); - } - return Status::OK(); -} - -// TODO chenhao -void Expr::close() { - for (Expr* child : _children) child->close(); - /*if (_cache_entry != nullptr) { - LibCache::instance()->decrement_use_count(_cache_entry); - _cache_entry = nullptr; - }*/ - if (_cache_entry != nullptr) { - UserFunctionCache::instance()->release_entry(_cache_entry); - _cache_entry = nullptr; - } -} - -void Expr::close(const std::vector& exprs) { - for (Expr* expr : exprs) expr->close(); -} - -} // namespace doris diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h deleted file mode 100644 index 403021e1ce..0000000000 --- a/be/src/exprs/expr.h +++ /dev/null @@ -1,599 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/expr.h -// and modified by Doris - -#pragma once - -#include -#include -#include - -#include "common/status.h" -#include "exprs/expr_value.h" -#include "gen_cpp/Opcodes_types.h" -#include "runtime/descriptors.h" -#include "runtime/large_int_value.h" -#include "runtime/tuple.h" -#include "runtime/tuple_row.h" -#include "udf/udf.h" -#include "util/string_parser.hpp" -#include "vec/common/string_ref.h" -#include "vec/data_types/data_type_decimal.h" -#include "vec/io/io_helper.h" - -#undef USING_DORIS_UDF -#define USING_DORIS_UDF using namespace doris_udf - -USING_DORIS_UDF; - -namespace doris { - -class Expr; -class ExprContext; -class ObjectPool; -class RowDescriptor; -class RuntimeState; -class TColumnValue; -class TExpr; -class TExprNode; -class TupleIsNullPredicate; -class Literal; -class MemTracker; -struct UserFunctionCacheEntry; - -// This is the superclass of all expr evaluation nodes. -class Expr { -public: - // typedef for compute functions. - typedef void* (*ComputeFn)(Expr*, TupleRow*); - - // Empty virtual destructor - virtual ~Expr(); - - Expr(const Expr& expr); - - virtual Expr* clone(ObjectPool* pool) const = 0; - - // evaluate expr and return pointer to result. The result is - // valid as long as 'row' doesn't change. - // TODO: stop having the result cached in this Expr object - void* get_value(TupleRow* row) { return nullptr; } - - bool is_null_scalar_function(std::string& str) { - // name and function_name both are required - if (_fn.name.function_name.compare("is_null_pred") == 0) { - str.assign("null"); - return true; - } else if (_fn.name.function_name.compare("is_not_null_pred") == 0) { - str.assign("not null"); - return true; - } else { - return false; - } - } - /// Virtual compute functions for each *Val type. Each Expr subclass should implement - /// the functions for the return type(s) it supports. For example, a boolean function - /// will only implement GetBooleanVal(). Some Exprs, like Literal, have many possible - /// return types and will implement multiple Get*Val() functions. - virtual BooleanVal get_boolean_val(ExprContext* context, TupleRow*); - virtual TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*); - virtual SmallIntVal get_small_int_val(ExprContext* context, TupleRow*); - virtual IntVal get_int_val(ExprContext* context, TupleRow*); - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*); - virtual LargeIntVal get_large_int_val(ExprContext* context, TupleRow*); - virtual FloatVal get_float_val(ExprContext* context, TupleRow*); - virtual DoubleVal get_double_val(ExprContext* context, TupleRow*); - virtual StringVal get_string_val(ExprContext* context, TupleRow*); - // TODO(zc) - // virtual ArrayVal GetArrayVal(ExprContext* context, TupleRow*); - virtual DateTimeVal get_datetime_val(ExprContext* context, TupleRow*); - virtual DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*); - virtual DateV2Val get_datev2_val(ExprContext* context, TupleRow*); - virtual DateTimeV2Val get_datetimev2_val(ExprContext* context, TupleRow*); - virtual CollectionVal get_array_val(ExprContext* context, TupleRow*); - - virtual Decimal32Val get_decimal32_val(ExprContext* context, TupleRow*); - virtual Decimal64Val get_decimal64_val(ExprContext* context, TupleRow*); - virtual Decimal128Val get_decimal128_val(ExprContext* context, TupleRow*); - - // Get the number of digits after the decimal that should be displayed for this - // value. Returns -1 if no scale has been specified (currently the scale is only set for - // doubles set by RoundUpTo). get_value() must have already been called. - // TODO: this will be unnecessary once we support the DECIMAL(precision, scale) type - int output_scale() const { return _output_scale; } - int output_column() const { return _output_column; } - - void add_child(Expr* expr) { _children.push_back(expr); } - Expr* get_child(int i) const { return _children[i]; } - int get_num_children() const { return _children.size(); } - - const TypeDescriptor& type() const { return _type; } - const std::vector& children() const { return _children; } - - TExprOpcode::type op() const { return _opcode; } - - TExprNodeType::type node_type() const { return _node_type; } - - const TFunction& fn() const { return _fn; } - - bool is_slotref() const { return _is_slotref; } - - /// Returns true if this expr uses a FunctionContext to track its runtime state. - /// Overridden by exprs which use FunctionContext. - virtual bool has_fn_ctx() const { return false; } - - /// Returns an error status if the function context associated with the - /// expr has an error set. - Status get_fn_context_error(ExprContext* ctx); - - static TExprNodeType::type type_without_cast(const Expr* expr); - - static const Expr* expr_without_cast(const Expr* expr); - - // Returns true if expr doesn't contain slotrefs, ie, can be evaluated - // with get_value(nullptr). The default implementation returns true if all of - // the children are constant. - virtual bool is_constant() const; - - // Returns true ifi expr support vectorized process - // The default implementation returns true if all the children was supported - virtual bool is_vectorized() const; - - // Returns true if expr bound - virtual bool is_bound(std::vector* tuple_ids) const; - - // Returns the slots that are referenced by this expr tree in 'slot_ids'. - // Returns the number of slots added to the vector - virtual int get_slot_ids(std::vector* slot_ids) const; - - /// Create expression tree from the list of nodes contained in texpr within 'pool'. - /// Returns the root of expression tree in 'expr' and the corresponding ExprContext in - /// 'ctx'. - static Status create_expr_tree(ObjectPool* pool, const TExpr& texpr, ExprContext** ctx); - - /// Creates vector of ExprContexts containing exprs from the given vector of - /// TExprs within 'pool'. Returns an error if any of the individual conversions caused - /// an error, otherwise OK. - static Status create_expr_trees(ObjectPool* pool, const std::vector& texprs, - std::vector* ctxs); - - /// Create a new ScalarExpr based on thrift Expr 'texpr'. The newly created ScalarExpr - /// is stored in ObjectPool 'pool' and returned in 'expr' on success. 'row_desc' is the - /// tuple row descriptor of the input tuple row. On failure, 'expr' is set to nullptr and - /// the expr tree (if created) will be closed. Error status will be returned too. - static Status create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, - ObjectPool* pool, Expr** expr); - - /// Create a new ScalarExpr based on thrift Expr 'texpr'. The newly created ScalarExpr - /// is stored in ObjectPool 'state->obj_pool()' and returned in 'expr'. 'row_desc' is - /// the tuple row descriptor of the input tuple row. Returns error status on failure. - static Status create(const TExpr& texpr, const RowDescriptor& row_desc, RuntimeState* state, - Expr** expr); - - /// Convenience functions creating multiple ScalarExpr. - static Status create(const std::vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, ObjectPool* pool, std::vector* exprs); - - /// Convenience functions creating multiple ScalarExpr. - static Status create(const std::vector& texprs, const RowDescriptor& row_desc, - RuntimeState* state, std::vector* exprs); - - /// Convenience function for preparing multiple expr trees. - /// Allocations from 'ctxs' will be counted against 'tracker'. - static Status prepare(const std::vector& ctxs, RuntimeState* state, - const RowDescriptor& row_desc); - - /// Convenience function for opening multiple expr trees. - static Status open(const std::vector& ctxs, RuntimeState* state); - - /// Clones each ExprContext for multiple expr trees. 'new_ctxs' must be non-nullptr. - /// Idempotent: if '*new_ctxs' is empty, a clone of each context in 'ctxs' will be added - /// to it, and if non-empty, it is assumed CloneIfNotExists() was already called and the - /// call is a no-op. The new ExprContexts are created in state->obj_pool(). - static Status clone_if_not_exists(const std::vector& ctxs, RuntimeState* state, - std::vector* new_ctxs); - - /// Convenience function for closing multiple expr trees. - static void close(const std::vector& ctxs, RuntimeState* state); - - /// Convenience functions for closing a list of ScalarExpr. - static void close(const std::vector& exprs); - - // Computes a memory efficient layout for storing the results of evaluating 'exprs' - // Returns the number of bytes necessary to store all the results and offsets - // where the result for each expr should be stored. - // Variable length types are guaranteed to be at the end and 'var_result_begin' - // will be set the beginning byte offset where variable length results begin. - // 'var_result_begin' will be set to -1 if there are no variable len types. - static int compute_results_layout(const std::vector& exprs, std::vector* offsets, - int* var_result_begin); - static int compute_results_layout(const std::vector& ctxs, - std::vector* offsets, int* var_result_begin); - - /// If this expr is constant, evaluates the expr with no input row argument and returns - /// the output. Returns nullptr if the argument is not constant. The returned AnyVal* is - /// owned by this expr. This should only be called after Open() has been called on this - /// expr. - virtual AnyVal* get_const_val(ExprContext* context); - - /// Assigns indices into the FunctionContext vector 'fn_ctxs_' in an evaluator to - /// nodes which need FunctionContext in the tree. 'next_fn_ctx_idx' is the index - /// of the next available entry in the vector. It's updated as this function is - /// called recursively down the tree. - void assign_fn_ctx_idx(int* next_fn_ctx_idx); - - virtual std::string debug_string() const; - static std::string debug_string(const std::vector& exprs); - static std::string debug_string(const std::vector& ctxs); - - // Prefix of Expr::GetConstant() symbols, regardless of template specialization - static const char* _s_get_constant_symbol_prefix; - - /// The builtin functions are not called from anywhere in the code and the - /// symbols are therefore not included in the binary. We call these functions - /// by using dlsym. The compiler must think this function is callable to - /// not strip these symbols. - static void init_builtins_dummy(); - - // Any additions to this enum must be reflected in both GetConstant() and - // GetIrConstant(). - enum ExprConstant { - RETURN_TYPE_SIZE, // int - ARG_TYPE_SIZE // int[] - }; - - static Expr* copy(ObjectPool* pool, Expr* old_expr); - int get_fn_context_index() { return _fn_context_index; } - -protected: - friend class AggFnEvaluator; - friend class AnaFnEvaluator; - friend class TopNNode; - friend class AnalyticEvalNode; - friend class ComputeFunctions; - friend class MathFunctions; - friend class StringFunctions; - friend class TimestampFunctions; - friend class ConditionalFunctions; - friend class UtilityFunctions; - friend class CaseExpr; - friend class InPredicate; - friend class InfoFunc; - friend class FunctionCall; - friend class HashJoinNode; - friend class ExecNode; - friend class SetVar; - friend class NativeUdfExpr; - friend class JsonFunctions; - friend class Literal; - friend class ExprContext; - friend class CompoundPredicate; - friend class ScalarFnCall; - friend class HllHashFunction; - - /// Constructs an Expr tree from the thrift Expr 'texpr'. 'root' is the root of the - /// Expr tree created from texpr.nodes[0] by the caller (either ScalarExpr or AggFn). - /// The newly created Expr nodes are added to 'pool'. Returns error status on failure. - static Status create_tree(const TExpr& texpr, ObjectPool* pool, Expr* root); - - int fn_ctx_idx() const { return _fn_ctx_idx; } - - Expr(const TypeDescriptor& type); - Expr(const TypeDescriptor& type, bool is_slotref); - Expr(const TExprNode& node); - Expr(const TExprNode& node, bool is_slotref); - - /// Initializes this expr instance for execution. This does not include initializing - /// state in the ExprContext; 'context' should only be used to register a - /// FunctionContext via RegisterFunctionContext(). Any IR functions must be generated - /// here. - /// - /// Subclasses overriding this function should call Expr::Prepare() to recursively call - /// Prepare() on the expr tree. - virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* context); - - /// Initializes 'context' for execution. If scope if FRAGMENT_LOCAL, both fragment- and - /// thread-local state should be initialized. Otherwise, if scope is THREAD_LOCAL, only - /// thread-local state should be initialized. - // - /// Subclasses overriding this function should call Expr::Open() to recursively call - /// Open() on the expr tree. - Status open(RuntimeState* state, ExprContext* context) { - return open(state, context, FunctionContext::FRAGMENT_LOCAL); - } - - virtual Status open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope); - - /// Subclasses overriding this function should call Expr::Close(). - // - /// If scope if FRAGMENT_LOCAL, both fragment- and thread-local state should be torn - /// down. Otherwise, if scope is THREAD_LOCAL, only thread-local state should be torn - /// down. - void close(RuntimeState* state, ExprContext* context) { - close(state, context, FunctionContext::FRAGMENT_LOCAL); - } - - virtual void close(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope); - - /// Releases cache entries to LibCache in all nodes of the Expr tree. - virtual void close(); - - /// Helper function that calls ctx->Register(), sets fn_context_index_, and returns the - /// registered FunctionContext. - FunctionContext* register_function_context(ExprContext* ctx, RuntimeState* state, - int varargs_buffer_size); - - /// Cache entry for the library implementing this function. - UserFunctionCacheEntry* _cache_entry = nullptr; - - // function opcode - - TExprNodeType::type _node_type; - - // Used to check what opcode - TExprOpcode::type _opcode; - - // recognize if this node is a slotref in order to speed up get_value() - const bool _is_slotref; - - // analysis is done, types are fixed at this point - TypeDescriptor _type; - std::vector _children; - int _output_scale; - int _output_column; - - /// Function description. - TFunction _fn; - - /// Index to pass to ExprContext::fn_context() to retrieve this expr's FunctionContext. - /// Set in RegisterFunctionContext(). -1 if this expr does not need a FunctionContext and - /// doesn't call RegisterFunctionContext(). - int _fn_context_index; - - // If this expr is constant, this will store and cache the value generated by - // get_const_val(). - std::shared_ptr _constant_val; - - /// Simple debug string that provides no expr subclass-specific information - std::string debug_string(const std::string& expr_name) const { - std::stringstream out; - out << expr_name << "(" << Expr::debug_string() << ")"; - return out.str(); - } - -private: - friend class ExprTest; - friend class QueryJitter; - - // Create a new Expr based on texpr_node.node_type within 'pool'. - static Status create_expr(ObjectPool* pool, const TExprNode& texpr_node, Expr** expr); - - // Create a new Expr based on texpr_node.node_type within 'pool'. - static Status create_expr(ObjectPool* pool, const Expr* old_expr, Expr** new_expr); - - /// Creates an expr tree for the node rooted at 'node_idx' via depth-first traversal. - /// parameters - /// nodes: vector of thrift expression nodes to be translated - /// parent: parent of node at node_idx (or nullptr for node_idx == 0) - /// node_idx: - /// in: root of TExprNode tree - /// out: next node in 'nodes' that isn't part of tree - /// root_expr: out: root of constructed expr tree - /// ctx: out: context of constructed expr tree - /// return - /// status.ok() if successful - /// !status.ok() if tree is inconsistent or corrupt - static Status create_tree_from_thrift(ObjectPool* pool, const std::vector& nodes, - Expr* parent, int* node_idx, Expr** root_expr, - ExprContext** ctx); - - /// Static wrappers around the virtual Get*Val() functions. Calls the appropriate - /// Get*Val() function on expr, passing it the context and row arguments. - // - /// These are used to call Get*Val() functions from generated functions, since I don't - /// know how to call virtual functions directly. GetStaticGetValWrapper() returns the - /// IR function of the appropriate wrapper function. - static BooleanVal get_boolean_val(Expr* expr, ExprContext* context, TupleRow* row); - static TinyIntVal get_tiny_int_val(Expr* expr, ExprContext* context, TupleRow* row); - static SmallIntVal get_small_int_val(Expr* expr, ExprContext* context, TupleRow* row); - static IntVal get_int_val(Expr* expr, ExprContext* context, TupleRow* row); - static BigIntVal get_big_int_val(Expr* expr, ExprContext* context, TupleRow* row); - static LargeIntVal get_large_int_val(Expr* expr, ExprContext* context, TupleRow* row); - static FloatVal get_float_val(Expr* expr, ExprContext* context, TupleRow* row); - static DoubleVal get_double_val(Expr* expr, ExprContext* context, TupleRow* row); - static StringVal get_string_val(Expr* expr, ExprContext* context, TupleRow* row); - static DateTimeVal get_datetime_val(Expr* expr, ExprContext* context, TupleRow* row); - static CollectionVal get_array_val(Expr* expr, ExprContext* context, TupleRow* row); - static DecimalV2Val get_decimalv2_val(Expr* expr, ExprContext* context, TupleRow* row); - - /// Creates an expression tree rooted at 'root' via depth-first traversal. - /// Called recursively to create children expr trees for sub-expressions. - /// - /// parameters: - /// nodes: vector of thrift expression nodes to be unpacked. - /// It is essentially an Expr tree encoded in a depth-first manner. - /// pool: Object pool in which Expr created from nodes are stored. - /// root: root of the new tree. Created and initialized by the caller. - /// child_node_idx: index into 'nodes' to be unpacked. It's the root of the next child - /// child Expr tree to be added to 'root'. Updated as 'nodes' are - /// consumed to construct the tree. - /// return - /// status.ok() if successful - /// !status.ok() if tree is inconsistent or corrupt - static Status create_tree_internal(const std::vector& nodes, ObjectPool* pool, - Expr* parent, int* child_node_idx); - - /// 'fn_ctx_idx_' is the index into the FunctionContext vector in ScalarExprEvaluator - /// for storing FunctionContext needed to evaluate this ScalarExprNode. It's -1 if this - /// ScalarExpr doesn't need a FunctionContext. The FunctionContext is managed by the - /// evaluator and initialized by calling ScalarExpr::OpenEvaluator(). - int _fn_ctx_idx = -1; - - /// [fn_ctx_idx_start_, fn_ctx_idx_end_) defines the range in FunctionContext vector - /// in ScalarExpeEvaluator for the expression subtree rooted at this ScalarExpr node. - int _fn_ctx_idx_start = 0; - int _fn_ctx_idx_end = 0; -}; - -template -Status create_texpr_literal_node(const void* data, TExprNode* node, int precision = 0, - int scale = 0) { - if constexpr (T == TYPE_BOOLEAN) { - auto origin_value = reinterpret_cast(data); - TBoolLiteral boolLiteral; - (*node).__set_node_type(TExprNodeType::BOOL_LITERAL); - boolLiteral.__set_value(*origin_value); - (*node).__set_bool_literal(boolLiteral); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN)); - } else if constexpr (T == TYPE_TINYINT) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::INT_LITERAL); - TIntLiteral intLiteral; - intLiteral.__set_value(*origin_value); - (*node).__set_int_literal(intLiteral); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_TINYINT)); - } else if constexpr (T == TYPE_SMALLINT) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::INT_LITERAL); - TIntLiteral intLiteral; - intLiteral.__set_value(*origin_value); - (*node).__set_int_literal(intLiteral); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_SMALLINT)); - } else if constexpr (T == TYPE_INT) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::INT_LITERAL); - TIntLiteral intLiteral; - intLiteral.__set_value(*origin_value); - (*node).__set_int_literal(intLiteral); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_INT)); - } else if constexpr (T == TYPE_BIGINT) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::INT_LITERAL); - TIntLiteral intLiteral; - intLiteral.__set_value(*origin_value); - (*node).__set_int_literal(intLiteral); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_BIGINT)); - } else if constexpr (T == TYPE_LARGEINT) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::LARGE_INT_LITERAL); - TLargeIntLiteral large_int_literal; - large_int_literal.__set_value(LargeIntValue::to_string(*origin_value)); - (*node).__set_large_int_literal(large_int_literal); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_LARGEINT)); - } else if constexpr ((T == TYPE_DATE) || (T == TYPE_DATETIME) || (T == TYPE_TIME)) { - auto origin_value = reinterpret_cast(data); - TDateLiteral date_literal; - char convert_buffer[30]; - origin_value->to_string(convert_buffer); - date_literal.__set_value(convert_buffer); - (*node).__set_date_literal(date_literal); - (*node).__set_node_type(TExprNodeType::DATE_LITERAL); - if (origin_value->type() == TimeType::TIME_DATE) { - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DATE)); - } else if (origin_value->type() == TimeType::TIME_DATETIME) { - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DATETIME)); - } else if (origin_value->type() == TimeType::TIME_TIME) { - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_TIME)); - } - } else if constexpr (T == TYPE_DATEV2) { - auto origin_value = reinterpret_cast< - const doris::vectorized::DateV2Value*>(data); - TDateLiteral date_literal; - char convert_buffer[30]; - origin_value->to_string(convert_buffer); - date_literal.__set_value(convert_buffer); - (*node).__set_date_literal(date_literal); - (*node).__set_node_type(TExprNodeType::DATE_LITERAL); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DATEV2)); - } else if constexpr (T == TYPE_DATETIMEV2) { - auto origin_value = reinterpret_cast< - const doris::vectorized::DateV2Value*>( - data); - TDateLiteral date_literal; - char convert_buffer[30]; - origin_value->to_string(convert_buffer); - date_literal.__set_value(convert_buffer); - (*node).__set_date_literal(date_literal); - (*node).__set_node_type(TExprNodeType::DATE_LITERAL); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DATETIMEV2)); - } else if constexpr (T == TYPE_DECIMALV2) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); - TDecimalLiteral decimal_literal; - decimal_literal.__set_value(origin_value->to_string()); - (*node).__set_decimal_literal(decimal_literal); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMALV2, precision, scale)); - } else if constexpr (T == TYPE_DECIMAL32) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); - TDecimalLiteral decimal_literal; - std::stringstream ss; - vectorized::write_text(*origin_value, scale, ss); - decimal_literal.__set_value(ss.str()); - (*node).__set_decimal_literal(decimal_literal); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL32, precision, scale)); - } else if constexpr (T == TYPE_DECIMAL64) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); - TDecimalLiteral decimal_literal; - std::stringstream ss; - vectorized::write_text(*origin_value, scale, ss); - decimal_literal.__set_value(ss.str()); - (*node).__set_decimal_literal(decimal_literal); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL64, precision, scale)); - } else if constexpr (T == TYPE_DECIMAL128I) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); - TDecimalLiteral decimal_literal; - std::stringstream ss; - vectorized::write_text(*origin_value, scale, ss); - decimal_literal.__set_value(ss.str()); - (*node).__set_decimal_literal(decimal_literal); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL128I, precision, scale)); - } else if constexpr (T == TYPE_FLOAT) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::FLOAT_LITERAL); - TFloatLiteral float_literal; - float_literal.__set_value(*origin_value); - (*node).__set_float_literal(float_literal); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_FLOAT)); - } else if constexpr (T == TYPE_DOUBLE) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::FLOAT_LITERAL); - TFloatLiteral float_literal; - float_literal.__set_value(*origin_value); - (*node).__set_float_literal(float_literal); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DOUBLE)); - } else if constexpr ((T == TYPE_STRING) || (T == TYPE_CHAR) || (T == TYPE_VARCHAR)) { - auto origin_value = reinterpret_cast(data); - (*node).__set_node_type(TExprNodeType::STRING_LITERAL); - TStringLiteral string_literal; - string_literal.__set_value(origin_value->to_string()); - (*node).__set_string_literal(string_literal); - (*node).__set_type(create_type_desc(PrimitiveType::TYPE_STRING)); - } else { - return Status::InvalidArgument("Invalid argument type!"); - } - return Status::OK(); -} - -} // namespace doris diff --git a/be/src/exprs/expr_context.cpp b/be/src/exprs/expr_context.cpp deleted file mode 100644 index 99aec19935..0000000000 --- a/be/src/exprs/expr_context.cpp +++ /dev/null @@ -1,455 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/expr-context.cc -// and modified by Doris - -#include "exprs/expr_context.h" - -#include - -#include - -#include "exprs/anyval_util.h" -#include "exprs/expr.h" -#include "exprs/slot_ref.h" -#include "runtime/mem_pool.h" -#include "runtime/raw_value.h" -#include "runtime/runtime_state.h" -#include "runtime/thread_context.h" -#include "udf/udf_internal.h" - -namespace doris { - -ExprContext::ExprContext(Expr* root) - : _root(root), _is_clone(false), _prepared(false), _opened(false), _closed(false) {} - -ExprContext::~ExprContext() { - DCHECK(!_prepared || _closed); - for (int i = 0; i < _fn_contexts.size(); ++i) { - delete _fn_contexts[i]; - } -} - -Status ExprContext::prepare(RuntimeState* state, const RowDescriptor& row_desc) { - DCHECK(!_prepared); - DCHECK(_pool.get() == nullptr); - _prepared = true; - _pool.reset(new MemPool()); - return _root->prepare(state, row_desc, this); -} - -Status ExprContext::open(RuntimeState* state) { - DCHECK(_prepared); - if (_opened) { - return Status::OK(); - } - _opened = true; - // Fragment-local state is only initialized for original contexts. Clones inherit the - // original's fragment state and only need to have thread-local state initialized. - FunctionContext::FunctionStateScope scope = - _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL; - return _root->open(state, this, scope); -} - -// TODO chenhao , replace ExprContext with ScalarExprEvaluator -Status ExprContext::open(std::vector evals, RuntimeState* state) { - for (int i = 0; i < evals.size(); ++i) { - RETURN_IF_ERROR(evals[i]->open(state)); - } - return Status::OK(); -} - -void ExprContext::close(RuntimeState* state) { - DCHECK(!_closed); - FunctionContext::FunctionStateScope scope = - _is_clone ? FunctionContext::THREAD_LOCAL : FunctionContext::FRAGMENT_LOCAL; - _root->close(state, this, scope); - - for (int i = 0; i < _fn_contexts.size(); ++i) { - _fn_contexts[i]->impl()->close(); - } - // _pool can be nullptr if Prepare() was never called - if (_pool != nullptr) { - _pool->free_all(); - } - _closed = true; -} - -int ExprContext::register_func(RuntimeState* state, - const doris_udf::FunctionContext::TypeDesc& return_type, - const std::vector& arg_types, - int varargs_buffer_size) { - _fn_contexts.push_back(FunctionContextImpl::create_context( - state, _pool.get(), return_type, arg_types, varargs_buffer_size, false)); - return _fn_contexts.size() - 1; -} - -Status ExprContext::clone(RuntimeState* state, ExprContext** new_ctx) { - DCHECK(_prepared); - DCHECK(_opened); - DCHECK(*new_ctx == nullptr); - - *new_ctx = state->obj_pool()->add(new ExprContext(_root)); - (*new_ctx)->_pool.reset(new MemPool()); - for (int i = 0; i < _fn_contexts.size(); ++i) { - (*new_ctx)->_fn_contexts.push_back(_fn_contexts[i]->impl()->clone((*new_ctx)->_pool.get())); - } - - (*new_ctx)->_is_clone = true; - (*new_ctx)->_prepared = true; - (*new_ctx)->_opened = true; - - return _root->open(state, *new_ctx, FunctionContext::THREAD_LOCAL); -} - -Status ExprContext::clone(RuntimeState* state, ExprContext** new_ctx, Expr* root) { - DCHECK(_prepared); - DCHECK(_opened); - DCHECK(*new_ctx == nullptr); - - *new_ctx = state->obj_pool()->add(new ExprContext(root)); - (*new_ctx)->_pool.reset(new MemPool()); - for (int i = 0; i < _fn_contexts.size(); ++i) { - (*new_ctx)->_fn_contexts.push_back(_fn_contexts[i]->impl()->clone((*new_ctx)->_pool.get())); - } - - (*new_ctx)->_is_clone = true; - (*new_ctx)->_prepared = true; - (*new_ctx)->_opened = true; - - return root->open(state, *new_ctx, FunctionContext::THREAD_LOCAL); -} - -void ExprContext::free_local_allocations() { - free_local_allocations(_fn_contexts); -} - -void ExprContext::free_local_allocations(const std::vector& ctxs) { - for (int i = 0; i < ctxs.size(); ++i) { - ctxs[i]->free_local_allocations(); - } -} - -void ExprContext::free_local_allocations(const std::vector& fn_ctxs) { - for (int i = 0; i < fn_ctxs.size(); ++i) { - if (fn_ctxs[i]->impl()->closed()) { - continue; - } - fn_ctxs[i]->impl()->free_local_allocations(); - } -} - -bool ExprContext::is_nullable() { - if (_root->is_slotref()) { - return SlotRef::is_nullable(_root); - } - return false; -} - -void* ExprContext::get_value(Expr* e, TupleRow* row, int precision, int scale) { - switch (e->_type.type) { - case TYPE_NULL: { - return nullptr; - } - case TYPE_BOOLEAN: { - doris_udf::BooleanVal v = e->get_boolean_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.bool_val = v.val; - return &_result.bool_val; - } - case TYPE_TINYINT: { - doris_udf::TinyIntVal v = e->get_tiny_int_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.tinyint_val = v.val; - return &_result.tinyint_val; - } - case TYPE_SMALLINT: { - doris_udf::SmallIntVal v = e->get_small_int_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.smallint_val = v.val; - return &_result.smallint_val; - } - case TYPE_INT: { - doris_udf::IntVal v = e->get_int_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.int_val = v.val; - return &_result.int_val; - } - case TYPE_BIGINT: { - doris_udf::BigIntVal v = e->get_big_int_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.bigint_val = v.val; - return &_result.bigint_val; - } - case TYPE_LARGEINT: { - doris_udf::LargeIntVal v = e->get_large_int_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.large_int_val = v.val; - return &_result.large_int_val; - } - case TYPE_FLOAT: { - doris_udf::FloatVal v = e->get_float_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.float_val = v.val; - return &_result.float_val; - } - case TYPE_TIME: - case TYPE_TIMEV2: - case TYPE_DOUBLE: { - doris_udf::DoubleVal v = e->get_double_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.double_val = v.val; - return &_result.double_val; - } - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: { - doris_udf::StringVal v = e->get_string_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.string_val.data = reinterpret_cast(v.ptr); - _result.string_val.size = v.len; - return &_result.string_val; - } - case TYPE_DATE: - case TYPE_DATETIME: { - doris_udf::DateTimeVal v = e->get_datetime_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.datetime_val = DateTimeValue::from_datetime_val(v); - return &_result.datetime_val; - } - case TYPE_DATEV2: { - doris_udf::DateV2Val v = e->get_datev2_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.datev2_val = - doris::vectorized::DateV2Value::from_datev2_val( - v); - return &_result.datev2_val; - } - case TYPE_DATETIMEV2: { - doris_udf::DateTimeV2Val v = e->get_datetimev2_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.datetimev2_val = doris::vectorized::DateV2Value< - doris::vectorized::DateTimeV2ValueType>::from_datetimev2_val(v); - return &_result.datetimev2_val; - } - case TYPE_DECIMALV2: { - DecimalV2Val v = e->get_decimalv2_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.decimalv2_val = DecimalV2Value::from_decimal_val(v); - return &_result.decimalv2_val; - } - case TYPE_DECIMAL32: { - doris_udf::Decimal32Val v = e->get_decimal32_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.int_val = v.val; - return &_result.int_val; - } - case TYPE_DECIMAL64: { - doris_udf::Decimal64Val v = e->get_decimal64_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.bigint_val = v.val; - return &_result.bigint_val; - } - case TYPE_DECIMAL128I: { - doris_udf::Decimal128Val v = e->get_decimal128_val(this, row); - if (v.is_null) { - return nullptr; - } - _result.large_int_val = v.val; - return &_result.large_int_val; - } - case TYPE_ARRAY: { - doris_udf::CollectionVal v = e->get_array_val(this, row); - if (v.is_null) { - return nullptr; - } - - _result.array_val = CollectionValue::from_collection_val(v); - return &_result.array_val; - } - default: - DCHECK(false) << "Type not implemented: " << e->_type; - return nullptr; - } -} - -void ExprContext::print_value(TupleRow* row, std::string* str) { - RawValue::print_value(get_value(row), _root->type(), _root->_output_scale, str); -} - -void ExprContext::print_value(void* value, std::string* str) { - RawValue::print_value(value, _root->type(), _root->_output_scale, str); -} - -void ExprContext::print_value(void* value, std::stringstream* stream) { - RawValue::print_value(value, _root->type(), _root->_output_scale, stream); -} - -void ExprContext::print_value(TupleRow* row, std::stringstream* stream) { - RawValue::print_value(get_value(row), _root->type(), _root->_output_scale, stream); -} - -BooleanVal ExprContext::get_boolean_val(TupleRow* row) { - return _root->get_boolean_val(this, row); -} - -TinyIntVal ExprContext::get_tiny_int_val(TupleRow* row) { - return _root->get_tiny_int_val(this, row); -} - -SmallIntVal ExprContext::get_small_int_val(TupleRow* row) { - return _root->get_small_int_val(this, row); -} - -IntVal ExprContext::get_int_val(TupleRow* row) { - return _root->get_int_val(this, row); -} - -BigIntVal ExprContext::get_big_int_val(TupleRow* row) { - return _root->get_big_int_val(this, row); -} - -FloatVal ExprContext::get_float_val(TupleRow* row) { - return _root->get_float_val(this, row); -} - -DoubleVal ExprContext::get_double_val(TupleRow* row) { - return _root->get_double_val(this, row); -} - -StringVal ExprContext::get_string_val(TupleRow* row) { - return _root->get_string_val(this, row); -} - -// TODO(zc) -// ArrayVal ExprContext::GetArrayVal(TupleRow* row) { -// return _root->GetArrayVal(this, row); -// } - -DateTimeVal ExprContext::get_datetime_val(TupleRow* row) { - return _root->get_datetime_val(this, row); -} - -DateV2Val ExprContext::get_datev2_val(TupleRow* row) { - return _root->get_datev2_val(this, row); -} - -DateTimeV2Val ExprContext::get_datetimev2_val(TupleRow* row) { - return _root->get_datetimev2_val(this, row); -} - -DecimalV2Val ExprContext::get_decimalv2_val(TupleRow* row) { - return _root->get_decimalv2_val(this, row); -} - -Status ExprContext::get_const_value(RuntimeState* state, Expr& expr, AnyVal** const_val) { - DCHECK(_opened); - if (!expr.is_constant()) { - *const_val = nullptr; - return Status::OK(); - } - - // A constant expression shouldn't have any SlotRefs expr in it. - DCHECK_EQ(expr.get_slot_ids(nullptr), 0); - DCHECK(_pool != nullptr); - const TypeDescriptor& result_type = expr.type(); - ObjectPool* obj_pool = state->obj_pool(); - *const_val = create_any_val(obj_pool, result_type); - if (*const_val == nullptr) { - return Status::InternalError("Could not create any val"); - } - - const void* result = ExprContext::get_value(&expr, nullptr); - AnyValUtil::set_any_val(result, result_type, *const_val); - if (result_type.is_string_type()) { - StringVal* sv = reinterpret_cast(*const_val); - if (!sv->is_null && sv->len > 0) { - // Make sure the memory is owned by this evaluator. - char* ptr_copy = reinterpret_cast(_pool->try_allocate(sv->len)); - if (ptr_copy == nullptr) { - RETURN_LIMIT_EXCEEDED(state, "Could not allocate constant string value", sv->len); - } - memcpy(ptr_copy, sv->ptr, sv->len); - sv->ptr = reinterpret_cast(ptr_copy); - } - } - return get_error(expr._fn_ctx_idx_start, expr._fn_ctx_idx_end); -} - -Status ExprContext::get_error(int start_idx, int end_idx) const { - DCHECK(_opened); - end_idx = end_idx == -1 ? _fn_contexts.size() : end_idx; - DCHECK_GE(start_idx, 0); - DCHECK_LE(end_idx, _fn_contexts.size()); - for (int idx = start_idx; idx < end_idx; ++idx) { - DCHECK_LT(idx, _fn_contexts.size()); - FunctionContext* fn_ctx = _fn_contexts[idx]; - if (fn_ctx->has_error()) return Status::InternalError(fn_ctx->error_msg()); - } - return Status::OK(); -} - -std::string ExprContext::get_error_msg() const { - for (auto fn_ctx : _fn_contexts) { - if (fn_ctx->has_error()) { - return std::string(fn_ctx->error_msg()); - } - } - return ""; -} - -void ExprContext::clear_error_msg() { - for (auto fn_ctx : _fn_contexts) { - fn_ctx->clear_error_msg(); - } -} - -} // namespace doris diff --git a/be/src/exprs/expr_context.h b/be/src/exprs/expr_context.h deleted file mode 100644 index a76b342da6..0000000000 --- a/be/src/exprs/expr_context.h +++ /dev/null @@ -1,197 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/expr-context.h -// and modified by Doris - -#pragma once - -#include - -#include "common/status.h" -#include "exprs/expr.h" -#include "exprs/expr_value.h" -#include "exprs/slot_ref.h" -#include "udf/udf.h" - -#undef USING_DORIS_UDF -#define USING_DORIS_UDF using namespace doris_udf - -USING_DORIS_UDF; - -namespace doris { - -class Expr; -class MemPool; -class RuntimeState; -class RowDescriptor; -class TColumnValue; -class TupleRow; - -/// An ExprContext contains the state for the execution of a tree of Exprs, in particular -/// the FunctionContexts necessary for the expr tree. This allows for multi-threaded -/// expression evaluation, as a given tree can be evaluated using multiple ExprContexts -/// concurrently. A single ExprContext is not thread-safe. -class ExprContext { -public: - ExprContext(Expr* root); - ~ExprContext(); - - /// Prepare expr tree for evaluation. - Status prepare(RuntimeState* state, const RowDescriptor& row_desc); - - /// Must be called after calling Prepare(). Does not need to be called on clones. - /// Idempotent (this allows exprs to be opened multiple times in subplans without - /// reinitializing function state). - Status open(RuntimeState* state); - - //TODO chenhao - static Status open(std::vector input_evals, RuntimeState* state); - - /// Creates a copy of this ExprContext. Open() must be called first. The copy contains - /// clones of each FunctionContext, which share the fragment-local state of the - /// originals but have their own MemPool and thread-local state. Clone() should be used - /// to create an ExprContext for each execution thread that needs to evaluate - /// 'root'. Note that clones are already opened. '*new_context' must be initialized by - /// the caller to nullptr. - Status clone(RuntimeState* state, ExprContext** new_context); - - Status clone(RuntimeState* state, ExprContext** new_ctx, Expr* root); - - /// Closes all FunctionContexts. Must be called on every ExprContext, including clones. - void close(RuntimeState* state); - - /// Calls the appropriate Get*Val() function on this context's expr tree and stores the - /// result in result_. - void* get_value(TupleRow* row); - - /// Convenience functions: print value into 'str' or 'stream'. nullptr turns into "NULL". - void print_value(TupleRow* row, std::string* str); - void print_value(void* value, std::string* str); - void print_value(void* value, std::stringstream* stream); - void print_value(TupleRow* row, std::stringstream* stream); - - /// Creates a FunctionContext, and returns the index that's passed to fn_context() to - /// retrieve the created context. Exprs that need a FunctionContext should call this in - /// Prepare() and save the returned index. 'varargs_buffer_size', if specified, is the - /// size of the varargs buffer in the created FunctionContext (see udf-internal.h). - int register_func(RuntimeState* state, const FunctionContext::TypeDesc& return_type, - const std::vector& arg_types, - int varargs_buffer_size); - - /// Retrieves a registered FunctionContext. 'i' is the index returned by the call to - /// register_func(). This should only be called by Exprs. - FunctionContext* fn_context(int i) { - DCHECK_GE(i, 0); - DCHECK_LT(i, _fn_contexts.size()); - return _fn_contexts[i]; - } - - Expr* root() { return _root; } - - bool closed() { return _closed; } - - bool is_nullable(); - - /// Calls Get*Val on _root - BooleanVal get_boolean_val(TupleRow* row); - TinyIntVal get_tiny_int_val(TupleRow* row); - SmallIntVal get_small_int_val(TupleRow* row); - IntVal get_int_val(TupleRow* row); - BigIntVal get_big_int_val(TupleRow* row); - FloatVal get_float_val(TupleRow* row); - DoubleVal get_double_val(TupleRow* row); - StringVal get_string_val(TupleRow* row); - // TODO(zc): - // ArrayVal GetArrayVal(TupleRow* row); - DateTimeVal get_datetime_val(TupleRow* row); - DateV2Val get_datev2_val(TupleRow* row); - DateTimeV2Val get_datetimev2_val(TupleRow* row); - DecimalV2Val get_decimalv2_val(TupleRow* row); - - /// Frees all local allocations made by fn_contexts_. This can be called when result - /// data from this context is no longer needed. - void free_local_allocations(); - static void free_local_allocations(const std::vector& ctxs); - static void free_local_allocations(const std::vector& ctxs); - - bool opened() { return _opened; } - - /// If 'expr' is constant, evaluates it with no input row argument and returns the - /// result in 'const_val'. Sets 'const_val' to nullptr if the argument is not constant. - /// The returned AnyVal and associated varlen data is owned by this evaluator. This - /// should only be called after Open() has been called on this expr. Returns an error - /// if there was an error evaluating the expression or if memory could not be allocated - /// for the expression result. - Status get_const_value(RuntimeState* state, Expr& expr, AnyVal** const_val); - - /// Returns an error status if there was any error in evaluating the expression - /// or its sub-expressions. 'start_idx' and 'end_idx' correspond to the range - /// within the vector of FunctionContext for the sub-expressions of interest. - /// The default parameters correspond to the entire expr 'root_'. - Status get_error(int start_idx, int end_idx) const; - - std::string get_error_msg() const; - - // when you reused this expr context, you maybe need clear the error status and message. - void clear_error_msg(); - -private: - friend class Expr; - friend class ScalarFnCall; - friend class RPCFn; - friend class InPredicate; - friend class RuntimePredicateWrapper; - friend class BloomFilterPredicate; - friend class EsPredicate; - friend class RowGroupReader; - - /// FunctionContexts for each registered expression. The FunctionContexts are created - /// and owned by this ExprContext. - std::vector _fn_contexts; - - /// Pool backing fn_contexts_. - std::unique_ptr _pool; - - /// The expr tree this context is for. - Expr* _root; - - /// Stores the result of the root expr. This is used in interpreted code when we need a - /// void*. - ExprValue _result; - - /// True if this context came from a Clone() call. Used to manage FunctionStateScope. - bool _is_clone; - - /// Variables keeping track of current state. - bool _prepared; - bool _opened; - bool _closed; - - /// Calls the appropriate Get*Val() function on 'e' and stores the result in result_. - /// This is used by Exprs to call GetValue() on a child expr, rather than root_. - void* get_value(Expr* e, TupleRow* row, int precision = 0, int scale = 0); -}; - -inline void* ExprContext::get_value(TupleRow* row) { - if (_root->is_slotref()) { - return SlotRef::get_value(_root, row); - } - return get_value(_root, row); -} - -} // namespace doris diff --git a/be/src/exprs/expr_value.h b/be/src/exprs/expr_value.h deleted file mode 100644 index a3cc441675..0000000000 --- a/be/src/exprs/expr_value.h +++ /dev/null @@ -1,245 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/expr-value.h -// and modified by Doris - -#pragma once - -#include "runtime/collection_value.h" -#include "runtime/datetime_value.h" -#include "runtime/decimalv2_value.h" -#include "runtime/types.h" -#include "vec/common/string_ref.h" - -namespace doris { - -// The materialized value returned by Expr::get_value(). -// Some exprs may set multiple fields of this value at once -// for maintaining state across evaluations. -// For example, the rand() math function uses double_val as its return value, -// and int_val as the state for the random number generator. -struct ExprValue { - bool bool_val; - int8_t tinyint_val; - int16_t smallint_val; - int32_t int_val; - int64_t bigint_val; - __int128 large_int_val; - float float_val; - double double_val; - std::string string_data; - StringRef string_val; - DateTimeValue datetime_val; - doris::vectorized::DateV2Value datev2_val; - doris::vectorized::DateV2Value datetimev2_val; - DecimalV2Value decimalv2_val; - CollectionValue array_val; - - ExprValue() - : bool_val(false), - tinyint_val(0), - smallint_val(0), - int_val(0), - bigint_val(0), - large_int_val(0), - float_val(0.0), - double_val(0.0), - string_data(), - string_val(), - datetime_val(), - decimalv2_val(0), - array_val() {} - - ExprValue(bool v) : bool_val(v) {} - ExprValue(int8_t v) : tinyint_val(v) {} - ExprValue(int16_t v) : smallint_val(v) {} - ExprValue(int32_t v) : int_val(v) {} - ExprValue(int64_t v) : bigint_val(v) {} - ExprValue(__int128 value) : large_int_val(value) {} - ExprValue(float v) : float_val(v) {} - ExprValue(double v) : double_val(v) {} - ExprValue(int64_t i, int32_t f) : decimalv2_val(i, f) {} - - // c'tor for string values - ExprValue(const std::string& str) : string_data(str), string_val(string_data) {} - - // Set string value to copy of str - void set_string_val(const StringRef& str) { - string_data = std::string(str.data, str.size); - sync_string_val(); - } - - void set_string_val(const std::string& str) { - string_data = str; - sync_string_val(); - } - - // Updates string_val ptr / len pair to reflect any changes in - // string_data. If not called after mutating string_data, - // string_val->ptr may point at garbage. - void sync_string_val() { - string_val.data = string_data.data(); - string_val.size = string_data.size(); - } - - // Sets the value for type to '0' and returns a pointer to the data - void* set_to_zero(const TypeDescriptor& type) { - switch (type.type) { - case TYPE_NULL: - return nullptr; - - case TYPE_BOOLEAN: - bool_val = false; - return &bool_val; - - case TYPE_TINYINT: - tinyint_val = 0; - return &tinyint_val; - - case TYPE_SMALLINT: - smallint_val = 0; - return &smallint_val; - - case TYPE_INT: - int_val = 0; - return &int_val; - - case TYPE_BIGINT: - bigint_val = 0; - return &bigint_val; - - case TYPE_LARGEINT: - large_int_val = 0; - return &large_int_val; - - case TYPE_FLOAT: - float_val = 0; - return &float_val; - - case TYPE_DOUBLE: - double_val = 0; - return &double_val; - - case TYPE_DECIMALV2: - decimalv2_val.set_to_zero(); - return &decimalv2_val; - - default: - DCHECK(false); - return nullptr; - } - } - - // Sets the value for type to min and returns a pointer to the data - void* set_to_min(const TypeDescriptor& type) { - switch (type.type) { - case TYPE_NULL: - return nullptr; - - case TYPE_BOOLEAN: - bool_val = false; - return &bool_val; - - case TYPE_TINYINT: - tinyint_val = std::numeric_limits::min(); - return &tinyint_val; - - case TYPE_SMALLINT: - smallint_val = std::numeric_limits::min(); - return &smallint_val; - - case TYPE_INT: - int_val = std::numeric_limits::min(); - return &int_val; - - case TYPE_BIGINT: - bigint_val = std::numeric_limits::min(); - return &bigint_val; - - case TYPE_LARGEINT: - large_int_val = std::numeric_limits::min(); - return &large_int_val; - - case TYPE_FLOAT: - float_val = std::numeric_limits::lowest(); - return &float_val; - - case TYPE_DOUBLE: - double_val = std::numeric_limits::lowest(); - return &double_val; - - case TYPE_DECIMALV2: - decimalv2_val = DecimalV2Value::get_min_decimal(); - return &decimalv2_val; - - default: - DCHECK(false); - return nullptr; - } - } - - // Sets the value for type to max and returns a pointer to the data - void* set_to_max(const TypeDescriptor& type) { - switch (type.type) { - case TYPE_NULL: - return nullptr; - - case TYPE_BOOLEAN: - bool_val = true; - return &bool_val; - - case TYPE_TINYINT: - tinyint_val = std::numeric_limits::max(); - return &tinyint_val; - - case TYPE_SMALLINT: - smallint_val = std::numeric_limits::max(); - return &smallint_val; - - case TYPE_INT: - int_val = std::numeric_limits::max(); - return &int_val; - - case TYPE_BIGINT: - bigint_val = std::numeric_limits::max(); - return &bigint_val; - - case TYPE_LARGEINT: - large_int_val = std::numeric_limits::max(); - return &large_int_val; - - case TYPE_FLOAT: - float_val = std::numeric_limits::max(); - return &float_val; - - case TYPE_DOUBLE: - double_val = std::numeric_limits::max(); - return &double_val; - - case TYPE_DECIMALV2: - decimalv2_val = DecimalV2Value::get_max_decimal(); - return &decimalv2_val; - - default: - DCHECK(false); - return nullptr; - } - } -}; - -} // namespace doris diff --git a/be/src/exprs/grouping_sets_functions.cpp b/be/src/exprs/grouping_sets_functions.cpp deleted file mode 100644 index e182ddf0cd..0000000000 --- a/be/src/exprs/grouping_sets_functions.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/grouping_sets_functions.h" - -namespace doris { - -void GroupingSetsFunctions::init() {} - -doris_udf::BigIntVal GroupingSetsFunctions::grouping_id(doris_udf::FunctionContext* ctx, - const doris_udf::BigIntVal& grouping_id) { - return grouping_id; -} - -BigIntVal GroupingSetsFunctions::grouping(doris_udf::FunctionContext* ctx, - const doris_udf::BigIntVal& grouping) { - return grouping; -} - -} // namespace doris diff --git a/be/src/exprs/grouping_sets_functions.h b/be/src/exprs/grouping_sets_functions.h deleted file mode 100644 index 524692bc50..0000000000 --- a/be/src/exprs/grouping_sets_functions.h +++ /dev/null @@ -1,34 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "udf/udf.h" - -namespace doris { - -class GroupingSetsFunctions { -public: - static void init(); - - static doris_udf::BigIntVal grouping_id(doris_udf::FunctionContext* ctx, - const doris_udf::BigIntVal& grouping_id); - static doris_udf::BigIntVal grouping(doris_udf::FunctionContext* ctx, - const doris_udf::BigIntVal& grouping); -}; - -} // namespace doris diff --git a/be/src/exprs/hll_function.cpp b/be/src/exprs/hll_function.cpp deleted file mode 100644 index 6fb45ec0ec..0000000000 --- a/be/src/exprs/hll_function.cpp +++ /dev/null @@ -1,131 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/hll_function.h" - -#include "exprs/anyval_util.h" -#include "olap/hll.h" -#include "util/hash_util.hpp" -#include "util/slice.h" - -namespace doris { - -using doris_udf::BigIntVal; -using doris_udf::StringVal; - -void HllFunctions::init() {} - -StringVal HllFunctions::hll_hash(FunctionContext* ctx, const StringVal& input) { - return AnyValUtil::from_string_temp(ctx, hll_hash(input)); -} - -std::string HllFunctions::hll_hash(const StringVal& input) { - HyperLogLog hll; - if (!input.is_null) { - uint64_t hash_value = HashUtil::murmur_hash64A(input.ptr, input.len, HashUtil::MURMUR_SEED); - hll.update(hash_value); - } - std::string buf; - buf.resize(hll.max_serialized_size()); - buf.resize(hll.serialize((uint8_t*)buf.c_str())); - - return buf; -} - -void HllFunctions::hll_init(FunctionContext*, StringVal* dst) { - dst->is_null = false; - dst->len = sizeof(HyperLogLog); - dst->ptr = (uint8_t*)new HyperLogLog(); -} - -StringVal HllFunctions::hll_empty(FunctionContext* ctx) { - return AnyValUtil::from_string_temp(ctx, HyperLogLog::empty()); -} - -template -void HllFunctions::hll_update(FunctionContext*, const T& src, StringVal* dst) { - if (src.is_null) { - return; - } - - uint64_t hash_value = AnyValUtil::hash64_murmur(src, HashUtil::MURMUR_SEED); - if (hash_value != 0) { - auto* dst_hll = reinterpret_cast(dst->ptr); - dst_hll->update(hash_value); - } -} - -void HllFunctions::hll_merge(FunctionContext*, const StringVal& src, StringVal* dst) { - if (src.is_null) { - return; - } - auto* dst_hll = reinterpret_cast(dst->ptr); - // zero size means the src input is a agg object - if (src.len == 0) { - dst_hll->merge(*reinterpret_cast(src.ptr)); - } else { - dst_hll->merge(HyperLogLog(Slice(src.ptr, src.len))); - } -} - -BigIntVal HllFunctions::hll_finalize(FunctionContext*, const StringVal& src) { - auto* src_hll = reinterpret_cast(src.ptr); - BigIntVal result(src_hll->estimate_cardinality()); - delete src_hll; - return result; -} - -BigIntVal HllFunctions::hll_get_value(FunctionContext*, const StringVal& src) { - if (src.is_null) { - return BigIntVal::null(); - } - auto* src_hll = reinterpret_cast(src.ptr); - BigIntVal result(src_hll->estimate_cardinality()); - return result; -} - -BigIntVal HllFunctions::hll_cardinality(FunctionContext* ctx, const StringVal& input) { - if (input.is_null) { - return BigIntVal(); - } - StringVal dst; - hll_init(ctx, &dst); - hll_merge(ctx, input, &dst); - return hll_finalize(ctx, dst); -} - -StringVal HllFunctions::hll_serialize(FunctionContext* ctx, const StringVal& src) { - auto* src_hll = reinterpret_cast(src.ptr); - StringVal result(ctx, src_hll->max_serialized_size()); - int size = src_hll->serialize((uint8_t*)result.ptr); - result.resize(ctx, size); - delete src_hll; - return result; -} - -template void HllFunctions::hll_update(FunctionContext*, const BooleanVal&, StringVal*); -template void HllFunctions::hll_update(FunctionContext*, const TinyIntVal&, StringVal*); -template void HllFunctions::hll_update(FunctionContext*, const SmallIntVal&, StringVal*); -template void HllFunctions::hll_update(FunctionContext*, const IntVal&, StringVal*); -template void HllFunctions::hll_update(FunctionContext*, const BigIntVal&, StringVal*); -template void HllFunctions::hll_update(FunctionContext*, const FloatVal&, StringVal*); -template void HllFunctions::hll_update(FunctionContext*, const DoubleVal&, StringVal*); -template void HllFunctions::hll_update(FunctionContext*, const StringVal&, StringVal*); -template void HllFunctions::hll_update(FunctionContext*, const DateTimeVal&, StringVal*); -template void HllFunctions::hll_update(FunctionContext*, const LargeIntVal&, StringVal*); -template void HllFunctions::hll_update(FunctionContext*, const DecimalV2Val&, StringVal*); -} // namespace doris diff --git a/be/src/exprs/hll_function.h b/be/src/exprs/hll_function.h deleted file mode 100644 index 9cce923452..0000000000 --- a/be/src/exprs/hll_function.h +++ /dev/null @@ -1,50 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include - -#include "udf/udf.h" - -namespace doris { - -class HllFunctions { -public: - static void init(); - static StringVal hll_hash(FunctionContext* ctx, const StringVal& dest_base); - static std::string hll_hash(const StringVal& dest_base); - - static StringVal hll_empty(FunctionContext* ctx); - static void hll_init(FunctionContext*, StringVal* dst); - - template - static void hll_update(FunctionContext*, const T& src, StringVal* dst); - - static void hll_merge(FunctionContext*, const StringVal& src, StringVal* dst); - - static BigIntVal hll_finalize(FunctionContext*, const StringVal& src); - - // Get the hll cardinality, the difference from hll_finalize method is - // hll_get_value method doesn't free memory, this function is used in analytic get_value function - static BigIntVal hll_get_value(FunctionContext*, const StringVal& src); - - static StringVal hll_serialize(FunctionContext* ctx, const StringVal& src); - - static BigIntVal hll_cardinality(FunctionContext* ctx, const StringVal& src); -}; -} // namespace doris diff --git a/be/src/exprs/hll_hash_function.cpp b/be/src/exprs/hll_hash_function.cpp deleted file mode 100644 index 06247435ed..0000000000 --- a/be/src/exprs/hll_hash_function.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/hll_hash_function.h" - -#include "exprs/aggregate_functions.h" -#include "exprs/anyval_util.h" -#include "olap/hll.h" -#include "util/hash_util.hpp" - -namespace doris { - -using doris_udf::BigIntVal; -using doris_udf::StringVal; - -void HllHashFunctions::init() {} - -StringVal HllHashFunctions::hll_hash(FunctionContext* ctx, const StringVal& input) { - HyperLogLog hll; - if (!input.is_null) { - uint64_t hash_value = HashUtil::murmur_hash64A(input.ptr, input.len, HashUtil::MURMUR_SEED); - hll.update(hash_value); - } - std::string buf; - buf.resize(hll.max_serialized_size()); - buf.resize(hll.serialize((uint8_t*)buf.data())); - return AnyValUtil::from_string_temp(ctx, buf); -} - -BigIntVal HllHashFunctions::hll_cardinality(FunctionContext* ctx, const HllVal& input) { - if (input.is_null) { - return BigIntVal::null(); - } - HllVal dst; - AggregateFunctions::hll_union_agg_init(ctx, &dst); - AggregateFunctions::hll_union_agg_update(ctx, input, &dst); - return AggregateFunctions::hll_union_agg_finalize(ctx, dst); -} - -} // namespace doris diff --git a/be/src/exprs/hll_hash_function.h b/be/src/exprs/hll_hash_function.h deleted file mode 100644 index 8e8e8db486..0000000000 --- a/be/src/exprs/hll_hash_function.h +++ /dev/null @@ -1,35 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "udf/udf.h" - -namespace doris { - -class Expr; -class TupleRow; - -// todo(kks): for backward compatibility, we should remove this class -// when doris 0.12 release -class HllHashFunctions { -public: - static void init(); - static StringVal hll_hash(FunctionContext* ctx, const StringVal& dest_base); - static BigIntVal hll_cardinality(FunctionContext* ctx, const HllVal& dest_base); -}; -} // namespace doris diff --git a/be/src/exprs/in_predicate.cpp b/be/src/exprs/in_predicate.cpp deleted file mode 100644 index 32acb5fdeb..0000000000 --- a/be/src/exprs/in_predicate.cpp +++ /dev/null @@ -1,142 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/in-predicate.cpp -// and modified by Doris - -#include "exprs/in_predicate.h" - -#include "exprs/create_predicate_function.h" -#include "exprs/expr_context.h" -#include "runtime/runtime_state.h" - -namespace doris { - -InPredicate::InPredicate(const TExprNode& node) - : Predicate(node), - _is_not_in(node.in_predicate.is_not_in), - _is_prepare(false), - _null_in_set(false), - _hybrid_set() {} - -InPredicate::~InPredicate() { - if (_should_delete) { - delete _hybrid_set; - } -} - -Status InPredicate::prepare(RuntimeState* state, HybridSetBase* hset) { - if (_is_prepare) { - return Status::OK(); - } - _hybrid_set = hset; - if (nullptr == _hybrid_set) { - return Status::InternalError("Unknown column type."); - } - _is_prepare = true; - - return Status::OK(); -} - -Status InPredicate::open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) { - Expr::open(state, context, scope); - - for (int i = 1; i < _children.size(); ++i) { - if (_children[0]->type().is_string_type()) { - if (!_children[i]->type().is_string_type()) { - return Status::InternalError("InPredicate type not same"); - } - } else { - if (_children[i]->type().type != _children[0]->type().type) { - return Status::InternalError("InPredicate type not same"); - } - } - - void* value = context->get_value(_children[i], nullptr); - if (value == nullptr) { - _null_in_set = true; - continue; - } - _hybrid_set->insert(value); - } - return Status::OK(); -} - -Status InPredicate::prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* context) { - for (int i = 0; i < _children.size(); ++i) { - RETURN_IF_ERROR(_children[i]->prepare(state, row_desc, context)); - } - if (_is_prepare) { - return Status::OK(); - } - if (_children.size() < 1) { - return Status::InternalError("no Function operator in."); - } - - _hybrid_set = create_set(_children[0]->type().type); - if (nullptr == _hybrid_set) { - return Status::InternalError("Unknown column type."); - } - _should_delete = true; - - _is_prepare = true; - - return Status::OK(); -} - -void InPredicate::insert(void* value) { - if (nullptr == value) { - _null_in_set = true; - } else { - _hybrid_set->insert(value); - } -} - -std::string InPredicate::debug_string() const { - std::stringstream out; - out << "InPredicate(" << get_child(0)->debug_string() << " " << _is_not_in << ",["; - int num_children = get_num_children(); - - for (int i = 1; i < num_children; ++i) { - out << (i == 1 ? "" : " ") << get_child(i)->debug_string(); - } - - out << "])"; - return out.str(); -} - -// this in predicate profile for case "a IN (1, 2, 3)" -// not for "a IN (b, 2, 3)" -// a, b is a column or a expr that contain slot -BooleanVal InPredicate::get_boolean_val(ExprContext* ctx, TupleRow* row) { - void* lhs_slot = ctx->get_value(_children[0], row); - if (lhs_slot == nullptr) { - return BooleanVal::null(); - } - // if find in const set, return true - if (_hybrid_set->find(lhs_slot)) { - return BooleanVal(!_is_not_in); - } - if (_null_in_set) { - return BooleanVal::null(); - } - return BooleanVal(_is_not_in); -} - -} // namespace doris diff --git a/be/src/exprs/in_predicate.h b/be/src/exprs/in_predicate.h deleted file mode 100644 index 41e5585676..0000000000 --- a/be/src/exprs/in_predicate.h +++ /dev/null @@ -1,70 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/in-predicate.h -// and modified by Doris - -#pragma once - -#include "exprs/hybrid_set.h" -#include "exprs/predicate.h" - -namespace doris { - -// has two method: -// 1. construct from TExprNode -// 2. construct by new one, and push child. -class InPredicate : public Predicate { -public: - ~InPredicate() override; - Expr* clone(ObjectPool* pool) const override { return pool->add(new InPredicate(*this)); } - - Status prepare(RuntimeState* state, HybridSetBase* hset); - Status open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) override; - Status prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* context) override; - - BooleanVal get_boolean_val(ExprContext* context, TupleRow* row) override; - - // this function add one item in hashset, not add to children. - // if add to children, when List is long, copy is a expensive op. - void insert(void* value); - - HybridSetBase* hybrid_set() const { return _hybrid_set; } - - bool is_not_in() const { return _is_not_in; } - -protected: - friend class Expr; - friend class HashJoinNode; - friend class RuntimePredicateWrapper; - - InPredicate(const TExprNode& node); - - // virtual Status prepare(RuntimeState* state, const RowDescriptor& desc); - std::string debug_string() const override; - -private: - const bool _is_not_in; - bool _is_prepare; - bool _null_in_set; - HybridSetBase* _hybrid_set; - bool _should_delete = false; -}; - -} // namespace doris diff --git a/be/src/exprs/info_func.cpp b/be/src/exprs/info_func.cpp deleted file mode 100644 index 2277f1cc1f..0000000000 --- a/be/src/exprs/info_func.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/info_func.h" - -#include - -namespace doris { - -InfoFunc::InfoFunc(const TExprNode& node) - : Expr(node), _int_value(node.info_func.int_value), _str_value(node.info_func.str_value) {} - -StringVal InfoFunc::get_string_val(ExprContext* context, TupleRow*) { - return {_str_value.c_str(), static_cast(_str_value.size())}; -} - -BigIntVal InfoFunc::get_big_int_val(ExprContext* context, TupleRow*) { - return BigIntVal(_int_value); -} - -std::string InfoFunc::debug_string() const { - std::stringstream out; - out << "InfoFunc(" << Expr::debug_string() << " int_value: " << _int_value - << "; str_value: " << _str_value << ")"; - return out.str(); -} - -void* InfoFunc::compute_fn(Expr* e, TupleRow* row) { - return nullptr; -} - -} // namespace doris diff --git a/be/src/exprs/info_func.h b/be/src/exprs/info_func.h deleted file mode 100644 index ff37a6fc12..0000000000 --- a/be/src/exprs/info_func.h +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "common/object_pool.h" -#include "exprs/expr.h" -#include "gen_cpp/Exprs_types.h" - -namespace doris { - -class InfoFunc : public Expr { -public: - virtual ~InfoFunc() {} - - virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new InfoFunc(*this)); } - -protected: - friend class Expr; - - InfoFunc(const TExprNode& node); - - virtual StringVal get_string_val(ExprContext* context, TupleRow*) override; - virtual BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - - virtual std::string debug_string() const override; - -private: - static void* compute_fn(Expr* e, TupleRow* row); - int64_t _int_value; - std::string _str_value; -}; - -} // namespace doris diff --git a/be/src/exprs/is_null_predicate.cpp b/be/src/exprs/is_null_predicate.cpp deleted file mode 100644 index 3b4b5b81a8..0000000000 --- a/be/src/exprs/is_null_predicate.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/is-null-predicate.cc -// and modified by Doris - -#include "exprs/is_null_predicate.h" - -#include "udf/udf.h" - -namespace doris { - -void IsNullPredicate::init() {} - -template -BooleanVal IsNullPredicate::is_null(FunctionContext* ctx, const T& val) { - return val.is_null; -} - -template -BooleanVal IsNullPredicate::is_not_null(FunctionContext* ctx, const T& val) { - return !val.is_null; -} - -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const AnyVal&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const BooleanVal&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const TinyIntVal&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const SmallIntVal&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const IntVal&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const BigIntVal&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const LargeIntVal&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const FloatVal&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const DoubleVal&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const StringVal&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const DateTimeVal&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const DecimalV2Val&); -template BooleanVal IsNullPredicate::is_null(FunctionContext*, const CollectionVal&); - -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const AnyVal&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const BooleanVal&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const TinyIntVal&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const SmallIntVal&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const IntVal&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const BigIntVal&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const LargeIntVal&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const FloatVal&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const DoubleVal&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const StringVal&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const DateTimeVal&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const DecimalV2Val&); -template BooleanVal IsNullPredicate::is_not_null(FunctionContext*, const CollectionVal&); - -} // namespace doris diff --git a/be/src/exprs/is_null_predicate.h b/be/src/exprs/is_null_predicate.h deleted file mode 100644 index 7a68a1ecaa..0000000000 --- a/be/src/exprs/is_null_predicate.h +++ /dev/null @@ -1,40 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/is-null-predicate.h -// and modified by Doris - -#pragma once - -#include - -#include "exprs/predicate.h" - -namespace doris { - -class IsNullPredicate { -public: - static void init(); - - template - static BooleanVal is_null(FunctionContext* ctx, const T& val); - - template - static BooleanVal is_not_null(FunctionContext* ctx, const T& val); -}; - -} // namespace doris diff --git a/be/src/exprs/json_functions.h b/be/src/exprs/json_functions.h index 89899271a8..61b3e8d0db 100644 --- a/be/src/exprs/json_functions.h +++ b/be/src/exprs/json_functions.h @@ -34,9 +34,7 @@ enum JsonFunctionType { JSON_FUN_UNKNOWN //The last }; -class Expr; class OpcodeRegistry; -class TupleRow; struct JsonPath { std::string key; // key of a json object diff --git a/be/src/exprs/literal.cpp b/be/src/exprs/literal.cpp deleted file mode 100644 index 15ff1be5fe..0000000000 --- a/be/src/exprs/literal.cpp +++ /dev/null @@ -1,266 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/literal.cc -// and modified by Doris - -#include "exprs/literal.h" - -#include - -#include "gen_cpp/Exprs_types.h" -#include "runtime/collection_value.h" -#include "runtime/large_int_value.h" -#include "runtime/runtime_state.h" -#include "util/string_parser.hpp" - -namespace doris { - -Literal::Literal(const TExprNode& node) : Expr(node) { - switch (_type.type) { - case TYPE_BOOLEAN: - DCHECK_EQ(node.node_type, TExprNodeType::BOOL_LITERAL); - DCHECK(node.__isset.bool_literal); - _value.bool_val = node.bool_literal.value; - break; - case TYPE_TINYINT: - DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); - DCHECK(node.__isset.int_literal); - _value.tinyint_val = node.int_literal.value; - break; - case TYPE_SMALLINT: - DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); - DCHECK(node.__isset.int_literal); - _value.smallint_val = node.int_literal.value; - break; - case TYPE_INT: - DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); - DCHECK(node.__isset.int_literal); - _value.int_val = node.int_literal.value; - break; - case TYPE_BIGINT: - DCHECK_EQ(node.node_type, TExprNodeType::INT_LITERAL); - DCHECK(node.__isset.int_literal); - _value.bigint_val = node.int_literal.value; - break; - case TYPE_LARGEINT: { - StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; - DCHECK_EQ(node.node_type, TExprNodeType::LARGE_INT_LITERAL); - _value.large_int_val = StringParser::string_to_int<__int128>( - node.large_int_literal.value.c_str(), node.large_int_literal.value.size(), - &parse_result); - if (parse_result != StringParser::PARSE_SUCCESS) { - _value.large_int_val = MAX_INT128; - } - break; - } - case TYPE_FLOAT: - DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL); - DCHECK(node.__isset.float_literal); - _value.float_val = node.float_literal.value; - break; - case TYPE_DOUBLE: - case TYPE_TIME: - case TYPE_TIMEV2: - DCHECK_EQ(node.node_type, TExprNodeType::FLOAT_LITERAL); - DCHECK(node.__isset.float_literal); - _value.double_val = node.float_literal.value; - break; - case TYPE_DATE: - case TYPE_DATETIME: - _value.datetime_val.from_date_str(node.date_literal.value.c_str(), - node.date_literal.value.size()); - break; - case TYPE_DATEV2: - _value.datev2_val.from_date_str(node.date_literal.value.c_str(), - node.date_literal.value.size()); - break; - case TYPE_DATETIMEV2: - _value.datetimev2_val.from_date_str(node.date_literal.value.c_str(), - node.date_literal.value.size()); - break; - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_STRING: - DCHECK_EQ(node.node_type, TExprNodeType::STRING_LITERAL); - DCHECK(node.__isset.string_literal); - _value.set_string_val(node.string_literal.value); - break; - - case TYPE_DECIMALV2: { - DCHECK_EQ(node.node_type, TExprNodeType::DECIMAL_LITERAL); - DCHECK(node.__isset.decimal_literal); - _value.decimalv2_val = DecimalV2Value(node.decimal_literal.value); - break; - } - case TYPE_DECIMAL32: - case TYPE_DECIMAL64: - case TYPE_DECIMAL128I: { - DCHECK_EQ(node.node_type, TExprNodeType::DECIMAL_LITERAL); - DCHECK(node.__isset.decimal_literal); - _value.set_string_val(node.decimal_literal.value); - break; - } - case TYPE_ARRAY: { - DCHECK_EQ(node.node_type, TExprNodeType::ARRAY_LITERAL); - // init in prepare - break; - } - default: - DCHECK(false) << "Invalid type: " << _type.debug_string(); - break; - } -} - -Literal::~Literal() {} - -BooleanVal Literal::get_boolean_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_BOOLEAN) << _type; - return BooleanVal(_value.bool_val); -} - -TinyIntVal Literal::get_tiny_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_TINYINT) << _type; - return TinyIntVal(_value.tinyint_val); -} - -SmallIntVal Literal::get_small_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_SMALLINT) << _type; - return SmallIntVal(_value.smallint_val); -} - -IntVal Literal::get_int_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.type == TYPE_INT) << _type; - return IntVal(_value.int_val); -} - -BigIntVal Literal::get_big_int_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.type == TYPE_BIGINT) << _type; - return BigIntVal(_value.bigint_val); -} - -LargeIntVal Literal::get_large_int_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.type == TYPE_LARGEINT) << _type; - return LargeIntVal(_value.large_int_val); -} - -Decimal32Val Literal::get_decimal32_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.type == TYPE_DECIMAL32) << _type; - StringParser::ParseResult result; - auto decimal32_value = StringParser::string_to_decimal( - _value.string_val.data, _value.string_val.size, _type.precision, _type.scale, &result); - if (result == StringParser::ParseResult::PARSE_SUCCESS) { - return Decimal32Val(decimal32_value); - } else { - return Decimal32Val::null(); - } -} - -Decimal64Val Literal::get_decimal64_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.type == TYPE_DECIMAL64) << _type; - StringParser::ParseResult result; - auto decimal_value = StringParser::string_to_decimal( - _value.string_val.data, _value.string_val.size, _type.precision, _type.scale, &result); - if (result == StringParser::ParseResult::PARSE_SUCCESS) { - return Decimal64Val(decimal_value); - } else { - return Decimal64Val::null(); - } -} - -Decimal128Val Literal::get_decimal128_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.type == TYPE_DECIMAL128I) << _type; - StringParser::ParseResult result; - auto decimal_value = StringParser::string_to_decimal( - _value.string_val.data, _value.string_val.size, _type.precision, _type.scale, &result); - if (result == StringParser::ParseResult::PARSE_SUCCESS) { - return Decimal128Val(decimal_value); - } else { - return Decimal128Val::null(); - } -} - -FloatVal Literal::get_float_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_FLOAT) << _type; - return FloatVal(_value.float_val); -} - -DoubleVal Literal::get_double_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.type == TYPE_DOUBLE || _type.type == TYPE_TIME || _type.type == TYPE_TIMEV2) - << _type; - return DoubleVal(_value.double_val); -} - -DecimalV2Val Literal::get_decimalv2_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_DECIMALV2) << _type; - DecimalV2Val dec_val; - _value.decimalv2_val.to_decimal_val(&dec_val); - return dec_val; -} - -DateTimeVal Literal::get_datetime_val(ExprContext* context, TupleRow* row) { - DateTimeVal dt_val; - _value.datetime_val.to_datetime_val(&dt_val); - return dt_val; -} - -DateV2Val Literal::get_datev2_val(ExprContext* context, TupleRow* row) { - DateV2Val dt_val; - _value.datev2_val.to_datev2_val(&dt_val); - return dt_val; -} - -DateTimeV2Val Literal::get_datetimev2_val(ExprContext* context, TupleRow* row) { - DateTimeV2Val dt_val; - _value.datetimev2_val.to_datetimev2_val(&dt_val); - return dt_val; -} - -StringVal Literal::get_string_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.is_string_type()) << _type; - StringVal str_val; - _value.string_val.to_string_val(&str_val); - return str_val; -} - -CollectionVal Literal::get_array_val(ExprContext* context, TupleRow*) { - DCHECK(_type.is_collection_type()); - CollectionVal val; - _value.array_val.to_collection_val(&val); - return val; -} - -Status Literal::prepare(RuntimeState* state, const RowDescriptor& row_desc, ExprContext* context) { - RETURN_IF_ERROR(Expr::prepare(state, row_desc, context)); - - if (type().type == TYPE_ARRAY) { - DCHECK_EQ(type().children.size(), 1) << "array children type not 1"; - // init array value - auto child_type = type().children.at(0).type; - RETURN_IF_ERROR(CollectionValue::init_collection(state->obj_pool(), get_num_children(), - child_type, &_value.array_val)); - auto iterator = _value.array_val.iterator(child_type); - // init every item - for (int i = 0; i < get_num_children() && iterator.has_next(); ++i, iterator.next()) { - Expr* child = get_child(i); - iterator.set(child->get_const_val(context)); - } - } - - return Status::OK(); -} -} // namespace doris diff --git a/be/src/exprs/literal.h b/be/src/exprs/literal.h deleted file mode 100644 index 06ddb0b398..0000000000 --- a/be/src/exprs/literal.h +++ /dev/null @@ -1,63 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/literal.h -// and modified by Doris - -#pragma once - -#include "common/object_pool.h" -#include "exprs/expr.h" -#include "exprs/expr_value.h" - -namespace doris { - -class TExprNode; - -class Literal final : public Expr { -public: - Literal(const TExprNode& node); - ~Literal() override; - - Expr* clone(ObjectPool* pool) const override { return pool->add(new Literal(*this)); } - - BooleanVal get_boolean_val(ExprContext* context, TupleRow*) override; - TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - IntVal get_int_val(ExprContext* context, TupleRow*) override; - BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; - FloatVal get_float_val(ExprContext* context, TupleRow*) override; - DoubleVal get_double_val(ExprContext* context, TupleRow*) override; - DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*) override; - DateTimeVal get_datetime_val(ExprContext* context, TupleRow*) override; - DateV2Val get_datev2_val(ExprContext* context, TupleRow*) override; - DateTimeV2Val get_datetimev2_val(ExprContext* context, TupleRow*) override; - StringVal get_string_val(ExprContext* context, TupleRow* row) override; - CollectionVal get_array_val(ExprContext* context, TupleRow*) override; - Decimal32Val get_decimal32_val(ExprContext* context, TupleRow*) override; - Decimal64Val get_decimal64_val(ExprContext* context, TupleRow*) override; - Decimal128Val get_decimal128_val(ExprContext* context, TupleRow*) override; - // init val before use - Status prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* context) override; - -private: - ExprValue _value; -}; - -} // namespace doris diff --git a/be/src/exprs/match_predicate.h b/be/src/exprs/match_predicate.h index 9acaea477c..8afe57481c 100644 --- a/be/src/exprs/match_predicate.h +++ b/be/src/exprs/match_predicate.h @@ -21,7 +21,6 @@ #include #include -#include "exprs/predicate.h" #include "gen_cpp/Exprs_types.h" #include "olap/column_predicate.h" #include "runtime/string_search.hpp" @@ -30,25 +29,6 @@ namespace doris { enum class MatchType; -class MatchPredicateExpr : public Predicate { -public: - MatchPredicateExpr(const TExprNode& node) : Predicate(node) {} - virtual ~MatchPredicateExpr() {} - Expr* clone(ObjectPool* pool) const override { - return pool->add(new MatchPredicateExpr(*this)); - } - - static bool is_valid(std::string fn_name) { - return fn_name == "match_any" || fn_name == "match_all" || fn_name == "match_phrase" || - fn_name == "match_element_eq" || fn_name == "match_element_lt" || - fn_name == "match_element_gt" || fn_name == "match_element_le" || - fn_name == "match_element_ge"; - } - -protected: - friend class Expr; -}; - class MatchPredicate : public ColumnPredicate { public: static void init() {} diff --git a/be/src/exprs/math_functions.h b/be/src/exprs/math_functions.h index f15f253732..cac13ef6c0 100644 --- a/be/src/exprs/math_functions.h +++ b/be/src/exprs/math_functions.h @@ -26,10 +26,6 @@ namespace doris { -class Expr; -struct ExprValue; -class TupleRow; - class MathFunctions { public: static void init(); diff --git a/be/src/exprs/new_agg_fn_evaluator.cc b/be/src/exprs/new_agg_fn_evaluator.cc deleted file mode 100644 index f795522009..0000000000 --- a/be/src/exprs/new_agg_fn_evaluator.cc +++ /dev/null @@ -1,643 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.10.0/be/src/exprs/agg-fn-evaluator.cc -// and modified by Doris - -#include "exprs/new_agg_fn_evaluator.h" - -#include - -#include - -#include "exprs/agg_fn.h" -#include "exprs/anyval_util.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "runtime/raw_value.h" -#include "runtime/runtime_state.h" -#include "udf/udf_internal.h" -#include "vec/common/string_ref.h" - -using namespace doris; -using namespace doris_udf; -using std::move; - -// typedef for builtin aggregate functions. Unfortunately, these type defs don't -// really work since the actual builtin is implemented not in terms of the base -// AnyVal* type. Due to this, there are lots of casts when we use these typedefs. -// TODO: these typedefs exists as wrappers to go from (TupleRow, Tuple) to the -// types the aggregation functions need. This needs to be done with codegen instead. -typedef void (*InitFn)(FunctionContext*, AnyVal*); -typedef void (*UpdateFn0)(FunctionContext*, AnyVal*); -typedef void (*UpdateFn1)(FunctionContext*, const AnyVal&, AnyVal*); -typedef void (*UpdateFn2)(FunctionContext*, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn3)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn4)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, AnyVal*); -typedef void (*UpdateFn5)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn6)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn7)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, AnyVal*); -typedef void (*UpdateFn8)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, - AnyVal*); - -typedef void (*VarargUpdateFn0)(FunctionContext*, int num_varargs, const AnyVal*, AnyVal*); -typedef void (*VarargUpdateFn1)(FunctionContext*, const AnyVal&, int num_varargs, const AnyVal*, - AnyVal*); -typedef void (*VarargUpdateFn2)(FunctionContext*, const AnyVal&, const AnyVal&, int num_varargs, - const AnyVal*, AnyVal*); -typedef void (*VarargUpdateFn3)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - int num_varargs, const AnyVal*, AnyVal*); -typedef void (*VarargUpdateFn4)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); -typedef void (*VarargUpdateFn5)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, int num_varargs, const AnyVal*, - AnyVal*); -typedef void (*VarargUpdateFn6)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, int num_varargs, - const AnyVal*, AnyVal*); -typedef void (*VarargUpdateFn7)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, - int num_varargs, const AnyVal*, AnyVal*); -typedef void (*VarargUpdateFn8)(FunctionContext*, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, const AnyVal&, const AnyVal&, const AnyVal&, - const AnyVal&, int num_varargs, const AnyVal*, AnyVal*); - -typedef StringVal (*SerializeFn)(FunctionContext*, const StringVal&); -typedef AnyVal (*GetValueFn)(FunctionContext*, const AnyVal&); -typedef AnyVal (*FinalizeFn)(FunctionContext*, const AnyVal&); - -NewAggFnEvaluator::NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, bool is_clone) - : _accumulated_mem_consumption(0), - is_clone_(is_clone), - agg_fn_(agg_fn), - mem_pool_(mem_pool) {} - -NewAggFnEvaluator::~NewAggFnEvaluator() { - DCHECK(closed_); -} - -const SlotDescriptor& NewAggFnEvaluator::intermediate_slot_desc() const { - return agg_fn_.intermediate_slot_desc(); -} - -const TypeDescriptor& NewAggFnEvaluator::intermediate_type() const { - return agg_fn_.intermediate_type(); -} - -Status NewAggFnEvaluator::Create(const AggFn& agg_fn, RuntimeState* state, ObjectPool* pool, - MemPool* mem_pool, NewAggFnEvaluator** result, - const RowDescriptor& row_desc) { - *result = nullptr; - - // Create a new AggFn evaluator. - NewAggFnEvaluator* agg_fn_eval = pool->add(new NewAggFnEvaluator(agg_fn, mem_pool, false)); - - agg_fn_eval->agg_fn_ctx_.reset(FunctionContextImpl::create_context( - state, mem_pool, agg_fn.get_intermediate_type_desc(), agg_fn.get_output_type_desc(), - agg_fn.arg_type_descs(), 0, false)); - - Status status; - // Create the evaluators for the input expressions. - for (Expr* input_expr : agg_fn.children()) { - // TODO chenhao replace ExprContext with ScalarFnEvaluator - ExprContext* input_eval = pool->add(new ExprContext(input_expr)); - if (input_eval == nullptr) goto cleanup; - input_eval->prepare(state, row_desc); - agg_fn_eval->input_evals_.push_back(input_eval); - Expr* root = input_eval->root(); - DCHECK(root == input_expr); - AnyVal* staging_input_val; - status = allocate_any_val(state, mem_pool, input_expr->type(), - "Could not allocate aggregate expression input value", - &staging_input_val); - agg_fn_eval->staging_input_vals_.push_back(staging_input_val); - if (UNLIKELY(!status.ok())) goto cleanup; - } - DCHECK_EQ(agg_fn.get_num_children(), agg_fn_eval->input_evals_.size()); - DCHECK_EQ(agg_fn_eval->staging_input_vals_.size(), agg_fn_eval->input_evals_.size()); - - status = allocate_any_val(state, mem_pool, agg_fn.intermediate_type(), - "Could not allocate aggregate expression intermediate value", - &(agg_fn_eval->staging_intermediate_val_)); - if (UNLIKELY(!status.ok())) goto cleanup; - status = allocate_any_val(state, mem_pool, agg_fn.intermediate_type(), - "Could not allocate aggregate expression merge input value", - &(agg_fn_eval->staging_merge_input_val_)); - if (UNLIKELY(!status.ok())) goto cleanup; - - if (agg_fn.is_merge()) { - DCHECK_EQ(agg_fn_eval->staging_input_vals_.size(), 1) << "Merge should only have 1 input."; - } - - *result = agg_fn_eval; - return Status::OK(); - -cleanup: - DCHECK(!status.ok()); - agg_fn_eval->Close(state); - return status; -} - -Status NewAggFnEvaluator::Create(const std::vector& agg_fns, RuntimeState* state, - ObjectPool* pool, MemPool* mem_pool, - std::vector* evals, - const RowDescriptor& row_desc) { - for (const AggFn* agg_fn : agg_fns) { - NewAggFnEvaluator* agg_fn_eval; - RETURN_IF_ERROR( - NewAggFnEvaluator::Create(*agg_fn, state, pool, mem_pool, &agg_fn_eval, row_desc)); - evals->push_back(agg_fn_eval); - } - return Status::OK(); -} - -Status NewAggFnEvaluator::Open(RuntimeState* state) { - if (opened_) return Status::OK(); - opened_ = true; - // TODO chenhao, ScalarFnEvaluator different from ExprContext - RETURN_IF_ERROR(ExprContext::open(input_evals_, state)); - // Now that we have opened all our input exprs, it is safe to evaluate any constant - // values for the UDA's FunctionContext (we cannot evaluate exprs before calling Open() - // on them). - std::vector constant_args(input_evals_.size(), nullptr); - for (int i = 0; i < input_evals_.size(); ++i) { - ExprContext* eval = input_evals_[i]; - RETURN_IF_ERROR(eval->get_const_value(state, *(agg_fn_.get_child(i)), &constant_args[i])); - } - agg_fn_ctx_->impl()->set_constant_args(std::move(constant_args)); - return Status::OK(); -} - -Status NewAggFnEvaluator::Open(const std::vector& evals, RuntimeState* state) { - for (NewAggFnEvaluator* eval : evals) RETURN_IF_ERROR(eval->Open(state)); - return Status::OK(); -} - -void NewAggFnEvaluator::Close(RuntimeState* state) { - if (closed_) return; - closed_ = true; - if (!is_clone_) Expr::close(input_evals_, state); - // TODO chenhao - //FreeLocalAllocations(); - agg_fn_ctx_->impl()->close(); - agg_fn_ctx_.reset(); - - //TODO chenhao release ExprContext - //for (int i = 0; i < input_evals_.size(); i++) { - // ExprContext* context = input_evals_[i]; - // delete context; - //} - input_evals_.clear(); -} - -void NewAggFnEvaluator::Close(const std::vector& evals, RuntimeState* state) { - for (NewAggFnEvaluator* eval : evals) eval->Close(state); -} - -void NewAggFnEvaluator::SetDstSlot(const AnyVal* src, const SlotDescriptor& dst_slot_desc, - Tuple* dst) { - if (src->is_null && dst_slot_desc.is_nullable()) { - dst->set_null(dst_slot_desc.null_indicator_offset()); - return; - } - - dst->set_not_null(dst_slot_desc.null_indicator_offset()); - void* slot = dst->get_slot(dst_slot_desc.tuple_offset()); - switch (dst_slot_desc.type().type) { - case TYPE_NULL: - return; - case TYPE_BOOLEAN: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - case TYPE_TINYINT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - case TYPE_SMALLINT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - case TYPE_INT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - case TYPE_BIGINT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - case TYPE_LARGEINT: - memcpy(slot, &reinterpret_cast(src)->val, sizeof(__int128)); - return; - case TYPE_FLOAT: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - case TYPE_DOUBLE: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: - *reinterpret_cast(slot) = *reinterpret_cast(src); - return; - - case TYPE_DATE: - case TYPE_DATETIME: - *reinterpret_cast(slot) = - DateTimeValue::from_datetime_val(*reinterpret_cast(src)); - return; - - case TYPE_DECIMALV2: - *reinterpret_cast(slot) = reinterpret_cast(src)->val; - return; - - default: - DCHECK(false) << "NYI: " << dst_slot_desc.type(); - } -} - -// This function would be replaced in codegen. -void NewAggFnEvaluator::Init(Tuple* dst) { - DCHECK(opened_); - DCHECK(agg_fn_._init_fn != nullptr); - for (ExprContext* input_eval : input_evals_) { - DCHECK(input_eval->opened()); - } - - const TypeDescriptor& type = intermediate_type(); - const SlotDescriptor& slot_desc = intermediate_slot_desc(); - if (type.type == TYPE_CHAR) { - // The intermediate value is represented as a fixed-length buffer inline in the tuple. - // The aggregate function writes to this buffer directly. staging_intermediate_val_ - // is a StringVal with a pointer to the slot and the length of the slot. - void* slot = dst->get_slot(slot_desc.tuple_offset()); - StringVal* sv = reinterpret_cast(staging_intermediate_val_); - sv->is_null = dst->is_null(slot_desc.null_indicator_offset()); - sv->ptr = reinterpret_cast(slot); - sv->len = type.len; - } - reinterpret_cast(agg_fn_._init_fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(staging_intermediate_val_, slot_desc, dst); - agg_fn_ctx_->impl()->set_num_updates(0); - agg_fn_ctx_->impl()->set_num_removes(0); -} - -static void SetAnyVal(const SlotDescriptor& desc, Tuple* tuple, AnyVal* dst) { - bool is_null = tuple->is_null(desc.null_indicator_offset()); - void* slot = nullptr; - if (!is_null) slot = tuple->get_slot(desc.tuple_offset()); - AnyValUtil::set_any_val(slot, desc.type(), dst); -} - -// Utility to put val into an AnyVal struct -inline void NewAggFnEvaluator::set_any_val(const void* slot, const TypeDescriptor& type, - AnyVal* dst) { - if (slot == nullptr) { - dst->is_null = true; - return; - } - - dst->is_null = false; - - switch (type.type) { - case TYPE_NULL: - return; - - case TYPE_BOOLEAN: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_TINYINT: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_SMALLINT: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_INT: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_BIGINT: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_FLOAT: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_DOUBLE: - reinterpret_cast(dst)->val = *reinterpret_cast(slot); - return; - - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: - reinterpret_cast(slot)->to_string_val(reinterpret_cast(dst)); - return; - - case TYPE_DATE: - case TYPE_DATETIME: - reinterpret_cast(slot)->to_datetime_val( - reinterpret_cast(dst)); - return; - - case TYPE_DECIMALV2: - reinterpret_cast(dst)->val = - reinterpret_cast(slot)->value; - return; - - case TYPE_LARGEINT: - memcpy(&reinterpret_cast(dst)->val, slot, sizeof(__int128)); - return; - - default: - DCHECK(false) << "NYI"; - } -} - -void NewAggFnEvaluator::Update(const TupleRow* row, Tuple* dst, void* fn) { - if (fn == nullptr) return; - - const SlotDescriptor& slot_desc = intermediate_slot_desc(); - SetAnyVal(slot_desc, dst, staging_intermediate_val_); - for (int i = 0; i < input_evals_.size(); ++i) { - void* src_slot = input_evals_[i]->get_value(const_cast(row)); - DCHECK(input_evals_[i]->root() == agg_fn_.get_child(i)); - AnyValUtil::set_any_val(src_slot, agg_fn_.get_child(i)->type(), staging_input_vals_[i]); - } - if (agg_fn_.is_merge()) { - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - staging_intermediate_val_); - SetDstSlot(staging_intermediate_val_, slot_desc, dst); - return; - } - - // TODO: this part is not so good and not scalable. It can be replaced with - // codegen but we can also consider leaving it for the first few cases for - // debugging. - if (agg_fn_.get_vararg_start_idx() == -1) { - switch (input_evals_.size()) { - case 0: - reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - break; - case 1: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - staging_intermediate_val_); - break; - case 2: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - *staging_input_vals_[1], staging_intermediate_val_); - break; - case 3: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - *staging_input_vals_[1], *staging_input_vals_[2], - staging_intermediate_val_); - break; - case 4: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - *staging_input_vals_[1], *staging_input_vals_[2], - *staging_input_vals_[3], staging_intermediate_val_); - break; - case 5: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - *staging_input_vals_[1], *staging_input_vals_[2], - *staging_input_vals_[3], *staging_input_vals_[4], - staging_intermediate_val_); - break; - case 6: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - *staging_input_vals_[1], *staging_input_vals_[2], - *staging_input_vals_[3], *staging_input_vals_[4], - *staging_input_vals_[5], staging_intermediate_val_); - break; - case 7: - reinterpret_cast(fn)( - agg_fn_ctx_.get(), *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], *staging_input_vals_[4], - *staging_input_vals_[5], *staging_input_vals_[6], staging_intermediate_val_); - break; - case 8: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - *staging_input_vals_[1], *staging_input_vals_[2], - *staging_input_vals_[3], *staging_input_vals_[4], - *staging_input_vals_[5], *staging_input_vals_[6], - *staging_input_vals_[7], staging_intermediate_val_); - break; - default: - DCHECK(false) << "NYI"; - } - } else { - int num_varargs = input_evals_.size() - agg_fn_.get_vararg_start_idx(); - const AnyVal* varargs = *(staging_input_vals_.data() + agg_fn_.get_vararg_start_idx()); - switch (agg_fn_.get_vararg_start_idx()) { - case 0: - reinterpret_cast(fn)(agg_fn_ctx_.get(), num_varargs, varargs, - staging_intermediate_val_); - break; - case 1: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - num_varargs, varargs, staging_intermediate_val_); - break; - case 2: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - *staging_input_vals_[1], num_varargs, varargs, - staging_intermediate_val_); - break; - case 3: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - *staging_input_vals_[1], *staging_input_vals_[2], - num_varargs, varargs, staging_intermediate_val_); - break; - case 4: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - *staging_input_vals_[1], *staging_input_vals_[2], - *staging_input_vals_[3], num_varargs, varargs, - staging_intermediate_val_); - break; - case 5: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - *staging_input_vals_[1], *staging_input_vals_[2], - *staging_input_vals_[3], *staging_input_vals_[4], - num_varargs, varargs, staging_intermediate_val_); - break; - case 6: - reinterpret_cast(fn)( - agg_fn_ctx_.get(), *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], *staging_input_vals_[4], - *staging_input_vals_[5], num_varargs, varargs, staging_intermediate_val_); - break; - case 7: - reinterpret_cast(fn)(agg_fn_ctx_.get(), *staging_input_vals_[0], - *staging_input_vals_[1], *staging_input_vals_[2], - *staging_input_vals_[3], *staging_input_vals_[4], - *staging_input_vals_[5], *staging_input_vals_[6], - num_varargs, varargs, staging_intermediate_val_); - break; - case 8: - reinterpret_cast(fn)( - agg_fn_ctx_.get(), *staging_input_vals_[0], *staging_input_vals_[1], - *staging_input_vals_[2], *staging_input_vals_[3], *staging_input_vals_[4], - *staging_input_vals_[5], *staging_input_vals_[6], *staging_input_vals_[7], - num_varargs, varargs, staging_intermediate_val_); - break; - default: - DCHECK(false) << "NYI"; - } - } - SetDstSlot(staging_intermediate_val_, slot_desc, dst); -} - -void NewAggFnEvaluator::Merge(Tuple* src, Tuple* dst) { - DCHECK(agg_fn_._merge_fn != nullptr); - const SlotDescriptor& slot_desc = intermediate_slot_desc(); - SetAnyVal(slot_desc, dst, staging_intermediate_val_); - SetAnyVal(slot_desc, src, staging_merge_input_val_); - // The merge fn always takes one input argument. - reinterpret_cast(agg_fn_._merge_fn)(agg_fn_ctx_.get(), *staging_merge_input_val_, - staging_intermediate_val_); - SetDstSlot(staging_intermediate_val_, slot_desc, dst); -} - -void NewAggFnEvaluator::SerializeOrFinalize(Tuple* src, const SlotDescriptor& dst_slot_desc, - Tuple* dst, void* fn, bool add_null) { - // No fn was given and the src and dst are identical. Nothing to be done. - if (fn == nullptr && src == dst) return; - // src != dst means we are performing a Finalize(), so even if fn == null we - // still must copy the value of the src slot into dst. - - const SlotDescriptor& slot_desc = intermediate_slot_desc(); - bool src_slot_null = add_null || src->is_null(slot_desc.null_indicator_offset()); - void* src_slot = nullptr; - if (!src_slot_null) src_slot = src->get_slot(slot_desc.tuple_offset()); - - // No fn was given but the src and dst tuples are different (doing a Finalize()). - // Just copy the src slot into the dst tuple. - if (fn == nullptr) { - DCHECK_EQ(intermediate_type(), dst_slot_desc.type()); - RawValue::write(src_slot, dst, &dst_slot_desc, nullptr); - return; - } - - AnyValUtil::set_any_val(src_slot, intermediate_type(), staging_intermediate_val_); - switch (dst_slot_desc.type().type) { - case TYPE_BOOLEAN: { - typedef BooleanVal (*Fn)(FunctionContext*, AnyVal*); - BooleanVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; - } - case TYPE_TINYINT: { - typedef TinyIntVal (*Fn)(FunctionContext*, AnyVal*); - TinyIntVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; - } - case TYPE_SMALLINT: { - typedef SmallIntVal (*Fn)(FunctionContext*, AnyVal*); - SmallIntVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; - } - case TYPE_INT: { - typedef IntVal (*Fn)(FunctionContext*, AnyVal*); - IntVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; - } - case TYPE_BIGINT: { - typedef BigIntVal (*Fn)(FunctionContext*, AnyVal*); - BigIntVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; - } - case TYPE_LARGEINT: { - typedef LargeIntVal (*Fn)(FunctionContext*, AnyVal*); - LargeIntVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; - } - case TYPE_FLOAT: { - typedef FloatVal (*Fn)(FunctionContext*, AnyVal*); - FloatVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; - } - case TYPE_DOUBLE: { - typedef DoubleVal (*Fn)(FunctionContext*, AnyVal*); - DoubleVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; - } - case TYPE_CHAR: - case TYPE_VARCHAR: - case TYPE_HLL: - case TYPE_OBJECT: - case TYPE_QUANTILE_STATE: - case TYPE_STRING: { - typedef StringVal (*Fn)(FunctionContext*, AnyVal*); - StringVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; - } - case TYPE_DECIMALV2: { - typedef DecimalV2Val (*Fn)(FunctionContext*, AnyVal*); - DecimalV2Val v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; - } - case TYPE_DATE: - case TYPE_DATETIME: { - typedef DateTimeVal (*Fn)(FunctionContext*, AnyVal*); - DateTimeVal v = reinterpret_cast(fn)(agg_fn_ctx_.get(), staging_intermediate_val_); - SetDstSlot(&v, dst_slot_desc, dst); - break; - } - default: - DCHECK(false) << "NYI"; - } -} - -void NewAggFnEvaluator::ShallowClone(ObjectPool* pool, MemPool* mem_pool, - NewAggFnEvaluator** cloned_eval) const { - DCHECK(opened_); - *cloned_eval = pool->add(new NewAggFnEvaluator(agg_fn_, mem_pool, true)); - (*cloned_eval)->agg_fn_ctx_.reset(agg_fn_ctx_->impl()->clone(mem_pool)); - DCHECK_EQ((*cloned_eval)->input_evals_.size(), 0); - (*cloned_eval)->input_evals_ = input_evals_; - (*cloned_eval)->staging_input_vals_ = staging_input_vals_; - (*cloned_eval)->staging_intermediate_val_ = staging_intermediate_val_; - (*cloned_eval)->staging_merge_input_val_ = staging_merge_input_val_; - (*cloned_eval)->opened_ = true; -} - -void NewAggFnEvaluator::ShallowClone(ObjectPool* pool, MemPool* mem_pool, - const std::vector& evals, - std::vector* cloned_evals) { - for (const NewAggFnEvaluator* eval : evals) { - NewAggFnEvaluator* cloned_eval; - eval->ShallowClone(pool, mem_pool, &cloned_eval); - cloned_evals->push_back(cloned_eval); - } -} diff --git a/be/src/exprs/new_agg_fn_evaluator.h b/be/src/exprs/new_agg_fn_evaluator.h deleted file mode 100644 index 1a7e7f17e4..0000000000 --- a/be/src/exprs/new_agg_fn_evaluator.h +++ /dev/null @@ -1,311 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.10.0/be/src/exprs/agg-fn-evaluator.h -// and modified by Doris - -#pragma once - -#include - -#include "common/status.h" -#include "exprs/agg_fn.h" -#include "runtime/descriptors.h" -#include "runtime/tuple_row.h" -#include "runtime/types.h" -#include "udf/udf.h" -#include "udf/udf_internal.h" - -namespace doris { - -class MemPool; -class ObjectPool; -class RowDescriptor; -class RuntimeState; -class SlotDescriptor; -class Tuple; -class TupleRow; -class TExprNode; -class ExprContext; - -/// NewAggFnEvaluator is the interface for evaluating aggregate functions during execution. -/// -/// NewAggFnEvaluator contains runtime state and implements wrapper functions which convert -/// the input TupleRow into AnyVal format expected by UDAF functions defined in AggFn. -/// It also evaluates TupleRow against input expressions, stores the results in staging -/// input values which are passed to Update() function to update the intermediate value -/// and handles the merging of intermediate values in the merge phases of execution. -/// -/// This class is not threadsafe. An evaluator can be cloned to isolate resource -/// consumption per partition in an aggregation node. -/// -class NewAggFnEvaluator { -public: - /// Creates an NewAggFnEvaluator object from the aggregate expression 'agg_fn'. - /// The evaluator is added to 'pool' and returned in 'eval'. This will also - /// create a single evaluator for each input expression. All allocations will come - /// from 'mem_pool'. Note that it's the responsibility to call Close() all evaluators - /// even if this function returns error status on initialization failure. - static Status Create(const AggFn& agg_fn, RuntimeState* state, ObjectPool* pool, - MemPool* mem_pool, NewAggFnEvaluator** eval, - const RowDescriptor& row_desc) WARN_UNUSED_RESULT; - - /// Convenience functions for creating evaluators for multiple aggregate functions. - static Status Create(const std::vector& agg_fns, RuntimeState* state, ObjectPool* pool, - MemPool* mem_pool, std::vector* evals, - const RowDescriptor& row_desc) WARN_UNUSED_RESULT; - - ~NewAggFnEvaluator(); - - /// Initializes the evaluator by calling Open() on all the input expressions' evaluators - /// and caches all constant input arguments. - /// TODO: Move the evaluation of constant input arguments to AggFn setup. - Status Open(RuntimeState* state) WARN_UNUSED_RESULT; - - /// Convenience functions for opening multiple NewAggFnEvaluators. - static Status Open(const std::vector& evals, - RuntimeState* state) WARN_UNUSED_RESULT; - - /// Used by PartitionedAggregation node to initialize one evaluator per partition. - /// Avoid the overhead of re-initializing an evaluator (e.g. calling GetConstVal() - /// on the input expressions). Cannot be called until after Open() has been called. - /// 'cloned_eval' is a shallow copy of this evaluator: all input values, staging - /// intermediate values and merge values are shared with the original evaluator. Only - /// the FunctionContext 'agg_fn_ctx' is cloned for resource isolation per partition. - /// So, it's not safe to use cloned evaluators concurrently. - void ShallowClone(ObjectPool* pool, MemPool* mem_pool, NewAggFnEvaluator** cloned_eval) const; - - /// Convenience function for cloning multiple evaluators. The newly cloned evaluators - /// are appended to 'cloned_evals'. - static void ShallowClone(ObjectPool* pool, MemPool* mem_pool, - const std::vector& evals, - std::vector* cloned_evals); - - /// Free resources owned by the evaluator. - void Close(RuntimeState* state); - static void Close(const std::vector& evals, RuntimeState* state); - - const AggFn& agg_fn() const { return agg_fn_; } - - FunctionContext* agg_fn_ctx() const; - - ExprContext* const* input_evals() const; - - /// Call the initialization function of the AggFn. May update 'dst'. - void Init(Tuple* dst); - - /// Updates the intermediate state dst based on adding the input src row. This can be - /// called either to drive the UDA's Update() or Merge() function, depending on whether - /// the AggFn is a merging aggregation. - void Add(const TupleRow* src, Tuple* dst); - - /// Updates the intermediate state dst to remove the input src row, i.e. undo - /// Add(src, dst). Only used internally for analytic fn builtins. - void Remove(const TupleRow* src, Tuple* dst); - - /// Explicitly does a merge, even if this evaluator is not marked as merging. - /// This is used by the partitioned agg node when it needs to merge spill results. - /// In the non-spilling case, this node would normally not merge. - void Merge(Tuple* src, Tuple* dst); - - /// Flattens any intermediate values containing pointers, and frees any memory - /// allocated during the init, update and merge phases. - void Serialize(Tuple* dst); - - /// Does one final transformation of the aggregated value in 'agg_val' and stores the - /// result in 'output_val'. Also frees the resources allocated during init, update and - /// merge phases. - void Finalize(Tuple* agg_val, Tuple* output_val, bool add_null = false); - - /// Puts the finalized value from Tuple* src in Tuple* dst just as Finalize() does. - /// However, unlike Finalize(), GetValue() does not clean up state in src. - /// GetValue() can be called repeatedly with the same src. Only used internally for - /// analytic fn builtins. Note that StringVal result is from local allocation (which - /// will be freed in the next QueryMaintenance()) so it needs to be copied out if it - /// needs to survive beyond QueryMaintenance() (e.g. if 'dst' lives in a row batch). - void GetValue(Tuple* src, Tuple* dst); - - // TODO: implement codegen path. These functions would return IR functions with - // the same signature as the interpreted ones above. - // Function* GetIrInitFn(); - // Function* GetIrUpdateFn(); - // Function* GetIrMergeFn(); - // Function* GetIrSerializeFn(); - // Function* GetIrFinalizeFn(); - static const size_t TINYINT_SIZE = sizeof(int8_t); - static const size_t SMALLINT_SIZE = sizeof(int16_t); - static const size_t INT_SIZE = sizeof(int32_t); - static const size_t BIGINT_SIZE = sizeof(int64_t); - static const size_t FLOAT_SIZE = sizeof(float); - static const size_t DOUBLE_SIZE = sizeof(double); - static const size_t DECIMALV2_SIZE = sizeof(DecimalV2Value); - static const size_t LARGEINT_SIZE = sizeof(__int128); - - // DATETIME VAL has two part: packet_time is 8 byte, and type is 4 byte - // MySQL packet time : int64_t packed_time; - // Indicate which type of this value : int type; - static const size_t DATETIME_SIZE = 16; - - bool is_multi_distinct() { return _is_multi_distinct; } - - const std::vector& input_expr_ctxs() const { return input_evals_; } - - /// Helper functions for calling the above functions on many evaluators. - static void Init(const std::vector& evals, Tuple* dst); - static void Add(const std::vector& evals, const TupleRow* src, Tuple* dst); - static void Remove(const std::vector& evals, const TupleRow* src, - Tuple* dst); - static void Serialize(const std::vector& evals, Tuple* dst); - static void GetValue(const std::vector& evals, Tuple* src, Tuple* dst); - static void Finalize(const std::vector& evals, Tuple* src, Tuple* dst, - bool add_null = false); - - /// Free local allocations made in UDA functions and input arguments' evals. - //void FreeLocalAllocations(); - //static void FreeLocalAllocations(const std::vector& evals); - - std::string DebugString() const; - static std::string DebugString(const std::vector& evals); - -private: - uint64_t _accumulated_mem_consumption; - - // index if has multi count distinct - bool _is_multi_distinct; - - /// True if the evaluator has been initialized. - bool opened_ = false; - - /// True if the evaluator has been closed. - bool closed_ = false; - - /// True if this evaluator is created from a ShallowClone() call. - const bool is_clone_; - - const AggFn& agg_fn_; - - /// Pointer to the MemPool which all allocations come from. - /// Owned by the exec node which owns this evaluator. - MemPool* mem_pool_ = nullptr; - - /// This contains runtime state such as constant input arguments to the aggregate - /// functions and a FreePool from which the intermediate values are allocated. - /// Owned by this evaluator. - std::unique_ptr agg_fn_ctx_; - - /// Evaluators for input expressions for this aggregate function. - /// Empty if there is no input expression (e.g. count(*)). - std::vector input_evals_; - - /// Staging input values used by the interpreted Update() / Merge() paths. - /// It stores the evaluation results of input expressions to be passed to the - /// Update() / Merge() function. - std::vector staging_input_vals_; - - /// Staging intermediate and merged values used in the interpreted - /// Update() / Merge() paths. - doris_udf::AnyVal* staging_intermediate_val_ = nullptr; - doris_udf::AnyVal* staging_merge_input_val_ = nullptr; - - /// Use Create() instead. - NewAggFnEvaluator(const AggFn& agg_fn, MemPool* mem_pool, bool is_clone); - - /// Return the intermediate type of the aggregate function. - const SlotDescriptor& intermediate_slot_desc() const; - const TypeDescriptor& intermediate_type() const; - - /// The interpreted path for the UDA's Update() function. It sets up the arguments to - /// call 'fn' is either the 'update_fn_' or 'merge_fn_' of agg_fn_, depending on whether - /// agg_fn_ is a merging aggregation. This converts from the agg-expr signature, taking - /// TupleRow to the UDA signature taking AnyVals by evaluating any input expressions - /// and populating the staging input values. - /// - /// Note that this function may be superseded by the codegend Update() IR function - /// generated by AggFn::CodegenUpdateOrMergeFunction() when codegen is enabled. - void Update(const TupleRow* row, Tuple* dst, void* fn); - - /// Writes the result in src into dst pointed to by dst_slot_desc - void SetDstSlot(const doris_udf::AnyVal* src, const SlotDescriptor& dst_slot_desc, Tuple* dst); - - /// Sets up the arguments to call 'fn'. This converts from the agg-expr signature, - /// taking TupleRow to the UDA signature taking AnyVals. Writes the serialize/finalize - /// result to the given destination slot/tuple. 'fn' can be nullptr to indicate the src - /// value should simply be written into the destination. Note that StringVal result is - /// from local allocation (which will be freed in the next QueryMaintenance()) so it - /// needs to be copied out if it needs to survive beyond QueryMaintenance() (e.g. if - /// 'dst' lives in a row batch). - void SerializeOrFinalize(Tuple* src, const SlotDescriptor& dst_slot_desc, Tuple* dst, void* fn, - bool add_null = false); - - // Sets 'dst' to the value from 'slot'. - void set_any_val(const void* slot, const TypeDescriptor& type, doris_udf::AnyVal* dst); -}; - -inline void NewAggFnEvaluator::Add(const TupleRow* row, Tuple* dst) { - agg_fn_ctx_->impl()->increment_num_updates(); - Update(row, dst, agg_fn_.merge_or_update_fn()); -} - -inline void NewAggFnEvaluator::Remove(const TupleRow* row, Tuple* dst) { - agg_fn_ctx_->impl()->increment_num_removes(); - Update(row, dst, agg_fn_.remove_fn()); -} - -inline void NewAggFnEvaluator::Serialize(Tuple* tuple) { - SerializeOrFinalize(tuple, agg_fn_.intermediate_slot_desc(), tuple, agg_fn_.serialize_fn()); -} - -inline void NewAggFnEvaluator::Finalize(Tuple* agg_val, Tuple* output_val, bool add_null) { - SerializeOrFinalize(agg_val, agg_fn_.output_slot_desc(), output_val, agg_fn_.finalize_fn(), - add_null); -} - -inline void NewAggFnEvaluator::GetValue(Tuple* src, Tuple* dst) { - SerializeOrFinalize(src, agg_fn_.output_slot_desc(), dst, agg_fn_.get_value_fn()); -} - -inline void NewAggFnEvaluator::Init(const std::vector& evals, Tuple* dst) { - for (int i = 0; i < evals.size(); ++i) evals[i]->Init(dst); -} - -inline void NewAggFnEvaluator::Add(const std::vector& evals, - const TupleRow* src, Tuple* dst) { - for (int i = 0; i < evals.size(); ++i) evals[i]->Add(src, dst); -} - -inline void NewAggFnEvaluator::Remove(const std::vector& evals, - const TupleRow* src, Tuple* dst) { - for (int i = 0; i < evals.size(); ++i) evals[i]->Remove(src, dst); -} - -inline void NewAggFnEvaluator::Serialize(const std::vector& evals, Tuple* dst) { - for (int i = 0; i < evals.size(); ++i) evals[i]->Serialize(dst); -} - -inline void NewAggFnEvaluator::GetValue(const std::vector& evals, Tuple* src, - Tuple* dst) { - for (int i = 0; i < evals.size(); ++i) evals[i]->GetValue(src, dst); -} - -inline void NewAggFnEvaluator::Finalize(const std::vector& evals, - Tuple* agg_val, Tuple* output_val, bool add_null) { - for (int i = 0; i < evals.size(); ++i) { - evals[i]->Finalize(agg_val, output_val, add_null); - } -} - -} // namespace doris diff --git a/be/src/exprs/new_in_predicate.cpp b/be/src/exprs/new_in_predicate.cpp deleted file mode 100644 index db88cfcd24..0000000000 --- a/be/src/exprs/new_in_predicate.cpp +++ /dev/null @@ -1,184 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/new_in_predicate.h" - -#include - -#include "exprs/anyval_util.h" - -namespace doris { - -void InPredicate::init() {} - -// Templated getter functions for extracting 'SetType' values from AnyVals -template -SetType get_val(const FunctionContext::TypeDesc* type, const T& x) { - DCHECK(!x.is_null); - return x.val; -} - -template <> -StringRef get_val(const FunctionContext::TypeDesc* type, const StringVal& x) { - DCHECK(!x.is_null); - return StringRef(x); -} - -template <> -DateTimeValue get_val(const FunctionContext::TypeDesc* type, const DateTimeVal& x) { - return DateTimeValue::from_datetime_val(x); -} - -template <> -DecimalV2Value get_val(const FunctionContext::TypeDesc* type, const DecimalV2Val& x) { - return DecimalV2Value::from_decimal_val(x); -} - -template -void InPredicate::set_lookup_prepare(FunctionContext* ctx, - FunctionContext::FunctionStateScope scope) { - if (scope != FunctionContext::FRAGMENT_LOCAL) { - return; - } - - SetLookupState* state = new SetLookupState; - state->type = ctx->get_arg_type(0); - state->contains_null = false; - for (int i = 1; i < ctx->get_num_args(); ++i) { - DCHECK(ctx->is_arg_constant(i)); - T* arg = reinterpret_cast(ctx->get_constant_arg(i)); - if (arg->is_null) { - state->contains_null = true; - } else { - state->val_set.insert(get_val(state->type, *arg)); - } - } - ctx->set_function_state(scope, state); -} - -template -void InPredicate::set_lookup_close(FunctionContext* ctx, - FunctionContext::FunctionStateScope scope) { - if (scope != FunctionContext::FRAGMENT_LOCAL) { - return; - } - SetLookupState* state = - reinterpret_cast*>(ctx->get_function_state(scope)); - delete state; -} - -template -BooleanVal InPredicate::templated_in(FunctionContext* ctx, const T& val, int num_args, - const T* args) { - if (val.is_null) { - return BooleanVal::null(); - } - - BooleanVal found; - if (strategy == SET_LOOKUP) { - SetLookupState* state = reinterpret_cast*>( - ctx->get_function_state(FunctionContext::FRAGMENT_LOCAL)); - DCHECK(state != nullptr); - found = set_lookup(state, val); - } else { - DCHECK_EQ(strategy, ITERATE); - found = iterate(ctx->get_arg_type(0), val, num_args, args); - } - if (found.is_null) { - return BooleanVal::null(); - } - return BooleanVal(found.val ^ not_in); -} - -template -BooleanVal InPredicate::set_lookup(SetLookupState* state, const T& v) { - DCHECK(state != nullptr); - SetType val = get_val(state->type, v); - bool found = state->val_set.find(val) != state->val_set.end(); - if (found) { - return BooleanVal(true); - } - if (state->contains_null) { - return BooleanVal::null(); - } - return BooleanVal(false); -} - -template -BooleanVal InPredicate::iterate(const FunctionContext::TypeDesc* type, const T& val, int num_args, - const T* args) { - bool found_null = false; - for (int i = 0; i < num_args; ++i) { - if (args[i].is_null) { - found_null = true; - } else if (AnyValUtil::equals(*type, val, args[i])) { - return BooleanVal(true); - } - } - if (found_null) { - return BooleanVal::null(); - } - return BooleanVal(false); -} - -#define IN_FUNCTIONS(AnyValType, SetType, type_name) \ - BooleanVal InPredicate::in_set_lookup(FunctionContext* context, const AnyValType& val, \ - int num_args, const AnyValType* args) { \ - return templated_in(context, val, num_args, args); \ - } \ - \ - BooleanVal InPredicate::not_in_set_lookup(FunctionContext* context, const AnyValType& val, \ - int num_args, const AnyValType* args) { \ - return templated_in(context, val, num_args, args); \ - } \ - \ - BooleanVal InPredicate::in_iterate(FunctionContext* context, const AnyValType& val, \ - int num_args, const AnyValType* args) { \ - return templated_in(context, val, num_args, args); \ - } \ - \ - BooleanVal InPredicate::not_in_iterate(FunctionContext* context, const AnyValType& val, \ - int num_args, const AnyValType* args) { \ - return templated_in(context, val, num_args, args); \ - } \ - \ - void InPredicate::set_lookup_prepare_##type_name(FunctionContext* ctx, \ - FunctionContext::FunctionStateScope scope) { \ - set_lookup_prepare(ctx, scope); \ - } \ - \ - void InPredicate::set_lookup_close_##type_name(FunctionContext* ctx, \ - FunctionContext::FunctionStateScope scope) { \ - set_lookup_close(ctx, scope); \ - } - -IN_FUNCTIONS(BooleanVal, bool, boolean_val) -IN_FUNCTIONS(TinyIntVal, int8_t, tiny_int_val) -IN_FUNCTIONS(SmallIntVal, int16_t, small_int_val) -IN_FUNCTIONS(IntVal, int32_t, int_val) -IN_FUNCTIONS(BigIntVal, int64_t, big_int_val) -IN_FUNCTIONS(FloatVal, float, float_val) -IN_FUNCTIONS(DoubleVal, double, double_val) -IN_FUNCTIONS(StringVal, StringRef, string_val) -IN_FUNCTIONS(DateTimeVal, DateTimeValue, datetime_val) -IN_FUNCTIONS(DecimalV2Val, DecimalV2Value, decimalv2_val) -IN_FUNCTIONS(LargeIntVal, __int128, large_int_val) - -// Needed for in-predicate-benchmark to build -template BooleanVal InPredicate::iterate(const FunctionContext::TypeDesc*, const IntVal&, - int, const IntVal*); -} // namespace doris diff --git a/be/src/exprs/new_in_predicate.h b/be/src/exprs/new_in_predicate.h deleted file mode 100644 index a9bb8d1315..0000000000 --- a/be/src/exprs/new_in_predicate.h +++ /dev/null @@ -1,345 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "udf/udf.h" - -/* added by lide */ -#define IN_FUNCTIONS_STMT(AnyValType, SetType, type_name) \ - static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, \ - const doris_udf::AnyValType& val, int num_args, \ - const doris_udf::AnyValType* args); \ - \ - static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, \ - const doris_udf::AnyValType& val, int num_args, \ - const doris_udf::AnyValType* args); \ - \ - static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, \ - const doris_udf::AnyValType& val, int num_args, \ - const doris_udf::AnyValType* args); \ - \ - static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, \ - const doris_udf::AnyValType& val, int num_args, \ - const doris_udf::AnyValType* args); \ - \ - static void set_lookup_prepare_##type_name( \ - doris_udf::FunctionContext* ctx, \ - doris_udf::FunctionContext::FunctionStateScope scope); \ - \ - static void set_lookup_close_##type_name( \ - doris_udf::FunctionContext* ctx, \ - doris_udf::FunctionContext::FunctionStateScope scope); - -namespace doris { - -/// Predicate for evaluating expressions of the form "val [NOT] IN (x1, x2, x3...)". -// -/// There are two strategies for evaluating the IN predicate: -// -/// 1) SET_LOOKUP: This strategy is for when all the values in the IN list are constant. In -/// the prepare function, we create a set of the constant values from the IN list, and -/// use this set to lookup a given 'val'. -// -/// 2) ITERATE: This is the fallback strategy for when their are non-constant IN list -/// values, or very few values in the IN list. We simply iterate through every -/// expression and compare it to val. This strategy has no prepare function. -// -/// The FE chooses which strategy we should use by choosing the appropriate function (e.g., -/// in_iterate() or in_set_lookup()). If it chooses SET_LOOKUP, it also sets the appropriate -/// set_lookup_prepare and set_lookup_close functions. -// -/// TODO: the set lookup logic is not yet implemented for DateTimeVals or DecimalVals -class InPredicate { -public: - static void init(); - - /// Functions for every type - static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, - const doris_udf::BooleanVal& val, int num_args, - const doris_udf::BooleanVal* args); - - static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, - const doris_udf::BooleanVal& val, int num_args, - const doris_udf::BooleanVal* args); - - static void set_lookup_prepare_boolean_val( - doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - - static void set_lookup_close_boolean_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::BooleanVal& val, int num_args, - const doris_udf::BooleanVal* args); - - static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::BooleanVal& val, int num_args, - const doris_udf::BooleanVal* args); - - static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, - const doris_udf::TinyIntVal& val, int num_args, - const doris_udf::TinyIntVal* args); - - static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, - const doris_udf::TinyIntVal& val, int num_args, - const doris_udf::TinyIntVal* args); - - static void set_lookup_prepare_tiny_int_val( - doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - - static void set_lookup_close_tiny_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::TinyIntVal& val, int num_args, - const doris_udf::TinyIntVal* args); - - static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::TinyIntVal& val, int num_args, - const doris_udf::TinyIntVal* args); - - static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, - const doris_udf::SmallIntVal& val, int num_args, - const doris_udf::SmallIntVal* args); - - static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, - const doris_udf::SmallIntVal& val, int num_args, - const doris_udf::SmallIntVal* args); - - static void set_lookup_prepare_small_int_val( - doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - - static void set_lookup_close_small_int_val( - doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - - static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::SmallIntVal& val, int num_args, - const doris_udf::SmallIntVal* args); - - static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::SmallIntVal& val, int num_args, - const doris_udf::SmallIntVal* args); - - static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, - const doris_udf::IntVal& val, int num_args, - const doris_udf::IntVal* args); - - static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, - const doris_udf::IntVal& val, int num_args, - const doris_udf::IntVal* args); - - static void set_lookup_prepare_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static void set_lookup_close_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::IntVal& val, int num_args, - const doris_udf::IntVal* args); - - static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::IntVal& val, int num_args, - const doris_udf::IntVal* args); - - static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, - const doris_udf::BigIntVal& val, int num_args, - const doris_udf::BigIntVal* args); - - static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, - const doris_udf::BigIntVal& val, int num_args, - const doris_udf::BigIntVal* args); - - static void set_lookup_prepare_big_int_val( - doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - - static void set_lookup_close_big_int_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::BigIntVal& val, int num_args, - const doris_udf::BigIntVal* args); - - static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::BigIntVal& val, int num_args, - const doris_udf::BigIntVal* args); - - static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, - const doris_udf::FloatVal& val, int num_args, - const doris_udf::FloatVal* args); - - static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, - const doris_udf::FloatVal& val, int num_args, - const doris_udf::FloatVal* args); - - static void set_lookup_prepare_float_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static void set_lookup_close_float_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::FloatVal& val, int num_args, - const doris_udf::FloatVal* args); - - static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::FloatVal& val, int num_args, - const doris_udf::FloatVal* args); - - static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, - const doris_udf::DoubleVal& val, int num_args, - const doris_udf::DoubleVal* args); - - static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, - const doris_udf::DoubleVal& val, int num_args, - const doris_udf::DoubleVal* args); - - static void set_lookup_prepare_double_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static void set_lookup_close_double_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::DoubleVal& val, int num_args, - const doris_udf::DoubleVal* args); - - static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::DoubleVal& val, int num_args, - const doris_udf::DoubleVal* args); - - static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, int num_args, - const doris_udf::StringVal* args); - - static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, int num_args, - const doris_udf::StringVal* args); - - static void set_lookup_prepare_string_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static void set_lookup_close_string_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, int num_args, - const doris_udf::StringVal* args); - - static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::StringVal& val, int num_args, - const doris_udf::StringVal* args); - - static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, - const doris_udf::DateTimeVal& val, int num_args, - const doris_udf::DateTimeVal* args); - - static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, - const doris_udf::DateTimeVal& val, int num_args, - const doris_udf::DateTimeVal* args); - - static void set_lookup_prepare_datetime_val( - doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - - static void set_lookup_close_datetime_val(doris_udf::FunctionContext* ctx, - doris_udf::FunctionContext::FunctionStateScope scope); - - static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::DateTimeVal& val, int num_args, - const doris_udf::DateTimeVal* args); - - static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::DateTimeVal& val, int num_args, - const doris_udf::DateTimeVal* args); - - static doris_udf::BooleanVal in_iterate(doris_udf::FunctionContext* context, - const doris_udf::DecimalV2Val& val, int num_args, - const doris_udf::DecimalV2Val* args); - - static doris_udf::BooleanVal not_in_iterate(doris_udf::FunctionContext* context, - const doris_udf::DecimalV2Val& val, int num_args, - const doris_udf::DecimalV2Val* args); - - static void set_lookup_prepare_decimalv2_val( - doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - - static void set_lookup_close_decimalv2_val( - doris_udf::FunctionContext* ctx, doris_udf::FunctionContext::FunctionStateScope scope); - - static doris_udf::BooleanVal in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::DecimalV2Val& val, int num_args, - const doris_udf::DecimalV2Val* args); - - static doris_udf::BooleanVal not_in_set_lookup(doris_udf::FunctionContext* context, - const doris_udf::DecimalV2Val& val, int num_args, - const doris_udf::DecimalV2Val* args); - - /* added by lide */ - IN_FUNCTIONS_STMT(LargeIntVal, __int128, large_int_val) - -private: - friend class InPredicateBenchmark; - - enum Strategy { - /// Indicates we should use SetLookUp(). - SET_LOOKUP, - /// Indicates we should use Iterate(). - ITERATE - }; - - template - struct SetLookupState { - /// If true, there is at least one nullptr constant in the IN list. - bool contains_null; - - /// The set of all non-nullptr constant values in the IN list. - /// Note: std::unordered_set and std::binary_search performed worse based on the - /// in-predicate-benchmark - std::set val_set; - - /// The type of the arguments - const FunctionContext::TypeDesc* type; - }; - - /// The templated function that provides the implementation for all the In() and NotIn() - /// functions. - template - static doris_udf::BooleanVal templated_in(doris_udf::FunctionContext* context, const T& val, - int num_args, const T* args); - - /// Initializes an SetLookupState in ctx. - template - static void set_lookup_prepare(FunctionContext* ctx, FunctionContext::FunctionStateScope scope); - - template - static void set_lookup_close(FunctionContext* ctx, FunctionContext::FunctionStateScope scope); - - /// Looks up v in state->val_set. - template - static BooleanVal set_lookup(SetLookupState* state, const T& v); - - /// Iterates through each vararg looking for val. 'type' is the type of 'val' and 'args'. - template - static BooleanVal iterate(const FunctionContext::TypeDesc* type, const T& val, int num_args, - const T* args); -}; - -} // namespace doris diff --git a/be/src/exprs/null_literal.cpp b/be/src/exprs/null_literal.cpp deleted file mode 100644 index 1cf54a3277..0000000000 --- a/be/src/exprs/null_literal.cpp +++ /dev/null @@ -1,75 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/null-literal.cc -// and modified by Doris - -#include "null_literal.h" - -#include "gen_cpp/Exprs_types.h" - -namespace doris { - -NullLiteral::NullLiteral(const TExprNode& node) : Expr(node) {} - -// NullLiteral::NullLiteral(PrimitiveType type) : Expr(TypeDescriptor(type)) { -// } - -BooleanVal NullLiteral::get_boolean_val(ExprContext*, TupleRow*) { - return BooleanVal::null(); -} - -TinyIntVal NullLiteral::get_tiny_int_val(ExprContext*, TupleRow*) { - return TinyIntVal::null(); -} - -SmallIntVal NullLiteral::get_small_int_val(ExprContext*, TupleRow*) { - return SmallIntVal::null(); -} - -IntVal NullLiteral::get_int_val(ExprContext*, TupleRow*) { - return IntVal::null(); -} - -BigIntVal NullLiteral::get_big_int_val(ExprContext*, TupleRow*) { - return BigIntVal::null(); -} - -FloatVal NullLiteral::get_float_val(ExprContext*, TupleRow*) { - return FloatVal::null(); -} - -DoubleVal NullLiteral::get_double_val(ExprContext*, TupleRow*) { - return DoubleVal::null(); -} - -StringVal NullLiteral::get_string_val(ExprContext*, TupleRow*) { - return StringVal::null(); -} - -DateTimeVal NullLiteral::get_datetime_val(ExprContext*, TupleRow*) { - return DateTimeVal::null(); -} - -DecimalV2Val NullLiteral::get_decimalv2_val(ExprContext*, TupleRow*) { - return DecimalV2Val::null(); -} - -CollectionVal NullLiteral::get_array_val(ExprContext* context, TupleRow*) { - return CollectionVal::null(); -} -} // namespace doris diff --git a/be/src/exprs/null_literal.h b/be/src/exprs/null_literal.h deleted file mode 100644 index af3e5e615a..0000000000 --- a/be/src/exprs/null_literal.h +++ /dev/null @@ -1,57 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/null-literal.h -// and modified by Doris - -#pragma once - -#include "common/object_pool.h" -#include "exprs/expr.h" - -namespace doris { - -class TExprNode; - -class NullLiteral : public Expr { -public: - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new NullLiteral(*this)); - } - // NullLiteral(PrimitiveType type); - virtual doris_udf::BooleanVal get_boolean_val(ExprContext*, TupleRow*) override; - virtual doris_udf::TinyIntVal get_tiny_int_val(ExprContext*, TupleRow*) override; - virtual doris_udf::SmallIntVal get_small_int_val(ExprContext*, TupleRow*) override; - virtual doris_udf::IntVal get_int_val(ExprContext*, TupleRow*) override; - virtual doris_udf::BigIntVal get_big_int_val(ExprContext*, TupleRow*) override; - virtual doris_udf::FloatVal get_float_val(ExprContext*, TupleRow*) override; - virtual doris_udf::DoubleVal get_double_val(ExprContext*, TupleRow*) override; - virtual doris_udf::StringVal get_string_val(ExprContext*, TupleRow*) override; - virtual doris_udf::DateTimeVal get_datetime_val(ExprContext*, TupleRow*) override; - virtual doris_udf::DecimalV2Val get_decimalv2_val(ExprContext*, TupleRow*) override; - virtual CollectionVal get_array_val(ExprContext* context, TupleRow*) override; - -protected: - friend class Expr; - - NullLiteral(const TExprNode& node); - -private: - static void* return_value(Expr* e, TupleRow* row); -}; - -} // namespace doris diff --git a/be/src/exprs/operators.cpp b/be/src/exprs/operators.cpp deleted file mode 100644 index 16e903deb1..0000000000 --- a/be/src/exprs/operators.cpp +++ /dev/null @@ -1,138 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/operators.cc -// and modified by Doris - -#include "exprs/operators.h" - -#include "runtime/datetime_value.h" -#include "vec/common/string_ref.h" - -namespace doris { - -void Operators::init() {} - -#define BINARY_OP_FN(NAME, TYPE_NAME, TYPE, OP) \ - TYPE Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(FunctionContext* c, const TYPE& v1, \ - const TYPE& v2) { \ - if (v1.is_null || v2.is_null) return TYPE::null(); \ - return TYPE(v1.val OP v2.val); \ - } - -#define BINARY_OP_CHECK_ZERO_FN(NAME, TYPE_NAME, TYPE, OP) \ - TYPE Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(FunctionContext* c, const TYPE& v1, \ - const TYPE& v2) { \ - if (v1.is_null || v2.is_null || v2.val == 0) return TYPE::null(); \ - return TYPE(v1.val OP v2.val); \ - } - -#define BITNOT_FN(TYPE, TYPE_NAME) \ - TYPE Operators::bitnot_##TYPE_NAME(FunctionContext* c, const TYPE& v) { \ - if (v.is_null) return TYPE::null(); \ - return TYPE(~v.val); \ - } - -// Return infinity if overflow. -#define FACTORIAL_FN(TYPE) \ - BigIntVal Operators::Factorial_##TYPE(FunctionContext* c, const TYPE& v) { \ - if (v.is_null) return BigIntVal::null(); \ - int64_t fact = ComputeFactorial(v.val); \ - if (fact < 0) { \ - return BigIntVal::null(); \ - } \ - return BigIntVal(fact); \ - } - -#define BINARY_PREDICATE_NUMERIC_FN(NAME, TYPE_NAME, TYPE, OP) \ - BooleanVal Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(FunctionContext* c, const TYPE& v1, \ - const TYPE& v2) { \ - if (v1.is_null || v2.is_null) return BooleanVal::null(); \ - return BooleanVal(v1.val OP v2.val); \ - } - -#define BINARY_PREDICATE_NONNUMERIC_FN(NAME, TYPE_NAME, FUNC_NAME, TYPE, DORIS_TYPE, OP) \ - BooleanVal Operators::NAME##_##TYPE_NAME##_##TYPE_NAME(FunctionContext* c, const TYPE& v1, \ - const TYPE& v2) { \ - if (v1.is_null || v2.is_null) return BooleanVal::null(); \ - DORIS_TYPE iv1 = DORIS_TYPE::from_##FUNC_NAME(v1); \ - DORIS_TYPE iv2 = DORIS_TYPE::from_##FUNC_NAME(v2); \ - return BooleanVal(iv1 OP iv2); \ - } - -#define BINARY_OP_NUMERIC_TYPES(NAME, OP) \ - BINARY_OP_FN(NAME, tiny_int_val, TinyIntVal, OP); \ - BINARY_OP_FN(NAME, small_int_val, SmallIntVal, OP); \ - BINARY_OP_FN(NAME, int_val, IntVal, OP); \ - BINARY_OP_FN(NAME, big_int_val, BigIntVal, OP); \ - BINARY_OP_FN(NAME, large_int_val, LargeIntVal, OP); \ - BINARY_OP_FN(NAME, float_val, FloatVal, OP); \ - BINARY_OP_FN(NAME, double_val, DoubleVal, OP); - -#define BINARY_OP_INT_TYPES(NAME, OP) \ - BINARY_OP_FN(NAME, tiny_int_val, TinyIntVal, OP); \ - BINARY_OP_FN(NAME, small_int_val, SmallIntVal, OP); \ - BINARY_OP_FN(NAME, int_val, IntVal, OP); \ - BINARY_OP_FN(NAME, big_int_val, BigIntVal, OP); \ - BINARY_OP_FN(NAME, large_int_val, LargeIntVal, OP); - -#define BINARY_OP_CHECK_ZERO_INT_TYPES(NAME, OP) \ - BINARY_OP_CHECK_ZERO_FN(NAME, tiny_int_val, TinyIntVal, OP); \ - BINARY_OP_CHECK_ZERO_FN(NAME, small_int_val, SmallIntVal, OP); \ - BINARY_OP_CHECK_ZERO_FN(NAME, int_val, IntVal, OP); \ - BINARY_OP_CHECK_ZERO_FN(NAME, big_int_val, BigIntVal, OP); \ - BINARY_OP_CHECK_ZERO_FN(NAME, large_int_val, LargeIntVal, OP); - -#define BINARY_PREDICATE_ALL_TYPES(NAME, OP) \ - BINARY_PREDICATE_NUMERIC_FN(NAME, boolean_val, BooleanVal, OP); \ - BINARY_PREDICATE_NUMERIC_FN(NAME, tiny_int_val, TinyIntVal, OP); \ - BINARY_PREDICATE_NUMERIC_FN(NAME, small_int_val, SmallIntVal, OP); \ - BINARY_PREDICATE_NUMERIC_FN(NAME, int_val, IntVal, OP); \ - BINARY_PREDICATE_NUMERIC_FN(NAME, big_int_val, BigIntVal, OP); \ - BINARY_PREDICATE_NUMERIC_FN(NAME, large_int_val, LargeIntVal, OP); \ - BINARY_PREDICATE_NUMERIC_FN(NAME, float_val, FloatVal, OP); \ - BINARY_PREDICATE_NUMERIC_FN(NAME, double_val, DoubleVal, OP); \ - BINARY_PREDICATE_NONNUMERIC_FN(NAME, string_val, string_val, StringVal, StringRef, OP); \ - BINARY_PREDICATE_NONNUMERIC_FN(NAME, datetime_val, datetime_val, DateTimeVal, DateTimeValue, \ - OP); - -BINARY_OP_NUMERIC_TYPES(add, +); -BINARY_OP_NUMERIC_TYPES(subtract, -); -BINARY_OP_NUMERIC_TYPES(multiply, *); - -BINARY_OP_CHECK_ZERO_FN(divide, double_val, DoubleVal, /); - -BINARY_OP_CHECK_ZERO_INT_TYPES(int_divide, /); -BINARY_OP_CHECK_ZERO_INT_TYPES(mod, %); - -// Bit operator -BINARY_OP_INT_TYPES(bitand, &); -BINARY_OP_INT_TYPES(bitxor, ^); -BINARY_OP_INT_TYPES(bitor, |); -BITNOT_FN(TinyIntVal, tiny_int_val); -BITNOT_FN(SmallIntVal, small_int_val); -BITNOT_FN(IntVal, int_val); -BITNOT_FN(BigIntVal, big_int_val); -BITNOT_FN(LargeIntVal, large_int_val); - -BINARY_PREDICATE_ALL_TYPES(eq, ==); -BINARY_PREDICATE_ALL_TYPES(ne, !=); -BINARY_PREDICATE_ALL_TYPES(gt, >); -BINARY_PREDICATE_ALL_TYPES(lt, <); -BINARY_PREDICATE_ALL_TYPES(ge, >=); -BINARY_PREDICATE_ALL_TYPES(le, <=); -} // namespace doris diff --git a/be/src/exprs/operators.h b/be/src/exprs/operators.h deleted file mode 100644 index c385896f23..0000000000 --- a/be/src/exprs/operators.h +++ /dev/null @@ -1,251 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/operators.h -// and modified by Doris - -#pragma once - -#include "udf/udf.h" - -namespace doris { - -/// Operators written against the UDF interface. -class Operators { -public: - // Do nothing, just get its symbols - static void init(); - - // Bit operator - static TinyIntVal bitnot_tiny_int_val(FunctionContext*, const TinyIntVal&); - static SmallIntVal bitnot_small_int_val(FunctionContext*, const SmallIntVal&); - static IntVal bitnot_int_val(FunctionContext*, const IntVal&); - static BigIntVal bitnot_big_int_val(FunctionContext*, const BigIntVal&); - static LargeIntVal bitnot_large_int_val(FunctionContext*, const LargeIntVal&); - - static TinyIntVal bitand_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static SmallIntVal bitand_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static IntVal bitand_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal bitand_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static LargeIntVal bitand_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - - static TinyIntVal bitxor_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static SmallIntVal bitxor_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static IntVal bitxor_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal bitxor_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static LargeIntVal bitxor_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - - static TinyIntVal bitor_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static SmallIntVal bitor_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static IntVal bitor_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal bitor_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static LargeIntVal bitor_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - - // Arithmetic - static TinyIntVal add_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static SmallIntVal add_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static IntVal add_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal add_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static LargeIntVal add_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - static FloatVal add_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); - static DoubleVal add_double_val_double_val(FunctionContext*, const DoubleVal&, - const DoubleVal&); - - static TinyIntVal subtract_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static SmallIntVal subtract_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static IntVal subtract_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal subtract_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static LargeIntVal subtract_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - static FloatVal subtract_float_val_float_val(FunctionContext*, const FloatVal&, - const FloatVal&); - static DoubleVal subtract_double_val_double_val(FunctionContext*, const DoubleVal&, - const DoubleVal&); - - static TinyIntVal multiply_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static SmallIntVal multiply_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static IntVal multiply_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal multiply_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static LargeIntVal multiply_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - static FloatVal multiply_float_val_float_val(FunctionContext*, const FloatVal&, - const FloatVal&); - static DoubleVal multiply_double_val_double_val(FunctionContext*, const DoubleVal&, - const DoubleVal&); - - static DoubleVal divide_double_val_double_val(FunctionContext*, const DoubleVal&, - const DoubleVal&); - - static TinyIntVal int_divide_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static SmallIntVal int_divide_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static IntVal int_divide_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal int_divide_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static LargeIntVal int_divide_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - - static TinyIntVal mod_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static SmallIntVal mod_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static IntVal mod_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BigIntVal mod_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static LargeIntVal mod_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - - // Binary predicate - static BooleanVal eq_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, - const BooleanVal&); - static BooleanVal eq_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static BooleanVal eq_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static BooleanVal eq_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal eq_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static BooleanVal eq_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - static BooleanVal eq_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); - static BooleanVal eq_double_val_double_val(FunctionContext*, const DoubleVal&, - const DoubleVal&); - static BooleanVal eq_string_val_string_val(FunctionContext*, const StringVal&, - const StringVal&); - static BooleanVal eq_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, - const DateTimeVal&); - - static BooleanVal ne_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, - const BooleanVal&); - static BooleanVal ne_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static BooleanVal ne_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static BooleanVal ne_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal ne_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static BooleanVal ne_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - static BooleanVal ne_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); - static BooleanVal ne_double_val_double_val(FunctionContext*, const DoubleVal&, - const DoubleVal&); - static BooleanVal ne_string_val_string_val(FunctionContext*, const StringVal&, - const StringVal&); - static BooleanVal ne_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, - const DateTimeVal&); - - static BooleanVal gt_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, - const BooleanVal&); - static BooleanVal gt_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static BooleanVal gt_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static BooleanVal gt_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal gt_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static BooleanVal gt_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - static BooleanVal gt_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); - static BooleanVal gt_double_val_double_val(FunctionContext*, const DoubleVal&, - const DoubleVal&); - static BooleanVal gt_string_val_string_val(FunctionContext*, const StringVal&, - const StringVal&); - static BooleanVal gt_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, - const DateTimeVal&); - - static BooleanVal lt_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, - const BooleanVal&); - static BooleanVal lt_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static BooleanVal lt_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static BooleanVal lt_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal lt_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static BooleanVal lt_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - static BooleanVal lt_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); - static BooleanVal lt_double_val_double_val(FunctionContext*, const DoubleVal&, - const DoubleVal&); - static BooleanVal lt_string_val_string_val(FunctionContext*, const StringVal&, - const StringVal&); - static BooleanVal lt_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, - const DateTimeVal&); - - static BooleanVal ge_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, - const BooleanVal&); - static BooleanVal ge_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static BooleanVal ge_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static BooleanVal ge_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal ge_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static BooleanVal ge_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - static BooleanVal ge_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); - static BooleanVal ge_double_val_double_val(FunctionContext*, const DoubleVal&, - const DoubleVal&); - static BooleanVal ge_string_val_string_val(FunctionContext*, const StringVal&, - const StringVal&); - static BooleanVal ge_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, - const DateTimeVal&); - - static BooleanVal le_boolean_val_boolean_val(FunctionContext*, const BooleanVal&, - const BooleanVal&); - static BooleanVal le_tiny_int_val_tiny_int_val(FunctionContext*, const TinyIntVal&, - const TinyIntVal&); - static BooleanVal le_small_int_val_small_int_val(FunctionContext*, const SmallIntVal&, - const SmallIntVal&); - static BooleanVal le_int_val_int_val(FunctionContext*, const IntVal&, const IntVal&); - static BooleanVal le_big_int_val_big_int_val(FunctionContext*, const BigIntVal&, - const BigIntVal&); - static BooleanVal le_large_int_val_large_int_val(FunctionContext*, const LargeIntVal&, - const LargeIntVal&); - static BooleanVal le_float_val_float_val(FunctionContext*, const FloatVal&, const FloatVal&); - static BooleanVal le_double_val_double_val(FunctionContext*, const DoubleVal&, - const DoubleVal&); - static BooleanVal le_string_val_string_val(FunctionContext*, const StringVal&, - const StringVal&); - static BooleanVal le_datetime_val_datetime_val(FunctionContext*, const DateTimeVal&, - const DateTimeVal&); -}; - -} // namespace doris diff --git a/be/src/exprs/predicate.h b/be/src/exprs/predicate.h deleted file mode 100644 index 3126da93a8..0000000000 --- a/be/src/exprs/predicate.h +++ /dev/null @@ -1,36 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/predicate.h -// and modified by Doris - -#pragma once - -#include "exprs/expr.h" - -namespace doris { - -class TExprNode; - -class Predicate : public Expr { -protected: - friend class Expr; - - Predicate(const TExprNode& node) : Expr(node) {} -}; - -} // namespace doris diff --git a/be/src/exprs/rpc_fn.cpp b/be/src/exprs/rpc_fn.cpp deleted file mode 100644 index 01363922e8..0000000000 --- a/be/src/exprs/rpc_fn.cpp +++ /dev/null @@ -1,277 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/rpc_fn.h" - -#include - -#include "exprs/rpc_fn_comm.h" -#include "runtime/fragment_mgr.h" -#include "util/brpc_client_cache.h" -#include "vec/columns/column.h" -#include "vec/columns/column_vector.h" -#include "vec/core/block.h" -#include "vec/core/column_numbers.h" -#include "vec/data_types/data_type_decimal.h" -#include "vec/data_types/data_type_nullable.h" - -namespace doris { - -RPCFn::RPCFn(RuntimeState* state, const TFunction& fn, int fn_ctx_id, bool is_agg) - : _state(state), _fn(fn), _fn_ctx_id(fn_ctx_id), _is_agg(is_agg) { - _client = ExecEnv::GetInstance()->brpc_function_client_cache()->get_client(_server_addr); - if (!_is_agg) { - _function_name = _fn.scalar_fn.symbol; - _server_addr = _fn.hdfs_location; - _signature = fmt::format("{}: [{}/{}]", _fn.name.function_name, _fn.hdfs_location, - _fn.scalar_fn.symbol); - } -} - -RPCFn::RPCFn(const TFunction& fn, bool is_agg) : RPCFn(nullptr, fn, -1, is_agg) {} - -RPCFn::RPCFn(RuntimeState* state, const TFunction& fn, AggregationStep step, bool is_agg) - : RPCFn(nullptr, fn, -1, is_agg) { - _step = step; - DCHECK(is_agg) << "Only used for agg fns"; - switch (_step) { - case INIT: { - _function_name = _fn.aggregate_fn.init_fn_symbol; - _server_addr = _fn.hdfs_location; - _signature = fmt::format("{}: [{}/{}]", _fn.name.function_name, _fn.hdfs_location, - _fn.aggregate_fn.init_fn_symbol); - break; - } - case UPDATE: { - _function_name = _fn.aggregate_fn.init_fn_symbol; - break; - } - case MERGE: { - _function_name = _fn.aggregate_fn.merge_fn_symbol; - break; - } - case SERIALIZE: { - _function_name = _fn.aggregate_fn.serialize_fn_symbol; - break; - } - case GET_VALUE: { - _function_name = _fn.aggregate_fn.get_value_fn_symbol; - break; - } - case FINALIZE: { - _function_name = _fn.aggregate_fn.finalize_fn_symbol; - break; - } - case REMOVE: { - _function_name = _fn.aggregate_fn.remove_fn_symbol; - break; - } - - default: - CHECK(false) << "invalid AggregationStep: " << _step; - break; - } - _server_addr = _fn.hdfs_location; - _signature = fmt::format("{}: [{}/{}]", _fn.name.function_name, _server_addr, _function_name); -} - -Status RPCFn::call_internal(ExprContext* context, TupleRow* row, PFunctionCallResponse* response, - const std::vector& exprs) { - FunctionContext* fn_ctx = context->fn_context(_fn_ctx_id); - PFunctionCallRequest request; - request.set_function_name(_function_name); - for (int i = 0; i < exprs.size(); ++i) { - PValues* arg = request.add_args(); - void* src_slot = context->get_value(exprs[i], row); - PGenericType* ptype = arg->mutable_type(); - if (src_slot == nullptr) { - arg->set_has_null(true); - arg->add_null_map(true); - } else { - arg->set_has_null(false); - } - switch (exprs[i]->type().type) { - case TYPE_BOOLEAN: { - ptype->set_id(PGenericType::BOOLEAN); - arg->add_bool_value(*(bool*)src_slot); - break; - } - case TYPE_TINYINT: { - ptype->set_id(PGenericType::INT8); - arg->add_int32_value(*(int8_t*)src_slot); - break; - } - case TYPE_SMALLINT: { - ptype->set_id(PGenericType::INT16); - arg->add_int32_value(*(int16_t*)src_slot); - break; - } - case TYPE_INT: { - ptype->set_id(PGenericType::INT32); - arg->add_int32_value(*(int*)src_slot); - break; - } - case TYPE_BIGINT: { - ptype->set_id(PGenericType::INT64); - arg->add_int64_value(*(int64_t*)src_slot); - break; - } - case TYPE_LARGEINT: { - ptype->set_id(PGenericType::INT128); - char buffer[sizeof(__int128)]; - memcpy(buffer, src_slot, sizeof(__int128)); - arg->add_bytes_value(buffer, sizeof(__int128)); - break; - } - case TYPE_DOUBLE: { - ptype->set_id(PGenericType::DOUBLE); - arg->add_double_value(*(double*)src_slot); - break; - } - case TYPE_FLOAT: { - ptype->set_id(PGenericType::FLOAT); - arg->add_float_value(*(float*)src_slot); - break; - } - case TYPE_VARCHAR: - case TYPE_STRING: - case TYPE_CHAR: { - ptype->set_id(PGenericType::STRING); - StringRef value = *reinterpret_cast(src_slot); - arg->add_string_value(value.data, value.size); - break; - } - case TYPE_HLL: { - ptype->set_id(PGenericType::HLL); - StringRef value = *reinterpret_cast(src_slot); - arg->add_string_value(value.data, value.size); - break; - } - case TYPE_OBJECT: { - ptype->set_id(PGenericType::BITMAP); - StringRef value = *reinterpret_cast(src_slot); - arg->add_string_value(value.data, value.size); - break; - } - case TYPE_DECIMALV2: { - ptype->set_id(PGenericType::DECIMAL128); - ptype->mutable_decimal_type()->set_precision(exprs[i]->type().precision); - ptype->mutable_decimal_type()->set_scale(exprs[i]->type().scale); - char buffer[sizeof(__int128)]; - memcpy(buffer, src_slot, sizeof(__int128)); - arg->add_bytes_value(buffer, sizeof(__int128)); - break; - } - case TYPE_DATE: { - ptype->set_id(PGenericType::DATE); - const auto* time_val = (const DateTimeValue*)(src_slot); - PDateTime* date_time = arg->add_datetime_value(); - date_time->set_day(time_val->day()); - date_time->set_month(time_val->month()); - date_time->set_year(time_val->year()); - break; - } - case TYPE_DATETIME: { - ptype->set_id(PGenericType::DATETIME); - const auto* time_val = (const DateTimeValue*)(src_slot); - PDateTime* date_time = arg->add_datetime_value(); - date_time->set_day(time_val->day()); - date_time->set_month(time_val->month()); - date_time->set_year(time_val->year()); - date_time->set_hour(time_val->hour()); - date_time->set_minute(time_val->minute()); - date_time->set_second(time_val->second()); - date_time->set_microsecond(time_val->microsecond()); - break; - } - case TYPE_TIME: { - ptype->set_id(PGenericType::DATETIME); - const auto* time_val = (const DateTimeValue*)(src_slot); - PDateTime* date_time = arg->add_datetime_value(); - date_time->set_hour(time_val->hour()); - date_time->set_minute(time_val->minute()); - date_time->set_second(time_val->second()); - date_time->set_microsecond(time_val->microsecond()); - break; - } - default: { - std::string error_msg = - fmt::format("data time not supported: {}", exprs[i]->type().type); - fn_ctx->set_error(error_msg.c_str()); - cancel(error_msg); - break; - } - } - } - - brpc::Controller cntl; - _client->fn_call(&cntl, &request, response, nullptr); - if (cntl.Failed()) { - std::string error_msg = - fmt::format("call rpc function {} failed: {}", _signature, cntl.ErrorText()); - fn_ctx->set_error(error_msg.c_str()); - cancel(error_msg); - return Status::InternalError(error_msg); - } - if (!response->has_status() || response->result_size() == 0) { - std::string error_msg = - fmt::format("call rpc function {} failed: status or result is not set: {}", - _signature, response->status().DebugString()); - fn_ctx->set_error(error_msg.c_str()); - cancel(error_msg); - return Status::InternalError(error_msg); - } - if (response->status().status_code() != 0) { - std::string error_msg = fmt::format("call rpc function {} failed: {}", _signature, - response->status().DebugString()); - fn_ctx->set_error(error_msg.c_str()); - cancel(error_msg); - return Status::InternalError(error_msg); - } - return Status::OK(); -} - -void RPCFn::cancel(const std::string& msg) { - _state->exec_env()->fragment_mgr()->cancel(_state->fragment_instance_id(), - PPlanFragmentCancelReason::CALL_RPC_ERROR, msg); -} - -Status RPCFn::vec_call(FunctionContext* context, vectorized::Block& block, - const vectorized::ColumnNumbers& arguments, size_t result, - size_t input_rows_count) { - PFunctionCallRequest request; - PFunctionCallResponse response; - request.set_function_name(_function_name); - convert_block_to_proto(block, arguments, input_rows_count, &request); - brpc::Controller cntl; - _client->fn_call(&cntl, &request, &response, nullptr); - if (cntl.Failed()) { - return Status::InternalError("call to rpc function {} failed: {}", _signature, - cntl.ErrorText()); - } - if (!response.has_status() || response.result_size() == 0) { - return Status::InternalError("call rpc function {} failed: status or result is not set.", - _signature); - } - if (response.status().status_code() != 0) { - return Status::InternalError("call to rpc function {} failed: {}", _signature, - response.status().DebugString()); - } - convert_to_block(block, response.result(0), result); - return Status::OK(); -} -} // namespace doris diff --git a/be/src/exprs/rpc_fn.h b/be/src/exprs/rpc_fn.h deleted file mode 100644 index 43db2633bf..0000000000 --- a/be/src/exprs/rpc_fn.h +++ /dev/null @@ -1,137 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include - -#include "common/status.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" -#include "exprs/rpc_fn_comm.h" -#include "gen_cpp/function_service.pb.h" -#include "runtime/runtime_state.h" -#include "udf/udf.h" - -namespace doris { -namespace vectorized { -class Block; -} // namespace vectorized - -class RPCFn { -public: - enum AggregationStep { - INIT = 0, - UPDATE = 1, - MERGE = 2, - REMOVE = 3, - SERIALIZE = 4, - GET_VALUE = 5, - FINALIZE = 6, - INVALID = 999, - }; - - RPCFn(RuntimeState* state, const TFunction& fn, int fn_ctx_id, bool is_agg); - RPCFn(const TFunction& fn, bool is_agg); - RPCFn(RuntimeState* state, const TFunction& fn, AggregationStep step, bool is_agg); - ~RPCFn() {} - template - T call(ExprContext* context, TupleRow* row, const std::vector& exprs); - Status vec_call(FunctionContext* context, vectorized::Block& block, - const std::vector& arguments, size_t result, size_t input_rows_count); - bool avliable() { return _client != nullptr; } - -private: - Status call_internal(ExprContext* context, TupleRow* row, PFunctionCallResponse* response, - const std::vector& exprs); - void cancel(const std::string& msg); - - std::shared_ptr _client; - RuntimeState* _state; - std::string _function_name; - std::string _server_addr; - std::string _signature; - TFunction _fn; - int _fn_ctx_id; - bool _is_agg; - AggregationStep _step = AggregationStep::INVALID; -}; - -template -T RPCFn::call(ExprContext* context, TupleRow* row, const std::vector& exprs) { - PFunctionCallResponse response; - Status st = call_internal(context, row, &response, exprs); - WARN_IF_ERROR(st, "call rpc udf error"); - if (!st.ok() || (response.result(0).has_null() && response.result(0).null_map(0))) { - return T::null(); - } - T res_val; - // TODO(yangzhg) deal with udtf and udaf - const PValues& result = response.result(0); - if constexpr (std::is_same_v) { - DCHECK(result.type().id() == PGenericType::INT8); - res_val.val = static_cast(result.int32_value(0)); - } else if constexpr (std::is_same_v) { - DCHECK(result.type().id() == PGenericType::INT16); - res_val.val = static_cast(result.int32_value(0)); - } else if constexpr (std::is_same_v) { - DCHECK(result.type().id() == PGenericType::INT32); - res_val.val = result.int32_value(0); - } else if constexpr (std::is_same_v) { - DCHECK(result.type().id() == PGenericType::INT64); - res_val.val = result.int64_value(0); - } else if constexpr (std::is_same_v) { - DCHECK(result.type().id() == PGenericType::FLOAT); - res_val.val = result.float_value(0); - } else if constexpr (std::is_same_v) { - DCHECK(result.type().id() == PGenericType::DOUBLE); - res_val.val = result.double_value(0); - } else if constexpr (std::is_same_v) { - DCHECK(result.type().id() == PGenericType::STRING); - auto* fn_ctx = context->fn_context(_fn_ctx_id); - StringVal val(fn_ctx, result.string_value(0).size()); - res_val = val.copy_from(fn_ctx, - reinterpret_cast(result.string_value(0).c_str()), - result.string_value(0).size()); - } else if constexpr (std::is_same_v) { - DCHECK(result.type().id() == PGenericType::INT128); - memcpy(&(res_val.val), result.bytes_value(0).data(), sizeof(__int128_t)); - } else if constexpr (std::is_same_v) { - DCHECK(result.type().id() == PGenericType::DATE || - result.type().id() == PGenericType::DATETIME); - DateTimeValue value; - value.set_time(result.datetime_value(0).year(), result.datetime_value(0).month(), - result.datetime_value(0).day(), result.datetime_value(0).hour(), - result.datetime_value(0).minute(), result.datetime_value(0).second(), - result.datetime_value(0).microsecond()); - if (result.type().id() == PGenericType::DATE) { - value.set_type(TimeType::TIME_DATE); - } else if (result.type().id() == PGenericType::DATETIME) { - if (result.datetime_value(0).has_year()) { - value.set_type(TimeType::TIME_DATETIME); - } else - value.set_type(TimeType::TIME_TIME); - } - value.to_datetime_val(&res_val); - } else if constexpr (std::is_same_v) { - DCHECK(result.type().id() == PGenericType::DECIMAL128); - memcpy(&(res_val.val), result.bytes_value(0).data(), sizeof(__int128_t)); - } - return res_val; -} -} // namespace doris \ No newline at end of file diff --git a/be/src/exprs/rpc_fn_call.cpp b/be/src/exprs/rpc_fn_call.cpp deleted file mode 100644 index 1af4365b02..0000000000 --- a/be/src/exprs/rpc_fn_call.cpp +++ /dev/null @@ -1,112 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/rpc_fn_call.h" - -#include "exprs/anyval_util.h" -#include "exprs/expr_context.h" -#include "exprs/rpc_fn.h" -#include "rpc_fn.h" -#include "runtime/runtime_state.h" - -namespace doris { - -RPCFnCall::RPCFnCall(const TExprNode& node) : Expr(node), _tnode(node) { - DCHECK_EQ(_fn.binary_type, TFunctionBinaryType::RPC); -} - -RPCFnCall::~RPCFnCall() {} - -Status RPCFnCall::prepare(RuntimeState* state, const RowDescriptor& desc, ExprContext* context) { - RETURN_IF_ERROR(Expr::prepare(state, desc, context)); - DCHECK(!_fn.scalar_fn.symbol.empty()); - - FunctionContext::TypeDesc return_type = AnyValUtil::column_type_to_type_desc(_type); - std::vector arg_types; - bool char_arg = false; - for (int i = 0; i < _children.size(); ++i) { - arg_types.push_back(AnyValUtil::column_type_to_type_desc(_children[i]->type())); - char_arg = char_arg || (_children[i]->type().type == TYPE_CHAR); - } - int id = context->register_func(state, return_type, arg_types, 0); - - _rpc_fn = std::make_unique(state, _fn, id, false); - if (!_rpc_fn->avliable()) { - return Status::InternalError("rpc env init error: {}/{}", _fn.hdfs_location, - _fn.scalar_fn.symbol); - } - return Status::OK(); -} - -Status RPCFnCall::open(RuntimeState* state, ExprContext* ctx, - FunctionContext::FunctionStateScope scope) { - RETURN_IF_ERROR(Expr::open(state, ctx, scope)); - return Status::OK(); -} - -void RPCFnCall::close(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) { - Expr::close(state, context, scope); -} - -doris_udf::IntVal RPCFnCall::get_int_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} - -doris_udf::BooleanVal RPCFnCall::get_boolean_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} - -doris_udf::TinyIntVal RPCFnCall::get_tiny_int_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} - -doris_udf::SmallIntVal RPCFnCall::get_small_int_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} - -doris_udf::BigIntVal RPCFnCall::get_big_int_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} - -doris_udf::FloatVal RPCFnCall::get_float_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} - -doris_udf::DoubleVal RPCFnCall::get_double_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} - -doris_udf::StringVal RPCFnCall::get_string_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} - -doris_udf::LargeIntVal RPCFnCall::get_large_int_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} - -doris_udf::DateTimeVal RPCFnCall::get_datetime_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} - -doris_udf::DecimalV2Val RPCFnCall::get_decimalv2_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} -doris_udf::CollectionVal RPCFnCall::get_array_val(ExprContext* context, TupleRow* row) { - return _rpc_fn->call(context, row, _children); -} -} // namespace doris diff --git a/be/src/exprs/rpc_fn_call.h b/be/src/exprs/rpc_fn_call.h deleted file mode 100644 index d63fb2db0e..0000000000 --- a/be/src/exprs/rpc_fn_call.h +++ /dev/null @@ -1,60 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "common/object_pool.h" -#include "exprs/expr.h" -#include "udf/udf.h" - -namespace doris { -class TExprNode; -class RPCFn; - -class RPCFnCall : public Expr { -public: - RPCFnCall(const TExprNode& node); - ~RPCFnCall(); - - virtual Status prepare(RuntimeState* state, const RowDescriptor& desc, - ExprContext* context) override; - virtual Status open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) override; - virtual void close(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) override; - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new RPCFnCall(_tnode)); - } - - virtual doris_udf::BooleanVal get_boolean_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::FloatVal get_float_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DoubleVal get_double_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::StringVal get_string_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DateTimeVal get_datetime_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::CollectionVal get_array_val(ExprContext* context, TupleRow*) override; - -private: - std::unique_ptr _rpc_fn; - const TExprNode& _tnode; -}; -} // namespace doris diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index 4ba1bd271e..d4054a9941 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -21,12 +21,9 @@ #include "common/object_pool.h" #include "common/status.h" -#include "exprs/binary_predicate.h" #include "exprs/bitmapfilter_predicate.h" -#include "exprs/bloomfilter_predicate.h" #include "exprs/create_predicate_function.h" #include "exprs/hybrid_set.h" -#include "exprs/literal.h" #include "exprs/minmax_predicate.h" #include "gen_cpp/internal_service.pb.h" #include "runtime/define_primitive_type.h" @@ -316,25 +313,6 @@ Status create_literal(ObjectPool* pool, const TypeDescriptor& type, const void* return Status::OK(); } -BinaryPredicate* create_bin_predicate(ObjectPool* pool, PrimitiveType prim_type, - TExprOpcode::type opcode) { - TExprNode node; - TScalarType tscalar_type; - tscalar_type.__set_type(TPrimitiveType::BOOLEAN); - TTypeNode ttype_node; - ttype_node.__set_type(TTypeNodeType::SCALAR); - ttype_node.__set_scalar_type(tscalar_type); - TTypeDesc t_type_desc; - t_type_desc.types.push_back(ttype_node); - node.__set_type(t_type_desc); - node.__set_opcode(opcode); - node.__set_child_type(to_thrift(prim_type)); - node.__set_num_children(2); - node.__set_output_scale(-1); - node.__set_node_type(TExprNodeType::BINARY_PRED); - return (BinaryPredicate*)pool->add(BinaryPredicate::from_thrift(node)); -} - Status create_vbin_predicate(ObjectPool* pool, const TypeDescriptor& type, TExprOpcode::type opcode, vectorized::VExpr** expr, TExprNode* tnode) { TExprNode node; @@ -1307,7 +1285,6 @@ Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQue _has_remote_target = desc->has_remote_targets; _expr_order = desc->expr_order; _filter_id = desc->filter_id; - vectorized::VExprContext* build_ctx = nullptr; RETURN_IF_ERROR(vectorized::VExpr::create_expr_tree(_pool, desc->src_expr, &build_ctx)); diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h index 9142ce0557..f7a0348d22 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -17,6 +17,7 @@ #pragma once +#include "runtime/large_int_value.h" #include "runtime/runtime_state.h" #include "util/runtime_profile.h" #include "util/time.h" @@ -27,12 +28,9 @@ class IOBufAsZeroCopyInputStream; } namespace doris { -class Predicate; class ObjectPool; -class ExprContext; class RuntimePredicateWrapper; class MemTracker; -class TupleRow; class PPublishFilterRequest; class PMergeFilterRequest; class TRuntimeFilterDesc; @@ -364,4 +362,148 @@ private: WrapperPtr _wrapper; }; +// copied from expr.h since it is only used in runtime filter + +template +Status create_texpr_literal_node(const void* data, TExprNode* node, int precision = 0, + int scale = 0) { + if constexpr (T == TYPE_BOOLEAN) { + auto origin_value = reinterpret_cast(data); + TBoolLiteral boolLiteral; + (*node).__set_node_type(TExprNodeType::BOOL_LITERAL); + boolLiteral.__set_value(*origin_value); + (*node).__set_bool_literal(boolLiteral); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_BOOLEAN)); + } else if constexpr (T == TYPE_TINYINT) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::INT_LITERAL); + TIntLiteral intLiteral; + intLiteral.__set_value(*origin_value); + (*node).__set_int_literal(intLiteral); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_TINYINT)); + } else if constexpr (T == TYPE_SMALLINT) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::INT_LITERAL); + TIntLiteral intLiteral; + intLiteral.__set_value(*origin_value); + (*node).__set_int_literal(intLiteral); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_SMALLINT)); + } else if constexpr (T == TYPE_INT) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::INT_LITERAL); + TIntLiteral intLiteral; + intLiteral.__set_value(*origin_value); + (*node).__set_int_literal(intLiteral); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_INT)); + } else if constexpr (T == TYPE_BIGINT) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::INT_LITERAL); + TIntLiteral intLiteral; + intLiteral.__set_value(*origin_value); + (*node).__set_int_literal(intLiteral); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_BIGINT)); + } else if constexpr (T == TYPE_LARGEINT) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::LARGE_INT_LITERAL); + TLargeIntLiteral large_int_literal; + large_int_literal.__set_value(LargeIntValue::to_string(*origin_value)); + (*node).__set_large_int_literal(large_int_literal); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_LARGEINT)); + } else if constexpr ((T == TYPE_DATE) || (T == TYPE_DATETIME) || (T == TYPE_TIME)) { + auto origin_value = reinterpret_cast(data); + TDateLiteral date_literal; + char convert_buffer[30]; + origin_value->to_string(convert_buffer); + date_literal.__set_value(convert_buffer); + (*node).__set_date_literal(date_literal); + (*node).__set_node_type(TExprNodeType::DATE_LITERAL); + if (origin_value->type() == TimeType::TIME_DATE) { + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DATE)); + } else if (origin_value->type() == TimeType::TIME_DATETIME) { + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DATETIME)); + } else if (origin_value->type() == TimeType::TIME_TIME) { + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_TIME)); + } + } else if constexpr (T == TYPE_DATEV2) { + auto origin_value = reinterpret_cast< + const doris::vectorized::DateV2Value*>(data); + TDateLiteral date_literal; + char convert_buffer[30]; + origin_value->to_string(convert_buffer); + date_literal.__set_value(convert_buffer); + (*node).__set_date_literal(date_literal); + (*node).__set_node_type(TExprNodeType::DATE_LITERAL); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DATEV2)); + } else if constexpr (T == TYPE_DATETIMEV2) { + auto origin_value = reinterpret_cast< + const doris::vectorized::DateV2Value*>( + data); + TDateLiteral date_literal; + char convert_buffer[30]; + origin_value->to_string(convert_buffer); + date_literal.__set_value(convert_buffer); + (*node).__set_date_literal(date_literal); + (*node).__set_node_type(TExprNodeType::DATE_LITERAL); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DATETIMEV2)); + } else if constexpr (T == TYPE_DECIMALV2) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); + TDecimalLiteral decimal_literal; + decimal_literal.__set_value(origin_value->to_string()); + (*node).__set_decimal_literal(decimal_literal); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMALV2, precision, scale)); + } else if constexpr (T == TYPE_DECIMAL32) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); + TDecimalLiteral decimal_literal; + std::stringstream ss; + vectorized::write_text(*origin_value, scale, ss); + decimal_literal.__set_value(ss.str()); + (*node).__set_decimal_literal(decimal_literal); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL32, precision, scale)); + } else if constexpr (T == TYPE_DECIMAL64) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); + TDecimalLiteral decimal_literal; + std::stringstream ss; + vectorized::write_text(*origin_value, scale, ss); + decimal_literal.__set_value(ss.str()); + (*node).__set_decimal_literal(decimal_literal); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL64, precision, scale)); + } else if constexpr (T == TYPE_DECIMAL128I) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); + TDecimalLiteral decimal_literal; + std::stringstream ss; + vectorized::write_text(*origin_value, scale, ss); + decimal_literal.__set_value(ss.str()); + (*node).__set_decimal_literal(decimal_literal); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL128I, precision, scale)); + } else if constexpr (T == TYPE_FLOAT) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::FLOAT_LITERAL); + TFloatLiteral float_literal; + float_literal.__set_value(*origin_value); + (*node).__set_float_literal(float_literal); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_FLOAT)); + } else if constexpr (T == TYPE_DOUBLE) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::FLOAT_LITERAL); + TFloatLiteral float_literal; + float_literal.__set_value(*origin_value); + (*node).__set_float_literal(float_literal); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DOUBLE)); + } else if constexpr ((T == TYPE_STRING) || (T == TYPE_CHAR) || (T == TYPE_VARCHAR)) { + auto origin_value = reinterpret_cast(data); + (*node).__set_node_type(TExprNodeType::STRING_LITERAL); + TStringLiteral string_literal; + string_literal.__set_value(origin_value->to_string()); + (*node).__set_string_literal(string_literal); + (*node).__set_type(create_type_desc(PrimitiveType::TYPE_STRING)); + } else { + return Status::InvalidArgument("Invalid argument type!"); + } + return Status::OK(); +} + } // namespace doris diff --git a/be/src/exprs/runtime_filter_slots.h b/be/src/exprs/runtime_filter_slots.h index f050a1edb6..71084bacfb 100644 --- a/be/src/exprs/runtime_filter_slots.h +++ b/be/src/exprs/runtime_filter_slots.h @@ -155,20 +155,6 @@ public: return Status::OK(); } - void insert(TupleRow* row) { - for (int i = 0; i < _build_expr_context.size(); ++i) { - auto iter = _runtime_filters.find(i); - if (iter != _runtime_filters.end()) { - void* val = _build_expr_context[i]->get_value(row); - if (val != nullptr) { - for (auto filter : iter->second) { - filter->insert(val); - } - } - } - } - } - void insert(std::unordered_map>& datas) { for (int i = 0; i < _build_expr_context.size(); ++i) { auto iter = _runtime_filters.find(i); diff --git a/be/src/exprs/runtime_filter_slots_cross.h b/be/src/exprs/runtime_filter_slots_cross.h index a7f3151136..adc9fc1188 100644 --- a/be/src/exprs/runtime_filter_slots_cross.h +++ b/be/src/exprs/runtime_filter_slots_cross.h @@ -31,7 +31,7 @@ namespace doris { // this class used in cross join node -template +template class RuntimeFilterSlotsCross { public: RuntimeFilterSlotsCross(const std::vector& runtime_filter_descs, diff --git a/be/src/exprs/scalar_fn_call.cpp b/be/src/exprs/scalar_fn_call.cpp deleted file mode 100644 index b023df6ff5..0000000000 --- a/be/src/exprs/scalar_fn_call.cpp +++ /dev/null @@ -1,547 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/scalar-fn-call.cc -// and modified by Doris - -#include "exprs/scalar_fn_call.h" - -#include - -#include "exprs/anyval_util.h" -#include "exprs/expr_context.h" -#include "runtime/runtime_state.h" -#include "runtime/user_function_cache.h" -#include "udf/udf_internal.h" -#include "util/symbols_util.h" - -namespace doris { - -ScalarFnCall::ScalarFnCall(const TExprNode& node) - : Expr(node), - _vararg_start_idx(node.__isset.vararg_start_idx ? node.vararg_start_idx : -1), - _scalar_fn_wrapper(nullptr), - _prepare_fn(nullptr), - _close_fn(nullptr), - _scalar_fn(nullptr) { - DCHECK_NE(_fn.binary_type, TFunctionBinaryType::HIVE); -} - -ScalarFnCall::~ScalarFnCall() {} - -Status ScalarFnCall::prepare(RuntimeState* state, const RowDescriptor& desc, ExprContext* context) { - RETURN_IF_ERROR(Expr::prepare(state, desc, context)); - if (_fn.scalar_fn.symbol.empty()) { - // This path is intended to only be used during development to test FE - // code before the BE has implemented the function. - // Having the failure in the BE (rather than during analysis) allows for - // better FE testing. - DCHECK_EQ(_fn.binary_type, TFunctionBinaryType::BUILTIN); - return Status::InternalError("Function {} is not implemented.", _fn.name.function_name); - } - - FunctionContext::TypeDesc return_type = AnyValUtil::column_type_to_type_desc(_type); - std::vector arg_types; - bool char_arg = false; - for (int i = 0; i < _children.size(); ++i) { - arg_types.push_back(AnyValUtil::column_type_to_type_desc(_children[i]->_type)); - char_arg = char_arg || (_children[i]->_type.type == TYPE_CHAR); - } - - // Compute buffer size for varargs - int varargs_buffer_size = 0; - if (_vararg_start_idx != -1) { - DCHECK_GT(get_num_children(), _vararg_start_idx); - for (int i = _vararg_start_idx; i < get_num_children(); ++i) { - varargs_buffer_size += AnyValUtil::any_val_size(_children[i]->type()); - } - } - - _fn_context_index = context->register_func(state, return_type, arg_types, varargs_buffer_size); - Status status = Status::OK(); - if (_scalar_fn == nullptr) { - if (SymbolsUtil::is_mangled(_fn.scalar_fn.symbol)) { - status = UserFunctionCache::instance()->get_function_ptr( - _fn.id, _fn.scalar_fn.symbol, _fn.hdfs_location, _fn.checksum, &_scalar_fn, - &_cache_entry); - } else { - std::vector arg_types; - for (auto& t_type : _fn.arg_types) { - arg_types.push_back(TypeDescriptor::from_thrift(t_type)); - } - // ColumnType ret_type(INVALID_TYPE); - // ret_type = ColumnType(thrift_to_type(_fn.ret_type)); - std::string symbol = SymbolsUtil::mangle_user_function(_fn.scalar_fn.symbol, arg_types, - _fn.has_var_args, nullptr); - - status = UserFunctionCache::instance()->get_function_ptr( - _fn.id, symbol, _fn.hdfs_location, _fn.checksum, &_scalar_fn, &_cache_entry); - } - } - if (_fn.scalar_fn.__isset.prepare_fn_symbol) { - RETURN_IF_ERROR(get_function(state, _fn.scalar_fn.prepare_fn_symbol, - reinterpret_cast(&_prepare_fn))); - } - if (_fn.scalar_fn.__isset.close_fn_symbol) { - RETURN_IF_ERROR(get_function(state, _fn.scalar_fn.close_fn_symbol, - reinterpret_cast(&_close_fn))); - } - - return status; -} - -Status ScalarFnCall::open(RuntimeState* state, ExprContext* ctx, - FunctionContext::FunctionStateScope scope) { - // Opens and inits children - RETURN_IF_ERROR(Expr::open(state, ctx, scope)); - FunctionContext* fn_ctx = ctx->fn_context(_fn_context_index); - if (_scalar_fn != nullptr) { - // We're in the interpreted path (i.e. no JIT). Populate our FunctionContext's - // staging_input_vals, which will be reused across calls to _scalar_fn. - DCHECK(_scalar_fn_wrapper == nullptr); - ObjectPool* obj_pool = state->obj_pool(); - std::vector* input_vals = fn_ctx->impl()->staging_input_vals(); - for (int i = 0; i < num_fixed_args(); ++i) { - input_vals->push_back(create_any_val(obj_pool, _children[i]->type())); - } - } - - // Only evaluate constant arguments once per fragment - if (scope == FunctionContext::FRAGMENT_LOCAL) { - std::vector constant_args; - for (int i = 0; i < _children.size(); ++i) { - constant_args.push_back(_children[i]->get_const_val(ctx)); - // Check if any errors were set during the get_const_val() call - Status child_status = _children[i]->get_fn_context_error(ctx); - if (!child_status.ok()) { - return child_status; - } - } - fn_ctx->impl()->set_constant_args(constant_args); - } - - if (_prepare_fn != nullptr) { - if (scope == FunctionContext::FRAGMENT_LOCAL) { - _prepare_fn(fn_ctx, FunctionContext::FRAGMENT_LOCAL); - if (fn_ctx->has_error()) { - return Status::InternalError(fn_ctx->error_msg()); - } - } - _prepare_fn(fn_ctx, FunctionContext::THREAD_LOCAL); - if (fn_ctx->has_error()) { - return Status::InternalError(fn_ctx->error_msg()); - } - } - - // If we're calling MathFunctions::RoundUpTo(), we need to set _output_scale, which - // determines how many decimal places are printed. - // TODO: revisit this. We should be able to do this if the scale argument is - // non-constant. - if (_fn.name.function_name == "round" && _fn.__isset.signature && - _fn.signature.find("round_up_to") != std::string::npos) { - DCHECK_EQ(_children.size(), 2); - if (_children[1]->is_constant()) { - IntVal scale_arg = _children[1]->get_int_val(ctx, nullptr); - _output_scale = scale_arg.val; - } - } - - return Status::OK(); -} - -void ScalarFnCall::close(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) { - if (_fn_context_index != -1 && _close_fn != nullptr) { - FunctionContext* fn_ctx = context->fn_context(_fn_context_index); - _close_fn(fn_ctx, FunctionContext::THREAD_LOCAL); - if (scope == FunctionContext::FRAGMENT_LOCAL) { - _close_fn(fn_ctx, FunctionContext::FRAGMENT_LOCAL); - } - } - Expr::close(state, context, scope); -} - -bool ScalarFnCall::is_constant() const { - if (_fn.name.function_name == "rand") { - return false; - } - return Expr::is_constant(); -} - -Status ScalarFnCall::get_function(RuntimeState* state, const std::string& symbol, void** fn) { - if (_fn.binary_type == TFunctionBinaryType::NATIVE || - _fn.binary_type == TFunctionBinaryType::BUILTIN || - _fn.binary_type == TFunctionBinaryType::HIVE) { - return UserFunctionCache::instance()->get_function_ptr(_fn.id, symbol, _fn.hdfs_location, - _fn.checksum, fn, &_cache_entry); - } - return Status::OK(); -} - -void ScalarFnCall::evaluate_children(ExprContext* context, TupleRow* row, - std::vector* input_vals) { - DCHECK_EQ(input_vals->size(), num_fixed_args()); - FunctionContext* fn_ctx = context->fn_context(_fn_context_index); - uint8_t* varargs_buffer = fn_ctx->impl()->varargs_buffer(); - for (int i = 0; i < _children.size(); ++i) { - void* src_slot = context->get_value(_children[i], row); - AnyVal* dst_val = nullptr; - if (_vararg_start_idx == -1 || i < _vararg_start_idx) { - dst_val = (*input_vals)[i]; - } else { - dst_val = reinterpret_cast(varargs_buffer); - varargs_buffer += AnyValUtil::any_val_size(_children[i]->type()); - } - AnyValUtil::set_any_val(src_slot, _children[i]->type(), dst_val); - } -} - -template -RETURN_TYPE ScalarFnCall::interpret_eval(ExprContext* context, TupleRow* row) { - DCHECK(_scalar_fn != nullptr); - FunctionContext* fn_ctx = context->fn_context(_fn_context_index); - std::vector* input_vals = fn_ctx->impl()->staging_input_vals(); - - evaluate_children(context, row, input_vals); - - if (_vararg_start_idx == -1) { - switch (_children.size()) { - case 0: - typedef RETURN_TYPE (*ScalarFn0)(FunctionContext*); - return reinterpret_cast(_scalar_fn)(fn_ctx); - case 1: - typedef RETURN_TYPE (*ScalarFn1)(FunctionContext*, const AnyVal& a1); - return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0]); - case 2: - typedef RETURN_TYPE (*ScalarFn2)(FunctionContext*, const AnyVal& a1, const AnyVal& a2); - return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], - *(*input_vals)[1]); - case 3: - typedef RETURN_TYPE (*ScalarFn3)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3); - return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], - *(*input_vals)[1], *(*input_vals)[2]); - case 4: - typedef RETURN_TYPE (*ScalarFn4)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4); - return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], - *(*input_vals)[1], *(*input_vals)[2], - *(*input_vals)[3]); - case 5: - typedef RETURN_TYPE (*ScalarFn5)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5); - return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], - *(*input_vals)[1], *(*input_vals)[2], - *(*input_vals)[3], *(*input_vals)[4]); - case 6: - typedef RETURN_TYPE (*ScalarFn6)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], - *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5]); - case 7: - typedef RETURN_TYPE (*ScalarFn7)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6, const AnyVal& a7); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], - *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6]); - case 8: - typedef RETURN_TYPE (*ScalarFn8)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6, const AnyVal& a7, const AnyVal& a8); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], - *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], - *(*input_vals)[7]); - default: - DCHECK(false) << "Interpreted path not implemented. We should have " - << "codegen'd the wrapper"; - } - } else { - int num_varargs = _children.size() - num_fixed_args(); - const AnyVal* varargs = reinterpret_cast(fn_ctx->impl()->varargs_buffer()); - switch (num_fixed_args()) { - case 0: - typedef RETURN_TYPE (*VarargFn0)(FunctionContext*, int num_varargs, - const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)(fn_ctx, num_varargs, varargs); - case 1: - typedef RETURN_TYPE (*VarargFn1)(FunctionContext*, const AnyVal& a1, int num_varargs, - const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], num_varargs, - varargs); - case 2: - typedef RETURN_TYPE (*VarargFn2)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - int num_varargs, const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], - *(*input_vals)[1], num_varargs, varargs); - case 3: - typedef RETURN_TYPE (*VarargFn3)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, int num_varargs, - const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], - *(*input_vals)[1], *(*input_vals)[2], - num_varargs, varargs); - case 4: - typedef RETURN_TYPE (*VarargFn4)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, int num_varargs, - const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)(fn_ctx, *(*input_vals)[0], - *(*input_vals)[1], *(*input_vals)[2], - *(*input_vals)[3], num_varargs, varargs); - case 5: - typedef RETURN_TYPE (*VarargFn5)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - int num_varargs, const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], - *(*input_vals)[3], *(*input_vals)[4], num_varargs, varargs); - case 6: - typedef RETURN_TYPE (*VarargFn6)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6, int num_varargs, - const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], - *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5], num_varargs, varargs); - case 7: - typedef RETURN_TYPE (*VarargFn7)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6, const AnyVal& a7, int num_varargs, - const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], - *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], - num_varargs, varargs); - case 8: - typedef RETURN_TYPE (*VarargFn8)(FunctionContext*, const AnyVal& a1, const AnyVal& a2, - const AnyVal& a3, const AnyVal& a4, const AnyVal& a5, - const AnyVal& a6, const AnyVal& a7, const AnyVal& a8, - int num_varargs, const AnyVal* varargs); - return reinterpret_cast(_scalar_fn)( - fn_ctx, *(*input_vals)[0], *(*input_vals)[1], *(*input_vals)[2], - *(*input_vals)[3], *(*input_vals)[4], *(*input_vals)[5], *(*input_vals)[6], - *(*input_vals)[7], num_varargs, varargs); - default: - DCHECK(false) << "Interpreted path not implemented. We should have " - << "codegen'd the wrapper"; - } - } - return RETURN_TYPE::null(); -} - -typedef BooleanVal (*BooleanWrapper)(ExprContext*, TupleRow*); -typedef TinyIntVal (*TinyIntWrapper)(ExprContext*, TupleRow*); -typedef SmallIntVal (*SmallIntWrapper)(ExprContext*, TupleRow*); -typedef IntVal (*IntWrapper)(ExprContext*, TupleRow*); -typedef BigIntVal (*BigIntWrapper)(ExprContext*, TupleRow*); -typedef LargeIntVal (*LargeIntWrapper)(ExprContext*, TupleRow*); -typedef FloatVal (*FloatWrapper)(ExprContext*, TupleRow*); -typedef DoubleVal (*DoubleWrapper)(ExprContext*, TupleRow*); -typedef StringVal (*StringWrapper)(ExprContext*, TupleRow*); -typedef DateTimeVal (*DatetimeWrapper)(ExprContext*, TupleRow*); -typedef DateV2Val (*DateV2Wrapper)(ExprContext*, TupleRow*); -typedef DateTimeV2Val (*DateTimeV2Wrapper)(ExprContext*, TupleRow*); -typedef DecimalV2Val (*DecimalV2Wrapper)(ExprContext*, TupleRow*); -typedef Decimal32Val (*Decimal32Wrapper)(ExprContext*, TupleRow*); -typedef Decimal64Val (*Decimal64Wrapper)(ExprContext*, TupleRow*); -typedef Decimal128Val (*Decimal128Wrapper)(ExprContext*, TupleRow*); -typedef CollectionVal (*ArrayWrapper)(ExprContext*, TupleRow*); - -// TODO: macroify this? -BooleanVal ScalarFnCall::get_boolean_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_BOOLEAN); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - BooleanWrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -TinyIntVal ScalarFnCall::get_tiny_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_TINYINT); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - TinyIntWrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -SmallIntVal ScalarFnCall::get_small_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_SMALLINT); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - SmallIntWrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -IntVal ScalarFnCall::get_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_INT); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - IntWrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -BigIntVal ScalarFnCall::get_big_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_BIGINT); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - BigIntWrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -LargeIntVal ScalarFnCall::get_large_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_LARGEINT); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - LargeIntWrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -FloatVal ScalarFnCall::get_float_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_FLOAT); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - FloatWrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -DoubleVal ScalarFnCall::get_double_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.type == TYPE_DOUBLE || _type.type == TYPE_TIME || _type.type == TYPE_TIMEV2); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - - DoubleWrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -StringVal ScalarFnCall::get_string_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.is_string_type()); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - StringWrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -DateTimeVal ScalarFnCall::get_datetime_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.is_date_type()); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - DatetimeWrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -DateV2Val ScalarFnCall::get_datev2_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.is_date_v2_type()); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - DateV2Wrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -DateTimeV2Val ScalarFnCall::get_datetimev2_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.is_datetime_v2_type()); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - DateTimeV2Wrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -DecimalV2Val ScalarFnCall::get_decimalv2_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_DECIMALV2); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - DecimalV2Wrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -Decimal32Val ScalarFnCall::get_decimal32_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_DECIMAL32); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - Decimal32Wrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -Decimal64Val ScalarFnCall::get_decimal64_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_DECIMAL64); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - Decimal64Wrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -Decimal128Val ScalarFnCall::get_decimal128_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_DECIMAL128I); - DCHECK(context != nullptr); - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - Decimal128Wrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -CollectionVal ScalarFnCall::get_array_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_ARRAY); - DCHECK(context != nullptr); - - if (_scalar_fn_wrapper == nullptr) { - return interpret_eval(context, row); - } - - ArrayWrapper fn = reinterpret_cast(_scalar_fn_wrapper); - return fn(context, row); -} - -std::string ScalarFnCall::debug_string() const { - std::stringstream out; - out << "ScalarFnCall(udf_type=" << _fn.binary_type << " location=" << _fn.hdfs_location - << " symbol_name=" << _fn.scalar_fn.symbol << Expr::debug_string() << ")"; - return out.str(); -} -} // namespace doris diff --git a/be/src/exprs/scalar_fn_call.h b/be/src/exprs/scalar_fn_call.h deleted file mode 100644 index 46b633366c..0000000000 --- a/be/src/exprs/scalar_fn_call.h +++ /dev/null @@ -1,140 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/scalar-fn-call.h -// and modified by Doris - -#pragma once - -#include - -#include "common/object_pool.h" -#include "exprs/expr.h" -#include "udf/udf.h" - -namespace doris { - -class TExprNode; - -/// Expr for evaluating a pre-compiled native or LLVM IR function that uses the UDF -/// interface (i.e. a scalar function). This class overrides GetCodegendComputeFn() to -/// return a function that calls any child exprs and passes the results as arguments to the -/// specified scalar function. If codegen is enabled, ScalarFnCall's Get*Val() compute -/// functions are wrappers around this codegen'd function. -// -/// If codegen is disabled, some native functions can be called without codegen, depending -/// on the native function's signature. However, since we can't write static code to call -/// every possible function signature, codegen may be required to generate the call to the -/// function even if codegen is disabled. Codegen will also be used for IR UDFs (note that -/// there is no way to specify both a native and IR library for a single UDF). -// -/// TODO: -/// - Fix error reporting, e.g. reporting leaks -/// - Testing -/// - Test cancellation -/// - Type descs in UDA test harness -/// - Allow more functions to be nullptr in UDA test harness -class ScalarFnCall : public Expr { -public: - virtual std::string debug_string() const override; - virtual ~ScalarFnCall(); - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new ScalarFnCall(*this)); - } - - // TODO: just for table function. - // It is not good to expose this field to public. - // We should refactor it after implementing real table functions. - int get_fn_context_index() const { return _fn_context_index; } - -protected: - friend class Expr; - - ScalarFnCall(const TExprNode& node); - virtual Status prepare(RuntimeState* state, const RowDescriptor& desc, - ExprContext* context) override; - virtual Status open(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) override; - virtual void close(RuntimeState* state, ExprContext* context, - FunctionContext::FunctionStateScope scope) override; - - virtual bool is_constant() const override; - - virtual doris_udf::BooleanVal get_boolean_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::FloatVal get_float_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DoubleVal get_double_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::StringVal get_string_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DateTimeVal get_datetime_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DateV2Val get_datev2_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DateTimeV2Val get_datetimev2_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*) override; - virtual CollectionVal get_array_val(ExprContext* context, TupleRow*) override; - - virtual Decimal32Val get_decimal32_val(ExprContext* context, TupleRow*) override; - virtual Decimal64Val get_decimal64_val(ExprContext* context, TupleRow*) override; - virtual Decimal128Val get_decimal128_val(ExprContext* context, TupleRow*) override; - -private: - /// If this function has var args, children()[_vararg_start_idx] is the first vararg - /// argument. - /// If this function does not have varargs, it is set to -1. - int _vararg_start_idx; - - /// Function pointer to the JIT'd function produced by GetCodegendComputeFn(). - /// Has signature *Val (ExprContext*, TupleRow*), and calls the scalar - /// function with signature like *Val (FunctionContext*, const *Val& arg1, ...) - void* _scalar_fn_wrapper; - - /// The UDF's prepare function, if specified. This is initialized in Prepare() and - /// called in Open() (since we may have needed to codegen the function if it's from an - /// IR module). - UdfPrepare _prepare_fn; - - /// THe UDF's close function, if specified. This is initialized in Prepare() and called - /// in Close(). - UdfClose _close_fn; - - /// If running with codegen disabled, _scalar_fn will be a pointer to the non-JIT'd - /// scalar function. - void* _scalar_fn; - - /// Returns the number of non-vararg arguments - int num_fixed_args() const { - return _vararg_start_idx >= 0 ? _vararg_start_idx : _children.size(); - } - - /// Loads the native or IR function 'symbol' from HDFS and puts the result in *fn. - /// If the function is loaded from an IR module, it cannot be called until the module - /// has been JIT'd (i.e. after Prepare() has completed). - Status get_function(RuntimeState* state, const std::string& symbol, void** fn); - - /// Evaluates the children exprs and stores the results in input_vals. Used in the - /// interpreted path. - void evaluate_children(ExprContext* context, TupleRow* row, - std::vector* input_vals); - - /// Function to call _scalar_fn. Used in the interpreted path. - template - RETURN_TYPE interpret_eval(ExprContext* context, TupleRow* row); -}; - -} // namespace doris diff --git a/be/src/exprs/slot_ref.cpp b/be/src/exprs/slot_ref.cpp deleted file mode 100644 index 915704b66a..0000000000 --- a/be/src/exprs/slot_ref.cpp +++ /dev/null @@ -1,308 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/slot-ref.cc -// and modified by Doris - -#include "exprs/slot_ref.h" - -#include - -#include "gen_cpp/Exprs_types.h" -#include "gutil/strings/substitute.h" -#include "runtime/runtime_state.h" -#include "util/types.h" - -namespace doris { - -SlotRef::SlotRef(const TExprNode& node) - : Expr(node, true), - _slot_offset(-1), // invalid - _null_indicator_offset(0, 0), - _slot_id(node.slot_ref.slot_id), - _tuple_id(node.slot_ref.tuple_id) { - // _slot/_null_indicator_offset are set in Prepare() -} - -SlotRef::SlotRef(const SlotDescriptor* desc) - : Expr(desc->type(), true), - _slot_offset(-1), - _null_indicator_offset(0, 0), - _slot_id(desc->id()) { - // _slot/_null_indicator_offset are set in Prepare() -} - -SlotRef::SlotRef(const SlotDescriptor* desc, const TypeDescriptor& type) - : Expr(type, true), _slot_offset(-1), _null_indicator_offset(0, 0), _slot_id(desc->id()) { - // _slot/_null_indicator_offset are set in Prepare() -} - -SlotRef::SlotRef(const TypeDescriptor& type, int offset) - : Expr(type, true), - _tuple_idx(0), - _slot_offset(offset), - _null_indicator_offset(0, -1), - _slot_id(-1) {} - -Status SlotRef::prepare(const SlotDescriptor* slot_desc, const RowDescriptor& row_desc) { - if (!slot_desc->is_materialized()) { - return Status::InternalError("reference to non-materialized slot. slot_id: {}", _slot_id); - } - _tuple_idx = row_desc.get_tuple_idx(slot_desc->parent()); - if (_tuple_idx == RowDescriptor::INVALID_IDX) { - return Status::InternalError("failed to get tuple idx with tuple id: {}, slot id: {}", - slot_desc->parent(), _slot_id); - } - _tuple_is_nullable = row_desc.tuple_is_nullable(_tuple_idx); - _slot_offset = slot_desc->tuple_offset(); - _null_indicator_offset = slot_desc->null_indicator_offset(); - _is_nullable = slot_desc->is_nullable(); - return Status::OK(); -} - -Status SlotRef::prepare(RuntimeState* state, const RowDescriptor& row_desc, ExprContext* ctx) { - DCHECK_EQ(_children.size(), 0); - if (_slot_id == -1) { - return Status::OK(); - } - - const SlotDescriptor* slot_desc = state->desc_tbl().get_slot_descriptor(_slot_id); - if (slot_desc == nullptr) { - // TODO: create macro MAKE_ERROR() that returns a stream - return Status::InternalError("couldn't resolve slot descriptor {}", _slot_id); - } - - if (!slot_desc->is_materialized()) { - return Status::InternalError("reference to non-materialized slot. slot_id: {}", _slot_id); - } - - // TODO(marcel): get from runtime state - _tuple_idx = row_desc.get_tuple_idx(slot_desc->parent()); - if (_tuple_idx == RowDescriptor::INVALID_IDX) { - return Status::InternalError( - "failed to get tuple idx when prepare with tuple id: {}, slot id: {}", - slot_desc->parent(), _slot_id); - } - DCHECK(_tuple_idx != RowDescriptor::INVALID_IDX); - _tuple_is_nullable = row_desc.tuple_is_nullable(_tuple_idx); - _slot_offset = slot_desc->tuple_offset(); - _null_indicator_offset = slot_desc->null_indicator_offset(); - _is_nullable = slot_desc->is_nullable(); - return Status::OK(); -} - -int SlotRef::get_slot_ids(std::vector* slot_ids) const { - slot_ids->push_back(_slot_id); - return 1; -} - -bool SlotRef::is_bound(std::vector* tuple_ids) const { - for (int i = 0; i < tuple_ids->size(); i++) { - if (_tuple_id == (*tuple_ids)[i]) { - return true; - } - } - - return false; -} - -std::string SlotRef::debug_string() const { - std::stringstream out; - out << "SlotRef(slot_id=" << _slot_id << " tuple_idx=" << _tuple_idx - << " slot_offset=" << _slot_offset << " null_indicator=" << _null_indicator_offset << " " - << Expr::debug_string() << ")"; - return out.str(); -} - -BooleanVal SlotRef::get_boolean_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_BOOLEAN); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return BooleanVal::null(); - } - return BooleanVal(*reinterpret_cast(t->get_slot(_slot_offset))); -} - -TinyIntVal SlotRef::get_tiny_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_TINYINT); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return TinyIntVal::null(); - } - - return TinyIntVal(*reinterpret_cast(t->get_slot(_slot_offset))); -} - -SmallIntVal SlotRef::get_small_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_SMALLINT); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return SmallIntVal::null(); - } - return SmallIntVal(*reinterpret_cast(t->get_slot(_slot_offset))); -} - -IntVal SlotRef::get_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_INT); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return IntVal::null(); - } - return IntVal(*reinterpret_cast(t->get_slot(_slot_offset))); -} - -BigIntVal SlotRef::get_big_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_BIGINT); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return BigIntVal::null(); - } - return BigIntVal(*reinterpret_cast(t->get_slot(_slot_offset))); -} - -LargeIntVal SlotRef::get_large_int_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_LARGEINT); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return LargeIntVal::null(); - } - return LargeIntVal(reinterpret_cast(t->get_slot(_slot_offset))->value); -} - -FloatVal SlotRef::get_float_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_FLOAT); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return FloatVal::null(); - } - return FloatVal(*reinterpret_cast(t->get_slot(_slot_offset))); -} - -DoubleVal SlotRef::get_double_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_DOUBLE); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return DoubleVal::null(); - } - return DoubleVal(*reinterpret_cast(t->get_slot(_slot_offset))); -} - -StringVal SlotRef::get_string_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.is_string_type()); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return StringVal::null(); - } - StringVal result; - StringRef* sv = reinterpret_cast(t->get_slot(_slot_offset)); - sv->to_string_val(&result); - return result; -} - -DateTimeVal SlotRef::get_datetime_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.is_date_type()); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return DateTimeVal::null(); - } - DateTimeValue* tv = reinterpret_cast(t->get_slot(_slot_offset)); - DateTimeVal result; - tv->to_datetime_val(&result); - return result; -} - -DateV2Val SlotRef::get_datev2_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.is_date_v2_type()); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return DateV2Val::null(); - } - doris::vectorized::DateV2Value* tv = - reinterpret_cast*>( - t->get_slot(_slot_offset)); - DateV2Val result; - tv->to_datev2_val(&result); - return result; -} - -DateTimeV2Val SlotRef::get_datetimev2_val(ExprContext* context, TupleRow* row) { - DCHECK(_type.is_datetime_v2_type()); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return DateTimeV2Val::null(); - } - doris::vectorized::DateV2Value* tv = reinterpret_cast< - doris::vectorized::DateV2Value*>( - t->get_slot(_slot_offset)); - DateTimeV2Val result; - tv->to_datetimev2_val(&result); - return result; -} - -DecimalV2Val SlotRef::get_decimalv2_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_DECIMALV2); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return DecimalV2Val::null(); - } - - return DecimalV2Val(reinterpret_cast(t->get_slot(_slot_offset))->value); -} - -Decimal32Val SlotRef::get_decimal32_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_DECIMAL32); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return Decimal32Val::null(); - } - - return Decimal32Val(*reinterpret_cast(t->get_slot(_slot_offset))); -} - -Decimal64Val SlotRef::get_decimal64_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_DECIMAL64); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return Decimal64Val::null(); - } - - return Decimal64Val(*reinterpret_cast(t->get_slot(_slot_offset))); -} - -Decimal128Val SlotRef::get_decimal128_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_DECIMAL128I); - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return Decimal128Val::null(); - } - - return Decimal128Val(reinterpret_cast(t->get_slot(_slot_offset))->value); -} - -doris_udf::CollectionVal SlotRef::get_array_val(ExprContext* context, TupleRow* row) { - DCHECK_EQ(_type.type, TYPE_ARRAY); - - Tuple* t = row->get_tuple(_tuple_idx); - if (t == nullptr || t->is_null(_null_indicator_offset)) { - return CollectionVal::null(); - } - - CollectionVal val; - reinterpret_cast(t->get_slot(_slot_offset))->to_collection_val(&val); - return val; -} -} // namespace doris diff --git a/be/src/exprs/slot_ref.h b/be/src/exprs/slot_ref.h deleted file mode 100644 index 406ed1f3f5..0000000000 --- a/be/src/exprs/slot_ref.h +++ /dev/null @@ -1,121 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/slot-ref.h -// and modified by Doris - -#pragma once - -#include "common/object_pool.h" -#include "exprs/expr.h" - -namespace doris { - -// Reference to a single slot of a tuple. -// We inline this here in order for Expr::get_value() to be able -// to reference SlotRef::compute_fn() directly. -// Splitting it up into separate .h files would require circular #includes. -class SlotRef final : public Expr { -public: - SlotRef(const TExprNode& node); - SlotRef(const SlotDescriptor* desc); - virtual Expr* clone(ObjectPool* pool) const override { return pool->add(new SlotRef(*this)); } - - // TODO: this is a hack to allow aggregation nodes to work around nullptr slot - // descriptors. Ideally the FE would dictate the type of the intermediate SlotRefs. - SlotRef(const SlotDescriptor* desc, const TypeDescriptor& type); - - // Used for testing. get_value will return tuple + offset interpreted as 'type' - SlotRef(const TypeDescriptor& type, int offset); - - Status prepare(const SlotDescriptor* slot_desc, const RowDescriptor& row_desc); - - virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* ctx) override; - static void* get_value(Expr* expr, TupleRow* row); - void* get_slot(TupleRow* row); - Tuple* get_tuple(TupleRow* row); - bool is_null_bit_set(TupleRow* row); - static bool is_nullable(Expr* expr); - virtual std::string debug_string() const override; - virtual bool is_constant() const override { return false; } - virtual bool is_vectorized() const override { return true; } - virtual bool is_bound(std::vector* tuple_ids) const override; - virtual int get_slot_ids(std::vector* slot_ids) const override; - SlotId slot_id() const { return _slot_id; } - NullIndicatorOffset null_indicator_offset() const { return _null_indicator_offset; } - - virtual doris_udf::BooleanVal get_boolean_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::TinyIntVal get_tiny_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::SmallIntVal get_small_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::IntVal get_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::BigIntVal get_big_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::LargeIntVal get_large_int_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::FloatVal get_float_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DoubleVal get_double_val(ExprContext* context, TupleRow* row) override; - virtual doris_udf::StringVal get_string_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DateTimeVal get_datetime_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DateV2Val get_datev2_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DateTimeV2Val get_datetimev2_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::DecimalV2Val get_decimalv2_val(ExprContext* context, TupleRow*) override; - virtual doris_udf::CollectionVal get_array_val(ExprContext* context, TupleRow*) override; - virtual Decimal32Val get_decimal32_val(ExprContext* context, TupleRow*) override; - virtual Decimal64Val get_decimal64_val(ExprContext* context, TupleRow*) override; - virtual Decimal128Val get_decimal128_val(ExprContext* context, TupleRow*) override; - -private: - int _tuple_idx; // within row - int _slot_offset; // within tuple - NullIndicatorOffset _null_indicator_offset; // within tuple - const SlotId _slot_id; - bool _tuple_is_nullable; // true if the tuple is nullable. - TupleId _tuple_id; // used for desc this slot from - bool _is_nullable; -}; - -inline void* SlotRef::get_value(Expr* expr, TupleRow* row) { - SlotRef* ref = (SlotRef*)expr; - Tuple* t = row->get_tuple(ref->_tuple_idx); - if (t == nullptr || t->is_null(ref->_null_indicator_offset)) { - return nullptr; - } - return t->get_slot(ref->_slot_offset); -} - -inline void* SlotRef::get_slot(TupleRow* row) { - //get_slot需要获取slot所在的position, - //以用于在小批量导入聚合时修改其内容 - Tuple* t = row->get_tuple(_tuple_idx); - return t->get_slot(_slot_offset); -} - -inline Tuple* SlotRef::get_tuple(TupleRow* row) { - Tuple* t = row->get_tuple(_tuple_idx); - return t; -} - -inline bool SlotRef::is_null_bit_set(TupleRow* row) { - Tuple* t = row->get_tuple(_tuple_idx); - return t->is_null(_null_indicator_offset); -} - -inline bool SlotRef::is_nullable(Expr* expr) { - SlotRef* ref = (SlotRef*)expr; - return ref->_is_nullable; -} - -} // namespace doris diff --git a/be/src/exprs/string_functions.h b/be/src/exprs/string_functions.h index 0ee17334cd..01fe8646da 100644 --- a/be/src/exprs/string_functions.h +++ b/be/src/exprs/string_functions.h @@ -32,9 +32,7 @@ namespace doris { -class Expr; class OpcodeRegistry; -class TupleRow; class StringFunctions { public: diff --git a/be/src/exprs/time_operators.h b/be/src/exprs/time_operators.h index 2959060eb3..bb2f9f7f27 100644 --- a/be/src/exprs/time_operators.h +++ b/be/src/exprs/time_operators.h @@ -22,9 +22,6 @@ #include "udf/udf.h" namespace doris { -class Expr; -struct ExprValue; -class TupleRow; /// Implementation of the time operators. These include the cast, /// arithmetic and binary operators. diff --git a/be/src/exprs/timestamp_functions.h b/be/src/exprs/timestamp_functions.h index 3e04602036..f31fb02ae5 100644 --- a/be/src/exprs/timestamp_functions.h +++ b/be/src/exprs/timestamp_functions.h @@ -26,9 +26,7 @@ namespace doris { -class Expr; class OpcodeRegistry; -class TupleRow; // The context used for timestamp function prepare phase, // to save the converted date formatter, so that it doesn't diff --git a/be/src/exprs/tuple_is_null_predicate.cpp b/be/src/exprs/tuple_is_null_predicate.cpp deleted file mode 100644 index 1c67092160..0000000000 --- a/be/src/exprs/tuple_is_null_predicate.cpp +++ /dev/null @@ -1,72 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/tuple-is-null-predicate.cc -// and modified by Doris - -#include "exprs/tuple_is_null_predicate.h" - -#include - -#include "gen_cpp/Exprs_types.h" -#include "runtime/descriptors.h" - -namespace doris { - -TupleIsNullPredicate::TupleIsNullPredicate(const TExprNode& node) - : Predicate(node), - _tuple_ids(node.tuple_is_null_pred.tuple_ids.begin(), - node.tuple_is_null_pred.tuple_ids.end()) {} - -Status TupleIsNullPredicate::prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* ctx) { - RETURN_IF_ERROR(Expr::prepare(state, row_desc, ctx)); - DCHECK_EQ(0, _children.size()); - - // Resolve tuple ids to tuple indexes. - for (int i = 0; i < _tuple_ids.size(); ++i) { - int32_t tuple_idx = row_desc.get_tuple_idx(_tuple_ids[i]); - RETURN_IF_INVALID_TUPLE_IDX(_tuple_ids[i], tuple_idx); - if (row_desc.tuple_is_nullable(tuple_idx)) { - _tuple_idxs.push_back(tuple_idx); - } - } - - return Status::OK(); -} - -BooleanVal TupleIsNullPredicate::get_boolean_val(ExprContext* ctx, TupleRow* row) { - int count = 0; - for (int i = 0; i < _tuple_idxs.size(); ++i) { - count += row->get_tuple(_tuple_idxs[i]) == nullptr; - } - return BooleanVal(!_tuple_idxs.empty() && count == _tuple_idxs.size()); -} - -std::string TupleIsNullPredicate::debug_string() const { - std::stringstream out; - out << "TupleIsNullPredicate(tupleids=["; - - for (int i = 0; i < _tuple_ids.size(); ++i) { - out << (i == 0 ? "" : " ") << _tuple_ids[i]; - } - - out << "])"; - return out.str(); -} - -} // namespace doris diff --git a/be/src/exprs/tuple_is_null_predicate.h b/be/src/exprs/tuple_is_null_predicate.h deleted file mode 100644 index ee90901d5a..0000000000 --- a/be/src/exprs/tuple_is_null_predicate.h +++ /dev/null @@ -1,54 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/exprs/tuple-is-null-predicate.h -// and modified by Doris - -#pragma once - -#include "common/object_pool.h" -#include "exprs/predicate.h" - -namespace doris { - -class TExprNode; - -class TupleIsNullPredicate : public Predicate { -public: - virtual Expr* clone(ObjectPool* pool) const override { - return pool->add(new TupleIsNullPredicate(*this)); - } - - bool is_constant() const override { return false; } - -protected: - friend class Expr; - - TupleIsNullPredicate(const TExprNode& node); - - virtual Status prepare(RuntimeState* state, const RowDescriptor& row_desc, - ExprContext* ctx) override; - - virtual BooleanVal get_boolean_val(ExprContext* ctx, TupleRow* row) override; - virtual std::string debug_string() const override; - -private: - std::vector _tuple_ids; - std::vector _tuple_idxs; -}; - -} // namespace doris diff --git a/be/src/exprs/utility_functions.h b/be/src/exprs/utility_functions.h index b1c38b1a30..3bfa3ff38a 100644 --- a/be/src/exprs/utility_functions.h +++ b/be/src/exprs/utility_functions.h @@ -24,9 +24,7 @@ namespace doris { -class Expr; class OpcodeRegistry; -class TupleRow; class UtilityFunctions { public: diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index c68c4a9375..986fc0211f 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -17,7 +17,7 @@ #pragma once -#include "exprs/bloomfilter_predicate.h" +#include "exprs/bloom_filter_func.h" #include "exprs/runtime_filter.h" #include "olap/column_predicate.h" #include "runtime/primitive_type.h" diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp index 6459a81604..80b8d9f654 100644 --- a/be/src/olap/delta_writer.cpp +++ b/be/src/olap/delta_writer.cpp @@ -24,7 +24,6 @@ #include "olap/schema.h" #include "olap/storage_engine.h" #include "runtime/load_channel_mgr.h" -#include "runtime/tuple_row.h" #include "service/backend_options.h" #include "util/brpc_client_cache.h" #include "util/ref_count_closure.h" diff --git a/be/src/olap/delta_writer.h b/be/src/olap/delta_writer.h index 5de52fd7b8..34f9be8093 100644 --- a/be/src/olap/delta_writer.h +++ b/be/src/olap/delta_writer.h @@ -31,7 +31,6 @@ class Schema; class StorageEngine; class Tuple; class TupleDescriptor; -class TupleRow; class SlotDescriptor; enum WriteType { LOAD = 1, LOAD_DELETE = 2, DELETE = 3 }; diff --git a/be/src/olap/predicate_creator.h b/be/src/olap/predicate_creator.h index 55e9a0a642..c12e155e0f 100644 --- a/be/src/olap/predicate_creator.h +++ b/be/src/olap/predicate_creator.h @@ -21,7 +21,6 @@ #include #include "exec/olap_utils.h" -#include "exprs/bloomfilter_predicate.h" #include "exprs/create_predicate_function.h" #include "exprs/hybrid_set.h" #include "exprs/match_predicate.h" diff --git a/be/src/olap/reader.h b/be/src/olap/reader.h index 19abd78e0d..9bb10b69d3 100644 --- a/be/src/olap/reader.h +++ b/be/src/olap/reader.h @@ -20,7 +20,6 @@ #include #include "exprs/bitmapfilter_predicate.h" -#include "exprs/bloomfilter_predicate.h" #include "exprs/function_filter.h" #include "exprs/hybrid_set.h" #include "olap/delete_handler.h" diff --git a/be/src/runtime/CMakeLists.txt b/be/src/runtime/CMakeLists.txt index 4351fe1ee5..f06e18b7d7 100644 --- a/be/src/runtime/CMakeLists.txt +++ b/be/src/runtime/CMakeLists.txt @@ -49,7 +49,6 @@ set(RUNTIME_FILES large_int_value.cpp collection_value.cpp tuple.cpp - tuple_row.cpp fragment_mgr.cpp dpp_sink_internal.cpp load_path_mgr.cpp diff --git a/be/src/runtime/cache/result_cache.h b/be/src/runtime/cache/result_cache.h index 0148bad786..d1ca970f78 100644 --- a/be/src/runtime/cache/result_cache.h +++ b/be/src/runtime/cache/result_cache.h @@ -32,7 +32,6 @@ #include "runtime/cache/cache_utils.h" #include "runtime/cache/result_node.h" #include "runtime/mem_pool.h" -#include "runtime/tuple_row.h" namespace doris { diff --git a/be/src/runtime/cache/result_node.h b/be/src/runtime/cache/result_node.h index ca1b344d9f..ff0a4b362e 100644 --- a/be/src/runtime/cache/result_node.h +++ b/be/src/runtime/cache/result_node.h @@ -34,7 +34,6 @@ #include "olap/olap_define.h" #include "runtime/cache/cache_utils.h" #include "runtime/mem_pool.h" -#include "runtime/tuple_row.h" #include "util/uid_util.h" namespace doris { diff --git a/be/src/runtime/descriptor_helper.h b/be/src/runtime/descriptor_helper.h index ec7af6b387..13892d154a 100644 --- a/be/src/runtime/descriptor_helper.h +++ b/be/src/runtime/descriptor_helper.h @@ -21,6 +21,8 @@ #include "gen_cpp/Descriptors_types.h" #include "gen_cpp/Types_types.h" +#include "runtime/define_primitive_type.h" +#include "runtime/primitive_type.h" namespace doris { diff --git a/be/src/runtime/descriptors.h b/be/src/runtime/descriptors.h index bd2e8b2564..801bd7794a 100644 --- a/be/src/runtime/descriptors.h +++ b/be/src/runtime/descriptors.h @@ -45,7 +45,6 @@ class ObjectPool; class TDescriptorTable; class TSlotDescriptor; class TTupleDescriptor; -class Expr; class RuntimeState; class SchemaScanner; class OlapTableSchemaParam; diff --git a/be/src/runtime/dpp_sink_internal.cpp b/be/src/runtime/dpp_sink_internal.cpp index 2f9ef1e9d1..e0120f9f55 100644 --- a/be/src/runtime/dpp_sink_internal.cpp +++ b/be/src/runtime/dpp_sink_internal.cpp @@ -21,7 +21,6 @@ #include "common/object_pool.h" #include "exec/text_converter.hpp" -#include "exprs/expr.h" #include "gen_cpp/DataSinks_types.h" #include "runtime/descriptors.h" #include "runtime/runtime_state.h" @@ -135,39 +134,4 @@ Status PartRange::from_thrift(ObjectPool* pool, const TPartitionRange& t_part_ra return Status::OK(); } -Status PartitionInfo::from_thrift(ObjectPool* pool, const TRangePartition& t_partition, - PartitionInfo* partition) { - partition->_id = t_partition.partition_id; - RETURN_IF_ERROR(PartRange::from_thrift(pool, t_partition.range, &partition->_range)); - if (t_partition.__isset.distributed_exprs) { - partition->_distributed_bucket = t_partition.distribute_bucket; - if (partition->_distributed_bucket == 0) { - return Status::InternalError("Distributed bucket is 0."); - } - RETURN_IF_ERROR(Expr::create_expr_trees(pool, t_partition.distributed_exprs, - &partition->_distributed_expr_ctxs)); - } - return Status::OK(); -} - -Status PartitionInfo::prepare(RuntimeState* state, const RowDescriptor& row_desc) { - if (_distributed_expr_ctxs.size() > 0) { - RETURN_IF_ERROR(Expr::prepare(_distributed_expr_ctxs, state, row_desc)); - } - return Status::OK(); -} - -Status PartitionInfo::open(RuntimeState* state) { - if (_distributed_expr_ctxs.size() > 0) { - return Expr::open(_distributed_expr_ctxs, state); - } - return Status::OK(); -} - -void PartitionInfo::close(RuntimeState* state) { - if (_distributed_expr_ctxs.size() > 0) { - Expr::close(_distributed_expr_ctxs, state); - } -} - } // namespace doris diff --git a/be/src/runtime/dpp_sink_internal.h b/be/src/runtime/dpp_sink_internal.h index 9e2122c3a5..a380d68fd7 100644 --- a/be/src/runtime/dpp_sink_internal.h +++ b/be/src/runtime/dpp_sink_internal.h @@ -28,7 +28,6 @@ namespace doris { -class ExprContext; class ObjectPool; class RuntimeState; class RowDescriptor; @@ -184,36 +183,4 @@ private: static PartRange _s_all_range; }; -class PartitionInfo { -public: - PartitionInfo() : _id(-1), _distributed_bucket(0) {} - - static Status from_thrift(ObjectPool* pool, const TRangePartition& t_partition, - PartitionInfo* partition); - - Status prepare(RuntimeState* state, const RowDescriptor& row_desc); - - Status open(RuntimeState* state); - - void close(RuntimeState* state); - - int64_t id() const { return _id; } - - const std::vector& distributed_expr_ctxs() const { - return _distributed_expr_ctxs; - } - - int distributed_bucket() const { return _distributed_bucket; } - - const PartRange& range() const { return _range; } - -private: - int64_t _id; - PartRange _range; - // Information used to distribute data - // distribute exprs - std::vector _distributed_expr_ctxs; - int32_t _distributed_bucket; -}; - } // namespace doris diff --git a/be/src/runtime/fold_constant_executor.cpp b/be/src/runtime/fold_constant_executor.cpp index 3fad048846..b98f4f4411 100644 --- a/be/src/runtime/fold_constant_executor.cpp +++ b/be/src/runtime/fold_constant_executor.cpp @@ -21,8 +21,6 @@ #include "common/object_pool.h" #include "common/status.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" #include "gen_cpp/PaloInternalService_types.h" #include "gen_cpp/internal_service.pb.h" #include "runtime/exec_env.h" @@ -30,7 +28,6 @@ #include "runtime/memory/mem_tracker.h" #include "runtime/runtime_state.h" #include "runtime/thread_context.h" -#include "runtime/tuple_row.h" #include "vec/data_types/data_type_number.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" @@ -42,58 +39,6 @@ namespace doris { TUniqueId FoldConstantExecutor::_dummy_id; -Status FoldConstantExecutor::fold_constant_expr(const TFoldConstantParams& params, - PConstantExprResult* response) { - const auto& expr_map = params.expr_map; - auto expr_result_map = response->mutable_expr_result_map(); - - TQueryGlobals query_globals = params.query_globals; - // init - RETURN_IF_ERROR(_init(query_globals)); - // only after init operation, _mem_tracker is ready - SCOPED_CONSUME_MEM_TRACKER(_mem_tracker.get()); - - for (const auto& m : expr_map) { - PExprResultMap pexpr_result_map; - for (const auto& n : m.second) { - ExprContext* ctx = nullptr; - const TExpr& texpr = n.second; - // create expr tree from TExpr - RETURN_IF_ERROR(Expr::create_expr_tree(&_pool, texpr, &ctx)); - // prepare and open context - RETURN_IF_ERROR(_prepare_and_open(ctx)); - - TupleRow* row = nullptr; - // calc expr - void* src = ctx->get_value(row); - PrimitiveType root_type = ctx->root()->type().type; - // covert to thrift type - TPrimitiveType::type t_type = doris::to_thrift(root_type); - - // collect result - PExprResult expr_result; - string result; - if (src == nullptr) { - expr_result.set_success(false); - } else { - expr_result.set_success(true); - result = _get_result(src, 0, ctx->root()->type().type); - } - - expr_result.set_content(std::move(result)); - expr_result.mutable_type()->set_type(t_type); - pexpr_result_map.mutable_map()->insert({n.first, expr_result}); - - // close context expr - ctx->close(_runtime_state.get()); - } - - expr_result_map->insert({m.first, pexpr_result_map}); - } - - return Status::OK(); -} - Status FoldConstantExecutor::fold_constant_vexpr(const TFoldConstantParams& params, PConstantExprResult* response) { const auto& expr_map = params.expr_map; diff --git a/be/src/runtime/fold_constant_executor.h b/be/src/runtime/fold_constant_executor.h index b9e5e501ea..47d334bacd 100644 --- a/be/src/runtime/fold_constant_executor.h +++ b/be/src/runtime/fold_constant_executor.h @@ -19,12 +19,11 @@ #include "common/object_pool.h" #include "common/status.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" #include "gen_cpp/PaloInternalService_types.h" #include "gen_cpp/internal_service.pb.h" +#include "runtime/define_primitive_type.h" #include "runtime/exec_env.h" -#include "runtime/tuple_row.h" +#include "runtime/runtime_state.h" #include "util/runtime_profile.h" namespace doris { @@ -36,9 +35,6 @@ class TQueryGlobals; // This class used to fold constant expr from fe class FoldConstantExecutor { public: - // fold constant expr - Status fold_constant_expr(const TFoldConstantParams& params, PConstantExprResult* response); - // fold constant vexpr Status fold_constant_vexpr(const TFoldConstantParams& params, PConstantExprResult* response); diff --git a/be/src/runtime/mem_pool.cpp b/be/src/runtime/mem_pool.cpp index 4c136cc019..d3f75a9e74 100644 --- a/be/src/runtime/mem_pool.cpp +++ b/be/src/runtime/mem_pool.cpp @@ -219,24 +219,6 @@ void MemPool::acquire_data(MemPool* src, bool keep_current) { DCHECK(check_integrity(false)); } -void MemPool::exchange_data(MemPool* other) { - int64_t delta_size = other->total_reserved_bytes_ - total_reserved_bytes_; - if (other->_mem_tracker != _mem_tracker) { - if (other->_mem_tracker) { - other->_mem_tracker->release(delta_size); - } - if (_mem_tracker) { - _mem_tracker->consume(delta_size); - } - } - - std::swap(current_chunk_idx_, other->current_chunk_idx_); - std::swap(next_chunk_size_, other->next_chunk_size_); - std::swap(total_allocated_bytes_, other->total_allocated_bytes_); - std::swap(total_reserved_bytes_, other->total_reserved_bytes_); - std::swap(chunks_, other->chunks_); -} - std::string MemPool::debug_string() { std::stringstream out; char str[16]; diff --git a/be/src/runtime/mem_pool.h b/be/src/runtime/mem_pool.h index e3b602e077..c74cfe298f 100644 --- a/be/src/runtime/mem_pool.h +++ b/be/src/runtime/mem_pool.h @@ -154,12 +154,6 @@ public: /// All offsets handed out by calls to GetCurrentOffset() for 'src' become invalid. void acquire_data(MemPool* src, bool keep_current); - // Exchange all chunks with input source, including reserved chunks. - // This function will keep its own MemTracker, and update it after exchange. - // Why we need this other than std::swap? Because swap will swap MemTracker too, which would - // lead error. We only has MemTracker's pointer, which can be invalid after swap. - void exchange_data(MemPool* other); - std::string debug_string(); int64_t total_allocated_bytes() const { return total_allocated_bytes_; } diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp index 4d5478a596..2c6a3523d2 100644 --- a/be/src/runtime/runtime_filter_mgr.cpp +++ b/be/src/runtime/runtime_filter_mgr.cpp @@ -19,7 +19,7 @@ #include -#include "exprs/bloomfilter_predicate.h" +#include "exprs/bloom_filter_func.h" #include "exprs/runtime_filter.h" #include "gen_cpp/internal_service.pb.h" #include "runtime/exec_env.h" diff --git a/be/src/runtime/runtime_state.h b/be/src/runtime/runtime_state.h index d72631de92..83dae14d50 100644 --- a/be/src/runtime/runtime_state.h +++ b/be/src/runtime/runtime_state.h @@ -38,7 +38,6 @@ class DescriptorTbl; class ObjectPool; class Status; class ExecEnv; -class Expr; class DateTimeValue; class MemTracker; class DataStreamRecvr; diff --git a/be/src/runtime/tuple.cpp b/be/src/runtime/tuple.cpp index b71a55b050..1af5942dda 100644 --- a/be/src/runtime/tuple.cpp +++ b/be/src/runtime/tuple.cpp @@ -25,12 +25,10 @@ #include #include "common/utils.h" -#include "exprs/expr_context.h" #include "runtime/collection_value.h" #include "runtime/descriptors.h" #include "runtime/mem_pool.h" #include "runtime/raw_value.h" -#include "runtime/tuple_row.h" #include "util/mem_util.hpp" #include "vec/common/string_ref.h" @@ -195,71 +193,6 @@ void Tuple::deep_copy(const TupleDescriptor& desc, char** data, int64_t* offset, convert_ptrs); } -template -void Tuple::materialize_exprs(TupleRow* row, const TupleDescriptor& desc, - const std::vector& materialize_expr_ctxs, MemPool* pool, - std::vector* non_null_var_len_values, - int* total_var_len) { - if (collect_string_vals) { - non_null_var_len_values->clear(); - *total_var_len = 0; - } - memset(this, 0, desc.num_null_bytes()); - // Evaluate the output_slot_exprs and place the results in the tuples. - int mat_expr_index = 0; - auto& slots = desc.slots(); - for (int i = 0; i < slots.size(); ++i) { - SlotDescriptor* slot_desc = slots[i]; - if (!slot_desc->is_materialized()) { - continue; - } - // The FE ensures we don't get any TYPE_NULL expressions by picking an arbitrary type - // when necessary, but does not do this for slot descs. - // TODO: revisit this logic in the FE - PrimitiveType slot_type = slot_desc->type().type; - PrimitiveType expr_type = materialize_expr_ctxs[mat_expr_index]->root()->type().type; - if (slot_type == TYPE_CHAR || slot_type == TYPE_VARCHAR || slot_type == TYPE_HLL || - slot_type == TYPE_STRING) { - DCHECK(expr_type == TYPE_CHAR || expr_type == TYPE_VARCHAR || expr_type == TYPE_HLL || - expr_type == TYPE_STRING); - } else if (slot_type == TYPE_DATE || slot_type == TYPE_DATETIME) { - DCHECK(expr_type == TYPE_DATE || expr_type == TYPE_DATETIME); - } else if (slot_type == TYPE_ARRAY) { - DCHECK(expr_type == TYPE_ARRAY); - } else { - DCHECK(slot_type == TYPE_NULL || slot_type == expr_type); - } - void* src = materialize_expr_ctxs[mat_expr_index]->get_value(row); - if (src != nullptr) { - void* dst = get_slot(slot_desc->tuple_offset()); - RawValue::write(src, dst, slot_desc->type(), pool); - if (collect_string_vals) { - if (slot_desc->type().is_string_type()) { - StringRef* string_val = convert_to(dst); - non_null_var_len_values->push_back(string_val); - *total_var_len += string_val->size; - } - } - } else { - set_null(slot_desc->null_indicator_offset()); - } - ++mat_expr_index; - } - - DCHECK_EQ(mat_expr_index, materialize_expr_ctxs.size()); -} - -template void Tuple::materialize_exprs( - TupleRow* row, const TupleDescriptor& desc, - const std::vector& materialize_expr_ctxs, MemPool* pool, - std::vector* non_null_var_values, int* total_var_len); - -template void Tuple::materialize_exprs(TupleRow* row, const TupleDescriptor& desc, - const std::vector& materialize_expr_ctxs, - MemPool* pool, - std::vector* non_null_var_values, - int* total_var_len); - std::string Tuple::to_string(const TupleDescriptor& d) const { std::stringstream out; out << "("; diff --git a/be/src/runtime/tuple.h b/be/src/runtime/tuple.h index e19d8cc68c..4bda9003c5 100644 --- a/be/src/runtime/tuple.h +++ b/be/src/runtime/tuple.h @@ -32,8 +32,6 @@ struct StringRef; class CollectionValue; class TupleDescriptor; class DateTimeValue; -class TupleRow; -class ExprContext; // A tuple is stored as a contiguous sequence of bytes containing a fixed number // of fixed-size slots. The slots are arranged in order of increasing byte length; @@ -106,17 +104,6 @@ public: deep_copy(desc, data, offset, false); } - // Materialize this by evaluating the expressions in materialize_exprs - // over the specified 'row'. 'pool' is used to allocate var-length data. - // (Memory for this tuple itself must already be allocated.) - // If collect_string_vals is true, the materialized non-nullptr string value - // slots and the total length of the string slots are returned in var_values - // and total_var_len. - template - void materialize_exprs(TupleRow* row, const TupleDescriptor& desc, - const std::vector& materialize_expr_ctxs, MemPool* pool, - std::vector* non_null_var_len_values, int* total_var_len); - // Turn null indicator bit on. // Turn null indicator bit on. For non-nullable slots, the mask will be 0 and // this is a no-op (but we don't have to branch to check is slots are nulalble). diff --git a/be/src/runtime/tuple_row.cpp b/be/src/runtime/tuple_row.cpp deleted file mode 100644 index d4cd44a725..0000000000 --- a/be/src/runtime/tuple_row.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/tuple-row.cpp -// and modified by Doris - -#include "runtime/tuple_row.h" - -#include - -namespace doris { - -std::string TupleRow::to_string(const RowDescriptor& d) { - std::stringstream out; - out << "["; - for (int i = 0; i < d.tuple_descriptors().size(); ++i) { - if (i != 0) { - out << " "; - } - out << Tuple::to_string(get_tuple(i), *d.tuple_descriptors()[i]); - } - - out << "]"; - return out.str(); -} - -} // namespace doris diff --git a/be/src/runtime/tuple_row.h b/be/src/runtime/tuple_row.h deleted file mode 100644 index cafd066404..0000000000 --- a/be/src/runtime/tuple_row.h +++ /dev/null @@ -1,117 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. -// This file is copied from -// https://github.com/apache/impala/blob/branch-2.9.0/be/src/runtime/tuple-row.h -// and modified by Doris - -#pragma once - -#include "runtime/descriptors.h" -#include "runtime/mem_pool.h" -#include "runtime/tuple.h" - -namespace doris { - -// A TupleRow encapsulates a contiguous sequence of Tuple pointers which -// together make up a row. -class TupleRow { -public: - Tuple* get_tuple(int tuple_idx) { return _tuples[tuple_idx]; } - - void set_tuple(int tuple_idx, Tuple* tuple) { _tuples[tuple_idx] = tuple; } - - static TupleRow* create(const std::vector& descs, MemPool* pool) { - int size = descs.size() * sizeof(Tuple*); - return reinterpret_cast(pool->allocate(size)); - } - - // Create a deep copy of this TupleRow. deep_copy will allocate from the pool. - TupleRow* deep_copy(const std::vector& descs, MemPool* pool) { - int size = descs.size() * sizeof(Tuple*); - TupleRow* result = reinterpret_cast(pool->allocate(size)); - deep_copy(result, descs, pool, false); - return result; - } - - // Create a deep copy of this TupleRow into 'dst'. deep_copy will allocate from - // the MemPool and copy the tuple pointers, the tuples and the string data in the - // tuples. - // If reuse_tuple_mem is true, it is assumed the dst TupleRow has already allocated - // tuple memory and that memory will be reused. Otherwise, new tuples will be allocated - // and stored in 'dst'. - void deep_copy(TupleRow* dst, const std::vector& descs, MemPool* pool, - bool reuse_tuple_mem) { - for (int i = 0; i < descs.size(); ++i) { - if (this->get_tuple(i) != nullptr) { - if (reuse_tuple_mem && dst->get_tuple(i) != nullptr) { - this->get_tuple(i)->deep_copy(dst->get_tuple(i), *descs[i], pool); - } else { - dst->set_tuple(i, this->get_tuple(i)->deep_copy(*descs[i], pool)); - } - } else { - // TODO: this is wasteful. If we have 'reuse_tuple_mem', we should be able - // to save the tuple buffer and reuse it (i.e. freelist). - dst->set_tuple(i, nullptr); - } - } - } - - TupleRow* dcopy_with_new(const std::vector& descs, MemPool* pool, - int64_t* bytes) { - int size = descs.size() * sizeof(Tuple*); - TupleRow* result = reinterpret_cast(pool->allocate(size)); - *bytes = dcopy_with_new(result, descs, pool, false); - return result; - } - - int64_t dcopy_with_new(TupleRow* dst, const std::vector& descs, MemPool* pool, - bool reuse_tuple_mem) { - int64_t bytes = 0; - for (int i = 0; i < descs.size(); ++i) { - Tuple* old_tuple = dst->get_tuple(i); - if (_tuples[i] != nullptr) { - if (reuse_tuple_mem && old_tuple != nullptr) { - bytes += _tuples[i]->dcopy_with_new(dst->get_tuple(i), *descs[i]); - } else { - int64_t new_bytes = 0; - dst->set_tuple(i, _tuples[i]->dcopy_with_new(*descs[i], pool, &new_bytes)); - bytes += new_bytes; - } - } else { - dst->set_tuple(i, nullptr); - } - } - return bytes; - } - - int64_t release_tuples(const std::vector& descs) { - int64_t bytes = 0; - for (int i = 0; i < descs.size(); ++i) { - if (_tuples[i] != nullptr) { - bytes += _tuples[i]->release_string(*descs[i]); - } - } - return bytes; - } - - std::string to_string(const RowDescriptor& d); - -private: - Tuple* _tuples[1]; -}; - -} // namespace doris diff --git a/be/src/service/internal_service.cpp b/be/src/service/internal_service.cpp index dc1e856b71..9f696d0177 100644 --- a/be/src/service/internal_service.cpp +++ b/be/src/service/internal_service.cpp @@ -624,8 +624,6 @@ Status PInternalServiceImpl::_fold_constant_expr(const std::string& ser_request, uint32_t len = ser_request.size(); RETURN_IF_ERROR(deserialize_thrift_msg(buf, &len, false, &t_request)); } - if (!t_request.__isset.vec_exec || !t_request.vec_exec) - return FoldConstantExecutor().fold_constant_expr(t_request, response); return FoldConstantExecutor().fold_constant_vexpr(t_request, response); } diff --git a/be/src/udf/udf_internal.h b/be/src/udf/udf_internal.h index 67a8ec60e7..8961ca64d6 100644 --- a/be/src/udf/udf_internal.h +++ b/be/src/udf/udf_internal.h @@ -117,7 +117,6 @@ public: private: friend class doris_udf::FunctionContext; - friend class ExprContext; /// Preallocated buffer for storing varargs (if the function has any). Allocated and /// owned by this object, but populated by an Expr function. diff --git a/be/src/util/arrow/block_convertor.cpp b/be/src/util/arrow/block_convertor.cpp index 1575f7008d..1bc1536463 100644 --- a/be/src/util/arrow/block_convertor.cpp +++ b/be/src/util/arrow/block_convertor.cpp @@ -35,7 +35,6 @@ #include #include -#include "exprs/slot_ref.h" #include "gutil/strings/substitute.h" #include "runtime/descriptor_helper.h" #include "runtime/descriptors.h" diff --git a/be/src/util/arrow/row_batch.cpp b/be/src/util/arrow/row_batch.cpp index 01b930b52e..dd93cb7b87 100644 --- a/be/src/util/arrow/row_batch.cpp +++ b/be/src/util/arrow/row_batch.cpp @@ -35,7 +35,6 @@ #include #include -#include "exprs/slot_ref.h" #include "gutil/strings/substitute.h" #include "runtime/descriptor_helper.h" #include "runtime/descriptors.h" diff --git a/be/src/util/tuple_row_zorder_compare.h b/be/src/util/tuple_row_zorder_compare.h index 9acbecf41e..39775a3527 100644 --- a/be/src/util/tuple_row_zorder_compare.h +++ b/be/src/util/tuple_row_zorder_compare.h @@ -17,14 +17,11 @@ #pragma once -#include "exprs/expr.h" -#include "exprs/expr_context.h" #include "olap/row_cursor.h" #include "olap/schema.h" #include "runtime/descriptors.h" #include "runtime/raw_value.h" #include "runtime/tuple.h" -#include "runtime/tuple_row.h" namespace doris { class RowComparator { diff --git a/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h b/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h index c41e7be7d9..5f76abd261 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h @@ -17,7 +17,6 @@ #pragma once -#include "exprs/hll_function.h" #include "olap/hll.h" #include "util/slice.h" #include "vec/aggregate_functions/aggregate_function.h" diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 4202f76304..ca89e1715a 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -27,7 +27,6 @@ #include "common/status.h" #include "runtime/descriptors.h" #include "runtime/tuple.h" -#include "runtime/tuple_row.h" #include "udf/udf.h" #include "util/block_compression.h" #include "util/exception.h" diff --git a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp index e00808fb63..b6324851cd 100644 --- a/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp +++ b/be/src/vec/exec/data_gen_functions/vnumbers_tvf.cpp @@ -22,7 +22,6 @@ #include "exec/exec_node.h" #include "gen_cpp/PlanNodes_types.h" #include "runtime/runtime_state.h" -#include "runtime/tuple_row.h" #include "util/runtime_profile.h" #include "vec/common/string_ref.h" diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index 94be6ce0e7..7cdd7248e2 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -17,6 +17,7 @@ #include "vec/exec/join/vhash_join_node.h" +#include "exprs/bloom_filter_func.h" #include "exprs/runtime_filter_slots.h" #include "gen_cpp/PlanNodes_types.h" #include "gutil/strings/substitute.h" diff --git a/be/src/vec/exec/join/vnested_loop_join_node.cpp b/be/src/vec/exec/join/vnested_loop_join_node.cpp index 00ed59bcbd..e999a6c5f0 100644 --- a/be/src/vec/exec/join/vnested_loop_join_node.cpp +++ b/be/src/vec/exec/join/vnested_loop_join_node.cpp @@ -22,7 +22,6 @@ #include #include "common/status.h" -#include "exprs/expr.h" #include "exprs/runtime_filter_slots_cross.h" #include "gen_cpp/PlanNodes_types.h" #include "runtime/runtime_state.h" @@ -30,6 +29,7 @@ #include "util/simd/bits.h" #include "vec/columns/column_const.h" #include "vec/common/typeid_cast.h" +#include "vec/data_types/data_type_number.h" #include "vec/utils/template_helpers.hpp" #include "vec/utils/util.hpp" diff --git a/be/src/vec/exec/scan/new_es_scan_node.cpp b/be/src/vec/exec/scan/new_es_scan_node.cpp index b0ceeaf500..d513552857 100644 --- a/be/src/vec/exec/scan/new_es_scan_node.cpp +++ b/be/src/vec/exec/scan/new_es_scan_node.cpp @@ -17,7 +17,6 @@ #include "vec/exec/scan/new_es_scan_node.h" -#include "exec/es/es_query_builder.h" #include "exec/es/es_scroll_query.h" #include "vec/exec/scan/new_es_scanner.h" #include "vec/utils/util.hpp" diff --git a/be/src/vec/exec/scan/new_es_scan_node.h b/be/src/vec/exec/scan/new_es_scan_node.h index e57649a91e..857f1d0067 100644 --- a/be/src/vec/exec/scan/new_es_scan_node.h +++ b/be/src/vec/exec/scan/new_es_scan_node.h @@ -17,7 +17,6 @@ #pragma once -#include "exec/es/es_predicate.h" #include "vec/exec/scan/new_es_scanner.h" #include "vec/exec/scan/vscan_node.h" diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 29a64edbe4..bc79e36506 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -26,7 +26,6 @@ #include "common/utils.h" #include "exec/arrow/orc_reader.h" #include "exec/text_converter.hpp" -#include "exprs/expr_context.h" #include "olap/iterators.h" #include "runtime/descriptors.h" #include "runtime/raw_value.h" diff --git a/be/src/vec/exec/scan/vfile_scanner.h b/be/src/vec/exec/scan/vfile_scanner.h index 8ba5174733..c69ac585d0 100644 --- a/be/src/vec/exec/scan/vfile_scanner.h +++ b/be/src/vec/exec/scan/vfile_scanner.h @@ -19,7 +19,6 @@ #include "exec/olap_common.h" #include "exec/text_converter.h" -#include "exprs/bloomfilter_predicate.h" #include "exprs/function_filter.h" #include "io/file_factory.h" #include "runtime/tuple.h" diff --git a/be/src/vec/exec/scan/vscan_node.cpp b/be/src/vec/exec/scan/vscan_node.cpp index 906f7a18d7..198e7ab0c7 100644 --- a/be/src/vec/exec/scan/vscan_node.cpp +++ b/be/src/vec/exec/scan/vscan_node.cpp @@ -19,6 +19,7 @@ #include "common/consts.h" #include "common/status.h" +#include "exprs/bloom_filter_func.h" #include "exprs/hybrid_set.h" #include "runtime/runtime_filter_mgr.h" #include "util/defer_op.h" @@ -560,7 +561,7 @@ Status VScanNode::_normalize_predicate(VExpr* conjunct_expr_root, VExpr** output (*_vconjunct_ctx_ptr)->get_function_state_scope()); } - // here do not close Expr* now + // here do not close VExpr* now *output_expr = left_child != nullptr ? left_child : right_child; return Status::OK(); } @@ -675,7 +676,7 @@ Status VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, Pu // But now we still don't cover all predicates for const expression. // For example, for query `SELECT col FROM tbl WHERE 'PROMOTION' LIKE 'AAA%'`, // predicate `like` will return a ColumnVector which contains a single value. - LOG(WARNING) << "Expr[" << vexpr->debug_string() + LOG(WARNING) << "VExpr[" << vexpr->debug_string() << "] should return a const column but actually is " << const_col_wrapper->column_ptr->get_name(); DCHECK_EQ(bool_column->size(), 1); @@ -691,7 +692,7 @@ Status VScanNode::_eval_const_conjuncts(VExpr* vexpr, VExprContext* expr_ctx, Pu << bool_column->size(); } } else { - LOG(WARNING) << "Expr[" << vexpr->debug_string() + LOG(WARNING) << "VExpr[" << vexpr->debug_string() << "] should return a const column but actually is " << const_col_wrapper->column_ptr->get_name(); } diff --git a/be/src/vec/exec/scan/vscanner.h b/be/src/vec/exec/scan/vscanner.h index 6ab1491191..a91b87c0b5 100644 --- a/be/src/vec/exec/scan/vscanner.h +++ b/be/src/vec/exec/scan/vscanner.h @@ -18,7 +18,6 @@ #pragma once #include "common/status.h" -#include "exprs/expr_context.h" #include "olap/tablet.h" #include "runtime/runtime_state.h" #include "vec/exprs/vexpr_context.h" diff --git a/be/src/vec/exec/varrow_scanner.cpp b/be/src/vec/exec/varrow_scanner.cpp index 236c336a09..0922b89e57 100644 --- a/be/src/vec/exec/varrow_scanner.cpp +++ b/be/src/vec/exec/varrow_scanner.cpp @@ -18,7 +18,6 @@ #include "vec/exec/varrow_scanner.h" #include "exec/arrow/parquet_reader.h" -#include "exprs/expr.h" #include "io/file_factory.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" diff --git a/be/src/vec/exec/vdata_gen_scan_node.cpp b/be/src/vec/exec/vdata_gen_scan_node.cpp index f30a2a14f5..74c2dd814c 100644 --- a/be/src/vec/exec/vdata_gen_scan_node.cpp +++ b/be/src/vec/exec/vdata_gen_scan_node.cpp @@ -22,7 +22,6 @@ #include "common/status.h" #include "gen_cpp/PlanNodes_types.h" #include "runtime/runtime_state.h" -#include "runtime/tuple_row.h" #include "util/runtime_profile.h" #include "vec/exec/data_gen_functions/vnumbers_tvf.h" diff --git a/be/src/vec/exec/vmysql_scan_node.cpp b/be/src/vec/exec/vmysql_scan_node.cpp index a7a77f29db..332fa0235b 100644 --- a/be/src/vec/exec/vmysql_scan_node.cpp +++ b/be/src/vec/exec/vmysql_scan_node.cpp @@ -21,7 +21,6 @@ #include "exec/text_converter.hpp" #include "gen_cpp/PlanNodes_types.h" #include "runtime/runtime_state.h" -#include "runtime/tuple_row.h" #include "util/runtime_profile.h" #include "util/types.h" #include "vec/common/string_ref.h" diff --git a/be/src/vec/exec/vschema_scan_node.cpp b/be/src/vec/exec/vschema_scan_node.cpp index f99de7ff85..9d26ddf03b 100644 --- a/be/src/vec/exec/vschema_scan_node.cpp +++ b/be/src/vec/exec/vschema_scan_node.cpp @@ -21,7 +21,6 @@ #include "exec/text_converter.hpp" #include "gen_cpp/PlanNodes_types.h" #include "runtime/runtime_state.h" -#include "runtime/tuple_row.h" #include "util/runtime_profile.h" #include "util/types.h" #include "vec/common/string_ref.h" diff --git a/be/src/vec/exec/vtable_function_node.cpp b/be/src/vec/exec/vtable_function_node.cpp index 5d2bb1eff5..32cfee7234 100644 --- a/be/src/vec/exec/vtable_function_node.cpp +++ b/be/src/vec/exec/vtable_function_node.cpp @@ -85,8 +85,6 @@ Status VTableFunctionNode::prepare(RuntimeState* state) { RETURN_IF_ERROR(ExecNode::prepare(state)); _num_rows_filtered_counter = ADD_COUNTER(_runtime_profile, "RowsFiltered", TUnit::UNIT); - - RETURN_IF_ERROR(Expr::prepare(_fn_ctxs, state, _row_descriptor)); for (auto fn : _fns) { RETURN_IF_ERROR(fn->prepare()); } diff --git a/be/src/vec/exec/vtable_function_node.h b/be/src/vec/exec/vtable_function_node.h index 7f6ff1be4b..99d2394514 100644 --- a/be/src/vec/exec/vtable_function_node.h +++ b/be/src/vec/exec/vtable_function_node.h @@ -39,7 +39,6 @@ public: bool need_more_input_data() const { return !_child_block.rows() && !_child_eos; } void release_resource(doris::RuntimeState* state) override { - Expr::close(_fn_ctxs, state); vectorized::VExpr::close(_vfn_ctxs, state); if (_num_rows_filtered_counter != nullptr) { @@ -110,7 +109,6 @@ private: std::vector _output_slots; int64_t _cur_child_offset = 0; - std::vector _fn_ctxs; std::vector _vfn_ctxs; std::vector _fns; diff --git a/be/src/vec/exprs/vbloom_predicate.cpp b/be/src/vec/exprs/vbloom_predicate.cpp index f154ab7856..9ca49a9c7e 100644 --- a/be/src/vec/exprs/vbloom_predicate.cpp +++ b/be/src/vec/exprs/vbloom_predicate.cpp @@ -18,6 +18,7 @@ #include "vec/exprs/vbloom_predicate.h" #include "common/status.h" +#include "exprs/bloom_filter_func.h" #include "vec/data_types/data_type_nullable.h" namespace doris::vectorized { diff --git a/be/src/vec/exprs/vbloom_predicate.h b/be/src/vec/exprs/vbloom_predicate.h index dd1218bb63..51835a19c0 100644 --- a/be/src/vec/exprs/vbloom_predicate.h +++ b/be/src/vec/exprs/vbloom_predicate.h @@ -17,7 +17,6 @@ #pragma once -#include "exprs/bloomfilter_predicate.h" #include "vec/exprs/vexpr.h" namespace doris::vectorized { diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 156c10ce54..0068869443 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -43,7 +43,7 @@ public: VcompoundPred(const TExprNode& node) : VectorizedFnCall(node) { _op = node.opcode; _fn.name.function_name = compound_operator_to_string(_op); - _expr_name = "CompoundPredicate (" + _fn.name.function_name + ")"; + _expr_name = "VCompoundPredicate (" + _fn.name.function_name + ")"; } VExpr* clone(ObjectPool* pool) const override { return pool->add(new VcompoundPred(*this)); } diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index 465b0e34f1..8a41e332b9 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -22,7 +22,6 @@ #include "common/consts.h" #include "common/status.h" #include "exprs/anyval_util.h" -#include "exprs/rpc_fn.h" #include "fmt/format.h" #include "fmt/ranges.h" #include "udf/udf_internal.h" diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 9d0e5ce42d..cd6a61a1c9 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -22,7 +22,6 @@ #include "common/status.h" #include "exprs/bitmapfilter_predicate.h" -#include "exprs/bloomfilter_predicate.h" #include "exprs/hybrid_set.h" #include "gen_cpp/Exprs_types.h" #include "runtime/types.h" @@ -77,7 +76,7 @@ public: /// thread-local state should be initialized. Otherwise, if scope is THREAD_LOCAL, only /// thread-local state should be initialized. // - /// Subclasses overriding this function should call Expr::Open() to recursively call + /// Subclasses overriding this function should call VExpr::Open() to recursively call /// Open() on the expr tree virtual Status open(RuntimeState* state, VExprContext* context, FunctionContext::FunctionStateScope scope); diff --git a/be/src/vec/functions/hll_cardinality.cpp b/be/src/vec/functions/hll_cardinality.cpp index 4c192ca5e1..02a5982890 100644 --- a/be/src/vec/functions/hll_cardinality.cpp +++ b/be/src/vec/functions/hll_cardinality.cpp @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include "exprs/hll_function.h" #include "udf/udf.h" #include "vec/columns/column_complex.h" #include "vec/data_types/number_traits.h" diff --git a/be/src/vec/functions/hll_empty.cpp b/be/src/vec/functions/hll_empty.cpp index 145a947478..a62a4d81bf 100644 --- a/be/src/vec/functions/hll_empty.cpp +++ b/be/src/vec/functions/hll_empty.cpp @@ -15,7 +15,6 @@ // specific language governing permissions and limitations // under the License. -#include "exprs/hll_function.h" #include "olap/hll.h" #include "vec/columns/column_complex.h" #include "vec/data_types/data_type_hll.h" diff --git a/be/src/vec/functions/least_greast.cpp b/be/src/vec/functions/least_greast.cpp index 49ec867622..072ba8cbd5 100644 --- a/be/src/vec/functions/least_greast.cpp +++ b/be/src/vec/functions/least_greast.cpp @@ -19,6 +19,7 @@ #include "vec/columns/columns_number.h" #include "vec/core/accurate_comparison.h" #include "vec/data_types/data_type.h" +#include "vec/data_types/data_type_number.h" #include "vec/functions/function_helpers.h" #include "vec/functions/function_multi_same_args.h" #include "vec/functions/simple_function_factory.h" diff --git a/be/src/vec/runtime/vfile_result_writer.cpp b/be/src/vec/runtime/vfile_result_writer.cpp index 256bb8bf19..2782008b34 100644 --- a/be/src/vec/runtime/vfile_result_writer.cpp +++ b/be/src/vec/runtime/vfile_result_writer.cpp @@ -19,7 +19,6 @@ #include "common/consts.h" #include "common/status.h" -#include "exprs/expr_context.h" #include "gutil/strings/numbers.h" #include "gutil/strings/substitute.h" #include "io/file_factory.h" diff --git a/be/src/vec/sink/vdata_stream_sender.h b/be/src/vec/sink/vdata_stream_sender.h index b9fd79fc91..3d1ab816ce 100644 --- a/be/src/vec/sink/vdata_stream_sender.h +++ b/be/src/vec/sink/vdata_stream_sender.h @@ -38,7 +38,6 @@ class ObjectPool; class RuntimeState; class RuntimeProfile; class BufferControlBlock; -class ExprContext; class MemTracker; class PartRangeKey; @@ -219,11 +218,6 @@ public: // Returns OK if successful, error indication otherwise. Status init(RuntimeState* state); - // Copies a single row into this channel's output buffer and flushes buffer - // if it reaches capacity. - // Returns error status if any of the preceding rpcs failed, OK otherwise. - //Status add_row(TupleRow* row); - // Asynchronously sends a row batch. // Returns the status of the most recently finished transmit_data // rpc (or OK if there wasn't one that hasn't been reported yet). diff --git a/be/src/vec/sink/vmemory_scratch_sink.cpp b/be/src/vec/sink/vmemory_scratch_sink.cpp index 094ec19e0a..79fdbb2425 100644 --- a/be/src/vec/sink/vmemory_scratch_sink.cpp +++ b/be/src/vec/sink/vmemory_scratch_sink.cpp @@ -22,7 +22,6 @@ #include -#include "exprs/expr.h" #include "gen_cpp/Types_types.h" #include "runtime/exec_env.h" #include "runtime/primitive_type.h" diff --git a/be/src/vec/sink/vmysql_table_writer.cpp b/be/src/vec/sink/vmysql_table_writer.cpp index cbba836377..03a38e302b 100644 --- a/be/src/vec/sink/vmysql_table_writer.cpp +++ b/be/src/vec/sink/vmysql_table_writer.cpp @@ -21,7 +21,6 @@ #include -#include "exprs/expr.h" #include "util/types.h" #include "vec/columns/column_nullable.h" #include "vec/core/block.h" diff --git a/be/src/vec/sink/vresult_sink.h b/be/src/vec/sink/vresult_sink.h index 66fb675def..9c02877571 100644 --- a/be/src/vec/sink/vresult_sink.h +++ b/be/src/vec/sink/vresult_sink.h @@ -24,7 +24,6 @@ class ObjectPool; class RuntimeState; class RuntimeProfile; class BufferControlBlock; -class ExprContext; class ResultWriter; class MemTracker; struct ResultFileOptions; diff --git a/be/src/vec/sink/vtablet_sink.cpp b/be/src/vec/sink/vtablet_sink.cpp index 32bb070d22..0899da3fb9 100644 --- a/be/src/vec/sink/vtablet_sink.cpp +++ b/be/src/vec/sink/vtablet_sink.cpp @@ -23,13 +23,10 @@ #include #include "exec/tablet_info.h" -#include "exprs/expr.h" -#include "exprs/expr_context.h" #include "olap/hll.h" #include "runtime/exec_env.h" #include "runtime/runtime_state.h" #include "runtime/thread_context.h" -#include "runtime/tuple_row.h" #include "service/backend_options.h" #include "util/brpc_client_cache.h" #include "util/debug/sanitizer_scopes.h" @@ -1248,8 +1245,6 @@ Status VOlapTableSink::close(RuntimeState* state, Status exec_status) { _send_batch_thread_pool_token->wait(); } - Expr::close(_output_expr_ctxs, state); - _close_status = status; DataSink::close(state, exec_status); return status; diff --git a/be/src/vec/sink/vtablet_sink.h b/be/src/vec/sink/vtablet_sink.h index b62544cb7c..04809a6eca 100644 --- a/be/src/vec/sink/vtablet_sink.h +++ b/be/src/vec/sink/vtablet_sink.h @@ -30,7 +30,6 @@ #include "common/status.h" #include "exec/data_sink.h" #include "exec/tablet_info.h" -#include "exprs/expr_context.h" #include "gen_cpp/Types_types.h" #include "gen_cpp/internal_service.pb.h" #include "runtime/thread_context.h" @@ -535,8 +534,6 @@ private: enum FindTabletMode { FIND_TABLET_EVERY_ROW, FIND_TABLET_EVERY_BATCH, FIND_TABLET_EVERY_SINK }; FindTabletMode findTabletMode = FindTabletMode::FIND_TABLET_EVERY_ROW; - std::vector _output_expr_ctxs; - VOlapTablePartitionParam* _vpartition = nullptr; std::vector _output_vexpr_ctxs; }; diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp index 80574d0ae5..22f1d5d362 100644 --- a/be/src/vec/utils/util.hpp +++ b/be/src/vec/utils/util.hpp @@ -109,7 +109,6 @@ public: expr->close(state, context, context->get_function_state_scope()); } - // here do not close Expr* now return left_child != nullptr ? left_child : right_child; } } diff --git a/be/test/CMakeLists.txt b/be/test/CMakeLists.txt index 214ed42032..5f7340263f 100644 --- a/be/test/CMakeLists.txt +++ b/be/test/CMakeLists.txt @@ -57,12 +57,10 @@ set(EXPRS_TEST_FILES exprs/percentile_approx_test.cpp exprs/percentile_test.cpp exprs/bitmap_function_test.cpp - exprs/hll_function_test.cpp exprs/encryption_functions_test.cpp exprs/math_functions_test.cpp exprs/topn_function_test.cpp exprs/bloom_filter_predicate_test.cpp - exprs/array_functions_test.cpp exprs/quantile_function_test.cpp exprs/window_funnel_test.cpp exprs/hash_function_test.cpp diff --git a/be/test/exprs/array_functions_test.cpp b/be/test/exprs/array_functions_test.cpp deleted file mode 100644 index a0fe04a0ef..0000000000 --- a/be/test/exprs/array_functions_test.cpp +++ /dev/null @@ -1,77 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/array_functions.h" - -#include - -#include "gmock/gmock.h" -#include "runtime/collection_value.h" -#include "runtime/free_pool.hpp" -#include "string" -#include "testutil/function_utils.h" -#include "udf/udf.h" -#include "udf/udf_internal.h" - -#define private public - -namespace doris { - -class ArrayFunctionsTest : public testing::Test { -public: - ArrayFunctionsTest() { - _utils = new FunctionUtils(); - _context = _utils->get_fn_ctx(); - } - ~ArrayFunctionsTest() { delete _utils; } - -public: - FunctionUtils* _utils; - FunctionContext* _context; -}; - -TEST_F(ArrayFunctionsTest, array) { - // Int array - { - FunctionContext::TypeDesc childTypeDesc {}; - childTypeDesc.type = FunctionContext::TYPE_INT; - - _context->impl()->_return_type.type = FunctionContext::TYPE_ARRAY; - _context->impl()->_return_type.children.clear(); - _context->impl()->_return_type.children.push_back(childTypeDesc); - - IntVal v[10]; - - for (int i = 0; i < 10; ++i) { - v[i].val = i + 1; - } - - CollectionVal cv = ArrayFunctions::array(_context, 10, v); - - CollectionValue value = CollectionValue::from_collection_val(cv); - - int i = 0; - for (auto&& iter = value.iterator(TYPE_INT); iter.has_next(); iter.next()) { - i++; - IntVal a; - iter.get(&a); - EXPECT_EQ(i, a.val); - } - } -} - -} // namespace doris diff --git a/be/test/exprs/binary_predicate_test.cpp b/be/test/exprs/binary_predicate_test.cpp deleted file mode 100644 index af96e332cb..0000000000 --- a/be/test/exprs/binary_predicate_test.cpp +++ /dev/null @@ -1,158 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/binary_predicate.h" - -#include - -#include "common/object_pool.h" -#include "exec/exec_node.h" -#include "exprs/expr.h" -#include "exprs/int_literal.h" -#include "gen_cpp/Exprs_types.h" -#include "runtime/runtime_state.h" -#include "util/debug_util.h" - -namespace doris { - -class BinaryOpTest : public ::testing::Test { -public: - ~BinaryOpTest() {} - - virtual void SetUp() { - _object_pool = new ObjectPool(); - _runtime_state = _object_pool->add(new RuntimeState("")); - - TDescriptorTable ttbl; - TTupleDescriptor tuple_desc; - tuple_desc.__set_id(0); - tuple_desc.__set_byteSize(8); - tuple_desc.__set_numNullBytes(0); - ttbl.tupleDescriptors.push_back(tuple_desc); - - TSlotDescriptor slot_desc; - slot_desc.__set_id(0); - slot_desc.__set_parent(0); - slot_desc.__set_slotType(TPrimitiveType::INT); - slot_desc.__set_columnPos(0); - slot_desc.__set_byteOffset(4); - slot_desc.__set_nullIndicatorByte(0); - slot_desc.__set_nullIndicatorBit(0); - slot_desc.__set_colName("col1"); - slot_desc.__set_slotIdx(0); - slot_desc.__set_isMaterialized(true); - ttbl.slotDescriptors.push_back(slot_desc); - - DescriptorTbl* desc_tbl = nullptr; - EXPECT_TRUE(DescriptorTbl::create(_object_pool, ttbl, &desc_tbl).ok()); - EXPECT_TRUE(desc_tbl != nullptr); - _runtime_state->set_desc_tbl(desc_tbl); - - std::vector row_tuples; - row_tuples.push_back(0); - std::vector nullable_tuples; - nullable_tuples.push_back(false); - _row_desc = _object_pool->add(new RowDescriptor(*desc_tbl, row_tuples, nullable_tuples)); - - FieldInfo field; - field.name = "col1"; - field.type = OLAP_FIELD_TYPE_INT; - field.length = 4; - field.is_key = true; - _schema.push_back(field); - } - virtual void TearDown() { - if (_object_pool != nullptr) { - delete _object_pool; - _object_pool = nullptr; - } - } - - Expr* create_expr() { - TExpr exprs; - { - TExprNode expr_node; - expr_node.__set_node_type(TExprNodeType::BINARY_PRED); - TColumnType type; - type.__set_type(TPrimitiveType::INT); - expr_node.__set_type(type); - expr_node.__set_num_children(2); - expr_node.__isset.opcode = true; - expr_node.__set_opcode(TExprOpcode::LT_INT_INT); - expr_node.__isset.vector_opcode = true; - expr_node.__set_vector_opcode(TExprOpcode::FILTER_LT_INT_INT); - exprs.nodes.push_back(expr_node); - } - { - TExprNode expr_node; - expr_node.__set_node_type(TExprNodeType::SLOT_REF); - TColumnType type; - type.__set_type(TPrimitiveType::INT); - expr_node.__set_type(type); - expr_node.__set_num_children(0); - expr_node.__isset.slot_ref = true; - TSlotRef slot_ref; - slot_ref.__set_slot_id(0); - slot_ref.__set_tuple_id(0); - expr_node.__set_slot_ref(slot_ref); - expr_node.__isset.output_column = true; - expr_node.__set_output_column(0); - exprs.nodes.push_back(expr_node); - } - { - TExprNode expr_node; - expr_node.__set_node_type(TExprNodeType::INT_LITERAL); - TColumnType type; - type.__set_type(TPrimitiveType::INT); - expr_node.__set_type(type); - expr_node.__set_num_children(0); - expr_node.__isset.int_literal = true; - TIntLiteral int_literal; - int_literal.__set_value(10); - expr_node.__set_int_literal(int_literal); - exprs.nodes.push_back(expr_node); - } - Expr* root_expr = nullptr; - - if (Expr::create_expr_tree(_object_pool, exprs, &root_expr).ok()) { - return root_expr; - } else { - return nullptr; - } - } - -public: - ObjectPool* object_pool() { return _object_pool; } - RuntimeState* runtime_state() { return _runtime_state; } - RowDescriptor* row_desc() { return _row_desc; } - -private: - ObjectPool* _object_pool; - RuntimeState* _runtime_state; - RowDescriptor* _row_desc; - std::vector _schema; -}; - -TEST_F(BinaryOpTest, PrepareTest) { - Expr* expr = create_expr(); - EXPECT_TRUE(expr != nullptr); - EXPECT_TRUE(expr->prepare(runtime_state(), *row_desc()).ok()); -} - -} // namespace doris - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/be/test/exprs/bloom_filter_predicate_test.cpp b/be/test/exprs/bloom_filter_predicate_test.cpp index ef87562b86..782cc30245 100644 --- a/be/test/exprs/bloom_filter_predicate_test.cpp +++ b/be/test/exprs/bloom_filter_predicate_test.cpp @@ -17,7 +17,6 @@ #include -#include "exprs/bloomfilter_predicate.h" #include "exprs/create_predicate_function.h" #include "gtest/gtest.h" #include "vec/common/string_ref.h" diff --git a/be/test/exprs/hll_function_test.cpp b/be/test/exprs/hll_function_test.cpp deleted file mode 100644 index 23e7a0dbc9..0000000000 --- a/be/test/exprs/hll_function_test.cpp +++ /dev/null @@ -1,109 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/hll_function.h" - -#include - -#include -#include - -#include "exprs/aggregate_functions.h" -#include "exprs/anyval_util.h" -#include "olap/hll.h" -#include "testutil/function_utils.h" - -namespace doris { - -StringVal convert_hll_to_string(FunctionContext* ctx, HyperLogLog& hll) { - std::string buf; - buf.resize(HLL_COLUMN_DEFAULT_LEN); - int size = hll.serialize((uint8_t*)buf.c_str()); - buf.resize(size); - return AnyValUtil::from_string_temp(ctx, buf); -} - -class HllFunctionsTest : public testing::Test { -public: - HllFunctionsTest() = default; - - void SetUp() { - utils = new FunctionUtils(); - ctx = utils->get_fn_ctx(); - } - void TearDown() { delete utils; } - -private: - FunctionUtils* utils; - FunctionContext* ctx; -}; - -TEST_F(HllFunctionsTest, hll_hash) { - StringVal input = AnyValUtil::from_string_temp(ctx, std::string("1024")); - StringVal result = HllFunctions::hll_hash(ctx, input); - - HyperLogLog hll(Slice(result.ptr, result.len)); - int64_t cardinality = hll.estimate_cardinality(); - int64_t expected = 1; - - EXPECT_EQ(expected, cardinality); -} - -TEST_F(HllFunctionsTest, hll_hash_null) { - StringVal input = StringVal::null(); - StringVal result = HllFunctions::hll_hash(ctx, input); - - HyperLogLog hll(Slice(result.ptr, result.len)); - int64_t cardinality = hll.estimate_cardinality(); - int64_t expected = 0; - - EXPECT_EQ(expected, cardinality); -} - -TEST_F(HllFunctionsTest, hll_update) { - StringVal dst; - HllFunctions::hll_init(ctx, &dst); - IntVal src1(1); - HllFunctions::hll_update(ctx, src1, &dst); - IntVal src2(1234567); - HllFunctions::hll_update(ctx, src2, &dst); - - BigIntVal result = HllFunctions::hll_finalize(ctx, dst); - BigIntVal expected(2); - EXPECT_EQ(expected, result); -} - -TEST_F(HllFunctionsTest, hll_merge) { - StringVal dst; - HllFunctions::hll_init(ctx, &dst); - - HyperLogLog hll1(1024); - StringVal src1 = convert_hll_to_string(ctx, hll1); - HllFunctions::hll_merge(ctx, src1, &dst); - - HyperLogLog hll2; - StringVal src2 = convert_hll_to_string(ctx, hll2); - HllFunctions::hll_merge(ctx, src2, &dst); - - StringVal serialized = HllFunctions::hll_serialize(ctx, dst); - HyperLogLog hll(Slice(serialized.ptr, serialized.len)); - - BigIntVal expected(1); - EXPECT_EQ(expected, hll.estimate_cardinality()); -} - -} // namespace doris diff --git a/be/test/exprs/in_op_test.cpp b/be/test/exprs/in_op_test.cpp deleted file mode 100644 index fc49b4dd5b..0000000000 --- a/be/test/exprs/in_op_test.cpp +++ /dev/null @@ -1,149 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include "common/object_pool.h" -#include "exec/exec_node.h" -#include "exprs/expr.h" -#include "exprs/in_predicate.h" -#include "exprs/int_literal.h" -#include "gen_cpp/Exprs_types.h" -#include "runtime/runtime_state.h" -#include "util/debug_util.h" - -namespace doris { - -class InOpTest : public ::testing::Test { -public: - ~InOpTest() {} - InOpTest() : _object_pool(nullptr), _runtime_state(nullptr), _row_desc(nullptr) {} - virtual void SetUp() { - _object_pool = new ObjectPool(); - _runtime_state = _object_pool->add(new RuntimeState("")); - - TDescriptorTable ttbl; - TTupleDescriptor tuple_desc; - tuple_desc.__set_id(0); - tuple_desc.__set_byteSize(8); - tuple_desc.__set_numNullBytes(0); - ttbl.tupleDescriptors.push_back(tuple_desc); - - TSlotDescriptor slot_desc; - slot_desc.__set_id(0); - slot_desc.__set_parent(0); - slot_desc.__set_slotType(TPrimitiveType::INT); - slot_desc.__set_columnPos(0); - slot_desc.__set_byteOffset(4); - slot_desc.__set_nullIndicatorByte(0); - slot_desc.__set_nullIndicatorBit(0); - slot_desc.__set_colName("col1"); - slot_desc.__set_slotIdx(0); - slot_desc.__set_isMaterialized(true); - ttbl.slotDescriptors.push_back(slot_desc); - - DescriptorTbl* desc_tbl = nullptr; - EXPECT_TRUE(DescriptorTbl::create(_object_pool, ttbl, &desc_tbl).ok()); - EXPECT_TRUE(desc_tbl != nullptr); - _runtime_state->set_desc_tbl(desc_tbl); - - std::vector row_tuples; - row_tuples.push_back(0); - std::vector nullable_tuples; - nullable_tuples.push_back(false); - _row_desc = _object_pool->add(new RowDescriptor(*desc_tbl, row_tuples, nullable_tuples)); - } - virtual void TearDown() { - if (_object_pool != nullptr) { - delete _object_pool; - _object_pool = nullptr; - } - } - - Expr* create_expr() { - TExpr exprs; - int num_children = 128; - { - TExprNode expr_node; - expr_node.__set_node_type(TExprNodeType::IN_PRED); - TColumnType type; - type.__set_type(TPrimitiveType::INT); - expr_node.__isset.in_predicate = true; - expr_node.in_predicate.__set_is_not_in(false); - expr_node.__set_type(type); - expr_node.__set_num_children(num_children + 1); - expr_node.__isset.opcode = true; - expr_node.__set_opcode(TExprOpcode::INVALID_OPCODE); - expr_node.__isset.vector_opcode = true; - expr_node.__set_vector_opcode(TExprOpcode::FILTER_IN_INT); - exprs.nodes.push_back(expr_node); - } - { - TExprNode expr_node; - expr_node.__set_node_type(TExprNodeType::SLOT_REF); - TColumnType type; - type.__set_type(TPrimitiveType::INT); - expr_node.__set_type(type); - expr_node.__set_num_children(0); - expr_node.__isset.slot_ref = true; - TSlotRef slot_ref; - slot_ref.__set_slot_id(0); - slot_ref.__set_tuple_id(0); - expr_node.__set_slot_ref(slot_ref); - expr_node.__isset.output_column = true; - expr_node.__set_output_column(0); - exprs.nodes.push_back(expr_node); - } - - for (int i = 0; i < num_children; ++i) { - TExprNode expr_node; - expr_node.__set_node_type(TExprNodeType::INT_LITERAL); - TColumnType type; - type.__set_type(TPrimitiveType::INT); - expr_node.__set_type(type); - expr_node.__set_num_children(0); - expr_node.__isset.int_literal = true; - TIntLiteral int_literal; - int_literal.__set_value(i); - expr_node.__set_int_literal(int_literal); - exprs.nodes.push_back(expr_node); - } - - Expr* root_expr = nullptr; - - if (Expr::create_expr_tree(_object_pool, exprs, &root_expr).ok()) { - return root_expr; - } else { - return nullptr; - } - } - -private: - ObjectPool* _object_pool; - RuntimeState* _runtime_state; - RowDescriptor* _row_desc; -}; - -TEST_F(InOpTest, PrepareTest) { - Expr* expr = create_expr(); - EXPECT_TRUE(expr != nullptr); - EXPECT_TRUE(expr->prepare(_runtime_state, *_row_desc).ok()); -} - -} // namespace doris - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/be/test/exprs/in_predicate_test.cpp b/be/test/exprs/in_predicate_test.cpp deleted file mode 100644 index 20bb32a045..0000000000 --- a/be/test/exprs/in_predicate_test.cpp +++ /dev/null @@ -1,129 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "exprs/in_predicate.h" - -#include - -#include - -#include "common/object_pool.h" -#include "gen_cpp/Exprs_types.h" -#include "gen_cpp/Types_types.h" -#include "runtime/runtime_state.h" - -namespace doris { - -// mock -class InPredicateTest : public testing::Test { -public: - InPredicateTest() : _runtime_stat("abc") { - _in_node.node_type = TExprNodeType::IN_PRED; - _in_node.type = TColumnType(); - _in_node.num_children = 0; - _in_node.in_predicate.is_not_in = false; - _in_node.__isset.in_predicate = true; - - _tuple_row._tuples[0] = (Tuple*)&_data; - } - void init_in_pre(InPredicate* in_pre) { - in_pre->_children.push_back(_obj_pool.add(new SlotRef(TYPE_INT, 0))); - - for (int i = 0; i < 100; ++i) { - in_pre->_children.push_back(Expr::create_literal(&_obj_pool, TYPE_INT, &i)); - } - - in_pre->_children.push_back(_obj_pool.add(new SlotRef(TYPE_INT, 4))); - } - -protected: - virtual void SetUp() { _data[0] = _data[1] = -1; } - virtual void TearDown() {} - -private: - TExprNode _in_node; - ObjectPool _obj_pool; - RuntimeState _runtime_stat; - RowDescriptor _row_desc; - int _data[2]; - TupleRow _tuple_row; -}; - -TEST_F(InPredicateTest, push_100_const) { - InPredicate in_pre(_in_node); - in_pre._children.push_back(_obj_pool.add(new SlotRef(TYPE_INT, 0))); - Status status = in_pre.prepare(&_runtime_stat, _row_desc); - EXPECT_TRUE(status.ok()); - - for (int i = 0; i < 100; ++i) { - in_pre.insert(&i); - } - - EXPECT_EQ(100, in_pre._hybird_set->size()); - EXPECT_EQ(1, in_pre._children.size()); - - for (int i = 0; i < 100; ++i) { - _data[0] = i; - EXPECT_TRUE(*(bool*)in_pre.get_value(&_tuple_row)); - } - - _data[0] = 101; - EXPECT_FALSE(*(bool*)in_pre.get_value(&_tuple_row)); -} - -TEST_F(InPredicateTest, no_child) { - InPredicate in_pre(_in_node); - Status status = in_pre.prepare(&_runtime_stat, _row_desc); - EXPECT_FALSE(status.ok()); -} -TEST_F(InPredicateTest, diff_type) { - InPredicate in_pre(_in_node); - SlotRef* slot_ref = _obj_pool.add(new SlotRef(TYPE_BOOLEAN, 0)); - in_pre._children.push_back(slot_ref); - - for (int i = 0; i < 100; ++i) { - in_pre._children.push_back(Expr::create_literal(&_obj_pool, TYPE_INT, &i)); - } - - Status status = in_pre.prepare(&_runtime_stat, _row_desc); - EXPECT_FALSE(status.ok()); -} - -TEST_F(InPredicateTest, 100_const) { - InPredicate in_pre(_in_node); - init_in_pre(&in_pre); - Status status = in_pre.prepare(&_runtime_stat, _row_desc); - EXPECT_TRUE(status.ok()); - status = in_pre.prepare(&_runtime_stat, _row_desc); - EXPECT_TRUE(status.ok()); - EXPECT_EQ(100, in_pre._hybird_set->size()); - EXPECT_EQ(2, in_pre._children.size()); - - for (int i = 0; i < 100; ++i) { - _data[0] = i; - EXPECT_TRUE(*(bool*)in_pre.get_value(&_tuple_row)); - } - - _data[0] = 101; - EXPECT_FALSE(*(bool*)in_pre.get_value(&_tuple_row)); - _data[1] = 101; - EXPECT_TRUE(*(bool*)in_pre.get_value(&_tuple_row)); -} - -} // namespace doris - -\n \ No newline at end of file diff --git a/be/test/exprs/runtime_filter_test.cpp b/be/test/exprs/runtime_filter_test.cpp new file mode 100644 index 0000000000..5f59ab0d7f --- /dev/null +++ b/be/test/exprs/runtime_filter_test.cpp @@ -0,0 +1,119 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exprs/runtime_filter.h" + +#include +#include + +#include "exprs/bloom_filter_func.h" +#include "gen_cpp/Planner_types.h" +#include "gen_cpp/Types_types.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include "runtime/exec_env.h" +#include "runtime/runtime_filter_mgr.h" +#include "runtime/runtime_state.h" + +namespace doris { +TTypeDesc create_type_desc(PrimitiveType type, int precision, int scale); + +class RuntimeFilterTest : public testing::Test { +public: + RuntimeFilterTest() {} + virtual void SetUp() { + ExecEnv* exec_env = ExecEnv::GetInstance(); + exec_env = nullptr; + _runtime_stat.reset( + new RuntimeState(_fragment_id, _query_options, _query_globals, exec_env)); + _runtime_stat->init_mem_trackers(); + } + virtual void TearDown() { _obj_pool.clear(); } + +private: + ObjectPool _obj_pool; + TUniqueId _fragment_id; + TQueryOptions _query_options; + TQueryGlobals _query_globals; + + std::unique_ptr _runtime_stat; + // std::unique_ptr _runtime_filter; +}; + +IRuntimeFilter* create_runtime_filter(TRuntimeFilterType::type type, TQueryOptions* options, + RuntimeState* _runtime_stat, ObjectPool* _obj_pool) { + TRuntimeFilterDesc desc; + desc.__set_filter_id(0); + desc.__set_expr_order(0); + desc.__set_has_local_targets(true); + desc.__set_has_remote_targets(false); + desc.__set_is_broadcast_join(true); + desc.__set_type(type); + desc.__set_bloom_filter_size_bytes(4096); + + // build src expr context + + { + TExpr build_expr; + TExprNode expr_node; + expr_node.__set_node_type(TExprNodeType::SLOT_REF); + expr_node.__set_type(create_type_desc(TYPE_INT)); + expr_node.__set_num_children(0); + expr_node.__isset.slot_ref = true; + TSlotRef slot_ref; + slot_ref.__set_slot_id(0); + slot_ref.__set_tuple_id(0); + expr_node.__set_slot_ref(slot_ref); + expr_node.__isset.output_column = true; + expr_node.__set_output_column(0); + build_expr.nodes.push_back(expr_node); + desc.__set_src_expr(build_expr); + } + // build dst expr + { + TExpr target_expr; + TExprNode expr_node; + expr_node.__set_node_type(TExprNodeType::SLOT_REF); + expr_node.__set_type(create_type_desc(TYPE_INT)); + expr_node.__set_num_children(0); + expr_node.__isset.slot_ref = true; + TSlotRef slot_ref; + slot_ref.__set_slot_id(0); + slot_ref.__set_tuple_id(0); + expr_node.__set_slot_ref(slot_ref); + expr_node.__isset.output_column = true; + expr_node.__set_output_column(0); + target_expr.nodes.push_back(expr_node); + std::map planid_to_target_expr = {{0, target_expr}}; + desc.__set_planId_to_target_expr(planid_to_target_expr); + } + + IRuntimeFilter* runtime_filter = nullptr; + Status status = IRuntimeFilter::create(_runtime_stat, _obj_pool, &desc, options, + RuntimeFilterRole::PRODUCER, -1, &runtime_filter); + + EXPECT_TRUE(status.ok()) << status.to_string(); + + if (auto bf = runtime_filter->get_bloomfilter()) { + status = bf->init_with_fixed_length(); + EXPECT_TRUE(status.ok()) << status.to_string(); + } + + return status.ok() ? runtime_filter : nullptr; +} + +} // namespace doris diff --git a/be/test/exprs/topn_function_test.cpp b/be/test/exprs/topn_function_test.cpp index fd6e642d52..fd390a2e9f 100644 --- a/be/test/exprs/topn_function_test.cpp +++ b/be/test/exprs/topn_function_test.cpp @@ -22,7 +22,6 @@ #include #include "exprs/anyval_util.h" -#include "exprs/expr_context.h" #include "testutil/function_utils.h" #include "testutil/test_util.h" #include "util/topn_counter.h" diff --git a/be/test/runtime/data_spliter_test.cpp b/be/test/runtime/data_spliter_test.cpp index b497978e3b..2bbf9b2439 100644 --- a/be/test/runtime/data_spliter_test.cpp +++ b/be/test/runtime/data_spliter_test.cpp @@ -28,7 +28,6 @@ #include "runtime/dpp_sink_internal.h" #include "runtime/runtime_state.h" #include "runtime/tuple.h" -#include "runtime/tuple_row.h" #include "util/file_utils.h" namespace doris { diff --git a/be/test/runtime/mem_pool_test.cpp b/be/test/runtime/mem_pool_test.cpp index 15fe9999b5..f495780e14 100644 --- a/be/test/runtime/mem_pool_test.cpp +++ b/be/test/runtime/mem_pool_test.cpp @@ -83,13 +83,6 @@ TEST(MemPoolTest, Basic) { p3.acquire_data(&p2, true); // we're keeping the 65k chunk EXPECT_EQ(33 * 1024, p2.total_allocated_bytes()); EXPECT_EQ(256 * 1024, p2.total_reserved_bytes()); - - { - MemPool p4; - p4.exchange_data(&p2); - EXPECT_EQ(33 * 1024, p4.total_allocated_bytes()); - EXPECT_EQ(256 * 1024, p4.total_reserved_bytes()); - } } // Test that we can keep an allocated chunk and a free chunk. @@ -112,14 +105,6 @@ TEST(MemPoolTest, Keep) { EXPECT_EQ(p.total_reserved_bytes(), (4 + 8 + 16) * 1024); MemPool p2; p2.acquire_data(&p, true); - - { - p2.exchange_data(&p); - EXPECT_EQ(4 * 1024, p2.total_allocated_bytes()); - EXPECT_EQ((8 + 16) * 1024, p2.total_reserved_bytes()); - EXPECT_EQ(1 * 1024, p.total_allocated_bytes()); - EXPECT_EQ(4 * 1024, p.total_reserved_bytes()); - } } // Maximum allocation size which exceeds 32-bit. diff --git a/be/test/vec/core/block_test.cpp b/be/test/vec/core/block_test.cpp index 991a82229e..4195be2a27 100644 --- a/be/test/vec/core/block_test.cpp +++ b/be/test/vec/core/block_test.cpp @@ -26,7 +26,6 @@ #include "agent/be_exec_version_manager.h" #include "exec/schema_scanner.h" #include "gen_cpp/data.pb.h" -#include "runtime/tuple_row.h" #include "vec/columns/column_array.h" #include "vec/columns/column_decimal.h" #include "vec/columns/column_nullable.h" diff --git a/be/test/vec/exec/vtablet_sink_test.cpp b/be/test/vec/exec/vtablet_sink_test.cpp index ef7ac73ce7..52706f57fa 100644 --- a/be/test/vec/exec/vtablet_sink_test.cpp +++ b/be/test/vec/exec/vtablet_sink_test.cpp @@ -32,7 +32,6 @@ #include "runtime/runtime_state.h" #include "runtime/stream_load/load_stream_mgr.h" #include "runtime/thread_resource_mgr.h" -#include "runtime/tuple_row.h" #include "runtime/types.h" #include "service/brpc.h" #include "util/brpc_client_cache.h" diff --git a/be/test/vec/exprs/vexpr_test.cpp b/be/test/vec/exprs/vexpr_test.cpp index 8eb195535e..cacc350303 100644 --- a/be/test/vec/exprs/vexpr_test.cpp +++ b/be/test/vec/exprs/vexpr_test.cpp @@ -32,7 +32,6 @@ #include "runtime/primitive_type.h" #include "runtime/runtime_state.h" #include "runtime/tuple.h" -#include "runtime/tuple_row.h" #include "testutil/desc_tbl_builder.h" #include "vec/exprs/vliteral.h" #include "vec/runtime/vdatetime_value.h" diff --git a/be/test/vec/function/function_arithmetic_test.cpp b/be/test/vec/function/function_arithmetic_test.cpp index a0c22a2686..89514a9e30 100644 --- a/be/test/vec/function/function_arithmetic_test.cpp +++ b/be/test/vec/function/function_arithmetic_test.cpp @@ -21,7 +21,6 @@ #include #include "function_test_util.h" -#include "runtime/tuple_row.h" #include "util/url_coding.h" #include "vec/core/field.h" diff --git a/be/test/vec/function/function_array_index_test.cpp b/be/test/vec/function/function_array_index_test.cpp index 6584c69a75..4dd040b884 100644 --- a/be/test/vec/function/function_array_index_test.cpp +++ b/be/test/vec/function/function_array_index_test.cpp @@ -21,7 +21,6 @@ #include #include "function_test_util.h" -#include "runtime/tuple_row.h" #include "util/url_coding.h" #include "vec/core/field.h" diff --git a/be/test/vec/function/function_array_size_test.cpp b/be/test/vec/function/function_array_size_test.cpp index 71a3ad5530..b2cb10b3a6 100644 --- a/be/test/vec/function/function_array_size_test.cpp +++ b/be/test/vec/function/function_array_size_test.cpp @@ -21,7 +21,6 @@ #include #include "function_test_util.h" -#include "runtime/tuple_row.h" #include "util/url_coding.h" #include "vec/core/field.h" diff --git a/be/test/vec/function/function_arrays_overlap_test.cpp b/be/test/vec/function/function_arrays_overlap_test.cpp index 053205e403..a5c114ff8b 100644 --- a/be/test/vec/function/function_arrays_overlap_test.cpp +++ b/be/test/vec/function/function_arrays_overlap_test.cpp @@ -21,7 +21,6 @@ #include #include "function_test_util.h" -#include "runtime/tuple_row.h" #include "util/url_coding.h" #include "vec/core/field.h" diff --git a/be/test/vec/function/function_hash_test.cpp b/be/test/vec/function/function_hash_test.cpp index 4578181181..f9dadf267e 100644 --- a/be/test/vec/function/function_hash_test.cpp +++ b/be/test/vec/function/function_hash_test.cpp @@ -19,7 +19,6 @@ #include #include "function_test_util.h" -#include "runtime/tuple_row.h" #include "vec/functions/simple_function_factory.h" namespace doris::vectorized {