From 668188b91f5c2a9d27ad257fc409f1df11aa0c17 Mon Sep 17 00:00:00 2001 From: Pxl <952130278@qq.com> Date: Sat, 26 Feb 2022 17:02:54 +0800 Subject: [PATCH] [improvement][vectorized] support es node predicate peel (#8174) --- be/src/exec/es/es_predicate.cpp | 2 +- be/src/exec/es/es_predicate.h | 4 +- be/src/exec/es/es_query_builder.cpp | 135 +++++++++--------- be/src/exec/es/es_query_builder.h | 1 + be/src/exec/es_http_scan_node.cpp | 24 +++- be/src/exec/es_http_scan_node.h | 10 +- be/src/exec/olap_scan_node.cpp | 58 ++------ be/src/exec/olap_scan_node.h | 14 +- be/src/exec/scan_node.cpp | 28 ++++ be/src/exec/scan_node.h | 9 +- be/src/vec/CMakeLists.txt | 1 + be/src/vec/exec/ves_http_scanner.cpp | 8 +- be/src/vec/exec/ves_http_scanner.h | 24 ++-- be/src/vec/functions/function_convert_tz.h | 2 + .../function_date_or_datetime_computation.h | 2 + .../function_date_or_datetime_to_something.h | 2 + .../function_date_or_datetime_to_string.h | 2 + .../function_datetime_string_to_string.h | 2 + be/src/vec/functions/function_fake.cpp | 26 ++++ be/src/vec/functions/function_fake.h | 61 ++++++++ be/src/vec/functions/function_grouping.h | 12 +- be/src/vec/functions/function_hash.h | 1 + be/src/vec/functions/function_ifnull.h | 51 +++---- .../vec/functions/function_multi_same_args.h | 4 +- be/src/vec/functions/function_rpc.h | 2 + .../vec/functions/function_string_to_string.h | 2 + .../vec/functions/simple_function_factory.h | 3 + be/src/vec/utils/util.hpp | 26 +++- gensrc/script/doris_builtins_functions.py | 2 +- 29 files changed, 315 insertions(+), 203 deletions(-) create mode 100644 be/src/vec/functions/function_fake.cpp create mode 100644 be/src/vec/functions/function_fake.h diff --git a/be/src/exec/es/es_predicate.cpp b/be/src/exec/es/es_predicate.cpp index 3b8be666aa..7dab9ca265 100644 --- a/be/src/exec/es/es_predicate.cpp +++ b/be/src/exec/es/es_predicate.cpp @@ -181,7 +181,7 @@ Status EsPredicate::build_disjuncts_list() { } // make sure to build by build_disjuncts_list -const std::vector& EsPredicate::get_predicate_list() { +const std::vector& EsPredicate::get_predicate_list() const { return _disjuncts; } diff --git a/be/src/exec/es/es_predicate.h b/be/src/exec/es/es_predicate.h index 825bc5ef57..826af0aa7c 100644 --- a/be/src/exec/es/es_predicate.h +++ b/be/src/exec/es/es_predicate.h @@ -141,10 +141,10 @@ class EsPredicate { public: EsPredicate(ExprContext* context, const TupleDescriptor* tuple_desc, ObjectPool* pool); ~EsPredicate(); - const std::vector& get_predicate_list(); + const std::vector& get_predicate_list() const; Status build_disjuncts_list(); // public for tests - EsPredicate(const std::vector& all_predicates) { _disjuncts = all_predicates; }; + EsPredicate(const std::vector& all_predicates) { _disjuncts = all_predicates; } Status get_es_query_status() { return _es_query_status; } diff --git a/be/src/exec/es/es_query_builder.cpp b/be/src/exec/es/es_query_builder.cpp index 5f8dcfd081..1f9488e697 100644 --- a/be/src/exec/es/es_query_builder.cpp +++ b/be/src/exec/es/es_query_builder.cpp @@ -54,19 +54,19 @@ TermQueryBuilder::TermQueryBuilder(const std::string& field, const std::string& TermQueryBuilder::TermQueryBuilder(const ExtBinaryPredicate& binary_predicate) : _field(binary_predicate.col.name), _match_none(false) { - if (binary_predicate.col.type.type == PrimitiveType::TYPE_BOOLEAN) { - int val = atoi(binary_predicate.value.to_string().c_str()); - if (val == 1) { - _term = std::string("true"); - } else if (val == 0){ - _term = std::string("false"); - } else { - // keep semantic consistent with mysql - _match_none = true; - } + if (binary_predicate.col.type.type == PrimitiveType::TYPE_BOOLEAN) { + int val = atoi(binary_predicate.value.to_string().c_str()); + if (val == 1) { + _term = std::string("true"); + } else if (val == 0) { + _term = std::string("false"); } else { - _term = binary_predicate.value.to_string(); + // keep semantic consistent with mysql + _match_none = true; } + } else { + _term = binary_predicate.value.to_string(); + } } void TermQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* query) { @@ -82,7 +82,6 @@ void TermQueryBuilder::to_json(rapidjson::Document* document, rapidjson::Value* // this would only appear `bool` column's predicate (a = 2) query->AddMember("match_none", term_node, allocator); } - } RangeQueryBuilder::RangeQueryBuilder(const ExtBinaryPredicate& range_predicate) @@ -381,68 +380,64 @@ Status BooleanQueryBuilder::check_es_query(const ExtFunction& extFunction) { void BooleanQueryBuilder::validate(const std::vector& espredicates, std::vector* result) { - int conjunct_size = espredicates.size(); - result->reserve(conjunct_size); for (auto espredicate : espredicates) { - bool flag = true; - for (auto predicate : espredicate->get_predicate_list()) { - switch (predicate->node_type) { - case TExprNodeType::BINARY_PRED: { - ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; - TExprOpcode::type op = binary_predicate->op; - if (op != TExprOpcode::EQ && op != TExprOpcode::NE && op != TExprOpcode::LT && - op != TExprOpcode::LE && op != TExprOpcode::GT && op != TExprOpcode::GE) { - flag = false; - } - break; - } - case TExprNodeType::COMPOUND_PRED: { - ExtCompPredicates* compound_predicates = (ExtCompPredicates*)predicate; - if (compound_predicates->op == TExprOpcode::COMPOUND_AND) { - std::vector list; - validate(compound_predicates->conjuncts, &list); - for (int i = list.size() - 1; i >= 0; i--) { - if (!list[i]) { - flag = false; - break; - } - } - } else { - // reserved for compound_not - flag = false; - } - break; - } - case TExprNodeType::LIKE_PRED: - case TExprNodeType::IS_NULL_PRED: - case TExprNodeType::IN_PRED: { - break; - } - case TExprNodeType::FUNCTION_CALL: { - ExtFunction* function_predicate = (ExtFunction*)predicate; - if ("esquery" == function_predicate->func_name) { - Status st = check_es_query(*function_predicate); - if (!st.ok()) { - flag = false; - } - } else { - flag = false; - } - break; - } - default: { - flag = false; - break; - } - } - if (!flag) { - break; - } - } - result->push_back(flag); + result->push_back(validate(espredicate)); } } +bool BooleanQueryBuilder::validate(const EsPredicate* espredicate) { + for (auto predicate : espredicate->get_predicate_list()) { + switch (predicate->node_type) { + case TExprNodeType::BINARY_PRED: { + ExtBinaryPredicate* binary_predicate = (ExtBinaryPredicate*)predicate; + TExprOpcode::type op = binary_predicate->op; + if (op != TExprOpcode::EQ && op != TExprOpcode::NE && op != TExprOpcode::LT && + op != TExprOpcode::LE && op != TExprOpcode::GT && op != TExprOpcode::GE) { + return false; + } + break; + } + case TExprNodeType::COMPOUND_PRED: { + ExtCompPredicates* compound_predicates = (ExtCompPredicates*)predicate; + if (compound_predicates->op != TExprOpcode::COMPOUND_AND) { + // reserved for compound_not + return false; + } + std::vector list; + validate(compound_predicates->conjuncts, &list); + for (int i = list.size() - 1; i >= 0; i--) { + if (!list[i]) { + return false; + } + } + break; + } + case TExprNodeType::LIKE_PRED: + case TExprNodeType::IS_NULL_PRED: + case TExprNodeType::IN_PRED: { + break; + } + case TExprNodeType::FUNCTION_CALL: { + ExtFunction* function_predicate = (ExtFunction*)predicate; + if ("esquery" != function_predicate->func_name) { + return false; + } + Status st = check_es_query(*function_predicate); + if (!st.ok()) { + return false; + } + break; + } + default: { + return false; + break; + } + } + } + + return true; +} + void BooleanQueryBuilder::to_query(const std::vector& predicates, rapidjson::Document* root, rapidjson::Value* query) { if (predicates.size() == 0) { diff --git a/be/src/exec/es/es_query_builder.h b/be/src/exec/es/es_query_builder.h index c471380d23..6a5d16d85e 100644 --- a/be/src/exec/es/es_query_builder.h +++ b/be/src/exec/es/es_query_builder.h @@ -119,6 +119,7 @@ public: static Status check_es_query(const ExtFunction& extFunction); // decide which predicate can process static void validate(const std::vector& espredicates, std::vector* result); + static bool validate(const EsPredicate* espredicate); private: // add child query diff --git a/be/src/exec/es_http_scan_node.cpp b/be/src/exec/es_http_scan_node.cpp index 7b67486401..64b9792668 100644 --- a/be/src/exec/es_http_scan_node.cpp +++ b/be/src/exec/es_http_scan_node.cpp @@ -68,6 +68,9 @@ Status EsHttpScanNode::prepare(RuntimeState* state) { VLOG_QUERY << "EsHttpScanNode prepare"; RETURN_IF_ERROR(ScanNode::prepare(state)); + _scanner_profile.reset(new RuntimeProfile("EsHttpScanNode")); + runtime_profile()->add_child(_scanner_profile.get(), true, nullptr); + _runtime_state = state; _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id); if (_tuple_desc == nullptr) { @@ -92,14 +95,20 @@ Status EsHttpScanNode::prepare(RuntimeState* state) { // build predicate Status EsHttpScanNode::build_conjuncts_list() { Status status = Status::OK(); + _conjunct_to_predicate.resize(_conjunct_ctxs.size()); + for (int i = 0; i < _conjunct_ctxs.size(); ++i) { EsPredicate* predicate = _pool->add(new EsPredicate(_conjunct_ctxs[i], _tuple_desc, _pool)); predicate->set_field_context(_fields_context); status = predicate->build_disjuncts_list(); if (status.ok()) { - _predicates.push_back(predicate); + _conjunct_to_predicate[i] = _predicate_to_conjunct.size(); _predicate_to_conjunct.push_back(i); + + _predicates.push_back(predicate); } else { + _conjunct_to_predicate[i] = -1; + VLOG_CRITICAL << status.get_error_msg(); status = predicate->get_es_query_status(); if (!status.ok()) { @@ -133,6 +142,7 @@ Status EsHttpScanNode::open(RuntimeState* state) { // remove those predicates which ES cannot support std::vector list; BooleanQueryBuilder::validate(_predicates, &list); + DCHECK(list.size() == _predicate_to_conjunct.size()); for (int i = list.size() - 1; i >= 0; i--) { if (!list[i]) { @@ -148,6 +158,12 @@ Status EsHttpScanNode::open(RuntimeState* state) { _conjunct_ctxs.erase(_conjunct_ctxs.begin() + conjunct_index); } + auto checker = [&](int index) { + return _conjunct_to_predicate[index] != -1 && list[_conjunct_to_predicate[index]]; + }; + std::string vconjunct_information = _peel_pushed_vconjunct(checker); + _scanner_profile->add_info_string("VconjunctExprTree", vconjunct_information); + RETURN_IF_ERROR(start_scanners()); return Status::OK(); @@ -443,9 +459,9 @@ void EsHttpScanNode::scanner_worker(int start_idx, int length, std::promise scanner( - new VEsHttpScanner(_runtime_state, runtime_profile(), _tuple_id, properties, - scanner_expr_ctxs, &counter, doc_value_mode)); + std::unique_ptr scanner(new vectorized::VEsHttpScanner( + _runtime_state, runtime_profile(), _tuple_id, properties, scanner_expr_ctxs, + &counter, doc_value_mode)); status = scanner_scan(std::move(scanner)); } if (!status.ok()) { diff --git a/be/src/exec/es_http_scan_node.h b/be/src/exec/es_http_scan_node.h index 2564294f39..0fc9fac376 100644 --- a/be/src/exec/es_http_scan_node.h +++ b/be/src/exec/es_http_scan_node.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -#ifndef BE_EXEC_ES_HTTP_SCAN_NODE_H -#define BE_EXEC_ES_HTTP_SCAN_NODE_H +#pragma once #include #include @@ -99,7 +98,7 @@ private: Status scanner_scan(std::unique_ptr scanner, const std::vector& conjunct_ctxs, EsScanCounter* counter); - virtual Status scanner_scan(std::unique_ptr scanner) { + virtual Status scanner_scan(std::unique_ptr scanner) { return Status::NotSupported("vectorized scan in EsHttpScanNode is not supported!"); }; @@ -117,8 +116,9 @@ private: std::vector _predicates; std::vector _predicate_to_conjunct; + std::vector _conjunct_to_predicate; + + std::unique_ptr _scanner_profile; }; } // namespace doris - -#endif diff --git a/be/src/exec/olap_scan_node.cpp b/be/src/exec/olap_scan_node.cpp index f1f0169834..af26ae18d1 100644 --- a/be/src/exec/olap_scan_node.cpp +++ b/be/src/exec/olap_scan_node.cpp @@ -83,10 +83,8 @@ Status OlapScanNode::init(const TPlanNode& tnode, RuntimeState* state) { for (int i = 0; i < filter_size; ++i) { IRuntimeFilter* runtime_filter = nullptr; const auto& filter_desc = _runtime_filter_descs[i]; - RETURN_IF_ERROR(state->runtime_filter_mgr()->regist_filter(RuntimeFilterRole::CONSUMER, - filter_desc, - state->query_options(), - id())); + RETURN_IF_ERROR(state->runtime_filter_mgr()->regist_filter( + RuntimeFilterRole::CONSUMER, filter_desc, state->query_options(), id())); RETURN_IF_ERROR(state->runtime_filter_mgr()->get_consume_filter(filter_desc.filter_id, &runtime_filter)); @@ -162,7 +160,7 @@ void OlapScanNode::_init_counter(RuntimeState* state) { _olap_wait_batch_queue_timer = ADD_TIMER(_runtime_profile, "BatchQueueWaitTime"); // for the purpose of debugging or profiling - for (int i = 0; i < sizeof(_general_debug_timer)/sizeof(*_general_debug_timer); ++i) { + for (int i = 0; i < GENERAL_DEBUG_COUNT; ++i) { char name[64]; snprintf(name, sizeof(name), "GeneralDebugTimer%d", i); _general_debug_timer[i] = ADD_TIMER(_segment_profile, name); @@ -529,6 +527,7 @@ void OlapScanNode::remove_pushed_conjuncts(RuntimeState* state) { iter->second->runtimefilter->set_push_down_profile(); } } + // set vconjunct_ctx is empty, if all conjunct if (_direct_conjunct_size == 0) { if (_vconjunct_ctx_ptr.get() != nullptr) { @@ -536,8 +535,11 @@ void OlapScanNode::remove_pushed_conjuncts(RuntimeState* state) { _vconjunct_ctx_ptr = nullptr; } } + // filter idle conjunct in vexpr_contexts - _peel_pushed_conjuncts(); + auto checker = [&](int index) { return _pushed_conjuncts_index.count(index); }; + std::string vconjunct_information = _peel_pushed_vconjunct(checker); + _scanner_profile->add_info_string("VconjunctExprTree", vconjunct_information); } void OlapScanNode::eval_const_conjuncts() { @@ -1679,48 +1681,4 @@ Status OlapScanNode::add_one_batch(RowBatch* row_batch) { _row_batch_added_cv.notify_one(); return Status::OK(); } - - -vectorized::VExpr* OlapScanNode::_dfs_peel_conjunct(vectorized::VExpr* expr, int& leaf_index) { - static constexpr auto is_leaf = [](vectorized::VExpr* expr) { return !expr->is_and_expr(); }; - - if (is_leaf(expr)) { - return _pushed_conjuncts_index.count(leaf_index++) ? nullptr : expr; - } else { - vectorized::VExpr* left_child = _dfs_peel_conjunct(expr->children()[0], leaf_index); - vectorized::VExpr* right_child = _dfs_peel_conjunct(expr->children()[1], leaf_index); - - if (left_child != nullptr && right_child != nullptr) { - expr->set_children({left_child, right_child}); - return expr; - } - // here do not close Expr* now - return left_child != nullptr ? left_child : right_child; - } -} - -// This function is used to remove pushed expr in expr tree. -// It relies on the logic of function convertConjunctsToAndCompoundPredicate() of FE splicing expr. -// It requires FE to satisfy each splicing with 'and' expr, and spliced from left to right, in order. -// Expr tree specific forms do not require requirements. -void OlapScanNode::_peel_pushed_conjuncts() { - if (_vconjunct_ctx_ptr.get() == nullptr) return; - - int leaf_index = 0; - vectorized::VExpr* conjunct_expr_root = (*_vconjunct_ctx_ptr.get())->root(); - - if (conjunct_expr_root != nullptr) { - vectorized::VExpr* new_conjunct_expr_root = - _dfs_peel_conjunct(conjunct_expr_root, leaf_index); - if (new_conjunct_expr_root == nullptr) { - _vconjunct_ctx_ptr = nullptr; - _scanner_profile->add_info_string("VconjunctExprTree", "null"); - } else { - (*_vconjunct_ctx_ptr.get())->set_root(new_conjunct_expr_root); - _scanner_profile->add_info_string("VconjunctExprTree", - new_conjunct_expr_root->debug_string()); - } - } -} - } // namespace doris diff --git a/be/src/exec/olap_scan_node.h b/be/src/exec/olap_scan_node.h index 82e98d5c0b..5ebd647435 100644 --- a/be/src/exec/olap_scan_node.h +++ b/be/src/exec/olap_scan_node.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H -#define DORIS_BE_SRC_QUERY_EXEC_OLAP_SCAN_NODE_H +#pragma once #include #include @@ -57,7 +56,7 @@ public: Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override; Status collect_query_statistics(QueryStatistics* statistics) override; Status close(RuntimeState* state) override; - Status set_scan_ranges(const std::vector & scan_ranges) override; + Status set_scan_ranges(const std::vector& scan_ranges) override; inline void set_no_agg_finalize() { _need_agg_finalize = false; } protected: @@ -137,7 +136,9 @@ protected: // Write debug string of this into out. void debug_string(int indentation_level, std::stringstream* out) const override {} - const std::vector& runtime_filter_descs() const { return _runtime_filter_descs; } + const std::vector& runtime_filter_descs() const { + return _runtime_filter_descs; + } void _init_counter(RuntimeState* state); // OLAP_SCAN_NODE profile layering: OLAP_SCAN_NODE, OlapScanner, and SegmentIterator @@ -328,11 +329,6 @@ protected: // for debugging or profiling, record any info as you want RuntimeProfile::Counter* _general_debug_timer[GENERAL_DEBUG_COUNT] = {}; - - vectorized::VExpr* _dfs_peel_conjunct(vectorized::VExpr* expr, int& leaf_index); - void _peel_pushed_conjuncts(); // remove pushed expr from conjunct tree }; } // namespace doris - -#endif diff --git a/be/src/exec/scan_node.cpp b/be/src/exec/scan_node.cpp index f292ed31f2..9f3e9cadb7 100644 --- a/be/src/exec/scan_node.cpp +++ b/be/src/exec/scan_node.cpp @@ -17,6 +17,8 @@ #include "exec/scan_node.h" +#include "vec/utils/util.hpp" + namespace doris { const std::string ScanNode::_s_bytes_read_counter = "BytesRead"; @@ -40,4 +42,30 @@ Status ScanNode::prepare(RuntimeState* state) { return Status::OK(); } +// This function is used to remove pushed expr in expr tree. +// It relies on the logic of function convertConjunctsToAndCompoundPredicate() of FE splicing expr. +// It requires FE to satisfy each splicing with 'and' expr, and spliced from left to right, in order. +// Expr tree specific forms do not require requirements. +std::string ScanNode::_peel_pushed_vconjunct(const std::function& checker) { + if (_vconjunct_ctx_ptr.get() == nullptr) { + return "null"; + } + + int leaf_index = 0; + vectorized::VExpr* conjunct_expr_root = (*_vconjunct_ctx_ptr.get())->root(); + + if (conjunct_expr_root != nullptr) { + vectorized::VExpr* new_conjunct_expr_root = vectorized::VectorizedUtils::dfs_peel_conjunct( + conjunct_expr_root, leaf_index, checker); + if (new_conjunct_expr_root == nullptr) { + _vconjunct_ctx_ptr = nullptr; + } else { + (*_vconjunct_ctx_ptr.get())->set_root(new_conjunct_expr_root); + return new_conjunct_expr_root->debug_string(); + } + } + + return "null"; +} + } // namespace doris diff --git a/be/src/exec/scan_node.h b/be/src/exec/scan_node.h index bd0999127f..9808b874e6 100644 --- a/be/src/exec/scan_node.h +++ b/be/src/exec/scan_node.h @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_BE_SRC_QUERY_EXEC_SCAN_NODE_H -#define DORIS_BE_SRC_QUERY_EXEC_SCAN_NODE_H +#pragma once #include #include "exec/exec_node.h" #include "gen_cpp/PaloInternalService_types.h" #include "util/runtime_profile.h" +#include "vec/exprs/vexpr.h" namespace doris { @@ -90,6 +90,9 @@ public: static const std::string _s_num_disks_accessed_counter; protected: + std::string _peel_pushed_vconjunct( + const std::function& checker); // remove pushed expr from conjunct tree + RuntimeProfile::Counter* _bytes_read_counter; // # bytes read from the scanner // # rows/tuples read from the scanner (including those discarded by eval_conjuncts()) RuntimeProfile::Counter* _rows_read_counter; @@ -99,5 +102,3 @@ protected: }; } // namespace doris - -#endif diff --git a/be/src/vec/CMakeLists.txt b/be/src/vec/CMakeLists.txt index d9ec3090ae..91f65f9b7f 100644 --- a/be/src/vec/CMakeLists.txt +++ b/be/src/vec/CMakeLists.txt @@ -151,6 +151,7 @@ set(VEC_FILES functions/function_rpc.cpp functions/function_convert_tz.cpp functions/least_greast.cpp + functions/function_fake.cpp olap/vgeneric_iterators.cpp olap/vcollect_iterator.cpp olap/block_reader.cpp diff --git a/be/src/vec/exec/ves_http_scanner.cpp b/be/src/vec/exec/ves_http_scanner.cpp index 6087257d1d..c06da0617b 100644 --- a/be/src/vec/exec/ves_http_scanner.cpp +++ b/be/src/vec/exec/ves_http_scanner.cpp @@ -17,7 +17,7 @@ #include "vec/exec/ves_http_scanner.h" -namespace doris { +namespace doris::vectorized { VEsHttpScanner::~VEsHttpScanner() { close(); @@ -43,8 +43,8 @@ Status VEsHttpScanner::get_next(std::vector& colum COUNTER_UPDATE(_rows_read_counter, 1); SCOPED_TIMER(_materialize_timer); - RETURN_IF_ERROR(_es_scroll_parser->fill_columns(_tuple_desc, columns, tuple_pool, &_line_eof, - docvalue_context)); + RETURN_IF_ERROR(_es_scroll_parser->fill_columns(_tuple_desc, columns, tuple_pool, + &_line_eof, docvalue_context)); if (!_line_eof) { break; } @@ -53,4 +53,4 @@ Status VEsHttpScanner::get_next(std::vector& colum return Status::OK(); } -} // namespace doris +} // namespace doris::vectorized diff --git a/be/src/vec/exec/ves_http_scanner.h b/be/src/vec/exec/ves_http_scanner.h index 3692a61b7d..5fa83db6fd 100644 --- a/be/src/vec/exec/ves_http_scanner.h +++ b/be/src/vec/exec/ves_http_scanner.h @@ -16,28 +16,24 @@ // specific language governing permissions and limitations // under the License. -#ifndef BE_EXEC_VES_HTTP_SCANNER_H -#define BE_EXEC_VES_HTTP_SCANNER_H +#pragma once #include -namespace doris { +namespace doris::vectorized { class VEsHttpScanner : public EsHttpScanner { public: VEsHttpScanner(RuntimeState* state, RuntimeProfile* profile, TupleId tuple_id, - const std::map& properties, - const std::vector& conjunct_ctxs, EsScanCounter* counter, - bool doc_value_mode): EsHttpScanner(state, profile, tuple_id, properties, - conjunct_ctxs, counter, doc_value_mode) {}; + const std::map& properties, + const std::vector& conjunct_ctxs, EsScanCounter* counter, + bool doc_value_mode) + : EsHttpScanner(state, profile, tuple_id, properties, conjunct_ctxs, counter, + doc_value_mode) {}; ~VEsHttpScanner(); - Status get_next(std::vector& columns, - MemPool* tuple_pool, bool* eof, - const std::map& docvalue_context); - + Status get_next(std::vector& columns, MemPool* tuple_pool, + bool* eof, const std::map& docvalue_context); }; -} // namespace doris - -#endif +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_convert_tz.h b/be/src/vec/functions/function_convert_tz.h index 7af61b3a75..f96031c58b 100644 --- a/be/src/vec/functions/function_convert_tz.h +++ b/be/src/vec/functions/function_convert_tz.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#pragma once + #include "vec/columns/columns_number.h" #include "vec/common/string_ref.h" #include "vec/core/types.h" diff --git a/be/src/vec/functions/function_date_or_datetime_computation.h b/be/src/vec/functions/function_date_or_datetime_computation.h index 7fdffef80a..5a106fddbe 100644 --- a/be/src/vec/functions/function_date_or_datetime_computation.h +++ b/be/src/vec/functions/function_date_or_datetime_computation.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#pragma once + #include "common/logging.h" #include "fmt/format.h" #include "runtime/datetime_value.h" diff --git a/be/src/vec/functions/function_date_or_datetime_to_something.h b/be/src/vec/functions/function_date_or_datetime_to_something.h index efef7c39d3..1440126390 100644 --- a/be/src/vec/functions/function_date_or_datetime_to_something.h +++ b/be/src/vec/functions/function_date_or_datetime_to_something.h @@ -18,6 +18,8 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionDateOrDatetimeToSomething.h // and modified by Doris +#pragma once + #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" #include "vec/functions/date_time_transforms.h" diff --git a/be/src/vec/functions/function_date_or_datetime_to_string.h b/be/src/vec/functions/function_date_or_datetime_to_string.h index 42922c8af4..15cc226e4c 100644 --- a/be/src/vec/functions/function_date_or_datetime_to_string.h +++ b/be/src/vec/functions/function_date_or_datetime_to_string.h @@ -18,6 +18,8 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionDateOrDatetimeToString.h // and modified by Doris +#pragma once + #include "vec/data_types/data_type_date.h" #include "vec/data_types/data_type_date_time.h" #include "vec/data_types/data_type_string.h" diff --git a/be/src/vec/functions/function_datetime_string_to_string.h b/be/src/vec/functions/function_datetime_string_to_string.h index b63bfcde5d..6502d0fd90 100644 --- a/be/src/vec/functions/function_datetime_string_to_string.h +++ b/be/src/vec/functions/function_datetime_string_to_string.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#pragma once + #include "vec/columns/column_nullable.h" #include "vec/columns/columns_number.h" #include "vec/data_types/data_type_date.h" diff --git a/be/src/vec/functions/function_fake.cpp b/be/src/vec/functions/function_fake.cpp new file mode 100644 index 0000000000..0aa9bf04a0 --- /dev/null +++ b/be/src/vec/functions/function_fake.cpp @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "vec/functions/function_fake.h" + +namespace doris::vectorized { + +void register_function_fake(SimpleFunctionFactory& factory) { + factory.register_function>(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_fake.h b/be/src/vec/functions/function_fake.h new file mode 100644 index 0000000000..519fb08a42 --- /dev/null +++ b/be/src/vec/functions/function_fake.h @@ -0,0 +1,61 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "common/status.h" +#include "vec/core/types.h" +#include "vec/data_types/data_type_number.h" +#include "vec/functions/simple_function_factory.h" +#include "vec/utils/util.hpp" + +namespace doris::vectorized { + +struct FunctionEsqueryImpl { + static constexpr auto name = "esquery"; + static DataTypePtr get_return_type_impl(const DataTypes& arguments) { + return std::make_shared(); + } +}; + +//FunctionFake is use for some function call expr only work at prepare/open phase, do not support execute(). +template +class FunctionFake : public IFunction { +public: + static constexpr auto name = Impl::name; + + static FunctionPtr create() { return std::make_shared(); } + + String get_name() const override { return name; } + + size_t get_number_of_arguments() const override { return 0; } + + bool is_variadic() const override { return true; } + + DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { + return Impl::get_return_type_impl(arguments); + } + + bool use_default_implementation_for_nulls() const override { return true; } + + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, + size_t result, size_t input_rows_count) override { + return Status::NotSupported(fmt::format("Fake function {} do not support execute", name)); + } +}; + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_grouping.h b/be/src/vec/functions/function_grouping.h index f8ea725996..01cfe497a6 100644 --- a/be/src/vec/functions/function_grouping.h +++ b/be/src/vec/functions/function_grouping.h @@ -15,14 +15,13 @@ // specific language governing permissions and limitations // under the License. -#ifndef DORIS_FUNCTION_GROUPING_H -#define DORIS_FUNCTION_GROUPING_H +#pragma once -#include "vec/functions/simple_function_factory.h" #include "vec/columns/column_nullable.h" -#include "vec/functions/function_helpers.h" -#include "vec/utils/util.hpp" #include "vec/data_types/get_least_supertype.h" +#include "vec/functions/function_helpers.h" +#include "vec/functions/simple_function_factory.h" +#include "vec/utils/util.hpp" namespace doris::vectorized { @@ -64,5 +63,4 @@ public: String get_name() const override { return name; } }; -} -#endif //DORIS_FUNCTION_GROUPING_H +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_hash.h b/be/src/vec/functions/function_hash.h index a908c36489..7ea034b4a5 100644 --- a/be/src/vec/functions/function_hash.h +++ b/be/src/vec/functions/function_hash.h @@ -19,6 +19,7 @@ // and modified by Doris #pragma once + #include #include "vec/columns/column_string.h" diff --git a/be/src/vec/functions/function_ifnull.h b/be/src/vec/functions/function_ifnull.h index 26fe18cf0e..b6627641c0 100644 --- a/be/src/vec/functions/function_ifnull.h +++ b/be/src/vec/functions/function_ifnull.h @@ -18,15 +18,14 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/Ifnull.h // and modified by Doris -#ifndef DORIS_FUNCTION_IFNULL_H -#define DORIS_FUNCTION_IFNULL_H +#pragma once -#include "vec/functions/simple_function_factory.h" #include "vec/columns/column_nullable.h" -#include "vec/functions/function_helpers.h" -#include "vec/utils/util.hpp" -#include "vec/functions/function_string.h" #include "vec/data_types/get_least_supertype.h" +#include "vec/functions/function_helpers.h" +#include "vec/functions/function_string.h" +#include "vec/functions/simple_function_factory.h" +#include "vec/utils/util.hpp" namespace doris::vectorized { class FunctionIfNull : public IFunction { @@ -59,43 +58,35 @@ public: return Status::OK(); } - ColumnWithTypeAndName null_column_arg0 { - nullptr, std::make_shared(),"" - }; - ColumnWithTypeAndName nested_column_arg0 { - nullptr, col_left.type, "" - }; + ColumnWithTypeAndName null_column_arg0 {nullptr, std::make_shared(), ""}; + ColumnWithTypeAndName nested_column_arg0 {nullptr, col_left.type, ""}; /// implement isnull(col_left) logic if (auto* nullable = check_and_get_column(*col_left.column)) { null_column_arg0.column = nullable->get_null_map_column_ptr(); nested_column_arg0.column = nullable->get_nested_column_ptr(); - nested_column_arg0.type = reinterpret_cast( - nested_column_arg0.type.get())->get_nested_type(); + nested_column_arg0.type = + reinterpret_cast(nested_column_arg0.type.get()) + ->get_nested_type(); } else { block.get_by_position(result).column = col_left.column; return Status::OK(); } - const ColumnsWithTypeAndName if_columns - { - null_column_arg0, - block.get_by_position(arguments[1]), - nested_column_arg0 - }; + const ColumnsWithTypeAndName if_columns { + null_column_arg0, block.get_by_position(arguments[1]), nested_column_arg0}; - Block temporary_block( - { - null_column_arg0, - block.get_by_position(arguments[1]), - nested_column_arg0, - block.get_by_position(result), - }); + Block temporary_block({ + null_column_arg0, + block.get_by_position(arguments[1]), + nested_column_arg0, + block.get_by_position(result), + }); - auto func_if = SimpleFunctionFactory::instance().get_function("if", if_columns, block.get_by_position(result).type); + auto func_if = SimpleFunctionFactory::instance().get_function( + "if", if_columns, block.get_by_position(result).type); func_if->execute(context, temporary_block, {0, 1, 2}, 3, input_rows_count); block.get_by_position(result).column = temporary_block.get_by_position(3).column; return Status::OK(); } }; -} -#endif //DORIS_FUNCTION_IFNULL_H +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_multi_same_args.h b/be/src/vec/functions/function_multi_same_args.h index 056544e188..95ecd1d034 100644 --- a/be/src/vec/functions/function_multi_same_args.h +++ b/be/src/vec/functions/function_multi_same_args.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#pragma once + #include "udf/udf.h" #include "vec/data_types/get_least_supertype.h" #include "vec/functions/function_helpers.h" @@ -52,4 +54,4 @@ public: return Status::OK(); } }; -}; +}; // namespace doris::vectorized diff --git a/be/src/vec/functions/function_rpc.h b/be/src/vec/functions/function_rpc.h index 2c7535adfc..43bfe3acc2 100644 --- a/be/src/vec/functions/function_rpc.h +++ b/be/src/vec/functions/function_rpc.h @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#pragma once + #include "vec/functions/function.h" namespace doris { diff --git a/be/src/vec/functions/function_string_to_string.h b/be/src/vec/functions/function_string_to_string.h index cd4ae44519..78e81f73d2 100644 --- a/be/src/vec/functions/function_string_to_string.h +++ b/be/src/vec/functions/function_string_to_string.h @@ -18,6 +18,8 @@ // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionStringToString.h // and modified by Doris +#pragma once + #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" #include "vec/data_types/data_type_number.h" diff --git a/be/src/vec/functions/simple_function_factory.h b/be/src/vec/functions/simple_function_factory.h index 390418d7d8..7ab9061379 100644 --- a/be/src/vec/functions/simple_function_factory.h +++ b/be/src/vec/functions/simple_function_factory.h @@ -19,6 +19,7 @@ // and modified by Doris #pragma once + #include #include @@ -71,6 +72,7 @@ void register_function_grouping(SimpleFunctionFactory& factory); void register_function_datetime_floor_ceil(SimpleFunctionFactory& factory); void register_function_convert_tz(SimpleFunctionFactory& factory); void register_function_least_greast(SimpleFunctionFactory& factory); +void register_function_fake(SimpleFunctionFactory& factory); class SimpleFunctionFactory { using Creator = std::function; @@ -191,6 +193,7 @@ public: register_function_datetime_floor_ceil(instance); register_function_convert_tz(instance); register_function_least_greast(instance); + register_function_fake(instance); }); return instance; } diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp index 04c5da726d..6cf9a14471 100644 --- a/be/src/vec/utils/util.hpp +++ b/be/src/vec/utils/util.hpp @@ -16,6 +16,7 @@ // under the License. #pragma once + #include #include @@ -23,6 +24,7 @@ #include "runtime/descriptors.h" #include "vec/columns/column_nullable.h" #include "vec/core/block.h" +#include "vec/exprs/vexpr.h" namespace doris::vectorized { class VectorizedUtils { @@ -47,8 +49,9 @@ public: size_t size = dst.size(); auto* __restrict l = dst.data(); auto* __restrict r = src.data(); - for (size_t i = 0; i < size; ++i) + for (size_t i = 0; i < size; ++i) { l[i] |= r[i]; + } } static DataTypes get_data_types(const RowDescriptor& row_desc) { @@ -60,7 +63,27 @@ public: } return data_types; } + + static VExpr* dfs_peel_conjunct(VExpr* expr, int& leaf_index, + std::function checker) { + static constexpr auto is_leaf = [](VExpr* expr) { return !expr->is_and_expr(); }; + + if (is_leaf(expr)) { + return checker(leaf_index++) ? nullptr : expr; + } else { + VExpr* left_child = dfs_peel_conjunct(expr->children()[0], leaf_index, checker); + VExpr* right_child = dfs_peel_conjunct(expr->children()[1], leaf_index, checker); + + if (left_child != nullptr && right_child != nullptr) { + expr->set_children({left_child, right_child}); + return expr; + } + // here do not close Expr* now + return left_child != nullptr ? left_child : right_child; + } + } }; + } // namespace doris::vectorized namespace apache::thrift { @@ -76,4 +99,5 @@ ThriftStruct from_json_string(const std::string& json_val) { ts.read(&protocol); return ts; } + } // namespace apache::thrift diff --git a/gensrc/script/doris_builtins_functions.py b/gensrc/script/doris_builtins_functions.py index 4d1f5d3799..8bd6829951 100755 --- a/gensrc/script/doris_builtins_functions.py +++ b/gensrc/script/doris_builtins_functions.py @@ -823,7 +823,7 @@ visible_functions = [ [['esquery'], 'BOOLEAN', ['VARCHAR', 'VARCHAR'], '_ZN5doris11ESFunctions5matchEPN' - '9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', '', ''], + '9doris_udf15FunctionContextERKNS1_9StringValES6_', '', '', 'vec', ''], # String builtin functions [['substr', 'substring'], 'VARCHAR', ['VARCHAR', 'INT'],