[feature](vectorization) Support Vectorized Exec Engine In Doris (#7785)

# Proposed changes

Issue Number: close #6238

    Co-authored-by: HappenLee <happenlee@hotmail.com>
    Co-authored-by: stdpain <34912776+stdpain@users.noreply.github.com>
    Co-authored-by: Zhengguo Yang <yangzhgg@gmail.com>
    Co-authored-by: wangbo <506340561@qq.com>
    Co-authored-by: emmymiao87 <522274284@qq.com>
    Co-authored-by: Pxl <952130278@qq.com>
    Co-authored-by: zhangstar333 <87313068+zhangstar333@users.noreply.github.com>
    Co-authored-by: thinker <zchw100@qq.com>
    Co-authored-by: Zeno Yang <1521564989@qq.com>
    Co-authored-by: Wang Shuo <wangshuo128@gmail.com>
    Co-authored-by: zhoubintao <35688959+zbtzbtzbt@users.noreply.github.com>
    Co-authored-by: Gabriel <gabrielleebuaa@gmail.com>
    Co-authored-by: xinghuayu007 <1450306854@qq.com>
    Co-authored-by: weizuo93 <weizuo@apache.org>
    Co-authored-by: yiguolei <guoleiyi@tencent.com>
    Co-authored-by: anneji-dev <85534151+anneji-dev@users.noreply.github.com>
    Co-authored-by: awakeljw <993007281@qq.com>
    Co-authored-by: taberylyang <95272637+taberylyang@users.noreply.github.com>
    Co-authored-by: Cui Kaifeng <48012748+azurenake@users.noreply.github.com>


## Problem Summary:

### 1. Some code from clickhouse

**ClickHouse is an excellent implementation of the vectorized execution engine database,
so here we have referenced and learned a lot from its excellent implementation in terms of
data structure and function implementation.
We are based on ClickHouse v19.16.2.2 and would like to thank the ClickHouse community and developers.**

The following comment has been added to the code from Clickhouse, eg:
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Interpreters/AggregationCommon.h
// and modified by Doris

### 2. Support exec node and query:
* vaggregation_node
* vanalytic_eval_node
* vassert_num_rows_node
* vblocking_join_node
* vcross_join_node
* vempty_set_node
* ves_http_scan_node
* vexcept_node
* vexchange_node
* vintersect_node
* vmysql_scan_node
* vodbc_scan_node
* volap_scan_node
* vrepeat_node
* vschema_scan_node
* vselect_node
* vset_operation_node
* vsort_node
* vunion_node
* vhash_join_node

You can run exec engine of SSB/TPCH and 70% TPCDS stand query test set.

### 3. Data Model

Vec Exec Engine Support **Dup/Agg/Unq** table, Support Block Reader Vectorized.
Segment Vec is working in process.

### 4. How to use

1. Set the environment variable `set enable_vectorized_engine = true; `(required)
2. Set the environment variable `set batch_size = 4096; ` (recommended)

### 5. Some diff from origin exec engine

https://github.com/doris-vectorized/doris-vectorized/issues/294

## Checklist(Required)

1. Does it affect the original behavior: (No)
2. Has unit tests been added: (Yes)
3. Has document been added or modified: (No)
4. Does it need to update dependencies: (No)
5. Are there any changes that cannot be rolled back: (Yes)
This commit is contained in:
HappenLee
2022-01-18 10:07:15 +08:00
committed by GitHub
parent ebc27a40d7
commit e1d7233e9c
498 changed files with 69593 additions and 479 deletions

View File

@ -63,6 +63,26 @@
#include "util/debug_util.h"
#include "util/runtime_profile.h"
#include "vec/core/block.h"
#include "vec/exec/join/vhash_join_node.h"
#include "vec/exec/vaggregation_node.h"
#include "vec/exec/ves_http_scan_node.h"
#include "vec/exec/vcross_join_node.h"
#include "vec/exec/vexchange_node.h"
#include "vec/exec/vmysql_scan_node.h"
#include "vec/exec/vodbc_scan_node.h"
#include "vec/exec/volap_scan_node.h"
#include "vec/exec/vsort_node.h"
#include "vec/exec/vunion_node.h"
#include "vec/exec/vintersect_node.h"
#include "vec/exec/vexcept_node.h"
#include "vec/exec/vanalytic_eval_node.h"
#include "vec/exec/vassert_num_rows_node.h"
#include "vec/exec/vselect_node.h"
#include "vec/exprs/vexpr.h"
#include "vec/exec/vempty_set_node.h"
#include "vec/exec/vschema_scan_node.h"
#include "vec/exec/vrepeat_node.h"
namespace doris {
const std::string ExecNode::ROW_THROUGHPUT_COUNTER = "RowsReturnedRate";
@ -167,6 +187,9 @@ Status ExecNode::init(const TPlanNode& tnode, RuntimeState* state) {
init_runtime_profile(profile);
if (tnode.__isset.vconjunct) {
_vconjunct_ctx_ptr.reset(new doris::vectorized::VExprContext*);
RETURN_IF_ERROR(doris::vectorized::VExpr::create_expr_tree(_pool, tnode.vconjunct,
_vconjunct_ctx_ptr.get()));
}
RETURN_IF_ERROR(Expr::create_expr_trees(_pool, tnode.conjuncts, &_conjunct_ctxs));
@ -189,6 +212,9 @@ Status ExecNode::prepare(RuntimeState* state) {
_mem_tracker);
_expr_mem_pool.reset(new MemPool(_expr_mem_tracker.get()));
if (_vconjunct_ctx_ptr) {
RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->prepare(state, row_desc(), expr_mem_tracker()));
}
RETURN_IF_ERROR(Expr::prepare(_conjunct_ctxs, state, row_desc(), expr_mem_tracker()));
// TODO(zc):
@ -202,6 +228,9 @@ Status ExecNode::prepare(RuntimeState* state) {
Status ExecNode::open(RuntimeState* state) {
RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN));
if (_vconjunct_ctx_ptr) {
RETURN_IF_ERROR((*_vconjunct_ctx_ptr)->open(state));
}
return Expr::open(_conjunct_ctxs, state);
}
@ -240,6 +269,7 @@ Status ExecNode::close(RuntimeState* state) {
}
}
if (_vconjunct_ctx_ptr) (*_vconjunct_ctx_ptr)->close(state);
Expr::close(_conjunct_ctxs, state);
if (expr_mem_pool() != nullptr) {
@ -339,12 +369,42 @@ Status ExecNode::create_tree_helper(RuntimeState* state, ObjectPool* pool,
Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanNode& tnode,
const DescriptorTbl& descs, ExecNode** node) {
if (state->enable_vectorized_exec()) {
return Status::InternalError("unsupport enable_vectorized_engine");
}
std::stringstream error_msg;
if (state->enable_vectorized_exec()) {
switch (tnode.node_type) {
case TPlanNodeType::OLAP_SCAN_NODE:
case TPlanNodeType::ASSERT_NUM_ROWS_NODE:
case TPlanNodeType::HASH_JOIN_NODE:
case TPlanNodeType::AGGREGATION_NODE:
case TPlanNodeType::UNION_NODE:
case TPlanNodeType::CROSS_JOIN_NODE:
case TPlanNodeType::SORT_NODE:
case TPlanNodeType::EXCHANGE_NODE:
case TPlanNodeType::ODBC_SCAN_NODE:
case TPlanNodeType::MYSQL_SCAN_NODE:
case TPlanNodeType::INTERSECT_NODE:
case TPlanNodeType::EXCEPT_NODE:
case TPlanNodeType::ES_HTTP_SCAN_NODE:
case TPlanNodeType::EMPTY_SET_NODE:
case TPlanNodeType::SCHEMA_SCAN_NODE:
case TPlanNodeType::ANALYTIC_EVAL_NODE:
case TPlanNodeType::SELECT_NODE:
case TPlanNodeType::REPEAT_NODE:
break;
default: {
const auto& i = _TPlanNodeType_VALUES_TO_NAMES.find(tnode.node_type);
const char* str = "unknown node type";
if (i != _TPlanNodeType_VALUES_TO_NAMES.end()) {
str = i->second;
}
error_msg << "V" << str << " not implemented";
return Status::InternalError(error_msg.str());
}
}
}
VLOG_CRITICAL << "tnode:\n" << apache::thrift::ThriftDebugString(tnode);
switch (tnode.node_type) {
case TPlanNodeType::CSV_SCAN_NODE:
@ -353,14 +413,20 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
case TPlanNodeType::MYSQL_SCAN_NODE:
#ifdef DORIS_WITH_MYSQL
*node = pool->add(new MysqlScanNode(pool, tnode, descs));
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VMysqlScanNode(pool, tnode, descs));
} else
*node = pool->add(new MysqlScanNode(pool, tnode, descs));
return Status::OK();
#else
return Status::InternalError(
"Don't support MySQL table, you should rebuild Doris with WITH_MYSQL option ON");
#endif
case TPlanNodeType::ODBC_SCAN_NODE:
*node = pool->add(new OdbcScanNode(pool, tnode, descs));
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VOdbcScanNode(pool, tnode, descs));
} else
*node = pool->add(new OdbcScanNode(pool, tnode, descs));
return Status::OK();
case TPlanNodeType::ES_SCAN_NODE:
@ -368,15 +434,24 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
return Status::OK();
case TPlanNodeType::ES_HTTP_SCAN_NODE:
*node = pool->add(new EsHttpScanNode(pool, tnode, descs));
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VEsHttpScanNode(pool, tnode, descs));
} else {
*node = pool->add(new EsHttpScanNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::SCHEMA_SCAN_NODE:
*node = pool->add(new SchemaScanNode(pool, tnode, descs));
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VSchemaScanNode(pool, tnode, descs));
} else {
*node = pool->add(new SchemaScanNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::OLAP_SCAN_NODE:
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VOlapScanNode(pool, tnode, descs));
} else {
*node = pool->add(new OlapScanNode(pool, tnode, descs));
}
@ -384,6 +459,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
case TPlanNodeType::AGGREGATION_NODE:
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::AggregationNode(pool, tnode, descs));
} else {
if (config::enable_partitioned_aggregation) {
*node = pool->add(new PartitionedAggregationNode(pool, tnode, descs));
@ -394,11 +470,16 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
return Status::OK();
case TPlanNodeType::HASH_JOIN_NODE:
*node = pool->add(new HashJoinNode(pool, tnode, descs));
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::HashJoinNode(pool, tnode, descs));
} else {
*node = pool->add(new HashJoinNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::CROSS_JOIN_NODE:
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VCrossJoinNode(pool, tnode, descs));
} else {
*node = pool->add(new CrossJoinNode(pool, tnode, descs));
}
@ -409,18 +490,27 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
return Status::OK();
case TPlanNodeType::EMPTY_SET_NODE:
*node = pool->add(new EmptySetNode(pool, tnode, descs));
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VEmptySetNode(pool, tnode, descs));
} else {
*node = pool->add(new EmptySetNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::EXCHANGE_NODE:
if (state->enable_vectorized_exec()) {
*node = pool->add(new doris::vectorized::VExchangeNode(pool, tnode, descs));
} else {
*node = pool->add(new ExchangeNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::SELECT_NODE:
*node = pool->add(new SelectNode(pool, tnode, descs));
if (state->enable_vectorized_exec()) {
*node = pool->add(new doris::vectorized::VSelectNode(pool, tnode, descs));
} else {
*node = pool->add(new SelectNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::OLAP_REWRITE_NODE:
@ -429,6 +519,7 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
case TPlanNodeType::SORT_NODE:
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VSortNode(pool, tnode, descs));
} else {
if (tnode.sort_node.use_top_n) {
*node = pool->add(new TopNNode(pool, tnode, descs));
@ -439,8 +530,12 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
return Status::OK();
case TPlanNodeType::ANALYTIC_EVAL_NODE:
*node = pool->add(new AnalyticEvalNode(pool, tnode, descs));
break;
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VAnalyticEvalNode(pool, tnode, descs));
} else {
*node = pool->add(new AnalyticEvalNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::MERGE_NODE:
*node = pool->add(new MergeNode(pool, tnode, descs));
@ -448,17 +543,26 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
case TPlanNodeType::UNION_NODE:
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VUnionNode(pool, tnode, descs));
} else {
*node = pool->add(new UnionNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::INTERSECT_NODE:
*node = pool->add(new IntersectNode(pool, tnode, descs));
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VIntersectNode(pool, tnode, descs));
} else {
*node = pool->add(new IntersectNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::EXCEPT_NODE:
*node = pool->add(new ExceptNode(pool, tnode, descs));
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VExceptNode(pool, tnode, descs));
} else {
*node = pool->add(new ExceptNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::BROKER_SCAN_NODE:
@ -466,11 +570,19 @@ Status ExecNode::create_node(RuntimeState* state, ObjectPool* pool, const TPlanN
return Status::OK();
case TPlanNodeType::REPEAT_NODE:
*node = pool->add(new RepeatNode(pool, tnode, descs));
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VRepeatNode(pool, tnode, descs));
} else {
*node = pool->add(new RepeatNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::ASSERT_NUM_ROWS_NODE:
*node = pool->add(new AssertNumRowsNode(pool, tnode, descs));
if (state->enable_vectorized_exec()) {
*node = pool->add(new vectorized::VAssertNumRowsNode(pool, tnode, descs));
} else {
*node = pool->add(new AssertNumRowsNode(pool, tnode, descs));
}
return Status::OK();
case TPlanNodeType::TABLE_FUNCTION_NODE:
@ -637,6 +749,22 @@ Status ExecNode::claim_buffer_reservation(RuntimeState* state) {
Status ExecNode::release_unused_reservation() {
return _buffer_pool_client.DecreaseReservationTo(_resource_profile.min_reservation);
}
void ExecNode::release_block_memory(vectorized::Block& block, uint16_t child_idx) {
DCHECK(child_idx < _children.size());
block.clear_column_data(child(child_idx)->row_desc().num_materialized_slots());
}
void ExecNode::reached_limit(vectorized::Block* block, bool* eos) {
if (_limit != -1 and _num_rows_returned + block->rows() >= _limit) {
block->set_num_rows(_limit - _num_rows_returned);
*eos = true;
}
_num_rows_returned += block->rows();
COUNTER_SET(_rows_returned_counter, _num_rows_returned);
}
/*
Status ExecNode::enable_deny_reservation_debug_action() {
DCHECK_EQ(debug_action_, TDebugAction::SET_DENY_RESERVATION_PROBABILITY);