[Pipeline](shared_scan_opt) Support shared scan opt in pipeline exec engine
This commit is contained in:
@ -246,6 +246,7 @@ E(SEGCOMPACTION_INIT_READER, -3117);
|
||||
E(SEGCOMPACTION_INIT_WRITER, -3118);
|
||||
E(SEGCOMPACTION_FAILED, -3119);
|
||||
E(PIP_WAIT_FOR_RF, -3120);
|
||||
E(PIP_WAIT_FOR_SC, -3121);
|
||||
E(INVERTED_INDEX_INVALID_PARAMETERS, -6000);
|
||||
E(INVERTED_INDEX_NOT_SUPPORTED, -6001);
|
||||
E(INVERTED_INDEX_CLUCENE_ERROR, -6002);
|
||||
@ -383,6 +384,7 @@ public:
|
||||
ERROR_CTOR(EndOfFile, END_OF_FILE)
|
||||
ERROR_CTOR(InternalError, INTERNAL_ERROR)
|
||||
ERROR_CTOR(WaitForRf, PIP_WAIT_FOR_RF)
|
||||
ERROR_CTOR(WaitForScannerContext, PIP_WAIT_FOR_SC)
|
||||
ERROR_CTOR(RuntimeError, RUNTIME_ERROR)
|
||||
ERROR_CTOR(Cancelled, CANCELLED)
|
||||
ERROR_CTOR(MemoryLimitExceeded, MEM_LIMIT_EXCEEDED)
|
||||
@ -402,6 +404,8 @@ public:
|
||||
|
||||
bool ok() const { return _code == ErrorCode::OK; }
|
||||
|
||||
bool is_blocked_by_sc() const { return _code == ErrorCode::PIP_WAIT_FOR_SC; }
|
||||
|
||||
bool is_io_error() const {
|
||||
return ErrorCode::IO_ERROR == _code || ErrorCode::READ_UNENOUGH == _code ||
|
||||
ErrorCode::CHECKSUM_ERROR == _code || ErrorCode::FILE_DATA_ERROR == _code ||
|
||||
|
||||
@ -43,56 +43,6 @@
|
||||
#include "vec/runtime/shared_hash_table_controller.h"
|
||||
|
||||
namespace doris {
|
||||
// PrimitiveType->TExprNodeType
|
||||
// TODO: use constexpr if we use c++14
|
||||
TExprNodeType::type get_expr_node_type(PrimitiveType type) {
|
||||
switch (type) {
|
||||
case TYPE_BOOLEAN:
|
||||
return TExprNodeType::BOOL_LITERAL;
|
||||
|
||||
case TYPE_TINYINT:
|
||||
case TYPE_SMALLINT:
|
||||
case TYPE_INT:
|
||||
case TYPE_BIGINT:
|
||||
return TExprNodeType::INT_LITERAL;
|
||||
|
||||
case TYPE_LARGEINT:
|
||||
return TExprNodeType::LARGE_INT_LITERAL;
|
||||
break;
|
||||
|
||||
case TYPE_NULL:
|
||||
return TExprNodeType::NULL_LITERAL;
|
||||
|
||||
case TYPE_FLOAT:
|
||||
case TYPE_DOUBLE:
|
||||
case TYPE_TIME:
|
||||
case TYPE_TIMEV2:
|
||||
return TExprNodeType::FLOAT_LITERAL;
|
||||
break;
|
||||
|
||||
case TYPE_DECIMAL32:
|
||||
case TYPE_DECIMAL64:
|
||||
case TYPE_DECIMAL128I:
|
||||
case TYPE_DECIMALV2:
|
||||
return TExprNodeType::DECIMAL_LITERAL;
|
||||
|
||||
case TYPE_DATETIME:
|
||||
case TYPE_DATEV2:
|
||||
case TYPE_DATETIMEV2:
|
||||
return TExprNodeType::DATE_LITERAL;
|
||||
|
||||
case TYPE_CHAR:
|
||||
case TYPE_VARCHAR:
|
||||
case TYPE_HLL:
|
||||
case TYPE_OBJECT:
|
||||
case TYPE_STRING:
|
||||
return TExprNodeType::STRING_LITERAL;
|
||||
|
||||
default:
|
||||
DCHECK(false) << "Invalid type.";
|
||||
return TExprNodeType::NULL_LITERAL;
|
||||
}
|
||||
}
|
||||
|
||||
// PrimitiveType-> PColumnType
|
||||
// TODO: use constexpr if we use c++14
|
||||
|
||||
@ -25,13 +25,21 @@ namespace doris::pipeline {
|
||||
OPERATOR_CODE_GENERATOR(ScanOperator, SourceOperator)
|
||||
|
||||
bool ScanOperator::can_read() {
|
||||
if (_node->_eos || _node->_scanner_ctx->done() || _node->_scanner_ctx->no_schedule()) {
|
||||
// _eos: need eos
|
||||
// _scanner_ctx->done(): need finish
|
||||
// _scanner_ctx->no_schedule(): should schedule _scanner_ctx
|
||||
return true;
|
||||
if (!_node->_opened) {
|
||||
if (_node->_should_create_scanner || _node->ready_to_open()) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
return !_node->_scanner_ctx->empty_in_queue(); // there are some blocks to process
|
||||
if (_node->_eos || _node->_scanner_ctx->done() || _node->_scanner_ctx->no_schedule()) {
|
||||
// _eos: need eos
|
||||
// _scanner_ctx->done(): need finish
|
||||
// _scanner_ctx->no_schedule(): should schedule _scanner_ctx
|
||||
return true;
|
||||
} else {
|
||||
return _node->ready_to_read(); // there are some blocks to process
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -344,6 +344,9 @@ Status PipelineFragmentContext::prepare(const doris::TPipelineFragmentParams& re
|
||||
if (request.__isset.load_job_id) {
|
||||
_runtime_state->set_load_job_id(request.load_job_id);
|
||||
}
|
||||
if (request.__isset.shared_scan_opt) {
|
||||
_runtime_state->set_shared_scan_opt(request.shared_scan_opt);
|
||||
}
|
||||
|
||||
if (request.query_options.__isset.is_report_success) {
|
||||
fragment_context->set_is_report_success(request.query_options.is_report_success);
|
||||
|
||||
@ -135,6 +135,9 @@ Status PipelineTask::execute(bool* eos) {
|
||||
if (st.is<ErrorCode::PIP_WAIT_FOR_RF>()) {
|
||||
set_state(PipelineTaskState::BLOCKED_FOR_RF);
|
||||
return Status::OK();
|
||||
} else if (st.is<ErrorCode::PIP_WAIT_FOR_SC>()) {
|
||||
set_state(PipelineTaskState::BLOCKED_FOR_SOURCE);
|
||||
return Status::OK();
|
||||
}
|
||||
RETURN_IF_ERROR(st);
|
||||
}
|
||||
|
||||
@ -33,6 +33,7 @@
|
||||
#include "util/threadpool.h"
|
||||
#include "vec/exec/scan/scanner_scheduler.h"
|
||||
#include "vec/runtime/shared_hash_table_controller.h"
|
||||
#include "vec/runtime/shared_scanner_controller.h"
|
||||
|
||||
namespace doris {
|
||||
|
||||
@ -46,6 +47,7 @@ public:
|
||||
: fragment_num(total_fragment_num), timeout_second(-1), _exec_env(exec_env) {
|
||||
_start_time = DateTimeValue::local_time();
|
||||
_shared_hash_table_controller.reset(new vectorized::SharedHashTableController());
|
||||
_shared_scanner_controller.reset(new vectorized::SharedScannerController());
|
||||
}
|
||||
|
||||
~QueryFragmentsCtx() {
|
||||
@ -122,6 +124,10 @@ public:
|
||||
return _shared_hash_table_controller;
|
||||
}
|
||||
|
||||
std::shared_ptr<vectorized::SharedScannerController> get_shared_scanner_controller() {
|
||||
return _shared_scanner_controller;
|
||||
}
|
||||
|
||||
vectorized::RuntimePredicate& get_runtime_predicate() { return _runtime_predicate; }
|
||||
|
||||
public:
|
||||
@ -167,6 +173,7 @@ private:
|
||||
std::atomic<bool> _is_cancelled {false};
|
||||
|
||||
std::shared_ptr<vectorized::SharedHashTableController> _shared_hash_table_controller;
|
||||
std::shared_ptr<vectorized::SharedScannerController> _shared_scanner_controller;
|
||||
vectorized::RuntimePredicate _runtime_predicate;
|
||||
};
|
||||
|
||||
|
||||
@ -88,6 +88,7 @@ public:
|
||||
bool abort_on_default_limit_exceeded() const {
|
||||
return _query_options.abort_on_default_limit_exceeded;
|
||||
}
|
||||
int query_parallel_instance_num() const { return _query_options.parallel_instance; }
|
||||
int max_errors() const { return _query_options.max_errors; }
|
||||
int query_timeout() const { return _query_options.query_timeout; }
|
||||
int insert_timeout() const { return _query_options.insert_timeout; }
|
||||
@ -200,6 +201,10 @@ public:
|
||||
|
||||
int64_t load_job_id() const { return _load_job_id; }
|
||||
|
||||
void set_shared_scan_opt(bool shared_scan_opt) { _shared_scan_opt = shared_scan_opt; }
|
||||
|
||||
bool shared_scan_opt() const { return _shared_scan_opt; }
|
||||
|
||||
const std::string get_error_log_file_path() const { return _error_log_file_path; }
|
||||
|
||||
// append error msg and error line to file when loading data.
|
||||
@ -453,6 +458,7 @@ private:
|
||||
std::string _db_name;
|
||||
std::string _load_dir;
|
||||
int64_t _load_job_id;
|
||||
bool _shared_scan_opt = false;
|
||||
|
||||
// mini load
|
||||
int64_t _normal_row_number;
|
||||
|
||||
@ -320,8 +320,7 @@ set(VEC_FILES
|
||||
if (WITH_MYSQL)
|
||||
set(VEC_FILES
|
||||
${VEC_FILES}
|
||||
exec/scan/mysql_scanner.cpp
|
||||
)
|
||||
exec/scan/mysql_scanner.cpp)
|
||||
endif ()
|
||||
|
||||
add_library(Vec STATIC
|
||||
|
||||
@ -411,10 +411,9 @@ Status NewOlapScanNode::_init_scanners(std::list<VScanner*>* scanners) {
|
||||
TabletSharedPtr tablet =
|
||||
StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, true, &err);
|
||||
if (tablet == nullptr) {
|
||||
std::stringstream ss;
|
||||
ss << "failed to get tablet: " << tablet_id << ", reason: " << err;
|
||||
LOG(WARNING) << ss.str();
|
||||
return Status::InternalError(ss.str());
|
||||
auto err_str = fmt::format("failed to get tablet: {}, reason: {}", tablet_id, err);
|
||||
LOG(WARNING) << err_str;
|
||||
return Status::InternalError(err_str);
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<doris::OlapScanRange>>* ranges = &cond_ranges;
|
||||
|
||||
@ -33,20 +33,68 @@ public:
|
||||
: vectorized::ScannerContext(state, parent, input_tuple_desc, output_tuple_desc,
|
||||
scanners, limit, max_bytes_in_blocks_queue) {}
|
||||
|
||||
void _update_block_queue_empty() override { _blocks_queue_empty = _blocks_queue.empty(); }
|
||||
Status get_block_from_queue(RuntimeState* state, vectorized::BlockUPtr* block, bool* eos,
|
||||
int id, bool wait = false) override {
|
||||
{
|
||||
std::unique_lock<std::mutex> l(_transfer_lock);
|
||||
if (state->is_cancelled()) {
|
||||
_process_status = Status::Cancelled("cancelled");
|
||||
}
|
||||
|
||||
Status get_block_from_queue(vectorized::BlockUPtr* block, bool* eos,
|
||||
bool wait = false) override {
|
||||
return vectorized::ScannerContext::get_block_from_queue(block, eos, false);
|
||||
if (!_process_status.ok()) {
|
||||
return _process_status;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
std::unique_lock<std::mutex> l(*_queue_mutexs[id]);
|
||||
if (!_blocks_queues[id].empty()) {
|
||||
*block = std::move(_blocks_queues[id].front());
|
||||
_blocks_queues[id].pop_front();
|
||||
return Status::OK();
|
||||
} else {
|
||||
*eos = _is_finished || _should_stop;
|
||||
}
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// We should make those method lock free.
|
||||
bool done() override { return _is_finished || _should_stop || _status_error; }
|
||||
bool no_schedule() override { return _num_running_scanners == 0 && _num_scheduling_ctx == 0; }
|
||||
bool empty_in_queue() override { return _blocks_queue_empty; }
|
||||
|
||||
void append_blocks_to_queue(std::vector<vectorized::BlockUPtr>& blocks) override {
|
||||
const int queue_size = _queue_mutexs.size();
|
||||
const int block_size = blocks.size();
|
||||
for (int i = 0; i < queue_size && i < block_size; ++i) {
|
||||
int queue = _next_queue_to_feed;
|
||||
{
|
||||
std::lock_guard<std::mutex> l(*_queue_mutexs[queue]);
|
||||
for (int j = i; j < block_size; j += queue_size) {
|
||||
_blocks_queues[queue].emplace_back(std::move(blocks[j]));
|
||||
}
|
||||
}
|
||||
_next_queue_to_feed = queue + 1 < queue_size ? queue + 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool empty_in_queue(int id) override {
|
||||
std::unique_lock<std::mutex> l(*_queue_mutexs[id]);
|
||||
return _blocks_queues[id].empty();
|
||||
}
|
||||
|
||||
void set_max_queue_size(int max_queue_size) override {
|
||||
for (int i = 0; i < max_queue_size; ++i) {
|
||||
_blocks_queue_empty.emplace_back(true);
|
||||
_queue_mutexs.emplace_back(new std::mutex);
|
||||
_blocks_queues.emplace_back(std::list<vectorized::BlockUPtr>());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
std::atomic_bool _blocks_queue_empty = true;
|
||||
int _next_queue_to_feed = 0;
|
||||
std::vector<bool> _blocks_queue_empty;
|
||||
std::vector<std::unique_ptr<std::mutex>> _queue_mutexs;
|
||||
std::vector<std::list<vectorized::BlockUPtr>> _blocks_queues;
|
||||
};
|
||||
} // namespace pipeline
|
||||
} // namespace doris
|
||||
|
||||
@ -23,24 +23,53 @@
|
||||
#include "runtime/runtime_state.h"
|
||||
#include "util/threadpool.h"
|
||||
#include "vec/core/block.h"
|
||||
#include "vec/exec/scan/scanner_scheduler.h"
|
||||
#include "vec/exec/scan/vscan_node.h"
|
||||
#include "vec/exec/scan/vscanner.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
ScannerContext::ScannerContext(doris::RuntimeState* state_, doris::vectorized::VScanNode* parent,
|
||||
const doris::TupleDescriptor* input_tuple_desc,
|
||||
const doris::TupleDescriptor* output_tuple_desc,
|
||||
const std::list<VScanner*>& scanners_, int64_t limit_,
|
||||
int64_t max_bytes_in_blocks_queue_)
|
||||
: _state(state_),
|
||||
_parent(parent),
|
||||
_input_tuple_desc(input_tuple_desc),
|
||||
_output_tuple_desc(output_tuple_desc),
|
||||
_process_status(Status::OK()),
|
||||
_batch_size(state_->batch_size()),
|
||||
limit(limit_),
|
||||
_max_bytes_in_queue(max_bytes_in_blocks_queue_),
|
||||
_scanner_scheduler(state_->exec_env()->scanner_scheduler()),
|
||||
_scanners(scanners_) {
|
||||
ctx_id = UniqueId::gen_uid().to_string();
|
||||
if (_scanners.empty()) {
|
||||
_is_finished = true;
|
||||
}
|
||||
}
|
||||
|
||||
// After init function call, should not access _parent
|
||||
Status ScannerContext::init() {
|
||||
_real_tuple_desc = _input_tuple_desc != nullptr ? _input_tuple_desc : _output_tuple_desc;
|
||||
// 1. Calculate max concurrency
|
||||
// TODO: now the max thread num <= config::doris_scanner_thread_pool_thread_num / 4
|
||||
// should find a more reasonable value.
|
||||
_max_thread_num =
|
||||
std::min(config::doris_scanner_thread_pool_thread_num / 4, (int32_t)_scanners.size());
|
||||
_max_thread_num = _state->shared_scan_opt() ? config::doris_scanner_thread_pool_thread_num
|
||||
: config::doris_scanner_thread_pool_thread_num / 4;
|
||||
_max_thread_num = std::min(_max_thread_num, (int32_t)_scanners.size());
|
||||
// For select * from table limit 10; should just use one thread.
|
||||
if (_parent->should_run_serial()) {
|
||||
_max_thread_num = 1;
|
||||
}
|
||||
|
||||
_scanner_profile = _parent->_scanner_profile;
|
||||
_scanner_sched_counter = _parent->_scanner_sched_counter;
|
||||
_scanner_ctx_sched_counter = _parent->_scanner_ctx_sched_counter;
|
||||
_free_blocks_memory_usage = _parent->_free_blocks_memory_usage;
|
||||
_newly_create_free_blocks_num = _parent->_newly_create_free_blocks_num;
|
||||
_queued_blocks_memory_usage = _parent->_queued_blocks_memory_usage;
|
||||
_scanner_wait_batch_timer = _parent->_scanner_wait_batch_timer;
|
||||
// 2. Calculate how many blocks need to be preallocated.
|
||||
// The calculation logic is as follows:
|
||||
// 1. Assuming that at most M rows can be scanned in one scan(config::doris_scanner_row_num),
|
||||
@ -50,8 +79,8 @@ Status ScannerContext::init() {
|
||||
auto doris_scanner_row_num =
|
||||
limit == -1 ? config::doris_scanner_row_num
|
||||
: std::min(static_cast<int64_t>(config::doris_scanner_row_num), limit);
|
||||
int real_block_size = limit == -1 ? _state->batch_size()
|
||||
: std::min(static_cast<int64_t>(_state->batch_size()), limit);
|
||||
int real_block_size =
|
||||
limit == -1 ? _batch_size : std::min(static_cast<int64_t>(_batch_size), limit);
|
||||
_block_per_scanner = (doris_scanner_row_num + (real_block_size - 1)) / real_block_size;
|
||||
auto pre_alloc_block_count = _max_thread_num * _block_per_scanner;
|
||||
|
||||
@ -64,7 +93,7 @@ Status ScannerContext::init() {
|
||||
free_blocks_memory_usage += block->allocated_bytes();
|
||||
_free_blocks.emplace_back(std::move(block));
|
||||
}
|
||||
_parent->_free_blocks_memory_usage->add(free_blocks_memory_usage);
|
||||
_free_blocks_memory_usage->add(free_blocks_memory_usage);
|
||||
|
||||
#ifndef BE_TEST
|
||||
// 3. get thread token
|
||||
@ -91,20 +120,20 @@ vectorized::BlockUPtr ScannerContext::get_free_block(bool* has_free_block) {
|
||||
if (!_free_blocks.empty()) {
|
||||
auto block = std::move(_free_blocks.back());
|
||||
_free_blocks.pop_back();
|
||||
_parent->_free_blocks_memory_usage->add(-block->allocated_bytes());
|
||||
_free_blocks_memory_usage->add(-block->allocated_bytes());
|
||||
return block;
|
||||
}
|
||||
}
|
||||
*has_free_block = false;
|
||||
|
||||
COUNTER_UPDATE(_parent->_newly_create_free_blocks_num, 1);
|
||||
return std::make_unique<vectorized::Block>(_real_tuple_desc->slots(), _state->batch_size(),
|
||||
COUNTER_UPDATE(_newly_create_free_blocks_num, 1);
|
||||
return std::make_unique<vectorized::Block>(_real_tuple_desc->slots(), _batch_size,
|
||||
true /*ignore invalid slots*/);
|
||||
}
|
||||
|
||||
void ScannerContext::return_free_block(std::unique_ptr<vectorized::Block> block) {
|
||||
block->clear_column_data();
|
||||
_parent->_free_blocks_memory_usage->add(block->allocated_bytes());
|
||||
_free_blocks_memory_usage->add(block->allocated_bytes());
|
||||
std::lock_guard l(_free_blocks_lock);
|
||||
_free_blocks.emplace_back(std::move(block));
|
||||
}
|
||||
@ -117,18 +146,18 @@ void ScannerContext::append_blocks_to_queue(std::vector<vectorized::BlockUPtr>&
|
||||
_blocks_queue.push_back(std::move(b));
|
||||
}
|
||||
blocks.clear();
|
||||
_update_block_queue_empty();
|
||||
_blocks_queue_added_cv.notify_one();
|
||||
_parent->_queued_blocks_memory_usage->add(_cur_bytes_in_queue - old_bytes_in_queue);
|
||||
_queued_blocks_memory_usage->add(_cur_bytes_in_queue - old_bytes_in_queue);
|
||||
}
|
||||
|
||||
bool ScannerContext::empty_in_queue() {
|
||||
std::unique_lock l(_transfer_lock);
|
||||
bool ScannerContext::empty_in_queue(int id) {
|
||||
std::unique_lock<std::mutex> l(_transfer_lock);
|
||||
return _blocks_queue.empty();
|
||||
}
|
||||
|
||||
Status ScannerContext::get_block_from_queue(vectorized::BlockUPtr* block, bool* eos, bool wait) {
|
||||
std::unique_lock l(_transfer_lock);
|
||||
Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::BlockUPtr* block,
|
||||
bool* eos, int id, bool wait) {
|
||||
std::unique_lock<std::mutex> l(_transfer_lock);
|
||||
// Normally, the scanner scheduler will schedule ctx.
|
||||
// But when the amount of data in the blocks queue exceeds the upper limit,
|
||||
// the scheduler will stop scheduling.
|
||||
@ -137,18 +166,18 @@ Status ScannerContext::get_block_from_queue(vectorized::BlockUPtr* block, bool*
|
||||
// data can be continuously fetched.
|
||||
if (_has_enough_space_in_blocks_queue() && _num_running_scanners == 0) {
|
||||
_num_scheduling_ctx++;
|
||||
_state->exec_env()->scanner_scheduler()->submit(this);
|
||||
_scanner_scheduler->submit(this);
|
||||
}
|
||||
// Wait for block from queue
|
||||
if (wait) {
|
||||
SCOPED_TIMER(_parent->_scanner_wait_batch_timer);
|
||||
SCOPED_TIMER(_scanner_wait_batch_timer);
|
||||
while (!(!_blocks_queue.empty() || _is_finished || !_process_status.ok() ||
|
||||
_state->is_cancelled())) {
|
||||
state->is_cancelled())) {
|
||||
_blocks_queue_added_cv.wait(l);
|
||||
}
|
||||
}
|
||||
|
||||
if (_state->is_cancelled()) {
|
||||
if (state->is_cancelled()) {
|
||||
_process_status = Status::Cancelled("cancelled");
|
||||
}
|
||||
|
||||
@ -159,10 +188,9 @@ Status ScannerContext::get_block_from_queue(vectorized::BlockUPtr* block, bool*
|
||||
if (!_blocks_queue.empty()) {
|
||||
*block = std::move(_blocks_queue.front());
|
||||
_blocks_queue.pop_front();
|
||||
_update_block_queue_empty();
|
||||
auto block_bytes = (*block)->allocated_bytes();
|
||||
_cur_bytes_in_queue -= block_bytes;
|
||||
_parent->_queued_blocks_memory_usage->add(-block_bytes);
|
||||
_queued_blocks_memory_usage->add(-block_bytes);
|
||||
return Status::OK();
|
||||
} else {
|
||||
*eos = _is_finished;
|
||||
@ -181,9 +209,9 @@ bool ScannerContext::set_status_on_error(const Status& status) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Status ScannerContext::_close_and_clear_scanners() {
|
||||
std::unique_lock l(_scanners_lock);
|
||||
if (_state->enable_profile()) {
|
||||
Status ScannerContext::_close_and_clear_scanners(VScanNode* node, RuntimeState* state) {
|
||||
std::unique_lock<std::mutex> l(_scanners_lock);
|
||||
if (state->enable_profile()) {
|
||||
std::stringstream scanner_statistics;
|
||||
std::stringstream scanner_rows_read;
|
||||
scanner_statistics << "[";
|
||||
@ -207,13 +235,12 @@ Status ScannerContext::_close_and_clear_scanners() {
|
||||
}
|
||||
scanner_statistics << "]";
|
||||
scanner_rows_read << "]";
|
||||
_parent->_scanner_profile->add_info_string("PerScannerRunningTime",
|
||||
scanner_statistics.str());
|
||||
_parent->_scanner_profile->add_info_string("PerScannerRowsRead", scanner_rows_read.str());
|
||||
node->_scanner_profile->add_info_string("PerScannerRunningTime", scanner_statistics.str());
|
||||
node->_scanner_profile->add_info_string("PerScannerRowsRead", scanner_rows_read.str());
|
||||
}
|
||||
// Only unfinished scanners here
|
||||
for (auto scanner : _scanners) {
|
||||
scanner->close(_state);
|
||||
scanner->close(state);
|
||||
// Scanners are in ObjPool in ScanNode,
|
||||
// so no need to delete them here.
|
||||
}
|
||||
@ -221,8 +248,8 @@ Status ScannerContext::_close_and_clear_scanners() {
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
void ScannerContext::clear_and_join() {
|
||||
std::unique_lock l(_transfer_lock);
|
||||
void ScannerContext::clear_and_join(VScanNode* node, RuntimeState* state) {
|
||||
std::unique_lock<std::mutex> l(_transfer_lock);
|
||||
do {
|
||||
if (_num_running_scanners == 0 && _num_scheduling_ctx == 0) {
|
||||
break;
|
||||
@ -239,14 +266,10 @@ void ScannerContext::clear_and_join() {
|
||||
}
|
||||
// Must wait all running scanners stop running.
|
||||
// So that we can make sure to close all scanners.
|
||||
_close_and_clear_scanners();
|
||||
_close_and_clear_scanners(node, state);
|
||||
|
||||
COUNTER_SET(_parent->_scanner_sched_counter, _num_scanner_scheduling);
|
||||
COUNTER_SET(_parent->_scanner_ctx_sched_counter, _num_ctx_scheduling);
|
||||
_blocks_queue.clear();
|
||||
_free_blocks.clear();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
bool ScannerContext::no_schedule() {
|
||||
@ -273,7 +296,7 @@ void ScannerContext::push_back_scanner_and_reschedule(VScanner* scanner) {
|
||||
|
||||
std::lock_guard l(_transfer_lock);
|
||||
_num_scheduling_ctx++;
|
||||
auto submit_st = _state->exec_env()->scanner_scheduler()->submit(this);
|
||||
auto submit_st = _scanner_scheduler->submit(this);
|
||||
if (!submit_st.ok()) {
|
||||
_num_scheduling_ctx--;
|
||||
}
|
||||
@ -285,11 +308,6 @@ void ScannerContext::push_back_scanner_and_reschedule(VScanner* scanner) {
|
||||
// same scanner.
|
||||
if (scanner->need_to_close() && scanner->set_counted_down() &&
|
||||
(--_num_unfinished_scanners) == 0) {
|
||||
// ATTN: this 2 counters will be set at close() again, which is the final values.
|
||||
// But we set them here because the counter set at close() can not send to FE's profile.
|
||||
// So we set them here, and the counter value may be little less than final values.
|
||||
COUNTER_SET(_parent->_scanner_sched_counter, _num_scanner_scheduling);
|
||||
COUNTER_SET(_parent->_scanner_ctx_sched_counter, _num_ctx_scheduling);
|
||||
_is_finished = true;
|
||||
_blocks_queue_added_cv.notify_one();
|
||||
}
|
||||
@ -306,7 +324,7 @@ void ScannerContext::get_next_batch_of_scanners(std::list<VScanner*>* current_ru
|
||||
if (_has_enough_space_in_blocks_queue()) {
|
||||
// If there are enough space in blocks queue,
|
||||
// the scanner number depends on the _free_blocks numbers
|
||||
std::lock_guard l(_free_blocks_lock);
|
||||
std::lock_guard f(_free_blocks_lock);
|
||||
thread_slot_num = _free_blocks.size() / _block_per_scanner;
|
||||
thread_slot_num += (_free_blocks.size() % _block_per_scanner != 0);
|
||||
thread_slot_num = std::min(thread_slot_num, _max_thread_num - _num_running_scanners);
|
||||
@ -340,7 +358,6 @@ void ScannerContext::get_next_batch_of_scanners(std::list<VScanner*>* current_ru
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -32,7 +32,6 @@ namespace doris {
|
||||
class PriorityThreadPool;
|
||||
class ThreadPool;
|
||||
class ThreadPoolToken;
|
||||
class ScannerScheduler;
|
||||
|
||||
namespace vectorized {
|
||||
|
||||
@ -51,35 +50,21 @@ class ScannerContext {
|
||||
public:
|
||||
ScannerContext(RuntimeState* state_, VScanNode* parent, const TupleDescriptor* input_tuple_desc,
|
||||
const TupleDescriptor* output_tuple_desc, const std::list<VScanner*>& scanners_,
|
||||
int64_t limit_, int64_t max_bytes_in_blocks_queue_)
|
||||
: _state(state_),
|
||||
_parent(parent),
|
||||
_input_tuple_desc(input_tuple_desc),
|
||||
_output_tuple_desc(output_tuple_desc),
|
||||
_process_status(Status::OK()),
|
||||
limit(limit_),
|
||||
_max_bytes_in_queue(max_bytes_in_blocks_queue_),
|
||||
_scanners(scanners_) {
|
||||
ctx_id = UniqueId::gen_uid().to_string();
|
||||
if (_scanners.empty()) {
|
||||
_is_finished = true;
|
||||
}
|
||||
}
|
||||
int64_t limit_, int64_t max_bytes_in_blocks_queue_);
|
||||
|
||||
virtual ~ScannerContext() = default;
|
||||
|
||||
Status init();
|
||||
|
||||
vectorized::BlockUPtr get_free_block(bool* has_free_block);
|
||||
void return_free_block(std::unique_ptr<vectorized::Block> block);
|
||||
|
||||
// Append blocks from scanners to the blocks queue.
|
||||
void append_blocks_to_queue(std::vector<vectorized::BlockUPtr>& blocks);
|
||||
|
||||
virtual void append_blocks_to_queue(std::vector<vectorized::BlockUPtr>& blocks);
|
||||
// Get next block from blocks queue. Called by ScanNode
|
||||
// Set eos to true if there is no more data to read.
|
||||
// And if eos is true, the block returned must be nullptr.
|
||||
virtual Status get_block_from_queue(vectorized::BlockUPtr* block, bool* eos, bool wait = true);
|
||||
virtual Status get_block_from_queue(RuntimeState* state, vectorized::BlockUPtr* block,
|
||||
bool* eos, int id, bool wait = true);
|
||||
|
||||
// When a scanner complete a scan, this method will be called
|
||||
// to return the scanner to the list for next scheduling.
|
||||
@ -121,7 +106,7 @@ public:
|
||||
|
||||
void get_next_batch_of_scanners(std::list<VScanner*>* current_run);
|
||||
|
||||
void clear_and_join();
|
||||
void clear_and_join(VScanNode* node, RuntimeState* state);
|
||||
|
||||
virtual bool no_schedule();
|
||||
|
||||
@ -129,12 +114,14 @@ public:
|
||||
|
||||
RuntimeState* state() { return _state; }
|
||||
|
||||
void incr_num_ctx_scheduling(int64_t num) { _num_ctx_scheduling += num; }
|
||||
void incr_num_scanner_scheduling(int64_t num) { _num_scanner_scheduling += num; }
|
||||
void incr_num_ctx_scheduling(int64_t num) { _scanner_ctx_sched_counter->update(num); }
|
||||
void incr_num_scanner_scheduling(int64_t num) { _scanner_sched_counter->update(num); }
|
||||
|
||||
VScanNode* parent() { return _parent; }
|
||||
|
||||
virtual bool empty_in_queue();
|
||||
virtual bool empty_in_queue(int id);
|
||||
|
||||
virtual void set_max_queue_size(int max_queue_size) {};
|
||||
|
||||
// the unique id of this context
|
||||
std::string ctx_id;
|
||||
@ -143,15 +130,12 @@ public:
|
||||
std::vector<bthread_t> _btids;
|
||||
|
||||
private:
|
||||
Status _close_and_clear_scanners();
|
||||
Status _close_and_clear_scanners(VScanNode* node, RuntimeState* state);
|
||||
|
||||
inline bool _has_enough_space_in_blocks_queue() const {
|
||||
return _cur_bytes_in_queue < _max_bytes_in_queue / 2;
|
||||
}
|
||||
|
||||
// do nothing here, we only do update on pip_scanner_context
|
||||
virtual void _update_block_queue_empty() {}
|
||||
|
||||
protected:
|
||||
RuntimeState* _state;
|
||||
VScanNode* _parent;
|
||||
@ -198,6 +182,7 @@ protected:
|
||||
doris::Mutex _free_blocks_lock;
|
||||
std::vector<vectorized::BlockUPtr> _free_blocks;
|
||||
|
||||
int _batch_size;
|
||||
// The limit from SQL's limit clause
|
||||
int64_t limit;
|
||||
|
||||
@ -221,6 +206,7 @@ protected:
|
||||
// The max limit bytes of blocks in blocks queue
|
||||
int64_t _max_bytes_in_queue;
|
||||
|
||||
doris::vectorized::ScannerScheduler* _scanner_scheduler;
|
||||
// List "scanners" saves all "unfinished" scanners.
|
||||
// The scanner scheduler will pop scanners from this list, run scanner,
|
||||
// and then if the scanner is not finished, will be pushed back to this list.
|
||||
@ -230,8 +216,13 @@ protected:
|
||||
std::vector<int64_t> _finished_scanner_runtime;
|
||||
std::vector<int64_t> _finished_scanner_rows_read;
|
||||
|
||||
int64_t _num_ctx_scheduling = 0;
|
||||
int64_t _num_scanner_scheduling = 0;
|
||||
std::shared_ptr<RuntimeProfile> _scanner_profile;
|
||||
RuntimeProfile::Counter* _scanner_sched_counter = nullptr;
|
||||
RuntimeProfile::Counter* _scanner_ctx_sched_counter = nullptr;
|
||||
RuntimeProfile::HighWaterMarkCounter* _free_blocks_memory_usage = nullptr;
|
||||
RuntimeProfile::HighWaterMarkCounter* _queued_blocks_memory_usage = nullptr;
|
||||
RuntimeProfile::Counter* _newly_create_free_blocks_num = nullptr;
|
||||
RuntimeProfile::Counter* _scanner_wait_batch_timer = nullptr;
|
||||
};
|
||||
} // namespace vectorized
|
||||
} // namespace doris
|
||||
|
||||
@ -66,6 +66,8 @@ static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) {
|
||||
Status VScanNode::init(const TPlanNode& tnode, RuntimeState* state) {
|
||||
RETURN_IF_ERROR(ExecNode::init(tnode, state));
|
||||
_state = state;
|
||||
_is_pipeline_scan = state->enable_pipeline_exec();
|
||||
_shared_scan_opt = state->shared_scan_opt();
|
||||
|
||||
const TQueryOptions& query_options = state->query_options();
|
||||
if (query_options.__isset.max_scan_key_num) {
|
||||
@ -92,6 +94,21 @@ Status VScanNode::prepare(RuntimeState* state) {
|
||||
for (auto& rf_ctx : _runtime_filter_ctxs) {
|
||||
rf_ctx.runtime_filter->init_profile(_runtime_profile.get());
|
||||
}
|
||||
|
||||
if (_is_pipeline_scan) {
|
||||
if (_shared_scan_opt) {
|
||||
_shared_scanner_controller =
|
||||
state->get_query_fragments_ctx()->get_shared_scanner_controller();
|
||||
auto [should_create_scanner, queue_id] =
|
||||
_shared_scanner_controller->should_build_scanner_and_queue_id(id());
|
||||
_should_create_scanner = should_create_scanner;
|
||||
_context_queue_id = queue_id;
|
||||
} else {
|
||||
_should_create_scanner = true;
|
||||
_context_queue_id = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -113,18 +130,37 @@ Status VScanNode::alloc_resource(RuntimeState* state) {
|
||||
RETURN_IF_ERROR(ExecNode::alloc_resource(state));
|
||||
RETURN_IF_ERROR(_acquire_runtime_filter());
|
||||
RETURN_IF_ERROR(_process_conjuncts());
|
||||
if (_eos) {
|
||||
return Status::OK();
|
||||
|
||||
if (_is_pipeline_scan) {
|
||||
if (_should_create_scanner) {
|
||||
auto status = !_eos ? _prepare_scanners() : Status::OK();
|
||||
if (_scanner_ctx) {
|
||||
DCHECK(!_eos && _num_scanners->value() > 0);
|
||||
_scanner_ctx->set_max_queue_size(
|
||||
_shared_scan_opt ? std::max(state->query_parallel_instance_num(), 1) : 1);
|
||||
RETURN_IF_ERROR(
|
||||
_state->exec_env()->scanner_scheduler()->submit(_scanner_ctx.get()));
|
||||
}
|
||||
if (_shared_scan_opt) {
|
||||
_shared_scanner_controller->set_scanner_context(id(),
|
||||
_eos ? nullptr : _scanner_ctx);
|
||||
}
|
||||
RETURN_IF_ERROR(status);
|
||||
} else if (_shared_scanner_controller->scanner_context_is_ready(id())) {
|
||||
_scanner_ctx = _shared_scanner_controller->get_scanner_context(id());
|
||||
if (!_scanner_ctx) {
|
||||
_eos = true;
|
||||
}
|
||||
} else {
|
||||
return Status::WaitForScannerContext("Need wait for scanner context create");
|
||||
}
|
||||
} else {
|
||||
RETURN_IF_ERROR(!_eos ? _prepare_scanners() : Status::OK());
|
||||
if (_scanner_ctx) {
|
||||
RETURN_IF_ERROR(_state->exec_env()->scanner_scheduler()->submit(_scanner_ctx.get()));
|
||||
}
|
||||
}
|
||||
|
||||
std::list<VScanner*> scanners;
|
||||
RETURN_IF_ERROR(_init_scanners(&scanners));
|
||||
if (scanners.empty()) {
|
||||
_eos = true;
|
||||
} else {
|
||||
COUNTER_SET(_num_scanners, static_cast<int64_t>(scanners.size()));
|
||||
RETURN_IF_ERROR(_start_scanners(scanners));
|
||||
}
|
||||
RETURN_IF_CANCELLED(state);
|
||||
_opened = true;
|
||||
return Status::OK();
|
||||
@ -163,7 +199,7 @@ Status VScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool*
|
||||
}
|
||||
|
||||
vectorized::BlockUPtr scan_block = nullptr;
|
||||
RETURN_IF_ERROR(_scanner_ctx->get_block_from_queue(&scan_block, eos));
|
||||
RETURN_IF_ERROR(_scanner_ctx->get_block_from_queue(state, &scan_block, eos, _context_queue_id));
|
||||
if (*eos) {
|
||||
DCHECK(scan_block == nullptr);
|
||||
return Status::OK();
|
||||
@ -184,12 +220,6 @@ Status VScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool*
|
||||
|
||||
Status VScanNode::_init_profile() {
|
||||
// 1. counters for scan node
|
||||
auto* memory_usage = _runtime_profile->create_child("MemoryUsage", true, true);
|
||||
_runtime_profile->add_child(memory_usage, false, nullptr);
|
||||
_queued_blocks_memory_usage =
|
||||
memory_usage->AddHighWaterMarkCounter("QueuedBlocks", TUnit::BYTES);
|
||||
_free_blocks_memory_usage = memory_usage->AddHighWaterMarkCounter("FreeBlocks", TUnit::BYTES);
|
||||
|
||||
_rows_read_counter = ADD_COUNTER(_runtime_profile, "RowsRead", TUnit::UNIT);
|
||||
_total_throughput_counter =
|
||||
runtime_profile()->add_rate_counter("TotalReadThroughput", _rows_read_counter);
|
||||
@ -201,30 +231,36 @@ Status VScanNode::_init_profile() {
|
||||
_scanner_profile.reset(new RuntimeProfile("VScanner"));
|
||||
runtime_profile()->add_child(_scanner_profile.get(), true, nullptr);
|
||||
|
||||
auto* memory_usage = _scanner_profile->create_child("MemoryUsage", true, true);
|
||||
_runtime_profile->add_child(memory_usage, false, nullptr);
|
||||
_queued_blocks_memory_usage =
|
||||
memory_usage->AddHighWaterMarkCounter("QueuedBlocks", TUnit::BYTES);
|
||||
_free_blocks_memory_usage = memory_usage->AddHighWaterMarkCounter("FreeBlocks", TUnit::BYTES);
|
||||
_newly_create_free_blocks_num =
|
||||
ADD_COUNTER(_scanner_profile, "NewlyCreateFreeBlocksNum", TUnit::UNIT);
|
||||
// time of transfer thread to wait for block from scan thread
|
||||
_scanner_wait_batch_timer = ADD_TIMER(_scanner_profile, "ScannerBatchWaitTime");
|
||||
_scanner_sched_counter = ADD_COUNTER(_scanner_profile, "ScannerSchedCount", TUnit::UNIT);
|
||||
_scanner_ctx_sched_counter = ADD_COUNTER(_scanner_profile, "ScannerCtxSchedCount", TUnit::UNIT);
|
||||
|
||||
_scan_timer = ADD_TIMER(_scanner_profile, "ScannerGetBlockTime");
|
||||
_scan_cpu_timer = ADD_TIMER(_scanner_profile, "ScannerCpuTime");
|
||||
_prefilter_timer = ADD_TIMER(_scanner_profile, "ScannerPrefilterTime");
|
||||
_convert_block_timer = ADD_TIMER(_scanner_profile, "ScannerConvertBlockTime");
|
||||
_filter_timer = ADD_TIMER(_scanner_profile, "ScannerFilterTime");
|
||||
|
||||
_scanner_sched_counter = ADD_COUNTER(_runtime_profile, "ScannerSchedCount", TUnit::UNIT);
|
||||
_scanner_ctx_sched_counter = ADD_COUNTER(_runtime_profile, "ScannerCtxSchedCount", TUnit::UNIT);
|
||||
// time of transfer thread to wait for block from scan thread
|
||||
_scanner_wait_batch_timer = ADD_TIMER(_runtime_profile, "ScannerBatchWaitTime");
|
||||
// time of scan thread to wait for worker thread of the thread pool
|
||||
_scanner_wait_worker_timer = ADD_TIMER(_runtime_profile, "ScannerWorkerWaitTime");
|
||||
|
||||
_pre_alloc_free_blocks_num =
|
||||
ADD_COUNTER(_runtime_profile, "PreAllocFreeBlocksNum", TUnit::UNIT);
|
||||
_newly_create_free_blocks_num =
|
||||
ADD_COUNTER(_runtime_profile, "NewlyCreateFreeBlocksNum", TUnit::UNIT);
|
||||
_max_scanner_thread_num = ADD_COUNTER(_runtime_profile, "MaxScannerThreadNum", TUnit::UNIT);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status VScanNode::_start_scanners(const std::list<VScanner*>& scanners) {
|
||||
if (_state->enable_pipeline_exec()) {
|
||||
if (_is_pipeline_scan) {
|
||||
_scanner_ctx.reset(new pipeline::PipScannerContext(_state, this, _input_tuple_desc,
|
||||
_output_tuple_desc, scanners, limit(),
|
||||
_state->query_options().mem_limit / 20));
|
||||
@ -234,7 +270,6 @@ Status VScanNode::_start_scanners(const std::list<VScanner*>& scanners) {
|
||||
_state->query_options().mem_limit / 20));
|
||||
}
|
||||
RETURN_IF_ERROR(_scanner_ctx->init());
|
||||
RETURN_IF_ERROR(_state->exec_env()->scanner_scheduler()->submit(_scanner_ctx.get()));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
@ -374,10 +409,12 @@ Status VScanNode::close(RuntimeState* state) {
|
||||
void VScanNode::release_resource(RuntimeState* state) {
|
||||
START_AND_SCOPE_SPAN(state->get_tracer(), span, "VScanNode::release_resource");
|
||||
if (_scanner_ctx.get()) {
|
||||
// stop and wait the scanner scheduler to be done
|
||||
// _scanner_ctx may not be created for some short circuit case.
|
||||
_scanner_ctx->set_should_stop();
|
||||
_scanner_ctx->clear_and_join();
|
||||
if (!state->enable_pipeline_exec() || _should_create_scanner) {
|
||||
// stop and wait the scanner scheduler to be done
|
||||
// _scanner_ctx may not be created for some short circuit case.
|
||||
_scanner_ctx->set_should_stop();
|
||||
_scanner_ctx->clear_and_join(this, state);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& ctx : _runtime_filter_ctxs) {
|
||||
@ -1318,4 +1355,16 @@ VScanNode::PushDownType VScanNode::_should_push_down_in_predicate(VInPredicate*
|
||||
return PushDownType::ACCEPTABLE;
|
||||
}
|
||||
|
||||
Status VScanNode::_prepare_scanners() {
|
||||
std::list<VScanner*> scanners;
|
||||
RETURN_IF_ERROR(_init_scanners(&scanners));
|
||||
if (scanners.empty()) {
|
||||
_eos = true;
|
||||
} else {
|
||||
COUNTER_SET(_num_scanners, static_cast<int64_t>(scanners.size()));
|
||||
RETURN_IF_ERROR(_start_scanners(scanners));
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
} // namespace doris::vectorized
|
||||
|
||||
@ -103,6 +103,8 @@ public:
|
||||
Status try_close();
|
||||
|
||||
bool should_run_serial() const { return _should_run_serial; }
|
||||
bool ready_to_open() { return _shared_scanner_controller->scanner_context_is_ready(id()); }
|
||||
bool ready_to_read() { return !_scanner_ctx->empty_in_queue(_context_queue_id); }
|
||||
|
||||
enum class PushDownType {
|
||||
// The predicate can not be pushed down to data source
|
||||
@ -178,8 +180,12 @@ protected:
|
||||
// Only predicate on key column can be pushed down.
|
||||
virtual bool _is_key_column(const std::string& col_name) { return false; }
|
||||
|
||||
Status _prepare_scanners();
|
||||
|
||||
protected:
|
||||
RuntimeState* _state;
|
||||
bool _is_pipeline_scan = false;
|
||||
bool _shared_scan_opt = false;
|
||||
// For load scan node, there should be both input and output tuple descriptor.
|
||||
// For query scan node, there is only output_tuple_desc.
|
||||
TupleId _input_tuple_id = -1;
|
||||
@ -267,7 +273,11 @@ protected:
|
||||
int64_t _limit_per_scanner = -1;
|
||||
|
||||
protected:
|
||||
std::unique_ptr<RuntimeProfile> _scanner_profile;
|
||||
std::shared_ptr<vectorized::SharedScannerController> _shared_scanner_controller;
|
||||
bool _should_create_scanner = false;
|
||||
int _context_queue_id = -1;
|
||||
|
||||
std::shared_ptr<RuntimeProfile> _scanner_profile;
|
||||
|
||||
// rows read from the scanner (including those discarded by (pre)filters)
|
||||
RuntimeProfile::Counter* _rows_read_counter;
|
||||
|
||||
@ -417,7 +417,7 @@ struct UnixTimeStampDateImpl {
|
||||
static Status execute_impl(FunctionContext* context, Block& block,
|
||||
const ColumnNumbers& arguments, size_t result,
|
||||
size_t input_rows_count) {
|
||||
const ColumnPtr col_source = block.get_by_position(arguments[0]).column;
|
||||
const ColumnPtr& col_source = block.get_by_position(arguments[0]).column;
|
||||
auto col_result = ColumnVector<Int32>::create();
|
||||
auto null_map = ColumnVector<UInt8>::create();
|
||||
auto& col_result_data = col_result->get_data();
|
||||
|
||||
@ -85,4 +85,4 @@ private:
|
||||
};
|
||||
|
||||
} // namespace vectorized
|
||||
} // namespace doris
|
||||
} // namespace doris
|
||||
|
||||
69
be/src/vec/runtime/shared_scanner_controller.h
Normal file
69
be/src/vec/runtime/shared_scanner_controller.h
Normal file
@ -0,0 +1,69 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <condition_variable>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
#include "vec/core/block.h"
|
||||
#include "vec/exec/scan/scanner_context.h"
|
||||
|
||||
namespace doris::vectorized {
|
||||
|
||||
class SharedScannerController {
|
||||
public:
|
||||
std::pair<bool, int> should_build_scanner_and_queue_id(int my_node_id) {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
auto it = _scanner_parallel.find(my_node_id);
|
||||
|
||||
if (it == _scanner_parallel.cend()) {
|
||||
_scanner_parallel.insert({my_node_id, 0});
|
||||
return {true, 0};
|
||||
} else {
|
||||
auto queue_id = it->second;
|
||||
_scanner_parallel[my_node_id] = queue_id + 1;
|
||||
return {false, queue_id + 1};
|
||||
}
|
||||
}
|
||||
|
||||
void set_scanner_context(int my_node_id,
|
||||
const std::shared_ptr<ScannerContext> scanner_context) {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
_scanner_context.insert({my_node_id, scanner_context});
|
||||
}
|
||||
|
||||
bool scanner_context_is_ready(int my_node_id) {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
return _scanner_context.find(my_node_id) != _scanner_context.end();
|
||||
}
|
||||
|
||||
std::shared_ptr<ScannerContext> get_scanner_context(int my_node_id) {
|
||||
std::lock_guard<std::mutex> lock(_mutex);
|
||||
return _scanner_context[my_node_id];
|
||||
}
|
||||
|
||||
private:
|
||||
std::mutex _mutex;
|
||||
std::map<int /*node id*/, int /*parallel*/> _scanner_parallel;
|
||||
std::map<int /*node id*/, std::shared_ptr<ScannerContext>> _scanner_context;
|
||||
};
|
||||
|
||||
} // namespace doris::vectorized
|
||||
@ -44,6 +44,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalStorageLayerAggrega
|
||||
import org.apache.doris.nereids.trees.plans.physical.PhysicalTVFRelation;
|
||||
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
|
||||
import org.apache.doris.nereids.util.JoinUtils;
|
||||
import org.apache.doris.qe.ConnectContext;
|
||||
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.Maps;
|
||||
@ -218,7 +219,9 @@ public class ChildOutputPropertyDeriver extends PlanVisitor<PhysicalProperties,
|
||||
@Override
|
||||
public PhysicalProperties visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanContext context) {
|
||||
// TODO: find a better way to handle both tablet num == 1 and colocate table together in future
|
||||
if (!olapScan.getTable().isColocateTable() && olapScan.getScanTabletNum() == 1) {
|
||||
if (!olapScan.getTable().isColocateTable() && olapScan.getScanTabletNum() == 1
|
||||
&& (!ConnectContext.get().getSessionVariable().enablePipelineEngine()
|
||||
|| ConnectContext.get().getSessionVariable().getParallelExecInstanceNum() == 1)) {
|
||||
return PhysicalProperties.GATHER;
|
||||
} else if (olapScan.getDistributionSpec() instanceof DistributionSpecHash) {
|
||||
return PhysicalProperties.createHash((DistributionSpecHash) olapScan.getDistributionSpec());
|
||||
|
||||
@ -1118,6 +1118,9 @@ public class OlapScanNode extends ScanNode {
|
||||
|
||||
@Override
|
||||
public int getNumInstances() {
|
||||
if (ConnectContext.get().getSessionVariable().enablePipelineEngine()) {
|
||||
return ConnectContext.get().getSessionVariable().getParallelExecInstanceNum();
|
||||
}
|
||||
return result.size();
|
||||
}
|
||||
|
||||
|
||||
@ -1622,6 +1622,7 @@ public class Coordinator {
|
||||
bucketShuffleJoinController.computeInstanceParam(fragment.getFragmentId(),
|
||||
parallelExecInstanceNum, params);
|
||||
} else {
|
||||
params.sharedScanOpt = true;
|
||||
// case A
|
||||
for (Entry<TNetworkAddress, Map<Integer, List<TScanRangeParams>>> entry : fragmentExecParamsMap.get(
|
||||
fragment.getFragmentId()).scanRangeAssignment.entrySet()) {
|
||||
@ -1630,13 +1631,22 @@ public class Coordinator {
|
||||
|
||||
for (Integer planNodeId : value.keySet()) {
|
||||
List<TScanRangeParams> perNodeScanRanges = value.get(planNodeId);
|
||||
int expectedInstanceNum = 1;
|
||||
if (parallelExecInstanceNum > 1) {
|
||||
//the scan instance num should not larger than the tablets num
|
||||
expectedInstanceNum = Math.min(perNodeScanRanges.size(), parallelExecInstanceNum);
|
||||
List<List<TScanRangeParams>> perInstanceScanRanges = Lists.newArrayList();
|
||||
if (!enablePipelineEngine) {
|
||||
int expectedInstanceNum = 1;
|
||||
if (parallelExecInstanceNum > 1) {
|
||||
//the scan instance num should not larger than the tablets num
|
||||
expectedInstanceNum = Math.min(perNodeScanRanges.size(), parallelExecInstanceNum);
|
||||
}
|
||||
perInstanceScanRanges = ListUtil.splitBySize(perNodeScanRanges,
|
||||
expectedInstanceNum);
|
||||
} else {
|
||||
int expectedInstanceNum = Math.min(parallelExecInstanceNum,
|
||||
leftMostNode.getNumInstances());
|
||||
for (int j = 0; j < Math.max(expectedInstanceNum, 1); j++) {
|
||||
perInstanceScanRanges.add(perNodeScanRanges);
|
||||
}
|
||||
}
|
||||
List<List<TScanRangeParams>> perInstanceScanRanges = ListUtil.splitBySize(perNodeScanRanges,
|
||||
expectedInstanceNum);
|
||||
|
||||
LOG.debug("scan range number per instance is: {}", perInstanceScanRanges.size());
|
||||
|
||||
@ -3034,6 +3044,8 @@ public class Coordinator {
|
||||
public List<FInstanceExecParam> instanceExecParams = Lists.newArrayList();
|
||||
public FragmentScanRangeAssignment scanRangeAssignment = new FragmentScanRangeAssignment();
|
||||
|
||||
public boolean sharedScanOpt = false;
|
||||
|
||||
public FragmentExecParams(PlanFragment fragment) {
|
||||
this.fragment = fragment;
|
||||
}
|
||||
@ -3125,6 +3137,7 @@ public class Coordinator {
|
||||
fragment.isTransferQueryStatisticsWithEveryBatch());
|
||||
params.setFragment(fragment.toThrift());
|
||||
params.setLocalParams(Lists.newArrayList());
|
||||
params.setSharedScanOpt(sharedScanOpt);
|
||||
res.put(instanceExecParam.host, params);
|
||||
}
|
||||
TPipelineFragmentParams params = res.get(instanceExecParam.host);
|
||||
|
||||
@ -1593,6 +1593,7 @@ public class SessionVariable implements Serializable, Writable {
|
||||
tResult.setCodegenLevel(codegenLevel);
|
||||
tResult.setBeExecVersion(Config.be_exec_version);
|
||||
tResult.setEnablePipelineEngine(enablePipelineEngine);
|
||||
tResult.setParallelInstance(parallelExecInstanceNum);
|
||||
tResult.setReturnObjectDataAsBinary(returnObjectDataAsBinary);
|
||||
tResult.setTrimTailingSpacesForExternalTableQuery(trimTailingSpacesForExternalTableQuery);
|
||||
tResult.setEnableShareHashTableForBroadcastJoin(enableShareHashTableForBroadcastJoin);
|
||||
|
||||
@ -201,9 +201,12 @@ struct TQueryOptions {
|
||||
|
||||
// For debug purpose, skip delete bitmap when reading data
|
||||
63: optional bool skip_delete_bitmap = false
|
||||
|
||||
64: optional bool dry_run_query = false
|
||||
|
||||
65: optional bool enable_common_expr_pushdown = false;
|
||||
|
||||
66: optional i32 parallel_instance = 1
|
||||
}
|
||||
|
||||
|
||||
@ -598,6 +601,7 @@ struct TPipelineFragmentParams {
|
||||
22: optional TGlobalDict global_dict // scan node could use the global dict to encode the string value to an integer
|
||||
23: optional Planner.TPlanFragment fragment
|
||||
24: list<TPipelineInstanceParams> local_params
|
||||
25: optional bool shared_scan_opt = false;
|
||||
}
|
||||
|
||||
struct TPipelineFragmentParamsList {
|
||||
|
||||
@ -159,16 +159,16 @@
|
||||
4 3 18
|
||||
|
||||
-- !select3 --
|
||||
\N \N 24
|
||||
\N \N 6
|
||||
\N \N 24
|
||||
\N 1 1
|
||||
\N 2 3
|
||||
\N 3 4
|
||||
\N 4 2
|
||||
\N 6 5
|
||||
\N 9 3
|
||||
1 \N 24
|
||||
1 \N 6
|
||||
1 \N 24
|
||||
1 1 1
|
||||
1 2 3
|
||||
1 3 4
|
||||
@ -216,12 +216,12 @@
|
||||
|
||||
-- !select7 --
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
1
|
||||
2
|
||||
2
|
||||
3
|
||||
3
|
||||
4
|
||||
4
|
||||
|
||||
-- !select1 --
|
||||
@ -258,3 +258,4 @@ all 1
|
||||
2 1
|
||||
2 1
|
||||
2 2
|
||||
|
||||
|
||||
@ -167,23 +167,23 @@
|
||||
24 4
|
||||
|
||||
-- !in_subquery_with_order --
|
||||
1 3
|
||||
1 2
|
||||
1 3
|
||||
2 5
|
||||
3 3
|
||||
20 2
|
||||
22 3
|
||||
24 4
|
||||
3 3
|
||||
|
||||
-- !exists_subquery_with_order --
|
||||
1 3
|
||||
1 2
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
3 3
|
||||
20 2
|
||||
22 3
|
||||
24 4
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !scalar_subquery_with_limit --
|
||||
20 2
|
||||
@ -200,13 +200,13 @@
|
||||
20
|
||||
|
||||
-- !case_when_subquery --
|
||||
20.0
|
||||
20.0
|
||||
20.0
|
||||
20.0
|
||||
20.0
|
||||
4.0
|
||||
4.0
|
||||
20.0
|
||||
20.0
|
||||
20.0
|
||||
20.0
|
||||
20.0
|
||||
|
||||
-- !in --
|
||||
1 2
|
||||
@ -244,106 +244,106 @@
|
||||
3 4
|
||||
|
||||
-- !hash_join_with_other_conjuncts1 --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
2 5
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !hash_join_with_other_conjuncts2 --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
2 5
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !hash_join_with_other_conjuncts3 --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
2 5
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !hash_join_with_other_conjuncts4 --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
2 5
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !same_subquery_in_conjuncts --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
2 5
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !two_subquery_in_one_conjuncts --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
2 5
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !multi_subquery_in_and_scalry --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
2 5
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !multi_subquery_in_and_exist --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
2 5
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !multi_subquery_in_and_exist_sum --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
3 3
|
||||
2 5
|
||||
20 2
|
||||
22 3
|
||||
24 4
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !multi_subquery_in_and_in --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
2 5
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !multi_subquery_scalar_and_exist --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
3 3
|
||||
2 5
|
||||
20 2
|
||||
22 3
|
||||
24 4
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !multi_subquery_scalar_and_scalar --
|
||||
1 3
|
||||
1 2
|
||||
2 5
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
2 5
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !multi_subquery_in_first_or_in_and_in --
|
||||
3 3
|
||||
|
||||
@ -1,21 +1,21 @@
|
||||
-- This file is automatically generated. You should know what you did if you want to edit this
|
||||
-- !exists_subquery_with_limit --
|
||||
1 3
|
||||
1 2
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
3 3
|
||||
20 2
|
||||
22 3
|
||||
24 4
|
||||
3 3
|
||||
3 4
|
||||
|
||||
-- !exists_subquery_with_order_and_limit --
|
||||
1 3
|
||||
1 2
|
||||
1 3
|
||||
2 4
|
||||
3 4
|
||||
3 3
|
||||
20 2
|
||||
22 3
|
||||
24 4
|
||||
3 3
|
||||
3 4
|
||||
|
||||
|
||||
@ -161,7 +161,7 @@ suite("test_colocate_join") {
|
||||
(20220101, 101, 202, 200, 100);"""
|
||||
|
||||
explain {
|
||||
sql("select " +
|
||||
sql("select /*+SET_VAR(parallel_fragment_exec_instance_num=1)*/ " +
|
||||
" sum_col1,sum_col2 " +
|
||||
"from " +
|
||||
"(select datekey,sum(sum_col1) as sum_col1 from test_query_colocate where datekey=20220101 group by datekey) t1 " +
|
||||
|
||||
@ -59,4 +59,4 @@ PROPERTIES (
|
||||
|
||||
sql "drop table if exists ${table1}"
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -49,4 +49,4 @@ suite("nereids_gen_fn") {
|
||||
qt_sql_explode_split_outer_Varchar_Varchar_notnull '''
|
||||
select id, e from fn_test lateral view explode_split_outer('a, b, c, d', ',') lv as e order by id, e'''
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -121,7 +121,7 @@ suite("test_nereids_grouping_sets") {
|
||||
rollup(k1_, k2) order by k1_, k2
|
||||
"""
|
||||
|
||||
qt_select3 "select 1 as k, k3, sum(k1) from groupingSetsTable group by cube(k, k3) order by k, k3"
|
||||
qt_select3 "select 1 as k, k3, sum(k1) as sum_k1 from groupingSetsTable group by cube(k, k3) order by k, k3, sum_k1"
|
||||
|
||||
qt_select4 """
|
||||
select k2, concat(k5, k6) as k_concat, sum(k1) from groupingSetsTable group by
|
||||
@ -230,7 +230,7 @@ suite("test_nereids_grouping_sets") {
|
||||
from
|
||||
grouping_sum_table
|
||||
) T
|
||||
) T2;
|
||||
) T2 order by a;
|
||||
"""
|
||||
|
||||
order_qt_select1 """
|
||||
|
||||
@ -227,7 +227,7 @@ suite("test_nereids_set_operation") {
|
||||
}
|
||||
qt_union39 """(select k1 from setOperationTable order by k1) union all (select k1 from setOperationTableNotNullable order by k1) order by k1;"""
|
||||
|
||||
qt_union40 """
|
||||
order_qt_union40 """
|
||||
SELECT k1 FROM setOperationTable WHERE k2 = 2
|
||||
INTERSECT
|
||||
SELECT k1 FROM setOperationTable WHERE k1 = 1
|
||||
@ -235,7 +235,7 @@ suite("test_nereids_set_operation") {
|
||||
SELECT k1 FROM setOperationTable WHERE k3 = 2
|
||||
"""
|
||||
|
||||
qt_union41 """
|
||||
order_qt_union41 """
|
||||
SELECT k1 FROM setOperationTable WHERE k2 = 1
|
||||
EXCEPT
|
||||
SELECT k1 FROM setOperationTable WHERE k3 = 2
|
||||
@ -245,7 +245,7 @@ suite("test_nereids_set_operation") {
|
||||
SELECT k1 FROM setOperationTable WHERE k2 > 0)
|
||||
"""
|
||||
|
||||
qt_union42 """
|
||||
order_qt_union42 """
|
||||
SELECT k1 FROM setOperationTable WHERE k2 = 1
|
||||
EXCEPT
|
||||
SELECT k1 FROM setOperationTable WHERE k3 = 2
|
||||
|
||||
@ -236,35 +236,35 @@ suite ("sub_query_correlated") {
|
||||
"""*/
|
||||
|
||||
//----------subquery with order----------
|
||||
qt_scalar_subquery_with_order """
|
||||
order_qt_scalar_subquery_with_order """
|
||||
select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 > (select sum(sub_query_correlated_subquery3.k3) a from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 order by a);
|
||||
"""
|
||||
|
||||
qt_in_subquery_with_order """
|
||||
order_qt_in_subquery_with_order """
|
||||
select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 not in (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 order by k2);
|
||||
"""
|
||||
|
||||
qt_exists_subquery_with_order """
|
||||
order_qt_exists_subquery_with_order """
|
||||
select * from sub_query_correlated_subquery1 where exists (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 order by k2);
|
||||
"""
|
||||
|
||||
//----------subquery with limit----------
|
||||
qt_scalar_subquery_with_limit """
|
||||
order_qt_scalar_subquery_with_limit """
|
||||
select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 > (select sum(sub_query_correlated_subquery3.k3) a from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 limit 1);
|
||||
"""
|
||||
|
||||
//----------subquery with order and limit----------
|
||||
qt_scalar_subquery_with_order_and_limit """
|
||||
order_qt_scalar_subquery_with_order_and_limit """
|
||||
select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 > (select sum(sub_query_correlated_subquery3.k3) a from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 order by a limit 1);
|
||||
"""
|
||||
|
||||
//---------subquery with Disjunctions-------------
|
||||
qt_scalar_subquery_with_disjunctions """
|
||||
order_qt_scalar_subquery_with_disjunctions """
|
||||
SELECT DISTINCT k1 FROM sub_query_correlated_subquery1 i1 WHERE ((SELECT count(*) FROM sub_query_correlated_subquery1 WHERE ((k1 = i1.k1) AND (k2 = 2)) or ((k1 = i1.k1) AND (k2 = 1)) ) > 0);
|
||||
"""
|
||||
|
||||
//--------subquery case when-----------
|
||||
qt_case_when_subquery """
|
||||
order_qt_case_when_subquery """
|
||||
SELECT CASE
|
||||
WHEN (
|
||||
SELECT COUNT(*) / 2
|
||||
@ -298,86 +298,86 @@ suite ("sub_query_correlated") {
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE EXISTS (SELECT k1 FROM sub_query_correlated_subquery3 WHERE k1 > 10) OR k1 < 10;
|
||||
"""
|
||||
|
||||
qt_hash_join_with_other_conjuncts1 """
|
||||
order_qt_hash_join_with_other_conjuncts1 """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 > sub_query_correlated_subquery3.k3) OR k1 < 10 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_hash_join_with_other_conjuncts2 """
|
||||
order_qt_hash_join_with_other_conjuncts2 """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 < sub_query_correlated_subquery3.k3) OR k1 < 10 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_hash_join_with_other_conjuncts3 """
|
||||
order_qt_hash_join_with_other_conjuncts3 """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 > sub_query_correlated_subquery3.k3) OR k1 < 11 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_hash_join_with_other_conjuncts4 """
|
||||
order_qt_hash_join_with_other_conjuncts4 """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 < sub_query_correlated_subquery3.k3) OR k1 < 11 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_same_subquery_in_conjuncts """
|
||||
order_qt_same_subquery_in_conjuncts """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3) OR k1 IN (SELECT k1 FROM sub_query_correlated_subquery3) OR k1 < 10 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_two_subquery_in_one_conjuncts """
|
||||
order_qt_two_subquery_in_one_conjuncts """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3) OR k1 IN (SELECT k3 FROM sub_query_correlated_subquery3) OR k1 < 10 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_multi_subquery_in_and_scalry """
|
||||
order_qt_multi_subquery_in_and_scalry """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
|
||||
OR k1 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
|
||||
OR k1 < 10 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_multi_subquery_in_and_exist """
|
||||
order_qt_multi_subquery_in_and_exist """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
|
||||
OR exists (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
|
||||
OR k1 < 10 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_multi_subquery_in_and_exist_sum """
|
||||
order_qt_multi_subquery_in_and_exist_sum """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
|
||||
OR exists (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
|
||||
OR k1 < 10 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_multi_subquery_in_and_in """
|
||||
order_qt_multi_subquery_in_and_in """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
|
||||
OR k2 in (SELECT k2 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
|
||||
OR k1 < 10 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_multi_subquery_scalar_and_exist """
|
||||
order_qt_multi_subquery_scalar_and_exist """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
|
||||
OR exists (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
|
||||
OR k1 < 10 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_multi_subquery_scalar_and_scalar """
|
||||
order_qt_multi_subquery_scalar_and_scalar """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
|
||||
OR k2 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
|
||||
OR k1 < 10 ORDER BY k1;
|
||||
"""
|
||||
|
||||
qt_multi_subquery_in_first_or_in_and_in """
|
||||
order_qt_multi_subquery_in_first_or_in_and_in """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE (k1 in (SELECT k2 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
|
||||
or k2 in (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1))
|
||||
and k1 in (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
|
||||
"""
|
||||
|
||||
qt_multi_subquery_in_second_or_in_and_in """
|
||||
order_qt_multi_subquery_in_second_or_in_and_in """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 in (SELECT k2 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
|
||||
or k2 in (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
|
||||
and k1 in (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
|
||||
"""
|
||||
|
||||
qt_multi_subquery_scalar_and_in_or_scalar_and_exists_agg """
|
||||
order_qt_multi_subquery_scalar_and_in_or_scalar_and_exists_agg """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE ((k1 != (SELECT sum(k1) FROM sub_query_correlated_subquery3) and k1 = 1 OR k1 < 10) and k1 = 10 and k1 = 15)
|
||||
and (k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.k1)
|
||||
OR k1 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.k1))
|
||||
and exists (SELECT sum(k1) FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.k1);
|
||||
"""
|
||||
|
||||
qt_multi_subquery_scalar_and_in_or_scalar_and_exists """
|
||||
order_qt_multi_subquery_scalar_and_in_or_scalar_and_exists """
|
||||
SELECT * FROM sub_query_correlated_subquery1 WHERE ((k1 != (SELECT sum(k1) FROM sub_query_correlated_subquery3) and k1 = 1 OR k1 < 10) and k1 = 10 and k1 = 15)
|
||||
and (k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.k1)
|
||||
OR k1 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.k1))
|
||||
|
||||
@ -159,7 +159,7 @@ suite ("sub_query_diff_old_optimize") {
|
||||
"""*/
|
||||
|
||||
//----------subquery with limit----------
|
||||
qt_exists_subquery_with_limit """
|
||||
order_qt_exists_subquery_with_limit """
|
||||
select * from sub_query_diff_old_optimize_subquery1 where exists (select sub_query_diff_old_optimize_subquery3.k3 from sub_query_diff_old_optimize_subquery3 where sub_query_diff_old_optimize_subquery3.v2 = sub_query_diff_old_optimize_subquery1.k2 limit 1);
|
||||
"""
|
||||
|
||||
@ -172,7 +172,7 @@ suite ("sub_query_diff_old_optimize") {
|
||||
}
|
||||
|
||||
//----------subquery with order and limit-------
|
||||
qt_exists_subquery_with_order_and_limit """
|
||||
order_qt_exists_subquery_with_order_and_limit """
|
||||
select * from sub_query_diff_old_optimize_subquery1 where exists (select sub_query_diff_old_optimize_subquery3.k3 from sub_query_diff_old_optimize_subquery3 where sub_query_diff_old_optimize_subquery3.v2 = sub_query_diff_old_optimize_subquery1.k2 order by k1 limit 1);
|
||||
"""
|
||||
|
||||
|
||||
@ -32,10 +32,10 @@ suite("redundant_conjuncts") {
|
||||
"""
|
||||
|
||||
qt_redundant_conjuncts """
|
||||
EXPLAIN SELECT /*+SET_VAR(REWRITE_OR_TO_IN_PREDICATE_THRESHOLD=2) */ v1 FROM redundant_conjuncts WHERE k1 = 1 AND k1 = 1;
|
||||
EXPLAIN SELECT /*+SET_VAR(REWRITE_OR_TO_IN_PREDICATE_THRESHOLD=2, parallel_fragment_exec_instance_num = 1) */ v1 FROM redundant_conjuncts WHERE k1 = 1 AND k1 = 1;
|
||||
"""
|
||||
|
||||
qt_redundant_conjuncts_gnerated_by_extract_common_filter """
|
||||
EXPLAIN SELECT /*+SET_VAR(REWRITE_OR_TO_IN_PREDICATE_THRESHOLD=100) */ v1 FROM redundant_conjuncts WHERE k1 = 1 OR k1 = 2;
|
||||
EXPLAIN SELECT /*+SET_VAR(REWRITE_OR_TO_IN_PREDICATE_THRESHOLD=100, parallel_fragment_exec_instance_num = 1) */ v1 FROM redundant_conjuncts WHERE k1 = 1 OR k1 = 2;
|
||||
"""
|
||||
}
|
||||
|
||||
@ -73,15 +73,15 @@ suite("test_query_limit", "query,p0") {
|
||||
qt_limit16 "select * from (select * from ${tableName} order by k1, k2, k3, k4 limit 1, 2) a limit 2, 2"
|
||||
qt_limit17 "select * from (select * from ${tableName} order by k1, k2, k3, k4 limit 1, 2) a limit 2, 3"
|
||||
test {
|
||||
sql "select * from ${tableName} limit 1, 10"
|
||||
sql "select /*+SET_VAR(parallel_fragment_exec_instance_num=4)*/ * from ${tableName} limit 1, 10"
|
||||
rowNum 2
|
||||
}
|
||||
test {
|
||||
sql "select * from ${tableName} limit 2, 10"
|
||||
sql "select /*+SET_VAR(parallel_fragment_exec_instance_num=4)*/ * from ${tableName} limit 2, 10"
|
||||
rowNum 1
|
||||
}
|
||||
test {
|
||||
sql "select * from ${tableName} limit 3, 10"
|
||||
sql "select /*+SET_VAR(parallel_fragment_exec_instance_num=4)*/ * from ${tableName} limit 3, 10"
|
||||
rowNum 0
|
||||
}
|
||||
}
|
||||
|
||||
@ -123,7 +123,7 @@ suite("test_window_fn") {
|
||||
select first_value(salary) over(order by enroll_date range between unbounded preceding and UNBOUNDED following), last_value(salary) over(order by enroll_date range between unbounded preceding and UNBOUNDED following), salary, enroll_date from ${tbName1} order by salary, enroll_date;
|
||||
"""
|
||||
qt_sql """
|
||||
SELECT first_value(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
|
||||
SELECT first_value(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
|
||||
"""
|
||||
qt_sql """
|
||||
SELECT first_value(unique1) over (order by four range between current row and unbounded following),
|
||||
@ -190,7 +190,7 @@ suite("test_window_fn") {
|
||||
"""
|
||||
qt_sql """
|
||||
SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary, empno)
|
||||
FROM ${tbName1} ORDER BY rank() OVER (PARTITION BY depname ORDER BY salary, empno);
|
||||
FROM ${tbName1} ORDER BY rank() OVER (PARTITION BY depname ORDER BY salary, empno), depname;
|
||||
"""
|
||||
qt_sql """
|
||||
SELECT sum(salary) as s, row_number() OVER (ORDER BY depname) as r, sum(sum(salary)) OVER (ORDER BY depname DESC) as ss
|
||||
@ -207,10 +207,10 @@ suite("test_window_fn") {
|
||||
SELECT row_number() OVER (ORDER BY unique2) FROM ${tbName2} WHERE unique2 < 10;
|
||||
"""
|
||||
qt_sql """
|
||||
SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM ${tbName2} WHERE unique2 < 10;
|
||||
SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
|
||||
"""
|
||||
qt_sql """
|
||||
SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
|
||||
SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
|
||||
"""
|
||||
qt_sql """
|
||||
select ten, sum(unique1) + sum(unique2) as res, rank() over (order by sum(unique1) + sum(unique2)) as rank from ${tbName2} group by ten order by ten;
|
||||
@ -256,7 +256,7 @@ suite("test_window_fn") {
|
||||
SELECT count(1) OVER (PARTITION BY four) as c, four FROM (SELECT * FROM ${tbName2} WHERE two = 1)s WHERE unique2 < 10 order by c, four;
|
||||
"""
|
||||
qt_sql """
|
||||
SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM ${tbName2} WHERE unique2 < 10;
|
||||
SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM ${tbName2} WHERE unique2 < 10 order by four;
|
||||
"""
|
||||
qt_sql """
|
||||
SELECT count(1) OVER (PARTITION BY four) FROM (SELECT * FROM ${tbName2} WHERE FALSE)s;
|
||||
@ -278,19 +278,19 @@ suite("test_window_fn") {
|
||||
|
||||
// lag
|
||||
qt_sql """
|
||||
SELECT lag(ten, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
|
||||
SELECT lag(ten, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
|
||||
"""
|
||||
|
||||
|
||||
// lead
|
||||
qt_sql """
|
||||
SELECT lead(ten, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
|
||||
SELECT lead(ten, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
|
||||
"""
|
||||
qt_sql """
|
||||
SELECT lead(ten * 2, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
|
||||
SELECT lead(ten * 2, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
|
||||
"""
|
||||
qt_sql """
|
||||
SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
|
||||
SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
|
||||
"""
|
||||
|
||||
|
||||
@ -353,7 +353,7 @@ suite("test_window_fn") {
|
||||
qt_sql_window_last_value """
|
||||
select u_id, u_city, u_salary,
|
||||
last_value(u_salary) over (partition by u_city order by u_id rows between unbounded preceding and 1 preceding) last_value_test
|
||||
from example_window_tb;
|
||||
from example_window_tb order by u_id;
|
||||
"""
|
||||
|
||||
sql "DROP TABLE IF EXISTS example_window_tb;"
|
||||
|
||||
Reference in New Issue
Block a user