[Pipeline](shared_scan_opt) Support shared scan opt in pipeline exec engine

This commit is contained in:
HappenLee
2023-03-13 10:33:57 +08:00
committed by GitHub
parent edb2d90852
commit 39b5682d59
35 changed files with 476 additions and 288 deletions

View File

@ -246,6 +246,7 @@ E(SEGCOMPACTION_INIT_READER, -3117);
E(SEGCOMPACTION_INIT_WRITER, -3118);
E(SEGCOMPACTION_FAILED, -3119);
E(PIP_WAIT_FOR_RF, -3120);
E(PIP_WAIT_FOR_SC, -3121);
E(INVERTED_INDEX_INVALID_PARAMETERS, -6000);
E(INVERTED_INDEX_NOT_SUPPORTED, -6001);
E(INVERTED_INDEX_CLUCENE_ERROR, -6002);
@ -383,6 +384,7 @@ public:
ERROR_CTOR(EndOfFile, END_OF_FILE)
ERROR_CTOR(InternalError, INTERNAL_ERROR)
ERROR_CTOR(WaitForRf, PIP_WAIT_FOR_RF)
ERROR_CTOR(WaitForScannerContext, PIP_WAIT_FOR_SC)
ERROR_CTOR(RuntimeError, RUNTIME_ERROR)
ERROR_CTOR(Cancelled, CANCELLED)
ERROR_CTOR(MemoryLimitExceeded, MEM_LIMIT_EXCEEDED)
@ -402,6 +404,8 @@ public:
bool ok() const { return _code == ErrorCode::OK; }
bool is_blocked_by_sc() const { return _code == ErrorCode::PIP_WAIT_FOR_SC; }
bool is_io_error() const {
return ErrorCode::IO_ERROR == _code || ErrorCode::READ_UNENOUGH == _code ||
ErrorCode::CHECKSUM_ERROR == _code || ErrorCode::FILE_DATA_ERROR == _code ||

View File

@ -43,56 +43,6 @@
#include "vec/runtime/shared_hash_table_controller.h"
namespace doris {
// PrimitiveType->TExprNodeType
// TODO: use constexpr if we use c++14
TExprNodeType::type get_expr_node_type(PrimitiveType type) {
switch (type) {
case TYPE_BOOLEAN:
return TExprNodeType::BOOL_LITERAL;
case TYPE_TINYINT:
case TYPE_SMALLINT:
case TYPE_INT:
case TYPE_BIGINT:
return TExprNodeType::INT_LITERAL;
case TYPE_LARGEINT:
return TExprNodeType::LARGE_INT_LITERAL;
break;
case TYPE_NULL:
return TExprNodeType::NULL_LITERAL;
case TYPE_FLOAT:
case TYPE_DOUBLE:
case TYPE_TIME:
case TYPE_TIMEV2:
return TExprNodeType::FLOAT_LITERAL;
break;
case TYPE_DECIMAL32:
case TYPE_DECIMAL64:
case TYPE_DECIMAL128I:
case TYPE_DECIMALV2:
return TExprNodeType::DECIMAL_LITERAL;
case TYPE_DATETIME:
case TYPE_DATEV2:
case TYPE_DATETIMEV2:
return TExprNodeType::DATE_LITERAL;
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_HLL:
case TYPE_OBJECT:
case TYPE_STRING:
return TExprNodeType::STRING_LITERAL;
default:
DCHECK(false) << "Invalid type.";
return TExprNodeType::NULL_LITERAL;
}
}
// PrimitiveType-> PColumnType
// TODO: use constexpr if we use c++14

View File

@ -25,13 +25,21 @@ namespace doris::pipeline {
OPERATOR_CODE_GENERATOR(ScanOperator, SourceOperator)
bool ScanOperator::can_read() {
if (_node->_eos || _node->_scanner_ctx->done() || _node->_scanner_ctx->no_schedule()) {
// _eos: need eos
// _scanner_ctx->done(): need finish
// _scanner_ctx->no_schedule(): should schedule _scanner_ctx
return true;
if (!_node->_opened) {
if (_node->_should_create_scanner || _node->ready_to_open()) {
return true;
} else {
return false;
}
} else {
return !_node->_scanner_ctx->empty_in_queue(); // there are some blocks to process
if (_node->_eos || _node->_scanner_ctx->done() || _node->_scanner_ctx->no_schedule()) {
// _eos: need eos
// _scanner_ctx->done(): need finish
// _scanner_ctx->no_schedule(): should schedule _scanner_ctx
return true;
} else {
return _node->ready_to_read(); // there are some blocks to process
}
}
}

View File

@ -344,6 +344,9 @@ Status PipelineFragmentContext::prepare(const doris::TPipelineFragmentParams& re
if (request.__isset.load_job_id) {
_runtime_state->set_load_job_id(request.load_job_id);
}
if (request.__isset.shared_scan_opt) {
_runtime_state->set_shared_scan_opt(request.shared_scan_opt);
}
if (request.query_options.__isset.is_report_success) {
fragment_context->set_is_report_success(request.query_options.is_report_success);

View File

@ -135,6 +135,9 @@ Status PipelineTask::execute(bool* eos) {
if (st.is<ErrorCode::PIP_WAIT_FOR_RF>()) {
set_state(PipelineTaskState::BLOCKED_FOR_RF);
return Status::OK();
} else if (st.is<ErrorCode::PIP_WAIT_FOR_SC>()) {
set_state(PipelineTaskState::BLOCKED_FOR_SOURCE);
return Status::OK();
}
RETURN_IF_ERROR(st);
}

View File

@ -33,6 +33,7 @@
#include "util/threadpool.h"
#include "vec/exec/scan/scanner_scheduler.h"
#include "vec/runtime/shared_hash_table_controller.h"
#include "vec/runtime/shared_scanner_controller.h"
namespace doris {
@ -46,6 +47,7 @@ public:
: fragment_num(total_fragment_num), timeout_second(-1), _exec_env(exec_env) {
_start_time = DateTimeValue::local_time();
_shared_hash_table_controller.reset(new vectorized::SharedHashTableController());
_shared_scanner_controller.reset(new vectorized::SharedScannerController());
}
~QueryFragmentsCtx() {
@ -122,6 +124,10 @@ public:
return _shared_hash_table_controller;
}
std::shared_ptr<vectorized::SharedScannerController> get_shared_scanner_controller() {
return _shared_scanner_controller;
}
vectorized::RuntimePredicate& get_runtime_predicate() { return _runtime_predicate; }
public:
@ -167,6 +173,7 @@ private:
std::atomic<bool> _is_cancelled {false};
std::shared_ptr<vectorized::SharedHashTableController> _shared_hash_table_controller;
std::shared_ptr<vectorized::SharedScannerController> _shared_scanner_controller;
vectorized::RuntimePredicate _runtime_predicate;
};

View File

@ -88,6 +88,7 @@ public:
bool abort_on_default_limit_exceeded() const {
return _query_options.abort_on_default_limit_exceeded;
}
int query_parallel_instance_num() const { return _query_options.parallel_instance; }
int max_errors() const { return _query_options.max_errors; }
int query_timeout() const { return _query_options.query_timeout; }
int insert_timeout() const { return _query_options.insert_timeout; }
@ -200,6 +201,10 @@ public:
int64_t load_job_id() const { return _load_job_id; }
void set_shared_scan_opt(bool shared_scan_opt) { _shared_scan_opt = shared_scan_opt; }
bool shared_scan_opt() const { return _shared_scan_opt; }
const std::string get_error_log_file_path() const { return _error_log_file_path; }
// append error msg and error line to file when loading data.
@ -453,6 +458,7 @@ private:
std::string _db_name;
std::string _load_dir;
int64_t _load_job_id;
bool _shared_scan_opt = false;
// mini load
int64_t _normal_row_number;

View File

@ -320,8 +320,7 @@ set(VEC_FILES
if (WITH_MYSQL)
set(VEC_FILES
${VEC_FILES}
exec/scan/mysql_scanner.cpp
)
exec/scan/mysql_scanner.cpp)
endif ()
add_library(Vec STATIC

View File

@ -411,10 +411,9 @@ Status NewOlapScanNode::_init_scanners(std::list<VScanner*>* scanners) {
TabletSharedPtr tablet =
StorageEngine::instance()->tablet_manager()->get_tablet(tablet_id, true, &err);
if (tablet == nullptr) {
std::stringstream ss;
ss << "failed to get tablet: " << tablet_id << ", reason: " << err;
LOG(WARNING) << ss.str();
return Status::InternalError(ss.str());
auto err_str = fmt::format("failed to get tablet: {}, reason: {}", tablet_id, err);
LOG(WARNING) << err_str;
return Status::InternalError(err_str);
}
std::vector<std::unique_ptr<doris::OlapScanRange>>* ranges = &cond_ranges;

View File

@ -33,20 +33,68 @@ public:
: vectorized::ScannerContext(state, parent, input_tuple_desc, output_tuple_desc,
scanners, limit, max_bytes_in_blocks_queue) {}
void _update_block_queue_empty() override { _blocks_queue_empty = _blocks_queue.empty(); }
Status get_block_from_queue(RuntimeState* state, vectorized::BlockUPtr* block, bool* eos,
int id, bool wait = false) override {
{
std::unique_lock<std::mutex> l(_transfer_lock);
if (state->is_cancelled()) {
_process_status = Status::Cancelled("cancelled");
}
Status get_block_from_queue(vectorized::BlockUPtr* block, bool* eos,
bool wait = false) override {
return vectorized::ScannerContext::get_block_from_queue(block, eos, false);
if (!_process_status.ok()) {
return _process_status;
}
}
{
std::unique_lock<std::mutex> l(*_queue_mutexs[id]);
if (!_blocks_queues[id].empty()) {
*block = std::move(_blocks_queues[id].front());
_blocks_queues[id].pop_front();
return Status::OK();
} else {
*eos = _is_finished || _should_stop;
}
}
return Status::OK();
}
// We should make those method lock free.
bool done() override { return _is_finished || _should_stop || _status_error; }
bool no_schedule() override { return _num_running_scanners == 0 && _num_scheduling_ctx == 0; }
bool empty_in_queue() override { return _blocks_queue_empty; }
void append_blocks_to_queue(std::vector<vectorized::BlockUPtr>& blocks) override {
const int queue_size = _queue_mutexs.size();
const int block_size = blocks.size();
for (int i = 0; i < queue_size && i < block_size; ++i) {
int queue = _next_queue_to_feed;
{
std::lock_guard<std::mutex> l(*_queue_mutexs[queue]);
for (int j = i; j < block_size; j += queue_size) {
_blocks_queues[queue].emplace_back(std::move(blocks[j]));
}
}
_next_queue_to_feed = queue + 1 < queue_size ? queue + 1 : 0;
}
}
bool empty_in_queue(int id) override {
std::unique_lock<std::mutex> l(*_queue_mutexs[id]);
return _blocks_queues[id].empty();
}
void set_max_queue_size(int max_queue_size) override {
for (int i = 0; i < max_queue_size; ++i) {
_blocks_queue_empty.emplace_back(true);
_queue_mutexs.emplace_back(new std::mutex);
_blocks_queues.emplace_back(std::list<vectorized::BlockUPtr>());
}
}
private:
std::atomic_bool _blocks_queue_empty = true;
int _next_queue_to_feed = 0;
std::vector<bool> _blocks_queue_empty;
std::vector<std::unique_ptr<std::mutex>> _queue_mutexs;
std::vector<std::list<vectorized::BlockUPtr>> _blocks_queues;
};
} // namespace pipeline
} // namespace doris

View File

@ -23,24 +23,53 @@
#include "runtime/runtime_state.h"
#include "util/threadpool.h"
#include "vec/core/block.h"
#include "vec/exec/scan/scanner_scheduler.h"
#include "vec/exec/scan/vscan_node.h"
#include "vec/exec/scan/vscanner.h"
namespace doris::vectorized {
ScannerContext::ScannerContext(doris::RuntimeState* state_, doris::vectorized::VScanNode* parent,
const doris::TupleDescriptor* input_tuple_desc,
const doris::TupleDescriptor* output_tuple_desc,
const std::list<VScanner*>& scanners_, int64_t limit_,
int64_t max_bytes_in_blocks_queue_)
: _state(state_),
_parent(parent),
_input_tuple_desc(input_tuple_desc),
_output_tuple_desc(output_tuple_desc),
_process_status(Status::OK()),
_batch_size(state_->batch_size()),
limit(limit_),
_max_bytes_in_queue(max_bytes_in_blocks_queue_),
_scanner_scheduler(state_->exec_env()->scanner_scheduler()),
_scanners(scanners_) {
ctx_id = UniqueId::gen_uid().to_string();
if (_scanners.empty()) {
_is_finished = true;
}
}
// After init function call, should not access _parent
Status ScannerContext::init() {
_real_tuple_desc = _input_tuple_desc != nullptr ? _input_tuple_desc : _output_tuple_desc;
// 1. Calculate max concurrency
// TODO: now the max thread num <= config::doris_scanner_thread_pool_thread_num / 4
// should find a more reasonable value.
_max_thread_num =
std::min(config::doris_scanner_thread_pool_thread_num / 4, (int32_t)_scanners.size());
_max_thread_num = _state->shared_scan_opt() ? config::doris_scanner_thread_pool_thread_num
: config::doris_scanner_thread_pool_thread_num / 4;
_max_thread_num = std::min(_max_thread_num, (int32_t)_scanners.size());
// For select * from table limit 10; should just use one thread.
if (_parent->should_run_serial()) {
_max_thread_num = 1;
}
_scanner_profile = _parent->_scanner_profile;
_scanner_sched_counter = _parent->_scanner_sched_counter;
_scanner_ctx_sched_counter = _parent->_scanner_ctx_sched_counter;
_free_blocks_memory_usage = _parent->_free_blocks_memory_usage;
_newly_create_free_blocks_num = _parent->_newly_create_free_blocks_num;
_queued_blocks_memory_usage = _parent->_queued_blocks_memory_usage;
_scanner_wait_batch_timer = _parent->_scanner_wait_batch_timer;
// 2. Calculate how many blocks need to be preallocated.
// The calculation logic is as follows:
// 1. Assuming that at most M rows can be scanned in one scan(config::doris_scanner_row_num),
@ -50,8 +79,8 @@ Status ScannerContext::init() {
auto doris_scanner_row_num =
limit == -1 ? config::doris_scanner_row_num
: std::min(static_cast<int64_t>(config::doris_scanner_row_num), limit);
int real_block_size = limit == -1 ? _state->batch_size()
: std::min(static_cast<int64_t>(_state->batch_size()), limit);
int real_block_size =
limit == -1 ? _batch_size : std::min(static_cast<int64_t>(_batch_size), limit);
_block_per_scanner = (doris_scanner_row_num + (real_block_size - 1)) / real_block_size;
auto pre_alloc_block_count = _max_thread_num * _block_per_scanner;
@ -64,7 +93,7 @@ Status ScannerContext::init() {
free_blocks_memory_usage += block->allocated_bytes();
_free_blocks.emplace_back(std::move(block));
}
_parent->_free_blocks_memory_usage->add(free_blocks_memory_usage);
_free_blocks_memory_usage->add(free_blocks_memory_usage);
#ifndef BE_TEST
// 3. get thread token
@ -91,20 +120,20 @@ vectorized::BlockUPtr ScannerContext::get_free_block(bool* has_free_block) {
if (!_free_blocks.empty()) {
auto block = std::move(_free_blocks.back());
_free_blocks.pop_back();
_parent->_free_blocks_memory_usage->add(-block->allocated_bytes());
_free_blocks_memory_usage->add(-block->allocated_bytes());
return block;
}
}
*has_free_block = false;
COUNTER_UPDATE(_parent->_newly_create_free_blocks_num, 1);
return std::make_unique<vectorized::Block>(_real_tuple_desc->slots(), _state->batch_size(),
COUNTER_UPDATE(_newly_create_free_blocks_num, 1);
return std::make_unique<vectorized::Block>(_real_tuple_desc->slots(), _batch_size,
true /*ignore invalid slots*/);
}
void ScannerContext::return_free_block(std::unique_ptr<vectorized::Block> block) {
block->clear_column_data();
_parent->_free_blocks_memory_usage->add(block->allocated_bytes());
_free_blocks_memory_usage->add(block->allocated_bytes());
std::lock_guard l(_free_blocks_lock);
_free_blocks.emplace_back(std::move(block));
}
@ -117,18 +146,18 @@ void ScannerContext::append_blocks_to_queue(std::vector<vectorized::BlockUPtr>&
_blocks_queue.push_back(std::move(b));
}
blocks.clear();
_update_block_queue_empty();
_blocks_queue_added_cv.notify_one();
_parent->_queued_blocks_memory_usage->add(_cur_bytes_in_queue - old_bytes_in_queue);
_queued_blocks_memory_usage->add(_cur_bytes_in_queue - old_bytes_in_queue);
}
bool ScannerContext::empty_in_queue() {
std::unique_lock l(_transfer_lock);
bool ScannerContext::empty_in_queue(int id) {
std::unique_lock<std::mutex> l(_transfer_lock);
return _blocks_queue.empty();
}
Status ScannerContext::get_block_from_queue(vectorized::BlockUPtr* block, bool* eos, bool wait) {
std::unique_lock l(_transfer_lock);
Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::BlockUPtr* block,
bool* eos, int id, bool wait) {
std::unique_lock<std::mutex> l(_transfer_lock);
// Normally, the scanner scheduler will schedule ctx.
// But when the amount of data in the blocks queue exceeds the upper limit,
// the scheduler will stop scheduling.
@ -137,18 +166,18 @@ Status ScannerContext::get_block_from_queue(vectorized::BlockUPtr* block, bool*
// data can be continuously fetched.
if (_has_enough_space_in_blocks_queue() && _num_running_scanners == 0) {
_num_scheduling_ctx++;
_state->exec_env()->scanner_scheduler()->submit(this);
_scanner_scheduler->submit(this);
}
// Wait for block from queue
if (wait) {
SCOPED_TIMER(_parent->_scanner_wait_batch_timer);
SCOPED_TIMER(_scanner_wait_batch_timer);
while (!(!_blocks_queue.empty() || _is_finished || !_process_status.ok() ||
_state->is_cancelled())) {
state->is_cancelled())) {
_blocks_queue_added_cv.wait(l);
}
}
if (_state->is_cancelled()) {
if (state->is_cancelled()) {
_process_status = Status::Cancelled("cancelled");
}
@ -159,10 +188,9 @@ Status ScannerContext::get_block_from_queue(vectorized::BlockUPtr* block, bool*
if (!_blocks_queue.empty()) {
*block = std::move(_blocks_queue.front());
_blocks_queue.pop_front();
_update_block_queue_empty();
auto block_bytes = (*block)->allocated_bytes();
_cur_bytes_in_queue -= block_bytes;
_parent->_queued_blocks_memory_usage->add(-block_bytes);
_queued_blocks_memory_usage->add(-block_bytes);
return Status::OK();
} else {
*eos = _is_finished;
@ -181,9 +209,9 @@ bool ScannerContext::set_status_on_error(const Status& status) {
return false;
}
Status ScannerContext::_close_and_clear_scanners() {
std::unique_lock l(_scanners_lock);
if (_state->enable_profile()) {
Status ScannerContext::_close_and_clear_scanners(VScanNode* node, RuntimeState* state) {
std::unique_lock<std::mutex> l(_scanners_lock);
if (state->enable_profile()) {
std::stringstream scanner_statistics;
std::stringstream scanner_rows_read;
scanner_statistics << "[";
@ -207,13 +235,12 @@ Status ScannerContext::_close_and_clear_scanners() {
}
scanner_statistics << "]";
scanner_rows_read << "]";
_parent->_scanner_profile->add_info_string("PerScannerRunningTime",
scanner_statistics.str());
_parent->_scanner_profile->add_info_string("PerScannerRowsRead", scanner_rows_read.str());
node->_scanner_profile->add_info_string("PerScannerRunningTime", scanner_statistics.str());
node->_scanner_profile->add_info_string("PerScannerRowsRead", scanner_rows_read.str());
}
// Only unfinished scanners here
for (auto scanner : _scanners) {
scanner->close(_state);
scanner->close(state);
// Scanners are in ObjPool in ScanNode,
// so no need to delete them here.
}
@ -221,8 +248,8 @@ Status ScannerContext::_close_and_clear_scanners() {
return Status::OK();
}
void ScannerContext::clear_and_join() {
std::unique_lock l(_transfer_lock);
void ScannerContext::clear_and_join(VScanNode* node, RuntimeState* state) {
std::unique_lock<std::mutex> l(_transfer_lock);
do {
if (_num_running_scanners == 0 && _num_scheduling_ctx == 0) {
break;
@ -239,14 +266,10 @@ void ScannerContext::clear_and_join() {
}
// Must wait all running scanners stop running.
// So that we can make sure to close all scanners.
_close_and_clear_scanners();
_close_and_clear_scanners(node, state);
COUNTER_SET(_parent->_scanner_sched_counter, _num_scanner_scheduling);
COUNTER_SET(_parent->_scanner_ctx_sched_counter, _num_ctx_scheduling);
_blocks_queue.clear();
_free_blocks.clear();
return;
}
bool ScannerContext::no_schedule() {
@ -273,7 +296,7 @@ void ScannerContext::push_back_scanner_and_reschedule(VScanner* scanner) {
std::lock_guard l(_transfer_lock);
_num_scheduling_ctx++;
auto submit_st = _state->exec_env()->scanner_scheduler()->submit(this);
auto submit_st = _scanner_scheduler->submit(this);
if (!submit_st.ok()) {
_num_scheduling_ctx--;
}
@ -285,11 +308,6 @@ void ScannerContext::push_back_scanner_and_reschedule(VScanner* scanner) {
// same scanner.
if (scanner->need_to_close() && scanner->set_counted_down() &&
(--_num_unfinished_scanners) == 0) {
// ATTN: this 2 counters will be set at close() again, which is the final values.
// But we set them here because the counter set at close() can not send to FE's profile.
// So we set them here, and the counter value may be little less than final values.
COUNTER_SET(_parent->_scanner_sched_counter, _num_scanner_scheduling);
COUNTER_SET(_parent->_scanner_ctx_sched_counter, _num_ctx_scheduling);
_is_finished = true;
_blocks_queue_added_cv.notify_one();
}
@ -306,7 +324,7 @@ void ScannerContext::get_next_batch_of_scanners(std::list<VScanner*>* current_ru
if (_has_enough_space_in_blocks_queue()) {
// If there are enough space in blocks queue,
// the scanner number depends on the _free_blocks numbers
std::lock_guard l(_free_blocks_lock);
std::lock_guard f(_free_blocks_lock);
thread_slot_num = _free_blocks.size() / _block_per_scanner;
thread_slot_num += (_free_blocks.size() % _block_per_scanner != 0);
thread_slot_num = std::min(thread_slot_num, _max_thread_num - _num_running_scanners);
@ -340,7 +358,6 @@ void ScannerContext::get_next_batch_of_scanners(std::list<VScanner*>* current_ru
}
}
}
return;
}
} // namespace doris::vectorized

View File

@ -32,7 +32,6 @@ namespace doris {
class PriorityThreadPool;
class ThreadPool;
class ThreadPoolToken;
class ScannerScheduler;
namespace vectorized {
@ -51,35 +50,21 @@ class ScannerContext {
public:
ScannerContext(RuntimeState* state_, VScanNode* parent, const TupleDescriptor* input_tuple_desc,
const TupleDescriptor* output_tuple_desc, const std::list<VScanner*>& scanners_,
int64_t limit_, int64_t max_bytes_in_blocks_queue_)
: _state(state_),
_parent(parent),
_input_tuple_desc(input_tuple_desc),
_output_tuple_desc(output_tuple_desc),
_process_status(Status::OK()),
limit(limit_),
_max_bytes_in_queue(max_bytes_in_blocks_queue_),
_scanners(scanners_) {
ctx_id = UniqueId::gen_uid().to_string();
if (_scanners.empty()) {
_is_finished = true;
}
}
int64_t limit_, int64_t max_bytes_in_blocks_queue_);
virtual ~ScannerContext() = default;
Status init();
vectorized::BlockUPtr get_free_block(bool* has_free_block);
void return_free_block(std::unique_ptr<vectorized::Block> block);
// Append blocks from scanners to the blocks queue.
void append_blocks_to_queue(std::vector<vectorized::BlockUPtr>& blocks);
virtual void append_blocks_to_queue(std::vector<vectorized::BlockUPtr>& blocks);
// Get next block from blocks queue. Called by ScanNode
// Set eos to true if there is no more data to read.
// And if eos is true, the block returned must be nullptr.
virtual Status get_block_from_queue(vectorized::BlockUPtr* block, bool* eos, bool wait = true);
virtual Status get_block_from_queue(RuntimeState* state, vectorized::BlockUPtr* block,
bool* eos, int id, bool wait = true);
// When a scanner complete a scan, this method will be called
// to return the scanner to the list for next scheduling.
@ -121,7 +106,7 @@ public:
void get_next_batch_of_scanners(std::list<VScanner*>* current_run);
void clear_and_join();
void clear_and_join(VScanNode* node, RuntimeState* state);
virtual bool no_schedule();
@ -129,12 +114,14 @@ public:
RuntimeState* state() { return _state; }
void incr_num_ctx_scheduling(int64_t num) { _num_ctx_scheduling += num; }
void incr_num_scanner_scheduling(int64_t num) { _num_scanner_scheduling += num; }
void incr_num_ctx_scheduling(int64_t num) { _scanner_ctx_sched_counter->update(num); }
void incr_num_scanner_scheduling(int64_t num) { _scanner_sched_counter->update(num); }
VScanNode* parent() { return _parent; }
virtual bool empty_in_queue();
virtual bool empty_in_queue(int id);
virtual void set_max_queue_size(int max_queue_size) {};
// the unique id of this context
std::string ctx_id;
@ -143,15 +130,12 @@ public:
std::vector<bthread_t> _btids;
private:
Status _close_and_clear_scanners();
Status _close_and_clear_scanners(VScanNode* node, RuntimeState* state);
inline bool _has_enough_space_in_blocks_queue() const {
return _cur_bytes_in_queue < _max_bytes_in_queue / 2;
}
// do nothing here, we only do update on pip_scanner_context
virtual void _update_block_queue_empty() {}
protected:
RuntimeState* _state;
VScanNode* _parent;
@ -198,6 +182,7 @@ protected:
doris::Mutex _free_blocks_lock;
std::vector<vectorized::BlockUPtr> _free_blocks;
int _batch_size;
// The limit from SQL's limit clause
int64_t limit;
@ -221,6 +206,7 @@ protected:
// The max limit bytes of blocks in blocks queue
int64_t _max_bytes_in_queue;
doris::vectorized::ScannerScheduler* _scanner_scheduler;
// List "scanners" saves all "unfinished" scanners.
// The scanner scheduler will pop scanners from this list, run scanner,
// and then if the scanner is not finished, will be pushed back to this list.
@ -230,8 +216,13 @@ protected:
std::vector<int64_t> _finished_scanner_runtime;
std::vector<int64_t> _finished_scanner_rows_read;
int64_t _num_ctx_scheduling = 0;
int64_t _num_scanner_scheduling = 0;
std::shared_ptr<RuntimeProfile> _scanner_profile;
RuntimeProfile::Counter* _scanner_sched_counter = nullptr;
RuntimeProfile::Counter* _scanner_ctx_sched_counter = nullptr;
RuntimeProfile::HighWaterMarkCounter* _free_blocks_memory_usage = nullptr;
RuntimeProfile::HighWaterMarkCounter* _queued_blocks_memory_usage = nullptr;
RuntimeProfile::Counter* _newly_create_free_blocks_num = nullptr;
RuntimeProfile::Counter* _scanner_wait_batch_timer = nullptr;
};
} // namespace vectorized
} // namespace doris

View File

@ -66,6 +66,8 @@ static bool ignore_cast(SlotDescriptor* slot, VExpr* expr) {
Status VScanNode::init(const TPlanNode& tnode, RuntimeState* state) {
RETURN_IF_ERROR(ExecNode::init(tnode, state));
_state = state;
_is_pipeline_scan = state->enable_pipeline_exec();
_shared_scan_opt = state->shared_scan_opt();
const TQueryOptions& query_options = state->query_options();
if (query_options.__isset.max_scan_key_num) {
@ -92,6 +94,21 @@ Status VScanNode::prepare(RuntimeState* state) {
for (auto& rf_ctx : _runtime_filter_ctxs) {
rf_ctx.runtime_filter->init_profile(_runtime_profile.get());
}
if (_is_pipeline_scan) {
if (_shared_scan_opt) {
_shared_scanner_controller =
state->get_query_fragments_ctx()->get_shared_scanner_controller();
auto [should_create_scanner, queue_id] =
_shared_scanner_controller->should_build_scanner_and_queue_id(id());
_should_create_scanner = should_create_scanner;
_context_queue_id = queue_id;
} else {
_should_create_scanner = true;
_context_queue_id = 0;
}
}
return Status::OK();
}
@ -113,18 +130,37 @@ Status VScanNode::alloc_resource(RuntimeState* state) {
RETURN_IF_ERROR(ExecNode::alloc_resource(state));
RETURN_IF_ERROR(_acquire_runtime_filter());
RETURN_IF_ERROR(_process_conjuncts());
if (_eos) {
return Status::OK();
if (_is_pipeline_scan) {
if (_should_create_scanner) {
auto status = !_eos ? _prepare_scanners() : Status::OK();
if (_scanner_ctx) {
DCHECK(!_eos && _num_scanners->value() > 0);
_scanner_ctx->set_max_queue_size(
_shared_scan_opt ? std::max(state->query_parallel_instance_num(), 1) : 1);
RETURN_IF_ERROR(
_state->exec_env()->scanner_scheduler()->submit(_scanner_ctx.get()));
}
if (_shared_scan_opt) {
_shared_scanner_controller->set_scanner_context(id(),
_eos ? nullptr : _scanner_ctx);
}
RETURN_IF_ERROR(status);
} else if (_shared_scanner_controller->scanner_context_is_ready(id())) {
_scanner_ctx = _shared_scanner_controller->get_scanner_context(id());
if (!_scanner_ctx) {
_eos = true;
}
} else {
return Status::WaitForScannerContext("Need wait for scanner context create");
}
} else {
RETURN_IF_ERROR(!_eos ? _prepare_scanners() : Status::OK());
if (_scanner_ctx) {
RETURN_IF_ERROR(_state->exec_env()->scanner_scheduler()->submit(_scanner_ctx.get()));
}
}
std::list<VScanner*> scanners;
RETURN_IF_ERROR(_init_scanners(&scanners));
if (scanners.empty()) {
_eos = true;
} else {
COUNTER_SET(_num_scanners, static_cast<int64_t>(scanners.size()));
RETURN_IF_ERROR(_start_scanners(scanners));
}
RETURN_IF_CANCELLED(state);
_opened = true;
return Status::OK();
@ -163,7 +199,7 @@ Status VScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool*
}
vectorized::BlockUPtr scan_block = nullptr;
RETURN_IF_ERROR(_scanner_ctx->get_block_from_queue(&scan_block, eos));
RETURN_IF_ERROR(_scanner_ctx->get_block_from_queue(state, &scan_block, eos, _context_queue_id));
if (*eos) {
DCHECK(scan_block == nullptr);
return Status::OK();
@ -184,12 +220,6 @@ Status VScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool*
Status VScanNode::_init_profile() {
// 1. counters for scan node
auto* memory_usage = _runtime_profile->create_child("MemoryUsage", true, true);
_runtime_profile->add_child(memory_usage, false, nullptr);
_queued_blocks_memory_usage =
memory_usage->AddHighWaterMarkCounter("QueuedBlocks", TUnit::BYTES);
_free_blocks_memory_usage = memory_usage->AddHighWaterMarkCounter("FreeBlocks", TUnit::BYTES);
_rows_read_counter = ADD_COUNTER(_runtime_profile, "RowsRead", TUnit::UNIT);
_total_throughput_counter =
runtime_profile()->add_rate_counter("TotalReadThroughput", _rows_read_counter);
@ -201,30 +231,36 @@ Status VScanNode::_init_profile() {
_scanner_profile.reset(new RuntimeProfile("VScanner"));
runtime_profile()->add_child(_scanner_profile.get(), true, nullptr);
auto* memory_usage = _scanner_profile->create_child("MemoryUsage", true, true);
_runtime_profile->add_child(memory_usage, false, nullptr);
_queued_blocks_memory_usage =
memory_usage->AddHighWaterMarkCounter("QueuedBlocks", TUnit::BYTES);
_free_blocks_memory_usage = memory_usage->AddHighWaterMarkCounter("FreeBlocks", TUnit::BYTES);
_newly_create_free_blocks_num =
ADD_COUNTER(_scanner_profile, "NewlyCreateFreeBlocksNum", TUnit::UNIT);
// time of transfer thread to wait for block from scan thread
_scanner_wait_batch_timer = ADD_TIMER(_scanner_profile, "ScannerBatchWaitTime");
_scanner_sched_counter = ADD_COUNTER(_scanner_profile, "ScannerSchedCount", TUnit::UNIT);
_scanner_ctx_sched_counter = ADD_COUNTER(_scanner_profile, "ScannerCtxSchedCount", TUnit::UNIT);
_scan_timer = ADD_TIMER(_scanner_profile, "ScannerGetBlockTime");
_scan_cpu_timer = ADD_TIMER(_scanner_profile, "ScannerCpuTime");
_prefilter_timer = ADD_TIMER(_scanner_profile, "ScannerPrefilterTime");
_convert_block_timer = ADD_TIMER(_scanner_profile, "ScannerConvertBlockTime");
_filter_timer = ADD_TIMER(_scanner_profile, "ScannerFilterTime");
_scanner_sched_counter = ADD_COUNTER(_runtime_profile, "ScannerSchedCount", TUnit::UNIT);
_scanner_ctx_sched_counter = ADD_COUNTER(_runtime_profile, "ScannerCtxSchedCount", TUnit::UNIT);
// time of transfer thread to wait for block from scan thread
_scanner_wait_batch_timer = ADD_TIMER(_runtime_profile, "ScannerBatchWaitTime");
// time of scan thread to wait for worker thread of the thread pool
_scanner_wait_worker_timer = ADD_TIMER(_runtime_profile, "ScannerWorkerWaitTime");
_pre_alloc_free_blocks_num =
ADD_COUNTER(_runtime_profile, "PreAllocFreeBlocksNum", TUnit::UNIT);
_newly_create_free_blocks_num =
ADD_COUNTER(_runtime_profile, "NewlyCreateFreeBlocksNum", TUnit::UNIT);
_max_scanner_thread_num = ADD_COUNTER(_runtime_profile, "MaxScannerThreadNum", TUnit::UNIT);
return Status::OK();
}
Status VScanNode::_start_scanners(const std::list<VScanner*>& scanners) {
if (_state->enable_pipeline_exec()) {
if (_is_pipeline_scan) {
_scanner_ctx.reset(new pipeline::PipScannerContext(_state, this, _input_tuple_desc,
_output_tuple_desc, scanners, limit(),
_state->query_options().mem_limit / 20));
@ -234,7 +270,6 @@ Status VScanNode::_start_scanners(const std::list<VScanner*>& scanners) {
_state->query_options().mem_limit / 20));
}
RETURN_IF_ERROR(_scanner_ctx->init());
RETURN_IF_ERROR(_state->exec_env()->scanner_scheduler()->submit(_scanner_ctx.get()));
return Status::OK();
}
@ -374,10 +409,12 @@ Status VScanNode::close(RuntimeState* state) {
void VScanNode::release_resource(RuntimeState* state) {
START_AND_SCOPE_SPAN(state->get_tracer(), span, "VScanNode::release_resource");
if (_scanner_ctx.get()) {
// stop and wait the scanner scheduler to be done
// _scanner_ctx may not be created for some short circuit case.
_scanner_ctx->set_should_stop();
_scanner_ctx->clear_and_join();
if (!state->enable_pipeline_exec() || _should_create_scanner) {
// stop and wait the scanner scheduler to be done
// _scanner_ctx may not be created for some short circuit case.
_scanner_ctx->set_should_stop();
_scanner_ctx->clear_and_join(this, state);
}
}
for (auto& ctx : _runtime_filter_ctxs) {
@ -1318,4 +1355,16 @@ VScanNode::PushDownType VScanNode::_should_push_down_in_predicate(VInPredicate*
return PushDownType::ACCEPTABLE;
}
Status VScanNode::_prepare_scanners() {
std::list<VScanner*> scanners;
RETURN_IF_ERROR(_init_scanners(&scanners));
if (scanners.empty()) {
_eos = true;
} else {
COUNTER_SET(_num_scanners, static_cast<int64_t>(scanners.size()));
RETURN_IF_ERROR(_start_scanners(scanners));
}
return Status::OK();
}
} // namespace doris::vectorized

View File

@ -103,6 +103,8 @@ public:
Status try_close();
bool should_run_serial() const { return _should_run_serial; }
bool ready_to_open() { return _shared_scanner_controller->scanner_context_is_ready(id()); }
bool ready_to_read() { return !_scanner_ctx->empty_in_queue(_context_queue_id); }
enum class PushDownType {
// The predicate can not be pushed down to data source
@ -178,8 +180,12 @@ protected:
// Only predicate on key column can be pushed down.
virtual bool _is_key_column(const std::string& col_name) { return false; }
Status _prepare_scanners();
protected:
RuntimeState* _state;
bool _is_pipeline_scan = false;
bool _shared_scan_opt = false;
// For load scan node, there should be both input and output tuple descriptor.
// For query scan node, there is only output_tuple_desc.
TupleId _input_tuple_id = -1;
@ -267,7 +273,11 @@ protected:
int64_t _limit_per_scanner = -1;
protected:
std::unique_ptr<RuntimeProfile> _scanner_profile;
std::shared_ptr<vectorized::SharedScannerController> _shared_scanner_controller;
bool _should_create_scanner = false;
int _context_queue_id = -1;
std::shared_ptr<RuntimeProfile> _scanner_profile;
// rows read from the scanner (including those discarded by (pre)filters)
RuntimeProfile::Counter* _rows_read_counter;

View File

@ -417,7 +417,7 @@ struct UnixTimeStampDateImpl {
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, size_t result,
size_t input_rows_count) {
const ColumnPtr col_source = block.get_by_position(arguments[0]).column;
const ColumnPtr& col_source = block.get_by_position(arguments[0]).column;
auto col_result = ColumnVector<Int32>::create();
auto null_map = ColumnVector<UInt8>::create();
auto& col_result_data = col_result->get_data();

View File

@ -85,4 +85,4 @@ private:
};
} // namespace vectorized
} // namespace doris
} // namespace doris

View File

@ -0,0 +1,69 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <condition_variable>
#include <map>
#include <memory>
#include <mutex>
#include <vector>
#include "vec/core/block.h"
#include "vec/exec/scan/scanner_context.h"
namespace doris::vectorized {
class ScannerContext; // redeclaration; full definition comes from scanner_context.h

// Coordinates the pipeline shared-scan optimization: among the parallel
// instances of one scan node, exactly one instance builds the scanners and
// the shared ScannerContext, and every instance receives its own queue id
// into that shared context. All methods are guarded by _mutex.
class SharedScannerController {
public:
    /// Returns {should_build, queue_id} for the calling instance of node
    /// `my_node_id`. The first caller for a node id gets {true, 0} and must
    /// create the scanners; every later caller gets {false, n} with a fresh,
    /// monotonically increasing queue id.
    std::pair<bool, int> should_build_scanner_and_queue_id(int my_node_id) {
        std::lock_guard<std::mutex> lock(_mutex);
        auto [it, inserted] = _scanner_parallel.try_emplace(my_node_id, 0);
        if (inserted) {
            return {true, 0};
        }
        // Known node id: bump the parallel counter and hand out the new id.
        return {false, ++it->second};
    }

    /// Registers the shared ScannerContext built by the elected instance.
    /// Takes the shared_ptr by const reference: no ownership is transferred
    /// here, so the extra atomic refcount bump of pass-by-value is avoided.
    void set_scanner_context(int my_node_id,
                             const std::shared_ptr<ScannerContext>& scanner_context) {
        std::lock_guard<std::mutex> lock(_mutex);
        _scanner_context.insert({my_node_id, scanner_context});
    }

    /// True once set_scanner_context() has been called for `my_node_id`.
    bool scanner_context_is_ready(int my_node_id) {
        std::lock_guard<std::mutex> lock(_mutex);
        return _scanner_context.find(my_node_id) != _scanner_context.end();
    }

    /// Returns the registered context, or nullptr when none is registered.
    /// Uses find() instead of operator[]: the latter default-inserts a null
    /// entry for an unknown node id, which would make
    /// scanner_context_is_ready() spuriously return true afterwards.
    std::shared_ptr<ScannerContext> get_scanner_context(int my_node_id) {
        std::lock_guard<std::mutex> lock(_mutex);
        auto it = _scanner_context.find(my_node_id);
        return it == _scanner_context.end() ? nullptr : it->second;
    }

private:
    std::mutex _mutex;
    std::map<int /*node id*/, int /*parallel*/> _scanner_parallel;
    std::map<int /*node id*/, std::shared_ptr<ScannerContext>> _scanner_context;
};
} // namespace doris::vectorized

View File

@ -44,6 +44,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalStorageLayerAggrega
import org.apache.doris.nereids.trees.plans.physical.PhysicalTVFRelation;
import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor;
import org.apache.doris.nereids.util.JoinUtils;
import org.apache.doris.qe.ConnectContext;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
@ -218,7 +219,9 @@ public class ChildOutputPropertyDeriver extends PlanVisitor<PhysicalProperties,
@Override
public PhysicalProperties visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanContext context) {
// TODO: find a better way to handle both tablet num == 1 and colocate table together in future
if (!olapScan.getTable().isColocateTable() && olapScan.getScanTabletNum() == 1) {
if (!olapScan.getTable().isColocateTable() && olapScan.getScanTabletNum() == 1
&& (!ConnectContext.get().getSessionVariable().enablePipelineEngine()
|| ConnectContext.get().getSessionVariable().getParallelExecInstanceNum() == 1)) {
return PhysicalProperties.GATHER;
} else if (olapScan.getDistributionSpec() instanceof DistributionSpecHash) {
return PhysicalProperties.createHash((DistributionSpecHash) olapScan.getDistributionSpec());

View File

@ -1118,6 +1118,9 @@ public class OlapScanNode extends ScanNode {
@Override
public int getNumInstances() {
    // With the pipeline engine, every BE runs parallel_exec_instance_num
    // instances for this scan (they share the scan ranges under the shared
    // scan opt), so the instance count comes from the session variable
    // rather than from the number of computed scan locations.
    if (ConnectContext.get().getSessionVariable().enablePipelineEngine()) {
        return ConnectContext.get().getSessionVariable().getParallelExecInstanceNum();
    }
    // Non-pipeline path: one instance per entry in `result`.
    // NOTE(review): `result` is presumably the scan range locations list
    // built by this node — confirm against the enclosing class.
    return result.size();
}

View File

@ -1622,6 +1622,7 @@ public class Coordinator {
bucketShuffleJoinController.computeInstanceParam(fragment.getFragmentId(),
parallelExecInstanceNum, params);
} else {
params.sharedScanOpt = true;
// case A
for (Entry<TNetworkAddress, Map<Integer, List<TScanRangeParams>>> entry : fragmentExecParamsMap.get(
fragment.getFragmentId()).scanRangeAssignment.entrySet()) {
@ -1630,13 +1631,22 @@ public class Coordinator {
for (Integer planNodeId : value.keySet()) {
List<TScanRangeParams> perNodeScanRanges = value.get(planNodeId);
int expectedInstanceNum = 1;
if (parallelExecInstanceNum > 1) {
//the scan instance num should not larger than the tablets num
expectedInstanceNum = Math.min(perNodeScanRanges.size(), parallelExecInstanceNum);
List<List<TScanRangeParams>> perInstanceScanRanges = Lists.newArrayList();
if (!enablePipelineEngine) {
int expectedInstanceNum = 1;
if (parallelExecInstanceNum > 1) {
//the scan instance num should not larger than the tablets num
expectedInstanceNum = Math.min(perNodeScanRanges.size(), parallelExecInstanceNum);
}
perInstanceScanRanges = ListUtil.splitBySize(perNodeScanRanges,
expectedInstanceNum);
} else {
int expectedInstanceNum = Math.min(parallelExecInstanceNum,
leftMostNode.getNumInstances());
for (int j = 0; j < Math.max(expectedInstanceNum, 1); j++) {
perInstanceScanRanges.add(perNodeScanRanges);
}
}
List<List<TScanRangeParams>> perInstanceScanRanges = ListUtil.splitBySize(perNodeScanRanges,
expectedInstanceNum);
LOG.debug("scan range number per instance is: {}", perInstanceScanRanges.size());
@ -3034,6 +3044,8 @@ public class Coordinator {
public List<FInstanceExecParam> instanceExecParams = Lists.newArrayList();
public FragmentScanRangeAssignment scanRangeAssignment = new FragmentScanRangeAssignment();
public boolean sharedScanOpt = false;
public FragmentExecParams(PlanFragment fragment) {
this.fragment = fragment;
}
@ -3125,6 +3137,7 @@ public class Coordinator {
fragment.isTransferQueryStatisticsWithEveryBatch());
params.setFragment(fragment.toThrift());
params.setLocalParams(Lists.newArrayList());
params.setSharedScanOpt(sharedScanOpt);
res.put(instanceExecParam.host, params);
}
TPipelineFragmentParams params = res.get(instanceExecParam.host);

View File

@ -1593,6 +1593,7 @@ public class SessionVariable implements Serializable, Writable {
tResult.setCodegenLevel(codegenLevel);
tResult.setBeExecVersion(Config.be_exec_version);
tResult.setEnablePipelineEngine(enablePipelineEngine);
tResult.setParallelInstance(parallelExecInstanceNum);
tResult.setReturnObjectDataAsBinary(returnObjectDataAsBinary);
tResult.setTrimTailingSpacesForExternalTableQuery(trimTailingSpacesForExternalTableQuery);
tResult.setEnableShareHashTableForBroadcastJoin(enableShareHashTableForBroadcastJoin);

View File

@ -201,9 +201,12 @@ struct TQueryOptions {
// For debug purpose, skip delete bitmap when reading data
63: optional bool skip_delete_bitmap = false
64: optional bool dry_run_query = false
65: optional bool enable_common_expr_pushdown = false;
66: optional i32 parallel_instance = 1
}
@ -598,6 +601,7 @@ struct TPipelineFragmentParams {
22: optional TGlobalDict global_dict // scan node could use the global dict to encode the string value to an integer
23: optional Planner.TPlanFragment fragment
24: list<TPipelineInstanceParams> local_params
25: optional bool shared_scan_opt = false;
}
struct TPipelineFragmentParamsList {

View File

@ -159,16 +159,16 @@
4 3 18
-- !select3 --
\N \N 24
\N \N 6
\N \N 24
\N 1 1
\N 2 3
\N 3 4
\N 4 2
\N 6 5
\N 9 3
1 \N 24
1 \N 6
1 \N 24
1 1 1
1 2 3
1 3 4
@ -216,12 +216,12 @@
-- !select7 --
1
2
3
4
1
2
2
3
3
4
4
-- !select1 --
@ -258,3 +258,4 @@ all 1
2 1
2 1
2 2

View File

@ -167,23 +167,23 @@
24 4
-- !in_subquery_with_order --
1 3
1 2
1 3
2 5
3 3
20 2
22 3
24 4
3 3
-- !exists_subquery_with_order --
1 3
1 2
1 3
2 4
3 4
3 3
20 2
22 3
24 4
3 3
3 4
-- !scalar_subquery_with_limit --
20 2
@ -200,13 +200,13 @@
20
-- !case_when_subquery --
20.0
20.0
20.0
20.0
20.0
4.0
4.0
20.0
20.0
20.0
20.0
20.0
-- !in --
1 2
@ -244,106 +244,106 @@
3 4
-- !hash_join_with_other_conjuncts1 --
1 3
1 2
2 5
1 3
2 4
3 4
2 5
3 3
3 4
-- !hash_join_with_other_conjuncts2 --
1 3
1 2
2 5
1 3
2 4
3 4
2 5
3 3
3 4
-- !hash_join_with_other_conjuncts3 --
1 3
1 2
2 5
1 3
2 4
3 4
2 5
3 3
3 4
-- !hash_join_with_other_conjuncts4 --
1 3
1 2
2 5
1 3
2 4
3 4
2 5
3 3
3 4
-- !same_subquery_in_conjuncts --
1 3
1 2
2 5
1 3
2 4
3 4
2 5
3 3
3 4
-- !two_subquery_in_one_conjuncts --
1 3
1 2
2 5
1 3
2 4
3 4
2 5
3 3
3 4
-- !multi_subquery_in_and_scalry --
1 3
1 2
2 5
1 3
2 4
3 4
2 5
3 3
3 4
-- !multi_subquery_in_and_exist --
1 3
1 2
2 5
1 3
2 4
3 4
2 5
3 3
3 4
-- !multi_subquery_in_and_exist_sum --
1 3
1 2
2 5
1 3
2 4
3 4
3 3
2 5
20 2
22 3
24 4
3 3
3 4
-- !multi_subquery_in_and_in --
1 3
1 2
2 5
1 3
2 4
3 4
2 5
3 3
3 4
-- !multi_subquery_scalar_and_exist --
1 3
1 2
2 5
1 3
2 4
3 4
3 3
2 5
20 2
22 3
24 4
3 3
3 4
-- !multi_subquery_scalar_and_scalar --
1 3
1 2
2 5
1 3
2 4
3 4
2 5
3 3
3 4
-- !multi_subquery_in_first_or_in_and_in --
3 3

View File

@ -1,21 +1,21 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !exists_subquery_with_limit --
1 3
1 2
1 3
2 4
3 4
3 3
20 2
22 3
24 4
3 3
3 4
-- !exists_subquery_with_order_and_limit --
1 3
1 2
1 3
2 4
3 4
3 3
20 2
22 3
24 4
3 3
3 4

View File

@ -161,7 +161,7 @@ suite("test_colocate_join") {
(20220101, 101, 202, 200, 100);"""
explain {
sql("select " +
sql("select /*+SET_VAR(parallel_fragment_exec_instance_num=1)*/ " +
" sum_col1,sum_col2 " +
"from " +
"(select datekey,sum(sum_col1) as sum_col1 from test_query_colocate where datekey=20220101 group by datekey) t1 " +

View File

@ -59,4 +59,4 @@ PROPERTIES (
sql "drop table if exists ${table1}"
}
}

View File

@ -49,4 +49,4 @@ suite("nereids_gen_fn") {
qt_sql_explode_split_outer_Varchar_Varchar_notnull '''
select id, e from fn_test lateral view explode_split_outer('a, b, c, d', ',') lv as e order by id, e'''
}
}

View File

@ -121,7 +121,7 @@ suite("test_nereids_grouping_sets") {
rollup(k1_, k2) order by k1_, k2
"""
qt_select3 "select 1 as k, k3, sum(k1) from groupingSetsTable group by cube(k, k3) order by k, k3"
qt_select3 "select 1 as k, k3, sum(k1) as sum_k1 from groupingSetsTable group by cube(k, k3) order by k, k3, sum_k1"
qt_select4 """
select k2, concat(k5, k6) as k_concat, sum(k1) from groupingSetsTable group by
@ -230,7 +230,7 @@ suite("test_nereids_grouping_sets") {
from
grouping_sum_table
) T
) T2;
) T2 order by a;
"""
order_qt_select1 """

View File

@ -227,7 +227,7 @@ suite("test_nereids_set_operation") {
}
qt_union39 """(select k1 from setOperationTable order by k1) union all (select k1 from setOperationTableNotNullable order by k1) order by k1;"""
qt_union40 """
order_qt_union40 """
SELECT k1 FROM setOperationTable WHERE k2 = 2
INTERSECT
SELECT k1 FROM setOperationTable WHERE k1 = 1
@ -235,7 +235,7 @@ suite("test_nereids_set_operation") {
SELECT k1 FROM setOperationTable WHERE k3 = 2
"""
qt_union41 """
order_qt_union41 """
SELECT k1 FROM setOperationTable WHERE k2 = 1
EXCEPT
SELECT k1 FROM setOperationTable WHERE k3 = 2
@ -245,7 +245,7 @@ suite("test_nereids_set_operation") {
SELECT k1 FROM setOperationTable WHERE k2 > 0)
"""
qt_union42 """
order_qt_union42 """
SELECT k1 FROM setOperationTable WHERE k2 = 1
EXCEPT
SELECT k1 FROM setOperationTable WHERE k3 = 2

View File

@ -236,35 +236,35 @@ suite ("sub_query_correlated") {
"""*/
//----------subquery with order----------
qt_scalar_subquery_with_order """
order_qt_scalar_subquery_with_order """
select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 > (select sum(sub_query_correlated_subquery3.k3) a from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 order by a);
"""
qt_in_subquery_with_order """
order_qt_in_subquery_with_order """
select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 not in (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 order by k2);
"""
qt_exists_subquery_with_order """
order_qt_exists_subquery_with_order """
select * from sub_query_correlated_subquery1 where exists (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 order by k2);
"""
//----------subquery with limit----------
qt_scalar_subquery_with_limit """
order_qt_scalar_subquery_with_limit """
select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 > (select sum(sub_query_correlated_subquery3.k3) a from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 limit 1);
"""
//----------subquery with order and limit----------
qt_scalar_subquery_with_order_and_limit """
order_qt_scalar_subquery_with_order_and_limit """
select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 > (select sum(sub_query_correlated_subquery3.k3) a from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 order by a limit 1);
"""
//---------subquery with Disjunctions-------------
qt_scalar_subquery_with_disjunctions """
order_qt_scalar_subquery_with_disjunctions """
SELECT DISTINCT k1 FROM sub_query_correlated_subquery1 i1 WHERE ((SELECT count(*) FROM sub_query_correlated_subquery1 WHERE ((k1 = i1.k1) AND (k2 = 2)) or ((k1 = i1.k1) AND (k2 = 1)) ) > 0);
"""
//--------subquery case when-----------
qt_case_when_subquery """
order_qt_case_when_subquery """
SELECT CASE
WHEN (
SELECT COUNT(*) / 2
@ -298,86 +298,86 @@ suite ("sub_query_correlated") {
SELECT * FROM sub_query_correlated_subquery1 WHERE EXISTS (SELECT k1 FROM sub_query_correlated_subquery3 WHERE k1 > 10) OR k1 < 10;
"""
qt_hash_join_with_other_conjuncts1 """
order_qt_hash_join_with_other_conjuncts1 """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 > sub_query_correlated_subquery3.k3) OR k1 < 10 ORDER BY k1;
"""
qt_hash_join_with_other_conjuncts2 """
order_qt_hash_join_with_other_conjuncts2 """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 < sub_query_correlated_subquery3.k3) OR k1 < 10 ORDER BY k1;
"""
qt_hash_join_with_other_conjuncts3 """
order_qt_hash_join_with_other_conjuncts3 """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 > sub_query_correlated_subquery3.k3) OR k1 < 11 ORDER BY k1;
"""
qt_hash_join_with_other_conjuncts4 """
order_qt_hash_join_with_other_conjuncts4 """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 < sub_query_correlated_subquery3.k3) OR k1 < 11 ORDER BY k1;
"""
qt_same_subquery_in_conjuncts """
order_qt_same_subquery_in_conjuncts """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3) OR k1 IN (SELECT k1 FROM sub_query_correlated_subquery3) OR k1 < 10 ORDER BY k1;
"""
qt_two_subquery_in_one_conjuncts """
order_qt_two_subquery_in_one_conjuncts """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3) OR k1 IN (SELECT k3 FROM sub_query_correlated_subquery3) OR k1 < 10 ORDER BY k1;
"""
qt_multi_subquery_in_and_scalry """
order_qt_multi_subquery_in_and_scalry """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
OR k1 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
OR k1 < 10 ORDER BY k1;
"""
qt_multi_subquery_in_and_exist """
order_qt_multi_subquery_in_and_exist """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
OR exists (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
OR k1 < 10 ORDER BY k1;
"""
qt_multi_subquery_in_and_exist_sum """
order_qt_multi_subquery_in_and_exist_sum """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
OR exists (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
OR k1 < 10 ORDER BY k1;
"""
qt_multi_subquery_in_and_in """
order_qt_multi_subquery_in_and_in """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
OR k2 in (SELECT k2 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
OR k1 < 10 ORDER BY k1;
"""
qt_multi_subquery_scalar_and_exist """
order_qt_multi_subquery_scalar_and_exist """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
OR exists (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
OR k1 < 10 ORDER BY k1;
"""
qt_multi_subquery_scalar_and_scalar """
order_qt_multi_subquery_scalar_and_scalar """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
OR k2 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.v1)
OR k1 < 10 ORDER BY k1;
"""
qt_multi_subquery_in_first_or_in_and_in """
order_qt_multi_subquery_in_first_or_in_and_in """
SELECT * FROM sub_query_correlated_subquery1 WHERE (k1 in (SELECT k2 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
or k2 in (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1))
and k1 in (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
"""
qt_multi_subquery_in_second_or_in_and_in """
order_qt_multi_subquery_in_second_or_in_and_in """
SELECT * FROM sub_query_correlated_subquery1 WHERE k1 in (SELECT k2 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
or k2 in (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
and k1 in (SELECT k1 FROM sub_query_correlated_subquery3 where sub_query_correlated_subquery1.k2 = sub_query_correlated_subquery3.k1)
"""
qt_multi_subquery_scalar_and_in_or_scalar_and_exists_agg """
order_qt_multi_subquery_scalar_and_in_or_scalar_and_exists_agg """
SELECT * FROM sub_query_correlated_subquery1 WHERE ((k1 != (SELECT sum(k1) FROM sub_query_correlated_subquery3) and k1 = 1 OR k1 < 10) and k1 = 10 and k1 = 15)
and (k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.k1)
OR k1 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.k1))
and exists (SELECT sum(k1) FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.k1);
"""
qt_multi_subquery_scalar_and_in_or_scalar_and_exists """
order_qt_multi_subquery_scalar_and_in_or_scalar_and_exists """
SELECT * FROM sub_query_correlated_subquery1 WHERE ((k1 != (SELECT sum(k1) FROM sub_query_correlated_subquery3) and k1 = 1 OR k1 < 10) and k1 = 10 and k1 = 15)
and (k1 IN (SELECT k1 FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.k1)
OR k1 < (SELECT sum(k1) FROM sub_query_correlated_subquery3 WHERE sub_query_correlated_subquery1.k1 = sub_query_correlated_subquery3.k1))

View File

@ -159,7 +159,7 @@ suite ("sub_query_diff_old_optimize") {
"""*/
//----------subquery with limit----------
qt_exists_subquery_with_limit """
order_qt_exists_subquery_with_limit """
select * from sub_query_diff_old_optimize_subquery1 where exists (select sub_query_diff_old_optimize_subquery3.k3 from sub_query_diff_old_optimize_subquery3 where sub_query_diff_old_optimize_subquery3.v2 = sub_query_diff_old_optimize_subquery1.k2 limit 1);
"""
@ -172,7 +172,7 @@ suite ("sub_query_diff_old_optimize") {
}
//----------subquery with order and limit-------
qt_exists_subquery_with_order_and_limit """
order_qt_exists_subquery_with_order_and_limit """
select * from sub_query_diff_old_optimize_subquery1 where exists (select sub_query_diff_old_optimize_subquery3.k3 from sub_query_diff_old_optimize_subquery3 where sub_query_diff_old_optimize_subquery3.v2 = sub_query_diff_old_optimize_subquery1.k2 order by k1 limit 1);
"""

View File

@ -32,10 +32,10 @@ suite("redundant_conjuncts") {
"""
qt_redundant_conjuncts """
EXPLAIN SELECT /*+SET_VAR(REWRITE_OR_TO_IN_PREDICATE_THRESHOLD=2) */ v1 FROM redundant_conjuncts WHERE k1 = 1 AND k1 = 1;
EXPLAIN SELECT /*+SET_VAR(REWRITE_OR_TO_IN_PREDICATE_THRESHOLD=2, parallel_fragment_exec_instance_num = 1) */ v1 FROM redundant_conjuncts WHERE k1 = 1 AND k1 = 1;
"""
qt_redundant_conjuncts_gnerated_by_extract_common_filter """
EXPLAIN SELECT /*+SET_VAR(REWRITE_OR_TO_IN_PREDICATE_THRESHOLD=100) */ v1 FROM redundant_conjuncts WHERE k1 = 1 OR k1 = 2;
EXPLAIN SELECT /*+SET_VAR(REWRITE_OR_TO_IN_PREDICATE_THRESHOLD=100, parallel_fragment_exec_instance_num = 1) */ v1 FROM redundant_conjuncts WHERE k1 = 1 OR k1 = 2;
"""
}

View File

@ -73,15 +73,15 @@ suite("test_query_limit", "query,p0") {
qt_limit16 "select * from (select * from ${tableName} order by k1, k2, k3, k4 limit 1, 2) a limit 2, 2"
qt_limit17 "select * from (select * from ${tableName} order by k1, k2, k3, k4 limit 1, 2) a limit 2, 3"
test {
sql "select * from ${tableName} limit 1, 10"
sql "select /*+SET_VAR(parallel_fragment_exec_instance_num=4)*/ * from ${tableName} limit 1, 10"
rowNum 2
}
test {
sql "select * from ${tableName} limit 2, 10"
sql "select /*+SET_VAR(parallel_fragment_exec_instance_num=4)*/ * from ${tableName} limit 2, 10"
rowNum 1
}
test {
sql "select * from ${tableName} limit 3, 10"
sql "select /*+SET_VAR(parallel_fragment_exec_instance_num=4)*/ * from ${tableName} limit 3, 10"
rowNum 0
}
}

View File

@ -123,7 +123,7 @@ suite("test_window_fn") {
select first_value(salary) over(order by enroll_date range between unbounded preceding and UNBOUNDED following), last_value(salary) over(order by enroll_date range between unbounded preceding and UNBOUNDED following), salary, enroll_date from ${tbName1} order by salary, enroll_date;
"""
qt_sql """
SELECT first_value(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
SELECT first_value(ten) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
"""
qt_sql """
SELECT first_value(unique1) over (order by four range between current row and unbounded following),
@ -190,7 +190,7 @@ suite("test_window_fn") {
"""
qt_sql """
SELECT depname, empno, salary, rank() OVER (PARTITION BY depname ORDER BY salary, empno)
FROM ${tbName1} ORDER BY rank() OVER (PARTITION BY depname ORDER BY salary, empno);
FROM ${tbName1} ORDER BY rank() OVER (PARTITION BY depname ORDER BY salary, empno), depname;
"""
qt_sql """
SELECT sum(salary) as s, row_number() OVER (ORDER BY depname) as r, sum(sum(salary)) OVER (ORDER BY depname DESC) as ss
@ -207,10 +207,10 @@ suite("test_window_fn") {
SELECT row_number() OVER (ORDER BY unique2) FROM ${tbName2} WHERE unique2 < 10;
"""
qt_sql """
SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM ${tbName2} WHERE unique2 < 10;
SELECT rank() OVER (PARTITION BY four ORDER BY ten) AS rank_1, ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
"""
qt_sql """
SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
SELECT dense_rank() OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
"""
qt_sql """
select ten, sum(unique1) + sum(unique2) as res, rank() over (order by sum(unique1) + sum(unique2)) as rank from ${tbName2} group by ten order by ten;
@ -256,7 +256,7 @@ suite("test_window_fn") {
SELECT count(1) OVER (PARTITION BY four) as c, four FROM (SELECT * FROM ${tbName2} WHERE two = 1)s WHERE unique2 < 10 order by c, four;
"""
qt_sql """
SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM ${tbName2} WHERE unique2 < 10;
SELECT avg(four) OVER (PARTITION BY four ORDER BY thousand / 100) FROM ${tbName2} WHERE unique2 < 10 order by four;
"""
qt_sql """
SELECT count(1) OVER (PARTITION BY four) FROM (SELECT * FROM ${tbName2} WHERE FALSE)s;
@ -278,19 +278,19 @@ suite("test_window_fn") {
// lag
qt_sql """
SELECT lag(ten, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
SELECT lag(ten, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
"""
// lead
qt_sql """
SELECT lead(ten, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
SELECT lead(ten, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
"""
qt_sql """
SELECT lead(ten * 2, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
SELECT lead(ten * 2, 1, 0) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
"""
qt_sql """
SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10;
SELECT lead(ten * 2, 1, -1) OVER (PARTITION BY four ORDER BY ten), ten, four FROM ${tbName2} WHERE unique2 < 10 order by four, ten;
"""
@ -353,7 +353,7 @@ suite("test_window_fn") {
qt_sql_window_last_value """
select u_id, u_city, u_salary,
last_value(u_salary) over (partition by u_city order by u_id rows between unbounded preceding and 1 preceding) last_value_test
from example_window_tb;
from example_window_tb order by u_id;
"""
sql "DROP TABLE IF EXISTS example_window_tb;"