[bugfix](scannerscheduler) should minus num_of_scanners before check should schedule #28926 (#29331)

---------

Co-authored-by: yiguolei <yiguolei@gmail.com>
This commit is contained in:
yiguolei
2024-01-03 20:47:35 +08:00
committed by GitHub
parent c84cd30223
commit bd8113f424
19 changed files with 272 additions and 353 deletions

View File

@ -1158,6 +1158,9 @@ DEFINE_mBool(enable_column_type_check, "true");
// 128 MB
DEFINE_mInt64(local_exchange_buffer_mem_limit, "134217728");
// If its value <= 0 (the default is 0), the debug log is disabled
DEFINE_mInt64(enable_debug_log_timeout_secs, "0");
// clang-format off
#ifdef BE_TEST
// test s3

View File

@ -1232,6 +1232,8 @@ DECLARE_mInt32(variant_max_merged_tablet_schema_size);
DECLARE_mInt64(local_exchange_buffer_mem_limit);
DECLARE_mInt64(enable_debug_log_timeout_secs);
DECLARE_mBool(enable_column_type_check);
#ifdef BE_TEST

View File

@ -434,14 +434,6 @@ public:
bool ok() const { return _code == ErrorCode::OK; }
// Convert into TStatus. Call this if 'status_container' contains an optional
// TStatus field named 'status'. This also sets __isset.status.
template <typename T>
void set_t_status(T* status_container) const {
to_thrift(&status_container->status);
status_container->__isset.status = true;
}
// Convert into TStatus.
void to_thrift(TStatus* status) const;
TStatus to_thrift() const;

View File

@ -239,6 +239,10 @@ public:
size_t children_count() const { return _children.size(); }
// when the fragment is normal finished, call this method to do some finish work
// such as send the last buffer to remote.
virtual Status try_close(RuntimeState* state) { return Status::OK(); }
protected:
friend class DataSink;

View File

@ -53,6 +53,7 @@ bool ScanOperator::can_read() {
return false;
}
} else {
// If the scanner meets any error, done == true
if (_node->_eos || _node->_scanner_ctx->done()) {
// _eos: need eos
// _scanner_ctx->done(): need finish
@ -64,14 +65,6 @@ bool ScanOperator::can_read() {
}
}
bool ScanOperator::is_pending_finish() const {
return _node->_scanner_ctx && !_node->_scanner_ctx->no_schedule();
}
Status ScanOperator::try_close(RuntimeState* state) {
return _node->try_close(state);
}
bool ScanOperator::runtime_filters_are_ready_or_timeout() {
return _node->runtime_filters_are_ready_or_timeout();
}
@ -81,9 +74,8 @@ std::string ScanOperator::debug_string() const {
fmt::format_to(debug_string_buffer, "{}, scanner_ctx is null: {} ",
SourceOperator::debug_string(), _node->_scanner_ctx == nullptr);
if (_node->_scanner_ctx) {
fmt::format_to(debug_string_buffer, ", num_running_scanners = {}, num_scheduling_ctx = {} ",
_node->_scanner_ctx->get_num_running_scanners(),
_node->_scanner_ctx->get_num_scheduling_ctx());
fmt::format_to(debug_string_buffer, ", num_running_scanners = {}",
_node->_scanner_ctx->debug_string());
}
return fmt::to_string(debug_string_buffer);
}
@ -101,9 +93,6 @@ std::string ScanOperator::debug_string() const {
template <typename Derived>
ScanLocalState<Derived>::ScanLocalState(RuntimeState* state, OperatorXBase* parent)
: ScanLocalStateBase(state, parent) {
_finish_dependency = std::make_shared<FinishDependency>(
parent->operator_id(), parent->node_id(), parent->get_name() + "_FINISH_DEPENDENCY",
state->get_query_ctx());
_filter_dependency = std::make_shared<RuntimeFilterDependency>(
parent->operator_id(), parent->node_id(), parent->get_name() + "_FILTER_DEPENDENCY",
state->get_query_ctx());
@ -175,7 +164,6 @@ Status ScanLocalState<Derived>::open(RuntimeState* state) {
auto status = _eos ? Status::OK() : _prepare_scanners();
if (_scanner_ctx) {
_finish_dependency->block();
DCHECK(!_eos && _num_scanners->value() > 0);
RETURN_IF_ERROR(_scanner_ctx->init());
RETURN_IF_ERROR(state->exec_env()->scanner_scheduler()->submit(_scanner_ctx));
@ -569,15 +557,14 @@ std::string ScanLocalState<Derived>::debug_string(int indentation_level) const {
_eos.load());
if (_scanner_ctx) {
fmt::format_to(debug_string_buffer, "");
fmt::format_to(
debug_string_buffer,
", Scanner Context: (_is_finished = {}, _should_stop = {}, "
"_num_running_scanners={}, "
"_num_scheduling_ctx = {}, _num_unfinished_scanners = {}, status = {}, error = {})",
_scanner_ctx->is_finished(), _scanner_ctx->should_stop(),
_scanner_ctx->get_num_running_scanners(), _scanner_ctx->get_num_scheduling_ctx(),
_scanner_ctx->get_num_unfinished_scanners(), _scanner_ctx->status().to_string(),
_scanner_ctx->status_error());
fmt::format_to(debug_string_buffer,
", Scanner Context: (_is_finished = {}, _should_stop = {}, "
"_num_running_scanners={}, "
" _num_unfinished_scanners = {}, status = {})",
_scanner_ctx->is_finished(), _scanner_ctx->should_stop(),
_scanner_ctx->get_num_running_scanners(),
_scanner_ctx->get_num_unfinished_scanners(),
_scanner_ctx->status().to_string());
}
return fmt::to_string(debug_string_buffer);
@ -1225,23 +1212,27 @@ template <typename Derived>
Status ScanLocalState<Derived>::_prepare_scanners() {
std::list<vectorized::VScannerSPtr> scanners;
RETURN_IF_ERROR(_init_scanners(&scanners));
// Init scanner wrapper
for (auto it = scanners.begin(); it != scanners.end(); ++it) {
_scanners.emplace_back(std::make_shared<vectorized::ScannerDelegate>(*it));
}
if (scanners.empty()) {
_eos = true;
_scan_dependency->set_ready();
} else {
COUNTER_SET(_num_scanners, static_cast<int64_t>(scanners.size()));
RETURN_IF_ERROR(_start_scanners(scanners));
RETURN_IF_ERROR(_start_scanners(_scanners));
}
return Status::OK();
}
template <typename Derived>
Status ScanLocalState<Derived>::_start_scanners(
const std::list<vectorized::VScannerSPtr>& scanners) {
const std::list<std::shared_ptr<vectorized::ScannerDelegate>>& scanners) {
auto& p = _parent->cast<typename Derived::Parent>();
_scanner_ctx = PipXScannerContext::create_shared(
state(), this, p._output_tuple_desc, p.output_row_descriptor(), scanners, p.limit(),
state()->scan_queue_mem_limit(), _scan_dependency, _finish_dependency);
state()->scan_queue_mem_limit(), _scan_dependency);
return Status::OK();
}
@ -1317,9 +1308,6 @@ Status ScanLocalState<Derived>::_init_profile() {
_max_scanner_thread_num = ADD_COUNTER(_runtime_profile, "MaxScannerThreadNum", TUnit::UNIT);
_wait_for_finish_dependency_timer =
ADD_TIMER(_runtime_profile, "WaitForPendingFinishDependency");
return Status::OK();
}
@ -1427,17 +1415,6 @@ Status ScanOperatorX<LocalStateType>::open(RuntimeState* state) {
return Status::OK();
}
template <typename LocalStateType>
Status ScanOperatorX<LocalStateType>::try_close(RuntimeState* state) {
auto& local_state = get_local_state(state);
if (local_state._scanner_ctx) {
// mark this scanner ctx as should_stop to make sure scanners will not be scheduled anymore
// TODO: there is a lock in `set_should_stop` may cause some slight impact
local_state._scanner_ctx->set_should_stop();
}
return Status::OK();
}
template <typename Derived>
Status ScanLocalState<Derived>::close(RuntimeState* state) {
if (_closed) {
@ -1449,10 +1426,9 @@ Status ScanLocalState<Derived>::close(RuntimeState* state) {
SCOPED_TIMER(exec_time_counter());
if (_scanner_ctx) {
_scanner_ctx->clear_and_join(reinterpret_cast<ScanLocalStateBase*>(this), state);
_scanner_ctx->stop_scanners(state);
}
COUNTER_SET(_wait_for_dependency_timer, _scan_dependency->watcher_elapse_time());
COUNTER_SET(_wait_for_finish_dependency_timer, _finish_dependency->watcher_elapse_time());
COUNTER_SET(_wait_for_rf_timer, _filter_dependency->watcher_elapse_time());
return PipelineXLocalState<ScanDependency>::close(state);
@ -1509,7 +1485,7 @@ Status ScanOperatorX<LocalStateType>::get_block(RuntimeState* state, vectorized:
if (eos) {
source_state = SourceState::FINISHED;
// reach limit, stop the scanners.
local_state._scanner_ctx->set_should_stop();
local_state._scanner_ctx->stop_scanners(state);
}
return Status::OK();

View File

@ -32,6 +32,9 @@
namespace doris {
class ExecNode;
} // namespace doris
namespace doris::vectorized {
class ScannerDelegate;
}
namespace doris::pipeline {
class PipScannerContext;
@ -49,13 +52,9 @@ public:
bool can_read() override; // for source
bool is_pending_finish() const override;
bool runtime_filters_are_ready_or_timeout() override;
std::string debug_string() const override;
Status try_close(RuntimeState* state) override;
};
class ScanDependency final : public Dependency {
@ -174,7 +173,6 @@ protected:
RuntimeProfile::Counter* _wait_for_scanner_done_timer = nullptr;
// time of prefilter input block from scanner
RuntimeProfile::Counter* _wait_for_eos_timer = nullptr;
RuntimeProfile::Counter* _wait_for_finish_dependency_timer = nullptr;
RuntimeProfile::Counter* _wait_for_rf_timer = nullptr;
};
@ -215,7 +213,6 @@ class ScanLocalState : public ScanLocalStateBase {
int64_t get_push_down_count() override;
RuntimeFilterDependency* filterdependency() override { return _filter_dependency.get(); };
Dependency* finishdependency() override { return _finish_dependency.get(); }
protected:
template <typename LocalStateType>
@ -351,7 +348,7 @@ protected:
Status _prepare_scanners();
// Submit the scanner to the thread pool and start execution
Status _start_scanners(const std::list<vectorized::VScannerSPtr>& scanners);
Status _start_scanners(const std::list<std::shared_ptr<vectorized::ScannerDelegate>>& scanners);
// For some conjuncts there is a chance to eliminate the cast operator
// E.g. Variant's sub-column could eliminate the cast in the storage layer if
@ -414,14 +411,13 @@ protected:
std::shared_ptr<RuntimeFilterDependency> _filter_dependency;
std::shared_ptr<Dependency> _finish_dependency;
// ScanLocalState owns the ownership of scanner, scanner context only has its weakptr
std::list<std::shared_ptr<vectorized::ScannerDelegate>> _scanners;
};
template <typename LocalStateType>
class ScanOperatorX : public OperatorX<LocalStateType> {
public:
Status try_close(RuntimeState* state) override;
Status init(const TPlanNode& tnode, RuntimeState* state) override;
Status prepare(RuntimeState* state) override { return OperatorXBase::prepare(state); }
Status open(RuntimeState* state) override;

View File

@ -23,6 +23,7 @@
#include <string>
#include <vector>
#include "common/config.h"
#include "common/status.h"
#include "exec/operator.h"
#include "pipeline.h"
@ -264,6 +265,32 @@ public:
bool is_running() { return _running.load(); }
void set_running(bool running) { _running = running; }
bool is_exceed_debug_timeout() {
if (_has_exceed_timeout) {
return true;
}
// If enable_debug_log_timeout_secs <= 0, then disable the log
if (_pipeline_task_watcher.elapsed_time() >
config::enable_debug_log_timeout_secs * 1000l * 1000l * 1000l) {
_has_exceed_timeout = true;
return true;
}
return false;
}
void log_detail_if_need() {
if (config::enable_debug_log_timeout_secs < 1) {
return;
}
if (is_exceed_debug_timeout()) {
LOG(INFO) << "query id|instanceid " << print_id(_state->query_id()) << "|"
<< print_id(_state->fragment_instance_id())
<< " current pipeline exceed run time "
<< config::enable_debug_log_timeout_secs << " seconds. Task state "
<< get_state_name(get_state()) << debug_string();
}
}
protected:
void _finish_p_dependency() {
for (const auto& p : _pipeline->_parents) {
@ -278,7 +305,7 @@ protected:
uint32_t _index;
PipelinePtr _pipeline;
bool _dependency_finish = false;
bool _has_exceed_timeout = false;
bool _prepared;
bool _opened;
RuntimeState* _state = nullptr;

View File

@ -119,6 +119,7 @@ void BlockedTaskScheduler::_schedule() {
while (iter != local_blocked_tasks.end()) {
auto* task = *iter;
auto state = task->get_state();
task->log_detail_if_need();
if (state == PipelineTaskState::PENDING_FINISH) {
// should cancel or should finish
if (task->is_pending_finish()) {
@ -236,6 +237,7 @@ void TaskScheduler::_do_work(size_t index) {
static_cast<void>(_task_queue->push_back(task, index));
continue;
}
task->log_detail_if_need();
task->set_running(true);
task->set_task_queue(_task_queue.get());
auto* fragment_ctx = task->fragment_context();

View File

@ -213,7 +213,7 @@ void FragmentMgr::coordinator_callback(const ReportStatusRequest& req) {
params.__set_backend_num(req.backend_num);
params.__set_fragment_instance_id(req.fragment_instance_id);
params.__set_fragment_id(req.fragment_id);
exec_status.set_t_status(&params);
params.__set_status(exec_status.to_thrift());
params.__set_done(req.done);
params.__set_query_type(req.runtime_state->query_type());
params.__set_finished_scan_ranges(req.runtime_state->num_finished_range());

View File

@ -405,7 +405,7 @@ void BackendService::exec_plan_fragment(TExecPlanFragmentResult& return_val,
const TExecPlanFragmentParams& params) {
LOG(INFO) << "exec_plan_fragment() instance_id=" << print_id(params.params.fragment_instance_id)
<< " coord=" << params.coord << " backend#=" << params.backend_num;
start_plan_fragment_execution(params).set_t_status(&return_val);
return_val.__set_status(start_plan_fragment_execution(params).to_thrift());
}
Status BackendService::start_plan_fragment_execution(const TExecPlanFragmentParams& exec_params) {

View File

@ -30,9 +30,9 @@ public:
PipScannerContext(RuntimeState* state, vectorized::VScanNode* parent,
const TupleDescriptor* output_tuple_desc,
const RowDescriptor* output_row_descriptor,
const std::list<vectorized::VScannerSPtr>& scanners, int64_t limit_,
int64_t max_bytes_in_blocks_queue, const std::vector<int>& col_distribute_ids,
const int num_parallel_instances)
const std::list<std::shared_ptr<vectorized::ScannerDelegate>>& scanners,
int64_t limit_, int64_t max_bytes_in_blocks_queue,
const std::vector<int>& col_distribute_ids, const int num_parallel_instances)
: vectorized::ScannerContext(state, parent, output_tuple_desc, output_row_descriptor,
scanners, limit_, max_bytes_in_blocks_queue,
num_parallel_instances),
@ -55,6 +55,9 @@ public:
std::vector<vectorized::BlockUPtr> merge_blocks;
{
std::unique_lock<std::mutex> l(*_queue_mutexs[id]);
// The pipeline may be woken up by scanner done. If there is still any data
// in the queue, read that data first and then check whether the scanner is done;
// if done, eos is returned to indicate that the scan operator has finished.
if (_blocks_queues[id].empty()) {
*eos = done();
return Status::OK();
@ -97,9 +100,6 @@ public:
return Status::OK();
}
// We should make those method lock free.
bool done() override { return _is_finished || _should_stop; }
void append_blocks_to_queue(std::vector<vectorized::BlockUPtr>& blocks) override {
const int queue_size = _blocks_queues.size();
const int block_size = blocks.size();
@ -277,13 +277,12 @@ public:
PipXScannerContext(RuntimeState* state, ScanLocalStateBase* local_state,
const TupleDescriptor* output_tuple_desc,
const RowDescriptor* output_row_descriptor,
const std::list<vectorized::VScannerSPtr>& scanners, int64_t limit_,
int64_t max_bytes_in_blocks_queue,
std::shared_ptr<pipeline::ScanDependency> dependency,
std::shared_ptr<pipeline::Dependency> finish_dependency)
const std::list<std::shared_ptr<vectorized::ScannerDelegate>>& scanners,
int64_t limit_, int64_t max_bytes_in_blocks_queue,
std::shared_ptr<pipeline::ScanDependency> dependency)
: vectorized::ScannerContext(state, output_tuple_desc, output_row_descriptor, scanners,
limit_, max_bytes_in_blocks_queue, 1, local_state,
dependency, finish_dependency) {}
dependency) {}
Status get_block_from_queue(RuntimeState* state, vectorized::BlockUPtr* block, bool* eos,
int id, bool wait = false) override {
std::unique_lock l(_transfer_lock);

View File

@ -47,11 +47,11 @@ using namespace std::chrono_literals;
ScannerContext::ScannerContext(RuntimeState* state, const TupleDescriptor* output_tuple_desc,
const RowDescriptor* output_row_descriptor,
const std::list<VScannerSPtr>& scanners, int64_t limit_,
int64_t max_bytes_in_blocks_queue, const int num_parallel_instances,
const std::list<std::shared_ptr<ScannerDelegate>>& scanners,
int64_t limit_, int64_t max_bytes_in_blocks_queue,
const int num_parallel_instances,
pipeline::ScanLocalStateBase* local_state,
std::shared_ptr<pipeline::ScanDependency> dependency,
std::shared_ptr<pipeline::Dependency> finish_dependency)
std::shared_ptr<pipeline::ScanDependency> dependency)
: HasTaskExecutionCtx(state),
_state(state),
_parent(nullptr),
@ -66,11 +66,10 @@ ScannerContext::ScannerContext(RuntimeState* state, const TupleDescriptor* outpu
_max_bytes_in_queue(std::max(max_bytes_in_blocks_queue, (int64_t)1024) *
num_parallel_instances),
_scanner_scheduler(state->exec_env()->scanner_scheduler()),
_scanners(scanners),
_scanners_ref(scanners.begin(), scanners.end()),
_scanners(scanners.begin(), scanners.end()),
_all_scanners(scanners.begin(), scanners.end()),
_num_parallel_instances(num_parallel_instances),
_dependency(dependency),
_finish_dependency(finish_dependency) {
_dependency(dependency) {
DCHECK(_output_row_descriptor == nullptr ||
_output_row_descriptor->tuple_descriptors().size() == 1);
_query_id = _state->get_query_ctx()->query_id();
@ -98,8 +97,9 @@ ScannerContext::ScannerContext(RuntimeState* state, const TupleDescriptor* outpu
ScannerContext::ScannerContext(doris::RuntimeState* state, doris::vectorized::VScanNode* parent,
const doris::TupleDescriptor* output_tuple_desc,
const RowDescriptor* output_row_descriptor,
const std::list<VScannerSPtr>& scanners, int64_t limit_,
int64_t max_bytes_in_blocks_queue, const int num_parallel_instances,
const std::list<std::shared_ptr<ScannerDelegate>>& scanners,
int64_t limit_, int64_t max_bytes_in_blocks_queue,
const int num_parallel_instances,
pipeline::ScanLocalStateBase* local_state)
: HasTaskExecutionCtx(state),
_state(state),
@ -115,8 +115,8 @@ ScannerContext::ScannerContext(doris::RuntimeState* state, doris::vectorized::VS
_max_bytes_in_queue(std::max(max_bytes_in_blocks_queue, (int64_t)1024) *
num_parallel_instances),
_scanner_scheduler(state->exec_env()->scanner_scheduler()),
_scanners(scanners),
_scanners_ref(scanners.begin(), scanners.end()),
_scanners(scanners.begin(), scanners.end()),
_all_scanners(scanners.begin(), scanners.end()),
_num_parallel_instances(num_parallel_instances) {
DCHECK(_output_row_descriptor == nullptr ||
_output_row_descriptor->tuple_descriptors().size() == 1);
@ -192,10 +192,6 @@ Status ScannerContext::init() {
}
#endif
// 4. This ctx will be submitted to the scanner scheduler right after init.
// So set _num_scheduling_ctx to 1 here.
_num_scheduling_ctx = 1;
_num_unfinished_scanners = _scanners.size();
if (_parent) {
@ -288,11 +284,9 @@ Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::Blo
bool is_scheduled = false;
if (!done() && to_be_schedule && _num_running_scanners == 0) {
is_scheduled = true;
auto state = _scanner_scheduler->submit(shared_from_this());
if (state.ok()) {
_num_scheduling_ctx++;
} else {
set_status_on_error(state, false);
auto submit_status = _scanner_scheduler->submit(shared_from_this());
if (!submit_status.ok()) {
set_status_on_error(submit_status, false);
}
}
@ -384,60 +378,36 @@ Status ScannerContext::validate_block_schema(Block* block) {
return Status::OK();
}
void ScannerContext::set_should_stop() {
std::lock_guard l(_transfer_lock);
_should_stop = true;
_set_scanner_done();
for (const VScannerWPtr& scanner : _scanners_ref) {
if (VScannerSPtr sc = scanner.lock()) {
sc->try_stop();
}
}
_blocks_queue_added_cv.notify_one();
set_ready_to_finish();
}
void ScannerContext::inc_num_running_scanners(int32_t inc) {
std::lock_guard l(_transfer_lock);
_num_running_scanners += inc;
}
void ScannerContext::dec_num_scheduling_ctx() {
std::lock_guard l(_transfer_lock);
_num_scheduling_ctx--;
set_ready_to_finish();
if (_num_running_scanners == 0 && _num_scheduling_ctx == 0) {
_ctx_finish_cv.notify_one();
}
}
void ScannerContext::set_ready_to_finish() {
// `_should_stop == true` means this task has already ended and is now waiting for pending finish.
if (_finish_dependency && done() && _num_running_scanners == 0 && _num_scheduling_ctx == 0) {
_finish_dependency->set_ready();
}
}
bool ScannerContext::set_status_on_error(const Status& status, bool need_lock) {
void ScannerContext::set_status_on_error(const Status& status, bool need_lock) {
std::unique_lock l(_transfer_lock, std::defer_lock);
if (need_lock) {
l.lock();
}
if (this->status().ok()) {
_process_status = status;
_status_error = true;
_blocks_queue_added_cv.notify_one();
_should_stop = true;
_state->get_query_ctx()->set_exec_status(_process_status);
_set_scanner_done();
return true;
}
return false;
}
template <typename Parent>
Status ScannerContext::_close_and_clear_scanners(Parent* parent, RuntimeState* state) {
std::unique_lock l(_scanners_lock);
void ScannerContext::stop_scanners(RuntimeState* state) {
std::unique_lock l(_transfer_lock);
_should_stop = true;
_set_scanner_done();
for (const std::weak_ptr<ScannerDelegate>& scanner : _all_scanners) {
if (std::shared_ptr<ScannerDelegate> sc = scanner.lock()) {
sc->_scanner->try_stop();
}
}
_blocks_queue.clear();
// TODO yiguolei, call mark close to scanners
if (state->enable_profile()) {
std::stringstream scanner_statistics;
std::stringstream scanner_rows_read;
@ -445,76 +415,38 @@ Status ScannerContext::_close_and_clear_scanners(Parent* parent, RuntimeState* s
scanner_statistics << "[";
scanner_rows_read << "[";
scanner_wait_worker_time << "[";
for (auto finished_scanner_time : _finished_scanner_runtime) {
scanner_statistics << PrettyPrinter::print(finished_scanner_time, TUnit::TIME_NS)
<< ", ";
}
for (auto finished_scanner_rows : _finished_scanner_rows_read) {
scanner_rows_read << PrettyPrinter::print(finished_scanner_rows, TUnit::UNIT) << ", ";
}
for (auto finished_scanner_wait_time : _finished_scanner_wait_worker_time) {
scanner_wait_worker_time
<< PrettyPrinter::print(finished_scanner_wait_time, TUnit::TIME_NS) << ", ";
}
// Only unfinished scanners here
for (auto& scanner : _scanners) {
// Scanners are in ObjPool in ScanNode,
// so no need to delete them here.
// Scanners can in 3 state
// state 1: in scanner context, not scheduled
// state 2: in scanner worker pool's queue, scheduled but not running
// state 3: scanner is running.
for (auto& scanner_ref : _all_scanners) {
auto scanner = scanner_ref.lock();
if (scanner == nullptr) {
continue;
}
// Add per scanner running time before close them
scanner_statistics << PrettyPrinter::print(scanner->get_time_cost_ns(), TUnit::TIME_NS)
scanner_statistics << PrettyPrinter::print(scanner->_scanner->get_time_cost_ns(),
TUnit::TIME_NS)
<< ", ";
scanner_rows_read << PrettyPrinter::print(scanner->get_rows_read(), TUnit::UNIT)
scanner_rows_read << PrettyPrinter::print(scanner->_scanner->get_rows_read(),
TUnit::UNIT)
<< ", ";
scanner_wait_worker_time
<< PrettyPrinter::print(scanner->get_scanner_wait_worker_timer(),
<< PrettyPrinter::print(scanner->_scanner->get_scanner_wait_worker_timer(),
TUnit::TIME_NS)
<< ", ";
// Since this list contains all scanners and some of them may still be running,
// we cannot call scanner close here.
}
scanner_statistics << "]";
scanner_rows_read << "]";
scanner_wait_worker_time << "]";
parent->scanner_profile()->add_info_string("PerScannerRunningTime",
scanner_statistics.str());
parent->scanner_profile()->add_info_string("PerScannerRowsRead", scanner_rows_read.str());
parent->scanner_profile()->add_info_string("PerScannerWaitTime",
scanner_wait_worker_time.str());
_scanner_profile->add_info_string("PerScannerRunningTime", scanner_statistics.str());
_scanner_profile->add_info_string("PerScannerRowsRead", scanner_rows_read.str());
_scanner_profile->add_info_string("PerScannerWaitTime", scanner_wait_worker_time.str());
}
// Only unfinished scanners here
for (auto& scanner : _scanners) {
static_cast<void>(scanner->close(state));
// Scanners are in ObjPool in ScanNode,
// so no need to delete them here.
}
_scanners.clear();
return Status::OK();
}
template <typename Parent>
void ScannerContext::clear_and_join(Parent* parent, RuntimeState* state) {
std::unique_lock l(_transfer_lock);
do {
if (_num_running_scanners == 0 && _num_scheduling_ctx == 0) {
break;
} else {
DCHECK(!state->enable_pipeline_exec())
<< " _num_running_scanners: " << _num_running_scanners
<< " _num_scheduling_ctx: " << _num_scheduling_ctx;
while (!(_num_running_scanners == 0 && _num_scheduling_ctx == 0)) {
_ctx_finish_cv.wait(l);
}
break;
}
} while (false);
// Must wait all running scanners stop running.
// So that we can make sure to close all scanners.
static_cast<void>(_close_and_clear_scanners(parent, state));
_blocks_queue.clear();
}
bool ScannerContext::no_schedule() {
std::unique_lock l(_transfer_lock);
return _num_running_scanners == 0 && _num_scheduling_ctx == 0;
_blocks_queue_added_cv.notify_one();
}
void ScannerContext::_set_scanner_done() {
@ -527,12 +459,13 @@ std::string ScannerContext::debug_string() {
return fmt::format(
"id: {}, sacnners: {}, blocks in queue: {},"
" status: {}, _should_stop: {}, _is_finished: {}, free blocks: {},"
" limit: {}, _num_running_scanners: {}, _num_scheduling_ctx: {}, _max_thread_num: {},"
" _block_per_scanner: {}, _cur_bytes_in_queue: {}, MAX_BYTE_OF_QUEUE: {}",
" limit: {}, _num_running_scanners: {}, _max_thread_num: {},"
" _block_per_scanner: {}, _cur_bytes_in_queue: {}, MAX_BYTE_OF_QUEUE: {}, "
"num_ctx_scheduled: {}, query_id: {}",
ctx_id, _scanners.size(), _blocks_queue.size(), status().ok(), _should_stop,
_is_finished, _free_blocks.size_approx(), limit, _num_running_scanners,
_num_scheduling_ctx, _max_thread_num, _block_per_scanner, _cur_bytes_in_queue,
_max_bytes_in_queue);
_is_finished, _free_blocks.size_approx(), limit, _num_running_scanners, _max_thread_num,
_block_per_scanner, _cur_bytes_in_queue, _max_bytes_in_queue, num_ctx_scheduled(),
print_id(_query_id));
}
void ScannerContext::reschedule_scanner_ctx() {
@ -540,84 +473,71 @@ void ScannerContext::reschedule_scanner_ctx() {
if (done()) {
return;
}
auto state = _scanner_scheduler->submit(shared_from_this());
auto submit_status = _scanner_scheduler->submit(shared_from_this());
// TODO(wb): rethink whether it is better to mark the current scan_context as failed when submit fails many times
if (state.ok()) {
_num_scheduling_ctx++;
} else {
set_status_on_error(state, false);
if (!submit_status.ok()) {
set_status_on_error(submit_status, false);
}
}
void ScannerContext::push_back_scanner_and_reschedule(VScannerSPtr scanner) {
{
std::unique_lock l(_scanners_lock);
_scanners.push_front(scanner);
}
void ScannerContext::push_back_scanner_and_reschedule(std::shared_ptr<ScannerDelegate> scanner) {
std::lock_guard l(_transfer_lock);
// In the pipeline engine, Doris will close scanners when `no_schedule`.
// We have to decrease _num_running_scanners before scheduling, otherwise
// scheduling does not work because of _num_running_scanners.
_num_running_scanners--;
set_ready_to_finish();
if (!done() && should_be_scheduled()) {
auto state = _scanner_scheduler->submit(shared_from_this());
if (state.ok()) {
_num_scheduling_ctx++;
// Use a transfer lock to avoid the scanner being scheduled concurrently. For example, after
// calling "_scanners.push_front(scanner)", there may be another ctx in the scheduler
// that schedules this scanner right away, and in that scheduling run the scanner may be marked as closed
// before we reach the following if() block.
//LOG(INFO) << "yyyy one scanner finished " << debug_string();
{
--_num_running_scanners;
if (scanner->_scanner->need_to_close()) {
--_num_unfinished_scanners;
if (_num_unfinished_scanners == 0) {
_dispose_coloate_blocks_not_in_queue();
_is_finished = true;
_set_scanner_done();
_blocks_queue_added_cv.notify_one();
return;
}
} else {
set_status_on_error(state, false);
_scanners.push_front(scanner);
}
}
// Notice that after calling "_scanners.push_front(scanner)", there may be other ctx in scheduler
// to schedule that scanner right away, and in that schedule run, the scanner may be marked as closed
// before we call the following if() block.
// So we need "scanner->set_counted_down()" to avoid "_num_unfinished_scanners" being decreased twice by
// same scanner.
if (scanner->need_to_close() && scanner->set_counted_down() &&
(--_num_unfinished_scanners) == 0) {
_dispose_coloate_blocks_not_in_queue();
_is_finished = true;
_set_scanner_done();
_blocks_queue_added_cv.notify_one();
if (should_be_scheduled()) {
auto submit_status = _scanner_scheduler->submit(shared_from_this());
if (!submit_status.ok()) {
set_status_on_error(submit_status, false);
}
}
_ctx_finish_cv.notify_one();
}
void ScannerContext::get_next_batch_of_scanners(std::list<VScannerSPtr>* current_run) {
// This method is called in scanner scheduler, and task context is hold
void ScannerContext::get_next_batch_of_scanners(
std::list<std::weak_ptr<ScannerDelegate>>* current_run) {
std::lock_guard l(_transfer_lock);
// Update the sched counter for profile
Defer defer {[&]() { _scanner_sched_counter->update(current_run->size()); }};
// 1. Calculate how many scanners should be scheduled at this run.
int thread_slot_num = 0;
{
// If there are enough space in blocks queue,
// the scanner number depends on the _free_blocks numbers
thread_slot_num = get_available_thread_slot_num();
}
// If there are enough space in blocks queue,
// the scanner number depends on the _free_blocks numbers
int thread_slot_num = get_available_thread_slot_num();
// 2. get #thread_slot_num scanners from ctx->scanners
// and put them into "this_run".
{
std::unique_lock l(_scanners_lock);
for (int i = 0; i < thread_slot_num && !_scanners.empty();) {
VScannerSPtr scanner = _scanners.front();
_scanners.pop_front();
if (scanner->need_to_close()) {
_finished_scanner_runtime.push_back(scanner->get_time_cost_ns());
_finished_scanner_rows_read.push_back(scanner->get_rows_read());
_finished_scanner_wait_worker_time.push_back(
scanner->get_scanner_wait_worker_timer());
static_cast<void>(scanner->close(_state));
} else {
current_run->push_back(scanner);
i++;
}
for (int i = 0; i < thread_slot_num && !_scanners.empty();) {
std::weak_ptr<ScannerDelegate> scanner_ref = _scanners.front();
std::shared_ptr<ScannerDelegate> scanner = scanner_ref.lock();
_scanners.pop_front();
if (scanner == nullptr) {
continue;
}
if (scanner->_scanner->need_to_close()) {
static_cast<void>(scanner->_scanner->close(_state));
} else {
current_run->push_back(scanner_ref);
i++;
}
}
}
template void ScannerContext::clear_and_join(pipeline::ScanLocalStateBase* parent,
RuntimeState* state);
template void ScannerContext::clear_and_join(VScanNode* parent, RuntimeState* state);
} // namespace doris::vectorized

View File

@ -53,6 +53,7 @@ class TaskGroup;
namespace vectorized {
class VScanner;
class ScannerDelegate;
class VScanNode;
class ScannerScheduler;
class SimplifiedScanScheduler;
@ -72,7 +73,7 @@ class ScannerContext : public std::enable_shared_from_this<ScannerContext>,
public:
ScannerContext(RuntimeState* state, VScanNode* parent, const TupleDescriptor* output_tuple_desc,
const RowDescriptor* output_row_descriptor,
const std::list<VScannerSPtr>& scanners, int64_t limit_,
const std::list<std::shared_ptr<ScannerDelegate>>& scanners, int64_t limit_,
int64_t max_bytes_in_blocks_queue, const int num_parallel_instances = 1,
pipeline::ScanLocalStateBase* local_state = nullptr);
@ -94,9 +95,9 @@ public:
// When a scanner complete a scan, this method will be called
// to return the scanner to the list for next scheduling.
void push_back_scanner_and_reschedule(VScannerSPtr scanner);
void push_back_scanner_and_reschedule(std::shared_ptr<ScannerDelegate> scanner);
bool set_status_on_error(const Status& status, bool need_lock = true);
void set_status_on_error(const Status& status, bool need_lock = true);
Status status() {
if (_process_status.is<ErrorCode::END_OF_FILE>()) {
@ -105,42 +106,27 @@ public:
return _process_status;
}
// Called by ScanNode.
// Used to notify the scheduler that this ScannerContext can stop working.
void set_should_stop();
// Return true if this ScannerContext need no more process
virtual bool done() { return _is_finished || _should_stop; }
bool done() const { return _is_finished || _should_stop; }
bool is_finished() { return _is_finished.load(); }
bool should_stop() { return _should_stop.load(); }
bool status_error() { return _status_error.load(); }
void inc_num_running_scanners(int32_t scanner_inc);
void set_ready_to_finish();
int get_num_running_scanners() const { return _num_running_scanners; }
int get_num_unfinished_scanners() const { return _num_unfinished_scanners; }
void dec_num_scheduling_ctx();
int get_num_scheduling_ctx() const { return _num_scheduling_ctx; }
void get_next_batch_of_scanners(std::list<VScannerSPtr>* current_run);
template <typename Parent>
void clear_and_join(Parent* parent, RuntimeState* state);
bool no_schedule();
void get_next_batch_of_scanners(std::list<std::weak_ptr<ScannerDelegate>>* current_run);
virtual std::string debug_string();
RuntimeState* state() { return _state; }
void incr_num_ctx_scheduling(int64_t num) { _scanner_ctx_sched_counter->update(num); }
int64_t num_ctx_scheduled() { return _scanner_ctx_sched_counter->value(); }
void incr_ctx_scheduling_time(int64_t num) { _scanner_ctx_sched_time->update(num); }
void incr_num_scanner_scheduling(int64_t num) { _scanner_sched_counter->update(num); }
std::string parent_name();
@ -172,6 +158,7 @@ public:
SimplifiedScanScheduler* get_simple_scan_scheduler() { return _simple_scan_scheduler; }
virtual void reschedule_scanner_ctx();
void stop_scanners(RuntimeState* state);
// the unique id of this context
std::string ctx_id;
@ -181,18 +168,13 @@ public:
bool _should_reset_thread_name = true;
private:
template <typename Parent>
Status _close_and_clear_scanners(Parent* parent, RuntimeState* state);
protected:
ScannerContext(RuntimeState* state_, const TupleDescriptor* output_tuple_desc,
const RowDescriptor* output_row_descriptor,
const std::list<VScannerSPtr>& scanners_, int64_t limit_,
const std::list<std::shared_ptr<ScannerDelegate>>& scanners_, int64_t limit_,
int64_t max_bytes_in_blocks_queue_, const int num_parallel_instances,
pipeline::ScanLocalStateBase* local_state,
std::shared_ptr<pipeline::ScanDependency> dependency,
std::shared_ptr<pipeline::Dependency> finish_dependency);
std::shared_ptr<pipeline::ScanDependency> dependency);
virtual void _dispose_coloate_blocks_not_in_queue() {}
void _set_scanner_done();
@ -232,7 +214,6 @@ protected:
// Always be set by ScannerScheduler.
// True means all scanners are finished to scan.
Status _process_status;
std::atomic_bool _status_error = false;
std::atomic_bool _should_stop = false;
std::atomic_bool _is_finished = false;
@ -276,9 +257,11 @@ protected:
// and then if the scanner is not finished, will be pushed back to this list.
// Not need to protect by lock, because only one scheduler thread will access to it.
std::mutex _scanners_lock;
std::list<VScannerSPtr> _scanners;
// Scanner's ownership belong to vscannode or scanoperator, scanner context does not own it.
// ScannerContext has to check if scanner is deconstructed before use it.
std::list<std::weak_ptr<ScannerDelegate>> _scanners;
// weak pointer for _scanners, used in stop function
std::vector<VScannerWPtr> _scanners_ref;
std::vector<std::weak_ptr<ScannerDelegate>> _all_scanners;
std::vector<int64_t> _finished_scanner_runtime;
std::vector<int64_t> _finished_scanner_rows_read;
std::vector<int64_t> _finished_scanner_wait_worker_time;
@ -295,7 +278,6 @@ protected:
RuntimeProfile::Counter* _scanner_wait_batch_timer = nullptr;
std::shared_ptr<pipeline::ScanDependency> _dependency = nullptr;
std::shared_ptr<pipeline::Dependency> _finish_dependency = nullptr;
};
} // namespace vectorized
} // namespace doris

View File

@ -143,6 +143,8 @@ Status ScannerScheduler::submit(std::shared_ptr<ScannerContext> ctx) {
if (ctx->done()) {
return Status::EndOfFile("ScannerContext is done");
}
//LOG(WARNING) << "yyyy " << Status::InternalError("Too many scheduled");
//LOG(WARNING) << "yyyy " << ctx->debug_string();
ctx->queue_idx = (_queue_idx++ % QUEUE_NUM);
if (!_pending_queues[ctx->queue_idx]->blocking_put(ctx)) {
return Status::InternalError("failed to submit scanner context to scheduler");
@ -174,28 +176,23 @@ void ScannerScheduler::_schedule_thread(int queue_id) {
void ScannerScheduler::_schedule_scanners(std::shared_ptr<ScannerContext> ctx) {
auto task_lock = ctx->task_exec_ctx();
if (task_lock == nullptr) {
// LOG(WARNING) << "could not lock task execution context, query " << print_id(_query_id)
// << " maybe finished";
LOG(WARNING) << "could not lock task execution context, query " << ctx->debug_string()
<< " maybe finished";
return;
}
//LOG(INFO) << "yyyy scheduled, query " << ctx->debug_string() << " maybe finished";
MonotonicStopWatch watch;
watch.reset();
watch.start();
ctx->incr_num_ctx_scheduling(1);
size_t size = 0;
Defer defer {[&]() {
ctx->incr_num_scanner_scheduling(size);
ctx->dec_num_scheduling_ctx();
}};
if (ctx->done()) {
return;
}
std::list<VScannerSPtr> this_run;
std::list<std::weak_ptr<ScannerDelegate>> this_run;
ctx->get_next_batch_of_scanners(&this_run);
size = this_run.size();
if (!size) {
if (this_run.empty()) {
// There will be 2 cases when this_run is empty:
// 1. The blocks queue reaches limit.
// The consumer will continue scheduling the ctx.
@ -214,9 +211,14 @@ void ScannerScheduler::_schedule_scanners(std::shared_ptr<ScannerContext> ctx) {
if (ctx->thread_token != nullptr) {
// TODO llj tg how to treat this?
while (iter != this_run.end()) {
(*iter)->start_wait_worker_timer();
auto s = ctx->thread_token->submit_func(
[this, scanner = *iter, ctx] { this->_scanner_scan(this, ctx, scanner); });
std::shared_ptr<ScannerDelegate> scanner_delegate = (*iter).lock();
if (scanner_delegate == nullptr) {
continue;
}
scanner_delegate->_scanner->start_wait_worker_timer();
auto s = ctx->thread_token->submit_func([this, scanner_ref = *iter, ctx]() {
this->_scanner_scan(this, ctx, scanner_ref);
});
if (s.ok()) {
this_run.erase(iter++);
} else {
@ -226,28 +228,32 @@ void ScannerScheduler::_schedule_scanners(std::shared_ptr<ScannerContext> ctx) {
}
} else {
while (iter != this_run.end()) {
(*iter)->start_wait_worker_timer();
TabletStorageType type = (*iter)->get_storage_type();
std::shared_ptr<ScannerDelegate> scanner_delegate = (*iter).lock();
if (scanner_delegate == nullptr) {
continue;
}
scanner_delegate->_scanner->start_wait_worker_timer();
TabletStorageType type = scanner_delegate->_scanner->get_storage_type();
bool ret = false;
if (type == TabletStorageType::STORAGE_TYPE_LOCAL) {
if (auto* scan_sche = ctx->get_simple_scan_scheduler()) {
auto work_func = [this, scanner = *iter, ctx] {
this->_scanner_scan(this, ctx, scanner);
auto work_func = [this, scanner_ref = *iter, ctx]() {
this->_scanner_scan(this, ctx, scanner_ref);
};
SimplifiedScanTask simple_scan_task = {work_func, ctx};
ret = scan_sche->get_scan_queue()->try_put(simple_scan_task);
} else {
PriorityThreadPool::Task task;
task.work_function = [this, scanner = *iter, ctx] {
this->_scanner_scan(this, ctx, scanner);
task.work_function = [this, scanner_ref = *iter, ctx]() {
this->_scanner_scan(this, ctx, scanner_ref);
};
task.priority = nice;
ret = _local_scan_thread_pool->offer(task);
}
} else {
PriorityThreadPool::Task task;
task.work_function = [this, scanner = *iter, ctx] {
this->_scanner_scan(this, ctx, scanner);
task.work_function = [this, scanner_ref = *iter, ctx]() {
this->_scanner_scan(this, ctx, scanner_ref);
};
task.priority = nice;
ret = _remote_scan_thread_pool->offer(task);
@ -265,13 +271,22 @@ void ScannerScheduler::_schedule_scanners(std::shared_ptr<ScannerContext> ctx) {
}
void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler,
std::shared_ptr<ScannerContext> ctx, VScannerSPtr scanner) {
std::shared_ptr<ScannerContext> ctx,
std::weak_ptr<ScannerDelegate> scanner_ref) {
auto task_lock = ctx->task_exec_ctx();
if (task_lock == nullptr) {
// LOG(WARNING) << "could not lock task execution context, query " << print_id(_query_id)
// << " maybe finished";
return;
}
//LOG_EVERY_N(INFO, 100) << "start running scanner from ctx " << ctx->debug_string();
// will release scanner if it is the last one, task lock is hold here, to ensure
// that scanner could call scannode's method during deconstructor
std::shared_ptr<ScannerDelegate> scanner_delegate = scanner_ref.lock();
auto& scanner = scanner_delegate->_scanner;
if (scanner_delegate == nullptr) {
return;
}
SCOPED_ATTACH_TASK(scanner->runtime_state());
// for cpu hard limit, thread name should not be reset
if (ctx->_should_reset_thread_name) {
@ -404,7 +419,7 @@ void ScannerScheduler::_scanner_scan(ScannerScheduler* scheduler,
if (eos || should_stop) {
scanner->mark_to_need_to_close();
}
ctx->push_back_scanner_and_reschedule(scanner);
ctx->push_back_scanner_and_reschedule(scanner_delegate);
}
void ScannerScheduler::_register_metrics() {

View File

@ -36,7 +36,7 @@ class BlockingQueue;
} // namespace doris
namespace doris::vectorized {
class ScannerDelegate;
class ScannerContext;
// Responsible for the scheduling and execution of all Scanners of a BE node.
@ -79,7 +79,7 @@ private:
void _schedule_scanners(std::shared_ptr<ScannerContext> ctx);
// execution thread function
void _scanner_scan(ScannerScheduler* scheduler, std::shared_ptr<ScannerContext> ctx,
VScannerSPtr scanner);
std::weak_ptr<ScannerDelegate> scanner);
void _register_metrics();

View File

@ -197,9 +197,13 @@ Status VScanNode::alloc_resource(RuntimeState* state) {
if (_scanner_ctx) {
DCHECK(!_eos && _num_scanners->value() > 0);
RETURN_IF_ERROR(_scanner_ctx->init());
//LOG(INFO) << "yyyy instance " << print_id(state->fragment_instance_id())
// << " submit scanner ctx " << _scanner_ctx->debug_string();
RETURN_IF_ERROR(_state->exec_env()->scanner_scheduler()->submit(_scanner_ctx));
}
if (_shared_scan_opt) {
LOG(INFO) << "instance shared scan enabled"
<< print_id(state->fragment_instance_id());
_shared_scanner_controller->set_scanner_context(id(),
_eos ? nullptr : _scanner_ctx);
}
@ -273,7 +277,7 @@ Status VScanNode::get_next(RuntimeState* state, vectorized::Block* block, bool*
reached_limit(block, eos);
if (*eos) {
// reach limit, stop the scanners.
_scanner_ctx->set_should_stop();
_scanner_ctx->stop_scanners(state);
}
return Status::OK();
@ -318,8 +322,8 @@ Status VScanNode::_init_profile() {
return Status::OK();
}
Status VScanNode::_start_scanners(const std::list<VScannerSPtr>& scanners,
const int query_parallel_instance_num) {
void VScanNode::_start_scanners(const std::list<std::shared_ptr<ScannerDelegate>>& scanners,
const int query_parallel_instance_num) {
if (_is_pipeline_scan) {
int max_queue_size = _shared_scan_opt ? std::max(query_parallel_instance_num, 1) : 1;
_scanner_ctx = pipeline::PipScannerContext::create_shared(
@ -330,41 +334,29 @@ Status VScanNode::_start_scanners(const std::list<VScannerSPtr>& scanners,
_output_row_descriptor.get(), scanners,
limit(), _state->scan_queue_mem_limit());
}
return Status::OK();
}
Status VScanNode::close(RuntimeState* state) {
if (is_closed()) {
return Status::OK();
}
RETURN_IF_ERROR(ExecNode::close(state));
return Status::OK();
}
void VScanNode::release_resource(RuntimeState* state) {
if (_scanner_ctx) {
if (!state->enable_pipeline_exec()) {
if (!state->enable_pipeline_exec() || _should_create_scanner) {
// stop and wait the scanner scheduler to be done
// _scanner_ctx may not be created for some short circuit case.
_scanner_ctx->set_should_stop();
_scanner_ctx->clear_and_join(this, state);
} else if (_should_create_scanner) {
_scanner_ctx->clear_and_join(this, state);
_scanner_ctx->stop_scanners(state);
}
}
_scanners.clear();
ExecNode::release_resource(state);
}
Status VScanNode::try_close(RuntimeState* state) {
if (_scanner_ctx) {
// mark this scanner ctx as should_stop to make sure scanners will not be scheduled anymore
// TODO: there is a lock in `set_should_stop` may cause some slight impact
_scanner_ctx->set_should_stop();
}
return Status::OK();
}
Status VScanNode::_normalize_conjuncts() {
// The conjuncts is always on output tuple, so use _output_tuple_desc;
std::vector<SlotDescriptor*> slots = _output_tuple_desc->slots();
@ -1330,11 +1322,15 @@ VScanNode::PushDownType VScanNode::_should_push_down_in_predicate(VInPredicate*
Status VScanNode::_prepare_scanners(const int query_parallel_instance_num) {
std::list<VScannerSPtr> scanners;
RETURN_IF_ERROR(_init_scanners(&scanners));
// Init scanner wrapper
for (auto it = scanners.begin(); it != scanners.end(); ++it) {
_scanners.emplace_back(std::make_shared<ScannerDelegate>(*it));
}
if (scanners.empty()) {
_eos = true;
} else {
COUNTER_SET(_num_scanners, static_cast<int64_t>(scanners.size()));
RETURN_IF_ERROR(_start_scanners(scanners, query_parallel_instance_num));
_start_scanners(_scanners, query_parallel_instance_num);
}
return Status::OK();
}

View File

@ -87,6 +87,21 @@ struct FilterPredicates {
std::vector<std::pair<std::string, std::shared_ptr<HybridSetBase>>> in_filters;
};
// We want to close scanner automatically, so using a delegate class
// and call close method in the delegate class's dctor.
class ScannerDelegate {
public:
VScannerSPtr _scanner;
ScannerDelegate(VScannerSPtr& scanner_ptr) : _scanner(scanner_ptr) {}
~ScannerDelegate() {
Status st = _scanner->close(_scanner->runtime_state());
if (!st.ok()) {
LOG(WARNING) << "close scanner failed, st = " << st;
}
}
ScannerDelegate(ScannerDelegate&&) = delete;
};
class VScanNode : public ExecNode, public RuntimeFilterConsumer {
public:
VScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
@ -156,8 +171,6 @@ public:
Status alloc_resource(RuntimeState* state) override;
void release_resource(RuntimeState* state) override;
Status try_close(RuntimeState* state);
bool should_run_serial() const {
return _should_run_serial || _state->enable_scan_node_run_serial();
}
@ -276,8 +289,11 @@ protected:
int _max_scan_key_num;
int _max_pushdown_conditions_per_column;
// Each scan node will generates a ScannerContext to manage all Scanners.
// See comments of ScannerContext for more details
// ScanNode owns the ownership of scanner, scanner context only has its weakptr
std::list<std::shared_ptr<ScannerDelegate>> _scanners;
// Each scan node will generates a ScannerContext to do schedule work
// ScannerContext will be added to scanner scheduler
std::shared_ptr<ScannerContext> _scanner_ctx = nullptr;
// indicate this scan node has no more data to return
@ -451,8 +467,8 @@ private:
const std::string& fn_name, int slot_ref_child = -1);
// Submit the scanner to the thread pool and start execution
Status _start_scanners(const std::list<VScannerSPtr>& scanners,
const int query_parallel_instance_num);
void _start_scanners(const std::list<std::shared_ptr<ScannerDelegate>>& scanners,
const int query_parallel_instance_num);
};
} // namespace doris::vectorized

View File

@ -148,16 +148,6 @@ public:
void set_status_on_failure(const Status& st) { _status = st; }
// return false if _is_counted_down is already true,
// otherwise, set _is_counted_down to true and return true.
bool set_counted_down() {
if (_is_counted_down) {
return false;
}
_is_counted_down = true;
return true;
}
protected:
void _discard_conjuncts() {
for (auto& conjunct : _conjuncts) {
@ -221,8 +211,6 @@ protected:
int64_t _scan_cpu_timer = 0;
bool _is_load = false;
// set to true after decrease the "_num_unfinished_scanners" in scanner context
bool _is_counted_down = false;
bool _is_init = true;
@ -233,6 +221,5 @@ protected:
};
using VScannerSPtr = std::shared_ptr<VScanner>;
using VScannerWPtr = std::weak_ptr<VScanner>;
} // namespace doris::vectorized

View File

@ -77,3 +77,5 @@ enable_feature_binlog=true
max_sys_mem_available_low_water_mark_bytes=69206016
user_files_secure_path=/
enable_debug_points=true
# debug scanner context dead loop
enable_debug_log_timeout_secs=300