[Fix](pipelinex) Fix MaxScannerThreadNum calculation error in file scan operator when pipelinex is turned on. (#33037)

When pipelinex is turned on, MaxScannerThreadNum in the file scan operator is calculated incorrectly, which costs a lot of memory and causes performance degradation. This PR fixes it.
This commit is contained in:
Qi Chen
2024-03-30 17:43:51 +08:00
committed by morningman
parent 32d6a4fdd5
commit ecb4372479
3 changed files with 6 additions and 4 deletions

View File

@ -1224,7 +1224,8 @@ Status ScanLocalState<Derived>::_start_scanners(
auto& p = _parent->cast<typename Derived::Parent>();
_scanner_ctx = PipXScannerContext::create_shared(
state(), this, p._output_tuple_desc, p.output_row_descriptor(), scanners, p.limit(),
state()->scan_queue_mem_limit(), _scan_dependency);
state()->scan_queue_mem_limit(), _scan_dependency,
p.ignore_data_distribution() ? 1 : state()->query_parallel_instance_num());
return Status::OK();
}

View File

@ -47,9 +47,11 @@ public:
const RowDescriptor* output_row_descriptor,
const std::list<std::shared_ptr<vectorized::ScannerDelegate>>& scanners,
int64_t limit_, int64_t max_bytes_in_blocks_queue,
std::shared_ptr<pipeline::Dependency> dependency)
std::shared_ptr<pipeline::Dependency> dependency,
const int num_parallel_instances)
: vectorized::ScannerContext(state, output_tuple_desc, output_row_descriptor, scanners,
limit_, max_bytes_in_blocks_queue, 1, local_state) {
limit_, max_bytes_in_blocks_queue, num_parallel_instances,
local_state) {
_dependency = dependency;
}

View File

@ -78,7 +78,6 @@ ScannerContext::ScannerContext(RuntimeState* state, const TupleDescriptor* outpu
: config::doris_scanner_thread_pool_thread_num /
(_local_state ? num_parallel_instances
: state->query_parallel_instance_num());
_max_thread_num *= num_parallel_instances;
_max_thread_num = _max_thread_num == 0 ? 1 : _max_thread_num;
_max_thread_num = std::min(_max_thread_num, (int32_t)scanners.size());
// 1. Calculate max concurrency