Files
doris/be/src/vec/exec/scan/scanner_scheduler.h

184 lines
6.7 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <atomic>
#include <functional>
#include <memory>
#include <string>
#include <utility>

#include "common/status.h"
#include "util/threadpool.h"
#include "vec/exec/scan/vscanner.h"
// Forward declarations of types that this header refers to by pointer,
// reference or as template arguments.
namespace doris {
class ExecEnv;
namespace vectorized {
// NOTE(review): vec/exec/scan/vscanner.h is included above and presumably
// already declares VScanner, which would make this declaration redundant — confirm.
class VScanner;
} // namespace vectorized
// NOTE(review): SimplifiedScanScheduler in this header constructs a
// BlockingQueue and calls methods on it inline, which needs the full
// definition; presumably it arrives through one of the includes — confirm.
template <typename T>
class BlockingQueue;
} // namespace doris
namespace doris::vectorized {
class ScannerDelegate;
class ScannerContext;
// Responsible for the scheduling and execution of all Scanners of a BE node.
// ScannerScheduler has two types of thread pools:
// 1. Scheduling thread pool
// Responsible for Scanner scheduling.
// A set of Scanners for a query will be encapsulated into a ScannerContext
// and submitted to the ScannerScheduler's scheduling queue.
// There are multiple scheduling queues in ScannerScheduler, and each scheduling queue
// is handled by a scheduling thread.
// The scheduling thread is scheduled in granularity of ScannerContext,
// that is, a group of Scanners in a ScannerContext are scheduled at a time.
//
// 2. Execution thread pool
// The scheduling thread will submit the Scanners selected from the ScannerContext
// to the execution thread pool to do the actual scan task.
// Each Scanner will act as a producer, read a group of blocks and put them into
// the corresponding block queue.
// The corresponding ScanNode will act as a consumer to consume blocks from the block queue.
class ScannerScheduler {
public:
    ScannerScheduler();
    ~ScannerScheduler();

    // Initializes the scheduler. The returned Status must not be ignored.
    [[nodiscard]] Status init(ExecEnv* env);

    // Submits a ScannerContext (a group of Scanners) to the scheduler.
    // The returned Status must not be ignored.
    [[nodiscard]] Status submit(std::shared_ptr<ScannerContext> ctx);

    void stop();

    // NOTE(review): presumably hands out a token on _limited_scan_thread_pool
    // limited to `max_concurrency` concurrent tasks — confirm in the .cpp.
    std::unique_ptr<ThreadPoolToken> new_limited_scan_pool_token(ThreadPool::ExecutionMode mode,
                                                                 int max_concurrency);

    // Returns the stored max size of the remote scan thread pool.
    // The value is 0 until it has been assigned (it is not assigned in this header).
    int remote_thread_pool_max_size() const { return _remote_thread_pool_max_size; }

private:
    // scheduling thread function
    void _schedule_thread(int queue_id);

    // schedule scanners in a certain ScannerContext
    void _schedule_scanners(std::shared_ptr<ScannerContext> ctx);

    // execution thread function
    void _scanner_scan(ScannerScheduler* scheduler, std::shared_ptr<ScannerContext> ctx,
                       std::weak_ptr<ScannerDelegate> scanner);

    void _register_metrics();

    static void _deregister_metrics();

    // Scheduling queue number.
    // TODO: make it configurable.
    static const int QUEUE_NUM = 4;

    // Each ScannerContext is submitted to one of the pending queues in a
    // round-robin fashion; _queue_idx points to the current queue.
    // It is an unsigned atomic so that an overflowing counter wraps around
    // instead of producing a negative, out-of-bounds queue index.
    // The scheduler thread takes a ctx from the pending queue, schedules it,
    // and puts it into the _scheduling_map.
    // If any scanner finishes, the ctx is taken from there and put back into
    // the pending queue.
    // NOTE(review): no _scheduling_map member is declared in this class; the
    // description above may be stale — confirm against the .cpp.
    std::atomic_uint _queue_idx = {0};
    BlockingQueue<std::shared_ptr<ScannerContext>>** _pending_queues = nullptr;

    // scheduling thread pool
    std::unique_ptr<ThreadPool> _scheduler_pool;

    // execution thread pool
    // _local_scan_thread_pool is for local scan task(typically, olap scanner)
    // _remote_scan_thread_pool is for remote scan task(cold data on s3, hdfs, etc.)
    // _limited_scan_thread_pool is a special pool for queries with resource limit
    std::unique_ptr<PriorityThreadPool> _local_scan_thread_pool;
    std::unique_ptr<PriorityThreadPool> _remote_scan_thread_pool;
    std::unique_ptr<ThreadPool> _limited_scan_thread_pool;

    // true if the scheduler is closed.
    std::atomic_bool _is_closed = {false};
    bool _is_init = false;

    // Default-initialized so that remote_thread_pool_max_size() never reads an
    // indeterminate value if it is called before the size has been assigned.
    int _remote_thread_pool_max_size = 0;
};
// One unit of work for SimplifiedScanScheduler: the callable to run together
// with a shared reference to the ScannerContext passed in alongside it.
struct SimplifiedScanTask {
    // Creates an empty task: scan_func holds no callable and scanner_context is null.
    SimplifiedScanTask() = default;

    // Both arguments are sink parameters: they are taken by value and moved
    // into the members, so passing an rvalue costs no std::function copy and
    // no extra shared_ptr reference-count update.
    SimplifiedScanTask(std::function<void()> scan_func,
                       std::shared_ptr<vectorized::ScannerContext> scanner_context)
            : scan_func(std::move(scan_func)), scanner_context(std::move(scanner_context)) {}

    // The work to execute; invoked by SimplifiedScanScheduler::_work().
    std::function<void()> scan_func;
    // Shared ownership of the context supplied with the task.
    std::shared_ptr<vectorized::ScannerContext> scanner_context = nullptr;
};
// used for cpu hard limit
class SimplifiedScanScheduler {
public:
SimplifiedScanScheduler(std::string wg_name, CgroupCpuCtl* cgroup_cpu_ctl) {
_scan_task_queue = std::make_unique<BlockingQueue<SimplifiedScanTask>>(
config::doris_scanner_thread_pool_queue_size);
_is_stop.store(false);
_cgroup_cpu_ctl = cgroup_cpu_ctl;
_wg_name = wg_name;
}
~SimplifiedScanScheduler() {
stop();
LOG(INFO) << "Scanner sche " << _wg_name << " shutdown";
}
void stop() {
_is_stop.store(true);
_scan_task_queue->shutdown();
_scan_thread_pool->shutdown();
_scan_thread_pool->wait();
}
Status start() {
RETURN_IF_ERROR(ThreadPoolBuilder("Scan_" + _wg_name)
.set_min_threads(config::doris_scanner_thread_pool_thread_num)
.set_max_threads(config::doris_scanner_thread_pool_thread_num)
.set_cgroup_cpu_ctl(_cgroup_cpu_ctl)
.build(&_scan_thread_pool));
for (int i = 0; i < config::doris_scanner_thread_pool_thread_num; i++) {
RETURN_IF_ERROR(_scan_thread_pool->submit_func([this] { this->_work(); }));
}
return Status::OK();
}
BlockingQueue<SimplifiedScanTask>* get_scan_queue() { return _scan_task_queue.get(); }
private:
void _work() {
while (!_is_stop.load()) {
SimplifiedScanTask scan_task;
if (_scan_task_queue->blocking_get(&scan_task)) {
scan_task.scan_func();
};
}
}
std::unique_ptr<ThreadPool> _scan_thread_pool;
std::unique_ptr<BlockingQueue<SimplifiedScanTask>> _scan_task_queue;
std::atomic<bool> _is_stop;
CgroupCpuCtl* _cgroup_cpu_ctl = nullptr;
std::string _wg_name;
};
} // namespace doris::vectorized