// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #pragma once #include #include #include "common/status.h" #include "util/threadpool.h" #include "vec/exec/scan/vscanner.h" namespace doris { class ExecEnv; namespace vectorized { class VScanner; } // namespace vectorized template class BlockingQueue; } // namespace doris namespace doris::vectorized { class ScannerDelegate; class ScannerContext; // Responsible for the scheduling and execution of all Scanners of a BE node. // ScannerScheduler has two types of thread pools: // 1. Scheduling thread pool // Responsible for Scanner scheduling. // A set of Scanners for a query will be encapsulated into a ScannerContext // and submitted to the ScannerScheduler's scheduling queue. // There are multiple scheduling queues in ScannerScheduler, and each scheduling queue // is handled by a scheduling thread. // The scheduling thread is scheduled in granularity of ScannerContext, // that is, a group of Scanners in a ScannerContext are scheduled at a time. // //2. Execution thread pool // The scheduling thread will submit the Scanners selected from the ScannerContext // to the execution thread pool to do the actual scan task. // Each Scanner will act as a producer, read a group of blocks and put them into // the corresponding block queue. // The corresponding ScanNode will act as a consumer to consume blocks from the block queue. class ScannerScheduler { public: ScannerScheduler(); ~ScannerScheduler(); [[nodiscard]] Status init(ExecEnv* env); [[nodiscard]] Status submit(std::shared_ptr ctx); void stop(); std::unique_ptr new_limited_scan_pool_token(ThreadPool::ExecutionMode mode, int max_concurrency); int remote_thread_pool_max_size() const { return _remote_thread_pool_max_size; } private: // scheduling thread function void _schedule_thread(int queue_id); // schedule scanners in a certain ScannerContext void _schedule_scanners(std::shared_ptr ctx); // execution thread function void _scanner_scan(ScannerScheduler* scheduler, std::shared_ptr ctx, std::weak_ptr scanner); void _register_metrics(); static void _deregister_metrics(); // Scheduling queue number. // TODO: make it configurable. static const int QUEUE_NUM = 4; // The ScannerContext will be submitted to the pending queue roundrobin. // _queue_idx pointer to the current queue. // Use std::atomic_uint to prevent numerical overflow from memory out of bound. // The scheduler thread will take ctx from pending queue, schedule it, // and put it to the _scheduling_map. // If any scanner finish, it will take ctx from and put it to pending queue again. std::atomic_uint _queue_idx = {0}; BlockingQueue>** _pending_queues = nullptr; // scheduling thread pool std::unique_ptr _scheduler_pool; // execution thread pool // _local_scan_thread_pool is for local scan task(typically, olap scanner) // _remote_scan_thread_pool is for remote scan task(cold data on s3, hdfs, etc.) // _limited_scan_thread_pool is a special pool for queries with resource limit std::unique_ptr _local_scan_thread_pool; std::unique_ptr _remote_scan_thread_pool; std::unique_ptr _limited_scan_thread_pool; // true is the scheduler is closed. std::atomic_bool _is_closed = {false}; bool _is_init = false; int _remote_thread_pool_max_size; }; struct SimplifiedScanTask { SimplifiedScanTask() = default; SimplifiedScanTask(std::function scan_func, std::shared_ptr scanner_context) { this->scan_func = scan_func; this->scanner_context = scanner_context; } std::function scan_func; std::shared_ptr scanner_context = nullptr; }; // used for cpu hard limit class SimplifiedScanScheduler { public: SimplifiedScanScheduler(std::string wg_name, CgroupCpuCtl* cgroup_cpu_ctl) { _scan_task_queue = std::make_unique>( config::doris_scanner_thread_pool_queue_size); _is_stop.store(false); _cgroup_cpu_ctl = cgroup_cpu_ctl; _wg_name = wg_name; } ~SimplifiedScanScheduler() { stop(); LOG(INFO) << "Scanner sche " << _wg_name << " shutdown"; } void stop() { _is_stop.store(true); _scan_task_queue->shutdown(); _scan_thread_pool->shutdown(); _scan_thread_pool->wait(); } Status start() { RETURN_IF_ERROR(ThreadPoolBuilder("Scan_" + _wg_name) .set_min_threads(config::doris_scanner_thread_pool_thread_num) .set_max_threads(config::doris_scanner_thread_pool_thread_num) .set_cgroup_cpu_ctl(_cgroup_cpu_ctl) .build(&_scan_thread_pool)); for (int i = 0; i < config::doris_scanner_thread_pool_thread_num; i++) { RETURN_IF_ERROR(_scan_thread_pool->submit_func([this] { this->_work(); })); } return Status::OK(); } BlockingQueue* get_scan_queue() { return _scan_task_queue.get(); } private: void _work() { while (!_is_stop.load()) { SimplifiedScanTask scan_task; if (_scan_task_queue->blocking_get(&scan_task)) { scan_task.scan_func(); }; } } std::unique_ptr _scan_thread_pool; std::unique_ptr> _scan_task_queue; std::atomic _is_stop; CgroupCpuCtl* _cgroup_cpu_ctl = nullptr; std::string _wg_name; }; } // namespace doris::vectorized