[Bug] Fix scanner threads heap-use-after-free (#5111)
Scanner threads may still be running and using member variables of OlapScanNode after the OlapScanNode has already been destroyed. The fix makes `_running_thread` the last member variable each scanner thread touches, and has `transfer_thread` wait until `_running_thread == 0`. Only after `transfer_thread` has joined can `OlapScanNode::close()` continue.
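For context, the pattern at work here is a small shutdown handshake: every scanner thread decrements a shared counter under a mutex as its very last access to the node and signals a condition variable, while the coordinating thread blocks until the counter reaches zero. The sketch below is a minimal, self-contained illustration of that handshake; the class and function names (`FakeScanNode`, `scanner_like`, `wait_for_scanners`) are invented for the example and are not the actual Doris code.

```cpp
#include <condition_variable>
#include <iostream>
#include <memory>
#include <mutex>
#include <thread>
#include <vector>

// Stand-in for OlapScanNode. Worker threads must not touch any member after
// announcing their exit; otherwise close()/the destructor can run concurrently
// with them and they end up reading freed memory (heap-use-after-free).
class FakeScanNode {
public:
    explicit FakeScanNode(int64_t workers) : _running_thread(workers) {}

    // Analogous to scanner_thread(): do the work, then make the counter
    // decrement the last member access of this thread.
    void scanner_like() {
        // ... per-scanner work that freely uses member variables ...
        std::unique_lock<std::mutex> l(_exit_lock);
        _running_thread--;          // last member access of this thread
        _exit_cv.notify_one();      // still under the lock, like the real code
    }

    // Analogous to the tail of transfer_thread(): block until every scanner
    // has announced its exit, so teardown cannot race with them.
    void wait_for_scanners() {
        std::unique_lock<std::mutex> l(_exit_lock);
        _exit_cv.wait(l, [this] { return _running_thread == 0; });
    }

private:
    std::mutex _exit_lock;
    std::condition_variable _exit_cv;
    int64_t _running_thread;
};

int main() {
    auto node = std::make_unique<FakeScanNode>(4);
    std::vector<std::thread> scanners;
    for (int i = 0; i < 4; ++i) {
        scanners.emplace_back([&node] { node->scanner_like(); });
    }
    node->wait_for_scanners();   // returns only once every scanner has announced its exit
    for (auto& t : scanners) t.join();
    node.reset();                // without the handshake this could free memory still in use
    std::cout << "all scanner-like threads exited before teardown" << std::endl;
    return 0;
}
```

Note that the decrement and the notify happen under the same mutex the waiter uses. If the counter were decremented outside `_scan_batches_lock`, the notification could fire between the waiter's predicate check and the moment it actually blocks, and the wait would never wake up (a classic lost wakeup).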
@@ -59,7 +59,6 @@ OlapScanNode::OlapScanNode(ObjectPool* pool, const TPlanNode& tnode, const Descr
          _status(Status::OK()),
          _resource_info(nullptr),
          _buffered_bytes(0),
          _running_thread(0),
          _eval_conjuncts_fn(nullptr) {}

OlapScanNode::~OlapScanNode() {}
@@ -1251,12 +1250,22 @@ void OlapScanNode::transfer_thread(RuntimeState* state) {

    state->resource_pool()->release_thread_token(true);
    VLOG(1) << "TransferThread finish.";
    std::unique_lock<std::mutex> l(_row_batches_lock);
    _transfer_done = true;
    _row_batch_added_cv.notify_all();
    {
        std::unique_lock<std::mutex> l(_row_batches_lock);
        _transfer_done = true;
        _row_batch_added_cv.notify_all();
    }

    std::unique_lock<std::mutex> l(_scan_batches_lock);
    _scan_thread_exit_cv.wait(l, [this] { return _running_thread == 0; });
    VLOG(1) << "Scanner threads have been exited. TransferThread exit.";
}

void OlapScanNode::scanner_thread(OlapScanner* scanner) {
    // Do not use ScopedTimer. There is no guarantee that the counter
    // (_scan_cpu_timer, the class member) is not destroyed after `_running_thread==0`.
    ThreadCpuStopWatch cpu_watch;
    cpu_watch.start();
    Status status = Status::OK();
    bool eos = false;
    RuntimeState* state = scanner->runtime_state();
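The comment about `ScopedTimer` at the top of `scanner_thread` is the subtle part: an RAII scoped timer updates its counter from its destructor, which only runs when the function returns, i.e. after `_running_thread` has been decremented, at which point `_scan_cpu_timer` (a member of the node) may already be destroyed. The patch therefore measures into a thread-local stopwatch and flushes the result explicitly before the final decrement. Below is a rough, self-contained sketch of that ordering; `NodeLike` and the wall-clock stopwatch are simplifications for illustration (the real code measures thread CPU time with `ThreadCpuStopWatch`), not the Doris implementation.

```cpp
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>

struct NodeLike {
    std::mutex scan_batches_lock;                  // also guards running_thread
    std::condition_variable scan_thread_exit_cv;
    int64_t running_thread = 1;
    std::atomic<int64_t> scan_cpu_time_ns{0};      // stand-in for _scan_cpu_timer
};

void scanner_thread_like(NodeLike* node) {
    // The stopwatch lives on this thread's stack, so destroying it can never
    // race with the destruction of *node. An RAII guard that wrote into
    // node->scan_cpu_time_ns from its destructor would run too late: after
    // the decrement below, the node may already be gone.
    const auto start = std::chrono::steady_clock::now();

    // ... per-scanner work ...

    const auto elapsed = std::chrono::steady_clock::now() - start;
    node->scan_cpu_time_ns +=
            std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count();

    // Last member accesses, mirroring the order in the patch: flush the timer
    // first, then announce the exit under the lock.
    std::unique_lock<std::mutex> l(node->scan_batches_lock);
    node->running_thread--;
    node->scan_thread_exit_cv.notify_one();
}

int main() {
    NodeLike node;
    scanner_thread_like(&node);   // trivial single-threaded demo of the ordering
    std::unique_lock<std::mutex> l(node.scan_batches_lock);
    node.scan_thread_exit_cv.wait(l, [&node] { return node.running_thread == 0; });
    return 0;
}
```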
@@ -1345,7 +1354,6 @@ void OlapScanNode::scanner_thread(OlapScanner* scanner) {
        if (!eos) {
            _olap_scanners.push_front(scanner);
        }
        _running_thread--;
    }
    if (eos) {
        // close out of batches lock. we do this before _progress update
@@ -1359,7 +1367,15 @@ void OlapScanNode::scanner_thread(OlapScanner* scanner) {
            _scanner_done = true;
        }
    }

    _scan_cpu_timer->update(cpu_watch.elapsed_time());
    _scan_batch_added_cv.notify_one();

    // The transfer thread will wait for `_running_thread==0`, to make sure all scanner threads won't access class members.
    // Do not access class members after this code.
    std::unique_lock<std::mutex> l(_scan_batches_lock);
    _running_thread--;
    _scan_thread_exit_cv.notify_one();
}

Status OlapScanNode::add_one_batch(RowBatchInterface* row_batch) {
@@ -232,7 +232,8 @@ private:

    std::mutex _scan_batches_lock;
    std::condition_variable _scan_batch_added_cv;
    int32_t _scanner_task_finish_count;
    int64_t _running_thread = 0;
    std::condition_variable _scan_thread_exit_cv;

    std::list<RowBatchInterface*> _scan_row_batches;
@@ -260,7 +261,6 @@ private:
    TResourceInfo* _resource_info;

    int64_t _buffered_bytes;
    int64_t _running_thread;
    EvalConjunctsFn _eval_conjuncts_fn;

    bool _need_agg_finalize = true;
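Tying the two header hunks together: the counter now lives next to the synchronization primitives that protect it and gets an in-class initializer, which is also why the constructor hunk above drops `_running_thread(0),` from its initializer list and the old standalone `int64_t _running_thread;` declaration goes away. Roughly, the relevant declarations now read as follows (a trimmed sketch for orientation, not the full header):

```cpp
#include <condition_variable>
#include <cstdint>
#include <mutex>

class OlapScanNode /* : public ScanNode */ {
    // ...
private:
    std::mutex _scan_batches_lock;                 // also guards _running_thread
    std::condition_variable _scan_batch_added_cv;
    std::condition_variable _scan_thread_exit_cv;  // signalled on each scanner exit
    int64_t _running_thread = 0;                   // in-class init replaces the
                                                   // constructor initializer-list entry
    // ...
};
```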
@@ -253,7 +253,6 @@ Status OlapScanner::get_batch(RuntimeState* state, RowBatch* batch, bool* eof) {
    int64_t raw_rows_threshold = raw_rows_read() + config::doris_scanner_row_num;
    {
        SCOPED_TIMER(_parent->_scan_timer);
        SCOPED_CPU_TIMER(_parent->_scan_cpu_timer);
        while (true) {
            // Batch is full, break
            if (batch->is_full()) {