[fix](scanner) cached blocks may be empty when VFileScanner returns NOT_FOUND (#32745)

Cached blocks may be empty when VFileScanner returns NOT_FOUND. The special handling of NOT_FOUND was introduced by https://github.com/apache/doris/pull/15226. This commit moves that handling into `VFileScanner`.
This commit is contained in:
Ashin Gau
2024-03-27 09:58:37 +08:00
committed by yiguolei
parent 0a447273f0
commit 352617a34d
3 changed files with 18 additions and 30 deletions

View File

@ -254,18 +254,19 @@ Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::Blo
_set_scanner_done();
return scan_task->get_status();
}
DCHECK(!scan_task->cached_blocks.empty());
vectorized::BlockUPtr current_block = std::move(scan_task->cached_blocks.front());
scan_task->cached_blocks.pop_front();
size_t block_size = current_block->allocated_bytes();
if (_estimated_block_size > block_size) {
_estimated_block_size = block_size;
if (!scan_task->cached_blocks.empty()) {
vectorized::BlockUPtr current_block = std::move(scan_task->cached_blocks.front());
scan_task->cached_blocks.pop_front();
size_t block_size = current_block->allocated_bytes();
if (_estimated_block_size > block_size) {
_estimated_block_size = block_size;
}
_free_blocks_memory_usage -= block_size;
_free_blocks_memory_usage_mark->set(_free_blocks_memory_usage);
// consume current block
block->swap(*current_block);
return_free_block(std::move(current_block));
}
_free_blocks_memory_usage -= block_size;
_free_blocks_memory_usage_mark->set(_free_blocks_memory_usage);
// consume current block
block->swap(*current_block);
return_free_block(std::move(current_block));
if (scan_task->cached_blocks.empty()) {
_blocks_queue.pop_front();
if (scan_task->is_eos()) { // current scanner is finished, and no more data to read

View File

@ -266,21 +266,9 @@ void ScannerScheduler::_scanner_scan(std::shared_ptr<ScannerContext> ctx,
}
status = scanner->get_block_after_projects(state, free_block.get(), &eos);
first_read = false;
// The VFileScanner for external table may try to open not exist files,
// Because FE file cache for external table may out of date.
// So, NOT_FOUND for VFileScanner is not a fail case.
// Will remove this after file reader refactor.
if (!status.ok() && (scanner->get_name() != doris::vectorized::VFileScanner::NAME ||
(scanner->get_name() == doris::vectorized::VFileScanner::NAME &&
!status.is<ErrorCode::NOT_FOUND>()))) {
if (!status.ok()) {
LOG(WARNING) << "Scan thread read VScanner failed: " << status.to_string();
break;
} else if (status.is<ErrorCode::NOT_FOUND>()) {
// The only case in this "if" branch is external table file delete and fe cache has not been updated yet.
// Set status to OK.
status = Status::OK();
eos = true;
break;
}
raw_bytes_read += free_block->allocated_bytes();
if (!scan_task->cached_blocks.empty() &&

View File

@ -925,15 +925,14 @@ Status VFileScanner::_get_next_reader() {
}
COUNTER_UPDATE(_file_counter, 1);
if (init_status.is<END_OF_FILE>()) {
if (init_status.is<END_OF_FILE>() || init_status.is<ErrorCode::NOT_FOUND>()) {
// The VFileScanner for external table may try to open not exist files,
// Because FE file cache for external table may out of date.
// So, NOT_FOUND for VFileScanner is not a fail case.
// Will remove this after file reader refactor.
COUNTER_UPDATE(_empty_file_counter, 1);
continue;
} else if (!init_status.ok()) {
if (init_status.is<ErrorCode::NOT_FOUND>()) {
COUNTER_UPDATE(_empty_file_counter, 1);
LOG(INFO) << "failed to find file: " << range.path;
return init_status;
}
return Status::InternalError("failed to init reader for file {}, err: {}", range.path,
init_status.to_string());
}