Files
doris/be/src/pipeline/exec/multi_cast_data_streamer.cpp

128 lines
4.4 KiB
C++

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "multi_cast_data_streamer.h"
#include "pipeline/exec/multi_cast_data_stream_source.h"
#include "pipeline/pipeline_x/dependency.h"
#include "runtime/runtime_state.h"
namespace doris::pipeline {
// Builds a queue entry from `block`.
//
// A new Block is created from the columns (with type and name) of `block`, and
// `block` itself is cleared afterwards, so the caller's block is empty on return.
//
// used_count: stored as the entry's use count.
// mem_size:   stored as the entry's memory size.
MultiCastBlock::MultiCastBlock(vectorized::Block* block, int used_count, size_t mem_size)
        : _used_count(used_count), _mem_size(mem_size) {
    const auto& source_columns = block->get_columns_with_type_and_name();
    _block = vectorized::Block::create_unique(source_columns);
    // The entry now holds the columns; reset the caller's block.
    block->clear();
}
// Delivers the next queued block for consumer `sender_idx` into `block`.
//
// Runs under `_mutex`. If the consumer's read position is not at the end of the queue:
//   * when the entry's use count is 1, the entry's block is swapped into `block`,
//     its memory size is subtracted from `_cumulative_mem_size`, and the entry is
//     popped from the front of the queue;
//   * otherwise the use count is decremented and `block` receives a copy: it is
//     swapped with a fresh block from create_same_struct_block(0) (presumably an
//     empty block with the same columns) and the queued block is merged into it.
// In both cases the read position advances by one entry.
//
// `*eos` is set to true only when `_eos` is set and the consumer has reached the end
// of the queue. When the consumer has reached the end, `_block_reading` is called
// for it.
void MultiCastDataStreamer::pull(int sender_idx, doris::vectorized::Block* block, bool* eos) {
    std::lock_guard l(_mutex);
    auto& cursor = _sender_pos_to_read[sender_idx];

    if (cursor != _multi_cast_blocks.end()) {
        auto& entry = *cursor;
        if (entry._used_count != 1) {
            // Other consumers still need this entry: keep it queued and hand out a copy.
            --entry._used_count;
            entry._block->create_same_struct_block(0)->swap(*block);
            // The Status returned by merge() is intentionally discarded here.
            (void)vectorized::MutableBlock(block).merge(*entry._block);
            ++cursor;
        } else {
            // Last reader of this entry: take the data itself and drop the entry.
            DCHECK(cursor == _multi_cast_blocks.begin());
            entry._block->swap(*block);
            _cumulative_mem_size -= entry._mem_size;
            // Advance before pop_front() so the stored position never refers to the
            // removed entry.
            ++cursor;
            _multi_cast_blocks.pop_front();
        }
    }

    const bool drained = (cursor == _multi_cast_blocks.end());
    *eos = _eos and drained;
    if (drained) {
        _block_reading(sender_idx);
    }
}
// Closes consumer `sender_idx`, releasing its claim on every entry it has not read.
//
// Runs under `_mutex`. Walks the consumer's read position to the end of the queue:
// entries whose use count is 1 are removed from the front of the queue (and their
// memory size is subtracted from `_cumulative_mem_size`); for all other entries only
// the use count is decremented. Afterwards `_closed_sender_count` is incremented and
// `_block_reading` is called for the consumer.
void MultiCastDataStreamer::close_sender(int sender_idx) {
    std::lock_guard l(_mutex);
    auto& cursor = _sender_pos_to_read[sender_idx];

    for (; cursor != _multi_cast_blocks.end();) {
        if (cursor->_used_count != 1) {
            // Still referenced by other consumers: just give up this consumer's share.
            --cursor->_used_count;
            ++cursor;
            continue;
        }
        // This consumer was the last one holding the entry, so it can be dropped.
        DCHECK(cursor == _multi_cast_blocks.begin());
        _cumulative_mem_size -= cursor->_mem_size;
        // Advance before pop_front() so the stored position never refers to the
        // removed entry.
        ++cursor;
        _multi_cast_blocks.pop_front();
    }

    ++_closed_sender_count;
    _block_reading(sender_idx);
}
// Queues `block` for every consumer that has not been closed yet.
//
// The row count is added to `_process_rows` and the block's allocated bytes are
// sampled before the lock is taken. Under `_mutex`, a new queue entry is built from
// `block` (the MultiCastBlock constructor takes the columns and clears `block`) with a
// use count equal to the number of consumers that are still open. Every consumer whose
// read position was at the end of the queue is re-pointed at the new entry and marked
// ready for read. Finally `_eos` is set to `eos`.
//
// state: not used by this function.
// block: source block; empty on successful return.
// eos:   stored into `_eos`.
//
// Returns Status::EndOfFile when all consumers are already closed (nothing is queued
// and `block` is left untouched), otherwise Status::OK.
Status MultiCastDataStreamer::push(RuntimeState* state, doris::vectorized::Block* block, bool eos) {
    // Must be read before the block is handed to the queue, which clears it.
    auto rows = block->rows();
    COUNTER_UPDATE(_process_rows, rows);
    auto block_mem_size = block->allocated_bytes();

    std::lock_guard l(_mutex);
    int need_process_count = _cast_sender_count - _closed_sender_count;
    if (need_process_count == 0) {
        return Status::EndOfFile("All data streamer is EOF");
    }
    // TODO: if [rows of the block at the back of the queue + block->rows()] < batch_size,
    // it would be better to merge the two blocks. That requires checking that
    // need_process_count equals the back entry's used_count.
    _multi_cast_blocks.emplace_back(block, need_process_count, block_mem_size);
    _cumulative_mem_size += block_mem_size;
    COUNTER_SET(_peak_mem_usage, std::max(_cumulative_mem_size, _peak_mem_usage->value()));

    // Iterator to the entry that was just appended.
    auto newest = _multi_cast_blocks.end();
    --newest;

    // Convert the consumer count once so the loop compares int with int (the index is
    // passed on as an int sender index) instead of mixing signed and unsigned operands.
    const int sender_count = static_cast<int>(_sender_pos_to_read.size());
    for (int i = 0; i < sender_count; ++i) {
        // Only consumers that had nothing left to read need to be re-pointed and woken.
        if (_sender_pos_to_read[i] == _multi_cast_blocks.end()) {
            _sender_pos_to_read[i] = newest;
            _set_ready_for_read(i);
        }
    }
    _eos = eos;
    return Status::OK();
}
// Calls set_ready() on the dependency registered for consumer `sender_idx`.
// Does nothing when no dependencies are registered at all.
void MultiCastDataStreamer::_set_ready_for_read(int sender_idx) {
    if (!_dependencies.empty()) {
        auto* dependency = _dependencies[sender_idx];
        DCHECK(dependency);
        dependency->set_ready();
    }
}
void MultiCastDataStreamer::_set_ready_for_read() {
for (auto* dep : _dependencies) {
DCHECK(dep);
dep->set_ready();
}
}
// Calls block() on the dependency registered for consumer `sender_idx`.
// Does nothing when no dependencies are registered at all.
void MultiCastDataStreamer::_block_reading(int sender_idx) {
    if (!_dependencies.empty()) {
        auto* dependency = _dependencies[sender_idx];
        DCHECK(dependency);
        dependency->block();
    }
}
} // namespace doris::pipeline