// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

#include "runtime/row_batch.h"

#include <snappy/snappy.h>
#include <stdint.h> // for intptr_t

#include "runtime/buffered_tuple_stream2.inline.h"
#include "runtime/exec_env.h"
#include "runtime/runtime_state.h"
#include "runtime/string_value.h"
#include "runtime/tuple_row.h"
#include "gen_cpp/Data_types.h"
#include "gen_cpp/data.pb.h"
#include "runtime/collection_value.h"
//#include "vec/columns/column_vector.h"
//#include "vec/core/block.h"

using std::vector;

namespace doris {

const int RowBatch::AT_CAPACITY_MEM_USAGE = 8 * 1024 * 1024;
const int RowBatch::FIXED_LEN_BUFFER_LIMIT = AT_CAPACITY_MEM_USAGE / 2;

RowBatch::RowBatch(const RowDescriptor& row_desc, int capacity, MemTracker* mem_tracker)
        : _mem_tracker(mem_tracker),
          _has_in_flight_row(false),
          _num_rows(0),
          _capacity(capacity),
          _flush(FlushMode::NO_FLUSH_RESOURCES),
          _needs_deep_copy(false),
          _num_tuples_per_row(row_desc.tuple_descriptors().size()),
          _row_desc(row_desc),
          _auxiliary_mem_usage(0),
          _need_to_return(false),
          _tuple_data_pool(new MemPool(_mem_tracker)),
          _agg_object_pool(new ObjectPool()) {
    DCHECK(_mem_tracker != nullptr);
    DCHECK_GT(capacity, 0);
    _tuple_ptrs_size = _capacity * _num_tuples_per_row * sizeof(Tuple*);
    DCHECK_GT(_tuple_ptrs_size, 0);
    // TODO: switch to Init() pattern so we can check memory limit and return Status.
    if (config::enable_partitioned_aggregation) {
        _mem_tracker->Consume(_tuple_ptrs_size);
        _tuple_ptrs = reinterpret_cast<Tuple**>(malloc(_tuple_ptrs_size));
        DCHECK(_tuple_ptrs != nullptr);
    } else {
        _tuple_ptrs = reinterpret_cast<Tuple**>(_tuple_data_pool->allocate(_tuple_ptrs_size));
    }
}

// TODO: we want our input_batch's tuple_data to come from our (not yet implemented)
// global runtime memory segment; how do we get thrift to allocate it from there?
// maybe change line (in Data_types.cc generated from Data.thrift)
//     xfer += iprot->readString(this->tuple_data[_i9]);
// to allocate string data in a special mempool
// (change via python script that runs over Data_types.cc)
RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch,
                   MemTracker* tracker)
        : _mem_tracker(tracker),
          _has_in_flight_row(false),
          _num_rows(input_batch.num_rows()),
          _capacity(_num_rows),
          _flush(FlushMode::NO_FLUSH_RESOURCES),
          _needs_deep_copy(false),
          _num_tuples_per_row(input_batch.row_tuples_size()),
          _row_desc(row_desc),
          _auxiliary_mem_usage(0),
          _need_to_return(false),
          _tuple_data_pool(new MemPool(_mem_tracker)),
          _agg_object_pool(new ObjectPool()) {
    DCHECK(_mem_tracker != nullptr);
    _tuple_ptrs_size = _num_rows * _num_tuples_per_row * sizeof(Tuple*);
    DCHECK_GT(_tuple_ptrs_size, 0);
    // TODO: switch to Init() pattern so we can check memory limit and return Status.
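    // Deserialization proceeds in three steps (a summary of the code below):
    //   1. allocate the tuple-pointer array (malloc'd and tracked when partitioned
    //      aggregation is enabled, pool-allocated otherwise);
    //   2. copy or snappy-decompress input_batch.tuple_data into _tuple_data_pool;
    //   3. convert the serialized tuple_offsets (-1 encodes a NULL tuple) and any
    //      string/collection offsets back into real pointers.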
    if (config::enable_partitioned_aggregation) {
        _mem_tracker->Consume(_tuple_ptrs_size);
        _tuple_ptrs = reinterpret_cast<Tuple**>(malloc(_tuple_ptrs_size));
        DCHECK(_tuple_ptrs != nullptr);
    } else {
        _tuple_ptrs = reinterpret_cast<Tuple**>(_tuple_data_pool->allocate(_tuple_ptrs_size));
    }

    uint8_t* tuple_data = nullptr;
    if (input_batch.is_compressed()) {
        // Decompress tuple data into data pool
        const char* compressed_data = input_batch.tuple_data().c_str();
        size_t compressed_size = input_batch.tuple_data().size();
        size_t uncompressed_size = 0;
        bool success = snappy::GetUncompressedLength(compressed_data, compressed_size,
                                                     &uncompressed_size);
        DCHECK(success) << "snappy::GetUncompressedLength failed";
        tuple_data = reinterpret_cast<uint8_t*>(_tuple_data_pool->allocate(uncompressed_size));
        success = snappy::RawUncompress(compressed_data, compressed_size,
                                        reinterpret_cast<char*>(tuple_data));
        DCHECK(success) << "snappy::RawUncompress failed";
    } else {
        // Tuple data uncompressed, copy directly into data pool
        tuple_data = _tuple_data_pool->allocate(input_batch.tuple_data().size());
        memcpy(tuple_data, input_batch.tuple_data().c_str(), input_batch.tuple_data().size());
    }

    // convert input_batch.tuple_offsets into pointers
    int tuple_idx = 0;
    for (auto offset : input_batch.tuple_offsets()) {
        if (offset == -1) {
            _tuple_ptrs[tuple_idx++] = nullptr;
        } else {
            _tuple_ptrs[tuple_idx++] = reinterpret_cast<Tuple*>(tuple_data + offset);
        }
    }

    // Check whether we have slots that require offset-to-pointer conversion.
    if (!_row_desc.has_varlen_slots()) {
        return;
    }
    const std::vector<TupleDescriptor*>& tuple_descs = _row_desc.tuple_descriptors();

    // For every unique tuple, convert string offsets contained in tuple data into
    // pointers. Tuples were serialized in the order we are deserializing them in,
    // so the first occurrence of a tuple will always have a higher offset than any tuple
    // we already converted.
    for (int i = 0; i < _num_rows; ++i) {
        TupleRow* row = get_row(i);
        std::vector<TupleDescriptor*>::const_iterator desc = tuple_descs.begin();
        for (int j = 0; desc != tuple_descs.end(); ++desc, ++j) {
            if ((*desc)->string_slots().empty() && (*desc)->collection_slots().empty()) {
                continue;
            }
            Tuple* tuple = row->get_tuple(j);
            if (tuple == nullptr) {
                continue;
            }
            for (auto slot : (*desc)->string_slots()) {
                DCHECK(slot->type().is_string_type());
                StringValue* string_val = tuple->get_string_slot(slot->tuple_offset());
                int offset = reinterpret_cast<intptr_t>(string_val->ptr);
                string_val->ptr = reinterpret_cast<char*>(tuple_data + offset);
                // Why do we apply this mask? StringValue::len changed from int to
                // size_t in Doris 0.11. During an upgrade, the high bits of len sent
                // from a 0.10 node contain garbage: harmless in 0.10, but interpreted
                // as part of the length in 0.11, yielding an invalid length. Zeroing
                // the high bits here restores the original 31-bit value.
                string_val->len &= 0x7FFFFFFFL;
            }

            // copy collection slots
            vector<SlotDescriptor*>::const_iterator slot_collection =
                    (*desc)->collection_slots().begin();
            for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) {
                DCHECK((*slot_collection)->type().is_collection_type());
                CollectionValue* array_val =
                        tuple->get_collection_slot((*slot_collection)->tuple_offset());
                // assign the data and null_signs pointers their positions in tuple_data
                int data_offset = reinterpret_cast<intptr_t>(array_val->data());
                array_val->set_data(reinterpret_cast<char*>(tuple_data + data_offset));
                int null_offset = reinterpret_cast<intptr_t>(array_val->null_signs());
                array_val->set_null_signs(reinterpret_cast<bool*>(tuple_data + null_offset));
                const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0);
                if (!item_type.is_string_type()) {
                    continue;
                }
                // copy every string item
                for (int i = 0; i < array_val->length(); ++i) {
                    if (array_val->is_null_at(i)) {
                        continue;
                    }
                    StringValue* dst_item_v = reinterpret_cast<StringValue*>(
                            (uint8_t*)array_val->data() + i * item_type.get_slot_size());
                    if (dst_item_v->len != 0) {
                        int offset = reinterpret_cast<intptr_t>(dst_item_v->ptr);
                        dst_item_v->ptr = reinterpret_cast<char*>(tuple_data + offset);
                    }
                }
            }
        }
    }
}

// TODO: we want our input_batch's tuple_data to come from our (not yet implemented)
// global runtime memory segment; how do we get thrift to allocate it from there?
// maybe change line (in Data_types.cc generated from Data.thrift)
//     xfer += iprot->readString(this->tuple_data[_i9]);
// to allocate string data in a special mempool
// (change via python script that runs over Data_types.cc)
RowBatch::RowBatch(const RowDescriptor& row_desc, const TRowBatch& input_batch,
                   MemTracker* tracker)
        : _mem_tracker(tracker),
          _has_in_flight_row(false),
          _num_rows(input_batch.num_rows),
          _capacity(_num_rows),
          _flush(FlushMode::NO_FLUSH_RESOURCES),
          _needs_deep_copy(false),
          _num_tuples_per_row(input_batch.row_tuples.size()),
          _row_desc(row_desc),
          _auxiliary_mem_usage(0),
          _need_to_return(false),
          _tuple_data_pool(new MemPool(_mem_tracker)),
          _agg_object_pool(new ObjectPool()) {
    DCHECK(_mem_tracker != nullptr);
    _tuple_ptrs_size = _num_rows * input_batch.row_tuples.size() * sizeof(Tuple*);
    DCHECK_GT(_tuple_ptrs_size, 0);
    // TODO: switch to Init() pattern so we can check memory limit and return Status.
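    // This constructor mirrors the PRowBatch constructor above, reading the same
    // layout from the Thrift-generated TRowBatch (plain struct fields instead of
    // protobuf accessors).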
    if (config::enable_partitioned_aggregation) {
        _mem_tracker->Consume(_tuple_ptrs_size);
        _tuple_ptrs = reinterpret_cast<Tuple**>(malloc(_tuple_ptrs_size));
        DCHECK(_tuple_ptrs != nullptr);
    } else {
        _tuple_ptrs = reinterpret_cast<Tuple**>(_tuple_data_pool->allocate(_tuple_ptrs_size));
    }

    uint8_t* tuple_data = nullptr;
    if (input_batch.is_compressed) {
        // Decompress tuple data into data pool
        const char* compressed_data = input_batch.tuple_data.c_str();
        size_t compressed_size = input_batch.tuple_data.size();
        size_t uncompressed_size = 0;
        bool success = snappy::GetUncompressedLength(compressed_data, compressed_size,
                                                     &uncompressed_size);
        DCHECK(success) << "snappy::GetUncompressedLength failed";
        tuple_data = reinterpret_cast<uint8_t*>(_tuple_data_pool->allocate(uncompressed_size));
        success = snappy::RawUncompress(compressed_data, compressed_size,
                                        reinterpret_cast<char*>(tuple_data));
        DCHECK(success) << "snappy::RawUncompress failed";
    } else {
        // Tuple data uncompressed, copy directly into data pool
        tuple_data = _tuple_data_pool->allocate(input_batch.tuple_data.size());
        memcpy(tuple_data, input_batch.tuple_data.c_str(), input_batch.tuple_data.size());
    }

    // convert input_batch.tuple_offsets into pointers
    int tuple_idx = 0;
    for (vector<int32_t>::const_iterator offset = input_batch.tuple_offsets.begin();
         offset != input_batch.tuple_offsets.end(); ++offset) {
        if (*offset == -1) {
            _tuple_ptrs[tuple_idx++] = nullptr;
        } else {
            // _tuple_ptrs[tuple_idx++] =
            //         reinterpret_cast<Tuple*>(_tuple_data_pool->get_data_ptr(*offset));
            _tuple_ptrs[tuple_idx++] = reinterpret_cast<Tuple*>(tuple_data + *offset);
        }
    }

    // Check whether we have slots that require offset-to-pointer conversion.
    if (!_row_desc.has_varlen_slots()) {
        return;
    }
    const std::vector<TupleDescriptor*>& tuple_descs = _row_desc.tuple_descriptors();

    // For every unique tuple, convert string offsets contained in tuple data into
    // pointers. Tuples were serialized in the order we are deserializing them in,
    // so the first occurrence of a tuple will always have a higher offset than any tuple
    // we already converted.
    for (int i = 0; i < _num_rows; ++i) {
        TupleRow* row = get_row(i);
        std::vector<TupleDescriptor*>::const_iterator desc = tuple_descs.begin();
        for (int j = 0; desc != tuple_descs.end(); ++desc, ++j) {
            if ((*desc)->string_slots().empty() && (*desc)->collection_slots().empty()) {
                continue;
            }
            Tuple* tuple = row->get_tuple(j);
            if (tuple == nullptr) {
                continue;
            }
            std::vector<SlotDescriptor*>::const_iterator slot = (*desc)->string_slots().begin();
            for (; slot != (*desc)->string_slots().end(); ++slot) {
                DCHECK((*slot)->type().is_string_type());
                StringValue* string_val = tuple->get_string_slot((*slot)->tuple_offset());
                int offset = reinterpret_cast<intptr_t>(string_val->ptr);
                string_val->ptr = reinterpret_cast<char*>(tuple_data + offset);
                // See the comment in the PRowBatch constructor above: zero the high
                // bits of len to guard against garbage bits sent by 0.10 nodes during
                // an upgrade (len widened from int to size_t in Doris 0.11).
                string_val->len &= 0x7FFFFFFFL;
            }

            // copy collection slot
            vector<SlotDescriptor*>::const_iterator slot_collection =
                    (*desc)->collection_slots().begin();
            for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) {
                DCHECK((*slot_collection)->type().is_collection_type());
                CollectionValue* array_val =
                        tuple->get_collection_slot((*slot_collection)->tuple_offset());
                int offset = reinterpret_cast<intptr_t>(array_val->data());
                array_val->set_data(reinterpret_cast<char*>(tuple_data + offset));
                int null_offset = reinterpret_cast<intptr_t>(array_val->null_signs());
                array_val->set_null_signs(reinterpret_cast<bool*>(tuple_data + null_offset));
                const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0);
                if (!item_type.is_string_type()) {
                    continue;
                }
                // copy string item
                for (int i = 0; i < array_val->length(); ++i) {
                    if (array_val->is_null_at(i)) {
                        continue;
                    }
                    StringValue* dst_item_v = reinterpret_cast<StringValue*>(
                            (uint8_t*)array_val->data() + i * item_type.get_slot_size());
                    if (dst_item_v->len != 0) {
                        int offset = reinterpret_cast<intptr_t>(dst_item_v->ptr);
                        dst_item_v->ptr = reinterpret_cast<char*>(tuple_data + offset);
                    }
                }
            }
        }
    }
}

void RowBatch::clear() {
    if (_cleared) {
        return;
    }
    _tuple_data_pool->free_all();
    _agg_object_pool.reset(new ObjectPool());
    for (int i = 0; i < _io_buffers.size(); ++i) {
        _io_buffers[i]->return_buffer();
    }
    for (BufferInfo& buffer_info : _buffers) {
        ExecEnv::GetInstance()->buffer_pool()->FreeBuffer(buffer_info.client, &buffer_info.buffer);
    }
    close_tuple_streams();
    for (int i = 0; i < _blocks.size(); ++i) {
        _blocks[i]->del();
    }
    if (config::enable_partitioned_aggregation) {
        DCHECK(_tuple_ptrs != nullptr);
        free(_tuple_ptrs);
        _mem_tracker->Release(_tuple_ptrs_size);
        _tuple_ptrs = nullptr;
    }
    _cleared = true;
}

RowBatch::~RowBatch() {
    clear();
}

int RowBatch::serialize(TRowBatch* output_batch) {
    // why does Thrift not generate a Clear() function?
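    // Serialized layout (the inverse of the deserializing constructors above):
    // tuple_offsets holds one entry per tuple per row in row-major order, where -1
    // encodes a NULL tuple and any other value is a byte offset into tuple_data.
    // Variable-length data (strings, collections) is deep-copied into tuple_data
    // with pointers rewritten as offsets. Illustrative example (values made up):
    // for 2 rows x 1 tuple each with row 1 NULL, tuple_offsets = [0, -1] and
    // tuple_data holds row 0's fixed-length tuple followed by its string bytes.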
    output_batch->row_tuples.clear();
    output_batch->tuple_offsets.clear();
    output_batch->is_compressed = false;

    output_batch->num_rows = _num_rows;
    _row_desc.to_thrift(&output_batch->row_tuples);
    output_batch->tuple_offsets.reserve(_num_rows * _num_tuples_per_row);

    int size = total_byte_size();
    output_batch->tuple_data.resize(size);

    // Copy tuple data, including strings, into output_batch (converting string
    // pointers into offsets in the process)
    int offset = 0; // current offset into output_batch->tuple_data
    char* tuple_data = const_cast<char*>(output_batch->tuple_data.c_str());

    for (int i = 0; i < _num_rows; ++i) {
        TupleRow* row = get_row(i);
        const std::vector<TupleDescriptor*>& tuple_descs = _row_desc.tuple_descriptors();
        std::vector<TupleDescriptor*>::const_iterator desc = tuple_descs.begin();

        for (int j = 0; desc != tuple_descs.end(); ++desc, ++j) {
            if (row->get_tuple(j) == nullptr) {
                // NULLs are encoded as -1
                output_batch->tuple_offsets.push_back(-1);
                continue;
            }

            // Record offset before creating copy (which increments offset and tuple_data)
            output_batch->tuple_offsets.push_back(offset);
            row->get_tuple(j)->deep_copy(**desc, &tuple_data, &offset, /* convert_ptrs */ true);
            DCHECK_LE(offset, size);
        }
    }

    DCHECK_EQ(offset, size);

    if (config::compress_rowbatches && size > 0) {
        // Try compressing tuple_data to _compression_scratch, swap if compressed data is
        // smaller
        int max_compressed_size = snappy::MaxCompressedLength(size);

        if (_compression_scratch.size() < max_compressed_size) {
            _compression_scratch.resize(max_compressed_size);
        }

        size_t compressed_size = 0;
        char* compressed_output = const_cast<char*>(_compression_scratch.c_str());
        snappy::RawCompress(output_batch->tuple_data.c_str(), size, compressed_output,
                            &compressed_size);

        if (LIKELY(compressed_size < size)) {
            _compression_scratch.resize(compressed_size);
            output_batch->tuple_data.swap(_compression_scratch);
            output_batch->is_compressed = true;
        }

        VLOG_ROW << "uncompressed size: " << size << ", compressed size: " << compressed_size;
    }

    // The size output_batch would be if we didn't compress tuple_data (will be equal to
    // actual batch size if tuple_data isn't compressed)
    return get_batch_size(*output_batch) - output_batch->tuple_data.size() + size;
}

int RowBatch::serialize(PRowBatch* output_batch) {
    // num_rows
    output_batch->set_num_rows(_num_rows);
    // row_tuples
    _row_desc.to_protobuf(output_batch->mutable_row_tuples());
    // tuple_offsets: must clear before reserve
    output_batch->clear_tuple_offsets();
    output_batch->mutable_tuple_offsets()->Reserve(_num_rows * _num_tuples_per_row);
    // is_compressed
    output_batch->set_is_compressed(false);
    // tuple data
    int size = total_byte_size();
    auto mutable_tuple_data = output_batch->mutable_tuple_data();
    mutable_tuple_data->resize(size);

    // Copy tuple data, including strings, into output_batch (converting string
    // pointers into offsets in the process)
    int offset = 0; // current offset into output_batch->tuple_data
    char* tuple_data = const_cast<char*>(mutable_tuple_data->data());

    for (int i = 0; i < _num_rows; ++i) {
        TupleRow* row = get_row(i);
        const std::vector<TupleDescriptor*>& tuple_descs = _row_desc.tuple_descriptors();
        std::vector<TupleDescriptor*>::const_iterator desc = tuple_descs.begin();

        for (int j = 0; desc != tuple_descs.end(); ++desc, ++j) {
            if (row->get_tuple(j) == nullptr) {
                // NULLs are encoded as -1
                output_batch->mutable_tuple_offsets()->Add(-1);
                continue;
            }

            // Record offset before creating copy (which increments offset and tuple_data)
            output_batch->mutable_tuple_offsets()->Add(offset);
            row->get_tuple(j)->deep_copy(**desc, &tuple_data, &offset, /* convert_ptrs */ true);
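            // deep_copy() advances tuple_data/offset past the copied tuple and, with
            // convert_ptrs = true, stores variable-length pointers as offsets relative
            // to the start of tuple_data (undone by the deserializing constructors).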
            DCHECK_LE(offset, size);
        }
    }

    DCHECK_EQ(offset, size);

    if (config::compress_rowbatches && size > 0) {
        // Try compressing tuple_data to _compression_scratch, swap if compressed data is
        // smaller
        int max_compressed_size = snappy::MaxCompressedLength(size);

        if (_compression_scratch.size() < max_compressed_size) {
            _compression_scratch.resize(max_compressed_size);
        }

        size_t compressed_size = 0;
        char* compressed_output = const_cast<char*>(_compression_scratch.c_str());
        snappy::RawCompress(mutable_tuple_data->data(), size, compressed_output,
                            &compressed_size);

        if (LIKELY(compressed_size < size)) {
            _compression_scratch.resize(compressed_size);
            mutable_tuple_data->swap(_compression_scratch);
            output_batch->set_is_compressed(true);
        }

        VLOG_ROW << "uncompressed size: " << size << ", compressed size: " << compressed_size;
    }

    // The size output_batch would be if we didn't compress tuple_data (will be equal to
    // actual batch size if tuple_data isn't compressed)
    return get_batch_size(*output_batch) - mutable_tuple_data->size() + size;
}

void RowBatch::add_io_buffer(DiskIoMgr::BufferDescriptor* buffer) {
    DCHECK(buffer != nullptr);
    _io_buffers.push_back(buffer);
    _auxiliary_mem_usage += buffer->buffer_len();
    buffer->set_mem_tracker(std::shared_ptr<MemTracker>(_mem_tracker)); // TODO(yingchun): fixme
}

Status RowBatch::resize_and_allocate_tuple_buffer(RuntimeState* state, int64_t* tuple_buffer_size,
                                                  uint8_t** buffer) {
    const int row_size = _row_desc.get_row_size();
    // Avoid divide-by-zero. Don't need to modify capacity for empty rows anyway.
    if (row_size != 0) {
        _capacity = std::max(1, std::min(_capacity, FIXED_LEN_BUFFER_LIMIT / row_size));
    }
    *tuple_buffer_size = static_cast<int64_t>(row_size) * _capacity;
    // TODO(dhc): change allocate to try_allocate?
    *buffer = _tuple_data_pool->allocate(*tuple_buffer_size);
    if (*buffer == nullptr) {
        std::stringstream ss;
        ss << "Failed to allocate tuple buffer of size " << *tuple_buffer_size;
        LOG(WARNING) << ss.str();
        return state->set_mem_limit_exceeded(ss.str());
    }
    return Status::OK();
}

void RowBatch::add_tuple_stream(BufferedTupleStream2* stream) {
    DCHECK(stream != nullptr);
    _tuple_streams.push_back(stream);
    _auxiliary_mem_usage += stream->byte_size();
}

void RowBatch::add_block(BufferedBlockMgr2::Block* block) {
    DCHECK(block != nullptr);
    _blocks.push_back(block);
    _auxiliary_mem_usage += block->buffer_len();
}

void RowBatch::reset() {
    DCHECK(_tuple_data_pool.get() != nullptr);
    _num_rows = 0;
    _capacity = _tuple_ptrs_size / (_num_tuples_per_row * sizeof(Tuple*));
    _has_in_flight_row = false;

    // TODO: Change this to Clear() and investigate the repercussions.
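    // Everything below returns this batch's per-batch resources (tuple data, I/O
    // buffers, buffer-pool buffers, tuple streams, blocks). The malloc'd
    // tuple-pointer array (partitioned aggregation mode) survives reset() and is
    // only freed in clear().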
    _tuple_data_pool->free_all();
    _agg_object_pool.reset(new ObjectPool());
    for (int i = 0; i < _io_buffers.size(); ++i) {
        _io_buffers[i]->return_buffer();
    }
    _io_buffers.clear();

    for (BufferInfo& buffer_info : _buffers) {
        ExecEnv::GetInstance()->buffer_pool()->FreeBuffer(buffer_info.client, &buffer_info.buffer);
    }
    _buffers.clear();

    close_tuple_streams();
    for (int i = 0; i < _blocks.size(); ++i) {
        _blocks[i]->del();
    }
    _blocks.clear();
    _auxiliary_mem_usage = 0;
    if (!config::enable_partitioned_aggregation) {
        _tuple_ptrs = reinterpret_cast<Tuple**>(_tuple_data_pool->allocate(_tuple_ptrs_size));
    }
    _need_to_return = false;
    _flush = FlushMode::NO_FLUSH_RESOURCES;
    _needs_deep_copy = false;
}

void RowBatch::close_tuple_streams() {
    for (int i = 0; i < _tuple_streams.size(); ++i) {
        _tuple_streams[i]->close();
        delete _tuple_streams[i];
    }
    _tuple_streams.clear();
}

void RowBatch::transfer_resource_ownership(RowBatch* dest) {
    dest->_auxiliary_mem_usage += _tuple_data_pool->total_allocated_bytes();
    dest->_tuple_data_pool->acquire_data(_tuple_data_pool.get(), false);
    dest->_agg_object_pool->acquire_data(_agg_object_pool.get());
    for (int i = 0; i < _io_buffers.size(); ++i) {
        DiskIoMgr::BufferDescriptor* buffer = _io_buffers[i];
        dest->_io_buffers.push_back(buffer);
        dest->_auxiliary_mem_usage += buffer->buffer_len();
        buffer->set_mem_tracker(
                std::shared_ptr<MemTracker>(dest->_mem_tracker)); // TODO(yingchun): fixme
    }
    _io_buffers.clear();

    for (BufferInfo& buffer_info : _buffers) {
        dest->add_buffer(buffer_info.client, std::move(buffer_info.buffer),
                         FlushMode::NO_FLUSH_RESOURCES);
    }
    _buffers.clear();

    for (int i = 0; i < _tuple_streams.size(); ++i) {
        dest->_tuple_streams.push_back(_tuple_streams[i]);
        dest->_auxiliary_mem_usage += _tuple_streams[i]->byte_size();
    }
    // Ownership of these resources now lies with dest, so clear them here.
    // Otherwise, when this RowBatch calls reset() below and dest later calls
    // reset() as well, both would release the same resources: a double delete.
    _tuple_streams.clear();

    for (int i = 0; i < _blocks.size(); ++i) {
        dest->_blocks.push_back(_blocks[i]);
        dest->_auxiliary_mem_usage += _blocks[i]->buffer_len();
    }
    _blocks.clear();

    dest->_need_to_return |= _need_to_return;

    if (_needs_deep_copy) {
        dest->mark_needs_deep_copy();
    } else if (_flush == FlushMode::FLUSH_RESOURCES) {
        dest->mark_flush_resources();
    }
    reset();
}

int RowBatch::get_batch_size(const TRowBatch& batch) {
    int result = batch.tuple_data.size();
    result += batch.row_tuples.size() * sizeof(TTupleId);
    result += batch.tuple_offsets.size() * sizeof(int32_t);
    return result;
}

int RowBatch::get_batch_size(const PRowBatch& batch) {
    int result = batch.tuple_data().size();
    result += batch.row_tuples().size() * sizeof(int32_t);
    result += batch.tuple_offsets().size() * sizeof(int32_t);
    return result;
}

void RowBatch::acquire_state(RowBatch* src) {
    // DCHECK(_row_desc.equals(src->_row_desc));
    DCHECK_EQ(_num_tuples_per_row, src->_num_tuples_per_row);
    DCHECK_EQ(_tuple_ptrs_size, src->_tuple_ptrs_size);
    DCHECK_EQ(_auxiliary_mem_usage, 0);

    // The destination row batch should be empty.
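    // acquire_state() steals src's rows, tuple pointers, and auxiliary buffers,
    // leaving src reset; both batches must share the same layout (checked above).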
    DCHECK(!_has_in_flight_row);
    DCHECK_EQ(_num_rows, 0);

    for (int i = 0; i < src->_io_buffers.size(); ++i) {
        DiskIoMgr::BufferDescriptor* buffer = src->_io_buffers[i];
        _io_buffers.push_back(buffer);
        _auxiliary_mem_usage += buffer->buffer_len();
        buffer->set_mem_tracker(std::shared_ptr<MemTracker>(_mem_tracker)); // TODO(yingchun): fixme
    }
    src->_io_buffers.clear();
    src->_auxiliary_mem_usage = 0;

    DCHECK(src->_tuple_streams.empty());
    DCHECK(src->_blocks.empty());

    _has_in_flight_row = src->_has_in_flight_row;
    _num_rows = src->_num_rows;
    _capacity = src->_capacity;
    _need_to_return = src->_need_to_return;
    if (!config::enable_partitioned_aggregation) {
        // Tuple pointers are allocated from tuple_data_pool_ so are transferred.
        _tuple_ptrs = src->_tuple_ptrs;
        src->_tuple_ptrs = nullptr;
    } else {
        // tuple_ptrs_ were allocated with malloc so can be swapped between batches.
        std::swap(_tuple_ptrs, src->_tuple_ptrs);
    }
    src->transfer_resource_ownership(this);
}

void RowBatch::deep_copy_to(RowBatch* dst) {
    DCHECK(dst->_row_desc.equals(_row_desc));
    DCHECK_EQ(dst->_num_rows, 0);
    DCHECK_GE(dst->_capacity, _num_rows);
    dst->add_rows(_num_rows);
    for (int i = 0; i < _num_rows; ++i) {
        TupleRow* src_row = get_row(i);
        TupleRow* dst_row =
                reinterpret_cast<TupleRow*>(dst->_tuple_ptrs + i * _num_tuples_per_row);
        src_row->deep_copy(dst_row, _row_desc.tuple_descriptors(), dst->_tuple_data_pool.get(),
                           false);
    }
    dst->commit_rows(_num_rows);
}

// TODO: consider computing size of batches as they are built up
int RowBatch::total_byte_size() {
    int result = 0;

    // Sum total variable length byte sizes.
    for (int i = 0; i < _num_rows; ++i) {
        TupleRow* row = get_row(i);
        const std::vector<TupleDescriptor*>& tuple_descs = _row_desc.tuple_descriptors();
        std::vector<TupleDescriptor*>::const_iterator desc = tuple_descs.begin();
        for (int j = 0; desc != tuple_descs.end(); ++desc, ++j) {
            Tuple* tuple = row->get_tuple(j);
            if (tuple == nullptr) {
                continue;
            }
            result += (*desc)->byte_size();
            std::vector<SlotDescriptor*>::const_iterator slot = (*desc)->string_slots().begin();
            for (; slot != (*desc)->string_slots().end(); ++slot) {
                DCHECK((*slot)->type().is_string_type());
                if (tuple->is_null((*slot)->null_indicator_offset())) {
                    continue;
                }
                StringValue* string_val = tuple->get_string_slot((*slot)->tuple_offset());
                result += string_val->len;
            }

            // compute collection slot size
            vector<SlotDescriptor*>::const_iterator slot_collection =
                    (*desc)->collection_slots().begin();
            for (; slot_collection != (*desc)->collection_slots().end(); ++slot_collection) {
                DCHECK((*slot_collection)->type().is_collection_type());
                if (tuple->is_null((*slot_collection)->null_indicator_offset())) {
                    continue;
                }
                // compute data and null_signs sizes
                CollectionValue* array_val =
                        tuple->get_collection_slot((*slot_collection)->tuple_offset());
                result += array_val->length() * sizeof(bool);
                const TypeDescriptor& item_type = (*slot_collection)->type().children.at(0);
                result += array_val->length() * item_type.get_slot_size();

                if (!item_type.is_string_type()) {
                    continue;
                }

                // compute string-typed item sizes
                for (int i = 0; i < array_val->length(); ++i) {
                    if (array_val->is_null_at(i)) {
                        continue;
                    }
                    StringValue* dst_item_v = reinterpret_cast<StringValue*>(
                            (uint8_t*)array_val->data() + i * item_type.get_slot_size());
                    result += dst_item_v->len;
                }
            }
        }
    }
    return result;
}

int RowBatch::max_tuple_buffer_size() {
    int row_size = _row_desc.get_row_size();
    if (row_size > AT_CAPACITY_MEM_USAGE) {
        return row_size;
    }
    int num_rows = 0;
    if (row_size != 0) {
        num_rows = std::min(_capacity, AT_CAPACITY_MEM_USAGE / row_size);
    }
    int tuple_buffer_size = num_rows * row_size;
    DCHECK_LE(tuple_buffer_size, AT_CAPACITY_MEM_USAGE);
    return tuple_buffer_size;
}

void RowBatch::add_buffer(BufferPool::ClientHandle* client, BufferPool::BufferHandle&& buffer,
                          FlushMode flush) {
    _auxiliary_mem_usage += buffer.len();
    BufferInfo buffer_info;
    buffer_info.client = client;
    buffer_info.buffer = std::move(buffer);
    _buffers.push_back(std::move(buffer_info));
    if (flush == FlushMode::FLUSH_RESOURCES) {
        mark_flush_resources();
    }
}

std::string RowBatch::to_string() {
    std::stringstream out;
    for (int i = 0; i < _num_rows; ++i) {
        out << get_row(i)->to_string(_row_desc) << "\n";
    }
    return out.str();
}

} // end namespace doris