[BUG][Timeout][QueryLeak] Fixed memory not released in time (#6221)

* Revert "[Optimize] Put _Tuple_ptrs into mempool when RowBatch is initialized (#6036)"

This reverts commit f254870aeb18752a786586ef5d7ccf952b97f895.

* [BUG][Timeout][QueryLeak] Fixed memory not released in time; fixed core dump in BloomFilter

Author: stdpain
Date: 2021-07-16 12:32:10 +08:00
Committed by: GitHub
Parent commit: c2695e9716
Commit: bf5db6eefe
4 changed files with 68 additions and 12 deletions

@@ -30,6 +30,8 @@ namespace doris {
// more friendly to CPU Cache, and it is easier to use SIMD instructions to
// speed up the implementation.
// BlockBloomFilter will not store null values, and will always return a false if the input is null.
class BlockBloomFilter {
public:
explicit BlockBloomFilter();
@@ -51,7 +53,9 @@ public:
void insert(uint32_t hash) noexcept;
// Same as above with convenience of hashing the key.
void insert(const Slice& key) noexcept {
insert(HashUtil::murmur_hash3_32(key.data, key.size, _hash_seed));
if (key.data) {
insert(HashUtil::murmur_hash3_32(key.data, key.size, _hash_seed));
}
}
// Finds an element in the BloomFilter, returning true if it is found and false (with
@@ -59,7 +63,11 @@ public:
bool find(uint32_t hash) const noexcept;
// Same as above with convenience of hashing the key.
bool find(const Slice& key) const noexcept {
return find(HashUtil::murmur_hash3_32(key.data, key.size, _hash_seed));
if (key.data) {
return find(HashUtil::murmur_hash3_32(key.data, key.size, _hash_seed));
} else {
return false;
}
}
// Computes the logical OR of this filter with 'other' and stores the result in this
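
The null guard above follows a simple contract: hash a key only when it carries data, and treat a null key as never present. Below is a minimal, self-contained sketch of that contract, using a stand-in slice type and a plain hash set instead of the real block-based filter (both stand-ins are assumptions for illustration, not the Doris API):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <string>
#include <unordered_set>

// Stand-ins for illustration only; the real Slice and BlockBloomFilter differ.
struct FakeSlice {
    const char* data;
    size_t size;
};

class NullSafeFilter {
public:
    // Skip null keys entirely so a null never reaches the hash function.
    void insert(const FakeSlice& key) {
        if (key.data) {
            _hashes.insert(hash(key));
        }
    }
    // A null probe is always reported as "not found" instead of being hashed.
    bool find(const FakeSlice& key) const {
        if (key.data) {
            return _hashes.count(hash(key)) > 0;
        }
        return false;
    }

private:
    static uint32_t hash(const FakeSlice& key) {
        // Placeholder; the real code uses HashUtil::murmur_hash3_32 with a seed.
        return static_cast<uint32_t>(
                std::hash<std::string>{}(std::string(key.data, key.size)));
    }
    std::unordered_set<uint32_t> _hashes;
};

int main() {
    NullSafeFilter filter;
    filter.insert({"doris", 5});
    filter.insert({nullptr, 0});         // ignored, no dereference of null
    assert(filter.find({"doris", 5}));
    assert(!filter.find({nullptr, 0}));  // null never matches
    return 0;
}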

@@ -56,7 +56,13 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, int capacity, MemTracker* mem_
_tuple_ptrs_size = _capacity * _num_tuples_per_row * sizeof(Tuple*);
DCHECK_GT(_tuple_ptrs_size, 0);
// TODO: switch to Init() pattern so we can check memory limit and return Status.
_tuple_ptrs = reinterpret_cast<Tuple**>(_tuple_data_pool->allocate(_tuple_ptrs_size));
if (config::enable_partitioned_aggregation) {
_mem_tracker->Consume(_tuple_ptrs_size);
_tuple_ptrs = reinterpret_cast<Tuple**>(malloc(_tuple_ptrs_size));
DCHECK(_tuple_ptrs != NULL);
} else {
_tuple_ptrs = reinterpret_cast<Tuple**>(_tuple_data_pool->allocate(_tuple_ptrs_size));
}
}
// TODO: we want our input_batch's tuple_data to come from our (not yet implemented)
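
The constructor now pairs a raw malloc for the tuple-pointer array with MemTracker::Consume, so the bytes stay visible to the tracker even though they no longer come from the MemPool. The following is a hedged sketch of that pairing, using a hypothetical SimpleTracker in place of the real MemTracker API:

#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdlib>

// Hypothetical stand-in for MemTracker: just an atomic byte counter.
class SimpleTracker {
public:
    void consume(int64_t bytes) { _consumed += bytes; }
    void release(int64_t bytes) { _consumed -= bytes; }
    int64_t consumption() const { return _consumed; }

private:
    std::atomic<int64_t> _consumed{0};
};

struct Tuple;  // opaque; only pointers are stored

// Mirrors the constructor path: account for the array, then malloc it.
Tuple** allocate_tuple_ptrs(SimpleTracker* tracker, int64_t tuple_ptrs_size) {
    tracker->consume(tuple_ptrs_size);
    Tuple** ptrs = reinterpret_cast<Tuple**>(malloc(tuple_ptrs_size));
    assert(ptrs != nullptr);
    return ptrs;
}

int main() {
    SimpleTracker tracker;
    const int64_t size = 1024 * static_cast<int64_t>(sizeof(Tuple*));
    Tuple** ptrs = allocate_tuple_ptrs(&tracker, size);
    assert(tracker.consumption() == size);
    free(ptrs);            // clear() undoes both halves, see the hunk below
    tracker.release(size);
    assert(tracker.consumption() == 0);
    return 0;
}
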
@@ -82,7 +88,13 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const PRowBatch& input_batch,
_tuple_ptrs_size = _num_rows * _num_tuples_per_row * sizeof(Tuple*);
DCHECK_GT(_tuple_ptrs_size, 0);
// TODO: switch to Init() pattern so we can check memory limit and return Status.
_tuple_ptrs = reinterpret_cast<Tuple**>(_tuple_data_pool->allocate(_tuple_ptrs_size));
if (config::enable_partitioned_aggregation) {
_mem_tracker->Consume(_tuple_ptrs_size);
_tuple_ptrs = reinterpret_cast<Tuple**>(malloc(_tuple_ptrs_size));
DCHECK(_tuple_ptrs != nullptr);
} else {
_tuple_ptrs = reinterpret_cast<Tuple**>(_tuple_data_pool->allocate(_tuple_ptrs_size));
}
uint8_t* tuple_data = nullptr;
if (input_batch.is_compressed()) {
@@ -210,7 +222,13 @@ RowBatch::RowBatch(const RowDescriptor& row_desc, const TRowBatch& input_batch,
_tuple_ptrs_size = _num_rows * input_batch.row_tuples.size() * sizeof(Tuple*);
DCHECK_GT(_tuple_ptrs_size, 0);
// TODO: switch to Init() pattern so we can check memory limit and return Status.
_tuple_ptrs = reinterpret_cast<Tuple**>(_tuple_data_pool->allocate(_tuple_ptrs_size));
if (config::enable_partitioned_aggregation) {
_mem_tracker->Consume(_tuple_ptrs_size);
_tuple_ptrs = reinterpret_cast<Tuple**>(malloc(_tuple_ptrs_size));
DCHECK(_tuple_ptrs != NULL);
} else {
_tuple_ptrs = reinterpret_cast<Tuple**>(_tuple_data_pool->allocate(_tuple_ptrs_size));
}
uint8_t* tuple_data = NULL;
if (input_batch.is_compressed) {
@@ -338,6 +356,12 @@ void RowBatch::clear() {
for (int i = 0; i < _blocks.size(); ++i) {
_blocks[i]->del();
}
if (config::enable_partitioned_aggregation) {
DCHECK(_tuple_ptrs != NULL);
free(_tuple_ptrs);
_mem_tracker->Release(_tuple_ptrs_size);
_tuple_ptrs = NULL;
}
_cleared = true;
}
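
clear() now undoes both halves of the constructor's work: free() for the malloc'd pointer array and MemTracker::Release for the accounting. Keeping the two in lockstep is the heart of the "memory not released in time" fix; one way to make the pairing hard to forget is an RAII holder, sketched here with a simplified tracker (an illustration only, not how RowBatch is actually structured):

#include <cassert>
#include <cstdint>
#include <cstdlib>

// Minimal tracker stand-in, same spirit as MemTracker::Consume/Release.
struct Tracker {
    int64_t consumed = 0;
    void consume(int64_t b) { consumed += b; }
    void release(int64_t b) { consumed -= b; }
};

// RAII holder: the tracked bytes are released exactly when the buffer is freed,
// so the accounting can never outlive the allocation.
class TrackedBuffer {
public:
    TrackedBuffer(Tracker* tracker, int64_t size)
            : _tracker(tracker), _size(size), _data(malloc(size)) {
        _tracker->consume(_size);
    }
    ~TrackedBuffer() {
        free(_data);
        _tracker->release(_size);
    }
    TrackedBuffer(const TrackedBuffer&) = delete;
    TrackedBuffer& operator=(const TrackedBuffer&) = delete;
    void* data() const { return _data; }

private:
    Tracker* _tracker;
    int64_t _size;
    void* _data;
};

int main() {
    Tracker tracker;
    {
        TrackedBuffer buf(&tracker, 4096);
        assert(tracker.consumed == 4096);
        assert(buf.data() != nullptr);
    }  // destruction frees and releases together
    assert(tracker.consumed == 0);
    return 0;
}
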
@@ -521,6 +545,8 @@ void RowBatch::reset() {
_capacity = _tuple_ptrs_size / (_num_tuples_per_row * sizeof(Tuple*));
_has_in_flight_row = false;
// TODO: Change this to Clear() and investigate the repercussions.
_tuple_data_pool->free_all();
_agg_object_pool.reset(new ObjectPool());
for (int i = 0; i < _io_buffers.size(); ++i) {
_io_buffers[i]->return_buffer();
@@ -538,6 +564,9 @@ void RowBatch::reset() {
}
_blocks.clear();
_auxiliary_mem_usage = 0;
if (!config::enable_partitioned_aggregation) {
_tuple_ptrs = reinterpret_cast<Tuple**>(_tuple_data_pool->allocate(_tuple_ptrs_size));
}
_need_to_return = false;
_flush = FlushMode::NO_FLUSH_RESOURCES;
_needs_deep_copy = false;
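
reset() re-creates the tuple-pointer array only in the MemPool mode, because _tuple_data_pool->free_all() invalidates every allocation that came out of that pool. A tiny sketch of why the re-allocation is needed, with a much-simplified arena standing in for MemPool (the names here are illustrative assumptions):

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Very small arena stand-in for MemPool: free_all() drops every allocation.
class Arena {
public:
    uint8_t* allocate(size_t size) {
        _chunks.emplace_back(size);
        return _chunks.back().data();
    }
    void free_all() { _chunks.clear(); }  // previously returned pointers dangle

private:
    std::vector<std::vector<uint8_t>> _chunks;
};

int main() {
    Arena pool;
    uint8_t* ptrs = pool.allocate(64);   // pool-backed "tuple pointer" array
    assert(ptrs != nullptr);
    pool.free_all();                     // mirrors _tuple_data_pool->free_all()
    // ptrs now dangles, so the batch must grab a fresh array, which is exactly
    // what the !enable_partitioned_aggregation branch above does.
    uint8_t* fresh = pool.allocate(64);
    assert(fresh != nullptr);
    return 0;
}
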
@@ -635,7 +664,14 @@ void RowBatch::acquire_state(RowBatch* src) {
_num_rows = src->_num_rows;
_capacity = src->_capacity;
_need_to_return = src->_need_to_return;
std::swap(_tuple_ptrs, src->_tuple_ptrs);
if (!config::enable_partitioned_aggregation) {
// Tuple pointers are allocated from tuple_data_pool_ so are transferred.
_tuple_ptrs = src->_tuple_ptrs;
src->_tuple_ptrs = NULL;
} else {
// tuple_ptrs_ were allocated with malloc so can be swapped between batches.
std::swap(_tuple_ptrs, src->_tuple_ptrs);
}
src->transfer_resource_ownership(this);
}
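
acquire_state() now distinguishes the two ownership modes: a pool-backed pointer array travels with the transferred pool, so the source batch must surrender it, while a malloc-backed array can simply be swapped so each batch keeps exactly one allocation. A small sketch of the two hand-off styles (the Batch struct below is a made-up stand-in, not the RowBatch API):

#include <cassert>
#include <utility>

struct Batch {
    int* tuple_ptrs;
};

// Pool mode: ownership follows the pool, so the source gives up its pointer.
void transfer(Batch* dst, Batch* src) {
    dst->tuple_ptrs = src->tuple_ptrs;
    src->tuple_ptrs = nullptr;
}

// Malloc mode: both batches own their own arrays; a swap avoids any free/realloc.
void swap_arrays(Batch* dst, Batch* src) {
    std::swap(dst->tuple_ptrs, src->tuple_ptrs);
}

int main() {
    int a = 0;
    int b = 0;

    Batch pool_dst{nullptr};
    Batch pool_src{&a};
    transfer(&pool_dst, &pool_src);
    assert(pool_dst.tuple_ptrs == &a && pool_src.tuple_ptrs == nullptr);

    Batch malloc_dst{&b};
    Batch malloc_src{&a};
    swap_arrays(&malloc_dst, &malloc_src);
    assert(malloc_dst.tuple_ptrs == &a && malloc_src.tuple_ptrs == &b);
    return 0;
}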

@@ -70,6 +70,7 @@ class PRowBatch;
//
// A row batch is considered at capacity if all the rows are full or it has accumulated
// auxiliary memory up to a soft cap. (See _at_capacity_mem_usage comment).
// TODO: stick _tuple_ptrs into a pool?
class RowBatch : public RowBatchInterface {
public:
/// Flag indicating whether the resources attached to a RowBatch need to be flushed.
@@ -413,14 +414,22 @@ private:
int _num_tuples_per_row;
RowDescriptor _row_desc;
// Memory is allocated from MemPool, need to investigate the repercussions.
//
// In the past, there were malloc'd and MemPool memory allocation methods.
// Malloc'd memory belongs to RowBatch itself, and the latter belongs to MemPool management.
// Array of pointers with _capacity * _num_tuples_per_row elements.
// The memory ownership depends on whether legacy joins and aggs are enabled.
//
// At present, it is allocated from MemPool uniformly, and tuple pointers are not transferred
// and do not have to be re-created in every Reset(), which has better performance.
// Memory is malloc'd and owned by RowBatch:
// If enable_partitioned_hash_join=true and enable_partitioned_aggregation=true
// then the memory is owned by this RowBatch and is freed upon its destruction.
// This mode is more performant especially with SubplanNodes in the ExecNode tree
// because the tuple pointers are not transferred and do not have to be re-created
// in every Reset().
//
// Memory is allocated from MemPool:
// Otherwise, the memory is allocated from _tuple_data_pool. As a result, the
// pointer memory is transferred just like tuple data, and must be re-created
// in Reset(). This mode is required for the legacy join and agg which rely on
// the tuple pointers being allocated from the _tuple_data_pool, so they can
// acquire ownership of the tuple pointers.
Tuple** _tuple_ptrs;
int _tuple_ptrs_size;

@@ -46,6 +46,9 @@ TEST_F(BloomFilterPredicateTest, bloom_filter_func_int_test) {
// test not exist val
int not_exist_val = 0x3355ff;
ASSERT_FALSE(func->find((const void*)&not_exist_val));
// TEST null value
func->insert(nullptr);
func->find(nullptr);
}
TEST_F(BloomFilterPredicateTest, bloom_filter_func_stringval_test) {