[optimization] avoid extra memory copy while building the hash table (#5301)
Allocate hash table nodes in chunks tracked in _alloc_list instead of reallocating one contiguous buffer, so growing the table no longer memcpy's the existing nodes.
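For context, a minimal self-contained sketch of the allocation scheme this patch switches to (the class below is illustrative only, not the actual Doris HashTable; the field names _alloc_list, _current_nodes, _current_capacity, _current_used, _total_capacity and the grow-by-half rule are taken from the diff): instead of one contiguous node buffer that is reallocated with malloc + memcpy + free every time it fills up, the table keeps a list of chunks and simply mallocs a new chunk when the current one is exhausted, so existing nodes are never copied.

    // Illustrative sketch only (not the backend's real class): chunked node storage.
    #include <cstddef>
    #include <cstdint>
    #include <cstdlib>
    #include <cstring>
    #include <vector>

    struct ChunkedNodeStore {
        size_t _node_byte_size;               // bytes per node, fixed at construction
        std::vector<uint8_t*> _alloc_list;    // every chunk ever malloc'd, freed on destruction
        uint8_t* _current_nodes = nullptr;    // chunk currently being filled
        size_t _current_capacity = 0;         // node slots in the current chunk
        size_t _current_used = 0;             // node slots already handed out from it
        size_t _total_capacity = 0;           // node slots across all chunks

        ChunkedNodeStore(size_t node_byte_size, size_t num_buckets)
                : _node_byte_size(node_byte_size),
                  _current_capacity(num_buckets),
                  _total_capacity(num_buckets) {
            _alloc_list.reserve(10);
            _current_nodes = reinterpret_cast<uint8_t*>(malloc(_current_capacity * _node_byte_size));
            memset(_current_nodes, 0, _current_capacity * _node_byte_size);
            _alloc_list.push_back(_current_nodes);
        }

        // Hand out the next node slot; when the current chunk is full, malloc a
        // new one instead of copying the old nodes into a bigger buffer.
        uint8_t* alloc_node() {
            if (_current_used == _current_capacity) {
                _current_capacity = _total_capacity / 2;   // each new chunk is half the old total,
                _total_capacity += _current_capacity;      // so the total grows by roughly 1.5x
                _current_nodes = reinterpret_cast<uint8_t*>(malloc(_current_capacity * _node_byte_size));
                memset(_current_nodes, 0, _current_capacity * _node_byte_size);
                _current_used = 0;
                _alloc_list.push_back(_current_nodes);
            }
            return _current_nodes + _current_used++ * _node_byte_size;
        }

        ~ChunkedNodeStore() {
            for (uint8_t* ptr : _alloc_list) {
                free(ptr);
            }
        }
    };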
@@ -40,8 +40,11 @@ HashTable::HashTable(const std::vector<ExprContext*>& build_expr_ctxs,
         _initial_seed(initial_seed),
         _node_byte_size(sizeof(Node) + sizeof(Tuple*) * _num_build_tuples),
         _num_filled_buckets(0),
-        _nodes(NULL),
+        _current_nodes(nullptr),
         _num_nodes(0),
+        _current_capacity(num_buckets),
+        _current_used(0),
+        _total_capacity(num_buckets),
         _exceeded_limit(false),
         _mem_tracker(mem_tracker),
         _mem_limit_exceeded(false) {
@@ -61,13 +64,15 @@ HashTable::HashTable(const std::vector<ExprContext*>& build_expr_ctxs,
     memset(_expr_values_buffer, 0, sizeof(uint8_t) * _results_buffer_size);
     _expr_value_null_bits = new uint8_t[_build_expr_ctxs.size()];
 
-    _nodes_capacity = 1024;
-    _nodes = reinterpret_cast<uint8_t*>(malloc(_nodes_capacity * _node_byte_size));
-    memset(_nodes, 0, _nodes_capacity * _node_byte_size);
+    _alloc_list.reserve(10);
+    _current_nodes = reinterpret_cast<uint8_t*>(malloc(_current_capacity * _node_byte_size));
+    // TODO: remove memset later
+    memset(_current_nodes, 0, _current_capacity * _node_byte_size);
+    _alloc_list.push_back(_current_nodes);
 
-    _mem_tracker->Consume(_nodes_capacity * _node_byte_size);
+    _mem_tracker->Consume(_current_capacity * _node_byte_size);
     if (_mem_tracker->limit_exceeded()) {
-        mem_limit_exceeded(_nodes_capacity * _node_byte_size);
+        mem_limit_exceeded(_current_capacity * _node_byte_size);
     }
 }
 
@@ -77,8 +82,10 @@ void HashTable::close() {
     // TODO: use tr1::array?
     delete[] _expr_values_buffer;
     delete[] _expr_value_null_bits;
-    free(_nodes);
-    _mem_tracker->Release(_nodes_capacity * _node_byte_size);
+    for (auto ptr : _alloc_list) {
+        free(ptr);
+    }
+    _mem_tracker->Release(_total_capacity * _node_byte_size);
     _mem_tracker->Release(_buckets.size() * sizeof(Bucket));
 }
 
@@ -199,11 +206,10 @@ void HashTable::resize_buckets(int64_t num_buckets) {
         Bucket* bucket = &_buckets[i];
         Bucket* sister_bucket = &_buckets[i + old_num_buckets];
         Node* last_node = NULL;
-        int node_idx = bucket->_node_idx;
+        Node* node = bucket->_node;
 
-        while (node_idx != -1) {
-            Node* node = get_node(node_idx);
-            int64_t next_idx = node->_next_idx;
+        while (node != nullptr) {
+            Node* next_node = node->_next;
             uint32_t hash = node->_hash;
 
             bool node_must_move = true;
@@ -219,12 +225,12 @@ void HashTable::resize_buckets(int64_t num_buckets) {
             }
 
             if (node_must_move) {
-                move_node(bucket, move_to, node_idx, node, last_node);
+                move_node(bucket, move_to, node, last_node);
             } else {
                 last_node = node;
             }
 
-            node_idx = next_idx;
+            node = next_node;
         }
     }
 
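A side effect of chunked allocation: a node can no longer be identified by an offset into one contiguous array, so the hunks above switch the bucket chains from indices to raw pointers. The struct shapes below are an assumption inferred only from the fields the diff touches (_node, _next, _hash); they are not the actual hash_table.h definitions.

    // Assumed layout after the patch: buckets and nodes chain via pointers, so nodes
    // from different chunks link freely and the -1 "no node" sentinel becomes nullptr.
    struct Node {
        Node* _next;        // was: int64_t _next_idx into the single node array
        uint32_t _hash;     // cached hash value, also printed by debug_string()
        // the row's Tuple* array follows in the same _node_byte_size allocation
    };

    struct Bucket {
        Node* _node;        // was: int64_t _node_idx, with -1 meaning "empty bucket"
    };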
@@ -233,19 +239,19 @@ void HashTable::resize_buckets(int64_t num_buckets) {
 }
 
 void HashTable::grow_node_array() {
-    int64_t old_size = _nodes_capacity * _node_byte_size;
-    _nodes_capacity = _nodes_capacity + _nodes_capacity / 2;
-    int64_t new_size = _nodes_capacity * _node_byte_size;
+    _current_capacity = _total_capacity / 2;
+    _total_capacity += _current_capacity;
+    int64_t alloc_size = _current_capacity * _node_byte_size;
+    _current_nodes = reinterpret_cast<uint8_t*>(malloc(alloc_size));
+    _current_used = 0;
+    // TODO: remove memset later
+    memset(_current_nodes, 0, alloc_size);
+    // add _current_nodes to alloc pool
+    _alloc_list.push_back(_current_nodes);
 
-    uint8_t* new_nodes = reinterpret_cast<uint8_t*>(malloc(new_size));
-    memset(new_nodes, 0, new_size);
-    memcpy(new_nodes, _nodes, old_size);
-    free(_nodes);
-    _nodes = new_nodes;
-
-    _mem_tracker->Consume(new_size - old_size);
+    _mem_tracker->Consume(alloc_size);
     if (_mem_tracker->limit_exceeded()) {
-        mem_limit_exceeded(new_size - old_size);
+        mem_limit_exceeded(alloc_size);
     }
 }
 
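As a sanity check on the new growth arithmetic in grow_node_array(), the snippet below (assuming an initial capacity of 1024 node slots; in the patch the starting value is num_buckets) prints the chunk and total sizes for the first few grows. The total still increases by about 1.5x per step, as in the old code, but no bytes are copied.

    #include <cstdint>
    #include <cstdio>

    int main() {
        int64_t total_capacity = 1024;                       // assumed starting capacity
        for (int grow = 1; grow <= 4; ++grow) {
            int64_t current_capacity = total_capacity / 2;   // size of the newly malloc'd chunk
            total_capacity += current_capacity;              // ~1.5x growth, no memcpy of old nodes
            printf("grow %d: new chunk = %lld nodes, total = %lld nodes\n",
                   grow, (long long)current_capacity, (long long)total_capacity);
        }
        return 0;
    }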
@@ -262,29 +268,27 @@ std::string HashTable::debug_string(bool skip_empty, const RowDescriptor* desc)
     ss << std::endl;
 
     for (int i = 0; i < _buckets.size(); ++i) {
-        int64_t node_idx = _buckets[i]._node_idx;
+        Node* node = _buckets[i]._node;
         bool first = true;
 
-        if (skip_empty && node_idx == -1) {
+        if (skip_empty && node == nullptr) {
            continue;
        }
 
        ss << i << ": ";
 
-        while (node_idx != -1) {
-            Node* node = get_node(node_idx);
-
+        while (node != nullptr) {
            if (!first) {
                ss << ",";
            }
 
            if (desc == NULL) {
-                ss << node_idx << "(" << (void*)node->data() << ")";
+                ss << node->_hash << "(" << (void*)node->data() << ")";
            } else {
                ss << (void*)node->data() << " " << node->data()->to_string(*desc);
            }
 
-            node_idx = node->_next_idx;
+            node = node->_next;
            first = false;
        }
 