// Licensed to the Apache Software Foundation (ASF) under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, // software distributed under the License is distributed on an // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. #ifndef DORIS_BE_SRC_QUERY_EXEC_HASH_TABLE_HPP #define DORIS_BE_SRC_QUERY_EXEC_HASH_TABLE_HPP #include "exec/hash_table.h" namespace doris { inline bool HashTable::emplace_key(TupleRow* row, TupleRow** dest_addr) { if (_num_filled_buckets > _num_buckets_till_resize) { resize_buckets(_num_buckets * 2); } if (_current_used == _current_capacity) { grow_node_array(); } bool has_nulls = eval_build_row(row); if (!_stores_nulls && has_nulls) { return false; } uint32_t hash = hash_current_row(); int64_t bucket_idx = hash & (_num_buckets - 1); Bucket* bucket = &_buckets[bucket_idx]; Node* node = bucket->_node; bool will_insert = true; if (node == nullptr) { will_insert = true; } else { Node* last_node = node; while (node != nullptr) { if (node->_hash == hash && equals(node->data())) { will_insert = false; break; } last_node = node; node = node->_next; } node = last_node; } if (will_insert) { Node* alloc_node = reinterpret_cast(_current_nodes + _node_byte_size * _current_used++); ++_num_nodes; TupleRow* data = alloc_node->data(); *dest_addr = data; alloc_node->_hash = hash; if (node == nullptr) { add_to_bucket(&_buckets[bucket_idx], alloc_node); } else { node->_next = alloc_node; } } return will_insert; } inline HashTable::Iterator HashTable::find(TupleRow* probe_row, bool probe) { bool has_nulls = probe ? eval_probe_row(probe_row) : eval_build_row(probe_row); if (!_stores_nulls && has_nulls) { return end(); } uint32_t hash = hash_current_row(); int64_t bucket_idx = hash & (_num_buckets - 1); Bucket* bucket = &_buckets[bucket_idx]; Node* node = bucket->_node; while (node != nullptr) { if (node->_hash == hash && equals(node->data())) { return Iterator(this, bucket_idx, node, hash); } node = node->_next; } return end(); } inline HashTable::Iterator HashTable::begin() { int64_t bucket_idx = -1; Bucket* bucket = next_bucket(&bucket_idx); if (bucket != NULL) { return Iterator(this, bucket_idx, bucket->_node, 0); } return end(); } inline HashTable::Bucket* HashTable::next_bucket(int64_t* bucket_idx) { ++*bucket_idx; for (; *bucket_idx < _num_buckets; ++*bucket_idx) { if (_buckets[*bucket_idx]._node != nullptr) { return &_buckets[*bucket_idx]; } } *bucket_idx = -1; return NULL; } inline void HashTable::insert_impl(TupleRow* row) { bool has_null = eval_build_row(row); if (!_stores_nulls && has_null) { return; } uint32_t hash = hash_current_row(); int64_t bucket_idx = hash & (_num_buckets - 1); if (_current_used == _current_capacity) { grow_node_array(); } // get a node from memory pool Node* node = reinterpret_cast(_current_nodes + _node_byte_size * _current_used++); TupleRow* data = node->data(); node->_hash = hash; memcpy(data, row, sizeof(Tuple*) * _num_build_tuples); add_to_bucket(&_buckets[bucket_idx], node); ++_num_nodes; } inline void HashTable::add_to_bucket(Bucket* bucket, Node* node) { if (bucket->_node == nullptr) { ++_num_filled_buckets; } node->_next = bucket->_node; bucket->_node = node; bucket->_size++; } inline void HashTable::move_node(Bucket* from_bucket, Bucket* to_bucket, Node* node, Node* previous_node) { Node* next_node = node->_next; from_bucket->_size--; if (previous_node != NULL) { previous_node->_next = next_node; } else { // Update bucket directly from_bucket->_node = next_node; if (next_node == nullptr) { --_num_filled_buckets; } } add_to_bucket(to_bucket, node); } inline std::pair HashTable::minmax_node() { bool has_value = false; int64_t min_size = std::numeric_limits::max(); int64_t max_size = std::numeric_limits::min(); for (const auto bucket : _buckets) { int64_t counter = bucket._size; if (counter > 0) { has_value = true; min_size = std::min(counter, min_size); max_size = std::max(counter, max_size); } } if (!has_value) { return std::make_pair(0, 0); } return std::make_pair(min_size, max_size); } template inline void HashTable::Iterator::next() { if (_bucket_idx == -1) { return; } // TODO: this should prefetch the next tuplerow Node* node = _node; // Iterator is not from a full table scan, evaluate equality now. Only the current // bucket needs to be scanned. '_expr_values_buffer' contains the results // for the current probe row. if (check_match) { // TODO: this should prefetch the next node Node* next_node = node->_next; while (next_node != nullptr) { node = next_node; if (node->_hash == _scan_hash && _table->equals(node->data())) { _node = next_node; return; } next_node = node->_next; } *this = _table->end(); } else { // Move onto the next chained node if (node->_next != nullptr) { _node = node->_next; return; } // Move onto the next bucket Bucket* bucket = _table->next_bucket(&_bucket_idx); if (bucket == NULL) { _bucket_idx = -1; _node = nullptr; } else { _node = bucket->_node; } } } } // namespace doris #endif