[minor](hash table) parameterize hash table (#19653)
This commit is contained in:
@ -103,6 +103,9 @@ DEFINE_mInt64(mmap_threshold, "134217728"); // bytes
|
||||
// Increasing this value reduces the number of hash table resizes, but may waste more memory.
|
||||
DEFINE_mInt32(hash_table_double_grow_degree, "31");
|
||||
|
||||
// The max fill rate for the hash table: once the size degree reaches
// hash_table_double_grow_degree, the table may fill up to
// buf_size - 2^(size_degree - max_fill_rate) cells before it is resized.
DEFINE_mInt32(max_fill_rate, "2");
|
||||
|
||||
// Size degree threshold for hash table growth: below it a resize adds 2 to the size
// degree (4x the cells); at or above it a resize adds 1 (2x the cells).
DEFINE_mInt32(double_resize_threshold, "20");
|
||||
// The maximum size to which the hash table is expanded before data is inserted.
|
||||
// When there are few duplicate keys, expanding in advance reduces the number of hash table resizes.
|
||||
// When there are many duplicate keys, the number of filled buckets is far smaller than the number of buckets built.
|
||||
|
||||
@ -141,6 +141,11 @@ DECLARE_mInt64(mmap_threshold); // bytes
|
||||
// Increasing this value reduces the number of hash table resizes, but may waste more memory.
|
||||
DECLARE_mInt32(hash_table_double_grow_degree);
|
||||
|
||||
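// The max fill rate for the hash table: once the size degree reaches
// hash_table_double_grow_degree, the table may fill up to
// buf_size - 2^(size_degree - max_fill_rate) cells before it is resized.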
|
||||
DECLARE_mInt32(max_fill_rate);
|
||||
|
||||
// Size degree threshold for hash table growth: below it a resize adds 2 to the size
// degree (4x the cells); at or above it a resize adds 1 (2x the cells).
DECLARE_mInt32(double_resize_threshold);
|
||||
|
||||
// The maximum size to which the hash table is expanded before data is inserted.
|
||||
// When there are few duplicate keys, expanding in advance reduces the number of hash table resizes.
|
||||
// When there are many duplicate keys, the number of filled buckets is far smaller than the number of buckets built.
|
||||
|
||||
@ -238,6 +238,8 @@ void insert_set_mapped(MappedType* dest, const ValueType& src) {
|
||||
*dest = src.second;
|
||||
}
|
||||
|
||||
// Threshold on the size degree used by increase_size(): below it the table grows by two
// degrees per resize, at or above it by one.
// NOTE(review): this copies doris::config::double_resize_threshold once, when this static
// is initialized. Later changes to the config value are presumably not reflected here, and
// the copy is only meaningful if the config already holds its final value at that
// point — confirm.
static doris::vectorized::Int32 double_resize_threshold = doris::config::double_resize_threshold;
|
||||
|
||||
/** Determines the size of the hash table, and when and how much it should be resized.
|
||||
*/
|
||||
template <size_t initial_size_degree = 10>
|
||||
@ -246,6 +248,8 @@ struct HashTableGrower {
|
||||
/// log2 of the buffer size (see buf_size()); starts at initial_size_degree.
doris::vectorized::UInt8 size_degree = initial_size_degree;
|
||||
/// Size degree from which max_fill() stops using half of the buffer and switches to the
/// max_fill_rate-based limit. Copied from config when the grower is constructed.
doris::vectorized::Int64 double_grow_degree = doris::config::hash_table_double_grow_degree;
|
||||
|
||||
/// Shift used by max_fill() once size_degree reaches double_grow_degree: the limit is
/// buf_size minus 2^(size_degree - max_fill_rate). Copied from config when the grower is constructed.
doris::vectorized::Int32 max_fill_rate = doris::config::max_fill_rate;
|
||||
|
||||
/// The size of the hash table in the cells.
|
||||
// Always a power of two: 2^size_degree.
size_t buf_size() const { return 1ULL << size_degree; }
|
||||
|
||||
@ -253,7 +257,7 @@ struct HashTableGrower {
|
||||
/// Largest element count the table may hold before overflow() reports true.
/// While size_degree < double_grow_degree this is half of the buffer (2^(size_degree - 1));
/// otherwise it is the buffer size minus a fraction of it. This diff view shows both the
/// pre-change branch (fixed shift of 2) and the post-change branch (shift of max_fill_rate).
/// NOTE(review): if max_fill_rate can exceed size_degree the shift count becomes negative,
/// which is undefined behavior — confirm the config value is validated.
size_t max_fill() const {
|
||||
return size_degree < double_grow_degree
|
||||
? 1ULL << (size_degree - 1)
|
||||
: (1ULL << size_degree) - (1ULL << (size_degree - 2));
|
||||
: (1ULL << size_degree) - (1ULL << (size_degree - max_fill_rate));
|
||||
}
|
||||
|
||||
/// buf_size() - 1; since buf_size() is a power of two, this has the low size_degree bits set.
size_t mask() const { return buf_size() - 1; }
|
||||
@ -271,7 +275,7 @@ struct HashTableGrower {
|
||||
/// True when elems is strictly greater than max_fill().
bool overflow(size_t elems) const { return elems > max_fill(); }
|
||||
|
||||
/// Increase the size of the hash table.
|
||||
void increase_size() { size_degree += size_degree >= 23 ? 1 : 2; }
|
||||
// Adds 2 to size_degree (4x the cells) while it is below double_resize_threshold,
// and 1 (2x the cells) once it has reached the threshold.
void increase_size() { size_degree += size_degree >= double_resize_threshold ? 1 : 2; }
|
||||
|
||||
/// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
|
||||
void set(size_t num_elems) {
|
||||
@ -336,7 +340,7 @@ public:
|
||||
/// True when elems is strictly greater than precalculated_max_fill.
bool overflow(size_t elems) const { return elems > precalculated_max_fill; }
|
||||
|
||||
/// Increase the size of the hash table.
|
||||
void increase_size() { increase_size_degree(size_degree_ >= 23 ? 1 : 2); }
|
||||
// Passes a delta of 2 to increase_size_degree() while size_degree_ is below
// double_resize_threshold, and a delta of 1 once it has reached the threshold.
void increase_size() { increase_size_degree(size_degree_ >= double_resize_threshold ? 1 : 2); }
|
||||
|
||||
/// Set the buffer size by the number of elements in the hash table. Used when deserializing a hash table.
|
||||
void set(size_t num_elems) {
|
||||
|
||||
Reference in New Issue
Block a user