[chore](hashtable) Use doris' Allocator to replace std::allocator in phmap (#18735)

This commit is contained in:
Jerry Hu
2023-04-18 09:58:28 +08:00
committed by GitHub
parent 98b8efc2c2
commit 3de4d64657
11 changed files with 68 additions and 22 deletions

View File

@ -17,14 +17,13 @@
#pragma once
#include <parallel_hashmap/phmap.h>
#include "common/object_pool.h"
#include "runtime/decimalv2_value.h"
#include "runtime/define_primitive_type.h"
#include "runtime/primitive_type.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/common/hash_table/phmap_fwd_decl.h"
#include "vec/common/string_ref.h"
namespace doris {
@ -149,7 +148,7 @@ template <typename T>
class DynamicContainer {
public:
using Self = DynamicContainer;
using Iterator = typename phmap::flat_hash_set<T>::iterator;
using Iterator = typename vectorized::flat_hash_set<T>::iterator;
using ElementType = T;
DynamicContainer() = default;
@ -168,7 +167,7 @@ public:
size_t size() const { return _set.size(); }
private:
phmap::flat_hash_set<T> _set;
vectorized::flat_hash_set<T> _set;
};
// TODO Maybe change void* parameter to template parameter better.

View File

@ -49,7 +49,7 @@ void HyperLogLog::_convert_explicit_to_register() {
_update_registers(value);
}
// clear _hash_set
phmap::flat_hash_set<uint64_t>().swap(_hash_set);
vectorized::flat_hash_set<uint64_t>().swap(_hash_set);
}
// Change HLL_DATA_EXPLICIT to HLL_DATA_FULL directly, because HLL_DATA_SPARSE

View File

@ -18,7 +18,6 @@
#pragma once
#include <math.h>
#include <parallel_hashmap/phmap.h>
#include <stdio.h>
#include <map>
@ -30,6 +29,7 @@
#endif
#include "gutil/macros.h"
#include "vec/common/hash_table/phmap_fwd_decl.h"
namespace doris {
@ -268,7 +268,7 @@ public:
private:
HllDataType _type = HLL_DATA_EMPTY;
phmap::flat_hash_set<uint64_t> _hash_set;
vectorized::flat_hash_set<uint64_t> _hash_set;
// This field is much space consuming(HLL_REGISTERS_COUNT), we create
// it only when it is really needed.

View File

@ -17,8 +17,6 @@
#pragma once
#include <parallel_hashmap/phmap.h>
#include <cstdint>
#include <roaring/roaring.hh>

View File

@ -17,8 +17,6 @@
#include "olap/reader.h"
#include <parallel_hashmap/phmap.h>
#include "common/status.h"
#include "exprs/create_predicate_function.h"
#include "exprs/hybrid_set.h"

View File

@ -17,8 +17,6 @@
#pragma once
#include <parallel_hashmap/phmap.h>
#include <functional>
#include <memory>
#include <string>

View File

@ -20,9 +20,8 @@
#pragma once
#include <parallel_hashmap/phmap.h>
#include "vec/columns/column_complex.h"
#include "vec/common/hash_table/phmap_fwd_decl.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
#include "vec/core/field.h"
@ -221,7 +220,7 @@ public:
std::is_same_v<Derived,
AggregateFunctionBitmapOp<AggregateFunctionBitmapUnionOp>>) {
if (agg_many) {
phmap::flat_hash_map<AggregateDataPtr, std::vector<int>> place_rows;
flat_hash_map<AggregateDataPtr, std::vector<int>> place_rows;
for (int i = 0; i < batch_size; ++i) {
auto iter = place_rows.find(places[i] + place_offset);
if (iter == place_rows.end()) {

View File

@ -17,7 +17,6 @@
#pragma once
#include <parallel_hashmap/phmap.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
@ -29,6 +28,7 @@
#include "vec/columns/column_array.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_vector.h"
#include "vec/common/hash_table/phmap_fwd_decl.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_string.h"
@ -183,7 +183,7 @@ struct AggregateFunctionTopNData {
int top_num = 0;
uint64_t capacity = 0;
phmap::flat_hash_map<T, uint64_t> counter_map;
flat_hash_map<T, uint64_t> counter_map;
};
struct AggregateFunctionTopNImplInt {

View File

@ -20,8 +20,6 @@
#pragma once
#include <parallel_hashmap/phmap.h>
#include <type_traits>
#include "gutil/hash/city.h"
@ -31,6 +29,7 @@
#include "vec/common/assert_cast.h"
#include "vec/common/bit_cast.h"
#include "vec/common/hash_table/hash_set.h"
#include "vec/common/hash_table/phmap_fwd_decl.h"
#include "vec/common/typeid_cast.h"
#include "vec/data_types/data_type_number.h"
@ -47,7 +46,7 @@ struct AggregateFunctionUniqExactData {
using Key = std::conditional_t<is_string_key, UInt128, T>;
using Hash = std::conditional_t<is_string_key, UInt128TrivialHash, HashCRC32<Key>>;
using Set = phmap::flat_hash_set<Key, Hash>;
using Set = flat_hash_set<Key, Hash>;
static UInt128 ALWAYS_INLINE get_key(const StringRef& value) {
UInt128 key;

View File

@ -23,6 +23,7 @@
#include "vec/common/hash_table/hash.h"
#include "vec/common/hash_table/hash_table_utils.h"
#include "vec/common/hash_table/phmap_fwd_decl.h"
template <typename Key, typename Mapped>
ALWAYS_INLINE inline auto lookup_result_get_mapped(std::pair<const Key, Mapped>* it) {
@ -36,7 +37,7 @@ public:
using Self = PHHashMap;
using Hash = HashMethod;
using cell_type = std::pair<const Key, Mapped>;
using HashMapImpl = phmap::flat_hash_map<Key, Mapped, Hash>;
using HashMapImpl = doris::vectorized::flat_hash_map<Key, Mapped, Hash>;
using key_type = Key;
using mapped_type = Mapped;

View File

@ -0,0 +1,54 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <parallel_hashmap/phmap.h>
#include "vec/common/allocator.h"
namespace doris::vectorized {
/// `Allocator_` implements several interfaces of `std::allocator`
/// which `phmap::flat_hash_map` will use.
template <typename T>
class Allocator_ : private Allocator<false, true> {
public:
using value_type = T;
using pointer = T*;
Allocator_() = default;
template <typename T_>
Allocator_(const Allocator_<T_>&) {};
constexpr T* allocate(size_t n) { return static_cast<T*>(Allocator::alloc(n * sizeof(T))); }
void deallocate(pointer p, size_t n) { Allocator::free(p, n * sizeof(T)); }
friend bool operator==(const Allocator_&, const Allocator_&) { return true; }
};
template <typename K, typename V, typename Hash = phmap::Hash<K>, typename Eq = phmap::EqualTo<K>,
typename Alloc = Allocator_<phmap::Pair<const K, V>>>
using flat_hash_map = phmap::flat_hash_map<K, V, Hash, Eq, Alloc>;
template <typename K, typename Hash = phmap::Hash<K>, typename Eq = phmap::EqualTo<K>,
typename Alloc = Allocator_<K>>
using flat_hash_set = phmap::flat_hash_set<K, Hash, Eq, Alloc>;
} // namespace doris::vectorized