patch 4.0
This commit is contained in:
@ -22,44 +22,57 @@
|
||||
#include "sql/engine/basic/ob_chunk_datum_store.h"
|
||||
#include "sql/engine/ob_sql_mem_mgr_processor.h"
|
||||
|
||||
namespace oceanbase {
|
||||
namespace common {
|
||||
namespace oceanbase
|
||||
{
|
||||
namespace common{
|
||||
class ObNewRow;
|
||||
class ObRowStore;
|
||||
} // namespace common
|
||||
}
|
||||
|
||||
namespace sql
|
||||
{
|
||||
|
||||
namespace sql {
|
||||
|
||||
// Auto-extending hash table: doubles the bucket count once the hash table is a quarter full.
|
||||
template <typename Item>
|
||||
class ObExtendHashTable {
|
||||
class ObExtendHashTable
|
||||
{
|
||||
public:
|
||||
const static int64_t INITIAL_SIZE = 128;
|
||||
const static int64_t SIZE_BUCKET_SCALE = 4;
|
||||
const static int64_t SIZE_BUCKET_SCALE = 2;
|
||||
const static int64_t MAX_MEM_PERCENT = 40;
|
||||
ObExtendHashTable() : initial_bucket_num_(0), size_(0), buckets_(NULL), allocator_(NULL),
|
||||
sql_mem_processor_(nullptr)
|
||||
{}
|
||||
~ObExtendHashTable()
|
||||
{
|
||||
destroy();
|
||||
}
|
||||
|
||||
int init(ObIAllocator* allocator, lib::ObMemAttr& mem_attr,
|
||||
ObSqlMemMgrProcessor *sql_mem_processor, int64_t initial_size = INITIAL_SIZE);
|
||||
bool is_inited() const
|
||||
// One slot of the hash table's bucket array: a hash value plus an item pointer.
// NOTE(review): from locate_bucket()/set() in this file, a NULL item_ appears
// to mark an empty slot, and a non-NULL item_ heads a chain linked via next() — confirm.
struct Bucket
|
||||
{
|
||||
// Hash value recorded for the slot.
uint64_t hash_;
|
||||
// Item stored in the slot.
Item *item_;
|
||||
|
||||
// Keep the constructor trivial so that ObSegmentArray can use memset to construct arrays.
|
||||
Bucket() = default;
|
||||
TO_STRING_KV(K(hash_), KP(item_));
|
||||
};
|
||||
using BucketArray = common::ObSegmentArray<Bucket,
|
||||
OB_MALLOC_MIDDLE_BLOCK_SIZE,
|
||||
common::ModulePageAllocator>;
|
||||
|
||||
ObExtendHashTable()
|
||||
: initial_bucket_num_(0),
|
||||
size_(0),
|
||||
buckets_(NULL),
|
||||
allocator_("ExtendHTBucket")
|
||||
{
|
||||
return NULL != buckets_;
|
||||
}
|
||||
~ObExtendHashTable() { destroy(); }
|
||||
|
||||
int init(ObIAllocator *allocator, lib::ObMemAttr &mem_attr,
|
||||
int64_t initial_size = INITIAL_SIZE);
|
||||
bool is_inited() const { return NULL != buckets_; }
|
||||
// Return the first item equal to %item, or NULL if none exists.
|
||||
const Item* get(const Item& item) const;
|
||||
const Item *get(const Item &item) const;
|
||||
// Link item to hash table, extend buckets if needed.
|
||||
// (Does not check whether the item already exists.)
|
||||
int set(Item& item);
|
||||
int64_t size() const
|
||||
{
|
||||
return size_;
|
||||
}
|
||||
int set(Item &item);
|
||||
int64_t size() const { return size_; }
|
||||
|
||||
void reuse()
|
||||
{
|
||||
@ -74,7 +87,7 @@ public:
|
||||
size_ = 0;
|
||||
}
|
||||
|
||||
int resize(ObIAllocator *allocator, int64_t bucket_num, ObSqlMemMgrProcessor *sql_mem_processor);
|
||||
int resize(ObIAllocator *allocator, int64_t bucket_num);
|
||||
|
||||
void destroy()
|
||||
{
|
||||
@ -86,7 +99,6 @@ public:
|
||||
allocator_.set_allocator(nullptr);
|
||||
size_ = 0;
|
||||
initial_bucket_num_ = 0;
|
||||
sql_mem_processor_ = nullptr;
|
||||
}
|
||||
int64_t mem_used() const
|
||||
{
|
||||
@ -98,7 +110,7 @@ public:
|
||||
return NULL == buckets_ ? 0 : buckets_->count();
|
||||
}
|
||||
template <typename CB>
|
||||
int foreach (CB& cb) const
|
||||
int foreach(CB &cb) const
|
||||
{
|
||||
int ret = common::OB_SUCCESS;
|
||||
if (OB_ISNULL(buckets_)) {
|
||||
@ -106,7 +118,7 @@ public:
|
||||
SQL_ENG_LOG(WARN, "invalid null buckets", K(ret), K(buckets_));
|
||||
}
|
||||
for (int64_t i = 0; OB_SUCC(ret) && i < get_bucket_num(); i++) {
|
||||
Item* item = buckets_->at(i);
|
||||
Item *item = buckets_->at(i).item_;
|
||||
while (NULL != item && OB_SUCC(ret)) {
|
||||
if (OB_FAIL(cb(*item))) {
|
||||
SQL_ENG_LOG(WARN, "call back failed", K(ret));
|
||||
@ -117,49 +129,38 @@ public:
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
protected:
|
||||
// Locate the bucket holding the same hash value, or an empty bucket if none is found.
|
||||
// A returned empty bucket is the insert position for %hash_val.
|
||||
//
// @param buckets   bucket array to search
// @param hash_val  hash value to look for
// @return reference to the first probed bucket whose hash_ equals %hash_val
//         or whose item_ is NULL
OB_INLINE const Bucket &locate_bucket(const BucketArray &buckets,
|
||||
const uint64_t hash_val) const
|
||||
{
|
||||
const int64_t cnt = buckets.count();
|
||||
// NOTE(review): masking with (cnt - 1) acts as a modulo only when cnt is a
// power of two — presumably guaranteed by how the array is sized; confirm.
int64_t pos = hash_val & (cnt - 1);
|
||||
const Bucket *bucket = &buckets.at(pos);
|
||||
// The extend logic makes sure the bucket array is never full, so the loop count is always less than %cnt.
|
||||
// Linear probing: advance to the next slot (wrapping via the mask) while the
// current slot is occupied by an item with a different hash value.
while (hash_val != bucket->hash_ && NULL != bucket->item_) {
|
||||
bucket = &buckets.at((++pos) & (cnt - 1));
|
||||
}
|
||||
return *bucket;
|
||||
}
|
||||
|
||||
protected:
|
||||
DISALLOW_COPY_AND_ASSIGN(ObExtendHashTable);
|
||||
int extend();
|
||||
int64_t estimate_bucket_num(
|
||||
const int64_t bucket_num,
|
||||
const int64_t max_hash_mem);
|
||||
|
||||
protected:
|
||||
lib::ObMemAttr mem_attr_;
|
||||
int64_t initial_bucket_num_;
|
||||
int64_t size_;
|
||||
using BucketArray = common::ObSegmentArray<Item*, OB_MALLOC_BIG_BLOCK_SIZE, common::ModulePageAllocator>;
|
||||
BucketArray* buckets_;
|
||||
BucketArray *buckets_;
|
||||
common::ModulePageAllocator allocator_;
|
||||
ObSqlMemMgrProcessor *sql_mem_processor_;
|
||||
};
|
||||
|
||||
// Estimate how many buckets the hash table may use under a memory bound.
// Starts from common::next_pow2(bucket_num), halves the count while the size of
// that many pointers exceeds MAX_MEM_PERCENT percent of %max_hash_mem, and
// finally raises the result to at least INITIAL_SIZE.
//
// @param bucket_num    desired number of buckets
// @param max_hash_mem  memory bound that the estimate is checked against
// @return the estimated bucket number (never less than INITIAL_SIZE)
template <typename Item>
|
||||
int64_t ObExtendHashTable<Item>::estimate_bucket_num(
|
||||
const int64_t bucket_num,
|
||||
const int64_t max_hash_mem)
|
||||
{
|
||||
// Only MAX_MEM_PERCENT percent of the bound may be taken by the bucket array.
int64_t max_bound_size = max_hash_mem * MAX_MEM_PERCENT / 100;
|
||||
int64_t est_bucket_num = common::next_pow2(bucket_num);
|
||||
// Each bucket is costed as one pointer.
int64_t est_size = est_bucket_num * sizeof(void*);
|
||||
// NOTE(review): if max_bound_size is negative (negative %max_hash_mem, or an
// overflowing multiplication above), est_size bottoms out at 0 and this loop
// never terminates — confirm callers cannot pass such a value.
while (est_size > max_bound_size) {
|
||||
est_bucket_num >>= 1;
|
||||
est_size = est_bucket_num * sizeof(void*);
|
||||
}
|
||||
// Never go below the minimum table size, even if that exceeds the bound.
if (est_bucket_num < INITIAL_SIZE) {
|
||||
est_bucket_num = INITIAL_SIZE;
|
||||
}
|
||||
return est_bucket_num;
|
||||
}
|
||||
|
||||
|
||||
template <typename Item>
|
||||
int ObExtendHashTable<Item>::init(
|
||||
ObIAllocator *allocator,
|
||||
lib::ObMemAttr &mem_attr,
|
||||
ObSqlMemMgrProcessor *sql_mem_processor,
|
||||
const int64_t initial_size /* INITIAL_SIZE */)
|
||||
ObIAllocator *allocator,
|
||||
lib::ObMemAttr &mem_attr,
|
||||
const int64_t initial_size /* INITIAL_SIZE */)
|
||||
{
|
||||
int ret = common::OB_SUCCESS;
|
||||
if (initial_size < 2) {
|
||||
@ -167,16 +168,16 @@ int ObExtendHashTable<Item>::init(
|
||||
SQL_ENG_LOG(WARN, "invalid argument", K(ret));
|
||||
} else {
|
||||
mem_attr_ = mem_attr;
|
||||
sql_mem_processor_ = sql_mem_processor;
|
||||
allocator_.set_allocator(allocator);
|
||||
allocator_.set_label(mem_attr.label_);
|
||||
void* buckets_buf = NULL;
|
||||
void *buckets_buf = NULL;
|
||||
if (OB_ISNULL(buckets_buf = allocator_.alloc(sizeof(BucketArray), mem_attr))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
SQL_ENG_LOG(WARN, "failed to allocate memory", K(ret));
|
||||
} else {
|
||||
buckets_ = new (buckets_buf) BucketArray(allocator_);
|
||||
buckets_ = new(buckets_buf)BucketArray(allocator_);
|
||||
initial_bucket_num_ = common::next_pow2(initial_size * SIZE_BUCKET_SCALE);
|
||||
SQL_ENG_LOG(DEBUG, "debug bucket num", K(ret), K(buckets_->count()), K(initial_bucket_num_));
|
||||
size_ = 0;
|
||||
}
|
||||
if (OB_FAIL(ret)) {
|
||||
@ -189,13 +190,12 @@ int ObExtendHashTable<Item>::init(
|
||||
}
|
||||
|
||||
template <typename Item>
|
||||
int ObExtendHashTable<Item>::resize(ObIAllocator* allocator, int64_t bucket_num,
|
||||
ObSqlMemMgrProcessor *sql_mem_processor)
|
||||
int ObExtendHashTable<Item>::resize(ObIAllocator *allocator, int64_t bucket_num)
|
||||
{
|
||||
int ret = OB_SUCCESS;
|
||||
if (bucket_num < get_bucket_num() / 2) {
|
||||
destroy();
|
||||
if (OB_FAIL(init(allocator, mem_attr_, sql_mem_processor, bucket_num))) {
|
||||
if (OB_FAIL(init(allocator, mem_attr_, bucket_num))) {
|
||||
SQL_ENG_LOG(WARN, "failed to reuse with bucket", K(bucket_num), K(ret));
|
||||
}
|
||||
} else {
|
||||
@ -205,29 +205,29 @@ int ObExtendHashTable<Item>::resize(ObIAllocator* allocator, int64_t bucket_num,
|
||||
}
|
||||
|
||||
template <typename Item>
|
||||
const Item* ObExtendHashTable<Item>::get(const Item& item) const
|
||||
const Item *ObExtendHashTable<Item>::get(const Item &item) const
|
||||
{
|
||||
Item* res = NULL;
|
||||
Item *res = NULL;
|
||||
if (NULL == buckets_) {
|
||||
// do nothing
|
||||
} else {
|
||||
common::hash::hash_func<Item> hf;
|
||||
common::hash::equal_to<Item> eqf;
|
||||
const uint64_t hash_val = hf(item);
|
||||
Item* bucket = buckets_->at(hash_val & (get_bucket_num() - 1));
|
||||
while (NULL != bucket) {
|
||||
if (hash_val == hf(*bucket) && eqf(*bucket, item)) {
|
||||
res = bucket;
|
||||
Item *it = locate_bucket(*buckets_, hash_val).item_;
|
||||
while (NULL != it) {
|
||||
if (eqf(*it, item)) {
|
||||
res = it;
|
||||
break;
|
||||
}
|
||||
bucket = bucket->next();
|
||||
it = it->next();
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename Item>
|
||||
int ObExtendHashTable<Item>::set(Item& item)
|
||||
int ObExtendHashTable<Item>::set(Item &item)
|
||||
{
|
||||
common::hash::hash_func<Item> hf;
|
||||
int ret = common::OB_SUCCESS;
|
||||
@ -242,9 +242,14 @@ int ObExtendHashTable<Item>::set(Item& item)
|
||||
ret = OB_INVALID_ARGUMENT;
|
||||
SQL_ENG_LOG(WARN, "invalid argument", K(ret), K(buckets_));
|
||||
} else {
|
||||
Item*& bucket = buckets_->at(hf(item) & (get_bucket_num() - 1));
|
||||
item.next() = bucket;
|
||||
bucket = &item;
|
||||
uint64_t hash_val = hf(item);
|
||||
Bucket *bucket = const_cast<Bucket *>(&locate_bucket(*buckets_, hash_val));
|
||||
if (NULL == bucket->item_) {
|
||||
bucket->hash_ = hash_val;
|
||||
} else {
|
||||
item.next() = bucket->item_;
|
||||
}
|
||||
bucket->item_ = &item;
|
||||
size_ += 1;
|
||||
}
|
||||
return ret;
|
||||
@ -259,16 +264,17 @@ int ObExtendHashTable<Item>::extend()
|
||||
int64_t new_bucket_num = 0 == pre_bucket_num ?
|
||||
(0 == initial_bucket_num_ ? INITIAL_SIZE : initial_bucket_num_)
|
||||
: pre_bucket_num * 2;
|
||||
new_bucket_num = estimate_bucket_num(new_bucket_num, sql_mem_processor_->get_mem_bound());
|
||||
SQL_ENG_LOG(DEBUG, "extend hash table", K(ret), K(new_bucket_num), K(initial_bucket_num_),
|
||||
K(pre_bucket_num));
|
||||
if (new_bucket_num <= pre_bucket_num) {
|
||||
} else {
|
||||
BucketArray* new_buckets = NULL;
|
||||
void* buckets_buf = NULL;
|
||||
BucketArray *new_buckets = NULL;
|
||||
void *buckets_buf = NULL;
|
||||
if (OB_ISNULL(buckets_buf = allocator_.alloc(sizeof(BucketArray), mem_attr_))) {
|
||||
ret = OB_ALLOCATE_MEMORY_FAILED;
|
||||
SQL_ENG_LOG(WARN, "failed to allocate memory", K(ret));
|
||||
} else {
|
||||
new_buckets = new (buckets_buf) BucketArray(allocator_);
|
||||
new_buckets = new(buckets_buf)BucketArray(allocator_);
|
||||
}
|
||||
if (OB_FAIL(ret)) {
|
||||
// do nothing
|
||||
@ -278,14 +284,11 @@ int ObExtendHashTable<Item>::extend()
|
||||
} else if (OB_FAIL(new_buckets->init(new_bucket_num))) {
|
||||
SQL_ENG_LOG(WARN, "resize bucket array failed", K(ret), K(new_bucket_num));
|
||||
} else {
|
||||
for (int64_t i = 0; i < get_bucket_num(); i++) {
|
||||
Item* bucket = buckets_->at(i);
|
||||
while (bucket != NULL) {
|
||||
Item* item = bucket;
|
||||
bucket = bucket->next();
|
||||
Item*& new_bucket = new_buckets->at(hf(*item) & (new_bucket_num - 1));
|
||||
item->next() = new_bucket;
|
||||
new_bucket = item;
|
||||
const int64_t size = get_bucket_num();
|
||||
for (int64_t i = 0; i < size; i++) {
|
||||
const Bucket &old = buckets_->at(i);
|
||||
if (NULL != old.item_) {
|
||||
const_cast<Bucket &>(locate_bucket(*new_buckets, old.hash_)) = old;
|
||||
}
|
||||
}
|
||||
buckets_->destroy();
|
||||
@ -306,19 +309,33 @@ int ObExtendHashTable<Item>::extend()
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Used to calculate the hash of columns
|
||||
class ObHashCols {
|
||||
|
||||
//Used to calculate the hash of columns
|
||||
class ObHashCols
|
||||
{
|
||||
public:
|
||||
ObHashCols() : row_(NULL), stored_row_(NULL), hash_col_idx_(NULL), next_(NULL), hash_val_(0)
|
||||
{}
|
||||
ObHashCols(const common::ObNewRow* row, const common::ObIArray<common::ObColumnInfo>* hash_col_idx)
|
||||
: row_(row), stored_row_(NULL), hash_col_idx_(hash_col_idx), next_(NULL), hash_val_(0)
|
||||
{}
|
||||
ObHashCols()
|
||||
: row_(NULL),
|
||||
stored_row_(NULL),
|
||||
hash_col_idx_(NULL),
|
||||
next_(NULL),
|
||||
hash_val_(0)
|
||||
{
|
||||
}
|
||||
ObHashCols(const common::ObNewRow *row, const common::ObIArray<common::ObColumnInfo> *hash_col_idx)
|
||||
: row_(row),
|
||||
stored_row_(NULL),
|
||||
hash_col_idx_(hash_col_idx),
|
||||
next_(NULL),
|
||||
hash_val_(0)
|
||||
{
|
||||
}
|
||||
|
||||
~ObHashCols()
|
||||
{}
|
||||
|
||||
int init(const common::ObNewRow* row, const common::ObIArray<common::ObColumnInfo>* hash_col_idx,
|
||||
int init(const common::ObNewRow *row,
|
||||
const common::ObIArray<common::ObColumnInfo> *hash_col_idx,
|
||||
const uint64_t hash_val = 0)
|
||||
{
|
||||
row_ = row;
|
||||
@ -338,44 +355,43 @@ public:
|
||||
|
||||
uint64_t inner_hash() const;
|
||||
|
||||
bool operator==(const ObHashCols& other) const;
|
||||
bool operator ==(const ObHashCols &other) const;
|
||||
|
||||
void set_stored_row(const common::ObRowStore::StoredRow* stored_row);
|
||||
void set_stored_row(const common::ObRowStore::StoredRow *stored_row);
|
||||
|
||||
ObHashCols*& next()
|
||||
{
|
||||
return *reinterpret_cast<ObHashCols**>(&next_);
|
||||
};
|
||||
ObHashCols *&next() { return *reinterpret_cast<ObHashCols **>(&next_); };
|
||||
|
||||
TO_STRING_KV(K_(row), K_(stored_row), K_(hash_col_idx), K_(next), K_(hash_val));
|
||||
|
||||
public:
|
||||
const common::ObNewRow* row_;
|
||||
const common::ObRowStore::StoredRow* stored_row_;
|
||||
const common::ObIArray<common::ObColumnInfo>* hash_col_idx_;
|
||||
void* next_;
|
||||
const common::ObNewRow *row_;
|
||||
const common::ObRowStore::StoredRow *stored_row_;
|
||||
const common::ObIArray<common::ObColumnInfo> *hash_col_idx_;
|
||||
void *next_;
|
||||
mutable uint64_t hash_val_;
|
||||
};
|
||||
|
||||
class ObGbyHashCols : public ObHashCols {
|
||||
class ObGbyHashCols : public ObHashCols
|
||||
{
|
||||
public:
|
||||
using ObHashCols::ObHashCols;
|
||||
ObGbyHashCols*& next()
|
||||
{
|
||||
return *reinterpret_cast<ObGbyHashCols**>(&next_);
|
||||
};
|
||||
|
||||
ObGbyHashCols *&next() { return *reinterpret_cast<ObGbyHashCols **>(&next_); };
|
||||
public:
|
||||
int64_t group_id_ = 0;
|
||||
};
|
||||
|
||||
// This class does not inherit ObPhyOperatorCtx, because of multi-inheritance.
|
||||
// Used to build hash group rows.
|
||||
//This class does not inherit ObPhyOperatorCtx, because of multi-inheritance.
|
||||
//Used to build hash group rows.
|
||||
template <typename Item>
|
||||
class ObHashCtx {
|
||||
class ObHashCtx
|
||||
{
|
||||
public:
|
||||
explicit ObHashCtx() : group_rows_(), started_(false), bkt_created_(false)
|
||||
{}
|
||||
explicit ObHashCtx()
|
||||
: group_rows_(),
|
||||
started_(false),
|
||||
bkt_created_(false)
|
||||
{
|
||||
}
|
||||
virtual ~ObHashCtx()
|
||||
{
|
||||
group_rows_.destroy();
|
||||
@ -388,10 +404,8 @@ public:
|
||||
{
|
||||
return group_rows_.mem_used();
|
||||
}
|
||||
|
||||
private:
|
||||
DISALLOW_COPY_AND_ASSIGN(ObHashCtx);
|
||||
|
||||
protected:
|
||||
ObExtendHashTable<Item> group_rows_;
|
||||
bool started_;
|
||||
@ -409,10 +423,13 @@ protected:
|
||||
// bswap is needed here because the low bits of the high 32 bits may be used for partitioning; they
|
||||
// are the same in one partition.
|
||||
//
|
||||
class ObGbyBloomFilter {
|
||||
class ObGbyBloomFilter
|
||||
{
|
||||
public:
|
||||
explicit ObGbyBloomFilter(const ModulePageAllocator& alloc) : bits_(alloc), cnt_(0), h2_shift_(0)
|
||||
{}
|
||||
explicit ObGbyBloomFilter(const ModulePageAllocator &alloc)
|
||||
: bits_(alloc), cnt_(0), h2_shift_(0)
|
||||
{
|
||||
}
|
||||
|
||||
int init(const int64_t size, int64_t ratio = 8)
|
||||
{
|
||||
@ -442,7 +459,6 @@ public:
|
||||
cnt_ = 0;
|
||||
h2_shift_ = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
inline uint64_t h1(const uint64_t hash_val)
|
||||
{
|
||||
@ -457,7 +473,8 @@ private:
|
||||
const static int64_t idx = 0;
|
||||
#endif
|
||||
uint64_t v = hash_val;
|
||||
reinterpret_cast<uint32_t*>(&v)[idx] = __builtin_bswap32(reinterpret_cast<const uint32_t*>(&hash_val)[idx]);
|
||||
reinterpret_cast<uint32_t *>(&v)[idx]
|
||||
= __builtin_bswap32(reinterpret_cast<const uint32_t *>(&hash_val)[idx]);
|
||||
return v >> h2_shift_;
|
||||
}
|
||||
|
||||
@ -468,8 +485,8 @@ public:
|
||||
if (0 == cnt_) {
|
||||
ret = OB_ERR_UNEXPECTED;
|
||||
SQL_ENG_LOG(WARN, "invalied cnt", K(ret), K(cnt_));
|
||||
} else if (OB_FAIL(bits_.add_member(h1(hash_val) & (cnt_ - 1))) ||
|
||||
OB_FAIL(bits_.add_member(h2(hash_val) & (cnt_ - 1)))) {
|
||||
} else if (OB_FAIL(bits_.add_member(h1(hash_val) & (cnt_ - 1)))
|
||||
|| OB_FAIL(bits_.add_member(h2(hash_val) & (cnt_ - 1)))) {
|
||||
SQL_ENG_LOG(WARN, "bit set add member failed", K(ret), K(cnt_));
|
||||
}
|
||||
return ret;
|
||||
@ -477,16 +494,17 @@ public:
|
||||
|
||||
bool exist(const uint64_t hash_val)
|
||||
{
|
||||
return bits_.has_member(h1(hash_val) & (cnt_ - 1)) && bits_.has_member(h2(hash_val) & (cnt_ - 1));
|
||||
return bits_.has_member(h1(hash_val) & (cnt_ - 1))
|
||||
&& bits_.has_member(h2(hash_val) & (cnt_ - 1));
|
||||
}
|
||||
|
||||
private:
|
||||
ObSegmentBitSet<common::OB_MALLOC_BIG_BLOCK_SIZE> bits_;
|
||||
int64_t cnt_; // power of 2
|
||||
ObSegmentBitSet<common::OB_MALLOC_MIDDLE_BLOCK_SIZE> bits_;
|
||||
int64_t cnt_; // power of 2
|
||||
int64_t h2_shift_;
|
||||
};
|
||||
|
||||
} // namespace sql
|
||||
} // namespace oceanbase
|
||||
}//ns sql
|
||||
}//ns oceanbase
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user