|
|
|
@ -25,8 +25,9 @@ using namespace common;
|
|
|
|
|
|
|
|
|
|
ObEncodingHashTable::ObEncodingHashTable() : is_created_(false), bucket_num_(0),
|
|
|
|
|
node_num_(0), list_num_(0), node_cnt_(0), list_cnt_(0), buckets_(NULL), nodes_(NULL),
|
|
|
|
|
lists_(NULL), alloc_(blocksstable::OB_ENCODING_LABEL_HASH_TABLE, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID())
|
|
|
|
|
{
|
|
|
|
|
lists_(NULL), skip_bit_(NULL), hash_val_(NULL),
|
|
|
|
|
alloc_(blocksstable::OB_ENCODING_LABEL_HASH_TABLE, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID())
|
|
|
|
|
{
|
|
|
|
|
MEMSET(&null_nodes_, 0, sizeof(null_nodes_));
|
|
|
|
|
MEMSET(&nope_nodes_, 0, sizeof(nope_nodes_));
|
|
|
|
|
}
|
|
|
|
@ -58,6 +59,7 @@ int ObEncodingHashTable::create(const int64_t bucket_num, const int64_t node_num
|
|
|
|
|
const int64_t bucket_size = bucket_num_ * static_cast<int64_t>(sizeof(HashBucket));
|
|
|
|
|
const int64_t nodes_size = node_num_ * static_cast<int64_t>(sizeof(HashNode));
|
|
|
|
|
const int64_t lists_size = list_num_ * static_cast<int64_t>(sizeof(NodeList));
|
|
|
|
|
const int64_t vec_size = sql::ObBitVector::memory_size(node_num_);
|
|
|
|
|
|
|
|
|
|
if (OB_ISNULL(buckets_ = reinterpret_cast<HashBucket *>(alloc_.alloc(bucket_size)))) {
|
|
|
|
|
ret = OB_ALLOCATE_MEMORY_FAILED;
|
|
|
|
@ -68,10 +70,18 @@ int ObEncodingHashTable::create(const int64_t bucket_num, const int64_t node_num
|
|
|
|
|
} else if (OB_ISNULL(nodes_ = reinterpret_cast<HashNode *>(alloc_.alloc(nodes_size)))) {
|
|
|
|
|
ret = OB_ALLOCATE_MEMORY_FAILED;
|
|
|
|
|
LOG_WARN("failed to alloc memory for nodes", K(ret), K(nodes_size));
|
|
|
|
|
} else if (OB_ISNULL(skip_bit_ = sql::to_bit_vector((char *)alloc_.alloc(vec_size)))) {
|
|
|
|
|
ret = OB_ALLOCATE_MEMORY_FAILED;
|
|
|
|
|
LOG_WARN("failed to alloc memory for skip bit", K(ret), K(vec_size));
|
|
|
|
|
} else if (OB_ISNULL(hash_val_ = reinterpret_cast<uint64_t *>(alloc_.alloc(node_num_ * sizeof(uint64_t))))) {
|
|
|
|
|
ret = OB_ALLOCATE_MEMORY_FAILED;
|
|
|
|
|
LOG_WARN("failed to alloc memory for hash val", K(ret), K_(node_num));
|
|
|
|
|
} else {
|
|
|
|
|
MEMSET(buckets_, 0, bucket_size);
|
|
|
|
|
MEMSET(lists_, 0, lists_size);
|
|
|
|
|
MEMSET(nodes_, 0, nodes_size);
|
|
|
|
|
MEMSET(hash_val_, 0, node_num_ * sizeof(uint64_t));
|
|
|
|
|
skip_bit_->init(node_num_);
|
|
|
|
|
is_created_ = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@ -88,6 +98,8 @@ void ObEncodingHashTable::reset()
|
|
|
|
|
buckets_ = NULL;
|
|
|
|
|
nodes_ = NULL;
|
|
|
|
|
lists_ = NULL;
|
|
|
|
|
skip_bit_ = NULL;
|
|
|
|
|
hash_val_ = NULL;
|
|
|
|
|
MEMSET(&null_nodes_, 0, sizeof(null_nodes_));
|
|
|
|
|
MEMSET(&nope_nodes_, 0, sizeof(nope_nodes_));
|
|
|
|
|
is_created_ = false;
|
|
|
|
@ -100,10 +112,39 @@ void ObEncodingHashTable::reuse()
|
|
|
|
|
// nodes do not need to be reset for reuse
|
|
|
|
|
MEMSET(&null_nodes_, 0, sizeof(null_nodes_));
|
|
|
|
|
MEMSET(&nope_nodes_, 0, sizeof(nope_nodes_));
|
|
|
|
|
MEMSET(hash_val_, 0, node_num_ * sizeof(uint64_t));
|
|
|
|
|
skip_bit_->init(node_num_);
|
|
|
|
|
|
|
|
|
|
node_cnt_ = 0;
|
|
|
|
|
list_cnt_ = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Insert the node of row `row_idx` into hash bucket `pos`.
//
// The bucket holds a singly linked chain of NodeLists. The chain is searched
// for a list whose head datum compares equal to `datum`:
//   - on a match, nodes_[row_idx] is added to that list;
//   - otherwise the next unused slot lists_[list_cnt_] becomes a new list,
//     is linked in at the head of the bucket, and receives the node.
//
// @param datum    value of the row; add_to_list() stores its address in the
//                 node, so the datum must stay valid while the node is used.
// @param pos      bucket index into buckets_. It is not range-checked here;
//                 the callers in build() pass a hash masked with
//                 (bucket_num_ - 1).
// @param row_idx  index of the row's node in nodes_.
// @return OB_SUCCESS, or the error code returned by equal().
//
// NOTE(review): list_cnt_ is not checked against list_num_ in this function;
// the loops in build() test `list_cnt_ < list_num_` before each call.
int ObEncodingHashTableBuilder::add_to_table(const ObDatum &datum, const int64_t pos, const int64_t row_idx)
|
|
|
|
|
{
|
|
|
|
|
int ret = OB_SUCCESS;
|
|
|
|
|
NodeList *list = buckets_[pos];
|
|
|
|
|
// Walk the chain of lists in this bucket until a match, an error, or the end.
while (OB_SUCC(ret) && nullptr != list) {
|
|
|
|
|
bool is_equal = false;
|
|
|
|
|
// Compare against the datum held by the head node of the list.
if (OB_FAIL(equal(*list->header_->datum_, datum, is_equal))) {
|
|
|
|
|
LOG_WARN("check datum equality failed", K(ret), K(datum), KPC(list->header_->datum_));
|
|
|
|
|
} else if (is_equal) {
|
|
|
|
|
// Same value already present: attach the node (this also bumps node_cnt_).
add_to_list(*list, nodes_[row_idx], datum, node_cnt_);
|
|
|
|
|
// Leave with `list` non-null so the new-list branch below is skipped.
break;
|
|
|
|
|
} else {
|
|
|
|
|
list = list->next_;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
// `list` is null here only if the chain was exhausted without a match.
if (OB_SUCC(ret) && nullptr == list) {
|
|
|
|
|
// Take the next unused list slot.
list = &lists_[list_cnt_];
|
|
|
|
|
// Link the new list in at the head of the bucket chain.
list->next_ = buckets_[pos];
|
|
|
|
|
buckets_[pos] = list;
|
|
|
|
|
// insert_ref_ is the list's creation order (value of list_cnt_ before the increment).
list->insert_ref_ = list_cnt_++;
|
|
|
|
|
add_to_list(*list, nodes_[row_idx], datum, node_cnt_);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int ObEncodingHashTableBuilder::build(const ObColDatums &col_datums, const ObColDesc &col_desc)
|
|
|
|
|
{
|
|
|
|
|
int ret = common::OB_SUCCESS;
|
|
|
|
@ -118,6 +159,7 @@ int ObEncodingHashTableBuilder::build(const ObColDatums &col_datums, const ObCol
|
|
|
|
|
ObObjTypeStoreClass store_class = get_store_class_map()[col_desc.col_type_.get_type_class()];
|
|
|
|
|
const bool need_binary_hash =
|
|
|
|
|
(store_class == ObTextSC || store_class == ObJsonSC || store_class == ObLobSC || store_class == ObGeometrySC || store_class == ObRoaringBitmapSC);
|
|
|
|
|
const bool need_batch_hash = !need_binary_hash;
|
|
|
|
|
bool has_lob_header = col_desc.col_type_.is_lob_storage();
|
|
|
|
|
ObPrecision precision = PRECISION_UNKNOWN_YET;
|
|
|
|
|
if (col_desc.col_type_.is_decimal_int()) {
|
|
|
|
@ -129,52 +171,71 @@ int ObEncodingHashTableBuilder::build(const ObColDatums &col_datums, const ObCol
|
|
|
|
|
col_desc.col_type_.get_type(), col_desc.col_type_.get_collation_type(),
|
|
|
|
|
col_desc.col_type_.get_scale(), lib::is_oracle_mode(), has_lob_header, precision);
|
|
|
|
|
ObHashFunc hash_func;
|
|
|
|
|
hash_func.hash_func_ = basic_funcs->murmur_hash_;
|
|
|
|
|
hash_func.hash_func_ = basic_funcs->murmur_hash_v2_;
|
|
|
|
|
hash_func.batch_hash_func_ = basic_funcs->murmur_hash_v2_batch_;
|
|
|
|
|
|
|
|
|
|
const uint64_t mask = (bucket_num_ - 1);
|
|
|
|
|
for (int64_t row_id = 0;
|
|
|
|
|
OB_SUCC(ret) && row_id < col_datums.count() && list_cnt_ < list_num_;
|
|
|
|
|
++row_id) {
|
|
|
|
|
int64_t dimension_size = col_datums.get_dimension_size();
|
|
|
|
|
int64_t datum_arr_cnt = col_datums.get_continuous_array_count();
|
|
|
|
|
int64_t datum_array_size = 0;
|
|
|
|
|
ObDatum *datum_arry = nullptr;
|
|
|
|
|
for (int64_t i = 0; OB_SUCC(ret) && i < datum_arr_cnt; i++) {
|
|
|
|
|
col_datums.get_continuous_array(i, datum_arry, datum_array_size);
|
|
|
|
|
if (OB_ISNULL(datum_arry)) {
|
|
|
|
|
ret = OB_ERR_UNEXPECTED;
|
|
|
|
|
STORAGE_LOG(WARN, "unexpected null datum array", K(ret), K(i), K(datum_arr_cnt));
|
|
|
|
|
} else {
|
|
|
|
|
skip_bit_->init(datum_array_size);
|
|
|
|
|
for (int64_t idx = 0; OB_SUCC(ret) && idx < datum_array_size && list_cnt_ < list_num_; ++idx) {
|
|
|
|
|
int64_t row_id = i * dimension_size + idx;
|
|
|
|
|
const ObDatum &datum = col_datums.at(row_id);
|
|
|
|
|
if (datum.is_null()) {
|
|
|
|
|
add_to_list(null_nodes_, nodes_[row_id], datum);
|
|
|
|
|
skip_bit_->set(idx);
|
|
|
|
|
add_to_list(null_nodes_, nodes_[row_id], datum, node_cnt_);
|
|
|
|
|
} else if (datum.is_nop()) {
|
|
|
|
|
add_to_list(nope_nodes_, nodes_[row_id], datum);
|
|
|
|
|
skip_bit_->set(idx);
|
|
|
|
|
add_to_list(nope_nodes_, nodes_[row_id], datum, node_cnt_);
|
|
|
|
|
} else if (datum.is_ext()) {
|
|
|
|
|
ret = common::OB_NOT_SUPPORTED;
|
|
|
|
|
STORAGE_LOG(WARN, "not supported extend object type",
|
|
|
|
|
K(ret), K(row_id), K(datum), K(*datum.extend_obj_));
|
|
|
|
|
} else {
|
|
|
|
|
} else if (!need_batch_hash) {
|
|
|
|
|
uint64_t pos = 0;
|
|
|
|
|
if (OB_FAIL(hash(datum, hash_func, need_binary_hash, pos))) {
|
|
|
|
|
STORAGE_LOG(WARN, "hash failed", K(ret));
|
|
|
|
|
} else {
|
|
|
|
|
pos = pos & mask;
|
|
|
|
|
}
|
|
|
|
|
NodeList *list = buckets_[pos];
|
|
|
|
|
while (OB_SUCC(ret) && nullptr != list) {
|
|
|
|
|
bool is_equal = false;
|
|
|
|
|
if (OB_FAIL(equal(*list->header_->datum_, datum, is_equal))) {
|
|
|
|
|
LOG_WARN("check datum equality failed", K(ret), K(datum), KPC(list->header_->datum_), K(col_desc));
|
|
|
|
|
} else if (is_equal) {
|
|
|
|
|
add_to_list(*list, nodes_[row_id], datum);
|
|
|
|
|
break;
|
|
|
|
|
} else {
|
|
|
|
|
list = list->next_;
|
|
|
|
|
if (OB_FAIL(add_to_table(datum, pos, row_id))) {
|
|
|
|
|
STORAGE_LOG(WARN, "fail to add to table", K(ret), K(row_id));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (OB_SUCC(ret) && nullptr == list) {
|
|
|
|
|
list = &lists_[list_cnt_];
|
|
|
|
|
list->next_ = buckets_[pos];
|
|
|
|
|
buckets_[pos] = list;
|
|
|
|
|
list->insert_ref_ = list_cnt_++;
|
|
|
|
|
|
|
|
|
|
add_to_list(*list, nodes_[row_id], datum);
|
|
|
|
|
if (OB_SUCC(ret) && need_batch_hash && !skip_bit_->is_all_true(datum_array_size)) {
|
|
|
|
|
const uint64_t seed = 0;
|
|
|
|
|
MEMSET(hash_val_, 0, datum_array_size * sizeof(int64_t));
|
|
|
|
|
hash_func.batch_hash_func_(
|
|
|
|
|
hash_val_,
|
|
|
|
|
datum_arry,
|
|
|
|
|
true,
|
|
|
|
|
*skip_bit_,
|
|
|
|
|
datum_array_size,
|
|
|
|
|
&seed,
|
|
|
|
|
false);
|
|
|
|
|
for (int64_t idx = 0; OB_SUCC(ret) && idx < datum_array_size && list_cnt_ < list_num_; ++idx) {
|
|
|
|
|
if (!skip_bit_->at(idx)) {
|
|
|
|
|
int64_t row_id = i * dimension_size + idx;
|
|
|
|
|
uint64_t pos = hash_val_[idx] & mask;
|
|
|
|
|
if (OB_FAIL(add_to_table(col_datums.at(row_id), pos, row_id))) {
|
|
|
|
|
STORAGE_LOG(WARN, "fail to add to table", K(ret), K(row_id), K(pos));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (OB_SUCC(ret)) {
|
|
|
|
|
node_cnt_++;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (OB_SUCC(ret)) {
|
|
|
|
|
// update dict reference id of null and nope node.
|
|
|
|
|
for (HashNode *n = null_nodes_.header_; NULL != n; n = n->next_) {
|
|
|
|
@ -188,13 +249,14 @@ int ObEncodingHashTableBuilder::build(const ObColDatums &col_datums, const ObCol
|
|
|
|
|
return ret;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void ObEncodingHashTableBuilder::add_to_list(NodeList &list, HashNode &node, const ObDatum &datum)
|
|
|
|
|
// Push `node` onto the front of `list` and count it.
//
// @param list      list that receives the node; its header_ and size_ are updated.
// @param node      node to fill in and link; its dict_ref_, datum_ and next_
//                  are overwritten.
// @param datum     value the node refers to; only its address is stored, no
//                  copy is made.
// @param node_cnt  counter incremented once per call (in/out).
void ObEncodingHashTableBuilder::add_to_list(NodeList &list, HashNode &node, const ObDatum &datum, int64_t &node_cnt)
|
|
|
|
|
{
|
|
|
|
|
// The node takes over the list's insert_ref_ as its dict reference.
node.dict_ref_ = list.insert_ref_;
|
|
|
|
|
node.datum_ = &datum;
|
|
|
|
|
// Head insertion: the previous head becomes this node's successor.
node.next_ = list.header_;
|
|
|
|
|
list.header_ = &node;
|
|
|
|
|
++list.size_;
|
|
|
|
|
++node_cnt;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int ObEncodingHashTableBuilder::equal(
|
|
|
|
|