[SKIP INDEX] remove skip index storing prefixes

This commit is contained in:
Hooper9973
2023-11-06 03:10:25 +00:00
committed by ob-robot
parent 1b5c21ddec
commit 7846915626
5 changed files with 57 additions and 77 deletions

View File

@ -24,15 +24,15 @@ namespace blocksstable
int ObIColAggregator::copy_agg_datum(const ObDatum &src, ObDatum &dst)
{
int ret = OB_SUCCESS;
if (OB_UNLIKELY(src.is_outrow()) || OB_ISNULL(dst.ptr_)) {
if (OB_UNLIKELY(src.is_outrow())|| OB_ISNULL(dst.ptr_) ||
OB_UNLIKELY(!src.is_null() && src.len_ > ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH) ) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("Unexpected agg datum for copy", K(ret), K(src), K(dst));
} else if (src.is_null()) {
dst.set_null();
} else {
const int64_t copy_len = MIN(ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH, src.len_);
dst.pack_ = copy_len;
MEMCPY(const_cast<char *>(dst.ptr_), src.ptr_, copy_len);
dst.pack_ = src.len_;
MEMCPY(const_cast<char *>(dst.ptr_), src.ptr_, src.len_);
}
return ret;
}
@ -130,8 +130,7 @@ int ObColMaxAggregator::eval(const ObStorageDatum &datum, const bool is_data)
LOG_WARN("Not init", K(ret));
} else if (!can_aggregate_ || datum.is_nop()) {
// Skip
} else if (is_lob_storage(obj_type_) && !datum.is_null() && !datum.get_lob_data().in_row_){
// contain out row column, can not keep aggregate
} else if (need_set_not_aggregate(obj_type_, datum)){
set_not_aggregate();
} else {
int cmp_res = 0;
@ -202,8 +201,7 @@ int ObColMinAggregator::eval(const ObStorageDatum &datum, const bool is_data)
LOG_WARN("Not init", K(ret));
} else if (!can_aggregate_ || datum.is_nop()) {
// Skip
} else if (is_lob_storage(obj_type_) && !datum.is_null() && !datum.get_lob_data().in_row_){
// contain out row column, can not keep aggregate
} else if (need_set_not_aggregate(obj_type_, datum)){
set_not_aggregate();
} else {
int cmp_res = 0;

View File

@ -37,6 +37,12 @@ public:
void set_not_aggregate() { can_aggregate_ = false; }
protected:
static int copy_agg_datum(const ObDatum &src, ObDatum &dst);
// Decide whether `datum` forces the column aggregate to be abandoned.
// Returns true when the datum is non-null and either
//   - its length is greater than ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH, or
//   - `type` is a lob storage type and the lob data is not stored in row.
// A null datum always yields false.
static bool need_set_not_aggregate(const ObObjType type, const ObDatum &datum)
{
  bool not_aggregate = false;
  if (!datum.is_null()) {
    const bool exceeds_max_len = datum.len_ > ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
    // Only inspect the lob header when the length check has not already decided.
    not_aggregate = exceeds_max_len
        || (is_lob_storage(type) && !datum.get_lob_data().in_row_);
  }
  return not_aggregate;
}
protected:
bool can_aggregate_;
};

View File

@ -1,12 +1,14 @@
// Copyright (c) 2022 Ant Group CO., Ltd.
// OceanBase is licensed under Mulan PubL v1.
// You can use this software according to the terms and conditions of the Mulan
// PubL v1. You may obtain a copy of Mulan PubL v1 at:
// http://license.coscl.org.cn/MulanPubL-1.0
// THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the
// Mulan PubL v1 for more details.
/**
* Copyright (c) 2022 OceanBase
* OceanBase is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#define USING_LOG_PREFIX STORAGE
#include "storage/blocksstable/index_block/ob_skip_index_filter_executor.h"
@ -250,10 +252,7 @@ int ObSkipIndexFilterExecutor::eq_operator(const sql::ObWhiteFilterExecutor &fil
ret = OB_INVALID_ARGUMENT;
LOG_WARN("Invalid argument for falsifiable EQ operator", K(ret), K(filter));
} else {
// falsifable calculation, if prefix, do not verify equal
const ObDatum &ref_datum = datums.at(0);
bool min_prefix = min_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
bool max_prefix = max_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
ObDatumCmpFuncType cmp_func = filter.cmp_func_;
int min_cmp_res = 0;
int max_cmp_res = 0;
@ -263,8 +262,7 @@ int ObSkipIndexFilterExecutor::eq_operator(const sql::ObWhiteFilterExecutor &fil
LOG_WARN("Failed to compare datum", K(ret), K(max_datum), K(ref_datum));
} else if (min_cmp_res > 0 || max_cmp_res < 0) {
fal_desc.set_always_false();
} else if (!max_prefix && !min_prefix &&
min_cmp_res == 0 && max_cmp_res ==0) {
} else if (min_cmp_res == 0 && max_cmp_res == 0) {
fal_desc.set_always_true();
} else {
fal_desc.set_uncertain();
@ -284,10 +282,7 @@ int ObSkipIndexFilterExecutor::ne_operator(const sql::ObWhiteFilterExecutor &fil
ret = OB_INVALID_ARGUMENT;
LOG_WARN("Invalid argument for falsifiable NE operator", K(ret), K(filter));
} else {
// falsifable calculation, if prefix, do not verify equal
const ObDatum &ref_datum = datums.at(0);
bool min_prefix = min_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
bool max_prefix = max_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
ObDatumCmpFuncType cmp_func = filter.cmp_func_;
int min_cmp_res = 0;
int max_cmp_res = 0;
@ -295,8 +290,7 @@ int ObSkipIndexFilterExecutor::ne_operator(const sql::ObWhiteFilterExecutor &fil
LOG_WARN("Failed to compare datum", K(ret), K(min_datum), K(ref_datum));
} else if (OB_FAIL(cmp_func(max_datum, ref_datum, max_cmp_res))) {
LOG_WARN("Failed to compare datum", K(ret), K(max_datum), K(ref_datum));
} else if (!max_prefix && !min_prefix &&
min_cmp_res == 0 && max_cmp_res ==0) {
} else if (min_cmp_res == 0 && max_cmp_res == 0) {
fal_desc.set_always_false();
} else if (min_cmp_res > 0 || max_cmp_res < 0) {
fal_desc.set_always_true();
@ -318,9 +312,7 @@ int ObSkipIndexFilterExecutor::gt_operator(const sql::ObWhiteFilterExecutor &fil
ret = OB_INVALID_ARGUMENT;
LOG_WARN("Invalid argument for falsifiable GT operator", K(ret), K(filter));
} else {
// falsifable calculation, if prefix, do not verify equal
const ObDatum &ref_datum = datums.at(0);
bool max_prefix = max_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
ObDatumCmpFuncType cmp_func = filter.cmp_func_;
int min_cmp_res = 0;
int max_cmp_res = 0;
@ -328,8 +320,7 @@ int ObSkipIndexFilterExecutor::gt_operator(const sql::ObWhiteFilterExecutor &fil
LOG_WARN("Failed to compare datum", K(ret), K(min_datum), K(ref_datum));
} else if (OB_FAIL(cmp_func(max_datum, ref_datum, max_cmp_res))) {
LOG_WARN("Failed to compare datum", K(ret), K(max_datum), K(ref_datum));
} else if (max_cmp_res < 0 ||
(!max_prefix && max_cmp_res == 0)) {
} else if (max_cmp_res < 0 || max_cmp_res == 0) {
fal_desc.set_always_false();
} else if (min_cmp_res > 0) {
fal_desc.set_always_true();
@ -351,9 +342,7 @@ int ObSkipIndexFilterExecutor::ge_operator(const sql::ObWhiteFilterExecutor &fil
ret = OB_INVALID_ARGUMENT;
LOG_WARN("Invalid argument for falsifiable GE operator", K(ret), K(filter));
} else {
// falsifable calculation, if prefix, do not verify equal
const ObDatum &ref_datum = datums.at(0);
bool min_prefix = min_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
ObDatumCmpFuncType cmp_func = filter.cmp_func_;
int min_cmp_res = 0;
int max_cmp_res = 0;
@ -363,8 +352,7 @@ int ObSkipIndexFilterExecutor::ge_operator(const sql::ObWhiteFilterExecutor &fil
LOG_WARN("Failed to compare datum", K(ret), K(max_datum), K(ref_datum));
} else if (max_cmp_res < 0) {
fal_desc.set_always_false();
} else if (min_cmp_res > 0 ||
(!min_prefix && min_cmp_res == 0)) {
} else if (min_cmp_res > 0 || min_cmp_res == 0) {
fal_desc.set_always_true();
} else {
fal_desc.set_uncertain();
@ -384,9 +372,7 @@ int ObSkipIndexFilterExecutor::lt_operator(const sql::ObWhiteFilterExecutor &fil
ret = OB_INVALID_ARGUMENT;
LOG_WARN("Invalid argument for falsifiable LT operator", K(ret), K(filter));
} else {
// falsifable calculation, if prefix, do not verify equal
const ObDatum &ref_datum = datums.at(0);
bool min_prefix = min_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
ObDatumCmpFuncType cmp_func = filter.cmp_func_;
int min_cmp_res = 0;
int max_cmp_res = 0;
@ -394,8 +380,7 @@ int ObSkipIndexFilterExecutor::lt_operator(const sql::ObWhiteFilterExecutor &fil
LOG_WARN("Failed to compare datum", K(ret), K(min_datum), K(ref_datum));
} else if (OB_FAIL(cmp_func(max_datum, ref_datum, max_cmp_res))) {
LOG_WARN("Failed to compare datum", K(ret), K(max_datum), K(ref_datum));
} else if (min_cmp_res > 0 ||
(!min_prefix && min_cmp_res == 0)) {
} else if (min_cmp_res > 0 || min_cmp_res == 0) {
fal_desc.set_always_false();
} else if (max_cmp_res < 0) {
fal_desc.set_always_true();
@ -417,7 +402,6 @@ int ObSkipIndexFilterExecutor::le_operator(const sql::ObWhiteFilterExecutor &fil
ret = OB_INVALID_ARGUMENT;
LOG_WARN("Invalid argument for falsifiable LE operator", K(ret), K(filter));
} else {
// falsifable calculation, if prefix, do not verify equal
const ObDatum &ref_datum = datums.at(0);
bool max_prefix = max_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
ObDatumCmpFuncType cmp_func = filter.cmp_func_;
@ -429,8 +413,7 @@ int ObSkipIndexFilterExecutor::le_operator(const sql::ObWhiteFilterExecutor &fil
LOG_WARN("Failed to compare datum", K(ret), K(max_datum), K(ref_datum));
} else if (min_cmp_res > 0) {
fal_desc.set_always_false();
} else if (max_cmp_res < 0 ||
(!max_prefix && max_cmp_res == 0)) {
} else if (max_cmp_res < 0 || max_cmp_res == 0) {
fal_desc.set_always_true();
} else {
fal_desc.set_uncertain();
@ -450,10 +433,7 @@ int ObSkipIndexFilterExecutor::in_operator(const sql::ObWhiteFilterExecutor &fil
ret = OB_INVALID_ARGUMENT;
LOG_WARN("Invalid argument for falsifiable IN operator", K(ret), K(filter));
} else {
// falsifable calculation, if prefix, do not verify equal
const int ref_count = datums.count();
bool min_prefix = min_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
bool max_prefix = max_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
ObDatumCmpFuncType cmp_func = filter.cmp_func_;
int min_cmp_res;
int max_cmp_res;
@ -466,11 +446,10 @@ int ObSkipIndexFilterExecutor::in_operator(const sql::ObWhiteFilterExecutor &fil
LOG_WARN("Failed to compare datum", K(ret), K(max_datum), K(i), K(datums.at(i)));
} else {
if (falsifable_true && ((min_cmp_res < 0 && max_cmp_res > 0) ||
(!min_prefix && min_cmp_res == 0) ||
(!max_prefix && max_cmp_res == 0))) {
min_cmp_res == 0 || max_cmp_res == 0)) {
falsifable_true = false;
}
if (!falsifable_false && !min_prefix && !max_prefix && min_cmp_res == 0 && max_cmp_res == 0) {
if (!falsifable_false && min_cmp_res == 0 && max_cmp_res == 0) {
falsifable_false = true;
}
}
@ -501,9 +480,6 @@ int ObSkipIndexFilterExecutor::bt_operator(const sql::ObWhiteFilterExecutor &fil
const ObDatum &ref_left_datum = datums.at(0);
const ObDatum &ref_right_datum = datums.at(1);
// falsifable calculation, if prefix, do not verify equal
bool min_prefix = min_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
bool max_prefix = max_datum.len_ == ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
ObDatumCmpFuncType cmp_func = filter.cmp_func_;
int min_left_cmp_res = 0;
int min_right_cmp_res = 0;
@ -520,10 +496,8 @@ int ObSkipIndexFilterExecutor::bt_operator(const sql::ObWhiteFilterExecutor &fil
LOG_WARN("Failed to compare datum", K(ret), K(max_datum), K(ref_right_datum));
} else if (min_right_cmp_res > 0 || max_left_cmp_res < 0) {
fal_desc.set_always_false();
} else if ((min_left_cmp_res > 0 ||
(!min_prefix && min_left_cmp_res == 0)) &&
(max_right_cmp_res < 0 ||
(!max_prefix && max_right_cmp_res == 0))) {
} else if ((min_left_cmp_res > 0 || min_left_cmp_res == 0) &&
(max_right_cmp_res < 0 || max_right_cmp_res == 0)) {
fal_desc.set_always_true();
} else {
fal_desc.set_uncertain();

View File

@ -1,13 +1,13 @@
/**
* Copyright (c) 2022 OceanBase
* OceanBase CE is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan
* PubL v2. You may obtain a copy of Mulan PubL v2 at:
* OceanBase is licensed under Mulan PubL v2.
* You can use this software according to the terms and conditions of the Mulan PubL v2.
* You may obtain a copy of Mulan PubL v2 at:
* http://license.coscl.org.cn/MulanPubL-2.0
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY
* KIND, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
* NON-INFRINGEMENT, MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. See the
* Mulan PubL v2 for more details.
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PubL v2 for more details.
*/
#ifndef OCEANBASE_STORAGE_BLOCKSSTABLE_OB_SKIP_INDEX_FILTER_EXECUTOR_H

View File

@ -158,6 +158,10 @@ void TestIndexBlockAggregator::update_min_max_row(const ObDatumRow &row)
if (!curr_datum.is_null()) {
ObStorageDatum &min_datum = min_row_.storage_datums_[i];
ObStorageDatum &max_datum = max_row_.storage_datums_[i];
if (curr_datum.len_ > ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH) {
min_datum.set_null();
max_datum.set_null();
} else {
int min_cmp_ret = 0;
int max_cmp_ret = 0;
ObStorageDatumCmpFunc cmp_func;
@ -166,14 +170,9 @@ void TestIndexBlockAggregator::update_min_max_row(const ObDatumRow &row)
ASSERT_EQ(OB_SUCCESS, cmp_func.compare(curr_datum, max_datum, max_cmp_ret));
if (min_cmp_ret < 0) {
min_datum.deep_copy(curr_datum, allocator_);
if (min_datum.len_ > ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH) {
min_datum.len_ = ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
}
}
if (max_cmp_ret > 0) {
max_datum.deep_copy(curr_datum, allocator_);
if (max_datum.len_ > ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH) {
max_datum.len_ = ObSkipIndexColMeta::MAX_SKIP_INDEX_COL_LENGTH;
}
}
} else {
@ -191,6 +190,9 @@ void TestIndexBlockAggregator::validate_agg_row(
bool is_nop_column = is_col_in_nop_col_arr(col_idx, nop_col_cnt, nop_col_idxs);
if (is_nop_column) {
ASSERT_TRUE(datum_row.storage_datums_[i].is_nop());
} else if (datum_row.storage_datums_[i].is_nop() || datum_row.storage_datums_[i].is_null()) { // skip for not aggregate data
ASSERT_TRUE(min_row_.storage_datums_[col_idx].is_null());
ASSERT_TRUE(max_row_.storage_datums_[col_idx].is_null());
} else {
ObStorageDatumCmpFunc cmp_func;
get_cmp_func(col_descs_.at(col_idx), cmp_func);