bugfix: BASIC_FUNCS adaption for roaringbitmap type

This commit is contained in:
qijiax 2024-12-17 20:15:38 +00:00 committed by ob-robot
parent 65d39e1748
commit bb0f03a92b
8 changed files with 222 additions and 3 deletions

View File

@ -107,6 +107,32 @@ int ObDatumCollectionCmpImpl::cmp(const ObDatum &l, const ObDatum &r, int &cmp_r
return ret;
}
/**
 * Compare two non-null roaringbitmap datums by the raw bytes of their payload.
 *
 * Both sides are read in full through an ObTextStringIter (using a function-local
 * arena allocator), then ordered with a byte-wise compare of the common prefix;
 * when the prefixes are equal the longer payload sorts after the shorter one.
 *
 * @param l        left datum
 * @param r        right datum
 * @param cmp_ret  [out] 0 on entry and on any failure; otherwise the compare result
 *                 (sign of the byte compare, or 1 / -1 when only the lengths differ)
 * @param is_lob   forwarded to the text iterators as their lob-header flag
 * @return OB_SUCCESS, or the error returned by the iterator init / read
 */
int ObDatumRoaringbitmapCmpImpl::cmp(const ObDatum &l, const ObDatum &r, int &cmp_ret, const bool is_lob)
{
  int ret = OB_SUCCESS;
  cmp_ret = 0;
  ObString l_data;
  ObString r_data;
  common::ObArenaAllocator allocator(ObModIds::OB_LOB_READER, OB_MALLOC_NORMAL_BLOCK_SIZE, MTL_ID());
  ObTextStringIter l_instr_iter(ObRoaringBitmapType, CS_TYPE_BINARY, l.get_string(), is_lob);
  ObTextStringIter r_instr_iter(ObRoaringBitmapType, CS_TYPE_BINARY, r.get_string(), is_lob);
  if (OB_FAIL(l_instr_iter.init(0, NULL, &allocator))) {
    COMMON_LOG(WARN, "Lob: init left lob str iter failed", K(ret), K(l));
  } else if (OB_FAIL(l_instr_iter.get_full_data(l_data))) {
    COMMON_LOG(WARN, "Lob: get left lob str iter full data failed ", K(ret), K(l_instr_iter));
  } else if (OB_FAIL(r_instr_iter.init(0, NULL, &allocator))) {
    // ret is logged once here (the original passed K(ret) twice)
    COMMON_LOG(WARN, "Lob: init right lob str iter failed", K(ret), K(r));
  } else if (OB_FAIL(r_instr_iter.get_full_data(r_data))) {
    COMMON_LOG(WARN, "Lob: get right lob str iter full data failed ", K(ret), K(r_instr_iter));
  } else {
    // only memcmp supported now
    const auto common_len = std::min(l_data.length(), r_data.length());
    if (common_len > 0) {
      // skip the byte compare for an empty prefix so that a possibly-null
      // data pointer of an empty payload is never handed to memcmp
      cmp_ret = MEMCMP(l_data.ptr(), r_data.ptr(), common_len);
    }
    if (cmp_ret == 0 && l_data.length() != r_data.length()) {
      cmp_ret = l_data.length() > r_data.length() ? 1 : -1;
    }
  }
  return ret;
}
int ObDatumTextCmpImpl::cmp_out_row(const ObDatum &l, const ObDatum &r, int &cmp_ret,
const ObCollationType cs, const bool with_end_space)

View File

@ -409,6 +409,20 @@ struct ObDatumCollectionCmp : public ObDefined<>
}
};
// Non-template entry point for comparing two roaringbitmap datums.
// cmp() returns an error code and writes the ordering result to cmp_ret;
// is_lob is a runtime flag (ObDatumRoaringbitmapCmp passes its HAS_LOB_HEADER
// template argument here).
struct ObDatumRoaringbitmapCmpImpl
{
static int cmp(const ObDatum &l, const ObDatum &r, int &cmp_ret, const bool is_lob);
};
// Compile-time adaptor for roaringbitmap comparison: turns the HAS_LOB_HEADER
// template argument into the runtime flag expected by ObDatumRoaringbitmapCmpImpl.
template <bool HAS_LOB_HEADER>
struct ObDatumRoaringbitmapCmp : public ObDefined<>
{
  inline static int cmp(const ObDatum &l, const ObDatum &r, int &cmp_ret)
  {
    const bool is_lob = HAS_LOB_HEADER;
    return ObDatumRoaringbitmapCmpImpl::cmp(l, r, cmp_ret, is_lob);
  }
};
///////////////////////////////////////////////////////////////////////////////
// begin define string compare functions
///////////////////////////////////////////////////////////////////////////////

View File

@ -68,6 +68,10 @@ ObDatumCmpFuncType NULLSAFE_COLLECTION_CMP_FUNCS[2][2];
bool g_collection_cmp_array_inited = ObArrayConstIniter<1, InitCollectionCmpArray>::init();
// Null-safe roaringbitmap compare functions; get_nullsafe_cmp_func() reads this
// table as [null_pos_idx][has_lob_header].
ObDatumCmpFuncType NULLSAFE_ROARINGBITMAP_CMP_FUNCS[2][2];
// Filled during static initialization through InitRoaringbitmapCmpArray
// (presumably ObArrayConstIniter invokes its init_array() -- confirm).
bool g_roaringbitmap_cmp_array_inited = ObArrayConstIniter<1, InitRoaringbitmapCmpArray>::init();
ObDatumCmpFuncType FIXED_DOUBLE_CMP_FUNCS[OB_NOT_FIXED_SCALE][2];
bool g_fixed_double_cmp_array_inited =
@ -112,6 +116,8 @@ ObDatumCmpFuncType ObDatumFuncs::get_nullsafe_cmp_func(
func_ptr = NULLSAFE_GEO_CMP_FUNCS[null_pos_idx][has_lob_header];
} else if (is_collection(type1) && is_collection(type2)) {
func_ptr = NULLSAFE_COLLECTION_CMP_FUNCS[null_pos_idx][has_lob_header];
} else if (is_roaringbitmap(type1) && is_roaringbitmap(type2)) {
func_ptr = NULLSAFE_ROARINGBITMAP_CMP_FUNCS[null_pos_idx][has_lob_header];
} else if (ob_is_decimal_int(type1) && ob_is_decimal_int(type2) && prec1 != PRECISION_UNKNOWN_YET
&& prec2 != PRECISION_UNKNOWN_YET) {
ObDecimalIntWideType lw = get_decimalint_type(prec1);
@ -143,6 +149,7 @@ ObExprBasicFuncs EXPR_BASIC_JSON_FUNCS[2];
ObExprBasicFuncs EXPR_BASIC_GEO_FUNCS[2];
ObExprBasicFuncs EXPR_BASIC_COLLECTION_FUNCS[2];
ObExprBasicFuncs EXPR_BASIC_ROARINGBITMAP_FUNCS[2];
ObExprBasicFuncs FIXED_DOUBLE_BASIC_FUNCS[OB_NOT_FIXED_SCALE];
ObExprBasicFuncs EXPR_BASIC_UDT_FUNCS[1];
@ -152,6 +159,7 @@ ObExprBasicFuncs EXPR_BASIC_UDT_FUNCS[1];
bool g_basic_json_array_inited = ObArrayConstIniter<1, InitBasicJsonFuncArray>::init();
bool g_basic_geo_array_inited = ObArrayConstIniter<1, InitBasicGeoFuncArray>::init();
bool g_basic_collection_array_inited = ObArrayConstIniter<1, InitCollectionBasicFuncArray>::init();
bool g_basic_roaringbitmap_array_inited = ObArrayConstIniter<1, InitBasicRoaringbitmapFuncArray>::init();
bool g_fixed_double_basic_func_array_inited =
ObArrayConstIniter<OB_NOT_FIXED_SCALE, InitFixedDoubleBasicFuncArray>::init();
@ -196,7 +204,7 @@ ObExprBasicFuncs* ObDatumFuncs::get_basic_func(const ObObjType type,
// string is always without lob locator
res = &EXPR_BASIC_STR_FUNCS[cs_type][calc_end_space][false];
}
} else if (ob_is_lob_locator(type) || ob_is_roaringbitmap(type)) {
} else if (ob_is_lob_locator(type)) {
OB_ASSERT(cs_type > CS_TYPE_INVALID && cs_type < CS_TYPE_MAX);
bool calc_end_space = false;
res = &EXPR_BASIC_STR_FUNCS[cs_type][calc_end_space][has_lob_locator];
@ -208,6 +216,8 @@ ObExprBasicFuncs* ObDatumFuncs::get_basic_func(const ObObjType type,
res = &EXPR_BASIC_UDT_FUNCS[0];
} else if (ob_is_collection_sql_type(type)) {
res = &EXPR_BASIC_COLLECTION_FUNCS[has_lob_locator];
} else if (ob_is_roaringbitmap(type)) {
res = &EXPR_BASIC_ROARINGBITMAP_FUNCS[has_lob_locator];
} else if (!is_oracle_mode && ob_is_double_type(type) &&
scale > SCALE_UNKNOWN_YET && scale < OB_NOT_FIXED_SCALE) {
res = &FIXED_DOUBLE_BASIC_FUNCS[scale];
@ -250,6 +260,12 @@ bool ObDatumFuncs::is_geometry(const ObObjType type)
return (tc == ObGeometryTC);
}
bool ObDatumFuncs::is_roaringbitmap(const ObObjType type)
{
const ObObjTypeClass tc = OBJ_TYPE_TO_CLASS[type];
return (tc == ObRoaringBitmapTC);
}
/**
* This function is primarily responsible for handling inconsistent hash computations
* for null types and the null values of those types, such as string, float, double, etc.
@ -259,7 +275,7 @@ bool ObDatumFuncs::is_geometry(const ObObjType type)
bool ObDatumFuncs::is_null_aware_hash_type(const ObObjType type)
{
const ObObjTypeClass tc = OBJ_TYPE_TO_CLASS[type];
return is_string_type(type) || is_json(type) || is_geometry(type) ||
return is_string_type(type) || is_json(type) || is_geometry(type) || is_roaringbitmap(type) ||
(tc == ObUserDefinedSQLTC) || (tc == ObFloatTC) || (tc == ObDoubleTC);
}
@ -330,6 +346,11 @@ static_assert(2 * 2 == sizeof(NULLSAFE_COLLECTION_CMP_FUNCS) / sizeof(void *),
REG_SER_FUNC_ARRAY(OB_SFA_DATUM_NULLSAFE_COLLECTION_CMP,
NULLSAFE_COLLECTION_CMP_FUNCS,
sizeof(NULLSAFE_COLLECTION_CMP_FUNCS) / sizeof(void*));
// The roaringbitmap compare table must hold exactly 2 x 2 pointer-sized entries;
// a change in its shape fails the build here before the registration below.
static_assert(2 * 2 == sizeof(NULLSAFE_ROARINGBITMAP_CMP_FUNCS) / sizeof(void *),
"unexpected size");
// Register the table under its own array id, passing the entry count
// (presumably for function-pointer serialization -- confirm against REG_SER_FUNC_ARRAY).
REG_SER_FUNC_ARRAY(OB_SFA_DATUM_NULLSAFE_ROARINGBITMAP_CMP,
NULLSAFE_ROARINGBITMAP_CMP_FUNCS,
sizeof(NULLSAFE_ROARINGBITMAP_CMP_FUNCS) / sizeof(void*));
static_assert(OB_NOT_FIXED_SCALE * 2 == sizeof(FIXED_DOUBLE_CMP_FUNCS) / sizeof(void *),
"unexpected size");
@ -392,6 +413,8 @@ static ExprBasicFuncSerPart2 EXPR_BASIC_UDT_FUNCS_PART2[1];
static ExprBasicFuncSerPart1 EXPR_BASIC_COLLECTION_FUNCS_PART1[2];
static ExprBasicFuncSerPart2 EXPR_BASIC_COLLECTION_FUNCS_PART2[2];
// Two-part split of EXPR_BASIC_ROARINGBITMAP_FUNCS (one entry per has-lob-locator
// slot); both parts are filled from the source array in split_basic_func_for_ser().
static ExprBasicFuncSerPart1 EXPR_BASIC_ROARINGBITMAP_FUNCS_PART1[2];
static ExprBasicFuncSerPart2 EXPR_BASIC_ROARINGBITMAP_FUNCS_PART2[2];
bool split_basic_func_for_ser(void)
{
for (int64_t i = 0; i < sizeof(EXPR_BASIC_FUNCS)/sizeof(ObExprBasicFuncs); i++) {
@ -429,6 +452,10 @@ bool split_basic_func_for_ser(void)
EXPR_BASIC_COLLECTION_FUNCS_PART1[i].from(EXPR_BASIC_COLLECTION_FUNCS[i]);
EXPR_BASIC_COLLECTION_FUNCS_PART2[i].from(EXPR_BASIC_COLLECTION_FUNCS[i]);
}
for (int64_t i = 0; i < sizeof(EXPR_BASIC_ROARINGBITMAP_FUNCS)/sizeof(ObExprBasicFuncs); i++) {
EXPR_BASIC_ROARINGBITMAP_FUNCS_PART1[i].from(EXPR_BASIC_ROARINGBITMAP_FUNCS[i]);
EXPR_BASIC_ROARINGBITMAP_FUNCS_PART2[i].from(EXPR_BASIC_ROARINGBITMAP_FUNCS[i]);
}
return true;
}
bool g_split_basic_func_for_ser = split_basic_func_for_ser();
@ -507,6 +534,14 @@ REG_SER_FUNC_ARRAY(OB_SFA_EXPR_COLLECTION_BASIC_PART1,
REG_SER_FUNC_ARRAY(OB_SFA_EXPR_COLLECTION_BASIC_PART2,
EXPR_BASIC_COLLECTION_FUNCS_PART2,
sizeof(EXPR_BASIC_COLLECTION_FUNCS_PART2) / sizeof(void *));
// EXPR_BASIC_ROARINGBITMAP_FUNCS must consist of 2 entries of
// EXPR_BASIC_FUNC_MEMBER_CNT pointer-sized members each.
static_assert(2 * EXPR_BASIC_FUNC_MEMBER_CNT == sizeof(EXPR_BASIC_ROARINGBITMAP_FUNCS) / sizeof(void *),
"unexpected size");
// The two split halves are registered separately, each under its own array id
// and with its own pointer count.
REG_SER_FUNC_ARRAY(OB_SFA_EXPR_ROARINGBITMAP_BASIC_PART1,
EXPR_BASIC_ROARINGBITMAP_FUNCS_PART1,
sizeof(EXPR_BASIC_ROARINGBITMAP_FUNCS_PART1) / sizeof(void *));
REG_SER_FUNC_ARRAY(OB_SFA_EXPR_ROARINGBITMAP_BASIC_PART2,
EXPR_BASIC_ROARINGBITMAP_FUNCS_PART2,
sizeof(EXPR_BASIC_ROARINGBITMAP_FUNCS_PART2) / sizeof(void *));
} // end namespace sql
} // end namespace oceanbase

View File

@ -42,6 +42,7 @@ public:
static bool is_json(const ObObjType type);
static bool is_geometry(const ObObjType type);
static bool is_collection(const ObObjType type);
static bool is_roaringbitmap(const ObObjType type);
static bool is_varying_len_char_type(const ObObjType type, const ObCollationType cs_type) {
return (type == ObNVarchar2Type || (type == ObVarcharType && cs_type != CS_TYPE_BINARY));
}

View File

@ -210,6 +210,24 @@ struct ObNullSafeDatumCollectionCmp
}
};
// Null-aware wrapper around the roaringbitmap datum comparison.
//   - both sides null      -> equal (0)
//   - exactly one side null -> the null side sorts first when NULL_FIRST, last otherwise
//   - neither side null    -> delegate to datum_cmp::ObDatumRoaringbitmapCmp<HAS_LOB_HEADER>
// Only the delegated comparison can produce a non-success return code.
template <bool NULL_FIRST, bool HAS_LOB_HEADER>
struct ObNullSafeDatumRoaringbitmapCmp
{
  inline static int cmp(const ObDatum &l, const ObDatum &r, int &cmp_ret)
  {
    int ret = OB_SUCCESS;
    const bool left_is_null = l.is_null();
    const bool right_is_null = r.is_null();
    if (OB_UNLIKELY(left_is_null || right_is_null)) {
      if (left_is_null && right_is_null) {
        cmp_ret = 0;
      } else if (left_is_null) {
        cmp_ret = NULL_FIRST ? -1 : 1;
      } else {
        cmp_ret = NULL_FIRST ? 1 : -1;
      }
    } else {
      ret = datum_cmp::ObDatumRoaringbitmapCmp<HAS_LOB_HEADER>::cmp(l, r, cmp_ret);
    }
    return ret;
  }
};
template<ObDecimalIntWideType width1, ObDecimalIntWideType width2, bool NULL_FIRST>
struct ObNullSafeDecintCmp
{
@ -792,6 +810,7 @@ DEF_DATUM_SPECIAL_HASH_FUNCS(ObDoubleType);
DEF_DATUM_SPECIAL_HASH_FUNCS(ObUDoubleType);
DEF_DATUM_SPECIAL_HASH_FUNCS(ObJsonType);
DEF_DATUM_SPECIAL_HASH_FUNCS(ObGeometryType);
DEF_DATUM_SPECIAL_HASH_FUNCS(ObRoaringBitmapType);
extern ObDatumCmpFuncType NULLSAFE_JSON_CMP_FUNCS[2][2];
extern ObDatumCmpFuncType NULLSAFE_STR_CMP_FUNCS[CS_TYPE_MAX][2][2];
@ -803,6 +822,7 @@ extern ObDatumCmpFuncType NULLSAFE_TC_CMP_FUNCS[ObMaxTC][ObMaxTC][2];
extern ObDatumCmpFuncType NULLSAFE_GEO_CMP_FUNCS[2][2];
extern ObDatumCmpFuncType FIXED_DOUBLE_CMP_FUNCS[OB_NOT_FIXED_SCALE][2];
extern ObDatumCmpFuncType NULLSAFE_COLLECTION_CMP_FUNCS[2][2];
extern ObDatumCmpFuncType NULLSAFE_ROARINGBITMAP_CMP_FUNCS[2][2];
extern ObDatumCmpFuncType DECINT_CMP_FUNCS[DECIMAL_INT_MAX][DECIMAL_INT_MAX][2];
extern ObExprBasicFuncs EXPR_BASIC_FUNCS[ObMaxType];
@ -813,6 +833,7 @@ extern ObExprBasicFuncs FIXED_DOUBLE_BASIC_FUNCS[OB_NOT_FIXED_SCALE];
extern ObExprBasicFuncs EXPR_BASIC_UDT_FUNCS[1];
extern ObExprBasicFuncs DECINT_BASIC_FUNCS[DECIMAL_INT_MAX];
extern ObExprBasicFuncs EXPR_BASIC_COLLECTION_FUNCS[2];
extern ObExprBasicFuncs EXPR_BASIC_ROARINGBITMAP_FUNCS[2];
struct DummyIniter
{
@ -1435,7 +1456,87 @@ struct InitCollectionCmpArray
}
};
// Hash calculator for roaringbitmap datums. Both entry points hash the datum through
// datum_lob_locator_hash with CS_TYPE_UTF8MB4_BIN, handing over T::hash only when
// T::is_varchar_hash is set (NULL otherwise). HAS_LOB_HEADER is not consulted here.
template <typename T, bool HAS_LOB_HEADER>
struct DatumRoaringbitmapHashCalculator : public DefHashMethod<T>
{
  static int calc_datum_hash(const ObDatum &datum, const uint64_t seed, uint64_t &res)
  {
    return datum_lob_locator_hash(datum, CS_TYPE_UTF8MB4_BIN, seed, T::is_varchar_hash ? T::hash : NULL, res);
  }

  // The v2 hash is computed exactly like the v1 hash for this type.
  static int calc_datum_hash_v2(const ObDatum &datum, const uint64_t seed, uint64_t &res)
  {
    return calc_datum_hash(datum, seed, res);
  }
};
// Fills EXPR_BASIC_ROARINGBITMAP_FUNCS, the basic hash / compare function set for
// roaringbitmap. Slot 0 is instantiated with HAS_LOB_HEADER = false, slot 1 with
// HAS_LOB_HEADER = true.
//
// Both slots receive the same set of members. The xx_hash_ / wy_hash_ entries (and
// their batch variants) of slot 0 used to be commented out, so this initializer never
// assigned them while slot 1 had all of them; they are now assigned like the rest.
template<int IDX>
struct InitBasicRoaringbitmapFuncArray
{
  template <typename T, bool HAS_LOB_HEADER>
  using Hash = DefHashFunc<DatumRoaringbitmapHashCalculator<T, HAS_LOB_HEADER>>;
  template <bool NULL_FIRST>
  using TCCmp = ObNullSafeDatumTCCmp<ObRoaringBitmapTC, ObRoaringBitmapTC, NULL_FIRST>;
  using TCDef = datum_cmp::ObDatumTCCmp<ObRoaringBitmapTC, ObRoaringBitmapTC>;
  template <bool NULL_FIRST, bool HAS_LOB_HEADER>
  using TypeCmp = ObNullSafeDatumRoaringbitmapCmp<NULL_FIRST, HAS_LOB_HEADER>;
  using TypeDef = datum_cmp::ObDatumRoaringbitmapCmp<false>;

  static void init_array()
  {
    auto &basic_funcs = EXPR_BASIC_ROARINGBITMAP_FUNCS;

    // slot 0: HAS_LOB_HEADER = false
    basic_funcs[0].default_hash_ = Hash<ObDefaultHash, false>::hash;
    basic_funcs[0].default_hash_batch_= Hash<ObDefaultHash, false>::hash_batch;
    basic_funcs[0].murmur_hash_ = Hash<ObMurmurHash, false>::hash;
    basic_funcs[0].murmur_hash_batch_ = Hash<ObMurmurHash, false>::hash_batch;
    basic_funcs[0].xx_hash_ = Hash<ObXxHash, false>::hash;
    basic_funcs[0].xx_hash_batch_ = Hash<ObXxHash, false>::hash_batch;
    basic_funcs[0].wy_hash_ = Hash<ObWyHash, false>::hash;
    basic_funcs[0].wy_hash_batch_ = Hash<ObWyHash, false>::hash_batch;
    // prefer the type-specific compare; fall back to the type-class compare, else NULL
    basic_funcs[0].null_first_cmp_ = TypeDef::defined_
        ? &TypeCmp<1, 0>::cmp
        : TCDef::defined_ ? &TCCmp<1>::cmp : NULL;
    basic_funcs[0].null_last_cmp_ = TypeDef::defined_
        ? &TypeCmp<0, 0>::cmp
        : TCDef::defined_ ? &TCCmp<0>::cmp : NULL;
    basic_funcs[0].murmur_hash_v2_ = Hash<ObMurmurHash, false>::hash_v2;
    basic_funcs[0].murmur_hash_v2_batch_ = Hash<ObMurmurHash, false>::hash_v2_batch;

    // slot 1: HAS_LOB_HEADER = true
    basic_funcs[1].default_hash_ = Hash<ObDefaultHash, true>::hash;
    basic_funcs[1].default_hash_batch_= Hash<ObDefaultHash, true>::hash_batch;
    basic_funcs[1].murmur_hash_ = Hash<ObMurmurHash, true>::hash;
    basic_funcs[1].murmur_hash_batch_ = Hash<ObMurmurHash, true>::hash_batch;
    basic_funcs[1].xx_hash_ = Hash<ObXxHash, true>::hash;
    basic_funcs[1].xx_hash_batch_ = Hash<ObXxHash, true>::hash_batch;
    basic_funcs[1].wy_hash_ = Hash<ObWyHash, true>::hash;
    basic_funcs[1].wy_hash_batch_ = Hash<ObWyHash, true>::hash_batch;
    basic_funcs[1].null_first_cmp_ = TypeDef::defined_
        ? &TypeCmp<1, 1>::cmp
        : TCDef::defined_ ? &TCCmp<1>::cmp : NULL;
    basic_funcs[1].null_last_cmp_ = TypeDef::defined_
        ? &TypeCmp<0, 1>::cmp
        : TCDef::defined_ ? &TCCmp<0>::cmp : NULL;
    basic_funcs[1].murmur_hash_v2_ = Hash<ObMurmurHash, true>::hash_v2;
    basic_funcs[1].murmur_hash_v2_batch_ = Hash<ObMurmurHash, true>::hash_v2_batch;
  }
};
// Fills NULLSAFE_ROARINGBITMAP_CMP_FUNCS. The table is read as
// [null_pos_idx][has_lob_header], so the entry at [i][j] must be the comparator
// instantiated as Cmp<NULL_FIRST = i, HAS_LOB_HEADER = j>.
//
// funcs[1][0] was previously set to Cmp<1, 1>, i.e. the null-first / no-lob-header
// slot used the lob-header comparator; it now uses Cmp<1, 0>.
template<int IDX>
struct InitRoaringbitmapCmpArray
{
  template <bool... args>
  using Cmp = ObNullSafeDatumRoaringbitmapCmp<args...>;
  using Def = datum_cmp::ObDatumRoaringbitmapCmp<false>;

  static void init_array()
  {
    auto &funcs = NULLSAFE_ROARINGBITMAP_CMP_FUNCS;
    funcs[0][0] = Def::defined_ ? &Cmp<0, 0>::cmp : NULL;
    funcs[0][1] = Def::defined_ ? &Cmp<0, 1>::cmp : NULL;
    funcs[1][0] = Def::defined_ ? &Cmp<1, 0>::cmp : NULL;
    funcs[1][1] = Def::defined_ ? &Cmp<1, 1>::cmp : NULL;
  }
};
template<int width>
struct InitDecintBasicFuncArray
{

View File

@ -349,6 +349,33 @@ struct VecTCHashCalc<VEC_TC_COLLECTION, HashMethod, hash_v2>
}
};
template<typename HashMethod, bool hash_v2>
struct VecTCHashCalc<VEC_TC_ROARINGBITMAP, HashMethod, hash_v2>
{
inline static int hash(HASH_ARG_LIST)
{
int ret = OB_SUCCESS;
ObString bin_str;
res = 0;
const char *in_str = reinterpret_cast<const char *>(data);
ObLobLocatorV2 loc(in_str, false);
if (!loc.is_valid()) {
COMMON_LOG(WARN, "invalid lob", K(ret));
} else if (!loc.has_inrow_data()) {
COMMON_LOG(WARN, "meet outrow lob do calc hash value", K(loc));
} else if (OB_FAIL(loc.get_inrow_data(bin_str))) {
COMMON_LOG(WARN, "fail to get inrow data", K(ret), K(loc));
} else {
res = seed;
if (bin_str.length() > 0) {
res = ObCharset::hash(CS_TYPE_BINARY, bin_str.ptr(), bin_str.length(), seed, false,
HashMethod::is_varchar_hash ? HashMethod::hash : NULL);
}
}
return ret;
}
};
template<typename HashMethod, bool hash_v2>
struct VecTCHashCalc<VEC_TC_ROWID, HashMethod, hash_v2>
{
@ -840,6 +867,18 @@ struct VecTCCmpCalc<VEC_TC_UDT, VEC_TC_UDT>
}
};
// Marks the <ROARINGBITMAP, ROARINGBITMAP> vector compare as defined.
// cmp() is a placeholder: it does not touch its arguments and always succeeds.
template<>
struct VecTCCmpCalc<VEC_TC_ROARINGBITMAP, VEC_TC_ROARINGBITMAP>
{
  static constexpr bool defined_ = true;
  inline static int cmp(CMP_ARG_LIST)
  {
    // not used
    return OB_SUCCESS;
  }
};
// null type comparison
struct VecDummyCmpCalc

View File

@ -134,6 +134,9 @@ typedef void (*ser_eval_vector_function)(ObEvalVectorFuncTag &);
OB_SFA_EXPR_COLLECTION_BASIC_PART1, \
OB_SFA_EXPR_COLLECTION_BASIC_PART2, \
OB_SFA_FAST_CALC_PART_VEC, \
OB_SFA_DATUM_NULLSAFE_ROARINGBITMAP_CMP, \
OB_SFA_EXPR_ROARINGBITMAP_BASIC_PART1, \
OB_SFA_EXPR_ROARINGBITMAP_BASIC_PART2, \
OB_SFA_MAX
enum ObSerFuncArrayID {

View File

@ -3186,5 +3186,5 @@
<ROARINGBITMAP, ARRAY> : not defined
<ROARINGBITMAP, MYSQL_DATE> : not defined
<ROARINGBITMAP, MYSQL_DATETIME> : not defined
<ROARINGBITMAP, ROARINGBITMAP> : not defined
<ROARINGBITMAP, ROARINGBITMAP> : defined