
Co-authored-by: Naynahs <cfzy002@126.com> Co-authored-by: hwx65 <1780011298@qq.com> Co-authored-by: oceanoverflow <oceanoverflow@gmail.com>
358 lines
14 KiB
C++
358 lines
14 KiB
C++
/**
 * Copyright (c) 2021 OceanBase
 * OceanBase CE is licensed under Mulan PubL v2.
 * You can use this software according to the terms and conditions of the Mulan PubL v2.
 * You may obtain a copy of Mulan PubL v2 at:
 *          http://license.coscl.org.cn/MulanPubL-2.0
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 * See the Mulan PubL v2 for more details.
 */
|
|
|
|
#include <gtest/gtest.h>
#include <cstdlib>
#include <random>
#include <time.h>
#include <vector>

#include "share/vector/vector_basic_op.h"
#include "share/vector/ob_fixed_length_vector.h"
#include "share/vector/ob_discrete_vector.h"
#include "share/vector/vector_op_util.h"
#include "lib/timezone/ob_timezone_info.h"
#include "share/datum/ob_datum_funcs.h"
#include "share/vector/expr_cmp_func.h"
#include "unittest/share/vector/util.h"
|
|
|
|
|
|
namespace oceanbase
|
|
{
|
|
namespace common
|
|
{
|
|
|
|
void prepare_datums(std::vector<ObDatum> &datums, const int64_t datum_len, const int64_t datum_cnt)
|
|
{
|
|
void *data = std::malloc(datum_cnt * datum_len);
|
|
ASSERT_TRUE(data != nullptr);
|
|
for (int i = 0; i < datum_cnt; i++) {
|
|
char *ptr = (char *)data + (i * datum_len);
|
|
ObDatum d(ptr, datum_len, false);
|
|
datums.push_back(d);
|
|
}
|
|
}
|
|
|
|
/**
 * Allocates a zero-filled buffer large enough to hold one bit per case and
 * returns it viewed as an ObBitVector (all bits cleared).
 *
 * The buffer comes from std::malloc and is owned by the caller, who must
 * release it with std::free.
 *
 * @param cases number of bits the bitmap must cover
 * @return the zeroed bitmap, or nullptr if allocation failed (a non-fatal
 *         gtest failure is recorded in that case)
 */
sql::ObBitVector *mock_skip(const int64_t cases)
{
  const size_t raw_size = static_cast<size_t>(cases / 8 + 1);
  // Round up to a multiple of 8 bytes so whole 64-bit words can be read safely.
  // NOTE(review): assumes ObBitVector may access its storage word-wise — confirm.
  const size_t size = (raw_size + 7) & ~static_cast<size_t>(7);
  void *data = std::malloc(size);
  // ASSERT_* cannot be used in a value-returning function; EXPECT_* can.
  EXPECT_TRUE(data != nullptr);
  if (data != nullptr) {
    std::memset(data, 0, size);
  }
  return reinterpret_cast<sql::ObBitVector *>(data);
}
|
|
|
|
class ObTestVectorBasicOp: public ::testing::Test
|
|
{
|
|
private:
|
|
DISALLOW_COPY_AND_ASSIGN(ObTestVectorBasicOp);
|
|
};
|
|
|
|
std::ostream& operator<<(std::ostream &out, const ObIntervalDSValue &v)
|
|
{
|
|
out << "<NSEC: " << v.nsecond_ << ", FSEC: " << v.fractional_second_ << ">";
|
|
return out;
|
|
}
|
|
|
|
std::ostream& operator<<(std::ostream &out, const ObOTimestampData &v)
|
|
{
|
|
out << "<DESC: " << v.time_ctx_.desc_ << ", US: " << v.time_us_ << ">";
|
|
return out;
|
|
}
|
|
|
|
/**
 * Asserts that two result vectors are element-wise equal.
 *
 * When they differ, prints "COMPARE <name>", then for the first mismatching
 * index prints the corresponding element of `input` (if one exists) and raises
 * a fatal gtest failure. Nothing is printed when the vectors are equal.
 *
 * ASSERT_* only returns from this helper; the calling test continues.
 *
 * @param l      first result vector
 * @param r      second result vector
 * @param input  the inputs that produced the results, used for diagnostics only
 * @param name   label printed ahead of the mismatch report
 */
template<typename T, typename P>
void cmp(std::vector<T> &l, std::vector<T> &r, std::vector<P> &input, std::string name)
{
  if (l == r) {
    return;
  }
  std::cout << "COMPARE " << name << '\n';
  ASSERT_EQ(l.size(), r.size());
  // size_t index: avoids the signed/unsigned comparison against size().
  for (size_t i = 0; i < l.size(); ++i) {
    if (l[i] != r[i]) {
      // Guard the diagnostic lookup in case `input` is shorter than the results.
      if (i < input.size()) {
        std::cout << "case " << i << ": " << input[i] << '\n';
      }
      ASSERT_EQ(l[i], r[i]);
    }
  }
}
|
|
|
|
// Number of random cases generated per test. The fixed-length hash test casts
// this to uint16_t for its EvalBound, so it must stay within that range.
static constexpr int test_cases = 50000;
|
|
|
|
template<typename ValueType, VecValueTypeClass tc>
|
|
void test_fixed_length_hash(const ObObjMeta &meta, std::string case_name)
|
|
{
|
|
std::vector<ValueType> data;
|
|
for (int i = 0; i < test_cases; i++) {
|
|
data.push_back(RandomData<ValueType>::rand());
|
|
}
|
|
std::vector<uint64_t> seeds;
|
|
for (int i = 0; i < test_cases; i++) {
|
|
seeds.push_back(RandomData<int64_t>::rand());
|
|
}
|
|
|
|
sql::ObBitVector *skip = mock_skip(test_cases);
|
|
|
|
sql::ObExpr mock_expr;
|
|
mock_expr.obj_meta_ = meta;
|
|
sql::EvalBound bound(static_cast<uint16_t>(test_cases), true);
|
|
std::vector<uint64_t> vec_hash(test_cases, 0);
|
|
std::vector<uint64_t> datum_hash(test_cases, 0);
|
|
auto vec = new ObFixedLengthVector<ValueType, VectorBasicOp<tc>>((char *)data.data(), skip);
|
|
|
|
sql::ObExprBasicFuncs *basic_funcs = ObDatumFuncs::get_basic_func(
|
|
meta.get_type(), meta.get_collation_type(), meta.get_scale(), false, meta.has_lob_header());
|
|
std::vector<ObDatum> datums;
|
|
prepare_datums(datums, sizeof(ValueType), test_cases);
|
|
for (int i = 0; i < test_cases; i++) {
|
|
std::memcpy(const_cast<char *>(datums[i].ptr_), &(data[i]), sizeof(ValueType));
|
|
}
|
|
|
|
int ret = vec->murmur_hash(mock_expr, vec_hash.data(), *skip, bound, seeds.data(), true);
|
|
ASSERT_EQ(ret, 0);
|
|
basic_funcs->murmur_hash_batch_(datum_hash.data(), datums.data(), true, *skip, test_cases,
|
|
seeds.data(), true);
|
|
cmp(datum_hash, vec_hash, data, case_name + " murmurhash");
|
|
|
|
ret = vec->murmur_hash_v3(mock_expr, vec_hash.data(), *skip, bound, seeds.data(), true);
|
|
ASSERT_EQ(ret, 0);
|
|
basic_funcs->murmur_hash_v2_batch_(datum_hash.data(), datums.data(), true, *skip, test_cases,
|
|
seeds.data(), true);
|
|
cmp(datum_hash, vec_hash, data, case_name + " murmurhash_v2");
|
|
|
|
ret = vec->default_hash(mock_expr, vec_hash.data(), *skip, bound, seeds.data(), true);
|
|
ASSERT_EQ(ret, 0);
|
|
basic_funcs->default_hash_batch_(datum_hash.data(), datums.data(), true, *skip, test_cases,
|
|
seeds.data(), true);
|
|
cmp(datum_hash, vec_hash, data, case_name + " default_hash");
|
|
}
|
|
|
|
std::ostream& operator <<(std::ostream &out, const Item &v)
|
|
{
|
|
out << "<LEN: " << v.data_len << ", DATA:";
|
|
for (int i = 0; i < v.data_len; i++) {
|
|
out << " " << int64_t(v.data[i]);
|
|
}
|
|
out << ">";
|
|
return out;
|
|
}
|
|
|
|
template<VecValueTypeClass tc>
|
|
void test_discrete_hash(const ObObjMeta &meta, std::string case_name)
|
|
{
|
|
std::vector<Item> items;
|
|
for (int i = 0; i < test_cases; i++) {
|
|
items.push_back(rand_item<tc>());
|
|
}
|
|
std::vector<int32_t> lens(test_cases, 0);
|
|
std::vector<char *> ptrs(test_cases, nullptr);
|
|
for (int i = 0; i < test_cases; i++) {
|
|
lens[i] = items[i].data_len;
|
|
ptrs[i] = items[i].data;
|
|
}
|
|
std::vector<uint64_t> seeds;
|
|
for (int i = 0; i < test_cases; i++) {
|
|
seeds.push_back(RandomData<int64_t>::rand());
|
|
}
|
|
std::vector<ObDatum> datums;
|
|
for (int i = 0; i < test_cases; i++) {
|
|
ObDatum d;
|
|
d.ptr_ = items[i].data;
|
|
d.len_ = items[i].data_len;
|
|
d.null_ = false;
|
|
datums.push_back(d);
|
|
}
|
|
sql::ObExprBasicFuncs *basic_funcs = ObDatumFuncs::get_basic_func(
|
|
meta.get_type(), meta.get_collation_type(), meta.get_scale(), false, meta.has_lob_header());
|
|
sql::ObBitVector *skip = mock_skip(test_cases);
|
|
|
|
std::vector<uint64_t> vec_hash(test_cases, 0);
|
|
std::vector<uint64_t> datum_hash(test_cases, 0);
|
|
|
|
sql::ObExpr mock_expr;
|
|
mock_expr.obj_meta_ = meta;
|
|
sql::EvalBound bound(test_cases, true);
|
|
auto vec = new ObDiscreteVector<VectorBasicOp<tc>>(lens.data(), ptrs.data(), skip);
|
|
|
|
int ret = vec->murmur_hash(mock_expr, vec_hash.data(), *skip, bound, seeds.data(), true);
|
|
ASSERT_EQ(ret, 0);
|
|
basic_funcs->murmur_hash_batch_(datum_hash.data(), datums.data(), true, *skip, test_cases,
|
|
seeds.data(), true);
|
|
cmp(vec_hash, datum_hash, items, "murmurhash");
|
|
|
|
ret = vec->murmur_hash_v3(mock_expr, vec_hash.data(), *skip, bound, seeds.data(), true);
|
|
ASSERT_EQ(ret, 0);
|
|
basic_funcs->murmur_hash_v2_batch_(datum_hash.data(), datums.data(), true, *skip, test_cases,
|
|
seeds.data(), true);
|
|
cmp(vec_hash, datum_hash, items, "murmurhash_v2");
|
|
|
|
ret = vec->default_hash(mock_expr, vec_hash.data(), *skip, bound, seeds.data(), true);
|
|
ASSERT_EQ(ret, 0);
|
|
basic_funcs->default_hash_batch_(datum_hash.data(), datums.data(), true, *skip, test_cases,
|
|
seeds.data(), true);
|
|
cmp(vec_hash, datum_hash, items, "default_hash");
|
|
}
|
|
|
|
// Describes one left/right operand pair for a comparison case. Every member
// has a default so a default-constructed CmpItem never holds indeterminate values.
struct CmpItem
{
  void *l_data = nullptr;  // left operand payload
  void *r_data = nullptr;  // right operand payload
  int32_t l_len = 0;       // length associated with l_data
  int32_t r_len = 0;       // length associated with r_data
  bool l_null = false;     // null flag of the left operand
  bool r_null = false;     // null flag of the right operand
};
|
|
|
|
template<typename T>
|
|
ObDatum get_datum(const T &v, const bool null_v)
|
|
{
|
|
ObDatum d;
|
|
d.ptr_ = (const char *)&v;
|
|
d.len_ = sizeof(T);
|
|
d.null_ = null_v;
|
|
return d;
|
|
}
|
|
|
|
template<VecValueTypeClass l_tc, VecValueTypeClass r_tc, typename LType, typename RType>
|
|
void test_fixed_length_cmp(const ObObjMeta &l_meta, const ObObjMeta &r_meta, std::string case_name)
|
|
{
|
|
std::vector<LType> l_datas(test_cases, LType());
|
|
std::vector<RType> r_datas(test_cases, RType());
|
|
std::vector<CmpItem> items;
|
|
ObDatumCmpFuncType null_first_datum_cmp = ObDatumFuncs::get_nullsafe_cmp_func(
|
|
l_meta.get_type(), r_meta.get_type(), NULL_FIRST, l_meta.get_collation_type(),
|
|
l_meta.get_scale(), false, false);
|
|
ObDatumCmpFuncType null_last_datum_cmp = ObDatumFuncs::get_nullsafe_cmp_func(
|
|
l_meta.get_type(), r_meta.get_type(), NULL_LAST, l_meta.get_collation_type(),
|
|
l_meta.get_scale(), false, false);
|
|
sql::NullSafeRowCmpFunc null_first_row_cmp = nullptr;
|
|
sql::NullSafeRowCmpFunc null_last_row_cmp = nullptr;
|
|
const sql::ObDatumMeta l_d_meta(l_meta.get_type(), l_meta.get_collation_type(),
|
|
l_meta.get_scale());
|
|
const sql::ObDatumMeta r_d_meta(r_meta.get_type(), r_meta.get_collation_type(),
|
|
r_meta.get_scale());
|
|
VectorCmpExprFuncsHelper::get_cmp_set(l_d_meta, r_d_meta, null_first_row_cmp, null_last_row_cmp);
|
|
int datum_cmp_ret = 0, row_cmp_ret = 0;
|
|
for (int i = 0; i < test_cases; i++) {
|
|
LType l_v = RandomData<LType>::rand();
|
|
LType r_v = RandomData<RType>::rand();
|
|
bool l_null = RandomData<bool>::rand();
|
|
bool r_null = RandomData<bool>::rand();
|
|
ObDatum l_datum = get_datum(l_v, l_null);
|
|
ObDatum r_datum = get_datum(r_v, r_null);
|
|
int ret = null_first_datum_cmp(l_datum, r_datum, datum_cmp_ret);
|
|
ASSERT_EQ(ret, 0);
|
|
ret = null_first_row_cmp(l_meta, r_meta, &l_v, sizeof(LType), l_null, &r_v, sizeof(RType),
|
|
r_null, row_cmp_ret);
|
|
ASSERT_EQ(ret, 0);
|
|
if (datum_cmp_ret != row_cmp_ret) {
|
|
std::cout << case_name << "NULL FIRST CMP\n";
|
|
std::cout << "L: " << l_v << ", R: " << r_v << ", L_NULL: " << l_null
|
|
<< ", R_NULL: " << r_null << '\n';
|
|
ASSERT_EQ(datum_cmp_ret, row_cmp_ret);
|
|
}
|
|
ret = null_last_datum_cmp(l_datum, r_datum, datum_cmp_ret);
|
|
ASSERT_EQ(ret, 0);
|
|
ret = null_last_row_cmp(l_meta, r_meta, &l_v, sizeof(LType), l_null, &r_v, sizeof(RType),
|
|
r_null, row_cmp_ret);
|
|
ASSERT_EQ(ret, 0);
|
|
if (datum_cmp_ret != row_cmp_ret) {
|
|
std::cout << case_name << "NULL LAST CMP\n";
|
|
std::cout << "L: " << l_v << ", R: " << r_v << ", L_NULL: " << l_null
|
|
<< ", R_NULL: " << r_null << '\n';
|
|
ASSERT_EQ(datum_cmp_ret, row_cmp_ret);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Runs the vector-vs-datum hash cross-check for each covered type: first the
// fixed-length types, then the discrete (variable-length) ones.
TEST(ObTestVectorBasicOp, hash_op)
{
  ObObjMeta meta;

  // Integers and floating point.
  meta.set_int();
  test_fixed_length_hash<int64_t, VEC_TC_INTEGER>(meta, "integer");
  meta.set_uint64();
  test_fixed_length_hash<uint64_t, VEC_TC_UINTEGER>(meta, "uinteger");
  meta.set_float();
  test_fixed_length_hash<float, VEC_TC_FLOAT>(meta, "float");
  meta.set_double();
  test_fixed_length_hash<double, VEC_TC_DOUBLE>(meta, "double");

  // Fixed-scale double, once per scale in [0, OB_MAX_DOUBLE_FLOAT_SCALE].
  meta.set_double();
  int scale = 0;
  while (scale <= OB_MAX_DOUBLE_FLOAT_SCALE) {
    meta.set_scale(scale);
    test_fixed_length_hash<double, VEC_TC_FIXED_DOUBLE>(meta, "fixed_double");
    scale++;
  }

  // Timestamp with time zone and day-second interval.
  meta.set_timestamp_tz();
  test_fixed_length_hash<ObOTimestampData, VEC_TC_TIMESTAMP_TZ>(meta, "otimestamp");
  meta.set_interval_ds();
  test_fixed_length_hash<ObIntervalDSValue, VEC_TC_INTERVAL_DS>(meta, "intervalds");

  // Decimal ints of every width share the same meta.
  meta.set_decimal_int(0);
  test_fixed_length_hash<int32_t, VEC_TC_DEC_INT32>(meta, "decint_32");
  test_fixed_length_hash<int64_t, VEC_TC_DEC_INT64>(meta, "decint_64");
  test_fixed_length_hash<int128_t, VEC_TC_DEC_INT128>(meta, "decint_128");
  test_fixed_length_hash<int256_t, VEC_TC_DEC_INT256>(meta, "decint_256");
  test_fixed_length_hash<int512_t, VEC_TC_DEC_INT512>(meta, "decint_512");

  // Discrete types.
  meta.set_number();
  test_discrete_hash<VEC_TC_NUMBER>(meta, "number");
  meta.set_collation_type(CS_TYPE_UTF8MB4_BIN);
  meta.set_varchar();
  test_discrete_hash<VEC_TC_STRING>(meta, "string");
  meta.set_urowid();
  test_discrete_hash<VEC_TC_ROWID>(meta, "rowid");
  meta.set_raw();
  test_discrete_hash<VEC_TC_RAW>(meta, "raw");
}
|
|
|
|
// Runs the row-compare vs datum-compare cross-check for each covered pair of
// fixed-length operand types.
TEST(ObTestVectorBasicOp, cmp_op)
{
  ObObjMeta l_meta;
  ObObjMeta r_meta;

  // Integer combinations: int/int, int/uint, uint/uint.
  l_meta.set_int();
  r_meta.set_int();
  test_fixed_length_cmp<VEC_TC_INTEGER, VEC_TC_INTEGER, int64_t, int64_t>(
    l_meta, r_meta, "int-int");
  r_meta.set_uint64();
  test_fixed_length_cmp<VEC_TC_INTEGER, VEC_TC_UINTEGER, int64_t, uint64_t>(
    l_meta, r_meta, "int-uint");
  l_meta.set_uint64();
  test_fixed_length_cmp<VEC_TC_UINTEGER, VEC_TC_UINTEGER, uint64_t, uint64_t>(
    l_meta, r_meta, "uint-uint");

  // Floating point.
  l_meta.set_float();
  r_meta.set_float();
  test_fixed_length_cmp<VEC_TC_FLOAT, VEC_TC_FLOAT, float, float>(
    l_meta, r_meta, "float-float");
  l_meta.set_double();
  r_meta.set_double();
  test_fixed_length_cmp<VEC_TC_DOUBLE, VEC_TC_DOUBLE, double, double>(
    l_meta, r_meta, "double-double");

  // Timestamp with time zone and day-second interval.
  l_meta.set_timestamp_tz();
  r_meta.set_timestamp_tz();
  test_fixed_length_cmp<VEC_TC_TIMESTAMP_TZ, VEC_TC_TIMESTAMP_TZ, ObOTimestampData,
                        ObOTimestampData>(l_meta, r_meta, "otimestamp-otimestamp");
  l_meta.set_interval_ds();
  r_meta.set_interval_ds();
  test_fixed_length_cmp<VEC_TC_INTERVAL_DS, VEC_TC_INTERVAL_DS, ObIntervalDSValue,
                        ObIntervalDSValue>(l_meta, r_meta, "interval-interval");

  // Decimal ints of every width share the same metas.
  l_meta.set_decimal_int(0);
  r_meta.set_decimal_int(0);
  test_fixed_length_cmp<VEC_TC_DEC_INT32, VEC_TC_DEC_INT32, int32_t, int32_t>(
    l_meta, r_meta, "dec32-dec32");
  test_fixed_length_cmp<VEC_TC_DEC_INT64, VEC_TC_DEC_INT64, int64_t, int64_t>(
    l_meta, r_meta, "dec64-dec64");
  test_fixed_length_cmp<VEC_TC_DEC_INT128, VEC_TC_DEC_INT128, int128_t, int128_t>(
    l_meta, r_meta, "dec128-dec128");
  test_fixed_length_cmp<VEC_TC_DEC_INT256, VEC_TC_DEC_INT256, int256_t, int256_t>(
    l_meta, r_meta, "dec256-dec256");
  test_fixed_length_cmp<VEC_TC_DEC_INT512, VEC_TC_DEC_INT512, int512_t, int512_t>(
    l_meta, r_meta, "dec512-dec512");
}
|
|
|
|
} // end namespace common
|
|
} // end namespace oceanbase
|
|
|
|
int main(int argc, char **argv)
|
|
{
|
|
::testing::InitGoogleTest(&argc, argv);
|
|
return RUN_ALL_TESTS();
|
|
} |