diff --git a/deps/oblib/src/common/ob_target_specific.cpp b/deps/oblib/src/common/ob_target_specific.cpp index 5bacbe720..2289a7012 100644 --- a/deps/oblib/src/common/ob_target_specific.cpp +++ b/deps/oblib/src/common/ob_target_specific.cpp @@ -12,36 +12,10 @@ #define USING_LOG_PREFIX COMMON -#include "ob_target_specific.h" -#include "lib/cpu/ob_cpu_topology.h" -#include "lib/oblog/ob_log.h" - namespace oceanbase { namespace common { -uint32_t get_supported_archs() -{ - uint32_t result = 0; - if (ObCpuFlagsCache::support_sse42()) { - result |= static_cast(ObTargetArch::SSE42); - } - if (ObCpuFlagsCache::support_avx()) { - result |= static_cast(ObTargetArch::AVX); - } - if (ObCpuFlagsCache::support_avx2()) { - result |= static_cast(ObTargetArch::AVX2); - } - if (ObCpuFlagsCache::support_avx512()) { - result |= static_cast(ObTargetArch::AVX512); - } - return result; -} -bool is_arch_supported(ObTargetArch arch) -{ - static uint32_t arches = get_supported_archs(); - return arch == ObTargetArch::Default || (arches & static_cast(arch)); -} } // namespace common } // namespace oceanbase \ No newline at end of file diff --git a/deps/oblib/src/common/ob_target_specific.h b/deps/oblib/src/common/ob_target_specific.h index 9732cd04f..95a861f73 100644 --- a/deps/oblib/src/common/ob_target_specific.h +++ b/deps/oblib/src/common/ob_target_specific.h @@ -14,6 +14,7 @@ #define OCEANBASE_COMMON_OB_TARGET_SPECIFIC_H_ #include +#include "lib/cpu/ob_cpu_topology.h" namespace oceanbase { @@ -36,14 +37,14 @@ bool is_arch_supported(ObTargetArch arch); #if defined(__clang__) -#define OB_AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw"))) +#define OB_AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl"))) #define OB_AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2"))) #define 
OB_AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx"))) #define OB_SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt"))) #define OB_DEFAULT_FUNCTION_SPECIFIC_ATTRIBUTE # define OB_BEGIN_AVX512_SPECIFIC_CODE \ - _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw\"))),apply_to=function)") + _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl\"))),apply_to=function)") # define OB_BEGIN_AVX2_SPECIFIC_CODE \ _Pragma("clang attribute push(__attribute__((target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2\"))),apply_to=function)") # define OB_BEGIN_AVX_SPECIFIC_CODE \ @@ -56,7 +57,7 @@ bool is_arch_supported(ObTargetArch arch); # define OB_DUMMY_FUNCTION_DEFINITION [[maybe_unused]] void _dummy_function_definition(); #else -#define OB_AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native"))) +#define OB_AVX512_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,tune=native"))) #define OB_AVX2_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,tune=native"))) #define OB_AVX_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt,avx,tune=native"))) #define OB_SSE42_FUNCTION_SPECIFIC_ATTRIBUTE __attribute__((target("sse,sse2,sse3,ssse3,sse4,popcnt",tune=native))) @@ -64,7 +65,7 @@ bool is_arch_supported(ObTargetArch arch); # define OB_BEGIN_AVX512_SPECIFIC_CODE \ _Pragma("GCC push_options") \ - _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,tune=native\")") + _Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,avx512f,avx512bw,avx512vl,tune=native\")") # define OB_BEGIN_AVX2_SPECIFIC_CODE \ _Pragma("GCC push_options") \ 
_Pragma("GCC target(\"sse,sse2,sse3,ssse3,sse4,popcnt,avx,avx2,tune=native\")") @@ -214,6 +215,30 @@ OB_DECLARE_AVX512_SPECIFIC_CODE( #endif +OB_INLINE uint32_t get_supported_archs() +{ + uint32_t result = 0; + if (ObCpuFlagsCache::support_sse42()) { + result |= static_cast(ObTargetArch::SSE42); + } + if (ObCpuFlagsCache::support_avx()) { + result |= static_cast(ObTargetArch::AVX); + } + if (ObCpuFlagsCache::support_avx2()) { + result |= static_cast(ObTargetArch::AVX2); + } + if (ObCpuFlagsCache::support_avx512()) { + result |= static_cast(ObTargetArch::AVX512); + } + return result; +} + +OB_INLINE bool is_arch_supported(ObTargetArch arch) +{ + static uint32_t arches = get_supported_archs(); + return arch == ObTargetArch::Default || (arches & static_cast(arch)); +} + } // namespace common } // namespace oceanbase #endif // OCEANBASE_COMMON_OB_TARGET_SPECIFIC_H_ diff --git a/deps/oblib/src/lib/container/ob_bitmap.cpp b/deps/oblib/src/lib/container/ob_bitmap.cpp index 1fa897f00..bb6fc9002 100644 --- a/deps/oblib/src/lib/container/ob_bitmap.cpp +++ b/deps/oblib/src/lib/container/ob_bitmap.cpp @@ -23,9 +23,8 @@ namespace oceanbase namespace common { -// Transform 64-byte mask to 64-bit mask - OB_DECLARE_AVX512_SPECIFIC_CODE( +// Transform 64-byte mask to 64-bit mask inline static uint64_t bytes64mask_to_bits64mask( const uint8_t *bytes64, const bool need_flip = false) @@ -37,6 +36,77 @@ inline static uint64_t bytes64mask_to_bits64mask( } return res; } + +inline static void bitmap_get_condensed_index( + const uint8_t *data, + const int32_t size, + int32_t *row_ids, + int32_t &row_count) +{ + int32_t offset = 0; + const uint8_t *pos = data; + const uint8_t *end_pos = data + size; + const uint8_t *end_pos64 = pos + size / 64 * 64; + row_count = 0; + for (; pos < end_pos64; pos += 64) { + uint64_t mask64 = bytes64mask_to_bits64mask(pos); + __m512i start_index = _mm512_set1_epi32(offset); + __m512i base_index = _mm512_setr_epi32(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15); + 
base_index = _mm512_add_epi32(base_index, start_index); + uint16_t mask16 = mask64 & 0xFFFF; + _mm512_mask_compressstoreu_epi32(row_ids + row_count, mask16, base_index); + row_count += popcount64(mask16); + const __m512i constant16 = _mm512_set1_epi32(16); + base_index = _mm512_add_epi32(base_index, constant16); + mask16 = (mask64 >> 16) & 0xFFFF; + _mm512_mask_compressstoreu_epi32(row_ids + row_count, mask16, base_index); + row_count += popcount64(mask16); + base_index = _mm512_add_epi32(base_index, constant16); + mask16 = (mask64 >> 32) & 0xFFFF; + _mm512_mask_compressstoreu_epi32(row_ids + row_count, mask16, base_index); + row_count += popcount64(mask16); + base_index = _mm512_add_epi32(base_index, constant16); + mask16 = mask64 >> 48; + _mm512_mask_compressstoreu_epi32(row_ids + row_count, mask16, base_index); + row_count += popcount64(mask16); + + offset += 64; + } + while (pos < end_pos) { + if (*pos) { + row_ids[row_count++] = pos - data; + } + ++pos; + } +} + +inline static void uint64_mask_to_bits_mask( + const uint64_t *data, + const int64_t size, + uint8_t *skip) +{ + const uint64_t *pos = data; + const uint64_t *end_pos = data + size; + const uint64_t *end_pos64 = data + size / 8 * 8; + uint64_t i = 0; + const __m512i zero64 = _mm512_setzero_si512(); + + while (pos < end_pos64) { + __m512i v = _mm512_loadu_si512(pos); + skip[i++] |= _mm512_cmp_epi64_mask(v, zero64, 0); + pos += 8; + } + + uint64_t *skip64 = reinterpret_cast(skip); + + while (pos < end_pos) { + if (*pos == 0) { + i = pos - data; + skip64[i / 64] |= 1LU << (i % 64); + } + ++pos; + } +} ) OB_DECLARE_AVX2_SPECIFIC_CODE( @@ -229,7 +299,7 @@ inline static void bitmap_next_valid_idx( } inline static void bitmap_get_row_ids( - int64_t *row_ids, + int32_t *row_ids, int64_t &row_count, int64_t &from, const int64_t to, @@ -263,6 +333,32 @@ inline static void bitmap_get_row_ids( } } +inline static void bitmap_get_condensed_index( + const uint8_t *data, + const int32_t size, + int32_t *row_ids, + 
int32_t &row_count) +{ + const uint8_t *pos = data; + const uint8_t *end_pos = data + size; + const uint8_t *end_pos64 = pos + size / 64 * 64; + row_count = 0; + for (; pos < end_pos64; pos += 64) { + uint64_t mask = bytes64mask_to_bits64mask(pos); + while (mask) { + uint64_t index = countr_zero64(mask); + mask = blsr64(mask); + row_ids[row_count++] = pos - data + index; + } + } + while (pos < end_pos) { + if (*pos) { + row_ids[row_count++] = pos - data; + } + ++pos; + } +} + inline static void bitmap_to_bits_mask( const int64_t from, const int64_t to, @@ -316,36 +412,6 @@ inline static void uint64_mask_to_bits_mask( } ) -OB_DECLARE_AVX512_SPECIFIC_CODE( -inline static void uint64_mask_to_bits_mask( - const uint64_t *data, - const int64_t size, - uint8_t *skip) -{ - const uint64_t *pos = data; - const uint64_t *end_pos = data + size; - const uint64_t *end_pos64 = data + size / 8 * 8; - uint64_t i = 0; - const __m512i zero64 = _mm512_setzero_si512(); - - while (pos < end_pos64) { - __m512i v = _mm512_loadu_si512(pos); - skip[i++] |= _mm512_cmp_epi64_mask(v, zero64, 0); - pos += 8; - } - - uint64_t *skip64 = reinterpret_cast(skip); - - while (pos < end_pos) { - if (*pos == 0) { - i = pos - data; - skip64[i / 64] |= 1LU << (i % 64); - } - ++pos; - } -} -) - class SelectAndOp { public: OB_INLINE static uint8_t apply(uint8_t a, uint8_t b) { return a & b; } @@ -398,8 +464,8 @@ struct SelectOpImpl }; ObBitmap::ObBitmap(ObIAllocator &allocator) - : is_inited_(false), valid_bytes_(0), capacity_(0), - data_(nullptr), allocator_(allocator) + : is_inited_(false), valid_bytes_(0), capacity_(0), data_(nullptr), + condensed_cnt_(-1), condensed_idx_(nullptr), allocator_(allocator) {} ObBitmap::~ObBitmap() @@ -473,7 +539,7 @@ int64_t ObBitmap::next_valid_idx(const int64_t start, } int ObBitmap::get_row_ids( - int64_t *row_ids, + int32_t *row_ids, int64_t &row_count, int64_t &from, const int64_t to, @@ -481,9 +547,10 @@ int ObBitmap::get_row_ids( const int64_t id_offset) const { int 
ret = OB_SUCCESS; - if (OB_UNLIKELY(from < 0 || to > valid_bytes_ || to < from || limit <= 0)) { + if (OB_UNLIKELY(from < 0 || to > valid_bytes_ || to < from || limit <= 0 || from < id_offset)) { ret = OB_INVALID_ARGUMENT; - LIB_LOG(WARN, "Invalid from or to when get row ids", K(ret), K(from), K(to), K_(valid_bytes), K(limit)); + LIB_LOG(WARN, "Invalid from or to when get row ids", K(ret), K(from), K(to), K_(valid_bytes), + K(limit), K(id_offset)); #if OB_USE_MULTITARGET_CODE } else if (common::is_arch_supported(ObTargetArch::AVX2)) { common::specific::avx2::bitmap_get_row_ids(row_ids, row_count, from, to, limit, id_offset, data_); @@ -625,15 +692,14 @@ int ObBitmap::reserve(size_type capacity) ret = OB_ALLOCATE_MEMORY_FAILED; LIB_LOG(WARN, "Failed to alloc memory for bitmap", K(ret), K(new_size)); } else { - if (nullptr != data_) { - allocator_.free(data_); - } + destroy(); capacity_ = new_size; data_ = new_data; } } if (OB_SUCC(ret)) { valid_bytes_ = capacity; + condensed_cnt_ = -1; } return ret; } @@ -680,6 +746,7 @@ void ObBitmap::reuse(const bool is_all_true) } else { MEMSET(static_cast(data_), 0, valid_bytes_); } + condensed_cnt_ = -1; } int ObBitmap::set_bitmap_batch(const int64_t offset, const int64_t count, const bool value) @@ -822,6 +889,33 @@ void ObBitmap::filter( #endif } +int ObBitmap::generate_condensed_index() +{ + int ret = OB_SUCCESS; + if (nullptr == condensed_idx_) { + void *buf = nullptr; + if (OB_ISNULL(buf = allocator_.alloc(sizeof(int32_t) * capacity()))) { + ret = common::OB_ALLOCATE_MEMORY_FAILED; + LIB_LOG(WARN, "fail to alloc row_ids", K(ret), K(capacity())); + } else { + condensed_idx_ = reinterpret_cast(buf); + } + } + + if (OB_FAIL(ret)) { +#if OB_USE_MULTITARGET_CODE + // enable when avx512 is more efficient + //} else if (common::is_arch_supported(ObTargetArch::AVX512)) { + // common::specific::avx512::bitmap_get_condensed_index(data_, valid_bytes_, condensed_idx_, condensed_cnt_); + } else if 
(common::is_arch_supported(ObTargetArch::AVX2)) { + common::specific::avx2::bitmap_get_condensed_index(data_, valid_bytes_, condensed_idx_, condensed_cnt_); +#endif + } else { + common::specific::normal::bitmap_get_condensed_index(data_, valid_bytes_, condensed_idx_, condensed_cnt_); + } + return ret; +} + } //end namespace oceanbase } //end namespace common diff --git a/deps/oblib/src/lib/container/ob_bitmap.h b/deps/oblib/src/lib/container/ob_bitmap.h index f27855ed1..7dd991bc6 100644 --- a/deps/oblib/src/lib/container/ob_bitmap.h +++ b/deps/oblib/src/lib/container/ob_bitmap.h @@ -98,12 +98,16 @@ public: int copy_from(const ObBitmap &bitmap, const int64_t start, const int64_t count); void reuse(const bool is_all_true = false); int get_row_ids( - int64_t *row_ids, + int32_t *row_ids, int64_t &row_count, int64_t &from, const int64_t to, const int64_t limit, const int64_t id_offset = 0) const; + int generate_condensed_index(); + OB_INLINE bool is_index_generated() { return -1 != condensed_cnt_; } + OB_INLINE const int32_t *get_condensed_idx() { return condensed_idx_; } + OB_INLINE int32_t get_condensed_cnt() { return condensed_cnt_; } OB_INLINE size_type capacity() const { @@ -154,10 +158,12 @@ private: private: bool is_inited_; - int64_t valid_bytes_; - int64_t capacity_; + int32_t valid_bytes_; + int32_t capacity_; // Make sure that data_[i] can only be equal to 0x00 or 0x01 when i is from 0 to (valid_bytes_ - 1). 
uint8_t *data_; + int32_t condensed_cnt_; + int32_t *condensed_idx_; ObIAllocator &allocator_; }; @@ -175,7 +181,15 @@ OB_INLINE void ObBitmap::destroy() { if (nullptr != data_) { allocator_.free(data_); + data_ = nullptr; } + if (nullptr != condensed_idx_) { + allocator_.free(condensed_idx_); + condensed_idx_ = nullptr; + } + valid_bytes_ = 0; + capacity_ = 0; + condensed_cnt_ = -1; } OB_INLINE bool ObBitmap::empty() const diff --git a/deps/oblib/unittest/lib/container/test_bitmap.cpp b/deps/oblib/unittest/lib/container/test_bitmap.cpp index b44102e23..e70911494 100644 --- a/deps/oblib/unittest/lib/container/test_bitmap.cpp +++ b/deps/oblib/unittest/lib/container/test_bitmap.cpp @@ -187,7 +187,7 @@ TEST_F(TestObBitmap, get_row_ids) for (int64_t i = 4090; i < 4100; ++i) { EXPECT_EQ(OB_SUCCESS, bitmap.set(i, true)); } - int64_t *row_ids = reinterpret_cast(allocator_.alloc(800)); + int32_t *row_ids = reinterpret_cast(allocator_.alloc(800)); int64_t row_count = 0; int64_t from = 0; EXPECT_EQ(OB_SUCCESS, bitmap.get_row_ids(row_ids, row_count, from, 3000, INT64_MAX)); @@ -211,6 +211,7 @@ TEST_F(TestObBitmap, get_row_ids) EXPECT_EQ(4090 + i - 2, row_ids[i]); } + int ret = OB_SUCCESS; from = 0; EXPECT_EQ(OB_SUCCESS, bitmap.get_row_ids(row_ids, row_count, from, 8193, 10)); EXPECT_EQ(10, row_count); diff --git a/mittest/mtlenv/storage/blocksstable/test_block_cache.cpp b/mittest/mtlenv/storage/blocksstable/test_block_cache.cpp index 075314816..8ef27f28c 100644 --- a/mittest/mtlenv/storage/blocksstable/test_block_cache.cpp +++ b/mittest/mtlenv/storage/blocksstable/test_block_cache.cpp @@ -208,16 +208,24 @@ TEST_F(TestObMicroBlockCache, test_block_cache) // multi block io + int64_t block_count = 0; ObMultiBlockIOParam multi_io_param; - multi_io_param.micro_index_infos_ = µ_idx_infos; - multi_io_param.start_index_ = 0; - multi_io_param.block_count_ = micro_idx_infos.count(); - ASSERT_EQ(OB_SUCCESS, data_block_cache_->prefetch( - MTL_ID(), - data_idx_info.get_macro_id(), - 
multi_io_param, - context_.query_flag_.is_use_block_cache(), - multi_io_handle)); + multi_io_param.row_header_ = micro_idx_infos.at(0).row_header_; + multi_io_param.micro_infos_.set_allocator(&allocator_); + multi_io_param.micro_infos_.prepare_reallocate(micro_idx_infos.count()); + while (block_count < 16 && block_count < micro_idx_infos.count()) { + multi_io_param.micro_infos_[block_count].set( + micro_idx_infos.at(block_count).get_block_offset(), micro_idx_infos.at(block_count).get_block_size()); + multi_io_param.data_cache_size_ += micro_idx_infos.at(block_count).get_block_size(); + multi_io_param.micro_block_count_++; + block_count++; + } + ASSERT_EQ(OB_SUCCESS, data_block_cache_->prefetch_multi_block( + MTL_ID(), + data_idx_info.get_macro_id(), + multi_io_param, + context_.query_flag_.is_use_block_cache(), + multi_io_handle)); ASSERT_EQ(OB_SUCCESS, multi_io_handle.wait()); const ObMultiBlockIOResult *io_result = reinterpret_cast(multi_io_handle.get_buffer()); @@ -225,8 +233,8 @@ TEST_F(TestObMicroBlockCache, test_block_cache) int64_t idx = 0; ObMicroBlockData data_block_data; - while (idx != micro_idx_infos.count()) { - ASSERT_EQ(OB_SUCCESS, io_result->get_block_data(idx, data_block_data)); + while (idx != block_count) { + ASSERT_EQ(OB_SUCCESS, io_result->get_block_data(idx, multi_io_param.micro_infos_[idx], data_block_data)); ASSERT_TRUE(data_block_data.is_valid()); ASSERT_EQ(ObMicroBlockData::DATA_BLOCK, data_block_data.type_); ASSERT_EQ(data_block_data.get_micro_header()->row_count_, micro_idx_infos[idx].get_row_count()); diff --git a/mittest/mtlenv/storage/blocksstable/test_cg_group_by_scanner.cpp b/mittest/mtlenv/storage/blocksstable/test_cg_group_by_scanner.cpp index dc3ecd5b8..52bb9b387 100644 --- a/mittest/mtlenv/storage/blocksstable/test_cg_group_by_scanner.cpp +++ b/mittest/mtlenv/storage/blocksstable/test_cg_group_by_scanner.cpp @@ -415,7 +415,7 @@ TEST_F(TestCGGroupByScanner, test_decide_group_size) ASSERT_EQ(OB_SUCCESS, 
group_by_scanner.init_group_by_info()); int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); @@ -441,7 +441,7 @@ TEST_F(TestCGGroupByScanner, test_decide_can_group_by) int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); @@ -472,7 +472,7 @@ TEST_F(TestCGGroupByScanner, test_read_distinct) int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); @@ -513,7 +513,7 @@ TEST_F(TestCGGroupByScanner, test_read_reference) int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); @@ -563,7 +563,7 @@ TEST_F(TestCGGroupByScanner, test_calc_aggregate_group_by) int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; 
ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); @@ -642,7 +642,7 @@ TEST_F(TestCGGroupByScanner, test_calc_aggregate_group_by_with_bitmap) int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); diff --git a/mittest/mtlenv/storage/blocksstable/test_cg_scanner.cpp b/mittest/mtlenv/storage/blocksstable/test_cg_scanner.cpp index 106f4de71..420f68d80 100644 --- a/mittest/mtlenv/storage/blocksstable/test_cg_scanner.cpp +++ b/mittest/mtlenv/storage/blocksstable/test_cg_scanner.cpp @@ -583,8 +583,8 @@ TEST_F(TestCGScanner, test_filter) pd_filter.datum_buf_ = new (buf3) blocksstable::ObStorageDatum[1](); buf3 = allocator_.alloc(sizeof(char *) * pd_filter.batch_size_); pd_filter.cell_data_ptrs_ = reinterpret_cast(buf3); - buf3 = allocator_.alloc(sizeof(int64_t) * pd_filter.batch_size_); - pd_filter.row_ids_ = reinterpret_cast(buf3); + buf3 = allocator_.alloc(sizeof(int32_t) * pd_filter.batch_size_); + pd_filter.row_ids_ = reinterpret_cast(buf3); pd_filter.skip_bit_ = to_bit_vector(allocator_.alloc(ObBitVector::memory_size(256))); pd_filter.is_inited_ = true; diff --git a/mittest/mtlenv/storage/blocksstable/test_cs_cg_group_by_scanner.cpp b/mittest/mtlenv/storage/blocksstable/test_cs_cg_group_by_scanner.cpp index 8f3d4e059..2be2b72ad 100644 --- a/mittest/mtlenv/storage/blocksstable/test_cs_cg_group_by_scanner.cpp +++ b/mittest/mtlenv/storage/blocksstable/test_cs_cg_group_by_scanner.cpp @@ -416,7 +416,7 @@ TEST_F(TestCSCGGroupByScanner, test_decide_group_size) ASSERT_EQ(OB_SUCCESS, group_by_scanner.init_group_by_info()); int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, 
locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); @@ -442,7 +442,7 @@ TEST_F(TestCSCGGroupByScanner, test_decide_can_group_by) int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); @@ -473,7 +473,7 @@ TEST_F(TestCSCGGroupByScanner, test_read_distinct) int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); @@ -514,7 +514,7 @@ TEST_F(TestCSCGGroupByScanner, test_read_reference) int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); @@ -560,7 +560,7 @@ TEST_F(TestCSCGGroupByScanner, test_calc_aggregate_group_by) int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); @@ -644,7 +644,7 @@ TEST_F(TestCSCGGroupByScanner, 
test_calc_aggregate_group_by_with_bitmap) int64_t start = 0; int64_t locate_count = row_cnt_; - ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate(ObCSRange(start, locate_count))); + ASSERT_EQ(OB_SUCCESS, group_by_scanner.locate_micro_index(ObCSRange(start, locate_count))); int64_t group_size = 0; ASSERT_EQ(OB_SUCCESS, group_by_scanner.decide_group_size(group_size)); ASSERT_EQ(500, group_size); diff --git a/src/rootserver/ob_root_service.cpp b/src/rootserver/ob_root_service.cpp index 09fea4786..be2e5ebb7 100755 --- a/src/rootserver/ob_root_service.cpp +++ b/src/rootserver/ob_root_service.cpp @@ -2016,6 +2016,8 @@ int ObRootService::execute_bootstrap(const obrpc::ObBootstrapArg &arg) LOG_WARN("fail to set one phase commit config", K(ret)); } else if (OB_FAIL(disable_dbms_job())) { LOG_WARN("failed to update _enable_dbms_job_package", K(ret)); + } else if (OB_FAIL(set_bloom_filter_ratio_config_())) { + LOG_WARN("failed to update _bloom_filter_ratio", K(ret)); } if (OB_SUCC(ret)) { @@ -11566,6 +11568,18 @@ int ObRootService::disable_dbms_job() return ret; } +int ObRootService::set_bloom_filter_ratio_config_() +{ + int64_t affected_rows = 0; + int ret = OB_SUCCESS; + if (OB_FAIL(sql_proxy_.write("ALTER SYSTEM SET _bloom_filter_ratio = 3;", affected_rows))) { + LOG_WARN("update _bloom_filter_ratio failed", K(ret)); + } else if (OB_FAIL(check_config_result("_bloom_filter_ratio", "3"))) { + LOG_WARN("failed to check config same", K(ret)); + } + return ret; +} + int ObRootService::handle_recover_table(const obrpc::ObRecoverTableArg &arg) { int ret = OB_SUCCESS; diff --git a/src/rootserver/ob_root_service.h b/src/rootserver/ob_root_service.h index cbaa9e306..b4f114e82 100644 --- a/src/rootserver/ob_root_service.h +++ b/src/rootserver/ob_root_service.h @@ -918,6 +918,7 @@ private: int set_cpu_quota_concurrency_config_(); int set_enable_trace_log_(); int disable_dbms_job(); + int set_bloom_filter_ratio_config_(); int try_notify_switch_leader(const 
obrpc::ObNotifySwitchLeaderArg::SwitchLeaderComment &comment); int precheck_interval_part(const obrpc::ObAlterTableArg &arg); diff --git a/src/share/aggregate/agg_ctx.h b/src/share/aggregate/agg_ctx.h index 421c1ac05..95a5cdfb5 100644 --- a/src/share/aggregate/agg_ctx.h +++ b/src/share/aggregate/agg_ctx.h @@ -380,6 +380,10 @@ public: const sql::ObBitVector &skip, const sql::EvalBound &bound, char *agg_cell, const RowSelector row_sel = RowSelector{}) = 0; + + inline virtual int add_batch_for_multi_groups(RuntimeContext &agg_ctx, AggrRowPtr *agg_rows, + RowSelector &row_sel, const int64_t batch_size, + const int32_t agg_col_id) = 0; inline virtual int add_one_row(RuntimeContext &agg_ctx, const int64_t batch_idx, const int64_t batch_size, const bool is_null, const char *data, const int32_t data_len, int32_t agg_col_idx, char *agg_cell) = 0; diff --git a/src/share/aggregate/iaggregate.h b/src/share/aggregate/iaggregate.h index 7fe2ae553..6543fed11 100644 --- a/src/share/aggregate/iaggregate.h +++ b/src/share/aggregate/iaggregate.h @@ -177,6 +177,87 @@ public: return ret; } + inline int add_batch_for_multi_groups(RuntimeContext &agg_ctx, AggrRowPtr *agg_rows, + RowSelector &row_sel, const int64_t batch_size, + const int32_t agg_col_id) override + { +#define INNER_ADD(vec_tc) \ + case (vec_tc): { \ + ret = inner_add_for_multi_groups>>( \ + agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_expr->get_vector(eval_ctx)); \ + } break + + int ret = OB_SUCCESS; + ObAggrInfo &aggr_info = agg_ctx.aggr_infos_.at(agg_col_id); + ObEvalCtx &eval_ctx = agg_ctx.eval_ctx_; + VectorFormat fmt = VEC_INVALID; + ObExpr *param_expr = nullptr; + Derived *derived_this = static_cast(this); +#ifndef NDEBUG + int64_t mock_skip_data = 0; + ObBitVector *mock_skip = to_bit_vector(&mock_skip_data); + helper::print_input_rows(row_sel, *mock_skip, sql::EvalBound(), aggr_info, + aggr_info.is_implicit_first_aggr(), eval_ctx, this, agg_col_id); +#endif + if (aggr_info.is_implicit_first_aggr()) { + 
fmt = aggr_info.expr_->get_format(eval_ctx); + param_expr = aggr_info.expr_; + } else if (aggr_info.param_exprs_.count() == 1) { + param_expr = aggr_info.param_exprs_.at(0); + fmt = param_expr->get_format(eval_ctx); + } + if (OB_ISNULL(param_expr)) { // count(*) + for (int i = 0; OB_SUCC(ret) && i < row_sel.size(); i++) { + int batch_idx = row_sel.index(i); + char *agg_cell = agg_ctx.row_meta().locate_cell_payload(agg_col_id, agg_rows[batch_idx]); + if (OB_FAIL(derived_this->add_one_row(agg_ctx, batch_idx, batch_size, false, nullptr, 0, + agg_col_id, agg_cell))) { + SQL_LOG(WARN, "inner add one row failed", K(ret)); + } + } + } else { + VecValueTypeClass vec_tc = param_expr->get_vec_value_tc(); + switch(fmt) { + case common::VEC_UNIFORM: { + ret = inner_add_for_multi_groups>( + agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_expr->get_vector(eval_ctx)); + break; + } + case common::VEC_UNIFORM_CONST: { + ret = inner_add_for_multi_groups>( + agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_expr->get_vector(eval_ctx)); + break; + } + case common::VEC_DISCRETE: { + ret = inner_add_for_multi_groups( + agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_expr->get_vector(eval_ctx)); + break; + } + case common::VEC_CONTINUOUS: { + ret = inner_add_for_multi_groups( + agg_ctx, agg_rows, row_sel, batch_size, agg_col_id, param_expr->get_vector(eval_ctx)); + break; + } + case common::VEC_FIXED: { + switch(vec_tc) { + LST_DO_CODE(INNER_ADD, AGG_VEC_TC_LIST); + default: { + ret = OB_ERR_UNEXPECTED; + SQL_LOG(WARN, "unexpected type class", K(vec_tc)); + } + } + break; + } + default: { + ret = OB_ERR_UNEXPECTED; + break; + } + } + } + return ret; +#undef INNER_ADD + } + int collect_batch_group_results(RuntimeContext &agg_ctx, const int32_t agg_col_id, const int32_t cur_group_id, const int32_t output_start_idx, const int32_t expect_batch_size, int32_t &output_size, @@ -290,6 +371,28 @@ public: protected: + template + int 
inner_add_for_multi_groups(RuntimeContext &agg_ctx, AggrRowPtr *agg_rows, RowSelector &row_sel, + const int64_t batch_size, const int32_t agg_col_id, + ObIVector *ivec) + { + int ret = OB_SUCCESS; + ColumnFmt *param_vec = static_cast(ivec); + bool is_null = false; + const char *payload = nullptr; + int32_t len = 0; + Derived *derived_this = static_cast(this); + for (int i = 0; OB_SUCC(ret) && i < row_sel.size(); i++) { + int64_t batch_idx = row_sel.index(i); + param_vec->get_payload(batch_idx, is_null, payload, len); + char *agg_cell = agg_ctx.row_meta().locate_cell_payload(agg_col_id, agg_rows[batch_idx]); + if (OB_FAIL(derived_this->add_one_row(agg_ctx, batch_idx, batch_size, is_null, payload, len, + agg_col_id, agg_cell))) { + SQL_LOG(WARN, "inner add one row failed", K(ret)); + } + } + return ret; + } template int add_batch_rows(RuntimeContext &agg_ctx, const sql::ObBitVector &skip, const sql::EvalBound &bound, const ObExpr ¶m_expr, diff --git a/src/share/aggregate/processor.cpp b/src/share/aggregate/processor.cpp index b588225af..911070cce 100644 --- a/src/share/aggregate/processor.cpp +++ b/src/share/aggregate/processor.cpp @@ -27,22 +27,31 @@ int Processor::init() int ret = OB_SUCCESS; if (inited_) { LOG_DEBUG("already inited, do nothing"); - } else if (agg_ctx_.aggr_infos_.count() <= 0) { - // do nothing - } else if (OB_UNLIKELY(agg_ctx_.aggr_infos_.count() >= MAX_SUPPORTED_AGG_CNT)) { - ret = OB_NOT_SUPPORTED; - SQL_LOG(WARN, "too many aggregations, not supported", K(ret)); - } else if (OB_FAIL(aggregates_.reserve(agg_ctx_.aggr_infos_.count()))) { - SQL_LOG(WARN, "reserved allocator failed", K(ret)); - } else if (OB_FAIL(helper::init_aggregates(agg_ctx_, allocator_, aggregates_))) { - SQL_LOG(WARN, "init aggregates failed", K(ret)); - } else if (OB_FAIL(add_one_row_fns_.prepare_allocate(agg_ctx_.aggr_infos_.count()))) { - SQL_LOG(WARN, "prepare allocate elements failed", K(ret)); } else { - clear_add_one_row_fns(); - } - if (OB_SUCC(ret)) { - inited_ = 
true; + if (OB_ISNULL(row_selector_ = (uint16_t *)allocator_.alloc( + sizeof(uint16_t) * agg_ctx_.eval_ctx_.max_batch_size_))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("allocate memory failed", K(ret)); + } else { + MEMSET(row_selector_, 0, sizeof(uint16_t) * agg_ctx_.eval_ctx_.max_batch_size_); + } + + if (OB_FAIL(ret)) { + } else if (agg_ctx_.aggr_infos_.count() <= 0) { + // do nothing + } else if (OB_UNLIKELY(agg_ctx_.aggr_infos_.count() >= MAX_SUPPORTED_AGG_CNT)) { + ret = OB_NOT_SUPPORTED; + SQL_LOG(WARN, "too many aggregations, not supported", K(ret)); + } else if (OB_FAIL(aggregates_.reserve(agg_ctx_.aggr_infos_.count()))) { + SQL_LOG(WARN, "reserved allocator failed", K(ret)); + } else if (OB_FAIL(helper::init_aggregates(agg_ctx_, allocator_, aggregates_))) { + SQL_LOG(WARN, "init aggregates failed", K(ret)); + } else if (OB_FAIL(add_one_row_fns_.prepare_allocate(agg_ctx_.aggr_infos_.count()))) { + SQL_LOG(WARN, "prepare allocate elements failed", K(ret)); + } else { + clear_add_one_row_fns(); + } + if (OB_SUCC(ret)) { inited_ = true; } } return ret; @@ -80,6 +89,7 @@ void Processor::destroy() } fast_single_row_aggregates_.reset(); allocator_.reset(); + row_selector_ = nullptr; inited_ = false; } diff --git a/src/share/aggregate/processor.h b/src/share/aggregate/processor.h index 6a33a441c..f28ddf8e5 100644 --- a/src/share/aggregate/processor.h +++ b/src/share/aggregate/processor.h @@ -36,7 +36,7 @@ public: agg_ctx_(eval_ctx, tenant_id, aggr_infos, label, allocator_), aggregates_(allocator_, aggr_infos.count()), fast_single_row_aggregates_(allocator_, aggr_infos.count()), extra_rt_info_buf_(nullptr), - cur_extra_rt_info_idx_(0), add_one_row_fns_(allocator_, aggr_infos.count()) + cur_extra_rt_info_idx_(0), add_one_row_fns_(allocator_, aggr_infos.count()), row_selector_(nullptr) {} ~Processor() { destroy(); } int init(); @@ -70,6 +70,29 @@ public: return ret; } + inline int add_batch_for_multi_groups(const int32_t start_agg_id, const int32_t 
end_agg_id, + AggrRowPtr *agg_rows, const int64_t batch_size) + { + int ret = OB_SUCCESS; + int size = 0; + OB_ASSERT(batch_size <= agg_ctx_.eval_ctx_.max_batch_size_); + if (OB_ISNULL(row_selector_)) { + ret = OB_ERR_UNEXPECTED; + SQL_LOG(WARN, "unexpected null selector", K(ret)); + } + for (int i = 0; OB_SUCC(ret) && i < batch_size; i++) { + if (OB_NOT_NULL(agg_rows[i])) { row_selector_[size++] = i; } + } + RowSelector iter(row_selector_, size); + for (int col_id = start_agg_id; OB_SUCC(ret) && col_id < end_agg_id; col_id++) { + if (OB_FAIL(aggregates_.at(col_id)->add_batch_for_multi_groups(agg_ctx_, agg_rows, iter, + batch_size, col_id))) { + SQL_LOG(WARN, "add batch for multi groups failed", K(ret)); + } + } + return ret; + } + int collect_group_results(const RowMeta &row_meta, const ObIArray &group_exprs, const int32_t output_batch_size, ObBatchRows &output_brs, int64_t &cur_group_id); @@ -220,6 +243,7 @@ private: char *extra_rt_info_buf_; int32_t cur_extra_rt_info_idx_; ObFixedArray add_one_row_fns_; + uint16_t *row_selector_; // ObFixedArray }; } // end aggregate diff --git a/src/share/ob_i_tablet_scan.cpp b/src/share/ob_i_tablet_scan.cpp index ab9b0edd1..a195a5426 100644 --- a/src/share/ob_i_tablet_scan.cpp +++ b/src/share/ob_i_tablet_scan.cpp @@ -60,8 +60,10 @@ DEF_TO_STRING(ObVTableScanParam) N_WAIT, for_update_wait_timeout_, N_FROZEN_VERSION, frozen_version_, K_(is_get), + K_(pd_storage_flag), KPC_(output_exprs), KPC_(op_filters), + K_(table_scan_opt), K_(external_file_format), K_(external_file_location)); J_OBJ_END(); diff --git a/src/share/ob_i_tablet_scan.h b/src/share/ob_i_tablet_scan.h index 5697efac2..5c1048014 100644 --- a/src/share/ob_i_tablet_scan.h +++ b/src/share/ob_i_tablet_scan.h @@ -266,6 +266,7 @@ ObVTableScanParam() : pd_storage_filters_(nullptr), pd_storage_flag_(false), row2exprs_projector_(NULL), + table_scan_opt_(), ext_file_column_exprs_(NULL), ext_column_convert_exprs_(NULL), schema_guard_(NULL) @@ -340,6 +341,7 @@ 
ObVTableScanParam() : int32_t pd_storage_flag_; // project storage output row to %output_exprs_ storage::ObRow2ExprsProjector *row2exprs_projector_; + ObTableScanOption table_scan_opt_; // external table const sql::ExprFixedArray *ext_file_column_exprs_; diff --git a/src/share/ob_lob_access_utils.cpp b/src/share/ob_lob_access_utils.cpp index 4ee178c60..388b5d3b4 100644 --- a/src/share/ob_lob_access_utils.cpp +++ b/src/share/ob_lob_access_utils.cpp @@ -1112,7 +1112,11 @@ int ObTextStringResult::calc_buffer_len(int64_t res_len) bool has_extern = lib::is_oracle_mode(); // even oracle may not need extern for temp data ObMemLobExternFlags extern_flags(has_extern); res_len += sizeof(ObLobCommon); - buff_len_ = ObLobLocatorV2::calc_locator_full_len(extern_flags, 0, static_cast(res_len), false); + if (has_extern) { + buff_len_ = ObLobLocatorV2::calc_locator_full_len(extern_flags, 0, static_cast(res_len), false); + } else { + buff_len_ = res_len; // for mysql mode temp lob, we can mock it as disk inrow lob + } } else { ret = OB_NOT_SUPPORTED; LOG_WARN("Lob: out row temp lob not implemented, not support length bigger than 512M", @@ -1193,7 +1197,10 @@ int ObTextStringResult::fill_temp_lob_header(const int64_t res_len) ObString rowkey_str; ObString empty_str; ObLobCommon lob_common; - if (OB_FAIL(locator.fill(TEMP_FULL_LOB, + if (lib::is_mysql_mode()) { + // for mysql mode temp lob, we can mock it as disk inrow lob + MEMCPY(buffer_, &lob_common, sizeof(ObLobCommon)); + } else if (OB_FAIL(locator.fill(TEMP_FULL_LOB, extern_flags, rowkey_str, &lob_common, diff --git a/src/share/parameter/ob_parameter_seed.ipp b/src/share/parameter/ob_parameter_seed.ipp index 752c23281..6362cf8a8 100644 --- a/src/share/parameter/ob_parameter_seed.ipp +++ b/src/share/parameter/ob_parameter_seed.ipp @@ -166,6 +166,10 @@ DEF_STR_WITH_CHECKER(default_table_store_format, OB_TENANT_PARAMETER, "row", "values: row, column, compound", ObParameterAttr(Section::TENANT, Source::DEFAULT, 
EditLevel::DYNAMIC_EFFECTIVE)); +DEF_INT(storage_rowsets_size, OB_TENANT_PARAMETER, "8192", "(0,1048576]", + "the row number processed by vectorized storage engine within one batch in column storage. Range: (0,1048576]", + ObParameterAttr(Section::TENANT, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); + DEF_TIME(weak_read_version_refresh_interval, OB_CLUSTER_PARAMETER, "100ms", "[50ms,)", "the time interval to refresh cluster weak read version " "Range: [50ms, +∞)", @@ -1067,6 +1071,12 @@ DEF_INT(_max_ls_cnt_per_server, OB_TENANT_PARAMETER, "0", "[0, 1024]", "0: the cluster will adapt the max ls number according to the memory size of tenant itself", ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); +DEF_CAP(_io_read_batch_size, OB_TENANT_PARAMETER, "0K", "[0K,16M]", "Maximum batch size in one read io request. Range:[0K,16M]", + ObParameterAttr(Section::SSTABLE, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); +DEF_INT(_io_read_redundant_limit_percentage, OB_TENANT_PARAMETER, "0", "[0, 99]", + "Maximum percentage of redundant size in one read io request, redundant data means blocks in the middle of the batch that hit in cache or filtered by skipping index but must be read. 
Range:[0,99]", + ObParameterAttr(Section::SSTABLE, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); + // TODO bin.lb: to be remove DEF_CAP(dtl_buffer_size, OB_CLUSTER_PARAMETER, "64K", "[4K,2M]", "to be removed", ObParameterAttr(Section::OBSERVER, Source::DEFAULT, EditLevel::DYNAMIC_EFFECTIVE)); diff --git a/src/share/schema/ob_schema_struct.cpp b/src/share/schema/ob_schema_struct.cpp index 9d55d734d..ffd03b120 100644 --- a/src/share/schema/ob_schema_struct.cpp +++ b/src/share/schema/ob_schema_struct.cpp @@ -7545,10 +7545,8 @@ int ObPartitionUtils::calc_hash_part_idx(const uint64_t val, partition_idx += powN; } } - LOG_TRACE("get hash part idx", K(lbt()), K(ret), K(val), K(part_num), K(N), K(powN), K(partition_idx)); } else { partition_idx = val % part_num; - LOG_TRACE("get hash part idx", K(lbt()), K(ret), K(val), K(part_num), K(partition_idx)); } return ret; } diff --git a/src/share/vector/expr_cmp_func.cpp b/src/share/vector/expr_cmp_func.cpp index d810cbe77..73a7bf985 100644 --- a/src/share/vector/expr_cmp_func.cpp +++ b/src/share/vector/expr_cmp_func.cpp @@ -88,12 +88,6 @@ using nullsafe_cmp_initer = InitCmpSet< static bool g_init_cmp_set = Ob2DArrayConstIniter::init(); -static bool init_row_cmp_double_func() { - ROW_CMP_FUNCS[VEC_TC_DOUBLE][VEC_TC_FIXED_DOUBLE] = VecTCCmpCalc::cmp; - ROW_CMP_FUNCS[VEC_TC_FIXED_DOUBLE][VEC_TC_DOUBLE] = VecTCCmpCalc::cmp; - return true; -} -static bool g_init_row_cmp_double_func = init_row_cmp_double_func(); void VectorCmpExprFuncsHelper::get_cmp_set(const sql::ObDatumMeta &l_meta, const sql::ObDatumMeta &r_meta, @@ -316,7 +310,6 @@ struct EvalVectorCmp return ret; } -#undef VECTOR_CMP_CASE }; struct EvalVectorCmpWithNull @@ -371,8 +364,6 @@ struct EvalVectorCmp: public EvalVectorCmpWithNull {} template struct EvalVectorCmp: public EvalVectorCmpWithNull {}; -#undef CALC_FORMAT - static sql::ObExpr::EvalVectorFunc EVAL_VECTOR_EXPR_CMP_FUNCS[MAX_VEC_TC][MAX_VEC_TC][CO_MAX]; template @@ -406,10 +397,21 @@ struct 
VectorExprCmpFuncIniter template using cmp_initer = VectorExprCmpFuncIniter< X, Y, - VecTCCmpCalc(X), static_cast(Y)>::defined_>; + VecTCCmpCalc(X), static_cast(Y)>::defined_ + && (!is_fixed_length_vec(static_cast(X)) + || !is_fixed_length_vec(static_cast(Y)))>; static int g_init_eval_vector_expr_cmp_funcs = Ob2DArrayConstIniter::init(); +} // end namespace common +} // end namespace oceanbase + +#include "expr_cmp_func_simd.ipp" + +namespace oceanbase +{ +namespace common +{ sql::ObExpr::EvalVectorFunc VectorCmpExprFuncsHelper::get_eval_vector_expr_cmp_func( const sql::ObDatumMeta &l_meta, const sql::ObDatumMeta &r_meta, const common::ObCmpOp cmp_op) diff --git a/src/share/vector/expr_cmp_func.h b/src/share/vector/expr_cmp_func.h index e5abbb187..d38cb4769 100644 --- a/src/share/vector/expr_cmp_func.h +++ b/src/share/vector/expr_cmp_func.h @@ -16,7 +16,6 @@ #include "common/object/ob_obj_type.h" #include "sql/engine/expr/ob_expr.h" #include "common/object/ob_obj_compare.h" -#include "sql/engine/expr/ob_expr_between.h" namespace oceanbase { diff --git a/src/share/vector/expr_cmp_func_simd.ipp b/src/share/vector/expr_cmp_func_simd.ipp new file mode 100644 index 000000000..f31c4b03f --- /dev/null +++ b/src/share/vector/expr_cmp_func_simd.ipp @@ -0,0 +1,374 @@ +/** + * Copyright (c) 2021 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include "expr_cmp_func.h" +#include "share/vector/ob_fixed_length_format.h" +#include "share/datum/ob_datum_util.h" +#include "share/vector/ob_uniform_format.h" +#include "share/vector/vector_basic_op.h" +#include "common/ob_target_specific.h" +#if OB_USE_MULTITARGET_CODE +#include +#include +#endif + +namespace oceanbase +{ +namespace common +{ +using namespace sql; + +OB_DECLARE_AVX512_SPECIFIC_CODE( +template +static int simd_eval_vector(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, + const EvalBound &bound); +) + +template +struct FixedVectorCmp +{ + using L_VEC_FIXED_FMT = + typename std::conditional>, + ObVectorBase>::type; + using R_VEC_FIXED_FMT = + typename std::conditional>, + ObVectorBase>::type; + using RES_VEC_FIXED_FMT = ObFixedLengthFormat; + + using L_VEC_UNIFORM_FMT = ObUniformFormat; + using R_VEC_UNIFORM_FMT = ObUniformFormat; + using L_VEC_UNIFORM_CONST_FMT = ObUniformFormat; + using R_VEC_UNIFORM_CONST_FMT = ObUniformFormat; + + static int eval_vector(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, + const EvalBound &bound) + { + int ret = OB_SUCCESS; + if (OB_FAIL(eval_cmp_operands(expr, ctx, skip, bound))) { + LOG_WARN("eval cmp operands failed", K(ret)); + } else { + const ObExpr &left = *expr.args_[0]; + const ObExpr &right = *expr.args_[1]; + VectorFormat left_format = left.get_format(ctx); + VectorFormat right_format = right.get_format(ctx); + VectorFormat res_format = expr.get_format(ctx); + LOG_DEBUG("eval vector cmp", K(expr), K(l_tc), K(r_tc), K(cmp_op), K(bound), K(left_format), + K(right_format), K(res_format)); + if (is_valid_format(left_format) && is_valid_format(right_format) && is_valid_format(res_format)) { + switch(CALC_FORMAT(left_format, right_format, res_format)) { + case CALC_FORMAT(VEC_FIXED, VEC_FIXED, VEC_FIXED): { + bool use_simd = (l_tc == r_tc + && static_cast(expr.get_vector(ctx))->get_length() == sizeof(int64_t) + && sizeof(RTCType) <= sizeof(int64_t) + && cmp_op != CO_CMP && 
!left.get_vector(ctx)->has_null() + && !right.get_vector(ctx)->has_null() & bound.get_all_rows_active()) + && simd_supported(l_tc); + LOG_DEBUG("simd used", K(l_tc), K(r_tc), K(cmp_op), K(left.get_vector(ctx)->has_null()), + K(right.get_vector(ctx)->has_null()), K(bound.get_all_rows_active()), K(use_simd)); +#if OB_USE_MULTITARGET_CODE + if (use_simd && common::is_arch_supported(ObTargetArch::AVX512)) { + ret = common::specific::avx512::simd_eval_vector), cmp_op>( + expr, ctx, skip, bound); + } else { + DO_VECTOR_CMP(L_VEC_FIXED_FMT, R_VEC_FIXED_FMT, RES_VEC_FIXED_FMT); + } +#else + DO_VECTOR_CMP(L_VEC_FIXED_FMT, R_VEC_FIXED_FMT, RES_VEC_FIXED_FMT); +#endif + break; + } + VECTOR_CMP_CASE(VEC_UNIFORM, VEC_FIXED, VEC_FIXED); + VECTOR_CMP_CASE(VEC_UNIFORM, VEC_UNIFORM, VEC_FIXED); + VECTOR_CMP_CASE(VEC_UNIFORM, VEC_UNIFORM_CONST, VEC_FIXED); + VECTOR_CMP_CASE(VEC_UNIFORM_CONST, VEC_FIXED, VEC_FIXED); + VECTOR_CMP_CASE(VEC_UNIFORM_CONST, VEC_UNIFORM, VEC_FIXED); + default: { + DO_VECTOR_CMP(ObVectorBase, ObVectorBase, ObVectorBase); + break; + } + } + } else { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid format", K(left_format), K(right_format), K(res_format)); + } + } + return ret; + } +private: + static constexpr bool simd_supported(VecValueTypeClass vec_tc) + { + return vec_tc == VEC_TC_INTEGER + || vec_tc == VEC_TC_UINTEGER + || vec_tc == VEC_TC_FLOAT + || vec_tc == VEC_TC_DOUBLE + || vec_tc == VEC_TC_DATE + || vec_tc == VEC_TC_DATETIME + || vec_tc == VEC_TC_TIME + || vec_tc == VEC_TC_BIT + || vec_tc == VEC_TC_ENUM_SET + || vec_tc == VEC_TC_DEC_INT32 + || vec_tc == VEC_TC_YEAR + || vec_tc == VEC_TC_INTERVAL_YM + || vec_tc == VEC_TC_DEC_INT64; + } +}; + + +OB_DECLARE_AVX512_SPECIFIC_CODE( +template +struct __simd_cmp +{ + using ret_type = char; + OB_INLINE char operator()(const char *left, const char *right) + { + return char(); + } +}; + +template +OB_INLINE void __store_cmp_results(char *dst, const T & res_mask) +{ + static const uint64_t MASK = 0xFF; + static 
const __m512i TRUE_VALUES = _mm512_set1_epi64(1); + uint64_t res_bits = res_mask; + for (int i = 0; i < sizeof(T); i++) { + uint8_t store_bits = static_cast(MASK & res_bits); + res_bits >>= 8; + __m512i store_v = _mm512_maskz_mov_epi64(store_bits, TRUE_VALUES); + _mm512_storeu_epi64(dst, store_v); + dst += 64; + } +} +#define DEF_SIMD_INTEGER_OP(ret_size, val_size, bits, cmp_op, cmp_name) \ + template <> \ + struct __simd_cmp \ + { \ + using ret_type = __mmask##ret_size; \ + OB_INLINE ret_type operator()(const char *left, const char *right) \ + { \ + __m512i left_v = _mm512_loadu_epi64(left); \ + __m512i right_v = _mm512_loadu_epi64(right); \ + __mmask##ret_size res_mask = _mm512_cmp##cmp_name##_epi##bits##_mask(left_v, right_v); \ + return res_mask; \ + } \ + }; \ + template <> \ + struct __simd_cmp \ + { \ + using ret_type = __mmask##ret_size; \ + OB_INLINE ret_type operator()(const char *left, const char *right) \ + { \ + __m512i left_v = _mm512_loadu_epi64(left); \ + __m512i right_v = _mm512_loadu_epi64(right); \ + __mmask##ret_size res_mask = _mm512_cmp##cmp_name##_epu##bits##_mask(left_v, right_v); \ + return res_mask; \ + } \ + } + +DEF_SIMD_INTEGER_OP(64, 1, 8, CO_LE, le); +DEF_SIMD_INTEGER_OP(32, 2, 16, CO_LE, le); +DEF_SIMD_INTEGER_OP(16, 4, 32, CO_LE, le); +DEF_SIMD_INTEGER_OP(8, 8, 64, CO_LE, le); + +DEF_SIMD_INTEGER_OP(64, 1, 8, CO_LT, lt); +DEF_SIMD_INTEGER_OP(32, 2, 16, CO_LT, lt); +DEF_SIMD_INTEGER_OP(16, 4, 32, CO_LT, lt); +DEF_SIMD_INTEGER_OP(8, 8, 64, CO_LT, lt); + +DEF_SIMD_INTEGER_OP(64, 1, 8, CO_EQ, eq); +DEF_SIMD_INTEGER_OP(32, 2, 16, CO_EQ, eq); +DEF_SIMD_INTEGER_OP(16, 4, 32, CO_EQ, eq); +DEF_SIMD_INTEGER_OP(8, 8, 64, CO_EQ, eq); + +DEF_SIMD_INTEGER_OP(64, 1, 8, CO_NE, neq); +DEF_SIMD_INTEGER_OP(32, 2, 16, CO_NE, neq); +DEF_SIMD_INTEGER_OP(16, 4, 32, CO_NE, neq); +DEF_SIMD_INTEGER_OP(8, 8, 64, CO_NE, neq); + +DEF_SIMD_INTEGER_OP(64, 1, 8, CO_GT, gt); +DEF_SIMD_INTEGER_OP(32, 2, 16, CO_GT, gt); +DEF_SIMD_INTEGER_OP(16, 4, 32, CO_GT, gt); 
+DEF_SIMD_INTEGER_OP(8, 8, 64, CO_GT, gt); + +DEF_SIMD_INTEGER_OP(64, 1, 8, CO_GE, ge); +DEF_SIMD_INTEGER_OP(32, 2, 16, CO_GE, ge); +DEF_SIMD_INTEGER_OP(16, 4, 32, CO_GE, ge); +DEF_SIMD_INTEGER_OP(8, 8, 64, CO_GE, ge); + +#define DEF_SIMD_FLOATING_OP(cmp_name, cmp_op) \ + template <> \ + struct __simd_cmp \ + { \ + using ret_type = __mmask16; \ + OB_INLINE ret_type operator()(const char *left, const char *right) \ + { \ + __m512 left_v = _mm512_loadu_ps(left); \ + __m512 right_v = _mm512_loadu_ps(right); \ + __mmask16 res_mask = _mm512_cmp##cmp_name##_ps_mask(left_v, right_v); \ + if (cmp_op == CO_GE || cmp_op == CO_GT) { res_mask = ~res_mask; } \ + return res_mask; \ + } \ + }; \ + template <> \ + struct __simd_cmp \ + { \ + using ret_type = __mmask8; \ + OB_INLINE ret_type operator()(const char *left, const char *right) \ + { \ + __m512d left_v = _mm512_loadu_pd(left); \ + __m512d righ_v = _mm512_loadu_pd(right); \ + __mmask8 res_mask = _mm512_cmp##cmp_name##_pd_mask(left_v, righ_v); \ + if (cmp_op == CO_GE || cmp_op == CO_GT) { res_mask = ~res_mask; } \ + return res_mask; \ + } \ + } + +DEF_SIMD_FLOATING_OP(eq, CO_EQ); +DEF_SIMD_FLOATING_OP(le, CO_LE); +DEF_SIMD_FLOATING_OP(lt, CO_LT); +DEF_SIMD_FLOATING_OP(neq , CO_NE); +DEF_SIMD_FLOATING_OP(lt, CO_GE); +DEF_SIMD_FLOATING_OP(le, CO_GT); + +template +static int simd_eval_vector(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, + const EvalBound &bound) +{ + using mask_type = typename __simd_cmp::ret_type; + constexpr const VecValueTypeClass calc_tc = + (vec_tc == VEC_TC_FLOAT || vec_tc == VEC_TC_DOUBLE) ? + vec_tc : + (std::is_signed>::value ? 
VEC_TC_INTEGER : VEC_TC_UINTEGER); +#define DO_SIMD_CMP(off) \ + do { \ + mask_type res_mask = __simd_cmp()(left_data + offset + off * 64, \ + right_data + offset + off * 64); \ + __store_cmp_results(res_data + off * res_off_perf_unit, res_mask); \ + } while (false) + + using ResVec = ObFixedLengthFormat; + int ret = OB_SUCCESS; + ObFixedLengthBase *left_vec = static_cast(expr.args_[0]->get_vector(ctx)); + ObFixedLengthBase *right_vec = static_cast(expr.args_[1]->get_vector(ctx)); + ResVec *res_vec = static_cast(expr.get_vector(ctx)); + int64_t size = bound.range_size(), unit = 512 / CHAR_BIT; + int64_t chunk = size * val_size; + int64_t unit_cnt = chunk / unit, remain = chunk % unit; + const char *left_data = left_vec->get_data() + bound.start() * val_size; + const char *right_data = right_vec->get_data() + bound.start() * val_size; + char *res_data = res_vec->get_data() + bound.start() * sizeof(int64_t); + int32_t res_off_perf_unit = unit / val_size * sizeof(int64_t) , batch_cnt = (unit / val_size) * 8; + int64_t output_idx = bound.start(); + int32_t offset = 0; + LOG_DEBUG("simd cmp", K(vec_tc), K(val_size), K(cmp_op), K(bound), K(unit_cnt)); + if (remain > 0) { + int cmp_ret = 0; + ObObjMeta obj_meta = expr.args_[0]->obj_meta_; + for (int i = 0; i < remain / val_size; i++) { + VecTCCmpCalc::cmp(obj_meta, obj_meta, left_data + offset, val_size, + right_data + offset, val_size, + cmp_ret); // ignore ret code + res_vec->set_int(output_idx, get_cmp_ret(cmp_ret)); + output_idx += 1; + offset += val_size; + res_data = res_data + sizeof(int64_t); + } + } + for (int i = 0; i < unit_cnt / 8; i++) { + LST_DO_CODE(DO_SIMD_CMP, 0, 1, 2, 3, 4, 5, 6, 7); + output_idx += batch_cnt; + offset += unit * 8; + res_data = res_data + res_off_perf_unit * 8; + } + switch (unit_cnt % 8) { + case 7: { + LST_DO_CODE(DO_SIMD_CMP, 0, 1, 2, 3, 4, 5, 6); + break; + } + case 6: { + LST_DO_CODE(DO_SIMD_CMP, 0, 1, 2, 3, 4, 5); + break; + } + case 5: { + LST_DO_CODE(DO_SIMD_CMP, 0, 1, 2, 3, 4); 
+ break; + } + case 4: { + LST_DO_CODE(DO_SIMD_CMP, 0, 1, 2, 3); + break; + } + case 3: { + LST_DO_CODE(DO_SIMD_CMP, 0, 1, 2); + break; + } + case 2: { + LST_DO_CODE(DO_SIMD_CMP, 0, 1); + break; + } + case 1: { + LST_DO_CODE(DO_SIMD_CMP, 0); + break; + } + default: { + break; + } + } + batch_cnt = (unit_cnt % 8) * (unit / val_size); + output_idx += batch_cnt; + ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); + eval_flags.set_all(bound.start(), bound.end()); + OB_ASSERT(output_idx == bound.end()); + return ret; +}; +) + + + +template +struct FixedExprCmpFuncIniter +{ + static void init_array() + { + return; + } +}; + +template +struct FixedExprCmpFuncIniter +{ + template + using EvalFunc = + FixedVectorCmp(X), static_cast(Y), cmp_op>; + static void init_array() + { + auto &funcs = EVAL_VECTOR_EXPR_CMP_FUNCS; + funcs[X][Y][CO_LE] = &EvalFunc::eval_vector; + funcs[X][Y][CO_LT] = &EvalFunc::eval_vector; + funcs[X][Y][CO_GE] = &EvalFunc::eval_vector; + funcs[X][Y][CO_GT] = &EvalFunc::eval_vector; + funcs[X][Y][CO_NE] = &EvalFunc::eval_vector; + funcs[X][Y][CO_EQ] = &EvalFunc::eval_vector; + funcs[X][Y][CO_CMP] = &EvalFunc::eval_vector; + } +}; + +template +using fixed_cmp_initer = FixedExprCmpFuncIniter< + X, Y, + VecTCCmpCalc(X), static_cast(Y)>::defined_ + && is_fixed_length_vec(static_cast(X)) + && is_fixed_length_vec(static_cast(Y))>; + +static int g_init_fixed_eval_vector_cmp_funcs = Ob2DArrayConstIniter::init(); + +} // end common +} // end oceanbase \ No newline at end of file diff --git a/src/share/vector/ob_continuous_base.h b/src/share/vector/ob_continuous_base.h index c61c84188..56b0930d5 100644 --- a/src/share/vector/ob_continuous_base.h +++ b/src/share/vector/ob_continuous_base.h @@ -36,13 +36,13 @@ public: uint32_t *offsets, const int64_t start_idx, const int64_t read_rows, char *data) { - has_null_ = has_null; + UNUSED(has_null); + has_null_ = false; nulls_->reset(read_rows); - if (has_null) { - for (int64_t i = 0; i < read_rows; ++i) { - if 
(nulls.at(start_idx + i)) { - nulls_->set(i); - } + for (int64_t i = 0; i < read_rows; ++i) { + if (nulls.at(start_idx + i)) { + nulls_->set(i); + has_null_ = true; } } offsets_ = offsets + start_idx; diff --git a/src/share/vector/ob_continuous_format.h b/src/share/vector/ob_continuous_format.h index bd3f7da4b..a261d5f4a 100644 --- a/src/share/vector/ob_continuous_format.h +++ b/src/share/vector/ob_continuous_format.h @@ -66,6 +66,8 @@ public: OB_INLINE int to_row(const sql::RowMeta &row_meta, sql::ObCompactRow *stored_row, const uint64_t row_idx, const int64_t col_idx, const int64_t remain_size, const bool is_fixed_length_data, int64_t &row_size) const override final; + DEF_VEC_READ_INTERFACES(ObContinuousFormat); + DEF_VEC_WRITE_INTERFACES(ObContinuousFormat); }; OB_INLINE void ObContinuousFormat::get_payload(const int64_t idx, diff --git a/src/share/vector/ob_discrete_format.h b/src/share/vector/ob_discrete_format.h index 5cbec64e4..fd5db599d 100644 --- a/src/share/vector/ob_discrete_format.h +++ b/src/share/vector/ob_discrete_format.h @@ -81,6 +81,8 @@ public: OB_INLINE int to_row(const sql::RowMeta &row_meta, sql::ObCompactRow *stored_row, const uint64_t row_idx, const int64_t col_idx, const int64_t remain_size, const bool is_fixed_length_data, int64_t &row_size) const override final; + DEF_VEC_READ_INTERFACES(ObDiscreteFormat); + DEF_VEC_WRITE_INTERFACES(ObDiscreteFormat); }; OB_INLINE void ObDiscreteFormat::get_payload(const int64_t idx, diff --git a/src/share/vector/ob_fixed_length_base.cpp b/src/share/vector/ob_fixed_length_base.cpp index 4b5821a6d..9f17d5322 100644 --- a/src/share/vector/ob_fixed_length_base.cpp +++ b/src/share/vector/ob_fixed_length_base.cpp @@ -24,12 +24,24 @@ namespace common const int64_t size, const int64_t col_idx) const { - for (int64_t i = 0; i < size; i++) { - int64_t row_idx = selector[i]; - if (nulls_->at(row_idx)) { - stored_rows[i]->set_null(row_meta, col_idx); - } else { - stored_rows[i]->set_cell_payload(row_meta, col_idx, 
data_ + len_ * row_idx, len_); + if (row_meta.fixed_expr_reordered()) { + const int64_t offset = row_meta.get_fixed_cell_offset(col_idx); + for (int64_t i = 0; i < size; i++) { + int64_t row_idx = selector[i]; + if (nulls_->at(row_idx)) { + stored_rows[i]->set_null(row_meta, col_idx); + } else { + stored_rows[i]->set_fixed_cell_payload(data_ + len_ * row_idx, offset, len_); + } + } + } else { + for (int64_t i = 0; i < size; i++) { + int64_t row_idx = selector[i]; + if (nulls_->at(row_idx)) { + stored_rows[i]->set_null(row_meta, col_idx); + } else { + stored_rows[i]->set_cell_payload(row_meta, col_idx, data_ + len_ * row_idx, len_); + } } } } diff --git a/src/share/vector/ob_fixed_length_base.h b/src/share/vector/ob_fixed_length_base.h index 320f2a078..38aab5e94 100644 --- a/src/share/vector/ob_fixed_length_base.h +++ b/src/share/vector/ob_fixed_length_base.h @@ -41,15 +41,13 @@ public: const int64_t fixed_len, const int64_t start_idx, const int64_t read_rows, char *data) { - has_null_ = has_null; - // TODO: fix deep copy bug - //nulls_->deep_copy(nulls, start_idx, start_idx + read_rows); + UNUSED(has_null); + has_null_ = false; nulls_->reset(read_rows); - if (has_null) { - for (int64_t i = 0; i < read_rows; ++i) { - if (nulls.at(start_idx + i)) { - nulls_->set(i); - } + for (int64_t i = 0; i < read_rows; ++i) { + if (nulls.at(start_idx + i)) { + nulls_->set(i); + has_null_ = true; } } len_ = static_cast (fixed_len); diff --git a/src/share/vector/ob_fixed_length_format.h b/src/share/vector/ob_fixed_length_format.h index 5359f2ad8..bdcd42ad3 100644 --- a/src/share/vector/ob_fixed_length_format.h +++ b/src/share/vector/ob_fixed_length_format.h @@ -77,6 +77,8 @@ public: const uint64_t row_idx, const int64_t col_idx, const int64_t remain_size, const bool is_fixed_length_data, int64_t &row_size) const override final; OB_INLINE int32_t type_size() const { return sizeof(ValueType); } + DEF_VEC_READ_INTERFACES(ObFixedLengthFormat); + 
DEF_VEC_WRITE_INTERFACES(ObFixedLengthFormat); }; template diff --git a/src/share/vector/ob_i_vector.h b/src/share/vector/ob_i_vector.h index 0e40ae84c..6385dfe06 100644 --- a/src/share/vector/ob_i_vector.h +++ b/src/share/vector/ob_i_vector.h @@ -51,6 +51,367 @@ namespace common const char *r_v, \ const ObLength r_len, \ int &cmp_ret + +#define DEF_VEC_READ_INTERFACES(Derived) \ +public: \ + OB_INLINE bool is_false(const int64_t idx) const \ + { \ + return !derived_this().is_null(idx) && 0 == get_int(idx); \ + } \ + OB_INLINE bool is_true(const int64_t idx) const \ + { \ + return !derived_this().is_null(idx) && 0 != get_int(idx); \ + } \ + OB_INLINE int8_t get_int8(const uint64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int8_t get_tinyint(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int16_t get_smallint(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int32_t get_mediumint(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int32_t get_int32(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int64_t get_int(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE uint8_t get_uint8(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE uint8_t get_utinyint(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE uint16_t get_usmallint(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE uint32_t get_umediumint(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE uint32_t get_uint32(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE uint64_t get_uint64(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE uint64_t get_uint(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE float get_float(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE double get_double(const int64_t idx) const \ + { \ + return 
get(idx); \ + } \ + OB_INLINE float get_ufloat(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE double get_udouble(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int64_t get_ext(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int64_t get_unknown(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE uint64_t get_bit(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE bool get_bool(const int64_t idx) \ + { \ + return 0 != get_int(idx); \ + } \ + OB_INLINE uint64_t get_enum(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE uint64_t get_set(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE uint64_t get_enumset(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int64_t get_interval_ym(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int64_t get_interval_nmonth(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int64_t get_datetime(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int64_t get_timestamp(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int32_t get_date(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE int64_t get_time(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE uint8_t get_year(const int64_t idx) const \ + { \ + return get(idx); \ + } \ + OB_INLINE const number::ObCompactNumber &get_number(const int64_t idx) const \ + { \ + return *(reinterpret_cast(derived_this().get_payload(idx))); \ + } \ + OB_INLINE const ObIntervalDSValue &get_interval_ds(const int64_t idx) const \ + { \ + return *(reinterpret_cast(derived_this().get_payload(idx))); \ + } \ + OB_INLINE const ObOTimestampData &get_otimestamp_tz(const int64_t idx) const \ + { \ + return *(reinterpret_cast(derived_this().get_payload(idx))); \ + } \ + OB_INLINE ObString get_string(const int64_t 
idx) const \ + { \ + const char *str = NULL; \ + ObLength len = 0; \ + derived_this().get_payload(idx, str, len); \ + return ObString(len, str); \ + } \ + OB_INLINE int get_enumset_inner(const int64_t idx, ObEnumSetInnerValue &inner_value) const \ + { \ + int64_t pos = 0; \ + const char *payload = NULL; \ + ObLength len = 0; \ + derived_this().get_payload(idx, payload, len); \ + return inner_value.deserialize(payload, len, pos); \ + } \ + OB_INLINE ObURowIDData get_urowid(const int64_t idx) const \ + { \ + const char *ptr = NULL; \ + ObLength len = 0; \ + derived_this().get_payload(idx, ptr, len); \ + return ObURowIDData(len, reinterpret_cast(ptr)); \ + } \ + OB_INLINE const ObLobLocator &get_lob_locator(const int64_t idx) const \ + { \ + return *(reinterpret_cast(derived_this().get_payload(idx))); \ + } \ + OB_INLINE const ObLobCommon &get_lob_data(const int64_t idx) const \ + { \ + return *(reinterpret_cast(derived_this().get_payload(idx))); \ + } \ + OB_INLINE const ObDecimalInt *get_decimal_int(const int64_t idx) const \ + { \ + return reinterpret_cast(derived_this().get_payload(idx)); \ + } \ + \ +private: \ + const Derived &derived_this() const \ + { \ + return *static_cast(this); \ + } \ + template \ + OB_INLINE T get(const int64_t idx) const \ + { \ + static_assert(sizeof(T) <= sizeof(int64_t), "invalid type"); \ + return *reinterpret_cast(derived_this().get_payload(idx)); \ + } + +#define DEF_VEC_WRITE_INTERFACES(Derived) \ +public: \ + OB_INLINE void set_int(const int64_t idx, const int64_t v) \ + { \ + set(idx, v); \ + }; \ + OB_INLINE void set_int32(const int64_t idx, const int32_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_uint(const int64_t idx, const uint64_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_uint32(const int64_t idx, const uint32_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_bit(const int64_t idx, const uint64_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_bool(const int64_t idx, const bool v) \ 
+ { \ + set_int(idx, static_cast(v)); \ + } \ + OB_INLINE void set_true(const int64_t idx) \ + { \ + set_int(idx, static_cast(true)); \ + } \ + OB_INLINE void set_false(const int64_t idx) \ + { \ + set_int(idx, static_cast(false)); \ + } \ + OB_INLINE void set_float(const int64_t idx, const float v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_double(const int64_t idx, const double v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_enum(const int64_t idx, const uint64_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_set(const int64_t idx, const uint64_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_datetime(const int64_t idx, const int64_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_timestamp(const int64_t idx, const int64_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_time(const int64_t idx, const int64_t v) \ + { \ + set_int(idx, v); \ + } \ + OB_INLINE void set_date(const int64_t idx, const int32_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_year(const int64_t idx, const int8_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_interval_nmonth(const int64_t idx, const int64_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_interval_ym(const int64_t idx, const int64_t v) \ + { \ + set(idx, v); \ + } \ + OB_INLINE void set_interval_ds(const int64_t idx, const ObIntervalDSValue &v) \ + { \ + derived_this().set_payload_shallow(idx, &v, v.get_store_size()); \ + } \ + OB_INLINE void set_otimestamp_tz(const int64_t idx, const ObOTimestampData &v) \ + { \ + *(reinterpret_cast(no_cv(derived_this().get_payload(idx)))) = v; \ + } \ + OB_INLINE void set_otimestamp(const int64_t idx, const ObOTimestampData &v) \ + { \ + *(reinterpret_cast(no_cv(derived_this().get_payload(idx)))) = v; \ + } \ + OB_INLINE void set_number(const int64_t idx, const number::ObNumber &num) \ + { \ + using CptNumber = number::ObCompactNumber; \ + CptNumber *cnum = reinterpret_cast(no_cv(derived_this().get_payload(idx))); \ + 
cnum->desc_ = num.d_; \ + const ObLength len = num.d_.len_ * sizeof(*num.get_digits()); \ + MEMCPY(&cnum->digits_[0], num.get_digits(), len); \ + derived_this().set_payload_shallow(idx, cnum, len + sizeof(ObNumberDesc)); \ + } \ + OB_INLINE void set_number(const int64_t idx, const number::ObCompactNumber &cnum) \ + { \ + ObLength len = \ + static_cast(sizeof(cnum) + cnum.desc_.len_ * sizeof(cnum.digits_[0])); \ + derived_this().set_payload(idx, &cnum, len); \ + } \ + OB_INLINE void set_number_shallow(const int64_t idx, const number::ObCompactNumber &cnum) \ + { \ + ObLength len = \ + static_cast(sizeof(cnum) + cnum.desc_.len_ * sizeof(cnum.digits_[0])); \ + derived_this().set_payload_shallow(idx, &cnum, len); \ + } \ + OB_INLINE void set_string(const int64_t idx, const ObString &v) \ + { \ + derived_this().set_payload_shallow(idx, v.ptr(), v.length()); \ + } \ + OB_INLINE void set_string(const int64_t idx, const char *ptr, const uint32_t len) \ + { \ + derived_this().set_payload_shallow(idx, ptr, len); \ + } \ + OB_INLINE void set_enumset_inner(const int64_t idx, const ObString &v) \ + { \ + set_string(idx, v); \ + } \ + OB_INLINE void set_enumset_inner(const int64_t idx, const char *ptr, const uint32_t len) \ + { \ + set_string(idx, ptr, len); \ + } \ + OB_INLINE void set_urowid(const int64_t idx, const ObURowIDData &urowid_data) \ + { \ + const char *ptr = reinterpret_cast(urowid_data.rowid_content_); \ + ObLength len = static_cast(urowid_data.rowid_len_); \ + derived_this().set_payload(idx, ptr, len); \ + } \ + OB_INLINE void set_urowid(const int64_t idx, const char *ptr, const int64_t size) \ + { \ + derived_this().set_payload(idx, ptr, static_cast(size)); \ + } \ + OB_INLINE void set_lob_locator(const int64_t idx, const ObLobLocator &value) \ + { \ + derived_this().set_payload(idx, &value, static_cast(value.get_total_size())); \ + } \ + OB_INLINE void set_lob_data(const int64_t idx, const ObLobCommon &value, int64_t length) \ + { \ + 
derived_this().set_payload(idx, &value, static_cast(length)); \ + } \ + OB_INLINE void set_decimal_int(const int64_t idx, const ObDecimalInt *decint, int32_t len) \ + { \ + derived_this().set_payload(idx, decint, static_cast(len)); \ + } \ + \ +private: \ + template \ + OB_INLINE __attribute__((always_inline)) T *no_cv(const T *ptr) const \ + { \ + return const_cast(ptr); \ + } \ + Derived &derived_this() \ + { \ + return *static_cast(this); \ + } \ + template \ + OB_INLINE void set(const int64_t idx, const T value) \ + { \ + static_assert(sizeof(T) <= sizeof(int64_t), "invalid type"); \ + static_cast(this)->set_payload(idx, &value, sizeof(T)); \ + } + /* ObIVector | @@ -116,13 +477,6 @@ public: virtual void unset_null(const int64_t idx) = 0; void set_null(const sql::EvalBound &bound); - bool is_false(const int64_t idx) const { - return !is_null(idx) && 0 == get_int(idx); - } - bool is_true(const int64_t idx) const { - return !is_null(idx) && 0 != get_int(idx); - } - virtual int default_hash(BATCH_EVAL_HASH_ARGS) const = 0; virtual int murmur_hash(BATCH_EVAL_HASH_ARGS) const = 0; // In vectorization 1.0, hash value (calculated by murmur_hash_v2) of null is inconsistent for different types. 
@@ -137,88 +491,6 @@ public: virtual int null_first_cmp(VECTOR_ONE_COMPARE_ARGS) const = 0; virtual int null_last_cmp(VECTOR_ONE_COMPARE_ARGS) const = 0; - // Vector get idx-th value interface - OB_INLINE int8_t get_int8(const uint64_t idx) const { return get(idx); } - OB_INLINE int8_t get_tinyint(const int64_t idx) const { return get(idx); } - OB_INLINE int16_t get_smallint(const int64_t idx) const { return get(idx); } - OB_INLINE int32_t get_mediumint(const int64_t idx) const { return get(idx); } - OB_INLINE int32_t get_int32(const int64_t idx) const { return get(idx); } - OB_INLINE int64_t get_int(const int64_t idx) const { return get(idx); } - OB_INLINE uint8_t get_uint8(const int64_t idx) const { return get(idx); } - OB_INLINE uint8_t get_utinyint(const int64_t idx) const { return get(idx); } - OB_INLINE uint16_t get_usmallint(const int64_t idx) const { return get(idx); } - OB_INLINE uint32_t get_umediumint(const int64_t idx) const { return get(idx); } - OB_INLINE uint32_t get_uint32(const int64_t idx) const { return get(idx); } - OB_INLINE uint64_t get_uint64(const int64_t idx) const { return get(idx); } - OB_INLINE uint64_t get_uint(const int64_t idx) const { return get(idx); } - OB_INLINE float get_float(const int64_t idx) const { return get(idx); } - OB_INLINE double get_double(const int64_t idx) const { return get(idx); } - OB_INLINE float get_ufloat(const int64_t idx) const { return get(idx); } - OB_INLINE double get_udouble(const int64_t idx) const { return get(idx); } - OB_INLINE int64_t get_ext(const int64_t idx) const { return get(idx); } - OB_INLINE int64_t get_unknown(const int64_t idx) const { return get(idx); } - OB_INLINE uint64_t get_bit(const int64_t idx) const { return get(idx); } - OB_INLINE bool get_bool(const int64_t idx) { return 0 != get_int(idx); } - OB_INLINE uint64_t get_enum(const int64_t idx) const { return get(idx); } - OB_INLINE uint64_t get_set(const int64_t idx) const { return get(idx); } - OB_INLINE uint64_t get_enumset(const 
int64_t idx) const { return get(idx); } - OB_INLINE const number::ObCompactNumber &get_number(const int64_t idx) const; - OB_INLINE int64_t get_interval_ym(const int64_t idx) const { return get(idx); } - OB_INLINE int64_t get_interval_nmonth(const int64_t idx) const { return get(idx); } - OB_INLINE const ObIntervalDSValue &get_interval_ds(const int64_t idx) const; - OB_INLINE int64_t get_datetime(const int64_t idx) const { return get(idx); } - OB_INLINE int64_t get_timestamp(const int64_t idx) const { return get(idx); } - OB_INLINE int32_t get_date(const int64_t idx) const { return get(idx); } - OB_INLINE int64_t get_time(const int64_t idx) const { return get(idx); } - OB_INLINE uint8_t get_year(const int64_t idx) const { return get(idx); } - OB_INLINE const ObOTimestampData &get_otimestamp_tz(const int64_t idx) const; - OB_INLINE ObString get_string(const int64_t idx) const; - OB_INLINE int get_enumset_inner(const int64_t idx, ObEnumSetInnerValue &inner_value) const; - OB_INLINE ObURowIDData get_urowid(const int64_t idx) const; - OB_INLINE const ObLobLocator &get_lob_locator(const int64_t idx) const; - OB_INLINE const ObLobCommon &get_lob_data(const int64_t idx) const; - - OB_INLINE const ObDecimalInt *get_decimal_int(const int64_t idx) const; - - // Vector set idx-th value interface - OB_INLINE void set_int(const int64_t idx, const int64_t v) { set(idx, v); }; - OB_INLINE void set_int32(const int64_t idx, const int32_t v) { set(idx, v); } - OB_INLINE void set_uint(const int64_t idx, const uint64_t v) { set(idx, v); } - OB_INLINE void set_uint32(const int64_t idx, const uint32_t v) { set(idx, v); } - OB_INLINE void set_bit(const int64_t idx, const uint64_t v) { set(idx, v); } - OB_INLINE void set_bool(const int64_t idx, const bool v) { set_int(idx, static_cast(v)); } - OB_INLINE void set_true(const int64_t idx) { set_int(idx, static_cast(true)); } - OB_INLINE void set_false(const int64_t idx) { set_int(idx, static_cast(false)); } - OB_INLINE void set_float(const 
int64_t idx, const float v) { set(idx, v); } - OB_INLINE void set_double(const int64_t idx, const double v) { set(idx, v); } - OB_INLINE void set_enum(const int64_t idx, const uint64_t v) { set(idx, v); } - OB_INLINE void set_set(const int64_t idx, const uint64_t v) { set(idx, v); } - OB_INLINE void set_interval_nmonth(const int64_t idx, const int64_t interval_nmonth); - OB_INLINE void set_interval_ym(const int64_t idx, const int64_t interval_nmonth); - OB_INLINE void set_interval_ds(const int64_t idx, const ObIntervalDSValue &v); - OB_INLINE void set_datetime(const int64_t idx, const int64_t v) { set(idx, v); } - OB_INLINE void set_timestamp(const int64_t idx, const int64_t v) { set(idx, v); } - OB_INLINE void set_otimestamp_tz(const int64_t idx, const ObOTimestampData &v); - OB_INLINE void set_time(const int64_t idx, const int64_t v) { set_int(idx, v); } - OB_INLINE void set_otimestamp(const int64_t idx, const ObOTimestampData &v); - OB_INLINE void set_date(const int64_t idx, const int32_t v) { set(idx, v); } - OB_INLINE void set_year(const int64_t idx, const int8_t v) { set(idx, v); } - // OB_INLINE number, deep copy all number digits here. - OB_INLINE void set_number(const int64_t idx, const number::ObNumber &num); - // OB_INLINE compact number, deep copy all number digits too. 
- OB_INLINE void set_number(const int64_t idx, const number::ObCompactNumber &cnum); - OB_INLINE void set_number_shallow(const int64_t idx, const number::ObCompactNumber &cnum); - OB_INLINE void set_string(const int64_t idx, const ObString &v); - OB_INLINE void set_string(const int64_t idx, const char *ptr, const uint32_t len); - OB_INLINE void set_enumset_inner(const int64_t idx, const ObString &v) { set_string(idx, v); } - OB_INLINE void set_enumset_inner(const int64_t idx, const char *ptr, const uint32_t len); - OB_INLINE void set_urowid(const int64_t idx, const ObURowIDData &urowid_data); - OB_INLINE void set_urowid(const int64_t idx, const char *ptr, const int64_t size); - OB_INLINE void set_lob_locator(const int64_t idx, const ObLobLocator &value); - OB_INLINE void set_lob_data(const int64_t idx, const ObLobCommon &value, int64_t length); - - OB_INLINE void set_decimal_int(const int64_t idx, const ObDecimalInt *decint, int32_t len); - // append values to this vector from idx-th column of rows virtual int from_rows(const sql::RowMeta &row_meta, const sql::ObCompactRow **stored_rows, @@ -260,175 +532,10 @@ public: UNUSED(buf_len); return 0; } - -protected: - // Remove const value qualification for pointer. 
- template - OB_INLINE __attribute__((always_inline)) T *no_cv(const T *ptr) const { return const_cast(ptr); } - -private: - TYPE_CHECKER_DEF(is_native_ctype, bool, int8_t, int16_t, int32_t, int64_t, - uint8_t, uint16_t, uint32_t, uint64_t, - double, float); - - template - OB_INLINE T get(const int64_t idx) const { - static_assert(is_native_ctype::value, "invalid type"); - return *(reinterpret_cast(no_cv(get_payload(idx)))); - } - - // this interface will deep copy, if need shallow copy, - // use other interface - template - OB_INLINE void set(const int64_t idx, const T &value) { - static_assert(is_native_ctype::value, "invalid type"); - set_payload(idx, &value, sizeof(T)); - } - #undef TYPE_CHECKER_DEF + DEF_VEC_READ_INTERFACES(ObIVector); + DEF_VEC_WRITE_INTERFACES(ObIVector); }; -OB_INLINE const number::ObCompactNumber &ObIVector::get_number(const int64_t idx) const -{ - return *(reinterpret_cast(get_payload(idx))); -} - -OB_INLINE const ObIntervalDSValue &ObIVector::get_interval_ds(const int64_t idx) const { - return *(reinterpret_cast(get_payload(idx))); -} - -OB_INLINE const ObOTimestampData &ObIVector::get_otimestamp_tz(const int64_t idx) const { - return *(reinterpret_cast(get_payload(idx))); -} - -OB_INLINE int ObIVector::get_enumset_inner(const int64_t idx, ObEnumSetInnerValue &inner_value) const -{ - int64_t pos = 0; - const char *payload = NULL; - ObLength len = 0; - get_payload(idx, payload, len); - return inner_value.deserialize(payload, len, pos); -} - -OB_INLINE const ObLobLocator &ObIVector::get_lob_locator(const int64_t idx) const -{ - return *(reinterpret_cast(get_payload(idx))); -} - -OB_INLINE const ObDecimalInt *ObIVector::get_decimal_int(const int64_t idx) const -{ - return reinterpret_cast(get_payload(idx)); -} - -OB_INLINE const ObLobCommon &ObIVector::get_lob_data(const int64_t idx) const -{ - return *(reinterpret_cast(get_payload(idx))); -} - -OB_INLINE ObString ObIVector::get_string(const int64_t idx) const -{ - const char *str = NULL; - 
ObLength len = 0; - get_payload(idx, str, len); - return ObString(len, str); -} - -OB_INLINE ObURowIDData ObIVector::get_urowid(const int64_t idx) const -{ - const char *ptr = NULL; - ObLength len = 0; - get_payload(idx, ptr, len); - return ObURowIDData(len, reinterpret_cast(ptr)); -} - -OB_INLINE void ObIVector::set_interval_nmonth(const int64_t idx, const int64_t v) -{ - set(idx, v); -}; - -OB_INLINE void ObIVector::set_interval_ym(const int64_t idx, const int64_t v) -{ - set(idx, v); -}; - -OB_INLINE void ObIVector::set_interval_ds(const int64_t idx, const ObIntervalDSValue &v) -{ - set_payload_shallow(idx, &v, v.get_store_size()); -} - -OB_INLINE void ObIVector::set_otimestamp_tz(const int64_t idx, const ObOTimestampData &v) -{ - *(reinterpret_cast(no_cv(get_payload(idx)))) = v; -} - -OB_INLINE void ObIVector::set_otimestamp(const int64_t idx, const ObOTimestampData &v) -{ - *(reinterpret_cast(no_cv(get_payload(idx)))) = v; -} - -OB_INLINE void ObIVector::set_number(const int64_t idx, const number::ObNumber &num) -{ - using CptNumber = number::ObCompactNumber; - CptNumber *cnum = reinterpret_cast(no_cv(get_payload(idx))); - cnum->desc_ = num.d_; - const ObLength len = num.d_.len_ * sizeof(*num.get_digits()); - MEMCPY(&cnum->digits_[0], num.get_digits(), len); - set_payload_shallow(idx, cnum, len + sizeof(ObNumberDesc)); -} - -OB_INLINE void ObIVector::set_number(const int64_t idx, const number::ObCompactNumber &cnum) -{ - ObLength len = static_cast(sizeof(cnum) + cnum.desc_.len_ * sizeof(cnum.digits_[0])); - set_payload(idx, &cnum, len); -} - -OB_INLINE void ObIVector::set_number_shallow(const int64_t idx, const number::ObCompactNumber &cnum) -{ - ObLength len = static_cast(sizeof(cnum) + cnum.desc_.len_ * sizeof(cnum.digits_[0])); - set_payload_shallow(idx, &cnum, len); -}; - -OB_INLINE void ObIVector::set_string(const int64_t idx, const ObString &v) -{ - set_payload_shallow(idx, v.ptr(), v.length()); -} - -OB_INLINE void ObIVector::set_string(const int64_t 
idx, const char *ptr, const uint32_t len) -{ - set_payload_shallow(idx, ptr, len); -} - -OB_INLINE void ObIVector::set_enumset_inner(const int64_t idx, const char *ptr, const uint32_t len) -{ - set_string(idx, ptr, len); -} - -OB_INLINE void ObIVector::set_urowid(const int64_t idx, const ObURowIDData &urowid_data) -{ - const char *ptr = reinterpret_cast(urowid_data.rowid_content_); - ObLength len = static_cast(urowid_data.rowid_len_); - set_payload(idx, ptr, len); -} - -OB_INLINE void ObIVector::set_urowid(const int64_t idx, const char *ptr, const int64_t size) -{ - set_payload(idx, ptr, static_cast(size)); -} - -OB_INLINE void ObIVector::set_lob_locator(const int64_t idx, const ObLobLocator &value) -{ - set_payload(idx, &value, static_cast(value.get_total_size())); -} - -OB_INLINE void ObIVector::set_lob_data(const int64_t idx, const ObLobCommon &value, int64_t length) -{ - set_payload(idx, &value, static_cast(length)); -} - -OB_INLINE void ObIVector::set_decimal_int(const int64_t idx, const ObDecimalInt *decint, int32_t len) -{ - set_payload(idx, decint, static_cast(len)); -} - using IVectorPtrs = common::ObIArray; } diff --git a/src/share/vector/ob_uniform_format.h b/src/share/vector/ob_uniform_format.h index 8de23b3c6..e573cf2fa 100644 --- a/src/share/vector/ob_uniform_format.h +++ b/src/share/vector/ob_uniform_format.h @@ -108,6 +108,9 @@ public: OB_INLINE int to_row(const sql::RowMeta &row_meta, sql::ObCompactRow *stored_row, const uint64_t row_idx, const int64_t col_idx, const int64_t remain_size, const bool is_fixed_length_data, int64_t &row_size) const override final; + + DEF_VEC_READ_INTERFACES(ObUniformFormat); + DEF_VEC_WRITE_INTERFACES(ObUniformFormat); }; template diff --git a/src/share/vector/static_check_utils.h b/src/share/vector/static_check_utils.h index fdcb3a23f..0779b777b 100644 --- a/src/share/vector/static_check_utils.h +++ b/src/share/vector/static_check_utils.h @@ -30,11 +30,11 @@ struct exist_type { static const bool value = 
std::is_same::value || exist_type::value; }; -#define TYPE_CHECKER_DEF(checker_name, ...) \ -template \ -struct checker_name { \ - static constexpr bool value = exist_type::value; \ -}; +// #define TYPE_CHECKER_DEF(checker_name, ...) \ +// template \ +// struct checker_name { \ +// static constexpr bool value = exist_type::value; \ +// }; // define VALUE_CHECK // eg: DEF_CHECK_VALUE(VecValueTypeClass, is_decimal_tc, diff --git a/src/share/vector/vector_basic_op.h b/src/share/vector/vector_basic_op.h index 56b753c1f..69d56362e 100644 --- a/src/share/vector/vector_basic_op.h +++ b/src/share/vector/vector_basic_op.h @@ -184,11 +184,17 @@ struct VecTCHashCalc const ObLobLocator *lob_locator_v1 = reinterpret_cast(data); in_data.assign_ptr(lob_locator_v1->get_payload_ptr(), lob_locator_v1->payload_size_); } else if (loc.is_valid()) { - ObTextStringIter text_iter(ObLongTextType, CS_TYPE_BINARY, raw_data, true); - if (OB_FAIL(text_iter.init(0, NULL, &allocator))) { - COMMON_LOG(WARN, "Lob: str iter init failed ", K(ret), K(text_iter)); - } else if (OB_FAIL(text_iter.get_full_data(in_data))) { - COMMON_LOG(WARN, "Lob: str iter get full data failed ", K(ret), K(text_iter)); + const ObLobCommon* lob = reinterpret_cast(data); + // fast path for disk inrow lob + if (data_len != 0 && !lob->is_mem_loc_ && lob->in_row_) { + in_data.assign_ptr(lob->get_inrow_data_ptr(), static_cast(lob->get_byte_size(data_len))); + } else { + ObTextStringIter text_iter(ObLongTextType, CS_TYPE_BINARY, raw_data, true); + if (OB_FAIL(text_iter.init(0, NULL, &allocator))) { + COMMON_LOG(WARN, "Lob: str iter init failed ", K(ret), K(text_iter)); + } else if (OB_FAIL(text_iter.get_full_data(in_data))) { + COMMON_LOG(WARN, "Lob: str iter get full data failed ", K(ret), K(text_iter)); + } } } else { // not v1 or v2 lob ret = OB_INVALID_ARGUMENT; diff --git a/src/sql/code_generator/ob_static_engine_cg.cpp b/src/sql/code_generator/ob_static_engine_cg.cpp index 8327c0a9d..7c46b5915 100644 --- 
a/src/sql/code_generator/ob_static_engine_cg.cpp +++ b/src/sql/code_generator/ob_static_engine_cg.cpp @@ -5111,6 +5111,8 @@ int ObStaticEngineCG::generate_tsc_flags(ObLogTableScan &op, ObTableScanSpec &sp LOG_WARN("failed to init tenant config", K(tenant_id)); } else { const int64_t pd_level = tenant_config->_pushdown_storage_level; + const int64_t io_read_batch_size = tenant_config->_io_read_batch_size; + const int64_t io_read_gap_size = io_read_batch_size * tenant_config->_io_read_redundant_limit_percentage / 100; pd_blockscan = ObPushdownFilterUtils::is_blockscan_pushdown_enabled(pd_level); pd_filter = ObPushdownFilterUtils::is_filter_pushdown_enabled(pd_level); enable_skip_index = tenant_config->_enable_skip_index; @@ -5120,9 +5122,15 @@ int ObStaticEngineCG::generate_tsc_flags(ObLogTableScan &op, ObTableScanSpec &sp ObDASScanCtDef *lookup_ctdef = spec.tsc_ctdef_.lookup_ctdef_; scan_ctdef.pd_expr_spec_.pd_storage_flag_.set_flags(pd_blockscan, pd_filter, enable_skip_index, enable_column_store, enable_prefetch_limit); + scan_ctdef.table_scan_opt_.io_read_batch_size_ = io_read_batch_size; + scan_ctdef.table_scan_opt_.io_read_gap_size_ = io_read_gap_size; + scan_ctdef.table_scan_opt_.storage_rowsets_size_ = tenant_config->storage_rowsets_size; if (nullptr != lookup_ctdef) { lookup_ctdef->pd_expr_spec_.pd_storage_flag_.set_flags(pd_blockscan, pd_filter, enable_skip_index, enable_column_store, enable_prefetch_limit); + lookup_ctdef->table_scan_opt_.io_read_batch_size_ = io_read_batch_size; + lookup_ctdef->table_scan_opt_.io_read_gap_size_ = io_read_gap_size; + lookup_ctdef->table_scan_opt_.storage_rowsets_size_ = tenant_config->storage_rowsets_size; } } } diff --git a/src/sql/das/ob_das_scan_op.cpp b/src/sql/das/ob_das_scan_op.cpp index 8aaa7d10d..20bb7a5a4 100644 --- a/src/sql/das/ob_das_scan_op.cpp +++ b/src/sql/das/ob_das_scan_op.cpp @@ -262,6 +262,7 @@ int ObDASScanOp::init_scan_param() scan_param_.limit_param_ = scan_rtdef_->limit_param_; scan_param_.need_scn_ = 
scan_rtdef_->need_scn_; scan_param_.pd_storage_flag_ = scan_ctdef_->pd_expr_spec_.pd_storage_flag_.pd_flag_; + scan_param_.table_scan_opt_ = scan_ctdef_->table_scan_opt_; scan_param_.fb_snapshot_ = scan_rtdef_->fb_snapshot_; scan_param_.fb_read_tx_uncommitted_ = scan_rtdef_->fb_read_tx_uncommitted_; if (scan_rtdef_->is_for_foreign_check_) { @@ -1641,6 +1642,7 @@ OB_INLINE int ObLocalIndexLookupOp::init_scan_param() scan_param_.limit_param_ = lookup_rtdef_->limit_param_; scan_param_.need_scn_ = lookup_rtdef_->need_scn_; scan_param_.pd_storage_flag_ = lookup_ctdef_->pd_expr_spec_.pd_storage_flag_.pd_flag_; + scan_param_.table_scan_opt_ = lookup_ctdef_->table_scan_opt_; scan_param_.fb_snapshot_ = lookup_rtdef_->fb_snapshot_; scan_param_.fb_read_tx_uncommitted_ = lookup_rtdef_->fb_read_tx_uncommitted_; scan_param_.ls_id_ = ls_id_; diff --git a/src/sql/dtl/ob_dtl_basic_channel.cpp b/src/sql/dtl/ob_dtl_basic_channel.cpp index f0d26731a..b9e92054f 100644 --- a/src/sql/dtl/ob_dtl_basic_channel.cpp +++ b/src/sql/dtl/ob_dtl_basic_channel.cpp @@ -1379,7 +1379,7 @@ int ObDtlVectorRowMsgWriter::init(ObDtlLinkedBuffer *buffer, uint64_t tenant_id) if (OB_FAIL(ObTempBlockStore::init_block_buffer(buffer->buf(), buffer->size(), blk))) { LOG_WARN("fail to init block buffer", K(ret)); } else { - block_ = static_cast(blk); + block_ = static_cast(blk); block_buffer_ = block_->get_buffer(); write_buffer_ = buffer; } diff --git a/src/sql/dtl/ob_dtl_basic_channel.h b/src/sql/dtl/ob_dtl_basic_channel.h index 2cdf46ce2..95fada438 100644 --- a/src/sql/dtl/ob_dtl_basic_channel.h +++ b/src/sql/dtl/ob_dtl_basic_channel.h @@ -344,10 +344,12 @@ public: { buffer->msg_type() = ObDtlMsgType::PX_VECTOR_ROW; } + OB_INLINE ObTempRowStore::DtlRowBlock *get_block() { return block_; } + OB_INLINE ObDtlLinkedBuffer *get_write_buffer() { return write_buffer_; } private: DtlWriterType type_; ObDtlLinkedBuffer *write_buffer_; - ObTempRowStore::RowBlock* block_; + ObTempRowStore::DtlRowBlock *block_; 
ObTempRowStore::ShrinkBuffer *block_buffer_; RowMeta row_meta_; int64_t row_cnt_; diff --git a/src/sql/dtl/ob_dtl_vectors_buffer.cpp b/src/sql/dtl/ob_dtl_vectors_buffer.cpp index 4b1d8c15e..a80cfc5c9 100644 --- a/src/sql/dtl/ob_dtl_vectors_buffer.cpp +++ b/src/sql/dtl/ob_dtl_vectors_buffer.cpp @@ -450,15 +450,31 @@ int ObDtlVectors::append_batch(const ObIArray &exprs, const ObIArrayget_format(ctx)) { case VEC_FIXED : { ObFixedLengthBase *fixed_vec = static_cast (vectors.at(col_idx)); - for (int64_t i = 0; i < size; ++i) { - int64_t row_idx = selector[i]; - if (fixed_vec->get_nulls()->at(row_idx)) { - get_nulls(col_idx)->set(virtual_row_cnt); - } else { - memcpy(dst_data + fixed_len * (virtual_row_cnt), - fixed_vec->get_data() + fixed_len * row_idx, fixed_len); + if (0 == fixed_len % 8) { + for (int64_t i = 0; i < size; ++i) { + int64_t row_idx = selector[i]; + if (fixed_vec->get_nulls()->at(row_idx)) { + get_nulls(col_idx)->set(virtual_row_cnt); + } else { + int64_t base_offset = fixed_len * (virtual_row_cnt); + for (int64_t i = 0; i < fixed_len / 8; ++i) { + *(reinterpret_cast (dst_data + sizeof(int64_t) * i + base_offset)) + = *(reinterpret_cast (fixed_vec->get_data() + sizeof(int64_t) * i + fixed_len * row_idx)); + } + } + ++virtual_row_cnt; + } + } else { + for (int64_t i = 0; i < size; ++i) { + int64_t row_idx = selector[i]; + if (fixed_vec->get_nulls()->at(row_idx)) { + get_nulls(col_idx)->set(virtual_row_cnt); + } else { + memcpy(dst_data + fixed_len * (virtual_row_cnt), + fixed_vec->get_data() + fixed_len * row_idx, fixed_len); + } + ++virtual_row_cnt; } - ++virtual_row_cnt; } break; } @@ -493,15 +509,32 @@ int ObDtlVectors::append_batch(const ObIArray &exprs, const ObIArray (vectors.at(col_idx)); - for (int64_t i = 0; i < size; ++i) { - int64_t row_idx = selector[i]; - ObDatum &cell = uniform_vec->get_datums()[row_idx]; - if (cell.is_null()) { - get_nulls(col_idx)->set(virtual_row_cnt); - } else { - memcpy(dst_data + fixed_len * (virtual_row_cnt), 
cell.ptr_, fixed_len); + if (0 == fixed_len % 8) { + for (int64_t i = 0; i < size; ++i) { + int64_t row_idx = selector[i]; + ObDatum &cell = uniform_vec->get_datums()[row_idx]; + if (cell.is_null()) { + get_nulls(col_idx)->set(virtual_row_cnt); + } else { + int64_t base_offset = fixed_len * (virtual_row_cnt); + for (int64_t i = 0; i < fixed_len / 8; ++i) { + *(reinterpret_cast (dst_data + base_offset + sizeof(int64_t) * i)) + = *(reinterpret_cast (cell.ptr_ + sizeof(int64_t) * i)); + } + } + ++virtual_row_cnt; + } + } else { + for (int64_t i = 0; i < size; ++i) { + int64_t row_idx = selector[i]; + ObDatum &cell = uniform_vec->get_datums()[row_idx]; + if (cell.is_null()) { + get_nulls(col_idx)->set(virtual_row_cnt); + } else { + memcpy(dst_data + fixed_len * (virtual_row_cnt), cell.ptr_, fixed_len); + } + ++virtual_row_cnt; } - ++virtual_row_cnt; } break; } diff --git a/src/sql/engine/aggregate/ob_exec_hash_struct_vec.cpp b/src/sql/engine/aggregate/ob_exec_hash_struct_vec.cpp index 5d1a480e2..357ec898e 100644 --- a/src/sql/engine/aggregate/ob_exec_hash_struct_vec.cpp +++ b/src/sql/engine/aggregate/ob_exec_hash_struct_vec.cpp @@ -182,12 +182,21 @@ int ObExtendHashTableVec::process_batch(const common::ObIArray::inner_process_batch(const common::ObIA int64_t &agg_row_cnt, int64_t &agg_group_cnt, BatchAggrRowsTable *batch_aggr_rows, - bool need_reinit_vectors) + bool need_reinit_vectors, + const bool probe_by_col, + const int64_t start_idx, + int64_t &processed_idx) { int ret = OB_SUCCESS; - int64_t curr_idx = 0; - while (OB_SUCC(ret) && curr_idx < child_brs.size_) { + int64_t curr_idx = start_idx; + bool need_fallback = false; + while (OB_SUCC(ret) && !need_fallback && curr_idx < child_brs.size_) { bool batch_duplicate = false; new_row_selector_cnt_ = 0; - for (; OB_SUCC(ret) && !batch_duplicate && curr_idx < child_brs.size_; ++curr_idx) { + old_row_selector_cnt_ = 0; + for (; OB_SUCC(ret) && curr_idx < child_brs.size_; ++curr_idx) { if (child_brs.skip_->at(curr_idx) 
|| is_dumped[curr_idx] || (nullptr != bloom_filter @@ -241,53 +255,60 @@ int ObExtendHashTableVec::inner_process_batch(const common::ObIA while (OB_SUCC(ret) && !find_bkt) { locate_buckets_[curr_idx] = const_cast (&locate_next_bucket(*buckets_, hash_values[curr_idx], curr_pos)); if (locate_buckets_[curr_idx]->is_valid()) { - ++probe_cnt_; - ++agg_row_cnt; - bool result = true; - ObGroupRowItemVec *it = &locate_buckets_[curr_idx]->get_item(); - for (int64_t i = 0; OB_SUCC(ret) && result && i < gby_exprs.count(); ++i) { - bool null_equal = (nullptr == gby_exprs.at(i)); - if (!null_equal) { - ObIVector *r_vec = gby_exprs.at(i)->get_vector(*eval_ctx_); - const bool l_isnull = it->is_null(i); - const bool r_isnull = r_vec->is_null(curr_idx); - if (l_isnull != r_isnull) { - result = false; - } else if (l_isnull && r_isnull) { - result = true; - } else { - const int64_t l_len = it->get_length(group_store_.get_row_meta(), i); - const int64_t r_len = r_vec->get_length(curr_idx); - if (l_len == r_len - && 0 == memcmp(it->get_cell_payload(group_store_.get_row_meta(), i), - r_vec->get_payload(curr_idx), - r_len)) { + if (probe_by_col) { + old_row_selector_.at(old_row_selector_cnt_++) = curr_idx; + __builtin_prefetch(&locate_buckets_[curr_idx]->get_item(), + 0, 2); + find_bkt = true; + } else { + bool result = true; + ObGroupRowItemVec *it = &locate_buckets_[curr_idx]->get_item(); + for (int64_t i = 0; OB_SUCC(ret) && result && i < gby_exprs.count(); ++i) { + bool null_equal = (nullptr == gby_exprs.at(i)); + if (!null_equal) { + ObIVector *r_vec = gby_exprs.at(i)->get_vector(*eval_ctx_); + const bool l_isnull = it->is_null(i); + const bool r_isnull = r_vec->is_null(curr_idx); + if (l_isnull != r_isnull) { + result = false; + } else if (l_isnull && r_isnull) { result = true; } else { - int cmp_res = 0; - if (OB_FAIL(r_vec->null_last_cmp(*gby_exprs.at(i), curr_idx, false, - it->get_cell_payload(group_store_.get_row_meta(), i), - l_len, cmp_res))) { - LOG_WARN("failed to cmp left 
and right", K(ret)); + const int64_t l_len = it->get_length(group_store_.get_row_meta(), i); + const int64_t r_len = r_vec->get_length(curr_idx); + if (l_len == r_len + && 0 == memcmp(it->get_cell_payload(group_store_.get_row_meta(), i), + r_vec->get_payload(curr_idx), + r_len)) { + result = true; } else { - result = (0 == cmp_res); + int cmp_res = 0; + if (OB_FAIL(r_vec->null_last_cmp(*gby_exprs.at(i), curr_idx, false, + it->get_cell_payload(group_store_.get_row_meta(), i), + l_len, cmp_res))) { + LOG_WARN("failed to cmp left and right", K(ret)); + } else { + result = (0 == cmp_res); + } } } } } - } - if (OB_SUCC(ret) && result) { - batch_old_rows[curr_idx] = it->get_aggr_row(group_store_.get_row_meta()); - if (batch_aggr_rows && batch_aggr_rows->is_valid()) { - if (size_ > BatchAggrRowsTable::MAX_REORDER_GROUPS) { - batch_aggr_rows->set_invalid(); - } else { - int64_t ser_num = locate_buckets_[curr_idx]->get_bkt_seq(); - batch_aggr_rows->aggr_rows_[ser_num] = batch_old_rows[curr_idx]; - batch_aggr_rows->selectors_[ser_num][batch_aggr_rows->selectors_item_cnt_[ser_num]++] = curr_idx; + if (OB_SUCC(ret) && result) { + ++probe_cnt_; + ++agg_row_cnt; + batch_old_rows[curr_idx] = it->get_aggr_row(group_store_.get_row_meta()); + if (batch_aggr_rows && batch_aggr_rows->is_valid()) { + if (size_ > BatchAggrRowsTable::MAX_REORDER_GROUPS) { + batch_aggr_rows->set_invalid(); + } else { + int64_t ser_num = locate_buckets_[curr_idx]->get_bkt_seq(); + batch_aggr_rows->aggr_rows_[ser_num] = batch_old_rows[curr_idx]; + batch_aggr_rows->selectors_[ser_num][batch_aggr_rows->selectors_item_cnt_[ser_num]++] = curr_idx; + } } + find_bkt = true; } - find_bkt = true; } } else if (locate_buckets_[curr_idx]->is_occupyed()) { batch_duplicate = true; @@ -307,6 +328,48 @@ int ObExtendHashTableVec::inner_process_batch(const common::ObIA break; } } + if (OB_SUCC(ret) + && probe_by_col + && old_row_selector_cnt_ > 0) { + for (int64_t i = 0; OB_SUCC(ret) && !need_fallback && i < 
gby_exprs.count(); ++i) { + if (nullptr == gby_exprs.at(i)) { + //3 stage null equal + continue; + } + col_has_null_.at(i) |= gby_exprs.at(i)->get_vector(*eval_ctx_)->has_null(); + if (OB_FAIL(col_has_null_.at(i) ? + inner_process_column(gby_exprs, group_store_.get_row_meta(), i, need_fallback) + : inner_process_column_not_null(gby_exprs, group_store_.get_row_meta(), i, need_fallback))) { + LOG_WARN("failed to process column", K(ret), K(i)); + } + } + if (OB_SUCC(ret)) { + if (!need_fallback) { + probe_cnt_ += old_row_selector_cnt_; + agg_row_cnt += old_row_selector_cnt_; + if (batch_aggr_rows && batch_aggr_rows->is_valid() && size_ > BatchAggrRowsTable::MAX_REORDER_GROUPS) { + batch_aggr_rows->set_invalid(); + } + for (int64_t i = 0; i < old_row_selector_cnt_; ++i) { + const int64_t idx = old_row_selector_.at(i); + batch_old_rows[idx] = (static_cast (locate_buckets_[idx]))->get_item().get_aggr_row(group_store_.get_row_meta()); + if (batch_aggr_rows && batch_aggr_rows->is_valid()) { + int64_t ser_num = locate_buckets_[idx]->get_bkt_seq(); + batch_aggr_rows->aggr_rows_[ser_num] = batch_old_rows[idx]; + batch_aggr_rows->selectors_[ser_num][batch_aggr_rows->selectors_item_cnt_[ser_num]++] = idx; + } + } + } else { + //reset occupyed bkt and stat + for (int64_t i = 0; i < new_row_selector_cnt_; ++i) { + locate_buckets_[new_row_selector_.at(i)]->set_empty(); + } + probe_cnt_ -= new_row_selector_cnt_; + agg_row_cnt -= new_row_selector_cnt_; + continue; + } + } + } if (OB_SUCC(ret)) { if (can_append_batch && new_row_selector_cnt_ > 0 @@ -318,6 +381,236 @@ int ObExtendHashTableVec::inner_process_batch(const common::ObIA new_row_selector_cnt_ = 0; } } + processed_idx = curr_idx; + } + return ret; +} + +template +int ObExtendHashTableVec::inner_process_column(const common::ObIArray &gby_exprs, + const RowMeta &row_meta, + const int64_t col_idx, + bool &need_fallback) +{ + int ret = OB_SUCCESS; + switch (gby_exprs.at(col_idx)->get_format(*eval_ctx_)) { + case VEC_FIXED : { + 
ObFixedLengthBase *r_vec = static_cast (gby_exprs.at(col_idx)->get_vector(*eval_ctx_)); + int64_t r_len = r_vec->get_length(); + if (row_meta.fixed_expr_reordered()) { + const int64_t offset = row_meta.get_fixed_cell_offset(col_idx); + if (r_len == 8) { + for (int64_t i = 0; !need_fallback && i < old_row_selector_cnt_; ++i) { + const int64_t curr_idx = old_row_selector_.at(i); + ObCompactRow *it = static_cast (&locate_buckets_[curr_idx]->get_item()); + const bool l_isnull = it->is_null(col_idx); + const bool r_isnull = r_vec->get_nulls()->at(curr_idx); + if (l_isnull != r_isnull) { + need_fallback = true; + } else if (l_isnull && r_isnull) { + } else { + need_fallback = (*(reinterpret_cast (r_vec->get_data() + r_len * curr_idx)) + != *(reinterpret_cast (it->get_fixed_cell_payload(offset)))); + } + } + } else { + for (int64_t i = 0; !need_fallback && i < old_row_selector_cnt_; ++i) { + const int64_t curr_idx = old_row_selector_.at(i); + ObCompactRow *it = static_cast (&locate_buckets_[curr_idx]->get_item()); + const bool l_isnull = it->is_null(col_idx); + const bool r_isnull = r_vec->get_nulls()->at(curr_idx); + if (l_isnull != r_isnull) { + need_fallback = true; + } else if (l_isnull && r_isnull) { + } else { + need_fallback = (0 != memcmp(it->get_fixed_cell_payload(offset), + r_vec->get_data() + r_len * curr_idx, + r_len)); + } + } + } + } else { + for (int64_t i = 0; !need_fallback && i < old_row_selector_cnt_; ++i) { + const int64_t curr_idx = old_row_selector_.at(i); + ObCompactRow *it = static_cast (&locate_buckets_[curr_idx]->get_item()); + const bool l_isnull = it->is_null(col_idx); + const bool r_isnull = r_vec->get_nulls()->at(curr_idx); + if (l_isnull != r_isnull) { + need_fallback = true; + } else if (l_isnull && r_isnull) { + } else { + need_fallback = (0 != memcmp(it->get_cell_payload(row_meta, col_idx), + r_vec->get_data() + r_len * curr_idx, + r_len)); + } + } + } + break; + } + case VEC_DISCRETE : { + ObDiscreteBase *r_vec = static_cast 
(gby_exprs.at(col_idx)->get_vector(*eval_ctx_)); + for (int64_t i = 0; OB_SUCC(ret) && !need_fallback && i < old_row_selector_cnt_; ++i) { + const int64_t curr_idx = old_row_selector_.at(i); + ObCompactRow *it = static_cast (&locate_buckets_[curr_idx]->get_item()); + const bool l_isnull = it->is_null(col_idx); + const bool r_isnull = r_vec->get_nulls()->at(curr_idx); + if (l_isnull != r_isnull) { + need_fallback = true; + } else if (l_isnull && r_isnull) { + } else { + const int64_t r_len = r_vec->get_lens()[curr_idx]; + const int64_t l_len = it->get_length(row_meta, col_idx); + if (r_len == l_len + && 0 == memcmp(it->get_cell_payload(row_meta, col_idx), + r_vec->get_ptrs()[curr_idx], + r_len)) { + } else { + int cmp_res = 0; + if (OB_FAIL(r_vec->null_last_cmp(*gby_exprs.at(col_idx), curr_idx, false, + it->get_cell_payload(row_meta, col_idx), + l_len, cmp_res))) { + LOG_WARN("failed to cmp left and right", K(ret)); + } else { + need_fallback = static_cast (cmp_res); + } + } + } + } + break; + } + case VEC_CONTINUOUS : + case VEC_UNIFORM : + case VEC_UNIFORM_CONST : { + ObIVector *r_vec = gby_exprs.at(col_idx)->get_vector(*eval_ctx_); + for (int64_t i = 0; OB_SUCC(ret) && !need_fallback && i < old_row_selector_cnt_; ++i) { + const int64_t curr_idx = old_row_selector_.at(i); + ObCompactRow *it = static_cast (&locate_buckets_[curr_idx]->get_item()); + const bool l_isnull = it->is_null(col_idx); + const bool r_isnull = r_vec->is_null(curr_idx); + if (l_isnull != r_isnull) { + need_fallback = true; + } else if (l_isnull && r_isnull) { + } else { + const int64_t l_len = it->get_length(row_meta, col_idx); + const int64_t r_len = r_vec->get_length(curr_idx); + if (l_len == r_len + && 0 == memcmp(it->get_cell_payload(row_meta, col_idx), + r_vec->get_payload(curr_idx), + r_len)) { + } else { + int cmp_res = 0; + if (OB_FAIL(r_vec->null_last_cmp(*gby_exprs.at(col_idx), curr_idx, false, + it->get_cell_payload(row_meta, col_idx), + l_len, cmp_res))) { + LOG_WARN("failed to cmp 
left and right", K(ret)); + } else { + need_fallback = static_cast (cmp_res); + } + } + } + } + break; + } + default : + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid data format", K(ret), K(gby_exprs.at(col_idx)->get_format(*eval_ctx_))); + } + return ret; +} + +template +int ObExtendHashTableVec::inner_process_column_not_null(const common::ObIArray &gby_exprs, + const RowMeta &row_meta, + const int64_t col_idx, + bool &need_fallback) +{ + int ret = OB_SUCCESS; + switch (gby_exprs.at(col_idx)->get_format(*eval_ctx_)) { + case VEC_FIXED : { + ObFixedLengthBase *r_vec = static_cast (gby_exprs.at(col_idx)->get_vector(*eval_ctx_)); + int64_t r_len = r_vec->get_length(); + if (row_meta.fixed_expr_reordered()) { + const int64_t offset = row_meta.get_fixed_cell_offset(col_idx); + if (r_len == 8) { + for (int64_t i = 0; !need_fallback && i < old_row_selector_cnt_; ++i) { + const int64_t curr_idx = old_row_selector_.at(i); + ObCompactRow *it = static_cast (&locate_buckets_[curr_idx]->get_item()); + need_fallback = (*(reinterpret_cast (r_vec->get_data() + r_len * curr_idx)) + != *(reinterpret_cast (it->get_fixed_cell_payload(offset)))); + } + } else { + for (int64_t i = 0; !need_fallback && i < old_row_selector_cnt_; ++i) { + const int64_t curr_idx = old_row_selector_.at(i); + ObCompactRow *it = static_cast (&locate_buckets_[curr_idx]->get_item()); + need_fallback = (0 != memcmp(it->get_fixed_cell_payload(offset), + r_vec->get_data() + r_len * curr_idx, + r_len)); + } + } + } else { + for (int64_t i = 0; !need_fallback && i < old_row_selector_cnt_; ++i) { + const int64_t curr_idx = old_row_selector_.at(i); + ObCompactRow *it = static_cast (&locate_buckets_[curr_idx]->get_item()); + need_fallback = (0 != memcmp(it->get_cell_payload(row_meta, col_idx), + r_vec->get_data() + r_len * curr_idx, + r_len)); + } + } + break; + } + case VEC_DISCRETE : { + ObDiscreteBase *r_vec = static_cast (gby_exprs.at(col_idx)->get_vector(*eval_ctx_)); + for (int64_t i = 0; OB_SUCC(ret) && 
!need_fallback && i < old_row_selector_cnt_; ++i) { + const int64_t curr_idx = old_row_selector_.at(i); + ObCompactRow *it = static_cast (&locate_buckets_[curr_idx]->get_item()); + const int64_t r_len = r_vec->get_lens()[curr_idx]; + const int64_t l_len = it->get_length(row_meta, col_idx); + if (r_len == l_len + && 0 == memcmp(it->get_cell_payload(row_meta, col_idx), + r_vec->get_ptrs()[curr_idx], + r_len)) { + } else { + int cmp_res = 0; + if (OB_FAIL(r_vec->null_last_cmp(*gby_exprs.at(col_idx), curr_idx, false, + it->get_cell_payload(row_meta, col_idx), + l_len, cmp_res))) { + LOG_WARN("failed to cmp left and right", K(ret)); + } else { + need_fallback = static_cast (cmp_res); + } + } + } + break; + } + case VEC_CONTINUOUS : + case VEC_UNIFORM : + case VEC_UNIFORM_CONST : { + ObIVector *r_vec = gby_exprs.at(col_idx)->get_vector(*eval_ctx_); + for (int64_t i = 0; OB_SUCC(ret) && !need_fallback && i < old_row_selector_cnt_; ++i) { + const int64_t curr_idx = old_row_selector_.at(i); + ObCompactRow *it = static_cast (&locate_buckets_[curr_idx]->get_item()); + const int64_t l_len = it->get_length(row_meta, col_idx); + const int64_t r_len = r_vec->get_length(curr_idx); + if (l_len == r_len + && 0 == memcmp(it->get_cell_payload(row_meta, col_idx), + r_vec->get_payload(curr_idx), + r_len)) { + } else { + int cmp_res = 0; + if (OB_FAIL(r_vec->null_last_cmp(*gby_exprs.at(col_idx), curr_idx, false, + it->get_cell_payload(row_meta, col_idx), + l_len, cmp_res))) { + LOG_WARN("failed to cmp left and right", K(ret)); + } else { + need_fallback = static_cast (cmp_res); + } + } + } + break; + } + default : + ret = OB_ERR_UNEXPECTED; + LOG_WARN("invalid data format", K(ret), K(gby_exprs.at(col_idx)->get_format(*eval_ctx_))); } return ret; } @@ -352,6 +645,8 @@ int ObExtendHashTableVec::init(ObIAllocator *allocator, op_id_ = op_id; vector_ptrs_.set_allocator(allocator); new_row_selector_.set_allocator(allocator); + old_row_selector_.set_allocator(allocator); + 
col_has_null_.set_allocator(allocator); change_valid_idx_.set_allocator(allocator); if (use_sstr_aggr && OB_FAIL(sstr_aggr_.init(allocator_, *eval_ctx, gby_exprs, aggr_row_size))) { LOG_WARN("failed to init short string aggr", K(ret)); @@ -359,10 +654,15 @@ int ObExtendHashTableVec::init(ObIAllocator *allocator, SQL_ENG_LOG(WARN, "failed to alloc ptrs", K(ret)); } else if (OB_FAIL(new_row_selector_.prepare_allocate(max_batch_size))) { SQL_ENG_LOG(WARN, "failed to alloc array", K(ret)); + } else if (OB_FAIL(old_row_selector_.prepare_allocate(max_batch_size))) { + SQL_ENG_LOG(WARN, "failed to alloc array", K(ret)); + } else if (OB_FAIL(col_has_null_.prepare_allocate(gby_exprs.count()))) { + SQL_ENG_LOG(WARN, "failed to alloc array", K(ret)); } else if (OB_ISNULL(buckets_buf = allocator_.alloc(sizeof(BucketArray), mem_attr))) { ret = OB_ALLOCATE_MEMORY_FAILED; SQL_ENG_LOG(WARN, "failed to allocate memory", K(ret)); } else { + MEMSET(&col_has_null_.at(0), 0, col_has_null_.count()); buckets_ = new(buckets_buf)BucketArray(allocator_); buckets_->set_tenant_id(tenant_id_); initial_bucket_num_ = common::next_pow2(initial_size * SIZE_BUCKET_SCALE); @@ -423,56 +723,17 @@ int ObExtendHashTableVec::set_distinct_batch(const RowMeta &row_ } if (OB_SUCC(ret)) { - int64_t curr_idx = 0; - while (OB_SUCC(ret) && curr_idx < batch_size) { - bool batch_duplicate = false; - new_row_selector_cnt_ = 0; - for (; OB_SUCC(ret) && !batch_duplicate && curr_idx < batch_size; ++curr_idx) { - if (OB_NOT_NULL(child_skip) && child_skip->at(curr_idx)) { - my_skip.set(curr_idx); - continue; - } - int64_t curr_pos = -1; - bool find_bkt = false; - while (OB_SUCC(ret) && !find_bkt) { - locate_buckets_[curr_idx] = const_cast (&locate_next_bucket(*buckets_, hash_values[curr_idx], curr_pos)); - if (locate_buckets_[curr_idx]->is_valid()) { - bool result = false; - RowItemType *it = &(locate_buckets_[curr_idx]->get_item()); - if (OB_FAIL(likely_equal_nullable(row_meta, static_cast(*it), curr_idx, result))) { 
- LOG_WARN("failed to cmp", K(ret)); - } else if (result) { - my_skip.set(curr_idx); - find_bkt = true; - break; - } - } else if (locate_buckets_[curr_idx]->is_occupyed()) { - batch_duplicate = true; - find_bkt = true; - } else { - //occupy empty bucket - locate_buckets_[curr_idx]->set_occupyed(); - new_row_selector_.at(new_row_selector_cnt_++) = curr_idx; - find_bkt = true; - } - } - if (batch_duplicate) { - break; - } - } - if (OB_FAIL(ret) || 0 == new_row_selector_cnt_) { - } else if (OB_FAIL(sf(vector_ptrs_, &new_row_selector_.at(0), new_row_selector_cnt_, srows_))) { - LOG_WARN("failed to append batch", K(ret)); - } else { - for (int64_t i = 0; i < new_row_selector_cnt_; ++i) { - int64_t idx = new_row_selector_.at(i); - locate_buckets_[idx]->set_hash(hash_values[idx]); - locate_buckets_[idx]->set_valid(); - locate_buckets_[idx]->set_item(static_cast (*srows_[i])); - ++size_; - } - new_row_selector_cnt_ = 0; - } + const int64_t start_idx = 0; + int64_t processed_idx = 0; + if (OB_FAIL(inner_process_batch(row_meta, batch_size, child_skip, + my_skip, hash_values, sf, + true, start_idx, processed_idx))) { + LOG_WARN("failed to process batch", K(ret)); + } else if (processed_idx < batch_size + && OB_FAIL(inner_process_batch(row_meta, batch_size, child_skip, + my_skip, hash_values, sf, + false, processed_idx, processed_idx))) { + LOG_WARN("failed to process batch fallback", K(ret)); } } return ret; @@ -484,14 +745,19 @@ int ObExtendHashTableVec::inner_process_batch(const RowMeta &row const ObBitVector *child_skip, ObBitVector &my_skip, uint64_t *hash_values, - StoreRowFunc sf) + StoreRowFunc sf, + const bool probe_by_col, + const int64_t start_idx, + int64_t &processed_idx) { int ret = OB_SUCCESS; - int64_t curr_idx = 0; - while (OB_SUCC(ret) && curr_idx < batch_size) { + int64_t curr_idx = start_idx; + bool need_fallback = false; + while (OB_SUCC(ret) && !need_fallback && curr_idx < batch_size) { bool batch_duplicate = false; new_row_selector_cnt_ = 0; - for (; 
OB_SUCC(ret) && !batch_duplicate && curr_idx < batch_size; ++curr_idx) { + old_row_selector_cnt_ = 0; + for (; OB_SUCC(ret) && curr_idx < batch_size; ++curr_idx) { if (OB_NOT_NULL(child_skip) && child_skip->at(curr_idx)) { my_skip.set(curr_idx); continue; @@ -501,38 +767,45 @@ int ObExtendHashTableVec::inner_process_batch(const RowMeta &row while (OB_SUCC(ret) && !find_bkt) { locate_buckets_[curr_idx] = const_cast (&locate_next_bucket(*buckets_, hash_values[curr_idx], curr_pos)); if (locate_buckets_[curr_idx]->is_valid()) { - bool result = true; - RowItemType *it = &locate_buckets_[curr_idx]->get_item(); - for (int64_t i = 0; OB_SUCC(ret) && result && i < hash_expr_cnt_; ++i) { - ObIVector *r_vec = gby_exprs_->at(i)->get_vector(*eval_ctx_); - const bool l_isnull = it->is_null(i); - const bool r_isnull = r_vec->is_null(curr_idx); - if (l_isnull != r_isnull) { - result = false; - } else if (l_isnull && r_isnull) { - result = true; - } else { - const int64_t l_len = it->get_length(row_meta, i); - const int64_t r_len = r_vec->get_length(curr_idx); - if (l_len == r_len && (0 == memcmp(it->get_cell_payload(row_meta, i), - r_vec->get_payload(curr_idx), - l_len))) { + if (probe_by_col) { + old_row_selector_.at(old_row_selector_cnt_++) = curr_idx; + __builtin_prefetch(&locate_buckets_[curr_idx]->get_item(), + 0, 2); + find_bkt = true; + } else { + bool result = true; + RowItemType *it = &locate_buckets_[curr_idx]->get_item(); + for (int64_t i = 0; OB_SUCC(ret) && result && i < hash_expr_cnt_; ++i) { + ObIVector *r_vec = gby_exprs_->at(i)->get_vector(*eval_ctx_); + const bool l_isnull = it->is_null(i); + const bool r_isnull = r_vec->is_null(curr_idx); + if (l_isnull != r_isnull) { + result = false; + } else if (l_isnull && r_isnull) { result = true; } else { - int cmp_res = 0; - if (OB_FAIL(r_vec->null_last_cmp(*gby_exprs_->at(i), curr_idx, false, - it->get_cell_payload(row_meta, i), - l_len, cmp_res))) { - LOG_WARN("failed to cmp left and right", K(ret)); + const int64_t 
l_len = it->get_length(row_meta, i); + const int64_t r_len = r_vec->get_length(curr_idx); + if (l_len == r_len && (0 == memcmp(it->get_cell_payload(row_meta, i), + r_vec->get_payload(curr_idx), + l_len))) { + result = true; } else { - result = (0 == cmp_res); + int cmp_res = 0; + if (OB_FAIL(r_vec->null_last_cmp(*gby_exprs_->at(i), curr_idx, false, + it->get_cell_payload(row_meta, i), + l_len, cmp_res))) { + LOG_WARN("failed to cmp left and right", K(ret)); + } else { + result = (0 == cmp_res); + } } } } - } - if (OB_SUCC(ret) && result) { - my_skip.set(curr_idx); - find_bkt = true; + if (OB_SUCC(ret) && result) { + my_skip.set(curr_idx); + find_bkt = true; + } } } else if (locate_buckets_[curr_idx]->is_occupyed()) { batch_duplicate = true; @@ -548,6 +821,32 @@ int ObExtendHashTableVec::inner_process_batch(const RowMeta &row break; } } + if (OB_SUCC(ret) + && probe_by_col + && old_row_selector_cnt_ > 0) { + for (int64_t i = 0; OB_SUCC(ret) && !need_fallback && i < hash_expr_cnt_; ++i) { + col_has_null_.at(i) |= gby_exprs_->at(i)->get_vector(*eval_ctx_)->has_null(); + if (col_has_null_.at(i) ? 
+ OB_FAIL(inner_process_column(*gby_exprs_, row_meta, i, need_fallback)) + : OB_FAIL(inner_process_column_not_null(*gby_exprs_, row_meta, i, need_fallback))) { + LOG_WARN("failed to process column", K(ret), K(i)); + } + } + } + if (OB_SUCC(ret)) { + if (!need_fallback) { + for (int64_t i = 0; i < old_row_selector_cnt_; ++i) { + const int64_t idx = old_row_selector_.at(i); + my_skip.set(idx); + } + } else { + //reset occupyed bkt and stat + for (int64_t i = 0; i < new_row_selector_cnt_; ++i) { + locate_buckets_[new_row_selector_.at(i)]->set_empty(); + } + continue; + } + } if (OB_FAIL(ret) || 0 == new_row_selector_cnt_) { } else if (OB_FAIL(sf(vector_ptrs_, &new_row_selector_.at(0), new_row_selector_cnt_, srows_))) { LOG_WARN("failed to append batch", K(ret)); @@ -561,6 +860,7 @@ int ObExtendHashTableVec::inner_process_batch(const RowMeta &row } new_row_selector_cnt_ = 0; } + processed_idx = curr_idx; } return ret; } @@ -622,15 +922,6 @@ void ObExtendHashTableVec::prefetch(const ObBatchRows &brs, uint __builtin_prefetch((&buckets_->at((ObGroupRowBucketBase::HASH_VAL_MASK & hash_vals[i]) & mask)), 0/* read */, 2 /*high temp locality*/); } - if (ObGroupRowBucketType::OUTLINE == GroupRowBucket::TYPE) { - for(auto i = 0; i < brs.size_; i++) { - if (brs.skip_->at(i)) { - continue; - } - __builtin_prefetch(&buckets_->at((ObGroupRowBucketBase::HASH_VAL_MASK & hash_vals[i]) & mask).get_item(), - 0, 2 ); - } - } } } diff --git a/src/sql/engine/aggregate/ob_exec_hash_struct_vec.h b/src/sql/engine/aggregate/ob_exec_hash_struct_vec.h index 45e7d2c8a..3f5d99a7a 100644 --- a/src/sql/engine/aggregate/ob_exec_hash_struct_vec.h +++ b/src/sql/engine/aggregate/ob_exec_hash_struct_vec.h @@ -548,7 +548,10 @@ public: max_batch_size_(0), locate_buckets_(nullptr), new_row_selector_(), + old_row_selector_(), + col_has_null_(), new_row_selector_cnt_(0), + old_row_selector_cnt_(), change_valid_idx_(), change_valid_idx_cnt_(0), srows_(nullptr), @@ -706,7 +709,27 @@ public: int64_t 
&agg_row_cnt, int64_t &agg_group_cnt, BatchAggrRowsTable *batch_aggr_rows, - bool need_reinit_vectors); + bool need_reinit_vectors, + const bool probe_by_col, + const int64_t start_idx, + int64_t &processed_idx); + int inner_process_batch(const RowMeta &row_meta, + const int64_t batch_size, + const ObBitVector *child_skip, + ObBitVector &my_skip, + uint64_t *hash_values, + StoreRowFunc sf, + const bool probe_by_col, + const int64_t start_idx, + int64_t &processed_idx); + int inner_process_column(const common::ObIArray &gby_exprs, + const RowMeta &row_meta, + const int64_t col_idx, + bool &need_fallback); + int inner_process_column_not_null(const common::ObIArray &gby_exprs, + const RowMeta &row_meta, + const int64_t col_idx, + bool &need_fallback); void prefetch(const ObBatchRows &brs, uint64_t *hash_vals) const; // Link item to hash table, extend buckets if needed. // (Do not check item is exist or not) @@ -751,6 +774,9 @@ public: SQL_ENG_LOG(ERROR, "resize bucket array failed", K(size_), K(bucket_num), K(get_bucket_num())); } } + if (col_has_null_.count() > 0) { + MEMSET(&col_has_null_.at(0), 0, col_has_null_.count()); + } size_ = 0; group_store_.reset(); iter_.reset(); @@ -781,6 +807,8 @@ public: sstr_aggr_.destroy(); vector_ptrs_.destroy(); new_row_selector_.destroy(); + old_row_selector_.destroy(); + col_has_null_.destroy(); change_valid_idx_.destroy(); size_ = 0; initial_bucket_num_ = 0; @@ -907,7 +935,10 @@ protected: int64_t max_batch_size_; GroupRowBucket **locate_buckets_; common::ObFixedArray new_row_selector_; + common::ObFixedArray old_row_selector_; + common::ObFixedArray col_has_null_; int64_t new_row_selector_cnt_; + int64_t old_row_selector_cnt_; common::ObFixedArray change_valid_idx_; int64_t change_valid_idx_cnt_; ObCompactRow **srows_; diff --git a/src/sql/engine/aggregate/ob_hash_groupby_vec_op.cpp b/src/sql/engine/aggregate/ob_hash_groupby_vec_op.cpp index 7ae93e213..06f989b43 100644 --- a/src/sql/engine/aggregate/ob_hash_groupby_vec_op.cpp 
+++ b/src/sql/engine/aggregate/ob_hash_groupby_vec_op.cpp @@ -445,10 +445,10 @@ int ObHashGroupByVecOp::next_duplicate_data_permutation( const int64_t aggr_code = last_group ? MY_SPEC.dist_col_group_idxs_.count() : nth_group; ObExpr *aggr_code_expr = all_groupby_exprs_.at(first_idx - 1); ObIVector *aggr_code_col = nullptr; - if (OB_FAIL(aggr_code_expr->init_vector_default(eval_ctx_, MY_SPEC.max_batch_size_))) { + if (OB_FAIL(aggr_code_expr->init_vector(eval_ctx_, VEC_FIXED, MY_SPEC.max_batch_size_))) { LOG_WARN("failed to init vector", K(ret)); } else { - aggr_code_col = aggr_code_expr->get_vector(eval_ctx_); + ObFixedLengthFormat *aggr_code_col = static_cast *> (aggr_code_expr->get_vector(eval_ctx_)); ObBitVector &eval_flags = aggr_code_expr->get_evaluated_flags(eval_ctx_); for (int64_t i = 0; i < child_brs->size_; ++i) { if (child_brs->skip_->exist(i)) { @@ -1010,6 +1010,8 @@ int ObHashGroupByVecOp::inner_get_next_batch(const int64_t max_row_cnt) all_groupby_exprs_, op_max_batch_size, brs_, curr_group_id_))) { LOG_WARN("failed to collect batch result", K(ret), K(curr_group_id_)); + } else { + brs_.all_rows_active_ = true; } } else { int64_t read_rows = 0; @@ -1025,8 +1027,21 @@ int ObHashGroupByVecOp::inner_get_next_batch(const int64_t max_row_cnt) } else { curr_group_id_ += read_rows; } + if (OB_SUCC(ret)) { + brs_.all_rows_active_ = true; + } } } + +#ifdef ENABLE_DEBUG_LOG + // check all_row_active_ + if (OB_SUCC(ret) && brs_.size_ > 0) { + bool is_all_false = brs_.skip_->is_all_false(brs_.size_); + if (is_all_false != brs_.all_rows_active_) { + ob_abort(); + } + } +#endif LOG_DEBUG("after inner_get_next_batch", K(get_aggr_used_size()), K(get_aggr_used_size()), K(get_hash_table_used_size()), @@ -1113,16 +1128,13 @@ int ObHashGroupByVecOp::load_data_batch(int64_t max_row_cnt) } else if (OB_FAIL(aggr_processor_.eval_aggr_param_batch(*child_brs))) { LOG_WARN("fail to eval aggr param batch", K(ret), K(*child_brs)); } - // prefetch the result that they will be 
processed - for (int64_t j = 0; OB_SUCC(ret) && j < child_brs->size_; ++j) { - if (batch_old_rows_[j]) { - aggr_processor_.prefetch_aggregate_row(batch_old_rows_[j]); - } - } // end for //batch calc aggr for each group int32_t start_agg_id = -1; int32_t end_agg_id = -1; - for (int64_t i = 0; i < MY_SPEC.aggr_infos_.count(); ++i) { + bool calc_multi_groups = (MY_SPEC.aggr_stage_ == ObThreeStageAggrStage::FIRST_STAGE + || MY_SPEC.aggr_stage_ == ObThreeStageAggrStage::NONE_STAGE); + + for (int64_t i = 0; !calc_multi_groups && i < MY_SPEC.aggr_infos_.count(); ++i) { if (MY_SPEC.aggr_infos_.at(i).param_exprs_.count() == 1) { aggr_vectors_[i] = MY_SPEC.aggr_infos_.at(i).param_exprs_.at(0)->get_vector(eval_ctx_); } else if (MY_SPEC.aggr_infos_.at(i).is_implicit_first_aggr()) { @@ -1132,10 +1144,16 @@ int ObHashGroupByVecOp::load_data_batch(int64_t max_row_cnt) } } if (OB_FAIL(ret)) { + } else if (calc_multi_groups) { // do nothing } else if (OB_FAIL(aggr_processor_.prepare_adding_one_row())) { LOG_WARN("prepare add one row failed", K(ret)); } - for (int64_t i = 0; OB_SUCC(ret) && i < child_brs->size_; i++) { + if (OB_SUCC(ret) && calc_multi_groups) { + if (OB_FAIL(process_multi_groups(batch_new_rows_, *child_brs))) { + LOG_WARN("process multiple groups failed", K(ret)); + } + } + for (int64_t i = 0; !calc_multi_groups && OB_SUCC(ret) && i < child_brs->size_; i++) { if ((!batch_new_rows_[i])) { continue; } @@ -1162,8 +1180,14 @@ int ObHashGroupByVecOp::load_data_batch(int64_t max_row_cnt) } start_agg_id = -1; end_agg_id = -1; - if (!reorder_aggr_rows_ || !batch_aggr_rows_table_.is_valid()) { - for (int64_t i = 0; OB_SUCC(ret) && i < child_brs->size_; i++) { + if (OB_FAIL(ret)) { + } else if (!reorder_aggr_rows_ || !batch_aggr_rows_table_.is_valid()) { + if (calc_multi_groups) { + if (OB_FAIL(process_multi_groups(batch_old_rows_, *child_brs))) { + LOG_WARN("process multiple groups failed", K(ret)); + } + } + for (int64_t i = 0; !calc_multi_groups && OB_SUCC(ret) && i < 
child_brs->size_; i++) { if ((!batch_old_rows_[i])) { continue; } @@ -1220,7 +1244,7 @@ int ObHashGroupByVecOp::load_data_batch(int64_t max_row_cnt) } } if (OB_FAIL(ret)) { - } else if (OB_FAIL(aggr_processor_.finish_adding_one_row())) { + } else if (!calc_multi_groups && OB_FAIL(aggr_processor_.finish_adding_one_row())) { LOG_WARN("finish add one row failed", K(ret)); } } @@ -1978,6 +2002,7 @@ int ObHashGroupByVecOp::by_pass_prepare_one_batch(const int64_t batch_size) } } if (OB_FAIL(ret) || no_non_distinct_aggr_) { + brs_.all_rows_active_ = by_pass_child_brs_->all_rows_active_; } else if (OB_UNLIKELY(by_pass_batch_size_ <= 0)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("by pass group row is not init", K(ret), K(by_pass_batch_size_)); @@ -2239,6 +2264,30 @@ int ObHashGroupByVecOp::bypass_add_llc_map_batch(bool ready_to_check_ndv) { return ret; } +int ObHashGroupByVecOp::process_multi_groups(aggregate::AggrRowPtr *agg_rows, const ObBatchRows &brs) +{ + int ret = OB_SUCCESS; + int32_t start_agg_id = -1, end_agg_id = -1; + for (int i = 0; OB_SUCC(ret) && i < brs.size_; i++) { + if ((!agg_rows[i])) { continue; } + if (OB_FAIL(ObGroupByVecOp::calculate_3stage_agg_info( + agg_rows[i], local_group_rows_.get_row_meta(), i, start_agg_id, end_agg_id))) { + LOG_WARN("calculate 3stage aggregate info failed", K(ret)); + } else if (OB_UNLIKELY(start_agg_id == -1 || end_agg_id == -1)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("unexpected aggregate idx", K(ret), K(start_agg_id), K(end_agg_id)); + } else { + break; + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(aggr_processor_.add_batch_for_multi_groups(start_agg_id, end_agg_id, agg_rows, + brs.size_))) { + LOG_WARN("failed to calculate multiple groups", K(ret)); + } + return ret; +} + int LlcEstimate::init_llc_map(common::ObArenaAllocator &allocator) { int ret = OB_SUCCESS; diff --git a/src/sql/engine/aggregate/ob_hash_groupby_vec_op.h b/src/sql/engine/aggregate/ob_hash_groupby_vec_op.h index 731f5e9a1..1ee604833 100644 --- 
a/src/sql/engine/aggregate/ob_hash_groupby_vec_op.h +++ b/src/sql/engine/aggregate/ob_hash_groupby_vec_op.h @@ -337,6 +337,8 @@ private: const ObBatchRows *child_brs, ObBatchRows &my_brs, const int64_t batch_size, bool &insert_group_ht); int init_by_pass_op(); + + int process_multi_groups(aggregate::AggrRowPtr *agg_rows, const ObBatchRows &brs); // Alloc one batch group_row_item at a time static const int64_t BATCH_GROUP_ITEM_SIZE = 16; // const int64_t EXTEND_BKT_NUM_PUSH_DOWN = INIT_L3_CACHE_SIZE / ObExtendHashTableVec::get_sizeof_aggr_row(); diff --git a/src/sql/engine/basic/ob_compact_row.h b/src/sql/engine/basic/ob_compact_row.h index 92dfe3652..d20e744cc 100644 --- a/src/sql/engine/basic/ob_compact_row.h +++ b/src/sql/engine/basic/ob_compact_row.h @@ -86,6 +86,11 @@ public: { return fixed_expr_reordered() ? (project_idx(col_idx) - fixed_cnt_) : col_idx; } + //make sure column is fixed reordered + inline int64_t get_fixed_cell_offset(const int64_t col_idx) const + { + return fixed_offsets_[project_idx(col_idx)]; + } static int32_t get_row_fixed_size(const int64_t col_cnt, const int64_t fixed_payload_len, const int64_t extra_size, @@ -238,6 +243,11 @@ struct ObCompactRow } MEMCPY(payload_ + off, payload, len); } + //make sure column is fixed reordered + inline void set_fixed_cell_payload(const char *payload, const int64_t offset, const ObLength len) + { + MEMCPY(payload_ + offset, payload, len); + } inline void get_cell_payload(const RowMeta &meta, const int64_t col_idx, @@ -281,6 +291,11 @@ struct ObCompactRow } return payload; } + //make sure column is fixed reordered + inline const char *get_fixed_cell_payload(const int64_t offset) const + { + return payload_ + offset; + } inline int64_t get_row_size() const { return header_.row_size_; diff --git a/src/sql/engine/basic/ob_hp_infras_vec_op.h b/src/sql/engine/basic/ob_hp_infras_vec_op.h index f4032f17c..3c8a399e8 100644 --- a/src/sql/engine/basic/ob_hp_infras_vec_op.h +++ 
b/src/sql/engine/basic/ob_hp_infras_vec_op.h @@ -582,15 +582,6 @@ private: const auto &curr_bkt = buckets->at(bkt_idx); __builtin_prefetch(&curr_bkt, 0/* read */, 2 /*high temp locality*/); } - for (int i = 0; i < batch_size; ++i) { - int64_t bkt_idx = (hash_values_for_batch[i] & num_cnt); - const auto &curr_bkt = buckets->at(bkt_idx); - if ((OB_NOT_NULL(skip) && skip->at(i)) - || !curr_bkt.check_hash(hash_values_for_batch[i])) { - continue; - } - __builtin_prefetch(&curr_bkt.item_, 0/* read */, 2 /*high temp locality*/); - } return ret; } diff --git a/src/sql/engine/basic/ob_pushdown_filter.cpp b/src/sql/engine/basic/ob_pushdown_filter.cpp index bd5c184b9..fb772c032 100644 --- a/src/sql/engine/basic/ob_pushdown_filter.cpp +++ b/src/sql/engine/basic/ob_pushdown_filter.cpp @@ -1220,6 +1220,7 @@ int ObPushdownFilterExecutor::init_co_filter_param(const ObTableIterParam &iter_ int ret = OB_SUCCESS; const ObITableReadInfo *read_info = nullptr; const common::ObIArray *access_cgs = nullptr; + const common::ObIArray *cg_exprs = nullptr; const ObIArray &col_ids = get_col_ids(); const int64_t col_count = col_ids.count(); if (OB_UNLIKELY(!iter_param.is_valid() || nullptr == (read_info = iter_param.get_read_info()) @@ -1229,7 +1230,7 @@ int ObPushdownFilterExecutor::init_co_filter_param(const ObTableIterParam &iter_ } else if (is_filter_node()) { if (0 == col_count) { if (OB_FAIL(init_array_param(cg_idxs_, 1))) { - LOG_WARN("Fail to init col offsets", K(ret), K(col_count)); + LOG_WARN("Fail to init cg idxs", K(ret), K(col_count)); } else if (OB_FAIL(cg_idxs_.push_back(OB_CS_VIRTUAL_CG_IDX))) { LOG_WARN("Failed to push back cg idx", K(ret)); } @@ -1243,10 +1244,10 @@ int ObPushdownFilterExecutor::init_co_filter_param(const ObTableIterParam &iter_ LOG_WARN("Fail to init col offsets", K(ret), K(col_count)); } else if (OB_FAIL(init_array_param(default_datums_, col_count))) { LOG_WARN("Fail to init default datums", K(ret), K(col_count)); + } else if (FALSE_IT(cg_exprs = 
get_cg_col_exprs())) { + } else if (nullptr != cg_exprs && OB_FAIL(cg_col_exprs_.assign(*cg_exprs))) { + LOG_WARN("Fail to assign cg exprs", K(ret), KPC(cg_exprs)); } else { - if (nullptr == cg_col_exprs_) { - cg_col_exprs_ = get_cg_col_exprs(); - } access_cgs = read_info->get_cg_idxs(); for (int64_t i = 0; OB_SUCC(ret) && i < col_count; i++) { int32_t col_pos = -1; @@ -1325,13 +1326,13 @@ int ObPushdownFilterExecutor::init_co_filter_param(const ObTableIterParam &iter_ return ret; } -int ObPushdownFilterExecutor::set_cg_param(const common::ObIArray &cg_idxs, common::ObIArray *exprs) +int ObPushdownFilterExecutor::set_cg_param(const common::ObIArray &cg_idxs, const common::ObIArray &exprs) { int ret = OB_SUCCESS; if (OB_FAIL((cg_idxs_.assign(cg_idxs)))) { LOG_WARN("Failed to assign cg_idxs", K(ret), K(cg_idxs), K(cg_idxs_)); - } else { - cg_col_exprs_ = exprs; + } else if (!exprs.empty() && OB_FAIL(cg_col_exprs_.assign(exprs))) { + LOG_WARN("Failed to assign cg_exprs", K(ret), K(exprs)); } return ret; } @@ -1616,7 +1617,7 @@ ObPushdownFilterExecutor::ObPushdownFilterExecutor(common::ObIAllocator &alloc, : type_(type), need_check_row_filter_(false), filter_tree_status_(ObCommonFilterTreeStatus::NONE_FILTER), n_cols_(0), n_child_(0), cg_iter_idx_(INVALID_CG_ITER_IDX), skipped_rows_(0), childs_(nullptr), filter_bitmap_(nullptr), col_params_(alloc), col_offsets_(alloc), cg_col_offsets_(alloc), default_datums_(alloc), - cg_idxs_(alloc), cg_col_exprs_(nullptr), allocator_(alloc), op_(op), is_rewrited_(false), filter_bool_mask_() + cg_idxs_(alloc), cg_col_exprs_(alloc), allocator_(alloc), op_(op), is_rewrited_(false), filter_bool_mask_() {} ObPushdownFilterExecutor::~ObPushdownFilterExecutor() @@ -1631,7 +1632,7 @@ ObPushdownFilterExecutor::~ObPushdownFilterExecutor() cg_col_offsets_.reset(); default_datums_.reset(); cg_idxs_.reset(); - cg_col_exprs_ = nullptr; + cg_col_exprs_.reset(); for (uint32_t i = 0; i < n_child_; i++) { if (OB_NOT_NULL(childs_[i])) { 
childs_[i]->~ObPushdownFilterExecutor(); @@ -1651,7 +1652,7 @@ DEF_TO_STRING(ObPushdownFilterExecutor) J_KV(K_(type), K_(need_check_row_filter), K_(n_cols), K_(n_child), KP_(childs), KP_(filter_bitmap), K_(col_params), K_(default_datums), K_(col_offsets), - K_(cg_col_offsets), K_(cg_idxs), KP_(cg_col_exprs), + K_(cg_col_offsets), K_(cg_idxs), K_(cg_col_exprs), K_(is_rewrited), K_(filter_bool_mask)); J_OBJ_END(); return pos; @@ -2861,7 +2862,7 @@ int PushdownFilterInfo::init(const storage::ObTableIterParam &iter_param, common } else if (OB_ISNULL(skip_bit_ = to_bit_vector(alloc.alloc(ObBitVector::memory_size(batch_size_))))) { ret = common::OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("Failed to alloc skip bit", K(ret), K_(batch_size)); - } else if (OB_ISNULL(buf = alloc.alloc(sizeof(int64_t) * batch_size_))) { + } else if (OB_ISNULL(buf = alloc.alloc(sizeof(int32_t) * batch_size_))) { ret = common::OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc row_ids", K(ret), K(batch_size_)); } else if (OB_ISNULL(len_array_buf = alloc.alloc(sizeof(uint32_t) * batch_size_))) { @@ -2869,7 +2870,7 @@ int PushdownFilterInfo::init(const storage::ObTableIterParam &iter_param, common LOG_WARN("fail to alloc len_array_buf", K(ret), K_(batch_size)); } else { skip_bit_->init(batch_size_); - row_ids_ = reinterpret_cast(buf); + row_ids_ = reinterpret_cast(buf); len_array_ = reinterpret_cast(len_array_buf); } } diff --git a/src/sql/engine/basic/ob_pushdown_filter.h b/src/sql/engine/basic/ob_pushdown_filter.h index cf4d9df5f..068d9eb3c 100644 --- a/src/sql/engine/basic/ob_pushdown_filter.h +++ b/src/sql/engine/basic/ob_pushdown_filter.h @@ -545,12 +545,13 @@ private: ObPushdownFilterNode *filter_tree_; }; -enum ObCommonFilterTreeStatus +enum ObCommonFilterTreeStatus : uint8_t { NONE_FILTER = 0, WHITE = 1, SINGLE_BLACK = 2, - MULTI_BLACK = 3 + MULTI_BLACK = 3, + MAX_STATUS = 4 }; // executor interface @@ -620,10 +621,10 @@ public: OB_INLINE const common::ObFixedArray &get_default_datums() const { 
return default_datums_; } OB_INLINE const common::ObIArray &get_cg_idxs() const { return cg_idxs_; } - OB_INLINE common::ObIArray &get_cg_idxs() { return cg_idxs_; } - virtual common::ObIArray *get_cg_col_exprs() const { return cg_col_exprs_; } + virtual const common::ObIArray *get_cg_col_exprs() const + { return cg_col_exprs_.empty() ? nullptr : &cg_col_exprs_; } OB_INLINE bool is_cg_param_valid() const - { return !cg_idxs_.empty() && (nullptr == cg_col_exprs_ || cg_col_exprs_->count() <= cg_idxs_.count()); } + { return !cg_idxs_.empty() && cg_col_exprs_.count() <= cg_idxs_.count(); } OB_INLINE uint32_t get_child_count() const { return n_child_; } OB_INLINE int64_t get_cg_iter_idx() const { return cg_iter_idx_; } OB_INLINE void set_cg_iter_idx(const int64_t cg_iter_idx) @@ -677,7 +678,7 @@ public: const common::ObIArray &output_projector, const bool need_padding); int init_co_filter_param(const storage::ObTableIterParam &iter_param, const bool need_padding); - int set_cg_param(const common::ObIArray &cg_idxs, common::ObIArray *exprs); + int set_cg_param(const common::ObIArray &cg_idxs, const common::ObIArray &exprs); int pull_up_common_node( const common::ObIArray &filter_indexes, ObPushdownFilterExecutor *&common_filter_executor); @@ -725,7 +726,7 @@ protected: common::ObFixedArray cg_col_offsets_; common::ObFixedArray default_datums_; common::ObFixedArray cg_idxs_; - common::ObIArray *cg_col_exprs_; + common::ObFixedArray cg_col_exprs_; common::ObIAllocator &allocator_; ObPushdownOperator &op_; private: @@ -776,7 +777,7 @@ public: OB_INLINE ObPushdownBlackFilterNode &get_filter_node() { return filter_; } OB_INLINE virtual common::ObIArray &get_col_ids() override { return filter_.get_col_ids(); } - virtual common::ObIArray *get_cg_col_exprs() const override { return &filter_.column_exprs_; } + virtual const common::ObIArray *get_cg_col_exprs() const override { return &filter_.column_exprs_; } OB_INLINE bool can_vectorized(); int 
filter_batch(ObPushdownFilterExecutor *parent, const int64_t start, @@ -921,7 +922,7 @@ public: OB_INLINE const ObPushdownWhiteFilterNode &get_filter_node() const { return filter_; } OB_INLINE virtual common::ObIArray &get_col_ids() override { return filter_.get_col_ids(); } - virtual common::ObIArray *get_cg_col_exprs() const override { return &filter_.column_exprs_; } + virtual const common::ObIArray *get_cg_col_exprs() const override { return &filter_.column_exprs_; } virtual int init_evaluated_datums() override; OB_INLINE const common::ObIArray &get_datums() const { return datum_params_; } @@ -1247,7 +1248,7 @@ struct PushdownFilterInfo sql::ObPushdownFilterExecutor *filter_; // for black filter vectorize const char **cell_data_ptrs_; - int64_t *row_ids_; + int32_t *row_ids_; uint32_t *len_array_; common::ObBitmap *ref_bitmap_; sql::ObBitVector *skip_bit_; diff --git a/src/sql/engine/basic/ob_temp_block_store.h b/src/sql/engine/basic/ob_temp_block_store.h index 61cfc4183..249bc1a36 100644 --- a/src/sql/engine/basic/ob_temp_block_store.h +++ b/src/sql/engine/basic/ob_temp_block_store.h @@ -92,6 +92,9 @@ public: inline int fill_head(int64_t size); inline int fill_tail(int64_t size); inline int compact(); + inline int64_t head_pos() const { return head_; } + inline int64_t tail_pos() const { return tail_;} + inline void fast_update_head(const int64_t pos) { head_ = pos; } TO_STRING_KV(KP_(data), K_(head), K_(tail), K_(cap)); private: char *data_; diff --git a/src/sql/engine/basic/ob_temp_row_store.cpp b/src/sql/engine/basic/ob_temp_row_store.cpp index 1ca5c382e..883553a77 100644 --- a/src/sql/engine/basic/ob_temp_row_store.cpp +++ b/src/sql/engine/basic/ob_temp_row_store.cpp @@ -249,6 +249,68 @@ int ObTempRowStore::RowBlock::calc_rows_size(const IVectorPtrs &vectors, return ret; } +int ObTempRowStore::DtlRowBlock::calc_rows_size(const IVectorPtrs &vectors, + const RowMeta &row_meta, + const ObBatchRows &brs, + uint32_t row_size_arr[]) { + int ret = OB_SUCCESS; + 
const int64_t fixed_row_size = row_meta.get_row_fixed_size(); + const bool reordered = row_meta.fixed_expr_reordered(); + for (int64_t i = 0; i < brs.size_; i++) { + row_size_arr[i] = fixed_row_size; + } + for (int64_t col_idx = 0; OB_SUCC(ret) && col_idx < vectors.count(); col_idx++) { + ObIVector *vec = vectors.at(col_idx); + if (reordered && row_meta.project_idx(col_idx) < row_meta.fixed_cnt_) { + continue; + } + VectorFormat format = vec->get_format(); + if (VEC_DISCRETE == format) { + ObDiscreteBase *disc_vec = static_cast(vec); + ObLength *lens = disc_vec->get_lens(); + for (int64_t i = 0; i < brs.size_; i++) { + if (brs.skip_->at(i)) { + continue; + } + if (!disc_vec->is_null(i)) { + row_size_arr[i] += lens[i]; + } + } + } else if (VEC_CONTINUOUS == format) { + ObContinuousBase *cont_vec = static_cast(vec); + uint32_t *offsets = cont_vec->get_offsets(); + for (int64_t i = 0; i < brs.size_; i++) { + if (brs.skip_->at(i)) { + continue; + } + row_size_arr[i] += offsets[i + 1] - offsets[i]; + } + } else if (is_uniform_format(format)) { + ObUniformBase *uni_vec = static_cast(vec); + ObDatum *datums = uni_vec->get_datums(); + const uint16_t idx_mask = VEC_UNIFORM_CONST == format ? 
0 : UINT16_MAX; + for (int64_t i = 0; i < brs.size_; i++) { + if (brs.skip_->at(i)) { + continue; + } + if (!datums[i & idx_mask].is_null()) { + row_size_arr[i] += datums[i & idx_mask].len_; + } + } + } else if (VEC_FIXED == format) { + ObFixedLengthBase *fixed_vec = static_cast(vec); + for (int64_t i = 0; i < brs.size_; i++) { + if (brs.skip_->at(i)) { + continue; + } + row_size_arr[i] += fixed_vec->get_length(); + } + } + } + + return ret; +} + int ObTempRowStore::Iterator::init(ObTempRowStore *store) { reset(); diff --git a/src/sql/engine/basic/ob_temp_row_store.h b/src/sql/engine/basic/ob_temp_row_store.h index 0634a2f1c..783ee6b0f 100644 --- a/src/sql/engine/basic/ob_temp_row_store.h +++ b/src/sql/engine/basic/ob_temp_row_store.h @@ -67,6 +67,14 @@ public: const int64_t col_idx); }; + struct DtlRowBlock : public RowBlock { + static int calc_rows_size(const IVectorPtrs &vectors, + const RowMeta &row_meta, + const ObBatchRows &brs, + uint32_t row_size_arr[]); + + }; + const static int64_t BLOCK_SIZE = (64L << 10); class Iterator : public ObTempBlockStore::BlockReader { diff --git a/src/sql/engine/expr/ob_batch_eval_util.h b/src/sql/engine/expr/ob_batch_eval_util.h index 4048ad1f9..483099bb8 100644 --- a/src/sql/engine/expr/ob_batch_eval_util.h +++ b/src/sql/engine/expr/ob_batch_eval_util.h @@ -689,6 +689,23 @@ struct ObArithOpBase : public ObArithOpRawType } }; +template +struct ObArithTypedBase : public ObArithOpRawType +{ + constexpr static bool is_raw_op_supported() + { + return false; + } + + template + static void raw_op(Res &, const Left &, const Right, Args &...args) + {} + static int raw_check(const Res &, const Left &, const Right &) + { + return common::OB_ERR_UNEXPECTED; + } +}; + // Wrap arith operate with null check. template struct ObWrapArithOpNullCheck: public ObArithOpBase @@ -707,8 +724,8 @@ struct ObWrapArithOpNullCheck: public ObArithOpBase }; // Wrap arith operate with null check for vector. 
-template -struct ObWrapVectorArithOpNullCheck: public ObArithOpBase +template +struct ObWrapVectorArithOpNullCheck: public Base { template static int vector_op(ResVector &res_vec, const LeftVector &left_vec, @@ -738,17 +755,17 @@ int def_batch_arith_op_by_datum_func(BATCH_EVAL_FUNC_ARG_DECL, Args &...args) BATCH_EVAL_FUNC_ARG_LIST, args...); } -template +template int def_fixed_len_vector_arith_op_func(VECTOR_EVAL_FUNC_ARG_DECL, Args &...args) { - return def_fixed_len_vector_arith_op, Args...>( + return def_fixed_len_vector_arith_op, Args...>( VECTOR_EVAL_FUNC_ARG_LIST, args...); } -template +template int def_variable_len_vector_arith_op_func(VECTOR_EVAL_FUNC_ARG_DECL, Args &...args) { - return def_variable_len_vector_arith_op, Args...>( + return def_variable_len_vector_arith_op, Args...>( VECTOR_EVAL_FUNC_ARG_LIST, args...); } diff --git a/src/sql/engine/expr/ob_datum_cast.cpp b/src/sql/engine/expr/ob_datum_cast.cpp index de7572f49..acce1b89b 100644 --- a/src/sql/engine/expr/ob_datum_cast.cpp +++ b/src/sql/engine/expr/ob_datum_cast.cpp @@ -3950,12 +3950,17 @@ CAST_FUNC_NAME(text, string) common::ObArenaAllocator &temp_allocator = tmp_alloc_g.get_allocator(); ObExprStrResAlloc res_alloc(expr, ctx); ObTextStringIter instr_iter(in_type, in_cs_type, child_res->get_string(), has_lob_header); - if (OB_FAIL(ObTextStringHelper::build_text_iter(instr_iter, &ctx.exec_ctx_, ctx.exec_ctx_.get_my_session(), + const ObLobCommon& lob = child_res->get_lob_data(); + if (child_res->len_ != 0 && !lob.is_mem_loc_ && lob.in_row_ && has_lob_header) { + data.assign_ptr(lob.get_inrow_data_ptr(), static_cast(lob.get_byte_size(child_res->len_))); + } else if (OB_FAIL(ObTextStringHelper::build_text_iter(instr_iter, &ctx.exec_ctx_, ctx.exec_ctx_.get_my_session(), is_same_charset ? 
reinterpret_cast(&res_alloc) : &temp_allocator, &temp_allocator))) { LOG_WARN("init lob str iter failed ", K(ret), K(in_type)); } else if (OB_FAIL(instr_iter.get_full_data(data))) { LOG_WARN("init lob str iter failed ", K(ret), K(in_type)); + } + if (OB_FAIL(ret)) { } else if (lib::is_oracle_mode() && ob_is_clob(in_type, in_cs_type) && (0 == data.length()) diff --git a/src/sql/engine/expr/ob_expr_add.cpp b/src/sql/engine/expr/ob_expr_add.cpp index 575074a31..3c044a4d6 100644 --- a/src/sql/engine/expr/ob_expr_add.cpp +++ b/src/sql/engine/expr/ob_expr_add.cpp @@ -125,6 +125,15 @@ int ObExprAdd::calc_result_type2(ObExprResType &type, type.set_scale(ORA_NUMBER_SCALE_UNKNOWN_YET); type.set_precision(PRECISION_UNKNOWN_YET); } + if ((ob_is_double_tc(type.get_type()) || ob_is_float_tc(type.get_type())) && type.get_scale() > 0) { + // if result is fixed double/float, calc type's of params should also be fixed double/float + if (ob_is_double_tc(type1.get_calc_type()) || ob_is_float_tc(type1.get_calc_type())) { + type1.set_calc_scale(type.get_scale()); + } + if (ob_is_double_tc(type2.get_calc_type()) || ob_is_float_tc(type2.get_calc_type())) { + type2.set_calc_scale(type.get_scale()); + } + } } LOG_DEBUG("calc_result_type2", K(scale), K(type1), K(type2), K(type), K(precision)); return ret; @@ -1669,7 +1678,8 @@ int ObExprAdd::add_decimal##TYPE##_oracle_vector(VECTOR_EVAL_FUNC_ARG_DECL) { \ ObNumStackOnceAlloc tmp_alloc; \ const int64_t scale = expr.args_[0]->datum_meta_.scale_; \ - return def_fixed_len_vector_arith_op_func>(VECTOR_EVAL_FUNC_ARG_LIST, scale, tmp_alloc); \ + return def_fixed_len_vector_arith_op_func,\ + ObArithTypedBase>(VECTOR_EVAL_FUNC_ARG_LIST, scale, tmp_alloc); \ } DECINC_ADD_EVAL_FUNC_ORA_DECL(int32) diff --git a/src/sql/engine/expr/ob_expr_div.cpp b/src/sql/engine/expr/ob_expr_div.cpp index 3ac8ed48e..c1fb9796b 100644 --- a/src/sql/engine/expr/ob_expr_div.cpp +++ b/src/sql/engine/expr/ob_expr_div.cpp @@ -578,7 +578,9 @@ int 
ObExprDiv::div_float_batch(BATCH_EVAL_FUNC_ARG_DECL) int ObExprDiv::div_float_vector(VECTOR_EVAL_FUNC_ARG_DECL) { const bool is_oracle = lib::is_oracle_mode(); - return def_fixed_len_vector_arith_op_func(VECTOR_EVAL_FUNC_ARG_LIST, is_oracle); + return def_fixed_len_vector_arith_op_func>( + VECTOR_EVAL_FUNC_ARG_LIST, is_oracle); } @@ -677,8 +679,8 @@ int ObExprDiv::div_double_batch(BATCH_EVAL_FUNC_ARG_DECL) int ObExprDiv::div_double_vector(VECTOR_EVAL_FUNC_ARG_DECL) { const bool is_oracle = lib::is_oracle_mode(); - return def_fixed_len_vector_arith_op_func(VECTOR_EVAL_FUNC_ARG_LIST, expr, - is_oracle); + return def_fixed_len_vector_arith_op_func>( + VECTOR_EVAL_FUNC_ARG_LIST, expr, is_oracle); } struct ObNumberDivFunc @@ -806,8 +808,8 @@ int ObExprDiv::div_number_batch(BATCH_EVAL_FUNC_ARG_DECL) int ObExprDiv::div_number_vector(VECTOR_EVAL_FUNC_ARG_DECL) { const bool is_oracle = lib::is_oracle_mode(); - return def_variable_len_vector_arith_op_func(VECTOR_EVAL_FUNC_ARG_LIST, expr, ctx, - is_oracle); + return def_variable_len_vector_arith_op_func( + VECTOR_EVAL_FUNC_ARG_LIST, expr, ctx, is_oracle); } diff --git a/src/sql/engine/expr/ob_expr_minus.cpp b/src/sql/engine/expr/ob_expr_minus.cpp index e8b8e7c08..691727cc3 100644 --- a/src/sql/engine/expr/ob_expr_minus.cpp +++ b/src/sql/engine/expr/ob_expr_minus.cpp @@ -138,6 +138,15 @@ int ObExprMinus::calc_result_type2(ObExprResType &type, type.set_scale(ORA_NUMBER_SCALE_UNKNOWN_YET); type.set_precision(PRECISION_UNKNOWN_YET); } + if ((ob_is_double_tc(type.get_type()) || ob_is_float_tc(type.get_type())) && type.get_scale() > 0) { + // if result is fixed double/float, calc type's of params should also be fixed double/float + if (ob_is_double_tc(type1.get_calc_type()) || ob_is_float_tc(type1.get_calc_type())) { + type1.set_calc_scale(type.get_scale()); + } + if (ob_is_double_tc(type2.get_calc_type()) || ob_is_float_tc(type2.get_calc_type())) { + type2.set_calc_scale(type.get_scale()); + } + } LOG_DEBUG("calc_result_type2", 
K(scale), K(type1), K(type2), K(type), K(precision)); } return ret; @@ -1793,7 +1802,8 @@ int ObExprMinus::minus_decimal##TYPE##_oracle_vector(VECTOR_EVAL_FUNC_ARG_DECL) { \ ObNumStackOnceAlloc tmp_alloc; \ const int64_t scale = expr.args_[0]->datum_meta_.scale_; \ - return def_fixed_len_vector_arith_op_func>(VECTOR_EVAL_FUNC_ARG_LIST, scale, tmp_alloc); \ + return def_fixed_len_vector_arith_op_func,\ + ObArithTypedBase>(VECTOR_EVAL_FUNC_ARG_LIST, scale, tmp_alloc); \ } DECINC_MINUS_EVAL_FUNC_ORA_DECL(int32) diff --git a/src/sql/engine/expr/ob_expr_mul.cpp b/src/sql/engine/expr/ob_expr_mul.cpp index 2519e588a..6dde660a5 100644 --- a/src/sql/engine/expr/ob_expr_mul.cpp +++ b/src/sql/engine/expr/ob_expr_mul.cpp @@ -1335,7 +1335,8 @@ int ObExprMul::mul_decimal##RES##_##L##_##R##_oracle_vector(VECTOR_EVAL_FUNC_ARG { \ ObNumStackOnceAlloc tmp_alloc; \ const int64_t scale = expr.args_[0]->datum_meta_.scale_ + expr.args_[1]->datum_meta_.scale_; \ - return def_fixed_len_vector_arith_op_func>(VECTOR_EVAL_FUNC_ARG_LIST, scale, tmp_alloc); \ + return def_fixed_len_vector_arith_op_func,\ + ObArithTypedBase>(VECTOR_EVAL_FUNC_ARG_LIST, scale, tmp_alloc); \ } DECINC_MUL_EVAL_FUNC_ORA_DECL(int32, int32, int32) diff --git a/src/sql/engine/expr/ob_expr_operator.cpp b/src/sql/engine/expr/ob_expr_operator.cpp index 1f2017f2d..9d54f6b6a 100644 --- a/src/sql/engine/expr/ob_expr_operator.cpp +++ b/src/sql/engine/expr/ob_expr_operator.cpp @@ -6435,6 +6435,7 @@ int ObRelationalExprOperator::cg_datum_cmp_expr(const ObRawExpr &raw_expr, } CK(NULL != rt_expr.eval_func_); CK(NULL != rt_expr.eval_batch_func_); + CK(NULL != rt_expr.eval_vector_func_); } return ret; } diff --git a/src/sql/engine/join/ob_join_filter_op.cpp b/src/sql/engine/join/ob_join_filter_op.cpp index dba6e588c..d9622ae6d 100644 --- a/src/sql/engine/join/ob_join_filter_op.cpp +++ b/src/sql/engine/join/ob_join_filter_op.cpp @@ -392,6 +392,9 @@ int ObJoinFilterOpInput::construct_msg_details( "RFInFilter", "RFInFilter"))) { 
LOG_WARN("fail to init in hash set", K(ret)); + } else if (OB_FAIL(in_msg.sm_hash_set_.init(config.runtime_filter_max_in_num_, + in_msg.get_tenant_id()))) { + LOG_WARN("failed to init sm_hash_set_", K(config.runtime_filter_max_in_num_)); } else if (OB_FAIL(in_msg.need_null_cmp_flags_.assign(spec.need_null_cmp_flags_))) { LOG_WARN("fail to init cmp flags", K(ret)); } else if (OB_FAIL(in_msg.build_row_cmp_info_.assign(spec.rf_build_cmp_infos_))) { diff --git a/src/sql/engine/px/exchange/ob_px_transmit_op.cpp b/src/sql/engine/px/exchange/ob_px_transmit_op.cpp index d05d51ced..b43e8ae5b 100644 --- a/src/sql/engine/px/exchange/ob_px_transmit_op.cpp +++ b/src/sql/engine/px/exchange/ob_px_transmit_op.cpp @@ -140,13 +140,18 @@ ObPxTransmitOp::ObPxTransmitOp(ObExecContext &exec_ctx, const ObOpSpec &spec, Ob batch_param_remain_(false), receive_channel_ready_(false), data_msg_type_(dtl::ObDtlMsgType::PX_DATUM_ROW), - disable_fast_append_(false), slice_info_bkts_(nullptr), slice_bkt_item_cnts_(nullptr), vectors_(&px_row_allocator_), + selector_array_(nullptr), + selector_cnt_(0), row_size_array_(nullptr), return_rows_(nullptr), - use_hash_reorder_(false) + use_hash_reorder_(false), + fallback_array_(nullptr), + fallback_cnt_(0), + blocks_(nullptr), + init_hash_reorder_struct_(false) { MEMSET(rand48_buf_, 0, sizeof(rand48_buf_)); } @@ -174,6 +179,7 @@ void ObPxTransmitOp::destroy() cur_transmit_sampled_rows_ = NULL; sampled_rows2transmit_.reset(); sampled_input_rows_.~ObRADatumStore(); + meta_.reset(); ObTransmitOp::destroy(); } @@ -213,6 +219,14 @@ int ObPxTransmitOp::inner_open() LOG_WARN("failed to alloc return rows", K(ret), K(get_spec().max_batch_size_)); } else if (OB_FAIL(vectors_.init(get_spec().output_.count()))) { LOG_WARN("failed to init vector array", K(ret)); + } else if (OB_ISNULL(fallback_array_ = static_cast + (px_row_allocator_.alloc(get_spec().max_batch_size_ * sizeof(uint16_t))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc row size 
array", K(ret), K(get_spec().max_batch_size_)); + } else if (OB_ISNULL(selector_array_ = static_cast + (px_row_allocator_.alloc(get_spec().max_batch_size_ * sizeof(uint16_t))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc row size array", K(ret), K(get_spec().max_batch_size_)); } } rand48_buf_[0] = 0x330E; // 0x330E is the arbitrary value of srand48 @@ -226,11 +240,16 @@ int ObPxTransmitOp::inner_open() OZ(fetch_first_row()); OZ(init_channel(*trans_input)); } + OZ(meta_.init(get_spec().output_, 0, false)); if (OB_SUCC(ret) && get_spec().use_rich_format_ && NULL == static_cast (get_spec()).tablet_id_expr_ && MY_SPEC.max_batch_size_ * task_channels_.count() <= MAX_BKT_FOR_REORDER) { - use_hash_reorder_ = true; + if (PX_VECTOR_ROW != data_msg_type_) { + use_hash_reorder_ = true; + } else { + init_hash_reorder_struct_ = true; + } if (OB_ISNULL(slice_info_bkts_ = static_cast (px_row_allocator_.alloc(task_channels_.count() * sizeof(uint16_t *))))) { ret = OB_ALLOCATE_MEMORY_FAILED; @@ -249,6 +268,36 @@ int ObPxTransmitOp::inner_open() } } } + if (OB_SUCC(ret) && get_spec().use_rich_format_ &&!use_hash_reorder_) { + data_msg_type_ = PX_VECTOR_ROW; + if (OB_ISNULL(blocks_ = + static_cast (px_row_allocator_.alloc(task_channels_.count() + * sizeof(ObTempRowStore::DtlRowBlock *))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc buffers", K(ret), K(task_channels_.count())); + } else if (OB_ISNULL(heads_ = static_cast (px_row_allocator_.alloc(task_channels_.count() + * sizeof(int64_t))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc heads", K(ret), K(task_channels_.count())); + } else if (OB_ISNULL(tails_ = static_cast (px_row_allocator_.alloc(task_channels_.count() + * sizeof(int64_t))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc tails", K(ret), K(task_channels_.count())); + } else if (OB_ISNULL(init_pos_ = static_cast (px_row_allocator_.alloc(task_channels_.count() + * sizeof(int64_t))))) { + 
ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc init sizes", K(ret), K(task_channels_.count())); + } else if (OB_ISNULL(channel_unobstructeds_ = static_cast (px_row_allocator_.alloc(task_channels_.count() + * sizeof(bool))))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("failed to alloc can adds", K(ret), K(task_channels_.count())); + } else { + memset(blocks_, 0, task_channels_.count() * sizeof(ObTempRowStore::DtlRowBlock *)); + memset(heads_, 0, task_channels_.count() * sizeof(int64_t)); + memset(tails_, 0, task_channels_.count() * sizeof(int64_t)); + memset(init_pos_, 0, task_channels_.count() * sizeof(int64_t)); + } + } } return ret; } @@ -867,7 +916,7 @@ int ObPxTransmitOp::send_rows_in_vector(ObSliceIdxCalc &slice_calc) LOG_WARN("eval expr failed", K(ret)); } } - if (use_hash_reorder_) { + if (use_hash_reorder_ || init_hash_reorder_struct_) { memset(slice_bkt_item_cnts_, 0, task_channels_.count() * sizeof(uint16_t)); } for (int64_t i = 0; OB_SUCC(ret) && i < brs_.size_; i++) { @@ -887,7 +936,7 @@ int ObPxTransmitOp::send_rows_in_vector(ObSliceIdxCalc &slice_calc) && OB_FAIL(slice_calc.get_previous_row_tablet_id(tablet_id))) { LOG_WARN("failed to get previous row tablet_id", K(ret)); } - if (!use_hash_reorder_) { + if (!use_hash_reorder_ && !init_hash_reorder_struct_) { LOG_DEBUG("[VEC2.0 PX] send rows vec without prefetch", K(i), K(slice_idx_array), K(tablet_id.get_int())); FOREACH_CNT_X(slice_idx, slice_idx_array, OB_SUCC(ret)) { if (OB_FAIL(send_row(*slice_idx, send_row_time_recorder, tablet_id.get_int(), i))) { @@ -906,7 +955,7 @@ int ObPxTransmitOp::send_rows_in_vector(ObSliceIdxCalc &slice_calc) } } } - if (OB_SUCC(ret) && use_hash_reorder_) { + if (OB_SUCC(ret) && (use_hash_reorder_ || init_hash_reorder_struct_)) { if (OB_FAIL(hash_reorder_send_batch(batch_info_guard))) { LOG_WARN("failed to send batch", K(ret)); } @@ -943,7 +992,7 @@ int ObPxTransmitOp::send_rows_in_vector(ObSliceIdxCalc &slice_calc) } } if (OB_FAIL(ret)) { - } else if 
(!disable_fast_append_ && use_hash_reorder_) { + } else if (use_hash_reorder_) { memset(slice_bkt_item_cnts_, 0, task_channels_.count() * sizeof(uint16_t)); for (int64_t i = 0; i < brs_.size_; ++i) { if (brs_.skip_->at(i)) { @@ -960,18 +1009,8 @@ int ObPxTransmitOp::send_rows_in_vector(ObSliceIdxCalc &slice_calc) if (OB_FAIL(hash_reorder_send_batch(batch_info_guard))) { LOG_WARN("failed to send batch", K(ret)); } - } else { - for (int64_t i = 0; OB_SUCC(ret) && i < brs_.size_; i++) { - if (brs_.skip_->at(i)) { - continue; - } - batch_info_guard.set_batch_idx(i); - metric_.count(); - int64_t slice_idx = indexes[i]; - if (OB_FAIL(send_row(indexes[i], send_row_time_recorder, tablet_id.get_int(), i))) { - LOG_WARN("fail emit row to interm result", K(ret), K(indexes[i])); - } - } + } else if (OB_FAIL(keep_order_send_batch(batch_info_guard, indexes))) { + LOG_WARN("failed to send batch", K(ret)); } } } @@ -1006,6 +1045,102 @@ int ObPxTransmitOp::send_rows_in_vector(ObSliceIdxCalc &slice_calc) return ret; } +void ObPxTransmitOp::fill_batch_ptrs(const int64_t *indexes) +{ + for (int64_t i = 0; i < brs_.size_; ++i) { + if (brs_.skip_->at(i)) { + continue; + } + if (ObSliceIdxCalc::DEFAULT_CHANNEL_IDX_TO_DROP_ROW == indexes[i]) { + op_monitor_info_.otherstat_1_value_++; + op_monitor_info_.otherstat_1_id_ = ObSqlMonitorStatIds::EXCHANGE_DROP_ROW_COUNT; + } else { + int64_t slice_idx = indexes[i]; + const int64_t row_size = row_size_array_[i]; + const int64_t head_pos = heads_[slice_idx]; + ObTempRowStore::DtlRowBlock *block = blocks_[slice_idx]; + if (nullptr == block + || !channel_unobstructeds_[slice_idx] + || row_size > tails_[slice_idx] - head_pos) { + fallback_array_[fallback_cnt_++] = i; + channel_unobstructeds_[slice_idx] = false; + } else { + ObCompactRow *ptr = reinterpret_cast (reinterpret_cast (block) + head_pos); + return_rows_[selector_cnt_] = ptr; + heads_[slice_idx] += row_size; + const static int64_t MEMSET_SIZE = 128; + while (heads_[slice_idx] > 
init_pos_[slice_idx]) { + if (init_pos_[slice_idx] + MEMSET_SIZE < tails_[slice_idx]) { + memset(reinterpret_cast (block) + init_pos_[slice_idx], 0, MEMSET_SIZE); + init_pos_[slice_idx] += MEMSET_SIZE; + } else { + memset(ptr, 0, row_size); + init_pos_[slice_idx] = heads_[slice_idx]; + } + } + block->cnt_ += 1; + selector_array_[selector_cnt_++] = i; + } + } + } +} + +int ObPxTransmitOp::keep_order_send_batch(ObEvalCtx::BatchInfoScopeGuard &batch_info_guard, const int64_t *indexes) +{ + int ret = OB_SUCCESS; + int64_t send_row_time_recorder = 0; + ObObj tablet_id; //not used + fallback_cnt_ = 0; + selector_cnt_ = 0; + if (vectors_.empty()) { + for (int64_t i = 0; OB_SUCC(ret) && i < get_spec().output_.count(); ++i) { + OZ (vectors_.push_back(get_spec().output_.at(i)->get_vector(eval_ctx_))); + } + } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(ObTempRowStore::DtlRowBlock::calc_rows_size(vectors_, meta_, + brs_, row_size_array_))) { + LOG_WARN("failed to calc size", K(ret)); + } else { + fill_batch_ptrs(indexes); + for (int64_t i = 0; i < selector_cnt_; ++i) { + return_rows_[i]->set_row_size(row_size_array_[selector_array_[i]]); + } + for (int64_t idx = 0; idx < get_spec().output_.count(); ++idx) { + vectors_.at(idx)->to_rows(meta_, return_rows_, + selector_array_, selector_cnt_, idx); + } + for (int64_t idx = 0; idx < task_channels_.count(); ++idx) { + if (nullptr != blocks_[idx]) { + blocks_[idx]->get_buffer()->fast_update_head(heads_[idx]); + } + } + for (int64_t i = 0; OB_SUCC(ret) && i < fallback_cnt_; i++) { + batch_info_guard.set_batch_idx(fallback_array_[i]); + metric_.count(); + int64_t slice_idx = indexes[fallback_array_[i]]; + ObDtlBasicChannel *channel = + static_cast (task_channels_.at(slice_idx)); + ObDtlVectorRowMsgWriter &row_writer = channel->get_vector_row_writer(); + if (nullptr != row_writer.get_write_buffer()) { + row_writer.get_write_buffer()->pos() = row_writer.used(); + } + if (OB_FAIL(send_row(slice_idx, send_row_time_recorder, 
tablet_id.get_int(), fallback_array_[i]))) { + LOG_WARN("fail emit row to interm result", K(ret), K(slice_idx)); + } else { + blocks_[slice_idx] = static_cast (task_channels_.at(slice_idx))->get_vector_row_writer().get_block(); + if (nullptr != blocks_[slice_idx]) { + heads_[slice_idx] = blocks_[slice_idx]->get_buffer()->head_pos(); + tails_[slice_idx] = blocks_[slice_idx]->get_buffer()->tail_pos(); + init_pos_[slice_idx] = heads_[slice_idx]; + channel_unobstructeds_[slice_idx] = true; + } + } + } + } + return ret; +} + int ObPxTransmitOp::send_eof_row() { int ret = OB_SUCCESS; @@ -1128,7 +1263,7 @@ int ObPxTransmitOp::send_row(int64_t slice_idx, op_monitor_info_.otherstat_1_value_++; op_monitor_info_.otherstat_1_id_ = ObSqlMonitorStatIds::EXCHANGE_DROP_ROW_COUNT; // TODO: shanting2.0. use opt when DTL 2.0 implemented. - } else if (!is_vectorized() || disable_fast_append_) { + } else if (!is_vectorized()) { is_send_row_normal = true; } else if (get_spec().use_rich_format_) { if (NULL != spec.tablet_id_expr_) { @@ -1155,17 +1290,7 @@ int ObPxTransmitOp::send_row(int64_t slice_idx, break; } case dtl::ObDtlMsgType::PX_VECTOR_ROW: { - ObDtlVectorRowMsgWriter &row_writer = channel->get_vector_row_writer(); - if (!row_writer.is_inited()) { - is_send_row_normal = true; - } else if (OB_FAIL(row_writer.try_append_row(spec.output_, eval_ctx_))) { - if (OB_BUF_NOT_ENOUGH != ret) { - LOG_WARN("failed to append row", K(ret)); - } else { - is_send_row_normal = true; - ret = OB_SUCCESS; - } - } + is_send_row_normal = true; break; } case dtl::ObDtlMsgType::PX_VECTOR: { @@ -1540,8 +1665,6 @@ void ObPxTransmitOp::init_data_msg_type(const common::ObIArray &output data_msg_type_ = dtl::ObDtlMsgType::PX_VECTOR_FIXED; } else if (3 == std::abs(err_sim)) { data_msg_type_ = dtl::ObDtlMsgType::PX_VECTOR; - } else if (4 == std::abs(err_sim)) { - disable_fast_append_ = true; } } } @@ -1601,7 +1724,6 @@ int ObPxTransmitOp::hash_reorder_send_batch(ObEvalCtx::BatchInfoScopeGuard &batc } break; } 
- case dtl::ObDtlMsgType::PX_VECTOR: case dtl::ObDtlMsgType::PX_VECTOR_ROW: { for (int64_t channel_idx = 0; OB_SUCC(ret) && channel_idx < task_channels_.count(); ++channel_idx) { if (0 == slice_bkt_item_cnts_[channel_idx]) { diff --git a/src/sql/engine/px/exchange/ob_px_transmit_op.h b/src/sql/engine/px/exchange/ob_px_transmit_op.h index 312f9a146..65dc2af86 100644 --- a/src/sql/engine/px/exchange/ob_px_transmit_op.h +++ b/src/sql/engine/px/exchange/ob_px_transmit_op.h @@ -31,6 +31,7 @@ #include "sql/engine/px/ob_px_basic_info.h" #include "sql/engine/basic/ob_ra_datum_store.h" #include "sql/engine/px/datahub/components/ob_dh_init_channel.h" +#include "sql/engine/basic/ob_compact_row.h" namespace oceanbase { @@ -176,6 +177,7 @@ private: int set_expect_range_count(); int wait_channel_ready_msg(); int hash_reorder_send_batch(ObEvalCtx::BatchInfoScopeGuard &batch_info_guard); + int keep_order_send_batch(ObEvalCtx::BatchInfoScopeGuard &batch_info_guard, const int64_t *indexes); int64_t get_random_seq() { return nrand48(rand48_buf_) % INT16_MAX; @@ -189,6 +191,7 @@ private: && !proxy.get_transmit_use_interm_result(); } int try_wait_channel(); void init_data_msg_type(const common::ObIArray &output); + void fill_batch_ptrs(const int64_t *indexes); dtl::ObDtlMsgType get_data_msg_type() const { return data_msg_type_; } protected: ObArray ch_blocks_; @@ -226,15 +229,24 @@ protected: unsigned short rand48_buf_[3]; bool receive_channel_ready_; dtl::ObDtlMsgType data_msg_type_; - bool disable_fast_append_; //slice_idx, batch_idx uint16_t **slice_info_bkts_; uint16_t *slice_bkt_item_cnts_; ObFixedArray vectors_; uint16_t *selector_array_; + int64_t selector_cnt_; uint32_t *row_size_array_; ObCompactRow **return_rows_; bool use_hash_reorder_; + RowMeta meta_; + uint16_t *fallback_array_; + int64_t fallback_cnt_; + ObTempRowStore::DtlRowBlock **blocks_; + int64_t *heads_; + int64_t *tails_; + int64_t *init_pos_; //memset from this pos + bool *channel_unobstructeds_; + bool 
init_hash_reorder_struct_; }; inline void ObPxTransmitOp::update_row(const ObExpr *expr, int64_t tablet_id) diff --git a/src/sql/engine/px/ob_px_admission.cpp b/src/sql/engine/px/ob_px_admission.cpp index 02637912f..9eb305a1d 100644 --- a/src/sql/engine/px/ob_px_admission.cpp +++ b/src/sql/engine/px/ob_px_admission.cpp @@ -154,8 +154,8 @@ int ObPxAdmission::enter_query_admission(ObSQLSessionInfo &session, } else if (admit_worker_count <= 0) { plan.inc_delayed_px_querys(); ret = OB_ERR_INSUFFICIENT_PX_WORKER; - LOG_INFO("It's a px query, out of px worker resource, " - "need delay, do not need disconnect", + LOG_INFO("This query is out of px worker resources and needs to be delayed; " + "disconnection is unnecessary.", K(admit_worker_count), K(plan.get_px_dop()), K(plan.get_plan_id()), diff --git a/src/sql/engine/px/ob_px_bloom_filter.cpp b/src/sql/engine/px/ob_px_bloom_filter.cpp index 5d5798501..b46ee3506 100644 --- a/src/sql/engine/px/ob_px_bloom_filter.cpp +++ b/src/sql/engine/px/ob_px_bloom_filter.cpp @@ -27,14 +27,27 @@ using namespace obrpc; #define MIN_FILTER_SIZE 256 #define MAX_BIT_COUNT 17179869184// 2^34 due to the memory single alloc limit #define BF_BLOCK_SIZE 256L -#define BLOCK_MASK 255L // = size of block - 1 #define CACHE_LINE_SIZE 64 // 64 bytes #define LOG_CACHE_LINE_SIZE 6 // = log2(CACHE_LINE_SIZE) #define FIXED_HASH_COUNT 4 -#define LOG_HASH_COUNT 2 // = log2(FIXED_HASH_COUNT) #define WORD_SIZE 64 // WORD_SIZE * FIXED_HASH_COUNT = BF_BLOCK_SIZE -#define HASH_SHIFT_MASK 63 +#define BLOCK_FILTER_HASH_MASK 0x3F3F3F3F // for each 8 bits, we only use the last 6 bits + +class BloomFilterPrefetchOP +{ +public: + BloomFilterPrefetchOP(ObPxBloomFilter *bloom_filter, uint64_t *hash_values) + : bloom_filter_(bloom_filter), hash_values_(hash_values) + {} + OB_INLINE int operator()(int64_t i) { + (void)bloom_filter_->prefetch_bits_block(hash_values_[i]); + return OB_SUCCESS; + } +private: + ObPxBloomFilter *bloom_filter_; + uint64_t *hash_values_; +}; // 
before assign, please set allocator for channel_ids_ first int BloomFilterIndex::assign(const BloomFilterIndex &other) @@ -103,6 +116,7 @@ int ObPxBloomFilter::assign(const ObPxBloomFilter &filter, int64_t tenant_id) set_allocator_attr(tenant_id); data_length_ = filter.data_length_; max_bit_count_ = filter.max_bit_count_; + block_mask_ = filter.block_mask_; bits_count_ = filter.bits_count_; fpp_ = filter.fpp_; hash_func_count_ = filter.hash_func_count_; @@ -140,6 +154,7 @@ int ObPxBloomFilter::init(const ObPxBloomFilter *filter) } else { data_length_ = filter->data_length_; max_bit_count_ = filter->max_bit_count_; + block_mask_ = filter->block_mask_; bits_count_ = filter->bits_count_; fpp_ = filter->fpp_; hash_func_count_ = filter->hash_func_count_; @@ -176,6 +191,7 @@ void ObPxBloomFilter::calc_num_of_bits() // min size is block size = 256. bits_count_ = ((n < MIN_FILTER_SIZE) ? MIN_FILTER_SIZE : (n >= max_bit_count_) ? max_bit_count_ : n + 1); + block_mask_ = (bits_count_ >> (LOG_HASH_COUNT + 6)) - 1; LOG_TRACE("calc num of bits", K(data_length_), K(fpp_), K(old_n), K(ori_n), K(bits_count_)); } @@ -203,12 +219,13 @@ int ObPxBloomFilter::put(uint64_t hash) ret = OB_NOT_INIT; LOG_WARN("the px bloom filter is not inited", K(ret)); } else { - uint32_t hash_high = (uint32_t)(hash >> 32); - uint64_t block_begin = (hash & ((bits_count_ >> (LOG_HASH_COUNT + 6)) - 1)) << LOG_HASH_COUNT; - (void)set(block_begin, 1L << (hash_high & HASH_SHIFT_MASK)); - (void)set(block_begin + 1, 1L << ((hash_high >> 8) & HASH_SHIFT_MASK)); - (void)set(block_begin + 2, 1L << ((hash_high >> 16) & HASH_SHIFT_MASK)); - (void)set(block_begin + 3, 1L << ((hash_high >> 24) & HASH_SHIFT_MASK)); + uint64_t block_begin = (hash & block_mask_) << LOG_HASH_COUNT; + uint32_t hash_high = ((uint32_t)(hash >> 32) & BLOCK_FILTER_HASH_MASK); + uint8_t *block_hash_vals = (uint8_t *)&hash_high; + (void)set(block_begin, 1L << block_hash_vals[0]); + (void)set(block_begin + 1, 1L << block_hash_vals[1]); + 
(void)set(block_begin + 2, 1L << block_hash_vals[2]); + (void)set(block_begin + 3, 1L << block_hash_vals[3]); } return ret; } @@ -223,19 +240,62 @@ int ObPxBloomFilter::put_batch(ObPxBFHashArray &hash_val_array) return ret; } +int ObPxBloomFilter::put_batch(uint64_t *batch_hash_values, const EvalBound &bound, + const ObBitVector &skip, bool &is_empty) +{ + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!is_inited_)) { + ret = OB_NOT_INIT; + LOG_WARN("the px bloom filter is not inited", K(ret)); + } else if (bound.get_all_rows_active()) { + uint32_t hash_high = 0; + uint8_t *block_hash_vals = (uint8_t *)&hash_high; + for (int64_t i = bound.start(); i < bound.end(); ++i) { + uint64_t block_begin = (batch_hash_values[i] & block_mask_) << LOG_HASH_COUNT; + hash_high = ((uint32_t)(batch_hash_values[i] >> 32) & BLOCK_FILTER_HASH_MASK); + (void)set(block_begin, 1L << block_hash_vals[0]); + (void)set(block_begin + 1, 1L << block_hash_vals[1]); + (void)set(block_begin + 2, 1L << block_hash_vals[2]); + (void)set(block_begin + 3, 1L << block_hash_vals[3]); + } + if (is_empty && bound.end() - bound.start() > 0) { + is_empty = false; + } + } else { + uint32_t hash_high = 0; + uint8_t *block_hash_vals = (uint8_t *)&hash_high; + for (int64_t i = bound.start(); i < bound.end(); ++i) { + if (skip.at(i)) { + } else { + uint64_t block_begin = (batch_hash_values[i] & block_mask_) << LOG_HASH_COUNT; + hash_high = ((uint32_t)(batch_hash_values[i] >> 32) & BLOCK_FILTER_HASH_MASK); + (void)set(block_begin, 1L << block_hash_vals[0]); + (void)set(block_begin + 1, 1L << block_hash_vals[1]); + (void)set(block_begin + 2, 1L << block_hash_vals[2]); + (void)set(block_begin + 3, 1L << block_hash_vals[3]); + if (is_empty) { + is_empty = false; + } + } + } + } + return ret; +} + int ObPxBloomFilter::might_contain_nonsimd(uint64_t hash, bool &is_match) { int ret = OB_SUCCESS; is_match = true; - uint32_t hash_high = (uint32_t)(hash >> 32); - uint64_t block_begin = (hash & ((bits_count_ >> (LOG_HASH_COUNT + 
6)) - 1)) << LOG_HASH_COUNT; - if (!get(block_begin, 1L << (hash_high & HASH_SHIFT_MASK))) { + uint64_t block_begin = (hash & block_mask_) << LOG_HASH_COUNT; + uint32_t hash_high = ((uint32_t)(hash >> 32) & BLOCK_FILTER_HASH_MASK); + uint8_t *block_hash_vals = (uint8_t *)&hash_high; + if (!get(block_begin, 1L << block_hash_vals[0])) { is_match = false; - } else if (!get(block_begin + 1, 1L << ((hash_high >> 8) & HASH_SHIFT_MASK))) { + } else if (!get(block_begin + 1, 1L << block_hash_vals[1])) { is_match = false; - } else if (!get(block_begin + 2, 1L << ((hash_high >> 16) & HASH_SHIFT_MASK))) { + } else if (!get(block_begin + 2, 1L << block_hash_vals[2])) { is_match = false; - } else if (!get(block_begin + 3, 1L << ((hash_high >> 24) & HASH_SHIFT_MASK))) { + } else if (!get(block_begin + 3, 1L << block_hash_vals[3])) { is_match = false; } return ret; @@ -266,7 +326,8 @@ int ObPxBloomFilter::merge_filter(ObPxBloomFilter *filter) do { old_v = bits_array_[i + filter->begin_idx_]; new_v = old_v | filter->bits_array_[i]; - } while(ATOMIC_CAS(&bits_array_[i + filter->begin_idx_], old_v, new_v) != old_v); + } while (old_v != new_v // do not write if old is equal to new + && ATOMIC_CAS(&bits_array_[i + filter->begin_idx_], old_v, new_v) != old_v); } } return ret; @@ -388,12 +449,6 @@ void ObPxBloomFilter::reset() allocator_.reset(); } -void ObPxBloomFilter::prefetch_bits_block(uint64_t hash) -{ - uint64_t block_begin = (hash & ((bits_count_ >> (LOG_HASH_COUNT + 6)) - 1)) << LOG_HASH_COUNT; - __builtin_prefetch(&bits_array_[block_begin], 0); -} - OB_DEF_SERIALIZE(ObPxBloomFilter) { int ret = OB_SUCCESS; @@ -452,6 +507,7 @@ OB_DEF_DESERIALIZE(ObPxBloomFilter) } } OB_UNIS_DECODE(max_bit_count_); + block_mask_ = (bits_count_ >> (LOG_HASH_COUNT + 6)) - 1; return ret; } @@ -475,10 +531,177 @@ OB_DEF_SERIALIZE_SIZE(ObPxBloomFilter) return len; } - void ObPxBloomFilter::dump_filter() - { - LOG_INFO("dump px bloom filter info:", K(*this)); - } +void ObPxBloomFilter::dump_filter() 
+{ + LOG_INFO("dump px bloom filter info:", K(*this)); +} + +namespace oceanbase +{ +namespace common +{ +OB_DECLARE_DEFAULT_AND_AVX512_CODE( + +template +class BloomFilterProbeOP +{ +public: + BloomFilterProbeOP(ResVec *res_vec, ObPxBloomFilter *bloom_filter, int64_t *bits_array, + int64_t block_mask, uint64_t *hash_values, int64_t &total_count, + int64_t &filter_count) + : res_vec_(res_vec), bloom_filter_(bloom_filter), bits_array_(bits_array), + block_mask_(block_mask), hash_values_(hash_values), total_count_(total_count), + filter_count_(filter_count) + {} + int operator()(int64_t i) + { + bool is_match = false; + constexpr int64_t is_match_payload = 1; +#if OB_USE_MULTITARGET_CODE + if (SUPPORT_SIMD) { + (void)common::specific::avx512::inline_might_contain_simd(bits_array_, block_mask_, + hash_values_[i], is_match); + } else { +#endif + (void)bloom_filter_->might_contain_nonsimd(hash_values_[i], is_match); +#if OB_USE_MULTITARGET_CODE + } +#endif + ++total_count_; + if (!is_match) { + ++filter_count_; + if (std::is_same::value) { + res_vec_->set_int(i, 0); + } + } else { + if (std::is_same::value) { + res_vec_->set_int(i, 1); + } else { + res_vec_->set_payload(i, &is_match_payload, sizeof(int64_t)); + } + } + return OB_SUCCESS; + } + +private: + ResVec *res_vec_; + ObPxBloomFilter *bloom_filter_; + int64_t *bits_array_; + int64_t block_mask_; + uint64_t *hash_values_; + int64_t &total_count_; + int64_t &filter_count_; +}; + +template +int inner_might_contain(ObPxBloomFilter *bloom_filter, int64_t *bits_array, + int64_t block_mask, const ObExpr &expr, ObEvalCtx &ctx, + const ObBitVector &skip, const EvalBound &bound, + uint64_t *hash_values, int64_t &total_count, + int64_t &filter_count) { + int ret = OB_SUCCESS; + ResVec *res_vec = static_cast(expr.get_vector(ctx)); + static const int64_t is_match_payload = 1; + bool is_match = true; + if (std::is_same::value) { + IntegerFixedVec *int_fixed_vec = reinterpret_cast(res_vec); + uint64_t *data = 
reinterpret_cast(int_fixed_vec->get_data()); + MEMSET(data + bound.start(), 0, (bound.range_size() * res_vec->get_length(0))); + } + + if (ALL_ROWS_ACTIVE) { + total_count += bound.end() - bound.start(); + for (int64_t i = bound.start(); i < bound.end(); ++i) { + (void)bloom_filter->prefetch_bits_block(hash_values[i]); + } + for (int64_t i = bound.start(); i < bound.end(); ++i) { +#if OB_USE_MULTITARGET_CODE + if (SUPPORT_SIMD) { + (void)specific::avx512::inline_might_contain_simd(bits_array, block_mask, hash_values[i], + is_match); + } else { +#endif + (void)bloom_filter->might_contain_nonsimd(hash_values[i], is_match); +#if OB_USE_MULTITARGET_CODE + } +#endif + if (!is_match) { + filter_count += 1; + if (std::is_same::value) { + res_vec->set_int(i, 0); + } + } else { + if (std::is_same::value) { + res_vec->set_int(i, 1); + } else { + res_vec->set_payload(i, &is_match_payload, sizeof(int64_t)); + } + } + } + } else { + BloomFilterPrefetchOP prefetch_op(bloom_filter, hash_values); + BloomFilterProbeOP probe_op(res_vec, bloom_filter, bits_array, block_mask, + hash_values, total_count, filter_count); + (void)ObBitVector::flip_foreach(skip, bound, prefetch_op); + (void)ObBitVector::flip_foreach(skip, bound, probe_op); + } + return ret; +} +) + +} // namespace common +} // namespace oceanbase + +#define BLOOM_FILTER_DISPATCH_ALL_ROWS_ACTIVATE(function, all_rows_active, support_simd, \ + res_format) \ + if (all_rows_active) { \ + BLOOM_FILTER_DISPATCH_SIMD(function, true, support_simd, res_format) \ + } else { \ + BLOOM_FILTER_DISPATCH_SIMD(function, false, support_simd, res_format) \ + } + +#define BLOOM_FILTER_DISPATCH_SIMD(function, all_rows_active, support_simd, res_format) \ + if (support_simd) { \ + BLOOM_FILTER_DISPATCH_RES_FORMAT(function, all_rows_active, true, res_format) \ + } else { \ + BLOOM_FILTER_DISPATCH_RES_FORMAT(function, all_rows_active, false, res_format) \ + } + +#define BLOOM_FILTER_DISPATCH_RES_FORMAT(function, all_rows_active, support_simd, 
res_format) \ + if (res_format == VEC_FIXED) { \ + ret = function( \ + this, bits_array_, block_mask_, expr, ctx, skip, bound, hash_values, total_count, \ + filter_count); \ + } else { \ + ret = function( \ + this, bits_array_, block_mask_, expr, ctx, skip, bound, hash_values, total_count, \ + filter_count); \ + } + +int ObPxBloomFilter::might_contain_vector(const ObExpr &expr, ObEvalCtx &ctx, + const ObBitVector &skip, const EvalBound &bound, + uint64_t *hash_values, int64_t &total_count, + int64_t &filter_count) +{ + int ret = OB_SUCCESS; + bool all_rows_active = bound.get_all_rows_active(); + VectorFormat res_format = expr.get_format(ctx); +#if OB_USE_MULTITARGET_CODE + if (common::is_arch_supported(ObTargetArch::AVX512)) { + constexpr bool support_simd = true; + BLOOM_FILTER_DISPATCH_ALL_ROWS_ACTIVATE(common::specific::avx512::inner_might_contain, + all_rows_active, support_simd, res_format) + } else { +#endif + constexpr bool support_simd = false; + BLOOM_FILTER_DISPATCH_ALL_ROWS_ACTIVATE(common::specific::normal::inner_might_contain, + all_rows_active, support_simd, res_format) +#if OB_USE_MULTITARGET_CODE + } +#endif + return ret; +} + //-------------------------------------分割线---------------------------- int ObPxBFStaticInfo::init(int64_t tenant_id, int64_t filter_id, int64_t server_id, bool is_shared, diff --git a/src/sql/engine/px/ob_px_bloom_filter.h b/src/sql/engine/px/ob_px_bloom_filter.h index c52592b18..17a568827 100644 --- a/src/sql/engine/px/ob_px_bloom_filter.h +++ b/src/sql/engine/px/ob_px_bloom_filter.h @@ -19,6 +19,13 @@ #include "lib/lock/ob_spin_lock.h" #include "share/config/ob_server_config.h" #include "observer/ob_server_struct.h" +#include "common/ob_target_specific.h" + +#if OB_USE_MULTITARGET_CODE +#include +#include +#endif + #ifndef __SQL_ENG_PX_BLOOM_FILTER_H__ #define __SQL_ENG_PX_BLOOM_FILTER_H__ @@ -27,6 +34,8 @@ namespace oceanbase namespace sql { +#define LOG_HASH_COUNT 2 // = log2(FIXED_HASH_COUNT) + typedef common::ObSEArray 
ObPxBFHashArray; struct BloomFilterReceiveCount @@ -66,8 +75,12 @@ public: inline int might_contain(uint64_t hash, bool &is_match) { return (this->*might_contain_)(hash, is_match); } + int might_contain_vector(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, + const EvalBound &bound, uint64_t *hash_values, int64_t &total_count, + int64_t &filter_count); int put(uint64_t hash); int put_batch(ObPxBFHashArray &hash_val_array); + int put_batch(uint64_t *batch_hash_values, const EvalBound &bound, const ObBitVector &skip, bool &is_empty); int merge_filter(ObPxBloomFilter *filter); int64_t get_value_true_count() const { return true_count_; }; void dump_filter(); //for debug @@ -88,13 +101,18 @@ public: void set_end_idx(int64_t idx) { end_idx_ = idx; } int64_t get_begin_idx() const { return begin_idx_; } int64_t get_end_idx() const { return end_idx_; } - void prefetch_bits_block(uint64_t hash); + inline void prefetch_bits_block(uint64_t hash) + { + uint64_t block_begin = (hash & block_mask_) << LOG_HASH_COUNT; + __builtin_prefetch(&bits_array_[block_begin], 0); + } typedef int (ObPxBloomFilter::*GetFunc)(uint64_t hash, bool &is_match); int generate_receive_count_array(int64_t piece_size); void reset(); int assign(const ObPxBloomFilter &filter, int64_t tenant_id); int regenerate(); void set_allocator_attr(int64_t tenant_id); + int might_contain_nonsimd(uint64_t hash, bool &is_match); TO_STRING_KV(K_(data_length), K_(bits_count), K_(fpp), K_(hash_func_count), K_(is_inited), K_(bits_array_length), K_(true_count)); private: @@ -103,9 +121,12 @@ private: void calc_num_of_hash_func(); void calc_num_of_bits(); void align_max_bit_count(int64_t max_filter_size); - int might_contain_nonsimd(uint64_t hash, bool &is_match); int might_contain_simd(uint64_t hash, bool &is_match); +#ifdef unittest_bloom_filter + int might_contain_batch(uint64_t *hash_values, int64_t batch_size); +#endif + private: int64_t data_length_; //原始数据长度 int64_t max_bit_count_; // max filter size, 
default 2GB, so the max bit count = 17179869184; @@ -127,6 +148,7 @@ public: int64_t px_bf_recieve_size_; // 预期应该收到的个数 volatile int64_t px_bf_merge_filter_count_; // 当前持有filter, 做merge filter操作的线程个数 ObArray receive_count_array_; + int64_t block_mask_; // for locating block DISALLOW_COPY_AND_ASSIGN(ObPxBloomFilter); }; @@ -307,7 +329,58 @@ private: int process_px_bloom_filter_data(); }; +} // namespace sql + +namespace common +{ +OB_DECLARE_AVX512_SPECIFIC_CODE(OB_INLINE void inline_might_contain_simd( + int64_t *bits_array, int64_t block_mask, uint64_t hash, bool &is_match) { + static const __m256i HASH_VALUES_MASK = _mm256_set_epi64x(24, 16, 8, 0); + uint32_t hash_high = (uint32_t)(hash >> 32); + uint64_t block_begin = (hash & block_mask) << LOG_HASH_COUNT; + __m256i bit_ones = _mm256_set1_epi64x(1); + __m256i hash_values = _mm256_set1_epi64x(hash_high); + hash_values = _mm256_srlv_epi64(hash_values, HASH_VALUES_MASK); + hash_values = _mm256_rolv_epi64(bit_ones, hash_values); + __m256i bf_values = _mm256_load_si256(reinterpret_cast<__m256i *>(&bits_array[block_begin])); + is_match = 1 == _mm256_testz_si256(~bf_values, hash_values); +}) + +#ifdef unittest_bloom_filter +OB_DECLARE_AVX512_SPECIFIC_CODE(void might_contain_batch_simd( + sql::ObPxBloomFilter *bloom_filter, int64_t *bits_array, int64_t block_mask, uint64_t *hash_values, + const int64_t &batch_size) { + bool is_match; + for (int64_t i = 0; i < batch_size; ++i) { + common::specific::avx512::inline_might_contain_simd(bits_array, block_mask, hash_values[i], + is_match); + } +}) +#endif +} // namespace common + +namespace sql { +#ifdef unittest_bloom_filter +int ObPxBloomFilter::might_contain_batch(uint64_t *hash_values, int64_t batch_size) +{ + int ret = OB_SUCCESS; + bool is_match; +#if OB_USE_MULTITARGET_CODE + if (common::is_arch_supported(ObTargetArch::AVX512)) { + common::specific::avx512::might_contain_batch_simd(this, bits_array_, block_mask_, hash_values, + batch_size); + } else { +#endif + for 
(int64_t i = 0; i < batch_size; ++i) { + might_contain_nonsimd(hash_values[i], is_match); + } +#if OB_USE_MULTITARGET_CODE + } +#endif + return ret; } +#endif +} // namespace sql } //end oceanbase #endif diff --git a/src/sql/engine/px/ob_px_bloom_filter_simd.cpp b/src/sql/engine/px/ob_px_bloom_filter_simd.cpp index 8ac0214ec..e202eafaf 100644 --- a/src/sql/engine/px/ob_px_bloom_filter_simd.cpp +++ b/src/sql/engine/px/ob_px_bloom_filter_simd.cpp @@ -26,15 +26,7 @@ int ObPxBloomFilter::might_contain_simd(uint64_t hash, bool &is_match) { int ret = OB_SUCCESS; #if defined(__x86_64__) - static const __m256i HASH_VALUES_MASK = _mm256_set_epi64x(24, 16, 8, 0); - uint32_t hash_high = (uint32_t)(hash >> 32); - uint64_t block_begin = (hash & ((bits_count_ >> (LOG_HASH_COUNT + 6)) - 1)) << LOG_HASH_COUNT; - __m256i bit_ones = _mm256_set1_epi64x(1); - __m256i hash_values = _mm256_set1_epi64x(hash_high); - hash_values = _mm256_srlv_epi64(hash_values, HASH_VALUES_MASK); - hash_values = _mm256_rolv_epi64(bit_ones, hash_values); - __m256i bf_values = _mm256_load_si256(reinterpret_cast<__m256i *>(&bits_array_[block_begin])); - is_match = 1 == _mm256_testz_si256(~bf_values, hash_values); + specific::avx512::inline_might_contain_simd(bits_array_, block_mask_, hash, is_match); #else ret = might_contain_nonsimd(hash, is_match); #endif diff --git a/src/sql/engine/px/p2p_datahub/ob_runtime_filter_msg.cpp b/src/sql/engine/px/p2p_datahub/ob_runtime_filter_msg.cpp index 2689fa401..071bddc0b 100644 --- a/src/sql/engine/px/p2p_datahub/ob_runtime_filter_msg.cpp +++ b/src/sql/engine/px/p2p_datahub/ob_runtime_filter_msg.cpp @@ -755,8 +755,26 @@ int ObRFBloomFilterMsg::fill_vec_result(ResVec *res_vec, const ObBitVector &skip { int ret = OB_SUCCESS; bool is_match = true; - const int64_t is_match_payload = 1; // for VEC_FIXED set set_payload, always 1 - if (OB_FAIL(ObBitVector::flip_foreach( + static const int64_t is_match_payload = 1; // for set_payload, always 1 + if (bound.get_all_rows_active()) 
{ + for (int64_t i = bound.start(); i < bound.end(); ++i) { + (void)bloom_filter_.prefetch_bits_block(hash_values[i]); + } + for (int64_t i = bound.start(); i < bound.end(); ++i) { + (void)bloom_filter_.might_contain(hash_values[i], is_match); + if (is_match) { + if (ResFormat == VEC_FIXED) { + res_vec->set_payload(i, &is_match_payload, sizeof(int64_t)); + } else { + res_vec->set_int(i, is_match_payload); + } + } else { + // already set not match in preset_not_match + filter_count += 1; + } + } + total_count += bound.end() - bound.start(); + } else if (OB_FAIL(ObBitVector::flip_foreach( skip, bound, [&](int64_t idx) __attribute__((always_inline)) { bloom_filter_.prefetch_bits_block(hash_values[idx]); return OB_SUCCESS; @@ -798,13 +816,6 @@ int ObRFBloomFilterMsg::do_might_contain_vector( uint64_t seed = ObExprJoinFilter::JOIN_FILTER_SEED; ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); uint64_t *hash_values = filter_ctx.right_hash_vals_; - VectorFormat res_format = expr.get_format(ctx); - if (VEC_FIXED == res_format) { - IntegerFixedVec *res_vec = static_cast(expr.get_vector(ctx)); - if (OB_FAIL(preset_not_match(res_vec, bound))) { - LOG_WARN("failed to preset_not_match", K(ret)); - } - } for (int64_t i = 0; OB_SUCC(ret) && i < expr.arg_cnt_; ++i) { ObExpr *e = expr.args_[i]; @@ -820,14 +831,9 @@ int ObRFBloomFilterMsg::do_might_contain_vector( } } if (OB_FAIL(ret)) { - } else if (VEC_UNIFORM == res_format) { - IntegerUniVec *res_vec = static_cast(expr.get_vector(ctx)); - ret = fill_vec_result(res_vec, skip, bound, hash_values, - total_count, filter_count); - } else if (VEC_FIXED == res_format) { - IntegerFixedVec *res_vec = static_cast(expr.get_vector(ctx)); - ret = fill_vec_result(res_vec, skip, bound, hash_values, - total_count, filter_count); + } else { + ret = bloom_filter_.might_contain_vector(expr, ctx, skip, bound, hash_values, total_count, + filter_count); } if (OB_FAIL(ret)) { } else { @@ -947,14 +953,9 @@ int 
ObRFBloomFilterMsg::insert_by_row_vector( arg_vec->murmur_hash_v3(*expr, batch_hash_values, *(child_brs->skip_), bound, is_batch_seed ? batch_hash_values : &seed, is_batch_seed); } } - for (int64_t i = 0; OB_SUCC(ret) && i < child_brs->size_; ++i) { - if (child_brs->skip_->at(i)) { - continue; - } else if (OB_FAIL(bloom_filter_.put(batch_hash_values[i]))) { - LOG_WARN("fail to put hash value to px bloom filter", K(ret)); - } else if (is_empty_) { - is_empty_ = false; - } + if (OB_FAIL(ret)) { + } else if (OB_FAIL(bloom_filter_.put_batch(batch_hash_values, bound, *child_brs->skip_, is_empty_))) { + LOG_WARN("failed to push hash value to px bloom filter"); } } } diff --git a/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.cpp b/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.cpp index c149080bc..a27dea059 100644 --- a/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.cpp +++ b/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.cpp @@ -27,6 +27,60 @@ using namespace oceanbase::common; using namespace oceanbase::sql; using namespace oceanbase::share; +class SmallHashSetBatchInsertOP +{ +public: + SmallHashSetBatchInsertOP(ObSmallHashSet &sm_hash_set, uint64_t *batch_hash_values) + : sm_hash_set_(sm_hash_set), batch_hash_values_(batch_hash_values) + {} + OB_INLINE int operator()(int64_t batch_i) + { + return sm_hash_set_.insert_hash(batch_hash_values_[batch_i]); + } + +private: + ObSmallHashSet &sm_hash_set_; + uint64_t *batch_hash_values_; +}; + +template +class InFilterProbeOP +{ +public: + InFilterProbeOP(ObSmallHashSet &sm_hash_set, ResVec *res_vec, uint64_t *right_hash_values, + int64_t &total_count, int64_t &filter_count) + : sm_hash_set_(sm_hash_set), res_vec_(res_vec), right_hash_values_(right_hash_values), + total_count_(total_count), filter_count_(filter_count) + {} + OB_INLINE int operator()(int64_t batch_i) + { + bool is_match = false; + constexpr int64_t is_match_payload = 1; + total_count_ += 1; + is_match = 
sm_hash_set_.test_hash(right_hash_values_[batch_i]); + if (!is_match) { + filter_count_++; + if (std::is_same::value) { + res_vec_->set_int(batch_i, 0); + } + } else { + if (std::is_same::value) { + res_vec_->set_int(batch_i, 1); + } else { + res_vec_->set_payload(batch_i, &is_match_payload, sizeof(int64_t)); + } + } + return OB_SUCCESS; + } +private: + ObSmallHashSet &sm_hash_set_; + ResVec *res_vec_; + uint64_t *right_hash_values_; + int64_t &total_count_; + int64_t &filter_count_; +}; + + template static int proc_filter_not_active(ResVec *res_vec, const ObBitVector &skip, const EvalBound &bound); @@ -204,11 +258,15 @@ OB_DEF_DESERIALIZE(ObRFInFilterVecMsg) "RFDEInFilter", "RFDEInFilter"))) { LOG_WARN("fail to init in hash set", K(ret)); + } else if (OB_FAIL(sm_hash_set_.init(buckets_cnt, tenant_id_))) { + LOG_WARN("faield to init small hash set", K(row_cnt)); } for (int64_t i = 0; OB_SUCC(ret) && i < row_cnt; ++i) { ObRFInFilterNode node(&build_row_cmp_info_, &build_row_meta_, row_store_.get_row(i), nullptr); if (OB_FAIL(rows_set_.set_refactored(node))) { LOG_WARN("fail to insert in filter node", K(ret)); + } else if (OB_FAIL(sm_hash_set_.insert_hash(row_store_.get_hash_value(i, build_row_meta_)))) { + LOG_WARN("fail to insert hash value into sm_hash_set_", K(ret)); } } } @@ -1044,6 +1102,8 @@ int ObRFInFilterVecMsg::assign(const ObP2PDatahubMsgBase &msg) LOG_WARN("fail to assign row_store_", K(ret)); } else if (OB_FAIL(rows_set_.create(bucket_cnt * 2, "RFCPInFilter", "RFCPInFilter"))) { LOG_WARN("fail to init in hash set", K(ret)); + } else if (OB_FAIL(sm_hash_set_.init(bucket_cnt, tenant_id_))) { + LOG_WARN("failed to init sm_hash_set_", K(other_msg.row_store_.get_row_cnt())); } else if (OB_FAIL(hash_funcs_for_insert_.assign(other_msg.hash_funcs_for_insert_))) { LOG_WARN("fail to assign hash_funcs_for_insert_", K(ret)); } else if (OB_FAIL(query_range_info_.assign(other_msg.query_range_info_))) { @@ -1058,6 +1118,8 @@ int ObRFInFilterVecMsg::assign(const 
ObP2PDatahubMsgBase &msg) nullptr); if (OB_FAIL(rows_set_.set_refactored(node))) { LOG_WARN("fail to insert in filter node", K(ret)); + } else if (OB_FAIL(sm_hash_set_.insert_hash(row_store_.get_hash_value(i, build_row_meta_)))) { + LOG_WARN("fail to insert hash value into sm_hash_set_", K(ret)); } } } @@ -1107,6 +1169,13 @@ int ObRFInFilterVecMsg::insert_by_row_vector( } } + SmallHashSetBatchInsertOP sm_hash_set_batch_ins_op(sm_hash_set_, batch_hash_values); + if (OB_FAIL(ret)) { + } else if (OB_FAIL( + ObBitVector::flip_foreach(*child_brs->skip_, bound, sm_hash_set_batch_ins_op))) { + LOG_WARN("failed insert batch_hash_values into sm_hash_set_"); + } + ObRowWithHash &cur_row = cur_row_with_hash_; ObDatum datum; ObEvalCtx::BatchInfoScopeGuard batch_info_guard(eval_ctx); @@ -1348,6 +1417,9 @@ int ObRFInFilterVecMsg::merge(ObP2PDatahubMsgBase &msg) nullptr /*row_with_hash*/); if (OB_FAIL(try_merge_node(node, row_size))) { LOG_WARN("fail to insert node", K(ret)); + } else if (OB_FAIL(sm_hash_set_.insert_hash( + other_msg.row_store_.get_hash_value(i, build_row_meta_)))) { + LOG_WARN("failed to insert hash value into sm_hash_set_"); } } } @@ -1360,6 +1432,7 @@ int ObRFInFilterVecMsg::reuse() is_empty_ = true; row_store_.reset(); rows_set_.reuse(); + sm_hash_set_.clear(); (void)reuse_query_range(); return ret; } @@ -1616,6 +1689,93 @@ int ObRFInFilterVecMsg::do_might_contain_vector( return ret; } +template +int ObRFInFilterVecMsg::do_might_contain_vector_impl( + const ObExpr &expr, + ObEvalCtx &ctx, + const ObBitVector &skip, + const EvalBound &bound, + ObExprJoinFilter::ObExprJoinFilterContext &filter_ctx) +{ + int ret = OB_SUCCESS; + int64_t total_count = 0; + int64_t filter_count = 0; + uint64_t seed = ObExprJoinFilter::JOIN_FILTER_SEED; + ObBitVector &eval_flags = expr.get_evaluated_flags(ctx); + uint64_t *right_hash_vals = filter_ctx.right_hash_vals_; + ResVec *res_vec = static_cast(expr.get_vector(ctx)); + + if (std::is_same::value) { + IntegerFixedVec *res_vec = 
static_cast(expr.get_vector(ctx)); + if (OB_FAIL(preset_not_match(res_vec, bound))) { + LOG_WARN("failed to preset_not_match", K(ret)); + } + } + + for (int64_t i = 0; OB_SUCC(ret) && i < expr.arg_cnt_; ++i) { + ObExpr *e = expr.args_[i]; + if (OB_FAIL(e->eval_vector(ctx, skip, bound))) { + LOG_WARN("evaluate vector failed", K(ret), K(*e)); + } else { + const bool is_batch_seed = (i > 0); + ObIVector *arg_vec = e->get_vector(ctx); + if (OB_FAIL(arg_vec->murmur_hash_v3(*e, right_hash_vals, skip, + bound, is_batch_seed ? right_hash_vals : &seed, is_batch_seed))) { + LOG_WARN("failed to cal hash"); + } + } + } + +#define IN_FILTER_PROBE_HELPER \ + is_match = sm_hash_set_.test_hash(right_hash_vals[batch_i]); \ + if (!is_match) { \ + filter_count++; \ + if (std::is_same::value) { \ + res_vec->set_int(batch_i, 0); \ + } \ + } else { \ + if (std::is_same::value) { \ + res_vec->set_int(batch_i, 1); \ + } else { \ + res_vec->set_payload(batch_i, &is_match_payload, sizeof(int64_t)); \ + } \ + } + + if (OB_FAIL(ret)) { + } else { + ObEvalCtx::BatchInfoScopeGuard batch_info_guard(ctx); + batch_info_guard.set_batch_size(bound.batch_size()); + bool is_match = true; + const int64_t is_match_payload = 1; // for VEC_FIXED set set_payload, always 1 + if (bound.get_all_rows_active()) { + total_count += bound.end() - bound.start(); + for (int64_t batch_i = bound.start(); batch_i < bound.end() && OB_SUCC(ret); ++batch_i) { + IN_FILTER_PROBE_HELPER + } + } else { + InFilterProbeOP in_filter_probe_op(sm_hash_set_, res_vec, right_hash_vals, + total_count, filter_count); + (void)ObBitVector::flip_foreach(skip, bound, in_filter_probe_op); + } + if (OB_SUCC(ret)) { + eval_flags.set_all(true); + filter_ctx.total_count_ += total_count; + filter_ctx.check_count_ += total_count; + filter_ctx.filter_count_ += filter_count; + ObExprJoinFilter::collect_sample_info_batch(filter_ctx, filter_count, total_count); + } + } +#undef IN_FILTER_PROBE_HELPER + return ret; +} + +#define 
IN_FILTER_DISPATCH_RES_FORMAT(function, res_format) \ + if (res_format == VEC_FIXED) { \ + ret = function(expr, ctx, skip, bound, filter_ctx); \ + } else { \ + ret = function(expr, ctx, skip, bound, filter_ctx); \ + } + int ObRFInFilterVecMsg::might_contain_vector( const ObExpr &expr, ObEvalCtx &ctx, @@ -1655,8 +1815,9 @@ int ObRFInFilterVecMsg::might_contain_vector( filter_ctx.check_count_ += total_count; filter_ctx.total_count_ += total_count; } - } else if (OB_FAIL(do_might_contain_vector(expr, ctx, skip, bound, filter_ctx))) { - LOG_WARN("fail to do might contain vector"); + } else { + VectorFormat res_format = expr.get_format(ctx); + IN_FILTER_DISPATCH_RES_FORMAT(do_might_contain_vector_impl, res_format); } return ret; } @@ -1676,7 +1837,6 @@ int ObRFInFilterVecMsg::prepare_storage_white_filter_data(ObDynamicFilterExecuto is_data_prepared = true; } else { for (int64_t i = 0; i < row_store_.get_row_cnt() && OB_SUCC(ret); ++i) { - // row_store_.get_row(i)->get_datum(build_row_meta_, col_idx); if (OB_FAIL(params.push_back(row_store_.get_row(i)->get_datum(build_row_meta_, col_idx)))) { LOG_WARN("failed to push back"); } @@ -1697,6 +1857,7 @@ int ObRFInFilterVecMsg::destroy() build_row_meta_.reset(); cur_row_with_hash_.row_.reset(); rows_set_.destroy(); + sm_hash_set_.~ObSmallHashSet(); need_null_cmp_flags_.reset(); row_store_.reset(); hash_funcs_for_insert_.reset(); diff --git a/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.h b/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.h index 6043137a3..c14d3f872 100644 --- a/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.h +++ b/src/sql/engine/px/p2p_datahub/ob_runtime_filter_vec_msg.h @@ -20,6 +20,7 @@ #include "sql/engine/px/p2p_datahub/ob_p2p_dh_msg.h" #include "sql/engine/basic/ob_compact_row.h" #include "sql/engine/px/p2p_datahub/ob_runtime_filter_query_range.h" +#include "src/sql/engine/px/p2p_datahub/ob_small_hashset.h" namespace oceanbase { @@ -200,6 +201,9 @@ public: int assign(const 
ObRFInFilterRowStore &other); void reset(); inline ObCompactRow *get_row(int64_t idx) { return serial_rows_.at(idx); } + inline uint64_t get_hash_value(int64_t idx, const RowMeta &row_meta) { + return serial_rows_.at(idx)->extra_payload(row_meta); + } inline int64_t get_row_size(int64_t idx) { return row_sizes_.at(idx); } inline int64_t get_row_cnt() const { return serial_rows_.count(); } inline int add_row(ObCompactRow *new_row, int64_t row_size); @@ -240,7 +244,7 @@ public: build_row_meta_(&allocator_), cur_row_with_hash_(allocator_), rows_set_(), row_store_(allocator_), need_null_cmp_flags_(allocator_), max_in_num_(0), hash_funcs_for_insert_(allocator_),query_range_info_(allocator_), - query_range_(), is_query_range_ready_(false), query_range_allocator_() + query_range_(), is_query_range_ready_(false), query_range_allocator_(), sm_hash_set_() {} virtual int assign(const ObP2PDatahubMsgBase &); virtual int merge(ObP2PDatahubMsgBase &) final; @@ -313,6 +317,11 @@ private: const ObBitVector &skip, const EvalBound &bound, ObExprJoinFilter::ObExprJoinFilterContext &filter_ctx); + + template + int do_might_contain_vector_impl(const ObExpr &expr, ObEvalCtx &ctx, const ObBitVector &skip, + const EvalBound &bound, + ObExprJoinFilter::ObExprJoinFilterContext &filter_ctx); int prepare_query_ranges(); int process_query_ranges_with_deduplicate(); int process_query_ranges_without_deduplicate(); @@ -347,6 +356,7 @@ public: bool is_query_range_ready_; // not need to serialize common::ObArenaAllocator query_range_allocator_; // ---end--- + ObSmallHashSet sm_hash_set_; }; diff --git a/src/sql/engine/px/p2p_datahub/ob_small_hashset.h b/src/sql/engine/px/p2p_datahub/ob_small_hashset.h new file mode 100644 index 000000000..6d0b5230a --- /dev/null +++ b/src/sql/engine/px/p2p_datahub/ob_small_hashset.h @@ -0,0 +1,178 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. 
+ * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. + */ + +#include "lib/ob_define.h" +#include "lib/allocator/page_arena.h" +#include "lib/utility/ob_macro_utils.h" +#include "lib/utility/utility.h" + +namespace oceanbase{ +namespace sql { +/** + * @brief A simple hash container composed of unique uint64_t keys implemented with opened + * addressing. The capacity of the ObSmallHashSet is 2^n. + * @tparam _Accurate Whether need to seek whole hashset when meeting conflict. + * If _Accurate = true, it works as a normal hashset. + * If _Accurate = false, it only MAX_SEEK_TIMES when meeting hash confilct. That is to say, + * when testing if an element is in the ObSmallHashSet, false positives are possible. It will either + * say that an element is definitely not in the set or that it is possible the element is in the + * set. 
+ * @ + */ + +template +class ObSmallHashSet +{ +public: + using bucket_t = uint64_t; + ~ObSmallHashSet() {} + + int init(uint64_t capacity, int64_t tenant_id) + { + int ret = OB_SUCCESS; + alloc_.set_tenant_id(tenant_id); + alloc_.set_label("ObSmallHashSet"); + if (OB_FAIL(expand(capacity))) { + COMMON_LOG(WARN, "failed to expand when init"); + } else { + inited_ = true; + } + return ret; + } + + void clear() + { + memset(buckets_, 0, sizeof(bucket_t) * capacity_); + size_ = 0; + } + + inline uint64_t size() { + return size_; + } + + inline int insert_hash_batch(uint64_t* hashs, uint64_t batch_size) + { + int ret = OB_SUCCESS; + if (OB_UNLIKELY(!inited_)) { + ret = OB_NOT_INIT; + COMMON_LOG(ERROR, "not inited"); + } + for (int64_t i = 0; i < batch_size && OB_SUCC(ret); ++i) { + ret = insert_hash(hashs[i]); + } + return ret; + } + + inline int insert_hash(uint64_t hash) + { + int ret = OB_SUCCESS; + hash |= KEY_MASK; + uint64_t offset = hash & bucket_mask_; + while ((buckets_[offset] != EMPTY_KEY) && (buckets_[offset] != hash)) { + offset = (++offset) & bucket_mask_; + } + if (buckets_[offset] != hash) { + buckets_[offset] = hash; + size_++; + if (size_ * 2 > capacity_ && OB_FAIL(expand(capacity_))) { + COMMON_LOG(WARN, "failed to expand", K(capacity_)); + } + } + return ret; + } + + inline bool test_hash(uint64_t hash) + { + bool find = false; + hash |= KEY_MASK; + uint64_t offset = hash & bucket_mask_; + uint64_t i = 0; + for (i = 0; i < capacity_; ++i) { + if (EMPTY_KEY == buckets_[offset]) { + break; + } else if (buckets_[offset] == hash) { + find = true; + break; + } else if (!_Accurate) { + if (i > MAX_SEEK_TIMES) { + // no seek more, return true + find = true; + break; + } + } + offset = (++offset) & bucket_mask_; + } +#ifdef unittest + seek_total_times_ += i; +#endif + return find; + } + +private: + uint64_t normalize_capacity(uint64_t n) + { + return max(MIN_BUCKET_SIZE, next_pow2(2 * n)); + } + + int expand(uint64_t capacity) { + int ret = OB_SUCCESS; + 
uint64_t new_capacity = normalize_capacity(capacity); + void *buf = nullptr; + if (OB_ISNULL(buf = alloc_.alloc_aligned(sizeof(bucket_t) * new_capacity, CACHE_LINE_SIZE))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + COMMON_LOG(WARN, "failed to allocate bucket memory", K(new_capacity)); + } else { + bucket_t *old_buckets = buckets_; + buckets_= static_cast(buf); + uint64_t old_capacity = capacity_; + capacity_ = new_capacity; + bucket_mask_ = capacity_ - 1; + // init new bucket + memset(buckets_, 0, sizeof(bucket_t) * new_capacity); + // move data + for (uint64_t i = 0; i < old_capacity; ++i) { + uint64_t &hash = old_buckets[i]; + if (hash == EMPTY_KEY) { + continue; + } + uint64_t offset = hash & bucket_mask_; + while ((buckets_[offset] != EMPTY_KEY) && (buckets_[offset] != hash)) { + offset = (++offset) & bucket_mask_; + } + buckets_[offset] = hash; + } + } + COMMON_LOG(DEBUG, "expand capacity to ", K(capacity_)); + return ret; + } + +private: + static constexpr uint64_t EMPTY_KEY = 0UL; + static constexpr uint64_t KEY_MASK = 1UL << 63; + static constexpr int64_t MIN_BUCKET_SIZE = 128; + static constexpr int64_t CACHE_LINE_SIZE = 64; + static constexpr int64_t MAX_SEEK_TIMES = 8; + +private: + bool inited_{false}; + bucket_t *buckets_{nullptr}; + uint64_t bucket_mask_{0}; + uint64_t capacity_{0}; + uint64_t size_{0}; + common::ObArenaAllocator alloc_; +#ifdef unittest + uint64_t seek_total_times_{0}; +#endif +}; + +} // namespace sql +} // namespace oceanbases diff --git a/src/sql/plan_cache/ob_plan_cache_util.cpp b/src/sql/plan_cache/ob_plan_cache_util.cpp index 4f989a537..99b4f0b33 100644 --- a/src/sql/plan_cache/ob_plan_cache_util.cpp +++ b/src/sql/plan_cache/ob_plan_cache_util.cpp @@ -523,6 +523,7 @@ int ObConfigInfoInPC::load_influence_plan_config() enable_newsort_ = GCONF._enable_newsort; is_strict_defensive_check_ = GCONF.enable_strict_defensive_check(); is_enable_px_fast_reclaim_ = GCONF._enable_px_fast_reclaim; + bloom_filter_ratio_ = 
GCONF._bloom_filter_ratio; // For Tenant configs // tenant config use tenant_config to get configs @@ -588,6 +589,9 @@ int ObConfigInfoInPC::serialize_configs(char *buf, int buf_len, int64_t &pos) } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, "%d", enable_var_assign_use_das_))) { SQL_PC_LOG(WARN, "failed to databuff_printf", K(ret), K(enable_var_assign_use_das_)); + } else if (OB_FAIL(databuff_printf(buf, buf_len, pos, + "%d", bloom_filter_ratio_))) { + SQL_PC_LOG(WARN, "failed to databuff_printf", K(ret), K(bloom_filter_ratio_)); } else { // do nothing } diff --git a/src/sql/plan_cache/ob_plan_cache_util.h b/src/sql/plan_cache/ob_plan_cache_util.h index 6f13e8248..5d68ae6f1 100644 --- a/src/sql/plan_cache/ob_plan_cache_util.h +++ b/src/sql/plan_cache/ob_plan_cache_util.h @@ -1026,6 +1026,7 @@ public: is_enable_px_fast_reclaim_(false), enable_spf_batch_rescan_(false), enable_var_assign_use_das_(false), + bloom_filter_ratio_(0), cluster_config_version_(-1), tenant_config_version_(-1), tenant_id_(0) @@ -1069,6 +1070,7 @@ public: bool is_enable_px_fast_reclaim_; bool enable_spf_batch_rescan_; bool enable_var_assign_use_das_; + int bloom_filter_ratio_; private: // current cluster config version_ diff --git a/src/storage/access/ob_aggregated_store.cpp b/src/storage/access/ob_aggregated_store.cpp index 38274bc4c..0c23d6cbd 100644 --- a/src/storage/access/ob_aggregated_store.cpp +++ b/src/storage/access/ob_aggregated_store.cpp @@ -92,7 +92,7 @@ int ObCGAggCells::process( const ObTableAccessContext &context, const int32_t col_offset, blocksstable::ObIMicroBlockReader *reader, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count) { int ret = OB_SUCCESS; diff --git a/src/storage/access/ob_aggregated_store.h b/src/storage/access/ob_aggregated_store.h index 714434a99..dc11c0c17 100644 --- a/src/storage/access/ob_aggregated_store.h +++ b/src/storage/access/ob_aggregated_store.h @@ -46,7 +46,7 @@ public: const ObTableAccessContext &context, const 
int32_t col_idx, blocksstable::ObIMicroBlockReader *reader, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count); int process(blocksstable::ObStorageDatum &datum, const uint64_t row_count); int process(const blocksstable::ObMicroIndexInfo &index_info); diff --git a/src/storage/access/ob_block_batched_row_store.cpp b/src/storage/access/ob_block_batched_row_store.cpp index 24f570018..48508b10c 100644 --- a/src/storage/access/ob_block_batched_row_store.cpp +++ b/src/storage/access/ob_block_batched_row_store.cpp @@ -27,7 +27,7 @@ static bool copy_row_ids( const int64_t offset, const int64_t cap, const int64_t step, - int64_t *row_ids); + int32_t *row_ids); ObBlockBatchedRowStore::ObBlockBatchedRowStore( const int64_t batch_size, @@ -59,14 +59,14 @@ int ObBlockBatchedRowStore::init(const ObTableAccessParam &param) ret = common::OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc cell data ptr", K(ret), K(batch_size_)); } else if (FALSE_IT(cell_data_ptrs_ = reinterpret_cast(buf))) { - } else if (OB_ISNULL(buf = context_.stmt_allocator_->alloc(sizeof(int64_t) * batch_size_))) { + } else if (OB_ISNULL(buf = context_.stmt_allocator_->alloc(sizeof(int32_t) * batch_size_))) { ret = common::OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc row_ids", K(ret), K(batch_size_)); } else if (OB_ISNULL(len_array_buf = context_.stmt_allocator_->alloc(sizeof(uint32_t) * batch_size_))) { ret = common::OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc len_array_buf", K(ret), K_(batch_size)); } else { - row_ids_ = reinterpret_cast(buf); + row_ids_ = reinterpret_cast(buf); len_array_ = reinterpret_cast(len_array_buf); } return ret; @@ -187,8 +187,8 @@ int ObBlockBatchedRowStore::get_row_ids( } static const int32_t DEFAULT_BATCH_ROW_COUNT = 1024; -static int64_t default_batch_row_ids_[DEFAULT_BATCH_ROW_COUNT]; -static int64_t default_batch_reverse_row_ids_[DEFAULT_BATCH_ROW_COUNT]; +static int32_t default_batch_row_ids_[DEFAULT_BATCH_ROW_COUNT]; +static int32_t
default_batch_reverse_row_ids_[DEFAULT_BATCH_ROW_COUNT]; static void __attribute__((constructor)) init_row_ids_array() { for (int32_t i = 0; i < DEFAULT_BATCH_ROW_COUNT; i++) { @@ -200,14 +200,14 @@ bool copy_row_ids( const int64_t offset, const int64_t cap, const int64_t step, - int64_t *row_ids) + int32_t *row_ids) { bool is_success = false; if (1 == step && offset + cap <= DEFAULT_BATCH_ROW_COUNT) { - memcpy(row_ids, default_batch_row_ids_ + offset, sizeof(int64_t ) * cap); + MEMCPY(row_ids, default_batch_row_ids_ + offset, sizeof(int32_t ) * cap); is_success = true; } else if (-1 == step && offset < DEFAULT_BATCH_ROW_COUNT) { - memcpy(row_ids, default_batch_reverse_row_ids_ + DEFAULT_BATCH_ROW_COUNT - offset - 1, sizeof(int64_t ) * cap); + MEMCPY(row_ids, default_batch_reverse_row_ids_ + DEFAULT_BATCH_ROW_COUNT - offset - 1, sizeof(int32_t ) * cap); is_success = true; } return is_success; diff --git a/src/storage/access/ob_block_batched_row_store.h b/src/storage/access/ob_block_batched_row_store.h index 4976f42c0..8e488ae8c 100644 --- a/src/storage/access/ob_block_batched_row_store.h +++ b/src/storage/access/ob_block_batched_row_store.h @@ -83,7 +83,7 @@ protected: int64_t batch_size_; int64_t row_capacity_; const char **cell_data_ptrs_; - int64_t *row_ids_; + int32_t *row_ids_; uint32_t *len_array_; sql::ObEvalCtx &eval_ctx_; }; diff --git a/src/storage/access/ob_index_sstable_estimator.cpp b/src/storage/access/ob_index_sstable_estimator.cpp index 2e186287e..f71f0ec4f 100644 --- a/src/storage/access/ob_index_sstable_estimator.cpp +++ b/src/storage/access/ob_index_sstable_estimator.cpp @@ -330,7 +330,6 @@ int ObIndexBlockScanEstimator::prefetch_index_block_data( micro_handle.block_state_ = ObSSTableMicroBlockState::IN_BLOCK_IO; micro_handle.micro_info_.offset_ = micro_index_info.get_block_offset(); micro_handle.micro_info_.size_ = micro_index_info.get_block_size(); - micro_handle.need_release_data_buf_ = true; } } return ret; diff --git 
a/src/storage/access/ob_index_tree_prefetcher.cpp b/src/storage/access/ob_index_tree_prefetcher.cpp index 42dc99d62..337fdea62 100644 --- a/src/storage/access/ob_index_tree_prefetcher.cpp +++ b/src/storage/access/ob_index_tree_prefetcher.cpp @@ -390,7 +390,9 @@ int ObIndexTreePrefetcher::check_bloom_filter( int ObIndexTreePrefetcher::prefetch_block_data( blocksstable::ObMicroIndexInfo &index_block_info, ObMicroBlockDataHandle &micro_handle, - const bool is_data) + const bool is_data, + const bool use_multi_block_prefetch, + const bool need_submit_io) { int ret = OB_SUCCESS; if (is_rescan() && last_handle_hit(index_block_info, is_data, micro_handle)) { @@ -400,10 +402,15 @@ int ObIndexTreePrefetcher::prefetch_block_data( } else if (OB_FAIL(access_ctx_->micro_block_handle_mgr_.get_micro_block_handle( index_block_info, is_data, - true, /* need submit io */ + !is_data || need_submit_io, /* need submit io */ + use_multi_block_prefetch, micro_handle, cur_level_))) { - LOG_WARN("Fail to get micro block handle from handle mgr", K(ret)); + if (is_data && !need_submit_io && OB_ENTRY_NOT_EXIST == ret) { + ret = OB_SUCCESS; + } else { + LOG_WARN("Fail to get micro block handle from handle mgr", K(ret)); + } } else if (is_rescan() && is_data && micro_handle.in_block_state()) { last_micro_block_handle_ = micro_handle; } @@ -634,16 +641,19 @@ int ObIndexTreeMultiPrefetcher::multi_prefetch() cur_index_info, cur_index_info.is_data_block(), false, /* need submit io */ + false, /* use_multi_block_prefetch */ next_handle, cur_level_))) { - //not in cache yet, stop this rowkey prefetching if it's not the rowkey to be feteched - ret = OB_SUCCESS; - if (is_rowkey_to_fetched) { - if (OB_FAIL(read_handle.micro_handle_->get_micro_block_data(nullptr, index_block_, false))) { - LOG_WARN("Fail to get index block data", K(ret), KPC(read_handle.micro_handle_)); + if (OB_ENTRY_NOT_EXIST == ret) { + //not in cache yet, stop this rowkey prefetching if it's not the rowkey to be feteched + ret =
OB_SUCCESS; + if (is_rowkey_to_fetched) { + if (OB_FAIL(read_handle.micro_handle_->get_micro_block_data(nullptr, index_block_, false))) { + LOG_WARN("Fail to get index block data", K(ret), KPC(read_handle.micro_handle_)); + } + } else { + stop_prefetch = true; } - } else { - stop_prefetch = true; } } else if (FALSE_IT(read_handle.set_cur_micro_handle(next_handle))) { } else if (OB_FAIL(read_handle.micro_handle_->get_cached_index_block_data(index_block_))) { @@ -783,6 +793,7 @@ void ObIndexTreeMultiPassPrefetcher:: reset_tree_handles(); read_handles_.reset(); inner_reset(); + multi_io_params_.reset(); max_range_prefetching_cnt_ = 0; max_micro_handle_cnt_ = 0; ObIndexTreePrefetcher::reset(); @@ -796,6 +807,7 @@ void ObIndexTreeMultiPassPrefetcher:: } clean_blockscan_check_info(); inner_reset(); + multi_io_params_.reuse(); ObIndexTreePrefetcher::reuse(); } @@ -813,6 +825,7 @@ void ObIndexTreeMultiPassPrefetcher:: micro_data_handles_[i].reset(); } inner_reset(); + multi_io_params_.reset(); ObIndexTreePrefetcher::reclaim(); } @@ -823,6 +836,8 @@ void ObIndexTreeMultiPassPrefetcher:: is_prefetch_end_ = false; is_row_lock_checked_ = false; need_check_prefetch_depth_ = false; + use_multi_block_prefetch_ = false; + need_submit_io_ = true; cur_range_fetch_idx_ = 0; cur_range_prefetch_idx_ = 0; cur_micro_data_fetch_idx_ = -1; @@ -874,10 +889,10 @@ int ObIndexTreeMultiPassPrefetcher::g int ret = OB_SUCCESS; depth = 0; prefetch_depth_ = MIN(2 * prefetch_depth_, DEFAULT_SCAN_MICRO_DATA_HANDLE_CNT); - if (need_check_prefetch_depth_ && access_ctx_->limit_param_->offset_ < INT32_MAX && access_ctx_->limit_param_->limit_ < INT32_MAX) { + if (need_check_prefetch_depth_) { int64_t prefetch_micro_cnt = MAX(1, - (access_ctx_->limit_param_->offset_ + access_ctx_->limit_param_->limit_ - access_ctx_->out_cnt_ + \ - SSTABLE_MICRO_AVG_COUNT - 1) / SSTABLE_MICRO_AVG_COUNT); + (access_ctx_->limit_param_->offset_ + access_ctx_->limit_param_->limit_ - access_ctx_->out_cnt_ + \ + 
SSTABLE_MICRO_AVG_COUNT - 1) / SSTABLE_MICRO_AVG_COUNT); prefetch_depth_ = MIN(prefetch_depth_, prefetch_micro_cnt); } depth = min(static_cast(prefetch_depth_), @@ -996,7 +1011,9 @@ int ObIndexTreeMultiPassPrefetcher::i iter_param_->limit_prefetch_ && nullptr != access_ctx_->limit_param_ && access_ctx_->limit_param_->limit_ >= 0 && - access_ctx_->limit_param_->limit_ < 4096; + access_ctx_->limit_param_->limit_ < 4096 && + access_ctx_->limit_param_->offset_ < INT32_MAX; + use_multi_block_prefetch_ = (iter_param.get_io_read_batch_size() > 0); switch (iter_type) { case ObStoreRowIterator::IteratorMultiGet: case ObStoreRowIterator::IteratorCOMultiGet: { @@ -1066,6 +1083,13 @@ int ObIndexTreeMultiPassPrefetcher::i LOG_WARN("Fail to init read_handles", K(ret), K(max_handle_cnt)); } else if (OB_FAIL(init_tree_handles(max_height))) { LOG_WARN("Fail to init tree handles", K(ret), K(max_height)); + } else if (use_multi_block_prefetch_ && + OB_FAIL(multi_io_params_.init( + iter_param, + max_micro_handle_cnt_, + access_ctx_->query_flag_.is_reverse_scan(), + *access_ctx_->stmt_allocator_))) { + LOG_WARN("Fail to init multi io params", K(ret)); } return ret; } @@ -1087,13 +1111,10 @@ template int ObIndexTreeMultiPassPrefetcher::prefetch() { int ret = OB_SUCCESS; - const int32_t prefetch_limit = MAX(2, max_micro_handle_cnt_ / 2); if (IS_NOT_INIT) { ret = OB_NOT_INIT; LOG_WARN("ObIndexTreeMultiPassPrefetcher not init", K(ret)); } else if (is_prefetch_end_) { - } else if (micro_data_prefetch_idx_ - cur_micro_data_fetch_idx_ >= prefetch_limit) { - // continue current prefetch } else if (OB_FAIL(prefetch_index_tree())) { if (OB_LIKELY(OB_ITER_END == ret)) { is_prefetch_end_ = true; @@ -1232,7 +1253,9 @@ int ObIndexTreeMultiPassPrefetcher::p ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected prefetch status", K(ret), K_(cur_level), K_(index_tree_height), K_(micro_data_prefetch_idx), K_(cur_micro_data_fetch_idx), K_(max_micro_handle_cnt)); - } else if (micro_data_prefetch_idx_ - 
cur_micro_data_fetch_idx_ == max_micro_handle_cnt_) { + } else if (micro_data_prefetch_idx_ - cur_micro_data_fetch_idx_ == max_micro_handle_cnt_ || + (use_multi_block_prefetch_ && prefetch_depth_ > MIN_DATA_READ_BATCH_COUNT && + (max_micro_handle_cnt_ - (micro_data_prefetch_idx_ - cur_micro_data_fetch_idx_)) < MIN_DATA_READ_BATCH_COUNT)) { // DataBlock ring buf full } else if (OB_FAIL(get_prefetch_depth(prefetch_depth))) { LOG_WARN("Fail to get prefetch depth", K(ret)); @@ -1290,7 +1313,7 @@ int ObIndexTreeMultiPassPrefetcher::p if (OB_UNLIKELY(OB_ITER_END != ret)) { LOG_WARN("Fail to check row lock", K(ret), K(block_info), KPC(this)); } - } else if (OB_FAIL(prefetch_block_data(block_info, micro_data_handles_[prefetch_micro_idx]))) { + } else if (OB_FAIL(prefetch_data_block(micro_data_prefetch_idx_, block_info, micro_data_handles_[prefetch_micro_idx]))) { LOG_WARN("fail to prefetch_block_data", K(ret), K(block_info)); } @@ -1300,6 +1323,10 @@ int ObIndexTreeMultiPassPrefetcher::p tree_handles_[cur_level_].current_block_read_handle().end_prefetched_row_idx_++; } } + if (OB_SUCC(ret) && multi_io_params_.count() > 0 && + OB_FAIL(prefetch_multi_data_block(micro_data_prefetch_idx_))) { + LOG_WARN("Fail to prefetch multi block", K(ret), K_(micro_data_prefetch_idx), K_(multi_io_params)); + } } if (OB_SUCC(ret) && 0 < prefetched_cnt) { ObSSTableReadHandle &read_handle = read_handles_[prefetching_range_idx() % max_range_prefetching_cnt_]; @@ -1691,6 +1718,56 @@ int ObIndexTreeMultiPassPrefetcher::c return ret; } +template +int ObIndexTreeMultiPassPrefetcher::prefetch_data_block( + const int64_t prefetch_idx, + blocksstable::ObMicroIndexInfo &index_block_info, + ObMicroBlockDataHandle &micro_handle) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(prefetch_block_data(index_block_info, + micro_handle, + true, /* is_data */ + use_multi_block_prefetch_, + need_submit_io_))) { + LOG_WARN("Fail to prefetch data block data", K(ret)); + } else if (use_multi_block_prefetch_ &&
micro_handle.need_multi_io()) { + bool need_split = false; + if (multi_io_params_.add_micro_data(index_block_info, prefetch_idx, micro_handle, need_split)) { + if (OB_FAIL(prefetch_multi_data_block(prefetch_idx + 1))) { + LOG_WARN("Fail to prefetch multi block", K(ret)); + } else if (need_split) { + // reused after prefetch_multi_data_block + if (multi_io_params_.add_micro_data(index_block_info, prefetch_idx, micro_handle, need_split)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected multi_io_params status", K(ret), K_(multi_io_params)); + } + } + } + } + return ret; +} + +template +int ObIndexTreeMultiPassPrefetcher::prefetch_multi_data_block( + const int64_t max_prefetch_idx) +{ + int ret = OB_SUCCESS; + if (multi_io_params_.count() > 0) { + if (OB_FAIL(access_ctx_->micro_block_handle_mgr_.prefetch_multi_data_block( + micro_data_infos_, + micro_data_handles_, + max_micro_handle_cnt_, + max_prefetch_idx, + multi_io_params_))) { + LOG_WARN("Fail to prefetch multi block", K(ret)); + } else { + multi_io_params_.reuse(); + } + } + return ret; +} + //////////////////////////////////////// ObIndexTreeLevelHandle ////////////////////////////////////////////// template int ObIndexTreeMultiPassPrefetcher::ObIndexTreeLevelHandle::prefetch( diff --git a/src/storage/access/ob_index_tree_prefetcher.h b/src/storage/access/ob_index_tree_prefetcher.h index 18b7de778..d7cdaf222 100644 --- a/src/storage/access/ob_index_tree_prefetcher.h +++ b/src/storage/access/ob_index_tree_prefetcher.h @@ -174,7 +174,9 @@ protected: int prefetch_block_data( ObMicroIndexInfo &index_block_info, ObMicroBlockDataHandle &micro_handle, - const bool is_data = true); + const bool is_data = true, + const bool use_multi_block_prefetch = false, + const bool need_submit_io = true); int lookup_in_cache(ObSSTableReadHandle &read_handle); int init_basic_info( const int iter_type, @@ -386,6 +388,8 @@ public: agg_row_store_(nullptr), can_blockscan_(false), need_check_prefetch_depth_(false), +
use_multi_block_prefetch_(false), + need_submit_io_(true), tree_handle_cap_(0), prefetch_depth_(1), max_range_prefetching_cnt_(0), @@ -394,7 +398,8 @@ public: query_range_(nullptr), border_rowkey_(), read_handles_(), - tree_handles_(nullptr) + tree_handles_(nullptr), + multi_io_params_() {} virtual ~ObIndexTreeMultiPassPrefetcher(); virtual void reset() override; @@ -475,6 +480,7 @@ public: return DEFAULT_SCAN_MICRO_DATA_HANDLE_CNT; } + static const int16_t MIN_DATA_READ_BATCH_COUNT = 4; static const int16_t MAX_INDEX_TREE_HEIGHT = 16; static const int32_t MAX_DATA_PREFETCH_DEPTH = 32; static const int32_t MAX_INDEX_PREFETCH_DEPTH = 3; @@ -484,8 +490,8 @@ public: K_(cur_micro_data_fetch_idx), K_(micro_data_prefetch_idx), K_(max_micro_handle_cnt), K_(iter_type), K_(cur_level), K_(index_tree_height), K_(max_rescan_height), KP_(long_life_allocator), K_(prefetch_depth), K_(total_micro_data_cnt), KP_(query_range), K_(tree_handle_cap), - K_(can_blockscan), K_(need_check_prefetch_depth), - K(ObArrayWrap(tree_handles_, index_tree_height_))); + K_(can_blockscan), K_(need_check_prefetch_depth), K_(use_multi_block_prefetch), K_(need_submit_io), + K(ObArrayWrap(tree_handles_, index_tree_height_)), K_(multi_io_params)); protected: int init_basic_info( const int iter_type, @@ -519,6 +525,11 @@ protected: void inner_reset(); virtual int init_tree_handles(const int64_t count); int get_prefetch_depth(int64_t &depth); + int prefetch_data_block( + const int64_t prefetch_idx, + ObMicroIndexInfo &index_block_info, + ObMicroBlockDataHandle &micro_handle); + int prefetch_multi_data_block(const int64_t max_prefetch_idx); static const int32_t DEFAULT_SCAN_RANGE_PREFETCH_CNT = 4; static const int32_t DEFAULT_SCAN_MICRO_DATA_HANDLE_CNT = DATA_PREFETCH_DEPTH; @@ -693,6 +704,8 @@ public: protected: bool can_blockscan_; bool need_check_prefetch_depth_; + bool use_multi_block_prefetch_; + bool need_submit_io_; int16_t tree_handle_cap_; int16_t prefetch_depth_; int32_t max_range_prefetching_cnt_; @@
-710,6 +723,7 @@ protected: ObIndexTreeLevelHandle *tree_handles_; ObMicroIndexInfo micro_data_infos_[DEFAULT_SCAN_MICRO_DATA_HANDLE_CNT]; ObMicroBlockDataHandle micro_data_handles_[DEFAULT_SCAN_MICRO_DATA_HANDLE_CNT]; + ObMultiBlockIOParam multi_io_params_; }; } diff --git a/src/storage/access/ob_micro_block_handle_mgr.cpp b/src/storage/access/ob_micro_block_handle_mgr.cpp index da3979e04..19b57b819 100644 --- a/src/storage/access/ob_micro_block_handle_mgr.cpp +++ b/src/storage/access/ob_micro_block_handle_mgr.cpp @@ -39,7 +39,6 @@ ObMicroBlockDataHandle::ObMicroBlockDataHandle() handle_mgr_(nullptr), allocator_(nullptr), loaded_block_data_(), - need_release_data_buf_(false), is_loaded_block_(false) { des_meta_.encrypt_key_ = encrypt_key_; @@ -98,7 +97,7 @@ int ObMicroBlockDataHandle::get_micro_block_data( int ret = OB_SUCCESS; if (ObSSTableMicroBlockState::NEED_SYNC_IO == block_state_ || OB_FAIL(get_loaded_block_data(block_data))) { if (is_loaded_block_ && loaded_block_data_.is_valid()) { - LOG_DEBUG("Use sync loaded index block data", K_(macro_block_id), + LOG_DEBUG("Use sync loaded index block data", K(is_data_block), K_(macro_block_id), K(loaded_block_data_), K_(io_handle)); block_data = loaded_block_data_; } else { @@ -211,8 +210,8 @@ int ObMicroBlockDataHandle::get_loaded_block_data(ObMicroBlockData &block_data) } else { //multi block io const ObMultiBlockIOResult *io_result = reinterpret_cast(io_buf); - if (OB_FAIL(io_result->get_block_data(block_index_, block_data))) { - LOG_WARN("get_block_data failed", K(ret), K_(block_index)); + if (OB_FAIL(io_result->get_block_data(block_index_, micro_info_, block_data))) { + LOG_WARN("get_block_data failed", K(ret), K_(block_index), K_(micro_info)); } } } @@ -399,6 +398,7 @@ int ObMicroBlockHandleMgr::get_micro_block_handle( ObMicroIndexInfo &index_block_info, const bool is_data_block, const bool need_submit_io, + const bool use_multi_block_prefetch, ObMicroBlockDataHandle &micro_block_handle, int16_t cur_level) { @@
-436,7 +436,12 @@ int ObMicroBlockHandleMgr::get_micro_block_handle( // get data / index block cache from disk if (!need_submit_io) { } else if (cache_mem_ctrl_.need_sync_io(*query_flag_, micro_block_handle, cache, block_io_allocator_)) { - } else if (OB_FAIL(submit_async_io(cache, tenant_id, index_block_info, is_data_block, micro_block_handle))) { + } else if (OB_FAIL(submit_async_io(cache, + tenant_id, + index_block_info, + is_data_block, + use_multi_block_prefetch, + micro_block_handle))) { LOG_WARN("Fail to submit async io for prefetch", K(ret), K(index_block_info), K(micro_block_handle)); } } else { @@ -454,11 +459,71 @@ int ObMicroBlockHandleMgr::get_micro_block_handle( return ret; } +int ObMicroBlockHandleMgr::prefetch_multi_data_block( + const ObMicroIndexInfo *micro_data_infos, + ObMicroBlockDataHandle *micro_data_handles, + const int64_t max_micro_handle_cnt, + const int64_t max_prefetch_idx, + const ObMultiBlockIOParam &multi_io_params) +{ + int ret = OB_SUCCESS; + const uint64_t tenant_id = MTL_ID(); + ObMacroBlockHandle macro_handle; + if (OB_UNLIKELY(!multi_io_params.is_valid())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected io params", K(ret), K(multi_io_params)); + } else { + const MacroBlockId &macro_id = micro_data_infos[multi_io_params.prefetch_idx_[0] % max_micro_handle_cnt].get_macro_id(); + if (1 == multi_io_params.count()) { + for (int64_t i = 0; OB_SUCC(ret) && i < multi_io_params.count(); i++) { + const ObMicroIndexInfo &index_info = micro_data_infos[multi_io_params.prefetch_idx_[i] % max_micro_handle_cnt]; + if (OB_FAIL(data_block_cache_->prefetch(tenant_id, macro_id, index_info, true, + macro_handle, &block_io_allocator_))) { + LOG_WARN("Fail to prefetch micro block", K(ret), K(index_info), K(macro_handle)); + } else { + ObMicroBlockDataHandle &micro_handle = micro_data_handles[multi_io_params.prefetch_idx_[i] % max_micro_handle_cnt]; + micro_handle.block_state_ = ObSSTableMicroBlockState::IN_BLOCK_IO; +
cache_mem_ctrl_.add_hold_size(micro_handle.get_handle_size()); + micro_handle.io_handle_ = macro_handle; + micro_handle.allocator_ = &block_io_allocator_; + cache_mem_ctrl_.update_data_block_io_size(index_info.get_block_size(), true, true); + } + } + } else if (OB_FAIL(data_block_cache_->prefetch_multi_block( + tenant_id, + macro_id, + multi_io_params, + true, /* use_cache */ + macro_handle))) { + LOG_WARN("Fail to prefetch multi blocks", K(ret), K(multi_io_params)); + } else { + for (int64_t i = 0; OB_SUCC(ret) && i < multi_io_params.count(); i++) { + if (multi_io_params.prefetch_idx_[i] >= max_prefetch_idx) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected prefetch idx", K(ret), K(i), K(multi_io_params.prefetch_idx_[i]), K(max_prefetch_idx)); + } else { + ObMicroBlockDataHandle &micro_handle = micro_data_handles[multi_io_params.prefetch_idx_[i] % max_micro_handle_cnt]; + micro_handle.block_state_ = ObSSTableMicroBlockState::IN_BLOCK_IO; + micro_handle.io_handle_ = macro_handle; + micro_handle.allocator_ = &block_io_allocator_; + micro_handle.block_index_ = i; + } + } + if (OB_SUCC(ret)) { + cache_mem_ctrl_.add_hold_size(multi_io_params.get_data_cache_size()); + cache_mem_ctrl_.update_data_block_io_size(multi_io_params.get_data_cache_size(), true, true); + } + } + } + return ret; +} + int ObMicroBlockHandleMgr::submit_async_io( blocksstable::ObIMicroBlockCache *cache, const uint64_t tenant_id, const ObMicroIndexInfo &index_block_info, const bool is_data_block, + const bool use_multi_block_prefetch, ObMicroBlockDataHandle &micro_block_handle) { int ret = OB_SUCCESS; @@ -469,7 +534,11 @@ int ObMicroBlockHandleMgr::submit_async_io( bool is_use_block_cache = query_flag_->is_use_block_cache(); bool use_cache = is_data_block ?
is_use_block_cache && cache_mem_ctrl_.get_cache_use_flag() : is_use_block_cache; - if (OB_FAIL(cache->prefetch(tenant_id, macro_id, index_block_info, use_cache, + if (use_cache && is_data_block && use_multi_block_prefetch) { + micro_block_handle.block_state_ = ObSSTableMicroBlockState::NEED_MULTI_IO; + ret = OB_SUCCESS; + // continue and use prefetch in batch later + } else if (OB_FAIL(cache->prefetch(tenant_id, macro_id, index_block_info, use_cache, macro_handle, &block_io_allocator_))) { LOG_WARN("Fail to prefetch micro block", K(ret), K(index_block_info), K(macro_handle), K(micro_block_handle)); @@ -478,7 +547,6 @@ int ObMicroBlockHandleMgr::submit_async_io( cache_mem_ctrl_.add_hold_size(micro_block_handle.get_handle_size()); micro_block_handle.io_handle_ = macro_handle; micro_block_handle.allocator_ = &block_io_allocator_; - micro_block_handle.need_release_data_buf_ = true; cache_mem_ctrl_.update_data_block_io_size(size, is_data_block, use_cache); } return ret; @@ -495,4 +563,4 @@ bool ObMicroBlockHandleMgr::reach_hold_limit() const } } -} \ No newline at end of file +} diff --git a/src/storage/access/ob_micro_block_handle_mgr.h b/src/storage/access/ob_micro_block_handle_mgr.h index 66e658310..5109c0ed4 100644 --- a/src/storage/access/ob_micro_block_handle_mgr.h +++ b/src/storage/access/ob_micro_block_handle_mgr.h @@ -22,15 +22,16 @@ #include "storage/ob_handle_mgr.h" namespace oceanbase { +using namespace blocksstable; namespace storage { - struct ObSSTableMicroBlockState { enum ObSSTableMicroBlockStateEnum { UNKNOWN_STATE = 0, IN_BLOCK_CACHE, IN_BLOCK_IO, - NEED_SYNC_IO + NEED_SYNC_IO, + NEED_MULTI_IO }; }; @@ -58,7 +59,9 @@ struct ObMicroBlockDataHandle { ObMicroBlockDataHandle & operator=(const ObMicroBlockDataHandle &other); OB_INLINE bool in_block_state() const { return ObSSTableMicroBlockState::IN_BLOCK_CACHE == block_state_ || ObSSTableMicroBlockState::IN_BLOCK_IO == block_state_; } - TO_STRING_KV(K_(tenant_id), K_(macro_block_id), K_(micro_info), 
K_(need_release_data_buf), K_(is_loaded_block), + OB_INLINE bool need_multi_io() const + { return ObSSTableMicroBlockState::NEED_MULTI_IO == block_state_; } + TO_STRING_KV(K_(tenant_id), K_(macro_block_id), K_(micro_info), K_(is_loaded_block), K_(block_state), K_(block_index), K_(cache_handle), K_(io_handle), K_(loaded_block_data), KP_(allocator)); uint64_t tenant_id_; blocksstable::MacroBlockId macro_block_id_; @@ -72,7 +75,6 @@ struct ObMicroBlockDataHandle { ObMicroBlockHandleMgr *handle_mgr_; ObIAllocator *allocator_; blocksstable::ObMicroBlockData loaded_block_data_; - bool need_release_data_buf_; // TODO : @lvling to be removed bool is_loaded_block_; private: @@ -171,13 +173,22 @@ public: blocksstable::ObMicroIndexInfo &index_block_info, const bool is_data_block, const bool need_submit_io, + const bool use_multi_block_prefetch, ObMicroBlockDataHandle &micro_block_handle, int16_t cur_level); + int prefetch_multi_data_block( + const ObMicroIndexInfo *micro_data_infos, + ObMicroBlockDataHandle *micro_data_handles, + const int64_t max_micro_handle_cnt, + const int64_t max_prefetch_idx, + const ObMultiBlockIOParam &multi_io_params); + int submit_async_io( blocksstable::ObIMicroBlockCache *cache, const uint64_t tenant_id, const blocksstable::ObMicroIndexInfo &index_block_info, const bool is_data_block, + const bool use_multi_block_prefetch, ObMicroBlockDataHandle &micro_block_handle); void dec_hold_size(ObMicroBlockDataHandle &handle); bool reach_hold_limit() const; diff --git a/src/storage/access/ob_pushdown_aggregate.cpp b/src/storage/access/ob_pushdown_aggregate.cpp index 55d8f485e..2fb48a984 100644 --- a/src/storage/access/ob_pushdown_aggregate.cpp +++ b/src/storage/access/ob_pushdown_aggregate.cpp @@ -496,7 +496,7 @@ int ObAggCell::eval_micro_block( const ObTableAccessContext &context, const int32_t col_offset, blocksstable::ObIMicroBlockReader *reader, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count) { int ret = OB_SUCCESS; @@ -933,7 +933,7
@@ int ObCountAggCell::eval_micro_block( const ObTableAccessContext &context, const int32_t col_offset, blocksstable::ObIMicroBlockReader *reader, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count) { int ret = OB_SUCCESS; @@ -1854,7 +1854,7 @@ int ObSumOpSizeAggCell::eval_micro_block( const ObTableAccessContext &context, const int32_t col_offset, blocksstable::ObIMicroBlockReader *reader, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count) { int ret = OB_SUCCESS; @@ -3140,7 +3140,7 @@ int ObFirstRowAggCell::eval_micro_block( const ObTableAccessContext &context, const int32_t col_offset, blocksstable::ObIMicroBlockReader *reader, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count) { int ret = OB_SUCCESS; diff --git a/src/storage/access/ob_pushdown_aggregate.h b/src/storage/access/ob_pushdown_aggregate.h index daee5641b..b520223b8 100644 --- a/src/storage/access/ob_pushdown_aggregate.h +++ b/src/storage/access/ob_pushdown_aggregate.h @@ -256,7 +256,7 @@ public: const ObTableAccessContext &context, const int32_t col_offset, blocksstable::ObIMicroBlockReader *reader, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count); virtual int eval_index_info(const blocksstable::ObMicroIndexInfo &index_info, const bool is_cg = false); // For group by pushdown @@ -349,7 +349,7 @@ public: const ObTableAccessContext &context, const int32_t col_offset, blocksstable::ObIMicroBlockReader *reader, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count) override; virtual int eval_index_info(const blocksstable::ObMicroIndexInfo &index_info, const bool is_cg = false) override; virtual int eval_batch_in_group_by( @@ -486,7 +486,7 @@ public: const ObTableAccessContext &context, const int32_t col_offset, blocksstable::ObIMicroBlockReader *reader, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count) override; virtual int eval_index_info(const 
blocksstable::ObMicroIndexInfo &index_info, const bool is_cg = false); virtual int eval_batch_in_group_by( @@ -659,7 +659,7 @@ public: const ObTableAccessContext &context, const int32_t col_offset, blocksstable::ObIMicroBlockReader *reader, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count) override; virtual int eval_index_info(const blocksstable::ObMicroIndexInfo &index_info, const bool is_cg = false) override; virtual int eval_batch_in_group_by( diff --git a/src/storage/access/ob_sstable_row_scanner.cpp b/src/storage/access/ob_sstable_row_scanner.cpp index c7be62165..164cd8ffd 100644 --- a/src/storage/access/ob_sstable_row_scanner.cpp +++ b/src/storage/access/ob_sstable_row_scanner.cpp @@ -646,7 +646,7 @@ int ObSSTableRowScanner::forward_blockscan( // 1. There are more undetected rows in current range for columnar scan. // 2. The number of detected rows is less than defualt batch size. if ((IN_END_OF_RANGE != state && PENDING_BLOCK_SCAN != state) - || (end - begin + 1) >= OB_CS_SCAN_GROUP_SIZE) { + || (end - begin + 1) >= iter_param_->get_storage_rowsets_size()) { break; } } diff --git a/src/storage/access/ob_table_access_param.cpp b/src/storage/access/ob_table_access_param.cpp index 5bf6ffcfc..9f849c081 100644 --- a/src/storage/access/ob_table_access_param.cpp +++ b/src/storage/access/ob_table_access_param.cpp @@ -54,7 +54,8 @@ ObTableIterParam::ObTableIterParam() limit_prefetch_(false), is_non_unique_local_index_(false), ss_rowkey_prefix_cnt_(0), - pd_storage_flag_() + pd_storage_flag_(), + table_scan_opt_() { } @@ -100,6 +101,7 @@ void ObTableIterParam::reset() is_for_foreign_check_ = false; limit_prefetch_ = false; is_non_unique_local_index_ = false; + table_scan_opt_.reset(); ObSSTableIndexFilterFactory::destroy_sstable_index_filter(sstable_index_filter_); } @@ -184,7 +186,8 @@ DEF_TO_STRING(ObTableIterParam) K_(is_for_foreign_check), K_(limit_prefetch), K_(is_non_unique_local_index), - K_(ss_rowkey_prefix_cnt)); + 
K_(ss_rowkey_prefix_cnt), + K_(table_scan_opt)); J_OBJ_END(); return pos; } @@ -260,6 +263,18 @@ int ObTableAccessParam::init( iter_param_.read_info_->get_schema_column_count() == iter_param_.rowkey_read_info_->get_schema_column_count(); iter_param_.pd_storage_flag_ = scan_param.pd_storage_flag_; + if (scan_param.table_scan_opt_.is_io_valid()) { + iter_param_.table_scan_opt_.io_read_batch_size_ = scan_param.table_scan_opt_.io_read_batch_size_; + iter_param_.table_scan_opt_.io_read_gap_size_ = scan_param.table_scan_opt_.io_read_gap_size_; + } else { + iter_param_.table_scan_opt_.io_read_batch_size_ = 0; + iter_param_.table_scan_opt_.io_read_gap_size_ = 0; + } + if (scan_param.table_scan_opt_.is_rowsets_valid()) { + iter_param_.table_scan_opt_.storage_rowsets_size_ = scan_param.table_scan_opt_.storage_rowsets_size_; + } else { + iter_param_.table_scan_opt_.storage_rowsets_size_ = 1; + } iter_param_.pushdown_filter_ = scan_param.pd_storage_filters_; // disable blockscan if scan order is KeepOrder(for iterator iterator and table api) // disable blockscan if use index skip scan as no large range to scan diff --git a/src/storage/access/ob_table_access_param.h b/src/storage/access/ob_table_access_param.h index ca7213572..0a650e834 100644 --- a/src/storage/access/ob_table_access_param.h +++ b/src/storage/access/ob_table_access_param.h @@ -175,6 +175,12 @@ public: !pd_storage_flag_.is_group_by_pushdown() && !pd_storage_flag_.is_aggregate_pushdown(); } + OB_INLINE int64_t get_io_read_batch_size() const + { return table_scan_opt_.io_read_batch_size_; } + OB_INLINE int64_t get_io_read_gap_size() const + { return table_scan_opt_.io_read_gap_size_; } + OB_INLINE int64_t get_storage_rowsets_size() const + { return table_scan_opt_.storage_rowsets_size_; } DECLARE_TO_STRING; public: uint64_t table_id_; @@ -212,6 +218,7 @@ public: bool is_non_unique_local_index_; int64_t ss_rowkey_prefix_cnt_; sql::ObStoragePushdownFlag pd_storage_flag_; + ObTableScanOption table_scan_opt_; }; 
struct ObTableAccessParam diff --git a/src/storage/access/ob_vector_store.cpp b/src/storage/access/ob_vector_store.cpp index 090ddbc5f..8aa936f5d 100644 --- a/src/storage/access/ob_vector_store.cpp +++ b/src/storage/access/ob_vector_store.cpp @@ -315,7 +315,7 @@ int ObVectorStore::fill_output_rows( } LOG_TRACE("[Vectorized] vector store copy rows", K(ret), K(begin_index), K(end_index), K(row_capacity), K(res), - "row_ids", common::ObArrayWrap(row_ids_, row_capacity), + "row_ids", common::ObArrayWrap(row_ids_, row_capacity), KPC(this)); return ret; } diff --git a/src/storage/blocksstable/cs_encoding/ob_cs_vector_decoding_util.cpp b/src/storage/blocksstable/cs_encoding/ob_cs_vector_decoding_util.cpp index d4f839029..71249f082 100644 --- a/src/storage/blocksstable/cs_encoding/ob_cs_vector_decoding_util.cpp +++ b/src/storage/blocksstable/cs_encoding/ob_cs_vector_decoding_util.cpp @@ -20,7 +20,7 @@ namespace blocksstable { int ObCSVectorDecodingUtil::decode_all_null_vector( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, sql::VectorHeader &vec_header, const int64_t vec_offset) diff --git a/src/storage/blocksstable/cs_encoding/ob_cs_vector_decoding_util.h b/src/storage/blocksstable/cs_encoding/ob_cs_vector_decoding_util.h index a5a9dd4c1..c08f39621 100644 --- a/src/storage/blocksstable/cs_encoding/ob_cs_vector_decoding_util.h +++ b/src/storage/blocksstable/cs_encoding/ob_cs_vector_decoding_util.h @@ -26,7 +26,7 @@ class ObCSVectorDecodingUtil final { public: static int decode_all_null_vector( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, sql::VectorHeader &vec_header, const int64_t vec_offset); @@ -35,4 +35,4 @@ public: } } -#endif \ No newline at end of file +#endif diff --git a/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.cpp b/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.cpp index d5e860bdf..ed437d4e8 100644 --- a/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.cpp +++ 
b/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.cpp @@ -160,7 +160,7 @@ void ObDictValueIterator::build_decode_by_ref_func_() int ObDictColumnDecoder::get_null_count( const ObColumnCSDecoderCtx &col_ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { @@ -204,7 +204,7 @@ int ObDictColumnDecoder::get_null_count( int ObDictColumnDecoder::extract_ref_and_null_count_( const ObConstEncodingRefDesc &ref_desc, const int64_t dict_count, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums, int64_t &null_count, @@ -2225,7 +2225,7 @@ int ObDictColumnDecoder::read_distinct( int ObDictColumnDecoder::read_reference( const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const { @@ -2258,7 +2258,7 @@ int ObDictColumnDecoder::read_reference( int ObDictColumnDecoder::get_aggregate_result( const ObColumnCSDecoderCtx &col_ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObAggCell &agg_cell) const { diff --git a/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.h b/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.h index f5361dc33..67436c7fc 100644 --- a/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.h +++ b/src/storage/blocksstable/cs_encoding/ob_dict_column_decoder.h @@ -30,7 +30,7 @@ public: virtual ~ObDictColumnDecoder() {} ObDictColumnDecoder(const ObDictColumnDecoder &) = delete; ObDictColumnDecoder &operator=(const ObDictColumnDecoder &) = delete; - virtual int get_null_count(const ObColumnCSDecoderCtx &ctx, const int64_t *row_ids, + virtual int get_null_count(const ObColumnCSDecoderCtx &ctx, const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const override; virtual int pushdown_operator( @@ -50,7 +50,7 @@ public: virtual int get_aggregate_result( const ObColumnCSDecoderCtx &col_ctx, 
- const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObAggCell &agg_cell) const override; @@ -64,7 +64,7 @@ public: virtual int read_reference( const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const override; @@ -119,7 +119,7 @@ protected: static int extract_ref_and_null_count_( const ObConstEncodingRefDesc &ref_desc, const int64_t dict_count, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums, int64_t &null_count, diff --git a/src/storage/blocksstable/cs_encoding/ob_icolumn_cs_decoder.cpp b/src/storage/blocksstable/cs_encoding/ob_icolumn_cs_decoder.cpp index 07a5e9911..80bf2164f 100644 --- a/src/storage/blocksstable/cs_encoding/ob_icolumn_cs_decoder.cpp +++ b/src/storage/blocksstable/cs_encoding/ob_icolumn_cs_decoder.cpp @@ -24,7 +24,7 @@ using namespace common; int ObIColumnCSDecoder::get_null_count( const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { diff --git a/src/storage/blocksstable/cs_encoding/ob_icolumn_cs_decoder.h b/src/storage/blocksstable/cs_encoding/ob_icolumn_cs_decoder.h index 7192bd474..9709e4004 100644 --- a/src/storage/blocksstable/cs_encoding/ob_icolumn_cs_decoder.h +++ b/src/storage/blocksstable/cs_encoding/ob_icolumn_cs_decoder.h @@ -40,7 +40,7 @@ public: OB_INLINE void reuse() {} VIRTUAL_TO_STRING_KV(K(this)); - virtual int decode(const ObColumnCSDecoderCtx &ctx, const int64_t row_id, common::ObDatum &datum) const = 0; + virtual int decode(const ObColumnCSDecoderCtx &ctx, const int32_t row_id, common::ObDatum &datum) const = 0; virtual ObCSColumnHeader::Type get_type() const = 0; @@ -53,7 +53,7 @@ public: // Performance critical, only check pointer once in caller virtual int batch_decode( const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const 
int64_t row_cap, common::ObDatum *datums) const { @@ -93,7 +93,7 @@ public: virtual int get_aggregate_result( const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObAggCell &agg_cell) const { @@ -103,7 +103,7 @@ public: virtual int get_null_count( const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const; @@ -123,7 +123,7 @@ public: virtual int read_reference( const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const { @@ -137,7 +137,7 @@ class ObNoneExistColumnCSDecoder : public ObIColumnCSDecoder public: static const ObCSColumnHeader::Type type_ = ObCSColumnHeader::MAX_TYPE; - virtual int decode(const ObColumnCSDecoderCtx &ctx, const int64_t row_id, common::ObDatum &datum) const override + virtual int decode(const ObColumnCSDecoderCtx &ctx, const int32_t row_id, common::ObDatum &datum) const override { datum.set_ext(); datum.no_cv(datum.extend_obj_)->set_ext(common::ObActionFlag::OP_NOP); diff --git a/src/storage/blocksstable/cs_encoding/ob_int_dict_column_decoder.cpp b/src/storage/blocksstable/cs_encoding/ob_int_dict_column_decoder.cpp index 8bbf47b05..f48cd0cf0 100644 --- a/src/storage/blocksstable/cs_encoding/ob_int_dict_column_decoder.cpp +++ b/src/storage/blocksstable/cs_encoding/ob_int_dict_column_decoder.cpp @@ -23,7 +23,7 @@ namespace oceanbase namespace blocksstable { int ObIntDictColumnDecoder::decode( - const ObColumnCSDecoderCtx &ctx, const int64_t row_id, common::ObDatum &datum) const + const ObColumnCSDecoderCtx &ctx, const int32_t row_id, common::ObDatum &datum) const { int ret = OB_SUCCESS; const ObDictColumnDecoderCtx &dict_ctx = ctx.dict_ctx_; @@ -93,7 +93,7 @@ int ObIntDictColumnDecoder::decode_and_aggregate( return ret; } -int ObIntDictColumnDecoder::batch_decode(const ObColumnCSDecoderCtx &ctx, const int64_t 
*row_ids, +int ObIntDictColumnDecoder::batch_decode(const ObColumnCSDecoderCtx &ctx, const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const { int ret = OB_SUCCESS; diff --git a/src/storage/blocksstable/cs_encoding/ob_int_dict_column_decoder.h b/src/storage/blocksstable/cs_encoding/ob_int_dict_column_decoder.h index dcc4aff4e..8c1edc013 100644 --- a/src/storage/blocksstable/cs_encoding/ob_int_dict_column_decoder.h +++ b/src/storage/blocksstable/cs_encoding/ob_int_dict_column_decoder.h @@ -30,8 +30,8 @@ public: ObIntDictColumnDecoder &operator=(const ObDictColumnDecoder &) = delete; virtual int decode( - const ObColumnCSDecoderCtx &ctx, const int64_t row_id, common::ObDatum &datum) const override; - virtual int batch_decode(const ObColumnCSDecoderCtx &ctx, const int64_t *row_ids, + const ObColumnCSDecoderCtx &ctx, const int32_t row_id, common::ObDatum &datum) const override; + virtual int batch_decode(const ObColumnCSDecoderCtx &ctx, const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const override; virtual int decode_vector(const ObColumnCSDecoderCtx &ctx, ObVectorDecodeCtx &vector_ctx) const override; virtual int decode_and_aggregate( diff --git a/src/storage/blocksstable/cs_encoding/ob_integer_column_decoder.cpp b/src/storage/blocksstable/cs_encoding/ob_integer_column_decoder.cpp index a706c2d87..c8e7cb5cd 100644 --- a/src/storage/blocksstable/cs_encoding/ob_integer_column_decoder.cpp +++ b/src/storage/blocksstable/cs_encoding/ob_integer_column_decoder.cpp @@ -26,7 +26,7 @@ using namespace oceanbase::common; using namespace oceanbase::share; int ObIntegerColumnDecoder::decode(const ObColumnCSDecoderCtx &ctx, - const int64_t row_id, + const int32_t row_id, common::ObDatum &datum) const { int ret = OB_SUCCESS; @@ -42,7 +42,7 @@ int ObIntegerColumnDecoder::decode(const ObColumnCSDecoderCtx &ctx, } int ObIntegerColumnDecoder::batch_decode(const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, const int64_t row_cap, 
common::ObDatum *datums) const + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const { int ret = OB_SUCCESS; const ObIntegerColumnDecoderCtx &integer_ctx = ctx.integer_ctx_; @@ -70,7 +70,7 @@ int ObIntegerColumnDecoder::decode_vector( int ObIntegerColumnDecoder::get_null_count( const ObColumnCSDecoderCtx &col_ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { @@ -706,7 +706,7 @@ int ObIntegerColumnDecoder::tranverse_datum_all_op( int ObIntegerColumnDecoder::get_aggregate_result( const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObAggCell &agg_cell) const { diff --git a/src/storage/blocksstable/cs_encoding/ob_integer_column_decoder.h b/src/storage/blocksstable/cs_encoding/ob_integer_column_decoder.h index a40045860..f78abd264 100644 --- a/src/storage/blocksstable/cs_encoding/ob_integer_column_decoder.h +++ b/src/storage/blocksstable/cs_encoding/ob_integer_column_decoder.h @@ -30,13 +30,13 @@ public: ObIntegerColumnDecoder &operator=(const ObIntegerColumnDecoder&) = delete; virtual int decode(const ObColumnCSDecoderCtx &ctx, - const int64_t row_id, common::ObDatum &datum) const override; - virtual int batch_decode(const ObColumnCSDecoderCtx &ctx, const int64_t *row_ids, + const int32_t row_id, common::ObDatum &datum) const override; + virtual int batch_decode(const ObColumnCSDecoderCtx &ctx, const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const override; virtual int decode_vector(const ObColumnCSDecoderCtx &ctx, ObVectorDecodeCtx &vector_ctx) const override; virtual int get_null_count(const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, const int64_t row_cap, int64_t &null_count) const override; + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const override; virtual ObCSColumnHeader::Type get_type() const override { return type_; } @@ -49,7 +49,7 @@ public: virtual 
int get_aggregate_result( const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObAggCell &agg_cell) const override; diff --git a/src/storage/blocksstable/cs_encoding/ob_integer_stream_decoder.cpp b/src/storage/blocksstable/cs_encoding/ob_integer_stream_decoder.cpp index 119116d82..78667048b 100644 --- a/src/storage/blocksstable/cs_encoding/ob_integer_stream_decoder.cpp +++ b/src/storage/blocksstable/cs_encoding/ob_integer_stream_decoder.cpp @@ -40,7 +40,7 @@ struct ConvertUintToDatum_T const char *data, const ObIntegerStreamDecoderCtx &ctx, const char *ref_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { @@ -63,7 +63,7 @@ struct ConvertUintToDatum_T= row_cap)) { @@ -172,7 +172,7 @@ int ObColumnCSDecoder::batch_decode( } LOG_DEBUG("[Batch decode] Batch decoded datums: ", K(ret), K(row_cap), K(*ctx_), - K(ObArrayWrap(row_ids, row_cap)), K(ObArrayWrap(datums, row_cap))); + K(ObArrayWrap(row_ids, row_cap)), K(ObArrayWrap(datums, row_cap))); return ret; } @@ -192,7 +192,7 @@ int ObColumnCSDecoder::decode_vector(ObVectorDecodeCtx &vector_ctx) } int ObColumnCSDecoder::get_row_count( - const int64_t *row_ids, const int64_t row_cap, const bool contains_null, int64_t &count) + const int32_t *row_ids, const int64_t row_cap, const bool contains_null, int64_t &count) { int ret = OB_SUCCESS; int64_t null_count = 0; @@ -1559,7 +1559,7 @@ int ObMicroBlockCSDecoder::filter_black_filter_batch( int ObMicroBlockCSDecoder::get_rows( const common::ObIArray &cols, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObIArray &datum_infos, @@ -1598,7 +1598,7 @@ int ObMicroBlockCSDecoder::get_rows( return ret; } -int ObMicroBlockCSDecoder::get_row_count(int32_t col_id, const int64_t *row_ids, +int ObMicroBlockCSDecoder::get_row_count(int32_t col_id, const int32_t *row_ids, const 
int64_t row_cap, const bool contains_null, const share::schema::ObColumnParam *col_param, int64_t &count) { UNUSED(col_param); @@ -1609,7 +1609,7 @@ int ObMicroBlockCSDecoder::get_row_count(int32_t col_id, const int64_t *row_ids, LOG_WARN("not init", K(ret)); } else if (OB_FAIL(decoders_[col_id].get_row_count(row_ids, row_cap, contains_null, count))) { LOG_WARN("fail to get datums from decoder", K(ret), K(col_id), K(row_cap), "row_ids", - common::ObArrayWrap(row_ids, row_cap)); + common::ObArrayWrap(row_ids, row_cap)); } return ret; } @@ -1645,7 +1645,7 @@ int ObMicroBlockCSDecoder::get_column_datum( bool ObMicroBlockCSDecoder::can_pushdown_decoder( const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const ObAggCell &agg_cell) const { @@ -1680,7 +1680,7 @@ int ObMicroBlockCSDecoder::get_aggregate_result( const ObTableAccessContext &context, const int32_t col_offset, const share::schema::ObColumnParam &col_param, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObAggDatumBuf &agg_datum_buf, ObAggCell &agg_cell) @@ -1732,7 +1732,7 @@ int ObMicroBlockCSDecoder::get_aggregate_result( int ObMicroBlockCSDecoder::get_col_datums( int32_t col_id, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *col_datums) { @@ -1751,7 +1751,7 @@ int ObMicroBlockCSDecoder::get_col_datums( } } else if (OB_FAIL(decoders_[col_id].batch_decode(row_ids, row_cap, col_datums))) { LOG_WARN("fail to get datums from decoder", K(ret), K(col_id), K(row_cap), - "row_ids", common::ObArrayWrap(row_ids, row_cap)); + "row_ids", common::ObArrayWrap(row_ids, row_cap)); } return ret; } @@ -1789,7 +1789,7 @@ int ObMicroBlockCSDecoder::read_distinct( int ObMicroBlockCSDecoder::read_reference( const int32_t group_by_col, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const { @@ -1807,7 +1807,7 @@ int 
ObMicroBlockCSDecoder::read_reference( } int ObMicroBlockCSDecoder::get_group_by_aggregate_result( - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) @@ -1847,7 +1847,7 @@ int ObMicroBlockCSDecoder::get_group_by_aggregate_result( int ObMicroBlockCSDecoder::get_rows( const common::ObIArray &cols, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const char **cell_datas, const int64_t vec_offset, diff --git a/src/storage/blocksstable/cs_encoding/ob_micro_block_cs_decoder.h b/src/storage/blocksstable/cs_encoding/ob_micro_block_cs_decoder.h index 1538ddd00..8935c291f 100644 --- a/src/storage/blocksstable/cs_encoding/ob_micro_block_cs_decoder.h +++ b/src/storage/blocksstable/cs_encoding/ob_micro_block_cs_decoder.h @@ -46,17 +46,17 @@ public: int quick_compare(const ObStorageDatum &left, const ObStorageDatumCmpFunc &cmp_func, const int64_t row_id, int32_t &cmp_ret); - int batch_decode(const int64_t *row_ids, const int64_t row_cap, common::ObDatum *datums); + int batch_decode(const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums); int decode_vector(ObVectorDecodeCtx &vector_ctx); int get_row_count( - const int64_t *row_ids, const int64_t row_cap, const bool contains_null, int64_t &count); + const int32_t *row_ids, const int64_t row_cap, const bool contains_null, int64_t &count); OB_INLINE int get_distinct_count(int64_t &distinct_cnt) const { return decoder_->get_distinct_count(*ctx_, distinct_cnt); } OB_INLINE int read_distinct( storage::ObGroupByCell &group_by_cell) const { return decoder_->read_distinct(*ctx_, group_by_cell); } OB_INLINE int read_reference( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const { return decoder_->read_reference(*ctx_, row_ids, row_cap, group_by_cell); } @@ -212,12 +212,12 @@ public: virtual int get_rows( const 
common::ObIArray &cols, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObIArray &datum_infos, const int64_t datum_offset = 0) override; - virtual int get_row_count(int32_t col_id, const int64_t *row_ids, const int64_t row_cap, + virtual int get_row_count(int32_t col_id, const int32_t *row_ids, const int64_t row_cap, const bool contains_null, const share::schema::ObColumnParam *col_param, int64_t &count) override final; virtual int64_t get_column_count() const override { @@ -246,21 +246,21 @@ public: const ObTableAccessContext &context, const int32_t col_offset, const share::schema::ObColumnParam &col_param, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObAggDatumBuf &agg_datum_buf, ObAggCell &agg_cell) override; virtual int get_distinct_count(const int32_t group_by_col, int64_t &distinct_cnt) const override; virtual int read_distinct(const int32_t group_by_col, const char **cell_datas, storage::ObGroupByCell &group_by_cell) const override; - virtual int read_reference(const int32_t group_by_col, const int64_t *row_ids, + virtual int read_reference(const int32_t group_by_col, const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const override; - virtual int get_group_by_aggregate_result(const int64_t *row_ids, const char **cell_datas, + virtual int get_group_by_aggregate_result(const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) override; virtual int get_rows( const common::ObIArray &cols, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const char **cell_datas, const int64_t vec_offset, @@ -284,7 +284,7 @@ private: int get_row_impl(int64_t index, ObDatumRow &row); bool can_pushdown_decoder( const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const 
int64_t row_cap, const ObAggCell &agg_cell) const; OB_INLINE static const ObRowHeader &get_major_store_row_header() @@ -304,7 +304,7 @@ private: sql::ObWhiteFilterExecutor &filter, const sql::PushdownFilterInfo &pd_filter_info, const int32_t col_offset, const share::schema::ObColumnParam *col_param, ObStorageDatum &decoded_datum, common::ObBitmap &result_bitmap); - int get_col_datums(int32_t col_id, const int64_t *row_ids, const int64_t row_cap, common::ObDatum *col_datums); + int get_col_datums(int32_t col_id, const int32_t *row_ids, const int64_t row_cap, common::ObDatum *col_datums); int get_col_data(int32_t col_id, ObVectorDecodeCtx &ctx); private: diff --git a/src/storage/blocksstable/cs_encoding/ob_str_dict_column_decoder.cpp b/src/storage/blocksstable/cs_encoding/ob_str_dict_column_decoder.cpp index 7b4fc4d56..69726249d 100644 --- a/src/storage/blocksstable/cs_encoding/ob_str_dict_column_decoder.cpp +++ b/src/storage/blocksstable/cs_encoding/ob_str_dict_column_decoder.cpp @@ -23,7 +23,7 @@ namespace oceanbase namespace blocksstable { int ObStrDictColumnDecoder::decode( - const ObColumnCSDecoderCtx &ctx, const int64_t row_id, common::ObDatum &datum) const + const ObColumnCSDecoderCtx &ctx, const int32_t row_id, common::ObDatum &datum) const { int ret = OB_SUCCESS; const ObDictColumnDecoderCtx &dict_ctx = ctx.dict_ctx_; @@ -96,7 +96,7 @@ int ObStrDictColumnDecoder::decode_and_aggregate( return ret; } -int ObStrDictColumnDecoder::batch_decode(const ObColumnCSDecoderCtx &ctx, const int64_t *row_ids, +int ObStrDictColumnDecoder::batch_decode(const ObColumnCSDecoderCtx &ctx, const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const { int ret = OB_SUCCESS; diff --git a/src/storage/blocksstable/cs_encoding/ob_str_dict_column_decoder.h b/src/storage/blocksstable/cs_encoding/ob_str_dict_column_decoder.h index 73b9bce29..16ee499b7 100644 --- a/src/storage/blocksstable/cs_encoding/ob_str_dict_column_decoder.h +++ 
b/src/storage/blocksstable/cs_encoding/ob_str_dict_column_decoder.h @@ -30,8 +30,8 @@ public: ObStrDictColumnDecoder &operator=(const ObStrDictColumnDecoder &) = delete; virtual int decode( - const ObColumnCSDecoderCtx &ctx, const int64_t row_id, common::ObDatum &datum) const override; - virtual int batch_decode(const ObColumnCSDecoderCtx &ctx, const int64_t *row_ids, + const ObColumnCSDecoderCtx &ctx, const int32_t row_id, common::ObDatum &datum) const override; + virtual int batch_decode(const ObColumnCSDecoderCtx &ctx, const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const override; virtual int decode_vector(const ObColumnCSDecoderCtx &ctx, ObVectorDecodeCtx &vector_ctx) const override; virtual int decode_and_aggregate( diff --git a/src/storage/blocksstable/cs_encoding/ob_string_column_decoder.cpp b/src/storage/blocksstable/cs_encoding/ob_string_column_decoder.cpp index de02f3ebb..4c616ffbd 100644 --- a/src/storage/blocksstable/cs_encoding/ob_string_column_decoder.cpp +++ b/src/storage/blocksstable/cs_encoding/ob_string_column_decoder.cpp @@ -24,7 +24,7 @@ namespace blocksstable { int ObStringColumnDecoder::decode( - const ObColumnCSDecoderCtx &ctx, const int64_t row_id, common::ObDatum &datum) const + const ObColumnCSDecoderCtx &ctx, const int32_t row_id, common::ObDatum &datum) const { int ret = OB_SUCCESS; const ObStringColumnDecoderCtx &string_ctx = ctx.string_ctx_; @@ -51,7 +51,7 @@ int ObStringColumnDecoder::decode( } int ObStringColumnDecoder::batch_decode(const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const { int ret = OB_SUCCESS; const ObStringColumnDecoderCtx &string_ctx = ctx.string_ctx_; @@ -93,7 +93,7 @@ int ObStringColumnDecoder::decode_vector( } int ObStringColumnDecoder::get_null_count(const ObColumnCSDecoderCtx &col_ctx, - const int64_t *row_ids, const int64_t row_cap, int64_t 
&null_count) const + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { int ret = OB_SUCCESS; if (OB_ISNULL(row_ids) || row_cap < 1) { diff --git a/src/storage/blocksstable/cs_encoding/ob_string_column_decoder.h b/src/storage/blocksstable/cs_encoding/ob_string_column_decoder.h index b42bd1b74..671a963b2 100644 --- a/src/storage/blocksstable/cs_encoding/ob_string_column_decoder.h +++ b/src/storage/blocksstable/cs_encoding/ob_string_column_decoder.h @@ -30,13 +30,13 @@ public: ObStringColumnDecoder &operator=(const ObStringColumnDecoder&) = delete; virtual int decode(const ObColumnCSDecoderCtx &ctx, - const int64_t row_id, common::ObDatum &datum) const override; - virtual int batch_decode(const ObColumnCSDecoderCtx &ctx, const int64_t *row_ids, + const int32_t row_id, common::ObDatum &datum) const override; + virtual int batch_decode(const ObColumnCSDecoderCtx &ctx, const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const override; virtual int decode_vector(const ObColumnCSDecoderCtx &ctx, ObVectorDecodeCtx &vector_ctx) const override; virtual int get_null_count(const ObColumnCSDecoderCtx &ctx, - const int64_t *row_ids, const int64_t row_cap, int64_t &null_count) const override; + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const override; virtual int pushdown_operator( const sql::ObPushdownFilterExecutor *parent, diff --git a/src/storage/blocksstable/cs_encoding/ob_string_stream_decoder.cpp b/src/storage/blocksstable/cs_encoding/ob_string_stream_decoder.cpp index 469071e38..8f73f687d 100644 --- a/src/storage/blocksstable/cs_encoding/ob_string_stream_decoder.cpp +++ b/src/storage/blocksstable/cs_encoding/ob_string_stream_decoder.cpp @@ -33,7 +33,7 @@ static void process( const ObStringStreamDecoderCtx &str_ctx, const char *offset_arr_buf, const char *ref_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { @@ -55,7 +55,7 @@ static void process( 
const ObStringStreamDecoderCtx &str_ctx, const char *offset_data, const char *ref_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { @@ -105,7 +105,7 @@ static void process( const ObStringStreamDecoderCtx &str_ctx, const char *offset_data, const char *ref_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { @@ -151,7 +151,7 @@ static void process( const ObStringStreamDecoderCtx &str_ctx, const char *offset_data, const char *ref_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { @@ -196,7 +196,7 @@ static void process( const ObStringStreamDecoderCtx &str_ctx, const char *offset_data, const char *ref_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { @@ -251,7 +251,7 @@ static void process( const ObStringStreamDecoderCtx &str_ctx, const char *offset_data, const char *ref_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { @@ -293,7 +293,7 @@ static void process( const ObStringStreamDecoderCtx &str_ctx, const char *offset_data, const char *ref_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { @@ -332,7 +332,7 @@ static void process( const ObStringStreamDecoderCtx &str_ctx, const char *offset_data, const char *ref_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { @@ -354,7 +354,7 @@ static void process( const ObStringStreamDecoderCtx &str_ctx, const char *offset_data, const char *ref_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { diff --git a/src/storage/blocksstable/cs_encoding/ob_string_stream_decoder.h b/src/storage/blocksstable/cs_encoding/ob_string_stream_decoder.h index 549fdc070..94b29aac2 100644 --- 
a/src/storage/blocksstable/cs_encoding/ob_string_stream_decoder.h +++ b/src/storage/blocksstable/cs_encoding/ob_string_stream_decoder.h @@ -28,7 +28,7 @@ typedef void (*ConvertStringToDatumFunc)( const ObStringStreamDecoderCtx &str_ctx, const char *offset_data, const char *ref_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap_or_id, common::ObDatum *datums); diff --git a/src/storage/blocksstable/encoding/ob_const_decoder.cpp b/src/storage/blocksstable/encoding/ob_const_decoder.cpp index 44747bab6..fe9b24783 100644 --- a/src/storage/blocksstable/encoding/ob_const_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_const_decoder.cpp @@ -139,7 +139,7 @@ int ObConstDecoder::update_pointer(const char *old_block, const char *cur_block) int ObConstDecoder::batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const @@ -301,7 +301,7 @@ int ObConstDecoder::batch_decode_without_dict( int ObConstDecoder::get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { @@ -876,7 +876,7 @@ int ObConstDecoder::set_res_with_bitset( template int ObConstDecoder::extract_ref_and_null_count( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, T len_arr, int64_t &null_count, @@ -994,7 +994,7 @@ int ObConstDecoder::read_distinct( int ObConstDecoder::read_reference( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const { diff --git a/src/storage/blocksstable/encoding/ob_const_decoder.h b/src/storage/blocksstable/encoding/ob_const_decoder.h index 0101ed805..a35373923 100644 --- a/src/storage/blocksstable/encoding/ob_const_decoder.h +++ 
b/src/storage/blocksstable/encoding/ob_const_decoder.h @@ -54,7 +54,7 @@ public: virtual int batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const override; @@ -66,7 +66,7 @@ public: virtual int get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const override; @@ -88,7 +88,7 @@ public: virtual int read_reference( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const override; protected: @@ -149,7 +149,7 @@ private: template int extract_ref_and_null_count( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, T len_arr, int64_t &null_count, diff --git a/src/storage/blocksstable/encoding/ob_dict_decoder.cpp b/src/storage/blocksstable/encoding/ob_dict_decoder.cpp index 5285066f8..1b821b55f 100644 --- a/src/storage/blocksstable/encoding/ob_dict_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_dict_decoder.cpp @@ -34,7 +34,7 @@ struct DictFixBatchDecodeFunc_T const char *ref_data, const char *base_data, const int64_t fixed_len, const int64_t dict_cnt, - const int64_t *row_ids, const int64_t row_cap, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { typedef typename ObEncodingTypeInference::Type RefType; @@ -63,7 +63,7 @@ struct DictFixBatchDecodeFunc_T::Type RefType; @@ -339,7 +339,7 @@ int ObDictDecoder::decode(const common::ObObjType &obj_type, common::ObDatum &da } int ObDictDecoder::batch_get_bitpacked_refs( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const unsigned char *col_data, common::ObDatum *datums) const @@ -363,7 +363,7 @@ int ObDictDecoder::batch_get_bitpacked_refs( } int ObDictDecoder::batch_get_null_count( 
- const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const unsigned char *col_data, int64_t &null_count) const @@ -394,7 +394,7 @@ int ObDictDecoder::batch_get_null_count( int ObDictDecoder::batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const @@ -758,7 +758,7 @@ bool ObDictDecoder::fast_string_equal_valid( int ObDictDecoder::get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { @@ -1745,7 +1745,7 @@ int ObDictDecoder::batch_read_distinct( int ObDictDecoder::read_reference( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const { diff --git a/src/storage/blocksstable/encoding/ob_dict_decoder.h b/src/storage/blocksstable/encoding/ob_dict_decoder.h index e868a0db0..51bfe3da2 100644 --- a/src/storage/blocksstable/encoding/ob_dict_decoder.h +++ b/src/storage/blocksstable/encoding/ob_dict_decoder.h @@ -34,14 +34,14 @@ typedef void (*dict_var_batch_decode_func)( const char *base_data, const char *base_data_end, const int64_t dict_cnt, - const int64_t *row_ids, const int64_t row_cap, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums); typedef void (*dict_fix_batch_decode_func)( const char *ref_data, const char *base_data, const int64_t fixed_len, const int64_t dict_cnt, - const int64_t *row_ids, const int64_t row_cap, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums); typedef void (*dict_cmp_ref_func)( @@ -71,7 +71,7 @@ public: virtual int batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) 
const override; @@ -84,7 +84,7 @@ public: virtual int get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const override; @@ -141,7 +141,7 @@ public: virtual int read_reference( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const override; int batch_read_distinct( @@ -172,13 +172,13 @@ private: // unpacked refs should be stores in datums.pack_ int batch_get_bitpacked_refs( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const unsigned char *col_data, common::ObDatum *datums) const; int batch_get_null_count( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const unsigned char *col_data, int64_t &null_count) const; @@ -313,7 +313,7 @@ template struct ObFixedDictDataLocator_T { explicit ObFixedDictDataLocator_T( - const int64_t *row_ids, + const int32_t *row_ids, const char *dict_payload, const int64_t dict_len, const int64_t dict_cnt, @@ -338,7 +338,7 @@ struct ObFixedDictDataLocator_T is_null = ref == dict_cnt_; } - const int64_t *__restrict row_ids_; + const int32_t *__restrict row_ids_; const char *__restrict dict_payload_; const int64_t dict_cnt_; const int64_t dict_len_; @@ -349,7 +349,7 @@ template struct ObVarDictDataLocator_T { explicit ObVarDictDataLocator_T( - const int64_t *row_ids, + const int32_t *row_ids, const char *dict_payload, const int64_t last_dict_entry_len, const int64_t dict_cnt, @@ -376,7 +376,7 @@ struct ObVarDictDataLocator_T is_null = ref == dict_cnt_; } - const int64_t *__restrict row_ids_; + const int32_t *__restrict row_ids_; const char *__restrict dict_payload_; const int64_t dict_cnt_; const int64_t last_dict_entry_len_; diff --git a/src/storage/blocksstable/encoding/ob_hex_string_decoder.cpp b/src/storage/blocksstable/encoding/ob_hex_string_decoder.cpp index 
a4822b119..49b59a246 100644 --- a/src/storage/blocksstable/encoding/ob_hex_string_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_hex_string_decoder.cpp @@ -141,7 +141,7 @@ int ObHexStringDecoder::update_pointer(const char *old_block, const char *cur_bl int ObHexStringDecoder::batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const diff --git a/src/storage/blocksstable/encoding/ob_hex_string_decoder.h b/src/storage/blocksstable/encoding/ob_hex_string_decoder.h index ea7303440..26811a5b0 100644 --- a/src/storage/blocksstable/encoding/ob_hex_string_decoder.h +++ b/src/storage/blocksstable/encoding/ob_hex_string_decoder.h @@ -53,7 +53,7 @@ public: virtual int batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const override; diff --git a/src/storage/blocksstable/encoding/ob_icolumn_decoder.cpp b/src/storage/blocksstable/encoding/ob_icolumn_decoder.cpp index 3b4e14b7a..ebe6b3868 100644 --- a/src/storage/blocksstable/encoding/ob_icolumn_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_icolumn_decoder.cpp @@ -111,7 +111,7 @@ int ObIColumnDecoder::get_is_null_bitmap_from_var_column( int ObIColumnDecoder::set_null_datums_from_fixed_column( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const unsigned char *col_data, common::ObDatum *datums) const @@ -135,7 +135,7 @@ int ObIColumnDecoder::set_null_datums_from_fixed_column( int ObIColumnDecoder::set_null_datums_from_var_column( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const { @@ -164,7 +164,7 @@ int 
ObIColumnDecoder::set_null_datums_from_var_column( int ObIColumnDecoder::set_null_vector_from_fixed_column( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const int64_t vec_offset, const unsigned char *col_data, @@ -220,7 +220,7 @@ int ObIColumnDecoder::batch_locate_var_len_row( int ObIColumnDecoder::get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { @@ -250,7 +250,7 @@ int ObIColumnDecoder::get_null_count( int ObIColumnDecoder::get_null_count_from_fixed_column( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const unsigned char *col_data, int64_t &null_count) const @@ -273,7 +273,7 @@ int ObIColumnDecoder::get_null_count_from_fixed_column( int ObIColumnDecoder::get_null_count_from_var_column( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { @@ -302,7 +302,7 @@ int ObIColumnDecoder::get_null_count_from_var_column( int ObIColumnDecoder::get_null_count_from_extend_value( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const char *meta_data_, int64_t &null_count) const @@ -486,7 +486,7 @@ int ObSpanColumnDecoder::decode_refed_range( { int ret = OB_SUCCESS; const int64_t row_cap = ref_end_idx - ref_start_idx + 1; - const int64_t *row_id_arr = raw_vector_ctx.row_ids_ + ref_start_idx; + const int32_t *row_id_arr = raw_vector_ctx.row_ids_ + ref_start_idx; const char **ptr_arr = raw_vector_ctx.ptr_arr_ + ref_start_idx; uint32_t *len_arr = raw_vector_ctx.len_arr_ + ref_start_idx; const int64_t vec_offset = raw_vector_ctx.vec_offset_ + ref_start_idx; diff --git a/src/storage/blocksstable/encoding/ob_icolumn_decoder.h 
b/src/storage/blocksstable/encoding/ob_icolumn_decoder.h index d96842877..69a1ca023 100644 --- a/src/storage/blocksstable/encoding/ob_icolumn_decoder.h +++ b/src/storage/blocksstable/encoding/ob_icolumn_decoder.h @@ -99,7 +99,7 @@ struct ObVectorDecodeCtx explicit ObVectorDecodeCtx( const char **ptr_arr, uint32_t *len_arr, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const int64_t vec_offset, sql::VectorHeader &vec_header) @@ -128,7 +128,7 @@ struct ObVectorDecodeCtx const char **ptr_arr_; // tmp mem buf as pointer array uint32_t *len_arr_; // tmp mem buf as 4-byte array - const int64_t *row_ids_; // projection row-ids + const int32_t *row_ids_; // projection row-ids const int64_t row_cap_; // batch size / array size const int64_t vec_offset_; // vector start projection offset sql::VectorHeader &vec_header_; // result @@ -174,7 +174,7 @@ public: virtual int batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const @@ -234,7 +234,7 @@ public: OB_INLINE virtual int batch_locate_row_data( const ObColumnDecoderCtx &col_ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const char **row_datas, common::ObDatum *datums) const; @@ -253,7 +253,7 @@ public: virtual int set_null_datums_from_fixed_column( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const unsigned char *col_data, common::ObDatum *datums) const; @@ -261,13 +261,13 @@ public: virtual int set_null_datums_from_var_column( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const; virtual int set_null_vector_from_fixed_column( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const 
int64_t row_cap, const int64_t vec_offset, const unsigned char *col_data, @@ -282,13 +282,13 @@ public: virtual int get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const; virtual int get_null_count_from_fixed_column( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const unsigned char *col_data, int64_t &null_count) const; @@ -296,7 +296,7 @@ public: virtual int get_null_count_from_var_column( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const; @@ -317,7 +317,7 @@ public: virtual int read_reference( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const { return OB_NOT_SUPPORTED; } @@ -326,7 +326,7 @@ protected: int get_null_count_from_extend_value( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const char *meta_data_, int64_t &null_count) const; @@ -338,7 +338,7 @@ protected: const Header &header, const char **data_arr, uint32_t *len_arr, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap); template @@ -419,7 +419,7 @@ OB_INLINE int ObIColumnDecoder::locate_row_data( OB_INLINE int ObIColumnDecoder::batch_locate_row_data( const ObColumnDecoderCtx &col_ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const char **row_datas, common::ObDatum *datums) const @@ -443,7 +443,7 @@ int ObIColumnDecoder::batch_locate_cell_data( const Header &header, const char **data_arr, uint32_t *len_arr, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap) { // for var-length data, nullptr == data_arr[row_id] 
represent for this row is null diff --git a/src/storage/blocksstable/encoding/ob_imicro_block_decoder.h b/src/storage/blocksstable/encoding/ob_imicro_block_decoder.h index e180fb6a7..4ca62d305 100644 --- a/src/storage/blocksstable/encoding/ob_imicro_block_decoder.h +++ b/src/storage/blocksstable/encoding/ob_imicro_block_decoder.h @@ -43,7 +43,7 @@ public: common::ObBitmap &result_bitmap) = 0; virtual int get_rows(const common::ObIArray &cols, const common::ObIArray &col_params, - const int64_t *row_ids, const char **cell_datas, const int64_t row_cap, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObIArray &datum_infos, const int64_t datum_offset = 0) = 0; virtual bool can_apply_black(const common::ObIArray &col_offsets) const = 0; virtual int filter_black_filter_batch(const sql::ObPushdownFilterExecutor *parent, @@ -58,7 +58,7 @@ public: virtual int get_rows( const common::ObIArray &cols, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const char **cell_datas, const int64_t vec_offset, diff --git a/src/storage/blocksstable/encoding/ob_integer_base_diff_decoder.cpp b/src/storage/blocksstable/encoding/ob_integer_base_diff_decoder.cpp index 2930fdd0d..cbcdcede0 100644 --- a/src/storage/blocksstable/encoding/ob_integer_base_diff_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_integer_base_diff_decoder.cpp @@ -130,7 +130,7 @@ int ObIntegerBaseDiffDecoder::update_pointer(const char *old_block, const char * int ObIntegerBaseDiffDecoder::batch_get_bitpacked_values( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const int64_t datum_len, const int64_t data_offset, @@ -163,7 +163,7 @@ int ObIntegerBaseDiffDecoder::batch_get_bitpacked_values( int ObIntegerBaseDiffDecoder::batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char 
**cell_datas, const int64_t row_cap, common::ObDatum *datums) const @@ -763,7 +763,7 @@ int ObIntegerBaseDiffDecoder::traverse_all_data( int ObIntegerBaseDiffDecoder::get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { diff --git a/src/storage/blocksstable/encoding/ob_integer_base_diff_decoder.h b/src/storage/blocksstable/encoding/ob_integer_base_diff_decoder.h index 076473443..6e25b8899 100644 --- a/src/storage/blocksstable/encoding/ob_integer_base_diff_decoder.h +++ b/src/storage/blocksstable/encoding/ob_integer_base_diff_decoder.h @@ -53,7 +53,7 @@ public: virtual int batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const override; @@ -73,13 +73,13 @@ public: virtual int get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const override; private: int batch_get_bitpacked_values( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const int64_t datum_len, const int64_t data_offset, diff --git a/src/storage/blocksstable/encoding/ob_inter_column_substring_decoder.cpp b/src/storage/blocksstable/encoding/ob_inter_column_substring_decoder.cpp index 22677571f..fdae8f879 100644 --- a/src/storage/blocksstable/encoding/ob_inter_column_substring_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_inter_column_substring_decoder.cpp @@ -154,7 +154,7 @@ int ObInterColSubStrDecoder::inner_decode_vector( if (col_ref_end_idx >= col_ref_start_idx) { // decode previous referenced range const int64_t row_cap = col_ref_end_idx - col_ref_start_idx + 1; - const int64_t *row_id_arr = vector_ctx.row_ids_ + col_ref_start_idx; + const 
int32_t *row_id_arr = vector_ctx.row_ids_ + col_ref_start_idx; const char **ptr_arr = vector_ctx.ptr_arr_ + col_ref_start_idx; uint32_t *len_arr = vector_ctx.len_arr_ + col_ref_start_idx; const int64_t vec_offset = vector_ctx.vec_offset_ + col_ref_start_idx; @@ -187,7 +187,7 @@ int ObInterColSubStrDecoder::inner_decode_vector( if (OB_SUCC(ret) && col_ref_end_idx >= col_ref_start_idx) { // decode last referenced range const int64_t row_cap = col_ref_end_idx - col_ref_start_idx + 1; - const int64_t *row_id_arr = vector_ctx.row_ids_ + col_ref_start_idx; + const int32_t *row_id_arr = vector_ctx.row_ids_ + col_ref_start_idx; const char **ptr_arr = vector_ctx.ptr_arr_ + col_ref_start_idx; uint32_t *len_arr = vector_ctx.len_arr_ + col_ref_start_idx; const int64_t vec_offset = vector_ctx.vec_offset_ + col_ref_start_idx; diff --git a/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp b/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp index 08e9a6ebc..00a6d1d10 100644 --- a/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_micro_block_decoder.cpp @@ -152,7 +152,7 @@ int ObColumnDecoder::decode(common::ObDatum &datum, const int64_t row_id, int ObColumnDecoder::batch_decode( const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) @@ -175,7 +175,7 @@ int ObColumnDecoder::batch_decode( } LOG_DEBUG("[Batch decode] Batch decoded datums: ", - K(ret), K(row_cap), K(*ctx_), K(ObArrayWrap(row_ids, row_cap)), + K(ret), K(row_cap), K(*ctx_), K(ObArrayWrap(row_ids, row_cap)), K(ObArrayWrap(datums, row_cap))); return ret; } @@ -197,7 +197,7 @@ int ObColumnDecoder::decode_vector( int ObColumnDecoder::get_row_count( const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool contains_null, int64_t &count) @@ -1948,7 +1948,7 @@ int 
ObMicroBlockDecoder::filter_black_filter_batch( int ObMicroBlockDecoder::get_rows( const common::ObIArray &cols, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObIArray &datum_infos, @@ -1989,7 +1989,7 @@ int ObMicroBlockDecoder::get_rows( int ObMicroBlockDecoder::get_row_count( int32_t col_id, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool contains_null, const share::schema::ObColumnParam *col_param, @@ -2008,7 +2008,7 @@ int ObMicroBlockDecoder::get_row_count( contains_null, count))) { LOG_WARN("fail to get datums from decoder", K(ret), K(col_id), K(row_cap), - "row_ids", common::ObArrayWrap(row_ids, row_cap)); + "row_ids", common::ObArrayWrap(row_ids, row_cap)); } return ret; } @@ -2018,7 +2018,7 @@ int ObMicroBlockDecoder::get_aggregate_result( const ObTableAccessContext &context, const int32_t col_offset, const share::schema::ObColumnParam &col_param, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObAggDatumBuf &agg_datum_buf, storage::ObAggCell &agg_cell) @@ -2050,7 +2050,7 @@ int ObMicroBlockDecoder::get_aggregate_result( int ObMicroBlockDecoder::get_col_datums( int32_t col_id, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *col_datums) @@ -2084,7 +2084,7 @@ int ObMicroBlockDecoder::get_col_datums( row_cap, col_datums))) { LOG_WARN("fail to get datums from decoder", K(ret), K(col_id), K(row_cap), - "row_ids", common::ObArrayWrap(row_ids, row_cap)); + "row_ids", common::ObArrayWrap(row_ids, row_cap)); } return ret; } @@ -2160,7 +2160,7 @@ int ObMicroBlockDecoder::read_distinct( int ObMicroBlockDecoder::read_reference( const int32_t group_by_col, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const { @@ -2178,7 +2178,7 @@ int 
ObMicroBlockDecoder::read_reference( } int ObMicroBlockDecoder::get_group_by_aggregate_result( - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) @@ -2216,7 +2216,7 @@ int ObMicroBlockDecoder::get_group_by_aggregate_result( int ObMicroBlockDecoder::get_rows( const common::ObIArray &cols, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const char **cell_datas, const int64_t vec_offset, diff --git a/src/storage/blocksstable/encoding/ob_micro_block_decoder.h b/src/storage/blocksstable/encoding/ob_micro_block_decoder.h index 546d436b7..38ff0e737 100644 --- a/src/storage/blocksstable/encoding/ob_micro_block_decoder.h +++ b/src/storage/blocksstable/encoding/ob_micro_block_decoder.h @@ -52,7 +52,7 @@ public: int batch_decode( const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums); @@ -63,7 +63,7 @@ public: int get_row_count( const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool contains_null, int64_t &count); @@ -75,7 +75,7 @@ public: storage::ObGroupByCell &group_by_cell) const { return decoder_->read_distinct(*ctx_, cell_datas, group_by_cell); } OB_INLINE int read_reference( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const { return decoder_->read_reference(*ctx_, row_ids, row_cap, group_by_cell); } @@ -299,14 +299,14 @@ public: virtual int get_rows( const common::ObIArray &cols, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObIArray &datum_infos, const int64_t datum_offset = 0) override; virtual int get_row_count( int32_t col_id, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t 
row_cap, const bool contains_null, const share::schema::ObColumnParam *col_param, @@ -316,7 +316,7 @@ public: const ObTableAccessContext &context, const int32_t col_offset, const share::schema::ObColumnParam &col_param, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObAggDatumBuf &agg_datum_buf, storage::ObAggCell &agg_cell) override; @@ -350,18 +350,18 @@ public: storage::ObGroupByCell &group_by_cell) const override; virtual int read_reference( const int32_t group_by_col, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const override; virtual int get_group_by_aggregate_result( - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) override; virtual int get_rows( const common::ObIArray &cols, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const char **cell_datas, const int64_t vec_offset, @@ -389,7 +389,7 @@ private: ObStorageDatum *datums); int get_col_datums(int32_t col_id, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *col_datums); diff --git a/src/storage/blocksstable/encoding/ob_raw_decoder.cpp b/src/storage/blocksstable/encoding/ob_raw_decoder.cpp index d76abbc12..ba3328116 100644 --- a/src/storage/blocksstable/encoding/ob_raw_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_raw_decoder.cpp @@ -63,7 +63,7 @@ struct RawVarBatchDecodeFunc_T const int64_t header_off, const int64_t header_len, const int64_t header_var_col_cnt, - const int64_t *row_ids, const int64_t row_cap, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { typedef typename ObEncodingByteLenMap::Type RowIdxType; @@ -138,7 +138,7 @@ struct RawFixBatchDecodeFunc_T static void raw_fix_batch_decode_func( const int64_t col_len, const char *base_data, - 
const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { @@ -162,7 +162,7 @@ struct RawFixBatchDecodeFunc_T static void raw_fix_batch_decode_func( const int64_t col_len, const char *base_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) { @@ -353,7 +353,7 @@ int ObRawDecoder::update_pointer(const char *old_block, const char *cur_block) int ObRawDecoder::batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const @@ -403,7 +403,7 @@ bool ObRawDecoder::fast_decode_valid(const ObColumnDecoderCtx &ctx) const int ObRawDecoder::batch_decode_fast( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const { @@ -447,7 +447,7 @@ int ObRawDecoder::batch_decode_fast( int ObRawDecoder::batch_decode_general( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const @@ -860,7 +860,7 @@ int ObRawDecoder::check_fast_filter_valid( int ObRawDecoder::get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { @@ -1043,7 +1043,7 @@ int ObRawDecoder::fast_datum_comparison_operator( ObDatumCmpFuncType cmp_func = filter.cmp_func_; // OPT: remove this rowid array usage by adding a new batch decode interface? 
- int64_t *row_ids = pd_filter_info.row_ids_; + int32_t *row_ids = pd_filter_info.row_ids_; int64_t evaluated_row_cnt = 0; while (OB_SUCC(ret) && evaluated_row_cnt < pd_filter_info.count_) { //decode and evaluate one batch diff --git a/src/storage/blocksstable/encoding/ob_raw_decoder.h b/src/storage/blocksstable/encoding/ob_raw_decoder.h index 6a577b7db..21f6605be 100644 --- a/src/storage/blocksstable/encoding/ob_raw_decoder.h +++ b/src/storage/blocksstable/encoding/ob_raw_decoder.h @@ -31,7 +31,7 @@ class ObIRowIndex; typedef void (*raw_fix_batch_decode_func)( const int64_t col_len, const char *base_data, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums); @@ -41,7 +41,7 @@ typedef void (*raw_var_batch_decode_func)( const int64_t header_off, const int64_t header_len, const int64_t header_var_col_cnt, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums); @@ -178,7 +178,7 @@ public: virtual int batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const override; @@ -200,7 +200,7 @@ public: virtual int get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const override; virtual bool fast_decode_valid(const ObColumnDecoderCtx &ctx) const override; @@ -216,14 +216,14 @@ private: int batch_decode_general( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const; int batch_decode_fast( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, common::ObDatum *datums) const; diff --git 
a/src/storage/blocksstable/encoding/ob_rle_decoder.cpp b/src/storage/blocksstable/encoding/ob_rle_decoder.cpp index d124b7859..72d2a14ea 100644 --- a/src/storage/blocksstable/encoding/ob_rle_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_rle_decoder.cpp @@ -75,7 +75,7 @@ int ObRLEDecoder::update_pointer(const char *old_block, const char *cur_block) int ObRLEDecoder::batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const @@ -138,7 +138,7 @@ int ObRLEDecoder::decode_vector( int ObRLEDecoder::get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { @@ -524,7 +524,7 @@ int ObRLEDecoder::set_res_with_bitset( template int ObRLEDecoder::extract_ref_and_null_count( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, T ref_buf, int64_t &null_count) const @@ -625,7 +625,7 @@ int ObRLEDecoder::read_distinct( int ObRLEDecoder::read_reference( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const { diff --git a/src/storage/blocksstable/encoding/ob_rle_decoder.h b/src/storage/blocksstable/encoding/ob_rle_decoder.h index e67ba6b34..a81ceb04a 100644 --- a/src/storage/blocksstable/encoding/ob_rle_decoder.h +++ b/src/storage/blocksstable/encoding/ob_rle_decoder.h @@ -49,7 +49,7 @@ public: virtual int batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const override; @@ -62,7 +62,7 @@ public: virtual int get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const 
int64_t row_cap, int64_t &null_count) const override; @@ -137,7 +137,7 @@ private: template int extract_ref_and_null_count( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, T ref_buf, int64_t &null_count) const; @@ -154,7 +154,7 @@ private: virtual int read_reference( const ObColumnDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const override; diff --git a/src/storage/blocksstable/encoding/ob_row_index.h b/src/storage/blocksstable/encoding/ob_row_index.h index c13f18625..d6451465d 100644 --- a/src/storage/blocksstable/encoding/ob_row_index.h +++ b/src/storage/blocksstable/encoding/ob_row_index.h @@ -27,13 +27,13 @@ public: virtual int get(const int64_t row_id, const char *&data, int64_t &len) const = 0; // Batch read row data in row_datas, row_len in datums.len_ for batch decode virtual int batch_get( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool has_ext, const char **row_datas, ObDatum *datums) const = 0; virtual int batch_get( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool has_ext, const char **row_datas, @@ -50,13 +50,13 @@ public: const int64_t row_cnt, const int64_t index_byte); inline virtual int get(const int64_t row_id, const char *&data, int64_t &len) const; inline virtual int batch_get( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool has_ext, const char **row_datas, ObDatum *datums) const; inline virtual int batch_get( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool has_ext, const char **row_datas, @@ -78,13 +78,13 @@ public: inline int init(const char *data, const int64_t len, const int64_t row_cnt); inline virtual int get(const int64_t row_id, const char *&data, int64_t &len) const; inline virtual int batch_get( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const 
bool has_ext, const char **row_datas, ObDatum *datums) const; inline virtual int batch_get( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool has_ext, const char **row_datas, @@ -131,7 +131,7 @@ inline int ObVarRowIndex::get(const int64_t row_id, const char *&data, int64_t & } inline int ObVarRowIndex::batch_get( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool has_ext, const char **row_datas, @@ -159,7 +159,7 @@ inline int ObVarRowIndex::batch_get( } inline int ObVarRowIndex::batch_get( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool has_ext, const char **row_datas, @@ -214,7 +214,7 @@ inline int ObFixRowIndex::get(const int64_t row_id, const char *&data, int64_t & } inline int ObFixRowIndex::batch_get( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool has_ext, const char **row_datas, @@ -241,7 +241,7 @@ inline int ObFixRowIndex::batch_get( } inline int ObFixRowIndex::batch_get( - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool has_ext, const char **row_datas, diff --git a/src/storage/blocksstable/encoding/ob_string_diff_decoder.cpp b/src/storage/blocksstable/encoding/ob_string_diff_decoder.cpp index 95a72ed5d..9db8e5755 100644 --- a/src/storage/blocksstable/encoding/ob_string_diff_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_string_diff_decoder.cpp @@ -141,7 +141,7 @@ int ObStringDiffDecoder::update_pointer(const char *old_block, const char *cur_b int ObStringDiffDecoder::batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const diff --git a/src/storage/blocksstable/encoding/ob_string_diff_decoder.h b/src/storage/blocksstable/encoding/ob_string_diff_decoder.h index 07cb3c0ff..46fe65e3c 100644 --- 
a/src/storage/blocksstable/encoding/ob_string_diff_decoder.h +++ b/src/storage/blocksstable/encoding/ob_string_diff_decoder.h @@ -47,7 +47,7 @@ public: virtual int batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const override; diff --git a/src/storage/blocksstable/encoding/ob_string_prefix_decoder.cpp b/src/storage/blocksstable/encoding/ob_string_prefix_decoder.cpp index d6f2fbe53..a964e86f6 100644 --- a/src/storage/blocksstable/encoding/ob_string_prefix_decoder.cpp +++ b/src/storage/blocksstable/encoding/ob_string_prefix_decoder.cpp @@ -139,7 +139,7 @@ int ObStringPrefixDecoder::update_pointer(const char *old_block, const char *cur int ObStringPrefixDecoder::batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const @@ -341,7 +341,7 @@ int ObStringPrefixDecoder::fill_vector( int ObStringPrefixDecoder::get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const { diff --git a/src/storage/blocksstable/encoding/ob_string_prefix_decoder.h b/src/storage/blocksstable/encoding/ob_string_prefix_decoder.h index 11a3a6a16..5f64f6349 100644 --- a/src/storage/blocksstable/encoding/ob_string_prefix_decoder.h +++ b/src/storage/blocksstable/encoding/ob_string_prefix_decoder.h @@ -53,7 +53,7 @@ public: virtual int batch_decode( const ObColumnDecoderCtx &ctx, const ObIRowIndex* row_index, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObDatum *datums) const override; @@ -64,7 +64,7 @@ public: virtual int get_null_count( const ObColumnDecoderCtx &ctx, const ObIRowIndex *row_index, - const int64_t *row_ids, + 
const int32_t *row_ids, const int64_t row_cap, int64_t &null_count) const override; private: diff --git a/src/storage/blocksstable/encoding/ob_vector_decode_util.cpp b/src/storage/blocksstable/encoding/ob_vector_decode_util.cpp index 02cffd261..a2980f28a 100644 --- a/src/storage/blocksstable/encoding/ob_vector_decode_util.cpp +++ b/src/storage/blocksstable/encoding/ob_vector_decode_util.cpp @@ -141,7 +141,7 @@ struct LoadVectorDataFunc_T(vector.get_lens()); if (NULL_TYPE == ObVecDecodeUtils::NOT_HAS_NULL) { @@ -180,7 +180,7 @@ struct LoadVectorDataFunc_T(vec_ptr); const StoreType *__restrict store_arr = reinterpret_cast(data_locator.fixed_buf_); - const int64_t *__restrict row_ids = data_locator.row_ids_; + const int32_t *__restrict row_ids = data_locator.row_ids_; if (NULL_TYPE == ObVecDecodeUtils::NOT_HAS_NULL) { for (int64_t i = 0; i < row_cap; ++i) { const int64_t row_id = row_ids[i]; @@ -382,7 +382,7 @@ struct LoadVectorDataFromFixDictFunc_T char *vec_ptr = vector.get_data() + (vec_offset * vec_store_size); VecType *__restrict vec_arr = reinterpret_cast(vec_ptr); const StoreType *__restrict store_arr = reinterpret_cast(data_locator.dict_payload_); - const int64_t *__restrict row_ids = data_locator.row_ids_; + const int32_t *__restrict row_ids = data_locator.row_ids_; const RefType *__restrict ref_arr = data_locator.ref_arr_; if (NULL_TYPE == ObVecDecodeUtils::NOT_HAS_NULL) { for (int64_t i = 0; i < row_cap; ++i) { @@ -836,4 +836,4 @@ int ObVecDecodeUtils::load_byte_aligned_vector( } -}; \ No newline at end of file +}; diff --git a/src/storage/blocksstable/encoding/ob_vector_decode_util.h b/src/storage/blocksstable/encoding/ob_vector_decode_util.h index 0c602d848..445cb3891 100644 --- a/src/storage/blocksstable/encoding/ob_vector_decode_util.h +++ b/src/storage/blocksstable/encoding/ob_vector_decode_util.h @@ -151,7 +151,7 @@ struct DataDiscreteLocator struct DataFixedLocator { - explicit DataFixedLocator(const int64_t *&row_ids, const char *&fixed_buf, const 
int64_t len, const void *null_bitmap) + explicit DataFixedLocator(const int32_t *&row_ids, const char *&fixed_buf, const int64_t len, const void *null_bitmap) : row_ids_(row_ids), fixed_buf_(fixed_buf), len_(len) { null_bitmap_ = nullptr == null_bitmap ? nullptr : sql::to_bit_vector(null_bitmap); @@ -168,7 +168,7 @@ struct DataFixedLocator is_null = null_bitmap_->contain(row_ids_[idx]); } - const int64_t *__restrict row_ids_; + const int32_t *__restrict row_ids_; const char *__restrict fixed_buf_; const int64_t len_; const sql::ObBitVector *__restrict null_bitmap_; @@ -596,4 +596,4 @@ struct LoadByteAlignedData_T= block_count_ || index < 0) { + LOG_WARN("inner stat error", K(ret), KP(micro_blocks_), KP(handles_), KP_(micro_infos), K_(block_count)); + } else if (OB_UNLIKELY(index >= block_count_ || index < 0)) { ret = OB_INVALID_ARGUMENT; - STORAGE_LOG(ERROR, "invalid index", K(ret), K(index), K_(block_count)); - } else if (NULL == micro_blocks_[index]) { + LOG_WARN("invalid index", K(ret), K(index), K_(block_count)); + } else if (OB_UNLIKELY(NULL == micro_blocks_[index] || + micro_infos_[index].offset_ != micro_info.offset_ || + micro_infos_[index].size_ != micro_info.size_)) { ret = OB_INNER_STAT_ERROR; - STORAGE_LOG(WARN, "micro_block is null", K(ret), - "handle validity", handles_[index].is_valid(), K(index)); + LOG_WARN("micro_block is null or invalid", K(ret), + "handle validity", handles_[index].is_valid(), K(index), K(micro_info), K(micro_infos_[index])); } else { block_data = micro_blocks_[index]->get_block_data(); } @@ -246,60 +244,79 @@ int ObMultiBlockIOResult::get_block_data( void ObMultiBlockIOParam::reset() { - micro_index_infos_ = nullptr; - start_index_ = -1; - block_count_ = -1; + is_reverse_ = false; + data_cache_size_ = 0; + micro_block_count_ = 0; + io_read_batch_size_ = 0; + io_read_gap_size_ = 0; + row_header_ = nullptr; + prefetch_idx_.reset(); + micro_infos_.reset(); +} + +void ObMultiBlockIOParam::reuse() +{ + data_cache_size_ = 0; + 
micro_block_count_ = 0; + row_header_ = nullptr; } bool ObMultiBlockIOParam::is_valid() const { - bool is_same_block = false; - const bool basic_valid = nullptr != micro_index_infos_ - && start_index_ >= 0 - && block_count_ > 0 - && micro_index_infos_->count() >= start_index_ + block_count_; - if (basic_valid) { - const ObMicroIndexInfo &first_micro = micro_index_infos_->at(start_index_); - const ObMicroIndexInfo &last_micro = micro_index_infos_->at(start_index_ + block_count_ - 1); - is_same_block = first_micro.get_macro_id() == last_micro.get_macro_id(); - } - return basic_valid && is_same_block; + return nullptr != row_header_ && data_cache_size_ > 0 && micro_block_count_ > 0; } -void ObMultiBlockIOParam::get_io_range(int64_t &offset, int64_t &size) const -{ - offset = 0; - size = 0; - if (block_count_ > 0) { - const int64_t end_index = start_index_ + block_count_ - 1; - offset = micro_index_infos_->at(start_index_).get_block_offset(); - ObMicroIndexInfo &end_micro_index = micro_index_infos_->at(end_index); - size = end_micro_index.get_block_offset() - offset + end_micro_index.get_block_size(); - } -} - -int ObMultiBlockIOParam::get_block_des_info(ObIMicroBlockIOCallback &io_callback) const +int ObMultiBlockIOParam::init( + const ObTableIterParam &iter_param, + const int64_t micro_count_cap, + const bool is_reverse, + common::ObIAllocator &allocator) { int ret = OB_SUCCESS; - if (OB_UNLIKELY(!is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid multi block io parameter", K(ret), K(*this)); - } else { - ObMicroIndexInfo &start_info = micro_index_infos_->at(start_index_); - io_callback.set_micro_des_meta(start_info.row_header_); + is_reverse_ = is_reverse; + io_read_batch_size_ = iter_param.get_io_read_batch_size(); + io_read_gap_size_ = iter_param.get_io_read_gap_size(); + prefetch_idx_.set_allocator(&allocator); + micro_infos_.set_allocator(&allocator); + if (OB_UNLIKELY(0 >= micro_count_cap || MAX_MICRO_BLOCK_READ_COUNT < micro_count_cap)) { + ret = 
OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected micro count cap", K(ret), K(micro_count_cap)); + } else if (OB_FAIL(prefetch_idx_.prepare_reallocate(micro_count_cap))) { + LOG_WARN("Fail to init prefetch idx array", K(ret), K(micro_count_cap)); + } else if (OB_FAIL(micro_infos_.prepare_reallocate(micro_count_cap))) { + LOG_WARN("Fail to init micro info array", K(ret), K(micro_count_cap)); } return ret; } -void ObMultiBlockIOCtx::reset() +bool ObMultiBlockIOParam::add_micro_data(const ObMicroIndexInfo &index_info, + const int64_t micro_data_prefetch_idx, + storage::ObMicroBlockDataHandle &micro_handle, + bool &need_split) { - micro_index_infos_ = nullptr; - block_count_ = 0; -} + int64_t size = 0; + bool need_prefetch = false; + need_split = false; + if (0 == micro_block_count_) { + row_header_ = index_info.row_header_; + size = index_info.get_block_size(); + } else if (!is_reverse_) { + size = index_info.get_block_offset() + index_info.get_block_size() - micro_infos_[0].offset_; + } else { + size = micro_infos_[0].offset_ + micro_infos_[0].size_ - index_info.get_block_offset(); + } -bool ObMultiBlockIOCtx::is_valid() const -{ - return OB_NOT_NULL(micro_index_infos_) && block_count_ > 0; + if ((data_cache_size_ + index_info.get_block_size() + io_read_gap_size_) < size) { + need_prefetch = true; + need_split = true; + } else { + prefetch_idx_[micro_block_count_] = micro_data_prefetch_idx; + micro_infos_[micro_block_count_].set(index_info.get_block_offset(), index_info.get_block_size()); + data_cache_size_ += index_info.get_block_size(); + micro_block_count_++; + need_prefetch = (micro_infos_.cap() == micro_block_count_) || (size > io_read_batch_size_); + } + return need_prefetch; } /*---------------------------------------ObIMicroBlockIOCallback-------------------------------------*/ @@ -530,6 +547,13 @@ ObMultiDataBlockIOCallback::ObMultiDataBlockIOCallback() ObMultiDataBlockIOCallback::~ObMultiDataBlockIOCallback() { + if (nullptr != allocator_) { + if (nullptr !=
io_ctx_.micro_infos_) { + allocator_->free(io_ctx_.micro_infos_); + io_ctx_.micro_infos_ = nullptr; + } + } + free_result(); } @@ -557,10 +581,10 @@ int ObMultiDataBlockIOCallback::inner_process(const char *data_buffer, const int LOG_WARN("alloc_result failed", K(ret)); } - const int64_t block_count = io_ctx_.block_count_; + const int64_t block_count = io_ctx_.micro_block_count_; for (int64_t i = 0; OB_SUCC(ret) && i < block_count; ++i) { - const int64_t data_size = io_ctx_.micro_index_infos_[i].get_block_size(); - const int64_t data_offset = io_ctx_.micro_index_infos_[i].get_block_offset() - offset_; + const int64_t data_size = io_ctx_.micro_infos_[i].get_block_size(); + const int64_t data_offset = io_ctx_.micro_infos_[i].get_block_offset() - offset_; if (OB_FAIL(process_block( reader, data_buffer + data_offset, @@ -569,6 +593,8 @@ int ObMultiDataBlockIOCallback::inner_process(const char *data_buffer, const int io_result_.micro_blocks_[i], io_result_.handles_[i]))) { LOG_WARN("process_block failed", K(ret)); + } else { + io_result_.micro_infos_[i] = io_ctx_.micro_infos_[i]; } } } @@ -594,35 +620,15 @@ int ObMultiDataBlockIOCallback::set_io_ctx( ret = OB_INVALID_ARGUMENT; LOG_WARN("invalid io_param", K(ret), K(io_param)); } else { - io_ctx_.micro_index_infos_ = &io_param.micro_index_infos_->at(io_param.start_index_); - io_ctx_.block_count_ = io_param.block_count_; - } - return ret; -} - -int ObMultiDataBlockIOCallback::deep_copy_ctx( - const ObMultiBlockIOCtx &io_ctx) -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(!io_ctx.is_valid())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("invalid io_ctx", K(ret), K(io_ctx)); - } else if (OB_ISNULL(allocator_)) { - ret = OB_INNER_STAT_ERROR; - LOG_WARN("allocator_ is null", K(ret), KP(allocator_)); - } else { - void *ptr = nullptr; - int64_t alloc_size = sizeof(ObMicroIndexInfo) * io_ctx.block_count_; - if (OB_ISNULL(ptr = allocator_->alloc(alloc_size))) { + void *ptr = NULL; + int64_t alloc_size = sizeof(ObMicroBlockInfo) * 
io_param.count(); + if (OB_UNLIKELY(nullptr == (ptr = allocator_->alloc(alloc_size)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("alloc memory failed", K(ret), K(alloc_size)); } else { - io_ctx_.micro_index_infos_ = reinterpret_cast(ptr); - MEMCPY(io_ctx_.micro_index_infos_, io_ctx.micro_index_infos_, alloc_size); - } - - if (OB_SUCC(ret)) { - io_ctx_.block_count_ = io_ctx.block_count_; + io_ctx_.micro_infos_ = reinterpret_cast(ptr); + MEMCPY(io_ctx_.micro_infos_, io_param.micro_infos_.get_data(), alloc_size); + io_ctx_.micro_block_count_ = io_param.count(); } } return ret; @@ -632,7 +638,7 @@ int ObMultiDataBlockIOCallback::alloc_result() { int ret = OB_SUCCESS; void *ptr = nullptr; - const int64_t block_count = io_ctx_.block_count_; + const int64_t block_count = io_ctx_.micro_block_count_; if (OB_ISNULL(ptr = allocator_->alloc(sizeof(ObMicroBlockCacheValue *) * block_count))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("alloc failed", K(ret)); @@ -641,6 +647,15 @@ int ObMultiDataBlockIOCallback::alloc_result() MEMSET(io_result_.micro_blocks_, 0, sizeof(ObMicroBlockCacheValue *) * block_count); } + if (OB_SUCC(ret)) { + if (OB_ISNULL(ptr = allocator_->alloc(sizeof(ObMicroBlockInfo) * block_count))) { + ret = OB_ALLOCATE_MEMORY_FAILED; + LOG_WARN("alloc failed", K(ret)); + } else { + io_result_.micro_infos_ = new (ptr) ObMicroBlockInfo[block_count]; + } + } + if (OB_SUCC(ret)) { if (OB_ISNULL(ptr = allocator_->alloc(sizeof(ObKVCacheHandle) * block_count))) { ret = OB_ALLOCATE_MEMORY_FAILED; @@ -668,6 +683,10 @@ void ObMultiDataBlockIOCallback::free_result() allocator_->free(io_result_.micro_blocks_); io_result_.micro_blocks_ = nullptr; } + if (OB_NOT_NULL(io_result_.micro_infos_)) { + allocator_->free(io_result_.micro_infos_); + io_result_.micro_infos_ = nullptr; + } if (OB_NOT_NULL(io_result_.handles_)) { for (int64_t i = 0; i < io_result_.block_count_; ++i) { io_result_.handles_[i].~ObKVCacheHandle(); @@ -860,33 +879,30 @@ int ObIMicroBlockCache::prefetch( int 
ret = OB_SUCCESS; int64_t offset = 0; int64_t size = 0; - if (OB_FAIL(io_param.get_block_des_info(callback))) { - LOG_WARN("Fail to get meta data for deserializing block data", K(ret), K(io_param)); + // fill callback + io_param.get_io_range(offset, size); + callback.cache_ = this; + callback.put_size_stat_ = this; + callback.tenant_id_ = tenant_id; + callback.block_id_ = macro_id; + callback.offset_ = offset; + callback.use_block_cache_ = use_cache; + callback.set_micro_des_meta(io_param.row_header_); + // fill read info + ObMacroBlockReadInfo read_info; + read_info.macro_block_id_ = macro_id; + read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_DATA_READ); + read_info.io_desc_.set_resource_group_id(THIS_WORKER.get_group_id()); + read_info.io_desc_.set_sys_module_id(ObIOModule::MICRO_BLOCK_CACHE_IO); + read_info.io_callback_ = &callback; + read_info.offset_ = offset; + read_info.size_ = size; + read_info.io_timeout_ms_ = max(THIS_WORKER.get_timeout_remain() / 1000, 0); + if (OB_FAIL(ObBlockManager::async_read_block(read_info, macro_handle))) { + STORAGE_LOG(WARN, "Fail to async read block, ", K(ret), K(read_info)); } else { - // fill callback - io_param.get_io_range(offset, size); - callback.cache_ = this; - callback.put_size_stat_ = this; - callback.tenant_id_ = tenant_id; - callback.block_id_ = macro_id; - callback.offset_ = offset; - callback.use_block_cache_ = use_cache; - // fill read info - ObMacroBlockReadInfo read_info; - read_info.macro_block_id_ = macro_id; - read_info.io_desc_.set_wait_event(ObWaitEventIds::DB_FILE_DATA_READ); - read_info.io_desc_.set_resource_group_id(THIS_WORKER.get_group_id()); - read_info.io_desc_.set_sys_module_id(ObIOModule::MICRO_BLOCK_CACHE_IO); - read_info.io_callback_ = &callback; - read_info.offset_ = offset; - read_info.size_ = size; - read_info.io_timeout_ms_ = max(THIS_WORKER.get_timeout_remain() / 1000, 0); - if (OB_FAIL(ObBlockManager::async_read_block(read_info, macro_handle))) { - STORAGE_LOG(WARN, "Fail to 
async read block, ", K(ret), K(read_info)); - } else { - EVENT_ADD(ObStatEventIds::IO_READ_PREFETCH_MICRO_COUNT, io_param.block_count_); - EVENT_ADD(ObStatEventIds::IO_READ_PREFETCH_MICRO_BYTES, size); - } + EVENT_ADD(ObStatEventIds::IO_READ_PREFETCH_MICRO_COUNT, io_param.micro_block_count_); + EVENT_ADD(ObStatEventIds::IO_READ_PREFETCH_MICRO_BYTES, size); } return ret; } @@ -1035,7 +1051,7 @@ void ObDataMicroBlockCache::destroy() allocator_.destroy(); } -int ObDataMicroBlockCache::prefetch( +int ObDataMicroBlockCache::prefetch_multi_block( const uint64_t tenant_id, const MacroBlockId &macro_id, const ObMultiBlockIOParam &io_param, diff --git a/src/storage/blocksstable/ob_micro_block_cache.h b/src/storage/blocksstable/ob_micro_block_cache.h index 814f17dca..457fedb3d 100644 --- a/src/storage/blocksstable/ob_micro_block_cache.h +++ b/src/storage/blocksstable/ob_micro_block_cache.h @@ -26,8 +26,14 @@ namespace oceanbase { +namespace storage +{ +class ObMicroBlockDataHandle; +} + namespace blocksstable { + class ObIMicroBlockIOCallback; class ObMicroBlockCacheKey : public common::ObIKVCacheKey { @@ -112,39 +118,85 @@ struct ObMultiBlockIOResult ObMultiBlockIOResult(); virtual ~ObMultiBlockIOResult(); - int get_block_data(const int64_t index, ObMicroBlockData &block_data) const; - void reset(); + int get_block_data(const int64_t index, const ObMicroBlockInfo &micro_info, ObMicroBlockData &block_data) const; const ObMicroBlockCacheValue **micro_blocks_; common::ObKVCacheHandle *handles_; + ObMicroBlockInfo *micro_infos_; int64_t block_count_; int ret_code_; }; struct ObMultiBlockIOParam { - ObMultiBlockIOParam() { reset(); } +public: + static const int64_t MAX_MICRO_BLOCK_READ_COUNT = 1 << 12; + ObMultiBlockIOParam() : + is_reverse_(false), + data_cache_size_(0), + micro_block_count_(0), + io_read_batch_size_(0), + io_read_gap_size_(0), + row_header_(nullptr), + prefetch_idx_(), + micro_infos_() + {} virtual ~ObMultiBlockIOParam() {} void reset(); + void reuse(); bool is_valid()
const; - inline void get_io_range(int64_t &offset, int64_t &size) const; - inline int get_block_des_info(ObIMicroBlockIOCallback &des_meta) const; - TO_STRING_KV(KPC(micro_index_infos_), K_(start_index), K_(block_count)); - common::ObIArray *micro_index_infos_; - int64_t start_index_; - int64_t block_count_; + int init( + const ObTableIterParam &iter_param, + const int64_t micro_count_cap, + const bool is_reverse, + common::ObIAllocator &allocator); + int64_t count() const + { return micro_block_count_; } + bool add_micro_data( + const ObMicroIndexInfo &index_info, + const int64_t micro_data_prefetch_idx, + storage::ObMicroBlockDataHandle &micro_handle, + bool &need_split); + inline void get_io_range(int64_t &offset, int64_t &size) const + { + offset = 0; + size = 0; + if (1 == micro_block_count_) { + offset = micro_infos_[0].offset_; + size = micro_infos_[0].size_; + } else if (!is_reverse_) { + offset = micro_infos_[0].offset_; + size = micro_infos_[micro_block_count_ - 1].offset_ + micro_infos_[micro_block_count_ - 1].size_ - offset; + } else { + offset = micro_infos_[micro_block_count_ - 1].offset_; + size = micro_infos_[0].offset_ + micro_infos_[0].size_ - offset; + } + } + inline int64_t get_data_cache_size() const + { return data_cache_size_; } + TO_STRING_KV(K_(is_reverse), K_(data_cache_size), K_(io_read_batch_size), + K_(io_read_gap_size), K_(micro_block_count)); + + bool is_reverse_; + int64_t data_cache_size_; + int64_t micro_block_count_; + int64_t io_read_batch_size_; + int64_t io_read_gap_size_; + const ObIndexBlockRowHeader *row_header_; + ObReallocatedFixedArray prefetch_idx_; + ObReallocatedFixedArray micro_infos_; }; struct ObMultiBlockIOCtx { ObMultiBlockIOCtx() - : micro_index_infos_(nullptr), hit_cache_bitmap_(nullptr), block_count_(0) {} + : micro_block_count_(0), micro_infos_(nullptr) {} virtual ~ObMultiBlockIOCtx() {} - void reset(); - bool is_valid() const; - ObMicroIndexInfo *micro_index_infos_; - bool *hit_cache_bitmap_; - int64_t
block_count_; - TO_STRING_KV(KP_(micro_index_infos), KP_(hit_cache_bitmap), K_(block_count)); + bool is_valid() const + { return micro_block_count_ > 0; } + int64_t micro_block_count_; + ObMicroBlockInfo *micro_infos_; + + TO_STRING_KV(K_(micro_block_count), KP_(micro_infos)); }; class ObIPutSizeStat @@ -233,8 +285,6 @@ public: private: friend class ObDataMicroBlockCache; int set_io_ctx(const ObMultiBlockIOParam &io_param); - void reset_io_ctx() { io_ctx_.reset(); } - int deep_copy_ctx(const ObMultiBlockIOCtx &io_ctx); int alloc_result(); void free_result(); DISALLOW_COPY_AND_ASSIGN(ObMultiDataBlockIOCallback); @@ -359,7 +409,7 @@ public: int init(const char *cache_name, const int64_t priority = 1); virtual void destroy() override; using ObIMicroBlockCache::prefetch; - int prefetch( + int prefetch_multi_block( const uint64_t tenant_id, const MacroBlockId &macro_id, const ObMultiBlockIOParam &io_param, diff --git a/src/storage/blocksstable/ob_micro_block_info.h b/src/storage/blocksstable/ob_micro_block_info.h index 04cd7ae35..ee40ca581 100644 --- a/src/storage/blocksstable/ob_micro_block_info.h +++ b/src/storage/blocksstable/ob_micro_block_info.h @@ -38,6 +38,10 @@ struct ObMicroBlockInfo }; ObMicroBlockInfo() : offset_(0), size_(0), mark_deletion_(false), reserved_(0) {} + int64_t get_block_size() const + { return size_; } + int64_t get_block_offset() const + { return offset_; } void reset() { offset_ = 0; size_ = 0; mark_deletion_ = false; reserved_ = 0; } int set(const int32_t offset, const int32_t size, bool mark_deletion = false) { diff --git a/src/storage/blocksstable/ob_micro_block_reader.cpp b/src/storage/blocksstable/ob_micro_block_reader.cpp index 4bd6b1da9..f2352c08a 100644 --- a/src/storage/blocksstable/ob_micro_block_reader.cpp +++ b/src/storage/blocksstable/ob_micro_block_reader.cpp @@ -791,7 +791,7 @@ int ObMicroBlockReader::get_rows( const common::ObIArray &cols_projector, const common::ObIArray &col_params, const blocksstable::ObDatumRow *default_row, -
const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, ObDatumRow &row_buf, common::ObIArray &datum_infos, @@ -881,7 +881,7 @@ int ObMicroBlockReader::get_rows( const common::ObIArray &cols_projector, const common::ObIArray &col_params, const blocksstable::ObDatumRow *default_row, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t vector_offset, const int64_t row_cap, ObDatumRow &row_buf, @@ -985,7 +985,7 @@ int ObMicroBlockReader::get_rows( int ObMicroBlockReader::get_row_count( int32_t col, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const bool contains_null, const share::schema::ObColumnParam *col_param, @@ -1038,7 +1038,7 @@ int ObMicroBlockReader::get_aggregate_result( const ObTableAccessContext &context, const int32_t col_offset, const share::schema::ObColumnParam &col_param, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObAggDatumBuf &agg_datum_buf, storage::ObAggCell &agg_cell) @@ -1101,7 +1101,7 @@ int ObMicroBlockReader::get_aggregate_result( int ObMicroBlockReader::get_aggregate_result( const ObTableIterParam &iter_param, const ObTableAccessContext &context, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, ObDatumRow &row_buf, common::ObIArray &agg_cells) diff --git a/src/storage/blocksstable/ob_micro_block_reader.h b/src/storage/blocksstable/ob_micro_block_reader.h index 2232053ae..716bfb644 100644 --- a/src/storage/blocksstable/ob_micro_block_reader.h +++ b/src/storage/blocksstable/ob_micro_block_reader.h @@ -93,7 +93,7 @@ public: const common::ObIArray &cols_projector, const common::ObIArray &col_params, const blocksstable::ObDatumRow *default_row, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, ObDatumRow &row_buf, common::ObIArray &datum_infos, @@ -102,7 +102,7 @@ public: sql::ObEvalCtx &eval_ctx); virtual int get_row_count( int32_t col, - const int64_t *row_ids, + const int32_t *row_ids, const 
int64_t row_cap, const bool contains_null, const share::schema::ObColumnParam *col_param, @@ -117,14 +117,14 @@ public: const ObTableAccessContext &context, const int32_t col_offset, const share::schema::ObColumnParam &col_param, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObAggDatumBuf &agg_datum_buf, storage::ObAggCell &agg_cell) override; int get_aggregate_result( const ObTableIterParam &iter_param, const ObTableAccessContext &context, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, ObDatumRow &row_buf, common::ObIArray &agg_cells); @@ -165,7 +165,7 @@ public: const common::ObIArray &cols_projector, const common::ObIArray &col_params, const blocksstable::ObDatumRow *default_row, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t vector_offset, const int64_t row_cap, ObDatumRow &row_buf, diff --git a/src/storage/blocksstable/ob_micro_block_row_scanner.cpp b/src/storage/blocksstable/ob_micro_block_row_scanner.cpp index 33fe09718..908d8bd7c 100644 --- a/src/storage/blocksstable/ob_micro_block_row_scanner.cpp +++ b/src/storage/blocksstable/ob_micro_block_row_scanner.cpp @@ -548,7 +548,7 @@ int ObIMicroBlockRowScanner::apply_black_filter_batch( int ret = OB_SUCCESS; int64_t cur_row_index = pd_filter_info.start_; int64_t end_row_index = pd_filter_info.start_ + pd_filter_info.count_; - int64_t *row_ids = pd_filter_info.row_ids_; + int32_t *row_ids = pd_filter_info.row_ids_; int64_t row_cap = 0; int64_t bitmap_offset = 0; const common::ObIArray &col_offsets = filter.get_col_offsets(pd_filter_info.is_pd_to_cg_); @@ -619,7 +619,7 @@ int ObIMicroBlockRowScanner::apply_black_filter_batch( int ObIMicroBlockRowScanner::get_rows_for_old_format( const common::ObIArray &col_offsets, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const int64_t vector_offset, const char **cell_datas, @@ -646,7 +646,7 @@ int 
ObIMicroBlockRowScanner::get_rows_for_old_format( exprs, eval_ctx))) { LOG_WARN("Failed to copy rows", K(ret), K(row_cap), - "row_ids", common::ObArrayWrap(row_ids, row_cap)); + "row_ids", common::ObArrayWrap(row_ids, row_cap)); } } else if (ObIMicroBlockReader::Decoder == reader_->get_type() || ObIMicroBlockReader::CSDecoder == reader_->get_type()) { @@ -685,7 +685,7 @@ int ObIMicroBlockRowScanner::get_rows_for_old_format( int ObIMicroBlockRowScanner::get_rows_for_rich_format( const common::ObIArray &col_offsets, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const int64_t vector_offset, const char **cell_datas, @@ -707,7 +707,7 @@ int ObIMicroBlockRowScanner::get_rows_for_rich_format( exprs, eval_ctx))) { LOG_WARN("Failed to copy rows", K(ret), K(row_cap), - "row_ids", common::ObArrayWrap(row_ids, row_cap)); + "row_ids", common::ObArrayWrap(row_ids, row_cap)); } } else if (ObIMicroBlockReader::Decoder == reader_->get_type() || ObIMicroBlockReader::CSDecoder == reader_->get_type()) { @@ -906,7 +906,7 @@ int ObIMicroBlockRowScanner::filter_micro_block_in_cg( int ObIMicroBlockRowScanner::get_next_rows( const common::ObIArray &cols_projector, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObIArray &datum_infos, @@ -948,7 +948,7 @@ int ObIMicroBlockRowScanner::get_next_rows( int ObIMicroBlockRowScanner::get_aggregate_result( const int32_t col_idx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, ObCGAggCells &cg_agg_cells) { @@ -1046,7 +1046,7 @@ int ObIMicroBlockRowScanner::read_distinct( int ObIMicroBlockRowScanner::read_reference( const int32_t group_by_col, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const { diff --git a/src/storage/blocksstable/ob_micro_block_row_scanner.h 
b/src/storage/blocksstable/ob_micro_block_row_scanner.h index 320023179..dc08c7a2c 100644 --- a/src/storage/blocksstable/ob_micro_block_row_scanner.h +++ b/src/storage/blocksstable/ob_micro_block_row_scanner.h @@ -85,7 +85,7 @@ public: virtual int get_next_rows( const common::ObIArray &cols_projector, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const char **cell_datas, const int64_t row_cap, common::ObIArray &datums, @@ -93,7 +93,7 @@ public: uint32_t *len_array); int get_aggregate_result( const int32_t col_idx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, ObCGAggCells &cg_agg_cells); int advance_to_border( @@ -112,7 +112,7 @@ public: storage::ObGroupByCell &group_by_cell) const; int read_reference( const int32_t group_by_col, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, storage::ObGroupByCell &group_by_cell) const; OB_INLINE void reserve_reader_memory(bool reserve) @@ -125,7 +125,7 @@ public: int get_rows_for_old_format( const common::ObIArray &col_offsets, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const int64_t vector_offset, const char **cell_datas, @@ -135,7 +135,7 @@ public: int get_rows_for_rich_format( const common::ObIArray &col_offsets, const common::ObIArray &col_params, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_cap, const int64_t vector_offset, const char **cell_datas, diff --git a/src/storage/column_store/ob_cg_bitmap.cpp b/src/storage/column_store/ob_cg_bitmap.cpp index 2e4bead27..2040596b6 100644 --- a/src/storage/column_store/ob_cg_bitmap.cpp +++ b/src/storage/column_store/ob_cg_bitmap.cpp @@ -14,6 +14,11 @@ #include "ob_cg_bitmap.h" #include "common/ob_target_specific.h" +#if OB_USE_MULTITARGET_CODE +#include +#include +#endif + namespace oceanbase { using namespace common; @@ -58,72 +63,9 @@ int ObCGBitmap::set_bitmap(const ObCSRowId start, const 
int64_t row_count, const return ret; } -int ObCGBitmap::get_row_ids( - int64_t *row_ids, - int64_t &row_cap, - ObCSRowId &current, - const ObCSRange &query_range, - const ObCSRange &data_range, - const int64_t batch_size, - const bool is_reverse) const -{ - int ret = OB_SUCCESS; - if (OB_UNLIKELY(nullptr == row_ids || - current < start_row_id_ || - current < data_range.start_row_id_)) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid argument", K(ret), KP(row_ids), K(current), K(data_range), KPC(this)); - } else if (!is_reverse) { - int64_t offset = current - start_row_id_; - if (OB_FAIL(bitmap_.get_row_ids(row_ids, - row_cap, - offset, - MIN(query_range.end_row_id_, data_range.end_row_id_) - start_row_id_ + 1, - batch_size, - data_range.start_row_id_ - start_row_id_))) { - LOG_WARN("Fail to get row_ids", K(ret), K(current), KPC(this)); - } else { - current = start_row_id_ + offset; - // update next valid current - if (current <= query_range.end_row_id_) { - int64_t next_true_pos; - if (OB_FAIL(bitmap_.next_valid_idx(offset, - query_range.end_row_id_ - current + 1, - false, - next_true_pos))) { - LOG_WARN("Fail to get next valid index", K(ret)); - } else { - current = (-1 == next_true_pos) ? OB_INVALID_CS_ROW_ID : start_row_id_ + next_true_pos; - } - } - } - } else { - ObCSRowId lower_bound = MAX(query_range.start_row_id_, data_range.start_row_id_); - while (current >= lower_bound && row_cap < batch_size) { - if (test(current)) { - row_ids[row_cap++] = current - data_range.start_row_id_; - } - current--; - } - // update next valid current - if (current >= query_range.start_row_id_) { - int64_t next_true_pos; - if (OB_FAIL(bitmap_.next_valid_idx(query_range.start_row_id_ - start_row_id_, - current - query_range.start_row_id_ + 1, - true, - next_true_pos))) { - LOG_WARN("Fail to get next valid index", K(ret)); - } else { - current = (-1 == next_true_pos) ?
OB_INVALID_CS_ROW_ID : start_row_id_ + next_true_pos; - } - } - } - return ret; -} - static const int32_t DEFAULT_CS_BATCH_ROW_COUNT = 1024; -static int64_t default_cs_batch_row_ids_[DEFAULT_CS_BATCH_ROW_COUNT]; -static int64_t default_cs_batch_reverse_row_ids_[DEFAULT_CS_BATCH_ROW_COUNT]; +static int32_t default_cs_batch_row_ids_[DEFAULT_CS_BATCH_ROW_COUNT]; +static int32_t default_cs_batch_reverse_row_ids_[DEFAULT_CS_BATCH_ROW_COUNT]; static void __attribute__((constructor)) init_row_cs_ids_array() { for (int32_t i = 0; i < DEFAULT_CS_BATCH_ROW_COUNT; i++) { @@ -133,29 +75,185 @@ static void __attribute__((constructor)) init_row_cs_ids_array() } OB_DECLARE_DEFAULT_AND_AVX2_CODE( -OB_NOINLINE static bool copy_cs_row_ids(int64_t *row_ids, const int64_t cap, const int64_t diff, const bool is_reverse) +inline static void copy_cs_row_ids(int32_t *row_ids, const int64_t cap, const int32_t diff, const bool is_reverse) { - bool is_success = false; - int64_t val = diff; - if (cap <= DEFAULT_CS_BATCH_ROW_COUNT) { - is_success = true; - if (!is_reverse) { - MEMCPY(row_ids, default_cs_batch_row_ids_, sizeof(int64_t) * cap); - } else { - MEMCPY(row_ids, default_cs_batch_reverse_row_ids_, sizeof(int64_t) * cap); - val = diff - DEFAULT_CS_BATCH_ROW_COUNT + 1; - } - int64_t* __restrict id_pos = row_ids; - const int64_t* __restrict id_end = row_ids + cap; - while (id_pos < id_end) { - *id_pos += val; - ++id_pos; + int32_t val = is_reverse ? (diff - DEFAULT_CS_BATCH_ROW_COUNT + 1) : diff; + const int32_t* __restrict base_ids = is_reverse ? 
default_cs_batch_reverse_row_ids_ : default_cs_batch_row_ids_; + int32_t* __restrict id_pos = row_ids; + const int32_t* __restrict id_end = row_ids + cap; + while (id_pos < id_end) { + *id_pos = *base_ids + val; + ++id_pos; + ++base_ids; + } +} +) + +OB_DECLARE_AVX512_SPECIFIC_CODE( +/* + * [from, to): inteval of bitmap to get row ids + * limit: upper limit of row ids + * block_offset: start bit index of current micro block + */ +inline void get_cs_row_ids( + const int32_t *condensed_idx, + const int32_t condensed_cnt, + const int32_t from, + const int32_t to, + const int32_t limit, + const int32_t block_offset, + const bool is_reverse, + int32_t *row_ids, + int64_t &row_count, + int32_t &next_valid_idx) +{ + row_count = 0; + next_valid_idx = -1; + if (from > condensed_idx[condensed_cnt - 1] || to <= condensed_idx[0]) { + } else { + int32_t pos1 = std::lower_bound(condensed_idx, condensed_idx + MIN(from + 1, condensed_cnt), from) - condensed_idx; + if (pos1 < condensed_cnt) { + int32_t pos2 = std::lower_bound(condensed_idx + pos1, condensed_idx + MIN(to + 1, condensed_cnt), to) - condensed_idx; + int32_t count = MIN(pos2 - pos1, limit); + if (0 == count) { + } else if (!is_reverse) { + const int32_t *pos = condensed_idx + pos1; + const int32_t *end_pos = pos + count; + const int32_t *end_pos16 = pos + count / 16 * 16; + __m512i offset = _mm512_set1_epi32(-block_offset); + for (; pos < end_pos16; pos += 16) { + __m512i idx_arr = _mm512_loadu_epi32(pos); + __m512i res_arr = _mm512_add_epi32(idx_arr, offset); + _mm512_storeu_epi32(row_ids + row_count, res_arr); + row_count += 16; + } + while (pos < end_pos) { + row_ids[row_count++] = *pos - block_offset; + ++pos; + } + if (pos < (condensed_idx + condensed_cnt)) { + next_valid_idx = *pos; + } + } else { + const int32_t* __restrict idx_pos = condensed_idx + pos2 - 1; + const int32_t* __restrict idx_end = idx_pos - (count - 1); + while (idx_pos >= idx_end) { + row_ids[row_count++] = *idx_pos - block_offset; + --idx_pos; 
+ } + if (idx_pos >= condensed_idx) { + next_valid_idx = *idx_pos; + } + } } } - return is_success; -}) +} +) -int convert_bitmap_to_cs_index(int64_t *row_ids, +OB_DECLARE_AVX2_SPECIFIC_CODE( +inline void get_cs_row_ids( + const int32_t *condensed_idx, + const int32_t condensed_cnt, + const int32_t from, + const int32_t to, + const int32_t limit, + const int32_t block_offset, + const bool is_reverse, + int32_t *row_ids, + int64_t &row_count, + int32_t &next_valid_idx) +{ + row_count = 0; + next_valid_idx = -1; + if (from > condensed_idx[condensed_cnt - 1] || to <= condensed_idx[0]) { + } else { + int32_t pos1 = std::lower_bound(condensed_idx, condensed_idx + MIN(from + 1, condensed_cnt), from) - condensed_idx; + if (pos1 < condensed_cnt) { + int32_t pos2 = std::lower_bound(condensed_idx + pos1, condensed_idx + MIN(to + 1, condensed_cnt), to) - condensed_idx; + int32_t count = MIN(pos2 - pos1, limit); + if (0 == count) { + } else if (!is_reverse) { + const int32_t *pos = condensed_idx + pos1; + const int32_t *end_pos = pos + count; + const int32_t *end_pos8 = pos + count / 8 * 8; + __m256i offset = _mm256_set1_epi32(-block_offset); + for (; pos < end_pos8; pos += 8) { + __m256i idx_arr = _mm256_loadu_si256((const __m256i *)(pos)); + __m256i res_arr = _mm256_add_epi32(idx_arr, offset); + _mm256_storeu_si256((__m256i *)(row_ids + row_count), res_arr); + row_count += 8; + } + while (pos < end_pos) { + row_ids[row_count++] = *pos - block_offset; + ++pos; + } + if (pos < (condensed_idx + condensed_cnt)) { + next_valid_idx = *pos; + } + } else { + const int32_t* __restrict idx_pos = condensed_idx + pos2 - 1; + const int32_t* __restrict idx_end = condensed_idx + pos1; + while (row_count < limit && idx_pos >= idx_end) { + row_ids[row_count++] = *idx_pos - block_offset; + --idx_pos; + } + if (idx_pos >= condensed_idx) { + next_valid_idx = *idx_pos; + } + } + } + } +} +) + +OB_DECLARE_DEFAULT_CODE( +inline void get_cs_row_ids( + const int32_t *condensed_idx, + const int32_t 
condensed_cnt, + const int32_t from, + const int32_t to, + const int32_t limit, + const int32_t block_offset, + const bool is_reverse, + int32_t *row_ids, + int64_t &row_count, + int32_t &next_valid_idx) +{ + row_count = 0; + next_valid_idx = -1; + if (from > condensed_idx[condensed_cnt - 1] || to <= condensed_idx[0]) { + } else { + int pos1 = std::lower_bound(condensed_idx, condensed_idx + MIN(from + 1, condensed_cnt), from) - condensed_idx; + if (pos1 < condensed_cnt) { + int pos2 = std::lower_bound(condensed_idx + pos1, condensed_idx + MIN(to + 1, condensed_cnt), to) - condensed_idx; + if (pos1 == pos2) { + } else if (!is_reverse) { + const int32_t* __restrict idx_pos = condensed_idx + pos1; + const int32_t* __restrict idx_end = condensed_idx + pos2; + while (row_count < limit && idx_pos < idx_end) { + row_ids[row_count++] = *idx_pos - block_offset; + ++idx_pos; + } + if (idx_pos < (condensed_idx + condensed_cnt)) { + next_valid_idx = *idx_pos; + } + } else { + const int32_t* __restrict idx_pos = condensed_idx + pos2 - 1; + const int32_t* __restrict idx_end = condensed_idx + pos1; + while (row_count < limit && idx_pos >= idx_end) { + row_ids[row_count++] = *idx_pos - block_offset; + --idx_pos; + } + if (idx_pos >= condensed_idx) { + next_valid_idx = *idx_pos; + } + } + } + } +} +) + +int convert_bitmap_to_cs_index(int32_t *row_ids, int64_t &row_cap, ObCSRowId ¤t, const ObCSRange &query_range, @@ -174,17 +272,16 @@ int convert_bitmap_to_cs_index(int64_t *row_ids, ObCSRowId upper_bound = MIN(query_range.end_row_id_, data_range.end_row_id_); int64_t limit = MIN(upper_bound - current + 1, batch_size); if (nullptr == filter_bitmap || filter_bitmap->is_all_true(ObCSRange(current, limit))) { - bool is_success = false; -#if OB_USE_MULTITARGET_CODE - if (common::is_arch_supported(ObTargetArch::AVX2)) { - is_success = specific::avx2::copy_cs_row_ids(row_ids, limit, current - data_range.start_row_id_, false); - } else { - is_success = 
specific::normal::copy_cs_row_ids(row_ids, limit, current - data_range.start_row_id_, false); - } -#else - is_success = specific::normal::copy_cs_row_ids(row_ids, limit, current - data_range.start_row_id_, false); -#endif - if (is_success) { + if (limit < DEFAULT_CS_BATCH_ROW_COUNT) { + #if OB_USE_MULTITARGET_CODE + if (common::is_arch_supported(ObTargetArch::AVX2)) { + specific::avx2::copy_cs_row_ids(row_ids, limit, current - data_range.start_row_id_, false); + } else { + #endif + specific::normal::copy_cs_row_ids(row_ids, limit, current - data_range.start_row_id_, false); + #if OB_USE_MULTITARGET_CODE + } + #endif row_cap = limit; current += limit; } else { @@ -195,7 +292,7 @@ int convert_bitmap_to_cs_index(int64_t *row_ids, } } else if (filter_bitmap->is_all_false(ObCSRange(current, upper_bound - current + 1))) { current = upper_bound + 1; - } else if (OB_FAIL(filter_bitmap->get_row_ids( + } else if (OB_FAIL(const_cast(filter_bitmap)->get_row_ids( row_ids, row_cap, current, query_range, data_range, batch_size, is_reverse_scan))) { LOG_WARN("Fail to get row ids", K(ret), K(current), K(data_range), K(query_range)); } @@ -203,17 +300,16 @@ int convert_bitmap_to_cs_index(int64_t *row_ids, ObCSRowId lower_bound = MAX(query_range.start_row_id_, data_range.start_row_id_); int64_t limit = MIN(current - lower_bound + 1, batch_size); if (nullptr == filter_bitmap || filter_bitmap->is_all_true(ObCSRange(current - limit + 1, limit))) { - bool is_success = false; -#if OB_USE_MULTITARGET_CODE - if (common::is_arch_supported(ObTargetArch::AVX2)) { - is_success = specific::avx2::copy_cs_row_ids(row_ids, limit, current - data_range.start_row_id_, true); - } else { - is_success = specific::normal::copy_cs_row_ids(row_ids, limit, current - data_range.start_row_id_, true); - } -#else - is_success = specific::normal::copy_cs_row_ids(row_ids, limit, current - data_range.start_row_id_, true); -#endif - if (is_success) { + if (limit < DEFAULT_CS_BATCH_ROW_COUNT) { + #if 
OB_USE_MULTITARGET_CODE + if (common::is_arch_supported(ObTargetArch::AVX2)) { + specific::avx2::copy_cs_row_ids(row_ids, limit, current - data_range.start_row_id_, true); + } else { + #endif + specific::normal::copy_cs_row_ids(row_ids, limit, current - data_range.start_row_id_, true); + #if OB_USE_MULTITARGET_CODE + } + #endif row_cap = limit; current -= limit; } else { @@ -224,7 +320,7 @@ int convert_bitmap_to_cs_index(int64_t *row_ids, } } else if (filter_bitmap->is_all_false(ObCSRange(lower_bound, current - lower_bound + 1))) { current = lower_bound - 1; - } else if (OB_FAIL(filter_bitmap->get_row_ids( + } else if (OB_FAIL(const_cast(filter_bitmap)->get_row_ids( row_ids, row_cap, current, query_range, data_range, batch_size, is_reverse_scan))) { LOG_WARN("Fail to get row ids", K(ret), K(current), K(data_range), K(query_range)); } @@ -233,5 +329,94 @@ int convert_bitmap_to_cs_index(int64_t *row_ids, return ret; } +int ObCGBitmap::get_row_ids( + int32_t *row_ids, + int64_t &row_cap, + ObCSRowId ¤t, + const ObCSRange &query_range, + const ObCSRange &data_range, + const int64_t batch_size, + const bool is_reverse) +{ + int ret = OB_SUCCESS; + int32_t next_valid_idx = -1; + int32_t from_pos = -1; + int32_t end_pos = -1; + int32_t condensed_cnt = -1; + const int32_t *condensed_idx = nullptr; + if (OB_UNLIKELY(nullptr == row_ids || + current < start_row_id_ || + current < data_range.start_row_id_)) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument", K(ret), KP(row_ids), K(current), K(data_range), KPC(this)); + } else if (!bitmap_.is_index_generated() && OB_FAIL(bitmap_.generate_condensed_index())) { + LOG_WARN("Fail to get condensed idx", K(ret)); + } else { + if (!is_reverse) { + from_pos = current - start_row_id_; + end_pos = MIN(query_range.end_row_id_, data_range.end_row_id_) - start_row_id_ + 1; + } else { + from_pos = MAX(query_range.start_row_id_, data_range.start_row_id_) - start_row_id_; + end_pos = current - start_row_id_ + 1; + } + condensed_cnt = 
bitmap_.get_condensed_cnt(); + condensed_idx = bitmap_.get_condensed_idx(); + if (OB_UNLIKELY(0 > condensed_cnt || nullptr == condensed_idx)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpceted condensed idx info", K(ret), K_(bitmap)); + } + } + + if (OB_SUCC(ret)) { + if (0 == condensed_cnt) { + row_cap = 0; + next_valid_idx = -1; +#if OB_USE_MULTITARGET_CODE + // enable when avx512 is more efficient + //} else if (common::is_arch_supported(ObTargetArch::AVX512)) { + // specific::avx512::get_cs_row_ids(condensed_idx, + // condensed_cnt, + // from_pos, + // end_pos, + // batch_size, + // data_range.start_row_id_ - start_row_id_, + // is_reverse, + // row_ids, + // row_cap, + // next_valid_idx); + } else if (common::is_arch_supported(ObTargetArch::AVX2)) { + specific::avx2::get_cs_row_ids(condensed_idx, + condensed_cnt, + from_pos, + end_pos, + batch_size, + data_range.start_row_id_ - start_row_id_, + is_reverse, + row_ids, + row_cap, + next_valid_idx); +#endif + } else { + specific::normal::get_cs_row_ids(condensed_idx, + condensed_cnt, + from_pos, + end_pos, + batch_size, + data_range.start_row_id_ - start_row_id_, + is_reverse, + row_ids, + row_cap, + next_valid_idx); + } + + if (-1 == next_valid_idx) { + current = OB_INVALID_CS_ROW_ID; + } else { + current = start_row_id_ + next_valid_idx; + } + } + return ret; +} + } } diff --git a/src/storage/column_store/ob_cg_bitmap.h b/src/storage/column_store/ob_cg_bitmap.h index 7d5c0d2c4..38e7b4f44 100644 --- a/src/storage/column_store/ob_cg_bitmap.h +++ b/src/storage/column_store/ob_cg_bitmap.h @@ -176,7 +176,6 @@ public: OB_ASSERT(range.is_valid() && range.end_row_id_ >= start_row_id_); return bitmap_.is_all_false(MAX(range.start_row_id_ - start_row_id_, 0), MIN(range.end_row_id_ - start_row_id_, bitmap_.size() - 1)); - } OB_INLINE void set_all_true() @@ -197,13 +196,13 @@ public: int set_bitmap(const ObCSRowId start, const int64_t row_count, const bool is_reverse, ObBitmap &bitmap) const; int get_first_valid_idx(const 
ObCSRange &range, const bool is_reverse_scan, ObCSRowId &row_idx) const; - int get_row_ids(int64_t *row_ids, + int get_row_ids(int32_t *row_ids, int64_t &row_cap, ObCSRowId ¤t, const ObCSRange &query_range, const ObCSRange &data_range, const int64_t batch_size, - const bool is_reverse) const; + const bool is_reverse); TO_STRING_KV(K_(bitmap), K_(start_row_id)); private: @@ -211,7 +210,7 @@ private: ObCSRowId start_row_id_; }; -int convert_bitmap_to_cs_index(int64_t *row_ids, +int convert_bitmap_to_cs_index(int32_t *row_ids, int64_t &row_cap, ObCSRowId ¤t, const ObCSRange &query_range, diff --git a/src/storage/column_store/ob_cg_group_by_scanner.cpp b/src/storage/column_store/ob_cg_group_by_scanner.cpp index 017818ce0..817ac98f4 100644 --- a/src/storage/column_store/ob_cg_group_by_scanner.cpp +++ b/src/storage/column_store/ob_cg_group_by_scanner.cpp @@ -20,7 +20,8 @@ ObCGGroupByScanner::ObCGGroupByScanner() : ObCGRowScanner(), output_exprs_(nullptr), group_by_agg_idxs_(), - group_by_cell_(nullptr) + group_by_cell_(nullptr), + index_prefetcher_() {} ObCGGroupByScanner::~ObCGGroupByScanner() @@ -28,12 +29,19 @@ ObCGGroupByScanner::~ObCGGroupByScanner() reset(); } +void ObCGGroupByScanner::reuse() +{ + ObCGRowScanner::reuse(); + index_prefetcher_.reuse(); +} + void ObCGGroupByScanner::reset() { ObCGRowScanner::reset(); output_exprs_ = nullptr; group_by_agg_idxs_.reset(); group_by_cell_ = nullptr; + index_prefetcher_.reset(); } int ObCGGroupByScanner::init( @@ -44,6 +52,8 @@ int ObCGGroupByScanner::init( int ret = OB_SUCCESS; if (OB_FAIL(ObCGRowScanner::init(iter_param, access_ctx, wrapper))) { LOG_WARN("Failed to init ObCGRowScanner", K(ret)); + } else if (OB_FAIL(index_prefetcher_.init(get_type(), *sstable_, iter_param, access_ctx))) { + LOG_WARN("fail to init index prefetcher, ", K(ret)); } else if (OB_UNLIKELY(nullptr == iter_param.output_exprs_ || 0 == iter_param.output_exprs_->count())) { ret = OB_ERR_UNEXPECTED; @@ -57,6 +67,24 @@ int ObCGGroupByScanner::init( 
return ret; } +int ObCGGroupByScanner::switch_context( + const ObTableIterParam &iter_param, + ObTableAccessContext &access_ctx, + ObSSTableWrapper &wrapper) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObCGRowScanner::switch_context(iter_param, access_ctx, wrapper))) { + LOG_WARN("Fail to switch context for cg row scanner", K(ret)); + } else if (!index_prefetcher_.is_valid()) { + if (OB_FAIL(index_prefetcher_.init(get_type(), *sstable_, iter_param, access_ctx))) { + LOG_WARN("fail to init prefetcher, ", K(ret)); + } + } else if (OB_FAIL(index_prefetcher_.switch_context(get_type(), *sstable_, iter_param, access_ctx))) { + LOG_WARN("Fail to switch context for prefetcher", K(ret)); + } + return ret; +} + int ObCGGroupByScanner::init_group_by_info() { int ret = OB_SUCCESS; @@ -87,20 +115,20 @@ int ObCGGroupByScanner::decide_group_size(int64_t &group_size) while (OB_SUCC(ret) && -1 == group_size) { if (end_of_scan()) { ret = OB_ITER_END; - } else if (OB_FAIL(prefetcher_.prefetch())) { - LOG_WARN("Fail to prefetch micro block", K(ret), K_(prefetcher)); - } else if (prefetcher_.read_wait()) { + } else if (OB_FAIL(index_prefetcher_.prefetch())) { + LOG_WARN("Fail to prefetch micro block", K(ret), K_(index_prefetcher)); + } else if (index_prefetcher_.read_wait()) { continue; } else { - prefetcher_.cur_micro_data_fetch_idx_++; - prefetcher_.cur_micro_data_read_idx_++; + index_prefetcher_.cur_micro_data_fetch_idx_++; + index_prefetcher_.cur_micro_data_read_idx_++; is_new_range_ = false; - const ObCSRange µ_data_range = prefetcher_.current_micro_info().get_row_range(); + const ObCSRange µ_data_range = index_prefetcher_.current_micro_info().get_row_range(); group_size = MIN(query_index_range_.end_row_id_, micro_data_range.end_row_id_) - - MAX(query_index_range_.start_row_id_, micro_data_range.start_row_id_) + 1; + MAX(query_index_range_.start_row_id_, micro_data_range.start_row_id_) + 1; } } - LOG_DEBUG("[GROUP BY PUSHDOWN]", K(ret), K(group_size), K(query_index_range_), 
K(prefetcher_)); + LOG_DEBUG("[GROUP BY PUSHDOWN]", K(ret), K(group_size), K_(query_index_range), K_(index_prefetcher)); return ret; } @@ -112,19 +140,31 @@ int ObCGGroupByScanner::decide_can_group_by(const int32_t group_by_col, bool &ca ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected state, should be new range", K(ret)); } else { - prefetcher_.cur_micro_data_fetch_idx_++; - prefetcher_.cur_micro_data_read_idx_++; - is_new_range_ = false; - int64_t row_cnt = 0; - int64_t read_cnt = 0; - int64_t distinct_cnt = 0; - if (OB_FAIL(open_cur_data_block())) { - LOG_WARN("Failed to open data block", K(ret)); - } else if (OB_FAIL(micro_scanner_->check_can_group_by(group_by_col, row_cnt, read_cnt, distinct_cnt, can_group_by))) { - LOG_WARN("Failed to check group by", K(ret)); - } else if (can_group_by && OB_FAIL(group_by_cell_->decide_use_group_by( - row_cnt, read_cnt, distinct_cnt, filter_bitmap_, can_group_by))) { - LOG_WARN("Failed to decide use group by", K(ret)); + while (OB_SUCC(ret)) { + if (end_of_scan()) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected end of scan", K(ret), KPC(this)); + } else if (OB_FAIL(prefetcher_.prefetch())) { + LOG_WARN("Fail to prefetch micro block", K(ret), K_(prefetcher)); + } else if (prefetcher_.read_wait()) { + continue; + } else { + prefetcher_.cur_micro_data_fetch_idx_++; + prefetcher_.cur_micro_data_read_idx_++; + is_new_range_ = false; + int64_t row_cnt = 0; + int64_t read_cnt = 0; + int64_t distinct_cnt = 0; + if (OB_FAIL(open_cur_data_block())) { + LOG_WARN("Failed to open data block", K(ret)); + } else if (OB_FAIL(micro_scanner_->check_can_group_by(group_by_col, row_cnt, read_cnt, distinct_cnt, can_group_by))) { + LOG_WARN("Failed to check group by", K(ret)); + } else if (can_group_by && OB_FAIL(group_by_cell_->decide_use_group_by( + row_cnt, read_cnt, distinct_cnt, filter_bitmap_, can_group_by))) { + LOG_WARN("Failed to decide use group by", K(ret)); + } + break; + } } } return ret; @@ -210,5 +250,34 @@ int 
ObCGGroupByScanner::do_group_by_aggregate(const uint64_t count, const bool i return ret; } +int ObCGGroupByScanner::locate_micro_index(const ObCSRange &range) +{ + int ret = OB_SUCCESS; + if (IS_NOT_INIT) { + ret = OB_NOT_INIT; + LOG_WARN("ObCGGroupByScanner not init", K(ret)); + } else if (OB_UNLIKELY(!range.is_valid())) { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("Invalid argument", K(ret), K(range)); + } else if (range.start_row_id_ >= sstable_row_cnt_) { + ret = OB_ITER_END; + } else { + is_new_range_ = true; + query_index_range_.start_row_id_ = range.start_row_id_; + query_index_range_.end_row_id_ = MIN(range.end_row_id_, sstable_row_cnt_ - 1); + current_ = is_reverse_scan_ ? query_index_range_.end_row_id_ : query_index_range_.start_row_id_; + + if (OB_FAIL(ret) || end_of_scan()) { + } else if (OB_FAIL(index_prefetcher_.locate(query_index_range_, nullptr))) { + LOG_WARN("Fail to locate range", K(ret), K_(query_index_range), K_(current)); + } else if (OB_FAIL(index_prefetcher_.prefetch())) { + LOG_WARN("Fail to prefetch", K(ret)); + } + } + LOG_TRACE("[COLUMNSTORE] CGGroupByScanner locate micro index", K(ret), "tablet_id", iter_param_->tablet_id_, "cg_idx", iter_param_->cg_idx_, + "type", get_type(), K(range)); + return ret; +} + } } diff --git a/src/storage/column_store/ob_cg_group_by_scanner.h b/src/storage/column_store/ob_cg_group_by_scanner.h index 80225dfbc..39516512f 100644 --- a/src/storage/column_store/ob_cg_group_by_scanner.h +++ b/src/storage/column_store/ob_cg_group_by_scanner.h @@ -22,11 +22,16 @@ class ObCGGroupByScanner final : public ObCGRowScanner, public ObICGGroupByProce public: ObCGGroupByScanner(); virtual ~ObCGGroupByScanner(); + virtual void reuse() override; virtual void reset() override; virtual int init( const ObTableIterParam &iter_param, ObTableAccessContext &access_ctx, ObSSTableWrapper &wrapper) override; + virtual int switch_context( + const ObTableIterParam &iter_param, + ObTableAccessContext &access_ctx, + ObSSTableWrapper 
&wrapper) override final; virtual ObCGIterType get_type() override { return OB_CG_GROUP_BY_SCANNER; } virtual int init_group_by_info() override; @@ -35,6 +40,7 @@ public: virtual int read_distinct(const int32_t group_by_col) override; virtual int read_reference(const int32_t group_by_col) override; virtual int calc_aggregate(const bool is_group_by_col) override; + virtual int locate_micro_index(const ObCSRange &range) override; INHERIT_TO_STRING_KV("ObCGRowScanner", ObCGRowScanner, KPC_(output_exprs), K_(group_by_agg_idxs), KP_(group_by_cell)); private: @@ -44,6 +50,7 @@ private: // aggregate cell indexes for each output(agg) expr ObSEArray group_by_agg_idxs_; ObGroupByCell *group_by_cell_; + ObCGIndexPrefetcher index_prefetcher_; }; } diff --git a/src/storage/column_store/ob_cg_iter_param_pool.cpp b/src/storage/column_store/ob_cg_iter_param_pool.cpp index 5a574349e..6f2d657ca 100644 --- a/src/storage/column_store/ob_cg_iter_param_pool.cpp +++ b/src/storage/column_store/ob_cg_iter_param_pool.cpp @@ -200,6 +200,7 @@ int ObCGIterParamPool::fill_virtual_cg_iter_param( cg_param.output_exprs_ = output_exprs; cg_param.op_ = row_param.op_; cg_param.pd_storage_flag_ = row_param.pd_storage_flag_; + cg_param.table_scan_opt_ = row_param.table_scan_opt_; cg_param.tablet_handle_ = row_param.tablet_handle_; } if (OB_FAIL(ret) && nullptr != output_exprs) { @@ -266,6 +267,7 @@ int ObCGIterParamPool::generate_for_column_store(const ObTableIterParam &row_par cg_param.limit_prefetch_ = row_param.limit_prefetch_; //cg_param.ss_rowkey_prefix_cnt_ = 0; cg_param.pd_storage_flag_ = row_param.pd_storage_flag_; + cg_param.table_scan_opt_ = row_param.table_scan_opt_; if (nullptr != row_param.cg_read_infos_) { if (OB_UNLIKELY(nullptr == row_param.cg_read_infos_->at(cg_pos))) { ret = OB_ERR_UNEXPECTED; diff --git a/src/storage/column_store/ob_cg_prefetcher.cpp b/src/storage/column_store/ob_cg_prefetcher.cpp index fe3e18b48..457631854 100644 --- a/src/storage/column_store/ob_cg_prefetcher.cpp 
+++ b/src/storage/column_store/ob_cg_prefetcher.cpp @@ -25,6 +25,7 @@ void ObCGPrefetcher::reset() query_range_.reset(); is_reverse_scan_ = false; is_project_without_filter_ = false; + need_prewarm_ = false; cg_iter_type_ = -1; filter_bitmap_ = nullptr; micro_data_prewarm_idx_ = 0; @@ -39,6 +40,7 @@ void ObCGPrefetcher::reuse() query_index_range_.reset(); query_range_.reset(); is_project_without_filter_ = false; + need_prewarm_ = false; filter_bitmap_ = nullptr; micro_data_prewarm_idx_ = 0; cur_micro_data_read_idx_ = -1; @@ -84,6 +86,15 @@ int ObCGPrefetcher::init( LOG_WARN("Fail to open index root", K(ret)); } } + + if (OB_SUCC(ret)) { + need_prewarm_ = + (ObICGIterator::OB_CG_SCANNER == cg_iter_type_ || + ((ObICGIterator::OB_CG_ROW_SCANNER == cg_iter_type_ || ObICGIterator::OB_CG_GROUP_BY_SCANNER == cg_iter_type_) && + is_project_without_filter_)) && + nullptr == cg_agg_cells_ && + nullptr == access_ctx_->limit_param_ ; + } return ret; } @@ -109,6 +120,15 @@ int ObCGPrefetcher::switch_context( LOG_WARN("Fail to open index root", K(ret)); } } + + if (OB_SUCC(ret)) { + need_prewarm_ = + (ObICGIterator::OB_CG_SCANNER == cg_iter_type_ || + ((ObICGIterator::OB_CG_ROW_SCANNER == cg_iter_type_ || ObICGIterator::OB_CG_GROUP_BY_SCANNER == cg_iter_type_) && + is_project_without_filter_)) && + nullptr == cg_agg_cells_ && + nullptr == access_ctx_->limit_param_ ; + } return ret; } @@ -190,9 +210,10 @@ int ObCGPrefetcher::locate_in_prefetched_data(bool &found) int ret = OB_SUCCESS; found = false; const ObCSRowId start_row_idx = is_reverse_scan_ ? 
query_index_range_.end_row_id_ : query_index_range_.start_row_id_; - if (micro_data_prewarm_idx_ > 0 && micro_data_prewarm_idx_ > cur_micro_data_fetch_idx_) { + int64_t max_data_prefetched_idx = MAX(micro_data_prewarm_idx_, micro_data_prefetch_idx_); + if (max_data_prefetched_idx > 0 && max_data_prefetched_idx > cur_micro_data_read_idx_) { int cmp_ret = -1; - for (int64_t micro_data_idx = MAX(0, cur_micro_data_fetch_idx_); OB_SUCC(ret) && cmp_ret < 0 && micro_data_idx < micro_data_prewarm_idx_; micro_data_idx++) { + for (int64_t micro_data_idx = MAX(0, cur_micro_data_read_idx_); OB_SUCC(ret) && cmp_ret < 0 && micro_data_idx < max_data_prefetched_idx; micro_data_idx++) { ObMicroIndexInfo µ_info = micro_data_infos_[micro_data_idx % max_micro_handle_cnt_]; const ObCSRange µ_range = micro_info.get_row_range(); cmp_ret = micro_range.compare(start_row_idx); @@ -214,9 +235,9 @@ int ObCGPrefetcher::locate_in_prefetched_data(bool &found) if (OB_SUCC(ret) && found) { cmp_ret = 1; is_prefetch_end_ = false; - micro_data_prefetch_idx_ = micro_data_prewarm_idx_; + micro_data_prefetch_idx_ = max_data_prefetched_idx; const ObCSRowId end_row_id = is_reverse_scan_ ? 
query_index_range_.start_row_id_ : query_index_range_.end_row_id_; - for (int64_t micro_data_idx = micro_data_prewarm_idx_ - 1; OB_SUCC(ret) && cmp_ret > 0 && micro_data_idx > cur_micro_data_fetch_idx_; micro_data_idx--) { + for (int64_t micro_data_idx = max_data_prefetched_idx - 1; OB_SUCC(ret) && cmp_ret > 0 && micro_data_idx > cur_micro_data_fetch_idx_; micro_data_idx--) { const ObCSRange µ_range = micro_data_infos_[micro_data_idx % max_micro_handle_cnt_].get_row_range(); cmp_ret = micro_range.compare(end_row_id); if (is_reverse_scan_) { @@ -275,10 +296,11 @@ bool ObCGPrefetcher::locate_back(const ObCSRange &locate_range) { bool is_locate_back = false; if (0 < query_index_range_.end_row_id_) { + int micro_index = MAX(0, cur_micro_data_read_idx_); if (is_reverse_scan_) { - is_locate_back = locate_range.end_row_id_ > query_index_range_.start_row_id_; + is_locate_back = locate_range.end_row_id_ > micro_data_infos_[micro_index % max_micro_handle_cnt_].get_row_range().end_row_id_; } else { - is_locate_back = locate_range.start_row_id_ < query_index_range_.end_row_id_; + is_locate_back = locate_range.start_row_id_ < micro_data_infos_[micro_index % max_micro_handle_cnt_].get_row_range().start_row_id_; } } return is_locate_back; @@ -357,7 +379,9 @@ int ObCGPrefetcher::prefetch_micro_data() ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected prefetch status", K(ret), K_(cur_level), K_(index_tree_height), K_(micro_data_prefetch_idx), K_(cur_micro_data_read_idx), K_(max_micro_handle_cnt)); - } else if (micro_data_prefetch_idx_ - cur_micro_data_read_idx_ == max_micro_handle_cnt_) { + } else if (micro_data_prefetch_idx_ - cur_micro_data_read_idx_ == max_micro_handle_cnt_ || + (use_multi_block_prefetch_ && prefetch_depth_ > MIN_DATA_READ_BATCH_COUNT && + (max_micro_handle_cnt_ - (micro_data_prefetch_idx_ - cur_micro_data_fetch_idx_)) < MIN_DATA_READ_BATCH_COUNT)) { // DataBlock ring buf full } else if (OB_FAIL(get_prefetch_depth(prefetch_depth, micro_data_prefetch_idx_))) { 
LOG_WARN("Fail to get prefetch depth", K(ret)); @@ -416,7 +440,10 @@ int ObCGPrefetcher::prefetch_micro_data() } else { LOG_DEBUG("[COLUMNSTORE] success to agg index info", K(ret), K(block_info)); } - } else if (OB_FAIL(prefetch_block_data(block_info, micro_data_handles_[prefetch_micro_idx]))) { + } else if (OB_FAIL(prefetch_data_block( + micro_data_prefetch_idx_, + block_info, + micro_data_handles_[prefetch_micro_idx]))) { LOG_WARN("fail to prefetch_block_data", K(ret), K(block_info)); } else { prefetched_cnt++; @@ -428,6 +455,11 @@ int ObCGPrefetcher::prefetch_micro_data() is_prefetch_end_ = true; } } + + if (OB_SUCC(ret) && multi_io_params_.count() > 0 && + OB_FAIL(prefetch_multi_data_block(micro_data_prefetch_idx_))) { + LOG_WARN("Fail to prefetch multi block", K(ret), K_(micro_data_prefetch_idx), K_(multi_io_params)); + } } if (OB_SUCC(ret) && 0 < prefetched_cnt) { ObSSTableReadHandle &read_handle = read_handles_[prefetching_range_idx() % max_range_prefetching_cnt_]; @@ -705,11 +737,7 @@ int ObCGPrefetcher::prewarm() if (micro_data_prewarm_idx_ < micro_data_prefetch_idx_) { micro_data_prewarm_idx_ = micro_data_prefetch_idx_; } - if (is_prefetch_end_ && - (ObICGIterator::OB_CG_SCANNER == cg_iter_type_ || - (ObICGIterator::OB_CG_ROW_SCANNER == cg_iter_type_ && is_project_without_filter_)) && - nullptr == cg_agg_cells_ && - nullptr == access_ctx_->limit_param_ && + if (is_prefetch_end_ && need_submit_io_ && need_prewarm_ && (index_tree_height_ - 1) == cur_level_ && micro_data_prewarm_idx_ - cur_micro_data_fetch_idx_ < prefetch_limit) { @@ -743,7 +771,10 @@ int ObCGPrefetcher::prewarm() prefetched_cnt++; micro_data_prewarm_idx_++; tree_handles_[cur_level_].current_block_read_handle().end_prefetched_row_idx_++; - } else if (OB_FAIL(prefetch_block_data(block_info, micro_data_handles_[prefetch_micro_idx]))) { + } else if (OB_FAIL(prefetch_data_block( + micro_data_prewarm_idx_, + block_info, + micro_data_handles_[prefetch_micro_idx]))) { LOG_WARN("fail to 
prefetch_block_data", K(ret), K(block_info)); } else { prefetched_cnt++; @@ -751,6 +782,10 @@ int ObCGPrefetcher::prewarm() tree_handles_[cur_level_].current_block_read_handle().end_prefetched_row_idx_++; } } + if (OB_SUCC(ret) && multi_io_params_.count() > 0 && + OB_FAIL(prefetch_multi_data_block(micro_data_prewarm_idx_))) { + LOG_WARN("Fail to prefetch multi block", K(ret), K_(micro_data_prewarm_idx), K_(multi_io_params)); + } } if (OB_SUCC(ret) && 0 < prefetched_cnt) { @@ -762,5 +797,60 @@ int ObCGPrefetcher::prewarm() return ret; } +void ObCGIndexPrefetcher::reset() +{ + ObCGPrefetcher::reset(); + use_multi_block_prefetch_ = false; + need_submit_io_ = false; +} + +void ObCGIndexPrefetcher::reuse() +{ + ObCGPrefetcher::reuse(); + use_multi_block_prefetch_ = false; + need_submit_io_ = false; +} + +int ObCGIndexPrefetcher::init( + const int cg_iter_type, + ObSSTable &sstable, + const ObTableIterParam &iter_param, + ObTableAccessContext &access_ctx) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObCGPrefetcher::init(cg_iter_type, sstable, iter_param, access_ctx))) { + LOG_WARN("Fail to init cg prefetcher", K(ret)); + } else { + use_multi_block_prefetch_ = false; + need_submit_io_ = false; + } + return ret; +} + +int ObCGIndexPrefetcher::switch_context( + const int cg_iter_type, + ObSSTable &sstable, + const ObTableIterParam &iter_param, + ObTableAccessContext &access_ctx) +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObCGPrefetcher::switch_context(cg_iter_type, sstable, iter_param, access_ctx))) { + LOG_WARN("Fail to switch context for cg prefetcher", K(ret)); + } else { + use_multi_block_prefetch_ = false; + need_submit_io_ = false; + } + return ret; +} + +int ObCGIndexPrefetcher::prefetch() +{ + int ret = OB_SUCCESS; + if (OB_FAIL(ObIndexTreeMultiPassPrefetcher::prefetch())) { + LOG_WARN("Fail to prefetch", K(ret)); + } + return ret; +} + } } diff --git a/src/storage/column_store/ob_cg_prefetcher.h b/src/storage/column_store/ob_cg_prefetcher.h index 20432e829..b24b612e8 
100644 --- a/src/storage/column_store/ob_cg_prefetcher.h +++ b/src/storage/column_store/ob_cg_prefetcher.h @@ -24,6 +24,7 @@ public: ObCGPrefetcher() : is_reverse_scan_(false), is_project_without_filter_(false), + need_prewarm_(false), cg_iter_type_(-1), query_index_range_(), query_range_(), @@ -36,8 +37,8 @@ public: {} virtual ~ObCGPrefetcher() {} - virtual void reset() override final; - virtual void reuse() override final; + virtual void reset() override; + virtual void reuse() override; int init( const int cg_iter_type, ObSSTable &sstable, @@ -49,7 +50,7 @@ public: const ObTableIterParam &iter_param, ObTableAccessContext &access_ctx); int locate(const ObCSRange &range, const ObCGBitmap *bitmap); - virtual int prefetch() override final; + virtual int prefetch() override; OB_INLINE bool is_empty_range() const { return 0 == micro_data_prefetch_idx_ && is_prefetch_end_; } virtual bool read_wait() override final @@ -76,7 +77,7 @@ public: void set_cg_agg_cells(ObCGAggCells &cg_agg_cells) { cg_agg_cells_ = &cg_agg_cells; } void set_project_type(const bool project_without_filter) { is_project_without_filter_ = project_without_filter; } INHERIT_TO_STRING_KV("ObCGPrefetcher", ObIndexTreeMultiPassPrefetcher, - K_(is_reverse_scan), K_(is_project_without_filter), + K_(is_reverse_scan), K_(is_project_without_filter), K_(need_prewarm), K_(query_index_range), K_(query_range), K_(cg_iter_type), K_(micro_data_prewarm_idx), K_(cur_micro_data_read_idx), KP_(filter_bitmap), KP_(cg_agg_cells), KP_(sstable_index_filter)); @@ -111,6 +112,7 @@ private: private: bool is_reverse_scan_; bool is_project_without_filter_; + bool need_prewarm_; int16_t cg_iter_type_; ObStorageDatum datums_[2]; ObCSRange query_index_range_; @@ -124,6 +126,28 @@ public: ObSSTableIndexFilter *sstable_index_filter_; }; +// ObCGIndexPrefetcher only prefetch micro index info, skip data block +// 1. disable submit disk io for data block +// 2. disable multi block prefetch +// 3. 
disable prewarm data block +class ObCGIndexPrefetcher : public ObCGPrefetcher +{ +public: + virtual void reset() override final; + virtual void reuse() override final; + int init( + const int cg_iter_type, + ObSSTable &sstable, + const ObTableIterParam &iter_param, + ObTableAccessContext &access_ctx); + int switch_context( + const int cg_iter_type, + ObSSTable &sstable, + const ObTableIterParam &iter_param, + ObTableAccessContext &access_ctx); + virtual int prefetch() override final; +}; + } } #endif // OCEANBASE_STORAGE_COLUMN_STORE_OB_CG_PREFETCHER_H_ diff --git a/src/storage/column_store/ob_cg_scanner.cpp b/src/storage/column_store/ob_cg_scanner.cpp index 164be89ce..edf3b8a67 100644 --- a/src/storage/column_store/ob_cg_scanner.cpp +++ b/src/storage/column_store/ob_cg_scanner.cpp @@ -484,15 +484,15 @@ int ObCGRowScanner::init( // TODO: remove these later } else if (OB_ISNULL(buf = access_ctx.stmt_allocator_->alloc(sizeof(char *) * sql_batch_size))) { ret = common::OB_ALLOCATE_MEMORY_FAILED; - LOG_WARN("fail to alloc row_ids", K(ret), K(sql_batch_size)); + LOG_WARN("fail to alloc cell data", K(ret), K(sql_batch_size)); } else if (FALSE_IT(cell_data_ptrs_ = reinterpret_cast(buf))) { - } else if (OB_ISNULL(buf = access_ctx.stmt_allocator_->alloc(sizeof(int64_t) * sql_batch_size))) { + } else if (OB_ISNULL(buf = access_ctx.stmt_allocator_->alloc(sizeof(int32_t) * sql_batch_size))) { ret = common::OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc row_ids", K(ret), K(sql_batch_size)); + } else if (FALSE_IT(row_ids_ = reinterpret_cast(buf))) { } else if (OB_ISNULL(len_array_buf = access_ctx.stmt_allocator_->alloc(sizeof(uint32_t) * sql_batch_size))) { ret = common::OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc len_array_buf", K(ret), K(sql_batch_size)); - } else if (FALSE_IT(row_ids_ = reinterpret_cast(buf))) { } else if (FALSE_IT(len_array_ = reinterpret_cast(len_array_buf))) { } else if (!iter_param.enable_pd_aggregate()) { bool need_padding = 
common::is_pad_char_to_full_length(access_ctx.sql_mode_); diff --git a/src/storage/column_store/ob_cg_scanner.h b/src/storage/column_store/ob_cg_scanner.h index 38a6a8add..40dfccdab 100644 --- a/src/storage/column_store/ob_cg_scanner.h +++ b/src/storage/column_store/ob_cg_scanner.h @@ -47,7 +47,7 @@ public: virtual int switch_context( const ObTableIterParam &iter_param, ObTableAccessContext &access_ctx, - ObSSTableWrapper &wrapper) override final; + ObSSTableWrapper &wrapper) override; virtual void reset() override; virtual void reuse() override; virtual int locate( @@ -148,7 +148,7 @@ private: virtual int inner_fetch_rows(const int64_t row_cap, const int64_t datum_offset); protected: - int64_t *row_ids_; + int32_t *row_ids_; uint32_t *len_array_; // for projection in vectorize, need to remove later const char **cell_data_ptrs_; diff --git a/src/storage/column_store/ob_cg_tile_scanner.cpp b/src/storage/column_store/ob_cg_tile_scanner.cpp index f6fad02c7..f2d2290b6 100644 --- a/src/storage/column_store/ob_cg_tile_scanner.cpp +++ b/src/storage/column_store/ob_cg_tile_scanner.cpp @@ -59,7 +59,8 @@ int ObCGTileScanner::init( LOG_WARN("Unexpected cg scanner", K(ret), K(cg_scanner->get_type())); } else if (OB_FAIL(cg_scanners_.push_back(cg_scanner))) { LOG_WARN("Fail to push back cg scanner", K(ret), K(i), KPC(iter_param)); - } else if (ObICGIterator::OB_CG_ROW_SCANNER == cg_scanner->get_type()) { + } else if (ObICGIterator::OB_CG_ROW_SCANNER == cg_scanner->get_type() || + ObICGIterator::OB_CG_GROUP_BY_SCANNER == cg_scanner->get_type()) { static_cast(cg_scanner)->set_project_type(project_without_filter); } } @@ -113,7 +114,8 @@ int ObCGTileScanner::switch_context( } else if (OB_FAIL(cg_scanner->switch_context( cg_param, access_ctx, cg_wrapper))) { LOG_WARN("Fail to switch context for cg iter", K(ret)); - } else if (ObICGIterator::OB_CG_ROW_SCANNER == cg_scanner->get_type()) { + } else if (ObICGIterator::OB_CG_ROW_SCANNER == cg_scanner->get_type() || + 
ObICGIterator::OB_CG_GROUP_BY_SCANNER == cg_scanner->get_type()) { static_cast(cg_scanner)->set_project_type(project_without_filter); } } diff --git a/src/storage/column_store/ob_co_sstable_row_scanner.cpp b/src/storage/column_store/ob_co_sstable_row_scanner.cpp index 49cbf341a..13d85482b 100644 --- a/src/storage/column_store/ob_co_sstable_row_scanner.cpp +++ b/src/storage/column_store/ob_co_sstable_row_scanner.cpp @@ -38,6 +38,7 @@ ObCOSSTableRowScanner::ObCOSSTableRowScanner() blockscan_state_(MAX_STATE), group_by_project_idx_(0), group_size_(0), + batch_size_(1), column_group_cnt_(-1), current_(OB_INVALID_CS_ROW_ID), end_(OB_INVALID_CS_ROW_ID), @@ -104,6 +105,7 @@ int ObCOSSTableRowScanner::init( is_sstable_iter_ = true; iter_param_ = ¶m; access_ctx_ = &context; + batch_size_ = param.get_storage_rowsets_size(); reverse_scan_ = context.query_flag_.is_reverse_scan(); batched_row_store_ = static_cast(context.block_row_store_); block_row_store_ = context.block_row_store_; @@ -131,6 +133,7 @@ void ObCOSSTableRowScanner::reset() current_ = OB_INVALID_CS_ROW_ID; end_ = OB_INVALID_CS_ROW_ID; group_size_ = 0; + batch_size_ = 1; reverse_scan_ = false; state_ = BEGIN; blockscan_state_ = MAX_STATE; @@ -161,6 +164,7 @@ void ObCOSSTableRowScanner::reuse() current_ = OB_INVALID_CS_ROW_ID; end_ = OB_INVALID_CS_ROW_ID; group_size_ = 0; + batch_size_ = 1; reverse_scan_ = false; state_ = BEGIN; blockscan_state_ = MAX_STATE; @@ -342,7 +346,8 @@ int ObCOSSTableRowScanner::init_project_iter( LOG_WARN("Failed to cg scan", K(ret)); } else { project_iter_ = cg_scanner; - if (ObICGIterator::OB_CG_ROW_SCANNER == cg_scanner->get_type()) { + if (ObICGIterator::OB_CG_ROW_SCANNER == cg_scanner->get_type() || + ObICGIterator::OB_CG_GROUP_BY_SCANNER == cg_scanner->get_type()) { static_cast(cg_scanner)->set_project_type(nullptr == rows_filter_); } } @@ -882,13 +887,13 @@ int ObCOSSTableRowScanner::get_next_group_size(const ObCSRowId begin, int64_t &g ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected 
rowid", K(begin), K(end_)); } else { - group_size = MIN(OB_CS_SCAN_GROUP_SIZE, begin - end_ + 1); + group_size = MIN(batch_size_, begin - end_ + 1); } } else if (begin > end_) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected rowid", K(begin), K(end_)); } else { - group_size = MIN(OB_CS_SCAN_GROUP_SIZE, end_ - begin + 1); + group_size = MIN(batch_size_, end_ - begin + 1); } return ret; } @@ -944,42 +949,36 @@ int ObCOSSTableRowScanner::filter_group_by_rows() { int ret = OB_SUCCESS; const ObCGBitmap *result_bitmap = nullptr; - ObICGIterator *group_by_iter = nullptr; ObICGGroupByProcessor *group_by_processor = group_by_iters_.at(0); - if (OB_ISNULL(group_by_iter = dynamic_cast(group_by_processor))) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected null group_by_iter", K(ret), KPC(group_by_processor)); - } else { - while(OB_SUCC(ret)) { - if (can_forward_row_scanner() && - OB_FAIL(row_scanner_->forward_blockscan(end_, blockscan_state_, current_))) { - LOG_WARN("Fail to forward blockscan border", K(ret)); - } else if (end_of_scan()) { - LOG_DEBUG("cur scan finished, update state", K(blockscan_state_), K(state_), KPC(this)); - ret = OB_ITER_END; - } else if (OB_FAIL(group_by_iter->locate(ObCSRange(current_, end_ - current_ + 1)))) { - LOG_WARN("Failed to locate", K(ret)); - } else if (OB_FAIL(group_by_processor->decide_group_size(group_size_))) { - LOG_WARN("Failed to decide group size", K(ret)); - } else if (nullptr != rows_filter_) { - if (OB_FAIL(rows_filter_->apply(ObCSRange(current_, group_size_)))) { - LOG_WARN("Fail to apply rows filter", K(ret), K(current_), K(group_size_)); - } else if (OB_ISNULL(result_bitmap = rows_filter_->get_result_bitmap())) { - ret = OB_ERR_UNEXPECTED; - LOG_WARN("Unexpected result bitmap", K(ret), KPC(rows_filter_)); - } else { - EVENT_ADD(ObStatEventIds::PUSHDOWN_STORAGE_FILTER_ROW_CNT, result_bitmap->popcnt()); - if (result_bitmap->is_all_false()) { - update_current(group_size_); - continue; - } - } + while(OB_SUCC(ret)) { + if 
(can_forward_row_scanner() && + OB_FAIL(row_scanner_->forward_blockscan(end_, blockscan_state_, current_))) { + LOG_WARN("Fail to forward blockscan border", K(ret)); + } else if (end_of_scan()) { + LOG_DEBUG("cur scan finished, update state", K(blockscan_state_), K(state_), KPC(this)); + ret = OB_ITER_END; + } else if (OB_FAIL(group_by_processor->locate_micro_index(ObCSRange(current_, end_ - current_ + 1)))) { + LOG_WARN("Failed to locate", K(ret)); + } else if (OB_FAIL(group_by_processor->decide_group_size(group_size_))) { + LOG_WARN("Failed to decide group size", K(ret)); + } else if (nullptr != rows_filter_) { + if (OB_FAIL(rows_filter_->apply(ObCSRange(current_, group_size_)))) { + LOG_WARN("Fail to apply rows filter", K(ret), K(current_), K(group_size_)); + } else if (OB_ISNULL(result_bitmap = rows_filter_->get_result_bitmap())) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected result bitmap", K(ret), KPC(rows_filter_)); } else { - EVENT_ADD(ObStatEventIds::PUSHDOWN_STORAGE_FILTER_ROW_CNT, group_size_); - } - if (OB_SUCC(ret)) { - break; + EVENT_ADD(ObStatEventIds::PUSHDOWN_STORAGE_FILTER_ROW_CNT, result_bitmap->popcnt()); + if (result_bitmap->is_all_false()) { + update_current(group_size_); + continue; + } } + } else { + EVENT_ADD(ObStatEventIds::PUSHDOWN_STORAGE_FILTER_ROW_CNT, group_size_); + } + if (OB_SUCC(ret)) { + break; } } if (OB_SUCC(ret) && OB_FAIL(project_iter_->locate( diff --git a/src/storage/column_store/ob_co_sstable_row_scanner.h b/src/storage/column_store/ob_co_sstable_row_scanner.h index f944ddc13..903d74032 100644 --- a/src/storage/column_store/ob_co_sstable_row_scanner.h +++ b/src/storage/column_store/ob_co_sstable_row_scanner.h @@ -74,6 +74,7 @@ public: K_(current), K_(end), K_(group_size), + K_(batch_size), K_(reverse_scan), K_(state), K_(blockscan_state), @@ -174,6 +175,7 @@ private: BlockScanState blockscan_state_; int32_t group_by_project_idx_; int64_t group_size_; + int64_t batch_size_; int64_t column_group_cnt_; ObCSRowId 
current_; ObCSRowId end_; diff --git a/src/storage/column_store/ob_co_sstable_rows_filter.cpp b/src/storage/column_store/ob_co_sstable_rows_filter.cpp index c05310773..084c57d86 100644 --- a/src/storage/column_store/ob_co_sstable_rows_filter.cpp +++ b/src/storage/column_store/ob_co_sstable_rows_filter.cpp @@ -28,7 +28,7 @@ ObCOSSTableRowsFilter::ObCOSSTableRowsFilter() prepared_(false), subtree_filter_iter_to_locate_(0), subtree_filter_iter_to_filter_(0), - batch_size_(OB_CS_SCAN_GROUP_SIZE), + batch_size_(1), iter_param_(nullptr), access_ctx_(nullptr), co_sstable_(nullptr), @@ -69,6 +69,7 @@ int ObCOSSTableRowsFilter::init( } else { iter_param_ = ¶m; allocator_ = context.stmt_allocator_; + batch_size_ = param.get_storage_rowsets_size(); if (nullptr != param.pushdown_filter_ && OB_FAIL(rewrite_filter(depth))) { LOG_WARN("Failed rewriter filter", K(ret), KPC(block_row_store), KPC_(filter)); } else if (nullptr != context.sample_filter_ @@ -139,6 +140,7 @@ int ObCOSSTableRowsFilter::switch_context( } else { iter_param_ = ¶m; access_ctx_ = &context; + batch_size_ = param.get_storage_rowsets_size(); common::ObSEArray iter_params; for (int64_t i = 0; i < iter_filter_node_.count(); i++) { sql::ObPushdownFilterExecutor *filter = iter_filter_node_.at(i); @@ -164,7 +166,7 @@ void ObCOSSTableRowsFilter::reset() prepared_ = false; subtree_filter_iter_to_locate_ = 0; subtree_filter_iter_to_filter_ = 0; - batch_size_ = OB_CS_SCAN_GROUP_SIZE; + batch_size_ = 1; iter_param_ = nullptr; access_ctx_ = nullptr; co_sstable_ = nullptr; @@ -502,7 +504,7 @@ int ObCOSSTableRowsFilter::judge_whether_use_common_cg_iter( } else if (0 == i) { status = children[i]->get_status(); } else { - status = merge_common_filter_tree_status(status, children[i]->get_status()); + status = (sql::ObCommonFilterTreeStatus)(filter_tree_merge_status[status][children[i]->get_status()]); } } if (OB_SUCC(ret)) { @@ -524,39 +526,39 @@ int ObCOSSTableRowsFilter::transform_filter_tree( sql::ObPushdownFilterExecutor 
&filter) { int ret = OB_SUCCESS; - sql::ObPushdownFilterExecutor **children = filter.get_childs(); ObSEArray tmp_filter_indexes; - uint32_t pos = 0; + common::ObSEArray common_cg_ids; + common::ObSEArray common_cg_exprs; int64_t base_filter_idx = 0; - const uint32_t origin_child_count = filter.get_child_count(); while (OB_SUCC(ret)) { sql::ObPushdownFilterExecutor *common_filter_executor = nullptr; - common::ObIArray *common_col_group_ids = nullptr; - common::ObIArray *common_col_exprs = nullptr; - if (OB_FAIL(find_common_sub_filter_tree(filter, tmp_filter_indexes, common_col_group_ids, - common_col_exprs, base_filter_idx))) { + if (OB_FAIL(find_common_sub_filter_tree(filter, + tmp_filter_indexes, + common_cg_ids, + common_cg_exprs, + base_filter_idx))) { LOG_WARN("Failed to find common sub filter tree", K(ret), K(base_filter_idx)); - } else if (origin_child_count == tmp_filter_indexes.count()) { - common_filter_executor = &filter; - } else if (1 < tmp_filter_indexes.count() && - OB_FAIL(filter.pull_up_common_node(tmp_filter_indexes, common_filter_executor))) { - LOG_WARN("Failed to pull up common node", K(ret), K(tmp_filter_indexes)); - } - if (OB_SUCC(ret)) { - if (1 < tmp_filter_indexes.count()) { + } else if (1 < tmp_filter_indexes.count()) { + if (filter.get_child_count() == tmp_filter_indexes.count()) { + common_filter_executor = &filter; + } else if (OB_FAIL(filter.pull_up_common_node(tmp_filter_indexes, common_filter_executor))) { + LOG_WARN("Failed to pull up common node", K(ret), K(tmp_filter_indexes)); + } + + if (OB_SUCC(ret)) { if (OB_ISNULL(common_filter_executor)) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected null common_filter_executor", K(ret)); - } else if (OB_FAIL(common_filter_executor->set_cg_param(*common_col_group_ids, common_col_exprs))) { + } else if (OB_FAIL(common_filter_executor->set_cg_param(common_cg_ids, common_cg_exprs))) { LOG_WARN("Failed to set cg param to filter", K(ret), KPC(common_filter_executor), - KP(common_col_group_ids), 
KP(common_col_exprs)); + K(common_cg_ids), K(common_cg_exprs)); } } - if (OB_SUCC(ret)) { - ++base_filter_idx; - if (common_filter_executor == &filter || base_filter_idx >= filter.get_child_count()) { - break; - } + } + if (OB_SUCC(ret)) { + ++base_filter_idx; + if (common_filter_executor == &filter || base_filter_idx >= filter.get_child_count()) { + break; } } } @@ -566,30 +568,37 @@ int ObCOSSTableRowsFilter::transform_filter_tree( int ObCOSSTableRowsFilter::find_common_sub_filter_tree( sql::ObPushdownFilterExecutor &filter, ObIArray &filter_indexes, - common::ObIArray *&common_col_group_ids, - common::ObIArray *&common_col_exprs, + common::ObIArray &common_cg_ids, + common::ObIArray &common_cg_exprs, const int64_t base_filter_idx) { int ret = OB_SUCCESS; filter_indexes.reuse(); + common_cg_ids.reuse(); + common_cg_exprs.reuse(); sql::ObPushdownFilterExecutor **children_filters = filter.get_childs(); sql::ObPushdownFilterExecutor *base_filter = children_filters[base_filter_idx]; sql::ObCommonFilterTreeStatus prev_status = base_filter->get_status(); + const common::ObIArray &base_cg_ids = base_filter->get_cg_idxs(); + const common::ObIArray *base_cg_exprs = base_filter->get_cg_col_exprs(); const uint32_t child_count = filter.get_child_count(); - common_col_group_ids = &(base_filter->get_cg_idxs()); - common_col_exprs = base_filter->get_cg_col_exprs(); - if (OB_FAIL(filter_indexes.push_back(base_filter_idx))) { + if (OB_FAIL(common_cg_ids.assign(base_cg_ids))) { + LOG_WARN("Failed to assign common cg ids", K(ret)); + } else if (nullptr != base_cg_exprs && OB_FAIL(common_cg_exprs.assign(*base_cg_exprs))) { + LOG_WARN("Failed to assign common cg exprs", K(ret)); + } else if (OB_FAIL(filter_indexes.push_back(base_filter_idx))) { LOG_WARN("Failed to push back filters", K(ret), K(base_filter_idx), K(filter_indexes)); } else { + bool is_common = false; for (uint32_t i = base_filter_idx + 1; OB_SUCC(ret) && i < child_count; ++i) { if 
(is_common_filter_tree_status(prev_status, children_filters[i]->get_status())) { - bool is_common = true; if (OB_FAIL(assign_common_col_groups( - children_filters[i], - common_col_group_ids, - common_col_exprs, - is_common))) { - LOG_WARN("Failed to assign common col groups", K(ret), KP(common_col_group_ids)); + children_filters[i], + prev_status, + common_cg_ids, + common_cg_exprs, + is_common))) { + LOG_WARN("Failed to assign common col groups", K(ret), KPC(base_filter), KPC(children_filters[i])); } else if (is_common) { prev_status = merge_common_filter_tree_status(prev_status, children_filters[i]->get_status()); if (OB_FAIL(filter_indexes.push_back(i))) { @@ -699,39 +708,50 @@ int ObCOSSTableRowsFilter::prepare_bitmap_buffer( } int ObCOSSTableRowsFilter::assign_common_col_groups( - sql::ObPushdownFilterExecutor *filter, - common::ObIArray *&common_col_group_ids, - common::ObIArray *&common_cg_col_exprs, + const sql::ObPushdownFilterExecutor *filter, + const sql::ObCommonFilterTreeStatus prev_status, + common::ObIArray &common_cg_ids, + common::ObIArray &common_cg_exprs, bool &is_common) { // Asssert that there is no duplication in col_group_idxs. 
int ret = OB_SUCCESS; - is_common = true; if (OB_UNLIKELY(nullptr == filter || !filter->is_cg_param_valid())) { ret = OB_INVALID_ARGUMENT; LOG_WARN("Invalid argument", K(ret), KPC(filter)); - } else if (OB_UNLIKELY(nullptr == common_col_group_ids || - nullptr == common_cg_col_exprs || - common_col_group_ids->empty() || - common_col_group_ids->count() < common_cg_col_exprs->count())) { - ret = OB_INVALID_ARGUMENT; - LOG_WARN("Invalid argument", KPC(common_col_group_ids), KPC(common_cg_col_exprs)); } else { - common::ObIArray &cur_col_group_ids = filter->get_cg_idxs(); - common::ObIArray *cur_cg_col_exprs = filter->get_cg_col_exprs(); + is_common = false; + const common::ObIArray &cur_cg_ids = filter->get_cg_idxs(); + const common::ObIArray *cur_cg_exprs = filter->get_cg_col_exprs(); ObSEArray tmp_array; - const common::ObIArray &longer_array = cur_col_group_ids.count() > common_col_group_ids->count() - ? cur_col_group_ids : *common_col_group_ids; - const common::ObIArray &shorter_array = cur_col_group_ids.count() > common_col_group_ids->count() - ? *common_col_group_ids : cur_col_group_ids; + const common::ObIArray &longer_array = cur_cg_ids.count() > common_cg_ids.count() + ? cur_cg_ids : common_cg_ids; + const common::ObIArray &shorter_array = cur_cg_ids.count() > common_cg_ids.count() + ? 
common_cg_ids : cur_cg_ids; if (OB_FAIL(common::get_difference(shorter_array, longer_array, tmp_array))) { - LOG_WARN("Failed to get difference", K(ret), KP(common_col_group_ids), - K(cur_col_group_ids)); - } else if (!tmp_array.empty()) { - is_common = false; - } else if (cur_col_group_ids.count() > common_col_group_ids->count()) { - common_col_group_ids = &cur_col_group_ids; - common_cg_col_exprs = cur_cg_col_exprs; + LOG_WARN("Failed to get difference", K(ret), K(common_cg_ids), K(cur_cg_ids)); + } else if (tmp_array.empty()) { + is_common = true; + if (cur_cg_ids.count() > common_cg_ids.count()) { + if (OB_FAIL(common_cg_ids.assign(cur_cg_ids))) { + LOG_WARN("Fail to assign cg ids", K(ret)); + } else if (nullptr != cur_cg_exprs && OB_FAIL(common_cg_exprs.assign(*cur_cg_exprs))) { + LOG_WARN("Fail to assign cg exprs", K(ret)); + } + } + } else if (prev_status > sql::ObCommonFilterTreeStatus::WHITE && + filter->get_status() > sql::ObCommonFilterTreeStatus::WHITE && + tmp_array.count() < shorter_array.count()) { + is_common = true; + for (int64_t i = 0; OB_SUCC(ret) && i < cur_cg_ids.count(); i++) { + if (!is_contain(common_cg_ids, cur_cg_ids.at(i))) { + if (OB_FAIL(common_cg_ids.push_back(cur_cg_ids.at(i)))) { + LOG_WARN("Fail to push back cg idx", K(ret)); + } else if (nullptr != cur_cg_exprs && OB_FAIL(common_cg_exprs.push_back(cur_cg_exprs->at(i)))) { + LOG_WARN("Fail to push back cg expr", K(ret)); + } + } + } } } return ret; @@ -757,15 +777,26 @@ sql::ObCommonFilterTreeStatus ObCOSSTableRowsFilter::merge_common_filter_tree_st { sql::ObCommonFilterTreeStatus ret = sql::ObCommonFilterTreeStatus::NONE_FILTER; if (is_common_filter_tree_status(status_one, status_two)) { - ret = sql::ObCommonFilterTreeStatus::SINGLE_BLACK == status_one ? 
- status_two : status_one; + ret = (sql::ObCommonFilterTreeStatus)(filter_tree_merge_status[status_one][status_two]); } return ret; } +/* + * filter1 filter2 is_common status + * NOP NOP O NONE_FILTER + * WHITE WHITE X WHITE + * WHITE SINGLE_BLACK X SINGLE_BLACK + * WHITE MULTI_BLACK O NONE_FILTER + * SINGLE_BLACK SINGLE_BLACK X SINGLE_BLACK + * SINGLE_BLACK MULTI_BLACK X MULTI_BLACK + * MULTI_BLACK MULTI_BLACK X MULTI_BLACK + * + * merge condition: cg idxs is contained by the other + */ bool ObCOSSTableRowsFilter::is_common_filter_tree_status( - sql::ObCommonFilterTreeStatus status_one, - sql::ObCommonFilterTreeStatus status_two) + const sql::ObCommonFilterTreeStatus status_one, + const sql::ObCommonFilterTreeStatus status_two) { bool ret = true; if (sql::ObCommonFilterTreeStatus::NONE_FILTER == status_one diff --git a/src/storage/column_store/ob_co_sstable_rows_filter.h b/src/storage/column_store/ob_co_sstable_rows_filter.h index 615866479..56e27b48c 100644 --- a/src/storage/column_store/ob_co_sstable_rows_filter.h +++ b/src/storage/column_store/ob_co_sstable_rows_filter.h @@ -30,7 +30,14 @@ class ObCOSSTableV2; class ObCOSSTableRowsFilter { public: - static const uint32_t MAX_NUM_OF_CG_ITER_TO_LOCATE_IN_ADVANCE = 2; + static constexpr uint32_t MAX_NUM_OF_CG_ITER_TO_LOCATE_IN_ADVANCE = 2; + static constexpr uint8_t filter_tree_merge_status[sql::ObCommonFilterTreeStatus::MAX_STATUS][sql::ObCommonFilterTreeStatus::MAX_STATUS] = + { {0, 0, 0, 0}, \ + {0, 1, 2, 0}, \ + {0, 2, 2, 3}, \ + {0, 0, 3, 3} + }; + public: ObCOSSTableRowsFilter(); ~ObCOSSTableRowsFilter(); @@ -84,8 +91,8 @@ private: int find_common_sub_filter_tree( sql::ObPushdownFilterExecutor &filter, ObIArray &filter_indexes, - common::ObIArray *&common_col_group_ids, - common::ObIArray *&common_col_exprs, + common::ObIArray &common_cg_ids, + common::ObIArray &common_cg_exprs, const int64_t base_filter_idx); int rewrite_filter_tree( sql::ObPushdownFilterExecutor *filter, @@ -104,16 +111,17 @@ private: void 
adjust_batch_size(); OB_INLINE ObCGBitmap* get_child_bitmap(uint32_t depth); static int assign_common_col_groups( - sql::ObPushdownFilterExecutor *filter, - common::ObIArray *&common_col_group_ids, - common::ObIArray *&common_cg_col_exprs, + const sql::ObPushdownFilterExecutor *filter, + const sql::ObCommonFilterTreeStatus prev_status, + common::ObIArray &common_cg_ids, + common::ObIArray &common_cg_exprs, bool &is_common); static sql::ObCommonFilterTreeStatus merge_common_filter_tree_status( - sql::ObCommonFilterTreeStatus status_one, - sql::ObCommonFilterTreeStatus status_two); + const sql::ObCommonFilterTreeStatus status_one, + const sql::ObCommonFilterTreeStatus status_two); static bool is_common_filter_tree_status( - sql::ObCommonFilterTreeStatus status_one, - sql::ObCommonFilterTreeStatus status_two); + const sql::ObCommonFilterTreeStatus status_one, + const sql::ObCommonFilterTreeStatus status_two); static void set_status_of_filter_tree(sql::ObPushdownFilterExecutor *filter); static void clear_filter_state(sql::ObPushdownFilterExecutor *filter); private: diff --git a/src/storage/column_store/ob_column_store_util.h b/src/storage/column_store/ob_column_store_util.h index fbfec2506..6eaf19a9e 100644 --- a/src/storage/column_store/ob_column_store_util.h +++ b/src/storage/column_store/ob_column_store_util.h @@ -26,7 +26,6 @@ struct ObTableAccessParam; typedef int64_t ObCSRowId; const ObCSRowId OB_INVALID_CS_ROW_ID = -1; -const int64_t OB_CS_SCAN_GROUP_SIZE = 8192; const uint32_t OB_CS_INVALID_CG_IDX = INT32_MAX; const uint32_t OB_CS_VIRTUAL_CG_IDX = INT32_MAX - 1; diff --git a/src/storage/column_store/ob_i_cg_iterator.h b/src/storage/column_store/ob_i_cg_iterator.h index 7c39aeff8..0f4e3d106 100644 --- a/src/storage/column_store/ob_i_cg_iterator.h +++ b/src/storage/column_store/ob_i_cg_iterator.h @@ -151,6 +151,8 @@ public: * is_group_by_col: current column is group by column? 
*/ virtual int calc_aggregate(const bool is_group_by_col) = 0; + + virtual int locate_micro_index(const ObCSRange &range) = 0; DECLARE_PURE_VIRTUAL_TO_STRING; }; diff --git a/src/storage/column_store/ob_virtual_cg_scanner.h b/src/storage/column_store/ob_virtual_cg_scanner.h index 21ec2a75f..32c50fd52 100644 --- a/src/storage/column_store/ob_virtual_cg_scanner.h +++ b/src/storage/column_store/ob_virtual_cg_scanner.h @@ -144,6 +144,8 @@ public: virtual int read_distinct(const int32_t group_by_col) override; virtual int read_reference(const int32_t group_by_col) override; virtual int calc_aggregate(const bool is_group_by_col) override; + virtual int locate_micro_index(const ObCSRange &range) override + { return locate(range, nullptr); } INHERIT_TO_STRING_KV("ObDefaultCGScanner", ObDefaultCGScanner, KPC_(output_exprs), K_(group_by_agg_idxs), KP_(group_by_cell)); private: diff --git a/src/storage/ob_i_store.h b/src/storage/ob_i_store.h index 12f4137d7..b2f3edb00 100644 --- a/src/storage/ob_i_store.h +++ b/src/storage/ob_i_store.h @@ -142,6 +142,8 @@ public: } return ret; } + OB_INLINE int cap() const + { return capacity_; } using common::ObFixedArrayImpl::reset; protected: using common::ObFixedArrayImpl::prepare_allocate; diff --git a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result index 02418a40d..8f22e3b46 100644 --- a/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result +++ b/tools/deploy/mysql_test/test_suite/inner_table/r/mysql/all_virtual_sys_parameter_stat.result @@ -224,6 +224,7 @@ standby_db_fetch_log_rpc_timeout standby_db_preferred_upstream_log_region standby_fetch_log_bandwidth_limit storage_meta_cache_priority +storage_rowsets_size strict_check_os_params syslog_compress_func syslog_disk_size @@ -354,6 +355,8 @@ _hidden_sys_tenant_memory _ignore_system_memory_over_limit_error 
_inlist_rewrite_threshold _io_callback_thread_count +_io_read_batch_size +_io_read_redundant_limit_percentage _iut_enable _iut_max_entries _iut_stat_collection_type diff --git a/unittest/sql/engine/px/CMakeLists.txt b/unittest/sql/engine/px/CMakeLists.txt index 79ea3b92d..a4f48ba3b 100644 --- a/unittest/sql/engine/px/CMakeLists.txt +++ b/unittest/sql/engine/px/CMakeLists.txt @@ -1,2 +1,3 @@ sql_unittest(test_random_affi) #sql_unittest(test_slice_calc) +sql_unittest(test_ob_small_hashset) diff --git a/unittest/sql/engine/px/test_ob_small_hashset.cpp b/unittest/sql/engine/px/test_ob_small_hashset.cpp new file mode 100644 index 000000000..a26108bb4 --- /dev/null +++ b/unittest/sql/engine/px/test_ob_small_hashset.cpp @@ -0,0 +1,261 @@ +/** + * Copyright (c) 2024 OceanBase + * OceanBase CE is licensed under Mulan PubL v2. + * You can use this software according to the terms and conditions of the Mulan PubL v2. + * You may obtain a copy of Mulan PubL v2 at: + * http://license.coscl.org.cn/MulanPubL-2.0 + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, + * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, + * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PubL v2 for more details. 
+ */ + +#include +#include +#include +#define private public +// #define unittest +#define unittest_bloom_filter +#include "src/sql/engine/px/p2p_datahub/ob_small_hashset.h" +#include "lib/hash/ob_hashset.h" +#include "sql/engine/px/ob_px_bloom_filter.h" + +using namespace std; +namespace oceanbase +{ +namespace sql +{ + +static constexpr uint64_t build_count = 4096; + +class SimpleTimer +{ +public: + SimpleTimer() { + cpu_begin_time_ = rdtsc(); + } + ~SimpleTimer() { + uint64_t elapse_time = rdtsc() - cpu_begin_time_; + cout << "elapse time is: " << elapse_time << endl; + } +private: + uint64_t cpu_begin_time_; + +}; + +class SmallHashSetTest : public ::testing::Test +{ +public: + SmallHashSetTest() = default; + virtual ~SmallHashSetTest() = default; + virtual void SetUp(){}; + virtual void TearDown(){}; + + void insert_hash(int64_t insert_count); + void test_hash(int64_t test_count); + void performance_test(); + +public: + ObArenaAllocator alloc_; + ObSmallHashSet accurate_small_set_; + + ObSmallHashSet inaccurate_small_set_; + std::unordered_set std_set_; + hash::ObHashSet ob_set_; + ObPxBloomFilter bloom_filter_; +private: + DISALLOW_COPY_AND_ASSIGN(SmallHashSetTest); +}; + +void SmallHashSetTest::insert_hash(int64_t insert_count) { + constexpr uint64_t rand_upper_bound = 100000; + for (int64_t i = 0; i < insert_count; ++i) { + int64_t num = common::ObRandom::rand(1, rand_upper_bound); + uint64_t hash = murmurhash(&num, 8, 0); + accurate_small_set_.insert_hash(hash); + inaccurate_small_set_.insert_hash(hash); + std_set_.insert(hash); + } +} + +void SmallHashSetTest::test_hash(int64_t test_count) { + constexpr uint64_t rand_upper_bound = 100000; + for (int64_t i = 0; i < test_count; ++i) { + int64_t num = common::ObRandom::rand(1, rand_upper_bound); + uint64_t hash = murmurhash(&num, 8, 0); + bool in_accurate_set = accurate_small_set_.test_hash(hash); + bool in_inaccurate_set = inaccurate_small_set_.test_hash(hash); + bool in_std_set = std_set_.count(hash) != 0; 
+ EXPECT_EQ(in_accurate_set, in_std_set); + EXPECT_GE(in_inaccurate_set, in_accurate_set); + } + +#ifdef unittest + int ret = OB_SUCCESS; + uint64_t accurate_total = accurate_small_set_.seek_total_times_; + uint64_t inaccurate_total = inaccurate_small_set_.seek_total_times_; + double accurate_avg = accurate_small_set_.seek_total_times_ / double(test_count); + double inaccurate_avg = inaccurate_small_set_.seek_total_times_ / double(test_count); + COMMON_LOG(WARN, "avg seek", K(accurate_total), K(inaccurate_total), K(accurate_avg), + K(inaccurate_avg)); +#endif +} + +void SmallHashSetTest::performance_test() +{ + constexpr uint64_t probe_count = 1000000; + std::vector build_hash_values(probe_count, 0); + std::vector probe_hash_values(probe_count, 0); + + accurate_small_set_.clear(); + inaccurate_small_set_.clear(); + std_set_.clear(); + +// ------------------- build test ----------------------- + + for (int64_t i = 0; i < build_count; ++i) { + int64_t num = common::ObRandom::rand(0, UINT64_MAX); + build_hash_values[i] = murmurhash(&num, 8, 0); + } + + { + cout << "Build:: for accurate_small_set, "; + SimpleTimer timer; + for (int64_t i = 0; i < build_count; ++i) { + accurate_small_set_.insert_hash(build_hash_values[i]); + } + } + + { + cout << "Build:: for inaccurate_small_set, "; + SimpleTimer timer; + for (int64_t i = 0; i < build_count; ++i) { + inaccurate_small_set_.insert_hash(build_hash_values[i]); + } + } + + { + cout << "Build:: for std::unordered_set, "; + SimpleTimer timer; + for (int64_t i = 0; i < build_count; ++i) { + std_set_.insert(build_hash_values[i]); + } + } + + { + cout << "Build:: for ob_hash_set, "; + SimpleTimer timer; + for (int64_t i = 0; i < build_count; ++i) { + ob_set_.set_refactored(build_hash_values[i]); + } + } + +#ifdef unittest_bloom_filter + { + cout << "Build:: for bloom filter, "; + SimpleTimer timer; + for (int64_t i = 0; i < build_count; ++i) { + bloom_filter_.put(build_hash_values[i]); + } + } +#endif + +// ------------------- 
probe test ----------------------- + + for (int64_t i = 0; i < probe_count; ++i) { + int64_t num = common::ObRandom::rand(0, UINT64_MAX); + probe_hash_values[i] = murmurhash(&num, 8, 0); + } + + { + cout << "Probe:: for accurate_small_set, "; + SimpleTimer timer; + for (int64_t i = 0; i < probe_count; ++i) { + accurate_small_set_.test_hash(probe_hash_values[i]); + } + } + + { + cout << "Probe:: for inaccurate_small_set, "; + SimpleTimer timer; + for (int64_t i = 0; i < probe_count; ++i) { + inaccurate_small_set_.test_hash(probe_hash_values[i]); + } + } + + { + cout << "Probe:: for std::unordered_set, "; + SimpleTimer timer; + for (int64_t i = 0; i < probe_count; ++i) { + std_set_.count(probe_hash_values[i]); + } + } + + { + cout << "Probe:: for ob_hash_set, "; + SimpleTimer timer; + for (int64_t i = 0; i < probe_count; ++i) { + ob_set_.exist_refactored(probe_hash_values[i]); + } + } + +#ifdef unittest_bloom_filter + { + cout << "Probe:: for bloom_filter batch, "; + SimpleTimer timer; + int64_t batch_size = 256; + uint64_t round = probe_count / batch_size; + for (int64_t i = 0; i < round; ++i) { + uint64_t offset = i * batch_size; + bloom_filter_.might_contain_batch(probe_hash_values.data() + offset, batch_size); + } + int64_t last_batch_size = probe_count % batch_size; + if (last_batch_size > 0) { + uint64_t offset = round * batch_size; + bloom_filter_.might_contain_batch(probe_hash_values.data() + offset, last_batch_size); + } + } +#endif + +// ------------------- false positive rate ----------------------- + { + int64_t total_count = probe_count; + int64_t error_count = 0; + for (int64_t i = 0; i < probe_count; ++i) { + bool result_inacc = inaccurate_small_set_.test_hash(probe_hash_values[i]); + bool result_std = std_set_.count(probe_hash_values[i]) != 0; + if (result_inacc != result_std) { + error_count++; + } + } + double error_rate = double(error_count) / double(total_count); + cout << "Probe:: the false positive rate is: " << error_rate + << ", error_count: " 
<< error_count + << ", total_count: " << total_count << endl; + } +} + +TEST_F(SmallHashSetTest, test_small_hash_set) +{ + int64_t tenant_id = 1; + accurate_small_set_.init(build_count, tenant_id); + inaccurate_small_set_.init(build_count, tenant_id); + cout << "the small hash set init size is: " << accurate_small_set_.capacity_ << endl; + ob_set_.create(build_count * 4); + + bloom_filter_.init(build_count, alloc_, tenant_id, 0.03, 2147483648); + + insert_hash(4000); + test_hash(100000); + performance_test(); +} + +} // namespace sql +} // namespace oceanbase + +int main(int argc, char **argv) +{ + OB_LOGGER.set_log_level("DEBUG"); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file diff --git a/unittest/storage/blocksstable/cs_encoding/ob_cs_encoding_test_base.h b/unittest/storage/blocksstable/cs_encoding/ob_cs_encoding_test_base.h index 871fe6646..0b917ccd7 100644 --- a/unittest/storage/blocksstable/cs_encoding/ob_cs_encoding_test_base.h +++ b/unittest/storage/blocksstable/cs_encoding/ob_cs_encoding_test_base.h @@ -317,7 +317,7 @@ int ObCSEncodingTestBase::check_decode_vector(ObMicroBlockCSDecoder &decoder, sql::ObEvalCtx eval_ctx(exec_context); char *buf = nullptr; if (OB_ISNULL(buf = reinterpret_cast(allocator_.alloc( - row_cnt * (sizeof(char*)/*ptr_arr*/ + sizeof(uint32_t)/*len_arr*/ + sizeof(int64_t)/*row_ids*/))))) { + row_cnt * (sizeof(char*)/*ptr_arr*/ + sizeof(uint32_t)/*len_arr*/ + sizeof(int32_t)/*row_ids*/))))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to allocate", K(ret)); } else { @@ -325,7 +325,7 @@ int ObCSEncodingTestBase::check_decode_vector(ObMicroBlockCSDecoder &decoder, buf += row_cnt * sizeof(char*); uint32_t *len_arr = reinterpret_cast(buf) ; buf += row_cnt * sizeof(uint32_t); - int64_t *row_ids = reinterpret_cast(buf); + int32_t *row_ids = reinterpret_cast(buf); bool need_test_column = true; for (int col_idx = 0; OB_SUCC(ret) && col_idx < ctx_.column_cnt_; col_idx++) { @@ -373,7 +373,7 
@@ int ObCSEncodingTestBase::check_decode_vector(ObMicroBlockCSDecoder &decoder, row_cnt, col_meta, vector_format, eval_ctx, col_expr, frame_allocator))) { LOG_WARN("fail to generate_column_output_expr", K(ret), K(vec_tc), K(col_meta), K(vector_format)); } else { - for (int64_t row_idx = 0; row_idx < row_cnt; ++row_idx) { + for (int32_t row_idx = 0; row_idx < row_cnt; ++row_idx) { row_ids[row_idx] = row_idx; } ObVectorDecodeCtx vector_ctx(ptr_arr, len_arr, row_ids, row_cnt, 0, col_expr.get_vector_header(eval_ctx)); @@ -477,10 +477,10 @@ int ObCSEncodingTestBase::check_get_row_count(const ObMicroBlockHeader *header, int ret = OB_SUCCESS; ObMicroBlockData full_transformed_data; ObMicroBlockCSDecoder decoder; - int64_t *row_ids = nullptr; + int32_t *row_ids = nullptr; if (OB_FAIL(init_cs_decoder(header, desc, full_transformed_data, decoder))) { LOG_WARN("fail to init cs_decoder", KR(ret)); - } else if (OB_ISNULL(row_ids = (int64_t*)allocator_.alloc(header->row_count_ * sizeof(int64_t)))) { + } else if (OB_ISNULL(row_ids = (int32_t*)allocator_.alloc(header->row_count_ * sizeof(int32_t)))) { ret = OB_ALLOCATE_MEMORY_FAILED; LOG_WARN("fail to alloc", K(ret), KPC(header)); } else { diff --git a/unittest/storage/blocksstable/cs_encoding/test_decoder_filter_perf.h b/unittest/storage/blocksstable/cs_encoding/test_decoder_filter_perf.h index 342324e58..ea2a68fdc 100644 --- a/unittest/storage/blocksstable/cs_encoding/test_decoder_filter_perf.h +++ b/unittest/storage/blocksstable/cs_encoding/test_decoder_filter_perf.h @@ -1126,8 +1126,8 @@ void TestDecoderFilterPerf::init_encoding_ctx( } \ } else { \ const int64_t cur_column_cnt = full_column_cnt_; \ - int64_t row_ids[SIMPLE_ROW_CNT]; \ - for (int64_t i = 0; i < SIMPLE_ROW_CNT; ++i) { \ + int32_t row_ids[SIMPLE_ROW_CNT]; \ + for (int32_t i = 0; i < SIMPLE_ROW_CNT; ++i) { \ row_ids[i] = i; \ } \ ObSEArray cols; \ diff --git a/unittest/storage/blocksstable/cs_encoding/test_integer_stream.cpp 
b/unittest/storage/blocksstable/cs_encoding/test_integer_stream.cpp index 18783e5ef..dc4a4a400 100644 --- a/unittest/storage/blocksstable/cs_encoding/test_integer_stream.cpp +++ b/unittest/storage/blocksstable/cs_encoding/test_integer_stream.cpp @@ -125,7 +125,7 @@ public: template static void do_decode_raw_array(const ObStreamData &data, const ObIntegerStreamDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count, char *out_buf) { @@ -140,7 +140,7 @@ public: static void decode_raw_array(const ObStreamData &data, const ObIntegerStreamDecoderCtx &ctx, - const int64_t *row_ids, + const int32_t *row_ids, const int64_t row_count, char *out_buf) { @@ -208,8 +208,8 @@ public: // test batch decode int64_t row_id_count = size; - int64_t *row_ids = new int64_t[row_id_count]; - for (int64_t i = 0; i < row_id_count; i++) { + int32_t *row_ids = new int32_t[row_id_count]; + for (int32_t i = 0; i < row_id_count; i++) { row_ids[i] = i; } @@ -335,8 +335,8 @@ public: } // decode batch - int64_t *row_ids = new int64_t[size]; - for (int64_t i = 0; i < size; i++) { + int32_t *row_ids = new int32_t[size]; + for (int32_t i = 0; i < size; i++) { row_ids[i] = i; } ObBaseColumnDecoderCtx base_ctx; @@ -391,7 +391,7 @@ public: datums2[i].ptr_ = (datums2_buf + i * sizeof(uint64_t)); } int64_t random_idx = ObTimeUtility::current_time() % size; - int64_t row_id = 0; + int32_t row_id = 0; for (int64_t i = 0; i < size; i++) { ref_arr[i] = i; row_id = (i + random_idx) % size; diff --git a/unittest/storage/blocksstable/cs_encoding/test_string_stream.cpp b/unittest/storage/blocksstable/cs_encoding/test_string_stream.cpp index 18d901950..b084993a7 100644 --- a/unittest/storage/blocksstable/cs_encoding/test_string_stream.cpp +++ b/unittest/storage/blocksstable/cs_encoding/test_string_stream.cpp @@ -213,8 +213,8 @@ public: } // test batch decode - int64_t *row_ids = new int64_t[size]; - for (int64_t i = 0; i < size; i++) { + int32_t *row_ids = new int32_t[size]; + 
for (int32_t i = 0; i < size; i++) { row_ids[i] = i; } ObDatum *datums3 = new ObDatum[size]; @@ -273,7 +273,7 @@ public: datums3[i].reset(); } int64_t random_idx = ObTimeUtility::current_time()%size; - int64_t row_id = 0; + int32_t row_id = 0; for (int64_t i = 0; i < size; i++) { row_id = (i + random_idx) % size; row_ids[i] = row_id; diff --git a/unittest/storage/blocksstable/encoding/test_column_decoder.h b/unittest/storage/blocksstable/encoding/test_column_decoder.h index ffcbc9334..54c0a84cf 100644 --- a/unittest/storage/blocksstable/encoding/test_column_decoder.h +++ b/unittest/storage/blocksstable/encoding/test_column_decoder.h @@ -1383,8 +1383,8 @@ void TestColumnDecoder::batch_decode_to_datum_test(bool is_condensed) STORAGE_LOG(INFO, "Current col: ", K(i), K(col_descs_.at(i)), K(row.storage_datums_[i]), K(*decoder.decoders_[col_offset].ctx_)); ObDatum datums[ROW_CNT]; - int64_t row_ids[ROW_CNT]; - for (int64_t j = 0; j < ROW_CNT; ++j) { + int32_t row_ids[ROW_CNT]; + for (int32_t j = 0; j < ROW_CNT; ++j) { datums[j].ptr_ = reinterpret_cast(datum_buf) + j * 128; row_ids[j] = j; } @@ -1889,8 +1889,8 @@ void TestColumnDecoder::batch_decode_to_vector_test( int32_t col_offset = i; LOG_INFO("Current col: ", K(i), K(col_meta), K(*decoder.decoders_[col_offset].ctx_), K(precision), K(vec_tc)); - int64_t row_ids[test_row_cnt]; - int64_t row_id_idx = 0; + int32_t row_ids[test_row_cnt]; + int32_t row_id_idx = 0; for (int64_t datum_idx = 0; datum_idx < ROW_CNT; ++datum_idx) { if (!align_row_id && 0 == datum_idx % 2) { // skip @@ -2001,8 +2001,8 @@ void TestColumnDecoder::col_equal_batch_decode_to_vector_test(const VectorFormat int32_t col_offset = i; LOG_INFO("Current col: ", K(i), K(col_meta), K(*decoder.decoders_[col_offset].ctx_), K(precision), K(vec_tc)); - int64_t row_ids[ROW_CNT]; - for (int64_t datum_idx = 0; datum_idx < ROW_CNT; ++datum_idx) { + int32_t row_ids[ROW_CNT]; + for (int32_t datum_idx = 0; datum_idx < ROW_CNT; ++datum_idx) { row_ids[datum_idx] = 
datum_idx; } @@ -2124,8 +2124,8 @@ void TestColumnDecoder::col_substr_batch_decode_to_vector_test(const VectorForma int32_t col_offset = i; LOG_INFO("Current col: ", K(i), K(col_meta), K(*decoder.decoders_[col_offset].ctx_), K(precision), K(vec_tc)); - int64_t row_ids[ROW_CNT]; - for (int64_t datum_idx = 0; datum_idx < ROW_CNT; ++datum_idx) { + int32_t row_ids[ROW_CNT]; + for (int32_t datum_idx = 0; datum_idx < ROW_CNT; ++datum_idx) { row_ids[datum_idx] = datum_idx; } @@ -2304,8 +2304,8 @@ int VectorDecodeTestUtil::test_batch_decode_perf( // decode to vector const char *ptr_arr[row_cnt]; uint32_t len_arr[row_cnt]; - int64_t row_ids[row_cnt]; - for (int64_t datum_idx = 0; datum_idx < row_cnt; ++datum_idx) { + int32_t row_ids[row_cnt]; + for (int32_t datum_idx = 0; datum_idx < row_cnt; ++datum_idx) { row_ids[datum_idx] = datum_idx; } ObVectorDecodeCtx decode_ctx(ptr_arr, len_arr, row_ids, row_cnt, 0, col_expr.get_vector_header(eval_ctx)); diff --git a/unittest/storage/blocksstable/encoding/test_column_equal_decoder.cpp b/unittest/storage/blocksstable/encoding/test_column_equal_decoder.cpp index 6228299f2..0a48117d4 100644 --- a/unittest/storage/blocksstable/encoding/test_column_equal_decoder.cpp +++ b/unittest/storage/blocksstable/encoding/test_column_equal_decoder.cpp @@ -97,8 +97,8 @@ TEST_F(TestColumnEqualMicroDecoder, small_uint_with_large_exception) int32_t col_offset = i; LOG_INFO("Current col: ", K(i), K(col_meta), K(*decoder.decoders_[col_offset].ctx_), K(precision), K(vec_tc)); - int64_t row_ids[ROW_CNT]; - for (int64_t datum_idx = 0; datum_idx < ROW_CNT; ++datum_idx) { + int32_t row_ids[ROW_CNT]; + for (int32_t datum_idx = 0; datum_idx < ROW_CNT; ++datum_idx) { row_ids[datum_idx] = datum_idx; } diff --git a/unittest/storage/blocksstable/encoding/test_const_decoder.cpp b/unittest/storage/blocksstable/encoding/test_const_decoder.cpp index 3c2edca48..2bbf47404 100644 --- a/unittest/storage/blocksstable/encoding/test_const_decoder.cpp +++ 
b/unittest/storage/blocksstable/encoding/test_const_decoder.cpp @@ -91,8 +91,8 @@ void TestConstDecoder::batch_decode_to_vector_no_exc_test(const VectorFormat vec int32_t col_offset = i; LOG_INFO("Current col: ", K(i), K(col_meta), K(*decoder.decoders_[col_offset].ctx_), K(precision), K(vec_tc)); - int64_t row_ids[ROW_CNT]; - for (int64_t datum_idx = 0; datum_idx < ROW_CNT; ++datum_idx) { + int32_t row_ids[ROW_CNT]; + for (int32_t datum_idx = 0; datum_idx < ROW_CNT; ++datum_idx) { row_ids[datum_idx] = datum_idx; } @@ -783,8 +783,8 @@ TEST_F(TestConstDecoder, batch_decode_to_datum_test_without_expection) int32_t col_offset = i; STORAGE_LOG(INFO, "Current col: ", K(i),K(col_descs_.at(i)), K(*decoder.decoders_[col_offset].ctx_)); ObDatum datums[ROW_CNT]; - int64_t row_ids[ROW_CNT]; - for (int64_t j = 0; j < ROW_CNT; ++j) { + int32_t row_ids[ROW_CNT]; + for (int32_t j = 0; j < ROW_CNT; ++j) { datums[j].ptr_ = reinterpret_cast(datum_buf) + j * 128; row_ids[j] = j; } diff --git a/unittest/storage/blocksstable/encoding/test_general_column_decoder.cpp b/unittest/storage/blocksstable/encoding/test_general_column_decoder.cpp index 25f70c57f..a29020849 100644 --- a/unittest/storage/blocksstable/encoding/test_general_column_decoder.cpp +++ b/unittest/storage/blocksstable/encoding/test_general_column_decoder.cpp @@ -172,7 +172,7 @@ TEST_F(TestDictDecoder, batch_decode_single_var_len_dict) { ObMicroBlockDecoder decoder; ObMicroBlockData data(encoder_.data_buffer_.data(), encoder_.data_buffer_.length()); ASSERT_EQ(OB_SUCCESS, decoder.init(data, read_info_)); - int64_t row_id = 0; + int32_t row_id = 0; const char *cell_data = nullptr; ObDatum datum; char datum_buf[40]; diff --git a/unittest/storage/blocksstable/encoding/test_raw_decoder.cpp b/unittest/storage/blocksstable/encoding/test_raw_decoder.cpp index 2e2b8e3f5..b6b21aac9 100644 --- a/unittest/storage/blocksstable/encoding/test_raw_decoder.cpp +++ b/unittest/storage/blocksstable/encoding/test_raw_decoder.cpp @@ -756,8 +756,8 
@@ void TestRawDecoder::test_batch_decode_to_vector( LOG_INFO("Current col: ", K(i), K(col_meta), K(*decoder.decoders_[col_offset].ctx_), K(precision), K(vec_tc), K(need_test_column)); - int64_t row_ids[test_row_cnt]; - int64_t row_id_idx = 0; + int32_t row_ids[test_row_cnt]; + int32_t row_id_idx = 0; for (int64_t datum_idx = 0; datum_idx < ROW_CNT; ++datum_idx) { if (!align_row_id && 0 == datum_idx % 2) { // skip @@ -1246,8 +1246,8 @@ TEST_F(TestRawDecoder, batch_decode_to_datum) int32_t col_offset = i; LOG_INFO("Current col: ", K(i), K(col_descs_.at(i)), K(*decoder.decoders_[col_offset].ctx_)); ObDatum datums[ROW_CNT]; - int64_t row_ids[ROW_CNT]; - for (int64_t j = 0; j < ROW_CNT; ++j) { + int32_t row_ids[ROW_CNT]; + for (int32_t j = 0; j < ROW_CNT; ++j) { datums[j].ptr_ = reinterpret_cast(datum_buf) + j * 128; row_ids[j] = j; } @@ -1324,8 +1324,8 @@ TEST_F(TestRawDecoder, opt_batch_decode_to_datum) int32_t col_offset = i; STORAGE_LOG(INFO, "Current col: ", K(i),K(col_descs_.at(i)), K(*decoder.decoders_[col_offset].ctx_)); ObDatum datums[ROW_CNT]; - int64_t row_ids[ROW_CNT]; - for (int64_t j = 0; j < ROW_CNT; ++j) { + int32_t row_ids[ROW_CNT]; + for (int32_t j = 0; j < ROW_CNT; ++j) { datums[j].ptr_ = reinterpret_cast(datum_buf) + j * 128; row_ids[j] = j; } diff --git a/unittest/storage/column_store/test_co_sstable_rows_filter.cpp b/unittest/storage/column_store/test_co_sstable_rows_filter.cpp index e4d8ed985..871f311ee 100644 --- a/unittest/storage/column_store/test_co_sstable_rows_filter.cpp +++ b/unittest/storage/column_store/test_co_sstable_rows_filter.cpp @@ -257,7 +257,7 @@ ObPushdownFilterExecutor* TestCOSSTableRowsFilter::create_physical_filter( column_exprs->push_back(nullptr); cg_idxes.push_back(_cg_idxes.at(i)); } - filter->cg_col_exprs_ = cg_col_exprs; + filter->cg_col_exprs_.assign(*cg_col_exprs); return filter; } @@ -565,4 +565,4 @@ int main(int argc, char **argv) OB_LOGGER.set_log_level("INFO"); ::testing::InitGoogleTest(&argc, argv); return 
RUN_ALL_TESTS(); -} \ No newline at end of file +} diff --git a/unittest/storage/test_io_manager.cpp b/unittest/storage/test_io_manager.cpp index 796281458..cf70dac6d 100644 --- a/unittest/storage/test_io_manager.cpp +++ b/unittest/storage/test_io_manager.cpp @@ -631,7 +631,7 @@ TEST_F(TestIOStruct, Test_Size) int64_t trace_size = sizeof(ObCurTraceId::TraceId); int64_t fd_size = sizeof(ObIOFd); - ASSERT_LT(max_callback_size, 256); + ASSERT_LT(max_callback_size, 512); LOG_INFO("qilu :check size", K(size1), K(size2), K(size3), K(size4), K(size5), K(size6), K(max_callback_size)); LOG_INFO("qilu :check size", K(size_request), K(size_result), K(size_info), K(size_thread_cond), K(size_flag), K(ref_size), K(time_size), K(return_size), K(fd_size), K(trace_size));