diff --git a/mittest/mtlenv/storage/blocksstable/test_pushdown_aggregate.cpp b/mittest/mtlenv/storage/blocksstable/test_pushdown_aggregate.cpp index 613e53f1e..c710ad23b 100644 --- a/mittest/mtlenv/storage/blocksstable/test_pushdown_aggregate.cpp +++ b/mittest/mtlenv/storage/blocksstable/test_pushdown_aggregate.cpp @@ -539,6 +539,13 @@ TEST_F(TestPushdownAggregate, test_decide_use_group_by2) ASSERT_TRUE(use_group_by); ASSERT_TRUE(nullptr != group_by_cell.distinct_projector_buf_); ASSERT_TRUE(nullptr != group_by_cell.tmp_group_by_datum_buf_); + + group_by_cell.set_row_capacity(eval_ctx_.batch_size_ - 1); + ASSERT_EQ(OB_SUCCESS, group_by_cell.decide_use_group_by(row_count, row_count, distinct_count, &bitmap, use_group_by)); + ASSERT_FALSE(use_group_by); + group_by_cell.set_row_capacity(eval_ctx_.batch_size_); + ASSERT_EQ(OB_SUCCESS, group_by_cell.decide_use_group_by(row_count, row_count, distinct_count, &bitmap, use_group_by)); + ASSERT_TRUE(use_group_by); } TEST_F(TestPushdownAggregate, test_eval_batch) diff --git a/src/storage/access/ob_pushdown_aggregate.cpp b/src/storage/access/ob_pushdown_aggregate.cpp index 0b2db1c85..e6bf6c128 100644 --- a/src/storage/access/ob_pushdown_aggregate.cpp +++ b/src/storage/access/ob_pushdown_aggregate.cpp @@ -2835,6 +2835,7 @@ void ObPDAggFactory::release(common::ObIArray &agg_cells) ObGroupByCell::ObGroupByCell(const int64_t batch_size, common::ObIAllocator &allocator) : batch_size_(batch_size), + row_capacity_(batch_size), group_by_col_offset_(-1), group_by_col_expr_(nullptr), group_by_col_datum_buf_(nullptr), @@ -2856,6 +2857,7 @@ ObGroupByCell::ObGroupByCell(const int64_t batch_size, common::ObIAllocator &all void ObGroupByCell::reset() { batch_size_ = 0; + row_capacity_ = 0; group_by_col_offset_ = -1; group_by_col_expr_ = nullptr; agg_cell_factory_.release(agg_cells_); @@ -3104,10 +3106,13 @@ int ObGroupByCell::output_extra_group_by_result(int64_t &count) if (OB_UNLIKELY(!group_by_col_datum_buf_->is_use_extra_buf())) { ret = OB_ERR_UNEXPECTED; LOG_WARN("Unexpected state", K(ret), KPC(group_by_col_datum_buf_)); + } else if (OB_UNLIKELY(0 == projected_cnt_ && row_capacity_ != batch_size_)) { + ret = OB_ERR_UNEXPECTED; + LOG_WARN("Unexpected row_capacity, must be equal with batch_size at first", K(ret), K(row_capacity_), K(batch_size_)); } else if (projected_cnt_ >= distinct_cnt_) { ret = OB_ITER_END; } else { - count = MIN(batch_size_, distinct_cnt_ - projected_cnt_); + count = MIN(row_capacity_, distinct_cnt_ - projected_cnt_); common::ObDatum *result_datum = nullptr; common::ObDatum *sql_result_datums = group_by_col_datum_buf_->get_sql_result_datums(); common::ObDatum *extra_result_datums = group_by_col_datum_buf_->get_extra_result_datums(); @@ -3132,7 +3137,7 @@ int ObGroupByCell::output_extra_group_by_result(int64_t &count) ret = OB_ITER_END; } } - LOG_DEBUG("[GROUP BY PUSHDOWN]", K(ret), K(count), K(projected_cnt_), K(distinct_cnt_)); + LOG_DEBUG("[GROUP BY PUSHDOWN]", K(ret), K(count), K(projected_cnt_), K(distinct_cnt_), K(row_capacity_)); return ret; } @@ -3228,6 +3233,7 @@ int64_t ObGroupByCell::to_string(char *buf, const int64_t buf_len) const int64_t pos = 0; J_OBJ_START(); J_KV(K_(batch_size), + K_(row_capacity), K_(group_by_col_offset), KP_(group_by_col_expr), K_(agg_cells), diff --git a/src/storage/access/ob_pushdown_aggregate.h b/src/storage/access/ob_pushdown_aggregate.h index b45bcf3a9..f50de2201 100644 --- a/src/storage/access/ob_pushdown_aggregate.h +++ b/src/storage/access/ob_pushdown_aggregate.h @@ -657,12 +657,14 @@ public: OB_INLINE bool is_processing() const { return is_processing_; } OB_INLINE void set_is_processing(const bool is_processing) { is_processing_ = is_processing; } OB_INLINE void reset_projected_cnt() { projected_cnt_ = 0; } + OB_INLINE void set_row_capacity(const int64_t row_capacity) { row_capacity_ = row_capacity; } template int decide_use_group_by(const int64_t row_cnt, const int64_t read_cnt, const int64_t distinct_cnt, const T *bitmap, bool &use_group_by) { int ret = OB_SUCCESS; const bool is_valid_bitmap = nullptr != bitmap && !bitmap->is_all_true(); - use_group_by = read_cnt * USE_GROUP_BY_READ_CNT_FACTOR > row_cnt && + use_group_by = row_capacity_ == batch_size_ && + read_cnt * USE_GROUP_BY_READ_CNT_FACTOR > row_cnt && distinct_cnt < USE_GROUP_BY_MAX_DISTINCT_CNT && distinct_cnt < row_cnt * USE_GROUP_BY_DISTINCT_RATIO && (!is_valid_bitmap || @@ -674,7 +676,8 @@ public: LOG_WARN("Failed to prepare group by datum buf", K(ret)); } } - LOG_DEBUG("[GROUP BY PUSHDOWN]", K(ret), K(row_cnt), K(read_cnt), K(distinct_cnt), K(is_valid_bitmap), K(use_group_by), + LOG_TRACE("[GROUP BY PUSHDOWN]", K(ret), K(row_cnt), K(read_cnt), K(distinct_cnt), K(is_valid_bitmap), K(use_group_by), + K_(batch_size), K_(row_capacity), "popcnt", is_valid_bitmap ? bitmap->popcnt() : 0, "size", is_valid_bitmap ? bitmap->size() : 0); return ret; @@ -692,6 +695,7 @@ private: static constexpr double USE_GROUP_BY_DISTINCT_RATIO = 0.5; static const int64_t USE_GROUP_BY_FILTER_FACTOR = 2; int64_t batch_size_; + int64_t row_capacity_; int32_t group_by_col_offset_; sql::ObExpr *group_by_col_expr_; ObAggGroupByDatumBuf *group_by_col_datum_buf_; diff --git a/src/storage/access/ob_vector_store.cpp b/src/storage/access/ob_vector_store.cpp index 29e7bd406..98a1cf5d9 100644 --- a/src/storage/access/ob_vector_store.cpp +++ b/src/storage/access/ob_vector_store.cpp @@ -173,10 +173,13 @@ int ObVectorStore::reuse_capacity(const int64_t capacity) int ret = OB_SUCCESS; if (OB_FAIL(ObBlockBatchedRowStore::reuse_capacity(capacity))) { LOG_WARN("Fail to reuse capacity", K(ret), K(capacity)); - } else { - if (nullptr != group_by_cell_ && !group_by_cell_->is_processing()) { + } else if (nullptr != group_by_cell_) { + if (!group_by_cell_->is_processing()) { group_by_cell_->reuse(); } + group_by_cell_->set_row_capacity(capacity); + } + if (OB_SUCC(ret)) { count_ = 0; } return ret;