Fix max rows returned in group by pushdown

This commit is contained in:
haitaoyang 2024-01-24 07:16:44 +00:00 committed by ob-robot
parent b129c930b1
commit 61d85bedc0
4 changed files with 26 additions and 6 deletions

View File

@ -539,6 +539,13 @@ TEST_F(TestPushdownAggregate, test_decide_use_group_by2)
ASSERT_TRUE(use_group_by);
ASSERT_TRUE(nullptr != group_by_cell.distinct_projector_buf_);
ASSERT_TRUE(nullptr != group_by_cell.tmp_group_by_datum_buf_);
group_by_cell.set_row_capacity(eval_ctx_.batch_size_ - 1);
ASSERT_EQ(OB_SUCCESS, group_by_cell.decide_use_group_by(row_count, row_count, distinct_count, &bitmap, use_group_by));
ASSERT_FALSE(use_group_by);
group_by_cell.set_row_capacity(eval_ctx_.batch_size_);
ASSERT_EQ(OB_SUCCESS, group_by_cell.decide_use_group_by(row_count, row_count, distinct_count, &bitmap, use_group_by));
ASSERT_TRUE(use_group_by);
}
TEST_F(TestPushdownAggregate, test_eval_batch)

View File

@ -2835,6 +2835,7 @@ void ObPDAggFactory::release(common::ObIArray<ObAggCell*> &agg_cells)
ObGroupByCell::ObGroupByCell(const int64_t batch_size, common::ObIAllocator &allocator)
: batch_size_(batch_size),
row_capacity_(batch_size),
group_by_col_offset_(-1),
group_by_col_expr_(nullptr),
group_by_col_datum_buf_(nullptr),
@ -2856,6 +2857,7 @@ ObGroupByCell::ObGroupByCell(const int64_t batch_size, common::ObIAllocator &all
void ObGroupByCell::reset()
{
batch_size_ = 0;
row_capacity_ = 0;
group_by_col_offset_ = -1;
group_by_col_expr_ = nullptr;
agg_cell_factory_.release(agg_cells_);
@ -3104,10 +3106,13 @@ int ObGroupByCell::output_extra_group_by_result(int64_t &count)
if (OB_UNLIKELY(!group_by_col_datum_buf_->is_use_extra_buf())) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("Unexpected state", K(ret), KPC(group_by_col_datum_buf_));
} else if (OB_UNLIKELY(0 == projected_cnt_ && row_capacity_ != batch_size_)) {
ret = OB_ERR_UNEXPECTED;
LOG_WARN("Unexpected row_capacity, must be equal with batch_size at first", K(ret), K(row_capacity_), K(batch_size_));
} else if (projected_cnt_ >= distinct_cnt_) {
ret = OB_ITER_END;
} else {
count = MIN(batch_size_, distinct_cnt_ - projected_cnt_);
count = MIN(row_capacity_, distinct_cnt_ - projected_cnt_);
common::ObDatum *result_datum = nullptr;
common::ObDatum *sql_result_datums = group_by_col_datum_buf_->get_sql_result_datums();
common::ObDatum *extra_result_datums = group_by_col_datum_buf_->get_extra_result_datums();
@ -3132,7 +3137,7 @@ int ObGroupByCell::output_extra_group_by_result(int64_t &count)
ret = OB_ITER_END;
}
}
LOG_DEBUG("[GROUP BY PUSHDOWN]", K(ret), K(count), K(projected_cnt_), K(distinct_cnt_));
LOG_DEBUG("[GROUP BY PUSHDOWN]", K(ret), K(count), K(projected_cnt_), K(distinct_cnt_), K(row_capacity_));
return ret;
}
@ -3228,6 +3233,7 @@ int64_t ObGroupByCell::to_string(char *buf, const int64_t buf_len) const
int64_t pos = 0;
J_OBJ_START();
J_KV(K_(batch_size),
K_(row_capacity),
K_(group_by_col_offset),
KP_(group_by_col_expr),
K_(agg_cells),

View File

@ -657,12 +657,14 @@ public:
OB_INLINE bool is_processing() const { return is_processing_; }
OB_INLINE void set_is_processing(const bool is_processing) { is_processing_ = is_processing; }
OB_INLINE void reset_projected_cnt() { projected_cnt_ = 0; }
OB_INLINE void set_row_capacity(const int64_t row_capacity) { row_capacity_ = row_capacity; }
template <typename T>
int decide_use_group_by(const int64_t row_cnt, const int64_t read_cnt, const int64_t distinct_cnt, const T *bitmap, bool &use_group_by)
{
int ret = OB_SUCCESS;
const bool is_valid_bitmap = nullptr != bitmap && !bitmap->is_all_true();
use_group_by = read_cnt * USE_GROUP_BY_READ_CNT_FACTOR > row_cnt &&
use_group_by = row_capacity_ == batch_size_ &&
read_cnt * USE_GROUP_BY_READ_CNT_FACTOR > row_cnt &&
distinct_cnt < USE_GROUP_BY_MAX_DISTINCT_CNT &&
distinct_cnt < row_cnt * USE_GROUP_BY_DISTINCT_RATIO &&
(!is_valid_bitmap ||
@ -674,7 +676,8 @@ public:
LOG_WARN("Failed to prepare group by datum buf", K(ret));
}
}
LOG_DEBUG("[GROUP BY PUSHDOWN]", K(ret), K(row_cnt), K(read_cnt), K(distinct_cnt), K(is_valid_bitmap), K(use_group_by),
LOG_TRACE("[GROUP BY PUSHDOWN]", K(ret), K(row_cnt), K(read_cnt), K(distinct_cnt), K(is_valid_bitmap), K(use_group_by),
K_(batch_size), K_(row_capacity),
"popcnt", is_valid_bitmap ? bitmap->popcnt() : 0,
"size", is_valid_bitmap ? bitmap->size() : 0);
return ret;
@ -692,6 +695,7 @@ private:
static constexpr double USE_GROUP_BY_DISTINCT_RATIO = 0.5;
static const int64_t USE_GROUP_BY_FILTER_FACTOR = 2;
int64_t batch_size_;
int64_t row_capacity_;
int32_t group_by_col_offset_;
sql::ObExpr *group_by_col_expr_;
ObAggGroupByDatumBuf *group_by_col_datum_buf_;

View File

@ -173,10 +173,13 @@ int ObVectorStore::reuse_capacity(const int64_t capacity)
int ret = OB_SUCCESS;
if (OB_FAIL(ObBlockBatchedRowStore::reuse_capacity(capacity))) {
LOG_WARN("Fail to reuse capacity", K(ret), K(capacity));
} else {
if (nullptr != group_by_cell_ && !group_by_cell_->is_processing()) {
} else if (nullptr != group_by_cell_) {
if (!group_by_cell_->is_processing()) {
group_by_cell_->reuse();
}
group_by_cell_->set_row_capacity(capacity);
}
if (OB_SUCC(ret)) {
count_ = 0;
}
return ret;