From c9397814117efbb6d54cfbb69bddeea3a2b9f5b3 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Fri, 21 Jun 2024 22:01:39 +0800 Subject: [PATCH] [Pick 2.1](inverted index) fix wrong no need read data when need_remaining_after_evaluate (#36684) When an equality predicate is applied to a column that has an inverted index with a parser, the predicate must still be evaluated after the index lookup (need_remaining_after_evaluate). In this situation the column's data must be read, so the "no need to read data" optimization cannot be applied to it; the column is now marked in _need_read_data_indices. ## Proposed changes From (#36637) --- .../rowset/segment_v2/segment_iterator.cpp | 1 + .../inverted_index_p0/test_need_read_data.out | 6 ++++ .../test_need_read_data.groovy | 33 ++++++++++++++++++- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 614604494a..d6a3ae6fdd 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1093,6 +1093,7 @@ Status SegmentIterator::_apply_inverted_index_on_column_predicate( if (need_remaining_after_evaluate) { remaining_predicates.emplace_back(pred); + _need_read_data_indices[pred->column_id()] = true; return Status::OK(); } diff --git a/regression-test/data/inverted_index_p0/test_need_read_data.out b/regression-test/data/inverted_index_p0/test_need_read_data.out index d4ea5870e3..fdfe790afd 100644 --- a/regression-test/data/inverted_index_p0/test_need_read_data.out +++ b/regression-test/data/inverted_index_p0/test_need_read_data.out @@ -13,3 +13,9 @@ 4 8 1 9 +-- !sql1 -- +1 + +-- !sql2 -- +2024-06-17T15:16:49 tengxun2 + diff --git a/regression-test/suites/inverted_index_p0/test_need_read_data.groovy b/regression-test/suites/inverted_index_p0/test_need_read_data.groovy index 86993d81e0..694b7856be 100644 --- a/regression-test/suites/inverted_index_p0/test_need_read_data.groovy +++ b/regression-test/suites/inverted_index_p0/test_need_read_data.groovy @@ -79,4 +79,35 @@ suite("test_need_read_data", "p0"){ } 
finally { //try_sql("DROP TABLE IF EXISTS ${testTable}") } -} \ No newline at end of file + + def indexTbName2 = "test_need_read_data_2" + + sql "DROP TABLE IF EXISTS ${indexTbName2}" + + sql """ + create table ${indexTbName2} ( + a datetime not null, + b varchar not null, + INDEX idx_inverted_b (`b`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`a`) + COMMENT '' + DISTRIBUTED BY HASH(`a`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """insert into ${indexTbName2} values + ('2024-06-03 15:16:49.266678','shanghai'), + ('2024-06-02 15:16:49.266678','shenzhen'), + ('2024-06-01 15:16:49.266678','beijing'), + ('2024-06-13 15:16:49.266678','beijing'), + ('2024-06-14 15:16:49.266678','beijing'), + ('2024-06-15 15:16:49.266678','shanghai'), + ('2024-06-16 15:16:49.266678','tengxun'), + ('2024-06-17 15:16:49.266678','tengxun2') + """ + + qt_sql1 """ select COUNT(1) from ${indexTbName2} WHERE a >= '2024-06-15 00:00:00' AND b = 'tengxun2' and `b` match 'tengxun2' ; """ + qt_sql2 """ select * from ${indexTbName2} WHERE a >= '2024-06-15 00:00:00' AND b = 'tengxun2' and `b` match 'tengxun2' ; """ +}