branch-2.1: [fix](be) fix parquet file reader not updating page index when processing it #52228 (#52782)
Cherry-picked from #52228

Co-authored-by: SWEI <z_swii@outlook.com>
Co-authored-by: zengsiwei <zengsiwei@kuaishou.com>
Co-authored-by: suxiaogang223 <suxiaogang223@icloud.com>
Committed by GitHub · parent 1775200cba · commit 6c5ec15e10
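Root cause: `_process_page_index` recorded each column's offset index with `_col_offsets.emplace(...)`, but a map's `emplace` is a no-op when the key already exists, so after the first row group the stale offset index kept being reused and pages could be skipped against the wrong row ranges. A minimal standalone sketch of the difference (the real `_col_offsets` maps a parquet column id to a `tparquet::OffsetIndex`; plain `int` values stand in for it here):

#include <iostream>
#include <map>

int main() {
    std::map<int, int> col_offsets;   // stand-in for _col_offsets

    col_offsets.emplace(7, 100);      // row group 0: inserts {7, 100}
    col_offsets.emplace(7, 200);      // row group 1: key exists, emplace does nothing
    std::cout << col_offsets[7] << "\n";  // prints 100 -- the stale entry survives

    col_offsets[7] = 200;             // the fix: operator[] assignment always overwrites
    std::cout << col_offsets[7] << "\n";  // prints 200
    return 0;
}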
be/src/common/config.cpp
@@ -1345,7 +1345,7 @@ DEFINE_mInt64(compaction_batch_size, "-1");
 // If set to false, the parquet reader will not use page index to filter data.
 // This is only for debug purpose, in case sometimes the page index
 // filter wrong data.
-DEFINE_mBool(enable_parquet_page_index, "false");
+DEFINE_mBool(enable_parquet_page_index, "true");
 
 DEFINE_mBool(ignore_not_found_file_in_external_table, "true");
 
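With the reader bug fixed, the default flips to "true", so page-index filtering is on unless explicitly disabled. Because the flag is declared with DEFINE_mBool it is a mutable config, meant as the debug escape hatch the comment above describes. A small standalone sketch of that pattern (the flag name matches the config, but the guard below is illustrative, not the exact Doris code):

#include <iostream>

namespace config {
// Illustrative stand-in for the DEFINE_mBool config; now defaults to true.
bool enable_parquet_page_index = true;
}

// Hypothetical reader step showing how such a debug flag typically gates
// page-index filtering.
void process_page_index() {
    if (!config::enable_parquet_page_index) {
        std::cout << "page index disabled: read the whole row group\n";
        return;
    }
    std::cout << "use the page index to skip non-matching pages\n";
}

int main() {
    process_page_index();                       // default: filtering on
    config::enable_parquet_page_index = false;  // debug: turn it off
    process_page_index();
    return 0;
}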
be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -860,7 +860,7 @@ Status ParquetReader::_process_page_index(const tparquet::RowGroup& row_group,
                 // use the union row range
                 skipped_row_ranges.emplace_back(skipped_row_range);
             }
-            _col_offsets.emplace(parquet_col_id, offset_index);
+            _col_offsets[parquet_col_id] = offset_index;
         }
         if (skipped_row_ranges.empty()) {
             read_whole_row_group();
small_2rowgroup.parquet (new test fixture)
Binary file not shown.
regression-test/data/external_table_p0/hive/test_hive_parquet_skip_page.out
@@ -1,4 +1,8 @@
 -- This file is automatically generated. You should know what you did if you want to edit this
+-- !small_2rowgroup --
+1024
+4049
+
 -- !q01 --
 1	2132	4633	4	28.00	28955.64	0.09	0.06	N	O	1996-04-21	1996-03-30	1996-05-16	NONE	AIR	lites. fluffily even de
 1	15635	638	6	32.00	49620.16	0.07	0.02	N	O	1996-01-30	1996-02-07	1996-02-03	DELIVER IN PERSON	MAIL	arefully slyly ex
regression-test/suites/external_table_p0/hive/test_hive_parquet_skip_page.groovy
@@ -94,11 +94,21 @@ suite("test_hive_parquet_skip_page", "p0,external,hive,external_docker,external_
         return;
     }
 
+    String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
+    def hdfs_port = context.config.otherConfigs.get("hive2HdfsPort")
+    def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}"
+    def hdfsUserName = "doris"
+    def uri = "${defaultFS}" + "/user/doris/preinstalled_data/parquet/small_2rowgroup.parquet"
+    qt_small_2rowgroup """ select * from HDFS(
+                "uri" = "${uri}",
+                "hadoop.username" = "${hdfsUserName}",
+                "format" = "parquet") where a = 1024 or a = 4049
+                order by a;"""
+
    for (String hivePrefix : ["hive2", "hive3"]) {
        try {
            String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort")
            String catalog_name = "${hivePrefix}_test_parquet_skip_page"
            String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
 
            sql """drop catalog if exists ${catalog_name}"""
            sql """create catalog if not exists ${catalog_name} properties (