[improvement](statistics) Enable estimating Hive table row count using file size. (#37218) (#37694)

backport: https://github.com/apache/doris/pull/37218
This commit is contained in:
Jibing-Li
2024-07-12 13:47:27 +08:00
committed by GitHub
parent 37583d2d0a
commit 259d28407e
4 changed files with 158 additions and 145 deletions

View File

@ -875,7 +875,7 @@ public class HMSExternalTable extends ExternalTable implements MTMVRelatedTableI
}
int totalPartitionSize = partitionValues == null ? 1 : partitionValues.getIdToPartitionItem().size();
if (samplePartitionSize < totalPartitionSize) {
if (samplePartitionSize != 0 && samplePartitionSize < totalPartitionSize) {
totalSize = totalSize * totalPartitionSize / samplePartitionSize;
}
return totalSize / estimatedRowSize;

View File

@ -146,7 +146,7 @@ public final class GlobalVariable {
+ "Getting file list may be a time-consuming operation. "
+ "If you don't need to estimate the number of rows in the table "
+ "or it affects performance, you can disable this feature."})
public static boolean enable_get_row_count_from_file_list = false;
public static boolean enable_get_row_count_from_file_list = true;
@VariableMgr.VarAttr(name = READ_ONLY, flag = VariableMgr.GLOBAL,
description = {"仅用于兼容MySQL生态,暂无实际意义",