[improvement](statistics) Use DUJ1 to collect NDV for multiple bucket columns (#26950)
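Use the DUJ1 estimator to collect NDV (number of distinct values) for columns of tables that have multiple bucket columns; only a table's single bucket column still uses the linear estimator.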
This commit is contained in:
@ -90,7 +90,6 @@ import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
@ -789,6 +788,7 @@ public class OlapTable extends Table {
|
||||
defaultDistributionInfo.markAutoBucket();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDistributionColumnNames() {
|
||||
Set<String> distributionColumnNames = Sets.newHashSet();
|
||||
if (defaultDistributionInfo instanceof RandomDistributionInfo) {
|
||||
@ -2377,7 +2377,7 @@ public class OlapTable extends Table {
|
||||
public boolean isDistributionColumn(String columnName) {
|
||||
Set<String> distributeColumns = getDistributionColumnNames()
|
||||
.stream().map(String::toLowerCase).collect(Collectors.toSet());
|
||||
return distributeColumns.contains(columnName.toLowerCase(Locale.ROOT));
|
||||
return distributeColumns.contains(columnName.toLowerCase());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@ -27,6 +27,7 @@ import org.apache.doris.statistics.TableStatsMeta;
|
||||
import org.apache.doris.thrift.TTableDescriptor;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.common.collect.Sets;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
@ -262,5 +263,9 @@ public interface TableIf {
|
||||
default boolean isPartitionColumn(String columnName) {
|
||||
return false;
|
||||
}
|
||||
|
||||
default Set<String> getDistributionColumnNames() {
|
||||
return Sets.newHashSet();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -703,7 +703,13 @@ public class HMSExternalTable extends ExternalTable {
|
||||
@Override
|
||||
public boolean isDistributionColumn(String columnName) {
|
||||
return getRemoteTable().getSd().getBucketCols().stream().map(String::toLowerCase)
|
||||
.collect(Collectors.toSet()).contains(columnName.toLowerCase(Locale.ROOT));
|
||||
.collect(Collectors.toSet()).contains(columnName.toLowerCase());
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<String> getDistributionColumnNames() {
|
||||
return getRemoteTable().getSd().getBucketCols().stream().map(String::toLowerCase)
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -140,8 +140,9 @@ public class HMSAnalysisTask extends BaseAnalysisTask {
|
||||
String.valueOf(sampleInfo.first * targetRows / StatisticsUtil.getHugeTableSampleRows()));
|
||||
}
|
||||
}
|
||||
// Distribution columns don't fit for DUJ1 estimator, use linear estimator.
|
||||
if (tbl.isDistributionColumn(col.getName())) {
|
||||
// A single distribution column is not a good fit for the DUJ1 estimator; use the linear estimator instead.
|
||||
Set<String> distributionColumns = tbl.getDistributionColumnNames();
|
||||
if (distributionColumns.size() == 1 && distributionColumns.contains(col.getName().toLowerCase())) {
|
||||
bucketFlag = true;
|
||||
sb.append(LINEAR_ANALYZE_TEMPLATE);
|
||||
params.put("rowCount", "ROUND(count(1) * ${scaleFactor})");
|
||||
|
||||
@ -129,8 +129,9 @@ public class OlapAnalysisTask extends BaseAnalysisTask {
|
||||
}
|
||||
StringSubstitutor stringSubstitutor = new StringSubstitutor(params);
|
||||
String sql;
|
||||
// Distribution columns don't fit for DUJ1 estimator, use linear estimator.
|
||||
if (tbl.isDistributionColumn(col.getName())) {
|
||||
// A single distribution column is not a good fit for the DUJ1 estimator; use the linear estimator instead.
|
||||
Set<String> distributionColumns = tbl.getDistributionColumnNames();
|
||||
if (distributionColumns.size() == 1 && distributionColumns.contains(col.getName().toLowerCase())) {
|
||||
params.put("min", StatisticsUtil.quote(min));
|
||||
params.put("max", StatisticsUtil.quote(max));
|
||||
sql = stringSubstitutor.replace(LINEAR_ANALYZE_TEMPLATE);
|
||||
|
||||
Reference in New Issue
Block a user