From d4f2db74f957a8ea8eeb760f037cc57ba99f83c7 Mon Sep 17 00:00:00 2001 From: xzj7019 <131111794+xzj7019@users.noreply.github.com> Date: Sat, 25 Nov 2023 11:04:51 +0800 Subject: [PATCH] [fix](nereids) fix stats error when using dateTime type filter (#27571) Currently Doris supports filter stats estimation only for the date type, not for the datetime type. As a result, a filter on a datetime column whose bounds share the same date but differ in time gets an inaccurate selectivity and a wrong estimated row count, for example: where o.book_time >= '2020-03-01 00:00:00.0' and o.book_time <= '2020-03-01 23:59:59.0'; This PR adds filter estimation for the datetime type (only hh:mm:ss precision is supported) and improves the row count estimation for the above case. --- .../doris/nereids/types/DateTimeType.java | 21 ++++++++ .../doris/nereids/types/DateTimeV2Type.java | 20 ++++++++ .../apache/doris/nereids/types/DateType.java | 21 ++++++++ .../doris/nereids/types/DateV2Type.java | 21 ++++++++ .../nereids/types/coercion/DateLikeType.java | 33 ++++++------- .../test_datetime_filter_stats0.groovy | 49 +++++++++++++++++++ 6 files changed, 147 insertions(+), 18 deletions(-) create mode 100644 regression-test/suites/nereids_p0/explain/test_datetime_filter_stats0.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeType.java index d8a80f018c..250a69cbd3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeType.java @@ -21,6 +21,10 @@ import org.apache.doris.catalog.Type; import org.apache.doris.common.Config; import org.apache.doris.nereids.types.coercion.DateLikeType; +import java.time.DateTimeException; +import java.time.LocalDateTime; +import java.time.temporal.ChronoUnit; + /** * Datetime type in Nereids. 
*/ @@ -55,4 +59,21 @@ public class DateTimeType extends DateLikeType { public int width() { return WIDTH; } + + @Override + public double rangeLength(double high, double low) { + if (high == low) { + return 0; + } + if (Double.isInfinite(high) || Double.isInfinite(low)) { + return Double.POSITIVE_INFINITY; + } + try { + LocalDateTime to = toLocalDateTime(high); + LocalDateTime from = toLocalDateTime(low); + return ChronoUnit.SECONDS.between(from, to); + } catch (DateTimeException e) { + return Double.POSITIVE_INFINITY; + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeV2Type.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeV2Type.java index f312d6bc7c..b804815eb6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeV2Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateTimeV2Type.java @@ -26,6 +26,9 @@ import org.apache.doris.nereids.types.coercion.IntegralType; import com.google.common.base.Preconditions; +import java.time.DateTimeException; +import java.time.LocalDateTime; +import java.time.temporal.ChronoUnit; import java.util.Objects; /** @@ -126,4 +129,21 @@ public class DateTimeV2Type extends DateLikeType { public int getScale() { return scale; } + + @Override + public double rangeLength(double high, double low) { + if (high == low) { + return 0; + } + if (Double.isInfinite(high) || Double.isInfinite(low)) { + return Double.POSITIVE_INFINITY; + } + try { + LocalDateTime to = toLocalDateTime(high); + LocalDateTime from = toLocalDateTime(low); + return ChronoUnit.SECONDS.between(from, to); + } catch (DateTimeException e) { + return Double.POSITIVE_INFINITY; + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateType.java index ddffb56fb0..69572895dd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateType.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateType.java @@ -21,6 +21,10 @@ import org.apache.doris.catalog.Type; import org.apache.doris.common.Config; import org.apache.doris.nereids.types.coercion.DateLikeType; +import java.time.DateTimeException; +import java.time.LocalDate; +import java.time.temporal.ChronoUnit; + /** * Date type in Nereids. */ @@ -50,5 +54,22 @@ public class DateType extends DateLikeType { public int width() { return WIDTH; } + + @Override + public double rangeLength(double high, double low) { + if (high == low) { + return 0; + } + if (Double.isInfinite(high) || Double.isInfinite(low)) { + return Double.POSITIVE_INFINITY; + } + try { + LocalDate to = toLocalDate(high); + LocalDate from = toLocalDate(low); + return ChronoUnit.DAYS.between(from, to); + } catch (DateTimeException e) { + return Double.POSITIVE_INFINITY; + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateV2Type.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateV2Type.java index 9cf5efdbb6..0437fb0365 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateV2Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DateV2Type.java @@ -20,6 +20,10 @@ package org.apache.doris.nereids.types; import org.apache.doris.catalog.Type; import org.apache.doris.nereids.types.coercion.DateLikeType; +import java.time.DateTimeException; +import java.time.LocalDate; +import java.time.temporal.ChronoUnit; + /** * Date type in Nereids. 
*/ @@ -41,5 +45,22 @@ public class DateV2Type extends DateLikeType { public int width() { return WIDTH; } + + @Override + public double rangeLength(double high, double low) { + if (high == low) { + return 0; + } + if (Double.isInfinite(high) || Double.isInfinite(low)) { + return Double.POSITIVE_INFINITY; + } + try { + LocalDate to = toLocalDate(high); + LocalDate from = toLocalDate(low); + return ChronoUnit.DAYS.between(from, to); + } catch (DateTimeException e) { + return Double.POSITIVE_INFINITY; + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java index 46086e5c93..22ea99f00b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/coercion/DateLikeType.java @@ -27,15 +27,15 @@ import org.apache.doris.nereids.types.DateTimeV2Type; import org.apache.doris.nereids.types.DateType; import org.apache.doris.nereids.types.DateV2Type; -import java.time.DateTimeException; import java.time.LocalDate; -import java.time.temporal.ChronoUnit; +import java.time.LocalDateTime; /** * date like type. 
*/ public abstract class DateLikeType extends PrimitiveType { - private LocalDate toLocalDate(double d) { + + protected LocalDate toLocalDate(double d) { // d = (year * 10000 + month * 100 + day) * 1000000L; int date = (int) (d / 1000000); int day = date % 100; @@ -44,21 +44,18 @@ public abstract class DateLikeType extends PrimitiveType { return LocalDate.of(year, month, day); } - @Override - public double rangeLength(double high, double low) { - if (high == low) { - return 0; - } - if (Double.isInfinite(high) || Double.isInfinite(low)) { - return Double.POSITIVE_INFINITY; - } - try { - LocalDate to = toLocalDate(high); - LocalDate from = toLocalDate(low); - return ChronoUnit.DAYS.between(from, to); - } catch (DateTimeException e) { - return Double.POSITIVE_INFINITY; - } + protected LocalDateTime toLocalDateTime(double d) { + // d = (year * 10000 + month * 100 + day) * 1000000L + time + // time = (hour * 10000 + minute * 100 + second); + int date = (int) (d / 1000000); + int day = date % 100; + int month = (date / 100) % 100; + int year = date / 10000; + int time = (int) (d % 1000000); + int second = time % 100; + int minute = (time / 100) % 100; + int hour = time / 10000; + return LocalDateTime.of(year, month, day, hour, minute, second); } /** diff --git a/regression-test/suites/nereids_p0/explain/test_datetime_filter_stats0.groovy b/regression-test/suites/nereids_p0/explain/test_datetime_filter_stats0.groovy new file mode 100644 index 0000000000..317645e89d --- /dev/null +++ b/regression-test/suites/nereids_p0/explain/test_datetime_filter_stats0.groovy @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_datetime_filter_stats0") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + sql "DROP TABLE IF EXISTS test_datetime_filter_stats0" + sql """ CREATE TABLE `test_datetime_filter_stats0` ( + `id` int(11), + `is_delete` int, + `company_id` int, + `book_time` DATETIMEV2 + )ENGINE=OLAP + unique key (id) + distributed by hash(id) buckets 10 + properties( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """ alter table test_datetime_filter_stats0 modify column id set stats('row_count'='52899687', 'ndv'='52899687', 'num_nulls'='0', 'min_value'='1', 'max_value'='52899687', 'data_size'='4'); """ + sql """ alter table test_datetime_filter_stats0 modify column book_time set stats('row_count'='52899687', 'ndv'='23622730', 'num_nulls'='0', 'min_value'='2002-01-01 00:45:39', 'max_value'='2027-09-25 23:03:00', 'data_size'='10'); """ + sql """ alter table test_datetime_filter_stats0 modify column is_delete set stats('row_count'='52899687', 'ndv'='2', 'num_nulls'='0', 'min_value'='0', 'max_value'='1', 'data_size'='4'); """ + sql """ alter table test_datetime_filter_stats0 modify column company_id set stats('row_count'='52899687', 'ndv'='7559', 'num_nulls'='0', 'min_value'='2', 'max_value'='876981', 'data_size'='4'); """ + + explain { + sql("physical plan select count(1) from test_datetime_filter_stats0 o where o.book_time >= '2020-03-01 00:00:00.0' and o.book_time <= '2020-03-01 23:59:59.0';"); + notContains"stats=2.24" + } + + explain { + sql("physical plan select count(1) from 
test_datetime_filter_stats0 o where o.book_time >= '2020-03-01 00:00:00.0' and o.book_time <= '2020-03-01 00:00:01.0';"); + notContains"stats=2.24" + } +}