diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java index 50749b46c2..19389a6f2e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DecimalLiteral.java @@ -42,7 +42,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Objects; -public class DecimalLiteral extends LiteralExpr { +public class DecimalLiteral extends NumericLiteralExpr { private static final Logger LOG = LogManager.getLogger(DecimalLiteral.class); private BigDecimal value; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/FloatLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/FloatLiteral.java index b95b3860aa..1fba4edfb9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/FloatLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/FloatLiteral.java @@ -35,7 +35,7 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.text.NumberFormat; -public class FloatLiteral extends LiteralExpr { +public class FloatLiteral extends NumericLiteralExpr { private double value; public FloatLiteral() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IntLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/IntLiteral.java index f122d1edc4..e348c0be54 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IntLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IntLiteral.java @@ -37,7 +37,7 @@ import java.math.BigDecimal; import java.nio.ByteBuffer; import java.nio.ByteOrder; -public class IntLiteral extends LiteralExpr { +public class IntLiteral extends NumericLiteralExpr { private static final Logger LOG = LogManager.getLogger(IntLiteral.class); public static final long TINY_INT_MIN = Byte.MIN_VALUE; // -2^7 ~ 2^7 - 1 diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java index c284b963d5..6e8cd3ed2e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LargeIntLiteral.java @@ -38,7 +38,7 @@ import java.nio.ByteOrder; import java.util.Objects; // large int for the num that native types can not -public class LargeIntLiteral extends LiteralExpr { +public class LargeIntLiteral extends NumericLiteralExpr { private static final Logger LOG = LogManager.getLogger(LargeIntLiteral.class); // -2^127 diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/NumericLiteralExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/NumericLiteralExpr.java new file mode 100644 index 0000000000..31cde22078 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/NumericLiteralExpr.java @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// This file is copied from +// https://github.com/apache/impala/blob/branch-2.9.0/fe/src/main/java/org/apache/impala/LiteralExpr.java +// and modified by Doris + +package org.apache.doris.analysis; + +public abstract class NumericLiteralExpr extends LiteralExpr { + public NumericLiteralExpr() { + super(); + } + + public NumericLiteralExpr(NumericLiteralExpr other) { + super(other); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java index f82f509ba0..acf6bddfe5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/ExpressionEstimation.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.stats; import org.apache.doris.analysis.ArithmeticExpr.Operator; +import org.apache.doris.analysis.NumericLiteralExpr; import org.apache.doris.analysis.StringLiteral; import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Add; @@ -169,35 +170,50 @@ public class ExpressionEstimation extends ExpressionVisitor slotToColumnStat = new HashMap<>(); + ColumnStatisticBuilder builder = new ColumnStatisticBuilder() + .setNdv(100) + .setMinExpr(new StringLiteral("01")) + .setMinValue(13333333) + .setMaxExpr(new StringLiteral("A9")) + .setMaxValue(23333333); + slotToColumnStat.put(a, builder.build()); + Statistics stats = new Statistics(1000, slotToColumnStat); + Cast cast = new Cast(a, DoubleType.INSTANCE); + ColumnStatistic est = ExpressionEstimation.estimate(cast, stats); + Assertions.assertTrue(Double.isInfinite(est.minValue)); + Assertions.assertTrue(Double.isInfinite(est.maxValue)); + Assertions.assertNull(est.minExpr); + Assertions.assertNull(est.maxExpr); + } + + // cast(str to date) = date + // both min and max can be converted to date + @Test + public void testCastStrToDateSuccess() { + SlotReference a = new SlotReference("a", StringType.INSTANCE); + Map slotToColumnStat = new HashMap<>(); + ColumnStatisticBuilder builder = new ColumnStatisticBuilder() + .setNdv(100) + .setMinExpr(new StringLiteral("2020-01-01")) + .setMinValue(20200101000000.0) + .setMaxExpr(new StringLiteral("2021-01-01")) + .setMaxValue(20210101000000.0); + slotToColumnStat.put(a, builder.build()); + Statistics stats = new Statistics(1000, slotToColumnStat); + Cast cast = new Cast(a, DateType.INSTANCE); + ColumnStatistic est = ExpressionEstimation.estimate(cast, stats); + Assertions.assertTrue(est.minExpr instanceof DateLiteral); + Assertions.assertTrue(est.maxExpr instanceof DateLiteral); + Assertions.assertEquals(est.minValue, 20200101000000.0); + Assertions.assertEquals(est.maxValue, 20210101000000.0); + } + + // cast(str to date) = date + // min or max cannot be converted to date + @Test + public void testCastStrToDateFail() { + SlotReference a = new SlotReference("a", StringType.INSTANCE); + Map slotToColumnStat = new HashMap<>(); + ColumnStatisticBuilder builder = new ColumnStatisticBuilder() + .setNdv(100) + .setMinExpr(new StringLiteral("2020-01-01")) + .setMinValue(20200101000000.0) + .setMaxExpr(new StringLiteral("2021abcdefg")) + .setMaxValue(20210101000000.0); + slotToColumnStat.put(a, builder.build()); + Statistics stats = new Statistics(1000, slotToColumnStat); + Cast cast = new Cast(a, DateType.INSTANCE); + ColumnStatistic est = ExpressionEstimation.estimate(cast, stats); + Assertions.assertTrue(Double.isInfinite(est.minValue)); + Assertions.assertTrue(Double.isInfinite(est.maxValue)); + Assertions.assertNull(est.minExpr); + Assertions.assertNull(est.maxExpr); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java index b476cc563b..7bad73801a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/stats/FilterEstimationTest.java @@ -835,7 +835,9 @@ class FilterEstimationTest { .setNdv(100) .setAvgSizeByte(4) .setNumNulls(0) + .setMaxExpr(new IntLiteral(100)) .setMaxValue(100) + .setMinExpr(new IntLiteral(0)) .setMinValue(0) .setCount(100); DoubleLiteral begin = new DoubleLiteral(40.0); @@ -847,7 +849,7 @@ class FilterEstimationTest { stats.addColumnStats(a, builder.build()); FilterEstimation filterEstimation = new FilterEstimation(); Statistics result = filterEstimation.estimate(and, stats); - Assertions.assertEquals(result.getRowCount(), 10, 0.01); + Assertions.assertEquals(10, result.getRowCount(), 0.01); ColumnStatistic colStats = result.findColumnStatistics(a); Assertions.assertTrue(colStats != null); Assertions.assertEquals(10, colStats.ndv, 0.1); diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query4.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query4.out index e40a7c0ebe..678e8bf71a 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query4.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query4.out @@ -67,18 +67,18 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------PhysicalProject ------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000)) --------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=() -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------filter((t_s_secyear.dyear = 2000) and (t_s_secyear.sale_type = 's')) -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) -----------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=() -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------PhysicalProject -----------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c')) -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=() +----------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=() ------------------------PhysicalDistribute[DistributionSpecHash] --------------------------PhysicalProject ----------------------------filter((t_s_firstyear.dyear = 1999) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.000000)) ------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------filter((t_s_secyear.dyear = 2000) and (t_s_secyear.sale_type = 's')) +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------PhysicalProject +--------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c')) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query58.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query58.out index c18a9cf868..6a846c34d2 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query58.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query58.out @@ -6,17 +6,17 @@ PhysicalResultSink ------PhysicalDistribute[DistributionSpecGather] --------PhysicalTopN[LOCAL_SORT] ----------PhysicalProject -------------hashJoin[INNER_JOIN] hashCondition=((ss_items.item_id = ws_items.item_id)) otherCondition=((cast(cs_item_rev as DOUBLE) <= cast((1.1 * ws_item_rev) as DOUBLE)) and (cast(cs_item_rev as DOUBLE) >= cast((0.9 * ws_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) <= cast((1.1 * ws_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) >= cast((0.9 * ws_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) <= cast((1.1 * cs_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) <= cast((1.1 * ss_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) >= cast((0.9 * cs_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) >= cast((0.9 * ss_item_rev) as DOUBLE))) build RFs:RF13 item_id->[i_item_id] +------------hashJoin[INNER_JOIN] hashCondition=((ss_items.item_id = cs_items.item_id)) otherCondition=((cast(cs_item_rev as DOUBLE) <= cast((1.1 * ss_item_rev) as DOUBLE)) and (cast(cs_item_rev as DOUBLE) <= cast((1.1 * ws_item_rev) as DOUBLE)) and (cast(cs_item_rev as DOUBLE) >= cast((0.9 * ss_item_rev) as DOUBLE)) and (cast(cs_item_rev as DOUBLE) >= cast((0.9 * ws_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) <= cast((1.1 * cs_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) >= cast((0.9 * cs_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) <= cast((1.1 * cs_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) >= cast((0.9 * cs_item_rev) as DOUBLE))) build RFs:RF13 item_id->[i_item_id] --------------PhysicalProject ----------------hashAgg[GLOBAL] ------------------PhysicalDistribute[DistributionSpecHash] --------------------hashAgg[LOCAL] ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF12 i_item_sk->[ws_item_sk] +------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF12 i_item_sk->[cs_item_sk] --------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF11 d_date_sk->[ws_sold_date_sk] +----------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF11 d_date_sk->[cs_sold_date_sk] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF11 RF12 +--------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF11 RF12 ------------------------------PhysicalDistribute[DistributionSpecReplicated] --------------------------------PhysicalProject ----------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF10 d_date->[d_date] @@ -36,64 +36,63 @@ PhysicalResultSink --------------------------PhysicalDistribute[DistributionSpecHash] ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[item] apply RFs: RF13 ---------------PhysicalProject -----------------hashJoin[INNER_JOIN] hashCondition=((ss_items.item_id = cs_items.item_id)) otherCondition=((cast(cs_item_rev as DOUBLE) <= cast((1.1 * ss_item_rev) as DOUBLE)) and (cast(cs_item_rev as DOUBLE) >= cast((0.9 * ss_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) <= cast((1.1 * cs_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) >= cast((0.9 * cs_item_rev) as DOUBLE))) build RFs:RF8 item_id->[i_item_id] -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 i_item_sk->[ss_item_sk] -------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7 -----------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------PhysicalProject ---------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF5 d_date->[d_date] -----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF5 -----------------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------------PhysicalProject ---------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF4 d_week_seq->[d_week_seq] -----------------------------------------------PhysicalProject -------------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF4 -----------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------------------PhysicalAssertNumRows ---------------------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------------------PhysicalProject -------------------------------------------------------filter((date_dim.d_date = '2001-06-16')) ---------------------------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalDistribute[DistributionSpecHash] +--------------hashJoin[INNER_JOIN] hashCondition=((ss_items.item_id = ws_items.item_id)) otherCondition=((cast(ss_item_rev as DOUBLE) <= cast((1.1 * ws_item_rev) as DOUBLE)) and (cast(ss_item_rev as DOUBLE) >= cast((0.9 * ws_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) <= cast((1.1 * ss_item_rev) as DOUBLE)) and (cast(ws_item_rev as DOUBLE) >= cast((0.9 * ss_item_rev) as DOUBLE))) build RFs:RF8 item_id->[i_item_id] +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 i_item_sk->[ss_item_sk] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[item] apply RFs: RF8 -------------------PhysicalProject ---------------------hashAgg[GLOBAL] -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------hashAgg[LOCAL] ---------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[cs_item_sk] -------------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------------hashJoin[INNER_JOIN] hashCondition=((catalog_sales.cs_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[cs_sold_date_sk] +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7 +--------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[catalog_sales] apply RFs: RF2 RF3 -----------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------PhysicalProject ---------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF1 d_date->[d_date] +------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF5 d_date->[d_date] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF5 +--------------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1 -----------------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------------PhysicalProject ---------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF0 d_week_seq->[d_week_seq] -----------------------------------------------PhysicalProject -------------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF0 -----------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------------------PhysicalAssertNumRows ---------------------------------------------------PhysicalDistribute[DistributionSpecGather] -----------------------------------------------------PhysicalProject -------------------------------------------------------filter((date_dim.d_date = '2001-06-16')) ---------------------------------------------------------PhysicalOlapScan[date_dim] -------------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF4 d_week_seq->[d_week_seq] +--------------------------------------------PhysicalProject +----------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF4 +--------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------PhysicalAssertNumRows +------------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_date = '2001-06-16')) +------------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[item] apply RFs: RF8 +----------------PhysicalProject +------------------hashAgg[GLOBAL] +--------------------PhysicalDistribute[DistributionSpecHash] +----------------------hashAgg[LOCAL] +------------------------PhysicalProject +--------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ws_item_sk] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[item] +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 +--------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------PhysicalProject +------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF1 d_date->[d_date] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1 +--------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------PhysicalProject +------------------------------------------hashJoin[INNER_JOIN] hashCondition=((date_dim.d_week_seq = date_dim.d_week_seq)) otherCondition=() build RFs:RF0 d_week_seq->[d_week_seq] +--------------------------------------------PhysicalProject +----------------------------------------------PhysicalOlapScan[date_dim] apply RFs: RF0 +--------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------PhysicalAssertNumRows +------------------------------------------------PhysicalDistribute[DistributionSpecGather] +--------------------------------------------------PhysicalProject +----------------------------------------------------filter((date_dim.d_date = '2001-06-16')) +------------------------------------------------------PhysicalOlapScan[date_dim] +----------------------------PhysicalDistribute[DistributionSpecHash] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query4.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query4.out index 133758d014..a2d3494e52 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query4.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query4.out @@ -67,18 +67,18 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------PhysicalProject ------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000)) --------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=() -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------filter((t_s_secyear.dyear = 2000) and (t_s_secyear.sale_type = 's')) -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) -----------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=() -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------PhysicalProject -----------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c')) -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=() +----------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=() ------------------------PhysicalDistribute[DistributionSpecHash] --------------------------PhysicalProject ----------------------------filter((t_s_firstyear.dyear = 1999) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.000000)) ------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------filter((t_s_secyear.dyear = 2000) and (t_s_secyear.sale_type = 's')) +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------PhysicalProject +--------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c')) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query58.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query58.out index d49c4adfe9..05c0d34df1 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query58.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query58.out @@ -42,11 +42,11 @@ PhysicalResultSink --------------------PhysicalDistribute[DistributionSpecHash] ----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 i_item_sk->[ws_item_sk] +--------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 i_item_sk->[ss_item_sk] ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ws_sold_date_sk] +------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7 +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7 --------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------PhysicalProject ------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF5 d_date->[d_date] @@ -71,11 +71,11 @@ PhysicalResultSink --------------------PhysicalDistribute[DistributionSpecHash] ----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() +--------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] +------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 --------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------PhysicalProject ------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF1 d_date->[d_date] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out index e40a7c0ebe..678e8bf71a 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query4.out @@ -67,18 +67,18 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------PhysicalProject ------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000)) --------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) ---------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=() -----------------------PhysicalDistribute[DistributionSpecHash] -------------------------PhysicalProject ---------------------------filter((t_s_secyear.dyear = 2000) and (t_s_secyear.sale_type = 's')) -----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) -----------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=() -------------------------PhysicalDistribute[DistributionSpecHash] ---------------------------PhysicalProject -----------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c')) -------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +--------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=() +----------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=() ------------------------PhysicalDistribute[DistributionSpecHash] --------------------------PhysicalProject ----------------------------filter((t_s_firstyear.dyear = 1999) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.000000)) ------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------PhysicalProject +----------------------------filter((t_s_secyear.dyear = 2000) and (t_s_secyear.sale_type = 's')) +------------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) +----------------------PhysicalDistribute[DistributionSpecHash] +------------------------PhysicalProject +--------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c')) +----------------------------PhysicalCteConsumer ( cteId=CTEId#0 ) diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query58.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query58.out index 6404cfaf9d..7e91108980 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query58.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query58.out @@ -42,11 +42,11 @@ PhysicalResultSink --------------------PhysicalDistribute[DistributionSpecHash] ----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 i_item_sk->[ws_item_sk] +--------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF7 i_item_sk->[ss_item_sk] ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ws_sold_date_sk] +------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF6 RF7 +----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF6 RF7 --------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------PhysicalProject ------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF5 d_date->[d_date] @@ -71,11 +71,11 @@ PhysicalResultSink --------------------PhysicalDistribute[DistributionSpecHash] ----------------------hashAgg[LOCAL] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ss_item_sk] +--------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF3 i_item_sk->[ws_item_sk] ----------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ss_sold_date_sk] +------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF2 d_date_sk->[ws_sold_date_sk] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[store_sales] apply RFs: RF2 RF3 +----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF2 RF3 --------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------PhysicalProject ------------------------------------hashJoin[LEFT_SEMI_JOIN] hashCondition=((date_dim.d_date = date_dim.d_date)) otherCondition=() build RFs:RF1 d_date->[d_date]