diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index bd76e5f89c..f6da393576 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -233,7 +233,6 @@ HashJoinNode::HashJoinNode(ObjectPool* pool, const TPlanNode& tnode, const Descr _arena = std::make_unique(); _hash_table_variants = std::make_unique(); _process_hashtable_ctx_variants = std::make_unique(); - _init_join_op(); // avoid vector expand change block address. // one block can store 4g data, _build_blocks can store 128*4g data. @@ -278,7 +277,6 @@ Status HashJoinNode::init(const TPlanNode& tnode, RuntimeState* state) { for (size_t i = 0; i < _probe_expr_ctxs.size(); ++i) { _probe_ignore_null |= !probe_not_ignore_null[i]; } - _short_circuit_for_null_in_build_side = _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN; _probe_column_disguise_null.reserve(eq_join_conjuncts.size()); diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index c2c62d193f..691857c986 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -254,12 +254,6 @@ private: Sizes _probe_key_sz; Sizes _build_key_sz; - // For null aware left anti join, we apply a short circuit strategy. - // 1. Set _short_circuit_for_null_in_build_side to true if join operator is null aware left anti join. - // 2. In build phase, we stop building hash table when we meet the first null value and set _short_circuit_for_null_in_probe_side to true. - // 3. In probe phase, if _short_circuit_for_null_in_probe_side is true, join node returns empty block directly. Otherwise, probing will continue as the same as generic left anti join. - bool _short_circuit_for_null_in_build_side = false; - bool _short_circuit_for_null_in_probe_side = false; bool _is_broadcast_join = false; SharedHashTableController* _shared_hashtable_controller = nullptr; VRuntimeFilterSlots* _runtime_filter_slots; diff --git a/be/src/vec/exec/join/vjoin_node_base.cpp b/be/src/vec/exec/join/vjoin_node_base.cpp index 59d433aca7..731b383340 100644 --- a/be/src/vec/exec/join/vjoin_node_base.cpp +++ b/be/src/vec/exec/join/vjoin_node_base.cpp @@ -44,7 +44,12 @@ VJoinNodeBase::VJoinNodeBase(ObjectPool* pool, const TPlanNode& tnode, const Des _join_op == TJoinOp::LEFT_SEMI_JOIN)), _is_right_semi_anti(_join_op == TJoinOp::RIGHT_ANTI_JOIN || _join_op == TJoinOp::RIGHT_SEMI_JOIN), - _is_outer_join(_match_all_build || _match_all_probe) { + _is_left_semi_anti(_join_op == TJoinOp::LEFT_ANTI_JOIN || + _join_op == TJoinOp::LEFT_SEMI_JOIN || + _join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN), + _is_outer_join(_match_all_build || _match_all_probe), + _short_circuit_for_null_in_build_side(_join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { + _init_join_op(); if (tnode.__isset.hash_join_node) { _output_row_desc.reset( new RowDescriptor(descs, {tnode.hash_join_node.voutput_tuple_id}, {false})); diff --git a/be/src/vec/exec/join/vjoin_node_base.h b/be/src/vec/exec/join/vjoin_node_base.h index 94e4902fac..23a8e8a4e0 100644 --- a/be/src/vec/exec/join/vjoin_node_base.h +++ b/be/src/vec/exec/join/vjoin_node_base.h @@ -80,8 +80,16 @@ protected: bool _build_unique; // build a hash table without duplicated rows. Left semi/anti Join const bool _is_right_semi_anti; + const bool _is_left_semi_anti; const bool _is_outer_join; + // For null aware left anti join, we apply a short circuit strategy. + // 1. Set _short_circuit_for_null_in_build_side to true if join operator is null aware left anti join. + // 2. In build phase, we stop materialize build side when we meet the first null value and set _short_circuit_for_null_in_probe_side to true. + // 3. In probe phase, if _short_circuit_for_null_in_probe_side is true, join node returns empty block directly. Otherwise, probing will continue as the same as generic left anti join. + const bool _short_circuit_for_null_in_build_side = false; + bool _short_circuit_for_null_in_probe_side = false; + std::unique_ptr _output_row_desc; std::unique_ptr _intermediate_row_desc; // output expr diff --git a/be/src/vec/exec/join/vnested_loop_join_node.cpp b/be/src/vec/exec/join/vnested_loop_join_node.cpp index 5bea38a51f..34baab4a2e 100644 --- a/be/src/vec/exec/join/vnested_loop_join_node.cpp +++ b/be/src/vec/exec/join/vnested_loop_join_node.cpp @@ -43,9 +43,6 @@ VNestedLoopJoinNode::VNestedLoopJoinNode(ObjectPool* pool, const TPlanNode& tnod _old_version_flag(!tnode.__isset.nested_loop_join_node) {} Status VNestedLoopJoinNode::prepare(RuntimeState* state) { - DCHECK(_join_op == TJoinOp::CROSS_JOIN || _join_op == TJoinOp::INNER_JOIN || - _join_op == TJoinOp::LEFT_OUTER_JOIN || _join_op == TJoinOp::RIGHT_OUTER_JOIN || - _join_op == TJoinOp::FULL_OUTER_JOIN); SCOPED_TIMER(_runtime_profile->total_time_counter()); RETURN_IF_ERROR(VJoinNodeBase::prepare(state)); SCOPED_CONSUME_MEM_TRACKER(mem_tracker()); @@ -99,7 +96,7 @@ Status VNestedLoopJoinNode::_materialize_build_side(RuntimeState* state) { _build_rows += rows; _total_mem_usage += mem_usage; _build_blocks.emplace_back(std::move(block)); - if (_match_all_build) { + if (_match_all_build || _is_right_semi_anti) { _build_side_visited_flags.emplace_back(ColumnUInt8::create(rows, 0)); } } @@ -134,6 +131,7 @@ Status VNestedLoopJoinNode::get_next(RuntimeState* state, Block* block, bool* eo std::stack offset_stack; RETURN_IF_ERROR(std::visit( [&](auto&& join_op_variants, auto set_build_side_flag, auto set_probe_side_flag) { + using JoinOpType = std::decay_t; while (mutable_join_block.rows() < state->batch_size() && !_matched_rows_done) { // If this left block is exhausted or empty, we need to pull data from left child. if (_left_block_pos == _left_block.rows()) { @@ -173,7 +171,7 @@ Status VNestedLoopJoinNode::get_next(RuntimeState* state, Block* block, bool* eo Block tmp_block = mutable_join_block.to_block(0); Status status = _do_filtering_and_update_visited_flags( - &tmp_block, offset_stack); + &tmp_block, offset_stack, !_is_left_semi_anti); if (!status.OK()) { return status; } @@ -184,7 +182,9 @@ Status VNestedLoopJoinNode::get_next(RuntimeState* state, Block* block, bool* eo // probe row with null from build side. if (_current_build_pos == _build_blocks.size()) { if (!_matched_rows_done) { - _output_null_data(dst_columns, state->batch_size()); + _finalize_current_phase( + dst_columns, state->batch_size()); _reset_with_next_probe_row(dst_columns); } break; @@ -199,7 +199,7 @@ Status VNestedLoopJoinNode::get_next(RuntimeState* state, Block* block, bool* eo Block tmp_block = mutable_join_block.to_block(0); Status status = _do_filtering_and_update_visited_flags( - &tmp_block, offset_stack); + &tmp_block, offset_stack, !_is_right_semi_anti); mutable_join_block = MutableBlock(std::move(tmp_block)); if (!status.OK()) { return status; @@ -209,13 +209,15 @@ Status VNestedLoopJoinNode::get_next(RuntimeState* state, Block* block, bool* eo if constexpr (set_build_side_flag) { if (_matched_rows_done && _output_null_idx_build_side < _build_blocks.size()) { auto& cols = mutable_join_block.mutable_columns(); - _output_null_data(cols, state->batch_size()); + _finalize_current_phase( + cols, state->batch_size()); } } return Status::OK(); }, - _join_op_variants, make_bool_variant(_match_all_build), - make_bool_variant(_match_all_probe))); + _join_op_variants, make_bool_variant(_match_all_build || _is_right_semi_anti), + make_bool_variant(_match_all_probe || _is_left_semi_anti))); *eos = _match_all_build ? _output_null_idx_build_side == _build_blocks.size() && _matched_rows_done : _matched_rows_done; @@ -265,8 +267,8 @@ void VNestedLoopJoinNode::_process_left_child_block(MutableColumns& dst_columns, } } -template -void VNestedLoopJoinNode::_output_null_data(MutableColumns& dst_columns, size_t batch_size) { +template +void VNestedLoopJoinNode::_finalize_current_phase(MutableColumns& dst_columns, size_t batch_size) { if constexpr (BuildSide) { auto build_block_sz = _build_blocks.size(); size_t i = _output_null_idx_build_side; @@ -281,13 +283,21 @@ void VNestedLoopJoinNode::_output_null_data(MutableColumns& dst_columns, size_t std::vector selector(num_rows); size_t selector_idx = 0; for (size_t j = 0; j < num_rows; j++) { - if (!cur_visited_flags[j]) { - selector[selector_idx++] = j; + if constexpr (IsSemi) { + if (cur_visited_flags[j]) { + selector[selector_idx++] = j; + } + } else { + if (!cur_visited_flags[j]) { + selector[selector_idx++] = j; + } } } for (size_t j = 0; j < _num_probe_side_columns; ++j) { DCHECK(_join_op == TJoinOp::RIGHT_OUTER_JOIN || - _join_op == TJoinOp::FULL_OUTER_JOIN); + _join_op == TJoinOp::FULL_OUTER_JOIN || + _join_op == TJoinOp::RIGHT_ANTI_JOIN || + _join_op == TJoinOp::RIGHT_SEMI_JOIN); dst_columns[j]->insert_many_defaults(selector_idx); } @@ -319,8 +329,14 @@ void VNestedLoopJoinNode::_output_null_data(MutableColumns& dst_columns, size_t } _output_null_idx_build_side = i; } else { - if (_cur_probe_row_visited_flags) { - return; + if constexpr (IsSemi) { + if (!_cur_probe_row_visited_flags) { + return; + } + } else { + if (_cur_probe_row_visited_flags) { + return; + } } DCHECK_LT(_left_block_pos, _left_block.rows()); @@ -355,7 +371,7 @@ void VNestedLoopJoinNode::_reset_with_next_probe_row(MutableColumns& dst_columns template Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags( - Block* block, std::stack& offset_stack) { + Block* block, std::stack& offset_stack, bool materialize) { auto column_to_keep = block->columns(); // If we need to set visited flags for build side, // 1. Execute conjuncts and get a column with bool type to do filtering. @@ -406,7 +422,15 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags( if constexpr (SetProbeSideFlag) { _cur_probe_row_visited_flags |= simd::contain_byte(filter_data, size, 1); } - Block::filter_block_internal(block, filter, column_to_keep); +#define CLEAR_BLOCK \ + for (size_t i = 0; i < column_to_keep; ++i) { \ + block->get_by_position(i).column->assume_mutable()->clear(); \ + } + if (materialize) { + Block::filter_block_internal(block, filter, column_to_keep); + } else { + CLEAR_BLOCK + } } else if (auto* const_column = check_and_get_column(*filter_column)) { bool ret = const_column->get_bool(0); if (!ret) { @@ -431,6 +455,9 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags( if constexpr (SetProbeSideFlag) { _cur_probe_row_visited_flags |= ret; } + if (!materialize) { + CLEAR_BLOCK + } } } else { const IColumn::Filter& filter = @@ -457,10 +484,14 @@ Status VNestedLoopJoinNode::_do_filtering_and_update_visited_flags( _cur_probe_row_visited_flags |= simd::contain_byte(filter.data(), filter.size(), 1); } - Block::filter_block_internal(block, filter, column_to_keep); + if (materialize) { + Block::filter_block_internal(block, filter, column_to_keep); + } else { + CLEAR_BLOCK + } } - Block::erase_useless_column(block, column_to_keep); } +#undef CLEAR_BLOCK return Status::OK(); } diff --git a/be/src/vec/exec/join/vnested_loop_join_node.h b/be/src/vec/exec/join/vnested_loop_join_node.h index 0254c11cf2..d45dd9b101 100644 --- a/be/src/vec/exec/join/vnested_loop_join_node.h +++ b/be/src/vec/exec/join/vnested_loop_join_node.h @@ -68,10 +68,11 @@ private: const Block& now_process_build_block) const; template - Status _do_filtering_and_update_visited_flags(Block* block, std::stack& offset_stack); + Status _do_filtering_and_update_visited_flags(Block* block, std::stack& offset_stack, + bool materialize); - template - void _output_null_data(MutableColumns& dst_columns, size_t batch_size); + template + void _finalize_current_phase(MutableColumns& dst_columns, size_t batch_size); void _reset_with_next_probe_row(MutableColumns& dst_columns); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java index 06416c162b..f141067038 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/StmtRewriter.java @@ -724,25 +724,23 @@ public class StmtRewriter { } if (!hasEqJoinPred && !inlineView.isCorrelated()) { - // TODO: Remove this when independent subquery evaluation is implemented. - // TODO: Requires support for non-equi joins. - if (!expr.getSubquery().returnsScalarColumn()) { - throw new AnalysisException("Unsupported predicate with subquery: " - + expr.toSql()); + // Join with InPredicate is actually an equal join, so we choose HashJoin. + Preconditions.checkArgument(!(expr instanceof InPredicate)); + if (expr instanceof ExistsPredicate) { + joinOp = ((ExistsPredicate) expr).isNotExists() ? JoinOperator.LEFT_ANTI_JOIN + : JoinOperator.LEFT_SEMI_JOIN; + } else { + joinOp = JoinOperator.CROSS_JOIN; + // We can equal the aggregate subquery using a cross join. All conjuncts + // that were extracted from the subquery are added to stmt's WHERE clause. + stmt.whereClause = + CompoundPredicate.createConjunction(onClausePredicate, stmt.whereClause); } - // TODO: Requires support for null-aware anti-join mode in nested-loop joins - if (expr.getSubquery().isScalarSubquery() && expr instanceof InPredicate - && ((InPredicate) expr).isNotIn()) { - throw new AnalysisException("Unsupported NOT IN predicate with subquery: " - + expr.toSql()); + inlineView.setJoinOp(joinOp); + if (joinOp != JoinOperator.CROSS_JOIN) { + inlineView.setOnClause(onClausePredicate); } - - // We can equal the aggregate subquery using a cross join. All conjuncts - // that were extracted from the subquery are added to stmt's WHERE clause. - stmt.whereClause = - CompoundPredicate.createConjunction(onClausePredicate, stmt.whereClause); - inlineView.setJoinOp(JoinOperator.CROSS_JOIN); // Indicate that the CROSS JOIN may add a new visible tuple to stmt's // select list (if the latter contains an unqualified star item '*') return true; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 31fe935099..3ba7d548b0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -71,12 +71,12 @@ import org.apache.doris.nereids.util.JoinUtils; import org.apache.doris.nereids.util.Utils; import org.apache.doris.planner.AggregationNode; import org.apache.doris.planner.AssertNumRowsNode; -import org.apache.doris.planner.CrossJoinNode; import org.apache.doris.planner.DataPartition; import org.apache.doris.planner.EmptySetNode; import org.apache.doris.planner.ExchangeNode; import org.apache.doris.planner.HashJoinNode; import org.apache.doris.planner.HashJoinNode.DistributionMode; +import org.apache.doris.planner.NestedLoopJoinNode; import org.apache.doris.planner.OlapScanNode; import org.apache.doris.planner.PlanFragment; import org.apache.doris.planner.PlanNode; @@ -719,10 +719,13 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor leftChildOutputMap = Maps.newHashMap(); @@ -763,15 +766,14 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor sd.setIsNullable(true)); } - crossJoinNode.setvIntermediateTupleDescList(Lists.newArrayList(intermediateDescriptor)); + nestedLoopJoinNode.setvIntermediateTupleDescList(Lists.newArrayList(intermediateDescriptor)); rightFragment.getPlanRoot().setCompactData(false); - crossJoinNode.setChild(0, leftFragment.getPlanRoot()); - connectChildFragment(crossJoinNode, 1, leftFragment, rightFragment, context); - leftFragment.setPlanRoot(crossJoinNode); + nestedLoopJoinNode.setChild(0, leftFragment.getPlanRoot()); + connectChildFragment(nestedLoopJoinNode, 1, leftFragment, rightFragment, context); + leftFragment.setPlanRoot(nestedLoopJoinNode); nestedLoopJoin.getOtherJoinConjuncts().stream() - .map(e -> ExpressionTranslator.translate(e, context)).forEach(crossJoinNode::addConjunct); + .map(e -> ExpressionTranslator.translate(e, context)).forEach(nestedLoopJoinNode::addConjunct); if (nestedLoopJoin.isShouldTranslateOutput()) { // translate output expr on intermediate tuple @@ -817,11 +819,11 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor context.createSlotDesc(outputDescriptor, s)); - crossJoinNode.setvOutputTupleDesc(outputDescriptor); - crossJoinNode.setvSrcToOutputSMap(srcToOutput); + nestedLoopJoinNode.setvOutputTupleDesc(outputDescriptor); + nestedLoopJoinNode.setvSrcToOutputSMap(srcToOutput); } if (nestedLoopJoin.getStats() != null) { - crossJoinNode.setCardinality((long) nestedLoopJoin.getStats().getRowCount()); + nestedLoopJoinNode.setCardinality((long) nestedLoopJoin.getStats().getRowCount()); } return leftFragment; } else { @@ -865,10 +867,10 @@ public class PhysicalPlanTranslator extends DefaultPlanVisitor computeInputSlotIds(Analyzer analyzer) throws NotImplementedException { Set result = Sets.newHashSet(); @@ -89,28 +95,16 @@ public class CrossJoinNode extends JoinNodeBase { protected Pair needToCopyRightAndLeft() { boolean copyleft = true; boolean copyRight = true; - switch (joinOp) { - case LEFT_ANTI_JOIN: - case LEFT_SEMI_JOIN: - case NULL_AWARE_LEFT_ANTI_JOIN: - copyRight = false; - break; - case RIGHT_SEMI_JOIN: - case RIGHT_ANTI_JOIN: - copyleft = false; - break; - default: - break; - } return Pair.of(copyleft, copyRight); } /** * Only for Nereids. */ - public CrossJoinNode(PlanNodeId id, PlanNode outer, PlanNode inner, List tupleIds, - List srcToOutputList, TupleDescriptor intermediateTuple, TupleDescriptor outputTuple) { - super(id, "CROSS JOIN", StatisticalType.CROSS_JOIN_NODE, JoinOperator.CROSS_JOIN); + public NestedLoopJoinNode(PlanNodeId id, PlanNode outer, PlanNode inner, List tupleIds, + JoinOperator joinOperator, List srcToOutputList, TupleDescriptor intermediateTuple, + TupleDescriptor outputTuple) { + super(id, "NESTED LOOP JOIN", StatisticalType.NESTED_LOOP_JOIN_NODE, joinOperator); this.tupleIds.addAll(tupleIds); children.add(outer); children.add(inner); @@ -147,7 +141,7 @@ public class CrossJoinNode extends JoinNodeBase { cardinality = Math.round(((double) cardinality) * computeOldSelectivity()); } } - LOG.debug("stats CrossJoin: cardinality={}", Long.toString(cardinality)); + LOG.debug("stats NestedLoopJoin: cardinality={}", Long.toString(cardinality)); } @Override @@ -204,7 +198,7 @@ public class CrossJoinNode extends JoinNodeBase { } if (!conjuncts.isEmpty()) { - output.append(detailPrefix).append("other predicates: ").append(getExplainString(conjuncts)).append("\n"); + output.append(detailPrefix).append("predicates: ").append(getExplainString(conjuncts)).append("\n"); } output.append(detailPrefix).append(String.format("cardinality=%s", cardinality)).append("\n"); // todo unify in plan node diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java index 24d07d98e3..02688673f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SingleNodePlanner.java @@ -1050,7 +1050,7 @@ public class SingleNodePlanner { && candidateCardinalityIsSmaller( candidate, tblRefToPlanNodeOfCandidate.second.getCardinality(), newRoot, newRootRightChildCardinality))) - || (candidate instanceof HashJoinNode && newRoot instanceof CrossJoinNode)) { + || (candidate instanceof HashJoinNode && newRoot instanceof NestedLoopJoinNode)) { newRoot = candidate; minEntry = tblRefToPlanNodeOfCandidate; newRootRightChildCardinality = cardinalityOfCandidate; @@ -2055,20 +2055,13 @@ public class SingleNodePlanner { } analyzer.markConjunctsAssigned(ojConjuncts); if (eqJoinConjuncts.isEmpty()) { - - // only inner join can change to cross join - if (innerRef.getJoinOp().isSemiAntiJoin()) { - throw new AnalysisException("non-equal " + innerRef.getJoinOp().toString() - + " is not supported"); - } - // construct cross join node // LOG.debug("Join between {} and {} requires at least one conjunctive" // + " equality predicate between the two tables", // outerRef.getAliasAsName(), innerRef.getAliasAsName()); // TODO If there are eq join predicates then we should construct a hash join - CrossJoinNode result = - new CrossJoinNode(ctx.getNextNodeId(), outer, inner, innerRef); + NestedLoopJoinNode result = + new NestedLoopJoinNode(ctx.getNextNodeId(), outer, inner, innerRef); result.addConjuncts(ojConjuncts); result.init(analyzer); return result; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java index 227ab74ee4..35391191c7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/DeriveFactory.java @@ -27,8 +27,8 @@ public class DeriveFactory { return new AnalyticEvalStatsDerive(); case ASSERT_NUM_ROWS_NODE: return new AssertNumRowsStatsDerive(); - case CROSS_JOIN_NODE: - return new CrossJoinStatsDerive(); + case NESTED_LOOP_JOIN_NODE: + return new NestedLoopJoinStatsDerive(); case EMPTY_SET_NODE: case REPEAT_NODE: return new EmptySetStatsDerive(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/CrossJoinStatsDerive.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/NestedLoopJoinStatsDerive.java similarity index 89% rename from fe/fe-core/src/main/java/org/apache/doris/statistics/CrossJoinStatsDerive.java rename to fe/fe-core/src/main/java/org/apache/doris/statistics/NestedLoopJoinStatsDerive.java index 64a0a86411..c4cb221f21 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/CrossJoinStatsDerive.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/NestedLoopJoinStatsDerive.java @@ -24,10 +24,10 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; /** - * Derive CrossJoinNode statistics. + * Derive NestedLoopJoinNode statistics. */ -public class CrossJoinStatsDerive extends BaseStatsDerive { - private static final Logger LOG = LogManager.getLogger(CrossJoinStatsDerive.class); +public class NestedLoopJoinStatsDerive extends BaseStatsDerive { + private static final Logger LOG = LogManager.getLogger(NestedLoopJoinStatsDerive.class); @Override protected long deriveRowCount() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticalType.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticalType.java index c8b94422b0..ebdfd0471e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticalType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/StatisticalType.java @@ -23,7 +23,7 @@ public enum StatisticalType { ANALYTIC_EVAL_NODE, ASSERT_NUM_ROWS_NODE, BROKER_SCAN_NODE, - CROSS_JOIN_NODE, + NESTED_LOOP_JOIN_NODE, EMPTY_SET_NODE, ES_SCAN_NODE, EXCEPT_NODE, diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java index cd91238dfd..d790707a88 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java @@ -1024,7 +1024,7 @@ public class QueryPlanTest extends TestWithFeService { + " SELECT MAX(k9)\n" + " FROM test.pushdown_test);"; String explainString = getSQLPlanOrErrorMsg("explain " + sql); Assert.assertTrue(explainString.contains("PLAN FRAGMENT")); - Assert.assertTrue(explainString.contains("CROSS JOIN")); + Assert.assertTrue(explainString.contains("NESTED LOOP JOIN")); Assert.assertTrue(!explainString.contains("PREDICATES")); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/StatisticDeriveTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/StatisticDeriveTest.java index 36de4967ed..6a0b678518 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/StatisticDeriveTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/StatisticDeriveTest.java @@ -161,7 +161,7 @@ public class StatisticDeriveTest extends TestWithFeService { Assert.assertNotNull(stmtExecutor.planner().getFragments()); Assert.assertNotEquals(0, stmtExecutor.planner().getFragments().size()); System.out.println(getSQLPlanOrErrorMsg("explain " + sql)); - assertSQLPlanOrErrorMsgContains(sql, "CROSS JOIN"); + assertSQLPlanOrErrorMsgContains(sql, "NESTED LOOP JOIN"); assertSQLPlanOrErrorMsgContains(sql, "ASSERT NUMBER OF ROWS"); assertSQLPlanOrErrorMsgContains(sql, "EXCHANGE"); assertSQLPlanOrErrorMsgContains(sql, "AGGREGATE"); diff --git a/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java b/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java index 51ac8dc04d..802e4f7f9e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java @@ -72,7 +72,7 @@ public class ExtractCommonFactorsRuleFunctionTest { public void testWithoutRewritten() throws Exception { String query = "select * from tb1, tb2 where (tb1.k1 =1) or (tb2.k2=1)"; String planString = dorisAssert.query(query).explainQuery(); - Assert.assertTrue(planString.contains("CROSS JOIN")); + Assert.assertTrue(planString.contains("NESTED LOOP JOIN")); } @Test @@ -89,7 +89,7 @@ public class ExtractCommonFactorsRuleFunctionTest { String planString = dorisAssert.query(query).explainQuery(); Assert.assertTrue(planString.contains("(`tb1`.`k1` = 1 OR `tb1`.`k1` = 2)")); Assert.assertTrue(planString.contains("(`tb2`.`k1` = 1 OR `tb2`.`k1` = 2)")); - Assert.assertTrue(planString.contains("CROSS JOIN")); + Assert.assertTrue(planString.contains("NESTED LOOP JOIN")); } @Test @@ -97,7 +97,7 @@ public class ExtractCommonFactorsRuleFunctionTest { String query = "select * from tb1, tb2 where (tb1.k1>1 and tb2.k1=1) or (tb1.k1 <2 and tb2.k2=2)"; String planString = dorisAssert.query(query).explainQuery(); Assert.assertFalse(planString.contains("(`tb1`.`k1` > 1 OR `tb1`.`k1` < 2)")); - Assert.assertTrue(planString.contains("CROSS JOIN")); + Assert.assertTrue(planString.contains("NESTED LOOP JOIN")); } @Test @@ -105,7 +105,7 @@ public class ExtractCommonFactorsRuleFunctionTest { String query = "select * from tb1, tb2 where (tb1.k1 between 1 and 3 and tb2.k1=1) or (tb1.k1 <2 and tb2.k2=2)"; String planString = dorisAssert.query(query).explainQuery(); Assert.assertTrue(planString.contains("`tb1`.`k1` <= 3")); - Assert.assertTrue(planString.contains("CROSS JOIN")); + Assert.assertTrue(planString.contains("NESTED LOOP JOIN")); } @Test @@ -113,7 +113,7 @@ public class ExtractCommonFactorsRuleFunctionTest { String query = "select * from tb1, tb2 where (tb1.k1 >1 and tb1.k1 <3 and tb1.k1 <5 and tb2.k1=1) " + "or (tb1.k1 <2 and tb2.k2=2)"; String planString = dorisAssert.query(query).explainQuery(); - Assert.assertTrue(planString.contains("CROSS JOIN")); + Assert.assertTrue(planString.contains("NESTED LOOP JOIN")); } @Test @@ -122,7 +122,7 @@ public class ExtractCommonFactorsRuleFunctionTest { + "or (tb1.k1 <2 and tb2.k2=2)"; String planString = dorisAssert.query(query).explainQuery(); Assert.assertTrue(planString.contains("`tb1`.`k1` < 5")); - Assert.assertTrue(planString.contains("CROSS JOIN")); + Assert.assertTrue(planString.contains("NESTED LOOP JOIN")); } @Test @@ -132,7 +132,7 @@ public class ExtractCommonFactorsRuleFunctionTest { String planString = dorisAssert.query(query).explainQuery(); Assert.assertTrue(planString.contains("`tb1`.`k1` IN (1, 2)")); Assert.assertTrue(planString.contains("`tb2`.`k1` IN (1, 2)")); - Assert.assertTrue(planString.contains("CROSS JOIN")); + Assert.assertTrue(planString.contains("NESTED LOOP JOIN")); } @Test @@ -142,7 +142,7 @@ public class ExtractCommonFactorsRuleFunctionTest { String planString = dorisAssert.query(query).explainQuery(); Assert.assertTrue(planString.contains("`tb1`.`k1` IN (1, 2, 3)")); Assert.assertTrue(planString.contains("`tb2`.`k1` IN (1, 2)")); - Assert.assertTrue(planString.contains("CROSS JOIN")); + Assert.assertTrue(planString.contains("NESTED LOOP JOIN")); } @Test @@ -153,7 +153,7 @@ public class ExtractCommonFactorsRuleFunctionTest { Assert.assertTrue(planString.contains("`tb1`.`k1` >= 1")); Assert.assertTrue(planString.contains("`tb1`.`k1` <= 4")); Assert.assertTrue(planString.contains("`tb2`.`k1` IN (1, 2, 3)")); - Assert.assertTrue(planString.contains("CROSS JOIN")); + Assert.assertTrue(planString.contains("NESTED LOOP JOIN")); } @Test diff --git a/regression-test/data/query_p0/join/test_nestedloop_semi_anti_join.out b/regression-test/data/query_p0/join/test_nestedloop_semi_anti_join.out new file mode 100644 index 0000000000..73d9152253 --- /dev/null +++ b/regression-test/data/query_p0/join/test_nestedloop_semi_anti_join.out @@ -0,0 +1,41 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !join -- + +-- !join -- + +-- !join -- + +-- !join -- + +-- !join -- + +-- !join -- +1 1 +2 2 +3 3 +10 10 + +-- !join -- + +-- !join -- +1 1 +2 2 +3 3 +10 10 + +-- !join -- +1 1 +2 2 +3 3 +10 10 + +-- !join -- + +-- !join -- + +-- !join -- +1 1 +2 2 +3 3 +10 10 + diff --git a/regression-test/suites/query/join/test_join.groovy b/regression-test/suites/query/join/test_join.groovy index a9b756d657..146ab61de4 100644 --- a/regression-test/suites/query/join/test_join.groovy +++ b/regression-test/suites/query/join/test_join.groovy @@ -529,23 +529,11 @@ suite("test_join", "query,p0") { order by 1, 2, 3, 4, 5 limit 65535""" sql"""select ${s} from ${tbName1} a left outer join ${tbName2} b on a.k1 = b.k1 where b.k3 is not null order by 1, 2, 3, 4, 5 limit 65535""" - test { - sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 > b.k1 - where a.k2 > 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 > 0 - where a.k2 > 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - def res15 = sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b + sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 > b.k1 + where a.k2 > 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 > 0 + where a.k2 > 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535""" + def res15 = sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 = b.k1 and a.k2 > 0 order by 1, 2, 3, 4, 5 limit 65535""" def res16 = sql"""select ${s} from ${tbName1} a left outer join ${tbName2} b on a.k1 = b.k1 and a.k2 > 0 where b.k3 is not null @@ -565,52 +553,22 @@ suite("test_join", "query,p0") { logger.info(exception.message) } } - test { - sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b - on a.k1 = b.k1 or a.k2 = b.k2 where a.k2 > 0 and a.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b - on a.k1 < b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b - on a.k1 = b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k6 > "000" + sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b + on a.k1 = b.k1 or a.k2 = b.k2 where a.k2 > 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b - on a.k1 = b.k1 or a.k2 > 0 where a.k2 > 0 and a.k6 > "000" + sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b + on a.k1 < b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b - on a.k1 < b.k1 or a.k2 > 0 where a.k2 > 0 and a.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - def res19 = sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 = b.k1 + sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b + on a.k1 = b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b + on a.k1 = b.k1 or a.k2 > 0 where a.k2 > 0 and a.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b + on a.k1 < b.k1 or a.k2 > 0 where a.k2 > 0 and a.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + def res19 = sql"""select ${s} from ${tbName1} a left semi join ${tbName2} b on a.k1 = b.k1 left semi join ${tbName3} c on a.k2 = c.k2 order by 1, 2, 3, 4, 5 limit 65535""" def res20 = sql"""select ${s} from (select distinct a.* from ${tbName1} a left outer join ${tbName2} b on a.k1 = b.k1 left outer join ${tbName3} c on a.k2 = c.k2 where a.k1 is not null @@ -638,25 +596,13 @@ suite("test_join", "query,p0") { def res24 = sql"""select ${s} from ${tbName1} a right outer join ${tbName1} b on a.k1 = b.k1 where a.k2 is not null order by 1, 2, 3, 4, 5 limit 65535""" check2_doris(res23, res24) - test { - sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b - on a.k1 > b.k1 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b - on a.k1 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - def res25 = sql"""select ${s} from ${tbName2} a right semi join ${tbName1} b + sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b + on a.k1 > b.k1 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b + on a.k1 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + def res25 = sql"""select ${s} from ${tbName2} a right semi join ${tbName1} b on a.k1 = b.k1 and a.k2 > 0 order by 1, 2, 3, 4, 5 limit 65535""" def res26 = sql"""select ${s} from ${tbName2} a right outer join ${tbName1} b on a.k1 = b.k1 and a.k2 > 0 where a.k2 is not null order by 1, 2, 3, 4, 5 limit 65535""" @@ -675,52 +621,22 @@ suite("test_join", "query,p0") { logger.info(exception.message) } } - test { - sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b - on a.k1 = b.k1 or a.k2 = b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b - on a.k1 < b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b - on a.k1 = b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b - on a.k1 = b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b - on a.k1 < b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - def res29 = sql"""select ${s} from ${tbName3} a right semi join ${tbName1} c on a.k1 = c.k1 + sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b + on a.k1 = b.k1 or a.k2 = b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b + on a.k1 < b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b + on a.k1 = b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b + on a.k1 = b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a right semi join ${tbName2} b + on a.k1 < b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + def res29 = sql"""select ${s} from ${tbName3} a right semi join ${tbName1} c on a.k1 = c.k1 right semi join ${tbName2} b on b.k2 = c.k2 order by 1, 2, 3, 4, 5 limit 65535""" def res30 = sql"""select ${s} from (select distinct b.* from ${tbName3} a right outer join ${tbName1} c on a.k1 = c.k1 @@ -744,25 +660,13 @@ suite("test_join", "query,p0") { def res34 = sql"""select ${s} from ${tbName1} a left outer join ${tbName2} b on a.k1 = b.k1 where b.k3 is null order by 1, 2, 3, 4, 5 limit 65535""" check2_doris(res33, res34) - test { - sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b - on a.k1 > b.k1 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b - on a.k1 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - def res35 = sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b + sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b + on a.k1 > b.k1 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b + on a.k1 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + def res35 = sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b on a.k1 = b.k1 and a.k2 > 0 order by 1, 2, 3, 4, 5 limit 50000""" def res36 = sql"""select ${s} from ${tbName1} a left outer join ${tbName2} b on a.k1 = b.k1 and a.k2 > 0 where b.k3 is null @@ -775,59 +679,29 @@ suite("test_join", "query,p0") { order by 1, 2, 3, 4, 5 limit 65535""" check2_doris(res37, res38) test { - sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b + sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" order by 1, 2, 3, 4, 5 limit 65535""" check{result, exception, startTime, endTime -> assertTrue(exception != null) logger.info(exception.message) } } - test { - sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b - on a.k1 = b.k1 or a.k2 = b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b - on a.k1 < b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b - on a.k1 = b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b - on a.k1 = b.k1 or a.k2 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b - on a.k1 < b.k1 or a.k2 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - def res39 = sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b on a.k1 = b.k1 + sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b + on a.k1 = b.k1 or a.k2 = b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b + on a.k1 < b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b + on a.k1 = b.k1 or a.k2 > b.k2 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b + on a.k1 = b.k1 or a.k2 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b + on a.k1 < b.k1 or a.k2 > 0 where a.k2 > 0 and a.k3 != 0 and a.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + def res39 = sql"""select ${s} from ${tbName1} a left anti join ${tbName2} b on a.k1 = b.k1 left anti join ${tbName3} c on a.k2 = c.k2 order by 1, 2, 3, 4, 5 limit 65535""" def res40 = sql"""select ${s} from ${tbName1} a left outer join ${tbName2} b on a.k1 = b.k1 left outer join ${tbName3} c on a.k2 = c.k2 where @@ -853,25 +727,13 @@ suite("test_join", "query,p0") { on a.k1 = b.k1 where a.k2 is null order by 1, 2, 3, 4, 5 limit 65535""" check2_doris(res43, res44) - test { - sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b - on a.k1 > b.k1 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b - on a.k1 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - def res45 = sql"""select ${s} from ${tbName2} a right anti join ${tbName1} b + sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b + on a.k1 > b.k1 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b + on a.k1 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + def res45 = sql"""select ${s} from ${tbName2} a right anti join ${tbName1} b on a.k1 = b.k1 and a.k2 > 0 order by 1, 2, 3, 4, 5 limit 65535""" def res46 = sql"""select ${s} from ${tbName2} a right outer join ${tbName1} b on a.k1 = b.k1 and a.k2 > 0 where a.k2 is null @@ -891,52 +753,22 @@ suite("test_join", "query,p0") { logger.info(exception.message) } } - test { - sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b - on a.k1 = b.k1 or a.k2 = b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b - on a.k1 < b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b - on a.k1 = b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b - on a.k1 = b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - test { - sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b - on a.k1 < b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" - order by 1, 2, 3, 4, 5 limit 65535""" - check{result, exception, startTime, endTime -> - assertTrue(exception != null) - logger.info(exception.message) - } - } - sql"""select ${s} from ${tbName1} a right anti join ${tbName2} c on a.k1 = c.k1 + sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b + on a.k1 = b.k1 or a.k2 = b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b + on a.k1 < b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b + on a.k1 = b.k1 or a.k2 > b.k2 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b + on a.k1 = b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a right anti join ${tbName2} b + on a.k1 < b.k1 or a.k2 > 0 where b.k2 > 0 and b.k3 != 0 and b.k6 > "000" + order by 1, 2, 3, 4, 5 limit 65535""" + sql"""select ${s} from ${tbName1} a right anti join ${tbName2} c on a.k1 = c.k1 right anti join ${tbName3} b on c.k2 = b.k2 order by 1, 2, 3, 4, 5 limit 65535""" sql"""select ${s} from (select distinct b.k1, b.k2, b.k3, b.k4, b.k5 from diff --git a/regression-test/suites/query_p0/join/test_nestedloop_semi_anti_join.groovy b/regression-test/suites/query_p0/join/test_nestedloop_semi_anti_join.groovy new file mode 100644 index 0000000000..36a38996d7 --- /dev/null +++ b/regression-test/suites/query_p0/join/test_nestedloop_semi_anti_join.groovy @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_nestedloop_semi_anti_join", "query_p0") { + def tbl1 = "test_nestedloop_semi_anti_join1" + def tbl2 = "test_nestedloop_semi_anti_join2" + + sql "DROP TABLE IF EXISTS ${tbl1}" + sql """ + CREATE TABLE IF NOT EXISTS ${tbl1} ( + `user_id` LARGEINT NOT NULL COMMENT "", + `user_id2` LARGEINT NOT NULL COMMENT "" + ) + DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); + """ + + sql "DROP TABLE IF EXISTS ${tbl2}" + sql """ + CREATE TABLE IF NOT EXISTS ${tbl2} ( + `user_id` LARGEINT NOT NULL COMMENT "", + `user_id2` LARGEINT NOT NULL COMMENT "" + ) + DISTRIBUTED BY HASH(user_id) PROPERTIES("replication_num" = "1"); + """ + + qt_join """ + select * from ${tbl1} where exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + qt_join """ + select * from ${tbl1} where not exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + qt_join """ + select * from ${tbl1} where user_id in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + qt_join """ + select * from ${tbl1} where user_id not in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + sql """ INSERT INTO ${tbl1} VALUES (1, 1), (2, 2), (3, 3), (10, 10); """ + qt_join """ + select * from ${tbl1} where exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + qt_join """ + select * from ${tbl1} where not exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + qt_join """ + select * from ${tbl1} where user_id in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + qt_join """ + select * from ${tbl1} where user_id not in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + + sql """ INSERT INTO ${tbl2} VALUES (2, 2), (3, 3), (4, 4), (0, 0); """ + qt_join """ + select * from ${tbl1} where exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + qt_join """ + select * from ${tbl1} where not exists (select * from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + qt_join """ + select * from ${tbl1} where user_id in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + qt_join """ + select * from ${tbl1} where user_id not in (select user_id from ${tbl2} where ${tbl1}.user_id > ${tbl2}.user_id) order by ${tbl1}.user_id; + """ + sql "DROP TABLE IF EXISTS ${tbl1}" + sql "DROP TABLE IF EXISTS ${tbl2}" +} diff --git a/regression-test/suites/tpch_sf1_p1/tpch_sf1/explain/test_q11.groovy b/regression-test/suites/tpch_sf1_p1/tpch_sf1/explain/test_q11.groovy index d8e5bc2dae..4aa789dc55 100644 --- a/regression-test/suites/tpch_sf1_p1/tpch_sf1/explain/test_q11.groovy +++ b/regression-test/suites/tpch_sf1_p1/tpch_sf1/explain/test_q11.groovy @@ -57,9 +57,9 @@ suite("test_explain_tpch_sf_1_q11") { explainStr -> explainStr.contains("VTOP-N\n" + " | order by: `\$a\$1`.`\$c\$2` DESC") && - explainStr.contains("CROSS JOIN\n" + + explainStr.contains("VNESTED LOOP JOIN\n" + " | join op: CROSS JOIN()\n" + - " | other predicates: > * 0.0001") && + " | predicates: > * 0.0001") && explainStr.contains("VAGGREGATE (merge finalize)\n" + " | output: sum( sum( * ))\n" + " | group by: `ps_partkey`") && diff --git a/regression-test/suites/tpch_sf1_p1/tpch_sf1/explain/test_q22.groovy b/regression-test/suites/tpch_sf1_p1/tpch_sf1/explain/test_q22.groovy index e572aee35a..5d851d8bfd 100644 --- a/regression-test/suites/tpch_sf1_p1/tpch_sf1/explain/test_q22.groovy +++ b/regression-test/suites/tpch_sf1_p1/tpch_sf1/explain/test_q22.groovy @@ -74,9 +74,9 @@ suite("test_explain_tpch_sf_1_q22") { explainStr.contains("vec output tuple id: 11") && explainStr.contains("output slot ids: 40 41 \n" + " | hash output slot ids: 36 37 ") && - explainStr.contains("VCROSS JOIN\n" + + explainStr.contains("VNESTED LOOP JOIN\n" + " | join op: CROSS JOIN()\n" + - " | other predicates: > ") && + " | predicates: > ") && explainStr.contains("TABLE: default_cluster:regression_test_tpch_sf1_p1_tpch_sf1.customer(customer), PREAGGREGATION: ON\n" + " PREDICATES: substr(`c_phone`, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')") && explainStr.contains("TABLE: default_cluster:regression_test_tpch_sf1_p1_tpch_sf1.orders(orders), PREAGGREGATION: ON") &&